从网址列表中筛选图像并将结果存储在电子表格中 [英] ocr images from list of urls and store the results in spreadsheet
问题描述
您好,我有一个包含数字的图像URL列表,我想对其进行OCR并将结果存储在Google电子表格中 我发现这些Google脚本可用于ocr图片
1- https://gist.github.com/tagplus5/07dde5ca61fe8f42045d >
2- https://ctrlq.org/code/20128 -extract-text-from-image-ocr
但是我不知道如何创建请求变量,因此我将请求变量替换为URL变量,如下所示:
function doGet(url) {
if (url != undefined && url != "") {
var imageBlob = UrlFetchApp.fetch(url).getBlob();
var resource = {
title: imageBlob.getName(),
mimeType: imageBlob.getContentType()
};
var options = {
ocr: true
};
var docFile = Drive.Files.insert(resource, imageBlob, options);
var doc = DocumentApp.openById(docFile.id);
var text = doc.getBody().getText().replace("\n", "");
Drive.Files.remove(docFile.id);
return ContentService.createTextOutput(text);
}
else {
return ContentService.createTextOutput("request error");
}
}
问题是,当我调用类似doGet(B1)的函数时,其中B1包含Google电子表格中图片的网址以执行OCR,并在单元格C1中获取结果文本,表示驱动器变量未定义
希望很快得到答复
好的,我修改了脚本并制作了一个表格来显示示例.该工作表位于此处(任何人都可以编辑),其脚本如下.应该启用高级驱动器服务的驱动器API(v2)才能运行此脚本.
function onOpen() {
var ss = SpreadsheetApp.getActive();
var menuItems = [
{name: 'RUN', functionName: 'doGet2'}
];
ss.addMenu('OCR', menuItems);
}
function doGet2() {
var ROW_START = 3;
var URL_COL = 1;
var TEXT_COL = 2;
var ss = SpreadsheetApp.getActive();
var sheet = ss.getActiveSheet();
var urls = sheet.getRange(ROW_START,URL_COL, sheet.getLastRow()-ROW_START+1,1).getValues();
var texts = [];
for(var i=0; i<urls.length; i++) {
var url = urls[i];
if(url != undefined && url != "") {
var imageBlob = UrlFetchApp.fetch(url).getBlob();
var resource = {
title: imageBlob.getName(),
mimeType: imageBlob.getContentType()
};
var options = {
ocr: true
};
var docFile = Drive.Files.insert(resource, imageBlob, options);
var doc = DocumentApp.openById(docFile.id);
var text = doc.getBody().getText().replace("\n", "");
texts.push([text]);
Drive.Files.remove(docFile.id);
}
else {
texts.push("request error");
}
}
sheet.getRange(ROW_START,TEXT_COL, urls.length,1).setValues(texts);
}
Hello I have a list of image URLs that contain numbers and I want to OCR them and store the results in google spreadsheet I've found these google scripts to ocr images
1- https://gist.github.com/tagplus5/07dde5ca61fe8f42045d
2- https://ctrlq.org/code/20128-extract-text-from-image-ocr
But I didn't know how to create a request variable so I've replaced request variable with URL variable like this:
function doGet(url) {
if (url != undefined && url != "") {
var imageBlob = UrlFetchApp.fetch(url).getBlob();
var resource = {
title: imageBlob.getName(),
mimeType: imageBlob.getContentType()
};
var options = {
ocr: true
};
var docFile = Drive.Files.insert(resource, imageBlob, options);
var doc = DocumentApp.openById(docFile.id);
var text = doc.getBody().getText().replace("\n", "");
Drive.Files.remove(docFile.id);
return ContentService.createTextOutput(text);
}
else {
return ContentService.createTextOutput("request error");
}
}
the problem is when I call the function like this doGet(B1) where B1 contain the url to the image in google spreadsheet to do the OCR and get the resulted text in the cell C1 it says Drive variable is undefined
Hope get answered soon
Okay, I modified your script and made a sheet to show an example. The sheet is here(anyone can edit) and its script is below. Drive API(v2) of Advanced Drive Service should be enabled to run this script.
function onOpen() {
var ss = SpreadsheetApp.getActive();
var menuItems = [
{name: 'RUN', functionName: 'doGet2'}
];
ss.addMenu('OCR', menuItems);
}
function doGet2() {
var ROW_START = 3;
var URL_COL = 1;
var TEXT_COL = 2;
var ss = SpreadsheetApp.getActive();
var sheet = ss.getActiveSheet();
var urls = sheet.getRange(ROW_START,URL_COL, sheet.getLastRow()-ROW_START+1,1).getValues();
var texts = [];
for(var i=0; i<urls.length; i++) {
var url = urls[i];
if(url != undefined && url != "") {
var imageBlob = UrlFetchApp.fetch(url).getBlob();
var resource = {
title: imageBlob.getName(),
mimeType: imageBlob.getContentType()
};
var options = {
ocr: true
};
var docFile = Drive.Files.insert(resource, imageBlob, options);
var doc = DocumentApp.openById(docFile.id);
var text = doc.getBody().getText().replace("\n", "");
texts.push([text]);
Drive.Files.remove(docFile.id);
}
else {
texts.push("request error");
}
}
sheet.getRange(ROW_START,TEXT_COL, urls.length,1).setValues(texts);
}
这篇关于从网址列表中筛选图像并将结果存储在电子表格中的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!