从网址列表中筛选图像并将结果存储在电子表格中 [英] ocr images from list of urls and store the results in spreadsheet

查看:135
本文介绍了从网址列表中筛选图像并将结果存储在电子表格中的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

您好,我有一个包含数字的图像URL列表,我想对其进行OCR并将结果存储在Google电子表格中 我发现这些Google脚本可用于ocr图片

1- https://gist.github.com/tagplus5/07dde5ca61fe8f42045d

2- https://ctrlq.org/code/20128 -extract-text-from-image-ocr

但是我不知道如何创建请求变量,因此我将请求变量替换为URL变量,如下所示:

function doGet(url) {
  if (url != undefined && url != "") {
    var imageBlob = UrlFetchApp.fetch(url).getBlob();
    var resource = {
      title: imageBlob.getName(),
      mimeType: imageBlob.getContentType()
    };
    var options = {
      ocr: true
    };

    var docFile = Drive.Files.insert(resource, imageBlob, options);
    var doc = DocumentApp.openById(docFile.id);
    var text = doc.getBody().getText().replace("\n", "");
    Drive.Files.remove(docFile.id);
    return ContentService.createTextOutput(text);
  }
  else {
    return ContentService.createTextOutput("request error");
  }
}

问题是,当我调用类似doGet(B1)的函数时,其中B1包含Google电子表格中图片的网址以执行OCR,并在单元格C1中获取结果文本,表示驱动器变量未定义

希望很快得到答复

解决方案

好的,我修改了脚本并制作了一个表格来显示示例.该工作表位于此处(任何人都可以编辑),其脚本如下.应该启用高级驱动器服务的驱动器API(v2)才能运行此脚本.

function onOpen() {
  var ss = SpreadsheetApp.getActive();
  var menuItems = [
    {name: 'RUN', functionName: 'doGet2'}
  ];

  ss.addMenu('OCR', menuItems);
}    


function doGet2() {
  var ROW_START = 3;
  var URL_COL = 1;
  var TEXT_COL = 2;

  var ss = SpreadsheetApp.getActive();
  var sheet = ss.getActiveSheet();

  var urls = sheet.getRange(ROW_START,URL_COL, sheet.getLastRow()-ROW_START+1,1).getValues();
  var texts = [];
  for(var i=0; i<urls.length; i++) {
    var url = urls[i];
    if(url != undefined && url != "") {
      var imageBlob = UrlFetchApp.fetch(url).getBlob();
      var resource = {
        title: imageBlob.getName(),
        mimeType: imageBlob.getContentType()
      };
      var options = {
        ocr: true
      };

      var docFile = Drive.Files.insert(resource, imageBlob, options);
      var doc = DocumentApp.openById(docFile.id);
      var text = doc.getBody().getText().replace("\n", "");

      texts.push([text]);
      Drive.Files.remove(docFile.id);
    }
    else {
      texts.push("request error");
    }
  }
  sheet.getRange(ROW_START,TEXT_COL, urls.length,1).setValues(texts);
}

Hello I have a list of image URLs that contain numbers and I want to OCR them and store the results in google spreadsheet I've found these google scripts to ocr images

1- https://gist.github.com/tagplus5/07dde5ca61fe8f42045d

2- https://ctrlq.org/code/20128-extract-text-from-image-ocr

But I didn't know how to create a request variable so I've replaced request variable with URL variable like this:

function doGet(url) {
  if (url != undefined && url != "") {
    var imageBlob = UrlFetchApp.fetch(url).getBlob();
    var resource = {
      title: imageBlob.getName(),
      mimeType: imageBlob.getContentType()
    };
    var options = {
      ocr: true
    };

    var docFile = Drive.Files.insert(resource, imageBlob, options);
    var doc = DocumentApp.openById(docFile.id);
    var text = doc.getBody().getText().replace("\n", "");
    Drive.Files.remove(docFile.id);
    return ContentService.createTextOutput(text);
  }
  else {
    return ContentService.createTextOutput("request error");
  }
}

the problem is when I call the function like this doGet(B1) where B1 contain the url to the image in google spreadsheet to do the OCR and get the resulted text in the cell C1 it says Drive variable is undefined

Hope get answered soon

解决方案

Okay, I modified your script and made a sheet to show an example. The sheet is here(anyone can edit) and its script is below. Drive API(v2) of Advanced Drive Service should be enabled to run this script.

function onOpen() {
  var ss = SpreadsheetApp.getActive();
  var menuItems = [
    {name: 'RUN', functionName: 'doGet2'}
  ];

  ss.addMenu('OCR', menuItems);
}    


function doGet2() {
  var ROW_START = 3;
  var URL_COL = 1;
  var TEXT_COL = 2;

  var ss = SpreadsheetApp.getActive();
  var sheet = ss.getActiveSheet();

  var urls = sheet.getRange(ROW_START,URL_COL, sheet.getLastRow()-ROW_START+1,1).getValues();
  var texts = [];
  for(var i=0; i<urls.length; i++) {
    var url = urls[i];
    if(url != undefined && url != "") {
      var imageBlob = UrlFetchApp.fetch(url).getBlob();
      var resource = {
        title: imageBlob.getName(),
        mimeType: imageBlob.getContentType()
      };
      var options = {
        ocr: true
      };

      var docFile = Drive.Files.insert(resource, imageBlob, options);
      var doc = DocumentApp.openById(docFile.id);
      var text = doc.getBody().getText().replace("\n", "");

      texts.push([text]);
      Drive.Files.remove(docFile.id);
    }
    else {
      texts.push("request error");
    }
  }
  sheet.getRange(ROW_START,TEXT_COL, urls.length,1).setValues(texts);
}

这篇关于从网址列表中筛选图像并将结果存储在电子表格中的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆