使用 Google Apps Script 抓取表格 [英] scrape table using google app scripts

查看:76
本文介绍了使用谷歌应用程序脚本刮表的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想从这个网站抓取数据:https://finviz.com/screener.ashx?v=141&f=sh_avgvol_o500,sh_curvol_o2000,sh_price_u50&o=-volume



我可以获取整张表格吗?
谢谢

解决方案

以下修改如何?

修改后的脚本:



检索到的数据被导入到Spreadsheet中。

function myFunction(start) {
  var url = "https://finviz.com/screener.ashx?v=141&f=sh_avgvol_o500,sh_curvol_o2000,sh_price_u50&o=-volume&r="+ start;
  var content = UrlFetchApp.fetch(url).getContentText();
  var scraped = Parser.data(content).from('class=\"screener-body-table-nw\"').to('</td>').iterate();
  var res = [];

  // 如果您不想要列标题,请删除此部分。
  var temp = [];
  var titles = Parser.data(content).from("style=\"cursor:pointer;\">").to("</td>").iterate();
  titles.forEach(function(e){
    if (!~e.indexOf('\">')) {
      temp.push(e);
    } else if (~e.indexOf('img')) {
      temp.push(e.replace(/<img.+>/g, ''));
    }
  });
  res.push(temp);
  // -----

  var temp = [];
  var oticker = "";
  scraped.forEach(function(e){
    var ticker = Parser.data(e).from("<a href=\"quote.ashx?t=").to("&").build();
    var data1 = Parser.data(e).from("screener-link\">").to("</a>").build();
    var data2 = Parser.data(data1).from(">").to("<").build();
    if (oticker == "") oticker = ticker;
    if (ticker != oticker) {
      temp.splice(1, 0, oticker);
      res.push(temp);
      temp = [];
      oticker = ticker;
      temp.push(data1);
    } else {
      if (!~(data2 || data1).indexOf('<')) temp.push(data2 || data1);
    }
  });
  var ss = SpreadsheetApp.getActiveSheet();
  ss.getRange(ss.getLastRow() + 1, 1, res.length, res[0].length).setValues(res);
}



结果:




I would love to scrape data from this website: https://finviz.com/screener.ashx?v=141&f=sh_avgvol_o500,sh_curvol_o2000,sh_price_u50&o=-volume

I want to scrape the whole table. I tried using this :

/**
 * Fetches one page of the Finviz screener and extracts every <tbody> body from it.
 *
 * Relies on two globals that are not defined in this snippet: `UrlFetchApp`
 * (Apps Script service) and `Parser` (external text-parsing library that must
 * be attached to the project).
 *
 * @param {number|string} start Value appended to the URL as the `r` query
 *     parameter (presumably the index of the first screener row - confirm
 *     against the site).
 * @return {Array} Whatever `Parser...iterate()` yields for the text found
 *     between each '<tbody>' and '</tbody>' pair.
 */
function myFunction(start) {
    // The URL must be a single-line literal: a double-quoted string cannot
    // span physical lines, and stray newlines would corrupt the query anyway.
    var url = "https://finviz.com/screener.ashx?v=141&f=sh_avgvol_o500,sh_curvol_o2000,sh_price_u50&o=-volume&r=" + start;
    var fromText = '<tbody>';
    var toText = '</tbody>';
    var content = UrlFetchApp.fetch(url).getContentText();
    var scraped = Parser
              .data(content)
              .from(fromText)
              .to(toText)
              .iterate();
    // Hand the matches back to the caller instead of discarding them.
    return scraped;
}

I could scrape every element using xpath, but I think it would be quite slow.

Here is the html and the table:

Can I get the whole table ? Thanks

解决方案

How about the following modification? The retrieved data is imported into the Spreadsheet.

Modified script :

/**
 * Fetches one page of the Finviz screener and appends its table to the
 * active sheet, directly below the last used row.
 *
 * Cells are cut out of the raw HTML with the global `Parser` object (an
 * external text-parsing library that must be attached to the project; it is
 * not defined here). Consecutive cells are grouped into one row for as long
 * as the ticker symbol found in each cell's quote link stays the same.
 *
 * @param {number|string} [start] Value sent as the `r` query parameter
 *     (presumably the index of the first screener row to fetch - confirm
 *     against the site). Falls back to 1 when omitted, e.g. when the function
 *     is run directly from the script editor.
 */
function myFunction(start) {
  // Without this fallback a bare run would request "...&r=undefined".
  var firstRow = (start === undefined || start === null) ? 1 : start;
  var url = "https://finviz.com/screener.ashx?v=141&f=sh_avgvol_o500,sh_curvol_o2000,sh_price_u50&o=-volume&r=" + firstRow;
  var content = UrlFetchApp.fetch(url).getContentText();
  var cells = Parser.data(content).from('class=\"screener-body-table-nw\"').to('</td>').iterate();
  var res = [];

  // If you don't want column titles, please remove this part.
  var header = [];
  var titles = Parser.data(content).from("style=\"cursor:pointer;\">").to("</td>").iterate();
  titles.forEach(function (title) {
    if (title.indexOf('\">') === -1) {
      // Plain-text title: keep as is.
      header.push(title);
    } else if (title.indexOf('img') !== -1) {
      // Title carrying an <img> tag: strip the tag.
      header.push(title.replace(/<img.+>/g, ''));
    }
  });
  res.push(header);
  // -----

  var row = [];
  var currentTicker = "";
  cells.forEach(function (cell) {
    var ticker = Parser.data(cell).from("<a href=\"quote.ashx?t=").to("&").build();
    var linkText = Parser.data(cell).from("screener-link\">").to("</a>").build();
    // Text nested one tag deeper inside the link, when there is any.
    var innerText = Parser.data(linkText).from(">").to("<").build();
    if (currentTicker === "") currentTicker = ticker;
    if (ticker !== currentTicker) {
      // Ticker changed: finish the previous row (ticker goes into column 2)
      // and start a new one with this cell as its first value.
      row.splice(1, 0, currentTicker);
      res.push(row);
      row = [linkText];
      currentTicker = ticker;
    } else {
      var value = innerText || linkText;
      // Skip values that still contain markup.
      if (value.indexOf('<') === -1) row.push(value);
    }
  });
  // Flush the final row: the loop only pushes a row when the *next* ticker
  // shows up, so without this the last row of the page would be lost.
  if (row.length > 0) {
    row.splice(1, 0, currentTicker);
    res.push(row);
  }

  // setValues() needs a rectangular array matching the range, so size the
  // range to the widest row and pad shorter rows with empty strings.
  var width = 0;
  res.forEach(function (r) {
    if (r.length > width) width = r.length;
  });
  if (width === 0) return; // nothing was scraped; a zero-column range is invalid
  res.forEach(function (r) {
    while (r.length < width) r.push("");
  });

  var sheet = SpreadsheetApp.getActiveSheet();
  sheet.getRange(sheet.getLastRow() + 1, 1, res.length, width).setValues(res);
}

Result :

这篇关于使用谷歌应用程序脚本刮表的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆