CasperJS循环或遍历多个网页? [英] CasperJS loop or iterate through multiple web pages?

查看:143
本文介绍了CasperJS循环或遍历多个网页?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我有一个CasperJS脚本,可以从一个网页中抓取评分和日期。现在我想从同一个网站下的多个页面上抓取相同的数据。基于下面这段代码,我该如何循环遍历不同的子页面:

  // Shared accumulators: filled by the scraping step and read by the run callback.
  var ratings = [];
  var dates = [];

  // Create the CasperJS instance with image and plugin loading switched off
  // and debug-level, verbose logging switched on.
  var casper = require('casper').create({
      pageSettings: {
          loadImages: false,
          loadPlugins: false
      },
      logLevel: "debug",
      verbose: true
  });

  // File-system module; the script uses fs.write() to save the scraped results.
  var fs = require('fs');
/**
 * Collects the `title` attribute of every overall-rating image on the page.
 *
 * Reads the global `document`, so it is meant to be handed to
 * casper.evaluate() and executed inside the loaded page.
 *
 * @returns {Array} one `title` value per matched <img> element.
 */
function getRatings() {
    var ratings = document.querySelectorAll('#BVRRRatingOverall_Review_Display > div.BVRRRatingNormalImage > img');
    return Array.prototype.map.call(ratings, function(e) {
        return e.getAttribute('title');
    });
}

/**
 * Collects the inner HTML of every review-date element on the page.
 *
 * Reads the global `document`, so it is meant to be handed to
 * casper.evaluate() and executed inside the loaded page.
 *
 * @returns {Array} one `innerHTML` string per matched date <span>.
 */
function getDate() {
    var dates = document.querySelectorAll('#BVSubmissionPopupContainer > div.BVRRReviewDisplayStyle5Header > div.BVRRReviewDateContainer > span.BVRRValue.BVRRReviewDate');

    return Array.prototype.map.call(dates, function(e) {
        return e.innerHTML;
    });
}

// Step 1: open the review page and print a marker once it has loaded.
casper.start('http://www.t-mobile.com/cell-phones/samsung-galaxy-s-5.html?bvrrp=9060/reviews/product/1/598aea53-16d0-4c12-b53a-105157092c52.htm', function() {
    this.echo('hi');
});

// Step 2: run the two page-side extractors and keep their results in the
// shared ratings/dates arrays.
casper.then(function() {
    ratings = this.evaluate(getRatings);
    dates = this.evaluate(getDate);

    this.echo(ratings);
});

// Final callback: pair each rating with its date, write the result to disk
// (mode 'w' overwrites the file), then exit.
casper.run(function() {
    this.echo(ratings.length + ' ratings found:');

    for (var i = 0; i < ratings.length; i++) {
        // Turn each entry into "rating: date" and blank out the used date.
        ratings[i] = ratings[i] + ': ' + dates[i];
        dates[i] = '';
    }
    this.echo(ratings);
    var content = ratings;

    content = content.join("\n");

    fs.write("C:/Users/Karan/Copy/tweesis/implementation/scraping/samsungratings.txt", content, 'w');

    this.echo(dates.length + ' dates found:').exit();
});

非常感谢任何帮助 :)

解决方案

由于存在一个下一页按钮,您可以使用它来递归遍历所有页面:

  /**
   * Scrapes the ratings and dates on the currently loaded review page, appends
   * them to the output file as "rating: date" lines and, while the "next page"
   * link is visible, clicks it and schedules itself again as the next step.
   *
   * Relies on names declared elsewhere in the script: casper, fs, getRatings,
   * getDate and the shared ratings/dates arrays, which it overwrites.
   */
  function getRatingsAndWrite(){
      // evaluate() may hand back null (e.g. if the page-side function fails);
      // fall back to an empty array so the .length reads below stay safe.
      ratings = casper.evaluate(getRatings) || [];
      dates = casper.evaluate(getDate) || [];

      casper.echo(ratings);
      casper.echo(ratings.length + ' ratings found:');

      for(var i=0; i<ratings.length; i++){
          ratings[i] = ratings[i]+': '+dates[i];
          dates[i] = '';
      }
      casper.echo(ratings);

      // The file is appended to once per page, so every line, including the
      // last one, must end with a newline. Otherwise the last rating of this
      // page and the first rating of the next page end up on the same line.
      var content = '';
      for(var j=0; j<ratings.length; j++){
          content += ratings[j] + "\n";
      }

      fs.write("C:/Users/Karan/Copy/tweesis/implementation/scraping/samsungratings.txt", content, 'a');

      casper.echo(dates.length + ' dates found:');

      var nextLink = ".BVRRPageLink.BVRRNextPage > a";
      if (casper.visible(nextLink)) {
          // Queue the click first, then this function again for the new page.
          casper.thenClick(nextLink);
          casper.then(getRatingsAndWrite);
      } else {
          casper.echo("END");
      }
  }

// Open the first review page.
casper.start('http://www.t-mobile.com/cell-phones/samsung-galaxy-s-5.html?bvrrp=9060/reviews/product/1/598aea53-16d0-4c12-b53a-105157092c52.htm');

// Scrape it; getRatingsAndWrite queues itself again for each following page.
casper.then(getRatingsAndWrite);

// Execute the queued steps.
casper.run();

相关的回答见:CasperJS在按钮点击后解析下一页。


I have a CasperJS script that scrapes ratings and dates from one webpage. Now I want to scrape the same data from multiple pages under the same website. How can I loop through the different subpages given this code:

// File-system module; the script uses fs.write() to save the scraped results.
var fs = require('fs');

// CasperJS instance: image and plugin loading are switched off, and logging
// is verbose at debug level.
var casper = require('casper').create({
    verbose: true,
    logLevel: "debug",
    pageSettings: {
        loadPlugins: false,
        loadImages: false
    }
});

// Shared accumulators: filled by the scraping step and read by the run callback.
var ratings = [];
var dates = [];

/**
 * Gathers the `title` attribute of each overall-rating image in the page.
 *
 * Uses the global `document`, so it has to run in the page context
 * (it is passed to evaluate() elsewhere in the script).
 *
 * @returns {Array} the `title` value of every matched <img>, in document order.
 */
function getRatings() {
    var selector = '#BVRRRatingOverall_Review_Display > div.BVRRRatingNormalImage > img';
    var images = document.querySelectorAll(selector);
    var titles = [];

    for (var idx = 0; idx < images.length; idx++) {
        titles.push(images[idx].getAttribute('title'));
    }

    return titles;
}

/**
 * Gathers the inner HTML of each review-date element in the page.
 *
 * Uses the global `document`, so it has to run in the page context
 * (it is passed to evaluate() elsewhere in the script).
 *
 * @returns {Array} the `innerHTML` of every matched date <span>, in document order.
 */
function getDate() {
    var selector = '#BVSubmissionPopupContainer > div.BVRRReviewDisplayStyle5Header > div.BVRRReviewDateContainer > span.BVRRValue.BVRRReviewDate';
    var dateNodes = document.querySelectorAll(selector);
    var found = [];

    Array.prototype.forEach.call(dateNodes, function(node) {
        found.push(node.innerHTML);
    });

    return found;
}

// Step 1: open the review page and print a marker once it has loaded.
casper.start('http://www.t-mobile.com/cell-phones/samsung-galaxy-s-5.html?bvrrp=9060/reviews/product/1/598aea53-16d0-4c12-b53a-105157092c52.htm', function () {
    this.echo('hi');
});

// Step 2: run both page-side extractors and store their results in the
// shared ratings/dates arrays.
casper.then(function () {
    ratings = this.evaluate(getRatings);
    dates = this.evaluate(getDate);
    this.echo(ratings);
});

// Final callback: pair ratings with dates, write them out, then exit.
casper.run(function () {
    var total = ratings.length;
    this.echo(total + ' ratings found:');

    // Rewrite each entry as "rating: date" and blank out the used date.
    var index = 0;
    while (index < total) {
        ratings[index] = ratings[index] + ': ' + dates[index];
        dates[index] = '';
        index += 1;
    }
    this.echo(ratings);

    // Mode 'w': the output file is overwritten on every run.
    var outputPath = "C:/Users/Karan/Copy/tweesis/implementation/scraping/samsungratings.txt";
    fs.write(outputPath, ratings.join("\n"), 'w');

    this.echo(dates.length + ' dates found:').exit();
});

Any help is appreciated :)

解决方案

Since there exists a next page button, you can use it to traverse all pages recursively:

/**
 * Scrapes the ratings and dates on the currently loaded review page, appends
 * them to the output file as "rating: date" lines and, while the "next page"
 * link is visible, clicks it and schedules itself again as the next step.
 *
 * Relies on names declared elsewhere in the script: casper, fs, getRatings,
 * getDate and the shared ratings/dates arrays, which it overwrites.
 */
function getRatingsAndWrite(){
    // evaluate() may hand back null (e.g. if the page-side function fails);
    // fall back to an empty array so the .length reads below stay safe.
    ratings = casper.evaluate(getRatings) || [];
    dates = casper.evaluate(getDate) || [];

    casper.echo(ratings);
    casper.echo(ratings.length + ' ratings found:');

    for(var i=0; i<ratings.length; i++){
        ratings[i] = ratings[i]+': '+dates[i];
        dates[i] = '';
    }
    casper.echo(ratings);

    // The file is appended to once per page, so every line, including the
    // last one, must end with a newline. Otherwise the last rating of this
    // page and the first rating of the next page end up on the same line.
    var content = '';
    for(var j=0; j<ratings.length; j++){
        content += ratings[j] + "\n";
    }

    fs.write("C:/Users/Karan/Copy/tweesis/implementation/scraping/samsungratings.txt", content, 'a');

    casper.echo(dates.length + ' dates found:');

    var nextLink = ".BVRRPageLink.BVRRNextPage > a";
    if (casper.visible(nextLink)) {
        // Queue the click first, then this function again for the new page.
        casper.thenClick(nextLink);
        casper.then(getRatingsAndWrite);
    } else {
        casper.echo("END");
    }
}

// Open the first review page, scrape it (getRatingsAndWrite queues itself
// again for each following page), then execute the queued steps.
casper
    .start('http://www.t-mobile.com/cell-phones/samsung-galaxy-s-5.html?bvrrp=9060/reviews/product/1/598aea53-16d0-4c12-b53a-105157092c52.htm')
    .then(getRatingsAndWrite)
    .run();

A related answer is A: CasperJS parse next page after button click.

这篇关于CasperJS循环或遍历多个网页?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆