CasperJS循环或遍历多个网页? [英] CasperJS loop or iterate through multiple web pages?
本文介绍了CasperJS循环或遍历多个网页?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
// Result arrays shared by the scraping step and the final report.
var ratings = [];
var dates = [];
// Create the CasperJS instance; images and plugins are not loaded,
// and debug-level logging is printed because verbose is on.
var casper = require('casper').create({
    pageSettings: {
        loadImages: false,
        loadPlugins: false
    },
    logLevel: "debug",
    verbose: true
});
var fs = require('fs');
/**
 * Collects the `title` attribute of every overall-rating image found
 * through the global `document`.
 *
 * @returns {Array} one title value per matched <img> element
 */
function getRatings() {
    var ratings = document.querySelectorAll('#BVRRRatingOverall_Review_Display > div.BVRRRatingNormalImage > img');
    // querySelectorAll returns an array-like list, so borrow Array's map.
    return Array.prototype.map.call(ratings, function(e) {
        return e.getAttribute('title');
    });
}
/**
 * Collects the inner HTML of every review-date element found through
 * the global `document`.
 *
 * @returns {Array} one innerHTML string per matched <span> element
 */
function getDate() {
    var dates = document.querySelectorAll('#BVSubmissionPopupContainer > div.BVRRReviewDisplayStyle5Header > div.BVRRReviewDateContainer > span.BVRRValue.BVRRReviewDate');
    // querySelectorAll returns an array-like list, so borrow Array's map.
    return Array.prototype.map.call(dates, function(e) {
        return e.innerHTML;
    });
}
// Open the review page.
casper.start('http://www.t-mobile.com/cell-phones/samsung-galaxy-s-5.html?bvrrp=9060/reviews/product/1/598aea53-16d0-4c12-b53a-105157092c52.htm', function() {
    this.echo('hi');
});
// Run both extractors in the page and keep their results.
casper.then(function() {
    ratings = this.evaluate(getRatings);
    dates = this.evaluate(getDate);
    this.echo(ratings);
});
// After all steps: pair each rating with its date, write the file, exit.
casper.run(function() {
    this.echo(ratings.length + ' ratings found:');
    for (var i = 0; i < ratings.length; i++) {
        ratings[i] = ratings[i] + ': ' + dates[i];
        dates[i] = '';
    }
    this.echo(ratings);
    var content = ratings;
    content = content.join("\n");
    // Mode 'w' overwrites the output file.
    fs.write("C:/Users/Karan/Copy/tweesis/implementation/scraping/samsungratings.txt", content, 'w');
    this.echo(dates.length + ' dates found:').exit();
});
非常感谢任何帮助 :)
解决方案
由于存在一个下一页按钮,您可以使用它来递归遍历所有页面:
/**
 * Scrapes the ratings and dates of the current page, appends them to the
 * output file, and schedules itself again after clicking the "next page"
 * link while that link is visible.
 *
 * Reads and writes the outer `ratings` and `dates` variables and uses the
 * outer `casper`, `fs`, `getRatings` and `getDate`.
 */
function getRatingsAndWrite() {
    // Fall back to empty lists so a null/undefined evaluate() result
    // does not throw on `.length` below.
    ratings = casper.evaluate(getRatings) || [];
    dates = casper.evaluate(getDate) || [];
    casper.echo(ratings);
    casper.echo(ratings.length + ' ratings found:');
    for (var i = 0; i < ratings.length; i++) {
        ratings[i] = ratings[i] + ': ' + dates[i];
        dates[i] = '';
    }
    casper.echo(ratings);
    // The file is opened in append mode for every page, so each chunk must
    // end with a newline; otherwise the last review of one page and the
    // first review of the next end up on the same line.
    var content = ratings.length > 0 ? ratings.join("\n") + "\n" : "";
    fs.write("C:/Users/Karan/Copy/tweesis/implementation/scraping/samsungratings.txt", content, 'a');
    casper.echo(dates.length + ' dates found:');
    var nextLink = ".BVRRPageLink.BVRRNextPage > a";
    if (casper.visible(nextLink)) {
        // Queue the click, then queue another scrape of the page it loads.
        casper.thenClick(nextLink);
        casper.then(getRatingsAndWrite);
    } else {
        casper.echo("END");
    }
}
// Open the first review page, scrape it (and every following page), then run the queue.
casper.start('http://www.t-mobile.com/cell-phones/samsung-galaxy-s-5.html?bvrrp=9060/reviews/product/1/598aea53-16d0-4c12-b53a-105157092c52.htm');
casper.then(getRatingsAndWrite);
casper.run();
相关的回答参见:CasperJS 在点击按钮后解析下一页。
I have a CasperJS script that scrapes ratings and dates from one webpage. Now I want to scrape the same data from multiple pages under the same website. How can I loop through the different subpages given this code:
// Module handles first, then the shared result arrays.
var fs = require('fs');
var casper = require('casper').create({
    verbose: true,
    logLevel: "debug",
    // Neither images nor plugins are loaded.
    pageSettings: {
        loadPlugins: false,
        loadImages: false
    }
});
// Result arrays, reassigned by later steps.
var dates = [];
var ratings = [];
/**
 * Reads the `title` attribute of each overall-rating image matched in
 * the global `document`.
 *
 * @returns {Array} the title values, in the order the elements were matched
 */
function getRatings() {
    var images = document.querySelectorAll('#BVRRRatingOverall_Review_Display > div.BVRRRatingNormalImage > img');
    var titles = [];
    for (var idx = 0; idx < images.length; idx++) {
        titles.push(images[idx].getAttribute('title'));
    }
    return titles;
}
/**
 * Reads the innerHTML of each review-date element matched in the
 * global `document`.
 *
 * @returns {Array} the innerHTML strings, in the order the elements were matched
 */
function getDate() {
    var dateNodes = document.querySelectorAll('#BVSubmissionPopupContainer > div.BVRRReviewDisplayStyle5Header > div.BVRRReviewDateContainer > span.BVRRValue.BVRRReviewDate');
    var found = [];
    var pos = 0;
    while (pos < dateNodes.length) {
        found.push(dateNodes[pos].innerHTML);
        pos += 1;
    }
    return found;
}
// Step 1: open the review page and print a greeting.
casper.start('http://www.t-mobile.com/cell-phones/samsung-galaxy-s-5.html?bvrrp=9060/reviews/product/1/598aea53-16d0-4c12-b53a-105157092c52.htm', function() {
    this.echo('hi');
});

// Step 2: run both extractors and keep what they return.
casper.then(function() {
    ratings = this.evaluate(getRatings);
    dates = this.evaluate(getDate);
    this.echo(ratings);
});

// Completion callback: merge each rating with its date, write the file, exit.
casper.run(function() {
    this.echo(ratings.length + ' ratings found:');

    var pos = 0;
    while (pos < ratings.length) {
        ratings[pos] = ratings[pos] + ': ' + dates[pos];
        dates[pos] = '';
        pos++;
    }

    this.echo(ratings);

    // One "rating: date" entry per line; mode 'w' overwrites the file.
    var content = ratings.join("\n");
    fs.write("C:/Users/Karan/Copy/tweesis/implementation/scraping/samsungratings.txt", content, 'w');

    var summary = this.echo(dates.length + ' dates found:');
    summary.exit();
});
Any help is appreciated :)
解决方案
Since there exists a next page button, you can use it to traverse all pages recursively:
/**
 * Scrapes the ratings and dates of the current page, appends them to the
 * output file, and schedules itself again after clicking the "next page"
 * link while that link is visible.
 *
 * Reads and writes the outer `ratings` and `dates` variables and uses the
 * outer `casper`, `fs`, `getRatings` and `getDate`.
 */
function getRatingsAndWrite() {
    // Fall back to empty lists so a null/undefined evaluate() result
    // does not throw on `.length` below.
    ratings = casper.evaluate(getRatings) || [];
    dates = casper.evaluate(getDate) || [];
    casper.echo(ratings);
    casper.echo(ratings.length + ' ratings found:');
    for (var i = 0; i < ratings.length; i++) {
        ratings[i] = ratings[i] + ': ' + dates[i];
        dates[i] = '';
    }
    casper.echo(ratings);
    // The file is opened in append mode for every page, so each chunk must
    // end with a newline; otherwise the last review of one page and the
    // first review of the next end up on the same line.
    var content = ratings.length > 0 ? ratings.join("\n") + "\n" : "";
    fs.write("C:/Users/Karan/Copy/tweesis/implementation/scraping/samsungratings.txt", content, 'a');
    casper.echo(dates.length + ' dates found:');
    var nextLink = ".BVRRPageLink.BVRRNextPage > a";
    if (casper.visible(nextLink)) {
        // Queue the click, then queue another scrape of the page it loads.
        casper.thenClick(nextLink);
        casper.then(getRatingsAndWrite);
    } else {
        casper.echo("END");
    }
}
// Open the first review page, queue the recursive scraper, then run the queue.
casper
    .start('http://www.t-mobile.com/cell-phones/samsung-galaxy-s-5.html?bvrrp=9060/reviews/product/1/598aea53-16d0-4c12-b53a-105157092c52.htm')
    .then(getRatingsAndWrite)
    .run();
A related answer is A: CasperJS parse next page after button click.
这篇关于CasperJS循环或遍历多个网页?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文