CasperJS内存不足 [英] CasperJS running out of memory
问题描述
我正在用CasperJS运行以下脚本,并且在遍历数组的大约1/3后,它开始用尽交换空间,并且计算机变得非常慢。我在这里做什么错了?
I'm running the following script with CasperJS and after about 1/3rd of the way through the array it starts running out of swap space and the machine becomes extremely slow. What am i doing wrong here?
searchPages
是一个54个数字的数组,对应于搜索的URL值
searchPages
is an array of 54 numbers corresponding to a URL value for a search page.
casper.each(searchPages,function(casper,index){
loadSearch(casper,index);
});
function loadSearch(casper,index){
var currentTime = new Date();
var month = currentTime.getMonth() + 2;
var day = currentTime.getDate();
var year = currentTime.getFullYear();
var dateStart = month + "/" + day + "/" + year;
month = currentTime.getMonth() + 3;
var dateEnd = month + "/" + day + "/" + year;
casper.thenOpen(url,function(){
var myfile = "data-"+year + "-" + month + "-" + day+".html";
this.evaluate(function(j) {
document.querySelector('select[name="searchParameters.localeId"]').selectedIndex = j;
},index);
this.evaluate(function(start) {
$("#leaveDate").val(start);
},dateStart);
this.evaluate(function(end) {
$("#returnDate").val(end);
},dateEnd);
this.evaluate(function() {
$("#OSB_btn").click();
});
this.waitForSelector('#destinationForPackage', function() {
if (this.exists('#destinationForPackage')){
var name = casper.evaluate(function() {
return $("#destinationForPackage option[value='" + $("#destinationForPackage").val() + "']").text()
});
if (name != "Going To"){
if (name == null){
console.log("it's null");
}else{
name = name.replace("/","_");
casper.capture('Captures/Searches/search_' + name + '.jpg');
console.log("Capturing search_" + name);
}
}
}else{
console.log("Still doesn't exist...retry");
loadSearch(casper,index);
}
},function(){
console.log("Search page timed-out.");
},20000);
});
}
每个循环大约增加3GB。
And it adds about 3GB per loop.
推荐答案
事实证明这是一个PhantomJS的一个非常著名的问题。 3年以上是一个公开的错误,而且显然与QT Webkit有关。尽管如此,我仍然可以通过在循环期间关闭每个页面并重新打开新的Phantom页面来解决该问题。有点棘手的解决方法,但是内存消耗要少得多。但是,大约200页后,它仍然具有相当高的内存使用量(超过1GB)。因此,我将脚本分成200个块,并在完成后开始下一个脚本。这是在不占用过多内存的情况下成功完成的成品。由于某些原因,它在MacOS上的使用量少于Windows。
Well turns out this is a very well-known issue with PhantomJS. 3+ years as an open bug and apparently it has something to do with QT Webkit. Nonetheless, i was able to solve it by closing each page during the loop and re-opening a new Phantom page. It's a bit of a hacky work-around, but the memory consumption is far less. However, after about 200 pages, it still has a pretty high memory usage (1GB+). So, i break up my scripts into blocks of 200 and just start the next one upon completion. Here is the finished product that completes successfully without too much memory usage. It uses less on MacOS than Windows for some reason.
casper.start(url,function(){
this.echo('continuing captures...');
}).each(searchPages,function(casper,index){
loadSearch(this,index);
});
function loadSearch(casper,index){
var currentTime = new Date();
var month = currentTime.getMonth() + 1;
var day = currentTime.getDate() + 1;
var year = currentTime.getFullYear();
var dateStart = month + "/" + day + "/" + year;
var fortnightAway = new Date(+new Date + 12096e5);
var dateEnd = fortnightAway.getMonth() + 1 + "/" + fortnightAway.getDate() + "/" + fortnightAway.getFullYear();
casper.page.close();
casper.page = require('webpage').create();
casper.thenOpen(url,function(){
var myfile = "data-"+year + "-" + month + "-" + day+".html";
this.evaluate(function(j) {
document.querySelector('select[name="searchParameters.localeId"]').selectedIndex = j;
},index);
this.evaluate(function(start) {
$("#leaveDate").val(start);
},dateStart);
this.evaluate(function(end) {
$("#returnDate").val(end);
},dateEnd);
this.evaluate(function() {
$("#OSB_btn").click();
});
this.waitForSelector('#destinationForPackage', function() {
if (this.exists('#destinationForPackage')){
var name = casper.evaluate(function() {
return $("#destinationForPackage option[value='" + $("#destinationForPackage").val() + "']").text()
});
if (name != "Going To"){
if (name == null){
console.log("it's null");
}else{
name = name.replace("/","_");
name = name.replace("/","_");
casper.capture('Captures/Searches/search_' + name + '.jpg');
console.log("Capturing search_" + name);
}
}
}else{
console.log("Search failed to load. Retrying");
loadSearch(casper,index);
}
},function(){
console.log("Search page timed-out. Retrying");
loadSearch(casper,index);
},20000);
});
}
这篇关于CasperJS内存不足的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!