CasperJS内存不足 [英] CasperJS running out of memory

查看:77
本文介绍了CasperJS内存不足的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在用 CasperJS 运行以下脚本,遍历到数组的大约三分之一时,它开始耗尽交换空间,机器也变得非常慢。我哪里做错了?

I'm running the following script with CasperJS, and about a third of the way through the array it starts running out of swap space and the machine becomes extremely slow. What am I doing wrong here?

searchPages 是一个包含 54 个数字的数组,每个数字对应搜索页面的一个 URL 参数值

searchPages is an array of 54 numbers corresponding to a URL value for a search page.

// Call loadSearch once for every entry of searchPages.
casper.each(searchPages, function (self, searchPage) {
    loadSearch(self, searchPage);
});


/**
 * Queue a step that opens `url`, fills in the search form, submits it and
 * captures a screenshot of the result page.
 *
 * The search window runs from one month ahead of today to two months ahead
 * of today, formatted as M/D/YYYY.
 *
 * @param {Object} casper CasperJS instance the step is queued on.
 * @param {number} index  Value assigned to `selectedIndex` of the
 *                        `searchParameters.localeId` dropdown.
 */
function loadSearch(casper,index){
    var today = new Date();

    // Format the date `monthsAhead` calendar months after `base` as M/D/YYYY.
    // The month arithmetic goes through Date so December rolls into the next
    // year, and the day is clamped to the last day of the target month so a
    // date such as 2/31 is never produced.
    function monthsFrom(base, monthsAhead) {
        var firstOfTarget = new Date(base.getFullYear(), base.getMonth() + monthsAhead, 1);
        // Day 0 of the following month is the last day of the target month.
        var lastDay = new Date(firstOfTarget.getFullYear(), firstOfTarget.getMonth() + 1, 0).getDate();
        var day = Math.min(base.getDate(), lastDay);
        return (firstOfTarget.getMonth() + 1) + "/" + day + "/" + firstOfTarget.getFullYear();
    }

    var dateStart = monthsFrom(today, 1);
    var dateEnd = monthsFrom(today, 2);

    casper.thenOpen(url,function(){
        // Each evaluate() call runs inside the page: pick the locale, fill in
        // both dates, then submit the search form.
        this.evaluate(function(j) {
            document.querySelector('select[name="searchParameters.localeId"]').selectedIndex = j;
        },index);
        this.evaluate(function(start) {
            $("#leaveDate").val(start);
        },dateStart);
        this.evaluate(function(end) {
            $("#returnDate").val(end);
        },dateEnd);
        this.evaluate(function() {
            $("#OSB_btn").click();
        });

        this.waitForSelector('#destinationForPackage', function() {
            if (!this.exists('#destinationForPackage')){
                console.log("Still doesn't exist...retry");
                loadSearch(casper,index);
                return;
            }

            // Text of the currently selected destination option.
            var name = casper.evaluate(function() {
                return $("#destinationForPackage option[value='" + $("#destinationForPackage").val() + "']").text()
            });
            if (name === "Going To"){
                // Placeholder option is selected: nothing to capture.
                return;
            }
            if (name == null){
                console.log("it's null");
                return;
            }

            // Replace every slash so the name cannot introduce extra path
            // segments in the capture file name.
            name = name.replace(/\//g,"_");
            casper.capture('Captures/Searches/search_' + name + '.jpg');
            console.log("Capturing search_" + name);
        },function(){
            console.log("Search page timed-out.");
        },20000);
    });
}

每个循环大约增加3GB。

And it adds about 3GB per loop.

推荐答案

事实证明,这是 PhantomJS 的一个众所周知的问题:该 bug 已经公开三年多仍未修复,而且显然与 Qt WebKit 有关。尽管如此,我还是通过在循环中关闭每个页面、再重新创建一个新的 Phantom 页面解决了这个问题。这个变通办法有点取巧,但内存消耗少了很多。不过,大约 200 个页面之后,内存占用仍然相当高(超过 1GB)。因此,我把脚本拆成每 200 个页面一组,一组完成后再启动下一组。下面是最终的代码,它可以在不占用过多内存的情况下顺利跑完。出于某种原因,它在 MacOS 上占用的内存比在 Windows 上少。

Well, it turns out this is a very well-known issue with PhantomJS. It has been an open bug for 3+ years, and apparently it has something to do with Qt WebKit. Nonetheless, I was able to solve it by closing each page during the loop and opening a new Phantom page. It's a bit of a hacky workaround, but the memory consumption is far lower. However, after about 200 pages, memory usage is still pretty high (1GB+). So I break my scripts up into blocks of 200 pages and just start the next one when the previous one completes. Here is the finished product, which completes successfully without too much memory usage. It uses less memory on MacOS than on Windows for some reason.

// Open the start page, then call loadSearch once for every entry of searchPages.
casper
    .start(url, function () {
        this.echo('continuing captures...');
    })
    .each(searchPages, function (self, searchPage) {
        // `this`, not the first callback argument, is what gets passed on.
        loadSearch(this, searchPage);
    });

function loadSearch(casper,index){
    var currentTime = new Date();
    var month = currentTime.getMonth() + 1;
    var day = currentTime.getDate() + 1;
    var year = currentTime.getFullYear();
    var dateStart = month + "/" + day + "/" + year;
    var fortnightAway = new Date(+new Date + 12096e5);
    var dateEnd = fortnightAway.getMonth() + 1 + "/" + fortnightAway.getDate() + "/" + fortnightAway.getFullYear();

    casper.page.close();
    casper.page = require('webpage').create();

    casper.thenOpen(url,function(){
        var myfile = "data-"+year + "-" + month + "-" + day+".html";
        this.evaluate(function(j) {
            document.querySelector('select[name="searchParameters.localeId"]').selectedIndex = j;
        },index);
        this.evaluate(function(start) {
            $("#leaveDate").val(start);
        },dateStart);
        this.evaluate(function(end) {
            $("#returnDate").val(end);
        },dateEnd);
        this.evaluate(function() {
            $("#OSB_btn").click();
        });
        this.waitForSelector('#destinationForPackage', function() {
            if (this.exists('#destinationForPackage')){
                var name = casper.evaluate(function() {
                    return $("#destinationForPackage option[value='" + $("#destinationForPackage").val() + "']").text()
                });
                if (name != "Going To"){
                    if (name == null){
                        console.log("it's null");
                    }else{
                        name = name.replace("/","_");
                        name = name.replace("/","_");
                        casper.capture('Captures/Searches/search_' + name + '.jpg');
                        console.log("Capturing search_" + name);
                    }
                }
            }else{
                console.log("Search failed to load. Retrying");
                loadSearch(casper,index);
            }

        },function(){
            console.log("Search page timed-out. Retrying");
            loadSearch(casper,index);
        },20000);
    });
}

这篇关于CasperJS内存不足的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆