Phantomjs page.content没有检索页面内容 [英] Phantomjs page.content isn't retrieving the page content
问题描述
我使用 PhantomJS 抓取通过 JavaScript 和 Ajax 加载动态内容的网站。
我有以下代码:
var page = require('webpage').create();
page.onError = function(msg, trace) {
var msgStack = ['ERROR: ' + msg];
if (trace && trace.length) {
msgStack.push('TRACE:');
trace.forEach(function(t) {
msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function +'")' : ''));
});
}
console.error(msgStack.join('\n'));
};
page.onConsoleMessage = function(msg, lineNum, sourceId) {
console.log('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")');
};
page.open('http://www.betexplorer.com/soccer/germany/oberliga-bayern-sud/wolfratshausen-unterhaching-ii/x8rBMAB8/', function () {
console.log(page.content);
phantom.exit();
});
现在的问题是,这段代码没有获取到我想要的源代码。
如果通过 Web 浏览器(如 Chrome)打开该 URL 并查看页面的源代码(即 JavaScript 和 Ajax 调用完成之后的动态源代码),你会发现浏览器中的源代码和 PhantomJS 得到的源代码完全不同。(图片)
但在这种情况下,我需要的是浏览器中的源代码。
通常这段 PhantomJS 代码能获取到我需要的源代码,但对于这个 URL(以及其他许多 URL),PhantomJS 无法获取正确的源代码。
我认为 PhantomJS 不知道如何处理向该页面加载动态内容的 JavaScript 和 Ajax 调用。
运行这段代码时,我得到了以下错误:
ERROR: TypeError: 'undefined' is not a function (evaluating 'function(e){
this.pointer.x = e.pageX;
this.pointer.y = e.pageY;
}.bind(this)')
TRACE:
-> http://www.betexplorer.com/gres/tooltip.js?serial=1410131213: 207
-> http://www.betexplorer.com/gres/tooltip.js?serial=1410131213: 157
-> http://www.betexplorer.com/gres/tooltip.js?serial=1410131213: 310 (in function "tooltip")
-> http://www.betexplorer.com/soccer/germany/oberliga-bayern-sud/wolfratshausen-unterhaching-ii/x8rBMAB8/: 291
-> http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js: 2
-> http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js: 2
-> http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js: 2
-> http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js: 2
CONSOLE: Invalid App Id: Must be a number or numeric string representing the application id. (from line #undefined in "undefined")
CONSOLE: FB.getLoginStatus() called before calling FB.init(). (from line #undefined in "undefined")
那么,我该如何使用 PhantomJS 获取这个页面(<a href="http://www.betexplorer.com/soccer/germany/oberliga-bayern-sud/wolfratshausen-unterhaching-ii/x8rBMAB8/" rel="nofollow">http://www.betexplorer.com/soccer/germany/oberliga-bayern-sud/wolfratshausen-unterhaching-ii/x8rBMAB8/</a>)的动态源代码?
由于该页面是动态生成的,你需要等待一点点,然后才能访问预定页面的源代码。
page.open('http://www.betexplorer.com/soccer/germany/oberliga-bayern-sud/wolfratshausen-unterhaching-ii/x8rBMAB8/', function () {
setTimeout(function(){
console.log(page.content);
phantom.exit();
}, 5000); // 5 sec should be enough
});
`TypeError: 'undefined' is not a function` 这个错误指的是 `bind`,因为 PhantomJS 1.x 不支持它。PhantomJS 1.x 使用的是 QtWebKit 的一个旧分支,大致相当于 Chrome 13 或 Safari 5。较新的 PhantomJS 2 使用了支持 `bind` 的新引擎。如果你仍在使用 1.x 版本,需要在 `page.onInitialized` 事件处理程序中添加一个 shim:
page.onInitialized = function(){
page.evaluate(function(){
var isFunction = function(o) {
return typeof o == 'function';
};
var bind,
slice = [].slice,
proto = Function.prototype,
featureMap;
featureMap = {
'function-bind': 'bind'
};
function has(feature) {
var prop = featureMap[feature];
return isFunction(proto[prop]);
}
// check for missing features
if (!has('function-bind')) {
// adapted from Mozilla Developer Network example at
// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/Function/bind
bind = function bind(obj) {
var args = slice.call(arguments, 1),
self = this,
nop = function() {
},
bound = function() {
return self.apply(this instanceof nop ? this : (obj || {}), args.concat(slice.call(arguments)));
};
nop.prototype = this.prototype || {}; // Firefox cries sometimes if prototype is undefined
bound.prototype = new nop();
return bound;
};
proto.bind = bind;
}
});
};
摘自我在这里的回答。
I use Phantomjs to scrape websites that use JavaScript and Ajax to load dynamic content.
I have the following code:
// PhantomJS script from the question: opens the page, forwards in-page errors
// and console output to the terminal, prints page.content and exits.
var page = require('webpage').create();
// Report JavaScript errors raised inside the page, followed by one line per
// stack frame (file, line and, when present, the function name).
page.onError = function(msg, trace) {
var msgStack = ['ERROR: ' + msg];
if (trace && trace.length) {
msgStack.push('TRACE:');
trace.forEach(function(t) {
msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function +'")' : ''));
});
}
console.error(msgStack.join('\n'));
};
// Echo the page's own console messages, tagged with the line number and source id.
page.onConsoleMessage = function(msg, lineNum, sourceId) {
console.log('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")');
};
// NOTE: page.content is printed immediately when the open callback fires, and
// the script exits right after; there is no wait for later Ajax-driven updates.
page.open('http://www.betexplorer.com/soccer/germany/oberliga-bayern-sud/wolfratshausen-unterhaching-ii/x8rBMAB8/', function () {
console.log(page.content);
phantom.exit();
});
The problem is that this code doesn't retrieve the source code I want.
If you enter the URL through a web browser (like Chrome) and read the source code of the page (the dynamic source code, after the JavaScript and Ajax calls were made), you will see that the web browser source code and the PhantomJS source code are completely different.
But in this case I need the web browser's source code.
Usually this PhantomJS code retrieves the source code I need, but in the case of this URL (and many others) PhantomJS does not retrieve the correct source code.
I assume PhantomJS doesn't know how to handle the JavaScript and Ajax calls that load dynamic content into this page.
I get these errors when I run the code:
ERROR: TypeError: 'undefined' is not a function (evaluating 'function(e){
this.pointer.x = e.pageX;
this.pointer.y = e.pageY;
}.bind(this)')
TRACE:
-> http://www.betexplorer.com/gres/tooltip.js?serial=1410131213: 207
-> http://www.betexplorer.com/gres/tooltip.js?serial=1410131213: 157
-> http://www.betexplorer.com/gres/tooltip.js?serial=1410131213: 310 (in function "tooltip")
-> http://www.betexplorer.com/soccer/germany/oberliga-bayern-sud/wolfratshausen-unterhaching-ii/x8rBMAB8/: 291
-> http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js: 2
-> http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js: 2
-> http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js: 2
-> http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js: 2
CONSOLE: Invalid App Id: Must be a number or numeric string representing the application id. (from line #undefined in "undefined")
CONSOLE: FB.getLoginStatus() called before calling FB.init(). (from line #undefined in "undefined")
So how do I get the dynamic source code of this page (http://www.betexplorer.com/soccer/germany/oberliga-bayern-sud/wolfratshausen-unterhaching-ii/x8rBMAB8/) using PhantomJS?
Since the page is dynamically generated, you need to wait a little before you can access the intended page source.
// Load the page, then give its scripts time to finish the Ajax-driven
// rendering before dumping the resulting DOM.
page.open('http://www.betexplorer.com/soccer/germany/oberliga-bayern-sud/wolfratshausen-unterhaching-ii/x8rBMAB8/', function (status) {
    // PhantomJS passes 'success' or 'fail' to the callback. Bail out early on
    // failure instead of waiting 5 seconds just to print an empty document.
    if (status !== 'success') {
        console.error('Failed to load the page (status: ' + status + ')');
        phantom.exit(1);
        return;
    }
    setTimeout(function () {
        console.log(page.content);
        phantom.exit();
    }, 5000); // 5 sec should be enough
});
The `TypeError: 'undefined' is not a function` refers to `bind`, because PhantomJS 1.x doesn't support it. PhantomJS 1.x uses an old fork of QtWebKit which is comparable to Chrome 13 or Safari 5. The newer PhantomJS 2 uses a newer engine which supports `bind`. If you still use version 1.x, you need to add a shim inside of the `page.onInitialized` event handler:
// Installs a Function.prototype.bind shim into the page as soon as it is
// initialized, for engines that lack bind (PhantomJS 1.x).
page.onInitialized = function () {
    // The callback handed to page.evaluate runs inside the page's own context
    // (per the PhantomJS documentation), so it must be fully self-contained.
    page.evaluate(function () {
        var proto = Function.prototype;
        var slice = Array.prototype.slice;

        // Nothing to do when the engine already provides bind.
        if (typeof proto.bind === 'function') {
            return;
        }

        // Adapted from the Mozilla Developer Network example at
        // https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/Function/bind
        proto.bind = function bind(obj) {
            if (typeof this !== 'function') {
                throw new TypeError('Function.prototype.bind - what is trying to be bound is not callable');
            }
            var args = slice.call(arguments, 1);
            var self = this;
            var nop = function () {};
            var bound = function () {
                // Called with `new`: keep the freshly created instance as `this`.
                // Otherwise pass the bound value through unchanged, so falsy
                // values such as 0, '' or null are not replaced by a new object.
                return self.apply(
                    this instanceof nop ? this : obj,
                    args.concat(slice.call(arguments))
                );
            };
            // Fall back to an empty object when the target has no prototype.
            nop.prototype = this.prototype || {};
            // Instances created via `new bound()` inherit from the target's prototype.
            bound.prototype = new nop();
            return bound;
        };
    });
};
Taken from my answer here.
这篇关于Phantomjs page.content没有检索页面内容的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!