无法在 PyQT4 中添加自定义请求标头 [英] Cannot add custom request headers in PyQT4
问题描述
我正在编写一个 python 程序,它使用 PyQT4 加载一些 URL 并处理它的内容/DOM(在它被 javascript 修改之后).我还需要自定义标题来请求该页面.
I am writing a Python program which uses PyQt4 to load a URL and process its contents/DOM (after it has been modified by JavaScript). I also need to send custom headers when requesting that page.
下面的代码可以正常工作,只是它无法使用我用 QNetworkRequest 定义的自定义标头获取 URL.
The code below works, except that it does not fetch the URL using the custom headers I define with QNetworkRequest.
import sys
import signal
from optparse import OptionParser
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import QWebPage
from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest, QNetworkReply
class MyNetworkAccessManager(QNetworkAccessManager):
def __init__(self, url):
QNetworkAccessManager.__init__(self)
self.request = QNetworkRequest(QUrl(url))
self.request.setRawHeader('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US)')
self.request.setRawHeader("Accept-Language","en-us,en;q=0.5");
self.request.setRawHeader("Accept-Charset","ISO-8859-1,utf-8;q=0.7,*;q=0.7");
self.request.setRawHeader("Connection","keep-alive");
self.reply = self.get(self.request)
def createRequest(self, operation, request, data):
print "mymanager handles ", request.url()
return QNetworkAccessManager.createRequest( self, operation, request, data )
class Crawler(QWebPage):
    """Web page that loads a URL and writes the resulting HTML to a file."""

    def __init__(self, url, file):
        """Store the target URL and output path and install the manager.

        url  -- address of the page to load.
        file -- path of the file the page's HTML is written to.
        """
        QWebPage.__init__(self)
        self._url = url
        self._file = file
        manager = MyNetworkAccessManager(url)
        self.setNetworkAccessManager(manager)

    def userAgentForUrl(self, url):
        """Return the same fixed user-agent string for every URL."""
        return "Mozilla/122.0 (X11; Linux x86_64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1"

    def crawl(self):
        """Start loading the page; ``_finished_loading`` runs on completion."""
        # Use the default SIGINT action rather than Python's own handler.
        signal.signal(signal.SIGINT, signal.SIG_DFL)
        self.connect(self, SIGNAL('loadFinished(bool)'), self._finished_loading)
        self.mainFrame().load(QUrl(self._url))

    def _finished_loading(self, result):
        """Write the main frame's HTML to the output file and exit.

        result -- flag delivered by ``loadFinished(bool)``; it is not
                  inspected, the HTML is written regardless of its value.
        """
        # The ``with`` block closes the file even if write() raises, and the
        # local name no longer shadows the ``file`` builtin.
        with open(self._file, 'w') as output:
            output.write(self.mainFrame().toHtml())
        sys.exit(0)
def main():
    """Parse the command line, start the crawl and run the Qt event loop."""
    app = QApplication(sys.argv)
    opts = get_cmd_options()
    page = Crawler(opts.url, opts.file)
    page.crawl()
    # Exit the process with whatever status the event loop returns.
    exit_status = app.exec_()
    sys.exit(exit_status)
def get_cmd_options():
    """Parse and validate the command-line options.

    Returns the ``optparse`` values object carrying ``url`` and ``file``.
    If either option is missing, a hint is printed to stdout and the
    process exits with status 1.
    """
    usage = "usage: %prog [options] args"
    parser = OptionParser(usage)
    parser.add_option('-u', '--url', dest='url', help='URL to fetch data from')
    parser.add_option('-f', '--file', dest='file', help='Local file path to save data to')
    options, _args = parser.parse_args()

    # Both options are mandatory; check them in the original order so the
    # first missing one is the one reported.
    required = (
        (options.url, 'an URL'),
        (options.file, 'a destination file'),
    )
    for value, description in required:
        if not value:
            sys.stdout.write(
                'You must specify %s. %s --help for more details\n'
                % (description, sys.argv[0])
            )
            # sys.exit() instead of the site-provided exit() helper, which
            # is meant for the interactive shell and may be unavailable.
            sys.exit(1)
    return options
# Run the crawler only when this file is executed as a script.
if __name__ == "__main__":
    main()
谁能告诉我为什么不选择标题设置?
Could anyone please tell me why it isn't picking up the header settings?
推荐答案
将 setRawHeader
移动到 createRequest
函数中,它就会起作用.您可以在此处发送测试请求.
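Move the setRawHeader calls
into the createRequest
method and it will work: the page builds its own request objects, so headers set on the separate request created in __init__ never reach them, whereas every request passes through createRequest. You can send a request here for testing.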
def __init__(self, url):
    """Initialise the manager and immediately send a GET for ``url``."""
    QNetworkAccessManager.__init__(self)
    initial_request = QNetworkRequest(QUrl(url))
    self.reply = self.get(initial_request)
def createRequest(self, operation, request, data):
    """Add the custom headers to ``request`` and pass it to the base class."""
    print("mymanager handles ", request.url())
    extra_headers = (
        ('User-Agent', 'Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101'),
        ("Accept-Language", "en-us,en;q=0.5"),
        ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7"),
        ("Connection", "keep-alive"),
    )
    # Headers are applied here, on the request that is actually handed on.
    for header_name, header_value in extra_headers:
        request.setRawHeader(header_name, header_value)
    return QNetworkAccessManager.createRequest(self, operation, request, data)
注意我也把User-agent
改成了User-Agent
这篇关于无法在 PyQT4 中添加自定义请求标头的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!