如何从 pycurl 的 multi curl(CurlMulti)请求中获取响应主体 [英] How can I get the response body from pycurl multi curl requests
问题描述
执行 curl multi 请求时,我只能得到空响应。没有引发任何异常,但响应值没有任何内容(见以下代码段中的注释)。
这是我的代码的简化版本:
from StringIO import StringIO
import pycurl
class CurlStream(object):
    """Queue several URLs on one ``pycurl.CurlMulti`` and run them together.

    Every queued request gets its own ``StringIO`` buffer as write target.
    The buffers are collected in ``curl_storage`` and printed by
    ``read_all``.
    """

    # Class-level defaults. ``curl_count`` is rebound on the instance the
    # first time it is assigned; ``curl_storage`` is one list object that is
    # reached through the class.
    curl_storage = []
    curl_count = 0

    def __init__(self):
        """Create the multi handle that all requests are attached to."""
        self.curl_multi = pycurl.CurlMulti()

    def add_request(self, request, post_fields=None):
        """Build an easy handle for the URL ``request`` and add it to the multi handle.

        ``post_fields`` is passed through to ``_create_curl``.
        """
        self.curl_count += 1
        self.curl_multi.add_handle(self._create_curl(request, post_fields))

    def perform(self):
        """Run the transfers until the count returned by ``perform()`` is zero."""
        multi = self.curl_multi
        while self.curl_count:
            # Call perform() at least once, and again for as long as it
            # answers E_CALL_MULTI_PERFORM.
            status = pycurl.E_CALL_MULTI_PERFORM
            while status == pycurl.E_CALL_MULTI_PERFORM:
                status, self.curl_count = multi.perform()
            # Wait (timeout 1.0) before polling the transfers again.
            multi.select(1.0)

    def read_all(self):
        """Print the contents of every buffer in ``curl_storage``."""
        for buf in self.curl_storage:
            print(buf.getvalue())  # this does nothing --prints blank lines

    def close(self):
        """Close the multi handle."""
        self.curl_multi.close()

    def _create_curl(self, request, post_fields):
        """Return a configured easy handle that writes its body to a new buffer.

        The buffer is appended to ``curl_storage``; the handle itself is only
        returned to the caller. ``post_fields`` is accepted but not used here.
        """
        handle = pycurl.Curl()
        body = StringIO()
        self.curl_storage.append(body)
        for option, value in (
            (handle.URL, request),
            (handle.WRITEFUNCTION, body.write),
            (handle.TIMEOUT, 20),
        ):
            handle.setopt(option, value)
        return handle
def main():
    """Fetch two sample pages through one ``CurlStream`` and print what was stored."""
    stream = CurlStream()
    for url in ('http://www.google.com', 'http://www.example.com'):
        stream.add_request(url)
    stream.perform()
    stream.read_all()
    stream.close()


# Run the demo only when executed as a script.
if __name__ == '__main__':
    main()
我已经使用相同的选项发出了单个请求,而没有使用curl multi,它可以工作.
好,所以当我将_create_curl
方法更改为此(添加write_out
进行调试)时,我发现它起作用了:
def _create_curl(self, request, post_fields):
    """Return an easy handle for ``request`` and keep it referenced on the instance.

    Received data is passed to ``self.write_out``. ``post_fields`` is
    accepted but not used here.
    """
    handle = pycurl.Curl()
    for option, value in (
        (handle.URL, request),
        (handle.WRITEFUNCTION, self.write_out),
        (handle.TIMEOUT, 20),
    ):
        handle.setopt(option, value)
    # Below is the important bit, I am now adding each curl object to a list
    self.curl_storage.append(handle)
    return handle
def write_out(self, data):
    """Debug write callback: print the received chunk and return its length."""
    size = len(data)
    print(data)
    return size
问题在于:把 curl 对象添加到 CurlMulti 对象时,我没有保留对各个 curl 对象的任何引用,因此它们被自动关闭了.
根据 pycurl 文档中对 Curl 对象 close() 方法的说明:
对应于 libcurl 中的 curl_easy_cleanup.当一个 Curl 对象不再被任何引用持有时,pycurl 会自动调用此方法,但也可以显式调用.
I am unable to get anything but empty responses when performing curl multi requests. No exceptions are thrown, but the response value has no content (commented in the below snippet)
Here's a simplified version of my code:
from StringIO import StringIO
import pycurl
class CurlStream(object):
    """Queue several URLs on one ``pycurl.CurlMulti`` and run them together.

    Every queued request gets its own ``StringIO`` buffer as write target.
    The buffers are collected in ``curl_storage`` and printed by
    ``read_all``.
    """

    # Class-level defaults. ``curl_count`` is rebound on the instance the
    # first time it is assigned; ``curl_storage`` is one list object that is
    # reached through the class.
    curl_storage = []
    curl_count = 0

    def __init__(self):
        """Create the multi handle that all requests are attached to."""
        self.curl_multi = pycurl.CurlMulti()

    def add_request(self, request, post_fields=None):
        """Build an easy handle for the URL ``request`` and add it to the multi handle.

        ``post_fields`` is passed through to ``_create_curl``.
        """
        self.curl_count += 1
        self.curl_multi.add_handle(self._create_curl(request, post_fields))

    def perform(self):
        """Run the transfers until the count returned by ``perform()`` is zero."""
        multi = self.curl_multi
        while self.curl_count:
            # Call perform() at least once, and again for as long as it
            # answers E_CALL_MULTI_PERFORM.
            status = pycurl.E_CALL_MULTI_PERFORM
            while status == pycurl.E_CALL_MULTI_PERFORM:
                status, self.curl_count = multi.perform()
            # Wait (timeout 1.0) before polling the transfers again.
            multi.select(1.0)

    def read_all(self):
        """Print the contents of every buffer in ``curl_storage``."""
        for buf in self.curl_storage:
            print(buf.getvalue())  # this does nothing --prints blank lines

    def close(self):
        """Close the multi handle."""
        self.curl_multi.close()

    def _create_curl(self, request, post_fields):
        """Return a configured easy handle that writes its body to a new buffer.

        The buffer is appended to ``curl_storage``; the handle itself is only
        returned to the caller. ``post_fields`` is accepted but not used here.
        """
        handle = pycurl.Curl()
        body = StringIO()
        self.curl_storage.append(body)
        for option, value in (
            (handle.URL, request),
            (handle.WRITEFUNCTION, body.write),
            (handle.TIMEOUT, 20),
        ):
            handle.setopt(option, value)
        return handle
def main():
    """Fetch two sample pages through one ``CurlStream`` and print what was stored."""
    stream = CurlStream()
    for url in ('http://www.google.com', 'http://www.example.com'):
        stream.add_request(url)
    stream.perform()
    stream.read_all()
    stream.close()


# Run the demo only when executed as a script.
if __name__ == '__main__':
    main()
I have made single requests with the same options without using curl multi and it works.
Okay, so I found it worked when I changed the _create_curl
method to this (adding write_out
for debugging):
def _create_curl(self, request, post_fields):
    """Return an easy handle for ``request`` and keep it referenced on the instance.

    Received data is passed to ``self.write_out``. ``post_fields`` is
    accepted but not used here.
    """
    handle = pycurl.Curl()
    for option, value in (
        (handle.URL, request),
        (handle.WRITEFUNCTION, self.write_out),
        (handle.TIMEOUT, 20),
    ):
        handle.setopt(option, value)
    # Below is the important bit, I am now adding each curl object to a list
    self.curl_storage.append(handle)
    return handle
def write_out(self, data):
    """Debug write callback: print the received chunk and return its length."""
    size = len(data)
    print(data)
    return size
The issue was when adding the curl object to the multicurl object, I hadn't kept any reference to the single curl object, so it was automagically closed.
According to the pycurl docs for the curl close() method:
Corresponds to curl_easy_cleanup in libcurl. This method is automatically called by pycurl when a Curl object no longer has any references to it, but can also be called explicitly.
这篇关于如何从 pycurl 的 multi curl(CurlMulti)请求中获取响应主体的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!