Python使用alternating变量下载图像 [英] Python download images with alternating variables

查看:182
本文介绍了Python使用alternating变量下载图像的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

  # Build the poster URL from the title slug held in title_2.
  url_image = "http://www.joblo.com/timthumb.php?src=/posters/images/full/" + str(title_2) + "-poster1.jpg&h=333&w=225"

  # Send a browser-like User-Agent header with the request.
  user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
  headers = {'User-Agent': user_agent}
  req = urllib.request.Request(url_image, None, headers)


  print(url_image)
  #image, h = urllib.request.urlretrieve(url_image)
  # Read the whole response body into memory.
  with urllib.request.urlopen(req) as response:
      the_page = response.read()

  #print (the_page)


  # Write the downloaded bytes to poster.jpg in binary mode.
  with open('poster.jpg', 'wb') as f:
      f.write(the_page)

Traceback (most recent call last):
  File "C:\Users\luke\Desktop\scraper\imager finder.py", line 97, in <module>
    with urllib.request.urlopen(req) as response:
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 162, in urlopen
    return opener.open(url, data, timeout)
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 465, in open
    response = self._open(req, data)
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 483, in _open
    '_open', req)
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 443, in _call_chain
    result = func(*args)
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1268, in http_open
    return self.do_open(http.client.HTTPConnection, req)
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1243, in do_open
    r = h.getresponse()
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 1174, in getresponse
    response.begin()
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 282, in begin
    version, status, reason = self._read_status()
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 264, in _read_status
    raise BadStatusLine(line)
http.client.BadStatusLine:

解决方案

我的建议是使用 urllib2。此外,我写了一个(我认为)不错的函数,如果服务器支持,它还可以使用 gzip 编码(减少带宽)。我用它来下载社交媒体文件,但它应该适用于任何内容。



我本想调试你的代码,但由于它只是一个代码片段(而且错误消息的格式很乱),很难准确知道错误发生在哪里(肯定不是你代码片段中的第 97 行)。



这段代码不算最短,但清晰且可重用。这是 Python 2.7 的代码,而你看起来用的是 Python 3——这种情况下,你可以搜索其他讨论如何在 Python 3 中使用 urllib2 的问题。

  import urllib2 
import gzip
from StringIO import StringIO

def download(url):
    """
    Download and return the file specified in the URL; attempt to use
    gzip encoding if possible.

    The request is sent with an ``Accept-Encoding: gzip`` header. When the
    response reports ``Content-Encoding: gzip`` the body is decompressed
    before it is returned.

    :param url: address of the resource to fetch.
    :returns: the response body, decompressed if it was gzip-encoded.
    :raises IOError: if the URL cannot be opened; the message carries the
        URL and the underlying error.
    """
    request = urllib2.Request(url)
    request.add_header('Accept-Encoding', 'gzip')
    try:
        response = urllib2.urlopen(request)
    except Exception as e:  # the "as" form parses on Python 2.6+ and 3
        # Use a literal prefix here: the message must not depend on a
        # lookup table that this snippet never defines.
        raise IOError("download failed (%s) %s" % (url, e))
    try:
        payload = response.read()
        content_encoding = response.info().get('Content-Encoding')
    finally:
        # Release the connection even if reading the body fails.
        response.close()
    if content_encoding == 'gzip':
        buf = StringIO(payload)
        f = gzip.GzipFile(fileobj=buf)
        payload = f.read()
    return payload

def save_media(filename, media):
    """
    Write ``media`` to ``filename`` in binary mode.

    :param filename: path of the file to create or overwrite.
    :param media: the data to write.
    :returns: None.
    """
    # The context manager closes the handle even if write() raises.
    with open(filename, "wb") as file_handle:
        file_handle.write(media)

# Title slug that is substituted into the poster URL.
title_2 = "10-cloverfield-lane"
# Fetch the poster for that title, then store it as poster.jpg.
media = download("http://www.joblo.com/timthumb.php?src=/posters/images/full/{}-poster1.jpg&h=333&w=225".format(title_2))
save_media("poster.jpg", media)


I was trying to download images with URLs that change, but got an error.

# Assemble the poster URL around the title slug held in title_2.
url_image = "".join([
    "http://www.joblo.com/timthumb.php?src=/posters/images/full/",
    str(title_2),
    "-poster1.jpg&h=333&w=225",
])

# Send a browser-like User-Agent header with the request.
user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
headers = {'User-Agent': user_agent}
req = urllib.request.Request(url_image, data=None, headers=headers)

print(url_image)

# Read the whole response body into memory.
# (A commented-out alternative called urllib.request.urlretrieve(url_image).)
with urllib.request.urlopen(req) as response:
    the_page = response.read()

# Write the downloaded bytes to poster.jpg in binary mode.
with open('poster.jpg', mode='wb') as f:
    f.write(the_page)

Traceback (most recent call last):
  File "C:\Users\luke\Desktop\scraper\imager finder.py", line 97, in <module>
    with urllib.request.urlopen(req) as response:
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 162, in urlopen
    return opener.open(url, data, timeout)
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 465, in open
    response = self._open(req, data)
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 483, in _open
    '_open', req)
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 443, in _call_chain
    result = func(*args)
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1268, in http_open
    return self.do_open(http.client.HTTPConnection, req)
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\urllib\request.py", line 1243, in do_open
    r = h.getresponse()
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 1174, in getresponse
    response.begin()
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 282, in begin
    version, status, reason = self._read_status()
  File "C:\Users\luke\AppData\Local\Programs\Python\Python35-32\lib\http\client.py", line 264, in _read_status
    raise BadStatusLine(line)
http.client.BadStatusLine:

解决方案

My advice is to use urllib2. In addition, I've written a nice function (I think) that will also allow gzip encoding (to reduce bandwidth) if the server supports it. I use this for downloading social media files, but it should work for anything.

I would try to debug your code, but since it's just a snippet (and the error messages are formatted badly), it's hard to know exactly where your error is occurring (it's certainly not line 97 in your code snippet).

This isn't as short as it could be, but it's clear and reusable. This is Python 2.7; it looks like you're using Python 3, in which case you can google some other questions that address how to use urllib2 in Python 3.

import urllib2
import gzip
from StringIO import StringIO

def download(url):
    """
    Download and return the file specified in the URL; attempt to use
    gzip encoding if possible.

    The request is sent with an ``Accept-Encoding: gzip`` header. When the
    response reports ``Content-Encoding: gzip`` the body is decompressed
    before it is returned.

    :param url: address of the resource to fetch.
    :returns: the response body, decompressed if it was gzip-encoded.
    :raises IOError: if the URL cannot be opened; the message carries the
        URL and the underlying error.
    """
    request = urllib2.Request(url)
    request.add_header('Accept-Encoding', 'gzip')
    try:
        response = urllib2.urlopen(request)
    except Exception as e:  # the "as" form parses on Python 2.6+ and 3
        # Use a literal prefix here: the message must not depend on a
        # lookup table that this snippet never defines.
        raise IOError("download failed (%s) %s" % (url, e))
    try:
        payload = response.read()
        content_encoding = response.info().get('Content-Encoding')
    finally:
        # Release the connection even if reading the body fails.
        response.close()
    if content_encoding == 'gzip':
        buf = StringIO(payload)
        f = gzip.GzipFile(fileobj=buf)
        payload = f.read()
    return payload

def save_media(filename, media):
    """
    Write ``media`` to ``filename`` in binary mode.

    :param filename: path of the file to create or overwrite.
    :param media: the data to write.
    :returns: None.
    """
    # The context manager closes the handle even if write() raises, which
    # a manual open/write/close sequence does not guarantee.
    with open(filename, "wb") as file_handle:
        file_handle.write(media)

# Title slug that is substituted into the poster URL.
title_2 = "10-cloverfield-lane"

# Fetch the poster for that title, then store it as poster.jpg.
media = download(
    "http://www.joblo.com/timthumb.php?src=/posters/images/full/{}-poster1.jpg&h=333&w=225".format(title_2)
)
save_media("poster.jpg", media)

这篇关于Python使用alternating变量下载图像的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆