如何从Kickstarter网页上抓取所有图片网址? [英] How to scrape all the image urls from a Kickstarter webpage?

查看:70
本文介绍了如何从Kickstarter网页上抓取所有图片网址的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想从此 Kickstarter 网页上抓取所有图片网址,但下面的代码并没有返回全部图片:

I want to scrape all the image urls from this Kickstarter webpage, but the following code does not give all the images:

# Download the campaign page and parse the HTML exactly as the server sent it.
url = 'https://www.kickstarter.com/projects/1878352656/sleep-yoga-go-travel-pillow?ref=category_newest'
page = requests.get(url)
soup = BeautifulSoup(page.text, 'html.parser')

# Keep only the <img> tags whose src begins with the ksr-ugc asset prefix.
x = soup.select('img[src^="https://ksr-ugc.imgix.net/assets/"]')
print(x)

# Gather the src attribute of each matched tag, then print one URL per line.
img_links = [tag['src'] for tag in x]
for link in img_links:
    print(link)

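推荐答案

下面的代码不再解析页面的初始 HTML,而是先从站点首页取得 CSRF token,再向 /graph 接口发送 GraphQL 查询获取项目 story 的 HTML,最后从其中 img 标签的 data-src 属性中提取图片网址: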

import requests
from bs4 import BeautifulSoup

# JSON body posted to the endpoint: a list holding a single "Campaign"
# operation.
data = [
    {
        "operationName": "Campaign",
        # Query text; among other fields it requests the project's `story`
        # (with assetWidth 680), which is the field main() reads.
        "query": "query Campaign($slug: String!) {\n  project(slug: $slug) {\n    id\n    isSharingProjectBudget\n    risks\n    story(assetWidth: 680)\n    currency\n    spreadsheet {\n      displayMode\n      public\n      url\n      data {\n        name\n        value\n        phase\n        rowNum\n        __typename\n      }\n      dataLastUpdatedAt\n      __typename\n    }\n    environmentalCommitments {\n      id\n      commitmentCategory\n      description\n      __typename\n    }\n    __typename\n  }\n}\n",
        "variables": {
            # Value bound to the query's $slug variable; selects the project.
            "slug": "1878352656/sleep-yoga-go-travel-pillow"
        }
    }
]
# Request headers carrying a Firefox User-Agent string.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0"
}


def main(url):
    """Print and return the image URLs found in the project's story.

    The function first requests the site root (scheme and host of ``url``)
    to read the CSRF token from its ``<meta name="csrf-token">`` tag, then
    posts the module-level ``data`` payload to ``url`` with that token and
    collects the ``data-src`` attribute of every ``<img>`` in the returned
    story HTML.

    Args:
        url: Address of the endpoint that receives the JSON payload,
            e.g. ``"https://www.kickstarter.com/graph"``.

    Returns:
        A list of the ``data-src`` values, in document order. The same list
        is also printed, as before.

    Raises:
        requests.HTTPError: If either request returns an error status.
        RuntimeError: If the site root page carries no CSRF token.
    """
    from urllib.parse import urlsplit

    # Derive the site root from the endpoint URL instead of slicing a fixed
    # number of characters, so the function is not tied to one host name.
    parts = urlsplit(url)
    origin = parts.scheme + "://" + parts.netloc

    with requests.Session() as req:
        r = req.get(origin)
        r.raise_for_status()
        soup = BeautifulSoup(r.content, 'html.parser')

        token_tag = soup.select_one("meta[name=csrf-token]")
        token = token_tag.get('content') if token_tag is not None else None
        if not token:
            raise RuntimeError(
                "csrf-token meta tag not found at " + origin)

        # Work on a copy so the module-level ``headers`` dict is not mutated
        # as a side effect of calling this function.
        request_headers = dict(headers)
        request_headers['X-CSRF-Token'] = token
        req.headers.update(request_headers)

        # The session already carries the headers, so they are not passed
        # a second time on the request itself.
        r = req.post(url, json=data)
        r.raise_for_status()
        goal = r.json()[0]['data']['project']['story']

        soup = BeautifulSoup(goal, 'html.parser')
        # find_all is the current name; findAll is a deprecated alias.
        hey = [x['data-src'] for x in soup.find_all("img", {'data-src': True})]
        print(hey)
        return hey


# Run the scraper against the /graph endpoint, which receives the query in `data`.
main("https://www.kickstarter.com/graph")

输出:

['https://ksr-ugc.imgix.net/assets/018/947/295/e28df5848b46dd364b0ccf7f08874ed1_original.png?ixlib=rb-2.1.0&w=680&fit=max&v=1509125786&auto=format&frame=1&lossless=true&s=aa182d32433644ed4b67536f9249b9a4', 'https://ksr-ugc.imgix.net/assets/019/532/467/0921999530e580a28726d31817e89219_original.JPG?ixlib=rb-2.1.0&w=680&fit=max&v=1512690440&auto=format&frame=1&q=92&s=12289756eee63bd8229e43d3fdb697e2', 'https://ksr-ugc.imgix.net/assets/018/950/941/145ffd2dcc872e18c0bb7f62a74f0ac9_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509143419&auto=format&frame=1&q=92&s=312107a6ab70eb9b8d274d8e9536d759', 'https://ksr-ugc.imgix.net/assets/019/532/475/e5a27a164a960efdf14be1dfc3d937a8_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1512690498&auto=format&frame=1&q=92&s=c84f7614d7d26a16b71cb0a78eb5b964', 'https://ksr-ugc.imgix.net/assets/019/532/479/a1892bbea9c95e10abdb71bc3db3a18f_original.JPG?ixlib=rb-2.1.0&w=680&fit=max&v=1512690515&auto=format&frame=1&q=92&s=6a7efea040c55c59f084bea7ddf49713', 'https://ksr-ugc.imgix.net/assets/019/532/484/5eea3d7d665f9a28607615fb7e76520d_original.JPG?ixlib=rb-2.1.0&w=680&fit=max&v=1512690539&auto=format&frame=1&q=92&s=01323684393c3a32d89827f7d17034d3', 'https://ksr-ugc.imgix.net/assets/019/532/486/be1c40c7e1d2bba356e34c78e0a6cebb_original.JPG?ixlib=rb-2.1.0&w=680&fit=max&v=1512690553&auto=format&frame=1&q=92&s=0413e46ca9ffd51686188ea3aa496dc2', 'https://ksr-ugc.imgix.net/assets/018/915/460/61dba7d4eee548c30826c4ac1e7c1adf_original.JPG?ixlib=rb-2.1.0&w=680&fit=max&v=1508953589&auto=format&frame=1&q=92&s=ee0206f737d2a5dc987a7a8d7c5c0181', 'https://ksr-ugc.imgix.net/assets/019/532/556/67874ab9e61b5a62111bb8ffd5a86ec7_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1512690990&auto=format&frame=1&q=92&s=1a32cc86ea8356fb8f773a9946db1078', 'https://ksr-ugc.imgix.net/assets/018/916/228/07a65eaae32a1f35a2fc010368808597_original.gif?ixlib=rb-2.1.0&w=680&fit=max&v=1508956933&auto=format&gif-q=50&q=92&s=629409cb25ff98768e8244a855cd6537', 
'https://ksr-ugc.imgix.net/assets/018/951/269/ae4a074d834e1017594ea570061f8693_original.gif?ixlib=rb-2.1.0&w=680&fit=max&v=1509145394&auto=format&gif-q=50&q=92&s=ba37411b148e6aaf68ec15b12bb08b4a', 'https://ksr-ugc.imgix.net/assets/019/532/499/8108b7c345be49d1a7f24c9a808883ea_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1512690652&auto=format&frame=1&q=92&s=7e706e7c22a89c177e7269e360219d6d', 'https://ksr-ugc.imgix.net/assets/019/532/503/9d14734ab1a118f333abc8c726be25dd_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1512690688&auto=format&frame=1&q=92&s=ebac74715b62fa63baa97348f555db39', 'https://ksr-ugc.imgix.net/assets/019/020/550/d03e197225cc7dff72a8b0781d971e2b_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509562418&auto=format&frame=1&q=92&s=3dacbcc7501b93325aa5f3b96f4dd6bb', 'https://ksr-ugc.imgix.net/assets/018/984/033/1690b96aa7cf9c8a82d546959f76078e_original.JPG?ixlib=rb-2.1.0&w=680&fit=max&v=1509389255&auto=format&frame=1&q=92&s=c261edca6ff33ca4932692ec77b630a8', 'https://ksr-ugc.imgix.net/assets/019/532/505/989aa88cd062029b6d952f08418fcd79_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1512690721&auto=format&frame=1&q=92&s=5e61d60f95fec622497779fa84cce01e', 'https://ksr-ugc.imgix.net/assets/019/532/507/a4d83eac4c0f53d893e23ec163842c5b_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1512690737&auto=format&frame=1&q=92&s=fffba5b95e026d8727290e893191fa83', 'https://ksr-ugc.imgix.net/assets/019/023/799/c5619d8153edaa43789d5912a0c875ee_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509574534&auto=format&frame=1&q=92&s=e7e0efcd372067e2b9a46d7b9dd5b195', 'https://ksr-ugc.imgix.net/assets/018/983/735/83c71def243879b687956cbf8b806b14_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509388186&auto=format&frame=1&q=92&s=5e454a646cd64af594c00730b7b4c87a', 'https://ksr-ugc.imgix.net/assets/019/024/228/747a531dedc814049501a75141766164_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509576227&auto=format&frame=1&q=92&s=faa3f0ec1fa32d4d4dd53835fb098f09', 
'https://ksr-ugc.imgix.net/assets/019/042/081/530fbfc825cbc110df875ec0524feacc_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509667007&auto=format&frame=1&q=92&s=4bf397a28271ec71ec1aa7f635cdb8f0', 'https://ksr-ugc.imgix.net/assets/018/920/236/d940db3bb0d32e4d4e2584a35d719993_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1508974842&auto=format&frame=1&q=92&s=0c4df9694d53562e9bd3b9cff38f53bf', 'https://ksr-ugc.imgix.net/assets/018/951/628/2f16dddba5c345c618af882cd1d3283c_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509147803&auto=format&frame=1&q=92&s=03ab16df99ca9cdc7473d85370ce8616', 'https://ksr-ugc.imgix.net/assets/018/920/245/6d4cebd599b4c5b908139587b1888263_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1508974879&auto=format&frame=1&q=92&s=fdadb27846cdd0cd722a5a3f209b008c', 'https://ksr-ugc.imgix.net/assets/019/023/916/444a8d28593f4c013d99e225280dd00e_original.jpg?ixlib=rb-2.1.0&w=680&fit=max&v=1509574972&auto=format&frame=1&q=92&s=6b1ccf939b3bba80402a618087173f7a']

这篇关于如何从Kickstarter网页上抓取所有图片网址的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆