python - What could cause a crawler to get status code 200 but still fail to log in?


Problem description

Question

I'm using requests to simulate a phone-number login to Zhihu. The response status code is 200, but the login does not actually succeed.
As a side note, the _xsrf is a dynamic value that has to be submitted with the form, and I have already retrieved it.

# -*- coding:utf-8 -*-
# (Python 2) Build a header dict from a raw header block copied out of the
# browser, fetch the sign-in page, extract _xsrf, then POST the login form.
import requests
from bs4 import BeautifulSoup

def get_headers(headers):
    # Split each "Name:value" line on the first ':' only and build a dict.
    lines = [str(i) for i in headers.split('\n')]
    pairs = []
    for line in lines:
        pairs.append(line.split(':', 1))
    return dict(pairs)

if __name__ == '__main__':
    url = 'https://www.zhihu.com/signin?next=/'
    dict_data = {'accont': 'phone_num', 'password': '123456'}
    header = '''Accept:*/*
Accept-Encoding:gzip, deflate, br
Accept-Language:zh-CN,zh;q=0.8
Connection:keep-alive
Content-Type:application/x-www-form-urlencoded; charset=UTF-8
Cookie:d_c0="ACBC3UKxzwuPTshbFfLKRqjjMT0oWXgblAs=|1495696563"; _zap=437b6b27-f1ea-4408-80d7-5c0d01679561; q_c1=969379bf218041c8b0c146155605b388|1500691530000|1491210336000; q_c1=969379bf218041c8b0c146155605b388|1500691530000|1491210336000; aliyungf_tc=AQAAAMvycmzCfAMAXQ0sdy5m9cunAewC; capsion_ticket="2|1:0|10:1503046792|14:capsion_ticket|44:OTgxMmQ1ZWYxMzk1NDAwNzk2YTBmMTZkOTVmZmFmMDE=|886f268c2406b6999cf54f9de1306acec57e5c561a6e0a236e250960ba28f687"; _xsrf=e4f6faad-ed17-446a-8204-4cb57102e017; l_cap_id="NDJjZWY5MmQ5ZTFiNGNjZDhjZjUzMmQ1NmJiYjhlOWQ=|1503058417|f58fafeb509b28e04a3da1de781d4fb5ad9c935d"; r_cap_id="MmRiNmU0MGZhMWQ3NDYzOGE1NDU5NDU4ZDZlMzk0Y2Y=|1503058417|7a376b5ad222e073b75dd75d355114b864830b48"; cap_id="YzRmNzUzMzFjZjA0NGU4YTgwZjY0MDUzMzQwMzdkYTg=|1503058417|c113ad690d27d1443d711a988fee45ae86e450bb"; __utma=51854390.1774224497.1495696572.1503043822.1503058418.10; __utmb=51854390.0.10.1503058418; __utmc=51854390; __utmz=51854390.1503058418.10.8.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/; __utmv=51854390.000--|2=registration_date=20151025=1^3=entry_date=20170403=1
Host:www.zhihu.com
Origin:https://www.zhihu.com
Referer:https://www.zhihu.com/
User-Agent:Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Mobile Safari/537.36
X-Requested-With:XMLHttpRequest
X-Xsrftoken:e4f6faad-ed17-446a-8204-4cb57102e017'''
    headers = get_headers(header)
    # Fetch the sign-in page and pull the dynamic _xsrf token out of the form.
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, 'lxml')
    _xsrf = soup.find_all('input', attrs={'name': '_xsrf'})[0]['value']
    dict_data['_xsrf'] = _xsrf
    headers['X-Xsrftoken'] = _xsrf
    url2 = 'https://www.zhihu.com/login/phone_num'
    raw_input()  # pause before firing the login POST
    resp = requests.post(url=url2, headers=headers, data=dict_data)
    print resp.status_code
    print resp.content
    print resp.json()['msg']
    #the_page = r.text
    #print the_page


The error output is as follows:

200
{
    "r": 1,
    "errcode": 100030,
    "data": {"account":"\u767b\u5f55\u8fc7\u4e8e\u9891\u7e41\uff0c\u8bf7\u7a0d\u540e\u91cd\u8bd5"},
    "msg": "\u767b\u5f55\u8fc7\u4e8e\u9891\u7e41\uff0c\u8bf7\u7a0d\u540e\u91cd\u8bd5"
}
登录过于频繁,请稍后重试 (i.e., "Logging in too frequently, please try again later")

The same account logs in fine when I try it in a browser, and my program only sends a single login request, so why does it complain that I'm logging in too frequently? I'd appreciate an analysis from someone more experienced. I've tweaked the headers over and over, but it doesn't seem to make any difference, and I'm stuck.

Solution

Get your headers identical to what the browser sends first, then worry about the rest.
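
For reference, one way to follow that advice with the requests library the question already uses is to let a single requests.Session carry the cookies the server sets on the GET, rather than pasting a Cookie header copied from an old browser session. This is only a minimal sketch, not the answerer's code: the endpoint and the phone_num/password field names are taken from the question and may no longer match Zhihu's current login API.

# -*- coding: utf-8 -*-
# Minimal sketch (assumptions noted above): reuse one requests.Session so the
# cookies set by the GET, including the fresh _xsrf, travel with the login POST.
import requests
from bs4 import BeautifulSoup

session = requests.Session()
session.headers.update({
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/57.0.2987.98 Mobile Safari/537.36',
    'Referer': 'https://www.zhihu.com/',
    'X-Requested-With': 'XMLHttpRequest',
})

# GET the sign-in page; the Session stores whatever cookies the server sets.
r = session.get('https://www.zhihu.com/signin?next=/')
soup = BeautifulSoup(r.text, 'lxml')
_xsrf = soup.find('input', attrs={'name': '_xsrf'})['value']

payload = {
    '_xsrf': _xsrf,
    'phone_num': 'your_phone_number',  # placeholder
    'password': 'your_password',       # placeholder
}
resp = session.post('https://www.zhihu.com/login/phone_num',
                    data=payload,
                    headers={'X-Xsrftoken': _xsrf})
print(resp.json().get('msg'))

The point of the Session is simply that the cookies sent with the POST come from the same exchange that produced the _xsrf, instead of from a header block copied out of the browser days earlier.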


Apart from the captcha, Zhihu's login has no other tricky mechanisms.

# -*- coding: utf-8 -*-

import json
import urllib
import re
from tornado.httpclient import HTTPClient

EMAIL = ''
PASS = ''
LOGIN_URL = 'https://www.zhihu.com/signin?next=/'
CAPTCHA_URL = 'https://www.zhihu.com/captcha.gif?type=login&lang=cn'
ACTION_URL = 'https://www.zhihu.com/login/email'


def main():
    # Fetch the sign-in page to collect the session cookies and the _xsrf token.
    response = HTTPClient().fetch(LOGIN_URL)
    cookie = '; '.join(x.split(';', 1)[0] for x in response.headers.get_list('Set-Cookie'))
    xsrf = re.findall('name="_xsrf" value="(.*?)"', response.body)[0]

    # Download the captcha image and save it so it can be opened locally.
    response = HTTPClient().fetch(CAPTCHA_URL, headers={'Cookie': cookie})
    with open('/home/zys/temp/a.gif', 'wb') as f:  # binary mode for the image
        f.write(response.body)

    # Open the saved gif and type the coordinates of the upside-down
    # characters, one "x,y" pair per line; finish with a bare X.
    points = []
    while 1:
        s = raw_input('input a point [exit with X]')
        if s.strip() == 'X':
            break
        points.append([float(x) for x in s.strip().split(',')])

    captcha = {
        'img_size': [400, 88],
        'input_points': points
    }

    p = {
        '_xsrf': xsrf,
        'password': PASS,
        'email': EMAIL,
        'captcha_type': 'cn',
        'captcha': json.dumps(captcha),
    }

    headers = {
        'Cookie': cookie,
        # A trailing non-greedy '(.*?)' matches the empty string, so grab
        # everything up to the next ';' instead to get the actual token.
        'X-Xsrftoken': re.findall('_xsrf=([^;]+)', cookie)[0],
    }

    response = HTTPClient().fetch(ACTION_URL, method='POST', body=urllib.urlencode(p), headers=headers)
    print json.loads(response.body)['msg']



if __name__ == '__main__':
    main()
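
A note on the interactive step above, as I read the code: Zhihu's Chinese captcha (captcha_type 'cn') shows a strip of characters with a few printed upside down and expects the positions of the inverted ones. You open the saved gif, read off each inverted character's coordinates, and type them one per line as x,y pairs (for example 120.5,35.0), finishing with a bare X; the script then posts them back as the input_points of the captcha JSON. The [400, 88] img_size appears to be the image dimensions those coordinates are measured against.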

