python - 爬虫返回状态码200但登录失败可能是什么原因
本文介绍了python - 爬虫返回状态码200但登录失败可能是什么原因的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
问 题
使用request用手机号的方式模拟登录知乎,返回码是200 ,但不能成功登录
顺便说一下这个_xsrf是提交表单时的一个动态数据,我已获取
# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
def get_headers(headers):
list = [str(i) for i in headers.split('\n')]
list2 = []
for i in list:
list3 = i.split(':',1)
list2.append(list3)
dict1 = dict(list2)
return dict1
if __name__ == '__main__':
url = 'https://www.zhihu.com/signin?next=/'
dict_data = {'accont': 'phone_num','password':'123456'}
header = '''Accept:*/*
Accept-Encoding:gzip, deflate, br
Accept-Language:zh-CN,zh;q=0.8
Connection:keep-alive
Content-Type:application/x-www-form-urlencoded; charset=UTF-8
Cookie:d_c0="ACBC3UKxzwuPTshbFfLKRqjjMT0oWXgblAs=|1495696563"; _zap=437b6b27-f1ea-4408-80d7-5c0d01679561; q_c1=969379bf218041c8b0c146155605b388|1500691530000|1491210336000; q_c1=969379bf218041c8b0c146155605b388|1500691530000|1491210336000; aliyungf_tc=AQAAAMvycmzCfAMAXQ0sdy5m9cunAewC; capsion_ticket="2|1:0|10:1503046792|14:capsion_ticket|44:OTgxMmQ1ZWYxMzk1NDAwNzk2YTBmMTZkOTVmZmFmMDE=|886f268c2406b6999cf54f9de1306acec57e5c561a6e0a236e250960ba28f687"; _xsrf=e4f6faad-ed17-446a-8204-4cb57102e017; l_cap_id="NDJjZWY5MmQ5ZTFiNGNjZDhjZjUzMmQ1NmJiYjhlOWQ=|1503058417|f58fafeb509b28e04a3da1de781d4fb5ad9c935d"; r_cap_id="MmRiNmU0MGZhMWQ3NDYzOGE1NDU5NDU4ZDZlMzk0Y2Y=|1503058417|7a376b5ad222e073b75dd75d355114b864830b48"; cap_id="YzRmNzUzMzFjZjA0NGU4YTgwZjY0MDUzMzQwMzdkYTg=|1503058417|c113ad690d27d1443d711a988fee45ae86e450bb"; __utma=51854390.1774224497.1495696572.1503043822.1503058418.10; __utmb=51854390.0.10.1503058418; __utmc=51854390; __utmz=51854390.1503058418.10.8.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/; __utmv=51854390.000--|2=registration_date=20151025=1^3=entry_date=20170403=1
Host:www.zhihu.com
Origin:https://www.zhihu.com
Referer:https://www.zhihu.com/
User-Agent:Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Mobile Safari/537.36
X-Requested-With:XMLHttpRequest
X-Xsrftoken:e4f6faad-ed17-446a-8204-4cb57102e017'''
headers = get_headers(header)
r = requests.get(url, headers = headers)
soup = BeautifulSoup(r.text,'lxml')
_xsrf = soup.find_all('input',attrs = {'name' : '_xsrf'})[0]['value']
dict_data['_xsrf'] = _xsrf
headers['X-Xsrftoken'] = _xsrf
url2 = 'https://www.zhihu.com/login/phone_num'
raw_input()
re = requests.post(url = url2, headers = headers, data = dict_data)
print re.status_code
print re.content
print re.json()['msg']
#the_page = r.text # we use the read method the read the file wo fetche from the URL.[@name="_xsrf"]/@vaule
#print the_page
错误信息如下
200
{
"r": 1,
"errcode": 100030,
"data": {"account":"\u767b\u5f55\u8fc7\u4e8e\u9891\u7e41\uff0c\u8bf7\u7a0d\u540e\u91cd\u8bd5"},
"msg": "\u767b\u5f55\u8fc7\u4e8e\u9891\u7e41\uff0c\u8bf7\u7a0d\u540e\u91cd\u8bd5"
}
登录过于频繁,请稍后重试
我在浏览器上模拟时我的账号可以登录,并且我一个程序只发了一个登录请求,为什么会是登录频繁呢,求dalao分析一波。header我反复修改,似乎都没有用处。不知道该怎么搞了。
解决方案
你先把头写得跟浏览器一模一样再说吧。
知乎的登录,除了验证码,没有其它搞怪的机制的。
# -*- coding: utf-8 -*-
import json
import urllib
import re
from tornado.httpclient import HTTPClient
EMAIL = ''
PASS = ''
LOGIN_URL = 'https://www.zhihu.com/signin?next=/'
CAPTCHA_URL = 'https://www.zhihu.com/captcha.gif?type=login&lang=cn'
ACTION_URL = 'https://www.zhihu.com/login/email'
def main():
response = HTTPClient().fetch(LOGIN_URL)
cookie = '; '.join(x.split(';', 1)[0] for x in response.headers.get_list('Set-Cookie'))
xsrf = re.findall('name="_xsrf" value="(.*?)"', response.body)[0]
response = HTTPClient().fetch(CAPTCHA_URL, headers={'Cookie': cookie})
with open('/home/zys/temp/a.gif', 'w') as f:
f.write(response.body)
points = []
while 1:
s = raw_input('input a point [exit with X]')
if(s.strip() == 'X'):
break
points.append([float(x) for x in s.strip().split(',')])
captcha = {
'img_size': [400, 88],
'input_points': points
}
p = {
'_xsrf': xsrf,
'password': PASS,
'email': EMAIL,
'captcha_type': 'cn',
'captcha': json.dumps(captcha),
}
headers = {
'Cookie': cookie,
'X-Xsrftoken': re.findall('_xsrf=(.*?)', cookie)[0],
}
response = HTTPClient().fetch(ACTION_URL, method='POST', body=urllib.urlencode(p), headers=headers)
print json.loads(response.body)['msg']
if __name__ == '__main__':
main()
这篇关于python - 爬虫返回状态码200但登录失败可能是什么原因的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!
查看全文