Python 模拟登录知乎

Last updated on

之前模拟登录pixiv时不需要验证码,知乎需要验证码,于是写这篇文章记录我学习模拟登录知乎的过程


需要的工具及环境

  • Python 3.6
  • requests 库
  • pillow 库
  • re 库
  • Chrome 浏览器

尝试登录

打开知乎登录页面,按F12->选中‘preserve log’,登录界面如图,点击提交,获得post数据和网址

我们可以获得 POST 请求的 URL 地址:

1
post_url = 'https://www.zhihu.com/login/phone_num' # URL that the login form data is POSTed to

从 POST 提交的数据看,我们需要 4 个参数:'_xsrf'、'captcha'、'password'、'phone_num',所以我们构建提交的数据字典如下:

1
2
3
4
5
6
# Form fields submitted with the login POST request.
datas = {
    '_xsrf': '',                      # hidden form token, filled in from the login page
    'password': 'your_password',      # account password
    'captcha': '',                    # captcha text, typed in by the user
    'phone_num': 'your_phonenumber',  # phone number used as the account name
}

获取验证码

右键点击验证码,查看源码,可以看到 “img” 标签的 src 属性中存放着验证码的网址,鼠标移到 src 属性上即可查看完整的源网址

参数 r 是当前时间的时间戳,单位为毫秒,具体获取代码如下:

1
2
3
# Millisecond timestamp used as the `r` query parameter. It is stored under
# its own name: assigning it to `time` would rebind the imported `time`
# module to a string and break any later `time.time()` call.
timestamp = str(int(time.time() * 1000))
# Use https like the other Zhihu URLs so session cookies are not sent in clear text.
captcha_url = 'https://www.zhihu.com/captcha.gif?r=' + timestamp + "&type=login"
captcha = s.get(captcha_url)

完整代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# -*- coding=utf-8 -*-
import requests
import re
import time
from PIL import Image

headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.96 Safari/537.36'}

# Form fields sent with the login POST; '_xsrf' and 'captcha' are filled in below.
datas = {
    '_xsrf': '',
    'password': 'your_password',
    'captcha': '',
    'phone_num': 'your_phonenumber',
}

login_url = 'https://www.zhihu.com/'  # login page URL
post_url = 'https://www.zhihu.com/login/phone_num'  # URL the login form is POSTed to

# One session for every request so cookies carry over between them.
s = requests.Session()
s.headers = headers

# Fetch the login page and extract the hidden _xsrf form token.
res1 = s.get(login_url)
pattern = re.compile('<input type="hidden" name="_xsrf" value="(.*?)"/>')
p = pattern.findall(res1.text)
if not p:
    # Fail with a clear message instead of an opaque IndexError on p[0].
    raise RuntimeError('_xsrf token not found in the login page')
datas['_xsrf'] = p[0]

# Download the captcha image. The millisecond timestamp gets its own name so
# that the imported `time` module is not rebound to a string.
timestamp = str(int(time.time() * 1000))
captcha_url = 'https://www.zhihu.com/captcha.gif?r=' + timestamp + "&type=login"
captcha = s.get(captcha_url)
with open('captcha.gif', 'wb') as f:
    # The `with` block closes the file; no explicit close() is needed.
    f.write(captcha.content)

# Show the captcha to the user; the image is closed even if show() fails.
with Image.open('captcha.gif') as im:
    im.show()

# The captcha is solved by hand and typed in.
login_captcha = input('input captcha:\n')
datas['captcha'] = login_captcha

# Submit the login form and print the server's response body.
result = s.post(post_url, data=datas)
print(result.text)

2017.5.16 完善代码

  • 增加了cookie,登录成功后可以用cookie登录
  • 增加了交互式
  • 完善代码,将代码进行了封装
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# -*- coding=utf-8 -*-
import requests
import re
import time
import http.cookiejar
from PIL import Image

class zhihuSpider(object):
    """Log in to Zhihu with a phone number, a password and a hand-typed captcha.

    All requests go through one ``requests.Session`` whose cookie jar is an
    ``LWPCookieJar`` backed by a file named ``cookies`` in the working
    directory. The jar is loaded on construction and saved after a login
    attempt.
    """

    def __init__(self):
        """Create the session and try to load previously saved cookies."""
        self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.96 Safari/537.36'}
        # Form fields posted on login; filled in by get_xsrf(), get_captcha()
        # and login().
        self.datas = {
            '_xsrf': '',
            'password': '',
            'captcha': '',
            'phone_num': '',
        }
        self.session = requests.Session()
        self.session.headers = self.headers
        self.session.cookies = http.cookiejar.LWPCookieJar('cookies')
        try:
            self.session.cookies.load('cookies', ignore_discard=True)
        except (OSError, http.cookiejar.LoadError):
            # A missing or unreadable cookie file is expected on first run.
            # Only these errors are caught so that KeyboardInterrupt and
            # programming errors are not silently swallowed.
            print('cookies 不能加载')

    def get_xsrf(self):
        """Fetch the login page and store its hidden ``_xsrf`` token in ``self.datas``.

        Raises:
            IndexError: if the page contains no ``_xsrf`` hidden input.
        """
        login_url = 'https://www.zhihu.com/'  # login page URL
        res = self.session.get(login_url)
        pattern = re.compile('<input type="hidden" name="_xsrf" value="(.*?)"/>')
        p = pattern.findall(res.text)
        if not p:
            # Same exception type that p[0] would raise, with a usable message.
            raise IndexError('_xsrf token not found in the login page')
        self.datas['_xsrf'] = p[0]

    def get_captcha(self):
        """Download the captcha, display it, and store the user's answer.

        The image is written to ``captcha.gif`` in the working directory and
        the typed text is saved under ``self.datas['captcha']``.
        """
        # Millisecond timestamp used as the `r` query parameter.
        t = str(int(time.time() * 1000))
        # https, like the other endpoints, so cookies are not sent in clear text.
        captcha_url = 'https://www.zhihu.com/captcha.gif?r=' + t + "&type=login"
        captcha = self.session.get(captcha_url)
        with open('captcha.gif', 'wb') as f:
            # The `with` block closes the file; no explicit close() is needed.
            f.write(captcha.content)
        # Show the captcha; the image is closed even if show() fails.
        with Image.open('captcha.gif') as im:
            im.show()
        # The captcha is solved by hand and typed in.
        login_captcha = input('input captcha:\n')
        self.datas['captcha'] = login_captcha

    def already_login(self):
        """Return True if the profile settings page answers 200 without redirecting.

        Redirects are disabled so that a redirect response is not followed
        and reported as a 200.
        """
        url = 'https://www.zhihu.com/settings/profile'
        login_code = self.session.get(url, allow_redirects=False).status_code
        return login_code == 200

    def login(self, account, password):
        """Submit the login form, print the response body, and save cookies.

        Args:
            account: value sent as the ``phone_num`` form field.
            password: value sent as the ``password`` form field.
        """
        post_url = 'https://www.zhihu.com/login/phone_num'  # URL the login form is POSTed to
        self.get_xsrf()
        self.get_captcha()
        self.datas['phone_num'] = account
        self.datas['password'] = password
        result = self.session.post(post_url, data=self.datas)
        print(result.text)
        # Saved regardless of what the response says, so a later run can
        # reuse the session via already_login().
        self.session.cookies.save(ignore_discard=True, ignore_expires=True)

if __name__ == "__main__":
    # Local import: only the interactive entry point needs it.
    from getpass import getpass

    spider = zhihuSpider()
    if spider.already_login():
        # Cookies loaded from disk are still valid; nothing more to do.
        print('用户已经登录')
    else:
        account = input('请输入用户名 \n> ')
        # getpass() reads the password without echoing it to the terminal.
        password = getpass('请输入密码 \n> ')
        spider.login(account, password)