微博登录的实现代码来源:
相关环境
使用的是 Python 3.4,配置好环境后可以直接使用 pip 或 easy_install 命令安装第三方库,比如本示例需要依赖的库:
pip install requests
pip install rsa
代码实现
以下代码主要是登录成功后,爬取热门微博的TOP 100,再保存到hotwb.html文件里边
import re
import json
import urllib.parse
import base64
import binascii

import rsa
import requests
import logging
from pprint import pprint

# Output file that the hot-feed HTML fragments are appended to.
wbdom = r'd:\pyzone\hotwb.html'
# Client identifier sent to the Sina SSO endpoints.
weclient = 'ssologin.js(v1.4.5)'
FORMAT = '%(asctime)-15s %(message)s'
user_agent = (
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.11 (KHTML, like Gecko) '
    'Chrome/20.0.1132.57 Safari/536.11'
)

logging.basicConfig(level=logging.DEBUG, format=FORMAT)
logger = logging.getLogger('weibo')

# One shared session so the login cookies are reused by later requests.
session = requests.session()
session.headers['User-Agent'] = user_agent


def encrypt_passwd(passwd, pubkey, servertime, nonce):
    """Encrypt the password with the RSA public key from the prelogin reply.

    Args:
        passwd: Plain-text password.
        pubkey: RSA modulus as a hexadecimal string.
        servertime: ``servertime`` value from the prelogin reply.
        nonce: ``nonce`` value from the prelogin reply.

    Returns:
        The ciphertext as hex-encoded ``bytes``.
    """
    # The public exponent is fixed at 0x10001.
    key = rsa.PublicKey(int(pubkey, 16), int('10001', 16))
    message = str(servertime) + '\t' + str(nonce) + '\n' + str(passwd)
    encrypted = rsa.encrypt(message.encode(), key)
    return binascii.b2a_hex(encrypted)


def _encode_username(username):
    """Return the ``su`` form value: base64 of the URL-quoted user name."""
    # Accept both str and bytes so existing callers that pass bytes still work.
    if isinstance(username, bytes):
        username = username.decode('utf-8')
    quoted = urllib.parse.quote(username)
    return base64.b64encode(quoted.encode('ascii')).decode('ascii')


def wblogin(username, password):
    """Log the shared session in to Weibo through the Sina SSO endpoints.

    Args:
        username: Account name, as ``str`` or ``bytes``.
        password: Plain-text password.

    Returns:
        True when the final callback payload is found in the response,
        False when any step of the login flow does not return the
        expected content.
    """
    su = _encode_username(username)

    # Step 1: prelogin, which supplies servertime, nonce, pubkey and rsakv.
    # The base64 text may contain '+', '/' or '=', so it is percent-encoded
    # before being placed in the query string.
    resp = session.get(
        'http://login.sina.com.cn/sso/prelogin.php?'
        'entry=sso&callback=sinaSSOController.preloginCallBack&'
        'su=%s&rsakt=mod&client=%s' % (urllib.parse.quote(su, safe=''), weclient)
    )
    pre_login_match = re.match(r'[^{]+({.+?})', resp.content.decode('gbk'))
    if pre_login_match is None:
        logger.info('login fail..')
        return False
    pre_login = json.loads(pre_login_match.group(1))

    # Step 2: post the login form with the RSA-encrypted password.
    data = {
        'entry': 'weibo',
        'gateway': 1,
        'from': '',
        'savestate': 7,
        'userticket': 1,
        'ssosimplelogin': 1,
        'su': su,
        'service': 'miniblog',
        'servertime': pre_login['servertime'],
        'nonce': pre_login['nonce'],
        'vsnf': 1,
        'vsnval': '',
        'pwencode': 'rsa2',
        'sp': encrypt_passwd(password, pre_login['pubkey'],
                             pre_login['servertime'], pre_login['nonce']),
        'rsakv': pre_login['rsakv'],
        'encoding': 'gbk',
        'prelt': '115',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.si'
               'naSSOController.feedBackUrlCallBack',
        'returntype': 'META',
    }
    resp = session.post(
        'http://login.sina.com.cn/sso/login.php?client=%s' % weclient,
        data=data
    )

    # Step 3: follow the redirect URL embedded in the returned page.
    redirect_match = re.search(r'replace\([\"\']([^\'\"]+)[\"\']',
                               resp.content.decode('gbk'))
    if redirect_match is None:
        logger.info('login fail..')
        return False
    resp = session.get(redirect_match.group(1))

    # The login is treated as successful when the callback carries a JSON
    # object that contains a nested object.
    login_match = re.match(r'[^{]+({.+?}})', resp.content.decode('gbk'))
    if login_match is None:
        logger.info('login fail..')
        return False

    logger.info('login success..')
    pprint(json.loads(login_match.group(1)))
    return True


def gethotwb(url):
    """Fetch pages 1 to 10 of the feed at *url* and append them to ``wbdom``.

    Args:
        url: Feed URL prefix; the page number is appended to it.
    """
    # The context manager closes the file even if a request or parse fails.
    with open(wbdom, mode='a', encoding='utf-8') as f:
        for page in range(1, 11):  # page 1 to 10
            r = session.get(url + str(page))
            r.encoding = 'utf-8'
            f.write('\n--------page:' + str(page) + '---------\n\n')
            f.write(json.loads(r.text)['data']['html'])


if __name__ == '__main__':
    flag = wblogin(b'xx@163.com', 'xx')
    if flag:
        gethotwb('http://hot.weibo.com/ajax/feed?type=h&v=9999&page=')
总结
- 测试的过程中连接了FQ的VPN,异地登录需要验证码,此时retcode=4049,登录成功是0
- python各个版本之间不兼容好蛋痛
大家中秋快乐!
参考文档
requests文档 http://docs.python-requests.org/zh_CN/latest/
微博登录过程分析 http://www.cnblogs.com/pzxbc/archive/2012/02/03/2335027.html