以前转过一个很有名的帖子
一个获得网页代码的函数
# Helper for downloading the raw source of a web page while presenting
# browser-like request headers and accepting cookies.
import contextlib

try:  # Python 2 module names
    import urllib2
    import cookielib
except ImportError:  # Python 3: the same API lives under new module names
    import urllib.request as urllib2
    import http.cookiejar as cookielib

# Pretend to be a browser: Firefox 18.0
header_data = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-gb,zh-cn;q=0.8,en-us;q=0.5,en;q=0.3',
    'Connection': 'keep-alive',
}


def GetSource(url, timeout=None):
    """Return the body of ``url`` as returned by the server.

    The request is sent with the browser-like headers in ``header_data``
    through an opener that has a fresh cookie jar, so cookies set during
    redirects of this one request are honoured.

    Args:
        url: Address of the resource to fetch.
        timeout: Optional timeout in seconds for the request.  When left as
            ``None`` the library's default timeout behaviour is used.

    Returns:
        The raw response body (a byte string), or ``''`` if no response
        object was produced.

    Raises:
        Whatever ``urlopen`` raises for an unreachable or failing URL
        (e.g. ``URLError`` / ``HTTPError``); errors are not swallowed here.
    """
    # Enable cookies for this request.
    cookie_handler = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
    opener = urllib2.build_opener(cookie_handler, urllib2.HTTPHandler)
    # Install the opener globally; kept because existing callers may rely on
    # later plain ``urlopen`` calls going through a cookie-aware opener.
    urllib2.install_opener(opener)

    # Pretend to be a browser by attaching the shared headers.
    request = urllib2.Request(url=url, headers=header_data)

    # Only forward ``timeout`` when the caller supplied one, so the default
    # timeout handling of ``urlopen`` is untouched otherwise.
    if timeout is None:
        response = urllib2.urlopen(request)
    else:
        response = urllib2.urlopen(request, timeout=timeout)

    if response is None:
        return ''

    # ``closing`` works for both Python 2 and Python 3 response objects and
    # guarantees the underlying connection is released even if read() fails.
    with contextlib.closing(response):
        return response.read()
No comments :
Post a Comment