以前转过一个很有名的帖子
下面是一个获取网页源代码的函数:
import urllib2
import cookielib
# Request headers that make the client look like a desktop browser
# (Firefox 18.0 on Windows).
header_data = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-gb,zh-cn;q=0.8,en-us;q=0.5,en;q=0.3',
    'Connection': 'keep-alive',
}
def GetSource(url):
    """Download ``url`` with browser-like headers and return the page body.

    A new cookie-enabled opener is built on every call and installed as
    urllib2's global default opener, so later plain ``urllib2.urlopen``
    calls in the same process also go through it.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body exactly as returned by the response's ``read()``.

    Raises:
        urllib2.URLError: If the request fails (``urllib2.HTTPError`` for
            HTTP error responses).
    """
    # Enable cookie handling with a fresh, in-memory cookie jar.
    cookie = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
    opener = urllib2.build_opener(cookie, urllib2.HTTPHandler)
    # Kept for backward compatibility: callers may rely on the opener being
    # installed globally after this function has run.
    urllib2.install_opener(opener)
    # Pretend to be a browser by sending the shared header set.
    request = urllib2.Request(url=url, headers=header_data)
    # The opener either returns a response object or raises, so there is no
    # "empty response" case to check for.
    response = opener.open(request)
    try:
        return response.read()
    finally:
        # Always release the underlying connection, even if read() fails.
        response.close()
No comments :
Post a Comment