Get all hyperlinks from a web page
#!/usr/bin/env python
# get all hyperlinks
from re import compile as reComp
from urllib import urlopen, basejoin
def getURL(http, num=10):
    """Fetch a page and print the first ``num`` hyperlinks found in it.

    Hyperlinks are located with a regular expression that matches
    double-quoted ``href="..."`` attributes only; this is not a full HTML
    parser. Each match is resolved against ``http`` so relative links are
    printed as absolute URLs, one per line.

    Args:
        http: URL of the page to download; also the base for relative links.
        num: Maximum number of links to print (default 10).

    Returns:
        None. The links are written to standard output.
    """
    # Resolved locally so this function runs unchanged on Python 3 and
    # falls back to the Python 2 module layout when needed.
    try:
        from urllib.request import urlopen
        from urllib.parse import urljoin
    except ImportError:  # Python 2
        from urllib import urlopen
        from urlparse import urljoin
    from contextlib import closing

    # closing() guarantees the connection is released even if read() raises;
    # the Python 2 response object is not a context manager by itself.
    with closing(urlopen(http)) as response:
        data = response.read()

    # Python 3 returns bytes, which a str pattern cannot search. The page's
    # declared charset is not inspected; undecodable bytes are replaced.
    if not isinstance(data, str):
        data = data.decode("utf-8", "replace")

    regex = reComp(r'href="([^"]+)"')
    for url in regex.findall(data)[:num]:
        # Single-argument print(...) is valid on both Python 2 and 3.
        print(urljoin(http, url))
def main():
    """Run the demo: print the first five hyperlinks of the sample blog."""
    demo_page = "http://52xenos.blogspot.com"
    link_limit = 5
    getURL(demo_page, link_limit)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
输出结果如下:
Results:
http://52xenos.blogspot.com/feeds/posts/default
http://52xenos.blogspot.com/feeds/posts/default?alt=rss
http://www.blogger.com/feeds/7524190211354587233/posts/default
http://www.blogger.com/rsd.g?blogID=7524190211354587233
http://www.blogger.com/profile/07043019578098595151
No comments:
Post a Comment