get all hyperlinks
#!/usr/bin/env python
"""Print the first few hyperlinks found on a web page."""

from contextlib import closing
from re import compile as reComp

try:  # Python 3 locations
    from urllib.parse import urljoin as basejoin
    from urllib.request import urlopen
except ImportError:  # Python 2 fallback (the original imports)
    from urllib import basejoin, urlopen

# Captures the value of every double-quoted href attribute.
HREF_RE = reComp(r'href="([^"]+)"')


def getURL(http, num=10):
    """Fetch ``http`` and print the first ``num`` hyperlinks it contains.

    Links are the values of double-quoted ``href="..."`` attributes, taken
    in document order. Each one is resolved against ``http`` with
    ``basejoin`` before printing, so relative links are printed as
    absolute URLs.

    Args:
        http: URL of the page to fetch.
        num: Maximum number of links to print (default 10).

    Returns:
        None. The links are written to standard output, one per line.
    """
    # closing() guarantees the response is released even if read() raises;
    # it also works on Python 2, where the response is not a context manager.
    with closing(urlopen(http)) as response:
        data = response.read()
        if not isinstance(data, str):
            # Python 3 returns bytes, which a str pattern cannot search.
            # Use the charset declared by the response, else assume UTF-8.
            charset = response.headers.get_content_charset() or "utf-8"
            data = data.decode(charset, "replace")

    for url in HREF_RE.findall(data)[:num]:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(basejoin(http, url))


def main():
    """Demo entry point: print five links from the sample blog."""
    getURL("http://52xenos.blogspot.com", 5)


if __name__ == '__main__':
    main()
输出结果如下:
Results:
http://52xenos.blogspot.com/feeds/posts/default
http://52xenos.blogspot.com/feeds/posts/default?alt=rss
http://www.blogger.com/feeds/7524190211354587233/posts/default
http://www.blogger.com/rsd.g?blogID=7524190211354587233
http://www.blogger.com/profile/07043019578098595151
No comments:
Post a Comment