|
#!/usr/bin/python
# coding:utf8
import re
import urllib
import urllib2
def getHtml(url):
    """Fetch *url* and return the raw response body.

    url may be a byte string (decoded as UTF-8) or an already-decoded
    text string, which is passed through unchanged.
    """
    # Only byte strings need decoding. Calling .decode() on text that is
    # already unicode makes Python 2 encode it to ASCII first, which raises
    # as soon as the URL contains a non-ASCII character.
    if isinstance(url, bytes):
        url = url.decode('utf-8')
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        # Release the connection even when read() raises.
        response.close()
def getImgList(html):
    """Return every ``http://`` URL ending in ``.jpg`` found in *html*.

    Matches are returned in document order; duplicates are kept. An empty
    list is returned when nothing matches.
    """
    # A URL runs until whitespace, a comma or a double quote. The class
    # must use \s (whitespace), not a bare "s", otherwise any URL that
    # contains the letter "s" can never match. The dot before "jpg" is
    # escaped so that only a literal ".jpg" suffix is accepted.
    pattern = r'http://[^\s,"]*\.jpg'
    return re.findall(pattern, html)
def download(imglist, page):
    """Download every URL in *imglist* into the current working directory.

    Files are named by their position in the list: '0.jpg', '1.jpg', ...

    NOTE(review): *page* is accepted but not used, so files written by an
    earlier call are overwritten by the next one. Confirm whether the page
    number was meant to be part of the file name.
    """
    for index, img_url in enumerate(imglist):
        urllib.urlretrieve(img_url, '%s.jpg' % index)
def downloadNum(pagenum):
    """Run the fetch / extract / download cycle *pagenum* times.

    Pages are numbered from 1 to *pagenum* inclusive; nothing happens when
    *pagenum* is less than 1.

    NOTE(review): every pass fetches the same module-level ``url``; the
    page number is only forwarded to download(). Confirm whether the URL
    was meant to vary per page.
    """
    for page in range(1, pagenum + 1):
        html = getHtml(url)
        imglist = getImgList(html)
        download(imglist, page)
# Page that is scanned for .jpg links; read as a global by downloadNum().
url = u'http://www.sina.com.hk/'

if __name__ == '__main__':
    # Script entry point: run ten fetch/download passes.
    downloadNum(10)
Archiver|手机版|科学网 ( 京ICP备07017567号-12 )
GMT+8, 2024-10-19 22:10
Powered by ScienceNet.cn
Copyright © 2007- 中国科学报社