# -*- coding: utf-8 -*-
import os
import re
import urllib

try:  # Python 2
    from urllib import urlretrieve
    from urllib2 import Request, URLError, urlopen
except ImportError:  # Python 3
    from urllib.error import URLError
    from urllib.request import Request, urlopen, urlretrieve
def get_url_content(url, retry_times=2):
    """Download a page and return its body re-encoded as UTF-8.

    The raw response is decoded as GBK (undecodable bytes are dropped) and
    then encoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        retry_times: Number of further attempts allowed after a failure that
            carries an HTTP 5xx status code.

    Returns:
        The page body as a UTF-8 encoded byte string, or None when the
        request failed and no retry succeeded.
    """
    print('Downloading: %s' % url)
    send_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; rv:16.0) '
                      'Gecko/20100101 Firefox/16.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;'
                  'q=0.9,*/*;q=0.8',
        'Connection': 'keep-alive',
    }
    try:
        response = urlopen(Request(url, headers=send_headers))
        try:
            raw = response.read()
        finally:
            # Python 2 response objects are not context managers, so close
            # explicitly to avoid leaking the connection.
            response.close()
    except URLError as e:
        print(e.reason)
        print('retry times: %s' % retry_times)
        # Only errors with a 5xx status code are retried; errors without a
        # ``code`` attribute (plain URLError) are never retried.
        if retry_times > 0 and 500 <= getattr(e, 'code', 0) < 600:
            # Return the retry's result so a successful retry is not lost.
            return get_url_content(url, retry_times - 1)
        return None
    return raw.decode('gbk', 'ignore').encode('utf-8')
def get_pic_url(html_content):
    """Extract image URLs from ``src="..."`` attributes in an HTML page.

    Only absolute ``http://`` URLs ending in png, jpg or gif are matched;
    the extension is matched case-insensitively.

    Args:
        html_content: Page markup as text (or a UTF-8 byte string).

    Returns:
        A list of ``(url, extension)`` tuples in document order; an empty
        list when ``html_content`` is empty or None.
    """
    if not html_content:
        return []
    # On Python 3 the downloader hands back bytes; a text pattern cannot be
    # applied to bytes, so decode first. On Python 2 ``bytes is str`` and this
    # branch is never taken.
    if isinstance(html_content, bytes) and not isinstance(html_content, str):
        html_content = html_content.decode('utf-8', 'ignore')
    pic_reg = r'src="(http://.*?(png|jpg|gif))"'
    patten = re.compile(pic_reg, re.IGNORECASE)
    return patten.findall(html_content)
def save_pic_urllib(save_path, pic_url):
    """Download one image into ``save_path`` unless it is already there.

    The local file name is the last ``/``-separated component of
    ``pic_url``.

    Args:
        save_path: Directory that receives the file.
        pic_url: Address of the image to download.
    """
    save_pic_name = os.path.join(save_path, pic_url.split('/')[-1])
    # Skip files that already exist so an interrupted crawl can be resumed
    # without downloading everything again.
    if not os.path.exists(save_pic_name):
        print(save_pic_name)
        urlretrieve(pic_url, save_pic_name)
def mkdir(mkdir_path):
    """Create a directory (and any missing parents) if it does not exist.

    Args:
        mkdir_path: Directory path; surrounding whitespace is stripped.

    Returns:
        The stripped path.

    Raises:
        OSError: If the directory cannot be created and does not already
            exist as a directory.
    """
    path = mkdir_path.strip()
    try:
        os.makedirs(path)
    except OSError:
        # Creating first and checking afterwards avoids the race between an
        # existence check and the creation; re-raise only real failures.
        if not os.path.isdir(path):
            raise
    return path
# Debug aid: print(get_url_content(url)) for a single page URL.
if __name__ == '__main__':
    # Root directory; each crawled page gets its own sub-directory below it.
    save_path = mkdir('d:\\meizi\\')
    for index in range(1, 755):  # crawl the whole site by page ID
        src = 'http://www.qiubaichengren.com/%s.html' % index
        url_content = get_url_content(src)
        if not url_content:
            # Download failed (None) or the page was empty: nothing to save.
            continue
        son_save_path = mkdir(save_path + str(index) + '\\')
        for pic in get_pic_url(url_content):
            # Each match is a (url, extension) tuple; only the URL is needed.
            save_pic_urllib(son_save_path, pic[0])
        print('第' + str(index) + '頁,爬取完成。')
# 擼叼屎,拿去擼吧!
# 1.《【糗百成人版地址】Urllib2爬取糗事百科成人版妹子圖(老司機第四彈)》援引自互聯網,旨在傳遞更多網絡信息知識,僅代表作者本人觀點,與本網站無關,侵刪請聯系頁腳下方聯系方式。
# 2.《【糗百成人版地址】Urllib2爬取糗事百科成人版妹子圖(老司機第四彈)》僅供讀者參考,本網站未對該內容進行證實,對其原創性、真實性、完整性、及時性不作任何保證。
# 3.文章轉載時請保留本站內容來源地址,http://f99ss.com/yule/3196889.html