""" example05.py - 多线程版本爬虫 """ import os from concurrent.futures import ThreadPoolExecutor
import requests
def download_picture(url, timeout=10):
    """Download one image and save it under ``images/beauty/``.

    The file name is the part of ``url`` after its last ``/``. Nothing is
    written unless the server answers with HTTP 200. The target directory
    must already exist; this function does not create it.

    :param url: address of the image to fetch.
    :param timeout: seconds passed to ``requests.get`` so that a stalled
        server cannot block the calling thread indefinitely.
    """
    filename = url[url.rfind('/') + 1:]
    resp = requests.get(url, timeout=timeout)
    if resp.status_code == 200:
        with open(f'images/beauty/{filename}', 'wb') as file:
            file.write(resp.content)
def main():
    """Fetch three pages of image listings and download every picture.

    Each listing page is requested sequentially; every ``qhimg_url`` found
    in the page's ``list`` entry is handed to a 16-worker thread pool, which
    runs ``download_picture`` for it. Leaving the ``with`` block waits for
    all submitted downloads to finish.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs('images/beauty', exist_ok=True)
    with ThreadPoolExecutor(max_workers=16) as pool:
        for page in range(3):
            # A timeout keeps a stalled listing request from hanging the run.
            resp = requests.get(
                f'https://image.so.com/zjl?ch=beauty&sn={page * 30}',
                timeout=10,
            )
            if resp.status_code != 200:
                continue
            for pic_dict in resp.json()['list']:
                # The returned futures are not inspected, so an exception
                # raised inside a download is not reported here.
                pool.submit(download_picture, pic_dict['qhimg_url'])
# Run the crawler only when this file is executed as a script.
if __name__ == '__main__':
    main()
执行如下所示的命令。

```bash
time python3 example05.py
```
代码的执行结果如下所示:

```
python3 example05.py 2.65s user 0.40s system 95% cpu 3.193 total
```
# NOTE(review): this coroutine has the same name as the threaded
# download_picture(url) defined earlier in this file; if both live in one
# module, this definition shadows the earlier one.
async def download_picture(session, url):
    """Asynchronously download one image and save it under ``images/beauty/``.

    The file name is the part of ``url`` after its last ``/``. Nothing is
    written unless the server answers with HTTP 200.

    :param session: object whose ``get(url, ssl=False)`` returns an async
        context manager yielding the response (presumably an
        ``aiohttp.ClientSession`` -- confirm against the caller).
    :param url: address of the image to fetch.
    """
    filename = url[url.rfind('/') + 1:]
    # ssl=False turns off TLS certificate verification for this request.
    async with session.get(url, ssl=False) as resp:
        if resp.status == 200:
            data = await resp.read()
            async with aiofile.async_open(
                f'images/beauty/{filename}', 'wb'
            ) as file:
                await file.write(data)
async def fetch_json():
    """Fetch three listing pages and download every picture they reference.

    One ``aiohttp.ClientSession`` is shared by all requests. Listing pages
    are requested one after another; the pictures of each page are then
    downloaded concurrently with ``asyncio.gather``. If any download raises,
    the first exception propagates to the caller.
    """
    import asyncio

    async with aiohttp.ClientSession() as session:
        for page in range(3):
            async with session.get(
                url=f'https://image.so.com/zjl?ch=beauty&sn={page * 30}',
                ssl=False,
            ) as resp:
                if resp.status != 200:
                    continue
                json_str = await resp.text()
            # The body is parsed from text with json.loads, as in the
            # original code, rather than through the response's own decoder.
            result = json.loads(json_str)
            # Awaiting each download in turn would serialize the requests;
            # gather lets the downloads of one page overlap.
            await asyncio.gather(*(
                download_picture(session, pic_dict['qhimg_url'])
                for pic_dict in result['list']
            ))