level 6
贴吧用户_5SDS9P2
楼主
import requests
from lxml import etree
import time
def down_image(page, save_dir='D:/123/456', encoding=None, timeout=30):
    """Download every image referenced by the ``<dd>`` entries of one listing page.

    The listing page ``list-{page}.html`` is fetched, its raw bytes are
    dumped to ``index.html`` in the current directory, and for every
    ``<dd>`` element that has both ``./a/img/@src`` and ``./a/img/@title``
    the image is downloaded and stored as ``<save_dir>/<title>.<suffix>``.
    The ``pic_360`` fragment of each source URL is replaced by ``pic``
    before downloading.

    Args:
        page: Page number substituted into the listing URL.
        save_dir: Directory the images are written to; created if missing.
        encoding: Optional character encoding forced on the HTML parser.
            ``None`` lets lxml detect it from the document.
        timeout: Seconds to wait for each HTTP request.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched. Failures of individual images are reported and skipped.
    """
    import os
    import re
    from urllib.parse import urljoin, urlsplit

    print(f'页面{page},开始')

    url = f'https://www.36992.com/girls/list-{page}.html'
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()

    # Debug copy of the most recently fetched listing page.
    with open('index.html', 'wb') as f:
        f.write(resp.content)

    # NOTE(review): the original contained a dead assignment of 'gbk' to an
    # unused local, which looks like a mangled attempt to set the page
    # encoding. If titles come out garbled, call with encoding='gbk'.
    parser = etree.HTMLParser(encoding=encoding) if encoding else None
    tree = etree.HTML(resp.content, parser)
    if tree is None:
        # Nothing parseable came back; there are no entries to download.
        print(f'页面{page},完成')
        return

    # Collect (image URL, title) pairs. An entry is recorded only when the
    # node has BOTH attributes, so a value left over from an earlier node
    # can never be reused.
    sub_url_list = []
    for node in tree.xpath('//dd'):
        srcs = node.xpath('./a/img/@src')
        titles = node.xpath('./a/img/@title')
        if not srcs or not titles:
            continue
        img_url = str(srcs[0]).replace("pic_360", "pic")
        # Resolve relative / protocol-relative links against the page URL.
        sub_url_list.append((urljoin(url, img_url), str(titles[0])))

    os.makedirs(save_dir, exist_ok=True)

    for sub_url, title in sub_url_list:
        # Take the extension from the URL path only, so a query string or a
        # dot in the host name cannot end up in the file name.
        suffix = os.path.splitext(urlsplit(sub_url).path)[1].lstrip('.')
        if not suffix:
            suffix = 'jpg'  # assumption: extension-less links are JPEGs
        # Replace characters that are not allowed in Windows file names.
        stem = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', title).strip() or 'untitled'

        try:
            img_resp = requests.get(sub_url, timeout=timeout)
            img_resp.raise_for_status()
        except requests.RequestException as exc:
            # One broken image must not abort the rest of the page.
            print(f'页面{page},图片下载失败: {sub_url} ({exc})')
            continue

        with open(os.path.join(save_dir, f'{stem}.{suffix}'), 'wb') as f:
            f.write(img_resp.content)

    print(f'页面{page},完成')
if __name__ == '__main__':
    # Process listing pages 1 through 59, one after another.
    for page_no in range(1, 60):
        down_image(page_no)
2021年08月10日 06点08分
1
from lxml import etree
import time
def down_image(page, save_dir='D:/123/456', encoding=None, timeout=30):
    """Download every image referenced by the ``<dd>`` entries of one listing page.

    The listing page ``list-{page}.html`` is fetched, its raw bytes are
    dumped to ``index.html`` in the current directory, and for every
    ``<dd>`` element that has both ``./a/img/@src`` and ``./a/img/@title``
    the image is downloaded and stored as ``<save_dir>/<title>.<suffix>``.
    The ``pic_360`` fragment of each source URL is replaced by ``pic``
    before downloading.

    Args:
        page: Page number substituted into the listing URL.
        save_dir: Directory the images are written to; created if missing.
        encoding: Optional character encoding forced on the HTML parser.
            ``None`` lets lxml detect it from the document.
        timeout: Seconds to wait for each HTTP request.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched. Failures of individual images are reported and skipped.
    """
    import os
    import re
    from urllib.parse import urljoin, urlsplit

    print(f'页面{page},开始')

    url = f'https://www.36992.com/girls/list-{page}.html'
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()

    # Debug copy of the most recently fetched listing page.
    with open('index.html', 'wb') as f:
        f.write(resp.content)

    # NOTE(review): the original contained a dead assignment of 'gbk' to an
    # unused local, which looks like a mangled attempt to set the page
    # encoding. If titles come out garbled, call with encoding='gbk'.
    parser = etree.HTMLParser(encoding=encoding) if encoding else None
    tree = etree.HTML(resp.content, parser)
    if tree is None:
        # Nothing parseable came back; there are no entries to download.
        print(f'页面{page},完成')
        return

    # Collect (image URL, title) pairs. An entry is recorded only when the
    # node has BOTH attributes, so a value left over from an earlier node
    # can never be reused.
    sub_url_list = []
    for node in tree.xpath('//dd'):
        srcs = node.xpath('./a/img/@src')
        titles = node.xpath('./a/img/@title')
        if not srcs or not titles:
            continue
        img_url = str(srcs[0]).replace("pic_360", "pic")
        # Resolve relative / protocol-relative links against the page URL.
        sub_url_list.append((urljoin(url, img_url), str(titles[0])))

    os.makedirs(save_dir, exist_ok=True)

    for sub_url, title in sub_url_list:
        # Take the extension from the URL path only, so a query string or a
        # dot in the host name cannot end up in the file name.
        suffix = os.path.splitext(urlsplit(sub_url).path)[1].lstrip('.')
        if not suffix:
            suffix = 'jpg'  # assumption: extension-less links are JPEGs
        # Replace characters that are not allowed in Windows file names.
        stem = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', title).strip() or 'untitled'

        try:
            img_resp = requests.get(sub_url, timeout=timeout)
            img_resp.raise_for_status()
        except requests.RequestException as exc:
            # One broken image must not abort the rest of the page.
            print(f'页面{page},图片下载失败: {sub_url} ({exc})')
            continue

        with open(os.path.join(save_dir, f'{stem}.{suffix}'), 'wb') as f:
            f.write(img_resp.content)

    print(f'页面{page},完成')
if __name__ == '__main__':
    # Process listing pages 1 through 59, one after another.
    for page_no in range(1, 60):
        down_image(page_no)

