import requests
from lxml import etree
import os
import threading
# Seconds to wait for any single HTTP request before giving up; without a
# timeout a stalled connection would block its worker thread forever.
_REQUEST_TIMEOUT = 15


def _fetch(url, headers):
    """GET *url* and return the response, raising on timeout or HTTP error status."""
    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def get_img(idx):
    """Download every album linked from listing page ``idx`` into ``img/page{idx}/``.

    The listing page is parsed for album titles (``img/@alt``) and album links
    (``div/a/@href``) inside ``ul.detail-list``; the two lists are paired
    positionally.  Each album is saved to ``img/page{idx}/{n}_{title}/`` as
    ``1.jpg`` .. ``{count}.jpg``, where ``count`` is read from the
    ``(x / count)`` text in the album page header.

    An album whose directory already exists is skipped without any request.
    NOTE(review): a directory left behind by an interrupted run is therefore
    treated as complete; delete it manually to re-download that album.

    Args:
        idx: Listing page number, interpolated into the listing URL.

    Returns:
        None.  Progress and failures are reported with ``print``; any
        ``Exception`` aborts the remaining work for this page and is reported
        rather than propagated, so one bad page does not kill its thread
        with a traceback.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        url = f"https://www.92meinv.com/index-{idx}.html"
        listing = etree.HTML(_fetch(url, headers).text)
        names = listing.xpath('//ul[@class="detail-list"]//a/img/@alt')
        album_urls = listing.xpath('//ul[@class="detail-list"]//div/a/@href')

        # Assumes titles and links come back in the same order, one per album.
        for position, (name, album_url) in enumerate(zip(names, album_urls), start=1):
            album_dir = f"img/page{idx}/{position}_{name}"
            # Check before fetching so already-downloaded albums cost no request.
            if os.path.exists(album_dir):
                continue

            tree = etree.HTML(_fetch(album_url, headers).text)
            # Header text looks like "(1 / 25)"; the second number is the image count.
            counter_text = tree.xpath('//div[@class="des"]/h1/span/text()')[0]
            num = int(counter_text.strip("()").split(" / ")[1])

            os.makedirs(album_dir, exist_ok=True)
            print(f"page {idx} -> 正在下载:{name}...")

            for i in range(1, num + 1):
                if i > 1:
                    # Image i > 1 lives on "<album>-<i>.html"; image 1 is on
                    # the album page that is already parsed.
                    page_url = album_url.replace(".html", "-" + str(i) + ".html")
                    tree = etree.HTML(_fetch(page_url, headers).text)
                link = tree.xpath('//div[@class="pp hh"]/a/img/@src')[0]
                img_data = _fetch(link, headers).content
                with open(f"{album_dir}/{i}.jpg", "wb") as f:
                    f.write(img_data)

        print(f"page {idx}所有图片下载完成")
    except Exception as exc:
        # Thread boundary: report the cause instead of silently swallowing it,
        # while still letting KeyboardInterrupt/SystemExit propagate.
        print(f"page {idx} 下载失败: {exc!r}")
if __name__ == "__main__":
    # Script entry point: ask for an inclusive page range and download the
    # pages concurrently.  Imported locally because only this entry point
    # needs the pool.
    from concurrent.futures import ThreadPoolExecutor

    page_st = int(input("请输入开始页数:"))
    page_end = int(input("请输入结束页数:"))
    pages = range(page_st, page_end + 1)

    # An empty range (end < start) does nothing, as before.
    if pages:
        # One worker per page up to a cap of 32, so small ranges run fully in
        # parallel as before while a large range no longer spawns an
        # unbounded number of threads and connections.
        with ThreadPoolExecutor(max_workers=min(32, len(pages))) as pool:
            # Drain the iterator so an exception escaping get_img surfaces
            # here instead of being silently dropped by the pool.
            for _ in pool.map(get_img, pages):
                pass

# Note: the target website went offline at one point.
# Update: the site is back online; it only changed its domain name.