import os
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup
# 爬取指定大小的图片
def crawl_images(url, image_size):
    """Download the images on a page whose source URL contains ``image_size``.

    The page at ``url`` is fetched and parsed with BeautifulSoup's
    ``html.parser``. Every ``<img>`` tag found with the class filter
    ``'img-responsive lazy image_dta'`` is inspected; when its
    ``data-original`` attribute contains ``image_size`` as a substring, the
    image is handed to ``save_image``.

    Args:
        url: Address of the page to scan.
        image_size: Substring that must appear in an image's ``data-original``
            value for it to be downloaded (e.g. ``'640x360'``).

    Returns:
        None. Nothing is downloaded when the page does not answer with
        HTTP 200.

    Raises:
        requests.RequestException: If the page itself cannot be fetched
            (connection error, timeout, ...).
    """
    # A timeout keeps an unresponsive server from hanging the crawl forever.
    response = requests.get(url, timeout=10)
    if response.status_code != 200:
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    images = soup.find_all('img', {'class': 'img-responsive lazy image_dta'})
    for image in images:
        # .get() instead of [...]: a matching tag without the lazy-load
        # attribute must be skipped, not abort the whole crawl with KeyError.
        source = image.get('data-original')
        if not source or image_size not in source:
            continue
        # Resolve relative and protocol-relative ("//host/img.jpg") sources
        # against the final page URL; absolute URLs pass through unchanged.
        save_image(urljoin(response.url, source))
# 保存图片到本地
def save_image(image_url):
    """Download one image and store it under the local ``images/`` directory.

    The file name is the last path segment of ``image_url`` (query string and
    fragment excluded). A status line is printed for both success and failure.

    Args:
        image_url: Absolute URL of the image to download.

    Returns:
        None. Failures (network error, non-200 response, URL without a file
        name) are reported on stdout instead of being raised, so one bad image
        does not stop a batch download.
    """
    # Use only the URL path so "pic.jpg?x=1#frag" is stored as "pic.jpg".
    file_name = urlparse(image_url).path.rsplit('/', 1)[-1]
    if not file_name:
        print(f'图片 {image_url} 下载失败')
        return

    try:
        # A timeout keeps a stalled download from blocking indefinitely.
        response = requests.get(image_url, timeout=10)
    except requests.RequestException:
        print(f'图片 {image_url} 下载失败')
        return

    if response.status_code != 200:
        print(f'图片 {image_url} 下载失败')
        return

    # The target directory is not guaranteed to exist on first run.
    os.makedirs('images', exist_ok=True)
    file_path = 'images/' + file_name
    with open(file_path, 'wb') as file:
        file.write(response.content)
    print(f'图片 {image_url} 已保存到 {file_path}')
# 主函数
def main():
    """Run the crawler once with a hard-coded page and image size."""
    crawl_images(
        # Live page to crawl (the original note describes it as a Douyu
        # live-stream URL).
        'http://www.dantiao.com/live/girl',
        # Size marker that must appear in an image URL for it to be fetched.
        '640x360',
    )
# Start the crawl only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
# 在这个示例中，我们定义了 crawl_images 函数来爬取指定大小的图片，save_image 函数来保存图片到本地，
# 并在 main 函数中调用这些函数。这个简单的爬虫示例展示了如何使用 Python 网络请求库 requests
# 和 HTML 解析库 BeautifulSoup 来爬取网页上的图片资源。