import requests
from bs4 import BeautifulSoup
import re
# Fetch the page content
def get_html(url, timeout=30):
    """Download a web page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    # Browser-like User-Agent; presumably so the request is treated like a
    # regular browser visit rather than a script.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly instead of handing an error page to the parser.
    response.raise_for_status()
    return response.text
# Parse the page and extract the video information
def parse_html(html):
    """Extract the video URL from the page markup.

    Looks up the ``<video id="video-player">`` element and returns the
    ``src`` attribute of the last ``<source>`` element found inside it.
    If the player has no ``<source>`` elements, the ``src`` attribute of
    the ``<video>`` element itself is used instead.

    Args:
        html: The page markup as a string.

    Returns:
        The value of the selected ``src`` attribute, exactly as written in
        the page (it may be a relative URL).

    Raises:
        ValueError: If the player element is missing or no usable ``src``
            attribute can be found.
    """
    soup = BeautifulSoup(html, 'html.parser')

    player = soup.find('video', id='video-player')
    if player is None:
        raise ValueError('no <video id="video-player"> element found in page')

    sources = player.find_all('source')
    # Prefer the last <source>, as the original lookup did; fall back to
    # the <video> element's own src when it has no <source> elements.
    candidate = sources[-1] if sources else player
    video_url = candidate.get('src')
    if not video_url:
        raise ValueError('video player element has no usable src attribute')
    return video_url
# Download the video
def download_video(video_url, title, timeout=30, chunk_size=64 * 1024):
    """Stream a video to ``<title>.mp4`` in the current working directory.

    Args:
        video_url: Direct URL of the video resource.
        title: Base name of the output file; ``.mp4`` is appended.
        timeout: Seconds to wait for the server before giving up.
        chunk_size: Number of bytes requested per streamed chunk.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
        OSError: If the output file cannot be created or written.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    # Using the response as a context manager guarantees the streamed
    # connection is released even if writing fails part-way.
    with requests.get(video_url, headers=headers, stream=True,
                      timeout=timeout) as response:
        # Check the status before creating the file so an error page is
        # never saved as a video.
        response.raise_for_status()
        with open(title + '.mp4', 'wb') as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    f.write(chunk)
    print(f'视频 {title} 下载完成。')
# Main routine
def main(url):
    """Download the video embedded in the page at ``url``.

    Fetches the page, extracts the video URL, derives an output file name
    from the page's ``<title>`` and downloads the video under that name.

    Args:
        url: Address of the page that embeds the video.
    """
    from html import unescape
    from urllib.parse import urljoin

    page = get_html(url)
    # The src attribute may be relative; resolve it against the page URL.
    # Absolute URLs pass through urljoin unchanged.
    video_url = urljoin(url, parse_html(page))

    # Case-insensitive, non-greedy, and DOTALL so a title that spans
    # several lines or carries attributes on the tag is still found.
    match = re.search(r'<title[^>]*>(.*?)</title>', page,
                      re.IGNORECASE | re.DOTALL)
    title = unescape(match.group(1)) if match else ''

    # The title is untrusted page content that becomes a file name:
    # collapse whitespace, replace path separators and characters that are
    # illegal in file names, and drop leading/trailing dots and spaces.
    title = ' '.join(title.split())
    title = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', '_', title).strip(' .')
    if not title:
        # No usable title in the page; fall back to a generic name.
        title = 'video'

    download_video(video_url, title)
# Example usage
if __name__ == '__main__':
    import sys

    # Pass the page URL as the first command-line argument, or replace the
    # placeholder below with the link of the page whose video you want.
    url = sys.argv[1] if len(sys.argv) > 1 else 'https://www.****.com/video/****'
    main(url)
这段代码首先定义了获取网页内容、解析网页和下载视频三个函数，然后在主函数中依次调用它们来完成视频下载。需要注意的是，这只是一个简单的示例，实际使用时可能需要处理更多的异常情况以及网站特定的反爬虫策略。此外，下载内容时应遵守相关法律法规，不得侵犯他人版权。