Python 爬虫:获取 JS 动态内容——应用宝搜索应用!
import requests
from pyquery import PyQuery as pq
def get_js_content(url, *, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. Without a timeout a stalled server would
            block the caller indefinitely.

    Returns:
        The response text when the server answers with status 200,
        otherwise ``None``.

    Note:
        Exceptions raised by ``requests.get`` (connection errors,
        timeouts) are not caught here and propagate to the caller.
    """
    # Fixed headers sent with every request; presumably meant to make the
    # request look like it comes from a browser -- confirm with the author.
    headers = {
        'User-Agent': 'Mozilla/5.0',
        'Referer': 'https://www.baidu.com/',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.text
    return None
def parse_js_content(html):
    """Print the name, author, description and download link of each app.

    Every element matching ``.app-item`` is treated as one entry; its
    ``.name``, ``.desc`` and ``.author`` texts and the ``href`` of its
    ``.down-btn`` element are printed on a single line.

    Args:
        html: HTML markup to parse. ``None`` or an empty string is
            accepted and produces no output.

    Returns:
        None. Results are written to standard output.
    """
    if not html:
        # get_js_content returns None for non-200 responses, so there may
        # be nothing to parse; bail out before handing it to the parser.
        return
    doc = pq(html)
    for item in doc('.app-item').items():
        name = item.find('.name').text()
        desc = item.find('.desc').text()
        author = item.find('.author').text()
        download_url = item.find('.down-btn').attr('href')
        print(f'Name: {name}, Author: {author}, Description: {desc}, Download URL: {download_url}')
def main():
    """Download the hard-coded search page and print the apps found in it."""
    url = 'https://www.baidu.com/s?wd=手机壁纸应用'
    html = get_js_content(url)
    if html is None:
        # get_js_content returns None for any non-200 response; report it
        # and stop, since there is no markup to hand to the parser.
        print(f'Failed to fetch {url}')
        return
    parse_js_content(html)


# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()
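这段代码首先定义了一个获取网页内容的函数 `get_js_content`,然后定义了一个解析网页内容并提取应用信息的函数 `parse_js_content`。最后在 `main` 函数中调用这两个函数,实现了获取应用宝搜索结果页面的动态内容并打印出应用的相关信息。注意:示例中的 URL 指向的是百度搜索而非应用宝,且 `requests` 不会执行 JavaScript,只能获取服务器返回的原始 HTML,实际使用时需要根据目标页面调整 URL 和选择器。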
评论已关闭