import requests
from bs4 import BeautifulSoup
 
# Proxy server configuration, keyed by URL scheme as expected by requests.
# The credentials, host and port below are placeholders.
proxies = {
    scheme: scheme + '://user:password@proxy.server.com:port'
    for scheme in ('http', 'https')
}
 
# Request headers sent with every GET; the User-Agent imitates a desktop
# Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    ),
    'Accept-Encoding': 'gzip, deflate',
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,*/*;q=0.8'
    ),
    'Upgrade-Insecure-Requests': '1',
}
 
def get_html(url, proxies=proxies, headers=headers, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        proxies: Proxy mapping passed to ``requests.get``; defaults to the
            module-level ``proxies``.
        headers: Request headers passed to ``requests.get``; defaults to the
            module-level ``headers``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host or proxy.

    Returns:
        The response text when the status code is 200, otherwise ``None``.
        On failure a message (or the exception) is printed.
    """
    try:
        response = requests.get(
            url, proxies=proxies, headers=headers, timeout=timeout
        )
    except requests.exceptions.RequestException as e:
        # Covers connection errors, proxy errors and timeouts alike.
        print(e)
        return None

    if response.status_code != 200:
        print('Failed to retrieve the webpage')
        return None
    return response.text
 
def parse_html(html):
    """Parse ``html`` and print the text of every ``<p>`` element.

    Paragraphs are used here as an example of extracting data from the
    parsed document.
    """
    document = BeautifulSoup(html, 'html.parser')
    for paragraph in document.find_all('p'):
        print(paragraph.text)
 
def main():
    """Download the example page and print its paragraphs."""
    page = get_html('http://example.com')
    if not page:
        # Nothing was retrieved, so there is nothing to parse.
        return
    parse_html(page)
 
if __name__ == '__main__':
    main()

# This example shows how to write a simple web crawler in Python 3: how to
# configure a proxy server and request headers, how to send an HTTP GET
# request, and how to parse the HTML content with BeautifulSoup. It is based
# on a hypothetical web page and proxy server; replace them with a valid URL
# and real proxy details before actual use.