Python 爬虫的重试机制
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from requests.exceptions import ConnectionError, Timeout
def requests_retry_session(retries=3, backoff_factor=0.3, status_forcelist=(500, 502, 503, 504), session=None):
    """Return a requests.Session that automatically retries failed requests.

    Args:
        retries: Total number of retry attempts allowed per request.
        backoff_factor: Factor for urllib3's exponential back-off between
            attempts (sleep = backoff_factor * 2 ** (retry_number - 1)).
        status_forcelist: HTTP status codes that force a retry.
            NOTE: the default was changed from a list to a tuple so the
            default value is immutable and cannot be shared/mutated
            across calls (mutable-default-argument pitfall); urllib3
            accepts any iterable here, so callers are unaffected.
        session: Optional existing Session to configure; a new Session is
            created when omitted.

    Returns:
        The configured requests.Session object.
    """
    session = session or requests.Session()
    # Named `retry_policy` (not `retries`) to avoid shadowing the int
    # parameter with a Retry object.
    retry_policy = Retry(
        total=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    # One adapter instance can safely serve both schemes.
    adapter = HTTPAdapter(max_retries=retry_policy)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session
# Usage example
session = requests_retry_session()
try:
    # A timeout is essential: without one, requests can block forever on a
    # stalled connection and the Timeout handler below would never fire.
    response = session.get('http://example.com/api', timeout=10)
    # Process the response here.
except ConnectionError as e:
    # Connection-level failure (DNS error, refused connection, ...).
    print(f"Connection error occurred: {e}")
except Timeout as e:
    # The request exceeded the 10-second timeout.
    print(f"Timeout error occurred: {e}")
这段代码定义了一个函数 requests_retry_session,它创建一个带有重试机制的 requests.Session 对象。当请求遇到网络问题(如连接失败、超时),或服务器返回指定的错误状态码时,会自动重试请求。可以通过参数控制重试的次数、退避延迟因子,以及触发重试的状态码列表(status_forcelist 是"强制重试"的状态码集合,而非白名单)。
评论已关闭