基于Python的网络爬虫爬取天气数据及可视化分析
import requests
import json
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
# Request headers sent with every call so the client identifies itself
# with a desktop-browser User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}
# Look up the city code
def get_city_code(city_name, api_url, token='your_token', timeout=10):
    """Return the id of the first city the API reports for ``city_name``.

    Sends a GET request to ``api_url`` with ``search`` and ``token`` query
    parameters and the module-level ``headers``, decodes the JSON response,
    and returns the ``'id'`` field of its first element.

    Args:
        city_name: Name of the city to search for.
        api_url: URL of the search endpoint.
        token: API token sent as the ``token`` query parameter. The default
            is a placeholder and must be replaced with a real token.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the program forever.

    Returns:
        The value stored under ``'id'`` in the first search result.

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a non-2xx HTTP status.
        IndexError: If the API returns an empty result list.
        KeyError: If the first result has no ``'id'`` field.
    """
    params = {'search': city_name, 'token': token}
    response = requests.get(
        api_url, headers=headers, params=params, timeout=timeout
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    matches = response.json()
    if isinstance(matches, list) and not matches:
        # Same exception type as indexing an empty list, but with a message
        # that says what actually went wrong.
        raise IndexError('no city found for {!r}'.format(city_name))
    return matches[0]['id']
# Fetch the weather data
def get_weather_data(city_code, api_url, token='your_token', timeout=10):
    """Return the real-time weather record for ``city_code``.

    Sends a GET request to ``api_url`` with ``city`` and ``token`` query
    parameters and the module-level ``headers``, decodes the JSON response,
    and returns the object found at ``['data']['realtime']``.

    Args:
        city_code: City identifier, e.g. the value returned by
            ``get_city_code``.
        api_url: URL of the weather endpoint.
        token: API token sent as the ``token`` query parameter. The default
            is a placeholder and must be replaced with a real token.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the program forever.

    Returns:
        The decoded JSON value stored under ``data -> realtime``.

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or a non-2xx HTTP status.
        KeyError: If the response lacks the ``data`` or ``realtime`` key.
    """
    params = {'city': city_code, 'token': token}
    response = requests.get(
        api_url, headers=headers, params=params, timeout=timeout
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    payload = response.json()
    return payload['data']['realtime']
# Save the data as CSV
def save_data_to_csv(data, filename):
    """Write one record, or a sequence of records, to ``filename`` as CSV.

    The file gets a header row followed by one line per record, so it can be
    loaded back with ``pandas.read_csv``. (Dumping the record as JSON text
    into a ``.csv`` file, as was done before, does not produce usable
    columns when parsed as CSV.)

    Args:
        data: A dict mapping column names to values, or an iterable of such
            dicts. Columns are the union of all keys in first-seen order;
            keys missing from a record are written as empty cells.
        filename: Path of the file to create or overwrite (UTF-8).
    """
    import csv  # local import: the module's top-level imports do not include csv

    rows = [data] if isinstance(data, dict) else list(data)
    # dict.fromkeys keeps first-seen order while dropping duplicate keys.
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
# Read the CSV data
def read_csv_data(filename):
    """Load a UTF-8 encoded CSV file into a pandas DataFrame.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    return pd.read_csv(filename, encoding='utf-8')
# Plot how a weather value changes over time
def plot_weather_change(dataframe, column_name):
    """Show a line chart of ``column_name`` against time.

    The time axis comes from the frame's ``'date'`` column when it has one,
    otherwise from its index. The caller's frame is not modified, so the
    same frame can be plotted several times for different columns. (Moving
    ``'date'`` into the index in place would make a second call on the same
    frame fail, because the column would no longer exist.)

    Args:
        dataframe: DataFrame holding the data, with timestamps either in a
            ``'date'`` column or in the index.
        column_name: Name of the column to plot on the y axis.

    Raises:
        KeyError: If ``column_name`` is not a column of ``dataframe``.
    """
    if 'date' in dataframe.columns:
        timestamps = pd.to_datetime(dataframe['date'])
    else:
        # Frames already indexed by date are still accepted.
        timestamps = pd.to_datetime(dataframe.index)
    plt.figure(figsize=(10, 5))
    plt.plot(timestamps, dataframe[column_name], 'b')
    plt.title('Weather Change over Time')
    plt.xlabel('Date')
    plt.ylabel(column_name)
    plt.show()
# Usage example: look up the city, fetch its weather, store it, and plot it
city_name = '北京'
api_url = 'https://tianqiapi.com/api/'
city_code = get_city_code(city_name, api_url)
weather_data = get_weather_data(city_code, api_url)
# Post-process the weather data: stamp the record with the current local time
weather_data['date'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
save_data_to_csv(weather_data, 'weather_data.csv')
dataframe = read_csv_data('weather_data.csv')
# Plot the weather trends
plot_weather_change(dataframe, 'tem') # plot the temperature trend
plot_weather_change(dataframe, 'humidity') # plot the humidity trend
这段代码提供了一个简化的例子，展示了如何使用 Python 进行网络爬取、处理数据并进行可视化分析。需要注意的是，运行前需要将代码中的 'your_token' 替换为有效的 API 令牌；实际应用中还应加入适当的异常处理机制，并遵守 API 使用协议及有关网络爬取的法律规定。
评论已关闭