保姆级随机森林算法Python教学
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import joblib
import numpy as np
# Load the iris dataset and separate the feature matrix from the class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 10-tree random forest and fit it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
random_forest_classifier = RandomForestClassifier(
    n_estimators=10,
    random_state=42,
).fit(X_train, y_train)

# Predict the held-out samples; accuracy is the fraction of predictions that
# match the true labels.
y_pred = random_forest_classifier.predict(X_test)
accuracy = (y_pred == y_test).mean()
print(f"Accuracy: {accuracy}")

# Persist the trained model to disk, then read it back from the same path.
joblib.dump(random_forest_classifier, "random_forest_classifier.joblib")
loaded_model = joblib.load("random_forest_classifier.joblib")

# Re-score the test split with the reloaded model to compare against the
# accuracy printed above.
y_pred_loaded = loaded_model.predict(X_test)
accuracy_loaded = (y_pred_loaded == y_test).mean()
print(f"Accuracy (loaded model): {accuracy_loaded}")
这段代码展示了如何使用 sklearn 库加载鸢尾花数据集、划分数据集、训练随机森林分类器、进行预测、计算准确率,并在保存模型后重新加载模型,再次进行预测和准确率计算。这是一个很基础的机器学习教学例子,适合初学者学习和理解。
评论已关闭