import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
# Demo script: fit polynomial regressions of increasing degree to noisy
# samples of a cosine curve and plot them side by side to illustrate
# underfitting, a good fit, and overfitting.

# Seed NumPy's global RNG so the generated data (and thus the figure) is
# reproducible from run to run.
np.random.seed(0)

# Generate non-linear data: sorted uniform samples in [0, 1) pushed through a
# cosine, plus Gaussian noise with standard deviation 0.1.
n_samples = 30
X = np.sort(np.random.rand(n_samples))
y = np.cos(1.5 * np.pi * X) + np.random.randn(n_samples) * 0.1

# Dense, evenly spaced points used to draw each fitted model as a smooth curve.
X_test = np.linspace(0, 1, 100)

# scikit-learn estimators expect 2-D inputs of shape (n_samples, n_features);
# reshape once here instead of on every loop iteration.
X_column = X[:, np.newaxis]
X_test_column = X_test[:, np.newaxis]

# All titles and legend labels below are Chinese. Matplotlib's default font
# has no CJK glyphs, so put commonly installed CJK-capable fonts ahead of the
# currently configured sans-serif list (which is kept as the fallback).
plt.rcParams['font.sans-serif'] = [
    'SimHei',
    'Microsoft YaHei',
    'PingFang SC',
    'Noto Sans CJK SC',
    'WenQuanYi Micro Hei',
    'Arial Unicode MS',
    *plt.rcParams['font.sans-serif'],
]
# Many CJK fonts lack the Unicode minus sign (U+2212); use the ASCII hyphen so
# negative tick labels on the y axis still render.
plt.rcParams['axes.unicode_minus'] = False

# One row of three panels, one panel per polynomial degree.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Polynomial degrees to show, and the matching panel subtitles.
degrees = [1, 4, 15]
titles = ['欠拟合 (Underfitting)', '良好拟合 (Good Fit)', '过拟合 (Overfitting)']

for ax, degree, title in zip(axes, degrees, titles):
    # Pipeline: expand x into polynomial features up to `degree`, then fit an
    # ordinary least-squares model. include_bias=False omits the constant
    # column because LinearRegression fits its own intercept by default.
    polynomial_features = PolynomialFeatures(degree=degree, include_bias=False)
    linear_regression = LinearRegression()
    pipeline = Pipeline([
        ("polynomial_features", polynomial_features),
        ("linear_regression", linear_regression),
    ])
    pipeline.fit(X_column, y)

    # Draw the fitted curve on top of the raw data points.
    ax.plot(X_test, pipeline.predict(X_test_column), label="模型拟合",
            color='crimson', linewidth=2)
    ax.scatter(X, y, edgecolor='b', s=30, label="真实数据", facecolors='none')

    ax.set_xlabel("X")
    ax.set_ylabel("y")
    ax.set_xlim((0, 1))
    # Fixed y range keeps the three panels directly comparable; any part of a
    # curve outside [-2, 2] is clipped from view.
    ax.set_ylim((-2, 2))
    ax.set_title(f"多项式阶数 = {degree}\n{title}", fontsize=14)
    ax.legend(loc='upper right')

fig.suptitle("过拟合与欠拟合的可视化解释", fontsize=18)
# Reserve the top 4% of the figure so the suptitle does not overlap the panels.
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()