#| echo: true
#| warning: false
# 为确保代码可复现,我们不依赖实时API,而是创建一个模拟数据集
# 该数据集的结构与yfinance获取的数据类似
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
# Row labels for the mock table: ten ticker symbols, in the order the
# per-column value lists below are written.
sp500_tickers = [
    'AAPL',
    'MSFT',
    'GOOGL',
    'AMZN',
    'NVDA',
    'TSLA',
    'JPM',
    'V',
    'JNJ',
    'WMT',
]

# Column-oriented mock values, one list per metric, each with one entry per
# ticker. np.nan marks the three cells that have no value
# (TSLA/forwardPE, JNJ/returnOnEquity, JPM/debtToEquity).
data = dict(
    MarketCap=[
        2.8e12, 2.5e12, 1.8e12, 1.5e12, 1.2e12,
        8e11, 4.5e11, 5e11, 4.8e11, 4.2e11,
    ],
    trailingPE=[
        28.5, 35.2, 26.8, 60.1, 95.3,
        120.2, 12.1, 38.5, 25.4, 22.1,
    ],
    forwardPE=[
        27.1, 33.1, 25.0, 55.6, 70.1,
        np.nan, 11.5, 36.2, 24.1, 21.0,
    ],
    returnOnEquity=[
        1.5, 0.45, 0.3, 0.25, 0.6,
        0.28, 0.17, 0.22, np.nan, 0.2,
    ],
    priceToBook=[
        45.1, 12.3, 7.1, 9.8, 30.2,
        25.1, 1.8, 12.5, 6.7, 5.4,
    ],
    debtToEquity=[
        150.1, 50.2, 12.5, 120.8, 30.1,
        20.5, np.nan, 55.3, 40.1, 80.2,
    ],
)

# Build the frame with the tickers as a named index ('Ticker') in one step.
df = pd.DataFrame(data, index=pd.Index(sp500_tickers, name='Ticker'))

# Show the header line followed by the first five rows of the table.
print('模拟的原始数据 (前5行):', df.head(), sep='\n')