"""Exploratory analysis of the predictors of the ``RSEI`` column in a CSV table.

Running this script reads the input table, treats ``RSEI`` as the response
and every other column as a predictor, and writes four figures to
``out_dir``:

* ``correlation.jpg``   - heatmap of the predictor correlation matrix
* ``vif.jpg``           - variance inflation factor (VIF) of each predictor
* ``rf_importance.jpg`` - random-forest feature importances
* ``pdp.jpg``           - partial dependence of the six most important
  predictors
"""

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import PartialDependenceDisplay
from sklearn.model_selection import train_test_split
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Output directory for every figure; created up front so savefig cannot fail
# on a missing folder.
out_dir = r"C:\Users\Ayu\Pictures\结果"
os.makedirs(out_dir, exist_ok=True)


def _save_current_figure(filename):
    """Tighten, save (300 dpi) and close the current matplotlib figure.

    Args:
        filename: File name (not a full path) created inside ``out_dir``.
    """
    plt.tight_layout()
    plt.savefig(os.path.join(out_dir, filename), dpi=300)
    # Close explicitly so figures do not accumulate in memory across plots.
    plt.close()


# ---------------------------------------------------------------------------
# Load data: "RSEI" is the response, all remaining columns are predictors.
# ---------------------------------------------------------------------------
df = pd.read_csv(r"C:\Users\Ayu\Pictures\数据.csv")
y = df["RSEI"]
X = df.drop(columns=["RSEI"])

# ---------------------------------------------------------------------------
# 1. Correlation matrix of the predictors.
# ---------------------------------------------------------------------------
plt.figure(figsize=(8, 6))
sns.heatmap(X.corr(), cmap="coolwarm", annot=True, fmt=".2f")
plt.title("Correlation Matrix")
_save_current_figure("correlation.jpg")

# ---------------------------------------------------------------------------
# 2. Variance inflation factors.
# ---------------------------------------------------------------------------
# A constant column is appended so the design matrix handed to
# variance_inflation_factor contains an intercept term. It is added as the
# LAST column, so iterating over range(len(X.columns)) scores every real
# predictor and skips the intercept itself.
X_vif = X.copy()
X_vif["intercept"] = 1

vif = pd.DataFrame()
vif["Variable"] = X.columns
vif["VIF"] = [
    variance_inflation_factor(X_vif.values, i) for i in range(len(X.columns))
]

plt.figure(figsize=(8, 5))
sns.barplot(data=vif, x="VIF", y="Variable")
# Dashed vertical reference line at VIF = 5.
plt.axvline(5, linestyle="--")
plt.title("VIF")
_save_current_figure("vif.jpg")

# ---------------------------------------------------------------------------
# 3. Random-forest feature importance.
# ---------------------------------------------------------------------------
# The forest is fitted on the 80 % training split only. X_test / y_test are
# not used further in this section.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

rf = RandomForestRegressor(n_estimators=500, random_state=42)
rf.fit(X_train, y_train)

imp = pd.DataFrame(
    {
        "Variable": X.columns,
        "Importance": rf.feature_importances_,
    }
).sort_values(by="Importance", ascending=False)

plt.figure(figsize=(8, 5))
sns.barplot(data=imp, x="Importance", y="Variable")
plt.title("Random Forest Importance")
_save_current_figure("rf_importance.jpg")

# ---------------------------------------------------------------------------
# 4. Partial dependence of the six most important predictors.
# ---------------------------------------------------------------------------
# `imp` is already sorted by descending importance, so the first six rows
# are the top-ranked variables (fewer if the table has fewer predictors).
top_vars = imp["Variable"].iloc[:6].tolist()

# A single Axes is passed on purpose: from_estimator uses it as the bounding
# area and lays out one sub-plot per feature inside it. Note that the curves
# are evaluated over the full predictor table X, not only the training split.
fig, ax = plt.subplots(figsize=(10, 8))
PartialDependenceDisplay.from_estimator(
    rf,
    X,
    features=top_vars,
    grid_resolution=50,
    ax=ax,
)
_save_current_figure("pdp.jpg")