"""Smart-construction safety data analysis (auto-generated script).

Pipeline: load ``construction_safety_data.csv``, impute missing numeric
values with column means, plot the distribution of two safety-rate columns,
train a random-forest classifier that predicts ``accident_occurred``, and
rank the input features by the importance the model assigns to them.
"""

import numpy as np
import pandas as pd

DATA_PATH = 'construction_safety_data.csv'
TARGET_COLUMN = 'accident_occurred'
# Columns whose distributions are plotted during the exploratory step.
RATE_COLUMNS = ('safety_helmet_rate', 'protection_facility_rate')


def fill_missing_values(data: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *data* with NaNs in numeric columns set to the column mean.

    ``numeric_only=True`` keeps the mean computation from raising on text or
    date columns; such columns are returned unchanged (their NaNs are kept).
    """
    numeric_means = data.mean(numeric_only=True)
    return data.fillna(numeric_means)


def split_features_target(data: pd.DataFrame, target: str = TARGET_COLUMN):
    """Split *data* into ``(X, y)``: every column except *target*, and *target*."""
    return data.drop(target, axis=1), data[target]


def plot_distributions(data: pd.DataFrame, columns=RATE_COLUMNS) -> None:
    """Draw one histogram (with KDE curve) per entry of *columns* on a new figure."""
    # Plotting libraries are imported lazily so the data-preparation helpers
    # stay importable in environments without matplotlib/seaborn.
    import matplotlib.pyplot as plt
    import seaborn as sns

    plt.figure(figsize=(12, 8))
    for position, column in enumerate(columns, start=1):
        # One row, one cell per plotted column, so no grid cell is left empty.
        plt.subplot(1, len(columns), position)
        sns.histplot(data[column], kde=True)
        plt.title(f'{column}分布')


def train_model(X, y, test_size: float = 0.2, random_state: int = 42):
    """Fit a random forest on a train split and score it on the held-out split.

    Returns:
        ``(model, accuracy)`` where *accuracy* is the mean accuracy reported by
        ``model.score`` on the held-out test split.
    """
    # NOTE(review): assumes every column of X is numeric; text columns would
    # need encoding before fitting -- confirm against the CSV schema.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model = RandomForestClassifier(n_estimators=100, random_state=random_state)
    model.fit(X_train, y_train)
    return model, model.score(X_test, y_test)


def rank_feature_importance(model, feature_names) -> pd.DataFrame:
    """Return a ``feature``/``importance`` table sorted by descending importance."""
    table = pd.DataFrame({
        'feature': feature_names,
        'importance': model.feature_importances_,
    })
    return table.sort_values('importance', ascending=False)


def plot_feature_importance(feature_importance: pd.DataFrame) -> None:
    """Draw a horizontal bar chart of the ranked feature importances."""
    import matplotlib.pyplot as plt
    import seaborn as sns

    plt.figure(figsize=(10, 6))
    sns.barplot(x='importance', y='feature', data=feature_importance)
    plt.title('施工现场安全因素重要性排名')
    plt.tight_layout()


def main() -> None:
    """Run the whole analysis: load, clean, explore, train, rank, and report."""
    import matplotlib.pyplot as plt

    # 1. Load the data.
    data = pd.read_csv(DATA_PATH)
    print("数据概览:")
    print(data.head())

    # 2. Preprocessing: impute missing values.
    data = fill_missing_values(data)

    # 3. Exploratory analysis.
    plot_distributions(data)

    # 4-6. Prepare features/target, split, and train the random forest.
    X, y = split_features_target(data)
    model, accuracy = train_model(X, y)
    print(f"测试集准确率: {accuracy:.3f}")

    # 7-8. Rank and visualize feature importance.
    feature_importance = rank_feature_importance(model, X.columns)
    plot_feature_importance(feature_importance)
    plt.show()  # displays every figure created above

    print("分析完成!最重要的三个安全因素是:")
    print(feature_importance.head(3))


if __name__ == "__main__":
    main()