幸福感预测 Task14:集成学习案例一(七)

<8.8f}".format(mean_squared_error(oof_ridge_383, target)))
使用 弹性网络
# 5-fold CV with ElasticNet on the 383-feature set.
# Out-of-fold predictions (oof_en_383) give an unbiased CV estimate of the
# model; test-set predictions are averaged over the 5 fold models.
folds = KFold(n_splits=5, shuffle=True, random_state=13)
oof_en_383 = np.zeros(train_shape)
predictions_en_383 = np.zeros(len(X_test_383))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_383, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x = X_train_383[trn_idx]
    tr_y = y_train[trn_idx]

    # ElasticNet: linear regression with a mixed L1/L2 penalty
    # (l1_ratio=0.06 -> mostly L2 with a small L1 component).
    en_383 = en(alpha=1.0, l1_ratio=0.06)
    en_383.fit(tr_x, tr_y)

    oof_en_383[val_idx] = en_383.predict(X_train_383[val_idx])
    # Average the test-set predictions across the folds.
    predictions_en_383 += en_383.predict(X_test_383) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_en_383, target)))
使用 贝叶斯岭回归
# 5-fold CV with BayesianRidge on the 383-feature set.
# Same out-of-fold / test-averaging scheme as the other base models so the
# resulting arrays can be stacked later.
folds = KFold(n_splits=5, shuffle=True, random_state=13)
oof_br_383 = np.zeros(train_shape)
predictions_br_383 = np.zeros(len(X_test_383))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_383, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x = X_train_383[trn_idx]
    tr_y = y_train[trn_idx]

    # BayesianRidge: ridge regression with regularization strength
    # estimated from the data (default hyper-priors).
    br_383 = br()
    br_383.fit(tr_x, tr_y)

    oof_br_383[val_idx] = br_383.predict(X_train_383[val_idx])
    # Average the test-set predictions across the folds.
    predictions_br_383 += br_383.predict(X_test_383) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_br_383, target)))
在每一种特征工程中,进行 5 折的交叉验证,并重复两次(简单的线性回归),取得每一个特征数下的模型的结果。
# Level-2 stacking on the 383-feature base models: the four OOF prediction
# vectors become a 4-column meta-feature matrix; a plain LinearRegression is
# fit on it with 5-fold CV repeated twice (10 fold models in total).
train_stack1 = np.vstack(
    [oof_br_383, oof_kr_383, oof_en_383, oof_ridge_383]
).transpose()
test_stack1 = np.vstack(
    [predictions_br_383, predictions_kr_383, predictions_en_383, predictions_ridge_383]
).transpose()

folds_stack = RepeatedKFold(n_splits=5, n_repeats=2, random_state=7)
oof_stack1 = np.zeros(train_stack1.shape[0])
predictions_lr1 = np.zeros(test_stack1.shape[0])

for fold_, (trn_idx, val_idx) in enumerate(folds_stack.split(train_stack1, target)):
    print("fold {}".format(fold_))
    trn_data, trn_y = train_stack1[trn_idx], target.iloc[trn_idx].values
    val_data, val_y = train_stack1[val_idx], target.iloc[val_idx].values

    # LinearRegression as the stacking meta-learner.
    lr1 = lr()
    lr1.fit(trn_data, trn_y)

    oof_stack1[val_idx] = lr1.predict(val_data)
    # Divisor 10 = n_splits (5) * n_repeats (2): average over all fold models.
    predictions_lr1 += lr1.predict(test_stack1) / 10

# CV score of the stacked model (notebook cell output).
mean_squared_error(target.values, oof_stack1)
6.4 49维数据新模型
由于 49 维的特征是最重要的特征,所以这里考虑增加更多的模型进行 49 维特征的数据的构建工作。
核岭回归
# 5-fold CV with KernelRidge on the 49-feature set.
# Same out-of-fold / test-averaging scheme as the 383-feature base models.
folds = KFold(n_splits=5, shuffle=True, random_state=13)
oof_kr_49 = np.zeros(train_shape)
predictions_kr_49 = np.zeros(len(X_test_49))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_49, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x = X_train_49[trn_idx]
    tr_y = y_train[trn_idx]

    # KernelRidge with default hyperparameters (linear kernel, alpha=1).
    kr_49 = kr()
    kr_49.fit(tr_x, tr_y)

    oof_kr_49[val_idx] = kr_49.predict(X_train_49[val_idx])
    # Average the test-set predictions across the folds.
    predictions_kr_49 += kr_49.predict(X_test_49) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_kr_49, target)))
Ridge 岭回归
# 5-fold CV with Ridge regression on the 49-feature set.
# Same out-of-fold / test-averaging scheme as the other base models.
folds = KFold(n_splits=5, shuffle=True, random_state=13)
oof_ridge_49 = np.zeros(train_shape)
predictions_ridge_49 = np.zeros(len(X_test_49))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_49, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x = X_train_49[trn_idx]
    tr_y = y_train[trn_idx]

    # Ridge with a fixed L2 penalty (alpha=6, presumably tuned earlier —
    # TODO confirm against the preceding notebook sections).
    ridge_49 = Ridge(alpha=6)
    ridge_49.fit(tr_x, tr_y)

    oof_ridge_49[val_idx] = ridge_49.predict(X_train_49[val_idx])
    # Average the test-set predictions across the folds.
    predictions_ridge_49 += ridge_49.predict(X_test_49) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_ridge_49, target)))
贝叶斯岭回归
folds = KFold(n_splits=5, shuffle=True, random_state=13)oof_br_49 = np.zeros(train_shape)predictions_br_49 = np.zeros(len(X_test_49))for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_49, y_train)):print("fold n°{}".format(fold_+1))tr_x = X_train_49[trn_idx]tr_y = y_train[trn_idx]br_49 = br()br_49.fit(tr_x,tr_y)oof_br_49[val_idx] = br_49.predict(X_train_49[val_idx])predictions_br_49 += br_49.predict(X_test_49) / folds.n_splitsprint("CV score: {: