Happiness Prediction Task14: Ensemble Learning Case Study One (Part 6)

<8.8f}".format(mean_squared_error(oof_lgb_49, target)))
XGBoost

xgb_49_params = {'eta': 0.02,
                 'max_depth': 5,
                 'min_child_weight': 3,
                 'gamma': 0,
                 'subsample': 0.7,
                 'colsample_bytree': 0.35,
                 'lambda': 2,
                 'objective': 'reg:linear',  # older alias of 'reg:squarederror'
                 'eval_metric': 'rmse',
                 'silent': True,
                 'nthread': -1}

folds = KFold(n_splits=5, shuffle=True, random_state=2019)
oof_xgb_49 = np.zeros(len(X_train_49))
predictions_xgb_49 = np.zeros(len(X_test_49))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_49, y_train)):
    print("fold n°{}".format(fold_ + 1))
    trn_data = xgb.DMatrix(X_train_49[trn_idx], y_train[trn_idx])
    val_data = xgb.DMatrix(X_train_49[val_idx], y_train[val_idx])
    watchlist = [(trn_data, 'train'), (val_data, 'valid_data')]
    xgb_49 = xgb.train(dtrain=trn_data, num_boost_round=3000, evals=watchlist,
                       early_stopping_rounds=600, verbose_eval=500, params=xgb_49_params)
    # Out-of-fold predictions on the held-out part, using the best early-stopped iteration
    oof_xgb_49[val_idx] = xgb_49.predict(xgb.DMatrix(X_train_49[val_idx]),
                                         ntree_limit=xgb_49.best_ntree_limit)
    # Average the test predictions over the 5 folds
    predictions_xgb_49 += xgb_49.predict(xgb.DMatrix(X_test_49),
                                         ntree_limit=xgb_49.best_ntree_limit) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_xgb_49, target)))
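A compatibility note, not from the original post: ntree_limit/best_ntree_limit were deprecated in XGBoost 1.4 and removed in 2.0, so on a recent XGBoost the two predict calls above would be written with iteration_range instead, for example:

# Same prediction on XGBoost >= 1.4: keep only trees up to the best iteration
oof_xgb_49[val_idx] = xgb_49.predict(
    xgb.DMatrix(X_train_49[val_idx]),
    iteration_range=(0, xgb_49.best_iteration + 1))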
GradientBoostingRegressor (gradient boosting decision trees)
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=2018)
oof_gbr_49 = np.zeros(train_shape)
predictions_gbr_49 = np.zeros(len(X_test_49))

# GradientBoostingRegressor (gradient boosting decision trees)
for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_49, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x = X_train_49[trn_idx]
    tr_y = y_train[trn_idx]
    gbr_49 = gbr(n_estimators=600, learning_rate=0.01, subsample=0.65,
                 max_depth=6, min_samples_leaf=20, max_features=0.35, verbose=1)
    gbr_49.fit(tr_x, tr_y)
    oof_gbr_49[val_idx] = gbr_49.predict(X_train_49[val_idx])
    predictions_gbr_49 += gbr_49.predict(X_test_49) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_gbr_49, target)))
For each feature-engineering variant, we run 5-fold cross-validation repeated twice (RepeatedKFold) with kernel ridge regression as the second-layer model, obtaining the stacked result for each feature set.
train_stack3 = np.vstack([oof_lgb_49, oof_xgb_49, oof_gbr_49]).transpose()
test_stack3 = np.vstack([predictions_lgb_49, predictions_xgb_49, predictions_gbr_49]).transpose()

folds_stack = RepeatedKFold(n_splits=5, n_repeats=2, random_state=7)
oof_stack3 = np.zeros(train_stack3.shape[0])
predictions_lr3 = np.zeros(test_stack3.shape[0])

for fold_, (trn_idx, val_idx) in enumerate(folds_stack.split(train_stack3, target)):
    print("fold {}".format(fold_))
    trn_data, trn_y = train_stack3[trn_idx], target.iloc[trn_idx].values
    val_data, val_y = train_stack3[val_idx], target.iloc[val_idx].values
    # Kernel Ridge Regression as the second-layer model
    lr3 = kr()
    lr3.fit(trn_data, trn_y)
    oof_stack3[val_idx] = lr3.predict(val_data)
    predictions_lr3 += lr3.predict(test_stack3) / 10  # 5 splits x 2 repeats = 10 folds

mean_squared_error(target.values, oof_stack3)
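To make the mechanics easy to run in isolation, here is a self-contained toy version of this second layer on synthetic data (the arrays and shapes are made up; everything else mirrors the code above). Note that with RepeatedKFold each training row is validated once per repeat, so the out-of-fold array ends up holding only the final repeat's prediction, while the test predictions are averaged over all 10 folds:

import numpy as np
from sklearn.model_selection import RepeatedKFold
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_squared_error

rng = np.random.RandomState(0)
# Toy stand-ins for three base models' out-of-fold / test predictions
train_stack = rng.rand(100, 3)
test_stack = rng.rand(20, 3)
y = train_stack.mean(axis=1) + 0.1 * rng.randn(100)

folds = RepeatedKFold(n_splits=5, n_repeats=2, random_state=7)
oof = np.zeros(len(train_stack))
pred = np.zeros(len(test_stack))

for trn_idx, val_idx in folds.split(train_stack, y):
    meta = KernelRidge()
    meta.fit(train_stack[trn_idx], y[trn_idx])
    oof[val_idx] = meta.predict(train_stack[val_idx])        # overwritten on the 2nd repeat
    pred += meta.predict(test_stack) / folds.get_n_splits()  # get_n_splits() == 10 here

print(mean_squared_error(y, oof))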
6.3 Processing the 383-dimensional data

Kernel-based ridge regression (KernelRidge)
folds = KFold(n_splits=5, shuffle=True, random_state=13)
oof_kr_383 = np.zeros(train_shape)
predictions_kr_383 = np.zeros(len(X_test_383))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_383, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x = X_train_383[trn_idx]
    tr_y = y_train[trn_idx]
    # Kernel Ridge Regression
    kr_383 = kr()
    kr_383.fit(tr_x, tr_y)
    oof_kr_383[val_idx] = kr_383.predict(X_train_383[val_idx])
    predictions_kr_383 += kr_383.predict(X_test_383) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_kr_383, target)))
Plain ridge regression (Ridge)
folds = KFold(n_splits=5, shuffle=True, random_state=13)
oof_ridge_383 = np.zeros(train_shape)
predictions_ridge_383 = np.zeros(len(X_test_383))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_383, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x = X_train_383[trn_idx]
    tr_y = y_train[trn_idx]
    # Plain ridge regression with a strong penalty
    ridge_383 = Ridge(alpha=1200)
    ridge_383.fit(tr_x, tr_y)
    oof_ridge_383[val_idx] = ridge_383.predict(X_train_383[val_idx])
    predictions_ridge_383 += ridge_383.predict(X_test_383) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_ridge_383, target)))
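Since the two variants appear side by side here, it is worth noting that kr() above is KernelRidge with its defaults (kernel='linear', alpha=1), which solves the same problem as Ridge but in dual form; the practical difference is cost, since KernelRidge builds an n-by-n kernel matrix over the training samples. A small self-contained check of the equivalence, not from the original post:

import numpy as np
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import Ridge

rng = np.random.RandomState(42)
X = rng.randn(200, 10)
y = X @ rng.randn(10) + 0.1 * rng.randn(200)

# Ridge fits an intercept by default while KernelRidge does not,
# so disable it to make the two models solve the identical problem.
krr = KernelRidge(kernel='linear', alpha=1.0).fit(X, y)
ridge = Ridge(alpha=1.0, fit_intercept=False).fit(X, y)
print(np.allclose(krr.predict(X), ridge.predict(X), atol=1e-6))  # True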