print("CV score: {:<8.8f}".format(mean_squared_error(oof_ridge_383, target)))
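For reference, the short model aliases used throughout this section (kr, en, br, lr) and the helper variables such as train_shape are defined earlier in the original post. They presumably correspond to imports roughly like the following (an assumption, not shown in this section):

import numpy as np
from sklearn.model_selection import KFold, RepeatedKFold
from sklearn.linear_model import Ridge, ElasticNet as en, BayesianRidge as br, LinearRegression as lr
from sklearn.kernel_ridge import KernelRidge as kr
from sklearn.metrics import mean_squared_error

# train_shape is assumed to be the number of training rows, e.g.:
# train_shape = X_train_383.shape[0]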
Using elastic net regression (ElasticNet)
folds = KFold(n_splits=5, shuffle=True, random_state=13)
oof_en_383 = np.zeros(train_shape)
predictions_en_383 = np.zeros(len(X_test_383))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_383, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x = X_train_383[trn_idx]
    tr_y = y_train[trn_idx]
    # ElasticNet (elastic net)
    en_383 = en(alpha=1.0, l1_ratio=0.06)
    en_383.fit(tr_x, tr_y)
    oof_en_383[val_idx] = en_383.predict(X_train_383[val_idx])
    predictions_en_383 += en_383.predict(X_test_383) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_en_383, target)))
Using Bayesian ridge regression (BayesianRidge)
folds = KFold(n_splits=5, shuffle=True, random_state=13)
oof_br_383 = np.zeros(train_shape)
predictions_br_383 = np.zeros(len(X_test_383))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_383, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x = X_train_383[trn_idx]
    tr_y = y_train[trn_idx]
    # BayesianRidge (Bayesian ridge regression)
    br_383 = br()
    br_383.fit(tr_x, tr_y)
    oof_br_383[val_idx] = br_383.predict(X_train_383[val_idx])
    predictions_br_383 += br_383.predict(X_test_383) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_br_383, target)))
For each feature-engineered dataset, the out-of-fold predictions of the base models are stacked with 5-fold cross-validation repeated twice, using a simple linear regression as the stacking model, to obtain a stacked result for that feature count.
train_stack1 = np.vstack([oof_br_383, oof_kr_383, oof_en_383, oof_ridge_383]).transpose()
test_stack1 = np.vstack([predictions_br_383, predictions_kr_383, predictions_en_383, predictions_ridge_383]).transpose()

folds_stack = RepeatedKFold(n_splits=5, n_repeats=2, random_state=7)
oof_stack1 = np.zeros(train_stack1.shape[0])
predictions_lr1 = np.zeros(test_stack1.shape[0])

for fold_, (trn_idx, val_idx) in enumerate(folds_stack.split(train_stack1, target)):
    print("fold {}".format(fold_))
    trn_data, trn_y = train_stack1[trn_idx], target.iloc[trn_idx].values
    val_data, val_y = train_stack1[val_idx], target.iloc[val_idx].values
    # LinearRegression as the simple stacking model
    lr1 = lr()
    lr1.fit(trn_data, trn_y)
    oof_stack1[val_idx] = lr1.predict(val_data)
    predictions_lr1 += lr1.predict(test_stack1) / 10  # 10 = 5 splits x 2 repeats

mean_squared_error(target.values, oof_stack1)
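As a quick sanity check (not part of the original post), the stacked out-of-fold score can be printed next to each base model's out-of-fold score, using the same metric and the variables defined above:

# Compare the stacked OOF MSE with each base model's OOF MSE
for name, oof in [("BayesianRidge", oof_br_383), ("KernelRidge", oof_kr_383),
                  ("ElasticNet", oof_en_383), ("Ridge", oof_ridge_383)]:
    print("{:>13s} OOF MSE: {:<8.8f}".format(name, mean_squared_error(oof, target)))
print("      Stacked OOF MSE: {:<8.8f}".format(mean_squared_error(target.values, oof_stack1)))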
6.4 New models on the 49-dimensional data
Since the 49-dimensional features are the most important ones, more models are added here to build predictions on the 49-dimensional data.
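The three blocks below repeat the same 5-fold out-of-fold pattern with different estimators. Purely as an illustration of that pattern (the helper name run_oof_cv is hypothetical and does not appear in the original post), it could be factored as:

def run_oof_cv(make_model, X_train, y_train, X_test, n_splits=5, seed=13):
    # Generic out-of-fold loop: fit on each training fold, predict the held-out
    # fold, and average the test-set predictions over the folds.
    folds = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    oof = np.zeros(X_train.shape[0])
    preds = np.zeros(X_test.shape[0])
    for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train, y_train)):
        model = make_model()
        model.fit(X_train[trn_idx], y_train[trn_idx])
        oof[val_idx] = model.predict(X_train[val_idx])
        preds += model.predict(X_test) / folds.n_splits
    return oof, preds

For example, run_oof_cv(kr, X_train_49, y_train, X_test_49) would reproduce the kernel-ridge block below with default parameters, and run_oof_cv(lambda: Ridge(alpha=6), X_train_49, y_train, X_test_49) the Ridge block.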
Kernel ridge regression (KernelRidge)
folds = KFold(n_splits=5, shuffle=True, random_state=13)
oof_kr_49 = np.zeros(train_shape)
predictions_kr_49 = np.zeros(len(X_test_49))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_49, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x = X_train_49[trn_idx]
    tr_y = y_train[trn_idx]
    kr_49 = kr()
    kr_49.fit(tr_x, tr_y)
    oof_kr_49[val_idx] = kr_49.predict(X_train_49[val_idx])
    predictions_kr_49 += kr_49.predict(X_test_49) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_kr_49, target)))
Ridge regression
folds = KFold(n_splits=5, shuffle=True, random_state=13)
oof_ridge_49 = np.zeros(train_shape)
predictions_ridge_49 = np.zeros(len(X_test_49))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_49, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x = X_train_49[trn_idx]
    tr_y = y_train[trn_idx]
    ridge_49 = Ridge(alpha=6)
    ridge_49.fit(tr_x, tr_y)
    oof_ridge_49[val_idx] = ridge_49.predict(X_train_49[val_idx])
    predictions_ridge_49 += ridge_49.predict(X_test_49) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_ridge_49, target)))
Bayesian ridge regression (BayesianRidge)
folds = KFold(n_splits=5, shuffle=True, random_state=13)
oof_br_49 = np.zeros(train_shape)
predictions_br_49 = np.zeros(len(X_test_49))

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train_49, y_train)):
    print("fold n°{}".format(fold_ + 1))
    tr_x = X_train_49[trn_idx]
    tr_y = y_train[trn_idx]
    br_49 = br()
    br_49.fit(tr_x, tr_y)
    oof_br_49[val_idx] = br_49.predict(X_train_49[val_idx])
    predictions_br_49 += br_49.predict(X_test_49) / folds.n_splits

print("CV score: {:<8.8f}".format(mean_squared_error(oof_br_49, target)))
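Presumably these 49-dimensional out-of-fold predictions are then stacked in the same way as the 383-dimensional ones above. The following is only a sketch under that assumption; the names train_stack2, test_stack2, oof_stack2 and predictions_lr2 are illustrative and not taken from the original post, and further base-model columns can be added as they are built:

# Illustrative only: stack the 49-dimensional OOF predictions like the 383-dimensional ones
train_stack2 = np.vstack([oof_br_49, oof_kr_49, oof_ridge_49]).transpose()
test_stack2 = np.vstack([predictions_br_49, predictions_kr_49, predictions_ridge_49]).transpose()

folds_stack = RepeatedKFold(n_splits=5, n_repeats=2, random_state=7)
oof_stack2 = np.zeros(train_stack2.shape[0])
predictions_lr2 = np.zeros(test_stack2.shape[0])

for fold_, (trn_idx, val_idx) in enumerate(folds_stack.split(train_stack2, target)):
    trn_data, trn_y = train_stack2[trn_idx], target.iloc[trn_idx].values
    val_data, val_y = train_stack2[val_idx], target.iloc[val_idx].values
    lr2 = lr()
    lr2.fit(trn_data, trn_y)
    oof_stack2[val_idx] = lr2.predict(val_data)
    predictions_lr2 += lr2.predict(test_stack2) / 10  # 10 = 5 splits x 2 repeats

mean_squared_error(target.values, oof_stack2)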