SVM 支持向量机简介 + SVM 15 种场景分类实例（三）_支持向量机

# Grid search over the regularization strength C of the linear SVM classifier.
# Candidate values run from 0.01 to about 1.00 in steps of 0.01; the 1.0001
# stop value keeps the final 1.00 candidate inside the half-open arange range.
param_test1 = {'C': np.arange(0.01, 1.0001, 0.01)}

# NOTE: probability=True makes every fit run an additional internal
# cross-validation for probability calibration, although accuracy scoring
# only relies on predict(). It is kept so best_estimator_ stays unchanged.
gsearch1 = GridSearchCV(
    estimator=SVC(kernel='linear', class_weight='balanced', probability=True),
    param_grid=param_test1,
    scoring="accuracy",
    cv=5,       # number of cross-validation folds
    n_jobs=5,   # number of parallel workers
    verbose=2,
)
gsearch1.fit(train_hist, train_label)

# Report the best C, its mean cross-validated accuracy, and the refit model.
print(gsearch1.best_params_)
print(gsearch1.best_score_)
print(gsearch1.best_estimator_)
# Performance check of the linear SVM after parameter tuning: fit on the
# training histograms with the tuned C, then score on the test set.
svm_t = sklearn.svm.SVC(
    C=0.9400000000000001,
    break_ties=False,
    cache_size=200,
    class_weight='balanced',
    coef0=0.0,
    decision_function_shape='ovr',
    degree=3,
    gamma='scale',
    kernel='linear',
    max_iter=-1,
    probability=True,
    random_state=None,
    shrinking=True,
    tol=0.001,
    verbose=False,
)
svm_t.fit(train_hist, train_label)
predict_t = svm_t.predict(test_hist)

# Print the accuracy (the label text is Chinese for "accuracy is:"), then the
# per-class report and the raw confusion matrix.
print('准确率是：%s' % (accuracy_score(test_label, predict_t)))
print(classification_report(test_label, predict_t))
print(confusion_matrix(test_label, predict_t))
# Train an SVM with the radial basis function (RBF) kernel and predict on
# the test set.
svm_rbf = sklearn.svm.SVC(kernel='rbf', class_weight='balanced', probability=True)
svm_rbf.fit(train_hist, train_label)
# Predict on test_hist as-is: the model was fitted on the raw train_hist, and
# the linear-SVM evaluation also predicts on test_hist without wrapping it.
predict_rbf = svm_rbf.predict(test_hist)

# Print its performance on the test set: accuracy (the label text is Chinese
# for "accuracy is:"), per-class report, and confusion matrix.
print('准确率是：%s' % (accuracy_score(test_label, predict_rbf)))
print(classification_report(test_label, predict_rbf))
print(confusion_matrix(test_label, predict_rbf))
# Train an SVM with the polynomial kernel and predict on the test set.
svm_poly = sklearn.svm.SVC(kernel='poly', class_weight='balanced', probability=True)
svm_poly.fit(train_hist, train_label)
# Predict on test_hist as-is: the model was fitted on the raw train_hist, and
# the linear-SVM evaluation also predicts on test_hist without wrapping it.
predict_poly = svm_poly.predict(test_hist)

# Print its performance on the test set: accuracy (the label text is Chinese
# for "accuracy is:"), per-class report, and confusion matrix.
print('准确率是：%s' % (accuracy_score(test_label, predict_poly)))
print(classification_report(test_label, predict_poly))
print(confusion_matrix(test_label, predict_poly))
def plot_confusion_matrix(cm, classes, normalize=False,
                          title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: 2-D confusion matrix; rows are plotted along the "True label"
            axis and columns along the "Predicted label" axis.
        classes: Tick labels, one per row/column of ``cm``.
        normalize: If true, divide every row by its sum so each cell shows
            the fraction of that true class; cells are then printed with two
            decimals instead of as integers.
        title: Title of the plot.
        cmap: Matplotlib colormap used for the heat map.
    """
    cm = np.asarray(cm)
    if normalize:
        # A class with no true samples has a zero row sum; leave that row at
        # 0 instead of dividing by zero and filling the plot with NaN.
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(
            cm.astype('float'),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells above half of the maximum get white text, the rest black text.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


# Confusion matrix to plot. Replace predict_poly with another model's
# predictions to plot that model's confusion matrix instead.
cnf_matrix = confusion_matrix(test_label, predict_poly)
np.set_printoptions(precision=2)

# Raw counts.
plt.figure(figsize=(18, 6))
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='15 scene Confusion matrix')

# Row-normalized fractions.
plt.figure(figsize=(18, 6))
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='15 scene Confusion matrix')
plt.show()
线性支持向量机默认参数在独立测试集上的各项指标如下：
调参后的线性支持向量机性能略有提升。实验中还可以观察到，不同核函数带来的差异并不小；同时，随着单词数的增加，性能不断上升。在该应用场景中，多项式核支持向量机的准确度最高。
要想进一步提高 SVM 的准确度，可以从特征工程入手进行改进，比如参考这篇论文中的空间金字塔特征构建方法：特征数量大幅增加后，SVM 模型的性能也会大幅提高。