教女朋友学数据挖掘——泰坦尼克号获救预测( 七 )


7)神经网络
# Import all the required ML packages.
from sklearn.linear_model import LogisticRegression  # logistic regression
from sklearn import svm  # support vector machine
from sklearn.ensemble import RandomForestClassifier  # random forest
from sklearn.neighbors import KNeighborsClassifier  # KNN
from sklearn.naive_bayes import GaussianNB  # naive Bayes
from sklearn.tree import DecisionTreeClassifier  # decision tree
from sklearn.model_selection import train_test_split  # training and testing data split
from sklearn import metrics  # accuracy measure
from sklearn.metrics import confusion_matrix  # for confusion matrix
# Hold out 30% of the rows for testing. Stratifying on 'Survived' keeps the
# class ratio the same in both splits; random_state=0 makes the split repeatable.
train, test = train_test_split(
    data, test_size=0.3, random_state=0, stratify=data['Survived']
)

# The first column is taken as the target and the remaining columns as features
# (presumably 'Survived' is the first column of `data` -- verify against the
# data-preparation step). Note: train_Y / test_Y are one-column DataFrames,
# not Series, because they are selected with a column slice.
train_X = train[train.columns[1:]]
train_Y = train[train.columns[:1]]
test_X = test[test.columns[1:]]
test_Y = test[test.columns[:1]]

# Unsplit feature matrix and target vector.
X = data[data.columns[1:]]
Y = data['Survived']
1)径向基核支持向量机(rbf-SVM)
# Fit an RBF-kernel SVM on the training split and score it on the test split.
model = svm.SVC(kernel='rbf', C=1, gamma=0.1)
model.fit(train_X, train_Y)
prediction1 = model.predict(test_X)
# accuracy_score's documented signature is (y_true, y_pred).
print('Accuracy for rbf SVM is ', metrics.accuracy_score(test_Y, prediction1))
Accuracy for rbf SVM is  0.835820895522
2)线性支持向量机(linear-SVM)
# Fit a linear-kernel SVM on the training split and score it on the test split.
# gamma is only a coefficient for the 'rbf', 'poly' and 'sigmoid' kernels, so
# it has no effect here and is not passed.
model = svm.SVC(kernel='linear', C=0.1)
model.fit(train_X, train_Y)
prediction2 = model.predict(test_X)
# accuracy_score's documented signature is (y_true, y_pred).
print('Accuracy for linear SVM is', metrics.accuracy_score(test_Y, prediction2))
Accuracy for linear SVM is 0.817164179104
3)逻辑回归(Logistic Regression)
# Fit a logistic regression with default settings and score it on the test split.
model = LogisticRegression()
model.fit(train_X, train_Y)
prediction3 = model.predict(test_X)
# accuracy_score's documented signature is (y_true, y_pred).
print('The accuracy of the Logistic Regression is', metrics.accuracy_score(test_Y, prediction3))
The accuracy of the Logistic Regression is 0.817164179104
4)决策树(Decision Tree)
# Fit a decision tree with default settings and score it on the test split.
# No random_state is set, so the reported accuracy may vary between runs.
model = DecisionTreeClassifier()
model.fit(train_X, train_Y)
prediction4 = model.predict(test_X)
# accuracy_score's documented signature is (y_true, y_pred).
print('The accuracy of the Decision Tree is', metrics.accuracy_score(test_Y, prediction4))
The accuracy of the Decision Tree is 0.805970149254
5)K-近邻(KNN)
# Fit a k-nearest-neighbours classifier with default settings and score it
# on the test split.
model = KNeighborsClassifier()
model.fit(train_X, train_Y)
prediction5 = model.predict(test_X)
# accuracy_score's documented signature is (y_true, y_pred).
print('The accuracy of the KNN is', metrics.accuracy_score(test_Y, prediction5))
The accuracy of the KNN is 0.832089552239
KNN模型的精度会随着 n_neighbors 属性取值的不同而变化,该属性的默认值是5 。下面我们检查 n_neighbors 取不同值时的精度 。
# Evaluate KNN test accuracy for n_neighbors = 1..10 and plot the result.
a_index = list(range(1, 11))  # candidate n_neighbors values
x = list(range(11))  # x-axis tick positions 0..10

# Collect the scores in a plain list and build the Series once at the end:
# Series.append was removed in pandas 2.0, and an empty pd.Series() without
# an explicit dtype is deprecated.
scores = []
for i in a_index:
    model = KNeighborsClassifier(n_neighbors=i)
    model.fit(train_X, train_Y)
    prediction = model.predict(test_X)
    # accuracy_score's documented signature is (y_true, y_pred).
    scores.append(metrics.accuracy_score(test_Y, prediction))
a = pd.Series(scores)

plt.plot(a_index, a)
plt.xticks(x)
fig = plt.gcf()
fig.set_size_inches(12, 6)
plt.show()
print('Accuracies for different values of n are:', a.values, 'with the max value as ', a.values.max())
Accuracies for different values of n are: [ 0.75746269  0.79104478  0.80970149  0.80223881  0.83208955  0.81716418  0.82835821  0.83208955  0.8358209   0.83208955] with the max value as  0.835820895522
6)高斯朴素贝叶斯(Gaussian Naive Bayes)
# Fit a Gaussian naive Bayes classifier and score it on the test split.
model = GaussianNB()
model.fit(train_X, train_Y)
prediction6 = model.predict(test_X)
# accuracy_score's documented signature is (y_true, y_pred).
print('The accuracy of the NaiveBayes is', metrics.accuracy_score(test_Y, prediction6))
The accuracy of the NaiveBayes is 0.813432835821
7)随机森林(Random Forest)
# Fit a 100-tree random forest and score it on the test split.
# No random_state is set, so the reported accuracy may vary between runs.
model = RandomForestClassifier(n_estimators=100)
model.fit(train_X, train_Y)
prediction7 = model.predict(test_X)
# accuracy_score's documented signature is (y_true, y_pred).
print('The accuracy of the Random Forests is', metrics.accuracy_score(test_Y, prediction7))