本文共 4737 字,大约阅读时间需要 15 分钟。
Confusion Matrix
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Location of the breast-cancer data file; it is read without a header row.
DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data'

df = pd.read_csv(DATA_URL, header=None)

# Column 1 is used as the target, columns 2 onward as the features.
# Column 0 is not used (presumably a sample ID -- confirm against the data set).
X = df.loc[:, 2:].values
y = df.loc[:, 1].values

# Encode the class labels as integers.
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 20% of the samples for testing, keeping the class proportions
# the same in both splits (stratify=y).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    stratify=y,
    random_state=1,
)

# Standardize the features, then classify with a support vector machine.
pipe_svc = make_pipeline(StandardScaler(), SVC(random_state=1))
pipe_svc.fit(X_train, y_train)
y_pred = pipe_svc.predict(X_test)

# Tabulate true classes against predicted classes on the held-out data.
confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(confmat)

# To change the position (ordering) of the classes in the matrix, pass an
# explicit `labels` list to confusion_matrix, as in the statement that follows.
# labels=[1, 0] fixes the order in which the classes index the matrix:
# class 1 comes first, then class 0.
confmat = confusion_matrix(
    y_true=y_test,
    y_pred=y_pred,
    labels=[1, 0],
)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Location of the breast-cancer data file; it is read without a header row.
DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data'

df = pd.read_csv(DATA_URL, header=None)

# Column 1 is used as the target, columns 2 onward as the features.
X = df.loc[:, 2:].values
y = df.loc[:, 1].values

# Encode the class labels as integers.
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 20% of the samples for testing, stratified on the class label.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    stratify=y,
    random_state=1,
)

# Standardize the features, then classify with a support vector machine.
pipe_svc = make_pipeline(StandardScaler(), SVC(random_state=1))
pipe_svc.fit(X_train, y_train)
y_pred = pipe_svc.predict(X_test)

confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(confmat)

# Draw the matrix as a lightly shaded grid and write each count in its cell.
fig, ax = plt.subplots(figsize=(2.5, 2.5))
ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)
# Rows of the matrix run along the y axis and columns along the x axis.
for (row, col), count in np.ndenumerate(confmat):
    ax.text(x=col, y=row, s=count, va='center', ha='center')

plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.tight_layout()
plt.show()
Precision/Recall/F1
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Location of the breast-cancer data file; it is read without a header row.
DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data'

df = pd.read_csv(DATA_URL, header=None)

# Column 1 is used as the target, columns 2 onward as the features.
X = df.loc[:, 2:].values
y = df.loc[:, 1].values

# Encode the class labels as integers.
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 20% of the samples for testing, stratified on the class label.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    stratify=y,
    random_state=1,
)

# Standardize the features, then classify with a support vector machine.
pipe_svc = make_pipeline(StandardScaler(), SVC(random_state=1))
pipe_svc.fit(X_train, y_train)
y_pred = pipe_svc.predict(X_test)

# Score the held-out predictions with each metric, then report them
# to three decimal places.
precision = precision_score(y_true=y_test, y_pred=y_pred)
recall = recall_score(y_true=y_test, y_pred=y_pred)
f1 = f1_score(y_true=y_test, y_pred=y_pred)

print('Precision: %.3f' % precision)
print('Recall: %.3f' % recall)
print('F1: %.3f' % f1)
Scoring Metric (Precision/Recall/F1) in GridSearchCV
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.metrics import make_scorer
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Read the breast-cancer data file; it has no header row.
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data', header=None)

# Column 1 is used as the target, columns 2 onward as the features.
X = df.loc[:, 2:].values
y = df.loc[:, 1].values

# Encode the class labels as integers.
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 20% of the samples for testing, stratified on the class label.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, stratify=y, random_state=1)

# Standardize the features, then classify with a support vector machine.
# The pipeline is deliberately NOT fitted here: GridSearchCV fits its own
# clones of the estimator for every candidate, so a separate fit beforehand
# would be discarded work.
pipe_svc = make_pipeline(StandardScaler(), SVC(random_state=1))

# Custom scorer: F1 computed with class 0 treated as the positive label.
scorer = make_scorer(f1_score, pos_label=0)

# The same candidate values are tried for both C and gamma.
c_gamma_range = [0.01, 0.1, 1.0, 10.0]

# Two sub-grids: a linear kernel tuned over C only, and an RBF kernel tuned
# over C and gamma. The 'svc__' prefix addresses the SVC step of the pipeline.
param_grid = [
    {'svc__C': c_gamma_range,
     'svc__kernel': ['linear']},
    {'svc__C': c_gamma_range,
     'svc__gamma': c_gamma_range,
     'svc__kernel': ['rbf']},
]

# 10-fold cross-validated search over the grid, using all available cores.
gs = GridSearchCV(estimator=pipe_svc,
                  param_grid=param_grid,
                  scoring=scorer,
                  cv=10,
                  n_jobs=-1)
gs = gs.fit(X_train, y_train)

# Best mean cross-validated score and the parameter combination that achieved it.
print(gs.best_score_)
print(gs.best_params_)
转载地址:http://emygf.baihongyu.com/