import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

plt.style.use("ggplot")
# IPython/Jupyter magic: only valid when this code runs inside a notebook cell.
%matplotlib inline
创建数据:
1 2 3 4 5 6 7 8 9 10 11
# Generate a synthetic dataset of 10000 samples drawn around two centers.
data, target = make_blobs(
    n_samples=10000, centers=2, random_state=1, cluster_std=1.0
)

# Create the training and test sets (test_size=0.2).
X_train1, X_test, y_train1, y_test = train_test_split(
    data, target, test_size=0.2, random_state=1
)

# Create the training and validation sets from the remaining training
# portion (test_size=0.3).
X_train, X_val, y_train, y_val = train_test_split(
    X_train1, y_train1, test_size=0.3, random_state=1
)

# Report the shape of every split.
print("The shape of training X:", X_train.shape)
print("The shape of training y:", y_train.shape)
print("The shape of validation X:", X_val.shape)
print("The shape of validation y:", y_val.shape)
print("The shape of test X:", X_test.shape)
print("The shape of test y:", y_test.shape)
输出为:
1 2 3 4 5 6
The shape of training X: (5600, 2)
The shape of training y: (5600,)
The shape of validation X: (2400, 2)
The shape of validation y: (2400,)
The shape of test X: (2000, 2)
The shape of test y: (2000,)
# Stacking classification with 3-fold cross-validation (CV).
from sklearn import datasets
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingCVClassifier

# Load the iris dataset and keep only feature columns 1 and 2 (slice 1:3).
iris = datasets.load_iris()
X, y = iris.data[:, 1:3], iris.target
# Imports for the stacking snippet that additionally uses GridSearchCV.
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from mlxtend.classifier import StackingCVClassifier
# Different first-level classifiers can be fit to different feature subsets
# of the training dataset.
from sklearn.datasets import load_iris
from mlxtend.classifier import StackingCVClassifier
from mlxtend.feature_selection import ColumnSelector
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from mlxtend.classifier import StackingCVClassifier
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier

# Load the iris dataset and keep only feature columns 0 and 1.
iris = datasets.load_iris()
X, y = iris.data[:, [0, 1]], iris.target

# Binarize the output
y = label_binarize(y, classes=[0, 1, 2])
# The class count is read from the second dimension of the binarized labels.
n_classes = y.shape[1]

RANDOM_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=RANDOM_SEED
)

# Learn to predict each class against the other
# NOTE(review): `sclf` is not defined in the visible code -- presumably a
# StackingCVClassifier instance built in an omitted snippet; confirm it is
# defined before this line runs, otherwise this raises NameError.
classifier = OneVsRestClassifier(sclf)
y_score = classifier.fit(X_train, y_train).decision_function(X_test)