导入需要用到的模块
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
读入数据
df = pd.read_csv(r"iris\YT-Django-Iris-App-3xj9B0qqps-master\iris.csv")
将数据拆分成训练集和测试集
x = ['sepal_length','sepal_width','petal_length','petal_width']
# Select the feature columns named in x as the model inputs.
X = df[x]
# The 'classification' column is the prediction target.
y = df['classification']
# Hold out 20% of the rows for testing; random_state=1 makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X,y,test_size=0.2,random_state=1)
训练数据集和测试数据集的比例是 8:2
训练模型并预测
model = SVC(gamma='auto')
# Fit the classifier on the training portion of the data.
model.fit(X_train,Y_train)
# Predict labels for the held-out test rows.
predictions = model.predict(X_test)
输入数据预测
# One hand-written sample: a value for each of the four feature columns in x.
iris = [1,1,1,1]
# The model was fitted on a DataFrame, so predict on a one-row DataFrame with
# the same column names; a bare nested list makes scikit-learn warn that
# "X does not have valid feature names".
results = model.predict(pd.DataFrame([iris], columns=x))
print(results)
结果 results 是一个 NumPy 数组（numpy.ndarray），其中每个元素对应一条输入样本的预测类别
输出模型准确性
print(accuracy_score(Y_test,predictions))
运行代码得到结果为 0.966666666667
保存模型
pd.to_pickle(model,r"new_model.pickle")
如果需要用这个模型可以直接读入
# NOTE: reading a pickle can execute arbitrary code embedded in the file;
# only load pickle files that come from a trusted source.
model = pd.read_pickle(r"new_model.pickle")
完整代码
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Train an SVC classifier on the iris CSV, report its accuracy on a held-out
# split, round-trip the fitted model through a pickle file, and classify one
# hand-written sample.

# NOTE: the path is relative and uses Windows-style backslashes.
df = pd.read_csv(r"iris\YT-Django-Iris-App-3xj9B0qqps-master\iris.csv")
print(df.head())

# Feature columns used as model inputs; 'classification' is the target.
x = ['sepal_length','sepal_width','petal_length','petal_width']
X = df[x]
y = df['classification']

# Hold out 20% of the rows for testing; random_state=1 makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X,y,test_size=0.2,random_state=1)

model = SVC(gamma='auto')
model.fit(X_train,Y_train)

# Accuracy is measured on the held-out rows only.
predictions = model.predict(X_test)
print(accuracy_score(Y_test,predictions))

# Save the fitted model, then load it back to show how it can be reused.
pd.to_pickle(model,r"new_model.pickle")
# NOTE: reading a pickle can execute arbitrary code embedded in the file;
# only load pickle files that come from a trusted source.
model = pd.read_pickle(r"new_model.pickle")

# One hand-written sample: a value for each of the four feature columns in x.
iris = [1,1,1,1]
# The model was fitted on a DataFrame, so predict on a one-row DataFrame with
# the same column names; a bare nested list makes scikit-learn warn that
# "X does not have valid feature names".
results = model.predict(pd.DataFrame([iris], columns=x))
print(results)