# Writing an SVM Program with sklearn
# 1. Environment
- Python 3.7.2
- scikit-learn (Python machine learning library), installed with: pip install scikit-learn
- numpy (matrix/array library)
- matplotlib (plotting library)
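To check that the installed packages are available and which versions are in use, a quick sanity-check snippet (not part of the original programs) can print them:

```python
# Print the Python and library versions used by the programs below (sanity check only)
import sys
import sklearn
import numpy
import matplotlib

print("python     :", sys.version.split()[0])
print("sklearn    :", sklearn.__version__)
print("numpy      :", numpy.__version__)
print("matplotlib :", matplotlib.__version__)
```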
# 2. Program 1
Program 1 description
- The program classifies two-dimensional data points into two classes
- Data format (per row, two-dimensional): x coordinate, y coordinate, label (a sketch for generating sample data in this format follows this list)
- Output: classification accuracy and the number of support vectors
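The post does not include the `./data/train.csv` file itself. As a minimal sketch, a file in the expected format (x coordinate, y coordinate, label per row) could be generated as below; the two Gaussian blobs, their centers and sizes, and the 0/1 label values are assumptions for illustration only:

```python
# Generate a sample ./data/train.csv with rows of: x coordinate, y coordinate, label
# (two Gaussian blobs; centers, sizes and the 0/1 labels are assumptions)
import os
import numpy as np

rng = np.random.RandomState(0)
n = 200
x1 = rng.normal(loc=0.3, scale=0.1, size=(n, 2))   # class labeled 1
x0 = rng.normal(loc=0.7, scale=0.1, size=(n, 2))   # class labeled 0
x = np.vstack([x1, x0])
y = np.hstack([np.ones(n), np.zeros(n)])

os.makedirs('./data', exist_ok=True)
np.savetxt('./data/train.csv', np.column_stack([x, y]), delimiter=',')
```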
Program 1 code
```python
# Train an RBF-kernel SVM with sklearn
import os
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV

path1 = os.path.abspath('.')

# Load the data: each row is x coordinate, y coordinate, label
def load(name):
    file = np.loadtxt(open(name, 'rb'), delimiter=',')
    x = file[:, [0, 1]]
    y = file[:, 2]
    return x, y

if __name__ == "__main__":
    # Read the data (two-dimensional, not linearly separable)
    data, label = load('./data/train.csv')
    # 4:1 train/test split
    x_train, x_test, y_train, y_test = train_test_split(data, label,
                                                        test_size=.2, random_state=0)
    # Parameters
    c = 5000   # penalty parameter C
    g = 0.5    # RBF kernel parameter gamma
    # Initialize the model
    clf = SVC(cache_size=200, class_weight=None, coef0=0.0, C=c,
              decision_function_shape='ovr', degree=3, gamma=g, kernel='rbf',
              max_iter=-1, probability=False, random_state=None, shrinking=True,
              tol=0.001, verbose=False)
    clf.fit(x_train, y_train)
    # Predict on the test set
    predict_list = clf.predict(x_test)
    # Test accuracy
    precision = clf.score(x_test, y_test)
    print('precision is:', precision * 100, '%')
    # Model attributes
    n_Support_vector = clf.n_support_     # number of support vectors per class
    print('number of support vectors:', n_Support_vector)
    Support_vector_index = clf.support_   # indices of the support vectors
```
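Program 1 imports GridSearchCV but never calls it, and instead fixes C=5000 and gamma=0.5 by hand. If you prefer to choose C and gamma by cross-validated grid search, a minimal sketch could look like the following; the candidate values in param_grid are assumptions, not values from the original post:

```python
# Choose C and gamma for the RBF SVM by cross-validated grid search
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split
import numpy as np

# Reuse the same data loading and split as the program above
file = np.loadtxt(open('./data/train.csv', 'rb'), delimiter=',')
x, y = file[:, [0, 1]], file[:, 2]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.2, random_state=0)

# Candidate values are assumptions for illustration
param_grid = {'C': [1, 10, 100, 1000, 5000],
              'gamma': [0.01, 0.1, 0.5, 1.0]}
search = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=5)
search.fit(x_train, y_train)

print('best parameters:', search.best_params_)
print('test accuracy with best parameters:', search.score(x_test, y_test))
```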
# 3. Program 2
Program 2 builds on Program 1 and adds plotting: the data points colored by class, the support vectors circled, and the decision surface of the trained SVM.
Program 2 code
```python
# Train an SVM with sklearn and plot the result
import os
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split

path1 = os.path.abspath('.')

# Load the data: each row is x coordinate, y coordinate, label
def load(name):
    file = np.loadtxt(open(name, 'rb'), delimiter=',')
    x = file[:, [0, 1]]
    y = file[:, 2]
    return x, y

# Plot the data points, support vectors and decision surface
def plot_point(data, label, Support_vector_index, clf, title):
    # Data points, colored by class
    for i in range(np.shape(data)[0]):
        if label[i] == 1:
            plt.scatter(data[i][0], data[i][1], c='b', s=20)
        else:
            plt.scatter(data[i][0], data[i][1], c='y', s=20)
    # Circle the support vectors
    for j in Support_vector_index:
        plt.scatter(data[j][0], data[j][1], s=100, c='none', alpha=0.5,
                    linewidth=1, edgecolor='g')
    # Decision surface over a fine meshgrid
    x_min, x_max = data[:, 0].min() - 0.01, data[:, 0].max() + 0.01
    y_min, y_max = data[:, 1].min() - 0.01, data[:, 1].max() + 0.01
    h = 0.001
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())
    z = clf.predict(np.c_[xx.ravel(), yy.ravel()])  # SVM predictions on the grid
    # Put the result into a color plot
    z = z.reshape(xx.shape)
    plt.contourf(xx, yy, z, cmap='hot', alpha=0.3)
    plt.title(title)
    plt.show()

if __name__ == "__main__":
    # Read the data (two-dimensional, not linearly separable)
    data, label = load('./data/train.csv')
    # Train/test split
    x_train, x_test, y_train, y_test = train_test_split(data, label,
                                                        test_size=.2, random_state=0)
    # Parameters
    c = 1000   # penalty parameter C
    g = 0.1    # RBF kernel parameter gamma
    # Initialize the model
    clf = SVC(cache_size=200, class_weight=None, coef0=0.0, C=c,
              decision_function_shape='ovr', degree=3, gamma=g, kernel='rbf',
              max_iter=-1, probability=False, random_state=None, shrinking=True,
              tol=0.001, verbose=False)
    clf.fit(x_train, y_train)
    # Predict on the test set
    predict_list = clf.predict(x_test)
    # Test accuracy
    precision = clf.score(x_test, y_test)
    print('precision is:', precision * 100, '%')
    # Model attributes
    n_Support_vector = clf.n_support_     # number of support vectors per class
    print('number of support vectors:', n_Support_vector)
    Support_vector_index = clf.support_   # indices of the support vectors
    title = 'C=' + str(c) + ', gamma=' + str(g)
    # Plot
    plot_point(data, label, Support_vector_index, clf, title)
```
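As an optional variant (not in the original program), the margin can also be made visible by contouring the SVM's decision function on top of the filled prediction regions. A small helper sketch, assuming the same `clf`, `xx` and `yy` as built inside plot_point:

```python
# Overlay the decision boundary and margins of a binary SVC on an existing plot
import numpy as np
import matplotlib.pyplot as plt

def plot_margin(clf, xx, yy):
    # Decision-function values over the meshgrid; level 0 is the boundary,
    # levels -1 and +1 are the margins
    z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.contour(xx, yy, z, levels=[-1, 0, 1],
                linestyles=['--', '-', '--'], colors='k')
```

Calling plot_margin(clf, xx, yy) inside plot_point, right before plt.title(title), would draw the boundary and margin lines on the same figure.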