决策树的简单实现

Coursera 课程 Machine Learning for Data Analysis,Week 1:Decision Trees

Python代码:[最后图片部分有些许改动]

import pandas as pd
import sklearn.metrics
from sklearn.tree import DecisionTreeClassifier

# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# the old sklearn.cross_validation module was removed in 0.20. Fall back to
# the legacy location only for very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split


"""
Data Engineering and Analysis
"""
# Load the dataset, then drop every row that has a missing value.
AH_data = pd.read_csv("tree_addhealth.csv")
data_clean = AH_data.dropna()

# Print an overall statistical summary of the cleaned data.
# (data_clean.info() and data_clean.dtypes are other handy overviews.)
print('---------desc-------------')
summary = data_clean.describe()
print(summary)

"""
Modeling and Prediction
"""
# Split into training and testing sets.
# The 24 explanatory columns, i.e. every field used here except TREG1.
predictor_columns = [
    'BIO_SEX', 'HISPANIC', 'WHITE', 'BLACK', 'NAMERICAN', 'ASIAN',
    'age', 'ALCEVR1', 'ALCPROBS1', 'marever1', 'cocever1', 'inhever1',
    'cigavail', 'DEP1', 'ESTEEM1', 'VIOL1', 'PASSIST', 'DEVIANT1',
    'SCHCONN1', 'GPA1', 'EXPEL1', 'FAMCONCT', 'PARACTV', 'PARPRES',
]
predictors = data_clean[predictor_columns]

# TREG1 is the column the tree is trained to predict.
targets = data_clean['TREG1']

# The test set takes 40% of the rows; the rest is used for training.
pred_train, pred_test, tar_train, tar_test = train_test_split(
    predictors, targets, test_size=0.4
)

# Inspect the result of the split by printing each piece's shape.
print('---------shapes------------')
for part in (pred_train, pred_test, tar_train, tar_test):
    print(part.shape)

# Build model on training data.
classifier = DecisionTreeClassifier()
classifier = classifier.fit(pred_train, tar_train)

# Predict the target for the held-out test rows.
predictions = classifier.predict(pred_test)

# The metric functions only return their results; when this file is run as a
# script (rather than typed into an interactive console) nothing is shown
# unless the values are printed explicitly.
print('---------confusion matrix------------')
print(sklearn.metrics.confusion_matrix(tar_test, predictions))
print('---------accuracy------------')
print(sklearn.metrics.accuracy_score(tar_test, predictions))

# Displaying the decision tree
import pydotplus
from sklearn import tree
from io import StringIO

# Export the fitted tree as Graphviz DOT text into an in-memory buffer.
out = StringIO()
tree.export_graphviz(classifier, out_file=out)

# Turn the DOT text into a PNG. pydotplus calls the Graphviz executables
# for this and raises InvocationException when they cannot be found.
graph = pydotplus.graph_from_dot_data(out.getvalue())
data = graph.create_png()  # binary PNG image data

# Save the image to disk instead of displaying it inline.
with open('tree.png', 'wb') as f:
    f.write(data)

遇到的问题:

InvocationException: GraphViz’s executables not found

解决:

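pip install graphviz

注意:该命令只安装 Python 的 graphviz 封装包;pydotplus 实际调用的是 Graphviz 的可执行文件(dot),因此还需要通过系统包管理器(如 apt install graphviz、brew install graphviz)或官网安装包安装 Graphviz 本体,并确保其 bin 目录已加入 PATH。

这样虽说不报错了,但是图片还是无法显示,于是就做了变动,改为保存图片。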
tree.png:

击蒙御寇