1 import os
2 import numpy
3
4 from PyML.classifiers import svm,multi,ridgeRegression,knn,composite,modelSelection
5 from PyML.feature_selection import featsel
6 from PyML.containers import ker,labels
7 from PyML.containers import vectorDatasets
8 from PyML.containers.aggregate import Aggregate
9 from PyML.containers.kernelData import KernelData
10 from PyML.containers.sequenceData import SequenceData
11 from PyML.evaluators import assess
12
# Directory holding the example data sets, relative to the directory the
# script is run from.
_DATA_DIR = '../../data'

# Data files read by test().
heartdatafile = _DATA_DIR + '/heart.data'
irisdatafile = _DATA_DIR + '/iris.data'
yeastdatafile = _DATA_DIR + '/yeast.data'

# Name of the vectorDatasets class that test() looks up to build its data sets.
container = 'SparseDataSet'
17
18
19
def _remove_if_exists(path) :
    """Delete `path` if it exists, so cleanup never masks an earlier error."""
    if os.path.exists(path) :
        os.remove(path)


def test(component='svm') :
    """Run the PyML smoke tests for one component, or for all of them.

    Parameters:
        component: name of the section to run.  One of 'general', 'svm',
            'kernelData', 'normalization', 'svr', 'save', 'classifiers',
            'platt', 'multi', 'featsel', 'modelSelection', or 'all' to run
            every section.  Any other name runs nothing.

    Returns:
        A dict mapping each executed component name to the list of result
        objects collected from its cross-validation calls.  The 'general'
        section only exercises the API and adds no entry.

    Raises:
        ValueError: if the module-level `container` does not name an
            attribute of `vectorDatasets`.

    The data files are addressed by relative paths (the module-level
    `*datafile` names, plus 'heart.kernel' for the 'kernelData' section),
    so the outcome depends on the current working directory.
    """
    try :
        DataSet = getattr(vectorDatasets, container)
    except AttributeError :
        raise ValueError('wrong container ' + container)

    results = {}

    def selected(name) :
        # A section runs when it is requested by name or when 'all' is given.
        return component == 'all' or component == name

    comp = 'general'
    if selected(comp) :
        s = svm.SVM()
        d = DataSet(heartdatafile, labelsColumn=0)
        s.train(d)
        s.test(d)
        s = svm.SVM()
        s.stratifiedCV(d)
        print('starting aggregate****************')
        d2 = Aggregate([d, d])
        print('end aggregate')
        s.stratifiedCV(d2)

        d.attachKernel('polynomial')
        s.cv(d)
        d.attachKernel('linear')
        s = svm.SVM()
        s.train(d)
        # Save/load round trip through the file "tmp"; the file is removed
        # even when one of the steps fails.
        try :
            s.train(d, saveSpace=False)
            s.save("tmp")
            loaded = svm.loadSVM("tmp", datasetClass=DataSet)
            loaded.test(d)
            d.attachKernel('gaussian', gamma=0.01)

            s.train(d, saveSpace=False)
            s.save("tmp")
            loaded = svm.loadSVM("tmp", datasetClass=DataSet, labelsColumn=1)
            loaded.test(d)
        finally :
            _remove_if_exists('tmp')

        # Construction-only checks: containers built from in-memory data.
        d = DataSet(numpy.random.randn(100, 10))
        d = DataSet([[1, 2], [2, 3]])
        d = SequenceData(['asa', 'ben', 'hur'])

    comp = 'svm'
    if selected(comp) :
        d = DataSet(heartdatafile, labelsColumn=0)
        results[comp] = []
        d.attachKernel('polynomial')
        s = svm.SVM()
        results[comp].append(s.cv(d, saveSpace=True))
        d.attachKernel('linear')
        results[comp].append(s.cv(d))

    comp = 'kernelData'
    if selected(comp) :
        d = DataSet(heartdatafile, labelsColumn=0)
        results[comp] = []
        kdata = KernelData('heart.kernel', gistFormat=True)
        # The kernel container gets its labels from the vector data set.
        kdata.attachLabels(d.labels)
        s = svm.SVM()
        results[comp].append(s.cv(kdata))
        kdata.attachKernel('gaussian', gamma=0.1)
        results[comp].append(s.cv(kdata))

    comp = 'normalization'
    if selected(comp) :
        results[comp] = []
        data = DataSet(heartdatafile, labelsColumn=0)
        data.attachKernel('polynomial', degree=4, normalization='dices')
        s = svm.SVM()
        results[comp].append(s.cv(data))

    comp = 'svr'
    if selected(comp) :
        d = DataSet(heartdatafile, labelsColumn=0, numericLabels=True)
        results[comp] = []
        s = svm.SVR()
        results[comp].append(s.cv(d))

    comp = 'save'
    if selected(comp) :
        results[comp] = []
        s = svm.SVM()
        data = DataSet(heartdatafile, labelsColumn=0)
        import tempfile
        # mkstemp creates the file atomically (mktemp is deprecated and
        # race-prone); only the path is needed, so the descriptor is closed.
        fd, tmpfile = tempfile.mkstemp()
        os.close(fd)
        try :
            r = s.cv(data)
            r.save(tmpfile)
            results[comp].append(assess.loadResults(tmpfile))

            r = s.nCV(data)
            r.save(tmpfile)
            results[comp].append(assess.loadResults(tmpfile))

            # Save and reload a whole dict of result objects in one file.
            r = {}
            for i in range(10) :
                r[i] = s.cv(data)

            assess.saveResultObjects(r, tmpfile)
            assess.loadResults(tmpfile)
        finally :
            _remove_if_exists(tmpfile)

    comp = 'classifiers'
    if selected(comp) :
        d = DataSet(heartdatafile, labelsColumn=0)
        results[comp] = []
        cl = knn.KNN()
        results[comp].append(cl.stratifiedCV(d))
        print('testing ridge regression')
        ridge = ridgeRegression.RidgeRegression()
        results[comp].append(ridge.cv(d))

    comp = 'platt'
    if selected(comp) :
        results[comp] = []
        d = DataSet(heartdatafile, labelsColumn=0)
        # Build the wrapped classifier here instead of relying on an `s`
        # left behind by another section, which is unbound when only
        # 'platt' is requested.
        s = svm.SVM()
        p = composite.Platt2(s)
        results[comp].append(p.stratifiedCV(d))

    comp = 'multi'
    if selected(comp) :
        results[comp] = []
        d = DataSet(irisdatafile, labelsColumn=-1)

        mc = multi.OneAgainstOne(svm.SVM())
        results[comp].append(mc.cv(d))

        d = DataSet(irisdatafile, labelsColumn=-1)

        mc = multi.OneAgainstRest(svm.SVM())
        results[comp].append(mc.cv(d))

        mc = multi.OneAgainstRest(svm.SVM())
        d.attachKernel('poly')
        results[comp].append(mc.cv(d))
        # Kernel switch and construction only; nothing is evaluated here.
        d.attachKernel('linear')
        mc = multi.OneAgainstRest(svm.SVM())

    comp = 'featsel'
    if selected(comp) :
        results[comp] = []

        s = svm.SVM()
        d = DataSet(yeastdatafile, labelsColumn=0)
        d2 = labels.oneAgainstRest(d, '2')
        results[comp].append(s.stratifiedCV(d2))

        # Feature selection with RFE wrapped around the SVM.
        m = composite.FeatureSelect(s, featsel.RFE())
        results[comp].append(m.stratifiedCV(d2, 3))

        # Feature selection with a filter built on the 'golub' score.
        fs = featsel.FeatureScore('golub')
        f = featsel.Filter(fs, sigma=2)
        m = composite.FeatureSelect(s, f)
        results[comp].append(m.stratifiedCV(d2, 3))

        # Construction-only check of a filter/classifier chain.
        c = composite.Chain([f, s])

    comp = 'modelSelection'
    if selected(comp) :
        results[comp] = []
        d = DataSet(heartdatafile, labelsColumn=0)
        # The first grid and the first selector are construction-only
        # checks; the second of each is what gets cross-validated.
        p = modelSelection.ParamGrid(svm.SVM(ker.Polynomial()),
                                     'C', [0.1, 1, 10, 100],
                                     'kernel.degree', [2, 3, 4])
        p = modelSelection.ParamGrid(svm.SVM(ker.Gaussian()),
                                     'C', [0.1, 1, 10, 100],
                                     'kernel.gamma', [0.01, 0.1, 1])

        m = modelSelection.ModelSelector(p, measure='roc', foldsToPerform=2)
        m = modelSelection.ModelSelector(p)

        results[comp].append(m.cv(d))

    return results
224
if __name__ == '__main__' :
    # Imported here because the module's import block does not provide
    # `sys`; without it the sys.argv lookup below raises NameError.
    import sys

    # An optional first command-line argument names the component to run;
    # without one, test() uses its own default.
    if len(sys.argv) > 1 :
        test(sys.argv[1])
    else :
        test()
231