
Source Code for Module PyML.classifiers.platt

import math

import numpy

from PyML.classifiers.composite import CompositeClassifier
from PyML.classifiers.baseClassifiers import Classifier
from PyML.classifiers import svm
from PyML.evaluators import assess
from PyML.datagen import sample

class Platt (CompositeClassifier) :
    """
    Converts a real valued classifier into a conditional probability estimator.
    This is achieved by fitting a sigmoid with parameters A and B to the
    values of the decision function:
    f(x) --> 1/(1+exp(A*f(x)+B))

    The code is based on Platt's pseudocode from:

    John C. Platt. Probabilistic Outputs for Support Vector
    Machines and Comparisons to Regularized Likelihood Methods. in:
    Advances in Large Margin Classifiers
    A. J. Smola, B. Schoelkopf, D. Schuurmans, eds. MIT Press (1999).

    :Keywords:
      - `mode` - values: 'holdOut' (default), 'cv'.
        The Platt object fits a sigmoid to the values of the classifier decision
        function.  The values of the decision function are computed in one of
        two ways: on a hold-out set (the 'holdOut' mode), or by cross-validation
        (the 'cv' mode).
      - `fittingFraction` - the fraction of the training data to use for fitting
        the sigmoid (the rest is used for training the classifier).
        default: 0.2
      - `numFolds` - the number of cross-validation folds to use in 'cv' mode.
        default: 3
    """

    attributes = {'mode' : 'holdOut',
                  'numFolds' : 3,
                  'fittingFraction' : 0.2}
    def train(self, data, **args) :

        Classifier.train(self, data, **args)
        if self.labels.numClasses != 2 :
            raise ValueError, 'number of classes is not 2'

        if self.mode == 'cv' :
            # in 'cv' mode the final classifier is trained on all the data;
            # cross-validation only supplies the decision values for fitting
            self.classifier.train(data, **args)

        numTries = 0
        maxNumTries = 5
        success = False
        while not success and numTries < maxNumTries :
            numTries += 1
            if self.mode == 'cv' :
                fittingData = data
                r = self.classifier.stratifiedCV(data, self.numFolds)
            elif self.mode == 'holdOut' :
                fittingData, trainingData = sample.splitDataset(data, self.fittingFraction)
                self.classifier.train(trainingData, **args)
                r = self.classifier.test(fittingData)
            else :
                raise ValueError, 'unknown mode for Platt'
            self.labels = self.classifier.labels

            prior1 = fittingData.labels.classSize[1]
            prior0 = fittingData.labels.classSize[0]
            out = numpy.array(r.Y, numpy.float_)
            try :
                self.fit_A_B(prior1, prior0, out, r.decisionFunc, r.givenY)
                success = True
            except :
                # sigmoid fitting failed -- retry on a fresh split
                pass

        if not success :
            print 'platt not successful'
            self.A = None
            self.B = None
            # fall back to scaling the decision values into [-1, 1]
            results = self.classifier.test(data)
            maxPos = 1e-3
            minNeg = -1e-3
            for f in results.decisionFunc :
                if f > 0 :
                    if f > maxPos :
                        maxPos = f
                elif f < 0 :
                    if f < minNeg :
                        minNeg = f
            self.maxPos = maxPos
            self.minNeg = abs(minNeg)

        self.log.trainingTime = self.getTrainingTime()

    def fit_A_B(self, prior1, prior0, out, deci, Y) :

        A = 0.0
        B = math.log((prior0 + 1.0) / (prior1 + 1.0))
        hiTarget = (prior1 + 1.0) / (prior1 + 2.0)
        loTarget = 1.0 / (prior0 + 2.0)
        l = 1e-3
        olderr = 1e15

        pp = numpy.ones(len(out), numpy.float_) * \
             (prior1 + 1.0) / (len(out) + 2.0)

        count = 0
        t = numpy.zeros(len(out), numpy.float_)
        for i in range(len(out)) :
            if Y[i] == 1 :
                t[i] = hiTarget
            else :
                t[i] = loTarget

        for it in range(1, 101) :
            d1 = pp - t
            d2 = pp * (1 - pp)
            # Hessian entries (a, b, c) and gradient terms (d, e)
            a = numpy.sum(out * out * d2)
            b = numpy.sum(d2)
            c = numpy.sum(out * d2)
            d = numpy.sum(out * d1)
            e = numpy.sum(d1)
            if abs(d) < 1e-9 and abs(e) < 1e-9 :
                break
            oldA = A
            oldB = B
            err = 0.0
            while 1 :
                det = (a + l) * (b + l) - c * c
                if det == 0 :
                    l *= 10
                    continue
                A = oldA + ((b + l) * d - c * e) / det
                B = oldB + ((a + l) * e - c * d) / det

                # pp2 is 1 - pp, computed in a numerically stable way
                pp = 1.0 / (1 + numpy.exp(out * A + B))
                pp2 = 1.0 / (1 + numpy.exp(-out * A - B))
                err = - numpy.sum(t * numpy.log(pp) +
                                  (1 - t) * numpy.log(pp2))
                if err < olderr * (1 + 1e-7) :
                    l *= 0.1
                    break

                # the step increased the error -- increase the damping
                l *= 10
                if l > 1e6 :
                    raise ValueError, 'lambda too big'
            diff = err - olderr
            scale = 0.5 * (err + olderr + 1.0)
            if diff > -1e-3 * scale and diff < 1e-7 * scale :
                count += 1
            else :
                count = 0
            olderr = err
            if count == 3 :
                break

        self.A = A
        self.B = B
        self.log.trainingTime = self.getTrainingTime()

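A reading of the update above (with the convention p_i = 1/(1+exp(A f_i + B))
and targets t_i): a, b and c are the entries of the Hessian of the
cross-entropy error F, and d, e its negated gradient, so each accepted
iteration is a damped, Levenberg-Marquardt style Newton step:

    a = \sum_i f_i^2\, p_i (1 - p_i), \quad b = \sum_i p_i (1 - p_i), \quad c = \sum_i f_i\, p_i (1 - p_i)

    d = \sum_i f_i\, (p_i - t_i), \quad e = \sum_i (p_i - t_i)

    \begin{pmatrix} A \\ B \end{pmatrix} \leftarrow
    \begin{pmatrix} A \\ B \end{pmatrix} +
    \begin{pmatrix} a + \lambda & c \\ c & b + \lambda \end{pmatrix}^{-1}
    \begin{pmatrix} d \\ e \end{pmatrix}

Since \partial F / \partial A = -d and \partial F / \partial B = -e, this is
the usual \theta \leftarrow \theta - H_\lambda^{-1} \nabla F.
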
    def decisionFunc(self, data, i) :

        f = self.classifier.decisionFunc(data, i)
        if self.A is not None :
            return 1.0 / (1 + math.exp(self.A * f + self.B))
        else :
            # sigmoid fitting failed; fall back to the scaling from train
            if f > 0 :
                return f / self.maxPos
            else :
                return f / self.minNeg

    def classify(self, data, i) :

        prob = self.decisionFunc(data, i)
        if prob > 0.5 :
            return (1, prob)
        else :
            return (0, prob)

    test = assess.test

    def save(self, fileName) :

        if type(fileName) == type('') :
            outfile = open(fileName, 'w')
        else :
            outfile = fileName

        outfile.write('#A=' + str(self.A) + '\n')
        outfile.write('#B=' + str(self.B) + '\n')

        self.classifier.save(outfile)

    def load(self, fileName) :

        infile = open(fileName)
        for line in infile :
            if line.find('A=') > 0 :
                self.A = float(line[3:])
            if line.find('B=') > 0 :
                self.B = float(line[3:])
                break
        infile.close()
        self.classifier = svm.loadSVM(fileName)
        self.labels = self.classifier.labels

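A saved Platt classifier therefore begins with two header lines holding the
sigmoid parameters, followed by the wrapped SVM in its own save format (the
numbers below are made up for illustration):

    #A=-2.4817
    #B=0.1132
    ... SVM model as written by self.classifier.save ...

load recovers A and B from these header lines and then delegates the rest of
the file to svm.loadSVM.
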
class Platt2 (Platt) :
    '''
    Converts a real valued classifier into a conditional probability estimator.
    This is achieved by fitting a sigmoid with parameters A and B to the
    values of the decision function:
    f(x) --> 1/(1+exp(A*f(x)+B))

    The fitting procedure is a Levenberg-Marquardt optimization derived by
    Tobias Mann using Mathematica, to optimize the objective function in:

    John C. Platt. Probabilistic Outputs for Support Vector
    Machines and Comparisons to Regularized Likelihood Methods. in:
    Advances in Large Margin Classifiers
    A. J. Smola, B. Schoelkopf, D. Schuurmans, eds. MIT Press (1999).
    '''
    def fit_A_B(self, prior1, prior0, out, deci, Y) :

        hiTarget = (prior1 + 1.0) / (prior1 + 2.0)
        loTarget = 1.0 / (prior0 + 2.0)
        t = numpy.zeros(len(Y), numpy.float_)
        for i in range(len(Y)) :
            if Y[i] == 1 :
                t[i] = hiTarget
            else :
                t[i] = loTarget

        maxiter = 100
        A = 0.0
        B = math.log((prior0 + 1.0) / (prior1 + 1.0))
        A_init = A
        B_init = B
        ll = self.log_likelihood(t, deci, A, B)
        lm_lambda = 1e-4
        for it in range(maxiter) :
            H = self.hessian(t, deci, A, B)
            grad = self.gradient(t, deci, A, B)
            # copy H before damping so the undamped Hessian is not clobbered
            H_for_inversion = [row[:] for row in H]
            H_for_inversion[0][0] = H_for_inversion[0][0] + lm_lambda
            H_for_inversion[1][1] = H_for_inversion[1][1] + lm_lambda
            cond = self.condition_number(H_for_inversion)
            if cond is None or cond > 1e5 :
                # the damped system is too ill-conditioned to trust; give up
                A = A_init
                B = B_init
                break

            inverse_H = self.two_by_two_inverse(H_for_inversion)
            update_vec = self.get_proposed_update_vec(inverse_H, grad)
            proposed_ll = self.log_likelihood(t, deci, A - update_vec[0],
                                              B - update_vec[1])
            if proposed_ll < ll :
                # accept the step and relax the damping
                A = A - update_vec[0]
                B = B - update_vec[1]
                lm_lambda = lm_lambda / 10
                delta = ll - proposed_ll
                ll = proposed_ll
                if delta < 1e-4 :
                    break
            else :
                # reject the step and increase the damping
                lm_lambda = lm_lambda * 10

        self.A = A
        self.B = B

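The same optimization, written as a standalone sketch independent of PyML (the
function name fit_sigmoid and the synthetic data are illustrative assumptions,
not part of the library); it uses the closed-form gradient and Hessian noted
after fit_A_B of the parent class:

    import math
    import numpy

    def fit_sigmoid(f, y, maxiter = 100) :
        """Fit p(f) = 1/(1 + exp(A*f + B)) to decision values f, labels y in {0, 1}."""
        f = numpy.asarray(f, dtype = float)
        y = numpy.asarray(y)
        prior1 = int(numpy.sum(y == 1))
        prior0 = len(y) - prior1
        # Platt's regularized targets instead of raw 0/1 labels
        t = numpy.where(y == 1, (prior1 + 1.0) / (prior1 + 2.0),
                        1.0 / (prior0 + 2.0))
        A = 0.0
        B = math.log((prior0 + 1.0) / (prior1 + 1.0))
        lam = 1e-4

        def err(A, B) :
            # cross-entropy error, clipped away from log(0)
            p = numpy.clip(1.0 / (1 + numpy.exp(A * f + B)), 1e-15, 1 - 1e-15)
            return -numpy.sum(t * numpy.log(p) + (1 - t) * numpy.log(1 - p))

        olderr = err(A, B)
        for it in range(maxiter) :
            p = 1.0 / (1 + numpy.exp(A * f + B))
            d1 = p - t                 # residual; the error gradient is -(d, e)
            d2 = p * (1 - p)
            d, e = numpy.sum(f * d1), numpy.sum(d1)
            H = numpy.array([[numpy.sum(f * f * d2) + lam, numpy.sum(f * d2)],
                             [numpy.sum(f * d2), numpy.sum(d2) + lam]])
            dA, dB = numpy.linalg.solve(H, numpy.array([d, e]))
            newerr = err(A + dA, B + dB)
            if newerr < olderr :       # accept the step, relax the damping
                A, B = A + dA, B + dB
                lam *= 0.1
                converged = olderr - newerr < 1e-7
                olderr = newerr
                if converged :
                    break
            else :                     # reject the step, increase the damping
                lam *= 10
                if lam > 1e6 :
                    break
        return A, B

On well separated synthetic data, e.g.
f = numpy.concatenate((numpy.random.randn(50) + 2, numpy.random.randn(50) - 2))
with the first half labeled 1, the fitted A comes out negative, so large
positive decision values map to probabilities near 1.
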
    def get_proposed_update_vec(self, m, v) :

        update_vec = [0, 0]
        update_vec[0] = m[0][0] * v[0] + m[0][1] * v[1]
        update_vec[1] = m[1][0] * v[0] + m[1][1] * v[1]
        return update_vec

    def condition_number(self, M) :

        # assumes 2x2 matrices!
        M_inverse = self.two_by_two_inverse(M)

        if M_inverse is None :
            condition_number = None
        else :
            M_norm = math.sqrt(M[0][0]**2 +
                               M[0][1]**2 +
                               M[1][0]**2 +
                               M[1][1]**2)
            M_inverse_norm = math.sqrt(M_inverse[0][0]**2 +
                                       M_inverse[0][1]**2 +
                                       M_inverse[1][0]**2 +
                                       M_inverse[1][1]**2)
            condition_number = M_norm * M_inverse_norm

        return condition_number

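What condition_number computes is the Frobenius-norm condition estimate

    \kappa_F(M) = \|M\|_F \, \|M^{-1}\|_F

used in fit_A_B above purely as a guard: when the damped Hessian is nearly
singular (here, \kappa_F > 10^5), the update is not trusted and fitting is
abandoned.
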
    def log_likelihood(self, t, f, A, B) :

        # computes Platt's log likelihood function.  t is the target vector,
        # f is the decision function vector, A and B are the sigmoid parameters
        ll = 0
        small = 1e-15

        for i in range(len(t)) :
            exp_term = math.exp(A * f[i] + B)
            p_i = 1 / (1 + exp_term)

            # don't take the log of zero!
            if p_i < small :
                p_i = small

            # also trouble if 1 - p_i = 0...
            if abs(p_i - 1) < small :
                p_i = 1 - small

            ll = ll + t[i] * math.log(p_i) + \
                 (1 - t[i]) * math.log(1 - p_i)

        return -ll

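In symbols, log_likelihood returns the negative log-likelihood that Platt's
method minimizes, with the regularized targets t_i:

    F(A, B) = -\sum_i \left[ t_i \log p_i + (1 - t_i) \log (1 - p_i) \right],
    \qquad p_i = \frac{1}{1 + \exp(A f_i + B)}
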
    def two_by_two_inverse(self, M) :

        # for the 2x2 matrix M with elements
        # [a, b
        #  c, d]
        # the inverse is 1/(ad - bc) * [d -b; -c a]
        a = M[0][0]
        b = M[0][1]
        c = M[1][0]
        d = M[1][1]
        det = a * d - b * c
        I = [[0, 0], [0, 0]]
        if det == 0 :
            I = None
        else :
            I[0][0] = d / det
            I[0][1] = -b / det
            I[1][0] = -c / det
            I[1][1] = a / det
        return I

    def gradient(self, t, f, A, B) :

        gradient = [0, 0]
        gradient[0] = self.dF_dA(t, f, A, B)
        gradient[1] = self.dF_dB(t, f, A, B)

        return gradient

    def hessian(self, t, f, A, B) :

        d2f_dA2 = self.dF_dAA(t, f, A, B)
        d2f_dB2 = self.dF_dBB(t, f, A, B)
        d2f_dAB = self.dF_dAB(t, f, A, B)

        hessian = [[0, 0], [0, 0]]
        hessian[0][0] = d2f_dA2
        hessian[0][1] = d2f_dAB
        hessian[1][0] = d2f_dAB
        hessian[1][1] = d2f_dB2

        return hessian

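The Mathematica-generated partials in the dF_* helpers below look formidable,
but they simplify algebraically to the standard logistic forms, which is a
useful check when reading them:

    \frac{\partial F}{\partial A} = \sum_i f_i (t_i - p_i), \qquad
    \frac{\partial F}{\partial B} = \sum_i (t_i - p_i)

    \frac{\partial^2 F}{\partial A^2} = \sum_i f_i^2\, p_i (1 - p_i), \qquad
    \frac{\partial^2 F}{\partial A\, \partial B} = \sum_i f_i\, p_i (1 - p_i), \qquad
    \frac{\partial^2 F}{\partial B^2} = \sum_i p_i (1 - p_i)
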
    def dF_dA(self, t, f, A, B) :

        # computes the partial derivative of F (the log likelihood) w.r.t. A
        small = 1e-15
        partial = 0
        for i in range(len(t)) :
            invprob = 1 + math.exp(B + A * f[i])
            prob = 1 / invprob
            if abs(prob - 1) < small :
                prob = 1 - small

            partial = partial + \
                (math.exp(B + A * f[i]) * f[i] * (1 - t[i])) / \
                (invprob**2 * (1 - prob)) - \
                math.exp(B + A * f[i]) * prob * f[i] * t[i]

        return -partial

    def dF_dB(self, t, f, A, B) :

        # computes the partial derivative of F (the log likelihood) w.r.t. B
        small = 1e-15
        partial = 0
        for i in range(len(t)) :
            invprob = 1 + math.exp(B + A * f[i])
            prob = 1 / invprob
            if abs(prob - 1) < small :
                prob = 1 - small

            partial = partial + \
                (math.exp(B + A * f[i]) * (1 - t[i])) / \
                (invprob**2 * (1 - prob)) - \
                math.exp(B + A * f[i]) * prob * t[i]

        return -partial

    def dF_dAA(self, t, f, A, B) :

        # computes the second partial derivative of F w.r.t. A
        small = 1e-15
        partial = 0
        for i in range(len(t)) :
            invprob = 1 + math.exp(B + A * f[i])
            prob = 1 / invprob
            if abs(prob - 1) < small :
                prob = 1 - small
            partial = partial + \
                -((math.exp(2*B + 2*A*f[i]) * f[i]**2 * (1 - t[i])) / (invprob**4 * (1 - prob)**2)) - \
                (2 * math.exp(2*B + 2*A*f[i]) * f[i]**2 * (1 - t[i])) / (invprob**3 * (1 - prob)) + \
                (math.exp(B + A*f[i]) * f[i]**2 * (1 - t[i])) / (invprob**2 * (1 - prob)) + \
                (math.exp(2*B + 2*A*f[i]) * f[i]**2 * t[i]) / invprob**2 - \
                math.exp(B + A*f[i]) * prob * f[i]**2 * t[i]

        return -partial

    def dF_dBB(self, t, f, A, B) :

        # computes the second partial derivative of F w.r.t. B
        small = 1e-15
        partial = 0
        for i in range(len(t)) :
            invprob = 1 + math.exp(B + A * f[i])
            prob = 1 / invprob
            if abs(prob - 1) < small :
                prob = 1 - small
            partial = partial + \
                -((math.exp(2*B + 2*A*f[i]) * (1 - t[i])) / (invprob**4 * (1 - prob)**2)) - \
                (2 * math.exp(2*B + 2*A*f[i]) * (1 - t[i])) / (invprob**3 * (1 - prob)) + \
                (math.exp(B + A*f[i]) * (1 - t[i])) / (invprob**2 * (1 - prob)) + \
                (math.exp(2*B + 2*A*f[i]) * t[i]) / invprob**2 - \
                math.exp(B + A*f[i]) * prob * t[i]

        return -partial

    def dF_dAB(self, t, f, A, B) :

        # computes the second partial derivative of F w.r.t. A and B
        small = 1e-15
        partial = 0
        for i in range(len(t)) :
            invprob = 1 + math.exp(B + A * f[i])
            prob = 1 / invprob
            if abs(prob - 1) < small :
                prob = 1 - small
            partial = partial + \
                -((math.exp(2*B + 2*A*f[i]) * f[i] * (1 - t[i])) / (invprob**4 * (1 - prob)**2)) - \
                (2 * math.exp(2*B + 2*A*f[i]) * f[i] * (1 - t[i])) / (invprob**3 * (1 - prob)) + \
                (math.exp(B + A*f[i]) * f[i] * (1 - t[i])) / (invprob**2 * (1 - prob)) + \
                (math.exp(2*B + 2*A*f[i]) * f[i] * t[i]) / invprob**2 - \
                math.exp(B + A*f[i]) * prob * f[i] * t[i]

        return -partial