1
2 from PyML.utils import misc
3 from PyML.utils import arrayWrap
4 from PyML.containers.ext import cpairdataset
5 from PyML.containers.baseDatasets import WrapperDataSet, BaseDataSet
6 from PyML.containers.labels import Labels
7
8 """
9 classes for dealing with data that is composed of pairs of simpler objects
10 for which a kernel is available
11 """
12
13 __docformat__ = "restructuredtext en"
14
15 -class PairDataSet (WrapperDataSet, cpairdataset.PairDataSet) :
16
17 """
18 DataSet container for pairs of objects.
19
20 The kernel between a pair is defined via the kernel between the
21 members of the pair:
22 K((X_1,X_2), (X'_1, X'_2)) = K'(X_1, X'_1) K'(X_2, X'_2) +
23 K'(X_1, X'_2) K'(X_2, X'_1)
24
25 file format::
26
27 id1_id2 label,... (can have additional fields that are ignored)
28
29 """
30
31 isVector = False
32
def __init__(self, arg, **args):
    """
    Build the data set from a file, or as a copy of another PairDataSet.

    :Parameters:
      - `arg` - a file name or another PairDataSet object.
        if a file name is supplied the constructor expects a dataset
        object as a keyword argument 'data'
    :Keywords:
      - `data` - a dataset object from which the kernel between the pairs
        of patterns is derived.
      - `patterns` - patterns to copy when performing copy construction
    :Raises:
      ValueError if `arg` is a file name and no 'data' keyword is given.
    """
    # NOTE(review): the ``def`` line was missing from the reviewed copy of
    # this file; the signature is reconstructed from how the body uses
    # ``arg`` and ``args`` -- confirm against the real source.

    BaseDataSet.__init__(self)
    if arg.__class__ == self.__class__:
        # Copy construction requires exactly this class; instances of any
        # other class (including subclasses) do not take this branch.
        self.copyConstruct(arg, **args)
    elif isinstance(arg, str):
        if 'data' not in args:
            # call form of raise is valid on both Python 2 and Python 3
            raise ValueError('missing data object')
        self._data = args['data']
        self.constructFromFile(arg)
    # Any other kind of `arg` matches neither branch and is ignored.

    # NOTE(review): indentation was lost in the reviewed copy; this call is
    # assumed to run for every construction path -- confirm.
    self.attachKernel('linear')
55
def copy(self, other, patterns, deepcopy):
    """
    Fill this object with the pairs of `other` selected by `patterns`.

    :Parameters:
      - `other` - the data set to copy from
      - `patterns` - indices into ``other.pairs`` of the pairs to keep,
        in the order they should appear here
      - `deepcopy` - accepted but not used by this method
    """
    self.callCopyConstructor(other, patterns)

    selected = []
    for index in patterns:
        selected.append(other.pairs[index])
    self.pairs = selected

    # the underlying data object is shared with `other`, not duplicated
    self._data = other._data
61
63
def constructFromFile(self, fileName):
    """
    Read pair IDs and labels from `fileName` and build the pair data set.

    Each pattern ID read from the file is expected to look like
    ``id1_id2``.  A pair is kept only when both IDs are known to the
    underlying data set (``self._data``); otherwise a message is printed
    and the pair is dropped.

    :Parameters:
      - `fileName` - name of the file to read; it is handed to `Labels`
    :Raises:
      ValueError if a pattern ID does not split into exactly two parts
      on '_'.
    """
    # NOTE(review): the ``def`` line was missing from the reviewed copy of
    # this file; the signature is reconstructed from the call
    # ``self.constructFromFile(arg)`` -- confirm against the real source.

    # presumably maps each pattern ID of the underlying data set to its
    # position in that data set -- verify against misc.list2dict
    patternIDdict = misc.list2dict(self._data.labels.patternID,
                                   range(len(self._data)))

    labels = Labels(fileName)
    patterns = []   # positions (in the file's labels) of the pairs we keep
    pairs = []      # (index1, index2) into the underlying data set
    for i in range(len(labels)):
        p1, p2 = labels.patternID[i].split('_')

        if p1 in patternIDdict and p2 in patternIDdict:
            pairs.append((patternIDdict[p1], patternIDdict[p2]))
            patterns.append(i)
        else:
            # same text the old print statement produced, written so that
            # it parses on both Python 2 and Python 3
            print('%s  or  %s not found' % (p1, p2))
    # restrict the labels to the pairs that were actually kept
    labels = labels.__class__(labels, patterns=patterns)

    self.pairs = pairs

    firstVector = arrayWrap.intVector([pair[0] for pair in pairs])
    secondVector = arrayWrap.intVector([pair[1] for pair in pairs])
    self.callConstructor(firstVector, secondVector)

    WrapperDataSet.attachLabels(self, labels)
89
90
95
99
101
102 return len(self.pairs)
103
105
106 return tuple(self.labels.patternID[i].split())
107
109
110 """
111 DataSet container for pairs of objects.
112
113 file format::
114
115 id1_id2, label,... (can have additional fields that are ignored)
116
117 """
118
119 isVector = False
120
def __init__(self, arg, **args):
    """
    Build the data set from a file, or as a copy of another instance.

    :Parameters:
      - `arg` - a file name or another object of this same class.
        if a file name is supplied the constructor expects a dataset
        object as a keyword argument 'data'
    :Keywords:
      - `data` - a dataset object from which the kernel between the pairs
        of patterns is derived.  The keyword must be present when reading
        from a file, but its value may be None, in which case every pair
        in the file is accepted.
      - `patterns` - patterns to copy when performing copy construction;
        defaults to all patterns of `arg`
    :Raises:
      ValueError if `arg` is a file name and no 'data' keyword is given.
    """
    # NOTE(review): the ``def`` line was missing from the reviewed copy of
    # this file; the signature is reconstructed from how the body uses
    # ``arg`` and ``args`` -- confirm against the real source.

    BaseDataSet.__init__(self)
    if arg.__class__ == self.__class__:
        # Copy construction requires exactly this class.
        if 'patterns' in args:
            patterns = args['patterns']
        else:
            patterns = range(len(arg))
        self.copyConstruct(arg, patterns)
    elif isinstance(arg, str):
        if 'data' not in args:
            # call form of raise is valid on both Python 2 and Python 3
            raise ValueError('missing data object')
        self.data = args['data']
        self.constructFromFile(arg)
    # Any other kind of `arg` matches neither branch and is ignored.
145
147
def copyConstruct(self, other, patterns):
    """
    Fill this object with the subset of `other` selected by `patterns`.

    :Parameters:
      - `other` - the data set to copy from
      - `patterns` - indices into ``other.pairs`` of the pairs to keep
    """
    # NOTE(review): the ``def`` line was missing from the reviewed copy of
    # this file; the signature is reconstructed from the call
    # ``self.copyConstruct(arg, patterns)`` -- confirm.

    chosen = []
    for index in patterns:
        chosen.append(other.pairs[index])
    self.pairs = chosen

    # the underlying data object is shared with `other`, not duplicated
    self.data = other.data
    self.labels = Labels(other.labels, patterns=patterns)
151
153
def constructFromFile(self, fileName):
    """
    Read pairs and their labels from a comma-delimited text file.

    Each non-blank line must look like ``id1_id2,label[,ignored...]``.
    The two IDs of a pair are stored in sorted order.  When ``self.data``
    is not None a pair is kept only if both IDs occur among that data
    set's pattern IDs; otherwise a message is printed and the pair is
    dropped.  When ``self.data`` is None every pair is kept.

    :Parameters:
      - `fileName` - name of the file to read
    :Raises:
      ValueError if the first field does not split into exactly two IDs
      on '_'; IndexError if a line has no label field.
    """
    # NOTE(review): the ``def`` line was missing from the reviewed copy of
    # this file; the signature is reconstructed from the call
    # ``self.constructFromFile(arg)`` -- confirm against the real source.

    delim = ','
    if self.data is not None:
        # presumably maps each pattern ID to its position in the data
        # set -- verify against misc.list2dict; only membership is used
        patternIDdict = misc.list2dict(self.data.labels.patternID,
                                       range(len(self.data)))
    else:
        patternIDdict = {}

    labelValues = []
    patternID = []
    pairs = []
    # the context manager closes the file even if a line fails to parse
    with open(fileName) as pairFile:
        for line in pairFile:
            # Strip only the line terminator: slicing off the last
            # character would eat real data on a final line that has no
            # newline, and would leave '\r' behind on CRLF files.
            line = line.rstrip('\r\n')
            if not line.strip():
                # blank lines carry no pair; skip them instead of failing
                continue
            tokens = line.split(delim)

            p1, p2 = tokens[0].split('_')
            # canonical order, so (a, b) and (b, a) get the same ID
            if p1 > p2:
                p1, p2 = p2, p1

            known = p1 in patternIDdict and p2 in patternIDdict
            if self.data is None or known:
                pairs.append((p1, p2))
                labelValues.append(tokens[1])
                patternID.append('_'.join([p1, p2]))
            else:
                # same text the old print statement produced, written so
                # that it parses on both Python 2 and Python 3
                print('%s  or  %s not found' % (p1, p2))
    self.pairs = pairs
    self.labels = Labels(labelValues, patternID=patternID)
179
181
182 return len(self.pairs)
183
185
186 return tuple(self.labels.patternID[i].split())
187
188
207
208
227