-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbase.py
350 lines (281 loc) · 11.9 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
#!/usr/bin/env python -3
# encoding: utf-8
#Created by gic on DATE.
# Copyright (C) 2010 Graham I Cummins
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA
#
from __future__ import print_function, unicode_literals
import inspect, search
'''
This module defines template classes that should be used for building extension
blocks. None of the classes here can be used directly.
Note that classes Tag and Structure in module gicdat.tag, and classes
Visualizer, Control, and Show from gicdat.gui.blocks, are also GDBlock
subclasses and can be used as base classes for extensions.
'''
class GDBlock(object):
    '''
    Common root of every gicdat extension block.

    The class carries no API of its own. Its only purpose is to give
    "isinstance" and "issubclass" something to test against. Every other
    template class in this module derives from it, and user-written blocks
    are expected to derive from one of those richer templates rather than
    from GDBlock directly.
    '''

    def __init__(self):
        # Nothing to set up; simply defer to the plain object initializer.
        object.__init__(self)
class Parser(GDBlock):
    '''
    Template for blocks that add support for file IO.

    All of the work happens in the two methods "read" and "write".
    Subclasses may not change the signature of either method.

    Class attributes:

    canread, canwrite -- sequences of filetype identifiers that the parser
        is able to read / write.
    extensions -- dictionary mapping the file extensions this parser knows
        about (written without the leading ".") to filetype identifiers.

    For a working example, see gicdat.stdblocks.gicparse.JSONParse
    '''
    # Empty in the template; concrete parsers override all three.
    canread = ()
    canwrite = ()
    extensions = {}

    def read(self, stream, filetype, **kw):
        '''
        Parse a document from "stream".

        stream -- a file-like object, open in read mode.
        filetype -- a mime-type-like string.
        kw -- implementation-specific keyword arguments.

        Implementations should return a 2-tuple (d, code). d should be a
        gicdat.doc.Doc instance. code may be None, or it may be a ZipFile
        instance containing additional gicdat blocks (which will be passed
        to gicdat.blocks.register).

        Parsers should _not_ try to handle externalDocuments themselves;
        just include the nodes describing them in d.

        The template implementation does nothing and returns None.
        '''
        return None

    def write(self, d, stream, filetype, **kw):
        '''
        Serialize the document "d" onto "stream".

        d -- a gicdat.doc.Doc.
        stream -- a file-like object, open in write mode.
        filetype -- a mime-type-like string.
        kw -- implementation-specific keyword arguments.

        Implementations should return None, and should not concern
        themselves in any way with externalDocuments.

        The template implementation does nothing.
        '''
        return None
class Transport(GDBlock):
    '''
    Template for blocks that know how to open url schemes and hand back
    file-like objects.

    canread and canwrite are sequences of "scheme" strings that the
    transport can handle.

    The API of a transport is a single method, "open", which returns a
    file-like object (see that method).
    '''
    # Empty in the template; concrete transports list their schemes here.
    canread = ()
    canwrite = ()

    def open(self, url, mode, filetype):
        '''
        Return a file-like object for "url", open in "mode".

        The template implementation does nothing and returns None.
        '''
        return None
class Tag(GDBlock):
    '''
    Abstract stand-in for the real Tag class.

    The implementation and documentation of Tag actually live in the module
    gicdat.tags. This superclass is used only by gicdat.blocks, to identify
    Tags (blocks can't import gicdat.tags directly without risking import
    loops).
    '''

    def __init__(self, pat, info):
        '''
        pat -- a pattern specification; it is handed to search.pattern and
            the result is stored as self.pat.
        info -- stored as the instance's __doc__.
        '''
        self.__doc__ = info
        compiled = search.pattern(pat)
        self.pat = compiled
def strtoslice(s):
    '''
    Parse the text of a python-style slice expression into tuples.

    "s" is split on "," into one chunk per dimension, and each chunk is
    split on ":". The first three fields of a chunk are converted with
    int(); an empty field becomes None, and a chunk with fewer than three
    fields is padded with None. Any fields beyond the third are kept as
    the original strings.

    Returns a single tuple if "s" has one chunk, otherwise a tuple of
    tuples (one per chunk). Note that plain tuples are returned, not
    slice objects. int() raises ValueError for a non-numeric field.
    '''
    def _field(text):
        # Empty string means "bound not given".
        return int(text) if text else None

    parsed = []
    for chunk in s.split(','):
        fields = chunk.split(':')
        bounds = [_field(f) for f in fields[:3]]
        # Pad out to (start, stop, step).
        bounds.extend([None] * (3 - len(bounds)))
        parsed.append(tuple(bounds + fields[3:]))
    return parsed[0] if len(parsed) == 1 else tuple(parsed)
class Transform(GDBlock):
    #FIXME
    '''
    Transforms do most of the work of data analysis. The job of a Transform
    is to provide a __call__ method that takes two gicdat.doc.Doc instances,
    and returns a tuple (Doc, list of strings) (the list may be empty).

    The first input doc contains the data used by the transform. The first
    return value is a "patch" which represents changes to this document as
    the result of running the transform (although in some cases this may be
    interpreted differently -- the original note on this point was left
    unfinished). The second doc provides the parameters used by the
    transform. Transforms also accept the second argument as a string, in
    which case it is considered to be a path within the first document
    where the parameters are stored.

    The list of strings is a collection of "reports" that were generated
    during the call. Transforms should never use "print" or write data to
    files, and they are not responsible for using the gicdat.control
    interface either. If code within the transform fails, it should raise
    an exception as normal, but if it would simply like to tell the user
    something (for debugging, as a warning, to report on the probable
    validity of a computation, etc), then it should add this report to the
    reports list which will be returned by __call__.

    Transforms should specify the attributes "inputs", "outputs", and
    "defaults", which should be dictionaries. "defaults" is used as a
    starting set of parameters, which is updated by the passed-in
    parameters during a call. Ideally, a transform should have defaults
    provided for every parameter, so that it is possible to run a test case
    with no input parameters (even if this case is not typically useful,
    although if possible it should also be the most useful and common
    case).

    "inputs" and "outputs" are dictionaries that can be passed to
    gicdat.search.pattern. The resulting pattern specifies the types of the
    transform's inputs and outputs. These should be detailed enough that an
    instance can determine if it is possible (and sensible) to run the
    transform on a given input document and parameter set, by matching
    self.inputs against self.pars(doc, pars, True). Any output that would
    be generated by the transform should match self.outputs (consequently,
    the system can determine if the output of one transform can be used as
    input to a second transform by calling
    search.contains(p1.outputs, p2.inputs)).

    Transforms should avoid significant polymorphisms, which make the
    "inputs" and "outputs" patterns inaccurate or very complicated.
    Instead, provide several related transforms.

    The base class provides a __call__ which constructs a parameter
    dictionary including the defaults and passes it to self.run, so
    subclasses may prefer to overload self.run rather than self.__call__
    '''
    inputs = None
    outputs = None
    # Shared class-level mapping. This class only reads it (pars() copies it
    # into a fresh document), so subclasses should replace it, not mutate it.
    defaults = {}

    # String types recognised as "path" / "->link" shortcuts. "unicode" only
    # exists on Python 2; fall back to str alone where it is undefined.
    try:
        _string_types = (str, unicode)
    except NameError:
        _string_types = (str,)

    def __init__(self):
        '''
        Compile the "inputs" and "outputs" specifications (if given) into
        search patterns, stored on the instance under the same names.
        '''
        if self.inputs:
            self.inputs = search.pattern(self.inputs)
        # This guard previously tested self.inputs a second time, so
        # "outputs" was compiled (or not) depending on the wrong attribute.
        if self.outputs:
            self.outputs = search.pattern(self.outputs)

    def expandlinks(self, d):
        '''
        returns a patch document for the parameter document d which expands
        three syntactic shortcuts that may be used in writing transform
        parameters. These are key values that are strings beginning with
        "->". The remainder of such a string will be treated as the _link
        attribute for a link dictionary. If the string ends with a [...]
        python slice, this will be parsed (by strtoslice) and entered as
        the _slice element. If the string begins with "->::" the remainder
        will be treated as a local link within the parameter document
        (which may be needed when building the intermediate steps of a
        remote or multiprocess tool chain, which may receive input data
        within the parameter set, rather than in an input document)
        '''
        patch = d.new()
        local_links = []
        for k in d:
            value = d[k]
            if not isinstance(value, self._string_types):
                continue
            if value.startswith('->::'):
                # Resolved in a second pass, once every "->" link is built.
                local_links.append((k, value[4:]))
            elif value.startswith('->'):
                ld = d.new()
                target = value[2:]
                if "[" in target:
                    target, slicetext = target.rstrip(']').split('[')
                    ld['_slice'] = strtoslice(slicetext)
                ld['_link'] = target
                patch[k] = ld
        for k, v in local_links:
            # Prefer an already-expanded entry over the raw parameter value.
            if v in patch:
                patch[k] = patch[v]
            else:
                patch[k] = d[v]
        return patch

    def __call__(self, doc, pars=None):
        '''
        Construct parameters dictionary using defaults if needed, and pass
        it to self.run

        Returns a tuple (r, m): r is a new, initially empty instance of
        doc's class that self.run fills in, and m is the list of report
        strings that self.run appended to.
        '''
        pars = self.pars(doc, pars, True)
        r = doc.__class__()
        m = []
        self.run(pars, r, m)
        return (r, m)

    def extract(self, doc, pars):
        '''
        Return a patch for "pars" in which every link dictionary (a dict
        value whose "_link" entry is not None) is replaced by the result
        of doc.resolvelink on it.
        '''
        p = pars.new()
        for k in pars.keys(-1):
            if isinstance(pars[k], dict) and pars[k]["_link"] is not None:
                p[k] = doc.resolvelink(pars[k])
                if isinstance(p[k], dict):
                    # Copy before editing so the resolved object itself is
                    # left untouched, then blank the link bookkeeping keys.
                    # NOTE(review): presumably None entries are dropped by
                    # the later pars.clean() -- confirm against Doc.
                    p[k] = p[k].copy()
                    p[k + "._link"] = None
                    p[k + "._slice"] = None
        return p

    def requires(self, doc, pars):
        '''
        Return the set of "_link" targets named by the link dictionaries in
        the (unextracted) parameters for this doc/pars pair.
        '''
        need = []
        pars = self.pars(doc, pars, extract=False)
        for k in pars.keys(-1):
            if isinstance(pars[k], dict) and pars[k]["_link"] is not None:
                need.append(pars[k]['_link'])
        return set(need)

    def pars(self, doc, pars, extract=True):
        '''
        Build the parameter document used for a call.

        pars may be a string (looked up with doc.sub), a plain dict
        (wrapped in doc's class), None (an empty instance of doc's class),
        or anything else, which is used as given. self.defaults, if
        non-empty, is fused underneath it; "->" shortcuts are expanded
        (see expandlinks); and, if "extract" is true, links are resolved
        against doc (see extract). The result is clean()ed and returned.
        '''
        if isinstance(pars, self._string_types):
            pars = doc.sub(pars)
        elif type(pars) is dict:
            # Exact-type test on purpose: only plain dicts are re-wrapped.
            # NOTE(review): presumably Doc subclasses dict and must pass
            # through unchanged -- confirm.
            pars = doc.__class__(pars)
        elif pars is None:
            pars = doc.__class__()
        if self.defaults:
            pars = doc.__class__(self.defaults).fuse(pars)
        el = self.expandlinks(pars)
        if el:
            pars = pars.fuse(el)
        if extract:
            nd = self.extract(doc, pars)
            if nd:
                # NOTE(review): every other fuse call here passes a single
                # argument; confirm Doc.fuse accepts (pars, nd), or whether
                # pars.fuse(nd) was intended.
                pars = pars.fuse(pars, nd)
        pars.clean()
        return pars

    def test(self, doc, pars):
        '''
        Test if the given doc and pars are valid inputs according to
        self.inputs (and given self.defaults). If the test succeeds,
        return 0. Otherwise, return a string containing an explanation of
        the failure ("No Signature" if self.inputs is None, else whatever
        search.whynot reports).
        '''
        #FIXME
        # Identity test: "==" on a pattern is used for matching (below).
        if self.inputs is None:
            return "No Signature"
        pars = self.pars(doc, pars, True)
        if self.inputs == pars:
            return 0
        # Previously passed the bound method self.pars here, not a pattern.
        # NOTE(review): confirm the argument order search.whynot expects.
        return search.whynot(pars, self.inputs)

    def follows(self, tf):
        '''
        test if this transform can come in a sequence after transform tf

        Returns 0 if it can, "Missing Signature" if either tf.outputs or
        self.inputs is unset, and otherwise the explanation produced by
        search.whynotcontains.
        '''
        if not tf.outputs or not self.inputs:
            return "Missing Signature"
        if search.contains(tf.outputs, self.inputs):
            return 0
        return search.whynotcontains(tf.outputs, self.inputs)

    def callWith(self, f, sp, p):
        '''
        Call f with keyword arguments chosen by its argument names: each
        name is taken from sp if present there, else from p if present
        there, else left out. Returns whatever f returns.
        '''
        # getargspec was removed in Python 3.11; use its replacement when
        # available. Index 0 is the positional argument name list in both.
        argspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
        arg = {}
        for a in argspec(f)[0]:
            if a in sp:
                arg[a] = sp[a]
            elif a in p:
                arg[a] = p[a]
        return f(**arg)

    def run(self, pars, out, messages):
        '''
        Do the work. Subclasses should overload this.

        pars is the parameter document, out is the (initially empty)
        output document to fill in, and messages is the list to which
        report strings should be appended. __call__ ignores the return
        value. The base implementation only records one report.
        '''
        messages.append('called an empty Transform')
class Service(GDBlock):
    '''
    Placeholder block type. It adds nothing to GDBlock in this module.

    NOTE(review): the intended contract for services is not documented
    here.
    '''