-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmarkdoc.py
501 lines (402 loc) · 18 KB
/
markdoc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
"""Docstring to Markdown.
This piece of code is used within [Shed](https://www.github.com/cmry/shed) to
convert NumPy-styled Python docstrings to Markdown. In this way, only the
code documentation needs to be edited, and the Readthedocs is automatically
updated thereafter. To use it, simply type `python3 markdoc.py
somefile.py somedoc.md`. Check class help for Python usage. Don't forget to
also check the class notes (they are important for docstring format
guidelines).
"""
from sys import argv
# Author: Chris Emmery
# License: MIT
# pylint: disable=W0142,R0201
# Troubleshooting text: the command-line entry point at the bottom of this
# file appends it to the error report when a conversion raises.
ERR = '''
* list index out of range
- \"\"\" used for non-docstring strings.
- First Class or method Documentation line has newline after \"\"\".
Fix with: \"\"\"This is the first line.
Here comes the rest.
\"\"\"
* Class parameters not showing in code blocks
- Class is not structed new-style (class SomeClass(object):)
* Code blocks not showing
- Code is in IPython style rather than vanilla (i.e. In [1]: instead of >>>)
* tuple index out of range
- Mistake in var : type lists (for example var: type).
- Description also contains a :.
* some table parts are not showing
- Probably included some seperators in there such as : or -----.
* methods are garbled
- Did you name a parameter 'class' or any other matches?
'''
# Header of a three-column markdown table; {0} is the label of the first
# column (MarkDoc.md_table passes the section name).
MD_TABLE = '''
| {0} | Type | Doc |
|:-------|:-----------------|:----------------|
'''
# One row of the three-column table: {0}, {1} and {2} fill the three cells.
MD_TABLE_ROW = '| {0} | {1} | {2} | \n'
# Header of the two-column table used by MarkDoc.md_methods_doc for the
# method overview; {0} is the label of the first column.
MD_TABLE_ALT = '''
| {0} | Doc |
|:-------|:----------------|
'''
# One row of the two-column table: {0} and {1} fill the two cells.
MD_TABLE_ROW_ALT = '| {0} | {1} | \n'
# Fenced python code block; {0} is the code placed between the fences.
MD_CODE = '``` python \n {0} \n```\n\n'
class MarkDoc(object):
    r"""Documentation to Markdown.

    This class converts a Python file with properly formed NumPy-styled
    docstrings to a markdown file that can be used in for example Readthedocs
    or Jekyll.

    Parameters
    ----------
    file_in : str
        Python (.py) file from where the docstrings should be taken.
    file_out : str
        Markdown (.md) file where the documentation should be written.
    cold : boolean, optional, default False
        If you want to start the class without starting the reader, set True.

    Attributes
    ----------
    markdown : list of str
        Markdown chunks in document order: the file title first, then for
        every class its class documentation followed by its method
        documentation. It is an empty string until `read` has run.

    Examples
    --------
    Use of the package should just be from commandline with `python3 markdoc.py
    in.py out.md`. However, if for some reason it is required to load it as a
    class, you can just do:
    >>> from markdoc import MarkDoc
    >>> md = MarkDoc('some/dir/file.py', '/some/otherdir/doc.md')
    The markdown that was written out can be accessed by:
    >>> print('\n'.join(md.markdown))

    Notes
    -----
    Styleguide conflicts that still garble the output:
    #### Docstring boundaries
    - \"\"\" used for non-docstring strings.
    - First Class or method Documentation line has newline after \"\"\".
    Fix with: \"\"\"This is the first line.
    Here comes the rest.
    \"\"\"
    #### Class parameters not showing in code blocks?
    - Class is not structured new-style (object after class).
    #### Code blocks not showing?
    - Code is IPython style rather than vanilla (i.e. In [1]: instead of >>>).
    #### Table rows missing?
    - Entries need a colon between name and type (var : type) and their
    description has to be indented deeper than the entry itself.
    """

    def __init__(self, file_in, file_out, cold=False):
        """Handle in/out to readers and parsers."""
        self.markdown = ''
        if not cold:
            self.read(file_in, file_out)

    # FIXME: code does not handle staticmethods yet?
    def read(self, file_in, file_out):
        """Open Python code file, spit out markdown file.

        Parameters
        ----------
        file_in : str
            Path of the Python file to read.
        file_out : str
            Path of the markdown file to write.
        """
        with open(file_in, 'r') as source:
            # get rid of special docstring parts (raw docstring prefix)
            classes = source.read().replace('r"""', '"""').split('\nclass ')
        self.markdown = [self.md_title(classes.pop(0))]  # top of the file
        self.handle_classes(classes)
        with open(file_out, 'w') as target:
            target.write('\n'.join(self.markdown))

    def handle_classes(self, classes):
        """Given class docs only, write out their docs and methods.

        Parameters
        ----------
        classes : list
            Source text of every class, each starting right after the
            `class ` keyword (it was cut off by the split in `read`).
        """
        if not isinstance(self.markdown, list):
            # cold start: nothing has been read yet, begin a fresh document
            self.markdown = [self.markdown] if self.markdown else []
        for class_code in classes:
            class_code = 'class ' + class_code  # got cut-off in split
            # every part ends where a docstring closes; the tail after the
            # last docstring holds no documentation and is dropped
            class_parts = class_code.split('"""\n')[:-1]
            if not class_parts:  # class without any docstring
                continue
            # class docstring comes first, rest is methods
            class_doc = class_parts.pop(0)
            self.markdown.append(
                self.md_class_doc(self.structure_doc(class_doc)))
            # replace object part with __init__ params; address the entry
            # that was just appended so consecutive classes each get their
            # own signature
            self.markdown[-1] = \
                self.md_class_method(self.markdown[-1], class_parts)
            # start handling methods, skipping parts without a declaration
            # (for example triple-quoted strings that are not docstrings)
            methods = [self.structure_doc(part) for part in class_parts]
            self.markdown.append(
                self.md_methods_doc([doc for doc in methods if doc['name']]))

    def find_declaration(self, lines):
        """Find `class Bla(object):` or `def method(params):` across lines.

        The declaration that directly precedes the docstring is taken, so
        leftover body code of the previous method is skipped whatever its
        indentation is. A declaration may span several lines; it is complete
        once it holds a colon and all of its parentheses are closed.

        Parameters
        ----------
        lines : list
            Lines of one documentation part. The list is changed in place:
            everything up to and including the declaration is removed. If
            no declaration is found the list is emptied.

        Returns
        -------
        declaration : str
            The raw declaration lines glued together, or an empty string if
            there is none.
        """
        start = None
        for index, line in enumerate(lines):
            stripped = line.lstrip()
            if stripped.startswith(('class ', 'def ')):
                start = index
            elif start is not None and '"""' in stripped:
                break  # docstring starts here, latest declaration owns it
        if start is None:
            del lines[:]
            return ''
        declaration, end = '', start
        while end < len(lines):
            declaration += lines[end]
            end += 1
            # a colon inside open parentheses is an annotation or a default
            if ':' in declaration and \
                    declaration.count('(') <= declaration.count(')'):
                break
        del lines[:end]
        return declaration

    def structure_doc(self, doc):
        """Produce a structured dictionary from docstring.

        The key is the part of the documentation (name, title, Parameters,
        Attributes, etc.), and value is a list of lines that make up this
        part. Is used to handle the order and different parses of different
        sections.

        Parameters
        ----------
        doc : str
            Flat string structure of a docstring block.

        Returns
        -------
        X_doc : dict
            Class or method doc. `name` holds the declaration as a single
            line without the trailing colon, `title` the lines above the
            first section, and every section heading (spaces removed) the
            raw lines of that section.
        """
        lines = [line for line in doc.split('\n') if line.strip()]
        declaration = ' '.join(self.find_declaration(lines).split())
        if declaration.endswith(':'):
            declaration = declaration[:-1].rstrip()
        sections = {'title': []}
        current = sections['title']
        for index, line in enumerate(lines):
            marker = line.strip()
            # a section underline consists of dashes only
            if len(marker) >= 3 and not marker.strip('-'):
                if index == 0:  # nothing above it to serve as heading
                    continue
                heading = lines[index - 1]
                # the heading was buffered as content of the previous part
                if current and current[-1] == heading:
                    current.pop()
                current = sections.setdefault(''.join(heading.split()), [])
                continue
            current.append(line)
        sections['name'] = declaration
        return sections

    def md_title(self, title_part, class_line=False):
        """Handle title and text parts for main and class-specific.

        Separates the top part that should not be part of the title
        docstring off the document, handles correct punctuation of titles,
        for both classes and methods. Also detects if there's a description
        under the first line.

        Parameters
        ----------
        title_part : str
            Part of the document that starts with triple quotes and is below
            any line separators.
        class_line : bool, optional, default False
            The first line of the docstring by default serves as a title
            header. However, for classes it should be formed into a normal
            description. Therefore, the # marker is removed and a . is added.

        Returns
        -------
        line_buffer : str
            Correctly formatted markdown from the buffer.
        """
        quotes = '"""'
        line_buffer, start_buffer, description = [], False, False
        for line in title_part.split('\n'):
            line = line.strip()
            if line.count(quotes) == 2:  # one liner
                return self._headline(line, class_line)
            if line.startswith(quotes):
                if start_buffer:  # closing quotes, handle top of file
                    break
                line_buffer.append(self._headline(line, class_line))
                start_buffer = True
            elif start_buffer:
                if not description:
                    # blank line between the title and its description
                    line = '\n' + line
                    description = True
                line_buffer.append(line)
        return '\n'.join(line_buffer)

    @staticmethod
    def _headline(line, class_line):
        """Turn the first docstring line into a header or a sentence."""
        text = line.replace('"""', '')
        if text.endswith('.'):  # headers carry no final period
            text = text[:-1]
        return text + '.' if class_line else '# ' + text

    @staticmethod
    def _identifier(declaration):
        """Give the bare class or method name of a declaration."""
        words = declaration.replace('(', ' (').split()
        return words[1] if len(words) > 1 else declaration

    @staticmethod
    def _signature(declaration):
        """Give `name(params)` without the def keyword and without self."""
        if declaration.startswith('def '):
            declaration = declaration[len('def '):]
        head, paren, params = declaration.partition('(')
        if not paren:
            return declaration
        params = params.lstrip()
        if params.startswith('self'):
            remainder = params[len('self'):].lstrip()
            if remainder.startswith(','):
                params = remainder[1:].lstrip()
            elif remainder.startswith(')'):
                params = remainder
        return head + '(' + params

    def md_table(self, doc, name):
        r"""Place parameters and attributes in a table overview.

        The documentation parts that are typed by 'var : type \n description'
        can be split into a table. Table structures are on the top of this
        Python file.

        Parameters
        ----------
        doc : list
            Raw lines of one docstring section.
        name : str
            Indicator for the table, can be for example Parameters or
            Attributes.

        Returns
        -------
        table : str
            Markdown-type table with filled rows and formatted name, or an
            empty string if the section is missing or empty.

        Notes
        -----
        A line opens a new row if it holds a colon and is not indented
        deeper than the first line of the section. All other lines are
        added to the description of the row that is open, so a description
        may contain colons as long as it is indented.
        """
        if not doc:
            return ''
        base_indent = len(doc[0]) - len(doc[0].lstrip())
        rows = []
        for row in doc:
            indent = len(row) - len(row.lstrip())
            if ':' in row and indent <= base_indent:  # it's var : type
                var, _, var_type = row.partition(':')
                rows.append((var.strip(), var_type.strip(), []))
            elif rows:  # if no next, keep appending to the description
                rows[-1][2].append(row.replace('\n', ' ').strip())
        table = [MD_TABLE.format(name)]
        for var, var_type, description in rows:
            table.append(
                MD_TABLE_ROW.format(var, var_type, ' '.join(description)))
        return ''.join(table)

    def md_code_text(self, doc, name, flat=False):
        """Section text and/or code in the example part of the doc.

        Rows that start with a code marker (>>> or ...) are buffered to
        code, everything else is regarded as text. The order is recorded so
        that multiple consecutive blocks of text and code are possible.

        Parameters
        ----------
        doc : list
            Raw lines of one docstring section.
        name : str
            Name of the section (Examples, Notes).
        flat : bool, optional, default False
            If there are no code blocks, flat can be used to stop parsing
            them.

        Returns
        -------
        code : str
            Formatted block of fenced markdown code-snippets and flat text,
            or an empty string if the section is missing or empty.

        Notes
        -----
        Please note that this method assumes markdown uses so-called fenced
        codeblocks that are commonly accepted by Readthedocs, GitHub,
        Bitbucket, and Jekyll (for example with Redcarpet).
        This current implementation does NOT allow for IPython styled code
        examples. (In [1]: Out[1]: etc.)
        """
        if not doc:
            return ''
        head = '\n\n------- \n\n##{1}\n\n{0}'
        order, text, code = [], [], []
        for row in doc:
            row = row.strip()
            if not flat and row.startswith(('>>>', '...')):
                if text:
                    # escapes are only needed inside the Python docstring
                    order.append(''.join(text).replace('\\', ''))
                    text = []
                code.append(row + '\n')
            else:
                if code:
                    # if we found code, encapsulate in ``` python ``` blocks.
                    order.append(MD_CODE.format(''.join(code)))
                    code = []
                # a row that ends a sentence also ends the paragraph
                ending = '\n' if row.endswith(('!', '?', '.', ':')) else ''
                text.append(row + '\n' + ending)
        if text:  # empty the buffers
            order.append(''.join(text).replace('\\', ''))
        if code:
            order.append(MD_CODE.format(''.join(code)))
        # markdown headers need a space between the # markers and the text
        heading = name if name.startswith('#') else ' ' + name
        return head.format('\n'.join(order).replace('.\n', '.\n\n'), heading)

    def md_class_method(self, doc, class_parts):
        """Replace `class ...(object)` with `__init__` arguments.

        In NumPy-style, each class is represented as its name, along with
        the parameters accepted in `__init__` as its parameters, as you would
        call the method in Python. The markdown therefore needs to fill the
        (object) tag that is assigned to classes with the `__init__` params.

        Parameters
        ----------
        doc : str
            Markdown of the class documentation.
        class_parts : list
            Documentation parts of the methods of the class. If the first
            one belongs to `__init__` it is removed from the list, so that
            it is not documented as a method again.

        Returns
        -------
        doc : str
            Same as doc, only now the first (object) has been replaced with
            the `__init__` parameters. Unchanged if the first part is not
            `__init__`.

        Notes
        -----
        Please note that this implementation assumes you're using new-style
        class declarations where for example `class SomeClass:` should be
        written as `class SomeClass(object):`.
        """
        if not class_parts:
            return doc
        declaration = self.structure_doc(class_parts[0])['name']
        if not declaration.startswith('def __init__('):
            return doc
        class_parts.pop(0)
        params = self._signature(declaration)[len('__init__'):]
        # FIXME: (object) screws up other class inherits, make general regex
        # only the declaration on top is meant, not the text below it
        return doc.replace('(object)', params, 1)

    def md_methods_doc(self, method_doc):
        """Merge all method elements in one string.

        This method will scan each method docstring and extract parameters,
        returns and descriptions. It will append them to the method table
        and link (TODO) them in the doc.

        Parameters
        ----------
        method_doc : list
            Structured method docs (see `structure_doc`), one per method.

        Returns
        -------
        methods : str
            Formatted markdown string with all method information.

        Notes
        -----
        On the bottom, dict calls are being done to order several parts of
        the docstring. If you use more than Parameters and Returns, please
        make sure they are added in the code there.
        """
        mark_head = '\n--------- \n\n## Methods \n\n {0} \n {1}'
        method_table = [MD_TABLE_ALT.format('method')]
        mark_doc = []
        for method in method_doc:
            # isolate only the name of the function (without def and params)
            name = self._identifier(method['name'])
            title = self.md_title('\n'.join(method.get('title', [])),
                                  class_line=True)
            # name and first line of description are added to method_table
            method_table.append(
                MD_TABLE_ROW_ALT.format(name, title.split('\n')[0]))
            mark_doc.append('\n\n### ' + name + '\n\n')
            # show the method call with parameters
            mark_doc.append(MD_CODE.format(self._signature(method['name'])))
            mark_doc.append('\n' + title)
            # add to sections here if more blocks are possible!
            mark_doc.append(
                self.md_table(method.get('Parameters'), 'Parameters'))
            mark_doc.append(self.md_table(method.get('Returns'), 'Returns'))
            mark_doc.append(
                self.md_code_text(method.get('Notes'), '## Notes'))
        return mark_head.format(''.join(method_table), ''.join(mark_doc))

    def md_class_doc(self, class_doc):
        """Merge all class section elements in one string.

        This will piece together the descriptions contained in the docstring
        of the class. Currently, they are written *below* the top of the
        Python file, and not put into any subfiles per class.

        Parameters
        ----------
        class_doc : dict
            Structured class doc (see `structure_doc`).

        Returns
        -------
        mark_doc : str
            Markdown formatted class docstring, including all subsections.

        Notes
        -----
        On the bottom, dict calls are being done to order several parts of
        the docstring. If you use more than Parameters, Attributes, Examples,
        and Notes, please make sure they are added in the code there.
        """
        mark_doc = [
            # class name is being used as title
            '\n\n# {0} \n\n'.format(self._identifier(class_doc['name'])),
            MD_CODE.format(class_doc['name']),
            # first docstring line for a class is just written as a sentence
            self.md_title('\n'.join(class_doc.get('title', [])),
                          class_line=True),
            # add to sections here if more blocks are possible!
            self.md_table(class_doc.get('Parameters'), 'Parameters'),
            self.md_table(class_doc.get('Attributes'), 'Attributes'),
            self.md_code_text(class_doc.get('Examples'), 'Examples'),
            self.md_code_text(class_doc.get('Notes'), 'Notes', flat=True),
        ]
        return ''.join(mark_doc)
if __name__ == '__main__':
    # Command-line entry point: python3 markdoc.py somefile.py somedoc.md
    # Both paths are required. Without this check a missing argument would
    # be reported as malformed code ("list index out of range").
    if len(argv) < 3:
        raise SystemExit("Usage: python3 markdoc.py somefile.py somedoc.md")
    try:
        D2M = MarkDoc(argv[1], argv[2])
    except Exception as err:
        # Top-level boundary: show the error together with the list of
        # common causes and leave with a non-zero exit status. SystemExit
        # is raised directly because the `exit` helper is only installed
        # by the site module.
        raise SystemExit(
            "-"*10 + "\nERROR! Malformed code \n\nTraceback: \n\n{1} \n\n\n"
            "Some common errors: \n{0}".format(ERR, err))
    else:
        print("Succesfully converted!")