Skip to content

Commit 0dac97a

Browse files
committed
docscrape
1 parent 7c550e8 commit 0dac97a

File tree

2 files changed

+294
-0
lines changed

2 files changed

+294
-0
lines changed

fastcore/docscrape.py

Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
"Parse numpy-style docstrings"
2+
3+
"""
4+
Based on code from numpy, which is:
5+
Copyright (c) 2005-2022, NumPy Developers.
6+
All rights reserved.
7+
8+
Redistribution and use in source and binary forms, with or without
9+
modification, are permitted provided that the following conditions are
10+
met:
11+
12+
* Redistributions of source code must retain the above copyright
13+
notice, this list of conditions and the following disclaimer.
14+
15+
* Redistributions in binary form must reproduce the above
16+
copyright notice, this list of conditions and the following
17+
disclaimer in the documentation and/or other materials provided
18+
with the distribution.
19+
20+
* Neither the name of the NumPy Developers nor the names of any
21+
contributors may be used to endorse or promote products derived
22+
from this software without specific prior written permission. """
23+
24+
__all__ = ['Parameter', 'NumpyDocString', 'dedent_lines']

# Everything below is used by this module; without these imports the module
# fails with NameError as soon as it is imported.
import copy
import inspect
import re
import textwrap
from collections import namedtuple
from collections.abc import Mapping
from warnings import warn

# One entry of a parameter-style section: `name` and `type` are strings (either
# may be empty) and `desc` is the list of description lines.
Parameter = namedtuple('Parameter', ['name', 'type', 'desc'])
27+
28+
def strip_blank_lines(l):
    "Remove leading and trailing blank lines from a list of lines (in place; the same list is returned)"
    # Count the run of whitespace-only lines at the front and drop it in one go.
    head = 0
    while head < len(l) and not l[head].strip(): head += 1
    del l[:head]
    # Then find where the trailing run of whitespace-only lines begins.
    tail = len(l)
    while tail > 0 and not l[tail - 1].strip(): tail -= 1
    del l[tail:]
    return l
33+
34+
35+
class Reader:
    """A line-based string reader with a movable cursor.

    `data` is either a list of lines (stored as-is, not copied) or a single
    string, which is split on newline characters.
    """
    def __init__(self, data):
        self._str = data if isinstance(data, list) else data.split('\n')
        self.reset()

    def __getitem__(self, n):
        "Return line `n` (or a slice of lines) without moving the cursor"
        return self._str[n]

    def reset(self):
        "Move the cursor back to the first line"
        self._l = 0  # current line nr

    def eof(self):
        "True once the cursor has moved past the last line"
        return self._l >= len(self._str)

    def read(self):
        "Return the line under the cursor and advance past it; return '' at end of input"
        if self.eof(): return ''
        out = self[self._l]
        self._l += 1
        return out

    def seek_next_non_empty_line(self):
        "Advance the cursor over blank lines; stop on the first non-blank line or at end of input"
        for l in self[self._l:]:
            if l.strip(): break
            self._l += 1

    def read_to_condition(self, condition_func):
        """Consume and return lines until `condition_func(line)` is truthy.

        The matching line itself is not consumed. If no line matches, all
        remaining lines are consumed and returned. Returns [] when the cursor
        is already at end of input.
        """
        start = self._l
        for line in self[start:]:
            if condition_func(line): return self[start:self._l]
            self._l += 1
            if self.eof(): return self[start:self._l+1]
        return []

    def read_to_next_empty_line(self):
        "Skip leading blank lines, then consume and return lines up to the next blank line"
        self.seek_next_non_empty_line()
        def is_empty(line): return not line.strip()
        return self.read_to_condition(is_empty)

    def read_to_next_unindented_line(self):
        "Consume and return lines up to the next non-blank line that has no leading whitespace"
        def is_unindented(line): return (line.strip() and (len(line.lstrip()) == len(line)))
        return self.read_to_condition(is_unindented)

    def peek(self, n=0):
        "Return the line `n` positions from the cursor without moving it; '' if that position is outside the input"
        i = self._l + n
        # Reject negative positions explicitly: a plain index would wrap
        # around and return a line from the end of the buffer.
        if 0 <= i < len(self._str): return self[i]
        return ''

    def is_empty(self):
        "True if the whole input contains nothing but whitespace"
        return not ''.join(self._str).strip()
81+
82+
83+
class ParseError(Exception):
    "Parsing failure; if a `docstring` attribute has been set on the instance it is appended to the message"
    def __str__(self):
        base = self.args[0]
        try: doc = self.docstring
        except AttributeError: return base  # no docstring attached: plain message
        return f"{base} in {doc!r}"
88+
89+
90+
class NumpyDocString(Mapping):
    """Parses a numpydoc string to an abstract representation.

    The instance is a read-mostly mapping from section name to parsed content.
    Only the keys listed in `sections` are stored; assigning any other key
    emits a warning and is otherwise ignored. After parsing, 'Parameters' is a
    dict of `{name: Parameter}`; the other parameter-style sections stay lists
    of `Parameter`.
    """
    sections = { 'Signature': '', 'Summary': [''], 'Extended': [], 'Parameters': [], 'Returns': [], 'Yields': [], 'Raises': [] }

    def __init__(self, docstring, config=None):
        # `config` is accepted but not used by this class.
        docstring = textwrap.dedent(docstring).split('\n')
        self._doc = Reader(docstring)
        # Deep copy so instances never share (or mutate) the class-level defaults.
        self._parsed_data = copy.deepcopy(self.sections)
        self._parse()
        if 'Parameters' in self: self['Parameters'] = {o.name:o for o in self['Parameters']}

    def __iter__(self): return iter(self._parsed_data)
    def __len__(self): return len(self._parsed_data)
    def __getitem__(self, key): return self._parsed_data[key]

    def __setitem__(self, key, val):
        "Store `val` under a known section; unknown sections only trigger a warning"
        if key not in self._parsed_data: self._error_location(f"Unknown section {key}", error=False)
        else: self._parsed_data[key] = val

    def _is_at_section(self):
        "Skip blank lines and report whether the cursor is on a `Name` line followed by a `---`/`===` underline"
        self._doc.seek_next_non_empty_line()
        if self._doc.eof(): return False
        l1 = self._doc.peek().strip()  # e.g. Parameters
        l2 = self._doc.peek(1).strip()  # ---------- or ==========
        if len(l2) >= 3 and (set(l2) in ({'-'}, {'='}) ) and len(l2) != len(l1):
            snip = '\n'.join(self._doc._str[:2])+'...'
            self._error_location("potentially wrong underline length... \n%s \n%s in \n%s" % (l1, l2, snip), error=False)
        return l2.startswith('-'*len(l1)) or l2.startswith('='*len(l1))

    def _strip(self, doc):
        "Return `doc` without its leading and trailing blank lines (as a new slice)"
        i = 0
        j = 0
        for i, line in enumerate(doc):
            if line.strip(): break
        for j, line in enumerate(doc[::-1]):
            if line.strip(): break
        return doc[i:len(doc)-j]

    def _read_to_next_section(self):
        "Consume and return all lines up to the next section header (or end of input)"
        section = self._doc.read_to_next_empty_line()

        while not self._is_at_section() and not self._doc.eof():
            # Keep one blank line between paragraphs if the previous line was blank.
            if not self._doc.peek(-1).strip(): section += ['']
            section += self._doc.read_to_next_empty_line()
        return section

    def _read_sections(self):
        "Yield `(name, content_lines)` for each remaining section"
        while not self._doc.eof():
            data = self._read_to_next_section()
            if not data: return
            name = data[0].strip()

            if name.startswith('..'): yield name, data[1:]
            # A chunk without an underline line is not a section: stop here.
            # (Yielding `StopIteration` would hand the exception class to the
            # caller as an item and break its tuple unpacking.)
            elif len(data) < 2: return
            else: yield name, self._strip(data[2:])  # drop the name and underline lines

    def _parse_param_list(self, content, single_element_is_type=False):
        """Parse the lines of a parameter-style section into a list of `Parameter`.

        Each unindented line is a header of the form `name : type`. A header
        without ` :` is taken as the name, or as the type when
        `single_element_is_type` is true. The indented lines that follow form
        the description.
        """
        content = dedent_lines(content)
        r = Reader(content)
        params = []
        while not r.eof():
            header = r.read().strip()
            if ' :' in header:
                arg_name, arg_type = header.split(' :', maxsplit=1)
                arg_name, arg_type = arg_name.strip(), arg_type.strip()
            elif single_element_is_type: arg_name, arg_type = '', header
            else: arg_name, arg_type = header, ''

            desc = r.read_to_next_unindented_line()
            desc = dedent_lines(desc)
            desc = strip_blank_lines(desc)
            params.append(Parameter(arg_name, arg_type, desc))
        return params

    def _parse_summary(self):
        """Grab signature (if given) and summary"""
        if self._is_at_section(): return

        # Compiled once per call rather than once per loop iteration.
        signature_re = re.compile(r'^([\w., ]+=)?\s*[\w\.]+\(.*\)$')
        # If several signatures present, take the last one
        while True:
            summary = self._doc.read_to_next_empty_line()
            summary_str = " ".join([s.strip() for s in summary]).strip()
            if signature_re.match(summary_str):
                self['Signature'] = summary_str
                if not self._is_at_section(): continue
            break

        self['Summary'] = summary
        if not self._is_at_section(): self['Extended'] = self._read_to_next_section()

    def _parse(self):
        """Parse the whole docstring into `_parsed_data`.

        Raises `ValueError` if both Returns and Yields sections are present, if
        a Receives section appears without Yields, or if a section appears twice.
        """
        self._doc.reset()
        self._parse_summary()

        sections = list(self._read_sections())
        section_names = {section for section, content in sections}

        has_returns = 'Returns' in section_names
        has_yields = 'Yields' in section_names
        # We could do more tests, but we are not. Arbitrarily.
        if has_returns and has_yields:
            msg = 'Docstring contains both a Returns and Yields section.'
            raise ValueError(msg)
        if not has_yields and 'Receives' in section_names:
            msg = 'Docstring contains a Receives section but not Yields.'
            raise ValueError(msg)

        for (section, content) in sections:
            if not section.startswith('..'):
                # Normalise capitalisation, e.g. "other parameters" -> "Other Parameters".
                section = (s.capitalize() for s in section.split(' '))
                section = ' '.join(section)
                if self.get(section):
                    self._error_location("The section %s appears twice in %s" % (section, '\n'.join(self._doc._str)))

            if section in ('Parameters', 'Other Parameters', 'Attributes', 'Methods'):
                self[section] = self._parse_param_list(content)
            elif section in ('Returns', 'Yields', 'Raises', 'Warns', 'Receives'):
                self[section] = self._parse_param_list( content, single_element_is_type=True)
            else: self[section] = content

    @property
    def _obj(self):
        "The documented object, if a `_cls` or `_f` attribute has been set on the instance; otherwise None"
        if hasattr(self, '_cls'): return self._cls
        elif hasattr(self, '_f'): return self._f
        return None

    def _error_location(self, msg, error=True):
        "Raise `ValueError(msg)` if `error`, else `warn(msg)`; the message is extended with the object name and file when known"
        if self._obj is not None:
            # we know where the docs came from:
            try: filename = inspect.getsourcefile(self._obj)
            except TypeError: filename = None
            # Make UserWarning more descriptive via object introspection.
            # Skip if introspection fails
            name = getattr(self._obj, '__name__', None)
            if name is None:
                name = getattr(getattr(self._obj, '__class__', None), '__name__', None)
            if name is not None: msg += f" in the docstring of {name}"
            msg += f" in {filename}." if filename else ""
        if error: raise ValueError(msg)
        else: warn(msg)

    # string conversion routines

    def _str_header(self, name, symbol='-'): return [name, len(name)*symbol]
    def _str_indent(self, doc, indent=4): return [' '*indent + line for line in doc]

    def _str_signature(self):
        if self['Signature']: return [self['Signature'].replace('*', r'\*')] + ['']
        return ['']

    def _str_summary(self):
        if self['Summary']: return self['Summary'] + ['']
        return []

    def _str_extended_summary(self):
        if self['Extended']: return self['Extended'] + ['']
        return []

    def _str_param_list(self, name):
        "Render section `name` as header, `name : type` lines and indented descriptions; [] if absent or empty"
        out = []
        # `.get` because `__str__` asks for sections (e.g. 'Warns', 'Methods')
        # that are not among the stored keys.
        params = self.get(name)
        if not params: return out
        # `__init__` re-keys 'Parameters' into {name: Parameter}; render its values.
        if isinstance(params, Mapping): params = params.values()
        out += self._str_header(name)
        for param in params:
            parts = []
            if param.name: parts.append(param.name)
            if param.type: parts.append(param.type)
            out += [' : '.join(parts)]
            if param.desc and ''.join(param.desc).strip(): out += self._str_indent(param.desc)
        out += ['']
        return out

    def __str__(self, func_role=''):
        "Render the parsed docstring back to numpydoc-style text (`func_role` is accepted but unused)"
        out = []
        out += self._str_signature()
        out += self._str_summary()
        out += self._str_extended_summary()
        for param_list in ('Parameters', 'Returns', 'Yields', 'Receives', 'Other Parameters', 'Raises', 'Warns'):
            out += self._str_param_list(param_list)
        for param_list in ('Attributes', 'Methods'): out += self._str_param_list(param_list)
        return '\n'.join(out)
271+
272+
273+
def dedent_lines(lines):
    """Remove the whitespace prefix shared by all `lines` and return the result as a new list"""
    joined = "\n".join(lines)
    dedented = textwrap.dedent(joined)
    return dedented.split("\n")
276+

nbs/06_docments.ipynb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,24 @@
114114
" return a+b"
115115
]
116116
},
117+
{
118+
"cell_type": "code",
119+
"execution_count": null,
120+
"metadata": {},
121+
"outputs": [],
122+
"source": [
123+
"def add(a, b:int=0)->int:\n",
124+
" \"\"\"The sum of two numbers.\n",
125+
"\n",
126+
"Parameters\n",
127+
"----------\n",
128+
"a :\n",
129+
" the 1st number to add\n",
130+
"b : int\n",
131+
" the 2nd number to add (default: 0)\"\"\"\n",
132+
" return a+b"
133+
]
134+
},
117135
{
118136
"cell_type": "markdown",
119137
"metadata": {},

0 commit comments

Comments
 (0)