Merge pull request #413 from MarkB2/dataclass

jph00 · web-flow · commit 6db6aaa36efc · 2022-05-25T09:35:49.000+10:00
Add dataclass to docments
diff --git a/fastcore/_nbdev.py b/fastcore/_nbdev.py
@@ -247,6 +247,9 @@
          "Pipeline": "05_transform.ipynb",
          "docstring": "06_docments.ipynb",
          "parse_docstring": "06_docments.ipynb",
+         "isdataclass": "06_docments.ipynb",
+         "get_dataclass_source": "06_docments.ipynb",
+         "get_source": "06_docments.ipynb",
          "empty": "06_docments.ipynb",
          "docments": "06_docments.ipynb",
          "test_sig": "07_meta.ipynb",
diff --git a/fastcore/docments.py b/fastcore/docments.py
@@ -4,18 +4,19 @@
 from __future__ import annotations
 
 
-__all__ = ['docstring', 'parse_docstring', 'empty', 'docments']
+__all__ = ['docstring', 'parse_docstring', 'isdataclass', 'get_dataclass_source', 'get_source', 'empty', 'docments']
 
 # Cell
 #nbdev_comment from __future__ import annotations
 
 import re
 from tokenize import tokenize,COMMENT
-from ast import parse,FunctionDef
+from ast import parse,FunctionDef,AnnAssign
 from io import BytesIO
 from textwrap import dedent
 from types import SimpleNamespace
 from inspect import getsource,isfunction,isclass,signature,Parameter
+from dataclasses import dataclass, is_dataclass
 from .utils import *
 
 from fastcore import docscrape
@@ -36,13 +37,25 @@ def parse_docstring(sym):
     return AttrDict(**docscrape.NumpyDocString(docstring(sym)))
 
 # Cell
+def isdataclass(s):
+    "Check if `s` is a dataclass but not a dataclass' instance"
+    return is_dataclass(s) and isclass(s)
+
+def get_dataclass_source(s):
+    "Get source code for dataclass `s`"
+    return getsource(s) if not getattr(s, "__module__") == '__main__' else ""
+
+def get_source(s):
+    "Get source code for string, function object or dataclass `s`"
+    return getsource(s) if isfunction(s) else get_dataclass_source(s) if isdataclass(s) else s
+
 def _parses(s):
-    "Parse Python code in string or function object `s`"
-    return parse(dedent(getsource(s) if isfunction(s) else s))
+    "Parse Python code in string, function object or dataclass `s`"
+    return parse(dedent(get_source(s)))
 
 def _tokens(s):
     "Tokenize Python code in string or function object `s`"
-    if isfunction(s): s = getsource(s)
+    s = get_source(s)
     return tokenize(BytesIO(s.encode('utf-8')).readline)
 
 _clean_re = re.compile('^\s*#(.*)\s*$')
@@ -53,11 +66,16 @@ def _clean_comment(s):
 def _param_locs(s, returns=True):
     "`dict` of parameter line numbers to names"
     body = _parses(s).body
-    if len(body)!=1 or not isinstance(body[0], FunctionDef): return None
-    defn = body[0]
-    res = {arg.lineno:arg.arg for arg in defn.args.args}
-    if returns and defn.returns: res[defn.returns.lineno] = 'return'
-    return res
+    if len(body)==1: #or not isinstance(body[0], FunctionDef): return None
+        defn = body[0]
+        if isinstance(defn, FunctionDef):
+            res = {arg.lineno:arg.arg for arg in defn.args.args}
+            if returns and defn.returns: res[defn.returns.lineno] = 'return'
+            return res
+        elif isdataclass(s):
+            res = {arg.lineno:arg.target.id for arg in defn.body if isinstance(arg, AnnAssign)}
+            return res
+    return None
 
 # Cell
 empty = Parameter.empty
@@ -93,9 +111,9 @@ def _merge_docs(dms, npdocs):
 def docments(s, full=False, returns=True, eval_str=False):
     "`dict` of parameter names to 'docment-style' comments in function or string `s`"
     nps = parse_docstring(s)
-    if isclass(s): s = s.__init__ # Constructor for a class
+    if isclass(s) and not is_dataclass(s): s = s.__init__ # Constructor for a class
     comments = {o.start[0]:_clean_comment(o.string) for o in _tokens(s) if o.type==COMMENT}
-    parms = _param_locs(s, returns=returns)
+    parms = _param_locs(s, returns=returns) or {}
     docs = {arg:_get_comment(line, arg, comments, parms) for line,arg in parms.items()}
 
     if isinstance(s,str): s = eval(s)
diff --git a/nbs/06_docments.ipynb b/nbs/06_docments.ipynb
@@ -29,11 +29,12 @@
     "\n",
     "import re\n",
     "from tokenize import tokenize,COMMENT\n",
-    "from ast import parse,FunctionDef\n",
+    "from ast import parse,FunctionDef,AnnAssign\n",
     "from io import BytesIO\n",
     "from textwrap import dedent\n",
     "from types import SimpleNamespace\n",
     "from inspect import getsource,isfunction,isclass,signature,Parameter\n",
+    "from dataclasses import dataclass, is_dataclass\n",
     "from fastcore.utils import *\n",
     "\n",
     "from fastcore import docscrape\n",
@@ -217,13 +218,25 @@
    "outputs": [],
    "source": [
     "#export\n",
+    "def isdataclass(s):\n",
+    "    \"Check if `s` is a dataclass but not a dataclass' instance\"\n",
+    "    return is_dataclass(s) and isclass(s)\n",
+    "\n",
+    "def get_dataclass_source(s):\n",
+    "    \"Get source code for dataclass `s`\"\n",
+    "    return getsource(s) if not getattr(s, \"__module__\") == '__main__' else \"\"\n",
+    "\n",
+    "def get_source(s):\n",
+    "    \"Get source code for string, function object or dataclass `s`\"\n",
+    "    return getsource(s) if isfunction(s) else get_dataclass_source(s) if isdataclass(s) else s\n",
+    "\n",
     "def _parses(s):\n",
-    "    \"Parse Python code in string or function object `s`\"\n",
-    "    return parse(dedent(getsource(s) if isfunction(s) else s))\n",
+    "    \"Parse Python code in string, function object or dataclass `s`\"\n",
+    "    return parse(dedent(get_source(s)))\n",
     "\n",
     "def _tokens(s):\n",
     "    \"Tokenize Python code in string or function object `s`\"\n",
-    "    if isfunction(s): s = getsource(s)\n",
+    "    s = get_source(s)\n",
     "    return tokenize(BytesIO(s.encode('utf-8')).readline)\n",
     "\n",
     "_clean_re = re.compile('^\\s*#(.*)\\s*$')\n",
@@ -234,11 +247,16 @@
     "def _param_locs(s, returns=True):\n",
     "    \"`dict` of parameter line numbers to names\"\n",
     "    body = _parses(s).body\n",
-    "    if len(body)!=1 or not isinstance(body[0], FunctionDef): return None\n",
-    "    defn = body[0]\n",
-    "    res = {arg.lineno:arg.arg for arg in defn.args.args}\n",
-    "    if returns and defn.returns: res[defn.returns.lineno] = 'return'\n",
-    "    return res"
+    "    if len(body)==1: #or not isinstance(body[0], FunctionDef): return None\n",
+    "        defn = body[0]\n",
+    "        if isinstance(defn, FunctionDef):\n",
+    "            res = {arg.lineno:arg.arg for arg in defn.args.args}\n",
+    "            if returns and defn.returns: res[defn.returns.lineno] = 'return'\n",
+    "            return res\n",
+    "        elif isdataclass(s):\n",
+    "            res = {arg.lineno:arg.target.id for arg in defn.body if isinstance(arg, AnnAssign)}\n",
+    "            return res\n",
+    "    return None"
    ]
   },
   {
@@ -302,9 +320,9 @@
     "def docments(s, full=False, returns=True, eval_str=False):\n",
     "    \"`dict` of parameter names to 'docment-style' comments in function or string `s`\"\n",
     "    nps = parse_docstring(s)\n",
-    "    if isclass(s): s = s.__init__ # Constructor for a class\n",
+    "    if isclass(s) and not is_dataclass(s): s = s.__init__ # Constructor for a class\n",
     "    comments = {o.start[0]:_clean_comment(o.string) for o in _tokens(s) if o.type==COMMENT}\n",
-    "    parms = _param_locs(s, returns=returns)\n",
+    "    parms = _param_locs(s, returns=returns) or {}\n",
     "    docs = {arg:_get_comment(line, arg, comments, parms) for line,arg in parms.items()}\n",
     "\n",
     "    if isinstance(s,str): s = eval(s)\n",
@@ -737,6 +755,79 @@
     "docments(add_mixed, full=True)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can use docments with dataclasses:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/markdown": [
+       "```json\n",
+       "{'age': None, 'name': None, 'return': None, 'weight': None}\n",
+       "```"
+      ],
+      "text/plain": [
+       "{'name': None, 'age': None, 'weight': None, 'return': None}"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "@dataclass\n",
+    "class Person:\n",
+    "    name:str # The name of the person\n",
+    "    age:int # The age of the person\n",
+    "    weight:float # The weight of the person\n",
+    "\n",
+    "docments(Person)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Caveat: if the class is defined directly in a notebook, docments will not contain the parameters' comments, because the class's source code is not available there. Once the notebook has been converted to a script, the comments can be read from the source, so the generated documentation will contain the correct parameter comments."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tmp = Path('person.py')\n",
+    "tmp.write_text('''\n",
+    "from dataclasses import dataclass\n",
+    "@dataclass\n",
+    "class Person:\n",
+    "    name:str # The name of the person\n",
+    "    age:int # The age of the person\n",
+    "    weight:float # The weight of the person\n",
+    "''')\n",
+    "import person\n",
+    "tst_dict = { \n",
+    "    'age': 'The age of the person',\n",
+    "    'name': 'The name of the person',\n",
+    "    'return': None,\n",
+    "    'weight': 'The weight of the person'}\n",
+    "assert tst_dict == docments(person.Person)\n",
+    "try: # to conform to python 3.6\n",
+    "    tmp.unlink()\n",
+    "except FileNotFoundError:\n",
+    "    pass"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -784,7 +875,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3.9.12 ('base')",
    "language": "python",
    "name": "python3"
   }