Skip to content

Commit edbfdaa

Browse files
chylek and hauntsaninja authored
Add option to include docstrings with stubgen (#13284)
### Description Closes #11965. Add a --include-docstrings flag to stubgen. This was suggested in #11965 along with a use case. When using this flag, the .pyi files will include docstrings for Python classes and functions and for C extension functions. The flag is optional and does not change the default stubgen behaviour. When using the flag, the resulting function stubs that contain a docstring will no longer be one-liners, but functions without a docstring still retain the default one-liner style. Example input: ```python class A: """class docstring""" def func(): """func docstring""" ... def nodoc(): ... ``` output: ```python class A: """class docstring""" def func() -> None: """func docstring""" ... def nodoc() -> None: ... ``` ## Test Plan Tests `testIncludeDocstrings` and `testIgnoreDocstrings` were added to `test-data/unit/stubgen.test` to ensure the code works as intended. All other tests passed as well. C extension docstrings are tested using an updated bash script `misc/test-stubgenc.sh` with test data in `test-data/pybind11_mypy_demo/stubgen-include-docs` in the same fashion as an existing test. --------- Co-authored-by: Shantanu <[email protected]>
1 parent 98881d2 commit edbfdaa

File tree

12 files changed

+311
-21
lines changed

12 files changed

+311
-21
lines changed

docs/source/stubgen.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,11 @@ Additional flags
163163
Instead, only export imported names that are not referenced in the module
164164
that contains the import.
165165

166+
.. option:: --include-docstrings
167+
168+
Include docstrings in stubs. This will add docstrings to Python function and
169+
class stubs and to C extension function stubs.
170+
166171
.. option:: --search-path PATH
167172

168173
Specify module search directories, separated by colons (only used if

misc/test-stubgenc.sh

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,33 @@
33
set -e
44
set -x
55

6-
cd "$(dirname $0)/.."
6+
cd "$(dirname "$0")/.."
77

88
# Install dependencies, demo project and mypy
99
python -m pip install -r test-requirements.txt
1010
python -m pip install ./test-data/pybind11_mypy_demo
1111
python -m pip install .
1212

13-
# Remove expected stubs and generate new inplace
14-
STUBGEN_OUTPUT_FOLDER=./test-data/pybind11_mypy_demo/stubgen
15-
rm -rf $STUBGEN_OUTPUT_FOLDER/*
16-
stubgen -p pybind11_mypy_demo -o $STUBGEN_OUTPUT_FOLDER
13+
EXIT=0
1714

18-
# Compare generated stubs to expected ones
19-
git diff --exit-code $STUBGEN_OUTPUT_FOLDER
15+
# performs the stubgenc test
16+
# first argument is the test result folder
17+
# everything else is passed to stubgen as its arguments
18+
function stubgenc_test() {
    # $1 is the output folder name under test-data/pybind11_mypy_demo;
    # every remaining argument is forwarded to stubgen unchanged.

    # Remove expected stubs and generate new inplace
    STUBGEN_OUTPUT_FOLDER=./test-data/pybind11_mypy_demo/$1
    # The glob has to stay outside the quotes: a quoted "*" is passed to rm
    # literally, and because of -f rm then silently deletes nothing, leaving
    # stale stubs behind. ":?" still aborts if the variable is empty.
    rm -rf "${STUBGEN_OUTPUT_FOLDER:?}"/*
    stubgen -o "$STUBGEN_OUTPUT_FOLDER" "${@:2}"

    # Compare generated stubs to expected ones
    if ! git diff --exit-code "$STUBGEN_OUTPUT_FOLDER";
    then
        # Inside this branch "$?" is the status of the negated condition,
        # which is always 0, so record the failure explicitly instead.
        EXIT=1
    fi
}
30+
31+
# create stubs without docstrings
32+
stubgenc_test stubgen -p pybind11_mypy_demo
33+
# create stubs with docstrings
34+
stubgenc_test stubgen-include-docs -p pybind11_mypy_demo --include-docstrings
35+
exit $EXIT

mypy/fastparse.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,6 +1008,8 @@ def do_func_def(
10081008
# FuncDef overrides set_line -- can't use self.set_line
10091009
func_def.set_line(lineno, n.col_offset, end_line, end_column)
10101010
retval = func_def
1011+
if self.options.include_docstrings:
1012+
func_def.docstring = ast3.get_docstring(n, clean=False)
10111013
self.class_and_function_stack.pop()
10121014
return retval
10131015

@@ -1121,6 +1123,8 @@ def visit_ClassDef(self, n: ast3.ClassDef) -> ClassDef:
11211123
cdef.line = n.lineno
11221124
cdef.deco_line = n.decorator_list[0].lineno if n.decorator_list else None
11231125

1126+
if self.options.include_docstrings:
1127+
cdef.docstring = ast3.get_docstring(n, clean=False)
11241128
cdef.column = n.col_offset
11251129
cdef.end_line = getattr(n, "end_lineno", None)
11261130
cdef.end_column = getattr(n, "end_col_offset", None)

mypy/nodes.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,7 @@ class FuncDef(FuncItem, SymbolNode, Statement):
751751
"is_mypy_only",
752752
# Present only when a function is decorated with @typing.dataclass_transform or similar
753753
"dataclass_transform_spec",
754+
"docstring",
754755
)
755756

756757
__match_args__ = ("name", "arguments", "type", "body")
@@ -779,6 +780,7 @@ def __init__(
779780
# Definitions that appear in if TYPE_CHECKING are marked with this flag.
780781
self.is_mypy_only = False
781782
self.dataclass_transform_spec: DataclassTransformSpec | None = None
783+
self.docstring: str | None = None
782784

783785
@property
784786
def name(self) -> str:
@@ -1081,6 +1083,7 @@ class ClassDef(Statement):
10811083
"analyzed",
10821084
"has_incompatible_baseclass",
10831085
"deco_line",
1086+
"docstring",
10841087
"removed_statements",
10851088
)
10861089

@@ -1127,6 +1130,7 @@ def __init__(
11271130
self.has_incompatible_baseclass = False
11281131
# Used for error reporting (to keep backward compatibility with pre-3.8)
11291132
self.deco_line: int | None = None
1133+
self.docstring: str | None = None
11301134
self.removed_statements = []
11311135

11321136
@property

mypy/options.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,12 @@ def __init__(self) -> None:
283283
# mypy. (Like mypyc.)
284284
self.preserve_asts = False
285285

286+
# If True, function and class docstrings will be extracted and retained.
287+
# This isn't exposed as a command line option
288+
# because it is intended for software integrating with
289+
# mypy. (Like stubgen.)
290+
self.include_docstrings = False
291+
286292
# Paths of user plugins
287293
self.plugins: list[str] = []
288294

mypy/stubgen.py

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ def __init__(
243243
verbose: bool,
244244
quiet: bool,
245245
export_less: bool,
246+
include_docstrings: bool,
246247
) -> None:
247248
# See parse_options for descriptions of the flags.
248249
self.pyversion = pyversion
@@ -261,6 +262,7 @@ def __init__(
261262
self.verbose = verbose
262263
self.quiet = quiet
263264
self.export_less = export_less
265+
self.include_docstrings = include_docstrings
264266

265267

266268
class StubSource:
@@ -624,6 +626,7 @@ def __init__(
624626
include_private: bool = False,
625627
analyzed: bool = False,
626628
export_less: bool = False,
629+
include_docstrings: bool = False,
627630
) -> None:
628631
# Best known value of __all__.
629632
self._all_ = _all_
@@ -638,6 +641,7 @@ def __init__(
638641
self._state = EMPTY
639642
self._toplevel_names: list[str] = []
640643
self._include_private = include_private
644+
self._include_docstrings = include_docstrings
641645
self._current_class: ClassDef | None = None
642646
self.import_tracker = ImportTracker()
643647
# Was the tree semantically analysed before?
@@ -809,7 +813,13 @@ def visit_func_def(self, o: FuncDef) -> None:
809813
retfield = " -> " + retname
810814

811815
self.add(", ".join(args))
812-
self.add(f"){retfield}: ...\n")
816+
self.add(f"){retfield}:")
817+
if self._include_docstrings and o.docstring:
818+
docstring = mypy.util.quote_docstring(o.docstring)
819+
self.add(f"\n{self._indent} {docstring}\n")
820+
else:
821+
self.add(" ...\n")
822+
813823
self._state = FUNC
814824

815825
def is_none_expr(self, expr: Expression) -> bool:
@@ -910,8 +920,11 @@ def visit_class_def(self, o: ClassDef) -> None:
910920
if base_types:
911921
self.add(f"({', '.join(base_types)})")
912922
self.add(":\n")
913-
n = len(self._output)
914923
self._indent += " "
924+
if self._include_docstrings and o.docstring:
925+
docstring = mypy.util.quote_docstring(o.docstring)
926+
self.add(f"{self._indent}{docstring}\n")
927+
n = len(self._output)
915928
self._vars.append([])
916929
super().visit_class_def(o)
917930
self._indent = self._indent[:-4]
@@ -920,7 +933,8 @@ def visit_class_def(self, o: ClassDef) -> None:
920933
if len(self._output) == n:
921934
if self._state == EMPTY_CLASS and sep is not None:
922935
self._output[sep] = ""
923-
self._output[-1] = self._output[-1][:-1] + " ...\n"
936+
if not (self._include_docstrings and o.docstring):
937+
self._output[-1] = self._output[-1][:-1] + " ...\n"
924938
self._state = EMPTY_CLASS
925939
else:
926940
self._state = CLASS
@@ -1710,6 +1724,7 @@ def mypy_options(stubgen_options: Options) -> MypyOptions:
17101724
options.show_traceback = True
17111725
options.transform_source = remove_misplaced_type_comments
17121726
options.preserve_asts = True
1727+
options.include_docstrings = stubgen_options.include_docstrings
17131728

17141729
# Override cache_dir if provided in the environment
17151730
environ_cache_dir = os.getenv("MYPY_CACHE_DIR", "")
@@ -1773,6 +1788,7 @@ def generate_stub_from_ast(
17731788
parse_only: bool = False,
17741789
include_private: bool = False,
17751790
export_less: bool = False,
1791+
include_docstrings: bool = False,
17761792
) -> None:
17771793
"""Use analysed (or just parsed) AST to generate type stub for single file.
17781794
@@ -1784,6 +1800,7 @@ def generate_stub_from_ast(
17841800
include_private=include_private,
17851801
analyzed=not parse_only,
17861802
export_less=export_less,
1803+
include_docstrings=include_docstrings,
17871804
)
17881805
assert mod.ast is not None, "This function must be used only with analyzed modules"
17891806
mod.ast.accept(gen)
@@ -1845,7 +1862,12 @@ def generate_stubs(options: Options) -> None:
18451862
files.append(target)
18461863
with generate_guarded(mod.module, target, options.ignore_errors, options.verbose):
18471864
generate_stub_from_ast(
1848-
mod, target, options.parse_only, options.include_private, options.export_less
1865+
mod,
1866+
target,
1867+
options.parse_only,
1868+
options.include_private,
1869+
options.export_less,
1870+
include_docstrings=options.include_docstrings,
18491871
)
18501872

18511873
# Separately analyse C modules using different logic.
@@ -1859,7 +1881,11 @@ def generate_stubs(options: Options) -> None:
18591881
files.append(target)
18601882
with generate_guarded(mod.module, target, options.ignore_errors, options.verbose):
18611883
generate_stub_for_c_module(
1862-
mod.module, target, known_modules=all_modules, sig_generators=sig_generators
1884+
mod.module,
1885+
target,
1886+
known_modules=all_modules,
1887+
sig_generators=sig_generators,
1888+
include_docstrings=options.include_docstrings,
18631889
)
18641890
num_modules = len(py_modules) + len(c_modules)
18651891
if not options.quiet and num_modules > 0:
@@ -1913,6 +1939,11 @@ def parse_options(args: list[str]) -> Options:
19131939
action="store_true",
19141940
help="don't implicitly export all names imported from other modules in the same package",
19151941
)
1942+
parser.add_argument(
1943+
"--include-docstrings",
1944+
action="store_true",
1945+
help="include existing docstrings with the stubs",
1946+
)
19161947
parser.add_argument("-v", "--verbose", action="store_true", help="show more verbose messages")
19171948
parser.add_argument("-q", "--quiet", action="store_true", help="show fewer messages")
19181949
parser.add_argument(
@@ -1993,6 +2024,7 @@ def parse_options(args: list[str]) -> Options:
19932024
verbose=ns.verbose,
19942025
quiet=ns.quiet,
19952026
export_less=ns.export_less,
2027+
include_docstrings=ns.include_docstrings,
19962028
)
19972029

19982030

mypy/stubgenc.py

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from types import ModuleType
1515
from typing import Any, Final, Iterable, Mapping
1616

17+
import mypy.util
1718
from mypy.moduleinspect import is_c_module
1819
from mypy.stubdoc import (
1920
ArgSig,
@@ -169,6 +170,7 @@ def generate_stub_for_c_module(
169170
target: str,
170171
known_modules: list[str],
171172
sig_generators: Iterable[SignatureGenerator],
173+
include_docstrings: bool = False,
172174
) -> None:
173175
"""Generate stub for C module.
174176
@@ -201,6 +203,7 @@ def generate_stub_for_c_module(
201203
known_modules=known_modules,
202204
imports=imports,
203205
sig_generators=sig_generators,
206+
include_docstrings=include_docstrings,
204207
)
205208
done.add(name)
206209
types: list[str] = []
@@ -216,6 +219,7 @@ def generate_stub_for_c_module(
216219
known_modules=known_modules,
217220
imports=imports,
218221
sig_generators=sig_generators,
222+
include_docstrings=include_docstrings,
219223
)
220224
done.add(name)
221225
variables = []
@@ -319,15 +323,17 @@ def generate_c_function_stub(
319323
self_var: str | None = None,
320324
cls: type | None = None,
321325
class_name: str | None = None,
326+
include_docstrings: bool = False,
322327
) -> None:
323328
"""Generate stub for a single function or method.
324329
325-
The result (always a single line) will be appended to 'output'.
330+
The result will be appended to 'output'.
326331
If necessary, any required names will be added to 'imports'.
327332
The 'class_name' is used to find signature of __init__ or __new__ in
328333
'class_sigs'.
329334
"""
330335
inferred: list[FunctionSig] | None = None
336+
docstr: str | None = None
331337
if class_name:
332338
# method:
333339
assert cls is not None, "cls should be provided for methods"
@@ -379,13 +385,19 @@ def generate_c_function_stub(
379385
# a sig generator indicates @classmethod by specifying the cls arg
380386
if class_name and signature.args and signature.args[0].name == "cls":
381387
output.append("@classmethod")
382-
output.append(
383-
"def {function}({args}) -> {ret}: ...".format(
384-
function=name,
385-
args=", ".join(args),
386-
ret=strip_or_import(signature.ret_type, module, known_modules, imports),
387-
)
388+
output_signature = "def {function}({args}) -> {ret}:".format(
389+
function=name,
390+
args=", ".join(args),
391+
ret=strip_or_import(signature.ret_type, module, known_modules, imports),
388392
)
393+
if include_docstrings and docstr:
394+
docstr_quoted = mypy.util.quote_docstring(docstr.strip())
395+
docstr_indented = "\n ".join(docstr_quoted.split("\n"))
396+
output.append(output_signature)
397+
output.extend(f" {docstr_indented}".split("\n"))
398+
else:
399+
output_signature += " ..."
400+
output.append(output_signature)
389401

390402

391403
def strip_or_import(
@@ -493,6 +505,7 @@ def generate_c_type_stub(
493505
known_modules: list[str],
494506
imports: list[str],
495507
sig_generators: Iterable[SignatureGenerator],
508+
include_docstrings: bool = False,
496509
) -> None:
497510
"""Generate stub for a single class using runtime introspection.
498511
@@ -535,6 +548,7 @@ def generate_c_type_stub(
535548
cls=obj,
536549
class_name=class_name,
537550
sig_generators=sig_generators,
551+
include_docstrings=include_docstrings,
538552
)
539553
elif is_c_property(raw_value):
540554
generate_c_property_stub(
@@ -557,6 +571,7 @@ def generate_c_type_stub(
557571
imports=imports,
558572
known_modules=known_modules,
559573
sig_generators=sig_generators,
574+
include_docstrings=include_docstrings,
560575
)
561576
else:
562577
attrs.append((attr, value))

mypy/util.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -809,3 +809,20 @@ def plural_s(s: int | Sized) -> str:
809809
return "s"
810810
else:
811811
return ""
812+
813+
814+
def quote_docstring(docstr: str) -> str:
    """Return docstr rendered as triple-quoted Python source text.

    repr() is used to escape the text and to choose the quote character.
    Escaped newlines in that repr are then turned back into real line
    breaks so the result reads like an ordinary multi-line docstring.
    """
    # Split on "\n" escape sequences whose preceding character is not a
    # backslash, then rejoin the pieces with real newlines.
    pieces = re.split(r"(?<=[^\\])\\n", repr(docstr))
    quoted = "\n".join(pieces)

    if not quoted.startswith("'"):
        # repr chose double quotes; pad them out to a triple quote.
        return f'""{quoted}""'

    inner = quoted[1:-1]
    # Double quotes are preferred, but only when the text has none of its
    # own and the character before the closing quote is not a single quote.
    # Otherwise keep repr's single quotes and pad them to a triple quote.
    if '"' in inner or quoted[-2] == "'":
        return f"''{quoted}''"
    return f'"""{inner}"""'

test-data/pybind11_mypy_demo/src/main.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,8 @@ void bind_basics(py::module& basics) {
119119
using namespace basics;
120120

121121
// Functions
122-
basics.def("answer", &answer);
123-
basics.def("sum", &sum);
122+
basics.def("answer", &answer, "answer docstring, with end quote\""); // tests explicit docstrings
123+
basics.def("sum", &sum, "multiline docstring test, edge case quotes \"\"\"'''");
124124
basics.def("midpoint", &midpoint, py::arg("left"), py::arg("right"));
125125
basics.def("weighted_midpoint", weighted_midpoint, py::arg("left"), py::arg("right"), py::arg("alpha")=0.5);
126126

test-data/pybind11_mypy_demo/stubgen-include-docs/pybind11_mypy_demo/__init__.pyi

Whitespace-only changes.

0 commit comments

Comments
 (0)