Skip to content

Commit 4b1a255

Browse files
authored
[mypyc] Various improvements to annotated html generation (#18848)
Detect additional performance issues, such as calling decorated functions and constructing instances of non-native classes. Silence some non-actionable annotations where the performance impact is minimal, and/or the user likely doesn't learn anything useful from the annotation.
1 parent 836019a commit 4b1a255

File tree

7 files changed

+370
-50
lines changed

7 files changed

+370
-50
lines changed

Diff for: mypyc/annotate.py

+148-28
Original file line numberDiff line numberDiff line change
@@ -13,26 +13,39 @@
1313

1414
from mypy.build import BuildResult
1515
from mypy.nodes import (
16+
AssignmentStmt,
1617
CallExpr,
18+
ClassDef,
19+
Decorator,
20+
DictionaryComprehension,
1721
Expression,
1822
ForStmt,
1923
FuncDef,
24+
GeneratorExpr,
25+
IndexExpr,
2026
LambdaExpr,
2127
MemberExpr,
2228
MypyFile,
29+
NamedTupleExpr,
2330
NameExpr,
31+
NewTypeExpr,
2432
Node,
33+
OpExpr,
2534
RefExpr,
2635
TupleExpr,
36+
TypedDictExpr,
2737
TypeInfo,
38+
TypeVarExpr,
2839
Var,
40+
WithStmt,
2941
)
3042
from mypy.traverser import TraverserVisitor
3143
from mypy.types import AnyType, Instance, ProperType, Type, TypeOfAny, get_proper_type
3244
from mypy.util import FancyFormatter
3345
from mypyc.ir.func_ir import FuncIR
3446
from mypyc.ir.module_ir import ModuleIR
3547
from mypyc.ir.ops import CallC, LoadLiteral, LoadStatic, Value
48+
from mypyc.irbuild.mapper import Mapper
3649

3750

3851
class Annotation:
@@ -71,18 +84,21 @@ def __init__(self, message: str, priority: int = 1) -> None:
7184

7285
stdlib_hints: Final = {
7386
"functools.partial": Annotation(
74-
'"functools.partial" is inefficient in compiled code.', priority=2
87+
'"functools.partial" is inefficient in compiled code.', priority=3
7588
),
7689
"itertools.chain": Annotation(
7790
'"itertools.chain" is inefficient in compiled code (hint: replace with for loops).',
78-
priority=2,
91+
priority=3,
7992
),
8093
"itertools.groupby": Annotation(
81-
'"itertools.groupby" is inefficient in compiled code.', priority=2
94+
'"itertools.groupby" is inefficient in compiled code.', priority=3
8295
),
8396
"itertools.islice": Annotation(
8497
'"itertools.islice" is inefficient in compiled code (hint: replace with for loop over index range).',
85-
priority=2,
98+
priority=3,
99+
),
100+
"copy.deepcopy": Annotation(
101+
'"copy.deepcopy" tends to be slow. Make a shallow copy if possible.', priority=2
86102
),
87103
}
88104

@@ -127,14 +143,16 @@ def __init__(self, path: str, annotations: dict[int, list[Annotation]]) -> None:
127143

128144

129145
def generate_annotated_html(
130-
html_fnam: str, result: BuildResult, modules: dict[str, ModuleIR]
146+
html_fnam: str, result: BuildResult, modules: dict[str, ModuleIR], mapper: Mapper
131147
) -> None:
132148
annotations = []
133149
for mod, mod_ir in modules.items():
134150
path = result.graph[mod].path
135151
tree = result.graph[mod].tree
136152
assert tree is not None
137-
annotations.append(generate_annotations(path or "<source>", tree, mod_ir, result.types))
153+
annotations.append(
154+
generate_annotations(path or "<source>", tree, mod_ir, result.types, mapper)
155+
)
138156
html = generate_html_report(annotations)
139157
with open(html_fnam, "w") as f:
140158
f.write(html)
@@ -145,15 +163,18 @@ def generate_annotated_html(
145163

146164

147165
def generate_annotations(
148-
path: str, tree: MypyFile, ir: ModuleIR, type_map: dict[Expression, Type]
166+
path: str, tree: MypyFile, ir: ModuleIR, type_map: dict[Expression, Type], mapper: Mapper
149167
) -> AnnotatedSource:
150168
anns = {}
151169
for func_ir in ir.functions:
152170
anns.update(function_annotations(func_ir, tree))
153-
visitor = ASTAnnotateVisitor(type_map)
171+
visitor = ASTAnnotateVisitor(type_map, mapper)
154172
for defn in tree.defs:
155173
defn.accept(visitor)
156174
anns.update(visitor.anns)
175+
for line in visitor.ignored_lines:
176+
if line in anns:
177+
del anns[line]
157178
return AnnotatedSource(path, anns)
158179

159180

@@ -168,18 +189,28 @@ def function_annotations(func_ir: FuncIR, tree: MypyFile) -> dict[int, list[Anno
168189
ann: str | Annotation | None = None
169190
if name == "CPyObject_GetAttr":
170191
attr_name = get_str_literal(op.args[1])
171-
if attr_name == "__prepare__":
172-
# These attributes are internal to mypyc/CPython, and the user has
173-
# little control over them.
192+
if attr_name in ("__prepare__", "GeneratorExit", "StopIteration"):
193+
# These attributes are internal to mypyc/CPython, and/or accessed
194+
# implicitly in generated code. The user has little control over
195+
# them.
174196
ann = None
175197
elif attr_name:
176198
ann = f'Get non-native attribute "{attr_name}".'
177199
else:
178200
ann = "Dynamic attribute lookup."
201+
elif name == "PyObject_SetAttr":
202+
attr_name = get_str_literal(op.args[1])
203+
if attr_name == "__mypyc_attrs__":
204+
# This is set implicitly and can't be avoided.
205+
ann = None
206+
elif attr_name:
207+
ann = f'Set non-native attribute "{attr_name}".'
208+
else:
209+
ann = "Dynamic attribute set."
179210
elif name == "PyObject_VectorcallMethod":
180211
method_name = get_str_literal(op.args[0])
181212
if method_name:
182-
ann = f'Call non-native method "{method_name}".'
213+
ann = f'Call non-native method "{method_name}" (it may be defined in a non-native class, or decorated).'
183214
else:
184215
ann = "Dynamic method call."
185216
elif name in op_hints:
@@ -218,10 +249,12 @@ def function_annotations(func_ir: FuncIR, tree: MypyFile) -> dict[int, list[Anno
218249
class ASTAnnotateVisitor(TraverserVisitor):
219250
"""Generate annotations from mypy AST and inferred types."""
220251

221-
def __init__(self, type_map: dict[Expression, Type]) -> None:
252+
def __init__(self, type_map: dict[Expression, Type], mapper: Mapper) -> None:
222253
self.anns: dict[int, list[Annotation]] = {}
254+
self.ignored_lines: set[int] = set()
223255
self.func_depth = 0
224256
self.type_map = type_map
257+
self.mapper = mapper
225258

226259
def visit_func_def(self, o: FuncDef, /) -> None:
227260
if self.func_depth > 0:
@@ -235,21 +268,84 @@ def visit_func_def(self, o: FuncDef, /) -> None:
235268
self.func_depth -= 1
236269

237270
def visit_for_stmt(self, o: ForStmt, /) -> None:
238-
typ = self.get_type(o.expr)
239-
if isinstance(typ, AnyType):
240-
self.annotate(o.expr, 'For loop uses generic operations (iterable has type "Any").')
241-
elif isinstance(typ, Instance) and typ.type.fullname in (
242-
"typing.Iterable",
243-
"typing.Iterator",
244-
"typing.Sequence",
245-
"typing.MutableSequence",
246-
):
247-
self.annotate(
248-
o.expr,
249-
f'For loop uses generic operations (iterable has the abstract type "{typ.type.fullname}").',
250-
)
271+
self.check_iteration([o.expr], "For loop")
251272
super().visit_for_stmt(o)
252273

274+
def visit_dictionary_comprehension(self, o: DictionaryComprehension, /) -> None:
275+
self.check_iteration(o.sequences, "Comprehension")
276+
super().visit_dictionary_comprehension(o)
277+
278+
def visit_generator_expr(self, o: GeneratorExpr, /) -> None:
279+
self.check_iteration(o.sequences, "Comprehension or generator")
280+
super().visit_generator_expr(o)
281+
282+
def check_iteration(self, expressions: list[Expression], kind: str) -> None:
283+
for expr in expressions:
284+
typ = self.get_type(expr)
285+
if isinstance(typ, AnyType):
286+
self.annotate(expr, f'{kind} uses generic operations (iterable has type "Any").')
287+
elif isinstance(typ, Instance) and typ.type.fullname in (
288+
"typing.Iterable",
289+
"typing.Iterator",
290+
"typing.Sequence",
291+
"typing.MutableSequence",
292+
):
293+
self.annotate(
294+
expr,
295+
f'{kind} uses generic operations (iterable has the abstract type "{typ.type.fullname}").',
296+
)
297+
298+
def visit_class_def(self, o: ClassDef, /) -> None:
299+
super().visit_class_def(o)
300+
if self.func_depth == 0:
301+
# Don't complain about base classes at top level
302+
for base in o.base_type_exprs:
303+
self.ignored_lines.add(base.line)
304+
305+
for s in o.defs.body:
306+
if isinstance(s, AssignmentStmt):
307+
# Don't complain about attribute initializers
308+
self.ignored_lines.add(s.line)
309+
elif isinstance(s, Decorator):
310+
# Don't complain about decorator definitions that generate some
311+
# dynamic operations. This is a bit heavy-handed.
312+
self.ignored_lines.add(s.func.line)
313+
314+
def visit_with_stmt(self, o: WithStmt, /) -> None:
315+
for expr in o.expr:
316+
if isinstance(expr, CallExpr) and isinstance(expr.callee, RefExpr):
317+
node = expr.callee.node
318+
if isinstance(node, Decorator):
319+
if any(
320+
isinstance(d, RefExpr)
321+
and d.node
322+
and d.node.fullname == "contextlib.contextmanager"
323+
for d in node.decorators
324+
):
325+
self.annotate(
326+
expr,
327+
f'"{node.name}" uses @contextmanager, which is slow '
328+
+ "in compiled code. Use a native class with "
329+
+ '"__enter__" and "__exit__" methods instead.',
330+
priority=3,
331+
)
332+
super().visit_with_stmt(o)
333+
334+
def visit_assignment_stmt(self, o: AssignmentStmt, /) -> None:
335+
special_form = False
336+
if self.func_depth == 0:
337+
analyzed: Expression | None = o.rvalue
338+
if isinstance(o.rvalue, (CallExpr, IndexExpr, OpExpr)):
339+
analyzed = o.rvalue.analyzed
340+
if o.is_alias_def or isinstance(
341+
analyzed, (TypeVarExpr, NamedTupleExpr, TypedDictExpr, NewTypeExpr)
342+
):
343+
special_form = True
344+
if special_form:
345+
# TODO: Ignore all lines if multi-line
346+
self.ignored_lines.add(o.line)
347+
super().visit_assignment_stmt(o)
348+
253349
def visit_name_expr(self, o: NameExpr, /) -> None:
254350
if ann := stdlib_hints.get(o.fullname):
255351
self.annotate(o, ann)
@@ -268,6 +364,30 @@ def visit_call_expr(self, o: CallExpr, /) -> None:
268364
):
269365
arg = o.args[1]
270366
self.check_isinstance_arg(arg)
367+
elif isinstance(o.callee, RefExpr) and isinstance(o.callee.node, TypeInfo):
368+
info = o.callee.node
369+
class_ir = self.mapper.type_to_ir.get(info)
370+
if (class_ir and not class_ir.is_ext_class) or (
371+
class_ir is None and not info.fullname.startswith("builtins.")
372+
):
373+
self.annotate(
374+
o, f'Creating an instance of non-native class "{info.name}" ' + "is slow.", 2
375+
)
376+
elif class_ir and class_ir.is_augmented:
377+
self.annotate(
378+
o,
379+
f'Class "{info.name}" is only partially native, and '
380+
+ "constructing an instance is slow.",
381+
2,
382+
)
383+
elif isinstance(o.callee, RefExpr) and isinstance(o.callee.node, Decorator):
384+
decorator = o.callee.node
385+
if self.mapper.is_native_ref_expr(o.callee):
386+
self.annotate(
387+
o,
388+
f'Calling a decorated function ("{decorator.name}") is inefficient, even if it\'s native.',
389+
2,
390+
)
271391

272392
def check_isinstance_arg(self, arg: Expression) -> None:
273393
if isinstance(arg, RefExpr):
@@ -287,9 +407,9 @@ def visit_lambda_expr(self, o: LambdaExpr, /) -> None:
287407
)
288408
super().visit_lambda_expr(o)
289409

290-
def annotate(self, o: Node, ann: str | Annotation) -> None:
410+
def annotate(self, o: Node, ann: str | Annotation, priority: int = 1) -> None:
291411
if isinstance(ann, str):
292-
ann = Annotation(ann)
412+
ann = Annotation(ann, priority=priority)
293413
self.anns.setdefault(o.line, []).append(ann)
294414

295415
def get_type(self, e: Expression) -> ProperType:

Diff for: mypyc/build.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def generate_c(
242242
print(f"Parsed and typechecked in {t1 - t0:.3f}s")
243243

244244
errors = Errors(options)
245-
modules, ctext = emitmodule.compile_modules_to_c(
245+
modules, ctext, mapper = emitmodule.compile_modules_to_c(
246246
result, compiler_options=compiler_options, errors=errors, groups=groups
247247
)
248248
t2 = time.time()
@@ -255,7 +255,7 @@ def generate_c(
255255
print(f"Compiled to C in {t2 - t1:.3f}s")
256256

257257
if options.mypyc_annotation_file:
258-
generate_annotated_html(options.mypyc_annotation_file, result, modules)
258+
generate_annotated_html(options.mypyc_annotation_file, result, modules, mapper)
259259

260260
return ctext, "\n".join(format_modules(modules))
261261

Diff for: mypyc/codegen/emitmodule.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ def load_scc_from_cache(
397397

398398
def compile_modules_to_c(
399399
result: BuildResult, compiler_options: CompilerOptions, errors: Errors, groups: Groups
400-
) -> tuple[ModuleIRs, list[FileContents]]:
400+
) -> tuple[ModuleIRs, list[FileContents], Mapper]:
401401
"""Compile Python module(s) to the source of Python C extension modules.
402402
403403
This generates the source code for the "shared library" module
@@ -427,12 +427,12 @@ def compile_modules_to_c(
427427

428428
modules = compile_modules_to_ir(result, mapper, compiler_options, errors)
429429
if errors.num_errors > 0:
430-
return {}, []
430+
return {}, [], Mapper({})
431431

432432
ctext = compile_ir_to_c(groups, modules, result, mapper, compiler_options)
433433
write_cache(modules, result, group_map, ctext)
434434

435-
return modules, [ctext[name] for _, name in groups]
435+
return modules, [ctext[name] for _, name in groups], mapper
436436

437437

438438
def generate_function_declaration(fn: FuncIR, emitter: Emitter) -> None:

0 commit comments

Comments
 (0)