# flake8: noqa: B950
from ._internal import register_artifact, register_log
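
# A minimal usage sketch (an illustration, not part of this module's logic):
# the names registered below become the settings accepted by the TORCH_LOGS
# environment variable and by torch._logging.set_logs(). For example
# (my_script.py stands in for your own program):
#
#   TORCH_LOGS="+dynamo,guards,graph_breaks" python my_script.py
#
# or, equivalently, from Python:
#
#   import logging
#   import torch
#   torch._logging.set_logs(dynamo=logging.DEBUG, guards=True, graph_breaks=True)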

DYNAMIC = ["torch.fx.experimental.symbolic_shapes", "torch.fx.experimental.sym_node"]
DISTRIBUTED = [
    "torch.distributed",
    "torch._dynamo.backends.distributed",
    "torch.nn.parallel.distributed",
]

register_log("dynamo", ["torch._dynamo", *DYNAMIC])
register_log("aot", ["torch._functorch.aot_autograd", "torch._functorch._aot_autograd"])
register_log("autograd", "torch.autograd")
register_log("inductor", ["torch._inductor", "torch._inductor.cudagraph_trees"])

register_artifact(
    "cudagraphs",
    "Logs information from wrapping inductor generated code with cudagraphs.",
)

register_log("dynamic", DYNAMIC)
register_log("torch", "torch")
register_log("distributed", DISTRIBUTED)
register_log(
    "dist_c10d", ["torch.distributed.distributed_c10d", "torch.distributed.rendezvous"]
)
register_log(
    "dist_ddp", ["torch.nn.parallel.distributed", "torch._dynamo.backends.distributed"]
)
register_log("dist_fsdp", ["torch.distributed.fsdp"])
register_log("onnx", "torch.onnx")
register_log("export", ["torch._dynamo", "torch.export", *DYNAMIC])

register_artifact(
    "guards",
    "This prints the guards for every compiled Dynamo frame. It does not tell you where the guards come from.",
    visible=True,
)
register_artifact("verbose_guards", "", off_by_default=True)
register_artifact(
    "bytecode",
    "Prints the original and modified bytecode from Dynamo. Mostly useful if you're debugging our bytecode generation in Dynamo.",
    off_by_default=True,
)
register_artifact(
    "graph",
    "Prints the dynamo traced graph (prior to AOTDispatch) in a table. If you prefer python code use `graph_code` instead. ",
)
register_artifact("graph_code", "Like `graph`, but gives you the Python code instead.")
register_artifact(
    "graph_sizes", "Prints the sizes of all FX nodes in the dynamo graph."
)
register_artifact(
    "trace_source",
    "As we execute bytecode, prints the file name / line number we are processing and the actual source code. Useful with `bytecode`",
)
register_artifact(
    "trace_call",
    "Like trace_source, but it will give you the per-expression blow-by-blow if your Python is recent enough.",
)
register_artifact(
    "aot_graphs",
    "Prints the FX forward and backward graph generated by AOTDispatch, after partitioning. Useful to understand what's being given to Inductor",
    visible=True,
)
register_artifact(
    "aot_joint_graph",
    "Print FX joint graph from AOTAutograd, prior to partitioning. Useful for debugging partitioning",
)
register_artifact(
    "post_grad_graphs",
    "Prints the FX graph generated by post grad passes. Useful to understand what's being given to Inductor after post grad passes",
)
register_artifact(
    "compiled_autograd",
    "Prints various logs in compiled_autograd, including but not limited to the graphs. Useful for debugging compiled_autograd.",
    visible=True,
)
register_artifact(
    "ddp_graphs",
    "Only relevant for compiling DDP. DDP splits into multiple graphs to trigger comms early. This will print each individual graph here.",
)
register_artifact(
    "recompiles",
    "Prints the reason why we recompiled a graph. Very, very useful.",
    visible=True,
)
register_artifact(
    "recompiles_verbose",
    "Prints all guard checks that fail during a recompilation. "
    "At runtime, Dynamo will stop at the first failed check for each failing guard. "
    "So not all logged failing checks are actually ran by Dynamo.",
    visible=True,
    off_by_default=True,
)
register_artifact(
    "graph_breaks",
    "Prints whenever Dynamo decides that it needs to graph break (i.e. create a new graph). Useful for debugging why torch.compile has poor performance",
    visible=True,
)
register_artifact(
    "not_implemented",
    "Prints log messages whenever we return NotImplemented in a multi-dispatch, letting you trace through each object we attempted to dispatch to",
)
register_artifact(
    "output_code",
    "Prints the code that Inductor generates (either Triton or C++)",
    off_by_default=True,
    visible=True,
)
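
# For example, to inspect the Triton or C++ kernels Inductor emits for a
# compiled function (a sketch; output_code is registered above as off by
# default, so it must be named explicitly):
#
#   TORCH_LOGS="output_code" python my_script.py
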
register_artifact(
    "schedule",
    "Inductor scheduler information. Useful if working on Inductor fusion algo",
    off_by_default=True,
)
register_artifact("perf_hints", "", off_by_default=True)
register_artifact("onnx_diagnostics", "", off_by_default=True)
register_artifact(
    "fusion",
    "Detailed Inductor fusion decisions. More detailed than 'schedule'",
    off_by_default=True,
)
register_artifact(
    "overlap",
    "Detailed Inductor compute/comm overlap decisions",
    off_by_default=True,
)
register_artifact(
    "sym_node",
    "Logs extra info for various SymNode operations",
    off_by_default=True,
)

register_artifact("custom_format_test_artifact", "Testing only", log_format="")