blob: 8ae272897903f9bd4cbe0ea16bdf1f0b2bed3727 [file] [log] [blame]
import csv
import os
import re
import sys
# This script takes the logs produced by the benchmark scripts (e.g.,
# torchbench.py) and parses it into a CSV file that summarizes what
# is failing and why. It is kept separate from the benchmark script
# emitting a more structured output as it is often more convenient
# to iterate quickly on log files offline instead of having to make
# a change to the benchmark script and then do a full sweep to see
# the updates.
#
# This script is not very well written, feel free to rewrite it as necessary
# Expect exactly one argument: the path to the benchmark log file.
assert len(sys.argv) == 2
# Read the whole log up front; use a context manager so the file handle
# is closed deterministically instead of relying on GC.
with open(sys.argv[1]) as log_file:
    full_log = log_file.read()
# If the log contains a gist URL, extract it so we can include it in the CSV
gist_url = ""
m = re.search(r"https://gist.github.com/[a-f0-9]+", full_log)
if m is not None:
    gist_url = m.group(0)
# Split the log into an entry per benchmark.  The pattern matches the
# per-benchmark banner lines, capturing the model name from either the
# "cuda train/eval <name>" line or the "<name> failed to load" warning,
# so the resulting list is a flat sequence of (name, name2, log) triples
# (exactly one of name/name2 is non-None per triple).
entries = re.split(
    r"(?:cuda (?:train|eval) +([^ ]+)|WARNING:root:([^ ]+) failed to load)", full_log
)[1:]
# Entries schema example:
# `['hf_Bert', None, '
# PASS\nTIMING: entire_frame_compile:1.80925 backend_compile:6e-05\nDynamo produced 1 graph(s) covering 367 ops\n']`
def chunker(seq, size):
    """Yield successive *size*-length slices of *seq*; the last may be shorter."""
    for start in range(0, len(seq), size):
        yield seq[start : start + size]
# c counts entries we failed to classify (reported at the end);
# i is a running count of processed entries.
c = 0
i = 0

# Column order for the summary CSV emitted on stdout.
FIELDNAMES = [
    "bench",
    "name",
    "result",
    "component",
    "context",
    "explain",
    "frame_time",
    "backend_time",
    "graph_count",
    "op_count",
    "graph_breaks",
    "unique_graph_breaks",
]
out = csv.DictWriter(sys.stdout, FIELDNAMES, dialect="excel")
out.writeheader()
# Emit the gist URL (if any) as the first data row, in the "explain"
# column, so it is visible at the top of the resulting spreadsheet.
out.writerow({"explain": gist_url})
# Sometimes backtraces will be in third party code, which results
# in very long file names. Delete the absolute path in this case.
def normalize_file(f):
    """Shorten a traceback file path for CSV display.

    Backtraces into third party code live under site-packages and have very
    long absolute prefixes; strip everything up to and including the first
    "site-packages/" component.  Other paths are reported relative to the
    current directory.
    """
    if "site-packages/" in f:
        # maxsplit=1 (not 2, as previously) keeps the complete suffix even
        # when "site-packages/" appears more than once in the path; with
        # maxsplit=2 the [1] element was only the segment between the first
        # and second occurrence.
        return f.split("site-packages/", 1)[1]
    else:
        return os.path.relpath(f)
# Assume we run torchbench, huggingface, timm_models in that order
# (as output doesn't say which suite the benchmark is part of)
# TODO: make this more robust
bench = "torchbench"
# 3 = 1 + number of matches in the entries split regex
for name, name2, log in chunker(entries, 3):
    # Exactly one of the two capture groups matched in the split regex;
    # fall back to the "failed to load" name when the train/eval one is None.
    if name is None:
        name = name2
    # Detect suite transitions by the first model of the next suite
    # appearing in the stream (relies on the assumed run order above).
    if name.startswith("Albert"):
        bench = "huggingface"
    elif name.startswith("adv_inc"):
        bench = "timm_models"
    # Payload that will go into the csv
    # NOTE: the checks below are ordered by precedence — a later match
    # overwrites r, so e.g. a log containing both PASS and TIMEOUT is
    # classified as FAIL TIMEOUT.
    r = "UNKNOWN"
    explain = ""
    component = ""
    context = ""
    if "PASS" in log:
        r = "PASS"
    if "TIMEOUT" in log:
        r = "FAIL TIMEOUT"
    if "Accuracy failed" in log:
        r = "FAIL ACCURACY"
    # Attempt to extract out useful information from the traceback
    # Keep only the first exception of a chained traceback ...
    log = log.split(
        "The above exception was the direct cause of the following exception"
    )[0]
    # ... and only the text after the "Traceback" banner, if one exists ...
    split = log.split("Traceback (most recent call last)", maxsplit=1)
    if len(split) == 2:
        log = split[1]
    # ... dropping any secondary "Original traceback" section.
    log = log.split("Original traceback:")[0]
    # Pull out (file, line number, source line, exception message) from the
    # innermost frame of the traceback.
    m = re.search(
        r'File "([^"]+)", line ([0-9]+), in .+\n +(.+)\n([A-Za-z]+(?:Error|Exception|NotImplementedError): ?.*)',
        log,
    )
    if m is not None:
        r = "FAIL"
        component = f"{normalize_file(m.group(1))}:{m.group(2)}"
        context = m.group(3)
        explain = f"{m.group(4)}"
    else:
        # Bare "AssertionError" lines carry no message, so the pattern
        # above misses them; handle that case separately.
        m = re.search(
            r'File "([^"]+)", line ([0-9]+), in .+\n +(.+)\nAssertionError', log
        )
        if m is not None:
            r = "FAIL"
            component = f"{normalize_file(m.group(1))}:{m.group(2)}"
            context = m.group(3)
            explain = "AssertionError"
    # Sometimes, the benchmark will say FAIL without any useful info
    # See https://github.com/pytorch/torchdynamo/issues/1910
    if "FAIL" in log:
        r = "FAIL"
    # Anything still UNKNOWN is counted and reported on stderr at the end.
    if r == "UNKNOWN":
        c += 1
    # Parse "TIMING: entire_frame_compile:<X> backend_compile:<Y>" lines
    # into float columns (left as None when absent).
    backend_time = None
    frame_time = None
    if "TIMING:" in log:
        result = re.search("TIMING:(.*)\n", log).group(1)
        split_str = result.split("backend_compile:")
        if len(split_str) == 2:
            backend_time = float(split_str[1])
            frame_time = float(split_str[0].split("entire_frame_compile:")[1])
    if "STATS:" in log:
        result = re.search("STATS:(.*)\n", log).group(1)
        # call_* op count: 970 | FakeTensor.__torch_dispatch__:35285 | ProxyTorchDispatchMode.__torch_dispatch__:13339
        # NOTE: split_all is currently unused — parsing the pipe-separated
        # stats is unfinished, per the TODO below.
        split_all = result.split("|")
        # TODO: rewrite this to work with arbitrarily many stats
    # Graph/op/break counts from the Dynamo summary line (None when absent;
    # captured values are kept as strings for the CSV).
    graph_count = None
    op_count = None
    graph_breaks = None
    unique_graph_breaks = None
    if m := re.search(
        r"Dynamo produced (\d+) graphs covering (\d+) ops with (\d+) graph breaks \((\d+) unique\)",
        log,
    ):
        graph_count = m.group(1)
        op_count = m.group(2)
        graph_breaks = m.group(3)
        unique_graph_breaks = m.group(4)
    # If the context string is too long, don't put it in the CSV.
    # This is a hack to try to make it more likely that Google Sheets will
    # offer to split columns
    if len(context) > 78:
        context = ""
    # Temporary file names are meaningless, report it's generated code in this
    # case
    if "/tmp/" in component:
        component = "generated code"
        context = ""
    out.writerow(
        {
            "bench": bench,
            "name": name,
            "result": r,
            "component": component,
            "context": context,
            "explain": explain,
            "frame_time": frame_time,
            "backend_time": backend_time,
            "graph_count": graph_count,
            "op_count": op_count,
            "graph_breaks": graph_breaks,
            "unique_graph_breaks": unique_graph_breaks,
        }
    )
    i += 1
# Report how many entries could not be classified (goes to stderr so it
# doesn't pollute the CSV on stdout).
if c:
    print(f"failed to classify {c} entries", file=sys.stderr)