import logging
import os
import re
from collections import defaultdict

import click
import pandas as pd
from tabulate import tabulate


def gmean(s):
    """Geometric mean of a pandas Series."""
    return s.product() ** (1 / len(s))


def find_csv_files(path, perf_compare):
    """
    Recursively search a directory and its subdirectories for benchmark CSV
    files whose names match the expected CI naming convention.
    """

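    # Nightly-job CSVs are named <compiler>_<suite>_<dtype>_<mode>_<device>_performance.csv;
    # manually triggered perf-compare runs instead produce training_<suite>.csv.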
    def is_csv(f):
        if perf_compare:
            regex = r"training_(torchbench|huggingface|timm_models)\.csv"
            return re.match(regex, f) is not None
        else:
            return f.endswith("_performance.csv")

    csv_files = []
    for root, dirs, files in os.walk(path):
        for file in files:
            if is_csv(file):
                csv_files.append(os.path.join(root, file))
    return csv_files


@click.command()
@click.argument("directory", default="artifacts")
@click.option("--amp", is_flag=True)
@click.option("--float32", is_flag=True)
@click.option(
    "--perf-compare",
    is_flag=True,
    help="Set if the CSVs were generated by running the action manually rather than taken from the nightly job",
)
def main(directory, amp, float32, perf_compare):
    """
    Given a directory containing multiple CSVs from --performance benchmark
    runs, aggregate the data and print summary statistics similar to the web UI at
    https://torchci-git-fork-huydhn-add-compilers-bench-74abf8-fbopensource.vercel.app/benchmark/compilers

    This is most useful if you've downloaded CSVs from CI and need to quickly
    look at aggregate stats. The CSVs are expected to follow exactly the same
    naming convention that is used in CI.

    You may also be interested in
    https://docs.google.com/document/d/1DQQxIgmKa3eF0HByDTLlcJdvefC4GwtsklJUgLs09fQ/edit#
    which explains how to interpret the raw CSV data.
    """
    dtypes = ["amp", "float32"]
    if amp and not float32:
        dtypes = ["amp"]
    if float32 and not amp:
        dtypes = ["float32"]

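    # Group CSVs by basename so that shards of the same benchmark configuration
    # are concatenated into a single DataFrame below.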
    dfs = defaultdict(list)
    for f in find_csv_files(directory, perf_compare):
        try:
            dfs[os.path.basename(f)].append(pd.read_csv(f))
        except Exception:
            # Log which file was at fault, then re-raise so a malformed CSV
            # fails the run loudly instead of being silently skipped.
            logging.warning("failed parsing %s", f)
            raise

    # dtype -> statistic -> benchmark -> compiler -> value
    results = defaultdict(  # dtype
        lambda: defaultdict(  # statistic
            lambda: defaultdict(dict)  # benchmark -> compiler -> value
        )
    )

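    # Recover compiler, benchmark suite, dtype, mode, and device from each filename.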
    for k, v in sorted(dfs.items()):
        if perf_compare:
            regex = r"training_(torchbench|huggingface|timm_models)\.csv"
            m = re.match(regex, k)
            assert m is not None, k
            compiler = "inductor"
            benchmark = m.group(1)
            dtype = "float32"
            mode = "training"
            device = "cuda"
        else:
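            # e.g. "inductor_huggingface_amp_training_cuda_performance.csv"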
            regex = (
                "(.+)_"
                "(torchbench|huggingface|timm_models)_"
                "(float32|amp)_"
                "(inference|training)_"
                "(cpu|cuda)_"
                r"performance\.csv"
            )
            m = re.match(regex, k)
            assert m is not None, k
            compiler = m.group(1)
            benchmark = m.group(2)
            dtype = m.group(3)
            mode = m.group(4)
            device = m.group(5)

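        # Merge shards for this configuration and drop models that failed to run
        # (missing values or a reported speedup of 0).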
        df = pd.concat(v)
        df = df.dropna().query("speedup != 0")

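        # Ratio-style metrics (speedup, memory compression) are aggregated with a
        # geometric mean; compilation latency with an arithmetic mean.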
        statistics = {
            "speedup": gmean(df["speedup"]),
            "comptime": df["compilation_latency"].mean(),
            "memory": gmean(df["compression_ratio"]),
        }

        if dtype not in dtypes:
            continue

        for statistic, value in statistics.items():
            results[f"{device} {dtype} {mode}"][statistic][benchmark][compiler] = value

    descriptions = {
        "speedup": "Geometric mean speedup",
        "comptime": "Mean compilation time",
        "memory": "Peak memory compression ratio",
    }

    for dtype_mode, r in results.items():
        print(f"# {dtype_mode} performance results")
        for statistic, data in r.items():
            print(f"## {descriptions[statistic]}")

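            # Rows are compilers, columns are benchmark suites.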
            table = []
            for row_name in next(iter(data.values())):
                row = [row_name]
                for col_name in data:
                    row.append(round(data[col_name][row_name], 2))
                table.append(row)

            headers = list(data.keys())
            print(tabulate(table, headers=headers))
            print()


if __name__ == "__main__":
    main()