| """ |
| A tool to merge multiple csv files (generated by torchbench.py/etc) into a single csv file. |
| Performs an outer join based on the benchmark name, filling in any missing data with zeros. |
| """ |
| import argparse |
| import functools |
| import operator |
| from pathlib import Path |
| |
| import pandas as pd |
| |
| |
def longest_common_prefix(strs):
    """Return the leading text shared by every string in ``strs``.

    Strings are compared position by position, up to the length of the
    shortest one, and the text before the first mismatch is returned.

    Args:
        strs: Iterable of strings to compare. May be empty.

    Returns:
        The shared prefix up to the first mismatching position. An empty
        string is returned when ``strs`` is empty, and also when no mismatch
        is found at all (i.e. the shortest string is itself a prefix of every
        other string).
    """
    strs = list(strs)
    if not strs:
        # min() raises ValueError on an empty sequence; there is nothing to
        # share, so report an empty prefix instead.
        return ""

    shortest_str = min(strs, key=len)
    for i, char in enumerate(shortest_str):
        if any(other[i] != char for other in strs):
            return shortest_str[:i]

    # NOTE(review): no mismatch means the shortest string prefixes all the
    # others. Returning "" here (rather than the shortest string) is kept
    # as-is because main() in this file derives column names and the default
    # output name from this value -- confirm before changing.
    return ""
| |
| |
def main():
    """Merge one column from several CSV files into a single CSV.

    Command-line arguments:
        --field/-f: column to take from every input (default ``speedup``).
        --output/-o: path of the merged CSV. Defaults to the common prefix of
            the input file stems (surrounding underscores stripped) plus
            ``.csv``, or ``output.csv`` when that prefix is empty.
        inputs: one or more CSV files to merge.

    Each input contributes its ``--field`` column, renamed to the part of the
    file stem that follows the common prefix. Inputs are outer-joined on the
    ``name`` column, missing values are filled with 0, and rows in which every
    merged column is 0 are dropped before writing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--field", "-f", default="speedup", type=str)
    parser.add_argument("--output", "-o", type=str)
    # At least one input is required: with none there is nothing to merge and
    # the code below would fail with an unhelpful traceback.
    parser.add_argument("inputs", nargs="+")
    args = parser.parse_args()

    prefix = longest_common_prefix([Path(inp).stem for inp in args.inputs])
    frames = []
    fields = []
    for inp in args.inputs:
        # Name each merged column after the distinguishing part of the stem.
        field = Path(inp).stem[len(prefix):]
        frame = pd.read_csv(inp)
        if args.field not in frame.columns:
            # DataFrame.filter silently ignores absent labels, which would
            # otherwise surface later as a KeyError far from the cause.
            parser.error(f"{inp} has no column named {args.field!r}")
        fields.append(field)
        frames.append(
            frame.filter(["name", args.field]).rename(columns={args.field: field})
        )

    df = frames[0]
    for other in frames[1:]:
        df = df.merge(other, how="outer", on="name")
    df = df.fillna(0)

    # drop rows where all backends failed (every merged column is zero)
    df = df[(df[fields] != 0).any(axis=1)]

    prefix = prefix.strip("_") or "output"
    output = args.output or f"{prefix}.csv"
    print(f"Writing {output}")
    df.to_csv(output, index=False)
| |
| |
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()