# Sparse benchmarks
#
# These benchmarks exercise the sparse matrix functionality. They compare
# the performance of sparse matrix routines such as
# `torch.sparse.mm(sparse, sparse)` with different backends (CPU/CUDA)
# and with other frameworks such as scipy.
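#
# Example invocation (script and dataset paths are placeholders; the flags
# match the argparse options defined below):
#
#   python matmul_bench.py --path /path/to/dataset --dataset random_pruning \
#       --operation matmul --output /tmp/matmul_bench.pkl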
import argparse
import pickle
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.utils.benchmark as benchmark_utils
from scipy import sparse
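
# Each .smtx file is expected to hold one CSR sparsity pattern in plain text;
# this layout is inferred from the parsing code that follows:
#
#   nrows, ncols, nnz             <- header, comma-separated
#   index pointers (nrows + 1)    <- space-separated integers
#   column indices (nnz)          <- space-separated integers
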
def read_matrix_params(path):
    """Read only the header (shape and nnz) of a .smtx file."""
    with open(path) as f:
        nrows, ncols, nnz = (int(el) for el in f.readline().split(', '))
    return (nrows, ncols), nnz
def load_matrix(path):
    """Load a .smtx file as a torch sparse COO tensor.

    The file only provides the sparsity pattern; the values are filled
    with random data.
    """
    with open(path) as f:
        nrows, ncols, nnz = (int(el) for el in f.readline().split(', '))
        index_pointers = [int(el) for el in f.readline().split()]
        indices = [int(el) for el in f.readline().split()]
    data = np.random.rand(nnz)
    coo = sparse.csr_matrix(
        (data, np.array(indices), np.array(index_pointers)),
        shape=(nrows, ncols)).tocoo()
    return torch.sparse_coo_tensor([coo.row, coo.col], coo.data, coo.shape)
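
# Conversion helpers. A torch sparse COO tensor stores its indices as a
# (2, nnz) array: indices[0] holds row indices and indices[1] column indices,
# which maps directly onto scipy's coo_matrix((values, (rows, cols)))
# constructor used below.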
def scipy_coo_matmul(mat1, mat2):
    # Multiply two scipy sparse matrices and convert the result back to a
    # torch sparse COO tensor so all benchmark paths return the same type.
    result = mat1.dot(mat2).tocoo()
    return torch.sparse_coo_tensor([result.row, result.col], result.data,
                                   result.shape)
def to_coo_scipy(x):
    indices_1 = x._indices().numpy()
    values_1 = x._values().numpy()
    return sparse.coo_matrix((values_1, (indices_1[0], indices_1[1])),
                             shape=x.shape)
def torch_backward(a_dense, b_dense):
    # Benchmark the backward pass of a dense matmul by reducing the result
    # to a scalar and differentiating.
    a_dense.requires_grad = True
    b_dense.requires_grad = True
    r1 = a_dense.matmul(b_dense)
    f1 = torch.sum(r1)
    f1.backward()
def sparse_torch_backward(a, b):
    # Same as torch_backward, but through the sparse kernels.
    a.requires_grad = True
    b.requires_grad = True
    r2 = torch.sparse.mm(a, b)
    f2 = torch.sparse.sum(r2)
    f2.backward()
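
# Note: torch.sparse.sum without a dim argument reduces over all elements and
# returns a 0-dim dense tensor, so .backward() can be called on it directly.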
def load_dataset(dataset_path, hidden_size, sparsity, n_limit=20):
    """Load up to n_limit (x, y) matrix pairs for the given sparsity level."""
    current_folder_path = f"{dataset_path}/{sparsity}"
    path = Path(current_folder_path)
    files = path.glob('**/*.smtx')
    xs = []
    ys = []
    print(dataset_path, hidden_size, sparsity)
    index = 0
    for elem in files:
        if index == n_limit:
            break
        print('.', end='')
        size, nnz = read_matrix_params(elem.as_posix())
        # Matrices with hidden_size columns become left operands and matrices
        # with hidden_size rows become right operands, so every zipped (x, y)
        # pair is shape-compatible for matmul.
        if size[1] == hidden_size:
            xs.append(load_matrix(elem.as_posix()))
        if size[0] == hidden_size:
            ys.append(load_matrix(elem.as_posix()))
        index += 1
    print()
    return zip(xs, ys)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Sparse Matmul Bench')
    parser.add_argument('--path', type=str, help='dataset path')
    parser.add_argument('--dataset',
                        type=str,
                        help='dataset name',
                        default='random_pruning')
    parser.add_argument('--operation',
                        type=str,
                        help='matmul or backward',
                        default='matmul')
    parser.add_argument('--output',
                        type=str,
                        help='dataframe output path',
                        default='/tmp/matmul_bench.pkl')
    args = parser.parse_args()

    print('path =', args.path)
    print('dataset =', args.dataset)
    print('operation =', args.operation)
    print('output =', args.output)

    dataset_path = args.path
    dataset_name = args.dataset
    dataset_path = f"{dataset_path}/{dataset_name}"
    df_output_path = args.output
    # Each task is (label, device, sub_label, stmt); the stmt strings are
    # evaluated by Timer with the operands bound through its `globals` dict.
    if args.operation == 'matmul':
        tasks = [
            ("matmul", "cpu", "torch", "torch.mm(dense_x, dense_y)"),
            ("matmul", "cpu", "torch.sparse", "torch.sparse.mm(tx, ty)"),
            ("matmul", "cpu", "scipy",
             "scipy_coo_matmul(scipy_varx, scipy_vary)"),
            ("matmul", "cuda", "torch",
             "torch.mm(dense_cuda_x, dense_cuda_y)"),
            ("matmul", "cuda", "torch.sparse",
             "torch.sparse.mm(tx_cuda, ty_cuda)"),
        ]
    else:
        tasks = [
            ("backward", "cpu", "torch", "torch_backward(dense_x, dense_y)"),
            ("backward", "cpu", "torch.sparse",
             "sparse_torch_backward(tx, ty)"),
            ("backward", "cuda", "torch",
             "torch_backward(dense_cuda_x, dense_cuda_y)"),
            ("backward", "cuda", "torch.sparse",
             "sparse_torch_backward(tx_cuda, ty_cuda)"),
        ]
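
    # The Timer grid below sweeps hidden size, sparsity level, task, and
    # thread count; every (x, y) pair from the dataset gets its own timer.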
    serialized_results = []
    repeats = 2
    timers = [
        benchmark_utils.Timer(
            stmt=stmt,
            globals={
                "scipy_coo_matmul": scipy_coo_matmul,
                "torch_backward": torch_backward,
                "sparse_torch_backward": sparse_torch_backward,
                "scipy_varx": to_coo_scipy(x),
                "scipy_vary": to_coo_scipy(y),
                "tx": x,
                "ty": y,
                "tx_cuda": x.cuda(),
                "ty_cuda": y.cuda(),
                "dense_cuda_x": x.to_dense().cuda(),
                "dense_cuda_y": y.to_dense().cuda(),
                "dense_x": x.to_dense(),
                "dense_y": y.to_dense(),
            },
            label=label,
            sub_label=sub_label,
            description=f"{sparsity}",
            num_threads=num_threads,
            env=device,
        ) for hidden_size in [512]
        for sparsity in [0.5, 0.7, 0.8, 0.9, 0.95, 0.98]
        for label, device, sub_label, stmt in tasks
        for num_threads in [1, 4, 8, 16]
        for x, y in load_dataset(dataset_path, hidden_size, sparsity)
    ]
    measurements = []
    for i, timer in enumerate(timers * repeats):
        m = timer.blocked_autorange(min_run_time=0.05)
        serialized_results.append(pickle.dumps(m))
        m.metadata = {
            "device": 'cuda' if m.task_spec.env.find("cuda") >= 0 else 'cpu'
        }
        measurements.append(m)
        print(f"\r{i + 1} / {len(timers) * repeats}", end="")
        sys.stdout.flush()
    print()
    comparison = benchmark_utils.Compare(
        [pickle.loads(i) for i in serialized_results])

    print("== Unformatted " + "=" * 80 + "\n" + "/" * 95 + "\n")
    comparison.print()

    print("== Formatted " + "=" * 80 + "\n" + "/" * 93 + "\n")
    comparison.trim_significant_figures()
    comparison.colorize()
    comparison.print()
    table = [(m.task_spec.sub_label, m.task_spec.description,
              m.metadata["device"], m.mean) for m in measurements]
    df = pd.DataFrame(table, columns=['method', 'sparsity', 'device', 'time'])
    df.to_pickle(df_output_path)