| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| from __future__ import unicode_literals |
| |
| from caffe2.proto import caffe2_pb2 |
| from caffe2.python import cnn, workspace, core, utils, recurrent |
| |
| import argparse |
| import numpy as np |
| import time |
| |
| import logging |
| |
| logging.basicConfig() |
| log = logging.getLogger("lstm_bench") |
| log.setLevel(logging.DEBUG) |
| |
| |
| def generate_data(T, shape): |
| ''' |
| Fill a queue with input data |
| ''' |
| log.info("Generating T={} sequence batches".format(T)) |
| |
| generate_input_init_net = core.Net('generate_input_init') |
| queue = generate_input_init_net.CreateBlobsQueue( |
| [], "inputqueue", num_blobs=1, capacity=T, |
| ) |
| |
| workspace.RunNetOnce(generate_input_init_net) |
| generate_input_net = core.Net('generate_input') |
| scratch = generate_input_net.UniformFill([], ["input_scratch"], shape=shape) |
| generate_input_net.EnqueueBlobs([queue, scratch], [scratch]) |
| workspace.CreateNet(generate_input_net) |
| workspace.RunNet(generate_input_net.Proto().name, T) |
| log.info("Finished data generation") |
| return queue |
| |
| |
| def create_model(args, queue): |
| model = cnn.CNNModelHelper(name="LSTM_bench") |
| seq_lengths, hidden_init, cell_init, target = \ |
| model.net.AddExternalInputs( |
| 'seq_lengths', |
| 'hidden_init', |
| 'cell_init', |
| 'target', |
| ) |
| input_blob = model.DequeueBlobs(queue, "input_data") |
| all_hidden, last_hidden, _, last_state = recurrent.LSTM( |
| model=model, |
| input_blob=input_blob, |
| seq_lengths=seq_lengths, |
| initial_states=(hidden_init, cell_init), |
| dim_in=args.input_dim, |
| dim_out=args.hidden_dim, |
| scope="lstm1", |
| ) |
| |
| model.AddGradientOperators([all_hidden]) |
| |
| # carry states over |
| model.net.Copy(last_hidden, hidden_init) |
| model.net.Copy(last_hidden, cell_init) |
| |
| workspace.FeedBlob(hidden_init, np.zeros( |
| [1, args.batch_size, args.hidden_dim], dtype=np.float32 |
| )) |
| workspace.FeedBlob(cell_init, np.zeros( |
| [1, args.batch_size, args.hidden_dim], dtype=np.float32 |
| )) |
| return model |
| |
| |
| def Caffe2LSTM(args): |
| T = args.data_size // args.batch_size |
| input_blob_shape = [args.seq_length, args.batch_size, args.input_dim] |
| queue = generate_data(T // args.seq_length, input_blob_shape) |
| |
| workspace.FeedBlob( |
| "seq_lengths", |
| np.array([args.seq_length] * args.batch_size, dtype=np.int32) |
| ) |
| |
| model = create_model(args, queue) |
| |
| workspace.RunNetOnce(model.param_init_net) |
| workspace.CreateNet(model.net) |
| |
| last_time = time.time() |
| start_time = last_time |
| num_iters = T // args.seq_length |
| entries_per_iter = args.seq_length * args.batch_size |
| |
| # Run the Benchmark |
| log.info("------ Starting benchmark ------") |
| for iteration in range(0, num_iters, args.iters_to_report): |
| iters_once = min(args.iters_to_report, num_iters - iteration) |
| workspace.RunNet(model.net.Proto().name, iters_once) |
| new_time = time.time() |
| log.info("Iter: {} / {}. Entries Per Second: {}k". format( |
| iteration, |
| num_iters, |
| entries_per_iter * iters_once / (new_time - last_time) // 1000 |
| )) |
| last_time = new_time |
| |
| log.info("Done. Total EPS: {}k".format( |
| entries_per_iter * num_iters / (time.time() - start_time) // 1000 |
| )) |
| |
| |
| @utils.debug |
| def Benchmark(args): |
| Caffe2LSTM(args) |
| |
| |
| def GetArgumentParser(): |
| parser = argparse.ArgumentParser(description="LSTM benchmark.") |
| parser.add_argument( |
| "--hidden_dim", |
| type=int, |
| default=40, |
| help="Hidden dimension", |
| ) |
| parser.add_argument( |
| "--input_dim", |
| type=int, |
| default=40, |
| help="Input dimension", |
| ) |
| parser.add_argument( |
| "--batch_size", |
| type=int, |
| default=256, |
| help="The batch size." |
| ) |
| parser.add_argument( |
| "--seq_length", |
| type=int, |
| default=20, |
| help="Sequence length" |
| ) |
| parser.add_argument( |
| "--data_size", |
| type=int, |
| default=10000000, |
| help="Number of data points to generate" |
| ) |
| parser.add_argument( |
| "--iters_to_report", |
| type=int, |
| default=100, |
| help="Number of iteration to report progress" |
| ) |
| parser.add_argument( |
| "--gpu", |
| action="store_true", |
| help="Run all on GPU", |
| ) |
| |
| return parser |
| |
| |
| if __name__ == '__main__': |
| args = GetArgumentParser().parse_args() |
| |
| workspace.GlobalInit(['caffe2', '--caffe2_log_level=0']) |
| |
| device = core.DeviceOption( |
| caffe2_pb2.CUDA if args.gpu else caffe2_pb2.CPU, 0) |
| |
| with core.DeviceScope(device): |
| Benchmark(args) |