| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import numpy as np |
| import sys |
| from libfb import pyinit |
| |
| from caffe2.python import core, cnn, workspace |
| from caffe2.python import SparseTransformer |
| import caffe2.python.models.resnet as resnet |
| |
| |
| def AddInput(model, batch_size, db, db_type): |
| """Adds the data input part.""" |
| # Load the data from a DB. |
| data_uint8, label_orig = model.TensorProtosDBInput( |
| [], ["data_uint8", "label_orig"], batch_size=batch_size, |
| db=db, db_type=db_type) |
| # Since we are going to do float computations, cast the uint8 data
| # to float.
| data = model.Cast(data_uint8, "data_nhwc", to=core.DataType.FLOAT) |
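| # The DB supplies images in NHWC layout; the conv ops below expect NCHW.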
| data = model.NHWC2NCHW(data, "data") |
| data = model.Scale(data, data, scale=float(1. / 256)) |
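| # StopGradient marks `data` as constant, so no gradients flow back into
| # the input pipeline.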
| data = model.StopGradient(data, data) |
| |
| # Flatten the label |
| label = model.net.FlattenToVec(label_orig, "label") |
| return data, label |
| |
| |
| def AddAccuracy(model, softmax, label): |
| """Adds an accuracy op to the model""" |
| accuracy = model.Accuracy([softmax, label], "accuracy") |
| return accuracy |
| |
| |
| def AddTrainingOperators(model, softmax, label, nn_model): |
| """Adds training operators to the model.""" |
| xent = model.LabelCrossEntropy([softmax, label], 'xent') |
| loss = model.AveragedLoss(xent, "loss") |
| # For bookkeeping purposes, we will also compute the accuracy of the model. |
| AddAccuracy(model, softmax, label) |
| # Now, this is the key part of the training model: we add all the gradient |
| # operators to the model. The gradient is computed with respect to the loss |
| # that we computed above. |
| model.AddGradientOperators([loss]) |
| # Now we implement a very simple stochastic gradient descent update.
| ITER = model.Iter("iter") |
| # We use a "step" learning rate schedule where
| # lr = base_lr * gamma^(floor(iter / stepsize)).
| # Note that we are minimizing, so base_lr is negative and the update
| # moves in the DOWNHILL direction.
| |
| LR = model.LearningRate( |
| ITER, "LR", base_lr=-0.01, policy="step", stepsize=15000, gamma=0.5) |
| # ONE is a constant value that is used in the gradient update. We only need |
| # to create it once, so it is explicitly placed in param_init_net. |
| ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0) |
| # Now, for each parameter, we do the gradient updates. |
| for param in model.params: |
| # Note how we get the gradient of each parameter - CNNModelHelper keeps |
| # track of that. |
| param_grad = model.param_to_grad[param] |
| # The update is a simple weighted sum: param = param * ONE + param_grad * LR
| # (and LR is negative, so this descends the gradient).
| model.WeightedSum([param, ONE, param_grad, LR], param) |
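| # Note: this is plain SGD with no momentum or weight decay; if momentum
| # is desired, Caffe2's MomentumSGDUpdate op could be swapped in here.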
| |
| |
| def AddBookkeepingOperators(model): |
| """This adds a few bookkeeping operators that we can inspect later. |
| |
| These operators do not affect the training procedure: they only collect
| statistics and print them to a file or to the logs.
| """ |
| # Print basically prints out the content of the blob. to_file=1 routes the |
| # printed output to a file. The file is going to be stored under |
| # root_folder/[blob name] |
| model.Print('accuracy', [], to_file=1) |
| model.Print('loss', [], to_file=1) |
| # Summarizes the parameters. Different from Print, Summarize gives some |
| # statistics of the parameter, such as mean, std, min and max. |
| for param in model.params: |
| model.Summarize(param, [], to_file=1) |
| model.Summarize(model.param_to_grad[param], [], to_file=1) |
| # Now, if we really wanted to be verbose, we could summarize EVERY blob
| # that the model produces; that is probably not a good idea, because
| # summarization does not come for free. For this demo, we only show how
| # to summarize the parameters and their gradients.
| |
| |
| def AlexNet(model, data, args): |
| conv1 = model.Conv( |
| data, |
| "conv1", |
| 3, |
| 64, |
| 5, |
| ('XavierFill', {}), |
| ('ConstantFill', {}), |
| pad=2 |
| ) |
| relu1 = model.Relu(conv1, "conv1") |
| pool1 = model.MaxPool(relu1, "pool1", kernel=3, stride=2) |
| conv2 = model.Conv( |
| pool1, |
| "conv2", |
| 64, |
| 192, |
| 3, |
| ('XavierFill', {}), |
| ('ConstantFill', {}), |
| pad=1 |
| ) |
| relu2 = model.Relu(conv2, "conv2") |
| pool2 = model.MaxPool(relu2, "pool2", kernel=3, stride=2) |
| conv3 = model.Conv( |
| pool2, |
| "conv3", |
| 192, |
| 384, |
| 3, |
| ('XavierFill', {}), |
| ('ConstantFill', {}), |
| pad=1 |
| ) |
| relu3 = model.Relu(conv3, "conv3") |
| conv4 = model.Conv( |
| relu3, |
| "conv4", |
| 384, |
| 256, |
| 3, |
| ('XavierFill', {}), |
| ('ConstantFill', {}), |
| pad=1 |
| ) |
| relu4 = model.Relu(conv4, "conv4") |
| conv5 = model.Conv( |
| relu4, |
| "conv5", |
| 256, |
| 256, |
| 3, |
| ('XavierFill', {}), |
| ('ConstantFill', {}), |
| pad=1 |
| ) |
| relu5 = model.Relu(conv5, "conv5") |
| pool5 = model.MaxPool(relu5, "pool5", kernel=3, stride=2) |
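| # With 32x32 CIFAR inputs, the three stride-2 pools shrink the feature
| # maps 32 -> 15 -> 7 -> 3, so pool5 is 256 x 3 x 3.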
| fc6 = model.FC( |
| pool5, "fc6", 256 * 3 * 3, 4096, ('XavierFill', {}), |
| ('ConstantFill', {}) |
| ) |
| relu6 = model.Relu(fc6, "fc6") |
| fc7 = model.FC( |
| relu6, "fc7", 4096, 4096, ('XavierFill', {}), ('ConstantFill', {}) |
| ) |
| relu7 = model.Relu(fc7, "fc7") |
| fc8 = model.FC( |
| relu7, "fc8", 4096, 10, ('XavierFill', {}), ('ConstantFill', {}) |
| ) |
| softmax = model.Softmax(fc8, "pred") |
| return softmax |
| |
| |
| def AlexNet_Prune(model, data, args): |
| conv1 = model.Conv( |
| data, |
| "conv1", |
| 3, |
| 64, |
| 5, |
| ('XavierFill', {}), |
| ('ConstantFill', {}), |
| pad=2 |
| ) |
| relu1 = model.Relu(conv1, "conv1") |
| pool1 = model.MaxPool(relu1, "pool1", kernel=3, stride=2) |
| conv2 = model.Conv( |
| pool1, |
| "conv2", |
| 64, |
| 192, |
| 3, |
| ('XavierFill', {}), |
| ('ConstantFill', {}), |
| pad=1 |
| ) |
| relu2 = model.Relu(conv2, "conv2") |
| pool2 = model.MaxPool(relu2, "pool2", kernel=3, stride=2) |
| conv3 = model.Conv( |
| pool2, |
| "conv3", |
| 192, |
| 384, |
| 3, |
| ('XavierFill', {}), |
| ('ConstantFill', {}), |
| pad=1 |
| ) |
| relu3 = model.Relu(conv3, "conv3") |
| conv4 = model.Conv( |
| relu3, |
| "conv4", |
| 384, |
| 256, |
| 3, |
| ('XavierFill', {}), |
| ('ConstantFill', {}), |
| pad=1 |
| ) |
| relu4 = model.Relu(conv4, "conv4") |
| conv5 = model.Conv( |
| relu4, |
| "conv5", |
| 256, |
| 256, |
| 3, |
| ('XavierFill', {}), |
| ('ConstantFill', {}), |
| pad=1 |
| ) |
| relu5 = model.Relu(conv5, "conv5") |
| pool5 = model.MaxPool(relu5, "pool5", kernel=3, stride=2) |
| fc6 = model.FC_Prune( |
| pool5, "fc6", 256 * 3 * 3, 4096, ('XavierFill', {}), |
| ('ConstantFill', {}), |
| mask_init=None, |
| threshold=args.prune_thres * 2, |
| need_compress_rate=True, |
| comp_lb=args.comp_lb |
| ) |
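| # With need_compress_rate=True, FC_Prune returns both the output blob
| # and a compression-rate blob; the latter is printed to the logs below.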
| compress_fc6 = fc6[1] |
| model.Print(compress_fc6, [], to_file=0) |
| fc6 = fc6[0] |
| relu6 = model.Relu(fc6, "fc6") |
| fc7 = model.FC_Prune( |
| relu6, "fc7", 4096, 4096, ('XavierFill', {}), ('ConstantFill', {}), |
| mask_init=None, |
| threshold=args.prune_thres, |
| need_compress_rate=True, |
| comp_lb=args.comp_lb |
| ) |
| compress_fc7 = fc7[1] |
| model.Print(compress_fc7, [], to_file=0) |
| fc7 = fc7[0] |
| relu7 = model.Relu(fc7, "fc7") |
| fc8 = model.FC( |
| relu7, "fc8", 4096, 10, ('XavierFill', {}), ('ConstantFill', {}) |
| ) |
| softmax = model.Softmax(fc8, "pred") |
| return softmax |
| |
| |
| def ConvBNReLUDrop(model, currentblob, outputblob, |
| input_dim, output_dim, drop_ratio=None): |
| currentblob = model.Conv( |
| currentblob, |
| outputblob, |
| input_dim, |
| output_dim, |
| 3, |
| ('XavierFill', {}), |
| ('ConstantFill', {}), |
| stride=1, |
| pad=1 |
| ) |
| currentblob = model.SpatialBN(currentblob, |
| str(currentblob) + '_bn', |
| output_dim, epsilon=1e-3) |
| currentblob = model.Relu(currentblob, currentblob) |
| if drop_ratio: |
| currentblob = model.Dropout(currentblob, |
| str(currentblob) + '_dropout', |
| ratio=drop_ratio) |
| return currentblob |
| |
| |
| def VGG(model, data, args): |
| """Adds the VGG-Like kaggle winner Model on Cifar-10 |
| The original blog about the model can be found on: |
| http://torch.ch/blog/2015/07/30/cifar.html |
| """ |
| conv1 = ConvBNReLUDrop(model, data, 'conv1', 3, 64, drop_ratio=0.3) |
| conv2 = ConvBNReLUDrop(model, conv1, 'conv2', 64, 64) |
| pool2 = model.MaxPool(conv2, 'pool2', kernel=2, stride=1) |
| conv3 = ConvBNReLUDrop(model, pool2, 'conv3', 64, 128, drop_ratio=0.4) |
| conv4 = ConvBNReLUDrop(model, conv3, 'conv4', 128, 128) |
| pool4 = model.MaxPool(conv4, 'pool4', kernel=2, stride=2) |
| |
| conv5 = ConvBNReLUDrop(model, pool4, 'conv5', 128, 256, drop_ratio=0.4) |
| conv6 = ConvBNReLUDrop(model, conv5, 'conv6', 256, 256, drop_ratio=0.4) |
| conv7 = ConvBNReLUDrop(model, conv6, 'conv7', 256, 256) |
| pool7 = model.MaxPool(conv7, 'pool7', kernel=2, stride=2) |
| |
| conv8 = ConvBNReLUDrop(model, pool7, 'conv8', 256, 512, drop_ratio=0.4) |
| conv9 = ConvBNReLUDrop(model, conv8, 'conv9', 512, 512, drop_ratio=0.4) |
| conv10 = ConvBNReLUDrop(model, conv9, 'conv10', 512, 512) |
| pool10 = model.MaxPool(conv10, 'pool10', kernel=2, stride=2) |
| |
| conv11 = ConvBNReLUDrop(model, pool10, 'conv11', |
| 512, 512, drop_ratio=0.4) |
| conv12 = ConvBNReLUDrop(model, conv11, 'conv12', |
| 512, 512, drop_ratio=0.4) |
| conv13 = ConvBNReLUDrop(model, conv12, 'conv13', 512, 512) |
| pool13 = model.MaxPool(conv13, 'pool13', kernel=2, stride=2) |
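| # With 32x32 inputs, the pools reduce the spatial size to 1x1
| # (32 -> 31 -> 15 -> 7 -> 3 -> 1), so fc14 sees 512 * 1 * 1 inputs.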
| |
| fc14 = model.FC( |
| pool13, "fc14", 512, 512, ('XavierFill', {}), |
| ('ConstantFill', {}) |
| ) |
| relu14 = model.Relu(fc14, "fc14") |
| pred = model.FC( |
| relu14, "pred", 512, 10, ('XavierFill', {}), |
| ('ConstantFill', {}) |
| ) |
| softmax = model.Softmax(pred, 'softmax') |
| return softmax |
| |
| |
| def ResNet110(model, data, args): |
| """ |
| Residual net as described in section 4.2 of He at. al. (2015) |
| """ |
| return resnet.create_resnet_32x32( |
| model, |
| data, |
| num_input_channels=3, |
| num_groups=18, |
| num_labels=10, |
| ) |
| |
| |
| def ResNet20(model, data, args): |
| """ |
| Residual net as described in section 4.2 of He at. al. (2015) |
| """ |
| return resnet.create_resnet_32x32( |
| model, |
| data, |
| num_input_channels=3, |
| num_groups=3, |
| num_labels=10, |
| ) |
| |
| |
| def sparse_transform(model): |
| print("====================================================") |
| print(" Sparse Transformer ") |
| print("====================================================") |
| net_root, net_name2id, net_id2node = SparseTransformer.netbuilder(model) |
| SparseTransformer.Prune2Sparse( |
| net_root, |
| net_id2node, |
| net_name2id, |
| model.net.Proto().op, |
| model) |
| op_list = SparseTransformer.net2list(net_root) |
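| # Replace the net's operators in place with the sparse-transformed list.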
| del model.net.Proto().op[:] |
| model.net.Proto().op.extend(op_list) |
| |
| |
| def test_sparse(test_model): |
| # Sparse Implementation |
| sparse_transform(test_model) |
| sparse_test_accuracy = np.zeros(100) |
| for i in range(100): |
| workspace.RunNet(test_model.net.Proto().name) |
| sparse_test_accuracy[i] = workspace.FetchBlob('accuracy') |
| # After the execution is done, print the recorded values.
| print('Sparse Test Accuracy:') |
| print(sparse_test_accuracy) |
| print('sparse_test_accuracy: %f' % sparse_test_accuracy.mean()) |
| |
| |
| def trainNtest(model_gen, args): |
| print("Print running on GPU: %s" % args.gpu) |
| train_model = cnn.CNNModelHelper( |
| "NCHW", |
| name="Cifar_%s" % (args.model), |
| use_cudnn=True, |
| cudnn_exhaustive_search=True) |
| data, label = AddInput( |
| train_model, batch_size=64, |
| db=args.train_input_path, |
| db_type=args.db_type) |
| softmax = model_gen(train_model, data, args) |
| AddTrainingOperators(train_model, softmax, label, args.model) |
| AddBookkeepingOperators(train_model) |
| |
| if args.gpu: |
| train_model.param_init_net.RunAllOnGPU() |
| train_model.net.RunAllOnGPU() |
| |
| # The parameter initialization network only needs to be run once. |
| workspace.RunNetOnce(train_model.param_init_net) |
| |
| # Now, since we are going to run the main network multiple times, |
| # we first create the network - which puts the actual network generated |
| # from the protobuf into the workspace - and then call RunNet by |
| # its name. |
| workspace.CreateNet(train_model.net) |
| |
| # On the Python side, we will create two numpy arrays to record the
| # accuracy and loss once every `record` iterations.
| epoch_num = 200 |
| epoch_iters = 1000 |
| record = 1000 |
| |
| accuracy = np.zeros(int(epoch_num * epoch_iters / record)) |
| loss = np.zeros(int(epoch_num * epoch_iters / record)) |
| # Now, we will manually run the network for epoch_num epochs of
| # epoch_iters iterations each.
| for e in range(epoch_num): |
| for i in range(epoch_iters): |
| workspace.RunNet(train_model.net.Proto().name) |
| if i % record == 0:
| # Index records by the global iteration count.
| count = (e * epoch_iters + i) // record
| accuracy[count] = workspace.FetchBlob('accuracy') |
| loss[count] = workspace.FetchBlob('loss') |
| print('Train Loss: {}'.format(loss[count])) |
| print('Train Accuracy: {}'.format(accuracy[count])) |
| |
| # Testing model. We will set the batch size to 100, so that the testing
| # pass is 100 iterations (10,000 images in total).
| # For the testing model, we need the data input part, the main model
| # part, and an accuracy part. Note that init_params is set to False
| # because we will be using the parameters obtained from the training run.
| test_model = cnn.CNNModelHelper( |
| order="NCHW", name="cifar10_test", init_params=False) |
| data, label = AddInput( |
| test_model, batch_size=100, |
| db=args.test_input_path, |
| db_type=args.db_type) |
| softmax = model_gen(test_model, data, args) |
| AddAccuracy(test_model, softmax, label) |
| |
| if args.gpu: |
| test_model.param_init_net.RunAllOnGPU() |
| test_model.net.RunAllOnGPU() |
| # Initialize the parameters once, create the test net, and then run the
| # test pass and report the test accuracy.
| workspace.RunNetOnce(test_model.param_init_net) |
| workspace.CreateNet(test_model.net) |
| # On the Python side, we will create a numpy array to record the test
| # accuracy for each of the 100 test iterations.
| test_accuracy = np.zeros(100) |
| for i in range(100): |
| workspace.RunNet(test_model.net.Proto().name) |
| test_accuracy[i] = workspace.FetchBlob('accuracy') |
| |
| print('Train Loss:') |
| print(loss) |
| print('Train Accuracy:') |
| print(accuracy) |
| print('Test Accuracy:') |
| print(test_accuracy) |
| print('test_accuracy: %f' % test_accuracy.mean()) |
| |
| if args.model == 'AlexNet_Prune': |
| test_sparse(test_model) |
| |
| |
| MODEL_TYPE_FUNCTIONS = { |
| 'AlexNet': AlexNet, |
| 'AlexNet_Prune': AlexNet_Prune, |
| 'VGG': VGG, |
| 'ResNet-110': ResNet110, |
| 'ResNet-20': ResNet20 |
| } |
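|
| # Example invocation (script name and paths are hypothetical):
| #   python cifar10_train.py --model AlexNet_Prune \
| #       --train_input_path /path/to/cifar10-train-lmdb \
| #       --test_input_path /path/to/cifar10-test-lmdb \
| #       --db_type lmdb --gpu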
| |
| if __name__ == '__main__': |
| # It is hard to initialize flags correctly ahead of time, so append
| # this one here.
| sys.argv.append('--caffe2_keep_on_shrink') |
| |
| # FbcodeArgumentParser calls initFacebook which is necessary for NNLoader |
| # initialization |
| parser = pyinit.FbcodeArgumentParser(description='cifar-10 Tutorial') |
| |
| # arguments starting with single '-' are compatible with Lua |
| parser.add_argument("--model", type=str, default='AlexNet', |
| choices=MODEL_TYPE_FUNCTIONS.keys(), |
| help="The batch size of benchmark data.") |
| parser.add_argument("--prune_thres", type=float, default=0.0001, |
| help="Pruning threshold for FC layers.") |
| parser.add_argument("--comp_lb", type=float, default=0.02, |
| help="Compression Lower Bound for FC layers.") |
| parser.add_argument("--gpu", default=False, |
| help="Whether to run on gpu", type=bool) |
| parser.add_argument("--train_input_path", type=str, |
| default=None, |
| required=True, |
| help="Path to the database for training data") |
| parser.add_argument("--test_input_path", type=str, |
| default=None, |
| required=True, |
| help="Path to the database for test data") |
| parser.add_argument("--db_type", type=str, |
| default="lmbd", help="Database type") |
| args = parser.parse_args() |
| |
| # If you would like to see some really detailed initializations, |
| # you can change --caffe2_log_level=0 to --caffe2_log_level=-1 |
| core.GlobalInit(['caffe2', '--caffe2_log_level=0']) |
| |
| trainNtest(MODEL_TYPE_FUNCTIONS[args.model], args) |