| |
| |
| import collections |
| |
| import numpy as np |
| from caffe2.python import utils, workspace |
| from caffe2.quantization.server import dnnlowp_pybind11 |
| from hypothesis import assume |
| |
| |
| # This function asserts quantized results (output[1:]) are close enough to |
| # floating point results (output[0]). |
| # The error bound is derived based on assumption that there's no input |
| # quantization error. |
def check_quantized_results_close(outputs, ref=None, symmetric=False, atol_scale=0.53):
    """Assert quantized results (outputs[1:]) are close to the floating-point
    result (outputs[0]).

    Each element of `outputs` is a tuple/namedtuple whose first field is the
    result array.  The tolerance is one quantization step of the reference
    range scaled by `atol_scale`; the bound assumes the inputs themselves
    carry no quantization error.  `atol_scale` defaults to ~1/1.9 instead of
    an exact 1/2 to leave slack for finite floating-point precision.
    """
    if ref is None:
        ref = outputs[0][0]
    if ref.size == 0:
        return
    # Quantization ranges always include 0.
    lo = min(np.min(ref), 0)
    hi = max(np.max(ref), 0)
    if symmetric:
        # Symmetric quantization centers the representable range around 0.
        ref_scale = 2 * max(abs(hi), abs(lo)) / 255
    else:
        ref_scale = (hi - lo) / 255
    atol = ref_scale * atol_scale
    expected = outputs[0][0]
    for result in outputs[1:]:
        np.testing.assert_allclose(result[0], expected, atol=atol, rtol=0)
| |
| |
def pairwise(iterable):
    """Yield consecutive overlapping pairs: s -> (s0,s1), (s1,s2), (s2,s3), ..."""
    it = iter(iterable)
    prev = next(it, None)
    for item in it:
        yield (prev, item)
        prev = item
| |
| |
# Make sure we won't have overflows from the vpmaddubsw instruction used in fbgemm
def avoid_vpmaddubsw_overflow_fc(
    batch_size, input_channels, output_channels, X, X_min, X_max, W, W_min, W_max
):
    """Mutate W in place so no vpmaddubsw pair overflows its int16 accumulator.

    vpmaddubsw multiplies two adjacent uint8 activations by two adjacent int8
    weights and accumulates into int16, so every pair x0*w0 + x1*w1 must fit
    in [-2^15, 2^15 - 1].  Activations are offset by X_min and weights by
    128 + W_min to model the quantized (uint8/int8) values.  When a pair
    would overflow, the second weight of the pair is clamped just inside the
    bound.  (X_max and W_max are unused but kept for a uniform signature.)
    """
    int16_lo = -(1 << 15)
    int16_hi = (1 << 15) - 1
    for row, out_ch in np.ndindex((batch_size, output_channels)):
        # Walk adjacent input-channel pairs; a trailing odd channel has no pair.
        for k in range(0, input_channels // 2 * 2, 2):
            x0 = X[row, k] - X_min
            x1 = X[row, k + 1] - X_min
            w0 = W[out_ch, k] - 128 - W_min
            w1 = W[out_ch, k + 1] - 128 - W_min
            acc = x0 * w0 + x1 * w1
            if acc < int16_lo:
                W[out_ch, k + 1] = int((int16_lo - float(x0) * w0) / x1) + 128 + W_min
            elif acc > int16_hi:
                W[out_ch, k + 1] = int((int16_hi - float(x0) * w0) / x1) + 128 + W_min

    # Go through the same loop again to double check we don't have any overflow
    for row, out_ch in np.ndindex((batch_size, output_channels)):
        for k in range(0, input_channels // 2 * 2, 2):
            x0 = X[row, k] - X_min
            x1 = X[row, k + 1] - X_min
            w0 = W[out_ch, k] - 128 - W_min
            w1 = W[out_ch, k + 1] - 128 - W_min
            assert -(1 << 15) <= x0 * w0 + x1 * w1 < (1 << 15)
| |
| |
| # Make sure we won't have overflows from vpmaddubsw instruction used in |
| # fbgemm (FIXME: this assumes fbgemm is used only for NHWC and im2col |
| # is done in a way that input_channels is the fastest moving |
| # dimension). |
| # |
| # strides, pads, kernels, dilations, and sizes should be tuples with the same dimension |
| # (2 for 2D conv, 3 for 3D conv, and so on) |
def avoid_vpmaddubsw_overflow(
    strides,
    pads,
    kernels,
    dilations,
    sizes,
    input_channels,
    output_channels,
    batch_size,
    X,
    X_min,
    X_max,
    W,
    W_min,
    W_max,
):
    """Mutate W in place so no vpmaddubsw pair of a convolution overflows int16.

    Assumes fbgemm's NHWC layout where im2col makes input_channels the
    fastest-moving dimension, so vpmaddubsw pairs consecutive entries of the
    flattened (kernel..., input_channels) patch.  Activations are offset by
    X_min (out-of-bounds positions read as padding, i.e. value X_min) and
    weights by 128 + W_min to model the quantized uint8/int8 values.  When a
    pair x0*w0 + x1*w1 would leave [-2^15, 2^15 - 1], the second weight of
    the pair is clamped just inside the bound, then a second pass asserts no
    pair overflows.  (X_max and W_max are unused but kept for a uniform
    signature with avoid_vpmaddubsw_overflow_fc.)

    strides, pads, kernels, dilations, and sizes are tuples with the same
    dimension (2 for 2D conv, 3 for 3D conv, and so on).
    """
    ndim = len(sizes)
    dkernels = tuple((dilations[i] * (kernels[i] - 1) + 1) for i in range(ndim))
    # Output spatial extent in each dimension.
    size_cols = tuple(
        (sizes[i] + 2 * pads[i] - dkernels[i]) // strides[i] + 1 for i in range(ndim)
    )
    # Consecutive positions in the flattened patch, as paired by vpmaddubsw.
    patch_indices = list(np.ndindex(kernels + (input_channels,)))
    patch_pairs = list(zip(patch_indices, patch_indices[1:]))

    def pair_terms(b, oc, o_spatial, filter_idx0, filter_idx1):
        """Return (x0, w0, x1, w1, w1_index) for one vpmaddubsw pair.

        w1_index is the index into W of the pair's second weight, so the
        caller can clamp it.  Reads W live, so earlier adjustments are seen.
        """
        f0, ic0 = filter_idx0[:-1], filter_idx0[-1]
        f1, ic1 = filter_idx1[:-1], filter_idx1[-1]
        i0s = tuple(
            strides[i] * o_spatial[i] - pads[i] + dilations[i] * f0[i]
            for i in range(ndim)
        )
        i1s = tuple(
            strides[i] * o_spatial[i] - pads[i] + dilations[i] * f1[i]
            for i in range(ndim)
        )
        w0 = W[(oc,) + f0 + (ic0,)] - 128 - W_min
        w1_index = (oc,) + f1 + (ic1,)
        w1 = W[w1_index] - 128 - W_min
        if all(0 <= i0s[i] < sizes[i] for i in range(ndim)):
            x0 = X[(b,) + i0s + (ic0,)] - X_min
        else:
            # padding
            x0 = -X_min
        if all(0 <= i1s[i] < sizes[i] for i in range(ndim)):
            x1 = X[(b,) + i1s + (ic1,)] - X_min
        else:
            # padding
            x1 = -X_min
        return x0, w0, x1, w1, w1_index

    # First pass: clamp the second weight of any overflowing pair.
    for out_idx in np.ndindex((batch_size,) + size_cols + (output_channels,)):
        b, oc, o_spatial = out_idx[0], out_idx[-1], out_idx[1:-1]
        for fi0, fi1 in patch_pairs:
            x0, w0, x1, w1, w1_index = pair_terms(b, oc, o_spatial, fi0, fi1)
            if x0 * w0 + x1 * w1 < -(1 << 15):
                w1_adjusted = (-(1 << 15) - float(x0) * w0) / x1
                W[w1_index] = int(w1_adjusted) + 128 + W_min
            elif x0 * w0 + x1 * w1 >= (1 << 15):
                w1_adjusted = ((1 << 15) - 1 - float(x0) * w0) / x1
                W[w1_index] = int(w1_adjusted) + 128 + W_min

    # Go through the same loop again to double check we don't have any overflow
    for out_idx in np.ndindex((batch_size,) + size_cols + (output_channels,)):
        b, oc, o_spatial = out_idx[0], out_idx[-1], out_idx[1:-1]
        for fi0, fi1 in patch_pairs:
            x0, w0, x1, w1, _ = pair_terms(b, oc, o_spatial, fi0, fi1)
            assert -(1 << 15) <= x0 * w0 + x1 * w1 < (1 << 15)
| |
| |
| # strides, pads, kernels, dilations, and sizes should be tuples with the same dimension |
| # (2 for 2D conv, 3 for 3D conv, and so on) |
def generate_convnd_inputs(
    strides,
    pads,
    kernels,
    dilations,
    sizes,
    group,
    input_channels_per_group,
    output_channels_per_group,
    batch_size,
    order,
    groupwise_quantization=False,
    preserve_activation_sparsity=False,
    preserve_weight_sparsity=False,
):
    """Generate activations X, weights W, and bias b for an N-D conv test.

    strides, pads, kernels, dilations, and sizes are tuples with the same
    dimension (2 for 2D conv, 3 for 3D conv, and so on).  X and W are
    integral float32 arrays with quantization scale exactly 1, crafted so
    that (a) each tensor (or each group, under groupwise quantization)
    actually spans its intended min/max range and (b) fbgemm's vpmaddubsw
    accumulation cannot overflow int16 (see avoid_vpmaddubsw_overflow).
    Returns (X, W, b); X and W are generated in NHWC layout and transposed
    when order == "NCHW".
    """
    dim = len(sizes)
    # hypothesis preconditions: consistent dimensions and a kernel that fits.
    assume(all(len(a) == dim for a in [strides, pads, kernels, dilations]))
    assume(all(sizes[d] >= dilations[d] * (kernels[d] - 1) + 1 for d in range(dim)))
    input_channels = input_channels_per_group * group
    output_channels = output_channels_per_group * group
    depthwise_convolution = (
        input_channels_per_group == 1 and output_channels_per_group == 1
    )

    # The min/max pinning below needs at least two channels to place values in.
    assert input_channels > 1
    assert output_channels > 1

    # X and W have scale 1, so exactly represented after quantization
    X_min = 0 if preserve_activation_sparsity else -77
    X_max = X_min + 255
    X_range = X_max - X_min
    if depthwise_convolution and groupwise_quantization:
        # For depthwise convolution, it's not enough to set input channel 0
        # to all X_min to avoid overflow from vpmaddubsw
        X_range /= 2
    X = np.round(
        np.random.rand(*((batch_size,) + tuple(sizes) + (input_channels,))) * X_range
        + X_min
    )
    X = X.astype(np.float32)
    if (
        batch_size != 0
        and depthwise_convolution
        and groupwise_quantization
        and not preserve_activation_sparsity
    ):
        # Put X_max in a position not to be paired with any padded value.
        # Put X_min to all positions that can be paired with the X_max value.
        #
        # This is an example of a pattern for 3x3x3
        # . . . . .
        # . . . . .
        # . . . . .
        # . . . . .
        # . . . . min
        #
        # . . . . .
        # . . . . min
        # . min max min .
        # min . . . .
        # . . . . .
        #
        # min . . . .
        # . . . . .
        # . . . . .
        # . . . . .
        # . . . . .

        # Make sure we have enough dimension
        assert X.shape[1] >= 3
        assert all(X.shape[d + 1] >= kernels[d] + 2 for d in range(1, dim))

        # Take subtensor we want to manipulate
        X_sub = X[(0,) * (X.ndim - dim - 1) + (slice(None),) * dim + (0,)]

        # Put X_max in the middle of the subtensor
        X_sub[(1,) + tuple(kernels[d] // 2 + 1 for d in range(1, dim))] = X_max

        # Put X_min to the positions that can be paired with X_max across
        # the slowest moving dimension
        X_sub[[[0, 2]] + [[kernels[d] + 1, 0] for d in range(1, dim)]] = X_min

        # Put X_min to other positions that can be paired with X_max
        for d1 in range(1, dim):
            X_sub[
                [[1]]
                + [[kernels[d2] // 2 + 1] for d2 in range(1, d1)]
                + [[kernels[d1] // 2, kernels[d1] // 2 + 2]]
                + [[kernels[d2] + 1, 0] for d2 in range(d1 + 1, dim)]
            ] = X_min
    else:
        # input channel 0 is all X_min to avoid overflow from vpmaddubsw when
        # multiplied with W_min and W_max
        X[..., 0] = X_min
        if batch_size != 0:
            # Ensure X_max actually appears so the activation range is exact.
            X[(0,) * (X.ndim - 1) + (1,)] = X_max

    if preserve_weight_sparsity:
        # Asymmetric range keeping 0 exactly representable at zero_point.
        W_min = -128
        W_max = 100
    else:
        W_min = -100
        W_max = W_min + 255
    W = np.round(
        np.random.rand(
            *((output_channels,) + tuple(kernels) + (input_channels_per_group,))
        )
        * (W_max - W_min)
        + W_min
    )
    W = W.astype(np.float32)
    if groupwise_quantization:
        # Pin W_min/W_max inside every group so each group spans its full range.
        for g in range(group):
            W[(g * output_channels_per_group,) + (0,) * (W.ndim - 1)] = W_min
            if depthwise_convolution:
                W[(g * output_channels_per_group, 1) + (0,) * (W.ndim - 2)] = W_max
            else:
                assert output_channels_per_group > 1
                W[(g * output_channels_per_group + 1,) + (0,) * (W.ndim - 1)] = W_max

        # Make sure each group has different ranges to really see the effect
        # of group-wise quantization.
        if not preserve_weight_sparsity:
            W[
                g * output_channels_per_group : (g + 1) * output_channels_per_group,
            ] += g
    else:
        # Pin W_min/W_max once so the whole-tensor weight range is exact.
        W[(0,) + (0,) * (W.ndim - 1)] = W_min
        W[(1,) + (0,) * (W.ndim - 1)] = W_max

    different_range_per_group = groupwise_quantization and not preserve_weight_sparsity
    # Clamp weights group by group so no vpmaddubsw pair overflows int16.
    for g in range(group):
        avoid_vpmaddubsw_overflow(
            strides,
            pads,
            kernels,
            dilations,
            sizes,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            X[..., g * input_channels_per_group : (g + 1) * input_channels_per_group],
            X_min,
            X_max,
            W[g * output_channels_per_group : (g + 1) * output_channels_per_group,],
            W_min + (g if different_range_per_group else 0),
            W_max + (g if different_range_per_group else 0),
        )

    if order == "NCHW":
        X = utils.NHWC2NCHW(X)
        W = utils.NHWC2NCHW(W)

    b = np.random.randn(output_channels).astype(np.float32)

    return X, W, b
| |
| |
def generate_conv_inputs(
    stride,
    pad,
    kernel,
    dilation,
    size,
    group,
    input_channels_per_group,
    output_channels_per_group,
    batch_size,
    order,
    groupwise_quantization=False,
    preserve_activation_sparsity=False,
    preserve_weight_sparsity=False,
):
    """2D convenience wrapper around generate_convnd_inputs.

    Each scalar conv parameter (stride, pad, kernel, dilation, size) is
    replicated into a 2-tuple; everything else is forwarded unchanged.
    """
    spatial_params = tuple((p, p) for p in (stride, pad, kernel, dilation, size))
    return generate_convnd_inputs(
        *spatial_params,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        groupwise_quantization,
        preserve_activation_sparsity,
        preserve_weight_sparsity,
    )
| |
| |
def run_conv_or_fc(
    test_case,
    init_net,
    net,
    X,
    W,
    b,
    op_type,
    engine,
    order,
    gc,
    outputs,
    scale=None,
    zero_point=None,
):
    """Run `net` via both workspace APIs and append each fetched Y to outputs.

    A truthy `order` means a Conv op (the result records the layout); a falsy
    one means FC.  DNNLOWP ops are run multiple times to exercise both the
    first (cache-filling) run and subsequent runs, which take different code
    paths.
    """
    if order:
        # Conv
        Output = collections.namedtuple("Output", ["Y", "op_type", "engine", "order"])
    else:
        # FC
        Output = collections.namedtuple("Output", ["Y", "op_type", "engine"])

    def record(Y):
        # Append one result tuple in the shape matching the op kind.
        if order:
            outputs.append(Output(Y=Y, op_type=op_type, engine=engine, order=order))
        else:
            outputs.append(Output(Y=Y, op_type=op_type, engine=engine))

    has_quant_param = scale is not None and zero_point is not None

    # Path 1: test_case.ws.run re-creates the operator every time, so this
    # covers cases when we have multiple nets sharing the same workspace.
    for name, blob in (("X", X), ("W", W), ("b", b)):
        test_case.ws.create_blob(name).feed(blob, device_option=gc)
    if has_quant_param:
        with workspace.WorkspaceGuard(test_case.ws):
            dnnlowp_pybind11.CreateInt8QuantParamsBlob(
                "quant_param", float(scale), int(zero_point)
            )

    if init_net:
        test_case.ws.run(init_net)
    for _ in range(1 if engine == "" else 2):
        test_case.ws.run(net)
        record(test_case.ws.blobs["Y"].fetch())

    # Path 2: workspace.CreateNet + workspace.RunNet reuses the same operator.
    if engine != "":
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("W", W)
        workspace.FeedBlob("b", b)
        if has_quant_param:
            dnnlowp_pybind11.CreateInt8QuantParamsBlob(
                "quant_param", float(scale), int(zero_point)
            )

        if init_net:
            workspace.RunNetOnce(init_net)
        workspace.CreateNet(net)
        for _ in range(2):
            workspace.RunNet(net)
            record(workspace.FetchBlob("Y"))