Fix incorrect vector length (VLEN) in pool_dnnlowp_op_avx2.cc (#18141)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/18141
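VLEN should've been 32: the vectorized loop steps through uint8_t channel data using 256-bit AVX2 registers (__m256i), each of which holds 32 one-byte elements, not 8.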
Reviewed By: jianyuh
Differential Revision: D14510780
fbshipit-source-id: ddf12746e1c69677a268432432ddb088cc210084
diff --git a/caffe2/quantization/server/pool_dnnlowp_op_avx2.cc b/caffe2/quantization/server/pool_dnnlowp_op_avx2.cc
index 92d0816..8c35ebd 100644
--- a/caffe2/quantization/server/pool_dnnlowp_op_avx2.cc
+++ b/caffe2/quantization/server/pool_dnnlowp_op_avx2.cc
@@ -34,7 +34,7 @@
wstart = wstart > 0 ? wstart : 0;
uint8_t* Yh = Ydata_temp + (ph * pooled_width + pw) * channels;
- constexpr int VLEN = 8;
+ constexpr int VLEN = 32;
// vectorized loop
for (int c = 0; c < channels / VLEN * VLEN; c += VLEN) {
__m256i Y_v = _mm256_setzero_si256();