Replace large line extension with msan loads in superres.

The uninitialized memory error we were avoiding is in fact lack of
coverage in msan itself.

PiperOrigin-RevId: 355261573
Change-Id: I55aa8301b13bb36f8d26593d833a8c69348b750f
diff --git a/src/dsp/x86/super_res_sse4.cc b/src/dsp/x86/super_res_sse4.cc
index 5c2edf4..d46316b 100644
--- a/src/dsp/x86/super_res_sse4.cc
+++ b/src/dsp/x86/super_res_sse4.cc
@@ -102,18 +102,32 @@
     const auto* filter = static_cast<const uint8_t*>(coefficients);
     uint8_t* dst_ptr = dst;
     ExtendLine<uint8_t>(src + DivideBy2(kSuperResFilterTaps), downscaled_width,
-                        kSuperResHorizontalBorder, kSuperResHorizontalPadding);
+                        kSuperResHorizontalBorder, kSuperResHorizontalBorder);
     int subpixel_x = initial_subpixel_x;
-    // The below code calculates up to 15 extra upscaled
-    // pixels which will over-read up to 15 downscaled pixels in the end of each
-    // row. kSuperResHorizontalPadding accounts for this.
+    // The below code calculates up to 15 extra upscaled pixels which will
+    // over-read up to 15 downscaled pixels in the end of each row.
+    // kSuperResHorizontalPadding protects this behavior from segmentation
+    // faults and threading issues.
     int x = RightShiftWithCeiling(upscaled_width, 4);
     do {
       __m128i weighted_src[8];
       for (int i = 0; i < 8; ++i, filter += 16) {
-        __m128i s = LoadLo8(&src[subpixel_x >> kSuperResScaleBits]);
+        // TODO(b/178652672): Remove Msan loads when hadd bug is resolved.
+        // It's fine to write uninitialized bytes outside the frame, but the
+        // inside-frame pixels are incorrectly labeled uninitialized if
+        // uninitialized values go through the hadd intrinsics.
+        // |src| is offset 4 pixels to the left, and there are 4 extended border
+        // pixels, so a difference of 0 from |downscaled_width| indicates 8 good
+        // bytes. A difference of 1 indicates 7 good bytes.
+        const int msan_bytes_lo =
+            (subpixel_x >> kSuperResScaleBits) - downscaled_width;
+        __m128i s =
+            LoadLo8Msan(&src[subpixel_x >> kSuperResScaleBits], msan_bytes_lo);
         subpixel_x += step;
-        s = LoadHi8(s, &src[subpixel_x >> kSuperResScaleBits]);
+        const int msan_bytes_hi =
+            (subpixel_x >> kSuperResScaleBits) - downscaled_width;
+        s = LoadHi8Msan(s, &src[subpixel_x >> kSuperResScaleBits],
+                        msan_bytes_hi);
         subpixel_x += step;
         const __m128i f = LoadAligned16(filter);
         weighted_src[i] = _mm_maddubs_epi16(s, f);