Add const to MSA loads
Avoid compiler warnings about casts that discard the const qualifier:
warning: cast from type ‘const uint8_t* {aka const unsigned char*}’
to type ‘v16i8* {aka __vector(16) signed char*}’ casts away
qualifiers
BUG=libyuv:793
Change-Id: Ie0d215bc07b49285b5d06ee91ccc2c9a7979799e
Reviewed-on: https://chromium-review.googlesource.com/1107879
Reviewed-by: Frank Barchard <fbarchard@chromium.org>
Commit-Queue: Frank Barchard <fbarchard@chromium.org>
diff --git a/include/libyuv/macros_msa.h b/include/libyuv/macros_msa.h
index 921eb07..bba0e8a 100644
--- a/include/libyuv/macros_msa.h
+++ b/include/libyuv/macros_msa.h
@@ -18,7 +18,7 @@
#if (__mips_isa_rev >= 6)
#define LW(psrc) \
({ \
- uint8_t* psrc_lw_m = (uint8_t*)(psrc); /* NOLINT */ \
+ const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \
uint32_t val_m; \
asm volatile("lw %[val_m], %[psrc_lw_m] \n" \
: [val_m] "=r"(val_m) \
@@ -29,7 +29,7 @@
#if (__mips == 64)
#define LD(psrc) \
({ \
- uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \
+ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
uint64_t val_m = 0; \
asm volatile("ld %[val_m], %[psrc_ld_m] \n" \
: [val_m] "=r"(val_m) \
@@ -39,7 +39,7 @@
#else // !(__mips == 64)
#define LD(psrc) \
({ \
- uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \
+ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
uint32_t val0_m, val1_m; \
uint64_t val_m = 0; \
val0_m = LW(psrc_ld_m); \
@@ -83,7 +83,7 @@
#else // !(__mips_isa_rev >= 6)
#define LW(psrc) \
({ \
- uint8_t* psrc_lw_m = (uint8_t*)(psrc); /* NOLINT */ \
+ const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \
uint32_t val_m; \
asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \
: [val_m] "=r"(val_m) \
@@ -94,7 +94,7 @@
#if (__mips == 64)
#define LD(psrc) \
({ \
- uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \
+ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
uint64_t val_m = 0; \
asm volatile("uld %[val_m], %[psrc_ld_m] \n" \
: [val_m] "=r"(val_m) \
@@ -104,7 +104,7 @@
#else // !(__mips == 64)
#define LD(psrc) \
({ \
- uint8_t* psrc_ld_m = (uint8_t*)(psrc); /* NOLINT */ \
+ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
uint32_t val0_m, val1_m; \
uint64_t val_m = 0; \
val0_m = LW(psrc_ld_m); \
@@ -138,7 +138,7 @@
// TODO(fbarchard): Consider removing __VAR_ARGS versions.
#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
-#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
+#define LD_UB(...) LD_B(const v16u8, __VA_ARGS__)
#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
@@ -158,14 +158,14 @@
out0 = LD_B(RTYPE, (psrc)); \
out1 = LD_B(RTYPE, (psrc) + stride); \
}
-#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
+#define LD_UB2(...) LD_B2(const v16u8, __VA_ARGS__)
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \
{ \
LD_B2(RTYPE, (psrc), stride, out0, out1); \
LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
}
-#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
+#define LD_UB4(...) LD_B4(const v16u8, __VA_ARGS__)
/* Description : Store two vectors with stride each having 16 'byte' sized
elements
diff --git a/source/row_msa.cc b/source/row_msa.cc
index 66666ce..4fb2631 100644
--- a/source/row_msa.cc
+++ b/source/row_msa.cc
@@ -942,10 +942,10 @@
21, 22, 24, 25, 26, 28, 29, 30};
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48);
dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0);
dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1);
dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2);
@@ -966,10 +966,10 @@
21, 20, 26, 25, 24, 30, 29, 28};
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48);
dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0);
dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1);
dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2);
@@ -987,8 +987,8 @@
v16i8 zero = {0};
for (x = 0; x < width; x += 8) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3);
vec1 = (v16u8)__msa_slli_b((v16i8)src0, 3);
vec2 = (v16u8)__msa_srai_b((v16i8)src0, 5);
@@ -1023,8 +1023,8 @@
v16i8 zero = {0};
for (x = 0; x < width; x += 8) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3);
vec1 = (v16u8)__msa_slli_b((v16i8)src0, 2);
vec2 = (v16u8)__msa_srai_b((v16i8)vec0, 3);
@@ -1066,8 +1066,8 @@
v16i8 zero = {0};
for (x = 0; x < width; x += 8) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
vec0 = (v16u8)__msa_srai_b((v16i8)src0, 4);
vec1 = (v16u8)__msa_srai_b((v16i8)src1, 4);
src0 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 1);
@@ -1098,10 +1098,10 @@
v16i8 zero = {0};
for (x = width; x > 0; x -= 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48);
reg0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
reg1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
reg2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
@@ -1164,8 +1164,8 @@
v8i16 zero = {0};
for (x = 0; x < width; x += 4) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0);
vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
vec2 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1);
@@ -1200,10 +1200,10 @@
v16u8 src0, src1, src2, src3, dst0, dst1;
for (x = 0; x < width; x += 8) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 16);
dst0 = __msa_adds_u_b(src0, src2);
dst1 = __msa_adds_u_b(src1, src3);
ST_UB2(dst0, dst1, dst_argb, 16);
@@ -1221,10 +1221,10 @@
v16u8 src0, src1, src2, src3, dst0, dst1;
for (x = 0; x < width; x += 8) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 16);
dst0 = __msa_subs_u_b(src0, src2);
dst1 = __msa_subs_u_b(src1, src3);
ST_UB2(dst0, dst1, dst_argb, 16);
@@ -1245,8 +1245,8 @@
v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255};
for (x = 0; x < width; x += 8) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
vec2 = (v8u16)__msa_ilvr_b((v16i8)src1, (v16i8)src1);
@@ -1316,8 +1316,8 @@
vec_d0 = (v8i16)__msa_ilvr_b(zero, (v16i8)vec_d0);
for (x = 0; x < width; x += 8) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
reg0 = (v8i16)__msa_ilvev_b(zero, (v16i8)vec0);
@@ -1359,8 +1359,8 @@
shuffler_vec += vec0;
for (x = 0; x < width; x += 8) {
- src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0);
- src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16);
+ src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 16);
dst0 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src0, (v16i8)src0);
dst1 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src1, (v16i8)src1);
ST_UB2(dst0, dst1, dst_argb, 16);
@@ -1384,7 +1384,7 @@
rgba_scale = (v4u32)__msa_ilvr_h(zero, (v8i16)rgba_scale);
for (x = 0; x < width; x += 4) {
- src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0);
+ src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 0);
vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0);
@@ -1416,8 +1416,8 @@
v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F);
for (x = 0; x < width; x += 8) {
- src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0);
- src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16);
+ src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 16);
vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0);
vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0);
reg0 = __msa_dotp_u_h(vec0, const_0x4B0F);
@@ -1483,8 +1483,8 @@
v16u8 dst0, dst1, dst2, dst3;
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0);
- src1 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 16);
+ src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb4444, 0);
+ src1 = (v16u8)__msa_ld_b((const v16u8*)src_argb4444, 16);
vec0 = (v8u16)__msa_andi_b(src0, 0x0F);
vec1 = (v8u16)__msa_andi_b(src1, 0x0F);
vec2 = (v8u16)__msa_andi_b(src0, 0xF0);
@@ -1514,8 +1514,8 @@
v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F);
for (x = 0; x < width; x += 16) {
- src0 = (v8u16)__msa_ld_h((v8u16*)src_argb1555, 0);
- src1 = (v8u16)__msa_ld_h((v8u16*)src_argb1555, 16);
+ src0 = (v8u16)__msa_ld_h((const v8u16*)src_argb1555, 0);
+ src1 = (v8u16)__msa_ld_h((const v8u16*)src_argb1555, 16);
vec0 = src0 & const_0x1F;
vec1 = src1 & const_0x1F;
src0 = (v8u16)__msa_srli_h((v8i16)src0, 5);
@@ -1566,8 +1566,8 @@
v8u16 const_0xF800 = (v8u16)__msa_fill_h(0xF800);
for (x = 0; x < width; x += 16) {
- src0 = (v8u16)__msa_ld_h((v8u16*)src_rgb565, 0);
- src1 = (v8u16)__msa_ld_h((v8u16*)src_rgb565, 16);
+ src0 = (v8u16)__msa_ld_h((const v8u16*)src_rgb565, 0);
+ src1 = (v8u16)__msa_ld_h((const v8u16*)src_rgb565, 16);
vec0 = src0 & const_0x1F;
vec1 = src0 & const_0x7E0;
vec2 = src0 & const_0xF800;
@@ -1611,9 +1611,9 @@
v16i8 shuffler = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19};
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_rgb24, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_rgb24, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_rgb24, 32);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_rgb24, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_rgb24, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_rgb24, 32);
vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12);
vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8);
vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4);
@@ -1636,9 +1636,9 @@
v16i8 mask = {2, 1, 0, 16, 5, 4, 3, 17, 8, 7, 6, 18, 11, 10, 9, 19};
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_raw, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_raw, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_raw, 32);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 32);
vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12);
vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8);
vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4);
@@ -1666,8 +1666,8 @@
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
for (x = 0; x < width; x += 16) {
- src0 = (v8u16)__msa_ld_b((v8i16*)src_argb1555, 0);
- src1 = (v8u16)__msa_ld_b((v8i16*)src_argb1555, 16);
+ src0 = (v8u16)__msa_ld_b((const v8i16*)src_argb1555, 0);
+ src1 = (v8u16)__msa_ld_b((const v8i16*)src_argb1555, 16);
vec0 = src0 & const_0x1F;
vec1 = src1 & const_0x1F;
src0 = (v8u16)__msa_srai_h((v8i16)src0, 5);
@@ -1725,8 +1725,8 @@
v8u16 const_0xF800 = (v8u16)__msa_fill_h(0xF800);
for (x = 0; x < width; x += 16) {
- src0 = (v8u16)__msa_ld_b((v8i16*)src_rgb565, 0);
- src1 = (v8u16)__msa_ld_b((v8i16*)src_rgb565, 16);
+ src0 = (v8u16)__msa_ld_b((const v8i16*)src_rgb565, 0);
+ src1 = (v8u16)__msa_ld_b((const v8i16*)src_rgb565, 16);
vec0 = src0 & const_0x1F;
vec1 = src0 & const_0x7E0;
vec2 = src0 & const_0xF800;
@@ -1789,9 +1789,9 @@
v16i8 zero = {0};
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32);
reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0);
reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1);
@@ -1830,9 +1830,9 @@
v16i8 zero = {0};
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32);
reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0);
reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1);
@@ -2041,12 +2041,12 @@
v16i8 zero = {0};
for (x = 0; x < width; x += 16) {
- inp0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
- inp1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
- inp2 = (v16u8)__msa_ld_b((v16i8*)s, 32);
- inp3 = (v16u8)__msa_ld_b((v16i8*)t, 0);
- inp4 = (v16u8)__msa_ld_b((v16i8*)t, 16);
- inp5 = (v16u8)__msa_ld_b((v16i8*)t, 32);
+ inp0 = (v16u8)__msa_ld_b((const v16i8*)s, 0);
+ inp1 = (v16u8)__msa_ld_b((const v16i8*)s, 16);
+ inp2 = (v16u8)__msa_ld_b((const v16i8*)s, 32);
+ inp3 = (v16u8)__msa_ld_b((const v16i8*)t, 0);
+ inp4 = (v16u8)__msa_ld_b((const v16i8*)t, 16);
+ inp5 = (v16u8)__msa_ld_b((const v16i8*)t, 32);
src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12);
src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12);
src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8);
@@ -2146,12 +2146,12 @@
v16i8 zero = {0};
for (x = 0; x < width; x += 16) {
- inp0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
- inp1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
- inp2 = (v16u8)__msa_ld_b((v16i8*)s, 32);
- inp3 = (v16u8)__msa_ld_b((v16i8*)t, 0);
- inp4 = (v16u8)__msa_ld_b((v16i8*)t, 16);
- inp5 = (v16u8)__msa_ld_b((v16i8*)t, 32);
+ inp0 = (v16u8)__msa_ld_b((const v16i8*)s, 0);
+ inp1 = (v16u8)__msa_ld_b((const v16i8*)s, 16);
+ inp2 = (v16u8)__msa_ld_b((const v16i8*)s, 32);
+ inp3 = (v16u8)__msa_ld_b((const v16i8*)t, 0);
+ inp4 = (v16u8)__msa_ld_b((const v16i8*)t, 16);
+ inp5 = (v16u8)__msa_ld_b((const v16i8*)t, 32);
src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12);
src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12);
src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8);
@@ -2353,8 +2353,8 @@
v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_sobelx, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_sobely, 0);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 0);
vec0 = __msa_adds_u_b(src0, src1);
dst0 = (v16u8)__msa_vshf_b(mask0, (v16i8)alpha, (v16i8)vec0);
dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)alpha, (v16i8)vec0);
@@ -2375,10 +2375,10 @@
v16u8 src0, src1, src2, src3, dst0, dst1;
for (x = 0; x < width; x += 32) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_sobelx, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_sobelx, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_sobely, 0);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_sobely, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 16);
dst0 = __msa_adds_u_b(src0, src2);
dst1 = __msa_adds_u_b(src1, src3);
ST_UB2(dst0, dst1, dst_y, 16);
@@ -2398,8 +2398,8 @@
v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_sobelx, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_sobely, 0);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 0);
vec0 = __msa_adds_u_b(src0, src1);
vec1 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src1);
vec2 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src1);
@@ -2424,10 +2424,10 @@
v8u16 const_0x40 = (v8u16)__msa_fill_h(0x40);
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 48);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48);
ARGBTOY(src0, src1, src2, src3, const_0x4B0F, const_0x26, const_0x40, 7,
dst0);
ST_UB(dst0, dst_y);
@@ -2444,10 +2444,10 @@
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 48);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48);
ARGBTOY(src0, src1, src2, src3, const_0x4200, const_0x1981, const_0x1080, 8,
dst0);
ST_UB(dst0, dst_y);
@@ -2464,10 +2464,10 @@
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 48);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48);
ARGBTOY(src0, src1, src2, src3, const_0x8142, const_0x19, const_0x1080, 8,
dst0);
ST_UB(dst0, dst_y);
@@ -2484,10 +2484,10 @@
v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080);
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 32);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 48);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48);
ARGBTOY(src0, src1, src2, src3, const_0x1900, const_0x4281, const_0x1080, 8,
dst0);
ST_UB(dst0, dst_y);
@@ -2518,14 +2518,14 @@
v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080);
for (x = 0; x < width; x += 32) {
- src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)s, 32);
- src3 = (v16u8)__msa_ld_b((v16i8*)s, 48);
- src4 = (v16u8)__msa_ld_b((v16i8*)t, 0);
- src5 = (v16u8)__msa_ld_b((v16i8*)t, 16);
- src6 = (v16u8)__msa_ld_b((v16i8*)t, 32);
- src7 = (v16u8)__msa_ld_b((v16i8*)t, 48);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)s, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)s, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)s, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)s, 48);
+ src4 = (v16u8)__msa_ld_b((const v16i8*)t, 0);
+ src5 = (v16u8)__msa_ld_b((const v16i8*)t, 16);
+ src6 = (v16u8)__msa_ld_b((const v16i8*)t, 32);
+ src7 = (v16u8)__msa_ld_b((const v16i8*)t, 48);
src0 = __msa_aver_u_b(src0, src4);
src1 = __msa_aver_u_b(src1, src5);
src2 = __msa_aver_u_b(src2, src6);
@@ -2746,7 +2746,7 @@
v8i16 zero = {0};
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_y, 0);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_y, 0);
vec0 = (v8i16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
vec1 = (v8i16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
reg0 = (v4i32)__msa_ilvr_h(zero, vec0);
@@ -2792,7 +2792,7 @@
v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL);
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_y, 0);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_y, 0);
vec0 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
vec1 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
vec2 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)src0);
@@ -2824,7 +2824,7 @@
vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
for (x = 0; x < width; x += 8) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_yuy2, 0);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_yuy2, 0);
src1 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0);
src2 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0);
YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
@@ -2852,7 +2852,7 @@
vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug);
for (x = 0; x < width; x += 8) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_uyvy, 0);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_uyvy, 0);
src1 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0);
src2 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0);
YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg,
@@ -2884,10 +2884,10 @@
if (128 == y1_fraction) {
for (x = 0; x < width; x += 32) {
- src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)t, 0);
- src3 = (v16u8)__msa_ld_b((v16i8*)t, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)s, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)s, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)t, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)t, 16);
dst0 = __msa_aver_u_b(src0, src2);
dst1 = __msa_aver_u_b(src1, src3);
ST_UB2(dst0, dst1, dst_ptr, 16);
@@ -2902,10 +2902,10 @@
y_frac = (v8u16)__msa_fill_h(y_fractions);
for (x = 0; x < width; x += 32) {
- src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)t, 0);
- src3 = (v16u8)__msa_ld_b((v16i8*)t, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)s, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)s, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)t, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)t, 16);
vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
@@ -2947,9 +2947,9 @@
24, 23, 28, 27, 26, 31, 30, 29};
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_raw, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_raw, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_raw, 32);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 32);
src3 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 8);
src4 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8);
dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0);
@@ -2970,8 +2970,8 @@
v16u8 src0, src1, dst0, dst1;
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_u, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_v, 0);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_u, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_v, 0);
dst0 = (v16u8)__msa_ilvr_b((v16i8)src1, (v16i8)src0);
dst1 = (v16u8)__msa_ilvl_b((v16i8)src1, (v16i8)src0);
ST_UB2(dst0, dst1, dst_uv, 16);
@@ -2988,10 +2988,10 @@
v16u8 src0, src1, src2, src3, vec0, vec1, dst0;
for (i = 0; i < width; i += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48);
vec0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
vec1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
dst0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0);
@@ -3015,10 +3015,10 @@
v16i8 zero = {0};
for (x = 0; x < width; x += 8) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 16);
vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0);
vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0);
vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1);
@@ -3191,8 +3191,8 @@
vec1 = (v8i16)__msa_ilvl_b(zero, src0);
for (x = 0; x < width; x += 8) {
- src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16);
vec2 = (v8i16)__msa_ilvr_b(zero, (v16i8)src1);
vec3 = (v8i16)__msa_ilvl_b(zero, (v16i8)src1);
vec4 = (v8i16)__msa_ilvr_b(zero, (v16i8)src2);
@@ -3289,10 +3289,10 @@
v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
for (x = 0; x < width; x += 32) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_uv, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_uv, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_uv, 32);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_uv, 48);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 32);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 48);
dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
dst2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
@@ -3329,10 +3329,10 @@
for (x = 0; x < width; x += 32) {
src_uv -= 64;
- src2 = (v16u8)__msa_ld_b((v16i8*)src_uv, 0);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_uv, 16);
- src0 = (v16u8)__msa_ld_b((v16i8*)src_uv, 32);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_uv, 48);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 32);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 48);
dst0 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2);
dst2 = (v16u8)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0);
@@ -3359,12 +3359,12 @@
v8i16 max = __msa_ldi_h(255);
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_y0, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_y0, 16);
- src2 = (v16u8)__msa_ld_b((v16i8*)src_y1, 0);
- src3 = (v16u8)__msa_ld_b((v16i8*)src_y1, 16);
- src4 = (v16u8)__msa_ld_b((v16i8*)src_y2, 0);
- src5 = (v16u8)__msa_ld_b((v16i8*)src_y2, 16);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_y0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_y0, 16);
+ src2 = (v16u8)__msa_ld_b((const v16i8*)src_y1, 0);
+ src3 = (v16u8)__msa_ld_b((const v16i8*)src_y1, 16);
+ src4 = (v16u8)__msa_ld_b((const v16i8*)src_y2, 0);
+ src5 = (v16u8)__msa_ld_b((const v16i8*)src_y2, 16);
vec0 = (v8i16)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0);
vec1 = (v8i16)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0);
vec2 = (v8i16)__msa_vshf_b(mask0, (v16i8)src3, (v16i8)src2);
@@ -3409,8 +3409,8 @@
v8i16 max = __msa_ldi_h(255);
for (x = 0; x < width; x += 16) {
- src0 = (v16u8)__msa_ld_b((v16i8*)src_y0, 0);
- src1 = (v16u8)__msa_ld_b((v16i8*)src_y1, 0);
+ src0 = (v16u8)__msa_ld_b((const v16i8*)src_y0, 0);
+ src1 = (v16u8)__msa_ld_b((const v16i8*)src_y1, 0);
vec0 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)src0);
vec1 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)src0);
vec2 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)src1);