Fix CDEF SIMD code for cb4x4
Change-Id: Iec83111328d5343250b9a35dc2896541393f3efa
diff --git a/av1/common/cdef.c b/av1/common/cdef.c
index d7fcc48..4ef52bb 100644
--- a/av1/common/cdef.c
+++ b/av1/common/cdef.c
@@ -107,7 +107,6 @@
void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src,
int sstride, int v, int h) {
int i, j;
- OD_ASSERT((h & 0x7) == 0);
for (i = 0; i < v; i++) {
for (j = 0; j < h; j++) {
dst[i * dstride + j] = src[i * sstride + j];
@@ -119,7 +118,6 @@
const uint16_t *src, int sstride, int v,
int h) {
int i, j;
- OD_ASSERT((h & 0x7) == 0);
for (i = 0; i < v; i++) {
for (j = 0; j < h; j++) {
dst[i * dstride + j] = src[i * sstride + j];
@@ -147,18 +145,9 @@
static INLINE void fill_rect(uint16_t *dst, int dstride, int v, int h,
uint16_t x) {
int i, j;
- OD_ASSERT((h & 0x7) == 0);
for (i = 0; i < v; i++) {
- for (j = 0; j < h; j += 8) {
- int k = i * dstride + j;
- dst[k + 0] = x;
- dst[k + 1] = x;
- dst[k + 2] = x;
- dst[k + 3] = x;
- dst[k + 4] = x;
- dst[k + 5] = x;
- dst[k + 6] = x;
- dst[k + 7] = x;
+ for (j = 0; j < h; j++) {  /* element-wise: any width h */
+ dst[i * dstride + j] = x;  /* row i begins at dst + i * dstride */
}
}
}
@@ -166,19 +155,9 @@
static INLINE void copy_rect(uint16_t *dst, int dstride, const uint16_t *src,
int sstride, int v, int h) {
int i, j;
- OD_ASSERT((h & 0x7) == 0);
for (i = 0; i < v; i++) {
- for (j = 0; j < h; j += 8) {
- int k = i * dstride + j;
- int l = i * sstride + j;
- dst[k + 0] = src[l + 0];
- dst[k + 1] = src[l + 1];
- dst[k + 2] = src[l + 2];
- dst[k + 3] = src[l + 3];
- dst[k + 4] = src[l + 4];
- dst[k + 5] = src[l + 5];
- dst[k + 6] = src[l + 6];
- dst[k + 7] = src[l + 7];
+ for (j = 0; j < h; j++) {  /* element-wise: any width h */
+ dst[i * dstride + j] = src[i * sstride + j];  /* strides in elements */
}
}
}
diff --git a/av1/common/od_dering_simd.h b/av1/common/od_dering_simd.h
index fc1981f..8659b2c 100644
--- a/av1/common/od_dering_simd.h
+++ b/av1/common/od_dering_simd.h
@@ -363,12 +363,14 @@
const uint8_t *src, int sstride, int v,
int h) {
int i, j;
- OD_ASSERT((h & 0x7) == 0);
for (i = 0; i < v; i++) {
- for (j = 0; j < h; j += 8) {
+ for (j = 0; j < (h & ~0x7); j += 8) {
v64 row = v64_load_unaligned(&src[i * sstride + j]);
v128_store_unaligned(&dst[i * dstride + j], v128_unpack_u8_s16(row));
}
+ for (; j < h; j++) {  /* scalar tail after the 8-wide SIMD steps */
+ dst[i * dstride + j] = src[i * sstride + j];
+ }
}
}
@@ -376,11 +378,13 @@
const uint16_t *src, int sstride,
int v, int h) {
int i, j;
- OD_ASSERT((h & 0x7) == 0);
for (i = 0; i < v; i++) {
- for (j = 0; j < h; j += 8) {
+ for (j = 0; j < (h & ~0x7); j += 8) {
v128 row = v128_load_unaligned(&src[i * sstride + j]);
v128_store_unaligned(&dst[i * dstride + j], row);
}
+ for (; j < h; j++) {  /* scalar tail after the 8-wide SIMD steps */
+ dst[i * dstride + j] = src[i * sstride + j];
+ }
}
}