Fix CDEF SIMD code for cb4x4

Change-Id: Iec83111328d5343250b9a35dc2896541393f3efa
diff --git a/av1/common/cdef.c b/av1/common/cdef.c
index d7fcc48..4ef52bb 100644
--- a/av1/common/cdef.c
+++ b/av1/common/cdef.c
@@ -107,7 +107,6 @@
 void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src,
                                 int sstride, int v, int h) {
   int i, j;
-  OD_ASSERT((h & 0x7) == 0);
   for (i = 0; i < v; i++) {
     for (j = 0; j < h; j++) {
       dst[i * dstride + j] = src[i * sstride + j];
@@ -119,7 +118,6 @@
                                  const uint16_t *src, int sstride, int v,
                                  int h) {
   int i, j;
-  OD_ASSERT((h & 0x7) == 0);
   for (i = 0; i < v; i++) {
     for (j = 0; j < h; j++) {
       dst[i * dstride + j] = src[i * sstride + j];
@@ -147,18 +145,9 @@
 static INLINE void fill_rect(uint16_t *dst, int dstride, int v, int h,
                              uint16_t x) {
   int i, j;
-  OD_ASSERT((h & 0x7) == 0);
   for (i = 0; i < v; i++) {
-    for (j = 0; j < h; j += 8) {
-      int k = i * dstride + j;
-      dst[k + 0] = x;
-      dst[k + 1] = x;
-      dst[k + 2] = x;
-      dst[k + 3] = x;
-      dst[k + 4] = x;
-      dst[k + 5] = x;
-      dst[k + 6] = x;
-      dst[k + 7] = x;
+    for (j = 0; j < h; j++) {  // One element per step, so any width h works.
+      dst[i * dstride + j] = x;
     }
   }
 }
@@ -166,19 +155,9 @@
 static INLINE void copy_rect(uint16_t *dst, int dstride, const uint16_t *src,
                              int sstride, int v, int h) {
   int i, j;
-  OD_ASSERT((h & 0x7) == 0);
   for (i = 0; i < v; i++) {
-    for (j = 0; j < h; j += 8) {
-      int k = i * dstride + j;
-      int l = i * sstride + j;
-      dst[k + 0] = src[l + 0];
-      dst[k + 1] = src[l + 1];
-      dst[k + 2] = src[l + 2];
-      dst[k + 3] = src[l + 3];
-      dst[k + 4] = src[l + 4];
-      dst[k + 5] = src[l + 5];
-      dst[k + 6] = src[l + 6];
-      dst[k + 7] = src[l + 7];
+    for (j = 0; j < h; j++) {  // One element per step, so any width h works.
+      dst[i * dstride + j] = src[i * sstride + j];
     }
   }
 }
diff --git a/av1/common/od_dering_simd.h b/av1/common/od_dering_simd.h
index fc1981f..8659b2c 100644
--- a/av1/common/od_dering_simd.h
+++ b/av1/common/od_dering_simd.h
@@ -363,12 +363,14 @@
                                          const uint8_t *src, int sstride, int v,
                                          int h) {
   int i, j;
-  OD_ASSERT((h & 0x7) == 0);
   for (i = 0; i < v; i++) {
-    for (j = 0; j < h; j += 8) {
+    for (j = 0; j < (h & ~0x7); j += 8) {  // Full 8-wide groups only.
       v64 row = v64_load_unaligned(&src[i * sstride + j]);
       v128_store_unaligned(&dst[i * dstride + j], v128_unpack_u8_s16(row));
     }
+    for (; j < h; j++) {  // Scalar tail for the last (h & 7) columns.
+      dst[i * dstride + j] = src[i * sstride + j];
+    }
   }
 }
 
@@ -376,11 +378,13 @@
                                           const uint16_t *src, int sstride,
                                           int v, int h) {
   int i, j;
-  OD_ASSERT((h & 0x7) == 0);
   for (i = 0; i < v; i++) {
-    for (j = 0; j < h; j += 8) {
+    for (j = 0; j < (h & ~0x7); j += 8) {  // Full 8-wide groups only.
       v128 row = v128_load_unaligned(&src[i * sstride + j]);
       v128_store_unaligned(&dst[i * dstride + j], row);
     }
+    for (; j < h; j++) {  // Scalar tail for the last (h & 7) columns.
+      dst[i * dstride + j] = src[i * sstride + j];
+    }
   }
 }