I422ToRGB24 and I422ToRAW AVX2 row functions
BUG=none
TESTED=I422ToRGB24 unittest
R=tpsiaki@google.com

Review URL: https://webrtc-codereview.appspot.com/46619004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@1337 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/README.chromium b/README.chromium
index 4985117..08892be 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1334
+Version: 1337
 License: BSD
 License File: LICENSE
 
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index bbdbd7f..1a46eef 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -198,6 +198,8 @@
 #define HAS_I422TORGB565ROW_AVX2
 #define HAS_I422TOARGB1555ROW_AVX2
 #define HAS_I422TOARGB4444ROW_AVX2
+#define HAS_I422TORGB24ROW_AVX2
+#define HAS_I422TORAWROW_AVX2
 #define HAS_I444TOARGBROW_AVX2
 #define HAS_I411TOARGBROW_AVX2
 #define HAS_J400TOARGBROW_AVX2
@@ -1177,11 +1179,21 @@
                           const uint8* src_v,
                           uint8* dst_rgb24,
                           int width);
+void I422ToRGB24Row_AVX2(const uint8* src_y,
+                         const uint8* src_u,
+                         const uint8* src_v,
+                         uint8* dst_rgb24,
+                         int width);
 void I422ToRAWRow_SSSE3(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_raw,
                         int width);
+void I422ToRAWRow_AVX2(const uint8* src_y,
+                       const uint8* src_u,
+                       const uint8* src_v,
+                       uint8* dst_raw,
+                       int width);
 void I422ToARGBRow_Any_AVX2(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
@@ -1331,11 +1343,21 @@
                               const uint8* src_v,
                               uint8* dst_argb,
                               int width);
+void I422ToRGB24Row_Any_AVX2(const uint8* src_y,
+                             const uint8* src_u,
+                             const uint8* src_v,
+                             uint8* dst_argb,
+                             int width);
 void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* dst_argb,
                             int width);
+void I422ToRAWRow_Any_AVX2(const uint8* src_y,
+                           const uint8* src_u,
+                           const uint8* src_v,
+                           uint8* dst_argb,
+                           int width);
 
 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
 void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index eb1c1ff..7ce1c41 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_
 
-#define LIBYUV_VERSION 1334
+#define LIBYUV_VERSION 1337
 
 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
diff --git a/source/convert_from.cc b/source/convert_from.cc
index d1ee22a..f78b545 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -739,6 +739,14 @@
     }
   }
 #endif
+#if defined(HAS_I422TORGB24ROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    I422ToRGB24Row = I422ToRGB24Row_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToRGB24Row = I422ToRGB24Row_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_I422TORGB24ROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     I422ToRGB24Row = I422ToRGB24Row_Any_NEON;
@@ -791,6 +799,14 @@
     }
   }
 #endif
+#if defined(HAS_I422TORAWROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    I422ToRAWRow = I422ToRAWRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToRAWRow = I422ToRAWRow_AVX2;
+    }
+  }
+#endif
 #if defined(HAS_I422TORAWROW_NEON)
   if (TestCpuFlag(kCpuHasNEON)) {
     I422ToRAWRow = I422ToRAWRow_Any_NEON;
diff --git a/source/row_any.cc b/source/row_any.cc
index 66b7ad9..b57a01a 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -57,6 +57,12 @@
 YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
 YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
 #endif  // HAS_I444TOARGBROW_SSSE3
+#ifdef HAS_I422TORGB24ROW_AVX2
+YANY(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, I422ToRGB24Row_C, 1, 3, 15)
+#endif
+#ifdef HAS_I422TORAWROW_AVX2
+YANY(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, I422ToRAWRow_C, 1, 3, 15)
+#endif
 #ifdef HAS_J422TOARGBROW_SSSE3
 YANY(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, J422ToARGBRow_C, 1, 4, 7)
 #endif
diff --git a/source/row_common.cc b/source/row_common.cc
index a65553d..669dc66 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -2370,6 +2370,50 @@
 }
 #endif
 
+#if defined(HAS_I422TORGB24ROW_AVX2)
+void I422ToRGB24Row_AVX2(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_rgb24,
+                            int width) {
+  // Convert in passes of up to MAXTWIDTH pixels via an ARGB scratch row.
+  SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);  // 4 bytes per ARGB pixel.
+  while (width > 0) {
+    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth);
+    // TODO(fbarchard): Switch to ARGBToRGB24Row_AVX2; SSSE3 is used for now.
+    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
+    src_y += twidth;
+    src_u += twidth / 2;  // U and V advance half as far as Y.
+    src_v += twidth / 2;
+    dst_rgb24 += twidth * 3;  // 3 output bytes per pixel.
+    width -= twidth;
+  }
+}
+#endif
+
+#if defined(HAS_I422TORAWROW_AVX2)
+void I422ToRAWRow_AVX2(const uint8* src_y,
+                            const uint8* src_u,
+                            const uint8* src_v,
+                            uint8* dst_raw,
+                            int width) {
+  // Convert in passes of up to MAXTWIDTH pixels via an ARGB scratch row.
+  SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);  // 4 bytes per ARGB pixel.
+  while (width > 0) {
+    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth);
+    // TODO(fbarchard): Switch to ARGBToRAWRow_AVX2; SSSE3 is used for now.
+    ARGBToRAWRow_SSSE3(row, dst_raw, twidth);
+    src_y += twidth;
+    src_u += twidth / 2;  // U and V advance half as far as Y.
+    src_v += twidth / 2;
+    dst_raw += twidth * 3;  // 3 output bytes per pixel.
+    width -= twidth;
+  }
+}
+#endif
+
 #if defined(HAS_NV12TORGB565ROW_AVX2)
 void NV12ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_uv,
                           uint8* dst_rgb565, int width) {