yuvconstants for all YUV to RGB conversion functions.
R=harryjin@google.com
BUG=libyuv:488
Review URL: https://codereview.chromium.org/1363503002 .
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 66c4a64..502184e 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -108,8 +108,6 @@
#define HAS_I422TOARGB1555ROW_SSSE3
#define HAS_I422TOARGB4444ROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
-#define HAS_I422TOARGBMATRIXROW_SSSE3
-#define HAS_I422TOABGRMATRIXROW_SSSE3
#define HAS_I422TOBGRAROW_SSSE3
#define HAS_I422TORAWROW_SSSE3
#define HAS_I422TORGB24ROW_SSSE3
@@ -151,8 +149,6 @@
#define HAS_YUY2TOYROW_SSE2
#define HAS_I444TOARGBROW_SSSE3
#define HAS_I444TOABGRROW_SSSE3
-#define HAS_I444TOARGBMATRIXROW_SSSE3
-#define HAS_I444TOABGRMATRIXROW_SSSE3
// Effects:
#define HAS_ARGBADDROW_SSE2
@@ -191,8 +187,6 @@
(!defined(__clang__) || defined(__SSSE3__))
#define HAS_I422TOARGBROW_SSSE3
#define HAS_I422TOABGRROW_SSSE3
-#define HAS_I422TOARGBMATRIXROW_SSSE3
-#define HAS_I422TOABGRMATRIXROW_SSSE3
#endif
// The following are available for AVX2 Visual C and clangcl 32 bit:
@@ -218,8 +212,6 @@
#define HAS_NV21TOARGBROW_AVX2
#define HAS_NV21TORGB565ROW_AVX2
#define HAS_RGB565TOARGBROW_AVX2
-#define HAS_I444TOARGBMATRIXROW_AVX2
-#define HAS_I444TOABGRMATRIXROW_AVX2
#endif
// The following are available on all x86 platforms, but
@@ -239,8 +231,6 @@
#define HAS_I400TOARGBROW_AVX2
#define HAS_I422TOABGRROW_AVX2
#define HAS_I422TOARGBROW_AVX2
-#define HAS_I422TOARGBMATRIXROW_AVX2
-#define HAS_I422TOABGRMATRIXROW_AVX2
#define HAS_I422TOBGRAROW_AVX2
#define HAS_I422TORAWROW_AVX2
#define HAS_I422TORGB24ROW_AVX2
@@ -313,8 +303,6 @@
#define HAS_I422TOARGB4444ROW_NEON
// TODO(fbarchard): Implement aarch64 neon version
#ifndef __aarch64__
-#define HAS_I422TOARGBMATRIXROW_NEON
-#define HAS_I422TOABGRMATRIXROW_NEON
#define HAS_J422TOARGBROW_NEON
#define HAS_J422TOABGRROW_NEON
#define HAS_H422TOARGBROW_NEON
@@ -444,8 +432,19 @@
typedef uint8 ulvec8[32];
#endif
+#if defined(__arm__) || defined(__aarch64__)
+
+// This struct is for Arm color conversion.
+struct YuvConstants {
+ uvec8 kUVToRB;
+ uvec8 kUVToG;
+ vec16 kUVBiasBGR;
+ vec32 kYToRgb;
+};
+
+#else
+
// This struct is for Intel color conversion.
-// TODO(fnbarchard): Consider different struct for other platforms.
struct YuvConstants {
lvec8 kUVToB;
lvec8 kUVToG;
@@ -464,21 +463,12 @@
#define KUVBIASG 128
#define KUVBIASR 160
#define KYTORGB 192
-
-struct YuvConstantsNEON {
- uvec8 kUVToRB;
- uvec8 kUVToG;
- vec16 kUVBiasBGR;
- vec32 kYToRgb;
-};
+#endif
extern struct YuvConstants kYuvConstants;
extern struct YuvConstants kYvuConstants;
extern struct YuvConstants kYuvJConstants;
extern struct YuvConstants kYuvHConstants;
-extern struct YuvConstantsNEON kYuvConstantsNEON;
-extern struct YuvConstantsNEON kYuvJConstantsNEON;
-extern struct YuvConstantsNEON kYuvHConstantsNEON;
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP
@@ -569,110 +559,97 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
-void I422ToARGBMatrixRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- struct YuvConstantsNEON* YuvConstants,
- int width);
-void I422ToABGRMatrixRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- struct YuvConstantsNEON* YuvConstants,
- int width);
+void I422ToARGBRow_NEON(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width);
+void I422ToABGRRow_NEON(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width);
void I411ToARGBRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToBGRARow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_bgra,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToABGRRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRAWRow_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_raw,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ struct YuvConstants* yuvconstants,
int width);
-void J422ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void H422ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void J422ToABGRRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
-void H422ToABGRRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
void NV12ToARGBRow_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
- int width);
-void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
- int width);
-void NV21ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
int width);
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
@@ -1054,571 +1031,526 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I444ToABGRRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
-void I422ToARGBMatrixRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width);
-void I422ToABGRMatrixRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width);
+void I422ToARGBRow_C(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width);
+void I422ToABGRRow_C(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width);
void I411ToARGBRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_C(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV21ToRGB565Row_C(const uint8* src_y,
const uint8* src_vu,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_C(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
-void NV21ToARGBRow_C(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
- int width);
void YUY2ToARGBRow_C(const uint8* src_yuy2,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_C(const uint8* src_uyvy,
uint8* dst_argb,
- int width);
-void J422ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void J422ToABGRRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void H422ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void H422ToABGRRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToBGRARow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_bgra,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToABGRRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRAWRow_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_raw,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB565Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
-void I422ToARGBMatrixRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width);
-void I422ToABGRMatrixRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width);
-void I422ToBGRARow_AVX2(const uint8* src_y,
+void I422ToARGBRow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
- int width);
-void I422ToRGBARow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToABGRRow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
-void I444ToARGBMatrixRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width);
-void I444ToARGBMatrixRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width);
+void I422ToBGRARow_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width);
+void I422ToRGBARow_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width);
+void I422ToABGRRow_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width);
void I444ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
-void I444ToABGRMatrixRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- struct YuvConstants* YuvConstants,
- int width);
-void I444ToABGRMatrixRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- struct YuvConstants* YuvConstants,
- int width);
+void I444ToARGBRow_SSSE3(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width);
+void I444ToARGBRow_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width);
void I444ToABGRRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
int width);
void I444ToABGRRow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width);
+void I444ToABGRRow_SSSE3(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width);
+void I444ToABGRRow_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
-void I422ToARGBMatrixRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width);
-void I422ToABGRMatrixRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width);
+void I422ToARGBRow_SSSE3(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width);
+void I422ToABGRRow_SSSE3(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width);
void I411ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I411ToARGBRow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_SSSE3(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
- int width);
-void NV21ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_AVX2(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
- int width);
-void NV21ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_SSSE3(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_AVX2(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_AVX2(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
uint8* dst_argb,
- int width);
-void J422ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void J422ToABGRRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void J422ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void J422ToABGRRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void H422ToARGBRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void H422ToABGRRow_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void H422ToARGBRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void H422ToABGRRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToBGRARow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_bgra,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToABGRRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB565Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB565Row_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRAWRow_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_raw,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRAWRow_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_raw,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToBGRARow_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToABGRRow_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I444ToARGBRow_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I444ToABGRRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
int width);
void I444ToABGRRow_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I411ToARGBRow_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
const uint8* src_vu,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV12ToARGBRow_Any_AVX2(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV21ToARGBRow_Any_AVX2(const uint8* src_y,
const uint8* src_vu,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_Any_AVX2(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_Any_AVX2(const uint8* src_y,
- const uint8* src_vu,
- uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy,
uint8* dst_argb,
- int width);
-void J422ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void J422ToABGRRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void J422ToARGBRow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void J422ToABGRRow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void H422ToARGBRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void H422ToABGRRow_Any_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void H422ToARGBRow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void H422ToABGRRow_Any_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToBGRARow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_bgra,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToABGRRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB565Row_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRAWRow_Any_AVX2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
@@ -1716,128 +1648,121 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I411ToARGBRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToBGRARow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToABGRRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGBARow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB24Row_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRAWRow_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToRGB565Row_Any_NEON(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
-void J422ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void H422ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- int width);
-void J422ToABGRRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
-void H422ToABGRRow_Any_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- int width);
void NV12ToARGBRow_Any_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
- int width);
-void NV21ToARGBRow_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
- int width);
-void NV21ToRGB565Row_Any_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width);
void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index dec44de..dc30719 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -56,6 +56,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I444ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
@@ -103,7 +104,7 @@
#endif
for (y = 0; y < height; ++y) {
- I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+ I444ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -124,6 +125,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I444ToABGRRow_C;
if (!src_y || !src_u || !src_v ||
!dst_abgr ||
@@ -171,7 +173,7 @@
#endif
for (y = 0; y < height; ++y) {
- I444ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+ I444ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -192,6 +194,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
@@ -248,7 +251,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+ I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -269,6 +272,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I411ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
@@ -316,7 +320,7 @@
#endif
for (y = 0; y < height; ++y) {
- I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+ I411ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -338,6 +342,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
ARGBCopyYToAlphaRow_C;
@@ -436,7 +441,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+ I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
ARGBCopyYToAlphaRow(src_a, dst_argb, width);
ARGBAttenuateRow(dst_argb, dst_argb, width);
dst_argb += dst_stride_argb;
@@ -462,6 +467,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToABGRRow_C;
void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
ARGBCopyYToAlphaRow_C;
@@ -560,7 +566,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+ I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
ARGBCopyYToAlphaRow(src_a, dst_abgr, width);
ARGBAttenuateRow(dst_abgr, dst_abgr, width);
dst_abgr += dst_stride_abgr;
@@ -639,7 +645,7 @@
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
+ void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int width) =
J400ToARGBRow_C;
if (!src_y || !dst_argb ||
width <= 0 || height == 0) {
@@ -766,7 +772,7 @@
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+ void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RGB24ToARGBRow_C;
if (!src_rgb24 || !dst_argb ||
width <= 0 || height == 0) {
@@ -816,7 +822,7 @@
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+ void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
RAWToARGBRow_C;
if (!src_raw || !dst_argb ||
width <= 0 || height == 0) {
@@ -866,7 +872,7 @@
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
+ void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int width) =
RGB565ToARGBRow_C;
if (!src_rgb565 || !dst_argb ||
width <= 0 || height == 0) {
@@ -925,7 +931,7 @@
int width, int height) {
int y;
void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
- int pix) = ARGB1555ToARGBRow_C;
+ int width) = ARGB1555ToARGBRow_C;
if (!src_argb1555 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@@ -983,7 +989,7 @@
int width, int height) {
int y;
void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
- int pix) = ARGB4444ToARGBRow_C;
+ int width) = ARGB4444ToARGBRow_C;
if (!src_argb4444 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@@ -1044,6 +1050,7 @@
void (*NV12ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = NV12ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb ||
width <= 0 || height == 0) {
@@ -1081,7 +1088,7 @@
#endif
for (y = 0; y < height; ++y) {
- NV12ToARGBRow(src_y, src_uv, dst_argb, width);
+ NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@@ -1098,10 +1105,11 @@
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*NV21ToARGBRow)(const uint8* y_buf,
+ void (*NV12ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
- int width) = NV21ToARGBRow_C;
+ struct YuvConstants* yuvconstants,
+ int width) = NV12ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@@ -1112,33 +1120,33 @@
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
-#if defined(HAS_NV21TOARGBROW_SSSE3)
+#if defined(HAS_NV12TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
+ NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- NV21ToARGBRow = NV21ToARGBRow_SSSE3;
+ NV12ToARGBRow = NV12ToARGBRow_SSSE3;
}
}
#endif
-#if defined(HAS_NV21TOARGBROW_AVX2)
+#if defined(HAS_NV12TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- NV21ToARGBRow = NV21ToARGBRow_Any_AVX2;
+ NV12ToARGBRow = NV12ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- NV21ToARGBRow = NV21ToARGBRow_AVX2;
+ NV12ToARGBRow = NV12ToARGBRow_AVX2;
}
}
#endif
-#if defined(HAS_NV21TOARGBROW_NEON)
+#if defined(HAS_NV12TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
+ NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- NV21ToARGBRow = NV21ToARGBRow_NEON;
+ NV12ToARGBRow = NV12ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
- NV21ToARGBRow(src_y, src_uv, dst_argb, width);
+ NV12ToARGBRow(src_y, src_uv, dst_argb, &kYvuConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@@ -1157,6 +1165,7 @@
void (*NV12ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = NV12ToARGBRow_C;
if (!src_m420 || !dst_argb ||
width <= 0 || height == 0) {
@@ -1194,14 +1203,16 @@
#endif
for (y = 0; y < height - 1; y += 2) {
- NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
+ NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
+ &kYuvConstants, width);
NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
- dst_argb + dst_stride_argb, width);
+ dst_argb + dst_stride_argb, &kYuvConstants, width);
dst_argb += dst_stride_argb * 2;
src_m420 += src_stride_m420 * 3;
}
if (height & 1) {
- NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
+ NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
+ &kYuvConstants, width);
}
return 0;
}
@@ -1212,7 +1223,10 @@
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
+ void (*YUY2ToARGBRow)(const uint8* src_yuy2,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) =
YUY2ToARGBRow_C;
if (!src_yuy2 || !dst_argb ||
width <= 0 || height == 0) {
@@ -1256,7 +1270,7 @@
}
#endif
for (y = 0; y < height; ++y) {
- YUY2ToARGBRow(src_yuy2, dst_argb, width);
+ YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvConstants, width);
src_yuy2 += src_stride_yuy2;
dst_argb += dst_stride_argb;
}
@@ -1269,7 +1283,10 @@
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
+ void (*UYVYToARGBRow)(const uint8* src_uyvy,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) =
UYVYToARGBRow_C;
if (!src_uyvy || !dst_argb ||
width <= 0 || height == 0) {
@@ -1313,7 +1330,7 @@
}
#endif
for (y = 0; y < height; ++y) {
- UYVYToARGBRow(src_uyvy, dst_argb, width);
+ UYVYToARGBRow(src_uyvy, dst_argb, &kYuvConstants, width);
src_uyvy += src_stride_uyvy;
dst_argb += dst_stride_argb;
}
@@ -1328,11 +1345,12 @@
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*J422ToARGBRow)(const uint8* y_buf,
+ void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width) = J422ToARGBRow_C;
+ struct YuvConstants* yuvconstants,
+ int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@@ -1343,42 +1361,42 @@
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
-#if defined(HAS_J422TOARGBROW_SSSE3)
+#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
+ I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- J422ToARGBRow = J422ToARGBRow_SSSE3;
+ I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
-#if defined(HAS_J422TOARGBROW_AVX2)
+#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- J422ToARGBRow = J422ToARGBRow_Any_AVX2;
+ I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- J422ToARGBRow = J422ToARGBRow_AVX2;
+ I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
-#if defined(HAS_J422TOARGBROW_NEON)
+#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- J422ToARGBRow = J422ToARGBRow_Any_NEON;
+ I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- J422ToARGBRow = J422ToARGBRow_NEON;
+ I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
-#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
+ I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
- J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+ I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvJConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@@ -1397,11 +1415,12 @@
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*J422ToARGBRow)(const uint8* y_buf,
+ void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width) = J422ToARGBRow_C;
+ struct YuvConstants* yuvconstants,
+ int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
width <= 0 || height == 0) {
@@ -1422,42 +1441,42 @@
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
}
-#if defined(HAS_J422TOARGBROW_SSSE3)
+#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
+ I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- J422ToARGBRow = J422ToARGBRow_SSSE3;
+ I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
-#if defined(HAS_J422TOARGBROW_AVX2)
+#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- J422ToARGBRow = J422ToARGBRow_Any_AVX2;
+ I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- J422ToARGBRow = J422ToARGBRow_AVX2;
+ I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
-#if defined(HAS_J422TOARGBROW_NEON)
+#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- J422ToARGBRow = J422ToARGBRow_Any_NEON;
+ I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- J422ToARGBRow = J422ToARGBRow_NEON;
+ I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
-#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
+ I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
- J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+ I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvJConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -1474,11 +1493,12 @@
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
int y;
- void (*J422ToABGRRow)(const uint8* y_buf,
+ void (*I422ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width) = J422ToABGRRow_C;
+ struct YuvConstants* yuvconstants,
+ int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v || !dst_abgr ||
width <= 0 || height == 0) {
return -1;
@@ -1489,42 +1509,42 @@
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
dst_stride_abgr = -dst_stride_abgr;
}
-#if defined(HAS_J422TOABGRROW_SSSE3)
+#if defined(HAS_I422TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- J422ToABGRRow = J422ToABGRRow_Any_SSSE3;
+ I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- J422ToABGRRow = J422ToABGRRow_SSSE3;
+ I422ToABGRRow = I422ToABGRRow_SSSE3;
}
}
#endif
-#if defined(HAS_J422TOABGRROW_AVX2)
+#if defined(HAS_I422TOABGRROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- J422ToABGRRow = J422ToABGRRow_Any_AVX2;
+ I422ToABGRRow = I422ToABGRRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- J422ToABGRRow = J422ToABGRRow_AVX2;
+ I422ToABGRRow = I422ToABGRRow_AVX2;
}
}
#endif
-#if defined(HAS_J422TOABGRROW_NEON)
+#if defined(HAS_I422TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- J422ToABGRRow = J422ToABGRRow_Any_NEON;
+ I422ToABGRRow = I422ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- J422ToABGRRow = J422ToABGRRow_NEON;
+ I422ToABGRRow = I422ToABGRRow_NEON;
}
}
#endif
-#if defined(HAS_J422TOABGRROW_MIPS_DSPR2)
+#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
- J422ToABGRRow = J422ToABGRRow_MIPS_DSPR2;
+ I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
- J422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+ I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvJConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
if (y & 1) {
@@ -1543,11 +1563,12 @@
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
int y;
- void (*J422ToABGRRow)(const uint8* y_buf,
+ void (*I422ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width) = J422ToABGRRow_C;
+ struct YuvConstants* yuvconstants,
+ int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v ||
!dst_abgr ||
width <= 0 || height == 0) {
@@ -1568,42 +1589,42 @@
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
}
-#if defined(HAS_J422TOABGRROW_SSSE3)
+#if defined(HAS_I422TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- J422ToABGRRow = J422ToABGRRow_Any_SSSE3;
+ I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- J422ToABGRRow = J422ToABGRRow_SSSE3;
+ I422ToABGRRow = I422ToABGRRow_SSSE3;
}
}
#endif
-#if defined(HAS_J422TOABGRROW_AVX2)
+#if defined(HAS_I422TOABGRROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- J422ToABGRRow = J422ToABGRRow_Any_AVX2;
+ I422ToABGRRow = I422ToABGRRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- J422ToABGRRow = J422ToABGRRow_AVX2;
+ I422ToABGRRow = I422ToABGRRow_AVX2;
}
}
#endif
-#if defined(HAS_J422TOABGRROW_NEON)
+#if defined(HAS_I422TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- J422ToABGRRow = J422ToABGRRow_Any_NEON;
+ I422ToABGRRow = I422ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- J422ToABGRRow = J422ToABGRRow_NEON;
+ I422ToABGRRow = I422ToABGRRow_NEON;
}
}
#endif
-#if defined(HAS_J422TOABGRROW_MIPS_DSPR2)
+#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
- J422ToABGRRow = J422ToABGRRow_MIPS_DSPR2;
+ I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
- J422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+ I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvJConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -1620,11 +1641,12 @@
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*H422ToARGBRow)(const uint8* y_buf,
+ void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width) = H422ToARGBRow_C;
+ struct YuvConstants* yuvconstants,
+ int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@@ -1635,42 +1657,42 @@
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
-#if defined(HAS_H422TOARGBROW_SSSE3)
+#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- H422ToARGBRow = H422ToARGBRow_Any_SSSE3;
+ I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- H422ToARGBRow = H422ToARGBRow_SSSE3;
+ I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
-#if defined(HAS_H422TOARGBROW_AVX2)
+#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- H422ToARGBRow = H422ToARGBRow_Any_AVX2;
+ I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- H422ToARGBRow = H422ToARGBRow_AVX2;
+ I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
-#if defined(HAS_H422TOARGBROW_NEON)
+#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- H422ToARGBRow = H422ToARGBRow_Any_NEON;
+ I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- H422ToARGBRow = H422ToARGBRow_NEON;
+ I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
-#if defined(HAS_H422TOARGBROW_MIPS_DSPR2)
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- H422ToARGBRow = H422ToARGBRow_MIPS_DSPR2;
+ I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
- H422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+ I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvHConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@@ -1689,11 +1711,12 @@
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
- void (*H422ToARGBRow)(const uint8* y_buf,
+ void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width) = H422ToARGBRow_C;
+ struct YuvConstants* yuvconstants,
+ int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
width <= 0 || height == 0) {
@@ -1714,42 +1737,42 @@
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
}
-#if defined(HAS_H422TOARGBROW_SSSE3)
+#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- H422ToARGBRow = H422ToARGBRow_Any_SSSE3;
+ I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- H422ToARGBRow = H422ToARGBRow_SSSE3;
+ I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
-#if defined(HAS_H422TOARGBROW_AVX2)
+#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- H422ToARGBRow = H422ToARGBRow_Any_AVX2;
+ I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- H422ToARGBRow = H422ToARGBRow_AVX2;
+ I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
-#if defined(HAS_H422TOARGBROW_NEON)
+#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- H422ToARGBRow = H422ToARGBRow_Any_NEON;
+ I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- H422ToARGBRow = H422ToARGBRow_NEON;
+ I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
-#if defined(HAS_H422TOARGBROW_MIPS_DSPR2)
+#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
- H422ToARGBRow = H422ToARGBRow_MIPS_DSPR2;
+ I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
- H422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+ I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvHConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -1766,11 +1789,12 @@
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
int y;
- void (*H422ToABGRRow)(const uint8* y_buf,
+ void (*I422ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width) = H422ToABGRRow_C;
+ struct YuvConstants* yuvconstants,
+ int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v || !dst_abgr ||
width <= 0 || height == 0) {
return -1;
@@ -1781,42 +1805,42 @@
dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
dst_stride_abgr = -dst_stride_abgr;
}
-#if defined(HAS_H422TOABGRROW_SSSE3)
+#if defined(HAS_I422TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- H422ToABGRRow = H422ToABGRRow_Any_SSSE3;
+ I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- H422ToABGRRow = H422ToABGRRow_SSSE3;
+ I422ToABGRRow = I422ToABGRRow_SSSE3;
}
}
#endif
-#if defined(HAS_H422TOABGRROW_AVX2)
+#if defined(HAS_I422TOABGRROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- H422ToABGRRow = H422ToABGRRow_Any_AVX2;
+ I422ToABGRRow = I422ToABGRRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- H422ToABGRRow = H422ToABGRRow_AVX2;
+ I422ToABGRRow = I422ToABGRRow_AVX2;
}
}
#endif
-#if defined(HAS_H422TOABGRROW_NEON)
+#if defined(HAS_I422TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- H422ToABGRRow = H422ToABGRRow_Any_NEON;
+ I422ToABGRRow = I422ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- H422ToABGRRow = H422ToABGRRow_NEON;
+ I422ToABGRRow = I422ToABGRRow_NEON;
}
}
#endif
-#if defined(HAS_H422TOABGRROW_MIPS_DSPR2)
+#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
- H422ToABGRRow = H422ToABGRRow_MIPS_DSPR2;
+ I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
- H422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+ I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvHConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
if (y & 1) {
@@ -1835,11 +1859,12 @@
uint8* dst_abgr, int dst_stride_abgr,
int width, int height) {
int y;
- void (*H422ToABGRRow)(const uint8* y_buf,
+ void (*I422ToABGRRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width) = H422ToABGRRow_C;
+ struct YuvConstants* yuvconstants,
+ int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v ||
!dst_abgr ||
width <= 0 || height == 0) {
@@ -1860,42 +1885,42 @@
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
}
-#if defined(HAS_H422TOABGRROW_SSSE3)
+#if defined(HAS_I422TOABGRROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- H422ToABGRRow = H422ToABGRRow_Any_SSSE3;
+ I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- H422ToABGRRow = H422ToABGRRow_SSSE3;
+ I422ToABGRRow = I422ToABGRRow_SSSE3;
}
}
#endif
-#if defined(HAS_H422TOABGRROW_AVX2)
+#if defined(HAS_I422TOABGRROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- H422ToABGRRow = H422ToABGRRow_Any_AVX2;
+ I422ToABGRRow = I422ToABGRRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- H422ToABGRRow = H422ToABGRRow_AVX2;
+ I422ToABGRRow = I422ToABGRRow_AVX2;
}
}
#endif
-#if defined(HAS_H422TOABGRROW_NEON)
+#if defined(HAS_I422TOABGRROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- H422ToABGRRow = H422ToABGRRow_Any_NEON;
+ I422ToABGRRow = I422ToABGRRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- H422ToABGRRow = H422ToABGRRow_NEON;
+ I422ToABGRRow = I422ToABGRRow_NEON;
}
}
#endif
-#if defined(HAS_H422TOABGRROW_MIPS_DSPR2)
+#if defined(HAS_I422TOABGRROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_abgr, 4) && IS_ALIGNED(dst_stride_abgr, 4)) {
- H422ToABGRRow = H422ToABGRRow_MIPS_DSPR2;
+ I422ToABGRRow = I422ToABGRRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
- H422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+ I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvHConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
src_u += src_stride_u;
diff --git a/source/convert_from.cc b/source/convert_from.cc
index 31f1ac9..b9c7be9 100644
--- a/source/convert_from.cc
+++ b/source/convert_from.cc
@@ -462,6 +462,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v || !dst_argb ||
width <= 0 || height == 0) {
@@ -508,7 +509,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+ I422ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvConstants, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
@@ -531,6 +532,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToBGRARow_C;
if (!src_y || !src_u || !src_v || !dst_bgra ||
width <= 0 || height == 0) {
@@ -577,7 +579,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
+ I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvConstants, width);
dst_bgra += dst_stride_bgra;
src_y += src_stride_y;
if (y & 1) {
@@ -600,6 +602,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v || !dst_abgr ||
width <= 0 || height == 0) {
@@ -637,7 +640,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+ I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
if (y & 1) {
@@ -660,6 +663,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToRGBARow_C;
if (!src_y || !src_u || !src_v || !dst_rgba ||
width <= 0 || height == 0) {
@@ -697,7 +701,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
+ I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvConstants, width);
dst_rgba += dst_stride_rgba;
src_y += src_stride_y;
if (y & 1) {
@@ -720,6 +724,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToRGB24Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb24 ||
width <= 0 || height == 0) {
@@ -757,7 +762,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width);
+ I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, &kYuvConstants, width);
dst_rgb24 += dst_stride_rgb24;
src_y += src_stride_y;
if (y & 1) {
@@ -780,6 +785,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToRAWRow_C;
if (!src_y || !src_u || !src_v || !dst_raw ||
width <= 0 || height == 0) {
@@ -817,7 +823,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToRAWRow(src_y, src_u, src_v, dst_raw, width);
+ I422ToRAWRow(src_y, src_u, src_v, dst_raw, &kYuvConstants, width);
dst_raw += dst_stride_raw;
src_y += src_stride_y;
if (y & 1) {
@@ -840,6 +846,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToARGB1555Row_C;
if (!src_y || !src_u || !src_v || !dst_argb1555 ||
width <= 0 || height == 0) {
@@ -877,7 +884,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width);
+ I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvConstants, width);
dst_argb1555 += dst_stride_argb1555;
src_y += src_stride_y;
if (y & 1) {
@@ -901,6 +908,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToARGB4444Row_C;
if (!src_y || !src_u || !src_v || !dst_argb4444 ||
width <= 0 || height == 0) {
@@ -938,7 +946,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width);
+ I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvConstants, width);
dst_argb4444 += dst_stride_argb4444;
src_y += src_stride_y;
if (y & 1) {
@@ -961,6 +969,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToRGB565Row_C;
if (!src_y || !src_u || !src_v || !dst_rgb565 ||
width <= 0 || height == 0) {
@@ -998,7 +1007,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width);
+ I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvConstants, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
@@ -1029,6 +1038,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToARGBRow_C;
void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C;
@@ -1105,7 +1115,7 @@
// Allocate a row of argb.
align_buffer_64(row_argb, width * 4);
for (y = 0; y < height; ++y) {
- I422ToARGBRow(src_y, src_u, src_v, row_argb, width);
+ I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvConstants, width);
ARGBToRGB565DitherRow(row_argb, dst_rgb565,
*(uint32*)(dither4x4 + ((y & 3) << 2)), width);
dst_rgb565 += dst_stride_rgb565;
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index 3cca5f4..2299ab8 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -287,9 +287,9 @@
int width, int height) {
int y;
void (*YUY2ToUV422Row)(const uint8* src_yuy2,
- uint8* dst_u, uint8* dst_v, int pix) =
+ uint8* dst_u, uint8* dst_v, int width) =
YUY2ToUV422Row_C;
- void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
+ void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
YUY2ToYRow_C;
// Negative height means invert the image.
if (height < 0) {
@@ -359,10 +359,10 @@
int width, int height) {
int y;
void (*UYVYToUV422Row)(const uint8* src_uyvy,
- uint8* dst_u, uint8* dst_v, int pix) =
+ uint8* dst_u, uint8* dst_v, int width) =
UYVYToUV422Row_C;
void (*UYVYToYRow)(const uint8* src_uyvy,
- uint8* dst_y, int pix) = UYVYToYRow_C;
+ uint8* dst_y, int width) = UYVYToYRow_C;
// Negative height means invert the image.
if (height < 0) {
height = -height;
@@ -790,6 +790,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToBGRARow_C;
if (!src_y || !src_u || !src_v ||
!dst_bgra ||
@@ -846,7 +847,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
+ I422ToBGRARow(src_y, src_u, src_v, dst_bgra, &kYuvConstants, width);
dst_bgra += dst_stride_bgra;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -867,6 +868,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToABGRRow_C;
if (!src_y || !src_u || !src_v ||
!dst_abgr ||
@@ -914,7 +916,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
+ I422ToABGRRow(src_y, src_u, src_v, dst_abgr, &kYuvConstants, width);
dst_abgr += dst_stride_abgr;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -935,6 +937,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = I422ToRGBARow_C;
if (!src_y || !src_u || !src_v ||
!dst_rgba ||
@@ -982,7 +985,7 @@
#endif
for (y = 0; y < height; ++y) {
- I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
+ I422ToRGBARow(src_y, src_u, src_v, dst_rgba, &kYuvConstants, width);
dst_rgba += dst_stride_rgba;
src_y += src_stride_y;
src_u += src_stride_u;
@@ -1001,6 +1004,7 @@
void (*NV12ToRGB565Row)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) = NV12ToRGB565Row_C;
if (!src_y || !src_uv || !dst_rgb565 ||
width <= 0 || height == 0) {
@@ -1038,7 +1042,7 @@
#endif
for (y = 0; y < height; ++y) {
- NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
+ NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvConstants, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
@@ -1055,10 +1059,11 @@
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height) {
int y;
- void (*NV21ToRGB565Row)(const uint8* y_buf,
+ void (*NV12ToRGB565Row)(const uint8* y_buf,
const uint8* src_vu,
uint8* rgb_buf,
- int width) = NV21ToRGB565Row_C;
+ struct YuvConstants* yuvconstants,
+ int width) = NV12ToRGB565Row_C;
if (!src_y || !src_vu || !dst_rgb565 ||
width <= 0 || height == 0) {
return -1;
@@ -1069,33 +1074,33 @@
dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
dst_stride_rgb565 = -dst_stride_rgb565;
}
-#if defined(HAS_NV21TORGB565ROW_SSSE3)
+#if defined(HAS_NV12TORGB565ROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
- NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
+ NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
- NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
+ NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
}
}
#endif
-#if defined(HAS_NV21TORGB565ROW_AVX2)
+#if defined(HAS_NV12TORGB565ROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
- NV21ToRGB565Row = NV21ToRGB565Row_Any_AVX2;
+ NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
- NV21ToRGB565Row = NV21ToRGB565Row_AVX2;
+ NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
}
}
#endif
-#if defined(HAS_NV21TORGB565ROW_NEON)
+#if defined(HAS_NV12TORGB565ROW_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
- NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
+ NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
if (IS_ALIGNED(width, 8)) {
- NV21ToRGB565Row = NV21ToRGB565Row_NEON;
+ NV12ToRGB565Row = NV12ToRGB565Row_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
- NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
+ NV12ToRGB565Row(src_y, src_vu, dst_rgb565, &kYvuConstants, width);
dst_rgb565 += dst_stride_rgb565;
src_y += src_stride_y;
if (y & 1) {
@@ -1110,7 +1115,7 @@
int width, int height,
uint32 value) {
int y;
- void (*SetRow)(uint8* dst, uint8 value, int pix) = SetRow_C;
+ void (*SetRow)(uint8* dst, uint8 value, int width) = SetRow_C;
if (height < 0) {
height = -height;
dst_y = dst_y + (height - 1) * dst_stride_y;
@@ -1186,7 +1191,7 @@
int width, int height,
uint32 value) {
int y;
- void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int pix) = ARGBSetRow_C;
+ void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int width) = ARGBSetRow_C;
if (!dst_argb ||
width <= 0 || height == 0 ||
dst_x < 0 || dst_y < 0) {
@@ -1909,7 +1914,7 @@
const uint8* shuffler, int width, int height) {
int y;
void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
- const uint8* shuffler, int pix) = ARGBShuffleRow_C;
+ const uint8* shuffler, int width) = ARGBShuffleRow_C;
if (!src_bgra || !dst_argb ||
width <= 0 || height == 0) {
return -1;
@@ -1976,7 +1981,7 @@
const uint8* src_sobely,
uint8* dst, int width)) {
int y;
- void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int pix) =
+ void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) =
ARGBToYJRow_C;
void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
uint8* dst_sobely, int width) = SobelYRow_C;
@@ -2360,8 +2365,8 @@
int width, int height) {
int y;
int halfwidth = (width + 1) >> 1;
- void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
- SplitUVRow_C;
+ void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width) = SplitUVRow_C;
void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
@@ -2464,8 +2469,8 @@
int width, int height) {
int y;
int halfwidth = (width + 1) >> 1;
- void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
- SplitUVRow_C;
+ void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+ int width) = SplitUVRow_C;
void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
ptrdiff_t src_stride, int dst_width,
int source_y_fraction) = InterpolateRow_C;
diff --git a/source/row_any.cc b/source/row_any.cc
index c309499..46cbdc7 100644
--- a/source/row_any.cc
+++ b/source/row_any.cc
@@ -40,103 +40,9 @@
memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
SS(r, DUVSHIFT) * BPP); \
}
-
-#ifdef HAS_I422TOARGBROW_SSSE3
-ANY31(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
-ANY31(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7)
-ANY31(J422ToARGBRow_Any_SSSE3, J422ToARGBRow_SSSE3, 1, 0, 4, 7)
-ANY31(J422ToABGRRow_Any_SSSE3, J422ToABGRRow_SSSE3, 1, 0, 4, 7)
-ANY31(H422ToARGBRow_Any_SSSE3, H422ToARGBRow_SSSE3, 1, 0, 4, 7)
-ANY31(H422ToABGRRow_Any_SSSE3, H422ToABGRRow_SSSE3, 1, 0, 4, 7)
-#endif
-#ifdef HAS_I444TOARGBROW_SSSE3
-ANY31(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
-ANY31(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
-ANY31(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7)
-ANY31(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
-ANY31(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
-ANY31(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
-ANY31(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
-ANY31(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
-ANY31(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7)
+#ifdef HAS_I422TOYUY2ROW_SSE2
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
-#endif // HAS_I444TOARGBROW_SSSE3
-#ifdef HAS_I444TOABGRROW_SSSE3
-ANY31(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7)
-#endif
-#ifdef HAS_I422TORGB24ROW_AVX2
-ANY31(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
-#endif
-#ifdef HAS_I422TORAWROW_AVX2
-ANY31(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15)
-#endif
-#ifdef HAS_J422TOARGBROW_AVX2
-ANY31(J422ToARGBRow_Any_AVX2, J422ToARGBRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_J422TOABGRROW_AVX2
-ANY31(J422ToABGRRow_Any_AVX2, J422ToABGRRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_H422TOARGBROW_AVX2
-ANY31(H422ToARGBRow_Any_AVX2, H422ToARGBRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_H422TOABGRROW_AVX2
-ANY31(H422ToABGRRow_Any_AVX2, H422ToABGRRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I422TOARGBROW_AVX2
-ANY31(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I422TOBGRAROW_AVX2
-ANY31(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I422TORGBAROW_AVX2
-ANY31(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I422TOABGRROW_AVX2
-ANY31(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15)
-#endif
-#ifdef HAS_I444TOARGBROW_AVX2
-ANY31(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
-#endif
-#ifdef HAS_I444TOABGRROW_AVX2
-ANY31(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15)
-#endif
-#ifdef HAS_I411TOARGBROW_AVX2
-ANY31(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
-#endif
-#ifdef HAS_I422TOARGB4444ROW_AVX2
-ANY31(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
-#endif
-#ifdef HAS_I422TOARGB1555ROW_AVX2
-ANY31(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
-#endif
-#ifdef HAS_I422TORGB565ROW_AVX2
-ANY31(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
-#endif
-#ifdef HAS_I422TOARGBROW_NEON
-ANY31(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
-ANY31(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
-ANY31(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
-ANY31(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7)
-ANY31(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7)
-ANY31(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
-ANY31(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
-ANY31(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7)
-ANY31(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
-ANY31(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
-ANY31(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
-#endif
-#ifdef HAS_J422TOARGBROW_NEON
-ANY31(J422ToARGBRow_Any_NEON, J422ToARGBRow_NEON, 1, 0, 4, 7)
-#endif
-#ifdef HAS_J422TOABGRROW_NEON
-ANY31(J422ToABGRRow_Any_NEON, J422ToABGRRow_NEON, 1, 0, 4, 7)
-#endif
-#ifdef HAS_H422TOARGBROW_NEON
-ANY31(H422ToARGBRow_Any_NEON, H422ToARGBRow_NEON, 1, 0, 4, 7)
-#endif
-#ifdef HAS_H422TOABGRROW_NEON
-ANY31(H422ToABGRRow_Any_NEON, H422ToABGRRow_NEON, 1, 0, 4, 7)
#endif
#ifdef HAS_I422TOYUY2ROW_NEON
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
@@ -144,7 +50,97 @@
#ifdef HAS_I422TOUYVYROW_NEON
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#endif
-#undef ANY31
+#undef ANY31
+
+// Any 3 planes to 1 with yuvconstants
+#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
+ void NAMEANY(const uint8* y_buf, const uint8* u_buf, const uint8* v_buf, \
+ uint8* dst_ptr, struct YuvConstants* yuvconstants, \
+ int width) { \
+ SIMD_ALIGNED(uint8 temp[64 * 4]); \
+ memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, y_buf + n, r); \
+ memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
+ ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, \
+ yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
+ SS(r, DUVSHIFT) * BPP); \
+ }
+
+#ifdef HAS_I422TOARGBROW_SSSE3
+ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
+ANY31C(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_SSSE3, 1, 0, 4, 7)
+#endif
+#ifdef HAS_I444TOARGBROW_SSSE3
+ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
+ANY31C(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_SSSE3, 2, 0, 4, 7)
+ANY31C(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_SSSE3, 1, 0, 4, 7)
+ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
+ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
+ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
+ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
+ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 7)
+ANY31C(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, 1, 0, 3, 7)
+#endif // HAS_I444TOARGBROW_SSSE3
+#ifdef HAS_I444TOABGRROW_SSSE3
+ANY31C(I444ToABGRRow_Any_SSSE3, I444ToABGRRow_SSSE3, 0, 0, 4, 7)
+#endif
+#ifdef HAS_I422TORGB24ROW_AVX2
+ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 15)
+#endif
+#ifdef HAS_I422TORAWROW_AVX2
+ANY31C(I422ToRAWRow_Any_AVX2, I422ToRAWRow_AVX2, 1, 0, 3, 15)
+#endif
+#ifdef HAS_I422TOARGBROW_AVX2
+ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I422TOBGRAROW_AVX2
+ANY31C(I422ToBGRARow_Any_AVX2, I422ToBGRARow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I422TORGBAROW_AVX2
+ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I422TOABGRROW_AVX2
+ANY31C(I422ToABGRRow_Any_AVX2, I422ToABGRRow_AVX2, 1, 0, 4, 15)
+#endif
+#ifdef HAS_I444TOARGBROW_AVX2
+ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
+#endif
+#ifdef HAS_I444TOABGRROW_AVX2
+ANY31C(I444ToABGRRow_Any_AVX2, I444ToABGRRow_AVX2, 0, 0, 4, 15)
+#endif
+#ifdef HAS_I411TOARGBROW_AVX2
+ANY31C(I411ToARGBRow_Any_AVX2, I411ToARGBRow_AVX2, 2, 0, 4, 15)
+#endif
+#ifdef HAS_I422TOARGB4444ROW_AVX2
+ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 7)
+#endif
+#ifdef HAS_I422TOARGB1555ROW_AVX2
+ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 7)
+#endif
+#ifdef HAS_I422TORGB565ROW_AVX2
+ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 7)
+#endif
+#ifdef HAS_I422TOARGBROW_NEON
+ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
+ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
+ANY31C(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, 2, 0, 4, 7)
+ANY31C(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, 1, 0, 4, 7)
+ANY31C(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, 1, 0, 4, 7)
+ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
+ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
+ANY31C(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, 1, 0, 3, 7)
+ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
+ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
+ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
+#endif
+#undef ANY31C
// Any 2 planes to 1.
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
@@ -164,32 +160,6 @@
memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
}
-// Biplanar to RGB.
-#ifdef HAS_NV12TOARGBROW_SSSE3
-ANY21(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
-ANY21(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
-#endif
-#ifdef HAS_NV12TOARGBROW_AVX2
-ANY21(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
-ANY21(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
-#endif
-#ifdef HAS_NV12TOARGBROW_NEON
-ANY21(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
-ANY21(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
-#endif
-#ifdef HAS_NV12TORGB565ROW_SSSE3
-ANY21(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
-ANY21(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
-#endif
-#ifdef HAS_NV12TORGB565ROW_AVX2
-ANY21(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
-ANY21(NV21ToRGB565Row_Any_AVX2, NV21ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
-#endif
-#ifdef HAS_NV12TORGB565ROW_NEON
-ANY21(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
-ANY21(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, 1, 1, 2, 2, 7)
-#endif
-
// Merge functions.
#ifdef HAS_MERGEUVROW_SSE2
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
@@ -249,6 +219,46 @@
#endif
#undef ANY21
+// Any 2 planes to 1 with yuvconstants
+#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
+ void NAMEANY(const uint8* y_buf, const uint8* uv_buf, \
+ uint8* dst_ptr, struct YuvConstants* yuvconstants, \
+ int width) { \
+ SIMD_ALIGNED(uint8 temp[64 * 3]); \
+ memset(temp, 0, 64 * 2); /* for msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, y_buf + n * SBPP, r * SBPP); \
+ memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \
+ SS(r, UVSHIFT) * SBPP2); \
+ ANY_SIMD(temp, temp + 64, temp + 128, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+ }
+
+// Biplanar to RGB.
+#ifdef HAS_NV12TOARGBROW_SSSE3
+ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV12TOARGBROW_AVX2
+ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
+#endif
+#ifdef HAS_NV12TOARGBROW_NEON
+ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
+#endif
+#ifdef HAS_NV12TORGB565ROW_SSSE3
+ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
+#endif
+#ifdef HAS_NV12TORGB565ROW_AVX2
+ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
+#endif
+#ifdef HAS_NV12TORGB565ROW_NEON
+ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
+#endif
+#undef ANY21C
+
// Any 1 to 1.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
@@ -297,9 +307,7 @@
#if defined(HAS_I400TOARGBROW_AVX2)
ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15)
#endif
-#if defined(HAS_YUY2TOARGBROW_SSSE3)
-ANY11(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
-ANY11(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
+#if defined(HAS_RGB24TOARGBROW_SSSE3)
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
@@ -315,10 +323,6 @@
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
-#if defined(HAS_YUY2TOARGBROW_AVX2)
-ANY11(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
-ANY11(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
-#endif
#if defined(HAS_ARGBTORGB24ROW_NEON)
ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7)
ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
@@ -327,8 +331,6 @@
ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7)
-ANY11(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
-ANY11(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
#endif
#ifdef HAS_ARGBTOYROW_AVX2
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
@@ -426,6 +428,35 @@
#endif
#undef ANY11
+// Any 1 to 1 with yuvconstants
+#define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
+ void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, \
+ struct YuvConstants* yuvconstants, int width) { \
+ SIMD_ALIGNED(uint8 temp[128 * 2]); \
+ memset(temp, 0, 128); /* for YUY2 and msan */ \
+ int r = width & MASK; \
+ int n = width & ~MASK; \
+ if (n > 0) { \
+ ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \
+ } \
+ memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
+ ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \
+ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \
+ }
+#if defined(HAS_YUY2TOARGBROW_SSSE3)
+ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15)
+ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15)
+#endif
+#if defined(HAS_YUY2TOARGBROW_AVX2)
+ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31)
+ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31)
+#endif
+#if defined(HAS_YUY2TOARGBROW_NEON)
+ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7)
+ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7)
+#endif
+#undef ANY11C
+
// Any 1 to 1 blended.
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
void NAMEANY(const uint8* src_ptr, uint8* dst_ptr, int width) { \
diff --git a/source/row_common.cc b/source/row_common.cc
index 0a4520f..3413803 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -1014,6 +1014,22 @@
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)
+#if defined(__arm__) || defined(__aarch64__)
+YuvConstants SIMD_ALIGNED(kYuvConstants) = {
+ { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { BB, BG, BR, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+
+YuvConstants SIMD_ALIGNED(kYvuConstants) = {
+ { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { BB, BG, BR, 0, 0, 0, 0, 0 },
+ { 0x0101 * YG, 0, 0, 0 }
+};
+
+#else
// BT601 constants for YUV to RGB.
YuvConstants SIMD_ALIGNED(kYuvConstants) = {
{ UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
@@ -1041,22 +1057,7 @@
{ BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
{ YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
};
-
-YuvConstantsNEON SIMD_ALIGNED(kYuvConstantsNEON) = {
- { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 },
- { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 },
- { BB, BG, BR, 0, 0, 0, 0, 0 },
- { 0x0101 * YG, 0, 0, 0 }
-};
-
-// C reference code that mimics the YUV assembly.
-static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
- uint8* b, uint8* g, uint8* r) {
- uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
- *b = Clamp((int32)(-(u * UB) + y1 + BB) >> 6);
- *g = Clamp((int32)(-(v * VG + u * UG) + y1 + BG) >> 6);
- *r = Clamp((int32)(-(v * VR)+ y1 + BR) >> 6);
-}
+#endif
// C reference code that mimics the YUV assembly.
static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) {
@@ -1065,15 +1066,50 @@
*g = Clamp((int32)(y1 + YGB) >> 6);
*r = Clamp((int32)(y1 + YGB) >> 6);
}
-#undef YG
+
+#undef BB
+#undef BG
+#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
-#undef BB
-#undef BG
-#undef BR
+#undef YG
+
+// C reference code that mimics the YUV assembly.
+static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
+ uint8* b, uint8* g, uint8* r,
+ struct YuvConstants* yuvconstants) {
+#if defined(__arm__) || defined(__aarch64__)
+
+ int UB = -yuvconstants->kUVToRB[0];
+ int VB = 0;
+ int UG = yuvconstants->kUVToG[0];
+ int VG = yuvconstants->kUVToG[4];
+ int UR = 0;
+ int VR = -yuvconstants->kUVToRB[4];
+ int BB = yuvconstants->kUVBiasBGR[0];
+ int BG = yuvconstants->kUVBiasBGR[1];
+ int BR = yuvconstants->kUVBiasBGR[2];
+ int YG = yuvconstants->kYToRgb[0];
+#else
+ int UB = yuvconstants->kUVToB[0];
+ int VB = yuvconstants->kUVToB[1]; // usually 0
+ int UG = yuvconstants->kUVToG[0];
+ int VG = yuvconstants->kUVToG[1];
+ int UR = yuvconstants->kUVToR[0]; // usually 0
+ int VR = yuvconstants->kUVToR[1];
+ int BB = yuvconstants->kUVBiasB[0];
+ int BG = yuvconstants->kUVBiasG[0];
+ int BR = yuvconstants->kUVBiasR[0];
+ int YG = yuvconstants->kYToRgb[0];
+#endif
+ uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
+ *b = Clamp((int32)(-(u * UB + v * VB) + y1 + BB) >> 6);
+ *g = Clamp((int32)(-(u * UG + v * VG) + y1 + BG) >> 6);
+ *r = Clamp((int32)(-(u * UR + v * VR) + y1 + BR) >> 6);
+}
// JPEG YUV to RGB reference
// * R = Y - V * -1.40200
@@ -1095,6 +1131,15 @@
#define BGJ (UGJ * 128 + VGJ * 128 + YGBJ)
#define BRJ (VRJ * 128 + YGBJ)
+#if defined(__arm__) || defined(__aarch64__)
+// JPEG constants for YUV to RGB.
+YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
+ { -UBJ, -UBJ, -UBJ, -UBJ, -VRJ, -VRJ, -VRJ, -VRJ, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { UGJ, UGJ, UGJ, UGJ, VGJ, VGJ, VGJ, VGJ, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { BBJ, BGJ, BRJ, 0, 0, 0, 0, 0 },
+ { 0x0101 * YGJ, 0, 0, 0 }
+};
+#else
// JPEG constants for YUV to RGB.
YuvConstants SIMD_ALIGNED(kYuvJConstants) = {
{ UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0, UBJ, 0,
@@ -1114,23 +1159,7 @@
{ YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ,
YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ, YGJ }
};
-
-// JPEG constants for YUV to RGB.
-YuvConstantsNEON SIMD_ALIGNED(kYuvJConstantsNEON) = {
- { -UBJ, -UBJ, -UBJ, -UBJ, -VRJ, -VRJ, -VRJ, -VRJ, 0, 0, 0, 0, 0, 0, 0, 0 },
- { UGJ, UGJ, UGJ, UGJ, VGJ, VGJ, VGJ, VGJ, 0, 0, 0, 0, 0, 0, 0, 0 },
- { BBJ, BGJ, BRJ, 0, 0, 0, 0, 0 },
- { 0x0101 * YGJ, 0, 0, 0 }
-};
-
-// C reference code that mimics the YUV assembly.
-static __inline void YuvJPixel(uint8 y, uint8 u, uint8 v,
- uint8* b, uint8* g, uint8* r) {
- uint32 y1 = (uint32)(y * 0x0101 * YGJ) >> 16;
- *b = Clamp((int32)(-(u * UBJ) + y1 + BBJ) >> 6);
- *g = Clamp((int32)(-(v * VGJ + u * UGJ) + y1 + BGJ) >> 6);
- *r = Clamp((int32)(-(v * VRJ) + y1 + BRJ) >> 6);
-}
+#endif
#undef YGJ
#undef YGBJ
@@ -1162,6 +1191,15 @@
#define BGH (UGH * 128 + VGH * 128 + YGBH)
#define BRH (VRH * 128 + YGBH)
+#if defined(__arm__) || defined(__aarch64__)
+// BT.709 constants for YUV to RGB.
+YuvConstants SIMD_ALIGNED(kYuvHConstants) = {
+ { -UBH, -UBH, -UBH, -UBH, -VRH, -VRH, -VRH, -VRH, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { UGH, UGH, UGH, UGH, VGH, VGH, VGH, VGH, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { BBH, BGH, BRH, 0, 0, 0, 0, 0 },
+ { 0x0101 * YGH, 0, 0, 0 }
+};
+#else
// BT.709 constants for YUV to RGB.
YuvConstants SIMD_ALIGNED(kYuvHConstants) = {
{ UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0, UBH, 0,
@@ -1181,23 +1219,7 @@
{ YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH,
YGH, YGH, YGH, YGH, YGH, YGH, YGH, YGH }
};
-
-// BT.709 constants for YUV to RGB.
-YuvConstantsNEON SIMD_ALIGNED(kYuvHConstantsNEON) = {
- { -UBH, -UBH, -UBH, -UBH, -VRH, -VRH, -VRH, -VRH, 0, 0, 0, 0, 0, 0, 0, 0 },
- { UGH, UGH, UGH, UGH, VGH, VGH, VGH, VGH, 0, 0, 0, 0, 0, 0, 0, 0 },
- { BBH, BGH, BRH, 0, 0, 0, 0, 0 },
- { 0x0101 * YGH, 0, 0, 0 }
-};
-
-// C reference code that mimics the YUV assembly.
-static __inline void YuvHPixel(uint8 y, uint8 u, uint8 v,
- uint8* b, uint8* g, uint8* r) {
- uint32 y1 = (uint32)(y * 0x0101 * YGH) >> 16;
- *b = Clamp((int32)(-(u * UBH) + y1 + BBH) >> 6);
- *g = Clamp((int32)(-(v * VGH + u * UGH) + y1 + BGH) >> 6);
- *r = Clamp((int32)(-(v * VRH) + y1 + BRH) >> 6);
-}
+#endif
#undef YGH
#undef YGBH
@@ -1217,14 +1239,17 @@
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
- YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
+ yuvconstants);
rgb_buf[3] = 255;
- YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
+ yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 2;
@@ -1233,7 +1258,7 @@
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
}
}
@@ -1241,14 +1266,17 @@
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
- YuvPixel(src_y[0], u, v, rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
+ YuvPixel(src_y[0], u, v, rgb_buf + 2, rgb_buf + 1, rgb_buf + 0,
+ yuvconstants);
rgb_buf[3] = 255;
- YuvPixel(src_y[1], u, v, rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
+ YuvPixel(src_y[1], u, v, rgb_buf + 6, rgb_buf + 5, rgb_buf + 4,
+ yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 2;
@@ -1257,7 +1285,7 @@
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
+ rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
}
}
#else
@@ -1265,11 +1293,12 @@
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width; ++x) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
src_y += 1;
src_u += 1;
@@ -1282,11 +1311,12 @@
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width; ++x) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
+ rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
rgb_buf[3] = 255;
src_y += 1;
src_u += 1;
@@ -1301,14 +1331,15 @@
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 1;
@@ -1317,124 +1348,23 @@
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
-void J422ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvJPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- YuvJPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvJPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void J422ToABGRRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvJPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
- rgb_buf[3] = 255;
- YuvJPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
- rgb_buf[7] = 255;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvJPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
- rgb_buf[3] = 255;
- }
-}
-
-// TODO(fbarchard): replace with common matrix function.
-void H422ToARGBRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvHPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- YuvHPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvHPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void H422ToABGRRow_C(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvHPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
- rgb_buf[3] = 255;
- YuvHPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
- rgb_buf[7] = 255;
- src_y += 2;
- src_u += 1;
- src_v += 1;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvHPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
- rgb_buf[3] = 255;
- }
-}
-
-
void I422ToRGB24Row_C(const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 3, rgb_buf + 4, rgb_buf + 5);
+ rgb_buf + 3, rgb_buf + 4, rgb_buf + 5, yuvconstants);
src_y += 2;
src_u += 1;
src_v += 1;
@@ -1442,7 +1372,7 @@
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
}
}
@@ -1450,13 +1380,14 @@
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
+ rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 5, rgb_buf + 4, rgb_buf + 3);
+ rgb_buf + 5, rgb_buf + 4, rgb_buf + 3, yuvconstants);
src_y += 2;
src_u += 1;
src_v += 1;
@@ -1464,7 +1395,7 @@
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
+ rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
}
}
@@ -1472,6 +1403,7 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ struct YuvConstants* yuvconstants,
int width) {
uint8 b0;
uint8 g0;
@@ -1481,8 +1413,8 @@
uint8 r1;
int x;
for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
+ YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
@@ -1497,7 +1429,7 @@
dst_argb4444 += 4; // Advance 2 pixels.
}
if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 4;
g0 = g0 >> 4;
r0 = r0 >> 4;
@@ -1510,6 +1442,7 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
+ struct YuvConstants* yuvconstants,
int width) {
uint8 b0;
uint8 g0;
@@ -1519,8 +1452,8 @@
uint8 r1;
int x;
for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
+ YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 3;
r0 = r0 >> 3;
@@ -1535,7 +1468,7 @@
dst_argb1555 += 4; // Advance 2 pixels.
}
if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 3;
r0 = r0 >> 3;
@@ -1548,6 +1481,7 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
int width) {
uint8 b0;
uint8 g0;
@@ -1557,8 +1491,8 @@
uint8 r1;
int x;
for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1);
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
+ YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@@ -1573,7 +1507,7 @@
dst_rgb565 += 4; // Advance 2 pixels.
}
if (width & 1) {
- YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0);
+ YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@@ -1585,20 +1519,21 @@
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 3; x += 4) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
YuvPixel(src_y[2], src_u[0], src_v[0],
- rgb_buf + 8, rgb_buf + 9, rgb_buf + 10);
+ rgb_buf + 8, rgb_buf + 9, rgb_buf + 10, yuvconstants);
rgb_buf[11] = 255;
YuvPixel(src_y[3], src_u[0], src_v[0],
- rgb_buf + 12, rgb_buf + 13, rgb_buf + 14);
+ rgb_buf + 12, rgb_buf + 13, rgb_buf + 14, yuvconstants);
rgb_buf[15] = 255;
src_y += 4;
src_u += 1;
@@ -1607,17 +1542,17 @@
}
if (width & 2) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@@ -1625,14 +1560,15 @@
void NV12ToARGBRow_C(const uint8* src_y,
const uint8* src_uv,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_uv[0], src_uv[1],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_uv[0], src_uv[1],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_uv += 2;
@@ -1640,32 +1576,7 @@
}
if (width & 1) {
YuvPixel(src_y[0], src_uv[0], src_uv[1],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
- }
-}
-
-void NV21ToARGBRow_C(const uint8* src_y,
- const uint8* src_vu,
- uint8* rgb_buf,
- int width) {
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_vu[1], src_vu[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
- rgb_buf[3] = 255;
-
- YuvPixel(src_y[1], src_vu[1], src_vu[0],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
- rgb_buf[7] = 255;
-
- src_y += 2;
- src_vu += 2;
- rgb_buf += 8; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], src_vu[1], src_vu[0],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@@ -1673,6 +1584,7 @@
void NV12ToRGB565Row_C(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
int width) {
uint8 b0;
uint8 g0;
@@ -1682,8 +1594,8 @@
uint8 r1;
int x;
for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0);
- YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1);
+ YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
+ YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@@ -1697,42 +1609,7 @@
dst_rgb565 += 4; // Advance 2 pixels.
}
if (width & 1) {
- YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
- }
-}
-
-void NV21ToRGB565Row_C(const uint8* src_y,
- const uint8* vsrc_u,
- uint8* dst_rgb565,
- int width) {
- uint8 b0;
- uint8 g0;
- uint8 r0;
- uint8 b1;
- uint8 g1;
- uint8 r1;
- int x;
- for (x = 0; x < width - 1; x += 2) {
- YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
- YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1);
- b0 = b0 >> 3;
- g0 = g0 >> 2;
- r0 = r0 >> 3;
- b1 = b1 >> 3;
- g1 = g1 >> 2;
- r1 = r1 >> 3;
- *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
- (b1 << 16) | (g1 << 21) | (r1 << 27);
- src_y += 2;
- vsrc_u += 2;
- dst_rgb565 += 4; // Advance 2 pixels.
- }
- if (width & 1) {
- YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0);
+ YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
b0 = b0 >> 3;
g0 = g0 >> 2;
r0 = r0 >> 3;
@@ -1742,42 +1619,44 @@
void YUY2ToARGBRow_C(const uint8* src_yuy2,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_yuy2 += 4;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
void UYVYToARGBRow_C(const uint8* src_uyvy,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
- rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
+ rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
rgb_buf[7] = 255;
src_uyvy += 4;
rgb_buf += 8; // Advance 2 pixels.
}
if (width & 1) {
YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
- rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
+ rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
rgb_buf[3] = 255;
}
}
@@ -1786,14 +1665,15 @@
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
+ rgb_buf + 3, rgb_buf + 2, rgb_buf + 1, yuvconstants);
rgb_buf[0] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 7, rgb_buf + 6, rgb_buf + 5);
+ rgb_buf + 7, rgb_buf + 6, rgb_buf + 5, yuvconstants);
rgb_buf[4] = 255;
src_y += 2;
src_u += 1;
@@ -1802,7 +1682,7 @@
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 3, rgb_buf + 2, rgb_buf + 1);
+ rgb_buf + 3, rgb_buf + 2, rgb_buf + 1, yuvconstants);
rgb_buf[0] = 255;
}
}
@@ -1811,14 +1691,15 @@
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
+ rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
rgb_buf[3] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 6, rgb_buf + 5, rgb_buf + 4);
+ rgb_buf + 6, rgb_buf + 5, rgb_buf + 4, yuvconstants);
rgb_buf[7] = 255;
src_y += 2;
src_u += 1;
@@ -1827,7 +1708,7 @@
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 2, rgb_buf + 1, rgb_buf + 0);
+ rgb_buf + 2, rgb_buf + 1, rgb_buf + 0, yuvconstants);
rgb_buf[3] = 255;
}
}
@@ -1836,14 +1717,15 @@
const uint8* src_u,
const uint8* src_v,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
int x;
for (x = 0; x < width - 1; x += 2) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
+ rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants);
rgb_buf[0] = 255;
YuvPixel(src_y[1], src_u[0], src_v[0],
- rgb_buf + 5, rgb_buf + 6, rgb_buf + 7);
+ rgb_buf + 5, rgb_buf + 6, rgb_buf + 7, yuvconstants);
rgb_buf[4] = 255;
src_y += 2;
src_u += 1;
@@ -1852,7 +1734,7 @@
}
if (width & 1) {
YuvPixel(src_y[0], src_u[0], src_v[0],
- rgb_buf + 1, rgb_buf + 2, rgb_buf + 3);
+ rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants);
rgb_buf[0] = 255;
}
}
@@ -2401,374 +2283,10 @@
}
}
-#define ANYYUV(NAMEANY, ANY_SIMD, YUVCONSTANTS) \
- void NAMEANY(const uint8* y_buf, \
- const uint8* u_buf, \
- const uint8* v_buf, \
- uint8* dst_argb, \
- int width) { \
- ANY_SIMD(y_buf, u_buf, v_buf, dst_argb, &YUVCONSTANTS, width); \
- }
-
-#ifdef HAS_I422TOARGBMATRIXROW_NEON
-ANYYUV(I422ToARGBRow_NEON, I422ToARGBMatrixRow_NEON, kYuvConstantsNEON)
-ANYYUV(J422ToARGBRow_NEON, I422ToARGBMatrixRow_NEON, kYuvJConstantsNEON)
-ANYYUV(H422ToARGBRow_NEON, I422ToARGBMatrixRow_NEON, kYuvHConstantsNEON)
-#endif
-#ifdef HAS_I422TOABGRMATRIXROW_NEON
-ANYYUV(I422ToABGRRow_NEON, I422ToABGRMatrixRow_NEON, kYuvConstantsNEON)
-ANYYUV(J422ToABGRRow_NEON, I422ToABGRMatrixRow_NEON, kYuvJConstantsNEON)
-ANYYUV(H422ToABGRRow_NEON, I422ToABGRMatrixRow_NEON, kYuvHConstantsNEON)
-#endif
-#ifdef HAS_I422TOARGBMATRIXROW_SSSE3
-ANYYUV(I422ToARGBRow_SSSE3, I422ToARGBMatrixRow_SSSE3, kYuvConstants)
-ANYYUV(J422ToARGBRow_SSSE3, I422ToARGBMatrixRow_SSSE3, kYuvJConstants)
-ANYYUV(H422ToARGBRow_SSSE3, I422ToARGBMatrixRow_SSSE3, kYuvHConstants)
-#endif
-#ifdef HAS_I422TOARGBMATRIXROW_AVX2
-ANYYUV(I422ToARGBRow_AVX2, I422ToARGBMatrixRow_AVX2, kYuvConstants)
-ANYYUV(J422ToARGBRow_AVX2, I422ToARGBMatrixRow_AVX2, kYuvJConstants)
-ANYYUV(H422ToARGBRow_AVX2, I422ToARGBMatrixRow_AVX2, kYuvHConstants)
-#endif
-#ifdef HAS_I422TOABGRMATRIXROW_SSSE3
-ANYYUV(I422ToABGRRow_SSSE3, I422ToABGRMatrixRow_SSSE3, kYuvConstants)
-ANYYUV(J422ToABGRRow_SSSE3, I422ToABGRMatrixRow_SSSE3, kYuvJConstants)
-ANYYUV(H422ToABGRRow_SSSE3, I422ToABGRMatrixRow_SSSE3, kYuvHConstants)
-#endif
-#ifdef HAS_I422TOABGRMATRIXROW_AVX2
-ANYYUV(I422ToABGRRow_AVX2, I422ToABGRMatrixRow_AVX2, kYuvConstants)
-ANYYUV(J422ToABGRRow_AVX2, I422ToABGRMatrixRow_AVX2, kYuvJConstants)
-ANYYUV(H422ToABGRRow_AVX2, I422ToABGRMatrixRow_AVX2, kYuvHConstants)
-#endif
-// TODO(fbarchard): Neon, J444, H444 versions.
-#ifdef HAS_I444TOARGBMATRIXROW_SSSE3
-ANYYUV(I444ToARGBRow_SSSE3, I444ToARGBMatrixRow_SSSE3, kYuvConstants)
-#endif
-#ifdef HAS_I444TOARGBMATRIXROW_AVX2
-ANYYUV(I444ToARGBRow_AVX2, I444ToARGBMatrixRow_AVX2, kYuvConstants)
-#endif
-#ifdef HAS_I444TOABGRMATRIXROW_SSSE3
-ANYYUV(I444ToABGRRow_SSSE3, I444ToABGRMatrixRow_SSSE3, kYuvConstants)
-#endif
-#ifdef HAS_I444TOABGRMATRIXROW_AVX2
-ANYYUV(I444ToABGRRow_AVX2, I444ToABGRMatrixRow_AVX2, kYuvConstants)
-#endif
-
-// Maximum temporary width for wrappers to process at a time, in pixels.
-#define MAXTWIDTH 2048
-
-#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
- defined(HAS_I422TORGB565ROW_SSSE3)
-// row_win.cc has asm version, but GCC uses 2 step wrapper.
-void I422ToRGB565Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
- int width) {
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth);
- ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_rgb565 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_I422TOARGB1555ROW_SSSE3)
-void I422ToARGB1555Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
- int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth);
- ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_argb1555 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_I422TOARGB4444ROW_SSSE3)
-void I422ToARGB4444Row_SSSE3(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, twidth);
- ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_argb4444 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_NV12TORGB565ROW_SSSE3)
-void NV12ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_uv,
- uint8* dst_rgb565, int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- NV12ToARGBRow_SSSE3(src_y, src_uv, row, twidth);
- ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
- src_y += twidth;
- src_uv += twidth;
- dst_rgb565 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_NV21TORGB565ROW_SSSE3)
-void NV21ToRGB565Row_SSSE3(const uint8* src_y, const uint8* src_vu,
- uint8* dst_rgb565, int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- NV21ToARGBRow_SSSE3(src_y, src_vu, row, twidth);
- ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
- src_y += twidth;
- src_vu += twidth;
- dst_rgb565 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_YUY2TOARGBROW_SSSE3)
-void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, uint8* dst_argb, int width) {
- // Row buffers for intermediate YUV pixels.
- SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
- SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]);
- SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, twidth);
- YUY2ToYRow_SSE2(src_yuy2, row_y, twidth);
- I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, twidth);
- src_yuy2 += twidth * 2;
- dst_argb += twidth * 4;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_UYVYTOARGBROW_SSSE3)
-void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, uint8* dst_argb, int width) {
- // Row buffers for intermediate YUV pixels.
- SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
- SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]);
- SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, twidth);
- UYVYToYRow_SSE2(src_uyvy, row_y, twidth);
- I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, twidth);
- src_uyvy += twidth * 2;
- dst_argb += twidth * 4;
- width -= twidth;
- }
-}
-#endif // !defined(LIBYUV_DISABLE_X86)
-
-#if defined(HAS_I422TORGB565ROW_AVX2)
-void I422ToRGB565Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb565,
- int width) {
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth);
- ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_rgb565 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_I422TOARGB1555ROW_AVX2)
-void I422ToARGB1555Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb1555,
- int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth);
- ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_argb1555 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_I422TOARGB4444ROW_AVX2)
-void I422ToARGB4444Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb4444,
- int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth);
- ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_argb4444 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_I422TORGB24ROW_AVX2)
-void I422ToRGB24Row_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_rgb24,
- int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth);
- // TODO(fbarchard): ARGBToRGB24Row_AVX2
- ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_rgb24 += twidth * 3;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_I422TORAWROW_AVX2)
-void I422ToRAWRow_AVX2(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_raw,
- int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- I422ToARGBRow_AVX2(src_y, src_u, src_v, row, twidth);
- // TODO(fbarchard): ARGBToRAWRow_AVX2
- ARGBToRAWRow_SSSE3(row, dst_raw, twidth);
- src_y += twidth;
- src_u += twidth / 2;
- src_v += twidth / 2;
- dst_raw += twidth * 3;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_NV12TORGB565ROW_AVX2)
-void NV12ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_uv,
- uint8* dst_rgb565, int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- NV12ToARGBRow_AVX2(src_y, src_uv, row, twidth);
- ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
- src_y += twidth;
- src_uv += twidth;
- dst_rgb565 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_NV21TORGB565ROW_AVX2)
-void NV21ToRGB565Row_AVX2(const uint8* src_y, const uint8* src_vu,
- uint8* dst_rgb565, int width) {
- // Row buffer for intermediate ARGB pixels.
- SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- NV21ToARGBRow_AVX2(src_y, src_vu, row, twidth);
- ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
- src_y += twidth;
- src_vu += twidth;
- dst_rgb565 += twidth * 2;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_YUY2TOARGBROW_AVX2)
-void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, uint8* dst_argb, int width) {
- // Row buffers for intermediate YUV pixels.
- SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]);
- SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]);
- SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- YUY2ToUV422Row_AVX2(src_yuy2, row_u, row_v, twidth);
- YUY2ToYRow_AVX2(src_yuy2, row_y, twidth);
- I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, twidth);
- src_yuy2 += twidth * 2;
- dst_argb += twidth * 4;
- width -= twidth;
- }
-}
-#endif
-
-#if defined(HAS_UYVYTOARGBROW_AVX2)
-void UYVYToARGBRow_AVX2(const uint8* src_uyvy, uint8* dst_argb, int width) {
- // Row buffers for intermediate YUV pixels.
- SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]);
- SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]);
- SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]);
- while (width > 0) {
- int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
- UYVYToUV422Row_AVX2(src_uyvy, row_u, row_v, twidth);
- UYVYToYRow_AVX2(src_uyvy, row_y, twidth);
- I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, twidth);
- src_uyvy += twidth * 2;
- dst_argb += twidth * 4;
- width -= twidth;
- }
-}
-#endif // !defined(LIBYUV_DISABLE_X86)
void ARGBPolynomialRow_C(const uint8* src_argb,
- uint8* dst_argb, const float* poly,
+ uint8* dst_argb,
+ const float* poly,
int width) {
int i;
for (i = 0; i < width; ++i) {
@@ -2868,6 +2386,311 @@
}
}
+// Maximum temporary width for wrappers to process at a time, in pixels.
+#define MAXTWIDTH 2048
+
+#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
+ defined(HAS_I422TORGB565ROW_SSSE3)
+// row_win.cc has asm version, but GCC uses 2 step wrapper.
+void I422ToRGB565Row_SSSE3(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_rgb565 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TOARGB1555ROW_SSSE3)
+void I422ToARGB1555Row_SSSE3(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb1555,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_argb1555 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TOARGB4444ROW_SSSE3)
+void I422ToARGB4444Row_SSSE3(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb4444,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_argb4444 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_NV12TORGB565ROW_SSSE3)
+void NV12ToRGB565Row_SSSE3(const uint8* src_y,
+ const uint8* src_uv,
+ uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
+ ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
+ src_y += twidth;
+ src_uv += twidth;
+ dst_rgb565 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_YUY2TOARGBROW_SSSE3)
+void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffers for intermediate YUV pixels.
+ SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
+ SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]);
+ SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, twidth);
+ YUY2ToYRow_SSE2(src_yuy2, row_y, twidth);
+ I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, yuvconstants, twidth);
+ src_yuy2 += twidth * 2;
+ dst_argb += twidth * 4;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_UYVYTOARGBROW_SSSE3)
+void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffers for intermediate YUV pixels.
+ SIMD_ALIGNED(uint8 row_y[MAXTWIDTH]);
+ SIMD_ALIGNED(uint8 row_u[MAXTWIDTH / 2]);
+ SIMD_ALIGNED(uint8 row_v[MAXTWIDTH / 2]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, twidth);
+ UYVYToYRow_SSE2(src_uyvy, row_y, twidth);
+ I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, yuvconstants, twidth);
+ src_uyvy += twidth * 2;
+ dst_argb += twidth * 4;
+ width -= twidth;
+ }
+}
+#endif // !defined(LIBYUV_DISABLE_X86)
+
+#if defined(HAS_I422TORGB565ROW_AVX2)
+void I422ToRGB565Row_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_rgb565 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TOARGB1555ROW_AVX2)
+void I422ToARGB1555Row_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb1555,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_argb1555 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TOARGB4444ROW_AVX2)
+void I422ToARGB4444Row_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb4444,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+ ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_argb4444 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TORGB24ROW_AVX2)
+void I422ToRGB24Row_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_rgb24,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+ // TODO(fbarchard): ARGBToRGB24Row_AVX2
+ ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_rgb24 += twidth * 3;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_I422TORAWROW_AVX2)
+void I422ToRAWRow_AVX2(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_raw,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
+ // TODO(fbarchard): ARGBToRAWRow_AVX2
+ ARGBToRAWRow_SSSE3(row, dst_raw, twidth);
+ src_y += twidth;
+ src_u += twidth / 2;
+ src_v += twidth / 2;
+ dst_raw += twidth * 3;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_NV12TORGB565ROW_AVX2)
+void NV12ToRGB565Row_AVX2(const uint8* src_y,
+ const uint8* src_uv,
+ uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffer for intermediate ARGB pixels.
+ SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
+ ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
+ src_y += twidth;
+ src_uv += twidth;
+ dst_rgb565 += twidth * 2;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_YUY2TOARGBROW_AVX2)
+void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffers for intermediate YUV pixels.
+ SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]);
+ SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]);
+ SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ YUY2ToUV422Row_AVX2(src_yuy2, row_u, row_v, twidth);
+ YUY2ToYRow_AVX2(src_yuy2, row_y, twidth);
+ I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, yuvconstants, twidth);
+ src_yuy2 += twidth * 2;
+ dst_argb += twidth * 4;
+ width -= twidth;
+ }
+}
+#endif
+
+#if defined(HAS_UYVYTOARGBROW_AVX2)
+void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
+ // Row buffers for intermediate YUV pixels.
+ SIMD_ALIGNED32(uint8 row_y[MAXTWIDTH]);
+ SIMD_ALIGNED32(uint8 row_u[MAXTWIDTH / 2]);
+ SIMD_ALIGNED32(uint8 row_v[MAXTWIDTH / 2]);
+ while (width > 0) {
+ int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+ UYVYToUV422Row_AVX2(src_uyvy, row_u, row_v, twidth);
+ UYVYToYRow_AVX2(src_uyvy, row_y, twidth);
+ I422ToARGBRow_AVX2(row_y, row_u, row_v, dst_argb, yuvconstants, twidth);
+ src_uyvy += twidth * 2;
+ dst_argb += twidth * 4;
+ width -= twidth;
+ }
+}
+#endif // !defined(LIBYUV_DISABLE_X86)
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index 9497503..af5ca2b 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -1350,23 +1350,23 @@
"punpcklwd %%xmm0,%%xmm0 \n"
// Convert 8 pixels: 8 UV and 8 Y
-#define YUVTORGB(YuvConstants) \
+#define YUVTORGB(yuvconstants) \
"movdqa %%xmm0,%%xmm1 \n" \
"movdqa %%xmm0,%%xmm2 \n" \
"movdqa %%xmm0,%%xmm3 \n" \
- "movdqa " MEMACCESS2(96, [YuvConstants]) ",%%xmm0 \n" \
- "pmaddubsw " MEMACCESS([YuvConstants]) ",%%xmm1 \n" \
+ "movdqa " MEMACCESS2(96, [yuvconstants]) ",%%xmm0 \n" \
+ "pmaddubsw " MEMACCESS([yuvconstants]) ",%%xmm1 \n" \
"psubw %%xmm1,%%xmm0 \n" \
- "movdqa " MEMACCESS2(128, [YuvConstants]) ",%%xmm1 \n" \
- "pmaddubsw " MEMACCESS2(32, [YuvConstants]) ",%%xmm2 \n" \
+ "movdqa " MEMACCESS2(128, [yuvconstants]) ",%%xmm1 \n" \
+ "pmaddubsw " MEMACCESS2(32, [yuvconstants]) ",%%xmm2 \n" \
"psubw %%xmm2,%%xmm1 \n" \
- "movdqa " MEMACCESS2(160, [YuvConstants]) ",%%xmm2 \n" \
- "pmaddubsw " MEMACCESS2(64, [YuvConstants]) ",%%xmm3 \n" \
+ "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \
+ "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \
"psubw %%xmm3,%%xmm2 \n" \
"movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
"lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
"punpcklbw %%xmm3,%%xmm3 \n" \
- "pmulhuw " MEMACCESS2(192, [YuvConstants]) ",%%xmm3 \n" \
+ "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm3 \n" \
"paddsw %%xmm3,%%xmm0 \n" \
"paddsw %%xmm3,%%xmm1 \n" \
"paddsw %%xmm3,%%xmm2 \n" \
@@ -1423,19 +1423,19 @@
"movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \
"lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n"
-void OMITFP I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READYUV444
- YUVTORGB(YuvConstants)
+ YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
@@ -1444,25 +1444,25 @@
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
- : [YuvConstants]"r"(YuvConstants) // %[kYuvConstants]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
-void OMITFP I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- struct YuvConstants* YuvConstants,
- int width) {
+void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READYUV444
- YUVTORGB(YuvConstants)
+ YUVTORGB(yuvconstants)
STOREABGR
"sub $0x8,%[width] \n"
"jg 1b \n"
@@ -1471,7 +1471,7 @@
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
[width]"+rm"(width) // %[width]
- : [YuvConstants]"r"(YuvConstants) // %[kYuvConstants]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@@ -1482,6 +1482,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgb24,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
@@ -1490,7 +1491,7 @@
LABELALIGN
"1: \n"
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(yuvconstants)
"punpcklbw %%xmm1,%%xmm0 \n"
"punpcklbw %%xmm2,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
@@ -1514,7 +1515,7 @@
#else
[width]"+rm"(width) // %[width]
#endif
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB),
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
[kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
: "memory", "cc", NACL_R14
@@ -1526,6 +1527,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_raw,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
@@ -1534,7 +1536,7 @@
LABELALIGN
"1: \n"
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(yuvconstants)
"punpcklbw %%xmm1,%%xmm0 \n"
"punpcklbw %%xmm2,%%xmm2 \n"
"movdqa %%xmm0,%%xmm1 \n"
@@ -1558,7 +1560,7 @@
#else
[width]"+rm"(width) // %[width]
#endif
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB),
+ : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
[kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
[kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
: "memory", "cc", NACL_R14
@@ -1566,19 +1568,19 @@
);
}
-void OMITFP I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READYUV422
- YUVTORGB(YuvConstants)
+ YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
@@ -1587,7 +1589,7 @@
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
- : [YuvConstants]"r"(YuvConstants) // %[kYuvConstants]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@@ -1597,6 +1599,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@@ -1604,7 +1607,7 @@
LABELALIGN
"1: \n"
READYUV411
- YUVTORGB(kYuvConstants)
+ YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
@@ -1613,7 +1616,7 @@
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@@ -1622,13 +1625,14 @@
void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READNV12
- YUVTORGB(kYuvConstants)
+ YUVTORGB(yuvconstants)
STOREARGB
"sub $0x8,%[width] \n"
"jg 1b \n"
@@ -1636,30 +1640,7 @@
[uv_buf]"+r"(uv_buf), // %[uv_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
- // Does not use r14.
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
- );
-}
-
-void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- asm volatile (
- "pcmpeqb %%xmm5,%%xmm5 \n"
- LABELALIGN
- "1: \n"
- READNV12
- YUVTORGB(kYuvConstants)
- STOREARGB
- "sub $0x8,%[width] \n"
- "jg 1b \n"
- : [y_buf]"+r"(y_buf), // %[y_buf]
- [uv_buf]"+r"(uv_buf), // %[uv_buf]
- [dst_argb]"+r"(dst_argb), // %[dst_argb]
- [width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYvuConstants.kUVToB) // %[kYuvConstants]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
// Does not use r14.
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@@ -1669,6 +1650,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_bgra,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@@ -1676,7 +1658,7 @@
LABELALIGN
"1: \n"
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(yuvconstants)
STOREBGRA
"sub $0x8,%[width] \n"
"jg 1b \n"
@@ -1685,25 +1667,25 @@
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
-void OMITFP I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- struct YuvConstants* YuvConstants,
- int width) {
+void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"pcmpeqb %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(yuvconstants)
STOREABGR
"sub $0x8,%[width] \n"
"jg 1b \n"
@@ -1712,7 +1694,7 @@
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@@ -1722,6 +1704,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@@ -1729,7 +1712,7 @@
LABELALIGN
"1: \n"
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(yuvconstants)
STORERGBA
"sub $0x8,%[width] \n"
"jg 1b \n"
@@ -1738,7 +1721,7 @@
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_rgba]"+r"(dst_rgba), // %[dst_rgba]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@@ -1788,6 +1771,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_bgra,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@@ -1795,7 +1779,7 @@
LABELALIGN
"1: \n"
READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(yuvconstants)
// Step 3: Weave into BGRA
"vpunpcklbw %%ymm0,%%ymm1,%%ymm1 \n" // GB
@@ -1816,29 +1800,29 @@
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
#endif // HAS_I422TOBGRAROW_AVX2
-#if defined(HAS_I422TOARGBMATRIXROW_AVX2)
+#if defined(HAS_I422TOARGBROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
-void OMITFP I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(yuvconstants)
// Step 3: Weave into ARGB
"vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" // BG
@@ -1859,29 +1843,29 @@
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
}
-#endif // HAS_I422TOARGBMATRIXROW_AVX2
+#endif // HAS_I422TOARGBROW_AVX2
#if defined(HAS_I422TOABGRROW_AVX2)
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
-void OMITFP I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
LABELALIGN
"1: \n"
READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(yuvconstants)
// Step 3: Weave into ABGR
"vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" // RG
@@ -1901,7 +1885,7 @@
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(YuvConstants) // %[kYuvConstants]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
@@ -1915,6 +1899,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
"sub %[u_buf],%[v_buf] \n"
@@ -1922,7 +1907,7 @@
LABELALIGN
"1: \n"
READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(yuvconstants)
// Step 3: Weave into RGBA
"vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n"
@@ -1942,7 +1927,7 @@
[v_buf]"+r"(v_buf), // %[v_buf]
[dst_argb]"+r"(dst_argb), // %[dst_argb]
[width]"+rm"(width) // %[width]
- : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants]
+ : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
: "memory", "cc", NACL_R14
"xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
);
diff --git a/source/row_mips.cc b/source/row_mips.cc
index 1183c71..0720110 100644
--- a/source/row_mips.cc
+++ b/source/row_mips.cc
@@ -593,7 +593,7 @@
// t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 |
-#define I422ToTransientMipsRGB \
+#define YUVTORGB \
"lw $t0, 0(%[y_buf]) \n" \
"lhu $t1, 0(%[u_buf]) \n" \
"lhu $t2, 0(%[v_buf]) \n" \
@@ -652,10 +652,12 @@
"addu.ph $t2, $t2, $s5 \n" \
"addu.ph $t1, $t1, $s5 \n"
+// TODO(fbarchard): accept yuv conversion constants.
void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
__asm__ __volatile__ (
".set push \n"
@@ -671,7 +673,7 @@
"ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
"1: \n"
- I422ToTransientMipsRGB
+ YUVTORGB
// Arranging into argb format
"precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
"precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
@@ -717,6 +719,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
__asm__ __volatile__ (
".set push \n"
@@ -732,7 +735,7 @@
"ori $s6, 0xff00 \n" // |ff|00|ff|00|
"1: \n"
- I422ToTransientMipsRGB
+ YUVTORGB
// Arranging into abgr format
"precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1|
"precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0|
@@ -778,6 +781,7 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
+ struct YuvConstants* yuvconstants,
int width) {
__asm__ __volatile__ (
".set push \n"
@@ -793,7 +797,7 @@
"ori $s6, 0xff \n" // |00|ff|00|ff|
"1: \n"
- I422ToTransientMipsRGB
+ YUVTORGB
// Arranging into bgra format
"precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
"precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
diff --git a/source/row_neon.cc b/source/row_neon.cc
index f7e6ba1..9052ed0 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -93,7 +93,7 @@
"vuzp.u8 d2, d3 \n" \
"vtrn.u32 d2, d3 \n"
-#define YUV422TORGB_SETUP_REG \
+#define YUVTORGB_SETUP \
MEMACCESS([kUVToRB]) \
"vld1.8 {d24}, [%[kUVToRB]] \n" \
MEMACCESS([kUVToG]) \
@@ -107,7 +107,7 @@
MEMACCESS([kYToRgb]) \
"vld1.32 {d30[], d31[]}, [%[kYToRgb]] \n"
-#define YUV422TORGB \
+#define YUVTORGB \
"vmull.u8 q8, d2, d24 \n" /* u/v B/R component */\
"vmull.u8 q9, d2, d25 \n" /* u/v G component */\
"vmovl.u8 q0, d0 \n" /* Y */\
@@ -138,12 +138,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV444
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(3)
@@ -154,26 +155,26 @@
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
-void I422ToARGBMatrixRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_argb,
- struct YuvConstantsNEON* YuvConstants,
- int width) {
+void I422ToARGBRow_NEON(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(3)
@@ -184,10 +185,10 @@
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
- : [kUVToRB]"r"(&YuvConstants->kUVToRB), // %5
- [kUVToG]"r"(&YuvConstants->kUVToG), // %6
- [kUVBiasBGR]"r"(&YuvConstants->kUVBiasBGR),
- [kYToRgb]"r"(&YuvConstants->kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@@ -197,12 +198,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV411
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(3)
@@ -213,10 +215,10 @@
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@@ -226,12 +228,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_bgra,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
"vswp.u8 d20, d22 \n"
"vmov.u8 d19, #255 \n"
@@ -243,26 +246,26 @@
"+r"(src_v), // %2
"+r"(dst_bgra), // %3
"+r"(width) // %4
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
}
-void I422ToABGRMatrixRow_NEON(const uint8* src_y,
- const uint8* src_u,
- const uint8* src_v,
- uint8* dst_abgr,
- struct YuvConstantsNEON* YuvConstants,
- int width) {
+void I422ToABGRRow_NEON(const uint8* src_y,
+ const uint8* src_u,
+ const uint8* src_v,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
"vswp.u8 d20, d22 \n"
"vmov.u8 d23, #255 \n"
@@ -274,10 +277,10 @@
"+r"(src_v), // %2
"+r"(dst_abgr), // %3
"+r"(width) // %4
- : [kUVToRB]"r"(&YuvConstants->kUVToRB), // %5
- [kUVToG]"r"(&YuvConstants->kUVToG), // %6
- [kUVBiasBGR]"r"(&YuvConstants->kUVBiasBGR),
- [kYToRgb]"r"(&YuvConstants->kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@@ -287,12 +290,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d19, #255 \n"
MEMACCESS(3)
@@ -303,10 +307,10 @@
"+r"(src_v), // %2
"+r"(dst_rgba), // %3
"+r"(width) // %4
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@@ -316,12 +320,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
MEMACCESS(3)
"vst3.8 {d20, d21, d22}, [%3]! \n"
@@ -331,10 +336,10 @@
"+r"(src_v), // %2
"+r"(dst_rgb24), // %3
"+r"(width) // %4
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@@ -344,12 +349,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_raw,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
"vswp.u8 d20, d22 \n"
MEMACCESS(3)
@@ -360,10 +366,10 @@
"+r"(src_v), // %2
"+r"(dst_raw), // %3
"+r"(width) // %4
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@@ -385,12 +391,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
ARGBTORGB565
MEMACCESS(3)
@@ -401,10 +408,10 @@
"+r"(src_v), // %2
"+r"(dst_rgb565), // %3
"+r"(width) // %4
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@@ -429,12 +436,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
ARGBTOARGB1555
@@ -446,10 +454,10 @@
"+r"(src_v), // %2
"+r"(dst_argb1555), // %3
"+r"(width) // %4
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@@ -468,13 +476,14 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"vmov.u8 d4, #0x0f \n" // bits to clear with vbic.
"1: \n"
READYUV422
- YUV422TORGB
+ YUVTORGB
"subs %4, %4, #8 \n"
"vmov.u8 d23, #255 \n"
ARGBTOARGB4444
@@ -486,10 +495,10 @@
"+r"(src_v), // %2
"+r"(dst_argb4444), // %3
"+r"(width) // %4
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %5
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %6
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@@ -499,10 +508,10 @@
uint8* dst_argb,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV400
- YUV422TORGB
+ YUVTORGB
"subs %2, %2, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(1)
@@ -511,10 +520,10 @@
: "+r"(src_y), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&kYuvConstants.kUVToRB),
+ [kUVToG]"r"(&kYuvConstants.kUVToG),
+ [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@@ -545,12 +554,13 @@
void NV12ToARGBRow_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READNV12
- YUV422TORGB
+ YUVTORGB
"subs %3, %3, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(2)
@@ -560,37 +570,10 @@
"+r"(src_uv), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- "1: \n"
- READNV21
- YUV422TORGB
- "subs %3, %3, #8 \n"
- "vmov.u8 d23, #255 \n"
- MEMACCESS(2)
- "vst4.8 {d20, d21, d22, d23}, [%2]! \n"
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@@ -599,12 +582,13 @@
void NV12ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READNV12
- YUV422TORGB
+ YUVTORGB
"subs %3, %3, #8 \n"
ARGBTORGB565
MEMACCESS(2)
@@ -614,37 +598,10 @@
"+r"(src_uv), // %1
"+r"(dst_rgb565), // %2
"+r"(width) // %3
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
- : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
- "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
- );
-}
-
-void NV21ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- "1: \n"
- READNV21
- YUV422TORGB
- "subs %3, %3, #8 \n"
- ARGBTORGB565
- MEMACCESS(2)
- "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565.
- "bgt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_rgb565), // %2
- "+r"(width) // %3
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %4
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %5
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@@ -652,12 +609,13 @@
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUY2
- YUV422TORGB
+ YUVTORGB
"subs %2, %2, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(1)
@@ -666,10 +624,10 @@
: "+r"(src_yuy2), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
@@ -677,12 +635,13 @@
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READUYVY
- YUV422TORGB
+ YUVTORGB
"subs %2, %2, #8 \n"
"vmov.u8 d23, #255 \n"
MEMACCESS(1)
@@ -691,10 +650,10 @@
: "+r"(src_uyvy), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
- : [kUVToRB]"r"(&kYuvConstantsNEON.kUVToRB), // %3
- [kUVToG]"r"(&kYuvConstantsNEON.kUVToG), // %4
- [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVToRB]"r"(&yuvconstants->kUVToRB),
+ [kUVToG]"r"(&yuvconstants->kUVToG),
+ [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4",
"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
);
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 668baef..ae7b32c 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -91,7 +91,8 @@
"uzp2 v3.8b, v2.8b, v2.8b \n" \
"ins v1.s[1], v3.s[0] \n"
-#define YUV422TORGB_SETUP_REG \
+// TODO(fbarchard): replace movi with constants from struct.
+#define YUVTORGB_SETUP \
"ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \
"ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \
"ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \
@@ -101,7 +102,7 @@
"movi v29.8h, #25 \n" \
"movi v30.8h, #52 \n"
-#define YUV422TORGB(vR, vG, vB) \
+#define YUVTORGB(vR, vG, vB) \
"uxtl v0.8h, v0.8b \n" /* Extract Y */ \
"shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \
"ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \
@@ -143,12 +144,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV444
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3)
@@ -159,8 +161,8 @@
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
+ [kYToRgb]"r"(&yuvconstants->kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -173,12 +175,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3)
@@ -189,8 +192,8 @@
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -202,12 +205,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV411
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3)
@@ -218,8 +222,8 @@
"+r"(src_v), // %2
"+r"(dst_argb), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -231,12 +235,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_bgra,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v21, v22, v23)
+ YUVTORGB(v21, v22, v23)
"subs %w4, %w4, #8 \n"
"movi v20.8b, #255 \n" /* A */
MEMACCESS(3)
@@ -247,8 +252,8 @@
"+r"(src_v), // %2
"+r"(dst_bgra), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -261,12 +266,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v20, v21, v22)
+ YUVTORGB(v20, v21, v22)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n" /* A */
MEMACCESS(3)
@@ -277,8 +283,8 @@
"+r"(src_v), // %2
"+r"(dst_abgr), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -290,12 +296,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v23, v22, v21)
+ YUVTORGB(v23, v22, v21)
"subs %w4, %w4, #8 \n"
"movi v20.8b, #255 \n" /* A */
MEMACCESS(3)
@@ -306,8 +313,8 @@
"+r"(src_v), // %2
"+r"(dst_rgba), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -319,12 +326,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb24,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
MEMACCESS(3)
"st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
@@ -334,8 +342,8 @@
"+r"(src_v), // %2
"+r"(dst_rgb24), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -347,12 +355,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_raw,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v20, v21, v22)
+ YUVTORGB(v20, v21, v22)
"subs %w4, %w4, #8 \n"
MEMACCESS(3)
"st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
@@ -362,8 +371,8 @@
"+r"(src_v), // %2
"+r"(dst_raw), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -382,12 +391,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
ARGBTORGB565
MEMACCESS(3)
@@ -398,8 +408,8 @@
"+r"(src_v), // %2
"+r"(dst_rgb565), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -420,12 +430,13 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb1555,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV422
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n"
ARGBTOARGB1555
@@ -437,8 +448,8 @@
"+r"(src_v), // %2
"+r"(dst_argb1555), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -460,13 +471,14 @@
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb4444,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"movi v4.16b, #0x0f \n" // bits to clear with vbic.
"1: \n"
READYUV422
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w4, %w4, #8 \n"
"movi v23.8b, #255 \n"
ARGBTOARGB4444
@@ -478,8 +490,8 @@
"+r"(src_v), // %2
"+r"(dst_argb4444), // %3
"+r"(width) // %4
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -492,10 +504,10 @@
int width) {
int64 width64 = (int64)(width);
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUV400
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1)
@@ -504,8 +516,8 @@
: "+r"(src_y), // %0
"+r"(dst_argb), // %1
"+r"(width64) // %2
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -540,12 +552,13 @@
void NV12ToARGBRow_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READNV12
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w3, %w3, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(2)
@@ -555,51 +568,25 @@
"+r"(src_uv), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_NV12TOARGBROW_NEON
-#ifdef HAS_NV21TOARGBROW_NEON
-void NV21ToARGBRow_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_argb,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- "1: \n"
- READNV21
- YUV422TORGB(v22, v21, v20)
- "subs %w3, %w3, #8 \n"
- "movi v23.8b, #255 \n"
- MEMACCESS(2)
- "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
- "b.gt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_argb), // %2
- "+r"(width) // %3
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_NV21TOARGBROW_NEON
-
#ifdef HAS_NV12TORGB565ROW_NEON
void NV12ToRGB565Row_NEON(const uint8* src_y,
const uint8* src_uv,
uint8* dst_rgb565,
+ struct YuvConstants* yuvconstants,
int width) {
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READNV12
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w3, %w3, #8 \n"
ARGBTORGB565
MEMACCESS(2)
@@ -609,51 +596,25 @@
"+r"(src_uv), // %1
"+r"(dst_rgb565), // %2
"+r"(width) // %3
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
}
#endif // HAS_NV12TORGB565ROW_NEON
-#ifdef HAS_NV21TORGB565ROW_NEON
-void NV21ToRGB565Row_NEON(const uint8* src_y,
- const uint8* src_uv,
- uint8* dst_rgb565,
- int width) {
- asm volatile (
- YUV422TORGB_SETUP_REG
- "1: \n"
- READNV21
- YUV422TORGB(v22, v21, v20)
- "subs %w3, %w3, #8 \n"
- ARGBTORGB565
- MEMACCESS(2)
- "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565.
- "b.gt 1b \n"
- : "+r"(src_y), // %0
- "+r"(src_uv), // %1
- "+r"(dst_rgb565), // %2
- "+r"(width) // %3
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
- "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
- );
-}
-#endif // HAS_NV21TORGB565ROW_NEON
-
#ifdef HAS_YUY2TOARGBROW_NEON
void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
int64 width64 = (int64)(width);
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READYUY2
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1)
@@ -662,8 +623,8 @@
: "+r"(src_yuy2), // %0
"+r"(dst_argb), // %1
"+r"(width64) // %2
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
@@ -673,13 +634,14 @@
#ifdef HAS_UYVYTOARGBROW_NEON
void UYVYToARGBRow_NEON(const uint8* src_uyvy,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
int64 width64 = (int64)(width);
asm volatile (
- YUV422TORGB_SETUP_REG
+ YUVTORGB_SETUP
"1: \n"
READUYVY
- YUV422TORGB(v22, v21, v20)
+ YUVTORGB(v22, v21, v20)
"subs %w2, %w2, #8 \n"
"movi v23.8b, #255 \n"
MEMACCESS(1)
@@ -688,8 +650,8 @@
: "+r"(src_uyvy), // %0
"+r"(dst_argb), // %1
"+r"(width64) // %2
- : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
- [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
+ : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
+ [kYToRgb]"r"(&kYuvConstants.kYToRgb)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
"v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
);
diff --git a/source/row_win.cc b/source/row_win.cc
index f7a3105..f09d2a7 100644
--- a/source/row_win.cc
+++ b/source/row_win.cc
@@ -83,13 +83,13 @@
dst_argb += 32;
-#if defined(HAS_I422TOARGBMATRIXROW_SSSE3)
-void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+#if defined(HAS_I422TOARGBROW_SSSE3)
+void I422ToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
@@ -102,13 +102,13 @@
}
#endif
-#if defined(HAS_I422TOABGRMATRIXROW_SSSE3)
-void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+#if defined(HAS_I422TOABGRROW_SSSE3)
+void I422ToABGRRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
@@ -1963,16 +1963,16 @@
__asm lea edx, [edx + 64] \
}
-#ifdef HAS_I422TOARGBMATRIXROW_AVX2
+#ifdef HAS_I422TOARGBROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked)
-void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I422ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2001,18 +2001,18 @@
ret
}
}
-#endif // HAS_I422TOARGBMATRIXROW_AVX2
+#endif // HAS_I422TOARGBROW_AVX2
-#ifdef HAS_I444TOARGBMATRIXROW_AVX2
+#ifdef HAS_I444TOARGBROW_AVX2
// 16 pixels
// 16 UV values with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked)
-void I444ToARGBMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I444ToARGBRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2040,18 +2040,18 @@
ret
}
}
-#endif // HAS_I444TOARGBMATRIXROW_AVX2
+#endif // HAS_I444TOARGBROW_AVX2
-#ifdef HAS_I444TOABGRMATRIXROW_AVX2
+#ifdef HAS_I444TOABGRROW_AVX2
// 16 pixels
// 16 UV values with 16 Y producing 16 ABGR (64 bytes).
__declspec(naked)
-void I444ToABGRMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- struct YuvConstants* YuvConstants,
- int width) {
+void I444ToABGRRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2079,7 +2079,7 @@
ret
}
}
-#endif // HAS_I444TOABGRMATRIXROW_AVX2
+#endif // HAS_I444TOABGRROW_AVX2
#ifdef HAS_I411TOARGBROW_AVX2
// 16 pixels
@@ -2089,26 +2089,30 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // abgr
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV411_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(ebp)
STOREARGB_AVX2
sub ecx, 16
jg convertloop
+ pop ebp
pop edi
pop esi
vzeroupper
@@ -2124,23 +2128,27 @@
void NV12ToARGBRow_AVX2(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
+ push ebp
+ mov eax, [esp + 8 + 4] // Y
+ mov esi, [esp + 8 + 8] // UV
+ mov edx, [esp + 8 + 12] // argb
+ mov ebp, [esp + 8 + 16] // YuvConstants
+ mov ecx, [esp + 8 + 20] // width
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READNV12_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(ebp)
STOREARGB_AVX2
sub ecx, 16
jg convertloop
+ pop ebp
pop esi
vzeroupper
ret
@@ -2148,37 +2156,6 @@
}
#endif // HAS_NV12TOARGBROW_AVX2
-#ifdef HAS_NV21TOARGBROW_AVX2
-// 16 pixels.
-// 8 VU values upsampled to 16 VU, mixed with 16 Y producing 16 ARGB (64 bytes).
-__declspec(naked)
-void NV21ToARGBRow_AVX2(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
-
- convertloop:
- READNV12_AVX2
- YUVTORGB_AVX2(kYvuConstants)
- STOREARGB_AVX2
-
- sub ecx, 16
- jg convertloop
-
- pop esi
- vzeroupper
- ret
- }
-}
-#endif // HAS_NV21TOARGBROW_AVX2
-
#ifdef HAS_I422TOBGRAROW_AVX2
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
@@ -2188,26 +2165,30 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // abgr
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(ebp)
STOREBGRA_AVX2
sub ecx, 16
jg convertloop
+ pop ebp
pop edi
pop esi
vzeroupper
@@ -2224,26 +2205,30 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // argb
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // abgr
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
convertloop:
READYUV422_AVX2
- YUVTORGB_AVX2(kYuvConstants)
+ YUVTORGB_AVX2(ebp)
STORERGBA_AVX2
sub ecx, 16
jg convertloop
+ pop ebp
pop edi
pop esi
vzeroupper
@@ -2256,12 +2241,12 @@
// 16 pixels
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
__declspec(naked)
-void I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I422ToABGRRow_AVX2(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2481,12 +2466,12 @@
// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked)
-void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I444ToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2518,12 +2503,12 @@
// 8 pixels.
// 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes).
__declspec(naked)
-void I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- struct YuvConstants* YuvConstants,
- int width) {
+void I444ToABGRRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2559,27 +2544,31 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgb24,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgb24
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STORERGB24
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
@@ -2593,27 +2582,31 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_raw,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // raw
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STORERAW
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
@@ -2627,15 +2620,18 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb565_buf,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgb565
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate mask 0x0000001f
psrld xmm5, 27
@@ -2647,12 +2643,13 @@
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STORERGB565
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
@@ -2662,12 +2659,12 @@
// 8 pixels.
// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
__declspec(naked)
-void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_argb,
- struct YuvConstants* YuvConstants,
- int width) {
+void I422ToARGBRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2704,30 +2701,32 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
- push ebx
push esi
push edi
+ push ebp
mov eax, [esp + 12 + 4] // Y
mov esi, [esp + 12 + 8] // U
mov edi, [esp + 12 + 12] // V
- mov edx, [esp + 12 + 16] // argb
- mov ecx, [esp + 12 + 20] // width
+ mov edx, [esp + 12 + 16] // abgr
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop:
- READYUV411 // modifies EBX
- YUVTORGB(kYuvConstants)
+ READYUV411
+ YUVTORGB(ebp)
STOREARGB
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
- pop ebx
ret
}
}
@@ -2738,51 +2737,27 @@
void NV12ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* uv_buf,
uint8* dst_argb,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
+ push ebp
+ mov eax, [esp + 8 + 4] // Y
+ mov esi, [esp + 8 + 8] // UV
+ mov edx, [esp + 8 + 12] // argb
+ mov ebp, [esp + 8 + 16] // YuvConstants
+ mov ecx, [esp + 8 + 20] // width
pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
convertloop:
READNV12
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STOREARGB
sub ecx, 8
jg convertloop
- pop esi
- ret
- }
-}
-
-// 8 pixels.
-// 4 VU values upsampled to 8 VU, mixed with 8 Y producing 8 ARGB (32 bytes).
-__declspec(naked)
-void NV21ToARGBRow_SSSE3(const uint8* y_buf,
- const uint8* uv_buf,
- uint8* dst_argb,
- int width) {
- __asm {
- push esi
- mov eax, [esp + 4 + 4] // Y
- mov esi, [esp + 4 + 8] // UV
- mov edx, [esp + 4 + 12] // argb
- mov ecx, [esp + 4 + 16] // width
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
-
- convertloop:
- READNV12
- YUVTORGB(kYvuConstants)
- STOREARGB
-
- sub ecx, 8
- jg convertloop
-
+ pop ebp
pop esi
ret
}
@@ -2793,25 +2768,29 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_bgra,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // bgra
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STOREBGRA
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
@@ -2819,12 +2798,12 @@
}
__declspec(naked)
-void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* dst_abgr,
- struct YuvConstants* YuvConstants,
- int width) {
+void I422ToABGRRow_SSSE3(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* dst_abgr,
+ struct YuvConstants* yuvconstants,
+ int width) {
__asm {
push esi
push edi
@@ -2858,31 +2837,34 @@
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_rgba,
+ struct YuvConstants* yuvconstants,
int width) {
__asm {
push esi
push edi
- mov eax, [esp + 8 + 4] // Y
- mov esi, [esp + 8 + 8] // U
- mov edi, [esp + 8 + 12] // V
- mov edx, [esp + 8 + 16] // rgba
- mov ecx, [esp + 8 + 20] // width
+ push ebp
+ mov eax, [esp + 12 + 4] // Y
+ mov esi, [esp + 12 + 8] // U
+ mov edi, [esp + 12 + 12] // V
+ mov edx, [esp + 12 + 16] // argb
+ mov ebp, [esp + 12 + 20] // YuvConstants
+ mov ecx, [esp + 12 + 24] // width
sub edi, esi
convertloop:
READYUV422
- YUVTORGB(kYuvConstants)
+ YUVTORGB(ebp)
STORERGBA
sub ecx, 8
jg convertloop
+ pop ebp
pop edi
pop esi
ret
}
}
-
#endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_I400TOARGBROW_SSE2