Merge "DO NOT MERGE - external/libvpx/libwebm: Update snapshot" into klp-dev am: f2a8561 am: ba60a4c am: 99c1dbe -s ours am: 9ec14fb -s ours am: b554c80 am: c8208f7 am: ecc7810 -s ours am: c4bf8f3 am: 4382588 am: 1876c9d
am: fb95aca -s ours
* commit 'fb95acaa9540d1c3843bfba3a10a112674ffb1f5':
DO NOT MERGE - external/libvpx/libwebm: Update snapshot
Change-Id: Ie72588c7620ec2f0b7f8745f39ffa511e6481bb4
diff --git a/README.android b/README.android
index 5949fc6..36d716d 100644
--- a/README.android
+++ b/README.android
@@ -1,12 +1,12 @@
Name: libvpx
URL: http://www.webmproject.org
-Version: v1.4.0
+Version: v1.5.0
License: BSD
License File: libvpx/LICENSE
-Date: Tuesday August 25 2015
-Branch: origin/master
-Commit: 7105df53d7dc13d5e575bc8df714ec8d1da36b06
+Date: Thursday November 19 2015
+Branch: javanwhistlingduck
+Commit: cbecf57f3e0d85a7b7f97f3ab7c507f6fe640a93
Description:
Contains the sources used to compile libvpx.
diff --git a/README.version b/README.version
new file mode 100644
index 0000000..48e6229
--- /dev/null
+++ b/README.version
@@ -0,0 +1,4 @@
+URL: https://storage.googleapis.com/downloads.webmproject.org/releases/webm/libvpx-1.5.0.tar.bz2
+Version: 1.5.0
+BugComponent: 42195
+Owners: johannkoenig
diff --git a/config/arm-neon/libvpx_srcs.txt b/config/arm-neon/libvpx_srcs.txt
index 9d5084c..bdeae07 100644
--- a/config/arm-neon/libvpx_srcs.txt
+++ b/config/arm-neon/libvpx_srcs.txt
@@ -14,7 +14,6 @@
vp8/common/arm/armv6/filter_v6.asm
vp8/common/arm/armv6/idct_blk_v6.c
vp8/common/arm/armv6/idct_v6.asm
-vp8/common/arm/armv6/intra4x4_predict_v6.asm
vp8/common/arm/armv6/iwalsh_v6.asm
vp8/common/arm/armv6/loopfilter_v6.asm
vp8/common/arm/armv6/simpleloopfilter_v6.asm
@@ -36,7 +35,6 @@
vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c
vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c
vp8/common/arm/neon/mbloopfilter_neon.c
-vp8/common/arm/neon/reconintra_neon.c
vp8/common/arm/neon/shortidct4x4llm_neon.c
vp8/common/arm/neon/sixtappredict_neon.c
vp8/common/arm/neon/vp8_loopfilter_neon.c
@@ -80,6 +78,7 @@
vp8/common/reconinter.c
vp8/common/reconinter.h
vp8/common/reconintra.c
+vp8/common/reconintra.h
vp8/common/reconintra4x4.c
vp8/common/reconintra4x4.h
vp8/common/rtcd.c
@@ -298,6 +297,7 @@
vp9/vp9_common.mk
vp9/vp9_cx_iface.c
vp9/vp9_dx_iface.c
+vp9/vp9_dx_iface.h
vp9/vp9_iface_common.h
vp9/vp9cx.mk
vp9/vp9dx.mk
diff --git a/config/arm-neon/vp8_rtcd.h b/config/arm-neon/vp8_rtcd.h
index 0b836c4..6fd2dac 100644
--- a/config/arm-neon/vp8_rtcd.h
+++ b/config/arm-neon/vp8_rtcd.h
@@ -48,14 +48,6 @@
int vp8_block_error_c(short *coeff, short *dqcoeff);
#define vp8_block_error vp8_block_error_c
-void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-void vp8_build_intra_predictors_mbuv_s_neon(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_neon
-
-void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-void vp8_build_intra_predictors_mby_s_neon(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_neon
-
void vp8_clear_system_state_c();
#define vp8_clear_system_state vp8_clear_system_state_c
@@ -117,10 +109,6 @@
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
-void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
-#define vp8_intra4x4_predict vp8_intra4x4_predict_armv6
-
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bh_armv6(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
diff --git a/config/arm-neon/vpx_config.asm b/config/arm-neon/vpx_config.asm
index 6f03266..5b623b8 100644
--- a/config/arm-neon/vpx_config.asm
+++ b/config/arm-neon/vpx_config.asm
@@ -28,7 +28,7 @@
.equ HAVE_UNISTD_H , 1
.equ CONFIG_DEPENDENCY_TRACKING , 1
.equ CONFIG_EXTERNAL_BUILD , 1
-.equ CONFIG_INSTALL_DOCS , 1
+.equ CONFIG_INSTALL_DOCS , 0
.equ CONFIG_INSTALL_BINS , 1
.equ CONFIG_INSTALL_LIBS , 1
.equ CONFIG_INSTALL_SRCS , 0
@@ -86,4 +86,5 @@
.equ CONFIG_SPATIAL_SVC , 0
.equ CONFIG_FP_MB_STATS , 0
.equ CONFIG_EMULATE_HARDWARE , 0
+.equ CONFIG_MISC_FIXES , 0
.section .note.GNU-stack,"",%progbits
diff --git a/config/arm-neon/vpx_config.h b/config/arm-neon/vpx_config.h
index 8d02c25..d9d5f1c 100644
--- a/config/arm-neon/vpx_config.h
+++ b/config/arm-neon/vpx_config.h
@@ -37,7 +37,7 @@
#define HAVE_UNISTD_H 1
#define CONFIG_DEPENDENCY_TRACKING 1
#define CONFIG_EXTERNAL_BUILD 1
-#define CONFIG_INSTALL_DOCS 1
+#define CONFIG_INSTALL_DOCS 0
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
@@ -95,4 +95,5 @@
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_FP_MB_STATS 0
#define CONFIG_EMULATE_HARDWARE 0
+#define CONFIG_MISC_FIXES 0
#endif /* VPX_CONFIG_H */
diff --git a/config/arm-neon/vpx_dsp_rtcd.h b/config/arm-neon/vpx_dsp_rtcd.h
index 4de075d..ccb5df4 100644
--- a/config/arm-neon/vpx_dsp_rtcd.h
+++ b/config/arm-neon/vpx_dsp_rtcd.h
@@ -103,6 +103,18 @@
void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c
+void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
+
+void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
+
+void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
+
+void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
+
void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_neon
@@ -118,6 +130,18 @@
void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_neon
+void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
+
+void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
+
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
+
+void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
+
void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c
@@ -130,6 +154,21 @@
void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c
+void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
+
+void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
+
+void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
+
+void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
+
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
+
void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_neon
@@ -254,6 +293,9 @@
void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_neon
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
+
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct16x16_10_add vpx_idct16x16_10_add_neon
@@ -743,6 +785,9 @@
uint32_t vpx_variance_halfpixvar16x16_v_media(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_media
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
diff --git a/config/arm-neon/vpx_version.h b/config/arm-neon/vpx_version.h
index bce0381..3b6ea1e 100644
--- a/config/arm-neon/vpx_version.h
+++ b/config/arm-neon/vpx_version.h
@@ -1,7 +1,7 @@
#define VERSION_MAJOR 1
-#define VERSION_MINOR 4
+#define VERSION_MINOR 5
#define VERSION_PATCH 0
#define VERSION_EXTRA ""
#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH))
-#define VERSION_STRING_NOSP "v1.4.0"
-#define VERSION_STRING " v1.4.0"
+#define VERSION_STRING_NOSP "v1.5.0"
+#define VERSION_STRING " v1.5.0"
diff --git a/config/arm/libvpx_srcs.txt b/config/arm/libvpx_srcs.txt
index 53c4fda..46a3c60 100644
--- a/config/arm/libvpx_srcs.txt
+++ b/config/arm/libvpx_srcs.txt
@@ -14,7 +14,6 @@
vp8/common/arm/armv6/filter_v6.asm
vp8/common/arm/armv6/idct_blk_v6.c
vp8/common/arm/armv6/idct_v6.asm
-vp8/common/arm/armv6/intra4x4_predict_v6.asm
vp8/common/arm/armv6/iwalsh_v6.asm
vp8/common/arm/armv6/loopfilter_v6.asm
vp8/common/arm/armv6/simpleloopfilter_v6.asm
@@ -64,6 +63,7 @@
vp8/common/reconinter.c
vp8/common/reconinter.h
vp8/common/reconintra.c
+vp8/common/reconintra.h
vp8/common/reconintra4x4.c
vp8/common/reconintra4x4.h
vp8/common/rtcd.c
@@ -272,6 +272,7 @@
vp9/vp9_common.mk
vp9/vp9_cx_iface.c
vp9/vp9_dx_iface.c
+vp9/vp9_dx_iface.h
vp9/vp9_iface_common.h
vp9/vp9cx.mk
vp9/vp9dx.mk
diff --git a/config/arm/vp8_rtcd.h b/config/arm/vp8_rtcd.h
index 7c2cefd..f7287a5 100644
--- a/config/arm/vp8_rtcd.h
+++ b/config/arm/vp8_rtcd.h
@@ -45,12 +45,6 @@
int vp8_block_error_c(short *coeff, short *dqcoeff);
#define vp8_block_error vp8_block_error_c
-void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_c
-
-void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_c
-
void vp8_clear_system_state_c();
#define vp8_clear_system_state vp8_clear_system_state_c
@@ -101,10 +95,6 @@
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
-void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
-#define vp8_intra4x4_predict vp8_intra4x4_predict_armv6
-
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bh_armv6(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#define vp8_loop_filter_bh vp8_loop_filter_bh_armv6
diff --git a/config/arm/vpx_config.asm b/config/arm/vpx_config.asm
index 2a69621..992fdee 100644
--- a/config/arm/vpx_config.asm
+++ b/config/arm/vpx_config.asm
@@ -28,7 +28,7 @@
.equ HAVE_UNISTD_H , 1
.equ CONFIG_DEPENDENCY_TRACKING , 1
.equ CONFIG_EXTERNAL_BUILD , 1
-.equ CONFIG_INSTALL_DOCS , 1
+.equ CONFIG_INSTALL_DOCS , 0
.equ CONFIG_INSTALL_BINS , 1
.equ CONFIG_INSTALL_LIBS , 1
.equ CONFIG_INSTALL_SRCS , 0
@@ -86,4 +86,5 @@
.equ CONFIG_SPATIAL_SVC , 0
.equ CONFIG_FP_MB_STATS , 0
.equ CONFIG_EMULATE_HARDWARE , 0
+.equ CONFIG_MISC_FIXES , 0
.section .note.GNU-stack,"",%progbits
diff --git a/config/arm/vpx_config.h b/config/arm/vpx_config.h
index 62b6285..d6d2809 100644
--- a/config/arm/vpx_config.h
+++ b/config/arm/vpx_config.h
@@ -37,7 +37,7 @@
#define HAVE_UNISTD_H 1
#define CONFIG_DEPENDENCY_TRACKING 1
#define CONFIG_EXTERNAL_BUILD 1
-#define CONFIG_INSTALL_DOCS 1
+#define CONFIG_INSTALL_DOCS 0
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
@@ -95,4 +95,5 @@
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_FP_MB_STATS 0
#define CONFIG_EMULATE_HARDWARE 0
+#define CONFIG_MISC_FIXES 0
#endif /* VPX_CONFIG_H */
diff --git a/config/arm/vpx_dsp_rtcd.h b/config/arm/vpx_dsp_rtcd.h
index bb570a0..ce2aeac 100644
--- a/config/arm/vpx_dsp_rtcd.h
+++ b/config/arm/vpx_dsp_rtcd.h
@@ -94,6 +94,18 @@
void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c
+void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
+
+void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
+
+void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
+
+void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
+
void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c
@@ -106,6 +118,18 @@
void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c
+void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
+
+void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
+
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
+
+void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
+
void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c
@@ -118,6 +142,21 @@
void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c
+void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
+
+void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
+
+void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
+
+void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
+
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
+
void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c
@@ -217,6 +256,9 @@
void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
+
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct16x16_10_add vpx_idct16x16_10_add_c
@@ -652,6 +694,9 @@
uint32_t vpx_variance_halfpixvar16x16_v_media(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_media
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
diff --git a/config/arm/vpx_version.h b/config/arm/vpx_version.h
index bce0381..3b6ea1e 100644
--- a/config/arm/vpx_version.h
+++ b/config/arm/vpx_version.h
@@ -1,7 +1,7 @@
#define VERSION_MAJOR 1
-#define VERSION_MINOR 4
+#define VERSION_MINOR 5
#define VERSION_PATCH 0
#define VERSION_EXTRA ""
#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH))
-#define VERSION_STRING_NOSP "v1.4.0"
-#define VERSION_STRING " v1.4.0"
+#define VERSION_STRING_NOSP "v1.5.0"
+#define VERSION_STRING " v1.5.0"
diff --git a/config/arm64/libvpx_srcs.txt b/config/arm64/libvpx_srcs.txt
index 483ffbb..9770560 100644
--- a/config/arm64/libvpx_srcs.txt
+++ b/config/arm64/libvpx_srcs.txt
@@ -19,7 +19,6 @@
vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c
vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c
vp8/common/arm/neon/mbloopfilter_neon.c
-vp8/common/arm/neon/reconintra_neon.c
vp8/common/arm/neon/shortidct4x4llm_neon.c
vp8/common/arm/neon/sixtappredict_neon.c
vp8/common/arm/neon/vp8_loopfilter_neon.c
@@ -63,6 +62,7 @@
vp8/common/reconinter.c
vp8/common/reconinter.h
vp8/common/reconintra.c
+vp8/common/reconintra.h
vp8/common/reconintra4x4.c
vp8/common/reconintra4x4.h
vp8/common/rtcd.c
@@ -279,6 +279,7 @@
vp9/vp9_common.mk
vp9/vp9_cx_iface.c
vp9/vp9_dx_iface.c
+vp9/vp9_dx_iface.h
vp9/vp9_iface_common.h
vp9/vp9cx.mk
vp9/vp9dx.mk
diff --git a/config/arm64/vp8_rtcd.h b/config/arm64/vp8_rtcd.h
index 1f37629..5ab06f4 100644
--- a/config/arm64/vp8_rtcd.h
+++ b/config/arm64/vp8_rtcd.h
@@ -44,14 +44,6 @@
int vp8_block_error_c(short *coeff, short *dqcoeff);
#define vp8_block_error vp8_block_error_c
-void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-void vp8_build_intra_predictors_mbuv_s_neon(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_neon
-
-void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-void vp8_build_intra_predictors_mby_s_neon(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_neon
-
void vp8_clear_system_state_c();
#define vp8_clear_system_state vp8_clear_system_state_c
@@ -105,9 +97,6 @@
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
-#define vp8_intra4x4_predict vp8_intra4x4_predict_c
-
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#define vp8_loop_filter_bh vp8_loop_filter_bh_neon
diff --git a/config/arm64/vpx_config.asm b/config/arm64/vpx_config.asm
index b6c1a52..d7d6652 100644
--- a/config/arm64/vpx_config.asm
+++ b/config/arm64/vpx_config.asm
@@ -28,7 +28,7 @@
.equ HAVE_UNISTD_H , 1
.equ CONFIG_DEPENDENCY_TRACKING , 1
.equ CONFIG_EXTERNAL_BUILD , 1
-.equ CONFIG_INSTALL_DOCS , 1
+.equ CONFIG_INSTALL_DOCS , 0
.equ CONFIG_INSTALL_BINS , 1
.equ CONFIG_INSTALL_LIBS , 1
.equ CONFIG_INSTALL_SRCS , 0
@@ -86,4 +86,5 @@
.equ CONFIG_SPATIAL_SVC , 0
.equ CONFIG_FP_MB_STATS , 0
.equ CONFIG_EMULATE_HARDWARE , 0
+.equ CONFIG_MISC_FIXES , 0
.section .note.GNU-stack,"",%progbits
diff --git a/config/arm64/vpx_config.h b/config/arm64/vpx_config.h
index fb0eabc..981aa3e 100644
--- a/config/arm64/vpx_config.h
+++ b/config/arm64/vpx_config.h
@@ -37,7 +37,7 @@
#define HAVE_UNISTD_H 1
#define CONFIG_DEPENDENCY_TRACKING 1
#define CONFIG_EXTERNAL_BUILD 1
-#define CONFIG_INSTALL_DOCS 1
+#define CONFIG_INSTALL_DOCS 0
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
@@ -95,4 +95,5 @@
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_FP_MB_STATS 0
#define CONFIG_EMULATE_HARDWARE 0
+#define CONFIG_MISC_FIXES 0
#endif /* VPX_CONFIG_H */
diff --git a/config/arm64/vpx_dsp_rtcd.h b/config/arm64/vpx_dsp_rtcd.h
index 2cac9e6..e5fa148 100644
--- a/config/arm64/vpx_dsp_rtcd.h
+++ b/config/arm64/vpx_dsp_rtcd.h
@@ -103,6 +103,18 @@
void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c
+void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
+
+void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
+
+void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
+
+void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
+
void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_neon
@@ -118,6 +130,18 @@
void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_neon
+void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
+
+void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
+
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
+
+void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
+
void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c
@@ -130,6 +154,21 @@
void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c
+void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
+
+void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
+
+void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
+
+void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
+
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
+
void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_neon
@@ -254,6 +293,9 @@
void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_neon
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
+
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct16x16_10_add vpx_idct16x16_10_add_neon
@@ -728,6 +770,9 @@
uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_c
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
diff --git a/config/arm64/vpx_version.h b/config/arm64/vpx_version.h
index bce0381..3b6ea1e 100644
--- a/config/arm64/vpx_version.h
+++ b/config/arm64/vpx_version.h
@@ -1,7 +1,7 @@
#define VERSION_MAJOR 1
-#define VERSION_MINOR 4
+#define VERSION_MINOR 5
#define VERSION_PATCH 0
#define VERSION_EXTRA ""
#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH))
-#define VERSION_STRING_NOSP "v1.4.0"
-#define VERSION_STRING " v1.4.0"
+#define VERSION_STRING_NOSP "v1.5.0"
+#define VERSION_STRING " v1.5.0"
diff --git a/config/generic/libvpx_srcs.txt b/config/generic/libvpx_srcs.txt
index f6e76f0..212026b 100644
--- a/config/generic/libvpx_srcs.txt
+++ b/config/generic/libvpx_srcs.txt
@@ -44,6 +44,7 @@
vp8/common/reconinter.c
vp8/common/reconinter.h
vp8/common/reconintra.c
+vp8/common/reconintra.h
vp8/common/reconintra4x4.c
vp8/common/reconintra4x4.h
vp8/common/rtcd.c
@@ -248,6 +249,7 @@
vp9/vp9_common.mk
vp9/vp9_cx_iface.c
vp9/vp9_dx_iface.c
+vp9/vp9_dx_iface.h
vp9/vp9_iface_common.h
vp9/vp9cx.mk
vp9/vp9dx.mk
diff --git a/config/generic/vp8_rtcd.h b/config/generic/vp8_rtcd.h
index f5424bb..bad72a3 100644
--- a/config/generic/vp8_rtcd.h
+++ b/config/generic/vp8_rtcd.h
@@ -41,12 +41,6 @@
int vp8_block_error_c(short *coeff, short *dqcoeff);
#define vp8_block_error vp8_block_error_c
-void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_c
-
-void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_c
-
void vp8_clear_system_state_c();
#define vp8_clear_system_state vp8_clear_system_state_c
@@ -89,9 +83,6 @@
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
-#define vp8_intra4x4_predict vp8_intra4x4_predict_c
-
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#define vp8_loop_filter_bh vp8_loop_filter_bh_c
diff --git a/config/generic/vpx_config.asm b/config/generic/vpx_config.asm
index b684cd2..c3530a2 100644
--- a/config/generic/vpx_config.asm
+++ b/config/generic/vpx_config.asm
@@ -28,7 +28,7 @@
.equ HAVE_UNISTD_H , 1
.equ CONFIG_DEPENDENCY_TRACKING , 1
.equ CONFIG_EXTERNAL_BUILD , 1
-.equ CONFIG_INSTALL_DOCS , 1
+.equ CONFIG_INSTALL_DOCS , 0
.equ CONFIG_INSTALL_BINS , 1
.equ CONFIG_INSTALL_LIBS , 1
.equ CONFIG_INSTALL_SRCS , 0
@@ -86,4 +86,5 @@
.equ CONFIG_SPATIAL_SVC , 0
.equ CONFIG_FP_MB_STATS , 0
.equ CONFIG_EMULATE_HARDWARE , 0
+.equ CONFIG_MISC_FIXES , 0
.section .note.GNU-stack,"",%progbits
diff --git a/config/generic/vpx_config.h b/config/generic/vpx_config.h
index 9cdca1f..50da704 100644
--- a/config/generic/vpx_config.h
+++ b/config/generic/vpx_config.h
@@ -37,7 +37,7 @@
#define HAVE_UNISTD_H 1
#define CONFIG_DEPENDENCY_TRACKING 1
#define CONFIG_EXTERNAL_BUILD 1
-#define CONFIG_INSTALL_DOCS 1
+#define CONFIG_INSTALL_DOCS 0
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
@@ -95,4 +95,5 @@
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_FP_MB_STATS 0
#define CONFIG_EMULATE_HARDWARE 0
+#define CONFIG_MISC_FIXES 0
#endif /* VPX_CONFIG_H */
diff --git a/config/generic/vpx_dsp_rtcd.h b/config/generic/vpx_dsp_rtcd.h
index 010cbe7..f4929ee 100644
--- a/config/generic/vpx_dsp_rtcd.h
+++ b/config/generic/vpx_dsp_rtcd.h
@@ -94,6 +94,18 @@
void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c
+void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
+
+void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
+
+void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
+
+void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
+
void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c
@@ -106,6 +118,18 @@
void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c
+void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
+
+void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
+
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
+
+void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
+
void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c
@@ -118,6 +142,21 @@
void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c
+void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
+
+void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
+
+void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
+
+void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
+
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
+
void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c
@@ -217,6 +256,9 @@
void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
+
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct16x16_10_add vpx_idct16x16_10_add_c
@@ -643,6 +685,9 @@
uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_c
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
diff --git a/config/generic/vpx_version.h b/config/generic/vpx_version.h
index bce0381..3b6ea1e 100644
--- a/config/generic/vpx_version.h
+++ b/config/generic/vpx_version.h
@@ -1,7 +1,7 @@
#define VERSION_MAJOR 1
-#define VERSION_MINOR 4
+#define VERSION_MINOR 5
#define VERSION_PATCH 0
#define VERSION_EXTRA ""
#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH))
-#define VERSION_STRING_NOSP "v1.4.0"
-#define VERSION_STRING " v1.4.0"
+#define VERSION_STRING_NOSP "v1.5.0"
+#define VERSION_STRING " v1.5.0"
diff --git a/config/mips32-dspr2/libvpx_srcs.txt b/config/mips32-dspr2/libvpx_srcs.txt
index 9ea5ede..452c0a3 100644
--- a/config/mips32-dspr2/libvpx_srcs.txt
+++ b/config/mips32-dspr2/libvpx_srcs.txt
@@ -50,6 +50,7 @@
vp8/common/reconinter.c
vp8/common/reconinter.h
vp8/common/reconintra.c
+vp8/common/reconintra.h
vp8/common/reconintra4x4.c
vp8/common/reconintra4x4.h
vp8/common/rtcd.c
@@ -257,6 +258,7 @@
vp9/vp9_common.mk
vp9/vp9_cx_iface.c
vp9/vp9_dx_iface.c
+vp9/vp9_dx_iface.h
vp9/vp9_iface_common.h
vp9/vp9cx.mk
vp9/vp9dx.mk
diff --git a/config/mips32-dspr2/vp8_rtcd.h b/config/mips32-dspr2/vp8_rtcd.h
index 4442f6a..03d3f0c 100644
--- a/config/mips32-dspr2/vp8_rtcd.h
+++ b/config/mips32-dspr2/vp8_rtcd.h
@@ -41,12 +41,6 @@
int vp8_block_error_c(short *coeff, short *dqcoeff);
#define vp8_block_error vp8_block_error_c
-void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_c
-
-void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_c
-
void vp8_clear_system_state_c();
#define vp8_clear_system_state vp8_clear_system_state_c
@@ -96,9 +90,6 @@
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
-#define vp8_intra4x4_predict vp8_intra4x4_predict_c
-
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bh_dspr2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#define vp8_loop_filter_bh vp8_loop_filter_bh_dspr2
diff --git a/config/mips32-dspr2/vpx_config.h b/config/mips32-dspr2/vpx_config.h
index f0a0556..4e8961c 100644
--- a/config/mips32-dspr2/vpx_config.h
+++ b/config/mips32-dspr2/vpx_config.h
@@ -37,7 +37,7 @@
#define HAVE_UNISTD_H 1
#define CONFIG_DEPENDENCY_TRACKING 1
#define CONFIG_EXTERNAL_BUILD 1
-#define CONFIG_INSTALL_DOCS 1
+#define CONFIG_INSTALL_DOCS 0
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
@@ -95,4 +95,5 @@
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_FP_MB_STATS 0
#define CONFIG_EMULATE_HARDWARE 0
+#define CONFIG_MISC_FIXES 0
#endif /* VPX_CONFIG_H */
diff --git a/config/mips32-dspr2/vpx_dsp_rtcd.h b/config/mips32-dspr2/vpx_dsp_rtcd.h
index b716181..7acb807 100644
--- a/config/mips32-dspr2/vpx_dsp_rtcd.h
+++ b/config/mips32-dspr2/vpx_dsp_rtcd.h
@@ -102,6 +102,18 @@
void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c
+void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
+
+void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
+
+void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
+
+void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
+
void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c
@@ -114,6 +126,18 @@
void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c
+void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
+
+void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
+
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
+
+void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
+
void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c
@@ -126,6 +150,21 @@
void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c
+void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
+
+void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
+
+void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
+
+void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
+
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
+
void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c
@@ -231,6 +270,9 @@
void vpx_h_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_dspr2
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
+
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_10_add_dspr2(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct16x16_10_add vpx_idct16x16_10_add_dspr2
@@ -681,6 +723,9 @@
uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_c
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
diff --git a/config/mips32-dspr2/vpx_version.h b/config/mips32-dspr2/vpx_version.h
index bce0381..3b6ea1e 100644
--- a/config/mips32-dspr2/vpx_version.h
+++ b/config/mips32-dspr2/vpx_version.h
@@ -1,7 +1,7 @@
#define VERSION_MAJOR 1
-#define VERSION_MINOR 4
+#define VERSION_MINOR 5
#define VERSION_PATCH 0
#define VERSION_EXTRA ""
#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH))
-#define VERSION_STRING_NOSP "v1.4.0"
-#define VERSION_STRING " v1.4.0"
+#define VERSION_STRING_NOSP "v1.5.0"
+#define VERSION_STRING " v1.5.0"
diff --git a/config/mips32/libvpx_srcs.txt b/config/mips32/libvpx_srcs.txt
index f6e76f0..212026b 100644
--- a/config/mips32/libvpx_srcs.txt
+++ b/config/mips32/libvpx_srcs.txt
@@ -44,6 +44,7 @@
vp8/common/reconinter.c
vp8/common/reconinter.h
vp8/common/reconintra.c
+vp8/common/reconintra.h
vp8/common/reconintra4x4.c
vp8/common/reconintra4x4.h
vp8/common/rtcd.c
@@ -248,6 +249,7 @@
vp9/vp9_common.mk
vp9/vp9_cx_iface.c
vp9/vp9_dx_iface.c
+vp9/vp9_dx_iface.h
vp9/vp9_iface_common.h
vp9/vp9cx.mk
vp9/vp9dx.mk
diff --git a/config/mips32/vp8_rtcd.h b/config/mips32/vp8_rtcd.h
index 28e23b3..791c155 100644
--- a/config/mips32/vp8_rtcd.h
+++ b/config/mips32/vp8_rtcd.h
@@ -41,12 +41,6 @@
int vp8_block_error_c(short *coeff, short *dqcoeff);
#define vp8_block_error vp8_block_error_c
-void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_c
-
-void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_c
-
void vp8_clear_system_state_c();
#define vp8_clear_system_state vp8_clear_system_state_c
@@ -89,9 +83,6 @@
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
-#define vp8_intra4x4_predict vp8_intra4x4_predict_c
-
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#define vp8_loop_filter_bh vp8_loop_filter_bh_c
diff --git a/config/mips32/vpx_config.h b/config/mips32/vpx_config.h
index 1bc7afa..82c9cf5 100644
--- a/config/mips32/vpx_config.h
+++ b/config/mips32/vpx_config.h
@@ -37,7 +37,7 @@
#define HAVE_UNISTD_H 1
#define CONFIG_DEPENDENCY_TRACKING 1
#define CONFIG_EXTERNAL_BUILD 1
-#define CONFIG_INSTALL_DOCS 1
+#define CONFIG_INSTALL_DOCS 0
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
@@ -95,4 +95,5 @@
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_FP_MB_STATS 0
#define CONFIG_EMULATE_HARDWARE 0
+#define CONFIG_MISC_FIXES 0
#endif /* VPX_CONFIG_H */
diff --git a/config/mips32/vpx_dsp_rtcd.h b/config/mips32/vpx_dsp_rtcd.h
index cff36af..2d2bec2 100644
--- a/config/mips32/vpx_dsp_rtcd.h
+++ b/config/mips32/vpx_dsp_rtcd.h
@@ -94,6 +94,18 @@
void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c
+void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
+
+void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
+
+void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
+
+void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
+
void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c
@@ -106,6 +118,18 @@
void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c
+void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
+
+void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
+
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
+
+void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
+
void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c
@@ -118,6 +142,21 @@
void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c
+void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
+
+void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
+
+void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
+
+void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
+
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
+
void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c
@@ -217,6 +256,9 @@
void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
+
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct16x16_10_add vpx_idct16x16_10_add_c
@@ -643,6 +685,9 @@
uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_c
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
diff --git a/config/mips32/vpx_version.h b/config/mips32/vpx_version.h
index bce0381..3b6ea1e 100644
--- a/config/mips32/vpx_version.h
+++ b/config/mips32/vpx_version.h
@@ -1,7 +1,7 @@
#define VERSION_MAJOR 1
-#define VERSION_MINOR 4
+#define VERSION_MINOR 5
#define VERSION_PATCH 0
#define VERSION_EXTRA ""
#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH))
-#define VERSION_STRING_NOSP "v1.4.0"
-#define VERSION_STRING " v1.4.0"
+#define VERSION_STRING_NOSP "v1.5.0"
+#define VERSION_STRING " v1.5.0"
diff --git a/config/mips64/libvpx_srcs.txt b/config/mips64/libvpx_srcs.txt
index f6e76f0..212026b 100644
--- a/config/mips64/libvpx_srcs.txt
+++ b/config/mips64/libvpx_srcs.txt
@@ -44,6 +44,7 @@
vp8/common/reconinter.c
vp8/common/reconinter.h
vp8/common/reconintra.c
+vp8/common/reconintra.h
vp8/common/reconintra4x4.c
vp8/common/reconintra4x4.h
vp8/common/rtcd.c
@@ -248,6 +249,7 @@
vp9/vp9_common.mk
vp9/vp9_cx_iface.c
vp9/vp9_dx_iface.c
+vp9/vp9_dx_iface.h
vp9/vp9_iface_common.h
vp9/vp9cx.mk
vp9/vp9dx.mk
diff --git a/config/mips64/vp8_rtcd.h b/config/mips64/vp8_rtcd.h
index 28e23b3..791c155 100644
--- a/config/mips64/vp8_rtcd.h
+++ b/config/mips64/vp8_rtcd.h
@@ -41,12 +41,6 @@
int vp8_block_error_c(short *coeff, short *dqcoeff);
#define vp8_block_error vp8_block_error_c
-void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_c
-
-void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_c
-
void vp8_clear_system_state_c();
#define vp8_clear_system_state vp8_clear_system_state_c
@@ -89,9 +83,6 @@
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
-#define vp8_intra4x4_predict vp8_intra4x4_predict_c
-
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#define vp8_loop_filter_bh vp8_loop_filter_bh_c
diff --git a/config/mips64/vpx_config.h b/config/mips64/vpx_config.h
index f19731b..b6cc04b 100644
--- a/config/mips64/vpx_config.h
+++ b/config/mips64/vpx_config.h
@@ -37,7 +37,7 @@
#define HAVE_UNISTD_H 1
#define CONFIG_DEPENDENCY_TRACKING 1
#define CONFIG_EXTERNAL_BUILD 1
-#define CONFIG_INSTALL_DOCS 1
+#define CONFIG_INSTALL_DOCS 0
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
@@ -95,4 +95,5 @@
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_FP_MB_STATS 0
#define CONFIG_EMULATE_HARDWARE 0
+#define CONFIG_MISC_FIXES 0
#endif /* VPX_CONFIG_H */
diff --git a/config/mips64/vpx_dsp_rtcd.h b/config/mips64/vpx_dsp_rtcd.h
index cff36af..2d2bec2 100644
--- a/config/mips64/vpx_dsp_rtcd.h
+++ b/config/mips64/vpx_dsp_rtcd.h
@@ -94,6 +94,18 @@
void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c
+void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
+
+void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
+
+void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
+
+void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
+
void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c
@@ -106,6 +118,18 @@
void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c
+void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
+
+void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
+
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
+
+void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
+
void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c
@@ -118,6 +142,21 @@
void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c
+void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
+
+void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
+
+void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
+
+void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
+
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
+
void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c
@@ -217,6 +256,9 @@
void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
+
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct16x16_10_add vpx_idct16x16_10_add_c
@@ -643,6 +685,9 @@
uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_c
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
diff --git a/config/mips64/vpx_version.h b/config/mips64/vpx_version.h
index bce0381..3b6ea1e 100644
--- a/config/mips64/vpx_version.h
+++ b/config/mips64/vpx_version.h
@@ -1,7 +1,7 @@
#define VERSION_MAJOR 1
-#define VERSION_MINOR 4
+#define VERSION_MINOR 5
#define VERSION_PATCH 0
#define VERSION_EXTRA ""
#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH))
-#define VERSION_STRING_NOSP "v1.4.0"
-#define VERSION_STRING " v1.4.0"
+#define VERSION_STRING_NOSP "v1.5.0"
+#define VERSION_STRING " v1.5.0"
diff --git a/config/x86/libvpx_srcs.txt b/config/x86/libvpx_srcs.txt
index af63fd8..8815016 100644
--- a/config/x86/libvpx_srcs.txt
+++ b/config/x86/libvpx_srcs.txt
@@ -47,6 +47,7 @@
vp8/common/reconinter.c
vp8/common/reconinter.h
vp8/common/reconintra.c
+vp8/common/reconintra.h
vp8/common/reconintra4x4.c
vp8/common/reconintra4x4.h
vp8/common/rtcd.c
@@ -79,7 +80,6 @@
vp8/common/x86/postproc_sse2.asm
vp8/common/x86/recon_mmx.asm
vp8/common/x86/recon_sse2.asm
-vp8/common/x86/recon_wrapper_sse2.c
vp8/common/x86/subpixel_mmx.asm
vp8/common/x86/subpixel_sse2.asm
vp8/common/x86/subpixel_ssse3.asm
@@ -293,6 +293,7 @@
vp9/vp9_common.mk
vp9/vp9_cx_iface.c
vp9/vp9_dx_iface.c
+vp9/vp9_dx_iface.h
vp9/vp9_iface_common.h
vp9/vp9cx.mk
vp9/vp9dx.mk
@@ -349,6 +350,8 @@
vpx_dsp/x86/fwd_txfm_impl_sse2.h
vpx_dsp/x86/fwd_txfm_sse2.c
vpx_dsp/x86/fwd_txfm_sse2.h
+vpx_dsp/x86/halfpix_variance_impl_sse2.asm
+vpx_dsp/x86/halfpix_variance_sse2.c
vpx_dsp/x86/intrapred_sse2.asm
vpx_dsp/x86/intrapred_ssse3.asm
vpx_dsp/x86/inv_txfm_sse2.c
diff --git a/config/x86/vp8_rtcd.h b/config/x86/vp8_rtcd.h
index fc714f4..c4c7045 100644
--- a/config/x86/vp8_rtcd.h
+++ b/config/x86/vp8_rtcd.h
@@ -60,16 +60,6 @@
int vp8_block_error_xmm(short *coeff, short *dqcoeff);
#define vp8_block_error vp8_block_error_xmm
-void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-void vp8_build_intra_predictors_mbuv_s_sse2(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-void vp8_build_intra_predictors_mbuv_s_ssse3(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_ssse3
-
-void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-void vp8_build_intra_predictors_mby_s_sse2(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-void vp8_build_intra_predictors_mby_s_ssse3(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_ssse3
-
void vp8_clear_system_state_c();
void vpx_reset_mmx_state();
#define vp8_clear_system_state vpx_reset_mmx_state
@@ -146,9 +136,6 @@
int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sadx3
-void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
-#define vp8_intra4x4_predict vp8_intra4x4_predict_c
-
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
diff --git a/config/x86/vpx_config.asm b/config/x86/vpx_config.asm
index 2b7f1cc..b6557bb 100644
--- a/config/x86/vpx_config.asm
+++ b/config/x86/vpx_config.asm
@@ -25,7 +25,7 @@
%define HAVE_UNISTD_H 1
%define CONFIG_DEPENDENCY_TRACKING 1
%define CONFIG_EXTERNAL_BUILD 1
-%define CONFIG_INSTALL_DOCS 1
+%define CONFIG_INSTALL_DOCS 0
%define CONFIG_INSTALL_BINS 1
%define CONFIG_INSTALL_LIBS 1
%define CONFIG_INSTALL_SRCS 0
@@ -83,3 +83,4 @@
%define CONFIG_SPATIAL_SVC 0
%define CONFIG_FP_MB_STATS 0
%define CONFIG_EMULATE_HARDWARE 0
+%define CONFIG_MISC_FIXES 0
diff --git a/config/x86/vpx_config.h b/config/x86/vpx_config.h
index 634c67b..a516857 100644
--- a/config/x86/vpx_config.h
+++ b/config/x86/vpx_config.h
@@ -37,7 +37,7 @@
#define HAVE_UNISTD_H 1
#define CONFIG_DEPENDENCY_TRACKING 1
#define CONFIG_EXTERNAL_BUILD 1
-#define CONFIG_INSTALL_DOCS 1
+#define CONFIG_INSTALL_DOCS 0
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
@@ -95,4 +95,5 @@
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_FP_MB_STATS 0
#define CONFIG_EMULATE_HARDWARE 0
+#define CONFIG_MISC_FIXES 0
#endif /* VPX_CONFIG_H */
diff --git a/config/x86/vpx_dsp_rtcd.h b/config/x86/vpx_dsp_rtcd.h
index 64ee53f..af7917a 100644
--- a/config/x86/vpx_dsp_rtcd.h
+++ b/config/x86/vpx_dsp_rtcd.h
@@ -116,6 +116,18 @@
void vpx_d207_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_ssse3
+void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
+
+void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
+
+void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
+
+void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
+
void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_ssse3
@@ -132,6 +144,18 @@
void vpx_d45_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_ssse3
+void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
+
+void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
+
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
+
+void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
+
void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d63_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_ssse3
@@ -148,6 +172,21 @@
void vpx_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_ssse3
+void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
+
+void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
+
+void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
+
+void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
+
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
+
void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_sse2
@@ -281,6 +320,9 @@
void vpx_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_ssse3
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
+
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct16x16_10_add vpx_idct16x16_10_add_sse2
@@ -864,15 +906,21 @@
uint32_t vpx_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
uint32_t vpx_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
-#define vpx_variance_halfpixvar16x16_h vpx_variance_halfpixvar16x16_h_mmx
+uint32_t vpx_variance_halfpixvar16x16_h_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
+#define vpx_variance_halfpixvar16x16_h vpx_variance_halfpixvar16x16_h_sse2
uint32_t vpx_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
uint32_t vpx_variance_halfpixvar16x16_hv_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
-#define vpx_variance_halfpixvar16x16_hv vpx_variance_halfpixvar16x16_hv_mmx
+uint32_t vpx_variance_halfpixvar16x16_hv_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
+#define vpx_variance_halfpixvar16x16_hv vpx_variance_halfpixvar16x16_hv_sse2
uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
uint32_t vpx_variance_halfpixvar16x16_v_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
-#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_mmx
+uint32_t vpx_variance_halfpixvar16x16_v_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
+#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_sse2
+
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
void vpx_dsp_rtcd(void);
diff --git a/config/x86/vpx_version.h b/config/x86/vpx_version.h
index bce0381..3b6ea1e 100644
--- a/config/x86/vpx_version.h
+++ b/config/x86/vpx_version.h
@@ -1,7 +1,7 @@
#define VERSION_MAJOR 1
-#define VERSION_MINOR 4
+#define VERSION_MINOR 5
#define VERSION_PATCH 0
#define VERSION_EXTRA ""
#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH))
-#define VERSION_STRING_NOSP "v1.4.0"
-#define VERSION_STRING " v1.4.0"
+#define VERSION_STRING_NOSP "v1.5.0"
+#define VERSION_STRING " v1.5.0"
diff --git a/config/x86_64/libvpx_srcs.txt b/config/x86_64/libvpx_srcs.txt
index ac3de52..3794e3b 100644
--- a/config/x86_64/libvpx_srcs.txt
+++ b/config/x86_64/libvpx_srcs.txt
@@ -47,6 +47,7 @@
vp8/common/reconinter.c
vp8/common/reconinter.h
vp8/common/reconintra.c
+vp8/common/reconintra.h
vp8/common/reconintra4x4.c
vp8/common/reconintra4x4.h
vp8/common/rtcd.c
@@ -80,7 +81,6 @@
vp8/common/x86/postproc_sse2.asm
vp8/common/x86/recon_mmx.asm
vp8/common/x86/recon_sse2.asm
-vp8/common/x86/recon_wrapper_sse2.c
vp8/common/x86/subpixel_mmx.asm
vp8/common/x86/subpixel_sse2.asm
vp8/common/x86/subpixel_ssse3.asm
@@ -296,6 +296,7 @@
vp9/vp9_common.mk
vp9/vp9_cx_iface.c
vp9/vp9_dx_iface.c
+vp9/vp9_dx_iface.h
vp9/vp9_iface_common.h
vp9/vp9cx.mk
vp9/vp9dx.mk
@@ -353,6 +354,8 @@
vpx_dsp/x86/fwd_txfm_sse2.c
vpx_dsp/x86/fwd_txfm_sse2.h
vpx_dsp/x86/fwd_txfm_ssse3_x86_64.asm
+vpx_dsp/x86/halfpix_variance_impl_sse2.asm
+vpx_dsp/x86/halfpix_variance_sse2.c
vpx_dsp/x86/intrapred_sse2.asm
vpx_dsp/x86/intrapred_ssse3.asm
vpx_dsp/x86/inv_txfm_sse2.c
diff --git a/config/x86_64/vp8_rtcd.h b/config/x86_64/vp8_rtcd.h
index fc714f4..c4c7045 100644
--- a/config/x86_64/vp8_rtcd.h
+++ b/config/x86_64/vp8_rtcd.h
@@ -60,16 +60,6 @@
int vp8_block_error_xmm(short *coeff, short *dqcoeff);
#define vp8_block_error vp8_block_error_xmm
-void vp8_build_intra_predictors_mbuv_s_c(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-void vp8_build_intra_predictors_mbuv_s_sse2(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-void vp8_build_intra_predictors_mbuv_s_ssse3(struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride);
-#define vp8_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_ssse3
-
-void vp8_build_intra_predictors_mby_s_c(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-void vp8_build_intra_predictors_mby_s_sse2(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-void vp8_build_intra_predictors_mby_s_ssse3(struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride);
-#define vp8_build_intra_predictors_mby_s vp8_build_intra_predictors_mby_s_ssse3
-
void vp8_clear_system_state_c();
void vpx_reset_mmx_state();
#define vp8_clear_system_state vpx_reset_mmx_state
@@ -146,9 +136,6 @@
int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sadx3
-void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
-#define vp8_intra4x4_predict vp8_intra4x4_predict_c
-
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
diff --git a/config/x86_64/vpx_config.asm b/config/x86_64/vpx_config.asm
index 6f0800b..774d73f 100644
--- a/config/x86_64/vpx_config.asm
+++ b/config/x86_64/vpx_config.asm
@@ -25,7 +25,7 @@
%define HAVE_UNISTD_H 1
%define CONFIG_DEPENDENCY_TRACKING 1
%define CONFIG_EXTERNAL_BUILD 1
-%define CONFIG_INSTALL_DOCS 1
+%define CONFIG_INSTALL_DOCS 0
%define CONFIG_INSTALL_BINS 1
%define CONFIG_INSTALL_LIBS 1
%define CONFIG_INSTALL_SRCS 0
@@ -83,3 +83,4 @@
%define CONFIG_SPATIAL_SVC 0
%define CONFIG_FP_MB_STATS 0
%define CONFIG_EMULATE_HARDWARE 0
+%define CONFIG_MISC_FIXES 0
diff --git a/config/x86_64/vpx_config.h b/config/x86_64/vpx_config.h
index 8796347..9278f1e 100644
--- a/config/x86_64/vpx_config.h
+++ b/config/x86_64/vpx_config.h
@@ -37,7 +37,7 @@
#define HAVE_UNISTD_H 1
#define CONFIG_DEPENDENCY_TRACKING 1
#define CONFIG_EXTERNAL_BUILD 1
-#define CONFIG_INSTALL_DOCS 1
+#define CONFIG_INSTALL_DOCS 0
#define CONFIG_INSTALL_BINS 1
#define CONFIG_INSTALL_LIBS 1
#define CONFIG_INSTALL_SRCS 0
@@ -95,4 +95,5 @@
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_FP_MB_STATS 0
#define CONFIG_EMULATE_HARDWARE 0
+#define CONFIG_MISC_FIXES 0
#endif /* VPX_CONFIG_H */
diff --git a/config/x86_64/vpx_dsp_rtcd.h b/config/x86_64/vpx_dsp_rtcd.h
index e78d8ef..7396233 100644
--- a/config/x86_64/vpx_dsp_rtcd.h
+++ b/config/x86_64/vpx_dsp_rtcd.h
@@ -116,6 +116,18 @@
void vpx_d207_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_ssse3
+void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
+
+void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
+
+void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
+
+void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
+
void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_ssse3
@@ -132,6 +144,18 @@
void vpx_d45_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_ssse3
+void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
+
+void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
+
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
+
+void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
+
void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d63_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_ssse3
@@ -148,6 +172,21 @@
void vpx_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_ssse3
+void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
+
+void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
+
+void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
+
+void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
+
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
+
void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_sse2
@@ -282,6 +321,9 @@
void vpx_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_ssse3
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
+
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct16x16_10_add vpx_idct16x16_10_add_sse2
@@ -870,15 +912,21 @@
uint32_t vpx_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
uint32_t vpx_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
-#define vpx_variance_halfpixvar16x16_h vpx_variance_halfpixvar16x16_h_mmx
+uint32_t vpx_variance_halfpixvar16x16_h_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
+#define vpx_variance_halfpixvar16x16_h vpx_variance_halfpixvar16x16_h_sse2
uint32_t vpx_variance_halfpixvar16x16_hv_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
uint32_t vpx_variance_halfpixvar16x16_hv_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
-#define vpx_variance_halfpixvar16x16_hv vpx_variance_halfpixvar16x16_hv_mmx
+uint32_t vpx_variance_halfpixvar16x16_hv_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
+#define vpx_variance_halfpixvar16x16_hv vpx_variance_halfpixvar16x16_hv_sse2
uint32_t vpx_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
uint32_t vpx_variance_halfpixvar16x16_v_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
-#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_mmx
+uint32_t vpx_variance_halfpixvar16x16_v_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse);
+#define vpx_variance_halfpixvar16x16_v vpx_variance_halfpixvar16x16_v_sse2
+
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
void vpx_dsp_rtcd(void);
diff --git a/config/x86_64/vpx_version.h b/config/x86_64/vpx_version.h
index bce0381..3b6ea1e 100644
--- a/config/x86_64/vpx_version.h
+++ b/config/x86_64/vpx_version.h
@@ -1,7 +1,7 @@
#define VERSION_MAJOR 1
-#define VERSION_MINOR 4
+#define VERSION_MINOR 5
#define VERSION_PATCH 0
#define VERSION_EXTRA ""
#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH))
-#define VERSION_STRING_NOSP "v1.4.0"
-#define VERSION_STRING " v1.4.0"
+#define VERSION_STRING_NOSP "v1.5.0"
+#define VERSION_STRING " v1.5.0"
diff --git a/libvpx.mk b/libvpx.mk
index f1f194f..d9fbd88 100644
--- a/libvpx.mk
+++ b/libvpx.mk
@@ -4,8 +4,10 @@
# Clang arm assembler cannot compile libvpx .s files yet.
LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
# Pass incude path to GCC assembler.
-LOCAL_CLANG_ASFLAGS := \
+LOCAL_CLANG_ASFLAGS_$(TARGET_ARCH) += \
-Wa,-I$(TARGET_OUT_INTERMEDIATES)/STATIC_LIBRARIES/libvpx_intermediates/vp8/encoder
+LOCAL_CLANG_ASFLAGS_$(TARGET_2ND_ARCH) += \
+ -Wa,-I$($(TARGET_2ND_ARCH_VAR_PREFIX)TARGET_OUT_INTERMEDIATES)/STATIC_LIBRARIES/libvpx_intermediates/vp8/encoder
# vp9_mcomp.c:93:10: error: address of array 'x->nmvsadcost' will always evaluate to 'true'
LOCAL_CLANG_CFLAGS += -Wno-pointer-bool-conversion
diff --git a/libvpx/.mailmap b/libvpx/.mailmap
index 0bfda12..42f3617 100644
--- a/libvpx/.mailmap
+++ b/libvpx/.mailmap
@@ -1,14 +1,21 @@
Adrian Grange <agrange@google.com>
-Alex Converse <aconverse@google.com> <alex.converse@gmail.com>
+Adrian Grange <agrange@google.com> <agrange@agrange-macbookpro.roam.corp.google.com>
+Aℓex Converse <aconverse@google.com>
+Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
Alpha Lam <hclam@google.com> <hclam@chromium.org>
Deb Mukherjee <debargha@google.com>
Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
Hangyu Kuang <hkuang@google.com>
+Hangyu Kuang <hkuang@google.com> <hkuang@hkuang-macbookpro.roam.corp.google.com>
+Hui Su <huisu@google.com>
+Jacky Chen <jackychen@google.com>
Jim Bankoski <jimbankoski@google.com>
Johann Koenig <johannkoenig@google.com>
Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com>
+Johann Koenig <johannkoenig@google.com> <johannkoenig@dhcp-172-19-7-52.mtv.corp.google.com>
+Johann Koenig <johannkoenig@google.com> <johann.koenig@gmail.com>
John Koleszar <jkoleszar@google.com>
Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
Marco Paniconi <marpan@google.com>
@@ -17,10 +24,13 @@
Paul Wilkins <paulwilkins@google.com>
Ralph Giles <giles@xiph.org> <giles@entropywave.com>
Ralph Giles <giles@xiph.org> <giles@mozilla.com>
+Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
Sami Pietilä <samipietila@google.com>
Tamar Levy <tamar.levy@intel.com>
Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com>
Tom Finegan <tomfinegan@google.com>
+Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
+Yaowu Xu <yaowu@google.com> <yaowu@YAOWU2-W.ad.corp.google.com>
diff --git a/libvpx/AUTHORS b/libvpx/AUTHORS
index 2f63d7c..f89b677 100644
--- a/libvpx/AUTHORS
+++ b/libvpx/AUTHORS
@@ -5,9 +5,9 @@
Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
Adam Xu <adam@xuyaowu.com>
Adrian Grange <agrange@google.com>
+Aℓex Converse <aconverse@google.com>
Ahmad Sharif <asharif@google.com>
Alexander Voronov <avoronov@graphics.cs.msu.ru>
-Alex Converse <aconverse@google.com>
Alexis Ballier <aballier@gentoo.org>
Alok Ahuja <waveletcoeff@gmail.com>
Alpha Lam <hclam@google.com>
@@ -16,8 +16,10 @@
Andoni Morales Alastruey <ylatuya@gmail.com>
Andres Mejia <mcitadel@gmail.com>
Andrew Russell <anrussell@google.com>
+Angie Chiang <angiebird@google.com>
Aron Rosenberg <arosenberg@logitech.com>
Attila Nagy <attilanagy@google.com>
+Brion Vibber <bvibber@wikimedia.org>
changjun.yang <changjun.yang@intel.com>
Charles 'Buck' Krasic <ckrasic@google.com>
chm <chm@rock-chips.com>
@@ -27,6 +29,7 @@
Dim Temp <dimtemp0@gmail.com>
Dmitry Kovalev <dkovalev@google.com>
Dragan Mrdjan <dmrdjan@mips.com>
+Ed Baker <edward.baker@intel.com>
Ehsan Akhgari <ehsan.akhgari@gmail.com>
Erik Niemeyer <erik.a.niemeyer@intel.com>
Fabio Pedretti <fabio.ped@libero.it>
@@ -34,6 +37,8 @@
Fredrik Söderquist <fs@opera.com>
Fritz Koenig <frkoenig@google.com>
Gaute Strokkenes <gaute.strokkenes@broadcom.com>
+Geza Lore <gezalore@gmail.com>
+Ghislain MARY <ghislainmary2@gmail.com>
Giuseppe Scrivano <gscrivano@gnu.org>
Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
Guillaume Martres <gmartres@google.com>
@@ -44,7 +49,7 @@
Hui Su <huisu@google.com>
Ivan Maltz <ivanmaltz@google.com>
Jacek Caban <cjacek@gmail.com>
-JackyChen <jackychen@google.com>
+Jacky Chen <jackychen@google.com>
James Berry <jamesberry@google.com>
James Yu <james.yu@linaro.org>
James Zern <jzern@google.com>
@@ -60,9 +65,11 @@
Joey Parrish <joeyparrish@google.com>
Johann Koenig <johannkoenig@google.com>
John Koleszar <jkoleszar@google.com>
+Johnny Klonaris <google@jawknee.com>
John Stark <jhnstrk@gmail.com>
Joshua Bleecher Snyder <josh@treelinelabs.com>
Joshua Litt <joshualitt@google.com>
+Julia Robson <juliamrobson@gmail.com>
Justin Clift <justin@salasaga.org>
Justin Lebar <justin.lebar@gmail.com>
KO Myung-Hun <komh@chollian.net>
@@ -82,6 +89,7 @@
Mikhal Shemer <mikhal@google.com>
Minghai Shang <minghai@google.com>
Morton Jonuschat <yabawock@gmail.com>
+Nico Weber <thakis@chromium.org>
Parag Salasakar <img.mips1@gmail.com>
Pascal Massimino <pascal.massimino@gmail.com>
Patrik Westin <patrik.westin@gmail.com>
@@ -96,7 +104,7 @@
Rafaël Carré <funman@videolan.org>
Ralph Giles <giles@xiph.org>
Rob Bradford <rob@linux.intel.com>
-Ronald S. Bultje <rbultje@google.com>
+Ronald S. Bultje <rsbultje@gmail.com>
Rui Ueyama <ruiu@google.com>
Sami Pietilä <samipietila@google.com>
Scott Graham <scottmg@chromium.org>
@@ -104,6 +112,7 @@
Sean McGovern <gseanmcg@gmail.com>
Sergey Ulanov <sergeyu@chromium.org>
Shimon Doodkin <helpmepro1@gmail.com>
+Shunyao Li <shunyaoli@google.com>
Stefan Holmer <holmer@google.com>
Suman Sunkara <sunkaras@google.com>
Taekhyun Kim <takim@nvidia.com>
diff --git a/libvpx/CHANGELOG b/libvpx/CHANGELOG
index b0d3064..7746cc6 100644
--- a/libvpx/CHANGELOG
+++ b/libvpx/CHANGELOG
@@ -1,7 +1,19 @@
-xxxx-yy-zz v1.4.0 "Changes for next release"
- vpxenc is changed to use VP9 by default.
- Encoder controls added for 1 pass SVC.
- Decoder control to toggle on/off loopfilter.
+2015-11-09 v1.5.0 "Javan Whistling Duck"
+ This release improves upon the VP9 encoder and speeds up the encoding and
+ decoding processes.
+
+ - Upgrading:
+ This release is ABI incompatible with 1.4.0. It drops deprecated VP8
+ controls and adds a variety of VP9 controls for testing.
+
+ The vpxenc utility now prefers VP9 by default.
+
+ - Enhancements:
+ Faster VP9 encoding and decoding
+ Smaller library size by combining functions used by VP8 and VP9
+
+ - Bug Fixes:
+ A variety of fuzzing issues
2015-04-03 v1.4.0 "Indian Runner Duck"
This release includes significant improvements to the VP9 codec.
diff --git a/libvpx/build/make/Makefile b/libvpx/build/make/Makefile
index f1b1cca..3081a92 100644
--- a/libvpx/build/make/Makefile
+++ b/libvpx/build/make/Makefile
@@ -140,6 +140,8 @@
$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx $(STACKREALIGN)
$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 $(STACKREALIGN)
$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(STACKREALIGN)
+$(BUILD_PFX)%vp9_reconintra.c.d: CFLAGS += $(STACKREALIGN)
+$(BUILD_PFX)%vp9_reconintra.c.o: CFLAGS += $(STACKREALIGN)
$(BUILD_PFX)%.c.d: %.c
$(if $(quiet),@echo " [DEP] $@")
@@ -285,7 +287,7 @@
# for creating them.
$(1):
$(if $(quiet),@echo " [AR] $$@")
- $(qexec)$$(AR) $$(ARFLAGS) $$@ $$?
+ $(qexec)$$(AR) $$(ARFLAGS) $$@ $$^
endef
define so_template
diff --git a/libvpx/build/make/configure.sh b/libvpx/build/make/configure.sh
index 688fa12..c592b63 100755
--- a/libvpx/build/make/configure.sh
+++ b/libvpx/build/make/configure.sh
@@ -73,6 +73,7 @@
--target=TARGET target platform tuple [generic-gnu]
--cpu=CPU optimize for a specific cpu rather than a family
--extra-cflags=ECFLAGS add ECFLAGS to CFLAGS [$CFLAGS]
+ --extra-cxxflags=ECXXFLAGS add ECXXFLAGS to CXXFLAGS [$CXXFLAGS]
${toggle_extra_warnings} emit harmless warnings (always non-fatal)
${toggle_werror} treat warnings as errors, if possible
(not available with all compilers)
@@ -200,6 +201,10 @@
eval test "x\$$1" = "xno"
}
+# Iterates through positional parameters, checks to confirm the parameter has
+# not been explicitly (force) disabled, and enables the setting controlled by
+# the parameter when the setting is not disabled.
+# Note: Does NOT alter RTCD generation options ($RTCD_OPTIONS).
soft_enable() {
for var in $*; do
if ! disabled $var; then
@@ -209,6 +214,10 @@
done
}
+# Iterates through positional parameters, checks to confirm the parameter has
+# not been explicitly (force) enabled, and disables the setting controlled by
+# the parameter when the setting is not enabled.
+# Note: Does NOT alter RTCD generation options ($RTCD_OPTIONS).
soft_disable() {
for var in $*; do
if ! enabled $var; then
@@ -337,6 +346,10 @@
check_cflags "$@" && add_cflags_only "$@"
}
+check_add_cxxflags() {
+ check_cxxflags "$@" && add_cxxflags_only "$@"
+}
+
check_add_asflags() {
log add_asflags "$@"
add_asflags "$@"
@@ -428,7 +441,7 @@
CFLAGS = ${CFLAGS}
CXXFLAGS = ${CXXFLAGS}
-ARFLAGS = -rus\$(if \$(quiet),c,v)
+ARFLAGS = -crs\$(if \$(quiet),,v)
LDFLAGS = ${LDFLAGS}
ASFLAGS = ${ASFLAGS}
extralibs = ${extralibs}
@@ -503,6 +516,9 @@
--extra-cflags=*)
extra_cflags="${optval}"
;;
+ --extra-cxxflags=*)
+ extra_cxxflags="${optval}"
+ ;;
--enable-?*|--disable-?*)
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
if echo "${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null; then
@@ -617,6 +633,11 @@
xcodebuild -sdk $1 -version Path 2>/dev/null
}
+# Print the major version number of the Darwin SDK specified by $1.
+show_darwin_sdk_major_version() {
+ xcrun --sdk $1 --show-sdk-version 2>/dev/null | cut -d. -f1
+}
+
process_common_toolchain() {
if [ -z "$toolchain" ]; then
gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}"
@@ -729,13 +750,14 @@
# platforms, so use the newest one available.
case ${toolchain} in
arm*-darwin*)
- ios_sdk_dir="$(show_darwin_sdk_path iphoneos)"
- if [ -d "${ios_sdk_dir}" ]; then
- add_cflags "-isysroot ${ios_sdk_dir}"
- add_ldflags "-isysroot ${ios_sdk_dir}"
+ add_cflags "-miphoneos-version-min=${IOS_VERSION_MIN}"
+ iphoneos_sdk_dir="$(show_darwin_sdk_path iphoneos)"
+ if [ -d "${iphoneos_sdk_dir}" ]; then
+ add_cflags "-isysroot ${iphoneos_sdk_dir}"
+ add_ldflags "-isysroot ${iphoneos_sdk_dir}"
fi
;;
- *-darwin*)
+ x86*-darwin*)
osx_sdk_dir="$(show_darwin_sdk_path macosx)"
if [ -d "${osx_sdk_dir}" ]; then
add_cflags "-isysroot ${osx_sdk_dir}"
@@ -811,16 +833,35 @@
die "Disabling neon while keeping neon-asm is not supported"
fi
case ${toolchain} in
+ # Apple iOS SDKs no longer support armv6 as of the version 9
+ # release (coincides with release of Xcode 7). Only enable media
+ # when using earlier SDK releases.
*-darwin*)
- # Neon is guaranteed on iOS 6+ devices, while old media extensions
- # no longer assemble with iOS 9 SDK
+ if [ "$(show_darwin_sdk_major_version iphoneos)" -lt 9 ]; then
+ soft_enable media
+ else
+ soft_disable media
+ RTCD_OPTIONS="${RTCD_OPTIONS}--disable-media "
+ fi
;;
*)
soft_enable media
+ ;;
esac
;;
armv6)
- soft_enable media
+ case ${toolchain} in
+ *-darwin*)
+ if [ "$(show_darwin_sdk_major_version iphoneos)" -lt 9 ]; then
+ soft_enable media
+ else
+ die "Your iOS SDK does not support armv6."
+ fi
+ ;;
+ *)
+ soft_enable media
+ ;;
+ esac
;;
esac
@@ -1003,6 +1044,12 @@
done
asm_conversion_cmd="${source_path}/build/make/ads2gas_apple.pl"
+
+ if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 ]; then
+ check_add_cflags -fembed-bitcode
+ check_add_asflags -fembed-bitcode
+ check_add_ldflags -fembed-bitcode
+ fi
;;
linux*)
@@ -1081,7 +1128,9 @@
CROSS=${CROSS:-g}
;;
os2)
+ disable_feature pic
AS=${AS:-nasm}
+ add_ldflags -Zhigh-mem
;;
esac
@@ -1171,7 +1220,8 @@
&& AS=""
fi
[ "${AS}" = auto ] || [ -z "${AS}" ] \
- && die "Neither yasm nor nasm have been found"
+ && die "Neither yasm nor nasm have been found." \
+ "See the prerequisites section in the README for more info."
;;
esac
log_echo " using $AS"
@@ -1210,6 +1260,13 @@
enabled x86 && sim_arch="-arch i386" || sim_arch="-arch x86_64"
add_cflags ${sim_arch}
add_ldflags ${sim_arch}
+
+ if [ "$(show_darwin_sdk_major_version iphonesimulator)" -gt 8 ]; then
+ # yasm v1.3.0 doesn't know what -fembed-bitcode means, so turning it
+ # on is pointless (unless building a C-only lib). Warn the user, but
+ # do nothing here.
+ log "Warning: Bitcode embed disabled for simulator targets."
+ fi
;;
os2)
add_asflags -f aout
@@ -1323,12 +1380,6 @@
add_cflags -D_LARGEFILE_SOURCE
add_cflags -D_FILE_OFFSET_BITS=64
fi
-
- # append any user defined extra cflags
- if [ -n "${extra_cflags}" ] ; then
- check_add_cflags ${extra_cflags} || \
- die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler"
- fi
}
process_toolchain() {
diff --git a/libvpx/build/make/iosbuild.sh b/libvpx/build/make/iosbuild.sh
index 89fa681..6f7180d 100755
--- a/libvpx/build/make/iosbuild.sh
+++ b/libvpx/build/make/iosbuild.sh
@@ -25,7 +25,6 @@
DIST_DIR="_dist"
FRAMEWORK_DIR="VPX.framework"
HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
-MAKE_JOBS=1
SCRIPT_DIR=$(dirname "$0")
LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd)
LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
@@ -41,15 +40,24 @@
build_target() {
local target="$1"
local old_pwd="$(pwd)"
+ local target_specific_flags=""
vlog "***Building target: ${target}***"
+ case "${target}" in
+ x86-*)
+ target_specific_flags="--enable-pic"
+ vlog "Enabled PIC for ${target}"
+ ;;
+ esac
+
mkdir "${target}"
cd "${target}"
eval "${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \
- ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${devnull}
+ ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${target_specific_flags} \
+ ${devnull}
export DIST_DIR
- eval make -j ${MAKE_JOBS} dist ${devnull}
+ eval make dist ${devnull}
cd "${old_pwd}"
vlog "***Done building target: ${target}***"
@@ -194,11 +202,12 @@
Usage: ${0##*/} [arguments]
--help: Display this message and exit.
--extra-configure-args <args>: Extra args to pass when configuring libvpx.
- --jobs: Number of make jobs.
--preserve-build-output: Do not delete the build directory.
--show-build-output: Show output from each library build.
--targets <targets>: Override default target list. Defaults:
${TARGETS}
+ --test-link: Confirms all targets can be linked. Functionally identical to
+ passing --enable-examples via --extra-configure-args.
--verbose: Output information about the environment and each stage of the
build.
EOF
@@ -227,16 +236,15 @@
iosbuild_usage
exit
;;
- --jobs)
- MAKE_JOBS="$2"
- shift
- ;;
--preserve-build-output)
PRESERVE_BUILD_OUTPUT=yes
;;
--show-build-output)
devnull=
;;
+ --test-link)
+ EXTRA_CONFIGURE_ARGS="${EXTRA_CONFIGURE_ARGS} --enable-examples"
+ ;;
--targets)
TARGETS="$2"
shift
@@ -260,11 +268,11 @@
EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS}
FRAMEWORK_DIR=${FRAMEWORK_DIR}
HEADER_DIR=${HEADER_DIR}
- MAKE_JOBS=${MAKE_JOBS}
- PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
LIBVPX_SOURCE_DIR=${LIBVPX_SOURCE_DIR}
LIPO=${LIPO}
+ MAKEFLAGS=${MAKEFLAGS}
ORIG_PWD=${ORIG_PWD}
+ PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
TARGETS="${TARGETS}"
EOF
fi
diff --git a/libvpx/configure b/libvpx/configure
index ac196da..a40f3ab 100755
--- a/libvpx/configure
+++ b/libvpx/configure
@@ -264,6 +264,7 @@
spatial_svc
fp_mb_stats
emulate_hardware
+ misc_fixes
"
CONFIG_LIST="
dependency_tracking
@@ -716,6 +717,16 @@
esac
# libwebm needs to be linked with C++ standard library
enabled webm_io && LD=${CXX}
+
+ # append any user defined extra cflags
+ if [ -n "${extra_cflags}" ] ; then
+ check_add_cflags ${extra_cflags} || \
+ die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler"
+ fi
+ if [ -n "${extra_cxxflags}" ]; then
+ check_add_cxxflags ${extra_cxxflags} || \
+ die "Requested extra CXXFLAGS '${extra_cxxflags}' not supported by compiler"
+ fi
}
diff --git a/libvpx/examples.mk b/libvpx/examples.mk
index dfa5a65..f10bec6 100644
--- a/libvpx/examples.mk
+++ b/libvpx/examples.mk
@@ -36,6 +36,8 @@
third_party/libyuv/source/scale_neon64.cc \
third_party/libyuv/source/scale_win.cc \
+LIBWEBM_COMMON_SRCS += third_party/libwebm/webmids.hpp
+
LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \
third_party/libwebm/mkvmuxerutil.cpp \
third_party/libwebm/mkvwriter.cpp \
@@ -43,8 +45,7 @@
third_party/libwebm/mkvmuxertypes.hpp \
third_party/libwebm/mkvmuxerutil.hpp \
third_party/libwebm/mkvparser.hpp \
- third_party/libwebm/mkvwriter.hpp \
- third_party/libwebm/webmids.hpp
+ third_party/libwebm/mkvwriter.hpp
LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser.cpp \
third_party/libwebm/mkvreader.cpp \
@@ -68,6 +69,7 @@
vpxdec.SRCS += $(LIBYUV_SRCS)
endif
ifeq ($(CONFIG_WEBM_IO),yes)
+ vpxdec.SRCS += $(LIBWEBM_COMMON_SRCS)
vpxdec.SRCS += $(LIBWEBM_PARSER_SRCS)
vpxdec.SRCS += webmdec.cc webmdec.h
endif
@@ -89,6 +91,7 @@
vpxenc.SRCS += $(LIBYUV_SRCS)
endif
ifeq ($(CONFIG_WEBM_IO),yes)
+ vpxenc.SRCS += $(LIBWEBM_COMMON_SRCS)
vpxenc.SRCS += $(LIBWEBM_MUXER_SRCS)
vpxenc.SRCS += webmenc.cc webmenc.h
endif
diff --git a/libvpx/examples/vp9_spatial_svc_encoder.c b/libvpx/examples/vp9_spatial_svc_encoder.c
index 5a60976..b26e987 100644
--- a/libvpx/examples/vp9_spatial_svc_encoder.c
+++ b/libvpx/examples/vp9_spatial_svc_encoder.c
@@ -25,6 +25,7 @@
#include "../tools_common.h"
#include "../video_writer.h"
+#include "../vpx_ports/vpx_timer.h"
#include "vpx/svc_context.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
@@ -79,6 +80,8 @@
ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
static const arg_def_t speed_arg =
ARG_DEF("sp", "speed", 1, "speed configuration");
+static const arg_def_t aqmode_arg =
+ ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
#if CONFIG_VP9_HIGHBITDEPTH
static const struct arg_enum_list bitdepth_enum[] = {
@@ -100,7 +103,7 @@
&kf_dist_arg, &scale_factors_arg, &passes_arg, &pass_arg,
&fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg,
&max_bitrate_arg, &temporal_layers_arg, &temporal_layering_mode_arg,
- &lag_in_frame_arg, &threads_arg,
+ &lag_in_frame_arg, &threads_arg, &aqmode_arg,
#if OUTPUT_RC_STATS
&output_rc_stats_arg,
#endif
@@ -220,6 +223,8 @@
#endif
} else if (arg_match(&arg, &speed_arg, argi)) {
svc_ctx->speed = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &aqmode_arg, argi)) {
+ svc_ctx->aqmode = arg_parse_uint(&arg);
} else if (arg_match(&arg, &threads_arg, argi)) {
svc_ctx->threads = arg_parse_uint(&arg);
} else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
@@ -539,6 +544,59 @@
}
#endif
+// Example pattern for spatial layers and 2 temporal layers used in the
+// bypass/flexible mode. The pattern corresponds to the pattern
+// VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
+// non-flexible mode.
+//
+// For each spatial layer in [0, num_spatial_layers) this writes the encode
+// flags (frame_flags[]) and the last/golden/alt-ref buffer indices into
+// |ref_frame_config|. Only tl == 0 and tl == 1 are handled; for any other
+// value nothing is written. The incoming value of |sl| is ignored: the
+// parameter is only used as the loop counter.
+void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
+                                 int is_key_frame,
+                                 vpx_svc_ref_frame_config_t *ref_frame_config) {
+  for (sl = 0; sl < num_spatial_layers; ++sl) {
+    if (tl == 0) {
+      // Flags shared by every spatial layer of temporal layer 0.
+      int flags = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF |
+                  VP8_EFLAG_NO_UPD_ARF;
+      if (sl == 0) {
+        flags |= VP8_EFLAG_NO_REF_GF;
+      } else if (is_key_frame) {
+        flags |= VP8_EFLAG_NO_REF_LAST;
+      }
+      ref_frame_config->frame_flags[sl] = flags;
+
+      ref_frame_config->lst_fb_idx[sl] = sl;
+      // Golden points at the spatial layer below; layer 0 uses slot 0.
+      ref_frame_config->gld_fb_idx[sl] = (sl != 0) ? sl - 1 : 0;
+      ref_frame_config->alt_fb_idx[sl] = 0;
+    } else if (tl == 1) {
+      // Flags shared by every spatial layer of temporal layer 1.
+      int flags = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
+                  VP8_EFLAG_NO_UPD_GF;
+      if (sl == 0) {
+        flags |= VP8_EFLAG_NO_REF_GF;
+      }
+      ref_frame_config->frame_flags[sl] = flags;
+
+      ref_frame_config->lst_fb_idx[sl] = sl;
+      ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1;
+      ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl;
+    }
+  }
+}
+
int main(int argc, const char **argv) {
AppInput app_input = {0};
VpxVideoWriter *writer = NULL;
@@ -559,11 +617,14 @@
VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
struct RateControlStats rc;
vpx_svc_layer_id_t layer_id;
+ vpx_svc_ref_frame_config_t ref_frame_config;
int sl, tl;
double sum_bitrate = 0.0;
double sum_bitrate2 = 0.0;
double framerate = 30.0;
#endif
+ struct vpx_usec_timer timer;
+ int64_t cx_time = 0;
memset(&svc_ctx, 0, sizeof(svc_ctx));
svc_ctx.log_print = 1;
exec_name = argv[0];
@@ -632,6 +693,9 @@
vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
if (svc_ctx.threads)
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
+ if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
+ vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
+
// Encode frames
while (!end_of_stream) {
@@ -643,9 +707,36 @@
end_of_stream = 1;
}
+ // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates)
+ // and the buffer indices for each spatial layer of the current
+ // (super)frame to be encoded. The temporal layer_id for the current frame
+ // also needs to be set.
+ // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
+ // mode to "VP9E_LAYERING_MODE_BYPASS".
+ if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ // Example for 2 temporal layers.
+ if (frame_cnt % 2 == 0)
+ layer_id.temporal_layer_id = 0;
+ else
+ layer_id.temporal_layer_id = 1;
+ // Note that we only set the temporal layer_id, since we are calling
+ // the encode for the whole superframe. The encoder will internally loop
+ // over all the spatial layers for the current superframe.
+ vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
+ set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id,
+ svc_ctx.spatial_layers,
+ frame_cnt == 0,
+ &ref_frame_config);
+ vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG,
+ &ref_frame_config);
+ }
+
+ vpx_usec_timer_start(&timer);
res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
pts, frame_duration, svc_ctx.speed >= 5 ?
VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
+ vpx_usec_timer_mark(&timer);
+ cx_time += vpx_usec_timer_elapsed(&timer);
printf("%s", vpx_svc_get_message(&svc_ctx));
if (res != VPX_CODEC_OK) {
@@ -784,6 +875,10 @@
}
}
#endif
+ printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
+ frame_cnt,
+ 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
+ 1000000 * (double)frame_cnt / (double)cx_time);
vpx_img_free(&raw);
// display average size, psnr
printf("%s", vpx_svc_dump_statistics(&svc_ctx));
diff --git a/libvpx/examples/vpx_temporal_svc_encoder.c b/libvpx/examples/vpx_temporal_svc_encoder.c
index ee7de6b..5adda9e 100644
--- a/libvpx/examples/vpx_temporal_svc_encoder.c
+++ b/libvpx/examples/vpx_temporal_svc_encoder.c
@@ -684,14 +684,14 @@
if (strncmp(encoder->name, "vp8", 3) == 0) {
vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
- vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
+ vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
} else if (strncmp(encoder->name, "vp9", 3) == 0) {
vpx_svc_extra_cfg_t svc_params;
vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
- vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
+ vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0))
diff --git a/libvpx/libs.mk b/libvpx/libs.mk
index b9d4b28..f28d84a 100644
--- a/libvpx/libs.mk
+++ b/libvpx/libs.mk
@@ -53,7 +53,7 @@
include $(SRC_PATH_BARE)/vpx_util/vpx_util.mk
CODEC_SRCS-yes += $(addprefix vpx_util/,$(call enabled,UTIL_SRCS))
-ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
+ifeq ($(CONFIG_VP8),yes)
VP8_PREFIX=vp8/
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
endif
@@ -76,7 +76,7 @@
CODEC_DOC_SECTIONS += vp8 vp8_decoder
endif
-ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
+ifeq ($(CONFIG_VP9),yes)
VP9_PREFIX=vp9/
include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk
endif
@@ -110,7 +110,7 @@
$(BUILD_PFX)$(VP9_PREFIX)%.c.o: CFLAGS += -Wextra
# VP10 make file
-ifneq ($(CONFIG_VP10_ENCODER)$(CONFIG_VP10_DECODER),)
+ifeq ($(CONFIG_VP10),yes)
VP10_PREFIX=vp10/
include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10_common.mk
endif
@@ -260,7 +260,7 @@
LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
-SO_VERSION_MAJOR := 2
+SO_VERSION_MAJOR := 3
SO_VERSION_MINOR := 0
SO_VERSION_PATCH := 0
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
diff --git a/libvpx/test/active_map_refresh_test.cc b/libvpx/test/active_map_refresh_test.cc
new file mode 100644
index 0000000..c945661
--- /dev/null
+++ b/libvpx/test/active_map_refresh_test.cc
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <algorithm>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+
+// Check if any pixel in a 16x16 macroblock varies between frames.
+// Compares all three planes of |current| and |previous| over the macroblock
+// at (mb_r, mb_c), with the bounds clamped to the displayed size (d_w x d_h)
+// of |current|. For planes 1 and 2 the bounds are halved (upper bound rounded
+// up) when the corresponding chroma shift of |current| is set.
+// Returns 1 at the first differing sample, 0 if no sample differs.
+int CheckMb(const vpx_image_t &current, const vpx_image_t &previous,
+            int mb_r, int mb_c) {
+  for (int plane = 0; plane < 3; plane++) {
+    int r = 16 * mb_r;
+    int c0 = 16 * mb_c;
+    int r_top = std::min(r + 16, static_cast<int>(current.d_h));
+    int c_top = std::min(c0 + 16, static_cast<int>(current.d_w));
+    r = std::max(r, 0);
+    c0 = std::max(c0, 0);
+    if (plane > 0 && current.x_chroma_shift) {
+      c_top = (c_top + 1) >> 1;
+      c0 >>= 1;
+    }
+    if (plane > 0 && current.y_chroma_shift) {
+      r_top = (r_top + 1) >> 1;
+      r >>= 1;
+    }
+    for (; r < r_top; ++r) {
+      for (int c = c0; c < c_top; ++c) {
+        // Each image is indexed with its own stride for this plane.
+        if (current.planes[plane][current.stride[plane] * r + c] !=
+            previous.planes[plane][previous.stride[plane] * r + c])
+          return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+// Fills |map| with one entry per macroblock, stored row by row
+// (index = mb_r * mb_cols + mb_c): the CheckMb() result for that macroblock,
+// i.e. 1 where |current| and |previous| differ and 0 where they do not.
+// |map| must have room for mb_rows * mb_cols entries.
+void GenerateMap(int mb_rows, int mb_cols, const vpx_image_t &current,
+                 const vpx_image_t &previous, uint8_t *map) {
+  for (int mb_r = 0; mb_r < mb_rows; ++mb_r) {
+    for (int mb_c = 0; mb_c < mb_cols; ++mb_c) {
+      map[mb_r * mb_cols + mb_c] = CheckMb(current, previous, mb_r, mb_c);
+    }
+  }
+}
+
+const int kAqModeCyclicRefresh = 3;
+
+// Encoder test fixture parameterized on (test mode, cpu-used value).
+// Before each frame is encoded it either configures the encoder (when
+// video->frame() == 1) or installs an active map computed from the pixel
+// differences between the current source frame and the frame held by
+// |y4m_holder_| (when video->frame() >= 2).
+class ActiveMapRefreshTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ protected:
+  // Members get defined values up front; SetUp() and the test body assign
+  // the real ones before the encode loop runs.
+  ActiveMapRefreshTest()
+      : EncoderTest(GET_PARAM(0)), cpu_used_(0), y4m_holder_(NULL) {}
+  virtual ~ActiveMapRefreshTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    cpu_used_ = GET_PARAM(2);
+  }
+
+  // Per-frame hook. |video| is cast to Y4mVideoSource without a runtime
+  // check, so the test must drive this fixture with a Y4mVideoSource.
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    ::libvpx_test::Y4mVideoSource *y4m_video =
+        static_cast<libvpx_test::Y4mVideoSource *>(video);
+    if (video->frame() == 1) {
+      encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
+      encoder->Control(VP9E_SET_AQ_MODE, kAqModeCyclicRefresh);
+    } else if (video->frame() >= 2 && video->img()) {
+      vpx_image_t *current = video->img();
+      vpx_image_t *previous = y4m_holder_->img();
+      ASSERT_TRUE(previous != NULL);
+      vpx_active_map_t map = vpx_active_map_t();
+      const int width = static_cast<int>(current->d_w);
+      const int height = static_cast<int>(current->d_h);
+      // Number of 16x16 macroblocks, rounding partial blocks up.
+      const int mb_width = (width + 15) / 16;
+      const int mb_height = (height + 15) / 16;
+      uint8_t *active_map = new uint8_t[mb_width * mb_height];
+      GenerateMap(mb_height, mb_width, *current, *previous, active_map);
+      map.cols = mb_width;
+      map.rows = mb_height;
+      map.active_map = active_map;
+      encoder->Control(VP8E_SET_ACTIVEMAP, &map);
+      // The buffer is released right after the control call returns.
+      delete[] active_map;
+    }
+    if (video->img()) {
+      // NOTE(review): presumably exchanges frame buffers so that
+      // |y4m_holder_| keeps this frame as "previous" for the next call;
+      // confirm against Y4mVideoSource::SwapBuffers.
+      y4m_video->SwapBuffers(y4m_holder_);
+    }
+  }
+
+  int cpu_used_;
+  // Not owned; set by the test body to a source that outlives RunLoop().
+  ::libvpx_test::Y4mVideoSource *y4m_holder_;
+};
+
+// Encodes desktop_credits.y4m with zero lag, one-pass CBR rate control and
+// resizing disabled while the fixture's PreEncodeFrameHook supplies a
+// per-frame active map. The test passes if the encode loop finishes without
+// a fatal failure.
+TEST_P(ActiveMapRefreshTest, Test) {
+ cfg_.g_lag_in_frames = 0;
+ cfg_.g_profile = 1;
+ cfg_.rc_target_bitrate = 600;
+ cfg_.rc_resize_allowed = 0;
+ cfg_.rc_min_quantizer = 8;
+ cfg_.rc_max_quantizer = 30;
+ cfg_.g_pass = VPX_RC_ONE_PASS;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.kf_max_dist = 90000;
+
+ // Two sources over the same clip: |video| drives the encoder, while
+ // |video_holder| is handed to the fixture through y4m_holder_, where the
+ // hook reads the previous frame from it and swaps buffers with |video|.
+ // NOTE(review): the (0, 30) arguments look like start frame and frame
+ // limit; confirm against Y4mVideoSource.
+ ::libvpx_test::Y4mVideoSource video("desktop_credits.y4m", 0, 30);
+ ::libvpx_test::Y4mVideoSource video_holder("desktop_credits.y4m", 0, 30);
+ video_holder.Begin();
+ y4m_holder_ = &video_holder;
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+VP9_INSTANTIATE_TEST_CASE(ActiveMapRefreshTest,
+ ::testing::Values(::libvpx_test::kRealTime),
+ ::testing::Range(5, 6));
+} // namespace
diff --git a/libvpx/test/convolve_test.cc b/libvpx/test/convolve_test.cc
index e0e929e..0826788 100644
--- a/libvpx/test/convolve_test.cc
+++ b/libvpx/test/convolve_test.cc
@@ -960,511 +960,72 @@
using std::tr1::make_tuple;
#if CONFIG_VP9_HIGHBITDEPTH
+// WRAP(func, bd) defines a function named wrap_<func>_<bd> that takes the
+// ten-argument convolve parameter list (src, dst, strides, x/y filters and
+// their strides, w, h) and forwards every argument unchanged to
+// vpx_highbd_<func>, appending |bd| as the final argument. This replaces the
+// hand-written per-function, per-bit-depth wrappers with one line each.
+#define WRAP(func, bd) \
+void wrap_ ## func ## _ ## bd(const uint8_t *src, ptrdiff_t src_stride, \
+ uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *filter_x, \
+ int filter_x_stride, \
+ const int16_t *filter_y, \
+ int filter_y_stride, \
+ int w, int h) { \
+ vpx_highbd_ ## func(src, src_stride, dst, dst_stride, filter_x, \
+ filter_x_stride, filter_y, filter_y_stride, \
+ w, h, bd); \
+}
#if HAVE_SSE2 && ARCH_X86_64
-void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x,
- filter_x_stride, filter_y, filter_y_stride,
- w, h, 8);
-}
-
-void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_horiz_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
-
-void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
-
-void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_vert_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
-
-void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
-
-void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
-
-void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_sse2(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
+#if CONFIG_USE_X86INC
+WRAP(convolve_copy_sse2, 8)
+WRAP(convolve_avg_sse2, 8)
+WRAP(convolve_copy_sse2, 10)
+WRAP(convolve_avg_sse2, 10)
+WRAP(convolve_copy_sse2, 12)
+WRAP(convolve_avg_sse2, 12)
+#endif // CONFIG_USE_X86INC
+WRAP(convolve8_horiz_sse2, 8)
+WRAP(convolve8_avg_horiz_sse2, 8)
+WRAP(convolve8_vert_sse2, 8)
+WRAP(convolve8_avg_vert_sse2, 8)
+WRAP(convolve8_sse2, 8)
+WRAP(convolve8_avg_sse2, 8)
+WRAP(convolve8_horiz_sse2, 10)
+WRAP(convolve8_avg_horiz_sse2, 10)
+WRAP(convolve8_vert_sse2, 10)
+WRAP(convolve8_avg_vert_sse2, 10)
+WRAP(convolve8_sse2, 10)
+WRAP(convolve8_avg_sse2, 10)
+WRAP(convolve8_horiz_sse2, 12)
+WRAP(convolve8_avg_horiz_sse2, 12)
+WRAP(convolve8_vert_sse2, 12)
+WRAP(convolve8_avg_vert_sse2, 12)
+WRAP(convolve8_sse2, 12)
+WRAP(convolve8_avg_sse2, 12)
#endif // HAVE_SSE2 && ARCH_X86_64
-void wrap_convolve_copy_c_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve8_c_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve_copy_c_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve8_avg_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve8_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve8_avg_vert_c_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve8_c_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve_copy_c_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
-
-void wrap_convolve_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
-
-void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
-
-void wrap_convolve8_avg_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
-
-void wrap_convolve8_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
-
-void wrap_convolve8_avg_vert_c_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
-
-void wrap_convolve8_c_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
-
-void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x,
- int filter_x_stride,
- const int16_t *filter_y,
- int filter_y_stride,
- int w, int h) {
- vpx_highbd_convolve8_avg_c(src, src_stride, dst, dst_stride,
- filter_x, filter_x_stride,
- filter_y, filter_y_stride, w, h, 12);
-}
+WRAP(convolve_copy_c, 8)
+WRAP(convolve_avg_c, 8)
+WRAP(convolve8_horiz_c, 8)
+WRAP(convolve8_avg_horiz_c, 8)
+WRAP(convolve8_vert_c, 8)
+WRAP(convolve8_avg_vert_c, 8)
+WRAP(convolve8_c, 8)
+WRAP(convolve8_avg_c, 8)
+WRAP(convolve_copy_c, 10)
+WRAP(convolve_avg_c, 10)
+WRAP(convolve8_horiz_c, 10)
+WRAP(convolve8_avg_horiz_c, 10)
+WRAP(convolve8_vert_c, 10)
+WRAP(convolve8_avg_vert_c, 10)
+WRAP(convolve8_c, 10)
+WRAP(convolve8_avg_c, 10)
+WRAP(convolve_copy_c, 12)
+WRAP(convolve_avg_c, 12)
+WRAP(convolve8_horiz_c, 12)
+WRAP(convolve8_avg_horiz_c, 12)
+WRAP(convolve8_vert_c, 12)
+WRAP(convolve8_avg_vert_c, 12)
+WRAP(convolve8_c, 12)
+WRAP(convolve8_avg_c, 12)
+#undef WRAP
const ConvolveFunctions convolve8_c(
wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
@@ -1563,7 +1124,11 @@
#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_VP9_HIGHBITDEPTH
const ConvolveFunctions convolve8_sse2(
+#if CONFIG_USE_X86INC
+ wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
+#else
wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
+#endif // CONFIG_USE_X86INC
wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
@@ -1571,7 +1136,11 @@
wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
const ConvolveFunctions convolve10_sse2(
+#if CONFIG_USE_X86INC
+ wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10,
+#else
wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
+#endif // CONFIG_USE_X86INC
wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
@@ -1579,7 +1148,11 @@
wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
const ConvolveFunctions convolve12_sse2(
+#if CONFIG_USE_X86INC
+ wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12,
+#else
wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
+#endif // CONFIG_USE_X86INC
wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
diff --git a/libvpx/test/dct16x16_test.cc b/libvpx/test/dct16x16_test.cc
index e9de76a..332210d 100644
--- a/libvpx/test/dct16x16_test.cc
+++ b/libvpx/test/dct16x16_test.cc
@@ -40,30 +40,6 @@
#endif
const int kNumCoeffs = 256;
-const double PI = 3.1415926535898;
-void reference2_16x16_idct_2d(double *input, double *output) {
- double x;
- for (int l = 0; l < 16; ++l) {
- for (int k = 0; k < 16; ++k) {
- double s = 0;
- for (int i = 0; i < 16; ++i) {
- for (int j = 0; j < 16; ++j) {
- x = cos(PI * j * (l + 0.5) / 16.0) *
- cos(PI * i * (k + 0.5) / 16.0) *
- input[i * 16 + j] / 256;
- if (i != 0)
- x *= sqrt(2.0);
- if (j != 0)
- x *= sqrt(2.0);
- s += x;
- }
- }
- output[k*16+l] = s;
- }
- }
-}
-
-
const double C1 = 0.995184726672197;
const double C2 = 0.98078528040323;
const double C3 = 0.956940335732209;
diff --git a/libvpx/test/encode_test_driver.cc b/libvpx/test/encode_test_driver.cc
index be4ef9a..128436e 100644
--- a/libvpx/test/encode_test_driver.cc
+++ b/libvpx/test/encode_test_driver.cc
@@ -195,6 +195,7 @@
video->Begin();
encoder->InitEncoder(video);
+ ASSERT_FALSE(::testing::Test::HasFatalFailure());
unsigned long dec_init_flags = 0; // NOLINT
// Use fragment decoder if encoder outputs partitions.
diff --git a/libvpx/test/encode_test_driver.h b/libvpx/test/encode_test_driver.h
index 9ecc498..6d0a72f 100644
--- a/libvpx/test/encode_test_driver.h
+++ b/libvpx/test/encode_test_driver.h
@@ -124,6 +124,11 @@
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
+ void Control(int ctrl_id, int *arg) {
+ const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
+ ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+ }
+
void Control(int ctrl_id, struct vpx_scaling_mode *arg) {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
diff --git a/libvpx/test/error_resilience_test.cc b/libvpx/test/error_resilience_test.cc
index 9e512ad..9a2ad2f 100644
--- a/libvpx/test/error_resilience_test.cc
+++ b/libvpx/test/error_resilience_test.cc
@@ -20,10 +20,11 @@
const int kMaxDroppableFrames = 12;
class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
- public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, bool> {
protected:
ErrorResilienceTestLarge()
: EncoderTest(GET_PARAM(0)),
+ svc_support_(GET_PARAM(2)),
psnr_(0.0),
nframes_(0),
mismatch_psnr_(0.0),
@@ -193,6 +194,8 @@
pattern_switch_ = frame_switch;
}
+ bool svc_support_;
+
private:
double psnr_;
unsigned int nframes_;
@@ -302,6 +305,10 @@
// two layer temporal pattern. The base layer does not predict from the top
// layer, so successful decoding is expected.
TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) {
+ // This test doesn't run if SVC is not supported.
+ if (!svc_support_)
+ return;
+
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
cfg_.rc_target_bitrate = 500;
@@ -347,6 +354,10 @@
// for a two layer temporal pattern, where at some point in the
// sequence, the LAST ref is not used anymore.
TEST_P(ErrorResilienceTestLarge, 2LayersNoRefLast) {
+ // This test doesn't run if SVC is not supported.
+ if (!svc_support_)
+ return;
+
const vpx_rational timebase = { 33333333, 1000000000 };
cfg_.g_timebase = timebase;
cfg_.rc_target_bitrate = 500;
@@ -579,9 +590,13 @@
}
}
-VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
+VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
+ ::testing::Values(true));
VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLargeCodecControls,
ONE_PASS_TEST_MODES);
-VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
-VP10_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
+VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
+ ::testing::Values(true));
+// SVC-related tests don't run for VP10 since SVC is not supported.
+VP10_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
+ ::testing::Values(false));
} // namespace
diff --git a/libvpx/test/frame_size_tests.cc b/libvpx/test/frame_size_tests.cc
index 95cc66a..d39c8f6 100644
--- a/libvpx/test/frame_size_tests.cc
+++ b/libvpx/test/frame_size_tests.cc
@@ -74,7 +74,7 @@
// size or almost 1 gig of memory.
// In total the allocations will exceed 2GiB which may cause a failure with
// mingw + wine, use a smaller size in that case.
-#if defined(_WIN32) && !defined(_WIN64)
+#if defined(_WIN32) && !defined(_WIN64) || defined(__OS2__)
video.SetSize(4096, 3072);
#else
video.SetSize(4096, 4096);
diff --git a/libvpx/test/idct8x8_test.cc b/libvpx/test/idct8x8_test.cc
index 987ba75..7f9d751 100644
--- a/libvpx/test/idct8x8_test.cc
+++ b/libvpx/test/idct8x8_test.cc
@@ -67,43 +67,6 @@
output[i] *= 2;
}
-void reference_idct_1d(double input[8], double output[8]) {
- const double kPi = 3.141592653589793238462643383279502884;
- const double kSqrt2 = 1.414213562373095048801688724209698;
- for (int k = 0; k < 8; k++) {
- output[k] = 0.0;
- for (int n = 0; n < 8; n++) {
- output[k] += input[n]*cos(kPi*(2*k+1)*n/16.0);
- if (n == 0)
- output[k] = output[k]/kSqrt2;
- }
- }
-}
-
-void reference_idct_2d(double input[64], int16_t output[64]) {
- double out[64], out2[64];
- // First transform rows
- for (int i = 0; i < 8; ++i) {
- double temp_in[8], temp_out[8];
- for (int j = 0; j < 8; ++j)
- temp_in[j] = input[j + i*8];
- reference_idct_1d(temp_in, temp_out);
- for (int j = 0; j < 8; ++j)
- out[j + i*8] = temp_out[j];
- }
- // Then transform columns
- for (int i = 0; i < 8; ++i) {
- double temp_in[8], temp_out[8];
- for (int j = 0; j < 8; ++j)
- temp_in[j] = out[j*8 + i];
- reference_idct_1d(temp_in, temp_out);
- for (int j = 0; j < 8; ++j)
- out2[j*8 + i] = temp_out[j];
- }
- for (int i = 0; i < 64; ++i)
- output[i] = round(out2[i]/32);
-}
-
TEST(VP9Idct8x8Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 10000;
diff --git a/libvpx/test/intrapred_test.cc b/libvpx/test/intrapred_test.cc
deleted file mode 100644
index 65a0697..0000000
--- a/libvpx/test/intrapred_test.cc
+++ /dev/null
@@ -1,406 +0,0 @@
-/*
- * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <string.h>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "vp8/common/blockd.h"
-#include "vpx_mem/vpx_mem.h"
-
-namespace {
-
-using libvpx_test::ACMRandom;
-
-class IntraPredBase {
- public:
- virtual ~IntraPredBase() { libvpx_test::ClearSystemState(); }
-
- protected:
- void SetupMacroblock(MACROBLOCKD *mbptr,
- MODE_INFO *miptr,
- uint8_t *data,
- int block_size,
- int stride,
- int num_planes) {
- mbptr_ = mbptr;
- miptr_ = miptr;
- mbptr_->up_available = 1;
- mbptr_->left_available = 1;
- mbptr_->mode_info_context = miptr_;
- stride_ = stride;
- block_size_ = block_size;
- num_planes_ = num_planes;
- for (int p = 0; p < num_planes; p++)
- data_ptr_[p] = data + stride * (block_size + 1) * p +
- stride + block_size;
- }
-
- void FillRandom() {
- // Fill edges with random data
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- for (int p = 0; p < num_planes_; p++) {
- for (int x = -1 ; x <= block_size_; x++)
- data_ptr_[p][x - stride_] = rnd.Rand8();
- for (int y = 0; y < block_size_; y++)
- data_ptr_[p][y * stride_ - 1] = rnd.Rand8();
- }
- }
-
- virtual void Predict(MB_PREDICTION_MODE mode) = 0;
-
- void SetLeftUnavailable() {
- mbptr_->left_available = 0;
- for (int p = 0; p < num_planes_; p++)
- for (int i = -1; i < block_size_; ++i)
- data_ptr_[p][stride_ * i - 1] = 129;
- }
-
- void SetTopUnavailable() {
- mbptr_->up_available = 0;
- for (int p = 0; p < num_planes_; p++)
- memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2);
- }
-
- void SetTopLeftUnavailable() {
- SetLeftUnavailable();
- SetTopUnavailable();
- }
-
- int BlockSizeLog2Min1() const {
- switch (block_size_) {
- case 16:
- return 3;
- case 8:
- return 2;
- default:
- return 0;
- }
- }
-
- // check DC prediction output against a reference
- void CheckDCPrediction() const {
- for (int p = 0; p < num_planes_; p++) {
- // calculate expected DC
- int expected;
- if (mbptr_->up_available || mbptr_->left_available) {
- int sum = 0, shift = BlockSizeLog2Min1() + mbptr_->up_available +
- mbptr_->left_available;
- if (mbptr_->up_available)
- for (int x = 0; x < block_size_; x++)
- sum += data_ptr_[p][x - stride_];
- if (mbptr_->left_available)
- for (int y = 0; y < block_size_; y++)
- sum += data_ptr_[p][y * stride_ - 1];
- expected = (sum + (1 << (shift - 1))) >> shift;
- } else {
- expected = 0x80;
- }
- // check that all subsequent lines are equal to the first
- for (int y = 1; y < block_size_; ++y)
- ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_],
- block_size_));
- // within the first line, ensure that each pixel has the same value
- for (int x = 1; x < block_size_; ++x)
- ASSERT_EQ(data_ptr_[p][0], data_ptr_[p][x]);
- // now ensure that that pixel has the expected (DC) value
- ASSERT_EQ(expected, data_ptr_[p][0]);
- }
- }
-
- // check V prediction output against a reference
- void CheckVPrediction() const {
- // check that all lines equal the top border
- for (int p = 0; p < num_planes_; p++)
- for (int y = 0; y < block_size_; y++)
- ASSERT_EQ(0, memcmp(&data_ptr_[p][-stride_],
- &data_ptr_[p][y * stride_], block_size_));
- }
-
- // check H prediction output against a reference
- void CheckHPrediction() const {
- // for each line, ensure that each pixel is equal to the left border
- for (int p = 0; p < num_planes_; p++)
- for (int y = 0; y < block_size_; y++)
- for (int x = 0; x < block_size_; x++)
- ASSERT_EQ(data_ptr_[p][-1 + y * stride_],
- data_ptr_[p][x + y * stride_]);
- }
-
- static int ClipByte(int value) {
- if (value > 255)
- return 255;
- else if (value < 0)
- return 0;
- return value;
- }
-
- // check TM prediction output against a reference
- void CheckTMPrediction() const {
- for (int p = 0; p < num_planes_; p++)
- for (int y = 0; y < block_size_; y++)
- for (int x = 0; x < block_size_; x++) {
- const int expected = ClipByte(data_ptr_[p][x - stride_]
- + data_ptr_[p][stride_ * y - 1]
- - data_ptr_[p][-1 - stride_]);
- ASSERT_EQ(expected, data_ptr_[p][y * stride_ + x]);
- }
- }
-
- // Actual test
- void RunTest() {
- {
- SCOPED_TRACE("DC_PRED");
- FillRandom();
- Predict(DC_PRED);
- CheckDCPrediction();
- }
- {
- SCOPED_TRACE("DC_PRED LEFT");
- FillRandom();
- SetLeftUnavailable();
- Predict(DC_PRED);
- CheckDCPrediction();
- }
- {
- SCOPED_TRACE("DC_PRED TOP");
- FillRandom();
- SetTopUnavailable();
- Predict(DC_PRED);
- CheckDCPrediction();
- }
- {
- SCOPED_TRACE("DC_PRED TOP_LEFT");
- FillRandom();
- SetTopLeftUnavailable();
- Predict(DC_PRED);
- CheckDCPrediction();
- }
- {
- SCOPED_TRACE("H_PRED");
- FillRandom();
- Predict(H_PRED);
- CheckHPrediction();
- }
- {
- SCOPED_TRACE("V_PRED");
- FillRandom();
- Predict(V_PRED);
- CheckVPrediction();
- }
- {
- SCOPED_TRACE("TM_PRED");
- FillRandom();
- Predict(TM_PRED);
- CheckTMPrediction();
- }
- }
-
- MACROBLOCKD *mbptr_;
- MODE_INFO *miptr_;
- uint8_t *data_ptr_[2]; // in the case of Y, only [0] is used
- int stride_;
- int block_size_;
- int num_planes_;
-};
-
-typedef void (*IntraPredYFunc)(MACROBLOCKD *x,
- uint8_t *yabove_row,
- uint8_t *yleft,
- int left_stride,
- uint8_t *ypred_ptr,
- int y_stride);
-
-class IntraPredYTest
- : public IntraPredBase,
- public ::testing::TestWithParam<IntraPredYFunc> {
- public:
- static void SetUpTestCase() {
- mb_ = reinterpret_cast<MACROBLOCKD*>(
- vpx_memalign(32, sizeof(MACROBLOCKD)));
- mi_ = reinterpret_cast<MODE_INFO*>(
- vpx_memalign(32, sizeof(MODE_INFO)));
- data_array_ = reinterpret_cast<uint8_t*>(
- vpx_memalign(kDataAlignment, kDataBufferSize));
- }
-
- static void TearDownTestCase() {
- vpx_free(data_array_);
- vpx_free(mi_);
- vpx_free(mb_);
- data_array_ = NULL;
- }
-
- protected:
- static const int kBlockSize = 16;
- static const int kDataAlignment = 16;
- static const int kStride = kBlockSize * 3;
- // We use 48 so that the data pointer of the first pixel in each row of
- // each macroblock is 16-byte aligned, and this gives us access to the
- // top-left and top-right corner pixels belonging to the top-left/right
- // macroblocks.
- // We use 17 lines so we have one line above us for top-prediction.
- static const int kDataBufferSize = kStride * (kBlockSize + 1);
-
- virtual void SetUp() {
- pred_fn_ = GetParam();
- SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 1);
- }
-
- virtual void Predict(MB_PREDICTION_MODE mode) {
- mbptr_->mode_info_context->mbmi.mode = mode;
- ASM_REGISTER_STATE_CHECK(pred_fn_(mbptr_,
- data_ptr_[0] - kStride,
- data_ptr_[0] - 1, kStride,
- data_ptr_[0], kStride));
- }
-
- IntraPredYFunc pred_fn_;
- static uint8_t* data_array_;
- static MACROBLOCKD * mb_;
- static MODE_INFO *mi_;
-};
-
-MACROBLOCKD* IntraPredYTest::mb_ = NULL;
-MODE_INFO* IntraPredYTest::mi_ = NULL;
-uint8_t* IntraPredYTest::data_array_ = NULL;
-
-TEST_P(IntraPredYTest, IntraPredTests) {
- RunTest();
-}
-
-INSTANTIATE_TEST_CASE_P(C, IntraPredYTest,
- ::testing::Values(
- vp8_build_intra_predictors_mby_s_c));
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest,
- ::testing::Values(
- vp8_build_intra_predictors_mby_s_sse2));
-#endif
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredYTest,
- ::testing::Values(
- vp8_build_intra_predictors_mby_s_ssse3));
-#endif
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, IntraPredYTest,
- ::testing::Values(
- vp8_build_intra_predictors_mby_s_neon));
-#endif
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, IntraPredYTest,
- ::testing::Values(
- vp8_build_intra_predictors_mby_s_msa));
-#endif
-
-typedef void (*IntraPredUvFunc)(MACROBLOCKD *x,
- uint8_t *uabove_row,
- uint8_t *vabove_row,
- uint8_t *uleft,
- uint8_t *vleft,
- int left_stride,
- uint8_t *upred_ptr,
- uint8_t *vpred_ptr,
- int pred_stride);
-
-class IntraPredUVTest
- : public IntraPredBase,
- public ::testing::TestWithParam<IntraPredUvFunc> {
- public:
- static void SetUpTestCase() {
- mb_ = reinterpret_cast<MACROBLOCKD*>(
- vpx_memalign(32, sizeof(MACROBLOCKD)));
- mi_ = reinterpret_cast<MODE_INFO*>(
- vpx_memalign(32, sizeof(MODE_INFO)));
- data_array_ = reinterpret_cast<uint8_t*>(
- vpx_memalign(kDataAlignment, kDataBufferSize));
- }
-
- static void TearDownTestCase() {
- vpx_free(data_array_);
- vpx_free(mi_);
- vpx_free(mb_);
- data_array_ = NULL;
- }
-
- protected:
- static const int kBlockSize = 8;
- static const int kDataAlignment = 8;
- static const int kStride = kBlockSize * 3;
- // We use 24 so that the data pointer of the first pixel in each row of
- // each macroblock is 8-byte aligned, and this gives us access to the
- // top-left and top-right corner pixels belonging to the top-left/right
- // macroblocks.
- // We use 9 lines so we have one line above us for top-prediction.
- // [0] = U, [1] = V
- static const int kDataBufferSize = 2 * kStride * (kBlockSize + 1);
-
- virtual void SetUp() {
- pred_fn_ = GetParam();
- SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 2);
- }
-
- virtual void Predict(MB_PREDICTION_MODE mode) {
- mbptr_->mode_info_context->mbmi.uv_mode = mode;
- pred_fn_(mbptr_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
- data_ptr_[0] - 1, data_ptr_[1] - 1, kStride,
- data_ptr_[0], data_ptr_[1], kStride);
- }
-
- IntraPredUvFunc pred_fn_;
- // We use 24 so that the data pointer of the first pixel in each row of
- // each macroblock is 8-byte aligned, and this gives us access to the
- // top-left and top-right corner pixels belonging to the top-left/right
- // macroblocks.
- // We use 9 lines so we have one line above us for top-prediction.
- // [0] = U, [1] = V
- static uint8_t* data_array_;
- static MACROBLOCKD* mb_;
- static MODE_INFO* mi_;
-};
-
-MACROBLOCKD* IntraPredUVTest::mb_ = NULL;
-MODE_INFO* IntraPredUVTest::mi_ = NULL;
-uint8_t* IntraPredUVTest::data_array_ = NULL;
-
-TEST_P(IntraPredUVTest, IntraPredTests) {
- RunTest();
-}
-
-INSTANTIATE_TEST_CASE_P(C, IntraPredUVTest,
- ::testing::Values(
- vp8_build_intra_predictors_mbuv_s_c));
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, IntraPredUVTest,
- ::testing::Values(
- vp8_build_intra_predictors_mbuv_s_sse2));
-#endif
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredUVTest,
- ::testing::Values(
- vp8_build_intra_predictors_mbuv_s_ssse3));
-#endif
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, IntraPredUVTest,
- ::testing::Values(
- vp8_build_intra_predictors_mbuv_s_neon));
-#endif
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, IntraPredUVTest,
- ::testing::Values(
- vp8_build_intra_predictors_mbuv_s_msa));
-#endif
-
-} // namespace
diff --git a/libvpx/test/invalid_file_test.cc b/libvpx/test/invalid_file_test.cc
index 1b5ef5c..f4241eb 100644
--- a/libvpx/test/invalid_file_test.cc
+++ b/libvpx/test/invalid_file_test.cc
@@ -63,9 +63,22 @@
EXPECT_NE(res, EOF) << "Read result data failed";
// Check results match.
- EXPECT_EQ(expected_res_dec, res_dec)
- << "Results don't match: frame number = " << video.frame_number()
- << ". (" << decoder->DecodeError() << ")";
+ const DecodeParam input = GET_PARAM(1);
+ if (input.threads > 1) {
+ // The serial decode check is too strict for tile-threaded decoding as
+ // there is no guarantee on the decode order nor which specific error
+ // will take precedence. Currently a tile-level error is not forwarded so
+ // the frame will simply be marked corrupt.
+ EXPECT_TRUE(res_dec == expected_res_dec ||
+ res_dec == VPX_CODEC_CORRUPT_FRAME)
+ << "Results don't match: frame number = " << video.frame_number()
+ << ". (" << decoder->DecodeError() << "). Expected: "
+ << expected_res_dec << " or " << VPX_CODEC_CORRUPT_FRAME;
+ } else {
+ EXPECT_EQ(expected_res_dec, res_dec)
+ << "Results don't match: frame number = " << video.frame_number()
+ << ". (" << decoder->DecodeError() << ")";
+ }
return !HasFailure();
}
@@ -145,7 +158,7 @@
}
const DecodeParam kVP9InvalidFileInvalidPeekTests[] = {
- {1, "invalid-vp90-01-v2.webm"},
+ {1, "invalid-vp90-01-v3.webm"},
};
VP9_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest,
diff --git a/libvpx/test/lpf_8_test.cc b/libvpx/test/lpf_8_test.cc
index 966e109..0bf6b0c 100644
--- a/libvpx/test/lpf_8_test.cc
+++ b/libvpx/test/lpf_8_test.cc
@@ -590,7 +590,9 @@
make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1),
- make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1)));
+ make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1),
+ make_tuple(&wrapper_vertical_16_dual_sse2,
+ &wrapper_vertical_16_dual_c, 8, 1)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif
diff --git a/libvpx/test/register_state_check.h b/libvpx/test/register_state_check.h
index 8e72f91..489c419 100644
--- a/libvpx/test/register_state_check.h
+++ b/libvpx/test/register_state_check.h
@@ -30,7 +30,9 @@
#if defined(_WIN64)
-#define _WIN32_LEAN_AND_MEAN
+#undef NOMINMAX
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <winnt.h>
diff --git a/libvpx/test/resize_test.cc b/libvpx/test/resize_test.cc
index f1134aa..98b6f87 100644
--- a/libvpx/test/resize_test.cc
+++ b/libvpx/test/resize_test.cc
@@ -81,6 +81,15 @@
const unsigned int kInitialWidth = 320;
const unsigned int kInitialHeight = 240;
+struct FrameInfo {
+ FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
+ : pts(_pts), w(_w), h(_h) {}
+
+ vpx_codec_pts_t pts;
+ unsigned int w;
+ unsigned int h;
+};
+
unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) {
if (frame < 10)
return val;
@@ -120,15 +129,6 @@
virtual ~ResizeTest() {}
- struct FrameInfo {
- FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
- : pts(_pts), w(_w), h(_h) {}
-
- vpx_codec_pts_t pts;
- unsigned int w;
- unsigned int h;
- };
-
virtual void SetUp() {
InitializeConfig();
SetMode(GET_PARAM(1));
@@ -196,13 +196,27 @@
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
libvpx_test::Encoder *encoder) {
- if (video->frame() == kStepDownFrame) {
- struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
- encoder->Control(VP8E_SET_SCALEMODE, &mode);
- }
- if (video->frame() == kStepUpFrame) {
- struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
- encoder->Control(VP8E_SET_SCALEMODE, &mode);
+ if (change_config_) {
+ int new_q = 60;
+ if (video->frame() == 0) {
+ struct vpx_scaling_mode mode = {VP8E_ONETWO, VP8E_ONETWO};
+ encoder->Control(VP8E_SET_SCALEMODE, &mode);
+ }
+ if (video->frame() == 1) {
+ struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
+ encoder->Control(VP8E_SET_SCALEMODE, &mode);
+ cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = new_q;
+ encoder->Config(&cfg_);
+ }
+ } else {
+ if (video->frame() == kStepDownFrame) {
+ struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
+ encoder->Control(VP8E_SET_SCALEMODE, &mode);
+ }
+ if (video->frame() == kStepUpFrame) {
+ struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
+ encoder->Control(VP8E_SET_SCALEMODE, &mode);
+ }
}
}
@@ -227,6 +241,7 @@
#endif
double frame0_psnr_;
+ bool change_config_;
#if WRITE_COMPRESSED_STREAM
FILE *outfile_;
unsigned int out_frames_;
@@ -237,6 +252,7 @@
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
30, 1, 0, 10);
init_flags_ = VPX_CODEC_USE_PSNR;
+ change_config_ = false;
// q picked such that initial keyframe on this clip is ~30dB PSNR
cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
@@ -261,6 +277,143 @@
}
}
+TEST_P(ResizeInternalTest, TestInternalResizeChangeConfig) {
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, 10);
+ cfg_.g_w = 352;
+ cfg_.g_h = 288;
+ change_config_ = true;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ protected:
+ ResizeInternalRealtimeTest() : EncoderTest(GET_PARAM(0)) {}
+ virtual ~ResizeInternalRealtimeTest() {}
+
+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+ libvpx_test::Encoder *encoder) {
+ if (video->frame() == 0) {
+ encoder->Control(VP9E_SET_AQ_MODE, 3);
+ encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
+ }
+
+ if (change_bitrate_ && video->frame() == 120) {
+ change_bitrate_ = false;
+ cfg_.rc_target_bitrate = 500;
+ encoder->Config(&cfg_);
+ }
+ }
+
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(GET_PARAM(1));
+ set_cpu_used_ = GET_PARAM(2);
+ }
+
+ virtual void DecompressedFrameHook(const vpx_image_t &img,
+ vpx_codec_pts_t pts) {
+ frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
+ }
+
+ void DefaultConfig() {
+ cfg_.g_w = 352;
+ cfg_.g_h = 288;
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_buf_optimal_sz = 600;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_min_quantizer = 2;
+ cfg_.rc_max_quantizer = 56;
+ cfg_.rc_undershoot_pct = 50;
+ cfg_.rc_overshoot_pct = 50;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.kf_mode = VPX_KF_AUTO;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.kf_min_dist = cfg_.kf_max_dist = 3000;
+ // Enable dropped frames.
+ cfg_.rc_dropframe_thresh = 1;
+ // Enable error_resilience mode.
+ cfg_.g_error_resilient = 1;
+ // Enable dynamic resizing.
+ cfg_.rc_resize_allowed = 1;
+ // Run at low bitrate.
+ cfg_.rc_target_bitrate = 200;
+ }
+
+ std::vector< FrameInfo > frame_info_list_;
+ int set_cpu_used_;
+ bool change_bitrate_;
+};
+
+// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
+// Run at low bitrate, with resize_allowed = 1, and verify that we get
+// one resize down event.
+TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDown) {
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, 299);
+ DefaultConfig();
+ change_bitrate_ = false;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ unsigned int last_w = cfg_.g_w;
+ unsigned int last_h = cfg_.g_h;
+ int resize_count = 0;
+ for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+ info != frame_info_list_.end(); ++info) {
+ if (info->w != last_w || info->h != last_h) {
+ // Verify that resize down occurs.
+ ASSERT_LT(info->w, last_w);
+ ASSERT_LT(info->h, last_h);
+ last_w = info->w;
+ last_h = info->h;
+ resize_count++;
+ }
+ }
+
+ // Verify that we get 1 resize down event in this test.
+ ASSERT_EQ(1, resize_count) << "Resizing should occur.";
+}
+
+// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
+// Start at low target bitrate, raise the bitrate in the middle of the clip,
+// scaling-up should occur after bitrate changed.
+TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, 299);
+ DefaultConfig();
+ change_bitrate_ = true;
+ // Disable dropped frames.
+ cfg_.rc_dropframe_thresh = 0;
+ // Starting bitrate low.
+ cfg_.rc_target_bitrate = 100;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+ unsigned int last_w = cfg_.g_w;
+ unsigned int last_h = cfg_.g_h;
+ int resize_count = 0;
+ for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+ info != frame_info_list_.end(); ++info) {
+ if (info->w != last_w || info->h != last_h) {
+ resize_count++;
+ if (resize_count == 1) {
+ // Verify that resize down occurs.
+ ASSERT_LT(info->w, last_w);
+ ASSERT_LT(info->h, last_h);
+ } else if (resize_count == 2) {
+ // Verify that resize up occurs.
+ ASSERT_GT(info->w, last_w);
+ ASSERT_GT(info->h, last_h);
+ }
+ last_w = info->w;
+ last_h = info->h;
+ }
+ }
+
+ // Verify that we get 2 resize events in this test.
+ ASSERT_EQ(2, resize_count) << "Resizing should occur twice.";
+}
+
vpx_img_fmt_t CspForFrameNumber(int frame) {
if (frame < 10)
return VPX_IMG_FMT_I420;
@@ -371,6 +524,9 @@
::testing::Values(::libvpx_test::kRealTime));
VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest,
::testing::Values(::libvpx_test::kOnePassBest));
+VP9_INSTANTIATE_TEST_CASE(ResizeInternalRealtimeTest,
+ ::testing::Values(::libvpx_test::kRealTime),
+ ::testing::Range(5, 9));
VP9_INSTANTIATE_TEST_CASE(ResizeCspTest,
::testing::Values(::libvpx_test::kRealTime));
} // namespace
diff --git a/libvpx/test/sixtap_predict_test.cc b/libvpx/test/sixtap_predict_test.cc
index 8c7c98d..1e682e7 100644
--- a/libvpx/test/sixtap_predict_test.cc
+++ b/libvpx/test/sixtap_predict_test.cc
@@ -201,7 +201,7 @@
const SixtapPredictFunc sixtap_8x8_neon = vp8_sixtap_predict8x8_neon;
const SixtapPredictFunc sixtap_8x4_neon = vp8_sixtap_predict8x4_neon;
INSTANTIATE_TEST_CASE_P(
- DISABLED_NEON, SixtapPredictTest, ::testing::Values(
+ NEON, SixtapPredictTest, ::testing::Values(
make_tuple(16, 16, sixtap_16x16_neon),
make_tuple(8, 8, sixtap_8x8_neon),
make_tuple(8, 4, sixtap_8x4_neon)));
diff --git a/libvpx/test/superframe_test.cc b/libvpx/test/superframe_test.cc
index a8102b7..90aa75b 100644
--- a/libvpx/test/superframe_test.cc
+++ b/libvpx/test/superframe_test.cc
@@ -16,8 +16,13 @@
namespace {
+const int kTestMode = 0;
+const int kSuperframeSyntax = 1;
+
+typedef std::tr1::tuple<libvpx_test::TestMode,int> SuperframeTestParam;
+
class SuperframeTest : public ::libvpx_test::EncoderTest,
- public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ public ::libvpx_test::CodecTestWithParam<SuperframeTestParam> {
protected:
SuperframeTest() : EncoderTest(GET_PARAM(0)), modified_buf_(NULL),
last_sf_pts_(0) {}
@@ -25,9 +30,13 @@
virtual void SetUp() {
InitializeConfig();
- SetMode(GET_PARAM(1));
+ const SuperframeTestParam input = GET_PARAM(1);
+ const libvpx_test::TestMode mode = std::tr1::get<kTestMode>(input);
+ const int syntax = std::tr1::get<kSuperframeSyntax>(input);
+ SetMode(mode);
sf_count_ = 0;
sf_count_max_ = INT_MAX;
+ is_vp10_style_superframe_ = syntax;
}
virtual void TearDown() {
@@ -50,7 +59,8 @@
const uint8_t marker = buffer[pkt->data.frame.sz - 1];
const int frames = (marker & 0x7) + 1;
const int mag = ((marker >> 3) & 3) + 1;
- const unsigned int index_sz = 2 + mag * frames;
+ const unsigned int index_sz =
+ 2 + mag * (frames - is_vp10_style_superframe_);
if ((marker & 0xe0) == 0xc0 &&
pkt->data.frame.sz >= index_sz &&
buffer[pkt->data.frame.sz - index_sz] == marker) {
@@ -75,6 +85,7 @@
return pkt;
}
+ int is_vp10_style_superframe_;
int sf_count_;
int sf_count_max_;
vpx_codec_cx_pkt_t modified_pkt_;
@@ -92,9 +103,11 @@
EXPECT_EQ(sf_count_, 1);
}
-VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Values(
- ::libvpx_test::kTwoPassGood));
+VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Combine(
+ ::testing::Values(::libvpx_test::kTwoPassGood),
+ ::testing::Values(0)));
-VP10_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Values(
- ::libvpx_test::kTwoPassGood));
+VP10_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Combine(
+ ::testing::Values(::libvpx_test::kTwoPassGood),
+ ::testing::Values(CONFIG_MISC_FIXES)));
} // namespace
diff --git a/libvpx/test/test-data.mk b/libvpx/test/test-data.mk
index dda1c18..4280b35 100644
--- a/libvpx/test/test-data.mk
+++ b/libvpx/test/test-data.mk
@@ -18,6 +18,7 @@
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv
+LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += desktop_credits.y4m
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.y4m
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m
@@ -687,8 +688,8 @@
endif # CONFIG_VP9_HIGHBITDEPTH
# Invalid files for testing libvpx error checking.
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v3.webm
diff --git a/libvpx/test/test-data.sha1 b/libvpx/test/test-data.sha1
index 3590f4e..4e4ac62 100644
--- a/libvpx/test/test-data.sha1
+++ b/libvpx/test/test-data.sha1
@@ -6,8 +6,8 @@
456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
c123d1f9f02fb4143abb5e271916e3a3080de8f6 *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
-fe346136b9b8c1e6f6084cc106485706915795e4 *invalid-vp90-01-v2.webm
-25751f5d3b05ff03f0719ad42cd625348eb8961e *invalid-vp90-01-v2.webm.res
+fe346136b9b8c1e6f6084cc106485706915795e4 *invalid-vp90-01-v3.webm
+5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-vp90-01-v3.webm.res
d78e2fceba5ac942246503ec8366f879c4775ca5 *invalid-vp90-02-v2.webm
8e2eff4af87d2b561cce2365713269e301457ef3 *invalid-vp90-02-v2.webm.res
df1a1453feb3c00d7d89746c7003b4163523bff3 *invalid-vp90-03-v3.webm
@@ -743,3 +743,4 @@
e60d859b0ef2b331b21740cf6cb83fabe469b079 *invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf
0ae808dca4d3c1152a9576e14830b6faa39f1b4a *invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf.res
9cfc855459e7549fd015c79e8eca512b2f2cb7e3 *niklas_1280_720_30.y4m
+5b5763b388b1b52a81bb82b39f7ec25c4bd3d0e1 *desktop_credits.y4m
diff --git a/libvpx/test/test.mk b/libvpx/test/test.mk
index 6bb08be..8d66244 100644
--- a/libvpx/test/test.mk
+++ b/libvpx/test/test.mk
@@ -36,6 +36,7 @@
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += invalid_file_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_frame_parallel_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_refresh_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
@@ -110,7 +111,6 @@
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc
LIBVPX_TEST_SRCS-yes += idct_test.cc
-LIBVPX_TEST_SRCS-yes += intrapred_test.cc
LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc
LIBVPX_TEST_SRCS-yes += vpx_scale_test.cc
@@ -167,6 +167,10 @@
TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) := test_intra_pred_speed.cc
TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) += ../md5_utils.h ../md5_utils.c
+## VP10
+LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm_test.cc
+
endif # CONFIG_SHARED
include $(SRC_PATH_BARE)/test/test-data.mk
diff --git a/libvpx/test/test_libvpx.cc b/libvpx/test/test_libvpx.cc
index 2649917..005ea8d 100644
--- a/libvpx/test/test_libvpx.cc
+++ b/libvpx/test/test_libvpx.cc
@@ -26,6 +26,7 @@
extern void vpx_scale_rtcd();
}
+#if ARCH_X86 || ARCH_X86_64
static void append_negative_gtest_filter(const char *str) {
std::string filter = ::testing::FLAGS_gtest_filter;
// Negative patterns begin with one '-' followed by a ':' separated list.
@@ -33,6 +34,7 @@
filter += str;
::testing::FLAGS_gtest_filter = filter;
}
+#endif // ARCH_X86 || ARCH_X86_64
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
@@ -55,7 +57,7 @@
append_negative_gtest_filter(":AVX.*:AVX/*");
if (!(simd_caps & HAS_AVX2))
append_negative_gtest_filter(":AVX2.*:AVX2/*");
-#endif
+#endif // ARCH_X86 || ARCH_X86_64
#if !CONFIG_SHARED
// Shared library builds don't support whitebox tests
diff --git a/libvpx/test/util.h b/libvpx/test/util.h
index 3c45721..b27bffa 100644
--- a/libvpx/test/util.h
+++ b/libvpx/test/util.h
@@ -19,8 +19,7 @@
// Macros
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
-static double compute_psnr(const vpx_image_t *img1,
- const vpx_image_t *img2) {
+inline double compute_psnr(const vpx_image_t *img1, const vpx_image_t *img2) {
assert((img1->fmt == img2->fmt) &&
(img1->d_w == img2->d_w) &&
(img1->d_h == img2->d_h));
diff --git a/libvpx/test/video_source.h b/libvpx/test/video_source.h
index 63294d1..ade323e 100644
--- a/libvpx/test/video_source.h
+++ b/libvpx/test/video_source.h
@@ -11,6 +11,9 @@
#define TEST_VIDEO_SOURCE_H_
#if defined(_WIN32)
+#undef NOMINMAX
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
#include <cstdio>
@@ -48,7 +51,7 @@
#undef TO_STRING
#undef STRINGIFY
-static FILE *OpenTestDataFile(const std::string& file_name) {
+inline FILE *OpenTestDataFile(const std::string& file_name) {
const std::string path_to_source = GetDataPath() + "/" + file_name;
return fopen(path_to_source.c_str(), "rb");
}
diff --git a/libvpx/test/vp10_dct_test.cc b/libvpx/test/vp10_dct_test.cc
new file mode 100644
index 0000000..b2c301a
--- /dev/null
+++ b/libvpx/test/vp10_dct_test.cc
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <new>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "./vpx_config.h"
+#include "vpx_ports/msvc.h"
+
+#undef CONFIG_COEFFICIENT_RANGE_CHECKING
+#define CONFIG_COEFFICIENT_RANGE_CHECKING 1
+#include "vp10/encoder/dct.c"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+// Floating-point reference for a 1-D forward DCT over |size| samples:
+//   out[k] = sum over n of in[n] * cos(PI * (2n + 1) * k / (2 * size)),
+// with the k == 0 output additionally multiplied by 1/sqrt(2).
+void reference_dct_1d(const double *in, double *out, int size) {
+  const double kPi = 3.141592653589793238462643383279502884;
+  const double kInvSqrt2 = 0.707106781186547524400844362104;
+  const int denom = 2 * size;
+  for (int freq = 0; freq < size; ++freq) {
+    out[freq] = 0;
+    for (int pos = 0; pos < size; ++pos)
+      out[freq] += in[pos] * cos(kPi * (2 * pos + 1) * freq / denom);
+    if (freq == 0) out[freq] *= kInvSqrt2;
+  }
+}
+
+typedef void (*FdctFuncRef)(const double *in, double *out, int size);
+typedef void (*IdctFuncRef)(const double *in, double *out, int size);
+typedef void (*FdctFunc)(const tran_low_t *in, tran_low_t *out);
+typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
+
+// Shared driver for the 1-D forward-transform accuracy test.  The data members
+// have no initializers here; RunFwdAccuracyCheck() reads all four of them.
+class TransTestBase {
+ public:
+  virtual ~TransTestBase() {}
+
+ protected:
+  // Runs 5000 random blocks of txfm_size_ samples through fwd_txfm_ and
+  // fwd_txfm_ref_ and expects every output coefficient to be within
+  // max_error_ of the rounded reference value.
+  void RunFwdAccuracyCheck() {
+    const int kNumBlocks = 5000;
+    tran_low_t *const src = new tran_low_t[txfm_size_];
+    tran_low_t *const dst = new tran_low_t[txfm_size_];
+    double *const ref_src = new double[txfm_size_];
+    double *const ref_dst = new double[txfm_size_];
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    for (int block = 0; block < kNumBlocks; ++block) {
+      // Each sample is the difference of two Rand8() draws.
+      for (int idx = 0; idx < txfm_size_; ++idx) {
+        src[idx] = rnd.Rand8() - rnd.Rand8();
+        ref_src[idx] = static_cast<double>(src[idx]);
+      }
+
+      fwd_txfm_(src, dst);
+      fwd_txfm_ref_(ref_src, ref_dst, txfm_size_);
+
+      for (int idx = 0; idx < txfm_size_; ++idx) {
+        const tran_low_t expected =
+            static_cast<tran_low_t>(round(ref_dst[idx]));
+        EXPECT_LE(abs(dst[idx] - expected), max_error_);
+      }
+    }
+
+    delete[] src;
+    delete[] dst;
+    delete[] ref_src;
+    delete[] ref_dst;
+  }
+
+  double max_error_;          // largest tolerated absolute difference
+  int txfm_size_;             // number of samples per block
+  FdctFunc fwd_txfm_;         // transform under test
+  FdctFuncRef fwd_txfm_ref_;  // double-precision reference
+};
+
+// Parameter tuple: (transform under test, reference transform, transform size,
+// maximum allowed absolute error).
+typedef std::tr1::tuple<FdctFunc, FdctFuncRef, int, int> FdctParam;
+
+// Value-parameterized fixture that copies its FdctParam into the TransTestBase
+// members before each test.
+class Vp10FwdTxfm : public TransTestBase,
+                    public ::testing::TestWithParam<FdctParam> {
+ public:
+  virtual void SetUp() {
+    const FdctParam &param = GetParam();
+    fwd_txfm_ = std::tr1::get<0>(param);
+    fwd_txfm_ref_ = std::tr1::get<1>(param);
+    txfm_size_ = std::tr1::get<2>(param);
+    max_error_ = std::tr1::get<3>(param);
+  }
+  virtual void TearDown() {}
+};
+
+TEST_P(Vp10FwdTxfm, RunFwdAccuracyCheck) { RunFwdAccuracyCheck(); }
+
+// 4-, 8- and 16-point forward DCTs, with maximum errors of 1, 1 and 2.
+INSTANTIATE_TEST_CASE_P(
+    C, Vp10FwdTxfm,
+    ::testing::Values(FdctParam(&fdct4, &reference_dct_1d, 4, 1),
+                      FdctParam(&fdct8, &reference_dct_1d, 8, 1),
+                      FdctParam(&fdct16, &reference_dct_1d, 16, 2)));
+} // namespace
diff --git a/libvpx/test/vp10_inv_txfm_test.cc b/libvpx/test/vp10_inv_txfm_test.cc
new file mode 100644
index 0000000..c49081e
--- /dev/null
+++ b/libvpx/test/vp10_inv_txfm_test.cc
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp10_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "vp10/common/blockd.h"
+#include "vp10/common/scan.h"
+#include "vpx/vpx_integer.h"
+#include "vp10/common/vp10_inv_txfm.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+const double PI = 3.141592653589793238462643383279502884;
+const double kInvSqrt2 = 0.707106781186547524400844362104;
+
+// Floating-point reference for a 1-D inverse DCT over |size| coefficients:
+//   out[n] = sum over k of w(k) * in[k] * cos(PI * (2n + 1) * k / (2 * size)),
+// where w(0) = 1/sqrt(2) and w(k) = 1 for every other k.
+void reference_idct_1d(const double *in, double *out, int size) {
+  const int denom = 2 * size;
+  for (int pos = 0; pos < size; ++pos) {
+    const int odd = 2 * pos + 1;
+    out[pos] = 0;
+    for (int freq = 0; freq < size; ++freq) {
+      const double weight = (freq == 0) ? kInvSqrt2 : 1.0;
+      out[pos] += weight * in[freq] * cos(PI * odd * freq / denom);
+    }
+  }
+}
+
+typedef void (*IdctFuncRef)(const double *in, double *out, int size);
+typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
+
+// Shared driver for the 1-D inverse-transform accuracy test.  The data members
+// have no initializers here; Vp10InvTxfm::SetUp() assigns all four.
+class TransTestBase {
+ public:
+  virtual ~TransTestBase() {}
+
+ protected:
+  // Runs 5000 random blocks of txfm_size_ coefficients through inv_txfm_ and
+  // inv_txfm_ref_ and expects every output sample to be within max_error_ of
+  // the rounded reference value.
+  void RunInvAccuracyCheck() {
+    tran_low_t *input = new tran_low_t[txfm_size_];
+    tran_low_t *output = new tran_low_t[txfm_size_];
+    double *ref_input = new double[txfm_size_];
+    double *ref_output = new double[txfm_size_];
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 5000;
+    for (int ti = 0; ti < count_test_block; ++ti) {
+      // Each coefficient is the difference of two Rand8() draws.
+      for (int ni = 0; ni < txfm_size_; ++ni) {
+        input[ni] = rnd.Rand8() - rnd.Rand8();
+        ref_input[ni] = static_cast<double>(input[ni]);
+      }
+
+      inv_txfm_(input, output);
+      inv_txfm_ref_(ref_input, ref_output, txfm_size_);
+
+      for (int ni = 0; ni < txfm_size_; ++ni) {
+        EXPECT_LE(
+            abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))),
+            max_error_);
+      }
+    }
+
+    delete[] input;
+    delete[] output;
+    delete[] ref_input;
+    delete[] ref_output;
+  }
+
+  double max_error_;          // largest tolerated absolute difference
+  int txfm_size_;             // number of coefficients per block
+  // These hold inverse transforms, so they are named inv_* rather than the
+  // fwd_* names used by the forward-transform test.
+  IdctFunc inv_txfm_;         // transform under test
+  IdctFuncRef inv_txfm_ref_;  // double-precision reference
+};
+
+// Parameter tuple: (transform under test, reference transform, transform size,
+// maximum allowed absolute error).
+typedef std::tr1::tuple<IdctFunc, IdctFuncRef, int, int> IdctParam;
+class Vp10InvTxfm
+    : public TransTestBase,
+      public ::testing::TestWithParam<IdctParam> {
+ public:
+  virtual void SetUp() {
+    inv_txfm_ = GET_PARAM(0);
+    inv_txfm_ref_ = GET_PARAM(1);
+    txfm_size_ = GET_PARAM(2);
+    max_error_ = GET_PARAM(3);
+  }
+  virtual void TearDown() {}
+};
+
+TEST_P(Vp10InvTxfm, RunInvAccuracyCheck) {
+  RunInvAccuracyCheck();
+}
+
+// 4-, 8-, 16- and 32-point inverse DCTs, with maximum errors of 1, 2, 4 and 6.
+INSTANTIATE_TEST_CASE_P(
+    C, Vp10InvTxfm,
+    ::testing::Values(
+        IdctParam(&vp10_idct4_c, &reference_idct_1d, 4, 1),
+        IdctParam(&vp10_idct8_c, &reference_idct_1d, 8, 2),
+        IdctParam(&vp10_idct16_c, &reference_idct_1d, 16, 4),
+        IdctParam(&vp10_idct32_c, &reference_idct_1d, 32, 6))
+);
+
+// Signatures of the 2-D forward and inverse transforms exercised below.
+typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
+// Parameter tuple: (forward transform, full inverse transform, partial inverse
+// transform, transform size, last_nonzero_ value).
+typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, InvTxfmFunc, TX_SIZE, int>
+    PartialInvTxfmParam;
+// Element count of the coefficient and pixel buffers declared by the tests.
+const int kMaxNumCoeffs = 1024;
+
+class Vp10PartialIDctTest
+    : public ::testing::TestWithParam<PartialInvTxfmParam> {
+ public:
+  virtual ~Vp10PartialIDctTest() {}
+
+  // Unpacks the test parameter tuple into the fixture members.
+  virtual void SetUp() {
+    const PartialInvTxfmParam &param = GetParam();
+    ftxfm_ = std::tr1::get<0>(param);
+    full_itxfm_ = std::tr1::get<1>(param);
+    partial_itxfm_ = std::tr1::get<2>(param);
+    tx_size_ = std::tr1::get<3>(param);
+    last_nonzero_ = std::tr1::get<4>(param);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  int last_nonzero_;
+  TX_SIZE tx_size_;
+  FwdTxfmFunc ftxfm_;
+  InvTxfmFunc full_itxfm_;
+  InvTxfmFunc partial_itxfm_;
+};
+
+// Checks that the partial inverse transform matches the full inverse transform
+// on coefficients obtained by forward-transforming extreme-valued input
+// (every sample +255 or -255) and quantizing the result.
+TEST_P(Vp10PartialIDctTest, RunQuantCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int size;
+  switch (tx_size_) {
+    case TX_4X4:
+      size = 4;
+      break;
+    case TX_8X8:
+      size = 8;
+      break;
+    case TX_16X16:
+      size = 16;
+      break;
+    case TX_32X32:
+      size = 32;
+      break;
+    default:
+      FAIL() << "Wrong Size!";
+      break;
+  }
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
+
+  const int count_test_block = 1000;
+  const int block_size = size * size;
+
+  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
+
+  int max_error = 0;
+  // Single loop: every iteration builds, transforms and checks its own block,
+  // so the all-255 block (i == 0), the all-(-255) block (i == 1) and each
+  // random block are all compared.  rnd is seeded once, above, so the random
+  // blocks differ from one iteration to the next.
+  for (int i = 0; i < count_test_block; ++i) {
+    // clear out destination buffer
+    memset(dst1, 0, sizeof(*dst1) * block_size);
+    memset(dst2, 0, sizeof(*dst2) * block_size);
+    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
+
+    // Initialize a test block with input range [-255, 255].
+    if (i == 0) {
+      for (int j = 0; j < block_size; ++j)
+        input_extreme_block[j] = 255;
+    } else if (i == 1) {
+      for (int j = 0; j < block_size; ++j)
+        input_extreme_block[j] = -255;
+    } else {
+      for (int j = 0; j < block_size; ++j) {
+        input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
+      }
+    }
+
+    ftxfm_(input_extreme_block, output_ref_block, size);
+
+    // quantization with maximum allowed step sizes
+    test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336;
+    for (int j = 1; j < last_nonzero_; ++j)
+      test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]]
+          = (output_ref_block[j] / 1828) * 1828;
+
+    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
+    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));
+
+    // Track the largest squared difference between the two outputs.
+    for (int j = 0; j < block_size; ++j) {
+      const int diff = dst1[j] - dst2[j];
+      const int error = diff * diff;
+      if (max_error < error)
+        max_error = error;
+    }
+  }
+
+  EXPECT_EQ(0, max_error)
+      << "Error: partial inverse transform produces different results";
+}
+
+// Feeds the same randomly generated coefficient block to the full and to the
+// partial inverse transform and expects identical output pixels.
+TEST_P(Vp10PartialIDctTest, ResultsMatch) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ int size;
+ switch (tx_size_) {
+ case TX_4X4:
+ size = 4;
+ break;
+ case TX_8X8:
+ size = 8;
+ break;
+ case TX_16X16:
+ size = 16;
+ break;
+ case TX_32X32:
+ size = 32;
+ break;
+ default:
+ FAIL() << "Wrong Size!";
+ break;
+ }
+ DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
+ DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
+ DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
+ DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
+ const int count_test_block = 1000;
+ const int max_coeff = 32766 / 4;
+ const int block_size = size * size;
+ int max_error = 0;
+ for (int i = 0; i < count_test_block; ++i) {
+ // clear out destination buffer
+ memset(dst1, 0, sizeof(*dst1) * block_size);
+ memset(dst2, 0, sizeof(*dst2) * block_size);
+ memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
+ memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
+ // Only the first last_nonzero_ positions in scan order receive a
+ // coefficient; the rest of the block stays zero from the memset above.
+ int max_energy_leftover = max_coeff * max_coeff;
+ for (int j = 0; j < last_nonzero_; ++j) {
+ // Scale a Rand16() draw by the square root of the energy budget that is
+ // still left, then charge coef * coef against that budget.
+ int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
+ (rnd.Rand16() - 32768) / 65536);
+ max_energy_leftover -= coef * coef;
+ // Budget went negative: reset it to zero and drop this coefficient.
+ if (max_energy_leftover < 0) {
+ max_energy_leftover = 0;
+ coef = 0;
+ }
+ test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] = coef;
+ }
+
+ // Each transform gets its own copy of the coefficients.
+ memcpy(test_coef_block2, test_coef_block1,
+ sizeof(*test_coef_block2) * block_size);
+
+ ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
+ ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
+
+ // Track the largest squared difference between the two outputs.
+ for (int j = 0; j < block_size; ++j) {
+ const int diff = dst1[j] - dst2[j];
+ const int error = diff * diff;
+ if (max_error < error)
+ max_error = error;
+ }
+ }
+
+ EXPECT_EQ(0, max_error)
+ << "Error: partial inverse transform produces different results";
+}
+using std::tr1::make_tuple;
+
+// Instantiates the partial IDCT tests with the C implementations.  Tuple
+// fields, in order: forward transform, full inverse transform, partial inverse
+// transform, transform size, and the last_nonzero_ count (how many leading
+// scan positions the tests populate).
+INSTANTIATE_TEST_CASE_P(
+ C, Vp10PartialIDctTest,
+ ::testing::Values(
+ make_tuple(&vpx_fdct32x32_c,
+ &vp10_idct32x32_1024_add_c,
+ &vp10_idct32x32_34_add_c,
+ TX_32X32, 34),
+ make_tuple(&vpx_fdct32x32_c,
+ &vp10_idct32x32_1024_add_c,
+ &vp10_idct32x32_1_add_c,
+ TX_32X32, 1),
+ make_tuple(&vpx_fdct16x16_c,
+ &vp10_idct16x16_256_add_c,
+ &vp10_idct16x16_10_add_c,
+ TX_16X16, 10),
+ make_tuple(&vpx_fdct16x16_c,
+ &vp10_idct16x16_256_add_c,
+ &vp10_idct16x16_1_add_c,
+ TX_16X16, 1),
+ make_tuple(&vpx_fdct8x8_c,
+ &vp10_idct8x8_64_add_c,
+ &vp10_idct8x8_12_add_c,
+ TX_8X8, 12),
+ make_tuple(&vpx_fdct8x8_c,
+ &vp10_idct8x8_64_add_c,
+ &vp10_idct8x8_1_add_c,
+ TX_8X8, 1),
+ make_tuple(&vpx_fdct4x4_c,
+ &vp10_idct4x4_16_add_c,
+ &vp10_idct4x4_1_add_c,
+ TX_4X4, 1)));
+} // namespace
diff --git a/libvpx/test/vp9_arf_freq_test.cc b/libvpx/test/vp9_arf_freq_test.cc
index 87ff15b..89200d4 100644
--- a/libvpx/test/vp9_arf_freq_test.cc
+++ b/libvpx/test/vp9_arf_freq_test.cc
@@ -230,9 +230,23 @@
::testing::ValuesIn(kEncodeVectors),
::testing::ValuesIn(kMinArfVectors));
+#if CONFIG_VP9_HIGHBITDEPTH
+# if CONFIG_VP10_ENCODER
+// TODO(angiebird): 25-29 fail in high bitdepth mode.
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_VP10, ArfFreqTest,
+ ::testing::Combine(
+ ::testing::Values(static_cast<const libvpx_test::CodecFactory *>(
+ &libvpx_test::kVP10)),
+ ::testing::ValuesIn(kTestVectors),
+ ::testing::ValuesIn(kEncodeVectors),
+ ::testing::ValuesIn(kMinArfVectors)));
+# endif // CONFIG_VP10_ENCODER
+#else
VP10_INSTANTIATE_TEST_CASE(
ArfFreqTest,
::testing::ValuesIn(kTestVectors),
::testing::ValuesIn(kEncodeVectors),
::testing::ValuesIn(kMinArfVectors));
+#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
diff --git a/libvpx/test/vp9_encoder_parms_get_to_decoder.cc b/libvpx/test/vp9_encoder_parms_get_to_decoder.cc
index a02070e..3ef6022 100644
--- a/libvpx/test/vp9_encoder_parms_get_to_decoder.cc
+++ b/libvpx/test/vp9_encoder_parms_get_to_decoder.cc
@@ -14,38 +14,10 @@
#include "test/encode_test_driver.h"
#include "test/util.h"
#include "test/y4m_video_source.h"
-#include "test/yuv_video_source.h"
-#include "vp9/decoder/vp9_decoder.h"
-
-typedef vpx_codec_stream_info_t vp9_stream_info_t;
-struct vpx_codec_alg_priv {
- vpx_codec_priv_t base;
- vpx_codec_dec_cfg_t cfg;
- vp9_stream_info_t si;
- struct VP9Decoder *pbi;
- int postproc_cfg_set;
- vp8_postproc_cfg_t postproc_cfg;
- vpx_decrypt_cb decrypt_cb;
- void *decrypt_state;
- vpx_image_t img;
- int img_avail;
- int flushed;
- int invert_tile_order;
- int frame_parallel_decode;
-
- // External frame buffer info to save for VP9 common.
- void *ext_priv; // Private data associated with the external frame buffers.
- vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb;
- vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb;
-};
-
-static vpx_codec_alg_priv_t *get_alg_priv(vpx_codec_ctx_t *ctx) {
- return (vpx_codec_alg_priv_t *)ctx->priv;
-}
+#include "vp9/vp9_dx_iface.h"
namespace {
-const unsigned int kFramerate = 50;
const int kCpuUsed = 2;
struct EncodePerfTestVideo {
@@ -66,35 +38,27 @@
int32_t lossless;
int32_t error_resilient;
int32_t frame_parallel;
+ vpx_color_range_t color_range;
vpx_color_space_t cs;
+ int render_size[2];
// TODO(JBB): quantizers / bitrate
};
const EncodeParameters kVP9EncodeParameterSet[] = {
- {0, 0, 0, 1, 0, VPX_CS_BT_601},
- {0, 0, 0, 0, 0, VPX_CS_BT_709},
- {0, 0, 1, 0, 0, VPX_CS_BT_2020},
- {0, 2, 0, 0, 1, VPX_CS_UNKNOWN},
- // TODO(JBB): Test profiles (requires more work).
+ {0, 0, 0, 1, 0, VPX_CR_STUDIO_RANGE, VPX_CS_BT_601},
+ {0, 0, 0, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_709},
+ {0, 0, 1, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_2020},
+ {0, 2, 0, 0, 1, VPX_CR_STUDIO_RANGE, VPX_CS_UNKNOWN, { 640, 480 }},
+ // TODO(JBB): Test profiles (requires more work).
};
-int is_extension_y4m(const char *filename) {
- const char *dot = strrchr(filename, '.');
- if (!dot || dot == filename)
- return 0;
- else
- return !strcmp(dot, ".y4m");
-}
-
class VpxEncoderParmsGetToDecoder
: public ::libvpx_test::EncoderTest,
- public ::libvpx_test::CodecTestWith2Params<EncodeParameters, \
+ public ::libvpx_test::CodecTestWith2Params<EncodeParameters,
EncodePerfTestVideo> {
protected:
VpxEncoderParmsGetToDecoder()
- : EncoderTest(GET_PARAM(0)),
- encode_parms(GET_PARAM(1)) {
- }
+ : EncoderTest(GET_PARAM(0)), encode_parms(GET_PARAM(1)) {}
virtual ~VpxEncoderParmsGetToDecoder() {}
@@ -112,6 +76,7 @@
::libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {
encoder->Control(VP9E_SET_COLOR_SPACE, encode_parms.cs);
+ encoder->Control(VP9E_SET_COLOR_RANGE, encode_parms.color_range);
encoder->Control(VP9E_SET_LOSSLESS, encode_parms.lossless);
encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING,
encode_parms.frame_parallel);
@@ -122,37 +87,44 @@
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+ if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0)
+ encoder->Control(VP9E_SET_RENDER_SIZE, encode_parms.render_size);
}
}
virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
- const libvpx_test::VideoSource& video,
+ const libvpx_test::VideoSource &video,
libvpx_test::Decoder *decoder) {
- vpx_codec_ctx_t* vp9_decoder = decoder->GetDecoder();
- vpx_codec_alg_priv_t* priv =
- (vpx_codec_alg_priv_t*) get_alg_priv(vp9_decoder);
-
- VP9Decoder* pbi = priv->pbi;
- VP9_COMMON* common = &pbi->common;
+ vpx_codec_ctx_t *const vp9_decoder = decoder->GetDecoder();
+ vpx_codec_alg_priv_t *const priv =
+ reinterpret_cast<vpx_codec_alg_priv_t *>(vp9_decoder->priv);
+ FrameWorkerData *const worker_data =
+ reinterpret_cast<FrameWorkerData *>(priv->frame_workers[0].data1);
+ VP9_COMMON *const common = &worker_data->pbi->common;
if (encode_parms.lossless) {
- EXPECT_EQ(common->base_qindex, 0);
- EXPECT_EQ(common->y_dc_delta_q, 0);
- EXPECT_EQ(common->uv_dc_delta_q, 0);
- EXPECT_EQ(common->uv_ac_delta_q, 0);
- EXPECT_EQ(common->tx_mode, ONLY_4X4);
+ EXPECT_EQ(0, common->base_qindex);
+ EXPECT_EQ(0, common->y_dc_delta_q);
+ EXPECT_EQ(0, common->uv_dc_delta_q);
+ EXPECT_EQ(0, common->uv_ac_delta_q);
+ EXPECT_EQ(ONLY_4X4, common->tx_mode);
}
- EXPECT_EQ(common->error_resilient_mode, encode_parms.error_resilient);
+ EXPECT_EQ(encode_parms.error_resilient, common->error_resilient_mode);
if (encode_parms.error_resilient) {
- EXPECT_EQ(common->frame_parallel_decoding_mode, 1);
- EXPECT_EQ(common->use_prev_frame_mvs, 0);
+ EXPECT_EQ(1, common->frame_parallel_decoding_mode);
+ EXPECT_EQ(0, common->use_prev_frame_mvs);
} else {
- EXPECT_EQ(common->frame_parallel_decoding_mode,
- encode_parms.frame_parallel);
+ EXPECT_EQ(encode_parms.frame_parallel,
+ common->frame_parallel_decoding_mode);
}
- EXPECT_EQ(common->color_space, encode_parms.cs);
- EXPECT_EQ(common->log2_tile_cols, encode_parms.tile_cols);
- EXPECT_EQ(common->log2_tile_rows, encode_parms.tile_rows);
+ EXPECT_EQ(encode_parms.color_range, common->color_range);
+ EXPECT_EQ(encode_parms.cs, common->color_space);
+ if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) {
+ EXPECT_EQ(encode_parms.render_size[0], common->render_width);
+ EXPECT_EQ(encode_parms.render_size[1], common->render_height);
+ }
+ EXPECT_EQ(encode_parms.tile_cols, common->log2_tile_cols);
+ EXPECT_EQ(encode_parms.tile_rows, common->log2_tile_rows);
EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
return VPX_CODEC_OK == res_dec;
@@ -164,35 +136,18 @@
EncodeParameters encode_parms;
};
-// TODO(hkuang): This test conflicts with frame parallel decode. So disable it
-// for now until fix.
-TEST_P(VpxEncoderParmsGetToDecoder, DISABLED_BitstreamParms) {
+TEST_P(VpxEncoderParmsGetToDecoder, BitstreamParms) {
init_flags_ = VPX_CODEC_USE_PSNR;
- libvpx_test::VideoSource *video;
- if (is_extension_y4m(test_video_.name)) {
- video = new libvpx_test::Y4mVideoSource(test_video_.name,
- 0, test_video_.frames);
- } else {
- video = new libvpx_test::YUVVideoSource(test_video_.name,
- VPX_IMG_FMT_I420,
- test_video_.width,
- test_video_.height,
- kFramerate, 1, 0,
- test_video_.frames);
- }
+ libvpx_test::VideoSource *const video =
+ new libvpx_test::Y4mVideoSource(test_video_.name, 0, test_video_.frames);
+ ASSERT_TRUE(video != NULL);
ASSERT_NO_FATAL_FAILURE(RunLoop(video));
- delete(video);
+ delete video;
}
-VP9_INSTANTIATE_TEST_CASE(
- VpxEncoderParmsGetToDecoder,
- ::testing::ValuesIn(kVP9EncodeParameterSet),
- ::testing::ValuesIn(kVP9EncodePerfTestVectors));
-
-VP10_INSTANTIATE_TEST_CASE(
- VpxEncoderParmsGetToDecoder,
- ::testing::ValuesIn(kVP9EncodeParameterSet),
- ::testing::ValuesIn(kVP9EncodePerfTestVectors));
+VP9_INSTANTIATE_TEST_CASE(VpxEncoderParmsGetToDecoder,
+ ::testing::ValuesIn(kVP9EncodeParameterSet),
+ ::testing::ValuesIn(kVP9EncodePerfTestVectors));
} // namespace
diff --git a/libvpx/test/vp9_end_to_end_test.cc b/libvpx/test/vp9_end_to_end_test.cc
index e100eb9..be1fa68 100644
--- a/libvpx/test/vp9_end_to_end_test.cc
+++ b/libvpx/test/vp9_end_to_end_test.cc
@@ -187,9 +187,23 @@
::testing::ValuesIn(kTestVectors),
::testing::ValuesIn(kCpuUsedVectors));
+#if CONFIG_VP9_HIGHBITDEPTH
+# if CONFIG_VP10_ENCODER
+// TODO(angiebird): many fail in high bitdepth mode.
+INSTANTIATE_TEST_CASE_P(
+ DISABLED_VP10, EndToEndTestLarge,
+ ::testing::Combine(
+ ::testing::Values(static_cast<const libvpx_test::CodecFactory *>(
+ &libvpx_test::kVP10)),
+ ::testing::ValuesIn(kEncodingModeVectors),
+ ::testing::ValuesIn(kTestVectors),
+ ::testing::ValuesIn(kCpuUsedVectors)));
+# endif // CONFIG_VP10_ENCODER
+#else
VP10_INSTANTIATE_TEST_CASE(
EndToEndTestLarge,
::testing::ValuesIn(kEncodingModeVectors),
::testing::ValuesIn(kTestVectors),
::testing::ValuesIn(kCpuUsedVectors));
+#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
diff --git a/libvpx/test/vp9_error_block_test.cc b/libvpx/test/vp9_error_block_test.cc
index 8c5d5a2..77b12ea 100644
--- a/libvpx/test/vp9_error_block_test.cc
+++ b/libvpx/test/vp9_error_block_test.cc
@@ -67,12 +67,22 @@
int64_t ret;
int64_t ref_ssz;
int64_t ref_ret;
+ const int msb = bit_depth_ + 8 - 1;
for (int i = 0; i < kNumIterations; ++i) {
int err_count = 0;
block_size = 16 << (i % 9); // All block sizes from 4x4, 8x4 ..64x64
for (int j = 0; j < block_size; j++) {
- coeff[j] = rnd(2 << 20) - (1 << 20);
- dqcoeff[j] = rnd(2 << 20) - (1 << 20);
+ // coeff and dqcoeff will always have at least the same sign, and this
+ // can be used for optimization, so generate test input precisely.
+ if (rnd(2)) {
+ // Positive number
+ coeff[j] = rnd(1 << msb);
+ dqcoeff[j] = rnd(1 << msb);
+ } else {
+ // Negative number
+ coeff[j] = -rnd(1 << msb);
+ dqcoeff[j] = -rnd(1 << msb);
+ }
}
ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz,
bit_depth_);
@@ -85,7 +95,7 @@
err_count_total += err_count;
}
EXPECT_EQ(0, err_count_total)
- << "Error: Error Block Test, C output doesn't match SSE2 output. "
+ << "Error: Error Block Test, C output doesn't match optimized output. "
<< "First failed at test case " << first_failure;
}
@@ -100,23 +110,36 @@
int64_t ret;
int64_t ref_ssz;
int64_t ref_ret;
- int max_val = ((1 << 20) - 1);
+ const int msb = bit_depth_ + 8 - 1;
+ int max_val = ((1 << msb) - 1);
for (int i = 0; i < kNumIterations; ++i) {
int err_count = 0;
- int k = (i / 9) % 5;
+ int k = (i / 9) % 9;
// Change the maximum coeff value, to test different bit boundaries
- if ( k == 4 && (i % 9) == 0 ) {
+ if ( k == 8 && (i % 9) == 0 ) {
max_val >>= 1;
}
block_size = 16 << (i % 9); // All block sizes from 4x4, 8x4 ..64x64
for (int j = 0; j < block_size; j++) {
- if (k < 4) { // Test at maximum values
- coeff[j] = k % 2 ? max_val : -max_val;
- dqcoeff[j] = (k >> 1) % 2 ? max_val : -max_val;
+ if (k < 4) {
+ // Test at positive maximum values
+ coeff[j] = k % 2 ? max_val : 0;
+ dqcoeff[j] = (k >> 1) % 2 ? max_val : 0;
+ } else if (k < 8) {
+ // Test at negative maximum values
+ coeff[j] = k % 2 ? -max_val : 0;
+ dqcoeff[j] = (k >> 1) % 2 ? -max_val : 0;
} else {
- coeff[j] = rnd(2 << 14) - (1 << 14);
- dqcoeff[j] = rnd(2 << 14) - (1 << 14);
+ if (rnd(2)) {
+ // Positive number
+ coeff[j] = rnd(1 << 14);
+ dqcoeff[j] = rnd(1 << 14);
+ } else {
+ // Negative number
+ coeff[j] = -rnd(1 << 14);
+ dqcoeff[j] = -rnd(1 << 14);
+ }
}
}
ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz,
@@ -130,13 +153,30 @@
err_count_total += err_count;
}
EXPECT_EQ(0, err_count_total)
- << "Error: Error Block Test, C output doesn't match SSE2 output. "
+ << "Error: Error Block Test, C output doesn't match optimized output. "
<< "First failed at test case " << first_failure;
}
using std::tr1::make_tuple;
+#if CONFIG_USE_X86INC
+int64_t wrap_vp9_highbd_block_error_8bit_c(const tran_low_t *coeff,
+ const tran_low_t *dqcoeff,
+ intptr_t block_size,
+ int64_t *ssz, int bps) {
+ assert(bps == 8);
+ return vp9_highbd_block_error_8bit_c(coeff, dqcoeff, block_size, ssz);
+}
+
#if HAVE_SSE2
+int64_t wrap_vp9_highbd_block_error_8bit_sse2(const tran_low_t *coeff,
+ const tran_low_t *dqcoeff,
+ intptr_t block_size,
+ int64_t *ssz, int bps) {
+ assert(bps == 8);
+ return vp9_highbd_block_error_8bit_sse2(coeff, dqcoeff, block_size, ssz);
+}
+
INSTANTIATE_TEST_CASE_P(
SSE2, ErrorBlockTest,
::testing::Values(
@@ -145,7 +185,27 @@
make_tuple(&vp9_highbd_block_error_sse2,
&vp9_highbd_block_error_c, VPX_BITS_12),
make_tuple(&vp9_highbd_block_error_sse2,
- &vp9_highbd_block_error_c, VPX_BITS_8)));
+ &vp9_highbd_block_error_c, VPX_BITS_8),
+ make_tuple(&wrap_vp9_highbd_block_error_8bit_sse2,
+ &wrap_vp9_highbd_block_error_8bit_c, VPX_BITS_8)));
#endif // HAVE_SSE2
+
+#if HAVE_AVX
+int64_t wrap_vp9_highbd_block_error_8bit_avx(const tran_low_t *coeff,
+ const tran_low_t *dqcoeff,
+ intptr_t block_size,
+ int64_t *ssz, int bps) {
+ assert(bps == 8);
+ return vp9_highbd_block_error_8bit_avx(coeff, dqcoeff, block_size, ssz);
+}
+
+INSTANTIATE_TEST_CASE_P(
+ AVX, ErrorBlockTest,
+ ::testing::Values(
+ make_tuple(&wrap_vp9_highbd_block_error_8bit_avx,
+ &wrap_vp9_highbd_block_error_8bit_c, VPX_BITS_8)));
+#endif // HAVE_AVX
+
+#endif // CONFIG_USE_X86INC
#endif // CONFIG_VP9_HIGHBITDEPTH
} // namespace
diff --git a/libvpx/test/vp9_thread_test.cc b/libvpx/test/vp9_thread_test.cc
index 233e1b1..92e4b96 100644
--- a/libvpx/test/vp9_thread_test.cc
+++ b/libvpx/test/vp9_thread_test.cc
@@ -190,7 +190,7 @@
void DecodeFiles(const FileList files[]) {
for (const FileList *iter = files; iter->name != NULL; ++iter) {
SCOPED_TRACE(iter->name);
- for (int t = 2; t <= 8; ++t) {
+ for (int t = 1; t <= 8; ++t) {
EXPECT_EQ(iter->expected_md5, DecodeFile(iter->name, t))
<< "threads = " << t;
}
@@ -235,13 +235,13 @@
EXPECT_EQ(expected_md5, DecodeFile(filename, 2));
}
-TEST(VP9DecodeMultiThreadedTest, Decode) {
+TEST(VP9DecodeMultiThreadedTest, NoTilesNonFrameParallel) {
// no tiles or frame parallel; this exercises loop filter threading.
EXPECT_EQ("b35a1b707b28e82be025d960aba039bc",
DecodeFile("vp90-2-03-size-226x226.webm", 2));
}
-TEST(VP9DecodeMultiThreadedTest, Decode2) {
+TEST(VP9DecodeMultiThreadedTest, FrameParallel) {
static const FileList files[] = {
{ "vp90-2-08-tile_1x2_frame_parallel.webm",
"68ede6abd66bae0a2edf2eb9232241b6" },
@@ -255,8 +255,7 @@
DecodeFiles(files);
}
-// Test tile quantity changes within one file.
-TEST(VP9DecodeMultiThreadedTest, Decode3) {
+TEST(VP9DecodeMultiThreadedTest, FrameParallelResize) {
static const FileList files[] = {
{ "vp90-2-14-resize-fp-tiles-1-16.webm",
"0cd5e632c326297e975f38949c31ea94" },
@@ -307,6 +306,19 @@
DecodeFiles(files);
}
+
+TEST(VP9DecodeMultiThreadedTest, NonFrameParallel) {
+ static const FileList files[] = {
+ { "vp90-2-08-tile_1x2.webm", "570b4a5d5a70d58b5359671668328a16" },
+ { "vp90-2-08-tile_1x4.webm", "988d86049e884c66909d2d163a09841a" },
+ { "vp90-2-08-tile_1x8.webm", "0941902a52e9092cb010905eab16364c" },
+ { "vp90-2-08-tile-4x1.webm", "06505aade6647c583c8e00a2f582266f" },
+ { "vp90-2-08-tile-4x4.webm", "85c2299892460d76e2c600502d52bfe2" },
+ { NULL, NULL }
+ };
+
+ DecodeFiles(files);
+}
#endif // CONFIG_WEBM_IO
INSTANTIATE_TEST_CASE_P(Synchronous, VPxWorkerThreadTest, ::testing::Bool());
diff --git a/libvpx/test/y4m_video_source.h b/libvpx/test/y4m_video_source.h
index 378e75b..03d9388 100644
--- a/libvpx/test/y4m_video_source.h
+++ b/libvpx/test/y4m_video_source.h
@@ -9,6 +9,7 @@
*/
#ifndef TEST_Y4M_VIDEO_SOURCE_H_
#define TEST_Y4M_VIDEO_SOURCE_H_
+#include <algorithm>
#include <string>
#include "test/video_source.h"
@@ -91,6 +92,18 @@
y4m_input_fetch_frame(&y4m_, input_file_, img_.get());
}
+ // Swap buffers with another y4m source. This allows reading a new frame
+ // while keeping the old frame around. A whole Y4mSource is required and
+ // not just a vpx_image_t because of how the y4m reader manipulates
+ // vpx_image_t internals.
+ void SwapBuffers(Y4mVideoSource *other) {
+ std::swap(other->y4m_.dst_buf, y4m_.dst_buf);
+ vpx_image_t *tmp;
+ tmp = other->img_.release();
+ other->img_.reset(img_.release());
+ img_.reset(tmp);
+ }
+
protected:
void CloseSource() {
y4m_input_close(&y4m_);
diff --git a/libvpx/third_party/libwebm/README.libvpx b/libvpx/third_party/libwebm/README.libvpx
index 91875e1..2989d3d 100644
--- a/libvpx/third_party/libwebm/README.libvpx
+++ b/libvpx/third_party/libwebm/README.libvpx
@@ -1,7 +1,10 @@
URL: https://chromium.googlesource.com/webm/libwebm
-Version: 2dec09426ab62b794464cc9971bd135b4d313e65
+Version: 476366249e1fda7710a389cd41c57db42305e0d4
License: BSD
License File: LICENSE.txt
Description:
libwebm is used to handle WebM container I/O.
+
+Local Changes:
+* <none>
diff --git a/libvpx/third_party/libwebm/mkvmuxer.hpp b/libvpx/third_party/libwebm/mkvmuxer.hpp
index 497ad4c..03a002c 100644
--- a/libvpx/third_party/libwebm/mkvmuxer.hpp
+++ b/libvpx/third_party/libwebm/mkvmuxer.hpp
@@ -528,7 +528,7 @@
public:
// Audio and video type defined by the Matroska specs.
enum { kVideo = 0x1, kAudio = 0x2 };
- // Opus, Vorbis, VP8, and VP9 codec ids defined by the Matroska specs.
+
static const char kOpusCodecId[];
static const char kVorbisCodecId[];
static const char kVp8CodecId[];
diff --git a/libvpx/third_party/libwebm/mkvparser.cpp b/libvpx/third_party/libwebm/mkvparser.cpp
index fc01be5..f2855d5 100644
--- a/libvpx/third_party/libwebm/mkvparser.cpp
+++ b/libvpx/third_party/libwebm/mkvparser.cpp
@@ -7,45 +7,66 @@
// be found in the AUTHORS file in the root of the source tree.
#include "mkvparser.hpp"
+
+#if defined(_MSC_VER) && _MSC_VER < 1800
+#include <float.h> // _isnan() / _finite()
+#define MSC_COMPAT
+#endif
+
#include <cassert>
+#include <climits>
+#include <cmath>
#include <cstring>
#include <new>
-#include <climits>
+
+#include "webmids.hpp"
#ifdef _MSC_VER
// Disable MSVC warnings that suggest making code non-portable.
#pragma warning(disable : 4996)
#endif
-mkvparser::IMkvReader::~IMkvReader() {}
+namespace mkvparser {
-void mkvparser::GetVersion(int& major, int& minor, int& build, int& revision) {
+#ifdef MSC_COMPAT
+inline bool isnan(double val) { return !!_isnan(val); }
+inline bool isinf(double val) { return !_finite(val); }
+#else
+inline bool isnan(double val) { return std::isnan(val); }
+inline bool isinf(double val) { return std::isinf(val); }
+#endif // MSC_COMPAT
+
+IMkvReader::~IMkvReader() {}
+
+template<typename Type> Type* SafeArrayAlloc(unsigned long long num_elements,
+ unsigned long long element_size) {
+ if (num_elements == 0 || element_size == 0)
+ return NULL;
+
+ const size_t kMaxAllocSize = 0x80000000; // 2GiB
+ const unsigned long long num_bytes = num_elements * element_size;
+ if (element_size > (kMaxAllocSize / num_elements))
+ return NULL;
+ if (num_bytes != static_cast<size_t>(num_bytes))
+ return NULL;
+
+ return new (std::nothrow) Type[static_cast<size_t>(num_bytes)];
+}
+
+void GetVersion(int& major, int& minor, int& build, int& revision) {
major = 1;
minor = 0;
build = 0;
revision = 30;
}
-long long mkvparser::ReadUInt(IMkvReader* pReader, long long pos, long& len) {
- assert(pReader);
- assert(pos >= 0);
-
- int status;
-
- //#ifdef _DEBUG
- // long long total, available;
- // status = pReader->Length(&total, &available);
- // assert(status >= 0);
- // assert((total < 0) || (available <= total));
- // assert(pos < available);
- // assert((available - pos) >= 1); //assume here max u-int len is 8
- //#endif
+long long ReadUInt(IMkvReader* pReader, long long pos, long& len) {
+ if (!pReader || pos < 0)
+ return E_FILE_FORMAT_INVALID;
len = 1;
-
unsigned char b;
-
- status = pReader->Read(pos, 1, &b);
+ int status = pReader->Read(pos, 1, &b);
if (status < 0) // error or underflow
return status;
@@ -63,10 +84,6 @@
++len;
}
- //#ifdef _DEBUG
- // assert((available - pos) >= len);
- //#endif
-
long long result = b & (~m);
++pos;
@@ -92,16 +109,76 @@
return result;
}
-long long mkvparser::GetUIntLength(IMkvReader* pReader, long long pos,
- long& len) {
- assert(pReader);
- assert(pos >= 0);
+// Reads an EBML ID and returns it.
+// An ID must be at least 1 byte long, cannot exceed 4, and its value must be
+// greater than 0.
+// See known EBML values and EBMLMaxIDLength:
+// http://www.matroska.org/technical/specs/index.html
+// Returns the ID, or a value less than 0 to report an error while reading the
+// ID.
+long long ReadID(IMkvReader* pReader, long long pos, long& len) {
+ if (pReader == NULL || pos < 0)
+ return E_FILE_FORMAT_INVALID;
+
+ // Read the first byte. The length in bytes of the ID is determined by
+ // finding the first set bit in the first byte of the ID.
+ unsigned char temp_byte = 0;
+ int read_status = pReader->Read(pos, 1, &temp_byte);
+
+ if (read_status < 0)
+ return E_FILE_FORMAT_INVALID;
+ else if (read_status > 0) // No data to read.
+ return E_BUFFER_NOT_FULL;
+
+ if (temp_byte == 0) // ID length > 8 bytes; invalid file.
+ return E_FILE_FORMAT_INVALID;
+
+ int bit_pos = 0;
+ const int kMaxIdLengthInBytes = 4;
+ const int kCheckByte = 0x80;
+
+ // Find the first bit that's set.
+ bool found_bit = false;
+ for (; bit_pos < kMaxIdLengthInBytes; ++bit_pos) {
+ if ((kCheckByte >> bit_pos) & temp_byte) {
+ found_bit = true;
+ break;
+ }
+ }
+
+ if (!found_bit) {
+ // The value is too large to be a valid ID.
+ return E_FILE_FORMAT_INVALID;
+ }
+
+ // Read the remaining bytes of the ID (if any).
+ const int id_length = bit_pos + 1;
+ long long ebml_id = temp_byte;
+ for (int i = 1; i < id_length; ++i) {
+ ebml_id <<= 8;
+ read_status = pReader->Read(pos + i, 1, &temp_byte);
+
+ if (read_status < 0)
+ return E_FILE_FORMAT_INVALID;
+ else if (read_status > 0)
+ return E_BUFFER_NOT_FULL;
+
+ ebml_id |= temp_byte;
+ }
+
+ len = id_length;
+ return ebml_id;
+}
+
+long long GetUIntLength(IMkvReader* pReader, long long pos, long& len) {
+ if (!pReader || pos < 0)
+ return E_FILE_FORMAT_INVALID;
long long total, available;
int status = pReader->Length(&total, &available);
- assert(status >= 0);
- assert((total < 0) || (available <= total));
+ if (status < 0 || (total >= 0 && available > total))
+ return E_FILE_FORMAT_INVALID;
len = 1;
@@ -112,11 +189,9 @@
status = pReader->Read(pos, 1, &b);
- if (status < 0)
+ if (status != 0)
return status;
- assert(status == 0);
-
if (b == 0) // we can't handle u-int values larger than 8 bytes
return E_FILE_FORMAT_INVALID;
@@ -132,12 +207,8 @@
// TODO(vigneshv): This function assumes that unsigned values never have their
// high bit set.
-long long mkvparser::UnserializeUInt(IMkvReader* pReader, long long pos,
- long long size) {
- assert(pReader);
- assert(pos >= 0);
-
- if ((size <= 0) || (size > 8))
+long long UnserializeUInt(IMkvReader* pReader, long long pos, long long size) {
+ if (!pReader || pos < 0 || (size <= 0) || (size > 8))
return E_FILE_FORMAT_INVALID;
long long result = 0;
@@ -159,12 +230,9 @@
return result;
}
-long mkvparser::UnserializeFloat(IMkvReader* pReader, long long pos,
- long long size_, double& result) {
- assert(pReader);
- assert(pos >= 0);
-
- if ((size_ != 4) && (size_ != 8))
+long UnserializeFloat(IMkvReader* pReader, long long pos, long long size_,
+ double& result) {
+ if (!pReader || pos < 0 || ((size_ != 4) && (size_ != 8)))
return E_FILE_FORMAT_INVALID;
const long size = static_cast<long>(size_);
@@ -195,8 +263,6 @@
result = f;
} else {
- assert(size == 8);
-
union {
double d;
unsigned long long dd;
@@ -216,28 +282,25 @@
result = d;
}
+ if (mkvparser::isinf(result) || mkvparser::isnan(result))
+ return E_FILE_FORMAT_INVALID;
+
return 0;
}
-long mkvparser::UnserializeInt(IMkvReader* pReader, long long pos,
- long long size, long long& result) {
- assert(pReader);
- assert(pos >= 0);
- assert(size > 0);
- assert(size <= 8);
+long UnserializeInt(IMkvReader* pReader, long long pos, long long size,
+ long long& result_ref) {
+ if (!pReader || pos < 0 || size < 1 || size > 8)
+ return E_FILE_FORMAT_INVALID;
- {
- signed char b;
+ signed char first_byte = 0;
+ const long status = pReader->Read(pos, 1, (unsigned char*)&first_byte);
- const long status = pReader->Read(pos, 1, (unsigned char*)&b);
+ if (status < 0)
+ return status;
- if (status < 0)
- return status;
-
- result = b;
-
- ++pos;
- }
+ unsigned long long result = first_byte;
+ ++pos;
for (long i = 1; i < size; ++i) {
unsigned char b;
@@ -253,27 +316,28 @@
++pos;
}
- return 0; // success
+ result_ref = static_cast<long long>(result);
+ return 0;
}
-long mkvparser::UnserializeString(IMkvReader* pReader, long long pos,
- long long size_, char*& str) {
+long UnserializeString(IMkvReader* pReader, long long pos, long long size,
+ char*& str) {
delete[] str;
str = NULL;
- if (size_ >= LONG_MAX) // we need (size+1) chars
+ if (size >= LONG_MAX || size < 0)
return E_FILE_FORMAT_INVALID;
- const long size = static_cast<long>(size_);
+ // +1 for '\0' terminator
+ const long required_size = static_cast<long>(size) + 1;
- str = new (std::nothrow) char[size + 1];
-
+ str = SafeArrayAlloc<char>(1, required_size);
if (str == NULL)
- return -1;
+ return E_FILE_FORMAT_INVALID;
unsigned char* const buf = reinterpret_cast<unsigned char*>(str);
- const long status = pReader->Read(pos, size, buf);
+ const long status = pReader->Read(pos, static_cast<long>(size), buf);
if (status) {
delete[] str;
@@ -282,137 +346,149 @@
return status;
}
- str[size] = '\0';
-
- return 0; // success
+ str[required_size - 1] = '\0';
+ return 0;
}
-long mkvparser::ParseElementHeader(IMkvReader* pReader, long long& pos,
- long long stop, long long& id,
- long long& size) {
- if ((stop >= 0) && (pos >= stop))
+long ParseElementHeader(IMkvReader* pReader, long long& pos,
+ long long stop, long long& id,
+ long long& size) {
+ if (stop >= 0 && pos >= stop)
return E_FILE_FORMAT_INVALID;
long len;
- id = ReadUInt(pReader, pos, len);
+ id = ReadID(pReader, pos, len);
if (id < 0)
return E_FILE_FORMAT_INVALID;
pos += len; // consume id
- if ((stop >= 0) && (pos >= stop))
+ if (stop >= 0 && pos >= stop)
return E_FILE_FORMAT_INVALID;
size = ReadUInt(pReader, pos, len);
- if (size < 0)
+ if (size < 0 || len < 1 || len > 8) {
+ // Invalid: Negative payload size, negative or 0 length integer, or integer
+ // larger than 64 bits (libwebm cannot handle them).
+ return E_FILE_FORMAT_INVALID;
+ }
+
+ // Avoid rolling over pos when very close to LLONG_MAX.
+ const unsigned long long rollover_check =
+ static_cast<unsigned long long>(pos) + len;
+ if (rollover_check > LLONG_MAX)
return E_FILE_FORMAT_INVALID;
pos += len; // consume length of size
// pos now designates payload
- if ((stop >= 0) && ((pos + size) > stop))
+ if (stop >= 0 && pos > stop) // pos == stop is a valid empty payload
return E_FILE_FORMAT_INVALID;
return 0; // success
}
-bool mkvparser::Match(IMkvReader* pReader, long long& pos, unsigned long id_,
- long long& val) {
- assert(pReader);
- assert(pos >= 0);
-
- long long total, available;
-
- const long status = pReader->Length(&total, &available);
- assert(status >= 0);
- assert((total < 0) || (available <= total));
- if (status < 0)
+bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id,
+ long long& val) {
+ if (!pReader || pos < 0)
return false;
- long len;
+ long long total = 0;
+ long long available = 0;
- const long long id = ReadUInt(pReader, pos, len);
- assert(id >= 0);
- assert(len > 0);
- assert(len <= 8);
- assert((pos + len) <= available);
+ const long status = pReader->Length(&total, &available);
+ if (status < 0 || (total >= 0 && available > total))
+ return false;
- if ((unsigned long)id != id_)
+ long len = 0;
+
+ const long long id = ReadID(pReader, pos, len);
+ if (id < 0 || (available - pos) < len) // ID must lie within available data
+ return false;
+
+ if (static_cast<unsigned long>(id) != expected_id)
return false;
pos += len; // consume id
const long long size = ReadUInt(pReader, pos, len);
- assert(size >= 0);
- assert(size <= 8);
- assert(len > 0);
- assert(len <= 8);
- assert((pos + len) <= available);
+ if (size < 0 || size > 8 || len < 1 || len > 8 || (available - pos) < len)
+ return false;
pos += len; // consume length of size of payload
val = UnserializeUInt(pReader, pos, size);
- assert(val >= 0);
+ if (val < 0)
+ return false;
pos += size; // consume size of payload
return true;
}
-bool mkvparser::Match(IMkvReader* pReader, long long& pos, unsigned long id_,
- unsigned char*& buf, size_t& buflen) {
- assert(pReader);
- assert(pos >= 0);
-
- long long total, available;
-
- long status = pReader->Length(&total, &available);
- assert(status >= 0);
- assert((total < 0) || (available <= total));
- if (status < 0)
+bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id,
+ unsigned char*& buf, size_t& buflen) {
+ if (!pReader || pos < 0)
return false;
- long len;
- const long long id = ReadUInt(pReader, pos, len);
- assert(id >= 0);
- assert(len > 0);
- assert(len <= 8);
- assert((pos + len) <= available);
+ long long total = 0;
+ long long available = 0;
- if ((unsigned long)id != id_)
+ long status = pReader->Length(&total, &available);
+ if (status < 0 || (total >= 0 && available > total))
+ return false;
+
+ long len = 0;
+ const long long id = ReadID(pReader, pos, len);
+ if (id < 0 || (available - pos) < len) // ID must lie within available data
+ return false;
+
+ if (static_cast<unsigned long>(id) != expected_id)
return false;
pos += len; // consume id
- const long long size_ = ReadUInt(pReader, pos, len);
- assert(size_ >= 0);
- assert(len > 0);
- assert(len <= 8);
- assert((pos + len) <= available);
+ const long long size = ReadUInt(pReader, pos, len);
+ if (size < 0 || len <= 0 || len > 8 || (available - pos) < len)
+ return false;
+
+ unsigned long long rollover_check =
+ static_cast<unsigned long long>(pos) + len;
+ if (rollover_check > LLONG_MAX)
+ return false;
pos += len; // consume length of size of payload
- assert((pos + size_) <= available);
- const long buflen_ = static_cast<long>(size_);
+ rollover_check = static_cast<unsigned long long>(pos) + size;
+ if (rollover_check > LLONG_MAX)
+ return false;
- buf = new (std::nothrow) unsigned char[buflen_];
- assert(buf); // TODO
+ if ((pos + size) > available)
+ return false;
+
+ if (size >= LONG_MAX)
+ return false;
+
+ const long buflen_ = static_cast<long>(size);
+
+ buf = SafeArrayAlloc<unsigned char>(1, buflen_);
+ if (!buf)
+ return false;
status = pReader->Read(pos, buflen_, buf);
- assert(status == 0); // TODO
+ if (status != 0)
+ return false;
buflen = buflen_;
- pos += size_; // consume size of payload
+ pos += size; // consume size of payload
return true;
}
-namespace mkvparser {
-
EBMLHeader::EBMLHeader() : m_docType(NULL) { Init(); }
EBMLHeader::~EBMLHeader() { delete[] m_docType; }
@@ -433,7 +509,8 @@
}
long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) {
- assert(pReader);
+ if (!pReader)
+ return E_FILE_FORMAT_INVALID;
long long total, available;
@@ -445,67 +522,45 @@
pos = 0;
long long end = (available >= 1024) ? 1024 : available;
- for (;;) {
- unsigned char b = 0;
+ // Scan until we find what looks like the first byte of the EBML header.
+ const long long kMaxScanBytes = (available >= 1024) ? 1024 : available;
+ const unsigned char kEbmlByte0 = 0x1A;
+ unsigned char scan_byte = 0;
- while (pos < end) {
- status = pReader->Read(pos, 1, &b);
+ while (pos < kMaxScanBytes) {
+ status = pReader->Read(pos, 1, &scan_byte);
- if (status < 0) // error
- return status;
+ if (status < 0) // error
+ return status;
+ else if (status > 0)
+ return E_BUFFER_NOT_FULL;
- if (b == 0x1A)
- break;
-
- ++pos;
- }
-
- if (b != 0x1A) {
- if (pos >= 1024)
- return E_FILE_FORMAT_INVALID; // don't bother looking anymore
-
- if ((total >= 0) && ((total - available) < 5))
- return E_FILE_FORMAT_INVALID;
-
- return available + 5; // 5 = 4-byte ID + 1st byte of size
- }
-
- if ((total >= 0) && ((total - pos) < 5))
- return E_FILE_FORMAT_INVALID;
-
- if ((available - pos) < 5)
- return pos + 5; // try again later
-
- long len;
-
- const long long result = ReadUInt(pReader, pos, len);
-
- if (result < 0) // error
- return result;
-
- if (result == 0x0A45DFA3) { // EBML Header ID
- pos += len; // consume ID
+ if (scan_byte == kEbmlByte0)
break;
- }
- ++pos; // throw away just the 0x1A byte, and try again
+ ++pos;
}
- // pos designates start of size field
+ long len = 0;
+ const long long ebml_id = ReadID(pReader, pos, len);
- // get length of size field
+ // TODO(tomfinegan): Move Matroska ID constants into a common namespace.
+ if (len != 4 || ebml_id != mkvmuxer::kMkvEBML)
+ return E_FILE_FORMAT_INVALID;
- long len;
+ // Move read pos forward to the EBML header size field.
+ pos += 4;
+
+ // Read length of size field.
long long result = GetUIntLength(pReader, pos, len);
if (result < 0) // error
- return result;
+ return E_FILE_FORMAT_INVALID;
+ else if (result > 0) // need more data
+ return E_BUFFER_NOT_FULL;
- if (result > 0) // need more data
- return result;
-
- assert(len > 0);
- assert(len <= 8);
+ if (len < 1 || len > 8)
+ return E_FILE_FORMAT_INVALID;
if ((total >= 0) && ((total - pos) < len))
return E_FILE_FORMAT_INVALID;
@@ -513,8 +568,7 @@
if ((available - pos) < len)
return pos + len; // try again later
- // get the EBML header size
-
+ // Read the EBML header size.
result = ReadUInt(pReader, pos, len);
if (result < 0) // error
@@ -542,30 +596,30 @@
if (status < 0) // error
return status;
- if (size == 0) // weird
+ if (size == 0)
return E_FILE_FORMAT_INVALID;
- if (id == 0x0286) { // version
+ if (id == mkvmuxer::kMkvEBMLVersion) {
m_version = UnserializeUInt(pReader, pos, size);
if (m_version <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x02F7) { // read version
+ } else if (id == mkvmuxer::kMkvEBMLReadVersion) {
m_readVersion = UnserializeUInt(pReader, pos, size);
if (m_readVersion <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x02F2) { // max id length
+ } else if (id == mkvmuxer::kMkvEBMLMaxIDLength) {
m_maxIdLength = UnserializeUInt(pReader, pos, size);
if (m_maxIdLength <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x02F3) { // max size length
+ } else if (id == mkvmuxer::kMkvEBMLMaxSizeLength) {
m_maxSizeLength = UnserializeUInt(pReader, pos, size);
if (m_maxSizeLength <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x0282) { // doctype
+ } else if (id == mkvmuxer::kMkvDocType) {
if (m_docType)
return E_FILE_FORMAT_INVALID;
@@ -573,12 +627,12 @@
if (status) // error
return status;
- } else if (id == 0x0287) { // doctype version
+ } else if (id == mkvmuxer::kMkvDocTypeVersion) {
m_docTypeVersion = UnserializeUInt(pReader, pos, size);
if (m_docTypeVersion <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x0285) { // doctype read version
+ } else if (id == mkvmuxer::kMkvDocTypeReadVersion) {
m_docTypeReadVersion = UnserializeUInt(pReader, pos, size);
if (m_docTypeReadVersion <= 0)
@@ -588,7 +642,18 @@
pos += size;
}
- assert(pos == end);
+ if (pos != end)
+ return E_FILE_FORMAT_INVALID;
+
+ // Make sure DocType, DocTypeReadVersion, and DocTypeVersion are valid.
+ if (m_docType == NULL || m_docTypeReadVersion <= 0 || m_docTypeVersion <= 0)
+ return E_FILE_FORMAT_INVALID;
+
+ // Make sure EBMLMaxIDLength and EBMLMaxSizeLength are valid.
+ if (m_maxIdLength <= 0 || m_maxIdLength > 4 ||
+ m_maxSizeLength <= 0 || m_maxSizeLength > 8)
+ return E_FILE_FORMAT_INVALID;
+
return 0;
}
@@ -621,8 +686,6 @@
while (i != j) {
Cluster* const p = *i++;
- assert(p);
-
delete p;
}
@@ -638,8 +701,8 @@
long long Segment::CreateInstance(IMkvReader* pReader, long long pos,
Segment*& pSegment) {
- assert(pReader);
- assert(pos >= 0);
+ if (pReader == NULL || pos < 0)
+ return E_PARSE_FAILED;
pSegment = NULL;
@@ -691,10 +754,10 @@
return pos + len;
const long long idpos = pos;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
- if (id < 0) // error
- return id;
+ if (id < 0)
+ return E_FILE_FORMAT_INVALID;
pos += len; // consume ID
@@ -723,7 +786,7 @@
// Handle "unknown size" for live streaming of webm files.
const long long unknown_size = (1LL << (7 * len)) - 1;
- if (id == 0x08538067) { // Segment ID
+ if (id == mkvmuxer::kMkvSegment) {
if (size == unknown_size)
size = -1;
@@ -733,12 +796,9 @@
else if ((pos + size) > total)
size = -1;
- pSegment = new (std::nothrow) Segment(pReader, idpos,
- // elem_size
- pos, size);
-
- if (pSegment == 0)
- return -1; // generic error
+ pSegment = new (std::nothrow) Segment(pReader, idpos, pos, size);
+ if (pSegment == NULL)
+ return E_PARSE_FAILED;
return 0; // success
}
@@ -767,11 +827,15 @@
if (status < 0) // error
return status;
- assert((total < 0) || (available <= total));
+ if (total > 0 && available > total)
+ return E_FILE_FORMAT_INVALID;
const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
- assert((segment_stop < 0) || (total < 0) || (segment_stop <= total));
- assert((segment_stop < 0) || (m_pos <= segment_stop));
+
+ if ((segment_stop >= 0 && total >= 0 && segment_stop > total) ||
+ (segment_stop >= 0 && m_pos > segment_stop)) {
+ return E_FILE_FORMAT_INVALID;
+ }
for (;;) {
if ((total >= 0) && (m_pos >= total))
@@ -783,6 +847,11 @@
long long pos = m_pos;
const long long element_start = pos;
+ // Avoid rolling over pos when very close to LLONG_MAX.
+ unsigned long long rollover_check = pos + 1ULL;
+ if (rollover_check > LLONG_MAX)
+ return E_FILE_FORMAT_INVALID;
+
if ((pos + 1) > available)
return (pos + 1);
@@ -792,8 +861,10 @@
if (result < 0) // error
return result;
- if (result > 0) // underflow (weird)
+ if (result > 0) {
+ // MkvReader doesn't have enough data to satisfy this read attempt.
return (pos + 1);
+ }
if ((segment_stop >= 0) && ((pos + len) > segment_stop))
return E_FILE_FORMAT_INVALID;
@@ -802,12 +873,12 @@
return pos + len;
const long long idpos = pos;
- const long long id = ReadUInt(m_pReader, idpos, len);
+ const long long id = ReadID(m_pReader, idpos, len);
- if (id < 0) // error
- return id;
+ if (id < 0)
+ return E_FILE_FORMAT_INVALID;
- if (id == 0x0F43B675) // Cluster ID
+ if (id == mkvmuxer::kMkvCluster)
break;
pos += len; // consume ID
@@ -821,8 +892,10 @@
if (result < 0) // error
return result;
- if (result > 0) // underflow (weird)
+ if (result > 0) {
+ // MkvReader doesn't have enough data to satisfy this read attempt.
return (pos + 1);
+ }
if ((segment_stop >= 0) && ((pos + len) > segment_stop))
return E_FILE_FORMAT_INVALID;
@@ -832,11 +905,19 @@
const long long size = ReadUInt(m_pReader, pos, len);
- if (size < 0) // error
+ if (size < 0 || len < 1 || len > 8) {
+ // TODO(tomfinegan): ReadUInt should return an error when len is < 1 or
+ // len > 8 is true instead of checking this _everywhere_.
return size;
+ }
pos += len; // consume length of size of element
+ // Avoid rolling over pos when very close to LLONG_MAX.
+ rollover_check = static_cast<unsigned long long>(pos) + size;
+ if (rollover_check > LLONG_MAX)
+ return E_FILE_FORMAT_INVALID;
+
const long long element_size = size + pos - element_start;
// Pos now points to start of payload
@@ -849,7 +930,7 @@
if ((pos + size) > available)
return pos + size;
- if (id == 0x0549A966) { // Segment Info ID
+ if (id == mkvmuxer::kMkvInfo) {
if (m_pInfo)
return E_FILE_FORMAT_INVALID;
@@ -863,7 +944,7 @@
if (status)
return status;
- } else if (id == 0x0654AE6B) { // Tracks ID
+ } else if (id == mkvmuxer::kMkvTracks) {
if (m_pTracks)
return E_FILE_FORMAT_INVALID;
@@ -877,7 +958,7 @@
if (status)
return status;
- } else if (id == 0x0C53BB6B) { // Cues ID
+ } else if (id == mkvmuxer::kMkvCues) {
if (m_pCues == NULL) {
m_pCues = new (std::nothrow)
Cues(this, pos, size, element_start, element_size);
@@ -885,7 +966,7 @@
if (m_pCues == NULL)
return -1;
}
- } else if (id == 0x014D9B74) { // SeekHead ID
+ } else if (id == mkvmuxer::kMkvSeekHead) {
if (m_pSeekHead == NULL) {
m_pSeekHead = new (std::nothrow)
SeekHead(this, pos, size, element_start, element_size);
@@ -898,7 +979,7 @@
if (status)
return status;
}
- } else if (id == 0x0043A770) { // Chapters ID
+ } else if (id == mkvmuxer::kMkvChapters) {
if (m_pChapters == NULL) {
m_pChapters = new (std::nothrow)
Chapters(this, pos, size, element_start, element_size);
@@ -911,7 +992,7 @@
if (status)
return status;
}
- } else if (id == 0x0254C367) { // Tags ID
+ } else if (id == mkvmuxer::kMkvTags) {
if (m_pTags == NULL) {
m_pTags = new (std::nothrow)
Tags(this, pos, size, element_start, element_size);
@@ -929,7 +1010,8 @@
m_pos = pos + size; // consume payload
}
- assert((segment_stop < 0) || (m_pos <= segment_stop));
+ if (segment_stop >= 0 && m_pos > segment_stop)
+ return E_FILE_FORMAT_INVALID;
if (m_pInfo == NULL) // TODO: liberalize this behavior
return E_FILE_FORMAT_INVALID;
@@ -960,7 +1042,8 @@
if (status < 0) // error
return status;
- assert((total < 0) || (avail <= total));
+ if (total >= 0 && avail > total)
+ return E_FILE_FORMAT_INVALID;
const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size;
@@ -988,7 +1071,7 @@
if (result < 0) // error
return static_cast<long>(result);
- if (result > 0) // weird
+ if (result > 0)
return E_BUFFER_NOT_FULL;
if ((segment_stop >= 0) && ((pos + len) > segment_stop))
@@ -998,10 +1081,10 @@
return E_BUFFER_NOT_FULL;
const long long idpos = pos;
- const long long id = ReadUInt(m_pReader, idpos, len);
+ const long long id = ReadID(m_pReader, idpos, len);
- if (id < 0) // error (or underflow)
- return static_cast<long>(id);
+ if (id < 0)
+ return E_FILE_FORMAT_INVALID;
pos += len; // consume ID
@@ -1017,7 +1100,7 @@
if (result < 0) // error
return static_cast<long>(result);
- if (result > 0) // weird
+ if (result > 0)
return E_BUFFER_NOT_FULL;
if ((segment_stop >= 0) && ((pos + len) > segment_stop))
@@ -1035,7 +1118,8 @@
// pos now points to start of payload
- if (size == 0) { // weird
+ if (size == 0) {
+ // Missing element payload: move on.
m_pos = pos;
continue;
}
@@ -1047,24 +1131,30 @@
return E_FILE_FORMAT_INVALID;
}
- if (id == 0x0C53BB6B) { // Cues ID
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID; // TODO: liberalize
+ if (id == mkvmuxer::kMkvCues) {
+ if (size == unknown_size) {
+ // Cues element of unknown size: Not supported.
+ return E_FILE_FORMAT_INVALID;
+ }
if (m_pCues == NULL) {
const long long element_size = (pos - idpos) + size;
- m_pCues = new Cues(this, pos, size, idpos, element_size);
- assert(m_pCues); // TODO
+ m_pCues = new (std::nothrow) Cues(this, pos, size, idpos, element_size);
+ if (m_pCues == NULL)
+ return -1;
}
m_pos = pos + size; // consume payload
continue;
}
- if (id != 0x0F43B675) { // Cluster ID
+ if (id != mkvmuxer::kMkvCluster) {
+ // Besides the Segment, Libwebm allows only cluster elements of unknown
+ // size. Fail the parse upon encountering a non-cluster element reporting
+ // unknown size.
if (size == unknown_size)
- return E_FILE_FORMAT_INVALID; // TODO: liberalize
+ return E_FILE_FORMAT_INVALID;
m_pos = pos + size; // consume payload
continue;
@@ -1080,7 +1170,10 @@
break;
}
- assert(cluster_off >= 0); // have cluster
+ if (cluster_off < 0) {
+ // No cluster, die.
+ return E_FILE_FORMAT_INVALID;
+ }
long long pos_;
long len_;
@@ -1126,14 +1219,16 @@
const long idx = m_clusterCount;
if (m_clusterPreloadCount > 0) {
- assert(idx < m_clusterSize);
+ if (idx >= m_clusterSize)
+ return E_FILE_FORMAT_INVALID;
Cluster* const pCluster = m_clusters[idx];
- assert(pCluster);
- assert(pCluster->m_index < 0);
+ if (pCluster == NULL || pCluster->m_index >= 0)
+ return E_FILE_FORMAT_INVALID;
const long long off = pCluster->GetPosition();
- assert(off >= 0);
+ if (off < 0)
+ return E_FILE_FORMAT_INVALID;
if (off == cluster_off) { // preloaded already
if (status == 0) // no entries found
@@ -1155,7 +1250,8 @@
--m_clusterPreloadCount;
m_pos = pos; // consume payload
- assert((segment_stop < 0) || (m_pos <= segment_stop));
+ if (segment_stop >= 0 && m_pos > segment_stop)
+ return E_FILE_FORMAT_INVALID;
return 0; // success
}
@@ -1182,19 +1278,21 @@
// status > 0 means we have an entry
Cluster* const pCluster = Cluster::Create(this, idx, cluster_off);
- // element_size);
- assert(pCluster);
+ if (pCluster == NULL)
+ return -1;
- AppendCluster(pCluster);
- assert(m_clusters);
- assert(idx < m_clusterSize);
- assert(m_clusters[idx] == pCluster);
+ if (!AppendCluster(pCluster)) {
+ delete pCluster;
+ return -1;
+ }
if (cluster_size >= 0) {
pos += cluster_size;
m_pos = pos;
- assert((segment_stop < 0) || (m_pos <= segment_stop));
+
+ if (segment_stop >= 0 && m_pos > segment_stop) // only -1 means "no known stop"
+ return E_FILE_FORMAT_INVALID;
return 0;
}
@@ -1210,8 +1308,8 @@
}
long Segment::DoLoadClusterUnknownSize(long long& pos, long& len) {
- assert(m_pos < 0);
- assert(m_pUnknownSize);
+ if (m_pos >= 0 || m_pUnknownSize == NULL)
+ return E_PARSE_FAILED;
const long status = m_pUnknownSize->Parse(pos, len);
@@ -1221,12 +1319,11 @@
if (status == 0) // parsed a block
return 2; // continue parsing
- assert(status > 0); // nothing left to parse of this cluster
-
const long long start = m_pUnknownSize->m_element_start;
-
const long long size = m_pUnknownSize->GetElementSize();
- assert(size >= 0);
+
+ if (size < 0)
+ return E_FILE_FORMAT_INVALID;
pos = start + size;
m_pos = pos;
@@ -1236,24 +1333,26 @@
return 2; // continue parsing
}
-void Segment::AppendCluster(Cluster* pCluster) {
- assert(pCluster);
- assert(pCluster->m_index >= 0);
+bool Segment::AppendCluster(Cluster* pCluster) {
+ if (pCluster == NULL || pCluster->m_index < 0)
+ return false;
const long count = m_clusterCount + m_clusterPreloadCount;
long& size = m_clusterSize;
- assert(size >= count);
-
const long idx = pCluster->m_index;
- assert(idx == m_clusterCount);
+
+ if (size < count || idx != m_clusterCount)
+ return false;
if (count >= size) {
const long n = (size <= 0) ? 2048 : 2 * size;
- Cluster** const qq = new Cluster*[n];
- Cluster** q = qq;
+ Cluster** const qq = new (std::nothrow) Cluster*[n];
+ if (qq == NULL)
+ return false;
+ Cluster** q = qq;
Cluster** p = m_clusters;
Cluster** const pp = p + count;
@@ -1267,18 +1366,18 @@
}
if (m_clusterPreloadCount > 0) {
- assert(m_clusters);
-
Cluster** const p = m_clusters + m_clusterCount;
- assert(*p);
- assert((*p)->m_index < 0);
+ if (*p == NULL || (*p)->m_index >= 0)
+ return false;
Cluster** q = p + m_clusterPreloadCount;
- assert(q < (m_clusters + size));
+ if (q >= (m_clusters + size))
+ return false;
for (;;) {
Cluster** const qq = q - 1;
- assert((*qq)->m_index < 0);
+ if ((*qq)->m_index >= 0)
+ return false;
*q = *qq;
q = qq;
@@ -1290,22 +1389,25 @@
m_clusters[idx] = pCluster;
++m_clusterCount;
+ return true;
}
-void Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) {
- assert(pCluster);
- assert(pCluster->m_index < 0);
- assert(idx >= m_clusterCount);
+bool Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) {
+ if (pCluster == NULL || pCluster->m_index >= 0 || idx < m_clusterCount)
+ return false;
const long count = m_clusterCount + m_clusterPreloadCount;
long& size = m_clusterSize;
- assert(size >= count);
+ if (size < count)
+ return false;
if (count >= size) {
const long n = (size <= 0) ? 2048 : 2 * size;
- Cluster** const qq = new Cluster*[n];
+ Cluster** const qq = new (std::nothrow) Cluster*[n];
+ if (qq == NULL)
+ return false;
Cluster** q = qq;
Cluster** p = m_clusters;
@@ -1320,17 +1422,20 @@
size = n;
}
- assert(m_clusters);
+ if (m_clusters == NULL)
+ return false;
Cluster** const p = m_clusters + idx;
Cluster** q = m_clusters + count;
- assert(q >= p);
- assert(q < (m_clusters + size));
+ if (q < p || q >= (m_clusters + size))
+ return false;
while (q > p) {
Cluster** const qq = q - 1;
- assert((*qq)->m_index < 0);
+
+ if ((*qq)->m_index >= 0)
+ return false;
*q = *qq;
q = qq;
@@ -1338,13 +1443,12 @@
m_clusters[idx] = pCluster;
++m_clusterPreloadCount;
+ return true;
}
long Segment::Load() {
- assert(m_clusters == NULL);
- assert(m_clusterSize == 0);
- assert(m_clusterCount == 0);
- // assert(m_size >= 0);
+ if (m_clusters != NULL || m_clusterSize != 0 || m_clusterCount != 0)
+ return E_PARSE_FAILED;
// Outermost (level 0) segment object has been constructed,
// and pos designates start of payload. We need to find the
@@ -1358,8 +1462,8 @@
if (header_status > 0) // underflow
return E_BUFFER_NOT_FULL;
- assert(m_pInfo);
- assert(m_pTracks);
+ if (m_pInfo == NULL || m_pTracks == NULL)
+ return E_FILE_FORMAT_INVALID;
for (;;) {
const int status = LoadCluster();
@@ -1408,16 +1512,19 @@
if (status < 0) // error
return status;
- if (id == 0x0DBB) // SeekEntry ID
+ if (id == mkvmuxer::kMkvSeek)
++entry_count;
- else if (id == 0x6C) // Void ID
+ else if (id == mkvmuxer::kMkvVoid)
++void_element_count;
pos += size; // consume payload
- assert(pos <= stop);
+
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
m_entries = new (std::nothrow) Entry[entry_count];
@@ -1446,14 +1553,14 @@
if (status < 0) // error
return status;
- if (id == 0x0DBB) { // SeekEntry ID
+ if (id == mkvmuxer::kMkvSeek) {
if (ParseEntry(pReader, pos, size, pEntry)) {
Entry& e = *pEntry++;
e.element_start = idpos;
e.element_size = (pos + size) - idpos;
}
- } else if (id == 0x6C) { // Void ID
+ } else if (id == mkvmuxer::kMkvVoid) {
VoidElement& e = *pVoidElement++;
e.element_start = idpos;
@@ -1461,10 +1568,12 @@
}
pos += size; // consume payload
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries);
assert(count_ >= 0);
@@ -1553,9 +1662,9 @@
const long long idpos = pos;
- const long long id = ReadUInt(m_pReader, idpos, len);
+ const long long id = ReadID(m_pReader, idpos, len);
- if (id != 0x0C53BB6B) // Cues ID
+ if (id != mkvmuxer::kMkvCues)
return E_FILE_FORMAT_INVALID;
pos += len; // consume ID
@@ -1615,7 +1724,8 @@
m_pCues =
new (std::nothrow) Cues(this, pos, size, element_start, element_size);
- assert(m_pCues); // TODO
+ if (m_pCues == NULL)
+ return -1;
return 0; // success
}
@@ -1632,10 +1742,11 @@
// parse the container for the level-1 element ID
- const long long seekIdId = ReadUInt(pReader, pos, len);
- // seekIdId;
+ const long long seekIdId = ReadID(pReader, pos, len);
+ if (seekIdId < 0)
+ return false;
- if (seekIdId != 0x13AB) // SeekID ID
+ if (seekIdId != mkvmuxer::kMkvSeekID)
return false;
if ((pos + len) > stop)
@@ -1677,9 +1788,9 @@
pos += seekIdSize; // consume SeekID payload
- const long long seekPosId = ReadUInt(pReader, pos, len);
+ const long long seekPosId = ReadID(pReader, pos, len);
- if (seekPosId != 0x13AC) // SeekPos ID
+ if (seekPosId != mkvmuxer::kMkvSeekPosition)
return false;
if ((pos + len) > stop)
@@ -1757,8 +1868,8 @@
if (m_cue_points)
return true;
- assert(m_count == 0);
- assert(m_preload_count == 0);
+ if (m_count != 0 || m_preload_count != 0)
+ return false;
IMkvReader* const pReader = m_pSegment->m_pReader;
@@ -1772,7 +1883,7 @@
long len;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
if (id < 0 || (pos + len) > stop) {
return false;
}
@@ -1789,21 +1900,27 @@
return false;
}
- if (id == 0x3B) // CuePoint ID
- PreloadCuePoint(cue_points_size, idpos);
+ if (id == mkvmuxer::kMkvCuePoint) {
+ if (!PreloadCuePoint(cue_points_size, idpos))
+ return false;
+ }
pos += size; // skip payload
}
return true;
}
-void Cues::PreloadCuePoint(long& cue_points_size, long long pos) const {
- assert(m_count == 0);
+bool Cues::PreloadCuePoint(long& cue_points_size, long long pos) const {
+ if (m_count != 0)
+ return false;
if (m_preload_count >= cue_points_size) {
const long n = (cue_points_size <= 0) ? 2048 : 2 * cue_points_size;
- CuePoint** const qq = new CuePoint*[n];
+ CuePoint** const qq = new (std::nothrow) CuePoint*[n];
+ if (qq == NULL)
+ return false;
+
CuePoint** q = qq; // beginning of target
CuePoint** p = m_cue_points; // beginning of source
@@ -1818,14 +1935,15 @@
cue_points_size = n;
}
- CuePoint* const pCP = new CuePoint(m_preload_count, pos);
+ CuePoint* const pCP = new (std::nothrow) CuePoint(m_preload_count, pos);
+ if (pCP == NULL)
+ return false;
+
m_cue_points[m_preload_count++] = pCP;
+ return true;
}
bool Cues::LoadCuePoint() const {
- // odbgstream os;
- // os << "Cues::LoadCuePoint" << endl;
-
const long long stop = m_start + m_size;
if (m_pos >= stop)
@@ -1843,32 +1961,33 @@
long len;
- const long long id = ReadUInt(pReader, m_pos, len);
- assert(id >= 0); // TODO
- assert((m_pos + len) <= stop);
+ const long long id = ReadID(pReader, m_pos, len);
+ if (id < 0 || (m_pos + len) > stop)
+ return false;
m_pos += len; // consume ID
const long long size = ReadUInt(pReader, m_pos, len);
- assert(size >= 0);
- assert((m_pos + len) <= stop);
+ if (size < 0 || (m_pos + len) > stop)
+ return false;
m_pos += len; // consume Size field
- assert((m_pos + size) <= stop);
+ if ((m_pos + size) > stop)
+ return false;
- if (id != 0x3B) { // CuePoint ID
+ if (id != mkvmuxer::kMkvCuePoint) {
m_pos += size; // consume payload
- assert(m_pos <= stop);
+ if (m_pos > stop)
+ return false;
continue;
}
- assert(m_preload_count > 0);
+ if (m_preload_count < 1)
+ return false;
CuePoint* const pCP = m_cue_points[m_count];
- assert(pCP);
- assert((pCP->GetTimeCode() >= 0) || (-pCP->GetTimeCode() == idpos));
- if (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos))
+ if (!pCP || (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos)))
return false;
if (!pCP->Load(pReader)) {
@@ -1879,24 +1998,18 @@
--m_preload_count;
m_pos += size; // consume payload
- assert(m_pos <= stop);
+ if (m_pos > stop)
+ return false;
return true; // yes, we loaded a cue point
}
- // return (m_pos < stop);
return false; // no, we did not load a cue point
}
bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP,
const CuePoint::TrackPosition*& pTP) const {
- assert(time_ns >= 0);
- assert(pTrack);
-
- if (m_cue_points == NULL)
- return false;
-
- if (m_count == 0)
+ if (time_ns < 0 || pTrack == NULL || m_cue_points == NULL || m_count == 0)
return false;
CuePoint** const ii = m_cue_points;
@@ -1906,7 +2019,8 @@
CuePoint** j = jj;
pCP = *i;
- assert(pCP);
+ if (pCP == NULL)
+ return false;
if (time_ns <= pCP->GetTime(m_pSegment)) {
pTP = pCP->Find(pTrack);
@@ -1920,10 +2034,12 @@
//[j, jj) > time_ns
CuePoint** const k = i + (j - i) / 2;
- assert(k < jj);
+ if (k >= jj)
+ return false;
CuePoint* const pCP = *k;
- assert(pCP);
+ if (pCP == NULL)
+ return false;
const long long t = pCP->GetTime(m_pSegment);
@@ -1932,16 +2048,17 @@
else
j = k;
- assert(i <= j);
+ if (i > j)
+ return false;
}
- assert(i == j);
- assert(i <= jj);
- assert(i > ii);
+ if (i != j || i > jj || i <= ii)
+ return false;
pCP = *--i;
- assert(pCP);
- assert(pCP->GetTime(m_pSegment) <= time_ns);
+
+ if (pCP == NULL || pCP->GetTime(m_pSegment) > time_ns)
+ return false;
// TODO: here and elsewhere, it's probably not correct to search
// for the cue point with this time, and then search for a matching
@@ -1956,55 +2073,50 @@
}
const CuePoint* Cues::GetFirst() const {
- if (m_cue_points == NULL)
- return NULL;
-
- if (m_count == 0)
+ if (m_cue_points == NULL || m_count == 0)
return NULL;
CuePoint* const* const pp = m_cue_points;
- assert(pp);
+ if (pp == NULL)
+ return NULL;
CuePoint* const pCP = pp[0];
- assert(pCP);
- assert(pCP->GetTimeCode() >= 0);
+ if (pCP == NULL || pCP->GetTimeCode() < 0)
+ return NULL;
return pCP;
}
const CuePoint* Cues::GetLast() const {
- if (m_cue_points == NULL)
- return NULL;
-
- if (m_count <= 0)
+ if (m_cue_points == NULL || m_count <= 0)
return NULL;
const long index = m_count - 1;
CuePoint* const* const pp = m_cue_points;
- assert(pp);
+ if (pp == NULL)
+ return NULL;
CuePoint* const pCP = pp[index];
- assert(pCP);
- assert(pCP->GetTimeCode() >= 0);
+ if (pCP == NULL || pCP->GetTimeCode() < 0)
+ return NULL;
return pCP;
}
const CuePoint* Cues::GetNext(const CuePoint* pCurr) const {
- if (pCurr == NULL)
+ if (pCurr == NULL || pCurr->GetTimeCode() < 0 ||
+ m_cue_points == NULL || m_count < 1) {
return NULL;
-
- assert(pCurr->GetTimeCode() >= 0);
- assert(m_cue_points);
- assert(m_count >= 1);
+ }
long index = pCurr->m_index;
- assert(index < m_count);
+ if (index >= m_count)
+ return NULL;
CuePoint* const* const pp = m_cue_points;
- assert(pp);
- assert(pp[index] == pCurr);
+ if (pp == NULL || pp[index] != pCurr)
+ return NULL;
++index;
@@ -2012,18 +2124,16 @@
return NULL;
CuePoint* const pNext = pp[index];
- assert(pNext);
- assert(pNext->GetTimeCode() >= 0);
+
+ if (pNext == NULL || pNext->GetTimeCode() < 0)
+ return NULL;
return pNext;
}
const BlockEntry* Cues::GetBlock(const CuePoint* pCP,
const CuePoint::TrackPosition* pTP) const {
- if (pCP == NULL)
- return NULL;
-
- if (pTP == NULL)
+ if (pCP == NULL || pTP == NULL)
return NULL;
return m_pSegment->GetBlock(*pCP, *pTP);
@@ -2070,11 +2180,15 @@
// assert(Cluster::HasBlockEntries(this, tp.m_pos));
Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos); //, -1);
- assert(pCluster);
+ if (pCluster == NULL)
+ return NULL;
const ptrdiff_t idx = i - m_clusters;
- PreloadCluster(pCluster, idx);
+ if (!PreloadCluster(pCluster, idx)) {
+ delete pCluster;
+ return NULL;
+ }
assert(m_clusters);
assert(m_clusterPreloadCount > 0);
assert(m_clusters[idx] == pCluster);
@@ -2125,12 +2239,15 @@
// assert(Cluster::HasBlockEntries(this, tp.m_pos));
Cluster* const pCluster = Cluster::Create(this, -1, requested_pos);
- //-1);
- assert(pCluster);
+ if (pCluster == NULL)
+ return NULL;
const ptrdiff_t idx = i - m_clusters;
- PreloadCluster(pCluster, idx);
+ if (!PreloadCluster(pCluster, idx)) {
+ delete pCluster;
+ return NULL;
+ }
assert(m_clusters);
assert(m_clusterPreloadCount > 0);
assert(m_clusters[idx] == pCluster);
@@ -2168,9 +2285,8 @@
{
long len;
- const long long id = ReadUInt(pReader, pos_, len);
- assert(id == 0x3B); // CuePoint ID
- if (id != 0x3B)
+ const long long id = ReadID(pReader, pos_, len);
+ if (id != mkvmuxer::kMkvCuePoint)
return false;
pos_ += len; // consume ID
@@ -2193,7 +2309,7 @@
while (pos < stop) {
long len;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
if ((id < 0) || (pos + len > stop)) {
return false;
}
@@ -2210,10 +2326,10 @@
return false;
}
- if (id == 0x33) // CueTime ID
+ if (id == mkvmuxer::kMkvCueTime)
m_timecode = UnserializeUInt(pReader, pos, size);
- else if (id == 0x37) // CueTrackPosition(s) ID
+ else if (id == mkvmuxer::kMkvCueTrackPositions)
++m_track_positions_count;
pos += size; // consume payload
@@ -2227,7 +2343,9 @@
// << " timecode=" << m_timecode
// << endl;
- m_track_positions = new TrackPosition[m_track_positions_count];
+ m_track_positions = new (std::nothrow) TrackPosition[m_track_positions_count];
+ if (m_track_positions == NULL)
+ return false;
// Now parse track positions
@@ -2237,9 +2355,9 @@
while (pos < stop) {
long len;
- const long long id = ReadUInt(pReader, pos, len);
- assert(id >= 0);
- assert((pos + len) <= stop);
+ const long long id = ReadID(pReader, pos, len);
+ if (id < 0 || (pos + len) > stop)
+ return false;
pos += len; // consume ID
@@ -2250,7 +2368,7 @@
pos += len; // consume Size field
assert((pos + size) <= stop);
- if (id == 0x37) { // CueTrackPosition(s) ID
+ if (id == mkvmuxer::kMkvCueTrackPositions) {
TrackPosition& tp = *p++;
if (!tp.Parse(pReader, pos, size)) {
return false;
@@ -2258,7 +2376,8 @@
}
pos += size; // consume payload
- assert(pos <= stop);
+ if (pos > stop)
+ return false;
}
assert(size_t(p - m_track_positions) == m_track_positions_count);
@@ -2281,7 +2400,7 @@
while (pos < stop) {
long len;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
if ((id < 0) || ((pos + len) > stop)) {
return false;
}
@@ -2298,13 +2417,11 @@
return false;
}
- if (id == 0x77) // CueTrack ID
+ if (id == mkvmuxer::kMkvCueTrack)
m_track = UnserializeUInt(pReader, pos, size);
-
- else if (id == 0x71) // CueClusterPos ID
+ else if (id == mkvmuxer::kMkvCueClusterPosition)
m_pos = UnserializeUInt(pReader, pos, size);
-
- else if (id == 0x1378) // CueBlockNumber
+ else if (id == mkvmuxer::kMkvCueBlockNumber)
m_block = UnserializeUInt(pReader, pos, size);
pos += size; // consume payload
@@ -2437,9 +2554,8 @@
if (result != 0)
return NULL;
- const long long id = ReadUInt(m_pReader, pos, len);
- assert(id == 0x0F43B675); // Cluster ID
- if (id != 0x0F43B675)
+ const long long id = ReadID(m_pReader, pos, len);
+ if (id != mkvmuxer::kMkvCluster)
return NULL;
pos += len; // consume ID
@@ -2474,8 +2590,9 @@
const long long idpos = pos; // pos of next (potential) cluster
- const long long id = ReadUInt(m_pReader, idpos, len);
- assert(id > 0); // TODO
+ const long long id = ReadID(m_pReader, idpos, len);
+ if (id < 0)
+ return NULL;
pos += len; // consume ID
@@ -2495,7 +2612,7 @@
if (size == 0) // weird
continue;
- if (id == 0x0F43B675) { // Cluster ID
+ if (id == mkvmuxer::kMkvCluster) {
const long long off_next_ = idpos - m_start;
long long pos_;
@@ -2553,11 +2670,15 @@
assert(i == j);
Cluster* const pNext = Cluster::Create(this, -1, off_next);
- assert(pNext);
+ if (pNext == NULL)
+ return NULL;
const ptrdiff_t idx_next = i - m_clusters; // insertion position
- PreloadCluster(pNext, idx_next);
+ if (!PreloadCluster(pNext, idx_next)) {
+ delete pNext;
+ return NULL;
+ }
assert(m_clusters);
assert(idx_next < m_clusterSize);
assert(m_clusters[idx_next] == pNext);
@@ -2641,7 +2762,7 @@
- const long long id = ReadUInt(m_pReader, pos, len);
+ const long long id = ReadID(m_pReader, pos, len); // ReadID, as at the other kMkvCluster comparisons
- if (id != 0x0F43B675) // weird: not Cluster ID
+ if (id != mkvmuxer::kMkvCluster)
return -1;
pos += len; // consume ID
@@ -2687,7 +2808,8 @@
// Pos now points to start of payload
pos += size; // consume payload (that is, the current cluster)
- assert((segment_stop < 0) || (pos <= segment_stop));
+ if (segment_stop >= 0 && pos > segment_stop)
+ return E_FILE_FORMAT_INVALID;
// By consuming the payload, we are assuming that the curr
// cluster isn't interesting. That is, we don't bother checking
@@ -2755,7 +2877,7 @@
const long long idpos = pos; // absolute
const long long idoff = pos - m_start; // relative
- const long long id = ReadUInt(m_pReader, idpos, len); // absolute
+ const long long id = ReadID(m_pReader, idpos, len); // absolute
if (id < 0) // error
return static_cast<long>(id);
@@ -2805,7 +2927,7 @@
return E_FILE_FORMAT_INVALID;
}
- if (id == 0x0C53BB6B) { // Cues ID
+ if (id == mkvmuxer::kMkvCues) {
if (size == unknown_size)
return E_FILE_FORMAT_INVALID;
@@ -2818,22 +2940,26 @@
const long long element_size = element_stop - element_start;
if (m_pCues == NULL) {
- m_pCues = new Cues(this, pos, size, element_start, element_size);
- assert(m_pCues); // TODO
+ m_pCues = new (std::nothrow)
+ Cues(this, pos, size, element_start, element_size);
+ if (m_pCues == NULL)
+ return -1; // allocation failure: report an error; `false` would convert to 0
}
pos += size; // consume payload
- assert((segment_stop < 0) || (pos <= segment_stop));
+ if (segment_stop >= 0 && pos > segment_stop)
+ return E_FILE_FORMAT_INVALID;
continue;
}
- if (id != 0x0F43B675) { // not a Cluster ID
+ if (id != mkvmuxer::kMkvCluster) { // not a Cluster ID
if (size == unknown_size)
return E_FILE_FORMAT_INVALID;
pos += size; // consume payload
- assert((segment_stop < 0) || (pos <= segment_stop));
+ if (segment_stop >= 0 && pos > segment_stop)
+ return E_FILE_FORMAT_INVALID;
continue;
}
@@ -2905,12 +3031,15 @@
Cluster* const pNext = Cluster::Create(this,
-1, // preloaded
off_next);
- // element_size);
- assert(pNext);
+ if (pNext == NULL)
+ return -1;
const ptrdiff_t idx_next = i - m_clusters; // insertion position
- PreloadCluster(pNext, idx_next);
+ if (!PreloadCluster(pNext, idx_next)) {
+ delete pNext;
+ return -1;
+ }
assert(m_clusters);
assert(idx_next < m_clusterSize);
assert(m_clusters[idx_next] == pNext);
@@ -2953,7 +3082,7 @@
return E_BUFFER_NOT_FULL;
const long long idpos = pos;
- const long long id = ReadUInt(m_pReader, idpos, len);
+ const long long id = ReadID(m_pReader, idpos, len);
if (id < 0) // error (or underflow)
return static_cast<long>(id);
@@ -2962,10 +3091,7 @@
// that we have exhausted the sub-element's inside the cluster
// whose ID we parsed earlier.
- if (id == 0x0F43B675) // Cluster ID
- break;
-
- if (id == 0x0C53BB6B) // Cues ID
+ if (id == mkvmuxer::kMkvCluster || id == mkvmuxer::kMkvCues)
break;
pos += len; // consume ID (of sub-element)
@@ -3012,7 +3138,8 @@
return E_FILE_FORMAT_INVALID;
pos += size; // consume payload of sub-element
- assert((segment_stop < 0) || (pos <= segment_stop));
+ if (segment_stop >= 0 && pos > segment_stop)
+ return E_FILE_FORMAT_INVALID;
} // determine cluster size
cluster_size = pos - payload_pos;
@@ -3022,7 +3149,8 @@
}
pos += cluster_size; // consume payload
- assert((segment_stop < 0) || (pos <= segment_stop));
+ if (segment_stop >= 0 && pos > segment_stop)
+ return E_FILE_FORMAT_INVALID;
return 2; // try to find a cluster that follows next
}
@@ -3131,7 +3259,7 @@
if (size == 0) // weird
continue;
- if (id == 0x05B9) { // EditionEntry ID
+ if (id == mkvmuxer::kMkvEditionEntry) {
status = ParseEdition(pos, size);
if (status < 0) // error
@@ -3139,10 +3267,12 @@
}
pos += size;
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
return 0;
}
@@ -3242,10 +3372,10 @@
if (status < 0) // error
return status;
- if (size == 0) // weird
+ if (size == 0)
continue;
- if (id == 0x36) { // Atom ID
+ if (id == mkvmuxer::kMkvChapterAtom) {
status = ParseAtom(pReader, pos, size);
if (status < 0) // error
@@ -3253,10 +3383,12 @@
}
pos += size;
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
return 0;
}
@@ -3373,20 +3505,20 @@
if (status < 0) // error
return status;
- if (size == 0) // weird
+ if (size == 0) // 0 length payload, skip.
continue;
- if (id == 0x00) { // Display ID
+ if (id == mkvmuxer::kMkvChapterDisplay) {
status = ParseDisplay(pReader, pos, size);
if (status < 0) // error
return status;
- } else if (id == 0x1654) { // StringUID ID
+ } else if (id == mkvmuxer::kMkvChapterStringUID) {
status = UnserializeString(pReader, pos, size, m_string_uid);
if (status < 0) // error
return status;
- } else if (id == 0x33C4) { // UID ID
+ } else if (id == mkvmuxer::kMkvChapterUID) {
long long val;
status = UnserializeInt(pReader, pos, size, val);
@@ -3394,14 +3526,14 @@
return status;
m_uid = static_cast<unsigned long long>(val);
- } else if (id == 0x11) { // TimeStart ID
+ } else if (id == mkvmuxer::kMkvChapterTimeStart) {
const long long val = UnserializeUInt(pReader, pos, size);
if (val < 0) // error
return static_cast<long>(val);
m_start_timecode = val;
- } else if (id == 0x12) { // TimeEnd ID
+ } else if (id == mkvmuxer::kMkvChapterTimeEnd) {
const long long val = UnserializeUInt(pReader, pos, size);
if (val < 0) // error
@@ -3411,10 +3543,12 @@
}
pos += size;
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
return 0;
}
@@ -3524,20 +3658,20 @@
if (status < 0) // error
return status;
- if (size == 0) // weird
+ if (size == 0) // No payload.
continue;
- if (id == 0x05) { // ChapterString ID
+ if (id == mkvmuxer::kMkvChapString) {
status = UnserializeString(pReader, pos, size, m_string);
if (status)
return status;
- } else if (id == 0x037C) { // ChapterLanguage ID
+ } else if (id == mkvmuxer::kMkvChapLanguage) {
status = UnserializeString(pReader, pos, size, m_language);
if (status)
return status;
- } else if (id == 0x037E) { // ChapterCountry ID
+ } else if (id == mkvmuxer::kMkvChapCountry) {
status = UnserializeString(pReader, pos, size, m_country);
if (status)
@@ -3545,10 +3679,12 @@
}
pos += size;
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
return 0;
}
@@ -3588,7 +3724,7 @@
if (size == 0) // 0 length tag, read another
continue;
- if (id == 0x3373) { // Tag ID
+ if (id == mkvmuxer::kMkvTag) {
status = ParseTag(pos, size);
if (status < 0)
@@ -3596,14 +3732,12 @@
}
pos += size;
- assert(pos <= stop);
if (pos > stop)
- return -1;
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
if (pos != stop)
- return -1;
+ return E_FILE_FORMAT_INVALID;
return 0;
}
@@ -3706,7 +3840,7 @@
if (size == 0) // 0 length tag, read another
continue;
- if (id == 0x27C8) { // SimpleTag ID
+ if (id == mkvmuxer::kMkvSimpleTag) {
status = ParseSimpleTag(pReader, pos, size);
if (status < 0)
@@ -3714,14 +3848,12 @@
}
pos += size;
- assert(pos <= stop);
if (pos > stop)
- return -1;
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
if (pos != stop)
- return -1;
+ return E_FILE_FORMAT_INVALID;
return 0;
}
@@ -3799,12 +3931,12 @@
if (size == 0) // weird
continue;
- if (id == 0x5A3) { // TagName ID
+ if (id == mkvmuxer::kMkvTagName) {
status = UnserializeString(pReader, pos, size, m_tag_name);
if (status)
return status;
- } else if (id == 0x487) { // TagString ID
+ } else if (id == mkvmuxer::kMkvTagString) {
status = UnserializeString(pReader, pos, size, m_tag_string);
if (status)
@@ -3812,14 +3944,12 @@
}
pos += size;
- assert(pos <= stop);
if (pos > stop)
- return -1;
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
if (pos != stop)
- return -1;
+ return E_FILE_FORMAT_INVALID;
return 0;
}
@@ -3866,12 +3996,12 @@
if (status < 0) // error
return status;
- if (id == 0x0AD7B1) { // Timecode Scale
+ if (id == mkvmuxer::kMkvTimecodeScale) {
m_timecodeScale = UnserializeUInt(pReader, pos, size);
if (m_timecodeScale <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x0489) { // Segment duration
+ } else if (id == mkvmuxer::kMkvDuration) {
const long status = UnserializeFloat(pReader, pos, size, m_duration);
if (status < 0)
@@ -3879,19 +4009,19 @@
if (m_duration < 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x0D80) { // MuxingApp
+ } else if (id == mkvmuxer::kMkvMuxingApp) {
const long status =
UnserializeString(pReader, pos, size, m_pMuxingAppAsUTF8);
if (status)
return status;
- } else if (id == 0x1741) { // WritingApp
+ } else if (id == mkvmuxer::kMkvWritingApp) {
const long status =
UnserializeString(pReader, pos, size, m_pWritingAppAsUTF8);
if (status)
return status;
- } else if (id == 0x3BA9) { // Title
+ } else if (id == mkvmuxer::kMkvTitle) {
const long status = UnserializeString(pReader, pos, size, m_pTitleAsUTF8);
if (status)
@@ -3899,10 +4029,17 @@
}
pos += size;
- assert(pos <= stop);
+
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
+ const double rollover_check = m_duration * m_timecodeScale;
+ if (rollover_check > LLONG_MAX)
+ return E_FILE_FORMAT_INVALID;
+
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
return 0;
}
@@ -4039,15 +4176,15 @@
if (status < 0) // error
return status;
- if (id == 0x7E8) {
- // AESSettingsCipherMode
+ if (id == mkvmuxer::kMkvAESSettingsCipherMode) {
aes->cipher_mode = UnserializeUInt(pReader, pos, size);
if (aes->cipher_mode != 1)
return E_FILE_FORMAT_INVALID;
}
pos += size; // consume payload
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
return 0;
@@ -4070,14 +4207,15 @@
if (status < 0) // error
return status;
- if (id == 0x1034) // ContentCompression ID
+ if (id == mkvmuxer::kMkvContentCompression)
++compression_count;
- if (id == 0x1035) // ContentEncryption ID
+ if (id == mkvmuxer::kMkvContentEncryption)
++encryption_count;
pos += size; // consume payload
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
if (compression_count <= 0 && encryption_count <= 0)
@@ -4108,19 +4246,15 @@
if (status < 0) // error
return status;
- if (id == 0x1031) {
- // ContentEncodingOrder
+ if (id == mkvmuxer::kMkvContentEncodingOrder) {
encoding_order_ = UnserializeUInt(pReader, pos, size);
- } else if (id == 0x1032) {
- // ContentEncodingScope
+ } else if (id == mkvmuxer::kMkvContentEncodingScope) {
encoding_scope_ = UnserializeUInt(pReader, pos, size);
if (encoding_scope_ < 1)
return -1;
- } else if (id == 0x1033) {
- // ContentEncodingType
+ } else if (id == mkvmuxer::kMkvContentEncodingType) {
encoding_type_ = UnserializeUInt(pReader, pos, size);
- } else if (id == 0x1034) {
- // ContentCompression ID
+ } else if (id == mkvmuxer::kMkvContentCompression) {
ContentCompression* const compression =
new (std::nothrow) ContentCompression();
if (!compression)
@@ -4132,8 +4266,7 @@
return status;
}
*compression_entries_end_++ = compression;
- } else if (id == 0x1035) {
- // ContentEncryption ID
+ } else if (id == mkvmuxer::kMkvContentEncryption) {
ContentEncryption* const encryption =
new (std::nothrow) ContentEncryption();
if (!encryption)
@@ -4148,10 +4281,12 @@
}
pos += size; // consume payload
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
return 0;
}
@@ -4172,21 +4307,18 @@
if (status < 0) // error
return status;
- if (id == 0x254) {
- // ContentCompAlgo
+ if (id == mkvmuxer::kMkvContentCompAlgo) {
long long algo = UnserializeUInt(pReader, pos, size);
if (algo < 0)
return E_FILE_FORMAT_INVALID;
compression->algo = algo;
valid = true;
- } else if (id == 0x255) {
- // ContentCompSettings
+ } else if (id == mkvmuxer::kMkvContentCompSettings) {
if (size <= 0)
return E_FILE_FORMAT_INVALID;
const size_t buflen = static_cast<size_t>(size);
- typedef unsigned char* buf_t;
- const buf_t buf = new (std::nothrow) unsigned char[buflen];
+ unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
if (buf == NULL)
return -1;
@@ -4202,7 +4334,8 @@
}
pos += size; // consume payload
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
// ContentCompAlgo is mandatory
@@ -4227,13 +4360,11 @@
if (status < 0) // error
return status;
- if (id == 0x7E1) {
- // ContentEncAlgo
+ if (id == mkvmuxer::kMkvContentEncAlgo) {
encryption->algo = UnserializeUInt(pReader, pos, size);
if (encryption->algo != 5)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x7E2) {
- // ContentEncKeyID
+ } else if (id == mkvmuxer::kMkvContentEncKeyID) {
delete[] encryption->key_id;
encryption->key_id = NULL;
encryption->key_id_len = 0;
@@ -4242,8 +4373,7 @@
return E_FILE_FORMAT_INVALID;
const size_t buflen = static_cast<size_t>(size);
- typedef unsigned char* buf_t;
- const buf_t buf = new (std::nothrow) unsigned char[buflen];
+ unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
if (buf == NULL)
return -1;
@@ -4256,8 +4386,7 @@
encryption->key_id = buf;
encryption->key_id_len = buflen;
- } else if (id == 0x7E3) {
- // ContentSignature
+ } else if (id == mkvmuxer::kMkvContentSignature) {
delete[] encryption->signature;
encryption->signature = NULL;
encryption->signature_len = 0;
@@ -4266,8 +4395,7 @@
return E_FILE_FORMAT_INVALID;
const size_t buflen = static_cast<size_t>(size);
- typedef unsigned char* buf_t;
- const buf_t buf = new (std::nothrow) unsigned char[buflen];
+ unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
if (buf == NULL)
return -1;
@@ -4280,8 +4408,7 @@
encryption->signature = buf;
encryption->signature_len = buflen;
- } else if (id == 0x7E4) {
- // ContentSigKeyID
+ } else if (id == mkvmuxer::kMkvContentSigKeyID) {
delete[] encryption->sig_key_id;
encryption->sig_key_id = NULL;
encryption->sig_key_id_len = 0;
@@ -4290,8 +4417,7 @@
return E_FILE_FORMAT_INVALID;
const size_t buflen = static_cast<size_t>(size);
- typedef unsigned char* buf_t;
- const buf_t buf = new (std::nothrow) unsigned char[buflen];
+ unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
if (buf == NULL)
return -1;
@@ -4304,14 +4430,11 @@
encryption->sig_key_id = buf;
encryption->sig_key_id_len = buflen;
- } else if (id == 0x7E5) {
- // ContentSigAlgo
+ } else if (id == mkvmuxer::kMkvContentSigAlgo) {
encryption->sig_algo = UnserializeUInt(pReader, pos, size);
- } else if (id == 0x7E6) {
- // ContentSigHashAlgo
+ } else if (id == mkvmuxer::kMkvContentSigHashAlgo) {
encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size);
- } else if (id == 0x7E7) {
- // ContentEncAESSettings
+ } else if (id == mkvmuxer::kMkvContentEncAESSettings) {
const long status = ParseContentEncAESSettingsEntry(
pos, size, pReader, &encryption->aes_settings);
if (status)
@@ -4319,7 +4442,8 @@
}
pos += size; // consume payload
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
return 0;
@@ -4418,7 +4542,7 @@
const size_t len = strlen(src);
- dst = new (std::nothrow) char[len + 1];
+ dst = SafeArrayAlloc<char>(1, len + 1);
if (dst == NULL)
return -1;
@@ -4469,7 +4593,7 @@
if (dst.codecPrivateSize != 0)
return -1;
- dst.codecPrivate = new (std::nothrow) unsigned char[codecPrivateSize];
+ dst.codecPrivate = SafeArrayAlloc<unsigned char>(1, codecPrivateSize);
if (dst.codecPrivate == NULL)
return -1;
@@ -4797,11 +4921,12 @@
return status;
// pos now designates start of element
- if (id == 0x2240) // ContentEncoding ID
+ if (id == mkvmuxer::kMkvContentEncoding)
++count;
pos += size; // consume payload
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
if (count <= 0)
@@ -4821,7 +4946,7 @@
return status;
// pos now designates start of element
- if (id == 0x2240) { // ContentEncoding ID
+ if (id == mkvmuxer::kMkvContentEncoding) {
ContentEncoding* const content_encoding =
new (std::nothrow) ContentEncoding();
if (!content_encoding)
@@ -4837,10 +4962,12 @@
}
pos += size; // consume payload
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
return 0;
}
@@ -4892,37 +5019,37 @@
if (status < 0) // error
return status;
- if (id == 0x30) { // pixel width
+ if (id == mkvmuxer::kMkvPixelWidth) {
width = UnserializeUInt(pReader, pos, size);
if (width <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x3A) { // pixel height
+ } else if (id == mkvmuxer::kMkvPixelHeight) {
height = UnserializeUInt(pReader, pos, size);
if (height <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x14B0) { // display width
+ } else if (id == mkvmuxer::kMkvDisplayWidth) {
display_width = UnserializeUInt(pReader, pos, size);
if (display_width <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x14BA) { // display height
+ } else if (id == mkvmuxer::kMkvDisplayHeight) {
display_height = UnserializeUInt(pReader, pos, size);
if (display_height <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x14B2) { // display unit
+ } else if (id == mkvmuxer::kMkvDisplayUnit) {
display_unit = UnserializeUInt(pReader, pos, size);
if (display_unit < 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x13B8) { // stereo mode
+ } else if (id == mkvmuxer::kMkvStereoMode) {
stereo_mode = UnserializeUInt(pReader, pos, size);
if (stereo_mode < 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x0383E3) { // frame rate
+ } else if (id == mkvmuxer::kMkvFrameRate) {
const long status = UnserializeFloat(pReader, pos, size, rate);
if (status < 0)
@@ -4933,10 +5060,12 @@
}
pos += size; // consume payload
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
VideoTrack* const pTrack =
new (std::nothrow) VideoTrack(pSegment, element_start, element_size);
@@ -5110,7 +5239,7 @@
if (status < 0) // error
return status;
- if (id == 0x35) { // Sample Rate
+ if (id == mkvmuxer::kMkvSamplingFrequency) {
status = UnserializeFloat(pReader, pos, size, rate);
if (status < 0)
@@ -5118,12 +5247,12 @@
if (rate <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x1F) { // Channel Count
+ } else if (id == mkvmuxer::kMkvChannels) {
channels = UnserializeUInt(pReader, pos, size);
if (channels <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x2264) { // Bit Depth
+ } else if (id == mkvmuxer::kMkvBitDepth) {
bit_depth = UnserializeUInt(pReader, pos, size);
if (bit_depth <= 0)
@@ -5131,10 +5260,12 @@
}
pos += size; // consume payload
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
AudioTrack* const pTrack =
new (std::nothrow) AudioTrack(pSegment, element_start, element_size);
@@ -5194,14 +5325,16 @@
if (size == 0) // weird
continue;
- if (id == 0x2E) // TrackEntry ID
+ if (id == mkvmuxer::kMkvTrackEntry)
++count;
pos += size; // consume payload
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
if (count <= 0)
return 0; // success
@@ -5234,13 +5367,12 @@
const long long element_size = payload_stop - element_start;
- if (id == 0x2E) { // TrackEntry ID
+ if (id == mkvmuxer::kMkvTrackEntry) {
Track*& pTrack = *m_trackEntriesEnd;
pTrack = NULL;
const long status = ParseTrackEntry(pos, payload_size, element_start,
element_size, pTrack);
-
if (status)
return status;
@@ -5249,10 +5381,12 @@
}
pos = payload_stop;
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
return 0; // success
}
@@ -5309,16 +5443,16 @@
const long long start = pos;
- if (id == 0x60) { // VideoSettings ID
+ if (id == mkvmuxer::kMkvVideo) {
v.start = start;
v.size = size;
- } else if (id == 0x61) { // AudioSettings ID
+ } else if (id == mkvmuxer::kMkvAudio) {
a.start = start;
a.size = size;
- } else if (id == 0x2D80) { // ContentEncodings ID
+ } else if (id == mkvmuxer::kMkvContentEncodings) {
e.start = start;
e.size = size;
- } else if (id == 0x33C5) { // Track UID
+ } else if (id == mkvmuxer::kMkvTrackUID) {
if (size > 8)
return E_FILE_FORMAT_INVALID;
@@ -5340,49 +5474,49 @@
++pos_;
}
- } else if (id == 0x57) { // Track Number
+ } else if (id == mkvmuxer::kMkvTrackNumber) {
const long long num = UnserializeUInt(pReader, pos, size);
if ((num <= 0) || (num > 127))
return E_FILE_FORMAT_INVALID;
info.number = static_cast<long>(num);
- } else if (id == 0x03) { // Track Type
+ } else if (id == mkvmuxer::kMkvTrackType) {
const long long type = UnserializeUInt(pReader, pos, size);
if ((type <= 0) || (type > 254))
return E_FILE_FORMAT_INVALID;
info.type = static_cast<long>(type);
- } else if (id == 0x136E) { // Track Name
+ } else if (id == mkvmuxer::kMkvName) {
const long status =
UnserializeString(pReader, pos, size, info.nameAsUTF8);
if (status)
return status;
- } else if (id == 0x02B59C) { // Track Language
+ } else if (id == mkvmuxer::kMkvLanguage) {
const long status = UnserializeString(pReader, pos, size, info.language);
if (status)
return status;
- } else if (id == 0x03E383) { // Default Duration
+ } else if (id == mkvmuxer::kMkvDefaultDuration) {
const long long duration = UnserializeUInt(pReader, pos, size);
if (duration < 0)
return E_FILE_FORMAT_INVALID;
info.defaultDuration = static_cast<unsigned long long>(duration);
- } else if (id == 0x06) { // CodecID
+ } else if (id == mkvmuxer::kMkvCodecID) {
const long status = UnserializeString(pReader, pos, size, info.codecId);
if (status)
return status;
- } else if (id == 0x1C) { // lacing
+ } else if (id == mkvmuxer::kMkvFlagLacing) {
lacing = UnserializeUInt(pReader, pos, size);
if ((lacing < 0) || (lacing > 1))
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x23A2) { // Codec Private
+ } else if (id == mkvmuxer::kMkvCodecPrivate) {
delete[] info.codecPrivate;
info.codecPrivate = NULL;
info.codecPrivateSize = 0;
@@ -5390,9 +5524,7 @@
const size_t buflen = static_cast<size_t>(size);
if (buflen) {
- typedef unsigned char* buf_t;
-
- const buf_t buf = new (std::nothrow) unsigned char[buflen];
+ unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
if (buf == NULL)
return -1;
@@ -5407,23 +5539,25 @@
info.codecPrivate = buf;
info.codecPrivateSize = buflen;
}
- } else if (id == 0x058688) { // Codec Name
+ } else if (id == mkvmuxer::kMkvCodecName) {
const long status =
UnserializeString(pReader, pos, size, info.codecNameAsUTF8);
if (status)
return status;
- } else if (id == 0x16AA) { // Codec Delay
+ } else if (id == mkvmuxer::kMkvCodecDelay) {
info.codecDelay = UnserializeUInt(pReader, pos, size);
- } else if (id == 0x16BB) { // Seek Pre Roll
+ } else if (id == mkvmuxer::kMkvSeekPreRoll) {
info.seekPreRoll = UnserializeUInt(pReader, pos, size);
}
pos += size; // consume payload
- assert(pos <= track_stop);
+ if (pos > track_stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == track_stop);
+ if (pos != track_stop)
+ return E_FILE_FORMAT_INVALID;
if (info.number <= 0) // not specified
return E_FILE_FORMAT_INVALID;
@@ -5552,98 +5686,88 @@
}
long Cluster::Load(long long& pos, long& len) const {
- assert(m_pSegment);
- assert(m_pos >= m_element_start);
+ if (m_pSegment == NULL)
+ return E_PARSE_FAILED;
if (m_timecode >= 0) // at least partially loaded
return 0;
- assert(m_pos == m_element_start);
- assert(m_element_size < 0);
+ if (m_pos != m_element_start || m_element_size >= 0)
+ return E_PARSE_FAILED;
IMkvReader* const pReader = m_pSegment->m_pReader;
-
long long total, avail;
-
const int status = pReader->Length(&total, &avail);
if (status < 0) // error
return status;
- assert((total < 0) || (avail <= total));
- assert((total < 0) || (m_pos <= total)); // TODO: verify this
+ if (total >= 0 && (avail > total || m_pos > total))
+ return E_FILE_FORMAT_INVALID;
pos = m_pos;
long long cluster_size = -1;
- {
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error or underflow
- return static_cast<long>(result);
-
- if (result > 0) // underflow (weird)
- return E_BUFFER_NOT_FULL;
-
- // if ((pos + len) > segment_stop)
- // return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long id_ = ReadUInt(pReader, pos, len);
-
- if (id_ < 0) // error
- return static_cast<long>(id_);
-
- if (id_ != 0x0F43B675) // Cluster ID
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume id
-
- // read cluster size
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0) // weird
- return E_BUFFER_NOT_FULL;
-
- // if ((pos + len) > segment_stop)
- // return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(pReader, pos, len);
-
- if (size < 0) // error
- return static_cast<long>(cluster_size);
-
- if (size == 0)
- return E_FILE_FORMAT_INVALID; // TODO: verify this
-
- pos += len; // consume length of size of element
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (size != unknown_size)
- cluster_size = size;
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
}
+ long long result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error or underflow
+ return static_cast<long>(result);
+
+ if (result > 0)
+ return E_BUFFER_NOT_FULL;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long id_ = ReadID(pReader, pos, len);
+
+ if (id_ < 0) // error
+ return static_cast<long>(id_);
+
+ if (id_ != mkvmuxer::kMkvCluster)
+ return E_FILE_FORMAT_INVALID;
+
+ pos += len; // consume id
+
+ // read cluster size
+
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
+
+ result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error
+ return static_cast<long>(result);
+
+ if (result > 0)
+ return E_BUFFER_NOT_FULL;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long size = ReadUInt(pReader, pos, len);
+
+ if (size < 0) // error
+ return static_cast<long>(cluster_size);
+
+ if (size == 0)
+ return E_FILE_FORMAT_INVALID;
+
+ pos += len; // consume length of size of element
+
+ const long long unknown_size = (1LL << (7 * len)) - 1;
+
+ if (size != unknown_size)
+ cluster_size = size;
+
// pos points to start of payload
long long timecode = -1;
long long new_pos = -1;
@@ -5667,7 +5791,7 @@
if (result < 0) // error
return static_cast<long>(result);
- if (result > 0) // weird
+ if (result > 0)
return E_BUFFER_NOT_FULL;
if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
@@ -5676,7 +5800,7 @@
if ((pos + len) > avail)
return E_BUFFER_NOT_FULL;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
if (id < 0) // error
return static_cast<long>(id);
@@ -5688,10 +5812,10 @@
// that we have exhausted the sub-element's inside the cluster
// whose ID we parsed earlier.
- if (id == 0x0F43B675) // Cluster ID
+ if (id == mkvmuxer::kMkvCluster)
break;
- if (id == 0x0C53BB6B) // Cues ID
+ if (id == mkvmuxer::kMkvCues)
break;
pos += len; // consume ID field
@@ -5708,7 +5832,7 @@
if (result < 0) // error
return static_cast<long>(result);
- if (result > 0) // weird
+ if (result > 0)
return E_BUFFER_NOT_FULL;
if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
@@ -5734,13 +5858,13 @@
// pos now points to start of payload
- if (size == 0) // weird
+ if (size == 0)
continue;
if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
return E_FILE_FORMAT_INVALID;
- if (id == 0x67) { // TimeCode ID
+ if (id == mkvmuxer::kMkvTimecode) {
len = static_cast<long>(size);
if ((pos + size) > avail)
@@ -5755,19 +5879,21 @@
if (bBlock)
break;
- } else if (id == 0x20) { // BlockGroup ID
+ } else if (id == mkvmuxer::kMkvBlockGroup) {
bBlock = true;
break;
- } else if (id == 0x23) { // SimpleBlock ID
+ } else if (id == mkvmuxer::kMkvSimpleBlock) {
bBlock = true;
break;
}
pos += size; // consume payload
- assert((cluster_stop < 0) || (pos <= cluster_stop));
+ if (cluster_stop >= 0 && pos > cluster_stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert((cluster_stop < 0) || (pos <= cluster_stop));
+ if (cluster_stop >= 0 && pos > cluster_stop)
+ return E_FILE_FORMAT_INVALID;
if (timecode < 0) // no timecode found
return E_FILE_FORMAT_INVALID;
@@ -5790,10 +5916,8 @@
if (status < 0)
return status;
- assert(m_pos >= m_element_start);
- assert(m_timecode >= 0);
- // assert(m_size > 0);
- // assert(m_element_size > m_size);
+ if (m_pos < m_element_start || m_timecode < 0)
+ return E_PARSE_FAILED;
const long long cluster_stop =
(m_element_size < 0) ? -1 : m_element_start + m_element_size;
@@ -5810,7 +5934,8 @@
if (status < 0) // error
return status;
- assert((total < 0) || (avail <= total));
+ if (total >= 0 && avail > total)
+ return E_FILE_FORMAT_INVALID;
pos = m_pos;
@@ -5837,7 +5962,7 @@
if (result < 0) // error
return static_cast<long>(result);
- if (result > 0) // weird
+ if (result > 0)
return E_BUFFER_NOT_FULL;
if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
@@ -5846,19 +5971,16 @@
if ((pos + len) > avail)
return E_BUFFER_NOT_FULL;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
- if (id < 0) // error
- return static_cast<long>(id);
-
- if (id == 0) // weird
+ if (id < 0)
return E_FILE_FORMAT_INVALID;
// This is the distinguished set of ID's we use to determine
// that we have exhausted the sub-element's inside the cluster
// whose ID we parsed earlier.
- if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) { // Cluster or Cues ID
+ if ((id == mkvmuxer::kMkvCluster) || (id == mkvmuxer::kMkvCues)) {
if (m_element_size < 0)
m_element_size = pos - m_element_start;
@@ -5879,7 +6001,7 @@
if (result < 0) // error
return static_cast<long>(result);
- if (result > 0) // weird
+ if (result > 0)
return E_BUFFER_NOT_FULL;
if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
@@ -5905,7 +6027,7 @@
// pos now points to start of payload
- if (size == 0) // weird
+ if (size == 0)
continue;
// const long long block_start = pos;
@@ -5913,8 +6035,10 @@
if (cluster_stop >= 0) {
if (block_stop > cluster_stop) {
- if ((id == 0x20) || (id == 0x23))
+ if (id == mkvmuxer::kMkvBlockGroup ||
+ id == mkvmuxer::kMkvSimpleBlock) {
return E_FILE_FORMAT_INVALID;
+ }
pos = cluster_stop;
break;
@@ -5930,42 +6054,48 @@
Cluster* const this_ = const_cast<Cluster*>(this);
- if (id == 0x20) // BlockGroup
+ if (id == mkvmuxer::kMkvBlockGroup)
return this_->ParseBlockGroup(size, pos, len);
- if (id == 0x23) // SimpleBlock
+ if (id == mkvmuxer::kMkvSimpleBlock)
return this_->ParseSimpleBlock(size, pos, len);
pos += size; // consume payload
- assert((cluster_stop < 0) || (pos <= cluster_stop));
+ if (cluster_stop >= 0 && pos > cluster_stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(m_element_size > 0);
+ if (m_element_size < 1)
+ return E_FILE_FORMAT_INVALID;
m_pos = pos;
- assert((cluster_stop < 0) || (m_pos <= cluster_stop));
+ if (cluster_stop >= 0 && m_pos > cluster_stop)
+ return E_FILE_FORMAT_INVALID;
if (m_entries_count > 0) {
const long idx = m_entries_count - 1;
const BlockEntry* const pLast = m_entries[idx];
- assert(pLast);
+ if (pLast == NULL)
+ return E_PARSE_FAILED;
const Block* const pBlock = pLast->GetBlock();
- assert(pBlock);
+ if (pBlock == NULL)
+ return E_PARSE_FAILED;
const long long start = pBlock->m_start;
if ((total >= 0) && (start > total))
- return -1; // defend against trucated stream
+ return E_PARSE_FAILED; // defend against truncated stream
const long long size = pBlock->m_size;
const long long stop = start + size;
- assert((cluster_stop < 0) || (stop <= cluster_stop));
+ if (cluster_stop >= 0 && stop > cluster_stop)
+ return E_FILE_FORMAT_INVALID;
if ((total >= 0) && (stop > total))
- return -1; // defend against trucated stream
+ return E_PARSE_FAILED; // defend against truncated stream
}
return 1; // no more entries
@@ -6058,7 +6188,7 @@
return E_BUFFER_NOT_FULL;
}
- status = CreateBlock(0x23, // simple block id
+ status = CreateBlock(mkvmuxer::kMkvSimpleBlock,
block_start, block_size,
0); // DiscardPadding
@@ -6118,12 +6248,12 @@
if ((pos + len) > avail)
return E_BUFFER_NOT_FULL;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
if (id < 0) // error
return static_cast<long>(id);
- if (id == 0) // not a value ID
+ if (id == 0) // not a valid ID
return E_FILE_FORMAT_INVALID;
pos += len; // consume ID field
@@ -6169,14 +6299,14 @@
if (size == unknown_size)
return E_FILE_FORMAT_INVALID;
- if (id == 0x35A2) { // DiscardPadding
+ if (id == mkvmuxer::kMkvDiscardPadding) {
status = UnserializeInt(pReader, pos, size, discard_padding);
if (status < 0) // error
return status;
}
- if (id != 0x21) { // sub-part of BlockGroup is not a Block
+ if (id != mkvmuxer::kMkvBlock) {
pos += size; // consume sub-part of block group
if (pos > payload_stop)
@@ -6262,12 +6392,14 @@
}
pos = block_stop; // consume block-part of block group
- assert(pos <= payload_stop);
+ if (pos > payload_stop)
+ return E_FILE_FORMAT_INVALID;
}
- assert(pos == payload_stop);
+ if (pos != payload_stop)
+ return E_FILE_FORMAT_INVALID;
- status = CreateBlock(0x20, // BlockGroup ID
+ status = CreateBlock(mkvmuxer::kMkvBlockGroup,
payload_start, payload_size, discard_padding);
if (status != 0)
return status;
@@ -6310,17 +6442,14 @@
return E_BUFFER_NOT_FULL; // underflow, since more remains to be parsed
}
-Cluster* Cluster::Create(Segment* pSegment, long idx, long long off)
-// long long element_size)
-{
- assert(pSegment);
- assert(off >= 0);
+Cluster* Cluster::Create(Segment* pSegment, long idx, long long off) {
+ if (!pSegment || off < 0)
+ return NULL;
const long long element_start = pSegment->m_start + off;
- Cluster* const pCluster = new Cluster(pSegment, idx, element_start);
- // element_size);
- assert(pCluster);
+ Cluster* const pCluster =
+ new (std::nothrow) Cluster(pSegment, idx, element_start);
return pCluster;
}
@@ -6431,13 +6560,13 @@
if ((pos + len) > avail)
return E_BUFFER_NOT_FULL;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
if (id < 0) // error
return static_cast<long>(id);
- if (id != 0x0F43B675) // weird: not cluster ID
- return -1; // generic error
+ if (id != mkvmuxer::kMkvCluster)
+ return E_PARSE_FAILED;
pos += len; // consume Cluster ID field
@@ -6515,7 +6644,7 @@
if ((pos + len) > avail)
return E_BUFFER_NOT_FULL;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
if (id < 0) // error
return static_cast<long>(id);
@@ -6524,10 +6653,10 @@
// that we have exhausted the sub-element's inside the cluster
// whose ID we parsed earlier.
- if (id == 0x0F43B675) // Cluster ID
+ if (id == mkvmuxer::kMkvCluster)
return 0; // no entries found
- if (id == 0x0C53BB6B) // Cues ID
+ if (id == mkvmuxer::kMkvCues)
return 0; // no entries found
pos += len; // consume id field
@@ -6579,14 +6708,15 @@
if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
return E_FILE_FORMAT_INVALID;
- if (id == 0x20) // BlockGroup ID
+ if (id == mkvmuxer::kMkvBlockGroup)
return 1; // have at least one entry
- if (id == 0x23) // SimpleBlock ID
+ if (id == mkvmuxer::kMkvSimpleBlock)
return 1; // have at least one entry
pos += size; // consume payload
- assert((cluster_stop < 0) || (pos <= cluster_stop));
+ if (cluster_stop >= 0 && pos > cluster_stop)
+ return E_FILE_FORMAT_INVALID;
}
}
@@ -6656,14 +6786,17 @@
long Cluster::CreateBlock(long long id,
long long pos, // absolute pos of payload
long long size, long long discard_padding) {
- assert((id == 0x20) || (id == 0x23)); // BlockGroup or SimpleBlock
+ if (id != mkvmuxer::kMkvBlockGroup && id != mkvmuxer::kMkvSimpleBlock)
+ return E_PARSE_FAILED;
if (m_entries_count < 0) { // haven't parsed anything yet
assert(m_entries == NULL);
assert(m_entries_size == 0);
m_entries_size = 1024;
- m_entries = new BlockEntry*[m_entries_size];
+ m_entries = new (std::nothrow) BlockEntry*[m_entries_size];
+ if (m_entries == NULL)
+ return -1;
m_entries_count = 0;
} else {
@@ -6674,8 +6807,9 @@
if (m_entries_count >= m_entries_size) {
const long entries_size = 2 * m_entries_size;
- BlockEntry** const entries = new BlockEntry*[entries_size];
- assert(entries);
+ BlockEntry** const entries = new (std::nothrow) BlockEntry*[entries_size];
+ if (entries == NULL)
+ return -1;
BlockEntry** src = m_entries;
BlockEntry** const src_end = src + m_entries_count;
@@ -6692,9 +6826,9 @@
}
}
- if (id == 0x20) // BlockGroup ID
+ if (id == mkvmuxer::kMkvBlockGroup)
return CreateBlockGroup(pos, size, discard_padding);
- else // SimpleBlock ID
+ else
return CreateSimpleBlock(pos, size);
}
@@ -6725,9 +6859,9 @@
while (pos < stop) {
long len;
- const long long id = ReadUInt(pReader, pos, len);
- assert(id >= 0); // TODO
- assert((pos + len) <= stop);
+ const long long id = ReadID(pReader, pos, len);
+ if (id < 0 || (pos + len) > stop)
+ return E_FILE_FORMAT_INVALID;
pos += len; // consume ID
@@ -6737,12 +6871,12 @@
pos += len; // consume size
- if (id == 0x21) { // Block ID
+ if (id == mkvmuxer::kMkvBlock) {
if (bpos < 0) { // Block ID
bpos = pos;
bsize = size;
}
- } else if (id == 0x1B) { // Duration ID
+ } else if (id == mkvmuxer::kMkvBlockDuration) {
if (size > 8)
return E_FILE_FORMAT_INVALID;
@@ -6750,7 +6884,7 @@
if (duration < 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x7B) { // ReferenceBlock
+ } else if (id == mkvmuxer::kMkvReferenceBlock) {
if (size > 8 || size <= 0)
return E_FILE_FORMAT_INVALID;
const long size_ = static_cast<long>(size);
@@ -6764,17 +6898,19 @@
if (time <= 0) // see note above
prev = time;
- else // weird
+ else
next = time;
}
pos += size; // consume payload
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
}
if (bpos < 0)
return E_FILE_FORMAT_INVALID;
- assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
assert(bsize >= 0);
const long idx = m_entries_count;
@@ -7213,7 +7349,9 @@
return E_FILE_FORMAT_INVALID;
m_frame_count = 1;
- m_frames = new Frame[m_frame_count];
+ m_frames = new (std::nothrow) Frame[m_frame_count];
+ if (m_frames == NULL)
+ return -1;
Frame& f = m_frames[0];
f.pos = pos;
@@ -7239,18 +7377,23 @@
return E_FILE_FORMAT_INVALID;
++pos; // consume frame count
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
m_frame_count = int(biased_count) + 1;
- m_frames = new Frame[m_frame_count];
- assert(m_frames);
+ m_frames = new (std::nothrow) Frame[m_frame_count];
+ if (m_frames == NULL)
+ return -1;
+
+ if (!m_frames)
+ return E_FILE_FORMAT_INVALID;
if (lacing == 1) { // Xiph
Frame* pf = m_frames;
Frame* const pf_end = pf + m_frame_count;
- long size = 0;
+ long long size = 0;
int frame_count = m_frame_count;
while (frame_count > 1) {
@@ -7277,6 +7420,8 @@
Frame& f = *pf++;
assert(pf < pf_end);
+ if (pf >= pf_end)
+ return E_FILE_FORMAT_INVALID;
f.pos = 0; // patch later
@@ -7289,8 +7434,8 @@
--frame_count;
}
- assert(pf < pf_end);
- assert(pos <= stop);
+ if (pf >= pf_end || pos > stop)
+ return E_FILE_FORMAT_INVALID;
{
Frame& f = *pf++;
@@ -7318,11 +7463,17 @@
Frame& f = *pf++;
assert((pos + f.len) <= stop);
+ if ((pos + f.len) > stop)
+ return E_FILE_FORMAT_INVALID;
+
f.pos = pos;
pos += f.len;
}
assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
+
} else if (lacing == 2) { // fixed-size lacing
if (pos >= stop)
return E_FILE_FORMAT_INVALID;
@@ -7342,6 +7493,8 @@
while (pf != pf_end) {
assert((pos + frame_size) <= stop);
+ if ((pos + frame_size) > stop)
+ return E_FILE_FORMAT_INVALID;
Frame& f = *pf++;
@@ -7352,13 +7505,16 @@
}
assert(pos == stop);
+ if (pos != stop)
+ return E_FILE_FORMAT_INVALID;
+
} else {
assert(lacing == 3); // EBML lacing
if (pos >= stop)
return E_FILE_FORMAT_INVALID;
- long size = 0;
+ long long size = 0;
int frame_count = m_frame_count;
long long frame_size = ReadUInt(pReader, pos, len);
@@ -7396,6 +7552,9 @@
return E_FILE_FORMAT_INVALID;
assert(pf < pf_end);
+ if (pf >= pf_end)
+ return E_FILE_FORMAT_INVALID;
+
const Frame& prev = *pf++;
assert(prev.len == frame_size);
@@ -7403,6 +7562,8 @@
return E_FILE_FORMAT_INVALID;
assert(pf < pf_end);
+ if (pf >= pf_end)
+ return E_FILE_FORMAT_INVALID;
Frame& curr = *pf;
@@ -7417,7 +7578,8 @@
return E_FILE_FORMAT_INVALID;
pos += len; // consume length of (delta) size
- assert(pos <= stop);
+ if (pos > stop)
+ return E_FILE_FORMAT_INVALID;
const int exp = 7 * len - 1;
const long long bias = (1LL << exp) - 1LL;
@@ -7439,18 +7601,20 @@
// parse last frame
if (frame_count > 0) {
- assert(pos <= stop);
- assert(pf < pf_end);
+ if (pos > stop || pf >= pf_end)
+ return E_FILE_FORMAT_INVALID;
const Frame& prev = *pf++;
assert(prev.len == frame_size);
if (prev.len != frame_size)
return E_FILE_FORMAT_INVALID;
- assert(pf < pf_end);
+ if (pf >= pf_end)
+ return E_FILE_FORMAT_INVALID;
Frame& curr = *pf++;
- assert(pf == pf_end);
+ if (pf != pf_end)
+ return E_FILE_FORMAT_INVALID;
curr.pos = 0; // patch later
@@ -7471,6 +7635,8 @@
while (pf != pf_end) {
Frame& f = *pf++;
assert((pos + f.len) <= stop);
+ if ((pos + f.len) > stop)
+ return E_FILE_FORMAT_INVALID;
f.pos = pos;
pos += f.len;
diff --git a/libvpx/third_party/libwebm/mkvparser.hpp b/libvpx/third_party/libwebm/mkvparser.hpp
index aa0b432..75ef69d 100644
--- a/libvpx/third_party/libwebm/mkvparser.hpp
+++ b/libvpx/third_party/libwebm/mkvparser.hpp
@@ -9,12 +9,13 @@
#ifndef MKVPARSER_HPP
#define MKVPARSER_HPP
-#include <cstdlib>
-#include <cstdio>
#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
namespace mkvparser {
+const int E_PARSE_FAILED = -1;
const int E_FILE_FORMAT_INVALID = -2;
const int E_BUFFER_NOT_FULL = -3;
@@ -27,8 +28,11 @@
virtual ~IMkvReader();
};
+template<typename Type> Type* SafeArrayAlloc(unsigned long long num_elements,
+ unsigned long long element_size);
long long GetUIntLength(IMkvReader*, long long, long&);
long long ReadUInt(IMkvReader*, long long, long&);
+long long ReadID(IMkvReader* pReader, long long pos, long& len);
long long UnserializeUInt(IMkvReader*, long long pos, long long size);
long UnserializeFloat(IMkvReader*, long long pos, long long size, double&);
@@ -833,7 +837,7 @@
private:
bool Init() const;
- void PreloadCuePoint(long&, long long) const;
+ bool PreloadCuePoint(long&, long long) const;
mutable CuePoint** m_cue_points;
mutable long m_count;
@@ -999,8 +1003,8 @@
long DoLoadClusterUnknownSize(long long&, long&);
long DoParseNext(const Cluster*&, long long&, long&);
- void AppendCluster(Cluster*);
- void PreloadCluster(Cluster*, ptrdiff_t);
+ bool AppendCluster(Cluster*);
+ bool PreloadCluster(Cluster*, ptrdiff_t);
// void ParseSeekHead(long long pos, long long size);
// void ParseSeekEntry(long long pos, long long size);
diff --git a/libvpx/third_party/libwebm/webmids.hpp b/libvpx/third_party/libwebm/webmids.hpp
index 6874e44..ad4ab57 100644
--- a/libvpx/third_party/libwebm/webmids.hpp
+++ b/libvpx/third_party/libwebm/webmids.hpp
@@ -41,6 +41,7 @@
kMkvTimecodeScale = 0x2AD7B1,
kMkvDuration = 0x4489,
kMkvDateUTC = 0x4461,
+ kMkvTitle = 0x7BA9,
kMkvMuxingApp = 0x4D80,
kMkvWritingApp = 0x5741,
// Cluster
@@ -107,9 +108,16 @@
kMkvContentEncodingOrder = 0x5031,
kMkvContentEncodingScope = 0x5032,
kMkvContentEncodingType = 0x5033,
+ kMkvContentCompression = 0x5034,
+ kMkvContentCompAlgo = 0x4254,
+ kMkvContentCompSettings = 0x4255,
kMkvContentEncryption = 0x5035,
kMkvContentEncAlgo = 0x47E1,
kMkvContentEncKeyID = 0x47E2,
+ kMkvContentSignature = 0x47E3,
+ kMkvContentSigKeyID = 0x47E4,
+ kMkvContentSigAlgo = 0x47E5,
+ kMkvContentSigHashAlgo = 0x47E6,
kMkvContentEncAESSettings = 0x47E7,
kMkvAESSettingsCipherMode = 0x47E8,
kMkvAESSettingsCipherInitData = 0x47E9,
diff --git a/libvpx/third_party/x86inc/README.libvpx b/libvpx/third_party/x86inc/README.libvpx
index fe5b076..e91e305 100644
--- a/libvpx/third_party/x86inc/README.libvpx
+++ b/libvpx/third_party/x86inc/README.libvpx
@@ -20,3 +20,5 @@
Use .text instead of .rodata on macho to avoid broken tables in PIC mode.
Use .text with no alignment for aout
Only use 'hidden' visibility with Chromium
+Move '%use smartalign' for nasm out of 'INIT_CPUFLAGS' and before
+ 'ALIGNMODE'.
diff --git a/libvpx/third_party/x86inc/x86inc.asm b/libvpx/third_party/x86inc/x86inc.asm
index 77a58f2..be59de3 100644
--- a/libvpx/third_party/x86inc/x86inc.asm
+++ b/libvpx/third_party/x86inc/x86inc.asm
@@ -876,6 +876,10 @@
%define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
+%ifdef __NASM_VER__
+ %use smartalign
+%endif
+
; Takes an arbitrary number of cpuflags from the above list.
; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu.
; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co.
@@ -912,7 +916,6 @@
%endif
%ifdef __NASM_VER__
- %use smartalign
ALIGNMODE k7
%elif ARCH_X86_64 || cpuflag(sse2)
CPU amdnop
diff --git a/libvpx/vp8/common/arm/armv6/intra4x4_predict_v6.asm b/libvpx/vp8/common/arm/armv6/intra4x4_predict_v6.asm
deleted file mode 100644
index c5ec824..0000000
--- a/libvpx/vp8/common/arm/armv6/intra4x4_predict_v6.asm
+++ /dev/null
@@ -1,611 +0,0 @@
-;
-; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_intra4x4_predict_armv6|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-
-;void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft,
-; B_PREDICTION_MODE left_stride, int b_mode,
-; unsigned char *dst, int dst_stride,
-; unsigned char top_left)
-
-; r0: *Above
-; r1: *yleft
-; r2: left_stride
-; r3: b_mode
-; sp + #40: dst
-; sp + #44: dst_stride
-; sp + #48: top_left
-|vp8_intra4x4_predict_armv6| PROC
- push {r4-r12, lr}
-
- cmp r3, #10
- addlt pc, pc, r3, lsl #2 ; position independent switch
- pop {r4-r12, pc} ; default
- b b_dc_pred
- b b_tm_pred
- b b_ve_pred
- b b_he_pred
- b b_ld_pred
- b b_rd_pred
- b b_vr_pred
- b b_vl_pred
- b b_hd_pred
- b b_hu_pred
-
-b_dc_pred
- ; load values
- ldr r8, [r0] ; Above
- ldrb r4, [r1], r2 ; Left[0]
- mov r9, #0
- ldrb r5, [r1], r2 ; Left[1]
- ldrb r6, [r1], r2 ; Left[2]
- usad8 r12, r8, r9
- ldrb r7, [r1] ; Left[3]
-
- ; calculate dc
- add r4, r4, r5
- add r4, r4, r6
- add r4, r4, r7
- add r4, r4, r12
- add r4, r4, #4
- ldr r0, [sp, #44] ; dst_stride
- mov r12, r4, asr #3 ; (expected_dc + 4) >> 3
-
- add r12, r12, r12, lsl #8
- ldr r3, [sp, #40] ; dst
- add r12, r12, r12, lsl #16
-
- ; store values
- str r12, [r3], r0
- str r12, [r3], r0
- str r12, [r3], r0
- str r12, [r3]
-
- pop {r4-r12, pc}
-
-b_tm_pred
- ldr r8, [r0] ; Above
- ldrb r9, [sp, #48] ; top_left
- ldrb r4, [r1], r2 ; Left[0]
- ldrb r5, [r1], r2 ; Left[1]
- ldrb r6, [r1], r2 ; Left[2]
- ldrb r7, [r1] ; Left[3]
- ldr r0, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- add r9, r9, r9, lsl #16 ; [tl|tl]
- uxtb16 r10, r8 ; a[2|0]
- uxtb16 r11, r8, ror #8 ; a[3|1]
- ssub16 r10, r10, r9 ; a[2|0] - [tl|tl]
- ssub16 r11, r11, r9 ; a[3|1] - [tl|tl]
-
- add r4, r4, r4, lsl #16 ; l[0|0]
- add r5, r5, r5, lsl #16 ; l[1|1]
- add r6, r6, r6, lsl #16 ; l[2|2]
- add r7, r7, r7, lsl #16 ; l[3|3]
-
- sadd16 r1, r4, r10 ; l[0|0] + a[2|0] - [tl|tl]
- sadd16 r2, r4, r11 ; l[0|0] + a[3|1] - [tl|tl]
- usat16 r1, #8, r1
- usat16 r2, #8, r2
-
- sadd16 r4, r5, r10 ; l[1|1] + a[2|0] - [tl|tl]
- sadd16 r5, r5, r11 ; l[1|1] + a[3|1] - [tl|tl]
-
- add r12, r1, r2, lsl #8 ; [3|2|1|0]
- str r12, [r3], r0
-
- usat16 r4, #8, r4
- usat16 r5, #8, r5
-
- sadd16 r1, r6, r10 ; l[2|2] + a[2|0] - [tl|tl]
- sadd16 r2, r6, r11 ; l[2|2] + a[3|1] - [tl|tl]
-
- add r12, r4, r5, lsl #8 ; [3|2|1|0]
- str r12, [r3], r0
-
- usat16 r1, #8, r1
- usat16 r2, #8, r2
-
- sadd16 r4, r7, r10 ; l[3|3] + a[2|0] - [tl|tl]
- sadd16 r5, r7, r11 ; l[3|3] + a[3|1] - [tl|tl]
-
- add r12, r1, r2, lsl #8 ; [3|2|1|0]
-
- usat16 r4, #8, r4
- usat16 r5, #8, r5
-
- str r12, [r3], r0
-
- add r12, r4, r5, lsl #8 ; [3|2|1|0]
- str r12, [r3]
-
- pop {r4-r12, pc}
-
-b_ve_pred
- ldr r8, [r0] ; a[3|2|1|0]
- ldr r11, c00FF00FF
- ldrb r9, [sp, #48] ; top_left
- ldrb r10, [r0, #4] ; a[4]
-
- ldr r0, c00020002
-
- uxtb16 r4, r8 ; a[2|0]
- uxtb16 r5, r8, ror #8 ; a[3|1]
- ldr r2, [sp, #44] ; dst_stride
- pkhbt r9, r9, r5, lsl #16 ; a[1|-1]
-
- add r9, r9, r4, lsl #1 ;[a[1]+2*a[2] | tl+2*a[0] ]
- uxtab16 r9, r9, r5 ;[a[1]+2*a[2]+a[3] | tl+2*a[0]+a[1] ]
- ldr r3, [sp, #40] ; dst
- uxtab16 r9, r9, r0 ;[a[1]+2*a[2]+a[3]+2| tl+2*a[0]+a[1]+2]
-
- add r0, r0, r10, lsl #16 ;[a[4]+2 | 2]
- add r0, r0, r4, asr #16 ;[a[4]+2 | a[2]+2]
- add r0, r0, r5, lsl #1 ;[a[4]+2*a[3]+2 | a[2]+2*a[1]+2]
- uadd16 r4, r4, r0 ;[a[4]+2*a[3]+a[2]+2|a[2]+2*a[1]+a[0]+2]
-
- and r9, r11, r9, asr #2
- and r4, r11, r4, asr #2
- add r9, r9, r4, lsl #8
-
- ; store values
- str r9, [r3], r2
- str r9, [r3], r2
- str r9, [r3], r2
- str r9, [r3]
-
- pop {r4-r12, pc}
-
-
-b_he_pred
- ldrb r4, [r1], r2 ; Left[0]
- ldrb r8, [sp, #48] ; top_left
- ldrb r5, [r1], r2 ; Left[1]
- ldrb r6, [r1], r2 ; Left[2]
- ldrb r7, [r1] ; Left[3]
-
- add r8, r8, r4 ; tl + l[0]
- add r9, r4, r5 ; l[0] + l[1]
- add r10, r5, r6 ; l[1] + l[2]
- add r11, r6, r7 ; l[2] + l[3]
-
- mov r0, #2<<14
-
- add r8, r8, r9 ; tl + 2*l[0] + l[1]
- add r4, r9, r10 ; l[0] + 2*l[1] + l[2]
- add r5, r10, r11 ; l[1] + 2*l[2] + l[3]
- add r6, r11, r7, lsl #1 ; l[2] + 2*l[3] + l[3]
-
-
- add r8, r0, r8, lsl #14 ; (tl + 2*l[0] + l[1])>>2 in top half
- add r9, r0, r4, lsl #14 ; (l[0] + 2*l[1] + l[2])>>2 in top half
- add r10,r0, r5, lsl #14 ; (l[1] + 2*l[2] + l[3])>>2 in top half
- add r11,r0, r6, lsl #14 ; (l[2] + 2*l[3] + l[3])>>2 in top half
-
- pkhtb r8, r8, r8, asr #16 ; l[-|0|-|0]
- pkhtb r9, r9, r9, asr #16 ; l[-|1|-|1]
- pkhtb r10, r10, r10, asr #16 ; l[-|2|-|2]
- pkhtb r11, r11, r11, asr #16 ; l[-|3|-|3]
-
- ldr r0, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- add r8, r8, r8, lsl #8 ; l[0|0|0|0]
- add r9, r9, r9, lsl #8 ; l[1|1|1|1]
- add r10, r10, r10, lsl #8 ; l[2|2|2|2]
- add r11, r11, r11, lsl #8 ; l[3|3|3|3]
-
- ; store values
- str r8, [r3], r0
- str r9, [r3], r0
- str r10, [r3], r0
- str r11, [r3]
-
- pop {r4-r12, pc}
-
-b_ld_pred
- ldr r4, [r0] ; Above[0-3]
- ldr r12, c00020002
- ldr r5, [r0, #4] ; Above[4-7]
- ldr lr, c00FF00FF
-
- uxtb16 r6, r4 ; a[2|0]
- uxtb16 r7, r4, ror #8 ; a[3|1]
- uxtb16 r8, r5 ; a[6|4]
- uxtb16 r9, r5, ror #8 ; a[7|5]
- pkhtb r10, r6, r8 ; a[2|4]
- pkhtb r11, r7, r9 ; a[3|5]
-
- add r4, r6, r7, lsl #1 ; [a2+2*a3 | a0+2*a1]
- add r4, r4, r10, ror #16 ; [a2+2*a3+a4 | a0+2*a1+a2]
- uxtab16 r4, r4, r12 ; [a2+2*a3+a4+2 | a0+2*a1+a2+2]
-
- add r5, r7, r10, ror #15 ; [a3+2*a4 | a1+2*a2]
- add r5, r5, r11, ror #16 ; [a3+2*a4+a5 | a1+2*a2+a3]
- uxtab16 r5, r5, r12 ; [a3+2*a4+a5+2 | a1+2*a2+a3+2]
-
- pkhtb r7, r9, r8, asr #16
- add r6, r8, r9, lsl #1 ; [a6+2*a7 | a4+2*a5]
- uadd16 r6, r6, r7 ; [a6+2*a7+a7 | a4+2*a5+a6]
- uxtab16 r6, r6, r12 ; [a6+2*a7+a7+2 | a4+2*a5+a6+2]
-
- uxth r7, r9 ; [ a5]
- add r7, r7, r8, asr #15 ; [ a5+2*a6]
- add r7, r7, r9, asr #16 ; [ a5+2*a6+a7]
- uxtah r7, r7, r12 ; [ a5+2*a6+a7+2]
-
- ldr r0, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- ; scale down
- and r4, lr, r4, asr #2
- and r5, lr, r5, asr #2
- and r6, lr, r6, asr #2
- mov r7, r7, asr #2
-
- add r8, r4, r5, lsl #8 ; [3|2|1|0]
- str r8, [r3], r0
-
- mov r9, r8, lsr #8
- add r9, r9, r6, lsl #24 ; [4|3|2|1]
- str r9, [r3], r0
-
- mov r10, r9, lsr #8
- add r10, r10, r7, lsl #24 ; [5|4|3|2]
- str r10, [r3], r0
-
- mov r6, r6, lsr #16
- mov r11, r10, lsr #8
- add r11, r11, r6, lsl #24 ; [6|5|4|3]
- str r11, [r3]
-
- pop {r4-r12, pc}
-
-b_rd_pred
- ldrb r7, [r1], r2 ; l[0] = pp[3]
- ldr lr, [r0] ; Above = pp[8|7|6|5]
- ldrb r8, [sp, #48] ; tl = pp[4]
- ldrb r6, [r1], r2 ; l[1] = pp[2]
- ldrb r5, [r1], r2 ; l[2] = pp[1]
- ldrb r4, [r1], r2 ; l[3] = pp[0]
-
-
- uxtb16 r9, lr ; p[7|5]
- uxtb16 r10, lr, ror #8 ; p[8|6]
- add r4, r4, r6, lsl #16 ; p[2|0]
- add r5, r5, r7, lsl #16 ; p[3|1]
- add r6, r6, r8, lsl #16 ; p[4|2]
- pkhbt r7, r7, r9, lsl #16 ; p[5|3]
- pkhbt r8, r8, r10, lsl #16 ; p[6|4]
-
- ldr r12, c00020002
- ldr lr, c00FF00FF
-
- add r4, r4, r5, lsl #1 ; [p2+2*p3 | p0+2*p1]
- add r4, r4, r6 ; [p2+2*p3+p4 | p0+2*p1+p2]
- uxtab16 r4, r4, r12 ; [p2+2*p3+p4+2 | p0+2*p1+p2+2]
-
- add r5, r5, r6, lsl #1 ; [p3+2*p4 | p1+2*p2]
- add r5, r5, r7 ; [p3+2*p4+p5 | p1+2*p2+p3]
- uxtab16 r5, r5, r12 ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]
-
- add r6, r7, r8, lsl #1 ; [p5+2*p6 | p3+2*p4]
- add r6, r6, r9 ; [p5+2*p6+p7 | p3+2*p4+p5]
- uxtab16 r6, r6, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
-
- add r7, r8, r9, lsl #1 ; [p6+2*p7 | p4+2*p5]
- add r7, r7, r10 ; [p6+2*p7+p8 | p4+2*p5+p6]
- uxtab16 r7, r7, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
-
- ldr r0, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- ; scale down
- and r7, lr, r7, asr #2
- and r6, lr, r6, asr #2
- and r5, lr, r5, asr #2
- and r4, lr, r4, asr #2
-
- add r8, r6, r7, lsl #8 ; [6|5|4|3]
- str r8, [r3], r0
-
- mov r9, r8, lsl #8 ; [5|4|3|-]
- uxtab r9, r9, r4, ror #16 ; [5|4|3|2]
- str r9, [r3], r0
-
- mov r10, r9, lsl #8 ; [4|3|2|-]
- uxtab r10, r10, r5 ; [4|3|2|1]
- str r10, [r3], r0
-
- mov r11, r10, lsl #8 ; [3|2|1|-]
- uxtab r11, r11, r4 ; [3|2|1|0]
- str r11, [r3]
-
- pop {r4-r12, pc}
-
-b_vr_pred
- ldrb r7, [r1], r2 ; l[0] = pp[3]
- ldr lr, [r0] ; Above = pp[8|7|6|5]
- ldrb r8, [sp, #48] ; tl = pp[4]
- ldrb r6, [r1], r2 ; l[1] = pp[2]
- ldrb r5, [r1], r2 ; l[2] = pp[1]
- ldrb r4, [r1] ; l[3] = pp[0]
-
- add r5, r5, r7, lsl #16 ; p[3|1]
- add r6, r6, r8, lsl #16 ; p[4|2]
- uxtb16 r9, lr ; p[7|5]
- uxtb16 r10, lr, ror #8 ; p[8|6]
- pkhbt r7, r7, r9, lsl #16 ; p[5|3]
- pkhbt r8, r8, r10, lsl #16 ; p[6|4]
-
- ldr r4, c00010001
- ldr r12, c00020002
- ldr lr, c00FF00FF
-
- add r5, r5, r6, lsl #1 ; [p3+2*p4 | p1+2*p2]
- add r5, r5, r7 ; [p3+2*p4+p5 | p1+2*p2+p3]
- uxtab16 r5, r5, r12 ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]
-
- add r6, r6, r7, lsl #1 ; [p4+2*p5 | p2+2*p3]
- add r6, r6, r8 ; [p4+2*p5+p6 | p2+2*p3+p4]
- uxtab16 r6, r6, r12 ; [p4+2*p5+p6+2 | p2+2*p3+p4+2]
-
- uadd16 r11, r8, r9 ; [p6+p7 | p4+p5]
- uhadd16 r11, r11, r4 ; [(p6+p7+1)>>1 | (p4+p5+1)>>1]
- ; [F|E]
-
- add r7, r7, r8, lsl #1 ; [p5+2*p6 | p3+2*p4]
- add r7, r7, r9 ; [p5+2*p6+p7 | p3+2*p4+p5]
- uxtab16 r7, r7, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
-
- uadd16 r2, r9, r10 ; [p7+p8 | p5+p6]
- uhadd16 r2, r2, r4 ; [(p7+p8+1)>>1 | (p5+p6+1)>>1]
- ; [J|I]
-
- add r8, r8, r9, lsl #1 ; [p6+2*p7 | p4+2*p5]
- add r8, r8, r10 ; [p6+2*p7+p8 | p4+2*p5+p6]
- uxtab16 r8, r8, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
-
- ldr r0, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- ; scale down
- and r5, lr, r5, asr #2 ; [B|A]
- and r6, lr, r6, asr #2 ; [D|C]
- and r7, lr, r7, asr #2 ; [H|G]
- and r8, lr, r8, asr #2 ; [L|K]
-
- add r12, r11, r2, lsl #8 ; [J|F|I|E]
- str r12, [r3], r0
-
- add r12, r7, r8, lsl #8 ; [L|H|K|G]
- str r12, [r3], r0
-
- pkhbt r2, r6, r2, lsl #16 ; [-|I|-|C]
- add r2, r2, r11, lsl #8 ; [F|I|E|C]
-
- pkhtb r12, r6, r5 ; [-|D|-|A]
- pkhtb r10, r7, r5, asr #16 ; [-|H|-|B]
- str r2, [r3], r0
- add r12, r12, r10, lsl #8 ; [H|D|B|A]
- str r12, [r3]
-
- pop {r4-r12, pc}
-
-b_vl_pred
- ldr r4, [r0] ; [3|2|1|0] = Above[0-3]
- ldr r12, c00020002
- ldr r5, [r0, #4] ; [7|6|5|4] = Above[4-7]
- ldr lr, c00FF00FF
- ldr r2, c00010001
-
- mov r0, r4, lsr #16 ; [-|-|3|2]
- add r0, r0, r5, lsl #16 ; [5|4|3|2]
- uxtb16 r6, r4 ; [2|0]
- uxtb16 r7, r4, ror #8 ; [3|1]
- uxtb16 r8, r0 ; [4|2]
- uxtb16 r9, r0, ror #8 ; [5|3]
- uxtb16 r10, r5 ; [6|4]
- uxtb16 r11, r5, ror #8 ; [7|5]
-
- uadd16 r4, r6, r7 ; [p2+p3 | p0+p1]
- uhadd16 r4, r4, r2 ; [(p2+p3+1)>>1 | (p0+p1+1)>>1]
- ; [B|A]
-
- add r5, r6, r7, lsl #1 ; [p2+2*p3 | p0+2*p1]
- add r5, r5, r8 ; [p2+2*p3+p4 | p0+2*p1+p2]
- uxtab16 r5, r5, r12 ; [p2+2*p3+p4+2 | p0+2*p1+p2+2]
-
- uadd16 r6, r7, r8 ; [p3+p4 | p1+p2]
- uhadd16 r6, r6, r2 ; [(p3+p4+1)>>1 | (p1+p2+1)>>1]
- ; [F|E]
-
- add r7, r7, r8, lsl #1 ; [p3+2*p4 | p1+2*p2]
- add r7, r7, r9 ; [p3+2*p4+p5 | p1+2*p2+p3]
- uxtab16 r7, r7, r12 ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]
-
- add r8, r8, r9, lsl #1 ; [p4+2*p5 | p2+2*p3]
- add r8, r8, r10 ; [p4+2*p5+p6 | p2+2*p3+p4]
- uxtab16 r8, r8, r12 ; [p4+2*p5+p6+2 | p2+2*p3+p4+2]
-
- add r9, r9, r10, lsl #1 ; [p5+2*p6 | p3+2*p4]
- add r9, r9, r11 ; [p5+2*p6+p7 | p3+2*p4+p5]
- uxtab16 r9, r9, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
-
- ldr r0, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- ; scale down
- and r5, lr, r5, asr #2 ; [D|C]
- and r7, lr, r7, asr #2 ; [H|G]
- and r8, lr, r8, asr #2 ; [I|D]
- and r9, lr, r9, asr #2 ; [J|H]
-
- add r10, r4, r6, lsl #8 ; [F|B|E|A]
- str r10, [r3], r0
-
- add r5, r5, r7, lsl #8 ; [H|C|G|D]
- str r5, [r3], r0
-
- pkhtb r12, r8, r4, asr #16 ; [-|I|-|B]
- pkhtb r10, r9, r8 ; [-|J|-|D]
-
- add r12, r6, r12, lsl #8 ; [I|F|B|E]
- str r12, [r3], r0
-
- add r10, r7, r10, lsl #8 ; [J|H|D|G]
- str r10, [r3]
-
- pop {r4-r12, pc}
-
-b_hd_pred
- ldrb r7, [r1], r2 ; l[0] = pp[3]
- ldr lr, [r0] ; Above = pp[8|7|6|5]
- ldrb r8, [sp, #48] ; tl = pp[4]
- ldrb r6, [r1], r2 ; l[1] = pp[2]
- ldrb r5, [r1], r2 ; l[2] = pp[1]
- ldrb r4, [r1] ; l[3] = pp[0]
-
- uxtb16 r9, lr ; p[7|5]
- uxtb16 r10, lr, ror #8 ; p[8|6]
-
- add r4, r4, r5, lsl #16 ; p[1|0]
- add r5, r5, r6, lsl #16 ; p[2|1]
- add r6, r6, r7, lsl #16 ; p[3|2]
- add r7, r7, r8, lsl #16 ; p[4|3]
-
- ldr r12, c00020002
- ldr lr, c00FF00FF
- ldr r2, c00010001
-
- pkhtb r8, r7, r9 ; p[4|5]
- pkhtb r1, r9, r10 ; p[7|6]
- pkhbt r10, r8, r10, lsl #16 ; p[6|5]
-
- uadd16 r11, r4, r5 ; [p1+p2 | p0+p1]
- uhadd16 r11, r11, r2 ; [(p1+p2+1)>>1 | (p0+p1+1)>>1]
- ; [B|A]
-
- add r4, r4, r5, lsl #1 ; [p1+2*p2 | p0+2*p1]
- add r4, r4, r6 ; [p1+2*p2+p3 | p0+2*p1+p2]
- uxtab16 r4, r4, r12 ; [p1+2*p2+p3+2 | p0+2*p1+p2+2]
-
- uadd16 r0, r6, r7 ; [p3+p4 | p2+p3]
- uhadd16 r0, r0, r2 ; [(p3+p4+1)>>1 | (p2+p3+1)>>1]
- ; [F|E]
-
- add r5, r6, r7, lsl #1 ; [p3+2*p4 | p2+2*p3]
- add r5, r5, r8, ror #16 ; [p3+2*p4+p5 | p2+2*p3+p4]
- uxtab16 r5, r5, r12 ; [p3+2*p4+p5+2 | p2+2*p3+p4+2]
-
- add r6, r12, r8, ror #16 ; [p5+2 | p4+2]
- add r6, r6, r10, lsl #1 ; [p5+2+2*p6 | p4+2+2*p5]
- uxtab16 r6, r6, r1 ; [p5+2+2*p6+p7 | p4+2+2*p5+p6]
-
- ; scale down
- and r4, lr, r4, asr #2 ; [D|C]
- and r5, lr, r5, asr #2 ; [H|G]
- and r6, lr, r6, asr #2 ; [J|I]
-
- ldr lr, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
-
- pkhtb r2, r0, r6 ; [-|F|-|I]
- pkhtb r12, r6, r5, asr #16 ; [-|J|-|H]
- add r12, r12, r2, lsl #8 ; [F|J|I|H]
- add r2, r0, r5, lsl #8 ; [H|F|G|E]
- mov r12, r12, ror #24 ; [J|I|H|F]
- str r12, [r3], lr
-
- mov r7, r11, asr #16 ; [-|-|-|B]
- str r2, [r3], lr
- add r7, r7, r0, lsl #16 ; [-|E|-|B]
- add r7, r7, r4, asr #8 ; [-|E|D|B]
- add r7, r7, r5, lsl #24 ; [G|E|D|B]
- str r7, [r3], lr
-
- add r5, r11, r4, lsl #8 ; [D|B|C|A]
- str r5, [r3]
-
- pop {r4-r12, pc}
-
-
-
-b_hu_pred
- ldrb r4, [r1], r2 ; Left[0]
- ldr r12, c00020002
- ldrb r5, [r1], r2 ; Left[1]
- ldr lr, c00FF00FF
- ldrb r6, [r1], r2 ; Left[2]
- ldr r2, c00010001
- ldrb r7, [r1] ; Left[3]
-
- add r4, r4, r5, lsl #16 ; [1|0]
- add r5, r5, r6, lsl #16 ; [2|1]
- add r9, r6, r7, lsl #16 ; [3|2]
-
- uadd16 r8, r4, r5 ; [p1+p2 | p0+p1]
- uhadd16 r8, r8, r2 ; [(p1+p2+1)>>1 | (p0+p1+1)>>1]
- ; [B|A]
-
- add r4, r4, r5, lsl #1 ; [p1+2*p2 | p0+2*p1]
- add r4, r4, r9 ; [p1+2*p2+p3 | p0+2*p1+p2]
- uxtab16 r4, r4, r12 ; [p1+2*p2+p3+2 | p0+2*p1+p2+2]
- ldr r2, [sp, #44] ; dst_stride
- ldr r3, [sp, #40] ; dst
- and r4, lr, r4, asr #2 ; [D|C]
-
- add r10, r6, r7 ; [p2+p3]
- add r11, r10, r7, lsl #1 ; [p2+3*p3]
- add r10, r10, #1
- add r11, r11, #2
- mov r10, r10, asr #1 ; [E]
- mov r11, r11, asr #2 ; [F]
-
- add r9, r7, r9, asr #8 ; [-|-|G|G]
- add r0, r8, r4, lsl #8 ; [D|B|C|A]
- add r7, r9, r9, lsl #16 ; [G|G|G|G]
-
- str r0, [r3], r2
-
- mov r1, r8, asr #16 ; [-|-|-|B]
- add r1, r1, r4, asr #8 ; [-|-|D|B]
- add r1, r1, r10, lsl #16 ; [-|E|D|B]
- add r1, r1, r11, lsl #24 ; [F|E|D|B]
- str r1, [r3], r2
-
- add r10, r11, lsl #8 ; [-|-|F|E]
- add r10, r10, r9, lsl #16 ; [G|G|F|E]
- str r10, [r3], r2
-
- str r7, [r3]
-
- pop {r4-r12, pc}
-
- ENDP
-
-; constants
-c00010001
- DCD 0x00010001
-c00020002
- DCD 0x00020002
-c00FF00FF
- DCD 0x00FF00FF
-
- END
diff --git a/libvpx/vp8/common/arm/neon/reconintra_neon.c b/libvpx/vp8/common/arm/neon/reconintra_neon.c
deleted file mode 100644
index af52cd5..0000000
--- a/libvpx/vp8/common/arm/neon/reconintra_neon.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <arm_neon.h>
-
-#include "vp8/common/blockd.h"
-
-void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x,
- unsigned char * yabove_row,
- unsigned char * yleft,
- int left_stride,
- unsigned char * ypred_ptr,
- int y_stride) {
- const int mode = x->mode_info_context->mbmi.mode;
- int i;
-
- switch (mode) {
- case DC_PRED:
- {
- int shift = x->up_available + x->left_available;
- uint8x16_t v_expected_dc = vdupq_n_u8(128);
-
- if (shift) {
- unsigned int average = 0;
- int expected_dc;
- if (x->up_available) {
- const uint8x16_t v_above = vld1q_u8(yabove_row);
- const uint16x8_t a = vpaddlq_u8(v_above);
- const uint32x4_t b = vpaddlq_u16(a);
- const uint64x2_t c = vpaddlq_u32(b);
- const uint32x2_t d = vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)),
- vreinterpret_u32_u64(vget_high_u64(c)));
- average = vget_lane_u32(d, 0);
- }
- if (x->left_available) {
- for (i = 0; i < 16; ++i) {
- average += yleft[0];
- yleft += left_stride;
- }
- }
- shift += 3;
- expected_dc = (average + (1 << (shift - 1))) >> shift;
- v_expected_dc = vmovq_n_u8((uint8_t)expected_dc);
- }
- for (i = 0; i < 16; ++i) {
- vst1q_u8(ypred_ptr, v_expected_dc);
- ypred_ptr += y_stride;
- }
- }
- break;
- case V_PRED:
- {
- const uint8x16_t v_above = vld1q_u8(yabove_row);
- for (i = 0; i < 16; ++i) {
- vst1q_u8(ypred_ptr, v_above);
- ypred_ptr += y_stride;
- }
- }
- break;
- case H_PRED:
- {
- for (i = 0; i < 16; ++i) {
- const uint8x16_t v_yleft = vmovq_n_u8((uint8_t)yleft[0]);
- yleft += left_stride;
- vst1q_u8(ypred_ptr, v_yleft);
- ypred_ptr += y_stride;
- }
- }
- break;
- case TM_PRED:
- {
- const uint16x8_t v_ytop_left = vmovq_n_u16((int16_t)yabove_row[-1]);
- const uint8x16_t v_above = vld1q_u8(yabove_row);
- for (i = 0; i < 16; ++i) {
- const uint8x8_t v_yleft = vmov_n_u8((int8_t)yleft[0]);
- const uint16x8_t a_lo = vaddl_u8(vget_low_u8(v_above), v_yleft);
- const uint16x8_t a_hi = vaddl_u8(vget_high_u8(v_above), v_yleft);
- const int16x8_t b_lo = vsubq_s16(vreinterpretq_s16_u16(a_lo),
- vreinterpretq_s16_u16(v_ytop_left));
- const int16x8_t b_hi = vsubq_s16(vreinterpretq_s16_u16(a_hi),
- vreinterpretq_s16_u16(v_ytop_left));
- const uint8x8_t pred_lo = vqmovun_s16(b_lo);
- const uint8x8_t pred_hi = vqmovun_s16(b_hi);
-
- vst1q_u8(ypred_ptr, vcombine_u8(pred_lo, pred_hi));
- ypred_ptr += y_stride;
- yleft += left_stride;
- }
- }
- break;
- }
-}
-
-void vp8_build_intra_predictors_mbuv_s_neon(MACROBLOCKD *x,
- unsigned char * uabove_row,
- unsigned char * vabove_row,
- unsigned char * uleft,
- unsigned char * vleft,
- int left_stride,
- unsigned char * upred_ptr,
- unsigned char * vpred_ptr,
- int pred_stride) {
- const int mode = x->mode_info_context->mbmi.uv_mode;
- int i;
-
- switch (mode) {
- case DC_PRED:
- {
- int shift = x->up_available + x->left_available;
- uint8x8_t v_expected_udc = vdup_n_u8(128);
- uint8x8_t v_expected_vdc = vdup_n_u8(128);
-
- if (shift) {
- unsigned int average_u = 0;
- unsigned int average_v = 0;
- int expected_udc;
- int expected_vdc;
- if (x->up_available) {
- const uint8x8_t v_uabove = vld1_u8(uabove_row);
- const uint8x8_t v_vabove = vld1_u8(vabove_row);
- const uint16x8_t a = vpaddlq_u8(vcombine_u8(v_uabove, v_vabove));
- const uint32x4_t b = vpaddlq_u16(a);
- const uint64x2_t c = vpaddlq_u32(b);
- average_u = vgetq_lane_u32(vreinterpretq_u32_u64((c)), 0);
- average_v = vgetq_lane_u32(vreinterpretq_u32_u64((c)), 2);
- }
- if (x->left_available) {
- for (i = 0; i < 8; ++i) {
- average_u += uleft[0];
- uleft += left_stride;
- average_v += vleft[0];
- vleft += left_stride;
- }
- }
- shift += 2;
- expected_udc = (average_u + (1 << (shift - 1))) >> shift;
- expected_vdc = (average_v + (1 << (shift - 1))) >> shift;
- v_expected_udc = vmov_n_u8((uint8_t)expected_udc);
- v_expected_vdc = vmov_n_u8((uint8_t)expected_vdc);
- }
- for (i = 0; i < 8; ++i) {
- vst1_u8(upred_ptr, v_expected_udc);
- upred_ptr += pred_stride;
- vst1_u8(vpred_ptr, v_expected_vdc);
- vpred_ptr += pred_stride;
- }
- }
- break;
- case V_PRED:
- {
- const uint8x8_t v_uabove = vld1_u8(uabove_row);
- const uint8x8_t v_vabove = vld1_u8(vabove_row);
- for (i = 0; i < 8; ++i) {
- vst1_u8(upred_ptr, v_uabove);
- upred_ptr += pred_stride;
- vst1_u8(vpred_ptr, v_vabove);
- vpred_ptr += pred_stride;
- }
- }
- break;
- case H_PRED:
- {
- for (i = 0; i < 8; ++i) {
- const uint8x8_t v_uleft = vmov_n_u8((uint8_t)uleft[0]);
- const uint8x8_t v_vleft = vmov_n_u8((uint8_t)vleft[0]);
- uleft += left_stride;
- vleft += left_stride;
- vst1_u8(upred_ptr, v_uleft);
- upred_ptr += pred_stride;
- vst1_u8(vpred_ptr, v_vleft);
- vpred_ptr += pred_stride;
- }
- }
- break;
- case TM_PRED:
- {
- const uint16x8_t v_utop_left = vmovq_n_u16((int16_t)uabove_row[-1]);
- const uint16x8_t v_vtop_left = vmovq_n_u16((int16_t)vabove_row[-1]);
- const uint8x8_t v_uabove = vld1_u8(uabove_row);
- const uint8x8_t v_vabove = vld1_u8(vabove_row);
- for (i = 0; i < 8; ++i) {
- const uint8x8_t v_uleft = vmov_n_u8((int8_t)uleft[0]);
- const uint8x8_t v_vleft = vmov_n_u8((int8_t)vleft[0]);
- const uint16x8_t a_u = vaddl_u8(v_uabove, v_uleft);
- const uint16x8_t a_v = vaddl_u8(v_vabove, v_vleft);
- const int16x8_t b_u = vsubq_s16(vreinterpretq_s16_u16(a_u),
- vreinterpretq_s16_u16(v_utop_left));
- const int16x8_t b_v = vsubq_s16(vreinterpretq_s16_u16(a_v),
- vreinterpretq_s16_u16(v_vtop_left));
- const uint8x8_t pred_u = vqmovun_s16(b_u);
- const uint8x8_t pred_v = vqmovun_s16(b_v);
-
- vst1_u8(upred_ptr, pred_u);
- vst1_u8(vpred_ptr, pred_v);
- upred_ptr += pred_stride;
- vpred_ptr += pred_stride;
- uleft += left_stride;
- vleft += left_stride;
- }
- }
- break;
- }
-}
diff --git a/libvpx/vp8/common/common.h b/libvpx/vp8/common/common.h
index ba3d9f5..e58a9cc 100644
--- a/libvpx/vp8/common/common.h
+++ b/libvpx/vp8/common/common.h
@@ -22,9 +22,6 @@
extern "C" {
#endif
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-
/* Only need this for fixed-size arrays, for structs just assign. */
#define vp8_copy( Dest, Src) { \
diff --git a/libvpx/vp8/common/findnearmv.h b/libvpx/vp8/common/findnearmv.h
index 3c8c050..155847c 100644
--- a/libvpx/vp8/common/findnearmv.h
+++ b/libvpx/vp8/common/findnearmv.h
@@ -12,6 +12,7 @@
#ifndef VP8_COMMON_FINDNEARMV_H_
#define VP8_COMMON_FINDNEARMV_H_
+#include "./vpx_config.h"
#include "mv.h"
#include "blockd.h"
#include "modecont.h"
@@ -22,8 +23,8 @@
#endif
-static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp,
- const int *ref_frame_sign_bias)
+static INLINE void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
+ int_mv *mvp, const int *ref_frame_sign_bias)
{
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
{
@@ -34,7 +35,7 @@
#define LEFT_TOP_MARGIN (16 << 3)
#define RIGHT_BOTTOM_MARGIN (16 << 3)
-static void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd)
+static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd)
{
if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
@@ -47,8 +48,9 @@
mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
}
-static void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge, int mb_to_right_edge,
- int mb_to_top_edge, int mb_to_bottom_edge)
+static INLINE void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge,
+ int mb_to_right_edge, int mb_to_top_edge,
+ int mb_to_bottom_edge)
{
mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ?
mb_to_left_edge : mv->as_mv.col;
@@ -59,9 +61,10 @@
mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ?
mb_to_bottom_edge : mv->as_mv.row;
}
-static unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge,
- int mb_to_right_edge, int mb_to_top_edge,
- int mb_to_bottom_edge)
+static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge,
+ int mb_to_right_edge,
+ int mb_to_top_edge,
+ int mb_to_bottom_edge)
{
unsigned int need_to_clamp;
need_to_clamp = (mv->as_mv.col < mb_to_left_edge);
@@ -101,7 +104,7 @@
extern const unsigned char vp8_mbsplit_offset[4][16];
-static int left_block_mv(const MODE_INFO *cur_mb, int b)
+static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b)
{
if (!(b & 3))
{
@@ -116,7 +119,7 @@
return (cur_mb->bmi + b - 1)->mv.as_int;
}
-static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
+static INLINE int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
{
if (!(b >> 2))
{
@@ -130,7 +133,7 @@
return (cur_mb->bmi + (b - 4))->mv.as_int;
}
-static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
+static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
{
if (!(b & 3))
{
@@ -156,7 +159,8 @@
return (cur_mb->bmi + b - 1)->as_mode;
}
-static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi_stride)
+static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b,
+ int mi_stride)
{
if (!(b >> 2))
{
diff --git a/libvpx/vp8/common/invtrans.h b/libvpx/vp8/common/invtrans.h
index affe57e..9cfea8d 100644
--- a/libvpx/vp8/common/invtrans.h
+++ b/libvpx/vp8/common/invtrans.h
@@ -12,7 +12,7 @@
#ifndef VP8_COMMON_INVTRANS_H_
#define VP8_COMMON_INVTRANS_H_
-#include "vpx_config.h"
+#include "./vpx_config.h"
#include "vp8_rtcd.h"
#include "blockd.h"
#include "onyxc_int.h"
@@ -37,7 +37,7 @@
}
}
-static void vp8_inverse_transform_mby(MACROBLOCKD *xd)
+static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd)
{
short *DQC = xd->dequant_y1;
diff --git a/libvpx/vp8/common/mips/msa/reconintra_msa.c b/libvpx/vp8/common/mips/msa/reconintra_msa.c
deleted file mode 100644
index 57f705d..0000000
--- a/libvpx/vp8/common/mips/msa/reconintra_msa.c
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./vp8_rtcd.h"
-#include "vp8/common/blockd.h"
-#include "vp8/common/mips/msa/vp8_macros_msa.h"
-
-static void intra_predict_vert_8x8_msa(uint8_t *src, uint8_t *dst,
- int32_t dst_stride)
-{
- uint64_t out = LD(src);
-
- SD4(out, out, out, out, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(out, out, out, out, dst, dst_stride);
-}
-
-static void intra_predict_vert_16x16_msa(uint8_t *src, uint8_t *dst,
- int32_t dst_stride)
-{
- v16u8 out = LD_UB(src);
-
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
- dst += (8 * dst_stride);
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
-}
-
-static void intra_predict_horiz_8x8_msa(uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride)
-{
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- out0 = src[0 * src_stride] * 0x0101010101010101ull;
- out1 = src[1 * src_stride] * 0x0101010101010101ull;
- out2 = src[2 * src_stride] * 0x0101010101010101ull;
- out3 = src[3 * src_stride] * 0x0101010101010101ull;
- out4 = src[4 * src_stride] * 0x0101010101010101ull;
- out5 = src[5 * src_stride] * 0x0101010101010101ull;
- out6 = src[6 * src_stride] * 0x0101010101010101ull;
- out7 = src[7 * src_stride] * 0x0101010101010101ull;
-
- SD4(out0, out1, out2, out3, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(out4, out5, out6, out7, dst, dst_stride);
-}
-
-static void intra_predict_horiz_16x16_msa(uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride)
-{
- uint32_t row;
- uint8_t inp0, inp1, inp2, inp3;
- v16u8 src0, src1, src2, src3;
-
- for (row = 4; row--;)
- {
- inp0 = src[0];
- src += src_stride;
- inp1 = src[0];
- src += src_stride;
- inp2 = src[0];
- src += src_stride;
- inp3 = src[0];
- src += src_stride;
-
- src0 = (v16u8)__msa_fill_b(inp0);
- src1 = (v16u8)__msa_fill_b(inp1);
- src2 = (v16u8)__msa_fill_b(inp2);
- src3 = (v16u8)__msa_fill_b(inp3);
-
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- dst += (4 * dst_stride);
- }
-}
-
-static void intra_predict_dc_8x8_msa(uint8_t *src_top, uint8_t *src_left,
- int32_t src_stride_left,
- uint8_t *dst, int32_t dst_stride,
- uint8_t is_above, uint8_t is_left)
-{
- uint32_t row, addition = 0;
- uint64_t out;
- v16u8 src_above, store;
- v8u16 sum_above;
- v4u32 sum_top;
- v2u64 sum;
-
- if (is_left && is_above)
- {
- src_above = LD_UB(src_top);
-
- sum_above = __msa_hadd_u_h(src_above, src_above);
- sum_top = __msa_hadd_u_w(sum_above, sum_above);
- sum = __msa_hadd_u_d(sum_top, sum_top);
- addition = __msa_copy_u_w((v4i32)sum, 0);
-
- for (row = 0; row < 8; ++row)
- {
- addition += src_left[row * src_stride_left];
- }
-
- addition = (addition + 8) >> 4;
- store = (v16u8)__msa_fill_b(addition);
- }
- else if (is_left)
- {
- for (row = 0; row < 8; ++row)
- {
- addition += src_left[row * src_stride_left];
- }
-
- addition = (addition + 4) >> 3;
- store = (v16u8)__msa_fill_b(addition);
- }
- else if (is_above)
- {
- src_above = LD_UB(src_top);
-
- sum_above = __msa_hadd_u_h(src_above, src_above);
- sum_top = __msa_hadd_u_w(sum_above, sum_above);
- sum = __msa_hadd_u_d(sum_top, sum_top);
- sum = (v2u64)__msa_srari_d((v2i64)sum, 3);
- store = (v16u8)__msa_splati_b((v16i8)sum, 0);
- }
- else
- {
- store = (v16u8)__msa_ldi_b(128);
- }
-
- out = __msa_copy_u_d((v2i64)store, 0);
-
- SD4(out, out, out, out, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(out, out, out, out, dst, dst_stride);
-}
-
-static void intra_predict_dc_16x16_msa(uint8_t *src_top, uint8_t *src_left,
- int32_t src_stride_left,
- uint8_t *dst, int32_t dst_stride,
- uint8_t is_above, uint8_t is_left)
-{
- uint32_t row;
- uint32_t addition = 0;
- v16u8 src_above, out;
- v8u16 sum_above;
- v4u32 sum_top;
- v2u64 sum;
-
- if (is_left && is_above)
- {
- src_above = LD_UB(src_top);
-
- sum_above = __msa_hadd_u_h(src_above, src_above);
- sum_top = __msa_hadd_u_w(sum_above, sum_above);
- sum = __msa_hadd_u_d(sum_top, sum_top);
- sum_top = (v4u32)__msa_pckev_w((v4i32)sum, (v4i32)sum);
- sum = __msa_hadd_u_d(sum_top, sum_top);
- addition = __msa_copy_u_w((v4i32)sum, 0);
-
- for (row = 0; row < 16; ++row)
- {
- addition += src_left[row * src_stride_left];
- }
-
- addition = (addition + 16) >> 5;
- out = (v16u8)__msa_fill_b(addition);
- }
- else if (is_left)
- {
- for (row = 0; row < 16; ++row)
- {
- addition += src_left[row * src_stride_left];
- }
-
- addition = (addition + 8) >> 4;
- out = (v16u8)__msa_fill_b(addition);
- }
- else if (is_above)
- {
- src_above = LD_UB(src_top);
-
- sum_above = __msa_hadd_u_h(src_above, src_above);
- sum_top = __msa_hadd_u_w(sum_above, sum_above);
- sum = __msa_hadd_u_d(sum_top, sum_top);
- sum_top = (v4u32)__msa_pckev_w((v4i32)sum, (v4i32)sum);
- sum = __msa_hadd_u_d(sum_top, sum_top);
- sum = (v2u64)__msa_srari_d((v2i64)sum, 4);
- out = (v16u8)__msa_splati_b((v16i8)sum, 0);
- }
- else
- {
- out = (v16u8)__msa_ldi_b(128);
- }
-
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
- dst += (8 * dst_stride);
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
-}
-
-void vp8_build_intra_predictors_mby_s_msa(struct macroblockd *x,
- unsigned char *yabove_row,
- unsigned char *yleft,
- int left_stride,
- unsigned char *ypred_ptr,
- int y_stride)
-{
- uint32_t row, col;
- uint8_t ytop_left = yabove_row[-1];
-
- switch (x->mode_info_context->mbmi.mode)
- {
- case DC_PRED:
- intra_predict_dc_16x16_msa(yabove_row, yleft, left_stride,
- ypred_ptr, y_stride,
- x->up_available, x->left_available);
- break;
-
- case V_PRED:
- intra_predict_vert_16x16_msa(yabove_row, ypred_ptr, y_stride);
- break;
-
- case H_PRED:
- intra_predict_horiz_16x16_msa(yleft, left_stride, ypred_ptr,
- y_stride);
- break;
-
- case TM_PRED:
- for (row = 0; row < 16; ++row)
- {
- for (col = 0; col < 16; ++col)
- {
- int pred = yleft[row * left_stride] + yabove_row[col] -
- ytop_left;
-
- if (pred < 0)
- pred = 0;
-
- if (pred > 255)
- pred = 255;
-
- ypred_ptr[col] = pred;
- }
-
- ypred_ptr += y_stride;
- }
- break;
-
- case B_PRED:
- case NEARESTMV:
- case NEARMV:
- case ZEROMV:
- case NEWMV:
- case SPLITMV:
- case MB_MODE_COUNT:
- break;
- }
-}
-
-void vp8_build_intra_predictors_mbuv_s_msa(struct macroblockd *x,
- unsigned char *uabove_row,
- unsigned char *vabove_row,
- unsigned char *uleft,
- unsigned char *vleft,
- int left_stride,
- unsigned char *upred_ptr,
- unsigned char *vpred_ptr,
- int pred_stride)
-{
- uint32_t row, col;
- uint8_t utop_left = uabove_row[-1];
- uint8_t vtop_left = vabove_row[-1];
-
- switch (x->mode_info_context->mbmi.uv_mode)
- {
- case DC_PRED:
- intra_predict_dc_8x8_msa(uabove_row, uleft, left_stride,
- upred_ptr, pred_stride,
- x->up_available, x->left_available);
- intra_predict_dc_8x8_msa(vabove_row, vleft, left_stride,
- vpred_ptr, pred_stride,
- x->up_available, x->left_available);
- break;
-
- case V_PRED:
- intra_predict_vert_8x8_msa(uabove_row, upred_ptr, pred_stride);
- intra_predict_vert_8x8_msa(vabove_row, vpred_ptr, pred_stride);
- break;
-
- case H_PRED:
- intra_predict_horiz_8x8_msa(uleft, left_stride, upred_ptr,
- pred_stride);
- intra_predict_horiz_8x8_msa(vleft, left_stride, vpred_ptr,
- pred_stride);
- break;
-
- case TM_PRED:
- for (row = 0; row < 8; ++row)
- {
- for (col = 0; col < 8; ++col)
- {
- int predu = uleft[row * left_stride] + uabove_row[col] -
- utop_left;
- int predv = vleft[row * left_stride] + vabove_row[col] -
- vtop_left;
-
- if (predu < 0)
- predu = 0;
-
- if (predu > 255)
- predu = 255;
-
- if (predv < 0)
- predv = 0;
-
- if (predv > 255)
- predv = 255;
-
- upred_ptr[col] = predu;
- vpred_ptr[col] = predv;
- }
-
- upred_ptr += pred_stride;
- vpred_ptr += pred_stride;
- }
- break;
-
- case B_PRED:
- case NEARESTMV:
- case NEARMV:
- case ZEROMV:
- case NEWMV:
- case SPLITMV:
- case MB_MODE_COUNT:
- break;
- }
-}
diff --git a/libvpx/vp8/common/onyx.h b/libvpx/vp8/common/onyx.h
index f39b675..febe815 100644
--- a/libvpx/vp8/common/onyx.h
+++ b/libvpx/vp8/common/onyx.h
@@ -65,7 +65,7 @@
#include <assert.h>
- static void Scale2Ratio(int mode, int *hr, int *hs)
+ static INLINE void Scale2Ratio(int mode, int *hr, int *hs)
{
switch (mode)
{
diff --git a/libvpx/vp8/common/postproc.c b/libvpx/vp8/common/postproc.c
index a4e6ae1..322b613 100644
--- a/libvpx/vp8/common/postproc.c
+++ b/libvpx/vp8/common/postproc.c
@@ -675,6 +675,7 @@
}
}
+#if CONFIG_POSTPROC_VISUALIZER
static void constrain_line (int x_0, int *x_1, int y_0, int *y_1, int width, int height)
{
int dx;
@@ -717,6 +718,7 @@
*x_1 = ((0-y_0)*dx)/dy + x_0;
}
}
+#endif // CONFIG_POSTPROC_VISUALIZER
#if CONFIG_POSTPROC
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags)
diff --git a/libvpx/vp8/common/reconintra.c b/libvpx/vp8/common/reconintra.c
index 0a6c51b..356655d 100644
--- a/libvpx/vp8/common/reconintra.c
+++ b/libvpx/vp8/common/reconintra.c
@@ -9,272 +9,109 @@
*/
-#include "vpx_config.h"
-#include "vp8_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "./vp8_rtcd.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/vpx_once.h"
#include "blockd.h"
+#include "vp8/common/reconintra.h"
+#include "vp8/common/reconintra4x4.h"
-void vp8_build_intra_predictors_mby_s_c(MACROBLOCKD *x,
- unsigned char * yabove_row,
- unsigned char * yleft,
- int left_stride,
- unsigned char * ypred_ptr,
- int y_stride)
+enum {
+ SIZE_16,
+ SIZE_8,
+ NUM_SIZES,
+};
+
+typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left);
+
+static intra_pred_fn pred[4][NUM_SIZES];
+static intra_pred_fn dc_pred[2][2][NUM_SIZES];
+
+static void vp8_init_intra_predictors_internal(void)
{
- unsigned char yleft_col[16];
- unsigned char ytop_left = yabove_row[-1];
- int r, c, i;
+#define INIT_SIZE(sz) \
+ pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \
+ pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \
+ pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \
+ \
+ dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \
+ dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \
+ dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \
+ dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz
+
+ INIT_SIZE(16);
+ INIT_SIZE(8);
+ vp8_init_intra4x4_predictors_internal();
+}
+
+void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
+ unsigned char * yabove_row,
+ unsigned char * yleft,
+ int left_stride,
+ unsigned char * ypred_ptr,
+ int y_stride)
+{
+ MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode;
+ DECLARE_ALIGNED(16, uint8_t, yleft_col[16]);
+ int i;
+ intra_pred_fn fn;
for (i = 0; i < 16; i++)
{
yleft_col[i] = yleft[i* left_stride];
}
- /* for Y */
- switch (x->mode_info_context->mbmi.mode)
+ if (mode == DC_PRED)
{
- case DC_PRED:
+ fn = dc_pred[x->left_available][x->up_available][SIZE_16];
+ }
+ else
{
- int expected_dc;
- int shift;
- int average = 0;
-
-
- if (x->up_available || x->left_available)
- {
- if (x->up_available)
- {
- for (i = 0; i < 16; i++)
- {
- average += yabove_row[i];
- }
- }
-
- if (x->left_available)
- {
-
- for (i = 0; i < 16; i++)
- {
- average += yleft_col[i];
- }
-
- }
-
-
-
- shift = 3 + x->up_available + x->left_available;
- expected_dc = (average + (1 << (shift - 1))) >> shift;
- }
- else
- {
- expected_dc = 128;
- }
-
- /*memset(ypred_ptr, expected_dc, 256);*/
- for (r = 0; r < 16; r++)
- {
- memset(ypred_ptr, expected_dc, 16);
- ypred_ptr += y_stride;
- }
+ fn = pred[mode][SIZE_16];
}
- break;
- case V_PRED:
- {
- for (r = 0; r < 16; r++)
- {
-
- ((int *)ypred_ptr)[0] = ((int *)yabove_row)[0];
- ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
- ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
- ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
- ypred_ptr += y_stride;
- }
- }
- break;
- case H_PRED:
- {
-
- for (r = 0; r < 16; r++)
- {
-
- memset(ypred_ptr, yleft_col[r], 16);
- ypred_ptr += y_stride;
- }
-
- }
- break;
- case TM_PRED:
- {
-
- for (r = 0; r < 16; r++)
- {
- for (c = 0; c < 16; c++)
- {
- int pred = yleft_col[r] + yabove_row[ c] - ytop_left;
-
- if (pred < 0)
- pred = 0;
-
- if (pred > 255)
- pred = 255;
-
- ypred_ptr[c] = pred;
- }
-
- ypred_ptr += y_stride;
- }
-
- }
- break;
- case B_PRED:
- case NEARESTMV:
- case NEARMV:
- case ZEROMV:
- case NEWMV:
- case SPLITMV:
- case MB_MODE_COUNT:
- break;
- }
+ fn(ypred_ptr, y_stride, yabove_row, yleft_col);
}
-void vp8_build_intra_predictors_mbuv_s_c(MACROBLOCKD *x,
- unsigned char * uabove_row,
- unsigned char * vabove_row,
- unsigned char * uleft,
- unsigned char * vleft,
- int left_stride,
- unsigned char * upred_ptr,
- unsigned char * vpred_ptr,
- int pred_stride)
+void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
+ unsigned char * uabove_row,
+ unsigned char * vabove_row,
+ unsigned char * uleft,
+ unsigned char * vleft,
+ int left_stride,
+ unsigned char * upred_ptr,
+ unsigned char * vpred_ptr,
+ int pred_stride)
{
+ MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode;
unsigned char uleft_col[8];
- unsigned char utop_left = uabove_row[-1];
unsigned char vleft_col[8];
- unsigned char vtop_left = vabove_row[-1];
-
- int i, j;
+ int i;
+ intra_pred_fn fn;
for (i = 0; i < 8; i++)
{
- uleft_col[i] = uleft [i* left_stride];
- vleft_col[i] = vleft [i* left_stride];
+ uleft_col[i] = uleft[i * left_stride];
+ vleft_col[i] = vleft[i * left_stride];
}
- switch (x->mode_info_context->mbmi.uv_mode)
+ if (uvmode == DC_PRED)
{
- case DC_PRED:
+ fn = dc_pred[x->left_available][x->up_available][SIZE_8];
+ }
+ else
{
- int expected_udc;
- int expected_vdc;
- int shift;
- int Uaverage = 0;
- int Vaverage = 0;
-
- if (x->up_available)
- {
- for (i = 0; i < 8; i++)
- {
- Uaverage += uabove_row[i];
- Vaverage += vabove_row[i];
- }
- }
-
- if (x->left_available)
- {
- for (i = 0; i < 8; i++)
- {
- Uaverage += uleft_col[i];
- Vaverage += vleft_col[i];
- }
- }
-
- if (!x->up_available && !x->left_available)
- {
- expected_udc = 128;
- expected_vdc = 128;
- }
- else
- {
- shift = 2 + x->up_available + x->left_available;
- expected_udc = (Uaverage + (1 << (shift - 1))) >> shift;
- expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift;
- }
-
-
- /*memset(upred_ptr,expected_udc,64);*/
- /*memset(vpred_ptr,expected_vdc,64);*/
- for (i = 0; i < 8; i++)
- {
- memset(upred_ptr, expected_udc, 8);
- memset(vpred_ptr, expected_vdc, 8);
- upred_ptr += pred_stride;
- vpred_ptr += pred_stride;
- }
- }
- break;
- case V_PRED:
- {
- for (i = 0; i < 8; i++)
- {
- memcpy(upred_ptr, uabove_row, 8);
- memcpy(vpred_ptr, vabove_row, 8);
- upred_ptr += pred_stride;
- vpred_ptr += pred_stride;
- }
-
- }
- break;
- case H_PRED:
- {
- for (i = 0; i < 8; i++)
- {
- memset(upred_ptr, uleft_col[i], 8);
- memset(vpred_ptr, vleft_col[i], 8);
- upred_ptr += pred_stride;
- vpred_ptr += pred_stride;
- }
+ fn = pred[uvmode][SIZE_8];
}
- break;
- case TM_PRED:
- {
- for (i = 0; i < 8; i++)
- {
- for (j = 0; j < 8; j++)
- {
- int predu = uleft_col[i] + uabove_row[j] - utop_left;
- int predv = vleft_col[i] + vabove_row[j] - vtop_left;
+ fn(upred_ptr, pred_stride, uabove_row, uleft_col);
+ fn(vpred_ptr, pred_stride, vabove_row, vleft_col);
+}
- if (predu < 0)
- predu = 0;
-
- if (predu > 255)
- predu = 255;
-
- if (predv < 0)
- predv = 0;
-
- if (predv > 255)
- predv = 255;
-
- upred_ptr[j] = predu;
- vpred_ptr[j] = predv;
- }
-
- upred_ptr += pred_stride;
- vpred_ptr += pred_stride;
- }
-
- }
- break;
- case B_PRED:
- case NEARESTMV:
- case NEARMV:
- case ZEROMV:
- case NEWMV:
- case SPLITMV:
- case MB_MODE_COUNT:
- break;
- }
+void vp8_init_intra_predictors(void)
+{
+ once(vp8_init_intra_predictors_internal);
}
diff --git a/libvpx/vp8/common/reconintra.h b/libvpx/vp8/common/reconintra.h
new file mode 100644
index 0000000..b6225a6
--- /dev/null
+++ b/libvpx/vp8/common/reconintra.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP8_COMMON_RECONINTRA_H_
+#define VP8_COMMON_RECONINTRA_H_
+
+#include "vp8/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
+ unsigned char *yabove_row,
+ unsigned char *yleft,
+ int left_stride,
+ unsigned char *ypred_ptr,
+ int y_stride);
+
+void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
+ unsigned char * uabove_row,
+ unsigned char * vabove_row,
+ unsigned char * uleft,
+ unsigned char * vleft,
+ int left_stride,
+ unsigned char * upred_ptr,
+ unsigned char * vpred_ptr,
+ int pred_stride);
+
+void vp8_init_intra_predictors(void);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP8_COMMON_RECONINTRA_H_
diff --git a/libvpx/vp8/common/reconintra4x4.c b/libvpx/vp8/common/reconintra4x4.c
index 3d4f2c4..35ad891 100644
--- a/libvpx/vp8/common/reconintra4x4.c
+++ b/libvpx/vp8/common/reconintra4x4.c
@@ -8,290 +8,47 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <string.h>
#include "vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp8_rtcd.h"
#include "blockd.h"
-void vp8_intra4x4_predict_c(unsigned char *Above,
- unsigned char *yleft, int left_stride,
- int _b_mode,
- unsigned char *dst, int dst_stride,
- unsigned char top_left)
+typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left);
+
+static intra_pred_fn pred[10];
+
+void vp8_init_intra4x4_predictors_internal(void)
{
- int i, r, c;
- B_PREDICTION_MODE b_mode = (B_PREDICTION_MODE)_b_mode;
+ pred[B_DC_PRED] = vpx_dc_predictor_4x4;
+ pred[B_TM_PRED] = vpx_tm_predictor_4x4;
+ pred[B_VE_PRED] = vpx_ve_predictor_4x4;
+ pred[B_HE_PRED] = vpx_he_predictor_4x4;
+ pred[B_LD_PRED] = vpx_d45e_predictor_4x4;
+ pred[B_RD_PRED] = vpx_d135_predictor_4x4;
+ pred[B_VR_PRED] = vpx_d117_predictor_4x4;
+ pred[B_VL_PRED] = vpx_d63f_predictor_4x4;
+ pred[B_HD_PRED] = vpx_d153_predictor_4x4;
+ pred[B_HU_PRED] = vpx_d207_predictor_4x4;
+}
+
+void vp8_intra4x4_predict(unsigned char *above,
+ unsigned char *yleft, int left_stride,
+ B_PREDICTION_MODE b_mode,
+ unsigned char *dst, int dst_stride,
+ unsigned char top_left)
+{
unsigned char Left[4];
+ unsigned char Aboveb[12], *Above = Aboveb + 4;
+
Left[0] = yleft[0];
Left[1] = yleft[left_stride];
Left[2] = yleft[2 * left_stride];
Left[3] = yleft[3 * left_stride];
+ memcpy(Above, above, 8);
+ Above[-1] = top_left;
- switch (b_mode)
- {
- case B_DC_PRED:
- {
- int expected_dc = 0;
-
- for (i = 0; i < 4; i++)
- {
- expected_dc += Above[i];
- expected_dc += Left[i];
- }
-
- expected_dc = (expected_dc + 4) >> 3;
-
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
- dst[c] = expected_dc;
- }
-
- dst += dst_stride;
- }
- }
- break;
- case B_TM_PRED:
- {
- /* prediction similar to true_motion prediction */
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
- int pred = Above[c] - top_left + Left[r];
-
- if (pred < 0)
- pred = 0;
-
- if (pred > 255)
- pred = 255;
-
- dst[c] = pred;
- }
-
- dst += dst_stride;
- }
- }
- break;
-
- case B_VE_PRED:
- {
-
- unsigned int ap[4];
- ap[0] = (top_left + 2 * Above[0] + Above[1] + 2) >> 2;
- ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2;
- ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2;
- ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2;
-
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
-
- dst[c] = ap[c];
- }
-
- dst += dst_stride;
- }
-
- }
- break;
-
-
- case B_HE_PRED:
- {
-
- unsigned int lp[4];
- lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2;
- lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2;
- lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2;
- lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2;
-
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
- dst[c] = lp[r];
- }
-
- dst += dst_stride;
- }
- }
- break;
- case B_LD_PRED:
- {
- unsigned char *ptr = Above;
- dst[0 * dst_stride + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2;
- dst[0 * dst_stride + 1] =
- dst[1 * dst_stride + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2;
- dst[0 * dst_stride + 2] =
- dst[1 * dst_stride + 1] =
- dst[2 * dst_stride + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2;
- dst[0 * dst_stride + 3] =
- dst[1 * dst_stride + 2] =
- dst[2 * dst_stride + 1] =
- dst[3 * dst_stride + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2;
- dst[1 * dst_stride + 3] =
- dst[2 * dst_stride + 2] =
- dst[3 * dst_stride + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2;
- dst[2 * dst_stride + 3] =
- dst[3 * dst_stride + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2;
- dst[3 * dst_stride + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2;
-
- }
- break;
- case B_RD_PRED:
- {
-
- unsigned char pp[9];
-
- pp[0] = Left[3];
- pp[1] = Left[2];
- pp[2] = Left[1];
- pp[3] = Left[0];
- pp[4] = top_left;
- pp[5] = Above[0];
- pp[6] = Above[1];
- pp[7] = Above[2];
- pp[8] = Above[3];
-
- dst[3 * dst_stride + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
- dst[3 * dst_stride + 1] =
- dst[2 * dst_stride + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- dst[3 * dst_stride + 2] =
- dst[2 * dst_stride + 1] =
- dst[1 * dst_stride + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
- dst[3 * dst_stride + 3] =
- dst[2 * dst_stride + 2] =
- dst[1 * dst_stride + 1] =
- dst[0 * dst_stride + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- dst[2 * dst_stride + 3] =
- dst[1 * dst_stride + 2] =
- dst[0 * dst_stride + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- dst[1 * dst_stride + 3] =
- dst[0 * dst_stride + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
- dst[0 * dst_stride + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
-
- }
- break;
- case B_VR_PRED:
- {
-
- unsigned char pp[9];
-
- pp[0] = Left[3];
- pp[1] = Left[2];
- pp[2] = Left[1];
- pp[3] = Left[0];
- pp[4] = top_left;
- pp[5] = Above[0];
- pp[6] = Above[1];
- pp[7] = Above[2];
- pp[8] = Above[3];
-
-
- dst[3 * dst_stride + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- dst[2 * dst_stride + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
- dst[3 * dst_stride + 1] =
- dst[1 * dst_stride + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- dst[2 * dst_stride + 1] =
- dst[0 * dst_stride + 0] = (pp[4] + pp[5] + 1) >> 1;
- dst[3 * dst_stride + 2] =
- dst[1 * dst_stride + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- dst[2 * dst_stride + 2] =
- dst[0 * dst_stride + 1] = (pp[5] + pp[6] + 1) >> 1;
- dst[3 * dst_stride + 3] =
- dst[1 * dst_stride + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
- dst[2 * dst_stride + 3] =
- dst[0 * dst_stride + 2] = (pp[6] + pp[7] + 1) >> 1;
- dst[1 * dst_stride + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
- dst[0 * dst_stride + 3] = (pp[7] + pp[8] + 1) >> 1;
-
- }
- break;
- case B_VL_PRED:
- {
-
- unsigned char *pp = Above;
-
- dst[0 * dst_stride + 0] = (pp[0] + pp[1] + 1) >> 1;
- dst[1 * dst_stride + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
- dst[2 * dst_stride + 0] =
- dst[0 * dst_stride + 1] = (pp[1] + pp[2] + 1) >> 1;
- dst[1 * dst_stride + 1] =
- dst[3 * dst_stride + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- dst[2 * dst_stride + 1] =
- dst[0 * dst_stride + 2] = (pp[2] + pp[3] + 1) >> 1;
- dst[3 * dst_stride + 1] =
- dst[1 * dst_stride + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
- dst[0 * dst_stride + 3] =
- dst[2 * dst_stride + 2] = (pp[3] + pp[4] + 1) >> 1;
- dst[1 * dst_stride + 3] =
- dst[3 * dst_stride + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- dst[2 * dst_stride + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- dst[3 * dst_stride + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
- }
- break;
-
- case B_HD_PRED:
- {
- unsigned char pp[9];
- pp[0] = Left[3];
- pp[1] = Left[2];
- pp[2] = Left[1];
- pp[3] = Left[0];
- pp[4] = top_left;
- pp[5] = Above[0];
- pp[6] = Above[1];
- pp[7] = Above[2];
- pp[8] = Above[3];
-
-
- dst[3 * dst_stride + 0] = (pp[0] + pp[1] + 1) >> 1;
- dst[3 * dst_stride + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
- dst[2 * dst_stride + 0] =
- dst[3 * dst_stride + 2] = (pp[1] + pp[2] + 1) >> 1;
- dst[2 * dst_stride + 1] =
- dst[3 * dst_stride + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- dst[2 * dst_stride + 2] =
- dst[1 * dst_stride + 0] = (pp[2] + pp[3] + 1) >> 1;
- dst[2 * dst_stride + 3] =
- dst[1 * dst_stride + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
- dst[1 * dst_stride + 2] =
- dst[0 * dst_stride + 0] = (pp[3] + pp[4] + 1) >> 1;
- dst[1 * dst_stride + 3] =
- dst[0 * dst_stride + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
- dst[0 * dst_stride + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
- dst[0 * dst_stride + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
- }
- break;
-
-
- case B_HU_PRED:
- {
- unsigned char *pp = Left;
- dst[0 * dst_stride + 0] = (pp[0] + pp[1] + 1) >> 1;
- dst[0 * dst_stride + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
- dst[0 * dst_stride + 2] =
- dst[1 * dst_stride + 0] = (pp[1] + pp[2] + 1) >> 1;
- dst[0 * dst_stride + 3] =
- dst[1 * dst_stride + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
- dst[1 * dst_stride + 2] =
- dst[2 * dst_stride + 0] = (pp[2] + pp[3] + 1) >> 1;
- dst[1 * dst_stride + 3] =
- dst[2 * dst_stride + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2;
- dst[2 * dst_stride + 2] =
- dst[2 * dst_stride + 3] =
- dst[3 * dst_stride + 0] =
- dst[3 * dst_stride + 1] =
- dst[3 * dst_stride + 2] =
- dst[3 * dst_stride + 3] = pp[3];
- }
- break;
-
- default:
- break;
-
- }
+ pred[b_mode](dst, dst_stride, Above, Left);
}
diff --git a/libvpx/vp8/common/reconintra4x4.h b/libvpx/vp8/common/reconintra4x4.h
index ed59c9e..869841e 100644
--- a/libvpx/vp8/common/reconintra4x4.h
+++ b/libvpx/vp8/common/reconintra4x4.h
@@ -18,7 +18,7 @@
#endif
static void intra_prediction_down_copy(MACROBLOCKD *xd,
- unsigned char *above_right_src)
+ unsigned char *above_right_src)
{
int dst_stride = xd->dst.y_stride;
unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16;
@@ -33,6 +33,14 @@
*dst_ptr2 = *src_ptr;
}
+void vp8_intra4x4_predict(unsigned char *Above,
+ unsigned char *yleft, int left_stride,
+ B_PREDICTION_MODE b_mode,
+ unsigned char *dst, int dst_stride,
+ unsigned char top_left);
+
+void vp8_init_intra4x4_predictors_internal(void);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/libvpx/vp8/common/rtcd_defs.pl b/libvpx/vp8/common/rtcd_defs.pl
index 7924ae7..6799c27 100644
--- a/libvpx/vp8/common/rtcd_defs.pl
+++ b/libvpx/vp8/common/rtcd_defs.pl
@@ -152,16 +152,6 @@
$vp8_copy_mem8x4_media=vp8_copy_mem8x4_v6;
$vp8_copy_mem8x4_dspr2=vp8_copy_mem8x4_dspr2;
-add_proto qw/void vp8_build_intra_predictors_mby_s/, "struct macroblockd *x, unsigned char * yabove_row, unsigned char * yleft, int left_stride, unsigned char * ypred_ptr, int y_stride";
-specialize qw/vp8_build_intra_predictors_mby_s sse2 ssse3 neon msa/;
-
-add_proto qw/void vp8_build_intra_predictors_mbuv_s/, "struct macroblockd *x, unsigned char * uabove_row, unsigned char * vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char * upred_ptr, unsigned char * vpred_ptr, int pred_stride";
-specialize qw/vp8_build_intra_predictors_mbuv_s sse2 ssse3 neon msa/;
-
-add_proto qw/void vp8_intra4x4_predict/, "unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left";
-specialize qw/vp8_intra4x4_predict media/;
-$vp8_intra4x4_predict_media=vp8_intra4x4_predict_armv6;
-
#
# Postproc
#
diff --git a/libvpx/vp8/common/setupintrarecon.h b/libvpx/vp8/common/setupintrarecon.h
index 608f4a9..1857c4e 100644
--- a/libvpx/vp8/common/setupintrarecon.h
+++ b/libvpx/vp8/common/setupintrarecon.h
@@ -11,6 +11,7 @@
#ifndef VP8_COMMON_SETUPINTRARECON_H_
#define VP8_COMMON_SETUPINTRARECON_H_
+#include "./vpx_config.h"
#include "vpx_scale/yv12config.h"
#ifdef __cplusplus
@@ -19,12 +20,11 @@
extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf);
extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf);
-static
-void setup_intra_recon_left(unsigned char *y_buffer,
- unsigned char *u_buffer,
- unsigned char *v_buffer,
- int y_stride,
- int uv_stride)
+static INLINE void setup_intra_recon_left(unsigned char *y_buffer,
+ unsigned char *u_buffer,
+ unsigned char *v_buffer,
+ int y_stride,
+ int uv_stride)
{
int i;
diff --git a/libvpx/vp8/common/vp8_loopfilter.c b/libvpx/vp8/common/vp8_loopfilter.c
index 8b55dff..756ad48 100644
--- a/libvpx/vp8/common/vp8_loopfilter.c
+++ b/libvpx/vp8/common/vp8_loopfilter.c
@@ -141,8 +141,8 @@
else /* Delta Value */
{
lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
- lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
}
+ lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
}
if (!mbd->mode_ref_lf_delta_enabled)
diff --git a/libvpx/vp8/common/x86/recon_sse2.asm b/libvpx/vp8/common/x86/recon_sse2.asm
index 7141f83..cb89537 100644
--- a/libvpx/vp8/common/x86/recon_sse2.asm
+++ b/libvpx/vp8/common/x86/recon_sse2.asm
@@ -114,1002 +114,3 @@
UNSHADOW_ARGS
pop rbp
ret
-
-
-;void vp8_intra_pred_uv_dc_mmx2(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride,
-; )
-global sym(vp8_intra_pred_uv_dc_mmx2) PRIVATE
-sym(vp8_intra_pred_uv_dc_mmx2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- push rdi
- ; end prolog
-
- ; from top
- mov rdi, arg(2) ;above;
- mov rsi, arg(3) ;left;
- movsxd rax, dword ptr arg(4) ;left_stride;
- pxor mm0, mm0
- movq mm1, [rdi]
- lea rdi, [rax*3]
- psadbw mm1, mm0
- ; from left
- movzx ecx, byte [rsi]
- movzx edx, byte [rsi+rax*1]
- add ecx, edx
- movzx edx, byte [rsi+rax*2]
- add ecx, edx
-
- movzx edx, byte [rsi+rdi]
- lea rsi, [rsi+rax*4]
- add ecx, edx
- movzx edx, byte [rsi]
- add ecx, edx
- movzx edx, byte [rsi+rax]
- add ecx, edx
- movzx edx, byte [rsi+rax*2]
- add ecx, edx
- movzx edx, byte [rsi+rdi]
- add ecx, edx
-
- ; add up
- pextrw edx, mm1, 0x0
- lea edx, [edx+ecx+8]
- sar edx, 4
- movd mm1, edx
- movsxd rcx, dword ptr arg(1) ;dst_stride
- pshufw mm1, mm1, 0x0
- mov rdi, arg(0) ;dst;
- packuswb mm1, mm1
-
- ; write out
- lea rax, [rcx*3]
- lea rdx, [rdi+rcx*4]
-
- movq [rdi ], mm1
- movq [rdi+rcx ], mm1
- movq [rdi+rcx*2], mm1
- movq [rdi+rax ], mm1
- movq [rdx ], mm1
- movq [rdx+rcx ], mm1
- movq [rdx+rcx*2], mm1
- movq [rdx+rax ], mm1
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp8_intra_pred_uv_dctop_mmx2(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride,
-; )
-global sym(vp8_intra_pred_uv_dctop_mmx2) PRIVATE
-sym(vp8_intra_pred_uv_dctop_mmx2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ;arg(3), arg(4) not used
-
- ; from top
- mov rsi, arg(2) ;above;
- pxor mm0, mm0
- movq mm1, [rsi]
- psadbw mm1, mm0
-
- ; add up
- paddw mm1, [GLOBAL(dc_4)]
- psraw mm1, 3
- pshufw mm1, mm1, 0x0
- packuswb mm1, mm1
-
- ; write out
- mov rdi, arg(0) ;dst;
- movsxd rcx, dword ptr arg(1) ;dst_stride
- lea rax, [rcx*3]
-
- movq [rdi ], mm1
- movq [rdi+rcx ], mm1
- movq [rdi+rcx*2], mm1
- movq [rdi+rax ], mm1
- lea rdi, [rdi+rcx*4]
- movq [rdi ], mm1
- movq [rdi+rcx ], mm1
- movq [rdi+rcx*2], mm1
- movq [rdi+rax ], mm1
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp8_intra_pred_uv_dcleft_mmx2(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride,
-; )
-global sym(vp8_intra_pred_uv_dcleft_mmx2) PRIVATE
-sym(vp8_intra_pred_uv_dcleft_mmx2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- push rdi
- ; end prolog
-
- ;arg(2) not used
-
- ; from left
- mov rsi, arg(3) ;left;
- movsxd rax, dword ptr arg(4) ;left_stride;
- lea rdi, [rax*3]
- movzx ecx, byte [rsi]
- movzx edx, byte [rsi+rax]
- add ecx, edx
- movzx edx, byte [rsi+rax*2]
- add ecx, edx
- movzx edx, byte [rsi+rdi]
- add ecx, edx
- lea rsi, [rsi+rax*4]
- movzx edx, byte [rsi]
- add ecx, edx
- movzx edx, byte [rsi+rax]
- add ecx, edx
- movzx edx, byte [rsi+rax*2]
- add ecx, edx
- movzx edx, byte [rsi+rdi]
- lea edx, [ecx+edx+4]
-
- ; add up
- shr edx, 3
- movd mm1, edx
- pshufw mm1, mm1, 0x0
- packuswb mm1, mm1
-
- ; write out
- mov rdi, arg(0) ;dst;
- movsxd rcx, dword ptr arg(1) ;dst_stride
- lea rax, [rcx*3]
-
- movq [rdi ], mm1
- movq [rdi+rcx ], mm1
- movq [rdi+rcx*2], mm1
- movq [rdi+rax ], mm1
- lea rdi, [rdi+rcx*4]
- movq [rdi ], mm1
- movq [rdi+rcx ], mm1
- movq [rdi+rcx*2], mm1
- movq [rdi+rax ], mm1
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp8_intra_pred_uv_dc128_mmx(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride,
-; )
-global sym(vp8_intra_pred_uv_dc128_mmx) PRIVATE
-sym(vp8_intra_pred_uv_dc128_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- GET_GOT rbx
- ; end prolog
-
- ;arg(2), arg(3), arg(4) not used
-
- ; write out
- movq mm1, [GLOBAL(dc_128)]
- mov rax, arg(0) ;dst;
- movsxd rdx, dword ptr arg(1) ;dst_stride
- lea rcx, [rdx*3]
-
- movq [rax ], mm1
- movq [rax+rdx ], mm1
- movq [rax+rdx*2], mm1
- movq [rax+rcx ], mm1
- lea rax, [rax+rdx*4]
- movq [rax ], mm1
- movq [rax+rdx ], mm1
- movq [rax+rdx*2], mm1
- movq [rax+rcx ], mm1
-
- ; begin epilog
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp8_intra_pred_uv_tm_sse2(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride,
-; )
-%macro vp8_intra_pred_uv_tm 1
-global sym(vp8_intra_pred_uv_tm_%1) PRIVATE
-sym(vp8_intra_pred_uv_tm_%1):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- GET_GOT rbx
- push rsi
- push rdi
- push rbx
- ; end prolog
-
- ; read top row
- mov edx, 4
- mov rsi, arg(2) ;above
- movsxd rax, dword ptr arg(4) ;left_stride;
- pxor xmm0, xmm0
-%ifidn %1, ssse3
- movdqa xmm2, [GLOBAL(dc_1024)]
-%endif
- movq xmm1, [rsi]
- punpcklbw xmm1, xmm0
-
- ; set up left ptrs ans subtract topleft
- movd xmm3, [rsi-1]
- mov rsi, arg(3) ;left;
-%ifidn %1, sse2
- punpcklbw xmm3, xmm0
- pshuflw xmm3, xmm3, 0x0
- punpcklqdq xmm3, xmm3
-%else
- pshufb xmm3, xmm2
-%endif
- psubw xmm1, xmm3
-
- ; set up dest ptrs
- mov rdi, arg(0) ;dst;
- movsxd rcx, dword ptr arg(1) ;dst_stride
-
-.vp8_intra_pred_uv_tm_%1_loop:
- mov bl, [rsi]
- movd xmm3, ebx
-
- mov bl, [rsi+rax]
- movd xmm5, ebx
-%ifidn %1, sse2
- punpcklbw xmm3, xmm0
- punpcklbw xmm5, xmm0
- pshuflw xmm3, xmm3, 0x0
- pshuflw xmm5, xmm5, 0x0
- punpcklqdq xmm3, xmm3
- punpcklqdq xmm5, xmm5
-%else
- pshufb xmm3, xmm2
- pshufb xmm5, xmm2
-%endif
- paddw xmm3, xmm1
- paddw xmm5, xmm1
- packuswb xmm3, xmm5
- movq [rdi ], xmm3
- movhps[rdi+rcx], xmm3
- lea rsi, [rsi+rax*2]
- lea rdi, [rdi+rcx*2]
- dec edx
- jnz .vp8_intra_pred_uv_tm_%1_loop
-
- ; begin epilog
- pop rbx
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-%endmacro
-
-vp8_intra_pred_uv_tm sse2
-vp8_intra_pred_uv_tm ssse3
-
-;void vp8_intra_pred_uv_ve_mmx(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride,
-; )
-global sym(vp8_intra_pred_uv_ve_mmx) PRIVATE
-sym(vp8_intra_pred_uv_ve_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- ; end prolog
-
- ; arg(3), arg(4) not used
-
- ; read from top
- mov rax, arg(2) ;src;
-
- movq mm1, [rax]
-
- ; write out
- mov rax, arg(0) ;dst;
- movsxd rdx, dword ptr arg(1) ;dst_stride
- lea rcx, [rdx*3]
-
- movq [rax ], mm1
- movq [rax+rdx ], mm1
- movq [rax+rdx*2], mm1
- movq [rax+rcx ], mm1
- lea rax, [rax+rdx*4]
- movq [rax ], mm1
- movq [rax+rdx ], mm1
- movq [rax+rdx*2], mm1
- movq [rax+rcx ], mm1
-
- ; begin epilog
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp8_intra_pred_uv_ho_mmx2(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride
-; )
-%macro vp8_intra_pred_uv_ho 1
-global sym(vp8_intra_pred_uv_ho_%1) PRIVATE
-sym(vp8_intra_pred_uv_ho_%1):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- push rdi
- push rbx
-%ifidn %1, ssse3
- GET_GOT rbx
-%endif
- ; end prolog
-
- ;arg(2) not used
-
- ; read from left and write out
-%ifidn %1, mmx2
- mov edx, 4
-%endif
- mov rsi, arg(3) ;left
- movsxd rax, dword ptr arg(4) ;left_stride;
- mov rdi, arg(0) ;dst;
- movsxd rcx, dword ptr arg(1) ;dst_stride
-%ifidn %1, ssse3
- lea rdx, [rcx*3]
- movdqa xmm2, [GLOBAL(dc_00001111)]
-%endif
-
-%ifidn %1, mmx2
-.vp8_intra_pred_uv_ho_%1_loop:
- mov bl, [rsi]
- movd mm0, ebx
-
- mov bl, [rsi+rax]
- movd mm1, ebx
-
- punpcklbw mm0, mm0
- punpcklbw mm1, mm1
- pshufw mm0, mm0, 0x0
- pshufw mm1, mm1, 0x0
- movq [rdi ], mm0
- movq [rdi+rcx], mm1
- lea rsi, [rsi+rax*2]
- lea rdi, [rdi+rcx*2]
- dec edx
- jnz .vp8_intra_pred_uv_ho_%1_loop
-%else
- mov bl, [rsi]
- movd xmm0, ebx
-
- mov bl, [rsi+rax]
- movd xmm3, ebx
-
- mov bl, [rsi+rax*2]
- movd xmm1, ebx
-
- lea rbx, [rax*3]
- mov bl, [rsi+rbx]
- movd xmm4, ebx
-
- punpcklbw xmm0, xmm3
- punpcklbw xmm1, xmm4
- pshufb xmm0, xmm2
- pshufb xmm1, xmm2
- movq [rdi ], xmm0
- movhps [rdi+rcx], xmm0
- movq [rdi+rcx*2], xmm1
- movhps [rdi+rdx], xmm1
- lea rsi, [rsi+rax*4]
- lea rdi, [rdi+rcx*4]
-
- mov bl, [rsi]
- movd xmm0, ebx
-
- mov bl, [rsi+rax]
- movd xmm3, ebx
-
- mov bl, [rsi+rax*2]
- movd xmm1, ebx
-
- lea rbx, [rax*3]
- mov bl, [rsi+rbx]
- movd xmm4, ebx
-
- punpcklbw xmm0, xmm3
- punpcklbw xmm1, xmm4
- pshufb xmm0, xmm2
- pshufb xmm1, xmm2
- movq [rdi ], xmm0
- movhps [rdi+rcx], xmm0
- movq [rdi+rcx*2], xmm1
- movhps [rdi+rdx], xmm1
-%endif
-
- ; begin epilog
-%ifidn %1, ssse3
- RESTORE_GOT
-%endif
- pop rbx
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-%endmacro
-
-vp8_intra_pred_uv_ho mmx2
-vp8_intra_pred_uv_ho ssse3
-
-;void vp8_intra_pred_y_dc_sse2(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride
-; )
-global sym(vp8_intra_pred_y_dc_sse2) PRIVATE
-sym(vp8_intra_pred_y_dc_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- push rdi
- ; end prolog
-
- ; from top
- mov rdi, arg(2) ;above
- mov rsi, arg(3) ;left
- movsxd rax, dword ptr arg(4) ;left_stride;
-
- pxor xmm0, xmm0
- movdqa xmm1, [rdi]
- psadbw xmm1, xmm0
- movq xmm2, xmm1
- punpckhqdq xmm1, xmm1
- paddw xmm1, xmm2
-
- ; from left
- lea rdi, [rax*3]
-
- movzx ecx, byte [rsi]
- movzx edx, byte [rsi+rax]
- add ecx, edx
- movzx edx, byte [rsi+rax*2]
- add ecx, edx
- movzx edx, byte [rsi+rdi]
- add ecx, edx
- lea rsi, [rsi+rax*4]
-
- movzx edx, byte [rsi]
- add ecx, edx
- movzx edx, byte [rsi+rax]
- add ecx, edx
- movzx edx, byte [rsi+rax*2]
- add ecx, edx
- movzx edx, byte [rsi+rdi]
- add ecx, edx
- lea rsi, [rsi+rax*4]
-
- movzx edx, byte [rsi]
- add ecx, edx
- movzx edx, byte [rsi+rax]
- add ecx, edx
- movzx edx, byte [rsi+rax*2]
- add ecx, edx
- movzx edx, byte [rsi+rdi]
- add ecx, edx
- lea rsi, [rsi+rax*4]
-
- movzx edx, byte [rsi]
- add ecx, edx
- movzx edx, byte [rsi+rax]
- add ecx, edx
- movzx edx, byte [rsi+rax*2]
- add ecx, edx
- movzx edx, byte [rsi+rdi]
- add ecx, edx
-
- ; add up
- pextrw edx, xmm1, 0x0
- lea edx, [edx+ecx+16]
- sar edx, 5
- movd xmm1, edx
- ; FIXME use pshufb for ssse3 version
- pshuflw xmm1, xmm1, 0x0
- punpcklqdq xmm1, xmm1
- packuswb xmm1, xmm1
-
- ; write out
- mov rsi, 2
- mov rdi, arg(0) ;dst;
- movsxd rcx, dword ptr arg(1) ;dst_stride
- lea rax, [rcx*3]
-
-.label
- movdqa [rdi ], xmm1
- movdqa [rdi+rcx ], xmm1
- movdqa [rdi+rcx*2], xmm1
- movdqa [rdi+rax ], xmm1
- lea rdi, [rdi+rcx*4]
- movdqa [rdi ], xmm1
- movdqa [rdi+rcx ], xmm1
- movdqa [rdi+rcx*2], xmm1
- movdqa [rdi+rax ], xmm1
- lea rdi, [rdi+rcx*4]
- dec rsi
- jnz .label
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp8_intra_pred_y_dctop_sse2(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride
-; )
-global sym(vp8_intra_pred_y_dctop_sse2) PRIVATE
-sym(vp8_intra_pred_y_dctop_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- GET_GOT rbx
- ; end prolog
-
- ;arg(3), arg(4) not used
-
- ; from top
- mov rcx, arg(2) ;above;
- pxor xmm0, xmm0
- movdqa xmm1, [rcx]
- psadbw xmm1, xmm0
- movdqa xmm2, xmm1
- punpckhqdq xmm1, xmm1
- paddw xmm1, xmm2
-
- ; add up
- paddw xmm1, [GLOBAL(dc_8)]
- psraw xmm1, 4
- ; FIXME use pshufb for ssse3 version
- pshuflw xmm1, xmm1, 0x0
- punpcklqdq xmm1, xmm1
- packuswb xmm1, xmm1
-
- ; write out
- mov rsi, 2
- mov rdx, arg(0) ;dst;
- movsxd rcx, dword ptr arg(1) ;dst_stride
- lea rax, [rcx*3]
-
-.label
- movdqa [rdx ], xmm1
- movdqa [rdx+rcx ], xmm1
- movdqa [rdx+rcx*2], xmm1
- movdqa [rdx+rax ], xmm1
- lea rdx, [rdx+rcx*4]
- movdqa [rdx ], xmm1
- movdqa [rdx+rcx ], xmm1
- movdqa [rdx+rcx*2], xmm1
- movdqa [rdx+rax ], xmm1
- lea rdx, [rdx+rcx*4]
- dec rsi
- jnz .label
-
- ; begin epilog
- RESTORE_GOT
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp8_intra_pred_y_dcleft_sse2(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride
-; )
-global sym(vp8_intra_pred_y_dcleft_sse2) PRIVATE
-sym(vp8_intra_pred_y_dcleft_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- push rdi
- ; end prolog
-
- ;arg(2) not used
-
- ; from left
- mov rsi, arg(3) ;left;
- movsxd rax, dword ptr arg(4) ;left_stride;
-
- lea rdi, [rax*3]
- movzx ecx, byte [rsi]
- movzx edx, byte [rsi+rax]
- add ecx, edx
- movzx edx, byte [rsi+rax*2]
- add ecx, edx
- movzx edx, byte [rsi+rdi]
- add ecx, edx
- lea rsi, [rsi+rax*4]
- movzx edx, byte [rsi]
- add ecx, edx
- movzx edx, byte [rsi+rax]
- add ecx, edx
- movzx edx, byte [rsi+rax*2]
- add ecx, edx
- movzx edx, byte [rsi+rdi]
- add ecx, edx
- lea rsi, [rsi+rax*4]
- movzx edx, byte [rsi]
- add ecx, edx
- movzx edx, byte [rsi+rax]
- add ecx, edx
- movzx edx, byte [rsi+rax*2]
- add ecx, edx
- movzx edx, byte [rsi+rdi]
- add ecx, edx
- lea rsi, [rsi+rax*4]
- movzx edx, byte [rsi]
- add ecx, edx
- movzx edx, byte [rsi+rax]
- add ecx, edx
- movzx edx, byte [rsi+rax*2]
- add ecx, edx
- movzx edx, byte [rsi+rdi]
- lea edx, [ecx+edx+8]
-
- ; add up
- shr edx, 4
- movd xmm1, edx
- ; FIXME use pshufb for ssse3 version
- pshuflw xmm1, xmm1, 0x0
- punpcklqdq xmm1, xmm1
- packuswb xmm1, xmm1
-
- ; write out
- mov rsi, 2
- mov rdi, arg(0) ;dst;
- movsxd rcx, dword ptr arg(1) ;dst_stride
- lea rax, [rcx*3]
-
-.label
- movdqa [rdi ], xmm1
- movdqa [rdi+rcx ], xmm1
- movdqa [rdi+rcx*2], xmm1
- movdqa [rdi+rax ], xmm1
- lea rdi, [rdi+rcx*4]
- movdqa [rdi ], xmm1
- movdqa [rdi+rcx ], xmm1
- movdqa [rdi+rcx*2], xmm1
- movdqa [rdi+rax ], xmm1
- lea rdi, [rdi+rcx*4]
- dec rsi
- jnz .label
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp8_intra_pred_y_dc128_sse2(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride
-; )
-global sym(vp8_intra_pred_y_dc128_sse2) PRIVATE
-sym(vp8_intra_pred_y_dc128_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- GET_GOT rbx
- ; end prolog
-
- ;arg(2), arg(3), arg(4) not used
-
- ; write out
- mov rsi, 2
- movdqa xmm1, [GLOBAL(dc_128)]
- mov rax, arg(0) ;dst;
- movsxd rdx, dword ptr arg(1) ;dst_stride
- lea rcx, [rdx*3]
-
-.label
- movdqa [rax ], xmm1
- movdqa [rax+rdx ], xmm1
- movdqa [rax+rdx*2], xmm1
- movdqa [rax+rcx ], xmm1
- lea rax, [rax+rdx*4]
- movdqa [rax ], xmm1
- movdqa [rax+rdx ], xmm1
- movdqa [rax+rdx*2], xmm1
- movdqa [rax+rcx ], xmm1
- lea rax, [rax+rdx*4]
- dec rsi
- jnz .label
-
- ; begin epilog
- RESTORE_GOT
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp8_intra_pred_y_tm_sse2(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride
-; )
-%macro vp8_intra_pred_y_tm 1
-global sym(vp8_intra_pred_y_tm_%1) PRIVATE
-sym(vp8_intra_pred_y_tm_%1):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx
- GET_GOT rbx
- ; end prolog
-
- ; read top row
- mov edx, 8
- mov rsi, arg(2) ;above
- movsxd rax, dword ptr arg(4) ;left_stride;
- pxor xmm0, xmm0
-%ifidn %1, ssse3
- movdqa xmm3, [GLOBAL(dc_1024)]
-%endif
- movdqa xmm1, [rsi]
- movdqa xmm2, xmm1
- punpcklbw xmm1, xmm0
- punpckhbw xmm2, xmm0
-
- ; set up left ptrs ans subtract topleft
- movd xmm4, [rsi-1]
- mov rsi, arg(3) ;left
-%ifidn %1, sse2
- punpcklbw xmm4, xmm0
- pshuflw xmm4, xmm4, 0x0
- punpcklqdq xmm4, xmm4
-%else
- pshufb xmm4, xmm3
-%endif
- psubw xmm1, xmm4
- psubw xmm2, xmm4
-
- ; set up dest ptrs
- mov rdi, arg(0) ;dst;
- movsxd rcx, dword ptr arg(1) ;dst_stride
-vp8_intra_pred_y_tm_%1_loop:
- mov bl, [rsi]
- movd xmm4, ebx
-
- mov bl, [rsi+rax]
- movd xmm5, ebx
-%ifidn %1, sse2
- punpcklbw xmm4, xmm0
- punpcklbw xmm5, xmm0
- pshuflw xmm4, xmm4, 0x0
- pshuflw xmm5, xmm5, 0x0
- punpcklqdq xmm4, xmm4
- punpcklqdq xmm5, xmm5
-%else
- pshufb xmm4, xmm3
- pshufb xmm5, xmm3
-%endif
- movdqa xmm6, xmm4
- movdqa xmm7, xmm5
- paddw xmm4, xmm1
- paddw xmm6, xmm2
- paddw xmm5, xmm1
- paddw xmm7, xmm2
- packuswb xmm4, xmm6
- packuswb xmm5, xmm7
- movdqa [rdi ], xmm4
- movdqa [rdi+rcx], xmm5
- lea rsi, [rsi+rax*2]
- lea rdi, [rdi+rcx*2]
- dec edx
- jnz vp8_intra_pred_y_tm_%1_loop
-
- ; begin epilog
- RESTORE_GOT
- pop rbx
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-%endmacro
-
-vp8_intra_pred_y_tm sse2
-vp8_intra_pred_y_tm ssse3
-
-;void vp8_intra_pred_y_ve_sse2(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride
-; )
-global sym(vp8_intra_pred_y_ve_sse2) PRIVATE
-sym(vp8_intra_pred_y_ve_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- ; end prolog
-
- ;arg(3), arg(4) not used
-
- mov rax, arg(2) ;above;
- mov rsi, 2
- movsxd rdx, dword ptr arg(1) ;dst_stride
-
- ; read from top
- movdqa xmm1, [rax]
-
- ; write out
- mov rax, arg(0) ;dst;
- lea rcx, [rdx*3]
-
-.label
- movdqa [rax ], xmm1
- movdqa [rax+rdx ], xmm1
- movdqa [rax+rdx*2], xmm1
- movdqa [rax+rcx ], xmm1
- lea rax, [rax+rdx*4]
- movdqa [rax ], xmm1
- movdqa [rax+rdx ], xmm1
- movdqa [rax+rdx*2], xmm1
- movdqa [rax+rcx ], xmm1
- lea rax, [rax+rdx*4]
- dec rsi
- jnz .label
-
- ; begin epilog
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vp8_intra_pred_y_ho_sse2(
-; unsigned char *dst,
-; int dst_stride
-; unsigned char *above,
-; unsigned char *left,
-; int left_stride,
-; )
-global sym(vp8_intra_pred_y_ho_sse2) PRIVATE
-sym(vp8_intra_pred_y_ho_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 5
- push rsi
- push rdi
- push rbx
- ; end prolog
-
- ;arg(2) not used
-
- ; read from left and write out
- mov edx, 8
- mov rsi, arg(3) ;left;
- movsxd rax, dword ptr arg(4) ;left_stride;
- mov rdi, arg(0) ;dst;
- movsxd rcx, dword ptr arg(1) ;dst_stride
-
-vp8_intra_pred_y_ho_sse2_loop:
- mov bl, [rsi]
- movd xmm0, ebx
- mov bl, [rsi+rax]
- movd xmm1, ebx
-
- ; FIXME use pshufb for ssse3 version
- punpcklbw xmm0, xmm0
- punpcklbw xmm1, xmm1
- pshuflw xmm0, xmm0, 0x0
- pshuflw xmm1, xmm1, 0x0
- punpcklqdq xmm0, xmm0
- punpcklqdq xmm1, xmm1
- movdqa [rdi ], xmm0
- movdqa [rdi+rcx], xmm1
- lea rsi, [rsi+rax*2]
- lea rdi, [rdi+rcx*2]
- dec edx
- jnz vp8_intra_pred_y_ho_sse2_loop
-
- ; begin epilog
- pop rbx
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-dc_128:
- times 16 db 128
-dc_4:
- times 4 dw 4
-align 16
-dc_8:
- times 8 dw 8
-align 16
-dc_1024:
- times 8 dw 0x400
-align 16
-dc_00001111:
- times 8 db 0
- times 8 db 1
diff --git a/libvpx/vp8/common/x86/recon_wrapper_sse2.c b/libvpx/vp8/common/x86/recon_wrapper_sse2.c
deleted file mode 100644
index 65f4251..0000000
--- a/libvpx/vp8/common/x86/recon_wrapper_sse2.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "vpx_config.h"
-#include "vp8_rtcd.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vp8/common/blockd.h"
-
-#define build_intra_predictors_mbuv_prototype(sym) \
- void sym(unsigned char *dst, int dst_stride, \
- const unsigned char *above, \
- const unsigned char *left, int left_stride)
-typedef build_intra_predictors_mbuv_prototype((*build_intra_predictors_mbuv_fn_t));
-
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc_mmx2);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dctop_mmx2);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dcleft_mmx2);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc128_mmx);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_mmx2);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_ssse3);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ve_mmx);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_sse2);
-extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_ssse3);
-
-static void vp8_build_intra_predictors_mbuv_x86(MACROBLOCKD *x,
- unsigned char * uabove_row,
- unsigned char * vabove_row,
- unsigned char *dst_u,
- unsigned char *dst_v,
- int dst_stride,
- unsigned char * uleft,
- unsigned char * vleft,
- int left_stride,
- build_intra_predictors_mbuv_fn_t tm_func,
- build_intra_predictors_mbuv_fn_t ho_func)
-{
- int mode = x->mode_info_context->mbmi.uv_mode;
- build_intra_predictors_mbuv_fn_t fn;
-
- switch (mode) {
- case V_PRED: fn = vp8_intra_pred_uv_ve_mmx; break;
- case H_PRED: fn = ho_func; break;
- case TM_PRED: fn = tm_func; break;
- case DC_PRED:
- if (x->up_available) {
- if (x->left_available) {
- fn = vp8_intra_pred_uv_dc_mmx2; break;
- } else {
- fn = vp8_intra_pred_uv_dctop_mmx2; break;
- }
- } else if (x->left_available) {
- fn = vp8_intra_pred_uv_dcleft_mmx2; break;
- } else {
- fn = vp8_intra_pred_uv_dc128_mmx; break;
- }
- break;
- default: return;
- }
-
- fn(dst_u, dst_stride, uabove_row, uleft, left_stride);
- fn(dst_v, dst_stride, vabove_row, vleft, left_stride);
-}
-
-void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x,
- unsigned char * uabove_row,
- unsigned char * vabove_row,
- unsigned char * uleft,
- unsigned char * vleft,
- int left_stride,
- unsigned char * upred_ptr,
- unsigned char * vpred_ptr,
- int pred_stride)
-{
- vp8_build_intra_predictors_mbuv_x86(x,
- uabove_row, vabove_row,
- upred_ptr,
- vpred_ptr, pred_stride,
- uleft,
- vleft,
- left_stride,
- vp8_intra_pred_uv_tm_sse2,
- vp8_intra_pred_uv_ho_mmx2);
-}
-
-void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x,
- unsigned char * uabove_row,
- unsigned char * vabove_row,
- unsigned char * uleft,
- unsigned char * vleft,
- int left_stride,
- unsigned char * upred_ptr,
- unsigned char * vpred_ptr,
- int pred_stride)
-{
- vp8_build_intra_predictors_mbuv_x86(x,
- uabove_row, vabove_row,
- upred_ptr,
- vpred_ptr, pred_stride,
- uleft,
- vleft,
- left_stride,
- vp8_intra_pred_uv_tm_ssse3,
- vp8_intra_pred_uv_ho_ssse3);
-}
-
-#define build_intra_predictors_mby_prototype(sym) \
- void sym(unsigned char *dst, int dst_stride, \
- const unsigned char *above, \
- const unsigned char *left, int left_stride)
-typedef build_intra_predictors_mby_prototype((*build_intra_predictors_mby_fn_t));
-
-extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dc_sse2);
-extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dctop_sse2);
-extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dcleft_sse2);
-extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_dc128_sse2);
-extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_ho_sse2);
-extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_ve_sse2);
-extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_tm_sse2);
-extern build_intra_predictors_mby_prototype(vp8_intra_pred_y_tm_ssse3);
-
-static void vp8_build_intra_predictors_mby_x86(MACROBLOCKD *x,
- unsigned char * yabove_row,
- unsigned char *dst_y,
- int dst_stride,
- unsigned char * yleft,
- int left_stride,
- build_intra_predictors_mby_fn_t tm_func)
-{
- int mode = x->mode_info_context->mbmi.mode;
- build_intra_predictors_mbuv_fn_t fn;
-
- switch (mode) {
- case V_PRED: fn = vp8_intra_pred_y_ve_sse2; break;
- case H_PRED: fn = vp8_intra_pred_y_ho_sse2; break;
- case TM_PRED: fn = tm_func; break;
- case DC_PRED:
- if (x->up_available) {
- if (x->left_available) {
- fn = vp8_intra_pred_y_dc_sse2; break;
- } else {
- fn = vp8_intra_pred_y_dctop_sse2; break;
- }
- } else if (x->left_available) {
- fn = vp8_intra_pred_y_dcleft_sse2; break;
- } else {
- fn = vp8_intra_pred_y_dc128_sse2; break;
- }
- break;
- default: return;
- }
-
- fn(dst_y, dst_stride, yabove_row, yleft, left_stride);
- return;
-}
-
-void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x,
- unsigned char * yabove_row,
- unsigned char * yleft,
- int left_stride,
- unsigned char * ypred_ptr,
- int y_stride)
-{
- vp8_build_intra_predictors_mby_x86(x, yabove_row, ypred_ptr,
- y_stride, yleft, left_stride,
- vp8_intra_pred_y_tm_sse2);
-}
-
-void vp8_build_intra_predictors_mby_s_ssse3(MACROBLOCKD *x,
- unsigned char * yabove_row,
- unsigned char * yleft,
- int left_stride,
- unsigned char * ypred_ptr,
- int y_stride)
-{
- vp8_build_intra_predictors_mby_x86(x, yabove_row, ypred_ptr,
- y_stride, yleft, left_stride,
- vp8_intra_pred_y_tm_ssse3);
-
-}
diff --git a/libvpx/vp8/decoder/dboolhuff.c b/libvpx/vp8/decoder/dboolhuff.c
index b874d4c..8a7e332 100644
--- a/libvpx/vp8/decoder/dboolhuff.c
+++ b/libvpx/vp8/decoder/dboolhuff.c
@@ -11,6 +11,7 @@
#include "dboolhuff.h"
#include "vp8/common/common.h"
+#include "vpx_dsp/vpx_dsp_common.h"
int vp8dx_start_decode(BOOL_DECODER *br,
const unsigned char *source,
@@ -48,7 +49,7 @@
unsigned char decrypted[sizeof(VP8_BD_VALUE) + 1];
if (br->decrypt_cb) {
- size_t n = MIN(sizeof(decrypted), bytes_left);
+ size_t n = VPXMIN(sizeof(decrypted), bytes_left);
br->decrypt_cb(br->decrypt_state, bufptr, decrypted, (int)n);
bufptr = decrypted;
}
diff --git a/libvpx/vp8/decoder/dboolhuff.h b/libvpx/vp8/decoder/dboolhuff.h
index 51c5adc..cc9eaaf 100644
--- a/libvpx/vp8/decoder/dboolhuff.h
+++ b/libvpx/vp8/decoder/dboolhuff.h
@@ -15,7 +15,7 @@
#include <stddef.h>
#include <limits.h>
-#include "vpx_config.h"
+#include "./vpx_config.h"
#include "vpx_ports/mem.h"
#include "vpx/vp8dx.h"
#include "vpx/vpx_integer.h"
@@ -95,7 +95,7 @@
return bit;
}
-static int vp8_decode_value(BOOL_DECODER *br, int bits)
+static INLINE int vp8_decode_value(BOOL_DECODER *br, int bits)
{
int z = 0;
int bit;
@@ -108,7 +108,7 @@
return z;
}
-static int vp8dx_bool_error(BOOL_DECODER *br)
+static INLINE int vp8dx_bool_error(BOOL_DECODER *br)
{
/* Check if we have reached the end of the buffer.
*
diff --git a/libvpx/vp8/decoder/decodeframe.c b/libvpx/vp8/decoder/decodeframe.c
index 56e167d..4bc87eb 100644
--- a/libvpx/vp8/decoder/decodeframe.c
+++ b/libvpx/vp8/decoder/decodeframe.c
@@ -23,6 +23,7 @@
#include "vp8/common/entropymode.h"
#include "vp8/common/quant_common.h"
#include "vpx_scale/vpx_scale.h"
+#include "vp8/common/reconintra.h"
#include "vp8/common/setupintrarecon.h"
#include "decodemv.h"
@@ -34,6 +35,7 @@
#include "vp8/common/threading.h"
#include "decoderthreading.h"
#include "dboolhuff.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include <assert.h>
#include <stdio.h>
@@ -71,10 +73,9 @@
/* Delta Value */
else
- {
QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
- QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */
- }
+
+ QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */
}
else
QIndex = pc->base_qindex;
@@ -1021,7 +1022,7 @@
const unsigned char *clear = data;
if (pbi->decrypt_cb)
{
- int n = (int)MIN(sizeof(clear_buffer), data_end - data);
+ int n = (int)VPXMIN(sizeof(clear_buffer), data_end - data);
pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n);
clear = clear_buffer;
}
diff --git a/libvpx/vp8/decoder/error_concealment.c b/libvpx/vp8/decoder/error_concealment.c
index bb6d443..0b846a0 100644
--- a/libvpx/vp8/decoder/error_concealment.c
+++ b/libvpx/vp8/decoder/error_concealment.c
@@ -16,6 +16,7 @@
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/findnearmv.h"
#include "vp8/common/common.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#define FLOOR(x,q) ((x) & -(1 << (q)))
@@ -93,13 +94,13 @@
*/
static int block_overlap(int b1_row, int b1_col, int b2_row, int b2_col)
{
- const int int_top = MAX(b1_row, b2_row); // top
- const int int_left = MAX(b1_col, b2_col); // left
+ const int int_top = VPXMAX(b1_row, b2_row); // top
+ const int int_left = VPXMAX(b1_col, b2_col); // left
/* Since each block is 4x4 pixels, adding 4 (Q3) to the left/top edge
* gives us the right/bottom edge.
*/
- const int int_right = MIN(b1_col + (4<<3), b2_col + (4<<3)); // right
- const int int_bottom = MIN(b1_row + (4<<3), b2_row + (4<<3)); // bottom
+ const int int_right = VPXMIN(b1_col + (4<<3), b2_col + (4<<3)); // right
+ const int int_bottom = VPXMIN(b1_row + (4<<3), b2_row + (4<<3)); // bottom
return (int_bottom - int_top) * (int_right - int_left);
}
@@ -124,7 +125,7 @@
/* If the block partly overlaps any previous MB, these coordinates
* can be < 0. We don't want to access blocks in previous MBs.
*/
- const int blk_idx = MAX(rel_ol_blk_row,0) * 4 + MAX(rel_ol_blk_col,0);
+ const int blk_idx = VPXMAX(rel_ol_blk_row,0) * 4 + VPXMAX(rel_ol_blk_col,0);
/* Upper left overlapping block */
B_OVERLAP *b_ol_ul = &(b_overlaps[blk_idx]);
@@ -132,8 +133,8 @@
* which the motion compensated block overlaps
*/
/* Avoid calculating overlaps for blocks in later MBs */
- int end_row = MIN(4 + mb_row * 4 - first_blk_row, 2);
- int end_col = MIN(4 + mb_col * 4 - first_blk_col, 2);
+ int end_row = VPXMIN(4 + mb_row * 4 - first_blk_row, 2);
+ int end_col = VPXMIN(4 + mb_col * 4 - first_blk_col, 2);
int row, col;
/* Check if new_row and new_col are evenly divisible by 4 (Q3),
@@ -208,8 +209,8 @@
overlap_mb_row = FLOOR((overlap_b_row << 3) / 4, 3) >> 3;
overlap_mb_col = FLOOR((overlap_b_col << 3) / 4, 3) >> 3;
- end_row = MIN(mb_rows - overlap_mb_row, 2);
- end_col = MIN(mb_cols - overlap_mb_col, 2);
+ end_row = VPXMIN(mb_rows - overlap_mb_row, 2);
+ end_col = VPXMIN(mb_cols - overlap_mb_col, 2);
/* Don't calculate overlap for MBs we don't overlap */
/* Check if the new block row starts at the last block row of the MB */
diff --git a/libvpx/vp8/decoder/onyxd_if.c b/libvpx/vp8/decoder/onyxd_if.c
index 9015fcb..3468268 100644
--- a/libvpx/vp8/decoder/onyxd_if.c
+++ b/libvpx/vp8/decoder/onyxd_if.c
@@ -25,9 +25,12 @@
#include <assert.h>
#include "vp8/common/quant_common.h"
+#include "vp8/common/reconintra.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx_scale/vpx_scale.h"
#include "vp8/common/systemdependent.h"
+#include "vpx_ports/vpx_once.h"
#include "vpx_ports/vpx_timer.h"
#include "detokenize.h"
#if CONFIG_ERROR_CONCEALMENT
@@ -42,6 +45,17 @@
static int get_free_fb (VP8_COMMON *cm);
static void ref_cnt_fb (int *buf, int *idx, int new_idx);
+static void initialize_dec(void) {
+ static volatile int init_done = 0;
+
+ if (!init_done)
+ {
+ vpx_dsp_rtcd();
+ vp8_init_intra_predictors();
+ init_done = 1;
+ }
+}
+
static void remove_decompressor(VP8D_COMP *pbi)
{
#if CONFIG_ERROR_CONCEALMENT
@@ -105,6 +119,8 @@
vp8_setup_block_dptrs(&pbi->mb);
+ once(initialize_dec);
+
return pbi;
}
diff --git a/libvpx/vp8/decoder/threading.c b/libvpx/vp8/decoder/threading.c
index 6801532..7c7184c 100644
--- a/libvpx/vp8/decoder/threading.c
+++ b/libvpx/vp8/decoder/threading.c
@@ -24,6 +24,7 @@
#include "detokenize.h"
#include "vp8/common/reconintra4x4.h"
#include "vp8/common/reconinter.h"
+#include "vp8/common/reconintra.h"
#include "vp8/common/setupintrarecon.h"
#if CONFIG_ERROR_CONCEALMENT
#include "error_concealment.h"
diff --git a/libvpx/vp8/decoder/treereader.h b/libvpx/vp8/decoder/treereader.h
index 35ee696..f7d23c3 100644
--- a/libvpx/vp8/decoder/treereader.h
+++ b/libvpx/vp8/decoder/treereader.h
@@ -12,6 +12,7 @@
#ifndef VP8_DECODER_TREEREADER_H_
#define VP8_DECODER_TREEREADER_H_
+#include "./vpx_config.h"
#include "vp8/common/treecoder.h"
#include "dboolhuff.h"
@@ -28,7 +29,7 @@
/* Intent of tree data structure is to make decoding trivial. */
-static int vp8_treed_read(
+static INLINE int vp8_treed_read(
vp8_reader *const r, /* !!! must return a 0 or 1 !!! */
vp8_tree t,
const vp8_prob *const p
diff --git a/libvpx/vp8/encoder/bitstream.c b/libvpx/vp8/encoder/bitstream.c
index ea279b3..f3d91b5 100644
--- a/libvpx/vp8/encoder/bitstream.c
+++ b/libvpx/vp8/encoder/bitstream.c
@@ -407,6 +407,7 @@
}
+#if CONFIG_MULTITHREAD
static void pack_mb_row_tokens(VP8_COMP *cpi, vp8_writer *w)
{
int mb_row;
@@ -421,6 +422,7 @@
}
}
+#endif // CONFIG_MULTITHREAD
static void write_mv_ref
(
@@ -1675,7 +1677,7 @@
if (cpi->b_multi_threaded)
pack_mb_row_tokens(cpi, &cpi->bc[1]);
else
-#endif
+#endif // CONFIG_MULTITHREAD
vp8_pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
vp8_stop_encode(&cpi->bc[1]);
diff --git a/libvpx/vp8/encoder/encodeframe.c b/libvpx/vp8/encoder/encodeframe.c
index d381d8d..b0aaa2f 100644
--- a/libvpx/vp8/encoder/encodeframe.c
+++ b/libvpx/vp8/encoder/encodeframe.c
@@ -700,6 +700,7 @@
vp8_zero(x->count_mb_ref_frame_usage);
}
+#if CONFIG_MULTITHREAD
static void sum_coef_counts(MACROBLOCK *x, MACROBLOCK *x_thread)
{
int i = 0;
@@ -729,6 +730,7 @@
}
while (++i < BLOCK_TYPES);
}
+#endif // CONFIG_MULTITHREAD
void vp8_encode_frame(VP8_COMP *cpi)
{
@@ -927,7 +929,7 @@
}
else
-#endif
+#endif // CONFIG_MULTITHREAD
{
/* for each macroblock row in image */
diff --git a/libvpx/vp8/encoder/encodeintra.c b/libvpx/vp8/encoder/encodeintra.c
index 938cc7e..44be959 100644
--- a/libvpx/vp8/encoder/encodeintra.c
+++ b/libvpx/vp8/encoder/encodeintra.c
@@ -13,6 +13,7 @@
#include "vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vp8/encoder/quantize.h"
+#include "vp8/common/reconintra.h"
#include "vp8/common/reconintra4x4.h"
#include "encodemb.h"
#include "vp8/common/invtrans.h"
diff --git a/libvpx/vp8/encoder/mcomp.c b/libvpx/vp8/encoder/mcomp.c
index f848e8f..768c764 100644
--- a/libvpx/vp8/encoder/mcomp.c
+++ b/libvpx/vp8/encoder/mcomp.c
@@ -20,6 +20,7 @@
#include <math.h>
#include "vp8/common/findnearmv.h"
#include "vp8/common/common.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#ifdef VP8_ENTROPY_STATS
static int mv_ref_ct [31] [4] [2];
@@ -223,14 +224,14 @@
unsigned int quarteriters = 4;
int thismse;
- int minc = MAX(x->mv_col_min * 4,
- (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
- int maxc = MIN(x->mv_col_max * 4,
- (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
- int minr = MAX(x->mv_row_min * 4,
- (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
- int maxr = MIN(x->mv_row_max * 4,
- (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
+ int minc = VPXMAX(x->mv_col_min * 4,
+ (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
+ int maxc = VPXMIN(x->mv_col_max * 4,
+ (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
+ int minr = VPXMAX(x->mv_row_min * 4,
+ (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
+ int maxr = VPXMIN(x->mv_row_max * 4,
+ (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
int y_stride;
int offset;
diff --git a/libvpx/vp8/encoder/mr_dissim.c b/libvpx/vp8/encoder/mr_dissim.c
index 8d96445..886cba2 100644
--- a/libvpx/vp8/encoder/mr_dissim.c
+++ b/libvpx/vp8/encoder/mr_dissim.c
@@ -13,6 +13,7 @@
#include "vpx_config.h"
#include "onyx_int.h"
#include "mr_dissim.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "rdopt.h"
#include "vp8/common/common.h"
@@ -192,11 +193,13 @@
}
}
- mmvx = MAX(abs(min_mvx - here->mbmi.mv.as_mv.row),
- abs(max_mvx - here->mbmi.mv.as_mv.row));
- mmvy = MAX(abs(min_mvy - here->mbmi.mv.as_mv.col),
- abs(max_mvy - here->mbmi.mv.as_mv.col));
- dissim = MAX(mmvx, mmvy);
+ mmvx = VPXMAX(
+ abs(min_mvx - here->mbmi.mv.as_mv.row),
+ abs(max_mvx - here->mbmi.mv.as_mv.row));
+ mmvy = VPXMAX(
+ abs(min_mvy - here->mbmi.mv.as_mv.col),
+ abs(max_mvy - here->mbmi.mv.as_mv.col));
+ dissim = VPXMAX(mmvx, mmvy);
}
}
diff --git a/libvpx/vp8/encoder/onyx_if.c b/libvpx/vp8/encoder/onyx_if.c
index 5e05c8c..df5bcf6 100644
--- a/libvpx/vp8/encoder/onyx_if.c
+++ b/libvpx/vp8/encoder/onyx_if.c
@@ -31,6 +31,7 @@
#include "vp8/common/postproc.h"
#endif
#include "vpx_mem/vpx_mem.h"
+#include "vp8/common/reconintra.h"
#include "vp8/common/swapyv12buffer.h"
#include "vp8/common/threading.h"
#include "vpx_ports/vpx_timer.h"
@@ -422,6 +423,16 @@
static void dealloc_raw_frame_buffers(VP8_COMP *cpi);
+void vp8_initialize_enc(void)
+{
+ static volatile int init_done = 0;
+
+ if (!init_done) {
+ vpx_dsp_rtcd();
+ vp8_init_intra_predictors();
+ init_done = 1;
+ }
+}
static void dealloc_compressor_data(VP8_COMP *cpi)
{
@@ -516,41 +527,6 @@
}
-static void segmentation_test_function(VP8_COMP *cpi)
-{
- unsigned char *seg_map;
- signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];
-
- // Create a temporary map for segmentation data.
- CHECK_MEM_ERROR(seg_map, vpx_calloc(cpi->common.mb_rows * cpi->common.mb_cols, 1));
-
- // Set the segmentation Map
- set_segmentation_map(cpi, seg_map);
-
- // Activate segmentation.
- enable_segmentation(cpi);
-
- // Set up the quant segment data
- feature_data[MB_LVL_ALT_Q][0] = 0;
- feature_data[MB_LVL_ALT_Q][1] = 4;
- feature_data[MB_LVL_ALT_Q][2] = 0;
- feature_data[MB_LVL_ALT_Q][3] = 0;
- // Set up the loop segment data
- feature_data[MB_LVL_ALT_LF][0] = 0;
- feature_data[MB_LVL_ALT_LF][1] = 0;
- feature_data[MB_LVL_ALT_LF][2] = 0;
- feature_data[MB_LVL_ALT_LF][3] = 0;
-
- // Initialise the feature data structure
- // SEGMENT_DELTADATA 0, SEGMENT_ABSDATA 1
- set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA);
-
- // Delete sementation map
- vpx_free(seg_map);
-
- seg_map = 0;
-}
-
/* A simple function to cyclically refresh the background at a lower Q */
static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment)
{
@@ -913,7 +889,7 @@
Speed = cpi->Speed;
switch (Mode)
{
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
case 0: /* best quality mode */
sf->first_step = 0;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
@@ -1953,7 +1929,7 @@
* Currently this is tied to error resilliant mode
*/
cpi->cyclic_refresh_mode_enabled = cpi->oxcf.error_resilient_mode;
- cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 5;
+ cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 7;
if (cpi->oxcf.number_of_layers == 1) {
cpi->cyclic_refresh_mode_max_mbs_perframe =
(cpi->common.mb_rows * cpi->common.mb_cols) / 20;
@@ -2065,7 +2041,7 @@
cpi->output_pkt_list = oxcf->output_pkt_list;
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
if (cpi->pass == 1)
{
@@ -2227,7 +2203,7 @@
if (cpi && (cpi->common.current_video_frame > 0))
{
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
if (cpi->pass == 2)
{
@@ -3018,6 +2994,7 @@
}
+#if !CONFIG_REALTIME_ONLY
/* 1 = key, 0 = inter */
static int decide_key_frame(VP8_COMP *cpi)
{
@@ -3085,7 +3062,6 @@
}
-#if !(CONFIG_REALTIME_ONLY)
static void Pass1Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned int *frame_flags)
{
(void) size;
@@ -3131,6 +3107,7 @@
#endif
/* return of 0 means drop frame */
+#if !CONFIG_REALTIME_ONLY
/* Function to test for conditions that indeicate we should loop
* back and recode a frame.
*/
@@ -3180,6 +3157,7 @@
return force_recode;
}
+#endif // !CONFIG_REALTIME_ONLY
static void update_reference_frames(VP8_COMP *cpi)
{
@@ -3601,7 +3579,7 @@
VP8_COMMON *cm = &cpi->common;
int active_worst_qchanged = 0;
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
int q_low;
int q_high;
int zbin_oq_high;
@@ -3640,7 +3618,7 @@
/* For an alt ref frame in 2 pass we skip the call to the second pass
* function that sets the target bandwidth
*/
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
if (cpi->pass == 2)
{
@@ -4149,7 +4127,7 @@
/* Determine initial Q to try */
Q = vp8_regulate_q(cpi, cpi->this_frame_target);
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
/* Set highest allowed value for Zbin over quant */
if (cm->frame_type == KEY_FRAME)
@@ -4179,7 +4157,7 @@
vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit);
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
/* Limit Q range for the adaptive loop. */
bottom_index = cpi->active_best_quality;
top_index = cpi->active_worst_quality;
@@ -4410,7 +4388,7 @@
if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME
&& cpi->compressor_speed != 2)
{
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
if (decide_key_frame(cpi))
{
/* Reset all our sizing numbers and recode */
@@ -4466,9 +4444,9 @@
/* Assume 1 qstep = about 4% on frame size. */
over_size_percent = (int)(over_size_percent * 0.96);
}
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
top_index = cpi->active_worst_quality;
-#endif
+#endif // !CONFIG_REALTIME_ONLY
/* If we have updated the active max Q do not call
* vp8_update_rate_correction_factors() this loop.
*/
@@ -4477,7 +4455,7 @@
else
active_worst_qchanged = 0;
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
/* Special case handling for forced key frames */
if ( (cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced )
{
@@ -5215,7 +5193,7 @@
}
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned char * dest_end, unsigned int *frame_flags)
{
@@ -5299,7 +5277,7 @@
cpi->source = NULL;
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
/* Should we code an alternate reference frame */
if (cpi->oxcf.error_resilient_mode == 0 &&
cpi->oxcf.play_alternate &&
@@ -5367,7 +5345,7 @@
else
{
*size = 0;
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
if (flush && cpi->pass == 1 && !cpi->twopass.first_pass_done)
{
@@ -5560,7 +5538,7 @@
assert(i < NUM_YV12_BUFFERS );
}
-#if !(CONFIG_REALTIME_ONLY)
+#if !CONFIG_REALTIME_ONLY
if (cpi->pass == 1)
{
diff --git a/libvpx/vp8/encoder/onyx_int.h b/libvpx/vp8/encoder/onyx_int.h
index 8beba27..317e4b9 100644
--- a/libvpx/vp8/encoder/onyx_int.h
+++ b/libvpx/vp8/encoder/onyx_int.h
@@ -716,6 +716,8 @@
} rd_costs;
} VP8_COMP;
+void vp8_initialize_enc(void);
+
void vp8_alloc_compressor_data(VP8_COMP *cpi);
int vp8_reverse_trans(int x);
void vp8_new_framerate(VP8_COMP *cpi, double framerate);
diff --git a/libvpx/vp8/encoder/pickinter.c b/libvpx/vp8/encoder/pickinter.c
index 5ce98ad..d0fff3f 100644
--- a/libvpx/vp8/encoder/pickinter.c
+++ b/libvpx/vp8/encoder/pickinter.c
@@ -21,10 +21,12 @@
#include "vp8/common/findnearmv.h"
#include "encodemb.h"
#include "vp8/common/reconinter.h"
+#include "vp8/common/reconintra.h"
#include "vp8/common/reconintra4x4.h"
#include "vpx_dsp/variance.h"
#include "mcomp.h"
#include "rdopt.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#if CONFIG_TEMPORAL_DENOISING
#include "denoising.h"
@@ -72,7 +74,7 @@
int y2 = signal[offsetx * stride + offsety + sgny];
int y3 = signal[(offsetx + sgnx) * stride + offsety];
int y4 = signal[(offsetx + sgnx) * stride + offsety + sgny];
- return MAX(MAX(abs(y1 - y2), abs(y1 - y3)), abs(y1 - y4));
+ return VPXMAX(VPXMAX(abs(y1 - y2), abs(y1 - y3)), abs(y1 - y4));
}
static int check_dot_artifact_candidate(VP8_COMP *cpi,
@@ -813,9 +815,18 @@
// Check if current macroblock is in skin area.
{
- const int y = x->src.y_buffer[7 * x->src.y_stride + 7];
- const int cb = x->src.u_buffer[3 * x->src.uv_stride + 3];
- const int cr = x->src.v_buffer[3 * x->src.uv_stride + 3];
+ const int y = (x->src.y_buffer[7 * x->src.y_stride + 7] +
+ x->src.y_buffer[7 * x->src.y_stride + 8] +
+ x->src.y_buffer[8 * x->src.y_stride + 7] +
+ x->src.y_buffer[8 * x->src.y_stride + 8]) >> 2;
+ const int cb = (x->src.u_buffer[3 * x->src.uv_stride + 3] +
+ x->src.u_buffer[3 * x->src.uv_stride + 4] +
+ x->src.u_buffer[4 * x->src.uv_stride + 3] +
+ x->src.u_buffer[4 * x->src.uv_stride + 4]) >> 2;
+ const int cr = (x->src.v_buffer[3 * x->src.uv_stride + 3] +
+ x->src.v_buffer[3 * x->src.uv_stride + 4] +
+ x->src.v_buffer[4 * x->src.uv_stride + 3] +
+ x->src.v_buffer[4 * x->src.uv_stride + 4]) >> 2;
x->is_skin = 0;
if (!cpi->oxcf.screen_content_mode)
x->is_skin = is_skin_color(y, cb, cr);
@@ -824,7 +835,7 @@
if (cpi->oxcf.noise_sensitivity) {
// Under aggressive denoising mode, should we use skin map to reduce denoiser
// and ZEROMV bias? Will need to revisit the accuracy of this detection for
- // very noisy input. For now keep this as is (i.e., don't turn it off).
+ // very noisy input. For now keep this as is (i.e., don't turn it off).
// if (cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive)
// x->is_skin = 0;
}
@@ -874,7 +885,7 @@
/* If the frame has big static background and current MB is in low
* motion area, its mode decision is biased to ZEROMV mode.
- * No adjustment if cpu_used is <= -12 (i.e., cpi->Speed >= 12).
+ * No adjustment if cpu_used is <= -12 (i.e., cpi->Speed >= 12).
* At such speed settings, ZEROMV is already heavily favored.
*/
if (cpi->Speed < 12) {
@@ -1136,8 +1147,9 @@
#if CONFIG_MULTI_RES_ENCODING
if (parent_ref_valid && (parent_ref_frame == this_ref_frame) &&
dissim <= 2 &&
- MAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row),
- abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col)) <= 4)
+ VPXMAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row),
+ abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col)) <=
+ 4)
{
d->bmi.mv.as_int = mvp_full.as_int;
mode_mv[NEWMV].as_int = mvp_full.as_int;
diff --git a/libvpx/vp8/encoder/ratectrl.c b/libvpx/vp8/encoder/ratectrl.c
index e8796a1..7da3d71 100644
--- a/libvpx/vp8/encoder/ratectrl.c
+++ b/libvpx/vp8/encoder/ratectrl.c
@@ -22,6 +22,7 @@
#include "vpx_mem/vpx_mem.h"
#include "vp8/common/systemdependent.h"
#include "encodemv.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#define MIN_BPB_FACTOR 0.01
@@ -380,7 +381,8 @@
int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */
/* Boost depends somewhat on frame rate: only used for 1 layer case. */
if (cpi->oxcf.number_of_layers == 1) {
- kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
+ kf_boost = VPXMAX(initial_boost,
+ (int)(2 * cpi->output_framerate - 16));
}
else {
/* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */
@@ -1591,11 +1593,38 @@
if (Q < thresh_qp &&
cpi->projected_frame_size > thresh_rate &&
pred_err_mb > thresh_pred_err_mb) {
+ double new_correction_factor = cpi->rate_correction_factor;
+ const int target_size = cpi->av_per_frame_bandwidth;
+ int target_bits_per_mb;
// Drop this frame: advance frame counters, and set force_maxqp flag.
cpi->common.current_video_frame++;
cpi->frames_since_key++;
// Flag to indicate we will force next frame to be encoded at max QP.
cpi->force_maxqp = 1;
+ // Reset the buffer levels.
+ cpi->buffer_level = cpi->oxcf.optimal_buffer_level;
+ cpi->bits_off_target = cpi->oxcf.optimal_buffer_level;
+ // Compute a new rate correction factor, corresponding to the current
+ // target frame size and max_QP, and adjust the rate correction factor
+ // upwards, if needed.
+ // This is to prevent a bad state where the re-encoded frame at max_QP
+ // undershoots significantly, and then we end up dropping every other
+ // frame because the QP/rate_correction_factor may have been too low
+ // before the drop and then takes too long to come up.
+ if (target_size >= (INT_MAX >> BPER_MB_NORMBITS))
+ target_bits_per_mb =
+ (target_size / cpi->common.MBs) << BPER_MB_NORMBITS;
+ else
+ target_bits_per_mb =
+ (target_size << BPER_MB_NORMBITS) / cpi->common.MBs;
+ // Rate correction factor based on target_size_per_mb and max_QP.
+ new_correction_factor = (double)target_bits_per_mb /
+ (double)vp8_bits_per_mb[INTER_FRAME][cpi->worst_quality];
+ if (new_correction_factor > cpi->rate_correction_factor)
+ cpi->rate_correction_factor =
+ VPXMIN(2.0 * cpi->rate_correction_factor, new_correction_factor);
+ if (cpi->rate_correction_factor > MAX_BPB_FACTOR)
+ cpi->rate_correction_factor = MAX_BPB_FACTOR;
return 1;
} else {
cpi->force_maxqp = 0;
diff --git a/libvpx/vp8/encoder/rdopt.c b/libvpx/vp8/encoder/rdopt.c
index fdff378..ab0ad15 100644
--- a/libvpx/vp8/encoder/rdopt.c
+++ b/libvpx/vp8/encoder/rdopt.c
@@ -24,6 +24,7 @@
#include "pickinter.h"
#include "vp8/common/entropymode.h"
#include "vp8/common/reconinter.h"
+#include "vp8/common/reconintra.h"
#include "vp8/common/reconintra4x4.h"
#include "vp8/common/findnearmv.h"
#include "vp8/common/quant_common.h"
diff --git a/libvpx/vp8/encoder/rdopt.h b/libvpx/vp8/encoder/rdopt.h
index b4fcd10..1cb1a07 100644
--- a/libvpx/vp8/encoder/rdopt.h
+++ b/libvpx/vp8/encoder/rdopt.h
@@ -12,13 +12,15 @@
#ifndef VP8_ENCODER_RDOPT_H_
#define VP8_ENCODER_RDOPT_H_
+#include "./vpx_config.h"
+
#ifdef __cplusplus
extern "C" {
#endif
#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
-static void insertsortmv(int arr[], int len)
+static INLINE void insertsortmv(int arr[], int len)
{
int i, j, k;
@@ -41,7 +43,7 @@
}
}
-static void insertsortsad(int arr[],int idx[], int len)
+static INLINE void insertsortsad(int arr[],int idx[], int len)
{
int i, j, k;
@@ -77,10 +79,10 @@
extern void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate);
-static void get_plane_pointers(const YV12_BUFFER_CONFIG *fb,
- unsigned char *plane[3],
- unsigned int recon_yoffset,
- unsigned int recon_uvoffset)
+static INLINE void get_plane_pointers(const YV12_BUFFER_CONFIG *fb,
+ unsigned char *plane[3],
+ unsigned int recon_yoffset,
+ unsigned int recon_uvoffset)
{
plane[0] = fb->y_buffer + recon_yoffset;
plane[1] = fb->u_buffer + recon_uvoffset;
@@ -88,10 +90,10 @@
}
-static void get_predictor_pointers(const VP8_COMP *cpi,
- unsigned char *plane[4][3],
- unsigned int recon_yoffset,
- unsigned int recon_uvoffset)
+static INLINE void get_predictor_pointers(const VP8_COMP *cpi,
+ unsigned char *plane[4][3],
+ unsigned int recon_yoffset,
+ unsigned int recon_uvoffset)
{
if (cpi->ref_frame_flags & VP8_LAST_FRAME)
get_plane_pointers(&cpi->common.yv12_fb[cpi->common.lst_fb_idx],
@@ -107,8 +109,8 @@
}
-static void get_reference_search_order(const VP8_COMP *cpi,
- int ref_frame_map[4])
+static INLINE void get_reference_search_order(const VP8_COMP *cpi,
+ int ref_frame_map[4])
{
int i=0;
diff --git a/libvpx/vp8/encoder/treewriter.h b/libvpx/vp8/encoder/treewriter.h
index cfb2730..2debf92 100644
--- a/libvpx/vp8/encoder/treewriter.h
+++ b/libvpx/vp8/encoder/treewriter.h
@@ -15,6 +15,7 @@
/* Trees map alphabets into huffman-like codes suitable for an arithmetic
bit coder. Timothy S Murphy 11 October 2004 */
+#include "./vpx_config.h"
#include "vp8/common/treecoder.h"
#include "boolhuff.h" /* for now */
@@ -46,7 +47,7 @@
/* Both of these return bits, not scaled bits. */
-static unsigned int vp8_cost_branch(const unsigned int ct[2], vp8_prob p)
+static INLINE unsigned int vp8_cost_branch(const unsigned int ct[2], vp8_prob p)
{
/* Imitate existing calculation */
@@ -76,7 +77,7 @@
}
while (n);
}
-static void vp8_write_token
+static INLINE void vp8_write_token
(
vp8_writer *const w,
vp8_tree t,
@@ -107,7 +108,7 @@
return c;
}
-static int vp8_cost_token
+static INLINE int vp8_cost_token
(
vp8_tree t,
const vp8_prob *const p,
diff --git a/libvpx/vp8/vp8_common.mk b/libvpx/vp8/vp8_common.mk
index 3ad11c7..4c4e856 100644
--- a/libvpx/vp8/vp8_common.mk
+++ b/libvpx/vp8/vp8_common.mk
@@ -45,6 +45,7 @@
VP8_COMMON_SRCS-yes += common/onyxc_int.h
VP8_COMMON_SRCS-yes += common/quant_common.h
VP8_COMMON_SRCS-yes += common/reconinter.h
+VP8_COMMON_SRCS-yes += common/reconintra.h
VP8_COMMON_SRCS-yes += common/reconintra4x4.h
VP8_COMMON_SRCS-yes += common/rtcd.c
VP8_COMMON_SRCS-yes += common/rtcd_defs.pl
@@ -88,7 +89,6 @@
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
-VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm
@@ -118,7 +118,6 @@
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/copymem_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/loopfilter_filters_msa.c
-VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/reconintra_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/sixtap_filter_msa.c
VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp8_macros_msa.h
@@ -146,7 +145,6 @@
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/loopfilter_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/simpleloopfilter_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/sixtappredict8x4_v6$(ASM)
-VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/intra4x4_predict_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequant_idct_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequantize_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/idct_blk_v6.c
@@ -165,7 +163,6 @@
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimpleverticaledge_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon.c
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/reconintra_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/shortidct4x4llm_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict_neon.c
diff --git a/libvpx/vp8/vp8_cx_iface.c b/libvpx/vp8/vp8_cx_iface.c
index fe88cd4..c125ae8 100644
--- a/libvpx/vp8/vp8_cx_iface.c
+++ b/libvpx/vp8/vp8_cx_iface.c
@@ -17,6 +17,7 @@
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx_version.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/vpx_once.h"
#include "vp8/encoder/onyx_int.h"
#include "vpx/vp8cx.h"
#include "vp8/encoder/firstpass.h"
@@ -237,7 +238,7 @@
RANGE_CHECK_HI(cfg, ts_periodicity, 16);
for (i=1; i<cfg->ts_number_layers; i++)
- if (cfg->ts_target_bitrate[i] <= cfg->ts_target_bitrate[i-1] &&
+ if (cfg->ts_target_bitrate[i] <= cfg->ts_target_bitrate[i-1] &&
cfg->rc_target_bitrate > 0)
ERROR("ts_target_bitrate entries are not strictly increasing");
@@ -693,6 +694,8 @@
else
ctx->priv->enc.total_encoders = 1;
+ once(vp8_initialize_enc);
+
res = validate_config(priv, &priv->cfg, &priv->vp8_cfg, 0);
if (!res)
@@ -879,7 +882,8 @@
}
ctx->control_frame_flags = 0;
- res = set_reference_and_update(ctx, flags);
+ if (!res)
+ res = set_reference_and_update(ctx, flags);
/* Handle fixed keyframe intervals */
if (ctx->cfg.kf_mode == VPX_KF_AUTO
@@ -1273,9 +1277,6 @@
{VP8_SET_REFERENCE, vp8e_set_reference},
{VP8_COPY_REFERENCE, vp8e_get_reference},
{VP8_SET_POSTPROC, vp8e_set_previewpp},
- {VP8E_UPD_ENTROPY, vp8e_update_entropy},
- {VP8E_UPD_REFERENCE, vp8e_update_reference},
- {VP8E_USE_REFERENCE, vp8e_use_reference},
{VP8E_SET_FRAME_FLAGS, vp8e_set_frame_flags},
{VP8E_SET_TEMPORAL_LAYER_ID, vp8e_set_temporal_layer_id},
{VP8E_SET_ROI_MAP, vp8e_set_roi_map},
diff --git a/libvpx/vp8/vp8_dx_iface.c b/libvpx/vp8/vp8_dx_iface.c
index 72e4770..a12a2ad 100644
--- a/libvpx/vp8/vp8_dx_iface.c
+++ b/libvpx/vp8/vp8_dx_iface.c
@@ -22,6 +22,7 @@
#include "common/common.h"
#include "common/onyxd.h"
#include "decoder/onyxd_int.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#if CONFIG_ERROR_CONCEALMENT
#include "decoder/error_concealment.h"
@@ -42,8 +43,6 @@
} mem_seg_id_t;
#define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0])))
-static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t);
-
struct vpx_codec_alg_priv
{
vpx_codec_priv_t base;
@@ -68,18 +67,6 @@
FRAGMENT_DATA fragments;
};
-static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t flags)
-{
- /* Although this declaration is constant, we can't use it in the requested
- * segments list because we want to define the requested segments list
- * before defining the private type (so that the number of memory maps is
- * known)
- */
- (void)si;
- (void)flags;
- return sizeof(vpx_codec_alg_priv_t);
-}
-
static void vp8_init_ctx(vpx_codec_ctx_t *ctx)
{
vpx_codec_alg_priv_t *priv =
@@ -180,7 +167,7 @@
const uint8_t *clear = data;
if (decrypt_cb)
{
- int n = MIN(sizeof(clear_buffer), data_sz);
+ int n = VPXMIN(sizeof(clear_buffer), data_sz);
decrypt_cb(decrypt_state, data, clear_buffer, n);
clear = clear_buffer;
}
@@ -259,8 +246,8 @@
img->fmt = VPX_IMG_FMT_I420;
img->w = yv12->y_stride;
img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15;
- img->d_w = yv12->y_width;
- img->d_h = yv12->y_height;
+ img->d_w = img->r_w = yv12->y_width;
+ img->d_h = img->r_h = yv12->y_height;
img->x_chroma_shift = 1;
img->y_chroma_shift = 1;
img->planes[VPX_PLANE_Y] = yv12->y_buffer;
diff --git a/libvpx/vp9/common/vp9_alloccommon.c b/libvpx/vp9/common/vp9_alloccommon.c
index ac417b6..24c6c54 100644
--- a/libvpx/vp9/common/vp9_alloccommon.c
+++ b/libvpx/vp9/common/vp9_alloccommon.c
@@ -115,6 +115,8 @@
cm->above_context = NULL;
vpx_free(cm->above_seg_context);
cm->above_seg_context = NULL;
+ vpx_free(cm->lf.lfm);
+ cm->lf.lfm = NULL;
}
int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
@@ -149,6 +151,16 @@
cm->above_context_alloc_cols = cm->mi_cols;
}
+ vpx_free(cm->lf.lfm);
+
+ // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The
+ // stride and rows are rounded up / truncated to a multiple of 8.
+ cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3;
+ cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc(
+ ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride,
+ sizeof(*cm->lf.lfm));
+ if (!cm->lf.lfm) goto fail;
+
return 0;
fail:
diff --git a/libvpx/vp9/common/vp9_blockd.c b/libvpx/vp9/common/vp9_blockd.c
index e8334fc..0e104ee 100644
--- a/libvpx/vp9/common/vp9_blockd.c
+++ b/libvpx/vp9/common/vp9_blockd.c
@@ -129,7 +129,6 @@
int i;
for (i = 0; i < MAX_MB_PLANE; i++) {
- xd->plane[i].plane_type = i ? PLANE_TYPE_UV : PLANE_TYPE_Y;
xd->plane[i].subsampling_x = i ? ss_x : 0;
xd->plane[i].subsampling_y = i ? ss_y : 0;
}
diff --git a/libvpx/vp9/common/vp9_blockd.h b/libvpx/vp9/common/vp9_blockd.h
index d776b44..61eb591 100644
--- a/libvpx/vp9/common/vp9_blockd.h
+++ b/libvpx/vp9/common/vp9_blockd.h
@@ -14,6 +14,7 @@
#include "./vpx_config.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#include "vpx_scale/yv12config.h"
@@ -119,7 +120,6 @@
struct macroblockd_plane {
tran_low_t *dqcoeff;
- PLANE_TYPE plane_type;
int subsampling_x;
int subsampling_y;
struct buf_2d dst;
@@ -175,7 +175,6 @@
int mb_to_bottom_edge;
FRAME_CONTEXT *fc;
- int frame_parallel_decoding_mode;
/* pointers to reference frames */
RefBuffer *block_refs[2];
@@ -200,6 +199,10 @@
struct vpx_internal_error_info *error_info;
} MACROBLOCKD;
+static INLINE PLANE_TYPE get_plane_type(int plane) {
+ return (PLANE_TYPE)(plane > 0);
+}
+
static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize,
PARTITION_TYPE partition) {
return subsize_lookup[partition][bsize];
@@ -235,7 +238,7 @@
return TX_4X4;
} else {
const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss];
- return MIN(y_tx_size, max_txsize_lookup[plane_bsize]);
+ return VPXMIN(y_tx_size, max_txsize_lookup[plane_bsize]);
}
}
diff --git a/libvpx/vp9/common/vp9_common_data.c b/libvpx/vp9/common/vp9_common_data.c
index 0bf7cbc..a6dae6a 100644
--- a/libvpx/vp9/common/vp9_common_data.c
+++ b/libvpx/vp9/common/vp9_common_data.c
@@ -9,6 +9,7 @@
*/
#include "vp9/common/vp9_common_data.h"
+#include "vpx_dsp/vpx_dsp_common.h"
// Log 2 conversion lookup tables for block width and height
const uint8_t b_width_log2_lookup[BLOCK_SIZES] =
@@ -27,7 +28,7 @@
const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8};
-// MIN(3, MIN(b_width_log2(bsize), b_height_log2(bsize)))
+// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize)))
const uint8_t size_group_lookup[BLOCK_SIZES] =
{0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3};
diff --git a/libvpx/vp9/common/vp9_entropy.h b/libvpx/vp9/common/vp9_entropy.h
index a1746bc..21611ed 100644
--- a/libvpx/vp9/common/vp9_entropy.h
+++ b/libvpx/vp9/common/vp9_entropy.h
@@ -75,21 +75,6 @@
#define EOB_MODEL_TOKEN 3
-typedef struct {
- const vpx_tree_index *tree;
- const vpx_prob *prob;
- int len;
- int base_val;
- const int16_t *cost;
-} vp9_extra_bit;
-
-// indexed by token value
-extern const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS];
-#if CONFIG_VP9_HIGHBITDEPTH
-extern const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS];
-extern const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS];
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
#define DCT_MAX_VALUE 16384
#if CONFIG_VP9_HIGHBITDEPTH
#define DCT_MAX_VALUE_HIGH10 65536
diff --git a/libvpx/vp9/common/vp9_loopfilter.c b/libvpx/vp9/common/vp9_loopfilter.c
index 0915918..b8a1132 100644
--- a/libvpx/vp9/common/vp9_loopfilter.c
+++ b/libvpx/vp9/common/vp9_loopfilter.c
@@ -13,6 +13,7 @@
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_reconinter.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@@ -775,7 +776,7 @@
// an 8x8 in that the internal ones can be skipped and don't depend on
// the prediction block size.
if (tx_size_y == TX_4X4)
- *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
+ *int_4x4_y |= size_mask[block_size] << shift_y;
if (tx_size_uv == TX_4X4)
*int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
@@ -821,7 +822,121 @@
left_64x64_txform_mask[tx_size_y]) << shift_y;
if (tx_size_y == TX_4X4)
- *int_4x4_y |= (size_mask[block_size] & 0xffffffffffffffffULL) << shift_y;
+ *int_4x4_y |= size_mask[block_size] << shift_y;
+}
+
+void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row,
+ const int mi_col, LOOP_FILTER_MASK *lfm) {
+ int i;
+
+ // The largest loopfilter we have is 16x16 so we use the 16x16 mask
+ // for 32x32 transforms also.
+ lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
+ lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
+ lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
+ lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];
+
+ // We do at least 8 tap filter on every 32x32 even if the transform size
+ // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and
+ // remove it from the 4x4.
+ lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
+ lfm->left_y[TX_4X4] &= ~left_border;
+ lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
+ lfm->above_y[TX_4X4] &= ~above_border;
+ lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
+ lfm->left_uv[TX_4X4] &= ~left_border_uv;
+ lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
+ lfm->above_uv[TX_4X4] &= ~above_border_uv;
+
+ // We do some special edge handling.
+ if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) {
+ const uint64_t rows = cm->mi_rows - mi_row;
+
+ // Each pixel inside the border gets a 1,
+ const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1);
+ const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1);
+
+ // Remove values completely outside our border.
+ for (i = 0; i < TX_32X32; i++) {
+ lfm->left_y[i] &= mask_y;
+ lfm->above_y[i] &= mask_y;
+ lfm->left_uv[i] &= mask_uv;
+ lfm->above_uv[i] &= mask_uv;
+ }
+ lfm->int_4x4_y &= mask_y;
+ lfm->int_4x4_uv &= mask_uv;
+
+ // We don't apply a wide loop filter on the last uv block row. If set
+ // apply the shorter one instead.
+ if (rows == 1) {
+ lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
+ lfm->above_uv[TX_16X16] = 0;
+ }
+ if (rows == 5) {
+ lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
+ lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
+ }
+ }
+
+ if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) {
+ const uint64_t columns = cm->mi_cols - mi_col;
+
+ // Each pixel inside the border gets a 1, the multiply copies the border
+ // to where we need it.
+ const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL;
+ const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;
+
+ // Internal edges are not applied on the last column of the image so
+ // we mask 1 more for the internal edges
+ const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;
+
+ // Remove the bits outside the image edge.
+ for (i = 0; i < TX_32X32; i++) {
+ lfm->left_y[i] &= mask_y;
+ lfm->above_y[i] &= mask_y;
+ lfm->left_uv[i] &= mask_uv;
+ lfm->above_uv[i] &= mask_uv;
+ }
+ lfm->int_4x4_y &= mask_y;
+ lfm->int_4x4_uv &= mask_uv_int;
+
+ // We don't apply a wide loop filter on the last uv column. If set
+ // apply the shorter one instead.
+ if (columns == 1) {
+ lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
+ lfm->left_uv[TX_16X16] = 0;
+ }
+ if (columns == 5) {
+ lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
+ lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
+ }
+ }
+ // We don't apply a loop filter on the first column in the image, mask that
+ // out.
+ if (mi_col == 0) {
+ for (i = 0; i < TX_32X32; i++) {
+ lfm->left_y[i] &= 0xfefefefefefefefeULL;
+ lfm->left_uv[i] &= 0xeeee;
+ }
+ }
+
+ // Assert if we try to apply 2 different loop filters at the same position.
+ assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
+ assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
+ assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
+ assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
+ assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8]));
+ assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
+ assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
+ assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16]));
+ assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
+ assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
+ assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
+ assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
+ assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
+ assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
+ assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
+ assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
}
// This function sets up the bit masks for the entire 64x64 region represented
@@ -854,7 +969,6 @@
const int shift_8_y[] = {0, 1, 8, 9};
const int shift_32_uv[] = {0, 2, 8, 10};
const int shift_16_uv[] = {0, 1, 4, 5};
- int i;
const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ?
cm->mi_rows - mi_row : MI_BLOCK_SIZE);
const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ?
@@ -969,114 +1083,8 @@
}
break;
}
- // The largest loopfilter we have is 16x16 so we use the 16x16 mask
- // for 32x32 transforms also.
- lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32];
- lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32];
- lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32];
- lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32];
- // We do at least 8 tap filter on every 32x32 even if the transform size
- // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and
- // remove it from the 4x4.
- lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border;
- lfm->left_y[TX_4X4] &= ~left_border;
- lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border;
- lfm->above_y[TX_4X4] &= ~above_border;
- lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv;
- lfm->left_uv[TX_4X4] &= ~left_border_uv;
- lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv;
- lfm->above_uv[TX_4X4] &= ~above_border_uv;
-
- // We do some special edge handling.
- if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) {
- const uint64_t rows = cm->mi_rows - mi_row;
-
- // Each pixel inside the border gets a 1,
- const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1);
- const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1);
-
- // Remove values completely outside our border.
- for (i = 0; i < TX_32X32; i++) {
- lfm->left_y[i] &= mask_y;
- lfm->above_y[i] &= mask_y;
- lfm->left_uv[i] &= mask_uv;
- lfm->above_uv[i] &= mask_uv;
- }
- lfm->int_4x4_y &= mask_y;
- lfm->int_4x4_uv &= mask_uv;
-
- // We don't apply a wide loop filter on the last uv block row. If set
- // apply the shorter one instead.
- if (rows == 1) {
- lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16];
- lfm->above_uv[TX_16X16] = 0;
- }
- if (rows == 5) {
- lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00;
- lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00);
- }
- }
-
- if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) {
- const uint64_t columns = cm->mi_cols - mi_col;
-
- // Each pixel inside the border gets a 1, the multiply copies the border
- // to where we need it.
- const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL;
- const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;
-
- // Internal edges are not applied on the last column of the image so
- // we mask 1 more for the internal edges
- const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;
-
- // Remove the bits outside the image edge.
- for (i = 0; i < TX_32X32; i++) {
- lfm->left_y[i] &= mask_y;
- lfm->above_y[i] &= mask_y;
- lfm->left_uv[i] &= mask_uv;
- lfm->above_uv[i] &= mask_uv;
- }
- lfm->int_4x4_y &= mask_y;
- lfm->int_4x4_uv &= mask_uv_int;
-
- // We don't apply a wide loop filter on the last uv column. If set
- // apply the shorter one instead.
- if (columns == 1) {
- lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
- lfm->left_uv[TX_16X16] = 0;
- }
- if (columns == 5) {
- lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
- lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
- }
- }
- // We don't apply a loop filter on the first column in the image, mask that
- // out.
- if (mi_col == 0) {
- for (i = 0; i < TX_32X32; i++) {
- lfm->left_y[i] &= 0xfefefefefefefefeULL;
- lfm->left_uv[i] &= 0xeeee;
- }
- }
-
- // Assert if we try to apply 2 different loop filters at the same position.
- assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
- assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
- assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
- assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
- assert(!(lfm->left_uv[TX_16X16]&lfm->left_uv[TX_8X8]));
- assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
- assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
- assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16]));
- assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
- assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
- assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
- assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
- assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
- assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
- assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
- assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
+ vp9_adjust_mask(cm, mi_row, mi_col, lfm);
}
static void filter_selectively_vert(uint8_t *s, int pitch,
@@ -1188,9 +1196,7 @@
const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ?
!(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1;
const int skip_this_r = skip_this && !block_edge_above;
- const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
- ? get_uv_tx_size(&mi[0].mbmi, plane)
- : mi[0].mbmi.tx_size;
+ const TX_SIZE tx_size = get_uv_tx_size(&mi[0].mbmi, plane);
const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
@@ -1427,6 +1433,7 @@
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
int r, c;
+ uint8_t lfl_uv[16];
uint16_t mask_16x16 = lfm->left_uv[TX_16X16];
uint16_t mask_8x8 = lfm->left_uv[TX_8X8];
@@ -1437,11 +1444,9 @@
// Vertical pass: do 2 rows at one time
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) {
- if (plane->plane_type == 1) {
- for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) {
- lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)];
- lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
- }
+ for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) {
+ lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)];
+ lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
}
{
@@ -1456,18 +1461,18 @@
highbd_filter_selectively_vert_row2(
plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
- &lfm->lfl_uv[r << 1], (int)cm->bit_depth);
+ &lfl_uv[r << 1], (int)cm->bit_depth);
} else {
filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride,
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
- &lfm->lfl_uv[r << 1]);
+ &lfl_uv[r << 1]);
}
#else
filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride,
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
- &lfm->lfl_uv[r << 1]);
+ &lfl_uv[r << 1]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 16 * dst->stride;
@@ -1508,16 +1513,16 @@
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
- &lfm->lfl_uv[r << 1], (int)cm->bit_depth);
+ &lfl_uv[r << 1], (int)cm->bit_depth);
} else {
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
- &lfm->lfl_uv[r << 1]);
+ &lfl_uv[r << 1]);
}
#else
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
- &lfm->lfl_uv[r << 1]);
+ &lfl_uv[r << 1]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
@@ -1528,13 +1533,11 @@
}
}
-void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
- VP9_COMMON *cm,
- struct macroblockd_plane planes[MAX_MB_PLANE],
- int start, int stop, int y_only) {
+static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm,
+ struct macroblockd_plane planes[MAX_MB_PLANE],
+ int start, int stop, int y_only) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
enum lf_path path;
- LOOP_FILTER_MASK lfm;
int mi_row, mi_col;
if (y_only)
@@ -1548,24 +1551,24 @@
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
+ LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0);
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) {
int plane;
vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
// TODO(JBB): Make setup_mask work for non 420.
- vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
- &lfm);
+ vp9_adjust_mask(cm, mi_row, mi_col, lfm);
- vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
+ vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm);
for (plane = 1; plane < num_planes; ++plane) {
switch (path) {
case LF_PATH_420:
- vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
+ vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm);
break;
case LF_PATH_444:
- vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
+ vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm);
break;
case LF_PATH_SLOW:
vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
@@ -1588,13 +1591,135 @@
if (partial_frame && cm->mi_rows > 8) {
start_mi_row = cm->mi_rows >> 1;
start_mi_row &= 0xfffffff8;
- mi_rows_to_filter = MAX(cm->mi_rows / 8, 8);
+ mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
}
end_mi_row = start_mi_row + mi_rows_to_filter;
+ loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only);
+}
+
+// Used by the encoder to build the loopfilter masks.
+void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level,
+ int partial_frame) {
+ int start_mi_row, end_mi_row, mi_rows_to_filter;
+ int mi_col, mi_row;
+ if (!frame_filter_level) return;
+ start_mi_row = 0;
+ mi_rows_to_filter = cm->mi_rows;
+ if (partial_frame && cm->mi_rows > 8) {
+ start_mi_row = cm->mi_rows >> 1;
+ start_mi_row &= 0xfffffff8;
+ mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
+ }
+ end_mi_row = start_mi_row + mi_rows_to_filter;
+
vp9_loop_filter_frame_init(cm, frame_filter_level);
- vp9_loop_filter_rows(frame, cm, xd->plane,
- start_mi_row, end_mi_row,
- y_only);
+
+ for (mi_row = start_mi_row; mi_row < end_mi_row; mi_row += MI_BLOCK_SIZE) {
+ MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
+ // vp9_setup_mask() zeros lfm
+ vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
+ get_lfm(&cm->lf, mi_row, mi_col));
+ }
+ }
+}
+
+// 8x8 blocks in a superblock. A "1" represents the first block in a 16x16
+// or greater area.
+static const uint8_t first_block_in_16x16[8][8] = {
+ {1, 0, 1, 0, 1, 0, 1, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0},
+ {1, 0, 1, 0, 1, 0, 1, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0},
+ {1, 0, 1, 0, 1, 0, 1, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0},
+ {1, 0, 1, 0, 1, 0, 1, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0}
+};
+
+// This function sets up the bit masks for a block represented
+// by mi_row, mi_col in a 64x64 region.
+// TODO(SJL): This function only works for yv12.
+void vp9_build_mask(VP9_COMMON *cm, const MB_MODE_INFO *mbmi, int mi_row,
+ int mi_col, int bw, int bh) {
+ const BLOCK_SIZE block_size = mbmi->sb_type;
+ const TX_SIZE tx_size_y = mbmi->tx_size;
+ const loop_filter_info_n *const lfi_n = &cm->lf_info;
+ const int filter_level = get_filter_level(lfi_n, mbmi);
+ const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1);
+ LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col);
+ uint64_t *const left_y = &lfm->left_y[tx_size_y];
+ uint64_t *const above_y = &lfm->above_y[tx_size_y];
+ uint64_t *const int_4x4_y = &lfm->int_4x4_y;
+ uint16_t *const left_uv = &lfm->left_uv[tx_size_uv];
+ uint16_t *const above_uv = &lfm->above_uv[tx_size_uv];
+ uint16_t *const int_4x4_uv = &lfm->int_4x4_uv;
+ const int row_in_sb = (mi_row & 7);
+ const int col_in_sb = (mi_col & 7);
+ const int shift_y = col_in_sb + (row_in_sb << 3);
+ const int shift_uv = (col_in_sb >> 1) + ((row_in_sb >> 1) << 2);
+ const int build_uv = first_block_in_16x16[row_in_sb][col_in_sb];
+
+ if (!filter_level) {
+ return;
+ } else {
+ int index = shift_y;
+ int i;
+ for (i = 0; i < bh; i++) {
+ memset(&lfm->lfl_y[index], filter_level, bw);
+ index += 8;
+ }
+ }
+
+ // These set 1 in the current block size for the block size edges.
+ // For instance if the block size is 32x16, we'll set:
+ // above = 1111
+ // 0000
+ // and
+ // left = 1000
+ // = 1000
+ // NOTE : In this example the low bit is left most, so ( 1000 ) is stored as
+ // 1, not 8...
+ //
+ // U and V set things on a 16 bit scale.
+ //
+ *above_y |= above_prediction_mask[block_size] << shift_y;
+ *left_y |= left_prediction_mask[block_size] << shift_y;
+
+ if (build_uv) {
+ *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
+ *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;
+ }
+
+ // If the block has no coefficients and is not intra we skip applying
+ // the loop filter on block edges.
+ if (mbmi->skip && is_inter_block(mbmi))
+ return;
+
+ // Add a mask for the transform size. The transform size mask is set to
+ // be correct for a 64x64 prediction block size. Mask to match the size of
+ // the block we are working on and then shift it into place.
+ *above_y |= (size_mask[block_size] &
+ above_64x64_txform_mask[tx_size_y]) << shift_y;
+ *left_y |= (size_mask[block_size] &
+ left_64x64_txform_mask[tx_size_y]) << shift_y;
+
+ if (build_uv) {
+ *above_uv |= (size_mask_uv[block_size] &
+ above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;
+
+ *left_uv |= (size_mask_uv[block_size] &
+ left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;
+ }
+
+ // Try to determine what to do with the internal 4x4 block boundaries. These
+ // differ from the 4x4 boundaries on the outside edge of an 8x8 in that the
+ // internal ones can be skipped and don't depend on the prediction block size.
+ if (tx_size_y == TX_4X4)
+ *int_4x4_y |= size_mask[block_size] << shift_y;
+
+ if (build_uv && tx_size_uv == TX_4X4)
+ *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
}
void vp9_loop_filter_data_reset(
@@ -1608,9 +1733,17 @@
memcpy(lf_data->planes, planes, sizeof(lf_data->planes));
}
+void vp9_reset_lfm(VP9_COMMON *const cm) {
+ if (cm->lf.filter_level) {
+ memset(cm->lf.lfm, 0,
+ ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride *
+ sizeof(*cm->lf.lfm));
+ }
+}
+
int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused) {
(void)unused;
- vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
- lf_data->start, lf_data->stop, lf_data->y_only);
+ loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
+ lf_data->start, lf_data->stop, lf_data->y_only);
return 1;
}
diff --git a/libvpx/vp9/common/vp9_loopfilter.h b/libvpx/vp9/common/vp9_loopfilter.h
index f7cbde6..7f943ea 100644
--- a/libvpx/vp9/common/vp9_loopfilter.h
+++ b/libvpx/vp9/common/vp9_loopfilter.h
@@ -35,24 +35,6 @@
LF_PATH_SLOW,
};
-struct loopfilter {
- int filter_level;
-
- int sharpness_level;
- int last_sharpness_level;
-
- uint8_t mode_ref_delta_enabled;
- uint8_t mode_ref_delta_update;
-
- // 0 = Intra, Last, GF, ARF
- signed char ref_deltas[MAX_REF_LF_DELTAS];
- signed char last_ref_deltas[MAX_REF_LF_DELTAS];
-
- // 0 = ZERO_MV, MV
- signed char mode_deltas[MAX_MODE_LF_DELTAS];
- signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
-};
-
// Need to align this structure so when it is declared and
// passed it can be loaded into vector registers.
typedef struct {
@@ -83,9 +65,29 @@
uint16_t above_uv[TX_SIZES];
uint16_t int_4x4_uv;
uint8_t lfl_y[64];
- uint8_t lfl_uv[16];
} LOOP_FILTER_MASK;
+struct loopfilter {
+ int filter_level;
+
+ int sharpness_level;
+ int last_sharpness_level;
+
+ uint8_t mode_ref_delta_enabled;
+ uint8_t mode_ref_delta_update;
+
+ // 0 = Intra, Last, GF, ARF
+ signed char ref_deltas[MAX_REF_LF_DELTAS];
+ signed char last_ref_deltas[MAX_REF_LF_DELTAS];
+
+ // 0 = ZERO_MV, MV
+ signed char mode_deltas[MAX_MODE_LF_DELTAS];
+ signed char last_mode_deltas[MAX_MODE_LF_DELTAS];
+
+ LOOP_FILTER_MASK *lfm;
+ int lfm_stride;
+};
+
/* assorted loopfilter functions which get used elsewhere */
struct VP9Common;
struct macroblockd;
@@ -116,7 +118,7 @@
void vp9_loop_filter_init(struct VP9Common *cm);
// Update the loop filter for the current frame.
-// This should be called before vp9_loop_filter_rows(), vp9_loop_filter_frame()
+// This should be called before vp9_loop_filter_frame(); vp9_build_mask_frame()
// calls this function directly.
void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl);
@@ -126,11 +128,19 @@
int filter_level,
int y_only, int partial_frame);
-// Apply the loop filter to [start, stop) macro block rows in frame_buffer.
-void vp9_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
- struct VP9Common *cm,
- struct macroblockd_plane planes[MAX_MB_PLANE],
- int start, int stop, int y_only);
+// Get the superblock lfm for a given mi_row, mi_col.
+static INLINE LOOP_FILTER_MASK *get_lfm(const struct loopfilter *lf,
+ const int mi_row, const int mi_col) {
+ return &lf->lfm[(mi_col >> 3) + ((mi_row >> 3) * lf->lfm_stride)];
+}
+
+void vp9_build_mask(struct VP9Common *cm, const MB_MODE_INFO *mbmi, int mi_row,
+ int mi_col, int bw, int bh);
+void vp9_adjust_mask(struct VP9Common *const cm, const int mi_row,
+ const int mi_col, LOOP_FILTER_MASK *lfm);
+void vp9_build_mask_frame(struct VP9Common *cm, int frame_filter_level,
+ int partial_frame);
+void vp9_reset_lfm(struct VP9Common *const cm);
typedef struct LoopFilterWorkerData {
YV12_BUFFER_CONFIG *frame_buffer;
diff --git a/libvpx/vp9/common/vp9_onyxc_int.h b/libvpx/vp9/common/vp9_onyxc_int.h
index c373c02..ceffded 100644
--- a/libvpx/vp9/common/vp9_onyxc_int.h
+++ b/libvpx/vp9/common/vp9_onyxc_int.h
@@ -112,10 +112,11 @@
typedef struct VP9Common {
struct vpx_internal_error_info error;
vpx_color_space_t color_space;
+ vpx_color_range_t color_range;
int width;
int height;
- int display_width;
- int display_height;
+ int render_width;
+ int render_height;
int last_width;
int last_height;
@@ -357,13 +358,12 @@
xd->above_context[i] = cm->above_context +
i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols);
- if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
+ if (get_plane_type(i) == PLANE_TYPE_Y) {
memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant));
} else {
memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant));
}
xd->fc = cm->fc;
- xd->frame_parallel_decoding_mode = cm->frame_parallel_decoding_mode;
}
xd->above_seg_context = cm->above_seg_context;
diff --git a/libvpx/vp9/common/vp9_postproc.c b/libvpx/vp9/common/vp9_postproc.c
index 71ab861..b685d81 100644
--- a/libvpx/vp9/common/vp9_postproc.c
+++ b/libvpx/vp9/common/vp9_postproc.c
@@ -16,6 +16,7 @@
#include "./vpx_scale_rtcd.h"
#include "./vp9_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"
#include "vpx_scale/vpx_scale.h"
@@ -625,7 +626,7 @@
int vp9_post_proc_frame(struct VP9Common *cm,
YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *ppflags) {
- const int q = MIN(105, cm->lf.filter_level * 2);
+ const int q = VPXMIN(105, cm->lf.filter_level * 2);
const int flags = ppflags->post_proc_flag;
YV12_BUFFER_CONFIG *const ppbuf = &cm->post_proc_buffer;
struct postproc_state *const ppstate = &cm->postproc_state;
diff --git a/libvpx/vp9/common/vp9_pred_common.h b/libvpx/vp9/common/vp9_pred_common.h
index 67b95db..6f7af4a 100644
--- a/libvpx/vp9/common/vp9_pred_common.h
+++ b/libvpx/vp9/common/vp9_pred_common.h
@@ -13,6 +13,7 @@
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_onyxc_int.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#ifdef __cplusplus
extern "C" {
@@ -24,14 +25,14 @@
const int mi_offset = mi_row * cm->mi_cols + mi_col;
const int bw = num_8x8_blocks_wide_lookup[bsize];
const int bh = num_8x8_blocks_high_lookup[bsize];
- const int xmis = MIN(cm->mi_cols - mi_col, bw);
- const int ymis = MIN(cm->mi_rows - mi_row, bh);
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
int x, y, segment_id = MAX_SEGMENTS;
for (y = 0; y < ymis; ++y)
for (x = 0; x < xmis; ++x)
- segment_id = MIN(segment_id,
- segment_ids[mi_offset + y * cm->mi_cols + x]);
+ segment_id =
+ VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]);
assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
return segment_id;
diff --git a/libvpx/vp9/common/vp9_reconinter.c b/libvpx/vp9/common/vp9_reconinter.c
index f83f825..d8c14ec 100644
--- a/libvpx/vp9/common/vp9_reconinter.c
+++ b/libvpx/vp9/common/vp9_reconinter.c
@@ -187,7 +187,19 @@
const int is_scaled = vp9_is_scaled(sf);
if (is_scaled) {
- pre = pre_buf->buf + scaled_buffer_offset(x, y, pre_buf->stride, sf);
+ // Co-ordinate of containing block to pixel precision.
+ const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
+ const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
+ if (plane == 0)
+ pre_buf->buf = xd->block_refs[ref]->buf->y_buffer;
+ else if (plane == 1)
+ pre_buf->buf = xd->block_refs[ref]->buf->u_buffer;
+ else
+ pre_buf->buf = xd->block_refs[ref]->buf->v_buffer;
+
+ pre_buf->buf += scaled_buffer_offset(x_start + x, y_start + y,
+ pre_buf->stride, sf);
+ pre = pre_buf->buf;
scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
xs = sf->x_step_q4;
ys = sf->y_step_q4;
diff --git a/libvpx/vp9/common/vp9_reconintra.c b/libvpx/vp9/common/vp9_reconintra.c
index e60eff8..3d84a28 100644
--- a/libvpx/vp9/common/vp9_reconintra.c
+++ b/libvpx/vp9/common/vp9_reconintra.c
@@ -133,7 +133,6 @@
int frame_width, frame_height;
int x0, y0;
const struct macroblockd_plane *const pd = &xd->plane[plane];
- // int base=128;
int base = 128 << (bd - 8);
// 127 127 127 .. 127 127 127 127 127 127
// 129 A B .. Y Z
diff --git a/libvpx/vp9/common/vp9_rtcd_defs.pl b/libvpx/vp9/common/vp9_rtcd_defs.pl
index 737fc56..5bf71ef 100644
--- a/libvpx/vp9/common/vp9_rtcd_defs.pl
+++ b/libvpx/vp9/common/vp9_rtcd_defs.pl
@@ -85,16 +85,26 @@
# dct
#
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- # Note as optimized versions of these functions are added we need to add a check to ensure
- # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
- add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
- specialize qw/vp9_iht4x4_16_add/;
+ # Force C versions if CONFIG_EMULATE_HARDWARE is 1
+ if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+ add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp9_iht4x4_16_add/;
- add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
- specialize qw/vp9_iht8x8_64_add/;
+ add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp9_iht8x8_64_add/;
- add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
- specialize qw/vp9_iht16x16_256_add/;
+ add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/vp9_iht16x16_256_add/;
+ } else {
+ add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp9_iht4x4_16_add sse2/;
+
+ add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
+ specialize qw/vp9_iht8x8_64_add sse2/;
+
+ add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+ specialize qw/vp9_iht16x16_256_add sse2/;
+ }
} else {
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
@@ -231,11 +241,15 @@
}
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
-# the transform coefficients are held in 32-bit
-# values, so the assembler code for vp9_block_error can no longer be used.
add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp9_block_error/;
+ add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
+ specialize qw/vp9_highbd_block_error/, "$sse2_x86inc";
+
+ add_proto qw/int64_t vp9_highbd_block_error_8bit/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+ specialize qw/vp9_highbd_block_error_8bit/, "$sse2_x86inc", "$avx_x86inc";
+
add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp/;
@@ -310,9 +324,6 @@
# ENCODEMB INVOKE
- add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
- specialize qw/vp9_highbd_block_error sse2/;
-
add_proto qw/void vp9_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_highbd_quantize_fp/;
diff --git a/libvpx/vp9/common/vp9_thread_common.c b/libvpx/vp9/common/vp9_thread_common.c
index 6b11c93..db78d6b 100644
--- a/libvpx/vp9/common/vp9_thread_common.c
+++ b/libvpx/vp9/common/vp9_thread_common.c
@@ -9,6 +9,7 @@
*/
#include "./vpx_config.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_thread_common.h"
@@ -108,29 +109,27 @@
for (mi_row = start; mi_row < stop;
mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
+ LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0);
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) {
const int r = mi_row >> MI_BLOCK_SIZE_LOG2;
const int c = mi_col >> MI_BLOCK_SIZE_LOG2;
- LOOP_FILTER_MASK lfm;
int plane;
sync_read(lf_sync, r, c);
vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
- // TODO(JBB): Make setup_mask work for non 420.
- vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
- &lfm);
+ vp9_adjust_mask(cm, mi_row, mi_col, lfm);
- vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
+ vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm);
for (plane = 1; plane < num_planes; ++plane) {
switch (path) {
case LF_PATH_420:
- vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
+ vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm);
break;
case LF_PATH_444:
- vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
+ vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm);
break;
case LF_PATH_SLOW:
vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
@@ -165,7 +164,7 @@
// Decoder may allocate more threads than number of tiles based on user's
// input.
const int tile_cols = 1 << cm->log2_tile_cols;
- const int num_workers = MIN(nworkers, tile_cols);
+ const int num_workers = VPXMIN(nworkers, tile_cols);
int i;
if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
@@ -229,7 +228,7 @@
if (partial_frame && cm->mi_rows > 8) {
start_mi_row = cm->mi_rows >> 1;
start_mi_row &= 0xfffffff8;
- mi_rows_to_filter = MAX(cm->mi_rows / 8, 8);
+ mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
}
end_mi_row = start_mi_row + mi_rows_to_filter;
vp9_loop_filter_frame_init(cm, frame_filter_level);
@@ -317,21 +316,21 @@
}
// Accumulate frame counts.
-void vp9_accumulate_frame_counts(VP9_COMMON *cm, FRAME_COUNTS *counts,
- int is_dec) {
+void vp9_accumulate_frame_counts(FRAME_COUNTS *accum,
+ const FRAME_COUNTS *counts, int is_dec) {
int i, j, k, l, m;
for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
for (j = 0; j < INTRA_MODES; j++)
- cm->counts.y_mode[i][j] += counts->y_mode[i][j];
+ accum->y_mode[i][j] += counts->y_mode[i][j];
for (i = 0; i < INTRA_MODES; i++)
for (j = 0; j < INTRA_MODES; j++)
- cm->counts.uv_mode[i][j] += counts->uv_mode[i][j];
+ accum->uv_mode[i][j] += counts->uv_mode[i][j];
for (i = 0; i < PARTITION_CONTEXTS; i++)
for (j = 0; j < PARTITION_TYPES; j++)
- cm->counts.partition[i][j] += counts->partition[i][j];
+ accum->partition[i][j] += counts->partition[i][j];
if (is_dec) {
int n;
@@ -340,10 +339,10 @@
for (k = 0; k < REF_TYPES; k++)
for (l = 0; l < COEF_BANDS; l++)
for (m = 0; m < COEFF_CONTEXTS; m++) {
- cm->counts.eob_branch[i][j][k][l][m] +=
+ accum->eob_branch[i][j][k][l][m] +=
counts->eob_branch[i][j][k][l][m];
for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
- cm->counts.coef[i][j][k][l][m][n] +=
+ accum->coef[i][j][k][l][m][n] +=
counts->coef[i][j][k][l][m][n];
}
} else {
@@ -352,64 +351,64 @@
for (k = 0; k < REF_TYPES; k++)
for (l = 0; l < COEF_BANDS; l++)
for (m = 0; m < COEFF_CONTEXTS; m++)
- cm->counts.eob_branch[i][j][k][l][m] +=
+ accum->eob_branch[i][j][k][l][m] +=
counts->eob_branch[i][j][k][l][m];
- // In the encoder, cm->counts.coef is only updated at frame
+ // In the encoder, coef is only updated at frame
// level, so not need to accumulate it here.
// for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
- // cm->counts.coef[i][j][k][l][m][n] +=
+ // accum->coef[i][j][k][l][m][n] +=
// counts->coef[i][j][k][l][m][n];
}
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
for (j = 0; j < SWITCHABLE_FILTERS; j++)
- cm->counts.switchable_interp[i][j] += counts->switchable_interp[i][j];
+ accum->switchable_interp[i][j] += counts->switchable_interp[i][j];
for (i = 0; i < INTER_MODE_CONTEXTS; i++)
for (j = 0; j < INTER_MODES; j++)
- cm->counts.inter_mode[i][j] += counts->inter_mode[i][j];
+ accum->inter_mode[i][j] += counts->inter_mode[i][j];
for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
for (j = 0; j < 2; j++)
- cm->counts.intra_inter[i][j] += counts->intra_inter[i][j];
+ accum->intra_inter[i][j] += counts->intra_inter[i][j];
for (i = 0; i < COMP_INTER_CONTEXTS; i++)
for (j = 0; j < 2; j++)
- cm->counts.comp_inter[i][j] += counts->comp_inter[i][j];
+ accum->comp_inter[i][j] += counts->comp_inter[i][j];
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < 2; j++)
for (k = 0; k < 2; k++)
- cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k];
+ accum->single_ref[i][j][k] += counts->single_ref[i][j][k];
for (i = 0; i < REF_CONTEXTS; i++)
for (j = 0; j < 2; j++)
- cm->counts.comp_ref[i][j] += counts->comp_ref[i][j];
+ accum->comp_ref[i][j] += counts->comp_ref[i][j];
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
for (j = 0; j < TX_SIZES; j++)
- cm->counts.tx.p32x32[i][j] += counts->tx.p32x32[i][j];
+ accum->tx.p32x32[i][j] += counts->tx.p32x32[i][j];
for (j = 0; j < TX_SIZES - 1; j++)
- cm->counts.tx.p16x16[i][j] += counts->tx.p16x16[i][j];
+ accum->tx.p16x16[i][j] += counts->tx.p16x16[i][j];
for (j = 0; j < TX_SIZES - 2; j++)
- cm->counts.tx.p8x8[i][j] += counts->tx.p8x8[i][j];
+ accum->tx.p8x8[i][j] += counts->tx.p8x8[i][j];
}
for (i = 0; i < TX_SIZES; i++)
- cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i];
+ accum->tx.tx_totals[i] += counts->tx.tx_totals[i];
for (i = 0; i < SKIP_CONTEXTS; i++)
for (j = 0; j < 2; j++)
- cm->counts.skip[i][j] += counts->skip[i][j];
+ accum->skip[i][j] += counts->skip[i][j];
for (i = 0; i < MV_JOINTS; i++)
- cm->counts.mv.joints[i] += counts->mv.joints[i];
+ accum->mv.joints[i] += counts->mv.joints[i];
for (k = 0; k < 2; k++) {
- nmv_component_counts *comps = &cm->counts.mv.comps[k];
- nmv_component_counts *comps_t = &counts->mv.comps[k];
+ nmv_component_counts *const comps = &accum->mv.comps[k];
+ const nmv_component_counts *const comps_t = &counts->mv.comps[k];
for (i = 0; i < 2; i++) {
comps->sign[i] += comps_t->sign[i];
diff --git a/libvpx/vp9/common/vp9_thread_common.h b/libvpx/vp9/common/vp9_thread_common.h
index 07af1bc..b3b60c2 100644
--- a/libvpx/vp9/common/vp9_thread_common.h
+++ b/libvpx/vp9/common/vp9_thread_common.h
@@ -8,12 +8,16 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_LOOPFILTER_THREAD_H_
-#define VP9_COMMON_VP9_LOOPFILTER_THREAD_H_
+#ifndef VP9_COMMON_VP9_THREAD_COMMON_H_
+#define VP9_COMMON_VP9_THREAD_COMMON_H_
#include "./vpx_config.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vpx_util/vpx_thread.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct VP9Common;
struct FRAME_COUNTS;
@@ -51,7 +55,11 @@
VPxWorker *workers, int num_workers,
VP9LfSync *lf_sync);
-void vp9_accumulate_frame_counts(struct VP9Common *cm,
- struct FRAME_COUNTS *counts, int is_dec);
+void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
+ const struct FRAME_COUNTS *counts, int is_dec);
-#endif // VP9_COMMON_VP9_LOOPFILTER_THREAD_H_
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VP9_COMMON_VP9_THREAD_COMMON_H_
diff --git a/libvpx/vp9/common/vp9_tile_common.c b/libvpx/vp9/common/vp9_tile_common.c
index 7a20e0a..9fcb97c 100644
--- a/libvpx/vp9/common/vp9_tile_common.c
+++ b/libvpx/vp9/common/vp9_tile_common.c
@@ -9,8 +9,8 @@
*/
#include "vp9/common/vp9_tile_common.h"
-
#include "vp9/common/vp9_onyxc_int.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#define MIN_TILE_WIDTH_B64 4
#define MAX_TILE_WIDTH_B64 64
@@ -18,7 +18,7 @@
static int get_tile_offset(int idx, int mis, int log2) {
const int sb_cols = mi_cols_aligned_to_sb(mis) >> MI_BLOCK_SIZE_LOG2;
const int offset = ((idx * sb_cols) >> log2) << MI_BLOCK_SIZE_LOG2;
- return MIN(offset, mis);
+ return VPXMIN(offset, mis);
}
void vp9_tile_set_row(TileInfo *tile, const VP9_COMMON *cm, int row) {
diff --git a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
index 4a16345..8d312d0 100644
--- a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -12,14 +12,14 @@
#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"
-void vp9_iht4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride,
+void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
__m128i in[2];
const __m128i zero = _mm_setzero_si128();
const __m128i eight = _mm_set1_epi16(8);
- in[0] = _mm_loadu_si128((const __m128i *)(input));
- in[1] = _mm_loadu_si128((const __m128i *)(input + 8));
+ in[0] = load_input_data(input);
+ in[1] = load_input_data(input + 8);
switch (tx_type) {
case 0: // DCT_DCT
@@ -77,21 +77,21 @@
}
}
-void vp9_iht8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride,
+void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
__m128i in[8];
const __m128i zero = _mm_setzero_si128();
const __m128i final_rounding = _mm_set1_epi16(1 << 4);
// load input data
- in[0] = _mm_load_si128((const __m128i *)input);
- in[1] = _mm_load_si128((const __m128i *)(input + 8 * 1));
- in[2] = _mm_load_si128((const __m128i *)(input + 8 * 2));
- in[3] = _mm_load_si128((const __m128i *)(input + 8 * 3));
- in[4] = _mm_load_si128((const __m128i *)(input + 8 * 4));
- in[5] = _mm_load_si128((const __m128i *)(input + 8 * 5));
- in[6] = _mm_load_si128((const __m128i *)(input + 8 * 6));
- in[7] = _mm_load_si128((const __m128i *)(input + 8 * 7));
+ in[0] = load_input_data(input);
+ in[1] = load_input_data(input + 8 * 1);
+ in[2] = load_input_data(input + 8 * 2);
+ in[3] = load_input_data(input + 8 * 3);
+ in[4] = load_input_data(input + 8 * 4);
+ in[5] = load_input_data(input + 8 * 5);
+ in[6] = load_input_data(input + 8 * 6);
+ in[7] = load_input_data(input + 8 * 7);
switch (tx_type) {
case 0: // DCT_DCT
@@ -144,8 +144,8 @@
RECON_AND_STORE(dest + 7 * stride, in[7]);
}
-void vp9_iht16x16_256_add_sse2(const int16_t *input, uint8_t *dest, int stride,
- int tx_type) {
+void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
+ int stride, int tx_type) {
__m128i in0[16], in1[16];
load_buffer_8x16(input, in0);
diff --git a/libvpx/vp9/decoder/vp9_decodeframe.c b/libvpx/vp9/decoder/vp9_decodeframe.c
index fb7b3b8..f191663 100644
--- a/libvpx/vp9/decoder/vp9_decodeframe.c
+++ b/libvpx/vp9/decoder/vp9_decodeframe.c
@@ -17,6 +17,7 @@
#include "vpx_dsp/bitreader_buffer.h"
#include "vpx_dsp/bitreader.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/mem_ops.h"
@@ -658,7 +659,7 @@
// pixels of each superblock row can be changed by next superblock row.
if (pbi->frame_parallel_decode)
vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
- MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
+ VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
// Skip border extension if block is inside the frame.
if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 ||
@@ -686,7 +687,7 @@
if (pbi->frame_parallel_decode) {
const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS;
vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
- MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
+ VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
}
}
#if CONFIG_VP9_HIGHBITDEPTH
@@ -757,8 +758,8 @@
static INLINE TX_SIZE dec_get_uv_tx_size(const MB_MODE_INFO *mbmi,
int n4_wl, int n4_hl) {
// get minimum log2 num4x4s dimension
- const int x = MIN(n4_wl, n4_hl);
- return MIN(mbmi->tx_size, x);
+ const int x = VPXMIN(n4_wl, n4_hl);
+ return VPXMIN(mbmi->tx_size, x);
}
static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) {
@@ -819,8 +820,8 @@
const int less8x8 = bsize < BLOCK_8X8;
const int bw = 1 << (bwl - 1);
const int bh = 1 << (bhl - 1);
- const int x_mis = MIN(bw, cm->mi_cols - mi_col);
- const int y_mis = MIN(bh, cm->mi_rows - mi_row);
+ const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
+ const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
MB_MODE_INFO *mbmi = set_offsets(cm, xd, bsize, mi_row, mi_col,
bw, bh, x_mis, y_mis, bwl, bhl);
@@ -895,6 +896,10 @@
}
xd->corrupted |= vpx_reader_has_error(r);
+
+ if (cm->lf.filter_level) {
+ vp9_build_mask(cm, mbmi, mi_row, mi_col, bw, bh);
+ }
}
static INLINE int dec_partition_plane_context(const MACROBLOCKD *xd,
@@ -1180,11 +1185,11 @@
: literal_to_filter[vpx_rb_read_literal(rb, 2)];
}
-static void setup_display_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) {
- cm->display_width = cm->width;
- cm->display_height = cm->height;
+static void setup_render_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) {
+ cm->render_width = cm->width;
+ cm->render_height = cm->height;
if (vpx_rb_read_bit(rb))
- vp9_read_frame_size(rb, &cm->display_width, &cm->display_height);
+ vp9_read_frame_size(rb, &cm->render_width, &cm->render_height);
}
static void resize_mv_buffer(VP9_COMMON *cm) {
@@ -1232,7 +1237,7 @@
BufferPool *const pool = cm->buffer_pool;
vp9_read_frame_size(rb, &width, &height);
resize_context_buffers(cm, width, height);
- setup_display_size(cm, rb);
+ setup_render_size(cm, rb);
lock_buffer_pool(pool);
if (vpx_realloc_frame_buffer(
@@ -1255,6 +1260,9 @@
pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y;
pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space;
+ pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range;
+ pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width;
+ pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
}
static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth,
@@ -1313,7 +1321,7 @@
}
resize_context_buffers(cm, width, height);
- setup_display_size(cm, rb);
+ setup_render_size(cm, rb);
lock_buffer_pool(pool);
if (vpx_realloc_frame_buffer(
@@ -1336,6 +1344,9 @@
pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y;
pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space;
+ pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range;
+ pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width;
+ pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height;
}
static void setup_tile_info(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) {
@@ -1358,12 +1369,6 @@
cm->log2_tile_rows += vpx_rb_read_bit(rb);
}
-typedef struct TileBuffer {
- const uint8_t *data;
- size_t size;
- int col; // only used with multi-threaded decoding
-} TileBuffer;
-
// Reads the next tile returning its size and adjusting '*data' accordingly
// based on 'is_last'.
static void get_tile_buffer(const uint8_t *const data_end,
@@ -1461,6 +1466,8 @@
memset(cm->above_seg_context, 0,
sizeof(*cm->above_seg_context) * aligned_cols);
+ vp9_reset_lfm(cm);
+
get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
if (pbi->tile_data == NULL ||
@@ -1560,30 +1567,54 @@
return vpx_reader_find_end(&tile_data->bit_reader);
}
+// On entry 'tile_data->data_end' points to the end of the input frame, on exit
+// it is updated to reflect the bitreader position of the final tile column if
+// present in the tile buffer group or NULL otherwise.
static int tile_worker_hook(TileWorkerData *const tile_data,
- const TileInfo *const tile) {
- int mi_row, mi_col;
+ VP9Decoder *const pbi) {
+ TileInfo *volatile tile = &tile_data->xd.tile;
+ const int final_col = (1 << pbi->common.log2_tile_cols) - 1;
+ const uint8_t *volatile bit_reader_end = NULL;
+ volatile int n = tile_data->buf_start;
+ tile_data->error_info.setjmp = 1;
if (setjmp(tile_data->error_info.jmp)) {
tile_data->error_info.setjmp = 0;
tile_data->xd.corrupted = 1;
+ tile_data->data_end = NULL;
return 0;
}
- tile_data->error_info.setjmp = 1;
tile_data->xd.error_info = &tile_data->error_info;
+ tile_data->xd.corrupted = 0;
- for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
- mi_row += MI_BLOCK_SIZE) {
- vp9_zero(tile_data->xd.left_context);
- vp9_zero(tile_data->xd.left_seg_context);
- for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
- mi_col += MI_BLOCK_SIZE) {
- decode_partition(tile_data->pbi, &tile_data->xd,
- mi_row, mi_col, &tile_data->bit_reader,
- BLOCK_64X64, 4);
+ do {
+ int mi_row, mi_col;
+ const TileBuffer *const buf = pbi->tile_buffers + n;
+ vp9_zero(tile_data->dqcoeff);
+ vp9_tile_init(tile, &pbi->common, 0, buf->col);
+ setup_token_decoder(buf->data, tile_data->data_end, buf->size,
+ &tile_data->error_info, &tile_data->bit_reader,
+ pbi->decrypt_cb, pbi->decrypt_state);
+ vp9_init_macroblockd(&pbi->common, &tile_data->xd, tile_data->dqcoeff);
+
+ for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
+ mi_row += MI_BLOCK_SIZE) {
+ vp9_zero(tile_data->xd.left_context);
+ vp9_zero(tile_data->xd.left_seg_context);
+ for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
+ mi_col += MI_BLOCK_SIZE) {
+ decode_partition(pbi, &tile_data->xd, mi_row, mi_col,
+ &tile_data->bit_reader, BLOCK_64X64, 4);
+ }
}
- }
+
+ if (buf->col == final_col) {
+ bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader);
+ }
+ } while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end);
+
+ tile_data->data_end = bit_reader_end;
return !tile_data->xd.corrupted;
}
@@ -1603,20 +1634,15 @@
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
- const int num_workers = MIN(pbi->max_threads & ~1, tile_cols);
- TileBuffer tile_buffers[1][1 << 6];
+ const int num_workers = VPXMIN(pbi->max_threads, tile_cols);
int n;
- int final_worker = -1;
assert(tile_cols <= (1 << 6));
assert(tile_rows == 1);
(void)tile_rows;
- // TODO(jzern): See if we can remove the restriction of passing in max
- // threads to the decoder.
if (pbi->num_tile_workers == 0) {
- const int num_threads = pbi->max_threads & ~1;
- int i;
+ const int num_threads = pbi->max_threads;
CHECK_MEM_ERROR(cm, pbi->tile_workers,
vpx_malloc(num_threads * sizeof(*pbi->tile_workers)));
// Ensure tile data offsets will be properly aligned. This may fail on
@@ -1625,14 +1651,12 @@
CHECK_MEM_ERROR(cm, pbi->tile_worker_data,
vpx_memalign(32, num_threads *
sizeof(*pbi->tile_worker_data)));
- CHECK_MEM_ERROR(cm, pbi->tile_worker_info,
- vpx_malloc(num_threads * sizeof(*pbi->tile_worker_info)));
- for (i = 0; i < num_threads; ++i) {
- VPxWorker *const worker = &pbi->tile_workers[i];
+ for (n = 0; n < num_threads; ++n) {
+ VPxWorker *const worker = &pbi->tile_workers[n];
++pbi->num_tile_workers;
winterface->init(worker);
- if (i < num_threads - 1 && !winterface->reset(worker)) {
+ if (n < num_threads - 1 && !winterface->reset(worker)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Tile decoder thread creation failed");
}
@@ -1642,10 +1666,14 @@
// Reset tile decoding hook
for (n = 0; n < num_workers; ++n) {
VPxWorker *const worker = &pbi->tile_workers[n];
+ TileWorkerData *const tile_data = &pbi->tile_worker_data[n];
winterface->sync(worker);
+ tile_data->xd = pbi->mb;
+ tile_data->xd.counts =
+ cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts;
worker->hook = (VPxWorkerHook)tile_worker_hook;
- worker->data1 = &pbi->tile_worker_data[n];
- worker->data2 = &pbi->tile_worker_info[n];
+ worker->data1 = tile_data;
+ worker->data2 = pbi;
}
// Note: this memset assumes above_context[0], [1] and [2]
@@ -1655,101 +1683,95 @@
memset(cm->above_seg_context, 0,
sizeof(*cm->above_seg_context) * aligned_mi_cols);
+ vp9_reset_lfm(cm);
+
// Load tile data into tile_buffers
- get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers);
+ get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows,
+ &pbi->tile_buffers);
// Sort the buffers based on size in descending order.
- qsort(tile_buffers[0], tile_cols, sizeof(tile_buffers[0][0]),
+ qsort(pbi->tile_buffers, tile_cols, sizeof(pbi->tile_buffers[0]),
compare_tile_buffers);
- // Rearrange the tile buffers such that per-tile group the largest, and
- // presumably the most difficult, tile will be decoded in the main thread.
- // This should help minimize the number of instances where the main thread is
- // waiting for a worker to complete.
- {
- int group_start = 0;
- while (group_start < tile_cols) {
- const TileBuffer largest = tile_buffers[0][group_start];
- const int group_end = MIN(group_start + num_workers, tile_cols) - 1;
- memmove(tile_buffers[0] + group_start, tile_buffers[0] + group_start + 1,
- (group_end - group_start) * sizeof(tile_buffers[0][0]));
- tile_buffers[0][group_end] = largest;
- group_start = group_end + 1;
+ if (num_workers == tile_cols) {
+ // Rearrange the tile buffers such that the largest, and
+ // presumably the most difficult, tile will be decoded in the main thread.
+ // This should help minimize the number of instances where the main thread
+ // is waiting for a worker to complete.
+ const TileBuffer largest = pbi->tile_buffers[0];
+ memmove(pbi->tile_buffers, pbi->tile_buffers + 1,
+ (tile_cols - 1) * sizeof(pbi->tile_buffers[0]));
+ pbi->tile_buffers[tile_cols - 1] = largest;
+ } else {
+ int start = 0, end = tile_cols - 2;
+ TileBuffer tmp;
+
+ // Interleave the tiles to distribute the load between threads, assuming a
+ // larger tile implies it is more difficult to decode.
+ while (start < end) {
+ tmp = pbi->tile_buffers[start];
+ pbi->tile_buffers[start] = pbi->tile_buffers[end];
+ pbi->tile_buffers[end] = tmp;
+ start += 2;
+ end -= 2;
}
}
// Initialize thread frame counts.
if (!cm->frame_parallel_decoding_mode) {
- int i;
-
- for (i = 0; i < num_workers; ++i) {
+ for (n = 0; n < num_workers; ++n) {
TileWorkerData *const tile_data =
- (TileWorkerData*)pbi->tile_workers[i].data1;
+ (TileWorkerData*)pbi->tile_workers[n].data1;
vp9_zero(tile_data->counts);
}
}
- n = 0;
- while (n < tile_cols) {
- int i;
- for (i = 0; i < num_workers && n < tile_cols; ++i) {
- VPxWorker *const worker = &pbi->tile_workers[i];
- TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
- TileInfo *const tile = (TileInfo*)worker->data2;
- TileBuffer *const buf = &tile_buffers[0][n];
+ {
+ const int base = tile_cols / num_workers;
+ const int remain = tile_cols % num_workers;
+ int buf_start = 0;
- tile_data->pbi = pbi;
- tile_data->xd = pbi->mb;
- tile_data->xd.corrupted = 0;
- tile_data->xd.counts = cm->frame_parallel_decoding_mode ?
- 0 : &tile_data->counts;
- vp9_zero(tile_data->dqcoeff);
- vp9_tile_init(tile, cm, 0, buf->col);
- vp9_tile_init(&tile_data->xd.tile, cm, 0, buf->col);
- setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
- &tile_data->bit_reader, pbi->decrypt_cb,
- pbi->decrypt_state);
- vp9_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff);
+ for (n = 0; n < num_workers; ++n) {
+ const int count = base + (remain + n) / num_workers;
+ VPxWorker *const worker = &pbi->tile_workers[n];
+ TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
+
+ tile_data->buf_start = buf_start;
+ tile_data->buf_end = buf_start + count - 1;
+ tile_data->data_end = data_end;
+ buf_start += count;
worker->had_error = 0;
- if (i == num_workers - 1 || n == tile_cols - 1) {
+ if (n == num_workers - 1) {
+ assert(tile_data->buf_end == tile_cols - 1);
winterface->execute(worker);
} else {
winterface->launch(worker);
}
-
- if (buf->col == tile_cols - 1) {
- final_worker = i;
- }
-
- ++n;
}
- for (; i > 0; --i) {
- VPxWorker *const worker = &pbi->tile_workers[i - 1];
+ for (; n > 0; --n) {
+ VPxWorker *const worker = &pbi->tile_workers[n - 1];
+ TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
// TODO(jzern): The tile may have specific error data associated with
// its vpx_internal_error_info which could be propagated to the main info
// in cm. Additionally once the threads have been synced and an error is
// detected, there's no point in continuing to decode tiles.
pbi->mb.corrupted |= !winterface->sync(worker);
- }
- if (final_worker > -1) {
- TileWorkerData *const tile_data =
- (TileWorkerData*)pbi->tile_workers[final_worker].data1;
- bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader);
- final_worker = -1;
- }
-
- // Accumulate thread frame counts.
- if (n >= tile_cols && !cm->frame_parallel_decoding_mode) {
- for (i = 0; i < num_workers; ++i) {
- TileWorkerData *const tile_data =
- (TileWorkerData*)pbi->tile_workers[i].data1;
- vp9_accumulate_frame_counts(cm, &tile_data->counts, 1);
- }
+ if (!bit_reader_end) bit_reader_end = tile_data->data_end;
}
}
+ // Accumulate thread frame counts.
+ if (!cm->frame_parallel_decoding_mode) {
+ for (n = 0; n < num_workers; ++n) {
+ TileWorkerData *const tile_data =
+ (TileWorkerData*)pbi->tile_workers[n].data1;
+ vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1);
+ }
+ }
+
+ assert(bit_reader_end || pbi->mb.corrupted);
return bit_reader_end;
}
@@ -1773,7 +1795,7 @@
}
cm->color_space = vpx_rb_read_literal(rb, 3);
if (cm->color_space != VPX_CS_SRGB) {
- vpx_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range
+ cm->color_range = (vpx_color_range_t)vpx_rb_read_bit(rb);
if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
cm->subsampling_x = vpx_rb_read_bit(rb);
cm->subsampling_y = vpx_rb_read_bit(rb);
@@ -1787,6 +1809,7 @@
cm->subsampling_y = cm->subsampling_x = 1;
}
} else {
+ cm->color_range = VPX_CR_FULL_RANGE;
if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
// Note if colorspace is SRGB then 4:4:4 chroma sampling is assumed.
// 4:2:2 or 4:4:0 chroma sampling is not allowed.
@@ -1892,6 +1915,7 @@
// specifies that the default color format should be YUV 4:2:0 in this
// case (normative).
cm->color_space = VPX_CS_BT_601;
+ cm->color_range = VPX_CR_STUDIO_RANGE;
cm->subsampling_y = cm->subsampling_x = 1;
cm->bit_depth = VPX_BITS_8;
#if CONFIG_VP9_HIGHBITDEPTH
@@ -1942,6 +1966,9 @@
get_frame_new_buffer(cm)->bit_depth = cm->bit_depth;
#endif
get_frame_new_buffer(cm)->color_space = cm->color_space;
+ get_frame_new_buffer(cm)->color_range = cm->color_range;
+ get_frame_new_buffer(cm)->render_width = cm->render_width;
+ get_frame_new_buffer(cm)->render_height = cm->render_height;
if (pbi->need_resync) {
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
@@ -2102,7 +2129,7 @@
rb->error_handler = error_handler;
rb->error_handler_data = &pbi->common;
if (pbi->decrypt_cb) {
- const int n = (int)MIN(MAX_VP9_HEADER_SIZE, data_end - data);
+ const int n = (int)VPXMIN(MAX_VP9_HEADER_SIZE, data_end - data);
pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n);
rb->bit_buffer = clear_data;
rb->bit_buffer_end = clear_data + n;
diff --git a/libvpx/vp9/decoder/vp9_decodeframe.h b/libvpx/vp9/decoder/vp9_decodeframe.h
index 05af706..ce33cbd 100644
--- a/libvpx/vp9/decoder/vp9_decodeframe.h
+++ b/libvpx/vp9/decoder/vp9_decodeframe.h
@@ -16,6 +16,8 @@
extern "C" {
#endif
+#include "vp9/common/vp9_enums.h"
+
struct VP9Decoder;
struct vpx_read_bit_buffer;
diff --git a/libvpx/vp9/decoder/vp9_decodemv.c b/libvpx/vp9/decoder/vp9_decodemv.c
index 33818a9..d3ca7b3 100644
--- a/libvpx/vp9/decoder/vp9_decodemv.c
+++ b/libvpx/vp9/decoder/vp9_decodemv.c
@@ -22,6 +22,8 @@
#include "vp9/decoder/vp9_decodemv.h"
#include "vp9/decoder/vp9_decodeframe.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
static PREDICTION_MODE read_intra_mode(vpx_reader *r, const vpx_prob *p) {
return (PREDICTION_MODE)vpx_read_tree(r, vp9_intra_mode_tree, p);
}
@@ -87,7 +89,7 @@
if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8)
return read_selected_tx_size(cm, xd, max_tx_size, r);
else
- return MIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]);
+ return VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]);
}
static int dec_get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids,
@@ -96,8 +98,8 @@
for (y = 0; y < y_mis; y++)
for (x = 0; x < x_mis; x++)
- segment_id = MIN(segment_id,
- segment_ids[mi_offset + y * cm->mi_cols + x]);
+ segment_id =
+ VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]);
assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
return segment_id;
@@ -156,8 +158,8 @@
const int bh = xd->plane[0].n4_h >> 1;
// TODO(slavarnway): move x_mis, y_mis into xd ?????
- const int x_mis = MIN(cm->mi_cols - mi_col, bw);
- const int y_mis = MIN(cm->mi_rows - mi_row, bh);
+ const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh);
if (!seg->enabled)
return 0; // Default for disabled segmentation
@@ -212,8 +214,8 @@
const int bh = xd->plane[0].n4_h >> 1;
// TODO(slavarnway): move x_mis, y_mis into xd ?????
- const int x_mis = MIN(cm->mi_cols - mi_col, bw);
- const int y_mis = MIN(cm->mi_rows - mi_row, bh);
+ const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh);
mbmi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r);
mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
diff --git a/libvpx/vp9/decoder/vp9_decoder.c b/libvpx/vp9/decoder/vp9_decoder.c
index 6734d00..4e88819 100644
--- a/libvpx/vp9/decoder/vp9_decoder.c
+++ b/libvpx/vp9/decoder/vp9_decoder.c
@@ -126,6 +126,9 @@
void vp9_decoder_remove(VP9Decoder *pbi) {
int i;
+ if (!pbi)
+ return;
+
vpx_get_worker_interface()->end(&pbi->lf_worker);
vpx_free(pbi->lf_worker.data1);
vpx_free(pbi->tile_data);
@@ -134,7 +137,6 @@
vpx_get_worker_interface()->end(worker);
}
vpx_free(pbi->tile_worker_data);
- vpx_free(pbi->tile_worker_info);
vpx_free(pbi->tile_workers);
if (pbi->num_tile_workers > 0) {
diff --git a/libvpx/vp9/decoder/vp9_decoder.h b/libvpx/vp9/decoder/vp9_decoder.h
index 915f9dc..4a5188f 100644
--- a/libvpx/vp9/decoder/vp9_decoder.h
+++ b/libvpx/vp9/decoder/vp9_decoder.h
@@ -36,8 +36,15 @@
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
} TileData;
+typedef struct TileBuffer {
+ const uint8_t *data;
+ size_t size;
+ int col; // only used with multi-threaded decoding
+} TileBuffer;
+
typedef struct TileWorkerData {
- struct VP9Decoder *pbi;
+ const uint8_t *data_end;
+ int buf_start, buf_end; // pbi->tile_buffers to decode, inclusive
vpx_reader bit_reader;
FRAME_COUNTS counts;
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
@@ -65,7 +72,7 @@
VPxWorker lf_worker;
VPxWorker *tile_workers;
TileWorkerData *tile_worker_data;
- TileInfo *tile_worker_info;
+ TileBuffer tile_buffers[64];
int num_tile_workers;
TileData *tile_data;
diff --git a/libvpx/vp9/decoder/vp9_detokenize.c b/libvpx/vp9/decoder/vp9_detokenize.c
index e4412dc..5912365 100644
--- a/libvpx/vp9/decoder/vp9_detokenize.c
+++ b/libvpx/vp9/decoder/vp9_detokenize.c
@@ -259,7 +259,7 @@
const int16_t *const dequant = pd->seg_dequant[seg_id];
const int ctx = get_entropy_context(tx_size, pd->above_context + x,
pd->left_context + y);
- const int eob = decode_coefs(xd, pd->plane_type,
+ const int eob = decode_coefs(xd, get_plane_type(plane),
pd->dqcoeff, tx_size,
dequant, ctx, sc->scan, sc->neighbors, r);
dec_set_contexts(xd, pd, tx_size, eob > 0, x, y);
diff --git a/libvpx/vp9/decoder/vp9_dthread.h b/libvpx/vp9/decoder/vp9_dthread.h
index f6cdccd..ba7c38a 100644
--- a/libvpx/vp9/decoder/vp9_dthread.h
+++ b/libvpx/vp9/decoder/vp9_dthread.h
@@ -15,6 +15,10 @@
#include "vpx_util/vpx_thread.h"
#include "vpx/internal/vpx_codec_internal.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct VP9Common;
struct VP9Decoder;
@@ -63,4 +67,8 @@
void vp9_frameworker_copy_context(VPxWorker *const dst_worker,
VPxWorker *const src_worker);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_DECODER_VP9_DTHREAD_H_
diff --git a/libvpx/vp9/encoder/vp9_aq_complexity.c b/libvpx/vp9/encoder/vp9_aq_complexity.c
index 15f227f..30ec191 100644
--- a/libvpx/vp9/encoder/vp9_aq_complexity.c
+++ b/libvpx/vp9/encoder/vp9_aq_complexity.c
@@ -10,6 +10,7 @@
#include <limits.h>
#include <math.h>
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/system_state.h"
#include "vp9/encoder/vp9_aq_complexity.h"
@@ -117,8 +118,8 @@
const int mi_offset = mi_row * cm->mi_cols + mi_col;
const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
- const int xmis = MIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]);
- const int ymis = MIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]);
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]);
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]);
int x, y;
int i;
unsigned char segment;
@@ -136,7 +137,7 @@
vpx_clear_system_state();
low_var_thresh = (cpi->oxcf.pass == 2)
- ? MAX(cpi->twopass.mb_av_energy, MIN_DEFAULT_LV_THRESH)
+ ? VPXMAX(cpi->twopass.mb_av_energy, MIN_DEFAULT_LV_THRESH)
: DEFAULT_LV_THRESH;
vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col);
diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
index e6b3686..2cd89c0 100644
--- a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -11,6 +11,7 @@
#include <limits.h>
#include <math.h>
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/system_state.h"
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
@@ -20,46 +21,9 @@
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_segmentation.h"
-struct CYCLIC_REFRESH {
- // Percentage of blocks per frame that are targeted as candidates
- // for cyclic refresh.
- int percent_refresh;
- // Maximum q-delta as percentage of base q.
- int max_qdelta_perc;
- // Superblock starting index for cycling through the frame.
- int sb_index;
- // Controls how long block will need to wait to be refreshed again, in
- // excess of the cycle time, i.e., in the case of all zero motion, block
- // will be refreshed every (100/percent_refresh + time_for_refresh) frames.
- int time_for_refresh;
- // Target number of (8x8) blocks that are set for delta-q.
- int target_num_seg_blocks;
- // Actual number of (8x8) blocks that were applied delta-q.
- int actual_num_seg1_blocks;
- int actual_num_seg2_blocks;
- // RD mult. parameters for segment 1.
- int rdmult;
- // Cyclic refresh map.
- signed char *map;
- // Map of the last q a block was coded at.
- uint8_t *last_coded_q_map;
- // Thresholds applied to the projected rate/distortion of the coding block,
- // when deciding whether block should be refreshed.
- int64_t thresh_rate_sb;
- int64_t thresh_dist_sb;
- // Threshold applied to the motion vector (in units of 1/8 pel) of the
- // coding block, when deciding whether block should be refreshed.
- int16_t motion_thresh;
- // Rate target ratio to set q delta.
- double rate_ratio_qdelta;
- // Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2.
- int rate_boost_fac;
- double low_content_avg;
- int qindex_delta[3];
-};
-
CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
size_t last_coded_q_map_size;
+ size_t consec_zero_mv_size;
CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
if (cr == NULL)
return NULL;
@@ -78,12 +42,20 @@
assert(MAXQ <= 255);
memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
+ consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv);
+ cr->consec_zero_mv = vpx_malloc(consec_zero_mv_size);
+ if (cr->consec_zero_mv == NULL) {
+ vpx_free(cr);
+ return NULL;
+ }
+ memset(cr->consec_zero_mv, 0, consec_zero_mv_size);
return cr;
}
void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
vpx_free(cr->map);
vpx_free(cr->last_coded_q_map);
+ vpx_free(cr->consec_zero_mv);
vpx_free(cr);
}
@@ -195,7 +167,8 @@
int num8x8bl = cm->MBs << 2;
// Weight for segment prior to encoding: take the average of the target
// number for the frame to be encoded and the actual from the previous frame.
- double weight_segment = (double)((cr->target_num_seg_blocks +
+ int target_refresh = cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100;
+ double weight_segment = (double)((target_refresh +
cr->actual_num_seg1_blocks + cr->actual_num_seg2_blocks) >> 1) /
num8x8bl;
// Compute delta-q corresponding to qindex i.
@@ -223,8 +196,8 @@
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
const int bw = num_8x8_blocks_wide_lookup[bsize];
const int bh = num_8x8_blocks_high_lookup[bsize];
- const int xmis = MIN(cm->mi_cols - mi_col, bw);
- const int ymis = MIN(cm->mi_rows - mi_row, bh);
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
const int block_index = mi_row * cm->mi_cols + mi_col;
const int refresh_this_block = candidate_refresh_aq(cr, mbmi, rate, dist,
bsize);
@@ -236,7 +209,7 @@
// segment_id.
if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
mbmi->segment_id = refresh_this_block;
- // Reset segment_id if will be skipped.
+ // Reset segment_id if it will be skipped.
if (skip)
mbmi->segment_id = CR_SEGMENT_ID_BASE;
}
@@ -265,14 +238,48 @@
int map_offset = block_index + y * cm->mi_cols + x;
cr->map[map_offset] = new_map_value;
cpi->segmentation_map[map_offset] = mbmi->segment_id;
+ }
+}
+
+void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi,
+ const MB_MODE_INFO *const mbmi,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ const VP9_COMMON *const cm = &cpi->common;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ MV mv = mbmi->mv[0].as_mv;
+ const int bw = num_8x8_blocks_wide_lookup[bsize];
+ const int bh = num_8x8_blocks_high_lookup[bsize];
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
+ const int block_index = mi_row * cm->mi_cols + mi_col;
+ int x, y;
+ for (y = 0; y < ymis; y++)
+ for (x = 0; x < xmis; x++) {
+ int map_offset = block_index + y * cm->mi_cols + x;
// Inter skip blocks were clearly not coded at the current qindex, so
// don't update the map for them. For cases where motion is non-zero or
// the reference frame isn't the previous frame, the previous value in
// the map for this spatial location is not entirely correct.
- if (!is_inter_block(mbmi) || !skip)
+ if ((!is_inter_block(mbmi) || !mbmi->skip) &&
+ mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) {
cr->last_coded_q_map[map_offset] = clamp(
cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ);
+ } else if (is_inter_block(mbmi) && mbmi->skip &&
+ mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) {
+ cr->last_coded_q_map[map_offset] = VPXMIN(
+ clamp(cm->base_qindex + cr->qindex_delta[mbmi->segment_id],
+ 0, MAXQ),
+ cr->last_coded_q_map[map_offset]);
+ // Update the consecutive zero/low_mv count.
+ if (is_inter_block(mbmi) && (abs(mv.row) < 8 && abs(mv.col) < 8)) {
+ if (cr->consec_zero_mv[map_offset] < 255)
+ cr->consec_zero_mv[map_offset]++;
+ } else {
+ cr->consec_zero_mv[map_offset] = 0;
+ }
}
+ }
}
// Update the actual number of blocks that were applied the segment delta q.
@@ -389,6 +396,10 @@
unsigned char *const seg_map = cpi->segmentation_map;
int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame;
int xmis, ymis, x, y;
+ int consec_zero_mv_thresh = 0;
+ int qindex_thresh = 0;
+ int count_sel = 0;
+ int count_tot = 0;
memset(seg_map, CR_SEGMENT_ID_BASE, cm->mi_rows * cm->mi_cols);
sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
@@ -401,6 +412,12 @@
assert(cr->sb_index < sbs_in_frame);
i = cr->sb_index;
cr->target_num_seg_blocks = 0;
+ if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
+ consec_zero_mv_thresh = 100;
+ qindex_thresh =
+ cpi->oxcf.content == VP9E_CONTENT_SCREEN
+ ? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
+ : vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex);
do {
int sum_map = 0;
// Get the mi_row/mi_col corresponding to superblock index i.
@@ -408,18 +425,14 @@
int sb_col_index = i - sb_row_index * sb_cols;
int mi_row = sb_row_index * MI_BLOCK_SIZE;
int mi_col = sb_col_index * MI_BLOCK_SIZE;
- int qindex_thresh =
- cpi->oxcf.content == VP9E_CONTENT_SCREEN
- ? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
- : 0;
assert(mi_row >= 0 && mi_row < cm->mi_rows);
assert(mi_col >= 0 && mi_col < cm->mi_cols);
bl_index = mi_row * cm->mi_cols + mi_col;
// Loop through all 8x8 blocks in superblock and update map.
- xmis = MIN(cm->mi_cols - mi_col,
- num_8x8_blocks_wide_lookup[BLOCK_64X64]);
- ymis = MIN(cm->mi_rows - mi_row,
- num_8x8_blocks_high_lookup[BLOCK_64X64]);
+ xmis =
+ VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_64X64]);
+ ymis =
+ VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_64X64]);
for (y = 0; y < ymis; y++) {
for (x = 0; x < xmis; x++) {
const int bl_index2 = bl_index + y * cm->mi_cols + x;
@@ -427,8 +440,12 @@
// for possible boost/refresh (segment 1). The segment id may get
// reset to 0 later if block gets coded anything other than ZEROMV.
if (cr->map[bl_index2] == 0) {
- if (cr->last_coded_q_map[bl_index2] > qindex_thresh)
+ count_tot++;
+ if (cr->last_coded_q_map[bl_index2] > qindex_thresh ||
+ cr->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) {
sum_map++;
+ count_sel++;
+ }
} else if (cr->map[bl_index2] < 0) {
cr->map[bl_index2]++;
}
@@ -449,6 +466,9 @@
}
} while (cr->target_num_seg_blocks < block_count && i != cr->sb_index);
cr->sb_index = i;
+ cr->reduce_refresh = 0;
+ if (count_sel < (3 * count_tot) >> 2)
+ cr->reduce_refresh = 1;
}
// Set cyclic refresh parameters.
@@ -457,6 +477,8 @@
const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
cr->percent_refresh = 10;
+ if (cr->reduce_refresh)
+ cr->percent_refresh = 5;
cr->max_qdelta_perc = 50;
cr->time_for_refresh = 0;
// Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4)
@@ -476,7 +498,11 @@
cr->rate_boost_fac = 10;
} else {
cr->motion_thresh = 32;
- cr->rate_boost_fac = 17;
+ cr->rate_boost_fac = 15;
+ }
+ if (cpi->svc.spatial_layer_id > 0) {
+ cr->motion_thresh = 4;
+ cr->rate_boost_fac = 12;
}
}
@@ -489,11 +515,10 @@
const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc);
if (cm->current_video_frame == 0)
cr->low_content_avg = 0.0;
- // Don't apply refresh on key frame or enhancement layer frames.
+ // Don't apply refresh on key frame or temporal enhancement layer frames.
if (!apply_cyclic_refresh ||
(cm->frame_type == KEY_FRAME) ||
- (cpi->svc.temporal_layer_id > 0) ||
- (cpi->svc.spatial_layer_id > 0)) {
+ (cpi->svc.temporal_layer_id > 0)) {
// Set segmentation map to 0 and disable.
unsigned char *const seg_map = cpi->segmentation_map;
memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
@@ -501,6 +526,8 @@
if (cm->frame_type == KEY_FRAME) {
memset(cr->last_coded_q_map, MAXQ,
cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
+ memset(cr->consec_zero_mv, 0,
+ cm->mi_rows * cm->mi_cols * sizeof(*cr->consec_zero_mv));
cr->sb_index = 0;
}
return;
@@ -551,11 +578,16 @@
// Set a more aggressive (higher) q delta for segment BOOST2.
qindex_delta = compute_deltaq(
- cpi, cm->base_qindex, MIN(CR_MAX_RATE_TARGET_RATIO,
- 0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta));
+ cpi, cm->base_qindex,
+ VPXMIN(CR_MAX_RATE_TARGET_RATIO,
+ 0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta));
cr->qindex_delta[2] = qindex_delta;
vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta);
+ // Reset if resolution change has occurred.
+ if (cpi->resize_pending != 0)
+ vp9_cyclic_refresh_reset_resize(cpi);
+
// Update the segmentation and refresh map.
cyclic_refresh_update_map(cpi);
}
@@ -569,6 +601,8 @@
const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
+ memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols);
+ memset(cr->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols);
cr->sb_index = 0;
cpi->refresh_golden_frame = 1;
}
diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h
index 29d2a91..a5b3813 100644
--- a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -12,6 +12,7 @@
#ifndef VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
#define VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
+#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
#ifdef __cplusplus
@@ -27,9 +28,49 @@
// Maximum rate target ratio for setting segment delta-qp.
#define CR_MAX_RATE_TARGET_RATIO 4.0
+struct CYCLIC_REFRESH {
+ // Percentage of blocks per frame that are targeted as candidates
+ // for cyclic refresh.
+ int percent_refresh;
+ // Maximum q-delta as percentage of base q.
+ int max_qdelta_perc;
+ // Superblock starting index for cycling through the frame.
+ int sb_index;
+ // Controls how long block will need to wait to be refreshed again, in
+ // excess of the cycle time, i.e., in the case of all zero motion, block
+ // will be refreshed every (100/percent_refresh + time_for_refresh) frames.
+ int time_for_refresh;
+ // Target number of (8x8) blocks that are set for delta-q.
+ int target_num_seg_blocks;
+ // Actual number of (8x8) blocks that were applied delta-q.
+ int actual_num_seg1_blocks;
+ int actual_num_seg2_blocks;
+ // RD mult. parameters for segment 1.
+ int rdmult;
+ // Cyclic refresh map.
+ signed char *map;
+ // Map of the last q a block was coded at.
+ uint8_t *last_coded_q_map;
+ // Count on how many consecutive times a block uses ZEROMV for encoding.
+ uint8_t *consec_zero_mv;
+ // Thresholds applied to the projected rate/distortion of the coding block,
+ // when deciding whether block should be refreshed.
+ int64_t thresh_rate_sb;
+ int64_t thresh_dist_sb;
+ // Threshold applied to the motion vector (in units of 1/8 pel) of the
+ // coding block, when deciding whether block should be refreshed.
+ int16_t motion_thresh;
+ // Rate target ratio to set q delta.
+ double rate_ratio_qdelta;
+ // Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2.
+ int rate_boost_fac;
+ double low_content_avg;
+ int qindex_delta[3];
+ int reduce_refresh;
+};
+
struct VP9_COMP;
-struct CYCLIC_REFRESH;
typedef struct CYCLIC_REFRESH CYCLIC_REFRESH;
CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols);
@@ -54,6 +95,11 @@
int mi_row, int mi_col, BLOCK_SIZE bsize,
int64_t rate, int64_t dist, int skip);
+void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi,
+ const MB_MODE_INFO *const mbmi,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize);
+
// Update the segmentation map, and related quantities: cyclic refresh map,
// refresh sb_index, and target number of blocks to be refreshed.
void vp9_cyclic_refresh_update__map(struct VP9_COMP *const cpi);
diff --git a/libvpx/vp9/encoder/vp9_bitstream.c b/libvpx/vp9/encoder/vp9_bitstream.c
index d0de095..4615554 100644
--- a/libvpx/vp9/encoder/vp9_bitstream.c
+++ b/libvpx/vp9/encoder/vp9_bitstream.c
@@ -14,6 +14,7 @@
#include "vpx/vpx_encoder.h"
#include "vpx_dsp/bitwriter_buffer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem_ops.h"
#include "vpx_ports/system_state.h"
@@ -175,12 +176,10 @@
const unsigned char *pb = b->prob;
int v = e >> 1;
int n = l; /* number of bits in v, assumed nonzero */
- int i = 0;
do {
const int bb = (v >> --n) & 1;
- vpx_write(w, bb, pb[i >> 1]);
- i = b->tree[i + bb];
+ vpx_write(w, bb, *pb++);
} while (n);
}
@@ -815,7 +814,7 @@
static void encode_txfm_probs(VP9_COMMON *cm, vpx_writer *w,
FRAME_COUNTS *counts) {
// Mode
- vpx_write_literal(w, MIN(cm->tx_mode, ALLOW_32X32), 2);
+ vpx_write_literal(w, VPXMIN(cm->tx_mode, ALLOW_32X32), 2);
if (cm->tx_mode >= ALLOW_32X32)
vpx_write_bit(w, cm->tx_mode == TX_MODE_SELECT);
@@ -968,14 +967,14 @@
return total_size;
}
-static void write_display_size(const VP9_COMMON *cm,
- struct vpx_write_bit_buffer *wb) {
- const int scaling_active = cm->width != cm->display_width ||
- cm->height != cm->display_height;
+static void write_render_size(const VP9_COMMON *cm,
+ struct vpx_write_bit_buffer *wb) {
+ const int scaling_active = cm->width != cm->render_width ||
+ cm->height != cm->render_height;
vpx_wb_write_bit(wb, scaling_active);
if (scaling_active) {
- vpx_wb_write_literal(wb, cm->display_width - 1, 16);
- vpx_wb_write_literal(wb, cm->display_height - 1, 16);
+ vpx_wb_write_literal(wb, cm->render_width - 1, 16);
+ vpx_wb_write_literal(wb, cm->render_height - 1, 16);
}
}
@@ -984,7 +983,7 @@
vpx_wb_write_literal(wb, cm->width - 1, 16);
vpx_wb_write_literal(wb, cm->height - 1, 16);
- write_display_size(cm, wb);
+ write_render_size(cm, wb);
}
static void write_frame_size_with_refs(VP9_COMP *cpi,
@@ -1022,7 +1021,7 @@
vpx_wb_write_literal(wb, cm->height - 1, 16);
}
- write_display_size(cm, wb);
+ write_render_size(cm, wb);
}
static void write_sync_code(struct vpx_write_bit_buffer *wb) {
@@ -1059,7 +1058,8 @@
}
vpx_wb_write_literal(wb, cm->color_space, 3);
if (cm->color_space != VPX_CS_SRGB) {
- vpx_wb_write_bit(wb, 0); // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
+ // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
+ vpx_wb_write_bit(wb, cm->color_range);
if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
assert(cm->subsampling_x != 1 || cm->subsampling_y != 1);
vpx_wb_write_bit(wb, cm->subsampling_x);
diff --git a/libvpx/vp9/encoder/vp9_context_tree.c b/libvpx/vp9/encoder/vp9_context_tree.c
index e87cccb..396ed3f 100644
--- a/libvpx/vp9/encoder/vp9_context_tree.c
+++ b/libvpx/vp9/encoder/vp9_context_tree.c
@@ -30,13 +30,13 @@
for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
- vpx_memalign(16, num_pix * sizeof(*ctx->coeff[i][k])));
+ vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
- vpx_memalign(16, num_pix * sizeof(*ctx->qcoeff[i][k])));
+ vpx_memalign(32, num_pix * sizeof(*ctx->qcoeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
- vpx_memalign(16, num_pix * sizeof(*ctx->dqcoeff[i][k])));
+ vpx_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
- vpx_memalign(16, num_blk * sizeof(*ctx->eobs[i][k])));
+ vpx_memalign(32, num_blk * sizeof(*ctx->eobs[i][k])));
ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
diff --git a/libvpx/vp9/encoder/vp9_context_tree.h b/libvpx/vp9/encoder/vp9_context_tree.h
index ac24497..8e365ce 100644
--- a/libvpx/vp9/encoder/vp9_context_tree.h
+++ b/libvpx/vp9/encoder/vp9_context_tree.h
@@ -14,6 +14,10 @@
#include "vp9/common/vp9_blockd.h"
#include "vp9/encoder/vp9_block.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct VP9_COMP;
struct VP9Common;
struct ThreadData;
@@ -84,4 +88,8 @@
void vp9_setup_pc_tree(struct VP9Common *cm, struct ThreadData *td);
void vp9_free_pc_tree(struct ThreadData *td);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif /* VP9_ENCODER_VP9_CONTEXT_TREE_H_ */
diff --git a/libvpx/vp9/encoder/vp9_denoiser.c b/libvpx/vp9/encoder/vp9_denoiser.c
index 5f99285..8623b42 100644
--- a/libvpx/vp9/encoder/vp9_denoiser.c
+++ b/libvpx/vp9/encoder/vp9_denoiser.c
@@ -10,19 +10,18 @@
#include <assert.h>
#include <limits.h>
+#include <math.h>
+
#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_scale/yv12config.h"
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_context_tree.h"
#include "vp9/encoder/vp9_denoiser.h"
+#include "vp9/encoder/vp9_encoder.h"
-/* The VP9 denoiser is a work-in-progress. It currently is only designed to work
- * with speed 6, though it (inexplicably) seems to also work with speed 5 (one
- * would need to modify the source code in vp9_pickmode.c and vp9_encoder.c to
- * make the calls to the vp9_denoiser_* functions when in speed 5).
- *
- * The implementation is very similar to that of the VP8 denoiser. While
+/* The VP9 denoiser is similar to that of the VP8 denoiser. While
* choosing the motion vectors / reference frames, the denoiser is run, and if
* it did not modify the signal to much, the denoised block is copied to the
* signal.
@@ -120,10 +119,10 @@
adj = adj_val[2];
}
if (diff > 0) {
- avg[c] = MIN(UINT8_MAX, sig[c] + adj);
+ avg[c] = VPXMIN(UINT8_MAX, sig[c] + adj);
total_adj += adj;
} else {
- avg[c] = MAX(0, sig[c] - adj);
+ avg[c] = VPXMAX(0, sig[c] - adj);
total_adj -= adj;
}
}
@@ -160,13 +159,13 @@
// Diff positive means we made positive adjustment above
// (in first try/attempt), so now make negative adjustment to bring
// denoised signal down.
- avg[c] = MAX(0, avg[c] - adj);
+ avg[c] = VPXMAX(0, avg[c] - adj);
total_adj -= adj;
} else {
// Diff negative means we made negative adjustment above
// (in first try/attempt), so now make positive adjustment to bring
// denoised signal up.
- avg[c] = MIN(UINT8_MAX, avg[c] + adj);
+ avg[c] = VPXMIN(UINT8_MAX, avg[c] + adj);
total_adj += adj;
}
}
@@ -194,8 +193,8 @@
int mi_row,
int mi_col,
PICK_MODE_CONTEXT *ctx,
- int *motion_magnitude
- ) {
+ int *motion_magnitude,
+ int is_skin) {
int mv_col, mv_row;
int sse_diff = ctx->zeromv_sse - ctx->newmv_sse;
MV_REFERENCE_FRAME frame;
@@ -213,6 +212,9 @@
saved_mbmi = *mbmi;
+ if (is_skin && *motion_magnitude > 16)
+ return COPY_BLOCK;
+
// If the best reference frame uses inter-prediction and there is enough of a
// difference in sum-squared-error, use it.
if (frame != INTRA_FRAME &&
@@ -312,18 +314,38 @@
int mi_row, int mi_col, BLOCK_SIZE bs,
PICK_MODE_CONTEXT *ctx) {
int motion_magnitude = 0;
- VP9_DENOISER_DECISION decision = FILTER_BLOCK;
+ VP9_DENOISER_DECISION decision = COPY_BLOCK;
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col);
uint8_t *mc_avg_start = block_start(mc_avg.y_buffer, mc_avg.y_stride,
mi_row, mi_col);
struct buf_2d src = mb->plane[0].src;
+ int is_skin = 0;
- decision = perform_motion_compensation(denoiser, mb, bs,
- denoiser->increase_denoising,
- mi_row, mi_col, ctx,
- &motion_magnitude);
+ if (bs <= BLOCK_16X16 && denoiser->denoising_on) {
+ // Take center pixel in block to determine is_skin.
+ const int y_width_shift = (4 << b_width_log2_lookup[bs]) >> 1;
+ const int y_height_shift = (4 << b_height_log2_lookup[bs]) >> 1;
+ const int uv_width_shift = y_width_shift >> 1;
+ const int uv_height_shift = y_height_shift >> 1;
+ const int stride = mb->plane[0].src.stride;
+ const int strideuv = mb->plane[1].src.stride;
+ const uint8_t ysource =
+ mb->plane[0].src.buf[y_height_shift * stride + y_width_shift];
+ const uint8_t usource =
+ mb->plane[1].src.buf[uv_height_shift * strideuv + uv_width_shift];
+ const uint8_t vsource =
+ mb->plane[2].src.buf[uv_height_shift * strideuv + uv_width_shift];
+ is_skin = vp9_skin_pixel(ysource, usource, vsource);
+ }
+
+ if (denoiser->denoising_on)
+ decision = perform_motion_compensation(denoiser, mb, bs,
+ denoiser->increase_denoising,
+ mi_row, mi_col, ctx,
+ &motion_magnitude,
+ is_skin);
if (decision == FILTER_BLOCK) {
decision = vp9_denoiser_filter(src.buf, src.stride,
@@ -345,23 +367,24 @@
}
}
-static void copy_frame(YV12_BUFFER_CONFIG dest, const YV12_BUFFER_CONFIG src) {
+static void copy_frame(YV12_BUFFER_CONFIG * const dest,
+ const YV12_BUFFER_CONFIG * const src) {
int r;
- const uint8_t *srcbuf = src.y_buffer;
- uint8_t *destbuf = dest.y_buffer;
+ const uint8_t *srcbuf = src->y_buffer;
+ uint8_t *destbuf = dest->y_buffer;
- assert(dest.y_width == src.y_width);
- assert(dest.y_height == src.y_height);
+ assert(dest->y_width == src->y_width);
+ assert(dest->y_height == src->y_height);
- for (r = 0; r < dest.y_height; ++r) {
- memcpy(destbuf, srcbuf, dest.y_width);
- destbuf += dest.y_stride;
- srcbuf += src.y_stride;
+ for (r = 0; r < dest->y_height; ++r) {
+ memcpy(destbuf, srcbuf, dest->y_width);
+ destbuf += dest->y_stride;
+ srcbuf += src->y_stride;
}
}
-static void swap_frame_buffer(YV12_BUFFER_CONFIG *dest,
- YV12_BUFFER_CONFIG *src) {
+static void swap_frame_buffer(YV12_BUFFER_CONFIG * const dest,
+ YV12_BUFFER_CONFIG * const src) {
uint8_t *tmp_buf = dest->y_buffer;
assert(dest->y_width == src->y_width);
assert(dest->y_height == src->y_height);
@@ -374,27 +397,46 @@
FRAME_TYPE frame_type,
int refresh_alt_ref_frame,
int refresh_golden_frame,
- int refresh_last_frame) {
- if (frame_type == KEY_FRAME) {
+ int refresh_last_frame,
+ int resized) {
+ // Copy source into denoised reference buffers on KEY_FRAME or
+ // if the just encoded frame was resized.
+ if (frame_type == KEY_FRAME || resized != 0) {
int i;
// Start at 1 so as not to overwrite the INTRA_FRAME
for (i = 1; i < MAX_REF_FRAMES; ++i)
- copy_frame(denoiser->running_avg_y[i], src);
+ copy_frame(&denoiser->running_avg_y[i], &src);
return;
}
- /* For non key frames */
- if (refresh_alt_ref_frame) {
- swap_frame_buffer(&denoiser->running_avg_y[ALTREF_FRAME],
- &denoiser->running_avg_y[INTRA_FRAME]);
- }
- if (refresh_golden_frame) {
- swap_frame_buffer(&denoiser->running_avg_y[GOLDEN_FRAME],
- &denoiser->running_avg_y[INTRA_FRAME]);
- }
- if (refresh_last_frame) {
- swap_frame_buffer(&denoiser->running_avg_y[LAST_FRAME],
- &denoiser->running_avg_y[INTRA_FRAME]);
+ // If more than one refresh occurs, must copy frame buffer.
+ if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame)
+ > 1) {
+ if (refresh_alt_ref_frame) {
+ copy_frame(&denoiser->running_avg_y[ALTREF_FRAME],
+ &denoiser->running_avg_y[INTRA_FRAME]);
+ }
+ if (refresh_golden_frame) {
+ copy_frame(&denoiser->running_avg_y[GOLDEN_FRAME],
+ &denoiser->running_avg_y[INTRA_FRAME]);
+ }
+ if (refresh_last_frame) {
+ copy_frame(&denoiser->running_avg_y[LAST_FRAME],
+ &denoiser->running_avg_y[INTRA_FRAME]);
+ }
+ } else {
+ if (refresh_alt_ref_frame) {
+ swap_frame_buffer(&denoiser->running_avg_y[ALTREF_FRAME],
+ &denoiser->running_avg_y[INTRA_FRAME]);
+ }
+ if (refresh_golden_frame) {
+ swap_frame_buffer(&denoiser->running_avg_y[GOLDEN_FRAME],
+ &denoiser->running_avg_y[INTRA_FRAME]);
+ }
+ if (refresh_last_frame) {
+ swap_frame_buffer(&denoiser->running_avg_y[LAST_FRAME],
+ &denoiser->running_avg_y[INTRA_FRAME]);
+ }
}
}
@@ -456,15 +498,43 @@
vp9_denoiser_free(denoiser);
return 1;
}
+
+ fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height,
+ ssx, ssy,
+#if CONFIG_VP9_HIGHBITDEPTH
+ use_highbitdepth,
+#endif
+ border, legacy_byte_alignment);
+ if (fail) {
+ vp9_denoiser_free(denoiser);
+ return 1;
+ }
#ifdef OUTPUT_YUV_DENOISED
make_grayscale(&denoiser->running_avg_y[i]);
#endif
denoiser->increase_denoising = 0;
denoiser->frame_buffer_initialized = 1;
-
+ vp9_denoiser_init_noise_estimate(denoiser, width, height);
return 0;
}
+void vp9_denoiser_init_noise_estimate(VP9_DENOISER *denoiser,
+ int width,
+ int height) {
+ // Denoiser is off by default, i.e., no denoising is performed.
+ // Noise level is measured periodically, and if observed to be above
+ // thresh_noise_estimate, then denoising is performed, i.e., denoising_on = 1.
+ denoiser->denoising_on = 0;
+ denoiser->noise_estimate = 0;
+ denoiser->noise_estimate_count = 0;
+ denoiser->thresh_noise_estimate = 20;
+ if (width * height >= 1920 * 1080) {
+ denoiser->thresh_noise_estimate = 70;
+ } else if (width * height >= 1280 * 720) {
+ denoiser->thresh_noise_estimate = 40;
+ }
+}
+
void vp9_denoiser_free(VP9_DENOISER *denoiser) {
int i;
denoiser->frame_buffer_initialized = 0;
@@ -475,6 +545,120 @@
vpx_free_frame_buffer(&denoiser->running_avg_y[i]);
}
vpx_free_frame_buffer(&denoiser->mc_running_avg_y);
+ vpx_free_frame_buffer(&denoiser->last_source);
+}
+
+void vp9_denoiser_update_noise_estimate(VP9_COMP *const cpi) {
+ const VP9_COMMON *const cm = &cpi->common;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ int frame_period = 10;
+ int thresh_consec_zeromv = 8;
+ unsigned int thresh_sum_diff = 128;
+ int num_frames_estimate = 20;
+ int min_blocks_estimate = cm->mi_rows * cm->mi_cols >> 7;
+ // Estimate of noise level every frame_period frames.
+ // Estimate is between current source and last source.
+ if (cm->current_video_frame % frame_period != 0 ||
+ cpi->denoiser.last_source.y_buffer == NULL) {
+ copy_frame(&cpi->denoiser.last_source, cpi->Source);
+ return;
+ } else {
+ int num_samples = 0;
+ uint64_t avg_est = 0;
+ int bsize = BLOCK_16X16;
+ static const unsigned char const_source[16] = {
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128};
+ // Loop over sub-sample of 16x16 blocks of frame, and for blocks that have
+ // been encoded as zero/small mv at least x consecutive frames, compute
+ // the variance to update estimate of noise in the source.
+ const uint8_t *src_y = cpi->Source->y_buffer;
+ const int src_ystride = cpi->Source->y_stride;
+ const uint8_t *last_src_y = cpi->denoiser.last_source.y_buffer;
+ const int last_src_ystride = cpi->denoiser.last_source.y_stride;
+ const uint8_t *src_u = cpi->Source->u_buffer;
+ const uint8_t *src_v = cpi->Source->v_buffer;
+ const int src_uvstride = cpi->Source->uv_stride;
+ const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1;
+ const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1;
+ const int uv_width_shift = y_width_shift >> 1;
+ const int uv_height_shift = y_height_shift >> 1;
+ int mi_row, mi_col;
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row ++) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col ++) {
+ // 16x16 blocks, 1/4 sample of frame.
+ if (mi_row % 4 == 0 && mi_col % 4 == 0) {
+ int bl_index = mi_row * cm->mi_cols + mi_col;
+ int bl_index1 = bl_index + 1;
+ int bl_index2 = bl_index + cm->mi_cols;
+ int bl_index3 = bl_index2 + 1;
+ // Only consider blocks that are likely steady background, i.e., have
+ // been encoded as zero/low motion x (= thresh_consec_zeromv) frames
+ // in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all
+ // 4 sub-blocks for 16x16 block. Also, avoid skin blocks.
+ const uint8_t ysource =
+ src_y[y_height_shift * src_ystride + y_width_shift];
+ const uint8_t usource =
+ src_u[uv_height_shift * src_uvstride + uv_width_shift];
+ const uint8_t vsource =
+ src_v[uv_height_shift * src_uvstride + uv_width_shift];
+ int is_skin = vp9_skin_pixel(ysource, usource, vsource);
+ if (cr->consec_zero_mv[bl_index] > thresh_consec_zeromv &&
+ cr->consec_zero_mv[bl_index1] > thresh_consec_zeromv &&
+ cr->consec_zero_mv[bl_index2] > thresh_consec_zeromv &&
+ cr->consec_zero_mv[bl_index3] > thresh_consec_zeromv &&
+ !is_skin) {
+ // Compute variance.
+ unsigned int sse;
+ unsigned int variance = cpi->fn_ptr[bsize].vf(src_y,
+ src_ystride,
+ last_src_y,
+ last_src_ystride,
+ &sse);
+ // Only consider this block as valid for noise measurement if the
+ // average term (sse - variance = N * avg^{2}, N = 16X16) of the
+ // temporal residual is small (avoid effects from lighting change).
+ if ((sse - variance) < thresh_sum_diff) {
+ unsigned int sse2;
+ const unsigned int spatial_variance =
+ cpi->fn_ptr[bsize].vf(src_y, src_ystride, const_source,
+ 0, &sse2);
+ avg_est += variance / (10 + spatial_variance);
+ num_samples++;
+ }
+ }
+ }
+ src_y += 8;
+ last_src_y += 8;
+ src_u += 4;
+ src_v += 4;
+ }
+ src_y += (src_ystride << 3) - (cm->mi_cols << 3);
+ last_src_y += (last_src_ystride << 3) - (cm->mi_cols << 3);
+ src_u += (src_uvstride << 2) - (cm->mi_cols << 2);
+ src_v += (src_uvstride << 2) - (cm->mi_cols << 2);
+ }
+ // Update noise estimate only if more than min_blocks_estimate block samples
+ // were collected and avg_est > 0 (avg_est == 0 can happen if the application
+ // inputs duplicate frames).
+ if (num_samples > min_blocks_estimate && avg_est > 0) {
+ // Normalize.
+ avg_est = (avg_est << 8) / num_samples;
+ // Update noise estimate.
+ cpi->denoiser.noise_estimate = (3 * cpi->denoiser.noise_estimate +
+ avg_est) >> 2;
+ cpi->denoiser.noise_estimate_count++;
+ if (cpi->denoiser.noise_estimate_count == num_frames_estimate) {
+ // Reset counter and check noise level condition.
+ cpi->denoiser.noise_estimate_count = 0;
+ if (cpi->denoiser.noise_estimate > cpi->denoiser.thresh_noise_estimate)
+ cpi->denoiser.denoising_on = 1;
+ else
+ cpi->denoiser.denoising_on = 0;
+ }
+ }
+ }
+ copy_frame(&cpi->denoiser.last_source, cpi->Source);
}
#ifdef OUTPUT_YUV_DENOISED
diff --git a/libvpx/vp9/encoder/vp9_denoiser.h b/libvpx/vp9/encoder/vp9_denoiser.h
index b2af792..f8ad4ac 100644
--- a/libvpx/vp9/encoder/vp9_denoiser.h
+++ b/libvpx/vp9/encoder/vp9_denoiser.h
@@ -12,6 +12,7 @@
#define VP9_ENCODER_DENOISER_H_
#include "vp9/encoder/vp9_block.h"
+#include "vp9/encoder/vp9_skin_detection.h"
#include "vpx_scale/yv12config.h"
#ifdef __cplusplus
@@ -28,16 +29,24 @@
typedef struct vp9_denoiser {
YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES];
YV12_BUFFER_CONFIG mc_running_avg_y;
+ YV12_BUFFER_CONFIG last_source;
int increase_denoising;
int frame_buffer_initialized;
+ int denoising_on;
+ int noise_estimate;
+ int thresh_noise_estimate;
+ int noise_estimate_count;
} VP9_DENOISER;
+struct VP9_COMP;
+
void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
YV12_BUFFER_CONFIG src,
FRAME_TYPE frame_type,
int refresh_alt_ref_frame,
int refresh_golden_frame,
- int refresh_last_frame);
+ int refresh_last_frame,
+ int resized);
void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs,
@@ -67,6 +76,12 @@
void vp9_denoiser_free(VP9_DENOISER *denoiser);
+void vp9_denoiser_init_noise_estimate(VP9_DENOISER *denoiser,
+ int width,
+ int height);
+
+void vp9_denoiser_update_noise_estimate(struct VP9_COMP *const cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/libvpx/vp9/encoder/vp9_encodeframe.c b/libvpx/vp9/encoder/vp9_encodeframe.c
index 295a751..2333a13 100644
--- a/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -16,6 +16,7 @@
#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
#include "vpx_ports/system_state.h"
@@ -979,8 +980,8 @@
const struct segmentation *const seg = &cm->seg;
const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
- const int x_mis = MIN(bw, cm->mi_cols - mi_col);
- const int y_mis = MIN(bh, cm->mi_rows - mi_row);
+ const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
+ const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
MV_REF *const frame_mvs =
cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
int w, h;
@@ -1132,8 +1133,8 @@
mbmi->sb_type = bsize;
mbmi->mode = ZEROMV;
- mbmi->tx_size = MIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[tx_mode]);
+ mbmi->tx_size =
+ VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]);
mbmi->skip = 1;
mbmi->uv_mode = DC_PRED;
mbmi->ref_frame[0] = LAST_FRAME;
@@ -1496,7 +1497,7 @@
int rows_left, int cols_left,
int *bh, int *bw) {
if (rows_left <= 0 || cols_left <= 0) {
- return MIN(bsize, BLOCK_8X8);
+ return VPXMIN(bsize, BLOCK_8X8);
} else {
for (; bsize > 0; bsize -= 3) {
*bh = num_8x8_blocks_high_lookup[bsize];
@@ -1672,8 +1673,8 @@
const struct segmentation *const seg = &cm->seg;
const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
- const int x_mis = MIN(bw, cm->mi_cols - mi_col);
- const int y_mis = MIN(bh, cm->mi_rows - mi_row);
+ const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
+ const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
*(xd->mi[0]) = ctx->mic;
*(x->mbmi_ext) = ctx->mbmi_ext;
@@ -1738,10 +1739,12 @@
update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize);
#if CONFIG_VP9_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0 && output_enabled &&
- cpi->common.frame_type != KEY_FRAME) {
+ if (cpi->oxcf.noise_sensitivity > 0 &&
+ output_enabled &&
+ cpi->common.frame_type != KEY_FRAME &&
+ cpi->resize_pending == 0) {
vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col,
- MAX(BLOCK_8X8, bsize), ctx);
+ VPXMAX(BLOCK_8X8, bsize), ctx);
}
#endif
@@ -2133,8 +2136,8 @@
MODE_INFO *mi = mi_8x8[index+j];
BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : 0;
bs_hist[sb_type]++;
- *min_block_size = MIN(*min_block_size, sb_type);
- *max_block_size = MAX(*max_block_size, sb_type);
+ *min_block_size = VPXMIN(*min_block_size, sb_type);
+ *max_block_size = VPXMAX(*max_block_size, sb_type);
}
index += xd->mi_stride;
}
@@ -2211,8 +2214,8 @@
if (vp9_active_edge_sb(cpi, mi_row, mi_col)) {
min_size = BLOCK_4X4;
} else {
- min_size = MIN(cpi->sf.rd_auto_partition_min_limit,
- MIN(min_size, max_size));
+ min_size =
+ VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size));
}
// When use_square_partition_only is true, make sure at least one square
@@ -2248,8 +2251,8 @@
for (idx = 0; idx < mi_width; ++idx) {
mi = prev_mi[idy * cm->mi_stride + idx];
bs = mi ? mi->mbmi.sb_type : bsize;
- min_size = MIN(min_size, bs);
- max_size = MAX(max_size, bs);
+ min_size = VPXMIN(min_size, bs);
+ max_size = VPXMAX(max_size, bs);
}
}
}
@@ -2258,8 +2261,8 @@
for (idy = 0; idy < mi_height; ++idy) {
mi = xd->mi[idy * cm->mi_stride - 1];
bs = mi ? mi->mbmi.sb_type : bsize;
- min_size = MIN(min_size, bs);
- max_size = MAX(max_size, bs);
+ min_size = VPXMIN(min_size, bs);
+ max_size = VPXMAX(max_size, bs);
}
}
@@ -2267,8 +2270,8 @@
for (idx = 0; idx < mi_width; ++idx) {
mi = xd->mi[idx - cm->mi_stride];
bs = mi ? mi->mbmi.sb_type : bsize;
- min_size = MIN(min_size, bs);
- max_size = MAX(max_size, bs);
+ min_size = VPXMIN(min_size, bs);
+ max_size = VPXMAX(max_size, bs);
}
}
@@ -2376,11 +2379,20 @@
bsize >= BLOCK_8X8;
int partition_vert_allowed = !force_horz_split && xss <= yss &&
bsize >= BLOCK_8X8;
- (void) *tp_orig;
+
+ int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr;
+ int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr;
+
+ (void)*tp_orig;
assert(num_8x8_blocks_wide_lookup[bsize] ==
num_8x8_blocks_high_lookup[bsize]);
+ // Adjust dist breakout threshold according to the partition size.
+ dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] +
+ b_height_log2_lookup[bsize]);
+ rate_breakout_thr *= num_pels_log2_lookup[bsize];
+
vp9_rd_cost_init(&this_rdc);
vp9_rd_cost_init(&sum_rdc);
vp9_rd_cost_reset(&best_rdc);
@@ -2409,9 +2421,11 @@
force_vert_split);
do_split &= bsize > min_size;
}
- if (cpi->sf.use_square_partition_only) {
- partition_horz_allowed &= force_horz_split;
- partition_vert_allowed &= force_vert_split;
+
+ if (cpi->sf.use_square_partition_only &&
+ bsize > cpi->sf.use_square_only_threshold) {
+ partition_horz_allowed &= force_horz_split;
+ partition_vert_allowed &= force_vert_split;
}
save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -2433,9 +2447,9 @@
int mb_row = mi_row >> 1;
int mb_col = mi_col >> 1;
int mb_row_end =
- MIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
+ VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
int mb_col_end =
- MIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
+ VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
int r, c;
// compute a complexity measure, basically measure inconsistency of motion
@@ -2488,27 +2502,17 @@
}
if (this_rdc.rdcost < best_rdc.rdcost) {
- int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr;
- int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr;
-
best_rdc = this_rdc;
if (bsize >= BLOCK_8X8)
pc_tree->partitioning = PARTITION_NONE;
- // Adjust dist breakout threshold according to the partition size.
- dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] +
- b_height_log2_lookup[bsize]);
-
- rate_breakout_thr *= num_pels_log2_lookup[bsize];
-
// If all y, u, v transform blocks in this partition are skippable, and
// the dist & rate are within the thresholds, the partition search is
// terminated for current branch of the partition search tree.
- // The dist & rate thresholds are set to 0 at speed 0 to disable the
- // early termination at that speed.
- if (!x->e_mbd.lossless &&
- (ctx->skippable && best_rdc.dist < dist_breakout_thr &&
- best_rdc.rate < rate_breakout_thr)) {
+ if (!x->e_mbd.lossless && ctx->skippable &&
+ ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
+ (best_rdc.dist < dist_breakout_thr &&
+ best_rdc.rate < rate_breakout_thr))) {
do_split = 0;
do_rect = 0;
}
@@ -2524,9 +2528,9 @@
int mb_row = mi_row >> 1;
int mb_col = mi_col >> 1;
int mb_row_end =
- MIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
+ VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
int mb_col_end =
- MIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
+ VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
int r, c;
int skip = 1;
@@ -2618,11 +2622,21 @@
if (sum_rdc.rdcost < best_rdc.rdcost) {
best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_SPLIT;
+
+ // Rate and distortion based partition search termination clause.
+ if (!x->e_mbd.lossless &&
+ ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
+ (best_rdc.dist < dist_breakout_thr &&
+ best_rdc.rate < rate_breakout_thr))) {
+ do_rect = 0;
+ }
}
} else {
// skip rectangular partition test when larger block size
// gives better rd cost
- if (cpi->sf.less_rectangular_check)
+ if ((cpi->sf.less_rectangular_check) &&
+ ((bsize > cpi->sf.use_square_only_threshold) ||
+ (best_rdc.dist < dist_breakout_thr)))
do_rect &= !partition_none_allowed;
}
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -2631,7 +2645,7 @@
// PARTITION_HORZ
if (partition_horz_allowed &&
(do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) {
- subsize = get_subsize(bsize, PARTITION_HORZ);
+ subsize = get_subsize(bsize, PARTITION_HORZ);
if (cpi->sf.adaptive_motion_search)
load_pred_mv(x, ctx);
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
@@ -2672,6 +2686,10 @@
if (sum_rdc.rdcost < best_rdc.rdcost) {
best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_HORZ;
+
+ if ((cpi->sf.less_rectangular_check) &&
+ (bsize > cpi->sf.use_square_only_threshold))
+ do_rect = 0;
}
}
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -2679,7 +2697,7 @@
// PARTITION_VERT
if (partition_vert_allowed &&
(do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) {
- subsize = get_subsize(bsize, PARTITION_VERT);
+ subsize = get_subsize(bsize, PARTITION_VERT);
if (cpi->sf.adaptive_motion_search)
load_pred_mv(x, ctx);
@@ -2733,7 +2751,6 @@
(void) best_rd;
*rd_cost = best_rdc;
-
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
pc_tree->index != 3) {
int output_enabled = (bsize == BLOCK_64X64);
@@ -3646,7 +3663,7 @@
const int last_stride = cpi->Last_Source->y_stride;
// Pick cutoff threshold
- const int cutoff = (MIN(cm->width, cm->height) >= 720) ?
+ const int cutoff = (VPXMIN(cm->width, cm->height) >= 720) ?
(cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100) :
(cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100);
DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]);
@@ -3947,7 +3964,7 @@
#endif
// If allowed, encoding tiles in parallel with one thread handling one tile.
- if (MIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
+ if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
vp9_encode_tiles_mt(cpi);
else
encode_tiles(cpi);
@@ -4162,10 +4179,10 @@
int plane;
mbmi->skip = 1;
for (plane = 0; plane < MAX_MB_PLANE; ++plane)
- vp9_encode_intra_block_plane(x, MAX(bsize, BLOCK_8X8), plane);
+ vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane);
if (output_enabled)
sum_intra_stats(td->counts, mi);
- vp9_tokenize_sb(cpi, td, t, !output_enabled, MAX(bsize, BLOCK_8X8));
+ vp9_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8));
} else {
int ref;
const int is_compound = has_second_ref(mbmi);
@@ -4178,12 +4195,14 @@
&xd->block_refs[ref]->sf);
}
if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip)
- vp9_build_inter_predictors_sby(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col,
+ VPXMAX(bsize, BLOCK_8X8));
- vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
+ vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col,
+ VPXMAX(bsize, BLOCK_8X8));
- vp9_encode_sb(x, MAX(bsize, BLOCK_8X8));
- vp9_tokenize_sb(cpi, td, t, !output_enabled, MAX(bsize, BLOCK_8X8));
+ vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
+ vp9_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8));
}
if (output_enabled) {
@@ -4197,8 +4216,8 @@
TX_SIZE tx_size;
// The new intra coding scheme requires no change of transform size
if (is_inter_block(&mi->mbmi)) {
- tx_size = MIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
- max_txsize_lookup[bsize]);
+ tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
+ max_txsize_lookup[bsize]);
} else {
tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
}
@@ -4210,5 +4229,7 @@
}
++td->counts->tx.tx_totals[mbmi->tx_size];
++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
+ if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+ vp9_cyclic_refresh_update_sb_postencode(cpi, mbmi, mi_row, mi_col, bsize);
}
}
diff --git a/libvpx/vp9/encoder/vp9_encodemb.c b/libvpx/vp9/encoder/vp9_encodemb.c
index 00e4c61..3c6a928 100644
--- a/libvpx/vp9/encoder/vp9_encodemb.c
+++ b/libvpx/vp9/encoder/vp9_encodemb.c
@@ -99,7 +99,7 @@
tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
const int eob = p->eobs[block];
- const PLANE_TYPE type = pd->plane_type;
+ const PLANE_TYPE type = get_plane_type(plane);
const int default_eob = 16 << (tx_size << 1);
const int mul = 1 + (tx_size == TX_32X32);
const int16_t *dequant_ptr = pd->dequant;
@@ -789,7 +789,7 @@
src_diff = &p->src_diff[4 * (j * diff_stride + i)];
if (tx_size == TX_4X4) {
- tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
+ tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block);
scan_order = &vp9_scan_orders[TX_4X4][tx_type];
mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
} else {
@@ -797,7 +797,7 @@
if (tx_size == TX_32X32) {
scan_order = &vp9_default_scan_orders[TX_32X32];
} else {
- tx_type = get_tx_type(pd->plane_type, xd);
+ tx_type = get_tx_type(get_plane_type(plane), xd);
scan_order = &vp9_scan_orders[tx_size][tx_type];
}
}
diff --git a/libvpx/vp9/encoder/vp9_encodemv.c b/libvpx/vp9/encoder/vp9_encodemv.c
index 7848c93..e719663 100644
--- a/libvpx/vp9/encoder/vp9_encodemv.c
+++ b/libvpx/vp9/encoder/vp9_encodemv.c
@@ -16,6 +16,8 @@
#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemv.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
static struct vp9_token mv_joint_encodings[MV_JOINTS];
static struct vp9_token mv_class_encodings[MV_CLASSES];
static struct vp9_token mv_fp_encodings[MV_FP_SIZE];
@@ -216,8 +218,8 @@
// If auto_mv_step_size is enabled then keep track of the largest
// motion vector component used.
if (cpi->sf.mv.auto_mv_step_size) {
- unsigned int maxv = MAX(abs(mv->row), abs(mv->col)) >> 3;
- cpi->max_mv_magnitude = MAX(maxv, cpi->max_mv_magnitude);
+ unsigned int maxv = VPXMAX(abs(mv->row), abs(mv->col)) >> 3;
+ cpi->max_mv_magnitude = VPXMAX(maxv, cpi->max_mv_magnitude);
}
}
diff --git a/libvpx/vp9/encoder/vp9_encoder.c b/libvpx/vp9/encoder/vp9_encoder.c
index 4654d63..72eafec 100644
--- a/libvpx/vp9/encoder/vp9_encoder.c
+++ b/libvpx/vp9/encoder/vp9_encoder.c
@@ -17,6 +17,7 @@
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx/internal/vpx_psnr.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/vpx_filter.h"
#if CONFIG_INTERNAL_STATS
#include "vpx_dsp/ssim.h"
@@ -411,6 +412,8 @@
vpx_free_frame_buffer(&cpi->svc.empty_frame.img);
memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame));
+
+ vp9_free_svc_cyclic_refresh(cpi);
}
static void save_coding_context(VP9_COMP *cpi) {
@@ -686,7 +689,7 @@
return 0;
}
-void vp9_alloc_compressor_data(VP9_COMP *cpi) {
+static void alloc_compressor_data(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
vp9_alloc_context_buffers(cm, cm->width, cm->height);
@@ -772,10 +775,11 @@
cm->use_highbitdepth = oxcf->use_highbitdepth;
#endif
cm->color_space = oxcf->color_space;
+ cm->color_range = oxcf->color_range;
cm->width = oxcf->width;
cm->height = oxcf->height;
- vp9_alloc_compressor_data(cpi);
+ alloc_compressor_data(cpi);
cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
@@ -1452,11 +1456,14 @@
void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+ int last_w = cpi->oxcf.width;
+ int last_h = cpi->oxcf.height;
if (cm->profile != oxcf->profile)
cm->profile = oxcf->profile;
cm->bit_depth = oxcf->bit_depth;
cm->color_space = oxcf->color_space;
+ cm->color_range = oxcf->color_range;
if (cm->profile <= PROFILE_1)
assert(cm->bit_depth == VPX_BITS_8);
@@ -1490,8 +1497,8 @@
// Under a configuration change, where maximum_buffer_size may change,
// keep buffer level clipped to the maximum allowed buffer size.
- rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size);
- rc->buffer_level = MIN(rc->buffer_level, rc->maximum_buffer_size);
+ rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
+ rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size);
// Set up frame rate and related parameters rate control values.
vp9_new_framerate(cpi, cpi->framerate);
@@ -1502,15 +1509,25 @@
cm->interp_filter = cpi->sf.default_interp_filter;
- cm->display_width = cpi->oxcf.width;
- cm->display_height = cpi->oxcf.height;
- cm->width = cpi->oxcf.width;
- cm->height = cpi->oxcf.height;
+ if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) {
+ cm->render_width = cpi->oxcf.render_width;
+ cm->render_height = cpi->oxcf.render_height;
+ } else {
+ cm->render_width = cpi->oxcf.width;
+ cm->render_height = cpi->oxcf.height;
+ }
+ if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) {
+ cm->width = cpi->oxcf.width;
+ cm->height = cpi->oxcf.height;
+ }
if (cpi->initial_width) {
- if (cm->width > cpi->initial_width || cm->height > cpi->initial_height) {
+ int new_mi_size = 0;
+ vp9_set_mb_mi(cm, cm->width, cm->height);
+ new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows);
+ if (cm->mi_alloc_size < new_mi_size) {
vp9_free_context_buffers(cm);
- vp9_alloc_compressor_data(cpi);
+ alloc_compressor_data(cpi);
realloc_segmentation_maps(cpi);
cpi->initial_width = cpi->initial_height = 0;
}
@@ -1918,14 +1935,15 @@
snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
void vp9_remove_compressor(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
+ VP9_COMMON *cm;
unsigned int i;
int t;
if (!cpi)
return;
- if (cpi && (cm->current_video_frame > 0)) {
+ cm = &cpi->common;
+ if (cm->current_video_frame > 0) {
#if CONFIG_INTERNAL_STATS
vpx_clear_system_state();
@@ -2247,42 +2265,6 @@
uint32_t samples[4]; // total/y/u/v
} PSNR_STATS;
-static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
- PSNR_STATS *psnr) {
- static const double peak = 255.0;
- const int widths[3] = {
- a->y_crop_width, a->uv_crop_width, a->uv_crop_width};
- const int heights[3] = {
- a->y_crop_height, a->uv_crop_height, a->uv_crop_height};
- const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer};
- const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride};
- const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer};
- const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride};
- int i;
- uint64_t total_sse = 0;
- uint32_t total_samples = 0;
-
- for (i = 0; i < 3; ++i) {
- const int w = widths[i];
- const int h = heights[i];
- const uint32_t samples = w * h;
- const uint64_t sse = get_sse(a_planes[i], a_strides[i],
- b_planes[i], b_strides[i],
- w, h);
- psnr->sse[1 + i] = sse;
- psnr->samples[1 + i] = samples;
- psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);
-
- total_sse += sse;
- total_samples += samples;
- }
-
- psnr->sse[0] = total_sse;
- psnr->samples[0] = total_samples;
- psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
- (double)total_sse);
-}
-
#if CONFIG_VP9_HIGHBITDEPTH
static void calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,
const YV12_BUFFER_CONFIG *b,
@@ -2335,6 +2317,44 @@
psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
(double)total_sse);
}
+
+#else // !CONFIG_VP9_HIGHBITDEPTH
+
+static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
+ PSNR_STATS *psnr) {
+ static const double peak = 255.0;
+ const int widths[3] = {
+ a->y_crop_width, a->uv_crop_width, a->uv_crop_width};
+ const int heights[3] = {
+ a->y_crop_height, a->uv_crop_height, a->uv_crop_height};
+ const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer};
+ const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride};
+ const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer};
+ const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride};
+ int i;
+ uint64_t total_sse = 0;
+ uint32_t total_samples = 0;
+
+ for (i = 0; i < 3; ++i) {
+ const int w = widths[i];
+ const int h = heights[i];
+ const uint32_t samples = w * h;
+ const uint64_t sse = get_sse(a_planes[i], a_strides[i],
+ b_planes[i], b_strides[i],
+ w, h);
+ psnr->sse[1 + i] = sse;
+ psnr->samples[1 + i] = samples;
+ psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);
+
+ total_sse += sse;
+ total_samples += samples;
+ }
+
+ psnr->sse[0] = total_sse;
+ psnr->samples[0] = total_samples;
+ psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,
+ (double)total_sse);
+}
#endif // CONFIG_VP9_HIGHBITDEPTH
static void generate_psnr_packet(VP9_COMP *cpi) {
@@ -2615,7 +2635,7 @@
if (rc->frame_size_selector == UNSCALED &&
q >= rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) {
const int max_size_thresh = (int)(rate_thresh_mult[SCALE_STEP1]
- * MAX(rc->this_frame_target, rc->avg_frame_bandwidth));
+ * VPXMAX(rc->this_frame_target, rc->avg_frame_bandwidth));
scale = rc->projected_frame_size > max_size_thresh ? 1 : 0;
}
return scale;
@@ -2736,7 +2756,8 @@
cpi->common.frame_type,
cpi->refresh_alt_ref_frame,
cpi->refresh_golden_frame,
- cpi->refresh_last_frame);
+ cpi->refresh_last_frame,
+ cpi->resize_pending);
}
#endif
}
@@ -2744,6 +2765,7 @@
static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
struct loopfilter *lf = &cm->lf;
+
if (xd->lossless) {
lf->filter_level = 0;
} else {
@@ -2760,6 +2782,8 @@
}
if (lf->filter_level > 0) {
+ vp9_build_mask_frame(cm, lf->filter_level, 0);
+
if (cpi->num_workers > 1)
vp9_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
lf->filter_level, 0, 0,
@@ -2998,7 +3022,7 @@
static void set_mv_search_params(VP9_COMP *cpi) {
const VP9_COMMON *const cm = &cpi->common;
- const unsigned int max_mv_def = MIN(cm->width, cm->height);
+ const unsigned int max_mv_def = VPXMIN(cm->width, cm->height);
// Default based on max resolution.
cpi->mv_step_param = vp9_init_search_range(max_mv_def);
@@ -3013,8 +3037,8 @@
// Allow mv_steps to correspond to twice the max mv magnitude found
// in the previous frame, capped by the default max_mv_magnitude based
// on resolution.
- cpi->mv_step_param =
- vp9_init_search_range(MIN(max_mv_def, 2 * cpi->max_mv_magnitude));
+ cpi->mv_step_param = vp9_init_search_range(
+ VPXMIN(max_mv_def, 2 * cpi->max_mv_magnitude));
}
cpi->max_mv_magnitude = 0;
}
@@ -3076,6 +3100,21 @@
#endif // CONFIG_VP9_POSTPROC
}
+#if CONFIG_VP9_TEMPORAL_DENOISING
+static void setup_denoiser_buffer(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ if (cpi->oxcf.noise_sensitivity > 0 &&
+ !cpi->denoiser.frame_buffer_initialized) {
+ vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VP9_ENC_BORDER_IN_PIXELS);
+ }
+}
+#endif
+
static void init_motion_estimation(VP9_COMP *cpi) {
int y_stride = cpi->scaled_source.y_stride;
@@ -3107,26 +3146,30 @@
if (oxcf->pass == 0 &&
oxcf->rc_mode == VPX_CBR &&
!cpi->use_svc &&
- oxcf->resize_mode == RESIZE_DYNAMIC) {
- if (cpi->resize_pending == 1) {
- oxcf->scaled_frame_width =
- (cm->width * cpi->resize_scale_num) / cpi->resize_scale_den;
- oxcf->scaled_frame_height =
- (cm->height * cpi->resize_scale_num) /cpi->resize_scale_den;
- } else if (cpi->resize_pending == -1) {
- // Go back up to original size.
- oxcf->scaled_frame_width = oxcf->width;
- oxcf->scaled_frame_height = oxcf->height;
- }
- if (cpi->resize_pending != 0) {
- // There has been a change in frame size.
- vp9_set_size_literal(cpi,
- oxcf->scaled_frame_width,
- oxcf->scaled_frame_height);
+ oxcf->resize_mode == RESIZE_DYNAMIC &&
+ cpi->resize_pending != 0) {
+ oxcf->scaled_frame_width =
+ (oxcf->width * cpi->resize_scale_num) / cpi->resize_scale_den;
+ oxcf->scaled_frame_height =
+ (oxcf->height * cpi->resize_scale_num) /cpi->resize_scale_den;
+ // There has been a change in frame size.
+ vp9_set_size_literal(cpi,
+ oxcf->scaled_frame_width,
+ oxcf->scaled_frame_height);
- // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
- set_mv_search_params(cpi);
- }
+ // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
+ set_mv_search_params(cpi);
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ // Reset the denoiser on the resized frame.
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_denoiser_free(&(cpi->denoiser));
+ setup_denoiser_buffer(cpi);
+ // Dynamic resize is only triggered for non-SVC, so we can force
+ // golden frame update here as temporary fix to denoiser.
+ cpi->refresh_golden_frame = 1;
+ }
+#endif
}
if ((oxcf->pass == 2) &&
@@ -3193,11 +3236,26 @@
cpi->Source = vp9_scale_if_required(cm,
cpi->un_scaled_source,
- &cpi->scaled_source);
- if (cpi->unscaled_last_source != NULL)
+ &cpi->scaled_source,
+ (cpi->oxcf.pass == 0));
+
+ // Avoid scaling last_source unless it's needed.
+ // Last source is currently only used for screen-content mode,
+ // or if partition_search_type == SOURCE_VAR_BASED_PARTITION.
+ if (cpi->unscaled_last_source != NULL &&
+ (cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
+ cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION))
cpi->Last_Source = vp9_scale_if_required(cm,
cpi->unscaled_last_source,
- &cpi->scaled_last_source);
+ &cpi->scaled_last_source,
+ (cpi->oxcf.pass == 0));
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0 &&
+ cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+ vp9_denoiser_update_noise_estimate(cpi);
+ }
+#endif
if (cpi->oxcf.pass == 0 &&
cpi->oxcf.rc_mode == VPX_CBR &&
@@ -3270,6 +3328,7 @@
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
cm->frame_type != KEY_FRAME &&
!cpi->use_svc &&
+ cpi->ext_refresh_frame_flags_pending == 0 &&
(cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR))
vp9_cyclic_refresh_check_golden_update(cpi);
@@ -3328,11 +3387,13 @@
}
cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
- &cpi->scaled_source);
+ &cpi->scaled_source,
+ (cpi->oxcf.pass == 0));
if (cpi->unscaled_last_source != NULL)
cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
- &cpi->scaled_last_source);
+ &cpi->scaled_last_source,
+ (cpi->oxcf.pass == 0));
if (frame_is_intra_only(cm) == 0) {
if (loop_count > 0) {
@@ -3414,7 +3475,7 @@
// Adjust Q
q = (int)((q * high_err_target) / kf_err);
- q = MIN(q, (q_high + q_low) >> 1);
+ q = VPXMIN(q, (q_high + q_low) >> 1);
} else if (kf_err < low_err_target &&
rc->projected_frame_size >= frame_under_shoot_limit) {
// The key frame is much better than the previous frame
@@ -3423,7 +3484,7 @@
// Adjust Q
q = (int)((q * low_err_target) / kf_err);
- q = MIN(q, (q_high + q_low + 1) >> 1);
+ q = VPXMIN(q, (q_high + q_low + 1) >> 1);
}
// Clamp Q to upper and lower limits:
@@ -3432,7 +3493,7 @@
loop = q != last_q;
} else if (recode_loop_test(
cpi, frame_over_shoot_limit, frame_under_shoot_limit,
- q, MAX(q_high, top_index), bottom_index)) {
+ q, VPXMAX(q_high, top_index), bottom_index)) {
// Is the projected frame size out of range and are we allowed
// to attempt to recode.
int last_q = q;
@@ -3474,12 +3535,12 @@
vp9_rc_update_rate_correction_factors(cpi);
q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
- bottom_index, MAX(q_high, top_index));
+ bottom_index, VPXMAX(q_high, top_index));
while (q < q_low && retries < 10) {
vp9_rc_update_rate_correction_factors(cpi);
q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
- bottom_index, MAX(q_high, top_index));
+ bottom_index, VPXMAX(q_high, top_index));
retries++;
}
}
@@ -3578,26 +3639,22 @@
cpi->refresh_last_frame = cpi->ext_refresh_last_frame;
cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame;
cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame;
- cpi->ext_refresh_frame_flags_pending = 0;
}
}
YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
- YV12_BUFFER_CONFIG *scaled) {
+ YV12_BUFFER_CONFIG *scaled,
+ int use_normative_scaler) {
if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
cm->mi_rows * MI_SIZE != unscaled->y_height) {
#if CONFIG_VP9_HIGHBITDEPTH
- if (unscaled->y_width == (scaled->y_width << 1) &&
- unscaled->y_height == (scaled->y_height << 1))
+ if (use_normative_scaler)
scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth);
else
scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth);
#else
- // Use the faster normative (convolve8) scaling filter: for now only for
- // scaling factor of 2.
- if (unscaled->y_width == (scaled->y_width << 1) &&
- unscaled->y_height == (scaled->y_height << 1))
+ if (use_normative_scaler)
scale_and_extend_frame(unscaled, scaled);
else
scale_and_extend_frame_nonnormative(unscaled, scaled);
@@ -3747,6 +3804,7 @@
if (vp9_rc_drop_frame(cpi)) {
vp9_rc_postencode_update_drop_frame(cpi);
++cm->current_video_frame;
+ cpi->ext_refresh_frame_flags_pending = 0;
return;
}
}
@@ -3799,6 +3857,10 @@
cpi->refresh_last_frame = 1;
cm->frame_to_show = get_frame_new_buffer(cm);
+ cm->frame_to_show->color_space = cm->color_space;
+ cm->frame_to_show->color_range = cm->color_range;
+ cm->frame_to_show->render_width = cm->render_width;
+ cm->frame_to_show->render_height = cm->render_height;
// Pick the loop filter level for the frame.
loopfilter_frame(cpi, cm);
@@ -3828,6 +3890,8 @@
}
}
+ cpi->ext_refresh_frame_flags_pending = 0;
+
if (cpi->refresh_golden_frame == 1)
cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
else
@@ -3953,21 +4017,6 @@
}
}
-#if CONFIG_VP9_TEMPORAL_DENOISING
-static void setup_denoiser_buffer(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
- if (cpi->oxcf.noise_sensitivity > 0 &&
- !cpi->denoiser.frame_buffer_initialized) {
- vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- VP9_ENC_BORDER_IN_PIXELS);
- }
-}
-#endif
-
int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time) {
@@ -4053,8 +4102,8 @@
// Average this frame's rate into the last second's average
// frame rate. If we haven't seen 1 second yet, then average
// over the whole interval seen.
- const double interval = MIN((double)(source->ts_end
- - cpi->first_time_stamp_ever), 10000000.0);
+ const double interval = VPXMIN(
+ (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0);
double avg_duration = 10000000.0 / cpi->framerate;
avg_duration *= (interval - avg_duration + this_duration);
avg_duration /= interval;
@@ -4118,7 +4167,7 @@
s->stat[U] += u;
s->stat[V] += v;
s->stat[ALL] += all;
- s->worst = MIN(s->worst, all);
+ s->worst = VPXMIN(s->worst, all);
}
#endif // CONFIG_INTERNAL_STATS
@@ -4237,7 +4286,8 @@
// non-zero spatial layer, it should not be an intra picture.
// TODO(Won Kap): this needs to change if per-layer intra frame is
// allowed.
- if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->svc.spatial_layer_id) {
+ if ((source->flags & VPX_EFLAG_FORCE_KF) &&
+ cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
}
@@ -4448,7 +4498,7 @@
frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);
#endif // CONFIG_VP9_HIGHBITDEPTH
- cpi->worst_ssim= MIN(cpi->worst_ssim, frame_ssim2);
+ cpi->worst_ssim = VPXMIN(cpi->worst_ssim, frame_ssim2);
cpi->summed_quality += frame_ssim2 * weight;
cpi->summed_weights += weight;
@@ -4485,7 +4535,8 @@
cpi->Source->y_buffer, cpi->Source->y_stride,
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
cpi->Source->y_width, cpi->Source->y_height);
- cpi->worst_blockiness = MAX(cpi->worst_blockiness, frame_blockiness);
+ cpi->worst_blockiness =
+ VPXMAX(cpi->worst_blockiness, frame_blockiness);
cpi->total_blockiness += frame_blockiness;
}
}
@@ -4505,8 +4556,8 @@
double consistency = vpx_sse_to_psnr(samples, peak,
(double)cpi->total_inconsistency);
if (consistency > 0.0)
- cpi->worst_consistency = MIN(cpi->worst_consistency,
- consistency);
+ cpi->worst_consistency =
+ VPXMIN(cpi->worst_consistency, consistency);
cpi->total_inconsistency += this_inconsistency;
}
}
@@ -4618,8 +4669,10 @@
// always go to the next whole number
cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs;
cm->height = (vs - 1 + cpi->oxcf.height * vr) / vs;
- assert(cm->width <= cpi->initial_width);
- assert(cm->height <= cpi->initial_height);
+ if (cm->current_video_frame) {
+ assert(cm->width <= cpi->initial_width);
+ assert(cm->height <= cpi->initial_height);
+ }
update_frame_size(cpi);
diff --git a/libvpx/vp9/encoder/vp9_encoder.h b/libvpx/vp9/encoder/vp9_encoder.h
index c10abd2..159c03a 100644
--- a/libvpx/vp9/encoder/vp9_encoder.h
+++ b/libvpx/vp9/encoder/vp9_encoder.h
@@ -238,6 +238,9 @@
int use_highbitdepth;
#endif
vpx_color_space_t color_space;
+ vpx_color_range_t color_range;
+ int render_width;
+ int render_height;
VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode;
} VP9EncoderConfig;
@@ -605,8 +608,6 @@
const YV12_BUFFER_CONFIG *b);
#endif // CONFIG_VP9_HIGHBITDEPTH
-void vp9_alloc_compressor_data(VP9_COMP *cpi);
-
void vp9_scale_references(VP9_COMP *cpi);
void vp9_update_reference_frames(VP9_COMP *cpi);
@@ -615,7 +616,8 @@
YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
- YV12_BUFFER_CONFIG *scaled);
+ YV12_BUFFER_CONFIG *scaled,
+ int use_normative_scaler);
void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
diff --git a/libvpx/vp9/encoder/vp9_ethread.c b/libvpx/vp9/encoder/vp9_ethread.c
index 00025b7..ad25712 100644
--- a/libvpx/vp9/encoder/vp9_ethread.c
+++ b/libvpx/vp9/encoder/vp9_ethread.c
@@ -11,6 +11,7 @@
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_ethread.h"
+#include "vpx_dsp/vpx_dsp_common.h"
static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
int i, j, k, l, m, n;
@@ -67,7 +68,7 @@
VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
- const int num_workers = MIN(cpi->oxcf.max_threads, tile_cols);
+ const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
int i;
vp9_init_tile_data(cpi);
@@ -80,7 +81,7 @@
// resolution.
if (cpi->use_svc) {
int max_tile_cols = get_max_tile_cols(cpi);
- allocated_workers = MIN(cpi->oxcf.max_threads, max_tile_cols);
+ allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
}
CHECK_MEM_ERROR(cm, cpi->workers,
@@ -191,7 +192,7 @@
// Accumulate counters.
if (i < cpi->num_workers - 1) {
- vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
+ vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
accumulate_rd_opt(&cpi->td, thread_data->td);
}
}
diff --git a/libvpx/vp9/encoder/vp9_ethread.h b/libvpx/vp9/encoder/vp9_ethread.h
index e87c50b..1efa4dc 100644
--- a/libvpx/vp9/encoder/vp9_ethread.h
+++ b/libvpx/vp9/encoder/vp9_ethread.h
@@ -11,6 +11,10 @@
#ifndef VP9_ENCODER_VP9_ETHREAD_H_
#define VP9_ENCODER_VP9_ETHREAD_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
struct VP9_COMP;
struct ThreadData;
@@ -22,4 +26,8 @@
void vp9_encode_tiles_mt(struct VP9_COMP *cpi);
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_ENCODER_VP9_ETHREAD_H_
diff --git a/libvpx/vp9/encoder/vp9_extend.c b/libvpx/vp9/encoder/vp9_extend.c
index 0c304dc..92585b8 100644
--- a/libvpx/vp9/encoder/vp9_extend.c
+++ b/libvpx/vp9/encoder/vp9_extend.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@@ -111,10 +112,12 @@
// Motion estimation may use src block variance with the block size up
// to 64x64, so the right and bottom need to be extended to 64 multiple
// or up to 16, whichever is greater.
- const int er_y = MAX(src->y_width + 16, ALIGN_POWER_OF_TWO(src->y_width, 6))
- - src->y_crop_width;
- const int eb_y = MAX(src->y_height + 16, ALIGN_POWER_OF_TWO(src->y_height, 6))
- - src->y_crop_height;
+ const int er_y =
+ VPXMAX(src->y_width + 16, ALIGN_POWER_OF_TWO(src->y_width, 6)) -
+ src->y_crop_width;
+ const int eb_y =
+ VPXMAX(src->y_height + 16, ALIGN_POWER_OF_TWO(src->y_height, 6)) -
+ src->y_crop_height;
const int uv_width_subsampling = (src->uv_width != src->y_width);
const int uv_height_subsampling = (src->uv_height != src->y_height);
const int et_uv = et_y >> uv_height_subsampling;
diff --git a/libvpx/vp9/encoder/vp9_firstpass.c b/libvpx/vp9/encoder/vp9_firstpass.c
index e0c5966..30738b5 100644
--- a/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/libvpx/vp9/encoder/vp9_firstpass.c
@@ -15,6 +15,7 @@
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"
@@ -381,7 +382,7 @@
// for first pass test.
static int get_search_range(const VP9_COMP *cpi) {
int sr = 0;
- const int dim = MIN(cpi->initial_width, cpi->initial_height);
+ const int dim = VPXMIN(cpi->initial_width, cpi->initial_height);
while ((dim << sr) < MAX_FULL_PEL_VAL)
++sr;
@@ -596,7 +597,7 @@
(cpi->ref_frame_flags & VP9_GOLD_FLAG) ? GOLDEN_FRAME : NONE);
cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
- &cpi->scaled_source);
+ &cpi->scaled_source, 0);
}
vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
@@ -1024,7 +1025,7 @@
// Exclude any image dead zone
if (image_data_start_row > 0) {
intra_skip_count =
- MAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2));
+ VPXMAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2));
}
{
@@ -1161,7 +1162,7 @@
// Adjustment based on actual quantizer to power term.
const double power_term =
- MIN(vp9_convert_qindex_to_q(q, bit_depth) * 0.01 + pt_low, pt_high);
+ VPXMIN(vp9_convert_qindex_to_q(q, bit_depth) * 0.01 + pt_low, pt_high);
// Calculate correction factor.
if (power_term < 1.0)
@@ -1182,19 +1183,22 @@
double group_weight_factor) {
const RATE_CONTROL *const rc = &cpi->rc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ // Clamp the target rate to VBR min / max limits.
+ const int target_rate =
+ vp9_rc_clamp_pframe_target_size(cpi, section_target_bandwidth);
inactive_zone = fclamp(inactive_zone, 0.0, 1.0);
- if (section_target_bandwidth <= 0) {
+ if (target_rate <= 0) {
return rc->worst_quality; // Highest value allowed
} else {
const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
? cpi->initial_mbs : cpi->common.MBs;
- const int active_mbs = MAX(1, num_mbs - (int)(num_mbs * inactive_zone));
+ const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
const double av_err_per_mb = section_err / active_mbs;
const double speed_term = 1.0 + 0.04 * oxcf->speed;
const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
- const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth <<
+ const int target_norm_bits_per_mb = ((uint64_t)target_rate <<
BPER_MB_NORMBITS) / active_mbs;
int q;
@@ -1223,7 +1227,7 @@
// Restriction on active max q for constrained quality mode.
if (cpi->oxcf.rc_mode == VPX_CQ)
- q = MAX(q, oxcf->cq_level);
+ q = VPXMAX(q, oxcf->cq_level);
return q;
}
}
@@ -1233,11 +1237,11 @@
RATE_CONTROL *const rc = &cpi->rc;
for (i = INTER_NORMAL; i < RATE_FACTOR_LEVELS; ++i) {
int qdelta = vp9_frame_type_qdelta(cpi, i, rc->worst_quality);
- rc->rf_level_maxq[i] = MAX(rc->worst_quality + qdelta, rc->best_quality);
+ rc->rf_level_maxq[i] = VPXMAX(rc->worst_quality + qdelta, rc->best_quality);
}
}
-void vp9_init_subsampling(VP9_COMP *cpi) {
+static void init_subsampling(VP9_COMP *cpi) {
const VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
const int w = cm->width;
@@ -1332,7 +1336,7 @@
twopass->last_kfgroup_zeromotion_pct = 100;
if (oxcf->resize_mode != RESIZE_NONE) {
- vp9_init_subsampling(cpi);
+ init_subsampling(cpi);
}
}
@@ -1364,12 +1368,12 @@
if ((sr_diff > LOW_SR_DIFF_TRHESH)) {
- sr_diff = MIN(sr_diff, SR_DIFF_MAX);
+ sr_diff = VPXMIN(sr_diff, SR_DIFF_MAX);
sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) -
(MOTION_AMP_PART * motion_amplitude_factor) -
(INTRA_PART * modified_pcnt_intra);
}
- return MAX(sr_decay, MIN(DEFAULT_DECAY_LIMIT, modified_pct_inter));
+ return VPXMAX(sr_decay, VPXMIN(DEFAULT_DECAY_LIMIT, modified_pct_inter));
}
// This function gives an estimate of how badly we believe the prediction
@@ -1379,7 +1383,7 @@
const double zero_motion_pct = frame->pcnt_inter -
frame->pcnt_motion;
double sr_decay = get_sr_decay_rate(cpi, frame);
- return MIN(sr_decay, zero_motion_pct);
+ return VPXMIN(sr_decay, zero_motion_pct);
}
#define ZM_POWER_FACTOR 0.75
@@ -1391,8 +1395,8 @@
(0.95 * pow((next_frame->pcnt_inter - next_frame->pcnt_motion),
ZM_POWER_FACTOR));
- return MAX(zero_motion_factor,
- (sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor)));
+ return VPXMAX(zero_motion_factor,
+ (sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor)));
}
// Function to test for a condition where a complex transition is followed
@@ -1483,12 +1487,12 @@
const double lq =
vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
cpi->common.bit_depth);
- const double boost_q_correction = MIN((0.5 + (lq * 0.015)), 1.5);
+ const double boost_q_correction = VPXMIN((0.5 + (lq * 0.015)), 1.5);
int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
? cpi->initial_mbs : cpi->common.MBs;
// Correct for any inactive region in the image
- num_mbs = (int)MAX(1, num_mbs * calculate_active_area(cpi, this_frame));
+ num_mbs = (int)VPXMAX(1, num_mbs * calculate_active_area(cpi, this_frame));
// Underlying boost factor is based on inter error ratio.
frame_boost = (BASELINE_ERR_PER_MB * num_mbs) /
@@ -1504,7 +1508,7 @@
else
frame_boost += frame_boost * (this_frame_mv_in_out / 2.0);
- return MIN(frame_boost, max_boost * boost_q_correction);
+ return VPXMIN(frame_boost, max_boost * boost_q_correction);
}
static int calc_arf_boost(VP9_COMP *cpi, int offset,
@@ -1593,7 +1597,7 @@
arf_boost = (*f_boost + *b_boost);
if (arf_boost < ((b_frames + f_frames) * 20))
arf_boost = ((b_frames + f_frames) * 20);
- arf_boost = MAX(arf_boost, MIN_ARF_GF_BOOST);
+ arf_boost = VPXMAX(arf_boost, MIN_ARF_GF_BOOST);
return arf_boost;
}
@@ -1664,7 +1668,8 @@
}
// Calculate the number of extra bits for use in the boosted frame or frames.
- return MAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), 0);
+ return VPXMAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks),
+ 0);
}
// Current limit on maximum number of active arfs in a GF/ARF group.
@@ -1803,7 +1808,7 @@
gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
target_frame_size = clamp(target_frame_size, 0,
- MIN(max_bits, (int)total_group_bits));
+ VPXMIN(max_bits, (int)total_group_bits));
gf_group->update_type[frame_index] = LF_UPDATE;
gf_group->rf_level[frame_index] = INTER_NORMAL;
@@ -1924,7 +1929,7 @@
int int_lbq =
(int)(vp9_convert_qindex_to_q(rc->last_boosted_qindex,
cpi->common.bit_depth));
- active_min_gf_interval = rc->min_gf_interval + MIN(2, int_max_q / 200);
+ active_min_gf_interval = rc->min_gf_interval + VPXMIN(2, int_max_q / 200);
if (active_min_gf_interval > rc->max_gf_interval)
active_min_gf_interval = rc->max_gf_interval;
@@ -1935,7 +1940,7 @@
// bits to spare and are better with a smaller interval and smaller boost.
// At high Q when there are few bits to spare we are better with a longer
// interval to spread the cost of the GF.
- active_max_gf_interval = 12 + MIN(4, (int_lbq / 6));
+ active_max_gf_interval = 12 + VPXMIN(4, (int_lbq / 6));
if (active_max_gf_interval < active_min_gf_interval)
active_max_gf_interval = active_min_gf_interval;
@@ -1980,8 +1985,8 @@
decay_accumulator = decay_accumulator * loop_decay_rate;
// Monitor for static sections.
- zero_motion_accumulator =
- MIN(zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+ zero_motion_accumulator = VPXMIN(
+ zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
// Break clause to detect very still sections after motion. For example,
// a static image after a fade or other transition.
@@ -2037,7 +2042,7 @@
(cpi->multi_arf_allowed && (rc->baseline_gf_interval >= 6) &&
(zero_motion_accumulator < 0.995)) ? 1 : 0;
} else {
- rc->gfu_boost = MAX((int)boost_score, MIN_ARF_GF_BOOST);
+ rc->gfu_boost = VPXMAX((int)boost_score, MIN_ARF_GF_BOOST);
rc->source_alt_ref_pending = 0;
}
@@ -2092,11 +2097,11 @@
// rc factor is a weight factor that corrects for local rate control drift.
double rc_factor = 1.0;
if (rc->rate_error_estimate > 0) {
- rc_factor = MAX(RC_FACTOR_MIN,
- (double)(100 - rc->rate_error_estimate) / 100.0);
+ rc_factor = VPXMAX(RC_FACTOR_MIN,
+ (double)(100 - rc->rate_error_estimate) / 100.0);
} else {
- rc_factor = MIN(RC_FACTOR_MAX,
- (double)(100 - rc->rate_error_estimate) / 100.0);
+ rc_factor = VPXMIN(RC_FACTOR_MAX,
+ (double)(100 - rc->rate_error_estimate) / 100.0);
}
tmp_q =
get_twopass_worst_quality(cpi, group_av_err,
@@ -2104,7 +2109,7 @@
vbr_group_bits_per_frame,
twopass->kfgroup_inter_fraction * rc_factor);
twopass->active_worst_quality =
- MAX(tmp_q, twopass->active_worst_quality >> 1);
+ VPXMAX(tmp_q, twopass->active_worst_quality >> 1);
}
#endif
@@ -2421,7 +2426,7 @@
} else {
twopass->kf_group_bits = 0;
}
- twopass->kf_group_bits = MAX(0, twopass->kf_group_bits);
+ twopass->kf_group_bits = VPXMAX(0, twopass->kf_group_bits);
// Reset the first pass file position.
reset_fpf_position(twopass, start_position);
@@ -2435,22 +2440,21 @@
break;
// Monitor for static sections.
- zero_motion_accumulator =
- MIN(zero_motion_accumulator,
- get_zero_motion_factor(cpi, &next_frame));
+ zero_motion_accumulator = VPXMIN(
+ zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
// Not all frames in the group are necessarily used in calculating boost.
if ((i <= rc->max_gf_interval) ||
((i <= (rc->max_gf_interval * 4)) && (decay_accumulator > 0.5))) {
const double frame_boost =
- calc_frame_boost(cpi, this_frame, 0, KF_MAX_BOOST);
+ calc_frame_boost(cpi, &next_frame, 0, KF_MAX_BOOST);
// How fast is prediction quality decaying.
if (!detect_flash(twopass, 0)) {
const double loop_decay_rate =
get_prediction_decay_rate(cpi, &next_frame);
decay_accumulator *= loop_decay_rate;
- decay_accumulator = MAX(decay_accumulator, MIN_DECAY_FACTOR);
+ decay_accumulator = VPXMAX(decay_accumulator, MIN_DECAY_FACTOR);
av_decay_accumulator += decay_accumulator;
++loop_decay_counter;
}
@@ -2471,8 +2475,8 @@
// Apply various clamps for min and max boost
rc->kf_boost = (int)(av_decay_accumulator * boost_score);
- rc->kf_boost = MAX(rc->kf_boost, (rc->frames_to_key * 3));
- rc->kf_boost = MAX(rc->kf_boost, MIN_KF_BOOST);
+ rc->kf_boost = VPXMAX(rc->kf_boost, (rc->frames_to_key * 3));
+ rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_BOOST);
// Work out how many bits to allocate for the key frame itself.
kf_bits = calculate_boost_bits((rc->frames_to_key - 1),
@@ -2736,11 +2740,6 @@
}
target_rate = gf_group->bit_allocation[gf_group->index];
- if (cpi->common.frame_type == KEY_FRAME)
- target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate);
- else
- target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
-
rc->base_frame_target = target_rate;
{
@@ -2770,7 +2769,7 @@
// is designed to prevent extreme behaviour at the end of a clip
// or group of frames.
rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size;
- twopass->bits_left = MAX(twopass->bits_left - bits_used, 0);
+ twopass->bits_left = VPXMAX(twopass->bits_left - bits_used, 0);
// Calculate the pct rc error.
if (rc->total_actual_bits) {
@@ -2786,7 +2785,7 @@
twopass->kf_group_bits -= bits_used;
twopass->last_kfgroup_zeromotion_pct = twopass->kf_zeromotion_pct;
}
- twopass->kf_group_bits = MAX(twopass->kf_group_bits, 0);
+ twopass->kf_group_bits = VPXMAX(twopass->kf_group_bits, 0);
// Increment the gf group index ready for the next frame.
++twopass->gf_group.index;
@@ -2836,18 +2835,18 @@
rc->vbr_bits_off_target_fast +=
fast_extra_thresh - rc->projected_frame_size;
rc->vbr_bits_off_target_fast =
- MIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth));
+ VPXMIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth));
// Fast adaptation of minQ if necessary to use up the extra bits.
if (rc->avg_frame_bandwidth) {
twopass->extend_minq_fast =
(int)(rc->vbr_bits_off_target_fast * 8 / rc->avg_frame_bandwidth);
}
- twopass->extend_minq_fast = MIN(twopass->extend_minq_fast,
- minq_adj_limit - twopass->extend_minq);
+ twopass->extend_minq_fast = VPXMIN(
+ twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq);
} else if (rc->vbr_bits_off_target_fast) {
- twopass->extend_minq_fast = MIN(twopass->extend_minq_fast,
- minq_adj_limit - twopass->extend_minq);
+ twopass->extend_minq_fast = VPXMIN(
+ twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq);
} else {
twopass->extend_minq_fast = 0;
}
diff --git a/libvpx/vp9/encoder/vp9_firstpass.h b/libvpx/vp9/encoder/vp9_firstpass.h
index 49f9da3..5875a7b 100644
--- a/libvpx/vp9/encoder/vp9_firstpass.h
+++ b/libvpx/vp9/encoder/vp9_firstpass.h
@@ -153,8 +153,6 @@
// Post encode update of the rate control parameters for 2-pass
void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
-void vp9_init_subsampling(struct VP9_COMP *cpi);
-
void calculate_coded_size(struct VP9_COMP *cpi,
int *scaled_frame_width,
int *scaled_frame_height);
diff --git a/libvpx/vp9/encoder/vp9_mbgraph.c b/libvpx/vp9/encoder/vp9_mbgraph.c
index d59f315..41b6d19 100644
--- a/libvpx/vp9/encoder/vp9_mbgraph.c
+++ b/libvpx/vp9/encoder/vp9_mbgraph.c
@@ -13,6 +13,7 @@
#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/system_state.h"
#include "vp9/encoder/vp9_segmentation.h"
@@ -29,7 +30,8 @@
int mb_col) {
MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
- const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
+ MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
+ const SEARCH_METHODS old_search_method = mv_sf->search_method;
const vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
const int tmp_col_min = x->mv_col_min;
@@ -41,17 +43,18 @@
// Further step/diamond searches as necessary
int step_param = mv_sf->reduce_first_step_size;
- step_param = MIN(step_param, MAX_MVSEARCH_STEPS - 2);
+ step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
vp9_set_mv_search_range(x, ref_mv);
ref_full.col = ref_mv->col >> 3;
ref_full.row = ref_mv->row >> 3;
- /*cpi->sf.search_method == HEX*/
- vp9_hex_search(x, &ref_full, step_param, x->errorperbit, 0,
- cond_cost_list(cpi, cost_list),
- &v_fn_ptr, 0, ref_mv, dst_mv);
+ mv_sf->search_method = HEX;
+ vp9_full_pixel_search(cpi, x, BLOCK_16X16, &ref_full, step_param,
+ x->errorperbit, cond_cost_list(cpi, cost_list), ref_mv,
+ dst_mv, 0, 0);
+ mv_sf->search_method = old_search_method;
// Try sub-pixel MC
// if (bestsme > error_thresh && bestsme < INT_MAX)
diff --git a/libvpx/vp9/encoder/vp9_mcomp.c b/libvpx/vp9/encoder/vp9_mcomp.c
index aa3e51c..be8f57f 100644
--- a/libvpx/vp9/encoder/vp9_mcomp.c
+++ b/libvpx/vp9/encoder/vp9_mcomp.c
@@ -15,6 +15,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@@ -37,10 +38,10 @@
int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
- col_min = MAX(col_min, (MV_LOW >> 3) + 1);
- row_min = MAX(row_min, (MV_LOW >> 3) + 1);
- col_max = MIN(col_max, (MV_UPP >> 3) - 1);
- row_max = MIN(row_max, (MV_UPP >> 3) - 1);
+ col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1);
+ row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1);
+ col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1);
+ row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1);
// Get intersection of UMV window and valid MV window to reduce # of checks
// in diamond search.
@@ -57,12 +58,12 @@
int vp9_init_search_range(int size) {
int sr = 0;
// Minimum search size no matter what the passed in value.
- size = MAX(16, size);
+ size = VPXMAX(16, size);
while ((size << sr) < MAX_FULL_PEL_VAL)
sr++;
- sr = MIN(sr, MAX_MVSEARCH_STEPS - 2);
+ sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2);
return sr;
}
@@ -297,10 +298,10 @@
int br = bestmv->row * 8; \
int bc = bestmv->col * 8; \
int hstep = 4; \
- const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); \
- const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); \
- const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); \
- const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); \
+ const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); \
+ const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); \
+ const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); \
+ const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); \
int tr = br; \
int tc = bc; \
\
@@ -668,10 +669,10 @@
int bc = bestmv->col * 8;
int hstep = 4;
int iter, round = 3 - forced_stop;
- const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
- const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
- const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
- const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
+ const int minc = VPXMAX(x->mv_col_min * 8, ref_mv->col - MV_MAX);
+ const int maxc = VPXMIN(x->mv_col_max * 8, ref_mv->col + MV_MAX);
+ const int minr = VPXMAX(x->mv_row_min * 8, ref_mv->row - MV_MAX);
+ const int maxr = VPXMIN(x->mv_row_max * 8, ref_mv->row + MV_MAX);
int tr = br;
int tc = bc;
const MV *search_step = search_step_table;
@@ -1371,15 +1372,15 @@
x->mvcost, x->errorperbit) : 0);
}
-int vp9_hex_search(const MACROBLOCK *x,
- MV *ref_mv,
- int search_param,
- int sad_per_bit,
- int do_init_search,
- int *cost_list,
- const vp9_variance_fn_ptr_t *vfp,
- int use_mvcost,
- const MV *center_mv, MV *best_mv) {
+static int hex_search(const MACROBLOCK *x,
+ MV *ref_mv,
+ int search_param,
+ int sad_per_bit,
+ int do_init_search,
+ int *cost_list,
+ const vp9_variance_fn_ptr_t *vfp,
+ int use_mvcost,
+ const MV *center_mv, MV *best_mv) {
// First scale has 8-closest points, the rest have 6 points in hex shape
// at increasing scales
static const int hex_num_candidates[MAX_PATTERN_SCALES] = {
@@ -1406,16 +1407,16 @@
hex_num_candidates, hex_candidates);
}
-int vp9_bigdia_search(const MACROBLOCK *x,
- MV *ref_mv,
- int search_param,
- int sad_per_bit,
- int do_init_search,
- int *cost_list,
- const vp9_variance_fn_ptr_t *vfp,
- int use_mvcost,
- const MV *center_mv,
- MV *best_mv) {
+static int bigdia_search(const MACROBLOCK *x,
+ MV *ref_mv,
+ int search_param,
+ int sad_per_bit,
+ int do_init_search,
+ int *cost_list,
+ const vp9_variance_fn_ptr_t *vfp,
+ int use_mvcost,
+ const MV *center_mv,
+ MV *best_mv) {
// First scale has 4-closest points, the rest have 8 points in diamond
// shape at increasing scales
static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
@@ -1448,16 +1449,16 @@
bigdia_num_candidates, bigdia_candidates);
}
-int vp9_square_search(const MACROBLOCK *x,
- MV *ref_mv,
- int search_param,
- int sad_per_bit,
- int do_init_search,
- int *cost_list,
- const vp9_variance_fn_ptr_t *vfp,
- int use_mvcost,
- const MV *center_mv,
- MV *best_mv) {
+static int square_search(const MACROBLOCK *x,
+ MV *ref_mv,
+ int search_param,
+ int sad_per_bit,
+ int do_init_search,
+ int *cost_list,
+ const vp9_variance_fn_ptr_t *vfp,
+ int use_mvcost,
+ const MV *center_mv,
+ MV *best_mv) {
// All scales have 8 closest points in square shape
static const int square_num_candidates[MAX_PATTERN_SCALES] = {
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
@@ -1490,34 +1491,34 @@
square_num_candidates, square_candidates);
}
-int vp9_fast_hex_search(const MACROBLOCK *x,
- MV *ref_mv,
- int search_param,
- int sad_per_bit,
- int do_init_search, // must be zero for fast_hex
- int *cost_list,
- const vp9_variance_fn_ptr_t *vfp,
- int use_mvcost,
- const MV *center_mv,
- MV *best_mv) {
- return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
- sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
- center_mv, best_mv);
+static int fast_hex_search(const MACROBLOCK *x,
+ MV *ref_mv,
+ int search_param,
+ int sad_per_bit,
+ int do_init_search, // must be zero for fast_hex
+ int *cost_list,
+ const vp9_variance_fn_ptr_t *vfp,
+ int use_mvcost,
+ const MV *center_mv,
+ MV *best_mv) {
+ return hex_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
+ sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
+ center_mv, best_mv);
}
-int vp9_fast_dia_search(const MACROBLOCK *x,
- MV *ref_mv,
- int search_param,
- int sad_per_bit,
- int do_init_search,
- int *cost_list,
- const vp9_variance_fn_ptr_t *vfp,
- int use_mvcost,
- const MV *center_mv,
- MV *best_mv) {
- return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param),
- sad_per_bit, do_init_search, cost_list, vfp,
- use_mvcost, center_mv, best_mv);
+static int fast_dia_search(const MACROBLOCK *x,
+ MV *ref_mv,
+ int search_param,
+ int sad_per_bit,
+ int do_init_search,
+ int *cost_list,
+ const vp9_variance_fn_ptr_t *vfp,
+ int use_mvcost,
+ const MV *center_mv,
+ MV *best_mv) {
+ return bigdia_search(
+ x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), sad_per_bit,
+ do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv);
}
#undef CHECK_BETTER
@@ -1547,10 +1548,10 @@
best_sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv), in_what->stride) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
- start_row = MAX(-range, x->mv_row_min - ref_mv->row);
- start_col = MAX(-range, x->mv_col_min - ref_mv->col);
- end_row = MIN(range, x->mv_row_max - ref_mv->row);
- end_col = MIN(range, x->mv_col_max - ref_mv->col);
+ start_row = VPXMAX(-range, x->mv_row_min - ref_mv->row);
+ start_col = VPXMAX(-range, x->mv_col_min - ref_mv->col);
+ end_row = VPXMIN(range, x->mv_row_max - ref_mv->row);
+ end_col = VPXMIN(range, x->mv_col_max - ref_mv->col);
for (r = start_row; r <= end_row; ++r) {
for (c = start_col; c <= end_col; c += 4) {
@@ -1946,15 +1947,16 @@
return best_sad;
}
+// Runs sequence of diamond searches in smaller steps for RD.
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
point as the best match, we will do a final 1-away diamond
refining search */
-int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
- MV *mvp_full, int step_param,
- int sadpb, int further_steps, int do_refine,
- int *cost_list,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const MV *ref_mv, MV *dst_mv) {
+static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
+ MV *mvp_full, int step_param,
+ int sadpb, int further_steps, int do_refine,
+ int *cost_list,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const MV *ref_mv, MV *dst_mv) {
MV temp_mv;
int thissme, n, num00 = 0;
int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
@@ -2021,10 +2023,10 @@
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
- const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
- const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
- const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
- const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
+ const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min);
+ const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max);
+ const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min);
+ const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
int best_sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv), in_what->stride) +
@@ -2054,10 +2056,10 @@
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
- const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
- const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
- const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
- const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
+ const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min);
+ const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max);
+ const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min);
+ const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv), in_what->stride) +
@@ -2119,10 +2121,10 @@
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
- const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
- const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
- const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
- const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
+ const int row_min = VPXMAX(ref_mv->row - distance, x->mv_row_min);
+ const int row_max = VPXMIN(ref_mv->row + distance, x->mv_row_max);
+ const int col_min = VPXMAX(ref_mv->col - distance, x->mv_col_min);
+ const int col_max = VPXMIN(ref_mv->col + distance, x->mv_col_max);
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv), in_what->stride) +
@@ -2346,29 +2348,29 @@
switch (method) {
case FAST_DIAMOND:
- var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
- cost_list, fn_ptr, 1, ref_mv, tmp_mv);
+ var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
+ cost_list, fn_ptr, 1, ref_mv, tmp_mv);
break;
case FAST_HEX:
- var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
- cost_list, fn_ptr, 1, ref_mv, tmp_mv);
+ var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
+ cost_list, fn_ptr, 1, ref_mv, tmp_mv);
break;
case HEX:
- var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1,
- cost_list, fn_ptr, 1, ref_mv, tmp_mv);
+ var = hex_search(x, mvp_full, step_param, error_per_bit, 1,
+ cost_list, fn_ptr, 1, ref_mv, tmp_mv);
break;
case SQUARE:
- var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1,
- cost_list, fn_ptr, 1, ref_mv, tmp_mv);
+ var = square_search(x, mvp_full, step_param, error_per_bit, 1,
+ cost_list, fn_ptr, 1, ref_mv, tmp_mv);
break;
case BIGDIA:
- var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1,
- cost_list, fn_ptr, 1, ref_mv, tmp_mv);
+ var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1,
+ cost_list, fn_ptr, 1, ref_mv, tmp_mv);
break;
case NSTEP:
- var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
- MAX_MVSEARCH_STEPS - 1 - step_param,
- 1, cost_list, fn_ptr, ref_mv, tmp_mv);
+ var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
+ MAX_MVSEARCH_STEPS - 1 - step_param,
+ 1, cost_list, fn_ptr, ref_mv, tmp_mv);
break;
default:
assert(0 && "Invalid search method.");
diff --git a/libvpx/vp9/encoder/vp9_mcomp.h b/libvpx/vp9/encoder/vp9_mcomp.h
index 817bd79..5efd543 100644
--- a/libvpx/vp9/encoder/vp9_mcomp.h
+++ b/libvpx/vp9/encoder/vp9_mcomp.h
@@ -72,38 +72,12 @@
const struct vp9_variance_vtable *fn_ptr,
const struct mv *center_mv);
-// Runs sequence of diamond searches in smaller steps for RD.
-int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x,
- MV *mvp_full, int step_param,
- int sadpb, int further_steps, int do_refine,
- int *cost_list,
- const vp9_variance_fn_ptr_t *fn_ptr,
- const MV *ref_mv, MV *dst_mv);
-
// Perform integral projection based motion estimation.
unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi,
MACROBLOCK *x,
BLOCK_SIZE bsize,
int mi_row, int mi_col);
-typedef int (integer_mv_pattern_search_fn) (
- const MACROBLOCK *x,
- MV *ref_mv,
- int search_param,
- int error_per_bit,
- int do_init_search,
- int *cost_list,
- const vp9_variance_fn_ptr_t *vf,
- int use_mvcost,
- const MV *center_mv,
- MV *best_mv);
-
-integer_mv_pattern_search_fn vp9_hex_search;
-integer_mv_pattern_search_fn vp9_bigdia_search;
-integer_mv_pattern_search_fn vp9_square_search;
-integer_mv_pattern_search_fn vp9_fast_hex_search;
-integer_mv_pattern_search_fn vp9_fast_dia_search;
-
typedef int (fractional_mv_step_fp) (
const MACROBLOCK *x,
MV *bestmv, const MV *ref_mv,
diff --git a/libvpx/vp9/encoder/vp9_picklpf.c b/libvpx/vp9/encoder/vp9_picklpf.c
index 8e19103..5444bc8 100644
--- a/libvpx/vp9/encoder/vp9_picklpf.c
+++ b/libvpx/vp9/encoder/vp9_picklpf.c
@@ -40,6 +40,8 @@
VP9_COMMON *const cm = &cpi->common;
int64_t filt_err;
+ vp9_build_mask_frame(cm, filt_level, partial_frame);
+
if (cpi->num_workers > 1)
vp9_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane,
filt_level, 1, partial_frame,
@@ -92,8 +94,8 @@
ss_err[filt_mid] = best_err;
while (filter_step > 0) {
- const int filt_high = MIN(filt_mid + filter_step, max_filter_level);
- const int filt_low = MAX(filt_mid - filter_step, min_filter_level);
+ const int filt_high = VPXMIN(filt_mid + filter_step, max_filter_level);
+ const int filt_low = VPXMAX(filt_mid - filter_step, min_filter_level);
// Bias against raising loop filter in favor of lowering it.
int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
diff --git a/libvpx/vp9/encoder/vp9_pickmode.c b/libvpx/vp9/encoder/vp9_pickmode.c
index cc018fc..fc4d9ae 100644
--- a/libvpx/vp9/encoder/vp9_pickmode.c
+++ b/libvpx/vp9/encoder/vp9_pickmode.c
@@ -16,6 +16,7 @@
#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
@@ -293,8 +294,8 @@
if (cpi->common.tx_mode == TX_MODE_SELECT) {
if (sse > (var << 2))
- tx_size = MIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+ tx_size = VPXMIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
else
tx_size = TX_8X8;
@@ -304,8 +305,8 @@
else if (tx_size > TX_16X16)
tx_size = TX_16X16;
} else {
- tx_size = MIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+ tx_size = VPXMIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
}
assert(tx_size >= TX_8X8);
@@ -475,8 +476,8 @@
if (cpi->common.tx_mode == TX_MODE_SELECT) {
if (sse > (var << 2))
xd->mi[0]->mbmi.tx_size =
- MIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+ VPXMIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
else
xd->mi[0]->mbmi.tx_size = TX_8X8;
@@ -487,8 +488,8 @@
xd->mi[0]->mbmi.tx_size = TX_16X16;
} else {
xd->mi[0]->mbmi.tx_size =
- MIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+ VPXMIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
}
// Evaluate if the partition block is a skippable block in Y plane.
@@ -687,10 +688,11 @@
}
#endif
-static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE bsize,
+static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
int *out_rate_sum, int64_t *out_dist_sum,
- unsigned int *var_y, unsigned int *sse_y) {
+ unsigned int *var_y, unsigned int *sse_y,
+ int start_plane, int stop_plane) {
// Note our transform coeffs are 8 times an orthogonal transform.
// Hence quantizer step is also 8 times. To get effective quantizer
// we need to divide by 8 before sending to modeling function.
@@ -702,12 +704,12 @@
*out_rate_sum = 0;
*out_dist_sum = 0;
- for (i = 1; i <= 2; ++i) {
+ for (i = start_plane; i <= stop_plane; ++i) {
struct macroblock_plane *const p = &x->plane[i];
struct macroblockd_plane *const pd = &xd->plane[i];
const uint32_t dc_quant = pd->dequant[0];
const uint32_t ac_quant = pd->dequant[1];
- const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
+ const BLOCK_SIZE bs = plane_bsize;
unsigned int var;
if (!x->color_sensitivity[i - 1])
@@ -791,7 +793,7 @@
const unsigned int max_thresh = 36000;
// The encode_breakout input
const unsigned int min_thresh =
- MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
+ VPXMIN(((unsigned int)x->encode_breakout << 4), max_thresh);
#if CONFIG_VP9_HIGHBITDEPTH
const int shift = (xd->bd << 1) - 16;
#endif
@@ -892,12 +894,8 @@
int i, j;
int rate;
int64_t dist;
- int64_t this_sse = INT64_MAX;
- int is_skippable;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
- assert(plane == 0);
- (void) plane;
p->src.buf = &src_buf_base[4 * (j * src_stride + i)];
pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)];
@@ -907,13 +905,22 @@
x->skip_encode ? p->src.buf : pd->dst.buf,
x->skip_encode ? src_stride : dst_stride,
pd->dst.buf, dst_stride,
- i, j, 0);
+ i, j, plane);
- // TODO(jingning): This needs further refactoring.
- block_yrd(cpi, x, &rate, &dist, &is_skippable, &this_sse, 0,
- bsize_tx, MIN(tx_size, TX_16X16));
- x->skip_txfm[0] = is_skippable;
- rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable);
+ if (plane == 0) {
+ int64_t this_sse = INT64_MAX;
+ int is_skippable;
+ // TODO(jingning): This needs further refactoring.
+ block_yrd(cpi, x, &rate, &dist, &is_skippable, &this_sse, 0,
+ bsize_tx, VPXMIN(tx_size, TX_16X16));
+ x->skip_txfm[0] = is_skippable;
+ // TODO(jingning): Skip is signalled per prediction block not per tx block.
+ rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), is_skippable);
+ } else {
+ unsigned int var, sse;
+ model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &rate, &dist, &var, &sse,
+ plane, plane);
+ }
p->src.buf = src_buf_base;
pd->dst.buf = dst_buf_base;
@@ -961,8 +968,8 @@
if (thr_mode_idx == best_mode_idx)
*freq_fact -= (*freq_fact >> 4);
else
- *freq_fact = MIN(*freq_fact + RD_THRESH_INC,
- cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
+ *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC,
+ cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
}
void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
@@ -973,8 +980,8 @@
PREDICTION_MODE this_mode;
struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
const TX_SIZE intra_tx_size =
- MIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+ VPXMIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
MODE_INFO *const mic = xd->mi[0];
int *bmode_costs;
const MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
@@ -1160,8 +1167,8 @@
mbmi->sb_type = bsize;
mbmi->ref_frame[0] = NONE;
mbmi->ref_frame[1] = NONE;
- mbmi->tx_size = MIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cm->tx_mode]);
+ mbmi->tx_size = VPXMIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cm->tx_mode]);
#if CONFIG_VP9_TEMPORAL_DENOISING
vp9_denoiser_reset_frame_stats(ctx);
@@ -1231,10 +1238,12 @@
if (const_motion[ref_frame] && this_mode == NEARMV)
continue;
- i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
- if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking)
- if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
- ref_frame_skip_mask |= (1 << ref_frame);
+ if (!(this_mode == ZEROMV && ref_frame == LAST_FRAME)) {
+ i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
+ if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking)
+ if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
+ ref_frame_skip_mask |= (1 << ref_frame);
+ }
if (ref_frame_skip_mask & (1 << ref_frame))
continue;
@@ -1414,7 +1423,7 @@
if (!this_early_term) {
this_sse = (int64_t)sse_y;
block_yrd(cpi, x, &this_rdc.rate, &this_rdc.dist, &is_skippable,
- &this_sse, 0, bsize, MIN(mbmi->tx_size, TX_16X16));
+ &this_sse, 0, bsize, VPXMIN(mbmi->tx_size, TX_16X16));
x->skip_txfm[0] = is_skippable;
if (is_skippable) {
this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
@@ -1442,12 +1451,13 @@
if (x->color_sensitivity[0] || x->color_sensitivity[1]) {
int uv_rate = 0;
int64_t uv_dist = 0;
+ const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, &xd->plane[1]);
if (x->color_sensitivity[0])
vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1);
if (x->color_sensitivity[1])
vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2);
- model_rd_for_sb_uv(cpi, bsize, x, xd, &uv_rate, &uv_dist,
- &var_y, &sse_y);
+ model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &uv_rate, &uv_dist,
+ &var_y, &sse_y, 1, 2);
this_rdc.rate += uv_rate;
this_rdc.dist += uv_dist;
}
@@ -1522,11 +1532,13 @@
(!x->skip && best_rdc.rdcost > inter_mode_thresh &&
bsize <= cpi->sf.max_intra_bsize)) {
struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
- const TX_SIZE intra_tx_size =
- MIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
int i;
TX_SIZE best_intra_tx_size = TX_SIZES;
+ TX_SIZE intra_tx_size =
+ VPXMIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+ if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && intra_tx_size > TX_16X16)
+ intra_tx_size = TX_16X16;
if (reuse_inter_pred && best_pred != NULL) {
if (best_pred->data == orig_dst.buf) {
@@ -1570,6 +1582,15 @@
mbmi->tx_size = intra_tx_size;
vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
estimate_block_intra, &args);
+ // Inter and intra RD will mismatch in scale for non-screen content.
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) {
+ if (x->color_sensitivity[0])
+ vp9_foreach_transformed_block_in_plane(xd, bsize, 1,
+ estimate_block_intra, &args);
+ if (x->color_sensitivity[1])
+ vp9_foreach_transformed_block_in_plane(xd, bsize, 2,
+ estimate_block_intra, &args);
+ }
this_rdc.rate = args.rate;
this_rdc.dist = args.dist;
this_rdc.rate += cpi->mbmode_cost[this_mode];
diff --git a/libvpx/vp9/encoder/vp9_ratectrl.c b/libvpx/vp9/encoder/vp9_ratectrl.c
index 4ba3406..d700685 100644
--- a/libvpx/vp9/encoder/vp9_ratectrl.c
+++ b/libvpx/vp9/encoder/vp9_ratectrl.c
@@ -15,6 +15,7 @@
#include <stdlib.h>
#include <string.h>
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"
@@ -106,8 +107,8 @@
static int get_minq_index(double maxq, double x3, double x2, double x1,
vpx_bit_depth_t bit_depth) {
int i;
- const double minqtarget = MIN(((x3 * maxq + x2) * maxq + x1) * maxq,
- maxq);
+ const double minqtarget = VPXMIN(((x3 * maxq + x2) * maxq + x1) * maxq,
+ maxq);
// Special case handling to deal with the step from q2.0
// down to lossless mode represented by q 1.0.
@@ -192,15 +193,15 @@
vpx_bit_depth_t bit_depth) {
const int bpm = (int)(vp9_rc_bits_per_mb(frame_type, q, correction_factor,
bit_depth));
- return MAX(FRAME_OVERHEAD_BITS,
- (int)((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS);
+ return VPXMAX(FRAME_OVERHEAD_BITS,
+ (int)((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS);
}
int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) {
const RATE_CONTROL *rc = &cpi->rc;
const VP9EncoderConfig *oxcf = &cpi->oxcf;
- const int min_frame_target = MAX(rc->min_frame_bandwidth,
- rc->avg_frame_bandwidth >> 5);
+ const int min_frame_target = VPXMAX(rc->min_frame_bandwidth,
+ rc->avg_frame_bandwidth >> 5);
if (target < min_frame_target)
target = min_frame_target;
if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) {
@@ -216,7 +217,7 @@
if (oxcf->rc_max_inter_bitrate_pct) {
const int max_rate = rc->avg_frame_bandwidth *
oxcf->rc_max_inter_bitrate_pct / 100;
- target = MIN(target, max_rate);
+ target = VPXMIN(target, max_rate);
}
return target;
}
@@ -227,7 +228,7 @@
if (oxcf->rc_max_intra_bitrate_pct) {
const int max_rate = rc->avg_frame_bandwidth *
oxcf->rc_max_intra_bitrate_pct / 100;
- target = MIN(target, max_rate);
+ target = VPXMIN(target, max_rate);
}
if (target > rc->max_frame_bandwidth)
target = rc->max_frame_bandwidth;
@@ -250,7 +251,8 @@
lrc->bits_off_target += bits_off_for_this_layer;
// Clip buffer level to maximum buffer size for the layer.
- lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size);
+ lrc->bits_off_target =
+ VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size);
lrc->buffer_level = lrc->bits_off_target;
}
}
@@ -268,7 +270,14 @@
}
// Clip the buffer level to the maximum specified buffer size.
- rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size);
+ rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
+
+ // For screen-content mode, and if frame-dropper is off, don't let buffer
+ // level go below threshold, given here as -rc->maximum_buffer_size.
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
+ cpi->oxcf.drop_frames_water_mark == 0)
+ rc->bits_off_target = VPXMAX(rc->bits_off_target, -rc->maximum_buffer_size);
+
rc->buffer_level = rc->bits_off_target;
if (is_one_pass_cbr_svc(cpi)) {
@@ -287,8 +296,8 @@
if (factor <= factor_safe)
return default_interval;
else
- return MAX(default_interval,
- (int)(MIN_GF_INTERVAL * factor / factor_safe + 0.5));
+ return VPXMAX(default_interval,
+ (int)(MIN_GF_INTERVAL * factor / factor_safe + 0.5));
// Note this logic makes:
// 4K24: 5
// 4K30: 6
@@ -296,9 +305,9 @@
}
int vp9_rc_get_default_max_gf_interval(double framerate, int min_gf_interval) {
- int interval = MIN(MAX_GF_INTERVAL, (int)(framerate * 0.75));
+ int interval = VPXMIN(MAX_GF_INTERVAL, (int)(framerate * 0.75));
interval += (interval & 0x01); // Round to even value
- return MAX(interval, min_gf_interval);
+ return VPXMAX(interval, min_gf_interval);
}
void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
@@ -478,7 +487,7 @@
// More heavily damped adjustment used if we have been oscillating either side
// of target.
adjustment_limit = 0.25 +
- 0.5 * MIN(1, fabs(log10(0.01 * correction_factor)));
+ 0.5 * VPXMIN(1, fabs(log10(0.01 * correction_factor)));
cpi->rc.q_2_frame = cpi->rc.q_1_frame;
cpi->rc.q_1_frame = cm->base_qindex;
@@ -531,8 +540,7 @@
do {
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
cm->seg.enabled &&
- cpi->svc.temporal_layer_id == 0 &&
- cpi->svc.spatial_layer_id == 0) {
+ cpi->svc.temporal_layer_id == 0) {
bits_per_mb_at_this_q =
(int)vp9_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor);
} else {
@@ -558,8 +566,8 @@
if (cpi->oxcf.rc_mode == VPX_CBR &&
(cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) &&
cpi->rc.q_1_frame != cpi->rc.q_2_frame) {
- q = clamp(q, MIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
- MAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
+ q = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
+ VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
}
return q;
}
@@ -617,7 +625,7 @@
: rc->last_q[INTER_FRAME] * 2;
}
}
- return MIN(active_worst_quality, rc->worst_quality);
+ return VPXMIN(active_worst_quality, rc->worst_quality);
}
// Adjust active_worst_quality level based on buffer level.
@@ -644,10 +652,10 @@
// So for first few frames following key, the qp of that key frame is weighted
// into the active_worst_quality setting.
ambient_qp = (cm->current_video_frame < num_frames_weight_key) ?
- MIN(rc->avg_frame_qindex[INTER_FRAME], rc->avg_frame_qindex[KEY_FRAME]) :
- rc->avg_frame_qindex[INTER_FRAME];
- active_worst_quality = MIN(rc->worst_quality,
- ambient_qp * 5 / 4);
+ VPXMIN(rc->avg_frame_qindex[INTER_FRAME],
+ rc->avg_frame_qindex[KEY_FRAME]) :
+ rc->avg_frame_qindex[INTER_FRAME];
+ active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 / 4);
if (rc->buffer_level > rc->optimal_buffer_level) {
// Adjust down.
// Maximum limit for down adjustment, ~30%.
@@ -700,7 +708,7 @@
int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
(last_boosted_q * 0.75),
cm->bit_depth);
- active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
+ active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else if (cm->current_video_frame > 0) {
// not first frame of one pass and kf_boost is set
double q_adj_factor = 1.0;
@@ -833,7 +841,7 @@
int delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
last_boosted_q * 0.75,
cm->bit_depth);
- active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
+ active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else {
// not first frame of one pass and kf_boost is set
double q_adj_factor = 1.0;
@@ -1002,21 +1010,21 @@
int qindex;
if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
- qindex = MIN(rc->last_kf_qindex, rc->last_boosted_qindex);
+ qindex = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
active_best_quality = qindex;
last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
last_boosted_q * 1.25,
cm->bit_depth);
- active_worst_quality = MIN(qindex + delta_qindex, active_worst_quality);
-
+ active_worst_quality =
+ VPXMIN(qindex + delta_qindex, active_worst_quality);
} else {
qindex = rc->last_boosted_qindex;
last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
last_boosted_q * 0.75,
cm->bit_depth);
- active_best_quality = MAX(qindex + delta_qindex, rc->best_quality);
+ active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
}
} else {
// Not forced keyframe.
@@ -1116,8 +1124,8 @@
(cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) {
int qdelta = vp9_frame_type_qdelta(cpi, gf_group->rf_level[gf_group->index],
active_worst_quality);
- active_worst_quality = MAX(active_worst_quality + qdelta,
- active_best_quality);
+ active_worst_quality = VPXMAX(active_worst_quality + qdelta,
+ active_best_quality);
}
#endif
@@ -1126,7 +1134,8 @@
int qdelta = vp9_compute_qdelta_by_rate(rc, cm->frame_type,
active_best_quality, 2.0,
cm->bit_depth);
- active_best_quality = MAX(active_best_quality + qdelta, rc->best_quality);
+ active_best_quality =
+ VPXMAX(active_best_quality + qdelta, rc->best_quality);
}
active_best_quality = clamp(active_best_quality,
@@ -1141,7 +1150,7 @@
rc->this_key_frame_forced) {
// If static since last kf use better of last boosted and last kf q.
if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
- q = MIN(rc->last_kf_qindex, rc->last_boosted_qindex);
+ q = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
} else {
q = rc->last_boosted_qindex;
}
@@ -1203,9 +1212,9 @@
// For very small rate targets where the fractional adjustment
// may be tiny make sure there is at least a minimum range.
const int tolerance = (cpi->sf.recode_tolerance * frame_target) / 100;
- *frame_under_shoot_limit = MAX(frame_target - tolerance - 200, 0);
- *frame_over_shoot_limit = MIN(frame_target + tolerance + 200,
- cpi->rc.max_frame_bandwidth);
+ *frame_under_shoot_limit = VPXMAX(frame_target - tolerance - 200, 0);
+ *frame_over_shoot_limit = VPXMIN(frame_target + tolerance + 200,
+ cpi->rc.max_frame_bandwidth);
}
}
@@ -1351,7 +1360,7 @@
rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits;
- if (!cpi->use_svc) {
+ if (!cpi->use_svc || is_two_pass_svc(cpi)) {
if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame &&
(cm->frame_type != KEY_FRAME))
// Update the alternate reference frame stats as appropriate.
@@ -1458,7 +1467,8 @@
const SVC *const svc = &cpi->svc;
const int64_t diff = rc->optimal_buffer_level - rc->buffer_level;
const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100;
- int min_frame_target = MAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS);
+ int min_frame_target =
+ VPXMAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS);
int target;
if (oxcf->gf_cbr_boost_pct) {
@@ -1480,23 +1490,24 @@
svc->temporal_layer_id, svc->number_temporal_layers);
const LAYER_CONTEXT *lc = &svc->layer_context[layer];
target = lc->avg_frame_size;
- min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS);
+ min_frame_target = VPXMAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS);
}
if (diff > 0) {
// Lower the target bandwidth for this frame.
- const int pct_low = (int)MIN(diff / one_pct_bits, oxcf->under_shoot_pct);
+ const int pct_low = (int)VPXMIN(diff / one_pct_bits, oxcf->under_shoot_pct);
target -= (target * pct_low) / 200;
} else if (diff < 0) {
// Increase the target bandwidth for this frame.
- const int pct_high = (int)MIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
+ const int pct_high =
+ (int)VPXMIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
target += (target * pct_high) / 200;
}
if (oxcf->rc_max_inter_bitrate_pct) {
const int max_rate = rc->avg_frame_bandwidth *
oxcf->rc_max_inter_bitrate_pct / 100;
- target = MIN(target, max_rate);
+ target = VPXMIN(target, max_rate);
}
- return MAX(min_frame_target, target);
+ return VPXMAX(min_frame_target, target);
}
static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
@@ -1518,7 +1529,7 @@
const LAYER_CONTEXT *lc = &svc->layer_context[layer];
framerate = lc->framerate;
}
- kf_boost = MAX(kf_boost, (int)(2 * framerate - 16));
+ kf_boost = VPXMAX(kf_boost, (int)(2 * framerate - 16));
if (rc->frames_since_key < framerate / 2) {
kf_boost = (int)(kf_boost * rc->frames_since_key /
(framerate / 2));
@@ -1584,7 +1595,7 @@
cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
} else if (is_one_pass_cbr_svc(cpi)) {
LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
- if (cpi->svc.spatial_layer_id == 0) {
+ if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode) {
lc->is_key_frame = 0;
} else {
lc->is_key_frame =
@@ -1726,7 +1737,7 @@
rc->max_gf_interval = rc->static_scene_max_gf_interval;
// Clamp min to max
- rc->min_gf_interval = MIN(rc->min_gf_interval, rc->max_gf_interval);
+ rc->min_gf_interval = VPXMIN(rc->min_gf_interval, rc->max_gf_interval);
}
void vp9_rc_update_framerate(VP9_COMP *cpi) {
@@ -1739,7 +1750,8 @@
rc->min_frame_bandwidth = (int)(rc->avg_frame_bandwidth *
oxcf->two_pass_vbrmin_section / 100);
- rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
+ rc->min_frame_bandwidth =
+ VPXMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
// A maximum bitrate for a frame is defined.
// The baseline for this aligns with HW implementations that
@@ -1750,8 +1762,8 @@
// specifies lossless encode.
vbr_max_bits = (int)(((int64_t)rc->avg_frame_bandwidth *
oxcf->two_pass_vbrmax_section) / 100);
- rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P),
- vbr_max_bits);
+ rc->max_frame_bandwidth =
+ VPXMAX(VPXMAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits);
vp9_rc_set_gf_interval_range(cpi, rc);
}
@@ -1789,12 +1801,12 @@
// Dont do it for kf,arf,gf or overlay frames.
if (!frame_is_kf_gf_arf(cpi) && !rc->is_src_frame_alt_ref &&
rc->vbr_bits_off_target_fast) {
- int one_frame_bits = MAX(rc->avg_frame_bandwidth, *this_frame_target);
+ int one_frame_bits = VPXMAX(rc->avg_frame_bandwidth, *this_frame_target);
int fast_extra_bits;
- fast_extra_bits =
- (int)MIN(rc->vbr_bits_off_target_fast, one_frame_bits);
- fast_extra_bits = (int)MIN(fast_extra_bits,
- MAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8));
+ fast_extra_bits = (int)VPXMIN(rc->vbr_bits_off_target_fast, one_frame_bits);
+ fast_extra_bits = (int)VPXMIN(
+ fast_extra_bits,
+ VPXMAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8));
*this_frame_target += (int)fast_extra_bits;
rc->vbr_bits_off_target_fast -= fast_extra_bits;
}
@@ -1804,6 +1816,11 @@
RATE_CONTROL *const rc = &cpi->rc;
int target_rate = rc->base_frame_target;
+ if (cpi->common.frame_type == KEY_FRAME)
+ target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate);
+ else
+ target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
+
// Correction to rate target based on prior over or under shoot.
if (cpi->oxcf.rc_mode == VPX_VBR || cpi->oxcf.rc_mode == VPX_CQ)
vbr_rate_correction(cpi, &target_rate);
@@ -1815,7 +1832,9 @@
int vp9_resize_one_pass_cbr(VP9_COMP *cpi) {
const VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
- int resize_now = 0;
+ RESIZE_ACTION resize_action = NO_RESIZE;
+ int avg_qp_thr1 = 70;
+ int avg_qp_thr2 = 50;
cpi->resize_scale_num = 1;
cpi->resize_scale_den = 1;
// Don't resize on key frame; reset the counters on key frame.
@@ -1824,10 +1843,19 @@
cpi->resize_count = 0;
return 0;
}
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ // If denoiser is on, apply a smaller qp threshold.
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ avg_qp_thr1 = 60;
+ avg_qp_thr2 = 40;
+ }
+#endif
+
// Resize based on average buffer underflow and QP over some window.
// Ignore samples close to key frame, since QP is usually high after key.
- if (cpi->rc.frames_since_key > 2 * cpi->framerate) {
- const int window = (int)(5 * cpi->framerate);
+ if (cpi->rc.frames_since_key > 1 * cpi->framerate) {
+ const int window = (int)(4 * cpi->framerate);
cpi->resize_avg_qp += cm->base_qindex;
if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100))
++cpi->resize_buffer_underflow;
@@ -1835,18 +1863,30 @@
// Check for resize action every "window" frames.
if (cpi->resize_count >= window) {
int avg_qp = cpi->resize_avg_qp / cpi->resize_count;
- // Resize down if buffer level has underflowed sufficent amount in past
- // window, and we are at original resolution.
+ // Resize down if buffer level has underflowed sufficient amount in past
+ // window, and we are at original or 3/4 of original resolution.
// Resize back up if average QP is low, and we are currently in a resized
- // down state.
- if (cpi->resize_state == 0 &&
- cpi->resize_buffer_underflow > (cpi->resize_count >> 2)) {
- resize_now = 1;
- cpi->resize_state = 1;
- } else if (cpi->resize_state == 1 &&
- avg_qp < 40 * cpi->rc.worst_quality / 100) {
- resize_now = -1;
- cpi->resize_state = 0;
+ // down state, i.e. 1/2 or 3/4 of original resolution.
+ // Currently, use a flag to turn 3/4 resizing feature on/off.
+ if (cpi->resize_buffer_underflow > (cpi->resize_count >> 2)) {
+ if (cpi->resize_state == THREE_QUARTER) {
+ resize_action = DOWN_ONEHALF;
+ cpi->resize_state = ONE_HALF;
+ } else if (cpi->resize_state == ORIG) {
+ resize_action = ONEHALFONLY_RESIZE ? DOWN_ONEHALF : DOWN_THREEFOUR;
+ cpi->resize_state = ONEHALFONLY_RESIZE ? ONE_HALF : THREE_QUARTER;
+ }
+ } else if (cpi->resize_state != ORIG &&
+ avg_qp < avg_qp_thr1 * cpi->rc.worst_quality / 100) {
+ if (cpi->resize_state == THREE_QUARTER ||
+ avg_qp < avg_qp_thr2 * cpi->rc.worst_quality / 100 ||
+ ONEHALFONLY_RESIZE) {
+ resize_action = UP_ORIG;
+ cpi->resize_state = ORIG;
+ } else if (cpi->resize_state == ONE_HALF) {
+ resize_action = UP_THREEFOUR;
+ cpi->resize_state = THREE_QUARTER;
+ }
}
// Reset for next window measurement.
cpi->resize_avg_qp = 0;
@@ -1856,26 +1896,30 @@
}
// If decision is to resize, reset some quantities, and check is we should
// reduce rate correction factor,
- if (resize_now != 0) {
+ if (resize_action != NO_RESIZE) {
int target_bits_per_frame;
int active_worst_quality;
int qindex;
int tot_scale_change;
- // For now, resize is by 1/2 x 1/2.
- cpi->resize_scale_num = 1;
- cpi->resize_scale_den = 2;
+ if (resize_action == DOWN_THREEFOUR || resize_action == UP_THREEFOUR) {
+ cpi->resize_scale_num = 3;
+ cpi->resize_scale_den = 4;
+ } else if (resize_action == DOWN_ONEHALF) {
+ cpi->resize_scale_num = 1;
+ cpi->resize_scale_den = 2;
+ } else { // UP_ORIG or anything else
+ cpi->resize_scale_num = 1;
+ cpi->resize_scale_den = 1;
+ }
tot_scale_change = (cpi->resize_scale_den * cpi->resize_scale_den) /
(cpi->resize_scale_num * cpi->resize_scale_num);
// Reset buffer level to optimal, update target size.
rc->buffer_level = rc->optimal_buffer_level;
rc->bits_off_target = rc->optimal_buffer_level;
rc->this_frame_target = calc_pframe_target_size_one_pass_cbr(cpi);
- // Reset cyclic refresh parameters.
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
- vp9_cyclic_refresh_reset_resize(cpi);
// Get the projected qindex, based on the scaled target frame size (scaled
// so target_bits_per_mb in vp9_rc_regulate_q will be correct target).
- target_bits_per_frame = (resize_now == 1) ?
+ target_bits_per_frame = (resize_action >= 0) ?
rc->this_frame_target * tot_scale_change :
rc->this_frame_target / tot_scale_change;
active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi);
@@ -1886,19 +1930,19 @@
// If resize is down, check if projected q index is close to worst_quality,
// and if so, reduce the rate correction factor (since likely can afford
// lower q for resized frame).
- if (resize_now == 1 &&
+ if (resize_action > 0 &&
qindex > 90 * cpi->rc.worst_quality / 100) {
rc->rate_correction_factors[INTER_NORMAL] *= 0.85;
}
// If resize is back up, check if projected q index is too much above the
// current base_qindex, and if so, reduce the rate correction factor
// (since prefer to keep q for resized frame at least close to previous q).
- if (resize_now == -1 &&
+ if (resize_action < 0 &&
qindex > 130 * cm->base_qindex / 100) {
rc->rate_correction_factors[INTER_NORMAL] *= 0.9;
}
}
- return resize_now;
+ return resize_action;
}
// Compute average source sad (temporal sad: between current source and
@@ -1948,7 +1992,7 @@
// between current and the previous frame value(s). Use a minimum threshold
// for cases where there is small change from content that is completely
// static.
- if (avg_sad > MAX(4000, (rc->avg_source_sad << 3)) &&
+ if (avg_sad > VPXMAX(4000, (rc->avg_source_sad << 3)) &&
rc->frames_since_key > 1)
rc->high_source_sad = 1;
else
@@ -1968,16 +2012,59 @@
int thresh_rate = rc->avg_frame_bandwidth * 10;
if (cm->base_qindex < thresh_qp &&
frame_size > thresh_rate) {
+ double rate_correction_factor =
+ cpi->rc.rate_correction_factors[INTER_NORMAL];
+ const int target_size = cpi->rc.avg_frame_bandwidth;
+ double new_correction_factor;
+ int target_bits_per_mb;
+ double q2;
+ int enumerator;
// Force a re-encode, and for now use max-QP.
*q = cpi->rc.worst_quality;
- // Adjust avg_frame_qindex and buffer_level, as these parameters will affect
- // QP selection for subsequent frames. If they have settled down to a very
- // different (low QP) state, then not re-adjusting them may cause next
- // frame to select low QP and overshoot again.
- // TODO(marpan): Check if rate correction factor should also be adjusted.
+ // Adjust avg_frame_qindex, buffer_level, and rate correction factors, as
+ // these parameters will affect QP selection for subsequent frames. If they
+ // have settled down to a very different (low QP) state, then not adjusting
+ // them may cause next frame to select low QP and overshoot again.
cpi->rc.avg_frame_qindex[INTER_FRAME] = *q;
rc->buffer_level = rc->optimal_buffer_level;
rc->bits_off_target = rc->optimal_buffer_level;
+ // Reset rate under/over-shoot flags.
+ cpi->rc.rc_1_frame = 0;
+ cpi->rc.rc_2_frame = 0;
+ // Adjust rate correction factor.
+ target_bits_per_mb = ((uint64_t)target_size << BPER_MB_NORMBITS) / cm->MBs;
+ // Rate correction factor based on target_bits_per_mb and qp (==max_QP).
+ // This comes from the inverse computation of vp9_rc_bits_per_mb().
+ q2 = vp9_convert_qindex_to_q(*q, cm->bit_depth);
+ enumerator = 1800000; // Factor for inter frame.
+ enumerator += (int)(enumerator * q2) >> 12;
+ new_correction_factor = (double)target_bits_per_mb * q2 / enumerator;
+ if (new_correction_factor > rate_correction_factor) {
+ rate_correction_factor =
+ VPXMIN(2.0 * rate_correction_factor, new_correction_factor);
+ if (rate_correction_factor > MAX_BPB_FACTOR)
+ rate_correction_factor = MAX_BPB_FACTOR;
+ cpi->rc.rate_correction_factors[INTER_NORMAL] = rate_correction_factor;
+ }
+ // For temporal layers, reset the rate control parameters across all
+ // temporal layers.
+ if (cpi->use_svc) {
+ int i = 0;
+ SVC *svc = &cpi->svc;
+ for (i = 0; i < svc->number_temporal_layers; ++i) {
+ const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
+ RATE_CONTROL *lrc = &lc->rc;
+ lrc->avg_frame_qindex[INTER_FRAME] = *q;
+ lrc->buffer_level = rc->optimal_buffer_level;
+ lrc->bits_off_target = rc->optimal_buffer_level;
+ lrc->rc_1_frame = 0;
+ lrc->rc_2_frame = 0;
+ lrc->rate_correction_factors[INTER_NORMAL] =
+ rate_correction_factor;
+ }
+ }
return 1;
} else {
return 0;
diff --git a/libvpx/vp9/encoder/vp9_ratectrl.h b/libvpx/vp9/encoder/vp9_ratectrl.h
index 11dfa35..136fd3e 100644
--- a/libvpx/vp9/encoder/vp9_ratectrl.h
+++ b/libvpx/vp9/encoder/vp9_ratectrl.h
@@ -26,6 +26,7 @@
#define MIN_GF_INTERVAL 4
#define MAX_GF_INTERVAL 16
+#define ONEHALFONLY_RESIZE 0
typedef enum {
INTER_NORMAL = 0,
@@ -43,6 +44,20 @@
FRAME_SCALE_STEPS
} FRAME_SCALE_LEVEL;
+typedef enum {
+ NO_RESIZE = 0,
+ DOWN_THREEFOUR = 1, // From orig to 3/4.
+ DOWN_ONEHALF = 2, // From orig or 3/4 to 1/2.
+ UP_THREEFOUR = -1, // From 1/2 to 3/4.
+ UP_ORIG = -2, // From 1/2 or 3/4 to orig.
+} RESIZE_ACTION;
+
+typedef enum {
+ ORIG = 0,
+ THREE_QUARTER = 1,
+ ONE_HALF = 2
+} RESIZE_STATE;
+
// Frame dimensions multiplier wrt the native frame size, in 1/16ths,
// specified for the scale-up case.
// e.g. 24 => 16/24 = 2/3 of native size. The restriction to 1/16th is
diff --git a/libvpx/vp9/encoder/vp9_rd.c b/libvpx/vp9/encoder/vp9_rd.c
index 2f2f7c1..b085c7a 100644
--- a/libvpx/vp9/encoder/vp9_rd.c
+++ b/libvpx/vp9/encoder/vp9_rd.c
@@ -14,6 +14,7 @@
#include "./vp9_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/bitops.h"
#include "vpx_ports/mem.h"
@@ -172,7 +173,7 @@
if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
- const int boost_index = MIN(15, (cpi->rc.gfu_boost / 100));
+ const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
@@ -204,7 +205,7 @@
q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif // CONFIG_VP9_HIGHBITDEPTH
// TODO(debargha): Adjust the function below.
- return MAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
+ return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
@@ -404,7 +405,7 @@
static const uint32_t MAX_XSQ_Q10 = 245727;
const uint64_t xsq_q10_64 =
(((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
- const int xsq_q10 = (int)MIN(xsq_q10_64, MAX_XSQ_Q10);
+ const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
model_rd_norm(xsq_q10, &r_q10, &d_q10);
*rate = ((r_q10 << n_log2) + 2) >> 2;
*dist = (var * (int64_t)d_q10 + 512) >> 10;
@@ -485,7 +486,7 @@
continue;
fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
- max_mv = MAX(max_mv, MAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
+ max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
if (fp_row ==0 && fp_col == 0 && zero_seen)
continue;
@@ -629,16 +630,15 @@
const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
int mode;
for (mode = 0; mode < top_mode; ++mode) {
- const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4);
- const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64);
+ const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
+ const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
BLOCK_SIZE bs;
for (bs = min_size; bs <= max_size; ++bs) {
int *const fact = &factor_buf[bs][mode];
if (mode == best_mode_index) {
*fact -= (*fact >> 4);
} else {
- *fact = MIN(*fact + RD_THRESH_INC,
- rd_thresh * RD_THRESH_MAX_FACT);
+ *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
}
}
}
diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c
index 96c6474..4f3a06e 100644
--- a/libvpx/vp9/encoder/vp9_rdopt.c
+++ b/libvpx/vp9/encoder/vp9_rdopt.c
@@ -14,6 +14,7 @@
#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"
@@ -192,8 +193,8 @@
const int64_t ac_thr = p->quant_thred[1] >> shift;
// The low thresholds are used to measure if the prediction errors are
// low enough so that we can skip the mode search.
- const int64_t low_dc_thr = MIN(50, dc_thr >> 2);
- const int64_t low_ac_thr = MIN(80, ac_thr >> 2);
+ const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
+ const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
int idx, idy;
@@ -268,6 +269,52 @@
*out_dist_sum = dist_sum << 4;
}
+#if CONFIG_VP9_HIGHBITDEPTH
+int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
+ const tran_low_t *dqcoeff,
+ intptr_t block_size,
+ int64_t *ssz, int bd) {
+ int i;
+ int64_t error = 0, sqcoeff = 0;
+ int shift = 2 * (bd - 8);
+ int rounding = shift > 0 ? 1 << (shift - 1) : 0;
+
+ for (i = 0; i < block_size; i++) {
+ const int64_t diff = coeff[i] - dqcoeff[i];
+ error += diff * diff;
+ sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
+ }
+ assert(error >= 0 && sqcoeff >= 0);
+ error = (error + rounding) >> shift;
+ sqcoeff = (sqcoeff + rounding) >> shift;
+
+ *ssz = sqcoeff;
+ return error;
+}
+
+int64_t vp9_highbd_block_error_8bit_c(const tran_low_t *coeff,
+ const tran_low_t *dqcoeff,
+ intptr_t block_size,
+ int64_t *ssz) {
+ // Note that the C versions of these 2 functions (vp9_block_error and
+ // vp9_highbd_block_error_8bit) are the same, but the optimized assembly
+ // routines are not compatible in the non high bitdepth configuration, so
+ // they still cannot share the same name.
+ return vp9_block_error_c(coeff, dqcoeff, block_size, ssz);
+}
+
+static int64_t vp9_highbd_block_error_dispatch(const tran_low_t *coeff,
+ const tran_low_t *dqcoeff,
+ intptr_t block_size,
+ int64_t *ssz, int bd) {
+ if (bd == 8) {
+ return vp9_highbd_block_error_8bit(coeff, dqcoeff, block_size, ssz);
+ } else {
+ return vp9_highbd_block_error(coeff, dqcoeff, block_size, ssz, bd);
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
intptr_t block_size, int64_t *ssz) {
int i;
@@ -296,30 +343,6 @@
return error;
}
-#if CONFIG_VP9_HIGHBITDEPTH
-int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
- const tran_low_t *dqcoeff,
- intptr_t block_size,
- int64_t *ssz, int bd) {
- int i;
- int64_t error = 0, sqcoeff = 0;
- int shift = 2 * (bd - 8);
- int rounding = shift > 0 ? 1 << (shift - 1) : 0;
-
- for (i = 0; i < block_size; i++) {
- const int64_t diff = coeff[i] - dqcoeff[i];
- error += diff * diff;
- sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
- }
- assert(error >= 0 && sqcoeff >= 0);
- error = (error + rounding) >> shift;
- sqcoeff = (sqcoeff + rounding) >> shift;
-
- *ssz = sqcoeff;
- return error;
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
* decide whether to include cost of a trailing EOB node or not (i.e. we
* can skip this if the last coefficient in this transform block, e.g. the
@@ -340,8 +363,7 @@
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const struct macroblock_plane *p = &x->plane[plane];
- const struct macroblockd_plane *pd = &xd->plane[plane];
- const PLANE_TYPE type = pd->plane_type;
+ const PLANE_TYPE type = get_plane_type(plane);
const int16_t *band_count = &band_counts[tx_size][1];
const int eob = p->eobs[block];
const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
@@ -357,8 +379,8 @@
#endif
// Check for consistency of tx_size with mode info
- assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
- : get_uv_tx_size(mbmi, pd) == tx_size);
+ assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size :
+ get_uv_tx_size(mbmi, &xd->plane[plane]) == tx_size);
if (eob == 0) {
// single eob token
@@ -430,8 +452,9 @@
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
- *out_dist = vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
- &this_sse, bd) >> shift;
+ *out_dist = vp9_highbd_block_error_dispatch(coeff, dqcoeff,
+ 16 << ss_txfrm_size,
+ &this_sse, bd) >> shift;
#else
*out_dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
&this_sse) >> shift;
@@ -505,7 +528,7 @@
if (tx_size != TX_32X32)
dc_correct >>= 2;
- dist = MAX(0, sse - dc_correct);
+ dist = VPXMAX(0, sse - dc_correct);
}
} else {
// SKIP_TXFM_AC_DC
@@ -531,7 +554,7 @@
rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);
// TODO(jingning): temporarily enabled only for luma component
- rd = MIN(rd1, rd2);
+ rd = VPXMIN(rd1, rd2);
if (plane == 0)
x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
(rd1 > rd2 && !xd->lossless);
@@ -569,7 +592,7 @@
vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
- args.so = get_scan(xd, tx_size, pd->plane_type, 0);
+ args.so = get_scan(xd, tx_size, get_plane_type(plane), 0);
vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
block_rd_txfm, &args);
@@ -597,7 +620,7 @@
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
+ mbmi->tx_size = VPXMIN(max_tx_size, largest_tx_size);
txfm_rd_in_plane(x, rate, distortion, skip,
sse, ref_best_rd, 0, bs,
@@ -637,8 +660,8 @@
start_tx = max_tx_size;
end_tx = 0;
} else {
- TX_SIZE chosen_tx_size = MIN(max_tx_size,
- tx_mode_to_biggest_tx_size[cm->tx_mode]);
+ TX_SIZE chosen_tx_size = VPXMIN(max_tx_size,
+ tx_mode_to_biggest_tx_size[cm->tx_mode]);
start_tx = chosen_tx_size;
end_tx = chosen_tx_size;
}
@@ -663,6 +686,7 @@
} else if (s[n]) {
if (is_inter_block(mbmi)) {
rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
+ r[n][1] -= r_tx_size;
} else {
rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
@@ -672,6 +696,11 @@
rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
}
+ if (is_inter_block(mbmi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
+ rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
+ rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
+ }
+
// Early termination in transform size search.
if (cpi->sf.tx_size_search_breakout &&
(rd[n][1] == INT64_MAX ||
@@ -825,7 +854,7 @@
ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
- distortion += vp9_highbd_block_error(
+ distortion += vp9_highbd_block_error_dispatch(
coeff, BLOCK_OFFSET(pd->dqcoeff, block),
16, &unused, xd->bd) >> 2;
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
@@ -923,8 +952,13 @@
ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
so->scan, so->neighbors,
cpi->sf.use_fast_coef_costing);
+#if CONFIG_VP9_HIGHBITDEPTH
+ distortion += vp9_highbd_block_error_8bit(
+ coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused) >> 2;
+#else
distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
16, &unused) >> 2;
+#endif
if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
goto next;
vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
@@ -1362,6 +1396,9 @@
k = i;
for (idy = 0; idy < height / 4; ++idy) {
for (idx = 0; idx < width / 4; ++idx) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
+#endif
int64_t ssz, rd, rd1, rd2;
tran_low_t* coeff;
@@ -1371,14 +1408,8 @@
coeff, 8);
vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- thisdistortion += vp9_highbd_block_error(coeff,
- BLOCK_OFFSET(pd->dqcoeff, k),
- 16, &ssz, xd->bd);
- } else {
- thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
- 16, &ssz);
- }
+ thisdistortion += vp9_highbd_block_error_dispatch(
+ coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz, bd);
#else
thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
16, &ssz);
@@ -1389,7 +1420,7 @@
cpi->sf.use_fast_coef_costing);
rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
- rd = MIN(rd1, rd2);
+ rd = VPXMIN(rd1, rd2);
if (rd >= best_yrd)
return INT64_MAX;
}
@@ -1808,7 +1839,8 @@
if (i == 0)
max_mv = x->max_mv_context[mbmi->ref_frame[0]];
else
- max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
+ max_mv =
+ VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
// Take wtd average of the step_params based on the last frame's
@@ -1826,7 +1858,7 @@
if (cpi->sf.adaptive_motion_search) {
mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
- step_param = MAX(step_param, 8);
+ step_param = VPXMAX(step_param, 8);
}
// adjust src pointer for this block
@@ -2231,7 +2263,7 @@
vp9_set_mv_search_range(x, &ref_mv);
// Work out the size of the first step in the mv step search.
- // 0 here is maximum length first step. 1 is MAX >> 1 etc.
+ // 0 here is maximum length first step. 1 is maximum >> 1 etc.
if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
// Take wtd average of the step_params based on the last frame's
// max mv magnitude and that based on the best ref mvs of the current
@@ -2243,9 +2275,10 @@
}
if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
- int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] -
- MIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
- step_param = MAX(step_param, boffset);
+ int boffset =
+ 2 * (b_width_log2_lookup[BLOCK_64X64] -
+ VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
+ step_param = VPXMAX(step_param, boffset);
}
if (cpi->sf.adaptive_motion_search) {
@@ -2466,7 +2499,7 @@
// motion field, where the distortion gain for a single block may not
// be enough to overcome the cost of a new mv.
if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
- *rate2 += MAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
+ *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
} else {
*rate2 += rate_mv;
}
@@ -2502,10 +2535,10 @@
// initiation of a motion field.
if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]],
mode_mv, refs[0])) {
- *rate2 += MIN(cost_mv_ref(cpi, this_mode,
- mbmi_ext->mode_context[refs[0]]),
- cost_mv_ref(cpi, NEARESTMV,
- mbmi_ext->mode_context[refs[0]]));
+ *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode,
+ mbmi_ext->mode_context[refs[0]]),
+ cost_mv_ref(cpi, NEARESTMV,
+ mbmi_ext->mode_context[refs[0]]));
} else {
*rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
}
@@ -2547,10 +2580,10 @@
rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
filter_cache[i] = rd;
filter_cache[SWITCHABLE_FILTERS] =
- MIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
+ VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
rd += rs_rd;
- *mask_filter = MAX(*mask_filter, rd);
+ *mask_filter = VPXMAX(*mask_filter, rd);
} else {
int rate_sum = 0;
int64_t dist_sum = 0;
@@ -2580,10 +2613,10 @@
rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
filter_cache[i] = rd;
filter_cache[SWITCHABLE_FILTERS] =
- MIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
+ VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
rd += rs_rd;
- *mask_filter = MAX(*mask_filter, rd);
+ *mask_filter = VPXMAX(*mask_filter, rd);
if (i == 0 && intpel_mv) {
tmp_rate_sum = rate_sum;
@@ -2694,7 +2727,7 @@
*distortion += distortion_y;
rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
- rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
+ rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
&sseuv, bsize, ref_best_rd - rdcosty)) {
@@ -2759,7 +2792,7 @@
pd[1].subsampling_x,
pd[1].subsampling_y);
rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
- &dist_uv, &uv_skip, MAX(BLOCK_8X8, bsize),
+ &dist_uv, &uv_skip, VPXMAX(BLOCK_8X8, bsize),
max_uv_tx_size);
if (y_skip && uv_skip) {
@@ -2826,12 +2859,12 @@
// to a predictor with a low spatial complexity compared to the source.
if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
(source_variance > recon_variance)) {
- var_factor = MIN(absvar_diff, MIN(VLOW_ADJ_MAX, var_error));
+ var_factor = VPXMIN(absvar_diff, VPXMIN(VLOW_ADJ_MAX, var_error));
// A second possible case of interest is where the source variance
// is very low and we wish to discourage false texture or motion trails.
} else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
(recon_variance > source_variance)) {
- var_factor = MIN(absvar_diff, MIN(VHIGH_ADJ_MAX, var_error));
+ var_factor = VPXMIN(absvar_diff, VPXMIN(VHIGH_ADJ_MAX, var_error));
}
*this_rd += (*this_rd * var_factor) / 100;
}
@@ -2861,7 +2894,7 @@
top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
- bottom_edge = MAX(top_edge, bottom_edge);
+ bottom_edge = VPXMAX(top_edge, bottom_edge);
}
if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
@@ -2888,7 +2921,7 @@
left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
- right_edge = MAX(left_edge, right_edge);
+ right_edge = VPXMAX(left_edge, right_edge);
}
if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
@@ -3135,7 +3168,7 @@
}
if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
- (ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame))))
+ (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
continue;
if (mode_skip_mask[ref_frame] & (1 << this_mode))
@@ -3149,10 +3182,10 @@
continue;
if (sf->motion_field_mode_search) {
- const int mi_width = MIN(num_8x8_blocks_wide_lookup[bsize],
- tile_info->mi_col_end - mi_col);
- const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize],
- tile_info->mi_row_end - mi_row);
+ const int mi_width = VPXMIN(num_8x8_blocks_wide_lookup[bsize],
+ tile_info->mi_col_end - mi_col);
+ const int mi_height = VPXMIN(num_8x8_blocks_high_lookup[bsize],
+ tile_info->mi_row_end - mi_row);
const int bsl = mi_width_log2_lookup[bsize];
int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl)
+ get_chessboard_index(cm->current_video_frame)) & 0x1;
@@ -3370,9 +3403,9 @@
if (!disable_skip && ref_frame == INTRA_FRAME) {
for (i = 0; i < REFERENCE_MODES; ++i)
- best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
+ best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
+ best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
}
// Did this mode help.. i.e. is it the new best mode
@@ -3471,7 +3504,7 @@
adj_rd = filter_cache[i] - ref;
adj_rd += this_rd;
- best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
+ best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
}
}
}
@@ -3783,6 +3816,7 @@
int this_skip2 = 0;
int64_t total_sse = INT_MAX;
int early_term = 0;
+ struct buf_2d backup_yv12[2][MAX_MB_PLANE];
ref_frame = vp9_ref_order[ref_index].ref_frame[0];
second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
@@ -3814,7 +3848,7 @@
}
if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
- (ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame))))
+ (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
continue;
// Test best rd so far against threshold for trying this mode.
@@ -3840,16 +3874,6 @@
continue;
}
- // TODO(jingning, jkoleszar): scaling reference frame not supported for
- // sub8x8 blocks.
- if (ref_frame > INTRA_FRAME &&
- vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
- continue;
-
- if (second_ref_frame > INTRA_FRAME &&
- vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
- continue;
-
if (comp_pred)
mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
else if (ref_frame != INTRA_FRAME)
@@ -3928,6 +3952,25 @@
int pred_exists = 0;
int uv_skippable;
+ YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
+ int ref;
+
+ for (ref = 0; ref < 2; ++ref) {
+ scaled_ref_frame[ref] = mbmi->ref_frame[ref] > INTRA_FRAME ?
+ vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[ref]) : NULL;
+
+ if (scaled_ref_frame[ref]) {
+ int i;
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_yv12[ref][i] = xd->plane[i].pre[ref];
+ vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
+ NULL);
+ }
+ }
+
this_rd_thresh = (ref_frame == LAST_FRAME) ?
rd_opt->threshes[segment_id][bsize][THR_LAST] :
rd_opt->threshes[segment_id][bsize][THR_ALTR];
@@ -3969,12 +4012,11 @@
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
filter_cache[switchable_filter_index] = tmp_rd;
filter_cache[SWITCHABLE_FILTERS] =
- MIN(filter_cache[SWITCHABLE_FILTERS],
- tmp_rd + rs_rd);
+ VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
tmp_rd += rs_rd;
- mask_filter = MAX(mask_filter, tmp_rd);
+ mask_filter = VPXMAX(mask_filter, tmp_rd);
newbest = (tmp_rd < tmp_best_rd);
if (newbest) {
@@ -4051,9 +4093,9 @@
compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
- tmp_best_rdu = best_rd -
- MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
- RDCOST(x->rdmult, x->rddiv, 0, total_sse));
+ tmp_best_rdu =
+ best_rd - VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
+ RDCOST(x->rdmult, x->rddiv, 0, total_sse));
if (tmp_best_rdu > 0) {
// If even the 'Y' rd value of split is higher than best so far
@@ -4062,14 +4104,31 @@
BLOCK_8X8);
memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
- &uv_sse, BLOCK_8X8, tmp_best_rdu))
+ &uv_sse, BLOCK_8X8, tmp_best_rdu)) {
+ for (ref = 0; ref < 2; ++ref) {
+ if (scaled_ref_frame[ref]) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ xd->plane[i].pre[ref] = backup_yv12[ref][i];
+ }
+ }
continue;
+ }
rate2 += rate_uv;
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
total_sse += uv_sse;
}
+
+ for (ref = 0; ref < 2; ++ref) {
+ if (scaled_ref_frame[ref]) {
+ // Restore the prediction frame pointers to their unscaled versions.
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; ++i)
+ xd->plane[i].pre[ref] = backup_yv12[ref][i];
+ }
+ }
}
if (cm->reference_mode == REFERENCE_MODE_SELECT)
@@ -4113,9 +4172,9 @@
if (!disable_skip && ref_frame == INTRA_FRAME) {
for (i = 0; i < REFERENCE_MODES; ++i)
- best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
+ best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
- best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
+ best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
}
// Did this mode help.. i.e. is it the new best mode
@@ -4214,7 +4273,7 @@
adj_rd = filter_cache[i] - ref;
adj_rd += this_rd;
- best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
+ best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
}
}
diff --git a/libvpx/vp9/encoder/vp9_resize.h b/libvpx/vp9/encoder/vp9_resize.h
index 067af53..b5feb38 100644
--- a/libvpx/vp9/encoder/vp9_resize.h
+++ b/libvpx/vp9/encoder/vp9_resize.h
@@ -14,6 +14,10 @@
#include <stdio.h>
#include "vpx/vpx_integer.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
void vp9_resize_plane(const uint8_t *const input,
int height,
int width,
@@ -121,4 +125,9 @@
int owidth,
int bd);
#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
#endif // VP9_ENCODER_VP9_RESIZE_H_
diff --git a/libvpx/vp9/encoder/vp9_skin_detection.c b/libvpx/vp9/encoder/vp9_skin_detection.c
index aaa8ea0..c2763b7 100644
--- a/libvpx/vp9/encoder/vp9_skin_detection.c
+++ b/libvpx/vp9/encoder/vp9_skin_detection.c
@@ -98,12 +98,13 @@
uint8_t ysource4 = src_y[(ypos + 1) * src_ystride + (ypos + 1)];
uint8_t usource4 = src_u[(uvpos + 1) * src_uvstride + (uvpos + 1)];
uint8_t vsource4 = src_v[(uvpos + 1) * src_uvstride + (uvpos + 1)];
+ int is_skin = 0;
if (mode_filter == 1) {
ysource = (ysource + ysource2 + ysource3 + ysource4) >> 2;
usource = (usource + usource2 + usource3 + usource4) >> 2;
vsource = (vsource + vsource2 + vsource3 + vsource4) >> 2;
}
- const int is_skin = vp9_skin_pixel(ysource, usource, vsource);
+ is_skin = vp9_skin_pixel(ysource, usource, vsource);
for (i = 0; i < y_bsize; i++) {
for (j = 0; j < y_bsize; j++) {
if (is_skin)
diff --git a/libvpx/vp9/encoder/vp9_skin_detection.h b/libvpx/vp9/encoder/vp9_skin_detection.h
index 3d4e737..0a87ef9 100644
--- a/libvpx/vp9/encoder/vp9_skin_detection.h
+++ b/libvpx/vp9/encoder/vp9_skin_detection.h
@@ -25,7 +25,8 @@
#ifdef OUTPUT_YUV_SKINMAP
// For viewing skin map on input source.
-void vp9_compute_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file);
+void vp9_compute_skin_map(struct VP9_COMP *const cpi, FILE *yuv_skinmap_file);
+extern void vp9_write_yuv_frame_420(YV12_BUFFER_CONFIG *s, FILE *f);
#endif
#ifdef __cplusplus
diff --git a/libvpx/vp9/encoder/vp9_speed_features.c b/libvpx/vp9/encoder/vp9_speed_features.c
index 5e72c4c..a539629 100644
--- a/libvpx/vp9/encoder/vp9_speed_features.c
+++ b/libvpx/vp9/encoder/vp9_speed_features.c
@@ -13,6 +13,7 @@
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_speed_features.h"
#include "vp9/encoder/vp9_rdopt.h"
+#include "vpx_dsp/vpx_dsp_common.h"
// Intra only frames, golden frames (except alt ref overlays) and
@@ -49,7 +50,7 @@
VP9_COMMON *const cm = &cpi->common;
if (speed >= 1) {
- if (MIN(cm->width, cm->height) >= 720) {
+ if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
sf->partition_search_breakout_dist_thr = (1 << 23);
@@ -60,7 +61,7 @@
}
if (speed >= 2) {
- if (MIN(cm->width, cm->height) >= 720) {
+ if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
sf->adaptive_pred_interp_filter = 0;
@@ -75,7 +76,7 @@
}
if (speed >= 3) {
- if (MIN(cm->width, cm->height) >= 720) {
+ if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = DISABLE_ALL_SPLIT;
sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0;
sf->partition_search_breakout_dist_thr = (1 << 25);
@@ -99,7 +100,7 @@
}
if (speed >= 4) {
- if (MIN(cm->width, cm->height) >= 720) {
+ if (VPXMIN(cm->width, cm->height) >= 720) {
sf->partition_search_breakout_dist_thr = (1 << 26);
} else {
sf->partition_search_breakout_dist_thr = (1 << 24);
@@ -112,8 +113,14 @@
SPEED_FEATURES *sf, int speed) {
const int boosted = frame_is_boosted(cpi);
+ sf->partition_search_breakout_dist_thr = (1 << 20);
+ sf->partition_search_breakout_rate_thr = 80;
+ sf->tx_size_search_breakout = 1;
sf->adaptive_rd_thresh = 1;
sf->allow_skip_recode = 1;
+ sf->less_rectangular_check = 1;
+ sf->use_square_partition_only = !frame_is_boosted(cpi);
+ sf->use_square_only_threshold = BLOCK_16X16;
if (speed >= 1) {
if ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ||
@@ -122,6 +129,7 @@
} else {
sf->use_square_partition_only = !frame_is_intra_only(cm);
}
+ sf->use_square_only_threshold = BLOCK_4X4;
sf->less_rectangular_check = 1;
@@ -138,9 +146,6 @@
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
-
- sf->tx_size_search_breakout = 1;
- sf->partition_search_breakout_rate_thr = 80;
}
if (speed >= 2) {
@@ -215,7 +220,7 @@
VP9_COMMON *const cm = &cpi->common;
if (speed >= 1) {
- if (MIN(cm->width, cm->height) >= 720) {
+ if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
} else {
@@ -224,7 +229,7 @@
}
if (speed >= 2) {
- if (MIN(cm->width, cm->height) >= 720) {
+ if (VPXMIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
} else {
@@ -233,7 +238,7 @@
}
if (speed >= 5) {
- if (MIN(cm->width, cm->height) >= 720) {
+ if (VPXMIN(cm->width, cm->height) >= 720) {
sf->partition_search_breakout_dist_thr = (1 << 25);
} else {
sf->partition_search_breakout_dist_thr = (1 << 23);
@@ -241,7 +246,7 @@
}
if (speed >= 7) {
- sf->encode_breakout_thresh = (MIN(cm->width, cm->height) >= 720) ?
+ sf->encode_breakout_thresh = (VPXMIN(cm->width, cm->height) >= 720) ?
800 : 300;
}
}
@@ -381,7 +386,6 @@
}
if (speed >= 6) {
- // Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
sf->partition_search_type = VAR_BASED_PARTITION;
// Turn on this to use non-RD key frame coding mode.
sf->use_nonrd_pick_mode = 1;
@@ -471,6 +475,7 @@
sf->partition_search_type = SEARCH_PARTITION;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
+ sf->use_square_only_threshold = BLOCK_SIZES;
sf->auto_min_max_partition_size = NOT_IN_USE;
sf->rd_auto_partition_min_limit = BLOCK_4X4;
sf->default_max_partition_size = BLOCK_64X64;
diff --git a/libvpx/vp9/encoder/vp9_speed_features.h b/libvpx/vp9/encoder/vp9_speed_features.h
index 95038ce..575e98c 100644
--- a/libvpx/vp9/encoder/vp9_speed_features.h
+++ b/libvpx/vp9/encoder/vp9_speed_features.h
@@ -267,6 +267,7 @@
// Disable testing non square partitions. (eg 16x32)
int use_square_partition_only;
+ BLOCK_SIZE use_square_only_threshold;
// Sets min and max partition sizes for this 64x64 region based on the
// same 64x64 in last encoded frame, and the left and above neighbor.
diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.c b/libvpx/vp9/encoder/vp9_svc_layercontext.c
index e69404a..8a6818c 100644
--- a/libvpx/vp9/encoder/vp9_svc_layercontext.c
+++ b/libvpx/vp9/encoder/vp9_svc_layercontext.c
@@ -10,9 +10,11 @@
#include <math.h>
+#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_svc_layercontext.h"
#include "vp9/encoder/vp9_extend.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#define SMALL_FRAME_FB_IDX 7
#define SMALL_FRAME_WIDTH 32
@@ -21,11 +23,14 @@
void vp9_init_layer_context(VP9_COMP *const cpi) {
SVC *const svc = &cpi->svc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ int mi_rows = cpi->common.mi_rows;
+ int mi_cols = cpi->common.mi_cols;
int sl, tl;
int alt_ref_idx = svc->number_spatial_layers;
svc->spatial_layer_id = 0;
svc->temporal_layer_id = 0;
+ svc->first_spatial_layer_to_encode = 0;
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img,
@@ -93,6 +98,26 @@
lrc->buffer_level = oxcf->starting_buffer_level_ms *
lc->target_bandwidth / 1000;
lrc->bits_off_target = lrc->buffer_level;
+
+ // Initialize the cyclic refresh parameters. If spatial layers are used
+ // (i.e., ss_number_layers > 1), these need to be updated per spatial
+ // layer.
+ // Cyclic refresh is only applied on base temporal layer.
+ if (oxcf->ss_number_layers > 1 &&
+ tl == 0) {
+ size_t last_coded_q_map_size;
+ size_t consec_zero_mv_size;
+ lc->sb_index = 0;
+ lc->map = vpx_malloc(mi_rows * mi_cols * sizeof(signed char));
+ memset(lc->map, 0, mi_rows * mi_cols);
+ last_coded_q_map_size = mi_rows * mi_cols * sizeof(uint8_t);
+ lc->last_coded_q_map = vpx_malloc(last_coded_q_map_size);
+ assert(MAXQ <= 255);
+ memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size);
+ consec_zero_mv_size = mi_rows * mi_cols * sizeof(uint8_t);
+ lc->consec_zero_mv = vpx_malloc(consec_zero_mv_size);
+ memset(lc->consec_zero_mv, 0, consec_zero_mv_size);
+ }
}
}
@@ -113,8 +138,6 @@
if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
- spatial_layer_target = 0;
-
for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
svc->layer_context[layer].target_bandwidth =
@@ -141,8 +164,8 @@
lrc->maximum_buffer_size =
(int64_t)(rc->maximum_buffer_size * bitrate_alloc);
lrc->bits_off_target =
- MIN(lrc->bits_off_target, lrc->maximum_buffer_size);
- lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size);
+ VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size);
+ lrc->buffer_level = VPXMIN(lrc->buffer_level, lrc->maximum_buffer_size);
lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl];
lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
@@ -173,9 +196,9 @@
(int64_t)(rc->optimal_buffer_level * bitrate_alloc);
lrc->maximum_buffer_size =
(int64_t)(rc->maximum_buffer_size * bitrate_alloc);
- lrc->bits_off_target = MIN(lrc->bits_off_target,
- lrc->maximum_buffer_size);
- lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size);
+ lrc->bits_off_target = VPXMIN(lrc->bits_off_target,
+ lrc->maximum_buffer_size);
+ lrc->buffer_level = VPXMIN(lrc->buffer_level, lrc->maximum_buffer_size);
// Update framerate-related quantities.
if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer];
@@ -258,6 +281,24 @@
cpi->rc.frames_since_key = old_frame_since_key;
cpi->rc.frames_to_key = old_frame_to_key;
}
+
+ // For spatial-svc, allow cyclic-refresh to be applied on the spatial layers,
+ // for the base temporal layer.
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
+ cpi->svc.number_spatial_layers > 1 &&
+ cpi->svc.temporal_layer_id == 0) {
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ signed char *temp = cr->map;
+ uint8_t *temp2 = cr->last_coded_q_map;
+ uint8_t *temp3 = cr->consec_zero_mv;
+ cr->map = lc->map;
+ lc->map = temp;
+ cr->last_coded_q_map = lc->last_coded_q_map;
+ lc->last_coded_q_map = temp2;
+ cr->consec_zero_mv = lc->consec_zero_mv;
+ lc->consec_zero_mv = temp3;
+ cr->sb_index = lc->sb_index;
+ }
}
void vp9_save_layer_context(VP9_COMP *const cpi) {
@@ -268,6 +309,24 @@
lc->twopass = cpi->twopass;
lc->target_bandwidth = (int)oxcf->target_bandwidth;
lc->alt_ref_source = cpi->alt_ref_source;
+
+ // For spatial-svc, allow cyclic-refresh to be applied on the spatial layers,
+ // for the base temporal layer.
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
+ cpi->svc.number_spatial_layers > 1 &&
+ cpi->svc.temporal_layer_id == 0) {
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ signed char *temp = lc->map;
+ uint8_t *temp2 = lc->last_coded_q_map;
+ uint8_t *temp3 = lc->consec_zero_mv;
+ lc->map = cr->map;
+ cr->map = temp;
+ lc->last_coded_q_map = cr->last_coded_q_map;
+ cr->last_coded_q_map = temp2;
+ lc->consec_zero_mv = cr->consec_zero_mv;
+ cr->consec_zero_mv = temp3;
+ lc->sb_index = cr->sb_index;
+ }
}
void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) {
@@ -492,19 +551,35 @@
set_flags_and_fb_idx_for_temporal_mode2(cpi);
} else if (cpi->svc.temporal_layering_mode ==
VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
- // VP9E_TEMPORAL_LAYERING_MODE_BYPASS :
- // if the code goes here, it means the encoder will be relying on the
- // flags from outside for layering.
- // However, since when spatial+temporal layering is used, the buffer indices
- // cannot be derived automatically, the bypass mode will only work when the
- // number of spatial layers equals 1.
- assert(cpi->svc.number_spatial_layers == 1);
+ // In the BYPASS/flexible mode, the encoder is relying on the application
+ // to specify, for each spatial layer, the flags and buffer indices for the
+ // layering.
+ // Note that the check (cpi->ext_refresh_frame_flags_pending == 0) is
+ // needed to support the case where the frame flags may be passed in via
+ // vpx_codec_encode(), which can be used for the temporal-only svc case.
+ if (cpi->ext_refresh_frame_flags_pending == 0) {
+ int sl;
+ cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
+ sl = cpi->svc.spatial_layer_id;
+ vp9_apply_encoding_flags(cpi, cpi->svc.ext_frame_flags[sl]);
+ cpi->lst_fb_idx = cpi->svc.ext_lst_fb_idx[sl];
+ cpi->gld_fb_idx = cpi->svc.ext_gld_fb_idx[sl];
+ cpi->alt_fb_idx = cpi->svc.ext_alt_fb_idx[sl];
+ }
}
lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id];
+ // Setting the worst/best_quality via the encoder control: SET_SVC_PARAMETERS,
+ // only for non-BYPASS mode for now.
+ if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ RATE_CONTROL *const lrc = &lc->rc;
+ lrc->worst_quality = vp9_quantizer_to_qindex(lc->max_q);
+ lrc->best_quality = vp9_quantizer_to_qindex(lc->min_q);
+ }
+
get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height,
lc->scaling_factor_num, lc->scaling_factor_den,
&width, &height);
@@ -643,3 +718,21 @@
}
return buf;
}
+
+// Frees per-layer cyclic refresh buffers and NULLs them (safe to call twice).
+void vp9_free_svc_cyclic_refresh(VP9_COMP *const cpi) {
+  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+  int sl, tl;
+  for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
+    for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
+      const int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
+      LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer];
+      vpx_free(lc->map);  // vpx_free(NULL) is a no-op, so no guard is needed.
+      lc->map = NULL;  // Clear so a later free cannot touch stale memory.
+      vpx_free(lc->last_coded_q_map);
+      lc->last_coded_q_map = NULL;
+      vpx_free(lc->consec_zero_mv);
+      lc->consec_zero_mv = NULL;
+    }
+  }
+}
diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.h b/libvpx/vp9/encoder/vp9_svc_layercontext.h
index b6a5ea5..694b5ab 100644
--- a/libvpx/vp9/encoder/vp9_svc_layercontext.h
+++ b/libvpx/vp9/encoder/vp9_svc_layercontext.h
@@ -41,6 +41,11 @@
int has_alt_frame;
size_t layer_size;
struct vpx_psnr_pkt psnr_pkt;
+ // Cyclic refresh parameters (aq-mode=3), that need to be updated per-frame.
+ int sb_index;
+ signed char *map;
+ uint8_t *last_coded_q_map;
+ uint8_t *consec_zero_mv;
} LAYER_CONTEXT;
typedef struct {
@@ -50,6 +55,7 @@
int number_temporal_layers;
int spatial_layer_to_encode;
+ int first_spatial_layer_to_encode;
// Workaround for multiple frame contexts
enum {
@@ -70,6 +76,12 @@
// Indicates what sort of temporal layering is used.
// Currently, this only works for CBR mode.
VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode;
+ // Frame flags and buffer indexes for each spatial layer, set by the
+ // application (external settings).
+ int ext_frame_flags[VPX_MAX_LAYERS];
+ int ext_lst_fb_idx[VPX_MAX_LAYERS];
+ int ext_gld_fb_idx[VPX_MAX_LAYERS];
+ int ext_alt_fb_idx[VPX_MAX_LAYERS];
} SVC;
struct VP9_COMP;
@@ -115,6 +127,8 @@
int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi);
+void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/libvpx/vp9/encoder/vp9_temporal_filter.c b/libvpx/vp9/encoder/vp9_temporal_filter.c
index 439eac6..16f9c85 100644
--- a/libvpx/vp9/encoder/vp9_temporal_filter.c
+++ b/libvpx/vp9/encoder/vp9_temporal_filter.c
@@ -23,6 +23,7 @@
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_temporal_filter.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
@@ -216,7 +217,8 @@
int stride) {
MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
- const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
+ MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
+ const SEARCH_METHODS old_search_method = mv_sf->search_method;
int step_param;
int sadpb = x->sadperbit16;
int bestsme = INT_MAX;
@@ -242,12 +244,13 @@
xd->plane[0].pre[0].stride = stride;
step_param = mv_sf->reduce_first_step_size;
- step_param = MIN(step_param, MAX_MVSEARCH_STEPS - 2);
+ step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
- // Ignore mv costing by sending NULL pointer instead of cost arrays
- vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
- cond_cost_list(cpi, cost_list),
- &cpi->fn_ptr[BLOCK_16X16], 0, &best_ref_mv1, ref_mv);
+ mv_sf->search_method = HEX;
+ vp9_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param,
+ sadpb, cond_cost_list(cpi, cost_list), &best_ref_mv1,
+ ref_mv, 0, 0);
+ mv_sf->search_method = old_search_method;
// Ignore mv costing by sending NULL pointer instead of cost array
bestsme = cpi->find_fractional_mv_step(x, ref_mv,
@@ -718,7 +721,7 @@
"Failed to reallocate alt_ref_buffer");
}
frames[frame] = vp9_scale_if_required(
- cm, frames[frame], &cpi->svc.scaled_frames[frame_used]);
+ cm, frames[frame], &cpi->svc.scaled_frames[frame_used], 0);
++frame_used;
}
}
diff --git a/libvpx/vp9/encoder/vp9_tokenize.c b/libvpx/vp9/encoder/vp9_tokenize.c
index 85cb2fc..6076e2a 100644
--- a/libvpx/vp9/encoder/vp9_tokenize.c
+++ b/libvpx/vp9/encoder/vp9_tokenize.c
@@ -66,14 +66,6 @@
-CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 10 = CAT_FIVE
};
-static const vpx_tree_index cat1[2] = {0, 0};
-static const vpx_tree_index cat2[4] = {2, 2, 0, 0};
-static const vpx_tree_index cat3[6] = {2, 2, 4, 4, 0, 0};
-static const vpx_tree_index cat4[8] = {2, 2, 4, 4, 6, 6, 0, 0};
-static const vpx_tree_index cat5[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0};
-static const vpx_tree_index cat6[28] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12,
- 14, 14, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 26, 26, 0, 0};
-
static const int16_t zero_cost[] = {0};
static const int16_t one_cost[] = {255, 257};
static const int16_t two_cost[] = {255, 257};
@@ -366,68 +358,49 @@
};
#endif
-#if CONFIG_VP9_HIGHBITDEPTH
-static const vpx_tree_index cat1_high10[2] = {0, 0};
-static const vpx_tree_index cat2_high10[4] = {2, 2, 0, 0};
-static const vpx_tree_index cat3_high10[6] = {2, 2, 4, 4, 0, 0};
-static const vpx_tree_index cat4_high10[8] = {2, 2, 4, 4, 6, 6, 0, 0};
-static const vpx_tree_index cat5_high10[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0};
-static const vpx_tree_index cat6_high10[32] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
- 12, 12, 14, 14, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 26, 26, 28, 28,
- 30, 30, 0, 0};
-static const vpx_tree_index cat1_high12[2] = {0, 0};
-static const vpx_tree_index cat2_high12[4] = {2, 2, 0, 0};
-static const vpx_tree_index cat3_high12[6] = {2, 2, 4, 4, 0, 0};
-static const vpx_tree_index cat4_high12[8] = {2, 2, 4, 4, 6, 6, 0, 0};
-static const vpx_tree_index cat5_high12[10] = {2, 2, 4, 4, 6, 6, 8, 8, 0, 0};
-static const vpx_tree_index cat6_high12[36] = {2, 2, 4, 4, 6, 6, 8, 8, 10, 10,
- 12, 12, 14, 14, 16, 16, 18, 18, 20, 20, 22, 22, 24, 24, 26, 26, 28, 28,
- 30, 30, 32, 32, 34, 34, 0, 0};
-#endif
-
const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS] = {
- {0, 0, 0, 0, zero_cost}, // ZERO_TOKEN
- {0, 0, 0, 1, one_cost}, // ONE_TOKEN
- {0, 0, 0, 2, two_cost}, // TWO_TOKEN
- {0, 0, 0, 3, three_cost}, // THREE_TOKEN
- {0, 0, 0, 4, four_cost}, // FOUR_TOKEN
- {cat1, vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CATEGORY1_TOKEN
- {cat2, vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CATEGORY2_TOKEN
- {cat3, vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CATEGORY3_TOKEN
- {cat4, vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CATEGORY4_TOKEN
- {cat5, vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CATEGORY5_TOKEN
- {cat6, vp9_cat6_prob, 14, CAT6_MIN_VAL, 0}, // CATEGORY6_TOKEN
- {0, 0, 0, 0, zero_cost} // EOB_TOKEN
+ {0, 0, 0, zero_cost}, // ZERO_TOKEN
+ {0, 0, 1, one_cost}, // ONE_TOKEN
+ {0, 0, 2, two_cost}, // TWO_TOKEN
+ {0, 0, 3, three_cost}, // THREE_TOKEN
+ {0, 0, 4, four_cost}, // FOUR_TOKEN
+ {vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CATEGORY1_TOKEN
+ {vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CATEGORY2_TOKEN
+ {vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CATEGORY3_TOKEN
+ {vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CATEGORY4_TOKEN
+ {vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CATEGORY5_TOKEN
+ {vp9_cat6_prob, 14, CAT6_MIN_VAL, 0}, // CATEGORY6_TOKEN
+ {0, 0, 0, zero_cost} // EOB_TOKEN
};
#if CONFIG_VP9_HIGHBITDEPTH
const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS] = {
- {0, 0, 0, 0, zero_cost}, // ZERO
- {0, 0, 0, 1, one_cost}, // ONE
- {0, 0, 0, 2, two_cost}, // TWO
- {0, 0, 0, 3, three_cost}, // THREE
- {0, 0, 0, 4, four_cost}, // FOUR
- {cat1_high10, vp9_cat1_prob_high10, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
- {cat2_high10, vp9_cat2_prob_high10, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
- {cat3_high10, vp9_cat3_prob_high10, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
- {cat4_high10, vp9_cat4_prob_high10, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
- {cat5_high10, vp9_cat5_prob_high10, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
- {cat6_high10, vp9_cat6_prob_high10, 16, CAT6_MIN_VAL, 0}, // CAT6
- {0, 0, 0, 0, zero_cost} // EOB
+ {0, 0, 0, zero_cost}, // ZERO
+ {0, 0, 1, one_cost}, // ONE
+ {0, 0, 2, two_cost}, // TWO
+ {0, 0, 3, three_cost}, // THREE
+ {0, 0, 4, four_cost}, // FOUR
+ {vp9_cat1_prob_high10, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
+ {vp9_cat2_prob_high10, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
+ {vp9_cat3_prob_high10, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
+ {vp9_cat4_prob_high10, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
+ {vp9_cat5_prob_high10, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
+ {vp9_cat6_prob_high10, 16, CAT6_MIN_VAL, 0}, // CAT6
+ {0, 0, 0, zero_cost} // EOB
};
const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS] = {
- {0, 0, 0, 0, zero_cost}, // ZERO
- {0, 0, 0, 1, one_cost}, // ONE
- {0, 0, 0, 2, two_cost}, // TWO
- {0, 0, 0, 3, three_cost}, // THREE
- {0, 0, 0, 4, four_cost}, // FOUR
- {cat1_high12, vp9_cat1_prob_high12, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
- {cat2_high12, vp9_cat2_prob_high12, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
- {cat3_high12, vp9_cat3_prob_high12, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
- {cat4_high12, vp9_cat4_prob_high12, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
- {cat5_high12, vp9_cat5_prob_high12, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
- {cat6_high12, vp9_cat6_prob_high12, 18, CAT6_MIN_VAL, 0}, // CAT6
- {0, 0, 0, 0, zero_cost} // EOB
+ {0, 0, 0, zero_cost}, // ZERO
+ {0, 0, 1, one_cost}, // ONE
+ {0, 0, 2, two_cost}, // TWO
+ {0, 0, 3, three_cost}, // THREE
+ {0, 0, 4, four_cost}, // FOUR
+ {vp9_cat1_prob_high12, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
+ {vp9_cat2_prob_high12, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
+ {vp9_cat3_prob_high12, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
+ {vp9_cat4_prob_high12, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
+ {vp9_cat5_prob_high12, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
+ {vp9_cat6_prob_high12, 18, CAT6_MIN_VAL, 0}, // CAT6
+ {0, 0, 0, zero_cost} // EOB
};
#endif
@@ -503,7 +476,7 @@
int c;
TOKENEXTRA *t = *tp; /* store tokens starting here */
int eob = p->eobs[block];
- const PLANE_TYPE type = pd->plane_type;
+ const PLANE_TYPE type = get_plane_type(plane);
const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
const int segment_id = mbmi->segment_id;
const int16_t *scan, *nb;
diff --git a/libvpx/vp9/encoder/vp9_tokenize.h b/libvpx/vp9/encoder/vp9_tokenize.h
index 11b78ba..c0f09c7 100644
--- a/libvpx/vp9/encoder/vp9_tokenize.h
+++ b/libvpx/vp9/encoder/vp9_tokenize.h
@@ -54,6 +54,20 @@
void vp9_tokenize_sb(struct VP9_COMP *cpi, struct ThreadData *td,
TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
+typedef struct {
+ const vpx_prob *prob;  // category probability table; 0 for ZERO..FOUR and EOB
+ int len;  // 0 for ZERO..FOUR/EOB, 1..5 for CAT1..CAT5 (presumably the extra-bit count)
+ int base_val;  // token value for ZERO..FOUR, CATn_MIN_VAL for the categories
+ const int16_t *cost;  // per-token cost table; the CAT6 entries leave it 0
+} vp9_extra_bit;
+
+// indexed by token value
+extern const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS];
+#if CONFIG_VP9_HIGHBITDEPTH
+extern const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS];
+extern const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS];
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
extern const int16_t *vp9_dct_value_cost_ptr;
/* TODO: The Token field should be broken out into a separate char array to
* improve cache locality, since it's needed for costing when the rest of the
diff --git a/libvpx/vp9/encoder/x86/vp9_highbd_error_avx.asm b/libvpx/vp9/encoder/x86/vp9_highbd_error_avx.asm
new file mode 100644
index 0000000..e476323
--- /dev/null
+++ b/libvpx/vp9/encoder/x86/vp9_highbd_error_avx.asm
@@ -0,0 +1,261 @@
+;
+; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%define private_prefix vp9
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+ALIGN 16
+
+;
+; int64_t vp9_highbd_block_error_8bit(int32_t *coeff, int32_t *dqcoeff,
+; intptr_t block_size, int64_t *ssz)
+;
+
+INIT_XMM avx
+cglobal highbd_block_error_8bit, 4, 5, 8, uqc, dqc, size, ssz
+ vzeroupper
+
+ ; If only one iteration is required, then handle this as a special case.
+ ; It is the most frequent case, so we can have a significant gain here
+ ; by not setting up a loop and accumulators.
+ cmp sizeq, 16
+ jne .generic
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; Common case of size == 16
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ ; Load input vectors
+ mova xm0, [dqcq]
+ packssdw xm0, [dqcq+16]
+ mova xm2, [uqcq]
+ packssdw xm2, [uqcq+16]
+
+ mova xm1, [dqcq+32]
+ packssdw xm1, [dqcq+48]
+ mova xm3, [uqcq+32]
+ packssdw xm3, [uqcq+48]
+
+ ; Compute the errors.
+ psubw xm0, xm2
+ psubw xm1, xm3
+
+ ; Individual errors are max 15bit+sign, so squares are 30bit, and
+ ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit).
+ pmaddwd xm2, xm2
+ pmaddwd xm3, xm3
+
+ pmaddwd xm0, xm0
+ pmaddwd xm1, xm1
+
+ ; Squares are always positive, so we can use unsigned arithmetic after
+ ; squaring. As mentioned earlier 2 sums fit in 31 bits, so 4 sums will
+ ; fit in 32bits
+ paddd xm2, xm3
+ paddd xm0, xm1
+
+ ; Accumulate horizontally in 64 bits, there is no chance of overflow here
+ pxor xm5, xm5
+
+ pblendw xm3, xm5, xm2, 0x33 ; Zero extended low of a pair of 32 bits
+ psrlq xm2, 32 ; Zero extended high of a pair of 32 bits
+
+ pblendw xm1, xm5, xm0, 0x33 ; Zero extended low of a pair of 32 bits
+ psrlq xm0, 32 ; Zero extended high of a pair of 32 bits
+
+ paddq xm2, xm3
+ paddq xm0, xm1
+
+ psrldq xm3, xm2, 8
+ psrldq xm1, xm0, 8
+
+ paddq xm2, xm3
+ paddq xm0, xm1
+
+ ; Store the return value
+%if ARCH_X86_64
+ movq rax, xm0
+ movq [sszq], xm2
+%else
+ movd eax, xm0
+ pextrd edx, xm0, 1
+ movq [sszd], xm2
+%endif
+ RET
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; Generic case of size != 16, speculative low precision
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ALIGN 16
+.generic:
+ pxor xm4, xm4 ; sse accumulator
+ pxor xm5, xm5 ; overflow detection register for xm4
+ pxor xm6, xm6 ; ssz accumulator
+ pxor xm7, xm7 ; overflow detection register for xm6
+ lea uqcq, [uqcq+sizeq*4]
+ lea dqcq, [dqcq+sizeq*4]
+ neg sizeq
+
+ ; Push the negative size as the high precision code might need it
+ push sizeq
+
+.loop:
+ ; Load input vectors
+ mova xm0, [dqcq+sizeq*4]
+ packssdw xm0, [dqcq+sizeq*4+16]
+ mova xm2, [uqcq+sizeq*4]
+ packssdw xm2, [uqcq+sizeq*4+16]
+
+ mova xm1, [dqcq+sizeq*4+32]
+ packssdw xm1, [dqcq+sizeq*4+48]
+ mova xm3, [uqcq+sizeq*4+32]
+ packssdw xm3, [uqcq+sizeq*4+48]
+
+ add sizeq, 16
+
+ ; Compute the squared errors.
+ ; Individual errors are max 15bit+sign, so squares are 30bit, and
+ ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit).
+ psubw xm0, xm2
+ pmaddwd xm2, xm2
+ pmaddwd xm0, xm0
+
+ psubw xm1, xm3
+ pmaddwd xm3, xm3
+ pmaddwd xm1, xm1
+
+ ; Squares are always positive, so we can use unsigned arithmetic after
+ ; squaring. As mentioned earlier 2 sums fit in 31 bits, so 4 sums will
+ ; fit in 32bits
+ paddd xm2, xm3
+ paddd xm0, xm1
+
+ ; We accumulate using 32 bit arithmetic, but detect potential overflow
+ ; by checking if the MSB of either accumulator has ever been set.
+ ; If yes, we redo the whole computation at the end in higher precision, but
+ ; this happens extremely rarely, so we still achieve a net gain.
+ paddd xm4, xm0
+ paddd xm6, xm2
+ por xm5, xm4 ; OR in the accumulator for overflow detection
+ por xm7, xm6 ; OR in the accumulator for overflow detection
+
+ jnz .loop
+
+ ; Add pairs horizontally (still only on 32 bits)
+ phaddd xm4, xm4
+ por xm5, xm4 ; OR in the accumulator for overflow detection
+ phaddd xm6, xm6
+ por xm7, xm6 ; OR in the accumulator for overflow detection
+
+ ; Check for possibility of overflow by testing if the MSB (bit 31) of any
+ ; dword lane has ever been set. If none was, then there was no overflow and
+ ; the final sum will fit in 32 bits. If overflow may have happened, then
+ ; we redo the whole computation in higher precision.
+ por xm7, xm5
+ pmovmskb r4, xm7
+ test r4, 0x8888
+ jnz .highprec
+
+ phaddd xm4, xm4
+ phaddd xm6, xm6
+ pmovzxdq xm4, xm4
+ pmovzxdq xm6, xm6
+
+ ; Restore stack
+ pop sizeq
+
+ ; Store the return value
+%if ARCH_X86_64
+ movq rax, xm4
+ movq [sszq], xm6
+%else
+ movd eax, xm4
+ pextrd edx, xm4, 1
+ movq [sszd], xm6
+%endif
+ RET
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; Generic case of size != 16, high precision case
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+.highprec:
+ pxor xm4, xm4 ; sse accumulator
+ pxor xm5, xm5 ; dedicated zero register
+ pxor xm6, xm6 ; ssz accumulator
+ pop sizeq
+
+.loophp:
+ mova xm0, [dqcq+sizeq*4]
+ packssdw xm0, [dqcq+sizeq*4+16]
+ mova xm2, [uqcq+sizeq*4]
+ packssdw xm2, [uqcq+sizeq*4+16]
+
+ mova xm1, [dqcq+sizeq*4+32]
+ packssdw xm1, [dqcq+sizeq*4+48]
+ mova xm3, [uqcq+sizeq*4+32]
+ packssdw xm3, [uqcq+sizeq*4+48]
+
+ add sizeq, 16
+
+ ; individual errors are max. 15bit+sign, so squares are 30bit, and
+ ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
+
+ psubw xm0, xm2
+ pmaddwd xm2, xm2
+ pmaddwd xm0, xm0
+
+ psubw xm1, xm3
+ pmaddwd xm3, xm3
+ pmaddwd xm1, xm1
+
+ ; accumulate in 64bit
+ punpckldq xm7, xm0, xm5
+ punpckhdq xm0, xm5
+ paddq xm4, xm7
+
+ punpckldq xm7, xm2, xm5
+ punpckhdq xm2, xm5
+ paddq xm6, xm7
+
+ punpckldq xm7, xm1, xm5
+ punpckhdq xm1, xm5
+ paddq xm4, xm7
+
+ punpckldq xm7, xm3, xm5
+ punpckhdq xm3, xm5
+ paddq xm6, xm7
+
+ paddq xm4, xm0
+ paddq xm4, xm1
+ paddq xm6, xm2
+ paddq xm6, xm3
+
+ jnz .loophp
+
+ ; Accumulate horizontally
+ movhlps xm5, xm4
+ movhlps xm7, xm6
+ paddq xm4, xm5
+ paddq xm6, xm7
+
+ ; Store the return value
+%if ARCH_X86_64
+ movq rax, xm4
+ movq [sszq], xm6
+%else
+ movd eax, xm4
+ pextrd edx, xm4, 1
+ movq [sszd], xm6
+%endif
+ RET
+
+END
diff --git a/libvpx/vp9/encoder/x86/vp9_highbd_error_sse2.asm b/libvpx/vp9/encoder/x86/vp9_highbd_error_sse2.asm
new file mode 100644
index 0000000..f3b8f01
--- /dev/null
+++ b/libvpx/vp9/encoder/x86/vp9_highbd_error_sse2.asm
@@ -0,0 +1,98 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%define private_prefix vp9
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+ALIGN 16
+
+;
+; int64_t vp9_highbd_block_error_8bit(int32_t *coeff, int32_t *dqcoeff,
+; intptr_t block_size, int64_t *ssz)
+;
+
+INIT_XMM sse2
+cglobal highbd_block_error_8bit, 3, 3, 8, uqc, dqc, size, ssz
+ pxor m4, m4 ; sse accumulator
+ pxor m6, m6 ; ssz accumulator
+ pxor m5, m5 ; dedicated zero register
+ lea uqcq, [uqcq+sizeq*4]
+ lea dqcq, [dqcq+sizeq*4]
+ neg sizeq
+
+ ALIGN 16
+
+.loop:
+ mova m0, [dqcq+sizeq*4]
+ packssdw m0, [dqcq+sizeq*4+mmsize]
+ mova m2, [uqcq+sizeq*4]
+ packssdw m2, [uqcq+sizeq*4+mmsize]
+
+ mova m1, [dqcq+sizeq*4+mmsize*2]
+ packssdw m1, [dqcq+sizeq*4+mmsize*3]
+ mova m3, [uqcq+sizeq*4+mmsize*2]
+ packssdw m3, [uqcq+sizeq*4+mmsize*3]
+
+ add sizeq, mmsize
+
+ ; individual errors are max. 15bit+sign, so squares are 30bit, and
+ ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit)
+
+ psubw m0, m2
+ pmaddwd m2, m2
+ pmaddwd m0, m0
+
+ psubw m1, m3
+ pmaddwd m3, m3
+ pmaddwd m1, m1
+
+ ; accumulate in 64bit
+ punpckldq m7, m0, m5
+ punpckhdq m0, m5
+ paddq m4, m7
+
+ punpckldq m7, m2, m5
+ punpckhdq m2, m5
+ paddq m6, m7
+
+ punpckldq m7, m1, m5
+ punpckhdq m1, m5
+ paddq m4, m7
+
+ punpckldq m7, m3, m5
+ punpckhdq m3, m5
+ paddq m6, m7
+
+ paddq m4, m0
+ paddq m4, m1
+ paddq m6, m2
+ paddq m6, m3
+
+ jnz .loop
+
+ ; accumulate horizontally and store in return value
+ movhlps m5, m4
+ movhlps m7, m6
+ paddq m4, m5
+ paddq m6, m7
+
+%if ARCH_X86_64
+ movq rax, m4
+ movq [sszq], m6
+%else
+ mov eax, sszm
+ pshufd m5, m4, 0x1
+ movq [eax], m6
+ movd eax, m4
+ movd edx, m5
+%endif
+ RET
diff --git a/libvpx/vp9/vp9_cx_iface.c b/libvpx/vp9/vp9_cx_iface.c
index f155b9a..6ccba0f 100644
--- a/libvpx/vp9/vp9_cx_iface.c
+++ b/libvpx/vp9/vp9_cx_iface.c
@@ -45,6 +45,9 @@
vpx_bit_depth_t bit_depth;
vp9e_tune_content content;
vpx_color_space_t color_space;
+ vpx_color_range_t color_range;
+ int render_width;
+ int render_height;
};
static struct vp9_extracfg default_extra_cfg = {
@@ -71,6 +74,9 @@
VPX_BITS_8, // Bit depth
VP9E_CONTENT_DEFAULT, // content
VPX_CS_UNKNOWN, // color space
+ 0, // color range
+ 0, // render width
+ 0, // render height
};
struct vpx_codec_alg_priv {
@@ -321,6 +327,8 @@
ERROR("Codec bit-depth 8 not supported in profile > 1");
}
RANGE_CHECK(extra_cfg, color_space, VPX_CS_UNKNOWN, VPX_CS_SRGB);
+ RANGE_CHECK(extra_cfg, color_range,
+ VPX_CR_STUDIO_RANGE, VPX_CR_FULL_RANGE);
return VPX_CODEC_OK;
}
@@ -465,6 +473,9 @@
#endif
oxcf->color_space = extra_cfg->color_space;
+ oxcf->color_range = extra_cfg->color_range;
+ oxcf->render_width = extra_cfg->render_width;
+ oxcf->render_height = extra_cfg->render_height;
oxcf->arnr_max_frames = extra_cfg->arnr_max_frames;
oxcf->arnr_strength = extra_cfg->arnr_strength;
oxcf->min_gf_interval = extra_cfg->min_gf_interval;
@@ -1256,30 +1267,6 @@
}
}
-static vpx_codec_err_t ctrl_update_entropy(vpx_codec_alg_priv_t *ctx,
- va_list args) {
- const int update = va_arg(args, int);
-
- vp9_update_entropy(ctx->cpi, update);
- return VPX_CODEC_OK;
-}
-
-static vpx_codec_err_t ctrl_update_reference(vpx_codec_alg_priv_t *ctx,
- va_list args) {
- const int ref_frame_flags = va_arg(args, int);
-
- vp9_update_reference(ctx->cpi, ref_frame_flags);
- return VPX_CODEC_OK;
-}
-
-static vpx_codec_err_t ctrl_use_reference(vpx_codec_alg_priv_t *ctx,
- va_list args) {
- const int reference_flag = va_arg(args, int);
-
- vp9_use_as_reference(ctx->cpi, reference_flag);
- return VPX_CODEC_OK;
-}
-
static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx,
va_list args) {
(void)ctx;
@@ -1362,17 +1349,21 @@
VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
SVC *const svc = &cpi->svc;
- svc->spatial_layer_id = data->spatial_layer_id;
+ svc->first_spatial_layer_to_encode = data->spatial_layer_id;
+ svc->spatial_layer_to_encode = data->spatial_layer_id;
svc->temporal_layer_id = data->temporal_layer_id;
// Checks on valid layer_id input.
if (svc->temporal_layer_id < 0 ||
svc->temporal_layer_id >= (int)ctx->cfg.ts_number_layers) {
return VPX_CODEC_INVALID_PARAM;
}
- if (svc->spatial_layer_id < 0 ||
- svc->spatial_layer_id >= (int)ctx->cfg.ss_number_layers) {
+ if (svc->first_spatial_layer_to_encode < 0 ||
+ svc->first_spatial_layer_to_encode >= (int)ctx->cfg.ss_number_layers) {
return VPX_CODEC_INVALID_PARAM;
}
+ // First spatial layer to encode not implemented for two-pass.
+ if (is_two_pass_svc(cpi) && svc->first_spatial_layer_to_encode > 0)
+ return VPX_CODEC_INVALID_PARAM;
return VPX_CODEC_OK;
}
@@ -1412,6 +1403,20 @@
return VPX_CODEC_OK;
}
+// Control handler for VP9E_SET_SVC_REF_FRAME_CONFIG. Copies the frame flags
+// and the last/golden/altref buffer indices of every spatial layer from the
+// caller's vpx_svc_ref_frame_config_t into cpi->svc.
+// Returns VPX_CODEC_INVALID_PARAM if the caller passed a NULL config,
+// VPX_CODEC_OK otherwise.
+static vpx_codec_err_t ctrl_set_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx,
+                                                     va_list args) {
+  VP9_COMP *const cpi = ctx->cpi;
+  // The config is only read here, so keep it const.
+  const vpx_svc_ref_frame_config_t *const data =
+      va_arg(args, vpx_svc_ref_frame_config_t *);
+  int sl;
+
+  // Reject a NULL config instead of dereferencing it below.
+  if (data == NULL)
+    return VPX_CODEC_INVALID_PARAM;
+
+  for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
+    cpi->svc.ext_frame_flags[sl] = data->frame_flags[sl];
+    cpi->svc.ext_lst_fb_idx[sl] = data->lst_fb_idx[sl];
+    cpi->svc.ext_gld_fb_idx[sl] = data->gld_fb_idx[sl];
+    cpi->svc.ext_alt_fb_idx[sl] = data->alt_fb_idx[sl];
+  }
+  return VPX_CODEC_OK;
+}
+
static vpx_codec_err_t ctrl_register_cx_callback(vpx_codec_alg_priv_t *ctx,
va_list args) {
vpx_codec_priv_output_cx_pkt_cb_pair_t *cbp =
@@ -1436,11 +1441,24 @@
return update_extra_cfg(ctx, &extra_cfg);
}
+// Control handler for VP9E_SET_COLOR_RANGE: writes the requested value into a
+// copy of the current extra config and applies it via update_extra_cfg(),
+// whose result is returned to the caller.
+static vpx_codec_err_t ctrl_set_color_range(vpx_codec_alg_priv_t *ctx,
+                                            va_list args) {
+  struct vp9_extracfg updated_cfg = ctx->extra_cfg;
+
+  updated_cfg.color_range = CAST(VP9E_SET_COLOR_RANGE, args);
+  return update_extra_cfg(ctx, &updated_cfg);
+}
+
+// Control handler for VP9E_SET_RENDER_SIZE. The argument is an int array
+// holding {width, height}; both values are written into a copy of the
+// current extra config, which is then applied via update_extra_cfg().
+// Returns VPX_CODEC_INVALID_PARAM if the caller passed a NULL pointer,
+// otherwise the result of update_extra_cfg().
+static vpx_codec_err_t ctrl_set_render_size(vpx_codec_alg_priv_t *ctx,
+                                            va_list args) {
+  struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+  const int *const render_size = va_arg(args, int *);
+
+  // Reject a NULL array instead of reading through it below.
+  if (render_size == NULL)
+    return VPX_CODEC_INVALID_PARAM;
+
+  extra_cfg.render_width = render_size[0];
+  extra_cfg.render_height = render_size[1];
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+
static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{VP8_COPY_REFERENCE, ctrl_copy_reference},
- {VP8E_UPD_ENTROPY, ctrl_update_entropy},
- {VP8E_UPD_REFERENCE, ctrl_update_reference},
- {VP8E_USE_REFERENCE, ctrl_use_reference},
// Setters
{VP8_SET_REFERENCE, ctrl_set_reference},
@@ -1472,9 +1490,12 @@
{VP9E_SET_SVC_LAYER_ID, ctrl_set_svc_layer_id},
{VP9E_SET_TUNE_CONTENT, ctrl_set_tune_content},
{VP9E_SET_COLOR_SPACE, ctrl_set_color_space},
+ {VP9E_SET_COLOR_RANGE, ctrl_set_color_range},
{VP9E_SET_NOISE_SENSITIVITY, ctrl_set_noise_sensitivity},
{VP9E_SET_MIN_GF_INTERVAL, ctrl_set_min_gf_interval},
{VP9E_SET_MAX_GF_INTERVAL, ctrl_set_max_gf_interval},
+ {VP9E_SET_SVC_REF_FRAME_CONFIG, ctrl_set_svc_ref_frame_config},
+ {VP9E_SET_RENDER_SIZE, ctrl_set_render_size},
// Getters
{VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer},
diff --git a/libvpx/vp9/vp9_dx_iface.c b/libvpx/vp9/vp9_dx_iface.c
index 96ede3c..be5d160 100644
--- a/libvpx/vp9/vp9_dx_iface.c
+++ b/libvpx/vp9/vp9_dx_iface.c
@@ -18,67 +18,19 @@
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"
#include "vpx_dsp/bitreader_buffer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_util/vpx_thread.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_frame_buffers.h"
-#include "vp9/decoder/vp9_decoder.h"
#include "vp9/decoder/vp9_decodeframe.h"
+#include "vp9/vp9_dx_iface.h"
#include "vp9/vp9_iface_common.h"
#define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0)
-typedef vpx_codec_stream_info_t vp9_stream_info_t;
-
-// This limit is due to framebuffer numbers.
-// TODO(hkuang): Remove this limit after implementing ondemand framebuffers.
-#define FRAME_CACHE_SIZE 6 // Cache maximum 6 decoded frames.
-
-typedef struct cache_frame {
- int fb_idx;
- vpx_image_t img;
-} cache_frame;
-
-struct vpx_codec_alg_priv {
- vpx_codec_priv_t base;
- vpx_codec_dec_cfg_t cfg;
- vp9_stream_info_t si;
- int postproc_cfg_set;
- vp8_postproc_cfg_t postproc_cfg;
- vpx_decrypt_cb decrypt_cb;
- void *decrypt_state;
- vpx_image_t img;
- int img_avail;
- int flushed;
- int invert_tile_order;
- int last_show_frame; // Index of last output frame.
- int byte_alignment;
- int skip_loop_filter;
-
- // Frame parallel related.
- int frame_parallel_decode; // frame-based threading.
- VPxWorker *frame_workers;
- int num_frame_workers;
- int next_submit_worker_id;
- int last_submit_worker_id;
- int next_output_worker_id;
- int available_threads;
- cache_frame frame_cache[FRAME_CACHE_SIZE];
- int frame_cache_write;
- int frame_cache_read;
- int num_cache_frames;
- int need_resync; // wait for key/intra-only frame
- // BufferPool that holds all reference frames. Shared by all the FrameWorkers.
- BufferPool *buffer_pool;
-
- // External frame buffer info to save for VP9 common.
- void *ext_priv; // Private data associated with the external frame buffers.
- vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb;
- vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb;
-};
-
static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx,
vpx_codec_priv_enc_mr_cfg_t *data) {
// This function only allocates space for the vpx_codec_alg_priv_t
@@ -87,7 +39,8 @@
(void)data;
if (!ctx->priv) {
- vpx_codec_alg_priv_t *const priv = vpx_calloc(1, sizeof(*priv));
+ vpx_codec_alg_priv_t *const priv =
+ (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv));
if (priv == NULL)
return VPX_CODEC_MEM_ERROR;
@@ -183,7 +136,7 @@
si->w = si->h = 0;
if (decrypt_cb) {
- data_sz = MIN(sizeof(clear_buffer), data_sz);
+ data_sz = VPXMIN(sizeof(clear_buffer), data_sz);
decrypt_cb(decrypt_state, data, clear_buffer, data_sz);
data = clear_buffer;
}
@@ -977,9 +930,9 @@
return VPX_CODEC_INVALID_PARAM;
}
-static vpx_codec_err_t ctrl_get_display_size(vpx_codec_alg_priv_t *ctx,
- va_list args) {
- int *const display_size = va_arg(args, int *);
+static vpx_codec_err_t ctrl_get_render_size(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ int *const render_size = va_arg(args, int *);
// Only support this function in serial decode.
if (ctx->frame_parallel_decode) {
@@ -987,14 +940,14 @@
return VPX_CODEC_INCAPABLE;
}
- if (display_size) {
+ if (render_size) {
if (ctx->frame_workers) {
VPxWorker *const worker = ctx->frame_workers;
FrameWorkerData *const frame_worker_data =
(FrameWorkerData *)worker->data1;
const VP9_COMMON *const cm = &frame_worker_data->pbi->common;
- display_size[0] = cm->display_width;
- display_size[1] = cm->display_height;
+ render_size[0] = cm->render_width;
+ render_size[1] = cm->render_height;
return VPX_CODEC_OK;
} else {
return VPX_CODEC_ERROR;
@@ -1093,7 +1046,7 @@
{VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates},
{VP8D_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted},
{VP9_GET_REFERENCE, ctrl_get_reference},
- {VP9D_GET_DISPLAY_SIZE, ctrl_get_display_size},
+ {VP9D_GET_DISPLAY_SIZE, ctrl_get_render_size},
{VP9D_GET_BIT_DEPTH, ctrl_get_bit_depth},
{VP9D_GET_FRAME_SIZE, ctrl_get_frame_size},
diff --git a/libvpx/vp9/vp9_dx_iface.h b/libvpx/vp9/vp9_dx_iface.h
new file mode 100644
index 0000000..e0e948e
--- /dev/null
+++ b/libvpx/vp9/vp9_dx_iface.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_VP9_DX_IFACE_H_
+#define VP9_VP9_DX_IFACE_H_
+
+#include "vp9/decoder/vp9_decoder.h"
+
+typedef vpx_codec_stream_info_t vp9_stream_info_t;
+
+// This limit is due to framebuffer numbers.
+// TODO(hkuang): Remove this limit after implementing ondemand framebuffers.
+#define FRAME_CACHE_SIZE 6 // Cache maximum 6 decoded frames.
+
+typedef struct cache_frame {
+ int fb_idx;
+ vpx_image_t img;
+} cache_frame;
+
+struct vpx_codec_alg_priv {
+ vpx_codec_priv_t base;
+ vpx_codec_dec_cfg_t cfg;
+ vp9_stream_info_t si;
+ int postproc_cfg_set;
+ vp8_postproc_cfg_t postproc_cfg;
+ vpx_decrypt_cb decrypt_cb;
+ void *decrypt_state;
+ vpx_image_t img;
+ int img_avail;
+ int flushed;
+ int invert_tile_order;
+ int last_show_frame; // Index of last output frame.
+ int byte_alignment;
+ int skip_loop_filter;
+
+ // Frame parallel related.
+ int frame_parallel_decode; // frame-based threading.
+ VPxWorker *frame_workers;
+ int num_frame_workers;
+ int next_submit_worker_id;
+ int last_submit_worker_id;
+ int next_output_worker_id;
+ int available_threads;
+ cache_frame frame_cache[FRAME_CACHE_SIZE];
+ int frame_cache_write;
+ int frame_cache_read;
+ int num_cache_frames;
+ int need_resync; // wait for key/intra-only frame
+ // BufferPool that holds all reference frames. Shared by all the FrameWorkers.
+ BufferPool *buffer_pool;
+
+ // External frame buffer info to save for VP9 common.
+ void *ext_priv; // Private data associated with the external frame buffers.
+ vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb;
+ vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb;
+};
+
+#endif // VP9_VP9_DX_IFACE_H_
diff --git a/libvpx/vp9/vp9_iface_common.h b/libvpx/vp9/vp9_iface_common.h
index 58bb7d5..938d422 100644
--- a/libvpx/vp9/vp9_iface_common.h
+++ b/libvpx/vp9/vp9_iface_common.h
@@ -37,11 +37,14 @@
}
}
img->cs = yv12->color_space;
+ img->range = yv12->color_range;
img->bit_depth = 8;
img->w = yv12->y_stride;
img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
img->d_w = yv12->y_crop_width;
img->d_h = yv12->y_crop_height;
+ img->r_w = yv12->render_width;
+ img->r_h = yv12->render_height;
img->x_chroma_shift = yv12->subsampling_x;
img->y_chroma_shift = yv12->subsampling_y;
img->planes[VPX_PLANE_Y] = yv12->y_buffer;
@@ -56,7 +59,7 @@
if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) {
// vpx_image_t uses byte strides and a pointer to the first byte
// of the image.
- img->fmt |= VPX_IMG_FMT_HIGHBITDEPTH;
+ img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH);
img->bit_depth = yv12->bit_depth;
img->planes[VPX_PLANE_Y] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->y_buffer);
img->planes[VPX_PLANE_U] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->u_buffer);
@@ -83,6 +86,8 @@
yv12->y_crop_width = img->d_w;
yv12->y_crop_height = img->d_h;
+ yv12->render_width = img->r_w;
+ yv12->render_height = img->r_h;
yv12->y_width = img->d_w;
yv12->y_height = img->d_h;
@@ -96,6 +101,7 @@
yv12->y_stride = img->stride[VPX_PLANE_Y];
yv12->uv_stride = img->stride[VPX_PLANE_U];
yv12->color_space = img->cs;
+ yv12->color_range = img->range;
#if CONFIG_VP9_HIGHBITDEPTH
if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
diff --git a/libvpx/vp9/vp9cx.mk b/libvpx/vp9/vp9cx.mk
index 84b12d7..25a176f 100644
--- a/libvpx/vp9/vp9cx.mk
+++ b/libvpx/vp9/vp9cx.mk
@@ -100,8 +100,13 @@
ifeq ($(CONFIG_USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_error_sse2.asm
+VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_highbd_error_avx.asm
+else
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
endif
+endif
ifeq ($(ARCH_X86_64),yes)
ifeq ($(CONFIG_USE_X86INC),yes)
diff --git a/libvpx/vp9/vp9dx.mk b/libvpx/vp9/vp9dx.mk
index 0e9cf16..4c6fd00 100644
--- a/libvpx/vp9/vp9dx.mk
+++ b/libvpx/vp9/vp9dx.mk
@@ -16,6 +16,7 @@
VP9_DX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no)
VP9_DX_SRCS-yes += vp9_dx_iface.c
+VP9_DX_SRCS-yes += vp9_dx_iface.h
VP9_DX_SRCS-yes += decoder/vp9_decodemv.c
VP9_DX_SRCS-yes += decoder/vp9_decodeframe.c
diff --git a/libvpx/vpx/src/svc_encodeframe.c b/libvpx/vpx/src/svc_encodeframe.c
index 9844ace..ff60083 100644
--- a/libvpx/vpx/src/svc_encodeframe.c
+++ b/libvpx/vpx/src/svc_encodeframe.c
@@ -339,7 +339,8 @@
(spatial_layer_target >> 1) + (spatial_layer_target >> 2);
enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] =
spatial_layer_target;
- } else if (svc_ctx->temporal_layering_mode == 2) {
+ } else if (svc_ctx->temporal_layering_mode == 2 ||
+ svc_ctx->temporal_layering_mode == 1) {
enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] =
spatial_layer_target * 2 / 3;
enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] =
@@ -417,7 +418,8 @@
// si->svc_params.temporal_layering_mode = svc_ctx->temporal_layering_mode;
if (svc_ctx->temporal_layering_mode == 3) {
svc_ctx->temporal_layers = 3;
- } else if (svc_ctx->temporal_layering_mode == 2) {
+ } else if (svc_ctx->temporal_layering_mode == 2 ||
+ svc_ctx->temporal_layering_mode == 1) {
svc_ctx->temporal_layers = 2;
}
@@ -477,10 +479,10 @@
if (enc_cfg->rc_end_usage == VPX_CBR) {
enc_cfg->rc_resize_allowed = 0;
enc_cfg->rc_min_quantizer = 2;
- enc_cfg->rc_max_quantizer = 63;
+ enc_cfg->rc_max_quantizer = 56;
enc_cfg->rc_undershoot_pct = 50;
enc_cfg->rc_overshoot_pct = 50;
- enc_cfg->rc_buf_initial_sz = 20;
+ enc_cfg->rc_buf_initial_sz = 500;
enc_cfg->rc_buf_optimal_sz = 600;
enc_cfg->rc_buf_sz = 1000;
}
@@ -494,10 +496,10 @@
svc_log(svc_ctx, SVC_LOG_ERROR, "svc_enc_init error\n");
return res;
}
-
- vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1);
- vpx_codec_control(codec_ctx, VP9E_SET_SVC_PARAMETERS, &si->svc_params);
-
+ if (svc_ctx->spatial_layers > 1 || svc_ctx->temporal_layers > 1) {
+ vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1);
+ vpx_codec_control(codec_ctx, VP9E_SET_SVC_PARAMETERS, &si->svc_params);
+ }
return VPX_CODEC_OK;
}
diff --git a/libvpx/vpx/svc_context.h b/libvpx/vpx/svc_context.h
index a09651c..5bc2518 100644
--- a/libvpx/vpx/svc_context.h
+++ b/libvpx/vpx/svc_context.h
@@ -40,6 +40,7 @@
int output_rc_stat; // for outputting rc stats
int speed; // speed setting for codec
int threads;
+ int aqmode; // turns on aq-mode=3 (cyclic_refresh): 0=off, 1=on.
// private storage for vpx_svc_encode
void *internal;
} SvcContext;
diff --git a/libvpx/vpx/vp8.h b/libvpx/vpx/vp8.h
index 2a31af6..8a035f9 100644
--- a/libvpx/vpx/vp8.h
+++ b/libvpx/vpx/vp8.h
@@ -116,19 +116,29 @@
vpx_image_t img; /**< img structure to populate (output) */
} vp9_ref_frame_t;
+/*!\cond */
/*!\brief vp8 decoder control function parameter type
*
* defines the data type for each of VP8 decoder control function requires
*/
VPX_CTRL_USE_TYPE(VP8_SET_REFERENCE, vpx_ref_frame_t *)
+#define VPX_CTRL_VP8_SET_REFERENCE
VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE, vpx_ref_frame_t *)
+#define VPX_CTRL_VP8_COPY_REFERENCE
VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC, vp8_postproc_cfg_t *)
+#define VPX_CTRL_VP8_SET_POSTPROC
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_REF_FRAME, int)
+#define VPX_CTRL_VP8_SET_DBG_COLOR_REF_FRAME
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_MB_MODES, int)
+#define VPX_CTRL_VP8_SET_DBG_COLOR_MB_MODES
VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_B_MODES, int)
+#define VPX_CTRL_VP8_SET_DBG_COLOR_B_MODES
VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int)
+#define VPX_CTRL_VP8_SET_DBG_DISPLAY_MV
VPX_CTRL_USE_TYPE(VP9_GET_REFERENCE, vp9_ref_frame_t *)
+#define VPX_CTRL_VP9_GET_REFERENCE
+/*!\endcond */
/*! @} - end defgroup vp8 */
#ifdef __cplusplus
diff --git a/libvpx/vpx/vp8cx.h b/libvpx/vpx/vp8cx.h
index 31120df..bd99c6d 100644
--- a/libvpx/vpx/vp8cx.h
+++ b/libvpx/vpx/vp8cx.h
@@ -141,29 +141,11 @@
* \sa #vpx_codec_control
*/
enum vp8e_enc_control_id {
- /*!\brief Codec control function to set mode of entropy update in encoder.
- *
- * Supported in codecs: VP8, VP9
- */
- VP8E_UPD_ENTROPY = 5,
-
- /*!\brief Codec control function to set reference update mode in encoder.
- *
- * Supported in codecs: VP8, VP9
- */
- VP8E_UPD_REFERENCE,
-
- /*!\brief Codec control function to set which reference frame encoder can use.
- *
- * Supported in codecs: VP8, VP9
- */
- VP8E_USE_REFERENCE,
-
/*!\brief Codec control function to pass an ROI map to encoder.
*
* Supported in codecs: VP8, VP9
*/
- VP8E_SET_ROI_MAP,
+ VP8E_SET_ROI_MAP = 8,
/*!\brief Codec control function to pass an Active map to encoder.
*
@@ -547,6 +529,31 @@
* Supported in codecs: VP9
*/
VP9E_GET_ACTIVEMAP,
+
+ /*!\brief Codec control function to set color range bit.
+ * \note Valid ranges: 0..1, default is 0
+ * 0 = Limited range (16..235 or HBD equivalent)
+ * 1 = Full range (0..255 or HBD equivalent)
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_SET_COLOR_RANGE,
+
+ /*!\brief Codec control function to set the frame flags and buffer indices
+ * for spatial layers. The frame flags and buffer indices are set using the
+ * struct #vpx_svc_ref_frame_config defined below.
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_SET_SVC_REF_FRAME_CONFIG,
+
+ /*!\brief Codec control function to set intended rendering image size.
+ *
+ * The argument is an int array holding {width, height}.
+ * By default, the render size is identical to the image size in pixels.
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_SET_RENDER_SIZE,
};
/*!\brief vpx 1-D scaling mode
@@ -673,6 +680,22 @@
int temporal_layer_id; /**< Temporal layer id number. */
} vpx_svc_layer_id_t;
+/*!\brief vp9 svc frame flag parameters.
+ *
+ * This defines the frame flags and buffer indices for each spatial layer for
+ * svc encoding.
+ * This is used with the #VP9E_SET_SVC_REF_FRAME_CONFIG control to set frame
+ * flags and buffer indices for each spatial layer for the current (super)frame.
+ * Every array is indexed by spatial layer id, hence VPX_SS_MAX_LAYERS entries.
+ */
+typedef struct vpx_svc_ref_frame_config {
+  int frame_flags[VPX_SS_MAX_LAYERS]; /**< Frame flags. */
+  int lst_fb_idx[VPX_SS_MAX_LAYERS]; /**< Last buffer index. */
+  int gld_fb_idx[VPX_SS_MAX_LAYERS]; /**< Golden buffer index. */
+  int alt_fb_idx[VPX_SS_MAX_LAYERS]; /**< Altref buffer index. */
+} vpx_svc_ref_frame_config_t;
+
+/*!\cond */
/*!\brief VP8 encoder control function parameter type
*
* Defines the data types that VP8E control functions take. Note that
@@ -680,83 +703,113 @@
*
*/
-
-/* These controls have been deprecated in favor of the flags parameter to
- * vpx_codec_encode(). See the definition of VP8_EFLAG_* above.
- */
-VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_UPD_ENTROPY, int)
-VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_UPD_REFERENCE, int)
-VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_USE_REFERENCE, int)
-
VPX_CTRL_USE_TYPE(VP8E_SET_FRAME_FLAGS, int)
+#define VPX_CTRL_VP8E_SET_FRAME_FLAGS
VPX_CTRL_USE_TYPE(VP8E_SET_TEMPORAL_LAYER_ID, int)
+#define VPX_CTRL_VP8E_SET_TEMPORAL_LAYER_ID
VPX_CTRL_USE_TYPE(VP8E_SET_ROI_MAP, vpx_roi_map_t *)
+#define VPX_CTRL_VP8E_SET_ROI_MAP
VPX_CTRL_USE_TYPE(VP8E_SET_ACTIVEMAP, vpx_active_map_t *)
+#define VPX_CTRL_VP8E_SET_ACTIVEMAP
VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE, vpx_scaling_mode_t *)
+#define VPX_CTRL_VP8E_SET_SCALEMODE
VPX_CTRL_USE_TYPE(VP9E_SET_SVC, int)
+#define VPX_CTRL_VP9E_SET_SVC
VPX_CTRL_USE_TYPE(VP9E_SET_SVC_PARAMETERS, void *)
+#define VPX_CTRL_VP9E_SET_SVC_PARAMETERS
VPX_CTRL_USE_TYPE(VP9E_REGISTER_CX_CALLBACK, void *)
+#define VPX_CTRL_VP9E_REGISTER_CX_CALLBACK
VPX_CTRL_USE_TYPE(VP9E_SET_SVC_LAYER_ID, vpx_svc_layer_id_t *)
+#define VPX_CTRL_VP9E_SET_SVC_LAYER_ID
VPX_CTRL_USE_TYPE(VP8E_SET_CPUUSED, int)
+#define VPX_CTRL_VP8E_SET_CPUUSED
VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF, unsigned int)
+#define VPX_CTRL_VP8E_SET_ENABLEAUTOALTREF
VPX_CTRL_USE_TYPE(VP8E_SET_NOISE_SENSITIVITY, unsigned int)
+#define VPX_CTRL_VP8E_SET_NOISE_SENSITIVITY
VPX_CTRL_USE_TYPE(VP8E_SET_SHARPNESS, unsigned int)
+#define VPX_CTRL_VP8E_SET_SHARPNESS
VPX_CTRL_USE_TYPE(VP8E_SET_STATIC_THRESHOLD, unsigned int)
+#define VPX_CTRL_VP8E_SET_STATIC_THRESHOLD
VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, int) /* vp8e_token_partitions */
+#define VPX_CTRL_VP8E_SET_TOKEN_PARTITIONS
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int)
+#define VPX_CTRL_VP8E_SET_ARNR_MAXFRAMES
VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH, unsigned int)
+#define VPX_CTRL_VP8E_SET_ARNR_STRENGTH
VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_SET_ARNR_TYPE, unsigned int)
+#define VPX_CTRL_VP8E_SET_ARNR_TYPE
VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, int) /* vp8e_tuning */
+#define VPX_CTRL_VP8E_SET_TUNING
VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL, unsigned int)
+#define VPX_CTRL_VP8E_SET_CQ_LEVEL
VPX_CTRL_USE_TYPE(VP9E_SET_TILE_COLUMNS, int)
+#define VPX_CTRL_VP9E_SET_TILE_COLUMNS
VPX_CTRL_USE_TYPE(VP9E_SET_TILE_ROWS, int)
+#define VPX_CTRL_VP9E_SET_TILE_ROWS
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *)
+#define VPX_CTRL_VP8E_GET_LAST_QUANTIZER
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *)
+#define VPX_CTRL_VP8E_GET_LAST_QUANTIZER_64
VPX_CTRL_USE_TYPE(VP9E_GET_SVC_LAYER_ID, vpx_svc_layer_id_t *)
+#define VPX_CTRL_VP9E_GET_SVC_LAYER_ID
VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int)
+#define VPX_CTRL_VP8E_SET_MAX_INTRA_BITRATE_PCT
VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTER_BITRATE_PCT, unsigned int)
+#define VPX_CTRL_VP8E_SET_MAX_INTER_BITRATE_PCT
VPX_CTRL_USE_TYPE(VP8E_SET_SCREEN_CONTENT_MODE, unsigned int)
+#define VPX_CTRL_VP8E_SET_SCREEN_CONTENT_MODE
VPX_CTRL_USE_TYPE(VP9E_SET_GF_CBR_BOOST_PCT, unsigned int)
+#define VPX_CTRL_VP9E_SET_GF_CBR_BOOST_PCT
VPX_CTRL_USE_TYPE(VP9E_SET_LOSSLESS, unsigned int)
+#define VPX_CTRL_VP9E_SET_LOSSLESS
VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PARALLEL_DECODING, unsigned int)
+#define VPX_CTRL_VP9E_SET_FRAME_PARALLEL_DECODING
VPX_CTRL_USE_TYPE(VP9E_SET_AQ_MODE, unsigned int)
+#define VPX_CTRL_VP9E_SET_AQ_MODE
VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PERIODIC_BOOST, unsigned int)
+#define VPX_CTRL_VP9E_SET_FRAME_PERIODIC_BOOST
VPX_CTRL_USE_TYPE(VP9E_SET_NOISE_SENSITIVITY, unsigned int)
+#define VPX_CTRL_VP9E_SET_NOISE_SENSITIVITY
VPX_CTRL_USE_TYPE(VP9E_SET_TUNE_CONTENT, int) /* vp9e_tune_content */
+#define VPX_CTRL_VP9E_SET_TUNE_CONTENT
VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_SPACE, int)
+#define VPX_CTRL_VP9E_SET_COLOR_SPACE
VPX_CTRL_USE_TYPE(VP9E_SET_MIN_GF_INTERVAL, unsigned int)
-
-/*!\brief
- *
- * TODO(debargha) : add support of the control in ffmpeg
- */
#define VPX_CTRL_VP9E_SET_MIN_GF_INTERVAL
-
VPX_CTRL_USE_TYPE(VP9E_SET_MAX_GF_INTERVAL, unsigned int)
-/*!\brief
- *
- * TODO(debargha) : add support of the control in ffmpeg
- */
#define VPX_CTRL_VP9E_SET_MAX_GF_INTERVAL
VPX_CTRL_USE_TYPE(VP9E_GET_ACTIVEMAP, vpx_active_map_t *)
+#define VPX_CTRL_VP9E_GET_ACTIVEMAP
+
+VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_RANGE, int)
+#define VPX_CTRL_VP9E_SET_COLOR_RANGE
+
+VPX_CTRL_USE_TYPE(VP9E_SET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *)
+#define VPX_CTRL_VP9E_SET_SVC_REF_FRAME_CONFIG
+
+VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *)
+#define VPX_CTRL_VP9E_SET_RENDER_SIZE
+
+/*!\endcond */
/*! @} - end defgroup vp8_encoder */
#ifdef __cplusplus
} // extern "C"
diff --git a/libvpx/vpx/vp8dx.h b/libvpx/vpx/vp8dx.h
index 27b9f78..1f02fd5 100644
--- a/libvpx/vpx/vp8dx.h
+++ b/libvpx/vpx/vp8dx.h
@@ -147,6 +147,7 @@
typedef vpx_decrypt_init vp8_decrypt_init;
+/*!\cond */
/*!\brief VP8 decoder control function parameter type
*
* Defines the data types that VP8D control functions take. Note that
@@ -156,15 +157,25 @@
VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES, int *)
+#define VPX_CTRL_VP8D_GET_LAST_REF_UPDATES
VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED, int *)
+#define VPX_CTRL_VP8D_GET_FRAME_CORRUPTED
VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_USED, int *)
+#define VPX_CTRL_VP8D_GET_LAST_REF_USED
VPX_CTRL_USE_TYPE(VPXD_SET_DECRYPTOR, vpx_decrypt_init *)
+#define VPX_CTRL_VPXD_SET_DECRYPTOR
VPX_CTRL_USE_TYPE(VP8D_SET_DECRYPTOR, vpx_decrypt_init *)
+#define VPX_CTRL_VP8D_SET_DECRYPTOR
VPX_CTRL_USE_TYPE(VP9D_GET_DISPLAY_SIZE, int *)
+#define VPX_CTRL_VP9D_GET_DISPLAY_SIZE
VPX_CTRL_USE_TYPE(VP9D_GET_BIT_DEPTH, unsigned int *)
+#define VPX_CTRL_VP9D_GET_BIT_DEPTH
VPX_CTRL_USE_TYPE(VP9D_GET_FRAME_SIZE, int *)
+#define VPX_CTRL_VP9D_GET_FRAME_SIZE
VPX_CTRL_USE_TYPE(VP9_INVERT_TILE_DECODE_ORDER, int)
+#define VPX_CTRL_VP9_INVERT_TILE_DECODE_ORDER
+/*!\endcond */
/*! @} - end defgroup vp8_decoder */
#ifdef __cplusplus
diff --git a/libvpx/vpx/vpx_encoder.h b/libvpx/vpx/vpx_encoder.h
index 2b17f98..955e873 100644
--- a/libvpx/vpx/vpx_encoder.h
+++ b/libvpx/vpx/vpx_encoder.h
@@ -150,7 +150,7 @@
partitions can be decoded even
though earlier partitions have
been lost. Note that intra
- predicition is still done over
+ prediction is still done over
the partition boundary. */
/*!\brief Encoder output packet variants
diff --git a/libvpx/vpx/vpx_image.h b/libvpx/vpx/vpx_image.h
index c06d351..e9e952c 100644
--- a/libvpx/vpx/vpx_image.h
+++ b/libvpx/vpx/vpx_image.h
@@ -78,10 +78,17 @@
VPX_CS_SRGB = 7 /**< sRGB */
} vpx_color_space_t; /**< alias for enum vpx_color_space */
+ /*!\brief List of supported color ranges */
+ typedef enum vpx_color_range {
+ VPX_CR_STUDIO_RANGE = 0, /**< Y [16..235], UV [16..240] */
+ VPX_CR_FULL_RANGE = 1 /**< YUV/RGB [0..255] */
+ } vpx_color_range_t; /**< alias for enum vpx_color_range */
+
/**\brief Image Descriptor */
typedef struct vpx_image {
vpx_img_fmt_t fmt; /**< Image Format */
vpx_color_space_t cs; /**< Color Space */
+ vpx_color_range_t range; /**< Color Range */
/* Image storage dimensions */
unsigned int w; /**< Stored image width */
@@ -92,6 +99,10 @@
unsigned int d_w; /**< Displayed image width */
unsigned int d_h; /**< Displayed image height */
+ /* Image intended rendering dimensions */
+ unsigned int r_w; /**< Intended rendering image width */
+ unsigned int r_h; /**< Intended rendering image height */
+
/* Chroma subsampling info */
unsigned int x_chroma_shift; /**< subsampling order, X */
unsigned int y_chroma_shift; /**< subsampling order, Y */
diff --git a/libvpx/vpx_dsp/bitreader.c b/libvpx/vpx_dsp/bitreader.c
index 4420fad..6ad806a 100644
--- a/libvpx/vpx_dsp/bitreader.c
+++ b/libvpx/vpx_dsp/bitreader.c
@@ -13,6 +13,7 @@
#include "vpx_dsp/bitreader.h"
#include "vpx_dsp/prob.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_util/endian_inl.h"
@@ -48,7 +49,7 @@
int shift = BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT);
if (r->decrypt_cb) {
- size_t n = MIN(sizeof(r->clear_buffer), bytes_left);
+ size_t n = VPXMIN(sizeof(r->clear_buffer), bytes_left);
r->decrypt_cb(r->decrypt_state, buffer, r->clear_buffer, (int)n);
buffer = r->clear_buffer;
buffer_start = r->clear_buffer;
diff --git a/libvpx/vpx_dsp/bitreader_buffer.c b/libvpx/vpx_dsp/bitreader_buffer.c
index fb04ee6..bb91726 100644
--- a/libvpx/vpx_dsp/bitreader_buffer.c
+++ b/libvpx/vpx_dsp/bitreader_buffer.c
@@ -7,6 +7,7 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vpx_config.h"
#include "./bitreader_buffer.h"
size_t vpx_rb_bytes_read(struct vpx_read_bit_buffer *rb) {
@@ -39,3 +40,14 @@
const int value = vpx_rb_read_literal(rb, bits);
return vpx_rb_read_bit(rb) ? -value : value;
}
+
+int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb,
+ int bits) {
+#if CONFIG_MISC_FIXES
+ const int nbits = sizeof(unsigned) * 8 - bits - 1;
+ const unsigned value = vpx_rb_read_literal(rb, bits + 1) << nbits;
+ return ((int) value) >> nbits;
+#else
+ return vpx_rb_read_signed_literal(rb, bits);
+#endif
+}
diff --git a/libvpx/vpx_dsp/bitreader_buffer.h b/libvpx/vpx_dsp/bitreader_buffer.h
index 03b156b..8a48a95 100644
--- a/libvpx/vpx_dsp/bitreader_buffer.h
+++ b/libvpx/vpx_dsp/bitreader_buffer.h
@@ -38,6 +38,8 @@
int vpx_rb_read_signed_literal(struct vpx_read_bit_buffer *rb, int bits);
+int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, int bits);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/libvpx/vpx_dsp/bitwriter_buffer.c b/libvpx/vpx_dsp/bitwriter_buffer.c
index 0dfb859..6182a72 100644
--- a/libvpx/vpx_dsp/bitwriter_buffer.c
+++ b/libvpx/vpx_dsp/bitwriter_buffer.c
@@ -9,7 +9,9 @@
*/
#include <limits.h>
+#include <stdlib.h>
+#include "./vpx_config.h"
#include "./bitwriter_buffer.h"
size_t vpx_wb_bytes_written(const struct vpx_write_bit_buffer *wb) {
@@ -34,3 +36,13 @@
for (bit = bits - 1; bit >= 0; bit--)
vpx_wb_write_bit(wb, (data >> bit) & 1);
}
+
+void vpx_wb_write_inv_signed_literal(struct vpx_write_bit_buffer *wb,
+ int data, int bits) {
+#if CONFIG_MISC_FIXES
+ vpx_wb_write_literal(wb, data, bits + 1);
+#else
+ vpx_wb_write_literal(wb, abs(data), bits);
+ vpx_wb_write_bit(wb, data < 0);
+#endif
+}
diff --git a/libvpx/vpx_dsp/bitwriter_buffer.h b/libvpx/vpx_dsp/bitwriter_buffer.h
index 9397668..a123a2f 100644
--- a/libvpx/vpx_dsp/bitwriter_buffer.h
+++ b/libvpx/vpx_dsp/bitwriter_buffer.h
@@ -28,6 +28,8 @@
void vpx_wb_write_literal(struct vpx_write_bit_buffer *wb, int data, int bits);
+void vpx_wb_write_inv_signed_literal(struct vpx_write_bit_buffer *wb, int data,
+ int bits);
#ifdef __cplusplus
} // extern "C"
diff --git a/libvpx/vpx_dsp/intrapred.c b/libvpx/vpx_dsp/intrapred.c
index 9ba0f64..a9669e5 100644
--- a/libvpx/vpx_dsp/intrapred.c
+++ b/libvpx/vpx_dsp/intrapred.c
@@ -44,6 +44,21 @@
dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
}
+static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ int r, c;
+ (void) above;
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1],
+ left[(c >> 1) + r + 2])
+ : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]);
+ }
+ dst += stride;
+ }
+}
+
static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int r, c;
@@ -61,6 +76,20 @@
}
}
+static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ int r, c;
+ (void) left;
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1],
+ above[(r >> 1) + c + 2])
+ : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]);
+ }
+ dst += stride;
+ }
+}
+
static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
const uint8_t above_right = above[bs - 1];
@@ -80,6 +109,19 @@
}
}
+static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
+ const uint8_t *above, const uint8_t *left) {
+ int r, c;
+ (void) left;
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ dst[c] = AVG3(above[r + c], above[r + c + 1],
+ above[r + c + 1 + (r + c + 2 < bs * 2)]);
+ }
+ dst += stride;
+ }
+}
+
static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int r, c;
@@ -247,6 +289,38 @@
}
}
+void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const int H = above[-1];
+ const int I = left[0];
+ const int J = left[1];
+ const int K = left[2];
+ const int L = left[3];
+
+ memset(dst + stride * 0, AVG3(H, I, J), 4);
+ memset(dst + stride * 1, AVG3(I, J, K), 4);
+ memset(dst + stride * 2, AVG3(J, K, L), 4);
+ memset(dst + stride * 3, AVG3(K, L, L), 4);
+}
+
+void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const int H = above[-1];
+ const int I = above[0];
+ const int J = above[1];
+ const int K = above[2];
+ const int L = above[3];
+ const int M = above[4];
+
+ dst[0] = AVG3(H, I, J);
+ dst[1] = AVG3(I, J, K);
+ dst[2] = AVG3(J, K, L);
+ dst[3] = AVG3(K, L, M);
+ memcpy(dst + stride * 1, dst, 4);
+ memcpy(dst + stride * 2, dst, 4);
+ memcpy(dst + stride * 3, dst, 4);
+}
+
void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const int I = left[0];
@@ -287,6 +361,30 @@
DST(3, 3) = AVG3(E, F, G); // differs from vp8
}
+void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+ const int D = above[3];
+ const int E = above[4];
+ const int F = above[5];
+ const int G = above[6];
+ const int H = above[7];
+ (void)left;
+ DST(0, 0) = AVG2(A, B);
+ DST(1, 0) = DST(0, 2) = AVG2(B, C);
+ DST(2, 0) = DST(1, 2) = AVG2(C, D);
+ DST(3, 0) = DST(2, 2) = AVG2(D, E);
+ DST(3, 2) = AVG3(E, F, G);
+
+ DST(0, 1) = AVG3(A, B, C);
+ DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
+ DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
+ DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
+ DST(3, 3) = AVG3(F, G, H);
+}
+
void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const int A = above[0];
@@ -308,6 +406,27 @@
DST(3, 3) = H; // differs from vp8
}
+void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+ const int D = above[3];
+ const int E = above[4];
+ const int F = above[5];
+ const int G = above[6];
+ const int H = above[7];
+ (void)stride;
+ (void)left;
+ DST(0, 0) = AVG3(A, B, C);
+ DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
+ DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
+ DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
+ DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
+ DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
+ DST(3, 3) = AVG3(G, H, H);
+}
+
void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const int I = left[0];
@@ -409,6 +528,23 @@
}
}
+static INLINE void highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ int r, c;
+ (void) above;
+ (void) bd;
+
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1],
+ left[(c >> 1) + r + 2])
+ : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]);
+ }
+ dst += stride;
+ }
+}
+
static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride,
int bs, const uint16_t *above,
const uint16_t *left, int bd) {
@@ -425,6 +561,8 @@
}
}
+#define highbd_d63e_predictor highbd_d63_predictor
+
static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
const uint16_t *above,
const uint16_t *left, int bd) {
@@ -441,6 +579,21 @@
}
}
+static INLINE void highbd_d45e_predictor(uint16_t *dst, ptrdiff_t stride,
+ int bs, const uint16_t *above,
+ const uint16_t *left, int bd) {
+ int r, c;
+ (void) left;
+ (void) bd;
+ for (r = 0; r < bs; ++r) {
+ for (c = 0; c < bs; ++c) {
+ dst[c] = AVG3(above[r + c], above[r + c + 1],
+ above[r + c + 1 + (r + c + 2 < bs * 2)]);
+ }
+ dst += stride;
+ }
+}
+
static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride,
int bs, const uint16_t *above,
const uint16_t *left, int bd) {
@@ -679,6 +832,11 @@
intra_pred_no_4x4(d207)
intra_pred_no_4x4(d63)
intra_pred_no_4x4(d45)
+#if CONFIG_MISC_FIXES
+intra_pred_allsizes(d207e)
+intra_pred_allsizes(d63e)
+intra_pred_no_4x4(d45e)
+#endif
intra_pred_no_4x4(d117)
intra_pred_no_4x4(d135)
intra_pred_no_4x4(d153)
diff --git a/libvpx/vpx_dsp/inv_txfm.c b/libvpx/vpx_dsp/inv_txfm.c
index 3afa8cd..5f3cfdd 100644
--- a/libvpx/vpx_dsp/inv_txfm.c
+++ b/libvpx/vpx_dsp/inv_txfm.c
@@ -170,16 +170,25 @@
step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
- // stage 2 & stage 3 - even half
- idct4_c(step1, step1);
-
- // stage 2 - odd half
+ // stage 2
+ temp1 = (step1[0] + step1[2]) * cospi_16_64;
+ temp2 = (step1[0] - step1[2]) * cospi_16_64;
+ step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
+ temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64;
+ temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64;
+ step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
+ step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
step2[4] = WRAPLOW(step1[4] + step1[5], 8);
step2[5] = WRAPLOW(step1[4] - step1[5], 8);
step2[6] = WRAPLOW(-step1[6] + step1[7], 8);
step2[7] = WRAPLOW(step1[6] + step1[7], 8);
- // stage 3 -odd half
+ // stage 3
+ step1[0] = WRAPLOW(step2[0] + step2[3], 8);
+ step1[1] = WRAPLOW(step2[1] + step2[2], 8);
+ step1[2] = WRAPLOW(step2[1] - step2[2], 8);
+ step1[3] = WRAPLOW(step2[0] - step2[3], 8);
step1[4] = step2[4];
temp1 = (step2[6] - step2[5]) * cospi_16_64;
temp2 = (step2[5] + step2[6]) * cospi_16_64;
diff --git a/libvpx/vpx_dsp/mips/loopfilter_masks_dspr2.h b/libvpx/vpx_dsp/mips/loopfilter_masks_dspr2.h
index e82dfb7..2c964af 100644
--- a/libvpx/vpx_dsp/mips/loopfilter_masks_dspr2.h
+++ b/libvpx/vpx_dsp/mips/loopfilter_masks_dspr2.h
@@ -355,7 +355,7 @@
/* flat & flatmask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3) */
"and %[flat1], %[flat3], %[flat1] \n\t"
- : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r),
+ : [c] "=&r" (c), [r_k] "=&r" (r_k), [r] "=&r" (r),
[r_flat] "=&r" (r_flat), [flat1] "=&r" (flat1), [flat3] "=&r" (flat3)
: [p4] "r" (p4), [p3] "r" (p3), [p2] "r" (p2),
[p1] "r" (p1), [p0] "r" (p0), [q0] "r" (q0), [q1] "r" (q1),
diff --git a/libvpx/vpx_dsp/prob.h b/libvpx/vpx_dsp/prob.h
index 729f90a..c3cb103 100644
--- a/libvpx/vpx_dsp/prob.h
+++ b/libvpx/vpx_dsp/prob.h
@@ -65,7 +65,7 @@
unsigned int count_sat,
unsigned int max_update_factor) {
const vpx_prob prob = get_binary_prob(ct[0], ct[1]);
- const unsigned int count = MIN(ct[0] + ct[1], count_sat);
+ const unsigned int count = VPXMIN(ct[0] + ct[1], count_sat);
const unsigned int factor = max_update_factor * count / count_sat;
return weighted_prob(pre_prob, prob, factor);
}
@@ -82,7 +82,7 @@
if (den == 0) {
return pre_prob;
} else {
- const unsigned int count = MIN(den, MODE_MV_COUNT_SAT);
+ const unsigned int count = VPXMIN(den, MODE_MV_COUNT_SAT);
const unsigned int factor = count_to_update_factor[count];
const vpx_prob prob =
clip_prob(((int64_t)(ct[0]) * 256 + (den >> 1)) / den);
diff --git a/libvpx/vpx_dsp/psnrhvs.c b/libvpx/vpx_dsp/psnrhvs.c
index 2de77c0..3001705 100644
--- a/libvpx/vpx_dsp/psnrhvs.c
+++ b/libvpx/vpx_dsp/psnrhvs.c
@@ -191,7 +191,7 @@
for (i = 0; i < 8; i++) {
for (j = 0; j < 8; j++) {
float err;
- err = fabs(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]);
+ err = fabs((float)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]));
if (i != 0 || j != 0)
err = err < s_mask / mask[i][j] ? 0 : err - s_mask / mask[i][j];
ret += (err * _csf[i][j]) * (err * _csf[i][j]);
diff --git a/libvpx/vpx_dsp/vpx_dsp.mk b/libvpx/vpx_dsp/vpx_dsp.mk
index 1959c4d..9620eaa 100644
--- a/libvpx/vpx_dsp/vpx_dsp.mk
+++ b/libvpx/vpx_dsp/vpx_dsp.mk
@@ -36,13 +36,13 @@
endif
# intra predictions
-ifneq ($(filter yes,$(CONFIG_VP9) $(CONFIG_VP10)),)
DSP_SRCS-yes += intrapred.c
ifeq ($(CONFIG_USE_X86INC),yes)
DSP_SRCS-$(HAVE_SSE) += x86/intrapred_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/intrapred_sse2.asm
DSP_SRCS-$(HAVE_SSSE3) += x86/intrapred_ssse3.asm
+DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_8t_ssse3.asm
endif # CONFIG_USE_X86INC
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
@@ -58,7 +58,6 @@
DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred4_dspr2.c
DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred8_dspr2.c
DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred16_dspr2.c
-endif # CONFIG_VP9 || CONFIG_VP10
DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.h
DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.c
@@ -249,7 +248,8 @@
endif
ifeq ($(ARCH_X86_64),yes)
ifeq ($(CONFIG_USE_X86INC),yes)
-DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm
+DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm
+DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx_x86_64.asm
endif
endif
endif # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
@@ -308,6 +308,8 @@
DSP_SRCS-$(HAVE_MMX) += x86/variance_impl_mmx.asm
DSP_SRCS-$(HAVE_SSE) += x86/variance_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/variance_sse2.c # Contains SSE2 and SSSE3
+DSP_SRCS-$(HAVE_SSE2) += x86/halfpix_variance_sse2.c
+DSP_SRCS-$(HAVE_SSE2) += x86/halfpix_variance_impl_sse2.asm
DSP_SRCS-$(HAVE_AVX2) += x86/variance_avx2.c
DSP_SRCS-$(HAVE_AVX2) += x86/variance_impl_avx2.c
diff --git a/libvpx/vpx_dsp/vpx_dsp_common.h b/libvpx/vpx_dsp/vpx_dsp_common.h
index ccb8189..a9e180e 100644
--- a/libvpx/vpx_dsp/vpx_dsp_common.h
+++ b/libvpx/vpx_dsp/vpx_dsp_common.h
@@ -13,14 +13,15 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#ifdef __cplusplus
extern "C" {
#endif
-#define MIN(x, y) (((x) < (y)) ? (x) : (y))
-#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define VPXMIN(x, y) (((x) < (y)) ? (x) : (y))
+#define VPXMAX(x, y) (((x) > (y)) ? (x) : (y))
#if CONFIG_VP9_HIGHBITDEPTH
// Note:
diff --git a/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl b/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 1e56d53..b369b05 100644
--- a/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -54,322 +54,401 @@
# Intra prediction
#
-if ((vpx_config("CONFIG_VP9") eq "yes") || (vpx_config("CONFIG_VP10") eq "yes")) {
- add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc";
+add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207_predictor_4x4/, "$ssse3_x86inc";
- add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc";
+add_proto qw/void vpx_d207e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207e_predictor_4x4/;
- add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d63_predictor_4x4/, "$ssse3_x86inc";
+add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45_predictor_4x4 neon/, "$ssse3_x86inc";
- add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc";
+add_proto qw/void vpx_d45e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45e_predictor_4x4/;
- add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d117_predictor_4x4/;
+add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63_predictor_4x4/, "$ssse3_x86inc";
- add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d135_predictor_4x4 neon/;
+add_proto qw/void vpx_d63e_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63e_predictor_4x4/;
- add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc";
+add_proto qw/void vpx_d63f_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63f_predictor_4x4/;
- add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse_x86inc";
+add_proto qw/void vpx_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc";
- add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc";
+add_proto qw/void vpx_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_he_predictor_4x4/;
- add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc";
+add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d117_predictor_4x4/;
- add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse_x86inc";
+add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d135_predictor_4x4 neon/;
- add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse_x86inc";
+add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d153_predictor_4x4/, "$ssse3_x86inc";
- add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse_x86inc";
+add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_v_predictor_4x4 neon msa/, "$sse_x86inc";
- add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc";
+add_proto qw/void vpx_ve_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_ve_predictor_4x4/;
- add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc";
+add_proto qw/void vpx_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc";
- add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc";
+add_proto qw/void vpx_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc";
- add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc";
+add_proto qw/void vpx_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_top_predictor_4x4 msa neon/, "$sse_x86inc";
- add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d117_predictor_8x8/;
+add_proto qw/void vpx_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_left_predictor_4x4 msa neon/, "$sse_x86inc";
- add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d135_predictor_8x8/;
+add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_128_predictor_4x4 msa neon/, "$sse_x86inc";
- add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc";
+add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207_predictor_8x8/, "$ssse3_x86inc";
- add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse_x86inc";
+add_proto qw/void vpx_d207e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207e_predictor_8x8/;
- add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
+add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45_predictor_8x8 neon/, "$ssse3_x86inc";
- add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc";
+add_proto qw/void vpx_d45e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45e_predictor_8x8/;
- add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse_x86inc";
+add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63_predictor_8x8/, "$ssse3_x86inc";
- add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse_x86inc";
+add_proto qw/void vpx_d63e_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63e_predictor_8x8/;
- add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse_x86inc";
+add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc";
- add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc";
+add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d117_predictor_8x8/;
- add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc";
+add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d135_predictor_8x8/;
- add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc";
+add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d153_predictor_8x8/, "$ssse3_x86inc";
- add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc";
+add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_v_predictor_8x8 neon msa/, "$sse_x86inc";
- add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d117_predictor_16x16/;
+add_proto qw/void vpx_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
- add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d135_predictor_16x16/;
+add_proto qw/void vpx_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc";
- add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d153_predictor_16x16/, "$ssse3_x86inc";
+add_proto qw/void vpx_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_top_predictor_8x8 neon msa/, "$sse_x86inc";
- add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_v_predictor_16x16 neon msa/, "$sse2_x86inc";
+add_proto qw/void vpx_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_left_predictor_8x8 neon msa/, "$sse_x86inc";
- add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_tm_predictor_16x16 neon msa/, "$sse2_x86inc";
+add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_128_predictor_8x8 neon msa/, "$sse_x86inc";
- add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc";
+add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207_predictor_16x16/, "$ssse3_x86inc";
- add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc";
+add_proto qw/void vpx_d207e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207e_predictor_16x16/;
- add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc";
+add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45_predictor_16x16 neon/, "$ssse3_x86inc";
- add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc";
+add_proto qw/void vpx_d45e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45e_predictor_16x16/;
- add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc";
+add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63_predictor_16x16/, "$ssse3_x86inc";
- add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d45_predictor_32x32/, "$ssse3_x86inc";
+add_proto qw/void vpx_d63e_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63e_predictor_16x16/;
- add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc";
+add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc";
- add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_h_predictor_32x32 neon msa/, "$ssse3_x86inc";
+add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d117_predictor_16x16/;
- add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d117_predictor_32x32/;
+add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d135_predictor_16x16/;
- add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d135_predictor_32x32/;
+add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d153_predictor_16x16/, "$ssse3_x86inc";
- add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_d153_predictor_32x32/, "$ssse3_x86inc";
+add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_v_predictor_16x16 neon msa/, "$sse2_x86inc";
- add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc";
+add_proto qw/void vpx_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_tm_predictor_16x16 neon msa/, "$sse2_x86inc";
- add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc";
+add_proto qw/void vpx_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc";
- add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
+add_proto qw/void vpx_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc";
- add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc";
+add_proto qw/void vpx_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc";
- add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc";
+add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc";
- add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
- specialize qw/vpx_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
+add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207_predictor_32x32/, "$ssse3_x86inc";
+
+add_proto qw/void vpx_d207e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d207e_predictor_32x32/;
+
+add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45_predictor_32x32/, "$ssse3_x86inc";
+
+add_proto qw/void vpx_d45e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d45e_predictor_32x32/;
+
+add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63_predictor_32x32/, "$ssse3_x86inc";
+
+add_proto qw/void vpx_d63e_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d63e_predictor_32x32/;
+
+add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_h_predictor_32x32 neon msa/, "$ssse3_x86inc";
+
+add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d117_predictor_32x32/;
+
+add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d135_predictor_32x32/;
+
+add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d153_predictor_32x32/, "$ssse3_x86inc";
+
+add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_v_predictor_32x32 neon msa/, "$sse2_x86inc";
+
+add_proto qw/void vpx_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_tm_predictor_32x32 neon msa/, "$sse2_x86_64_x86inc";
+
+add_proto qw/void vpx_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
+
+add_proto qw/void vpx_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc";
+
+add_proto qw/void vpx_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc";
+
+add_proto qw/void vpx_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
# High bitdepth functions
- if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d207_predictor_4x4/;
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d207_predictor_4x4/;
- add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d45_predictor_4x4/;
+ add_proto qw/void vpx_highbd_d207e_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d207e_predictor_4x4/;
- add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d63_predictor_4x4/;
+ add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d45_predictor_4x4/;
- add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_h_predictor_4x4/;
+ add_proto qw/void vpx_highbd_d45e_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d45e_predictor_4x4/;
- add_proto qw/void vpx_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d117_predictor_4x4/;
+ add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d63_predictor_4x4/;
- add_proto qw/void vpx_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d135_predictor_4x4/;
+ add_proto qw/void vpx_highbd_d63e_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d63e_predictor_4x4/;
- add_proto qw/void vpx_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d153_predictor_4x4/;
+ add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_h_predictor_4x4/;
- add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_v_predictor_4x4/, "$sse_x86inc";
+ add_proto qw/void vpx_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d117_predictor_4x4/;
- add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc";
+ add_proto qw/void vpx_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d135_predictor_4x4/;
- add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc";
+ add_proto qw/void vpx_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d153_predictor_4x4/;
- add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_top_predictor_4x4/;
+ add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_v_predictor_4x4/, "$sse_x86inc";
- add_proto qw/void vpx_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_left_predictor_4x4/;
+ add_proto qw/void vpx_highbd_tm_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_tm_predictor_4x4/, "$sse_x86inc";
- add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_128_predictor_4x4/;
+ add_proto qw/void vpx_highbd_dc_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_predictor_4x4/, "$sse_x86inc";
- add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d207_predictor_8x8/;
+ add_proto qw/void vpx_highbd_dc_top_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_top_predictor_4x4/;
- add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d45_predictor_8x8/;
+ add_proto qw/void vpx_highbd_dc_left_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_left_predictor_4x4/;
- add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d63_predictor_8x8/;
+ add_proto qw/void vpx_highbd_dc_128_predictor_4x4/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_128_predictor_4x4/;
- add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_h_predictor_8x8/;
+ add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d207_predictor_8x8/;
- add_proto qw/void vpx_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d117_predictor_8x8/;
+ add_proto qw/void vpx_highbd_d207e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d207e_predictor_8x8/;
- add_proto qw/void vpx_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d135_predictor_8x8/;
+ add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d45_predictor_8x8/;
- add_proto qw/void vpx_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d153_predictor_8x8/;
+ add_proto qw/void vpx_highbd_d45e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d45e_predictor_8x8/;
- add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_v_predictor_8x8/, "$sse2_x86inc";
+ add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d63_predictor_8x8/;
- add_proto qw/void vpx_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_8x8/, "$sse2_x86inc";
+ add_proto qw/void vpx_highbd_d63e_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d63e_predictor_8x8/;
- add_proto qw/void vpx_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_8x8/, "$sse2_x86inc";;
+ add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_h_predictor_8x8/;
- add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_top_predictor_8x8/;
+ add_proto qw/void vpx_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d117_predictor_8x8/;
- add_proto qw/void vpx_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_left_predictor_8x8/;
+ add_proto qw/void vpx_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d135_predictor_8x8/;
- add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_128_predictor_8x8/;
+ add_proto qw/void vpx_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d153_predictor_8x8/;
- add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d207_predictor_16x16/;
+ add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_v_predictor_8x8/, "$sse2_x86inc";
- add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d45_predictor_16x16/;
+ add_proto qw/void vpx_highbd_tm_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_tm_predictor_8x8/, "$sse2_x86inc";
- add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d63_predictor_16x16/;
+ add_proto qw/void vpx_highbd_dc_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_predictor_8x8/, "$sse2_x86inc";;
- add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_h_predictor_16x16/;
+ add_proto qw/void vpx_highbd_dc_top_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_top_predictor_8x8/;
- add_proto qw/void vpx_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d117_predictor_16x16/;
+ add_proto qw/void vpx_highbd_dc_left_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_left_predictor_8x8/;
- add_proto qw/void vpx_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d135_predictor_16x16/;
+ add_proto qw/void vpx_highbd_dc_128_predictor_8x8/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_128_predictor_8x8/;
- add_proto qw/void vpx_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d153_predictor_16x16/;
+ add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d207_predictor_16x16/;
- add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc";
+ add_proto qw/void vpx_highbd_d207e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d207e_predictor_16x16/;
- add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc";
+ add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d45_predictor_16x16/;
- add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc";
+ add_proto qw/void vpx_highbd_d45e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d45e_predictor_16x16/;
- add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_top_predictor_16x16/;
+ add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d63_predictor_16x16/;
- add_proto qw/void vpx_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_left_predictor_16x16/;
+ add_proto qw/void vpx_highbd_d63e_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d63e_predictor_16x16/;
- add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_128_predictor_16x16/;
+ add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_h_predictor_16x16/;
- add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d207_predictor_32x32/;
+ add_proto qw/void vpx_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d117_predictor_16x16/;
- add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d45_predictor_32x32/;
+ add_proto qw/void vpx_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d135_predictor_16x16/;
- add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d63_predictor_32x32/;
+ add_proto qw/void vpx_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d153_predictor_16x16/;
- add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_h_predictor_32x32/;
+ add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_v_predictor_16x16/, "$sse2_x86inc";
- add_proto qw/void vpx_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d117_predictor_32x32/;
+ add_proto qw/void vpx_highbd_tm_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_tm_predictor_16x16/, "$sse2_x86_64_x86inc";
- add_proto qw/void vpx_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d135_predictor_32x32/;
+ add_proto qw/void vpx_highbd_dc_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_predictor_16x16/, "$sse2_x86inc";
- add_proto qw/void vpx_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d153_predictor_32x32/;
+ add_proto qw/void vpx_highbd_dc_top_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_top_predictor_16x16/;
- add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc";
+ add_proto qw/void vpx_highbd_dc_left_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_left_predictor_16x16/;
- add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc";
+ add_proto qw/void vpx_highbd_dc_128_predictor_16x16/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_128_predictor_16x16/;
- add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc";
+ add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d207_predictor_32x32/;
- add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_top_predictor_32x32/;
+ add_proto qw/void vpx_highbd_d207e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d207e_predictor_32x32/;
- add_proto qw/void vpx_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_left_predictor_32x32/;
+ add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d45_predictor_32x32/;
- add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_dc_128_predictor_32x32/;
- } # CONFIG_VP9_HIGHBITDEPTH
-} # CONFIG_VP9 || CONFIG_VP10
+ add_proto qw/void vpx_highbd_d45e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d45e_predictor_32x32/;
+
+ add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d63_predictor_32x32/;
+
+ add_proto qw/void vpx_highbd_d63e_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d63e_predictor_32x32/;
+
+ add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_h_predictor_32x32/;
+
+ add_proto qw/void vpx_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d117_predictor_32x32/;
+
+ add_proto qw/void vpx_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d135_predictor_32x32/;
+
+ add_proto qw/void vpx_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_d153_predictor_32x32/;
+
+ add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_v_predictor_32x32/, "$sse2_x86inc";
+
+ add_proto qw/void vpx_highbd_tm_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_tm_predictor_32x32/, "$sse2_x86_64_x86inc";
+
+ add_proto qw/void vpx_highbd_dc_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_predictor_32x32/, "$sse2_x86_64_x86inc";
+
+ add_proto qw/void vpx_highbd_dc_top_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_top_predictor_32x32/;
+
+ add_proto qw/void vpx_highbd_dc_left_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_left_predictor_32x32/;
+
+ add_proto qw/void vpx_highbd_dc_128_predictor_32x32/, "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
+ specialize qw/vpx_highbd_dc_128_predictor_32x32/;
+} # CONFIG_VP9_HIGHBITDEPTH
#
# Sub Pixel Filters
@@ -421,10 +500,10 @@
# Sub Pixel Filters
#
add_proto qw/void vpx_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/vpx_highbd_convolve_copy/;
+ specialize qw/vpx_highbd_convolve_copy/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/vpx_highbd_convolve_avg/;
+ specialize qw/vpx_highbd_convolve_avg/, "$sse2_x86inc";
add_proto qw/void vpx_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
specialize qw/vpx_highbd_convolve8/, "$sse2_x86_64";
@@ -616,39 +695,6 @@
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Note as optimized versions of these functions are added we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
- add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct4x4_1_add/;
-
- add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct4x4_16_add/;
-
- add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct8x8_1_add/;
-
- add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct8x8_64_add/;
-
- add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct8x8_12_add/;
-
- add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct16x16_1_add/;
-
- add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct16x16_256_add/;
-
- add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct16x16_10_add/;
-
- add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_1024_add/;
-
- add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_34_add/;
-
- add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_1_add/;
-
add_proto qw/void vpx_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_iwht4x4_1_add/;
@@ -681,6 +727,39 @@
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
+ add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct4x4_16_add/;
+
+ add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct4x4_1_add/;
+
+ add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_64_add/;
+
+ add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_12_add/;
+
+ add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_1_add/;
+
+ add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_256_add/;
+
+ add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_10_add/;
+
+ add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_1_add/;
+
+ add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_1024_add/;
+
+ add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_34_add/;
+
+ add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_1_add/;
+
add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vpx_highbd_idct4x4_16_add/;
@@ -696,6 +775,39 @@
add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vpx_highbd_idct16x16_10_add/;
} else {
+ add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct4x4_16_add sse2/;
+
+ add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct4x4_1_add sse2/;
+
+ add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_64_add sse2/;
+
+ add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_12_add sse2/;
+
+ add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct8x8_1_add sse2/;
+
+ add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_256_add sse2/;
+
+ add_proto qw/void vpx_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_10_add sse2/;
+
+ add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct16x16_1_add sse2/;
+
+ add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_1024_add sse2/;
+
+ add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_34_add sse2/;
+
+ add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
+ specialize qw/vpx_idct32x32_1_add sse2/;
+
add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vpx_highbd_idct4x4_16_add sse2/;
@@ -801,25 +913,19 @@
# Quantization
#
if ((vpx_config("CONFIG_VP9_ENCODER") eq "yes") || (vpx_config("CONFIG_VP10_ENCODER") eq "yes")) {
-if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vpx_quantize_b/;
+ specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc";
add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vpx_quantize_b_32x32/;
+ specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc", "$avx_x86_64_x86inc";
- add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vpx_highbd_quantize_b sse2/;
+ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vpx_highbd_quantize_b sse2/;
- add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vpx_highbd_quantize_b_32x32 sse2/;
-} else {
- add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vpx_quantize_b sse2/, "$ssse3_x86_64_x86inc";
-
- add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vpx_quantize_b_32x32/, "$ssse3_x86_64_x86inc";
-} # CONFIG_VP9_HIGHBITDEPTH
+ add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vpx_highbd_quantize_b_32x32 sse2/;
+ } # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
if (vpx_config("CONFIG_ENCODERS") eq "yes") {
@@ -1373,13 +1479,13 @@
# Specialty Subpixel
#
add_proto qw/uint32_t vpx_variance_halfpixvar16x16_h/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_h mmx media/;
+ specialize qw/vpx_variance_halfpixvar16x16_h mmx sse2 media/;
add_proto qw/uint32_t vpx_variance_halfpixvar16x16_v/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_v mmx media/;
+ specialize qw/vpx_variance_halfpixvar16x16_v mmx sse2 media/;
add_proto qw/uint32_t vpx_variance_halfpixvar16x16_hv/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, uint32_t *sse";
- specialize qw/vpx_variance_halfpixvar16x16_hv mmx media/;
+ specialize qw/vpx_variance_halfpixvar16x16_hv mmx sse2 media/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
diff --git a/libvpx/vpx_dsp/x86/halfpix_variance_impl_sse2.asm b/libvpx/vpx_dsp/x86/halfpix_variance_impl_sse2.asm
new file mode 100644
index 0000000..cc26bb6
--- /dev/null
+++ b/libvpx/vpx_dsp/x86/halfpix_variance_impl_sse2.asm
@@ -0,0 +1,346 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vpx_half_horiz_vert_variance16x_h_sse2(unsigned char *ref,
+; int ref_stride,
+; unsigned char *src,
+; int src_stride,
+; unsigned int height,
+; int *sum,
+; unsigned int *sumsquared)
+global sym(vpx_half_horiz_vert_variance16x_h_sse2) PRIVATE
+sym(vpx_half_horiz_vert_variance16x_h_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog; below: accumulate sum and sum of squares of (ref averaged horizontally then vertically) - src, 16 pixels per row
+
+ pxor xmm6, xmm6 ; error accumulator (eight 16-bit lane sums of differences)
+ pxor xmm7, xmm7 ; sse accumulator (four 32-bit lane sums of squared differences)
+ mov rsi, arg(0) ;ref
+
+ mov rdi, arg(2) ;src
+ movsxd rcx, dword ptr arg(4) ;height
+ movsxd rax, dword ptr arg(1) ;ref_stride
+ movsxd rdx, dword ptr arg(3) ;src_stride
+
+ pxor xmm0, xmm0 ; xmm0 = 0, used to widen bytes to words
+
+ movdqu xmm5, XMMWORD PTR [rsi] ; first ref row, bytes 0..15 (loaded before the loop, so height+1 ref rows are read in total)
+ movdqu xmm3, XMMWORD PTR [rsi+1] ; first ref row, bytes 1..16
+ pavgb xmm5, xmm3 ; xmm5 = avg(xmm5,xmm3) horizontal line 1
+
+ lea rsi, [rsi + rax] ; advance ref to the next row
+
+vpx_half_horiz_vert_variance16x_h_1:
+ movdqu xmm1, XMMWORD PTR [rsi] ; next ref row, bytes 0..15
+ movdqu xmm2, XMMWORD PTR [rsi+1] ; next ref row, bytes 1..16
+ pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm2) horizontal line i+1
+
+ pavgb xmm5, xmm1 ; xmm5 = vertical average of the two horizontal averages
+
+ movdqa xmm4, xmm5 ; copy so the high 8 bytes can be widened separately
+ punpcklbw xmm5, xmm0 ; xmm5 = low 8 bytes of the average as words
+ punpckhbw xmm4, xmm0 ; xmm4 = high 8 bytes of the average as words
+
+ movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
+ punpcklbw xmm3, xmm0 ; xmm3 = words of above
+ psubw xmm5, xmm3 ; xmm5 -= xmm3
+
+ movq xmm3, QWORD PTR [rdi+8] ; xmm3 = d8,d9,d10..d15
+ punpcklbw xmm3, xmm0 ; xmm3 = words of above
+ psubw xmm4, xmm3 ; xmm4 -= xmm3
+
+ paddw xmm6, xmm5 ; xmm6 += accumulated column differences
+ paddw xmm6, xmm4
+ pmaddwd xmm5, xmm5 ; square each word and add adjacent pairs into dwords
+ pmaddwd xmm4, xmm4
+ paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
+ paddd xmm7, xmm4
+
+ movdqa xmm5, xmm1 ; save xmm1 for use on the next row
+
+ lea rsi, [rsi + rax]
+ lea rdi, [rdi + rdx]
+
+ sub rcx, 1 ; one more row done
+ jnz vpx_half_horiz_vert_variance16x_h_1 ; loop until height rows have been processed
+
+ pxor xmm1, xmm1
+ pxor xmm5, xmm5 ; xmm5 = 0 for the dword unpacks below
+
+ punpcklwd xmm0, xmm6 ; low 4 word sums go to the upper half of each dword (xmm0 is still 0)
+ punpckhwd xmm1, xmm6 ; same for the high 4 word sums
+ psrad xmm0, 16 ; arithmetic shift sign-extends each word sum to 32 bits
+ psrad xmm1, 16
+ paddd xmm0, xmm1 ; four 32-bit partial sums of differences
+ movdqa xmm1, xmm0
+
+ movdqa xmm6, xmm7
+ punpckldq xmm6, xmm5
+ punpckhdq xmm7, xmm5
+ paddd xmm6, xmm7 ; fold the four squared-difference dwords into two
+
+ punpckldq xmm0, xmm5
+ punpckhdq xmm1, xmm5
+ paddd xmm0, xmm1 ; fold the four difference dwords into two
+
+ movdqa xmm7, xmm6
+ movdqa xmm1, xmm0
+
+ psrldq xmm7, 8
+ psrldq xmm1, 8
+
+ paddd xmm6, xmm7 ; low dword of xmm6 = total sum of squared differences
+ paddd xmm0, xmm1 ; low dword of xmm0 = total sum of differences
+
+ mov rsi, arg(5) ;[Sum]
+ mov rdi, arg(6) ;[SSE]
+
+ movd [rsi], xmm0
+ movd [rdi], xmm6
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vpx_half_vert_variance16x_h_sse2(unsigned char *ref,
+; int ref_stride,
+; unsigned char *src,
+; int src_stride,
+; unsigned int height,
+; int *sum,
+; unsigned int *sumsquared)
+global sym(vpx_half_vert_variance16x_h_sse2) PRIVATE
+sym(vpx_half_vert_variance16x_h_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog; below: accumulate sum and sum of squares of (ref averaged with the row below) - src, 16 pixels per row
+
+ pxor xmm6, xmm6 ; error accumulator (eight 16-bit lane sums of differences)
+ pxor xmm7, xmm7 ; sse accumulator (four 32-bit lane sums of squared differences)
+ mov rsi, arg(0) ;ref
+
+ mov rdi, arg(2) ;src
+ movsxd rcx, dword ptr arg(4) ;height
+ movsxd rax, dword ptr arg(1) ;ref_stride
+ movsxd rdx, dword ptr arg(3) ;src_stride
+
+ movdqu xmm5, XMMWORD PTR [rsi] ; first ref row (loaded before the loop, so height+1 ref rows are read in total)
+ lea rsi, [rsi + rax ] ; advance ref to the next row
+ pxor xmm0, xmm0 ; xmm0 = 0, used to widen bytes to words
+
+vpx_half_vert_variance16x_h_1:
+ movdqu xmm3, XMMWORD PTR [rsi] ; next ref row
+
+ pavgb xmm5, xmm3 ; xmm5 = avg(xmm5,xmm3), previous row averaged with this row
+ movdqa xmm4, xmm5 ; copy so the high 8 bytes can be widened separately
+ punpcklbw xmm5, xmm0 ; xmm5 = low 8 bytes of the average as words
+ punpckhbw xmm4, xmm0 ; xmm4 = high 8 bytes of the average as words
+
+ movq xmm2, QWORD PTR [rdi] ; xmm2 = d0,d1,d2..d7
+ punpcklbw xmm2, xmm0 ; xmm2 = words of above
+ psubw xmm5, xmm2 ; xmm5 -= xmm2
+ movq xmm2, QWORD PTR [rdi+8] ; xmm2 = d8,d9,d10..d15
+ punpcklbw xmm2, xmm0 ; xmm2 = words of above
+ psubw xmm4, xmm2 ; xmm4 -= xmm2
+
+ paddw xmm6, xmm5 ; xmm6 += accumulated column differences
+ paddw xmm6, xmm4
+ pmaddwd xmm5, xmm5 ; square each word and add adjacent pairs into dwords
+ pmaddwd xmm4, xmm4
+ paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
+ paddd xmm7, xmm4
+
+ movdqa xmm5, xmm3 ; keep this ref row for use on the next iteration
+
+ lea rsi, [rsi + rax]
+ lea rdi, [rdi + rdx]
+
+ sub rcx, 1 ; one more row done
+ jnz vpx_half_vert_variance16x_h_1 ; loop until height rows have been processed
+
+ pxor xmm1, xmm1
+ pxor xmm5, xmm5 ; xmm5 = 0 for the dword unpacks below
+
+ punpcklwd xmm0, xmm6 ; low 4 word sums go to the upper half of each dword (xmm0 is still 0)
+ punpckhwd xmm1, xmm6 ; same for the high 4 word sums
+ psrad xmm0, 16 ; arithmetic shift sign-extends each word sum to 32 bits
+ psrad xmm1, 16
+ paddd xmm0, xmm1 ; four 32-bit partial sums of differences
+ movdqa xmm1, xmm0
+
+ movdqa xmm6, xmm7
+ punpckldq xmm6, xmm5
+ punpckhdq xmm7, xmm5
+ paddd xmm6, xmm7 ; fold the four squared-difference dwords into two
+
+ punpckldq xmm0, xmm5
+ punpckhdq xmm1, xmm5
+ paddd xmm0, xmm1 ; fold the four difference dwords into two
+
+ movdqa xmm7, xmm6
+ movdqa xmm1, xmm0
+
+ psrldq xmm7, 8
+ psrldq xmm1, 8
+
+ paddd xmm6, xmm7 ; low dword of xmm6 = total sum of squared differences
+ paddd xmm0, xmm1 ; low dword of xmm0 = total sum of differences
+
+ mov rsi, arg(5) ;[Sum]
+ mov rdi, arg(6) ;[SSE]
+
+ movd [rsi], xmm0
+ movd [rdi], xmm6
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+;void vpx_half_horiz_variance16x_h_sse2(unsigned char *ref,
+; int ref_stride,
+; unsigned char *src,
+; int src_stride,
+; unsigned int height,
+; int *sum,
+; unsigned int *sumsquared)
+global sym(vpx_half_horiz_variance16x_h_sse2) PRIVATE
+sym(vpx_half_horiz_variance16x_h_sse2):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 7
+ SAVE_XMM 7
+ GET_GOT rbx
+ push rsi
+ push rdi
+ ; end prolog; below: accumulate sum and sum of squares of (ref averaged with its right neighbour) - src, 16 pixels per row
+
+ pxor xmm6, xmm6 ; error accumulator (eight 16-bit lane sums of differences)
+ pxor xmm7, xmm7 ; sse accumulator (four 32-bit lane sums of squared differences)
+ mov rsi, arg(0) ;ref
+
+ mov rdi, arg(2) ;src
+ movsxd rcx, dword ptr arg(4) ;height
+ movsxd rax, dword ptr arg(1) ;ref_stride
+ movsxd rdx, dword ptr arg(3) ;src_stride
+
+ pxor xmm0, xmm0 ; xmm0 = 0, used to widen bytes to words
+
+vpx_half_horiz_variance16x_h_1:
+ movdqu xmm5, XMMWORD PTR [rsi] ; xmm5 = s0,s1,s2..s15
+ movdqu xmm3, XMMWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s16
+
+ pavgb xmm5, xmm3 ; xmm5 = avg(xmm5,xmm3)
+ movdqa xmm1, xmm5 ; copy so the high 8 bytes can be widened separately
+ punpcklbw xmm5, xmm0 ; xmm5 = low 8 bytes of the average as words
+ punpckhbw xmm1, xmm0 ; xmm1 = high 8 bytes of the average as words
+
+ movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
+ punpcklbw xmm3, xmm0 ; xmm3 = words of above
+ movq xmm2, QWORD PTR [rdi+8] ; xmm2 = d8,d9,d10..d15
+ punpcklbw xmm2, xmm0 ; xmm2 = words of above
+
+ psubw xmm5, xmm3 ; xmm5 -= xmm3
+ psubw xmm1, xmm2 ; xmm1 -= xmm2
+ paddw xmm6, xmm5 ; xmm6 += accumulated column differences
+ paddw xmm6, xmm1
+ pmaddwd xmm5, xmm5 ; square each word and add adjacent pairs into dwords
+ pmaddwd xmm1, xmm1
+ paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
+ paddd xmm7, xmm1
+
+ lea rsi, [rsi + rax]
+ lea rdi, [rdi + rdx]
+
+ sub rcx, 1 ; one more row done
+ jnz vpx_half_horiz_variance16x_h_1 ; loop until height rows have been processed
+
+ pxor xmm1, xmm1
+ pxor xmm5, xmm5 ; xmm5 = 0 for the dword unpacks below
+
+ punpcklwd xmm0, xmm6 ; low 4 word sums go to the upper half of each dword (xmm0 is still 0)
+ punpckhwd xmm1, xmm6 ; same for the high 4 word sums
+ psrad xmm0, 16 ; arithmetic shift sign-extends each word sum to 32 bits
+ psrad xmm1, 16
+ paddd xmm0, xmm1 ; four 32-bit partial sums of differences
+ movdqa xmm1, xmm0
+
+ movdqa xmm6, xmm7
+ punpckldq xmm6, xmm5
+ punpckhdq xmm7, xmm5
+ paddd xmm6, xmm7 ; fold the four squared-difference dwords into two
+
+ punpckldq xmm0, xmm5
+ punpckhdq xmm1, xmm5
+ paddd xmm0, xmm1 ; fold the four difference dwords into two
+
+ movdqa xmm7, xmm6
+ movdqa xmm1, xmm0
+
+ psrldq xmm7, 8
+ psrldq xmm1, 8
+
+ paddd xmm6, xmm7 ; low dword of xmm6 = total sum of squared differences
+ paddd xmm0, xmm1 ; low dword of xmm0 = total sum of differences
+
+ mov rsi, arg(5) ;[Sum]
+ mov rdi, arg(6) ;[SSE]
+
+ movd [rsi], xmm0
+ movd [rdi], xmm6
+
+ ; begin epilog
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ RESTORE_XMM
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+SECTION_RODATA
+; short xmm_bi_rd[8] = { 64, 64, 64, 64, 64, 64, 64, 64 }; not referenced by the three routines in this file
+align 16
+xmm_bi_rd:
+ times 8 dw 64
+align 16
+vpx_bilinear_filters_sse2: ; 8 rows of 16 words: eight copies of one weight, then eight copies of its complement to 128; also not referenced in this file
+ dw 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0
+ dw 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16
+ dw 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32
+ dw 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48
+ dw 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
+ dw 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80
+ dw 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96
+ dw 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112
diff --git a/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c b/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c
new file mode 100644
index 0000000..5782155
--- /dev/null
+++ b/libvpx/vpx_dsp/x86/halfpix_variance_sse2.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
+void vpx_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref,
+ int ref_stride,
+ const unsigned char *src,
+ int src_stride,
+ unsigned int height,
+ int *sum,
+ unsigned int *sumsquared);
+void vpx_half_horiz_variance16x_h_sse2(const unsigned char *ref, int ref_stride,
+ const unsigned char *src, int src_stride,
+ unsigned int height, int *sum,
+ unsigned int *sumsquared);
+void vpx_half_vert_variance16x_h_sse2(const unsigned char *ref, int ref_stride,
+ const unsigned char *src, int src_stride,
+ unsigned int height, int *sum,
+ unsigned int *sumsquared);
+
+uint32_t vpx_variance_halfpixvar16x16_h_sse2(const unsigned char *src,
+ int src_stride,
+ const unsigned char *dst,
+ int dst_stride,
+ uint32_t *sse) {
+ int xsum0;
+ unsigned int xxsum0;
+ // The kernel runs over 16 rows and fills xsum0 / xxsum0 via its sum / sumsquared out-pointers.
+ vpx_half_horiz_variance16x_h_sse2(src, src_stride, dst, dst_stride, 16,
+ &xsum0, &xxsum0);
+ // *sse receives xxsum0 unchanged; the return value subtracts (xsum0 * xsum0) >> 8 from it.
+ *sse = xxsum0;
+ return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8));  // cast first: the square is unsigned math
+}
+
+uint32_t vpx_variance_halfpixvar16x16_v_sse2(const unsigned char *src,
+ int src_stride,
+ const unsigned char *dst,
+ int dst_stride,
+ uint32_t *sse) {
+ int xsum0;
+ unsigned int xxsum0;
+ vpx_half_vert_variance16x_h_sse2(src, src_stride, dst, dst_stride, 16,
+ &xsum0, &xxsum0);  // 16 rows; outputs arrive through the sum / sumsquared pointers
+ // *sse receives xxsum0 unchanged; the return value subtracts (xsum0 * xsum0) >> 8 from it.
+ *sse = xxsum0;
+ return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8));  // cast first: the square is unsigned math
+}
+
+
+uint32_t vpx_variance_halfpixvar16x16_hv_sse2(const unsigned char *src,
+ int src_stride,
+ const unsigned char *dst,
+ int dst_stride,
+ uint32_t *sse) {
+ int xsum0;
+ unsigned int xxsum0;
+ // The kernel runs over 16 rows and fills xsum0 / xxsum0 via its sum / sumsquared out-pointers.
+ vpx_half_horiz_vert_variance16x_h_sse2(src, src_stride, dst, dst_stride, 16,
+ &xsum0, &xxsum0);
+ // *sse receives xxsum0 unchanged; the return value subtracts (xsum0 * xsum0) >> 8 from it.
+ *sse = xxsum0;
+ return (xxsum0 - (((uint32_t)xsum0 * xsum0) >> 8));  // cast first: the square is unsigned math
+}
diff --git a/libvpx/vpx_dsp/x86/inv_txfm_sse2.c b/libvpx/vpx_dsp/x86/inv_txfm_sse2.c
index f3af68f..ae907fd 100644
--- a/libvpx/vpx_dsp/x86/inv_txfm_sse2.c
+++ b/libvpx/vpx_dsp/x86/inv_txfm_sse2.c
@@ -21,7 +21,8 @@
*(int *)(dest) = _mm_cvtsi128_si32(d0); \
}
-void vpx_idct4x4_16_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest,
+ int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i eight = _mm_set1_epi16(8);
const __m128i cst = _mm_setr_epi16(
@@ -32,8 +33,8 @@
__m128i input0, input1, input2, input3;
// Rows
- input0 = _mm_load_si128((const __m128i *)input);
- input2 = _mm_load_si128((const __m128i *)(input + 8));
+ input0 = load_input_data(input);
+ input2 = load_input_data(input + 8);
// Construct i3, i1, i3, i1, i2, i0, i2, i0
input0 = _mm_shufflelo_epi16(input0, 0xd8);
@@ -151,7 +152,8 @@
}
}
-void vpx_idct4x4_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest,
+ int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
int a;
@@ -449,7 +451,8 @@
out7 = _mm_subs_epi16(stp1_0, stp2_7); \
}
-void vpx_idct8x8_64_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest,
+ int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1 << 4);
@@ -469,14 +472,14 @@
int i;
// Load input data.
- in0 = _mm_load_si128((const __m128i *)input);
- in1 = _mm_load_si128((const __m128i *)(input + 8 * 1));
- in2 = _mm_load_si128((const __m128i *)(input + 8 * 2));
- in3 = _mm_load_si128((const __m128i *)(input + 8 * 3));
- in4 = _mm_load_si128((const __m128i *)(input + 8 * 4));
- in5 = _mm_load_si128((const __m128i *)(input + 8 * 5));
- in6 = _mm_load_si128((const __m128i *)(input + 8 * 6));
- in7 = _mm_load_si128((const __m128i *)(input + 8 * 7));
+ in0 = load_input_data(input);
+ in1 = load_input_data(input + 8 * 1);
+ in2 = load_input_data(input + 8 * 2);
+ in3 = load_input_data(input + 8 * 3);
+ in4 = load_input_data(input + 8 * 4);
+ in5 = load_input_data(input + 8 * 5);
+ in6 = load_input_data(input + 8 * 6);
+ in7 = load_input_data(input + 8 * 7);
// 2-D
for (i = 0; i < 2; i++) {
@@ -518,7 +521,8 @@
RECON_AND_STORE(dest + 7 * stride, in7);
}
-void vpx_idct8x8_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest,
+ int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
int a;
@@ -792,7 +796,8 @@
in[7] = _mm_sub_epi16(k__const_0, s1);
}
-void vpx_idct8x8_12_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest,
+ int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1 << 4);
@@ -812,10 +817,10 @@
__m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
// Rows. Load 4-row input data.
- in0 = _mm_load_si128((const __m128i *)input);
- in1 = _mm_load_si128((const __m128i *)(input + 8 * 1));
- in2 = _mm_load_si128((const __m128i *)(input + 8 * 2));
- in3 = _mm_load_si128((const __m128i *)(input + 8 * 3));
+ in0 = load_input_data(input);
+ in1 = load_input_data(input + 8 * 1);
+ in2 = load_input_data(input + 8 * 2);
+ in3 = load_input_data(input + 8 * 3);
// 8x4 Transpose
TRANSPOSE_8X8_10(in0, in1, in2, in3, in0, in1);
@@ -1169,7 +1174,7 @@
stp2_10, stp2_13, stp2_11, stp2_12) \
}
-void vpx_idct16x16_256_add_sse2(const int16_t *input, uint8_t *dest,
+void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1 << 5);
@@ -1214,22 +1219,22 @@
// 1-D idct
// Load input data.
- in[0] = _mm_load_si128((const __m128i *)input);
- in[8] = _mm_load_si128((const __m128i *)(input + 8 * 1));
- in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2));
- in[9] = _mm_load_si128((const __m128i *)(input + 8 * 3));
- in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4));
- in[10] = _mm_load_si128((const __m128i *)(input + 8 * 5));
- in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6));
- in[11] = _mm_load_si128((const __m128i *)(input + 8 * 7));
- in[4] = _mm_load_si128((const __m128i *)(input + 8 * 8));
- in[12] = _mm_load_si128((const __m128i *)(input + 8 * 9));
- in[5] = _mm_load_si128((const __m128i *)(input + 8 * 10));
- in[13] = _mm_load_si128((const __m128i *)(input + 8 * 11));
- in[6] = _mm_load_si128((const __m128i *)(input + 8 * 12));
- in[14] = _mm_load_si128((const __m128i *)(input + 8 * 13));
- in[7] = _mm_load_si128((const __m128i *)(input + 8 * 14));
- in[15] = _mm_load_si128((const __m128i *)(input + 8 * 15));
+ in[0] = load_input_data(input);
+ in[8] = load_input_data(input + 8 * 1);
+ in[1] = load_input_data(input + 8 * 2);
+ in[9] = load_input_data(input + 8 * 3);
+ in[2] = load_input_data(input + 8 * 4);
+ in[10] = load_input_data(input + 8 * 5);
+ in[3] = load_input_data(input + 8 * 6);
+ in[11] = load_input_data(input + 8 * 7);
+ in[4] = load_input_data(input + 8 * 8);
+ in[12] = load_input_data(input + 8 * 9);
+ in[5] = load_input_data(input + 8 * 10);
+ in[13] = load_input_data(input + 8 * 11);
+ in[6] = load_input_data(input + 8 * 12);
+ in[14] = load_input_data(input + 8 * 13);
+ in[7] = load_input_data(input + 8 * 14);
+ in[15] = load_input_data(input + 8 * 15);
array_transpose_8x8(in, in);
array_transpose_8x8(in + 8, in + 8);
@@ -1294,7 +1299,8 @@
}
}
-void vpx_idct16x16_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest,
+ int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
int a, i;
@@ -2152,7 +2158,7 @@
iadst16_8col(in1);
}
-void vpx_idct16x16_10_add_sse2(const int16_t *input, uint8_t *dest,
+void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest,
int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1 << 5);
@@ -2184,10 +2190,10 @@
int i;
// First 1-D inverse DCT
// Load input data.
- in[0] = _mm_load_si128((const __m128i *)input);
- in[1] = _mm_load_si128((const __m128i *)(input + 8 * 2));
- in[2] = _mm_load_si128((const __m128i *)(input + 8 * 4));
- in[3] = _mm_load_si128((const __m128i *)(input + 8 * 6));
+ in[0] = load_input_data(input);
+ in[1] = load_input_data(input + 8 * 2);
+ in[2] = load_input_data(input + 8 * 4);
+ in[3] = load_input_data(input + 8 * 6);
TRANSPOSE_8X4(in[0], in[1], in[2], in[3], in[0], in[1]);
@@ -2391,7 +2397,7 @@
#define LOAD_DQCOEFF(reg, input) \
{ \
- reg = _mm_load_si128((const __m128i *) input); \
+ reg = load_input_data(input); \
input += 8; \
} \
@@ -3029,7 +3035,7 @@
}
// Only upper-left 8x8 has non-zero coeff
-void vpx_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
+void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest,
int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<5);
@@ -3081,14 +3087,14 @@
int i;
// Load input data. Only need to load the top left 8x8 block.
- in[0] = _mm_load_si128((const __m128i *)input);
- in[1] = _mm_load_si128((const __m128i *)(input + 32));
- in[2] = _mm_load_si128((const __m128i *)(input + 64));
- in[3] = _mm_load_si128((const __m128i *)(input + 96));
- in[4] = _mm_load_si128((const __m128i *)(input + 128));
- in[5] = _mm_load_si128((const __m128i *)(input + 160));
- in[6] = _mm_load_si128((const __m128i *)(input + 192));
- in[7] = _mm_load_si128((const __m128i *)(input + 224));
+ in[0] = load_input_data(input);
+ in[1] = load_input_data(input + 32);
+ in[2] = load_input_data(input + 64);
+ in[3] = load_input_data(input + 96);
+ in[4] = load_input_data(input + 128);
+ in[5] = load_input_data(input + 160);
+ in[6] = load_input_data(input + 192);
+ in[7] = load_input_data(input + 224);
for (i = 8; i < 32; ++i) {
in[i] = _mm_setzero_si128();
@@ -3188,7 +3194,7 @@
}
}
-void vpx_idct32x32_1024_add_sse2(const int16_t *input, uint8_t *dest,
+void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest,
int stride) {
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1 << 5);
@@ -3464,10 +3470,11 @@
}
}
-void vpx_idct32x32_1_add_sse2(const int16_t *input, uint8_t *dest, int stride) {
+void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest,
+ int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
- int a, i;
+ int a, j;
a = dct_const_round_shift(input[0] * cospi_16_64);
a = dct_const_round_shift(a * cospi_16_64);
@@ -3475,12 +3482,11 @@
dc_value = _mm_set1_epi16(a);
- for (i = 0; i < 4; ++i) {
- int j;
- for (j = 0; j < 32; ++j) {
- RECON_AND_STORE(dest + j * stride, dc_value);
- }
- dest += 8;
+ for (j = 0; j < 32; ++j) {
+ RECON_AND_STORE(dest + 0 + j * stride, dc_value);
+ RECON_AND_STORE(dest + 8 + j * stride, dc_value);
+ RECON_AND_STORE(dest + 16 + j * stride, dc_value);
+ RECON_AND_STORE(dest + 24 + j * stride, dc_value);
}
}
diff --git a/libvpx/vpx_dsp/x86/inv_txfm_sse2.h b/libvpx/vpx_dsp/x86/inv_txfm_sse2.h
index 658a914..bd520c1 100644
--- a/libvpx/vpx_dsp/x86/inv_txfm_sse2.h
+++ b/libvpx/vpx_dsp/x86/inv_txfm_sse2.h
@@ -15,6 +15,7 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/inv_txfm.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
// perform 8x8 transpose
static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
@@ -89,24 +90,35 @@
res0[15] = tbuf[7];
}
-static INLINE void load_buffer_8x16(const int16_t *input, __m128i *in) {
- in[0] = _mm_load_si128((const __m128i *)(input + 0 * 16));
- in[1] = _mm_load_si128((const __m128i *)(input + 1 * 16));
- in[2] = _mm_load_si128((const __m128i *)(input + 2 * 16));
- in[3] = _mm_load_si128((const __m128i *)(input + 3 * 16));
- in[4] = _mm_load_si128((const __m128i *)(input + 4 * 16));
- in[5] = _mm_load_si128((const __m128i *)(input + 5 * 16));
- in[6] = _mm_load_si128((const __m128i *)(input + 6 * 16));
- in[7] = _mm_load_si128((const __m128i *)(input + 7 * 16));
+// Returns one __m128i of coefficients read from data; lets the 8 bit
+// optimisations be used when profile 0 is used with highbitdepth enabled.
+static INLINE __m128i load_input_data(const tran_low_t *data) {
+#if CONFIG_VP9_HIGHBITDEPTH  // data[0..7] are handed to octa_set_epi16 one by one
+ return octa_set_epi16(data[0], data[1], data[2], data[3], data[4], data[5],
+ data[6], data[7]);
+#else  // one 128-bit load; _mm_load_si128 requires a 16-byte aligned address
+ return _mm_load_si128((const __m128i *)data);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}
- in[8] = _mm_load_si128((const __m128i *)(input + 8 * 16));
- in[9] = _mm_load_si128((const __m128i *)(input + 9 * 16));
- in[10] = _mm_load_si128((const __m128i *)(input + 10 * 16));
- in[11] = _mm_load_si128((const __m128i *)(input + 11 * 16));
- in[12] = _mm_load_si128((const __m128i *)(input + 12 * 16));
- in[13] = _mm_load_si128((const __m128i *)(input + 13 * 16));
- in[14] = _mm_load_si128((const __m128i *)(input + 14 * 16));
- in[15] = _mm_load_si128((const __m128i *)(input + 15 * 16));
+static INLINE void load_buffer_8x16(const tran_low_t *input, __m128i *in) {  // fills in[0..15]
+ in[0] = load_input_data(input + 0 * 16);
+ in[1] = load_input_data(input + 1 * 16);
+ in[2] = load_input_data(input + 2 * 16);
+ in[3] = load_input_data(input + 3 * 16);
+ in[4] = load_input_data(input + 4 * 16);
+ in[5] = load_input_data(input + 5 * 16);
+ in[6] = load_input_data(input + 6 * 16);
+ in[7] = load_input_data(input + 7 * 16);
+ // Second half: in[8..15], still stepping 16 coefficients between consecutive vectors.
+ in[8] = load_input_data(input + 8 * 16);
+ in[9] = load_input_data(input + 9 * 16);
+ in[10] = load_input_data(input + 10 * 16);
+ in[11] = load_input_data(input + 11 * 16);
+ in[12] = load_input_data(input + 12 * 16);
+ in[13] = load_input_data(input + 13 * 16);
+ in[14] = load_input_data(input + 14 * 16);
+ in[15] = load_input_data(input + 15 * 16);
}
#define RECON_AND_STORE(dest, in_x) \
diff --git a/libvpx/vpx_dsp/x86/quantize_avx_x86_64.asm b/libvpx/vpx_dsp/x86/quantize_avx_x86_64.asm
new file mode 100644
index 0000000..01c4129
--- /dev/null
+++ b/libvpx/vpx_dsp/x86/quantize_avx_x86_64.asm
@@ -0,0 +1,544 @@
+;
+; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+%include "third_party/x86inc/x86inc.asm"
+
+SECTION .text
+
+%macro QUANTIZE_FN 2
+cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
+ shift, qcoeff, dqcoeff, dequant, \
+ eob, scan, iscan
+ ; %1 = function name suffix (b or b_32x32); %2 = register count handed to cglobal.
+ vzeroupper
+
+ ; If we can skip this block, then just zero the output
+ cmp skipmp, 0
+ jne .blank
+
+%ifnidn %1, b_32x32
+
+ ; Special case for ncoeff == 16, as it is frequent and we can save on
+ ; not setting up a loop.
+ cmp ncoeffmp, 16
+ jne .generic
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; Special case of ncoeff == 16
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+.single:
+
+ movifnidn coeffq, coeffmp
+ movifnidn zbinq, zbinmp
+ mova m0, [zbinq] ; m0 = zbin
+
+ ; Get DC and first 15 AC coeffs - in this special case, that is all.
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; coeff stored as 32bit numbers but we process them as 16 bit numbers
+ mova m9, [coeffq]
+ packssdw m9, [coeffq+16] ; m9 = c[i]
+ mova m10, [coeffq+32]
+ packssdw m10, [coeffq+48] ; m10 = c[i]
+%else
+ mova m9, [coeffq] ; m9 = c[i]
+ mova m10, [coeffq+16] ; m10 = c[i]
+%endif
+
+ mov r0, eobmp ; Output pointer
+ mov r1, qcoeffmp ; Output pointer
+ mov r2, dqcoeffmp ; Output pointer
+
+ pxor m5, m5 ; m5 = dedicated zero
+
+ pcmpeqw m4, m4 ; All word lanes -1
+ paddw m0, m4 ; m0 = zbin - 1
+
+ pabsw m6, m9 ; m6 = abs(m9)
+ pabsw m11, m10 ; m11 = abs(m10)
+ pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
+ punpckhqdq m0, m0
+ pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
+
+ ; Check if all coeffs are less than zbin. If yes, we just write zeros
+ ; to the outputs and we are done.
+ por m14, m7, m12
+ ptest m14, m14
+ jnz .single_nonzero
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ mova [r1 ], ymm5
+ mova [r1+32], ymm5
+ mova [r2 ], ymm5
+ mova [r2+32], ymm5
+%else
+ mova [r1], ymm5
+ mova [r2], ymm5
+%endif
+ mov [r0], word 0
+
+ vzeroupper
+ RET
+
+.single_nonzero:
+
+ ; Actual quantization of size 16 block - setup pointers, rounders, etc.
+ movifnidn r4, roundmp
+ movifnidn r5, quantmp
+ mov r3, dequantmp
+ mov r6, shiftmp
+ mova m1, [r4] ; m1 = round
+ mova m2, [r5] ; m2 = quant
+ mova m3, [r3] ; m3 = dequant
+ mova m4, [r6] ; m4 = shift
+
+ mov r3, iscanmp
+
+ DEFINE_ARGS eob, qcoeff, dqcoeff, iscan
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ paddsw m6, m1 ; m6 += round
+ punpckhqdq m1, m1
+ paddsw m11, m1 ; m11 += round
+ pmulhw m8, m6, m2 ; m8 = m6*q>>16
+ punpckhqdq m2, m2
+ pmulhw m13, m11, m2 ; m13 = m11*q>>16
+ paddw m8, m6 ; m8 += m6
+ paddw m13, m11 ; m13 += m11
+ pmulhw m8, m4 ; m8 = m8*qsh>>16
+ punpckhqdq m4, m4
+ pmulhw m13, m4 ; m13 = m13*qsh>>16
+ psignw m8, m9 ; m8 = reinsert sign
+ psignw m13, m10 ; m13 = reinsert sign
+ pand m8, m7
+ pand m13, m12
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; Store 16bit numbers as 32bit numbers in array pointed to by qcoeff
+ pcmpgtw m6, m5, m8
+ punpckhwd m6, m8, m6
+ pmovsxwd m11, m8
+ mova [qcoeffq ], m11
+ mova [qcoeffq+16], m6
+ pcmpgtw m6, m5, m13
+ punpckhwd m6, m13, m6
+ pmovsxwd m11, m13
+ mova [qcoeffq+32], m11
+ mova [qcoeffq+48], m6
+%else
+ mova [qcoeffq ], m8
+ mova [qcoeffq+16], m13
+%endif
+
+ pmullw m8, m3 ; dqc[i] = qc[i] * q
+ punpckhqdq m3, m3
+ pmullw m13, m3 ; dqc[i] = qc[i] * q
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; Store 16bit numbers as 32bit numbers in array pointed to by dqcoeff
+ pcmpgtw m6, m5, m8
+ punpckhwd m6, m8, m6
+ pmovsxwd m11, m8
+ mova [dqcoeffq ], m11
+ mova [dqcoeffq+16], m6
+ pcmpgtw m6, m5, m13
+ punpckhwd m6, m13, m6
+ pmovsxwd m11, m13
+ mova [dqcoeffq+32], m11
+ mova [dqcoeffq+48], m6
+%else
+ mova [dqcoeffq ], m8
+ mova [dqcoeffq+16], m13
+%endif
+
+ mova m6, [iscanq] ; m6 = scan[i]
+ mova m11, [iscanq+16] ; m11 = scan[i]
+
+ pcmpeqw m8, m8, m5 ; m8 = dqc[i] == 0
+ pcmpeqw m13, m13, m5 ; m13 = dqc[i] == 0
+ psubw m6, m6, m7 ; m6 = scan[i] + 1
+ psubw m11, m11, m12 ; m11 = scan[i] + 1
+ pandn m8, m8, m6 ; m8 = max(eob)
+ pandn m13, m13, m11 ; m13 = max(eob)
+ pmaxsw m8, m8, m13
+
+ ; Horizontally accumulate/max eobs and write into [eob] memory pointer
+ pshufd m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0x1
+ pmaxsw m8, m7
+ movq rax, m8
+ mov [eobq], ax
+
+ vzeroupper
+ RET
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; Generic case of ncoeff != 16
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+.generic:
+
+%endif ; %ifnidn %1, b_32x32
+
+DEFINE_ARGS coeff, ncoeff, skip, zbin, round, quant, shift, \
+ qcoeff, dqcoeff, dequant, eob, scan, iscan
+
+ ; Actual quantization loop - setup pointers, rounders, etc.
+ movifnidn coeffq, coeffmp
+ movifnidn ncoeffq, ncoeffmp
+ mov r2, dequantmp
+ movifnidn zbinq, zbinmp
+ movifnidn roundq, roundmp
+ movifnidn quantq, quantmp
+ mova m0, [zbinq] ; m0 = zbin
+ mova m1, [roundq] ; m1 = round
+ mova m2, [quantq] ; m2 = quant
+ mova m3, [r2] ; m3 = dequant
+ pcmpeqw m4, m4 ; All lanes -1
+%ifidn %1, b_32x32
+ psubw m0, m4
+ psubw m1, m4
+ psrlw m0, 1 ; m0 = (m0 + 1) / 2
+ psrlw m1, 1 ; m1 = (m1 + 1) / 2
+%endif
+ paddw m0, m4 ; m0 = m0 - 1 (m4 is all -1)
+
+ mov r2, shiftmp
+ mov r3, qcoeffmp
+ mova m4, [r2] ; m4 = shift
+ mov r4, dqcoeffmp
+ mov r5, iscanmp
+%ifidn %1, b_32x32
+ psllw m4, 1
+%endif
+ pxor m5, m5 ; m5 = dedicated zero
+
+ DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ lea coeffq, [ coeffq+ncoeffq*4]
+ lea qcoeffq, [ qcoeffq+ncoeffq*4]
+ lea dqcoeffq, [dqcoeffq+ncoeffq*4]
+%else
+ lea coeffq, [ coeffq+ncoeffq*2]
+ lea qcoeffq, [ qcoeffq+ncoeffq*2]
+ lea dqcoeffq, [dqcoeffq+ncoeffq*2]
+%endif
+ lea iscanq, [ iscanq+ncoeffq*2]
+ neg ncoeffq
+
+ ; get DC and first 15 AC coeffs
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; coeff stored as 32bit numbers & require 16bit numbers
+ mova m9, [coeffq+ncoeffq*4+ 0]
+ packssdw m9, [coeffq+ncoeffq*4+16]
+ mova m10, [coeffq+ncoeffq*4+32]
+ packssdw m10, [coeffq+ncoeffq*4+48]
+%else
+ mova m9, [coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+ mova m10, [coeffq+ncoeffq*2+16] ; m10 = c[i]
+%endif
+
+ pabsw m6, m9 ; m6 = abs(m9)
+ pabsw m11, m10 ; m11 = abs(m10)
+ pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
+ punpckhqdq m0, m0
+ pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
+
+ ; Check if all coeffs are less than zbin. If yes, skip forward quickly.
+ por m14, m7, m12
+ ptest m14, m14
+ jnz .first_nonzero
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ mova [qcoeffq+ncoeffq*4 ], ymm5
+ mova [qcoeffq+ncoeffq*4+32], ymm5
+ mova [dqcoeffq+ncoeffq*4 ], ymm5
+ mova [dqcoeffq+ncoeffq*4+32], ymm5
+%else
+ mova [qcoeffq+ncoeffq*2], ymm5
+ mova [dqcoeffq+ncoeffq*2], ymm5
+%endif
+
+ add ncoeffq, mmsize
+
+ punpckhqdq m1, m1
+ punpckhqdq m2, m2
+ punpckhqdq m3, m3
+ punpckhqdq m4, m4
+ pxor m8, m8
+
+ jmp .ac_only_loop
+
+.first_nonzero:
+
+ paddsw m6, m1 ; m6 += round
+ punpckhqdq m1, m1
+ paddsw m11, m1 ; m11 += round
+ pmulhw m8, m6, m2 ; m8 = m6*q>>16
+ punpckhqdq m2, m2
+ pmulhw m13, m11, m2 ; m13 = m11*q>>16
+ paddw m8, m6 ; m8 += m6
+ paddw m13, m11 ; m13 += m11
+ pmulhw m8, m4 ; m8 = m8*qsh>>16
+ punpckhqdq m4, m4
+ pmulhw m13, m4 ; m13 = m13*qsh>>16
+ psignw m8, m9 ; m8 = reinsert sign
+ psignw m13, m10 ; m13 = reinsert sign
+ pand m8, m7
+ pand m13, m12
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff
+ pcmpgtw m6, m5, m8
+ punpckhwd m6, m8, m6
+ pmovsxwd m11, m8
+ mova [qcoeffq+ncoeffq*4+ 0], m11
+ mova [qcoeffq+ncoeffq*4+16], m6
+ pcmpgtw m6, m5, m13
+ punpckhwd m6, m13, m6
+ pmovsxwd m11, m13
+ mova [qcoeffq+ncoeffq*4+32], m11
+ mova [qcoeffq+ncoeffq*4+48], m6
+%else
+ mova [qcoeffq+ncoeffq*2+ 0], m8
+ mova [qcoeffq+ncoeffq*2+16], m13
+%endif
+
+%ifidn %1, b_32x32
+ pabsw m8, m8
+ pabsw m13, m13
+%endif
+ pmullw m8, m3 ; dqc[i] = qc[i] * q
+ punpckhqdq m3, m3
+ pmullw m13, m3 ; dqc[i] = qc[i] * q
+%ifidn %1, b_32x32
+ psrlw m8, 1
+ psrlw m13, 1
+ psignw m8, m9
+ psignw m13, m10
+%endif
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; store 16bit numbers as 32bit numbers in array pointed to by dqcoeff
+ pcmpgtw m6, m5, m8
+ punpckhwd m6, m8, m6
+ pmovsxwd m11, m8
+ mova [dqcoeffq+ncoeffq*4+ 0], m11
+ mova [dqcoeffq+ncoeffq*4+16], m6
+ pcmpgtw m6, m5, m13
+ punpckhwd m6, m13, m6
+ pmovsxwd m11, m13
+ mova [dqcoeffq+ncoeffq*4+32], m11
+ mova [dqcoeffq+ncoeffq*4+48], m6
+%else
+ mova [dqcoeffq+ncoeffq*2+ 0], m8
+ mova [dqcoeffq+ncoeffq*2+16], m13
+%endif
+
+ pcmpeqw m8, m5 ; m8 = dqc[i] == 0
+ pcmpeqw m13, m5 ; m13 = dqc[i] == 0
+ mova m6, [iscanq+ncoeffq*2] ; m6 = scan[i]
+ mova m11, [iscanq+ncoeffq*2+16] ; m11 = scan[i]
+ psubw m6, m7 ; m6 = scan[i] + 1
+ psubw m11, m12 ; m11 = scan[i] + 1
+ pandn m8, m6 ; m8 = max(eob)
+ pandn m13, m11 ; m13 = max(eob)
+ pmaxsw m8, m13
+ add ncoeffq, mmsize
+
+.ac_only_loop:
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; pack coeff from 32bit to 16bit array
+ mova m9, [coeffq+ncoeffq*4+ 0]
+ packssdw m9, [coeffq+ncoeffq*4+16]
+ mova m10, [coeffq+ncoeffq*4+32]
+ packssdw m10, [coeffq+ncoeffq*4+48]
+%else
+ mova m9, [coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+ mova m10, [coeffq+ncoeffq*2+16] ; m10 = c[i]
+%endif
+
+ pabsw m6, m9 ; m6 = abs(m9)
+ pabsw m11, m10 ; m11 = abs(m10)
+ pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
+ pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
+
+ ; Check if all coeffs are less than zbin. If yes, skip this iteration
+ ; and just write zeros, which is what the result would be.
+ por m14, m7, m12
+ ptest m14, m14
+ jnz .rest_nonzero
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ mova [qcoeffq+ncoeffq*4+ 0], ymm5
+ mova [qcoeffq+ncoeffq*4+32], ymm5
+ mova [dqcoeffq+ncoeffq*4+ 0], ymm5
+ mova [dqcoeffq+ncoeffq*4+32], ymm5
+%else
+ mova [qcoeffq+ncoeffq*2+ 0], ymm5
+ mova [dqcoeffq+ncoeffq*2+ 0], ymm5
+%endif
+ add ncoeffq, mmsize
+ jnz .ac_only_loop
+
+ ; Horizontally accumulate/max eobs and write into [eob] memory pointer
+ mov r2, eobmp
+ pshufd m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0x1
+ pmaxsw m8, m7
+ movq rax, m8
+ mov [r2], ax
+ vzeroupper
+ RET
+
+.rest_nonzero:
+ paddsw m6, m1 ; m6 += round
+ paddsw m11, m1 ; m11 += round
+ pmulhw m14, m6, m2 ; m14 = m6*q>>16
+ pmulhw m13, m11, m2 ; m13 = m11*q>>16
+ paddw m14, m6 ; m14 += m6
+ paddw m13, m11 ; m13 += m11
+ pmulhw m14, m4 ; m14 = m14*qsh>>16
+ pmulhw m13, m4 ; m13 = m13*qsh>>16
+ psignw m14, m9 ; m14 = reinsert sign
+ psignw m13, m10 ; m13 = reinsert sign
+ pand m14, m7
+ pand m13, m12
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff
+ pcmpgtw m6, m5, m14
+ punpckhwd m6, m14, m6
+ pmovsxwd m11, m14
+ mova [qcoeffq+ncoeffq*4+ 0], m11
+ mova [qcoeffq+ncoeffq*4+16], m6
+ pcmpgtw m6, m5, m13
+ punpckhwd m6, m13, m6
+ pmovsxwd m11, m13
+ mova [qcoeffq+ncoeffq*4+32], m11
+ mova [qcoeffq+ncoeffq*4+48], m6
+%else
+ mova [qcoeffq+ncoeffq*2+ 0], m14
+ mova [qcoeffq+ncoeffq*2+16], m13
+%endif
+
+%ifidn %1, b_32x32
+ pabsw m14, m14
+ pabsw m13, m13
+%endif
+ pmullw m14, m3 ; dqc[i] = qc[i] * q
+ pmullw m13, m3 ; dqc[i] = qc[i] * q
+%ifidn %1, b_32x32
+ psrlw m14, 1
+ psrlw m13, 1
+ psignw m14, m9
+ psignw m13, m10
+%endif
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; store 16bit numbers as 32bit numbers in array pointed to by dqcoeff
+ pcmpgtw m6, m5, m14
+ punpckhwd m6, m14, m6
+ pmovsxwd m11, m14
+ mova [dqcoeffq+ncoeffq*4+ 0], m11
+ mova [dqcoeffq+ncoeffq*4+16], m6
+ pcmpgtw m6, m5, m13
+ punpckhwd m6, m13, m6
+ pmovsxwd m11, m13
+ mova [dqcoeffq+ncoeffq*4+32], m11
+ mova [dqcoeffq+ncoeffq*4+48], m6
+%else
+ mova [dqcoeffq+ncoeffq*2+ 0], m14
+ mova [dqcoeffq+ncoeffq*2+16], m13
+%endif
+
+ pcmpeqw m14, m5 ; m14 = dqc[i] == 0
+ pcmpeqw m13, m5 ; m13 = dqc[i] == 0
+ mova m6, [iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
+ mova m11, [iscanq+ncoeffq*2+16] ; m11 = scan[i]
+ psubw m6, m7 ; m6 = scan[i] + 1
+ psubw m11, m12 ; m11 = scan[i] + 1
+ pandn m14, m6 ; m14 = max(eob)
+ pandn m13, m11 ; m13 = max(eob)
+ pmaxsw m8, m14
+ pmaxsw m8, m13
+ add ncoeffq, mmsize
+ jnz .ac_only_loop
+
+ ; Horizontally accumulate/max eobs and write into [eob] memory pointer
+ mov r2, eobmp
+ pshufd m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0x1
+ pmaxsw m8, m7
+ movq rax, m8
+ mov [r2], ax
+ vzeroupper
+ RET
+
+ ; Skip-block, i.e. just write all zeroes
+.blank:
+
+DEFINE_ARGS coeff, ncoeff, skip, zbin, round, quant, shift, \
+ qcoeff, dqcoeff, dequant, eob, scan, iscan
+
+ mov r0, dqcoeffmp
+ movifnidn ncoeffq, ncoeffmp
+ mov r2, qcoeffmp
+ mov r3, eobmp
+
+DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob
+
+%if CONFIG_VP9_HIGHBITDEPTH
+ lea dqcoeffq, [dqcoeffq+ncoeffq*4]
+ lea qcoeffq, [ qcoeffq+ncoeffq*4]
+%else
+ lea dqcoeffq, [dqcoeffq+ncoeffq*2]
+ lea qcoeffq, [ qcoeffq+ncoeffq*2]
+%endif
+
+ neg ncoeffq
+ pxor m7, m7
+
+.blank_loop:
+%if CONFIG_VP9_HIGHBITDEPTH
+ mova [dqcoeffq+ncoeffq*4+ 0], ymm7
+ mova [dqcoeffq+ncoeffq*4+32], ymm7
+ mova [qcoeffq+ncoeffq*4+ 0], ymm7
+ mova [qcoeffq+ncoeffq*4+32], ymm7
+%else
+ mova [dqcoeffq+ncoeffq*2+ 0], ymm7
+ mova [qcoeffq+ncoeffq*2+ 0], ymm7
+%endif
+ add ncoeffq, mmsize
+ jl .blank_loop
+
+ mov [eobq], word 0
+
+ vzeroupper
+ RET
+%endmacro
+
+INIT_XMM avx
+QUANTIZE_FN b, 7
+QUANTIZE_FN b_32x32, 7
+
+END
diff --git a/libvpx/vpx_dsp/x86/quantize_sse2.c b/libvpx/vpx_dsp/x86/quantize_sse2.c
index c2a804e..8aa4568 100644
--- a/libvpx/vpx_dsp/x86/quantize_sse2.c
+++ b/libvpx/vpx_dsp/x86/quantize_sse2.c
@@ -14,11 +14,36 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
-void vpx_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
+static INLINE __m128i load_coefficients(const tran_low_t *coeff_ptr) {
+#if CONFIG_VP9_HIGHBITDEPTH  // each of coeff_ptr[0..7] is cast to int16_t; coeff_ptr[0] is the lowest lane
+ return _mm_setr_epi16((int16_t)coeff_ptr[0], (int16_t)coeff_ptr[1],
+ (int16_t)coeff_ptr[2], (int16_t)coeff_ptr[3], (int16_t)coeff_ptr[4],
+ (int16_t)coeff_ptr[5], (int16_t)coeff_ptr[6], (int16_t)coeff_ptr[7]);
+#else  // one 128-bit load; _mm_load_si128 requires a 16-byte aligned address
+ return _mm_load_si128((const __m128i *)coeff_ptr);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}
+
+static INLINE void store_coefficients(__m128i coeff_vals,
+ tran_low_t *coeff_ptr) {
+#if CONFIG_VP9_HIGHBITDEPTH  // widen the eight 16-bit lanes to 32 bits and store them as two vectors
+ __m128i one = _mm_set1_epi16(1);
+ __m128i coeff_vals_hi = _mm_mulhi_epi16(coeff_vals, one);  // high word of x * 1: the sign extension
+ __m128i coeff_vals_lo = _mm_mullo_epi16(coeff_vals, one);  // low word of x * 1: x itself
+ __m128i coeff_vals_1 = _mm_unpacklo_epi16(coeff_vals_lo, coeff_vals_hi);  // lanes 0..3 as (lo, hi) pairs
+ __m128i coeff_vals_2 = _mm_unpackhi_epi16(coeff_vals_lo, coeff_vals_hi);  // lanes 4..7 as (lo, hi) pairs
+ _mm_store_si128((__m128i*)(coeff_ptr), coeff_vals_1);
+ _mm_store_si128((__m128i*)(coeff_ptr + 4), coeff_vals_2);
+#else  // lanes are stored unchanged with one 128-bit store
+ _mm_store_si128((__m128i*)(coeff_ptr), coeff_vals);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}
+
+void vpx_quantize_b_sse2(const tran_low_t* coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t* zbin_ptr,
const int16_t* round_ptr, const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr,
- int16_t* dqcoeff_ptr, const int16_t* dequant_ptr,
+ const int16_t* quant_shift_ptr, tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr, const int16_t* dequant_ptr,
uint16_t* eob_ptr,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
@@ -56,8 +81,8 @@
__m128i qtmp0, qtmp1;
__m128i cmp_mask0, cmp_mask1;
// Do DC and first 15 AC
- coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
- coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
+ coeff0 = load_coefficients(coeff_ptr + n_coeffs);
+ coeff1 = load_coefficients(coeff_ptr + n_coeffs + 8);
// Poor man's sign extract
coeff0_sign = _mm_srai_epi16(coeff0, 15);
@@ -92,15 +117,15 @@
qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+ store_coefficients(qcoeff0, qcoeff_ptr + n_coeffs);
+ store_coefficients(qcoeff1, qcoeff_ptr + n_coeffs + 8);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
dequant = _mm_unpackhi_epi64(dequant, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ store_coefficients(coeff0, dqcoeff_ptr + n_coeffs);
+ store_coefficients(coeff1, dqcoeff_ptr + n_coeffs + 8);
}
{
@@ -134,8 +159,8 @@
__m128i qtmp0, qtmp1;
__m128i cmp_mask0, cmp_mask1;
- coeff0 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs));
- coeff1 = _mm_load_si128((const __m128i*)(coeff_ptr + n_coeffs) + 1);
+ coeff0 = load_coefficients(coeff_ptr + n_coeffs);
+ coeff1 = load_coefficients(coeff_ptr + n_coeffs + 8);
// Poor man's sign extract
coeff0_sign = _mm_srai_epi16(coeff0, 15);
@@ -166,14 +191,14 @@
qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+ store_coefficients(qcoeff0, qcoeff_ptr + n_coeffs);
+ store_coefficients(qcoeff1, qcoeff_ptr + n_coeffs + 8);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ store_coefficients(coeff0, dqcoeff_ptr + n_coeffs);
+ store_coefficients(coeff1, dqcoeff_ptr + n_coeffs + 8);
}
{
@@ -212,10 +237,10 @@
}
} else {
do {
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
+ store_coefficients(zero, dqcoeff_ptr + n_coeffs);
+ store_coefficients(zero, dqcoeff_ptr + n_coeffs + 8);
+ store_coefficients(zero, qcoeff_ptr + n_coeffs);
+ store_coefficients(zero, qcoeff_ptr + n_coeffs + 8);
n_coeffs += 8 * 2;
} while (n_coeffs < 0);
*eob_ptr = 0;
diff --git a/libvpx/vpx_dsp/x86/quantize_ssse3_x86_64.asm b/libvpx/vpx_dsp/x86/quantize_ssse3_x86_64.asm
index 3784d9d..ca21539 100644
--- a/libvpx/vpx_dsp/x86/quantize_ssse3_x86_64.asm
+++ b/libvpx/vpx_dsp/x86/quantize_ssse3_x86_64.asm
@@ -53,15 +53,29 @@
%endif
pxor m5, m5 ; m5 = dedicated zero
DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob
+%if CONFIG_VP9_HIGHBITDEPTH
+ lea coeffq, [ coeffq+ncoeffq*4]
+ lea qcoeffq, [ qcoeffq+ncoeffq*4]
+ lea dqcoeffq, [dqcoeffq+ncoeffq*4]
+%else
lea coeffq, [ coeffq+ncoeffq*2]
- lea iscanq, [ iscanq+ncoeffq*2]
lea qcoeffq, [ qcoeffq+ncoeffq*2]
lea dqcoeffq, [dqcoeffq+ncoeffq*2]
+%endif
+ lea iscanq, [ iscanq+ncoeffq*2]
neg ncoeffq
; get DC and first 15 AC coeffs
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; coeff is stored as 32-bit values; pack them down to the 16-bit words used below
+ mova m9, [ coeffq+ncoeffq*4+ 0]
+ packssdw m9, [ coeffq+ncoeffq*4+16]
+ mova m10, [ coeffq+ncoeffq*4+32]
+ packssdw m10, [ coeffq+ncoeffq*4+48]
+%else
mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
+%endif
pabsw m6, m9 ; m6 = abs(m9)
pabsw m11, m10 ; m11 = abs(m10)
pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
@@ -82,8 +96,28 @@
psignw m13, m10 ; m13 = reinsert sign
pand m8, m7
pand m13, m12
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff
+ mova m11, m8
+ mova m6, m8
+ pcmpgtw m5, m8
+ punpcklwd m11, m5
+ punpckhwd m6, m5
+ mova [qcoeffq+ncoeffq*4+ 0], m11
+ mova [qcoeffq+ncoeffq*4+16], m6
+ pxor m5, m5
+ mova m11, m13
+ mova m6, m13
+ pcmpgtw m5, m13
+ punpcklwd m11, m5
+ punpckhwd m6, m5
+ mova [qcoeffq+ncoeffq*4+32], m11
+ mova [qcoeffq+ncoeffq*4+48], m6
+ pxor m5, m5 ; reset m5 to zero register
+%else
mova [qcoeffq+ncoeffq*2+ 0], m8
mova [qcoeffq+ncoeffq*2+16], m13
+%endif
%ifidn %1, b_32x32
pabsw m8, m8
pabsw m13, m13
@@ -97,8 +131,28 @@
psignw m8, m9
psignw m13, m10
%endif
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; store 16bit numbers as 32bit numbers in array pointed to by dqcoeff
+ mova m11, m8
+ mova m6, m8
+ pcmpgtw m5, m8
+ punpcklwd m11, m5
+ punpckhwd m6, m5
+ mova [dqcoeffq+ncoeffq*4+ 0], m11
+ mova [dqcoeffq+ncoeffq*4+16], m6
+ pxor m5, m5
+ mova m11, m13
+ mova m6, m13
+ pcmpgtw m5, m13
+ punpcklwd m11, m5
+ punpckhwd m6, m5
+ mova [dqcoeffq+ncoeffq*4+32], m11
+ mova [dqcoeffq+ncoeffq*4+48], m6
+ pxor m5, m5 ; reset m5 to zero register
+%else
mova [dqcoeffq+ncoeffq*2+ 0], m8
mova [dqcoeffq+ncoeffq*2+16], m13
+%endif
pcmpeqw m8, m5 ; m8 = c[i] == 0
pcmpeqw m13, m5 ; m13 = c[i] == 0
mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
@@ -112,8 +166,16 @@
jz .accumulate_eob
.ac_only_loop:
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; pack coeff from 32bit to 16bit array
+ mova m9, [ coeffq+ncoeffq*4+ 0]
+ packssdw m9, [ coeffq+ncoeffq*4+16]
+ mova m10, [ coeffq+ncoeffq*4+32]
+ packssdw m10, [ coeffq+ncoeffq*4+48]
+%else
mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
+%endif
pabsw m6, m9 ; m6 = abs(m9)
pabsw m11, m10 ; m11 = abs(m10)
pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
@@ -136,8 +198,29 @@
psignw m13, m10 ; m13 = reinsert sign
pand m14, m7
pand m13, m12
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; store 16bit numbers as 32bit numbers in array pointed to by qcoeff
+ pxor m11, m11
+ mova m11, m14
+ mova m6, m14
+ pcmpgtw m5, m14
+ punpcklwd m11, m5
+ punpckhwd m6, m5
+ mova [qcoeffq+ncoeffq*4+ 0], m11
+ mova [qcoeffq+ncoeffq*4+16], m6
+ pxor m5, m5
+ mova m11, m13
+ mova m6, m13
+ pcmpgtw m5, m13
+ punpcklwd m11, m5
+ punpckhwd m6, m5
+ mova [qcoeffq+ncoeffq*4+32], m11
+ mova [qcoeffq+ncoeffq*4+48], m6
+ pxor m5, m5 ; reset m5 to zero register
+%else
mova [qcoeffq+ncoeffq*2+ 0], m14
mova [qcoeffq+ncoeffq*2+16], m13
+%endif
%ifidn %1, b_32x32
pabsw m14, m14
pabsw m13, m13
@@ -150,8 +233,28 @@
psignw m14, m9
psignw m13, m10
%endif
+%if CONFIG_VP9_HIGHBITDEPTH
+ ; store 16bit numbers as 32bit numbers in array pointed to by dqcoeff
+ mova m11, m14
+ mova m6, m14
+ pcmpgtw m5, m14
+ punpcklwd m11, m5
+ punpckhwd m6, m5
+ mova [dqcoeffq+ncoeffq*4+ 0], m11
+ mova [dqcoeffq+ncoeffq*4+16], m6
+ pxor m5, m5
+ mova m11, m13
+ mova m6, m13
+ pcmpgtw m5, m13
+ punpcklwd m11, m5
+ punpckhwd m6, m5
+ mova [dqcoeffq+ncoeffq*4+32], m11
+ mova [dqcoeffq+ncoeffq*4+48], m6
+ pxor m5, m5
+%else
mova [dqcoeffq+ncoeffq*2+ 0], m14
mova [dqcoeffq+ncoeffq*2+16], m13
+%endif
pcmpeqw m14, m5 ; m14 = c[i] == 0
pcmpeqw m13, m5 ; m13 = c[i] == 0
mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
@@ -168,10 +271,21 @@
%ifidn %1, b_32x32
jmp .accumulate_eob
.skip_iter:
+%if CONFIG_VP9_HIGHBITDEPTH
+ mova [qcoeffq+ncoeffq*4+ 0], m5
+ mova [qcoeffq+ncoeffq*4+16], m5
+ mova [qcoeffq+ncoeffq*4+32], m5
+ mova [qcoeffq+ncoeffq*4+48], m5
+ mova [dqcoeffq+ncoeffq*4+ 0], m5
+ mova [dqcoeffq+ncoeffq*4+16], m5
+ mova [dqcoeffq+ncoeffq*4+32], m5
+ mova [dqcoeffq+ncoeffq*4+48], m5
+%else
mova [qcoeffq+ncoeffq*2+ 0], m5
mova [qcoeffq+ncoeffq*2+16], m5
mova [dqcoeffq+ncoeffq*2+ 0], m5
mova [dqcoeffq+ncoeffq*2+16], m5
+%endif
add ncoeffq, mmsize
jl .ac_only_loop
%endif
@@ -196,15 +310,31 @@
mov r2, qcoeffmp
mov r3, eobmp
DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob
+%if CONFIG_VP9_HIGHBITDEPTH
+ lea dqcoeffq, [dqcoeffq+ncoeffq*4]
+ lea qcoeffq, [ qcoeffq+ncoeffq*4]
+%else
lea dqcoeffq, [dqcoeffq+ncoeffq*2]
lea qcoeffq, [ qcoeffq+ncoeffq*2]
+%endif
neg ncoeffq
pxor m7, m7
.blank_loop:
+%if CONFIG_VP9_HIGHBITDEPTH
+ mova [dqcoeffq+ncoeffq*4+ 0], m7
+ mova [dqcoeffq+ncoeffq*4+16], m7
+ mova [dqcoeffq+ncoeffq*4+32], m7
+ mova [dqcoeffq+ncoeffq*4+48], m7
+ mova [qcoeffq+ncoeffq*4+ 0], m7
+ mova [qcoeffq+ncoeffq*4+16], m7
+ mova [qcoeffq+ncoeffq*4+32], m7
+ mova [qcoeffq+ncoeffq*4+48], m7
+%else
mova [dqcoeffq+ncoeffq*2+ 0], m7
mova [dqcoeffq+ncoeffq*2+16], m7
mova [qcoeffq+ncoeffq*2+ 0], m7
mova [qcoeffq+ncoeffq*2+16], m7
+%endif
add ncoeffq, mmsize
jl .blank_loop
mov word [eobq], 0
diff --git a/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm b/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
index b263837..9c5b414 100644
--- a/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
+++ b/libvpx/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
@@ -12,21 +12,77 @@
SECTION .text
-%macro convolve_fn 1
+%macro convolve_fn 1-2
INIT_XMM sse2
+%ifidn %2, highbd
+%define pavg pavgw
+cglobal %2_convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \
+ fx, fxs, fy, fys, w, h, bd
+%else
+%define pavg pavgb
cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \
fx, fxs, fy, fys, w, h
+%endif
mov r4d, dword wm
+%ifidn %2, highbd
+ shl r4d, 1
+ shl srcq, 1
+ shl src_strideq, 1
+ shl dstq, 1
+ shl dst_strideq, 1
+%else
cmp r4d, 4
je .w4
+%endif
cmp r4d, 8
je .w8
cmp r4d, 16
je .w16
cmp r4d, 32
je .w32
+%ifidn %2, highbd
+ cmp r4d, 64
+ je .w64
mov r4d, dword hm
+.loop128:
+ movu m0, [srcq]
+ movu m1, [srcq+16]
+ movu m2, [srcq+32]
+ movu m3, [srcq+48]
+%ifidn %1, avg
+ pavg m0, [dstq]
+ pavg m1, [dstq+16]
+ pavg m2, [dstq+32]
+ pavg m3, [dstq+48]
+%endif
+ mova [dstq ], m0
+ mova [dstq+16], m1
+ mova [dstq+32], m2
+ mova [dstq+48], m3
+ movu m0, [srcq+64]
+ movu m1, [srcq+80]
+ movu m2, [srcq+96]
+ movu m3, [srcq+112]
+ add srcq, src_strideq
+%ifidn %1, avg
+ pavg m0, [dstq+64]
+ pavg m1, [dstq+80]
+ pavg m2, [dstq+96]
+ pavg m3, [dstq+112]
+%endif
+ mova [dstq+64], m0
+ mova [dstq+80], m1
+ mova [dstq+96], m2
+ mova [dstq+112], m3
+ add dstq, dst_strideq
+ dec r4d
+ jnz .loop128
+ RET
+%endif
+
+.w64
+ mov r4d, dword hm
.loop64:
movu m0, [srcq]
movu m1, [srcq+16]
@@ -34,10 +90,10 @@
movu m3, [srcq+48]
add srcq, src_strideq
%ifidn %1, avg
- pavgb m0, [dstq]
- pavgb m1, [dstq+16]
- pavgb m2, [dstq+32]
- pavgb m3, [dstq+48]
+ pavg m0, [dstq]
+ pavg m1, [dstq+16]
+ pavg m2, [dstq+32]
+ pavg m3, [dstq+48]
%endif
mova [dstq ], m0
mova [dstq+16], m1
@@ -57,10 +113,10 @@
movu m3, [srcq+src_strideq+16]
lea srcq, [srcq+src_strideq*2]
%ifidn %1, avg
- pavgb m0, [dstq]
- pavgb m1, [dstq +16]
- pavgb m2, [dstq+dst_strideq]
- pavgb m3, [dstq+dst_strideq+16]
+ pavg m0, [dstq]
+ pavg m1, [dstq +16]
+ pavg m2, [dstq+dst_strideq]
+ pavg m3, [dstq+dst_strideq+16]
%endif
mova [dstq ], m0
mova [dstq +16], m1
@@ -82,10 +138,10 @@
movu m3, [srcq+r5q]
lea srcq, [srcq+src_strideq*4]
%ifidn %1, avg
- pavgb m0, [dstq]
- pavgb m1, [dstq+dst_strideq]
- pavgb m2, [dstq+dst_strideq*2]
- pavgb m3, [dstq+r6q]
+ pavg m0, [dstq]
+ pavg m1, [dstq+dst_strideq]
+ pavg m2, [dstq+dst_strideq*2]
+ pavg m3, [dstq+r6q]
%endif
mova [dstq ], m0
mova [dstq+dst_strideq ], m1
@@ -108,10 +164,10 @@
movu m3, [srcq+r5q]
lea srcq, [srcq+src_strideq*4]
%ifidn %1, avg
- pavgb m0, [dstq]
- pavgb m1, [dstq+dst_strideq]
- pavgb m2, [dstq+dst_strideq*2]
- pavgb m3, [dstq+r6q]
+ pavg m0, [dstq]
+ pavg m1, [dstq+dst_strideq]
+ pavg m2, [dstq+dst_strideq*2]
+ pavg m3, [dstq+r6q]
%endif
mova [dstq ], m0
mova [dstq+dst_strideq ], m1
@@ -122,6 +178,7 @@
jnz .loop8
RET
+%ifnidn %2, highbd
.w4:
mov r4d, dword hm
lea r5q, [src_strideq*3]
@@ -137,10 +194,10 @@
movh m5, [dstq+dst_strideq]
movh m6, [dstq+dst_strideq*2]
movh m7, [dstq+r6q]
- pavgb m0, m4
- pavgb m1, m5
- pavgb m2, m6
- pavgb m3, m7
+ pavg m0, m4
+ pavg m1, m5
+ pavg m2, m6
+ pavg m3, m7
%endif
movh [dstq ], m0
movh [dstq+dst_strideq ], m1
@@ -150,7 +207,12 @@
sub r4d, 4
jnz .loop4
RET
+%endif
%endmacro
convolve_fn copy
convolve_fn avg
+%if CONFIG_VP9_HIGHBITDEPTH
+convolve_fn copy, highbd
+convolve_fn avg, highbd
+%endif
diff --git a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c
index 29ede19..b718678 100644
--- a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c
+++ b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c
@@ -41,7 +41,10 @@
#if defined(__clang__)
# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ <= 3) || \
- (defined(__APPLE__) && __clang_major__ == 5 && __clang_minor__ == 0)
+ (defined(__APPLE__) && \
+ ((__clang_major__ == 4 && __clang_minor__ <= 2) || \
+ (__clang_major__ == 5 && __clang_minor__ == 0)))
+
# define MM256_BROADCASTSI128_SI256(x) \
_mm_broadcastsi128_si256((__m128i const *)&(x))
# else // clang > 3.3, and not 5.0 on macosx.
diff --git a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
index 772e01e..6fd5208 100644
--- a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
+++ b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
@@ -203,123 +203,6 @@
}
}
-static void vpx_filter_block1d16_h8_intrin_ssse3(const uint8_t *src_ptr,
- ptrdiff_t src_pixels_per_line,
- uint8_t *output_ptr,
- ptrdiff_t output_pitch,
- uint32_t output_height,
- const int16_t *filter) {
- __m128i addFilterReg64, filtersReg, srcReg1, srcReg2;
- __m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg;
- __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
- __m128i srcRegFilt1_1, srcRegFilt2_1, srcRegFilt2, srcRegFilt3;
- unsigned int i;
-
- // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
- addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
-
- // duplicate only the first 16 bits (first and second byte)
- // across 128 bit register
- firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
- // duplicate only the second 16 bits (third and forth byte)
- // across 128 bit register
- secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
- // duplicate only the third 16 bits (fifth and sixth byte)
- // across 128 bit register
- thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
- // duplicate only the forth 16 bits (seventh and eighth byte)
- // across 128 bit register
- forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
-
- filt1Reg = _mm_load_si128((__m128i const *)filt1_global);
- filt2Reg = _mm_load_si128((__m128i const *)filt2_global);
- filt3Reg = _mm_load_si128((__m128i const *)filt3_global);
- filt4Reg = _mm_load_si128((__m128i const *)filt4_global);
-
- for (i = 0; i < output_height; i++) {
- srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr - 3));
-
- // filter the source buffer
- srcRegFilt1_1= _mm_shuffle_epi8(srcReg1, filt1Reg);
- srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt4Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt1_1 = _mm_maddubs_epi16(srcRegFilt1_1, firstFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, forthFilters);
-
- // add and saturate the results together
- srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
-
- // filter the source buffer
- srcRegFilt3= _mm_shuffle_epi8(srcReg1, filt2Reg);
- srcRegFilt2= _mm_shuffle_epi8(srcReg1, filt3Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
-
- // add and saturate the results together
- srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
- _mm_min_epi16(srcRegFilt3, srcRegFilt2));
-
- // reading the next 16 bytes.
- // (part of it was being read by earlier read)
- srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 5));
-
- // add and saturate the results together
- srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
- _mm_max_epi16(srcRegFilt3, srcRegFilt2));
-
- // filter the source buffer
- srcRegFilt2_1= _mm_shuffle_epi8(srcReg2, filt1Reg);
- srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt4Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt2_1 = _mm_maddubs_epi16(srcRegFilt2_1, firstFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, forthFilters);
-
- // add and saturate the results together
- srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, srcRegFilt2);
-
- // filter the source buffer
- srcRegFilt3= _mm_shuffle_epi8(srcReg2, filt2Reg);
- srcRegFilt2= _mm_shuffle_epi8(srcReg2, filt3Reg);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters);
- srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
-
- // add and saturate the results together
- srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1,
- _mm_min_epi16(srcRegFilt3, srcRegFilt2));
- srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1,
- _mm_max_epi16(srcRegFilt3, srcRegFilt2));
-
- srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, addFilterReg64);
- srcRegFilt2_1 = _mm_adds_epi16(srcRegFilt2_1, addFilterReg64);
-
- // shift by 7 bit each 16 bit
- srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7);
- srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve
- // result
- srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, srcRegFilt2_1);
-
- src_ptr+=src_pixels_per_line;
-
- // save 16 bytes
- _mm_store_si128((__m128i*)output_ptr, srcRegFilt1_1);
-
- output_ptr+=output_pitch;
- }
-}
-
void vpx_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr,
ptrdiff_t src_pitch,
uint8_t *output_ptr,
@@ -408,141 +291,12 @@
}
}
-static void vpx_filter_block1d16_v8_intrin_ssse3(const uint8_t *src_ptr,
- ptrdiff_t src_pitch,
- uint8_t *output_ptr,
- ptrdiff_t out_pitch,
- uint32_t output_height,
- const int16_t *filter) {
- __m128i addFilterReg64, filtersReg, srcRegFilt1, srcRegFilt3;
- __m128i firstFilters, secondFilters, thirdFilters, forthFilters;
- __m128i srcRegFilt5, srcRegFilt6, srcRegFilt7, srcRegFilt8;
- __m128i srcReg1, srcReg2, srcReg3, srcReg4, srcReg5, srcReg6, srcReg7;
- __m128i srcReg8;
- unsigned int i;
-
- // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
- addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((const __m128i *)filter);
- // converting the 16 bit (short) to 8 bit (byte) and have the same data
- // in both lanes of 128 bit register.
- filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
-
- // duplicate only the first 16 bits in the filter
- firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
- // duplicate only the second 16 bits in the filter
- secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
- // duplicate only the third 16 bits in the filter
- thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
- // duplicate only the forth 16 bits in the filter
- forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
-
- // load the first 7 rows of 16 bytes
- srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
- srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch));
- srcReg3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2));
- srcReg4 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3));
- srcReg5 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4));
- srcReg6 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5));
- srcReg7 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6));
-
- for (i = 0; i < output_height; i++) {
- // load the last 16 bytes
- srcReg8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7));
-
- // merge the result together
- srcRegFilt5 = _mm_unpacklo_epi8(srcReg1, srcReg2);
- srcRegFilt6 = _mm_unpacklo_epi8(srcReg7, srcReg8);
- srcRegFilt1 = _mm_unpackhi_epi8(srcReg1, srcReg2);
- srcRegFilt3 = _mm_unpackhi_epi8(srcReg7, srcReg8);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, firstFilters);
- srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, forthFilters);
- srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, forthFilters);
-
- // add and saturate the results together
- srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, srcRegFilt6);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3);
-
- // merge the result together
- srcRegFilt3 = _mm_unpacklo_epi8(srcReg3, srcReg4);
- srcRegFilt6 = _mm_unpackhi_epi8(srcReg3, srcReg4);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters);
- srcRegFilt6 = _mm_maddubs_epi16(srcRegFilt6, secondFilters);
-
- // merge the result together
- srcRegFilt7 = _mm_unpacklo_epi8(srcReg5, srcReg6);
- srcRegFilt8 = _mm_unpackhi_epi8(srcReg5, srcReg6);
-
- // multiply 2 adjacent elements with the filter and add the result
- srcRegFilt7 = _mm_maddubs_epi16(srcRegFilt7, thirdFilters);
- srcRegFilt8 = _mm_maddubs_epi16(srcRegFilt8, thirdFilters);
-
- // add and saturate the results together
- srcRegFilt5 = _mm_adds_epi16(srcRegFilt5,
- _mm_min_epi16(srcRegFilt3, srcRegFilt7));
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
- _mm_min_epi16(srcRegFilt6, srcRegFilt8));
-
- // add and saturate the results together
- srcRegFilt5 = _mm_adds_epi16(srcRegFilt5,
- _mm_max_epi16(srcRegFilt3, srcRegFilt7));
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1,
- _mm_max_epi16(srcRegFilt6, srcRegFilt8));
- srcRegFilt5 = _mm_adds_epi16(srcRegFilt5, addFilterReg64);
- srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
-
- // shift by 7 bit each 16 bit
- srcRegFilt5 = _mm_srai_epi16(srcRegFilt5, 7);
- srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
-
- // shrink to 8 bit each 16 bits, the first lane contain the first
- // convolve result and the second lane contain the second convolve
- // result
- srcRegFilt1 = _mm_packus_epi16(srcRegFilt5, srcRegFilt1);
-
- src_ptr+=src_pitch;
-
- // shift down a row
- srcReg1 = srcReg2;
- srcReg2 = srcReg3;
- srcReg3 = srcReg4;
- srcReg4 = srcReg5;
- srcReg5 = srcReg6;
- srcReg6 = srcReg7;
- srcReg7 = srcReg8;
-
- // save 16 bytes convolve result
- _mm_store_si128((__m128i*)output_ptr, srcRegFilt1);
-
- output_ptr+=out_pitch;
- }
-}
-
-#if ARCH_X86_64
-filter8_1dfunction vpx_filter_block1d16_v8_intrin_ssse3;
-filter8_1dfunction vpx_filter_block1d16_h8_intrin_ssse3;
-filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3;
-filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3;
-filter8_1dfunction vpx_filter_block1d4_v8_ssse3;
-filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3;
-#define vpx_filter_block1d16_v8_ssse3 vpx_filter_block1d16_v8_intrin_ssse3
-#define vpx_filter_block1d16_h8_ssse3 vpx_filter_block1d16_h8_intrin_ssse3
-#define vpx_filter_block1d8_v8_ssse3 vpx_filter_block1d8_v8_intrin_ssse3
-#define vpx_filter_block1d8_h8_ssse3 vpx_filter_block1d8_h8_intrin_ssse3
-#define vpx_filter_block1d4_h8_ssse3 vpx_filter_block1d4_h8_intrin_ssse3
-#else // ARCH_X86
filter8_1dfunction vpx_filter_block1d16_v8_ssse3;
filter8_1dfunction vpx_filter_block1d16_h8_ssse3;
filter8_1dfunction vpx_filter_block1d8_v8_ssse3;
filter8_1dfunction vpx_filter_block1d8_h8_ssse3;
filter8_1dfunction vpx_filter_block1d4_v8_ssse3;
filter8_1dfunction vpx_filter_block1d4_h8_ssse3;
-#endif // ARCH_X86_64
filter8_1dfunction vpx_filter_block1d16_v8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d16_h8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3;
diff --git a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
index 68acc03..3fbaa27 100644
--- a/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
+++ b/libvpx/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
@@ -1,5 +1,5 @@
;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
@@ -8,1064 +8,662 @@
; be found in the AUTHORS file in the root of the source tree.
;
+%include "third_party/x86inc/x86inc.asm"
-%include "vpx_ports/x86_abi_support.asm"
+SECTION_RODATA
+pw_64: times 8 dw 64
-%macro VERTx4 1
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov rcx, 0x0400040
+; %define USE_PMULHRSW
+; NOTE: pmulhrsw has a latency of 5 cycles. Tests showed a performance loss
+; when using this instruction.
- movdqa xmm4, [rdx] ;load filters
- movq xmm5, rcx
- packsswb xmm4, xmm4
- pshuflw xmm0, xmm4, 0b ;k0_k1
- pshuflw xmm1, xmm4, 01010101b ;k2_k3
- pshuflw xmm2, xmm4, 10101010b ;k4_k5
- pshuflw xmm3, xmm4, 11111111b ;k6_k7
-
- punpcklqdq xmm0, xmm0
- punpcklqdq xmm1, xmm1
- punpcklqdq xmm2, xmm2
- punpcklqdq xmm3, xmm3
-
- movdqa k0k1, xmm0
- movdqa k2k3, xmm1
- pshufd xmm5, xmm5, 0
- movdqa k4k5, xmm2
- movdqa k6k7, xmm3
- movdqa krd, xmm5
-
- movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
-
-%if ABI_IS_32BIT=0
- movsxd r8, DWORD PTR arg(3) ;out_pitch
-%endif
- mov rax, rsi
- movsxd rcx, DWORD PTR arg(4) ;output_height
- add rax, rdx
-
- lea rbx, [rdx + rdx*4]
- add rbx, rdx ;pitch * 6
-
-.loop:
- movd xmm0, [rsi] ;A
- movd xmm1, [rsi + rdx] ;B
- movd xmm2, [rsi + rdx * 2] ;C
- movd xmm3, [rax + rdx * 2] ;D
- movd xmm4, [rsi + rdx * 4] ;E
- movd xmm5, [rax + rdx * 4] ;F
-
- punpcklbw xmm0, xmm1 ;A B
- punpcklbw xmm2, xmm3 ;C D
- punpcklbw xmm4, xmm5 ;E F
-
- movd xmm6, [rsi + rbx] ;G
- movd xmm7, [rax + rbx] ;H
-
- pmaddubsw xmm0, k0k1
- pmaddubsw xmm2, k2k3
- punpcklbw xmm6, xmm7 ;G H
- pmaddubsw xmm4, k4k5
- pmaddubsw xmm6, k6k7
-
- movdqa xmm1, xmm2
- paddsw xmm0, xmm6
- pmaxsw xmm2, xmm4
- pminsw xmm4, xmm1
- paddsw xmm0, xmm4
- paddsw xmm0, xmm2
-
- paddsw xmm0, krd
- psraw xmm0, 7
- packuswb xmm0, xmm0
-
- add rsi, rdx
- add rax, rdx
-%if %1
- movd xmm1, [rdi]
- pavgb xmm0, xmm1
-%endif
- movd [rdi], xmm0
-
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(3) ;out_pitch
+SECTION .text
+%if ARCH_X86_64
+ %define LOCAL_VARS_SIZE 16*4
%else
- add rdi, r8
+ %define LOCAL_VARS_SIZE 16*6
%endif
- dec rcx
- jnz .loop
+
+%macro SETUP_LOCAL_VARS 0
+ ; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 +
+ ; pmaddubsw has a higher latency on some platforms, this might be eased by
+ ; interleaving the instructions.
+ %define k0k1 [rsp + 16*0]
+ %define k2k3 [rsp + 16*1]
+ %define k4k5 [rsp + 16*2]
+ %define k6k7 [rsp + 16*3]
+ packsswb m4, m4
+ ; TODO(slavarnway): multiple pshufb instructions had a higher latency on
+ ; some platforms.
+ pshuflw m0, m4, 0b ;k0_k1
+ pshuflw m1, m4, 01010101b ;k2_k3
+ pshuflw m2, m4, 10101010b ;k4_k5
+ pshuflw m3, m4, 11111111b ;k6_k7
+ punpcklqdq m0, m0
+ punpcklqdq m1, m1
+ punpcklqdq m2, m2
+ punpcklqdq m3, m3
+ mova k0k1, m0
+ mova k2k3, m1
+ mova k4k5, m2
+ mova k6k7, m3
+%if ARCH_X86_64
+ %define krd m12
+ %define tmp m13
+ mova krd, [GLOBAL(pw_64)]
+%else
+ %define tmp [rsp + 16*4]
+ %define krd [rsp + 16*5]
+%if CONFIG_PIC=0
+ mova m6, [GLOBAL(pw_64)]
+%else
+ ; build constants without accessing global memory
+ pcmpeqb m6, m6 ;all ones
+ psrlw m6, 15
+ psllw m6, 6 ;aka pw_64
+%endif
+ mova krd, m6
+%endif
%endm
-%macro VERTx8 1
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov rcx, 0x0400040
-
- movdqa xmm4, [rdx] ;load filters
- movq xmm5, rcx
- packsswb xmm4, xmm4
- pshuflw xmm0, xmm4, 0b ;k0_k1
- pshuflw xmm1, xmm4, 01010101b ;k2_k3
- pshuflw xmm2, xmm4, 10101010b ;k4_k5
- pshuflw xmm3, xmm4, 11111111b ;k6_k7
-
- punpcklqdq xmm0, xmm0
- punpcklqdq xmm1, xmm1
- punpcklqdq xmm2, xmm2
- punpcklqdq xmm3, xmm3
-
- movdqa k0k1, xmm0
- movdqa k2k3, xmm1
- pshufd xmm5, xmm5, 0
- movdqa k4k5, xmm2
- movdqa k6k7, xmm3
- movdqa krd, xmm5
-
- movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
-
-%if ABI_IS_32BIT=0
- movsxd r8, DWORD PTR arg(3) ;out_pitch
-%endif
- mov rax, rsi
- movsxd rcx, DWORD PTR arg(4) ;output_height
- add rax, rdx
-
- lea rbx, [rdx + rdx*4]
- add rbx, rdx ;pitch * 6
-
-.loop:
- movq xmm0, [rsi] ;A
- movq xmm1, [rsi + rdx] ;B
- movq xmm2, [rsi + rdx * 2] ;C
- movq xmm3, [rax + rdx * 2] ;D
- movq xmm4, [rsi + rdx * 4] ;E
- movq xmm5, [rax + rdx * 4] ;F
-
- punpcklbw xmm0, xmm1 ;A B
- punpcklbw xmm2, xmm3 ;C D
- punpcklbw xmm4, xmm5 ;E F
-
- movq xmm6, [rsi + rbx] ;G
- movq xmm7, [rax + rbx] ;H
-
- pmaddubsw xmm0, k0k1
- pmaddubsw xmm2, k2k3
- punpcklbw xmm6, xmm7 ;G H
- pmaddubsw xmm4, k4k5
- pmaddubsw xmm6, k6k7
-
- paddsw xmm0, xmm6
- movdqa xmm1, xmm2
- pmaxsw xmm2, xmm4
- pminsw xmm4, xmm1
- paddsw xmm0, xmm4
- paddsw xmm0, xmm2
-
- paddsw xmm0, krd
- psraw xmm0, 7
- packuswb xmm0, xmm0
-
- add rsi, rdx
- add rax, rdx
-%if %1
- movq xmm1, [rdi]
- pavgb xmm0, xmm1
-%endif
- movq [rdi], xmm0
-
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(3) ;out_pitch
-%else
- add rdi, r8
-%endif
- dec rcx
- jnz .loop
-%endm
-
-
-%macro VERTx16 1
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov rcx, 0x0400040
-
- movdqa xmm4, [rdx] ;load filters
- movq xmm5, rcx
- packsswb xmm4, xmm4
- pshuflw xmm0, xmm4, 0b ;k0_k1
- pshuflw xmm1, xmm4, 01010101b ;k2_k3
- pshuflw xmm2, xmm4, 10101010b ;k4_k5
- pshuflw xmm3, xmm4, 11111111b ;k6_k7
-
- punpcklqdq xmm0, xmm0
- punpcklqdq xmm1, xmm1
- punpcklqdq xmm2, xmm2
- punpcklqdq xmm3, xmm3
-
- movdqa k0k1, xmm0
- movdqa k2k3, xmm1
- pshufd xmm5, xmm5, 0
- movdqa k4k5, xmm2
- movdqa k6k7, xmm3
- movdqa krd, xmm5
-
- movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
-
-%if ABI_IS_32BIT=0
- movsxd r8, DWORD PTR arg(3) ;out_pitch
-%endif
- mov rax, rsi
- movsxd rcx, DWORD PTR arg(4) ;output_height
- add rax, rdx
-
- lea rbx, [rdx + rdx*4]
- add rbx, rdx ;pitch * 6
-
-.loop:
- movq xmm0, [rsi] ;A
- movq xmm1, [rsi + rdx] ;B
- movq xmm2, [rsi + rdx * 2] ;C
- movq xmm3, [rax + rdx * 2] ;D
- movq xmm4, [rsi + rdx * 4] ;E
- movq xmm5, [rax + rdx * 4] ;F
-
- punpcklbw xmm0, xmm1 ;A B
- punpcklbw xmm2, xmm3 ;C D
- punpcklbw xmm4, xmm5 ;E F
-
- movq xmm6, [rsi + rbx] ;G
- movq xmm7, [rax + rbx] ;H
-
- pmaddubsw xmm0, k0k1
- pmaddubsw xmm2, k2k3
- punpcklbw xmm6, xmm7 ;G H
- pmaddubsw xmm4, k4k5
- pmaddubsw xmm6, k6k7
-
- paddsw xmm0, xmm6
- movdqa xmm1, xmm2
- pmaxsw xmm2, xmm4
- pminsw xmm4, xmm1
- paddsw xmm0, xmm4
- paddsw xmm0, xmm2
-
- paddsw xmm0, krd
- psraw xmm0, 7
- packuswb xmm0, xmm0
-%if %1
- movq xmm1, [rdi]
- pavgb xmm0, xmm1
-%endif
- movq [rdi], xmm0
-
- movq xmm0, [rsi + 8] ;A
- movq xmm1, [rsi + rdx + 8] ;B
- movq xmm2, [rsi + rdx * 2 + 8] ;C
- movq xmm3, [rax + rdx * 2 + 8] ;D
- movq xmm4, [rsi + rdx * 4 + 8] ;E
- movq xmm5, [rax + rdx * 4 + 8] ;F
-
- punpcklbw xmm0, xmm1 ;A B
- punpcklbw xmm2, xmm3 ;C D
- punpcklbw xmm4, xmm5 ;E F
-
- movq xmm6, [rsi + rbx + 8] ;G
- movq xmm7, [rax + rbx + 8] ;H
- punpcklbw xmm6, xmm7 ;G H
-
- pmaddubsw xmm0, k0k1
- pmaddubsw xmm2, k2k3
- pmaddubsw xmm4, k4k5
- pmaddubsw xmm6, k6k7
-
- paddsw xmm0, xmm6
- movdqa xmm1, xmm2
- pmaxsw xmm2, xmm4
- pminsw xmm4, xmm1
- paddsw xmm0, xmm4
- paddsw xmm0, xmm2
-
- paddsw xmm0, krd
- psraw xmm0, 7
- packuswb xmm0, xmm0
-
- add rsi, rdx
- add rax, rdx
-%if %1
- movq xmm1, [rdi+8]
- pavgb xmm0, xmm1
-%endif
-
- movq [rdi+8], xmm0
-
-%if ABI_IS_32BIT
- add rdi, DWORD PTR arg(3) ;out_pitch
-%else
- add rdi, r8
-%endif
- dec rcx
- jnz .loop
-%endm
-
-;void vpx_filter_block1d8_v8_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(vpx_filter_block1d4_v8_ssse3) PRIVATE
-sym(vpx_filter_block1d4_v8_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16*5
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- %define krd [rsp + 16*4]
-
- VERTx4 0
-
- add rsp, 16*5
- pop rsp
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vpx_filter_block1d8_v8_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(vpx_filter_block1d8_v8_ssse3) PRIVATE
-sym(vpx_filter_block1d8_v8_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16*5
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- %define krd [rsp + 16*4]
-
- VERTx8 0
-
- add rsp, 16*5
- pop rsp
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vpx_filter_block1d16_v8_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pitch,
-; unsigned char *output_ptr,
-; unsigned int out_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(vpx_filter_block1d16_v8_ssse3) PRIVATE
-sym(vpx_filter_block1d16_v8_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16*5
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- %define krd [rsp + 16*4]
-
- VERTx16 0
-
- add rsp, 16*5
- pop rsp
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-
-global sym(vpx_filter_block1d4_v8_avg_ssse3) PRIVATE
-sym(vpx_filter_block1d4_v8_avg_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16*5
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- %define krd [rsp + 16*4]
-
- VERTx4 1
-
- add rsp, 16*5
- pop rsp
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(vpx_filter_block1d8_v8_avg_ssse3) PRIVATE
-sym(vpx_filter_block1d8_v8_avg_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16*5
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- %define krd [rsp + 16*4]
-
- VERTx8 1
-
- add rsp, 16*5
- pop rsp
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(vpx_filter_block1d16_v8_avg_ssse3) PRIVATE
-sym(vpx_filter_block1d16_v8_avg_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rsi
- push rdi
- push rbx
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16*5
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- %define krd [rsp + 16*4]
-
- VERTx16 1
-
- add rsp, 16*5
- pop rsp
- pop rbx
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
%macro HORIZx4_ROW 2
- movdqa %2, %1
- pshufb %1, [GLOBAL(shuf_t0t1)]
- pshufb %2, [GLOBAL(shuf_t2t3)]
- pmaddubsw %1, k0k1k4k5
- pmaddubsw %2, k2k3k6k7
+ mova %2, %1
+ punpcklbw %1, %1
+ punpckhbw %2, %2
- movdqa xmm4, %1
- movdqa xmm5, %2
- psrldq %1, 8
- psrldq %2, 8
- movdqa xmm6, xmm5
+ mova m3, %2
+ palignr %2, %1, 1
+ palignr m3, %1, 5
- paddsw xmm4, %2
- pmaxsw xmm5, %1
- pminsw %1, xmm6
- paddsw %1, xmm4
- paddsw %1, xmm5
+ pmaddubsw %2, k0k1k4k5
+ pmaddubsw m3, k2k3k6k7
- paddsw %1, krd
- psraw %1, 7
- packuswb %1, %1
+ mova m4, %2
+ mova m5, m3
+ psrldq %2, 8
+ psrldq m3, 8
+ mova m6, m5
+
+ paddsw m4, m3
+ pmaxsw m5, %2
+ pminsw %2, m6
+ paddsw %2, m4
+ paddsw %2, m5
+ paddsw %2, krd
+ psraw %2, 7
+ packuswb %2, %2
%endm
-%macro HORIZx4 1
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov rcx, 0x0400040
-
- movdqa xmm4, [rdx] ;load filters
- movq xmm5, rcx
- packsswb xmm4, xmm4
- pshuflw xmm6, xmm4, 0b ;k0_k1
- pshufhw xmm6, xmm6, 10101010b ;k0_k1_k4_k5
- pshuflw xmm7, xmm4, 01010101b ;k2_k3
- pshufhw xmm7, xmm7, 11111111b ;k2_k3_k6_k7
- pshufd xmm5, xmm5, 0 ;rounding
-
- movdqa k0k1k4k5, xmm6
- movdqa k2k3k6k7, xmm7
- movdqa krd, xmm5
-
- movsxd rax, dword ptr arg(1) ;src_pixels_per_line
- movsxd rdx, dword ptr arg(3) ;output_pitch
- movsxd rcx, dword ptr arg(4) ;output_height
- shr rcx, 1
-.loop:
- ;Do two rows once
- movq xmm0, [rsi - 3] ;load src
- movq xmm1, [rsi + 5]
- movq xmm2, [rsi + rax - 3]
- movq xmm3, [rsi + rax + 5]
- punpcklqdq xmm0, xmm1
- punpcklqdq xmm2, xmm3
-
- HORIZx4_ROW xmm0, xmm1
- HORIZx4_ROW xmm2, xmm3
-%if %1
- movd xmm1, [rdi]
- pavgb xmm0, xmm1
- movd xmm3, [rdi + rdx]
- pavgb xmm2, xmm3
+;-------------------------------------------------------------------------------
+%macro SUBPIX_HFILTER4 1
+cglobal filter_block1d4_%1, 6, 6+(ARCH_X86_64*2), 11, LOCAL_VARS_SIZE, \
+ src, sstride, dst, dstride, height, filter
+ mova m4, [filterq]
+ packsswb m4, m4
+%if ARCH_X86_64
+ %define k0k1k4k5 m8
+ %define k2k3k6k7 m9
+ %define krd m10
+ %define orig_height r7d
+ mova krd, [GLOBAL(pw_64)]
+ pshuflw k0k1k4k5, m4, 0b ;k0_k1
+ pshufhw k0k1k4k5, k0k1k4k5, 10101010b ;k0_k1_k4_k5
+ pshuflw k2k3k6k7, m4, 01010101b ;k2_k3
+ pshufhw k2k3k6k7, k2k3k6k7, 11111111b ;k2_k3_k6_k7
+%else
+ %define k0k1k4k5 [rsp + 16*0]
+ %define k2k3k6k7 [rsp + 16*1]
+ %define krd [rsp + 16*2]
+ %define orig_height [rsp + 16*3]
+ pshuflw m6, m4, 0b ;k0_k1
+ pshufhw m6, m6, 10101010b ;k0_k1_k4_k5
+ pshuflw m7, m4, 01010101b ;k2_k3
+ pshufhw m7, m7, 11111111b ;k2_k3_k6_k7
+%if CONFIG_PIC=0
+ mova m1, [GLOBAL(pw_64)]
+%else
+ ; build constants without accessing global memory
+ pcmpeqb m1, m1 ;all ones
+ psrlw m1, 15
+ psllw m1, 6 ;aka pw_64
%endif
- movd [rdi], xmm0
- movd [rdi +rdx], xmm2
+ mova k0k1k4k5, m6
+ mova k2k3k6k7, m7
+ mova krd, m1
+%endif
+ mov orig_height, heightd
+ shr heightd, 1
+.loop:
+ ;Do two rows at once
+ movh m0, [srcq - 3]
+ movh m1, [srcq + 5]
+ punpcklqdq m0, m1
+ mova m1, m0
+ movh m2, [srcq + sstrideq - 3]
+ movh m3, [srcq + sstrideq + 5]
+ punpcklqdq m2, m3
+ mova m3, m2
+ punpcklbw m0, m0
+ punpckhbw m1, m1
+ punpcklbw m2, m2
+ punpckhbw m3, m3
+ mova m4, m1
+ palignr m4, m0, 1
+ pmaddubsw m4, k0k1k4k5
+ palignr m1, m0, 5
+ pmaddubsw m1, k2k3k6k7
+ mova m7, m3
+ palignr m7, m2, 1
+ pmaddubsw m7, k0k1k4k5
+ palignr m3, m2, 5
+ pmaddubsw m3, k2k3k6k7
+ mova m0, m4
+ mova m5, m1
+ mova m2, m7
+ psrldq m4, 8
+ psrldq m1, 8
+ mova m6, m5
+ paddsw m0, m1
+ mova m1, m3
+ psrldq m7, 8
+ psrldq m3, 8
+ paddsw m2, m3
+ mova m3, m1
+ pmaxsw m5, m4
+ pminsw m4, m6
+ paddsw m4, m0
+ paddsw m4, m5
+ pmaxsw m1, m7
+ pminsw m7, m3
+ paddsw m7, m2
+ paddsw m7, m1
- lea rsi, [rsi + rax]
- prefetcht0 [rsi + 4 * rax - 3]
- lea rsi, [rsi + rax]
- lea rdi, [rdi + 2 * rdx]
- prefetcht0 [rsi + 2 * rax - 3]
+ paddsw m4, krd
+ psraw m4, 7
+ packuswb m4, m4
+ paddsw m7, krd
+ psraw m7, 7
+ packuswb m7, m7
- dec rcx
- jnz .loop
+%ifidn %1, h8_avg
+ movd m0, [dstq]
+ pavgb m4, m0
+ movd m2, [dstq + dstrideq]
+ pavgb m7, m2
+%endif
+ movd [dstq], m4
+ movd [dstq + dstrideq], m7
+
+ lea srcq, [srcq + sstrideq ]
+ prefetcht0 [srcq + 4 * sstrideq - 3]
+ lea srcq, [srcq + sstrideq ]
+ lea dstq, [dstq + 2 * dstrideq ]
+ prefetcht0 [srcq + 2 * sstrideq - 3]
+
+ dec heightd
+ jnz .loop
; Do last row if output_height is odd
- movsxd rcx, dword ptr arg(4) ;output_height
- and rcx, 1
- je .done
+ mov heightd, orig_height
+ and heightd, 1
+ je .done
- movq xmm0, [rsi - 3] ; load src
- movq xmm1, [rsi + 5]
- punpcklqdq xmm0, xmm1
+ movh m0, [srcq - 3] ; load src
+ movh m1, [srcq + 5]
+ punpcklqdq m0, m1
- HORIZx4_ROW xmm0, xmm1
-%if %1
- movd xmm1, [rdi]
- pavgb xmm0, xmm1
+ HORIZx4_ROW m0, m1
+%ifidn %1, h8_avg
+ movd m0, [dstq]
+ pavgb m1, m0
%endif
- movd [rdi], xmm0
+ movd [dstq], m1
.done
+ RET
%endm
-%macro HORIZx8_ROW 4
- movdqa %2, %1
- movdqa %3, %1
- movdqa %4, %1
+%macro HORIZx8_ROW 5
+ mova %2, %1
+ punpcklbw %1, %1
+ punpckhbw %2, %2
- pshufb %1, [GLOBAL(shuf_t0t1)]
- pshufb %2, [GLOBAL(shuf_t2t3)]
- pshufb %3, [GLOBAL(shuf_t4t5)]
- pshufb %4, [GLOBAL(shuf_t6t7)]
+ mova %3, %2
+ mova %4, %2
+ mova %5, %2
- pmaddubsw %1, k0k1
- pmaddubsw %2, k2k3
- pmaddubsw %3, k4k5
- pmaddubsw %4, k6k7
+ palignr %2, %1, 1
+ palignr %3, %1, 5
+ palignr %4, %1, 9
+ palignr %5, %1, 13
- paddsw %1, %4
- movdqa %4, %2
- pmaxsw %2, %3
- pminsw %3, %4
- paddsw %1, %3
- paddsw %1, %2
+ pmaddubsw %2, k0k1
+ pmaddubsw %3, k2k3
+ pmaddubsw %4, k4k5
+ pmaddubsw %5, k6k7
- paddsw %1, krd
- psraw %1, 7
- packuswb %1, %1
+ paddsw %2, %5
+ mova %1, %3
+ pminsw %3, %4
+ pmaxsw %1, %4
+ paddsw %2, %3
+ paddsw %1, %2
+ paddsw %1, krd
+ psraw %1, 7
+ packuswb %1, %1
%endm
-%macro HORIZx8 1
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov rcx, 0x0400040
-
- movdqa xmm4, [rdx] ;load filters
- movq xmm5, rcx
- packsswb xmm4, xmm4
- pshuflw xmm0, xmm4, 0b ;k0_k1
- pshuflw xmm1, xmm4, 01010101b ;k2_k3
- pshuflw xmm2, xmm4, 10101010b ;k4_k5
- pshuflw xmm3, xmm4, 11111111b ;k6_k7
-
- punpcklqdq xmm0, xmm0
- punpcklqdq xmm1, xmm1
- punpcklqdq xmm2, xmm2
- punpcklqdq xmm3, xmm3
-
- movdqa k0k1, xmm0
- movdqa k2k3, xmm1
- pshufd xmm5, xmm5, 0
- movdqa k4k5, xmm2
- movdqa k6k7, xmm3
- movdqa krd, xmm5
-
- movsxd rax, dword ptr arg(1) ;src_pixels_per_line
- movsxd rdx, dword ptr arg(3) ;output_pitch
- movsxd rcx, dword ptr arg(4) ;output_height
- shr rcx, 1
+;-------------------------------------------------------------------------------
+%macro SUBPIX_HFILTER8 1
+cglobal filter_block1d8_%1, 6, 6+(ARCH_X86_64*1), 14, LOCAL_VARS_SIZE, \
+ src, sstride, dst, dstride, height, filter
+ mova m4, [filterq]
+ SETUP_LOCAL_VARS
+%if ARCH_X86_64
+ %define orig_height r7d
+%else
+ %define orig_height heightmp
+%endif
+ mov orig_height, heightd
+ shr heightd, 1
.loop:
- movq xmm0, [rsi - 3] ;load src
- movq xmm3, [rsi + 5]
- movq xmm4, [rsi + rax - 3]
- movq xmm7, [rsi + rax + 5]
- punpcklqdq xmm0, xmm3
- punpcklqdq xmm4, xmm7
+ movh m0, [srcq - 3]
+ movh m3, [srcq + 5]
+ movh m4, [srcq + sstrideq - 3]
+ movh m7, [srcq + sstrideq + 5]
+ punpcklqdq m0, m3
+ mova m1, m0
+ punpcklbw m0, m0
+ punpckhbw m1, m1
+ mova m5, m1
+ palignr m5, m0, 13
+ pmaddubsw m5, k6k7
+ mova m2, m1
+ mova m3, m1
+ palignr m1, m0, 1
+ pmaddubsw m1, k0k1
+ punpcklqdq m4, m7
+ mova m6, m4
+ punpcklbw m4, m4
+ palignr m2, m0, 5
+ punpckhbw m6, m6
+ palignr m3, m0, 9
+ mova m7, m6
+ pmaddubsw m2, k2k3
+ pmaddubsw m3, k4k5
- HORIZx8_ROW xmm0, xmm1, xmm2, xmm3
- HORIZx8_ROW xmm4, xmm5, xmm6, xmm7
-%if %1
- movq xmm1, [rdi]
- movq xmm2, [rdi + rdx]
- pavgb xmm0, xmm1
- pavgb xmm4, xmm2
+ palignr m7, m4, 13
+ paddsw m1, m5
+ mova m5, m6
+ mova m0, m2
+ palignr m5, m4, 5
+ pminsw m2, m3
+ pmaddubsw m7, k6k7
+ pmaxsw m3, m0
+ paddsw m1, m2
+ mova m0, m6
+ palignr m6, m4, 1
+ pmaddubsw m5, k2k3
+ paddsw m1, m3
+ pmaddubsw m6, k0k1
+ palignr m0, m4, 9
+ paddsw m1, krd
+ pmaddubsw m0, k4k5
+ mova m4, m5
+ psraw m1, 7
+ pminsw m5, m0
+ paddsw m6, m7
+ packuswb m1, m1
+
+ paddsw m6, m5
+ pmaxsw m0, m4
+ paddsw m6, m0
+ paddsw m6, krd
+ psraw m6, 7
+ packuswb m6, m6
+
+%ifidn %1, h8_avg
+ movh m0, [dstq]
+ movh m2, [dstq + dstrideq]
+ pavgb m1, m0
+ pavgb m6, m2
%endif
- movq [rdi], xmm0
- movq [rdi + rdx], xmm4
+ movh [dstq], m1
+ movh [dstq + dstrideq], m6
- lea rsi, [rsi + rax]
- prefetcht0 [rsi + 4 * rax - 3]
- lea rsi, [rsi + rax]
- lea rdi, [rdi + 2 * rdx]
- prefetcht0 [rsi + 2 * rax - 3]
- dec rcx
- jnz .loop
+ lea srcq, [srcq + sstrideq ]
+ prefetcht0 [srcq + 4 * sstrideq - 3]
+ lea srcq, [srcq + sstrideq ]
+ lea dstq, [dstq + 2 * dstrideq ]
+ prefetcht0 [srcq + 2 * sstrideq - 3]
+ dec heightd
+ jnz .loop
;Do last row if output_height is odd
- movsxd rcx, dword ptr arg(4) ;output_height
- and rcx, 1
- je .done
+ mov heightd, orig_height
+ and heightd, 1
+ je .done
- movq xmm0, [rsi - 3]
- movq xmm3, [rsi + 5]
- punpcklqdq xmm0, xmm3
+ movh m0, [srcq - 3]
+ movh m3, [srcq + 5]
+ punpcklqdq m0, m3
- HORIZx8_ROW xmm0, xmm1, xmm2, xmm3
-%if %1
- movq xmm1, [rdi]
- pavgb xmm0, xmm1
+ HORIZx8_ROW m0, m1, m2, m3, m4
+
+%ifidn %1, h8_avg
+ movh m1, [dstq]
+ pavgb m0, m1
%endif
- movq [rdi], xmm0
-.done
+ movh [dstq], m0
+.done:
+ RET
%endm
-%macro HORIZx16 1
- mov rdx, arg(5) ;filter ptr
- mov rsi, arg(0) ;src_ptr
- mov rdi, arg(2) ;output_ptr
- mov rcx, 0x0400040
+;-------------------------------------------------------------------------------
+%macro SUBPIX_HFILTER16 1
+cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*0), 14, LOCAL_VARS_SIZE, \
+ src, sstride, dst, dstride, height, filter
+ mova m4, [filterq]
+ SETUP_LOCAL_VARS
+.loop:
+ prefetcht0 [srcq + 2 * sstrideq -3]
- movdqa xmm4, [rdx] ;load filters
- movq xmm5, rcx
- packsswb xmm4, xmm4
- pshuflw xmm0, xmm4, 0b ;k0_k1
- pshuflw xmm1, xmm4, 01010101b ;k2_k3
- pshuflw xmm2, xmm4, 10101010b ;k4_k5
- pshuflw xmm3, xmm4, 11111111b ;k6_k7
+ movh m0, [srcq - 3]
+ movh m4, [srcq + 5]
+ movh m6, [srcq + 13]
+ punpcklqdq m0, m4
+ mova m7, m0
+ punpckhbw m0, m0
+ mova m1, m0
+ punpcklqdq m4, m6
+ mova m3, m0
+ punpcklbw m7, m7
- punpcklqdq xmm0, xmm0
- punpcklqdq xmm1, xmm1
- punpcklqdq xmm2, xmm2
- punpcklqdq xmm3, xmm3
+ palignr m3, m7, 13
+ mova m2, m0
+ pmaddubsw m3, k6k7
+ palignr m0, m7, 1
+ pmaddubsw m0, k0k1
+ palignr m1, m7, 5
+ pmaddubsw m1, k2k3
+ palignr m2, m7, 9
+ pmaddubsw m2, k4k5
+ paddsw m0, m3
+ mova m3, m4
+ punpckhbw m4, m4
+ mova m5, m4
+ punpcklbw m3, m3
+ mova m7, m4
+ palignr m5, m3, 5
+ mova m6, m4
+ palignr m4, m3, 1
+ pmaddubsw m4, k0k1
+ pmaddubsw m5, k2k3
+ palignr m6, m3, 9
+ pmaddubsw m6, k4k5
+ palignr m7, m3, 13
+ pmaddubsw m7, k6k7
- movdqa k0k1, xmm0
- movdqa k2k3, xmm1
- pshufd xmm5, xmm5, 0
- movdqa k4k5, xmm2
- movdqa k6k7, xmm3
- movdqa krd, xmm5
+ mova m3, m1
+ pmaxsw m1, m2
+ pminsw m2, m3
+ paddsw m0, m2
+ paddsw m0, m1
+ paddsw m4, m7
+ mova m7, m5
+ pmaxsw m5, m6
+ pminsw m6, m7
+ paddsw m4, m6
+ paddsw m4, m5
+ paddsw m0, krd
+ paddsw m4, krd
+ psraw m0, 7
+ psraw m4, 7
+ packuswb m0, m4
+%ifidn %1, h8_avg
+ mova m1, [dstq]
+ pavgb m0, m1
+%endif
+ lea srcq, [srcq + sstrideq]
+ mova [dstq], m0
+ lea dstq, [dstq + dstrideq]
+ dec heightd
+ jnz .loop
+ RET
+%endm
- movsxd rax, dword ptr arg(1) ;src_pixels_per_line
- movsxd rdx, dword ptr arg(3) ;output_pitch
- movsxd rcx, dword ptr arg(4) ;output_height
+INIT_XMM ssse3
+SUBPIX_HFILTER16 h8
+SUBPIX_HFILTER16 h8_avg
+SUBPIX_HFILTER8 h8
+SUBPIX_HFILTER8 h8_avg
+SUBPIX_HFILTER4 h8
+SUBPIX_HFILTER4 h8_avg
+
+;-------------------------------------------------------------------------------
+%macro SUBPIX_VFILTER 2
+cglobal filter_block1d%2_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
+ src, sstride, dst, dstride, height, filter
+ mova m4, [filterq]
+ SETUP_LOCAL_VARS
+%if ARCH_X86_64
+ %define src1q r7
+ %define sstride6q r8
+ %define dst_stride dstrideq
+%else
+ %define src1q filterq
+ %define sstride6q dstrideq
+ %define dst_stride dstridemp
+%endif
+ mov src1q, srcq
+ add src1q, sstrideq ;src1 = src + pitch (rows B/D/F/H)
+ lea sstride6q, [sstrideq + sstrideq * 4]
+ add sstride6q, sstrideq ;pitch * 6
+
+%ifidn %2, 8
+ %define movx movh
+%else
+ %define movx movd
+%endif
+.loop:
+ movx m0, [srcq ] ;A
+ movx m1, [srcq + sstrideq ] ;B
+ punpcklbw m0, m1 ;A B
+ movx m2, [srcq + sstrideq * 2 ] ;C
+ pmaddubsw m0, k0k1
+ mova m6, m2
+ movx m3, [src1q + sstrideq * 2] ;D
+ punpcklbw m2, m3 ;C D
+ pmaddubsw m2, k2k3
+ movx m4, [srcq + sstrideq * 4 ] ;E
+ mova m7, m4
+ movx m5, [src1q + sstrideq * 4] ;F
+ punpcklbw m4, m5 ;E F
+ pmaddubsw m4, k4k5
+ punpcklbw m1, m6 ;A B next iter
+ movx m6, [srcq + sstride6q ] ;G
+ punpcklbw m5, m6 ;E F next iter
+ punpcklbw m3, m7 ;C D next iter
+ pmaddubsw m5, k4k5
+ movx m7, [src1q + sstride6q ] ;H
+ punpcklbw m6, m7 ;G H
+ pmaddubsw m6, k6k7
+ mova tmp, m2
+ pmaddubsw m3, k2k3
+ pmaddubsw m1, k0k1
+ pmaxsw m2, m4
+ paddsw m0, m6
+ movx m6, [srcq + sstrideq * 8 ] ;H next iter
+ punpcklbw m7, m6
+ pmaddubsw m7, k6k7
+ pminsw m4, tmp
+ paddsw m0, m4
+ mova m4, m3
+ paddsw m0, m2
+ pminsw m3, m5
+ pmaxsw m5, m4
+ paddsw m0, krd
+ psraw m0, 7
+ paddsw m1, m7
+ packuswb m0, m0
+
+ paddsw m1, m3
+ paddsw m1, m5
+ paddsw m1, krd
+ psraw m1, 7
+ lea srcq, [srcq + sstrideq * 2 ]
+ lea src1q, [src1q + sstrideq * 2]
+ packuswb m1, m1
+
+%ifidn %1, v8_avg
+ movx m2, [dstq]
+ pavgb m0, m2
+%endif
+ movx [dstq], m0
+ add dstq, dst_stride
+%ifidn %1, v8_avg
+ movx m3, [dstq]
+ pavgb m1, m3
+%endif
+ movx [dstq], m1
+ add dstq, dst_stride
+ sub heightd, 2 ;two output rows written per iteration
+ cmp heightd, 1
+ jg .loop
+
+ cmp heightd, 0 ;nonzero here means one row is left
+ je .done
+
+ movx m0, [srcq ] ;A
+ movx m1, [srcq + sstrideq ] ;B
+ movx m6, [srcq + sstride6q ] ;G
+ punpcklbw m0, m1 ;A B
+ movx m7, [src1q + sstride6q ] ;H (src1q as in .loop, not rax)
+ pmaddubsw m0, k0k1
+ movx m2, [srcq + sstrideq * 2 ] ;C
+ punpcklbw m6, m7 ;G H
+ movx m3, [src1q + sstrideq * 2] ;D (src1q as in .loop, not rax)
+ pmaddubsw m6, k6k7
+ movx m4, [srcq + sstrideq * 4 ] ;E
+ punpcklbw m2, m3 ;C D
+ movx m5, [src1q + sstrideq * 4] ;F
+ punpcklbw m4, m5 ;E F
+ pmaddubsw m2, k2k3
+ pmaddubsw m4, k4k5
+ paddsw m0, m6
+ mova m1, m2
+ pmaxsw m2, m4
+ pminsw m4, m1
+ paddsw m0, m4
+ paddsw m0, m2
+ paddsw m0, krd
+ psraw m0, 7
+ packuswb m0, m0
+%ifidn %1, v8_avg
+ movx m1, [dstq]
+ pavgb m0, m1
+%endif
+ movx [dstq], m0
+.done:
+ RET
+%endm
+
+;-------------------------------------------------------------------------------
+%macro SUBPIX_VFILTER16 1
+cglobal filter_block1d16_%1, 6, 6+(ARCH_X86_64*3), 14, LOCAL_VARS_SIZE, \
+ src, sstride, dst, dstride, height, filter
+
+ mova m4, [filterq]
+ SETUP_LOCAL_VARS
+%if ARCH_X86_64
+ %define src1q r7
+ %define sstride6q r8
+ %define dst_stride dstrideq
+%else
+ %define src1q filterq
+ %define sstride6q dstrideq
+ %define dst_stride dstridemp
+%endif
+ mov src1q, srcq
+ add src1q, sstrideq
+ lea sstride6q, [sstrideq + sstrideq * 4]
+ add sstride6q, sstrideq ;pitch * 6
.loop:
- prefetcht0 [rsi + 2 * rax -3]
+ movh m0, [srcq ] ;A
+ movh m1, [srcq + sstrideq ] ;B
+ movh m2, [srcq + sstrideq * 2 ] ;C
+ movh m3, [src1q + sstrideq * 2] ;D
+ movh m4, [srcq + sstrideq * 4 ] ;E
+ movh m5, [src1q + sstrideq * 4] ;F
- movq xmm0, [rsi - 3] ;load src data
- movq xmm4, [rsi + 5]
- movq xmm6, [rsi + 13]
- punpcklqdq xmm0, xmm4
- punpcklqdq xmm4, xmm6
-
- movdqa xmm7, xmm0
-
- punpcklbw xmm7, xmm7
- punpckhbw xmm0, xmm0
- movdqa xmm1, xmm0
- movdqa xmm2, xmm0
- movdqa xmm3, xmm0
-
- palignr xmm0, xmm7, 1
- palignr xmm1, xmm7, 5
- pmaddubsw xmm0, k0k1
- palignr xmm2, xmm7, 9
- pmaddubsw xmm1, k2k3
- palignr xmm3, xmm7, 13
-
- pmaddubsw xmm2, k4k5
- pmaddubsw xmm3, k6k7
- paddsw xmm0, xmm3
-
- movdqa xmm3, xmm4
- punpcklbw xmm3, xmm3
- punpckhbw xmm4, xmm4
-
- movdqa xmm5, xmm4
- movdqa xmm6, xmm4
- movdqa xmm7, xmm4
-
- palignr xmm4, xmm3, 1
- palignr xmm5, xmm3, 5
- palignr xmm6, xmm3, 9
- palignr xmm7, xmm3, 13
-
- movdqa xmm3, xmm1
- pmaddubsw xmm4, k0k1
- pmaxsw xmm1, xmm2
- pmaddubsw xmm5, k2k3
- pminsw xmm2, xmm3
- pmaddubsw xmm6, k4k5
- paddsw xmm0, xmm2
- pmaddubsw xmm7, k6k7
- paddsw xmm0, xmm1
-
- paddsw xmm4, xmm7
- movdqa xmm7, xmm5
- pmaxsw xmm5, xmm6
- pminsw xmm6, xmm7
- paddsw xmm4, xmm6
- paddsw xmm4, xmm5
-
- paddsw xmm0, krd
- paddsw xmm4, krd
- psraw xmm0, 7
- psraw xmm4, 7
- packuswb xmm0, xmm0
- packuswb xmm4, xmm4
- punpcklqdq xmm0, xmm4
-%if %1
- movdqa xmm1, [rdi]
- pavgb xmm0, xmm1
+ punpcklbw m0, m1 ;A B
+ movh m6, [srcq + sstride6q] ;G
+ punpcklbw m2, m3 ;C D
+ movh m7, [src1q + sstride6q] ;H
+ punpcklbw m4, m5 ;E F
+ pmaddubsw m0, k0k1
+ movh m3, [srcq + 8] ;A
+ pmaddubsw m2, k2k3
+ punpcklbw m6, m7 ;G H
+ movh m5, [srcq + sstrideq + 8] ;B
+ pmaddubsw m4, k4k5
+ punpcklbw m3, m5 ;A B
+ movh m7, [srcq + sstrideq * 2 + 8] ;C
+ pmaddubsw m6, k6k7
+ mova m1, m2
+ movh m5, [src1q + sstrideq * 2 + 8] ;D
+ pmaxsw m2, m4
+ punpcklbw m7, m5 ;C D
+ pminsw m4, m1
+ paddsw m0, m6
+ pmaddubsw m3, k0k1
+ movh m1, [srcq + sstrideq * 4 + 8] ;E
+ paddsw m0, m4
+ pmaddubsw m7, k2k3
+ movh m6, [src1q + sstrideq * 4 + 8] ;F
+ punpcklbw m1, m6 ;E F
+ paddsw m0, m2
+ paddsw m0, krd
+ movh m2, [srcq + sstride6q + 8] ;G
+ pmaddubsw m1, k4k5
+ movh m5, [src1q + sstride6q + 8] ;H
+ psraw m0, 7
+ punpcklbw m2, m5 ;G H
+ packuswb m0, m0
+ pmaddubsw m2, k6k7
+%ifidn %1, v8_avg
+ movh m4, [dstq]
+ pavgb m0, m4
%endif
+ movh [dstq], m0
+ mova m6, m7
+ pmaxsw m7, m1
+ pminsw m1, m6
+ paddsw m3, m2
+ paddsw m3, m1
+ paddsw m3, m7
+ paddsw m3, krd
+ psraw m3, 7
+ packuswb m3, m3
- lea rsi, [rsi + rax]
- movdqa [rdi], xmm0
-
- lea rdi, [rdi + rdx]
- dec rcx
- jnz .loop
+ add srcq, sstrideq
+ add src1q, sstrideq
+%ifidn %1, v8_avg
+ movh m1, [dstq + 8]
+ pavgb m3, m1
+%endif
+ movh [dstq + 8], m3
+ add dstq, dst_stride
+ dec heightd
+ jnz .loop
+ RET
%endm
-;void vpx_filter_block1d4_h8_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(vpx_filter_block1d4_h8_ssse3) PRIVATE
-sym(vpx_filter_block1d4_h8_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 3
- %define k0k1k4k5 [rsp + 16 * 0]
- %define k2k3k6k7 [rsp + 16 * 1]
- %define krd [rsp + 16 * 2]
-
- HORIZx4 0
-
- add rsp, 16 * 3
- pop rsp
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vpx_filter_block1d8_h8_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(vpx_filter_block1d8_h8_ssse3) PRIVATE
-sym(vpx_filter_block1d8_h8_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16*5
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- %define krd [rsp + 16*4]
-
- HORIZx8 0
-
- add rsp, 16*5
- pop rsp
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-;void vpx_filter_block1d16_h8_ssse3
-;(
-; unsigned char *src_ptr,
-; unsigned int src_pixels_per_line,
-; unsigned char *output_ptr,
-; unsigned int output_pitch,
-; unsigned int output_height,
-; short *filter
-;)
-global sym(vpx_filter_block1d16_h8_ssse3) PRIVATE
-sym(vpx_filter_block1d16_h8_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16*5
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- %define krd [rsp + 16*4]
-
- HORIZx16 0
-
- add rsp, 16*5
- pop rsp
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(vpx_filter_block1d4_h8_avg_ssse3) PRIVATE
-sym(vpx_filter_block1d4_h8_avg_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16 * 3
- %define k0k1k4k5 [rsp + 16 * 0]
- %define k2k3k6k7 [rsp + 16 * 1]
- %define krd [rsp + 16 * 2]
-
- HORIZx4 1
-
- add rsp, 16 * 3
- pop rsp
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(vpx_filter_block1d8_h8_avg_ssse3) PRIVATE
-sym(vpx_filter_block1d8_h8_avg_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16*5
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- %define krd [rsp + 16*4]
-
- HORIZx8 1
-
- add rsp, 16*5
- pop rsp
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-global sym(vpx_filter_block1d16_h8_avg_ssse3) PRIVATE
-sym(vpx_filter_block1d16_h8_avg_ssse3):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- ; end prolog
-
- ALIGN_STACK 16, rax
- sub rsp, 16*5
- %define k0k1 [rsp + 16*0]
- %define k2k3 [rsp + 16*1]
- %define k4k5 [rsp + 16*2]
- %define k6k7 [rsp + 16*3]
- %define krd [rsp + 16*4]
-
- HORIZx16 1
-
- add rsp, 16*5
- pop rsp
-
- ; begin epilog
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-SECTION_RODATA
-align 16
-shuf_t0t1:
- db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
-align 16
-shuf_t2t3:
- db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10
-align 16
-shuf_t4t5:
- db 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12
-align 16
-shuf_t6t7:
- db 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
+INIT_XMM ssse3
+SUBPIX_VFILTER16 v8
+SUBPIX_VFILTER16 v8_avg
+SUBPIX_VFILTER v8, 8
+SUBPIX_VFILTER v8_avg, 8
+SUBPIX_VFILTER v8, 4
+SUBPIX_VFILTER v8_avg, 4
diff --git a/libvpx/vpx_mem/vpx_mem.c b/libvpx/vpx_mem/vpx_mem.c
index c6f501a..b98fe83 100644
--- a/libvpx/vpx_mem/vpx_mem.c
+++ b/libvpx/vpx_mem/vpx_mem.c
@@ -93,11 +93,10 @@
#if CONFIG_VP9_HIGHBITDEPTH
void *vpx_memset16(void *dest, int val, size_t length) {
- int i;
- void *orig = dest;
- uint16_t *dest16 = dest;
+ size_t i;
+ uint16_t *dest16 = (uint16_t *)dest;
for (i = 0; i < length; i++)
*dest16++ = val;
- return orig;
+ return dest;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/libvpx/vpx_ports/bitops.h b/libvpx/vpx_ports/bitops.h
index 0d3223e..84ff365 100644
--- a/libvpx/vpx_ports/bitops.h
+++ b/libvpx/vpx_ports/bitops.h
@@ -11,6 +11,8 @@
#ifndef VPX_PORTS_BITOPS_H_
#define VPX_PORTS_BITOPS_H_
+#include <assert.h>
+
#include "vpx_ports/msvc.h"
#ifdef _MSC_VER
@@ -25,10 +27,15 @@
extern "C" {
#endif
+// These versions of get_msb() are only valid when n != 0 because all
+// of the optimized versions are undefined when n == 0:
+// https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html
+
// use GNU builtins where available.
#if defined(__GNUC__) && \
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
static INLINE int get_msb(unsigned int n) {
+ assert(n != 0);
return 31 ^ __builtin_clz(n);
}
#elif defined(USE_MSC_INTRINSICS)
@@ -36,6 +43,7 @@
static INLINE int get_msb(unsigned int n) {
unsigned long first_set_bit;
+ assert(n != 0);
_BitScanReverse(&first_set_bit, n);
return first_set_bit;
}
@@ -47,6 +55,8 @@
unsigned int value = n;
int i;
+ assert(n != 0);
+
for (i = 4; i >= 0; --i) {
const int shift = (1 << i);
const unsigned int x = value >> shift;
diff --git a/libvpx/vpx_scale/yv12config.h b/libvpx/vpx_scale/yv12config.h
index fd5d54b..37b255d 100644
--- a/libvpx/vpx_scale/yv12config.h
+++ b/libvpx/vpx_scale/yv12config.h
@@ -56,6 +56,9 @@
int subsampling_y;
unsigned int bit_depth;
vpx_color_space_t color_space;
+ vpx_color_range_t color_range;
+ int render_width;
+ int render_height;
int corrupted;
int flags;
diff --git a/libvpx/vpx_util/endian_inl.h b/libvpx/vpx_util/endian_inl.h
index 6b177f1..37bdce1 100644
--- a/libvpx/vpx_util/endian_inl.h
+++ b/libvpx/vpx_util/endian_inl.h
@@ -25,14 +25,10 @@
# define LOCAL_GCC_PREREQ(maj, min) 0
#endif
-#ifdef __clang__
-# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
-# define LOCAL_CLANG_PREREQ(maj, min) \
- (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
-#else
-# define LOCAL_CLANG_VERSION 0
-# define LOCAL_CLANG_PREREQ(maj, min) 0
-#endif // __clang__
+// handle clang compatibility
+#ifndef __has_builtin
+# define __has_builtin(x) 0
+#endif
// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
#if !defined(WORDS_BIGENDIAN) && \
@@ -53,16 +49,18 @@
#define HToBE32(X) BSwap32(X)
#endif
-// clang-3.3 and gcc-4.3 have builtin functions for swap32/swap64
-#if LOCAL_GCC_PREREQ(4, 3) || LOCAL_CLANG_PREREQ(3, 3)
-#define HAVE_BUILTIN_BSWAP32
-#define HAVE_BUILTIN_BSWAP64
-#endif
-// clang-3.3 and gcc-4.8 have a builtin function for swap16
-#if LOCAL_GCC_PREREQ(4, 8) || LOCAL_CLANG_PREREQ(3, 3)
+#if LOCAL_GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16)
#define HAVE_BUILTIN_BSWAP16
#endif
+#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32)
+#define HAVE_BUILTIN_BSWAP32
+#endif
+
+#if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64)
+#define HAVE_BUILTIN_BSWAP64
+#endif
+
#if HAVE_MIPS32 && defined(__mips__) && !defined(__mips64) && \
defined(__mips_isa_rev) && (__mips_isa_rev >= 2) && (__mips_isa_rev < 6)
#define VPX_USE_MIPS32_R2
diff --git a/libvpx/vpxdec.c b/libvpx/vpxdec.c
index 3c61bd9..285d58e 100644
--- a/libvpx/vpxdec.c
+++ b/libvpx/vpxdec.c
@@ -562,7 +562,7 @@
int opt_i420 = 0;
vpx_codec_dec_cfg_t cfg = {0, 0, 0};
#if CONFIG_VP9_HIGHBITDEPTH
- int output_bit_depth = 0;
+ unsigned int output_bit_depth = 0;
#endif
#if CONFIG_VP8_DECODER
vp8_postproc_cfg_t vp8_pp_cfg = {0};
@@ -618,9 +618,6 @@
use_y4m = 0;
flipuv = 1;
opt_yv12 = 1;
-#if CONFIG_VP9_HIGHBITDEPTH
- output_bit_depth = 8; // For yv12 8-bit depth output is assumed
-#endif
} else if (arg_match(&arg, &use_i420, argi)) {
use_y4m = 0;
flipuv = 0;
@@ -956,22 +953,22 @@
// these is set to 0, use the display size set in the first frame
// header. If that is unavailable, use the raw decoded size of the
// first decoded frame.
- int display_width = vpx_input_ctx.width;
- int display_height = vpx_input_ctx.height;
- if (!display_width || !display_height) {
- int display_size[2];
+ int render_width = vpx_input_ctx.width;
+ int render_height = vpx_input_ctx.height;
+ if (!render_width || !render_height) {
+ int render_size[2];
if (vpx_codec_control(&decoder, VP9D_GET_DISPLAY_SIZE,
- display_size)) {
+ render_size)) {
// As last resort use size of first frame as display size.
- display_width = img->d_w;
- display_height = img->d_h;
+ render_width = img->d_w;
+ render_height = img->d_h;
} else {
- display_width = display_size[0];
- display_height = display_size[1];
+ render_width = render_size[0];
+ render_height = render_size[1];
}
}
- scaled_img = vpx_img_alloc(NULL, img->fmt, display_width,
- display_height, 16);
+ scaled_img = vpx_img_alloc(NULL, img->fmt, render_width,
+ render_height, 16);
scaled_img->bit_depth = img->bit_depth;
}
@@ -990,11 +987,11 @@
}
#if CONFIG_VP9_HIGHBITDEPTH
// Default to codec bit depth if output bit depth not set
- if (!output_bit_depth) {
+ if (!output_bit_depth && single_file && !do_md5) {
output_bit_depth = img->bit_depth;
}
// Shift up or down if necessary
- if (output_bit_depth != img->bit_depth) {
+ if (output_bit_depth != 0 && output_bit_depth != img->bit_depth) {
const vpx_img_fmt_t shifted_fmt = output_bit_depth == 8 ?
img->fmt ^ (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) :
img->fmt | VPX_IMG_FMT_HIGHBITDEPTH;
diff --git a/libvpx/vpxenc.c b/libvpx/vpxenc.c
index 06604ea..cb78226 100644
--- a/libvpx/vpxenc.c
+++ b/libvpx/vpxenc.c
@@ -1996,7 +1996,7 @@
usage_exit();
/* Decide if other chroma subsamplings than 4:2:0 are supported */
- if (global.codec->fourcc == VP9_FOURCC)
+ if (global.codec->fourcc == VP9_FOURCC || global.codec->fourcc == VP10_FOURCC)
input.only_i420 = 0;
for (pass = global.pass ? global.pass - 1 : 0; pass < global.passes; pass++) {
diff --git a/libvpx/webmdec.cc b/libvpx/webmdec.cc
index 1020d04..f541cfe 100644
--- a/libvpx/webmdec.cc
+++ b/libvpx/webmdec.cc
@@ -94,7 +94,7 @@
}
}
- if (video_track == NULL) {
+ if (video_track == NULL || video_track->GetCodecId() == NULL) {
rewind_and_reset(webm_ctx, vpx_ctx);
return 0;
}
diff --git a/libwebm.mk b/libwebm.mk
index 0528cfb..3afa4a0 100644
--- a/libwebm.mk
+++ b/libwebm.mk
@@ -2,7 +2,9 @@
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
-LOCAL_SRC_FILES := libwebm/mkvparser.cpp
+LOCAL_SRC_FILES := libwebm/mkvparser/mkvparser.cc
+LOCAL_CPP_EXTENSION := .cc
+LOCAL_C_INCLUDES += $(LOCAL_PATH)/libwebm/
LOCAL_MODULE := libwebm
diff --git a/libwebm/README.android b/libwebm/README.android
new file mode 100644
index 0000000..dba2327
--- /dev/null
+++ b/libwebm/README.android
@@ -0,0 +1,35 @@
+Name: libwebm
+URL: http://www.webmproject.org
+
+Commit: 5c50e310e7050192b952fe588186fd1dadc08b6e
+
+Description:
+Contains the sources used to compile libwebm's matroska parser.
+
+The libwebm source is from webmproject.org:
+ https://chromium.googlesource.com/webm/libwebm
+
+Notes on updating libwebm source code:
+
+Please follow these steps to update libwebm source code:
+
+1. Copy over the following files from the libwebm checkout:
+ - mkvparser/mkvparser.cc
+ - mkvparser/mkvparser.h
+ - common/webmids.h
+
+2. Update README.android (this file) with the upstream hash.
+
+3. Copy the git log summary of changes by using the following in the libwebm
+ checkout: git log --pretty="%h %s" <previous_hash>...<current_hash>
+
+4. Commit the changes. The commit message should look like this:
+
+ libwebm: Pull from upstream
+
+ Current HEAD: <hash>
+
+ git log from upstream:
+ a6b2070 <git commit message 1>
+ 08dabbc <git commit message 2>
+ c29fb02 <git commit message 3>
diff --git a/libwebm/common/webmids.h b/libwebm/common/webmids.h
new file mode 100644
index 0000000..32a0c5f
--- /dev/null
+++ b/libwebm/common/webmids.h
@@ -0,0 +1,184 @@
+// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+#ifndef COMMON_WEBMIDS_H_
+#define COMMON_WEBMIDS_H_
+
+namespace libwebm {
+
+enum MkvId {
+ kMkvEBML = 0x1A45DFA3,
+ kMkvEBMLVersion = 0x4286,
+ kMkvEBMLReadVersion = 0x42F7,
+ kMkvEBMLMaxIDLength = 0x42F2,
+ kMkvEBMLMaxSizeLength = 0x42F3,
+ kMkvDocType = 0x4282,
+ kMkvDocTypeVersion = 0x4287,
+ kMkvDocTypeReadVersion = 0x4285,
+ kMkvVoid = 0xEC,
+ kMkvSignatureSlot = 0x1B538667,
+ kMkvSignatureAlgo = 0x7E8A,
+ kMkvSignatureHash = 0x7E9A,
+ kMkvSignaturePublicKey = 0x7EA5,
+ kMkvSignature = 0x7EB5,
+ kMkvSignatureElements = 0x7E5B,
+ kMkvSignatureElementList = 0x7E7B,
+ kMkvSignedElement = 0x6532,
+ // segment
+ kMkvSegment = 0x18538067,
+ // Meta Seek Information
+ kMkvSeekHead = 0x114D9B74,
+ kMkvSeek = 0x4DBB,
+ kMkvSeekID = 0x53AB,
+ kMkvSeekPosition = 0x53AC,
+ // Segment Information
+ kMkvInfo = 0x1549A966,
+ kMkvTimecodeScale = 0x2AD7B1,
+ kMkvDuration = 0x4489,
+ kMkvDateUTC = 0x4461,
+ kMkvTitle = 0x7BA9,
+ kMkvMuxingApp = 0x4D80,
+ kMkvWritingApp = 0x5741,
+ // Cluster
+ kMkvCluster = 0x1F43B675,
+ kMkvTimecode = 0xE7,
+ kMkvPrevSize = 0xAB,
+ kMkvBlockGroup = 0xA0,
+ kMkvBlock = 0xA1,
+ kMkvBlockDuration = 0x9B,
+ kMkvReferenceBlock = 0xFB,
+ kMkvLaceNumber = 0xCC,
+ kMkvSimpleBlock = 0xA3,
+ kMkvBlockAdditions = 0x75A1,
+ kMkvBlockMore = 0xA6,
+ kMkvBlockAddID = 0xEE,
+ kMkvBlockAdditional = 0xA5,
+ kMkvDiscardPadding = 0x75A2,
+ // Track
+ kMkvTracks = 0x1654AE6B,
+ kMkvTrackEntry = 0xAE,
+ kMkvTrackNumber = 0xD7,
+ kMkvTrackUID = 0x73C5,
+ kMkvTrackType = 0x83,
+ kMkvFlagEnabled = 0xB9,
+ kMkvFlagDefault = 0x88,
+ kMkvFlagForced = 0x55AA,
+ kMkvFlagLacing = 0x9C,
+ kMkvDefaultDuration = 0x23E383,
+ kMkvMaxBlockAdditionID = 0x55EE,
+ kMkvName = 0x536E,
+ kMkvLanguage = 0x22B59C,
+ kMkvCodecID = 0x86,
+ kMkvCodecPrivate = 0x63A2,
+ kMkvCodecName = 0x258688,
+ kMkvCodecDelay = 0x56AA,
+ kMkvSeekPreRoll = 0x56BB,
+ // video
+ kMkvVideo = 0xE0,
+ kMkvFlagInterlaced = 0x9A,
+ kMkvStereoMode = 0x53B8,
+ kMkvAlphaMode = 0x53C0,
+ kMkvPixelWidth = 0xB0,
+ kMkvPixelHeight = 0xBA,
+ kMkvPixelCropBottom = 0x54AA,
+ kMkvPixelCropTop = 0x54BB,
+ kMkvPixelCropLeft = 0x54CC,
+ kMkvPixelCropRight = 0x54DD,
+ kMkvDisplayWidth = 0x54B0,
+ kMkvDisplayHeight = 0x54BA,
+ kMkvDisplayUnit = 0x54B2,
+ kMkvAspectRatioType = 0x54B3,
+ kMkvFrameRate = 0x2383E3,
+ // end video
+ // colour
+ kMkvColour = 0x55B0,
+ kMkvMatrixCoefficients = 0x55B1,
+ kMkvBitsPerChannel = 0x55B2,
+ kMkvChromaSubsamplingHorz = 0x55B3,
+ kMkvChromaSubsamplingVert = 0x55B4,
+ kMkvCbSubsamplingHorz = 0x55B5,
+ kMkvCbSubsamplingVert = 0x55B6,
+ kMkvChromaSitingHorz = 0x55B7,
+ kMkvChromaSitingVert = 0x55B8,
+ kMkvRange = 0x55B9,
+ kMkvTransferCharacteristics = 0x55BA,
+ kMkvPrimaries = 0x55BB,
+ kMkvMaxCLL = 0x55BC,
+ kMkvMaxFALL = 0x55BD,
+ // mastering metadata
+ kMkvMasteringMetadata = 0x55D0,
+ kMkvPrimaryRChromaticityX = 0x55D1,
+ kMkvPrimaryRChromaticityY = 0x55D2,
+ kMkvPrimaryGChromaticityX = 0x55D3,
+ kMkvPrimaryGChromaticityY = 0x55D4,
+ kMkvPrimaryBChromaticityX = 0x55D5,
+ kMkvPrimaryBChromaticityY = 0x55D6,
+ kMkvWhitePointChromaticityX = 0x55D7,
+ kMkvWhitePointChromaticityY = 0x55D8,
+ kMkvLuminanceMax = 0x55D9,
+ kMkvLuminanceMin = 0x55DA,
+ // end mastering metadata
+ // end colour
+ // audio
+ kMkvAudio = 0xE1,
+ kMkvSamplingFrequency = 0xB5,
+ kMkvOutputSamplingFrequency = 0x78B5,
+ kMkvChannels = 0x9F,
+ kMkvBitDepth = 0x6264,
+ // end audio
+ // ContentEncodings
+ kMkvContentEncodings = 0x6D80,
+ kMkvContentEncoding = 0x6240,
+ kMkvContentEncodingOrder = 0x5031,
+ kMkvContentEncodingScope = 0x5032,
+ kMkvContentEncodingType = 0x5033,
+ kMkvContentCompression = 0x5034,
+ kMkvContentCompAlgo = 0x4254,
+ kMkvContentCompSettings = 0x4255,
+ kMkvContentEncryption = 0x5035,
+ kMkvContentEncAlgo = 0x47E1,
+ kMkvContentEncKeyID = 0x47E2,
+ kMkvContentSignature = 0x47E3,
+ kMkvContentSigKeyID = 0x47E4,
+ kMkvContentSigAlgo = 0x47E5,
+ kMkvContentSigHashAlgo = 0x47E6,
+ kMkvContentEncAESSettings = 0x47E7,
+ kMkvAESSettingsCipherMode = 0x47E8,
+ kMkvAESSettingsCipherInitData = 0x47E9,
+ // end ContentEncodings
+ // Cueing Data
+ kMkvCues = 0x1C53BB6B,
+ kMkvCuePoint = 0xBB,
+ kMkvCueTime = 0xB3,
+ kMkvCueTrackPositions = 0xB7,
+ kMkvCueTrack = 0xF7,
+ kMkvCueClusterPosition = 0xF1,
+ kMkvCueBlockNumber = 0x5378,
+ // Chapters
+ kMkvChapters = 0x1043A770,
+ kMkvEditionEntry = 0x45B9,
+ kMkvChapterAtom = 0xB6,
+ kMkvChapterUID = 0x73C4,
+ kMkvChapterStringUID = 0x5654,
+ kMkvChapterTimeStart = 0x91,
+ kMkvChapterTimeEnd = 0x92,
+ kMkvChapterDisplay = 0x80,
+ kMkvChapString = 0x85,
+ kMkvChapLanguage = 0x437C,
+ kMkvChapCountry = 0x437E,
+ // Tags
+ kMkvTags = 0x1254C367,
+ kMkvTag = 0x7373,
+ kMkvSimpleTag = 0x67C8,
+ kMkvTagName = 0x45A3,
+ kMkvTagString = 0x4487
+};
+
+} // namespace libwebm
+
+#endif // COMMON_WEBMIDS_H_
diff --git a/libwebm/mkvparser.cpp b/libwebm/mkvparser/mkvparser.cc
similarity index 87%
rename from libwebm/mkvparser.cpp
rename to libwebm/mkvparser/mkvparser.cc
index 651dc8f..ff13327 100644
--- a/libwebm/mkvparser.cpp
+++ b/libwebm/mkvparser/mkvparser.cc
@@ -5,26 +5,40 @@
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
+#include "mkvparser/mkvparser.h"
-#include "mkvparser.hpp"
+#if defined(_MSC_VER) && _MSC_VER < 1800
+#include <float.h> // _isnan() / _finite()
+#define MSC_COMPAT
+#endif
#include <cassert>
+#include <cfloat>
#include <climits>
#include <cmath>
#include <cstring>
+#include <memory>
#include <new>
-#ifdef _MSC_VER
-// Disable MSVC warnings that suggest making code non-portable.
-#pragma warning(disable : 4996)
-#endif
+#include "common/webmids.h"
namespace mkvparser {
+const float MasteringMetadata::kValueNotPresent = FLT_MAX;
+const long long Colour::kValueNotPresent = LLONG_MAX;
+
+#ifdef MSC_COMPAT
+inline bool isnan(double val) { return !!_isnan(val); }
+inline bool isinf(double val) { return !_finite(val); }
+#else
+inline bool isnan(double val) { return std::isnan(val); }
+inline bool isinf(double val) { return std::isinf(val); }
+#endif // MSC_COMPAT
IMkvReader::~IMkvReader() {}
-template<typename Type> Type* SafeArrayAlloc(unsigned long long num_elements,
- unsigned long long element_size) {
+template <typename Type>
+Type* SafeArrayAlloc(unsigned long long num_elements,
+ unsigned long long element_size) {
if (num_elements == 0 || element_size == 0)
return NULL;
@@ -32,8 +46,10 @@
const unsigned long long num_bytes = num_elements * element_size;
if (element_size > (kMaxAllocSize / num_elements))
return NULL;
+ if (num_bytes != static_cast<size_t>(num_bytes))
+ return NULL;
- return new (std::nothrow) Type[num_bytes];
+ return new (std::nothrow) Type[static_cast<size_t>(num_bytes)];
}
void GetVersion(int& major, int& minor, int& build, int& revision) {
@@ -92,14 +108,65 @@
return result;
}
+// Reads an EBML ID and returns it.
+// An ID must be at least 1 byte long, cannot exceed 4, and its value must be
+// greater than 0.
+// See known EBML values and EBMLMaxIDLength:
+// http://www.matroska.org/technical/specs/index.html
+// Returns the ID, or a value less than 0 to report an error while reading the
+// ID.
long long ReadID(IMkvReader* pReader, long long pos, long& len) {
- const long long id = ReadUInt(pReader, pos, len);
- if (id < 0 || len < 1 || len > 4) {
- // An ID must be at least 1 byte long, and cannot exceed 4.
- // See EBMLMaxIDLength: http://www.matroska.org/technical/specs/index.html
+ if (pReader == NULL || pos < 0)
+ return E_FILE_FORMAT_INVALID;
+
+ // Read the first byte. The length in bytes of the ID is determined by
+ // finding the first set bit in the first byte of the ID.
+ unsigned char temp_byte = 0;
+ int read_status = pReader->Read(pos, 1, &temp_byte);
+
+ if (read_status < 0)
+ return E_FILE_FORMAT_INVALID;
+ else if (read_status > 0) // No data to read.
+ return E_BUFFER_NOT_FULL;
+
+ if (temp_byte == 0) // ID length > 8 bytes; invalid file.
+ return E_FILE_FORMAT_INVALID;
+
+ int bit_pos = 0;
+ const int kMaxIdLengthInBytes = 4;
+ const int kCheckByte = 0x80;
+
+ // Find the first bit that's set.
+ bool found_bit = false;
+ for (; bit_pos < kMaxIdLengthInBytes; ++bit_pos) {
+ if ((kCheckByte >> bit_pos) & temp_byte) {
+ found_bit = true;
+ break;
+ }
+ }
+
+ if (!found_bit) {
+ // The value is too large to be a valid ID.
return E_FILE_FORMAT_INVALID;
}
- return id;
+
+ // Read the remaining bytes of the ID (if any).
+ const int id_length = bit_pos + 1;
+ long long ebml_id = temp_byte;
+ for (int i = 1; i < id_length; ++i) {
+ ebml_id <<= 8;
+ read_status = pReader->Read(pos + i, 1, &temp_byte);
+
+ if (read_status < 0)
+ return E_FILE_FORMAT_INVALID;
+ else if (read_status > 0)
+ return E_BUFFER_NOT_FULL;
+
+ ebml_id |= temp_byte;
+ }
+
+ len = id_length;
+ return ebml_id;
}
long long GetUIntLength(IMkvReader* pReader, long long pos, long& len) {
@@ -214,7 +281,7 @@
result = d;
}
- if (std::isinf(result) || std::isnan(result))
+ if (mkvparser::isinf(result) || mkvparser::isnan(result))
return E_FILE_FORMAT_INVALID;
return 0;
@@ -269,7 +336,7 @@
unsigned char* const buf = reinterpret_cast<unsigned char*>(str);
- const long status = pReader->Read(pos, size, buf);
+ const long status = pReader->Read(pos, static_cast<long>(size), buf);
if (status) {
delete[] str;
@@ -282,9 +349,8 @@
return 0;
}
-long ParseElementHeader(IMkvReader* pReader, long long& pos,
- long long stop, long long& id,
- long long& size) {
+long ParseElementHeader(IMkvReader* pReader, long long& pos, long long stop,
+ long long& id, long long& size) {
if (stop >= 0 && pos >= stop)
return E_FILE_FORMAT_INVALID;
@@ -308,10 +374,10 @@
return E_FILE_FORMAT_INVALID;
}
- // Avoid rolling over pos when very close to LONG_LONG_MAX.
+ // Avoid rolling over pos when very close to LLONG_MAX.
const unsigned long long rollover_check =
static_cast<unsigned long long>(pos) + len;
- if (rollover_check > LONG_LONG_MAX)
+ if (rollover_check > LLONG_MAX)
return E_FILE_FORMAT_INVALID;
pos += len; // consume length of size
@@ -390,13 +456,13 @@
unsigned long long rollover_check =
static_cast<unsigned long long>(pos) + len;
- if (rollover_check > LONG_LONG_MAX)
+ if (rollover_check > LLONG_MAX)
return false;
pos += len; // consume length of size of payload
rollover_check = static_cast<unsigned long long>(pos) + size;
- if (rollover_check > LONG_LONG_MAX)
+ if (rollover_check > LLONG_MAX)
return false;
if ((pos + size) > available)
@@ -452,66 +518,45 @@
return status;
pos = 0;
- long long end = (available >= 1024) ? 1024 : available;
- for (;;) {
- unsigned char b = 0;
+ // Scan until we find what looks like the first byte of the EBML header.
+ const long long kMaxScanBytes = (available >= 1024) ? 1024 : available;
+ const unsigned char kEbmlByte0 = 0x1A;
+ unsigned char scan_byte = 0;
- while (pos < end) {
- status = pReader->Read(pos, 1, &b);
+ while (pos < kMaxScanBytes) {
+ status = pReader->Read(pos, 1, &scan_byte);
- if (status < 0) // error
- return status;
+ if (status < 0) // error
+ return status;
+ else if (status > 0)
+ return E_BUFFER_NOT_FULL;
- if (b == 0x1A)
- break;
-
- ++pos;
- }
-
- if (b != 0x1A) {
- if (pos >= 1024)
- return E_FILE_FORMAT_INVALID; // don't bother looking anymore
-
- if ((total >= 0) && ((total - available) < 5))
- return E_FILE_FORMAT_INVALID;
-
- return available + 5; // 5 = 4-byte ID + 1st byte of size
- }
-
- if ((total >= 0) && ((total - pos) < 5))
- return E_FILE_FORMAT_INVALID;
-
- if ((available - pos) < 5)
- return pos + 5; // try again later
-
- long len;
-
- const long long result = ReadUInt(pReader, pos, len);
-
- if (result < 0) // error
- return result;
-
- if (result == 0x0A45DFA3) { // EBML Header ID
- pos += len; // consume ID
+ if (scan_byte == kEbmlByte0)
break;
- }
- ++pos; // throw away just the 0x1A byte, and try again
+ ++pos;
}
- // pos designates start of size field
+ long len = 0;
+ const long long ebml_id = ReadID(pReader, pos, len);
- // get length of size field
+ if (ebml_id == E_BUFFER_NOT_FULL)
+ return E_BUFFER_NOT_FULL;
- long len;
+ if (len != 4 || ebml_id != libwebm::kMkvEBML)
+ return E_FILE_FORMAT_INVALID;
+
+ // Move read pos forward to the EBML header size field.
+ pos += 4;
+
+ // Read length of size field.
long long result = GetUIntLength(pReader, pos, len);
if (result < 0) // error
- return result;
-
- if (result > 0) // need more data
- return result;
+ return E_FILE_FORMAT_INVALID;
+ else if (result > 0) // need more data
+ return E_BUFFER_NOT_FULL;
if (len < 1 || len > 8)
return E_FILE_FORMAT_INVALID;
@@ -522,8 +567,7 @@
if ((available - pos) < len)
return pos + len; // try again later
- // get the EBML header size
-
+ // Read the EBML header size.
result = ReadUInt(pReader, pos, len);
if (result < 0) // error
@@ -539,7 +583,7 @@
if ((available - pos) < result)
return pos + result;
- end = pos + result;
+ const long long end = pos + result;
Init();
@@ -551,30 +595,30 @@
if (status < 0) // error
return status;
- if (size == 0) // weird
+ if (size == 0)
return E_FILE_FORMAT_INVALID;
- if (id == 0x0286) { // version
+ if (id == libwebm::kMkvEBMLVersion) {
m_version = UnserializeUInt(pReader, pos, size);
if (m_version <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x02F7) { // read version
+ } else if (id == libwebm::kMkvEBMLReadVersion) {
m_readVersion = UnserializeUInt(pReader, pos, size);
if (m_readVersion <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x02F2) { // max id length
+ } else if (id == libwebm::kMkvEBMLMaxIDLength) {
m_maxIdLength = UnserializeUInt(pReader, pos, size);
if (m_maxIdLength <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x02F3) { // max size length
+ } else if (id == libwebm::kMkvEBMLMaxSizeLength) {
m_maxSizeLength = UnserializeUInt(pReader, pos, size);
if (m_maxSizeLength <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x0282) { // doctype
+ } else if (id == libwebm::kMkvDocType) {
if (m_docType)
return E_FILE_FORMAT_INVALID;
@@ -582,12 +626,12 @@
if (status) // error
return status;
- } else if (id == 0x0287) { // doctype version
+ } else if (id == libwebm::kMkvDocTypeVersion) {
m_docTypeVersion = UnserializeUInt(pReader, pos, size);
if (m_docTypeVersion <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x0285) { // doctype read version
+ } else if (id == libwebm::kMkvDocTypeReadVersion) {
m_docTypeReadVersion = UnserializeUInt(pReader, pos, size);
if (m_docTypeReadVersion <= 0)
@@ -600,6 +644,15 @@
if (pos != end)
return E_FILE_FORMAT_INVALID;
+ // Make sure DocType, DocTypeReadVersion, and DocTypeVersion are valid.
+ if (m_docType == NULL || m_docTypeReadVersion <= 0 || m_docTypeVersion <= 0)
+ return E_FILE_FORMAT_INVALID;
+
+ // Make sure EBMLMaxIDLength and EBMLMaxSizeLength are valid.
+ if (m_maxIdLength <= 0 || m_maxIdLength > 4 || m_maxSizeLength <= 0 ||
+ m_maxSizeLength > 8)
+ return E_FILE_FORMAT_INVALID;
+
return 0;
}
@@ -732,7 +785,7 @@
// Handle "unknown size" for live streaming of webm files.
const long long unknown_size = (1LL << (7 * len)) - 1;
- if (id == 0x08538067) { // Segment ID
+ if (id == libwebm::kMkvSegment) {
if (size == unknown_size)
size = -1;
@@ -742,12 +795,9 @@
else if ((pos + size) > total)
size = -1;
- pSegment = new (std::nothrow) Segment(pReader, idpos,
- // elem_size
- pos, size);
-
- if (pSegment == 0)
- return -1; // generic error
+ pSegment = new (std::nothrow) Segment(pReader, idpos, pos, size);
+ if (pSegment == NULL)
+ return E_PARSE_FAILED;
return 0; // success
}
@@ -796,9 +846,9 @@
long long pos = m_pos;
const long long element_start = pos;
- // Avoid rolling over pos when very close to LONG_LONG_MAX.
+ // Avoid rolling over pos when very close to LLONG_MAX.
unsigned long long rollover_check = pos + 1ULL;
- if (rollover_check > LONG_LONG_MAX)
+ if (rollover_check > LLONG_MAX)
return E_FILE_FORMAT_INVALID;
if ((pos + 1) > available)
@@ -827,7 +877,7 @@
if (id < 0)
return E_FILE_FORMAT_INVALID;
- if (id == 0x0F43B675) // Cluster ID
+ if (id == libwebm::kMkvCluster)
break;
pos += len; // consume ID
@@ -862,9 +912,9 @@
pos += len; // consume length of size of element
- // Avoid rolling over pos when very close to LONG_LONG_MAX.
+ // Avoid rolling over pos when very close to LLONG_MAX.
rollover_check = static_cast<unsigned long long>(pos) + size;
- if (rollover_check > LONG_LONG_MAX)
+ if (rollover_check > LLONG_MAX)
return E_FILE_FORMAT_INVALID;
const long long element_size = size + pos - element_start;
@@ -879,7 +929,7 @@
if ((pos + size) > available)
return pos + size;
- if (id == 0x0549A966) { // Segment Info ID
+ if (id == libwebm::kMkvInfo) {
if (m_pInfo)
return E_FILE_FORMAT_INVALID;
@@ -893,7 +943,7 @@
if (status)
return status;
- } else if (id == 0x0654AE6B) { // Tracks ID
+ } else if (id == libwebm::kMkvTracks) {
if (m_pTracks)
return E_FILE_FORMAT_INVALID;
@@ -907,7 +957,7 @@
if (status)
return status;
- } else if (id == 0x0C53BB6B) { // Cues ID
+ } else if (id == libwebm::kMkvCues) {
if (m_pCues == NULL) {
m_pCues = new (std::nothrow)
Cues(this, pos, size, element_start, element_size);
@@ -915,7 +965,7 @@
if (m_pCues == NULL)
return -1;
}
- } else if (id == 0x014D9B74) { // SeekHead ID
+ } else if (id == libwebm::kMkvSeekHead) {
if (m_pSeekHead == NULL) {
m_pSeekHead = new (std::nothrow)
SeekHead(this, pos, size, element_start, element_size);
@@ -928,7 +978,7 @@
if (status)
return status;
}
- } else if (id == 0x0043A770) { // Chapters ID
+ } else if (id == libwebm::kMkvChapters) {
if (m_pChapters == NULL) {
m_pChapters = new (std::nothrow)
Chapters(this, pos, size, element_start, element_size);
@@ -941,7 +991,7 @@
if (status)
return status;
}
- } else if (id == 0x0254C367) { // Tags ID
+ } else if (id == libwebm::kMkvTags) {
if (m_pTags == NULL) {
m_pTags = new (std::nothrow)
Tags(this, pos, size, element_start, element_size);
@@ -1020,7 +1070,7 @@
if (result < 0) // error
return static_cast<long>(result);
- if (result > 0) // weird
+ if (result > 0)
return E_BUFFER_NOT_FULL;
if ((segment_stop >= 0) && ((pos + len) > segment_stop))
@@ -1049,7 +1099,7 @@
if (result < 0) // error
return static_cast<long>(result);
- if (result > 0) // weird
+ if (result > 0)
return E_BUFFER_NOT_FULL;
if ((segment_stop >= 0) && ((pos + len) > segment_stop))
@@ -1067,7 +1117,8 @@
// pos now points to start of payload
- if (size == 0) { // weird
+ if (size == 0) {
+ // Missing element payload: move on.
m_pos = pos;
continue;
}
@@ -1079,9 +1130,11 @@
return E_FILE_FORMAT_INVALID;
}
- if (id == 0x0C53BB6B) { // Cues ID
- if (size == unknown_size)
- return E_FILE_FORMAT_INVALID; // TODO: liberalize
+ if (id == libwebm::kMkvCues) {
+ if (size == unknown_size) {
+ // Cues element of unknown size: Not supported.
+ return E_FILE_FORMAT_INVALID;
+ }
if (m_pCues == NULL) {
const long long element_size = (pos - idpos) + size;
@@ -1095,9 +1148,12 @@
continue;
}
- if (id != 0x0F43B675) { // Cluster ID
+ if (id != libwebm::kMkvCluster) {
+ // Besides the Segment, Libwebm allows only cluster elements of unknown
+ // size. Fail the parse upon encountering a non-cluster element reporting
+ // unknown size.
if (size == unknown_size)
- return E_FILE_FORMAT_INVALID; // TODO: liberalize
+ return E_FILE_FORMAT_INVALID;
m_pos = pos + size; // consume payload
continue;
@@ -1336,14 +1392,14 @@
}
bool Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) {
- assert(pCluster);
- assert(pCluster->m_index < 0);
- assert(idx >= m_clusterCount);
+ if (pCluster == NULL || pCluster->m_index >= 0 || idx < m_clusterCount)
+ return false;
const long count = m_clusterCount + m_clusterPreloadCount;
long& size = m_clusterSize;
- assert(size >= count);
+ if (size < count)
+ return false;
if (count >= size) {
const long n = (size <= 0) ? 2048 : 2 * size;
@@ -1365,17 +1421,20 @@
size = n;
}
- assert(m_clusters);
+ if (m_clusters == NULL)
+ return false;
Cluster** const p = m_clusters + idx;
Cluster** q = m_clusters + count;
- assert(q >= p);
- assert(q < (m_clusters + size));
+ if (q < p || q >= (m_clusters + size))
+ return false;
while (q > p) {
Cluster** const qq = q - 1;
- assert((*qq)->m_index < 0);
+
+ if ((*qq)->m_index >= 0)
+ return false;
*q = *qq;
q = qq;
@@ -1387,10 +1446,8 @@
}
long Segment::Load() {
- assert(m_clusters == NULL);
- assert(m_clusterSize == 0);
- assert(m_clusterCount == 0);
- // assert(m_size >= 0);
+ if (m_clusters != NULL || m_clusterSize != 0 || m_clusterCount != 0)
+ return E_PARSE_FAILED;
// Outermost (level 0) segment object has been constructed,
// and pos designates start of payload. We need to find the
@@ -1454,9 +1511,9 @@
if (status < 0) // error
return status;
- if (id == 0x0DBB) // SeekEntry ID
+ if (id == libwebm::kMkvSeek)
++entry_count;
- else if (id == 0x6C) // Void ID
+ else if (id == libwebm::kMkvVoid)
++void_element_count;
pos += size; // consume payload
@@ -1495,14 +1552,14 @@
if (status < 0) // error
return status;
- if (id == 0x0DBB) { // SeekEntry ID
+ if (id == libwebm::kMkvSeek) {
if (ParseEntry(pReader, pos, size, pEntry)) {
Entry& e = *pEntry++;
e.element_start = idpos;
e.element_size = (pos + size) - idpos;
}
- } else if (id == 0x6C) { // Void ID
+ } else if (id == libwebm::kMkvVoid) {
VoidElement& e = *pVoidElement++;
e.element_start = idpos;
@@ -1606,7 +1663,7 @@
const long long id = ReadID(m_pReader, idpos, len);
- if (id != 0x0C53BB6B) // Cues ID
+ if (id != libwebm::kMkvCues)
return E_FILE_FORMAT_INVALID;
pos += len; // consume ID
@@ -1688,7 +1745,7 @@
if (seekIdId < 0)
return false;
- if (seekIdId != 0x13AB) // SeekID ID
+ if (seekIdId != libwebm::kMkvSeekID)
return false;
if ((pos + len) > stop)
@@ -1730,9 +1787,9 @@
pos += seekIdSize; // consume SeekID payload
- const long long seekPosId = ReadUInt(pReader, pos, len);
+ const long long seekPosId = ReadID(pReader, pos, len);
- if (seekPosId != 0x13AC) // SeekPos ID
+ if (seekPosId != libwebm::kMkvSeekPosition)
return false;
if ((pos + len) > stop)
@@ -1842,7 +1899,7 @@
return false;
}
- if (id == 0x3B) { // CuePoint ID
+ if (id == libwebm::kMkvCuePoint) {
if (!PreloadCuePoint(cue_points_size, idpos))
return false;
}
@@ -1917,7 +1974,7 @@
if ((m_pos + size) > stop)
return false;
- if (id != 0x3B) { // CuePoint ID
+ if (id != libwebm::kMkvCuePoint) {
m_pos += size; // consume payload
if (m_pos > stop)
return false;
@@ -2047,8 +2104,8 @@
}
const CuePoint* Cues::GetNext(const CuePoint* pCurr) const {
- if (pCurr == NULL || pCurr->GetTimeCode() < 0 ||
- m_cue_points == NULL || m_count < 1) {
+ if (pCurr == NULL || pCurr->GetTimeCode() < 0 || m_cue_points == NULL ||
+ m_count < 1) {
return NULL;
}
@@ -2228,7 +2285,7 @@
long len;
const long long id = ReadID(pReader, pos_, len);
- if (id != 0x3B)
+ if (id != libwebm::kMkvCuePoint)
return false;
pos_ += len; // consume ID
@@ -2268,10 +2325,10 @@
return false;
}
- if (id == 0x33) // CueTime ID
+ if (id == libwebm::kMkvCueTime)
m_timecode = UnserializeUInt(pReader, pos, size);
- else if (id == 0x37) // CueTrackPosition(s) ID
+ else if (id == libwebm::kMkvCueTrackPositions)
++m_track_positions_count;
pos += size; // consume payload
@@ -2310,7 +2367,7 @@
pos += len; // consume Size field
assert((pos + size) <= stop);
- if (id == 0x37) { // CueTrackPosition(s) ID
+ if (id == libwebm::kMkvCueTrackPositions) {
TrackPosition& tp = *p++;
if (!tp.Parse(pReader, pos, size)) {
return false;
@@ -2359,13 +2416,11 @@
return false;
}
- if (id == 0x77) // CueTrack ID
+ if (id == libwebm::kMkvCueTrack)
m_track = UnserializeUInt(pReader, pos, size);
-
- else if (id == 0x71) // CueClusterPos ID
+ else if (id == libwebm::kMkvCueClusterPosition)
m_pos = UnserializeUInt(pReader, pos, size);
-
- else if (id == 0x1378) // CueBlockNumber
+ else if (id == libwebm::kMkvCueBlockNumber)
m_block = UnserializeUInt(pReader, pos, size);
pos += size; // consume payload
@@ -2499,7 +2554,7 @@
return NULL;
const long long id = ReadID(m_pReader, pos, len);
- if (id != 0x0F43B675) // Cluster ID
+ if (id != libwebm::kMkvCluster)
return NULL;
pos += len; // consume ID
@@ -2556,7 +2611,7 @@
if (size == 0) // weird
continue;
- if (id == 0x0F43B675) { // Cluster ID
+ if (id == libwebm::kMkvCluster) {
const long long off_next_ = idpos - m_start;
long long pos_;
@@ -2706,7 +2761,7 @@
const long long id = ReadUInt(m_pReader, pos, len);
- if (id != 0x0F43B675) // weird: not Cluster ID
+ if (id != libwebm::kMkvCluster)
return -1;
pos += len; // consume ID
@@ -2821,7 +2876,7 @@
const long long idpos = pos; // absolute
const long long idoff = pos - m_start; // relative
- const long long id = ReadUInt(m_pReader, idpos, len); // absolute
+ const long long id = ReadID(m_pReader, idpos, len); // absolute
if (id < 0) // error
return static_cast<long>(id);
@@ -2871,7 +2926,7 @@
return E_FILE_FORMAT_INVALID;
}
- if (id == 0x0C53BB6B) { // Cues ID
+ if (id == libwebm::kMkvCues) {
if (size == unknown_size)
return E_FILE_FORMAT_INVALID;
@@ -2897,7 +2952,7 @@
continue;
}
- if (id != 0x0F43B675) { // not a Cluster ID
+ if (id != libwebm::kMkvCluster) { // not a Cluster ID
if (size == unknown_size)
return E_FILE_FORMAT_INVALID;
@@ -3026,7 +3081,7 @@
return E_BUFFER_NOT_FULL;
const long long idpos = pos;
- const long long id = ReadUInt(m_pReader, idpos, len);
+ const long long id = ReadID(m_pReader, idpos, len);
if (id < 0) // error (or underflow)
return static_cast<long>(id);
@@ -3035,10 +3090,7 @@
// that we have exhausted the sub-element's inside the cluster
// whose ID we parsed earlier.
- if (id == 0x0F43B675) // Cluster ID
- break;
-
- if (id == 0x0C53BB6B) // Cues ID
+ if (id == libwebm::kMkvCluster || id == libwebm::kMkvCues)
break;
pos += len; // consume ID (of sub-element)
@@ -3206,7 +3258,7 @@
if (size == 0) // weird
continue;
- if (id == 0x05B9) { // EditionEntry ID
+ if (id == libwebm::kMkvEditionEntry) {
status = ParseEdition(pos, size);
if (status < 0) // error
@@ -3319,10 +3371,10 @@
if (status < 0) // error
return status;
- if (size == 0) // weird
+ if (size == 0)
continue;
- if (id == 0x36) { // Atom ID
+ if (id == libwebm::kMkvChapterAtom) {
status = ParseAtom(pReader, pos, size);
if (status < 0) // error
@@ -3452,20 +3504,20 @@
if (status < 0) // error
return status;
- if (size == 0) // weird
+ if (size == 0) // 0 length payload, skip.
continue;
- if (id == 0x00) { // Display ID
+ if (id == libwebm::kMkvChapterDisplay) {
status = ParseDisplay(pReader, pos, size);
if (status < 0) // error
return status;
- } else if (id == 0x1654) { // StringUID ID
+ } else if (id == libwebm::kMkvChapterStringUID) {
status = UnserializeString(pReader, pos, size, m_string_uid);
if (status < 0) // error
return status;
- } else if (id == 0x33C4) { // UID ID
+ } else if (id == libwebm::kMkvChapterUID) {
long long val;
status = UnserializeInt(pReader, pos, size, val);
@@ -3473,14 +3525,14 @@
return status;
m_uid = static_cast<unsigned long long>(val);
- } else if (id == 0x11) { // TimeStart ID
+ } else if (id == libwebm::kMkvChapterTimeStart) {
const long long val = UnserializeUInt(pReader, pos, size);
if (val < 0) // error
return static_cast<long>(val);
m_start_timecode = val;
- } else if (id == 0x12) { // TimeEnd ID
+ } else if (id == libwebm::kMkvChapterTimeEnd) {
const long long val = UnserializeUInt(pReader, pos, size);
if (val < 0) // error
@@ -3605,20 +3657,20 @@
if (status < 0) // error
return status;
- if (size == 0) // weird
+ if (size == 0) // No payload.
continue;
- if (id == 0x05) { // ChapterString ID
+ if (id == libwebm::kMkvChapString) {
status = UnserializeString(pReader, pos, size, m_string);
if (status)
return status;
- } else if (id == 0x037C) { // ChapterLanguage ID
+ } else if (id == libwebm::kMkvChapLanguage) {
status = UnserializeString(pReader, pos, size, m_language);
if (status)
return status;
- } else if (id == 0x037E) { // ChapterCountry ID
+ } else if (id == libwebm::kMkvChapCountry) {
status = UnserializeString(pReader, pos, size, m_country);
if (status)
@@ -3671,7 +3723,7 @@
if (size == 0) // 0 length tag, read another
continue;
- if (id == 0x3373) { // Tag ID
+ if (id == libwebm::kMkvTag) {
status = ParseTag(pos, size);
if (status < 0)
@@ -3787,7 +3839,7 @@
if (size == 0) // 0 length tag, read another
continue;
- if (id == 0x27C8) { // SimpleTag ID
+ if (id == libwebm::kMkvSimpleTag) {
status = ParseSimpleTag(pReader, pos, size);
if (status < 0)
@@ -3878,12 +3930,12 @@
if (size == 0) // weird
continue;
- if (id == 0x5A3) { // TagName ID
+ if (id == libwebm::kMkvTagName) {
status = UnserializeString(pReader, pos, size, m_tag_name);
if (status)
return status;
- } else if (id == 0x487) { // TagString ID
+ } else if (id == libwebm::kMkvTagString) {
status = UnserializeString(pReader, pos, size, m_tag_string);
if (status)
@@ -3943,12 +3995,12 @@
if (status < 0) // error
return status;
- if (id == 0x0AD7B1) { // Timecode Scale
+ if (id == libwebm::kMkvTimecodeScale) {
m_timecodeScale = UnserializeUInt(pReader, pos, size);
if (m_timecodeScale <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x0489) { // Segment duration
+ } else if (id == libwebm::kMkvDuration) {
const long status = UnserializeFloat(pReader, pos, size, m_duration);
if (status < 0)
@@ -3956,19 +4008,19 @@
if (m_duration < 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x0D80) { // MuxingApp
+ } else if (id == libwebm::kMkvMuxingApp) {
const long status =
UnserializeString(pReader, pos, size, m_pMuxingAppAsUTF8);
if (status)
return status;
- } else if (id == 0x1741) { // WritingApp
+ } else if (id == libwebm::kMkvWritingApp) {
const long status =
UnserializeString(pReader, pos, size, m_pWritingAppAsUTF8);
if (status)
return status;
- } else if (id == 0x3BA9) { // Title
+ } else if (id == libwebm::kMkvTitle) {
const long status = UnserializeString(pReader, pos, size, m_pTitleAsUTF8);
if (status)
@@ -3982,7 +4034,7 @@
}
const double rollover_check = m_duration * m_timecodeScale;
- if (rollover_check > LONG_LONG_MAX)
+ if (rollover_check > LLONG_MAX)
return E_FILE_FORMAT_INVALID;
if (pos != stop)
@@ -4123,8 +4175,7 @@
if (status < 0) // error
return status;
- if (id == 0x7E8) {
- // AESSettingsCipherMode
+ if (id == libwebm::kMkvAESSettingsCipherMode) {
aes->cipher_mode = UnserializeUInt(pReader, pos, size);
if (aes->cipher_mode != 1)
return E_FILE_FORMAT_INVALID;
@@ -4155,10 +4206,10 @@
if (status < 0) // error
return status;
- if (id == 0x1034) // ContentCompression ID
+ if (id == libwebm::kMkvContentCompression)
++compression_count;
- if (id == 0x1035) // ContentEncryption ID
+ if (id == libwebm::kMkvContentEncryption)
++encryption_count;
pos += size; // consume payload
@@ -4194,19 +4245,15 @@
if (status < 0) // error
return status;
- if (id == 0x1031) {
- // ContentEncodingOrder
+ if (id == libwebm::kMkvContentEncodingOrder) {
encoding_order_ = UnserializeUInt(pReader, pos, size);
- } else if (id == 0x1032) {
- // ContentEncodingScope
+ } else if (id == libwebm::kMkvContentEncodingScope) {
encoding_scope_ = UnserializeUInt(pReader, pos, size);
if (encoding_scope_ < 1)
return -1;
- } else if (id == 0x1033) {
- // ContentEncodingType
+ } else if (id == libwebm::kMkvContentEncodingType) {
encoding_type_ = UnserializeUInt(pReader, pos, size);
- } else if (id == 0x1034) {
- // ContentCompression ID
+ } else if (id == libwebm::kMkvContentCompression) {
ContentCompression* const compression =
new (std::nothrow) ContentCompression();
if (!compression)
@@ -4218,8 +4265,7 @@
return status;
}
*compression_entries_end_++ = compression;
- } else if (id == 0x1035) {
- // ContentEncryption ID
+ } else if (id == libwebm::kMkvContentEncryption) {
ContentEncryption* const encryption =
new (std::nothrow) ContentEncryption();
if (!encryption)
@@ -4260,15 +4306,13 @@
if (status < 0) // error
return status;
- if (id == 0x254) {
- // ContentCompAlgo
+ if (id == libwebm::kMkvContentCompAlgo) {
long long algo = UnserializeUInt(pReader, pos, size);
if (algo < 0)
return E_FILE_FORMAT_INVALID;
compression->algo = algo;
valid = true;
- } else if (id == 0x255) {
- // ContentCompSettings
+ } else if (id == libwebm::kMkvContentCompSettings) {
if (size <= 0)
return E_FILE_FORMAT_INVALID;
@@ -4315,13 +4359,11 @@
if (status < 0) // error
return status;
- if (id == 0x7E1) {
- // ContentEncAlgo
+ if (id == libwebm::kMkvContentEncAlgo) {
encryption->algo = UnserializeUInt(pReader, pos, size);
if (encryption->algo != 5)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x7E2) {
- // ContentEncKeyID
+ } else if (id == libwebm::kMkvContentEncKeyID) {
delete[] encryption->key_id;
encryption->key_id = NULL;
encryption->key_id_len = 0;
@@ -4343,8 +4385,7 @@
encryption->key_id = buf;
encryption->key_id_len = buflen;
- } else if (id == 0x7E3) {
- // ContentSignature
+ } else if (id == libwebm::kMkvContentSignature) {
delete[] encryption->signature;
encryption->signature = NULL;
encryption->signature_len = 0;
@@ -4366,8 +4407,7 @@
encryption->signature = buf;
encryption->signature_len = buflen;
- } else if (id == 0x7E4) {
- // ContentSigKeyID
+ } else if (id == libwebm::kMkvContentSigKeyID) {
delete[] encryption->sig_key_id;
encryption->sig_key_id = NULL;
encryption->sig_key_id_len = 0;
@@ -4389,14 +4429,11 @@
encryption->sig_key_id = buf;
encryption->sig_key_id_len = buflen;
- } else if (id == 0x7E5) {
- // ContentSigAlgo
+ } else if (id == libwebm::kMkvContentSigAlgo) {
encryption->sig_algo = UnserializeUInt(pReader, pos, size);
- } else if (id == 0x7E6) {
- // ContentSigHashAlgo
+ } else if (id == libwebm::kMkvContentSigHashAlgo) {
encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size);
- } else if (id == 0x7E7) {
- // ContentEncAESSettings
+ } else if (id == libwebm::kMkvContentEncAESSettings) {
const long status = ParseContentEncAESSettingsEntry(
pos, size, pReader, &encryption->aes_settings);
if (status)
@@ -4883,7 +4920,7 @@
return status;
// pos now designates start of element
- if (id == 0x2240) // ContentEncoding ID
+ if (id == libwebm::kMkvContentEncoding)
++count;
pos += size; // consume payload
@@ -4908,7 +4945,7 @@
return status;
// pos now designates start of element
- if (id == 0x2240) { // ContentEncoding ID
+ if (id == libwebm::kMkvContentEncoding) {
ContentEncoding* const content_encoding =
new (std::nothrow) ContentEncoding();
if (!content_encoding)
@@ -4940,9 +4977,222 @@
const Block* Track::EOSBlock::GetBlock() const { return NULL; }
+// Parses one chromaticity coordinate (the x component when |is_x| is true,
+// otherwise the y component) from the float payload of |value_size| bytes at
+// |read_pos| and stores it in |*chromaticity|. The object is allocated here
+// when |*chromaticity| is NULL; an existing object is updated in place.
+//
+// Returns false when |reader| or |chromaticity| is NULL, the payload cannot
+// be read, the value lies outside [0, 1] (NaN included), or allocation fails.
+// On failure |*chromaticity| is left exactly as it was on entry: an existing
+// object is neither modified nor freed, so the caller's pointer never dangles
+// and the caller stays the sole owner.
+bool PrimaryChromaticity::Parse(IMkvReader* reader, long long read_pos,
+                                long long value_size, bool is_x,
+                                PrimaryChromaticity** chromaticity) {
+  if (!reader || !chromaticity)
+    return false;
+
+  double parser_value = 0;
+  const long long value_parse_status =
+      UnserializeFloat(reader, read_pos, value_size, parser_value);
+
+  // Validate the double before narrowing it: converting an out-of-range
+  // double to float is undefined. The negated form also rejects NaN.
+  if (value_parse_status < 0 ||
+      !(parser_value >= 0.0 && parser_value <= 1.0)) {
+    return false;
+  }
+
+  // Allocate only once the value is known to be good, so that no failure
+  // path has to undo the allocation or touch the caller's object.
+  if (!*chromaticity) {
+    *chromaticity = new (std::nothrow) PrimaryChromaticity();
+    if (!*chromaticity)
+      return false;
+  }
+
+  float* const value = is_x ? &(*chromaticity)->x : &(*chromaticity)->y;
+  *value = static_cast<float>(parser_value);
+  return true;
+}
+
+// Parses a MasteringMetadata element whose payload occupies |mm_size| bytes
+// starting at |mm_start| and, on success, hands a newly allocated object to
+// the caller through |*mm|.
+//
+// Returns false when |reader| or |mm| is NULL, |*mm| is already set,
+// allocation fails, a child header or value cannot be parsed, a value is out
+// of range, or an unrecognised child element is encountered. |*mm| is only
+// written on success.
+bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start,
+                              long long mm_size, MasteringMetadata** mm) {
+  if (!reader || !mm || *mm)
+    return false;
+
+  // Non-throwing new, so that the NULL check below is meaningful.
+  std::auto_ptr<MasteringMetadata> mm_ptr(new (std::nothrow)
+                                              MasteringMetadata());
+  if (!mm_ptr.get())
+    return false;
+
+  const long long mm_end = mm_start + mm_size;
+  long long read_pos = mm_start;
+
+  while (read_pos < mm_end) {
+    long long child_id = 0;
+    long long child_size = 0;
+
+    const long long status =
+        ParseElementHeader(reader, read_pos, mm_end, child_id, child_size);
+    if (status < 0)
+      return false;
+
+    if (child_id == libwebm::kMkvLuminanceMax) {
+      double value = 0;
+      const long long value_parse_status =
+          UnserializeFloat(reader, read_pos, child_size, value);
+      // Range-check the double before narrowing it: converting an
+      // out-of-range double to float is undefined, and float rounding would
+      // otherwise push the upper bound itself over the limit. The negated
+      // form also rejects NaN.
+      if (value_parse_status < 0 || !(value >= 0.0 && value <= 9999.99))
+        return false;
+      mm_ptr->luminance_max = static_cast<float>(value);
+    } else if (child_id == libwebm::kMkvLuminanceMin) {
+      double value = 0;
+      const long long value_parse_status =
+          UnserializeFloat(reader, read_pos, child_size, value);
+      // Same validate-then-narrow order as for the maximum above.
+      if (value_parse_status < 0 || !(value >= 0.0 && value <= 999.9999))
+        return false;
+      mm_ptr->luminance_min = static_cast<float>(value);
+    } else {
+      // Every remaining recognised child is the X or Y component of one of
+      // the four chromaticity members; pick the member and the component.
+      bool is_x = false;
+      PrimaryChromaticity** chromaticity = NULL;
+      switch (child_id) {
+        case libwebm::kMkvPrimaryRChromaticityX:
+        case libwebm::kMkvPrimaryRChromaticityY:
+          is_x = child_id == libwebm::kMkvPrimaryRChromaticityX;
+          chromaticity = &mm_ptr->r;
+          break;
+        case libwebm::kMkvPrimaryGChromaticityX:
+        case libwebm::kMkvPrimaryGChromaticityY:
+          is_x = child_id == libwebm::kMkvPrimaryGChromaticityX;
+          chromaticity = &mm_ptr->g;
+          break;
+        case libwebm::kMkvPrimaryBChromaticityX:
+        case libwebm::kMkvPrimaryBChromaticityY:
+          is_x = child_id == libwebm::kMkvPrimaryBChromaticityX;
+          chromaticity = &mm_ptr->b;
+          break;
+        case libwebm::kMkvWhitePointChromaticityX:
+        case libwebm::kMkvWhitePointChromaticityY:
+          is_x = child_id == libwebm::kMkvWhitePointChromaticityX;
+          chromaticity = &mm_ptr->white_point;
+          break;
+        default:
+          return false;  // unrecognised child element
+      }
+      const bool value_parse_status = PrimaryChromaticity::Parse(
+          reader, read_pos, child_size, is_x, chromaticity);
+      if (!value_parse_status)
+        return false;
+    }
+
+    // Skip the payload of the child that was just handled.
+    read_pos += child_size;
+    if (read_pos > mm_end)
+      return false;
+  }
+
+  *mm = mm_ptr.release();
+  return true;
+}
+
+// Parses a Colour element whose payload occupies |colour_size| bytes starting
+// at |colour_start|. On success a newly allocated Colour is stored in
+// |*colour| and true is returned.
+//
+// Returns false when |reader| is NULL, |*colour| is already set, a child
+// header cannot be parsed, an integer child yields a negative result, the
+// nested MasteringMetadata fails to parse, an unrecognised child is present,
+// or a child runs past the end of the payload. |*colour| is only written on
+// success.
+bool Colour::Parse(IMkvReader* reader, long long colour_start,
+                   long long colour_size, Colour** colour) {
+  if (!reader || *colour)
+    return false;
+
+  std::auto_ptr<Colour> parsed(new Colour());
+  if (!parsed.get())
+    return false;
+
+  const long long payload_end = colour_start + colour_size;
+
+  for (long long cursor = colour_start; cursor < payload_end;) {
+    long long element_id = 0;
+    long long element_size = 0;
+
+    const long header_status = ParseElementHeader(reader, cursor, payload_end,
+                                                  element_id, element_size);
+    if (header_status < 0)
+      return false;
+
+    if (element_id == libwebm::kMkvMasteringMetadata) {
+      // The only nested child; it has its own parser.
+      if (!MasteringMetadata::Parse(reader, cursor, element_size,
+                                    &parsed->mastering_metadata)) {
+        return false;
+      }
+    } else {
+      // All other recognised children are unsigned integers that are read
+      // and validated the same way; only the destination field differs.
+      long long* field = NULL;
+      switch (element_id) {
+        case libwebm::kMkvMatrixCoefficients:
+          field = &parsed->matrix_coefficients;
+          break;
+        case libwebm::kMkvBitsPerChannel:
+          field = &parsed->bits_per_channel;
+          break;
+        case libwebm::kMkvChromaSubsamplingHorz:
+          field = &parsed->chroma_subsampling_horz;
+          break;
+        case libwebm::kMkvChromaSubsamplingVert:
+          field = &parsed->chroma_subsampling_vert;
+          break;
+        case libwebm::kMkvCbSubsamplingHorz:
+          field = &parsed->cb_subsampling_horz;
+          break;
+        case libwebm::kMkvCbSubsamplingVert:
+          field = &parsed->cb_subsampling_vert;
+          break;
+        case libwebm::kMkvChromaSitingHorz:
+          field = &parsed->chroma_siting_horz;
+          break;
+        case libwebm::kMkvChromaSitingVert:
+          field = &parsed->chroma_siting_vert;
+          break;
+        case libwebm::kMkvRange:
+          field = &parsed->range;
+          break;
+        case libwebm::kMkvTransferCharacteristics:
+          field = &parsed->transfer_characteristics;
+          break;
+        case libwebm::kMkvPrimaries:
+          field = &parsed->primaries;
+          break;
+        case libwebm::kMkvMaxCLL:
+          field = &parsed->max_cll;
+          break;
+        case libwebm::kMkvMaxFALL:
+          field = &parsed->max_fall;
+          break;
+        default:
+          return false;  // unrecognised child element
+      }
+
+      *field = UnserializeUInt(reader, cursor, element_size);
+      if (*field < 0)
+        return false;
+    }
+
+    // Skip the payload of the child that was just handled.
+    cursor += element_size;
+    if (cursor > payload_end)
+      return false;
+  }
+
+  *colour = parsed.release();
+  return true;
+}
+
VideoTrack::VideoTrack(Segment* pSegment, long long element_start,
long long element_size)
- : Track(pSegment, element_start, element_size) {}
+ : Track(pSegment, element_start, element_size), m_colour(NULL) {}
+
+// Deletes the Colour object held in m_colour (a no-op when it is NULL).
+VideoTrack::~VideoTrack() { delete m_colour; }
long VideoTrack::Parse(Segment* pSegment, const Info& info,
long long element_start, long long element_size,
@@ -4973,6 +5223,8 @@
const long long stop = pos + s.size;
+ Colour* colour = NULL;
+
while (pos < stop) {
long long id, size;
@@ -4981,37 +5233,37 @@
if (status < 0) // error
return status;
- if (id == 0x30) { // pixel width
+ if (id == libwebm::kMkvPixelWidth) {
width = UnserializeUInt(pReader, pos, size);
if (width <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x3A) { // pixel height
+ } else if (id == libwebm::kMkvPixelHeight) {
height = UnserializeUInt(pReader, pos, size);
if (height <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x14B0) { // display width
+ } else if (id == libwebm::kMkvDisplayWidth) {
display_width = UnserializeUInt(pReader, pos, size);
if (display_width <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x14BA) { // display height
+ } else if (id == libwebm::kMkvDisplayHeight) {
display_height = UnserializeUInt(pReader, pos, size);
if (display_height <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x14B2) { // display unit
+ } else if (id == libwebm::kMkvDisplayUnit) {
display_unit = UnserializeUInt(pReader, pos, size);
if (display_unit < 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x13B8) { // stereo mode
+ } else if (id == libwebm::kMkvStereoMode) {
stereo_mode = UnserializeUInt(pReader, pos, size);
if (stereo_mode < 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x0383E3) { // frame rate
+ } else if (id == libwebm::kMkvFrameRate) {
const long status = UnserializeFloat(pReader, pos, size, rate);
if (status < 0)
@@ -5019,6 +5271,9 @@
if (rate <= 0)
return E_FILE_FORMAT_INVALID;
+ } else if (id == libwebm::kMkvColour) {
+ if (!Colour::Parse(pReader, pos, size, &colour))
+ return E_FILE_FORMAT_INVALID;
}
pos += size; // consume payload
@@ -5049,6 +5304,7 @@
pTrack->m_display_unit = display_unit;
pTrack->m_stereo_mode = stereo_mode;
pTrack->m_rate = rate;
+ pTrack->m_colour = colour;
pResult = pTrack;
return 0; // success
@@ -5147,6 +5403,8 @@
return 0;
}
+// Returns the track's Colour object, or NULL when none has been assigned.
+// The track keeps ownership (its destructor deletes m_colour), so callers
+// must not delete the returned pointer.
+Colour* VideoTrack::GetColour() const { return m_colour; }
+
long long VideoTrack::GetWidth() const { return m_width; }
long long VideoTrack::GetHeight() const { return m_height; }
@@ -5201,7 +5459,7 @@
if (status < 0) // error
return status;
- if (id == 0x35) { // Sample Rate
+ if (id == libwebm::kMkvSamplingFrequency) {
status = UnserializeFloat(pReader, pos, size, rate);
if (status < 0)
@@ -5209,12 +5467,12 @@
if (rate <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x1F) { // Channel Count
+ } else if (id == libwebm::kMkvChannels) {
channels = UnserializeUInt(pReader, pos, size);
if (channels <= 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x2264) { // Bit Depth
+ } else if (id == libwebm::kMkvBitDepth) {
bit_depth = UnserializeUInt(pReader, pos, size);
if (bit_depth <= 0)
@@ -5287,7 +5545,7 @@
if (size == 0) // weird
continue;
- if (id == 0x2E) // TrackEntry ID
+ if (id == libwebm::kMkvTrackEntry)
++count;
pos += size; // consume payload
@@ -5329,13 +5587,12 @@
const long long element_size = payload_stop - element_start;
- if (id == 0x2E) { // TrackEntry ID
+ if (id == libwebm::kMkvTrackEntry) {
Track*& pTrack = *m_trackEntriesEnd;
pTrack = NULL;
const long status = ParseTrackEntry(pos, payload_size, element_start,
element_size, pTrack);
-
if (status)
return status;
@@ -5406,16 +5663,16 @@
const long long start = pos;
- if (id == 0x60) { // VideoSettings ID
+ if (id == libwebm::kMkvVideo) {
v.start = start;
v.size = size;
- } else if (id == 0x61) { // AudioSettings ID
+ } else if (id == libwebm::kMkvAudio) {
a.start = start;
a.size = size;
- } else if (id == 0x2D80) { // ContentEncodings ID
+ } else if (id == libwebm::kMkvContentEncodings) {
e.start = start;
e.size = size;
- } else if (id == 0x33C5) { // Track UID
+ } else if (id == libwebm::kMkvTrackUID) {
if (size > 8)
return E_FILE_FORMAT_INVALID;
@@ -5437,49 +5694,49 @@
++pos_;
}
- } else if (id == 0x57) { // Track Number
+ } else if (id == libwebm::kMkvTrackNumber) {
const long long num = UnserializeUInt(pReader, pos, size);
if ((num <= 0) || (num > 127))
return E_FILE_FORMAT_INVALID;
info.number = static_cast<long>(num);
- } else if (id == 0x03) { // Track Type
+ } else if (id == libwebm::kMkvTrackType) {
const long long type = UnserializeUInt(pReader, pos, size);
if ((type <= 0) || (type > 254))
return E_FILE_FORMAT_INVALID;
info.type = static_cast<long>(type);
- } else if (id == 0x136E) { // Track Name
+ } else if (id == libwebm::kMkvName) {
const long status =
UnserializeString(pReader, pos, size, info.nameAsUTF8);
if (status)
return status;
- } else if (id == 0x02B59C) { // Track Language
+ } else if (id == libwebm::kMkvLanguage) {
const long status = UnserializeString(pReader, pos, size, info.language);
if (status)
return status;
- } else if (id == 0x03E383) { // Default Duration
+ } else if (id == libwebm::kMkvDefaultDuration) {
const long long duration = UnserializeUInt(pReader, pos, size);
if (duration < 0)
return E_FILE_FORMAT_INVALID;
info.defaultDuration = static_cast<unsigned long long>(duration);
- } else if (id == 0x06) { // CodecID
+ } else if (id == libwebm::kMkvCodecID) {
const long status = UnserializeString(pReader, pos, size, info.codecId);
if (status)
return status;
- } else if (id == 0x1C) { // lacing
+ } else if (id == libwebm::kMkvFlagLacing) {
lacing = UnserializeUInt(pReader, pos, size);
if ((lacing < 0) || (lacing > 1))
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x23A2) { // Codec Private
+ } else if (id == libwebm::kMkvCodecPrivate) {
delete[] info.codecPrivate;
info.codecPrivate = NULL;
info.codecPrivateSize = 0;
@@ -5502,15 +5759,15 @@
info.codecPrivate = buf;
info.codecPrivateSize = buflen;
}
- } else if (id == 0x058688) { // Codec Name
+ } else if (id == libwebm::kMkvCodecName) {
const long status =
UnserializeString(pReader, pos, size, info.codecNameAsUTF8);
if (status)
return status;
- } else if (id == 0x16AA) { // Codec Delay
+ } else if (id == libwebm::kMkvCodecDelay) {
info.codecDelay = UnserializeUInt(pReader, pos, size);
- } else if (id == 0x16BB) { // Seek Pre Roll
+ } else if (id == libwebm::kMkvSeekPreRoll) {
info.seekPreRoll = UnserializeUInt(pReader, pos, size);
}
@@ -5649,98 +5906,88 @@
}
long Cluster::Load(long long& pos, long& len) const {
- assert(m_pSegment);
- assert(m_pos >= m_element_start);
+ if (m_pSegment == NULL)
+ return E_PARSE_FAILED;
if (m_timecode >= 0) // at least partially loaded
return 0;
- assert(m_pos == m_element_start);
- assert(m_element_size < 0);
+ if (m_pos != m_element_start || m_element_size >= 0)
+ return E_PARSE_FAILED;
IMkvReader* const pReader = m_pSegment->m_pReader;
-
long long total, avail;
-
const int status = pReader->Length(&total, &avail);
if (status < 0) // error
return status;
- assert((total < 0) || (avail <= total));
- assert((total < 0) || (m_pos <= total)); // TODO: verify this
+ if (total >= 0 && (avail > total || m_pos > total))
+ return E_FILE_FORMAT_INVALID;
pos = m_pos;
long long cluster_size = -1;
- {
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- long long result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error or underflow
- return static_cast<long>(result);
-
- if (result > 0) // underflow (weird)
- return E_BUFFER_NOT_FULL;
-
- // if ((pos + len) > segment_stop)
- // return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long id_ = ReadUInt(pReader, pos, len);
-
- if (id_ < 0) // error
- return static_cast<long>(id_);
-
- if (id_ != 0x0F43B675) // Cluster ID
- return E_FILE_FORMAT_INVALID;
-
- pos += len; // consume id
-
- // read cluster size
-
- if ((pos + 1) > avail) {
- len = 1;
- return E_BUFFER_NOT_FULL;
- }
-
- result = GetUIntLength(pReader, pos, len);
-
- if (result < 0) // error
- return static_cast<long>(result);
-
- if (result > 0) // weird
- return E_BUFFER_NOT_FULL;
-
- // if ((pos + len) > segment_stop)
- // return E_FILE_FORMAT_INVALID;
-
- if ((pos + len) > avail)
- return E_BUFFER_NOT_FULL;
-
- const long long size = ReadUInt(pReader, pos, len);
-
- if (size < 0) // error
- return static_cast<long>(cluster_size);
-
- if (size == 0)
- return E_FILE_FORMAT_INVALID; // TODO: verify this
-
- pos += len; // consume length of size of element
-
- const long long unknown_size = (1LL << (7 * len)) - 1;
-
- if (size != unknown_size)
- cluster_size = size;
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
}
+ long long result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error or underflow
+ return static_cast<long>(result);
+
+ if (result > 0)
+ return E_BUFFER_NOT_FULL;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long id_ = ReadID(pReader, pos, len);
+
+ if (id_ < 0) // error
+ return static_cast<long>(id_);
+
+ if (id_ != libwebm::kMkvCluster)
+ return E_FILE_FORMAT_INVALID;
+
+ pos += len; // consume id
+
+ // read cluster size
+
+ if ((pos + 1) > avail) {
+ len = 1;
+ return E_BUFFER_NOT_FULL;
+ }
+
+ result = GetUIntLength(pReader, pos, len);
+
+ if (result < 0) // error
+ return static_cast<long>(result);
+
+ if (result > 0)
+ return E_BUFFER_NOT_FULL;
+
+ if ((pos + len) > avail)
+ return E_BUFFER_NOT_FULL;
+
+ const long long size = ReadUInt(pReader, pos, len);
+
+ if (size < 0) // error
+ return static_cast<long>(cluster_size);
+
+ if (size == 0)
+ return E_FILE_FORMAT_INVALID;
+
+ pos += len; // consume length of size of element
+
+ const long long unknown_size = (1LL << (7 * len)) - 1;
+
+ if (size != unknown_size)
+ cluster_size = size;
+
// pos points to start of payload
long long timecode = -1;
long long new_pos = -1;
@@ -5764,7 +6011,7 @@
if (result < 0) // error
return static_cast<long>(result);
- if (result > 0) // weird
+ if (result > 0)
return E_BUFFER_NOT_FULL;
if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
@@ -5773,7 +6020,7 @@
if ((pos + len) > avail)
return E_BUFFER_NOT_FULL;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
if (id < 0) // error
return static_cast<long>(id);
@@ -5785,10 +6032,10 @@
// that we have exhausted the sub-element's inside the cluster
// whose ID we parsed earlier.
- if (id == 0x0F43B675) // Cluster ID
+ if (id == libwebm::kMkvCluster)
break;
- if (id == 0x0C53BB6B) // Cues ID
+ if (id == libwebm::kMkvCues)
break;
pos += len; // consume ID field
@@ -5805,7 +6052,7 @@
if (result < 0) // error
return static_cast<long>(result);
- if (result > 0) // weird
+ if (result > 0)
return E_BUFFER_NOT_FULL;
if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
@@ -5831,13 +6078,13 @@
// pos now points to start of payload
- if (size == 0) // weird
+ if (size == 0)
continue;
if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
return E_FILE_FORMAT_INVALID;
- if (id == 0x67) { // TimeCode ID
+ if (id == libwebm::kMkvTimecode) {
len = static_cast<long>(size);
if ((pos + size) > avail)
@@ -5852,10 +6099,10 @@
if (bBlock)
break;
- } else if (id == 0x20) { // BlockGroup ID
+ } else if (id == libwebm::kMkvBlockGroup) {
bBlock = true;
break;
- } else if (id == 0x23) { // SimpleBlock ID
+ } else if (id == libwebm::kMkvSimpleBlock) {
bBlock = true;
break;
}
@@ -5889,10 +6136,8 @@
if (status < 0)
return status;
- assert(m_pos >= m_element_start);
- assert(m_timecode >= 0);
- // assert(m_size > 0);
- // assert(m_element_size > m_size);
+ if (m_pos < m_element_start || m_timecode < 0)
+ return E_PARSE_FAILED;
const long long cluster_stop =
(m_element_size < 0) ? -1 : m_element_start + m_element_size;
@@ -5909,7 +6154,8 @@
if (status < 0) // error
return status;
- assert((total < 0) || (avail <= total));
+ if (total >= 0 && avail > total)
+ return E_FILE_FORMAT_INVALID;
pos = m_pos;
@@ -5936,7 +6182,7 @@
if (result < 0) // error
return static_cast<long>(result);
- if (result > 0) // weird
+ if (result > 0)
return E_BUFFER_NOT_FULL;
if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
@@ -5945,19 +6191,16 @@
if ((pos + len) > avail)
return E_BUFFER_NOT_FULL;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
- if (id < 0) // error
- return static_cast<long>(id);
-
- if (id == 0) // weird
+ if (id < 0)
return E_FILE_FORMAT_INVALID;
// This is the distinguished set of ID's we use to determine
// that we have exhausted the sub-element's inside the cluster
// whose ID we parsed earlier.
- if ((id == 0x0F43B675) || (id == 0x0C53BB6B)) { // Cluster or Cues ID
+ if ((id == libwebm::kMkvCluster) || (id == libwebm::kMkvCues)) {
if (m_element_size < 0)
m_element_size = pos - m_element_start;
@@ -5978,7 +6221,7 @@
if (result < 0) // error
return static_cast<long>(result);
- if (result > 0) // weird
+ if (result > 0)
return E_BUFFER_NOT_FULL;
if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
@@ -6004,7 +6247,7 @@
// pos now points to start of payload
- if (size == 0) // weird
+ if (size == 0)
continue;
// const long long block_start = pos;
@@ -6012,8 +6255,9 @@
if (cluster_stop >= 0) {
if (block_stop > cluster_stop) {
- if ((id == 0x20) || (id == 0x23))
+ if (id == libwebm::kMkvBlockGroup || id == libwebm::kMkvSimpleBlock) {
return E_FILE_FORMAT_INVALID;
+ }
pos = cluster_stop;
break;
@@ -6029,10 +6273,10 @@
Cluster* const this_ = const_cast<Cluster*>(this);
- if (id == 0x20) // BlockGroup
+ if (id == libwebm::kMkvBlockGroup)
return this_->ParseBlockGroup(size, pos, len);
- if (id == 0x23) // SimpleBlock
+ if (id == libwebm::kMkvSimpleBlock)
return this_->ParseSimpleBlock(size, pos, len);
pos += size; // consume payload
@@ -6040,7 +6284,8 @@
return E_FILE_FORMAT_INVALID;
}
- assert(m_element_size > 0);
+ if (m_element_size < 1)
+ return E_FILE_FORMAT_INVALID;
m_pos = pos;
if (cluster_stop >= 0 && m_pos > cluster_stop)
@@ -6050,23 +6295,26 @@
const long idx = m_entries_count - 1;
const BlockEntry* const pLast = m_entries[idx];
- assert(pLast);
+ if (pLast == NULL)
+ return E_PARSE_FAILED;
const Block* const pBlock = pLast->GetBlock();
- assert(pBlock);
+ if (pBlock == NULL)
+ return E_PARSE_FAILED;
const long long start = pBlock->m_start;
if ((total >= 0) && (start > total))
- return -1; // defend against trucated stream
+ return E_PARSE_FAILED; // defend against truncated stream
const long long size = pBlock->m_size;
const long long stop = start + size;
- assert((cluster_stop < 0) || (stop <= cluster_stop));
+ if (cluster_stop >= 0 && stop > cluster_stop)
+ return E_FILE_FORMAT_INVALID;
if ((total >= 0) && (stop > total))
- return -1; // defend against trucated stream
+ return E_PARSE_FAILED; // defend against truncated stream
}
return 1; // no more entries
@@ -6159,8 +6407,7 @@
return E_BUFFER_NOT_FULL;
}
- status = CreateBlock(0x23, // simple block id
- block_start, block_size,
+ status = CreateBlock(libwebm::kMkvSimpleBlock, block_start, block_size,
0); // DiscardPadding
if (status != 0)
@@ -6219,12 +6466,12 @@
if ((pos + len) > avail)
return E_BUFFER_NOT_FULL;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
if (id < 0) // error
return static_cast<long>(id);
- if (id == 0) // not a value ID
+ if (id == 0) // not a valid ID
return E_FILE_FORMAT_INVALID;
pos += len; // consume ID field
@@ -6270,14 +6517,14 @@
if (size == unknown_size)
return E_FILE_FORMAT_INVALID;
- if (id == 0x35A2) { // DiscardPadding
+ if (id == libwebm::kMkvDiscardPadding) {
status = UnserializeInt(pReader, pos, size, discard_padding);
if (status < 0) // error
return status;
}
- if (id != 0x21) { // sub-part of BlockGroup is not a Block
+ if (id != libwebm::kMkvBlock) {
pos += size; // consume sub-part of block group
if (pos > payload_stop)
@@ -6370,8 +6617,8 @@
if (pos != payload_stop)
return E_FILE_FORMAT_INVALID;
- status = CreateBlock(0x20, // BlockGroup ID
- payload_start, payload_size, discard_padding);
+ status = CreateBlock(libwebm::kMkvBlockGroup, payload_start, payload_size,
+ discard_padding);
if (status != 0)
return status;
@@ -6531,13 +6778,13 @@
if ((pos + len) > avail)
return E_BUFFER_NOT_FULL;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
if (id < 0) // error
return static_cast<long>(id);
- if (id != 0x0F43B675) // weird: not cluster ID
- return -1; // generic error
+ if (id != libwebm::kMkvCluster)
+ return E_PARSE_FAILED;
pos += len; // consume Cluster ID field
@@ -6615,7 +6862,7 @@
if ((pos + len) > avail)
return E_BUFFER_NOT_FULL;
- const long long id = ReadUInt(pReader, pos, len);
+ const long long id = ReadID(pReader, pos, len);
if (id < 0) // error
return static_cast<long>(id);
@@ -6624,10 +6871,10 @@
// that we have exhausted the sub-element's inside the cluster
// whose ID we parsed earlier.
- if (id == 0x0F43B675) // Cluster ID
+ if (id == libwebm::kMkvCluster)
return 0; // no entries found
- if (id == 0x0C53BB6B) // Cues ID
+ if (id == libwebm::kMkvCues)
return 0; // no entries found
pos += len; // consume id field
@@ -6679,10 +6926,10 @@
if ((cluster_stop >= 0) && ((pos + size) > cluster_stop))
return E_FILE_FORMAT_INVALID;
- if (id == 0x20) // BlockGroup ID
+ if (id == libwebm::kMkvBlockGroup)
return 1; // have at least one entry
- if (id == 0x23) // SimpleBlock ID
+ if (id == libwebm::kMkvSimpleBlock)
return 1; // have at least one entry
pos += size; // consume payload
@@ -6757,7 +7004,8 @@
long Cluster::CreateBlock(long long id,
long long pos, // absolute pos of payload
long long size, long long discard_padding) {
- assert((id == 0x20) || (id == 0x23)); // BlockGroup or SimpleBlock
+ if (id != libwebm::kMkvBlockGroup && id != libwebm::kMkvSimpleBlock)
+ return E_PARSE_FAILED;
if (m_entries_count < 0) { // haven't parsed anything yet
assert(m_entries == NULL);
@@ -6796,9 +7044,9 @@
}
}
- if (id == 0x20) // BlockGroup ID
+ if (id == libwebm::kMkvBlockGroup)
return CreateBlockGroup(pos, size, discard_padding);
- else // SimpleBlock ID
+ else
return CreateSimpleBlock(pos, size);
}
@@ -6841,12 +7089,12 @@
pos += len; // consume size
- if (id == 0x21) { // Block ID
+ if (id == libwebm::kMkvBlock) {
if (bpos < 0) { // Block ID
bpos = pos;
bsize = size;
}
- } else if (id == 0x1B) { // Duration ID
+ } else if (id == libwebm::kMkvBlockDuration) {
if (size > 8)
return E_FILE_FORMAT_INVALID;
@@ -6854,7 +7102,7 @@
if (duration < 0)
return E_FILE_FORMAT_INVALID;
- } else if (id == 0x7B) { // ReferenceBlock
+ } else if (id == libwebm::kMkvReferenceBlock) {
if (size > 8 || size <= 0)
return E_FILE_FORMAT_INVALID;
const long size_ = static_cast<long>(size);
@@ -6868,7 +7116,7 @@
if (time <= 0) // see note above
prev = time;
- else // weird
+ else
next = time;
}
@@ -7201,7 +7449,6 @@
BlockEntry::BlockEntry(Cluster* p, long idx) : m_pCluster(p), m_index(idx) {}
BlockEntry::~BlockEntry() {}
-bool BlockEntry::EOS() const { return (GetKind() == kBlockEOS); }
const Cluster* BlockEntry::GetCluster() const { return m_pCluster; }
long BlockEntry::GetIndex() const { return m_index; }
@@ -7525,7 +7772,6 @@
if (pf >= pf_end)
return E_FILE_FORMAT_INVALID;
-
const Frame& prev = *pf++;
assert(prev.len == frame_size);
if (prev.len != frame_size)
@@ -7691,4 +7937,4 @@
long long Block::GetDiscardPadding() const { return m_discard_padding; }
-} // end namespace mkvparser
+} // namespace mkvparser
diff --git a/libwebm/mkvparser.hpp b/libwebm/mkvparser/mkvparser.h
similarity index 89%
rename from libwebm/mkvparser.hpp
rename to libwebm/mkvparser/mkvparser.h
index 75ef69d..42e6e88 100644
--- a/libwebm/mkvparser.hpp
+++ b/libwebm/mkvparser/mkvparser.h
@@ -5,13 +5,10 @@
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
-
-#ifndef MKVPARSER_HPP
-#define MKVPARSER_HPP
+#ifndef MKVPARSER_MKVPARSER_H_
+#define MKVPARSER_MKVPARSER_H_
#include <cstddef>
-#include <cstdio>
-#include <cstdlib>
namespace mkvparser {
@@ -28,8 +25,9 @@
virtual ~IMkvReader();
};
-template<typename Type> Type* SafeArrayAlloc(unsigned long long num_elements,
- unsigned long long element_size);
+template <typename Type>
+Type* SafeArrayAlloc(unsigned long long num_elements,
+ unsigned long long element_size);
long long GetUIntLength(IMkvReader*, long long, long&);
long long ReadUInt(IMkvReader*, long long, long&);
long long ReadID(IMkvReader* pReader, long long pos, long& len);
@@ -128,7 +126,7 @@
public:
virtual ~BlockEntry();
- bool EOS() const;
+ bool EOS() const { return (GetKind() == kBlockEOS); }
const Cluster* GetCluster() const;
long GetIndex() const;
virtual const Block* GetBlock() const = 0;
@@ -391,6 +389,90 @@
ContentEncoding** content_encoding_entries_end_;
};
+// An (x, y) chromaticity coordinate pair. Both components start at 0 and are
+// filled in one at a time by Parse().
+struct PrimaryChromaticity {
+ PrimaryChromaticity() : x(0), y(0) {}
+ ~PrimaryChromaticity() {}
+ // Reads one float component from |value_size| bytes at |read_pos| into
+ // |*chromaticity|: x when |is_x| is true, y otherwise. The object is
+ // allocated when |*chromaticity| is NULL. Returns false on failure.
+ static bool Parse(IMkvReader* reader, long long read_pos,
+ long long value_size, bool is_x,
+ PrimaryChromaticity** chromaticity);
+ float x;
+ float y;
+};
+
+// Mastering metadata attached to a Colour: four optional chromaticity
+// points and a luminance range.
+// NOTE(review): the destructor deletes the pointer members while the copy
+// constructor and assignment operator are compiler-generated, so copying an
+// instance would delete the same objects twice - confirm no caller copies.
+struct MasteringMetadata {
+ // Initial value of luminance_max and luminance_min until a value is parsed.
+ static const float kValueNotPresent;
+
+ MasteringMetadata()
+ : r(NULL),
+ g(NULL),
+ b(NULL),
+ white_point(NULL),
+ luminance_max(kValueNotPresent),
+ luminance_min(kValueNotPresent) {}
+ // Deletes all four chromaticity objects; each may be NULL.
+ ~MasteringMetadata() {
+ delete r;
+ delete g;
+ delete b;
+ delete white_point;
+ }
+
+ // Parses the element payload of |element_size| bytes at |element_start| and
+ // stores a newly allocated object in |*mastering_metadata|, which must be
+ // NULL on entry. Returns false on failure.
+ static bool Parse(IMkvReader* reader, long long element_start,
+ long long element_size,
+ MasteringMetadata** mastering_metadata);
+
+ // Owned by this object; NULL until the matching child element is parsed.
+ PrimaryChromaticity* r;
+ PrimaryChromaticity* g;
+ PrimaryChromaticity* b;
+ PrimaryChromaticity* white_point;
+ float luminance_max;
+ float luminance_min;
+};
+
+// Colour information parsed from a video track's Colour element.
+// NOTE(review): the destructor deletes mastering_metadata while the copy
+// constructor and assignment operator are compiler-generated, so copying an
+// instance would delete the same object twice - confirm no caller copies.
+struct Colour {
+ // Initial value of every integer field until a value is parsed.
+ static const long long kValueNotPresent;
+
+ // Unless otherwise noted all values assigned upon construction are the
+ // equivalent of unspecified/default.
+ Colour()
+ : matrix_coefficients(kValueNotPresent),
+ bits_per_channel(kValueNotPresent),
+ chroma_subsampling_horz(kValueNotPresent),
+ chroma_subsampling_vert(kValueNotPresent),
+ cb_subsampling_horz(kValueNotPresent),
+ cb_subsampling_vert(kValueNotPresent),
+ chroma_siting_horz(kValueNotPresent),
+ chroma_siting_vert(kValueNotPresent),
+ range(kValueNotPresent),
+ transfer_characteristics(kValueNotPresent),
+ primaries(kValueNotPresent),
+ max_cll(kValueNotPresent),
+ max_fall(kValueNotPresent),
+ mastering_metadata(NULL) {}
+ // Deletes the owned mastering metadata, if any.
+ ~Colour() {
+ delete mastering_metadata;
+ mastering_metadata = NULL;
+ }
+
+ // Parses the element payload of |element_size| bytes at |element_start| and
+ // stores a newly allocated object in |*colour|, which must be NULL on
+ // entry. Returns false on failure.
+ static bool Parse(IMkvReader* reader, long long element_start,
+ long long element_size, Colour** colour);
+
+ long long matrix_coefficients;
+ long long bits_per_channel;
+ long long chroma_subsampling_horz;
+ long long chroma_subsampling_vert;
+ long long cb_subsampling_horz;
+ long long cb_subsampling_vert;
+ long long chroma_siting_horz;
+ long long chroma_siting_vert;
+ long long range;
+ long long transfer_characteristics;
+ long long primaries;
+ long long max_cll;
+ long long max_fall;
+
+ // Owned by this object; NULL until a MasteringMetadata child is parsed.
+ MasteringMetadata* mastering_metadata;
+};
+
class VideoTrack : public Track {
VideoTrack(const VideoTrack&);
VideoTrack& operator=(const VideoTrack&);
@@ -398,6 +480,7 @@
VideoTrack(Segment*, long long element_start, long long element_size);
public:
+ virtual ~VideoTrack();
static long Parse(Segment*, const Info&, long long element_start,
long long element_size, VideoTrack*&);
@@ -412,6 +495,8 @@
bool VetEntry(const BlockEntry*) const;
long Seek(long long time_ns, const BlockEntry*&) const;
+ Colour* GetColour() const;
+
private:
long long m_width;
long long m_height;
@@ -421,6 +506,8 @@
long long m_stereo_mode;
double m_rate;
+
+ Colour* m_colour;
};
class AudioTrack : public Track {
@@ -1013,7 +1100,7 @@
const BlockEntry* GetBlock(const CuePoint&, const CuePoint::TrackPosition&);
};
-} // end namespace mkvparser
+} // namespace mkvparser
inline long mkvparser::Segment::LoadCluster() {
long long pos;
@@ -1022,4 +1109,4 @@
return LoadCluster(pos, size);
}
-#endif // MKVPARSER_HPP
+#endif // MKVPARSER_MKVPARSER_H_
diff --git a/update_libvpx.sh b/update_libvpx.sh
index 92e40eb..4e41bf5 100755
--- a/update_libvpx.sh
+++ b/update_libvpx.sh
@@ -10,7 +10,7 @@
# Usage:
#
# $ ./update_libvpx.sh [branch | revision | file or url containing a revision]
-# When specifying a branch it must be prefixed with origin/
+# When specifying a branch it may be necessary to prefix with origin/
# Tools required for running this tool:
#
@@ -113,7 +113,7 @@
# Add and remove files.
echo "$add" | xargs -I {} git add {}
-echo "$delete" | xargs -I {} git rm {}
+echo "$delete" | xargs -I {} git rm --ignore-unmatch {}
# Find empty directories and remove them.
find . -type d -empty -exec git rm {} \;