libvpx: Pull from upstream
Current HEAD: c0061cc24f254d648737986ce14ac1a4bcb45874
git log from upstream:
5f732c2 Fix build for examples/vp8_multi_resolution_encoder.
00fc0e3 Tune SSSE3 implementation of fast path quantization
3316918 Remove an empty line
f60a117 Cleanup motion search speed features.
5820c5d Adds support for reading and writing 10/12-bit y4m
79199e4 Reverting "Adds support for reading and writing 10/12-bit y4m" for now because of Mac Build Failure.
15123db Cleanup vp9_rd.
03c276e Split vp9_rdopt into vp9_rdopt and vp9_rd.
4635a2b Cleaning up and simplifying read_frame_stats().
3bc1193 Fix rd threshold overflow issue
03a3ba4 VP9 denoiser implemented FILTER_BLOCK case
9c9922d VP9 denoising enabled by noise_sensitivity param
49741fe Replaced loops with vpx_memcpy()
8830772 Multi-arf: Add code to turn it on and off.
92a6db7 Added a speed feature controlling a motion search parameter
adf4293 Adapt strength of AQ2.
82dc133 Adds support for reading and writing 10/12-bit y4m
95853db vpxdec: add --keep-going option
9ac2f66 Re-design quantization process
0256a75 Allow lossless skipping in RD mode decision.
aaabbd6 Store/read 16x16 block statistics obtained from the first pass
ee38021 Add a test that tests invalid partitions for profile 1
f31ff02 Elevate NEWMV mode checking threshold in real time
<...>
3b9c19a Remove unused vp9_init_quant_tables function
TBR=tomfinegan@chromium.org
Review URL: https://codereview.chromium.org/375983002
git-svn-id: http://src.chromium.org/svn/trunk/deps/third_party/libvpx@281806 4ff67af0-8c30-449e-8e8b-ad334ec8d88c
diff --git a/README.chromium b/README.chromium
index c1f74bb..a0c927b 100644
--- a/README.chromium
+++ b/README.chromium
@@ -5,9 +5,9 @@
License File: source/libvpx/LICENSE
Security Critical: yes
-Date: Saturday June 21 2014
+Date: Tuesday July 8 2014
Branch: master
-Commit: 9be46e4565f553460a1bbbf58d9f99067d3242ce
+Commit: c0061cc24f254d648737986ce14ac1a4bcb45874
Description:
Contains the sources used to compile libvpx binaries used by Google Chrome and
diff --git a/libvpx_srcs_arm.gypi b/libvpx_srcs_arm.gypi
index 346280e..556b17d 100644
--- a/libvpx_srcs_arm.gypi
+++ b/libvpx_srcs_arm.gypi
@@ -269,6 +269,8 @@
'<(libvpx_source)/vp9/encoder/vp9_quantize.h',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.c',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.h',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.c',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.h',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.c',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.h',
'<(libvpx_source)/vp9/encoder/vp9_resize.c',
diff --git a/libvpx_srcs_arm64.gypi b/libvpx_srcs_arm64.gypi
index 677eba4..dd1b237 100644
--- a/libvpx_srcs_arm64.gypi
+++ b/libvpx_srcs_arm64.gypi
@@ -254,6 +254,8 @@
'<(libvpx_source)/vp9/encoder/vp9_quantize.h',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.c',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.h',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.c',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.h',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.c',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.h',
'<(libvpx_source)/vp9/encoder/vp9_resize.c',
diff --git a/libvpx_srcs_arm_neon.gypi b/libvpx_srcs_arm_neon.gypi
index e700dfb..987812a 100644
--- a/libvpx_srcs_arm_neon.gypi
+++ b/libvpx_srcs_arm_neon.gypi
@@ -320,6 +320,8 @@
'<(libvpx_source)/vp9/encoder/vp9_quantize.h',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.c',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.h',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.c',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.h',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.c',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.h',
'<(libvpx_source)/vp9/encoder/vp9_resize.c',
diff --git a/libvpx_srcs_arm_neon_cpu_detect.gypi b/libvpx_srcs_arm_neon_cpu_detect.gypi
index a107b27..ee93807 100644
--- a/libvpx_srcs_arm_neon_cpu_detect.gypi
+++ b/libvpx_srcs_arm_neon_cpu_detect.gypi
@@ -303,6 +303,8 @@
'<(libvpx_source)/vp9/encoder/vp9_quantize.h',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.c',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.h',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.c',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.h',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.c',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.h',
'<(libvpx_source)/vp9/encoder/vp9_resize.c',
diff --git a/libvpx_srcs_generic.gypi b/libvpx_srcs_generic.gypi
index 58d441d..37acf64 100644
--- a/libvpx_srcs_generic.gypi
+++ b/libvpx_srcs_generic.gypi
@@ -235,6 +235,8 @@
'<(libvpx_source)/vp9/encoder/vp9_quantize.h',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.c',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.h',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.c',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.h',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.c',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.h',
'<(libvpx_source)/vp9/encoder/vp9_resize.c',
diff --git a/libvpx_srcs_mips.gypi b/libvpx_srcs_mips.gypi
index 1c14a13..5e82d24 100644
--- a/libvpx_srcs_mips.gypi
+++ b/libvpx_srcs_mips.gypi
@@ -237,6 +237,8 @@
'<(libvpx_source)/vp9/encoder/vp9_quantize.h',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.c',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.h',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.c',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.h',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.c',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.h',
'<(libvpx_source)/vp9/encoder/vp9_resize.c',
diff --git a/libvpx_srcs_nacl.gypi b/libvpx_srcs_nacl.gypi
index 58d441d..37acf64 100644
--- a/libvpx_srcs_nacl.gypi
+++ b/libvpx_srcs_nacl.gypi
@@ -235,6 +235,8 @@
'<(libvpx_source)/vp9/encoder/vp9_quantize.h',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.c',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.h',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.c',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.h',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.c',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.h',
'<(libvpx_source)/vp9/encoder/vp9_resize.c',
diff --git a/libvpx_srcs_x86.gypi b/libvpx_srcs_x86.gypi
index dd71ba0..dc8afd7 100644
--- a/libvpx_srcs_x86.gypi
+++ b/libvpx_srcs_x86.gypi
@@ -5,7 +5,6 @@
{
'sources': [
- '<(libvpx_source)/third_party/x86inc/x86inc.asm',
'<(libvpx_source)/vp8/common/alloccommon.c',
'<(libvpx_source)/vp8/common/alloccommon.h',
'<(libvpx_source)/vp8/common/blockd.c',
@@ -281,6 +280,8 @@
'<(libvpx_source)/vp9/encoder/vp9_quantize.h',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.c',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.h',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.c',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.h',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.c',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.h',
'<(libvpx_source)/vp9/encoder/vp9_resize.c',
diff --git a/libvpx_srcs_x86_64.gypi b/libvpx_srcs_x86_64.gypi
index b5b9af2..cdf5b7d 100644
--- a/libvpx_srcs_x86_64.gypi
+++ b/libvpx_srcs_x86_64.gypi
@@ -5,7 +5,6 @@
{
'sources': [
- '<(libvpx_source)/third_party/x86inc/x86inc.asm',
'<(libvpx_source)/vp8/common/alloccommon.c',
'<(libvpx_source)/vp8/common/alloccommon.h',
'<(libvpx_source)/vp8/common/blockd.c',
@@ -284,6 +283,8 @@
'<(libvpx_source)/vp9/encoder/vp9_quantize.h',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.c',
'<(libvpx_source)/vp9/encoder/vp9_ratectrl.h',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.c',
+ '<(libvpx_source)/vp9/encoder/vp9_rd.h',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.c',
'<(libvpx_source)/vp9/encoder/vp9_rdopt.h',
'<(libvpx_source)/vp9/encoder/vp9_resize.c',
diff --git a/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h b/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h
index 4ab0f6b..05afa41 100644
--- a/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h
+++ b/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h
@@ -91,6 +91,10 @@
int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_uv_neon(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+RTCD_EXTERN int (*vp8_denoiser_filter_uv)(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_v6(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *output, int stride);
@@ -459,6 +463,8 @@
if (flags & HAS_NEON) vp8_dc_only_idct_add = vp8_dc_only_idct_add_neon;
vp8_denoiser_filter = vp8_denoiser_filter_c;
if (flags & HAS_NEON) vp8_denoiser_filter = vp8_denoiser_filter_neon;
+ vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_c;
+ if (flags & HAS_NEON) vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_neon;
vp8_dequant_idct_add = vp8_dequant_idct_add_c;
if (flags & HAS_MEDIA) vp8_dequant_idct_add = vp8_dequant_idct_add_v6;
if (flags & HAS_NEON) vp8_dequant_idct_add = vp8_dequant_idct_add_neon;
diff --git a/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h b/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
index af10f6f..cd62360 100644
--- a/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
+++ b/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
@@ -388,6 +388,9 @@
void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#define vp9_quantize_b_32x32 vp9_quantize_b_32x32_c
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+#define vp9_quantize_fp vp9_quantize_fp_c
+
int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
#define vp9_refining_search_sad vp9_refining_search_sad_c
diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.asm b/source/config/linux/arm-neon-cpu-detect/vpx_config.asm
index 416a4bf..c59bd7a 100644
--- a/source/config/linux/arm-neon-cpu-detect/vpx_config.asm
+++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.asm
@@ -84,4 +84,5 @@
.equ CONFIG_MULTIPLE_ARF , 0
.equ CONFIG_SPATIAL_SVC , 0
.equ CONFIG_DENOISING , 0
+.equ CONFIG_FP_MB_STATS , 0
.section .note.GNU-stack,"",%progbits
diff --git a/source/config/linux/arm-neon-cpu-detect/vpx_config.h b/source/config/linux/arm-neon-cpu-detect/vpx_config.h
index 6ef26ff..ee7d62d 100644
--- a/source/config/linux/arm-neon-cpu-detect/vpx_config.h
+++ b/source/config/linux/arm-neon-cpu-detect/vpx_config.h
@@ -93,4 +93,5 @@
#define CONFIG_MULTIPLE_ARF 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_DENOISING 0
+#define CONFIG_FP_MB_STATS 0
#endif /* VPX_CONFIG_H */
diff --git a/source/config/linux/arm-neon/vp8_rtcd.h b/source/config/linux/arm-neon/vp8_rtcd.h
index 184b486..7ebf52c 100644
--- a/source/config/linux/arm-neon/vp8_rtcd.h
+++ b/source/config/linux/arm-neon/vp8_rtcd.h
@@ -91,6 +91,10 @@
int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
#define vp8_denoiser_filter vp8_denoiser_filter_neon
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_uv_neon(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+#define vp8_denoiser_filter_uv vp8_denoiser_filter_uv_neon
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_v6(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *output, int stride);
diff --git a/source/config/linux/arm-neon/vp9_rtcd.h b/source/config/linux/arm-neon/vp9_rtcd.h
index 48004d1..29c2160 100644
--- a/source/config/linux/arm-neon/vp9_rtcd.h
+++ b/source/config/linux/arm-neon/vp9_rtcd.h
@@ -388,6 +388,9 @@
void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#define vp9_quantize_b_32x32 vp9_quantize_b_32x32_c
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+#define vp9_quantize_fp vp9_quantize_fp_c
+
int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
#define vp9_refining_search_sad vp9_refining_search_sad_c
diff --git a/source/config/linux/arm-neon/vpx_config.asm b/source/config/linux/arm-neon/vpx_config.asm
index c0bef17..58c0f03 100644
--- a/source/config/linux/arm-neon/vpx_config.asm
+++ b/source/config/linux/arm-neon/vpx_config.asm
@@ -84,4 +84,5 @@
.equ CONFIG_MULTIPLE_ARF , 0
.equ CONFIG_SPATIAL_SVC , 0
.equ CONFIG_DENOISING , 0
+.equ CONFIG_FP_MB_STATS , 0
.section .note.GNU-stack,"",%progbits
diff --git a/source/config/linux/arm-neon/vpx_config.h b/source/config/linux/arm-neon/vpx_config.h
index 3669aec..0ee231a 100644
--- a/source/config/linux/arm-neon/vpx_config.h
+++ b/source/config/linux/arm-neon/vpx_config.h
@@ -93,4 +93,5 @@
#define CONFIG_MULTIPLE_ARF 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_DENOISING 0
+#define CONFIG_FP_MB_STATS 0
#endif /* VPX_CONFIG_H */
diff --git a/source/config/linux/arm/vp8_rtcd.h b/source/config/linux/arm/vp8_rtcd.h
index 20208cc..ec35c11 100644
--- a/source/config/linux/arm/vp8_rtcd.h
+++ b/source/config/linux/arm/vp8_rtcd.h
@@ -82,6 +82,9 @@
int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
#define vp8_denoiser_filter vp8_denoiser_filter_c
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+#define vp8_denoiser_filter_uv vp8_denoiser_filter_uv_c
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_v6(short *input, short *dq, unsigned char *output, int stride);
#define vp8_dequant_idct_add vp8_dequant_idct_add_v6
diff --git a/source/config/linux/arm/vp9_rtcd.h b/source/config/linux/arm/vp9_rtcd.h
index ab4ac1e..8d6344c 100644
--- a/source/config/linux/arm/vp9_rtcd.h
+++ b/source/config/linux/arm/vp9_rtcd.h
@@ -352,6 +352,9 @@
void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#define vp9_quantize_b_32x32 vp9_quantize_b_32x32_c
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+#define vp9_quantize_fp vp9_quantize_fp_c
+
int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
#define vp9_refining_search_sad vp9_refining_search_sad_c
diff --git a/source/config/linux/arm/vpx_config.asm b/source/config/linux/arm/vpx_config.asm
index 68ef4b1..dee0569 100644
--- a/source/config/linux/arm/vpx_config.asm
+++ b/source/config/linux/arm/vpx_config.asm
@@ -84,4 +84,5 @@
.equ CONFIG_MULTIPLE_ARF , 0
.equ CONFIG_SPATIAL_SVC , 0
.equ CONFIG_DENOISING , 0
+.equ CONFIG_FP_MB_STATS , 0
.section .note.GNU-stack,"",%progbits
diff --git a/source/config/linux/arm/vpx_config.h b/source/config/linux/arm/vpx_config.h
index 62c430e..f61e89c 100644
--- a/source/config/linux/arm/vpx_config.h
+++ b/source/config/linux/arm/vpx_config.h
@@ -93,4 +93,5 @@
#define CONFIG_MULTIPLE_ARF 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_DENOISING 0
+#define CONFIG_FP_MB_STATS 0
#endif /* VPX_CONFIG_H */
diff --git a/source/config/linux/arm64/vp8_rtcd.h b/source/config/linux/arm64/vp8_rtcd.h
index 9601515..1a292e1 100644
--- a/source/config/linux/arm64/vp8_rtcd.h
+++ b/source/config/linux/arm64/vp8_rtcd.h
@@ -83,6 +83,10 @@
int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
#define vp8_denoiser_filter vp8_denoiser_filter_neon
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_uv_neon(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+#define vp8_denoiser_filter_uv vp8_denoiser_filter_uv_neon
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *output, int stride);
#define vp8_dequant_idct_add vp8_dequant_idct_add_neon
diff --git a/source/config/linux/arm64/vp9_rtcd.h b/source/config/linux/arm64/vp9_rtcd.h
index ab4ac1e..8d6344c 100644
--- a/source/config/linux/arm64/vp9_rtcd.h
+++ b/source/config/linux/arm64/vp9_rtcd.h
@@ -352,6 +352,9 @@
void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#define vp9_quantize_b_32x32 vp9_quantize_b_32x32_c
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+#define vp9_quantize_fp vp9_quantize_fp_c
+
int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
#define vp9_refining_search_sad vp9_refining_search_sad_c
diff --git a/source/config/linux/arm64/vpx_config.asm b/source/config/linux/arm64/vpx_config.asm
index 1877adb..d1a8cad 100644
--- a/source/config/linux/arm64/vpx_config.asm
+++ b/source/config/linux/arm64/vpx_config.asm
@@ -84,4 +84,5 @@
.equ CONFIG_MULTIPLE_ARF , 0
.equ CONFIG_SPATIAL_SVC , 0
.equ CONFIG_DENOISING , 0
+.equ CONFIG_FP_MB_STATS , 0
.section .note.GNU-stack,"",%progbits
diff --git a/source/config/linux/arm64/vpx_config.h b/source/config/linux/arm64/vpx_config.h
index 5e75000..9746b7c 100644
--- a/source/config/linux/arm64/vpx_config.h
+++ b/source/config/linux/arm64/vpx_config.h
@@ -93,4 +93,5 @@
#define CONFIG_MULTIPLE_ARF 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_DENOISING 0
+#define CONFIG_FP_MB_STATS 0
#endif /* VPX_CONFIG_H */
diff --git a/source/config/linux/generic/vp8_rtcd.h b/source/config/linux/generic/vp8_rtcd.h
index ef9fa5f..298886d 100644
--- a/source/config/linux/generic/vp8_rtcd.h
+++ b/source/config/linux/generic/vp8_rtcd.h
@@ -74,6 +74,9 @@
int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
#define vp8_denoiser_filter vp8_denoiser_filter_c
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+#define vp8_denoiser_filter_uv vp8_denoiser_filter_uv_c
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
#define vp8_dequant_idct_add vp8_dequant_idct_add_c
diff --git a/source/config/linux/generic/vp9_rtcd.h b/source/config/linux/generic/vp9_rtcd.h
index 2e245dc..aae324b 100644
--- a/source/config/linux/generic/vp9_rtcd.h
+++ b/source/config/linux/generic/vp9_rtcd.h
@@ -352,6 +352,9 @@
void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#define vp9_quantize_b_32x32 vp9_quantize_b_32x32_c
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+#define vp9_quantize_fp vp9_quantize_fp_c
+
int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
#define vp9_refining_search_sad vp9_refining_search_sad_c
diff --git a/source/config/linux/generic/vpx_config.asm b/source/config/linux/generic/vpx_config.asm
index c753a6e..0132f55 100644
--- a/source/config/linux/generic/vpx_config.asm
+++ b/source/config/linux/generic/vpx_config.asm
@@ -84,4 +84,5 @@
.equ CONFIG_MULTIPLE_ARF , 0
.equ CONFIG_SPATIAL_SVC , 0
.equ CONFIG_DENOISING , 0
+.equ CONFIG_FP_MB_STATS , 0
.section .note.GNU-stack,"",%progbits
diff --git a/source/config/linux/generic/vpx_config.h b/source/config/linux/generic/vpx_config.h
index 78f354e..625ed12 100644
--- a/source/config/linux/generic/vpx_config.h
+++ b/source/config/linux/generic/vpx_config.h
@@ -93,4 +93,5 @@
#define CONFIG_MULTIPLE_ARF 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_DENOISING 0
+#define CONFIG_FP_MB_STATS 0
#endif /* VPX_CONFIG_H */
diff --git a/source/config/linux/ia32/vp8_rtcd.h b/source/config/linux/ia32/vp8_rtcd.h
index 21fe8a1..4dc2d75 100644
--- a/source/config/linux/ia32/vp8_rtcd.h
+++ b/source/config/linux/ia32/vp8_rtcd.h
@@ -100,6 +100,10 @@
int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+RTCD_EXTERN int (*vp8_denoiser_filter_uv)(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride);
RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride);
@@ -526,6 +530,8 @@
if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx;
vp8_denoiser_filter = vp8_denoiser_filter_c;
if (flags & HAS_SSE2) vp8_denoiser_filter = vp8_denoiser_filter_sse2;
+ vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_c;
+ if (flags & HAS_SSE2) vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_sse2;
vp8_dequant_idct_add = vp8_dequant_idct_add_c;
if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx;
vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
diff --git a/source/config/linux/ia32/vp9_rtcd.h b/source/config/linux/ia32/vp9_rtcd.h
index a7aab82..61372c7 100644
--- a/source/config/linux/ia32/vp9_rtcd.h
+++ b/source/config/linux/ia32/vp9_rtcd.h
@@ -440,6 +440,9 @@
void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#define vp9_quantize_b_32x32 vp9_quantize_b_32x32_c
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+#define vp9_quantize_fp vp9_quantize_fp_c
+
int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_refining_search_sadx4(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_refining_search_sad)(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
diff --git a/source/config/linux/ia32/vpx_config.asm b/source/config/linux/ia32/vpx_config.asm
index bfd40e9..8bacc7e 100644
--- a/source/config/linux/ia32/vpx_config.asm
+++ b/source/config/linux/ia32/vpx_config.asm
@@ -81,3 +81,4 @@
CONFIG_MULTIPLE_ARF equ 0
CONFIG_SPATIAL_SVC equ 0
CONFIG_DENOISING equ 0
+CONFIG_FP_MB_STATS equ 0
diff --git a/source/config/linux/ia32/vpx_config.h b/source/config/linux/ia32/vpx_config.h
index 4468c96..3de10fd 100644
--- a/source/config/linux/ia32/vpx_config.h
+++ b/source/config/linux/ia32/vpx_config.h
@@ -93,4 +93,5 @@
#define CONFIG_MULTIPLE_ARF 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_DENOISING 0
+#define CONFIG_FP_MB_STATS 0
#endif /* VPX_CONFIG_H */
diff --git a/source/config/linux/mipsel/vp8_rtcd.h b/source/config/linux/mipsel/vp8_rtcd.h
index bfb056b..58dc2fb 100644
--- a/source/config/linux/mipsel/vp8_rtcd.h
+++ b/source/config/linux/mipsel/vp8_rtcd.h
@@ -74,6 +74,9 @@
int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
#define vp8_denoiser_filter vp8_denoiser_filter_c
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+#define vp8_denoiser_filter_uv vp8_denoiser_filter_uv_c
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
#define vp8_dequant_idct_add vp8_dequant_idct_add_c
diff --git a/source/config/linux/mipsel/vp9_rtcd.h b/source/config/linux/mipsel/vp9_rtcd.h
index 2e245dc..aae324b 100644
--- a/source/config/linux/mipsel/vp9_rtcd.h
+++ b/source/config/linux/mipsel/vp9_rtcd.h
@@ -352,6 +352,9 @@
void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#define vp9_quantize_b_32x32 vp9_quantize_b_32x32_c
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+#define vp9_quantize_fp vp9_quantize_fp_c
+
int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
#define vp9_refining_search_sad vp9_refining_search_sad_c
diff --git a/source/config/linux/mipsel/vpx_config.h b/source/config/linux/mipsel/vpx_config.h
index 4123554..f2012f7 100644
--- a/source/config/linux/mipsel/vpx_config.h
+++ b/source/config/linux/mipsel/vpx_config.h
@@ -93,4 +93,5 @@
#define CONFIG_MULTIPLE_ARF 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_DENOISING 0
+#define CONFIG_FP_MB_STATS 0
#endif /* VPX_CONFIG_H */
diff --git a/source/config/linux/x64/vp8_rtcd.h b/source/config/linux/x64/vp8_rtcd.h
index e826db2..7caa03a 100644
--- a/source/config/linux/x64/vp8_rtcd.h
+++ b/source/config/linux/x64/vp8_rtcd.h
@@ -100,6 +100,10 @@
int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
#define vp8_denoiser_filter vp8_denoiser_filter_sse2
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+#define vp8_denoiser_filter_uv vp8_denoiser_filter_uv_sse2
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride);
#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
diff --git a/source/config/linux/x64/vp9_rtcd.h b/source/config/linux/x64/vp9_rtcd.h
index 66ff424..427ad25 100644
--- a/source/config/linux/x64/vp9_rtcd.h
+++ b/source/config/linux/x64/vp9_rtcd.h
@@ -445,6 +445,10 @@
void vp9_quantize_b_32x32_ssse3(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
RTCD_EXTERN void (*vp9_quantize_b_32x32)(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+void vp9_quantize_fp_ssse3(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+RTCD_EXTERN void (*vp9_quantize_fp)(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+
int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_refining_search_sadx4(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_refining_search_sad)(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
@@ -971,6 +975,8 @@
if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3;
vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c;
if (flags & HAS_SSSE3) vp9_quantize_b_32x32 = vp9_quantize_b_32x32_ssse3;
+ vp9_quantize_fp = vp9_quantize_fp_c;
+ if (flags & HAS_SSSE3) vp9_quantize_fp = vp9_quantize_fp_ssse3;
vp9_refining_search_sad = vp9_refining_search_sad_c;
if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4;
vp9_sad16x16x3 = vp9_sad16x16x3_c;
diff --git a/source/config/linux/x64/vpx_config.asm b/source/config/linux/x64/vpx_config.asm
index afa3a78..19e06fe 100644
--- a/source/config/linux/x64/vpx_config.asm
+++ b/source/config/linux/x64/vpx_config.asm
@@ -81,3 +81,4 @@
CONFIG_MULTIPLE_ARF equ 0
CONFIG_SPATIAL_SVC equ 0
CONFIG_DENOISING equ 0
+CONFIG_FP_MB_STATS equ 0
diff --git a/source/config/linux/x64/vpx_config.h b/source/config/linux/x64/vpx_config.h
index 4d489aa..e121381 100644
--- a/source/config/linux/x64/vpx_config.h
+++ b/source/config/linux/x64/vpx_config.h
@@ -93,4 +93,5 @@
#define CONFIG_MULTIPLE_ARF 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_DENOISING 0
+#define CONFIG_FP_MB_STATS 0
#endif /* VPX_CONFIG_H */
diff --git a/source/config/mac/ia32/vp8_rtcd.h b/source/config/mac/ia32/vp8_rtcd.h
index 21fe8a1..4dc2d75 100644
--- a/source/config/mac/ia32/vp8_rtcd.h
+++ b/source/config/mac/ia32/vp8_rtcd.h
@@ -100,6 +100,10 @@
int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+RTCD_EXTERN int (*vp8_denoiser_filter_uv)(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride);
RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride);
@@ -526,6 +530,8 @@
if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx;
vp8_denoiser_filter = vp8_denoiser_filter_c;
if (flags & HAS_SSE2) vp8_denoiser_filter = vp8_denoiser_filter_sse2;
+ vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_c;
+ if (flags & HAS_SSE2) vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_sse2;
vp8_dequant_idct_add = vp8_dequant_idct_add_c;
if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx;
vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
diff --git a/source/config/mac/ia32/vp9_rtcd.h b/source/config/mac/ia32/vp9_rtcd.h
index 288130a..d4073a0 100644
--- a/source/config/mac/ia32/vp9_rtcd.h
+++ b/source/config/mac/ia32/vp9_rtcd.h
@@ -410,6 +410,9 @@
void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#define vp9_quantize_b_32x32 vp9_quantize_b_32x32_c
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+#define vp9_quantize_fp vp9_quantize_fp_c
+
int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_refining_search_sadx4(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_refining_search_sad)(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
diff --git a/source/config/mac/ia32/vpx_config.asm b/source/config/mac/ia32/vpx_config.asm
index ec16779..e876fcd 100644
--- a/source/config/mac/ia32/vpx_config.asm
+++ b/source/config/mac/ia32/vpx_config.asm
@@ -81,3 +81,4 @@
CONFIG_MULTIPLE_ARF equ 0
CONFIG_SPATIAL_SVC equ 0
CONFIG_DENOISING equ 0
+CONFIG_FP_MB_STATS equ 0
diff --git a/source/config/mac/ia32/vpx_config.h b/source/config/mac/ia32/vpx_config.h
index 1965acb..4f8de02 100644
--- a/source/config/mac/ia32/vpx_config.h
+++ b/source/config/mac/ia32/vpx_config.h
@@ -93,4 +93,5 @@
#define CONFIG_MULTIPLE_ARF 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_DENOISING 0
+#define CONFIG_FP_MB_STATS 0
#endif /* VPX_CONFIG_H */
diff --git a/source/config/mac/x64/vp8_rtcd.h b/source/config/mac/x64/vp8_rtcd.h
index e826db2..7caa03a 100644
--- a/source/config/mac/x64/vp8_rtcd.h
+++ b/source/config/mac/x64/vp8_rtcd.h
@@ -100,6 +100,10 @@
int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
#define vp8_denoiser_filter vp8_denoiser_filter_sse2
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+#define vp8_denoiser_filter_uv vp8_denoiser_filter_uv_sse2
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride);
#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
diff --git a/source/config/mac/x64/vp9_rtcd.h b/source/config/mac/x64/vp9_rtcd.h
index 66ff424..427ad25 100644
--- a/source/config/mac/x64/vp9_rtcd.h
+++ b/source/config/mac/x64/vp9_rtcd.h
@@ -445,6 +445,10 @@
void vp9_quantize_b_32x32_ssse3(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
RTCD_EXTERN void (*vp9_quantize_b_32x32)(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+void vp9_quantize_fp_ssse3(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+RTCD_EXTERN void (*vp9_quantize_fp)(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+
int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_refining_search_sadx4(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_refining_search_sad)(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
@@ -971,6 +975,8 @@
if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3;
vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c;
if (flags & HAS_SSSE3) vp9_quantize_b_32x32 = vp9_quantize_b_32x32_ssse3;
+ vp9_quantize_fp = vp9_quantize_fp_c;
+ if (flags & HAS_SSSE3) vp9_quantize_fp = vp9_quantize_fp_ssse3;
vp9_refining_search_sad = vp9_refining_search_sad_c;
if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4;
vp9_sad16x16x3 = vp9_sad16x16x3_c;
diff --git a/source/config/mac/x64/vpx_config.asm b/source/config/mac/x64/vpx_config.asm
index afa3a78..19e06fe 100644
--- a/source/config/mac/x64/vpx_config.asm
+++ b/source/config/mac/x64/vpx_config.asm
@@ -81,3 +81,4 @@
CONFIG_MULTIPLE_ARF equ 0
CONFIG_SPATIAL_SVC equ 0
CONFIG_DENOISING equ 0
+CONFIG_FP_MB_STATS equ 0
diff --git a/source/config/mac/x64/vpx_config.h b/source/config/mac/x64/vpx_config.h
index 4d489aa..e121381 100644
--- a/source/config/mac/x64/vpx_config.h
+++ b/source/config/mac/x64/vpx_config.h
@@ -93,4 +93,5 @@
#define CONFIG_MULTIPLE_ARF 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_DENOISING 0
+#define CONFIG_FP_MB_STATS 0
#endif /* VPX_CONFIG_H */
diff --git a/source/config/nacl/vp8_rtcd.h b/source/config/nacl/vp8_rtcd.h
index ef9fa5f..298886d 100644
--- a/source/config/nacl/vp8_rtcd.h
+++ b/source/config/nacl/vp8_rtcd.h
@@ -74,6 +74,9 @@
int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
#define vp8_denoiser_filter vp8_denoiser_filter_c
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+#define vp8_denoiser_filter_uv vp8_denoiser_filter_uv_c
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
#define vp8_dequant_idct_add vp8_dequant_idct_add_c
diff --git a/source/config/nacl/vp9_rtcd.h b/source/config/nacl/vp9_rtcd.h
index 2e245dc..aae324b 100644
--- a/source/config/nacl/vp9_rtcd.h
+++ b/source/config/nacl/vp9_rtcd.h
@@ -352,6 +352,9 @@
void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#define vp9_quantize_b_32x32 vp9_quantize_b_32x32_c
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+#define vp9_quantize_fp vp9_quantize_fp_c
+
int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
#define vp9_refining_search_sad vp9_refining_search_sad_c
diff --git a/source/config/nacl/vpx_config.asm b/source/config/nacl/vpx_config.asm
index c753a6e..0132f55 100644
--- a/source/config/nacl/vpx_config.asm
+++ b/source/config/nacl/vpx_config.asm
@@ -84,4 +84,5 @@
.equ CONFIG_MULTIPLE_ARF , 0
.equ CONFIG_SPATIAL_SVC , 0
.equ CONFIG_DENOISING , 0
+.equ CONFIG_FP_MB_STATS , 0
.section .note.GNU-stack,"",%progbits
diff --git a/source/config/nacl/vpx_config.h b/source/config/nacl/vpx_config.h
index 78f354e..625ed12 100644
--- a/source/config/nacl/vpx_config.h
+++ b/source/config/nacl/vpx_config.h
@@ -93,4 +93,5 @@
#define CONFIG_MULTIPLE_ARF 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_DENOISING 0
+#define CONFIG_FP_MB_STATS 0
#endif /* VPX_CONFIG_H */
diff --git a/source/config/win/ia32/vp8_rtcd.h b/source/config/win/ia32/vp8_rtcd.h
index 21fe8a1..4dc2d75 100644
--- a/source/config/win/ia32/vp8_rtcd.h
+++ b/source/config/win/ia32/vp8_rtcd.h
@@ -100,6 +100,10 @@
int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
RTCD_EXTERN int (*vp8_denoiser_filter)(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+RTCD_EXTERN int (*vp8_denoiser_filter_uv)(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride);
RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride);
@@ -526,6 +530,8 @@
if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx;
vp8_denoiser_filter = vp8_denoiser_filter_c;
if (flags & HAS_SSE2) vp8_denoiser_filter = vp8_denoiser_filter_sse2;
+ vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_c;
+ if (flags & HAS_SSE2) vp8_denoiser_filter_uv = vp8_denoiser_filter_uv_sse2;
vp8_dequant_idct_add = vp8_dequant_idct_add_c;
if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx;
vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
diff --git a/source/config/win/ia32/vp9_rtcd.h b/source/config/win/ia32/vp9_rtcd.h
index a7aab82..61372c7 100644
--- a/source/config/win/ia32/vp9_rtcd.h
+++ b/source/config/win/ia32/vp9_rtcd.h
@@ -440,6 +440,9 @@
void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#define vp9_quantize_b_32x32 vp9_quantize_b_32x32_c
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+#define vp9_quantize_fp vp9_quantize_fp_c
+
int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_refining_search_sadx4(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_refining_search_sad)(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
diff --git a/source/config/win/ia32/vpx_config.asm b/source/config/win/ia32/vpx_config.asm
index c5ae3b6..66e2406 100644
--- a/source/config/win/ia32/vpx_config.asm
+++ b/source/config/win/ia32/vpx_config.asm
@@ -81,3 +81,4 @@
CONFIG_MULTIPLE_ARF equ 0
CONFIG_SPATIAL_SVC equ 0
CONFIG_DENOISING equ 0
+CONFIG_FP_MB_STATS equ 0
diff --git a/source/config/win/ia32/vpx_config.h b/source/config/win/ia32/vpx_config.h
index 94986ea..6592c0d 100644
--- a/source/config/win/ia32/vpx_config.h
+++ b/source/config/win/ia32/vpx_config.h
@@ -93,4 +93,5 @@
#define CONFIG_MULTIPLE_ARF 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_DENOISING 0
+#define CONFIG_FP_MB_STATS 0
#endif /* VPX_CONFIG_H */
diff --git a/source/config/win/x64/vp8_rtcd.h b/source/config/win/x64/vp8_rtcd.h
index e826db2..7caa03a 100644
--- a/source/config/win/x64/vp8_rtcd.h
+++ b/source/config/win/x64/vp8_rtcd.h
@@ -100,6 +100,10 @@
int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
#define vp8_denoiser_filter vp8_denoiser_filter_sse2
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising);
+#define vp8_denoiser_filter_uv vp8_denoiser_filter_uv_sse2
+
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride);
#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
diff --git a/source/config/win/x64/vp9_rtcd.h b/source/config/win/x64/vp9_rtcd.h
index 66ff424..427ad25 100644
--- a/source/config/win/x64/vp9_rtcd.h
+++ b/source/config/win/x64/vp9_rtcd.h
@@ -445,6 +445,10 @@
void vp9_quantize_b_32x32_ssse3(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
RTCD_EXTERN void (*vp9_quantize_b_32x32)(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+void vp9_quantize_fp_ssse3(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+RTCD_EXTERN void (*vp9_quantize_fp)(const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
+
int vp9_refining_search_sad_c(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_refining_search_sadx4(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_refining_search_sad)(const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
@@ -971,6 +975,8 @@
if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3;
vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c;
if (flags & HAS_SSSE3) vp9_quantize_b_32x32 = vp9_quantize_b_32x32_ssse3;
+ vp9_quantize_fp = vp9_quantize_fp_c;
+ if (flags & HAS_SSSE3) vp9_quantize_fp = vp9_quantize_fp_ssse3;
vp9_refining_search_sad = vp9_refining_search_sad_c;
if (flags & HAS_SSE3) vp9_refining_search_sad = vp9_refining_search_sadx4;
vp9_sad16x16x3 = vp9_sad16x16x3_c;
diff --git a/source/config/win/x64/vpx_config.asm b/source/config/win/x64/vpx_config.asm
index 022fedf..892dff9 100644
--- a/source/config/win/x64/vpx_config.asm
+++ b/source/config/win/x64/vpx_config.asm
@@ -81,3 +81,4 @@
CONFIG_MULTIPLE_ARF equ 0
CONFIG_SPATIAL_SVC equ 0
CONFIG_DENOISING equ 0
+CONFIG_FP_MB_STATS equ 0
diff --git a/source/config/win/x64/vpx_config.h b/source/config/win/x64/vpx_config.h
index 68c4925..b095df7 100644
--- a/source/config/win/x64/vpx_config.h
+++ b/source/config/win/x64/vpx_config.h
@@ -93,4 +93,5 @@
#define CONFIG_MULTIPLE_ARF 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_DENOISING 0
+#define CONFIG_FP_MB_STATS 0
#endif /* VPX_CONFIG_H */
diff --git a/source/libvpx/README b/source/libvpx/README
index f209105..f9c24ff 100644
--- a/source/libvpx/README
+++ b/source/libvpx/README
@@ -55,6 +55,7 @@
armv6-linux-rvct
armv6-linux-gcc
armv6-none-rvct
+ arm64-darwin-gcc
armv7-android-gcc
armv7-darwin-gcc
armv7-linux-rvct
diff --git a/source/libvpx/build/make/configure.sh b/source/libvpx/build/make/configure.sh
index d4124c7..6881626 100755
--- a/source/libvpx/build/make/configure.sh
+++ b/source/libvpx/build/make/configure.sh
@@ -799,7 +799,7 @@
arm*)
# on arm, isa versions are supersets
case ${tgt_isa} in
- armv8)
+ arm64|armv8)
soft_enable neon
;;
armv7|armv7s)
@@ -1048,14 +1048,6 @@
esac
;;
x86*)
- bits=32
- enabled x86_64 && bits=64
- check_cpp <<EOF && bits=x32
-#ifndef __ILP32__
-#error "not x32"
-#endif
-EOF
-
case ${tgt_os} in
win*)
enabled gcc && add_cflags -fno-common
@@ -1094,8 +1086,6 @@
esac
;;
gcc*)
- add_cflags -m${bits}
- add_ldflags -m${bits}
link_with_cc=gcc
tune_cflags="-march="
setup_gnu_toolchain
@@ -1120,6 +1110,20 @@
;;
esac
+ bits=32
+ enabled x86_64 && bits=64
+ check_cpp <<EOF && bits=x32
+#ifndef __ILP32__
+#error "not x32"
+#endif
+EOF
+ case ${tgt_cc} in
+ gcc*)
+ add_cflags -m${bits}
+ add_ldflags -m${bits}
+ ;;
+ esac
+
soft_enable runtime_cpu_detect
# We can't use 'check_cflags' until the compiler is configured and CC is
# populated.
@@ -1222,10 +1226,12 @@
fi
fi
- # default use_x86inc to yes if pic is no or 64bit or we are not on darwin
- if [ ${tgt_isa} = x86_64 -o ! "$pic" = "yes" -o \
- "${tgt_os#darwin}" = "${tgt_os}" ]; then
- soft_enable use_x86inc
+ tgt_os_no_version=$(echo "${tgt_os}" | tr -d "[0-9]")
+ # Default use_x86inc to yes when we are 64 bit, non-pic, or on any
+ # non-Darwin target.
+ if [ "${tgt_isa}" = "x86_64" ] || [ "${pic}" != "yes" ] || \
+ [ "${tgt_os_no_version}" != "darwin" ]; then
+ soft_enable use_x86inc
fi
# Position Independent Code (PIC) support, for building relocatable
diff --git a/source/libvpx/build/make/iosbuild.sh b/source/libvpx/build/make/iosbuild.sh
index 9d9c374..35ae891 100755
--- a/source/libvpx/build/make/iosbuild.sh
+++ b/source/libvpx/build/make/iosbuild.sh
@@ -25,7 +25,8 @@
LIBVPX_SOURCE_DIR=$(dirname "$0" | sed -e s,/build/make,,)
LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
ORIG_PWD="$(pwd)"
-TARGETS="armv6-darwin-gcc
+TARGETS="arm64-darwin-gcc
+ armv6-darwin-gcc
armv7-darwin-gcc
armv7s-darwin-gcc
x86-iphonesimulator-gcc
@@ -54,6 +55,9 @@
target_to_preproc_symbol() {
target="$1"
case "${target}" in
+ arm64-*)
+ echo "__aarch64__"
+ ;;
armv6-*)
echo "__ARM_ARCH_6__"
;;
diff --git a/source/libvpx/configure b/source/libvpx/configure
index 9a7de73..b98480e 100755
--- a/source/libvpx/configure
+++ b/source/libvpx/configure
@@ -96,6 +96,7 @@
all_platforms="${all_platforms} armv6-linux-rvct"
all_platforms="${all_platforms} armv6-linux-gcc"
all_platforms="${all_platforms} armv6-none-rvct"
+all_platforms="${all_platforms} arm64-darwin-gcc"
all_platforms="${all_platforms} armv7-android-gcc" #neon Cortex-A8
all_platforms="${all_platforms} armv7-darwin-gcc" #neon Cortex-A8
all_platforms="${all_platforms} armv7-linux-rvct" #neon Cortex-A8
@@ -273,6 +274,7 @@
multiple_arf
spatial_svc
denoising
+ fp_mb_stats
"
CONFIG_LIST="
external_build
diff --git a/source/libvpx/examples.mk b/source/libvpx/examples.mk
index ce833fc..a47db04 100644
--- a/source/libvpx/examples.mk
+++ b/source/libvpx/examples.mk
@@ -179,7 +179,8 @@
ifeq ($(CONFIG_MULTI_RES_ENCODING),yes)
ifeq ($(CONFIG_LIBYUV),yes)
-EXAMPLES-$(CONFIG_VP8_DECODER) += vp8_multi_resolution_encoder.c
+EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8_multi_resolution_encoder.c
+vp8_multi_resolution_encoder.SRCS += tools_common.h tools_common.c
vp8_multi_resolution_encoder.SRCS += $(LIBYUV_SRCS)
vp8_multi_resolution_encoder.GUID = 04f8738e-63c8-423b-90fa-7c2703a374de
vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding
diff --git a/source/libvpx/examples/vp8_multi_resolution_encoder.c b/source/libvpx/examples/vp8_multi_resolution_encoder.c
index 1fef7db..d41e442 100644
--- a/source/libvpx/examples/vp8_multi_resolution_encoder.c
+++ b/source/libvpx/examples/vp8_multi_resolution_encoder.c
@@ -27,8 +27,9 @@
#define interface (vpx_codec_vp8_cx())
#define fourcc 0x30385056
-#define IVF_FILE_HDR_SZ (32)
-#define IVF_FRAME_HDR_SZ (12)
+void usage_exit() {
+ exit(EXIT_FAILURE);
+}
/*
* The input video frame is downsampled several times to generate a multi-level
@@ -45,25 +46,6 @@
#include "third_party/libyuv/include/libyuv/scale.h"
#include "third_party/libyuv/include/libyuv/cpu_id.h"
-static void die(const char *fmt, ...) {
- va_list ap;
-
- va_start(ap, fmt);
- vprintf(fmt, ap);
- if(fmt[strlen(fmt)-1] != '\n')
- printf("\n");
- exit(EXIT_FAILURE);
-}
-
-static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
- const char *detail = vpx_codec_error_detail(ctx);
-
- printf("%s: %s\n", s, vpx_codec_error(ctx));
- if(detail)
- printf(" %s\n",detail);
- exit(EXIT_FAILURE);
-}
-
int (*read_frame_p)(FILE *f, vpx_image_t *img);
static int read_frame(FILE *f, vpx_image_t *img) {
diff --git a/source/libvpx/examples/vp9_spatial_svc_encoder.c b/source/libvpx/examples/vp9_spatial_svc_encoder.c
index 5bc6575..223f37e 100644
--- a/source/libvpx/examples/vp9_spatial_svc_encoder.c
+++ b/source/libvpx/examples/vp9_spatial_svc_encoder.c
@@ -28,16 +28,6 @@
#include "vpx/vpx_encoder.h"
#include "./vpxstats.h"
-static const struct arg_enum_list encoding_mode_enum[] = {
- {"i", INTER_LAYER_PREDICTION_I},
- {"alt-ip", ALT_INTER_LAYER_PREDICTION_IP},
- {"ip", INTER_LAYER_PREDICTION_IP},
- {"gf", USE_GOLDEN_FRAME},
- {NULL, 0}
-};
-
-static const arg_def_t encoding_mode_arg = ARG_DEF_ENUM(
- "m", "encoding-mode", 1, "Encoding mode algorithm", encoding_mode_enum);
static const arg_def_t skip_frames_arg =
ARG_DEF("s", "skip-frames", 1, "input frames to skip");
static const arg_def_t frames_arg =
@@ -58,9 +48,6 @@
ARG_DEF("q", "quantizers", 1, "quantizers for non key frames, also will "
"be applied to key frames if -qn is not specified (lowest to "
"highest layer)");
-static const arg_def_t quantizers_keyframe_arg =
- ARG_DEF("qn", "quantizers-keyframe", 1, "quantizers for key frames (lowest "
- "to highest layer)");
static const arg_def_t passes_arg =
ARG_DEF("p", "passes", 1, "Number of passes (1/2)");
static const arg_def_t pass_arg =
@@ -77,16 +64,13 @@
ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");
static const arg_def_t *svc_args[] = {
- &encoding_mode_arg, &frames_arg, &width_arg, &height_arg,
+ &frames_arg, &width_arg, &height_arg,
&timebase_arg, &bitrate_arg, &skip_frames_arg, &layers_arg,
- &kf_dist_arg, &scale_factors_arg, &quantizers_arg,
- &quantizers_keyframe_arg, &passes_arg, &pass_arg,
- &fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg,
- &max_bitrate_arg, NULL
+ &kf_dist_arg, &scale_factors_arg, &quantizers_arg, &passes_arg,
+ &pass_arg, &fpf_name_arg, &min_q_arg, &max_q_arg,
+ &min_bitrate_arg, &max_bitrate_arg, NULL
};
-static const SVC_ENCODING_MODE default_encoding_mode =
- INTER_LAYER_PREDICTION_IP;
static const uint32_t default_frames_to_skip = 0;
static const uint32_t default_frames_to_code = 60 * 60;
static const uint32_t default_width = 1920;
@@ -135,7 +119,6 @@
// initialize SvcContext with parameters that will be passed to vpx_svc_init
svc_ctx->log_level = SVC_LOG_DEBUG;
svc_ctx->spatial_layers = default_spatial_layers;
- svc_ctx->encoding_mode = default_encoding_mode;
// start with default encoder configuration
res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
@@ -161,9 +144,7 @@
for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
arg.argv_step = 1;
- if (arg_match(&arg, &encoding_mode_arg, argi)) {
- svc_ctx->encoding_mode = arg_parse_enum_or_int(&arg);
- } else if (arg_match(&arg, &frames_arg, argi)) {
+ if (arg_match(&arg, &frames_arg, argi)) {
app_input->frames_to_code = arg_parse_uint(&arg);
} else if (arg_match(&arg, &width_arg, argi)) {
enc_cfg->g_w = arg_parse_uint(&arg);
@@ -183,9 +164,7 @@
} else if (arg_match(&arg, &scale_factors_arg, argi)) {
vpx_svc_set_scale_factors(svc_ctx, arg.val);
} else if (arg_match(&arg, &quantizers_arg, argi)) {
- vpx_svc_set_quantizers(svc_ctx, arg.val, 0);
- } else if (arg_match(&arg, &quantizers_keyframe_arg, argi)) {
- vpx_svc_set_quantizers(svc_ctx, arg.val, 1);
+ vpx_svc_set_quantizers(svc_ctx, arg.val);
} else if (arg_match(&arg, &passes_arg, argi)) {
passes = arg_parse_uint(&arg);
if (passes < 1 || passes > 2) {
@@ -270,12 +249,12 @@
printf(
"Codec %s\nframes: %d, skip: %d\n"
- "mode: %d, layers: %d\n"
+ "layers: %d\n"
"width %d, height: %d,\n"
"num: %d, den: %d, bitrate: %d,\n"
"gop size: %d\n",
vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
- app_input->frames_to_skip, svc_ctx->encoding_mode,
+ app_input->frames_to_skip,
svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
@@ -296,6 +275,7 @@
int frame_duration = 1; /* 1 timebase tick per frame */
FILE *infile = NULL;
int end_of_stream = 0;
+ int frame_size;
memset(&svc_ctx, 0, sizeof(svc_ctx));
svc_ctx.log_print = 1;
@@ -351,11 +331,10 @@
die_codec(&codec, "Failed to encode frame");
}
if (!(app_input.passes == 2 && app_input.pass == 1)) {
- if (vpx_svc_get_frame_size(&svc_ctx) > 0) {
+ while ((frame_size = vpx_svc_get_frame_size(&svc_ctx)) > 0) {
vpx_video_writer_write_frame(writer,
vpx_svc_get_buffer(&svc_ctx),
- vpx_svc_get_frame_size(&svc_ctx),
- pts);
+ frame_size, pts);
}
}
if (vpx_svc_get_rc_stats_buffer_size(&svc_ctx) > 0) {
diff --git a/source/libvpx/libs.mk b/source/libvpx/libs.mk
index 2b072b6..1e01639 100644
--- a/source/libvpx/libs.mk
+++ b/source/libvpx/libs.mk
@@ -170,7 +170,7 @@
CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c
INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
-CODEC_SRCS-$(BUILD_LIBVPX) += third_party/x86inc/x86inc.asm
+INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
endif
CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
diff --git a/source/libvpx/test/cpu_speed_test.cc b/source/libvpx/test/cpu_speed_test.cc
index be651b4..961a0b8 100644
--- a/source/libvpx/test/cpu_speed_test.cc
+++ b/source/libvpx/test/cpu_speed_test.cc
@@ -14,30 +14,49 @@
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
+#include "test/y4m_video_source.h"
namespace {
+const int kMaxPSNR = 100;
+
class CpuSpeedTest : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWith2Params<
libvpx_test::TestMode, int> {
protected:
- CpuSpeedTest() : EncoderTest(GET_PARAM(0)) {}
+ CpuSpeedTest()
+ : EncoderTest(GET_PARAM(0)),
+ encoding_mode_(GET_PARAM(1)),
+ set_cpu_used_(GET_PARAM(2)),
+ min_psnr_(kMaxPSNR) {}
virtual ~CpuSpeedTest() {}
virtual void SetUp() {
InitializeConfig();
- SetMode(GET_PARAM(1));
- set_cpu_used_ = GET_PARAM(2);
+ SetMode(encoding_mode_);
+ if (encoding_mode_ != ::libvpx_test::kRealTime) {
+ cfg_.g_lag_in_frames = 25;
+ cfg_.rc_end_usage = VPX_VBR;
+ } else {
+ cfg_.g_lag_in_frames = 0;
+ cfg_.rc_end_usage = VPX_CBR;
+ }
+ }
+
+ virtual void BeginPassHook(unsigned int /*pass*/) {
+ min_psnr_ = kMaxPSNR;
}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {
encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
- encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
- encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
- encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
- encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+ if (encoding_mode_ != ::libvpx_test::kRealTime) {
+ encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+ encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
+ encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+ encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+ }
}
}
@@ -45,7 +64,15 @@
if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
}
}
+
+ virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
+ if (pkt->data.psnr.psnr[0] < min_psnr_)
+ min_psnr_ = pkt->data.psnr.psnr[0];
+ }
+
+ ::libvpx_test::TestMode encoding_mode_;
int set_cpu_used_;
+ double min_psnr_;
};
TEST_P(CpuSpeedTest, TestQ0) {
@@ -53,7 +80,6 @@
// without a mismatch when passing in a very low max q. This pushes
// the encoder to producing lots of big partitions which will likely
// extend into the border and test the border condition.
- cfg_.g_lag_in_frames = 25;
cfg_.rc_2pass_vbr_minsection_pct = 5;
cfg_.rc_2pass_vbr_minsection_pct = 2000;
cfg_.rc_target_bitrate = 400;
@@ -63,16 +89,32 @@
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
20);
+ init_flags_ = VPX_CODEC_USE_PSNR;
+
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ EXPECT_GE(min_psnr_, kMaxPSNR);
}
+TEST_P(CpuSpeedTest, TestScreencastQ0) {
+ ::libvpx_test::Y4mVideoSource video("screendata.y4m", 0, 25);
+ cfg_.g_timebase = video.timebase();
+ cfg_.rc_2pass_vbr_minsection_pct = 5;
+ cfg_.rc_2pass_vbr_minsection_pct = 2000;
+ cfg_.rc_target_bitrate = 400;
+ cfg_.rc_max_quantizer = 0;
+ cfg_.rc_min_quantizer = 0;
+
+ init_flags_ = VPX_CODEC_USE_PSNR;
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ EXPECT_GE(min_psnr_, kMaxPSNR);
+}
TEST_P(CpuSpeedTest, TestEncodeHighBitrate) {
// Validate that this non multiple of 64 wide clip encodes and decodes
// without a mismatch when passing in a very low max q. This pushes
// the encoder to producing lots of big partitions which will likely
// extend into the border and test the border condition.
- cfg_.g_lag_in_frames = 25;
cfg_.rc_2pass_vbr_minsection_pct = 5;
cfg_.rc_2pass_vbr_minsection_pct = 2000;
cfg_.rc_target_bitrate = 12000;
@@ -89,7 +131,6 @@
// when passing in a very high min q. This pushes the encoder to producing
// lots of small partitions which might will test the other condition.
- cfg_.g_lag_in_frames = 25;
cfg_.rc_2pass_vbr_minsection_pct = 5;
cfg_.rc_2pass_vbr_minsection_pct = 2000;
cfg_.rc_target_bitrate = 200;
@@ -108,6 +149,7 @@
VP9_INSTANTIATE_TEST_CASE(
CpuSpeedTest,
- ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood),
+ ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
+ ::libvpx_test::kRealTime),
::testing::Range(0, 8));
} // namespace
diff --git a/source/libvpx/test/datarate_test.cc b/source/libvpx/test/datarate_test.cc
index 80be05e..8dcf26c 100644
--- a/source/libvpx/test/datarate_test.cc
+++ b/source/libvpx/test/datarate_test.cc
@@ -576,7 +576,7 @@
// Expect some frame drops in this test: for this 200 frames test,
// expect at least 10% and not more than 60% drops.
ASSERT_GE(num_drops_, 20);
- ASSERT_LE(num_drops_, 120);
+ ASSERT_LE(num_drops_, 130);
}
}
diff --git a/source/libvpx/test/decode_test_driver.cc b/source/libvpx/test/decode_test_driver.cc
index 2defacc..8bea4cc 100644
--- a/source/libvpx/test/decode_test_driver.cc
+++ b/source/libvpx/test/decode_test_driver.cc
@@ -15,13 +15,27 @@
namespace libvpx_test {
+const char kVP8Name[] = "WebM Project VP8";
+
+vpx_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size,
+ vpx_codec_stream_info_t *stream_info) {
+ return vpx_codec_peek_stream_info(CodecInterface(),
+ cxdata, static_cast<unsigned int>(size),
+ stream_info);
+}
+
vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size) {
+ return DecodeFrame(cxdata, size, NULL);
+}
+
+vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size,
+ void *user_priv) {
vpx_codec_err_t res_dec;
InitOnce();
REGISTER_STATE_CHECK(
res_dec = vpx_codec_decode(&decoder_,
cxdata, static_cast<unsigned int>(size),
- NULL, 0));
+ user_priv, 0));
return res_dec;
}
@@ -29,10 +43,33 @@
vpx_codec_dec_cfg_t dec_cfg = {0};
Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
ASSERT_TRUE(decoder != NULL);
+ const char *codec_name = decoder->GetDecoderName();
+ const bool is_vp8 = strncmp(kVP8Name, codec_name, sizeof(kVP8Name) - 1) == 0;
// Decode frames.
- for (video->Begin(); video->cxdata(); video->Next()) {
+ for (video->Begin(); !::testing::Test::HasFailure() && video->cxdata();
+ video->Next()) {
PreDecodeFrameHook(*video, decoder);
+
+ vpx_codec_stream_info_t stream_info;
+ stream_info.sz = sizeof(stream_info);
+ const vpx_codec_err_t res_peek = decoder->PeekStream(video->cxdata(),
+ video->frame_size(),
+ &stream_info);
+ if (is_vp8) {
+ /* Vp8's implementation of PeekStream returns an error if the frame you
+ * pass it is not a keyframe, so we only expect VPX_CODEC_OK on the first
+ * frame, which must be a keyframe. */
+ if (video->frame_number() == 0)
+ ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: "
+ << vpx_codec_err_to_string(res_peek);
+ } else {
+ /* The Vp9 implementation of PeekStream returns an error only if the
+ * data passed to it isn't a valid Vp9 chunk. */
+ ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: "
+ << vpx_codec_err_to_string(res_peek);
+ }
+
vpx_codec_err_t res_dec = decoder->DecodeFrame(video->cxdata(),
video->frame_size());
if (!HandleDecodeResult(res_dec, *video, decoder))
diff --git a/source/libvpx/test/decode_test_driver.h b/source/libvpx/test/decode_test_driver.h
index 4f7294c..dd3593e 100644
--- a/source/libvpx/test/decode_test_driver.h
+++ b/source/libvpx/test/decode_test_driver.h
@@ -49,8 +49,14 @@
vpx_codec_destroy(&decoder_);
}
+ vpx_codec_err_t PeekStream(const uint8_t *cxdata, size_t size,
+ vpx_codec_stream_info_t *stream_info);
+
vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size);
+ vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size,
+ void *user_priv);
+
DxDataIterator GetDxData() {
return DxDataIterator(&decoder_);
}
@@ -85,6 +91,10 @@
&decoder_, cb_get, cb_release, user_priv);
}
+ const char* GetDecoderName() {
+ return vpx_codec_iface_name(CodecInterface());
+ }
+
protected:
virtual vpx_codec_iface_t* CodecInterface() const = 0;
diff --git a/source/libvpx/test/invalid_file_test.cc b/source/libvpx/test/invalid_file_test.cc
index e7f2a48..8c83034 100644
--- a/source/libvpx/test/invalid_file_test.cc
+++ b/source/libvpx/test/invalid_file_test.cc
@@ -94,7 +94,12 @@
}
const char *const kVP9InvalidFileTests[] = {
- "invalid-vp90-01.webm"
+ "invalid-vp90-01.webm",
+ "invalid-vp90-02.webm",
+ "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf",
+ "invalid-vp90-03.webm",
+ "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf",
+ "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf",
};
#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))
diff --git a/source/libvpx/test/md5_helper.h b/source/libvpx/test/md5_helper.h
index dd446f4..dc95582 100644
--- a/source/libvpx/test/md5_helper.h
+++ b/source/libvpx/test/md5_helper.h
@@ -28,10 +28,11 @@
// plane, we never want to round down and thus skip a pixel so if
// we are shifting by 1 (chroma_shift) we add 1 before doing the shift.
// This works only for chroma_shift of 0 and 1.
+ const int bytes_per_sample = (img->fmt & VPX_IMG_FMT_HIGH) ? 2 : 1;
const int h = plane ? (img->d_h + img->y_chroma_shift) >>
img->y_chroma_shift : img->d_h;
- const int w = plane ? (img->d_w + img->x_chroma_shift) >>
- img->x_chroma_shift : img->d_w;
+ const int w = (plane ? (img->d_w + img->x_chroma_shift) >>
+ img->x_chroma_shift : img->d_w) * bytes_per_sample;
for (int y = 0; y < h; ++y) {
MD5Update(&md5_, buf, w);
diff --git a/source/libvpx/test/svc_test.cc b/source/libvpx/test/svc_test.cc
index db26a8e..417790b 100644
--- a/source/libvpx/test/svc_test.cc
+++ b/source/libvpx/test/svc_test.cc
@@ -41,7 +41,6 @@
virtual ~SvcTest() {}
virtual void SetUp() {
- svc_.encoding_mode = INTER_LAYER_PREDICTION_IP;
svc_.log_level = SVC_LOG_DEBUG;
svc_.log_print = 0;
@@ -131,22 +130,13 @@
EXPECT_EQ(3, svc_.spatial_layers);
}
-TEST_F(SvcTest, SetEncodingMode) {
- vpx_codec_err_t res = vpx_svc_set_options(&svc_, "encoding-mode=alt-ip");
- EXPECT_EQ(VPX_CODEC_OK, res);
- res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
- EXPECT_EQ(ALT_INTER_LAYER_PREDICTION_IP, svc_.encoding_mode);
-}
-
TEST_F(SvcTest, SetMultipleOptions) {
- vpx_codec_err_t res = vpx_svc_set_options(&svc_, "layers=2 encoding-mode=ip");
+ vpx_codec_err_t res =
+ vpx_svc_set_options(&svc_, "layers=2 scale-factors=1/3,2/3");
res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
EXPECT_EQ(VPX_CODEC_OK, res);
codec_initialized_ = true;
EXPECT_EQ(2, svc_.spatial_layers);
- EXPECT_EQ(INTER_LAYER_PREDICTION_IP, svc_.encoding_mode);
}
TEST_F(SvcTest, SetScaleFactorsOption) {
@@ -177,48 +167,20 @@
codec_initialized_ = true;
}
-TEST_F(SvcTest, SetKeyFrameQuantizersOption) {
- svc_.spatial_layers = 2;
- vpx_codec_err_t res = vpx_svc_set_options(&svc_,
- "quantizers-keyframe=not-quantizers");
- EXPECT_EQ(VPX_CODEC_OK, res);
- res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
-
- vpx_svc_set_options(&svc_, "quantizers-keyframe=40,45");
- res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
-}
-
TEST_F(SvcTest, SetQuantizers) {
- vpx_codec_err_t res = vpx_svc_set_quantizers(NULL, "40,30", 0);
+ vpx_codec_err_t res = vpx_svc_set_quantizers(NULL, "40,30");
EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
- res = vpx_svc_set_quantizers(&svc_, NULL, 0);
+ res = vpx_svc_set_quantizers(&svc_, NULL);
EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
svc_.spatial_layers = 2;
- res = vpx_svc_set_quantizers(&svc_, "40", 0);
+ res = vpx_svc_set_quantizers(&svc_, "40");
EXPECT_EQ(VPX_CODEC_OK, res);
res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
- res = vpx_svc_set_quantizers(&svc_, "40,30", 0);
- EXPECT_EQ(VPX_CODEC_OK, res);
- res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
- EXPECT_EQ(VPX_CODEC_OK, res);
- codec_initialized_ = true;
-}
-
-TEST_F(SvcTest, SetKeyFrameQuantizers) {
- vpx_codec_err_t res = vpx_svc_set_quantizers(NULL, "40,31", 1);
- EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
-
- res = vpx_svc_set_quantizers(&svc_, NULL, 1);
- EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
-
- res = vpx_svc_set_quantizers(&svc_, "40,30", 1);
+ res = vpx_svc_set_quantizers(&svc_, "40,30");
EXPECT_EQ(VPX_CODEC_OK, res);
res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
EXPECT_EQ(VPX_CODEC_OK, res);
@@ -249,7 +211,7 @@
TEST_F(SvcTest, FirstFrameHasLayers) {
svc_.spatial_layers = 2;
vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
- vpx_svc_set_quantizers(&svc_, "40,30", 0);
+ vpx_svc_set_quantizers(&svc_, "40,30");
vpx_codec_err_t res =
vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
@@ -265,9 +227,17 @@
video.duration(), VPX_DL_GOOD_QUALITY);
EXPECT_EQ(VPX_CODEC_OK, res);
+ if (vpx_svc_get_frame_size(&svc_) == 0) {
+ // Flush encoder
+ res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
+ video.duration(), VPX_DL_GOOD_QUALITY);
+ EXPECT_EQ(VPX_CODEC_OK, res);
+ }
+
+ int frame_size = vpx_svc_get_frame_size(&svc_);
+ EXPECT_GT(frame_size, 0);
const vpx_codec_err_t res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
- vpx_svc_get_frame_size(&svc_));
+ static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
// this test fails with a decoder error
ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
@@ -276,7 +246,10 @@
TEST_F(SvcTest, EncodeThreeFrames) {
svc_.spatial_layers = 2;
vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
- vpx_svc_set_quantizers(&svc_, "40,30", 0);
+ vpx_svc_set_quantizers(&svc_, "40,30");
+ int decoded_frames = 0;
+ vpx_codec_err_t res_dec;
+ int frame_size;
vpx_codec_err_t res =
vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
@@ -291,13 +264,14 @@
// This frame is a keyframe.
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
- ASSERT_EQ(VPX_CODEC_OK, res);
- EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
- vpx_codec_err_t res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
- vpx_svc_get_frame_size(&svc_));
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
+ EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
+ res_dec = decoder_->DecodeFrame(
+ static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ ++decoded_frames;
+ }
// FRAME 1
video.Next();
@@ -305,12 +279,14 @@
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
- EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
- res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
- vpx_svc_get_frame_size(&svc_));
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
+ EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
+ res_dec = decoder_->DecodeFrame(
+ static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ ++decoded_frames;
+ }
// FRAME 2
video.Next();
@@ -318,18 +294,35 @@
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
- EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
- res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
- vpx_svc_get_frame_size(&svc_));
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
+ EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
+ res_dec = decoder_->DecodeFrame(
+ static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ ++decoded_frames;
+ }
+
+ // Flush encoder
+ res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
+ video.duration(), VPX_DL_GOOD_QUALITY);
+ EXPECT_EQ(VPX_CODEC_OK, res);
+
+ while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
+ EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
+ res_dec = decoder_->DecodeFrame(
+ static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ ++decoded_frames;
+ }
+
+ EXPECT_EQ(decoded_frames, 3);
}
TEST_F(SvcTest, GetLayerResolution) {
svc_.spatial_layers = 2;
vpx_svc_set_scale_factors(&svc_, "4/16,8/16");
- vpx_svc_set_quantizers(&svc_, "40,30", 0);
+ vpx_svc_set_quantizers(&svc_, "40,30");
vpx_codec_err_t res =
vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
@@ -368,7 +361,7 @@
svc_.spatial_layers = 2;
codec_enc_.g_pass = VPX_RC_FIRST_PASS;
vpx_svc_set_scale_factors(&svc_, "4/16,16/16");
- vpx_svc_set_quantizers(&svc_, "40,30", 0);
+ vpx_svc_set_quantizers(&svc_, "40,30");
vpx_codec_err_t res =
vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
@@ -413,6 +406,9 @@
vpx_codec_destroy(&codec_);
// Second pass encode
+ int decoded_frames = 0;
+ vpx_codec_err_t res_dec;
+ int frame_size;
codec_enc_.g_pass = VPX_RC_LAST_PASS;
codec_enc_.rc_twopass_stats_in.buf = &stats_buf[0];
codec_enc_.rc_twopass_stats_in.sz = stats_buf.size();
@@ -427,12 +423,14 @@
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
- EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
- vpx_codec_err_t res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
- vpx_svc_get_frame_size(&svc_));
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
+ EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
+ res_dec = decoder_->DecodeFrame(
+ static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ ++decoded_frames;
+ }
// FRAME 1
video.Next();
@@ -440,12 +438,14 @@
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
- EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
- res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
- vpx_svc_get_frame_size(&svc_));
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
+ EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
+ res_dec = decoder_->DecodeFrame(
+ static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ ++decoded_frames;
+ }
// FRAME 2
video.Next();
@@ -453,12 +453,29 @@
res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(),
video.duration(), VPX_DL_GOOD_QUALITY);
ASSERT_EQ(VPX_CODEC_OK, res);
- EXPECT_EQ(0, vpx_svc_is_keyframe(&svc_));
- res_dec = decoder_->DecodeFrame(
- static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)),
- vpx_svc_get_frame_size(&svc_));
- ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ if ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
+ EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
+ res_dec = decoder_->DecodeFrame(
+ static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ ++decoded_frames;
+ }
+
+ // Flush encoder
+ res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
+ video.duration(), VPX_DL_GOOD_QUALITY);
+ EXPECT_EQ(VPX_CODEC_OK, res);
+
+ while ((frame_size = vpx_svc_get_frame_size(&svc_)) > 0) {
+ EXPECT_EQ((decoded_frames == 0), vpx_svc_is_keyframe(&svc_));
+ res_dec = decoder_->DecodeFrame(
+ static_cast<const uint8_t *>(vpx_svc_get_buffer(&svc_)), frame_size);
+ ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder_->DecodeError();
+ ++decoded_frames;
+ }
+
+ EXPECT_EQ(decoded_frames, 3);
}
} // namespace
diff --git a/source/libvpx/test/test-data.sha1 b/source/libvpx/test/test-data.sha1
index adfe15e..f9c09c6 100644
--- a/source/libvpx/test/test-data.sha1
+++ b/source/libvpx/test/test-data.sha1
@@ -2,6 +2,19 @@
b87815bf86020c592ccc7a846ba2e28ec8043902 hantro_odd.yuv
fe346136b9b8c1e6f6084cc106485706915795e4 invalid-vp90-01.webm
25751f5d3b05ff03f0719ad42cd625348eb8961e invalid-vp90-01.webm.res
+d78e2fceba5ac942246503ec8366f879c4775ca5 invalid-vp90-02.webm
+2dadee5306245fa5eeb0f99652d0e17afbcba96d invalid-vp90-02.webm.res
+df1a1453feb3c00d7d89746c7003b4163523bff3 invalid-vp90-03.webm
+8fe6fd82bf537340f586f97a7ae31fb37ccda302 invalid-vp90-03.webm.res
+a432f96ff0a787268e2f94a8092ab161a18d1b06 park_joy_90p_10_420.y4m
+0b194cc312c3a2e84d156a221b0a5eb615dfddc5 park_joy_90p_10_422.y4m
+ff0e0a21dc2adc95b8c1b37902713700655ced17 park_joy_90p_10_444.y4m
+614c32ae1eca391e867c70d19974f0d62664dd99 park_joy_90p_12_420.y4m
+c92825f1ea25c5c37855083a69faac6ac4641a9e park_joy_90p_12_422.y4m
+b592189b885b6cc85db55cc98512a197d73d3b34 park_joy_90p_12_444.y4m
+4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c park_joy_90p_8_420.y4m
+7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947 park_joy_90p_8_422.y4m
+bdb7856e6bc93599bdda05c2e773a9f22b6c6d03 park_joy_90p_8_444.y4m
b1f1c3ec79114b9a0651af24ce634afb44a9a419 rush_hour_444.y4m
5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf
65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf
@@ -531,8 +544,6 @@
7f6d8879336239a43dbb6c9f13178cb11cf7ed09 vp90-2-05-resize.ivf.md5
bf61ddc1f716eba58d4c9837d4e91031d9ce4ffe vp90-2-06-bilinear.webm
f6235f937552e11d8eb331ec55da6b3aa596b9ac vp90-2-06-bilinear.webm.md5
-495256cfd123fe777b2c0406862ed8468a1f4677 vp91-2-04-yv444.webm
-65e3a7ffef61ab340d9140f335ecc49125970c2c vp91-2-04-yv444.webm.md5
0c83a1e414fde3bccd6dc451bbaee68e59974c76 vp90-2-07-frame_parallel.webm
e5c2c9fb383e5bf3b563480adaeba5b7e3475ecd vp90-2-07-frame_parallel.webm.md5
086c7edcffd699ae7d99d710fd7e53b18910ca5b vp90-2-08-tile_1x2_frame_parallel.webm
@@ -642,5 +653,11 @@
e3ab35d4316c5e81325c50f5236ceca4bc0d35df vp90-2-15-segkey.webm.md5
9b7ca2cac09d34c4a5d296c1900f93b1e2f69d0d vp90-2-15-segkey_adpq.webm
8f46ba5f785d0c2170591a153e0d0d146a7c8090 vp90-2-15-segkey_adpq.webm.md5
-d78e2fceba5ac942246503ec8366f879c4775ca5 vp90-2-15-fuzz-flicker.webm
-bbd7dd15f43a703ff0a332fee4959e7b23bf77dc vp90-2-15-fuzz-flicker.webm.md5
+0321d507ce62dedc8a51b4e9011f7a19aed9c3dc vp91-2-04-yuv444.webm
+367e423dd41fdb49aa028574a2cfec5c2f325c5c vp91-2-04-yuv444.webm.md5
+76024eb753cdac6a5e5703aaea189d35c3c30ac7 invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf
+d3964f9dad9f60363c81b688324d95b4ec7c8038 invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf.res
+83f50908c8dc0ef8760595447a2ff7727489542e invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf
+456d1493e52d32a5c30edf44a27debc1fa6b253a invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
+c123d1f9f02fb4143abb5e271916e3a3080de8f6 invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
+456d1493e52d32a5c30edf44a27debc1fa6b253a invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
diff --git a/source/libvpx/test/test.mk b/source/libvpx/test/test.mk
index 5d02d66..85212d9 100644
--- a/source/libvpx/test/test.mk
+++ b/source/libvpx/test/test.mk
@@ -15,7 +15,7 @@
##
## Black box tests only use the public API.
##
-LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../md5_utils.h ../md5_utils.c
+LIBVPX_TEST_SRCS-yes += ../md5_utils.h ../md5_utils.c
LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ivf_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += ../y4minput.h ../y4minput.c
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += aq_segment_test.cc
@@ -30,6 +30,7 @@
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
@@ -41,6 +42,9 @@
LIBVPX_TEST_SRCS-yes += encode_test_driver.cc
LIBVPX_TEST_SRCS-yes += encode_test_driver.h
+## Y4m parsing.
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_test.cc ../y4menc.c ../y4menc.h
+
## WebM Parsing
ifeq ($(CONFIG_WEBM_IO), yes)
LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser.cpp
@@ -133,7 +137,19 @@
##
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
+
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
+
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf.md5
@@ -707,8 +723,6 @@
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_3.ivf.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-4.webm
@@ -757,12 +771,22 @@
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-fuzz-flicker.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-fuzz-flicker.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm.md5
# Invalid files for testing libvpx error checking.
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01.webm.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02.webm.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03.webm.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
# BBB VP9 streams
diff --git a/source/libvpx/test/test_vectors.cc b/source/libvpx/test/test_vectors.cc
index 75cd58f..41c9e26 100644
--- a/source/libvpx/test/test_vectors.cc
+++ b/source/libvpx/test/test_vectors.cc
@@ -165,7 +165,7 @@
"vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm",
"vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf",
"vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf",
- "vp90-2-13-largescaling.webm", "vp91-2-04-yv444.webm",
+ "vp90-2-13-largescaling.webm",
"vp90-2-14-resize-fp-tiles-1-16.webm",
"vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm",
"vp90-2-14-resize-fp-tiles-1-2.webm", "vp90-2-14-resize-fp-tiles-1-4.webm",
@@ -180,7 +180,7 @@
"vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm",
"vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm",
"vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm",
- "vp90-2-15-fuzz-flicker.webm"
+ "vp91-2-04-yuv444.webm",
};
const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
#endif // CONFIG_VP9_DECODER
diff --git a/source/libvpx/test/user_priv_test.cc b/source/libvpx/test/user_priv_test.cc
new file mode 100644
index 0000000..22fce85
--- /dev/null
+++ b/source/libvpx/test/user_priv_test.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "./vpx_config.h"
+#include "test/acm_random.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/ivf_video_source.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#if CONFIG_WEBM_IO
+#include "test/webm_video_source.h"
+#endif
+#include "vpx_mem/vpx_mem.h"
+#include "vpx/vp8.h"
+
+namespace {
+
+using std::string;
+using libvpx_test::ACMRandom;
+
+#if CONFIG_WEBM_IO
+
+void CheckUserPrivateData(void *user_priv, int *target) {
+ // actual pointer value should be the same as expected.
+ EXPECT_EQ(reinterpret_cast<void *>(target), user_priv) <<
+ "user_priv pointer value does not match.";
+}
+
+// Decodes |filename|. Passes in user_priv data when calling DecodeFrame and
+// compares the user_priv from return img with the original user_priv to see if
+// they match. Both the pointer values and the values inside the addresses
+// should match.
+string DecodeFile(const string &filename) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ libvpx_test::WebMVideoSource video(filename);
+ video.Init();
+
+ vpx_codec_dec_cfg_t cfg = {0};
+ libvpx_test::VP9Decoder decoder(cfg, 0);
+
+ libvpx_test::MD5 md5;
+ int frame_num = 0;
+ for (video.Begin(); !::testing::Test::HasFailure() && video.cxdata();
+ video.Next()) {
+ void *user_priv = reinterpret_cast<void *>(&frame_num);
+ const vpx_codec_err_t res =
+ decoder.DecodeFrame(video.cxdata(), video.frame_size(),
+ (frame_num == 0) ? NULL : user_priv);
+ if (res != VPX_CODEC_OK) {
+ EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
+ break;
+ }
+ libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
+ const vpx_image_t *img = NULL;
+
+ // Get decompressed data.
+ while ((img = dec_iter.Next())) {
+ if (frame_num == 0) {
+ CheckUserPrivateData(img->user_priv, NULL);
+ } else {
+ CheckUserPrivateData(img->user_priv, &frame_num);
+
+ // Also test ctrl_get_reference api.
+ struct vp9_ref_frame ref;
+ // Randomly fetch a reference frame.
+ ref.idx = rnd.Rand8() % 3;
+ decoder.Control(VP9_GET_REFERENCE, &ref);
+
+ CheckUserPrivateData(ref.img.user_priv, NULL);
+ }
+ md5.Add(img);
+ }
+
+ frame_num++;
+ }
+ return string(md5.Get());
+}
+
+TEST(UserPrivTest, VideoDecode) {
+ // no tiles or frame parallel; this exercises the decoding to test the
+ // user_priv.
+ EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc",
+ DecodeFile("vp90-2-03-size-226x226.webm").c_str());
+}
+
+#endif // CONFIG_WEBM_IO
+
+} // namespace
diff --git a/source/libvpx/test/video_source.h b/source/libvpx/test/video_source.h
index 6d1855a..4250cb7 100644
--- a/source/libvpx/test/video_source.h
+++ b/source/libvpx/test/video_source.h
@@ -50,6 +50,15 @@
return fopen(path_to_source.c_str(), "rb");
}
+static FILE *OpenTestOutFile(const std::string& file_name) {
+ const std::string path_to_source = GetDataPath() + "/" + file_name;
+ return fopen(path_to_source.c_str(), "wb");
+}
+
+static FILE *OpenTempOutFile() {
+ return tmpfile();
+}
+
// Abstract base class for test video sources, which provide a stream of
// vpx_image_t images with associated timestamps and duration.
class VideoSource {
diff --git a/source/libvpx/test/vp9_lossless_test.cc b/source/libvpx/test/vp9_lossless_test.cc
index 7c3ba9f..b3b9c92 100644
--- a/source/libvpx/test/vp9_lossless_test.cc
+++ b/source/libvpx/test/vp9_lossless_test.cc
@@ -36,6 +36,17 @@
SetMode(encoding_mode_);
}
+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+ ::libvpx_test::Encoder *encoder) {
+ if (video->frame() == 1) {
+ // Only call Control if quantizer > 0 to verify that using quantizer
+ // alone will activate lossless
+ if (cfg_.rc_max_quantizer > 0 || cfg_.rc_min_quantizer > 0) {
+ encoder->Control(VP9E_SET_LOSSLESS, 1);
+ }
+ }
+ }
+
virtual void BeginPassHook(unsigned int /*pass*/) {
psnr_ = kMaxPsnr;
nframes_ = 0;
@@ -91,5 +102,24 @@
EXPECT_GE(psnr_lossless, kMaxPsnr);
}
+TEST_P(LosslessTestLarge, TestLossLessEncodingCtrl) {
+ const vpx_rational timebase = { 33333333, 1000000000 };
+ cfg_.g_timebase = timebase;
+ cfg_.rc_target_bitrate = 2000;
+ cfg_.g_lag_in_frames = 25;
+ // Intentionally set Q > 0, to make sure control can be used to activate
+ // lossless
+ cfg_.rc_min_quantizer = 10;
+ cfg_.rc_max_quantizer = 20;
+
+ init_flags_ = VPX_CODEC_USE_PSNR;
+
+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ timebase.den, timebase.num, 0, 10);
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ const double psnr_lossless = GetMinPsnr();
+ EXPECT_GE(psnr_lossless, kMaxPsnr);
+}
+
VP9_INSTANTIATE_TEST_CASE(LosslessTestLarge, ALL_TEST_MODES);
} // namespace
diff --git a/source/libvpx/test/vp9_thread_test.cc b/source/libvpx/test/vp9_thread_test.cc
index 0c9f71b..72719a6 100644
--- a/source/libvpx/test/vp9_thread_test.cc
+++ b/source/libvpx/test/vp9_thread_test.cc
@@ -28,11 +28,11 @@
protected:
virtual ~VP9WorkerThreadTest() {}
virtual void SetUp() {
- vp9_worker_init(&worker_);
+ vp9_get_worker_interface()->init(&worker_);
}
virtual void TearDown() {
- vp9_worker_end(&worker_);
+ vp9_get_worker_interface()->end(&worker_);
}
VP9Worker worker_;
@@ -45,10 +45,11 @@
}
TEST_P(VP9WorkerThreadTest, HookSuccess) {
- EXPECT_NE(vp9_worker_sync(&worker_), 0); // should be a no-op.
+ // should be a no-op.
+ EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
for (int i = 0; i < 2; ++i) {
- EXPECT_NE(vp9_worker_reset(&worker_), 0);
+ EXPECT_NE(vp9_get_worker_interface()->reset(&worker_), 0);
int hook_data = 0;
int return_value = 1; // return successfully from the hook
@@ -58,20 +59,21 @@
const bool synchronous = GetParam();
if (synchronous) {
- vp9_worker_execute(&worker_);
+ vp9_get_worker_interface()->execute(&worker_);
} else {
- vp9_worker_launch(&worker_);
+ vp9_get_worker_interface()->launch(&worker_);
}
- EXPECT_NE(vp9_worker_sync(&worker_), 0);
+ EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
EXPECT_FALSE(worker_.had_error);
EXPECT_EQ(5, hook_data);
- EXPECT_NE(vp9_worker_sync(&worker_), 0); // should be a no-op.
+ // should be a no-op.
+ EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
}
}
TEST_P(VP9WorkerThreadTest, HookFailure) {
- EXPECT_NE(vp9_worker_reset(&worker_), 0);
+ EXPECT_NE(vp9_get_worker_interface()->reset(&worker_), 0);
int hook_data = 0;
int return_value = 0; // return failure from the hook
@@ -81,26 +83,49 @@
const bool synchronous = GetParam();
if (synchronous) {
- vp9_worker_execute(&worker_);
+ vp9_get_worker_interface()->execute(&worker_);
} else {
- vp9_worker_launch(&worker_);
+ vp9_get_worker_interface()->launch(&worker_);
}
- EXPECT_FALSE(vp9_worker_sync(&worker_));
+ EXPECT_FALSE(vp9_get_worker_interface()->sync(&worker_));
EXPECT_EQ(1, worker_.had_error);
// Ensure _reset() clears the error and _launch() can be called again.
return_value = 1;
- EXPECT_NE(vp9_worker_reset(&worker_), 0);
+ EXPECT_NE(vp9_get_worker_interface()->reset(&worker_), 0);
EXPECT_FALSE(worker_.had_error);
- vp9_worker_launch(&worker_);
- EXPECT_NE(vp9_worker_sync(&worker_), 0);
+ vp9_get_worker_interface()->launch(&worker_);
+ EXPECT_NE(vp9_get_worker_interface()->sync(&worker_), 0);
EXPECT_FALSE(worker_.had_error);
}
+TEST(VP9WorkerThreadTest, TestInterfaceAPI) {
+ EXPECT_EQ(0, vp9_set_worker_interface(NULL));
+ EXPECT_TRUE(vp9_get_worker_interface() != NULL);
+ for (int i = 0; i < 6; ++i) {
+ VP9WorkerInterface winterface = *vp9_get_worker_interface();
+ switch (i) {
+ default:
+ case 0: winterface.init = NULL; break;
+ case 1: winterface.reset = NULL; break;
+ case 2: winterface.sync = NULL; break;
+ case 3: winterface.launch = NULL; break;
+ case 4: winterface.execute = NULL; break;
+ case 5: winterface.end = NULL; break;
+ }
+ EXPECT_EQ(0, vp9_set_worker_interface(&winterface));
+ }
+}
+
// -----------------------------------------------------------------------------
// Multi-threaded decode tests
#if CONFIG_WEBM_IO
+struct FileList {
+ const char *name;
+ const char *expected_md5;
+};
+
// Decodes |filename| with |num_threads|. Returns the md5 of the decoded frames.
string DecodeFile(const string& filename, int num_threads) {
libvpx_test::WebMVideoSource video(filename);
@@ -130,39 +155,77 @@
return string(md5.Get());
}
-TEST(VP9DecodeMTTest, MTDecode) {
- // no tiles or frame parallel; this exercises loop filter threading.
- EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc",
- DecodeFile("vp90-2-03-size-226x226.webm", 2).c_str());
+void DecodeFiles(const FileList files[]) {
+ for (const FileList *iter = files; iter->name != NULL; ++iter) {
+ SCOPED_TRACE(iter->name);
+ for (int t = 2; t <= 8; ++t) {
+ EXPECT_EQ(iter->expected_md5, DecodeFile(iter->name, t))
+ << "threads = " << t;
+ }
+ }
}
-TEST(VP9DecodeMTTest, MTDecode2) {
- static const struct {
- const char *name;
- const char *expected_md5;
- } files[] = {
+// Trivial serialized thread worker interface implementation.
+// Note any worker that requires synchronization between other workers will
+// hang.
+namespace impl {
+
+void Init(VP9Worker *const worker) { memset(worker, 0, sizeof(*worker)); }
+int Reset(VP9Worker *const /*worker*/) { return 1; }
+int Sync(VP9Worker *const worker) { return !worker->had_error; }
+
+void Execute(VP9Worker *const worker) {  // A zero hook result is a failure.
+ worker->had_error |= !worker->hook(worker->data1, worker->data2);
+}
+
+void Launch(VP9Worker *const worker) { Execute(worker); }
+void End(VP9Worker *const /*worker*/) {}
+
+} // namespace impl
+
+TEST(VP9WorkerThreadTest, TestSerialInterface) {
+ static const VP9WorkerInterface serial_interface = {
+ impl::Init, impl::Reset, impl::Sync, impl::Launch, impl::Execute, impl::End
+ };
+ // TODO(jzern): Avoid using a file that will use the row-based thread
+ // loopfilter; with the simple serialized implementation it will hang. This is
+ // due to its expectation that rows will be run in parallel as they wait on
+ // progress in the row above before proceeding.
+ static const char expected_md5[] = "b35a1b707b28e82be025d960aba039bc";
+ static const char filename[] = "vp90-2-03-size-226x226.webm";
+ VP9WorkerInterface default_interface = *vp9_get_worker_interface();
+
+ EXPECT_NE(vp9_set_worker_interface(&serial_interface), 0);
+ EXPECT_EQ(expected_md5, DecodeFile(filename, 2));
+
+ // Reset the interface.
+ EXPECT_NE(vp9_set_worker_interface(&default_interface), 0);
+ EXPECT_EQ(expected_md5, DecodeFile(filename, 2));
+}
+
+TEST(VP9DecodeMultiThreadedTest, Decode) {
+ // no tiles or frame parallel; this exercises loop filter threading.
+ EXPECT_EQ("b35a1b707b28e82be025d960aba039bc",
+ DecodeFile("vp90-2-03-size-226x226.webm", 2));
+}
+
+TEST(VP9DecodeMultiThreadedTest, Decode2) {
+ static const FileList files[] = {
{ "vp90-2-08-tile_1x2_frame_parallel.webm",
"68ede6abd66bae0a2edf2eb9232241b6" },
{ "vp90-2-08-tile_1x4_frame_parallel.webm",
"368ebc6ebf3a5e478d85b2c3149b2848" },
{ "vp90-2-08-tile_1x8_frame_parallel.webm",
"17e439da2388aff3a0f69cb22579c6c1" },
+ { NULL, NULL }
};
- for (int i = 0; i < static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) {
- for (int t = 2; t <= 8; ++t) {
- EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str())
- << "threads = " << t;
- }
- }
+ DecodeFiles(files);
}
// Test tile quantity changes within one file.
-TEST(VP9DecodeMTTest, MTDecode3) {
- static const struct {
- const char *name;
- const char *expected_md5;
- } files[] = {
+TEST(VP9DecodeMultiThreadedTest, Decode3) {
+ static const FileList files[] = {
{ "vp90-2-14-resize-fp-tiles-1-16.webm",
"0cd5e632c326297e975f38949c31ea94" },
{ "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm",
@@ -207,14 +270,10 @@
"ae96f21f21b6370cc0125621b441fc52" },
{ "vp90-2-14-resize-fp-tiles-8-4.webm",
"3eb4f24f10640d42218f7fd7b9fd30d4" },
+ { NULL, NULL }
};
- for (int i = 0; i < static_cast<int>(sizeof(files) / sizeof(files[0])); ++i) {
- for (int t = 2; t <= 8; ++t) {
- EXPECT_STREQ(files[i].expected_md5, DecodeFile(files[i].name, t).c_str())
- << "threads = " << t;
- }
- }
+ DecodeFiles(files);
}
#endif // CONFIG_WEBM_IO
diff --git a/source/libvpx/test/y4m_test.cc b/source/libvpx/test/y4m_test.cc
new file mode 100644
index 0000000..73ff683
--- /dev/null
+++ b/source/libvpx/test/y4m_test.cc
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string>
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "./vpx_config.h"
+#include "./y4menc.h"
+
+namespace {
+
+using std::string;
+
+static const unsigned int kWidth = 160;
+static const unsigned int kHeight = 90;
+static const unsigned int kFrames = 10;
+
+typedef struct {
+ const char *filename;
+ unsigned int bit_depth;
+ vpx_img_fmt format;
+ const char *md5raw;
+} test_entry_type;
+
+const test_entry_type kY4mTestVectors[] = {
+ {"park_joy_90p_8_420.y4m", 8, VPX_IMG_FMT_I420,
+ "e5406275b9fc6bb3436c31d4a05c1cab"},
+ {"park_joy_90p_8_422.y4m", 8, VPX_IMG_FMT_I422,
+ "284a47a47133b12884ec3a14e959a0b6"},
+ {"park_joy_90p_8_444.y4m", 8, VPX_IMG_FMT_I444,
+ "90517ff33843d85de712fd4fe60dbed0"},
+ {"park_joy_90p_10_420.y4m", 10, VPX_IMG_FMT_I42016,
+ "63f21f9f717d8b8631bd2288ee87137b"},
+ {"park_joy_90p_10_422.y4m", 10, VPX_IMG_FMT_I42216,
+ "48ab51fb540aed07f7ff5af130c9b605"},
+ {"park_joy_90p_10_444.y4m", 10, VPX_IMG_FMT_I44416,
+ "067bfd75aa85ff9bae91fa3e0edd1e3e"},
+ {"park_joy_90p_12_420.y4m", 12, VPX_IMG_FMT_I42016,
+ "9e6d8f6508c6e55625f6b697bc461cef"},
+ {"park_joy_90p_12_422.y4m", 12, VPX_IMG_FMT_I42216,
+ "b239c6b301c0b835485be349ca83a7e3"},
+ {"park_joy_90p_12_444.y4m", 12, VPX_IMG_FMT_I44416,
+ "5a6481a550821dab6d0192f5c63845e9"},
+};
+
+static void write_image_file(const vpx_image_t *img, FILE *file) {
+ int plane, y;
+ for (plane = 0; plane < 3; ++plane) {
+ const unsigned char *buf = img->planes[plane];
+ const int stride = img->stride[plane];
+ const int bytes_per_sample = (img->fmt & VPX_IMG_FMT_HIGH) ? 2 : 1;
+ const int h = (plane ? (img->d_h + img->y_chroma_shift) >>
+ img->y_chroma_shift : img->d_h);
+ const int w = (plane ? (img->d_w + img->x_chroma_shift) >>
+ img->x_chroma_shift : img->d_w);
+ for (y = 0; y < h; ++y) {
+ fwrite(buf, bytes_per_sample, w, file);
+ buf += stride;
+ }
+ }
+}
+
+class Y4mVideoSourceTest
+ : public ::testing::TestWithParam<test_entry_type>,
+ public ::libvpx_test::Y4mVideoSource {
+ protected:
+ Y4mVideoSourceTest() : Y4mVideoSource("", 0, 0) {}
+
+ virtual ~Y4mVideoSourceTest() {
+ CloseSource();
+ }
+
+ virtual void Init(const std::string &file_name, int limit) {
+ file_name_ = file_name;
+ start_ = 0;
+ limit_ = limit;
+ frame_ = 0;
+ Begin();
+ }
+
+ // Checks y4m header information
+ void HeaderChecks(unsigned int bit_depth, vpx_img_fmt_t fmt) {
+ ASSERT_TRUE(input_file_ != NULL);
+ ASSERT_EQ(y4m_.pic_w, (int)kWidth);
+ ASSERT_EQ(y4m_.pic_h, (int)kHeight);
+ ASSERT_EQ(img()->d_w, kWidth);
+ ASSERT_EQ(img()->d_h, kHeight);
+ ASSERT_EQ(y4m_.bit_depth, bit_depth);
+ ASSERT_EQ(y4m_.vpx_fmt, fmt);
+ if (fmt == VPX_IMG_FMT_I420 || fmt == VPX_IMG_FMT_I42016) {
+ ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3 / 2);
+ ASSERT_EQ(img()->x_chroma_shift, 1U);
+ ASSERT_EQ(img()->y_chroma_shift, 1U);
+ }
+ if (fmt == VPX_IMG_FMT_I422 || fmt == VPX_IMG_FMT_I42216) {
+ ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 2);
+ ASSERT_EQ(img()->x_chroma_shift, 1U);
+ ASSERT_EQ(img()->y_chroma_shift, 0U);
+ }
+ if (fmt == VPX_IMG_FMT_I444 || fmt == VPX_IMG_FMT_I44416) {
+ ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3);
+ ASSERT_EQ(img()->x_chroma_shift, 0U);
+ ASSERT_EQ(img()->y_chroma_shift, 0U);
+ }
+ }
+
+ // Checks MD5 of the raw frame data
+ void Md5Check(const string &expected_md5) {
+ ASSERT_TRUE(input_file_ != NULL);
+ libvpx_test::MD5 md5;
+ for (unsigned int i = start_; i < limit_; i++) {
+ md5.Add(img());
+ Next();
+ }
+ ASSERT_EQ(string(md5.Get()), expected_md5);
+ }
+};
+
+TEST_P(Y4mVideoSourceTest, SourceTest) {
+ const test_entry_type t = GetParam();
+ Init(t.filename, kFrames);
+ HeaderChecks(t.bit_depth, t.format);
+ Md5Check(t.md5raw);
+}
+
+INSTANTIATE_TEST_CASE_P(C, Y4mVideoSourceTest,
+ ::testing::ValuesIn(kY4mTestVectors));
+
+class Y4mVideoWriteTest
+ : public Y4mVideoSourceTest {
+ protected:
+ Y4mVideoWriteTest() : Y4mVideoSourceTest() {}
+
+ virtual void ReplaceInputFp(FILE *input_file) {
+ CloseSource();
+ frame_ = 0;
+ input_file_ = input_file;
+ rewind(input_file_);
+ ReadSourceToStart();
+ }
+
+ // Writes out a y4m file and then reads it back
+ void WriteY4mAndReadBack() {
+ ASSERT_TRUE(input_file_ != NULL);
+ char buf[Y4M_BUFFER_SIZE] = {0};
+ const struct VpxRational framerate = {y4m_.fps_n, y4m_.fps_d};
+ FILE *out_file = libvpx_test::OpenTempOutFile();
+ ASSERT_TRUE(out_file != NULL);
+ y4m_write_file_header(buf, sizeof(buf),
+ kWidth, kHeight,
+ &framerate, y4m_.vpx_fmt,
+ y4m_.bit_depth);
+ fputs(buf, out_file);
+ for (unsigned int i = start_; i < limit_; i++) {
+ y4m_write_frame_header(buf, sizeof(buf));
+ fputs(buf, out_file);
+ write_image_file(img(), out_file);
+ Next();
+ }
+ ReplaceInputFp(out_file);
+ }
+
+ virtual void Init(const std::string &file_name, int limit) {
+ Y4mVideoSourceTest::Init(file_name, limit);
+ WriteY4mAndReadBack();
+ }
+};
+
+TEST_P(Y4mVideoWriteTest, WriteTest) {
+ const test_entry_type t = GetParam();
+ Init(t.filename, kFrames);
+ HeaderChecks(t.bit_depth, t.format);
+ Md5Check(t.md5raw);
+}
+
+INSTANTIATE_TEST_CASE_P(C, Y4mVideoWriteTest,
+ ::testing::ValuesIn(kY4mTestVectors));
+} // namespace
diff --git a/source/libvpx/test/y4m_video_source.h b/source/libvpx/test/y4m_video_source.h
index 7419043..378e75b 100644
--- a/source/libvpx/test/y4m_video_source.h
+++ b/source/libvpx/test/y4m_video_source.h
@@ -38,24 +38,30 @@
CloseSource();
}
- virtual void Begin() {
+ virtual void OpenSource() {
CloseSource();
input_file_ = OpenTestDataFile(file_name_);
ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
- << file_name_;
+ << file_name_;
+ }
- y4m_input_open(&y4m_, input_file_, NULL, 0, 0);
+ virtual void ReadSourceToStart() {
+ ASSERT_TRUE(input_file_ != NULL);
+ ASSERT_FALSE(y4m_input_open(&y4m_, input_file_, NULL, 0, 0));
framerate_numerator_ = y4m_.fps_n;
framerate_denominator_ = y4m_.fps_d;
-
frame_ = 0;
for (unsigned int i = 0; i < start_; i++) {
- Next();
+ Next();
}
-
FillFrame();
}
+ virtual void Begin() {
+ OpenSource();
+ ReadSourceToStart();
+ }
+
virtual void Next() {
++frame_;
FillFrame();
diff --git a/source/libvpx/tools_common.h b/source/libvpx/tools_common.h
index e033de2..6a9f4f7 100644
--- a/source/libvpx/tools_common.h
+++ b/source/libvpx/tools_common.h
@@ -90,6 +90,7 @@
uint32_t width;
uint32_t height;
vpx_img_fmt_t fmt;
+ vpx_bit_depth_t bit_depth;
int only_i420;
uint32_t fourcc;
struct VpxRational framerate;
diff --git a/source/libvpx/vp8/common/postproc.c b/source/libvpx/vp8/common/postproc.c
index 8e546d5..e50d393 100644
--- a/source/libvpx/vp8/common/postproc.c
+++ b/source/libvpx/vp8/common/postproc.c
@@ -393,12 +393,12 @@
int low_var_thresh,
int flag)
{
+ int mbr;
double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
int ppl = (int)(level + .5);
- int mb_rows = source->y_width >> 4;
- int mb_cols = source->y_height >> 4;
+ int mb_rows = cm->mb_rows;
+ int mb_cols = cm->mb_cols;
unsigned char *limits = cm->pp_limits_buffer;;
- int mbr, mbc;
(void) post;
(void) low_var_thresh;
(void) flag;
diff --git a/source/libvpx/vp8/common/rtcd_defs.pl b/source/libvpx/vp8/common/rtcd_defs.pl
index 3e40774..f587079 100644
--- a/source/libvpx/vp8/common/rtcd_defs.pl
+++ b/source/libvpx/vp8/common/rtcd_defs.pl
@@ -552,6 +552,9 @@
if (vpx_config("CONFIG_TEMPORAL_DENOISING") eq "yes") {
add_proto qw/int vp8_denoiser_filter/, "unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
specialize qw/vp8_denoiser_filter sse2 neon/;
+ add_proto qw/int vp8_denoiser_filter_uv/, "unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising";
+ specialize qw/vp8_denoiser_filter_uv sse2 neon/;
+
}
# End of encoder only functions
diff --git a/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c b/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c
index 78cc6fa..b7bb40c 100644
--- a/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c
+++ b/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c
@@ -240,3 +240,239 @@
return FILTER_BLOCK;
}
+
+int vp8_denoiser_filter_uv_neon(unsigned char *mc_running_avg,
+ int mc_running_avg_stride,
+ unsigned char *running_avg,
+ int running_avg_stride,
+ unsigned char *sig, int sig_stride,
+ unsigned int motion_magnitude,
+ int increase_denoising) {
+ /* If motion_magnitude is small, make the denoiser more aggressive by
+ * increasing the adjustment for each level: the level1 adjustment is
+ * increased, while the deltas stay the same.
+ */
+ int shift_inc = (increase_denoising &&
+ motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 1 : 0;
+ const uint8x16_t v_level1_adjustment = vmovq_n_u8(
+ (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 4 + shift_inc : 3);
+
+ const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1);
+ const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2);
+ const uint8x16_t v_level1_threshold = vmovq_n_u8(4 + shift_inc);
+ const uint8x16_t v_level2_threshold = vdupq_n_u8(8);
+ const uint8x16_t v_level3_threshold = vdupq_n_u8(16);
+ int64x2_t v_sum_diff_total = vdupq_n_s64(0);
+ int r;
+
+ {
+ uint16x4_t v_sum_block = vdup_n_u16(0);
+
+ // Avoid denoising color signal if it's close to the average level.
+ for (r = 0; r < 8; ++r) {
+ const uint8x8_t v_sig = vld1_u8(sig);
+ const uint16x4_t _76_54_32_10 = vpaddl_u8(v_sig);
+ v_sum_block = vqadd_u16(v_sum_block, _76_54_32_10);
+ sig += sig_stride;
+ }
+ sig -= sig_stride * 8;
+ {
+ const uint32x2_t _7654_3210 = vpaddl_u16(v_sum_block);
+ const uint64x1_t _76543210 = vpaddl_u32(_7654_3210);
+ const unsigned int sum_block =
+ vget_lane_u32(vreinterpret_u32_u64(_76543210), 0);
+ if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) {
+ return COPY_BLOCK;
+ }
+ }
+ }
+
+ /* Go over lines. */
+ for (r = 0; r < 4; ++r) {
+ /* Load inputs. */
+ const uint8x8_t v_sig_lo = vld1_u8(sig);
+ const uint8x8_t v_sig_hi = vld1_u8(&sig[sig_stride]);
+ const uint8x16_t v_sig = vcombine_u8(v_sig_lo, v_sig_hi);
+ const uint8x8_t v_mc_running_avg_lo = vld1_u8(mc_running_avg);
+ const uint8x8_t v_mc_running_avg_hi =
+ vld1_u8(&mc_running_avg[mc_running_avg_stride]);
+ const uint8x16_t v_mc_running_avg =
+ vcombine_u8(v_mc_running_avg_lo, v_mc_running_avg_hi);
+ /* Calculate absolute difference and sign masks. */
+ const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg);
+ const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg);
+ const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg);
+
+ /* Figure out which level that put us in. */
+ const uint8x16_t v_level1_mask = vcleq_u8(v_level1_threshold,
+ v_abs_diff);
+ const uint8x16_t v_level2_mask = vcleq_u8(v_level2_threshold,
+ v_abs_diff);
+ const uint8x16_t v_level3_mask = vcleq_u8(v_level3_threshold,
+ v_abs_diff);
+
+ /* Calculate absolute adjustments for level 1, 2 and 3. */
+ const uint8x16_t v_level2_adjustment = vandq_u8(v_level2_mask,
+ v_delta_level_1_and_2);
+ const uint8x16_t v_level3_adjustment = vandq_u8(v_level3_mask,
+ v_delta_level_2_and_3);
+ const uint8x16_t v_level1and2_adjustment = vaddq_u8(v_level1_adjustment,
+ v_level2_adjustment);
+ const uint8x16_t v_level1and2and3_adjustment = vaddq_u8(
+ v_level1and2_adjustment, v_level3_adjustment);
+
+ /* Figure adjustment absolute value by selecting between the absolute
+ * difference if in level0 or the value for level 1, 2 and 3.
+ */
+ const uint8x16_t v_abs_adjustment = vbslq_u8(v_level1_mask,
+ v_level1and2and3_adjustment, v_abs_diff);
+
+ /* Calculate positive and negative adjustments. Apply them to the signal
+ * and accumulate them. Adjustments are less than eight and the maximum
+ * sum of them (7 * 16) can fit in a signed char.
+ */
+ const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask,
+ v_abs_adjustment);
+ const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask,
+ v_abs_adjustment);
+
+ uint8x16_t v_running_avg = vqaddq_u8(v_sig, v_pos_adjustment);
+ v_running_avg = vqsubq_u8(v_running_avg, v_neg_adjustment);
+
+ /* Store results. */
+ vst1_u8(running_avg, vget_low_u8(v_running_avg));
+ vst1_u8(&running_avg[running_avg_stride], vget_high_u8(v_running_avg));
+
+ /* Sum all the accumulators to have the sum of all pixel differences
+ * for this macroblock.
+ */
+ {
+ const int8x16_t v_sum_diff =
+ vqsubq_s8(vreinterpretq_s8_u8(v_pos_adjustment),
+ vreinterpretq_s8_u8(v_neg_adjustment));
+
+ const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff);
+
+ const int32x4_t fedc_ba98_7654_3210 =
+ vpaddlq_s16(fe_dc_ba_98_76_54_32_10);
+
+ const int64x2_t fedcba98_76543210 =
+ vpaddlq_s32(fedc_ba98_7654_3210);
+
+ v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210);
+ }
+
+ /* Update pointers for next iteration. */
+ sig += sig_stride * 2;
+ mc_running_avg += mc_running_avg_stride * 2;
+ running_avg += running_avg_stride * 2;
+ }
+
+
+ /* Too many adjustments => copy block. */
+ {
+ int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total),
+ vget_low_s64(v_sum_diff_total));
+ int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0);
+ int sum_diff_thresh = SUM_DIFF_THRESHOLD_UV;
+ if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV;
+ if (sum_diff > sum_diff_thresh) {
+ // Before returning to copy the block (i.e., apply no denoising),
+ // check if we can still apply some (weaker) temporal filtering to
+ // this block, that would otherwise not be denoised at all. Simplest
+ // is to apply an additional adjustment to running_avg to bring it
+ // closer to sig. The adjustment is capped by a maximum delta, and
+ // chosen such that in most cases the resulting sum_diff will be
+ // within the acceptable range given by sum_diff_thresh.
+
+ // The delta is set by the excess of absolute pixel diff over the
+ // threshold.
+ int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1;
+ // Only apply the adjustment for max delta up to 3.
+ if (delta < 4) {
+ const uint8x16_t k_delta = vmovq_n_u8(delta);
+ sig -= sig_stride * 8;
+ mc_running_avg -= mc_running_avg_stride * 8;
+ running_avg -= running_avg_stride * 8;
+ for (r = 0; r < 4; ++r) {
+ const uint8x8_t v_sig_lo = vld1_u8(sig);
+ const uint8x8_t v_sig_hi = vld1_u8(&sig[sig_stride]);
+ const uint8x16_t v_sig = vcombine_u8(v_sig_lo, v_sig_hi);
+ const uint8x8_t v_mc_running_avg_lo = vld1_u8(mc_running_avg);
+ const uint8x8_t v_mc_running_avg_hi =
+ vld1_u8(&mc_running_avg[mc_running_avg_stride]);
+ const uint8x16_t v_mc_running_avg =
+ vcombine_u8(v_mc_running_avg_lo, v_mc_running_avg_hi);
+ /* Calculate absolute difference and sign masks. */
+ const uint8x16_t v_abs_diff = vabdq_u8(v_sig,
+ v_mc_running_avg);
+ const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig,
+ v_mc_running_avg);
+ const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig,
+ v_mc_running_avg);
+ // Clamp absolute difference to delta to get the adjustment.
+ const uint8x16_t v_abs_adjustment =
+ vminq_u8(v_abs_diff, (k_delta));
+
+ const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask,
+ v_abs_adjustment);
+ const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask,
+ v_abs_adjustment);
+ const uint8x8_t v_running_avg_lo = vld1_u8(running_avg);
+ const uint8x8_t v_running_avg_hi =
+ vld1_u8(&running_avg[running_avg_stride]);
+ uint8x16_t v_running_avg =
+ vcombine_u8(v_running_avg_lo, v_running_avg_hi);
+
+ v_running_avg = vqsubq_u8(v_running_avg, v_pos_adjustment);
+ v_running_avg = vqaddq_u8(v_running_avg, v_neg_adjustment);
+
+ /* Store results. */
+ vst1_u8(running_avg, vget_low_u8(v_running_avg));
+ vst1_u8(&running_avg[running_avg_stride],
+ vget_high_u8(v_running_avg));
+
+ {
+ const int8x16_t v_sum_diff =
+ vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment),
+ vreinterpretq_s8_u8(v_pos_adjustment));
+
+ const int16x8_t fe_dc_ba_98_76_54_32_10 =
+ vpaddlq_s8(v_sum_diff);
+ const int32x4_t fedc_ba98_7654_3210 =
+ vpaddlq_s16(fe_dc_ba_98_76_54_32_10);
+ const int64x2_t fedcba98_76543210 =
+ vpaddlq_s32(fedc_ba98_7654_3210);
+
+ v_sum_diff_total = vqaddq_s64(v_sum_diff_total,
+ fedcba98_76543210);
+ }
+ /* Update pointers for next iteration. */
+ sig += sig_stride * 2;
+ mc_running_avg += mc_running_avg_stride * 2;
+ running_avg += running_avg_stride * 2;
+ }
+ {
+ // Update the sum of all pixel differences of this MB.
+ x = vqadd_s64(vget_high_s64(v_sum_diff_total),
+ vget_low_s64(v_sum_diff_total));
+ sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0);
+
+ if (sum_diff > sum_diff_thresh) {
+ return COPY_BLOCK;
+ }
+ }
+ } else {
+ return COPY_BLOCK;
+ }
+ }
+ }
+
+ /* Tell above level that block was filtered. */
+ running_avg -= running_avg_stride * 8;
+ sig -= sig_stride * 8;
+
+ vp8_copy_mem8x8(running_avg, running_avg_stride, sig, sig_stride);
+
+ return FILTER_BLOCK;
+}
diff --git a/source/libvpx/vp8/encoder/bitstream.h b/source/libvpx/vp8/encoder/bitstream.h
index eef2d79..66f4bf6 100644
--- a/source/libvpx/vp8/encoder/bitstream.h
+++ b/source/libvpx/vp8/encoder/bitstream.h
@@ -18,18 +18,18 @@
#if HAVE_EDSP
void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
- const vp8_token *,
+ vp8_token *,
const vp8_extra_bit_struct *,
const vp8_tree_index *);
void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *,
unsigned char * cx_data,
const unsigned char *cx_data_end,
int num_parts,
- const vp8_token *,
+ vp8_token *,
const vp8_extra_bit_struct *,
const vp8_tree_index *);
void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
- const vp8_token *,
+ vp8_token *,
const vp8_extra_bit_struct *,
const vp8_tree_index *);
# define pack_tokens(a,b,c) \
diff --git a/source/libvpx/vp8/encoder/denoising.c b/source/libvpx/vp8/encoder/denoising.c
index 9206b94..94aa2ca 100644
--- a/source/libvpx/vp8/encoder/denoising.c
+++ b/source/libvpx/vp8/encoder/denoising.c
@@ -191,6 +191,148 @@
return FILTER_BLOCK;
}
+int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg_uv,
+ int mc_avg_uv_stride,
+ unsigned char *running_avg_uv,
+ int avg_uv_stride,
+ unsigned char *sig,
+ int sig_stride,
+ unsigned int motion_magnitude,
+ int increase_denoising) { /* Temporal denoiser for one 8x8 chroma block; returns FILTER_BLOCK or COPY_BLOCK. */
+ unsigned char *running_avg_uv_start = running_avg_uv;
+ unsigned char *sig_start = sig;
+ int sum_diff_thresh;
+ int r, c;
+ int sum_diff = 0;
+ int sum_block = 0;
+ int adj_val[3] = {3, 4, 6}; /* Adjustment for |diff| in 4..7, 8..15, and 16 or more. */
+ int shift_inc1 = 0;
+ int shift_inc2 = 1;
+ /* If motion_magnitude is small, make the denoiser more aggressive by
+ * increasing the adjustment for each level. Add another increment for
+ * blocks that are labeled for increased denoising. */
+ if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) {
+ if (increase_denoising) {
+ shift_inc1 = 1;
+ shift_inc2 = 2;
+ }
+ adj_val[0] += shift_inc2;
+ adj_val[1] += shift_inc2;
+ adj_val[2] += shift_inc2;
+ }
+
+ // Avoid denoising the color signal if it's close to the average level (128).
+ for (r = 0; r < 8; ++r) {
+ for (c = 0; c < 8; ++c) {
+ sum_block += sig[c];
+ }
+ sig += sig_stride;
+ }
+ if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) {
+ return COPY_BLOCK;
+ }
+
+ sig -= sig_stride * 8; /* Rewind to the first row of the block. */
+ for (r = 0; r < 8; ++r) {
+ for (c = 0; c < 8; ++c) {
+ int diff = 0;
+ int adjustment = 0;
+ int absdiff = 0;
+
+ diff = mc_running_avg_uv[c] - sig[c];
+ absdiff = abs(diff);
+
+ // When |diff| <= 3 + shift_inc1, use the pixel value from the
+ // motion-compensated running average (last denoised frame).
+ if (absdiff <= 3 + shift_inc1) {
+ running_avg_uv[c] = mc_running_avg_uv[c];
+ sum_diff += diff;
+ } else {
+ if (absdiff >= 4 && absdiff <= 7)
+ adjustment = adj_val[0];
+ else if (absdiff >= 8 && absdiff <= 15)
+ adjustment = adj_val[1];
+ else
+ adjustment = adj_val[2];
+ if (diff > 0) {
+ if ((sig[c] + adjustment) > 255)
+ running_avg_uv[c] = 255;
+ else
+ running_avg_uv[c] = sig[c] + adjustment;
+ sum_diff += adjustment;
+ } else {
+ if ((sig[c] - adjustment) < 0)
+ running_avg_uv[c] = 0;
+ else
+ running_avg_uv[c] = sig[c] - adjustment;
+ sum_diff -= adjustment;
+ }
+ }
+ }
+ /* Update pointers for next iteration. */
+ sig += sig_stride;
+ mc_running_avg_uv += mc_avg_uv_stride;
+ running_avg_uv += avg_uv_stride;
+ }
+
+ sum_diff_thresh= SUM_DIFF_THRESHOLD_UV;
+ if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV;
+ if (abs(sum_diff) > sum_diff_thresh) {
+ // Before returning to copy the block (i.e., apply no denoising), check
+ // if we can still apply some (weaker) temporal filtering to this block,
+ // that would otherwise not be denoised at all. Simplest is to apply
+ // an additional adjustment to running_avg_uv to bring it closer to sig.
+ // The adjustment is capped by a maximum delta, and chosen such that
+ // in most cases the resulting sum_diff will be within the
+ // acceptable range given by sum_diff_thresh.
+
+ // The delta is set by the excess of absolute pixel diff over threshold.
+ int delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1;
+ // Only apply the adjustment for max delta up to 3.
+ if (delta < 4) {
+ sig -= sig_stride * 8;
+ mc_running_avg_uv -= mc_avg_uv_stride * 8;
+ running_avg_uv -= avg_uv_stride * 8;
+ for (r = 0; r < 8; ++r) {
+ for (c = 0; c < 8; ++c) {
+ int diff = mc_running_avg_uv[c] - sig[c];
+ int adjustment = abs(diff);
+ if (adjustment > delta)
+ adjustment = delta;
+ if (diff > 0) {
+ // Bring denoised signal down.
+ if (running_avg_uv[c] - adjustment < 0)
+ running_avg_uv[c] = 0;
+ else
+ running_avg_uv[c] = running_avg_uv[c] - adjustment;
+ sum_diff -= adjustment;
+ } else if (diff < 0) {
+ // Bring denoised signal up.
+ if (running_avg_uv[c] + adjustment > 255)
+ running_avg_uv[c] = 255;
+ else
+ running_avg_uv[c] = running_avg_uv[c] + adjustment;
+ sum_diff += adjustment;
+ }
+ }
+ // TODO(marpan): Check here if abs(sum_diff) has gone below the
+ // threshold sum_diff_thresh, and if so, we can exit the row loop.
+ sig += sig_stride;
+ mc_running_avg_uv += mc_avg_uv_stride;
+ running_avg_uv += avg_uv_stride;
+ }
+ if (abs(sum_diff) > sum_diff_thresh)
+ return COPY_BLOCK;
+ } else {
+ return COPY_BLOCK;
+ }
+ }
+
+ vp8_copy_mem8x8(running_avg_uv_start, avg_uv_stride, sig_start,
+ sig_stride);
+ return FILTER_BLOCK;
+}
+
int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
int num_mb_rows, int num_mb_cols)
{
@@ -261,6 +403,8 @@
unsigned int motion_magnitude2;
unsigned int sse_thresh;
int sse_diff_thresh = 0;
+ // Denoise the UV channel.
+ int apply_color_denoise = 0;
// Spatial loop filter: only applied selectively based on
// temporal filter state of block relative to top/left neighbors.
int apply_spatial_loop_filter = 1;
@@ -268,6 +412,8 @@
MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame;
enum vp8_denoiser_decision decision = FILTER_BLOCK;
+ enum vp8_denoiser_decision decision_u = FILTER_BLOCK;
+ enum vp8_denoiser_decision decision_v = FILTER_BLOCK;
if (zero_frame)
{
@@ -377,11 +523,37 @@
/* Filter. */
decision = vp8_denoiser_filter(mc_running_avg_y, mc_avg_y_stride,
- running_avg_y, avg_y_stride,
- x->thismb, 16, motion_magnitude2,
- x->increase_denoising);
+ running_avg_y, avg_y_stride,
+ x->thismb, 16, motion_magnitude2,
+ x->increase_denoising);
denoiser->denoise_state[block_index] = motion_magnitude2 > 0 ?
kFilterNonZeroMV : kFilterZeroMV;
+ // Only denoise UV for zero motion, and if y channel was denoised.
+ if (apply_color_denoise &&
+ motion_magnitude2 == 0 &&
+ decision == FILTER_BLOCK) {
+ unsigned char *mc_running_avg_u =
+ denoiser->yv12_mc_running_avg.u_buffer + recon_uvoffset;
+ unsigned char *running_avg_u =
+ denoiser->yv12_running_avg[INTRA_FRAME].u_buffer + recon_uvoffset;
+ unsigned char *mc_running_avg_v =
+ denoiser->yv12_mc_running_avg.v_buffer + recon_uvoffset;
+ unsigned char *running_avg_v =
+ denoiser->yv12_running_avg[INTRA_FRAME].v_buffer + recon_uvoffset;
+ int mc_avg_uv_stride = denoiser->yv12_mc_running_avg.uv_stride;
+ int avg_uv_stride = denoiser->yv12_running_avg[INTRA_FRAME].uv_stride;
+ int signal_stride = x->block[16].src_stride;
+ decision_u =
+ vp8_denoiser_filter_uv(mc_running_avg_u, mc_avg_uv_stride,
+ running_avg_u, avg_uv_stride,
+ x->block[16].src + *x->block[16].base_src,
+ signal_stride, motion_magnitude2, 0);
+ decision_v =
+ vp8_denoiser_filter_uv(mc_running_avg_v, mc_avg_uv_stride,
+ running_avg_v, avg_uv_stride,
+ x->block[20].src + *x->block[20].base_src,
+ signal_stride, motion_magnitude2, 0);
+ }
}
if (decision == COPY_BLOCK)
{
@@ -394,7 +566,21 @@
denoiser->yv12_running_avg[INTRA_FRAME].y_stride);
denoiser->denoise_state[block_index] = kNoFilter;
}
- // Option to selectively deblock the denoised signal.
+ if (apply_color_denoise) {
+ if (decision_u == COPY_BLOCK) {
+ vp8_copy_mem8x8(
+ x->block[16].src + *x->block[16].base_src, x->block[16].src_stride,
+ denoiser->yv12_running_avg[INTRA_FRAME].u_buffer + recon_uvoffset,
+ denoiser->yv12_running_avg[INTRA_FRAME].uv_stride);
+ }
+ if (decision_v == COPY_BLOCK) {
+ vp8_copy_mem8x8(
+ x->block[20].src + *x->block[20].base_src, x->block[16].src_stride,
+ denoiser->yv12_running_avg[INTRA_FRAME].v_buffer + recon_uvoffset,
+ denoiser->yv12_running_avg[INTRA_FRAME].uv_stride);
+ }
+ }
+ // Option to selectively deblock the denoised signal, for y channel only.
if (apply_spatial_loop_filter) {
loop_filter_info lfi;
int apply_filter_col = 0;
diff --git a/source/libvpx/vp8/encoder/denoising.h b/source/libvpx/vp8/encoder/denoising.h
index 6db0785..8f1bfa5 100644
--- a/source/libvpx/vp8/encoder/denoising.h
+++ b/source/libvpx/vp8/encoder/denoising.h
@@ -22,6 +22,11 @@
#define SUM_DIFF_THRESHOLD_HIGH (16 * 16 * 3)
#define MOTION_MAGNITUDE_THRESHOLD (8*3)
+#define SUM_DIFF_THRESHOLD_UV (96) // (8 * 8 * 1.5)
+#define SUM_DIFF_THRESHOLD_HIGH_UV (8 * 8 * 2)
+#define SUM_DIFF_FROM_AVG_THRESH_UV (8 * 8 * 4)
+#define MOTION_MAGNITUDE_THRESHOLD_UV (8*3)
+
enum vp8_denoiser_decision
{
COPY_BLOCK,
diff --git a/source/libvpx/vp8/encoder/x86/denoising_sse2.c b/source/libvpx/vp8/encoder/x86/denoising_sse2.c
index ff439dd..b84795c 100644
--- a/source/libvpx/vp8/encoder/x86/denoising_sse2.c
+++ b/source/libvpx/vp8/encoder/x86/denoising_sse2.c
@@ -17,10 +17,23 @@
#include <emmintrin.h>
#include "vpx_ports/emmintrin_compat.h"
-union sum_union {
- __m128i v;
- signed char e[16];
-};
+/* Return the absolute value of the sum of the 16 signed bytes in acc_diff. */
+static INLINE unsigned int abs_sum_diff_16x1(__m128i acc_diff) {
+ const __m128i k_1 = _mm_set1_epi16(1);
+ const __m128i acc_diff_lo = _mm_srai_epi16(
+ _mm_unpacklo_epi8(acc_diff, acc_diff), 8); /* Sign-extend low 8 bytes to 16 bits. */
+ const __m128i acc_diff_hi = _mm_srai_epi16(
+ _mm_unpackhi_epi8(acc_diff, acc_diff), 8); /* Sign-extend high 8 bytes to 16 bits. */
+ const __m128i acc_diff_16 = _mm_add_epi16(acc_diff_lo, acc_diff_hi);
+ const __m128i hg_fe_dc_ba = _mm_madd_epi16(acc_diff_16, k_1); /* Multiply by 1: adds adjacent 16-bit pairs into 32-bit lanes. */
+ const __m128i hgfe_dcba = _mm_add_epi32(hg_fe_dc_ba,
+ _mm_srli_si128(hg_fe_dc_ba, 8));
+ const __m128i hgfedcba = _mm_add_epi32(hgfe_dcba,
+ _mm_srli_si128(hgfe_dcba, 4));
+ unsigned int sum_diff = _mm_cvtsi128_si32(hgfedcba); /* Low 32-bit lane now holds the total. */
+
+ return abs(sum_diff);
+}
int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y,
int mc_avg_y_stride,
@@ -31,7 +44,7 @@
{
unsigned char *running_avg_y_start = running_avg_y;
unsigned char *sig_start = sig;
- int sum_diff_thresh;
+ unsigned int sum_diff_thresh;
int r;
int shift_inc = (increase_denoising &&
motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0;
@@ -103,16 +116,10 @@
{
/* Compute the sum of all pixel differences of this MB. */
- union sum_union s;
- int sum_diff = 0;
- s.v = acc_diff;
- sum_diff = s.e[0] + s.e[1] + s.e[2] + s.e[3] + s.e[4] + s.e[5]
- + s.e[6] + s.e[7] + s.e[8] + s.e[9] + s.e[10] + s.e[11]
- + s.e[12] + s.e[13] + s.e[14] + s.e[15];
-
+ unsigned int abs_sum_diff = abs_sum_diff_16x1(acc_diff);
sum_diff_thresh = SUM_DIFF_THRESHOLD;
if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
- if (abs(sum_diff) > sum_diff_thresh) {
+ if (abs_sum_diff > sum_diff_thresh) {
// Before returning to copy the block (i.e., apply no denoising),
// checK if we can still apply some (weaker) temporal filtering to
// this block, that would otherwise not be denoised at all. Simplest
@@ -123,7 +130,7 @@
// The delta is set by the excess of absolute pixel diff over the
// threshold.
- int delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1;
+ int delta = ((abs_sum_diff - sum_diff_thresh) >> 8) + 1;
// Only apply the adjustment for max delta up to 3.
if (delta < 4) {
const __m128i k_delta = _mm_set1_epi8(delta);
@@ -162,16 +169,9 @@
mc_running_avg_y += mc_avg_y_stride;
running_avg_y += avg_y_stride;
}
- {
- // Update the sum of all pixel differences of this MB.
- union sum_union s;
- s.v = acc_diff;
- sum_diff = s.e[0] + s.e[1] + s.e[2] + s.e[3] + s.e[4] + s.e[5]
- + s.e[6] + s.e[7] + s.e[8] + s.e[9] + s.e[10] + s.e[11]
- + s.e[12] + s.e[13] + s.e[14] + s.e[15];
- if (abs(sum_diff) > sum_diff_thresh) {
- return COPY_BLOCK;
- }
+ abs_sum_diff = abs_sum_diff_16x1(acc_diff);
+ if (abs_sum_diff > sum_diff_thresh) {
+ return COPY_BLOCK;
}
} else {
return COPY_BLOCK;
@@ -182,3 +182,198 @@
vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride);
return FILTER_BLOCK;
}
+
+int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg,
+ int mc_avg_stride,
+ unsigned char *running_avg, int avg_stride,
+ unsigned char *sig, int sig_stride,
+ unsigned int motion_magnitude,
+ int increase_denoising) { /* SSE2 temporal denoiser for one 8x8 chroma block; returns FILTER_BLOCK or COPY_BLOCK. */
+ unsigned char *running_avg_start = running_avg;
+ unsigned char *sig_start = sig;
+ unsigned int sum_diff_thresh;
+ int r;
+ int shift_inc = (increase_denoising &&
+ motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 1 : 0;
+ __m128i acc_diff = _mm_setzero_si128();
+ const __m128i k_0 = _mm_setzero_si128();
+ const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
+ const __m128i k_8 = _mm_set1_epi8(8);
+ const __m128i k_16 = _mm_set1_epi8(16);
+ /* Modify each level's adjustment according to motion_magnitude. */
+ const __m128i l3 = _mm_set1_epi8(
+ (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ?
+ 7 + shift_inc : 6);
+ /* Difference between level 3 and level 2 is 2. */
+ const __m128i l32 = _mm_set1_epi8(2);
+ /* Difference between level 2 and level 1 is 1. */
+ const __m128i l21 = _mm_set1_epi8(1);
+
+ {
+ const __m128i k_1 = _mm_set1_epi16(1);
+ __m128i vec_sum_block = _mm_setzero_si128();
+
+ // Avoid denoising the color signal if it's close to the average level (128).
+ for (r = 0; r < 8; ++r) {
+ const __m128i v_sig = _mm_loadl_epi64((__m128i *)(&sig[0]));
+ const __m128i v_sig_unpack = _mm_unpacklo_epi8(v_sig, k_0);
+ vec_sum_block = _mm_add_epi16(vec_sum_block, v_sig_unpack);
+ sig += sig_stride;
+ }
+ sig -= sig_stride * 8;
+ {
+ const __m128i hg_fe_dc_ba = _mm_madd_epi16(vec_sum_block, k_1);
+ const __m128i hgfe_dcba = _mm_add_epi32(hg_fe_dc_ba,
+ _mm_srli_si128(hg_fe_dc_ba, 8));
+ const __m128i hgfedcba = _mm_add_epi32(hgfe_dcba,
+ _mm_srli_si128(hgfe_dcba, 4));
+ const int sum_block = _mm_cvtsi128_si32(hgfedcba);
+ if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) {
+ return COPY_BLOCK;
+ }
+ }
+ }
+
+ for (r = 0; r < 4; ++r) {
+ /* Load two 8-pixel rows into one register and calculate differences. */
+ const __m128i v_sig_low = _mm_castpd_si128(
+ _mm_load_sd((double *)(&sig[0])));
+ const __m128i v_sig = _mm_castpd_si128(
+ _mm_loadh_pd(_mm_castsi128_pd(v_sig_low),
+ (double *)(&sig[sig_stride])));
+ const __m128i v_mc_running_avg_low = _mm_castpd_si128(
+ _mm_load_sd((double *)(&mc_running_avg[0])));
+ const __m128i v_mc_running_avg = _mm_castpd_si128(
+ _mm_loadh_pd(_mm_castsi128_pd(v_mc_running_avg_low),
+ (double *)(&mc_running_avg[mc_avg_stride])));
+ const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg, v_sig);
+ const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg);
+ /* Obtain the sign. FF if diff is negative or zero (pdiff == 0). */
+ const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
+ /* Clamp absolute difference to 16 to be used to get mask. Doing this
+ * allows us to use _mm_cmpgt_epi8, which operates on signed byte. */
+ const __m128i clamped_absdiff = _mm_min_epu8(
+ _mm_or_si128(pdiff, ndiff), k_16);
+ /* Get masks for l2 l1 and l0 adjustments */
+ const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff);
+ const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff);
+ const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff);
+ /* Get adjustments for l2, l1, and l0 */
+ __m128i adj2 = _mm_and_si128(mask2, l32);
+ const __m128i adj1 = _mm_and_si128(mask1, l21);
+ const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff);
+ __m128i adj, padj, nadj;
+ __m128i v_running_avg;
+
+ /* Combine the adjustments and get absolute adjustments. */
+ adj2 = _mm_add_epi8(adj2, adj1);
+ adj = _mm_sub_epi8(l3, adj2);
+ adj = _mm_andnot_si128(mask0, adj);
+ adj = _mm_or_si128(adj, adj0);
+
+ /* Restore the sign and get positive and negative adjustments. */
+ padj = _mm_andnot_si128(diff_sign, adj);
+ nadj = _mm_and_si128(diff_sign, adj);
+
+ /* Calculate filtered value. */
+ v_running_avg = _mm_adds_epu8(v_sig, padj);
+ v_running_avg = _mm_subs_epu8(v_running_avg, nadj);
+
+ _mm_storel_pd((double *)&running_avg[0],
+ _mm_castsi128_pd(v_running_avg));
+ _mm_storeh_pd((double *)&running_avg[avg_stride],
+ _mm_castsi128_pd(v_running_avg));
+
+ /* Each adjustment is at most 7 + shift_inc and this loop adds only 4
+ * per lane, so each element in acc_diff fits in a signed char.
+ */
+ acc_diff = _mm_adds_epi8(acc_diff, padj);
+ acc_diff = _mm_subs_epi8(acc_diff, nadj);
+
+ /* Update pointers for next iteration. */
+ sig += sig_stride * 2;
+ mc_running_avg += mc_avg_stride * 2;
+ running_avg += avg_stride * 2;
+ }
+
+ {
+ unsigned int abs_sum_diff = abs_sum_diff_16x1(acc_diff);
+ sum_diff_thresh = SUM_DIFF_THRESHOLD_UV;
+ if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV;
+ if (abs_sum_diff > sum_diff_thresh) {
+ // Before returning to copy the block (i.e., apply no denoising),
+ // check if we can still apply some (weaker) temporal filtering to
+ // this block, that would otherwise not be denoised at all. Simplest
+ // is to apply an additional adjustment to running_avg to bring it
+ // closer to sig. The adjustment is capped by a maximum delta, and
+ // chosen such that in most cases the resulting sum_diff will be
+ // within the acceptable range given by sum_diff_thresh.
+
+ // The delta is set by the excess of absolute pixel diff over the
+ // threshold.
+ int delta = ((abs_sum_diff - sum_diff_thresh) >> 8) + 1;
+ // Only apply the adjustment for max delta up to 3.
+ if (delta < 4) {
+ const __m128i k_delta = _mm_set1_epi8(delta);
+ sig -= sig_stride * 8;
+ mc_running_avg -= mc_avg_stride * 8;
+ running_avg -= avg_stride * 8;
+ for (r = 0; r < 4; ++r) {
+ // Load two 8-pixel rows into one register and calculate differences.
+ const __m128i v_sig_low = _mm_castpd_si128(
+ _mm_load_sd((double *)(&sig[0])));
+ const __m128i v_sig = _mm_castpd_si128(
+ _mm_loadh_pd(_mm_castsi128_pd(v_sig_low),
+ (double *)(&sig[sig_stride])));
+ const __m128i v_mc_running_avg_low = _mm_castpd_si128(
+ _mm_load_sd((double *)(&mc_running_avg[0])));
+ const __m128i v_mc_running_avg = _mm_castpd_si128(
+ _mm_loadh_pd(_mm_castsi128_pd(v_mc_running_avg_low),
+ (double *)(&mc_running_avg[mc_avg_stride])));
+ const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg, v_sig);
+ const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg);
+ // Obtain the sign. FF if diff is negative or zero (pdiff == 0).
+ const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
+ // Clamp absolute difference to delta to get the adjustment.
+ const __m128i adj =
+ _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
+ // Restore the sign and get positive and negative adjustments.
+ __m128i padj, nadj;
+ const __m128i v_running_avg_low = _mm_castpd_si128(
+ _mm_load_sd((double *)(&running_avg[0])));
+ __m128i v_running_avg = _mm_castpd_si128(
+ _mm_loadh_pd(_mm_castsi128_pd(v_running_avg_low),
+ (double *)(&running_avg[avg_stride])));
+ padj = _mm_andnot_si128(diff_sign, adj);
+ nadj = _mm_and_si128(diff_sign, adj);
+ // Calculate filtered value.
+ v_running_avg = _mm_subs_epu8(v_running_avg, padj);
+ v_running_avg = _mm_adds_epu8(v_running_avg, nadj);
+
+ _mm_storel_pd((double *)&running_avg[0],
+ _mm_castsi128_pd(v_running_avg));
+ _mm_storeh_pd((double *)&running_avg[avg_stride],
+ _mm_castsi128_pd(v_running_avg));
+
+ // Accumulate the adjustments.
+ acc_diff = _mm_subs_epi8(acc_diff, padj);
+ acc_diff = _mm_adds_epi8(acc_diff, nadj);
+
+ // Update pointers for next iteration.
+ sig += sig_stride * 2;
+ mc_running_avg += mc_avg_stride * 2;
+ running_avg += avg_stride * 2;
+ }
+ abs_sum_diff = abs_sum_diff_16x1(acc_diff);
+ if (abs_sum_diff > sum_diff_thresh) {
+ return COPY_BLOCK;
+ }
+ } else {
+ return COPY_BLOCK;
+ }
+ }
+ }
+
+ vp8_copy_mem8x8(running_avg_start, avg_stride, sig_start, sig_stride);
+ return FILTER_BLOCK;
+}
diff --git a/source/libvpx/vp9/common/vp9_alloccommon.c b/source/libvpx/vp9/common/vp9_alloccommon.c
index e56a0b7..ccbf3f6 100644
--- a/source/libvpx/vp9/common/vp9_alloccommon.c
+++ b/source/libvpx/vp9/common/vp9_alloccommon.c
@@ -109,7 +109,9 @@
}
vp9_free_frame_buffer(&cm->post_proc_buffer);
+}
+void vp9_free_context_buffers(VP9_COMMON *cm) {
free_mi(cm);
vpx_free(cm->last_frame_seg_map);
@@ -125,12 +127,15 @@
int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
+#if CONFIG_INTERNAL_STATS || CONFIG_VP9_POSTPROC
const int ss_x = cm->subsampling_x;
const int ss_y = cm->subsampling_y;
+ // TODO(agrange): this should be conditionally allocated.
if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0)
goto fail;
+#endif
set_mb_mi(cm, aligned_width, aligned_height);
@@ -165,36 +170,56 @@
fail:
vp9_free_frame_buffers(cm);
+ vp9_free_context_buffers(cm);
return 1;
}
+static void init_frame_bufs(VP9_COMMON *cm) { // Set initial buffer indices and reference counts.
+ int i;
+
+ cm->new_fb_idx = FRAME_BUFFERS - 1; // The last frame buffer starts out as the "new" frame.
+ cm->frame_bufs[cm->new_fb_idx].ref_count = 1;
+
+ for (i = 0; i < REF_FRAMES; ++i) { // Map reference slot i to buffer i and mark it referenced.
+ cm->ref_frame_map[i] = i;
+ cm->frame_bufs[i].ref_count = 1;
+ }
+}
+
int vp9_alloc_frame_buffers(VP9_COMMON *cm, int width, int height) {
- const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
- const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
+ int i;
const int ss_x = cm->subsampling_x;
const int ss_y = cm->subsampling_y;
- int i;
vp9_free_frame_buffers(cm);
- for (i = 0; i < FRAME_BUFFERS; i++) {
+ for (i = 0; i < FRAME_BUFFERS; ++i) {
cm->frame_bufs[i].ref_count = 0;
if (vp9_alloc_frame_buffer(&cm->frame_bufs[i].buf, width, height,
ss_x, ss_y, VP9_ENC_BORDER_IN_PIXELS) < 0)
goto fail;
}
- cm->new_fb_idx = FRAME_BUFFERS - 1;
- cm->frame_bufs[cm->new_fb_idx].ref_count = 1;
+ init_frame_bufs(cm);
- for (i = 0; i < REF_FRAMES; i++) {
- cm->ref_frame_map[i] = i;
- cm->frame_bufs[i].ref_count = 1;
- }
-
+#if CONFIG_INTERNAL_STATS || CONFIG_VP9_POSTPROC
if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
VP9_ENC_BORDER_IN_PIXELS) < 0)
goto fail;
+#endif
+
+ return 0;
+
+ fail:
+ vp9_free_frame_buffers(cm);
+ return 1;
+}
+
+int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
+ const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
+ const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
+
+ vp9_free_context_buffers(cm);
set_mb_mi(cm, aligned_width, aligned_height);
@@ -224,12 +249,13 @@
return 0;
fail:
- vp9_free_frame_buffers(cm);
+ vp9_free_context_buffers(cm);
return 1;
}
void vp9_remove_common(VP9_COMMON *cm) {
vp9_free_frame_buffers(cm);
+ vp9_free_context_buffers(cm);
vp9_free_internal_frame_buffers(&cm->int_frame_buffers);
}
diff --git a/source/libvpx/vp9/common/vp9_alloccommon.h b/source/libvpx/vp9/common/vp9_alloccommon.h
index 06636a9..c4b1b8d 100644
--- a/source/libvpx/vp9/common/vp9_alloccommon.h
+++ b/source/libvpx/vp9/common/vp9_alloccommon.h
@@ -23,8 +23,12 @@
int vp9_resize_frame_buffers(struct VP9Common *cm, int width, int height);
int vp9_alloc_frame_buffers(struct VP9Common *cm, int width, int height);
+int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height);
+int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height);
void vp9_free_frame_buffers(struct VP9Common *cm);
+void vp9_free_state_buffers(struct VP9Common *cm);
+void vp9_free_context_buffers(struct VP9Common *cm);
void vp9_update_frame_size(struct VP9Common *cm);
diff --git a/source/libvpx/vp9/common/vp9_blockd.c b/source/libvpx/vp9/common/vp9_blockd.c
index 43d6c6e..2404cfc 100644
--- a/source/libvpx/vp9/common/vp9_blockd.c
+++ b/source/libvpx/vp9/common/vp9_blockd.c
@@ -44,7 +44,7 @@
// block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
// 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
// transform size varies per plane, look it up in a common way.
- const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi)
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd)
: mbmi->tx_size;
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
diff --git a/source/libvpx/vp9/common/vp9_blockd.h b/source/libvpx/vp9/common/vp9_blockd.h
index 9088b0b..9d5ad9c 100644
--- a/source/libvpx/vp9/common/vp9_blockd.h
+++ b/source/libvpx/vp9/common/vp9_blockd.h
@@ -270,18 +270,20 @@
void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y);
-static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize) {
+static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize,
+ int xss, int yss) {
if (bsize < BLOCK_8X8) {
return TX_4X4;
} else {
- // TODO(dkovalev): Assuming YUV420 (ss_x == 1, ss_y == 1)
- const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][1][1];
+ const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss];
return MIN(y_tx_size, max_txsize_lookup[plane_bsize]);
}
}
-static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi) {
- return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type);
+static INLINE TX_SIZE get_uv_tx_size(const MB_MODE_INFO *mbmi,
+ const struct macroblockd_plane *pd) {
+ return get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type, pd->subsampling_x,
+ pd->subsampling_y);
}
static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize,
diff --git a/source/libvpx/vp9/common/vp9_convolve.c b/source/libvpx/vp9/common/vp9_convolve.c
index 1a8c49d..d8aaf32 100644
--- a/source/libvpx/vp9/common/vp9_convolve.c
+++ b/source/libvpx/vp9/common/vp9_convolve.c
@@ -117,17 +117,25 @@
const InterpKernel *const y_filters,
int y0_q4, int y_step_q4,
int w, int h) {
- // Fixed size intermediate buffer places limits on parameters.
- // Maximum intermediate_height is 324, for y_step_q4 == 80,
- // h == 64, taps == 8.
- // y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
- uint8_t temp[64 * 324];
+ // Note: Fixed size intermediate buffer, temp, places limits on parameters.
+ // 2d filtering proceeds in 2 steps:
+ // (1) Interpolate horizontally into an intermediate buffer, temp.
+ // (2) Interpolate temp vertically to derive the sub-pixel result.
+ // Deriving the maximum number of rows in the temp buffer (135):
+ // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative).
+ // --Largest block size is 64x64 pixels.
+ // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the
+ // original frame (in 1/16th pixel units).
+ // --Must round-up because block may be located at sub-pixel position.
+ // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
+ // --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
+ uint8_t temp[135 * 64];
int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS;
assert(w <= 64);
assert(h <= 64);
- assert(y_step_q4 <= 80);
- assert(x_step_q4 <= 80);
+ assert(y_step_q4 <= 32);
+ assert(x_step_q4 <= 32);
if (intermediate_height < h)
intermediate_height = h;
diff --git a/source/libvpx/vp9/common/vp9_frame_buffers.c b/source/libvpx/vp9/common/vp9_frame_buffers.c
index a0b1e03..733b3a9 100644
--- a/source/libvpx/vp9/common/vp9_frame_buffers.c
+++ b/source/libvpx/vp9/common/vp9_frame_buffers.c
@@ -76,6 +76,7 @@
int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) {
InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv;
(void)cb_priv;
- int_fb->in_use = 0;
+ if (int_fb)
+ int_fb->in_use = 0;
return 0;
}
diff --git a/source/libvpx/vp9/common/vp9_loopfilter.c b/source/libvpx/vp9/common/vp9_loopfilter.c
index efd0249..d4b58b6 100644
--- a/source/libvpx/vp9/common/vp9_loopfilter.c
+++ b/source/libvpx/vp9/common/vp9_loopfilter.c
@@ -502,7 +502,7 @@
const MB_MODE_INFO *mbmi = &mi->mbmi;
const BLOCK_SIZE block_size = mbmi->sb_type;
const TX_SIZE tx_size_y = mbmi->tx_size;
- const TX_SIZE tx_size_uv = get_uv_tx_size(mbmi);
+ const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1);
const int filter_level = get_filter_level(lfi_n, mbmi);
uint64_t *const left_y = &lfm->left_y[tx_size_y];
uint64_t *const above_y = &lfm->above_y[tx_size_y];
@@ -939,7 +939,7 @@
!(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1;
const int skip_this_r = skip_this && !block_edge_above;
const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
- ? get_uv_tx_size(&mi[0].mbmi)
+ ? get_uv_tx_size(&mi[0].mbmi, plane)
: mi[0].mbmi.tx_size;
const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
diff --git a/source/libvpx/vp9/common/vp9_mvref_common.c b/source/libvpx/vp9/common/vp9_mvref_common.c
index 61682c4..0fe58c5 100644
--- a/source/libvpx/vp9/common/vp9_mvref_common.c
+++ b/source/libvpx/vp9/common/vp9_mvref_common.c
@@ -11,181 +11,6 @@
#include "vp9/common/vp9_mvref_common.h"
-#define MVREF_NEIGHBOURS 8
-
-typedef struct position {
- int row;
- int col;
-} POSITION;
-
-typedef enum {
- BOTH_ZERO = 0,
- ZERO_PLUS_PREDICTED = 1,
- BOTH_PREDICTED = 2,
- NEW_PLUS_NON_INTRA = 3,
- BOTH_NEW = 4,
- INTRA_PLUS_NON_INTRA = 5,
- BOTH_INTRA = 6,
- INVALID_CASE = 9
-} motion_vector_context;
-
-// This is used to figure out a context for the ref blocks. The code flattens
-// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by
-// adding 9 for each intra block, 3 for each zero mv and 1 for each new
-// motion vector. This single number is then converted into a context
-// with a single lookup ( counter_to_context ).
-static const int mode_2_counter[MB_MODE_COUNT] = {
- 9, // DC_PRED
- 9, // V_PRED
- 9, // H_PRED
- 9, // D45_PRED
- 9, // D135_PRED
- 9, // D117_PRED
- 9, // D153_PRED
- 9, // D207_PRED
- 9, // D63_PRED
- 9, // TM_PRED
- 0, // NEARESTMV
- 0, // NEARMV
- 3, // ZEROMV
- 1, // NEWMV
-};
-
-// There are 3^3 different combinations of 3 counts that can be either 0,1 or
-// 2. However the actual count can never be greater than 2 so the highest
-// counter we need is 18. 9 is an invalid counter that's never used.
-static const int counter_to_context[19] = {
- BOTH_PREDICTED, // 0
- NEW_PLUS_NON_INTRA, // 1
- BOTH_NEW, // 2
- ZERO_PLUS_PREDICTED, // 3
- NEW_PLUS_NON_INTRA, // 4
- INVALID_CASE, // 5
- BOTH_ZERO, // 6
- INVALID_CASE, // 7
- INVALID_CASE, // 8
- INTRA_PLUS_NON_INTRA, // 9
- INTRA_PLUS_NON_INTRA, // 10
- INVALID_CASE, // 11
- INTRA_PLUS_NON_INTRA, // 12
- INVALID_CASE, // 13
- INVALID_CASE, // 14
- INVALID_CASE, // 15
- INVALID_CASE, // 16
- INVALID_CASE, // 17
- BOTH_INTRA // 18
-};
-
-static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
- // 4X4
- {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
- // 4X8
- {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
- // 8X4
- {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
- // 8X8
- {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
- // 8X16
- {{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}},
- // 16X8
- {{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}},
- // 16X16
- {{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
- // 16X32
- {{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}},
- // 32X16
- {{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
- // 32X32
- {{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
- // 32X64
- {{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}},
- // 64X32
- {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
- // 64X64
- {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}
-};
-
-static const int idx_n_column_to_subblock[4][2] = {
- {1, 2},
- {1, 3},
- {3, 2},
- {3, 3}
-};
-
-// clamp_mv_ref
-#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
-
-static void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
- clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
- xd->mb_to_right_edge + MV_BORDER,
- xd->mb_to_top_edge - MV_BORDER,
- xd->mb_to_bottom_edge + MV_BORDER);
-}
-
-// This function returns either the appropriate sub block or block's mv
-// on whether the block_size < 8x8 and we have check_sub_blocks set.
-static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
- int search_col, int block_idx) {
- return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
- ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
- .as_mv[which_mv]
- : candidate->mbmi.mv[which_mv];
-}
-
-
-// Performs mv sign inversion if indicated by the reference frame combination.
-static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
- const MV_REFERENCE_FRAME this_ref_frame,
- const int *ref_sign_bias) {
- int_mv mv = mbmi->mv[ref];
- if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
- mv.as_mv.row *= -1;
- mv.as_mv.col *= -1;
- }
- return mv;
-}
-
-// This macro is used to add a motion vector mv_ref list if it isn't
-// already in the list. If it's the second motion vector it will also
-// skip all additional processing and jump to done!
-#define ADD_MV_REF_LIST(mv) \
- do { \
- if (refmv_count) { \
- if ((mv).as_int != mv_ref_list[0].as_int) { \
- mv_ref_list[refmv_count] = (mv); \
- goto Done; \
- } \
- } else { \
- mv_ref_list[refmv_count++] = (mv); \
- } \
- } while (0)
-
-// If either reference frame is different, not INTRA, and they
-// are different from each other scale and add the mv to our list.
-#define IF_DIFF_REF_FRAME_ADD_MV(mbmi) \
- do { \
- if (is_inter_block(mbmi)) { \
- if ((mbmi)->ref_frame[0] != ref_frame) \
- ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias)); \
- if (has_second_ref(mbmi) && \
- (mbmi)->ref_frame[1] != ref_frame && \
- (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
- ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias)); \
- } \
- } while (0)
-
-
-// Checks that the given mi_row, mi_col and search point
-// are inside the borders of the tile.
-static INLINE int is_inside(const TileInfo *const tile,
- int mi_col, int mi_row, int mi_rows,
- const POSITION *mi_pos) {
- return !(mi_row + mi_pos->row < 0 ||
- mi_col + mi_pos->col < tile->mi_col_start ||
- mi_row + mi_pos->row >= mi_rows ||
- mi_col + mi_pos->col >= tile->mi_col_end);
-}
-
// This function searches the neighbourhood of a given MB/SB
// to try and find candidate reference vectors.
static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
diff --git a/source/libvpx/vp9/common/vp9_mvref_common.h b/source/libvpx/vp9/common/vp9_mvref_common.h
index 903ac02..7bce3fa 100644
--- a/source/libvpx/vp9/common/vp9_mvref_common.h
+++ b/source/libvpx/vp9/common/vp9_mvref_common.h
@@ -21,6 +21,181 @@
#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\
VP9_INTERP_EXTEND) << 3)
+#define MVREF_NEIGHBOURS 8
+
+typedef struct position {
+ int row;  // row offset; is_inside() adds it to mi_row
+ int col;  // column offset; is_inside() adds it to mi_col
+} POSITION;
+
+typedef enum {  // values stored in counter_to_context[]
+ BOTH_ZERO = 0,
+ ZERO_PLUS_PREDICTED = 1,
+ BOTH_PREDICTED = 2,
+ NEW_PLUS_NON_INTRA = 3,
+ BOTH_NEW = 4,
+ INTRA_PLUS_NON_INTRA = 5,
+ BOTH_INTRA = 6,
+ INVALID_CASE = 9  // marks counter_to_context[] slots with no valid meaning
+} motion_vector_context;
+
+// Per-mode weight used to figure out a context for the ref blocks. Three
+// tallies (intra, zero mv, new mv), each with possible counts 0, 1 & 2, are
+// flattened by adding 9 for each intra block, 3 for each zero mv and 1 for
+// each new motion vector. The resulting single number is then converted
+// into a context with a single lookup ( counter_to_context ).
+static const int mode_2_counter[MB_MODE_COUNT] = {
+ 9, // DC_PRED
+ 9, // V_PRED
+ 9, // H_PRED
+ 9, // D45_PRED
+ 9, // D135_PRED
+ 9, // D117_PRED
+ 9, // D153_PRED
+ 9, // D207_PRED
+ 9, // D63_PRED
+ 9, // TM_PRED
+ 0, // NEARESTMV
+ 0, // NEARMV
+ 3, // ZEROMV
+ 1, // NEWMV
+};
+
+// There are 3^3 different combinations of 3 counts that can be either 0, 1
+// or 2. However at most two blocks are counted, so the highest counter
+// needed is 18 (9 + 9). Sums needing more blocks hold INVALID_CASE (= 9).
+static const int counter_to_context[19] = {
+ BOTH_PREDICTED, // 0
+ NEW_PLUS_NON_INTRA, // 1
+ BOTH_NEW, // 2
+ ZERO_PLUS_PREDICTED, // 3
+ NEW_PLUS_NON_INTRA, // 4
+ INVALID_CASE, // 5
+ BOTH_ZERO, // 6
+ INVALID_CASE, // 7
+ INVALID_CASE, // 8
+ INTRA_PLUS_NON_INTRA, // 9
+ INTRA_PLUS_NON_INTRA, // 10
+ INVALID_CASE, // 11
+ INTRA_PLUS_NON_INTRA, // 12
+ INVALID_CASE, // 13
+ INVALID_CASE, // 14
+ INVALID_CASE, // 15
+ INVALID_CASE, // 16
+ INVALID_CASE, // 17
+ BOTH_INTRA // 18
+};
+
+static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
+ // 4X4 -- every entry in this table is a {row, col} POSITION initializer
+ {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
+ // 4X8
+ {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
+ // 8X4
+ {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
+ // 8X8
+ {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
+ // 8X16
+ {{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}},
+ // 16X8
+ {{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}},
+ // 16X16
+ {{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
+ // 16X32
+ {{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}},
+ // 32X16
+ {{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
+ // 32X32
+ {{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
+ // 32X64
+ {{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}},
+ // 64X32
+ {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
+ // 64X64
+ {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}
+};
+
+static const int idx_n_column_to_subblock[4][2] = {  // [block_idx][search_col == 0]
+ {1, 2},
+ {1, 3},
+ {3, 2},
+ {3, 3}
+};
+
+// Extra border allowed beyond the edge distances when clamping reference mvs.
+#define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units
+
+// Clamps *mv via clamp_mv() to the xd->mb_to_{left,right,top,bottom}_edge
+// limits, each widened by MV_BORDER. Declared INLINE like the other helpers in
+// this header so that a file including the header without calling
+// clamp_mv_ref() does not end up with an unused static function.
+static INLINE void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
+  clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
+           xd->mb_to_right_edge + MV_BORDER,
+           xd->mb_to_top_edge - MV_BORDER,
+           xd->mb_to_bottom_edge + MV_BORDER);
+}
+
+// Returns the sub-block mv (chosen through idx_n_column_to_subblock) when
+// block_idx >= 0 and the candidate's sb_type < BLOCK_8X8, else the block's mv.
+static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
+ int search_col, int block_idx) {
+ return block_idx >= 0 && candidate->mbmi.sb_type < BLOCK_8X8
+ ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
+ .as_mv[which_mv]
+ : candidate->mbmi.mv[which_mv];
+}
+
+
+// Returns mbmi->mv[ref], sign-inverted when the two frames' sign biases differ.
+static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
+ const MV_REFERENCE_FRAME this_ref_frame,
+ const int *ref_sign_bias) {
+ int_mv mv = mbmi->mv[ref];  // local copy; mbmi itself is not modified
+ if (ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
+ mv.as_mv.row *= -1;  // negate both components
+ mv.as_mv.col *= -1;
+ }
+ return mv;
+}
+
+// Adds a motion vector to mv_ref_list. The first one is always stored; a later
+// one is stored only if it differs from mv_ref_list[0], and then jumps to Done.
+// The expanding scope must provide refmv_count, mv_ref_list and a Done label.
+#define ADD_MV_REF_LIST(mv) \
+ do { \
+ if (refmv_count) { \
+ if ((mv).as_int != mv_ref_list[0].as_int) { \
+ mv_ref_list[refmv_count] = (mv); \
+ goto Done; \
+ } \
+ } else { \
+ mv_ref_list[refmv_count++] = (mv); \
+ } \
+ } while (0)
+
+// For an inter block, adds (through scale_mv) each mv whose reference frame
+// is not ref_frame; the second mv is also skipped if it equals the first.
+#define IF_DIFF_REF_FRAME_ADD_MV(mbmi) \
+ do { \
+ if (is_inter_block(mbmi)) { \
+ if ((mbmi)->ref_frame[0] != ref_frame) \
+ ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias)); \
+ if (has_second_ref(mbmi) && \
+ (mbmi)->ref_frame[1] != ref_frame && \
+ (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
+ ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias)); \
+ } \
+ } while (0)
+
+
+// Returns nonzero when (mi_row, mi_col) offset by *mi_pos lies within rows
+// [0, mi_rows) and columns [tile->mi_col_start, tile->mi_col_end).
+static INLINE int is_inside(const TileInfo *const tile,
+ int mi_col, int mi_row, int mi_rows,
+ const POSITION *mi_pos) {
+ return !(mi_row + mi_pos->row < 0 ||
+ mi_col + mi_pos->col < tile->mi_col_start ||
+ mi_row + mi_pos->row >= mi_rows ||
+ mi_col + mi_pos->col >= tile->mi_col_end);
+}
+
// TODO(jingning): this mv clamping function should be block size dependent.
static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
diff --git a/source/libvpx/vp9/common/vp9_quant_common.c b/source/libvpx/vp9/common/vp9_quant_common.c
index def1255..3332e58 100644
--- a/source/libvpx/vp9/common/vp9_quant_common.c
+++ b/source/libvpx/vp9/common/vp9_quant_common.c
@@ -12,7 +12,6 @@
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_seg_common.h"
-#if 1
static const int16_t dc_qlookup[QINDEX_RANGE] = {
4, 8, 8, 9, 10, 11, 12, 12,
13, 14, 15, 16, 17, 18, 19, 19,
@@ -83,44 +82,6 @@
1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828,
};
-void vp9_init_quant_tables(void) { }
-#else
-static int16_t dc_qlookup[QINDEX_RANGE];
-static int16_t ac_qlookup[QINDEX_RANGE];
-
-#define ACDC_MIN 8
-
-// TODO(dkovalev) move to common and reuse
-static double poly3(double a, double b, double c, double d, double x) {
- return a*x*x*x + b*x*x + c*x + d;
-}
-
-void vp9_init_quant_tables() {
- int i, val = 4;
-
- // A "real" q of 1.0 forces lossless mode.
- // In practice non lossless Q's between 1.0 and 2.0 (represented here by
- // integer values from 5-7 give poor rd results (lower psnr and often
- // larger size than the lossless encode. To block out those "not very useful"
- // values we increment the ac and dc q lookup values by 4 after position 0.
- ac_qlookup[0] = val;
- dc_qlookup[0] = val;
- val += 4;
-
- for (i = 1; i < QINDEX_RANGE; i++) {
- const int ac_val = val;
-
- val = (int)(val * 1.01975);
- if (val == ac_val)
- ++val;
-
- ac_qlookup[i] = (int16_t)ac_val;
- dc_qlookup[i] = (int16_t)MAX(ACDC_MIN, poly3(0.000000305, -0.00065, 0.9,
- 0.5, ac_val));
- }
-}
-#endif
-
int16_t vp9_dc_quant(int qindex, int delta) {
return dc_qlookup[clamp(qindex + delta, 0, MAXQ)];
}
diff --git a/source/libvpx/vp9/common/vp9_quant_common.h b/source/libvpx/vp9/common/vp9_quant_common.h
index 5811040..d1545d9 100644
--- a/source/libvpx/vp9/common/vp9_quant_common.h
+++ b/source/libvpx/vp9/common/vp9_quant_common.h
@@ -22,8 +22,6 @@
#define QINDEX_RANGE (MAXQ - MINQ + 1)
#define QINDEX_BITS 8
-void vp9_init_quant_tables();
-
int16_t vp9_dc_quant(int qindex, int delta);
int16_t vp9_ac_quant(int qindex, int delta);
diff --git a/source/libvpx/vp9/common/vp9_reconinter.c b/source/libvpx/vp9/common/vp9_reconinter.c
index edc36d7..d4fcb62 100644
--- a/source/libvpx/vp9/common/vp9_reconinter.c
+++ b/source/libvpx/vp9/common/vp9_reconinter.c
@@ -113,6 +113,18 @@
return res;
}
+static INLINE int round_mv_comp_q2(int value) {
+ return (value < 0 ? value - 1 : value + 1) / 2;  // value / 2, ties rounded away from zero
+}
+
+static MV mi_mv_pred_q2(const MODE_INFO *mi, int idx, int block0, int block1) {
+ MV res = { round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.row +
+ mi->bmi[block1].as_mv[idx].as_mv.row),
+ round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.col +
+ mi->bmi[block1].as_mv[idx].as_mv.col) };
+ return res;  // rounded per-component mean of the two sub-block mvs for ref idx
+}
+
// TODO(jkoleszar): yet another mv clamping function :-(
MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv,
int bw, int bh, int ss_x, int ss_y) {
@@ -139,6 +151,29 @@
return clamped_mv;
}
+// Picks the motion vector used for sub-block `block` of a sub-8x8 partition in
+// the plane described by pd, for reference `ref`:
+//   - neither subsampling_x nor subsampling_y set: the sub-block's own mv;
+//   - only subsampling_y set: mean of sub-blocks `block` and `block + 2`;
+//   - only subsampling_x set: mean of sub-blocks `block` and `block + 1`;
+//   - both set: the result of mi_mv_pred_q4(mi, ref).
+// `plane` is accepted for the callers' convenience but is not consulted.
+static MV average_split_mvs(const struct macroblockd_plane *pd, int plane,
+                            const MODE_INFO *mi, int ref, int block) {
+  // Bit 1 reflects subsampling_x, bit 0 reflects subsampling_y.
+  const int ss_idx = ((pd->subsampling_x > 0) << 1) | (pd->subsampling_y > 0);
+  MV res = {0, 0};
+  (void)plane;
+  switch (ss_idx) {
+    case 0:
+      res = mi->bmi[block].as_mv[ref].as_mv;
+      break;
+    case 1:
+      res = mi_mv_pred_q2(mi, ref, block, block + 2);
+      break;
+    case 2:
+      res = mi_mv_pred_q2(mi, ref, block, block + 1);
+      break;
+    case 3:
+      res = mi_mv_pred_q4(mi, ref);
+      break;
+    default:
+      // ss_idx is built from two bits, so this is unreachable. The check must
+      // use && to state the 0..3 range; with || it could never fire.
+      assert(ss_idx <= 3 && ss_idx >= 0);
+      break;
+  }
+  return res;
+}
+
static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
int bw, int bh,
int x, int y, int w, int h,
@@ -154,14 +189,8 @@
struct buf_2d *const pre_buf = &pd->pre[ref];
struct buf_2d *const dst_buf = &pd->dst;
uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
-
- // TODO(jkoleszar): All chroma MVs in SPLITMV mode are taken as the
- // same MV (the average of the 4 luma MVs) but we could do something
- // smarter for non-4:2:0. Just punt for now, pending the changes to get
- // rid of SPLITMV mode entirely.
const MV mv = mi->mbmi.sb_type < BLOCK_8X8
- ? (plane == 0 ? mi->bmi[block].as_mv[ref].as_mv
- : mi_mv_pred_q4(mi, ref))
+ ? average_split_mvs(pd, plane, mi, ref, block)
: mi->mbmi.mv[ref].as_mv;
// TODO(jkoleszar): This clamping is done in the incorrect place for the
@@ -258,16 +287,11 @@
struct buf_2d *const pre_buf = &pd->pre[ref];
struct buf_2d *const dst_buf = &pd->dst;
uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
-
- // TODO(jkoleszar): All chroma MVs in SPLITMV mode are taken as the
- // same MV (the average of the 4 luma MVs) but we could do something
- // smarter for non-4:2:0. Just punt for now, pending the changes to get
- // rid of SPLITMV mode entirely.
const MV mv = mi->mbmi.sb_type < BLOCK_8X8
- ? (plane == 0 ? mi->bmi[block].as_mv[ref].as_mv
- : mi_mv_pred_q4(mi, ref))
+ ? average_split_mvs(pd, plane, mi, ref, block)
: mi->mbmi.mv[ref].as_mv;
+
// TODO(jkoleszar): This clamping is done in the incorrect place for the
// scaling case. It needs to be done on the scaled MV, not the pre-scaling
// MV. Note however that it performs the subsampling aware scaling so
diff --git a/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/source/libvpx/vp9/common/vp9_rtcd_defs.pl
index 68613ec..f52dccb 100644
--- a/source/libvpx/vp9/common/vp9_rtcd_defs.pl
+++ b/source/libvpx/vp9/common/vp9_rtcd_defs.pl
@@ -714,6 +714,9 @@
add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
specialize qw/vp9_subtract_block/, "$sse2_x86inc";
+add_proto qw/void vp9_quantize_fp/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+specialize qw/vp9_quantize_fp/, "$ssse3_x86_64";
+
add_proto qw/void vp9_quantize_b/, "const int16_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_b/, "$ssse3_x86_64";
diff --git a/source/libvpx/vp9/common/vp9_scale.c b/source/libvpx/vp9/common/vp9_scale.c
index d3405fc..2f58323 100644
--- a/source/libvpx/vp9/common/vp9_scale.c
+++ b/source/libvpx/vp9/common/vp9_scale.c
@@ -33,14 +33,6 @@
return (other_size << REF_SCALE_SHIFT) / this_size;
}
-static int check_scale_factors(int other_w, int other_h,
- int this_w, int this_h) {
- return 2 * this_w >= other_w &&
- 2 * this_h >= other_h &&
- this_w <= 16 * other_w &&
- this_h <= 16 * other_h;
-}
-
MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) {
const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf) & SUBPEL_MASK;
const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf) & SUBPEL_MASK;
@@ -54,7 +46,7 @@
void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
int other_w, int other_h,
int this_w, int this_h) {
- if (!check_scale_factors(other_w, other_h, this_w, this_h)) {
+ if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) {
sf->x_scale_fp = REF_INVALID_SCALE;
sf->y_scale_fp = REF_INVALID_SCALE;
return;
diff --git a/source/libvpx/vp9/common/vp9_scale.h b/source/libvpx/vp9/common/vp9_scale.h
index a9dda18..ad6f5d7 100644
--- a/source/libvpx/vp9/common/vp9_scale.h
+++ b/source/libvpx/vp9/common/vp9_scale.h
@@ -46,8 +46,16 @@
}
static INLINE int vp9_is_scaled(const struct scale_factors *sf) {
- return sf->x_scale_fp != REF_NO_SCALE ||
- sf->y_scale_fp != REF_NO_SCALE;
+ return vp9_is_valid_scale(sf) &&
+ (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE);
+}
+
+static INLINE int valid_ref_frame_size(int ref_width, int ref_height,
+ int this_width, int this_height) {
+ return 2 * this_width >= ref_width &&  // at least half the reference width
+ 2 * this_height >= ref_height &&  // at least half the reference height
+ this_width <= 16 * ref_width &&  // at most 16x the reference width
+ this_height <= 16 * ref_height;  // at most 16x the reference height
}
#ifdef __cplusplus
diff --git a/source/libvpx/vp9/decoder/vp9_decodeframe.c b/source/libvpx/vp9/decoder/vp9_decodeframe.c
index f36105f..8b96abb 100644
--- a/source/libvpx/vp9/decoder/vp9_decodeframe.c
+++ b/source/libvpx/vp9/decoder/vp9_decodeframe.c
@@ -410,13 +410,17 @@
vp9_reader* r, BLOCK_SIZE bsize) {
const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
PARTITION_TYPE partition;
- BLOCK_SIZE subsize;
+ BLOCK_SIZE subsize, uv_subsize;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
partition = read_partition(cm, xd, hbs, mi_row, mi_col, bsize, r);
subsize = get_subsize(bsize, partition);
+ uv_subsize = ss_size_lookup[subsize][cm->subsampling_x][cm->subsampling_y];
+ if (subsize >= BLOCK_8X8 && uv_subsize == BLOCK_INVALID)
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid block size.");
if (subsize < BLOCK_8X8) {
decode_block(cm, xd, tile, mi_row, mi_col, r, subsize);
} else {
@@ -667,9 +671,17 @@
if (!found)
read_frame_size(rb, &width, &height);
- if (width <= 0 || height <= 0)
- vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
- "Referenced frame with invalid size");
+ // Check that each of the frames that this frame references has valid
+ // dimensions.
+ for (i = 0; i < REFS_PER_FRAME; ++i) {
+ RefBuffer *const ref_frame = &cm->frame_refs[i];
+ const int ref_width = ref_frame->buf->y_width;
+ const int ref_height = ref_frame->buf->y_height;
+
+ if (!valid_ref_frame_size(ref_width, ref_height, width, height))
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Referenced frame has invalid size");
+ }
apply_frame_size(cm, width, height);
setup_display_size(cm, rb);
@@ -685,6 +697,10 @@
while (max_ones-- && vp9_rb_read_bit(rb))
cm->log2_tile_cols++;
+ if (cm->log2_tile_cols > 6)
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid number of tile columns");
+
// rows
cm->log2_tile_rows = vp9_rb_read_bit(rb);
if (cm->log2_tile_rows)
@@ -755,6 +771,7 @@
const uint8_t *data,
const uint8_t *data_end) {
VP9_COMMON *const cm = &pbi->common;
+ const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
@@ -767,7 +784,7 @@
CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
vpx_memalign(32, sizeof(LFWorkerData)));
pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
- if (pbi->max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) {
+ if (pbi->max_threads > 1 && !winterface->reset(&pbi->lf_worker)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Loop filter thread creation failed");
}
@@ -853,13 +870,13 @@
// decoding has completed: finish up the loop filter in this thread.
if (mi_row + MI_BLOCK_SIZE >= cm->mi_rows) continue;
- vp9_worker_sync(&pbi->lf_worker);
+ winterface->sync(&pbi->lf_worker);
lf_data->start = lf_start;
lf_data->stop = mi_row;
if (pbi->max_threads > 1) {
- vp9_worker_launch(&pbi->lf_worker);
+ winterface->launch(&pbi->lf_worker);
} else {
- vp9_worker_execute(&pbi->lf_worker);
+ winterface->execute(&pbi->lf_worker);
}
}
}
@@ -868,10 +885,10 @@
// Loopfilter remaining rows in the frame.
if (cm->lf.filter_level) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
- vp9_worker_sync(&pbi->lf_worker);
+ winterface->sync(&pbi->lf_worker);
lf_data->start = lf_data->stop;
lf_data->stop = cm->mi_rows;
- vp9_worker_execute(&pbi->lf_worker);
+ winterface->execute(&pbi->lf_worker);
}
// Get last tile data.
@@ -915,6 +932,7 @@
const uint8_t *data,
const uint8_t *data_end) {
VP9_COMMON *const cm = &pbi->common;
+ const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
const uint8_t *bit_reader_end = NULL;
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -941,11 +959,11 @@
VP9Worker *const worker = &pbi->tile_workers[i];
++pbi->num_tile_workers;
- vp9_worker_init(worker);
+ winterface->init(worker);
CHECK_MEM_ERROR(cm, worker->data1,
vpx_memalign(32, sizeof(TileWorkerData)));
CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo)));
- if (i < num_threads - 1 && !vp9_worker_reset(worker)) {
+ if (i < num_threads - 1 && !winterface->reset(worker)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Tile decoder thread creation failed");
}
@@ -1008,9 +1026,9 @@
worker->had_error = 0;
if (i == num_workers - 1 || n == tile_cols - 1) {
- vp9_worker_execute(worker);
+ winterface->execute(worker);
} else {
- vp9_worker_launch(worker);
+ winterface->launch(worker);
}
if (buf->col == tile_cols - 1) {
@@ -1022,7 +1040,7 @@
for (; i > 0; --i) {
VP9Worker *const worker = &pbi->tile_workers[i - 1];
- pbi->mb.corrupted |= !vp9_worker_sync(worker);
+ pbi->mb.corrupted |= !winterface->sync(worker);
}
if (final_worker > -1) {
TileWorkerData *const tile_data =
@@ -1138,12 +1156,12 @@
setup_frame_size(cm, rb);
} else {
pbi->refresh_frame_flags = vp9_rb_read_literal(rb, REF_FRAMES);
-
for (i = 0; i < REFS_PER_FRAME; ++i) {
const int ref = vp9_rb_read_literal(rb, REF_FRAMES_LOG2);
const int idx = cm->ref_frame_map[ref];
- cm->frame_refs[i].idx = idx;
- cm->frame_refs[i].buf = &cm->frame_bufs[idx].buf;
+ RefBuffer *const ref_frame = &cm->frame_refs[i];
+ ref_frame->idx = idx;
+ ref_frame->buf = &cm->frame_bufs[idx].buf;
cm->ref_frame_sign_bias[LAST_FRAME + i] = vp9_rb_read_bit(rb);
}
@@ -1322,7 +1340,8 @@
const uint8_t **p_data_end) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- struct vp9_read_bit_buffer rb = { 0 };
+ struct vp9_read_bit_buffer rb = { NULL, NULL, 0, NULL, 0};
+
uint8_t clear_data[MAX_VP9_HEADER_SIZE];
const size_t first_partition_size = read_uncompressed_header(pbi,
init_read_bit_buffer(pbi, &rb, data, data_end, clear_data));
diff --git a/source/libvpx/vp9/decoder/vp9_decoder.c b/source/libvpx/vp9/decoder/vp9_decoder.c
index bd68420..d154e9d 100644
--- a/source/libvpx/vp9/decoder/vp9_decoder.c
+++ b/source/libvpx/vp9/decoder/vp9_decoder.c
@@ -37,7 +37,6 @@
if (!init_done) {
vp9_init_neighbors();
- vp9_init_quant_tables();
init_done = 1;
}
}
@@ -77,7 +76,7 @@
cm->error.setjmp = 0;
- vp9_worker_init(&pbi->lf_worker);
+ vp9_get_worker_interface()->init(&pbi->lf_worker);
return pbi;
}
@@ -87,12 +86,12 @@
int i;
vp9_remove_common(cm);
- vp9_worker_end(&pbi->lf_worker);
+ vp9_get_worker_interface()->end(&pbi->lf_worker);
vpx_free(pbi->lf_worker.data1);
vpx_free(pbi->tile_data);
for (i = 0; i < pbi->num_tile_workers; ++i) {
VP9Worker *const worker = &pbi->tile_workers[i];
- vp9_worker_end(worker);
+ vp9_get_worker_interface()->end(worker);
vpx_free(worker->data1);
vpx_free(worker->data2);
}
diff --git a/source/libvpx/vp9/decoder/vp9_dthread.c b/source/libvpx/vp9/decoder/vp9_dthread.c
index bc6c418..5dda49a 100644
--- a/source/libvpx/vp9/decoder/vp9_dthread.c
+++ b/source/libvpx/vp9/decoder/vp9_dthread.c
@@ -124,7 +124,7 @@
static int loop_filter_row_worker(void *arg1, void *arg2) {
TileWorkerData *const tile_data = (TileWorkerData*)arg1;
LFWorkerData *const lf_data = &tile_data->lfdata;
-
+ (void) arg2;
loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
lf_data->start, lf_data->stop, lf_data->y_only,
lf_data->lf_sync, lf_data->num_lf_workers);
@@ -138,6 +138,7 @@
int frame_filter_level,
int y_only) {
VP9LfSync *const lf_sync = &pbi->lf_row_sync;
+ const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
// Number of superblock rows and cols
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -197,15 +198,15 @@
// Start loopfiltering
if (i == num_workers - 1) {
- vp9_worker_execute(worker);
+ winterface->execute(worker);
} else {
- vp9_worker_launch(worker);
+ winterface->launch(worker);
}
}
// Wait till all rows are finished
for (i = 0; i < num_workers; ++i) {
- vp9_worker_sync(&pbi->tile_workers[i]);
+ winterface->sync(&pbi->tile_workers[i]);
}
}
diff --git a/source/libvpx/vp9/decoder/vp9_thread.c b/source/libvpx/vp9/decoder/vp9_thread.c
index 5d31d3d..348bdf6 100644
--- a/source/libvpx/vp9/decoder/vp9_thread.c
+++ b/source/libvpx/vp9/decoder/vp9_thread.c
@@ -11,71 +11,79 @@
//
// Original source:
// http://git.chromium.org/webm/libwebp.git
-// 100644 blob eff8f2a8c20095aade3c292b0e9292dac6cb3587 src/utils/thread.c
-
+// 100644 blob 08ad4e1fecba302bf1247645e84a7d2779956bc3 src/utils/thread.c
#include <assert.h>
#include <string.h> // for memset()
#include "./vp9_thread.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "vpx_mem/vpx_mem.h"
#if CONFIG_MULTITHREAD
+struct VP9WorkerImpl {
+ pthread_mutex_t mutex_;  // locked by thread_loop() and change_state()
+ pthread_cond_t condition_;  // waited on / signalled under mutex_
+ pthread_t thread_;  // created in reset(), joined in end()
+};
+
//------------------------------------------------------------------------------
-static THREADFN thread_loop(void *ptr) { // thread loop
- VP9Worker* const worker = (VP9Worker*)ptr;
+static void execute(VP9Worker *const worker); // Forward declaration.
+
+static THREADFN thread_loop(void *ptr) {
+ VP9Worker *const worker = (VP9Worker*)ptr;
int done = 0;
while (!done) {
- pthread_mutex_lock(&worker->mutex_);
+ pthread_mutex_lock(&worker->impl_->mutex_);
while (worker->status_ == OK) { // wait in idling mode
- pthread_cond_wait(&worker->condition_, &worker->mutex_);
+ pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
}
if (worker->status_ == WORK) {
- vp9_worker_execute(worker);
+ execute(worker);
worker->status_ = OK;
} else if (worker->status_ == NOT_OK) { // finish the worker
done = 1;
}
- // signal to the main thread that we're done (for Sync())
- pthread_cond_signal(&worker->condition_);
- pthread_mutex_unlock(&worker->mutex_);
+ // signal to the main thread that we're done (for sync())
+ pthread_cond_signal(&worker->impl_->condition_);
+ pthread_mutex_unlock(&worker->impl_->mutex_);
}
return THREAD_RETURN(NULL); // Thread is finished
}
// main thread state control
-static void change_state(VP9Worker* const worker,
+static void change_state(VP9Worker *const worker,
VP9WorkerStatus new_status) {
- // no-op when attempting to change state on a thread that didn't come up
- if (worker->status_ < OK) return;
+ // No-op when attempting to change state on a thread that didn't come up.
+ // Checking status_ without acquiring the lock first would result in a data
+ // race.
+ if (worker->impl_ == NULL) return;
- pthread_mutex_lock(&worker->mutex_);
- // wait for the worker to finish
- while (worker->status_ != OK) {
- pthread_cond_wait(&worker->condition_, &worker->mutex_);
+ pthread_mutex_lock(&worker->impl_->mutex_);
+ if (worker->status_ >= OK) {
+ // wait for the worker to finish
+ while (worker->status_ != OK) {
+ pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
+ }
+ // assign new status and release the working thread if needed
+ if (new_status != OK) {
+ worker->status_ = new_status;
+ pthread_cond_signal(&worker->impl_->condition_);
+ }
}
- // assign new status and release the working thread if needed
- if (new_status != OK) {
- worker->status_ = new_status;
- pthread_cond_signal(&worker->condition_);
- }
- pthread_mutex_unlock(&worker->mutex_);
+ pthread_mutex_unlock(&worker->impl_->mutex_);
}
#endif // CONFIG_MULTITHREAD
//------------------------------------------------------------------------------
-void vp9_worker_init(VP9Worker* const worker) {
+static void init(VP9Worker *const worker) {
memset(worker, 0, sizeof(*worker));
worker->status_ = NOT_OK;
}
-int vp9_worker_sync(VP9Worker* const worker) {
+static int sync(VP9Worker *const worker) {
#if CONFIG_MULTITHREAD
change_state(worker, OK);
#endif
@@ -83,59 +91,93 @@
return !worker->had_error;
}
-int vp9_worker_reset(VP9Worker* const worker) {
+static int reset(VP9Worker *const worker) {
int ok = 1;
worker->had_error = 0;
if (worker->status_ < OK) {
#if CONFIG_MULTITHREAD
- if (pthread_mutex_init(&worker->mutex_, NULL) ||
- pthread_cond_init(&worker->condition_, NULL)) {
+ worker->impl_ = (VP9WorkerImpl*)vpx_calloc(1, sizeof(*worker->impl_));
+ if (worker->impl_ == NULL) {
return 0;
}
- pthread_mutex_lock(&worker->mutex_);
- ok = !pthread_create(&worker->thread_, NULL, thread_loop, worker);
+ if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) {
+ goto Error;
+ }
+ if (pthread_cond_init(&worker->impl_->condition_, NULL)) {
+ pthread_mutex_destroy(&worker->impl_->mutex_);
+ goto Error;
+ }
+ pthread_mutex_lock(&worker->impl_->mutex_);
+ ok = !pthread_create(&worker->impl_->thread_, NULL, thread_loop, worker);
if (ok) worker->status_ = OK;
- pthread_mutex_unlock(&worker->mutex_);
+ pthread_mutex_unlock(&worker->impl_->mutex_);
+ if (!ok) {
+ pthread_mutex_destroy(&worker->impl_->mutex_);
+ pthread_cond_destroy(&worker->impl_->condition_);
+ Error:
+ vpx_free(worker->impl_);
+ worker->impl_ = NULL;
+ return 0;
+ }
#else
worker->status_ = OK;
#endif
} else if (worker->status_ > OK) {
- ok = vp9_worker_sync(worker);
+ ok = sync(worker);
}
assert(!ok || (worker->status_ == OK));
return ok;
}
-void vp9_worker_execute(VP9Worker* const worker) {
+static void execute(VP9Worker *const worker) {
if (worker->hook != NULL) {
worker->had_error |= !worker->hook(worker->data1, worker->data2);
}
}
-void vp9_worker_launch(VP9Worker* const worker) {
+static void launch(VP9Worker *const worker) {
#if CONFIG_MULTITHREAD
change_state(worker, WORK);
#else
- vp9_worker_execute(worker);
+ execute(worker);
#endif
}
-void vp9_worker_end(VP9Worker* const worker) {
+static void end(VP9Worker *const worker) {
if (worker->status_ >= OK) {
#if CONFIG_MULTITHREAD
change_state(worker, NOT_OK);
- pthread_join(worker->thread_, NULL);
- pthread_mutex_destroy(&worker->mutex_);
- pthread_cond_destroy(&worker->condition_);
+ pthread_join(worker->impl_->thread_, NULL);
+ pthread_mutex_destroy(&worker->impl_->mutex_);
+ pthread_cond_destroy(&worker->impl_->condition_);
#else
worker->status_ = NOT_OK;
#endif
}
+ vpx_free(worker->impl_);
+ worker->impl_ = NULL;
assert(worker->status_ == NOT_OK);
}
//------------------------------------------------------------------------------
-#if defined(__cplusplus) || defined(c_plusplus)
-} // extern "C"
-#endif
+static VP9WorkerInterface g_worker_interface = {
+ init, reset, sync, launch, execute, end  // this file's static implementations
+};
+
+int vp9_set_worker_interface(const VP9WorkerInterface* const winterface) {
+ if (winterface == NULL ||
+ winterface->init == NULL || winterface->reset == NULL ||
+ winterface->sync == NULL || winterface->launch == NULL ||
+ winterface->execute == NULL || winterface->end == NULL) {
+ return 0;  // rejected: NULL table or a missing entry; nothing is changed
+ }
+ g_worker_interface = *winterface;  // copied by value into the file-level table
+ return 1;  // accepted
+}
+
+const VP9WorkerInterface *vp9_get_worker_interface(void) {
+ return &g_worker_interface;  // address of the file-level table, not a copy
+}
+
+//------------------------------------------------------------------------------
diff --git a/source/libvpx/vp9/decoder/vp9_thread.h b/source/libvpx/vp9/decoder/vp9_thread.h
index 2f8728d..864579c 100644
--- a/source/libvpx/vp9/decoder/vp9_thread.h
+++ b/source/libvpx/vp9/decoder/vp9_thread.h
@@ -11,8 +11,7 @@
//
// Original source:
// http://git.chromium.org/webm/libwebp.git
-// 100644 blob 13a61a4c84194c3374080cbf03d881d3cd6af40d src/utils/thread.h
-
+// 100644 blob 7bd451b124ae3b81596abfbcc823e3cb129d3a38 src/utils/thread.h
#ifndef VP9_DECODER_VP9_THREAD_H_
#define VP9_DECODER_VP9_THREAD_H_
@@ -163,40 +162,53 @@
// arguments (data1 and data2), and should return false in case of error.
typedef int (*VP9WorkerHook)(void*, void*);
-// Synchronize object used to launch job in the worker thread
+// Platform-dependent implementation details for the worker.
+typedef struct VP9WorkerImpl VP9WorkerImpl;
+
+// Synchronization object used to launch job in the worker thread
typedef struct {
-#if CONFIG_MULTITHREAD
- pthread_mutex_t mutex_;
- pthread_cond_t condition_;
- pthread_t thread_;
-#endif
+ VP9WorkerImpl *impl_;
VP9WorkerStatus status_;
VP9WorkerHook hook; // hook to call
- void* data1; // first argument passed to 'hook'
- void* data2; // second argument passed to 'hook'
+ void *data1; // first argument passed to 'hook'
+ void *data2; // second argument passed to 'hook'
int had_error; // return value of the last call to 'hook'
} VP9Worker;
-// Must be called first, before any other method.
-void vp9_worker_init(VP9Worker* const worker);
-// Must be called to initialize the object and spawn the thread. Re-entrant.
-// Will potentially launch the thread. Returns false in case of error.
-int vp9_worker_reset(VP9Worker* const worker);
-// Makes sure the previous work is finished. Returns true if worker->had_error
-// was not set and no error condition was triggered by the working thread.
-int vp9_worker_sync(VP9Worker* const worker);
-// Triggers the thread to call hook() with data1 and data2 argument. These
-// hook/data1/data2 can be changed at any time before calling this function,
-// but not be changed afterward until the next call to vp9_worker_sync().
-void vp9_worker_launch(VP9Worker* const worker);
-// This function is similar to vp9_worker_launch() except that it calls the
-// hook directly instead of using a thread. Convenient to bypass the thread
-// mechanism while still using the VP9Worker structs. vp9_worker_sync() must
-// still be called afterward (for error reporting).
-void vp9_worker_execute(VP9Worker* const worker);
-// Kill the thread and terminate the object. To use the object again, one
-// must call vp9_worker_reset() again.
-void vp9_worker_end(VP9Worker* const worker);
+// The interface for all thread-worker related functions. All these functions
+// must be implemented.
+typedef struct {
+ // Must be called first, before any other method.
+ void (*init)(VP9Worker *const worker);
+ // Must be called to initialize the object and spawn the thread. Re-entrant.
+ // Will potentially launch the thread. Returns false in case of error.
+ int (*reset)(VP9Worker *const worker);
+ // Makes sure the previous work is finished. Returns true if worker->had_error
+ // was not set and no error condition was triggered by the working thread.
+ int (*sync)(VP9Worker *const worker);
+ // Triggers the thread to call hook() with data1 and data2 arguments. These
+ // hook/data1/data2 values can be changed at any time before calling this
+ // function, but must not be changed afterward until the next call to sync().
+ void (*launch)(VP9Worker *const worker);
+ // This function is similar to launch() except that it calls the
+ // hook directly instead of using a thread. Convenient to bypass the thread
+ // mechanism while still using the VP9Worker structs. sync() must
+ // still be called afterward (for error reporting).
+ void (*execute)(VP9Worker *const worker);
+ // Kill the thread and terminate the object. To use the object again, one
+ // must call reset() again.
+ void (*end)(VP9Worker *const worker);
+} VP9WorkerInterface;
+
+// Install a new set of threading functions, overriding the defaults. This
+// should be done before any workers are started, i.e., before any encoding or
+// decoding takes place. The contents of the interface struct are copied; it
+// is safe to free the corresponding memory after this call. This function is
+// not thread-safe. Return false in case of invalid pointer or methods.
+int vp9_set_worker_interface(const VP9WorkerInterface *const winterface);
+
+// Retrieve the currently set thread worker interface.
+const VP9WorkerInterface *vp9_get_worker_interface(void);
//------------------------------------------------------------------------------
diff --git a/source/libvpx/vp9/encoder/vp9_aq_complexity.c b/source/libvpx/vp9/encoder/vp9_aq_complexity.c
index 0d6b41d..33f9239 100644
--- a/source/libvpx/vp9/encoder/vp9_aq_complexity.c
+++ b/source/libvpx/vp9/encoder/vp9_aq_complexity.c
@@ -15,8 +15,19 @@
#include "vp9/encoder/vp9_segmentation.h"
-static const double in_frame_q_adj_ratio[MAX_SEGMENTS] =
- {1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+#define AQ_C_SEGMENTS 3
+#define AQ_C_STRENGTHS 3
+static const int aq_c_active_segments[AQ_C_STRENGTHS] = {1, 2, 3};
+static const double aq_c_q_adj_factor[AQ_C_STRENGTHS][AQ_C_SEGMENTS] =
+ {{1.0, 1.0, 1.0}, {1.0, 2.0, 1.0}, {1.0, 1.5, 2.5}};
+static const double aq_c_transitions[AQ_C_STRENGTHS][AQ_C_SEGMENTS] =
+ {{1.0, 1.0, 1.0}, {1.0, 0.25, 0.0}, {1.0, 0.5, 0.25}};
+
+static int get_aq_c_strength(int q_index) {
+ // Approximate base quantizer (truncated to int)
+ int base_quant = vp9_ac_quant(q_index, 0) / 4;
+ return (base_quant > 20) + (base_quant > 45);
+}
void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
@@ -29,6 +40,8 @@
cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
int segment;
+ const int aq_strength = get_aq_c_strength(cm->base_qindex);
+ const int active_segments = aq_c_active_segments[aq_strength];
// Clear down the segment map.
vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
@@ -36,9 +49,17 @@
// Clear down the complexity map used for rd.
vpx_memset(cpi->complexity_map, 0, cm->mi_rows * cm->mi_cols);
- vp9_enable_segmentation(seg);
vp9_clearall_segfeatures(seg);
+ // Segmentation only makes sense if the target bits per SB is above a
+ // threshold. Below this the overheads will usually outweigh any benefit.
+ if (cpi->rc.sb64_target_rate < 256) {
+ vp9_disable_segmentation(seg);
+ return;
+ }
+
+ vp9_enable_segmentation(seg);
+
// Select delta coding method.
seg->abs_delta = SEGMENT_DELTADATA;
@@ -46,14 +67,14 @@
vp9_disable_segfeature(seg, 0, SEG_LVL_ALT_Q);
// Use some of the segments for in frame Q adjustment.
- for (segment = 1; segment < 2; segment++) {
+ for (segment = 1; segment < active_segments; ++segment) {
int qindex_delta =
vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex,
- in_frame_q_adj_ratio[segment]);
+ aq_c_q_adj_factor[aq_strength][segment]);
- // For AQ mode 2, we dont allow Q0 in a segment if the base Q is not 0.
- // Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment Q delta
- // is sometimes applied without going back around the rd loop.
+ // For AQ complexity mode, we don't allow Q0 in a segment if the base
+ // Q is not 0. Q0 (lossless) implies 4x4 only and in this mode a segment
+ // Q delta is sometimes applied without going back around the rd loop.
// This could lead to an illegal combination of partition size and q.
if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) {
qindex_delta = -cm->base_qindex + 1;
@@ -66,10 +87,15 @@
}
}
-// Select a segment for the current SB64
+// Select a segment for the current SB64 block.
+// The choice of segment for a block depends on the ratio of the projected
+// bits for the block vs a target average.
+// An "aq_strength" value determines how many segments are supported,
+// the set of transition points to use and the extent of the quantizer
+// adjustment for each segment (configured in vp9_setup_in_frame_q_adj()).
void vp9_select_in_frame_q_segment(VP9_COMP *cpi,
- int mi_row, int mi_col,
- int output_enabled, int projected_rate) {
+ int mi_row, int mi_col,
+ int output_enabled, int projected_rate) {
VP9_COMMON *const cm = &cpi->common;
const int mi_offset = mi_row * cm->mi_cols + mi_col;
@@ -89,11 +115,22 @@
// It is converted to bits * 256 units.
const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) /
(bw * bh);
+ const int aq_strength = get_aq_c_strength(cm->base_qindex);
+ const int active_segments = aq_c_active_segments[aq_strength];
- if (projected_rate < (target_rate / 4)) {
- segment = 1;
- } else {
- segment = 0;
+ // The number of segments considered and the transition points used to
+ // select them is determined by the "aq_strength" value.
+ // Currently this loop only supports segments that reduce Q (i.e. where
+ // there is undershoot).
+ // The loop counts down towards segment 0 which is the default segment
+ // with no Q adjustment.
+ segment = active_segments - 1;
+ while (segment > 0) {
+ if (projected_rate <
+ (target_rate * aq_c_transitions[aq_strength][segment])) {
+ break;
+ }
+ --segment;
}
if (target_rate > 0) {
diff --git a/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
index d1437d3..e7f0daa 100644
--- a/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -16,7 +16,6 @@
#include "vp9/common/vp9_seg_common.h"
#include "vp9/encoder/vp9_ratectrl.h"
-#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
struct CYCLIC_REFRESH {
diff --git a/source/libvpx/vp9/encoder/vp9_aq_variance.c b/source/libvpx/vp9/encoder/vp9_aq_variance.c
index ae2a163..56db95e 100644
--- a/source/libvpx/vp9/encoder/vp9_aq_variance.c
+++ b/source/libvpx/vp9/encoder/vp9_aq_variance.c
@@ -15,7 +15,7 @@
#include "vp9/common/vp9_seg_common.h"
#include "vp9/encoder/vp9_ratectrl.h"
-#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/common/vp9_systemdependent.h"
diff --git a/source/libvpx/vp9/encoder/vp9_bitstream.c b/source/libvpx/vp9/encoder/vp9_bitstream.c
index 76f5e7b..1bf826a 100644
--- a/source/libvpx/vp9/encoder/vp9_bitstream.c
+++ b/source/libvpx/vp9/encoder/vp9_bitstream.c
@@ -890,14 +890,8 @@
}
static int get_refresh_mask(VP9_COMP *cpi) {
- // Should the GF or ARF be updated using the transmitted frame or buffer
-#if CONFIG_MULTIPLE_ARF
- if (!cpi->multi_arf_enabled && cpi->refresh_golden_frame &&
- !cpi->refresh_alt_ref_frame) {
-#else
- if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame &&
- !cpi->use_svc) {
-#endif
+ if (!cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
+ cpi->rc.is_src_frame_alt_ref && !cpi->use_svc) {
// Preserve the previously existing golden frame and update the frame in
// the alt ref slot instead. This is highly specific to the use of
// alt-ref as a forward reference, and this needs to be generalized as
@@ -910,15 +904,10 @@
(cpi->refresh_golden_frame << cpi->alt_fb_idx);
} else {
int arf_idx = cpi->alt_fb_idx;
-#if CONFIG_MULTIPLE_ARF
- // Determine which ARF buffer to use to encode this ARF frame.
- if (cpi->multi_arf_enabled) {
- int sn = cpi->sequence_number;
- arf_idx = (cpi->frame_coding_order[sn] < 0) ?
- cpi->arf_buffer_idx[sn + 1] :
- cpi->arf_buffer_idx[sn];
+ if ((cpi->pass == 2) && cpi->multi_arf_allowed) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ arf_idx = gf_group->arf_update_idx[gf_group->index];
}
-#endif
return (cpi->refresh_last_frame << cpi->lst_fb_idx) |
(cpi->refresh_golden_frame << cpi->gld_fb_idx) |
(cpi->refresh_alt_ref_frame << arf_idx);
diff --git a/source/libvpx/vp9/encoder/vp9_block.h b/source/libvpx/vp9/encoder/vp9_block.h
index 2463ed0..ab7991e 100644
--- a/source/libvpx/vp9/encoder/vp9_block.h
+++ b/source/libvpx/vp9/encoder/vp9_block.h
@@ -20,6 +20,12 @@
extern "C" {
#endif
+typedef struct {
+ unsigned int sse;
+ int sum;
+ unsigned int var;
+} diff;
+
struct macroblock_plane {
DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]);
int16_t *qcoeff;
@@ -29,6 +35,7 @@
// Quantizer setings
int16_t *quant_fp;
+ int16_t *round_fp;
int16_t *quant;
int16_t *quant_shift;
int16_t *zbin;
@@ -93,8 +100,6 @@
int encode_breakout;
- int in_active_map;
-
// note that token_costs is the cost when eob node is skipped
vp9_coeff_cost token_costs[TX_SIZES];
@@ -106,6 +111,9 @@
int use_lp32x32fdct;
int skip_encode;
+ // use fast quantization process
+ int quant_fp;
+
// skip forward transform and quantization
int skip_txfm;
diff --git a/source/libvpx/vp9/encoder/vp9_denoiser.c b/source/libvpx/vp9/encoder/vp9_denoiser.c
index fc939c5..f6393e0 100644
--- a/source/libvpx/vp9/encoder/vp9_denoiser.c
+++ b/source/libvpx/vp9/encoder/vp9_denoiser.c
@@ -9,38 +9,90 @@
*/
#include <assert.h>
+#include <limits.h>
#include "vpx_scale/yv12config.h"
#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_denoiser.h"
+/* The VP9 denoiser is a work-in-progress. It currently is only designed to work
+ * with speed 6, though it (inexplicably) seems to also work with speed 5 (one
+ * would need to modify the source code in vp9_pickmode.c and vp9_encoder.c to
+ * make the calls to the vp9_denoiser_* functions when in speed 5).
+ *
+ * The implementation is very similar to that of the VP8 denoiser. While
+ * choosing the motion vectors / reference frames, the denoiser is run, and if
+ * it did not modify the signal too much, the denoised block is copied to the
+ * signal.
+ */
+
+#ifdef OUTPUT_YUV_DENOISED
+static void make_grayscale(YV12_BUFFER_CONFIG *yuv);
+#endif
+
static const int widths[] = {4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64};
static const int heights[] = {4, 8, 4, 8, 16, 8, 16, 32, 16, 32, 64, 32, 64};
-int vp9_denoiser_filter() {
- return 0;
+static int absdiff_thresh(BLOCK_SIZE bs, int increase_denoising) {
+ (void)bs;
+ return 3 + (increase_denoising ? 1 : 0);
}
-static int update_running_avg(const uint8_t *mc_avg, int mc_avg_stride,
- uint8_t *avg, int avg_stride,
- const uint8_t *sig, int sig_stride,
- int increase_denoising, BLOCK_SIZE bs) {
+static int delta_thresh(BLOCK_SIZE bs, int increase_denoising) {
+ (void)bs;
+ (void)increase_denoising;
+ return 4;
+}
+
+static int noise_motion_thresh(BLOCK_SIZE bs, int increase_denoising) {
+ (void)bs;
+ (void)increase_denoising;
+ return 25 * 25;
+}
+
+static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) {
+ return widths[bs] * heights[bs] * (increase_denoising ? 60 : 40);
+}
+
+// Returns 0 when squared MV length exceeds the motion limit, else area * 20.
+static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising,
+                           int mv_row, int mv_col) {
+  const int mv_sq_len = mv_row * mv_row + mv_col * mv_col;
+  const int motion_limit = noise_motion_thresh(bs, increase_denoising);
+
+  if (mv_sq_len > motion_limit) return 0;
+  return widths[bs] * heights[bs] * 20;
+}
+
+static int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising) {
+ return widths[bs] * heights[bs] * (increase_denoising ? 3 : 2);
+}
+
+static int total_adj_weak_thresh(BLOCK_SIZE bs, int increase_denoising) {
+ return widths[bs] * heights[bs] * (increase_denoising ? 3 : 2);
+}
+
+static VP9_DENOISER_DECISION denoiser_filter(const uint8_t *sig, int sig_stride,
+ const uint8_t *mc_avg,
+ int mc_avg_stride,
+ uint8_t *avg, int avg_stride,
+ int increase_denoising,
+ BLOCK_SIZE bs) {
int r, c;
- int diff, adj, absdiff;
- int shift_inc1 = 0, shift_inc2 = 1;
+ const uint8_t *sig_start = sig;
+ const uint8_t *mc_avg_start = mc_avg;
+ uint8_t *avg_start = avg;
+ int diff, adj, absdiff, delta;
int adj_val[] = {3, 4, 6};
int total_adj = 0;
- if (increase_denoising) {
- shift_inc1 = 1;
- shift_inc2 = 2;
- }
-
+ // First attempt to apply a strong temporal denoising filter.
for (r = 0; r < heights[bs]; ++r) {
for (c = 0; c < widths[bs]; ++c) {
diff = mc_avg[c] - sig[c];
absdiff = abs(diff);
- if (absdiff <= 3 + shift_inc1) {
+ if (absdiff <= absdiff_thresh(bs, increase_denoising)) {
avg[c] = mc_avg[c];
total_adj += diff;
} else {
@@ -68,7 +120,47 @@
avg += avg_stride;
mc_avg += mc_avg_stride;
}
- return total_adj;
+
+ // If the strong filter did not modify the signal too much, we're all set.
+ if (abs(total_adj) <= total_adj_strong_thresh(bs, increase_denoising)) {
+ return FILTER_BLOCK;
+ }
+
+ // Otherwise, we try to dampen the filter if the delta is not too high.
+ delta = ((abs(total_adj) - total_adj_strong_thresh(bs, increase_denoising))
+ >> 8) + 1;
+ if (delta > delta_thresh(bs, increase_denoising)) {
+ return COPY_BLOCK;
+ }
+
+ mc_avg = mc_avg_start;
+ avg = avg_start;
+ sig = sig_start;
+ for (r = 0; r < heights[bs]; ++r) {
+ for (c = 0; c < widths[bs]; ++c) {
+ diff = mc_avg[c] - sig[c];
+ adj = abs(diff);
+ if (adj > delta) {
+ adj = delta;
+ }
+ if (diff > 0) {
+ avg[c] = MAX(0, avg[c] - adj);
+ total_adj += adj;
+ } else {
+ avg[c] = MIN(UINT8_MAX, avg[c] + adj);
+ total_adj -= adj;
+ }
+ }
+ sig += sig_stride;
+ avg += avg_stride;
+ mc_avg += mc_avg_stride;
+ }
+
+ // We can use the filter if it has been sufficiently dampened
+ if (abs(total_adj) <= total_adj_weak_thresh(bs, increase_denoising)) {
+ return FILTER_BLOCK;
+ }
+ return COPY_BLOCK;
}
static uint8_t *block_start(uint8_t *framebuf, int stride,
@@ -76,51 +168,174 @@
return framebuf + (stride * mi_row * 8) + (mi_col * 8);
}
-void copy_block(uint8_t *dest, int dest_stride,
- uint8_t *src, int src_stride, BLOCK_SIZE bs) {
- int r, c;
+static void copy_block(uint8_t *dest, int dest_stride,
+ const uint8_t *src, int src_stride, BLOCK_SIZE bs) {
+ int r;
for (r = 0; r < heights[bs]; ++r) {
- for (c = 0; c < widths[bs]; ++c) {
- dest[c] = src[c];
- }
+ vpx_memcpy(dest, src, widths[bs]);
dest += dest_stride;
src += src_stride;
}
}
+// Points the MACROBLOCKD at the denoiser's buffers, runs
+// vp9_build_inter_predictors_sby() so the prediction lands in
+// denoiser->mc_running_avg_y, and returns FILTER_BLOCK or COPY_BLOCK.
+static VP9_DENOISER_DECISION perform_motion_compensation(
+    VP9_DENOISER *denoiser, MACROBLOCK *mb, BLOCK_SIZE bs,
+    int increase_denoising, int mi_row, int mi_col) {
+  int mv_col, mv_row;
+  int sse_diff = denoiser->zero_mv_sse - denoiser->best_sse;
+  MV_REFERENCE_FRAME frame;
+  MACROBLOCKD *filter_mbd = &mb->e_mbd;
+  MB_MODE_INFO *mbmi = &filter_mbd->mi[0]->mbmi;
+
+  // We will restore these after motion compensation.
+  MB_MODE_INFO saved_mbmi = *mbmi;
+  struct buf_2d saved_dst = filter_mbd->plane[0].dst;
+  struct buf_2d saved_pre[2];
+  saved_pre[0] = filter_mbd->plane[0].pre[0];
+  saved_pre[1] = filter_mbd->plane[0].pre[1];
+
+  mv_col = denoiser->best_sse_mv.as_mv.col;
+  mv_row = denoiser->best_sse_mv.as_mv.row;
+
+  frame = denoiser->best_reference_frame;
+
+  // If the best reference frame uses inter-prediction and there is enough of a
+  // difference in sum-squared-error, use it.
+  if (frame != INTRA_FRAME &&
+      sse_diff > sse_diff_thresh(bs, increase_denoising, mv_row, mv_col)) {
+    mbmi->ref_frame[0] = denoiser->best_reference_frame;
+    mbmi->mode = denoiser->best_sse_inter_mode;
+    mbmi->mv[0] = denoiser->best_sse_mv;
+  } else {
+    // Otherwise, fall back to the best zero-motion-vector candidate.
+    frame = denoiser->best_zeromv_reference_frame;
+
+    mbmi->ref_frame[0] = denoiser->best_zeromv_reference_frame;
+    mbmi->mode = ZEROMV;
+    mbmi->mv[0].as_int = 0;
+
+    denoiser->best_sse_inter_mode = ZEROMV;
+    denoiser->best_sse_mv.as_int = 0;
+    denoiser->best_sse = denoiser->zero_mv_sse;
+  }
+
+  // Set the pointers in the MACROBLOCKD to point to the buffers in the denoiser
+  // struct.
+  filter_mbd->plane[0].pre[0].buf =
+      block_start(denoiser->running_avg_y[frame].y_buffer,
+                  denoiser->running_avg_y[frame].y_stride,
+                  mi_row, mi_col);
+  filter_mbd->plane[0].pre[0].stride = denoiser->running_avg_y[frame].y_stride;
+
+  filter_mbd->plane[1].pre[0].buf =
+      block_start(denoiser->running_avg_y[frame].u_buffer,
+                  denoiser->running_avg_y[frame].uv_stride,
+                  mi_row, mi_col);
+  filter_mbd->plane[1].pre[0].stride = denoiser->running_avg_y[frame].uv_stride;
+
+  filter_mbd->plane[2].pre[0].buf =
+      block_start(denoiser->running_avg_y[frame].v_buffer,
+                  denoiser->running_avg_y[frame].uv_stride,
+                  mi_row, mi_col);
+  filter_mbd->plane[2].pre[0].stride = denoiser->running_avg_y[frame].uv_stride;
+
+  filter_mbd->plane[0].pre[1].buf =
+      block_start(denoiser->running_avg_y[frame].y_buffer,
+                  denoiser->running_avg_y[frame].y_stride,
+                  mi_row, mi_col);
+  filter_mbd->plane[0].pre[1].stride = denoiser->running_avg_y[frame].y_stride;
+
+  filter_mbd->plane[1].pre[1].buf =
+      block_start(denoiser->running_avg_y[frame].u_buffer,
+                  denoiser->running_avg_y[frame].uv_stride,
+                  mi_row, mi_col);
+  filter_mbd->plane[1].pre[1].stride = denoiser->running_avg_y[frame].uv_stride;
+
+  filter_mbd->plane[2].pre[1].buf =
+      block_start(denoiser->running_avg_y[frame].v_buffer,
+                  denoiser->running_avg_y[frame].uv_stride,
+                  mi_row, mi_col);
+  filter_mbd->plane[2].pre[1].stride = denoiser->running_avg_y[frame].uv_stride;
+
+  filter_mbd->plane[0].dst.buf =
+      block_start(denoiser->mc_running_avg_y.y_buffer,
+                  denoiser->mc_running_avg_y.y_stride,
+                  mi_row, mi_col);
+  filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride;
+  // The chroma destinations are addressed with uv_stride, so use it as stride.
+  filter_mbd->plane[1].dst.buf =
+      block_start(denoiser->mc_running_avg_y.u_buffer,
+                  denoiser->mc_running_avg_y.uv_stride,
+                  mi_row, mi_col);
+  filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride;
+
+  filter_mbd->plane[2].dst.buf =
+      block_start(denoiser->mc_running_avg_y.v_buffer,
+                  denoiser->mc_running_avg_y.uv_stride,
+                  mi_row, mi_col);
+  filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride;
+
+  vp9_build_inter_predictors_sby(filter_mbd, mv_row, mv_col, bs);
+  // Restore plane 0 and the mode info. NOTE(review): planes 1 and 2 are not
+  // restored here; confirm that callers re-set them before they are used.
+  filter_mbd->plane[0].pre[0] = saved_pre[0];
+  filter_mbd->plane[0].pre[1] = saved_pre[1];
+  filter_mbd->plane[0].dst = saved_dst;
+  *mbmi = saved_mbmi;
+
+  mv_row = denoiser->best_sse_mv.as_mv.row;
+  mv_col = denoiser->best_sse_mv.as_mv.col;
+
+  if (denoiser->best_sse > sse_thresh(bs, increase_denoising)) {
+    return COPY_BLOCK;
+  }
+  if (mv_row * mv_row + mv_col * mv_col >
+      8 * noise_motion_thresh(bs, increase_denoising)) {
+    return COPY_BLOCK;
+  }
+  return FILTER_BLOCK;
+}
+
void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs) {
- int decision = COPY_BLOCK;
-
+ VP9_DENOISER_DECISION decision = FILTER_BLOCK;
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
+ YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
+ uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col);
+ uint8_t *mc_avg_start = block_start(mc_avg.y_buffer, mc_avg.y_stride,
+ mi_row, mi_col);
struct buf_2d src = mb->plane[0].src;
- update_running_avg(denoiser->mc_running_avg_y.y_buffer,
- denoiser->mc_running_avg_y.y_stride,
- denoiser->running_avg_y[INTRA_FRAME].y_buffer,
- denoiser->running_avg_y[INTRA_FRAME].y_stride,
- mb->plane[0].src.buf, mb->plane[0].src.stride, 0, bs);
+ decision = perform_motion_compensation(denoiser, mb, bs,
+ denoiser->increase_denoising,
+ mi_row, mi_col);
if (decision == FILTER_BLOCK) {
- // TODO(tkopp)
+ decision = denoiser_filter(src.buf, src.stride,
+ mc_avg_start, mc_avg.y_stride,
+ avg_start, avg.y_stride,
+ 0, bs);
}
- if (decision == COPY_BLOCK) {
- copy_block(block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col),
- avg.y_stride, src.buf, src.stride, bs);
+
+ if (decision == FILTER_BLOCK) {
+ copy_block(src.buf, src.stride, avg_start, avg.y_stride, bs);
+ } else { // COPY_BLOCK
+ copy_block(avg_start, avg.y_stride, src.buf, src.stride, bs);
}
}
static void copy_frame(YV12_BUFFER_CONFIG dest, const YV12_BUFFER_CONFIG src) {
- int r, c;
+ int r;
const uint8_t *srcbuf = src.y_buffer;
uint8_t *destbuf = dest.y_buffer;
assert(dest.y_width == src.y_width);
assert(dest.y_height == src.y_height);
for (r = 0; r < dest.y_height; ++r) {
- for (c = 0; c < dest.y_width; ++c) {
- destbuf[c] = srcbuf[c];
- }
+ vpx_memcpy(destbuf, srcbuf, dest.y_width);
destbuf += dest.y_stride;
srcbuf += src.y_stride;
}
@@ -155,7 +370,25 @@
}
}
-void vp9_denoiser_update_frame_stats() {
+void vp9_denoiser_reset_frame_stats(VP9_DENOISER *denoiser) {
+ denoiser->zero_mv_sse = UINT_MAX;
+ denoiser->best_sse = UINT_MAX;
+}
+
+void vp9_denoiser_update_frame_stats(VP9_DENOISER *denoiser, MB_MODE_INFO *mbmi,
+ unsigned int sse, PREDICTION_MODE mode) {
+ // TODO(tkopp): Use both MVs if possible
+ if (mbmi->mv[0].as_int == 0 && sse < denoiser->zero_mv_sse) {
+ denoiser->zero_mv_sse = sse;
+ denoiser->best_zeromv_reference_frame = mbmi->ref_frame[0];
+ }
+
+ if (mbmi->mv[0].as_int != 0 && sse < denoiser->best_sse) {
+ denoiser->best_sse = sse;
+ denoiser->best_sse_inter_mode = mode;
+ denoiser->best_sse_mv = mbmi->mv[0];
+ denoiser->best_reference_frame = mbmi->ref_frame[0];
+ }
}
int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
@@ -170,6 +403,9 @@
vp9_denoiser_free(denoiser);
return 1;
}
+#ifdef OUTPUT_YUV_DENOISED
+ make_grayscale(&denoiser->running_avg_y[i]);
+#endif
}
fail = vp9_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height,
@@ -178,6 +414,10 @@
vp9_denoiser_free(denoiser);
return 1;
}
+#ifdef OUTPUT_YUV_DENOISED
+ make_grayscale(&denoiser->running_avg_y[i]);
+#endif
+ denoiser->increase_denoising = 0;
return 0;
}
@@ -196,3 +436,22 @@
vp9_free_frame_buffer(&denoiser->mc_running_avg_y);
}
}
+
+#ifdef OUTPUT_YUV_DENOISED
+static void make_grayscale(YV12_BUFFER_CONFIG *yuv) {
+ int r, c;
+ uint8_t *u = yuv->u_buffer;
+ uint8_t *v = yuv->v_buffer;
+
+ // The '/2's are there because we have a 440 buffer, but we want to output
+ // 420.
+ for (r = 0; r < yuv->uv_height / 2; ++r) {
+ for (c = 0; c < yuv->uv_width / 2; ++c) {
+ u[c] = UINT8_MAX / 2;
+ v[c] = UINT8_MAX / 2;
+ }
+ u += yuv->uv_stride + yuv->uv_width / 2;
+ v += yuv->uv_stride + yuv->uv_width / 2;
+ }
+}
+#endif
diff --git a/source/libvpx/vp9/encoder/vp9_denoiser.h b/source/libvpx/vp9/encoder/vp9_denoiser.h
index 18b9766..cbb6423 100644
--- a/source/libvpx/vp9/encoder/vp9_denoiser.h
+++ b/source/libvpx/vp9/encoder/vp9_denoiser.h
@@ -18,14 +18,22 @@
extern "C" {
#endif
-enum vp9_denoiser_decision {
+typedef enum vp9_denoiser_decision {
COPY_BLOCK,
FILTER_BLOCK
-};
+} VP9_DENOISER_DECISION;
typedef struct vp9_denoiser {
YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES];
YV12_BUFFER_CONFIG mc_running_avg_y;
+
+ unsigned int zero_mv_sse;
+ unsigned int best_sse;
+ int increase_denoising;
+ PREDICTION_MODE best_sse_inter_mode;
+ int_mv best_sse_mv;
+ MV_REFERENCE_FRAME best_reference_frame;
+ MV_REFERENCE_FRAME best_zeromv_reference_frame;
} VP9_DENOISER;
void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
@@ -38,7 +46,10 @@
void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs);
-void vp9_denoiser_update_frame_stats();
+void vp9_denoiser_reset_frame_stats(VP9_DENOISER *denoiser);
+
+void vp9_denoiser_update_frame_stats(VP9_DENOISER *denoiser, MB_MODE_INFO *mbmi,
+ unsigned int sse, PREDICTION_MODE mode);
int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height,
int ssx, int ssy, int border);
diff --git a/source/libvpx/vp9/encoder/vp9_encodeframe.c b/source/libvpx/vp9/encoder/vp9_encodeframe.c
index 001ac69..83955dc 100644
--- a/source/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/source/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -38,6 +38,7 @@
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_pickmode.h"
+#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_tokenize.h"
@@ -70,12 +71,6 @@
128, 128, 128, 128, 128, 128, 128, 128
};
-typedef struct {
- unsigned int sse;
- int sum;
- unsigned int var;
-} diff;
-
static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
const struct buf_2d *ref,
BLOCK_SIZE bs) {
@@ -139,42 +134,6 @@
xd->mi[0] = cm->mi + idx_str;
}
-static int is_block_in_mb_map(const VP9_COMP *cpi, int mi_row, int mi_col,
- BLOCK_SIZE bsize) {
- const VP9_COMMON *const cm = &cpi->common;
- const int mb_rows = cm->mb_rows;
- const int mb_cols = cm->mb_cols;
- const int mb_row = mi_row >> 1;
- const int mb_col = mi_col >> 1;
- const int mb_width = num_8x8_blocks_wide_lookup[bsize] >> 1;
- const int mb_height = num_8x8_blocks_high_lookup[bsize] >> 1;
- int r, c;
- if (bsize <= BLOCK_16X16) {
- return cpi->active_map[mb_row * mb_cols + mb_col];
- }
- for (r = 0; r < mb_height; ++r) {
- for (c = 0; c < mb_width; ++c) {
- int row = mb_row + r;
- int col = mb_col + c;
- if (row >= mb_rows || col >= mb_cols)
- continue;
- if (cpi->active_map[row * mb_cols + col])
- return 1;
- }
- }
- return 0;
-}
-
-static int check_active_map(const VP9_COMP *cpi, const MACROBLOCK *x,
- int mi_row, int mi_col,
- BLOCK_SIZE bsize) {
- if (cpi->active_map_enabled && !x->e_mbd.lossless) {
- return is_block_in_mb_map(cpi, mi_row, mi_col, bsize);
- } else {
- return 1;
- }
-}
-
static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, int mi_col, BLOCK_SIZE bsize) {
MACROBLOCK *const x = &cpi->mb;
@@ -187,9 +146,6 @@
set_skip_context(xd, mi_row, mi_col);
- // Activity map pointer
- x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize);
-
set_modeinfo_offsets(cm, xd, mi_row, mi_col);
mbmi = &xd->mi[0]->mbmi;
@@ -723,7 +679,6 @@
xd->mi[0]->bmi[0].as_mv[0].as_int = 0;
x->skip = 1;
- x->skip_encode = 1;
*rate = 0;
*dist = 0;
@@ -822,12 +777,17 @@
vp9_rd_pick_intra_mode_sb(cpi, x, totalrate, totaldist, bsize, ctx,
best_rd);
} else {
- if (bsize >= BLOCK_8X8)
- vp9_rd_pick_inter_mode_sb(cpi, x, tile, mi_row, mi_col,
- totalrate, totaldist, bsize, ctx, best_rd);
- else
+ if (bsize >= BLOCK_8X8) {
+ if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ vp9_rd_pick_inter_mode_sb_seg_skip(cpi, x, totalrate, totaldist, bsize,
+ ctx, best_rd);
+ else
+ vp9_rd_pick_inter_mode_sb(cpi, x, tile, mi_row, mi_col,
+ totalrate, totaldist, bsize, ctx, best_rd);
+ } else {
vp9_rd_pick_inter_mode_sub8x8(cpi, x, tile, mi_row, mi_col, totalrate,
totaldist, bsize, ctx, best_rd);
+ }
}
x->rdmult = orig_rdmult;
@@ -909,6 +869,7 @@
vpx_memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl,
sizeof(xd->left_seg_context[0]) * mi_height);
}
+
static void save_context(VP9_COMP *cpi, int mi_row, int mi_col,
ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
@@ -946,7 +907,6 @@
TOKENEXTRA **tp, int mi_row, int mi_col,
int output_enabled, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx) {
-
set_offsets(cpi, tile, mi_row, mi_col, bsize);
update_state(cpi, ctx, mi_row, mi_col, bsize, output_enabled);
encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize, ctx);
@@ -1175,7 +1135,6 @@
}
}
-
const struct {
int row;
int col;
@@ -1208,34 +1167,26 @@
// In-image SB64
if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
(row8x8_remaining >= MI_BLOCK_SIZE)) {
- const int src_stride = x->plane[0].src.stride;
- const int pre_stride = cpi->Last_Source->y_stride;
- const uint8_t *src = x->plane[0].src.buf;
- const int pre_offset = (mi_row * MI_SIZE) * pre_stride +
- (mi_col * MI_SIZE);
- const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset;
- const unsigned int thr_32x32 = cpi->sf.source_var_thresh;
- const unsigned int thr_64x64 = thr_32x32 << 1;
int i, j;
int index;
diff d32[4];
- int use16x16 = 0;
+ const int offset = (mi_row >> 1) * cm->mb_cols + (mi_col >> 1);
+ int is_larger_better = 0;
+ int use32x32 = 0;
+ unsigned int thr = cpi->source_var_thresh;
+
+ vpx_memset(d32, 0, 4 * sizeof(diff));
for (i = 0; i < 4; i++) {
- diff d16[4];
+ diff *d16[4];
for (j = 0; j < 4; j++) {
int b_mi_row = coord_lookup[i * 4 + j].row;
int b_mi_col = coord_lookup[i * 4 + j].col;
- int b_offset = b_mi_row * MI_SIZE * src_stride +
- b_mi_col * MI_SIZE;
+ int boffset = b_mi_row / 2 * cm->mb_cols +
+ b_mi_col / 2;
- vp9_get16x16var(src + b_offset, src_stride,
- pre_src + b_offset, pre_stride,
- &d16[j].sse, &d16[j].sum);
-
- d16[j].var = d16[j].sse -
- (((uint32_t)d16[j].sum * d16[j].sum) >> 8);
+ d16[j] = cpi->source_diff_var + offset + boffset;
index = b_mi_row * mis + b_mi_col;
mi_8x8[index] = mi_upper_left + index;
@@ -1245,14 +1196,16 @@
// size to further improve quality.
}
- if (d16[0].var < thr_32x32 && d16[1].var < thr_32x32 &&
- d16[2].var < thr_32x32 && d16[3].var < thr_32x32) {
- d32[i].sse = d16[0].sse;
- d32[i].sum = d16[0].sum;
+ is_larger_better = (d16[0]->var < thr) && (d16[1]->var < thr) &&
+ (d16[2]->var < thr) && (d16[3]->var < thr);
- for (j = 1; j < 4; j++) {
- d32[i].sse += d16[j].sse;
- d32[i].sum += d16[j].sum;
+ // Use 32x32 partition
+ if (is_larger_better) {
+ use32x32 += 1;
+
+ for (j = 0; j < 4; j++) {
+ d32[i].sse += d16[j]->sse;
+ d32[i].sum += d16[j]->sum;
}
d32[i].var = d32[i].sse - (((int64_t)d32[i].sum * d32[i].sum) >> 10);
@@ -1260,18 +1213,16 @@
index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col;
mi_8x8[index] = mi_upper_left + index;
mi_8x8[index]->mbmi.sb_type = BLOCK_32X32;
-
- if (!((cm->current_video_frame - 1) %
- cpi->sf.search_type_check_frequency))
- cpi->use_large_partition_rate += 1;
- } else {
- use16x16 = 1;
}
}
- if (!use16x16) {
- if (d32[0].var < thr_64x64 && d32[1].var < thr_64x64 &&
- d32[2].var < thr_64x64 && d32[3].var < thr_64x64) {
+ if (use32x32 == 4) {
+ thr <<= 1;
+ is_larger_better = (d32[0].var < thr) && (d32[1].var < thr) &&
+ (d32[2].var < thr) && (d32[3].var < thr);
+
+ // Use 64x64 partition
+ if (is_larger_better) {
mi_8x8[0] = mi_upper_left;
mi_8x8[0]->mbmi.sb_type = BLOCK_64X64;
}
@@ -1508,20 +1459,8 @@
if (bsize == BLOCK_16X16) {
set_offsets(cpi, tile, mi_row, mi_col, bsize);
x->mb_energy = vp9_block_energy(cpi, x, bsize);
- } else {
- x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize);
}
- if (!x->in_active_map) {
- do_partition_search = 0;
- if (mi_row + (mi_step >> 1) < cm->mi_rows &&
- mi_col + (mi_step >> 1) < cm->mi_cols) {
- pc_tree->partitioning = PARTITION_NONE;
- bs_type = mi_8x8[0]->mbmi.sb_type = bsize;
- subsize = bsize;
- partition = PARTITION_NONE;
- }
- }
if (do_partition_search &&
cpi->sf.partition_search_type == SEARCH_PARTITION &&
cpi->sf.adjust_partitioning_from_last_frame) {
@@ -1984,8 +1923,6 @@
if (bsize == BLOCK_16X16) {
set_offsets(cpi, tile, mi_row, mi_col, bsize);
x->mb_energy = vp9_block_energy(cpi, x, bsize);
- } else {
- x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize);
}
// Determine partition types in search according to the speed features.
// The threshold set here has to be of square block size.
@@ -2018,8 +1955,6 @@
}
}
- if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed))
- do_split = 0;
// PARTITION_NONE
if (partition_none_allowed) {
rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize,
@@ -2053,10 +1988,6 @@
}
}
}
- if (!x->in_active_map) {
- do_split = 0;
- do_rect = 0;
- }
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
}
@@ -2310,7 +2241,8 @@
sf->always_this_block_size);
rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, cpi->pc_root);
- } else if (sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
+ } else if (cpi->skippable_frame ||
+ sf->partition_search_type == VAR_BASED_FIXED_PARTITION) {
BLOCK_SIZE bsize;
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
@@ -2322,8 +2254,15 @@
rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, cpi->pc_root);
} else {
+ GF_GROUP * gf_grp = &cpi->twopass.gf_group;
+ int last_was_mid_sequence_overlay = 0;
+ if ((cpi->pass == 2) && (gf_grp->index)) {
+ if (gf_grp->update_type[gf_grp->index - 1] == OVERLAY_UPDATE)
+ last_was_mid_sequence_overlay = 1;
+ }
if ((cm->current_video_frame
% sf->last_partitioning_redo_frequency) == 0
+ || last_was_mid_sequence_overlay
|| cm->prev_mi == 0
|| cm->show_frame == 0
|| cm->frame_type == KEY_FRAME
@@ -2436,7 +2375,7 @@
rd_opt->tx_select_threshes[frame_type][TX_MODE_SELECT] ?
ALLOW_32X32 : TX_MODE_SELECT;
} else if (cpi->sf.tx_size_search_method == USE_TX_8X8) {
- return ALLOW_8X8;
+ return TX_MODE_SELECT;
} else {
unsigned int total = 0;
int i;
@@ -2453,20 +2392,6 @@
}
}
-static void set_mode_info(MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
- PREDICTION_MODE mode) {
- mbmi->mode = mode;
- mbmi->uv_mode = mode;
- mbmi->mv[0].as_int = 0;
- mbmi->mv[1].as_int = 0;
- mbmi->ref_frame[0] = INTRA_FRAME;
- mbmi->ref_frame[1] = NONE;
- mbmi->tx_size = max_txsize_lookup[bsize];
- mbmi->skip = 0;
- mbmi->sb_type = bsize;
- mbmi->segment_id = 0;
-}
-
static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, int mi_col,
int *rate, int64_t *dist,
@@ -2479,19 +2404,15 @@
mbmi = &xd->mi[0]->mbmi;
mbmi->sb_type = bsize;
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
if (mbmi->segment_id && x->in_static_area)
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
- }
- if (!frame_is_intra_only(cm)) {
- if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
- set_mode_info_seg_skip(x, cm->tx_mode, rate, dist, bsize);
- else
- vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rate, dist, bsize);
- } else {
- set_mode_info(&xd->mi[0]->mbmi, bsize, DC_PRED);
- }
+ if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ set_mode_info_seg_skip(x, cm->tx_mode, rate, dist, bsize);
+ else
+ vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rate, dist, bsize);
+
duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
}
@@ -2586,8 +2507,6 @@
assert(num_8x8_blocks_wide_lookup[bsize] ==
num_8x8_blocks_high_lookup[bsize]);
- x->in_active_map = check_active_map(cpi, x, mi_row, mi_col, bsize);
-
// Determine partition types in search according to the speed features.
// The threshold set here has to be of square block size.
if (cpi->sf.auto_min_max_partition_size) {
@@ -2606,15 +2525,13 @@
partition_vert_allowed &= force_vert_split;
}
- if (!x->in_active_map && (partition_horz_allowed || partition_vert_allowed))
- do_split = 0;
-
// PARTITION_NONE
if (partition_none_allowed) {
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
&this_rate, &this_dist, bsize);
ctx->mic.mbmi = xd->mi[0]->mbmi;
ctx->skip_txfm = x->skip_txfm;
+ ctx->skip = x->skip;
if (this_rate != INT_MAX) {
int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -2643,10 +2560,6 @@
}
}
}
- if (!x->in_active_map) {
- do_split = 0;
- do_rect = 0;
- }
}
// store estimated motion vector
@@ -2702,6 +2615,7 @@
pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
pc_tree->horizontal[0].skip_txfm = x->skip_txfm;
+ pc_tree->horizontal[0].skip = x->skip;
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
@@ -2712,6 +2626,7 @@
pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
pc_tree->horizontal[1].skip_txfm = x->skip_txfm;
+ pc_tree->horizontal[1].skip = x->skip;
if (this_rate == INT_MAX) {
sum_rd = INT64_MAX;
@@ -2742,6 +2657,7 @@
&this_rate, &this_dist, subsize);
pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
pc_tree->vertical[0].skip_txfm = x->skip_txfm;
+ pc_tree->vertical[0].skip = x->skip;
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
load_pred_mv(x, ctx);
@@ -2749,6 +2665,7 @@
&this_rate, &this_dist, subsize);
pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
pc_tree->vertical[1].skip_txfm = x->skip_txfm;
+ pc_tree->vertical[1].skip = x->skip;
if (this_rate == INT_MAX) {
sum_rd = INT64_MAX;
} else {
@@ -2838,16 +2755,19 @@
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
pc_tree->none.mic.mbmi = xd->mi[0]->mbmi;
pc_tree->none.skip_txfm = x->skip_txfm;
+ pc_tree->none.skip = x->skip;
break;
case PARTITION_VERT:
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
pc_tree->vertical[0].skip_txfm = x->skip_txfm;
+ pc_tree->vertical[0].skip = x->skip;
if (mi_col + hbs < cm->mi_cols) {
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs,
&rate, &dist, subsize);
pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
pc_tree->vertical[1].skip_txfm = x->skip_txfm;
+ pc_tree->vertical[1].skip = x->skip;
if (rate != INT_MAX && dist != INT64_MAX &&
*totrate != INT_MAX && *totdist != INT64_MAX) {
*totrate += rate;
@@ -2859,11 +2779,13 @@
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
pc_tree->horizontal[0].skip_txfm = x->skip_txfm;
+ pc_tree->horizontal[0].skip = x->skip;
if (mi_row + hbs < cm->mi_rows) {
nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
&rate, &dist, subsize);
pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
pc_tree->horizontal[1].skip_txfm = x->skip_txfm;
+ pc_tree->horizontal[1].skip = x->skip;
if (rate != INT_MAX && dist != INT64_MAX &&
*totrate != INT_MAX && *totdist != INT64_MAX) {
*totrate += rate;
@@ -2984,6 +2906,93 @@
}
// end RTC play code
+static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
+ SPEED_FEATURES *const sf = &cpi->sf;
+ VP9_COMMON *const cm = &cpi->common;
+ // Derive cpi->source_var_thresh from a histogram of 16x16 source variances.
+ const uint8_t *src = cpi->Source->y_buffer;
+ const uint8_t *last_src = cpi->Last_Source->y_buffer;
+ const int src_stride = cpi->Source->y_stride;
+ const int last_stride = cpi->Last_Source->y_stride;
+ // Pick cutoff threshold: VAR_HIST_LARGE_CUT_OFF percent of cm->MBs when the
+ // smaller frame dimension is >= 720, otherwise VAR_HIST_SMALL_CUT_OFF percent.
+ const int cutoff = (MIN(cm->width, cm->height) >= 720) ?
+ (cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100) :
+ (cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100);
+ DECLARE_ALIGNED_ARRAY(16, int, hist, VAR_HIST_BINS);
+ diff *var16 = cpi->source_diff_var;
+
+ int sum = 0;
+ int i, j;
+
+ vpx_memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0]));
+ // Walk the frame one 16x16 luma block at a time, in raster order.
+ for (i = 0; i < cm->mb_rows; i++) {
+ for (j = 0; j < cm->mb_cols; j++) {
+ vp9_get16x16var(src, src_stride, last_src, last_stride,
+ &var16->sse, &var16->sum);
+ // Variance of the 16x16 difference: sse - sum^2 / 256.
+ var16->var = var16->sse -
+ (((uint32_t)var16->sum * var16->sum) >> 8);
+ // Values at or above VAR_HIST_MAX_BG_VAR all share the last bin.
+ if (var16->var >= VAR_HIST_MAX_BG_VAR)
+ hist[VAR_HIST_BINS - 1]++;
+ else
+ hist[var16->var / VAR_HIST_FACTOR]++;
+ // Step to the next block in this row and the next cpi->source_diff_var slot.
+ src += 16;
+ last_src += 16;
+ var16++;
+ }
+ // Rewind to the start of the row, then move down 16 pixel rows.
+ src = src - cm->mb_cols * 16 + 16 * src_stride;
+ last_src = last_src - cm->mb_cols * 16 + 16 * last_stride;
+ }
+ // The threshold stays 0 unless the histogram scan below selects one.
+ cpi->source_var_thresh = 0;
+ // Only scan when fewer than cutoff blocks landed in the last bin.
+ if (hist[VAR_HIST_BINS - 1] < cutoff) {
+ for (i = 0; i < VAR_HIST_BINS - 1; i++) {
+ sum += hist[i];
+ // Stop at the first bin whose cumulative count exceeds cutoff.
+ if (sum > cutoff) {
+ cpi->source_var_thresh = (i + 1) * VAR_HIST_FACTOR;
+ return 0;
+ }
+ }
+ }
+ // No threshold selected: return the check frequency from the speed features.
+ return sf->search_type_check_frequency;
+}
+
+static void source_var_based_partition_search_method(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ SPEED_FEATURES *const sf = &cpi->sf;
+ // Pick sf->partition_search_type for this frame; may leave it unchanged.
+ if (cm->frame_type == KEY_FRAME) {
+ // For key frame, use SEARCH_PARTITION.
+ sf->partition_search_type = SEARCH_PARTITION;
+ } else if (cm->intra_only) {
+ sf->partition_search_type = FIXED_PARTITION;
+ } else {
+ if (cm->last_width != cm->width || cm->last_height != cm->height) {
+ if (cpi->source_diff_var)
+ vpx_free(cpi->source_diff_var);
+ // Frame size changed: allocate cm->MBs diff entries for the new size.
+ CHECK_MEM_ERROR(cm, cpi->source_diff_var,
+ vpx_calloc(cm->MBs, sizeof(diff)));
+ }
+ // Counter at zero: recompute the threshold; the result restarts the counter.
+ if (!cpi->frames_till_next_var_check)
+ cpi->frames_till_next_var_check = set_var_thresh_from_histogram(cpi);
+ // While the counter is positive, use FIXED_PARTITION and count down.
+ if (cpi->frames_till_next_var_check > 0) {
+ sf->partition_search_type = FIXED_PARTITION;
+ cpi->frames_till_next_var_check--;
+ }
+ }
+}
+
static int get_skip_encode_frame(const VP9_COMMON *cm) {
unsigned int intra_count = 0, inter_count = 0;
int j;
@@ -3014,7 +3023,7 @@
vp9_tile_init(&tile, cm, tile_row, tile_col);
for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end;
mi_row += MI_BLOCK_SIZE) {
- if (cpi->sf.use_nonrd_pick_mode && cm->frame_type != KEY_FRAME)
+ if (cpi->sf.use_nonrd_pick_mode && !frame_is_intra_only(cm))
encode_nonrd_sb_row(cpi, &tile, mi_row, &tok);
else
encode_rd_sb_row(cpi, &tile, mi_row, &tok);
@@ -3066,6 +3075,7 @@
init_encode_frame_mb_context(cpi);
set_prev_mi(cm);
+ x->quant_fp = cpi->sf.use_quant_fp;
x->skip_txfm = 0;
if (sf->use_nonrd_pick_mode) {
// Initialize internal buffer pointers for rtc coding, where non-RD
@@ -3083,28 +3093,8 @@
}
vp9_zero(x->zcoeff_blk);
- if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION &&
- cm->current_video_frame > 0) {
- int check_freq = sf->search_type_check_frequency;
-
- if ((cm->current_video_frame - 1) % check_freq == 0) {
- cpi->use_large_partition_rate = 0;
- }
-
- if ((cm->current_video_frame - 1) % check_freq == 1) {
- const int mbs_in_b32x32 = 1 << ((b_width_log2_lookup[BLOCK_32X32] -
- b_width_log2_lookup[BLOCK_16X16]) +
- (b_height_log2_lookup[BLOCK_32X32] -
- b_height_log2_lookup[BLOCK_16X16]));
- cpi->use_large_partition_rate = cpi->use_large_partition_rate * 100 *
- mbs_in_b32x32 / cm->MBs;
- }
-
- if ((cm->current_video_frame - 1) % check_freq >= 1) {
- if (cpi->use_large_partition_rate < 15)
- sf->partition_search_type = FIXED_PARTITION;
- }
- }
+ if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
+ source_var_based_partition_search_method(cpi);
}
{
@@ -3269,7 +3259,6 @@
}
} else {
cm->reference_mode = SINGLE_REFERENCE;
- cm->interp_filter = SWITCHABLE;
encode_frame_internal(cpi);
}
}
@@ -3364,7 +3353,10 @@
vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
&xd->block_refs[ref]->sf);
}
- vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
+ if (!cpi->sf.reuse_inter_pred_sby)
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
+
+ vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
if (!x->skip) {
mbmi->skip = 1;
diff --git a/source/libvpx/vp9/encoder/vp9_encodeframe.h b/source/libvpx/vp9/encoder/vp9_encodeframe.h
index 72343cd..fd1c9aa 100644
--- a/source/libvpx/vp9/encoder/vp9_encodeframe.h
+++ b/source/libvpx/vp9/encoder/vp9_encodeframe.h
@@ -20,6 +20,13 @@
struct yv12_buffer_config;
struct VP9_COMP;
+// Constants used in SOURCE_VAR_BASED_PARTITION (variance histogram).
+#define VAR_HIST_MAX_BG_VAR 1000  // variances >= this share the last bin
+#define VAR_HIST_FACTOR 10  // variance range covered by each bin
+#define VAR_HIST_BINS (VAR_HIST_MAX_BG_VAR / VAR_HIST_FACTOR + 1)  // incl. last bin
+#define VAR_HIST_LARGE_CUT_OFF 75  // percent of MBs; min(width, height) >= 720
+#define VAR_HIST_SMALL_CUT_OFF 45  // percent of MBs; smaller frames
+
void vp9_setup_src_planes(struct macroblock *x,
const struct yv12_buffer_config *src,
int mi_row, int mi_col);
diff --git a/source/libvpx/vp9/encoder/vp9_encodemb.c b/source/libvpx/vp9/encoder/vp9_encodemb.c
index 1c00698..eb9624d 100644
--- a/source/libvpx/vp9/encoder/vp9_encodemb.c
+++ b/source/libvpx/vp9/encoder/vp9_encodemb.c
@@ -21,7 +21,7 @@
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
-#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_tokenize.h"
struct optimize_ctx {
@@ -306,6 +306,56 @@
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
+ int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+ int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ uint16_t *const eob = &p->eobs[block];
+ const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+ int i, j;
+ const int16_t *src_diff;
+ txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
+ src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+
+ switch (tx_size) {
+ case TX_32X32:
+ fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride);
+ vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob, scan_order->scan,
+ scan_order->iscan);
+ break;
+ case TX_16X16:
+ vp9_fdct16x16(src_diff, coeff, diff_stride);
+ vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob,
+ scan_order->scan, scan_order->iscan);
+ break;
+ case TX_8X8:
+ vp9_fdct8x8(src_diff, coeff, diff_stride);
+ vp9_quantize_fp(coeff, 64, x->skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob,
+ scan_order->scan, scan_order->iscan);
+ break;
+ case TX_4X4:
+ x->fwd_txm4x4(src_diff, coeff, diff_stride);
+ vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp,
+ p->quant_fp, p->quant_shift, qcoeff, dqcoeff,
+ pd->dequant, p->zbin_extra, eob,
+ scan_order->scan, scan_order->iscan);
+ break;
+ default:
+ assert(0);
+ }
+}
+
+void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
@@ -424,11 +474,15 @@
if (x->skip_txfm == 0) {
// full forward transform and quantization
- if (!x->skip_recode)
- vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+ if (!x->skip_recode) {
+ if (x->quant_fp)
+ vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+ else
+ vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+ }
} else if (x->skip_txfm == 2) {
// fast path forward transform and quantization
- vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+ vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
} else {
// skip forward transform
p->eobs[block] = 0;
@@ -507,7 +561,7 @@
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
const struct macroblockd_plane* const pd = &xd->plane[plane];
- const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size;
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
vp9_get_entropy_contexts(bsize, tx_size, pd,
ctx.ta[plane], ctx.tl[plane]);
}
diff --git a/source/libvpx/vp9/encoder/vp9_encodemb.h b/source/libvpx/vp9/encoder/vp9_encodemb.h
index 3196c99..0b8c3d2 100644
--- a/source/libvpx/vp9/encoder/vp9_encodemb.h
+++ b/source/libvpx/vp9/encoder/vp9_encodemb.h
@@ -24,6 +24,8 @@
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
+void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
diff --git a/source/libvpx/vp9/encoder/vp9_encoder.c b/source/libvpx/vp9/encoder/vp9_encoder.c
index 4e365e7..e0e0561 100644
--- a/source/libvpx/vp9/encoder/vp9_encoder.c
+++ b/source/libvpx/vp9/encoder/vp9_encoder.c
@@ -39,7 +39,7 @@
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_picklpf.h"
#include "vp9/encoder/vp9_ratectrl.h"
-#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_speed_features.h"
#if CONFIG_INTERNAL_STATS
@@ -65,7 +65,7 @@
// #define OUTPUT_YUV_REC
#ifdef OUTPUT_YUV_DENOISED
-FILE *yuv_denoised_file;
+FILE *yuv_denoised_file = NULL;
#endif
#ifdef OUTPUT_YUV_SRC
FILE *yuv_file;
@@ -106,7 +106,7 @@
}
}
-static void set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) {
+void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) {
MACROBLOCK *const mb = &cpi->mb;
cpi->common.allow_high_precision_mv = allow_high_precision_mv;
if (cpi->common.allow_high_precision_mv) {
@@ -145,8 +145,6 @@
if (!init_done) {
vp9_init_neighbors();
- vp9_init_quant_tables();
-
vp9_coef_tree_initialize();
vp9_tokenize_initialize();
vp9_init_me_luts();
@@ -176,10 +174,8 @@
vp9_cyclic_refresh_free(cpi->cyclic_refresh);
cpi->cyclic_refresh = NULL;
- vpx_free(cpi->active_map);
- cpi->active_map = NULL;
-
vp9_free_frame_buffers(cm);
+ vp9_free_context_buffers(cm);
vp9_free_frame_buffer(&cpi->last_frame_uf);
vp9_free_frame_buffer(&cpi->scaled_source);
@@ -198,6 +194,18 @@
lc->rc_twopass_stats_in.buf = NULL;
lc->rc_twopass_stats_in.sz = 0;
}
+
+ if (cpi->source_diff_var != NULL) {
+ vpx_free(cpi->source_diff_var);
+ cpi->source_diff_var = NULL;
+ }
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ vpx_free(cpi->twopass.this_frame_mb_stats.mb_stats);
+ cpi->twopass.this_frame_mb_stats.mb_stats = NULL;
+ }
+#endif
}
static void save_coding_context(VP9_COMP *cpi) {
@@ -417,39 +425,46 @@
"Failed to allocate altref buffer");
}
-void vp9_alloc_compressor_data(VP9_COMP *cpi) {
- VP9_COMMON *cm = &cpi->common;
-
+static void alloc_ref_frame_buffers(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
if (vp9_alloc_frame_buffers(cm, cm->width, cm->height))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate frame buffers");
+}
- if (vp9_alloc_frame_buffer(&cpi->last_frame_uf,
- cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS))
+static void alloc_util_frame_buffers(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ if (vp9_realloc_frame_buffer(&cpi->last_frame_uf,
+ cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate last frame buffer");
- if (vp9_alloc_frame_buffer(&cpi->scaled_source,
- cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS))
+ if (vp9_realloc_frame_buffer(&cpi->scaled_source,
+ cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate scaled source buffer");
- if (vp9_alloc_frame_buffer(&cpi->scaled_last_source,
- cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS))
+ if (vp9_realloc_frame_buffer(&cpi->scaled_last_source,
+ cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate scaled last source buffer");
+}
+
+void vp9_alloc_compressor_data(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+
+ vp9_alloc_context_buffers(cm, cm->width, cm->height);
vpx_free(cpi->tok);
{
unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
-
CHECK_MEM_ERROR(cm, cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok)));
}
@@ -459,41 +474,7 @@
static void update_frame_size(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
-
vp9_update_frame_size(cm);
-
- // Update size of buffers local to this frame
- if (vp9_realloc_frame_buffer(&cpi->last_frame_uf,
- cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to reallocate last frame buffer");
-
- if (vp9_realloc_frame_buffer(&cpi->scaled_source,
- cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to reallocate scaled source buffer");
-
- if (vp9_realloc_frame_buffer(&cpi->scaled_last_source,
- cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to reallocate scaled last source buffer");
-
- {
- int y_stride = cpi->scaled_source.y_stride;
-
- if (cpi->sf.mv.search_method == NSTEP) {
- vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
- } else if (cpi->sf.mv.search_method == DIAMOND) {
- vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
- }
- }
-
init_macroblockd(cm, xd);
}
@@ -521,6 +502,12 @@
cm->log2_tile_rows = cpi->oxcf.tile_rows;
}
+static void init_buffer_indices(VP9_COMP *cpi) {  // initial lst/gld/alt indices
+ cpi->lst_fb_idx = 0;
+ cpi->gld_fb_idx = 1;
+ cpi->alt_fb_idx = 2;
+}
+
static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
VP9_COMMON *const cm = &cpi->common;
@@ -531,8 +518,6 @@
cm->width = oxcf->width;
cm->height = oxcf->height;
- cm->subsampling_x = 0;
- cm->subsampling_y = 0;
vp9_alloc_compressor_data(cpi);
// Spatial scalability.
@@ -551,10 +536,9 @@
vp9_change_config(cpi, oxcf);
cpi->static_mb_pct = 0;
+ cpi->ref_frame_flags = 0;
- cpi->lst_fb_idx = 0;
- cpi->gld_fb_idx = 1;
- cpi->alt_fb_idx = 2;
+ init_buffer_indices(cpi);
set_tile_limits(cpi);
}
@@ -593,7 +577,6 @@
cpi->pass = get_pass(cpi->oxcf.mode);
rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
- cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
cpi->refresh_golden_frame = 0;
cpi->refresh_last_frame = 1;
@@ -601,7 +584,7 @@
cm->reset_frame_context = 0;
vp9_reset_segment_features(&cm->seg);
- set_high_precision_mv(cpi, 0);
+ vp9_set_high_precision_mv(cpi, 0);
{
int i;
@@ -666,11 +649,7 @@
(int)cpi->oxcf.target_bandwidth);
}
-#if CONFIG_MULTIPLE_ARF
- vp9_zero(cpi->alt_ref_source);
-#else
cpi->alt_ref_source = NULL;
-#endif
rc->is_src_frame_alt_ref = 0;
#if 0
@@ -685,14 +664,11 @@
cpi->ext_refresh_frame_context_pending = 0;
#if CONFIG_DENOISING
- vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height,
- // TODO(tkopp) An unrelated bug causes
- // cm->subsampling_{x,y} to be uninitialized at this point
- // in execution. For now we assume YUV-420, which is x/y
- // subsampling of 1.
- 1, 1,
- // cm->subsampling_x, cm->subsampling_y,
- VP9_ENC_BORDER_IN_PIXELS);
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_denoiser_alloc(&(cpi->denoiser), cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9_ENC_BORDER_IN_PIXELS);
+ }
#endif
}
@@ -773,6 +749,8 @@
cpi->alt_is_last = 0;
cpi->gold_is_alt = 0;
+ cpi->skippable_frame = 0;
+
// Create the encoder segmentation map and set all entries to 0
CHECK_MEM_ERROR(cm, cpi->segmentation_map,
vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
@@ -790,10 +768,6 @@
CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy,
vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
- CHECK_MEM_ERROR(cm, cpi->active_map, vpx_calloc(cm->MBs, 1));
- vpx_memset(cpi->active_map, 1, cm->MBs);
- cpi->active_map_enabled = 0;
-
for (i = 0; i < (sizeof(cpi->mbgraph_stats) /
sizeof(cpi->mbgraph_stats[0])); i++) {
CHECK_MEM_ERROR(cm, cpi->mbgraph_stats[i].mb_stats,
@@ -801,21 +775,38 @@
sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
}
- cpi->refresh_alt_ref_frame = 0;
-
-#if CONFIG_MULTIPLE_ARF
- // Turn multiple ARF usage on/off. This is a quick hack for the initial test
- // version. It should eventually be set via the codec API.
- cpi->multi_arf_enabled = 1;
-
- if (cpi->multi_arf_enabled) {
- cpi->sequence_number = 0;
- cpi->frame_coding_order_period = 0;
- vp9_zero(cpi->frame_coding_order);
- vp9_zero(cpi->arf_buffer_idx);
+#if CONFIG_FP_MB_STATS
+ cpi->use_fp_mb_stats = 0;
+ if (cpi->use_fp_mb_stats) {
+ // a place holder for the mb stats obtained from the first pass
+ CHECK_MEM_ERROR(cm, cpi->twopass.this_frame_mb_stats.mb_stats,
+ vpx_calloc(cm->MBs * sizeof(FIRSTPASS_MB_STATS), 1));
+ } else {
+ cpi->twopass.this_frame_mb_stats.mb_stats = NULL;
}
#endif
+ cpi->refresh_alt_ref_frame = 0;
+
+ // Note that at the moment multi_arf will not work with svc.
+ // For the current check-in, all the execution paths default to 0
+ // pending further tuning and testing. The code is left in place here
+ // as a placeholder for the required paths.
+ cpi->multi_arf_last_grp_enabled = 0;
+ if (cpi->pass == 2) {
+ if (cpi->use_svc) {
+ cpi->multi_arf_allowed = 0;
+ cpi->multi_arf_enabled = 0;
+ } else {
+ // Disable by default for now.
+ cpi->multi_arf_allowed = 0;
+ cpi->multi_arf_enabled = 0;
+ }
+ } else {
+ cpi->multi_arf_allowed = 0;
+ cpi->multi_arf_enabled = 0;
+ }
+
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
#if CONFIG_INTERNAL_STATS
cpi->b_calculate_ssimg = 0;
@@ -869,8 +860,12 @@
cpi->mb.nmvsadcost_hp[1] = &cpi->mb.nmvsadcosts_hp[1][MV_MAX];
cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp);
+#if CONFIG_DENOISING
#ifdef OUTPUT_YUV_DENOISED
- yuv_denoised_file = fopen("denoised.yuv", "ab");
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ yuv_denoised_file = fopen("denoised.yuv", "ab");
+ }
+#endif
#endif
#ifdef OUTPUT_YUV_SRC
yuv_file = fopen("bd.yuv", "ab");
@@ -942,6 +937,12 @@
set_speed_features(cpi);
+ // Allocate memory to store variances for a frame.
+ CHECK_MEM_ERROR(cm, cpi->source_diff_var,
+ vpx_calloc(cm->MBs, sizeof(diff)));
+ cpi->source_var_thresh = 0;
+ cpi->frames_till_next_var_check = 0;
+
// Default rd threshold factors for mode selection
for (i = 0; i < BLOCK_SIZES; ++i) {
for (j = 0; j < MAX_MODES; ++j)
@@ -1103,7 +1104,9 @@
}
#if CONFIG_DENOISING
- vp9_denoiser_free(&(cpi->denoiser));
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_denoiser_free(&(cpi->denoiser));
+ }
#endif
dealloc_compressor_data(cpi);
@@ -1117,8 +1120,12 @@
vp9_remove_common(&cpi->common);
vpx_free(cpi);
+#if CONFIG_DENOISING
#ifdef OUTPUT_YUV_DENOISED
- fclose(yuv_denoised_file);
+ if (yuv_denoised_file != NULL) {  // cpi was freed above; test the handle, not cpi
+ fclose(yuv_denoised_file);
+ }
+#endif
#endif
#ifdef OUTPUT_YUV_SRC
fclose(yuv_file);
@@ -1301,7 +1308,7 @@
}
-#if defined(OUTPUT_YUV_SRC) || defined(OUTPUT_YUV_DENOISED)
+#if defined(OUTPUT_YUV_SRC)
void vp9_write_yuv_frame(YV12_BUFFER_CONFIG *s, FILE *f) {
uint8_t *src = s->y_buffer;
int h = s->y_height;
@@ -1329,6 +1336,40 @@
}
#endif
+#if CONFIG_DENOISING
+#if defined(OUTPUT_YUV_DENOISED)
+// The denoiser buffer is allocated as a YUV 440 buffer. This function writes it
+// as YUV 420. We simply use the top-left pixels of the UV buffers, since we do
+// not denoise the UV channels at this time. If ever we implement UV channel
+// denoising we will have to modify this.
+void vp9_write_yuv_frame_420(YV12_BUFFER_CONFIG *s, FILE *f) {
+ uint8_t *src = s->y_buffer;
+ int h = s->y_height;
+ // Y plane: y_height rows of y_width bytes each.
+ do {
+ fwrite(src, s->y_width, 1, f);
+ src += s->y_stride;
+ } while (--h);
+ // U plane: uv_height / 2 rows, uv_width / 2 bytes written per row.
+ src = s->u_buffer;
+ h = s->uv_height / 2;
+ // NOTE(review): src advances by uv_stride + uv_width / 2 per row -- confirm.
+ do {
+ fwrite(src, s->uv_width / 2, 1, f);
+ src += s->uv_stride + s->uv_width / 2;
+ } while (--h);
+ // V plane: written the same way as the U plane.
+ src = s->v_buffer;
+ h = s->uv_height / 2;
+ // NOTE(review): fwrite results are unchecked; each do-while runs at least once.
+ do {
+ fwrite(src, s->uv_width / 2, 1, f);
+ src += s->uv_stride + s->uv_width / 2;
+ } while (--h);
+}
+#endif
+#endif
+
#ifdef OUTPUT_YUV_REC
void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
YV12_BUFFER_CONFIG *s = cm->frame_to_show;
@@ -1396,9 +1437,7 @@
vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
- // TODO(hkuang): Call C version explicitly
- // as neon version only expand border size 32.
- vp8_yv12_extend_frame_borders_c(dst);
+ vp9_extend_frame_borders(dst);
}
static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
@@ -1438,9 +1477,7 @@
}
}
- // TODO(hkuang): Call C version explicitly
- // as neon version only expand border size 32.
- vp8_yv12_extend_frame_borders_c(dst);
+ vp9_extend_frame_borders(dst);
}
#define WRITE_RECON_BUFFER 0
@@ -1524,14 +1561,8 @@
&cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
ref_cnt_fb(cm->frame_bufs,
&cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx);
- }
-#if CONFIG_MULTIPLE_ARF
- else if (!cpi->multi_arf_enabled && cpi->refresh_golden_frame &&
- !cpi->refresh_alt_ref_frame) {
-#else
- else if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame &&
- !cpi->use_svc) {
-#endif
+ } else if (!cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
+ cpi->rc.is_src_frame_alt_ref && !cpi->use_svc) {
/* Preserve the previously existing golden frame and update the frame in
* the alt ref slot instead. This is highly specific to the current use of
* alt-ref as a forward reference, and this needs to be generalized as
@@ -1549,14 +1580,14 @@
tmp = cpi->alt_fb_idx;
cpi->alt_fb_idx = cpi->gld_fb_idx;
cpi->gld_fb_idx = tmp;
- } else { /* For non key/golden frames */
+ } else { /* For non key/golden frames */
if (cpi->refresh_alt_ref_frame) {
int arf_idx = cpi->alt_fb_idx;
-#if CONFIG_MULTIPLE_ARF
- if (cpi->multi_arf_enabled) {
- arf_idx = cpi->arf_buffer_idx[cpi->sequence_number + 1];
+ if ((cpi->pass == 2) && cpi->multi_arf_allowed) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ arf_idx = gf_group->arf_update_idx[gf_group->index];
}
-#endif
+
ref_cnt_fb(cm->frame_bufs,
&cm->ref_frame_map[arf_idx], cm->new_fb_idx);
}
@@ -1572,12 +1603,14 @@
&cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
}
#if CONFIG_DENOISING
- vp9_denoiser_update_frame_info(&cpi->denoiser,
- *cpi->Source,
- cpi->common.frame_type,
- cpi->refresh_alt_ref_frame,
- cpi->refresh_golden_frame,
- cpi->refresh_last_frame);
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_denoiser_update_frame_info(&cpi->denoiser,
+ *cpi->Source,
+ cpi->common.frame_type,
+ cpi->refresh_alt_ref_frame,
+ cpi->refresh_golden_frame,
+ cpi->refresh_last_frame);
+ }
#endif
}
@@ -1609,13 +1642,15 @@
void vp9_scale_references(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
MV_REFERENCE_FRAME ref_frame;
+ const VP9_REFFRAME ref_mask[3] = {VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG};
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
const YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
- if (ref->y_crop_width != cm->width ||
- ref->y_crop_height != cm->height) {
+ // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1).
+ if ((cpi->ref_frame_flags & ref_mask[ref_frame - 1]) &&
+ (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height)) {
const int new_fb = get_free_fb(cm);
vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf,
cm->width, cm->height,
@@ -2003,6 +2038,29 @@
}
}
+// Decides whether the current frame can skip the partition check in the
+// second pass. The frame is marked skippable when it is not intra-only and
+// the first pass stats entries at stats_in - 2, stats_in - 1 and stats_in all
+// satisfy pcnt_inter - pcnt_motion == 1; the partition size is then assigned
+// according to the variance. For spatial SVC the stats of the current spatial
+// layer are used instead of cpi->twopass.
+// NOTE(review): presumably the three entries are the previous, current and
+// forward frames -- confirm against the code that advances stats_in.
+static void configure_skippable_frame(VP9_COMP *cpi) {
+  SVC *const svc = &cpi->svc;
+  const int is_spatial_svc = (svc->number_spatial_layers > 1) &&
+                             (svc->number_temporal_layers == 1);
+  TWO_PASS *const twopass = is_spatial_svc ?
+      &svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass;
+  const FIRSTPASS_STATS *const stats = twopass->stats_in;
+
+  // Compare element distances instead of forming (stats - 2) up front:
+  // computing a pointer that lies before the start of the stats buffer is
+  // undefined behaviour even when it is never dereferenced. The && chain
+  // guarantees stats[-1] and stats[-2] are only read once they are in range.
+  cpi->skippable_frame =
+      !frame_is_intra_only(&cpi->common) &&
+      stats - twopass->stats_in_start > 2 &&
+      stats < twopass->stats_in_end &&
+      stats[-1].pcnt_inter - stats[-1].pcnt_motion == 1 &&
+      stats[-2].pcnt_inter - stats[-2].pcnt_motion == 1 &&
+      stats[0].pcnt_inter - stats[0].pcnt_motion == 1;
+}
+
static void encode_frame_to_data_rate(VP9_COMP *cpi,
size_t *size,
uint8_t *dest,
@@ -2098,6 +2156,13 @@
if (cpi->pass == 2 && cpi->sf.static_segmentation)
configure_static_seg_features(cpi);
+ // Check if the current frame is skippable for the partition search in the
+ // second pass according to the first pass stats
+ if (cpi->pass == 2 &&
+ (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) {
+ configure_skippable_frame(cpi);
+ }
+
// For 1 pass CBR, check if we are dropping this frame.
// Never drop on key frame.
if (cpi->pass == 0 &&
@@ -2137,23 +2202,28 @@
}
#endif
-#ifdef OUTPUT_YUV_DENOISED
- vp9_write_yuv_frame(&cpi->denoiser.running_avg_y[INTRA_FRAME],
- yuv_denoised_file);
-#endif
#ifdef OUTPUT_YUV_SRC
vp9_write_yuv_frame(cpi->Source, yuv_file);
#endif
set_speed_features(cpi);
+#if CONFIG_DENOISING
+#ifdef OUTPUT_YUV_DENOISED
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_write_yuv_frame_420(&cpi->denoiser.running_avg_y[INTRA_FRAME],
+ yuv_denoised_file);
+ }
+#endif
+#endif
+
// Decide q and q bounds.
q = vp9_rc_pick_q_and_bounds(cpi, &bottom_index, &top_index);
if (!frame_is_intra_only(cm)) {
cm->interp_filter = DEFAULT_INTERP_FILTER;
/* TODO: Decide this more intelligently */
- set_high_precision_mv(cpi, q < HIGH_PRECISION_MV_QTHRESH);
+ vp9_set_high_precision_mv(cpi, q < HIGH_PRECISION_MV_QTHRESH);
}
if (cpi->sf.recode_loop == DISALLOW_RECODE) {
@@ -2241,31 +2311,8 @@
if (cm->frame_type == KEY_FRAME) {
// Tell the caller that the frame was coded as a key frame
*frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
-
-#if CONFIG_MULTIPLE_ARF
- // Reset the sequence number.
- if (cpi->multi_arf_enabled) {
- cpi->sequence_number = 0;
- cpi->frame_coding_order_period = cpi->new_frame_coding_order_period;
- cpi->new_frame_coding_order_period = -1;
- }
-#endif
} else {
*frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
-
-#if CONFIG_MULTIPLE_ARF
- /* Increment position in the coded frame sequence. */
- if (cpi->multi_arf_enabled) {
- ++cpi->sequence_number;
- if (cpi->sequence_number >= cpi->frame_coding_order_period) {
- cpi->sequence_number = 0;
- cpi->frame_coding_order_period = cpi->new_frame_coding_order_period;
- cpi->new_frame_coding_order_period = -1;
- }
- cpi->this_frame_weight = cpi->arf_weight[cpi->sequence_number];
- assert(cpi->this_frame_weight >= 0);
- }
-#endif
}
// Clear the one shot update flags for segmentation map and mode/ref loop
@@ -2319,6 +2366,16 @@
vp9_twopass_postencode_update(cpi);
}
+// Sets up cpi->ss_cfg for the configured motion search method, based on the
+// y_stride of the scaled source. Only NSTEP and DIAMOND trigger an
+// initialisation; any other method leaves cpi->ss_cfg untouched.
+static void init_motion_estimation(VP9_COMP *cpi) {
+  const int stride = cpi->scaled_source.y_stride;
+
+  switch (cpi->sf.mv.search_method) {
+    case NSTEP:
+      vp9_init3smotion_compensation(&cpi->ss_cfg, stride);
+      break;
+    case DIAMOND:
+      vp9_init_dsmotion_compensation(&cpi->ss_cfg, stride);
+      break;
+    default:
+      break;
+  }
+}
+}
+
static void check_initial_width(VP9_COMP *cpi, int subsampling_x,
int subsampling_y) {
VP9_COMMON *const cm = &cpi->common;
@@ -2326,7 +2383,13 @@
if (!cpi->initial_width) {
cm->subsampling_x = subsampling_x;
cm->subsampling_y = subsampling_y;
+
alloc_raw_frame_buffers(cpi);
+ alloc_ref_frame_buffers(cpi);
+ alloc_util_frame_buffers(cpi);
+
+ init_motion_estimation(cpi);
+
cpi->initial_width = cm->width;
cpi->initial_height = cm->height;
}
@@ -2341,11 +2404,22 @@
int res = 0;
const int subsampling_x = sd->uv_width < sd->y_width;
const int subsampling_y = sd->uv_height < sd->y_height;
+ const int is_spatial_svc = cpi->use_svc &&
+ (cpi->svc.number_temporal_layers == 1);
check_initial_width(cpi, subsampling_x, subsampling_y);
+
vpx_usec_timer_start(&timer);
- if (vp9_lookahead_push(cpi->lookahead,
- sd, time_stamp, end_time, frame_flags))
+
+#ifdef CONFIG_SPATIAL_SVC
+ if (is_spatial_svc)
+ res = vp9_svc_lookahead_push(cpi, cpi->lookahead, sd, time_stamp, end_time,
+ frame_flags);
+ else
+#endif
+ res = vp9_lookahead_push(cpi->lookahead,
+ sd, time_stamp, end_time, frame_flags);
+ if (res)
res = -1;
vpx_usec_timer_mark(&timer);
cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
@@ -2373,13 +2447,6 @@
cm->seg.update_data;
}
-#if CONFIG_MULTIPLE_ARF
-int is_next_frame_arf(VP9_COMP *cpi) {
- // Negative entry in frame_coding_order indicates an ARF at this position.
- return cpi->frame_coding_order[cpi->sequence_number + 1] < 0 ? 1 : 0;
-}
-#endif
-
void adjust_frame_rate(VP9_COMP *cpi) {
int64_t this_duration;
int step = 0;
@@ -2418,6 +2485,46 @@
cpi->last_end_time_stamp_seen = cpi->source->ts_end;
}
+// Returns the offset of the source frame used as the arf midpoint, or 0 when
+// the frame about to be coded is not an alt ref (or alt refs are disabled).
+static int get_arf_src_index(VP9_COMP *cpi) {
+  const RATE_CONTROL *const rc = &cpi->rc;
+
+  if (!is_altref_enabled(&cpi->oxcf))
+    return 0;
+
+  if (cpi->pass == 2) {
+    // Second pass: the gf group tells us whether this entry is an arf update.
+    const GF_GROUP *const group = &cpi->twopass.gf_group;
+    if (group->update_type[group->index] != ARF_UPDATE)
+      return 0;
+    return group->arf_src_offset[group->index];
+  }
+
+  // Otherwise an arf is coded only while rate control has one pending.
+  return rc->source_alt_ref_pending ? rc->frames_till_gf_update_due : 0;
+}
+
+// Works out whether the source frame just read is to be coded as an arf
+// overlay and stores the answer in rc->is_src_frame_alt_ref. In the second
+// pass the gf group decides (OVERLAY_UPDATE); otherwise the frame is an
+// overlay when it is the same lookahead entry that was saved as
+// cpi->alt_ref_source.
+static void check_src_altref(VP9_COMP *cpi) {
+  RATE_CONTROL *const rc = &cpi->rc;
+  int is_overlay;
+
+  if (cpi->pass != 2) {
+    is_overlay = cpi->alt_ref_source != NULL &&
+                 cpi->source == cpi->alt_ref_source;
+  } else {
+    const GF_GROUP *const group = &cpi->twopass.gf_group;
+    is_overlay = group->update_type[group->index] == OVERLAY_UPDATE;
+  }
+  rc->is_src_frame_alt_ref = is_overlay;
+
+  if (!is_overlay)
+    return;
+
+  // Current frame is an ARF overlay frame: the saved source is used up.
+  cpi->alt_ref_source = NULL;
+
+  // Leave the last buffer alone for an ARF overlay frame. The overlay will
+  // become the GF, so last is preserved as an alternative prediction option.
+  cpi->refresh_last_frame = 0;
+}
+
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest,
int64_t *time_stamp, int64_t *time_end, int flush) {
@@ -2427,11 +2534,16 @@
struct vpx_usec_timer cmptimer;
YV12_BUFFER_CONFIG *force_src_buffer = NULL;
MV_REFERENCE_FRAME ref_frame;
+ int arf_src_index;
+ const int is_spatial_svc = cpi->use_svc &&
+ (cpi->svc.number_temporal_layers == 1) &&
+ (cpi->svc.number_spatial_layers > 1);
if (!cpi)
return -1;
- if (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2) {
+ if (is_spatial_svc && cpi->pass == 2) {
+ vp9_svc_lookahead_peek(cpi, cpi->lookahead, 0, 1);
vp9_restore_layer_context(cpi);
}
@@ -2440,7 +2552,7 @@
cpi->source = NULL;
cpi->last_source = NULL;
- set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
+ vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
// Normal defaults
cm->reset_frame_context = 0;
@@ -2449,35 +2561,26 @@
cpi->refresh_golden_frame = 0;
cpi->refresh_alt_ref_frame = 0;
- // Should we code an alternate reference frame.
- if (is_altref_enabled(&cpi->oxcf) && rc->source_alt_ref_pending) {
- int frames_to_arf;
+ // Should we encode an arf frame.
+ arf_src_index = get_arf_src_index(cpi);
+ if (arf_src_index) {
+ assert(arf_src_index <= rc->frames_to_key);
-#if CONFIG_MULTIPLE_ARF
- assert(!cpi->multi_arf_enabled ||
- cpi->frame_coding_order[cpi->sequence_number] < 0);
-
- if (cpi->multi_arf_enabled && (cpi->pass == 2))
- frames_to_arf = (-cpi->frame_coding_order[cpi->sequence_number])
- - cpi->next_frame_in_order;
+#ifdef CONFIG_SPATIAL_SVC
+ if (is_spatial_svc)
+ cpi->source = vp9_svc_lookahead_peek(cpi, cpi->lookahead,
+ arf_src_index, 1);
else
#endif
- frames_to_arf = rc->frames_till_gf_update_due;
-
- assert(frames_to_arf <= rc->frames_to_key);
-
- if ((cpi->source = vp9_lookahead_peek(cpi->lookahead, frames_to_arf))) {
-#if CONFIG_MULTIPLE_ARF
- cpi->alt_ref_source[cpi->arf_buffered] = cpi->source;
-#else
+ cpi->source = vp9_lookahead_peek(cpi->lookahead, arf_src_index);
+ if (cpi->source != NULL) {
cpi->alt_ref_source = cpi->source;
-#endif
if (cpi->oxcf.arnr_max_frames > 0) {
// Produce the filtered ARF frame.
// TODO(agrange) merge these two functions.
- vp9_configure_arnr_filter(cpi, frames_to_arf, rc->gfu_boost);
- vp9_temporal_filter_prepare(cpi, frames_to_arf);
+ vp9_configure_arnr_filter(cpi, arf_src_index, rc->gfu_boost);
+ vp9_temporal_filter_prepare(cpi, arf_src_index);
vp9_extend_frame_borders(&cpi->alt_ref_buffer);
force_src_buffer = &cpi->alt_ref_buffer;
}
@@ -2487,59 +2590,38 @@
cpi->refresh_golden_frame = 0;
cpi->refresh_last_frame = 0;
rc->is_src_frame_alt_ref = 0;
-
-#if CONFIG_MULTIPLE_ARF
- if (!cpi->multi_arf_enabled)
-#endif
- rc->source_alt_ref_pending = 0;
+ rc->source_alt_ref_pending = 0;
} else {
rc->source_alt_ref_pending = 0;
}
}
if (!cpi->source) {
-#if CONFIG_MULTIPLE_ARF
- int i;
-#endif
-
// Get last frame source.
if (cm->current_video_frame > 0) {
- if ((cpi->last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL)
+#ifdef CONFIG_SPATIAL_SVC
+ if (is_spatial_svc)
+ cpi->last_source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, -1, 0);
+ else
+#endif
+ cpi->last_source = vp9_lookahead_peek(cpi->lookahead, -1);
+ if (cpi->last_source == NULL)
return -1;
}
- if ((cpi->source = vp9_lookahead_pop(cpi->lookahead, flush))) {
+ // Read in the source frame.
+#ifdef CONFIG_SPATIAL_SVC
+ if (is_spatial_svc)
+ cpi->source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
+ else
+#endif
+ cpi->source = vp9_lookahead_pop(cpi->lookahead, flush);
+ if (cpi->source != NULL) {
cm->show_frame = 1;
cm->intra_only = 0;
-#if CONFIG_MULTIPLE_ARF
- // Is this frame the ARF overlay.
- rc->is_src_frame_alt_ref = 0;
- for (i = 0; i < cpi->arf_buffered; ++i) {
- if (cpi->source == cpi->alt_ref_source[i]) {
- rc->is_src_frame_alt_ref = 1;
- cpi->refresh_golden_frame = 1;
- break;
- }
- }
-#else
- rc->is_src_frame_alt_ref = cpi->alt_ref_source &&
- (cpi->source == cpi->alt_ref_source);
-#endif
- if (rc->is_src_frame_alt_ref) {
- // Current frame is an ARF overlay frame.
-#if CONFIG_MULTIPLE_ARF
- cpi->alt_ref_source[i] = NULL;
-#else
- cpi->alt_ref_source = NULL;
-#endif
- // Don't refresh the last buffer for an ARF overlay frame. It will
- // become the GF so preserve last as an alternative prediction option.
- cpi->refresh_last_frame = 0;
- }
-#if CONFIG_MULTIPLE_ARF
- ++cpi->next_frame_in_order;
-#endif
+ // Check to see if the frame should be encoded as an arf overlay.
+ check_src_altref(cpi);
}
}
@@ -2547,20 +2629,17 @@
cpi->un_scaled_source = cpi->Source = force_src_buffer ? force_src_buffer
: &cpi->source->img;
- if (cpi->last_source != NULL) {
- cpi->unscaled_last_source = &cpi->last_source->img;
- } else {
- cpi->unscaled_last_source = NULL;
- }
+ if (cpi->last_source != NULL) {
+ cpi->unscaled_last_source = &cpi->last_source->img;
+ } else {
+ cpi->unscaled_last_source = NULL;
+ }
*time_stamp = cpi->source->ts_start;
*time_end = cpi->source->ts_end;
- *frame_flags = cpi->source->flags;
+ *frame_flags =
+ (cpi->source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
-#if CONFIG_MULTIPLE_ARF
- if (cm->frame_type != KEY_FRAME && cpi->pass == 2)
- rc->source_alt_ref_pending = is_next_frame_arf(cpi);
-#endif
} else {
*size = 0;
if (flush && cpi->pass == 1 && !cpi->twopass.first_pass_done) {
@@ -2598,16 +2677,14 @@
cm->frame_bufs[cm->new_fb_idx].ref_count--;
cm->new_fb_idx = get_free_fb(cm);
-#if CONFIG_MULTIPLE_ARF
- /* Set up the correct ARF frame. */
- if (cpi->refresh_alt_ref_frame) {
- ++cpi->arf_buffered;
+ if (!cpi->use_svc && cpi->multi_arf_allowed) {
+ if (cm->frame_type == KEY_FRAME) {
+ init_buffer_indices(cpi);
+ } else if (cpi->pass == 2) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ cpi->alt_fb_idx = gf_group->arf_ref_idx[gf_group->index];
+ }
}
- if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) &&
- (cpi->pass == 2)) {
- cpi->alt_fb_idx = cpi->arf_buffer_idx[cpi->sequence_number];
- }
-#endif
cpi->frame_flags = *frame_flags;
@@ -2626,6 +2703,9 @@
cm->subsampling_x, cm->subsampling_y,
VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
+ alloc_util_frame_buffers(cpi);
+ init_motion_estimation(cpi);
+
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
YV12_BUFFER_CONFIG *const buf = &cm->frame_bufs[idx].buf;
@@ -2795,16 +2875,23 @@
int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols) {
if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
+ const int mi_rows = cpi->common.mi_rows;
+ const int mi_cols = cpi->common.mi_cols;
if (map) {
- vpx_memcpy(cpi->active_map, map, rows * cols);
- cpi->active_map_enabled = 1;
+ int r, c;
+ for (r = 0; r < mi_rows; r++) {
+ for (c = 0; c < mi_cols; c++) {
+ cpi->segmentation_map[r * mi_cols + c] =
+ !map[(r >> 1) * cols + (c >> 1)];
+ }
+ }
+ vp9_enable_segfeature(&cpi->common.seg, 1, SEG_LVL_SKIP);
+ vp9_enable_segmentation(&cpi->common.seg);
} else {
- cpi->active_map_enabled = 0;
+ vp9_disable_segmentation(&cpi->common.seg);
}
-
return 0;
} else {
- // cpi->active_map_enabled = 0;
return -1;
}
}
@@ -2883,3 +2970,42 @@
int vp9_get_quantizer(VP9_COMP *cpi) {
return cpi->common.base_qindex;
}
+
+// Applies the per-frame VP8_EFLAG_* encoding flags to the encoder:
+//  - any NO_REF_* flag results in a call to vp9_use_as_reference();
+//  - any NO_UPD_* flag, FORCE_GF or FORCE_ARF results in a call to
+//    vp9_update_reference();
+//  - NO_UPD_ENTROPY passes 0 to vp9_update_entropy().
+// Both masks start at 7 and have the bit of every excluded frame toggled.
+// NOTE(review): 7 is presumably VP9_LAST_FLAG | VP9_GOLD_FLAG | VP9_ALT_FLAG
+// -- confirm against the flag definitions.
+void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) {
+  const vpx_enc_frame_flags_t ref_request =
+      VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
+  const vpx_enc_frame_flags_t upd_request =
+      VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
+      VP8_EFLAG_FORCE_GF | VP8_EFLAG_FORCE_ARF;
+
+  if (flags & ref_request) {
+    int ref_set = 7;
+
+    ref_set ^= (flags & VP8_EFLAG_NO_REF_LAST) ? VP9_LAST_FLAG : 0;
+    ref_set ^= (flags & VP8_EFLAG_NO_REF_GF) ? VP9_GOLD_FLAG : 0;
+    ref_set ^= (flags & VP8_EFLAG_NO_REF_ARF) ? VP9_ALT_FLAG : 0;
+
+    vp9_use_as_reference(cpi, ref_set);
+  }
+
+  if (flags & upd_request) {
+    // FORCE_GF / FORCE_ARF only trigger the call; they toggle no bits here.
+    int upd_set = 7;
+
+    upd_set ^= (flags & VP8_EFLAG_NO_UPD_LAST) ? VP9_LAST_FLAG : 0;
+    upd_set ^= (flags & VP8_EFLAG_NO_UPD_GF) ? VP9_GOLD_FLAG : 0;
+    upd_set ^= (flags & VP8_EFLAG_NO_UPD_ARF) ? VP9_ALT_FLAG : 0;
+
+    vp9_update_reference(cpi, upd_set);
+  }
+
+  if (flags & VP8_EFLAG_NO_UPD_ENTROPY)
+    vp9_update_entropy(cpi, 0);
+}
diff --git a/source/libvpx/vp9/encoder/vp9_encoder.h b/source/libvpx/vp9/encoder/vp9_encoder.h
index a80ab1b..c66e003 100644
--- a/source/libvpx/vp9/encoder/vp9_encoder.h
+++ b/source/libvpx/vp9/encoder/vp9_encoder.h
@@ -32,7 +32,7 @@
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
-#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_speed_features.h"
#include "vp9/encoder/vp9_svc_layercontext.h"
#include "vp9/encoder/vp9_tokenize.h"
@@ -248,11 +248,7 @@
VP9EncoderConfig oxcf;
struct lookahead_ctx *lookahead;
struct lookahead_entry *source;
-#if CONFIG_MULTIPLE_ARF
- struct lookahead_entry *alt_ref_source[REF_FRAMES];
-#else
struct lookahead_entry *alt_ref_source;
-#endif
struct lookahead_entry *last_source;
YV12_BUFFER_CONFIG *Source;
@@ -266,14 +262,13 @@
int alt_is_last; // Alt same as last ( short circuit altref search)
int gold_is_alt; // don't do both alt and gold search ( just do gold).
+ int skippable_frame;
+
int scaled_ref_idx[3];
int lst_fb_idx;
int gld_fb_idx;
int alt_fb_idx;
-#if CONFIG_MULTIPLE_ARF
- int alt_ref_fb_idx[REF_FRAMES - 3];
-#endif
int refresh_last_frame;
int refresh_golden_frame;
int refresh_alt_ref_frame;
@@ -291,13 +286,6 @@
TOKENEXTRA *tok;
unsigned int tok_count[4][1 << 6];
-#if CONFIG_MULTIPLE_ARF
- // Position within a frame coding order (including any additional ARF frames).
- unsigned int sequence_number;
- // Next frame in naturally occurring order that has not yet been coded.
- int next_frame_in_order;
-#endif
-
// Ambient reconstruction err target for force key frames
int ambient_err;
@@ -347,9 +335,6 @@
unsigned char *complexity_map;
- unsigned char *active_map;
- unsigned int active_map_enabled;
-
CYCLIC_REFRESH *cyclic_refresh;
fractional_mv_step_fp *find_fractional_mv_step;
@@ -363,6 +348,10 @@
uint64_t time_pick_lpf;
uint64_t time_encode_sb_row;
+#if CONFIG_FP_MB_STATS
+ int use_fp_mb_stats;
+#endif
+
TWO_PASS twopass;
YV12_BUFFER_CONFIG alt_ref_buffer;
@@ -416,7 +405,11 @@
SVC svc;
- int use_large_partition_rate;
+ // Store frame variance info in SOURCE_VAR_BASED_PARTITION search type.
+ diff *source_diff_var;
+ // The threshold used in SOURCE_VAR_BASED_PARTITION search type.
+ unsigned int source_var_thresh;
+ int frames_till_next_var_check;
int frame_flags;
@@ -433,18 +426,9 @@
PC_TREE *pc_root;
int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
-#if CONFIG_MULTIPLE_ARF
- // ARF tracking variables.
+ int multi_arf_allowed;
int multi_arf_enabled;
- unsigned int frame_coding_order_period;
- unsigned int new_frame_coding_order_period;
- int frame_coding_order[MAX_LAG_BUFFERS * 2];
- int arf_buffer_idx[MAX_LAG_BUFFERS * 3 / 2];
- int arf_weight[MAX_LAG_BUFFERS];
- int arf_buffered;
- int this_frame_weight;
- int max_arf_level;
-#endif
+ int multi_arf_last_grp_enabled;
#if CONFIG_DENOISING
VP9_DENOISER denoiser;
@@ -544,10 +528,14 @@
int64_t vp9_rescale(int64_t val, int64_t num, int denom);
+void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv);
+
YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
YV12_BUFFER_CONFIG *unscaled,
YV12_BUFFER_CONFIG *scaled);
+void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
+
static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
MV_REFERENCE_FRAME ref0,
MV_REFERENCE_FRAME ref1) {
diff --git a/source/libvpx/vp9/encoder/vp9_firstpass.c b/source/libvpx/vp9/encoder/vp9_firstpass.c
index 5e82bb3..ad73c4c 100644
--- a/source/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/source/libvpx/vp9/encoder/vp9_firstpass.c
@@ -33,8 +33,7 @@
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
-#include "vp9/encoder/vp9_ratectrl.h"
-#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_variance.h"
#define OUTPUT_FPF 0
@@ -56,14 +55,7 @@
#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)
#define MIN_KF_BOOST 300
-
-#if CONFIG_MULTIPLE_ARF
-// Set MIN_GF_INTERVAL to 1 for the full decomposition.
-#define MIN_GF_INTERVAL 2
-#else
-#define MIN_GF_INTERVAL 4
-#endif
-
+#define MIN_GF_INTERVAL 4
#define LONG_TERM_VBR_CORRECTION
static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
@@ -97,23 +89,43 @@
// Read frame stats at an offset from the current position.
-static int read_frame_stats(const TWO_PASS *p,
- FIRSTPASS_STATS *frame_stats, int offset) {
- const FIRSTPASS_STATS *fps_ptr = p->stats_in;
-
- // Check legality of offset.
- if (offset >= 0) {
- if (&fps_ptr[offset] >= p->stats_in_end)
- return EOF;
- } else if (offset < 0) {
- if (&fps_ptr[offset] < p->stats_in_start)
- return EOF;
+// Returns a pointer to the stats entry `offset` frames away from
+// p->stats_in, or NULL when that entry would fall outside
+// [stats_in_start, stats_in_end). The offset is checked against element
+// distances so that no out-of-range pointer is formed before the check.
+static const FIRSTPASS_STATS *read_frame_stats(const TWO_PASS *p, int offset) {
+  if ((offset >= 0 && offset >= p->stats_in_end - p->stats_in) ||
+      (offset < 0 && offset < p->stats_in_start - p->stats_in)) {
+    return NULL;
}
- *frame_stats = fps_ptr[offset];
+  return &p->stats_in[offset];
+}
+
+#if CONFIG_FP_MB_STATS
+// Reads the per-macroblock first pass stats of the current frame from
+// "firstpass_mb.stt" into fp_frame_stats->mb_stats. The record for frame N
+// starts at N * cm->MBs * sizeof(FIRSTPASS_MB_STATS).
+// Returns 1 on success, or EOF when the file cannot be opened, the seek
+// fails, or fewer than cm->MBs records could be read.
+static int input_mb_stats(FIRSTPASS_FRAME_MB_STATS *fp_frame_stats,
+                          const VP9_COMMON *const cm) {
+  FILE *fpfile;
+  size_t ret;
+
+  // The file holds raw structs, so it must be opened in binary mode.
+  fpfile = fopen("firstpass_mb.stt", "rb");
+  if (fpfile == NULL) {
+    return EOF;
+  }
+  if (fseek(fpfile,
+            cm->current_video_frame * cm->MBs * sizeof(FIRSTPASS_MB_STATS),
+            SEEK_SET) != 0) {
+    fclose(fpfile);
+    return EOF;
+  }
+  ret = fread(fp_frame_stats->mb_stats, sizeof(FIRSTPASS_MB_STATS), cm->MBs,
+              fpfile);
+  fclose(fpfile);
+  if (ret < (size_t)cm->MBs) {
+    return EOF;
+  }
return 1;
}
+// Appends the per-macroblock first pass stats of the current frame (cm->MBs
+// records) to "firstpass_mb.stt". The function has no way to report errors,
+// so when the file cannot be opened the stats are simply not written; a later
+// input_mb_stats() call then reports EOF for the missing data.
+static void output_mb_stats(FIRSTPASS_FRAME_MB_STATS *fp_frame_stats,
+                            const VP9_COMMON *const cm) {
+  FILE *fpfile;
+
+  // Binary append, matching the binary read in input_mb_stats().
+  fpfile = fopen("firstpass_mb.stt", "ab");
+  if (fpfile == NULL) {
+    return;
+  }
+  fwrite(fp_frame_stats->mb_stats, sizeof(FIRSTPASS_MB_STATS), cm->MBs, fpfile);
+  fclose(fpfile);
+}
+#endif
+
static int input_stats(TWO_PASS *p, FIRSTPASS_STATS *fps) {
if (p->stats_in >= p->stats_in_end)
return EOF;
@@ -460,6 +472,10 @@
const MV zero_mv = {0, 0};
const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
+#if CONFIG_FP_MB_STATS
+ FIRSTPASS_FRAME_MB_STATS *this_frame_mb_stats = &twopass->this_frame_mb_stats;
+#endif
+
vp9_clear_system_state();
set_first_pass_params(cpi);
@@ -497,6 +513,8 @@
&cpi->scaled_source);
}
+ vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
+
vp9_setup_src_planes(x, cpi->Source, 0, 0);
vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL);
vp9_setup_dst_planes(xd->plane, new_yv12, 0, 0);
@@ -504,8 +522,6 @@
xd->mi = cm->mi_grid_visible;
xd->mi[0] = cm->mi;
- vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
-
vp9_frame_init_quantizer(cpi);
for (i = 0; i < MAX_MB_PLANE; ++i) {
@@ -587,6 +603,17 @@
// Accumulate the intra error.
intra_error += (int64_t)this_error;
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ this_frame_mb_stats->mb_stats[mb_row * cm->mb_cols + mb_col].mode =
+ DC_PRED;
+ this_frame_mb_stats->mb_stats[mb_row * cm->mb_cols + mb_col].err =
+ this_error;
+ this_frame_mb_stats->mb_stats[mb_row * cm->mb_cols + mb_col].mv.as_int
+ = 0;
+ }
+#endif
+
// Set up limit values for motion vectors to prevent them extending
// outside the UMV borders.
x->mv_col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
@@ -615,7 +642,8 @@
&unscaled_last_source_buf_2d);
// TODO(pengchong): Replace the hard-coded threshold
- if (raw_motion_error > 25) {
+ if (raw_motion_error > 25 ||
+ (cpi->use_svc && cpi->svc.number_temporal_layers == 1)) {
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search.
first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
@@ -711,6 +739,17 @@
best_ref_mv.as_int = mv.as_int;
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ this_frame_mb_stats->mb_stats[mb_row * cm->mb_cols + mb_col].mode =
+ NEWMV;
+ this_frame_mb_stats->mb_stats[mb_row * cm->mb_cols + mb_col].err =
+ motion_error;
+ this_frame_mb_stats->mb_stats[mb_row * cm->mb_cols + mb_col].mv.
+ as_int = mv.as_int;
+ }
+#endif
+
if (mv.as_int) {
++mvcount;
@@ -815,6 +854,12 @@
twopass->this_frame_stats = fps;
output_stats(&twopass->this_frame_stats, cpi->output_pkt_list);
accumulate_stats(&twopass->total_stats, &fps);
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ output_mb_stats(this_frame_mb_stats, cm);
+ }
+#endif
}
// Copy the previous Last Frame back into gf and and arf buffers if
@@ -1060,24 +1105,16 @@
// score in the frame following a flash frame. The offset passed in should
// reflect this.
static int detect_flash(const TWO_PASS *twopass, int offset) {
- FIRSTPASS_STATS next_frame;
+ const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset);
- int flash_detected = 0;
-
- // Read the frame data.
- // The return is FALSE (no flash detected) if not a valid frame
- if (read_frame_stats(twopass, &next_frame, offset) != EOF) {
- // What we are looking for here is a situation where there is a
- // brief break in prediction (such as a flash) but subsequent frames
- // are reasonably well predicted by an earlier (pre flash) frame.
- // The recovery after a flash is indicated by a high pcnt_second_ref
- // compared to pcnt_inter.
- if (next_frame.pcnt_second_ref > next_frame.pcnt_inter &&
- next_frame.pcnt_second_ref >= 0.5)
- flash_detected = 1;
- }
-
- return flash_detected;
+ // What we are looking for here is a situation where there is a
+ // brief break in prediction (such as a flash) but subsequent frames
+ // are reasonably well predicted by an earlier (pre flash) frame.
+ // The recovery after a flash is indicated by a high pcnt_second_ref
+ // compared to pcnt_inter.
+ return next_frame != NULL &&
+ next_frame->pcnt_second_ref > next_frame->pcnt_inter &&
+ next_frame->pcnt_second_ref >= 0.5;
}
// Update the motion related elements to the GF arf boost calculation.
@@ -1137,7 +1174,6 @@
static int calc_arf_boost(VP9_COMP *cpi, int offset,
int f_frames, int b_frames,
int *f_boost, int *b_boost) {
- FIRSTPASS_STATS this_frame;
TWO_PASS *const twopass = &cpi->twopass;
int i;
double boost_score = 0.0;
@@ -1151,11 +1187,12 @@
// Search forward from the proposed arf/next gf position.
for (i = 0; i < f_frames; ++i) {
- if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF)
+ const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i + offset);
+ if (this_frame == NULL)
break;
// Update the motion related elements to the boost calculation.
- accumulate_frame_motion_stats(&this_frame,
+ accumulate_frame_motion_stats(this_frame,
&this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator,
&mv_ratio_accumulator);
@@ -1167,12 +1204,12 @@
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
- decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame);
+ decay_accumulator *= get_prediction_decay_rate(&cpi->common, this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR : decay_accumulator;
}
- boost_score += decay_accumulator * calc_frame_boost(twopass, &this_frame,
+ boost_score += decay_accumulator * calc_frame_boost(twopass, this_frame,
this_frame_mv_in_out);
}
@@ -1188,11 +1225,12 @@
// Search backward towards last gf position.
for (i = -1; i >= -b_frames; --i) {
- if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF)
+ const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i + offset);
+ if (this_frame == NULL)
break;
// Update the motion related elements to the boost calculation.
- accumulate_frame_motion_stats(&this_frame,
+ accumulate_frame_motion_stats(this_frame,
&this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator,
&mv_ratio_accumulator);
@@ -1204,12 +1242,12 @@
// Cumulative effect of prediction quality decay.
if (!flash_detected) {
- decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame);
+ decay_accumulator *= get_prediction_decay_rate(&cpi->common, this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR : decay_accumulator;
}
- boost_score += decay_accumulator * calc_frame_boost(twopass, &this_frame,
+ boost_score += decay_accumulator * calc_frame_boost(twopass, this_frame,
this_frame_mv_in_out);
}
*b_boost = (int)boost_score;
@@ -1221,144 +1259,6 @@
return arf_boost;
}
-#if CONFIG_MULTIPLE_ARF
-// Work out the frame coding order for a GF or an ARF group.
-// The current implementation codes frames in their natural order for a
-// GF group, and inserts additional ARFs into an ARF group using a
-// binary split approach.
-// NOTE: this function is currently implemented recursively.
-static void schedule_frames(VP9_COMP *cpi, const int start, const int end,
- const int arf_idx, const int gf_or_arf_group,
- const int level) {
- int i, abs_end, half_range;
- int *cfo = cpi->frame_coding_order;
- int idx = cpi->new_frame_coding_order_period;
-
- // If (end < 0) an ARF should be coded at position (-end).
- assert(start >= 0);
-
- // printf("start:%d end:%d\n", start, end);
-
- // GF Group: code frames in logical order.
- if (gf_or_arf_group == 0) {
- assert(end >= start);
- for (i = start; i <= end; ++i) {
- cfo[idx] = i;
- cpi->arf_buffer_idx[idx] = arf_idx;
- cpi->arf_weight[idx] = -1;
- ++idx;
- }
- cpi->new_frame_coding_order_period = idx;
- return;
- }
-
- // ARF Group: Work out the ARF schedule and mark ARF frames as negative.
- if (end < 0) {
- // printf("start:%d end:%d\n", -end, -end);
- // ARF frame is at the end of the range.
- cfo[idx] = end;
- // What ARF buffer does this ARF use as predictor.
- cpi->arf_buffer_idx[idx] = (arf_idx > 2) ? (arf_idx - 1) : 2;
- cpi->arf_weight[idx] = level;
- ++idx;
- abs_end = -end;
- } else {
- abs_end = end;
- }
-
- half_range = (abs_end - start) >> 1;
-
- // ARFs may not be adjacent, they must be separated by at least
- // MIN_GF_INTERVAL non-ARF frames.
- if ((start + MIN_GF_INTERVAL) >= (abs_end - MIN_GF_INTERVAL)) {
- // printf("start:%d end:%d\n", start, abs_end);
- // Update the coding order and active ARF.
- for (i = start; i <= abs_end; ++i) {
- cfo[idx] = i;
- cpi->arf_buffer_idx[idx] = arf_idx;
- cpi->arf_weight[idx] = -1;
- ++idx;
- }
- cpi->new_frame_coding_order_period = idx;
- } else {
- // Place a new ARF at the mid-point of the range.
- cpi->new_frame_coding_order_period = idx;
- schedule_frames(cpi, start, -(start + half_range), arf_idx + 1,
- gf_or_arf_group, level + 1);
- schedule_frames(cpi, start + half_range + 1, abs_end, arf_idx,
- gf_or_arf_group, level + 1);
- }
-}
-
-#define FIXED_ARF_GROUP_SIZE 16
-
-void define_fixed_arf_period(VP9_COMP *cpi) {
- int i;
- int max_level = INT_MIN;
-
- assert(cpi->multi_arf_enabled);
- assert(cpi->oxcf.lag_in_frames >= FIXED_ARF_GROUP_SIZE);
-
- // Save the weight of the last frame in the sequence before next
- // sequence pattern overwrites it.
- cpi->this_frame_weight = cpi->arf_weight[cpi->sequence_number];
- assert(cpi->this_frame_weight >= 0);
-
- cpi->twopass.gf_zeromotion_pct = 0;
-
- // Initialize frame coding order variables.
- cpi->new_frame_coding_order_period = 0;
- cpi->next_frame_in_order = 0;
- cpi->arf_buffered = 0;
- vp9_zero(cpi->frame_coding_order);
- vp9_zero(cpi->arf_buffer_idx);
- vpx_memset(cpi->arf_weight, -1, sizeof(cpi->arf_weight));
-
- if (cpi->rc.frames_to_key <= (FIXED_ARF_GROUP_SIZE + 8)) {
- // Setup a GF group close to the keyframe.
- cpi->rc.source_alt_ref_pending = 0;
- cpi->rc.baseline_gf_interval = cpi->rc.frames_to_key;
- schedule_frames(cpi, 0, (cpi->rc.baseline_gf_interval - 1), 2, 0, 0);
- } else {
- // Setup a fixed period ARF group.
- cpi->rc.source_alt_ref_pending = 1;
- cpi->rc.baseline_gf_interval = FIXED_ARF_GROUP_SIZE;
- schedule_frames(cpi, 0, -(cpi->rc.baseline_gf_interval - 1), 2, 1, 0);
- }
-
- // Replace level indicator of -1 with correct level.
- for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
- if (cpi->arf_weight[i] > max_level) {
- max_level = cpi->arf_weight[i];
- }
- }
- ++max_level;
- for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
- if (cpi->arf_weight[i] == -1) {
- cpi->arf_weight[i] = max_level;
- }
- }
- cpi->max_arf_level = max_level;
-#if 0
- printf("\nSchedule: ");
- for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
- printf("%4d ", cpi->frame_coding_order[i]);
- }
- printf("\n");
- printf("ARFref: ");
- for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
- printf("%4d ", cpi->arf_buffer_idx[i]);
- }
- printf("\n");
- printf("Weight: ");
- for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
- printf("%4d ", cpi->arf_weight[i]);
- }
- printf("\n");
-#endif
-}
-#endif
-
// Calculate a section intra ratio used in setting max loop filter.
static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin,
const FIRSTPASS_STATS *end,
@@ -1428,6 +1328,18 @@
return MAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), 0);
}
+// Upper bound on the number of arfs that can be active in a GF/ARF group.
+#define MAX_ACTIVE_ARFS 2
+#define ARF_SLOT1 2
+#define ARF_SLOT2 3
+// Single point of choice for the buffers used by arfs: writes the
+// MAX_ACTIVE_ARFS slot numbers into arf_buffer_indices. The values are fixed
+// for now, but this may change as part of the integration with other codec
+// features that swap buffers around.
+static void get_arf_buffer_indices(unsigned char *arf_buffer_indices) {
+  static const unsigned char arf_slots[MAX_ACTIVE_ARFS] = {
+    ARF_SLOT1, ARF_SLOT2
+  };
+  int i;
+
+  for (i = 0; i < MAX_ACTIVE_ARFS; ++i)
+    arf_buffer_indices[i] = arf_slots[i];
+}
+
static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
double group_error, int gf_arf_bits) {
RATE_CONTROL *const rc = &cpi->rc;
@@ -1435,42 +1347,80 @@
TWO_PASS *twopass = &cpi->twopass;
FIRSTPASS_STATS frame_stats;
int i;
- int group_frame_index = 1;
+ int frame_index = 1;
int target_frame_size;
int key_frame;
const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf);
int64_t total_group_bits = gf_group_bits;
double modified_err = 0.0;
double err_fraction;
+ int mid_boost_bits = 0;
+ int mid_frame_idx;
+ unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS];
key_frame = cpi->common.frame_type == KEY_FRAME ||
vp9_is_upper_layer_key_frame(cpi);
+ get_arf_buffer_indices(arf_buffer_indices);
+
// For key frames the frame target rate is already set and it
// is also the golden frame.
- // NOTE: We dont bother to check for the special case of ARF overlay
- // frames here, as there is clamping code for this in the function
- // vp9_rc_clamp_pframe_target_size(), which applies to one and two pass
- // encodes.
if (!key_frame) {
- twopass->gf_group_bit_allocation[0] = gf_arf_bits;
+ if (rc->source_alt_ref_active) {
+ twopass->gf_group.update_type[0] = OVERLAY_UPDATE;
+ twopass->gf_group.rf_level[0] = INTER_NORMAL;
+ twopass->gf_group.bit_allocation[0] = 0;
+ twopass->gf_group.arf_update_idx[0] = arf_buffer_indices[0];
+ twopass->gf_group.arf_ref_idx[0] = arf_buffer_indices[0];
+ } else {
+ twopass->gf_group.update_type[0] = GF_UPDATE;
+ twopass->gf_group.rf_level[0] = GF_ARF_STD;
+ twopass->gf_group.bit_allocation[0] = gf_arf_bits;
+ twopass->gf_group.arf_update_idx[0] = arf_buffer_indices[0];
+ twopass->gf_group.arf_ref_idx[0] = arf_buffer_indices[0];
+ }
// Step over the golden frame / overlay frame
if (EOF == input_stats(twopass, &frame_stats))
return;
}
- // Store the bits to spend on the ARF if there is one.
- if (rc->source_alt_ref_pending) {
- twopass->gf_group_bit_allocation[group_frame_index++] = gf_arf_bits;
- }
-
- // Deduct the boost bits for arf or gf if it is not a key frame.
+ // Deduct the boost bits for arf (or gf if it is not a key frame)
+ // from the group total.
if (rc->source_alt_ref_pending || !key_frame)
total_group_bits -= gf_arf_bits;
+ // Store the bits to spend on the ARF if there is one.
+ if (rc->source_alt_ref_pending) {
+ twopass->gf_group.update_type[frame_index] = ARF_UPDATE;
+ twopass->gf_group.rf_level[frame_index] = GF_ARF_STD;
+ twopass->gf_group.bit_allocation[frame_index] = gf_arf_bits;
+ twopass->gf_group.arf_src_offset[frame_index] =
+ (unsigned char)(rc->baseline_gf_interval - 1);
+ twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[0];
+ twopass->gf_group.arf_ref_idx[frame_index] =
+ arf_buffer_indices[cpi->multi_arf_last_grp_enabled &&
+ rc->source_alt_ref_active];
+ ++frame_index;
+
+ if (cpi->multi_arf_enabled) {
+ // Set aside a slot for a level 1 arf.
+ twopass->gf_group.update_type[frame_index] = ARF_UPDATE;
+ twopass->gf_group.rf_level[frame_index] = GF_ARF_LOW;
+ twopass->gf_group.arf_src_offset[frame_index] =
+ (unsigned char)((rc->baseline_gf_interval >> 1) - 1);
+ twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[1];
+ twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0];
+ ++frame_index;
+ }
+ }
+
+ // Define middle frame
+ mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;
+
// Allocate bits to the other frames in the group.
for (i = 0; i < rc->baseline_gf_interval - 1; ++i) {
+ int arf_idx = 0;
if (EOF == input_stats(twopass, &frame_stats))
break;
@@ -1482,11 +1432,52 @@
err_fraction = 0.0;
target_frame_size = (int)((double)total_group_bits * err_fraction);
+
+ if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) {
+ mid_boost_bits += (target_frame_size >> 4);
+ target_frame_size -= (target_frame_size >> 4);
+
+ if (frame_index <= mid_frame_idx)
+ arf_idx = 1;
+ }
+ twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[arf_idx];
+ twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
+
target_frame_size = clamp(target_frame_size, 0,
MIN(max_bits, (int)total_group_bits));
- twopass->gf_group_bit_allocation[group_frame_index++] = target_frame_size;
+ twopass->gf_group.update_type[frame_index] = LF_UPDATE;
+ twopass->gf_group.rf_level[frame_index] = INTER_NORMAL;
+
+ twopass->gf_group.bit_allocation[frame_index] = target_frame_size;
+ ++frame_index;
}
+
+ // Note:
+ // We need to configure the frame at the end of the sequence + 1 that will be
+ // the start frame for the next group. Otherwise prior to the call to
+ // vp9_rc_get_second_pass_params() the data will be undefined.
+ twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[0];
+ twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0];
+
+ if (rc->source_alt_ref_pending) {
+ twopass->gf_group.update_type[frame_index] = OVERLAY_UPDATE;
+ twopass->gf_group.rf_level[frame_index] = INTER_NORMAL;
+
+ // Final setup for second arf and its overlay.
+ if (cpi->multi_arf_enabled) {
+ twopass->gf_group.bit_allocation[2] =
+ twopass->gf_group.bit_allocation[mid_frame_idx] + mid_boost_bits;
+ twopass->gf_group.update_type[mid_frame_idx] = OVERLAY_UPDATE;
+ twopass->gf_group.bit_allocation[mid_frame_idx] = 0;
+ }
+ } else {
+ twopass->gf_group.update_type[frame_index] = GF_UPDATE;
+ twopass->gf_group.rf_level[frame_index] = GF_ARF_STD;
+ }
+
+ // Note whether multi-arf was enabled this group for next time.
+ cpi->multi_arf_last_grp_enabled = cpi->multi_arf_enabled;
}
// Analyse and define a gf/arf group.
@@ -1528,8 +1519,7 @@
// Reset the GF group data structures unless this is a key
// frame in which case it will already have been done.
if (cpi->common.frame_type != KEY_FRAME) {
- twopass->gf_group_index = 0;
- vp9_zero(twopass->gf_group_bit_allocation);
+ vp9_zero(twopass->gf_group);
}
vp9_clear_system_state();
@@ -1552,18 +1542,21 @@
// Motion breakout threshold for loop below depends on image size.
mv_ratio_accumulator_thresh = (cpi->common.width + cpi->common.height) / 10.0;
- // Work out a maximum interval for the GF.
- // If the image appears completely static we can extend beyond this.
- // The value chosen depends on the active Q range. At low Q we have
- // bits to spare and are better with a smaller interval and smaller boost.
- // At high Q when there are few bits to spare we are better with a longer
- // interval to spread the cost of the GF.
- //
- active_max_gf_interval =
- 12 + ((int)vp9_convert_qindex_to_q(rc->last_q[INTER_FRAME]) >> 5);
-
- if (active_max_gf_interval > rc->max_gf_interval)
+ // Work out a maximum interval for the GF group.
+ // If the image appears almost completely static we can extend beyond this.
+ if (cpi->multi_arf_allowed) {
active_max_gf_interval = rc->max_gf_interval;
+ } else {
+ // The value chosen depends on the active Q range. At low Q we have
+ // bits to spare and are better with a smaller interval and smaller boost.
+ // At high Q when there are few bits to spare we are better with a longer
+ // interval to spread the cost of the GF.
+ active_max_gf_interval =
+ 12 + ((int)vp9_convert_qindex_to_q(rc->last_q[INTER_FRAME]) >> 5);
+
+ if (active_max_gf_interval > rc->max_gf_interval)
+ active_max_gf_interval = rc->max_gf_interval;
+ }
i = 0;
while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) {
@@ -1651,24 +1644,14 @@
}
}
-#if CONFIG_MULTIPLE_ARF
- if (cpi->multi_arf_enabled) {
- // Initialize frame coding order variables.
- cpi->new_frame_coding_order_period = 0;
- cpi->next_frame_in_order = 0;
- cpi->arf_buffered = 0;
- vp9_zero(cpi->frame_coding_order);
- vp9_zero(cpi->arf_buffer_idx);
- vpx_memset(cpi->arf_weight, -1, sizeof(cpi->arf_weight));
- }
-#endif
-
// Set the interval until the next gf.
if (cpi->common.frame_type == KEY_FRAME || rc->source_alt_ref_active)
rc->baseline_gf_interval = i - 1;
else
rc->baseline_gf_interval = i;
+ rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+
// Should we use the alternate reference frame.
if (allow_alt_ref &&
(i < cpi->oxcf.lag_in_frames) &&
@@ -1681,62 +1664,15 @@
&b_boost);
rc->source_alt_ref_pending = 1;
-#if CONFIG_MULTIPLE_ARF
- // Set the ARF schedule.
- if (cpi->multi_arf_enabled) {
- schedule_frames(cpi, 0, -(rc->baseline_gf_interval - 1), 2, 1, 0);
- }
-#endif
+ // Test to see if multi arf is appropriate.
+ cpi->multi_arf_enabled =
+ (cpi->multi_arf_allowed && (rc->baseline_gf_interval >= 6) &&
+ (zero_motion_accumulator < 0.995)) ? 1 : 0;
} else {
rc->gfu_boost = (int)boost_score;
rc->source_alt_ref_pending = 0;
-#if CONFIG_MULTIPLE_ARF
- // Set the GF schedule.
- if (cpi->multi_arf_enabled) {
- schedule_frames(cpi, 0, rc->baseline_gf_interval - 1, 2, 0, 0);
- assert(cpi->new_frame_coding_order_period ==
- rc->baseline_gf_interval);
- }
-#endif
}
-#if CONFIG_MULTIPLE_ARF
- if (cpi->multi_arf_enabled && (cpi->common.frame_type != KEY_FRAME)) {
- int max_level = INT_MIN;
- // Replace level indicator of -1 with correct level.
- for (i = 0; i < cpi->frame_coding_order_period; ++i) {
- if (cpi->arf_weight[i] > max_level) {
- max_level = cpi->arf_weight[i];
- }
- }
- ++max_level;
- for (i = 0; i < cpi->frame_coding_order_period; ++i) {
- if (cpi->arf_weight[i] == -1) {
- cpi->arf_weight[i] = max_level;
- }
- }
- cpi->max_arf_level = max_level;
- }
-#if 0
- if (cpi->multi_arf_enabled) {
- printf("\nSchedule: ");
- for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
- printf("%4d ", cpi->frame_coding_order[i]);
- }
- printf("\n");
- printf("ARFref: ");
- for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
- printf("%4d ", cpi->arf_buffer_idx[i]);
- }
- printf("\n");
- printf("Weight: ");
- for (i = 0; i < cpi->new_frame_coding_order_period; ++i) {
- printf("%4d ", cpi->arf_weight[i]);
- }
- printf("\n");
- }
-#endif
-#endif
// Reset the file position.
reset_fpf_position(twopass, start_pos);
@@ -1886,14 +1822,15 @@
cpi->common.frame_type = KEY_FRAME;
// Reset the GF group data structures.
- twopass->gf_group_index = 0;
- vp9_zero(twopass->gf_group_bit_allocation);
+ vp9_zero(twopass->gf_group);
// Is this a forced key frame by interval.
rc->this_key_frame_forced = rc->next_key_frame_forced;
- // Clear the alt ref active flag as this can never be active on a key frame.
+ // Clear the alt ref active flag and last group multi arf flags as they
+ // can never be set for a key frame.
rc->source_alt_ref_active = 0;
+ cpi->multi_arf_last_grp_enabled = 0;
// KF is always a GF so clear frames till next gf counter.
rc->frames_till_gf_update_due = 0;
@@ -2078,7 +2015,9 @@
twopass->kf_group_bits -= kf_bits;
// Save the bits to spend on the key frame.
- twopass->gf_group_bit_allocation[0] = kf_bits;
+ twopass->gf_group.bit_allocation[0] = kf_bits;
+ twopass->gf_group.update_type[0] = KF_UPDATE;
+ twopass->gf_group.rf_level[0] = KF_STD;
// Note the total error score of the kf group minus the key frame itself.
twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err);
@@ -2106,6 +2045,44 @@
}
}
+// Define the reference buffers that will be updated post encode, based on the update type of the current gf group entry.
+void configure_buffer_updates(VP9_COMP *cpi) {  // NOTE(review): external linkage, but no prototype is visible in this chunk — confirm whether it should be static.
+ TWO_PASS *const twopass = &cpi->twopass;
+
+ cpi->rc.is_src_frame_alt_ref = 0;  // Cleared for every type; set again only for OVERLAY_UPDATE.
+ switch (twopass->gf_group.update_type[twopass->gf_group.index]) {
+ case KF_UPDATE:  // Refreshes last, golden and alt-ref.
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 1;
+ break;
+ case LF_UPDATE:  // Refreshes last only.
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
+ case GF_UPDATE:  // Refreshes last and golden.
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
+ case OVERLAY_UPDATE:  // Refreshes golden only and sets is_src_frame_alt_ref.
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_src_frame_alt_ref = 1;
+ break;
+ case ARF_UPDATE:  // Refreshes alt-ref only.
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 1;
+ break;
+ default:  // Any other value is treated as a programming error.
+ assert(0);  // If asserts are compiled out, the refresh_* flags keep their previous values.
+ }
+}
+
+
void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
@@ -2130,14 +2107,12 @@
if (!twopass->stats_in)
return;
- // Increment the gf group index.
- ++twopass->gf_group_index;
-
// If this is an arf frame then we dont want to read the stats file or
// advance the input pointer as we already have what we need.
- if (cpi->refresh_alt_ref_frame) {
+ if (twopass->gf_group.update_type[twopass->gf_group.index] == ARF_UPDATE) {
int target_rate;
- target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index];
+ configure_buffer_updates(cpi);
+ target_rate = twopass->gf_group.bit_allocation[twopass->gf_group.index];
target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
rc->base_frame_target = target_rate;
#ifdef LONG_TERM_VBR_CORRECTION
@@ -2201,15 +2176,7 @@
// Define a new GF/ARF group. (Should always enter here for key frames).
if (rc->frames_till_gf_update_due == 0) {
-#if CONFIG_MULTIPLE_ARF
- if (cpi->multi_arf_enabled) {
- define_fixed_arf_period(cpi);
- } else {
-#endif
- define_gf_group(cpi, &this_frame_copy);
-#if CONFIG_MULTIPLE_ARF
- }
-#endif
+ define_gf_group(cpi, &this_frame_copy);
if (twopass->gf_zeromotion_pct > 995) {
// As long as max_thresh for encode breakout is small enough, it is ok
@@ -2233,7 +2200,9 @@
}
}
- target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index];
+ configure_buffer_updates(cpi);
+
+ target_rate = twopass->gf_group.bit_allocation[twopass->gf_group.index];
if (cpi->common.frame_type == KEY_FRAME)
target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate);
else
@@ -2249,6 +2218,12 @@
// Update the total stats remaining structure.
subtract_stats(&twopass->total_left_stats, &this_frame);
+
+#if CONFIG_FP_MB_STATS
+ if (cpi->use_fp_mb_stats) {
+ input_mb_stats(&twopass->this_frame_mb_stats, cm);
+ }
+#endif
}
void vp9_twopass_postencode_update(VP9_COMP *cpi) {
@@ -2296,4 +2271,7 @@
twopass->kf_group_bits -= bits_used;
}
twopass->kf_group_bits = MAX(twopass->kf_group_bits, 0);
+
+ // Increment the gf group index ready for the next frame.
+ ++twopass->gf_group.index;
}
diff --git a/source/libvpx/vp9/encoder/vp9_firstpass.h b/source/libvpx/vp9/encoder/vp9_firstpass.h
index 8206521..7e4c9ee 100644
--- a/source/libvpx/vp9/encoder/vp9_firstpass.h
+++ b/source/libvpx/vp9/encoder/vp9_firstpass.h
@@ -12,11 +12,24 @@
#define VP9_ENCODER_VP9_FIRSTPASS_H_
#include "vp9/encoder/vp9_lookahead.h"
+#include "vp9/encoder/vp9_ratectrl.h"
#ifdef __cplusplus
extern "C" {
#endif
+#if CONFIG_FP_MB_STATS  // The types below exist only when this option is non-zero.
+typedef struct {  // NOTE(review): presumably one first-pass record per macroblock — confirm against the writer.
+ PREDICTION_MODE mode;
+ int err;
+ int_mv mv;
+} FIRSTPASS_MB_STATS;
+
+typedef struct {  // Per-frame wrapper; TWO_PASS holds one as this_frame_mb_stats.
+ FIRSTPASS_MB_STATS *mb_stats;  // Element count and ownership are not visible in this header.
+} FIRSTPASS_FRAME_MB_STATS;
+#endif
+
typedef struct {
double frame;
double intra_error;
@@ -39,6 +52,25 @@
int64_t spatial_layer_id;
} FIRSTPASS_STATS;
+typedef enum {  // Update type of one gf group entry; configure_buffer_updates() maps it to refresh flags.
+ KF_UPDATE = 0,  // Key frame entry: last, golden and alt-ref are refreshed.
+ LF_UPDATE = 1,  // Only the last frame buffer is refreshed.
+ GF_UPDATE = 2,  // Last and golden are refreshed.
+ ARF_UPDATE = 3,  // Only the alt-ref buffer is refreshed.
+ OVERLAY_UPDATE = 4,  // Only golden is refreshed; the frame is flagged as an alt-ref source.
+ FRAME_UPDATE_TYPES = 5  // Equals the number of update types listed above.
+} FRAME_UPDATE_TYPE;
+
+typedef struct {  // Per-entry schedule for one GF/ARF group; zeroed with vp9_zero() when a new group or key frame is set up.
+ unsigned char index;  // Entry for the current frame; incremented in vp9_twopass_postencode_update().
+ RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1];  // Per entry, e.g. KF_STD, GF_ARF_STD, GF_ARF_LOW, INTER_NORMAL.
+ FRAME_UPDATE_TYPE update_type[(MAX_LAG_BUFFERS * 2) + 1];  // Per entry; read by configure_buffer_updates().
+ unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];  // Written only for ARF_UPDATE entries, derived from baseline_gf_interval.
+ unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1];  // ARF buffer slot values from get_arf_buffer_indices(); consumer not visible here.
+ unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1];  // ARF buffer slot values from get_arf_buffer_indices(); consumer not visible here.
+ int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1];  // Target bits per entry; read as target_rate in vp9_rc_get_second_pass_params().
+} GF_GROUP;  // NOTE(review): the "+ 1" presumably holds the first frame of the next group — confirm.
+
typedef struct {
unsigned int section_intra_rating;
unsigned int next_iiratio;
@@ -56,6 +88,10 @@
double kf_intra_err_min;
double gf_intra_err_min;
+#if CONFIG_FP_MB_STATS
+ FIRSTPASS_FRAME_MB_STATS this_frame_mb_stats;
+#endif
+
// Projected total bits available for a key frame group of frames
int64_t kf_group_bits;
@@ -68,8 +104,7 @@
int active_worst_quality;
- int gf_group_index;
- int gf_group_bit_allocation[MAX_LAG_BUFFERS * 2];
+ GF_GROUP gf_group;
} TWO_PASS;
struct VP9_COMP;
diff --git a/source/libvpx/vp9/encoder/vp9_lookahead.c b/source/libvpx/vp9/encoder/vp9_lookahead.c
index abe71e6..e743517 100644
--- a/source/libvpx/vp9/encoder/vp9_lookahead.c
+++ b/source/libvpx/vp9/encoder/vp9_lookahead.c
@@ -18,18 +18,6 @@
#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_lookahead.h"
-// The max of past frames we want to keep in the queue.
-#define MAX_PRE_FRAMES 1
-
-struct lookahead_ctx {
- unsigned int max_sz; /* Absolute size of the queue */
- unsigned int sz; /* Number of buffers currently in the queue */
- unsigned int read_idx; /* Read index */
- unsigned int write_idx; /* Write index */
- struct lookahead_entry *buf; /* Buffer list */
-};
-
-
/* Return the buffer at the given absolute index and increment the index */
static struct lookahead_entry *pop(struct lookahead_ctx *ctx,
unsigned int *idx) {
diff --git a/source/libvpx/vp9/encoder/vp9_lookahead.h b/source/libvpx/vp9/encoder/vp9_lookahead.h
index ff63c0d..f9cc3c8 100644
--- a/source/libvpx/vp9/encoder/vp9_lookahead.h
+++ b/source/libvpx/vp9/encoder/vp9_lookahead.h
@@ -14,6 +14,11 @@
#include "vpx_scale/yv12config.h"
#include "vpx/vpx_integer.h"
+#ifdef CONFIG_SPATIAL_SVC  // NOTE(review): other options in this change are tested with #if (e.g. CONFIG_FP_MB_STATS); if this macro is always defined as 0 or 1, #ifdef is always true — confirm.
+#include "vpx/vp8cx.h"
+#include "vpx/vpx_encoder.h"
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -25,10 +30,22 @@
int64_t ts_start;
int64_t ts_end;
unsigned int flags;
+
+#ifdef CONFIG_SPATIAL_SVC
+ vpx_svc_parameters_t svc_params[VPX_SS_MAX_LAYERS];
+#endif
};
+// The max of past frames we want to keep in the queue.
+#define MAX_PRE_FRAMES 1
-struct lookahead_ctx;
+struct lookahead_ctx {  /* Full definition now lives in this header (moved from vp9_lookahead.c), replacing the forward declaration */
+ unsigned int max_sz; /* Absolute size of the queue */
+ unsigned int sz; /* Number of buffers currently in the queue */
+ unsigned int read_idx; /* Read index */
+ unsigned int write_idx; /* Write index */
+ struct lookahead_entry *buf; /* Buffer list */
+};
/**\brief Initializes the lookahead stage
*
diff --git a/source/libvpx/vp9/encoder/vp9_mbgraph.c b/source/libvpx/vp9/encoder/vp9_mbgraph.c
index 8106960..9eb2fbc 100644
--- a/source/libvpx/vp9/encoder/vp9_mbgraph.c
+++ b/source/libvpx/vp9/encoder/vp9_mbgraph.c
@@ -11,7 +11,6 @@
#include <limits.h>
#include "vpx_mem/vpx_mem.h"
-#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/common/vp9_blockd.h"
@@ -37,9 +36,8 @@
MV ref_full;
// Further step/diamond searches as necessary
- int step_param = mv_sf->reduce_first_step_size +
- (cpi->oxcf.speed > 5 ? 1 : 0);
- step_param = MIN(step_param, mv_sf->max_step_search_steps - 2);
+ int step_param = mv_sf->reduce_first_step_size;
+ step_param = MIN(step_param, MAX_MVSEARCH_STEPS - 2);
vp9_set_mv_search_range(x, ref_mv);
diff --git a/source/libvpx/vp9/encoder/vp9_mcomp.c b/source/libvpx/vp9/encoder/vp9_mcomp.c
index cb45dfa..c0edf45 100644
--- a/source/libvpx/vp9/encoder/vp9_mcomp.c
+++ b/source/libvpx/vp9/encoder/vp9_mcomp.c
@@ -60,8 +60,7 @@
while ((size << sr) < MAX_FULL_PEL_VAL)
sr++;
- sr += sf->mv.reduce_first_step_size;
- sr = MIN(sr, (sf->mv.max_step_search_steps - 2));
+ sr = MIN(sr, MAX_MVSEARCH_STEPS - 2);
return sr;
}
@@ -1627,8 +1626,7 @@
break;
case NSTEP:
var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
- (sf->mv.max_step_search_steps - 1) -
- step_param,
+ MAX_MVSEARCH_STEPS - 1 - step_param,
1, fn_ptr, ref_mv, tmp_mv);
break;
default:
diff --git a/source/libvpx/vp9/encoder/vp9_picklpf.c b/source/libvpx/vp9/encoder/vp9_picklpf.c
index 5328465..0f363a7 100644
--- a/source/libvpx/vp9/encoder/vp9_picklpf.c
+++ b/source/libvpx/vp9/encoder/vp9_picklpf.c
@@ -144,7 +144,9 @@
lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0
: cpi->oxcf.sharpness;
- if (method == LPF_PICK_FROM_Q) {
+ if (method == LPF_PICK_MINIMAL_LPF && lf->filter_level) {
+ lf->filter_level = 0;
+ } else if (method >= LPF_PICK_FROM_Q) {
const int min_filter_level = 0;
const int max_filter_level = get_max_filter_level(cpi);
const int q = vp9_ac_quant(cm->base_qindex, 0);
diff --git a/source/libvpx/vp9/encoder/vp9_pickmode.c b/source/libvpx/vp9/encoder/vp9_pickmode.c
index f3b2d2f..7515f44 100644
--- a/source/libvpx/vp9/encoder/vp9_pickmode.c
+++ b/source/libvpx/vp9/encoder/vp9_pickmode.c
@@ -23,8 +23,88 @@
#include "vp9/common/vp9_reconintra.h"
#include "vp9/encoder/vp9_encoder.h"
+#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_ratectrl.h"
-#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_rd.h"
+// Builds the candidate mv list for ref_frame from spatial neighbours only, stores the mode context in mi, and returns const_motion (see below).
+static int mv_refs_rt(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+ const TileInfo *const tile,
+ MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+ int_mv *mv_ref_list,
+ int mi_row, int mi_col) {
+ const int *ref_sign_bias = cm->ref_frame_sign_bias;  // Not referenced by name below; presumably used inside IF_DIFF_REF_FRAME_ADD_MV — confirm.
+ int i, refmv_count = 0;  // refmv_count is never assigned directly here; presumably advanced by the ADD_MV_REF_LIST macro.
+
+ const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
+
+ int different_ref_found = 0;
+ int context_counter = 0;
+ int const_motion = 0;  // Return value; stays 0 only if control reaches Done before the assignment after the first loop.
+
+ // Blank the reference vector list
+ vpx_memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);
+
+ // The nearest 2 blocks are treated differently
+ // if the size < 8x8 we get the mv from the bmi substructure,
+ // and we also need to keep a mode count.
+ for (i = 0; i < 2; ++i) {
+ const POSITION *const mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
+ xd->mi_stride];
+ const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
+ // Keep counts for entropy encoding.
+ context_counter += mode_2_counter[candidate->mode];
+ different_ref_found = 1;  // Set for any in-bounds neighbour, whatever its reference frame.
+
+ if (candidate->ref_frame[0] == ref_frame)
+ ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1));
+ }
+ }
+
+ const_motion = 1;  // Reached only when the loop above ran to completion without leaving the function body early.
+
+ // Check the rest of the neighbors in much the same way
+ // as before except we don't need to keep track of sub blocks or
+ // mode counts. The loop stops as soon as refmv_count is non-zero.
+ for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) {
+ const POSITION *const mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
+ xd->mi_stride]->mbmi;
+ different_ref_found = 1;
+
+ if (candidate->ref_frame[0] == ref_frame)
+ ADD_MV_REF_LIST(candidate->mv[0]);
+ }
+ }
+
+ // Since we couldn't find any mv from the same reference frame
+ // (refmv_count is still zero), go back through the neighbors and find
+ // motion vectors from different reference frames.
+ if (different_ref_found && !refmv_count) {
+ for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
+ const POSITION *mv_ref = &mv_ref_search[i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
+ * xd->mi_stride]->mbmi;
+
+ // If the candidate is INTRA we don't want to consider its mv.
+ IF_DIFF_REF_FRAME_ADD_MV(candidate);
+ }
+ }
+ }
+
+ Done:  // No explicit goto targets this label in the function text; presumably the *_ADD_MV macros jump here — confirm.
+
+ mi->mbmi.mode_context[ref_frame] = counter_to_context[context_counter];
+
+ // Clamp vectors
+ for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
+ clamp_mv_ref(&mv_ref_list[i].as_mv, xd);
+
+ return const_motion;  // The visible caller skips NEARMV and ZEROMV for this reference frame when this is non-zero.
+}
static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int mi_row, int mi_col,
@@ -61,7 +141,7 @@
// TODO(jingning) exploiting adaptive motion search control in non-RD
// mode decision too.
- step_param = 6;
+ step_param = cpi->sf.mv.fullpel_search_step_param;
for (i = LAST_FRAME; i <= LAST_FRAME && cpi->common.show_frame; ++i) {
if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
@@ -172,6 +252,17 @@
else
x->skip_txfm = 0;
+ if (cpi->common.tx_mode == TX_MODE_SELECT) {
+ if (sse > (var << 2))
+ xd->mi[0]->mbmi.tx_size = MIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+ else
+ xd->mi[0]->mbmi.tx_size = TX_8X8;
+ } else {
+ xd->mi[0]->mbmi.tx_size = MIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+ }
+
vp9_model_rd_from_var_lapndz(sse - var, 1 << num_pels_log2_lookup[bsize],
dc_quant >> 3, &rate, &dist);
*out_rate_sum = rate >> 1;
@@ -183,6 +274,109 @@
*out_dist_sum += dist << 4;
}
+static int get_pred_buffer(PRED_BUFFER *p, int len) {  // Claims the first entry of p[0..len-1] that is not in use; returns its index, or -1 if none is free.
+ int i;
+
+ for (i = 0; i < len; i++) {
+ if (!p[i].in_use) {
+ p[i].in_use = 1;  // Mark as taken; cleared again by free_pred_buffer().
+ return i;
+ }
+ }
+ return -1;  // NOTE(review): visible callers index tmp[] with the result without checking for -1 — confirm a free entry always exists.
+}
+// Releases a prediction buffer by clearing its in_use flag so get_pred_buffer() can hand it out again.
+static void free_pred_buffer(PRED_BUFFER *p) {
+ p->in_use = 0;
+}
+// Tests whether the block can be coded as skipped from prediction alone; on success sets x->skip and writes *rate and *dist, otherwise leaves them untouched.
+static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ MV_REFERENCE_FRAME ref_frame,
+ PREDICTION_MODE this_mode,
+ unsigned int var_y, unsigned int sse_y,
+ struct buf_2d yv12_mb[][MAX_MB_PLANE],
+ int *rate, int64_t *dist) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+
+ const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
+ unsigned int var = var_y, sse = sse_y;  // Luma variance and sse supplied by the caller.
+ // Skipping threshold for ac.
+ unsigned int thresh_ac;
+ // Skipping threshold for dc.
+ unsigned int thresh_dc;
+ if (x->encode_breakout > 0) {
+ // Set a maximum for threshold to avoid big PSNR loss in low bit rate
+ // case. Use extreme low threshold for static frames to limit
+ // skipping.
+ const unsigned int max_thresh = 36000;
+ // The encode_breakout input, scaled by 16 and capped at max_thresh.
+ const unsigned int min_thresh =
+ MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
+
+ // Calculate threshold according to dequant value.
+ thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
+ thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
+
+ // Adjust ac threshold according to partition size.
+ thresh_ac >>=
+ 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
+
+ thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
+ } else {
+ thresh_ac = 0;  // With zero thresholds the unsigned tests below pass only
+ thresh_dc = 0;  // when variance is 0 and sse equals variance.
+ }
+
+ // Y skipping condition checking for ac and dc.
+ if (var <= thresh_ac && (sse - var) <= thresh_dc) {
+ unsigned int sse_u, sse_v;
+ unsigned int var_u, var_v;
+
+ // Skip UV prediction unless breakout is zero (lossless) to save
+ // computation with low impact on the result
+ if (x->encode_breakout == 0) {
+ xd->plane[1].pre[0] = yv12_mb[ref_frame][1];
+ xd->plane[2].pre[0] = yv12_mb[ref_frame][2];
+ vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, bsize);
+ }
+
+ var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,  // Measured against the current dst contents; nothing is rebuilt here when breakout is non-zero.
+ x->plane[1].src.stride,
+ xd->plane[1].dst.buf,
+ xd->plane[1].dst.stride, &sse_u);
+
+ // U skipping condition checking
+ if ((var_u * 4 <= thresh_ac) && (sse_u - var_u <= thresh_dc)) {
+ var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
+ x->plane[2].src.stride,
+ xd->plane[2].dst.buf,
+ xd->plane[2].dst.stride, &sse_v);
+
+ // V skipping condition checking
+ if ((var_v * 4 <= thresh_ac) && (sse_v - var_v <= thresh_dc)) {
+ x->skip = 1;
+
+ // Rate is the inter mode cost only; the cost of the skip bit is not added yet.
+ *rate = cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
+ [INTER_OFFSET(this_mode)];
+
+ // More on this part of rate
+ // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
+
+ // Scaling factor for SSE from spatial domain to frequency
+ // domain is 16. Adjust distortion accordingly.
+ // TODO(yunqingwang): In this function, only y-plane dist is
+ // calculated.
+ *dist = (sse << 4); // + ((sse_u + sse_v) << 4);
+
+ // *disable_skip = 1;
+ }
+ }
+ }
+}
+
// TODO(jingning) placeholder for inter-frame non-RD mode decision.
// this needs various further optimizations. to be continued..
int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
@@ -197,6 +391,8 @@
struct macroblockd_plane *const pd = &xd->plane[0];
PREDICTION_MODE this_mode, best_mode = ZEROMV;
MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
+ TX_SIZE best_tx_size = MIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
INTERP_FILTER best_pred_filter = EIGHTTAP;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
@@ -224,10 +420,42 @@
const int *const rd_thresh_freq_fact = cpi->rd.thresh_freq_fact[bsize];
// Mode index conversion form THR_MODES to PREDICTION_MODE for a ref frame.
int mode_idx[MB_MODE_COUNT] = {0};
- INTERP_FILTER filter_ref = SWITCHABLE;
+ INTERP_FILTER filter_ref = cm->interp_filter;
int bsl = mi_width_log2_lookup[bsize];
- const int pred_filter_search = (((mi_row + mi_col) >> bsl) +
- get_chessboard_index(cm)) % 2;
+ const int pred_filter_search = cm->interp_filter == SWITCHABLE ?
+ (((mi_row + mi_col) >> bsl) + get_chessboard_index(cm)) % 2 : 0;
+ int const_motion[MAX_REF_FRAMES] = { 0 };
+
+ // For speed 6, the result of interp filter is reused later in actual encoding
+ // process.
+ int bh = num_4x4_blocks_high_lookup[bsize] << 2;
+ int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
+ int pixels_in_block = bh * bw;
+ // tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
+ PRED_BUFFER tmp[4];
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, pred_buf, 3 * 64 * 64);
+ struct buf_2d orig_dst = pd->dst;
+ PRED_BUFFER *best_pred = NULL;
+ PRED_BUFFER *this_mode_pred = NULL;
+ int i;
+
+#if CONFIG_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_denoiser_reset_frame_stats(&cpi->denoiser);
+ }
+#endif
+
+ if (cpi->sf.reuse_inter_pred_sby) {
+ for (i = 0; i < 3; i++) {
+ tmp[i].data = &pred_buf[pixels_in_block * i];
+ tmp[i].stride = bw;
+ tmp[i].in_use = 0;
+ }
+
+ tmp[3].data = pd->dst.buf;
+ tmp[3].stride = pd->dst.stride;
+ tmp[3].in_use = 0;
+ }
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
@@ -241,18 +469,36 @@
mbmi->ref_frame[0] = NONE;
mbmi->ref_frame[1] = NONE;
mbmi->tx_size = MIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
- mbmi->interp_filter = cpi->common.interp_filter == SWITCHABLE ?
- EIGHTTAP : cpi->common.interp_filter;
+ tx_mode_to_biggest_tx_size[cm->tx_mode]);
+ mbmi->interp_filter = cm->interp_filter == SWITCHABLE ?
+ EIGHTTAP : cm->interp_filter;
mbmi->skip = 0;
mbmi->segment_id = segment_id;
for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
- vp9_setup_buffer_inter(cpi, x, tile,
- ref_frame, bsize, mi_row, mi_col,
- frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
+ int_mv *const candidates = mbmi->ref_mvs[ref_frame];
+ const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
+ vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
+ sf, sf);
+
+ if (cm->coding_use_prev_mi)
+ vp9_find_mv_refs(cm, xd, tile, xd->mi[0], ref_frame,
+ candidates, mi_row, mi_col);
+ else
+ const_motion[ref_frame] = mv_refs_rt(cm, xd, tile, xd->mi[0],
+ ref_frame, candidates,
+ mi_row, mi_col);
+
+ vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
+ &frame_mv[NEARESTMV][ref_frame],
+ &frame_mv[NEARMV][ref_frame]);
+
+ if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8)
+ vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
+ ref_frame, bsize);
}
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
@@ -286,6 +532,10 @@
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
int rate_mv = 0;
+ if (const_motion[ref_frame] &&
+ (this_mode == NEARMV || this_mode == ZEROMV))
+ continue;
+
if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode)))
continue;
@@ -324,6 +574,16 @@
// Search for the best prediction filter type, when the resulting
// motion vector is at sub-pixel accuracy level for luma component, i.e.,
// the last three bits are all zeros.
+ if (cpi->sf.reuse_inter_pred_sby) {
+ if (this_mode == NEARESTMV) {
+ this_mode_pred = &tmp[3];
+ } else {
+ this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
+ pd->dst.buf = this_mode_pred->data;
+ pd->dst.stride = bw;
+ }
+ }
+
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
pred_filter_search &&
((mbmi->mv[0].as_mv.row & 0x07) != 0 ||
@@ -332,8 +592,10 @@
int64_t pf_dist[3];
unsigned int pf_var[3];
unsigned int pf_sse[3];
+ TX_SIZE pf_tx_size[3];
int64_t best_cost = INT64_MAX;
INTERP_FILTER best_filter = SWITCHABLE, filter;
+ PRED_BUFFER *current_pred = this_mode_pred;
for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) {
int64_t cost;
@@ -344,14 +606,32 @@
cost = RDCOST(x->rdmult, x->rddiv,
vp9_get_switchable_rate(cpi) + pf_rate[filter],
pf_dist[filter]);
+ pf_tx_size[filter] = mbmi->tx_size;
if (cost < best_cost) {
- best_filter = filter;
- best_cost = cost;
- skip_txfm = x->skip_txfm;
+ best_filter = filter;
+ best_cost = cost;
+ skip_txfm = x->skip_txfm;
+
+ if (cpi->sf.reuse_inter_pred_sby) {
+ if (this_mode_pred != current_pred) {
+ free_pred_buffer(this_mode_pred);
+ this_mode_pred = current_pred;
+ }
+
+ if (filter < EIGHTTAP_SHARP) {
+ current_pred = &tmp[get_pred_buffer(tmp, 3)];
+ pd->dst.buf = current_pred->data;
+ pd->dst.stride = bw;
+ }
+ }
}
}
+ if (cpi->sf.reuse_inter_pred_sby && this_mode_pred != current_pred)
+ free_pred_buffer(current_pred);
+
mbmi->interp_filter = best_filter;
+ mbmi->tx_size = pf_tx_size[mbmi->interp_filter];
rate = pf_rate[mbmi->interp_filter];
dist = pf_dist[mbmi->interp_filter];
var_y = pf_var[mbmi->interp_filter];
@@ -370,77 +650,19 @@
// Skipping checking: test to see if this block can be reconstructed by
// prediction only.
- if (!x->in_active_map) {
- x->skip = 1;
- } else if (cpi->allow_encode_breakout && x->encode_breakout) {
- const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
- unsigned int var = var_y, sse = sse_y;
- // Skipping threshold for ac.
- unsigned int thresh_ac;
- // Skipping threshold for dc.
- unsigned int thresh_dc;
- // Set a maximum for threshold to avoid big PSNR loss in low bit rate
- // case. Use extreme low threshold for static frames to limit skipping.
- const unsigned int max_thresh = 36000;
- // The encode_breakout input
- const unsigned int min_thresh =
- MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
-
- // Calculate threshold according to dequant value.
- thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
- thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
-
- // Adjust ac threshold according to partition size.
- thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
- b_height_log2_lookup[bsize]);
-
- thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
-
- // Y skipping condition checking for ac and dc.
- if (var <= thresh_ac && (sse - var) <= thresh_dc) {
- unsigned int sse_u, sse_v;
- unsigned int var_u, var_v;
-
- // Skip u v prediction for less calculation, that won't affect
- // result much.
- var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
- x->plane[1].src.stride,
- xd->plane[1].dst.buf,
- xd->plane[1].dst.stride, &sse_u);
-
- // U skipping condition checking
- if ((var_u * 4 <= thresh_ac) && (sse_u - var_u <= thresh_dc)) {
- var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
- x->plane[2].src.stride,
- xd->plane[2].dst.buf,
- xd->plane[2].dst.stride, &sse_v);
-
- // V skipping condition checking
- if ((var_v * 4 <= thresh_ac) && (sse_v - var_v <= thresh_dc)) {
- x->skip = 1;
-
- // The cost of skip bit needs to be added.
- rate = rate_mv;
- rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
- [INTER_OFFSET(this_mode)];
-
- // More on this part of rate
- // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
-
- // Scaling factor for SSE from spatial domain to frequency
- // domain is 16. Adjust distortion accordingly.
- // TODO(yunqingwang): In this function, only y-plane dist is
- // calculated.
- dist = (sse << 4); // + ((sse_u + sse_v) << 4);
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
- // *disable_skip = 1;
- }
- }
+ if (cpi->allow_encode_breakout) {
+ encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame,
+ this_mode, var_y, sse_y, yv12_mb, &rate, &dist);
+ if (x->skip) {
+ rate += rate_mv;
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
}
}
#if CONFIG_DENOISING
- vp9_denoiser_update_frame_stats();
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_denoiser_update_frame_stats(&cpi->denoiser, mbmi, sse_y, this_mode);
+ }
#endif
if (this_rd < best_rd || x->skip) {
@@ -449,8 +671,19 @@
*returndistortion = dist;
best_mode = this_mode;
best_pred_filter = mbmi->interp_filter;
+ best_tx_size = mbmi->tx_size;
best_ref_frame = ref_frame;
skip_txfm = x->skip_txfm;
+
+ if (cpi->sf.reuse_inter_pred_sby) {
+ if (best_pred != NULL)
+ free_pred_buffer(best_pred);
+
+ best_pred = this_mode_pred;
+ }
+ } else {
+ if (cpi->sf.reuse_inter_pred_sby)
+ free_pred_buffer(this_mode_pred);
}
if (x->skip)
@@ -458,11 +691,25 @@
}
}
+ // If best prediction is not in dst buf, then copy the prediction block from
+ // temp buf to dst buf.
+ if (cpi->sf.reuse_inter_pred_sby && best_pred->data != orig_dst.buf) {
+ uint8_t *copy_from, *copy_to;
- mbmi->mode = best_mode;
+ pd->dst = orig_dst;
+ copy_to = pd->dst.buf;
+
+ copy_from = best_pred->data;
+
+ vp9_convolve_copy(copy_from, bw, copy_to, pd->dst.stride, NULL, 0, NULL, 0,
+ bw, bh);
+ }
+
+ mbmi->mode = best_mode;
mbmi->interp_filter = best_pred_filter;
- mbmi->ref_frame[0] = best_ref_frame;
- mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int;
+ mbmi->tx_size = best_tx_size;
+ mbmi->ref_frame[0] = best_ref_frame;
+ mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int;
xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
x->skip_txfm = skip_txfm;
@@ -470,22 +717,57 @@
// threshold.
if (!x->skip && best_rd > inter_mode_thresh &&
bsize <= cpi->sf.max_intra_bsize) {
- for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
- vp9_predict_intra_block(xd, 0, b_width_log2(bsize),
- mbmi->tx_size, this_mode,
- &p->src.buf[0], p->src.stride,
- &pd->dst.buf[0], pd->dst.stride, 0, 0, 0);
+ int i, j;
+ const int width = num_4x4_blocks_wide_lookup[bsize];
+ const int height = num_4x4_blocks_high_lookup[bsize];
- model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
+ int rate2 = 0;
+ int64_t dist2 = 0;
+ const int dst_stride = pd->dst.stride;
+ const int src_stride = p->src.stride;
+ int block_idx = 0;
+
+ TX_SIZE tmp_tx_size = MIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+ const int step = 1 << tmp_tx_size;
+
+ for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
+ if (cpi->sf.reuse_inter_pred_sby) {
+ pd->dst.buf = tmp[0].data;
+ pd->dst.stride = bw;
+ }
+
+ for (j = 0; j < height; j += step) {
+ for (i = 0; i < width; i += step) {
+ vp9_predict_intra_block(xd, block_idx, b_width_log2(bsize),
+ tmp_tx_size, this_mode,
+ &p->src.buf[4 * (j * dst_stride + i)],
+ src_stride,
+ &pd->dst.buf[4 * (j * dst_stride + i)],
+ dst_stride, i, j, 0);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
+ rate2 += rate;
+ dist2 += dist;
+ ++block_idx;
+ }
+ }
+
+ rate = rate2;
+ dist = dist2;
+
rate += cpi->mbmode_cost[this_mode];
rate += intra_cost_penalty;
this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ if (cpi->sf.reuse_inter_pred_sby)
+ pd->dst = orig_dst;
+
if (this_rd + intra_mode_cost < best_rd) {
best_rd = this_rd;
*returnrate = rate;
*returndistortion = dist;
mbmi->mode = this_mode;
+ mbmi->tx_size = tmp_tx_size;
mbmi->ref_frame[0] = INTRA_FRAME;
mbmi->uv_mode = this_mode;
mbmi->mv[0].as_int = INVALID_MV;
@@ -494,8 +776,11 @@
}
}
}
+
#if CONFIG_DENOISING
- vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col, bsize);
+ if (cpi->oxcf.noise_sensitivity > 0) {
+ vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col, bsize);
+ }
#endif
return INT64_MAX;
diff --git a/source/libvpx/vp9/encoder/vp9_pickmode.h b/source/libvpx/vp9/encoder/vp9_pickmode.h
index a9c948d..3d89974 100644
--- a/source/libvpx/vp9/encoder/vp9_pickmode.h
+++ b/source/libvpx/vp9/encoder/vp9_pickmode.h
@@ -17,6 +17,12 @@
extern "C" {
#endif
+typedef struct {  // Scratch prediction buffer passed between candidate modes.
+ uint8_t *data;  // Pixel storage; the mode loop installs it as pd->dst.buf.
+ int stride;  // NOTE(review): presumably the row pitch of data -- confirm.
+ int in_use;  // NOTE(review): presumably an occupancy flag -- confirm.
+} PRED_BUFFER;
+
int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
const struct TileInfo *const tile,
int mi_row, int mi_col,
diff --git a/source/libvpx/vp9/encoder/vp9_quantize.c b/source/libvpx/vp9/encoder/vp9_quantize.c
index f817bcc..4964e0f 100644
--- a/source/libvpx/vp9/encoder/vp9_quantize.c
+++ b/source/libvpx/vp9/encoder/vp9_quantize.c
@@ -17,7 +17,7 @@
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_quantize.h"
-#include "vp9/encoder/vp9_rdopt.h"
+#include "vp9/encoder/vp9_rd.h"
void vp9_quantize_dc(const int16_t *coeff_ptr, int skip_block,
const int16_t *round_ptr, const int16_t quant,
@@ -42,9 +42,9 @@
}
void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block,
- const int16_t *round_ptr, const int16_t quant,
- int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
- const int16_t dequant_ptr, uint16_t *eob_ptr) {
+ const int16_t *round_ptr, const int16_t quant,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr) {
int eob = -1;
if (!skip_block) {
@@ -63,6 +63,47 @@
*eob_ptr = eob + 1;
}
+void vp9_quantize_fp_c(const int16_t *coeff_ptr, intptr_t count,
+ int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr,
+ int zbin_oq_value, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ int i, eob = -1;  // eob stays -1 when every quantized value is zero
+ // TODO(jingning) Decide the need of these arguments after the
+ // quantization process is completed. Until then they are cast to void.
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+ (void)zbin_oq_value;
+ (void)iscan;
+
+ vpx_memset(qcoeff_ptr, 0, count * sizeof(int16_t));
+ vpx_memset(dqcoeff_ptr, 0, count * sizeof(int16_t));
+
+ if (!skip_block) {
+ // Quantization pass: visit all count coefficients in scan order, round,
+ // scale by quant_ptr >> 16, restore the sign and track the last non-zero.
+ for (i = 0; i < count; i++) {
+ const int rc = scan[i];  // position of the i-th coefficient in scan order
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);  // -1 if negative (arithmetic shift)
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+ int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = (tmp * quant_ptr[rc != 0]) >> 16;  // entry 0 when rc == 0, else 1
+
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;  // re-apply the sign
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
+
+ if (tmp)
+ eob = i;  // remember the last scan index with a non-zero result
+ }
+ }
+ *eob_ptr = eob + 1;  // one past the last non-zero scan index (0 if none)
+}
+
void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
@@ -207,11 +248,16 @@
const int qrounding_factor = q == 0 ? 64 : 48;
for (i = 0; i < 2; ++i) {
+ int qrounding_factor_fp = i == 0 ? 48 : 42;
+ if (q == 0)
+ qrounding_factor_fp = 64;
+
// y
quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q)
: vp9_ac_quant(q, 0);
invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant);
quants->y_quant_fp[q][i] = (1 << 16) / quant;
+ quants->y_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
quants->y_round[q][i] = (qrounding_factor * quant) >> 7;
cm->y_dequant[q][i] = quant;
@@ -222,6 +268,7 @@
invert_quant(&quants->uv_quant[q][i],
&quants->uv_quant_shift[q][i], quant);
quants->uv_quant_fp[q][i] = (1 << 16) / quant;
+ quants->uv_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7;
quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
quants->uv_round[q][i] = (qrounding_factor * quant) >> 7;
cm->uv_dequant[q][i] = quant;
@@ -240,6 +287,7 @@
for (i = 2; i < 8; i++) {
quants->y_quant[q][i] = quants->y_quant[q][1];
quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1];
+ quants->y_round_fp[q][i] = quants->y_round_fp[q][1];
quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1];
quants->y_zbin[q][i] = quants->y_zbin[q][1];
quants->y_round[q][i] = quants->y_round[q][1];
@@ -247,6 +295,7 @@
quants->uv_quant[q][i] = quants->uv_quant[q][1];
quants->uv_quant_fp[q][i] = quants->uv_quant_fp[q][1];
+ quants->uv_round_fp[q][i] = quants->uv_round_fp[q][1];
quants->uv_quant_shift[q][i] = quants->uv_quant_shift[q][1];
quants->uv_zbin[q][i] = quants->uv_zbin[q][1];
quants->uv_round[q][i] = quants->uv_round[q][1];
@@ -276,6 +325,7 @@
// Y
x->plane[0].quant = quants->y_quant[qindex];
x->plane[0].quant_fp = quants->y_quant_fp[qindex];
+ x->plane[0].round_fp = quants->y_round_fp[qindex];
x->plane[0].quant_shift = quants->y_quant_shift[qindex];
x->plane[0].zbin = quants->y_zbin[qindex];
x->plane[0].round = quants->y_round[qindex];
@@ -286,6 +336,7 @@
for (i = 1; i < 3; i++) {
x->plane[i].quant = quants->uv_quant[qindex];
x->plane[i].quant_fp = quants->uv_quant_fp[qindex];
+ x->plane[i].round_fp = quants->uv_round_fp[qindex];
x->plane[i].quant_shift = quants->uv_quant_shift[qindex];
x->plane[i].zbin = quants->uv_zbin[qindex];
x->plane[i].round = quants->uv_round[qindex];
diff --git a/source/libvpx/vp9/encoder/vp9_quantize.h b/source/libvpx/vp9/encoder/vp9_quantize.h
index 0e90462..24e4491 100644
--- a/source/libvpx/vp9/encoder/vp9_quantize.h
+++ b/source/libvpx/vp9/encoder/vp9_quantize.h
@@ -28,6 +28,8 @@
// if we want to deprecate the current use of y_quant.
DECLARE_ALIGNED(16, int16_t, y_quant_fp[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, uv_quant_fp[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, y_round_fp[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, uv_round_fp[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]);
diff --git a/source/libvpx/vp9/encoder/vp9_ratectrl.c b/source/libvpx/vp9/encoder/vp9_ratectrl.c
index f775003..e110983 100644
--- a/source/libvpx/vp9/encoder/vp9_ratectrl.c
+++ b/source/libvpx/vp9/encoder/vp9_ratectrl.c
@@ -186,6 +186,8 @@
}
void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
+ int i;
+
if (pass == 0 && oxcf->rc_mode == VPX_CBR) {
rc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q;
rc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
@@ -227,9 +229,9 @@
rc->tot_q = 0.0;
rc->avg_q = vp9_convert_qindex_to_q(oxcf->worst_allowed_q);
- rc->rate_correction_factor = 1.0;
- rc->key_frame_rate_correction_factor = 1.0;
- rc->gf_rate_correction_factor = 1.0;
+ for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
+ rc->rate_correction_factors[i] = 1.0;
+ }
}
int vp9_rc_drop_frame(VP9_COMP *cpi) {
@@ -271,28 +273,40 @@
}
static double get_rate_correction_factor(const VP9_COMP *cpi) {
+ const RATE_CONTROL *const rc = &cpi->rc;
+
if (cpi->common.frame_type == KEY_FRAME) {
- return cpi->rc.key_frame_rate_correction_factor;
+ return rc->rate_correction_factors[KF_STD];
+ } else if (cpi->pass == 2) {
+ RATE_FACTOR_LEVEL rf_lvl =
+ cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
+ return rc->rate_correction_factors[rf_lvl];
} else {
if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
- !cpi->rc.is_src_frame_alt_ref &&
+ !rc->is_src_frame_alt_ref &&
!(cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR))
- return cpi->rc.gf_rate_correction_factor;
+ return rc->rate_correction_factors[GF_ARF_STD];
else
- return cpi->rc.rate_correction_factor;
+ return rc->rate_correction_factors[INTER_NORMAL];
}
}
static void set_rate_correction_factor(VP9_COMP *cpi, double factor) {
+ RATE_CONTROL *const rc = &cpi->rc;
+
if (cpi->common.frame_type == KEY_FRAME) {
- cpi->rc.key_frame_rate_correction_factor = factor;
+ rc->rate_correction_factors[KF_STD] = factor;
+ } else if (cpi->pass == 2) {
+ RATE_FACTOR_LEVEL rf_lvl =
+ cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
+ rc->rate_correction_factors[rf_lvl] = factor;
} else {
if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
- !cpi->rc.is_src_frame_alt_ref &&
+ !rc->is_src_frame_alt_ref &&
!(cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR))
- cpi->rc.gf_rate_correction_factor = factor;
+ rc->rate_correction_factors[GF_ARF_STD] = factor;
else
- cpi->rc.rate_correction_factor = factor;
+ rc->rate_correction_factors[INTER_NORMAL] = factor;
}
}
@@ -628,8 +642,8 @@
if (frame_is_intra_only(cm)) {
active_best_quality = rc->best_quality;
-#if !CONFIG_MULTIPLE_ARF
- // Handle the special case for key frames forced when we have75 reached
+
+ // Handle the special case for key frames forced when we have reached
// the maximum key frame interval. Here force the Q to a range
// based on the ambient Q to reduce the risk of popping.
if (rc->this_key_frame_forced) {
@@ -660,13 +674,6 @@
active_best_quality += vp9_compute_qdelta(rc, q_val,
q_val * q_adj_factor);
}
-#else
- double current_q;
- // Force the KF quantizer to be 30% of the active_worst_quality.
- current_q = vp9_convert_qindex_to_q(active_worst_quality);
- active_best_quality = active_worst_quality
- + vp9_compute_qdelta(rc, current_q, current_q * 0.3);
-#endif
} else if (!rc->is_src_frame_alt_ref &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
// Use the lower of active_worst_quality and recent
@@ -768,23 +775,7 @@
q = *top_index;
}
}
-#if CONFIG_MULTIPLE_ARF
- // Force the quantizer determined by the coding order pattern.
- if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) &&
- cpi->oxcf.rc_mode != VPX_Q) {
- double new_q;
- double current_q = vp9_convert_qindex_to_q(active_worst_quality);
- int level = cpi->this_frame_weight;
- assert(level >= 0);
- new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level)));
- q = active_worst_quality +
- vp9_compute_qdelta(rc, current_q, new_q);
- *bottom_index = q;
- *top_index = q;
- printf("frame:%d q:%d\n", cm->current_video_frame, q);
- }
-#endif
assert(*top_index <= rc->worst_quality &&
*top_index >= rc->best_quality);
assert(*bottom_index <= rc->worst_quality &&
@@ -805,7 +796,6 @@
int q;
if (frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi)) {
-#if !CONFIG_MULTIPLE_ARF
// Handle the special case for key frames forced when we have75 reached
// the maximum key frame interval. Here force the Q to a range
// based on the ambient Q to reduce the risk of popping.
@@ -840,13 +830,6 @@
active_best_quality += vp9_compute_qdelta(rc, q_val,
q_val * q_adj_factor);
}
-#else
- double current_q;
- // Force the KF quantizer to be 30% of the active_worst_quality.
- current_q = vp9_convert_qindex_to_q(active_worst_quality);
- active_best_quality = active_worst_quality
- + vp9_compute_qdelta(rc, current_q, current_q * 0.3);
-#endif
} else if (!rc->is_src_frame_alt_ref &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
// Use the lower of active_worst_quality and recent
@@ -909,21 +892,20 @@
*bottom_index = active_best_quality;
#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
+ vp9_clear_system_state();
{
- int qdelta = 0;
- vp9_clear_system_state();
-
- // Limit Q range for the adaptive loop.
- if ((cm->frame_type == KEY_FRAME || vp9_is_upper_layer_key_frame(cpi)) &&
- !rc->this_key_frame_forced) {
- qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
- active_worst_quality, 2.0);
- } else if (!rc->is_src_frame_alt_ref &&
- (oxcf->rc_mode != VPX_CBR) &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
- qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
- active_worst_quality, 1.75);
- }
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ const double rate_factor_deltas[RATE_FACTOR_LEVELS] = {
+ 1.00, // INTER_NORMAL
+ 1.00, // INTER_HIGH
+ 1.50, // GF_ARF_LOW
+ 1.75, // GF_ARF_STD
+ 2.00, // KF_STD
+ };
+ const double rate_factor =
+ rate_factor_deltas[gf_group->rf_level[gf_group->index]];
+ int qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+ active_worst_quality, rate_factor);
*top_index = active_worst_quality + qdelta;
*top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
}
@@ -945,23 +927,7 @@
q = *top_index;
}
}
-#if CONFIG_MULTIPLE_ARF
- // Force the quantizer determined by the coding order pattern.
- if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) &&
- cpi->oxcf.rc_mode != VPX_Q) {
- double new_q;
- double current_q = vp9_convert_qindex_to_q(active_worst_quality);
- int level = cpi->this_frame_weight;
- assert(level >= 0);
- new_q = current_q * (1.0 - (0.2 * (cpi->max_arf_level - level)));
- q = active_worst_quality +
- vp9_compute_qdelta(rc, current_q, new_q);
- *bottom_index = q;
- *top_index = q;
- printf("frame:%d q:%d\n", cm->current_video_frame, q);
- }
-#endif
assert(*top_index <= rc->worst_quality &&
*top_index >= rc->best_quality);
assert(*bottom_index <= rc->worst_quality &&
@@ -1026,11 +992,8 @@
RATE_CONTROL *const rc = &cpi->rc;
rc->frames_since_golden = 0;
-#if CONFIG_MULTIPLE_ARF
- if (!cpi->multi_arf_enabled)
-#endif
- // Clear the alternate reference update pending flag.
- rc->source_alt_ref_pending = 0;
+ // Mark the alt ref as done (setting to 0 means no further alt refs pending).
+ rc->source_alt_ref_pending = 0;
// Set the alternate reference frame active flag
rc->source_alt_ref_active = 1;
@@ -1044,8 +1007,13 @@
// this frame refreshes means next frames don't unless specified by user
rc->frames_since_golden = 0;
- if (!rc->source_alt_ref_pending)
+ if (cpi->pass == 2) {
+ if (!rc->source_alt_ref_pending &&
+ cpi->twopass.gf_group.rf_level[0] == GF_ARF_STD)
rc->source_alt_ref_active = 0;
+ } else if (!rc->source_alt_ref_pending) {
+ rc->source_alt_ref_active = 0;
+ }
// Decrement count down till next gf
if (rc->frames_till_gf_update_due > 0)
@@ -1388,6 +1356,8 @@
// Extended interval for genuinely static scenes
rc->static_scene_max_gf_interval = oxcf->key_freq >> 1;
+ if (rc->static_scene_max_gf_interval > (MAX_LAG_BUFFERS * 2))
+ rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
if (is_altref_enabled(oxcf)) {
if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
diff --git a/source/libvpx/vp9/encoder/vp9_ratectrl.h b/source/libvpx/vp9/encoder/vp9_ratectrl.h
index f1a4a3f..a15235c 100644
--- a/source/libvpx/vp9/encoder/vp9_ratectrl.h
+++ b/source/libvpx/vp9/encoder/vp9_ratectrl.h
@@ -23,6 +23,15 @@
// Bits Per MB at different Q (Multiplied by 512)
#define BPER_MB_NORMBITS 9
+typedef enum {  // Index into RATE_CONTROL::rate_correction_factors[].
+ INTER_NORMAL = 0,  // Fallback slot for frames that match no other case.
+ INTER_HIGH = 1,
+ GF_ARF_LOW = 2,
+ GF_ARF_STD = 3,  // Used for golden / alt-ref refresh frames.
+ KF_STD = 4,  // Used for key frames.
+ RATE_FACTOR_LEVELS = 5  // Number of levels; sizes the per-level arrays.
+} RATE_FACTOR_LEVEL;
+
typedef struct {
// Rate targetting variables
int base_frame_target; // A baseline frame target before adjustment
@@ -37,9 +46,7 @@
int last_boost;
int kf_boost;
- double rate_correction_factor;
- double key_frame_rate_correction_factor;
- double gf_rate_correction_factor;
+ double rate_correction_factors[RATE_FACTOR_LEVELS];
int frames_since_golden;
int frames_till_gf_update_due;
diff --git a/source/libvpx/vp9/encoder/vp9_rd.c b/source/libvpx/vp9/encoder/vp9_rd.c
new file mode 100644
index 0000000..27e81b6
--- /dev/null
+++ b/source/libvpx/vp9/encoder/vp9_rd.c
@@ -0,0 +1,580 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "./vp9_rtcd.h"
+
+#include "vpx_mem/vpx_mem.h"
+
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_mvref_common.h"
+#include "vp9/common/vp9_pred_common.h"
+#include "vp9/common/vp9_quant_common.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/common/vp9_reconintra.h"
+#include "vp9/common/vp9_seg_common.h"
+#include "vp9/common/vp9_systemdependent.h"
+
+#include "vp9/encoder/vp9_cost.h"
+#include "vp9/encoder/vp9_encodemb.h"
+#include "vp9/encoder/vp9_encodemv.h"
+#include "vp9/encoder/vp9_encoder.h"
+#include "vp9/encoder/vp9_mcomp.h"
+#include "vp9/encoder/vp9_quantize.h"
+#include "vp9/encoder/vp9_ratectrl.h"
+#include "vp9/encoder/vp9_rd.h"
+#include "vp9/encoder/vp9_tokenize.h"
+#include "vp9/encoder/vp9_variance.h"
+
+#define RD_THRESH_POW 1.25
+#define RD_MULT_EPB_RATIO 64
+
+// Factor to weigh the rate for switchable interp filters.
+#define SWITCHABLE_INTERP_RATE_FACTOR 1
+
+// The baseline rd thresholds for breaking out of the rd loop for
+// certain modes are assumed to be based on 8x8 blocks.
+// This table is used to correct for block size.
+// The factors are stored multiplied by 4, i.e. << 2 (2 = x0.5, 32 = x8 etc).
+static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
+ 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
+};
+
+static void fill_mode_costs(VP9_COMP *cpi) {  // Rebuilds cpi's mode cost tables.
+ const FRAME_CONTEXT *const fc = &cpi->common.fc;
+ int i, j;
+
+ for (i = 0; i < INTRA_MODES; ++i)  // y mode costs from vp9_kf_y_mode_prob
+ for (j = 0; j < INTRA_MODES; ++j)
+ vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
+ vp9_intra_mode_tree);
+
+ vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
+ vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
+ vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
+ vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
+ fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
+
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)  // one table per context
+ vp9_cost_tokens(cpi->switchable_interp_costs[i],
+ fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
+}
+
+static void fill_token_costs(vp9_coeff_cost *c,  // c: output cost tables
+ vp9_coeff_probs_model (*p)[PLANE_TYPES]) {  // p: input model probabilities
+ int i, j, k, l;
+ TX_SIZE t;
+ for (t = TX_4X4; t <= TX_32X32; ++t)
+ for (i = 0; i < PLANE_TYPES; ++i)
+ for (j = 0; j < REF_TYPES; ++j)
+ for (k = 0; k < COEF_BANDS; ++k)
+ for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
+ vp9_prob probs[ENTROPY_NODES];
+ vp9_model_to_full_probs(p[t][i][j][k][l], probs);  // expand model probs
+ vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
+ vp9_coef_tree);  // slot 0: filled by vp9_cost_tokens
+ vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
+ vp9_coef_tree);  // slot 1: filled by vp9_cost_tokens_skip
+ assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
+ c[t][i][j][k][1][l][EOB_TOKEN]);  // both slots must agree on EOB
+ }
+}
+
+static const uint8_t rd_iifactor[32] = {  // rdmult boost in 1/16 units
+ 4, 4, 3, 2, 1, 0, 0, 0,  // indexed by twopass.next_iiratio, capped at 31
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+// Values are now correlated to quantizer.
+static int sad_per_bit16lut[QINDEX_RANGE];
+static int sad_per_bit4lut[QINDEX_RANGE];
+
+void vp9_init_me_luts(void) {  // (void): a real prototype, not an old-style ()
+ int i;
+
+ // Initialize the sad lut tables using a formulaic calculation for now.
+ // This is to make it easier to resolve the impact of experimental changes
+ // to the quantizer tables. Each entry is a linear function of the real q.
+ for (i = 0; i < QINDEX_RANGE; ++i) {
+ const double q = vp9_convert_qindex_to_q(i);
+ sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);  // truncated toward zero
+ sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);  // truncated toward zero
+ }
+}
+
+int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
+ const int q = vp9_dc_quant(qindex, 0);
+ // TODO(debargha): Adjust the function below.
+ int rdmult = 88 * q * q / 25;  // base multiplier: 88/25 * q^2
+ if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
+ if (cpi->twopass.next_iiratio > 31)  // cap the index at the last entry
+ rdmult += (rdmult * rd_iifactor[31]) >> 4;
+ else  // NOTE(review): a negative next_iiratio is not guarded -- confirm
+ rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
+ }
+ return rdmult;
+}
+
+static int compute_rd_thresh_factor(int qindex) {
+ // TODO(debargha): Adjust the function below. Currently 5.12 * (q / 4)^1.25.
+ const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
+ return MAX(q, 8);  // never return less than 8
+}
+
+void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
+ cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];  // qindex is not range-checked
+ cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];  // tables hold QINDEX_RANGE items
+}
+
+static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
+ int i, bsize, segment_id;
+
+ for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
+ const int qindex =
+ clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
+ cm->y_dc_delta_q,
+ 0, MAXQ);
+ const int q = compute_rd_thresh_factor(qindex);
+
+ for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
+ // Threshold here seems unnecessarily harsh but fine given actual range
+ // of values used for rd->thresh_mult[] and rd->thresh_mult_sub8x8[].
+ const int t = q * rd_thresh_block_size_factor[bsize];
+ const int thresh_max = INT_MAX / t;  // multipliers below this cannot overflow
+
+ if (bsize >= BLOCK_8X8) {
+ for (i = 0; i < MAX_MODES; ++i)
+ rd->threshes[segment_id][bsize][i] =
+ rd->thresh_mult[i] < thresh_max
+ ? rd->thresh_mult[i] * t / 4
+ : INT_MAX;  // saturate rather than overflow
+ } else {  // sub-8x8 sizes use the per-reference multiplier table
+ for (i = 0; i < MAX_REFS; ++i)
+ rd->threshes[segment_id][bsize][i] =
+ rd->thresh_mult_sub8x8[i] < thresh_max
+ ? rd->thresh_mult_sub8x8[i] * t / 4
+ : INT_MAX;  // saturate rather than overflow
+ }
+ }
+ }
+}
+
+void vp9_initialize_rd_consts(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &cpi->mb;
+ RD_OPT *const rd = &cpi->rd;
+ int i;
+
+ vp9_clear_system_state();
+
+ rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128).
+ rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
+
+ x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
+ x->errorperbit += (x->errorperbit == 0);  // bump a zero result up to 1
+
+ x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
+ cm->frame_type != KEY_FRAME) ? 0 : 1;  // 0 only for USE_LARGESTALL inter
+
+ set_block_thresholds(cm, rd);
+
+ if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
+ fill_token_costs(x->token_costs, cm->fc.coef_probs);
+
+ for (i = 0; i < PARTITION_CONTEXTS; ++i)
+ vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(cm, i),
+ vp9_partition_tree);
+ }
+
+ if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
+ cm->frame_type == KEY_FRAME) {  // non-rd mode: frames with index % 8 == 1
+ fill_mode_costs(cpi);
+
+ if (!frame_is_intra_only(cm)) {
+ vp9_build_nmv_cost_table(x->nmvjointcost,
+ cm->allow_high_precision_mv ? x->nmvcost_hp
+ : x->nmvcost,
+ &cm->fc.nmvc, cm->allow_high_precision_mv);
+
+ for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
+ vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
+ cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
+ }
+ }
+}
+
+static const int MAX_XSQ_Q10 = 245727;
+
+static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
+ // NOTE: The tables below must be of the same size.
+
+ // The functions described below are sampled at the four most significant
+ // bits of x^2 + 8 / 256.
+
+ // Normalized rate:
+ // This table models the rate for a Laplacian source with given variance
+ // when quantized with a uniform quantizer with given stepsize. The
+ // closed form expression is:
+ // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
+ // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
+ // and H(x) is the binary entropy function.
+ static const int rate_tab_q10[] = {
+ 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,
+ 4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811,
+ 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
+ 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638,
+ 2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130,
+ 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
+ 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199,
+ 1159, 1086, 1021, 963, 911, 864, 821, 781,
+ 745, 680, 623, 574, 530, 490, 455, 424,
+ 395, 345, 304, 269, 239, 213, 190, 171,
+ 154, 126, 104, 87, 73, 61, 52, 44,
+ 38, 28, 21, 16, 12, 10, 8, 6,
+ 5, 3, 2, 1, 1, 1, 0, 0,
+ };
+ // Normalized distortion:
+ // This table models the normalized distortion for a Laplacian source
+ // with given variance when quantized with a uniform quantizer
+ // with given stepsize. The closed form expression is:
+ // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
+ // where x = qpstep / sqrt(variance).
+ // Note the actual distortion is Dn * variance.
+ static const int dist_tab_q10[] = {
+ 0, 0, 1, 1, 1, 2, 2, 2,
+ 3, 3, 4, 5, 5, 6, 7, 7,
+ 8, 9, 11, 12, 13, 15, 16, 17,
+ 18, 21, 24, 26, 29, 31, 34, 36,
+ 39, 44, 49, 54, 59, 64, 69, 73,
+ 78, 88, 97, 106, 115, 124, 133, 142,
+ 151, 167, 184, 200, 215, 231, 245, 260,
+ 274, 301, 327, 351, 375, 397, 418, 439,
+ 458, 495, 528, 559, 587, 613, 637, 659,
+ 680, 717, 749, 777, 801, 823, 842, 859,
+ 874, 899, 919, 936, 949, 960, 969, 977,
+ 983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
+ 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
+ };
+ static const int xsq_iq_q10[] = {
+ 0, 4, 8, 12, 16, 20, 24, 28,
+ 32, 40, 48, 56, 64, 72, 80, 88,
+ 96, 112, 128, 144, 160, 176, 192, 208,
+ 224, 256, 288, 320, 352, 384, 416, 448,
+ 480, 544, 608, 672, 736, 800, 864, 928,
+ 992, 1120, 1248, 1376, 1504, 1632, 1760, 1888,
+ 2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808,
+ 4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648,
+ 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
+ 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688,
+ 32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408,
+ 65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848,
+ 131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
+ };
+ const int tmp = (xsq_q10 >> 2) + 8;  // drop 2 fractional bits, add offset 8
+ const int k = get_msb(tmp) - 3;  // NOTE(review): presumably top-bit index - 3
+ const int xq = (k << 3) + ((tmp >> k) & 0x7);  // 8 table entries per k
+ const int one_q10 = 1 << 10;
+ const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);  // weight
+ const int b_q10 = one_q10 - a_q10;  // complementary weight for entry xq
+ *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
+ *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
+}
+
+void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
+                                  unsigned int qstep, int *rate,
+                                  int64_t *dist) {
+  // Rate/distortion model for a Laplacian source of the given variance
+  // quantized uniformly with step qstep. Closed forms: Hang and Chen,
+  // "Source Model for transform video coder and its application - Part I:
+  // Fundamental Theory", IEEE Trans. Circ. Sys. for Video Tech., April 1997.
+  // A zero variance yields zero rate and zero distortion.
+  int rate_q10, dist_q10;
+  uint64_t xsq_64;
+  int xsq;
+  if (var == 0) {
+    *rate = 0;
+    *dist = 0;
+    return;
+  }
+  // Q10 value of qstep^2 * n / var, rounded, then capped at MAX_XSQ_Q10.
+  xsq_64 = ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
+  xsq = xsq_64 > MAX_XSQ_Q10 ? MAX_XSQ_Q10 : (int)xsq_64;
+  model_rd_norm(xsq, &rate_q10, &dist_q10);
+  *rate = (n * rate_q10 + 2) >> 2;
+  *dist = (var * (int64_t)dist_q10 + 512) >> 10;
+}
+
+void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
+                              const struct macroblockd_plane *pd,
+                              ENTROPY_CONTEXT t_above[16],
+                              ENTROPY_CONTEXT t_left[16]) {
+  // Builds the above/left entropy contexts of plane pd at tx_size
+  // granularity. TX_4X4 copies the stored 4x4 contexts verbatim; larger
+  // sizes store, at the first 4x4 index of each transform block, whether any
+  // 4x4 context it covers is nonzero. Entries are OR-ed one by one, not read
+  // via wider-integer casts (aliasing/alignment UB); same for 1-byte entries.
+  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+  const ENTROPY_CONTEXT *const above = pd->above_context;
+  const ENTROPY_CONTEXT *const left = pd->left_context;
+  int step = 0;  // Number of 4x4 units spanned by one transform block.
+  int i, j;
+
+  switch (tx_size) {
+    case TX_4X4:
+      vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
+      vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
+      return;
+    case TX_8X8: step = 2; break;
+    case TX_16X16: step = 4; break;
+    case TX_32X32: step = 8; break;
+    default: assert(0 && "Invalid transform size."); return;
+  }
+  for (i = 0; i < num_4x4_w; i += step) {
+    int any = 0;
+    for (j = 0; j < step; ++j) any |= above[i + j];
+    t_above[i] = !!any;
+  }
+  for (i = 0; i < num_4x4_h; i += step) {
+    int any = 0;
+    for (j = 0; j < step; ++j) any |= left[i + j];
+    t_left[i] = !!any;
+  }
+}
+
+void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
+                 uint8_t *ref_y_buffer, int ref_y_stride,
+                 int ref_frame, BLOCK_SIZE block_size) {
+  // Ranks the candidate predictor mvs for ref_frame by the SAD between the
+  // source luma block and the reference block each one points at, using only
+  // the mv components shifted right by 3. The winning candidate index, its
+  // SAD and the largest shifted component magnitude are stored in x.
+  MACROBLOCKD *xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+  uint8_t *const src = x->plane[0].src.buf;
+  const int src_stride = x->plane[0].src.stride;
+  // One candidate beyond MAX_MV_REF_CANDIDATES is scored only when adaptive
+  // motion search is on, the frame is shown and the block is below the max
+  // partition size.
+  const int use_pred_mv = cpi->sf.adaptive_motion_search &&
+                          cpi->common.show_frame &&
+                          block_size < cpi->sf.max_partition_size;
+  const int num_cands = MAX_MV_REF_CANDIDATES + use_pred_mv;
+  MV cands[3];
+  int_mv cur;
+  int idx;
+  int seen_zero = 0;
+  int winner = 0;
+  int winner_sad = INT_MAX;
+  int largest = 0;
+
+  cands[0] = mbmi->ref_mvs[ref_frame][0].as_mv;
+  cands[1] = mbmi->ref_mvs[ref_frame][1].as_mv;
+  cands[2] = x->pred_mv[ref_frame];
+
+  for (idx = 0; idx < num_cands; ++idx) {
+    uint8_t *ref;
+    int sad;
+
+    cur.as_mv = cands[idx];
+    // Tracked for every candidate, including a repeated zero mv.
+    largest = MAX(largest, MAX(abs(cur.as_mv.row), abs(cur.as_mv.col)) >> 3);
+
+    // A zero mv only needs to be scored the first time it shows up.
+    if (cur.as_int == 0) {
+      if (seen_zero)
+        continue;
+      seen_zero = 1;
+    }
+
+    ref = ref_y_buffer + (ref_y_stride * (cur.as_mv.row >> 3)) +
+          (cur.as_mv.col >> 3);
+    sad = cpi->fn_ptr[block_size].sdf(src, src_stride, ref, ref_y_stride);
+    if (sad < winner_sad) {
+      winner_sad = sad;
+      winner = idx;
+    }
+  }
+
+  x->mv_best_ref_index[ref_frame] = winner;
+  x->max_mv_context[ref_frame] = largest;
+  x->pred_mv_sad[ref_frame] = winner_sad;
+}
+
+void vp9_setup_pred_block(const MACROBLOCKD *xd,
+                          struct buf_2d dst[MAX_MB_PLANE],
+                          const YV12_BUFFER_CONFIG *src,
+                          int mi_row, int mi_col,
+                          const struct scale_factors *scale,
+                          const struct scale_factors *scale_uv) {
+  // Points dst[] at the planes of src, then has setup_pred_plane() set each
+  // one up for (mi_row, mi_col): plane 0 uses scale, the rest scale_uv.
+  int plane;
+  dst[0].buf = src->y_buffer;
+  dst[0].stride = src->y_stride;
+  dst[1].buf = src->u_buffer;
+  dst[2].buf = src->v_buffer;
+  dst[1].stride = dst[2].stride = src->uv_stride;
+#if CONFIG_ALPHA
+  dst[3].buf = src->alpha_buffer;
+  dst[3].stride = src->alpha_stride;
+#endif
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane)
+    setup_pred_plane(&dst[plane], dst[plane].buf, dst[plane].stride, mi_row,
+                     mi_col, plane == 0 ? scale : scale_uv,
+                     xd->plane[plane].subsampling_x,
+                     xd->plane[plane].subsampling_y);
+}
+
+const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
+                                                   int ref_frame) {
+  // NULL when the scaled buffer index equals the mapped reference index.
+  const int ref = cpi->common.ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
+  const int scaled = cpi->scaled_ref_idx[ref_frame - 1];
+  return scaled == ref ? NULL : &cpi->common.frame_bufs[scaled].buf;
+}
+
+int vp9_get_switchable_rate(const VP9_COMP *cpi) {
+  const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+  const int ctx = vp9_get_pred_context_switchable_interp(xd);
+  // Cost entry of the block's interp filter in context ctx, times the factor.
+  return SWITCHABLE_INTERP_RATE_FACTOR *
+         cpi->switchable_interp_costs[ctx][xd->mi[0]->mbmi.interp_filter];
+}
+
+void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
+  // Fills cpi->rd.thresh_mult[] (indexed by THR_MODES): a common baseline,
+  // then a fixed offset per mode, then INT_MAX for every mode that needs a
+  // reference frame not enabled in cpi->ref_frame_flags.
+  const SPEED_FEATURES *const sf = &cpi->sf;
+  int *const mult = cpi->rd.thresh_mult;
+  const int baseline = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
+  const int la_mask = VP9_LAST_FLAG | VP9_ALT_FLAG;
+  const int ga_mask = VP9_GOLD_FLAG | VP9_ALT_FLAG;
+  int mode;
+
+  for (mode = 0; mode < MAX_MODES; ++mode)
+    mult[mode] = baseline;
+
+  // The single-reference NEAREST modes ignore the baseline.
+  mult[THR_NEARESTMV] = 0;
+  mult[THR_NEARESTG] = 0;
+  mult[THR_NEARESTA] = 0;
+
+  // Fixed per-mode offsets, grouped by size: 1000, 1500, 2000, 2500.
+  mult[THR_DC] += 1000;
+  mult[THR_TM] += 1000;
+  mult[THR_NEWMV] += 1000;
+  mult[THR_NEWA] += 1000;
+  mult[THR_NEWG] += 1000;
+  mult[THR_NEARMV] += 1000;
+  mult[THR_NEARA] += 1000;
+  mult[THR_NEARG] += 1000;
+  mult[THR_COMP_NEARESTLA] += 1000;
+  mult[THR_COMP_NEARESTGA] += 1000;
+
+  mult[THR_COMP_NEARLA] += 1500;
+  mult[THR_COMP_NEARGA] += 1500;
+
+  mult[THR_COMP_NEWLA] += 2000;
+  mult[THR_COMP_NEWGA] += 2000;
+  mult[THR_ZEROMV] += 2000;
+  mult[THR_ZEROG] += 2000;
+  mult[THR_ZEROA] += 2000;
+  mult[THR_H_PRED] += 2000;
+  mult[THR_V_PRED] += 2000;
+
+  mult[THR_COMP_ZEROLA] += 2500;
+  mult[THR_COMP_ZEROGA] += 2500;
+  mult[THR_D45_PRED] += 2500;
+  mult[THR_D135_PRED] += 2500;
+  mult[THR_D117_PRED] += 2500;
+  mult[THR_D153_PRED] += 2500;
+  mult[THR_D207_PRED] += 2500;
+  mult[THR_D63_PRED] += 2500;
+
+  // Real-time-only adjustment; that mode uses just the last reference frame.
+  mult[THR_NEWMV] += sf->elevate_newmv_thresh;
+
+  // Modes whose reference frame(s) are not flagged as usable are disabled.
+  if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
+    mult[THR_NEWMV] = INT_MAX;
+    mult[THR_NEARESTMV] = INT_MAX;
+    mult[THR_ZEROMV] = INT_MAX;
+    mult[THR_NEARMV] = INT_MAX;
+  }
+  if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
+    mult[THR_NEARESTG] = INT_MAX;
+    mult[THR_ZEROG] = INT_MAX;
+    mult[THR_NEARG] = INT_MAX;
+    mult[THR_NEWG] = INT_MAX;
+  }
+  if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
+    mult[THR_NEARESTA] = INT_MAX;
+    mult[THR_ZEROA] = INT_MAX;
+    mult[THR_NEARA] = INT_MAX;
+    mult[THR_NEWA] = INT_MAX;
+  }
+  if ((cpi->ref_frame_flags & la_mask) != la_mask) {
+    mult[THR_COMP_ZEROLA] = INT_MAX;
+    mult[THR_COMP_NEARESTLA] = INT_MAX;
+    mult[THR_COMP_NEARLA] = INT_MAX;
+    mult[THR_COMP_NEWLA] = INT_MAX;
+  }
+  if ((cpi->ref_frame_flags & ga_mask) != ga_mask) {
+    mult[THR_COMP_ZEROGA] = INT_MAX;
+    mult[THR_COMP_NEARESTGA] = INT_MAX;
+    mult[THR_COMP_NEARGA] = INT_MAX;
+    mult[THR_COMP_NEWGA] = INT_MAX;
+  }
+}
+
+void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
+  // Fills cpi->rd.thresh_mult_sub8x8[], indexed by THR_MODES_SUB8X8.
+  int *const mult = cpi->rd.thresh_mult_sub8x8;
+  const int baseline = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
+  const int la_mask = VP9_LAST_FLAG | VP9_ALT_FLAG;
+  const int ga_mask = VP9_GOLD_FLAG | VP9_ALT_FLAG;
+  int ref;
+
+  for (ref = 0; ref < MAX_REFS; ++ref)
+    mult[ref] = baseline;
+
+  // Compound references get a larger offset than single ones and intra.
+  mult[THR_LAST] += 2500;
+  mult[THR_GOLD] += 2500;
+  mult[THR_ALTR] += 2500;
+  mult[THR_INTRA] += 2500;
+  mult[THR_COMP_LA] += 4500;
+  mult[THR_COMP_GA] += 4500;
+
+  // Entries whose bit is set in disable_split_mask are switched off.
+  for (ref = 0; ref < MAX_REFS; ++ref)
+    if (cpi->sf.disable_split_mask & (1 << ref)) mult[ref] = INT_MAX;
+
+  // So are entries whose reference frame(s) are not flagged as usable.
+  if (!(cpi->ref_frame_flags & VP9_LAST_FLAG))
+    mult[THR_LAST] = INT_MAX;
+  if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG))
+    mult[THR_GOLD] = INT_MAX;
+  if (!(cpi->ref_frame_flags & VP9_ALT_FLAG))
+    mult[THR_ALTR] = INT_MAX;
+  if ((cpi->ref_frame_flags & la_mask) != la_mask)
+    mult[THR_COMP_LA] = INT_MAX;
+  if ((cpi->ref_frame_flags & ga_mask) != ga_mask)
+    mult[THR_COMP_GA] = INT_MAX;
+}
diff --git a/source/libvpx/vp9/encoder/vp9_rd.h b/source/libvpx/vp9/encoder/vp9_rd.h
new file mode 100644
index 0000000..eeb5e0f
--- /dev/null
+++ b/source/libvpx/vp9/encoder/vp9_rd.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_VP9_RD_H_
+#define VP9_ENCODER_VP9_RD_H_
+
+#include <limits.h>
+
+#include "vp9/common/vp9_blockd.h"
+
+#include "vp9/encoder/vp9_block.h"
+#include "vp9/encoder/vp9_context_tree.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define RDDIV_BITS 7
+
+// Rate-distortion cost: the rate R weighted by RM in Q8 with rounding, plus
+// the distortion D shifted left by DM bits. R is left unparenthesized behind
+// the cast on purpose so that a sum passed as R is still added in 64 bits.
+#define RDCOST(RM, DM, R, D) \
+  (((128 + ((int64_t)R) * (RM)) >> 8) + ((D) << (DM)))
+#define QIDX_SKIP_THRESH 115
+
+#define MV_COST_WEIGHT 108
+#define MV_COST_WEIGHT_SUB 120
+
+#define INVALID_MV 0x80008000
+
+// Entry counts of the THR_MODES and THR_MODES_SUB8X8 enumerations below.
+#define MAX_MODES 30
+#define MAX_REFS 6
+
+// This enumerator type needs to be kept aligned with the mode order in
+// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
+// Its values index RD_OPT.thresh_mult[].
+typedef enum {
+  THR_NEARESTMV,
+  THR_NEARESTA,
+  THR_NEARESTG,
+
+  THR_DC,
+
+  THR_NEWMV,
+  THR_NEWA,
+  THR_NEWG,
+
+  THR_NEARMV,
+  THR_NEARA,
+  THR_COMP_NEARESTLA,
+  THR_COMP_NEARESTGA,
+
+  THR_TM,
+
+  THR_COMP_NEARLA,
+  THR_COMP_NEWLA,
+  THR_NEARG,
+  THR_COMP_NEARGA,
+  THR_COMP_NEWGA,
+
+  THR_ZEROMV,
+  THR_ZEROG,
+  THR_ZEROA,
+  THR_COMP_ZEROLA,
+  THR_COMP_ZEROGA,
+
+  THR_H_PRED,
+  THR_V_PRED,
+  THR_D135_PRED,
+  THR_D207_PRED,
+  THR_D153_PRED,
+  THR_D63_PRED,
+  THR_D117_PRED,
+  THR_D45_PRED,
+} THR_MODES;
+
+// Values of this enumeration index RD_OPT.thresh_mult_sub8x8[].
+typedef enum {
+  THR_LAST,
+  THR_GOLD,
+  THR_ALTR,
+  THR_COMP_LA,
+  THR_COMP_GA,
+  THR_INTRA,
+} THR_MODES_SUB8X8;
+
+typedef struct RD_OPT {
+  // Thresh_mult is used to set a threshold for the rd score. A higher value
+  // means that we will accept the best mode so far more often. This number
+  // is used in combination with the current block size, and thresh_freq_fact
+  // to pick a threshold.
+  int thresh_mult[MAX_MODES];
+  int thresh_mult_sub8x8[MAX_REFS];
+
+  int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
+  int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
+
+  int64_t comp_pred_diff[REFERENCE_MODES];
+  int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
+  int64_t tx_select_diff[TX_MODES];
+  // TODO(agrange): can this overflow?
+  int tx_select_threshes[MAX_REF_FRAMES][TX_MODES];
+
+  int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
+  int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
+  int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
+  int64_t mask_filter;
+
+  int RDMULT;
+  int RDDIV;
+} RD_OPT;
+
+struct TileInfo;
+struct VP9_COMP;
+struct macroblock;
+
+int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex);
+
+void vp9_initialize_rd_consts(struct VP9_COMP *cpi);
+
+void vp9_initialize_me_consts(struct VP9_COMP *cpi, int qindex);
+
+// Models the rate and distortion of a Laplacian source with the given
+// variance quantized uniformly with step qstep, scaled for n samples.
+// Writes 0 to both *rate and *dist when var is 0.
+void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
+                                  unsigned int qstep, int *rate,
+                                  int64_t *dist);
+
+// Cost of the current block's interp filter, times the interp rate factor.
+int vp9_get_switchable_rate(const struct VP9_COMP *cpi);
+
+// NULL when ref_frame's scaled buffer index equals its mapped reference index.
+const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi,
+                                                   int ref_frame);
+
+void vp9_init_me_luts(void);
+
+// Writes the above/left entropy contexts of plane pd for bsize at tx_size
+// granularity: copied as stored for TX_4X4, otherwise one 0/1 flag per
+// transform block, placed at that block's first 4x4 index.
+void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
+                              const struct macroblockd_plane *pd,
+                              ENTROPY_CONTEXT t_above[16],
+                              ENTROPY_CONTEXT t_left[16]);
+
+// Initializes cpi->rd.thresh_mult[] (one entry per THR_MODES value).
+void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi);
+
+// Initializes cpi->rd.thresh_mult_sub8x8[] (one per THR_MODES_SUB8X8 value).
+void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi);
+
+// Nonzero when best_rd is below (thresh * thresh_fact) >> 5, with the
+// product evaluated in 64 bits, or when thresh is INT_MAX (the value the
+// threshold setup functions store for a disabled mode).
+static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
+                                      int thresh_fact) {
+  return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
+}
+
+// Scores the candidate predictor mvs for ref_frame by SAD against the source
+// block and stores the best candidate's index, its SAD and the largest mv
+// component magnitude (shifted right by 3) in x.
+void vp9_mv_pred(struct VP9_COMP *cpi, MACROBLOCK *x,
+                 uint8_t *ref_y_buffer, int ref_y_stride,
+                 int ref_frame, BLOCK_SIZE block_size);
+
+// Sets dst[] up as prediction buffers over the planes of src at
+// (mi_row, mi_col); scale is used for plane 0 and scale_uv for the others.
+void vp9_setup_pred_block(const MACROBLOCKD *xd,
+                          struct buf_2d dst[MAX_MB_PLANE],
+                          const YV12_BUFFER_CONFIG *src,
+                          int mi_row, int mi_col,
+                          const struct scale_factors *scale,
+                          const struct scale_factors *scale_uv);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP9_ENCODER_VP9_RD_H_
diff --git a/source/libvpx/vp9/encoder/vp9_rdopt.c b/source/libvpx/vp9/encoder/vp9_rdopt.c
index 6c055c4..a8daa21 100644
--- a/source/libvpx/vp9/encoder/vp9_rdopt.c
+++ b/source/libvpx/vp9/encoder/vp9_rdopt.c
@@ -10,7 +10,6 @@
#include <assert.h>
#include <math.h>
-#include <stdio.h>
#include "./vp9_rtcd.h"
@@ -35,17 +34,12 @@
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
+#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_rdopt.h"
-#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_variance.h"
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1
-#define RD_THRESH_POW 1.25
-#define RD_MULT_EPB_RATIO 64
-
-/* Factor to weigh the rate for switchable interp filters */
-#define SWITCHABLE_INTERP_RATE_FACTOR 1
#define LAST_FRAME_MODE_MASK 0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK 0xFFDA3BB0
@@ -128,14 +122,6 @@
{{INTRA_FRAME, NONE}},
};
-// The baseline rd thresholds for breaking out of the rd loop for
-// certain modes are assumed to be based on 8x8 blocks.
-// This table is used to correct for blocks size.
-// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
-static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
- 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
-};
-
static int raster_block_offset(BLOCK_SIZE plane_bsize,
int raster_block, int stride) {
const int bw = b_width_log2(plane_bsize);
@@ -149,97 +135,6 @@
return base + raster_block_offset(plane_bsize, raster_block, stride);
}
-static void fill_mode_costs(VP9_COMP *cpi) {
- const FRAME_CONTEXT *const fc = &cpi->common.fc;
- int i, j;
-
- for (i = 0; i < INTRA_MODES; i++)
- for (j = 0; j < INTRA_MODES; j++)
- vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
- vp9_intra_mode_tree);
-
- // TODO(rbultje) separate tables for superblock costing?
- vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
- vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
- vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
- vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
- fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
-
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- vp9_cost_tokens(cpi->switchable_interp_costs[i],
- fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
-}
-
-static void fill_token_costs(vp9_coeff_cost *c,
- vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
- int i, j, k, l;
- TX_SIZE t;
- for (t = TX_4X4; t <= TX_32X32; ++t)
- for (i = 0; i < PLANE_TYPES; ++i)
- for (j = 0; j < REF_TYPES; ++j)
- for (k = 0; k < COEF_BANDS; ++k)
- for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
- vp9_prob probs[ENTROPY_NODES];
- vp9_model_to_full_probs(p[t][i][j][k][l], probs);
- vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
- vp9_coef_tree);
- vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
- vp9_coef_tree);
- assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
- c[t][i][j][k][1][l][EOB_TOKEN]);
- }
-}
-
-static const uint8_t rd_iifactor[32] = {
- 4, 4, 3, 2, 1, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
-};
-
-// 3* dc_qlookup[Q]*dc_qlookup[Q];
-
-/* values are now correlated to quantizer */
-static int sad_per_bit16lut[QINDEX_RANGE];
-static int sad_per_bit4lut[QINDEX_RANGE];
-
-void vp9_init_me_luts() {
- int i;
-
- // Initialize the sad lut tables using a formulaic calculation for now
- // This is to make it easier to resolve the impact of experimental changes
- // to the quantizer tables.
- for (i = 0; i < QINDEX_RANGE; i++) {
- const double q = vp9_convert_qindex_to_q(i);
- sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
- sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
- }
-}
-
-int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
- const int q = vp9_dc_quant(qindex, 0);
- // TODO(debargha): Adjust the function below
- int rdmult = 88 * q * q / 25;
- if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
- if (cpi->twopass.next_iiratio > 31)
- rdmult += (rdmult * rd_iifactor[31]) >> 4;
- else
- rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
- }
- return rdmult;
-}
-
-static int compute_rd_thresh_factor(int qindex) {
- // TODO(debargha): Adjust the function below
- const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
- return MAX(q, 8);
-}
-
-void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
- cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
- cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
-}
-
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int m, int n, int min_plane, int max_plane) {
int i;
@@ -265,189 +160,6 @@
}
}
-static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
- int i, bsize, segment_id;
-
- for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
- const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id,
- cm->base_qindex) + cm->y_dc_delta_q,
- 0, MAXQ);
- const int q = compute_rd_thresh_factor(qindex);
-
- for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
- // Threshold here seems unnecessarily harsh but fine given actual
- // range of values used for cpi->sf.thresh_mult[].
- const int t = q * rd_thresh_block_size_factor[bsize];
- const int thresh_max = INT_MAX / t;
-
- if (bsize >= BLOCK_8X8) {
- for (i = 0; i < MAX_MODES; ++i)
- rd->threshes[segment_id][bsize][i] =
- rd->thresh_mult[i] < thresh_max
- ? rd->thresh_mult[i] * t / 4
- : INT_MAX;
- } else {
- for (i = 0; i < MAX_REFS; ++i)
- rd->threshes[segment_id][bsize][i] =
- rd->thresh_mult_sub8x8[i] < thresh_max
- ? rd->thresh_mult_sub8x8[i] * t / 4
- : INT_MAX;
- }
- }
- }
-}
-
-void vp9_initialize_rd_consts(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
- MACROBLOCK *const x = &cpi->mb;
- RD_OPT *const rd = &cpi->rd;
- int i;
-
- vp9_clear_system_state();
-
- rd->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128)
- rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
-
- x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
- x->errorperbit += (x->errorperbit == 0);
-
- x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
- cm->frame_type != KEY_FRAME) ? 0 : 1;
-
- set_block_thresholds(cm, rd);
-
- if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
- fill_token_costs(x->token_costs, cm->fc.coef_probs);
-
- for (i = 0; i < PARTITION_CONTEXTS; i++)
- vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(cm, i),
- vp9_partition_tree);
- }
-
- if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
- cm->frame_type == KEY_FRAME) {
- fill_mode_costs(cpi);
-
- if (!frame_is_intra_only(cm)) {
- vp9_build_nmv_cost_table(x->nmvjointcost,
- cm->allow_high_precision_mv ? x->nmvcost_hp
- : x->nmvcost,
- &cm->fc.nmvc, cm->allow_high_precision_mv);
-
- for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
- vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
- cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
- }
- }
-}
-
-static const int MAX_XSQ_Q10 = 245727;
-
-static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
- // NOTE: The tables below must be of the same size
-
- // The functions described below are sampled at the four most significant
- // bits of x^2 + 8 / 256
-
- // Normalized rate
- // This table models the rate for a Laplacian source
- // source with given variance when quantized with a uniform quantizer
- // with given stepsize. The closed form expression is:
- // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
- // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
- // and H(x) is the binary entropy function.
- static const int rate_tab_q10[] = {
- 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,
- 4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811,
- 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
- 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638,
- 2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130,
- 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
- 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199,
- 1159, 1086, 1021, 963, 911, 864, 821, 781,
- 745, 680, 623, 574, 530, 490, 455, 424,
- 395, 345, 304, 269, 239, 213, 190, 171,
- 154, 126, 104, 87, 73, 61, 52, 44,
- 38, 28, 21, 16, 12, 10, 8, 6,
- 5, 3, 2, 1, 1, 1, 0, 0,
- };
- // Normalized distortion
- // This table models the normalized distortion for a Laplacian source
- // source with given variance when quantized with a uniform quantizer
- // with given stepsize. The closed form expression is:
- // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
- // where x = qpstep / sqrt(variance)
- // Note the actual distortion is Dn * variance.
- static const int dist_tab_q10[] = {
- 0, 0, 1, 1, 1, 2, 2, 2,
- 3, 3, 4, 5, 5, 6, 7, 7,
- 8, 9, 11, 12, 13, 15, 16, 17,
- 18, 21, 24, 26, 29, 31, 34, 36,
- 39, 44, 49, 54, 59, 64, 69, 73,
- 78, 88, 97, 106, 115, 124, 133, 142,
- 151, 167, 184, 200, 215, 231, 245, 260,
- 274, 301, 327, 351, 375, 397, 418, 439,
- 458, 495, 528, 559, 587, 613, 637, 659,
- 680, 717, 749, 777, 801, 823, 842, 859,
- 874, 899, 919, 936, 949, 960, 969, 977,
- 983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
- 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
- };
- static const int xsq_iq_q10[] = {
- 0, 4, 8, 12, 16, 20, 24, 28,
- 32, 40, 48, 56, 64, 72, 80, 88,
- 96, 112, 128, 144, 160, 176, 192, 208,
- 224, 256, 288, 320, 352, 384, 416, 448,
- 480, 544, 608, 672, 736, 800, 864, 928,
- 992, 1120, 1248, 1376, 1504, 1632, 1760, 1888,
- 2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808,
- 4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648,
- 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
- 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688,
- 32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408,
- 65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848,
- 131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
- };
- /*
- static const int tab_size = sizeof(rate_tab_q10) / sizeof(rate_tab_q10[0]);
- assert(sizeof(dist_tab_q10) / sizeof(dist_tab_q10[0]) == tab_size);
- assert(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) == tab_size);
- assert(MAX_XSQ_Q10 + 1 == xsq_iq_q10[tab_size - 1]);
- */
- int tmp = (xsq_q10 >> 2) + 8;
- int k = get_msb(tmp) - 3;
- int xq = (k << 3) + ((tmp >> k) & 0x7);
- const int one_q10 = 1 << 10;
- const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
- const int b_q10 = one_q10 - a_q10;
- *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
- *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
-}
-
-void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
- unsigned int qstep, int *rate,
- int64_t *dist) {
- // This function models the rate and distortion for a Laplacian
- // source with given variance when quantized with a uniform quantizer
- // with given stepsize. The closed form expressions are in:
- // Hang and Chen, "Source Model for transform video coder and its
- // application - Part I: Fundamental Theory", IEEE Trans. Circ.
- // Sys. for Video Tech., April 1997.
- if (var == 0) {
- *rate = 0;
- *dist = 0;
- } else {
- int d_q10, r_q10;
- const uint64_t xsq_q10_64 =
- ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
- const int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ?
- MAX_XSQ_Q10 : (int)xsq_q10_64;
- model_rd_norm(xsq_q10, &r_q10, &d_q10);
- *rate = (n * r_q10 + 2) >> 2;
- *dist = (var * (int64_t)d_q10 + 512) >> 10;
- }
-}
-
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
int *out_rate_sum, int64_t *out_dist_sum) {
@@ -499,55 +211,6 @@
*out_dist_sum = dist_sum << 4;
}
-static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
- TX_SIZE tx_size,
- MACROBLOCK *x, MACROBLOCKD *xd,
- int *out_rate_sum, int64_t *out_dist_sum,
- int *out_skip) {
- int j, k;
- BLOCK_SIZE bs;
- const struct macroblock_plane *const p = &x->plane[0];
- const struct macroblockd_plane *const pd = &xd->plane[0];
- const int width = 4 * num_4x4_blocks_wide_lookup[bsize];
- const int height = 4 * num_4x4_blocks_high_lookup[bsize];
- int rate_sum = 0;
- int64_t dist_sum = 0;
- const int t = 4 << tx_size;
-
- if (tx_size == TX_4X4) {
- bs = BLOCK_4X4;
- } else if (tx_size == TX_8X8) {
- bs = BLOCK_8X8;
- } else if (tx_size == TX_16X16) {
- bs = BLOCK_16X16;
- } else if (tx_size == TX_32X32) {
- bs = BLOCK_32X32;
- } else {
- assert(0);
- }
-
- *out_skip = 1;
- for (j = 0; j < height; j += t) {
- for (k = 0; k < width; k += t) {
- int rate;
- int64_t dist;
- unsigned int sse;
- cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
- &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
- &sse);
- // sse works better than var, since there is no dc prediction used
- vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
- &rate, &dist);
- rate_sum += rate;
- dist_sum += dist;
- *out_skip &= (rate < 1024);
- }
- }
-
- *out_rate_sum = rate_sum;
- *out_dist_sum = dist_sum << 4;
-}
-
int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff,
intptr_t block_size, int64_t *ssz) {
int i;
@@ -595,7 +258,7 @@
int c, cost;
// Check for consistency of tx_size with mode info
assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
- : get_uv_tx_size(mbmi) == tx_size);
+ : get_uv_tx_size(mbmi, pd) == tx_size);
if (eob == 0) {
// single eob token
@@ -721,45 +384,6 @@
}
}
-void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
- const struct macroblockd_plane *pd,
- ENTROPY_CONTEXT t_above[16],
- ENTROPY_CONTEXT t_left[16]) {
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
- const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
- const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
- const ENTROPY_CONTEXT *const above = pd->above_context;
- const ENTROPY_CONTEXT *const left = pd->left_context;
-
- int i;
- switch (tx_size) {
- case TX_4X4:
- vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
- vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
- break;
- case TX_8X8:
- for (i = 0; i < num_4x4_w; i += 2)
- t_above[i] = !!*(const uint16_t *)&above[i];
- for (i = 0; i < num_4x4_h; i += 2)
- t_left[i] = !!*(const uint16_t *)&left[i];
- break;
- case TX_16X16:
- for (i = 0; i < num_4x4_w; i += 4)
- t_above[i] = !!*(const uint32_t *)&above[i];
- for (i = 0; i < num_4x4_h; i += 4)
- t_left[i] = !!*(const uint32_t *)&left[i];
- break;
- case TX_32X32:
- for (i = 0; i < num_4x4_w; i += 8)
- t_above[i] = !!*(const uint64_t *)&above[i];
- for (i = 0; i < num_4x4_h; i += 8)
- t_left[i] = !!*(const uint64_t *)&left[i];
- break;
- default:
- assert(0 && "Invalid transform size.");
- }
-}
-
static void txfm_rd_in_plane(MACROBLOCK *x,
int *rate, int64_t *distortion,
int *skippable, int64_t *sse,
@@ -893,82 +517,6 @@
}
}
-static int64_t scaled_rd_cost(int rdmult, int rddiv,
- int rate, int64_t dist, double scale) {
- return (int64_t) (RDCOST(rdmult, rddiv, rate, dist) * scale);
-}
-
-static void choose_tx_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
- int (*r)[2], int *rate,
- int64_t *d, int64_t *distortion,
- int *s, int *skip, int64_t *sse,
- int64_t ref_best_rd,
- BLOCK_SIZE bs) {
- const TX_SIZE max_tx_size = max_txsize_lookup[bs];
- VP9_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
- int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
- {INT64_MAX, INT64_MAX},
- {INT64_MAX, INT64_MAX},
- {INT64_MAX, INT64_MAX}};
- TX_SIZE n, m;
- int s0, s1;
- double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
- const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
- int64_t best_rd = INT64_MAX;
- TX_SIZE best_tx = TX_4X4;
-
- const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
- assert(skip_prob > 0);
- s0 = vp9_cost_bit(skip_prob, 0);
- s1 = vp9_cost_bit(skip_prob, 1);
-
- for (n = TX_4X4; n <= max_tx_size; n++) {
- double scale = scale_rd[n];
- r[n][1] = r[n][0];
- for (m = 0; m <= n - (n == max_tx_size); m++) {
- if (m == n)
- r[n][1] += vp9_cost_zero(tx_probs[m]);
- else
- r[n][1] += vp9_cost_one(tx_probs[m]);
- }
- if (s[n]) {
- rd[n][0] = rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, s1, d[n],
- scale);
- } else {
- rd[n][0] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][0] + s0, d[n],
- scale);
- rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][1] + s0, d[n],
- scale);
- }
- if (rd[n][1] < best_rd) {
- best_rd = rd[n][1];
- best_tx = n;
- }
- }
-
- mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
- best_tx : MIN(max_tx_size, max_mode_tx_size);
-
- // Actually encode using the chosen mode if a model was used, but do not
- // update the r, d costs
- txfm_rd_in_plane(x, rate, distortion, skip,
- &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size,
- cpi->sf.use_fast_coef_costing);
-
- if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
- cpi->tx_stepdown_count[0]++;
- } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
- cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
- } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
- cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
- } else {
- cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
- }
-}
-
static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int64_t *distortion, int *skip,
int64_t *psse, BLOCK_SIZE bs,
@@ -1327,7 +875,7 @@
int64_t ref_best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
+ const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
int plane;
int pnrate = 0, pnskip = 1;
int64_t pndist = 0, pnsse = 0;
@@ -1447,16 +995,8 @@
static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
int mode_context) {
- const MACROBLOCK *const x = &cpi->mb;
- const int segment_id = x->e_mbd.mi[0]->mbmi.segment_id;
-
- // Don't account for mode here if segment skip is enabled.
- if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
- assert(is_inter_mode(mode));
- return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
- } else {
- return 0;
- }
+ assert(is_inter_mode(mode));
+ return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
}
static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
@@ -2074,65 +1614,6 @@
return bsi->segment_rd;
}
-static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
- uint8_t *ref_y_buffer, int ref_y_stride,
- int ref_frame, BLOCK_SIZE block_size ) {
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
- int_mv this_mv;
- int i;
- int zero_seen = 0;
- int best_index = 0;
- int best_sad = INT_MAX;
- int this_sad = INT_MAX;
- int max_mv = 0;
-
- uint8_t *src_y_ptr = x->plane[0].src.buf;
- uint8_t *ref_y_ptr;
- int row_offset, col_offset;
- int num_mv_refs = MAX_MV_REF_CANDIDATES +
- (cpi->sf.adaptive_motion_search &&
- cpi->common.show_frame &&
- block_size < cpi->sf.max_partition_size);
-
- MV pred_mv[3];
- pred_mv[0] = mbmi->ref_mvs[ref_frame][0].as_mv;
- pred_mv[1] = mbmi->ref_mvs[ref_frame][1].as_mv;
- pred_mv[2] = x->pred_mv[ref_frame];
-
- // Get the sad for each candidate reference mv
- for (i = 0; i < num_mv_refs; i++) {
- this_mv.as_mv = pred_mv[i];
-
- max_mv = MAX(max_mv,
- MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
- // only need to check zero mv once
- if (!this_mv.as_int && zero_seen)
- continue;
-
- zero_seen = zero_seen || !this_mv.as_int;
-
- row_offset = this_mv.as_mv.row >> 3;
- col_offset = this_mv.as_mv.col >> 3;
- ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
-
- // Find sad for current vector.
- this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
- ref_y_ptr, ref_y_stride);
-
- // Note if it is the best so far.
- if (this_sad < best_sad) {
- best_sad = this_sad;
- best_index = i;
- }
- }
-
- // Note the index of the mv that worked best in the reference list.
- x->mv_best_ref_index[ref_frame] = best_index;
- x->max_mv_context[ref_frame] = max_mv;
- x->pred_mv_sad[ref_frame] = best_sad;
-}
-
static void estimate_ref_frame_costs(const VP9_COMMON *cm,
const MACROBLOCKD *xd,
int segment_id,
@@ -2215,40 +1696,14 @@
sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
}
-static void setup_pred_block(const MACROBLOCKD *xd,
- struct buf_2d dst[MAX_MB_PLANE],
- const YV12_BUFFER_CONFIG *src,
- int mi_row, int mi_col,
- const struct scale_factors *scale,
- const struct scale_factors *scale_uv) {
- int i;
-
- dst[0].buf = src->y_buffer;
- dst[0].stride = src->y_stride;
- dst[1].buf = src->u_buffer;
- dst[2].buf = src->v_buffer;
- dst[1].stride = dst[2].stride = src->uv_stride;
-#if CONFIG_ALPHA
- dst[3].buf = src->alpha_buffer;
- dst[3].stride = src->alpha_stride;
-#endif
-
- // TODO(jkoleszar): Make scale factors per-plane data
- for (i = 0; i < MAX_MB_PLANE; i++) {
- setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
- i ? scale_uv : scale,
- xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
- }
-}
-
-void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
- const TileInfo *const tile,
- MV_REFERENCE_FRAME ref_frame,
- BLOCK_SIZE block_size,
- int mi_row, int mi_col,
- int_mv frame_nearest_mv[MAX_REF_FRAMES],
- int_mv frame_near_mv[MAX_REF_FRAMES],
- struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
+static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
+ const TileInfo *const tile,
+ MV_REFERENCE_FRAME ref_frame,
+ BLOCK_SIZE block_size,
+ int mi_row, int mi_col,
+ int_mv frame_nearest_mv[MAX_REF_FRAMES],
+ int_mv frame_near_mv[MAX_REF_FRAMES],
+ struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
const VP9_COMMON *cm = &cpi->common;
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2258,7 +1713,7 @@
// TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
// use the UV scaling factors.
- setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
+ vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
// Gets an initial list of candidate vectors from neighbours and orders them
vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col);
@@ -2272,24 +1727,8 @@
// in full and choose the best as the centre point for subsequent searches.
// The current implementation doesn't support scaling.
if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
- mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
- ref_frame, block_size);
-}
-
-const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
- int ref_frame) {
- const VP9_COMMON *const cm = &cpi->common;
- const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
- const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
- return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
-}
-
-int vp9_get_switchable_rate(const VP9_COMP *cpi) {
- const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
- const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- const int ctx = vp9_get_pred_context_switchable_interp(xd);
- return SWITCHABLE_INTERP_RATE_FACTOR *
- cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
+ vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
+ ref_frame, block_size);
}
static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
@@ -2799,45 +2238,43 @@
*rate2 += vp9_get_switchable_rate(cpi);
if (!is_comp_pred) {
- if (!x->in_active_map ||
- vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
- if (psse)
- *psse = 0;
- *distortion = 0;
- x->skip = 1;
- } else if (cpi->allow_encode_breakout && x->encode_breakout) {
+ if (cpi->allow_encode_breakout) {
const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
unsigned int var, sse;
// Skipping threshold for ac.
unsigned int thresh_ac;
- // Set a maximum for threshold to avoid big PSNR loss in low bitrate case.
- // Use extreme low threshold for static frames to limit skipping.
- const unsigned int max_thresh = (cpi->allow_encode_breakout ==
- ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
- // The encode_breakout input
- const unsigned int min_thresh =
- MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
-
- // Calculate threshold according to dequant value.
- thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
- thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
+ // Skipping threshold for dc
+ unsigned int thresh_dc;
var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].dst.buf,
xd->plane[0].dst.stride, &sse);
- // Adjust threshold according to partition size.
- thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
- b_height_log2_lookup[bsize]);
+ if (x->encode_breakout > 0) {
+ // Set a maximum for threshold to avoid big PSNR loss in low bitrate
+ // case. Use extreme low threshold for static frames to limit skipping.
+ const unsigned int max_thresh = (cpi->allow_encode_breakout ==
+ ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
+ // The encode_breakout input
+ const unsigned int min_thresh =
+ MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
+
+ // Calculate threshold according to dequant value.
+ thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
+ thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
+
+ // Adjust threshold according to partition size.
+ thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
+ b_height_log2_lookup[bsize]);
+ thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
+ } else {
+ thresh_ac = 0;
+ thresh_dc = 0;
+ }
// Y skipping condition checking
if (sse < thresh_ac || sse == 0) {
- // Skipping threshold for dc
- unsigned int thresh_dc;
-
- thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
-
// dc skipping checking
if ((sse - var) < thresh_dc || sse == var) {
unsigned int sse_u, sse_v;
@@ -2925,6 +2362,7 @@
PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
+ struct macroblockd_plane *const pd = xd->plane;
int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
int y_skip = 0, uv_skip = 0;
int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
@@ -2940,7 +2378,9 @@
*returnrate = INT_MAX;
return;
}
- max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize);
+ max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize,
+ pd[1].subsampling_x,
+ pd[1].subsampling_y);
rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
&dist_uv, &uv_skip, bsize, max_uv_tx_size);
} else {
@@ -2950,7 +2390,9 @@
*returnrate = INT_MAX;
return;
}
- max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize);
+ max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize,
+ pd[1].subsampling_x,
+ pd[1].subsampling_y);
rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
&dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
}
@@ -3012,6 +2454,7 @@
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const struct segmentation *const seg = &cm->seg;
+ struct macroblockd_plane *const pd = xd->plane;
PREDICTION_MODE this_mode;
MV_REFERENCE_FRAME ref_frame, second_ref_frame;
unsigned char segment_id = mbmi->segment_id;
@@ -3076,7 +2519,7 @@
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
- vp9_setup_buffer_inter(cpi, x, tile,
+ setup_buffer_inter(cpi, x, tile,
ref_frame, bsize, mi_row, mi_col,
frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
}
@@ -3114,13 +2557,6 @@
}
}
- // If the segment skip feature is enabled....
- // then do nothing if the current mode is not allowed..
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
- mode_skip_mask = ~(1 << THR_ZEROMV);
- inter_mode_mask = (1 << ZEROMV);
- }
-
// Disable this drop out case if the ref frame
// segment level feature is enabled for this segment. This is to
// prevent the possibility that we end up unable to pick any mode.
@@ -3159,21 +2595,6 @@
mode_skip_mask |= all_intra_modes;
}
- if (!x->in_active_map) {
- int mode_index;
- assert(cpi->ref_frame_flags & VP9_LAST_FLAG);
- if (frame_mv[NEARESTMV][LAST_FRAME].as_int == 0)
- mode_index = THR_NEARESTMV;
- else if (frame_mv[NEARMV][LAST_FRAME].as_int == 0)
- mode_index = THR_NEARMV;
- else
- mode_index = THR_ZEROMV;
- mode_skip_mask = ~(1 << mode_index);
- mode_skip_start = MAX_MODES;
- inter_mode_mask = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) |
- (1 << NEWMV);
- }
-
for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
int mode_excluded = 0;
int64_t this_rd = INT64_MAX;
@@ -3263,17 +2684,14 @@
}
}
} else {
- if (x->in_active_map &&
- !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
- const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
- if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
- inter_mode_mask, this_mode, ref_frames))
- continue;
- }
+ const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
+ if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
+ inter_mode_mask, this_mode, ref_frames))
+ continue;
}
mbmi->mode = this_mode;
- mbmi->uv_mode = x->in_active_map ? DC_PRED : this_mode;
+ mbmi->uv_mode = DC_PRED;
mbmi->ref_frame[0] = ref_frame;
mbmi->ref_frame[1] = second_ref_frame;
// Evaluate all sub-pel filters irrespective of whether we can use
@@ -3301,7 +2719,8 @@
if (rate_y == INT_MAX)
continue;
- uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize);
+ uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd[1].subsampling_x,
+ pd[1].subsampling_y);
if (rate_uv_intra[uv_tx] == INT_MAX) {
choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
&rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
@@ -3345,31 +2764,20 @@
}
if (!disable_skip) {
- // Test for the condition where skip block will be activated
- // because there are no non zero coefficients and make any
- // necessary adjustment for rate. Ignore if skip is coded at
- // segment level as the cost wont have been added in.
- // Is Mb level skip allowed (i.e. not coded at segment level).
- const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
- SEG_LVL_SKIP);
-
if (skippable) {
+ vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
+
// Back out the coefficient coding costs
rate2 -= (rate_y + rate_uv);
// for best yrd calculation
rate_uv = 0;
- if (mb_skip_allowed) {
- int prob_skip_cost;
-
- // Cost the skip mb case
- vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
- if (skip_prob) {
- prob_skip_cost = vp9_cost_bit(skip_prob, 1);
- rate2 += prob_skip_cost;
- }
+ // Cost the skip mb case
+ if (skip_prob) {
+ int prob_skip_cost = vp9_cost_bit(skip_prob, 1);
+ rate2 += prob_skip_cost;
}
- } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
+ } else if (ref_frame != INTRA_FRAME && !xd->lossless) {
if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
// Add in the cost of the no skip flag.
@@ -3384,7 +2792,7 @@
rate_uv = 0;
this_skip2 = 1;
}
- } else if (mb_skip_allowed) {
+ } else {
// Add in the cost of the no skip flag.
rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
}
@@ -3546,7 +2954,7 @@
if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
TX_SIZE uv_tx_size;
*mbmi = best_mbmode;
- uv_tx_size = get_uv_tx_size(mbmi);
+ uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
&rate_uv_tokenonly[uv_tx_size],
&dist_uv[uv_tx_size],
@@ -3593,16 +3001,6 @@
vp9_zero(best_tx_diff);
}
- if (!x->in_active_map) {
- assert(mbmi->ref_frame[0] == LAST_FRAME);
- assert(mbmi->ref_frame[1] == NONE);
- assert(mbmi->mode == NEARESTMV ||
- mbmi->mode == NEARMV ||
- mbmi->mode == ZEROMV);
- assert(frame_mv[mbmi->mode][LAST_FRAME].as_int == 0);
- assert(mbmi->mode == mbmi->uv_mode);
- }
-
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
store_coding_context(x, ctx, best_mode_index,
best_pred_diff, best_tx_diff, best_filter_diff);
@@ -3610,6 +3008,111 @@
return best_rd;
}
+int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
+ int *returnrate,
+ int64_t *returndistortion,
+ BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far) {
+ VP9_COMMON *const cm = &cpi->common;
+ RD_OPT *const rd_opt = &cpi->rd;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ const struct segmentation *const seg = &cm->seg;
+ unsigned char segment_id = mbmi->segment_id;
+ const int comp_pred = 0;
+ int i;
+ int64_t best_tx_diff[TX_MODES];
+ int64_t best_pred_diff[REFERENCE_MODES];
+ int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
+ unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
+ vp9_prob comp_mode_p;
+ INTERP_FILTER best_filter = SWITCHABLE;
+ int64_t this_rd = INT64_MAX;
+ int rate2 = 0;
+ const int64_t distortion2 = 0;
+
+ x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
+
+ estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
+ &comp_mode_p);
+
+ for (i = 0; i < MAX_REF_FRAMES; ++i)
+ x->pred_sse[i] = INT_MAX;
+ for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i)
+ x->pred_mv_sad[i] = INT_MAX;
+
+ *returnrate = INT_MAX;
+
+ assert(vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP));
+
+ mbmi->mode = ZEROMV;
+ mbmi->uv_mode = DC_PRED;
+ mbmi->ref_frame[0] = LAST_FRAME;
+ mbmi->ref_frame[1] = NONE;
+ mbmi->mv[0].as_int = 0;
+ x->skip = 1;
+
+ // Search for best switchable filter by checking the variance of
+ // pred error irrespective of whether the filter will be used
+ rd_opt->mask_filter = 0;
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
+ rd_opt->filter_cache[i] = INT64_MAX;
+
+ if (cm->interp_filter != BILINEAR) {
+ best_filter = EIGHTTAP;
+ if (cm->interp_filter == SWITCHABLE &&
+ x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
+ int rs;
+ int best_rs = INT_MAX;
+ for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
+ mbmi->interp_filter = i;
+ rs = vp9_get_switchable_rate(cpi);
+ if (rs < best_rs) {
+ best_rs = rs;
+ best_filter = mbmi->interp_filter;
+ }
+ }
+ }
+ }
+ // Set the appropriate filter
+ if (cm->interp_filter == SWITCHABLE) {
+ mbmi->interp_filter = best_filter;
+ rate2 += vp9_get_switchable_rate(cpi);
+ } else {
+ mbmi->interp_filter = cm->interp_filter;
+ }
+
+ if (cm->reference_mode == REFERENCE_MODE_SELECT)
+ rate2 += vp9_cost_bit(comp_mode_p, comp_pred);
+
+ // Estimate the reference frame signaling cost and add it
+ // to the rolling cost variable.
+ rate2 += ref_costs_single[LAST_FRAME];
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+
+ *returnrate = rate2;
+ *returndistortion = distortion2;
+
+ if (this_rd >= best_rd_so_far)
+ return INT64_MAX;
+
+ assert((cm->interp_filter == SWITCHABLE) ||
+ (cm->interp_filter == mbmi->interp_filter));
+
+ update_rd_thresh_fact(cpi, bsize, THR_ZEROMV);
+
+ vp9_zero(best_pred_diff);
+ vp9_zero(best_filter_diff);
+ vp9_zero(best_tx_diff);
+
+ if (!x->select_tx_size)
+ swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
+ store_coding_context(x, ctx, THR_ZEROMV,
+ best_pred_diff, best_tx_diff, best_filter_diff);
+
+ return this_rd;
+}
int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
const TileInfo *const tile,
@@ -3678,7 +3181,7 @@
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
- vp9_setup_buffer_inter(cpi, x, tile,
+ setup_buffer_inter(cpi, x, tile,
ref_frame, bsize, mi_row, mi_col,
frame_mv[NEARESTMV], frame_mv[NEARMV],
yv12_mb);
@@ -4217,120 +3720,3 @@
return best_rd;
}
-
-void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
- int i;
- RD_OPT *const rd = &cpi->rd;
-
- // Set baseline threshold values
- for (i = 0; i < MAX_MODES; ++i)
- rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
-
- rd->thresh_mult[THR_NEARESTMV] = 0;
- rd->thresh_mult[THR_NEARESTG] = 0;
- rd->thresh_mult[THR_NEARESTA] = 0;
-
- rd->thresh_mult[THR_DC] += 1000;
-
- rd->thresh_mult[THR_NEWMV] += 1000;
- rd->thresh_mult[THR_NEWA] += 1000;
- rd->thresh_mult[THR_NEWG] += 1000;
-
- rd->thresh_mult[THR_NEARMV] += 1000;
- rd->thresh_mult[THR_NEARA] += 1000;
- rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
- rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
-
- rd->thresh_mult[THR_TM] += 1000;
-
- rd->thresh_mult[THR_COMP_NEARLA] += 1500;
- rd->thresh_mult[THR_COMP_NEWLA] += 2000;
- rd->thresh_mult[THR_NEARG] += 1000;
- rd->thresh_mult[THR_COMP_NEARGA] += 1500;
- rd->thresh_mult[THR_COMP_NEWGA] += 2000;
-
- rd->thresh_mult[THR_ZEROMV] += 2000;
- rd->thresh_mult[THR_ZEROG] += 2000;
- rd->thresh_mult[THR_ZEROA] += 2000;
- rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
- rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
-
- rd->thresh_mult[THR_H_PRED] += 2000;
- rd->thresh_mult[THR_V_PRED] += 2000;
- rd->thresh_mult[THR_D45_PRED ] += 2500;
- rd->thresh_mult[THR_D135_PRED] += 2500;
- rd->thresh_mult[THR_D117_PRED] += 2500;
- rd->thresh_mult[THR_D153_PRED] += 2500;
- rd->thresh_mult[THR_D207_PRED] += 2500;
- rd->thresh_mult[THR_D63_PRED] += 2500;
-
- /* disable frame modes if flags not set */
- if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
- rd->thresh_mult[THR_NEWMV ] = INT_MAX;
- rd->thresh_mult[THR_NEARESTMV] = INT_MAX;
- rd->thresh_mult[THR_ZEROMV ] = INT_MAX;
- rd->thresh_mult[THR_NEARMV ] = INT_MAX;
- }
- if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
- rd->thresh_mult[THR_NEARESTG ] = INT_MAX;
- rd->thresh_mult[THR_ZEROG ] = INT_MAX;
- rd->thresh_mult[THR_NEARG ] = INT_MAX;
- rd->thresh_mult[THR_NEWG ] = INT_MAX;
- }
- if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
- rd->thresh_mult[THR_NEARESTA ] = INT_MAX;
- rd->thresh_mult[THR_ZEROA ] = INT_MAX;
- rd->thresh_mult[THR_NEARA ] = INT_MAX;
- rd->thresh_mult[THR_NEWA ] = INT_MAX;
- }
-
- if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
- (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
- rd->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX;
- rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
- rd->thresh_mult[THR_COMP_NEARLA ] = INT_MAX;
- rd->thresh_mult[THR_COMP_NEWLA ] = INT_MAX;
- }
- if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
- (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
- rd->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
- rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
- rd->thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
- rd->thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
- }
-}
-
-void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
- const SPEED_FEATURES *const sf = &cpi->sf;
- RD_OPT *const rd = &cpi->rd;
- int i;
-
- for (i = 0; i < MAX_REFS; ++i)
- rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
-
- rd->thresh_mult_sub8x8[THR_LAST] += 2500;
- rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
- rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
- rd->thresh_mult_sub8x8[THR_INTRA] += 2500;
- rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
- rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
-
- // Check for masked out split cases.
- for (i = 0; i < MAX_REFS; i++)
- if (sf->disable_split_mask & (1 << i))
- rd->thresh_mult_sub8x8[i] = INT_MAX;
-
- // disable mode test if frame flag is not set
- if (!(cpi->ref_frame_flags & VP9_LAST_FLAG))
- rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX;
- if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG))
- rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
- if (!(cpi->ref_frame_flags & VP9_ALT_FLAG))
- rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
- if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
- (VP9_LAST_FLAG | VP9_ALT_FLAG))
- rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
- if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
- (VP9_GOLD_FLAG | VP9_ALT_FLAG))
- rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
-}
diff --git a/source/libvpx/vp9/encoder/vp9_rdopt.h b/source/libvpx/vp9/encoder/vp9_rdopt.h
index fba54cc..52c603f 100644
--- a/source/libvpx/vp9/encoder/vp9_rdopt.h
+++ b/source/libvpx/vp9/encoder/vp9_rdopt.h
@@ -11,8 +11,6 @@
#ifndef VP9_ENCODER_VP9_RDOPT_H_
#define VP9_ENCODER_VP9_RDOPT_H_
-#include <limits.h>
-
#include "vp9/common/vp9_blockd.h"
#include "vp9/encoder/vp9_block.h"
@@ -22,126 +20,10 @@
extern "C" {
#endif
-#define RDDIV_BITS 7
-
-#define RDCOST(RM, DM, R, D) \
- (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM))
-#define QIDX_SKIP_THRESH 115
-
-#define MV_COST_WEIGHT 108
-#define MV_COST_WEIGHT_SUB 120
-
-#define INVALID_MV 0x80008000
-
-#define MAX_MODES 30
-#define MAX_REFS 6
-
-// This enumerator type needs to be kept aligned with the mode order in
-// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
-typedef enum {
- THR_NEARESTMV,
- THR_NEARESTA,
- THR_NEARESTG,
-
- THR_DC,
-
- THR_NEWMV,
- THR_NEWA,
- THR_NEWG,
-
- THR_NEARMV,
- THR_NEARA,
- THR_COMP_NEARESTLA,
- THR_COMP_NEARESTGA,
-
- THR_TM,
-
- THR_COMP_NEARLA,
- THR_COMP_NEWLA,
- THR_NEARG,
- THR_COMP_NEARGA,
- THR_COMP_NEWGA,
-
- THR_ZEROMV,
- THR_ZEROG,
- THR_ZEROA,
- THR_COMP_ZEROLA,
- THR_COMP_ZEROGA,
-
- THR_H_PRED,
- THR_V_PRED,
- THR_D135_PRED,
- THR_D207_PRED,
- THR_D153_PRED,
- THR_D63_PRED,
- THR_D117_PRED,
- THR_D45_PRED,
-} THR_MODES;
-
-typedef enum {
- THR_LAST,
- THR_GOLD,
- THR_ALTR,
- THR_COMP_LA,
- THR_COMP_GA,
- THR_INTRA,
-} THR_MODES_SUB8X8;
-
-typedef struct RD_OPT {
- // Thresh_mult is used to set a threshold for the rd score. A higher value
- // means that we will accept the best mode so far more often. This number
- // is used in combination with the current block size, and thresh_freq_fact
- // to pick a threshold.
- int thresh_mult[MAX_MODES];
- int thresh_mult_sub8x8[MAX_REFS];
-
- int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
- int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
-
- int64_t comp_pred_diff[REFERENCE_MODES];
- int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
- int64_t tx_select_diff[TX_MODES];
- // FIXME(rbultje) can this overflow?
- int tx_select_threshes[MAX_REF_FRAMES][TX_MODES];
-
- int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
- int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
- int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
- int64_t mask_filter;
-
- int RDMULT;
- int RDDIV;
-} RD_OPT;
-
-
struct TileInfo;
struct VP9_COMP;
struct macroblock;
-int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex);
-
-void vp9_initialize_rd_consts(struct VP9_COMP *cpi);
-
-void vp9_initialize_me_consts(struct VP9_COMP *cpi, int qindex);
-
-void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
- unsigned int qstep, int *rate,
- int64_t *dist);
-
-int vp9_get_switchable_rate(const struct VP9_COMP *cpi);
-
-void vp9_setup_buffer_inter(struct VP9_COMP *cpi, struct macroblock *x,
- const TileInfo *const tile,
- MV_REFERENCE_FRAME ref_frame,
- BLOCK_SIZE block_size,
- int mi_row, int mi_col,
- int_mv frame_nearest_mv[MAX_REF_FRAMES],
- int_mv frame_near_mv[MAX_REF_FRAMES],
- struct buf_2d yv12_mb[4][MAX_MB_PLANE]);
-
-const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi,
- int ref_frame);
-
void vp9_rd_pick_intra_mode_sb(struct VP9_COMP *cpi, struct macroblock *x,
int *r, int64_t *d, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx, int64_t best_rd);
@@ -155,6 +37,14 @@
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far);
+int64_t vp9_rd_pick_inter_mode_sb_seg_skip(struct VP9_COMP *cpi,
+ struct macroblock *x,
+ int *returnrate,
+ int64_t *returndistortion,
+ BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx,
+ int64_t best_rd_so_far);
+
int64_t vp9_rd_pick_inter_mode_sub8x8(struct VP9_COMP *cpi,
struct macroblock *x,
const struct TileInfo *const tile,
@@ -165,22 +55,6 @@
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far);
-void vp9_init_me_luts();
-
-void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
- const struct macroblockd_plane *pd,
- ENTROPY_CONTEXT t_above[16],
- ENTROPY_CONTEXT t_left[16]);
-
-void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi);
-
-void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi);
-
-static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
- int thresh_fact) {
- return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
-}
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/source/libvpx/vp9/encoder/vp9_segmentation.c b/source/libvpx/vp9/encoder/vp9_segmentation.c
index 574df62..897ae01 100644
--- a/source/libvpx/vp9/encoder/vp9_segmentation.c
+++ b/source/libvpx/vp9/encoder/vp9_segmentation.c
@@ -27,6 +27,8 @@
void vp9_disable_segmentation(struct segmentation *seg) {
seg->enabled = 0;
+ seg->update_map = 0;
+ seg->update_data = 0;
}
void vp9_set_segment_data(struct segmentation *seg,
diff --git a/source/libvpx/vp9/encoder/vp9_speed_features.c b/source/libvpx/vp9/encoder/vp9_speed_features.c
index d54ed08..98d6825 100644
--- a/source/libvpx/vp9/encoder/vp9_speed_features.c
+++ b/source/libvpx/vp9/encoder/vp9_speed_features.c
@@ -84,16 +84,17 @@
if (speed >= 2) {
if (MIN(cm->width, cm->height) >= 720) {
- sf->lf_motion_threshold = LOW_MOITION_THRESHOLD;
+ sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->last_partitioning_redo_frequency = 3;
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
: DISABLE_ALL_INTER_SPLIT;
} else {
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
sf->last_partitioning_redo_frequency = 2;
- sf->lf_motion_threshold = NO_MOITION_THRESHOLD;
+ sf->lf_motion_threshold = NO_MOTION_THRESHOLD;
}
- sf->adaptive_pred_interp_filter = 2;
+
+ sf->adaptive_pred_interp_filter = 0;
sf->reference_masking = 1;
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH |
FLAG_SKIP_INTRA_BESTINTER |
@@ -114,7 +115,7 @@
else
sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
- sf->lf_motion_threshold = LOW_MOITION_THRESHOLD;
+ sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->last_partitioning_redo_frequency = 3;
sf->recode_loop = ALLOW_RECODE_KFMAXBW;
sf->adaptive_rd_thresh = 3;
@@ -148,6 +149,9 @@
}
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
}
+ if (speed >= 6) {
+ sf->mv.reduce_first_step_size = 1;
+ }
}
static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
@@ -198,7 +202,7 @@
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
- sf->lf_motion_threshold = LOW_MOITION_THRESHOLD;
+ sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
sf->use_lp32x32fdct = 1;
@@ -269,16 +273,28 @@
// Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
sf->search_type_check_frequency = 50;
- sf->source_var_thresh = 360;
- sf->tx_size_search_method = USE_TX_8X8;
- // TODO(yunqingwang): max_intra_bsize is used to decide if DC_PRED mode
- // is checked for a partition block. Later, we can try to allow large
- // partitions to do intra mode checking.
+ sf->tx_size_search_method = (cm->frame_type == KEY_FRAME) ?
+ USE_LARGESTALL : USE_TX_8X8;
sf->max_intra_bsize = BLOCK_8X8;
- }
+ // This feature is only enabled when partition search is disabled.
+ sf->reuse_inter_pred_sby = 1;
+
+ // Increase mode checking threshold for NEWMV.
+ sf->elevate_newmv_thresh = 2000;
+
+ sf->mv.reduce_first_step_size = 1;
+ }
if (speed >= 7) {
+ sf->use_quant_fp = cm->frame_type == KEY_FRAME ? 0 : 1;
+ sf->mv.fullpel_search_step_param = 10;
+ sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
+ sf->encode_breakout_thresh = (MIN(cm->width, cm->height) >= 720) ?
+ 800 : 300;
+ sf->elevate_newmv_thresh = 2500;
+ }
+ if (speed >= 8) {
int i;
for (i = 0; i < BLOCK_SIZES; ++i)
sf->inter_mode_mask[i] = INTER_NEAREST;
@@ -301,7 +317,7 @@
sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf);
sf->mv.reduce_first_step_size = 0;
sf->mv.auto_mv_step_size = 0;
- sf->mv.max_step_search_steps = MAX_MVSEARCH_STEPS;
+ sf->mv.fullpel_search_step_param = 6;
sf->comp_inter_joint_search_thresh = BLOCK_4X4;
sf->adaptive_rd_thresh = 0;
sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF;
@@ -309,6 +325,7 @@
sf->use_lp32x32fdct = 0;
sf->adaptive_motion_search = 0;
sf->adaptive_pred_interp_filter = 0;
+ sf->use_quant_fp = 0;
sf->reference_masking = 0;
sf->partition_search_type = SEARCH_PARTITION;
sf->less_rectangular_check = 0;
@@ -341,12 +358,13 @@
for (i = 0; i < BLOCK_SIZES; ++i)
sf->inter_mode_mask[i] = INTER_ALL;
sf->max_intra_bsize = BLOCK_64X64;
+ sf->reuse_inter_pred_sby = 0;
// This setting only takes effect when partition_search_type is set
// to FIXED_PARTITION.
sf->always_this_block_size = BLOCK_16X16;
sf->search_type_check_frequency = 50;
- sf->source_var_thresh = 100;
-
+ sf->encode_breakout_thresh = 0;
+ sf->elevate_newmv_thresh = 0;
// Recode loop tolerence %.
sf->recode_tolerance = 25;
@@ -389,4 +407,8 @@
if (!cpi->oxcf.frame_periodic_boost) {
sf->max_delta_qindex = 0;
}
+
+ if (cpi->encode_breakout && oxcf->mode == REALTIME &&
+ sf->encode_breakout_thresh > cpi->encode_breakout)
+ cpi->encode_breakout = sf->encode_breakout_thresh;
}
diff --git a/source/libvpx/vp9/encoder/vp9_speed_features.h b/source/libvpx/vp9/encoder/vp9_speed_features.h
index c796421..e6f4653 100644
--- a/source/libvpx/vp9/encoder/vp9_speed_features.h
+++ b/source/libvpx/vp9/encoder/vp9_speed_features.h
@@ -44,8 +44,8 @@
} SUBPEL_SEARCH_METHODS;
typedef enum {
- NO_MOITION_THRESHOLD = 0,
- LOW_MOITION_THRESHOLD = 7
+ NO_MOTION_THRESHOLD = 0,
+ LOW_MOTION_THRESHOLD = 7
} MOTION_THRESHOLD;
typedef enum {
@@ -73,6 +73,8 @@
LPF_PICK_FROM_SUBIMAGE,
// Estimate the level based on quantizer and frame type
LPF_PICK_FROM_Q,
+ // Pick 0 to disable LPF if LPF was enabled last frame
+ LPF_PICK_MINIMAL_LPF
} LPF_PICK_METHOD;
typedef enum {
@@ -137,10 +139,6 @@
// Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
SEARCH_METHODS search_method;
- // This parameter controls the number of steps we'll do in a diamond
- // search.
- int max_step_search_steps;
-
// This parameter controls which step in the n-step process we start at.
// It's changed adaptively based on circumstances.
int reduce_first_step_size;
@@ -160,6 +158,9 @@
// Control when to stop subpel search
int subpel_force_stop;
+
+ // This variable sets the step_param used in full pel motion search.
+ int fullpel_search_step_param;
} MV_SPEED_FEATURES;
typedef struct SPEED_FEATURES {
@@ -282,6 +283,9 @@
// was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected.
int adaptive_pred_interp_filter;
+ // Fast quantization process path
+ int use_quant_fp;
+
// Search through variable block partition types in non-RD mode decision
// encoding process for RTC.
int partition_check;
@@ -351,8 +355,17 @@
// FIXED_PARTITION search type should be used.
int search_type_check_frequency;
- // The threshold used in SOURCE_VAR_BASED_PARTITION search type.
- unsigned int source_var_thresh;
+ // When partition is pre-set, the inter prediction result from pick_inter_mode
+ // can be reused in final block encoding process. It is enabled only for real-
+ // time mode speed 6.
+ int reuse_inter_pred_sby;
+
+ // This variable sets the encode_breakout threshold. Currently, it is only
+ // enabled in real time mode.
+ int encode_breakout_thresh;
+
+ // In real time encoding, increase the threshold for NEWMV.
+ int elevate_newmv_thresh;
} SPEED_FEATURES;
struct VP9_COMP;
diff --git a/source/libvpx/vp9/encoder/vp9_svc_layercontext.c b/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
index 1b99575..07c17b2 100644
--- a/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
+++ b/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
@@ -12,6 +12,7 @@
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_svc_layercontext.h"
+#include "vp9/encoder/vp9_extend.h"
void vp9_init_layer_context(VP9_COMP *const cpi) {
SVC *const svc = &cpi->svc;
@@ -31,6 +32,7 @@
for (layer = 0; layer < layer_end; ++layer) {
LAYER_CONTEXT *const lc = &svc->layer_context[layer];
RATE_CONTROL *const lrc = &lc->rc;
+ int i;
lc->current_video_frame_in_layer = 0;
lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
lrc->ni_av_qi = oxcf->worst_allowed_q;
@@ -42,8 +44,10 @@
lrc->ni_frames = 0;
lrc->decimation_count = 0;
lrc->decimation_factor = 0;
- lrc->rate_correction_factor = 1.0;
- lrc->key_frame_rate_correction_factor = 1.0;
+
+ for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
+ lrc->rate_correction_factors[i] = 1.0;
+ }
if (svc->number_temporal_layers > 1) {
lc->target_bandwidth = oxcf->ts_target_bitrate[layer];
@@ -206,3 +210,101 @@
cpi->svc.spatial_layer_id > 0 &&
cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame;
}
+
+int vp9_svc_lookahead_push(const VP9_COMP *const cpi, struct lookahead_ctx *ctx,
+ YV12_BUFFER_CONFIG *src, int64_t ts_start,
+ int64_t ts_end, unsigned int flags) {
+ struct lookahead_entry *buf;
+ int i, index;
+ // Pushes src, then stores every spatial layer's SVC params; 0 on success.
+ if (vp9_lookahead_push(ctx, src, ts_start, ts_end, flags))
+ return 1;  // underlying push reported failure
+ // Slot just before write_idx (wrapping at max_sz); presumably the new entry.
+ index = ctx->write_idx - 1;
+ if (index < 0)
+ index += ctx->max_sz;
+
+ buf = ctx->buf + index;
+ // NOTE(review): buf is ctx->buf + index, so this guard looks unreachable.
+ if (buf == NULL)
+ return 1;
+
+ // Store svc parameters for each layer
+ for (i = 0; i < cpi->svc.number_spatial_layers; ++i)
+ buf->svc_params[i] = cpi->svc.layer_context[i].svc_params_received;
+
+ return 0;
+}
+
+static int copy_svc_params(VP9_COMP *const cpi, struct lookahead_entry *buf) {
+ int layer_id;
+ vpx_svc_parameters_t *layer_param;
+ vpx_enc_frame_flags_t flags;
+ // Loads the first layer of buf with spatial_layer >= 0 into cpi; 0 on success.
+ // Find the next layer to be encoded
+ for (layer_id = 0; layer_id < cpi->svc.number_spatial_layers; ++layer_id) {
+ if (buf->svc_params[layer_id].spatial_layer >=0)
+ break;
+ }
+
+ if (layer_id == cpi->svc.number_spatial_layers)
+ return 1;  // no layer with spatial_layer >= 0 is left in this entry
+
+ layer_param = &buf->svc_params[layer_id];
+ buf->flags = flags = layer_param->flags;
+ cpi->svc.spatial_layer_id = layer_param->spatial_layer;
+ cpi->svc.temporal_layer_id = layer_param->temporal_layer;
+ cpi->lst_fb_idx = layer_param->lst_fb_idx;
+ cpi->gld_fb_idx = layer_param->gld_fb_idx;
+ cpi->alt_fb_idx = layer_param->alt_fb_idx;
+ // NOTE(review): fails with a vpx error code here but with 1 above.
+ if (vp9_set_size_literal(cpi, layer_param->width, layer_param->height) != 0)
+ return VPX_CODEC_INVALID_PARAM;  // cpi layer ids/fb indices already changed
+
+ cpi->oxcf.worst_allowed_q =
+ vp9_quantizer_to_qindex(layer_param->max_quantizer);
+ cpi->oxcf.best_allowed_q =
+ vp9_quantizer_to_qindex(layer_param->min_quantizer);
+
+ vp9_change_config(cpi, &cpi->oxcf);
+
+ vp9_set_high_precision_mv(cpi, 1);
+
+ // Retrieve the encoding flags for each layer and apply it to encoder.
+ // It includes reference frame flags and update frame flags.
+ vp9_apply_encoding_flags(cpi, flags);
+
+ return 0;
+}
+
+struct lookahead_entry *vp9_svc_lookahead_peek(VP9_COMP *const cpi,
+ struct lookahead_ctx *ctx,
+ int index, int copy_params) {
+ struct lookahead_entry *buf = vp9_lookahead_peek(ctx, index);
+ // With copy_params set, also load the entry's SVC params into cpi.
+ if (buf != NULL && copy_params != 0) {
+ if (copy_svc_params(cpi, buf) != 0)
+ return NULL;  // params could not be applied; the entry is not returned
+ }
+ return buf;
+}
+
+struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi,
+ struct lookahead_ctx *ctx,
+ int drain) {
+ struct lookahead_entry *buf = NULL;
+ // Returns the head entry with its next layer loaded into cpi, or NULL.
+ if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) {
+ buf = vp9_svc_lookahead_peek(cpi, ctx, 0, 1);  // sets svc.spatial_layer_id
+ if (buf != NULL) {
+ // Mark the layer just loaded as consumed (spatial_layer = -1). The entry
+ // itself is removed only once the highest spatial layer has been popped.
+ buf->svc_params[cpi->svc.spatial_layer_id].spatial_layer = -1;
+ if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
+ vp9_lookahead_pop(ctx, drain);
+ }
+ }
+ }
+
+ return buf;
+}
diff --git a/source/libvpx/vp9/encoder/vp9_svc_layercontext.h b/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
index 36e2027..3ebb831 100644
--- a/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
+++ b/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
@@ -28,6 +28,7 @@
struct vpx_fixed_buf rc_twopass_stats_in;
unsigned int current_video_frame_in_layer;
int is_key_frame;
+ vpx_svc_parameters_t svc_params_received;
} LAYER_CONTEXT;
typedef struct {
@@ -74,6 +75,23 @@
// Check if current layer is key frame in spatial upper layer
int vp9_is_upper_layer_key_frame(const struct VP9_COMP *const cpi);
+// Copy the source image, flags and svc parameters into a new framebuffer
+// with the expected stride/border
+int vp9_svc_lookahead_push(const struct VP9_COMP *const cpi,
+ struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
+ int64_t ts_start, int64_t ts_end,
+ unsigned int flags);
+
+// Get the next source buffer to encode
+struct lookahead_entry *vp9_svc_lookahead_pop(struct VP9_COMP *const cpi,
+ struct lookahead_ctx *ctx,
+ int drain);
+
+// Get a future source buffer to encode
+struct lookahead_entry *vp9_svc_lookahead_peek(struct VP9_COMP *const cpi,
+ struct lookahead_ctx *ctx,
+ int index, int copy_params);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/source/libvpx/vp9/encoder/vp9_temporal_filter.c b/source/libvpx/vp9/encoder/vp9_temporal_filter.c
index 31f8c32..c090731 100644
--- a/source/libvpx/vp9/encoder/vp9_temporal_filter.c
+++ b/source/libvpx/vp9/encoder/vp9_temporal_filter.c
@@ -163,8 +163,8 @@
xd->plane[0].pre[0].buf = frame_ptr_buf;
xd->plane[0].pre[0].stride = stride;
- step_param = mv_sf->reduce_first_step_size + (cpi->oxcf.speed > 5 ? 1 : 0);
- step_param = MIN(step_param, mv_sf->max_step_search_steps - 2);
+ step_param = mv_sf->reduce_first_step_size;
+ step_param = MIN(step_param, MAX_MVSEARCH_STEPS - 2);
// Ignore mv costing by sending NULL pointer instead of cost arrays
vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
diff --git a/source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm b/source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
index 48ccef8..2d9f2b0 100644
--- a/source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
+++ b/source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm
@@ -217,3 +217,183 @@
INIT_XMM ssse3
QUANTIZE_FN b, 7
QUANTIZE_FN b_32x32, 7
+
+%macro QUANTIZE_FP 2
+cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
+ shift, qcoeff, dqcoeff, dequant, zbin_oq, \
+ eob, scan, iscan
+ cmp dword skipm, 0
+ jne .blank
+ ; Per coeff: qc = sign(c) * (((|c| + round) * quant) >> 16), dqc = qc * dequant.
+ ; actual quantize loop - setup pointers, rounders, etc.
+ movifnidn coeffq, coeffmp
+ movifnidn ncoeffq, ncoeffmp
+ mov r2, dequantmp
+ movifnidn zbinq, zbinmp
+ movifnidn roundq, roundmp
+ movifnidn quantq, quantmp
+ mova m1, [roundq] ; m1 = round
+ mova m2, [quantq] ; m2 = quant
+%ifidn %1, b_32x32
+; TODO(jingning) to be continued with 32x32 quantization process; as written this path uses m0 (and m4 below) without loading them.
+ pcmpeqw m5, m5
+ psrlw m5, 15
+ paddw m0, m5
+ paddw m1, m5
+ psrlw m0, 1 ; m0 = (m0 + 1) / 2
+ psrlw m1, 1 ; m1 = (m1 + 1) / 2
+%endif
+ mova m3, [r2q] ; m3 = dequant
+ mov r3, qcoeffmp
+ mov r4, dqcoeffmp
+ mov r5, iscanmp
+%ifidn %1, b_32x32
+ psllw m4, 1
+%endif
+ pxor m5, m5 ; m5 = dedicated zero
+ DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, d6, eob
+ lea coeffq, [ coeffq+ncoeffq*2]
+ lea iscanq, [ iscanq+ncoeffq*2]
+ lea qcoeffq, [ qcoeffq+ncoeffq*2]
+ lea dqcoeffq, [dqcoeffq+ncoeffq*2]
+ neg ncoeffq
+
+ ; get DC and first 15 AC coeffs
+ mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
+ pabsw m6, m9 ; m6 = abs(m9)
+ pabsw m11, m10 ; m11 = abs(m10)
+ pcmpeqw m7, m7
+
+ paddsw m6, m1 ; m6 += round
+ punpckhqdq m1, m1 ; duplicate the high qword of round into both halves
+ paddsw m11, m1 ; m11 += round
+ pmulhw m8, m6, m2 ; m8 = m6*q>>16
+ punpckhqdq m2, m2 ; duplicate the high qword of quant into both halves
+ pmulhw m13, m11, m2 ; m13 = m11*q>>16
+ psignw m8, m9 ; m8 = reinsert sign
+ psignw m13, m10 ; m13 = reinsert sign
+ mova [qcoeffq+ncoeffq*2+ 0], m8
+ mova [qcoeffq+ncoeffq*2+16], m13
+%ifidn %1, b_32x32
+ pabsw m8, m8
+ pabsw m13, m13
+%endif
+ pmullw m8, m3 ; dqc[i] = qc[i] * q
+ punpckhqdq m3, m3 ; duplicate the high qword of dequant into both halves
+ pmullw m13, m3 ; dqc[i] = qc[i] * q
+%ifidn %1, b_32x32
+ psrlw m8, 1
+ psrlw m13, 1
+ psignw m8, m9
+ psignw m13, m10
+%endif
+ mova [dqcoeffq+ncoeffq*2+ 0], m8
+ mova [dqcoeffq+ncoeffq*2+16], m13
+ pcmpeqw m8, m5 ; m8 = c[i] == 0
+ pcmpeqw m13, m5 ; m13 = c[i] == 0
+ mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
+ mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
+ psubw m6, m7 ; m6 = scan[i] + 1
+ psubw m11, m7 ; m11 = scan[i] + 1
+ pandn m8, m6 ; m8 = max(eob)
+ pandn m13, m11 ; m13 = max(eob)
+ pmaxsw m8, m13
+ add ncoeffq, mmsize
+ jz .accumulate_eob
+
+.ac_only_loop:
+ mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i]
+ mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i]
+ pabsw m6, m9 ; m6 = abs(m9)
+ pabsw m11, m10 ; m11 = abs(m10)
+ pcmpeqw m7, m7
+%ifidn %1, b_32x32
+ pmovmskb r6, m7
+ pmovmskb r2, m7
+ or r6, r2
+ jz .skip_iter ; NOTE(review): m7 was just set to all ones, so this never jumps
+%endif
+ paddsw m6, m1 ; m6 += round
+ paddsw m11, m1 ; m11 += round
+ pmulhw m14, m6, m2 ; m14 = m6*q>>16
+ pmulhw m13, m11, m2 ; m13 = m11*q>>16
+ psignw m14, m9 ; m14 = reinsert sign
+ psignw m13, m10 ; m13 = reinsert sign
+ mova [qcoeffq+ncoeffq*2+ 0], m14
+ mova [qcoeffq+ncoeffq*2+16], m13
+%ifidn %1, b_32x32
+ pabsw m14, m14
+ pabsw m13, m13
+%endif
+ pmullw m14, m3 ; dqc[i] = qc[i] * q
+ pmullw m13, m3 ; dqc[i] = qc[i] * q
+%ifidn %1, b_32x32
+ psrlw m14, 1
+ psrlw m13, 1
+ psignw m14, m9
+ psignw m13, m10
+%endif
+ mova [dqcoeffq+ncoeffq*2+ 0], m14
+ mova [dqcoeffq+ncoeffq*2+16], m13
+ pcmpeqw m14, m5 ; m14 = c[i] == 0
+ pcmpeqw m13, m5 ; m13 = c[i] == 0
+ mova m6, [ iscanq+ncoeffq*2+ 0] ; m6 = scan[i]
+ mova m11, [ iscanq+ncoeffq*2+16] ; m11 = scan[i]
+ psubw m6, m7 ; m6 = scan[i] + 1
+ psubw m11, m7 ; m11 = scan[i] + 1
+ pandn m14, m6 ; m14 = max(eob)
+ pandn m13, m11 ; m13 = max(eob)
+ pmaxsw m8, m14
+ pmaxsw m8, m13
+ add ncoeffq, mmsize
+ jl .ac_only_loop
+
+%ifidn %1, b_32x32
+ jmp .accumulate_eob
+.skip_iter:
+ mova [qcoeffq+ncoeffq*2+ 0], m5
+ mova [qcoeffq+ncoeffq*2+16], m5
+ mova [dqcoeffq+ncoeffq*2+ 0], m5
+ mova [dqcoeffq+ncoeffq*2+16], m5
+ add ncoeffq, mmsize
+ jl .ac_only_loop
+%endif
+
+.accumulate_eob:
+ ; horizontally accumulate/max eobs and write into [eob] memory pointer
+ mov r2, eobmp
+ pshufd m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0xe
+ pmaxsw m8, m7
+ pshuflw m7, m8, 0x1
+ pmaxsw m8, m7
+ pextrw r6, m8, 0
+ mov [r2], r6 ; NOTE(review): full-register store, while .blank writes eob as a word - confirm width
+ RET
+
+ ; skip-block, i.e. just write all zeroes
+.blank:
+ mov r0, dqcoeffmp
+ movifnidn ncoeffq, ncoeffmp
+ mov r2, qcoeffmp
+ mov r3, eobmp
+ DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob
+ lea dqcoeffq, [dqcoeffq+ncoeffq*2]
+ lea qcoeffq, [ qcoeffq+ncoeffq*2]
+ neg ncoeffq
+ pxor m7, m7
+.blank_loop:
+ mova [dqcoeffq+ncoeffq*2+ 0], m7
+ mova [dqcoeffq+ncoeffq*2+16], m7
+ mova [qcoeffq+ncoeffq*2+ 0], m7
+ mova [qcoeffq+ncoeffq*2+16], m7
+ add ncoeffq, mmsize
+ jl .blank_loop
+ mov word [eobq], 0
+ RET
+%endmacro
+
+INIT_XMM ssse3
+QUANTIZE_FP fp, 7
diff --git a/source/libvpx/vp9/vp9_cx_iface.c b/source/libvpx/vp9/vp9_cx_iface.c
index edd59ab..b150161 100644
--- a/source/libvpx/vp9/vp9_cx_iface.c
+++ b/source/libvpx/vp9/vp9_cx_iface.c
@@ -88,8 +88,8 @@
size_t pending_frame_magnitude;
vpx_image_t preview_img;
vp8_postproc_cfg_t preview_ppcfg;
- vpx_codec_pkt_list_decl(64) pkt_list;
- unsigned int fixed_kf_cntr;
+ vpx_codec_pkt_list_decl(128) pkt_list;
+ unsigned int fixed_kf_cntr;
};
static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
@@ -795,42 +795,7 @@
return VPX_CODEC_INVALID_PARAM;
}
- if (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF |
- VP8_EFLAG_NO_REF_ARF)) {
- int ref = 7;
-
- if (flags & VP8_EFLAG_NO_REF_LAST)
- ref ^= VP9_LAST_FLAG;
-
- if (flags & VP8_EFLAG_NO_REF_GF)
- ref ^= VP9_GOLD_FLAG;
-
- if (flags & VP8_EFLAG_NO_REF_ARF)
- ref ^= VP9_ALT_FLAG;
-
- vp9_use_as_reference(ctx->cpi, ref);
- }
-
- if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
- VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF |
- VP8_EFLAG_FORCE_ARF)) {
- int upd = 7;
-
- if (flags & VP8_EFLAG_NO_UPD_LAST)
- upd ^= VP9_LAST_FLAG;
-
- if (flags & VP8_EFLAG_NO_UPD_GF)
- upd ^= VP9_GOLD_FLAG;
-
- if (flags & VP8_EFLAG_NO_UPD_ARF)
- upd ^= VP9_ALT_FLAG;
-
- vp9_update_reference(ctx->cpi, upd);
- }
-
- if (flags & VP8_EFLAG_NO_UPD_ENTROPY) {
- vp9_update_entropy(ctx->cpi, 0);
- }
+ vp9_apply_encoding_flags(ctx->cpi, flags);
// Handle fixed keyframe intervals
if (ctx->cfg.kf_mode == VPX_KF_AUTO &&
@@ -843,7 +808,7 @@
// Initialize the encoder instance on the first frame.
if (res == VPX_CODEC_OK && ctx->cpi != NULL) {
- unsigned int lib_flags;
+ unsigned int lib_flags = 0;
YV12_BUFFER_CONFIG sd;
int64_t dst_time_stamp, dst_end_time_stamp;
size_t size, cx_data_sz;
@@ -853,9 +818,6 @@
if (ctx->base.init_flags & VPX_CODEC_USE_PSNR)
((VP9_COMP *)ctx->cpi)->b_calculate_psnr = 1;
- // Convert API flags to internal codec lib flags
- lib_flags = (flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
-
/* vp9 use 10,000,000 ticks/second as time stamp */
dst_time_stamp = (pts * 10000000 * ctx->cfg.g_timebase.num)
/ ctx->cfg.g_timebase.den;
@@ -865,7 +827,9 @@
if (img != NULL) {
res = image2yuvconfig(img, &sd);
- if (vp9_receive_raw_frame(ctx->cpi, lib_flags,
+ // Store the original flags into the frame buffer. The key frame flag is
+ // extracted when this frame is actually encoded.
+ if (vp9_receive_raw_frame(ctx->cpi, flags,
&sd, dst_time_stamp, dst_end_time_stamp)) {
VP9_COMP *cpi = (VP9_COMP *)ctx->cpi;
res = update_error_state(ctx, &cpi->common.error);
@@ -874,7 +838,6 @@
cx_data = ctx->cx_data;
cx_data_sz = ctx->cx_data_sz;
- lib_flags = 0;
/* Any pending invisible frames? */
if (ctx->pending_cx_data) {
@@ -902,7 +865,12 @@
VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
// Pack invisible frames with the next visible frame
- if (cpi->common.show_frame == 0) {
+ if (cpi->common.show_frame == 0
+#ifdef CONFIG_SPATIAL_SVC
+ || (cpi->use_svc && cpi->svc.number_temporal_layers == 1 &&
+ cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
+#endif
+ ) {
if (ctx->pending_cx_data == 0)
ctx->pending_cx_data = cx_data;
ctx->pending_cx_data_sz += size;
@@ -925,7 +893,12 @@
/ ctx->cfg.g_timebase.num / 10000000);
pkt.data.frame.flags = lib_flags << 16;
- if (lib_flags & FRAMEFLAGS_KEY)
+ if (lib_flags & FRAMEFLAGS_KEY
+#ifdef CONFIG_SPATIAL_SVC
+ || (cpi->use_svc && cpi->svc.number_temporal_layers == 1 &&
+ cpi->svc.layer_context[0].is_key_frame)
+#endif
+ )
pkt.data.frame.flags |= VPX_FRAME_IS_KEY;
if (cpi->common.show_frame == 0) {
@@ -1165,24 +1138,19 @@
VP9_COMP *const cpi = ctx->cpi;
vpx_svc_parameters_t *const params = va_arg(args, vpx_svc_parameters_t *);
- if (params == NULL)
+ if (params == NULL || params->spatial_layer < 0 ||
+ params->spatial_layer >= cpi->svc.number_spatial_layers)
return VPX_CODEC_INVALID_PARAM;
- cpi->svc.spatial_layer_id = params->spatial_layer;
- cpi->svc.temporal_layer_id = params->temporal_layer;
+ if (params->spatial_layer == 0) {
+ int i;
+ for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
+ cpi->svc.layer_context[i].svc_params_received.spatial_layer = -1;
+ }
+ }
- cpi->lst_fb_idx = params->lst_fb_idx;
- cpi->gld_fb_idx = params->gld_fb_idx;
- cpi->alt_fb_idx = params->alt_fb_idx;
-
- if (vp9_set_size_literal(ctx->cpi, params->width, params->height) != 0)
- return VPX_CODEC_INVALID_PARAM;
-
- ctx->cfg.rc_max_quantizer = params->max_quantizer;
- ctx->cfg.rc_min_quantizer = params->min_quantizer;
-
- set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
- vp9_change_config(ctx->cpi, &ctx->oxcf);
+ cpi->svc.layer_context[params->spatial_layer].svc_params_received =
+ *params;
return VPX_CODEC_OK;
}
diff --git a/source/libvpx/vp9/vp9_dx_iface.c b/source/libvpx/vp9/vp9_dx_iface.c
index 52d7d9f..c3ca7ee 100644
--- a/source/libvpx/vp9/vp9_dx_iface.c
+++ b/source/libvpx/vp9/vp9_dx_iface.c
@@ -317,10 +317,17 @@
return *data;
}
-static void parse_superframe_index(const uint8_t *data, size_t data_sz,
- uint32_t sizes[8], int *count,
- vpx_decrypt_cb decrypt_cb,
- void *decrypt_state) {
+static vpx_codec_err_t parse_superframe_index(const uint8_t *data,
+ size_t data_sz,
+ uint32_t sizes[8], int *count,
+ vpx_decrypt_cb decrypt_cb,
+ void *decrypt_state) {
+ // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
+ // it is a super frame index. If the last byte of real video compression
+ // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
+ // not the associated matching marker byte at the front of the index we have
+ // an invalid bitstream and need to return an error.
+
uint8_t marker;
assert(data_sz);
@@ -332,35 +339,46 @@
const uint32_t mag = ((marker >> 3) & 0x3) + 1;
const size_t index_sz = 2 + mag * frames;
- if (data_sz >= index_sz) {
- uint8_t marker2 = read_marker(decrypt_cb, decrypt_state,
- data + data_sz - index_sz);
+ // This chunk is marked as having a superframe index but doesn't have
+ // enough data for it, thus it's an invalid superframe index.
+ if (data_sz < index_sz)
+ return VPX_CODEC_CORRUPT_FRAME;
- if (marker == marker2) {
- // Found a valid superframe index.
- uint32_t i, j;
- const uint8_t *x = &data[data_sz - index_sz + 1];
+ {
+ const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state,
+ data + data_sz - index_sz);
- // Frames has a maximum of 8 and mag has a maximum of 4.
- uint8_t clear_buffer[32];
- assert(sizeof(clear_buffer) >= frames * mag);
- if (decrypt_cb) {
- decrypt_cb(decrypt_state, x, clear_buffer, frames * mag);
- x = clear_buffer;
- }
+ // This chunk is marked as having a superframe index but doesn't have
+ // the matching marker byte at the front of the index therefore it's an
+ // invalid chunk.
+ if (marker != marker2)
+ return VPX_CODEC_CORRUPT_FRAME;
+ }
- for (i = 0; i < frames; ++i) {
- uint32_t this_sz = 0;
+ {
+ // Found a valid superframe index.
+ uint32_t i, j;
+ const uint8_t *x = &data[data_sz - index_sz + 1];
- for (j = 0; j < mag; ++j)
- this_sz |= (*x++) << (j * 8);
- sizes[i] = this_sz;
- }
-
- *count = frames;
+ // Frames has a maximum of 8 and mag has a maximum of 4.
+ uint8_t clear_buffer[32];
+ assert(sizeof(clear_buffer) >= frames * mag);
+ if (decrypt_cb) {
+ decrypt_cb(decrypt_state, x, clear_buffer, frames * mag);
+ x = clear_buffer;
}
+
+ for (i = 0; i < frames; ++i) {
+ uint32_t this_sz = 0;
+
+ for (j = 0; j < mag; ++j)
+ this_sz |= (*x++) << (j * 8);
+ sizes[i] = this_sz;
+ }
+ *count = frames;
}
}
+ return VPX_CODEC_OK;
}
static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx,
@@ -375,8 +393,10 @@
if (data == NULL || data_sz == 0)
return VPX_CODEC_INVALID_PARAM;
- parse_superframe_index(data, data_sz, frame_sizes, &frame_count,
- ctx->decrypt_cb, ctx->decrypt_state);
+ res = parse_superframe_index(data, data_sz, frame_sizes, &frame_count,
+ ctx->decrypt_cb, ctx->decrypt_state);
+ if (res != VPX_CODEC_OK)
+ return res;
if (ctx->frame_parallel_decode) {
// Decode in frame parallel mode. When decoding in this mode, the frame
diff --git a/source/libvpx/vp9/vp9_iface_common.h b/source/libvpx/vp9/vp9_iface_common.h
index d60883c..b90c37b 100644
--- a/source/libvpx/vp9/vp9_iface_common.h
+++ b/source/libvpx/vp9/vp9_iface_common.h
@@ -31,6 +31,7 @@
img->fmt = VPX_IMG_FMT_I420;
bps = 12;
}
+ img->bit_depth = 8;
img->w = yv12->y_stride;
img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
img->d_w = yv12->y_crop_width;
diff --git a/source/libvpx/vp9/vp9cx.mk b/source/libvpx/vp9/vp9cx.mk
index 9dbb678..4d1b21c 100644
--- a/source/libvpx/vp9/vp9cx.mk
+++ b/source/libvpx/vp9/vp9cx.mk
@@ -47,6 +47,7 @@
VP9_CX_SRCS-yes += encoder/vp9_encoder.h
VP9_CX_SRCS-yes += encoder/vp9_quantize.h
VP9_CX_SRCS-yes += encoder/vp9_ratectrl.h
+VP9_CX_SRCS-yes += encoder/vp9_rd.h
VP9_CX_SRCS-yes += encoder/vp9_rdopt.h
VP9_CX_SRCS-yes += encoder/vp9_pickmode.h
VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.h
@@ -59,6 +60,7 @@
VP9_CX_SRCS-yes += encoder/vp9_picklpf.h
VP9_CX_SRCS-yes += encoder/vp9_quantize.c
VP9_CX_SRCS-yes += encoder/vp9_ratectrl.c
+VP9_CX_SRCS-yes += encoder/vp9_rd.c
VP9_CX_SRCS-yes += encoder/vp9_rdopt.c
VP9_CX_SRCS-yes += encoder/vp9_pickmode.c
VP9_CX_SRCS-yes += encoder/vp9_sad.c
@@ -105,11 +107,9 @@
ifeq ($(CONFIG_USE_X86INC),yes)
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_dct_mmx.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
-VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_variance_sse2.c
-VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance.asm
endif
@@ -124,7 +124,9 @@
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct32x32_sse2.c
-VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct32x32_avx2.c
+VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct_avx2.c
+VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_intrin_avx2.c
+VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_avx2.c
VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes))
diff --git a/source/libvpx/vpx/src/svc_encodeframe.c b/source/libvpx/vpx/src/svc_encodeframe.c
index 17e165b..6c15f6e 100644
--- a/source/libvpx/vpx/src/svc_encodeframe.c
+++ b/source/libvpx/vpx/src/svc_encodeframe.c
@@ -24,6 +24,7 @@
#include "vpx/svc_context.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
+#include "vpx_mem/vpx_mem.h"
#ifdef __MINGW32__
#define strtok_r strtok_s
@@ -47,17 +48,22 @@
static const char *DEFAULT_QUANTIZER_VALUES = "60,53,39,33,27";
static const char *DEFAULT_SCALE_FACTORS = "4/16,5/16,7/16,11/16,16/16";
+// One encoded frame, kept as a node of a singly linked list
+typedef struct FrameData {
+ void *buf; // compressed data buffer
+ size_t size; // length of compressed data
+ vpx_codec_frame_flags_t flags; // flags for this frame
+ struct FrameData *next; // following node; fd_list_add() sets NULL at the tail
+} FrameData;
+
typedef struct SvcInternal {
char options[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_options
char quantizers[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_quantizers
- char quantizers_keyframe[OPTION_BUFFER_SIZE]; // set by
- // vpx_svc_set_quantizers
char scale_factors[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_scale_factors
// values extracted from option, quantizers
int scaling_factor_num[VPX_SS_MAX_LAYERS];
int scaling_factor_den[VPX_SS_MAX_LAYERS];
- int quantizer_keyframe[VPX_SS_MAX_LAYERS];
int quantizer[VPX_SS_MAX_LAYERS];
// accumulated statistics
@@ -72,15 +78,15 @@
// state variables
int encode_frame_count;
+ int frame_received;
int frame_within_gop;
vpx_enc_frame_flags_t enc_frame_flags;
int layers;
int layer;
int is_keyframe;
- size_t frame_size;
- size_t buffer_size;
- void *buffer;
+ FrameData *frame_list;
+ FrameData *frame_temp;
char *rc_stats_buf;
size_t rc_stats_buf_size;
@@ -90,128 +96,54 @@
vpx_codec_ctx_t *codec_ctx;
} SvcInternal;
-// Superframe is used to generate an index of individual frames (i.e., layers)
-struct Superframe {
- int count;
- uint32_t sizes[SUPERFRAME_SLOTS];
- uint32_t magnitude;
- uint8_t buffer[SUPERFRAME_BUFFER_SIZE];
- size_t index_size;
-};
-
-// One encoded frame layer
-struct LayerData {
- void *buf; // compressed data buffer
- size_t size; // length of compressed data
- struct LayerData *next;
-};
-
-// create LayerData from encoder output
-static struct LayerData *ld_create(void *buf, size_t size) {
- struct LayerData *const layer_data =
- (struct LayerData *)malloc(sizeof(*layer_data));
- if (layer_data == NULL) {
+// create FrameData from encoder output
+static struct FrameData *fd_create(void *buf, size_t size,
+ vpx_codec_frame_flags_t flags) {
+ struct FrameData *const frame_data =
+ (struct FrameData *)vpx_malloc(sizeof(*frame_data));
+ if (frame_data == NULL) {
return NULL;
}
- layer_data->buf = malloc(size);
- if (layer_data->buf == NULL) {
- free(layer_data);
+ frame_data->buf = vpx_malloc(size);
+ if (frame_data->buf == NULL) {
+ vpx_free(frame_data);
return NULL;
}
- memcpy(layer_data->buf, buf, size);
- layer_data->size = size;
- return layer_data;
+ vpx_memcpy(frame_data->buf, buf, size);
+ frame_data->size = size;
+ frame_data->flags = flags;
+ return frame_data;
}
-// free LayerData
-static void ld_free(struct LayerData *layer_data) {
- if (layer_data) {
- if (layer_data->buf) {
- free(layer_data->buf);
- layer_data->buf = NULL;
- }
- free(layer_data);
+// free FrameData
+static void fd_free(struct FrameData *p) {
+ if (p) {
+ if (p->buf)
+ vpx_free(p->buf);
+ vpx_free(p);
}
}
-// add layer data to list
-static void ld_list_add(struct LayerData **list, struct LayerData *layer_data) {
- struct LayerData **p = list;
+// add FrameData to list
+static void fd_list_add(struct FrameData **list, struct FrameData *layer_data) {
+ struct FrameData **p = list;
while (*p != NULL) p = &(*p)->next;
*p = layer_data;
layer_data->next = NULL;
}
-// get accumulated size of layer data
-static size_t ld_list_get_buffer_size(struct LayerData *list) {
- struct LayerData *p;
- size_t size = 0;
-
- for (p = list; p != NULL; p = p->next) {
- size += p->size;
- }
- return size;
-}
-
-// copy layer data to buffer
-static void ld_list_copy_to_buffer(struct LayerData *list, uint8_t *buffer) {
- struct LayerData *p;
-
- for (p = list; p != NULL; p = p->next) {
- buffer[0] = 1;
- memcpy(buffer, p->buf, p->size);
- buffer += p->size;
- }
-}
-
-// free layer data list
-static void ld_list_free(struct LayerData *list) {
- struct LayerData *p = list;
+// free FrameData list
+static void fd_free_list(struct FrameData *list) {
+ struct FrameData *p = list;
while (p) {
list = list->next;
- ld_free(p);
+ fd_free(p);
p = list;
}
}
-static void sf_create_index(struct Superframe *sf) {
- uint8_t marker = 0xc0;
- int i;
- uint32_t mag, mask;
- uint8_t *bufp;
-
- if (sf->count == 0 || sf->count >= 8) return;
-
- // Add the number of frames to the marker byte
- marker |= sf->count - 1;
-
- // Choose the magnitude
- for (mag = 0, mask = 0xff; mag < 4; ++mag) {
- if (sf->magnitude < mask) break;
- mask <<= 8;
- mask |= 0xff;
- }
- marker |= mag << 3;
-
- // Write the index
- sf->index_size = 2 + (mag + 1) * sf->count;
- bufp = sf->buffer;
-
- *bufp++ = marker;
- for (i = 0; i < sf->count; ++i) {
- int this_sz = sf->sizes[i];
- uint32_t j;
-
- for (j = 0; j <= mag; ++j) {
- *bufp++ = this_sz & 0xff;
- this_sz >>= 8;
- }
- }
- *bufp++ = marker;
-}
-
static SvcInternal *get_svc_internal(SvcContext *svc_ctx) {
if (svc_ctx == NULL) return NULL;
if (svc_ctx->internal == NULL) {
@@ -262,26 +194,8 @@
return retval;
}
-static vpx_codec_err_t set_option_encoding_mode(SvcContext *svc_ctx,
- const char *value_str) {
- if (strcmp(value_str, "i") == 0) {
- svc_ctx->encoding_mode = INTER_LAYER_PREDICTION_I;
- } else if (strcmp(value_str, "alt-ip") == 0) {
- svc_ctx->encoding_mode = ALT_INTER_LAYER_PREDICTION_IP;
- } else if (strcmp(value_str, "ip") == 0) {
- svc_ctx->encoding_mode = INTER_LAYER_PREDICTION_IP;
- } else if (strcmp(value_str, "gf") == 0) {
- svc_ctx->encoding_mode = USE_GOLDEN_FRAME;
- } else {
- svc_log(svc_ctx, SVC_LOG_ERROR, "invalid encoding mode: %s", value_str);
- return VPX_CODEC_INVALID_PARAM;
- }
- return VPX_CODEC_OK;
-}
-
static vpx_codec_err_t parse_quantizer_values(SvcContext *svc_ctx,
- const char *quantizer_values,
- const int is_keyframe) {
+ const char *quantizer_values) {
char *input_string;
char *token;
const char *delim = ",";
@@ -292,11 +206,6 @@
SvcInternal *const si = get_svc_internal(svc_ctx);
if (quantizer_values == NULL || strlen(quantizer_values) == 0) {
- if (is_keyframe) {
- // If there non settings for key frame, we will apply settings from
- // non key frame. So just simply return here.
- return VPX_CODEC_INVALID_PARAM;
- }
input_string = strdup(DEFAULT_QUANTIZER_VALUES);
} else {
input_string = strdup(quantizer_values);
@@ -317,12 +226,7 @@
} else {
q = 0;
}
- if (is_keyframe) {
- si->quantizer_keyframe[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers]
- = q;
- } else {
- si->quantizer[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = q;
- }
+ si->quantizer[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = q;
}
if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) {
svc_log(svc_ctx, SVC_LOG_ERROR,
@@ -407,7 +311,6 @@
char *option_name;
char *option_value;
char *input_ptr;
- int is_keyframe_qaunt_set = 0;
vpx_codec_err_t res = VPX_CODEC_OK;
if (options == NULL) return VPX_CODEC_OK;
@@ -424,26 +327,14 @@
res = VPX_CODEC_INVALID_PARAM;
break;
}
- if (strcmp("encoding-mode", option_name) == 0) {
- res = set_option_encoding_mode(svc_ctx, option_value);
- if (res != VPX_CODEC_OK) break;
- } else if (strcmp("layers", option_name) == 0) {
+ if (strcmp("layers", option_name) == 0) {
svc_ctx->spatial_layers = atoi(option_value);
} else if (strcmp("scale-factors", option_name) == 0) {
res = parse_scale_factors(svc_ctx, option_value);
if (res != VPX_CODEC_OK) break;
} else if (strcmp("quantizers", option_name) == 0) {
- res = parse_quantizer_values(svc_ctx, option_value, 0);
+ res = parse_quantizer_values(svc_ctx, option_value);
if (res != VPX_CODEC_OK) break;
- if (!is_keyframe_qaunt_set) {
- SvcInternal *const si = get_svc_internal(svc_ctx);
- memcpy(get_svc_internal(svc_ctx)->quantizer_keyframe, si->quantizer,
- sizeof(si->quantizer));
- }
- } else if (strcmp("quantizers-keyframe", option_name) == 0) {
- res = parse_quantizer_values(svc_ctx, option_value, 1);
- if (res != VPX_CODEC_OK) break;
- is_keyframe_qaunt_set = 1;
} else {
svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name);
res = VPX_CODEC_INVALID_PARAM;
@@ -466,19 +357,13 @@
}
vpx_codec_err_t vpx_svc_set_quantizers(SvcContext *svc_ctx,
- const char *quantizers,
- const int is_for_keyframe) {
+ const char *quantizers) {
SvcInternal *const si = get_svc_internal(svc_ctx);
if (svc_ctx == NULL || quantizers == NULL || si == NULL) {
return VPX_CODEC_INVALID_PARAM;
}
- if (is_for_keyframe) {
- strncpy(si->quantizers_keyframe, quantizers, sizeof(si->quantizers));
- si->quantizers_keyframe[sizeof(si->quantizers_keyframe) - 1] = '\0';
- } else {
- strncpy(si->quantizers, quantizers, sizeof(si->quantizers));
- si->quantizers[sizeof(si->quantizers) - 1] = '\0';
- }
+ strncpy(si->quantizers, quantizers, sizeof(si->quantizers));
+ si->quantizers[sizeof(si->quantizers) - 1] = '\0';
return VPX_CODEC_OK;
}
@@ -525,13 +410,9 @@
return VPX_CODEC_INVALID_PARAM;
}
- res = parse_quantizer_values(svc_ctx, si->quantizers, 0);
+ res = parse_quantizer_values(svc_ctx, si->quantizers);
if (res != VPX_CODEC_OK) return res;
- res = parse_quantizer_values(svc_ctx, si->quantizers_keyframe, 1);
- if (res != VPX_CODEC_OK)
- memcpy(si->quantizer_keyframe, si->quantizer, sizeof(si->quantizer));
-
res = parse_scale_factors(svc_ctx, si->scale_factors);
if (res != VPX_CODEC_OK) return res;
@@ -574,8 +455,6 @@
// modify encoder configuration
enc_cfg->ss_number_layers = si->layers;
enc_cfg->ts_number_layers = 1; // Temporal layers not used in this encoder.
- // Lag in frames not currently supported
- enc_cfg->g_lag_in_frames = 0;
// TODO(ivanmaltz): determine if these values need to be set explicitly for
// svc, or if the normal default/override mechanism can be used
@@ -608,6 +487,34 @@
return VPX_CODEC_OK;
}
+static void accumulate_frame_size_for_each_layer(SvcInternal *const si,
+ const uint8_t *const buf,
+ const size_t size) {
+ uint8_t marker = buf[size - 1];
+ if ((marker & 0xe0) == 0xc0) {
+ const uint32_t frames = (marker & 0x7) + 1;
+ const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+ const size_t index_sz = 2 + mag * frames;
+
+ uint8_t marker2 = buf[size - index_sz];
+
+ if (size >= index_sz && marker2 == marker) {
+ // found a valid superframe index
+ uint32_t i, j;
+ const uint8_t *x = &buf[size - index_sz + 1];
+
+ // frames has a maximum of 8 and mag has a maximum of 4.
+ for (i = 0; i < frames; i++) {
+ uint32_t this_sz = 0;
+
+ for (j = 0; j < mag; j++)
+ this_sz |= (*x++) << (j * 8);
+ si->bytes_sum[i] += this_sz;
+ }
+ }
+ }
+}
+
// SVC Algorithm flags - these get mapped to VP8_EFLAG_* defined in vp8cx.h
// encoder should reference the last frame
@@ -664,62 +571,14 @@
return;
}
- switch (svc_ctx->encoding_mode) {
- case ALT_INTER_LAYER_PREDICTION_IP:
- if (si->layer == 0) {
- flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
- } else if (is_keyframe) {
- if (si->layer == si->layers - 1) {
- flags = map_vp8_flags(USE_ARF | UPDATE_LAST);
- } else {
- flags = map_vp8_flags(USE_ARF | UPDATE_LAST | UPDATE_GF);
- }
- } else {
- flags = map_vp8_flags(USE_LAST | USE_ARF | UPDATE_LAST);
- }
- break;
- case INTER_LAYER_PREDICTION_I:
- if (si->layer == 0) {
- flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
- } else if (is_keyframe) {
- flags = map_vp8_flags(USE_ARF | UPDATE_LAST);
- } else {
- flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
- }
- break;
- case INTER_LAYER_PREDICTION_IP:
- if (si->layer == 0) {
- flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
- } else if (is_keyframe) {
- flags = map_vp8_flags(USE_ARF | UPDATE_LAST);
- } else {
- flags = map_vp8_flags(USE_LAST | USE_ARF | UPDATE_LAST);
- }
- break;
- case USE_GOLDEN_FRAME:
- if (2 * si->layers - SVC_REFERENCE_FRAMES <= si->layer) {
- if (si->layer == 0) {
- flags = map_vp8_flags(USE_LAST | USE_GF | UPDATE_LAST);
- } else if (is_keyframe) {
- flags = map_vp8_flags(USE_ARF | UPDATE_LAST | UPDATE_GF);
- } else {
- flags = map_vp8_flags(USE_LAST | USE_ARF | USE_GF | UPDATE_LAST);
- }
- } else {
- if (si->layer == 0) {
- flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
- } else if (is_keyframe) {
- flags = map_vp8_flags(USE_ARF | UPDATE_LAST);
- } else {
- flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
- }
- }
- break;
- default:
- svc_log(svc_ctx, SVC_LOG_ERROR, "unexpected encoding mode: %d\n",
- svc_ctx->encoding_mode);
- break;
+ if (si->layer == 0) {
+ flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
+ } else if (is_keyframe) {
+ flags = map_vp8_flags(USE_ARF | UPDATE_LAST);
+ } else {
+ flags = map_vp8_flags(USE_LAST | USE_ARF | UPDATE_LAST);
}
+
si->enc_frame_flags = flags;
}
@@ -765,13 +624,6 @@
svc_params.flags = si->enc_frame_flags;
layer = si->layer;
- if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP &&
- si->frame_within_gop == 0) {
- // layers 1 & 3 don't exist in this mode, use the higher one
- if (layer == 0 || layer == 2) {
- layer += 1;
- }
- }
if (VPX_CODEC_OK != vpx_svc_get_layer_resolution(svc_ctx, layer,
&svc_params.width,
&svc_params.height)) {
@@ -780,13 +632,8 @@
layer_index = layer + VPX_SS_MAX_LAYERS - si->layers;
if (codec_ctx->config.enc->g_pass == VPX_RC_ONE_PASS) {
- if (vpx_svc_is_keyframe(svc_ctx)) {
- svc_params.min_quantizer = si->quantizer_keyframe[layer_index];
- svc_params.max_quantizer = si->quantizer_keyframe[layer_index];
- } else {
- svc_params.min_quantizer = si->quantizer[layer_index];
- svc_params.max_quantizer = si->quantizer[layer_index];
- }
+ svc_params.min_quantizer = si->quantizer[layer_index];
+ svc_params.max_quantizer = si->quantizer[layer_index];
} else {
svc_params.min_quantizer = codec_ctx->config.enc->rc_min_quantizer;
svc_params.max_quantizer = codec_ctx->config.enc->rc_max_quantizer;
@@ -798,21 +645,8 @@
svc_params.lst_fb_idx = si->layer;
// Use buffer i-1 for layer i Alt (Inter-layer prediction)
- if (si->layer != 0) {
- const int use_higher_layer =
- svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP &&
- si->frame_within_gop == 0;
- svc_params.alt_fb_idx = use_higher_layer ? si->layer - 2 : si->layer - 1;
- }
-
- if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP) {
- svc_params.gld_fb_idx = si->layer + 1;
- } else {
- if (si->layer < 2 * si->layers - SVC_REFERENCE_FRAMES)
- svc_params.gld_fb_idx = svc_params.lst_fb_idx;
- else
- svc_params.gld_fb_idx = 2 * si->layers - 1 - si->layer;
- }
+ svc_params.alt_fb_idx = (si->layer > 0) ? si->layer - 1 : 0;
+ svc_params.gld_fb_idx = svc_params.lst_fb_idx;
svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, layer: %d, %dx%d, q: %d\n",
si->encode_frame_count, si->layer, svc_params.width,
@@ -846,15 +680,12 @@
vpx_codec_err_t res;
vpx_codec_iter_t iter;
const vpx_codec_cx_pkt_t *cx_pkt;
- struct LayerData *cx_layer_list = NULL;
- struct LayerData *layer_data;
- struct Superframe superframe;
+ int layer_for_psnr = 0;
SvcInternal *const si = get_svc_internal(svc_ctx);
if (svc_ctx == NULL || codec_ctx == NULL || si == NULL) {
return VPX_CODEC_INVALID_PARAM;
}
- memset(&superframe, 0, sizeof(superframe));
svc_log_reset(svc_ctx);
si->rc_stats_buf_used = 0;
@@ -863,7 +694,6 @@
si->frame_within_gop = 0;
}
si->is_keyframe = (si->frame_within_gop == 0);
- si->frame_size = 0;
if (rawimg != NULL) {
svc_log(svc_ctx, SVC_LOG_DEBUG,
@@ -872,124 +702,85 @@
si->frame_within_gop);
}
- // encode each layer
- for (si->layer = 0; si->layer < si->layers; ++si->layer) {
- if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP &&
- si->is_keyframe && (si->layer == 1 || si->layer == 3)) {
- svc_log(svc_ctx, SVC_LOG_DEBUG, "Skip encoding layer %d\n", si->layer);
- continue;
- }
-
- if (rawimg != NULL) {
+ if (rawimg != NULL) {
+ // encode each layer
+ for (si->layer = 0; si->layer < si->layers; ++si->layer) {
calculate_enc_frame_flags(svc_ctx);
set_svc_parameters(svc_ctx, codec_ctx);
}
+ }
- res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration,
- si->enc_frame_flags, deadline);
- if (res != VPX_CODEC_OK) {
- return res;
- }
- // save compressed data
- iter = NULL;
- while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) {
- switch (cx_pkt->kind) {
- case VPX_CODEC_CX_FRAME_PKT: {
- const uint32_t frame_pkt_size = (uint32_t)(cx_pkt->data.frame.sz);
- si->bytes_sum[si->layer] += frame_pkt_size;
- svc_log(svc_ctx, SVC_LOG_DEBUG,
- "SVC frame: %d, layer: %d, size: %u\n",
- si->encode_frame_count, si->layer, frame_pkt_size);
- layer_data =
- ld_create(cx_pkt->data.frame.buf, (size_t)frame_pkt_size);
- if (layer_data == NULL) {
- svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating LayerData\n");
- return VPX_CODEC_OK;
+ res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration, 0,
+ deadline);
+ if (res != VPX_CODEC_OK) {
+ return res;
+ }
+ // save compressed data
+ iter = NULL;
+ while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) {
+ switch (cx_pkt->kind) {
+ case VPX_CODEC_CX_FRAME_PKT: {
+ fd_list_add(&si->frame_list, fd_create(cx_pkt->data.frame.buf,
+ cx_pkt->data.frame.sz,
+ cx_pkt->data.frame.flags));
+ accumulate_frame_size_for_each_layer(si, cx_pkt->data.frame.buf,
+ cx_pkt->data.frame.sz);
+
+ svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, "
+ "pts: %d\n", si->frame_received,
+ (cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? 1 : 0,
+ (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
+
+ ++si->frame_received;
+ layer_for_psnr = 0;
+ break;
+ }
+ case VPX_CODEC_PSNR_PKT: {
+ int i;
+ svc_log(svc_ctx, SVC_LOG_DEBUG,
+ "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): "
+ "%2.3f %2.3f %2.3f %2.3f \n",
+ si->frame_received, layer_for_psnr,
+ cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1],
+ cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]);
+ svc_log(svc_ctx, SVC_LOG_DEBUG,
+ "SVC frame: %d, layer: %d, SSE(Total/Y/U/V): "
+ "%2.3f %2.3f %2.3f %2.3f \n",
+ si->frame_received, layer_for_psnr,
+ cx_pkt->data.psnr.sse[0], cx_pkt->data.psnr.sse[1],
+ cx_pkt->data.psnr.sse[2], cx_pkt->data.psnr.sse[3]);
+ for (i = 0; i < COMPONENTS; i++) {
+ si->psnr_sum[layer_for_psnr][i] += cx_pkt->data.psnr.psnr[i];
+ si->sse_sum[layer_for_psnr][i] += cx_pkt->data.psnr.sse[i];
+ }
+ ++layer_for_psnr;
+ break;
+ }
+ case VPX_CODEC_STATS_PKT: {
+ size_t new_size = si->rc_stats_buf_used +
+ cx_pkt->data.twopass_stats.sz;
+
+ if (new_size > si->rc_stats_buf_size) {
+ char *p = (char*)realloc(si->rc_stats_buf, new_size);
+ if (p == NULL) {
+ svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating stats buf\n");
+ return VPX_CODEC_MEM_ERROR;
}
- ld_list_add(&cx_layer_list, layer_data);
+ si->rc_stats_buf = p;
+ si->rc_stats_buf_size = new_size;
+ }
- // save layer size in superframe index
- superframe.sizes[superframe.count++] = frame_pkt_size;
- superframe.magnitude |= frame_pkt_size;
- break;
- }
- case VPX_CODEC_PSNR_PKT: {
- int i;
- svc_log(svc_ctx, SVC_LOG_DEBUG,
- "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): "
- "%2.3f %2.3f %2.3f %2.3f \n",
- si->encode_frame_count, si->layer,
- cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1],
- cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]);
- svc_log(svc_ctx, SVC_LOG_DEBUG,
- "SVC frame: %d, layer: %d, SSE(Total/Y/U/V): "
- "%2.3f %2.3f %2.3f %2.3f \n",
- si->encode_frame_count, si->layer,
- cx_pkt->data.psnr.sse[0], cx_pkt->data.psnr.sse[1],
- cx_pkt->data.psnr.sse[2], cx_pkt->data.psnr.sse[3]);
- for (i = 0; i < COMPONENTS; i++) {
- si->psnr_sum[si->layer][i] += cx_pkt->data.psnr.psnr[i];
- si->sse_sum[si->layer][i] += cx_pkt->data.psnr.sse[i];
- }
- break;
- }
- case VPX_CODEC_STATS_PKT: {
- size_t new_size = si->rc_stats_buf_used +
- cx_pkt->data.twopass_stats.sz;
-
- if (new_size > si->rc_stats_buf_size) {
- char *p = (char*)realloc(si->rc_stats_buf, new_size);
- if (p == NULL) {
- svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating stats buf\n");
- break;
- }
- si->rc_stats_buf = p;
- si->rc_stats_buf_size = new_size;
- }
-
- memcpy(si->rc_stats_buf + si->rc_stats_buf_used,
- cx_pkt->data.twopass_stats.buf, cx_pkt->data.twopass_stats.sz);
- si->rc_stats_buf_used += cx_pkt->data.twopass_stats.sz;
- break;
- }
- default: {
- break;
- }
+ memcpy(si->rc_stats_buf + si->rc_stats_buf_used,
+ cx_pkt->data.twopass_stats.buf, cx_pkt->data.twopass_stats.sz);
+ si->rc_stats_buf_used += cx_pkt->data.twopass_stats.sz;
+ break;
+ }
+ default: {
+ break;
}
}
- if (rawimg == NULL) {
- break;
- }
}
- if (codec_ctx->config.enc->g_pass != VPX_RC_FIRST_PASS) {
- // add superframe index to layer data list
- sf_create_index(&superframe);
- layer_data = ld_create(superframe.buffer, superframe.index_size);
- ld_list_add(&cx_layer_list, layer_data);
- // get accumulated size of layer data
- si->frame_size = ld_list_get_buffer_size(cx_layer_list);
- if (si->frame_size > 0) {
- // all layers encoded, create single buffer with concatenated layers
- if (si->frame_size > si->buffer_size) {
- free(si->buffer);
- si->buffer = malloc(si->frame_size);
- if (si->buffer == NULL) {
- ld_list_free(cx_layer_list);
- return VPX_CODEC_MEM_ERROR;
- }
- si->buffer_size = si->frame_size;
- }
- // copy layer data into packet
- ld_list_copy_to_buffer(cx_layer_list, (uint8_t *)si->buffer);
-
- ld_list_free(cx_layer_list);
-
- svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, "
- "pts: %d\n", si->encode_frame_count, si->is_keyframe,
- (int)si->frame_size, (int)pts);
- }
- }
if (rawimg != NULL) {
++si->frame_within_gop;
++si->encode_frame_count;
@@ -1004,16 +795,27 @@
return si->message_buffer;
}
-void *vpx_svc_get_buffer(const SvcContext *svc_ctx) {
- const SvcInternal *const si = get_const_svc_internal(svc_ctx);
- if (svc_ctx == NULL || si == NULL) return NULL;
- return si->buffer;
+// We maintain a list of output frame buffers because, with lag_in_frames, we
+// need to output all frame buffers at the end. vpx_svc_get_buffer() removes a
+// frame buffer from the list and stores it in a temporary pointer, which is
+// freed at the next vpx_svc_get_buffer() call or when closing the encoder.
+void *vpx_svc_get_buffer(SvcContext *svc_ctx) {
+ SvcInternal *const si = get_svc_internal(svc_ctx);
+ if (svc_ctx == NULL || si == NULL || si->frame_list == NULL) return NULL;
+
+ if (si->frame_temp)
+ fd_free(si->frame_temp);
+
+ si->frame_temp = si->frame_list;
+ si->frame_list = si->frame_list->next;
+
+ return si->frame_temp->buf;
}
size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx) {
const SvcInternal *const si = get_const_svc_internal(svc_ctx);
- if (svc_ctx == NULL || si == NULL) return 0;
- return si->frame_size;
+ if (svc_ctx == NULL || si == NULL || si->frame_list == NULL) return 0;
+ return si->frame_list->size;
}
int vpx_svc_get_encode_frame_count(const SvcContext *svc_ctx) {
@@ -1024,8 +826,8 @@
int vpx_svc_is_keyframe(const SvcContext *svc_ctx) {
const SvcInternal *const si = get_const_svc_internal(svc_ctx);
- if (svc_ctx == NULL || si == NULL) return 0;
- return si->is_keyframe;
+ if (svc_ctx == NULL || si == NULL || si->frame_list == NULL) return 0;
+ return (si->frame_list->flags & VPX_FRAME_IS_KEY) != 0;
}
void vpx_svc_set_keyframe(SvcContext *svc_ctx) {
@@ -1041,7 +843,7 @@
// dump accumulated statistics and reset accumulated values
const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) {
- int number_of_frames, number_of_keyframes, encode_frame_count;
+ int number_of_frames, encode_frame_count;
int i, j;
uint32_t bytes_total = 0;
double scale[COMPONENTS];
@@ -1058,14 +860,9 @@
if (si->encode_frame_count <= 0) return vpx_svc_get_message(svc_ctx);
svc_log(svc_ctx, SVC_LOG_INFO, "\n");
- number_of_keyframes = encode_frame_count / si->kf_dist + 1;
for (i = 0; i < si->layers; ++i) {
number_of_frames = encode_frame_count;
- if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP &&
- (i == 1 || i == 3)) {
- number_of_frames -= number_of_keyframes;
- }
svc_log(svc_ctx, SVC_LOG_INFO,
"Layer %d Average PSNR=[%2.3f, %2.3f, %2.3f, %2.3f], Bytes=[%u]\n",
i, (double)si->psnr_sum[i][0] / number_of_frames,
@@ -1112,7 +909,8 @@
// SvcInternal if it was not already allocated
si = (SvcInternal *)svc_ctx->internal;
if (si != NULL) {
- free(si->buffer);
+ fd_free(si->frame_temp);
+ fd_free_list(si->frame_list);
if (si->rc_stats_buf) {
free(si->rc_stats_buf);
}
diff --git a/source/libvpx/vpx/src/vpx_image.c b/source/libvpx/vpx/src/vpx_image.c
index 36eda95..dc8fcbc 100644
--- a/source/libvpx/vpx/src/vpx_image.c
+++ b/source/libvpx/vpx/src/vpx_image.c
@@ -40,13 +40,13 @@
}
}
-static vpx_image_t *img_alloc_helper(vpx_image_t *img,
- vpx_img_fmt_t fmt,
- unsigned int d_w,
- unsigned int d_h,
- unsigned int buf_align,
- unsigned int stride_align,
- unsigned char *img_data) {
+static vpx_image_t *img_alloc_helper(vpx_image_t *img,
+ vpx_img_fmt_t fmt,
+ unsigned int d_w,
+ unsigned int d_h,
+ unsigned int buf_align,
+ unsigned int stride_align,
+ unsigned char *img_data) {
unsigned int h, w, s, xcs, ycs, bps;
int align;
@@ -94,6 +94,21 @@
case VPX_IMG_FMT_VPXYV12:
bps = 12;
break;
+ case VPX_IMG_FMT_I422:
+ bps = 16;
+ break;
+ case VPX_IMG_FMT_I444:
+ bps = 24;
+ break;
+ case VPX_IMG_FMT_I42016:
+ bps = 24;
+ break;
+ case VPX_IMG_FMT_I42216:
+ bps = 32;
+ break;
+ case VPX_IMG_FMT_I44416:
+ bps = 48;
+ break;
default:
bps = 16;
break;
@@ -105,6 +120,9 @@
case VPX_IMG_FMT_YV12:
case VPX_IMG_FMT_VPXI420:
case VPX_IMG_FMT_VPXYV12:
+ case VPX_IMG_FMT_I422:
+ case VPX_IMG_FMT_I42016:
+ case VPX_IMG_FMT_I42216:
xcs = 1;
break;
default:
@@ -156,6 +174,7 @@
goto fail;
img->fmt = fmt;
+ img->bit_depth = (fmt & VPX_IMG_FMT_HIGH) ? 16 : 8;
img->w = w;
img->h = h;
img->x_chroma_shift = xcs;
diff --git a/source/libvpx/vpx/svc_context.h b/source/libvpx/vpx/svc_context.h
index 5d0fbbd..e0de263 100644
--- a/source/libvpx/vpx/svc_context.h
+++ b/source/libvpx/vpx/svc_context.h
@@ -23,13 +23,6 @@
extern "C" {
#endif
-typedef enum SVC_ENCODING_MODE {
- INTER_LAYER_PREDICTION_I,
- ALT_INTER_LAYER_PREDICTION_IP,
- INTER_LAYER_PREDICTION_IP,
- USE_GOLDEN_FRAME
-} SVC_ENCODING_MODE;
-
typedef enum SVC_LOG_LEVEL {
SVC_LOG_ERROR,
SVC_LOG_INFO,
@@ -39,7 +32,6 @@
typedef struct {
// public interface to svc_command options
int spatial_layers; // number of layers
- SVC_ENCODING_MODE encoding_mode; // svc encoding strategy
SVC_LOG_LEVEL log_level; // amount of information to display
int log_print; // when set, printf log messages instead of returning the
// message with svc_get_message
@@ -64,8 +56,7 @@
* e.g., "60,53,39,33,27"
*/
vpx_codec_err_t vpx_svc_set_quantizers(SvcContext *svc_ctx,
- const char *quantizer_values,
- const int is_for_keyframe);
+ const char *quantizer_values);
/**
* Set SVC scale factors
@@ -104,14 +95,16 @@
const char *vpx_svc_get_message(const SvcContext *svc_ctx);
/**
- * return size of encoded data to be returned by vpx_svc_get_buffer
+ * return size of encoded data to be returned by vpx_svc_get_buffer.
+ * call it before vpx_svc_get_buffer, which removes that frame from the list.
*/
size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx);
/**
- * return buffer with encoded data
+ * return buffer with encoded data. the encoder maintains a list of frame
+ * buffers; each call returns one frame, or NULL when the list is empty.
*/
-void *vpx_svc_get_buffer(const SvcContext *svc_ctx);
+void *vpx_svc_get_buffer(SvcContext *svc_ctx);
/**
* return size of two pass rate control stats data to be returned by
diff --git a/source/libvpx/vpx/vpx_codec.h b/source/libvpx/vpx/vpx_codec.h
index 03d2dec..45e7023 100644
--- a/source/libvpx/vpx/vpx_codec.h
+++ b/source/libvpx/vpx/vpx_codec.h
@@ -212,6 +212,15 @@
vpx_codec_priv_t *priv; /**< Algorithm private storage */
} vpx_codec_ctx_t;
+ /*!\brief Bit depth for codec
+ *
+ * This enumeration determines the bit depth of the codec.
+ */
+ typedef enum vpx_bit_depth {
+ VPX_BITS_8, /**< 8 bits */
+ VPX_BITS_10, /**< 10 bits */
+ VPX_BITS_12 /**< 12 bits */
+ } vpx_bit_depth_t;
/*
* Library Version Number Interface
diff --git a/source/libvpx/vpx/vpx_image.h b/source/libvpx/vpx/vpx_image.h
index d45b003..7b04b70 100644
--- a/source/libvpx/vpx/vpx_image.h
+++ b/source/libvpx/vpx/vpx_image.h
@@ -103,8 +103,9 @@
vpx_img_fmt_t fmt; /**< Image Format */
/* Image storage dimensions */
- unsigned int w; /**< Stored image width */
- unsigned int h; /**< Stored image height */
+ unsigned int w; /**< Stored image width */
+ unsigned int h; /**< Stored image height */
+ unsigned int bit_depth; /**< Stored image bit-depth */
/* Image display dimensions */
unsigned int d_w; /**< Displayed image width */
diff --git a/source/libvpx/vpxdec.c b/source/libvpx/vpxdec.c
index 127e65f..1213ab6 100644
--- a/source/libvpx/vpxdec.c
+++ b/source/libvpx/vpxdec.c
@@ -77,6 +77,8 @@
"Enable decoder error-concealment");
static const arg_def_t scalearg = ARG_DEF("S", "scale", 0,
"Scale output frames uniformly");
+static const arg_def_t continuearg =
+ ARG_DEF("k", "keep-going", 0, "(debug) Continue decoding after error");
static const arg_def_t fb_arg =
ARG_DEF(NULL, "frame-buffers", 1, "Number of frame buffers to use");
@@ -88,8 +90,7 @@
&codecarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
&progressarg, &limitarg, &skiparg, &postprocarg, &summaryarg, &outputfile,
&threadsarg, &verbosearg, &scalearg, &fb_arg,
- &md5arg,
- &error_concealment,
+ &md5arg, &error_concealment, &continuearg,
NULL
};
@@ -497,6 +498,7 @@
int stop_after = 0, postproc = 0, summary = 0, quiet = 1;
int arg_skip = 0;
int ec_enabled = 0;
+ int keep_going = 0;
const VpxInterface *interface = NULL;
const VpxInterface *fourcc_interface = NULL;
uint64_t dx_time = 0;
@@ -632,6 +634,8 @@
}
} else if (arg_match(&arg, &error_concealment, argi)) {
ec_enabled = 1;
+ } else if (arg_match(&arg, &continuearg, argi)) {
+ keep_going = 1;
}
#endif
@@ -814,7 +818,8 @@
if (detail)
warn("Additional information: %s", detail);
- goto fail;
+ if (!keep_going)
+ goto fail;
}
vpx_usec_timer_mark(&timer);
@@ -895,7 +900,8 @@
len = y4m_write_file_header(buf, sizeof(buf),
vpx_input_ctx.width,
vpx_input_ctx.height,
- &vpx_input_ctx.framerate, img->fmt);
+ &vpx_input_ctx.framerate,
+ img->fmt, 8);
if (do_md5) {
MD5Update(&md5_ctx, (md5byte *)buf, (unsigned int)len);
} else {
diff --git a/source/libvpx/vpxenc.c b/source/libvpx/vpxenc.c
index d46a83e..fce6807 100644
--- a/source/libvpx/vpxenc.c
+++ b/source/libvpx/vpxenc.c
@@ -756,6 +756,7 @@
input->framerate.numerator = input->y4m.fps_n;
input->framerate.denominator = input->y4m.fps_d;
input->fmt = input->y4m.vpx_fmt;
+ input->bit_depth = input->y4m.bit_depth;
} else
fatal("Unsupported Y4M stream.");
} else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
@@ -1533,6 +1534,7 @@
input.framerate.numerator = 30;
input.framerate.denominator = 1;
input.only_i420 = 1;
+ input.bit_depth = 0;
/* First parse the global configuration values, because we want to apply
* other parameters on top of the default configuration provided by the
diff --git a/source/libvpx/y4menc.c b/source/libvpx/y4menc.c
index 8b1c95e..9211452 100644
--- a/source/libvpx/y4menc.c
+++ b/source/libvpx/y4menc.c
@@ -8,16 +8,48 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <assert.h>
#include "./y4menc.h"
int y4m_write_file_header(char *buf, size_t len, int width, int height,
const struct VpxRational *framerate,
- vpx_img_fmt_t fmt) {
- const char *const color = fmt == VPX_IMG_FMT_444A ? "C444alpha\n" :
- fmt == VPX_IMG_FMT_I444 ? "C444\n" :
- fmt == VPX_IMG_FMT_I422 ? "C422\n" :
- "C420jpeg\n";
-
+ vpx_img_fmt_t fmt, unsigned int bit_depth) {
+ const char *color;
+ switch (bit_depth) {
+ case 8:
+ color = fmt == VPX_IMG_FMT_444A ? "C444alpha\n" :
+ fmt == VPX_IMG_FMT_I444 ? "C444\n" :
+ fmt == VPX_IMG_FMT_I422 ? "C422\n" :
+ "C420jpeg\n";
+ break;
+ case 9:
+ color = fmt == VPX_IMG_FMT_I44416 ? "C444p9 XYSCSS=444P9\n" :
+ fmt == VPX_IMG_FMT_I42216 ? "C422p9 XYSCSS=422P9\n" :
+ "C420p9 XYSCSS=420P9\n";
+ break;
+ case 10:
+ color = fmt == VPX_IMG_FMT_I44416 ? "C444p10 XYSCSS=444P10\n" :
+ fmt == VPX_IMG_FMT_I42216 ? "C422p10 XYSCSS=422P10\n" :
+ "C420p10 XYSCSS=420P10\n";
+ break;
+ case 12:
+ color = fmt == VPX_IMG_FMT_I44416 ? "C444p12 XYSCSS=444P12\n" :
+ fmt == VPX_IMG_FMT_I42216 ? "C422p12 XYSCSS=422P12\n" :
+ "C420p12 XYSCSS=420P12\n";
+ break;
+ case 14:
+ color = fmt == VPX_IMG_FMT_I44416 ? "C444p14 XYSCSS=444P14\n" :
+ fmt == VPX_IMG_FMT_I42216 ? "C422p14 XYSCSS=422P14\n" :
+ "C420p14 XYSCSS=420P14\n";
+ break;
+ case 16:
+ color = fmt == VPX_IMG_FMT_I44416 ? "C444p16 XYSCSS=444P16\n" :
+ fmt == VPX_IMG_FMT_I42216 ? "C422p16 XYSCSS=422P16\n" :
+ "C420p16 XYSCSS=420P16\n";
+ break;
+ default:
+ assert(0);
+ }
return snprintf(buf, len, "YUV4MPEG2 W%u H%u F%u:%u I%c %s", width, height,
framerate->numerator, framerate->denominator, 'p', color);
}
diff --git a/source/libvpx/y4menc.h b/source/libvpx/y4menc.h
index 0fabf56..69d5904 100644
--- a/source/libvpx/y4menc.h
+++ b/source/libvpx/y4menc.h
@@ -23,7 +23,7 @@
int y4m_write_file_header(char *buf, size_t len, int width, int height,
const struct VpxRational *framerate,
- vpx_img_fmt_t fmt);
+ vpx_img_fmt_t fmt, unsigned int bit_depth);
int y4m_write_frame_header(char *buf, size_t len);
#ifdef __cplusplus
diff --git a/source/libvpx/y4minput.c b/source/libvpx/y4minput.c
index 90c5310a..b005b71 100644
--- a/source/libvpx/y4minput.c
+++ b/source/libvpx/y4minput.c
@@ -737,15 +737,52 @@
return -1;
}
_y4m->vpx_fmt = VPX_IMG_FMT_I420;
- _y4m->vpx_bps = 12;
+ _y4m->bps = 12;
+ _y4m->bit_depth = 8;
if (strcmp(_y4m->chroma_type, "420") == 0 ||
strcmp(_y4m->chroma_type, "420jpeg") == 0) {
_y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
_y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
+ 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
- /*Natively supported: no conversion required.*/
+ /* Natively supported: no conversion required. */
_y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
_y4m->convert = y4m_convert_null;
+ } else if (strcmp(_y4m->chroma_type, "420p10") == 0) {
+ _y4m->src_c_dec_h = 2;
+ _y4m->dst_c_dec_h = 2;
+ _y4m->src_c_dec_v = 2;
+ _y4m->dst_c_dec_v = 2;
+ _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +
+ 2 * ((_y4m->pic_w + 1) / 2) *
+ ((_y4m->pic_h + 1) / 2));
+ /* Natively supported: no conversion required. */
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_null;
+ _y4m->bit_depth = 10;
+ _y4m->bps = 15;
+ _y4m->vpx_fmt = VPX_IMG_FMT_I42016;
+ if (only_420) {
+ fprintf(stderr, "Unsupported conversion from 420p10 to 420jpeg\n");
+ return -1;
+ }
+ } else if (strcmp(_y4m->chroma_type, "420p12") == 0) {
+ _y4m->src_c_dec_h = 2;
+ _y4m->dst_c_dec_h = 2;
+ _y4m->src_c_dec_v = 2;
+ _y4m->dst_c_dec_v = 2;
+ _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +
+ 2 * ((_y4m->pic_w + 1) / 2) *
+ ((_y4m->pic_h + 1) / 2));
+ /* Natively supported: no conversion required. */
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_null;
+ _y4m->bit_depth = 12;
+ _y4m->bps = 18;
+ _y4m->vpx_fmt = VPX_IMG_FMT_I42016;
+ if (only_420) {
+ fprintf(stderr, "Unsupported conversion from 420p12 to 420jpeg\n");
+ return -1;
+ }
} else if (strcmp(_y4m->chroma_type, "420mpeg2") == 0) {
_y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2;
_y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
@@ -786,7 +823,7 @@
_y4m->convert = y4m_convert_422_420jpeg;
} else {
_y4m->vpx_fmt = VPX_IMG_FMT_I422;
- _y4m->vpx_bps = 16;
+ _y4m->bps = 16;
_y4m->dst_c_dec_h = _y4m->src_c_dec_h;
_y4m->dst_c_dec_v = _y4m->src_c_dec_v;
_y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h
@@ -794,7 +831,39 @@
/*Natively supported: no conversion required.*/
_y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
_y4m->convert = y4m_convert_null;
- }
+ }
+ } else if (strcmp(_y4m->chroma_type, "422p10") == 0) {
+ _y4m->src_c_dec_h = 2;
+ _y4m->src_c_dec_v = 1;
+ _y4m->vpx_fmt = VPX_IMG_FMT_I42216;
+ _y4m->bps = 20;
+ _y4m->bit_depth = 10;
+ _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
+ _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
+ _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +
+ 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h);
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_null;
+ if (only_420) {
+ fprintf(stderr, "Unsupported conversion from 422p10 to 420jpeg\n");
+ return -1;
+ }
+ } else if (strcmp(_y4m->chroma_type, "422p12") == 0) {
+ _y4m->src_c_dec_h = 2;
+ _y4m->src_c_dec_v = 1;
+ _y4m->vpx_fmt = VPX_IMG_FMT_I42216;
+ _y4m->bps = 24;
+ _y4m->bit_depth = 12;
+ _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
+ _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
+ _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +
+ 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h);
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_null;
+ if (only_420) {
+ fprintf(stderr, "Unsupported conversion from 422p12 to 420jpeg\n");
+ return -1;
+ }
} else if (strcmp(_y4m->chroma_type, "411") == 0) {
_y4m->src_c_dec_h = 4;
_y4m->dst_c_dec_h = 2;
@@ -823,7 +892,7 @@
_y4m->convert = y4m_convert_444_420jpeg;
} else {
_y4m->vpx_fmt = VPX_IMG_FMT_I444;
- _y4m->vpx_bps = 24;
+ _y4m->bps = 24;
_y4m->dst_c_dec_h = _y4m->src_c_dec_h;
_y4m->dst_c_dec_v = _y4m->src_c_dec_v;
_y4m->dst_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
@@ -831,6 +900,36 @@
_y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
_y4m->convert = y4m_convert_null;
}
+ } else if (strcmp(_y4m->chroma_type, "444p10") == 0) {
+ _y4m->src_c_dec_h = 1;
+ _y4m->src_c_dec_v = 1;
+ _y4m->vpx_fmt = VPX_IMG_FMT_I44416;
+ _y4m->bps = 30;
+ _y4m->bit_depth = 10;
+ _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
+ _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
+ _y4m->dst_buf_read_sz = 2 * 3 * _y4m->pic_w * _y4m->pic_h;
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_null;
+ if (only_420) {
+ fprintf(stderr, "Unsupported conversion from 444p10 to 420jpeg\n");
+ return -1;
+ }
+ } else if (strcmp(_y4m->chroma_type, "444p12") == 0) {
+ _y4m->src_c_dec_h = 1;
+ _y4m->src_c_dec_v = 1;
+ _y4m->vpx_fmt = VPX_IMG_FMT_I44416;
+ _y4m->bps = 36;
+ _y4m->bit_depth = 12;
+ _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
+ _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
+ _y4m->dst_buf_read_sz = 2 * 3 * _y4m->pic_w * _y4m->pic_h;
+ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
+ _y4m->convert = y4m_convert_null;
+ if (only_420) {
+ fprintf(stderr, "Unsupported conversion from 444p12 to 420jpeg\n");
+ return -1;
+ }
} else if (strcmp(_y4m->chroma_type, "444alpha") == 0) {
_y4m->src_c_dec_h = 1;
_y4m->src_c_dec_v = 1;
@@ -847,7 +946,7 @@
_y4m->convert = y4m_convert_444_420jpeg;
} else {
_y4m->vpx_fmt = VPX_IMG_FMT_444A;
- _y4m->vpx_bps = 32;
+ _y4m->bps = 32;
_y4m->dst_c_dec_h = _y4m->src_c_dec_h;
_y4m->dst_c_dec_v = _y4m->src_c_dec_v;
_y4m->dst_buf_read_sz = 4 * _y4m->pic_w * _y4m->pic_h;
@@ -871,7 +970,10 @@
_y4m->dst_buf_sz = _y4m->pic_w * _y4m->pic_h
+ 2 * ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
- _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
+ if (_y4m->bit_depth == 8)
+ _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
+ else
+ _y4m->dst_buf = (unsigned char *)malloc(2 * _y4m->dst_buf_sz);
_y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
return 0;
}
@@ -887,6 +989,7 @@
int c_w;
int c_h;
int c_sz;
+ int bytes_per_sample = _y4m->bit_depth > 8 ? 2 : 1;
/*Read and skip the frame header.*/
if (!file_read(frame, 6, _fin)) return 0;
if (memcmp(frame, "FRAME", 5)) {
@@ -924,14 +1027,16 @@
_img->h = _img->d_h = _y4m->pic_h;
_img->x_chroma_shift = _y4m->dst_c_dec_h >> 1;
_img->y_chroma_shift = _y4m->dst_c_dec_v >> 1;
- _img->bps = _y4m->vpx_bps;
+ _img->bps = _y4m->bps;
/*Set up the buffer pointers.*/
- pic_sz = _y4m->pic_w * _y4m->pic_h;
+ pic_sz = _y4m->pic_w * _y4m->pic_h * bytes_per_sample;
c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
+ c_w *= bytes_per_sample;
c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
c_sz = c_w * c_h;
- _img->stride[PLANE_Y] = _img->stride[PLANE_ALPHA] = _y4m->pic_w;
+ _img->stride[PLANE_Y] = _img->stride[PLANE_ALPHA] =
+ _y4m->pic_w * bytes_per_sample;
_img->stride[PLANE_U] = _img->stride[PLANE_V] = c_w;
_img->planes[PLANE_Y] = _y4m->dst_buf;
_img->planes[PLANE_U] = _y4m->dst_buf + pic_sz;
diff --git a/source/libvpx/y4minput.h b/source/libvpx/y4minput.h
index d53eb65..356cebb 100644
--- a/source/libvpx/y4minput.h
+++ b/source/libvpx/y4minput.h
@@ -58,7 +58,8 @@
unsigned char *dst_buf;
unsigned char *aux_buf;
enum vpx_img_fmt vpx_fmt;
- int vpx_bps;
+ int bps;
+ unsigned int bit_depth;
};
int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,