Merge third_party/libvpx from https://chromium.googlesource.com/chromium/deps/libvpx.git at 77496404dc182c2f4a5f86ebabffe1d1ceb81e7e

This commit was generated by merge_from_chromium.py.

Change-Id: I698ba69d2c79a697446f42d6ebd8d3ea8bc63783
diff --git a/README.chromium b/README.chromium
index c168758..325233a 100644
--- a/README.chromium
+++ b/README.chromium
@@ -5,9 +5,9 @@
 License File: source/libvpx/LICENSE
 Security Critical: yes
 
-Date: Thursday April 24 2014
+Date: Thursday May 1 2014
 Branch: master
-Commit: 109f58acfd8d46deec1e0bd4d0f82daa36cd6b8e
+Commit: 6653769a02870fd4f861902be8200f8dc0571bb4
 
 Description:
 Contains the sources used to compile libvpx binaries used by Google Chrome and
diff --git a/WATCHLISTS b/WATCHLISTS
new file mode 100644
index 0000000..825de44
--- /dev/null
+++ b/WATCHLISTS
@@ -0,0 +1,22 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Watchlist Rules
+# Refer: http://dev.chromium.org/developers/contributing-code/watchlists
+
+# IMPORTANT: The regular expression filepath is tested against each path using
+# re.search, so it is not usually necessary to add .*.
+
+{
+  'WATCHLIST_DEFINITIONS': {
+    'libvpx': {
+      'filepath': '.*'
+    },
+  },
+
+  'WATCHLISTS': {
+    'libvpx': ['tomfinegan@chromium.org', 'jzern@chromium.org',
+               'fgalligan@chromium.org', 'johannkoenig@chromium.org'],
+  },
+}
diff --git a/libvpx_srcs_arm_neon.gypi b/libvpx_srcs_arm_neon.gypi
index 23059c8..ba52e78 100644
--- a/libvpx_srcs_arm_neon.gypi
+++ b/libvpx_srcs_arm_neon.gypi
@@ -49,7 +49,6 @@
     '<(libvpx_source)/vp8/common/arm/neon/mbloopfilter_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/sad16_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/sad8_neon.asm',
-    '<(libvpx_source)/vp8/common/arm/neon/save_reg_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/shortidct4x4llm_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/sixtappredict16x16_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/sixtappredict4x4_neon.asm',
diff --git a/libvpx_srcs_arm_neon_cpu_detect.gypi b/libvpx_srcs_arm_neon_cpu_detect.gypi
index f339705..c5892e8 100644
--- a/libvpx_srcs_arm_neon_cpu_detect.gypi
+++ b/libvpx_srcs_arm_neon_cpu_detect.gypi
@@ -43,7 +43,6 @@
     '<(libvpx_source)/vp8/common/arm/neon/mbloopfilter_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/sad16_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/sad8_neon.asm',
-    '<(libvpx_source)/vp8/common/arm/neon/save_reg_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/shortidct4x4llm_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/sixtappredict16x16_neon.asm',
     '<(libvpx_source)/vp8/common/arm/neon/sixtappredict4x4_neon.asm',
diff --git a/libvpx_srcs_x86_64.gypi b/libvpx_srcs_x86_64.gypi
index 6809562..4cb3384 100644
--- a/libvpx_srcs_x86_64.gypi
+++ b/libvpx_srcs_x86_64.gypi
@@ -312,6 +312,7 @@
     '<(libvpx_source)/vp9/encoder/vp9_write_bit_buffer.h',
     '<(libvpx_source)/vp9/encoder/vp9_writer.c',
     '<(libvpx_source)/vp9/encoder/vp9_writer.h',
+    '<(libvpx_source)/vp9/encoder/x86/vp9_dct_ssse3.asm',
     '<(libvpx_source)/vp9/encoder/x86/vp9_error_sse2.asm',
     '<(libvpx_source)/vp9/encoder/x86/vp9_quantize_ssse3.asm',
     '<(libvpx_source)/vp9/encoder/x86/vp9_sad4d_sse2.asm',
diff --git a/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h b/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
index 7cb923b..897a0a5 100644
--- a/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
+++ b/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
@@ -23,7 +23,7 @@
 /* Encoder forward decls */
 struct macroblock;
 struct vp9_variance_vtable;
-
+struct search_site_config;
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -192,7 +192,7 @@
 void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
 
-int vp9_diamond_search_sad_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_diamond_search_sad vp9_diamond_search_sad_c
 
 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride);
@@ -219,7 +219,7 @@
 void vp9_fht8x8_c(const int16_t *input, int16_t *output, int stride, int tx_type);
 #define vp9_fht8x8 vp9_fht8x8_c
 
-int vp9_full_range_search_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_full_range_search vp9_full_range_search_c
 
 int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
@@ -583,12 +583,6 @@
 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 #define vp9_sub_pixel_avg_variance8x8 vp9_sub_pixel_avg_variance8x8_c
 
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse32x32 vp9_sub_pixel_mse32x32_c
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse64x64 vp9_sub_pixel_mse64x64_c
-
 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_sub_pixel_variance16x16 vp9_sub_pixel_variance16x16_c
 
@@ -705,33 +699,6 @@
 unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance8x8 vp9_variance8x8_c
 
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_c
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_c
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_c
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_h vp9_variance_halfpixvar32x32_h_c
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_hv vp9_variance_halfpixvar32x32_hv_c
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_v vp9_variance_halfpixvar32x32_v_c
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_h vp9_variance_halfpixvar64x64_h_c
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_hv vp9_variance_halfpixvar64x64_hv_c
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c
-
 void vp9_rtcd(void);
 
 #include "vpx_config.h"
diff --git a/source/config/linux/arm-neon/vp9_rtcd.h b/source/config/linux/arm-neon/vp9_rtcd.h
index 1d91229..bbc7827 100644
--- a/source/config/linux/arm-neon/vp9_rtcd.h
+++ b/source/config/linux/arm-neon/vp9_rtcd.h
@@ -23,7 +23,7 @@
 /* Encoder forward decls */
 struct macroblock;
 struct vp9_variance_vtable;
-
+struct search_site_config;
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -192,7 +192,7 @@
 void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
 
-int vp9_diamond_search_sad_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_diamond_search_sad vp9_diamond_search_sad_c
 
 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride);
@@ -219,7 +219,7 @@
 void vp9_fht8x8_c(const int16_t *input, int16_t *output, int stride, int tx_type);
 #define vp9_fht8x8 vp9_fht8x8_c
 
-int vp9_full_range_search_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_full_range_search vp9_full_range_search_c
 
 int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
@@ -583,12 +583,6 @@
 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 #define vp9_sub_pixel_avg_variance8x8 vp9_sub_pixel_avg_variance8x8_c
 
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse32x32 vp9_sub_pixel_mse32x32_c
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse64x64 vp9_sub_pixel_mse64x64_c
-
 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_sub_pixel_variance16x16 vp9_sub_pixel_variance16x16_c
 
@@ -705,33 +699,6 @@
 unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance8x8 vp9_variance8x8_c
 
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_c
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_c
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_c
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_h vp9_variance_halfpixvar32x32_h_c
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_hv vp9_variance_halfpixvar32x32_hv_c
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_v vp9_variance_halfpixvar32x32_v_c
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_h vp9_variance_halfpixvar64x64_h_c
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_hv vp9_variance_halfpixvar64x64_hv_c
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c
-
 void vp9_rtcd(void);
 
 #include "vpx_config.h"
diff --git a/source/config/linux/arm/vp9_rtcd.h b/source/config/linux/arm/vp9_rtcd.h
index 4769103..aa4aa63 100644
--- a/source/config/linux/arm/vp9_rtcd.h
+++ b/source/config/linux/arm/vp9_rtcd.h
@@ -23,7 +23,7 @@
 /* Encoder forward decls */
 struct macroblock;
 struct vp9_variance_vtable;
-
+struct search_site_config;
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -184,7 +184,7 @@
 void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
 
-int vp9_diamond_search_sad_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_diamond_search_sad vp9_diamond_search_sad_c
 
 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride);
@@ -211,7 +211,7 @@
 void vp9_fht8x8_c(const int16_t *input, int16_t *output, int stride, int tx_type);
 #define vp9_fht8x8 vp9_fht8x8_c
 
-int vp9_full_range_search_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_full_range_search vp9_full_range_search_c
 
 int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
@@ -547,12 +547,6 @@
 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 #define vp9_sub_pixel_avg_variance8x8 vp9_sub_pixel_avg_variance8x8_c
 
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse32x32 vp9_sub_pixel_mse32x32_c
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse64x64 vp9_sub_pixel_mse64x64_c
-
 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_sub_pixel_variance16x16 vp9_sub_pixel_variance16x16_c
 
@@ -661,33 +655,6 @@
 unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance8x8 vp9_variance8x8_c
 
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_c
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_c
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_c
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_h vp9_variance_halfpixvar32x32_h_c
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_hv vp9_variance_halfpixvar32x32_hv_c
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_v vp9_variance_halfpixvar32x32_v_c
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_h vp9_variance_halfpixvar64x64_h_c
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_hv vp9_variance_halfpixvar64x64_hv_c
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c
-
 void vp9_rtcd(void);
 
 #include "vpx_config.h"
diff --git a/source/config/linux/generic/vp9_rtcd.h b/source/config/linux/generic/vp9_rtcd.h
index 47adbd3..e74fef5 100644
--- a/source/config/linux/generic/vp9_rtcd.h
+++ b/source/config/linux/generic/vp9_rtcd.h
@@ -23,7 +23,7 @@
 /* Encoder forward decls */
 struct macroblock;
 struct vp9_variance_vtable;
-
+struct search_site_config;
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -184,7 +184,7 @@
 void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
 
-int vp9_diamond_search_sad_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_diamond_search_sad vp9_diamond_search_sad_c
 
 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride);
@@ -211,7 +211,7 @@
 void vp9_fht8x8_c(const int16_t *input, int16_t *output, int stride, int tx_type);
 #define vp9_fht8x8 vp9_fht8x8_c
 
-int vp9_full_range_search_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_full_range_search vp9_full_range_search_c
 
 int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
@@ -547,12 +547,6 @@
 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 #define vp9_sub_pixel_avg_variance8x8 vp9_sub_pixel_avg_variance8x8_c
 
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse32x32 vp9_sub_pixel_mse32x32_c
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse64x64 vp9_sub_pixel_mse64x64_c
-
 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_sub_pixel_variance16x16 vp9_sub_pixel_variance16x16_c
 
@@ -661,33 +655,6 @@
 unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance8x8 vp9_variance8x8_c
 
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_c
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_c
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_c
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_h vp9_variance_halfpixvar32x32_h_c
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_hv vp9_variance_halfpixvar32x32_hv_c
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_v vp9_variance_halfpixvar32x32_v_c
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_h vp9_variance_halfpixvar64x64_h_c
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_hv vp9_variance_halfpixvar64x64_hv_c
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c
-
 void vp9_rtcd(void);
 
 #include "vpx_config.h"
diff --git a/source/config/linux/ia32/vp9_rtcd.h b/source/config/linux/ia32/vp9_rtcd.h
index 47a9c18..8881a35 100644
--- a/source/config/linux/ia32/vp9_rtcd.h
+++ b/source/config/linux/ia32/vp9_rtcd.h
@@ -23,7 +23,7 @@
 /* Encoder forward decls */
 struct macroblock;
 struct vp9_variance_vtable;
-
+struct search_site_config;
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -218,9 +218,9 @@
 void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
 
-int vp9_diamond_search_sad_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
-int vp9_diamond_search_sadx4(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
-RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sadx4(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 
 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride);
 void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride);
@@ -254,7 +254,7 @@
 void vp9_fht8x8_sse2(const int16_t *input, int16_t *output, int stride, int tx_type);
 RTCD_EXTERN void (*vp9_fht8x8)(const int16_t *input, int16_t *output, int stride, int tx_type);
 
-int vp9_full_range_search_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_full_range_search vp9_full_range_search_c
 
 int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
@@ -705,12 +705,6 @@
 unsigned int vp9_sub_pixel_avg_variance8x8_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance8x8)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse32x32 vp9_sub_pixel_mse32x32_c
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse64x64 vp9_sub_pixel_mse64x64_c
-
 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance16x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -872,36 +866,6 @@
 unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_h_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance_halfpixvar16x16_h)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_hv_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance_halfpixvar16x16_hv)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_v_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance_halfpixvar16x16_v)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_h vp9_variance_halfpixvar32x32_h_c
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_hv vp9_variance_halfpixvar32x32_hv_c
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_v vp9_variance_halfpixvar32x32_v_c
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_h vp9_variance_halfpixvar64x64_h_c
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_hv vp9_variance_halfpixvar64x64_hv_c
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c
-
 void vp9_rtcd(void);
 
 #ifdef RTCD_C
@@ -1287,12 +1251,6 @@
     vp9_variance8x8 = vp9_variance8x8_c;
     if (flags & HAS_MMX) vp9_variance8x8 = vp9_variance8x8_mmx;
     if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2;
-    vp9_variance_halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_c;
-    if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_sse2;
-    vp9_variance_halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_c;
-    if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_sse2;
-    vp9_variance_halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_c;
-    if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_sse2;
 }
 #endif
 
diff --git a/source/config/linux/mipsel/vp9_rtcd.h b/source/config/linux/mipsel/vp9_rtcd.h
index 47adbd3..e74fef5 100644
--- a/source/config/linux/mipsel/vp9_rtcd.h
+++ b/source/config/linux/mipsel/vp9_rtcd.h
@@ -23,7 +23,7 @@
 /* Encoder forward decls */
 struct macroblock;
 struct vp9_variance_vtable;
-
+struct search_site_config;
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -184,7 +184,7 @@
 void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
 
-int vp9_diamond_search_sad_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_diamond_search_sad vp9_diamond_search_sad_c
 
 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride);
@@ -211,7 +211,7 @@
 void vp9_fht8x8_c(const int16_t *input, int16_t *output, int stride, int tx_type);
 #define vp9_fht8x8 vp9_fht8x8_c
 
-int vp9_full_range_search_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_full_range_search vp9_full_range_search_c
 
 int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
@@ -547,12 +547,6 @@
 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 #define vp9_sub_pixel_avg_variance8x8 vp9_sub_pixel_avg_variance8x8_c
 
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse32x32 vp9_sub_pixel_mse32x32_c
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse64x64 vp9_sub_pixel_mse64x64_c
-
 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_sub_pixel_variance16x16 vp9_sub_pixel_variance16x16_c
 
@@ -661,33 +655,6 @@
 unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance8x8 vp9_variance8x8_c
 
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_c
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_c
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_c
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_h vp9_variance_halfpixvar32x32_h_c
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_hv vp9_variance_halfpixvar32x32_hv_c
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_v vp9_variance_halfpixvar32x32_v_c
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_h vp9_variance_halfpixvar64x64_h_c
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_hv vp9_variance_halfpixvar64x64_hv_c
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c
-
 void vp9_rtcd(void);
 
 #include "vpx_config.h"
diff --git a/source/config/linux/x64/vp9_rtcd.h b/source/config/linux/x64/vp9_rtcd.h
index 7649305..1b25e20 100644
--- a/source/config/linux/x64/vp9_rtcd.h
+++ b/source/config/linux/x64/vp9_rtcd.h
@@ -23,7 +23,7 @@
 /* Encoder forward decls */
 struct macroblock;
 struct vp9_variance_vtable;
-
+struct search_site_config;
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -218,9 +218,9 @@
 void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
 
-int vp9_diamond_search_sad_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
-int vp9_diamond_search_sadx4(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
-RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sadx4(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 
 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride);
 void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride);
@@ -240,7 +240,8 @@
 
 void vp9_fdct8x8_c(const int16_t *input, int16_t *output, int stride);
 void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride);
-#define vp9_fdct8x8 vp9_fdct8x8_sse2
+void vp9_fdct8x8_ssse3(const int16_t *input, int16_t *output, int stride);
+RTCD_EXTERN void (*vp9_fdct8x8)(const int16_t *input, int16_t *output, int stride);
 
 void vp9_fht16x16_c(const int16_t *input, int16_t *output, int stride, int tx_type);
 void vp9_fht16x16_sse2(const int16_t *input, int16_t *output, int stride, int tx_type);
@@ -254,7 +255,7 @@
 void vp9_fht8x8_sse2(const int16_t *input, int16_t *output, int stride, int tx_type);
 #define vp9_fht8x8 vp9_fht8x8_sse2
 
-int vp9_full_range_search_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_full_range_search vp9_full_range_search_c
 
 int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
@@ -707,12 +708,6 @@
 unsigned int vp9_sub_pixel_avg_variance8x8_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance8x8)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse32x32 vp9_sub_pixel_mse32x32_c
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse64x64 vp9_sub_pixel_mse64x64_c
-
 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance16x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -875,36 +870,6 @@
 unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance8x8 vp9_variance8x8_sse2
 
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_h_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_sse2
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_hv_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_sse2
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_v_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_sse2
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_h vp9_variance_halfpixvar32x32_h_c
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_hv vp9_variance_halfpixvar32x32_hv_c
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_v vp9_variance_halfpixvar32x32_v_c
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_h vp9_variance_halfpixvar64x64_h_c
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_hv vp9_variance_halfpixvar64x64_hv_c
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c
-
 void vp9_rtcd(void);
 
 #ifdef RTCD_C
@@ -959,6 +924,8 @@
     if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3;
     vp9_diamond_search_sad = vp9_diamond_search_sad_c;
     if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4;
+    vp9_fdct8x8 = vp9_fdct8x8_sse2;
+    if (flags & HAS_SSSE3) vp9_fdct8x8 = vp9_fdct8x8_ssse3;
     vp9_full_search_sad = vp9_full_search_sad_c;
     if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3;
     if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
diff --git a/source/config/mac/ia32/vp9_rtcd.h b/source/config/mac/ia32/vp9_rtcd.h
index de9f577..f4bf2fe 100644
--- a/source/config/mac/ia32/vp9_rtcd.h
+++ b/source/config/mac/ia32/vp9_rtcd.h
@@ -23,7 +23,7 @@
 /* Encoder forward decls */
 struct macroblock;
 struct vp9_variance_vtable;
-
+struct search_site_config;
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -196,9 +196,9 @@
 void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
 
-int vp9_diamond_search_sad_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
-int vp9_diamond_search_sadx4(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
-RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sadx4(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 
 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride);
 void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride);
@@ -232,7 +232,7 @@
 void vp9_fht8x8_sse2(const int16_t *input, int16_t *output, int stride, int tx_type);
 RTCD_EXTERN void (*vp9_fht8x8)(const int16_t *input, int16_t *output, int stride, int tx_type);
 
-int vp9_full_range_search_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_full_range_search vp9_full_range_search_c
 
 int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
@@ -626,12 +626,6 @@
 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 #define vp9_sub_pixel_avg_variance8x8 vp9_sub_pixel_avg_variance8x8_c
 
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse32x32 vp9_sub_pixel_mse32x32_c
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse64x64 vp9_sub_pixel_mse64x64_c
-
 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_sub_pixel_variance16x16 vp9_sub_pixel_variance16x16_c
 
@@ -746,33 +740,6 @@
 unsigned int vp9_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_c
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_c
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_c
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_h vp9_variance_halfpixvar32x32_h_c
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_hv vp9_variance_halfpixvar32x32_hv_c
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_v vp9_variance_halfpixvar32x32_v_c
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_h vp9_variance_halfpixvar64x64_h_c
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_hv vp9_variance_halfpixvar64x64_hv_c
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c
-
 void vp9_rtcd(void);
 
 #ifdef RTCD_C
diff --git a/source/config/mac/x64/vp9_rtcd.h b/source/config/mac/x64/vp9_rtcd.h
index 7649305..1b25e20 100644
--- a/source/config/mac/x64/vp9_rtcd.h
+++ b/source/config/mac/x64/vp9_rtcd.h
@@ -23,7 +23,7 @@
 /* Encoder forward decls */
 struct macroblock;
 struct vp9_variance_vtable;
-
+struct search_site_config;
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -218,9 +218,9 @@
 void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
 
-int vp9_diamond_search_sad_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
-int vp9_diamond_search_sadx4(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
-RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sadx4(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 
 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride);
 void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride);
@@ -240,7 +240,8 @@
 
 void vp9_fdct8x8_c(const int16_t *input, int16_t *output, int stride);
 void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride);
-#define vp9_fdct8x8 vp9_fdct8x8_sse2
+void vp9_fdct8x8_ssse3(const int16_t *input, int16_t *output, int stride);
+RTCD_EXTERN void (*vp9_fdct8x8)(const int16_t *input, int16_t *output, int stride);
 
 void vp9_fht16x16_c(const int16_t *input, int16_t *output, int stride, int tx_type);
 void vp9_fht16x16_sse2(const int16_t *input, int16_t *output, int stride, int tx_type);
@@ -254,7 +255,7 @@
 void vp9_fht8x8_sse2(const int16_t *input, int16_t *output, int stride, int tx_type);
 #define vp9_fht8x8 vp9_fht8x8_sse2
 
-int vp9_full_range_search_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_full_range_search vp9_full_range_search_c
 
 int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
@@ -707,12 +708,6 @@
 unsigned int vp9_sub_pixel_avg_variance8x8_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance8x8)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse32x32 vp9_sub_pixel_mse32x32_c
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse64x64 vp9_sub_pixel_mse64x64_c
-
 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance16x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -875,36 +870,6 @@
 unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance8x8 vp9_variance8x8_sse2
 
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_h_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_sse2
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_hv_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_sse2
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_v_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_sse2
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_h vp9_variance_halfpixvar32x32_h_c
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_hv vp9_variance_halfpixvar32x32_hv_c
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_v vp9_variance_halfpixvar32x32_v_c
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_h vp9_variance_halfpixvar64x64_h_c
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_hv vp9_variance_halfpixvar64x64_hv_c
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c
-
 void vp9_rtcd(void);
 
 #ifdef RTCD_C
@@ -959,6 +924,8 @@
     if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3;
     vp9_diamond_search_sad = vp9_diamond_search_sad_c;
     if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4;
+    vp9_fdct8x8 = vp9_fdct8x8_sse2;
+    if (flags & HAS_SSSE3) vp9_fdct8x8 = vp9_fdct8x8_ssse3;
     vp9_full_search_sad = vp9_full_search_sad_c;
     if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3;
     if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
diff --git a/source/config/nacl/vp9_rtcd.h b/source/config/nacl/vp9_rtcd.h
index 47adbd3..e74fef5 100644
--- a/source/config/nacl/vp9_rtcd.h
+++ b/source/config/nacl/vp9_rtcd.h
@@ -23,7 +23,7 @@
 /* Encoder forward decls */
 struct macroblock;
 struct vp9_variance_vtable;
-
+struct search_site_config;
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -184,7 +184,7 @@
 void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
 
-int vp9_diamond_search_sad_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_diamond_search_sad vp9_diamond_search_sad_c
 
 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride);
@@ -211,7 +211,7 @@
 void vp9_fht8x8_c(const int16_t *input, int16_t *output, int stride, int tx_type);
 #define vp9_fht8x8 vp9_fht8x8_c
 
-int vp9_full_range_search_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_full_range_search vp9_full_range_search_c
 
 int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
@@ -547,12 +547,6 @@
 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 #define vp9_sub_pixel_avg_variance8x8 vp9_sub_pixel_avg_variance8x8_c
 
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse32x32 vp9_sub_pixel_mse32x32_c
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse64x64 vp9_sub_pixel_mse64x64_c
-
 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_sub_pixel_variance16x16 vp9_sub_pixel_variance16x16_c
 
@@ -661,33 +655,6 @@
 unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance8x8 vp9_variance8x8_c
 
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_c
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_c
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_c
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_h vp9_variance_halfpixvar32x32_h_c
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_hv vp9_variance_halfpixvar32x32_hv_c
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_v vp9_variance_halfpixvar32x32_v_c
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_h vp9_variance_halfpixvar64x64_h_c
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_hv vp9_variance_halfpixvar64x64_hv_c
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c
-
 void vp9_rtcd(void);
 
 #include "vpx_config.h"
diff --git a/source/config/win/ia32/vp9_rtcd.h b/source/config/win/ia32/vp9_rtcd.h
index 47a9c18..8881a35 100644
--- a/source/config/win/ia32/vp9_rtcd.h
+++ b/source/config/win/ia32/vp9_rtcd.h
@@ -23,7 +23,7 @@
 /* Encoder forward decls */
 struct macroblock;
 struct vp9_variance_vtable;
-
+struct search_site_config;
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -218,9 +218,9 @@
 void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
 
-int vp9_diamond_search_sad_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
-int vp9_diamond_search_sadx4(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
-RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sadx4(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 
 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride);
 void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride);
@@ -254,7 +254,7 @@
 void vp9_fht8x8_sse2(const int16_t *input, int16_t *output, int stride, int tx_type);
 RTCD_EXTERN void (*vp9_fht8x8)(const int16_t *input, int16_t *output, int stride, int tx_type);
 
-int vp9_full_range_search_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_full_range_search vp9_full_range_search_c
 
 int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
@@ -705,12 +705,6 @@
 unsigned int vp9_sub_pixel_avg_variance8x8_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance8x8)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse32x32 vp9_sub_pixel_mse32x32_c
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse64x64 vp9_sub_pixel_mse64x64_c
-
 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance16x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -872,36 +866,6 @@
 unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_h_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance_halfpixvar16x16_h)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_hv_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance_halfpixvar16x16_hv)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_v_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance_halfpixvar16x16_v)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_h vp9_variance_halfpixvar32x32_h_c
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_hv vp9_variance_halfpixvar32x32_hv_c
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_v vp9_variance_halfpixvar32x32_v_c
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_h vp9_variance_halfpixvar64x64_h_c
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_hv vp9_variance_halfpixvar64x64_hv_c
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c
-
 void vp9_rtcd(void);
 
 #ifdef RTCD_C
@@ -1287,12 +1251,6 @@
     vp9_variance8x8 = vp9_variance8x8_c;
     if (flags & HAS_MMX) vp9_variance8x8 = vp9_variance8x8_mmx;
     if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2;
-    vp9_variance_halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_c;
-    if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_h = vp9_variance_halfpixvar16x16_h_sse2;
-    vp9_variance_halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_c;
-    if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_hv = vp9_variance_halfpixvar16x16_hv_sse2;
-    vp9_variance_halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_c;
-    if (flags & HAS_SSE2) vp9_variance_halfpixvar16x16_v = vp9_variance_halfpixvar16x16_v_sse2;
 }
 #endif
 
diff --git a/source/config/win/x64/vp9_rtcd.h b/source/config/win/x64/vp9_rtcd.h
index 7649305..1b25e20 100644
--- a/source/config/win/x64/vp9_rtcd.h
+++ b/source/config/win/x64/vp9_rtcd.h
@@ -23,7 +23,7 @@
 /* Encoder forward decls */
 struct macroblock;
 struct vp9_variance_vtable;
-
+struct search_site_config;
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -218,9 +218,9 @@
 void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
 
-int vp9_diamond_search_sad_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
-int vp9_diamond_search_sadx4(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
-RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_diamond_search_sadx4(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 
 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride);
 void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride);
@@ -240,7 +240,8 @@
 
 void vp9_fdct8x8_c(const int16_t *input, int16_t *output, int stride);
 void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride);
-#define vp9_fdct8x8 vp9_fdct8x8_sse2
+void vp9_fdct8x8_ssse3(const int16_t *input, int16_t *output, int stride);
+RTCD_EXTERN void (*vp9_fdct8x8)(const int16_t *input, int16_t *output, int stride);
 
 void vp9_fht16x16_c(const int16_t *input, int16_t *output, int stride, int tx_type);
 void vp9_fht16x16_sse2(const int16_t *input, int16_t *output, int stride, int tx_type);
@@ -254,7 +255,7 @@
 void vp9_fht8x8_sse2(const int16_t *input, int16_t *output, int stride, int tx_type);
 #define vp9_fht8x8 vp9_fht8x8_sse2
 
-int vp9_full_range_search_c(const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
+int vp9_full_range_search_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
 #define vp9_full_range_search vp9_full_range_search_c
 
 int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv);
@@ -707,12 +708,6 @@
 unsigned int vp9_sub_pixel_avg_variance8x8_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 RTCD_EXTERN unsigned int (*vp9_sub_pixel_avg_variance8x8)(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred);
 
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse32x32 vp9_sub_pixel_mse32x32_c
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_sub_pixel_mse64x64 vp9_sub_pixel_mse64x64_c
-
 unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 unsigned int vp9_sub_pixel_variance16x16_ssse3(const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
@@ -875,36 +870,6 @@
 unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
 #define vp9_variance8x8 vp9_variance8x8_sse2
 
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_h_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_h vp9_variance_halfpixvar16x16_h_sse2
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_hv_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_hv vp9_variance_halfpixvar16x16_hv_sse2
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance_halfpixvar16x16_v_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar16x16_v vp9_variance_halfpixvar16x16_v_sse2
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_h vp9_variance_halfpixvar32x32_h_c
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_hv vp9_variance_halfpixvar32x32_hv_c
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar32x32_v vp9_variance_halfpixvar32x32_v_c
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_h vp9_variance_halfpixvar64x64_h_c
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_hv vp9_variance_halfpixvar64x64_hv_c
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance_halfpixvar64x64_v vp9_variance_halfpixvar64x64_v_c
-
 void vp9_rtcd(void);
 
 #ifdef RTCD_C
@@ -959,6 +924,8 @@
     if (flags & HAS_SSSE3) vp9_d63_predictor_8x8 = vp9_d63_predictor_8x8_ssse3;
     vp9_diamond_search_sad = vp9_diamond_search_sad_c;
     if (flags & HAS_SSE3) vp9_diamond_search_sad = vp9_diamond_search_sadx4;
+    vp9_fdct8x8 = vp9_fdct8x8_sse2;
+    if (flags & HAS_SSSE3) vp9_fdct8x8 = vp9_fdct8x8_ssse3;
     vp9_full_search_sad = vp9_full_search_sad_c;
     if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3;
     if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
diff --git a/source/libvpx/build/arm-msvs/obj_int_extract.bat b/source/libvpx/build/arm-msvs/obj_int_extract.bat
index 267ed61..3022fd8 100644
--- a/source/libvpx/build/arm-msvs/obj_int_extract.bat
+++ b/source/libvpx/build/arm-msvs/obj_int_extract.bat
@@ -1,18 +1,18 @@
-REM   Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-REM
-REM   Use of this source code is governed by a BSD-style license
-REM   that can be found in the LICENSE file in the root of the source
-REM   tree. An additional intellectual property rights grant can be found
-REM   in the file PATENTS.  All contributing project authors may
-REM   be found in the AUTHORS file in the root of the source tree.
-echo on
-
-REM Arguments:
-REM   %1 - Relative path to the directory containing the vp8 and vpx_scale
-REM        source directories.
-REM   %2 - Path to obj_int_extract.exe.
-cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/encoder/vp8_asm_enc_offsets.c"
-%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
-
-cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vpx_scale/vpx_scale_asm_offsets.c"
-%2\obj_int_extract.exe rvds "vpx_scale_asm_offsets.obj" > "vpx_scale_asm_offsets.asm"
+REM   Copyright (c) 2013 The WebM project authors. All Rights Reserved.

+REM

+REM   Use of this source code is governed by a BSD-style license

+REM   that can be found in the LICENSE file in the root of the source

+REM   tree. An additional intellectual property rights grant can be found

+REM   in the file PATENTS.  All contributing project authors may

+REM   be found in the AUTHORS file in the root of the source tree.

+echo on

+

+REM Arguments:

+REM   %1 - Relative path to the directory containing the vp8 and vpx_scale

+REM        source directories.

+REM   %2 - Path to obj_int_extract.exe.

+cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/encoder/vp8_asm_enc_offsets.c"

+%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"

+

+cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vpx_scale/vpx_scale_asm_offsets.c"

+%2\obj_int_extract.exe rvds "vpx_scale_asm_offsets.obj" > "vpx_scale_asm_offsets.asm"

diff --git a/source/libvpx/build/make/Makefile b/source/libvpx/build/make/Makefile
index 03dacce..c4d53f1 100644
--- a/source/libvpx/build/make/Makefile
+++ b/source/libvpx/build/make/Makefile
@@ -411,6 +411,7 @@
     DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_proj.sh
     DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_sln.sh
     DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_vcxproj.sh
+    DIST-SRCS-$(CONFIG_MSVS)  += build/make/msvs_common.sh
     DIST-SRCS-$(CONFIG_MSVS)  += build/x86-msvs/obj_int_extract.bat
     DIST-SRCS-$(CONFIG_MSVS)  += build/arm-msvs/obj_int_extract.bat
     DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
diff --git a/source/libvpx/build/make/gen_msvs_proj.sh b/source/libvpx/build/make/gen_msvs_proj.sh
index d0cbf3e..4e803b8 100755
--- a/source/libvpx/build/make/gen_msvs_proj.sh
+++ b/source/libvpx/build/make/gen_msvs_proj.sh
@@ -9,17 +9,11 @@
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
-
 self=$0
 self_basename=${self##*/}
 self_dirname=$(dirname "$0")
-EOL=$'\n'
-if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \
-   && cygpath --help >/dev/null 2>&1; then
-    FIXPATH='cygpath -m'
-else
-    FIXPATH='echo'
-fi
+
+. "$self_dirname/msvs_common.sh"|| exit 127
 
 show_help() {
     cat <<EOF
@@ -49,86 +43,6 @@
     exit 1
 }
 
-die() {
-    echo "${self_basename}: $@" >&2
-    exit 1
-}
-
-die_unknown(){
-    echo "Unknown option \"$1\"." >&2
-    echo "See ${self_basename} --help for available options." >&2
-    exit 1
-}
-
-fix_path() {
-    $FIXPATH "$1"
-}
-
-generate_uuid() {
-    local hex="0123456789ABCDEF"
-    local i
-    local uuid=""
-    local j
-    #93995380-89BD-4b04-88EB-625FBE52EBFB
-    for ((i=0; i<32; i++)); do
-        (( j = $RANDOM % 16 ))
-        uuid="${uuid}${hex:$j:1}"
-    done
-    echo "${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12}"
-}
-
-indent1="    "
-indent=""
-indent_push() {
-    indent="${indent}${indent1}"
-}
-indent_pop() {
-    indent="${indent%${indent1}}"
-}
-
-tag_attributes() {
-    for opt in "$@"; do
-        optval="${opt#*=}"
-        [ -n "${optval}" ] ||
-            die "Missing attribute value in '$opt' while generating $tag tag"
-        echo "${indent}${opt%%=*}=\"${optval}\""
-    done
-}
-
-open_tag() {
-    local tag=$1
-    shift
-    if [ $# -ne 0 ]; then
-        echo "${indent}<${tag}"
-        indent_push
-        tag_attributes "$@"
-        echo "${indent}>"
-    else
-        echo "${indent}<${tag}>"
-        indent_push
-    fi
-}
-
-close_tag() {
-    local tag=$1
-    indent_pop
-    echo "${indent}</${tag}>"
-}
-
-tag() {
-    local tag=$1
-    shift
-    if [ $# -ne 0 ]; then
-        echo "${indent}<${tag}"
-        indent_push
-        tag_attributes "$@"
-        indent_pop
-        echo "${indent}/>"
-    else
-        echo "${indent}<${tag}/>"
-    fi
-}
-
 generate_filter() {
     local var=$1
     local name=$2
diff --git a/source/libvpx/build/make/gen_msvs_vcxproj.sh b/source/libvpx/build/make/gen_msvs_vcxproj.sh
index a64e129..9dc7906 100755
--- a/source/libvpx/build/make/gen_msvs_vcxproj.sh
+++ b/source/libvpx/build/make/gen_msvs_vcxproj.sh
@@ -9,17 +9,11 @@
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
-
 self=$0
 self_basename=${self##*/}
 self_dirname=$(dirname "$0")
-EOL=$'\n'
-if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \
-   && cygpath --help >/dev/null 2>&1; then
-    FIXPATH='cygpath -m'
-else
-    FIXPATH='echo'
-fi
+
+. "$self_dirname/msvs_common.sh"|| exit 127
 
 show_help() {
     cat <<EOF
@@ -50,86 +44,6 @@
     exit 1
 }
 
-die() {
-    echo "${self_basename}: $@" >&2
-    exit 1
-}
-
-die_unknown(){
-    echo "Unknown option \"$1\"." >&2
-    echo "See ${self_basename} --help for available options." >&2
-    exit 1
-}
-
-fix_path() {
-    $FIXPATH "$1"
-}
-
-generate_uuid() {
-    local hex="0123456789ABCDEF"
-    local i
-    local uuid=""
-    local j
-    #93995380-89BD-4b04-88EB-625FBE52EBFB
-    for ((i=0; i<32; i++)); do
-        (( j = $RANDOM % 16 ))
-        uuid="${uuid}${hex:$j:1}"
-    done
-    echo "${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12}"
-}
-
-indent1="    "
-indent=""
-indent_push() {
-    indent="${indent}${indent1}"
-}
-indent_pop() {
-    indent="${indent%${indent1}}"
-}
-
-tag_attributes() {
-    for opt in "$@"; do
-        optval="${opt#*=}"
-        [ -n "${optval}" ] ||
-            die "Missing attribute value in '$opt' while generating $tag tag"
-        echo "${indent}${opt%%=*}=\"${optval}\""
-    done
-}
-
-open_tag() {
-    local tag=$1
-    shift
-    if [ $# -ne 0 ]; then
-        echo "${indent}<${tag}"
-        indent_push
-        tag_attributes "$@"
-        echo "${indent}>"
-    else
-        echo "${indent}<${tag}>"
-        indent_push
-    fi
-}
-
-close_tag() {
-    local tag=$1
-    indent_pop
-    echo "${indent}</${tag}>"
-}
-
-tag() {
-    local tag=$1
-    shift
-    if [ $# -ne 0 ]; then
-        echo "${indent}<${tag}"
-        indent_push
-        tag_attributes "$@"
-        indent_pop
-        echo "${indent}/>"
-    else
-        echo "${indent}<${tag}/>"
-    fi
-}
-
 tag_content() {
     local tag=$1
     local content=$2
diff --git a/source/libvpx/build/make/msvs_common.sh b/source/libvpx/build/make/msvs_common.sh
new file mode 100644
index 0000000..eb2eb7b
--- /dev/null
+++ b/source/libvpx/build/make/msvs_common.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+##
+##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+
+if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \
+   && cygpath --help >/dev/null 2>&1; then
+    FIXPATH='cygpath -m'
+else
+    FIXPATH='echo'
+fi
+
+die() {  # Print "<script name>: <message>" on stderr, then exit with status 1.
+    echo "${self_basename}: $*" >&2  # $* keeps the message a single word
+    exit 1
+}
+
+die_unknown(){
+    echo "Unknown option \"$1\"." >&2
+    echo "See ${self_basename} --help for available options." >&2
+    exit 1
+}
+
+fix_path() {
+    $FIXPATH "$1"
+}
+
+generate_uuid() {
+    local hex="0123456789ABCDEF"
+    local i
+    local uuid=""
+    local j
+    # Draw 32 hex digits via $RANDOM; echo as 8-4-4-4-12 groups (UUID layout).
+    for ((i=0; i<32; i++)); do
+        (( j = $RANDOM % 16 ))
+        uuid="${uuid}${hex:$j:1}"
+    done
+    echo "${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12}"
+}
+
+indent1="    "
+indent=""
+indent_push() {
+    indent="${indent}${indent1}"
+}
+indent_pop() {
+    indent="${indent%${indent1}}"
+}
+
+tag_attributes() {
+    for opt in "$@"; do
+        optval="${opt#*=}"
+        [ -n "${optval}" ] ||
+            die "Missing attribute value in '$opt' while generating $tag tag"
+        echo "${indent}${opt%%=*}=\"${optval}\""
+    done
+}
+
+open_tag() {
+    local tag=$1
+    shift
+    if [ $# -ne 0 ]; then
+        echo "${indent}<${tag}"
+        indent_push
+        tag_attributes "$@"
+        echo "${indent}>"
+    else
+        echo "${indent}<${tag}>"
+        indent_push
+    fi
+}
+
+close_tag() {
+    local tag=$1
+    indent_pop
+    echo "${indent}</${tag}>"
+}
+
+tag() {
+    local tag=$1
+    shift
+    if [ $# -ne 0 ]; then
+        echo "${indent}<${tag}"
+        indent_push
+        tag_attributes "$@"
+        indent_pop
+        echo "${indent}/>"
+    else
+        echo "${indent}<${tag}/>"
+    fi
+}
+
diff --git a/source/libvpx/build/x86-msvs/obj_int_extract.bat b/source/libvpx/build/x86-msvs/obj_int_extract.bat
index 44d095d..0e9605e 100644
--- a/source/libvpx/build/x86-msvs/obj_int_extract.bat
+++ b/source/libvpx/build/x86-msvs/obj_int_extract.bat
@@ -1,15 +1,15 @@
-REM   Copyright (c) 2011 The WebM project authors. All Rights Reserved.
-REM
-REM   Use of this source code is governed by a BSD-style license
-REM   that can be found in the LICENSE file in the root of the source
-REM   tree. An additional intellectual property rights grant can be found
-REM   in the file PATENTS.  All contributing project authors may
-REM   be found in the AUTHORS file in the root of the source tree.
-echo on
-
-REM Arguments:
-REM   %1 - Relative path to the directory containing the vp8 source directory.
-REM   %2 - Path to obj_int_extract.exe.
-cl /I "./" /I "%1" /nologo /c "%1/vp8/encoder/vp8_asm_enc_offsets.c"
-%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
-
+REM   Copyright (c) 2011 The WebM project authors. All Rights Reserved.

+REM

+REM   Use of this source code is governed by a BSD-style license

+REM   that can be found in the LICENSE file in the root of the source

+REM   tree. An additional intellectual property rights grant can be found

+REM   in the file PATENTS.  All contributing project authors may

+REM   be found in the AUTHORS file in the root of the source tree.

+echo on

+

+REM Arguments:

+REM   %1 - Relative path to the directory containing the vp8 source directory.

+REM   %2 - Path to obj_int_extract.exe.

+cl /I "./" /I "%1" /nologo /c "%1/vp8/encoder/vp8_asm_enc_offsets.c"

+%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"

+

diff --git a/source/libvpx/examples.mk b/source/libvpx/examples.mk
index 91b9801..f6e7c00 100644
--- a/source/libvpx/examples.mk
+++ b/source/libvpx/examples.mk
@@ -25,6 +25,11 @@
                       third_party/libwebm/mkvwriter.hpp \
                       third_party/libwebm/webmids.hpp
 
+LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser.cpp \
+                      third_party/libwebm/mkvreader.cpp \
+                      third_party/libwebm/mkvparser.hpp \
+                      third_party/libwebm/mkvreader.hpp
+
 # List of examples to build. UTILS are tools meant for distribution
 # while EXAMPLES demonstrate specific portions of the API.
 UTILS-$(CONFIG_DECODERS)    += vpxdec.c
@@ -39,14 +44,8 @@
 vpxdec.SRCS                 += y4menc.c y4menc.h
 vpxdec.SRCS                 += $(LIBYUV_SRCS)
 ifeq ($(CONFIG_WEBM_IO),yes)
-  vpxdec.SRCS                 += third_party/nestegg/halloc/halloc.h
-  vpxdec.SRCS                 += third_party/nestegg/halloc/src/align.h
-  vpxdec.SRCS                 += third_party/nestegg/halloc/src/halloc.c
-  vpxdec.SRCS                 += third_party/nestegg/halloc/src/hlist.h
-  vpxdec.SRCS                 += third_party/nestegg/halloc/src/macros.h
-  vpxdec.SRCS                 += third_party/nestegg/include/nestegg/nestegg.h
-  vpxdec.SRCS                 += third_party/nestegg/src/nestegg.c
-  vpxdec.SRCS                 += webmdec.c webmdec.h
+  vpxdec.SRCS                 += $(LIBWEBM_PARSER_SRCS)
+  vpxdec.SRCS                 += webmdec.cc webmdec.h
 endif
 vpxdec.GUID                  = BA5FE66F-38DD-E034-F542-B1578C5FB950
 vpxdec.DESCRIPTION           = Full featured decoder
diff --git a/source/libvpx/test/decode_to_md5.sh b/source/libvpx/test/decode_to_md5.sh
new file mode 100755
index 0000000..da1a870
--- /dev/null
+++ b/source/libvpx/test/decode_to_md5.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+##
+##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+##  This file tests the libvpx decode_to_md5 example. To add new tests to this
+##  file, do the following:
+##    1. Write a shell function (this is your test).
+##    2. Add the function to decode_to_md5_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: Make sure input is available:
+#   $VP8_IVF_FILE and $VP9_IVF_FILE are required.
+decode_to_md5_verify_environment() {
+  if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then
+    echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs decode_to_md5 on $1 and echoes the MD5 sum for the final frame. $2 is
+# interpreted as codec name and used solely to name the output file.
+decode_to_md5() {
+  local decoder="${LIBVPX_BIN_PATH}/decode_to_md5${VPX_TEST_EXE_SUFFIX}"
+  local input_file="$1"
+  local codec="$2"
+  local output_file="${VPX_TEST_OUTPUT_DIR}/decode_to_md5_${codec}"
+
+  [ -x "${decoder}" ] || return 1
+
+  "${decoder}" "${input_file}" "${output_file}" > /dev/null 2>&1
+
+  [ -e "${output_file}" ] || return 1
+  # Quoted so the last line is not word-split and [:space:] is not globbed.
+  local md5_last_frame="$(tail -n1 "${output_file}")"
+  echo "${md5_last_frame% *}" | tr -d '[:space:]'
+}
+
+decode_to_md5_vp8() {
+  # expected MD5 sum for the last frame.
+  local expected_md5="56794d911b02190212bca92f88ad60c6"
+
+  if [ "$(vp8_decode_available)" = "yes" ]; then
+    local actual_md5="$(decode_to_md5 "${VP8_IVF_FILE}" vp8)" || return 1
+    [ "${actual_md5}" = "${expected_md5}" ] || return 1
+  fi
+}
+
+decode_to_md5_vp9() {
+  # expected MD5 sum for the last frame.
+  local expected_md5="2952c0eae93f3dadd1aa84c50d3fd6d2"
+
+  if [ "$(vp9_decode_available)" = "yes" ]; then
+    local actual_md5="$(decode_to_md5 "${VP9_IVF_FILE}" vp9)" || return 1
+    [ "${actual_md5}" = "${expected_md5}" ] || return 1
+  fi
+}
+
+decode_to_md5_tests="decode_to_md5_vp8
+                     decode_to_md5_vp9"
+
+run_tests decode_to_md5_verify_environment "${decode_to_md5_tests}"
diff --git a/source/libvpx/test/decode_with_drops.sh b/source/libvpx/test/decode_with_drops.sh
new file mode 100755
index 0000000..d0321bf
--- /dev/null
+++ b/source/libvpx/test/decode_with_drops.sh
@@ -0,0 +1,75 @@
+#!/bin/sh
+##
+##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+##  This file tests the libvpx decode_with_drops example. To add new tests to
+##  this file, do the following:
+##    1. Write a shell function (this is your test).
+##    2. Add the function to decode_with_drops_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: Make sure input is available:
+#   $VP8_IVF_FILE and $VP9_IVF_FILE are required.
+decode_with_drops_verify_environment() {
+  if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then
+    echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs decode_with_drops on $1, $2 is interpreted as codec name and used solely
+# to name the output file. $3 is the drop mode, and is passed directly to
+# decode_with_drops.
+decode_with_drops() {
+  local decoder="${LIBVPX_BIN_PATH}/decode_with_drops${VPX_TEST_EXE_SUFFIX}"
+  local input_file="$1"
+  local codec="$2"
+  local output_file="${VPX_TEST_OUTPUT_DIR}/decode_with_drops_${codec}"
+  local drop_mode="$3"
+
+  [ -x "${decoder}" ] || return 1
+
+  "${decoder}" "${input_file}" "${output_file}" "${drop_mode}" > /dev/null 2>&1
+
+  [ -e "${output_file}" ] || return 1
+}
+
+# Decodes $VP8_IVF_FILE while dropping frames, twice: once in sequence mode,
+# and once in pattern mode.
+# Note: This test assumes that $VP8_IVF_FILE has exactly 29 frames, and could
+# break if the file is modified.
+decode_with_drops_vp8() {
+  if [ "$(vp8_decode_available)" = "yes" ]; then
+    # Test sequence mode: Drop frames 2-28.
+    decode_with_drops "${VP8_IVF_FILE}" "vp8" "2-28"
+
+    # Test pattern mode: Drop 3 of every 4 frames.
+    decode_with_drops "${VP8_IVF_FILE}" "vp8" "3/4"
+  fi
+}
+
+# Decodes $VP9_IVF_FILE while dropping frames, twice: once in sequence mode,
+# and once in pattern mode.
+# Note: This test assumes that $VP9_IVF_FILE has exactly 20 frames, and could
+# break if the file is modified.
+decode_with_drops_vp9() {
+  if [ "$(vp9_decode_available)" = "yes" ]; then
+    # Test sequence mode: Drop frames 2-19.
+    decode_with_drops "${VP9_IVF_FILE}" "vp9" "2-19"
+
+    # Test pattern mode: Drop 3 of every 4 frames.
+    decode_with_drops "${VP9_IVF_FILE}" "vp9" "3/4"
+  fi
+}
+
+decode_with_drops_tests="decode_with_drops_vp8
+                         decode_with_drops_vp9"
+
+run_tests decode_with_drops_verify_environment "${decode_with_drops_tests}"
diff --git a/source/libvpx/test/examples.sh b/source/libvpx/test/examples.sh
new file mode 100755
index 0000000..ac2a18c
--- /dev/null
+++ b/source/libvpx/test/examples.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+##
+##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+##  This file runs all of the tests for the libvpx examples.
+##
+. $(dirname $0)/tools_common.sh
+
+example_tests=$(ls $(dirname $0)/*.sh)
+
+# List of script names to exclude.
+exclude_list="examples vpxdec vpxenc tools_common"
+
+# Filter out the scripts in $exclude_list.
+for word in ${exclude_list}; do
+  example_tests=$(filter_strings "${example_tests}" "${word}" exclude)
+done
+
+for test in ${example_tests}; do
+  # Source each test script so that exporting variables can be avoided.
+  . "${test}"
+done
diff --git a/source/libvpx/test/simple_decoder.sh b/source/libvpx/test/simple_decoder.sh
new file mode 100755
index 0000000..a0db58f
--- /dev/null
+++ b/source/libvpx/test/simple_decoder.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+##
+##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+##  This file tests the libvpx simple_decoder example code. To add new tests to
+##  this file, do the following:
+##    1. Write a shell function (this is your test).
+##    2. Add the function to simple_decoder_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: Make sure input is available:
+#   $VP8_IVF_FILE and $VP9_IVF_FILE are required.
+simple_decoder_verify_environment() {
+  if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then
+    echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs simple_decoder using $1 as input file. $2 is the codec name, and is used
+# solely to name the output file.
+simple_decoder() {
+  local decoder="${LIBVPX_BIN_PATH}/simple_decoder${VPX_TEST_EXE_SUFFIX}"
+  local input_file="$1"
+  local codec="$2"
+  local output_file="${VPX_TEST_OUTPUT_DIR}/simple_decoder_${codec}.raw"
+
+  [ -x "${decoder}" ] || return 1
+
+  "${decoder}" "${input_file}" "${output_file}" > /dev/null 2>&1
+
+  [ -e "${output_file}" ] || return 1
+}
+
+simple_decoder_vp8() {
+  if [ "$(vp8_decode_available)" = "yes" ]; then
+    simple_decoder "${VP8_IVF_FILE}" vp8 || return 1
+  fi
+}
+
+simple_decoder_vp9() {
+  if [ "$(vp9_decode_available)" = "yes" ]; then
+    simple_decoder "${VP9_IVF_FILE}" vp9 || return 1
+  fi
+}
+
+simple_decoder_tests="simple_decoder_vp8
+                      simple_decoder_vp9"
+
+run_tests simple_decoder_verify_environment "${simple_decoder_tests}"
diff --git a/source/libvpx/test/simple_encoder.sh b/source/libvpx/test/simple_encoder.sh
new file mode 100755
index 0000000..13f5e29
--- /dev/null
+++ b/source/libvpx/test/simple_encoder.sh
@@ -0,0 +1,58 @@
+#!/bin/sh
+##
+##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+##  This file tests the libvpx simple_encoder example. To add new tests to this
+##  file, do the following:
+##    1. Write a shell function (this is your test).
+##    2. Add the function to simple_encoder_tests (on a new line).
+##
+. $(dirname $0)/tools_common.sh
+
+# Environment check: $YUV_RAW_INPUT is required.
+simple_encoder_verify_environment() {
+  if [ ! -e "${YUV_RAW_INPUT}" ]; then
+    echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+    return 1
+  fi
+}
+
+# Runs simple_encoder using the codec specified by $1.
+simple_encoder() {
+  local encoder="${LIBVPX_BIN_PATH}/simple_encoder${VPX_TEST_EXE_SUFFIX}"
+  local codec="$1"
+  local output_file="${VPX_TEST_OUTPUT_DIR}/simple_encoder_${codec}.ivf"
+
+  [ -x "${encoder}" ] || return 1
+
+  "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \
+      "${YUV_RAW_INPUT}" "${output_file}" 9999 > /dev/null 2>&1
+
+  [ -e "${output_file}" ] || return 1
+}
+
+simple_encoder_vp8() {
+  if [ "$(vp8_encode_available)" = "yes" ]; then
+    simple_encoder vp8 || return 1
+  fi
+}
+
+# TODO(tomfinegan): Add a frame limit param to simple_encoder and enable this
+# test. VP9 is just too slow right now: This test takes 4m30s+ on a fast
+# machine.
+DISABLED_simple_encoder_vp9() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    simple_encoder vp9 || return 1
+  fi
+}
+
+simple_encoder_tests="simple_encoder_vp8
+                      DISABLED_simple_encoder_vp9"
+
+run_tests simple_encoder_verify_environment "${simple_encoder_tests}"
diff --git a/source/libvpx/test/test.mk b/source/libvpx/test/test.mk
index da56b00..0dcb6c8 100644
--- a/source/libvpx/test/test.mk
+++ b/source/libvpx/test/test.mk
@@ -43,15 +43,13 @@
 
 ## WebM Parsing
 ifeq ($(CONFIG_WEBM_IO), yes)
-NESTEGG_SRCS                           += ../third_party/nestegg/halloc/halloc.h
-NESTEGG_SRCS                           += ../third_party/nestegg/halloc/src/align.h
-NESTEGG_SRCS                           += ../third_party/nestegg/halloc/src/halloc.c
-NESTEGG_SRCS                           += ../third_party/nestegg/halloc/src/hlist.h
-NESTEGG_SRCS                           += ../third_party/nestegg/include/nestegg/nestegg.h
-NESTEGG_SRCS                           += ../third_party/nestegg/src/nestegg.c
-LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += $(NESTEGG_SRCS)
+LIBWEBM_PARSER_SRCS                    += ../third_party/libwebm/mkvparser.cpp
+LIBWEBM_PARSER_SRCS                    += ../third_party/libwebm/mkvreader.cpp
+LIBWEBM_PARSER_SRCS                    += ../third_party/libwebm/mkvparser.hpp
+LIBWEBM_PARSER_SRCS                    += ../third_party/libwebm/mkvreader.hpp
+LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += $(LIBWEBM_PARSER_SRCS)
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../tools_common.h
-LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../webmdec.c
+LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../webmdec.cc
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../webmdec.h
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += webm_video_source.h
 endif
diff --git a/source/libvpx/test/tools_common.sh b/source/libvpx/test/tools_common.sh
index aa446c9..1ed0176 100755
--- a/source/libvpx/test/tools_common.sh
+++ b/source/libvpx/test/tools_common.sh
@@ -9,8 +9,17 @@
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 ##  This file contains shell code shared by test scripts for libvpx tools.
+
+# Use $VPX_TEST_TOOLS_COMMON_SH as a pseudo include guard.
+if [ -z "${VPX_TEST_TOOLS_COMMON_SH}" ]; then
+VPX_TEST_TOOLS_COMMON_SH=included
+
 set -e
 
+vlog() {  # Echo "$@" if verbose; succeed when quiet so set -e does not abort.
+  [ "${VPX_TEST_VERBOSE_OUTPUT}" != "yes" ] || echo "$@"
+}
+
 # Sets $VPX_TOOL_TEST to the name specified by positional parameter one.
 test_begin() {
   VPX_TOOL_TEST="${1}"
@@ -308,9 +317,9 @@
   # Run tests.
   for test in ${tests_to_run}; do
     test_begin "${test}"
-    [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ] && echo "  RUN  ${test}"
+    vlog "  RUN  ${test}"
     "${test}"
-    [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ] && echo "  PASS ${test}"
+    vlog "  PASS ${test}"
     test_end "${test}"
   done
 
@@ -416,17 +425,26 @@
   VPX_TEST_EXE_SUFFIX=".exe"
 fi
 
+# Variables shared by tests.
+VP8_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp80-00-comprehensive-001.ivf"
+VP9_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-09-subpixel-00.ivf"
+
+VP9_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm"
+
+YUV_RAW_INPUT="${LIBVPX_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
+YUV_RAW_INPUT_WIDTH=352
+YUV_RAW_INPUT_HEIGHT=288
+
+# Set up a trap function to clean up after tests complete.
 trap cleanup EXIT
 
-if [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ]; then
-cat << EOF
-$(basename "${0%.*}") test configuration:
+vlog "$(basename "${0%.*}") test configuration:
   LIBVPX_BIN_PATH=${LIBVPX_BIN_PATH}
   LIBVPX_CONFIG_PATH=${LIBVPX_CONFIG_PATH}
   LIBVPX_TEST_DATA_PATH=${LIBVPX_TEST_DATA_PATH}
   VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR}
   VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT}
   VPX_TEST_FILTER=${VPX_TEST_FILTER}
-  VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}
-EOF
-fi
+  VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}"
+
+fi  # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard.
diff --git a/source/libvpx/test/vpxdec.sh b/source/libvpx/test/vpxdec.sh
index d236f97..093230b 100755
--- a/source/libvpx/test/vpxdec.sh
+++ b/source/libvpx/test/vpxdec.sh
@@ -14,9 +14,6 @@
 ##
 . $(dirname $0)/tools_common.sh
 
-VP8_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp80-00-comprehensive-001.ivf"
-VP9_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm"
-
 # Environment check: Make sure input is available.
 vpxdec_verify_environment() {
   if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_WEBM_FILE}" ]; then
diff --git a/source/libvpx/test/vpxenc.sh b/source/libvpx/test/vpxenc.sh
index 89e4eb3..f08c048 100755
--- a/source/libvpx/test/vpxenc.sh
+++ b/source/libvpx/test/vpxenc.sh
@@ -15,9 +15,6 @@
 ##
 . $(dirname $0)/tools_common.sh
 
-YUV_RAW_INPUT="${LIBVPX_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
-YUV_RAW_INPUT_WIDTH=352
-YUV_RAW_INPUT_HEIGHT=288
 TEST_FRAMES=10
 
 # Environment check: Make sure input is available.
diff --git a/source/libvpx/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm b/source/libvpx/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
index e3ea91f..a8730aa 100644
--- a/source/libvpx/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
@@ -26,6 +26,7 @@
 
 |vp8_build_intra_predictors_mby_neon_func| PROC
     push            {r4-r8, lr}
+    vpush           {d8-d15}
 
     cmp             r3, #0
     beq             case_dc_pred
@@ -37,8 +38,8 @@
     beq             case_tm_pred
 
 case_dc_pred
-    ldr             r4, [sp, #24]       ; Up
-    ldr             r5, [sp, #28]       ; Left
+    ldr             r4, [sp, #88]       ; Up
+    ldr             r5, [sp, #92]       ; Left
 
     ; Default the DC average to 128
     mov             r12, #128
@@ -143,6 +144,7 @@
     vst1.u8         {q0}, [r1]!
     vst1.u8         {q0}, [r1]!
 
+    vpop            {d8-d15}
     pop             {r4-r8,pc}
 case_v_pred
     ; Copy down above row
@@ -165,6 +167,7 @@
     vst1.u8         {q0}, [r1]!
     vst1.u8         {q0}, [r1]!
     vst1.u8         {q0}, [r1]!
+    vpop            {d8-d15}
     pop             {r4-r8,pc}
 
 case_h_pred
@@ -224,6 +227,7 @@
     vst1.u8         {q2}, [r1]!
     vst1.u8         {q3}, [r1]!
 
+    vpop            {d8-d15}
     pop             {r4-r8,pc}
 
 case_tm_pred
@@ -293,6 +297,7 @@
     subs            r12, r12, #1
     bne             case_tm_pred_loop
 
+    vpop            {d8-d15}
     pop             {r4-r8,pc}
 
     ENDP
@@ -307,6 +312,7 @@
 
 |vp8_build_intra_predictors_mby_s_neon_func| PROC
     push            {r4-r8, lr}
+    vpush           {d8-d15}
 
     mov             r1, r0      ;   unsigned char *ypred_ptr = x->dst.y_buffer; //x->Predictor;
 
@@ -320,8 +326,8 @@
     beq             case_tm_pred_s
 
 case_dc_pred_s
-    ldr             r4, [sp, #24]       ; Up
-    ldr             r5, [sp, #28]       ; Left
+    ldr             r4, [sp, #88]       ; Up
+    ldr             r5, [sp, #92]       ; Left
 
     ; Default the DC average to 128
     mov             r12, #128
@@ -426,6 +432,7 @@
     vst1.u8         {q0}, [r1], r2
     vst1.u8         {q0}, [r1], r2
 
+    vpop            {d8-d15}
     pop             {r4-r8,pc}
 case_v_pred_s
     ; Copy down above row
@@ -448,6 +455,8 @@
     vst1.u8         {q0}, [r1], r2
     vst1.u8         {q0}, [r1], r2
     vst1.u8         {q0}, [r1], r2
+
+    vpop            {d8-d15}
     pop             {r4-r8,pc}
 
 case_h_pred_s
@@ -507,6 +516,7 @@
     vst1.u8         {q2}, [r1], r2
     vst1.u8         {q3}, [r1], r2
 
+    vpop            {d8-d15}
     pop             {r4-r8,pc}
 
 case_tm_pred_s
@@ -576,6 +586,7 @@
     subs            r12, r12, #1
     bne             case_tm_pred_loop_s
 
+    vpop            {d8-d15}
     pop             {r4-r8,pc}
 
     ENDP
diff --git a/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm b/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
index 6c29c55..3a39210 100644
--- a/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/idct_dequant_0_2x_neon.asm
@@ -22,6 +22,7 @@
 ; r3   stride
 |idct_dequant_0_2x_neon| PROC
     push            {r4, r5}
+    vpush           {d8-d15}
 
     add             r12, r2, #4
     vld1.32         {d2[0]}, [r2], r3
@@ -72,6 +73,7 @@
     vst1.32         {d4[1]}, [r2]
     vst1.32         {d10[1]}, [r0]
 
+    vpop            {d8-d15}
     pop             {r4, r5}
     bx              lr
 
diff --git a/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm b/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
index d5dce63..8da0fa0 100644
--- a/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/idct_dequant_full_2x_neon.asm
@@ -22,6 +22,8 @@
 ; r2    *dst
 ; r3    stride
 |idct_dequant_full_2x_neon| PROC
+    vpush           {d8-d15}
+
     vld1.16         {q0, q1}, [r1]          ; dq (same l/r)
     vld1.16         {q2, q3}, [r0]          ; l q
     add             r0, r0, #32
@@ -184,6 +186,7 @@
     vst1.32         {d3[0]}, [r2]
     vst1.32         {d3[1]}, [r1]
 
+    vpop            {d8-d15}
     bx             lr
 
     ENDP           ; |idct_dequant_full_2x_neon|
diff --git a/source/libvpx/vp8/common/arm/neon/loopfilter_neon.asm b/source/libvpx/vp8/common/arm/neon/loopfilter_neon.asm
index e44be0a..c4f09c7 100644
--- a/source/libvpx/vp8/common/arm/neon/loopfilter_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/loopfilter_neon.asm
@@ -24,10 +24,12 @@
 ; sp    unsigned char thresh,
 |vp8_loop_filter_horizontal_edge_y_neon| PROC
     push        {lr}
+    vpush       {d8-d15}
+
     vdup.u8     q0, r2                     ; duplicate blimit
     vdup.u8     q1, r3                     ; duplicate limit
     sub         r2, r0, r1, lsl #2         ; move src pointer down by 4 lines
-    ldr         r3, [sp, #4]               ; load thresh
+    ldr         r3, [sp, #68]              ; load thresh
     add         r12, r2, r1
     add         r1, r1, r1
 
@@ -52,6 +54,7 @@
     vst1.u8     {q7}, [r2@128], r1              ; store oq0
     vst1.u8     {q8}, [r12@128], r1             ; store oq1
 
+    vpop        {d8-d15}
     pop         {pc}
     ENDP        ; |vp8_loop_filter_horizontal_edge_y_neon|
 
@@ -64,10 +67,12 @@
 ; sp+4  unsigned char *v
 |vp8_loop_filter_horizontal_edge_uv_neon| PROC
     push        {lr}
+    vpush       {d8-d15}
+
     vdup.u8     q0, r2                      ; duplicate blimit
     vdup.u8     q1, r3                      ; duplicate limit
-    ldr         r12, [sp, #4]               ; load thresh
-    ldr         r2, [sp, #8]                ; load v ptr
+    ldr         r12, [sp, #68]              ; load thresh
+    ldr         r2, [sp, #72]               ; load v ptr
     vdup.u8     q2, r12                     ; duplicate thresh
 
     sub         r3, r0, r1, lsl #2          ; move u pointer down by 4 lines
@@ -104,6 +109,7 @@
     vst1.u8     {d16}, [r0@64]                 ; store u oq1
     vst1.u8     {d17}, [r2@64]                 ; store v oq1
 
+    vpop        {d8-d15}
     pop         {pc}
     ENDP        ; |vp8_loop_filter_horizontal_edge_uv_neon|
 
@@ -120,11 +126,13 @@
 
 |vp8_loop_filter_vertical_edge_y_neon| PROC
     push        {lr}
+    vpush       {d8-d15}
+
     vdup.u8     q0, r2                     ; duplicate blimit
     vdup.u8     q1, r3                     ; duplicate limit
     sub         r2, r0, #4                 ; src ptr down by 4 columns
     add         r1, r1, r1
-    ldr         r3, [sp, #4]               ; load thresh
+    ldr         r3, [sp, #68]              ; load thresh
     add         r12, r2, r1, asr #1
 
     vld1.u8     {d6}, [r2], r1
@@ -194,6 +202,7 @@
     vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r0]
     vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r12]
 
+    vpop        {d8-d15}
     pop         {pc}
     ENDP        ; |vp8_loop_filter_vertical_edge_y_neon|
 
@@ -210,9 +219,11 @@
 ; sp+4  unsigned char *v
 |vp8_loop_filter_vertical_edge_uv_neon| PROC
     push        {lr}
+    vpush       {d8-d15}
+
     vdup.u8     q0, r2                      ; duplicate blimit
     sub         r12, r0, #4                 ; move u pointer down by 4 columns
-    ldr         r2, [sp, #8]                ; load v ptr
+    ldr         r2, [sp, #72]               ; load v ptr
     vdup.u8     q1, r3                      ; duplicate limit
     sub         r3, r2, #4                  ; move v pointer down by 4 columns
 
@@ -233,7 +244,7 @@
     vld1.u8     {d20}, [r12]
     vld1.u8     {d21}, [r3]
 
-    ldr        r12, [sp, #4]               ; load thresh
+    ldr        r12, [sp, #68]              ; load thresh
 
     ;transpose to 8x16 matrix
     vtrn.32     q3, q7
@@ -281,6 +292,7 @@
     vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0]
     vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r2]
 
+    vpop        {d8-d15}
     pop         {pc}
     ENDP        ; |vp8_loop_filter_vertical_edge_uv_neon|
 
diff --git a/source/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm b/source/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
index adf848b..6eb0651 100644
--- a/source/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
@@ -9,7 +9,6 @@
 ;
 
 
-    ;EXPORT  |vp8_loop_filter_simple_horizontal_edge_neon|
     EXPORT  |vp8_loop_filter_bhs_neon|
     EXPORT  |vp8_loop_filter_mbhs_neon|
     ARM
@@ -22,7 +21,7 @@
 ; q1    limit, PRESERVE
 
 |vp8_loop_filter_simple_horizontal_edge_neon| PROC
-
+    vpush       {d8-d15}
     sub         r3, r0, r1, lsl #1          ; move src pointer down by 2 lines
 
     vld1.u8     {q7}, [r0@128], r1          ; q0
@@ -82,6 +81,7 @@
     vst1.u8     {q6}, [r3@128]              ; store op0
     vst1.u8     {q7}, [r0@128]              ; store oq0
 
+    vpop        {d8-d15}
     bx          lr
     ENDP        ; |vp8_loop_filter_simple_horizontal_edge_neon|
 
diff --git a/source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm b/source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
index e690df2..78d13c8 100644
--- a/source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
@@ -9,7 +9,6 @@
 ;
 
 
-    ;EXPORT  |vp8_loop_filter_simple_vertical_edge_neon|
     EXPORT |vp8_loop_filter_bvs_neon|
     EXPORT |vp8_loop_filter_mbvs_neon|
     ARM
@@ -22,6 +21,8 @@
 ; q1    limit, PRESERVE
 
 |vp8_loop_filter_simple_vertical_edge_neon| PROC
+    vpush       {d8-d15}
+
     sub         r0, r0, #2                  ; move src pointer down by 2 columns
     add         r12, r1, r1
     add         r3, r0, r1
@@ -120,6 +121,7 @@
     vst2.8      {d14[6], d15[6]}, [r0], r12
     vst2.8      {d14[7], d15[7]}, [r3]
 
+    vpop        {d8-d15}
     bx          lr
     ENDP        ; |vp8_loop_filter_simple_vertical_edge_neon|
 
diff --git a/source/libvpx/vp8/common/arm/neon/mbloopfilter_neon.asm b/source/libvpx/vp8/common/arm/neon/mbloopfilter_neon.asm
index f41c156..d200c30 100644
--- a/source/libvpx/vp8/common/arm/neon/mbloopfilter_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/mbloopfilter_neon.asm
@@ -28,8 +28,10 @@
 ; sp    unsigned char thresh,
 |vp8_mbloop_filter_horizontal_edge_y_neon| PROC
     push        {lr}
+    vpush       {d8-d15}
+
     add         r1, r1, r1                  ; double stride
-    ldr         r12, [sp, #4]               ; load thresh
+    ldr         r12, [sp, #68]              ; load thresh
     sub         r0, r0, r1, lsl #1          ; move src pointer down by 4 lines
     vdup.u8     q2, r12                     ; thresh
     add         r12, r0, r1,  lsr #1        ; move src pointer up by 1 line
@@ -55,6 +57,7 @@
     vst1.u8     {q8}, [r12@128]            ; store oq1
     vst1.u8     {q9}, [r0@128]             ; store oq2
 
+    vpop        {d8-d15}
     pop         {pc}
     ENDP        ; |vp8_mbloop_filter_horizontal_edge_y_neon|
 
@@ -72,10 +75,12 @@
 
 |vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
     push        {lr}
-    ldr         r12, [sp, #4]                 ; load thresh
+    vpush       {d8-d15}
+
+    ldr         r12, [sp, #68]                ; load thresh
     sub         r0, r0, r1, lsl #2            ; move u pointer down by 4 lines
     vdup.u8     q2, r12                       ; thresh
-    ldr         r12, [sp, #8]                 ; load v ptr
+    ldr         r12, [sp, #72]                ; load v ptr
     sub         r12, r12, r1, lsl #2          ; move v pointer down by 4 lines
 
     vld1.u8     {d6}, [r0@64], r1              ; p3
@@ -116,6 +121,7 @@
     vst1.u8     {d18}, [r0@64], r1             ; store u oq2
     vst1.u8     {d19}, [r12@64], r1             ; store v oq2
 
+    vpop        {d8-d15}
     pop         {pc}
     ENDP        ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
 
@@ -130,7 +136,9 @@
 ; sp    unsigned char thresh,
 |vp8_mbloop_filter_vertical_edge_y_neon| PROC
     push        {lr}
-    ldr         r12, [sp, #4]               ; load thresh
+    vpush       {d8-d15}
+
+    ldr         r12, [sp, #68]              ; load thresh
     sub         r0, r0, #4                  ; move src pointer down by 4 columns
     vdup.s8     q2, r12                     ; thresh
     add         r12, r0, r1, lsl #3         ; move src pointer down by 8 lines
@@ -208,6 +216,7 @@
     vst1.8      {d20}, [r0]
     vst1.8      {d21}, [r12]
 
+    vpop        {d8-d15}
     pop         {pc}
     ENDP        ; |vp8_mbloop_filter_vertical_edge_y_neon|
 
@@ -224,10 +233,12 @@
 ; sp+4  unsigned char *v
 |vp8_mbloop_filter_vertical_edge_uv_neon| PROC
     push        {lr}
-    ldr         r12, [sp, #4]               ; load thresh
+    vpush       {d8-d15}
+
+    ldr         r12, [sp, #68]              ; load thresh
     sub         r0, r0, #4                  ; move u pointer down by 4 columns
     vdup.u8     q2, r12                     ; thresh
-    ldr         r12, [sp, #8]               ; load v ptr
+    ldr         r12, [sp, #72]              ; load v ptr
     sub         r12, r12, #4                ; move v pointer down by 4 columns
 
     vld1.u8     {d6}, [r0], r1              ;load u data
@@ -303,6 +314,7 @@
     vst1.8      {d20}, [r0]
     vst1.8      {d21}, [r12]
 
+    vpop        {d8-d15}
     pop         {pc}
     ENDP        ; |vp8_mbloop_filter_vertical_edge_uv_neon|
 
diff --git a/source/libvpx/vp8/common/arm/neon/sad16_neon.asm b/source/libvpx/vp8/common/arm/neon/sad16_neon.asm
index d7c590e..7197e56 100644
--- a/source/libvpx/vp8/common/arm/neon/sad16_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/sad16_neon.asm
@@ -24,6 +24,7 @@
 ; r3    int  ref_stride
 |vp8_sad16x16_neon| PROC
 ;;
+    vpush           {d8-d15}
     vld1.8          {q0}, [r0], r1
     vld1.8          {q4}, [r2], r3
 
@@ -132,6 +133,7 @@
 
     vmov.32         r0, d0[0]
 
+    vpop            {d8-d15}
     bx              lr
 
     ENDP
@@ -143,6 +145,8 @@
 ;    unsigned char *ref_ptr,
 ;    int  ref_stride)
 |vp8_sad16x8_neon| PROC
+    vpush           {d8-d15}
+
     vld1.8          {q0}, [r0], r1
     vld1.8          {q4}, [r2], r3
 
@@ -200,6 +204,7 @@
 
     vmov.32         r0, d0[0]
 
+    vpop            {d8-d15}
     bx              lr
 
     ENDP
diff --git a/source/libvpx/vp8/common/arm/neon/sad8_neon.asm b/source/libvpx/vp8/common/arm/neon/sad8_neon.asm
index 23ba6df..6b849d9 100644
--- a/source/libvpx/vp8/common/arm/neon/sad8_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/sad8_neon.asm
@@ -25,6 +25,7 @@
 ;    int  ref_stride)
 
 |vp8_sad8x8_neon| PROC
+    vpush           {d8-d15}
     vld1.8          {d0}, [r0], r1
     vld1.8          {d8}, [r2], r3
 
@@ -70,6 +71,7 @@
 
     vmov.32         r0, d0[0]
 
+    vpop            {d8-d15}
     bx              lr
 
     ENDP
@@ -82,6 +84,7 @@
 ;    int  ref_stride)
 
 |vp8_sad8x16_neon| PROC
+    vpush           {d8-d15}
     vld1.8          {d0}, [r0], r1
     vld1.8          {d8}, [r2], r3
 
@@ -167,6 +170,7 @@
 
     vmov.32         r0, d0[0]
 
+    vpop            {d8-d15}
     bx              lr
 
     ENDP
@@ -179,6 +183,7 @@
 ;    int  ref_stride)
 
 |vp8_sad4x4_neon| PROC
+    vpush           {d8-d15}
     vld1.8          {d0}, [r0], r1
     vld1.8          {d8}, [r2], r3
 
@@ -202,6 +207,7 @@
     vpaddl.u32      d0, d1
     vmov.32         r0, d0[0]
 
+    vpop            {d8-d15}
     bx              lr
 
     ENDP
diff --git a/source/libvpx/vp8/common/arm/neon/save_reg_neon.asm b/source/libvpx/vp8/common/arm/neon/save_reg_neon.asm
deleted file mode 100644
index fd7002e..0000000
--- a/source/libvpx/vp8/common/arm/neon/save_reg_neon.asm
+++ /dev/null
@@ -1,36 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_push_neon|
-    EXPORT  |vp8_pop_neon|
-
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-|vp8_push_neon| PROC
-    vst1.i64            {d8, d9, d10, d11}, [r0]!
-    vst1.i64            {d12, d13, d14, d15}, [r0]!
-    bx              lr
-
-    ENDP
-
-|vp8_pop_neon| PROC
-    vld1.i64            {d8, d9, d10, d11}, [r0]!
-    vld1.i64            {d12, d13, d14, d15}, [r0]!
-    bx              lr
-
-    ENDP
-
-    END
-
diff --git a/source/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.asm b/source/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.asm
index 67d2ab0..87ca887 100644
--- a/source/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.asm
@@ -37,12 +37,14 @@
 ; result of the multiplication that is needed in IDCT.
 
 |vp8_short_idct4x4llm_neon| PROC
+    vpush           {d8-d15}
+
     adr             r12, idct_coeff
     vld1.16         {q1, q2}, [r0]
     vld1.16         {d0}, [r12]
 
     vswp            d3, d4                  ;q2(vp[4] vp[12])
-    ldr             r0, [sp]                ; stride
+    ldr             r0, [sp, #64]           ; stride
 
     vqdmulh.s16     q3, q2, d0[2]
     vqdmulh.s16     q4, q2, d0[0]
@@ -125,6 +127,7 @@
     vst1.32         d2[0], [r3], r0
     vst1.32         d2[1], [r3], r0
 
+    vpop            {d8-d15}
     bx              lr
 
     ENDP
diff --git a/source/libvpx/vp8/common/arm/neon/sixtappredict16x16_neon.asm b/source/libvpx/vp8/common/arm/neon/sixtappredict16x16_neon.asm
index 9fdafd3..dd27719 100644
--- a/source/libvpx/vp8/common/arm/neon/sixtappredict16x16_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/sixtappredict16x16_neon.asm
@@ -43,10 +43,11 @@
 
 |vp8_sixtap_predict16x16_neon| PROC
     push            {r4-r5, lr}
+    vpush           {d8-d15}
 
     adr             r12, filter16_coeff
-    ldr             r4, [sp, #12]           ;load parameters from stack
-    ldr             r5, [sp, #16]           ;load parameters from stack
+    ldr             r4, [sp, #76]           ;load parameters from stack
+    ldr             r5, [sp, #80]           ;load parameters from stack
 
     cmp             r2, #0                  ;skip first_pass filter if xoffset=0
     beq             secondpass_filter16x16_only
@@ -291,6 +292,8 @@
     bne filt_blk2d_sp16x16_outloop_neon
 
     add             sp, sp, #336
+
+    vpop            {d8-d15}
     pop             {r4-r5,pc}
 
 ;--------------------
@@ -384,6 +387,7 @@
 
     bne             filt_blk2d_fpo16x16_loop_neon
 
+    vpop            {d8-d15}
     pop             {r4-r5,pc}
 
 ;--------------------
@@ -482,6 +486,7 @@
 
     bne filt_blk2d_spo16x16_outloop_neon
 
+    vpop            {d8-d15}
     pop             {r4-r5,pc}
 
     ENDP
diff --git a/source/libvpx/vp8/common/arm/neon/sixtappredict4x4_neon.asm b/source/libvpx/vp8/common/arm/neon/sixtappredict4x4_neon.asm
index a4222bc..e32e713 100644
--- a/source/libvpx/vp8/common/arm/neon/sixtappredict4x4_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/sixtappredict4x4_neon.asm
@@ -35,10 +35,11 @@
 
 |vp8_sixtap_predict4x4_neon| PROC
     push            {r4, lr}
+    vpush           {d8-d15}
 
     adr             r12, filter4_coeff
-    ldr             r4, [sp, #8]            ;load parameters from stack
-    ldr             lr, [sp, #12]           ;load parameters from stack
+    ldr             r4, [sp, #72]            ;load parameters from stack
+    ldr             lr, [sp, #76]           ;load parameters from stack
 
     cmp             r2, #0                  ;skip first_pass filter if xoffset=0
     beq             secondpass_filter4x4_only
@@ -261,6 +262,7 @@
     vst1.32         {d4[0]}, [r1]
     vst1.32         {d4[1]}, [r2]
 
+    vpop            {d8-d15}
     pop             {r4, pc}
 
 
@@ -348,6 +350,7 @@
     vst1.32         {d28[0]}, [r1]
     vst1.32         {d28[1]}, [r2]
 
+    vpop            {d8-d15}
     pop             {r4, pc}
 
 
@@ -413,6 +416,7 @@
     vst1.32         {d4[0]}, [r1]
     vst1.32         {d4[1]}, [r2]
 
+    vpop            {d8-d15}
     pop             {r4, pc}
 
     ENDP
diff --git a/source/libvpx/vp8/common/arm/neon/sixtappredict8x4_neon.asm b/source/libvpx/vp8/common/arm/neon/sixtappredict8x4_neon.asm
index a57ec01..d19bf89 100644
--- a/source/libvpx/vp8/common/arm/neon/sixtappredict8x4_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/sixtappredict8x4_neon.asm
@@ -35,10 +35,11 @@
 
 |vp8_sixtap_predict8x4_neon| PROC
     push            {r4-r5, lr}
+    vpush           {d8-d15}
 
     adr             r12, filter8_coeff
-    ldr             r4, [sp, #12]           ;load parameters from stack
-    ldr             r5, [sp, #16]           ;load parameters from stack
+    ldr             r4, [sp, #76]           ;load parameters from stack
+    ldr             r5, [sp, #80]           ;load parameters from stack
 
     cmp             r2, #0                  ;skip first_pass filter if xoffset=0
     beq             secondpass_filter8x4_only
@@ -297,6 +298,8 @@
     vst1.u8         {d9}, [r4], r5
 
     add             sp, sp, #32
+
+    vpop            {d8-d15}
     pop             {r4-r5,pc}
 
 ;--------------------
@@ -392,6 +395,7 @@
     vst1.u8         {d24}, [r4], r5
     vst1.u8         {d25}, [r4], r5
 
+    vpop            {d8-d15}
     pop             {r4-r5,pc}
 
 ;---------------------
@@ -464,6 +468,7 @@
     vst1.u8         {d8}, [r4], r5
     vst1.u8         {d9}, [r4], r5
 
+    vpop            {d8-d15}
     pop             {r4-r5,pc}
 
     ENDP
diff --git a/source/libvpx/vp8/common/arm/neon/sixtappredict8x8_neon.asm b/source/libvpx/vp8/common/arm/neon/sixtappredict8x8_neon.asm
index 00ed5ae..4b04925 100644
--- a/source/libvpx/vp8/common/arm/neon/sixtappredict8x8_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/sixtappredict8x8_neon.asm
@@ -35,11 +35,11 @@
 
 |vp8_sixtap_predict8x8_neon| PROC
     push            {r4-r5, lr}
-
+    vpush           {d8-d15}
     adr             r12, filter8_coeff
 
-    ldr             r4, [sp, #12]           ;load parameters from stack
-    ldr             r5, [sp, #16]           ;load parameters from stack
+    ldr             r4, [sp, #76]           ;load parameters from stack
+    ldr             r5, [sp, #80]           ;load parameters from stack
 
     cmp             r2, #0                  ;skip first_pass filter if xoffset=0
     beq             secondpass_filter8x8_only
@@ -324,6 +324,8 @@
     bne filt_blk2d_sp8x8_loop_neon
 
     add             sp, sp, #64
+
+    vpop            {d8-d15}
     pop             {r4-r5,pc}
 
 ;---------------------
@@ -428,6 +430,7 @@
 
     bne             filt_blk2d_fpo8x8_loop_neon
 
+    vpop            {d8-d15}
     pop             {r4-r5,pc}
 
 ;---------------------
@@ -515,6 +518,7 @@
 
     bne filt_blk2d_spo8x8_loop_neon
 
+    vpop            {d8-d15}
     pop             {r4-r5,pc}
 
     ENDP
diff --git a/source/libvpx/vp8/common/arm/neon/variance_neon.asm b/source/libvpx/vp8/common/arm/neon/variance_neon.asm
index e3b4832..8ecad72 100644
--- a/source/libvpx/vp8/common/arm/neon/variance_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/variance_neon.asm
@@ -26,6 +26,7 @@
 ; r3    int  recon_stride
 ; stack unsigned int *sse
 |vp8_variance16x16_neon| PROC
+    vpush           {q5}
     vmov.i8         q8, #0                      ;q8 - sum
     vmov.i8         q9, #0                      ;q9, q10 - sse
     vmov.i8         q10, #0
@@ -67,7 +68,7 @@
     vadd.u32        q10, q9, q10                ;accumulate sse
     vpaddl.s32      q0, q8                      ;accumulate sum
 
-    ldr             r12, [sp]                   ;load *sse from stack
+    ldr             r12, [sp, #16]              ;load *sse from stack
 
     vpaddl.u32      q1, q10
     vadd.s64        d0, d0, d1
@@ -87,6 +88,8 @@
     vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
+
+    vpop            {q5}
     bx              lr
 
     ENDP
@@ -99,6 +102,8 @@
 ;    int  recon_stride,
 ;   unsigned int *sse)
 |vp8_variance16x8_neon| PROC
+    vpush           {q5}
+
     vmov.i8         q8, #0                      ;q8 - sum
     vmov.i8         q9, #0                      ;q9, q10 - sse
     vmov.i8         q10, #0
@@ -137,7 +142,7 @@
     vadd.u32        q10, q9, q10                ;accumulate sse
     vpaddl.s32      q0, q8                      ;accumulate sum
 
-    ldr             r12, [sp]                   ;load *sse from stack
+    ldr             r12, [sp, #16]              ;load *sse from stack
 
     vpaddl.u32      q1, q10
     vadd.s64        d0, d0, d1
@@ -149,6 +154,8 @@
     vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
+
+    vpop            {q5}
     bx              lr
 
     ENDP
@@ -162,6 +169,8 @@
 ;   unsigned int *sse)
 
 |vp8_variance8x16_neon| PROC
+    vpush           {q5}
+
     vmov.i8         q8, #0                      ;q8 - sum
     vmov.i8         q9, #0                      ;q9, q10 - sse
     vmov.i8         q10, #0
@@ -192,7 +201,7 @@
     vadd.u32        q10, q9, q10                ;accumulate sse
     vpaddl.s32      q0, q8                      ;accumulate sum
 
-    ldr             r12, [sp]                   ;load *sse from stack
+    ldr             r12, [sp, #16]              ;load *sse from stack
 
     vpaddl.u32      q1, q10
     vadd.s64        d0, d0, d1
@@ -204,6 +213,8 @@
     vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
+
+    vpop            {q5}
     bx              lr
 
     ENDP
@@ -215,6 +226,8 @@
 ; r3    int  recon_stride
 ; stack unsigned int *sse
 |vp8_variance8x8_neon| PROC
+    vpush           {q5}
+
     vmov.i8         q8, #0                      ;q8 - sum
     vmov.i8         q9, #0                      ;q9, q10 - sse
     vmov.i8         q10, #0
@@ -257,7 +270,7 @@
     vadd.u32        q10, q9, q10                ;accumulate sse
     vpaddl.s32      q0, q8                      ;accumulate sum
 
-    ldr             r12, [sp]                   ;load *sse from stack
+    ldr             r12, [sp, #16]              ;load *sse from stack
 
     vpaddl.u32      q1, q10
     vadd.s64        d0, d0, d1
@@ -269,6 +282,8 @@
     vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
+
+    vpop            {q5}
     bx              lr
 
     ENDP
diff --git a/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm b/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
index 9d22c52..adc5b7e 100644
--- a/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16_neon.asm
@@ -31,11 +31,12 @@
 
 |vp8_sub_pixel_variance16x16_neon_func| PROC
     push            {r4-r6, lr}
+    vpush           {d8-d15}
 
     adr             r12, bilinear_taps_coeff
-    ldr             r4, [sp, #16]           ;load *dst_ptr from stack
-    ldr             r5, [sp, #20]           ;load dst_pixels_per_line from stack
-    ldr             r6, [sp, #24]           ;load *sse from stack
+    ldr             r4, [sp, #80]           ;load *dst_ptr from stack
+    ldr             r5, [sp, #84]           ;load dst_pixels_per_line from stack
+    ldr             r6, [sp, #88]           ;load *sse from stack
 
     cmp             r2, #0                  ;skip first_pass filter if xoffset=0
     beq             secondpass_bfilter16x16_only
@@ -416,6 +417,7 @@
     add             sp, sp, #528
     vmov.32         r0, d0[0]                   ;return
 
+    vpop            {d8-d15}
     pop             {r4-r6,pc}
 
     ENDP
diff --git a/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm
index 155be4f..b0829af 100644
--- a/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -31,9 +31,10 @@
 ;================================================
 |vp8_variance_halfpixvar16x16_h_neon| PROC
     push            {lr}
+    vpush           {d8-d15}
 
     mov             r12, #4                  ;loop counter
-    ldr             lr, [sp, #4]           ;load *sse from stack
+    ldr             lr, [sp, #68]            ;load *sse from stack
     vmov.i8         q8, #0                      ;q8 - sum
     vmov.i8         q9, #0                      ;q9, q10 - sse
     vmov.i8         q10, #0
@@ -116,6 +117,8 @@
     vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
+
+    vpop            {d8-d15}
     pop             {pc}
     ENDP
 
@@ -131,11 +134,12 @@
 ;================================================
 |vp8_variance_halfpixvar16x16_v_neon| PROC
     push            {lr}
+    vpush           {d8-d15}
 
     mov             r12, #4                     ;loop counter
 
     vld1.u8         {q0}, [r0], r1              ;load src data
-    ldr             lr, [sp, #4]                ;load *sse from stack
+    ldr             lr, [sp, #68]               ;load *sse from stack
 
     vmov.i8         q8, #0                      ;q8 - sum
     vmov.i8         q9, #0                      ;q9, q10 - sse
@@ -212,6 +216,8 @@
     vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
+
+    vpop            {d8-d15}
     pop             {pc}
     ENDP
 
@@ -227,10 +233,11 @@
 ;================================================
 |vp8_variance_halfpixvar16x16_hv_neon| PROC
     push            {lr}
+    vpush           {d8-d15}
 
     vld1.u8         {d0, d1, d2, d3}, [r0], r1      ;load src data
 
-    ldr             lr, [sp, #4]           ;load *sse from stack
+    ldr             lr, [sp, #68]           ;load *sse from stack
     vmov.i8         q13, #0                      ;q8 - sum
     vext.8          q1, q0, q1, #1          ;construct src_ptr[1]
 
@@ -331,6 +338,8 @@
     vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
+
+    vpop            {d8-d15}
     pop             {pc}
     ENDP
 
@@ -349,10 +358,11 @@
 
 |vp8_sub_pixel_variance16x16s_neon| PROC
     push            {r4, lr}
+    vpush           {d8-d15}
 
-    ldr             r4, [sp, #8]            ;load *dst_ptr from stack
-    ldr             r12, [sp, #12]          ;load dst_pixels_per_line from stack
-    ldr             lr, [sp, #16]           ;load *sse from stack
+    ldr             r4, [sp, #72]           ;load *dst_ptr from stack
+    ldr             r12, [sp, #76]          ;load dst_pixels_per_line from stack
+    ldr             lr, [sp, #80]           ;load *sse from stack
 
     cmp             r2, #0                  ;skip first_pass filter if xoffset=0
     beq             secondpass_bfilter16x16s_only
@@ -566,6 +576,7 @@
     add             sp, sp, #256
     vmov.32         r0, d0[0]                   ;return
 
+    vpop            {d8-d15}
     pop             {r4, pc}
     ENDP
 
diff --git a/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm b/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
index f6b6847..9d9f9e0 100644
--- a/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
+++ b/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance8x8_neon.asm
@@ -26,11 +26,12 @@
 
 |vp8_sub_pixel_variance8x8_neon| PROC
     push            {r4-r5, lr}
+    vpush           {d8-d15}
 
     adr             r12, bilinear_taps_coeff
-    ldr             r4, [sp, #12]           ;load *dst_ptr from stack
-    ldr             r5, [sp, #16]           ;load dst_pixels_per_line from stack
-    ldr             lr, [sp, #20]           ;load *sse from stack
+    ldr             r4, [sp, #76]           ;load *dst_ptr from stack
+    ldr             r5, [sp, #80]           ;load dst_pixels_per_line from stack
+    ldr             lr, [sp, #84]           ;load *sse from stack
 
     cmp             r2, #0                  ;skip first_pass filter if xoffset=0
     beq             skip_firstpass_filter
@@ -210,6 +211,8 @@
     vsub.u32        d0, d1, d10
 
     vmov.32         r0, d0[0]                   ;return
+
+    vpop            {d8-d15}
     pop             {r4-r5, pc}
 
     ENDP
diff --git a/source/libvpx/vp8/decoder/onyxd_if.c b/source/libvpx/vp8/decoder/onyxd_if.c
index 2d9e343..29fea61 100644
--- a/source/libvpx/vp8/decoder/onyxd_if.c
+++ b/source/libvpx/vp8/decoder/onyxd_if.c
@@ -178,12 +178,6 @@
    return pbi->common.error.error_code;
 }
 
-/*For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.*/
-#if HAVE_NEON
-extern void vp8_push_neon(int64_t *store);
-extern void vp8_pop_neon(int64_t *store);
-#endif
-
 static int get_free_fb (VP8_COMMON *cm)
 {
     int i;
@@ -307,9 +301,6 @@
                                   const uint8_t *source,
                                   int64_t time_stamp)
 {
-#if HAVE_NEON
-    int64_t dx_store_reg[8];
-#endif
     VP8_COMMON *cm = &pbi->common;
     int retcode = -1;
 
@@ -319,15 +310,6 @@
     if(retcode <= 0)
         return retcode;
 
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
-    if (cm->cpu_caps & HAS_NEON)
-#endif
-    {
-        vp8_push_neon(dx_store_reg);
-    }
-#endif
-
     cm->new_fb_idx = get_free_fb (cm);
 
     /* setup reference frames for vp8_decode_frame */
@@ -403,15 +385,6 @@
     pbi->last_time_stamp = time_stamp;
 
 decode_exit:
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
-    if (cm->cpu_caps & HAS_NEON)
-#endif
-    {
-        vp8_pop_neon(dx_store_reg);
-    }
-#endif
-
     pbi->common.error.setjmp = 0;
     return retcode;
 }
diff --git a/source/libvpx/vp8/encoder/arm/neon/subtract_neon.asm b/source/libvpx/vp8/encoder/arm/neon/subtract_neon.asm
index 5bda786..840cb33 100644
--- a/source/libvpx/vp8/encoder/arm/neon/subtract_neon.asm
+++ b/source/libvpx/vp8/encoder/arm/neon/subtract_neon.asm
@@ -65,8 +65,10 @@
 ;                           unsigned char *pred, int pred_stride)
 |vp8_subtract_mby_neon| PROC
     push            {r4-r7}
+    vpush           {d8-d15}
+
     mov             r12, #4
-    ldr             r4, [sp, #16]           ; pred_stride
+    ldr             r4, [sp, #80]           ; pred_stride
     mov             r6, #32                 ; "diff" stride x2
     add             r5, r0, #16             ; second diff pointer
 
@@ -101,6 +103,7 @@
     subs            r12, r12, #1
     bne             subtract_mby_loop
 
+    vpop            {d8-d15}
     pop             {r4-r7}
     bx              lr
     ENDP
@@ -112,9 +115,11 @@
 
 |vp8_subtract_mbuv_neon| PROC
     push            {r4-r7}
-    ldr             r4, [sp, #16]       ; upred
-    ldr             r5, [sp, #20]       ; vpred
-    ldr             r6, [sp, #24]       ; pred_stride
+    vpush           {d8-d15}
+
+    ldr             r4, [sp, #80]       ; upred
+    ldr             r5, [sp, #84]       ; vpred
+    ldr             r6, [sp, #88]       ; pred_stride
     add             r0, r0, #512        ; short *udiff = diff + 256;
     mov             r12, #32            ; "diff" stride x2
     add             r7, r0, #16         ; second diff pointer
@@ -191,6 +196,7 @@
     vst1.16         {q14}, [r0], r12
     vst1.16         {q15}, [r7], r12
 
+    vpop            {d8-d15}
     pop             {r4-r7}
     bx              lr
 
diff --git a/source/libvpx/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/source/libvpx/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
index 5b9f11e..d219e2d 100644
--- a/source/libvpx/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
+++ b/source/libvpx/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
@@ -21,6 +21,7 @@
 ;void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
 ;                             int sz);
 |vp8_memcpy_partial_neon| PROC
+    vpush               {d8-d15}
     ;pld                [r1]                        ;preload pred data
     ;pld                [r1, #128]
     ;pld                [r1, #256]
@@ -64,6 +65,7 @@
     bne             extra_copy_neon_loop
 
 done_copy_neon_loop
+    vpop            {d8-d15}
     bx              lr
     ENDP
 
diff --git a/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
index 55edbf5..f82af3e 100644
--- a/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
+++ b/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
@@ -27,6 +27,8 @@
 ;from vp8_variance().
 
 |vp8_mse16x16_neon| PROC
+    vpush           {q7}
+
     vmov.i8         q7, #0                      ;q7, q8, q9, q10 - sse
     vmov.i8         q8, #0
     vmov.i8         q9, #0
@@ -62,7 +64,7 @@
     vadd.u32        q7, q7, q8
     vadd.u32        q9, q9, q10
 
-    ldr             r12, [sp]               ;load *sse from stack
+    ldr             r12, [sp, #16]              ;load *sse from stack
 
     vadd.u32        q10, q7, q9
     vpaddl.u32      q1, q10
@@ -71,6 +73,7 @@
     vst1.32         {d0[0]}, [r12]
     vmov.32         r0, d0[0]
 
+    vpop            {q7}
     bx              lr
 
     ENDP
@@ -82,6 +85,8 @@
 ; r2    unsigned char *ref_ptr,
 ; r3    int  recon_stride
 |vp8_get4x4sse_cs_neon| PROC
+    vpush           {q7}
+
     vld1.8          {d0}, [r0], r1              ;Load up source and reference
     vld1.8          {d4}, [r2], r3
     vld1.8          {d1}, [r0], r1
@@ -109,6 +114,8 @@
     vadd.u64        d0, d2, d3
 
     vmov.32         r0, d0[0]
+
+    vpop            {q7}
     bx              lr
 
     ENDP
diff --git a/source/libvpx/vp8/encoder/onyx_if.c b/source/libvpx/vp8/encoder/onyx_if.c
index 32c5997..560134e 100644
--- a/source/libvpx/vp8/encoder/onyx_if.c
+++ b/source/libvpx/vp8/encoder/onyx_if.c
@@ -4820,33 +4820,11 @@
 }
 #endif
 
-/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */
-#if HAVE_NEON
-extern void vp8_push_neon(int64_t *store);
-extern void vp8_pop_neon(int64_t *store);
-#endif
-
-
 int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time)
 {
-#if HAVE_NEON
-    int64_t store_reg[8];
-#if CONFIG_RUNTIME_CPU_DETECT
-    VP8_COMMON            *cm = &cpi->common;
-#endif
-#endif
     struct vpx_usec_timer  timer;
     int                    res = 0;
 
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
-    if (cm->cpu_caps & HAS_NEON)
-#endif
-    {
-        vp8_push_neon(store_reg);
-    }
-#endif
-
     vpx_usec_timer_start(&timer);
 
     /* Reinit the lookahead buffer if the frame size changes */
@@ -4863,15 +4841,6 @@
     vpx_usec_timer_mark(&timer);
     cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
 
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
-    if (cm->cpu_caps & HAS_NEON)
-#endif
-    {
-        vp8_pop_neon(store_reg);
-    }
-#endif
-
     return res;
 }
 
@@ -4892,9 +4861,6 @@
 
 int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush)
 {
-#if HAVE_NEON
-    int64_t store_reg[8];
-#endif
     VP8_COMMON *cm;
     struct vpx_usec_timer  tsctimer;
     struct vpx_usec_timer  ticktimer;
@@ -4914,15 +4880,6 @@
 
     cpi->common.error.setjmp = 1;
 
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
-    if (cm->cpu_caps & HAS_NEON)
-#endif
-    {
-        vp8_push_neon(store_reg);
-    }
-#endif
-
     vpx_usec_timer_start(&cmptimer);
 
     cpi->source = NULL;
@@ -5005,14 +4962,6 @@
 
 #endif
 
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
-        if (cm->cpu_caps & HAS_NEON)
-#endif
-        {
-            vp8_pop_neon(store_reg);
-        }
-#endif
         return -1;
     }
 
@@ -5416,15 +5365,6 @@
 #endif
 #endif
 
-#if HAVE_NEON
-#if CONFIG_RUNTIME_CPU_DETECT
-    if (cm->cpu_caps & HAS_NEON)
-#endif
-    {
-        vp8_pop_neon(store_reg);
-    }
-#endif
-
     cpi->common.error.setjmp = 0;
 
     return 0;
diff --git a/source/libvpx/vp8/vp8_common.mk b/source/libvpx/vp8/vp8_common.mk
index dfb54a5..3568b34 100644
--- a/source/libvpx/vp8/vp8_common.mk
+++ b/source/libvpx/vp8/vp8_common.mk
@@ -172,7 +172,6 @@
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/sixtappredict8x8_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/sixtappredict16x16_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/save_reg_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/idct_dequant_full_2x_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/idct_dequant_0_2x_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/idct_blk_neon.c
diff --git a/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/source/libvpx/vp9/common/vp9_rtcd_defs.pl
index 8a81554..d4c3065 100644
--- a/source/libvpx/vp9/common/vp9_rtcd_defs.pl
+++ b/source/libvpx/vp9/common/vp9_rtcd_defs.pl
@@ -12,7 +12,7 @@
 /* Encoder forward decls */
 struct macroblock;
 struct vp9_variance_vtable;
-
+struct search_site_config;
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -563,33 +563,6 @@
 add_proto qw/unsigned int vp9_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, const uint8_t *second_pred, unsigned int max_sad";
 specialize qw/vp9_sad4x4_avg/, "$sse_x86inc";
 
-add_proto qw/unsigned int vp9_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar16x16_h/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar16x16_v/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar16x16_hv/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar64x64_h/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar64x64_v/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar64x64_hv/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar32x32_h/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar32x32_v/;
-
-add_proto qw/unsigned int vp9_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance_halfpixvar32x32_hv/;
-
 add_proto qw/void vp9_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int  ref_stride, unsigned int *sad_array";
 specialize qw/vp9_sad64x64x3/;
 
@@ -678,9 +651,6 @@
 add_proto qw/void vp9_sad4x4x4d/, "const uint8_t *src_ptr, int  src_stride, const uint8_t* const ref_ptr[], int  ref_stride, unsigned int *sad_array";
 specialize qw/vp9_sad4x4x4d sse/;
 
-#add_proto qw/unsigned int vp9_sub_pixel_mse16x16/, "const uint8_t *src_ptr, int  src_pixels_per_line, int  xoffset, int  yoffset, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse";
-#specialize qw/vp9_sub_pixel_mse16x16 sse2 mmx/;
-
 add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
 specialize qw/vp9_mse16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
 
@@ -693,12 +663,6 @@
 add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
 specialize qw/vp9_mse8x8/;
 
-add_proto qw/unsigned int vp9_sub_pixel_mse64x64/, "const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_mse64x64/;
-
-add_proto qw/unsigned int vp9_sub_pixel_mse32x32/, "const uint8_t *src_ptr, int  source_stride, int  xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_mse32x32/;
-
 add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
 specialize qw/vp9_get_mb_ss mmx sse2/;
 # ENCODEMB INVOKE
@@ -743,7 +707,7 @@
 specialize qw/vp9_fdct4x4 sse2 avx2/;
 
 add_proto qw/void vp9_fdct8x8/, "const int16_t *input, int16_t *output, int stride";
-specialize qw/vp9_fdct8x8 sse2 avx2/;
+specialize qw/vp9_fdct8x8 sse2 avx2/, "$ssse3_x86_64";
 
 add_proto qw/void vp9_fdct16x16/, "const int16_t *input, int16_t *output, int stride";
 specialize qw/vp9_fdct16x16 sse2 avx2/;
@@ -766,11 +730,11 @@
 specialize qw/vp9_refining_search_sad sse3/;
 $vp9_refining_search_sad_sse3=vp9_refining_search_sadx4;
 
-add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
+add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
 specialize qw/vp9_diamond_search_sad sse3/;
 $vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4;
 
-add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
+add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
 specialize qw/vp9_full_range_search/;
 
 add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
diff --git a/source/libvpx/vp9/common/vp9_tapify.py b/source/libvpx/vp9/common/vp9_tapify.py
deleted file mode 100644
index 99529cf..0000000
--- a/source/libvpx/vp9/common/vp9_tapify.py
+++ /dev/null
@@ -1,106 +0,0 @@
-"""
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
-"""
-#!/usr/bin/env python
-import sys,string,os,re,math,numpy
-scale = 2**16
-def dist(p1,p2):
-  x1,y1 = p1
-  x2,y2 = p2
-  if x1==x2 and y1==y2 :
-    return 1.0 
-  return 1/ math.sqrt((x1-x2)*(x1-x2)+(y1-y2)*(y1-y2))
-
-def gettaps(p):
-  def l(b):
-    return int(math.floor(b))
-  def h(b):
-    return int(math.ceil(b))
-  def t(b,p,s):
-    return int((scale*dist(b,p)+s/2)/s)
-  r,c = p
-  ul=[l(r),l(c)]
-  ur=[l(r),h(c)]
-  ll=[h(r),l(c)]
-  lr=[h(r),h(c)]
-  sum = dist(ul,p)+dist(ur,p)+dist(ll,p)+dist(lr,p)
-  t4 = scale - t(ul,p,sum) - t(ur,p,sum) - t(ll,p,sum);
-  return [[ul,t(ul,p,sum)],[ur,t(ur,p,sum)],
-          [ll,t(ll,p,sum)],[lr,t4]]
-
-def print_mb_taps(angle,blocksize):
-  theta = angle / 57.2957795;
-  affine = [[math.cos(theta),-math.sin(theta)],
-            [math.sin(theta),math.cos(theta)]]
-  radius = (float(blocksize)-1)/2
-  print " // angle of",angle,"degrees"
-  for y in range(blocksize) :
-    for x in range(blocksize) :
-      r,c = numpy.dot(affine,[y-radius, x-radius])
-      tps = gettaps([r+radius,c+radius])
-      for t in tps :
-        p,t = t
-        tr,tc = p
-        print " %2d, %2d, %5d, " % (tr,tc,t,),
-      print " // %2d,%2d " % (y,x)
-
-i=float(sys.argv[1])
-while  i <= float(sys.argv[2]) :
-  print_mb_taps(i,float(sys.argv[4]))
-  i=i+float(sys.argv[3])
-"""
-
-taps = []
-pt=dict()
-ptr=dict()
-for y in range(16) :
-  for x in range(16) :
-    r,c = numpy.dot(affine,[y-7.5, x-7.5])
-    tps = gettaps([r+7.5,c+7.5])
-    j=0
-    for tp in tps : 
-      p,i = tp
-      r,c = p
-      pt[y,x,j]= [p,i]
-      try: 
-        ptr[r,j,c].append([y,x])
-      except:
-        ptr[r,j,c]=[[y,x]]
-      j = j+1 
-
-for key in sorted(pt.keys()) :
-  print key,pt[key]
-
-lr = -99
-lj = -99 
-lc = 0
-
-shuf=""
-mask=""
-for r,j,c in sorted(ptr.keys()) :
-  for y,x in ptr[r,j,c] :
-    if lr != r or lj != j :
-      print "shuf_"+str(lr)+"_"+str(lj)+"_"+shuf.ljust(16,"0"), lc
-      shuf=""
-      lc = 0
-    for i in range(lc,c-1) :
-      shuf = shuf +"0"
-    shuf = shuf + hex(x)[2]
-    lc =c
-    break
-  lr = r
-  lj = j
-#  print r,j,c,ptr[r,j,c]    
-#  print 
-
-for r,j,c in sorted(ptr.keys()) :
-  for y,x in ptr[r,j,c] :
-    print r,j,c,y,x 
-    break
-"""
diff --git a/source/libvpx/vp9/decoder/vp9_decodeframe.c b/source/libvpx/vp9/decoder/vp9_decodeframe.c
index 1cc7ab7..45ebb2f 100644
--- a/source/libvpx/vp9/decoder/vp9_decodeframe.c
+++ b/source/libvpx/vp9/decoder/vp9_decodeframe.c
@@ -1370,7 +1370,8 @@
                          "A stream must start with a complete key frame");
   }
 
-  if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
+  if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode &&
+      !new_fb->corrupted) {
     vp9_adapt_coef_probs(cm);
 
     if (!frame_is_intra_only(cm)) {
diff --git a/source/libvpx/vp9/decoder/vp9_decoder.c b/source/libvpx/vp9/decoder/vp9_decoder.c
index faf710c..abcff9f 100644
--- a/source/libvpx/vp9/decoder/vp9_decoder.c
+++ b/source/libvpx/vp9/decoder/vp9_decoder.c
@@ -32,74 +32,6 @@
 #include "vp9/decoder/vp9_detokenize.h"
 #include "vp9/decoder/vp9_dthread.h"
 
-#define WRITE_RECON_BUFFER 0
-#if WRITE_RECON_BUFFER == 1
-static void recon_write_yuv_frame(const char *name,
-                                  const YV12_BUFFER_CONFIG *s,
-                                  int w, int _h) {
-  FILE *yuv_file = fopen(name, "ab");
-  const uint8_t *src = s->y_buffer;
-  int h = _h;
-
-  do {
-    fwrite(src, w, 1,  yuv_file);
-    src += s->y_stride;
-  } while (--h);
-
-  src = s->u_buffer;
-  h = (_h + 1) >> 1;
-  w = (w + 1) >> 1;
-
-  do {
-    fwrite(src, w, 1,  yuv_file);
-    src += s->uv_stride;
-  } while (--h);
-
-  src = s->v_buffer;
-  h = (_h + 1) >> 1;
-
-  do {
-    fwrite(src, w, 1, yuv_file);
-    src += s->uv_stride;
-  } while (--h);
-
-  fclose(yuv_file);
-}
-#endif
-#if WRITE_RECON_BUFFER == 2
-void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
-  // write the frame
-  FILE *yframe;
-  int i;
-  char filename[255];
-
-  snprintf(filename, sizeof(filename)-1, "dx\\y%04d.raw", this_frame);
-  yframe = fopen(filename, "wb");
-
-  for (i = 0; i < frame->y_height; i++)
-    fwrite(frame->y_buffer + i * frame->y_stride,
-           frame->y_width, 1, yframe);
-
-  fclose(yframe);
-  snprintf(filename, sizeof(filename)-1, "dx\\u%04d.raw", this_frame);
-  yframe = fopen(filename, "wb");
-
-  for (i = 0; i < frame->uv_height; i++)
-    fwrite(frame->u_buffer + i * frame->uv_stride,
-           frame->uv_width, 1, yframe);
-
-  fclose(yframe);
-  snprintf(filename, sizeof(filename)-1, "dx\\v%04d.raw", this_frame);
-  yframe = fopen(filename, "wb");
-
-  for (i = 0; i < frame->uv_height; i++)
-    fwrite(frame->v_buffer + i * frame->uv_stride,
-           frame->uv_width, 1, yframe);
-
-  fclose(yframe);
-}
-#endif
-
 void vp9_initialize_dec() {
   static int init_done = 0;
 
@@ -348,15 +280,6 @@
 
   swap_frame_buffers(pbi);
 
-#if WRITE_RECON_BUFFER == 2
-  if (cm->show_frame)
-    write_dx_frame_to_file(cm->frame_to_show,
-                           cm->current_video_frame);
-  else
-    write_dx_frame_to_file(cm->frame_to_show,
-                           cm->current_video_frame + 1000);
-#endif
-
   if (!pbi->do_loopfilter_inline) {
     // If multiple threads are used to decode tiles, then we use those threads
     // to do parallel loopfiltering.
@@ -367,21 +290,6 @@
     }
   }
 
-#if WRITE_RECON_BUFFER == 2
-  if (cm->show_frame)
-    write_dx_frame_to_file(cm->frame_to_show,
-                           cm->current_video_frame + 2000);
-  else
-    write_dx_frame_to_file(cm->frame_to_show,
-                           cm->current_video_frame + 3000);
-#endif
-
-#if WRITE_RECON_BUFFER == 1
-  if (cm->show_frame)
-    recon_write_yuv_frame("recon.yuv", cm->frame_to_show,
-                          cm->width, cm->height);
-#endif
-
   vp9_clear_system_state();
 
   cm->last_width = cm->width;
@@ -423,10 +331,6 @@
   ret = vp9_post_proc_frame(&pbi->common, sd, flags);
 #else
     *sd = *pbi->common.frame_to_show;
-    sd->y_width = pbi->common.width;
-    sd->y_height = pbi->common.height;
-    sd->uv_width = sd->y_width >> pbi->common.subsampling_x;
-    sd->uv_height = sd->y_height >> pbi->common.subsampling_y;
     ret = 0;
 #endif /*!CONFIG_POSTPROC*/
   vp9_clear_system_state();
diff --git a/source/libvpx/vp9/encoder/vp9_block.h b/source/libvpx/vp9/encoder/vp9_block.h
index fcf2a04..f35a85f 100644
--- a/source/libvpx/vp9/encoder/vp9_block.h
+++ b/source/libvpx/vp9/encoder/vp9_block.h
@@ -20,12 +20,6 @@
 extern "C" {
 #endif
 
-// motion search site
-typedef struct {
-  MV mv;
-  int offset;
-} search_site;
-
 // Structure to hold snapshot of coding context during the mode picking process
 typedef struct {
   MODE_INFO mic;
@@ -108,10 +102,6 @@
   int skip_optimize;
   int q_index;
 
-  search_site *ss;
-  int ss_count;
-  int searches_per_step;
-
   int errorperbit;
   int sadperbit16;
   int sadperbit4;
diff --git a/source/libvpx/vp9/encoder/vp9_encodeframe.c b/source/libvpx/vp9/encoder/vp9_encodeframe.c
index 2e44f7d..19aa592 100644
--- a/source/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/source/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -560,11 +560,6 @@
   return act < (8 << 12) ? MIN(act, 5 << 12) : act;
 }
 
-// Stub for alternative experimental activity measures.
-static unsigned int alt_activity_measure(MACROBLOCK *x, int use_dc_pred) {
-  return vp9_encode_intra(x, use_dc_pred);
-}
-
 static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
                          int mi_row, int mi_col, BLOCK_SIZE bsize,
                          int output_enabled) {
diff --git a/source/libvpx/vp9/encoder/vp9_encodemb.c b/source/libvpx/vp9/encoder/vp9_encodemb.c
index baa4665..d71b16f 100644
--- a/source/libvpx/vp9/encoder/vp9_encodemb.c
+++ b/source/libvpx/vp9/encoder/vp9_encodemb.c
@@ -601,15 +601,3 @@
   vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_intra,
                                          &arg);
 }
-
-int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) {
-  MB_MODE_INFO * mbmi = &x->e_mbd.mi[0]->mbmi;
-  x->skip_encode = 0;
-  mbmi->mode = DC_PRED;
-  mbmi->ref_frame[0] = INTRA_FRAME;
-  mbmi->tx_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ? TX_16X16
-                                                                 : TX_8X8)
-                                   : TX_4X4;
-  vp9_encode_intra_block_plane(x, mbmi->sb_type, 0);
-  return vp9_get_mb_ss(x->plane[0].src_diff);
-}
diff --git a/source/libvpx/vp9/encoder/vp9_encodemb.h b/source/libvpx/vp9/encoder/vp9_encodemb.h
index edef1e2..8021459 100644
--- a/source/libvpx/vp9/encoder/vp9_encodemb.h
+++ b/source/libvpx/vp9/encoder/vp9_encodemb.h
@@ -34,8 +34,6 @@
 
 void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
 
-int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/source/libvpx/vp9/encoder/vp9_encoder.c b/source/libvpx/vp9/encoder/vp9_encoder.c
index 3708258..395d26a 100644
--- a/source/libvpx/vp9/encoder/vp9_encoder.c
+++ b/source/libvpx/vp9/encoder/vp9_encoder.c
@@ -501,9 +501,9 @@
     int y_stride = cpi->scaled_source.y_stride;
 
     if (cpi->sf.search_method == NSTEP) {
-      vp9_init3smotion_compensation(&cpi->mb, y_stride);
+      vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
     } else if (cpi->sf.search_method == DIAMOND) {
-      vp9_init_dsmotion_compensation(&cpi->mb, y_stride);
+      vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
     }
   }
 
@@ -782,9 +782,6 @@
 
   cm->error.setjmp = 1;
 
-  CHECK_MEM_ERROR(cm, cpi->mb.ss, vpx_calloc(sizeof(search_site),
-                                             (MAX_MVSEARCH_STEPS * 8) + 1));
-
   vp9_rtcd();
 
   cpi->use_svc = 0;
@@ -829,7 +826,6 @@
                                sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
   }
 
-  cpi->key_frame_frequency = cpi->oxcf.key_freq;
   cpi->refresh_alt_ref_frame = 0;
 
 #if CONFIG_MULTIPLE_ARF
@@ -974,95 +970,73 @@
       cpi->rd.thresh_freq_fact[i][j] = 32;
   }
 
-#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, \
-            SDX3F, SDX8F, SDX4DF)\
+#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF)\
     cpi->fn_ptr[BT].sdf            = SDF; \
     cpi->fn_ptr[BT].sdaf           = SDAF; \
     cpi->fn_ptr[BT].vf             = VF; \
     cpi->fn_ptr[BT].svf            = SVF; \
     cpi->fn_ptr[BT].svaf           = SVAF; \
-    cpi->fn_ptr[BT].svf_halfpix_h  = SVFHH; \
-    cpi->fn_ptr[BT].svf_halfpix_v  = SVFHV; \
-    cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \
     cpi->fn_ptr[BT].sdx3f          = SDX3F; \
     cpi->fn_ptr[BT].sdx8f          = SDX8F; \
     cpi->fn_ptr[BT].sdx4df         = SDX4DF;
 
   BFP(BLOCK_32X16, vp9_sad32x16, vp9_sad32x16_avg,
       vp9_variance32x16, vp9_sub_pixel_variance32x16,
-      vp9_sub_pixel_avg_variance32x16, NULL, NULL,
-      NULL, NULL, NULL,
-      vp9_sad32x16x4d)
+      vp9_sub_pixel_avg_variance32x16, NULL, NULL, vp9_sad32x16x4d)
 
   BFP(BLOCK_16X32, vp9_sad16x32, vp9_sad16x32_avg,
       vp9_variance16x32, vp9_sub_pixel_variance16x32,
-      vp9_sub_pixel_avg_variance16x32, NULL, NULL,
-      NULL, NULL, NULL,
-      vp9_sad16x32x4d)
+      vp9_sub_pixel_avg_variance16x32, NULL, NULL, vp9_sad16x32x4d)
 
   BFP(BLOCK_64X32, vp9_sad64x32, vp9_sad64x32_avg,
       vp9_variance64x32, vp9_sub_pixel_variance64x32,
-      vp9_sub_pixel_avg_variance64x32, NULL, NULL,
-      NULL, NULL, NULL,
-      vp9_sad64x32x4d)
+      vp9_sub_pixel_avg_variance64x32, NULL, NULL, vp9_sad64x32x4d)
 
   BFP(BLOCK_32X64, vp9_sad32x64, vp9_sad32x64_avg,
       vp9_variance32x64, vp9_sub_pixel_variance32x64,
-      vp9_sub_pixel_avg_variance32x64, NULL, NULL,
-      NULL, NULL, NULL,
-      vp9_sad32x64x4d)
+      vp9_sub_pixel_avg_variance32x64, NULL, NULL, vp9_sad32x64x4d)
 
   BFP(BLOCK_32X32, vp9_sad32x32, vp9_sad32x32_avg,
       vp9_variance32x32, vp9_sub_pixel_variance32x32,
-      vp9_sub_pixel_avg_variance32x32, vp9_variance_halfpixvar32x32_h,
-      vp9_variance_halfpixvar32x32_v,
-      vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8,
+      vp9_sub_pixel_avg_variance32x32, vp9_sad32x32x3, vp9_sad32x32x8,
       vp9_sad32x32x4d)
 
   BFP(BLOCK_64X64, vp9_sad64x64, vp9_sad64x64_avg,
       vp9_variance64x64, vp9_sub_pixel_variance64x64,
-      vp9_sub_pixel_avg_variance64x64, vp9_variance_halfpixvar64x64_h,
-      vp9_variance_halfpixvar64x64_v,
-      vp9_variance_halfpixvar64x64_hv, vp9_sad64x64x3, vp9_sad64x64x8,
+      vp9_sub_pixel_avg_variance64x64, vp9_sad64x64x3, vp9_sad64x64x8,
       vp9_sad64x64x4d)
 
   BFP(BLOCK_16X16, vp9_sad16x16, vp9_sad16x16_avg,
       vp9_variance16x16, vp9_sub_pixel_variance16x16,
-      vp9_sub_pixel_avg_variance16x16, vp9_variance_halfpixvar16x16_h,
-      vp9_variance_halfpixvar16x16_v,
-      vp9_variance_halfpixvar16x16_hv, vp9_sad16x16x3, vp9_sad16x16x8,
+      vp9_sub_pixel_avg_variance16x16, vp9_sad16x16x3, vp9_sad16x16x8,
       vp9_sad16x16x4d)
 
   BFP(BLOCK_16X8, vp9_sad16x8, vp9_sad16x8_avg,
       vp9_variance16x8, vp9_sub_pixel_variance16x8,
-      vp9_sub_pixel_avg_variance16x8, NULL, NULL, NULL,
+      vp9_sub_pixel_avg_variance16x8,
       vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d)
 
   BFP(BLOCK_8X16, vp9_sad8x16, vp9_sad8x16_avg,
       vp9_variance8x16, vp9_sub_pixel_variance8x16,
-      vp9_sub_pixel_avg_variance8x16, NULL, NULL, NULL,
+      vp9_sub_pixel_avg_variance8x16,
       vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d)
 
   BFP(BLOCK_8X8, vp9_sad8x8, vp9_sad8x8_avg,
       vp9_variance8x8, vp9_sub_pixel_variance8x8,
-      vp9_sub_pixel_avg_variance8x8, NULL, NULL, NULL,
+      vp9_sub_pixel_avg_variance8x8,
       vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
 
   BFP(BLOCK_8X4, vp9_sad8x4, vp9_sad8x4_avg,
       vp9_variance8x4, vp9_sub_pixel_variance8x4,
-      vp9_sub_pixel_avg_variance8x4, NULL, NULL,
-      NULL, NULL, vp9_sad8x4x8,
-      vp9_sad8x4x4d)
+      vp9_sub_pixel_avg_variance8x4, NULL, vp9_sad8x4x8, vp9_sad8x4x4d)
 
   BFP(BLOCK_4X8, vp9_sad4x8, vp9_sad4x8_avg,
       vp9_variance4x8, vp9_sub_pixel_variance4x8,
-      vp9_sub_pixel_avg_variance4x8, NULL, NULL,
-      NULL, NULL, vp9_sad4x8x8,
-      vp9_sad4x8x4d)
+      vp9_sub_pixel_avg_variance4x8, NULL, vp9_sad4x8x8, vp9_sad4x8x4d)
 
   BFP(BLOCK_4X4, vp9_sad4x4, vp9_sad4x4_avg,
       vp9_variance4x4, vp9_sub_pixel_variance4x4,
-      vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL,
+      vp9_sub_pixel_avg_variance4x4,
       vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
 
   cpi->full_search_sad = vp9_full_search_sad;
@@ -1183,7 +1157,6 @@
   }
 
   dealloc_compressor_data(cpi);
-  vpx_free(cpi->mb.ss);
   vpx_free(cpi->tok);
 
   for (i = 0; i < sizeof(cpi->mbgraph_stats) /
@@ -1445,77 +1418,67 @@
 }
 #endif
 
-static void scale_and_extend_frame_nonnormative(YV12_BUFFER_CONFIG *src_fb,
-                                                YV12_BUFFER_CONFIG *dst_fb) {
-  const int in_w = src_fb->y_crop_width;
-  const int in_h = src_fb->y_crop_height;
-  const int out_w = dst_fb->y_crop_width;
-  const int out_h = dst_fb->y_crop_height;
-  const int in_w_uv = src_fb->uv_crop_width;
-  const int in_h_uv = src_fb->uv_crop_height;
-  const int out_w_uv = dst_fb->uv_crop_width;
-  const int out_h_uv = dst_fb->uv_crop_height;
+static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src,
+                                                YV12_BUFFER_CONFIG *dst) {
+  // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t
   int i;
+  const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
+                                  src->alpha_buffer};
+  const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride,
+                              src->alpha_stride};
+  const int src_widths[4] = {src->y_crop_width, src->uv_crop_width,
+                             src->uv_crop_width, src->y_crop_width};
+  const int src_heights[4] = {src->y_crop_height, src->uv_crop_height,
+                              src->uv_crop_height, src->y_crop_height};
+  uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer,
+                            dst->alpha_buffer};
+  const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride,
+                              dst->alpha_stride};
+  const int dst_widths[4] = {dst->y_crop_width, dst->uv_crop_width,
+                             dst->uv_crop_width, dst->y_crop_width};
+  const int dst_heights[4] = {dst->y_crop_height, dst->uv_crop_height,
+                              dst->uv_crop_height, dst->y_crop_height};
 
-  uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer,
-    src_fb->alpha_buffer};
-  int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride,
-    src_fb->alpha_stride};
+  for (i = 0; i < MAX_MB_PLANE; ++i)
+    vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i],
+                     dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]);
 
-  uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer,
-    dst_fb->alpha_buffer};
-  int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride,
-    dst_fb->alpha_stride};
-
-  for (i = 0; i < MAX_MB_PLANE; ++i) {
-    if (i == 0 || i == 3) {
-      // Y and alpha planes
-      vp9_resize_plane(srcs[i], in_h, in_w, src_strides[i],
-                       dsts[i], out_h, out_w, dst_strides[i]);
-    } else {
-      // Chroma planes
-      vp9_resize_plane(srcs[i], in_h_uv, in_w_uv, src_strides[i],
-                       dsts[i], out_h_uv, out_w_uv, dst_strides[i]);
-    }
-  }
   // TODO(hkuang): Call C version explicitly
   // as neon version only expand border size 32.
-  vp8_yv12_extend_frame_borders_c(dst_fb);
+  vp8_yv12_extend_frame_borders_c(dst);
 }
 
-static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
-                                   YV12_BUFFER_CONFIG *dst_fb) {
-  const int in_w = src_fb->y_crop_width;
-  const int in_h = src_fb->y_crop_height;
-  const int out_w = dst_fb->y_crop_width;
-  const int out_h = dst_fb->y_crop_height;
+static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src,
+                                   YV12_BUFFER_CONFIG *dst) {
+  const int src_w = src->y_crop_width;
+  const int src_h = src->y_crop_height;
+  const int dst_w = dst->y_crop_width;
+  const int dst_h = dst->y_crop_height;
+  const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
+                                  src->alpha_buffer};
+  const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride,
+                              src->alpha_stride};
+  uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer,
+                            dst->alpha_buffer};
+  const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride,
+                              dst->alpha_stride};
   int x, y, i;
 
-  uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer,
-                      src_fb->alpha_buffer};
-  int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride,
-                        src_fb->alpha_stride};
-
-  uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer,
-                      dst_fb->alpha_buffer};
-  int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride,
-                        dst_fb->alpha_stride};
-
-  for (y = 0; y < out_h; y += 16) {
-    for (x = 0; x < out_w; x += 16) {
+  for (y = 0; y < dst_h; y += 16) {
+    for (x = 0; x < dst_w; x += 16) {
       for (i = 0; i < MAX_MB_PLANE; ++i) {
         const int factor = (i == 0 || i == 3 ? 1 : 2);
-        const int x_q4 = x * (16 / factor) * in_w / out_w;
-        const int y_q4 = y * (16 / factor) * in_h / out_h;
+        const int x_q4 = x * (16 / factor) * src_w / dst_w;
+        const int y_q4 = y * (16 / factor) * src_h / dst_h;
         const int src_stride = src_strides[i];
         const int dst_stride = dst_strides[i];
-        uint8_t *src = srcs[i] + y / factor * in_h / out_h * src_stride +
-                                 x / factor * in_w / out_w;
-        uint8_t *dst = dsts[i] + y / factor * dst_stride + x / factor;
+        const uint8_t *src_ptr = srcs[i] + (y / factor) * src_h / dst_h *
+                                     src_stride + (x / factor) * src_w / dst_w;
+        uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
 
-        vp9_convolve8(src, src_stride, dst, dst_stride,
-                      vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w,
-                      vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h,
+        vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride,
+                      vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * src_w / dst_w,
+                      vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * src_h / dst_h,
                       16 / factor, 16 / factor);
       }
     }
@@ -1523,7 +1486,7 @@
 
   // TODO(hkuang): Call C version explicitly
   // as neon version only expand border size 32.
-  vp8_yv12_extend_frame_borders_c(dst_fb);
+  vp8_yv12_extend_frame_borders_c(dst);
 }
 
 static int find_fp_qindex() {
@@ -1702,7 +1665,7 @@
 
   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
     const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
-    YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
+    const YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
 
     if (ref->y_crop_width != cm->width ||
         ref->y_crop_height != cm->height) {
diff --git a/source/libvpx/vp9/encoder/vp9_encoder.h b/source/libvpx/vp9/encoder/vp9_encoder.h
index de8f3c9..132b479 100644
--- a/source/libvpx/vp9/encoder/vp9_encoder.h
+++ b/source/libvpx/vp9/encoder/vp9_encoder.h
@@ -337,8 +337,6 @@
   YV12_BUFFER_CONFIG *unscaled_last_source;
   YV12_BUFFER_CONFIG scaled_last_source;
 
-  int key_frame_frequency;
-
   int gold_is_last;  // gold same as last frame ( short circuit gold searches)
   int alt_is_last;  // Alt same as last ( short circuit altref search)
   int gold_is_alt;  // don't do both alt and gold search ( just do gold).
@@ -499,6 +497,8 @@
 
   int frame_flags;
 
+  search_site_config ss_cfg;
+
 #if CONFIG_MULTIPLE_ARF
   // ARF tracking variables.
   int multi_arf_enabled;
diff --git a/source/libvpx/vp9/encoder/vp9_firstpass.c b/source/libvpx/vp9/encoder/vp9_firstpass.c
index 1879b15..bd69841 100644
--- a/source/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/source/libvpx/vp9/encoder/vp9_firstpass.c
@@ -418,7 +418,7 @@
   v_fn_ptr.vf = get_block_variance_fn(bsize);
 
   // Center the initial step/diamond search on best mv.
-  tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
+  tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
                                     step_param,
                                     x->sadperbit16, &num00, &v_fn_ptr, ref_mv);
   if (tmp_err < INT_MAX)
@@ -441,7 +441,7 @@
     if (num00) {
       --num00;
     } else {
-      tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
+      tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
                                         step_param + n, x->sadperbit16,
                                         &num00, &v_fn_ptr, ref_mv);
       if (tmp_err < INT_MAX)
@@ -604,7 +604,13 @@
       }
 
       // Do intra 16x16 prediction.
-      this_error = vp9_encode_intra(x, use_dc_pred);
+      x->skip_encode = 0;
+      xd->mi[0]->mbmi.mode = DC_PRED;
+      xd->mi[0]->mbmi.tx_size = use_dc_pred ?
+         (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
+      vp9_encode_intra_block_plane(x, bsize, 0);
+      this_error = vp9_get_mb_ss(x->plane[0].src_diff);
+
       if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
         vp9_clear_system_state();
         this_error = (int)(this_error * error_weight);
@@ -920,12 +926,19 @@
     const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth <<
                                             BPER_MB_NORMBITS) / num_mbs;
     int q;
+    int is_svc_upper_layer = 0;
+    if (cpi->use_svc && cpi->svc.number_temporal_layers == 1 &&
+        cpi->svc.spatial_layer_id > 0) {
+      is_svc_upper_layer = 1;
+    }
 
     // Try and pick a max Q that will be high enough to encode the
     // content at the given rate.
     for (q = rc->best_quality; q < rc->worst_quality; ++q) {
-      const double factor = calc_correction_factor(err_per_mb, ERR_DIVISOR,
-                                                   0.5, 0.90, q);
+      const double factor =
+          calc_correction_factor(err_per_mb, ERR_DIVISOR,
+                                 is_svc_upper_layer ? 0.8 : 0.5,
+                                 is_svc_upper_layer ? 1.0 : 0.90, q);
       const int bits_per_mb = vp9_rc_bits_per_mb(INTER_FRAME, q,
                                                  factor * speed_term);
       if (bits_per_mb <= target_norm_bits_per_mb)
@@ -1936,7 +1949,7 @@
   // Find the next keyframe.
   i = 0;
   while (twopass->stats_in < twopass->stats_in_end &&
-         rc->frames_to_key < cpi->key_frame_frequency) {
+         rc->frames_to_key < cpi->oxcf.key_freq) {
     // Accumulate kf group error.
     kf_group_err += calculate_modified_err(cpi, this_frame);
 
@@ -1966,7 +1979,7 @@
 
       // Special check for transition or high motion followed by a
       // static scene.
-      if (detect_transition_to_still(twopass, i, cpi->key_frame_frequency - i,
+      if (detect_transition_to_still(twopass, i, cpi->oxcf.key_freq - i,
                                      loop_decay_rate, decay_accumulator))
         break;
 
@@ -1974,8 +1987,8 @@
       ++rc->frames_to_key;
 
       // If we don't have a real key frame within the next two
-      // key_frame_frequency intervals then break out of the loop.
-      if (rc->frames_to_key >= 2 * (int)cpi->key_frame_frequency)
+      // key_freq intervals then break out of the loop.
+      if (rc->frames_to_key >= 2 * cpi->oxcf.key_freq)
         break;
     } else {
       ++rc->frames_to_key;
@@ -1988,7 +2001,7 @@
   // This code centers the extra kf if the actual natural interval
   // is between 1x and 2x.
   if (cpi->oxcf.auto_key &&
-      rc->frames_to_key > (int)cpi->key_frame_frequency) {
+      rc->frames_to_key > cpi->oxcf.key_freq) {
     FIRSTPASS_STATS tmp_frame = first_frame;
 
     rc->frames_to_key /= 2;
@@ -2005,7 +2018,7 @@
     }
     rc->next_key_frame_forced = 1;
   } else if (twopass->stats_in == twopass->stats_in_end ||
-             rc->frames_to_key >= cpi->key_frame_frequency) {
+             rc->frames_to_key >= cpi->oxcf.key_freq) {
     rc->next_key_frame_forced = 1;
   } else {
     rc->next_key_frame_forced = 0;
diff --git a/source/libvpx/vp9/encoder/vp9_mcomp.c b/source/libvpx/vp9/encoder/vp9_mcomp.c
index 89937f5..bbec4da 100644
--- a/source/libvpx/vp9/encoder/vp9_mcomp.c
+++ b/source/libvpx/vp9/encoder/vp9_mcomp.c
@@ -101,32 +101,32 @@
   return 0;
 }
 
-void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
+void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
   int len, ss_count = 1;
 
-  x->ss[0].mv.col = x->ss[0].mv.row = 0;
-  x->ss[0].offset = 0;
+  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
+  cfg->ss[0].offset = 0;
 
   for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
     // Generate offsets for 4 search sites per step.
     const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}};
     int i;
     for (i = 0; i < 4; ++i) {
-      search_site *const ss = &x->ss[ss_count++];
+      search_site *const ss = &cfg->ss[ss_count++];
       ss->mv = ss_mvs[i];
       ss->offset = ss->mv.row * stride + ss->mv.col;
     }
   }
 
-  x->ss_count = ss_count;
-  x->searches_per_step = 4;
+  cfg->ss_count = ss_count;
+  cfg->searches_per_step = 4;
 }
 
-void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) {
+void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
   int len, ss_count = 1;
 
-  x->ss[0].mv.col = x->ss[0].mv.row = 0;
-  x->ss[0].offset = 0;
+  cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0;
+  cfg->ss[0].offset = 0;
 
   for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
     // Generate offsets for 8 search sites per step.
@@ -136,14 +136,14 @@
     };
     int i;
     for (i = 0; i < 8; ++i) {
-      search_site *const ss = &x->ss[ss_count++];
+      search_site *const ss = &cfg->ss[ss_count++];
       ss->mv = ss_mvs[i];
       ss->offset = ss->mv.row * stride + ss->mv.col;
     }
   }
 
-  x->ss_count = ss_count;
-  x->searches_per_step = 8;
+  cfg->ss_count = ss_count;
+  cfg->searches_per_step = 8;
 }
 
 /*
@@ -871,7 +871,9 @@
 
 #undef CHECK_BETTER
 
-int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
+int vp9_full_range_search_c(const MACROBLOCK *x,
+                            const search_site_config *cfg,
+                            MV *ref_mv, MV *best_mv,
                             int search_param, int sad_per_bit, int *num00,
                             const vp9_variance_fn_ptr_t *fn_ptr,
                             const MV *center_mv) {
@@ -962,6 +964,7 @@
 }
 
 int vp9_diamond_search_sad_c(const MACROBLOCK *x,
+                             const search_site_config *cfg,
                              MV *ref_mv, MV *best_mv,
                              int search_param, int sad_per_bit, int *num00,
                              const vp9_variance_fn_ptr_t *fn_ptr,
@@ -973,8 +976,8 @@
   // of iterations
   // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
   // (MAX_FIRST_STEP/4) pel... etc.
-  const search_site *const ss = &x->ss[search_param * x->searches_per_step];
-  const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
+  const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step];
+  const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   const uint8_t *best_address, *in_what_ref;
   int best_sad = INT_MAX;
@@ -996,7 +999,7 @@
   i = 1;
 
   for (step = 0; step < tot_steps; step++) {
-    for (j = 0; j < x->searches_per_step; j++) {
+    for (j = 0; j < cfg->searches_per_step; j++) {
       const MV mv = {best_mv->row + ss[i].mv.row,
                      best_mv->col + ss[i].mv.col};
       if (is_mv_in(x, &mv)) {
@@ -1050,6 +1053,7 @@
 }
 
 int vp9_diamond_search_sadx4(const MACROBLOCK *x,
+                             const search_site_config *cfg,
                              MV *ref_mv, MV *best_mv, int search_param,
                              int sad_per_bit, int *num00,
                              const vp9_variance_fn_ptr_t *fn_ptr,
@@ -1075,8 +1079,8 @@
   // 0 = initial step (MAX_FIRST_STEP) pel
   // 1 = (MAX_FIRST_STEP/2) pel,
   // 2 = (MAX_FIRST_STEP/4) pel...
-  const search_site *ss = &x->ss[search_param * x->searches_per_step];
-  const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
+  const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
+  const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;
 
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
@@ -1112,7 +1116,7 @@
     if (all_in) {
       unsigned int sad_array[4];
 
-      for (j = 0; j < x->searches_per_step; j += 4) {
+      for (j = 0; j < cfg->searches_per_step; j += 4) {
         unsigned char const *block_offset[4];
 
         for (t = 0; t < 4; t++)
@@ -1135,7 +1139,7 @@
         }
       }
     } else {
-      for (j = 0; j < x->searches_per_step; j++) {
+      for (j = 0; j < cfg->searches_per_step; j++) {
         // Trap illegal vectors
         const MV this_mv = {best_mv->row + ss[i].mv.row,
                             best_mv->col + ss[i].mv.col};
@@ -1202,7 +1206,7 @@
                            const MV *ref_mv, MV *dst_mv) {
   MV temp_mv;
   int thissme, n, num00 = 0;
-  int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
+  int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
                                         step_param, sadpb, &n,
                                         fn_ptr, ref_mv);
   if (bestsme < INT_MAX)
@@ -1220,7 +1224,7 @@
     if (num00) {
       num00--;
     } else {
-      thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
+      thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
                                         step_param + n, sadpb, &num00,
                                         fn_ptr, ref_mv);
       if (thissme < INT_MAX)
@@ -1290,192 +1294,154 @@
                           int sad_per_bit, int distance,
                           const vp9_variance_fn_ptr_t *fn_ptr,
                           const MV *center_mv, MV *best_mv) {
+  int r;
   const MACROBLOCKD *const xd = &x->e_mbd;
-  const uint8_t *const what = x->plane[0].src.buf;
-  const int what_stride = x->plane[0].src.stride;
-  const uint8_t *const in_what = xd->plane[0].pre[0].buf;
-  const int in_what_stride = xd->plane[0].pre[0].stride;
-  MV this_mv;
-  unsigned int bestsad = INT_MAX;
-  int r, c;
-  unsigned int thissad;
-  int ref_row = ref_mv->row;
-  int ref_col = ref_mv->col;
-
-  // Apply further limits to prevent us looking using vectors that stretch
-  // beyond the UMV border
-  const int row_min = MAX(ref_row - distance, x->mv_row_min);
-  const int row_max = MIN(ref_row + distance, x->mv_row_max);
-  const int col_min = MAX(ref_col - distance, x->mv_col_min);
-  const int col_max = MIN(ref_col + distance, x->mv_col_max);
-  unsigned int sad_array[3];
+  const struct buf_2d *const what = &x->plane[0].src;
+  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+  const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
+  const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
+  const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
+  const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+  unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
+      get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
+      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
+  *best_mv = *ref_mv;
 
-  // Work out the mid point for the search
-  const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
+  for (r = row_min; r < row_max; ++r) {
+    int c = col_min;
+    const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
 
-  best_mv->row = ref_row;
-  best_mv->col = ref_col;
+    if (fn_ptr->sdx3f != NULL) {
+      while ((c + 2) < col_max) {
+        int i;
+        unsigned int sads[3];
 
-  // Baseline value at the centre
-  bestsad = fn_ptr->sdf(what, what_stride,
-                        bestaddress, in_what_stride, 0x7fffffff)
-            + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
+        fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
+                      sads);
 
-  for (r = row_min; r < row_max; r++) {
-    const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
-    this_mv.row = r;
-    c = col_min;
-
-    while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
-      int i;
-
-      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
-
-      for (i = 0; i < 3; i++) {
-        thissad = sad_array[i];
-
-        if (thissad < bestsad) {
-          this_mv.col = c;
-          thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
-          if (thissad < bestsad) {
-            bestsad = thissad;
-            best_mv->row = r;
-            best_mv->col = c;
+        for (i = 0; i < 3; ++i) {
+          unsigned int sad = sads[i];
+          if (sad < best_sad) {
+            const MV mv = {r, c};
+            sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+            if (sad < best_sad) {
+              best_sad = sad;
+              *best_mv = mv;
+            }
           }
+          ++check_here;
+          ++c;
         }
-        check_here++;
-        c++;
       }
     }
 
     while (c < col_max) {
-      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
-                            bestsad);
-
-      if (thissad < bestsad) {
-        this_mv.col = c;
-        thissad  += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
-        if (thissad < bestsad) {
-          bestsad = thissad;
-          best_mv->row = r;
-          best_mv->col = c;
+      unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
+                                     check_here, in_what->stride, best_sad);
+      if (sad < best_sad) {
+        const MV mv = {r, c};
+        sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+        if (sad < best_sad) {
+          best_sad = sad;
+          *best_mv = mv;
         }
       }
-
-      check_here++;
-      c++;
+      ++check_here;
+      ++c;
     }
   }
-  return bestsad;
+
+  return best_sad;
 }
 
 int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
                           int sad_per_bit, int distance,
                           const vp9_variance_fn_ptr_t *fn_ptr,
                           const MV *center_mv, MV *best_mv) {
+  int r;
   const MACROBLOCKD *const xd = &x->e_mbd;
-  const uint8_t *const what = x->plane[0].src.buf;
-  const int what_stride = x->plane[0].src.stride;
-  const uint8_t *const in_what = xd->plane[0].pre[0].buf;
-  const int in_what_stride = xd->plane[0].pre[0].stride;
-  MV this_mv;
-  unsigned int bestsad = INT_MAX;
-  int r, c;
-  int ref_row = ref_mv->row;
-  int ref_col = ref_mv->col;
-
-  // Apply further limits to prevent us looking using vectors that stretch
-  // beyond the UMV border
-  const int row_min = MAX(ref_row - distance, x->mv_row_min);
-  const int row_max = MIN(ref_row + distance, x->mv_row_max);
-  const int col_min = MAX(ref_col - distance, x->mv_col_min);
-  const int col_max = MIN(ref_col + distance, x->mv_col_max);
-  DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8);
-  unsigned int sad_array[3];
+  const struct buf_2d *const what = &x->plane[0].src;
+  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+  const int row_min = MAX(ref_mv->row - distance, x->mv_row_min);
+  const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
+  const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
+  const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
+  unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
+      get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
+      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
+  *best_mv = *ref_mv;
 
-  // Work out the mid point for the search
-  const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
+  for (r = row_min; r < row_max; ++r) {
+    int c = col_min;
+    const uint8_t *check_here = &in_what->buf[r * in_what->stride + c];
 
-  best_mv->row = ref_row;
-  best_mv->col = ref_col;
+    if (fn_ptr->sdx8f != NULL) {
+      while ((c + 7) < col_max) {
+        int i;
+        unsigned int sads[8];
 
-  // Baseline value at the center
-  bestsad = fn_ptr->sdf(what, what_stride,
-                        bestaddress, in_what_stride, 0x7fffffff)
-            + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
+        fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride,
+                      sads);
 
-  for (r = row_min; r < row_max; r++) {
-    const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
-    this_mv.row = r;
-    c = col_min;
-
-    while ((c + 7) < col_max) {
-      int i;
-
-      fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8);
-
-      for (i = 0; i < 8; i++) {
-        unsigned int thissad = (unsigned int)sad_array8[i];
-
-        if (thissad < bestsad) {
-          this_mv.col = c;
-          thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
-          if (thissad < bestsad) {
-            bestsad = thissad;
-            best_mv->row = r;
-            best_mv->col = c;
+        for (i = 0; i < 8; ++i) {
+          unsigned int sad = sads[i];
+          if (sad < best_sad) {
+            const MV mv = {r, c};
+            sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+            if (sad < best_sad) {
+              best_sad = sad;
+              *best_mv = mv;
+            }
           }
+          ++check_here;
+          ++c;
         }
-
-        check_here++;
-        c++;
       }
     }
 
-    while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) {
-      int i;
+    if (fn_ptr->sdx3f != NULL) {
+      while ((c + 2) < col_max) {
+        int i;
+        unsigned int sads[3];
 
-      fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array);
+        fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride,
+                      sads);
 
-      for (i = 0; i < 3; i++) {
-        unsigned int thissad = sad_array[i];
-
-        if (thissad < bestsad) {
-          this_mv.col = c;
-          thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
-          if (thissad < bestsad) {
-            bestsad = thissad;
-            best_mv->row = r;
-            best_mv->col = c;
+        for (i = 0; i < 3; ++i) {
+          unsigned int sad = sads[i];
+          if (sad < best_sad) {
+            const MV mv = {r, c};
+            sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+            if (sad < best_sad) {
+              best_sad = sad;
+              *best_mv = mv;
+            }
           }
+          ++check_here;
+          ++c;
         }
-
-        check_here++;
-        c++;
       }
     }
 
     while (c < col_max) {
-      unsigned int thissad = fn_ptr->sdf(what, what_stride,
-                                         check_here, in_what_stride, bestsad);
-
-      if (thissad < bestsad) {
-        this_mv.col = c;
-        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
-        if (thissad < bestsad) {
-          bestsad = thissad;
-          best_mv->row = r;
-          best_mv->col = c;
+      unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
+                                     check_here, in_what->stride, best_sad);
+      if (sad < best_sad) {
+        const MV mv = {r, c};
+        sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
+        if (sad < best_sad) {
+          best_sad = sad;
+          *best_mv = mv;
         }
       }
-
-      check_here++;
-      c++;
+      ++check_here;
+      ++c;
     }
   }
-  return bestsad;
+
+  return best_sad;
 }
 
 int vp9_refining_search_sad_c(const MACROBLOCK *x,
diff --git a/source/libvpx/vp9/encoder/vp9_mcomp.h b/source/libvpx/vp9/encoder/vp9_mcomp.h
index 70d7985..1f524f1 100644
--- a/source/libvpx/vp9/encoder/vp9_mcomp.h
+++ b/source/libvpx/vp9/encoder/vp9_mcomp.h
@@ -31,6 +31,20 @@
 // for Block_16x16
 #define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND)
 
+// motion search site
+typedef struct search_site {
+  MV mv;
+  int offset;
+} search_site;
+
+typedef struct search_site_config {
+  search_site ss[8 * MAX_MVSEARCH_STEPS + 1];
+  int ss_count;
+  int searches_per_step;
+} search_site_config;
+
+void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride);
+void vp9_init3smotion_compensation(search_site_config *cfg,  int stride);
 
 void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv);
 int vp9_mv_bit_cost(const MV *mv, const MV *ref,
@@ -46,8 +60,6 @@
                           const uint8_t *second_pred,
                           const vp9_variance_fn_ptr_t *vfp,
                           int use_mvcost);
-void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride);
-void vp9_init3smotion_compensation(MACROBLOCK *x,  int stride);
 
 struct VP9_COMP;
 int vp9_init_search_range(struct VP9_COMP *cpi, int size);
@@ -119,6 +131,7 @@
                                         const MV *center_mv);
 
 typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x,
+                                       const search_site_config *cfg,
                                        MV *ref_mv, MV *best_mv,
                                        int search_param, int sad_per_bit,
                                        int *num00,
diff --git a/source/libvpx/vp9/encoder/vp9_pickmode.c b/source/libvpx/vp9/encoder/vp9_pickmode.c
index c1493e7..56eb944 100644
--- a/source/libvpx/vp9/encoder/vp9_pickmode.c
+++ b/source/libvpx/vp9/encoder/vp9_pickmode.c
@@ -418,7 +418,7 @@
 
   // Perform intra prediction search, if the best SAD is above a certain
   // threshold.
-  if (best_rd > inter_mode_thresh) {
+  if (best_rd > inter_mode_thresh && bsize < cpi->sf.max_intra_bsize) {
     for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
       vp9_predict_intra_block(xd, 0, b_width_log2(bsize),
                               mbmi->tx_size, this_mode,
diff --git a/source/libvpx/vp9/encoder/vp9_ratectrl.c b/source/libvpx/vp9/encoder/vp9_ratectrl.c
index 2e35e5f..6ebd9f3 100644
--- a/source/libvpx/vp9/encoder/vp9_ratectrl.c
+++ b/source/libvpx/vp9/encoder/vp9_ratectrl.c
@@ -1215,7 +1215,7 @@
     cm->frame_type = KEY_FRAME;
     rc->this_key_frame_forced = cm->current_video_frame != 0 &&
                                 rc->frames_to_key == 0;
-    rc->frames_to_key = cpi->key_frame_frequency;
+    rc->frames_to_key = cpi->oxcf.key_freq;
     rc->kf_boost = DEFAULT_KF_BOOST;
     rc->source_alt_ref_active = 0;
   } else {
@@ -1302,7 +1302,7 @@
   if ((cm->current_video_frame == 0) ||
       (cpi->frame_flags & FRAMEFLAGS_KEY) ||
       (cpi->oxcf.auto_key && (rc->frames_since_key %
-                              cpi->key_frame_frequency == 0))) {
+          cpi->oxcf.key_freq == 0))) {
     cm->frame_type = KEY_FRAME;
     rc->source_alt_ref_active = 0;
     if (cpi->pass == 0 && cpi->oxcf.rc_mode == RC_MODE_CBR) {
@@ -1330,7 +1330,7 @@
     cm->frame_type = KEY_FRAME;
     rc->this_key_frame_forced = cm->current_video_frame != 0 &&
                                 rc->frames_to_key == 0;
-    rc->frames_to_key = cpi->key_frame_frequency;
+    rc->frames_to_key = cpi->oxcf.key_freq;
     rc->kf_boost = DEFAULT_KF_BOOST;
     rc->source_alt_ref_active = 0;
     target = calc_iframe_target_size_one_pass_cbr(cpi);
@@ -1415,7 +1415,7 @@
   rc->max_gf_interval = 16;
 
   // Extended interval for genuinely static scenes
-  rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
+  rc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1;
 
   // Special conditions when alt ref frame enabled in lagged compress mode
   if (oxcf->play_alternate && oxcf->lag_in_frames) {
diff --git a/source/libvpx/vp9/encoder/vp9_svc_layercontext.c b/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
index f59670d..2379f35 100644
--- a/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
+++ b/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
@@ -153,7 +153,7 @@
                                    oxcf->two_pass_vbrmax_section) / 100);
   lrc->max_gf_interval = 16;
 
-  lrc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
+  lrc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1;
 
   if (oxcf->play_alternate && oxcf->lag_in_frames) {
     if (lrc->max_gf_interval > oxcf->lag_in_frames - 1)
diff --git a/source/libvpx/vp9/encoder/vp9_variance.c b/source/libvpx/vp9/encoder/vp9_variance.c
index 1399bfb..ae3c86a 100644
--- a/source/libvpx/vp9/encoder/vp9_variance.c
+++ b/source/libvpx/vp9/encoder/vp9_variance.c
@@ -276,126 +276,6 @@
 SUBPIX_VAR(64, 64)
 SUBPIX_AVG_VAR(64, 64)
 
-unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
-                                              int  source_stride,
-                                              const uint8_t *ref_ptr,
-                                              int  recon_stride,
-                                              unsigned int *sse) {
-  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0,
-                                       ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
-                                              int  source_stride,
-                                              const uint8_t *ref_ptr,
-                                              int  recon_stride,
-                                              unsigned int *sse) {
-  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
-                                       ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
-                                              int  source_stride,
-                                              const uint8_t *ref_ptr,
-                                              int  recon_stride,
-                                              unsigned int *sse) {
-  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0,
-                                       ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
-                                              int  source_stride,
-                                              const uint8_t *ref_ptr,
-                                              int  recon_stride,
-                                              unsigned int *sse) {
-  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
-                                       ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
-                                              int  source_stride,
-                                              const uint8_t *ref_ptr,
-                                              int  recon_stride,
-                                              unsigned int *sse) {
-  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
-                                       ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
-                                              int  source_stride,
-                                              const uint8_t *ref_ptr,
-                                              int  recon_stride,
-                                              unsigned int *sse) {
-  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8,
-                                       ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
-                                               int  source_stride,
-                                               const uint8_t *ref_ptr,
-                                               int  recon_stride,
-                                               unsigned int *sse) {
-  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8,
-                                       ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
-                                               int  source_stride,
-                                               const uint8_t *ref_ptr,
-                                               int  recon_stride,
-                                               unsigned int *sse) {
-  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
-                                       ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
-                                               int  source_stride,
-                                               const uint8_t *ref_ptr,
-                                               int  recon_stride,
-                                               unsigned int *sse) {
-  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8,
-                                       ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
-                                      int  src_pixels_per_line,
-                                      int  xoffset,
-                                      int  yoffset,
-                                      const uint8_t *dst_ptr,
-                                      int dst_pixels_per_line,
-                                      unsigned int *sse) {
-  vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
-                                xoffset, yoffset, dst_ptr,
-                                dst_pixels_per_line, sse);
-  return *sse;
-}
-
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
-                                      int  src_pixels_per_line,
-                                      int  xoffset,
-                                      int  yoffset,
-                                      const uint8_t *dst_ptr,
-                                      int dst_pixels_per_line,
-                                      unsigned int *sse) {
-  vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
-                                xoffset, yoffset, dst_ptr,
-                                dst_pixels_per_line, sse);
-  return *sse;
-}
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
-                                      int  src_pixels_per_line,
-                                      int  xoffset,
-                                      int  yoffset,
-                                      const uint8_t *dst_ptr,
-                                      int dst_pixels_per_line,
-                                      unsigned int *sse) {
-  vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
-                                xoffset, yoffset, dst_ptr,
-                                dst_pixels_per_line, sse);
-  return *sse;
-}
-
 void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
                        int height, const uint8_t *ref, int ref_stride) {
   int i, j;
diff --git a/source/libvpx/vp9/encoder/vp9_variance.h b/source/libvpx/vp9/encoder/vp9_variance.h
index 4c8be71..c47fe13 100644
--- a/source/libvpx/vp9/encoder/vp9_variance.h
+++ b/source/libvpx/vp9/encoder/vp9_variance.h
@@ -69,22 +69,12 @@
                                                    unsigned int *sse,
                                                    const uint8_t *second_pred);
 
-typedef unsigned int (*vp9_getmbss_fn_t)(const short *);
-
-typedef unsigned int (*vp9_get16x16prederror_fn_t)(const uint8_t *src_ptr,
-                                                   int source_stride,
-                                                   const uint8_t *ref_ptr,
-                                                   int  ref_stride);
-
 typedef struct vp9_variance_vtable {
   vp9_sad_fn_t               sdf;
   vp9_sad_avg_fn_t           sdaf;
   vp9_variance_fn_t          vf;
   vp9_subpixvariance_fn_t    svf;
   vp9_subp_avg_variance_fn_t svaf;
-  vp9_variance_fn_t          svf_halfpix_h;
-  vp9_variance_fn_t          svf_halfpix_v;
-  vp9_variance_fn_t          svf_halfpix_hv;
   vp9_sad_multi_fn_t         sdx3f;
   vp9_sad_multi_fn_t         sdx8f;
   vp9_sad_multi_d_fn_t       sdx4df;
diff --git a/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.asm b/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.asm
new file mode 100644
index 0000000..1400071
--- /dev/null
+++ b/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.asm
@@ -0,0 +1,174 @@
+;
+;  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+%include "third_party/x86inc/x86inc.asm"
+
+; This file provides SSSE3 version of the forward transformation. Part
+; of the macro definitions are originally derived from ffmpeg project.
+; The current version applies to x86 64-bit only.
+
+SECTION_RODATA
+
+pw_11585x2: times 8 dw 23170
+pd_8192:    times 4 dd 8192
+
+%macro TRANSFORM_COEFFS 2
+pw_%1_%2:   dw  %1,  %2,  %1,  %2,  %1,  %2,  %1,  %2
+pw_%2_m%1:  dw  %2, -%1,  %2, -%1,  %2, -%1,  %2, -%1
+%endmacro
+
+TRANSFORM_COEFFS 15137,   6270
+TRANSFORM_COEFFS 16069,   3196
+TRANSFORM_COEFFS  9102,  13623
+
+SECTION .text
+
+%if ARCH_X86_64
+%macro SUM_SUB 3
+  psubw  m%3, m%1, m%2
+  paddw  m%1, m%2
+  SWAP    %2, %3
+%endmacro
+
+; butterfly operation
+%macro MUL_ADD_2X 6 ; dst1, dst2, src, round, coefs1, coefs2
+  pmaddwd            m%1, m%3, %5
+  pmaddwd            m%2, m%3, %6
+  paddd              m%1,  %4
+  paddd              m%2,  %4
+  psrad              m%1,  14
+  psrad              m%2,  14
+%endmacro
+
+%macro BUTTERFLY_4X 7 ; dst1, dst2, coef1, coef2, round, tmp1, tmp2
+  punpckhwd          m%6, m%2, m%1
+  MUL_ADD_2X         %7,  %6,  %6,  %5, [pw_%4_%3], [pw_%3_m%4]
+  punpcklwd          m%2, m%1
+  MUL_ADD_2X         %1,  %2,  %2,  %5, [pw_%4_%3], [pw_%3_m%4]
+  packssdw           m%1, m%7
+  packssdw           m%2, m%6
+%endmacro
+
+; matrix transpose
+%macro INTERLEAVE_2X 4
+  punpckh%1          m%4, m%2, m%3
+  punpckl%1          m%2, m%3
+  SWAP               %3,  %4
+%endmacro
+
+%macro TRANSPOSE8X8 9
+  INTERLEAVE_2X  wd, %1, %2, %9
+  INTERLEAVE_2X  wd, %3, %4, %9
+  INTERLEAVE_2X  wd, %5, %6, %9
+  INTERLEAVE_2X  wd, %7, %8, %9
+
+  INTERLEAVE_2X  dq, %1, %3, %9
+  INTERLEAVE_2X  dq, %2, %4, %9
+  INTERLEAVE_2X  dq, %5, %7, %9
+  INTERLEAVE_2X  dq, %6, %8, %9
+
+  INTERLEAVE_2X  qdq, %1, %5, %9
+  INTERLEAVE_2X  qdq, %3, %7, %9
+  INTERLEAVE_2X  qdq, %2, %6, %9
+  INTERLEAVE_2X  qdq, %4, %8, %9
+
+  SWAP  %2, %5
+  SWAP  %4, %7
+%endmacro
+
+; 1D forward 8x8 DCT transform
+%macro FDCT8_1D 0
+  SUM_SUB            0,  7,  9
+  SUM_SUB            1,  6,  9
+  SUM_SUB            2,  5,  9
+  SUM_SUB            3,  4,  9
+
+  SUM_SUB            0,  3,  9
+  SUM_SUB            1,  2,  9
+  SUM_SUB            6,  5,  9
+  SUM_SUB            0,  1,  9
+
+  BUTTERFLY_4X       2,  3,  6270,  15137,  m8,  9,  10
+
+  pmulhrsw           m6, m12
+  pmulhrsw           m5, m12
+  pmulhrsw           m0, m12
+  pmulhrsw           m1, m12
+
+  SUM_SUB            4,  5,  9
+  SUM_SUB            7,  6,  9
+  BUTTERFLY_4X       4,  7,  3196,  16069,  m8,  9,  10
+  BUTTERFLY_4X       5,  6,  13623,  9102,  m8,  9,  10
+  SWAP               1,  4
+  SWAP               3,  6
+%endmacro
+
+%macro DIVIDE_ROUND_2X 4 ; dst1, dst2, tmp1, tmp2
+  psraw              m%3, m%1, 15
+  psraw              m%4, m%2, 15
+  psubw              m%1, m%3
+  psubw              m%2, m%4
+  psraw              m%1, 1
+  psraw              m%2, 1
+%endmacro
+
+INIT_XMM ssse3
+cglobal fdct8x8, 3, 5, 13, input, output, stride
+
+  mova               m8, [pd_8192]
+  mova              m12, [pw_11585x2]
+  pxor              m11, m11
+
+  lea                r3, [2 * strideq]
+  lea                r4, [4 * strideq]
+  mova               m0, [inputq]
+  mova               m1, [inputq + r3]
+  lea                inputq, [inputq + r4]
+  mova               m2, [inputq]
+  mova               m3, [inputq + r3]
+  lea                inputq, [inputq + r4]
+  mova               m4, [inputq]
+  mova               m5, [inputq + r3]
+  lea                inputq, [inputq + r4]
+  mova               m6, [inputq]
+  mova               m7, [inputq + r3]
+
+  ; left shift by 2 to increase forward transformation precision
+  psllw              m0, 2
+  psllw              m1, 2
+  psllw              m2, 2
+  psllw              m3, 2
+  psllw              m4, 2
+  psllw              m5, 2
+  psllw              m6, 2
+  psllw              m7, 2
+
+  ; column transform
+  FDCT8_1D
+  TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+  FDCT8_1D
+  TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9
+
+  DIVIDE_ROUND_2X   0, 1, 9, 10
+  DIVIDE_ROUND_2X   2, 3, 9, 10
+  DIVIDE_ROUND_2X   4, 5, 9, 10
+  DIVIDE_ROUND_2X   6, 7, 9, 10
+
+  mova              [outputq +   0], m0
+  mova              [outputq +  16], m1
+  mova              [outputq +  32], m2
+  mova              [outputq +  48], m3
+  mova              [outputq +  64], m4
+  mova              [outputq +  80], m5
+  mova              [outputq +  96], m6
+  mova              [outputq + 112], m7
+
+  RET
+%endif
diff --git a/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c b/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c
index 9e65694..25d5946 100644
--- a/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c
@@ -494,58 +494,3 @@
 
 #undef FNS
 #undef FN
-
-unsigned int vp9_variance_halfpixvar16x16_h_sse2(
-  const unsigned char *src_ptr,
-  int  src_pixels_per_line,
-  const unsigned char *dst_ptr,
-  int  dst_pixels_per_line,
-  unsigned int *sse) {
-  int xsum0;
-  unsigned int xxsum0;
-
-  vp9_half_horiz_variance16x_h_sse2(
-    src_ptr, src_pixels_per_line,
-    dst_ptr, dst_pixels_per_line, 16,
-    &xsum0, &xxsum0);
-
-  *sse = xxsum0;
-  return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
-}
-
-
-unsigned int vp9_variance_halfpixvar16x16_v_sse2(
-  const unsigned char *src_ptr,
-  int  src_pixels_per_line,
-  const unsigned char *dst_ptr,
-  int  dst_pixels_per_line,
-  unsigned int *sse) {
-  int xsum0;
-  unsigned int xxsum0;
-  vp9_half_vert_variance16x_h_sse2(
-    src_ptr, src_pixels_per_line,
-    dst_ptr, dst_pixels_per_line, 16,
-    &xsum0, &xxsum0);
-
-  *sse = xxsum0;
-  return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
-}
-
-
-unsigned int vp9_variance_halfpixvar16x16_hv_sse2(
-  const unsigned char *src_ptr,
-  int  src_pixels_per_line,
-  const unsigned char *dst_ptr,
-  int  dst_pixels_per_line,
-  unsigned int *sse) {
-  int xsum0;
-  unsigned int xxsum0;
-
-  vp9_half_horiz_vert_variance16x_h_sse2(
-    src_ptr, src_pixels_per_line,
-    dst_ptr, dst_pixels_per_line, 16,
-    &xsum0, &xxsum0);
-
-  *sse = xxsum0;
-  return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
-}
diff --git a/source/libvpx/vp9/vp9_iface_common.h b/source/libvpx/vp9/vp9_iface_common.h
index 58256b2..d60883c 100644
--- a/source/libvpx/vp9/vp9_iface_common.h
+++ b/source/libvpx/vp9/vp9_iface_common.h
@@ -16,9 +16,11 @@
     * the Y, U, and V planes, nor other alignment adjustments that
     * might be representable by a YV12_BUFFER_CONFIG, so we just
     * initialize all the fields.*/
-  int bps = 12;
-  if (yv12->uv_height == yv12->y_height) {
-    if (yv12->uv_width == yv12->y_width) {
+  const int ss_x = yv12->uv_crop_width < yv12->y_crop_width;
+  const int ss_y = yv12->uv_crop_height < yv12->y_crop_height;
+  int bps;
+  if (!ss_y) {
+    if (!ss_x) {
       img->fmt = VPX_IMG_FMT_I444;
       bps = 24;
     } else {
@@ -27,13 +29,14 @@
     }
   } else {
     img->fmt = VPX_IMG_FMT_I420;
+    bps = 12;
   }
   img->w = yv12->y_stride;
   img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
   img->d_w = yv12->y_crop_width;
   img->d_h = yv12->y_crop_height;
-  img->x_chroma_shift = yv12->uv_width < yv12->y_width;
-  img->y_chroma_shift = yv12->uv_height < yv12->y_height;
+  img->x_chroma_shift = ss_x;
+  img->y_chroma_shift = ss_y;
   img->planes[VPX_PLANE_Y] = yv12->y_buffer;
   img->planes[VPX_PLANE_U] = yv12->u_buffer;
   img->planes[VPX_PLANE_V] = yv12->v_buffer;
diff --git a/source/libvpx/vp9/vp9cx.mk b/source/libvpx/vp9/vp9cx.mk
index c444fe4..fab7f18 100644
--- a/source/libvpx/vp9/vp9cx.mk
+++ b/source/libvpx/vp9/vp9cx.mk
@@ -112,6 +112,7 @@
 
 ifeq ($(ARCH_X86_64),yes)
 VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3.asm
+VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.asm
 endif
 VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_sad_ssse3.asm
 VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/vp9_sad_sse4.asm
diff --git a/source/libvpx/vpx/vpx_image.h b/source/libvpx/vpx/vpx_image.h
index 8d0f4ec..d45b003 100644
--- a/source/libvpx/vpx/vpx_image.h
+++ b/source/libvpx/vpx/vpx_image.h
@@ -34,7 +34,7 @@
 #define VPX_IMG_FMT_PLANAR     0x100  /**< Image is a planar format */
 #define VPX_IMG_FMT_UV_FLIP    0x200  /**< V plane precedes U plane in memory */
 #define VPX_IMG_FMT_HAS_ALPHA  0x400  /**< Image has an alpha channel component */
-
+#define VPX_IMG_FMT_HIGH       0x800  /**< Image uses 16bit framebuffer */
 
   /*!\brief List of supported image formats */
   typedef enum vpx_img_fmt {
@@ -58,7 +58,10 @@
     VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4,
     VPX_IMG_FMT_I422    = VPX_IMG_FMT_PLANAR | 5,
     VPX_IMG_FMT_I444    = VPX_IMG_FMT_PLANAR | 6,
-    VPX_IMG_FMT_444A    = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_HAS_ALPHA | 7
+    VPX_IMG_FMT_444A    = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_HAS_ALPHA | 7,
+    VPX_IMG_FMT_I42016    = VPX_IMG_FMT_I420 | VPX_IMG_FMT_HIGH,
+    VPX_IMG_FMT_I42216    = VPX_IMG_FMT_I422 | VPX_IMG_FMT_HIGH,
+    VPX_IMG_FMT_I44416    = VPX_IMG_FMT_I444 | VPX_IMG_FMT_HIGH
   } vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */
 
 #if !defined(VPX_CODEC_DISABLE_COMPAT) || !VPX_CODEC_DISABLE_COMPAT
diff --git a/source/libvpx/vpx_scale/generic/yv12config.c b/source/libvpx/vpx_scale/generic/yv12config.c
index 5e95d31..3eaf50e 100644
--- a/source/libvpx/vpx_scale/generic/yv12config.c
+++ b/source/libvpx/vpx_scale/generic/yv12config.c
@@ -183,8 +183,7 @@
       ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32);
     } else if (frame_size > ybf->buffer_alloc_sz) {
       // Allocation to hold larger frame, or first allocation.
-      if (ybf->buffer_alloc)
-        vpx_free(ybf->buffer_alloc);
+      vpx_free(ybf->buffer_alloc);
       ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size);
       if (!ybf->buffer_alloc)
         return -1;
diff --git a/source/libvpx/webmdec.c b/source/libvpx/webmdec.c
deleted file mode 100644
index 93a8d9f..0000000
--- a/source/libvpx/webmdec.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./webmdec.h"
-
-#include <stdarg.h>
-
-#include "third_party/nestegg/include/nestegg/nestegg.h"
-
-static int nestegg_read_cb(void *buffer, size_t length, void *userdata) {
-  FILE *f = userdata;
-
-  if (fread(buffer, 1, length, f) < length) {
-    if (ferror(f))
-      return -1;
-    if (feof(f))
-      return 0;
-  }
-  return 1;
-}
-
-static int nestegg_seek_cb(int64_t offset, int whence, void *userdata) {
-  switch (whence) {
-    case NESTEGG_SEEK_SET:
-      whence = SEEK_SET;
-      break;
-    case NESTEGG_SEEK_CUR:
-      whence = SEEK_CUR;
-      break;
-    case NESTEGG_SEEK_END:
-      whence = SEEK_END;
-      break;
-  };
-  return fseek(userdata, (int32_t)offset, whence) ? -1 : 0;
-}
-
-static int64_t nestegg_tell_cb(void *userdata) {
-  return ftell(userdata);
-}
-
-static void nestegg_log_cb(nestegg *context,
-                           unsigned int severity,
-                           char const *format, ...) {
-  va_list ap;
-  va_start(ap, format);
-  vfprintf(stderr, format, ap);
-  fprintf(stderr, "\n");
-  va_end(ap);
-}
-
-int file_is_webm(struct WebmInputContext *webm_ctx,
-                 struct VpxInputContext *vpx_ctx) {
-  uint32_t i, n;
-  int track_type = -1;
-  int codec_id;
-
-  nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb, 0};
-  nestegg_video_params params;
-
-  io.userdata = vpx_ctx->file;
-  if (nestegg_init(&webm_ctx->nestegg_ctx, io, NULL, -1))
-    goto fail;
-
-  if (nestegg_track_count(webm_ctx->nestegg_ctx, &n))
-    goto fail;
-
-  for (i = 0; i < n; i++) {
-    track_type = nestegg_track_type(webm_ctx->nestegg_ctx, i);
-
-    if (track_type == NESTEGG_TRACK_VIDEO)
-      break;
-    else if (track_type < 0)
-      goto fail;
-  }
-
-  codec_id = nestegg_track_codec_id(webm_ctx->nestegg_ctx, i);
-  if (codec_id == NESTEGG_CODEC_VP8) {
-    vpx_ctx->fourcc = VP8_FOURCC;
-  } else if (codec_id == NESTEGG_CODEC_VP9) {
-    vpx_ctx->fourcc = VP9_FOURCC;
-  } else {
-    fprintf(stderr, "Not VPx video, quitting.\n");
-    goto fail;
-  }
-
-  webm_ctx->video_track = i;
-
-  if (nestegg_track_video_params(webm_ctx->nestegg_ctx, i, &params))
-    goto fail;
-
-  vpx_ctx->framerate.denominator = 0;
-  vpx_ctx->framerate.numerator = 0;
-  vpx_ctx->width = params.width;
-  vpx_ctx->height = params.height;
-
-  return 1;
-
- fail:
-  webm_ctx->nestegg_ctx = NULL;
-  rewind(vpx_ctx->file);
-
-  return 0;
-}
-
-int webm_read_frame(struct WebmInputContext *webm_ctx,
-                    uint8_t **buffer,
-                    size_t *bytes_in_buffer,
-                    size_t *buffer_size) {
-  if (webm_ctx->chunk >= webm_ctx->chunks) {
-    uint32_t track;
-    int status;
-
-    do {
-      /* End of this packet, get another. */
-      if (webm_ctx->pkt) {
-        nestegg_free_packet(webm_ctx->pkt);
-        webm_ctx->pkt = NULL;
-      }
-
-      status = nestegg_read_packet(webm_ctx->nestegg_ctx, &webm_ctx->pkt);
-      if (status <= 0)
-        return status ? status : 1;
-
-      if (nestegg_packet_track(webm_ctx->pkt, &track))
-        return -1;
-    } while (track != webm_ctx->video_track);
-
-    if (nestegg_packet_count(webm_ctx->pkt, &webm_ctx->chunks))
-      return -1;
-
-    webm_ctx->chunk = 0;
-  }
-
-  if (nestegg_packet_data(webm_ctx->pkt, webm_ctx->chunk,
-                          buffer, bytes_in_buffer)) {
-    return -1;
-  }
-
-  webm_ctx->chunk++;
-  return 0;
-}
-
-int webm_guess_framerate(struct WebmInputContext *webm_ctx,
-                         struct VpxInputContext *vpx_ctx) {
-  uint32_t i;
-  uint64_t tstamp = 0;
-
-  /* Check to see if we can seek before we parse any data. */
-  if (nestegg_track_seek(webm_ctx->nestegg_ctx, webm_ctx->video_track, 0)) {
-    fprintf(stderr, "Failed to guess framerate (no Cues), set to 30fps.\n");
-    vpx_ctx->framerate.numerator = 30;
-    vpx_ctx->framerate.denominator  = 1;
-    return 0;
-  }
-
-  /* Guess the framerate. Read up to 1 second, or 50 video packets,
-   * whichever comes first.
-   */
-  for (i = 0; tstamp < 1000000000 && i < 50;) {
-    nestegg_packet *pkt;
-    uint32_t track;
-
-    if (nestegg_read_packet(webm_ctx->nestegg_ctx, &pkt) <= 0)
-      break;
-
-    nestegg_packet_track(pkt, &track);
-    if (track == webm_ctx->video_track) {
-      nestegg_packet_tstamp(pkt, &tstamp);
-      ++i;
-    }
-
-    nestegg_free_packet(pkt);
-  }
-
-  if (nestegg_track_seek(webm_ctx->nestegg_ctx, webm_ctx->video_track, 0))
-    goto fail;
-
-  vpx_ctx->framerate.numerator = (i - 1) * 1000000;
-  vpx_ctx->framerate.denominator = (int)(tstamp / 1000);
-  return 0;
-
- fail:
-  nestegg_destroy(webm_ctx->nestegg_ctx);
-  webm_ctx->nestegg_ctx = NULL;
-  rewind(vpx_ctx->file);
-  return 1;
-}
-
-void webm_free(struct WebmInputContext *webm_ctx) {
-  if (webm_ctx && webm_ctx->nestegg_ctx) {
-    if (webm_ctx->pkt)
-      nestegg_free_packet(webm_ctx->pkt);
-    nestegg_destroy(webm_ctx->nestegg_ctx);
-  }
-}
diff --git a/source/libvpx/webmdec.cc b/source/libvpx/webmdec.cc
new file mode 100644
index 0000000..eb89bef
--- /dev/null
+++ b/source/libvpx/webmdec.cc
@@ -0,0 +1,219 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./webmdec.h"
+
+#include <cstring>
+#include <cstdio>
+
+#include "third_party/libwebm/mkvparser.hpp"
+#include "third_party/libwebm/mkvreader.hpp"
+
+namespace {
+
+// Releases the reader, segment and frame buffer held by |webm_ctx| and puts
+// every field back to its initial value. Deleting a NULL pointer is a no-op,
+// so the three pointers are deleted unconditionally.
+void reset(struct WebmInputContext *const webm_ctx) {
+  delete static_cast<mkvparser::MkvReader*>(webm_ctx->reader);
+  delete static_cast<mkvparser::Segment*>(webm_ctx->segment);
+  delete[] webm_ctx->buffer;
+
+  webm_ctx->reader = NULL;
+  webm_ctx->segment = NULL;
+  webm_ctx->buffer = NULL;
+
+  // These pointers are only cleared here, never deleted.
+  webm_ctx->cluster = NULL;
+  webm_ctx->block_entry = NULL;
+  webm_ctx->block = NULL;
+
+  webm_ctx->block_frame_index = 0;
+  webm_ctx->video_track_index = 0;
+  webm_ctx->timestamp_ns = 0;
+}
+
+// Stores the segment's first cluster in |webm_ctx->cluster|.
+// |webm_ctx->segment| must already point to a mkvparser::Segment.
+void get_first_cluster(struct WebmInputContext *const webm_ctx) {
+  mkvparser::Segment *const webm_segment =
+      static_cast<mkvparser::Segment*>(webm_ctx->segment);
+  webm_ctx->cluster = webm_segment->GetFirst();
+}
+
+// Used by file_is_webm() when probing fails: rewinds the input file and then
+// clears |webm_ctx|.
+void rewind_and_reset(struct WebmInputContext *const webm_ctx,
+                      struct VpxInputContext *const vpx_ctx) {
+  rewind(vpx_ctx->file);
+  reset(webm_ctx);
+}
+
+}  // namespace
+
+// Probes |vpx_ctx->file| for a WebM file with a VP8 or VP9 video track.
+// Returns 1 with |vpx_ctx| and |webm_ctx| set up for reading frames; on any
+// failure rewinds the file, resets |webm_ctx| and returns 0.
+int file_is_webm(struct WebmInputContext *webm_ctx,
+                 struct VpxInputContext *vpx_ctx) {
+  mkvparser::MkvReader *const reader = new mkvparser::MkvReader(vpx_ctx->file);
+  webm_ctx->reader = reader;
+
+  mkvparser::EBMLHeader header;
+  long long pos = 0;
+  if (header.Parse(reader, pos) < 0) {
+    rewind_and_reset(webm_ctx, vpx_ctx);
+    return 0;
+  }
+
+  mkvparser::Segment* segment;
+  if (mkvparser::Segment::CreateInstance(reader, pos, segment)) {
+    rewind_and_reset(webm_ctx, vpx_ctx);
+    return 0;
+  }
+  webm_ctx->segment = segment;
+  if (segment->Load() < 0) {
+    rewind_and_reset(webm_ctx, vpx_ctx);
+    return 0;
+  }
+
+  const mkvparser::Tracks *const tracks = segment->GetTracks();
+  const mkvparser::VideoTrack* video_track = NULL;
+  for (unsigned long i = 0; i < tracks->GetTracksCount(); ++i) {
+    const mkvparser::Track* const track = tracks->GetTrackByIndex(i);
+    if (track->GetType() == mkvparser::Track::kVideo) {
+      video_track = static_cast<const mkvparser::VideoTrack*>(track);
+      webm_ctx->video_track_index = track->GetNumber();
+      break;
+    }
+  }
+
+  // A missing codec id is handled like a missing video track: passing NULL to
+  // strncmp() would be undefined behavior.
+  const char *const codec_id =
+      video_track != NULL ? video_track->GetCodecId() : NULL;
+  if (codec_id != NULL && !strncmp(codec_id, "V_VP8", 5)) {
+    vpx_ctx->fourcc = VP8_FOURCC;
+  } else if (codec_id != NULL && !strncmp(codec_id, "V_VP9", 5)) {
+    vpx_ctx->fourcc = VP9_FOURCC;
+  } else {
+    rewind_and_reset(webm_ctx, vpx_ctx);
+    return 0;
+  }
+
+  vpx_ctx->framerate.denominator = 0;
+  vpx_ctx->framerate.numerator = 0;
+  vpx_ctx->width = video_track->GetWidth();
+  vpx_ctx->height = video_track->GetHeight();
+  get_first_cluster(webm_ctx);
+  return 1;
+}
+
+// Reads the next frame of the video track into |*buffer|, reallocating it when
+// it is too small. Returns 0 on success, 1 at end of stream and -1 on error.
+int webm_read_frame(struct WebmInputContext *webm_ctx,
+                    uint8_t **buffer,
+                    size_t *bytes_in_buffer,
+                    size_t *buffer_size) {
+  mkvparser::Segment *const segment =
+      reinterpret_cast<mkvparser::Segment*>(webm_ctx->segment);
+  const mkvparser::Cluster* cluster =
+      reinterpret_cast<const mkvparser::Cluster*>(webm_ctx->cluster);
+  const mkvparser::Block *block =
+      reinterpret_cast<const mkvparser::Block*>(webm_ctx->block);
+  const mkvparser::BlockEntry *block_entry =
+      reinterpret_cast<const mkvparser::BlockEntry*>(webm_ctx->block_entry);
+  bool block_entry_eos = false;
+  do {
+    long status = 0;
+    bool get_new_block = false;
+    if (block_entry == NULL && !block_entry_eos) {
+      status = cluster->GetFirst(block_entry);
+      get_new_block = true;
+    } else if (block_entry_eos || block_entry->EOS()) {
+      cluster = segment->GetNext(cluster);
+      if (cluster == NULL || cluster->EOS()) {
+        *bytes_in_buffer = 0;
+        return 1;
+      }
+      status = cluster->GetFirst(block_entry);
+      block_entry_eos = false;
+      get_new_block = true;
+    } else if (block == NULL ||
+               webm_ctx->block_frame_index == block->GetFrameCount() ||
+               block->GetTrackNumber() != webm_ctx->video_track_index) {
+      status = cluster->GetNext(block_entry, block_entry);
+      if (block_entry == NULL || block_entry->EOS()) {
+        block_entry_eos = true;
+        continue;
+      }
+      get_new_block = true;
+    }
+    // Bail out rather than dereference a missing entry or block below.
+    if (status || block_entry == NULL) return -1;
+    if (get_new_block) {
+      block = block_entry->GetBlock();
+      if (block == NULL) return -1;
+      webm_ctx->block_frame_index = 0;
+    }
+    // Check |block_entry_eos| first: |block| may be NULL after "continue".
+  } while (block_entry_eos ||
+           block->GetTrackNumber() != webm_ctx->video_track_index);
+
+  webm_ctx->cluster = cluster;
+  webm_ctx->block_entry = block_entry;
+  webm_ctx->block = block;
+
+  const mkvparser::Block::Frame& frame =
+      block->GetFrame(webm_ctx->block_frame_index);
+  ++webm_ctx->block_frame_index;
+  if (frame.len > static_cast<long>(*buffer_size)) {
+    delete[] *buffer;
+    *buffer = new uint8_t[frame.len];
+    if (*buffer == NULL) return -1;
+    *buffer_size = frame.len;
+    webm_ctx->buffer = *buffer;
+  }
+  *bytes_in_buffer = frame.len;
+  webm_ctx->timestamp_ns = block->GetTime(cluster);
+  mkvparser::MkvReader *const reader =
+      reinterpret_cast<mkvparser::MkvReader*>(webm_ctx->reader);
+  return frame.Read(reader, *buffer) ? -1 : 0;
+}
+
+// Estimates the frame rate from up to 1 second (or 50 frames) of timestamps.
+int webm_guess_framerate(struct WebmInputContext *webm_ctx,
+                         struct VpxInputContext *vpx_ctx) {
+  uint32_t i = 0;
+  uint8_t *buffer = NULL;
+  size_t bytes_in_buffer = 0;
+  size_t buffer_size = 0;
+  while (webm_ctx->timestamp_ns < 1000000000 && i < 50 &&
+         !webm_read_frame(webm_ctx, &buffer, &bytes_in_buffer, &buffer_size)) {
+    ++i;
+  }
+  // |i - 1| would wrap around if no frame could be read.
+  vpx_ctx->framerate.numerator = (i > 0 ? i - 1 : 0) * 1000000;
+  vpx_ctx->framerate.denominator =
+      static_cast<int>(webm_ctx->timestamp_ns / 1000);
+  // Drop |webm_ctx|'s alias of |buffer| so reset() cannot delete it again.
+  if (webm_ctx->buffer == buffer) webm_ctx->buffer = NULL;
+  delete[] buffer;
+  get_first_cluster(webm_ctx);
+  webm_ctx->block = NULL;
+  webm_ctx->block_entry = NULL;
+  webm_ctx->block_frame_index = 0;
+  webm_ctx->timestamp_ns = 0;
+  return 0;
+}
+
+void webm_free(struct WebmInputContext *webm_ctx) {
+  if (webm_ctx != NULL) reset(webm_ctx);  // Tolerate a NULL context.
+}
diff --git a/source/libvpx/webmdec.h b/source/libvpx/webmdec.h
index 108c6ad..29b815d 100644
--- a/source/libvpx/webmdec.h
+++ b/source/libvpx/webmdec.h
@@ -16,34 +16,53 @@
 extern "C" {
 #endif
 
-struct nestegg;
-struct nestegg_packet;
 struct VpxInputContext;
 
 struct WebmInputContext {
-  uint32_t chunk;
-  uint32_t chunks;
-  uint32_t video_track;
-  struct nestegg *nestegg_ctx;
-  struct nestegg_packet *pkt;
+  void *reader;             // mkvparser::MkvReader; deleted on reset.
+  void *segment;            // mkvparser::Segment; deleted on reset.
+  uint8_t *buffer;          // Last frame buffer allocated; deleted on reset.
+  const void *cluster;      // Current mkvparser::Cluster.
+  const void *block_entry;  // Current mkvparser::BlockEntry.
+  const void *block;        // Current mkvparser::Block.
+  int block_frame_index;    // Index of the next frame to read in |block|.
+  int video_track_index;    // Track number of the selected video track.
+  uint64_t timestamp_ns;    // Timestamp of the most recently read frame.
 };
 
+// Checks if the input is a WebM file. If so, initializes WebmInputContext so
+// that webm_read_frame can be called to retrieve a video frame.
+// Returns 1 on success and 0 on failure or if the input is not a WebM file.
+// TODO(vigneshv): Refactor this function into two smaller functions specific
+// to their task.
 int file_is_webm(struct WebmInputContext *webm_ctx,
                  struct VpxInputContext *vpx_ctx);
 
-/* Reads a WebM video frame. Return values:
- *   0 - Success
- *   1 - End of File
- *  -1 - Error
- */
+// Reads a WebM Video Frame. Memory for the buffer is created, owned and managed
+// by this function. For the first call, |*buffer| should be NULL and
+// |*buffer_size| should be 0. Once all the frames are read and used,
+// webm_free() should be called, otherwise there will be a leak.
+// Parameters:
+//      webm_ctx - WebmInputContext object
+//      buffer - pointer where the frame data will be filled.
+//      bytes_in_buffer - set to the size in bytes of the frame that was read.
+//      buffer_size - allocated size of |*buffer|. TODO(vigneshv): remove this
+// Return values:
+//      0 - Success
+//      1 - End of Stream
+//     -1 - Error
+// TODO(vigneshv): Make the return values consistent across all functions in
+// this file.
 int webm_read_frame(struct WebmInputContext *webm_ctx,
                     uint8_t **buffer,
                     size_t *bytes_in_buffer,
                     size_t *buffer_size);
 
+// Guesses the frame rate of the input file based on the container timestamps.
 int webm_guess_framerate(struct WebmInputContext *webm_ctx,
                          struct VpxInputContext *vpx_ctx);
 
+// Resets the WebmInputContext.
 void webm_free(struct WebmInputContext *webm_ctx);
 
 #ifdef __cplusplus