Merge "Fix the false-alarm bug when we skip to the end, which should be OK."
diff --git a/api/current.txt b/api/current.txt
index 7b2b392..8ef07de 100644
--- a/api/current.txt
+++ b/api/current.txt
@@ -97,6 +97,7 @@
     field public static final java.lang.String SET_ANIMATION_SCALE = "android.permission.SET_ANIMATION_SCALE";
     field public static final java.lang.String SET_DEBUG_APP = "android.permission.SET_DEBUG_APP";
     field public static final java.lang.String SET_ORIENTATION = "android.permission.SET_ORIENTATION";
+    field public static final java.lang.String SET_POINTER_SPEED = "android.permission.SET_POINTER_SPEED";
     field public static final deprecated java.lang.String SET_PREFERRED_APPLICATIONS = "android.permission.SET_PREFERRED_APPLICATIONS";
     field public static final java.lang.String SET_PROCESS_LIMIT = "android.permission.SET_PROCESS_LIMIT";
     field public static final java.lang.String SET_TIME = "android.permission.SET_TIME";
@@ -2892,6 +2893,7 @@
     method public void registerForContextMenu(android.view.View);
     method public void setArguments(android.os.Bundle);
     method public void setHasOptionsMenu(boolean);
+    method public void setInitialSavedState(android.app.Fragment.SavedState);
     method public void setRetainInstance(boolean);
     method public void setTargetFragment(android.app.Fragment, int);
     method public void startActivity(android.content.Intent);
@@ -2903,6 +2905,12 @@
     ctor public Fragment.InstantiationException(java.lang.String, java.lang.Exception);
   }
 
+  public static class Fragment.SavedState implements android.os.Parcelable {
+    method public int describeContents();
+    method public void writeToParcel(android.os.Parcel, int);
+    field public static final android.os.Parcelable.ClassLoaderCreator CREATOR;
+  }
+
   public class FragmentBreadCrumbs extends android.view.ViewGroup implements android.app.FragmentManager.OnBackStackChangedListener {
     ctor public FragmentBreadCrumbs(android.content.Context);
     ctor public FragmentBreadCrumbs(android.content.Context, android.util.AttributeSet);
@@ -2940,6 +2948,7 @@
     method public abstract boolean popBackStackImmediate(int, int);
     method public abstract void putFragment(android.os.Bundle, java.lang.String, android.app.Fragment);
     method public abstract void removeOnBackStackChangedListener(android.app.FragmentManager.OnBackStackChangedListener);
+    method public abstract android.app.Fragment.SavedState saveFragmentInstanceState(android.app.Fragment);
     field public static final int POP_BACK_STACK_INCLUSIVE = 1; // 0x1
   }
 
@@ -14163,6 +14172,10 @@
     field public static final int PARCELABLE_WRITE_RETURN_VALUE = 1; // 0x1
   }
 
+  public static abstract interface Parcelable.ClassLoaderCreator implements android.os.Parcelable.Creator {
+    method public abstract T createFromParcel(android.os.Parcel, java.lang.ClassLoader);
+  }
+
   public static abstract interface Parcelable.Creator {
     method public abstract T createFromParcel(android.os.Parcel);
     method public abstract T[] newArray(int);
diff --git a/core/java/android/app/Activity.java b/core/java/android/app/Activity.java
index 3877bd0..0481158 100644
--- a/core/java/android/app/Activity.java
+++ b/core/java/android/app/Activity.java
@@ -4425,27 +4425,29 @@
     final void performRestart() {
         mFragments.noteStateNotSaved();
 
-        synchronized (mManagedCursors) {
-            final int N = mManagedCursors.size();
-            for (int i=0; i<N; i++) {
-                ManagedCursor mc = mManagedCursors.get(i);
-                if (mc.mReleased || mc.mUpdated) {
-                    if (!mc.mCursor.requery()) {
-                        throw new IllegalStateException(
-                                "trying to requery an already closed cursor");
-                    }
-                    mc.mReleased = false;
-                    mc.mUpdated = false;
-                }
-            }
-        }
-
         if (mStopped) {
             mStopped = false;
-            mCalled = false;
             if (mToken != null && mParent == null) {
                 WindowManagerImpl.getDefault().setStoppedState(mToken, false);
             }
+
+            synchronized (mManagedCursors) {
+                final int N = mManagedCursors.size();
+                for (int i=0; i<N; i++) {
+                    ManagedCursor mc = mManagedCursors.get(i);
+                    if (mc.mReleased || mc.mUpdated) {
+                        if (!mc.mCursor.requery()) {
+                            throw new IllegalStateException(
+                                    "trying to requery an already closed cursor  "
+                                    + mc.mCursor);
+                        }
+                        mc.mReleased = false;
+                        mc.mUpdated = false;
+                    }
+                }
+            }
+
+            mCalled = false;
             mInstrumentation.callActivityOnRestart(this);
             if (!mCalled) {
                 throw new SuperNotCalledException(
diff --git a/core/java/android/app/ActivityThread.java b/core/java/android/app/ActivityThread.java
index c102a38..7e94cf2 100644
--- a/core/java/android/app/ActivityThread.java
+++ b/core/java/android/app/ActivityThread.java
@@ -1884,7 +1884,7 @@
             }
             deliverNewIntents(r, intents);
             if (resumed) {
-                mInstrumentation.callActivityOnResume(r.activity);
+                r.activity.performResume();
                 r.activity.mTemporaryPause = false;
             }
         }
@@ -2850,7 +2850,7 @@
             }
             deliverResults(r, res.results);
             if (resumed) {
-                mInstrumentation.callActivityOnResume(r.activity);
+                r.activity.performResume();
                 r.activity.mTemporaryPause = false;
             }
         }
diff --git a/core/java/android/app/Fragment.java b/core/java/android/app/Fragment.java
index 6f0bbd7..14ffd3b 100644
--- a/core/java/android/app/Fragment.java
+++ b/core/java/android/app/Fragment.java
@@ -450,6 +450,51 @@
     boolean mCheckedForLoaderManager;
     
     /**
+     * State information that has been retrieved from a fragment instance
+     * through {@link FragmentManager#saveFragmentInstanceState(Fragment)
+     * FragmentManager.saveFragmentInstanceState}.
+     */
+    public static class SavedState implements Parcelable {
+        final Bundle mState;
+
+        SavedState(Bundle state) {
+            mState = state;
+        }
+
+        SavedState(Parcel in, ClassLoader loader) {
+            mState = in.readBundle();
+            if (loader != null && mState != null) {
+                mState.setClassLoader(loader);
+            }
+        }
+
+        @Override
+        public int describeContents() {
+            return 0;
+        }
+
+        @Override
+        public void writeToParcel(Parcel dest, int flags) {
+            dest.writeBundle(mState);
+        }
+
+        public static final Parcelable.ClassLoaderCreator<SavedState> CREATOR
+                = new Parcelable.ClassLoaderCreator<SavedState>() {
+            public SavedState createFromParcel(Parcel in) {
+                return new SavedState(in, null);
+            }
+
+            public SavedState createFromParcel(Parcel in, ClassLoader loader) {
+                return new SavedState(in, loader);
+            }
+
+            public SavedState[] newArray(int size) {
+                return new SavedState[size];
+            }
+        };
+    }
+
+    /**
      * Thrown by {@link Fragment#instantiate(Context, String, Bundle)} when
      * there is an instantiation failure.
      */
@@ -624,6 +669,22 @@
     }
 
     /**
+     * Set the initial saved state that this Fragment should restore itself
+     * from when first being constructed, as returned by
+     * {@link FragmentManager#saveFragmentInstanceState(Fragment)
+     * FragmentManager.saveFragmentInstanceState}.
+     *
+     * @param state The state the fragment should be restored from.
+     */
+    public void setInitialSavedState(SavedState state) {
+        if (mIndex >= 0) {
+            throw new IllegalStateException("Fragment already active");
+        }
+        mSavedFragmentState = state != null && state.mState != null
+                ? state.mState : null;
+    }
+
+    /**
      * Optional target for this fragment.  This may be used, for example,
      * if this fragment is being started by another, and when done wants to
      * give a result back to the first.  The target set here is retained
diff --git a/core/java/android/app/FragmentManager.java b/core/java/android/app/FragmentManager.java
index 0da656f..3b2e108 100644
--- a/core/java/android/app/FragmentManager.java
+++ b/core/java/android/app/FragmentManager.java
@@ -274,6 +274,30 @@
     public abstract Fragment getFragment(Bundle bundle, String key);
 
     /**
+     * Save the current instance state of the given Fragment.  This can be
+     * used later when creating a new instance of the Fragment and adding
+     * it to the fragment manager, to have it create itself to match the
+     * current state returned here.  Note that there are limits on how
+     * this can be used:
+     *
+     * <ul>
+     * <li>The Fragment must currently be attached to the FragmentManager.
+     * <li>A new Fragment created using this saved state must be the same class
+     * type as the Fragment it was created from.
+     * <li>The saved state can not contain dependencies on other fragments --
+     * that is it can't use {@link #putFragment(Bundle, String, Fragment)} to
+     * store a fragment reference because that reference may not be valid when
+     * this saved state is later used.  Likewise the Fragment's target and
+     * result code are not included in this state.
+     * </ul>
+     *
+     * @param f The Fragment whose state is to be saved.
+     * @return The generated state.  This will be null if there was no
+     * interesting state created by the fragment.
+     */
+    public abstract Fragment.SavedState saveFragmentInstanceState(Fragment f);
+
+    /**
      * Print the FragmentManager's state into the given stream.
      *
      * @param prefix Text to print at the front of each line.
@@ -492,6 +516,19 @@
     }
 
     @Override
+    public Fragment.SavedState saveFragmentInstanceState(Fragment fragment) {
+        if (fragment.mIndex < 0) {
+            throw new IllegalStateException("Fragment " + fragment
+                    + " is not currently in the FragmentManager");
+        }
+        if (fragment.mState > Fragment.INITIALIZING) {
+            Bundle result = saveFragmentBasicState(fragment);
+            return result != null ? new Fragment.SavedState(result) : null;
+        }
+        return null;
+    }
+
+    @Override
     public String toString() {
         StringBuilder sb = new StringBuilder(128);
         sb.append("FragmentManager{");
@@ -715,7 +752,6 @@
                         if (f.mView != null) {
                             f.mView.setSaveFromParentEnabled(false);
                             if (f.mHidden) f.mView.setVisibility(View.GONE);
-                            f.restoreViewState();
                             f.onViewCreated(f.mView, f.mSavedFragmentState);
                         }
                     }
@@ -747,7 +783,6 @@
                                     container.addView(f.mView);
                                 }
                                 if (f.mHidden) f.mView.setVisibility(View.GONE);
-                                f.restoreViewState();
                                 f.onViewCreated(f.mView, f.mSavedFragmentState);
                             }
                         }
@@ -759,6 +794,7 @@
                                     + " did not call through to super.onActivityCreated()");
                         }
                         if (f.mView != null) {
+                            f.restoreViewState();
                         }
                         f.mSavedFragmentState = null;
                     }
@@ -1073,7 +1109,6 @@
                     mNeedMenuInvalidate = true;
                 }
                 fragment.mAdded = false;
-                fragment.mRemoving = true;
                 moveToState(fragment, Fragment.CREATED, transition, transitionStyle);
             }
         }
@@ -1086,7 +1121,6 @@
             if (!fragment.mAdded) {
                 mAdded.add(fragment);
                 fragment.mAdded = true;
-                fragment.mRemoving = false;
                 if (fragment.mHasMenu) {
                     mNeedMenuInvalidate = true;
                 }
@@ -1375,6 +1409,8 @@
         }
         if (mStateArray == null) {
             mStateArray = new SparseArray<Parcelable>();
+        } else {
+            mStateArray.clear();
         }
         f.mView.saveHierarchyState(mStateArray);
         if (mStateArray.size() > 0) {
@@ -1383,6 +1419,32 @@
         }
     }
     
+    Bundle saveFragmentBasicState(Fragment f) {
+        Bundle result = null;
+
+        if (mStateBundle == null) {
+            mStateBundle = new Bundle();
+        }
+        f.onSaveInstanceState(mStateBundle);
+        if (!mStateBundle.isEmpty()) {
+            result = mStateBundle;
+            mStateBundle = null;
+        }
+
+        if (f.mView != null) {
+            saveFragmentViewState(f);
+        }
+        if (f.mSavedViewState != null) {
+            if (result == null) {
+                result = new Bundle();
+            }
+            result.putSparseParcelableArray(
+                    FragmentManagerImpl.VIEW_STATE_TAG, f.mSavedViewState);
+        }
+
+        return result;
+    }
+
     Parcelable saveAllState() {
         // Make sure all pending operations have now been executed to get
         // our state update-to-date.
@@ -1407,25 +1469,7 @@
                 active[i] = fs;
                 
                 if (f.mState > Fragment.INITIALIZING && fs.mSavedFragmentState == null) {
-                    if (mStateBundle == null) {
-                        mStateBundle = new Bundle();
-                    }
-                    f.onSaveInstanceState(mStateBundle);
-                    if (!mStateBundle.isEmpty()) {
-                        fs.mSavedFragmentState = mStateBundle;
-                        mStateBundle = null;
-                    }
-
-                    if (f.mView != null) {
-                        saveFragmentViewState(f);
-                        if (f.mSavedViewState != null) {
-                            if (fs.mSavedFragmentState == null) {
-                                fs.mSavedFragmentState = new Bundle();
-                            }
-                            fs.mSavedFragmentState.putSparseParcelableArray(
-                                    FragmentManagerImpl.VIEW_STATE_TAG, f.mSavedViewState);
-                        }
-                    }
+                    fs.mSavedFragmentState = saveFragmentBasicState(f);
 
                     if (f.mTarget != null) {
                         if (f.mTarget.mIndex < 0) {
diff --git a/core/java/android/os/FileUtils.java b/core/java/android/os/FileUtils.java
index eb0cf37..215e836 100644
--- a/core/java/android/os/FileUtils.java
+++ b/core/java/android/os/FileUtils.java
@@ -19,20 +19,21 @@
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 import java.util.regex.Pattern;
+import java.util.zip.CRC32;
+import java.util.zip.CheckedInputStream;
 
 
 /**
  * Tools for managing files.  Not for public consumption.
  * @hide
  */
-public class FileUtils
-{
+public class FileUtils {
     public static final int S_IRWXU = 00700;
     public static final int S_IRUSR = 00400;
     public static final int S_IWUSR = 00200;
@@ -95,7 +96,7 @@
 
     /** returns the FAT file system volume ID for the volume mounted 
      * at the given mount point, or -1 for failure
-     * @param mount point for FAT volume
+     * @param mountPoint mount point of the FAT volume
      * @return volume ID or -1
      */
     public static native int getFatVolumeId(String mountPoint);
@@ -243,4 +244,32 @@
             out.close();
         }
     }
+
+    /**
+     * Computes the checksum of a file using the CRC32 checksum routine.
+     * The value of the checksum is returned.
+     *
+     * @param file  the file to checksum, must not be null
+     * @return the CRC32 checksum value; an exception is thrown if the file cannot be read.
+     */
+    public static long checksumCrc32(File file) throws FileNotFoundException, IOException {
+        CRC32 checkSummer = new CRC32();
+        CheckedInputStream cis = null;
+
+        try {
+            cis = new CheckedInputStream( new FileInputStream(file), checkSummer);
+            byte[] buf = new byte[128];
+            while(cis.read(buf) >= 0) {
+                // Just read the data so that the checksum gets calculated.
+            }
+            return checkSummer.getValue();
+        } finally {
+            if (cis != null) {
+                try {
+                    cis.close();
+                } catch (IOException e) {
+                }
+            }
+        }
+    }
 }
diff --git a/core/java/android/os/Parcel.java b/core/java/android/os/Parcel.java
index 6b35215..e9ed676 100644
--- a/core/java/android/os/Parcel.java
+++ b/core/java/android/os/Parcel.java
@@ -1980,6 +1980,9 @@
             }
         }
 
+        if (creator instanceof Parcelable.ClassLoaderCreator<?>) {
+            return ((Parcelable.ClassLoaderCreator<T>)creator).createFromParcel(this, loader);
+        }
         return creator.createFromParcel(this);
     }
 
diff --git a/core/java/android/os/Parcelable.java b/core/java/android/os/Parcelable.java
index 0a4b60f..594fbb2 100644
--- a/core/java/android/os/Parcelable.java
+++ b/core/java/android/os/Parcelable.java
@@ -113,4 +113,22 @@
          */
         public T[] newArray(int size);
     }
+
+    /**
+     * Specialization of {@link Creator} that allows you to receive the
+     * ClassLoader the object is being created in.
+     */
+    public interface ClassLoaderCreator<T> extends Creator<T> {
+        /**
+         * Create a new instance of the Parcelable class, instantiating it
+         * from the given Parcel whose data had previously been written by
+         * {@link Parcelable#writeToParcel Parcelable.writeToParcel()} and
+         * using the given ClassLoader.
+         *
+         * @param source The Parcel to read the object's data from.
+         * @param loader The ClassLoader that this object is being created in.
+         * @return Returns a new instance of the Parcelable class.
+         */
+        public T createFromParcel(Parcel source, ClassLoader loader);
+    }
 }
diff --git a/core/java/android/provider/Settings.java b/core/java/android/provider/Settings.java
index c78b935..893947d 100644
--- a/core/java/android/provider/Settings.java
+++ b/core/java/android/provider/Settings.java
@@ -1809,6 +1809,16 @@
         public static final String SIP_ASK_ME_EACH_TIME = "SIP_ASK_ME_EACH_TIME";
 
         /**
+         * Pointer speed setting.
+         * This is an integer value in a range between -7 and +7, so there are 15 possible values.
+         *   -7 = slowest
+         *    0 = default speed
+         *   +7 = fastest
+         * @hide
+         */
+        public static final String POINTER_SPEED = "pointer_speed";
+
+        /**
          * Settings to backup. This is here so that it's in the same place as the settings
          * keys and easy to update.
          * @hide
@@ -1872,6 +1882,7 @@
             USE_PTP_INTERFACE,
             SIP_CALL_OPTIONS,
             SIP_RECEIVE_CALLS,
+            POINTER_SPEED,
         };
 
         // Settings moved to Settings.Secure
diff --git a/core/java/android/view/IWindowManager.aidl b/core/java/android/view/IWindowManager.aidl
index 4427eb5..ad17edf 100644
--- a/core/java/android/view/IWindowManager.aidl
+++ b/core/java/android/view/IWindowManager.aidl
@@ -205,4 +205,9 @@
      * Called by the status bar to notify Views of changes to System UI visiblity.
      */
     void statusBarVisibilityChanged(int visibility);
+
+    /**
+     * Called by the settings application to temporarily set the pointer speed.
+     */
+    void setPointerSpeed(int speed);
 }
diff --git a/core/java/android/webkit/ViewStateSerializer.java b/core/java/android/webkit/ViewStateSerializer.java
index 81f9e70..0fc76fa 100644
--- a/core/java/android/webkit/ViewStateSerializer.java
+++ b/core/java/android/webkit/ViewStateSerializer.java
@@ -16,7 +16,6 @@
 package android.webkit;
 
 import android.graphics.Point;
-import android.graphics.Rect;
 import android.graphics.Region;
 import android.webkit.WebViewCore.DrawData;
 
diff --git a/core/java/android/webkit/WebView.java b/core/java/android/webkit/WebView.java
index ea8d8db..9e61ecf 100644
--- a/core/java/android/webkit/WebView.java
+++ b/core/java/android/webkit/WebView.java
@@ -620,6 +620,9 @@
     // SetBaseLayer time and to pause when WebView paused.
     private HTML5VideoViewProxy mHTML5VideoViewProxy;
 
+    // If we are using a set picture, don't send view updates to webkit
+    private boolean mBlockWebkitViewMessages = false;
+
     /*
      * Private message ids
      */
@@ -1815,8 +1818,8 @@
      */
     public boolean loadViewState(InputStream stream) {
         try {
-            mWebViewCore.sendMessage(EventHub.CLEAR_CONTENT);
             DrawData draw = ViewStateSerializer.deserializeViewState(stream, this);
+            mBlockWebkitViewMessages = true;
             setNewPicture(draw);
             return true;
         } catch (IOException e) {
@@ -1826,6 +1829,16 @@
     }
 
     /**
+     * Clears the view state set with {@link #loadViewState(InputStream)}.
+     * This WebView will then switch to showing the content from webkit.
+     * @hide
+     */
+    public void clearViewState() {
+        mBlockWebkitViewMessages = false;
+        invalidate();
+    }
+
+    /**
      * Restore the state of this WebView from the given map used in
      * {@link android.app.Activity#onRestoreInstanceState}. This method should
      * be called to restore the state of the WebView before using the object. If
@@ -2704,10 +2717,12 @@
         calcOurContentVisibleRect(rect);
         // Rect.equals() checks for null input.
         if (!rect.equals(mLastVisibleRectSent)) {
-            Point pos = new Point(rect.left, rect.top);
-            mWebViewCore.removeMessages(EventHub.SET_SCROLL_OFFSET);
-            mWebViewCore.sendMessage(EventHub.SET_SCROLL_OFFSET,
-                    nativeMoveGeneration(), mSendScrollEvent ? 1 : 0, pos);
+            if (!mBlockWebkitViewMessages) {
+                Point pos = new Point(rect.left, rect.top);
+                mWebViewCore.removeMessages(EventHub.SET_SCROLL_OFFSET);
+                mWebViewCore.sendMessage(EventHub.SET_SCROLL_OFFSET,
+                        nativeMoveGeneration(), mSendScrollEvent ? 1 : 0, pos);
+            }
             mLastVisibleRectSent = rect;
             mPrivateHandler.removeMessages(SWITCH_TO_LONGPRESS);
         }
@@ -2722,7 +2737,9 @@
             // TODO: the global offset is only used by windowRect()
             // in ChromeClientAndroid ; other clients such as touch
             // and mouse events could return view + screen relative points.
-            mWebViewCore.sendMessage(EventHub.SET_GLOBAL_BOUNDS, globalRect);
+            if (!mBlockWebkitViewMessages) {
+                mWebViewCore.sendMessage(EventHub.SET_GLOBAL_BOUNDS, globalRect);
+            }
             mLastGlobalRect = globalRect;
         }
         return rect;
@@ -2787,6 +2804,7 @@
      * @return true if new values were sent
      */
     boolean sendViewSizeZoom(boolean force) {
+        if (mBlockWebkitViewMessages) return false;
         if (mZoomManager.isPreventingWebkitUpdates()) return false;
 
         int viewWidth = getViewWidth();
@@ -3399,9 +3417,11 @@
                 }
                 abortAnimation();
                 mPrivateHandler.removeMessages(RESUME_WEBCORE_PRIORITY);
-                WebViewCore.resumePriority();
-                if (!mSelectingText) {
-                    WebViewCore.resumeUpdatePicture(mWebViewCore);
+                if (!mBlockWebkitViewMessages) {
+                    WebViewCore.resumePriority();
+                    if (!mSelectingText) {
+                        WebViewCore.resumeUpdatePicture(mWebViewCore);
+                    }
                 }
                 if (oldX != mScrollX || oldY != mScrollY) {
                     sendOurVisibleRect();
@@ -4259,7 +4279,7 @@
     }
 
     void onFixedLengthZoomAnimationEnd() {
-        if (!mSelectingText) {
+        if (!mBlockWebkitViewMessages && !mSelectingText) {
             WebViewCore.resumeUpdatePicture(mWebViewCore);
         }
         onZoomAnimationEnd();
@@ -4360,7 +4380,7 @@
             // synchronization problem with layers.
             int content = nativeDraw(canvas, color, extras, false);
             canvas.setDrawFilter(null);
-            if (content != 0) {
+            if (!mBlockWebkitViewMessages && content != 0) {
                 mWebViewCore.sendMessage(EventHub.SPLIT_PICTURE_SET, content, 0);
             }
         }
@@ -4764,6 +4784,9 @@
 
     @Override
     public boolean onKeyMultiple(int keyCode, int repeatCount, KeyEvent event) {
+        if (mBlockWebkitViewMessages) {
+            return false;
+        }
         // send complex characters to webkit for use by JS and plugins
         if (keyCode == KeyEvent.KEYCODE_UNKNOWN && event.getCharacters() != null) {
             // pass the key to DOM
@@ -4788,6 +4811,9 @@
                     + "keyCode=" + keyCode
                     + ", " + event + ", unicode=" + event.getUnicodeChar());
         }
+        if (mBlockWebkitViewMessages) {
+            return false;
+        }
 
         // don't implement accelerator keys here; defer to host application
         if (event.isCtrlPressed()) {
@@ -4991,6 +5017,9 @@
             Log.v(LOGTAG, "keyUp at " + System.currentTimeMillis()
                     + ", " + event + ", unicode=" + event.getUnicodeChar());
         }
+        if (mBlockWebkitViewMessages) {
+            return false;
+        }
 
         if (mNativeClass == 0) {
             return false;
@@ -5575,10 +5604,12 @@
     }
 
     private boolean shouldForwardTouchEvent() {
-        return mFullScreenHolder != null || (mForwardTouchEvents
+        if (mFullScreenHolder != null) return true;
+        if (mBlockWebkitViewMessages) return false;
+        return mForwardTouchEvents
                 && !mSelectingText
                 && mPreventDefault != PREVENT_DEFAULT_IGNORE
-                && mPreventDefault != PREVENT_DEFAULT_NO);
+                && mPreventDefault != PREVENT_DEFAULT_NO;
     }
 
     private boolean inFullScreenMode() {
@@ -5709,25 +5740,31 @@
                         // commit the short press action for the previous tap
                         doShortPress();
                         mTouchMode = TOUCH_INIT_MODE;
-                        mDeferTouchProcess = (!inFullScreenMode()
-                                && mForwardTouchEvents) ? hitFocusedPlugin(
-                                contentX, contentY) : false;
+                        mDeferTouchProcess = !mBlockWebkitViewMessages
+                                && (!inFullScreenMode() && mForwardTouchEvents)
+                                ? hitFocusedPlugin(contentX, contentY)
+                                : false;
                     }
                 } else { // the normal case
                     mTouchMode = TOUCH_INIT_MODE;
-                    mDeferTouchProcess = (!inFullScreenMode()
-                            && mForwardTouchEvents) ? hitFocusedPlugin(
-                            contentX, contentY) : false;
-                    mWebViewCore.sendMessage(
-                            EventHub.UPDATE_FRAME_CACHE_IF_LOADING);
+                    mDeferTouchProcess = !mBlockWebkitViewMessages
+                            && (!inFullScreenMode() && mForwardTouchEvents)
+                            ? hitFocusedPlugin(contentX, contentY)
+                            : false;
+                    if (!mBlockWebkitViewMessages) {
+                        mWebViewCore.sendMessage(
+                                EventHub.UPDATE_FRAME_CACHE_IF_LOADING);
+                    }
                     if (getSettings().supportTouchOnly()) {
                         TouchHighlightData data = new TouchHighlightData();
                         data.mX = contentX;
                         data.mY = contentY;
                         data.mSlop = viewToContentDimension(mNavSlop);
-                        mWebViewCore.sendMessageDelayed(
-                                EventHub.GET_TOUCH_HIGHLIGHT_RECTS, data,
-                                ViewConfiguration.getTapTimeout());
+                        if (!mBlockWebkitViewMessages) {
+                            mWebViewCore.sendMessageDelayed(
+                                    EventHub.GET_TOUCH_HIGHLIGHT_RECTS, data,
+                                    ViewConfiguration.getTapTimeout());
+                        }
                         if (DEBUG_TOUCH_HIGHLIGHT) {
                             if (getSettings().getNavDump()) {
                                 mTouchHighlightX = (int) x + mScrollX;
@@ -5763,7 +5800,7 @@
                             SWITCH_TO_LONGPRESS, LONG_PRESS_TIMEOUT);
                     if (inFullScreenMode() || mDeferTouchProcess) {
                         mPreventDefault = PREVENT_DEFAULT_YES;
-                    } else if (mForwardTouchEvents) {
+                    } else if (!mBlockWebkitViewMessages && mForwardTouchEvents) {
                         mPreventDefault = PREVENT_DEFAULT_MAYBE_YES;
                     } else {
                         mPreventDefault = PREVENT_DEFAULT_NO;
@@ -7875,6 +7912,10 @@
                 // after WebView's destroy() is called, skip handling messages.
                 return;
             }
+            if (mBlockWebkitViewMessages) {
+                // Blocking messages from webkit
+                return;
+            }
             switch (msg.what) {
                 case REMEMBER_PASSWORD: {
                     mDatabase.setUsernamePassword(
diff --git a/core/java/android/widget/TextView.java b/core/java/android/widget/TextView.java
index 88ab442..35e78fb 100644
--- a/core/java/android/widget/TextView.java
+++ b/core/java/android/widget/TextView.java
@@ -9318,7 +9318,6 @@
         private static final int RECENT_CUT_COPY_DURATION = 15 * 1000; // seconds
 
         // Used to detect taps on the insertion handle, which will affect the PastePopupWindow
-        private long mTouchTimer;
         private float mDownPositionX, mDownPositionY;
         private PastePopupWindow mPastePopupWindow;
         private Runnable mHider;
@@ -9392,22 +9391,18 @@
                 case MotionEvent.ACTION_DOWN:
                     mDownPositionX = ev.getRawX();
                     mDownPositionY = ev.getRawY();
-                    mTouchTimer = SystemClock.uptimeMillis();
                     break;
 
                 case MotionEvent.ACTION_UP:
-                    long delay = SystemClock.uptimeMillis() - mTouchTimer;
-                    if (delay < ViewConfiguration.getTapTimeout()) {
-                        final float deltaX = mDownPositionX - ev.getRawX();
-                        final float deltaY = mDownPositionY - ev.getRawY();
-                        final float distanceSquared = deltaX * deltaX + deltaY * deltaY;
-                        if (distanceSquared < mSquaredTouchSlopDistance) {
-                            if (mPastePopupWindow != null && mPastePopupWindow.isShowing()) {
-                                // Tapping on the handle dismisses the displayed paste view,
-                                mPastePopupWindow.hide();
-                            } else {
-                                show(0);
-                            }
+                    final float deltaX = mDownPositionX - ev.getRawX();
+                    final float deltaY = mDownPositionY - ev.getRawY();
+                    final float distanceSquared = deltaX * deltaX + deltaY * deltaY;
+                    if (distanceSquared < mSquaredTouchSlopDistance) {
+                        if (mPastePopupWindow != null && mPastePopupWindow.isShowing()) {
+                            // Tapping on the handle dismisses the displayed paste view,
+                            mPastePopupWindow.hide();
+                        } else {
+                            show(0);
                         }
                     }
                     hideDelayed();
diff --git a/core/res/AndroidManifest.xml b/core/res/AndroidManifest.xml
index 419578c..110268e 100644
--- a/core/res/AndroidManifest.xml
+++ b/core/res/AndroidManifest.xml
@@ -1098,6 +1098,13 @@
         android:description="@string/permdesc_setOrientation"
         android:protectionLevel="signature" />
 
+    <!-- Allows low-level access to setting the pointer speed.
+         Not for use by normal applications. -->
+    <permission android:name="android.permission.SET_POINTER_SPEED"
+        android:label="@string/permlab_setPointerSpeed"
+        android:description="@string/permdesc_setPointerSpeed"
+        android:protectionLevel="signature" />
+
     <!-- Allows an application to install packages. -->
     <permission android:name="android.permission.INSTALL_PACKAGES"
         android:label="@string/permlab_installPackages"
diff --git a/core/res/res/values/strings.xml b/core/res/res/values/strings.xml
index a277c74..8a4b74b 100755
--- a/core/res/res/values/strings.xml
+++ b/core/res/res/values/strings.xml
@@ -711,6 +711,13 @@
         the rotation of the screen at any time. Should never be needed for
         normal applications.</string>
 
+    <!-- Title of an application permission, listed so the user can choose whether they want to allow the application to do this. [CHAR LIMIT=30] -->
+    <string name="permlab_setPointerSpeed">change pointer speed</string>
+    <!-- Description of an application permission, listed so the user can choose whether they want to allow the application to do this. [CHAR LIMIT=NONE] -->
+    <string name="permdesc_setPointerSpeed">Allows an application to change
+        the mouse or trackpad pointer speed at any time. Should never be needed for
+        normal applications.</string>
+
     <!-- Title of an application permission, listed so the user can choose whether they want to allow the application to do this. -->
     <string name="permlab_signalPersistentProcesses">send Linux signals to applications</string>
     <!-- Description of an application permission, listed so the user can choose whether they want to allow the application to do this. -->
diff --git a/docs/html/guide/topics/fundamentals/services.jd b/docs/html/guide/topics/fundamentals/services.jd
index a9dd315..468a417 100644
--- a/docs/html/guide/topics/fundamentals/services.jd
+++ b/docs/html/guide/topics/fundamentals/services.jd
@@ -49,6 +49,13 @@
       LocalService}</a></li>
 </ol>
 
+<h2>Articles</h2>
+<ol>
+  <li><a href="{@docRoot}resources/articles/multitasking-android-way.html">Multitasking the Android Way</a></li>
+  <li><a href="{@docRoot}resources/articles/service-api-changes-starting-with.html">Service API changes starting
+      with Android 2.0</a></li>
+</ol>
+
 <h2>See also</h2>
 <ol>
 <li><a href="{@docRoot}guide/topics/fundamentals/bound-services.html">Bound Services</a></li>
diff --git a/docs/html/guide/topics/fundamentals/tasks-and-back-stack.jd b/docs/html/guide/topics/fundamentals/tasks-and-back-stack.jd
index f22e5b2..072488c 100644
--- a/docs/html/guide/topics/fundamentals/tasks-and-back-stack.jd
+++ b/docs/html/guide/topics/fundamentals/tasks-and-back-stack.jd
@@ -27,6 +27,11 @@
 </li>
 </ol>
 
+<h2>Articles</h2>
+<ol>
+  <li><a href="{@docRoot}resources/articles/multitasking-android-way.html">Multitasking the Android Way</a></li>
+</ol>
+
 <h2>See also</h2>
 <ol>
   <li><a><a href="{@docRoot}videos/index.html#v=fL6gSd4ugSI">Application Lifecycle video</a></li>
diff --git a/docs/html/guide/topics/graphics/animation.jd b/docs/html/guide/topics/graphics/animation.jd
index e10ab3e..0b02ee76 100644
--- a/docs/html/guide/topics/graphics/animation.jd
+++ b/docs/html/guide/topics/graphics/animation.jd
@@ -868,10 +868,22 @@
 For more information on creating animators, see the sections on animating with
 <a href="#value-animator">ValueAnimator</a> and <a href="#object-animator">ObjectAnimator</a>
 
-  <h2 id="declaring-xml">Declaring Animations in XML</h2>
+
+<h2 id="declaring-xml">Declaring Animations in XML</h2>
 
   <p>The property animation system lets you declare property animations with XML instead of doing
-  it programmatically. The following Android classes have XML declaration support with the
+  it programmatically. By defining your animations in XML, you can easily reuse your animations
+in multiple activities and more easily edit the animation sequence.</p>
+  
+<p>To distinguish animation files that use the new property animation APIs from those that use the
+legacy <a href="{@docRoot}guide/topics/graphics/view-animation.html">view animation</a> framework,
+starting with Android 3.1, you should save the XML files for property animations in the {@code
+res/animator/} directory (instead of {@code res/anim/}). Using the {@code animator} directory name
+is optional, but necessary if you want to use the layout editor tools in the Eclipse ADT plugin (ADT
+11.0.0+), because ADT only searches the {@code res/animator/} directory for property animation
+resources.</p>
+
+<p>The following property animation classes have XML declaration support with the
   following XML tags:</p>
 
   <ul>
@@ -924,22 +936,25 @@
     beginning each time.</dd>
   </dl>
 
-  <p>The <code>objectAnimator</code> ({@link android.animation.ObjectAnimator}) element has the
-  additional attribute <code>propertyName</code>, that lets you specify the name of the property
-  being animated. The <code>objectAnimator</code> element does not expose a <code>target</code>
+  <p>The <code>&lt;objectAnimator&gt;</code> ({@link android.animation.ObjectAnimator}) element has the
+  additional attribute <code>android:propertyName</code>, that lets you specify the name of the
+property
+  being animated. The <code>&lt;objectAnimator&gt;</code> element does not expose a <code>target</code>
   attribute, however, so you cannot set the object to animate in the XML declaration. You have to
   inflate the XML resource by calling {@link android.animation.AnimatorInflater#loadAnimator
   loadAnimator()} and call {@link android.animation.ObjectAnimator#setTarget setTarget()} to set
   the target object unlike the underlying {@link android.animation.ObjectAnimator},
   before calling {@link android.animation.ObjectAnimator#start start()}.</p>
 
-  <p>The <code>set</code> element ({@link android.animation.AnimatorSet}) exposes a single
-  attribute, <code>ordering</code>. Set this attribute to <code>together</code> (default) to play
-  all the animations in this set at once. Set this attribute to <code>sequentially</code> to play
+  <p>The <code>&lt;set&gt;</code> element ({@link android.animation.AnimatorSet}) exposes a single
+  attribute, <code>android:ordering</code>. Set this attribute to <code>"together"</code> (default)
+to play
+  all the animations in this set at once. Set this attribute to <code>"sequentially"</code> to play
   the animations in the order they are declared.</p>
 
-  <p>You can specify nested <code>set</code> tags to further group animations together. The
-  animations that you want to group together should be children of the <code>set</code> tag and can
+  <p>You can specify nested <code>&lt;set&gt;</code> elements to further group animations together.
+The
+  animations that you want to group together should be children of the <code>&lt;set&gt;</code> tag and can
   define their own <code>ordering</code> attribute.</p>
 
   <p>As an example, this XML code creates an {@link android.animation.AnimatorSet} object that
@@ -969,4 +984,9 @@
   android.animation.AnimatorSet} object, and then set the target objects for all of the animations
   before starting the animation set. Calling {@link android.animation.AnimatorSet#setTarget
   setTarget()} sets a single target object for all children of the {@link
-  android.animation.AnimatorSet}.</p>
\ No newline at end of file
+  android.animation.AnimatorSet}.</p>
+
+<p class="note"><strong>Tip:</strong> To see how the ADT layout editor allows you to develop and
+preview animations in your layout, watch the <a
+href="http://www.youtube.com/watch?v=Oq05KqjXTvs&amp;feature=player_detailpage#t=1709s">Android
+Developer Tools session</a> from Google I/O '11.</p>
diff --git a/docs/html/guide/topics/resources/providing-resources.jd b/docs/html/guide/topics/resources/providing-resources.jd
index 32595a0..59f2e73 100644
--- a/docs/html/guide/topics/resources/providing-resources.jd
+++ b/docs/html/guide/topics/resources/providing-resources.jd
@@ -88,9 +88,18 @@
   </tr>
 
   <tr>
+    <td><code>animator/</code></td>
+    <td>XML files that define <a href="{@docRoot}guide/topics/graphics/animation.html">property
+animations</a>.</td>
+  </tr>
+
+  <tr>
     <td><code>anim/</code></td>
-    <td>XML files that define tween animations. See <a
-href="animation-resource.html">Animation Resources</a>.</td>
+    <td>XML files that define <a
+href="{@docRoot}guide/topics/graphics/view-animation.html#tween-animation">tween
+animations</a>. (Property animations can also be saved in this directory, but
+the {@code animator/} directory is preferred for property animations to distinguish between the two
+types.)</td>
   </tr>
 
   <tr>
diff --git a/docs/html/guide/topics/usb/index.jd b/docs/html/guide/topics/usb/index.jd
index 18af06a..3e2a18b 100644
--- a/docs/html/guide/topics/usb/index.jd
+++ b/docs/html/guide/topics/usb/index.jd
@@ -6,9 +6,9 @@
       <h2>Topics</h2>
 
       <ol>
-        <li><a href="{@docRoot}/guide/topics/USB/accessory.jd">USB Accessory</a></li>
+        <li><a href="{@docRoot}guide/topics/usb/accessory.html">USB Accessory</a></li>
 
-        <li><a href="{@docRoot}/guide/topics/USB/host.jd">USB Host</a></li>
+        <li><a href="{@docRoot}guide/topics/usb/host.html">USB Host</a></li>
       </ol>
     </div>
   </div>
diff --git a/docs/html/resources/articles/images/service-api-changes-starting-with_runningservices.png b/docs/html/resources/articles/images/service-api-changes-starting-with_runningservices.png
new file mode 100644
index 0000000..e159fff
--- /dev/null
+++ b/docs/html/resources/articles/images/service-api-changes-starting-with_runningservices.png
Binary files differ
diff --git a/docs/html/resources/articles/images/service-api-changes-starting-with_stopservice.png b/docs/html/resources/articles/images/service-api-changes-starting-with_stopservice.png
new file mode 100644
index 0000000..cc8f0a2
--- /dev/null
+++ b/docs/html/resources/articles/images/service-api-changes-starting-with_stopservice.png
Binary files differ
diff --git a/docs/html/resources/articles/multitasking-android-way.jd b/docs/html/resources/articles/multitasking-android-way.jd
new file mode 100644
index 0000000..0dc8627
--- /dev/null
+++ b/docs/html/resources/articles/multitasking-android-way.jd
@@ -0,0 +1,103 @@
+page.title=Multitasking the Android Way
+parent.title=Articles
+parent.link=../browser.html?tag=article
+@jd:body
+
+<div id="qv-wrapper">
+<div id="qv">
+
+  <h2>See also</h2>
+  <ol>
+    <li><a href="{@docRoot}guide/topics/fundamentals/tasks-and-back-stack.html">Tasks and Back Stack</a></li>
+    <li><a href="{@docRoot}guide/topics/fundamentals/services.html">Services</a></li>
+  </ol>
+
+  <h2>Key classes</h2>
+  <ol>
+    <li>{@link android.app.Service}</li>
+    <li>{@link android.content.BroadcastReceiver}</li>
+  </ol>
+
+</div>
+</div>
+
+<p>Android is fairly unique in the ways it allows multiple applications to run at the same time.  Developers coming from a different platform may find the way it operates surprising.  Understanding its behavior is important for designing applications that will work well and integrate seamlessly with the rest of the Android platform.  This article covers the reasons for Android's multitasking design, its impact on how applications work, and how you can best take advantage of Android's unique features.</p>
+<h3>Design considerations</h3>
+<p>Mobile devices have technical limitations and user experience requirements not present in desktop or web systems.  Here are the four key constraints we were working under as we designed Android's multitasking:</p>
+<ul>
+  <li>
+    <p>We did not want to require that users close applications when "done" with them.  Such a usage pattern does not work well in a mobile environment, where usage tends to involve repeated brief contact with a wide variety of applications throughout the day.</p>
+  </li>
+  <li>
+    <p>Mobile devices don't have the luxury of swap space, so have fairly hard limits on memory use.  Robert Love has <a href="http://blog.rlove.org/2010/04/why-ipad-and-iphone-dont-support.html">a very good article</a> covering the topic.</p>
+  </li>
+  <li>
+    <p>Application switching on a mobile device is extremely critical; we target significantly less than 1 second to launch a new application.  This is especially important when the user is switching between a few applications, such as switching to look at a new SMS message while watching a video, and then returning to that video.  A noticeable wait in such situations will quickly make users hate you.</p>
+  </li>
+  <li>
+    <p>The available APIs must be sufficient for writing the built-in Google applications, as part of our "all applications are created equal" philosophy.  This means background music playback, data syncing, GPS navigation, and application downloading must be implemented with the same APIs that are available to third party developers.</p>
+  </li>
+</ul>
+<p>The first two requirements highlight an interesting conflict.  We don't want users to worry about closing their apps, but rather make it appear that all of the applications are always running.  At the same time, mobile devices have hard limits on memory use, so that a system will degrade or even start failing very quickly as it needs more RAM than is available; a desktop computer, with swap, in contrast will simply start slowing down as it needs to page RAM to its swap space.  These competing constraints were a key motivation for Android's design.</p>
+<h3>When does an application "stop"?</h3>
+<p>A common misunderstanding about Android multitasking is the difference between a process and an application.  In Android these are not tightly coupled entities: applications may seem present to the user without an actual process currently running the app; multiple applications may share processes, or one application may make use of multiple processes depending on its needs; the process(es) of an application may be kept around by Android even when that application is not actively doing something.</p>
+<p>The fact that you can see an application's process "running" does not mean the application is running or doing anything.  It may simply be there because Android needed it at some point, and has decided that it would be best to keep it around in case it needs it again.  Likewise, you may leave an application for a little bit and return to it from where you left off, and during that time Android may have needed to get rid of the process for other things.</p>
+<p>A key to how Android handles applications in this way is that processes don't shut down cleanly.  When the user leaves an application, its process is kept around in the background, allowing it to continue working (for example downloading web pages) if needed, and come immediately to the foreground if the user returns to it.  If a device never runs out of memory, then Android will keep all of these processes around, truly leaving all applications "running" all of the time.</p>
+<p>Of course, there is a limited amount of memory, and to accommodate this Android must decide when to get rid of processes that are not needed.  This leads to Android's <a href="{@docRoot}guide/topics/fundamentals.html#proclife">process lifecycle</a>, the rules it uses to decide how important each process is and thus the next one that should be dropped.  These rules are based on both how important a process is for the user's current experience, as well as how long it has been since the process was last needed by the user.</p>
+<p>Once Android determines that it needs to remove a process, it does this brutally, simply force-killing it.  The kernel can then immediately reclaim all resources needed by the process, without relying on that application being well written and responsive to a polite request to exit.  Allowing the kernel to immediately reclaim application resources makes it a lot easier to avoid serious out of memory situations.</p>
+<p>If a user later returns to an application that's been killed, Android needs a way to re-launch it in the same state as it was last seen, to preserve the "all applications are running all of the time" experience.  This is done by keeping track of the parts of the application the user is aware of (the Activities), and re-starting them with information about the last state they were seen in.  This last state is generated each time the user leaves that part of the application, not when it is killed, so that the kernel can later freely kill it without depending on the application to respond correctly at that point.</p>
+<p>In some ways, Android's process management can be seen as a form of swap space: application processes represent a certain amount of in-use memory; when memory is low, some processes can be killed (swapped out); when those processes are needed again, they can be re-started from their last saved state (swapped in).</p>
+<h3>Explicitly running in the background</h3>
+<p>So far, we have a way for applications to implicitly do work in the background, as long as the process doesn't get killed by Android as part of its regular memory management.  This is fine for things like loading web pages in the background, but what about features with harder requirements, such as background music playback, data synchronization, location tracking, and alarm clocks?</p>
+<p/>
+<p>For these tasks, the application needs a way to tell Android "I would explicitly like to run at this point."  There are two main facilities available to applications for this, represented by two kinds of components they can publish in their manifest: <span style="font-style:italic;">broadcast receivers</span> and <span style="font-style:italic;">services</span>.</p>
+<h4>Broadcast Receivers</h4>
+<p>A BroadcastReceiver allows an application to run, for a brief amount of time, in the background as a result of something else happening.  It can be used in many ways to build higher-level facilities: for example the AlarmManager allows an application to have a broadcast sent at a certain time in the future, and the LocationManager can send a broadcast when it detects interesting changes in location.  Because information about the receiver is part of an application's manifest, Android can find and launch the application even if it isn't running; of course if it already has its process available in the background, the broadcast can very efficiently be directly dispatched to it.</p>
+<p>When handling a broadcast, the application is given a fixed amount of time (currently 10 seconds) in which to do its work.  If it doesn't complete in that time, the application is considered to be misbehaving, and its process is immediately tossed into the background state to be killed for memory if needed.</p>
+<p>Broadcast receivers are great for doing small pieces of work in response to an external stimulus, such as posting a notification to the user after being sent a new GPS location report.  They are very lightweight, since the application's process only needs to be around while actively receiving the broadcast.  Because they are active for a deterministic amount of time, fairly strong guarantees can be made about not killing their process while running.  However they are not appropriate for anything of indeterminate length, such as networking.</p>
+<h4>Services</h4>
+<p>A Service allows an application to implement longer-running background operations.  There are actually a lot of other functions that services provide, but for the discussion here their fundamental purpose is for an application to say "hey I would like to continue running even while in the background, until I say I am done."  An application controls when its service runs by explicitly starting and stopping the service.</p>
+<p>While services do provide a rich client-server model, its use is optional.  Upon starting an application's services, Android simply instantiates the component in the application's process to provide its context.  How it is used after that is up to the application: it can put all of the needed code inside of the service itself without interacting with other parts of the application, make calls on other singleton objects shared with other parts of the app, directly retrieve the Service instance from elsewhere if needed, or run it in another process and do a full-blown RPC protocol if that is desired.</p>
+<p>Process management for services is different than broadcast receivers, because an unbounded number of services can ask to be running for an unknown amount of time.  There may not be enough RAM to have all of the requesting services run, so as a result no strong guarantees are made about being able to keep them running.</p>
+<p>If there is too little RAM, processes hosting services will be immediately killed like background processes are.  However, if appropriate, Android will remember that these services wish to remain running, and restart their process at a later time when more RAM is available.  For example, if the user goes to a web page that requires large amounts of RAM, Android may kill background service processes like sync until the browser's memory needs go down.</p>
+<p>Services can further negotiate this behavior by requesting they be considered "foreground."  This places the service in a "please don't kill" state, but requires that it include a notification to the user about it actively running.  This is useful for services such as background music playback or car navigation, which the user is actively aware of; when you're playing music and using the browser, you can always see the music-playing glyph in the status bar.  Android won't try to kill these services, but as a trade-off, ensures the user knows about them and is able to explicitly stop them when desired.</p>
+<h3>The value of generic components</h3>
+<p>Android's generic broadcast receiver and service components allow developers to create a wide variety of efficient background operations, including things that were never originally considered.  In Android 1.0 they were used to implement nearly all of the background behavior that the built-in and proprietary Google apps provided:</p>
+<ul>
+  <li>
+    Music playback runs in a service to allow it to continue operating after the user leaves the music application.
+  </li>
+  <li>
+    The alarm clock schedules a broadcast receiver with the alarm manager, to go off at the next set alarm time.
+  </li>
+  <li>
+    The calendar application likewise schedules an alarm to display or update its notification at the appropriate time for the next calendar event.
+  </li>
+  <li>
+    Background file download is implemented as a service that runs when there are any downloads to process.
+  </li>
+  <li>
+    The e-mail application schedules an alarm to wake up a service at regular intervals that looks for and retrieves any new mail.
+  </li>
+  <li>
+    The Google applications maintain a service to receive push notifications from the network; it in turn sends broadcasts to individual apps when it is told that they need to do things like synchronize contacts.
+  </li>
+</ul>
+<p>As the platform has evolved, these same basic components have been used to implement many of the major new developer features:</p>
+<ul>
+  <li>
+    Input methods are implemented by developers as a Service component that Android manages and works with to display as the current IME.
+  </li>
+  <li>
+    Application widgets are broadcast receivers that Android sends broadcasts to when it needs to interact with them.  This allows app widgets to be quite lightweight, by not needing their application's process to remain running.
+  </li>
+  <li>
+    Accessibility features are implemented as services that Android keeps running while in use and to which it sends appropriate information about user interactions.
+  </li>
+  <li>
+    Sync adapters introduced in Android 2.0 are services that are run in the background when a particular data sync needs to be performed.
+  </li>
+  <li>
+    Live wallpapers are a service started by Android when selected by the user.
+  </li>
+</ul>
diff --git a/docs/html/resources/articles/service-api-changes-starting-with.jd b/docs/html/resources/articles/service-api-changes-starting-with.jd
new file mode 100644
index 0000000..7bafd81
--- /dev/null
+++ b/docs/html/resources/articles/service-api-changes-starting-with.jd
@@ -0,0 +1,177 @@
+page.title=Service API changes starting with Android 2.0
+parent.title=Articles
+parent.link=../browser.html?tag=article
+@jd:body
+
+
+<div id="qv-wrapper">
+<div id="qv">
+
+  <h2>See also</h2>
+  <ol>
+    <li><a href="{@docRoot}guide/topics/fundamentals/services.html">Services</a></li>
+  </ol>
+
+  <h2>Key classes</h2>
+  <ol>
+    <li>{@link android.app.Service}</li>
+  </ol>
+
+</div>
+</div>
+
+<p>Watching developers use the Android platform over the last year has shown a number of trouble areas in the Service API as well as growing issues in the ways services operate. As a result, Android 2.0 introduced a number of changes and improvements in this area for both developers and users.</p>
+<p>The three main changes to be aware of are:</p>
+<ul>
+  <li>Service.setForeground() is now deprecated and in 2.0 does nothing.</li>
+  <li>There were many edge cases in the service lifecycle that made it very easy to accidentally leave a service running; new APIs in 2.0 make this much easier to deal with.</li>
+  <li>Android 2.0 also introduces a new UI for end users to monitor and manage the running services on their device.</li>
+</ul>
+<h3>Background on services</h3>
+<p>Before going into the details of 2.0, it may be useful to go over a quick summary of services. The Service API in Android is one of the key mechanisms for applications to do work in the background. Due to the way Android is designed, once an application is no longer visible to the user it is generally considered expendable and a candidate to be killed by the system if it ever needs memory elsewhere. The main way applications get around this is by starting a Service component, which explicitly tells the system that they are doing some valuable work and would prefer that the system not kill their process if it doesn't truly need to.</p>
+<p>This is a very powerful facility but along with that power comes some responsibility: an actively running service is taking resources away from other things that can run (including inactive processes in the background that don't need to be initialized the next time the user visits them). It is thus important that developers take care when designing their services that they only run when truly needed and avoid any bugs where they may accidentally leave the service running for long durations.</p>
+<h3>Redesigning Service.setForeground()</h3>
+<p>During the final stabilization period of Android 1.6 we started to see more issues due to an increasing number of applications using the Service.setForeground() API when they shouldn't be. This is an API that we haven't advertised much because it should not be used by most applications and can be very hard on the system: it asks that the service's process be treated as in the foreground, essentially making it unkillable and thus more difficult for the system to recover from low memory situations.</p>
+<p>At that point in 1.6 it was too late to make any significant changes to the behavior here, but in 2.0 we have done so: Service.setForeground() now does nothing. The API was always intended to be something a service would do in conjunction with putting up an ongoing notification for the user; by saying you are in the foreground, the user should be "aware" that the service is running in some way and know how to stop it. Thus in place of the old API Android 2.0 introduces two new APIs that require a notification go along with being in the foreground:</p>
+<pre class="prettyprint">
+public final void startForeground(int id, Notification notification);
+public final void stopForeground(boolean removeNotification);
+</pre>
+<p>This also not coincidentally makes it much easier to manage the notification state along with the service, since the system can now guarantee that there is always a notification while the service is in the foreground, and that the notification goes away whenever the service does.</p>
+<p>Many developers will want to write a service that works on older platforms as well as 2.0 and later; this can be accomplished by using something like the following code to selectively call the new APIs when they are available.</p>
+<pre class="prettyprint">
+private static final Class[] mStartForegroundSignature = new Class[] {
+    int.class, Notification.class};
+private static final Class[] mStopForegroundSignature = new Class[] {
+    boolean.class};
+
+private NotificationManager mNM;
+private Method mStartForeground;
+private Method mStopForeground;
+private Object[] mStartForegroundArgs = new Object[2];
+private Object[] mStopForegroundArgs = new Object[1];
+
+&#64;Override
+public void onCreate() {
+    mNM = (NotificationManager)getSystemService(NOTIFICATION_SERVICE);
+    try {
+        mStartForeground = getClass().getMethod("startForeground",
+                mStartForegroundSignature);
+        mStopForeground = getClass().getMethod("stopForeground",
+                mStopForegroundSignature);
+    } catch (NoSuchMethodException e) {
+        // Running on an older platform.
+        mStartForeground = mStopForeground = null;
+    }
+}
+
+/**
+ * This is a wrapper around the new startForeground method, using the older
+ * APIs if it is not available.
+ */
+void startForegroundCompat(int id, Notification notification) {
+    // If we have the new startForeground API, then use it.
+    if (mStartForeground != null) {
+        mStartForegroundArgs[0] = Integer.valueOf(id);
+        mStartForegroundArgs[1] = notification;
+        try {
+            mStartForeground.invoke(this, mStartForegroundArgs);
+        } catch (InvocationTargetException e) {
+            // Should not happen.
+            Log.w("MyApp", "Unable to invoke startForeground", e);
+        } catch (IllegalAccessException e) {
+            // Should not happen.
+            Log.w("MyApp", "Unable to invoke startForeground", e);
+        }
+        return;
+    }
+    
+    // Fall back on the old API.
+    setForeground(true);
+    mNM.notify(id, notification);
+}
+
+/**
+ * This is a wrapper around the new stopForeground method, using the older
+ * APIs if it is not available.
+ */
+void stopForegroundCompat(int id) {
+    // If we have the new stopForeground API, then use it.
+    if (mStopForeground != null) {
+        mStopForegroundArgs[0] = Boolean.TRUE;
+        try {
+            mStopForeground.invoke(this, mStopForegroundArgs);
+        } catch (InvocationTargetException e) {
+            // Should not happen.
+            Log.w("MyApp", "Unable to invoke stopForeground", e);
+        } catch (IllegalAccessException e) {
+            // Should not happen.
+            Log.w("MyApp", "Unable to invoke stopForeground", e);
+        }
+        return;
+    }
+    
+    // Fall back on the old API.  Note to cancel BEFORE changing the
+    // foreground state, since we could be killed at that point.
+    mNM.cancel(id);
+    setForeground(false);
+}
+</pre>
+<h3>Service lifecycle changes</h3>
+<p>Another situation we were increasingly seeing in 1.6 was that, even ignoring the services that inappropriately make themselves foreground, we had a growing number of devices with a large number of services running in the background all fighting each other over the available memory.</p>
+<p>Part of this problem is services that are running more than they should or there simply being too much stuff trying to be done on the device. However, we also found many issues in the interaction between services and the platform that made it easy for an application to leave a service running even when it is trying to do the right thing. Consider this typical scenario:</p>
+<ol>
+  <li>An application calls startService().</li>
+  <li>That service gets onCreate(), onStart(), and then spawns a background thread to do some work.</li>
+  <li>The system is tight on memory, so has to kill the currently running service.</li>
+  <li>Later when memory is free, the service is restarted, and gets onCreate() called but not onStart() because there has not been another call to startService() with a new Intent command to send it.</li>
+</ol>
+<p>Now the service will sit there created, not realizing it used to be doing some work, and so not knowing it should stop itself at some point.</p>
+<p>To address this, in Android 2.0 Service.onStart() has been deprecated (though still exists and operates as it used to in previous versions of the platform). It is replaced with a new {@link android.app.Service#onStartCommand(android.content.Intent, int, int)} callback that allows the service to better control how the system should manage it. The key part here is a new result code returned by the function, telling the system what it should do with the service if its process is killed while it is running:</p>
+<ul>
+  <li>{@link android.app.Service#START_STICKY} is basically the same as the previous behavior, where the service is left "started" and will later be restarted by the system. The only difference from previous versions of the platform is that if it gets restarted because its process is killed, onStartCommand() will be called on the next instance of the service with a null Intent instead of not being called at all. Services that use this mode should always check for this case and deal with it appropriately.</li>
+  <li>{@link android.app.Service#START_NOT_STICKY} says that, after returning from onStartCommand(), if the process is killed with no remaining start commands to deliver, then the service will be stopped instead of restarted. This makes a lot more sense for services that are intended to only run while executing commands sent to them. For example, a service may be started every 15 minutes from an alarm to poll some network state. If it gets killed while doing that work, it would be best to just let it be stopped and get started the next time the alarm fires.</li>
+  <li>{@link android.app.Service#START_REDELIVER_INTENT} is like START_NOT_STICKY, except if the service's process is killed before it calls stopSelf() for a given intent, that intent will be re-delivered to it until it completes (unless after some number of more tries it still can't complete, at which point the system gives up). This is useful for services that are receiving commands of work to do, and want to make sure they do eventually complete the work for each command sent.</li>
+</ul>
+<p>For compatibility with existing applications, the default return code for applications that are targeting an earlier version of the platform is a special {@link android.app.Service#START_STICKY_COMPATIBILITY} code that provides the old behavior of not calling onStart() with a null intent. Once you start targeting API version 5 or later, the default mode is START_STICKY and you must be prepared to deal with onStart() or onStartCommand() being called with a null Intent.</p>
+<p>You can also easily write a Service that uses both the old and new APIs, depending on the platform. All you need to do is compile against the 2.0 SDK with this code:</p>
+<pre class="prettyprint">
+// This is the old onStart method that will be called on the pre-2.0
+// platform.  On 2.0 or later we override onStartCommand() so this
+// method will not be called.
+&#64;Override
+public void onStart(Intent intent, int startId) {
+    handleStart(intent, startId);
+}
+
+&#64;Override
+public int onStartCommand(Intent intent, int flags, int startId) {
+    handleStart(intent, startId);
+    return START_NOT_STICKY;
+}
+
+void handleStart(Intent intent, int startId) {
+    // do work
+}
+</pre>
+<h3>New "running services" user interface</h3>
+<p>Our final issue to address is the case where there are simply too many services running in the amount of memory available on a device. This may be due to bugs or design flaws in installed applications, or the user simply trying to do too much. Historically users have had no visibility into what is going on at this level in the system, but it has become important to expose this, at least for lower-end devices, as the use of services has had an increasing impact on the user experience.</p>
+<p>To help address this, Android 2.0 introduces a new "Running Services" activity available from the Application system settings. When brought up, it looks something like this:</p>
+<img src="images/service-api-changes-starting-with_runningservices.png" width="192" height="320" alt="Running Services"/>
+<p>The main content is a list of all running services that may be of interest to the user, organized by the processes they run in. In the example here, we see three services:</p>
+<ul>
+  <li><b>GTalkService</b> is part of the standard Google application suite; it is running in Google's "gapps" process, which currently consumes 6.8MB. It has been started for 3 hours 55 minutes, which on this device is the time from when it was first booted.</li>
+  <li><b>ActivityService</b> is part of the Phonebook app, and its process consumes 4MB. This also has been running since boot.</li>
+  <li><b>SoftKeyboard</b> is a third party input method. It has been running since I switched to it, about 4 minutes ago.</li>
+</ul>
+<p>The user can tap on any of these services to control it; for normal services that are running because they were explicitly started, this will present a dialog allowing the user to explicitly stop it:</p>
+<img src="images/service-api-changes-starting-with_stopservice.png" height="320" width="192" alt="Stop Service"/>
+<p>Some other services, like the input method, are running for other reasons. For these, tapping on the service will go to the corresponding UI to manage it (in this case the system's input settings).</p>
+<p>Finally, along the bottom of the screen are some obscure numbers. If you know how to interpret them, this gives you a lot of information on the memory status of your device:</p>
+<ul>
+  <li><b>Avail: 38MB+114MB in 25</b> says that the device has 38MB of completely free (or likely used for unrequired caches) memory, and has another 114MB of available memory in 25 background processes it can kill at any time.</li>
+  <li><b>Other: 32MB in 3</b> says that the device has 32MB of unavailable memory in 3 unkillable processes (that is, processes that are currently considered to be foreground and must be kept running).</li>
+</ul>
+<p>For most users, this new user interface should be a much more effective way to manage the background applications on their device than the existing "task killer" applications. In the vast majority of cases the reason for a slow running device is too many services trying to run. This prevents the system from being able to run any background processes (which speed up app switching), and ultimately can result in thrashing through the services when not even they can all be kept running. The Running Services UI is intended to provide very specific information about the services that are running, to help make a good decision about what should be stopped. It also does not use the API to force stop an application, which can unintentionally break applications in numerous ways.</p>
+<p>For developers, this is an important tool to ensure your services are well behaved. As you develop your app, be sure to keep an eye on Running Services to ensure that you are not accidentally leaving your services running when they shouldn't be. You should also now keep in mind that users may freely stop any of your services as they wish, without your control, and account for that.</p>
+<p>Android's Services are a very powerful tool, but they are also one of the main and subtle ways that application developers can harm the overall experience a user has with their phone.</p>
diff --git a/docs/html/resources/resources-data.js b/docs/html/resources/resources-data.js
index 8d9b7b1..77aee46 100644
--- a/docs/html/resources/resources-data.js
+++ b/docs/html/resources/resources-data.js
@@ -20,6 +20,7 @@
     'intent': 'Intents',
     'layout': 'Layouts/Views',
     'media': 'Multimedia',
+    'multitasking': 'Multi-tasking',
     'newfeature': 'New Features',
     'performance': 'Performance',
     'search': 'Search',
@@ -201,6 +202,16 @@
     }
   },
   {
+    tags: ['article', 'bestpractice', 'multitasking'],
+    path: 'articles/multitasking-android-way.html',
+    title: {
+      en: 'Multitasking the Android Way'
+    },
+    description: {
+      en: 'This article describes best practices and user experience guidelines for multi-tasking on Android.'
+    }
+  },
+  {
     tags: ['article', 'input'],
     path: 'articles/on-screen-inputs.html',
     title: {
@@ -241,6 +252,16 @@
     }
   },
   {
+    tags: ['article', 'compatibility', 'multitasking'],
+    path: 'articles/service-api-changes-starting-with.html',
+    title: {
+      en: 'Service API changes starting with Android 2.0'
+    },
+    description: {
+      en: 'This article describes the changes and improvements to services introduced in Android 2.0, as well as strategies for compatibility with older versions of the platform.'
+    }
+  },
+  {
     tags: ['article', 'ui'],
     path: 'articles/touch-mode.html',
     title: {
diff --git a/docs/html/sdk/eclipse-adt.jd b/docs/html/sdk/eclipse-adt.jd
index feb84b1..935bf63 100644
--- a/docs/html/sdk/eclipse-adt.jd
+++ b/docs/html/sdk/eclipse-adt.jd
@@ -1,8 +1,8 @@
 page.title=ADT Plugin for Eclipse
-adt.zip.version=10.0.1
-adt.zip.download=ADT-10.0.1.zip
-adt.zip.bytes=5096182
-adt.zip.checksum=e26a77db08377bdd2e62edeb9a3e3701
+adt.zip.version=11.0.0
+adt.zip.download=ADT-11.0.0.zip
+adt.zip.bytes=TODO
+adt.zip.checksum=TODO
 
 @jd:body
 
@@ -100,6 +100,139 @@
   <a href="#" onclick="return toggleDiv(this)">
         <img src="{@docRoot}assets/images/triangle-opened.png" class="toggle-img" height="9px"
 width="9px" />
+ADT 11.0.0</a> <em>(June 2011)</em>
+  <div class="toggleme">
+
+<dl>
+
+<dt>Dependencies:</dt>
+
+<dd>ADT 11.0.0 is designed for use with SDK Tools r11. If you haven't
+already installed SDK Tools r11 into your SDK, use the Android SDK and AVD Manager to do
+so.</dd>
+
+<dt>Visual Refactoring:</dt>
+<dd>
+  <ul>
+    <li>"Extract Style" feature pulls out style-related attributes from your layout and extracts
+them as a new style defined in {@code styles.xml} (<a
+href="http://tools.android.com/recent/extractstylerefactoring">more info</a>).</li>
+    <li>"Wrap in Container" feature lets you select a group of views then surround them
+    in a new layout (a new view group, such as a LinearLayout), and transfers namespace and layout
+    parameters to the new parent (<a
+href="http://tools.android.com/recent/newrefactoringswrapinchangelayoutchangewidget">more
+info</a>).</li>
+    <li>"Change Layout" feature changes layouts from one type
+    to another, and can also flatten a layout hierarchy (<a
+href="http://tools.android.com/recent/newrefactoringswrapinchangelayoutchangewidget">more
+info</a>).</li>
+    <li>"Change Widget Type" feature changes the type of the
+    selected views to a new type. Also, a new selection context menu
+    in the visual layout editor makes it easy to select siblings as
+    well as views anywhere in the layout that have the same type (<a
+href="http://tools.android.com/recent/newrefactoringswrapinchangelayoutchangewidget">more
+info</a>).</li>
+    <li>"Extract as Include" feature finds identical collections of views
+    in other layouts and offers to combine them into a single layout that you can then include in
+ each layout (<a
+href="http://tools.android.com/recent/extractasincludeimprovements">more info</a>).</li>
+    <li>Quick Assistant in Eclipse can be invoked
+    from the XML editor (with Ctrl-1) to apply any of the above
+    refactorings (and Extract String) to the current selection (<a
+href="http://tools.android.com/recent/refactoringquickassistant">more info</a>).</li>
+  </ul>
+</dd>
+
+<dt>Visual Layout Editor:</dt>
+<dd>
+  <ul>
+    <li>This is the update to the layout editor you've been waiting for! It includes (almost) all
+the goodies demonstrated at Google I/O. <a href="http://www.youtube.com/watch?v=Oq05KqjXTvs">Watch
+the video</a> on YouTube.</li>
+    <li>The palette now supports different configurations for supported widgets. That is, a single
+view is presented in various different configurations that you can drag into your layout. For
+example, there is a <em>Text Fields</em> palette category where you can drag an {@link
+android.widget.EditText} widget in as a password field, an e-mail field, a phone field, or other
+types of text boxes. Similarly, {@link android.widget.TextView} widgets are preconfigured
+with large, normal and small theme sizes, and {@link android.widget.LinearLayout} elements are
+preconfigured in horizontal and vertical configurations (<a
+href="http://tools.android.com/recent/multipletextfieldandlayouttypes">more info</a>).</li>
+    <li>The palette supports custom views. You can pick up any custom
+    implementations of the View class you've created in your project or from included libraries and
+drag them into your layout (<a
+href="http://tools.android.com/recent/customviewsinthepalette">more info</a>).</li>
+    <li>Fragments are available in the palette for placement in your layout. In the tool, you can
+choose which layout to show rendered for a given fragment tag. Go to declaration works for fragment
+classes (<a href="http://tools.android.com/recent/fragmentsupport">more info</a>).</li> 
+    <li>The layout editor automatically applies a "zoom to fit" for newly
+    opened files as well as on device size and orientation changes to
+    ensure that large layouts are always fully visible unless you
+    manually zoom in.</li>
+    <li>You can drop in an {@code &lt;include&gt;} element from the palette, which will pop up
+    a layout chooser. When you select the layout to include, it is added with an {@code
+&lt;include&gt;}. Similarly, dropping images or image buttons will pop up image
+    resource choosers (<a
+href="http://tools.android.com/recent/includetagdropsupport">more info</a>).</li>
+    <li>The configuration chooser now applies the "Render Target" and
+    "Locale" settings project wide, making it trivial to check the
+    layouts for different languages or render targets without having
+    to configure these individually for each layout.</li>
+    <li>The layout editor is smarter about picking a default theme to
+    render a layout with, consulting factors like theme registrations
+    in the manifest, the SDK version, and other factors.</li>
+    <li>The layout editor is smarter about picking a default configuration to render a layout
+with, defaulting to the currently visible configuration in the previous file. It also considers the
+SDK target to determine whether to default to a tablet or phone screen size.</li>
+    <li>Basic focus support. The first text field dropped in a layout is assigned focus, and there
+are <strong>Request Focus</strong> and <strong>Clear Focus</strong> context menu items on text
+fields to change the focus.</li>
+  </ul>
+</dd>
+
+<dt>XML editors:</dt>
+<dd>
+<ul>  
+  <li>Code completion has been significantly improved. It now works
+  with {@code &lt;style&gt;} elements, completes dimensional units,
+  sorts resource paths in values based on the attribute name, and more. There are also many fixes to
+handle text replacement (<a
+href="http://tools.android.com/recent/xmlcodecompletionimprovements">more info</a>).</li>
+  <li>AAPT errors are handled better. They are now underlined for the
+  relevant range in the editor, and a new quickfix makes it trivial
+  to create missing resources.</li>
+  <li>Code completion for drawable, animation and color XML files (<a
+href="http://tools.android.com/recent/codecompletionfordrawablescolorsandanimationfiles">more
+info</a>).</li>
+</ul>
+</dd>
+
+<dt>DDMS:</dt>
+<dd>
+<ul>  
+  <li>"New Folder" action in the File Explorer.</li>
+  <li>The screenshot dialog will add timestamps to the filenames and preserve the orientation on
+snapshot refresh.</li>
+</ul>
+</dd>
+
+<dt>General notes:</dt>
+<dd>
+  <ul>
+    <li>TraceView supports zooming with the mouse-wheel in the timeline.</li>
+    <li>The New Android Project wizard now supports Eclipse working sets.</li>
+  </ul>
+</dd>
+</dl>
+<p>More information about tool changes is available on the <a
+href="http://tools.android.com/recent">Android Tools Project Site</a>.</p>
+</div>
+</div>
+
+
+<div class="toggleable closed">
+  <a href="#" onclick="return toggleDiv(this)">
+        <img src="{@docRoot}assets/images/triangle-closed.png" class="toggle-img" height="9px"
+width="9px" />
 ADT 10.0.1</a> <em>(March 2011)</em>
   <div class="toggleme">
 
diff --git a/docs/html/sdk/sdk_toc.cs b/docs/html/sdk/sdk_toc.cs
index 286307a..5b90551 100644
--- a/docs/html/sdk/sdk_toc.cs
+++ b/docs/html/sdk/sdk_toc.cs
@@ -153,7 +153,7 @@
       <span style="display:none" class="zh-TW"></span>
       </h2>
     <ul>
-      <li><a href="<?cs var:toroot ?>sdk/eclipse-adt.html">ADT 10.0.1
+      <li><a href="<?cs var:toroot ?>sdk/eclipse-adt.html">ADT 11.0.0
       <span style="display:none" class="de"></span>
       <span style="display:none" class="es"></span>
       <span style="display:none" class="fr"></span>
@@ -161,7 +161,7 @@
       <span style="display:none" class="ja"></span>
       <span style="display:none" class="zh-CN"></span>
       <span style="display:none" class="zh-TW"></span></a>
-     <!-- <span class="new">new!</span> --></li>
+      <span class="new">new!</span></li>
     </ul>
   </li>
   <li>
diff --git a/include/media/stagefright/openmax/OMX_IVCommon.h b/include/media/stagefright/openmax/OMX_IVCommon.h
index 4a8be7b..12b4f93 100644
--- a/include/media/stagefright/openmax/OMX_IVCommon.h
+++ b/include/media/stagefright/openmax/OMX_IVCommon.h
@@ -149,6 +149,7 @@
     OMX_COLOR_Format24BitABGR6666,
     OMX_COLOR_FormatKhronosExtensions = 0x6F000000, /**< Reserved region for introducing Khronos Standard Extensions */ 
     OMX_COLOR_FormatVendorStartUnused = 0x7F000000, /**< Reserved region for introducing Vendor Extensions */
+    OMX_QCOM_COLOR_FormatYVU420SemiPlanar = 0x7FA30C00,
     OMX_COLOR_FormatMax = 0x7FFFFFFF
 } OMX_COLOR_FORMATTYPE;
 
diff --git a/include/ui/Input.h b/include/ui/Input.h
index ba1c6b4..3b5aba4 100644
--- a/include/ui/Input.h
+++ b/include/ui/Input.h
@@ -674,6 +674,87 @@
     int32_t mActivePointerId;
 };
 
+
+/*
+ * Specifies parameters that govern pointer or wheel acceleration.
+ */
+struct VelocityControlParameters {
+    // A scale factor that is multiplied with the raw velocity deltas
+    // prior to applying any other velocity control factors.  The scale
+    // factor should be used to adapt the input device resolution
+    // (eg. counts per inch) to the output device resolution (eg. pixels per inch).
+    //
+    // Must be a positive value.
+    // Default is 1.0 (no scaling).
+    float scale;
+
+    // The scaled speed at which acceleration begins to be applied.
+    // This value establishes the upper bound of a low speed regime for
+    // small precise motions that are performed without any acceleration.
+    //
+    // Must be a non-negative value.
+    // Default is 0.0 (no low threshold).
+    float lowThreshold;
+
+    // The scaled speed at which maximum acceleration is applied.
+    // The difference between highThreshold and lowThreshold controls
+    // the range of speeds over which the acceleration factor is interpolated.
+    // The wider the range, the smoother the acceleration.
+    //
+    // Must be a non-negative value greater than or equal to lowThreshold.
+    // Default is 0.0 (no high threshold).
+    float highThreshold;
+
+    // The acceleration factor.
+    // When the speed is above the low speed threshold, the velocity will be scaled
+    // by an interpolated value between 1.0 and this amount.
+    //
+    // Must be a positive value greater than or equal to 1.0.
+    // Default is 1.0 (no acceleration).
+    float acceleration;
+
+    VelocityControlParameters() :
+            scale(1.0f), lowThreshold(0.0f), highThreshold(0.0f), acceleration(1.0f) {
+    }
+
+    VelocityControlParameters(float scale, float lowThreshold,
+            float highThreshold, float acceleration) :
+            scale(scale), lowThreshold(lowThreshold),
+            highThreshold(highThreshold), acceleration(acceleration) {
+    }
+};
+
+/*
+ * Implements mouse pointer and wheel speed control and acceleration.
+ */
+class VelocityControl {
+public:
+    VelocityControl();
+
+    /* Sets the velocity control parameters and resets the movement counters. */
+    void setParameters(const VelocityControlParameters& parameters);
+
+    /* Resets the current movement counters to zero.
+     * This has the effect of nullifying any acceleration. */
+    void reset();
+
+    /* Translates a raw movement delta into an appropriately scaled / accelerated
+     * delta based on the current velocity.  Either delta pointer may be NULL. */
+    void move(nsecs_t eventTime, float* deltaX, float* deltaY);
+
+private:
+    // If no movements are received within this amount of time,
+    // we assume the movement has stopped and reset the movement counters.
+    static const nsecs_t STOP_TIME = 500 * 1000000; // 500 ms
+
+    VelocityControlParameters mParameters;
+
+    nsecs_t mLastMovementTime;
+    VelocityTracker::Position mRawPosition;
+    VelocityTracker mVelocityTracker;
+};
+
+
 /*
  * Describes the characteristics and capabilities of an input device.
  */
diff --git a/libs/hwui/OpenGLRenderer.cpp b/libs/hwui/OpenGLRenderer.cpp
index 45f4a42..8558054 100644
--- a/libs/hwui/OpenGLRenderer.cpp
+++ b/libs/hwui/OpenGLRenderer.cpp
@@ -1949,7 +1949,16 @@
     }
     if (mSnapshot->isIgnored()) return;
 
+    // TODO: We should probably make a copy of the paint instead of modifying
+    //       it; modifying the paint will change its generationID the first
+    //       time, which might impact caches. More investigation needed to
+    //       see if it matters.
+    //       If we make a copy, then drawTextDecorations() should *not* make
+    //       its own copy as it does right now.
     paint->setAntiAlias(true);
+#if RENDER_TEXT_AS_GLYPHS
+    paint->setTextEncoding(SkPaint::kGlyphID_TextEncoding);
+#endif
 
     float length = -1.0f;
     switch (paint->getTextAlign()) {
@@ -1983,8 +1992,8 @@
 
     if (mHasShadow) {
         mCaches.dropShadowCache.setFontRenderer(fontRenderer);
-        const ShadowTexture* shadow = mCaches.dropShadowCache.get(paint, text, bytesCount,
-                count, mShadowRadius);
+        const ShadowTexture* shadow = mCaches.dropShadowCache.get(
+                paint, text, bytesCount, count, mShadowRadius);
         const AutoTexture autoCleanup(shadow);
 
         const float sx = oldX - shadow->left + mShadowDx;
@@ -2226,10 +2235,6 @@
     uint32_t flags = paint->getFlags();
     if (flags & (SkPaint::kUnderlineText_Flag | SkPaint::kStrikeThruText_Flag)) {
         SkPaint paintCopy(*paint);
-#if RENDER_TEXT_AS_GLYPHS
-        paintCopy.setTextEncoding(SkPaint::kGlyphID_TextEncoding);
-#endif
-
         float underlineWidth = length;
         // If length is > 0.0f, we already measured the text for the text alignment
         if (length <= 0.0f) {
diff --git a/libs/ui/Input.cpp b/libs/ui/Input.cpp
index 1ba38a7..e95dbe4 100644
--- a/libs/ui/Input.cpp
+++ b/libs/ui/Input.cpp
@@ -13,6 +13,10 @@
 // Log debug messages about velocity tracking.
 #define DEBUG_VELOCITY 0
 
+// Log debug messages about acceleration.
+#define DEBUG_ACCELERATION 0
+
+
 #include <stdlib.h>
 #include <unistd.h>
 #include <ctype.h>
@@ -20,6 +24,7 @@
 #include <ui/Input.h>
 
 #include <math.h>
+#include <limits.h>
 
 #ifdef HAVE_ANDROID_OS
 #include <binder/Parcel.h>
@@ -693,6 +698,11 @@
 
 // --- VelocityTracker ---
 
+const uint32_t VelocityTracker::HISTORY_SIZE;
+const nsecs_t VelocityTracker::MAX_AGE;
+const nsecs_t VelocityTracker::MIN_WINDOW;
+const nsecs_t VelocityTracker::MIN_DURATION;
+
 VelocityTracker::VelocityTracker() {
     clear();
 }
@@ -902,6 +912,85 @@
 }
 
 
+// --- VelocityControl ---
+
+const nsecs_t VelocityControl::STOP_TIME;  // Definition of the static const member read by move().
+
+VelocityControl::VelocityControl() {
+    reset();  // Start with a zeroed raw position and an empty velocity tracker.
+}
+
+void VelocityControl::setParameters(const VelocityControlParameters& parameters) {
+    mParameters = parameters;
+    reset();  // Installing new parameters also discards the accumulated movement state.
+}
+
+void VelocityControl::reset() {  // Clears all accumulated movement state.
+    mLastMovementTime = LLONG_MIN;  // Smallest possible time: move() treats the next event as a fresh start.
+    mRawPosition.x = 0;
+    mRawPosition.y = 0;
+    mVelocityTracker.clear();
+}
+
+void VelocityControl::move(nsecs_t eventTime, float* deltaX, float* deltaY) {  // Scales *deltaX / *deltaY in place.
+    if ((deltaX && *deltaX) || (deltaY && *deltaY)) {  // Nothing to do unless some axis has a non-zero delta.
+        if (eventTime >= mLastMovementTime + STOP_TIME) {  // Gap of at least STOP_TIME: start over.
+#if DEBUG_ACCELERATION
+            LOGD("VelocityControl: stopped, last movement was %0.3fms ago",
+                    (eventTime - mLastMovementTime) * 0.000001f);
+#endif
+            reset();
+        }
+        // Accumulate the unscaled deltas into a running position that is fed to the tracker.
+        mLastMovementTime = eventTime;
+        if (deltaX) {
+            mRawPosition.x += *deltaX;
+        }
+        if (deltaY) {
+            mRawPosition.y += *deltaY;
+        }
+        mVelocityTracker.addMovement(eventTime, BitSet32(BitSet32::valueForBit(0)), &mRawPosition);
+        // Pick the scale factor: the base scale, boosted according to the current speed.
+        float vx, vy;
+        float scale = mParameters.scale;
+        if (mVelocityTracker.getVelocity(0, &vx, &vy)) {
+            float speed = hypotf(vx, vy) * scale;  // Speed is compared after applying the base scale.
+            if (speed >= mParameters.highThreshold) {
+                // Apply full acceleration above the high speed threshold.
+                scale *= mParameters.acceleration;
+            } else if (speed > mParameters.lowThreshold) {
+                // Linearly interpolate the acceleration to apply between the low and high
+                // speed thresholds.
+                scale *= 1 + (speed - mParameters.lowThreshold)
+                        / (mParameters.highThreshold - mParameters.lowThreshold)
+                        * (mParameters.acceleration - 1);
+            }
+
+#if DEBUG_ACCELERATION
+            LOGD("VelocityControl(%0.3f, %0.3f, %0.3f, %0.3f): "
+                    "vx=%0.3f, vy=%0.3f, speed=%0.3f, accel=%0.3f",
+                    mParameters.scale, mParameters.lowThreshold, mParameters.highThreshold,
+                    mParameters.acceleration,
+                    vx, vy, speed, scale / mParameters.scale);
+#endif
+        } else {  // No velocity estimate: only the base scale is applied.
+#if DEBUG_ACCELERATION
+            LOGD("VelocityControl(%0.3f, %0.3f, %0.3f, %0.3f): unknown velocity",
+                    mParameters.scale, mParameters.lowThreshold, mParameters.highThreshold,
+                    mParameters.acceleration);
+#endif
+        }
+        // Write the scaled deltas back through the caller's pointers.
+        if (deltaX) {
+            *deltaX *= scale;
+        }
+        if (deltaY) {
+            *deltaY *= scale;
+        }
+    }
+}
+
+
 // --- InputDeviceInfo ---
 
 InputDeviceInfo::InputDeviceInfo() {
diff --git a/media/libstagefright/ACodec.cpp b/media/libstagefright/ACodec.cpp
index 642b3a3..d628301 100644
--- a/media/libstagefright/ACodec.cpp
+++ b/media/libstagefright/ACodec.cpp
@@ -888,8 +888,6 @@
     CHECK_EQ(err, (status_t)OK);
     CHECK_EQ((int)format.eCompressionFormat, (int)OMX_VIDEO_CodingUnused);
 
-    static const int OMX_QCOM_COLOR_FormatYVU420SemiPlanar = 0x7FA30C00;
-
     CHECK(format.eColorFormat == OMX_COLOR_FormatYUV420Planar
            || format.eColorFormat == OMX_COLOR_FormatYUV420SemiPlanar
            || format.eColorFormat == OMX_COLOR_FormatCbYCrY
diff --git a/media/libstagefright/OMXCodec.cpp b/media/libstagefright/OMXCodec.cpp
index e71f16c..9c69a6f 100644
--- a/media/libstagefright/OMXCodec.cpp
+++ b/media/libstagefright/OMXCodec.cpp
@@ -61,8 +61,6 @@
 
 namespace android {
 
-static const int OMX_QCOM_COLOR_FormatYVU420SemiPlanar = 0x7FA30C00;
-
 struct CodecInfo {
     const char *mime;
     const char *codec;
@@ -200,6 +198,7 @@
     { MEDIA_MIMETYPE_VIDEO_AVC, "OMX.qcom.video.decoder.avc" },
     { MEDIA_MIMETYPE_VIDEO_AVC, "OMX.TI.Video.Decoder" },
     { MEDIA_MIMETYPE_VIDEO_AVC, "OMX.SEC.AVC.Decoder" },
+    { MEDIA_MIMETYPE_VIDEO_AVC, "OMX.google.h264.decoder" },
     { MEDIA_MIMETYPE_VIDEO_AVC, "OMX.google.avc.decoder" },
     { MEDIA_MIMETYPE_VIDEO_AVC, "AVCDecoder" },
     { MEDIA_MIMETYPE_AUDIO_VORBIS, "OMX.google.vorbis.decoder" },
@@ -1388,8 +1387,6 @@
         CHECK_EQ(err, (status_t)OK);
         CHECK_EQ((int)format.eCompressionFormat, (int)OMX_VIDEO_CodingUnused);
 
-        static const int OMX_QCOM_COLOR_FormatYVU420SemiPlanar = 0x7FA30C00;
-
         CHECK(format.eColorFormat == OMX_COLOR_FormatYUV420Planar
                || format.eColorFormat == OMX_COLOR_FormatYUV420SemiPlanar
                || format.eColorFormat == OMX_COLOR_FormatCbYCrY
diff --git a/media/libstagefright/codecs/on2/h264dec/Android.mk b/media/libstagefright/codecs/on2/h264dec/Android.mk
new file mode 100644
index 0000000..5b3c876
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/Android.mk
@@ -0,0 +1,127 @@
+LOCAL_PATH := $(call my-dir)
+# Module 1: shared library libstagefright_soft_h264dec (decoder sources plus SoftAVC.cpp).
+include $(CLEAR_VARS)
+
+LOCAL_ARM_MODE := arm
+
+LOCAL_SRC_FILES := \
+	./source/h264bsd_transform.c \
+	./source/h264bsd_util.c \
+	./source/h264bsd_byte_stream.c \
+	./source/h264bsd_seq_param_set.c \
+	./source/h264bsd_pic_param_set.c \
+	./source/h264bsd_slice_header.c \
+	./source/h264bsd_slice_data.c \
+	./source/h264bsd_macroblock_layer.c \
+	./source/h264bsd_stream.c \
+	./source/h264bsd_vlc.c \
+	./source/h264bsd_cavlc.c \
+	./source/h264bsd_nal_unit.c \
+	./source/h264bsd_neighbour.c \
+	./source/h264bsd_storage.c \
+	./source/h264bsd_slice_group_map.c \
+	./source/h264bsd_intra_prediction.c \
+	./source/h264bsd_inter_prediction.c \
+	./source/h264bsd_reconstruct.c \
+	./source/h264bsd_dpb.c \
+	./source/h264bsd_image.c \
+	./source/h264bsd_deblocking.c \
+	./source/h264bsd_conceal.c \
+	./source/h264bsd_vui.c \
+	./source/h264bsd_pic_order_cnt.c \
+	./source/h264bsd_decoder.c \
+	./source/H264SwDecApi.c \
+	SoftAVC.cpp \
+
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/./inc \
+	frameworks/base/media/libstagefright/include \
+	frameworks/base/include/media/stagefright/openmax \
+
+MY_ASM := \
+	./source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S \
+	./source/arm_neon_asm_gcc/h264bsdClearMbLayer.S \
+	./source/arm_neon_asm_gcc/h264bsdFillRow7.S \
+	./source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S \
+	./source/arm_neon_asm_gcc/h264bsdFlushBits.S
+
+
+MY_OMXDL_C_SRC := \
+	./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c \
+	./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c \
+	./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c \
+	./omxdl/arm_neon/vc/m4p10/src/armVCM4P10_CAVLCTables.c \
+	./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c \
+	./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c \
+	./omxdl/arm_neon/src/armCOMM_Bitstream.c \
+	./omxdl/arm_neon/src/armCOMM.c
+
+MY_OMXDL_ASM_SRC := \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DequantTables_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_QuantTables_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S \
+	./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S \
+
+
+ifeq ($(ARCH_ARM_HAVE_NEON),true)
+    LOCAL_ARM_NEON   := true
+    # NEON builds add the MY_ASM and MY_OMXDL_* sources and define the two macros below.
+    LOCAL_CFLAGS     := -DH264DEC_NEON -DH264DEC_OMXDL
+    LOCAL_SRC_FILES  += $(MY_ASM) $(MY_OMXDL_C_SRC) $(MY_OMXDL_ASM_SRC)
+    LOCAL_C_INCLUDES += $(LOCAL_PATH)/./source/arm_neon_asm_gcc
+    LOCAL_C_INCLUDES += $(LOCAL_PATH)/./omxdl/arm_neon/api \
+                        $(LOCAL_PATH)/./omxdl/arm_neon/vc/api \
+                        $(LOCAL_PATH)/./omxdl/arm_neon/vc/m4p10/api
+endif
+
+LOCAL_SHARED_LIBRARIES := \
+	libstagefright libstagefright_omx libstagefright_foundation libutils \
+
+LOCAL_MODULE := libstagefright_soft_h264dec
+
+LOCAL_MODULE_TAGS := optional
+# Build everything configured above as a shared library.
+include $(BUILD_SHARED_LIBRARY)
+
+#####################################################################
+# Module 2: test utility executable "decoder" (module tag: debug)
+#####################################################################
+##
+## Built from source/DecTestBench.c and linked against
+## the libstagefright_soft_h264dec shared library defined above.
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := ./source/DecTestBench.c
+
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/inc
+
+LOCAL_SHARED_LIBRARIES := libstagefright_soft_h264dec
+
+LOCAL_MODULE_TAGS := debug
+
+LOCAL_MODULE := decoder
+
+include $(BUILD_EXECUTABLE)
+
diff --git a/media/libstagefright/codecs/on2/h264dec/SoftAVC.cpp b/media/libstagefright/codecs/on2/h264dec/SoftAVC.cpp
new file mode 100644
index 0000000..259fbc9
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/SoftAVC.cpp
@@ -0,0 +1,515 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//#define LOG_NDEBUG 0
+#define LOG_TAG "SoftAVC"
+#include <utils/Log.h>
+
+#include "SoftAVC.h"
+
+#include <media/stagefright/foundation/ADebug.h>
+#include <media/stagefright/MediaDefs.h>
+#include <media/stagefright/MediaErrors.h>
+
+
+namespace android {
+
+template<class T>
+static void InitOMXParams(T *params) {  // Stamps the nSize / nVersion header fields of *params.
+    params->nSize = sizeof(T);
+    params->nVersion.s.nVersionMajor = 1;  // The version written is 1.0.0.0.
+    params->nVersion.s.nVersionMinor = 0;
+    params->nVersion.s.nRevision = 0;
+    params->nVersion.s.nStep = 0;
+}
+
+SoftAVC::SoftAVC(
+        const char *name,
+        const OMX_CALLBACKTYPE *callbacks,
+        OMX_PTR appData,
+        OMX_COMPONENTTYPE **component)
+    : SimpleSoftOMXComponent(name, callbacks, appData, component),
+      mHandle(NULL),
+      mInputBufferCount(0),
+      mWidth(320),  // Initial 320x240 size; updated when decoded stream headers report another size.
+      mHeight(240),
+      mPictureSize(mWidth * mHeight * 3 / 2),  // Same width*height*3/2 formula as the output nBufferSize.
+      mCropLeft(0),
+      mCropTop(0),
+      mFirstPicture(NULL),
+      mFirstPictureId(-1),
+      mPicId(0),
+      mHeadersDecoded(false),
+      mEOSStatus(INPUT_DATA_AVAILABLE),
+      mOutputPortSettingsChange(NONE) {
+    initPorts();  // Register the input (AVC) and output (raw video) ports.
+    CHECK_EQ(initDecoder(), (status_t)OK);  // Decoder creation is checked to have succeeded.
+}
+
+SoftAVC::~SoftAVC() {
+    H264SwDecRelease(mHandle);
+    mHandle = NULL;
+    // Free the bookkeeping headers of pictures that were never sent to the output port.
+    while (mPicToHeaderMap.size() != 0) {
+        OMX_BUFFERHEADERTYPE *header = mPicToHeaderMap.editValueAt(0);
+        mPicToHeaderMap.removeItemsAt(0);
+        delete header;
+        header = NULL;
+    }
+    List<BufferInfo *> &outQueue = getPortQueue(kOutputPortIndex);
+    List<BufferInfo *> &inQueue = getPortQueue(kInputPortIndex);
+    CHECK(outQueue.empty());  // Both port queues are required to be empty at destruction.
+    CHECK(inQueue.empty());
+    // Release the saved first picture, if one is still pending (delete[] on NULL is a no-op).
+    delete[] mFirstPicture;
+}
+
+void SoftAVC::initPorts() {  // Registers the AVC input port and the raw YUV420 planar output port.
+    OMX_PARAM_PORTDEFINITIONTYPE def;
+    InitOMXParams(&def);
+    // Input port: compressed AVC data.
+    def.nPortIndex = kInputPortIndex;
+    def.eDir = OMX_DirInput;
+    def.nBufferCountMin = kNumInputBuffers;
+    def.nBufferCountActual = def.nBufferCountMin;
+    def.nBufferSize = 8192;
+    def.bEnabled = OMX_TRUE;
+    def.bPopulated = OMX_FALSE;
+    def.eDomain = OMX_PortDomainVideo;
+    def.bBuffersContiguous = OMX_FALSE;
+    def.nBufferAlignment = 1;
+
+    def.format.video.cMIMEType = const_cast<char *>(MEDIA_MIMETYPE_VIDEO_AVC);
+    def.format.video.pNativeRender = NULL;
+    def.format.video.nFrameWidth = mWidth;
+    def.format.video.nFrameHeight = mHeight;
+    def.format.video.nStride = def.format.video.nFrameWidth;
+    def.format.video.nSliceHeight = def.format.video.nFrameHeight;
+    def.format.video.nBitrate = 0;
+    def.format.video.xFramerate = 0;
+    def.format.video.bFlagErrorConcealment = OMX_FALSE;
+    def.format.video.eCompressionFormat = OMX_VIDEO_CodingAVC;
+    def.format.video.eColorFormat = OMX_COLOR_FormatUnused;
+    def.format.video.pNativeWindow = NULL;
+
+    addPort(def);
+    // Output port: the same `def` object is reused and the differing fields are overwritten.
+    def.nPortIndex = kOutputPortIndex;
+    def.eDir = OMX_DirOutput;
+    def.nBufferCountMin = kNumOutputBuffers;
+    def.nBufferCountActual = def.nBufferCountMin;
+    def.bEnabled = OMX_TRUE;
+    def.bPopulated = OMX_FALSE;
+    def.eDomain = OMX_PortDomainVideo;
+    def.bBuffersContiguous = OMX_FALSE;
+    def.nBufferAlignment = 2;
+
+    def.format.video.cMIMEType = const_cast<char *>(MEDIA_MIMETYPE_VIDEO_RAW);
+    def.format.video.pNativeRender = NULL;
+    def.format.video.nFrameWidth = mWidth;
+    def.format.video.nFrameHeight = mHeight;
+    def.format.video.nStride = def.format.video.nFrameWidth;
+    def.format.video.nSliceHeight = def.format.video.nFrameHeight;
+    def.format.video.nBitrate = 0;
+    def.format.video.xFramerate = 0;
+    def.format.video.bFlagErrorConcealment = OMX_FALSE;
+    def.format.video.eCompressionFormat = OMX_VIDEO_CodingUnused;
+    def.format.video.eColorFormat = OMX_COLOR_FormatYUV420Planar;
+    def.format.video.pNativeWindow = NULL;
+    // Output buffers hold one frame of width * height * 3 / 2 bytes.
+    def.nBufferSize =
+        (def.format.video.nFrameWidth * def.format.video.nFrameHeight * 3) / 2;
+
+    addPort(def);
+}
+
+status_t SoftAVC::initDecoder() {  // Creates the decoder instance and stores its handle in mHandle.
+    if (H264SwDecInit(&mHandle, 1) == H264SWDEC_OK) {  // Meaning of the `1`: see H264SwDecInit's declaration.
+        return OK;
+    }
+    return UNKNOWN_ERROR;  // Any non-OK decoder result is reported as UNKNOWN_ERROR.
+}
+
+OMX_ERRORTYPE SoftAVC::internalGetParameter(
+        OMX_INDEXTYPE index, OMX_PTR params) {  // Answers the video port format query; others go to the base class.
+    switch (index) {
+        case OMX_IndexParamVideoPortFormat:
+        {
+            OMX_VIDEO_PARAM_PORTFORMATTYPE *formatParams =
+                (OMX_VIDEO_PARAM_PORTFORMATTYPE *)params;
+            // Port indices above the output port are rejected.
+            if (formatParams->nPortIndex > kOutputPortIndex) {
+                return OMX_ErrorUndefined;
+            }
+            // Each port reports exactly one format, at enumeration index 0.
+            if (formatParams->nIndex != 0) {
+                return OMX_ErrorNoMore;
+            }
+
+            if (formatParams->nPortIndex == kInputPortIndex) {
+                formatParams->eCompressionFormat = OMX_VIDEO_CodingAVC;
+                formatParams->eColorFormat = OMX_COLOR_FormatUnused;
+                formatParams->xFramerate = 0;
+            } else {
+                CHECK(formatParams->nPortIndex == kOutputPortIndex);
+
+                formatParams->eCompressionFormat = OMX_VIDEO_CodingUnused;
+                formatParams->eColorFormat = OMX_COLOR_FormatYUV420Planar;
+                formatParams->xFramerate = 0;
+            }
+
+            return OMX_ErrorNone;
+        }
+
+        default:
+            return SimpleSoftOMXComponent::internalGetParameter(index, params);
+    }
+}
+
+OMX_ERRORTYPE SoftAVC::internalSetParameter(
+        OMX_INDEXTYPE index, const OMX_PTR params) {  // The two cases handled here only validate; nothing is stored.
+    switch (index) {
+        case OMX_IndexParamStandardComponentRole:
+        {
+            const OMX_PARAM_COMPONENTROLETYPE *roleParams =
+                (const OMX_PARAM_COMPONENTROLETYPE *)params;
+            // The only role accepted is "video_decoder.avc".
+            if (strncmp((const char *)roleParams->cRole,
+                        "video_decoder.avc",
+                        OMX_MAX_STRINGNAME_SIZE - 1)) {
+                return OMX_ErrorUndefined;
+            }
+
+            return OMX_ErrorNone;
+        }
+
+        case OMX_IndexParamVideoPortFormat:
+        {
+            OMX_VIDEO_PARAM_PORTFORMATTYPE *formatParams =
+                (OMX_VIDEO_PARAM_PORTFORMATTYPE *)params;
+            // Same port-index and enumeration-index checks as internalGetParameter().
+            if (formatParams->nPortIndex > kOutputPortIndex) {
+                return OMX_ErrorUndefined;
+            }
+
+            if (formatParams->nIndex != 0) {
+                return OMX_ErrorNoMore;
+            }
+            // The requested format fields themselves are not inspected.
+            return OMX_ErrorNone;
+        }
+
+        default:
+            return SimpleSoftOMXComponent::internalSetParameter(index, params);
+    }
+}
+
+OMX_ERRORTYPE SoftAVC::getConfig(
+        OMX_INDEXTYPE index, OMX_PTR params) {  // Only the output crop rectangle query is supported.
+    switch (index) {
+        case OMX_IndexConfigCommonOutputCrop:
+        {
+            OMX_CONFIG_RECTTYPE *rectParams = (OMX_CONFIG_RECTTYPE *)params;
+            // The crop rectangle exists for the output port only.
+            if (rectParams->nPortIndex != kOutputPortIndex) {
+                return OMX_ErrorUndefined;
+            }
+            // Report the crop origin together with the current width and height.
+            rectParams->nLeft = mCropLeft;
+            rectParams->nTop = mCropTop;
+            rectParams->nWidth = mWidth;
+            rectParams->nHeight = mHeight;
+
+            return OMX_ErrorNone;
+        }
+
+        default:
+            return OMX_ErrorUnsupportedIndex;
+    }
+}
+
+void SoftAVC::onQueueFilled(OMX_U32 portIndex) {  // portIndex is not used; both port queues are examined.
+    if (mOutputPortSettingsChange != NONE) {
+        return;  // An output port reconfiguration is still in progress.
+    }
+
+    if (mEOSStatus == OUTPUT_FRAMES_FLUSHED) {
+        return;  // drainAllOutputBuffers() has already emitted the EOS buffer.
+    }
+
+    List<BufferInfo *> &inQueue = getPortQueue(kInputPortIndex);
+    List<BufferInfo *> &outQueue = getPortQueue(kOutputPortIndex);
+    H264SwDecRet ret = H264SWDEC_PIC_RDY;
+    // Each pass requires all kNumOutputBuffers output buffers to be queued with us.
+    bool portSettingsChanged = false;
+    while ((mEOSStatus != INPUT_DATA_AVAILABLE || !inQueue.empty())
+            && outQueue.size() == kNumOutputBuffers) {
+        status_t err = OK;  // Scoped per input buffer so one failure is reported exactly once.
+        if (mEOSStatus == INPUT_EOS_SEEN) {
+            drainAllOutputBuffers();
+            return;
+        }
+
+        BufferInfo *inInfo = *inQueue.begin();
+        OMX_BUFFERHEADERTYPE *inHeader = inInfo->mHeader;
+        ++mPicId;
+        if (inHeader->nFlags & OMX_BUFFERFLAG_EOS) {  // NOTE(review): EOS buffer is returned undecoded; confirm it never carries data.
+            inQueue.erase(inQueue.begin());
+            inInfo->mOwnedByUs = false;
+            notifyEmptyBufferDone(inHeader);
+            mEOSStatus = INPUT_EOS_SEEN;
+            continue;  // The next pass drains the remaining output.
+        }
+        // Remember this input's timestamp and flags under its picture id.
+        OMX_BUFFERHEADERTYPE *header = new OMX_BUFFERHEADERTYPE;
+        memset(header, 0, sizeof(OMX_BUFFERHEADERTYPE));
+        header->nTimeStamp = inHeader->nTimeStamp;
+        header->nFlags = inHeader->nFlags;
+        mPicToHeaderMap.add(mPicId, header);
+        inQueue.erase(inQueue.begin());
+
+        H264SwDecInput inPicture;
+        H264SwDecOutput outPicture;
+        memset(&inPicture, 0, sizeof(inPicture));
+        inPicture.dataLen = inHeader->nFilledLen;
+        inPicture.pStream = inHeader->pBuffer + inHeader->nOffset;
+        inPicture.picId = mPicId;
+        inPicture.intraConcealmentMethod = 1;
+        H264SwDecPicture decodedPicture;
+        // Feed the buffer to the decoder until all of its data has been consumed.
+        while (inPicture.dataLen > 0) {
+            ret = H264SwDecDecode(mHandle, &inPicture, &outPicture);
+            if (ret == H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY ||
+                ret == H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY) {
+                inPicture.dataLen -= (u32)(outPicture.pStrmCurrPos - inPicture.pStream);
+                inPicture.pStream = outPicture.pStrmCurrPos;  // Resume where the decoder stopped.
+                if (ret == H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY) {
+                    mHeadersDecoded = true;
+                    H264SwDecInfo decoderInfo;
+                    CHECK(H264SwDecGetInfo(mHandle, &decoderInfo) == H264SWDEC_OK);
+
+                    if (handlePortSettingChangeEvent(&decoderInfo)) {
+                        portSettingsChanged = true;
+                    }
+
+                    if (decoderInfo.croppingFlag &&
+                        handleCropRectEvent(&decoderInfo.cropParams)) {
+                        portSettingsChanged = true;
+                    }
+                }
+            } else {  // Any other result: the buffer is treated as finished (ret < 0 is an error).
+                if (portSettingsChanged) {
+                    if (H264SwDecNextPicture(mHandle, &decodedPicture, 0)
+                        == H264SWDEC_PIC_RDY) {
+
+                        // Save this output buffer; otherwise, it will be
+                        // lost during dynamic port reconfiguration because
+                        // OpenMAX client will delete _all_ output buffers
+                        // in the process.
+                        saveFirstOutputBuffer(
+                            decodedPicture.picId,
+                            (uint8_t *)decodedPicture.pOutputPicture);
+                    }
+                }
+                inPicture.dataLen = 0;
+                if (ret < 0) {
+                    LOGE("Decoder failed: %d", ret);
+                    err = ERROR_MALFORMED;
+                }
+            }
+        }
+        inInfo->mOwnedByUs = false;
+        notifyEmptyBufferDone(inHeader);
+
+        if (portSettingsChanged) {
+            portSettingsChanged = false;
+            return;  // Output is deferred until the port reconfiguration completes.
+        }
+        // Emit the picture saved before a reconfiguration first, then any newly ready ones.
+        if (mFirstPicture && !outQueue.empty()) {
+            drainOneOutputBuffer(mFirstPictureId, mFirstPicture);
+            delete[] mFirstPicture;
+            mFirstPicture = NULL;
+            mFirstPictureId = -1;
+        }
+
+        while (!outQueue.empty() &&
+                mHeadersDecoded &&
+                H264SwDecNextPicture(mHandle, &decodedPicture, 0)
+                    == H264SWDEC_PIC_RDY) {
+
+            int32_t picId = decodedPicture.picId;
+            uint8_t *data = (uint8_t *) decodedPicture.pOutputPicture;
+            drainOneOutputBuffer(picId, data);
+        }
+
+        if (err != OK) {
+            notify(OMX_EventError, OMX_ErrorUndefined, err, NULL);
+        }
+    }
+}
+
+bool SoftAVC::handlePortSettingChangeEvent(const H264SwDecInfo *info) {  // Returns true if the size changed.
+    if (mWidth != info->picWidth || mHeight != info->picHeight) {
+        mWidth  = info->picWidth;
+        mHeight = info->picHeight;
+        mPictureSize = mWidth * mHeight * 3 / 2;
+        updatePortDefinitions();
+        notify(OMX_EventPortSettingsChanged, kOutputPortIndex, 0, NULL);
+        mOutputPortSettingsChange = AWAITING_DISABLED;  // Advanced by onPortEnableCompleted().
+        return true;
+    }
+
+    return false;
+}
+
+bool SoftAVC::handleCropRectEvent(const CropParams *crop) {  // Returns true if the crop rectangle changed.
+    if (mCropLeft != crop->cropLeftOffset ||
+        mCropTop != crop->cropTopOffset ||
+        mWidth != crop->cropOutWidth ||
+        mHeight != crop->cropOutHeight) {
+
+        mCropLeft = crop->cropLeftOffset;
+        mCropTop = crop->cropTopOffset;
+        mWidth = crop->cropOutWidth;
+        mHeight = crop->cropOutHeight;
+        mPictureSize = mWidth * mHeight * 3 / 2;
+        // NOTE(review): mPictureSize now uses the cropped size; confirm decoder pictures match it.
+        notify(OMX_EventPortSettingsChanged, kOutputPortIndex,
+                OMX_IndexConfigCommonOutputCrop, NULL);
+
+        return true;
+    }
+    return false;
+}
+
+void SoftAVC::saveFirstOutputBuffer(int32_t picId, uint8_t *data) {  // Keeps a private copy of one picture.
+    CHECK(mFirstPicture == NULL);  // Only one picture may be pending at a time.
+    mFirstPictureId = picId;
+    // Copy mPictureSize bytes; the buffer is freed with delete[] once it has been emitted.
+    mFirstPicture = new uint8_t[mPictureSize];
+    memcpy(mFirstPicture, data, mPictureSize);
+}
+
+void SoftAVC::drainOneOutputBuffer(int32_t picId, uint8_t* data) {  // Sends one decoded picture to the client.
+    List<BufferInfo *> &outQueue = getPortQueue(kOutputPortIndex);
+    BufferInfo *outInfo = *outQueue.begin();  // No emptiness check here: callers test !outQueue.empty() first.
+    outQueue.erase(outQueue.begin());
+    OMX_BUFFERHEADERTYPE *outHeader = outInfo->mHeader;
+    OMX_BUFFERHEADERTYPE *header = mPicToHeaderMap.valueFor(picId);  // Timestamp/flags saved for this picture id.
+    outHeader->nTimeStamp = header->nTimeStamp;
+    outHeader->nFlags = header->nFlags;
+    outHeader->nFilledLen = mPictureSize;
+    memcpy(outHeader->pBuffer + outHeader->nOffset,
+            data, mPictureSize);
+    mPicToHeaderMap.removeItem(picId);  // The bookkeeping entry is no longer needed.
+    delete header;
+    outInfo->mOwnedByUs = false;
+    notifyFillBufferDone(outHeader);
+}
+
+bool SoftAVC::drainAllOutputBuffers() {  // Returns every queued output buffer; always returns true.
+    List<BufferInfo *> &outQueue = getPortQueue(kOutputPortIndex);
+    H264SwDecPicture decodedPicture;
+    // NOTE(review): mFirstPicture is not emitted by this function; confirm it cannot be pending here.
+    while (!outQueue.empty()) {
+        BufferInfo *outInfo = *outQueue.begin();
+        outQueue.erase(outQueue.begin());
+        OMX_BUFFERHEADERTYPE *outHeader = outInfo->mHeader;
+        if (mHeadersDecoded &&
+            H264SWDEC_PIC_RDY ==
+                H264SwDecNextPicture(mHandle, &decodedPicture, 1 /* flush */)) {
+
+            int32_t picId = decodedPicture.picId;
+            CHECK(mPicToHeaderMap.indexOfKey(picId) >= 0);
+
+            memcpy(outHeader->pBuffer + outHeader->nOffset,
+                decodedPicture.pOutputPicture,
+                mPictureSize);
+            // Restore the timestamp and flags recorded when this picture's input arrived.
+            OMX_BUFFERHEADERTYPE *header = mPicToHeaderMap.valueFor(picId);
+            outHeader->nTimeStamp = header->nTimeStamp;
+            outHeader->nFlags = header->nFlags;
+            outHeader->nFilledLen = mPictureSize;
+            mPicToHeaderMap.removeItem(picId);
+            delete header;
+        } else {  // No picture available: return the buffer empty and flagged EOS.
+            outHeader->nTimeStamp = 0;
+            outHeader->nFilledLen = 0;
+            outHeader->nFlags = OMX_BUFFERFLAG_EOS;
+            mEOSStatus = OUTPUT_FRAMES_FLUSHED;
+        }
+        // The loop continues, so later buffers in the queue may also be returned flagged EOS.
+        outInfo->mOwnedByUs = false;
+        notifyFillBufferDone(outHeader);
+    }
+
+    return true;
+}
+
+void SoftAVC::onPortFlushCompleted(OMX_U32 portIndex) {  // An input-port flush re-arms input processing.
+    if (portIndex == kInputPortIndex) {
+        mEOSStatus = INPUT_DATA_AVAILABLE;  // Clears any INPUT_EOS_SEEN / OUTPUT_FRAMES_FLUSHED state.
+    }
+}
+
+void SoftAVC::onPortEnableCompleted(OMX_U32 portIndex, bool enabled) {  // portIndex is not inspected.
+    switch (mOutputPortSettingsChange) {  // Steps AWAITING_DISABLED -> AWAITING_ENABLED -> NONE.
+        case NONE:
+            break;
+
+        case AWAITING_DISABLED:
+        {
+            CHECK(!enabled);  // A disable completion is expected in this state.
+            mOutputPortSettingsChange = AWAITING_ENABLED;
+            break;
+        }
+
+        default:
+        {
+            CHECK_EQ((int)mOutputPortSettingsChange, (int)AWAITING_ENABLED);
+            CHECK(enabled);  // An enable completion is expected in this state.
+            mOutputPortSettingsChange = NONE;
+            break;
+        }
+    }
+}
+
+void SoftAVC::updatePortDefinitions() {  // Copies mWidth / mHeight into both port definitions.
+    OMX_PARAM_PORTDEFINITIONTYPE *def = &editPortInfo(kInputPortIndex)->mDef;
+    def->format.video.nFrameWidth = mWidth;
+    def->format.video.nFrameHeight = mHeight;
+    def->format.video.nStride = def->format.video.nFrameWidth;
+    def->format.video.nSliceHeight = def->format.video.nFrameHeight;
+
+    def = &editPortInfo(kOutputPortIndex)->mDef;
+    def->format.video.nFrameWidth = mWidth;
+    def->format.video.nFrameHeight = mHeight;
+    def->format.video.nStride = def->format.video.nFrameWidth;
+    def->format.video.nSliceHeight = def->format.video.nFrameHeight;
+    // Only the output port's buffer size depends on the frame size (width * height * 3 / 2).
+    def->nBufferSize =
+        (def->format.video.nFrameWidth
+            * def->format.video.nFrameHeight * 3) / 2;
+}
+
+}  // namespace android
+
+android::SoftOMXComponent *createSoftOMXComponent(
+        const char *name, const OMX_CALLBACKTYPE *callbacks,
+        OMX_PTR appData, OMX_COMPONENTTYPE **component) {  // Factory defined outside namespace android.
+    return new android::SoftAVC(name, callbacks, appData, component);  // Returns a newly allocated component.
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/SoftAVC.h b/media/libstagefright/codecs/on2/h264dec/SoftAVC.h
new file mode 100644
index 0000000..a7340c0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/SoftAVC.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SOFT_AVC_H_
+
+#define SOFT_AVC_H_
+
+#include "SimpleSoftOMXComponent.h"
+#include <utils/KeyedVector.h>
+
+#include "H264SwDecApi.h"
+#include "basetype.h"
+
+namespace android {
+
+struct SoftAVC : public SimpleSoftOMXComponent {  // OMX component wrapping the H264SwDec decoder API.
+    SoftAVC(const char *name,
+            const OMX_CALLBACKTYPE *callbacks,
+            OMX_PTR appData,
+            OMX_COMPONENTTYPE **component);
+
+protected:
+    virtual ~SoftAVC();
+
+    virtual OMX_ERRORTYPE internalGetParameter(
+            OMX_INDEXTYPE index, OMX_PTR params);
+
+    virtual OMX_ERRORTYPE internalSetParameter(
+            OMX_INDEXTYPE index, const OMX_PTR params);
+
+    virtual OMX_ERRORTYPE getConfig(OMX_INDEXTYPE index, OMX_PTR params);
+
+    virtual void onQueueFilled(OMX_U32 portIndex);
+    virtual void onPortFlushCompleted(OMX_U32 portIndex);
+    virtual void onPortEnableCompleted(OMX_U32 portIndex, bool enabled);
+
+private:
+    enum {
+        kInputPortIndex   = 0,
+        kOutputPortIndex  = 1,
+        kNumInputBuffers  = 8,
+        kNumOutputBuffers = 16,
+    };
+
+    enum EOSStatus {
+        INPUT_DATA_AVAILABLE,   // Normal operation; input buffers are decoded.
+        INPUT_EOS_SEEN,         // An input buffer flagged EOS has been received.
+        OUTPUT_FRAMES_FLUSHED,  // The EOS output buffer has been emitted.
+    };
+
+    void *mHandle;  // Decoder instance created by H264SwDecInit().
+
+    size_t mInputBufferCount;  // Set to 0 by the constructor; not otherwise used in SoftAVC.cpp.
+
+    uint32_t mWidth, mHeight, mPictureSize;  // mPictureSize = mWidth * mHeight * 3 / 2 bytes.
+    uint32_t mCropLeft, mCropTop;
+
+    uint8_t *mFirstPicture;  // Copy of a picture saved across an output port reconfiguration.
+    int32_t mFirstPictureId;
+
+    int32_t mPicId;  // Which output picture is for which input buffer?
+
+    // OMX_BUFFERHEADERTYPE may be overkill, but it is convenient
+    // for tracking the following fields: nFlags, nTimeStamp, etc.
+    KeyedVector<int32_t, OMX_BUFFERHEADERTYPE *> mPicToHeaderMap;
+    bool mHeadersDecoded;  // Set once the decoder reports that stream headers are ready.
+
+    EOSStatus mEOSStatus;
+
+    enum OutputPortSettingChange {
+        NONE,
+        AWAITING_DISABLED,  // Size change signalled; waiting for the port-disable completion.
+        AWAITING_ENABLED    // Port disabled; waiting for the port-enable completion.
+    };
+    OutputPortSettingChange mOutputPortSettingsChange;
+
+    void initPorts();
+    status_t initDecoder();
+    void updatePortDefinitions();
+    bool drainAllOutputBuffers();
+    void drainOneOutputBuffer(int32_t picId, uint8_t *data);
+    void saveFirstOutputBuffer(int32_t picId, uint8_t *data);
+    bool handleCropRectEvent(const CropParams* crop);
+    bool handlePortSettingChangeEvent(const H264SwDecInfo *info);
+
+    DISALLOW_EVIL_CONSTRUCTORS(SoftAVC);
+};
+
+}  // namespace android
+
+#endif  // SOFT_AVC_H_
+
diff --git a/media/libstagefright/codecs/on2/h264dec/inc/H264SwDecApi.h b/media/libstagefright/codecs/on2/h264dec/inc/H264SwDecApi.h
new file mode 100755
index 0000000..fe112bc
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/inc/H264SwDecApi.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include Headers
+
+    2. Enumerations used as a return value or a parameter.
+        2.1. API's return value enumerations.
+
+    3. User Structures
+        3.1. Structures for H264SwDecDecode() parameters.
+        3.2. Structures for information interchange with
+             DEC API and user application.
+
+    4. Prototypes of Decoder API functions
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDECAPI_H
+#define H264SWDECAPI_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*------------------------------------------------------------------------------
+    1. Include Headers
+------------------------------------------------------------------------------*/
+
+    #include "basetype.h"
+
+/*------------------------------------------------------------------------------
+    2.1. API's return value enumerations.
+------------------------------------------------------------------------------*/
+
+    typedef enum
+    {
+        H264SWDEC_OK = 0,
+        H264SWDEC_STRM_PROCESSED = 1,
+        H264SWDEC_PIC_RDY,
+        H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY,
+        H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY,
+        H264SWDEC_PARAM_ERR = -1,
+        H264SWDEC_STRM_ERR = -2,
+        H264SWDEC_NOT_INITIALIZED = -3,
+        H264SWDEC_MEMFAIL = -4,
+        H264SWDEC_INITFAIL = -5,
+        H264SWDEC_HDRS_NOT_RDY = -6,
+        H264SWDEC_EVALUATION_LIMIT_EXCEEDED = -7
+    } H264SwDecRet;
+
+/*------------------------------------------------------------------------------
+    3.1. Structures for H264SwDecDecode() parameters.
+------------------------------------------------------------------------------*/
+
+    /* typedef of the Decoder instance */
+    typedef void *H264SwDecInst;
+
+    /* Input structure */
+    typedef struct
+    {
+        u8  *pStream;            /* Pointer to stream to be decoded          */
+        u32  dataLen;            /* Number of bytes to be decoded            */
+        u32  picId;              /* Identifier for the picture to be decoded */
+        u32 intraConcealmentMethod; /* 0 = Gray concealment for intra
+                                       1 = Reference concealment for intra */
+
+    } H264SwDecInput;
+
+
+    /* Output structure */
+    typedef struct
+    {
+        u8  *pStrmCurrPos;      /* Pointer to stream position where decoder
+                                   ended up */
+    } H264SwDecOutput;
+
+    /* Output structure for H264SwDecNextPicture */
+    typedef struct
+    {
+        u32 *pOutputPicture;    /* Pointer to the picture, YUV format       */
+        u32 picId;              /* Identifier of the picture to be displayed*/
+        u32 isIdrPicture;       /* Flag to indicate if the picture is an
+                                   IDR picture */
+        u32 nbrOfErrMBs;        /* Number of concealed MB's in the picture  */
+    } H264SwDecPicture;
+
+/*------------------------------------------------------------------------------
+    3.2. Structures for information interchange with DEC API
+         and user application.
+------------------------------------------------------------------------------*/
+
+    typedef struct
+    {
+        u32 cropLeftOffset;
+        u32 cropOutWidth;
+        u32 cropTopOffset;
+        u32 cropOutHeight;
+    } CropParams;
+
+    typedef struct
+    {
+        u32 profile;
+        u32 picWidth;
+        u32 picHeight;
+        u32 videoRange;
+        u32 matrixCoefficients;
+        u32 parWidth;
+        u32 parHeight;
+        u32 croppingFlag;
+        CropParams cropParams;
+    } H264SwDecInfo;
+
+    /* Version information */
+    typedef struct
+    {
+        u32 major;    /* Decoder API major version */
+        u32 minor;    /* Decoder API minor version */
+    } H264SwDecApiVersion;
+
+/*------------------------------------------------------------------------------
+    4. Prototypes of Decoder API functions
+------------------------------------------------------------------------------*/
+
+    H264SwDecRet H264SwDecDecode(H264SwDecInst      decInst,
+                                 H264SwDecInput     *pInput,
+                                 H264SwDecOutput    *pOutput);
+
+    H264SwDecRet H264SwDecInit(H264SwDecInst *decInst,
+                               u32            noOutputReordering);
+
+    H264SwDecRet H264SwDecNextPicture(H264SwDecInst     decInst,
+                                      H264SwDecPicture *pOutput,
+                                      u32               endOfStream);
+
+    H264SwDecRet H264SwDecGetInfo(H264SwDecInst decInst,
+                                  H264SwDecInfo *pDecInfo);
+
+    void  H264SwDecRelease(H264SwDecInst decInst);
+
+    H264SwDecApiVersion H264SwDecGetAPIVersion(void);
+
+    /* function prototype for API trace */
+    void H264SwDecTrace(char *);
+
+    /* function prototype for memory allocation */
+    void* H264SwDecMalloc(u32 size);
+
+    /* function prototype for memory free */
+    void H264SwDecFree(void *ptr);
+
+    /* function prototype for memory copy */
+    void H264SwDecMemcpy(void *dest, void *src, u32 count);
+
+    /* function prototype for memset */
+    void H264SwDecMemset(void *ptr, i32 value, u32 count);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* H264SWDECAPI_H */
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/inc/basetype.h b/media/libstagefright/codecs/on2/h264dec/inc/basetype.h
new file mode 100755
index 0000000..63d5653
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/inc/basetype.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef BASETYPE_H_INCLUDED
+#define BASETYPE_H_INCLUDED
+
+
+#ifdef __arm
+#define VOLATILE volatile
+#else
+#define VOLATILE
+#endif
+
+typedef unsigned char   u8;
+typedef signed char     i8;
+typedef unsigned short  u16;
+typedef signed short    i16;
+typedef unsigned int    u32;
+typedef signed int      i32;
+
+#if defined(VC1SWDEC_16BIT) || defined(MP4ENC_ARM11)
+typedef unsigned short  u16x;
+typedef signed short    i16x;
+#else
+typedef unsigned int    u16x;
+typedef signed int      i16x;
+#endif
+
+
+#ifndef NULL
+#ifdef  __cplusplus
+#define NULL 0
+#else
+#define NULL ((void *)0)
+#endif
+#endif
+
+#endif  /* BASETYPE_H_INCLUDED */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT
new file mode 100644
index 0000000..5ce70ca
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT
@@ -0,0 +1,63 @@
+The contents of this transaction was created by Hedley Francis
+of ARM on 19-Feb-2008.
+
+It contains the ARM data versions listed below.
+
+This data, unless otherwise stated, is ARM Proprietary and access to it
+is subject to the agreements indicated below.
+
+If you experience problems with this data, please contact ARM support
+quoting transaction reference <97413>.
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+- OX001-SW-98010-r0p0-00bet1
+  Video codecs - optimised code
+  V6 optimized code release for Hantro (Ver 1.0.2)
+  internal access
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+This transaction contains deliverables which are designated as being of
+beta release status (BET).
+
+Beta release status has a particular meaning to ARM of which the recipient
+must be aware. Beta is a pre-release status indicating that the deliverable
+so described is believed to robustly demonstrate specified behaviour, to be
+consistent across its included aspects and be ready for general deployment.
+But Beta also indicates that pre-release reliability trials are ongoing and
+that it is possible residual defects or errors in operation, consistency
+and documentation may still be encountered. The recipient should consider
+this position when using this Beta material supplied. ARM will normally
+attempt to provide fixes or a work-around for defects identified by the
+recipient, but the provision or timeliness of this support cannot be
+guaranteed. ARM shall not be responsible for direct or consequential
+damages as a result of encountering one or more of these residual defects.
+By accepting a Beta release, the recipient agrees to these constraints and
+to providing reasonable information to ARM to enable the replication of the
+defects identified by the recipient. The specific Beta version supplied
+will not be supported after release of a later or higher status version.
+It should be noted that Support for the Beta release of the deliverable
+will only be provided by ARM to a recipient who has a current support and
+maintenance contract for the deliverable.
+
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+In addition to the data versions listed above, this transaction contains
+two additional files at the top level.
+
+The first is this file, ARM_DELIVERY_97413.TXT, which is the delivery
+note.
+
+The second is ARM_MANIFEST_97413.TXT which contains a manifest of all the
+files included in this transaction, together with their checksums.
+
+The checksums provided are calculated using the RSA Data Security, Inc.
+MD5 Message-Digest Algorithm.
+
+The checksums can be used to verify the integrity of this data using the
+"md5sum" tool (which is part of the GNU "textutils" package) by running:
+
+  % md5sum --check ARM_MANIFEST_97413.TXT
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT
new file mode 100644
index 0000000..9b2238b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT
@@ -0,0 +1,91 @@
+				  OX001-SW-98010-r0p0-00bet1/
+				  OX001-SW-98010-r0p0-00bet1/api/
+e049791cfab6060a08cbac7b3ad767d6  OX001-SW-98010-r0p0-00bet1/api/armCOMM_s.h
+ed798face25497b2703ede736d6d52b6  OX001-SW-98010-r0p0-00bet1/api/omxtypes_s.h
+4eebd63af087376811d6749f0646b864  OX001-SW-98010-r0p0-00bet1/api/armCOMM_BitDec_s.h
+43cf46c2cf2fe1f93c615b57bcbe4809  OX001-SW-98010-r0p0-00bet1/api/armCOMM.h
+8f248ceaac8f602e277a521b679dcbbe  OX001-SW-98010-r0p0-00bet1/api/armCOMM_IDCTTable.h
+53f2ae8a98495f05e26a4cf862a7f750  OX001-SW-98010-r0p0-00bet1/api/armCOMM_Version.h
+3a2f420ddf6a1b950470bd0f5ebd5c62  OX001-SW-98010-r0p0-00bet1/api/armCOMM_IDCT_s.h
+511c0bb534fe223599e2c84eff24c9ed  OX001-SW-98010-r0p0-00bet1/api/armCOMM_MaskTable.h
+8971932d56eed6b1ad1ba507f0bff5f0  OX001-SW-98010-r0p0-00bet1/api/armCOMM_Bitstream.h
+f87fedd9ca432fefa757008176864ef8  OX001-SW-98010-r0p0-00bet1/api/armOMX.h
+8e49899a428822c36ef9dd94e0e05f18  OX001-SW-98010-r0p0-00bet1/api/omxtypes.h
+694281d11af52f88e6f9d4cb226ac8a7  OX001-SW-98010-r0p0-00bet1/build_vc.pl
+e72d96c0a415459748df9807f3dae72f  OX001-SW-98010-r0p0-00bet1/filelist_vc.txt
+				  OX001-SW-98010-r0p0-00bet1/src/
+5eeae659a29477f5c52296d24afffd3c  OX001-SW-98010-r0p0-00bet1/src/armCOMM_IDCTTable.c
+d64cdcf38f7749dc7f77465e5b7d356d  OX001-SW-98010-r0p0-00bet1/src/armCOMM_MaskTable.c
+				  OX001-SW-98010-r0p0-00bet1/vc/
+				  OX001-SW-98010-r0p0-00bet1/vc/m4p10/
+				  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/
+e7e0c320978564a7c9b2c723749a98d6  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CAVLCTables.c
+4adcd0df081990bdfc4729041a2a9152  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+852e0404142965dc1f3aa7f00ee5127b  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
+7054151c5bfea6b5e74feee86b2d7b01  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+38944c5e0bba01e32ff349c2c87c71b2  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DequantTables_s.s
+32ff4b8be62e2f0f3e764b83c1e5e2fd  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+90b0e6a04e764902c0a0903640c10b32  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
+28a19ae4fe2258628080d6a89bb54b91  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
+98e196b9e1ffebaf91f62ea9d17fb97d  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
+01ba60eff66ea49a4f833ce6279f8e2f  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+f301d5a95e07354f593ea5747c01cb0a  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+44c9ef21e840a100301f7d7a4189957c  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+a33b03bbd3352d24ed744769e12bb87d  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+00c20bfda67bb86096b615fc17c94b35  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+2ddcaf60a8ea1e6e6b77737f768bfb9d  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_QuantTables_s.s
+c3002aad5600f872b70a5d7fe3915846  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+a2900f2c47f1c61d20bd6c1eda33d6d4  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+e4fecd66bc47f07539bc308935e84a1f  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
+78815c9df50ba53131bb22d2b829e3c3  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+1909ae312ac79a03a5fac1d1e8bc0291  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+3d2c48580655928065de7839866d9bc4  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+23aa2fdf155d4fa6ff745eab6e01f32b  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
+97f20a93c481d7f6173d919f41e415bd  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+becd512da202436286811b6aec061f47  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+dd24a99ae3cd842dcacaf31d47de88b3  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+c2d995f787b6f44ef10c751c12d1935f  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+3628fbdf0cd217c287b6ccc94135d06e  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
+4a52b3e9e268b8a8f07829bf500d03af  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
+11249f8a98c5d4b84cb5575b0e37ca9c  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
+3599b1074330965c8ca285d164efccff  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
+3339e026c7de655d9400949eb5e51451  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
+cc4a6f32db0b72a91d3f278f6855df69  OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+				  OX001-SW-98010-r0p0-00bet1/vc/m4p10/api/
+6e530ddaa7c2b57ffe88162c020cb662  OX001-SW-98010-r0p0-00bet1/vc/m4p10/api/armVCM4P10_CAVLCTables.h
+				  OX001-SW-98010-r0p0-00bet1/vc/m4p2/
+				  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/
+cdf412920c2037a725d0420002b6752e  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Clip8_s.s
+dba9824e959b21d401cac925e68a11a6  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+b559b71d5c94627f10e616fb72c0cefc  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
+4fba4c431a783a78a2eb6497a94ac967  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+1e4c3be8c5eddc00c9f05e83bcf315ef  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
+1b0b2990c2669dfb87cf6b810611c01b  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+1c9b87abf3283e957816b3937c680701  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
+4fe1afca659a9055fc1172e58f78a506  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+2ea067f0436f91ba1351edaf411cb4ea  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
+acb92be1dbcdb3ebe824cbe9e28d03bf  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
+a6b41f01b1df7dd656ebdba3084bfa2a  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
+293a48a648a3085456e6665bb7366fad  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
+ffe6b96c74d4881f4d3c8de8cc737797  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
+437dfa204508850d61d4b87091446e9f  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+ff5915d181bfd2cd2f0bd588bd2300dc  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+6775eb0c561dbab965c60f85b08c96fd  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
+a0d85f4f517c945a4c9317ac021f2d08  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+386020dee8b725c7fe2526f1fc211d7d  OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+				  OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/
+4624e7c838e10a249abcc3d3f4f40748  OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+65e1057d04e2cb844559dc9f6e09795a  OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+				  OX001-SW-98010-r0p0-00bet1/vc/src/
+e627b3346b0dc9aff14446005ce0fa43  OX001-SW-98010-r0p0-00bet1/vc/src/armVC_Version.c
+				  OX001-SW-98010-r0p0-00bet1/vc/api/
+7ca94b1c33ac0211e17d38baadd7d1dd  OX001-SW-98010-r0p0-00bet1/vc/api/armVC.h
+12cf7596edbbf6048b626d15e8d0ed48  OX001-SW-98010-r0p0-00bet1/vc/api/omxVC.h
+11726e286a81257cb45f5547fb4d374c  OX001-SW-98010-r0p0-00bet1/vc/api/omxVC_s.h
+a5b2af605c319cd2491319e430741377  OX001-SW-98010-r0p0-00bet1/vc/api/armVCCOMM_s.h
+				  OX001-SW-98010-r0p0-00bet1/vc/comm/
+				  OX001-SW-98010-r0p0-00bet1/vc/comm/src/
+50cca6954c447b012ab39ca7872e5e8f  OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy16x16_s.s
+d1c3bce77fc5774c899b447d13f02cd0  OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy8x8_s.s
+fdac1d1bad3fd23c880beb39bc2e89aa  OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
+6d9adc2be5bd0311591030d0c6df771c  ARM_DELIVERY_97413.TXT
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h
new file mode 100644
index 0000000..2ed86a4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h
@@ -0,0 +1,785 @@
+/**
+ * 
+ * File Name:  armCOMM.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *   
+ * File: armCOMM.h
+ * Brief: Declares Common APIs/Data Types used across OpenMAX API's
+ *
+ */
+ 
+  
+#ifndef _armCommon_H_
+#define _armCommon_H_
+
+#include "omxtypes.h"
+
+typedef struct
+{
+  OMX_F32 Re; /** Real part */
+  OMX_F32 Im; /** Imaginary part */	
+        
+} OMX_FC32; /** single precision floating point complex number */
+
+typedef struct
+{
+  OMX_F64 Re; /** Real part */
+  OMX_F64 Im; /** Imaginary part */	
+        
+} OMX_FC64; /** double precision floating point complex number */
+
+
+/* Used by both IP and IC domains for 8x8 JPEG blocks. */
+typedef OMX_S16 ARM_BLOCK8x8[64];
+
+
+#include "armOMX.h"
+
+#define  armPI (OMX_F64)(3.1415926535897932384626433832795)
+
+/***********************************************************************/
+
+/* Compiler extensions: diagnostic and assertion macros, selected by ARM_DEBUG */
+#ifdef ARM_DEBUG
+/* debug version: the message is printed as data via "%s", never used as a format string */
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#define armError(str) {printf("%s\n", (str)); exit(-1);}
+#define armWarn(str) {printf("%s\n", (str));}
+#define armIgnore(a) ((void)(a))
+#define armAssert(a) assert(a)
+#else 
+/* release version: each macro only evaluates its argument and discards the result */
+#define armError(str) ((void) (str))
+#define armWarn(str)  ((void) (str))
+#define armIgnore(a)  ((void) (a))
+#define armAssert(a)  ((void) (a))
+#endif /* ARM_DEBUG */
+
+/* Arithmetic operations */
+
+#define armMin(a,b)             ( (a) > (b) ?  (b):(a) )
+#define armMax(a,b)             ( (a) > (b) ?  (a):(b) )
+#define armAbs(a)               ( (a) <  0  ? -(a):(a) )
+
+/* Alignment operation: advance Ptr to the next N-byte boundary; the (N-1) mask requires N to be a power of two */
+
+#define armAlignToBytes(Ptr,N)      ((Ptr) + ( (((N)-(int)(Ptr))&((N)-1)) / sizeof(*(Ptr)) ))
+#define armAlignTo2Bytes(Ptr)       armAlignToBytes(Ptr,2)
+#define armAlignTo4Bytes(Ptr)       armAlignToBytes(Ptr,4)
+#define armAlignTo8Bytes(Ptr)       armAlignToBytes(Ptr,8)
+#define armAlignTo16Bytes(Ptr)      armAlignToBytes(Ptr,16)
+
+/* Error and Alignment check */
+
+#define armRetArgErrIf(condition, code)  if(condition) { return (code); }
+#define armRetDataErrIf(condition, code) if(condition) { return (code); }
+
+#ifndef ALIGNMENT_DOESNT_MATTER /* real check: address modulo N */
+#define armIsByteAligned(Ptr,N)     ((((int)(Ptr)) % (N))==0)
+#define armNotByteAligned(Ptr,N)    ((((int)(Ptr)) % (N))!=0)
+#else /* ALIGNMENT_DOESNT_MATTER: every pointer is reported as aligned */
+#define armIsByteAligned(Ptr,N)     (1)
+#define armNotByteAligned(Ptr,N)    (0)
+#endif /* ALIGNMENT_DOESNT_MATTER */
+
+#define armIs2ByteAligned(Ptr)      armIsByteAligned(Ptr,2)
+#define armIs4ByteAligned(Ptr)      armIsByteAligned(Ptr,4)
+#define armIs8ByteAligned(Ptr)      armIsByteAligned(Ptr,8)
+#define armIs16ByteAligned(Ptr)     armIsByteAligned(Ptr,16)
+
+#define armNot2ByteAligned(Ptr)     armNotByteAligned(Ptr,2)
+#define armNot4ByteAligned(Ptr)     armNotByteAligned(Ptr,4)
+#define armNot8ByteAligned(Ptr)     armNotByteAligned(Ptr,8)
+#define armNot16ByteAligned(Ptr)    armNotByteAligned(Ptr,16)
+#define armNot32ByteAligned(Ptr)    armNotByteAligned(Ptr,32)
+
+/**
+ * Function: armRoundFloatToS16/armRoundFloatToS32/armRoundFloatToS64
+ *
+ * Description:
+ * Converts a double precision value into a short int/int after rounding
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32 format
+ *
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value);
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToS16/armSatRoundFloatToS32
+ *
+ * Description:
+ * Converts a double precision value into a short int/int after rounding and saturation
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32 format
+ *
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToU16/armSatRoundFloatToU32
+ *
+ * Description:
+ * Converts a double precision value into an unsigned short int/int after rounding and saturation
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U16/OMX_U32 format
+ *
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value);
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value);
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Checks the sign of a variable:
+ * returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	    var     Variable to be checked
+ *
+ * Return Value:
+ * OMX_INT --   returns 1 if it is Positive
+ *              returns 0 if it is 0
+ *              returns -1 if it is Negative 
+ */ 
+ 
+OMX_INT armSignCheck (OMX_S16 var);
+
+/**
+ * Function: armClip
+ *
+ * Description: Clips the input between MAX and MIN value
+ * 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] Min     lower bound
+ * [in] Max     upper bound
+ * [in] src     variable to be clipped
+ *
+ * Return Value:
+ * OMX_S32 --   returns clipped value
+ */ 
+ 
+OMX_S32 armClip (
+        OMX_INT min,
+        OMX_INT max, 
+        OMX_S32 src
+        );
+
+/**
+ * Function: armClip_F32
+ *
+ * Description: Clips the input between MAX and MIN value
+ * 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] Min     lower bound
+ * [in] Max     upper bound
+ * [in] src     variable to be clipped
+ *
+ * Return Value:
+ * OMX_F32 --   returns clipped value
+ */ 
+ 
+OMX_F32 armClip_F32 (
+        OMX_F32 min,
+        OMX_F32 max, 
+        OMX_F32 src
+        );
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description: Divides a float value by 2^shift and 
+ * saturates it for unsigned value range for satBits.
+ * Second parameter is like "shifting" the corresponding 
+ * integer value. Takes care of rounding while clipping the final 
+ * value.
+ *
+ * Parameters:
+ * [in] v          Number to be operated upon
+ * [in] shift      Divides the input "v" by "2^shift"
+ * [in] satBits    Final range is [0, 2^satBits)
+ *
+ * Return Value:
+ * OMX_U32 --   returns "shifted" saturated value
+ */ 
+ 
+OMX_U32 armShiftSat_F32(
+        OMX_F32 v, 
+        OMX_INT shift, 
+        OMX_INT satBits
+        );
+
+/**
+ * Functions: armSwapElem
+ *
+ * Description:
+ * This function swaps two elements at the specified pointer locations.
+ * The size of each element could be anything as specified by <elemSize>
+ *
+ * Return Value:
+ * OMXResult -- Error status from the function
+ */
+OMXResult armSwapElem(OMX_U8 *pBuf1, OMX_U8 *pBuf2, OMX_INT elemSize);
+
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Finds the median of three numbers
+ * 
+ * Remarks:
+ *
+ * Parameters:
+ * [in] fEntry     First entry
+ * [in] sEntry     second entry
+ * [in] tEntry     Third entry
+ *
+ * Return Value:
+ * OMX_S32 --   returns the median value
+ */ 
+ 
+OMX_S32 armMedianOf3 (
+    OMX_S32 fEntry,
+    OMX_S32 sEntry, 
+    OMX_S32 tEntry 
+    );
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Finds the size of a positive value and returns the same
+ * 
+ * Remarks:
+ *
+ * Parameters:
+ * [in] value    Positive value
+ *
+ * Return Value:
+ * OMX_U8 --   returns the size of the positive value
+ */ 
+ 
+OMX_U8 armLogSize (
+    OMX_U16 value 
+    );    
+
+/***********************************************************************/
+                /* Saturating Arithmetic operations */
+
+/**
+ * Function :armSatAdd_S32()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ * 
+ *    
+ **/
+
+OMX_S32 armSatAdd_S32(
+                OMX_S32 Value1,
+                OMX_S32 Value2
+                );
+
+/**
+ * Function :armSatAdd_S64()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ * 
+ *    
+ **/
+
+OMX_S64 armSatAdd_S64(
+                OMX_S64 Value1,
+                OMX_S64 Value2
+                );
+
+/** Function :armSatSub_S32()
+ * 
+ * Description :
+ *     Returns the result of saturated subtraction of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ * 
+ **/
+
+OMX_S32 armSatSub_S32(
+                    OMX_S32 Value1,
+                    OMX_S32 Value2
+                    );
+
+/**
+ * Function :armSatMac_S32()
+ *
+ * Description :
+ *     Returns the result of Multiplication of Value1 and Value2 and subsequent saturated
+ *     accumulation with Mac
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ * [in] Mac          Accumulator
+ *
+ * Return:
+ * [out]             Result of operation
+ **/
+
+OMX_S32 armSatMac_S32(
+                    OMX_S32 Mac,
+                    OMX_S16 Value1,
+                    OMX_S16 Value2
+                    );
+
+/**
+ * Function :armSatMac_S16S32_S32
+ *
+ * Description :
+ *   Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac
+ *
+ *   mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Parameters:
+ * [in] delayElem    First 32 bit Operand
+ * [in] filTap       Second 16 bit Operand
+ * [in] mac          Result of MAC operation
+ *
+ * Return:
+ * [out]  mac        Result of operation
+ *    
+ **/
+ 
+OMX_S32 armSatMac_S16S32_S32(
+                        OMX_S32 mac, 
+                        OMX_S32 delayElem, 
+                        OMX_S16 filTap );
+
+/**
+ * Function :armSatRoundRightShift_S32_S16
+ *
+ * Description :
+ *   Returns the result of rounded right shift operation of input by the scalefactor
+ *
+ *   output = Saturate_in_16Bits( RightShift( Round(input), scaleFactor ) )
+ *
+ * Parameters:
+ * [in] input       The input to be operated on
+ * [in] scaleFactor The shift number
+ *
+ * Return:
+ * [out]            Result of operation
+ *    
+ **/
+
+
+OMX_S16 armSatRoundRightShift_S32_S16(
+                        OMX_S32 input, 
+                        OMX_INT scaleFactor);
+
+/**
+ * Function :armSatRoundLeftShift_S32()
+ *
+ * Description :
+ *     Returns the result of saturating left-shift operation on input
+ *     Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value        Operand
+ * [in] shift        Operand for shift operation
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+ 
+OMX_S32 armSatRoundLeftShift_S32(
+                        OMX_S32 Value,
+                        OMX_INT shift
+                        );
+
+/**
+ * Function :armSatRoundLeftShift_S64()
+ *
+ * Description :
+ *     Returns the result of saturating left-shift operation on input
+ *     Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value        Operand
+ * [in] shift        Operand for shift operation
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+ 
+OMX_S64 armSatRoundLeftShift_S64(
+                        OMX_S64 Value,
+                        OMX_INT shift
+                        );
+
+/**
+ * Function :armSatMulS16S32_S32()
+ *
+ * Description :
+ *     Returns the result of a S16 data type multiplied with an S32 data type
+ *     in a S32 container
+ *
+ * Parameters:
+ * [in] input1       Operand 1
+ * [in] input2       Operand 2
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+
+OMX_S32 armSatMulS16S32_S32(
+                    OMX_S16 input1,
+                    OMX_S32 input2);
+
+/**
+ * Function :armSatMulS32S32_S32()
+ *
+ * Description :
+ *     Returns the result of an S32 data type multiplied with an S32 data type
+ *     in an S32 container (NOTE(review): the Sat prefix suggests saturation - confirm in the .c file)
+ *
+ * Parameters:
+ * [in] input1       Operand 1 (S32)
+ * [in] input2       Operand 2 (S32)
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+OMX_S32 armSatMulS32S32_S32(
+                    OMX_S32 input1,
+                    OMX_S32 input2);
+
+
+/**
+ * Function :armIntDivAwayFromZero()
+ *
+ * Description : Integer division with rounding to the nearest integer. 
+ *               Half-integer values are rounded away from zero
+ *               unless otherwise specified. For example 3//2 is rounded 
+ *               to 2, and -3//2 is rounded to -2.
+ *
+ * Parameters:
+ * [in] Num        Operand 1 (the value being divided)
+ * [in] Deno       Operand 2 (the value divided by)
+ *
+ * Return:
+ * [out]             Result of operation Num//Deno
+ *    
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno);
+
+
+/***********************************************************************/
+/*
+ * Debugging macros
+ *
+ */
+
+
+/*
+ * Output stream passed to fprintf by the DEBUG_PRINTF_<n> macros - change to stderr if necessary
+ */
+#define DEBUG_STREAM stdout
+
+/*
+ * Debug printf macros, one for each argument count: DEBUG_PRINTF_<n> takes a format plus n arguments.
+ * With DEBUG_ON they call fprintf(DEBUG_STREAM, ...); without it they expand to nothing. Add more if needed.
+ */
+#ifdef DEBUG_ON
+#include <stdio.h>
+
+#define DEBUG_PRINTF_0(a)                                               fprintf(DEBUG_STREAM, a)
+#define DEBUG_PRINTF_1(a, b)                                            fprintf(DEBUG_STREAM, a, b)
+#define DEBUG_PRINTF_2(a, b, c)                                         fprintf(DEBUG_STREAM, a, b, c)
+#define DEBUG_PRINTF_3(a, b, c, d)                                      fprintf(DEBUG_STREAM, a, b, c, d)
+#define DEBUG_PRINTF_4(a, b, c, d, e)                                   fprintf(DEBUG_STREAM, a, b, c, d, e)
+#define DEBUG_PRINTF_5(a, b, c, d, e, f)                                fprintf(DEBUG_STREAM, a, b, c, d, e, f)
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g)                             fprintf(DEBUG_STREAM, a, b, c, d, e, f, g)
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h)                          fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h)
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i)                       fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i)
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j)                    fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j)
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k)                fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k)
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l)             fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l)
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m)          fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m)
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n)       fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)    fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
+#else /* DEBUG_ON */
+#define DEBUG_PRINTF_0(a)                                  
+#define DEBUG_PRINTF_1(a, b)                               
+#define DEBUG_PRINTF_2(a, b, c)                            
+#define DEBUG_PRINTF_3(a, b, c, d)                         
+#define DEBUG_PRINTF_4(a, b, c, d, e)                      
+#define DEBUG_PRINTF_5(a, b, c, d, e, f)                   
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g)                
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h)             
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i)          
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j)       
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k)    
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l)             
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m)          
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n)      
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)   
+#endif /* DEBUG_ON */
+
+
+/*
+ * Domain and sub domain definitions
+ *
+ * In order to turn on debug for an entire domain or sub-domain
+ * at compile time, one of the DEBUG_DOMAIN_* below may be defined,
+ * which will activate debug in all of the defines it contains.
+ */
+
+#ifdef DEBUG_DOMAIN_AC /* defines every DEBUG_OMXACAAC_* and DEBUG_OMXACMP3_* switch listed below */
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT_MPEG4
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT
+#define DEBUG_OMXACAAC_DECODEDATSTRELT
+#define DEBUG_OMXACAAC_DECODEFILLELT
+#define DEBUG_OMXACAAC_DECODEISSTEREO_S32
+#define DEBUG_OMXACAAC_DECODEMSPNS_S32
+#define DEBUG_OMXACAAC_DECODEMSSTEREO_S32_I
+#define DEBUG_OMXACAAC_DECODEPRGCFGELT
+#define DEBUG_OMXACAAC_DECODETNS_S32_I
+#define DEBUG_OMXACAAC_DEINTERLEAVESPECTRUM_S32
+#define DEBUG_OMXACAAC_ENCODETNS_S32_I
+#define DEBUG_OMXACAAC_LONGTERMPREDICT_S32
+#define DEBUG_OMXACAAC_LONGTERMRECONSTRUCT_S32
+#define DEBUG_OMXACAAC_MDCTFWD_S32
+#define DEBUG_OMXACAAC_MDCTINV_S32_S16
+#define DEBUG_OMXACAAC_NOISELESSDECODE
+#define DEBUG_OMXACAAC_QUANTINV_S32_I
+#define DEBUG_OMXACAAC_UNPACKADIFHEADER
+#define DEBUG_OMXACAAC_UNPACKADTSFRAMEHEADER
+#define DEBUG_OMXACMP3_HUFFMANDECODESFBMBP_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODESFB_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODE_S32
+#define DEBUG_OMXACMP3_MDCTINV_S32
+#define DEBUG_OMXACMP3_REQUANTIZESFB_S32_I
+#define DEBUG_OMXACMP3_REQUANTIZE_S32_I
+#define DEBUG_OMXACMP3_SYNTHPQMF_S32_S16
+#define DEBUG_OMXACMP3_UNPACKFRAMEHEADER
+#define DEBUG_OMXACMP3_UNPACKSCALEFACTORS_S8
+#define DEBUG_OMXACMP3_UNPACKSIDEINFO
+#endif /* DEBUG_DOMAIN_AC */
+
+
+#ifdef DEBUG_DOMAIN_VC /* defines every DEBUG_OMXVCM4P10_* and DEBUG_OMXVCM4P2_* switch listed below */
+#define DEBUG_OMXVCM4P10_AVERAGE_16X
+#define DEBUG_OMXVCM4P10_AVERAGE_4X
+#define DEBUG_OMXVCM4P10_AVERAGE_8X
+#define DEBUG_OMXVCM4P10_DEBLOCKCHROMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DEBLOCKLUMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DECODECHROMADCCOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DECODECOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DEQUANTTRANSFORMACFROMPAIR_U8_S16_C1_DLX
+#define DEBUG_OMXVCM4P10_EXPANDFRAME
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_PREDICTINTRACHROMA8X8_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_16X16_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_4X4_U8_C1R
+#define DEBUG_OMXVCM4P10_SADQUAR_16X
+#define DEBUG_OMXVCM4P10_SADQUAR_4X
+#define DEBUG_OMXVCM4P10_SADQUAR_8X
+#define DEBUG_OMXVCM4P10_SAD_16X
+#define DEBUG_OMXVCM4P10_SAD_4X
+#define DEBUG_OMXVCM4P10_SAD_8X
+#define DEBUG_OMXVCM4P10_SATD_4X4
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTCHROMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTLUMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_CHROMADC
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_LUMADC
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_8X8
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_8X8
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_SAD_U8_S16
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_U8_S16
+#define DEBUG_OMXVCM4P2_DCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_DECODEBLOCKCOEF_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEPADMV_PVOP
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEMV_U8_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_FINDMVPRED
+#define DEBUG_OMXVCM4P2_IDCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_LIMITMVTORECT
+#define DEBUG_OMXVCM4P2_MOTIONESTIMATIONMB
+#define DEBUG_OMXVCM4P2_PADMBGRAY_U8
+#define DEBUG_OMXVCM4P2_PADMBHORIZONTAL_U8
+#define DEBUG_OMXVCM4P2_PADMBVERTICAL_U8
+#define DEBUG_OMXVCM4P2_PADMV
+#define DEBUG_OMXVCM4P2_QUANTINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINTRA_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTRA_S16_I
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTER
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTRA
+#endif /* DEBUG_DOMAIN_VC */
+
+
+#ifdef DEBUG_DOMAIN_IC
+/* To be filled in: no per-function debug switches are defined for this domain yet */
+#endif /* DEBUG_DOMAIN_IC */
+
+
+#ifdef DEBUG_DOMAIN_SP /* defines every DEBUG_OMXACSP_* switch listed below */
+#define DEBUG_OMXACSP_DOTPROD_S16 /* NOTE: defined again four lines below; the identical repeat is harmless */
+#define DEBUG_OMXACSP_BLOCKEXP_S16
+#define DEBUG_OMXACSP_BLOCKEXP_S32
+#define DEBUG_OMXACSP_COPY_S16
+#define DEBUG_OMXACSP_DOTPROD_S16
+#define DEBUG_OMXACSP_DOTPROD_S16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S16S32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S32_SFS
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC16
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S16_S32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S32
+#define DEBUG_OMXACSP_FFTINIT_C_SC16
+#define DEBUG_OMXACSP_FFTINIT_C_SC32
+#define DEBUG_OMXACSP_FFTINIT_R_S16_S32
+#define DEBUG_OMXACSP_FFTINIT_R_S32
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32S16_SFS
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32_I
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIR_DIRECT_S16
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_DIRECT_S16
+#endif /* DEBUG_DOMAIN_SP */
+
+
+#ifdef DEBUG_DOMAIN_IP /* defines every DEBUG_OMXIPBM_*, DEBUG_OMXIPCS_* and DEBUG_OMXIPPP_* switch listed below */
+#define DEBUG_OMXIPBM_ADDC_U8_C1R_SFS
+#define DEBUG_OMXIPBM_COPY_U8_C1R
+#define DEBUG_OMXIPBM_COPY_U8_C3R
+#define DEBUG_OMXIPBM_MIRROR_U8_C1R
+#define DEBUG_OMXIPBM_MULC_U8_C1R_SFS
+#define DEBUG_OMXIPCS_COLORTWISTQ14_U8_C3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR420LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR422LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR444LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR420LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR422LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR444LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_YCBCR420RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422RSZCSCROTRGB_U8_C2R
+#define DEBUG_OMXIPCS_YCBCR422RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565_U8_U16_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB_U8_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_C2P3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR444TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR444TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB_U8_C3R
+#define DEBUG_OMXIPPP_GETCENTRALMOMENT_S64
+#define DEBUG_OMXIPPP_GETSPATIALMOMENT_S64
+#define DEBUG_OMXIPPP_MOMENTGETSTATESIZE_S64
+#define DEBUG_OMXIPPP_MOMENTINIT_S64
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C1R
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C3R
+#endif /* DEBUG_DOMAIN_IP */
+
+
+#endif /* _armCommon_H_ */
+
+/*End of File*/
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h
new file mode 100644
index 0000000..abb98fc
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h
@@ -0,0 +1,670 @@
+;//
+;// 
+;// File Name:  armCOMM_BitDec_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;// 
+;// OpenMAX optimized bitstream decode module
+;//
+;// You must include armCOMM_s.h before including this file
+;//
+;// This module provides macros to perform assembly optimized fixed and
+;// variable length decoding from a read-only bitstream. The variable
+;// length decode modules take as input a pointer to a table of 16-bit
+;// entries of the following format.
+;//
+;// VLD Table Entry format
+;//
+;//        15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+;//       +------------------------------------------------+
+;//       |  Len   |               Symbol              | 1 |
+;//       +------------------------------------------------+
+;//       |                Offset                      | 0 |
+;//       +------------------------------------------------+
+;//
+;// If the table entry is a leaf entry then bit 0 set:
+;//    Len    = Number of bits overread (0 to 7)
+;//    Symbol = Symbol payload (unsigned 12 bits)
+;//
+;// If the table entry is an internal node then bit 0 is clear:
+;//    Offset = Number of (16-bit) half words from the table
+;//             start to the next table node
+;//
+;// The table is accessed by successive lookups on the
+;// next Step bits of the input bitstream until a leaf node
+;// is obtained. The Step sizes are supplied to the VLD macro.
+;//
+;// USAGE:
+;//
+;// To use any of the macros in this package, first call:
+;//
+;//    M_BD_INIT ppBitStream, pBitOffset, pBitStream, RBitBuffer, RBitCount, Tmp
+;//
+;// This caches the current bitstream position and next available
+;// bits in registers pBitStream, RBitBuffer, RBitCount. These registers
+;// are reserved for use by the bitstream decode package until you
+;// call M_BD_FINI.
+;//
+;// Next call the following macro(s) as many times as you need:
+;//
+;//    M_BD_LOOK8       - Look ahead constant 1<=N<=8  bits into the bitstream
+;//    M_BD_LOOK16      - Look ahead constant 1<=N<=16 bits into the bitstream
+;//    M_BD_READ8       - Read constant 1<=N<=8  bits from the bitstream
+;//    M_BD_READ16      - Read constant 1<=N<=16 bits from the bitstream
+;//    M_BD_VREAD8      - Read variable 1<=N<=8  bits from the bitstream
+;//    M_BD_VREAD16     - Read variable 1<=N<=16 bits from the bitstream
+;//    M_BD_VLD         - Perform variable length decode using lookup table
+;//
+;// Finally call the macro:
+;//
+;//    M_BD_FINI ppBitStream, pBitOffset
+;//
+;// This writes the bitstream state back to memory.
+;//
+;// The three bitstream cache register names are assigned to the following global
+;// variables:
+;//
+
+        GBLS    pBitStream  ;// Global string: register name for pBitStream (assigned in M_BD_INIT0)
+        GBLS    BitBuffer   ;// Global string: register name for BitBuffer  (assigned in M_BD_INIT0)
+        GBLS    BitCount    ;// Global string: register name for BitCount   (assigned in M_BD_INIT0)
+   
+;//        
+;// These register variables must have a certain defined state on entry to every bitstream
+;// macro (except M_BD_INIT) and on exit from every bitstream macro (except M_BD_FINI).
+;// The state may depend on implementation.
+;//
+;// For the default (ARM11) implementation the following hold:
+;//    pBitStream - points to the first byte not held in the BitBuffer
+;//    BitBuffer  - is a cache of (4 bytes) 32 bits, bit 31 the first bit
+;//    BitCount   - is offset (from the top bit) to the next unused bitstream bit
+;//    0<=BitCount<=15 (so BitBuffer holds at least 17 unused bits)
+;//
+;//
+
+        ;// Bitstream Decode initialise
+        ;//
+        ;// Initialises the bitstream decode global registers from
+        ;// bitstream pointers. This macro is split into 3 parts to enable
+        ;// scheduling.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $ppBitStream    - pointer to pointer to the next bitstream byte
+        ;// $pBitOffset     - pointer to the number of bits used in the current byte (0..7)
+        ;// $RBitStream     - register to use for pBitStream (can be $ppBitStream)
+        ;// $RBitBuffer     - register to use for BitBuffer
+        ;// $RBitCount      - register to use for BitCount   (can be $pBitOffset)
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// $T1,$T2,$T3     - registers that must be preserved between calls to
+        ;//                   M_BD_INIT1 and M_BD_INIT2
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_INIT0  $ppBitStream, $pBitOffset, $RBitStream, $RBitBuffer, $RBitCount
+
+pBitStream  SETS "$RBitStream"
+BitBuffer   SETS "$RBitBuffer"
+BitCount    SETS "$RBitCount"        
+        
+        ;// load inputs (the three SETS above record the caller's register names for all later macros)
+        LDR     $pBitStream, [$ppBitStream]    ;// pBitStream = pointer to the next bitstream byte
+        LDR     $BitCount, [$pBitOffset]       ;// BitCount = number of bits used in the current byte
+        MEND
+        
+        MACRO
+        M_BD_INIT1  $T1, $T2, $T3
+        LDRB    $T2, [$pBitStream, #2]          ;// T2 = third bitstream byte
+        LDRB    $T1, [$pBitStream, #1]          ;// T1 = second bitstream byte
+        LDRB    $BitBuffer,  [$pBitStream], #3  ;// BitBuffer = first byte; pointer then advances by 3
+        ADD     $BitCount, $BitCount, #8        ;// add 8: only three bytes are loaded, so the top byte is not data
+        MEND
+        
+        MACRO
+        M_BD_INIT2  $T1, $T2, $T3
+        ORR     $T2, $T2, $T1, LSL #8                 ;// T2 = (second byte LSL 8) OR third byte
+        ORR     $BitBuffer, $T2, $BitBuffer, LSL #16  ;// BitBuffer = first:second:third byte in bits 23..0
+        MEND    
+        
+        ;//
+        ;// Look ahead fixed 1<=N<=8 bits without consuming any bits
+        ;// The next bits will be placed at bit 31..24 of destination register
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits to look
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the next N bits of the bitstream
+        ;// (none)          - this macro takes no temp/scratch register
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_LOOK8  $Symbol, $N
+        ASSERT  ($N>=1):LAND:($N<=8)
+        MOV     $Symbol, $BitBuffer, LSL $BitCount  ;// left-align unread bits; N is only range-checked, lower bits are not cleared
+        MEND
+        
+        ;//
+        ;// Look ahead fixed 1<=N<=16 bits without consuming any bits
+        ;// The next bits will be placed at bit 31..16 of destination register
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits to look
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the next N bits of the bitstream
+        ;// $T1             - temp/scratch register (reserved; not written by this implementation)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_LOOK16  $Symbol, $N, $T1
+        ASSERT  ($N >= 1):LAND:($N <= 16)
+        MOV     $Symbol, $BitBuffer, LSL $BitCount  ;// left-align unread bits; N is only range-checked, lower bits are not cleared
+        MEND
+        
+        ;//
+        ;// Skips fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $T1             - corrupted temp/scratch register
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_SKIP8 $N, $T1
+        ASSERT  ($N>=1):LAND:($N<=8)        
+        SUBS    $BitCount, $BitCount, #(8-$N)        ;// BitCount = BitCount+N-8; carry set when no borrow occurs
+        LDRCSB  $T1, [$pBitStream], #1               ;// carry set: fetch one refill byte
+        ADDCC   $BitCount, $BitCount, #8             ;// carry clear: no refill, net effect is BitCount+N
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// carry set: shift buffer up 8 bits and merge refill byte
+        MEND
+        
+        
+        ;//
+        ;// Read fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits to read
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the next N bits of the bitstream
+        ;// $T1             - corrupted temp/scratch register
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_READ8 $Symbol, $N, $T1
+        ASSERT  ($N>=1):LAND:($N<=8)                
+        MOVS    $Symbol, $BitBuffer, LSL $BitCount   ;// left-align unread bits (flags are overwritten by the SUBS below)
+        SUBS    $BitCount, $BitCount, #(8-$N)        ;// BitCount = BitCount+N-8; carry set when no borrow occurs
+        LDRCSB  $T1, [$pBitStream], #1               ;// carry set: fetch one refill byte
+        ADDCC   $BitCount, $BitCount, #8             ;// carry clear: no refill, net effect is BitCount+N
+        MOV     $Symbol, $Symbol, LSR #(32-$N)       ;// keep the top N bits, right-aligned
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// carry set: shift buffer up 8 bits and merge refill byte
+        MEND
+
+        ;//
+        ;// Read fixed 1<=N<=16 bits from the bitstream, advancing the bitstream pointer
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits to read
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the next N bits of the bitstream
+        ;// $T1             - corrupted temp/scratch register
+        ;// $T2             - temp/scratch register (reserved; not written by this implementation)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_READ16 $Symbol, $N, $T1, $T2
+        ASSERT  ($N>=1):LAND:($N<=16)
+        ASSERT  $Symbol<>$T1
+        IF ($N<=8)
+            M_BD_READ8  $Symbol, $N, $T1
+        ELSE        
+            ;// N>8 so we will be able to refill at least one byte            
+            LDRB    $T1, [$pBitStream], #1               ;// first refill byte, always fetched
+            MOVS    $Symbol, $BitBuffer, LSL $BitCount   ;// left-align unread bits before the buffer is shifted
+            ORR     $BitBuffer, $T1, $BitBuffer, LSL #8  ;// merge first refill byte
+            SUBS    $BitCount, $BitCount, #(16-$N)       ;// BitCount = BitCount+N-16; carry set when no borrow occurs
+            LDRCSB  $T1, [$pBitStream], #1               ;// carry set: fetch second refill byte
+            MOV     $Symbol, $Symbol, LSR #(32-$N)       ;// keep the top N bits, right-aligned
+            ADDCC   $BitCount, $BitCount, #8             ;// carry clear: only one byte was refilled
+            ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// carry set: merge second refill byte
+        ENDIF
+        MEND
+        
+        ;//
+        ;// Skip variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits. 1<=N<=8
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $T1             - corrupted temp/scratch register
+        ;// (no $T2)        - this macro takes a single temp/scratch register
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_VSKIP8 $N, $T1
+        ADD     $BitCount, $BitCount, $N             ;// account for the N skipped bits
+        SUBS    $BitCount, $BitCount, #8             ;// carry set when at least 8 bits are now used
+        LDRCSB  $T1, [$pBitStream], #1               ;// carry set: fetch one refill byte
+        ADDCC   $BitCount, $BitCount, #8             ;// carry clear: no refill, undo the subtraction
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// carry set: shift buffer up 8 bits and merge refill byte
+        MEND        
+        
+        ;//
+        ;// Skip variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits. 1<=N<=16
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $T1             - corrupted temp/scratch register
+        ;// $T2             - temp/scratch register (reserved; not written by this implementation)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_VSKIP16 $N, $T1, $T2
+        ADD     $BitCount, $BitCount, $N             ;// account for the N skipped bits
+        SUBS    $BitCount, $BitCount, #8             ;// carry set when at least 8 bits are now used
+        LDRCSB  $T1, [$pBitStream], #1               ;// carry set: fetch first refill byte
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// carry set: merge first refill byte
+        SUBCSS  $BitCount, $BitCount, #8             ;// carry set: test whether a second byte can be refilled
+        LDRCSB  $T1, [$pBitStream], #1               ;// carry still set: fetch second refill byte
+        ADDCC   $BitCount, $BitCount, #8             ;// carry clear: undo the most recent subtraction of 8
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// carry still set: merge second refill byte
+        MEND        
+
+        ;//
+        ;// Read variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits to read. 1<=N<=8
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the next N bits of the bitstream
+        ;// $T1             - corrupted temp/scratch register
+        ;// $T2             - corrupted temp/scratch register
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_VREAD8 $Symbol, $N, $T1, $T2
+        MOV     $Symbol, $BitBuffer, LSL $BitCount   ;// left-align unread bits
+        ADD     $BitCount, $BitCount, $N             ;// account for the N bits being read
+        SUBS    $BitCount, $BitCount, #8             ;// carry set when at least 8 bits are now used
+        LDRCSB  $T1, [$pBitStream], #1               ;// carry set: fetch one refill byte
+        RSB     $T2, $N, #32                         ;// T2 = 32-N, the shift that right-aligns N bits
+        ADDCC   $BitCount, $BitCount, #8             ;// carry clear: no refill, undo the subtraction
+        MOV     $Symbol, $Symbol, LSR $T2            ;// keep the top N bits, right-aligned
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// carry set: shift buffer up 8 bits and merge refill byte
+        MEND
+
+
+        ;//
+        ;// Read variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits to read. 1<=N<=16
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the next N bits of the bitstream
+        ;// $T1             - corrupted temp/scratch register
+        ;// $T2             - corrupted temp/scratch register
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_VREAD16 $Symbol, $N, $T1, $T2
+        MOV     $Symbol, $BitBuffer, LSL $BitCount   ;// left-align unread bits
+        ADD     $BitCount, $BitCount, $N             ;// account for the N bits being read
+        SUBS    $BitCount, $BitCount, #8             ;// carry set when at least 8 bits are now used
+        LDRCSB  $T1, [$pBitStream], #1               ;// carry set: fetch first refill byte
+        RSB     $T2, $N, #32                         ;// T2 = 32-N, the shift that right-aligns N bits
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// carry set: merge first refill byte
+        SUBCSS  $BitCount, $BitCount, #8             ;// carry set: test whether a second byte can be refilled
+        LDRCSB  $T1, [$pBitStream], #1               ;// carry still set: fetch second refill byte
+        ADDCC   $BitCount, $BitCount, #8             ;// carry clear: undo the most recent subtraction of 8
+        MOV     $Symbol, $Symbol, LSR $T2            ;// keep the top N bits, right-aligned
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// carry still set: merge second refill byte
+        MEND
+
+
+        ;//
+        ;// Decode a code of the form 0000...001 where there
+        ;// are N zeros before the 1 and N<=15 (code length<=16)
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the number of zeros before the next 1
+        ;//                   >=16 is an illegal code
+        ;// $T1             - corrupted temp/scratch register
+        ;// $T2             - temp/scratch register (reserved; not written by this implementation)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//        
+        MACRO
+        M_BD_CLZ16 $Symbol, $T1, $T2
+        MOVS    $Symbol, $BitBuffer, LSL $BitCount   ;// left-align unread bits
+        CLZ     $Symbol, $Symbol                     ;// Symbol = count of leading zero bits
+        ADD     $BitCount, $BitCount, $Symbol        ;// account for the zeros; the SUBS below adds the terminating 1
+        SUBS    $BitCount, $BitCount, #7        ;// length is Symbol+1
+        LDRCSB  $T1, [$pBitStream], #1               ;// carry set: fetch first refill byte
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// carry set: merge first refill byte
+        SUBCSS  $BitCount, $BitCount, #8             ;// carry set: test whether a second byte can be refilled
+        LDRCSB  $T1, [$pBitStream], #1               ;// carry still set: fetch second refill byte
+        ADDCC   $BitCount, $BitCount, #8             ;// carry clear: undo the most recent subtraction of 8
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// carry still set: merge second refill byte
+        MEND  
+
+        ;//
+        ;// Decode a code of the form 1111...110 where there
+        ;// are N ones before the 0 and N<=15 (code length<=16)
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the number of ones before the next 0
+        ;//                   >=16 is an illegal code
+        ;// $T1             - corrupted temp/scratch register
+        ;// $T2             - temp/scratch register (reserved; not written by this implementation)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//        
+        MACRO
+        M_BD_CLO16 $Symbol, $T1, $T2
+        MOV     $Symbol, $BitBuffer, LSL $BitCount   ;// left-align unread bits
+        MVN     $Symbol, $Symbol                     ;// invert so that leading ones become leading zeros
+        CLZ     $Symbol, $Symbol                     ;// Symbol = count of leading one bits in the stream
+        ADD     $BitCount, $BitCount, $Symbol        ;// account for the ones; the SUBS below adds the terminating 0
+        SUBS    $BitCount, $BitCount, #7        ;// length is Symbol+1
+        LDRCSB  $T1, [$pBitStream], #1               ;// carry set: fetch first refill byte
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// carry set: merge first refill byte
+        SUBCSS  $BitCount, $BitCount, #8             ;// carry set: test whether a second byte can be refilled
+        LDRCSB  $T1, [$pBitStream], #1               ;// carry still set: fetch second refill byte
+        ADDCC   $BitCount, $BitCount, #8             ;// carry clear: undo the most recent subtraction of 8
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// carry still set: merge second refill byte
+        MEND  
+
+
+        ;//
+        ;// Variable Length Decode module
+        ;//
+        ;// Decodes one VLD Symbol from a bitstream and refill the bitstream
+        ;// buffer.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $pVLDTable      - pointer to VLD decode table of 16-bit entries.
+        ;//                   The format is described above at the start of
+        ;//                   this file.
+        ;// $S0             - The number of bits to look up for the first step
+        ;//                   1<=$S0<=8
+        ;// $S1             - The number of bits to look up for each subsequent
+        ;//                   step 1<=$S1<=$S0.
+        ;//
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - decoded VLD symbol value
+        ;// $T1             - corrupted temp/scratch register
+        ;// $T2             - corrupted temp/scratch register
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_VLD $Symbol, $T1, $T2, $pVLDTable, $S0, $S1
+        ASSERT (1<=$S0):LAND:($S0<=8)
+        ASSERT (1<=$S1):LAND:($S1<=$S0)
+        
+        ;// Note 0<=BitCount<=15 on entry and exit
+        
+        MOVS    $T1, $BitBuffer, LSL $BitCount       ;// left align next bits
+        MOVS    $Symbol, #(2<<$S0)-2                 ;// create mask
+        AND     $Symbol, $Symbol, $T1, LSR #(31-$S0) ;// 2*(next $S0 bits)
+        SUBS    $BitCount, $BitCount, #8             ;// CS if buffer can be filled
+01
+        LDRCSB  $T1, [$pBitStream], #1               ;// load refill byte
+        LDRH    $Symbol, [$pVLDTable, $Symbol]       ;// load table entry
+        ADDCC   $BitCount, $BitCount, #8             ;// refill not possible
+        ADD     $BitCount, $BitCount, #$S0           ;// assume $S0 bits used
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// merge in refill byte
+        MOVS    $T1, $Symbol, LSR #1                 ;// CS=leaf entry
+        BCS     %FT02
+        
+        MOVS    $T1, $BitBuffer, LSL $BitCount       ;// left align next bit
+        IF (2*$S0-$S1<=8)
+            ;// Can combine refill check and -S0+S1 and keep $BitCount<=15
+            SUBS    $BitCount, $BitCount, #8+($S0-$S1)
+        ELSE
+            ;// Separate refill check and -S0+S1 offset
+            SUBS  $BitCount, $BitCount, #8
+            SUB   $BitCount, $BitCount, #($S0-$S1)
+        ENDIF
+        ADD     $Symbol, $Symbol, $T1, LSR #(31-$S1) ;// add 2*(next $S1 bits) to
+        BIC     $Symbol, $Symbol, #1                 ;//   table offset
+        B       %BT01                                ;// load next table entry
+02
+        ;// BitCount range now depend on the route here
+        ;// if (first step)       S0 <= BitCount <= 7+S0        <=15
+        ;// else if (2*S0-S1<=8)  S0 <= BitCount <= 7+(2*S0-S1) <=15
+        ;// else                  S1 <= BitCount <= 7+S1        <=15
+        
+        SUB     $BitCount, $BitCount, $Symbol, LSR#13 ;// give back the overread bits (Len = entry bits 15..13)
+        BIC     $Symbol, $T1, #0xF000                 ;// Symbol = entry LSR 1 with bits 15..12 cleared (12-bit payload)
+        MEND
+        
+
+        ;// Add an offset number of bits
+        ;//
+        ;// Outputs destination byte and bit index values which correspond to an offset number of bits
+        ;// from the current location. This is used to compare bitstream positions using M_BD_CMP.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $Offset         - Offset to be added in bits. Corrupted: overwritten with $Offset+$BitCount.
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// $ByteIndex      - Destination pBitStream pointer after adding the Offset.
+        ;//                   This value is 4 bytes ahead of the exact pointer; subtract 4 to get the
+        ;//                   exact pointer (as in M_BD_FINI). For use with M_BD_CMP no subtraction is needed.
+        ;// $BitIndex       - Destination BitCount after the addition of Offset number of bits
+        ;//
+        MACRO
+        M_BD_ADD  $ByteIndex, $BitIndex, $Offset
+
+        ;// ($ByteIndex,$BitIndex) = Current position + $Offset bits
+        ADD     $Offset, $Offset, $BitCount             ;// total bit offset (overwrites $Offset)
+        AND     $BitIndex, $Offset, #7                  ;// low 3 bits give the bit index
+        ADD     $ByteIndex, $pBitStream, $Offset, ASR #3 ;// pointer plus (total bits / 8)
+        MEND
+
+        ;// Move bitstream pointers to the location given
+        ;//
+        ;// Outputs destination byte and bit index values which corresponds to  
+        ;// the current location given (calculated using M_BD_ADD). 
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// $ByteIndex      - Destination pBitStream pointer after move. 
+        ;//                   This value will be 4 byte ahead and needs to subtract by 4 to get exact 
+        ;//                   pointer (as in M_BD_FINI).
+        ;// $BitIndex       - Destination BitCount after the move
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// $pBitStream     \ 
+        ;//                  } See description above.  
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_MOV  $ByteIndex, $BitIndex
+
+        ;// ($pBitStream, $BitCount) = ($ByteIndex,$BitIndex)
+        MOV     $BitCount, $BitIndex
+        MOV     $pBitStream, $ByteIndex
+        MEND
+
+        ;// Bitstream Compare
+        ;//
+        ;// Compares bitstream position with that of a destination position. Destination position 
+        ;// is held in two input registers which are calculated using M_BD_ADD macro
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $ByteIndex      - Destination pBitStream pointer, (4 byte ahead as described in M_BD_ADD)
+        ;// $BitIndex       - Destination BitCount
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// FLAGS           - GE if destination is reached, LT if destination is ahead
+        ;// $T1             - corrupted temp/scratch register
+        ;//
+        MACRO
+        M_BD_CMP  $ByteIndex, $BitIndex, $T1
+        
+        ;// Return flags set by (current position)-($ByteIndex,$BitIndex)
+        ;// so GE means that we have reached the indicated position
+
+        ADD         $T1, $pBitStream, $BitCount, LSR #3   ;// current byte pointer
+        CMP         $T1, $ByteIndex                       ;// compare byte positions first
+        AND         $T1, $BitCount, #7                    ;// current bit index within the byte
+        CMPEQ       $T1, $BitIndex                        ;// bytes equal: bit index decides
+        MEND
+
+        
+        ;// Bitstream Decode finalise
+        ;//
+        ;// Writes back the bitstream state to the bitstream pointers
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// $ppBitStream    - pointer to pointer to the next bitstream byte
+        ;// $pBitOffset     - pointer to the number of bits used in the current byte (0..7)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } these register are corrupted
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_FINI  $ppBitStream, $pBitOffset
+        
+        ;// Advance pointer by the number of free bits in the buffer
+        ADD     $pBitStream, $pBitStream, $BitCount, LSR#3  ;// add whole bytes held in BitCount
+        AND     $BitCount, $BitCount, #7                    ;// keep the bit offset (0..7)
+        
+        ;// Now move back 32 bits to reach the first unused bit
+        SUB     $pBitStream, $pBitStream, #4
+        
+        ;// Store out bitstream state
+        STR     $BitCount, [$pBitOffset]                    ;// *pBitOffset = bit offset
+        STR     $pBitStream, [$ppBitStream]                 ;// *ppBitStream = byte pointer
+        MEND
+        
+        END
+        
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h
new file mode 100644
index 0000000..4f9bc3b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h
@@ -0,0 +1,212 @@
+/**
+ * 
+ * File Name:  armCOMM_Bitstream.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File: armCOMM_Bitstream.h
+ * Brief: Declares common API's/Data types used across the OpenMax Encoders/Decoders.
+ *
+ */
+
+#ifndef _armCodec_H_
+#define _armCodec_H_
+
+#include "omxtypes.h"
+
+typedef struct {
+    OMX_U8   codeLen;   /* presumably the length of codeWord in bits - TODO confirm */
+    OMX_U32	 codeWord;  /* code word value */
+} ARM_VLC32;
+
+/* ARM_VLC32: element type of the code book read by armUnPackVLC32(); passed by value to armPackVLC32() */
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream pointer
+ *
+ * Parameters:
+ * [in]     **ppBitStream
+ * [in]     *pOffset
+ * [in]     N=1...32
+ *
+ * Returns  Value
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream
+ *    
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ * [in]     N=1..32
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ * Returns  Value
+ */
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align the pointer *ppBitStream to the next byte boundary
+ *
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ *
+ **/
+ 
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset);
+
+/** 
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Skip N bits from the value at *ppBitStream
+ *
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ * [in]     N
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ *
+ **/
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N);
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of variable length symbol (max size 32 bits) read from
+ * the bit stream pointed by *ppBitStream at *pOffset by using the table
+ * pointed by pCodeBook
+ * 
+ * Parameters:
+ * [in]     **ppBitStream
+ * [in]     *pOffset
+ * [in]     pCodeBook
+ * 
+ * [out]    **ppBitStream
+ * [out]    *pOffset
+ *
+ * Returns : Code Book Index if successful.
+ *         : "ARM_NO_CODEBOOK_INDEX = 0xFFFF" if search fails.
+ **/
+
+#define ARM_NO_CODEBOOK_INDEX (OMX_U16)(0xFFFF)
+
+OMX_U16 armUnPackVLC32(
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pOffset,
+    const ARM_VLC32 *pCodeBook
+);
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack a fixed-length code word (codeLength bits) into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte 
+ *                      in the bit stream.
+ * [in]	pOffset	        pointer to the bit position in the byte 
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in]	codeWord		Code word that need to be inserted in to the
+ *                          bitstream
+ * [in]	codeLength		Length of the code word valid range 1...32
+ *
+ * [out] ppBitStream	*ppBitStream is updated after the block is encoded,
+ *	                        so that it points to the current byte in the bit
+ *							stream buffer.
+ * [out] pOffset		*pOffset is updated so that it points to the
+ *							current bit position in the byte pointed by
+ *							*ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+ 
+OMXResult armPackBits (
+    OMX_U8  **ppBitStream, 
+    OMX_INT *pOffset,
+    OMX_U32 codeWord, 
+    OMX_INT codeLength 
+);
+ 
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte 
+ *                      in the bit stream.
+ * [in]	pBitOffset	    pointer to the bit position in the byte 
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in]	 code     		VLC code word that need to be inserted in to the
+ *                      bitstream
+ *
+ * [out] ppBitStream	*ppBitStream is updated after the block is encoded,
+ *	                    so that it points to the current byte in the bit
+ *						stream buffer.
+ * [out] pBitOffset		*pBitOffset is updated so that it points to the
+ *						current bit position in the byte pointed by
+ *						*ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+ 
+OMXResult armPackVLC32 (
+    OMX_U8 **ppBitStream, 
+    OMX_INT *pBitOffset,
+    ARM_VLC32 code 
+);
+
+#endif      /*_armCodec_H_*/
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h
new file mode 100644
index 0000000..d5db32f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h
@@ -0,0 +1,40 @@
+/**
+ *
+ * 
+ * File Name:  armCOMM_IDCTTable.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File         : armCOMM_IDCTTable.h
+ * Description  : Contains declarations of tables for IDCT calculation.
+ *
+ */
+  
+#ifndef _armCOMM_IDCTTable_H_
+#define _armCOMM_IDCTTable_H_
+
+#include "omxtypes.h"
+
+     /*  Table of s(u)*A(u)*A(v)/16 at Q15
+      *  s(u)=1.0 0 <= u <= 5
+      *  s(6)=2.0
+      *  s(7)=4.0
+      *  A(0) = 2*sqrt(2)
+      *  A(u) = 4*cos(u*pi/16)  for (u!=0)
+	  */
+extern const OMX_U16 armCOMM_IDCTPreScale [64];
+extern const OMX_U16 armCOMM_IDCTCoef [4];
+
+#endif /* _armCOMM_IDCTTable_H_ */
+
+
+/* End of File */
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h
new file mode 100644
index 0000000..03f7137
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h
@@ -0,0 +1,1445 @@
+;//
+;// This confidential and proprietary software may be used only as
+;// authorised by a licensing agreement from ARM Limited
+;//   (C) COPYRIGHT 2004 ARM Limited
+;//       ALL RIGHTS RESERVED
+;// The entire notice above must be reproduced on all authorised
+;// copies and copies may only be made to the extent permitted
+;// by a licensing agreement from ARM Limited.
+;//
+;// IDCT_s.s
+;//
+;// Inverse DCT module
+;//
+;// 
+;// ALGORITHM DESCRIPTION
+;//
+;// The 8x8 2D IDCT is performed by calculating a 1D IDCT for each
+;// column and then a 1D IDCT for each row.
+;//
+;// The 8-point 1D IDCT is defined by
+;//   f(x) = (C(0)*T(0)*c(0,x) + ... + C(7)*T(7)*c(7,x))/2
+;//
+;//   C(u) = 1/sqrt(2) if u=0 or 1 if u!=0
+;//   c(u,x) = cos( (2x+1)*u*pi/16 )
+;//
+;// We compute the 8-point 1D IDCT using the reverse of
+;// the Arai-Agui-Nakajima flow graph which we split into
+;// 5 stages named in reverse order to identify with the
+;// forward DCT. Direct inversion of the forward formulae
+;// in file FDCT_s.s gives:
+;//
+;// IStage 5:   j(u) = T(u)*A(u)  [ A(u)=4*C(u)*c(u,0) ]
+;//             [ A(0) = 2*sqrt(2)
+;//               A(u) = 4*cos(u*pi/16)  for (u!=0) ]
+;//
+;// IStage 4:   i0 = j0             i1 = j4
+;//             i3 = (j2+j6)/2      i2 = (j2-j6)/2
+;//             i7 = (j5+j3)/2      i4 = (j5-j3)/2
+;//             i5 = (j1+j7)/2      i6 = (j1-j7)/2
+;//
+;// IStage 3:   h0 = (i0+i1)/2      h1 = (i0-i1)/2
+;//             h2 = (i2*sqrt2)-i3  h3 = i3
+;//             h4 =  cos(pi/8)*i4 + sin(pi/8)*i6
+;//             h6 = -sin(pi/8)*i4 + cos(pi/8)*i6
+;//             [ The above two lines rotate by -(pi/8) ]
+;//             h5 = (i5-i7)/sqrt2  h7 = (i5+i7)/2 
+;//             
+;// IStage 2:   g0 = (h0+h3)/2      g3 = (h0-h3)/2
+;//             g1 = (h1+h2)/2      g2 = (h1-h2)/2
+;//             g7 = h7             g6 = h6 - h7
+;//             g5 = h5 - g6        g4 = h4 - g5
+;//
+;// IStage 1:   f0 = (g0+g7)/2      f7 = (g0-g7)/2
+;//             f1 = (g1+g6)/2      f6 = (g1-g6)/2
+;//             f2 = (g2+g5)/2      f5 = (g2-g5)/2
+;//             f3 = (g3+g4)/2      f4 = (g3-g4)/2
+;//
+;// Note that most coefficients are halved 3 times during the
+;// above calculation. We can rescale the algorithm dividing
+;// the input by 8 to remove the halvings.
+;//
+;// IStage 5:   j(u) = T(u)*A(u)/8
+;//
+;// IStage 4:   i0 = j0             i1 = j4
+;//             i3 = j2 + j6        i2 = j2 - j6
+;//             i7 = j5 + j3        i4 = j5 - j3
+;//             i5 = j1 + j7        i6 = j1 - j7
+;//
+;// IStage 3:   h0 = i0 + i1        h1 = i0 - i1
+;//             h2 = (i2*sqrt2)-i3  h3 = i3
+;//             h4 = 2*( cos(pi/8)*i4 + sin(pi/8)*i6)
+;//             h6 = 2*(-sin(pi/8)*i4 + cos(pi/8)*i6)
+;//             h5 = (i5-i7)*sqrt2  h7 = i5 + i7 
+;//             
+;// IStage 2:   g0 = h0 + h3        g3 = h0 - h3
+;//             g1 = h1 + h2        g2 = h1 - h2
+;//             g7 = h7             g6 = h6 - h7
+;//             g5 = h5 - g6        g4 = h4 - g5
+;//
+;// IStage 1:   f0 = g0 + g7        f7 = g0 - g7
+;//             f1 = g1 + g6        f6 = g1 - g6
+;//             f2 = g2 + g5        f5 = g2 - g5
+;//             f3 = g3 + g4        f4 = g3 - g4
+;//
+;// Note:
+;// 1. The scaling by A(u)/8 can often be combined with inverse
+;//    quantization. The column and row scalings can be combined.
+;// 2. The flowgraph in the AAN paper has h4,g6 negated compared
+;//    to the above code but is otherwise identical.
+;// 3. The rotation by -pi/8 can be performed using three multiplies
+;//    Eg  c*i4+s*i6 = (i6-i4)*s + (c+s)*i4
+;//       -s*i4+c*i6 = (i6-i4)*s + (c-s)*i6
+;// 4. If |T(u)|<=1 then from the IDCT definition,
+;//    |f(x)| <= ((1/sqrt2) + |c(1,x)| + .. + |c(7,x)|)/2
+;//            = ((1/sqrt2) + cos(pi/16) + ... + cos(7*pi/16))/2
+;//            = ((1/sqrt2) + (cot(pi/32)-1)/2)/2
+;//            = (1 + cos(pi/16) + cos(2pi/16) + cos(3pi/16))/sqrt(2)
+;//            = (approx)2.64
+;//    So the max gain of the 2D IDCT is ~x7.0 = 3 bits.
+;//    The table below shows input patterns generating the maximum
+;//    value of |f(x)| for input in the range |T(u)|<=1. M=-1, P=+1
+;//    InputPattern      Max |f(x)|
+;//      PPPPPPPP        |f0| =  2.64
+;//      PPPMMMMM        |f1| =  2.64
+;//      PPMMMPPP        |f2| =  2.64
+;//      PPMMPPMM        |f3| =  2.64
+;//      PMMPPMMP        |f4| =  2.64
+;//      PMMPMMPM        |f5| =  2.64
+;//      PMPPMPMP        |f6| =  2.64
+;//      PMPMPMPM        |f7| =  2.64
+;//   Note that this input pattern is the transpose of the
+;//   corresponding max input pattern for the FDCT.
+
+;// Arguments
+
+pSrc    RN 0    ;// source data buffer
+Stride  RN 1    ;// destination stride in bytes
+pDest   RN 2    ;// destination data buffer
+pScale  RN 3    ;// pointer to scaling table
+
+
+        ;// DCT Inverse Macro
+        ;// The DCT code should be parametrized according
+        ;// to the following inputs:
+        ;// $outsize = "u8"  :  8-bit unsigned data saturated (0 to +255)
+        ;//            "s9"  : 16-bit signed data saturated to 9-bit (-256 to +255)
+        ;//            "s16" : 16-bit signed data not saturated (max size ~+/-14273)
+        ;// $inscale = "s16" : signed 16-bit aan-scale table, Q15 format, with 4 byte alignment
+        ;//            "s32" : signed 32-bit aan-scale table, Q23 format, with 4 byte alignment
+        ;//
+        ;// Inputs:
+        ;// pSrc   = r0 = Pointer to input data
+        ;//               Range is -256 to +255 (9-bit)
+        ;// Stride = r1 = Stride between input lines
+        ;// pDest  = r2 = Pointer to output data
+        ;// pScale = r3 = Pointer to aan-scale table in the format defined by $inscale
+        
+        
+        
+        MACRO
+        M_IDCT  $outsize, $inscale, $stride
+        LCLA    SHIFT
+        
+        
+        IF ARM1136JS
+        
+;// REGISTER ALLOCATION
+;// This is hard since we have 8 values, 9 free registers and each
+;// butterfly requires a temporary register. We also want to 
+;// maintain register order so we can use LDM/STM. The table below
+;// summarises the register allocation that meets all these criteria.
+;// a=1stcol, b=2ndcol, f,g,h,i are dataflow points described above.
+;//
+;// r1  a01     g0  h0
+;// r4  b01 f0  g1  h1  i0
+;// r5  a23 f1  g2      i1
+;// r6  b23 f2  g3  h2  i2
+;// r7  a45 f3      h3  i3
+;// r8  b45 f4  g4  h4  i4
+;// r9  a67 f5  g5  h5  i5
+;// r10 b67 f6  g6  h6  i6
+;// r11     f7  g7  h7  i7
+;//
+ra01    RN 1
+rb01    RN 4
+ra23    RN 5
+rb23    RN 6
+ra45    RN 7
+rb45    RN 8
+ra67    RN 9
+rb67    RN 10
+rtmp    RN 11
+csPiBy8 RN 12   ;// [ (Sin(pi/8)@Q15), (Cos(pi/8)@Q15) ]
+LoopRR2 RN 14   ;// [ LoopNumber<<13 , (1/Sqrt(2))@Q15 ]
+;// Transpose allocation
+xft     RN ra01
+xf0     RN rb01
+xf1     RN ra23
+xf2     RN rb23
+xf3     RN ra45
+xf4     RN rb45
+xf5     RN ra67
+xf6     RN rb67
+xf7     RN rtmp
+;// IStage 1 allocation
+xg0     RN xft
+xg1     RN xf0
+xg2     RN xf1
+xg3     RN xf2
+xgt     RN xf3
+xg4     RN xf4
+xg5     RN xf5
+xg6     RN xf6
+xg7     RN xf7
+;// IStage 2 allocation
+xh0     RN xg0
+xh1     RN xg1
+xht     RN xg2
+xh2     RN xg3
+xh3     RN xgt
+xh4     RN xg4
+xh5     RN xg5
+xh6     RN xg6
+xh7     RN xg7
+;// IStage 3,4 allocation
+xit     RN xh0
+xi0     RN xh1
+xi1     RN xht
+xi2     RN xh2
+xi3     RN xh3
+xi4     RN xh4
+xi5     RN xh5
+xi6     RN xh6
+xi7     RN xh7
+        
+        M_STR   pDest,  ppDest
+        IF "$stride"="s"
+            M_STR   Stride, pStride
+        ENDIF
+        M_ADR   pDest,  pBlk
+        LDR     csPiBy8, =0x30fc7642
+        LDR     LoopRR2, =0x00005a82
+  
+v6_idct_col$_F
+        ;// Load even values
+        LDR     xi4, [pSrc], #4  ;// j0
+        LDR     xi5, [pSrc, #4*16-4]  ;// j4
+        LDR     xi6, [pSrc, #2*16-4]  ;// j2
+        LDR     xi7, [pSrc, #6*16-4]  ;// j6
+        
+        ;// Scale Even Values
+        IF "$inscale"="s16" ;// 16x16 mul
+SHIFT       SETA    12
+            LDR     xi0, [pScale], #4
+            LDR     xi1, [pScale, #4*16-4]        
+            LDR     xi2, [pScale, #2*16-4]
+            MOV     xit, #1<<(SHIFT-1)
+            SMLABB  xi3, xi0, xi4, xit
+            SMLATT  xi4, xi0, xi4, xit
+            SMLABB  xi0, xi1, xi5, xit
+            SMLATT  xi5, xi1, xi5, xit
+            MOV     xi3, xi3, ASR #SHIFT
+            PKHBT   xi4, xi3, xi4, LSL #(16-SHIFT)
+            LDR     xi3, [pScale, #6*16-4]
+            SMLABB  xi1, xi2, xi6, xit
+            SMLATT  xi6, xi2, xi6, xit
+            MOV     xi0, xi0, ASR #SHIFT
+            PKHBT   xi5, xi0, xi5, LSL #(16-SHIFT)
+            SMLABB  xi2, xi3, xi7, xit
+            SMLATT  xi7, xi3, xi7, xit
+            MOV     xi1, xi1, ASR #SHIFT
+            PKHBT   xi6, xi1, xi6, LSL #(16-SHIFT)
+            MOV     xi2, xi2, ASR #SHIFT
+            PKHBT   xi7, xi2, xi7, LSL #(16-SHIFT)
+        ENDIF
+        IF "$inscale"="s32" ;// 32x16 mul
+SHIFT       SETA    (12+8-16)
+            MOV     xit, #1<<(SHIFT-1)
+            LDR     xi0, [pScale], #8
+            LDR     xi1, [pScale, #0*32+4-8]
+            LDR     xi2, [pScale, #4*32-8]
+            LDR     xi3, [pScale, #4*32+4-8]            
+            SMLAWB  xi0, xi0, xi4, xit
+            SMLAWT  xi1, xi1, xi4, xit
+            SMLAWB  xi2, xi2, xi5, xit
+            SMLAWT  xi3, xi3, xi5, xit            
+            MOV     xi0, xi0, ASR #SHIFT
+            PKHBT   xi4, xi0, xi1, LSL #(16-SHIFT)
+            MOV     xi2, xi2, ASR #SHIFT            
+            PKHBT   xi5, xi2, xi3, LSL #(16-SHIFT)
+            LDR     xi0, [pScale, #2*32-8]
+            LDR     xi1, [pScale, #2*32+4-8]
+            LDR     xi2, [pScale, #6*32-8]
+            LDR     xi3, [pScale, #6*32+4-8]            
+            SMLAWB  xi0, xi0, xi6, xit
+            SMLAWT  xi1, xi1, xi6, xit
+            SMLAWB  xi2, xi2, xi7, xit
+            SMLAWT  xi3, xi3, xi7, xit            
+            MOV     xi0, xi0, ASR #SHIFT
+            PKHBT   xi6, xi0, xi1, LSL #(16-SHIFT)
+            MOV     xi2, xi2, ASR #SHIFT            
+            PKHBT   xi7, xi2, xi3, LSL #(16-SHIFT)
+        ENDIF
+                
+        ;// Load odd values
+        LDR     xi0, [pSrc, #1*16-4]      ;// j1
+        LDR     xi1, [pSrc, #7*16-4]      ;// j7
+        LDR     xi2, [pSrc, #5*16-4]      ;// j5
+        LDR     xi3, [pSrc, #3*16-4]      ;// j3
+        
+        IF  {TRUE}
+            ;// shortcut if odd values 0
+            TEQ     xi0, #0
+            TEQEQ   xi1, #0
+            TEQEQ   xi2, #0
+            TEQEQ   xi3, #0
+            BEQ     v6OddZero$_F
+        ENDIF
+        
+        ;// Store scaled even values
+        STMIA   pDest, {xi4, xi5, xi6, xi7}
+        
+        ;// Scale odd values
+        IF "$inscale"="s16"
+            ;// Perform AAN Scale
+            LDR     xi4, [pScale, #1*16-4]
+            LDR     xi5, [pScale, #7*16-4]        
+            LDR     xi6, [pScale, #5*16-4]
+            SMLABB  xi7, xi0, xi4, xit
+            SMLATT  xi0, xi0, xi4, xit
+            SMLABB  xi4, xi1, xi5, xit
+            SMLATT  xi1, xi1, xi5, xit
+            MOV     xi7, xi7, ASR #SHIFT
+            PKHBT   xi0, xi7, xi0, LSL #(16-SHIFT)
+            LDR     xi7, [pScale, #3*16-4]
+            SMLABB  xi5, xi2, xi6, xit
+            SMLATT  xi2, xi2, xi6, xit
+            MOV     xi4, xi4, ASR #SHIFT
+            PKHBT   xi1, xi4, xi1, LSL #(16-SHIFT)
+            SMLABB  xi6, xi3, xi7, xit
+            SMLATT  xi3, xi3, xi7, xit
+            MOV     xi5, xi5, ASR #SHIFT
+            PKHBT   xi2, xi5, xi2, LSL #(16-SHIFT)
+            MOV     xi6, xi6, ASR #SHIFT
+            PKHBT   xi3, xi6, xi3, LSL #(16-SHIFT)
+        ENDIF
+        IF "$inscale"="s32" ;// 32x16 mul
+            LDR     xi4, [pScale, #1*32-8]
+            LDR     xi5, [pScale, #1*32+4-8]
+            LDR     xi6, [pScale, #7*32-8]
+            LDR     xi7, [pScale, #7*32+4-8]            
+            SMLAWB  xi4, xi4, xi0, xit
+            SMLAWT  xi5, xi5, xi0, xit
+            SMLAWB  xi6, xi6, xi1, xit
+            SMLAWT  xi7, xi7, xi1, xit            
+            MOV     xi4, xi4, ASR #SHIFT
+            PKHBT   xi0, xi4, xi5, LSL #(16-SHIFT)
+            MOV     xi6, xi6, ASR #SHIFT            
+            PKHBT   xi1, xi6, xi7, LSL #(16-SHIFT)
+            LDR     xi4, [pScale, #5*32-8]
+            LDR     xi5, [pScale, #5*32+4-8]
+            LDR     xi6, [pScale, #3*32-8]
+            LDR     xi7, [pScale, #3*32+4-8]            
+            SMLAWB  xi4, xi4, xi2, xit
+            SMLAWT  xi5, xi5, xi2, xit
+            SMLAWB  xi6, xi6, xi3, xit
+            SMLAWT  xi7, xi7, xi3, xit            
+            MOV     xi4, xi4, ASR #SHIFT
+            PKHBT   xi2, xi4, xi5, LSL #(16-SHIFT)
+            MOV     xi6, xi6, ASR #SHIFT            
+            PKHBT   xi3, xi6, xi7, LSL #(16-SHIFT)
+        ENDIF
+        
+        SHADD16 xi5, xi0, xi1           ;// (j1+j7)/2
+        SSUB16  xi6, xi0, xi1           ;// j1-j7
+        SHADD16 xi7, xi2, xi3           ;// (j5+j3)/2
+        SSUB16  xi4, xi2, xi3           ;// j5-j3
+        
+        SSUB16  xi3, xi5, xi7           ;// (i5-i7)/2
+        
+        PKHBT   xi0, xi6, xi4, LSL#16   ;// [i4,i6] row a
+        PKHTB   xi1, xi4, xi6, ASR#16   ;// [i4,i6] row b
+        
+        SMUADX  xi2, xi0, csPiBy8       ;// rowa by [c,s]
+        SMUADX  xi4, xi1, csPiBy8       ;// rowb by [c,s]
+        SMUSD   xi0, xi0, csPiBy8       ;// rowa by [-s,c]   
+        SMUSD   xi6, xi1, csPiBy8       ;// rowb by [-s,c]
+                
+        SMULBB  xi1, xi3, LoopRR2
+        SMULTB  xi3, xi3, LoopRR2
+                
+        PKHTB   xh4, xi4, xi2, ASR#16   ;// h4/4
+        PKHTB   xh6, xi6, xi0, ASR#16   ;// h6/4
+        SHADD16 xh7, xi5, xi7           ;// (i5+i7)/4
+                
+        ;// xi0,xi1,xi2,xi3 now free
+        ;// IStage 4,3, rows 2to3 x1/2
+        
+        MOV     xi3, xi3, LSL #1
+        PKHTB   xh5, xi3, xi1, ASR#15   ;// h5/4
+        LDRD    xi0, [pDest, #8]        ;// j2,j6 scaled
+                
+        ;// IStage 2, rows4to7
+        SSUB16  xg6, xh6, xh7
+        SSUB16  xg5, xh5, xg6        
+        SSUB16  xg4, xh4, xg5
+                
+        SSUB16  xi2, xi0, xi1           ;// (j2-j6)
+        SHADD16 xi3, xi0, xi1           ;// (j2+j6)/2
+        
+        SMULBB  xi0, xi2, LoopRR2
+        SMULTB  xi2, xi2, LoopRR2
+        
+        MOV     xi2, xi2, LSL #1
+        PKHTB   xh2, xi2, xi0, ASR#15   ;// i2*sqrt(2)/4
+        
+        ;// xi0, xi1 now free
+        ;// IStage 4,3 rows 0to1 x 1/2
+        LDRD    xi0, [pDest]            ;// j0, j4 scaled
+        SSUB16  xh2, xh2, xi3
+        ADDS    LoopRR2, LoopRR2, #2<<29    ;// done two rows
+        
+        SHADD16 xh0, xi0, xi1
+        SHSUB16 xh1, xi0, xi1                
+        
+        ;// IStage 2 rows 0to3 x 1/2
+        SHSUB16 xg2, xh1, xh2
+        SHADD16 xg1, xh1, xh2
+        SHSUB16 xg3, xh0, xh3
+        SHADD16 xg0, xh0, xh3
+        
+        ;// IStage 1 all rows
+        SADD16  xf3, xg3, xg4
+        SSUB16  xf4, xg3, xg4
+        SADD16  xf2, xg2, xg5
+        SSUB16  xf5, xg2, xg5
+        SADD16  xf1, xg1, xg6
+        SSUB16  xf6, xg1, xg6
+        SADD16  xf0, xg0, xg7
+        SSUB16  xf7, xg0, xg7
+        
+        ;// Transpose, store and loop
+        PKHBT   ra01, xf0, xf1, LSL #16
+        PKHTB   rb01, xf1, xf0, ASR #16
+        
+        PKHBT   ra23, xf2, xf3, LSL #16
+        PKHTB   rb23, xf3, xf2, ASR #16
+        
+        PKHBT   ra45, xf4, xf5, LSL #16
+        PKHTB   rb45, xf5, xf4, ASR #16
+        
+        PKHBT   ra67, xf6, xf7, LSL #16
+        STMIA   pDest!, {ra01, ra23, ra45, ra67}      
+        PKHTB   rb67, xf7, xf6, ASR #16
+        STMIA   pDest!, {rb01, rb23, rb45, rb67}                              
+        BCC     v6_idct_col$_F
+        
+        SUB     pSrc, pDest, #(64*2)
+        M_LDR   pDest, ppDest
+        IF "$stride"="s"
+            M_LDR   pScale, pStride 
+        ENDIF
+        B       v6_idct_row$_F
+        
+v6OddZero$_F
+        SSUB16  xi2, xi6, xi7           ;// (j2-j6)
+        SHADD16 xi3, xi6, xi7           ;// (j2+j6)/2
+        
+        SMULBB  xi0, xi2, LoopRR2
+        SMULTB  xi2, xi2, LoopRR2
+        
+        MOV     xi2, xi2, LSL #1
+        PKHTB   xh2, xi2, xi0, ASR#15   ;// i2*sqrt(2)/4
+        SSUB16  xh2, xh2, xi3
+        
+        ;// xi0, xi1 now free
+        ;// IStage 4,3 rows 0to1 x 1/2
+        
+        SHADD16 xh0, xi4, xi5
+        SHSUB16 xh1, xi4, xi5                
+        
+        ;// IStage 2 rows 0to3 x 1/2
+        SHSUB16 xg2, xh1, xh2
+        SHADD16 xg1, xh1, xh2
+        SHSUB16 xg3, xh0, xh3
+        SHADD16 xg0, xh0, xh3
+               
+        ;// IStage 1 all rows
+        MOV  xf3, xg3
+        MOV  xf4, xg3
+        MOV  xf2, xg2
+        MOV  xf5, xg2
+        MOV  xf1, xg1
+        MOV  xf6, xg1
+        MOV  xf0, xg0
+        MOV  xf7, xg0
+        
+        ;// Transpose
+        PKHBT   ra01, xf0, xf1, LSL #16
+        PKHTB   rb01, xf1, xf0, ASR #16
+        
+        PKHBT   ra23, xf2, xf3, LSL #16
+        PKHTB   rb23, xf3, xf2, ASR #16
+        
+        PKHBT   ra45, xf4, xf5, LSL #16
+        PKHTB   rb45, xf5, xf4, ASR #16
+        
+        PKHBT   ra67, xf6, xf7, LSL #16
+        PKHTB   rb67, xf7, xf6, ASR #16
+                
+        STMIA   pDest!, {ra01, ra23, ra45, ra67}      
+        ADDS    LoopRR2, LoopRR2, #2<<29    ;// done two rows
+        STMIA   pDest!, {rb01, rb23, rb45, rb67}      
+        
+        BCC     v6_idct_col$_F
+        SUB     pSrc, pDest, #(64*2)
+        M_LDR   pDest, ppDest
+        IF "$stride"="s"
+            M_LDR   pScale, pStride 
+        ENDIF
+               
+        
+v6_idct_row$_F
+        ;// IStage 4,3, rows4to7 x1/4
+        LDR     xit, =0x00010001        ;// rounding constant
+        LDR     xi0, [pSrc, #1*16]      ;// j1
+        LDR     xi1, [pSrc, #7*16]      ;// 4*j7
+        LDR     xi2, [pSrc, #5*16]      ;// j5
+        LDR     xi3, [pSrc, #3*16]      ;// j3
+        
+        SHADD16 xi1, xi1, xit           ;// 2*j7
+        SHADD16 xi1, xi1, xit           ;// j7                
+        
+        SHADD16 xi5, xi0, xi1           ;// (j1+j7)/2
+        SSUB16  xi6, xi0, xi1           ;// j1-j7
+        SHADD16 xi7, xi2, xi3           ;// (j5+j3)/2
+        SSUB16  xi4, xi2, xi3           ;// j5-j3
+        
+        SSUB16  xi3, xi5, xi7           ;// (i5-i7)/2
+        
+        PKHBT   xi0, xi6, xi4, LSL#16   ;// [i4,i6] row a
+        PKHTB   xi1, xi4, xi6, ASR#16   ;// [i4,i6] row b
+        
+        SMUADX  xi2, xi0, csPiBy8       ;// rowa by [c,s]
+        SMUADX  xi4, xi1, csPiBy8       ;// rowb by [c,s]
+        SMUSD   xi0, xi0, csPiBy8       ;// rowa by [-s,c]   
+        SMUSD   xi6, xi1, csPiBy8       ;// rowb by [-s,c]
+                
+        SMULBB  xi1, xi3, LoopRR2
+        SMULTB  xi3, xi3, LoopRR2
+                
+        PKHTB   xh4, xi4, xi2, ASR#16   ;// h4/4
+        PKHTB   xh6, xi6, xi0, ASR#16   ;// h6/4
+        SHADD16 xh7, xi5, xi7           ;// (i5+i7)/4
+        
+        MOV     xi3, xi3, LSL #1
+        PKHTB   xh5, xi3, xi1, ASR#15   ;// h5/4
+               
+        ;// xi0,xi1,xi2,xi3 now free
+        ;// IStage 4,3, rows 2to3 x1/2
+        
+        LDR     xi0, [pSrc, #2*16]      ;// j2
+        LDR     xi1, [pSrc, #6*16]      ;// 2*j6
+        
+        ;// IStage 2, rows4to7
+        SSUB16  xg6, xh6, xh7
+        SSUB16  xg5, xh5, xg6
+        SSUB16  xg4, xh4, xg5
+        
+        SHADD16 xi1, xi1, xit           ;// j6
+        SSUB16  xi2, xi0, xi1           ;// (j2-j6)        
+        SHADD16 xi3, xi0, xi1           ;// (j2+j6)/2
+        
+        SMULBB  xi0, xi2, LoopRR2
+        SMULTB  xi2, xi2, LoopRR2
+        
+        MOV     xi2, xi2, LSL #1
+        
+        PKHTB   xh2, xi2, xi0, ASR#15   ;// i2*sqrt(2)/4
+        
+        ;// xi0, xi1 now free
+        ;// IStage 4,3 rows 0to1 x 1/2
+        LDR     xi1, [pSrc, #4*16]      ;// j4
+        LDR     xi0, [pSrc], #4         ;// j0
+
+        SSUB16  xh2, xh2, xi3
+        ADDS    LoopRR2, LoopRR2, #2<<29    ;// done two rows
+        
+        ADD     xi0, xi0, xit, LSL #2   ;// ensure correct round
+        SHADD16 xh0, xi0, xi1           ;// of DC result
+        SHSUB16 xh1, xi0, xi1
+                
+        ;// IStage 2 rows 0to3 x 1/2
+        SHSUB16 xg2, xh1, xh2
+        SHADD16 xg1, xh1, xh2
+        SHSUB16 xg3, xh0, xh3
+        SHADD16 xg0, xh0, xh3
+        
+        ;// IStage 1 all rows
+        SHADD16 xf3, xg3, xg4
+        SHSUB16 xf4, xg3, xg4
+        SHADD16 xf2, xg2, xg5
+        SHSUB16 xf5, xg2, xg5
+        SHADD16 xf1, xg1, xg6
+        SHSUB16 xf6, xg1, xg6
+        SHADD16 xf0, xg0, xg7
+        SHSUB16 xf7, xg0, xg7
+        
+        ;// Saturate
+        IF ("$outsize"="u8")
+            USAT16  xf0, #8, xf0
+            USAT16  xf1, #8, xf1
+            USAT16  xf2, #8, xf2
+            USAT16  xf3, #8, xf3
+            USAT16  xf4, #8, xf4
+            USAT16  xf5, #8, xf5
+            USAT16  xf6, #8, xf6
+            USAT16  xf7, #8, xf7        
+        ENDIF
+        IF ("$outsize"="s9")
+            SSAT16  xf0, #9, xf0
+            SSAT16  xf1, #9, xf1
+            SSAT16  xf2, #9, xf2
+            SSAT16  xf3, #9, xf3
+            SSAT16  xf4, #9, xf4
+            SSAT16  xf5, #9, xf5
+            SSAT16  xf6, #9, xf6
+            SSAT16  xf7, #9, xf7        
+        ENDIF
+        
+        ;// Transpose to Row, Pack and store
+        IF ("$outsize"="u8")
+            ORR     xf0, xf0, xf1, LSL #8 ;// [ b1 b0 a1 a0 ]
+            ORR     xf2, xf2, xf3, LSL #8 ;// [ b3 b2 a3 a2 ]
+            ORR     xf4, xf4, xf5, LSL #8 ;// [ b5 b4 a5 a4 ]
+            ORR     xf6, xf6, xf7, LSL #8 ;// [ b7 b6 a7 a6 ]
+            PKHBT   ra01, xf0, xf2, LSL #16
+            PKHTB   rb01, xf2, xf0, ASR #16
+            PKHBT   ra23, xf4, xf6, LSL #16
+            PKHTB   rb23, xf6, xf4, ASR #16
+            STMIA   pDest, {ra01, ra23}
+            IF "$stride"="s"
+                ADD     pDest, pDest, pScale
+                STMIA   pDest, {rb01, rb23}
+                ADD     pDest, pDest, pScale
+            ELSE                
+                ADD     pDest, pDest, #($stride)
+                STMIA   pDest, {rb01, rb23}
+                ADD     pDest, pDest, #($stride)
+            ENDIF
+        ENDIF
+        IF ("$outsize"="s9"):LOR:("$outsize"="s16")        
+            PKHBT   ra01, xf0, xf1, LSL #16
+            PKHTB   rb01, xf1, xf0, ASR #16
+        
+            PKHBT   ra23, xf2, xf3, LSL #16
+            PKHTB   rb23, xf3, xf2, ASR #16
+            
+            PKHBT   ra45, xf4, xf5, LSL #16
+            PKHTB   rb45, xf5, xf4, ASR #16
+            
+            PKHBT   ra67, xf6, xf7, LSL #16
+            PKHTB   rb67, xf7, xf6, ASR #16
+            
+            STMIA   pDest, {ra01, ra23, ra45, ra67}      
+            IF "$stride"="s"
+                ADD     pDest, pDest, pScale
+                STMIA   pDest, {rb01, rb23, rb45, rb67}      
+                ADD     pDest, pDest, pScale
+            ELSE                
+                ADD     pDest, pDest, #($stride)
+                STMIA   pDest, {rb01, rb23, rb45, rb67}      
+                ADD     pDest, pDest, #($stride)
+            ENDIF
+        ENDIF
+        
+        BCC     v6_idct_row$_F
+        ENDIF ;// ARM1136JS
+
+
+        IF CortexA8
+        
+Src0            EQU  7              
+Src1            EQU  8              
+Src2            EQU  9              
+Src3            EQU  10              
+Src4            EQU  11              
+Src5            EQU  12              
+Src6            EQU  13
+Src7            EQU  14
+Tmp             EQU  15
+
+qXj0            QN Src0.S16 
+qXj1            QN Src1.S16
+qXj2            QN Src2.S16
+qXj3            QN Src3.S16
+qXj4            QN Src4.S16
+qXj5            QN Src5.S16
+qXj6            QN Src6.S16
+qXj7            QN Src7.S16
+qXjt            QN Tmp.S16
+
+dXj0lo          DN (Src0*2).S16
+dXj0hi          DN (Src0*2+1).S16
+dXj1lo          DN (Src1*2).S16
+dXj1hi          DN (Src1*2+1).S16
+dXj2lo          DN (Src2*2).S16
+dXj2hi          DN (Src2*2+1).S16
+dXj3lo          DN (Src3*2).S16
+dXj3hi          DN (Src3*2+1).S16
+dXj4lo          DN (Src4*2).S16
+dXj4hi          DN (Src4*2+1).S16
+dXj5lo          DN (Src5*2).S16
+dXj5hi          DN (Src5*2+1).S16
+dXj6lo          DN (Src6*2).S16
+dXj6hi          DN (Src6*2+1).S16
+dXj7lo          DN (Src7*2).S16
+dXj7hi          DN (Src7*2+1).S16
+dXjtlo          DN (Tmp*2).S16
+dXjthi          DN (Tmp*2+1).S16
+
+qXi0            QN qXj0
+qXi1            QN qXj4
+qXi2            QN qXj2
+qXi3            QN qXj7
+qXi4            QN qXj5
+qXi5            QN qXjt
+qXi6            QN qXj1
+qXi7            QN qXj6
+qXit            QN qXj3
+
+dXi0lo          DN dXj0lo
+dXi0hi          DN dXj0hi
+dXi1lo          DN dXj4lo
+dXi1hi          DN dXj4hi
+dXi2lo          DN dXj2lo
+dXi2hi          DN dXj2hi
+dXi3lo          DN dXj7lo
+dXi3hi          DN dXj7hi
+dXi4lo          DN dXj5lo
+dXi4hi          DN dXj5hi
+dXi5lo          DN dXjtlo
+dXi5hi          DN dXjthi
+dXi6lo          DN dXj1lo
+dXi6hi          DN dXj1hi
+dXi7lo          DN dXj6lo
+dXi7hi          DN dXj6hi
+dXitlo          DN dXj3lo
+dXithi          DN dXj3hi
+
+qXh0            QN qXit
+qXh1            QN qXi0
+qXh2            QN qXi2
+qXh3            QN qXi3
+qXh4            QN qXi7
+qXh5            QN qXi5
+qXh6            QN qXi4
+qXh7            QN qXi1
+qXht            QN qXi6
+
+dXh0lo          DN dXitlo
+dXh0hi          DN dXithi
+dXh1lo          DN dXi0lo
+dXh1hi          DN dXi0hi
+dXh2lo          DN dXi2lo
+dXh2hi          DN dXi2hi
+dXh3lo          DN dXi3lo
+dXh3hi          DN dXi3hi
+dXh4lo          DN dXi7lo
+dXh4hi          DN dXi7hi
+dXh5lo          DN dXi5lo
+dXh5hi          DN dXi5hi
+dXh6lo          DN dXi4lo
+dXh6hi          DN dXi4hi
+dXh7lo          DN dXi1lo
+dXh7hi          DN dXi1hi
+dXhtlo          DN dXi6lo
+dXhthi          DN dXi6hi
+
+qXg0            QN qXh2
+qXg1            QN qXht
+qXg2            QN qXh1
+qXg3            QN qXh0
+qXg4            QN qXh4
+qXg5            QN qXh5
+qXg6            QN qXh6
+qXg7            QN qXh7
+qXgt            QN qXh3
+
+qXf0            QN qXg6
+qXf1            QN qXg5
+qXf2            QN qXg4
+qXf3            QN qXgt
+qXf4            QN qXg3
+qXf5            QN qXg2
+qXf6            QN qXg1
+qXf7            QN qXg0
+qXft            QN qXg7
+
+
+qXt0            QN 1.S32
+qXt1            QN 2.S32
+qT0lo           QN 1.S32         
+qT0hi           QN 2.S32         
+qT1lo           QN 3.S32         
+qT1hi           QN 4.S32         
+qScalelo        QN 5.S32        ;// used to read post scale values
+qScalehi        QN 6.S32
+qTemp0          QN 5.S32         
+qTemp1          QN 6.S32    
+
+
+Scale1          EQU 6
+Scale2          EQU 15
+qScale1         QN Scale1.S16     
+qScale2         QN Scale2.S16     
+dScale1lo       DN (Scale1*2).S16     
+dScale1hi       DN (Scale1*2+1).S16
+dScale2lo       DN (Scale2*2).S16     
+dScale2hi       DN (Scale2*2+1).S16
+
+dCoefs          DN 0.S16        ;// Scale coefficients in format {[0] [C] [S] [InvSqrt2]}
+InvSqrt2        DN dCoefs[0]    ;// 1/sqrt(2) in Q15
+S               DN dCoefs[1]    ;// Sin(PI/8) in Q15
+C               DN dCoefs[2]    ;// Cos(PI/8) in Q15
+
+pTemp           RN 12
+
+                
+        IMPORT  armCOMM_IDCTCoef
+                    
+        VLD1        {qXj0,qXj1}, [pSrc @64]!
+        VLD1        {qXj2,qXj3}, [pSrc @64]!
+        VLD1        {qXj4,qXj5}, [pSrc @64]!
+        VLD1        {qXj6,qXj7}, [pSrc @64]!
+        
+        ;// Load PreScale and multiply with Src
+        ;// IStage 4
+        
+        IF "$inscale"="s16"                         ;// 16X16 Mul
+            M_IDCT_PRESCALE16
+        ENDIF
+        
+        IF "$inscale"="s32"                         ;// 32X32 ,ul
+            M_IDCT_PRESCALE32
+        ENDIF
+
+        ;// IStage 3
+        VQRDMULH     qXi2, qXi2, InvSqrt2            ;// i2/sqrt(2)
+        VHADD       qXh0, qXi0, qXi1                ;// (i0+i1)/2
+        VHSUB       qXh1, qXi0, qXi1                ;// (i0-i1)/2
+        VHADD       qXh7, qXi5, qXi7                ;// (i5+i7)/4
+        VSUB        qXh5, qXi5, qXi7                ;// (i5-i7)/2
+        VQRDMULH     qXh5, qXh5, InvSqrt2            ;// h5/sqrt(2)
+        VSUB        qXh2, qXi2, qXi3                ;// h2, h3
+
+        VMULL       qXt0, dXi4lo, C                 ;// c*i4
+        VMLAL       qXt0, dXi6lo, S                 ;// c*i4+s*i6
+        VMULL       qXt1, dXi4hi, C
+        VMLAL       qXt1, dXi6hi, S
+        VSHRN       dXh4lo, qXt0, #16               ;// h4
+        VSHRN       dXh4hi, qXt1, #16
+        
+        VMULL       qXt0, dXi6lo, C                 ;// c*i6
+        VMLSL       qXt0, dXi4lo, S                 ;// -s*i4 + c*h6
+        VMULL       qXt1, dXi6hi, C
+        VMLSL       qXt1, dXi4hi, S
+        VSHRN       dXh6lo, qXt0, #16               ;// h6
+        VSHRN       dXh6hi, qXt1, #16
+        
+        ;// IStage 2
+        VSUB        qXg6, qXh6, qXh7
+        VSUB        qXg5, qXh5, qXg6
+        VSUB        qXg4, qXh4, qXg5
+        VHADD       qXg1, qXh1, qXh2        ;// (h1+h2)/2
+        VHSUB       qXg2, qXh1, qXh2        ;// (h1-h2)/2
+        VHADD       qXg0, qXh0, qXh3        ;// (h0+h3)/2
+        VHSUB       qXg3, qXh0, qXh3        ;// (h0-h3)/2
+
+        ;// IStage 1 all rows
+        VADD        qXf3, qXg3, qXg4        
+        VSUB        qXf4, qXg3, qXg4        
+        VADD        qXf2, qXg2, qXg5        
+        VSUB        qXf5, qXg2, qXg5        
+        VADD        qXf1, qXg1, qXg6
+        VSUB        qXf6, qXg1, qXg6        
+        VADD        qXf0, qXg0, qXg7
+        VSUB        qXf7, qXg0, qXg7      
+
+        ;// Transpose, store and loop
+XTR0            EQU Src5
+XTR1            EQU Tmp
+XTR2            EQU Src6
+XTR3            EQU Src7
+XTR4            EQU Src3
+XTR5            EQU Src0
+XTR6            EQU Src1
+XTR7            EQU Src2
+XTRt            EQU Src4
+                
+qA0             QN  XTR0.S32  ;// for XTRpose
+qA1             QN  XTR1.S32
+qA2             QN  XTR2.S32
+qA3             QN  XTR3.S32
+qA4             QN  XTR4.S32
+qA5             QN  XTR5.S32
+qA6             QN  XTR6.S32
+qA7             QN  XTR7.S32
+
+dB0             DN  XTR0*2+1      ;// for using VSWP
+dB1             DN  XTR1*2+1
+dB2             DN  XTR2*2+1
+dB3             DN  XTR3*2+1
+dB4             DN  XTR4*2
+dB5             DN  XTR5*2
+dB6             DN  XTR6*2
+dB7             DN  XTR7*2
+
+          
+        VTRN        qXf0, qXf1
+        VTRN        qXf2, qXf3
+        VTRN        qXf4, qXf5
+        VTRN        qXf6, qXf7
+        VTRN        qA0, qA2
+        VTRN        qA1, qA3
+        VTRN        qA4, qA6
+        VTRN        qA5, qA7        
+        VSWP        dB0, dB4
+        VSWP        dB1, dB5
+        VSWP        dB2, dB6
+        VSWP        dB3, dB7
+        
+
+qYj0            QN qXf0
+qYj1            QN qXf1
+qYj2            QN qXf2
+qYj3            QN qXf3
+qYj4            QN qXf4
+qYj5            QN qXf5
+qYj6            QN qXf6
+qYj7            QN qXf7
+qYjt            QN qXft
+
+dYj0lo          DN (XTR0*2).S16
+dYj0hi          DN (XTR0*2+1).S16
+dYj1lo          DN (XTR1*2).S16
+dYj1hi          DN (XTR1*2+1).S16
+dYj2lo          DN (XTR2*2).S16
+dYj2hi          DN (XTR2*2+1).S16
+dYj3lo          DN (XTR3*2).S16
+dYj3hi          DN (XTR3*2+1).S16
+dYj4lo          DN (XTR4*2).S16
+dYj4hi          DN (XTR4*2+1).S16
+dYj5lo          DN (XTR5*2).S16
+dYj5hi          DN (XTR5*2+1).S16
+dYj6lo          DN (XTR6*2).S16
+dYj6hi          DN (XTR6*2+1).S16
+dYj7lo          DN (XTR7*2).S16
+dYj7hi          DN (XTR7*2+1).S16
+dYjtlo          DN (XTRt*2).S16
+dYjthi          DN (XTRt*2+1).S16
+
+qYi0            QN qYj0
+qYi1            QN qYj4
+qYi2            QN qYj2
+qYi3            QN qYj7
+qYi4            QN qYj5
+qYi5            QN qYjt
+qYi6            QN qYj1
+qYi7            QN qYj6
+qYit            QN qYj3
+
+dYi0lo          DN dYj0lo
+dYi0hi          DN dYj0hi
+dYi1lo          DN dYj4lo
+dYi1hi          DN dYj4hi
+dYi2lo          DN dYj2lo
+dYi2hi          DN dYj2hi
+dYi3lo          DN dYj7lo
+dYi3hi          DN dYj7hi
+dYi4lo          DN dYj5lo
+dYi4hi          DN dYj5hi
+dYi5lo          DN dYjtlo
+dYi5hi          DN dYjthi
+dYi6lo          DN dYj1lo
+dYi6hi          DN dYj1hi
+dYi7lo          DN dYj6lo
+dYi7hi          DN dYj6hi
+dYitlo          DN dYj3lo
+dYithi          DN dYj3hi
+
+qYh0            QN qYit
+qYh1            QN qYi0
+qYh2            QN qYi2
+qYh3            QN qYi3
+qYh4            QN qYi7
+qYh5            QN qYi5
+qYh6            QN qYi4
+qYh7            QN qYi1
+qYht            QN qYi6
+
+dYh0lo          DN dYitlo
+dYh0hi          DN dYithi
+dYh1lo          DN dYi0lo
+dYh1hi          DN dYi0hi
+dYh2lo          DN dYi2lo
+dYh2hi          DN dYi2hi
+dYh3lo          DN dYi3lo
+dYh3hi          DN dYi3hi
+dYh4lo          DN dYi7lo
+dYh4hi          DN dYi7hi
+dYh5lo          DN dYi5lo
+dYh5hi          DN dYi5hi
+dYh6lo          DN dYi4lo
+dYh6hi          DN dYi4hi
+dYh7lo          DN dYi1lo
+dYh7hi          DN dYi1hi
+dYhtlo          DN dYi6lo
+dYhthi          DN dYi6hi
+
+qYg0            QN qYh2
+qYg1            QN qYht
+qYg2            QN qYh1
+qYg3            QN qYh0
+qYg4            QN qYh4
+qYg5            QN qYh5
+qYg6            QN qYh6
+qYg7            QN qYh7
+qYgt            QN qYh3
+
+qYf0            QN qYg6
+qYf1            QN qYg5
+qYf2            QN qYg4
+qYf3            QN qYgt
+qYf4            QN qYg3
+qYf5            QN qYg2
+qYf6            QN qYg1
+qYf7            QN qYg0
+qYft            QN qYg7
+
+        VRSHR       qYj7, qYj7, #2
+        VRSHR       qYj6, qYj6, #1
+        
+        VHADD       qYi5, qYj1, qYj7        ;// i5 = (j1+j7)/2
+        VSUB        qYi6, qYj1, qYj7        ;// i6 = j1-j7
+        VHADD       qYi3, qYj2, qYj6        ;// i3 = (j2+j6)/2
+        VSUB        qYi2, qYj2, qYj6        ;// i2 = j2-j6
+        VHADD       qYi7, qYj5, qYj3        ;// i7 = (j5+j3)/2
+        VSUB        qYi4, qYj5, qYj3        ;// i4 = j5-j3
+
+        VQRDMULH     qYi2, qYi2, InvSqrt2    ;// i2/sqrt(2)
+        ;// IStage 4,3 rows 0to1 x 1/2
+        
+        MOV         pTemp, #0x4             ;// ensure correct round
+        VDUP        qScale1, pTemp           ;// of DC result
+        VADD        qYi0, qYi0, qScale1
+        
+        VHADD       qYh0, qYi0, qYi1        ;// (i0+i1)/2
+        VHSUB       qYh1, qYi0, qYi1        ;// (i0-i1)/2
+
+        VHADD       qYh7, qYi5, qYi7        ;// (i5+i7)/4
+        VSUB        qYh5, qYi5, qYi7        ;// (i5-i7)/2
+        VSUB        qYh2, qYi2, qYi3        ;// h2, h3
+        VQRDMULH     qYh5, qYh5, InvSqrt2    ;// h5/sqrt(2)
+
+        VMULL       qXt0, dYi4lo, C         ;// c*i4
+        VMLAL       qXt0, dYi6lo, S         ;// c*i4+s*i6
+        VMULL       qXt1, dYi4hi, C
+        VMLAL       qXt1, dYi6hi, S
+        VSHRN       dYh4lo, qXt0, #16       ;// h4
+        VSHRN       dYh4hi, qXt1, #16
+        
+        VMULL       qXt0, dYi6lo, C         ;// c*i6
+        VMLSL       qXt0, dYi4lo, S         ;// -s*i4 + c*h6
+        VMULL       qXt1, dYi6hi, C
+        VMLSL       qXt1, dYi4hi, S
+        VSHRN       dYh6lo, qXt0, #16       ;// h6
+        VSHRN       dYh6hi, qXt1, #16
+        
+        VSUB        qYg6, qYh6, qYh7
+        VSUB        qYg5, qYh5, qYg6
+        VSUB        qYg4, qYh4, qYg5
+        
+        ;// IStage 2 rows 0to3 x 1/2
+        VHADD       qYg1, qYh1, qYh2        ;// (h1+h2)/2
+        VHSUB       qYg2, qYh1, qYh2        ;// (h1-h2)/2
+        VHADD       qYg0, qYh0, qYh3        ;// (h0+h3)/2
+        VHSUB       qYg3, qYh0, qYh3        ;// (h0-h3)/2
+        
+
+        ;// IStage 1 all rows
+        VHADD        qYf3, qYg3, qYg4        
+        VHSUB        qYf4, qYg3, qYg4        
+        VHADD        qYf2, qYg2, qYg5        
+        VHSUB        qYf5, qYg2, qYg5        
+        VHADD        qYf1, qYg1, qYg6
+        VHSUB        qYf6, qYg1, qYg6        
+        VHADD        qYf0, qYg0, qYg7
+        VHSUB        qYf7, qYg0, qYg7      
+
+YTR0            EQU Src0
+YTR1            EQU Src4
+YTR2            EQU Src1
+YTR3            EQU Src2
+YTR4            EQU Src7
+YTR5            EQU Src5
+YTR6            EQU Tmp
+YTR7            EQU Src6
+YTRt            EQU Src3
+
+qC0             QN  YTR0.S32                ;// for YTRpose
+qC1             QN  YTR1.S32
+qC2             QN  YTR2.S32
+qC3             QN  YTR3.S32
+qC4             QN  YTR4.S32
+qC5             QN  YTR5.S32
+qC6             QN  YTR6.S32
+qC7             QN  YTR7.S32
+
+dD0             DN  YTR0*2+1                ;// for using VSWP
+dD1             DN  YTR1*2+1
+dD2             DN  YTR2*2+1
+dD3             DN  YTR3*2+1
+dD4             DN  YTR4*2
+dD5             DN  YTR5*2
+dD6             DN  YTR6*2
+dD7             DN  YTR7*2
+          
+        VTRN        qYf0, qYf1
+        VTRN        qYf2, qYf3
+        VTRN        qYf4, qYf5
+        VTRN        qYf6, qYf7
+        VTRN        qC0, qC2
+        VTRN        qC1, qC3
+        VTRN        qC4, qC6
+        VTRN        qC5, qC7        
+        VSWP        dD0, dD4
+        VSWP        dD1, dD5
+        VSWP        dD2, dD6
+        VSWP        dD3, dD7
+
+        
+dYf0U8          DN YTR0*2.U8
+dYf1U8          DN YTR1*2.U8
+dYf2U8          DN YTR2*2.U8
+dYf3U8          DN YTR3*2.U8
+dYf4U8          DN YTR4*2.U8
+dYf5U8          DN YTR5*2.U8
+dYf6U8          DN YTR6*2.U8
+dYf7U8          DN YTR7*2.U8
+        
+        ;//
+        ;// Do saturation if outsize is other than S16
+        ;//
+        
+        IF ("$outsize"="u8")
+            ;// Output range [0-255]
+            VQMOVN            dYf0U8, qYf0
+            VQMOVN            dYf1U8, qYf1
+            VQMOVN            dYf2U8, qYf2
+            VQMOVN            dYf3U8, qYf3
+            VQMOVN            dYf4U8, qYf4
+            VQMOVN            dYf5U8, qYf5
+            VQMOVN            dYf6U8, qYf6
+            VQMOVN            dYf7U8, qYf7
+        ENDIF
+        
+        IF ("$outsize"="s9")
+            ;// Output range [-256 to +255]
+            VQSHL            qYf0, qYf0, #16-9
+            VQSHL            qYf1, qYf1, #16-9
+            VQSHL            qYf2, qYf2, #16-9
+            VQSHL            qYf3, qYf3, #16-9
+            VQSHL            qYf4, qYf4, #16-9
+            VQSHL            qYf5, qYf5, #16-9
+            VQSHL            qYf6, qYf6, #16-9
+            VQSHL            qYf7, qYf7, #16-9
+            
+            VSHR             qYf0, qYf0, #16-9
+            VSHR             qYf1, qYf1, #16-9
+            VSHR             qYf2, qYf2, #16-9
+            VSHR             qYf3, qYf3, #16-9
+            VSHR             qYf4, qYf4, #16-9
+            VSHR             qYf5, qYf5, #16-9
+            VSHR             qYf6, qYf6, #16-9
+            VSHR             qYf7, qYf7, #16-9
+        ENDIF
+
+        ;// Store output depending on the Stride size
+        IF "$stride"="s"
+            VST1        qYf0, [pDest @64], Stride
+            VST1        qYf1, [pDest @64], Stride
+            VST1        qYf2, [pDest @64], Stride
+            VST1        qYf3, [pDest @64], Stride
+            VST1        qYf4, [pDest @64], Stride
+            VST1        qYf5, [pDest @64], Stride
+            VST1        qYf6, [pDest @64], Stride
+            VST1        qYf7, [pDest @64]            
+        ELSE
+            IF ("$outsize"="u8")
+                VST1        dYf0U8, [pDest @64], #8
+                VST1        dYf1U8, [pDest @64], #8
+                VST1        dYf2U8, [pDest @64], #8
+                VST1        dYf3U8, [pDest @64], #8
+                VST1        dYf4U8, [pDest @64], #8
+                VST1        dYf5U8, [pDest @64], #8
+                VST1        dYf6U8, [pDest @64], #8
+                VST1        dYf7U8, [pDest @64]
+            ELSE
+                ;// ("$outsize"="s9") or ("$outsize"="s16")
+                VST1        qYf0, [pDest @64], #16
+                VST1        qYf1, [pDest @64], #16
+                VST1        qYf2, [pDest @64], #16
+                VST1        qYf3, [pDest @64], #16
+                VST1        qYf4, [pDest @64], #16
+                VST1        qYf5, [pDest @64], #16
+                VST1        qYf6, [pDest @64], #16
+                VST1        qYf7, [pDest @64]
+            ENDIF
+        
+        ENDIF
+
+
+
+        ENDIF ;// CortexA8
+
+
+
+        MEND        
+
+        ;// Scale TWO input rows with TWO rows of 16 bit scale values
+        ;//
+        ;// This macro is used by M_IDCT_PRESCALE16 to pre-scale two rows of
+        ;// input (eight S16 values each) with two rows of scale values. It also
+        ;// loads the next two rows of scale values from pScale if $LastRow is "0".
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $dAlo           - Input D register with first four S16 values of row n
+        ;// $dAhi           - Input D register with next four S16 values of row n
+        ;// $dBlo           - Input D register with first four S16 values of row n+1
+        ;// $dBhi           - Input D register with next four S16 values of row n+1
+        ;// pScale          - Pointer to next row of scale values
+        ;// qT0lo           - Temporary scratch register
+        ;// qT0hi           - Temporary scratch register
+        ;// qT1lo           - Temporary scratch register
+        ;// qT1hi           - Temporary scratch register
+        ;// dScale1lo       - Scale value of row n
+        ;// dScale1hi       - Scale value of row n
+        ;// dScale2lo       - Scale value of row n+1
+        ;// dScale2hi       - Scale value of row n+1
+        ;//
+        ;// Input Flag
+        ;//
+        ;// $LastRow        - Flag to indicate whether current row is last row
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// $dAlo           - Scaled output values (first four S16 of row n)
+        ;// $dAhi           - Scaled output values (next four S16 of row n)
+        ;// $dBlo           - Scaled output values (first four S16 of row n+1)
+        ;// $dBhi           - Scaled output values (next four S16 of row n+1)
+        ;// qScale1         - Scale values for next row
+        ;// qScale2         - Scale values for next row+1
+        ;// pScale          - Pointer to next row of scale values
+        ;//
+        MACRO
+        M_IDCT_SCALE16 $dAlo, $dAhi, $dBlo, $dBhi, $LastRow
+        VMULL       qT0lo, $dAlo, dScale1lo     ;// row n,   first four: widening multiply into S32
+        VMULL       qT0hi, $dAhi, dScale1hi     ;// row n,   next four
+        VMULL       qT1lo, $dBlo, dScale2lo     ;// row n+1, first four
+        VMULL       qT1hi, $dBhi, dScale2hi     ;// row n+1, next four
+        IF "$LastRow"="0"
+            VLD1        qScale1, [pScale], #16  ;// Current scales are consumed above; load scale for row n+2
+            VLD1        qScale2, [pScale], #16  ;// Load scale for row n+3 (pScale advances 16 bytes per load)
+        ENDIF
+        VQRSHRN       $dAlo, qT0lo, #12        ;// Rounding shift right by 12, saturating narrow
+        VQRSHRN       $dAhi, qT0hi, #12        ;// back into the input D registers (in-place result)
+        VQRSHRN       $dBlo, qT1lo, #12        
+        VQRSHRN       $dBhi, qT1hi, #12        
+        MEND
+
+        ;// Scale 8x8 block input values with 16 bit scale values
+        ;//
+        ;// This macro is used to pre-scale an 8x8 block of input values.
+        ;// It also performs the first-stage transformations of the IDCT.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// dXjnlo          - n th input D register with first four S16 values
+        ;// dXjnhi          - n th input D register with next four S16 values
+        ;// qXjn            - n th input Q register with eight S16 values
+        ;// pScale          - Pointer to scale values
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// qXin            - n th output Q register with eight S16 output values of 1st stage
+        ;//
+        MACRO
+        M_IDCT_PRESCALE16
+        VLD1        qScale1, [pScale], #16      ;// Load Pre scale for row 0
+        VLD1        qScale2, [pScale], #16      ;// Load Pre scale for row 1
+        M_IDCT_SCALE16 dXj0lo, dXj0hi, dXj1lo, dXj1hi, 0        ;// Pre scale row 0 & 1
+        M_IDCT_SCALE16 dXj2lo, dXj2hi, dXj3lo, dXj3hi, 0        ;// Pre scale row 2 & 3
+        M_IDCT_SCALE16 dXj4lo, dXj4hi, dXj5lo, dXj5hi, 0        ;// Pre scale row 4 & 5
+        M_IDCT_SCALE16 dXj6lo, dXj6hi, dXj7lo, dXj7hi, 1        ;// Pre scale row 6 & 7 (last pair: no further scale load)
+        VHADD       qXi5, qXj1, qXj7            ;// (j1+j7)/2
+        VSUB        qXi6, qXj1, qXj7            ;// j1-j7
+        LDR         pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants (overwrites pSrc)
+        VHADD       qXi3, qXj2, qXj6            ;// (j2+j6)/2
+        VSUB        qXi2, qXj2, qXj6            ;// j2-j6
+        VLDR        dCoefs, [pSrc]              ;// Load DCT inverse AAN constants
+        VHADD       qXi7, qXj5, qXj3            ;// (j5+j3)/2
+        VSUB        qXi4, qXj5, qXj3            ;// j5-j3
+        MEND    
+        
+        
+        ;// Scale 8x8 block input values with 32 bit scale values
+        ;//
+        ;// This macro is used to pre-scale an 8x8 block of input values.
+        ;// It also performs the first-stage transformations of the IDCT.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// dXjnlo          - n th input D register with first four S16 values
+        ;// dXjnhi          - n th input D register with next four S16 values
+        ;// qXjn            - n th input Q register with eight S16 values
+        ;// pScale          - Pointer to 32bit scale values in Q23 format
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// dXinlo          - n th output D register with first four S16 output values of 1st stage
+        ;// dXinhi          - n th output D register with next four S16 output values of 1st stage
+        ;//
+        MACRO
+        M_IDCT_PRESCALE32
+qScale0lo       QN 0.S32
+qScale0hi       QN 1.S32
+qScale1lo       QN 2.S32
+qScale1hi       QN 3.S32
+qScale2lo       QN qScale1lo
+qScale2hi       QN qScale1hi
+qScale3lo       QN qScale1lo
+qScale3hi       QN qScale1hi
+qScale4lo       QN qScale1lo
+qScale4hi       QN qScale1hi
+qScale5lo       QN qScale0lo
+qScale5hi       QN qScale0hi
+qScale6lo       QN qScale0lo
+qScale6hi       QN qScale0hi
+qScale7lo       QN qScale0lo
+qScale7hi       QN qScale0hi
+
+qSrc0lo         QN 4.S32
+qSrc0hi         QN 5.S32
+qSrc1lo         QN 6.S32
+qSrc1hi         QN Src4.S32     ;// same Q register as qXj4; qXj4 is reloaded before row 4 is scaled
+qSrc2lo         QN qSrc0lo
+qSrc2hi         QN qSrc0hi
+qSrc3lo         QN qSrc0lo
+qSrc3hi         QN qSrc0hi
+qSrc4lo         QN qSrc0lo
+qSrc4hi         QN qSrc0hi
+qSrc5lo         QN qSrc1lo
+qSrc5hi         QN qSrc1hi
+qSrc6lo         QN qSrc1lo
+qSrc6hi         QN qSrc1hi
+qSrc7lo         QN qSrc0lo
+qSrc7hi         QN qSrc0hi
+
+qRes17lo        QN qScale0lo
+qRes17hi        QN qScale0hi
+qRes26lo        QN qScale0lo
+qRes26hi        QN qScale0hi
+qRes53lo        QN qScale0lo
+qRes53hi        QN qScale0hi
+
+            ADD         pTemp, pScale, #4*8*7           ;// Address of pScale[7] (7 rows of eight 32-bit values)
+            
+            ;// Row 0
+            VLD1        {qScale0lo, qScale0hi}, [pScale]!   ;// scale row 0; pScale advances to row 1
+            VSHLL       qSrc0lo, dXj0lo, #(12-1)            ;// widen S16 to S32 and shift left by 11
+            VSHLL       qSrc0hi, dXj0hi, #(12-1)            
+            VLD1        {qScale1lo, qScale1hi}, [pScale]!   ;// scale row 1
+            VQRDMULH    qSrc0lo, qScale0lo, qSrc0lo         ;// saturating rounding doubling multiply, high half
+            VQRDMULH    qSrc0hi, qScale0hi, qSrc0hi
+            VLD1        {qScale7lo, qScale7hi}, [pTemp]!    ;// scale row 7 (reuses the scale row 0 registers)
+            VSHLL       qSrc1lo, dXj1lo, #(12-1)
+            VSHLL       qSrc1hi, dXj1hi, #(12-1)            
+            VMOVN       dXi0lo, qSrc0lo                 ;// Output i0
+            VMOVN       dXi0hi, qSrc0hi
+            VSHLL       qSrc7lo, dXj7lo, #(12-1)
+            VSHLL       qSrc7hi, dXj7hi, #(12-1)
+            SUB         pTemp, pTemp, #((16*2)+(4*8*1))     ;// back 64 bytes: pTemp now addresses scale row 6
+            VQRDMULH    qSrc1lo, qScale1lo, qSrc1lo
+            VQRDMULH    qSrc1hi, qScale1hi, qSrc1hi
+            VQRDMULH    qSrc7lo, qScale7lo, qSrc7lo
+            VQRDMULH    qSrc7hi, qScale7hi, qSrc7hi
+            VLD1        {qScale2lo, qScale2hi}, [pScale]!   ;// scale row 2
+
+            ;// Row 1 & 7
+            VHADD       qRes17lo, qSrc1lo, qSrc7lo      ;// (j1+j7)/2
+            VHADD       qRes17hi, qSrc1hi, qSrc7hi      ;// (j1+j7)/2
+            VMOVN       dXi5lo, qRes17lo                ;// Output i5
+            VMOVN       dXi5hi, qRes17hi              
+            VSUB        qRes17lo, qSrc1lo, qSrc7lo      ;// j1-j7
+            VSUB        qRes17hi, qSrc1hi, qSrc7hi      ;// j1-j7
+            VMOVN       dXi6lo, qRes17lo                ;// Output i6
+            VMOVN       dXi6hi, qRes17hi      
+            VSHLL       qSrc2lo, dXj2lo, #(12-1)
+            VSHLL       qSrc2hi, dXj2hi, #(12-1)
+            VLD1        {qScale6lo, qScale6hi}, [pTemp]!    ;// scale row 6
+            VSHLL       qSrc6lo, dXj6lo, #(12-1)
+            VSHLL       qSrc6hi, dXj6hi, #(12-1)
+            SUB         pTemp, pTemp, #((16*2)+(4*8*1))     ;// back 64 bytes: pTemp now addresses scale row 5
+            VQRDMULH    qSrc2lo, qScale2lo, qSrc2lo
+            VQRDMULH    qSrc2hi, qScale2hi, qSrc2hi
+            VQRDMULH    qSrc6lo, qScale6lo, qSrc6lo
+            VQRDMULH    qSrc6hi, qScale6hi, qSrc6hi
+            VLD1        {qScale3lo, qScale3hi}, [pScale]!   ;// scale row 3; pScale advances to row 4
+
+            ;// Row 2 & 6
+            VHADD       qRes26lo, qSrc2lo, qSrc6lo      ;// (j2+j6)/2
+            VHADD       qRes26hi, qSrc2hi, qSrc6hi      ;// (j2+j6)/2
+            VMOVN       dXi3lo, qRes26lo                ;// Output i3
+            VMOVN       dXi3hi, qRes26hi              
+            VSUB        qRes26lo, qSrc2lo, qSrc6lo      ;// j2-j6
+            VSUB        qRes26hi, qSrc2hi, qSrc6hi      ;// j2-j6
+            VMOVN       dXi2lo, qRes26lo                ;// Output i2
+            VMOVN       dXi2hi, qRes26hi      
+            VSHLL       qSrc3lo, dXj3lo, #(12-1)
+            VSHLL       qSrc3hi, dXj3hi, #(12-1)
+            VLD1        {qScale5lo, qScale5hi}, [pTemp]!    ;// scale row 5
+            VSHLL       qSrc5lo, dXj5lo, #(12-1)
+            VSHLL       qSrc5hi, dXj5hi, #(12-1)
+            VQRDMULH    qSrc3lo, qScale3lo, qSrc3lo
+            VQRDMULH    qSrc3hi, qScale3hi, qSrc3hi
+            VQRDMULH    qSrc5lo, qScale5lo, qSrc5lo
+            VQRDMULH    qSrc5hi, qScale5hi, qSrc5hi
+            
+            ;// Row 3 & 5
+            VHADD       qRes53lo, qSrc5lo, qSrc3lo      ;// (j5+j3)/2
+            VHADD       qRes53hi, qSrc5hi, qSrc3hi      ;// (j5+j3)/2
+            SUB         pSrc, pSrc, #16*2*2             ;// back 64 bytes; presumably source row 4, given pSrc was advanced past all 8 rows before this macro
+            VMOVN       dXi7lo, qRes53lo                ;// Output i7
+            VMOVN       dXi7hi, qRes53hi              
+            VSUB        qRes53lo, qSrc5lo, qSrc3lo      ;// j5-j3
+            VSUB        qRes53hi, qSrc5hi, qSrc3hi      ;// j5-j3
+            VLD1        qXj4, [pSrc @64]                ;// reload j4: its register was used as scratch (qSrc1hi/5hi/6hi)
+            VMOVN       dXi4lo, qRes53lo                ;// Output i4
+            VMOVN       dXi4hi, qRes53hi                              
+            VSHLL       qSrc4lo, dXj4lo, #(12-1)
+            VSHLL       qSrc4hi, dXj4hi, #(12-1)
+            VLD1        {qScale4lo, qScale4hi}, [pScale]    ;// scale row 4 (no writeback)
+            LDR         pSrc, =armCOMM_IDCTCoef     ;// Address of DCT inverse AAN constants (overwrites pSrc)
+            VQRDMULH    qSrc4lo, qScale4lo, qSrc4lo
+            VQRDMULH    qSrc4hi, qScale4hi, qSrc4hi
+            VLDR        dCoefs, [pSrc]                  ;// Load DCT inverse AAN constants
+            ;// Row 4
+            VMOVN       dXi1lo, qSrc4lo                 ;// Output i1
+            VMOVN       dXi1hi, qSrc4hi              
+        
+        MEND
+                                                
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h
new file mode 100644
index 0000000..b5da9dce
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h
@@ -0,0 +1,27 @@
+/**
+ * 
+ * File Name:  armCOMM_MaskTable.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Mask Table to mask the end of array
+ */
+ 
+
+
+#ifndef _ARMCOMM_MASKTABLE_H_
+#define _ARMCOMM_MASKTABLE_H_
+
+#define MaskTableSize 72   /* number of entries in each table declared below */
+  
+/* Mask tables with 16-bit and 8-bit entries. OMX_U16/OMX_U8 are not declared in this header, so the OMX type definitions must be included first. */
+
+extern const OMX_U16 armCOMM_qMaskTable16[MaskTableSize];
+extern const OMX_U8 armCOMM_qMaskTable8[MaskTableSize];
+
+#endif /* _ARMCOMM_MASKTABLE_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h
new file mode 100644
index 0000000..13e5b2b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h
@@ -0,0 +1,43 @@
+/* Guard the header against multiple inclusion. */
+#ifndef __ARM_COMM_VERSION_H__
+#define __ARM_COMM_VERSION_H__
+
+
+/* The following line should be in omxtypes.h but hasn't been approved by OpenMAX yet */
+#define OMX_VERSION 102
+
+/* Two-level stringification: ARM_INDIRECT macro-expands its argument first, then ARM_QUOTE turns the result into a string literal. */
+#define ARM_QUOTE(a) #a
+#define ARM_INDIRECT(A) ARM_QUOTE(A)
+
+/* OMX_VERSION as a string literal ("102" for the value defined above) that can be used, for example, to print it out. */
+#define ARM_VERSION_STRING ARM_INDIRECT(OMX_VERSION)
+
+
+/* Define this in order to turn on ARM version/release/build strings in each domain */
+#define ARM_INCLUDE_VERSION_DESCRIPTIONS
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS
+  extern const char * const omxAC_VersionDescription;
+  extern const char * const omxIC_VersionDescription;
+  extern const char * const omxIP_VersionDescription;
+  extern const char * const omxSP_VersionDescription;
+  extern const char * const omxVC_VersionDescription;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
+
+
+/* The following entries should be automatically updated by the release script */
+/* They are used in the ARM version strings defined for each domain.             */
+
+/* The release tag associated with this release of the library. - used for source and object releases */
+#define OMX_ARM_RELEASE_TAG  "r0p0-00bet1"
+
+/* The ARM architecture used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_ARCHITECTURE "ARM Architecture V6"
+
+/* The ARM Toolchain used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_TOOLCHAIN    "ARM RVCT 3.1"
+
+
+#endif /* __ARM_COMM_VERSION_H__ */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h
new file mode 100644
index 0000000..2df1fc8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h
@@ -0,0 +1,1154 @@
+;//
+;// 
+;// File Name:  armCOMM_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// ARM optimized OpenMAX common header file
+;//
+
+;// Protect against multiple inclusion
+ IF :LNOT::DEF:ARMCOMM_S_H
+ GBLL ARMCOMM_S_H
+
+        REQUIRE8            ;// Requires 8-byte stack alignment
+        PRESERVE8           ;// Preserves 8-byte stack alignment
+        
+        GBLL    ARM_ERRORCHECK
+ARM_ERRORCHECK  SETL {FALSE}
+
+;// Globals shared by the macros in this header
+
+        GBLS    _RRegList   ;// R saved register list
+        GBLS    _DRegList   ;// D saved register list
+        GBLS    _Variant    ;// Selected processor variant
+        GBLS    _CPU        ;// CPU name
+        GBLS    _Struct     ;// Structure name
+        
+        GBLL    _InFunc     ;// Inside function assembly flag
+        GBLL    _SwLong     ;// Long switch flag
+        
+        GBLA    _RBytes     ;// Number of register bytes on stack
+        GBLA    _SBytes     ;// Number of scratch bytes on stack 
+        GBLA    _ABytes     ;// Stack offset of next argument
+        GBLA    _Workspace  ;// Stack offset of scratch workspace
+        GBLA    _F          ;// Function number
+        GBLA    _StOff      ;// Struct offset
+        GBLA    _SwNum      ;// Switch number
+        GBLS    _32         ;// Symbol suffix for 32 byte alignment (M_ALLOC32/M_ADR32)
+        GBLS    _16         ;// Symbol suffix for 16 byte alignment (M_ALLOC16/M_ADR16)
+        
+_InFunc         SETL    {FALSE}
+_SBytes         SETA    0
+_F              SETA    0
+_SwNum          SETA    0
+_32             SETS    "ALIGN32"
+_16             SETS    "ALIGN16"
+
+;/////////////////////////////////////////////////////////
+;// Override the tools settings of the CPU if the symbol
+;// OVERRIDECPU is defined, otherwise use the CPU defined by the
+;// assembler settings ({CPU}). The result is held in _CPU.
+;/////////////////////////////////////////////////////////
+
+       IF :DEF: OVERRIDECPU
+_CPU       SETS  OVERRIDECPU
+       ELSE
+_CPU       SETS    {CPU}       
+       ENDIF
+
+
+
+;/////////////////////////////////////////////////////////
+;// Work out which code to build. Variant names that are already defined here are rejected: sources must use M_VARIANTS.
+;/////////////////////////////////////////////////////////
+
+        IF :DEF:ARM1136JS:LOR::DEF:CortexA8:LOR::DEF:ARM_GENERIC
+            INFO 1,"Please switch to using M_VARIANTS"
+        ENDIF
+
+        ;// Define and reset (to FALSE) all officially recognised variants
+        MACRO
+        _M_DEF_VARIANTS
+        _M_DEF_VARIANT ARM926EJS
+        _M_DEF_VARIANT ARM1136JS
+        _M_DEF_VARIANT ARM1136JS_U
+        _M_DEF_VARIANT CortexA8
+        _M_DEF_VARIANT ARM7TDMI
+        MEND
+        
+        MACRO
+        _M_DEF_VARIANT $var
+        GBLL $var               ;// logical flag named after the variant
+        GBLL _ok$var            ;// marker whose existence _M_VARIANT tests to recognise the name
+$var    SETL {FALSE}           ;// variant starts disabled
+        MEND        
+        
+
+        ;// Variant declaration
+        ;//
+        ;// Takes up to eight names of code variants supported by this
+        ;// source file. The macro marks them as available, picks the first
+        ;// available variant from the fall back list of the configured CPU
+        ;// (_CPU) and leaves only that variant's flag set to TRUE.
+        ;//        
+        MACRO
+        M_VARIANTS $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7        
+        ;// Reset all variant flags, then set to TRUE the variants that are supported
+        _M_DEF_VARIANTS
+        _M_VARIANT $v0
+        _M_VARIANT $v1
+        _M_VARIANT $v2
+        _M_VARIANT $v3
+        _M_VARIANT $v4
+        _M_VARIANT $v5
+        _M_VARIANT $v6
+        _M_VARIANT $v7
+        
+        ;// Look for first available variant to match a CPU
+        ;// _M_TRY cpu, variant fall back list (an empty list can never match)
+_Variant SETS ""                
+        _M_TRY ARM926EJ-S,   ARM926EJS
+        _M_TRY ARM1176JZ-S,  ARM1136JS
+        _M_TRY ARM1176JZF-S, ARM1136JS
+        _M_TRY ARM1156T2-S,  ARM1136JS
+        _M_TRY ARM1156T2F-S, ARM1136JS
+        _M_TRY ARM1136J-S,   ARM1136JS
+        _M_TRY ARM1136JF-S,  ARM1136JS
+        _M_TRY MPCore,       ARM1136JS
+        _M_TRY Cortex-A8,    CortexA8, ARM1136JS
+        _M_TRY Cortex-R4,    ARM1136JS
+        _M_TRY ARM7TDMI,     ARM7TDMI
+        
+        ;// Reset every flag again, then enable only the chosen variant
+        _M_DEF_VARIANTS
+        IF _Variant=""
+            INFO 1, "No match found for CPU '$_CPU'"
+        ELSE
+$_Variant   SETL {TRUE}
+        ENDIF
+        MEND
+        
+        ;// Register a variant as available; an empty argument is ignored and a name not declared by _M_DEF_VARIANT is reported via INFO
+        MACRO
+        _M_VARIANT $var
+        IF "$var"=""
+            MEXIT
+        ENDIF
+        IF :LNOT::DEF:_ok$var
+            INFO 1, "Unrecognized variant '$var'"
+        ENDIF
+$var    SETL {TRUE}
+        MEND
+        
+        ;// For a given CPU name, do nothing unless it equals _CPU. Otherwise walk
+        ;// the fall back list in order; the first variant whose flag is TRUE is
+        ;// stored in _Variant. An INFO is raised if none of them is available.
+        MACRO
+        _M_TRY $cpu, $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7
+        IF "$cpu"<>_CPU
+            MEXIT
+        ENDIF
+        _M_TRY1 $v0
+        _M_TRY1 $v1
+        _M_TRY1 $v2
+        _M_TRY1 $v3
+        _M_TRY1 $v4
+        _M_TRY1 $v5
+        _M_TRY1 $v6
+        _M_TRY1 $v7
+        ;// Check a match was found for this CPU
+        IF _Variant=""
+            INFO 1, "No variant match found for CPU '$_CPU'"
+        ENDIF
+        MEND
+        
+        MACRO
+        _M_TRY1 $var
+        IF "$var"=""
+            MEXIT                       ;// unused slot in the fall back list
+        ENDIF
+        IF (_Variant=""):LAND:$var
+_Variant SETS "$var"                    ;// first available variant wins; later ones are ignored
+        ENDIF
+        MEND
+        
+;////////////////////////////////////////////////////////
+;// Structure definition
+;////////////////////////////////////////////////////////
+
+        ;// Begin a structure of the given name: record the name and restart the field offset at 0
+        MACRO
+        M_STRUCT $sname
+_Struct SETS "$sname"
+_StOff  SETA 0
+        MEND
+        
+        ;// Declare a structure field: the symbol $sname_$fname is set to the field
+        ;// offset, after the running offset is rounded up to a multiple of $size
+        ;// $size   = the size of each entry, must be power of 2 
+        ;// $number = (if provided) the number of entries for an array
+        MACRO
+        M_FIELD $fname, $size, $number
+        IF (_StOff:AND:($size-1))!=0
+_StOff      SETA _StOff + ($size - (_StOff:AND:($size-1)))
+        ENDIF
+$_Struct._$fname EQU _StOff
+        IF "$number"<>""
+_StOff      SETA _StOff + $size*$number
+        ELSE
+_StOff      SETA _StOff + $size
+        ENDIF
+        MEND
+        
+        
+        MACRO
+        M_ENDSTRUCT
+sizeof_$_Struct EQU _StOff              ;// size = offset reached so far (no tail padding is added)
+_Struct SETS ""                         ;// no structure is open any more
+        MEND
+
+;//////////////////////////////////////////////////////////
+;// Switch and table macros
+;//////////////////////////////////////////////////////////
+
+        ;// Start a relative switch table: dispatch on a register to the M_CASE entries that follow
+        ;//
+        ;// $v = the register to switch on (0 selects the first M_CASE)
+        ;// $s = if specified must be "L" to indicate long: in Thumb the table then
+        ;//      uses halfword (TBH) instead of byte (TBB) entries
+        MACRO
+        M_SWITCH $v, $s
+        ASSERT "$s"="":LOR:"$s"="L"
+_SwLong SETL {FALSE}
+        IF "$s"="L"
+_SwLong     SETL {TRUE}
+        ENDIF
+_SwNum  SETA _SwNum+1        
+        IF {CONFIG}=16
+            ;// Thumb: table branch; the label below marks the table base used by M_CASE
+            IF _SwLong
+                TBH [pc, $v, LSL#1]
+            ELSE
+                TBB [pc, $v]
+            ENDIF
+_Switch$_SwNum
+        ELSE
+            ;// ARM: jump into the run of branch instructions emitted by M_CASE
+            ADD pc, pc, $v, LSL #2
+            NOP                         ;// fills the word skipped because pc reads as the ADD address + 8
+        ENDIF
+        MEND
+        
+        ;// Add a case to the switch statement (cases must be listed in index order)
+        MACRO
+        M_CASE  $label
+        IF {CONFIG}=16
+            ;// Thumb: table entry = distance from the switch label, in halfwords
+            IF _SwLong
+                DCW ($label - _Switch$_SwNum)/2
+            ELSE
+                DCB ($label - _Switch$_SwNum)/2
+            ENDIF
+        ELSE
+            ;// ARM: one branch instruction per case
+            B   $label
+        ENDIF
+        MEND
+        
+        ;// End of switch statement: restore 2-byte alignment after the table entries
+        MACRO
+        M_ENDSWITCH
+        ALIGN 2
+        MEND       
+
+
+;////////////////////////////////////////////////////////
+;// Data area allocation
+;////////////////////////////////////////////////////////
+
+        ;// Constant table allocator macro
+        ;//
+        ;// Switches to a read-only data area (one area per requested alignment)
+        ;// and defines the label $name there. Must be used outside a function.
+        ;// $align is the optional alignment of the table, log2 of 
+        ;//  the byte alignment - $align=4 is 16 byte aligned
+        MACRO
+        M_TABLE  $name, $align
+        ASSERT :LNOT:_InFunc
+        IF "$align"=""
+            AREA |.constdata|, READONLY, DATA
+        ELSE
+            ;// AREAs inherit the alignment of the first declaration.
+            ;// Therefore for each alignment size we must have an area
+            ;// of a different name.
+            AREA constdata_a$align, READONLY, DATA, ALIGN=$align
+            
+            ;// We also force alignment in case we are tagging onto
+            ;// an already started area.
+            ALIGN (1<<$align)
+        ENDIF
+$name
+        MEND
+        
+;/////////////////////////////////////////////////////
+;// Macros to allocate space on the stack
+;//
+;// These all assume that the stack is 8-byte aligned
+;// at entry to the function, which means that the 
+;// 32-byte alignment macro needs to work in a
+;// bit more of a special way...
+;/////////////////////////////////////////////////////
+
+        
+
+
+        ;// Reserve $size bytes of stack scratch space with 1-byte alignment; the symbol
+        ;// $name + function number is set to its offset. Must precede M_START.
+        MACRO
+        M_ALLOC1  $name, $size
+        ASSERT :LNOT:_InFunc
+$name$_F   EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+        MEND
+            
+        ;// Reserve $size bytes of stack scratch space at a 2-byte aligned offset; the symbol
+        ;// $name + function number is set to that offset. Must precede M_START.
+        MACRO
+        M_ALLOC2  $name, $size
+        ASSERT :LNOT:_InFunc
+        IF (_SBytes:AND:1)!=0
+_SBytes     SETA _SBytes + (2 - (_SBytes:AND:1))
+        ENDIF
+$name$_F   EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+        MEND
+            
+        ;// Reserve $size bytes of stack scratch space at a 4-byte aligned offset; the symbol
+        ;// $name + function number is set to that offset. Must precede M_START.
+        MACRO
+        M_ALLOC4  $name, $size
+        ASSERT :LNOT:_InFunc
+        IF (_SBytes:AND:3)!=0
+_SBytes     SETA _SBytes + (4 - (_SBytes:AND:3))
+        ENDIF
+$name$_F   EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+        MEND
+            
+        ;// Reserve $size bytes of stack scratch space at an 8-byte aligned offset; the symbol
+        ;// $name + function number is set to that offset. Must precede M_START.
+        MACRO
+        M_ALLOC8  $name, $size
+        ASSERT :LNOT:_InFunc
+        IF (_SBytes:AND:7)!=0
+_SBytes     SETA _SBytes + (8 - (_SBytes:AND:7))
+        ENDIF
+$name$_F   EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+        MEND        
+
+        
+        ;// Allocate 8-byte aligned area of name
+        ;// $name size ($size+8) bytes; the symbol records the area start + 8.
+        ;// M_ADR16 rounds that address down to 16 bytes, so with an 8-byte aligned sp 8 spare bytes suffice
+        
+        MACRO
+        M_ALLOC16  $name, $size
+        ASSERT :LNOT:_InFunc
+        IF (_SBytes:AND:7)!=0
+_SBytes     SETA _SBytes + (8 - (_SBytes:AND:7))
+        ENDIF
+$name$_F$_16   EQU (_SBytes + 8)
+_SBytes SETA _SBytes + ($size) + 8
+        MEND        
+        
+        ;// Allocate 8-byte aligned area of name
+        ;// $name size ($size+24) bytes; the symbol records the area start + 24.
+        ;// M_ADR32 rounds that address down to 32 bytes, so with an 8-byte aligned sp 24 spare bytes suffice
+        
+        MACRO
+        M_ALLOC32  $name, $size
+        ASSERT :LNOT:_InFunc
+        IF (_SBytes:AND:7)!=0
+_SBytes     SETA _SBytes + (8 - (_SBytes:AND:7))
+        ENDIF
+$name$_F$_32   EQU (_SBytes + 24)
+_SBytes SETA _SBytes + ($size) + 24
+        MEND        
+        
+        
+        
+        
+        ;// Argument Declaration Macro (only valid between M_START and M_END)
+        ;//
+        ;// Names the next stacked argument: the symbol $name + function number is set
+        ;// to the current _ABytes offset, which then advances by $size bytes
+        MACRO
+        M_ARG     $name, $size
+        ASSERT _InFunc
+$name$_F    EQU _ABytes
+_ABytes SETA _ABytes + ($size)
+        MEND        
+        
+;///////////////////////////////////////////////
+;// Macros to access stacked variables
+;///////////////////////////////////////////////
+
+        ;// Macro to perform a data processing operation with a constant second
+        ;// operand, emitted as one or two instructions (low 8-bit chunk, then the rest)
+        MACRO
+        _M_OPC $op,$rd,$rn,$const
+        LCLA    _sh
+        LCLA    _cst
+_sh     SETA    0
+_cst    SETA    $const
+        IF _cst=0
+        $op $rd, $rn, #_cst
+            MEXIT
+        ENDIF
+        WHILE (_cst:AND:3)=0            ;// strip trailing zero bit pairs, counting them in _sh
+_cst        SETA _cst>>2
+_sh         SETA _sh+2
+        WEND
+        $op $rd, $rn, #(_cst:AND:0x000000FF)<<_sh
+        IF _cst>=256                    ;// remaining bits go in a second instruction - NOTE(review): assumes they fit one immediate
+            $op $rd, $rd, #(_cst:AND:0xFFFFFF00)<<_sh
+        ENDIF
+        MEND
+
+        ;// Macro to perform a data access operation
+        ;// Such as LDR or STR
+        ;// The addressing mode is modified such that
+        ;// 1. If no address is given then the name is taken
+        ;//    as a stack offset (sp + _Workspace + name symbol, alignment asserted)
+        ;// 2. If the addressing mode is not available for the
+        ;//    state being assembled for (eg Thumb) then a suitable
+        ;//    addressing mode is substituted.
+        ;//
+        ;// On Entry:
+        ;// $i = Instruction to perform (eg "LDRB")
+        ;// $a = Required byte alignment
+        ;// $r = Register(s) to transfer (eg "r1")
+        ;// $a0,$a1,$a2,$a3 = Addressing mode and condition. One of:
+        ;//     label {,cc}
+        ;//     [base]                    {,,,cc}
+        ;//     [base, offset]{!}         {,,cc}
+        ;//     [base, offset, shift]{!}  {,cc}
+        ;//     [base], offset            {,,cc}
+        ;//     [base], offset, shift     {,cc}
+        MACRO
+        _M_DATA $i,$a,$r,$a0,$a1,$a2,$a3
+        IF "$a0":LEFT:1="["
+            IF "$a1"=""
+                $i$a3   $r, $a0
+            ELSE
+                IF "$a0":RIGHT:1="]"
+                    IF "$a2"=""
+                        _M_POSTIND $i$a3, "$r", $a0, $a1
+                    ELSE
+                        _M_POSTIND $i$a3, "$r", $a0, "$a1,$a2"
+                    ENDIF
+                ELSE
+                    IF "$a2"=""
+                        _M_PREIND  $i$a3, "$r", $a0, $a1
+                    ELSE
+                        _M_PREIND  $i$a3, "$r", $a0, "$a1,$a2"
+                    ENDIF
+                ENDIF
+            ENDIF
+        ELSE
+            LCLA    _Offset
+_Offset     SETA    _Workspace + $a0$_F
+            ASSERT  (_Offset:AND:($a-1))=0
+            $i$a1   $r, [sp, #_Offset]
+        ENDIF
+        MEND
+        
+        ;// Handle post indexed load/stores; in Thumb the access and the base update are emitted separately
+        ;// op  reg, [base], offset
+        MACRO
+        _M_POSTIND $i,$r,$a0,$a1
+        LCLS _base
+        LCLS _offset
+        IF {CONFIG}=16 ;// Thumb
+_base       SETS ("$a0":LEFT:(:LEN:"$a0"-1)):RIGHT:(:LEN:"$a0"-2)   ;// remove []
+_offset     SETS "$a1"
+            IF _offset:LEFT:1="+"
+_offset         SETS _offset:RIGHT:(:LEN:_offset-1)
+            ENDIF
+            $i  $r, $a0
+            IF _offset:LEFT:1="-"
+_offset         SETS _offset:RIGHT:(:LEN:_offset-1)
+                SUB $_base, $_base, $_offset
+            ELSE                
+                ADD $_base, $_base, $_offset
+            ENDIF
+        ELSE ;// ARM
+            $i  $r, $a0, $a1
+        ENDIF
+        MEND
+        
+        ;// Handle pre indexed load/store; in Thumb a writeback form is split into the access plus an ADD to the base
+        ;// op  reg, [base, offset]{!}
+        MACRO
+        _M_PREIND $i,$r,$a0,$a1
+        LCLS _base
+        LCLS _offset
+        IF ({CONFIG}=16):LAND:(("$a1":RIGHT:2)="]!")
+_base       SETS "$a0":RIGHT:(:LEN:("$a0")-1)
+_offset     SETS "$a1":LEFT:(:LEN:("$a1")-2)
+            $i $r, [$_base, $_offset]
+            ADD $_base, $_base, $_offset
+        ELSE
+            $i  $r, $a0, $a1
+        ENDIF
+        MEND
+
+        ;// Load unsigned byte from a named stack slot or an explicit address (forms listed at _M_DATA)
+        MACRO
+        M_LDRB  $r,$a0,$a1,$a2,$a3
+        _M_DATA "LDRB",1,$r,$a0,$a1,$a2,$a3
+        MEND
+        
+        ;// Load signed byte from a named stack slot or an explicit address (forms listed at _M_DATA)
+        MACRO
+        M_LDRSB $r,$a0,$a1,$a2,$a3
+        _M_DATA "LDRSB",1,$r,$a0,$a1,$a2,$a3
+        MEND
+        
+        ;// Store byte to a named stack slot or an explicit address (forms listed at _M_DATA)
+        MACRO
+        M_STRB  $r,$a0,$a1,$a2,$a3
+        _M_DATA "STRB",1,$r,$a0,$a1,$a2,$a3
+        MEND
+        
+        ;// Load unsigned half word from a named stack slot (2-byte aligned offset) or an explicit address
+        MACRO
+        M_LDRH  $r,$a0,$a1,$a2,$a3
+        _M_DATA "LDRH",2,$r,$a0,$a1,$a2,$a3
+        MEND
+        
+        ;// Load signed half word from a named stack slot (2-byte aligned offset) or an explicit address
+        MACRO
+        M_LDRSH $r,$a0,$a1,$a2,$a3
+        _M_DATA "LDRSH",2,$r,$a0,$a1,$a2,$a3
+        MEND
+        
+        ;// Store half word to a named stack slot (2-byte aligned offset) or an explicit address
+        MACRO
+        M_STRH  $r,$a0,$a1,$a2,$a3
+        _M_DATA "STRH",2,$r,$a0,$a1,$a2,$a3
+        MEND
+
+        ;// Load word from a named stack slot (4-byte aligned offset) or an explicit address
+        MACRO
+        M_LDR   $r,$a0,$a1,$a2,$a3
+        _M_DATA "LDR",4,$r,$a0,$a1,$a2,$a3
+        MEND
+        
+        ;// Store word to a named stack slot (4-byte aligned offset) or an explicit address
+        MACRO
+        M_STR   $r,$a0,$a1,$a2,$a3
+        _M_DATA "STR",4,$r,$a0,$a1,$a2,$a3
+        MEND
+
+        ;// Load double word (register pair) from a named stack slot (8-byte aligned offset) or an explicit address
+        MACRO
+        M_LDRD  $r0,$r1,$a0,$a1,$a2,$a3
+        _M_DATA "LDRD",8,"$r0,$r1",$a0,$a1,$a2,$a3
+        MEND
+                
+        ;// Store double word (register pair) to a named stack slot (8-byte aligned offset) or an explicit address
+        MACRO
+        M_STRD  $r0,$r1,$a0,$a1,$a2,$a3
+        _M_DATA "STRD",8,"$r0,$r1",$a0,$a1,$a2,$a3
+        MEND
+        
+        ;// Get absolute address of stack allocated location $b into $a (sp + _Workspace + offset symbol); $cc = optional condition
+        MACRO
+        M_ADR   $a, $b, $cc
+        _M_OPC  ADD$cc, $a, sp, (_Workspace + $b$_F)
+        MEND
+        
+        ;// Get absolute address of a location allocated with M_ALLOC16 and align the address to 16 bytes
+        MACRO
+        M_ADR16 $a, $b, $cc
+            _M_OPC  ADD$cc, $a, sp, (_Workspace + $b$_F$_16)
+        
+            ;// Now align $a down to 16 bytes by clearing the low four bits
+            BIC$cc  $a,$a,#0x0F
+        MEND
+        
+        ;// Get absolute address of a location allocated with M_ALLOC32 and align the address to 32 bytes
+        MACRO
+        M_ADR32 $a, $b, $cc
+            _M_OPC  ADD$cc, $a, sp, (_Workspace + $b$_F$_32)
+        
+            ;// Now align $a down to 32 bytes by clearing the low five bits
+            BIC$cc  $a,$a,#0x1F
+        MEND
+
+;//////////////////////////////////////////////////////////
+;// Function header and footer macros
+;//////////////////////////////////////////////////////////      
+        
+        ;// Function Header Macro    
+        ;// Generates the function prologue
+        ;// Note that functions should all be "stack-moves-once"
+        ;// The FNSTART and FNEND macros should be the only places
+        ;// where the stack moves.
+        ;//    
+        ;// $name  = function name
+        ;// $rreg  = ""   don't stack any registers
+        ;//          "lr" stack "r4,lr" (r4 keeps the stack 8-byte aligned)
+        ;//          "rN" stack "r4-rN,lr", odd N is rounded up to even (r5 -> r4-r6)
+        ;// $dreg  = ""   don't stack any D registers
+        ;//          "dN" stack registers "d8-dN"
+        ;//
+        ;// Note: ARM Architecture procedure call standard AAPCS
+        ;// states that r4-r11, sp, d8-d15 must be preserved by
+        ;// a compliant function.
+        MACRO
+        M_START $name, $rreg, $dreg
+        ASSERT :LNOT:_InFunc
+        ASSERT "$name"!=""
+_InFunc SETL {TRUE}
+_RBytes SETA 0
+_Workspace SETA 0
+
+        ;// Create an area for the function and export its name
+        AREA    |.text|, CODE
+        EXPORT  $name
+$name   FUNCTION
+        
+        ;// Save R registers (lr is always added to a non-empty list)
+        _M_GETRREGLIST $rreg
+        IF _RRegList<>""
+            STMFD   sp!, {$_RRegList, lr}
+        ENDIF
+                
+        ;// Save D registers
+        _M_GETDREGLIST  $dreg        
+        IF _DRegList<>""
+            VSTMFD  sp!, {$_DRegList}
+        ENDIF            
+            
+                    
+        ;// Ensure size claimed on stack is 8-byte aligned
+        IF ((_SBytes:AND:7)!=0)
+_SBytes     SETA _SBytes + (8 - (_SBytes:AND:7))
+        ENDIF
+        
+        IF (_SBytes!=0)
+            _M_OPC SUB, sp, sp, _SBytes
+        ENDIF
+        
+        
+_ABytes SETA _SBytes + _RBytes - _Workspace
+
+                        
+        ;// Print function name if debug enabled
+        M_PRINTF "$name\n",
+        MEND
+        
+        ;// Work out the R register save list (lr excluded) and add its stacked size, lr included, to _RBytes
+        MACRO
+        _M_GETRREGLIST $rreg
+        IF "$rreg"=""
+_RRegList   SETS ""
+            MEXIT
+        ENDIF        
+        IF "$rreg"="lr":LOR:"$rreg"="r4"
+_RRegList   SETS "r4"
+_RBytes     SETA _RBytes+8
+            MEXIT
+        ENDIF
+        IF "$rreg"="r5":LOR:"$rreg"="r6"
+_RRegList   SETS "r4-r6"
+_RBytes     SETA _RBytes+16
+            MEXIT
+        ENDIF
+        IF "$rreg"="r7":LOR:"$rreg"="r8"
+_RRegList   SETS "r4-r8"
+_RBytes     SETA _RBytes+24
+            MEXIT
+        ENDIF
+        IF "$rreg"="r9":LOR:"$rreg"="r10"
+_RRegList   SETS "r4-r10"
+_RBytes     SETA _RBytes+32
+            MEXIT
+        ENDIF
+        IF "$rreg"="r11":LOR:"$rreg"="r12"
+_RRegList   SETS "r4-r12"
+_RBytes     SETA _RBytes+40
+            MEXIT
+        ENDIF
+        INFO 1, "Unrecognized saved r register limit '$rreg'"
+        MEND        
+        
+        ;// Work out the D register save list (d8 up to the given register) and add 8 bytes per register to _RBytes
+        MACRO
+        _M_GETDREGLIST $dreg
+        IF "$dreg"=""
+_DRegList   SETS ""
+            MEXIT
+        ENDIF        
+        IF "$dreg"="d8"
+_DRegList   SETS "d8"
+_RBytes     SETA _RBytes+8
+            MEXIT
+        ENDIF
+        IF "$dreg"="d9"
+_DRegList   SETS "d8-d9"
+_RBytes     SETA _RBytes+16
+            MEXIT
+        ENDIF
+        IF "$dreg"="d10"
+_DRegList   SETS "d8-d10"
+_RBytes     SETA _RBytes+24
+            MEXIT
+        ENDIF
+        IF "$dreg"="d11"
+_DRegList   SETS "d8-d11"
+_RBytes     SETA _RBytes+32
+            MEXIT
+        ENDIF
+        IF "$dreg"="d12"
+_DRegList   SETS "d8-d12"
+_RBytes     SETA _RBytes+40
+            MEXIT
+        ENDIF
+        IF "$dreg"="d13"
+_DRegList   SETS "d8-d13"
+_RBytes     SETA _RBytes+48
+            MEXIT
+        ENDIF
+        IF "$dreg"="d14"
+_DRegList   SETS "d8-d14"
+_RBytes     SETA _RBytes+56
+            MEXIT
+        ENDIF
+        IF "$dreg"="d15"
+_DRegList   SETS "d8-d15"
+_RBytes     SETA _RBytes+64
+            MEXIT
+        ENDIF
+        INFO 1, "Unrecognized saved d register limit '$dreg'"
+        MEND
+        
+        ;// Produce function return instructions: pop saved D registers, then BX lr if no R registers were stacked, else pop the R list into pc
+        MACRO
+        _M_RET $cc
+        IF _DRegList<>""
+            VPOP$cc {$_DRegList}
+        ENDIF
+        IF _RRegList=""
+            BX$cc lr
+        ELSE
+            LDM$cc.FD sp!, {$_RRegList, pc}
+        ENDIF
+        MEND        
+        
+        ;// Early Function Exit Macro (only valid between M_START and M_END)
+        ;// $cc = condition to exit with
+        ;// (Example: M_EXIT EQ)
+        MACRO
+        M_EXIT  $cc
+        ASSERT  _InFunc
+        IF  _SBytes!=0
+            ;// Scratch space is allocated: branch to the M_END label, which restores the stack frame and returns
+            B$cc  _End$_F
+        ELSE
+            ;// No scratch space to release: can return directly
+            _M_RET $cc
+        ENDIF        
+        MEND        
+
+        ;// Function Footer Macro        
+        ;// Generates the function epilogue: defines the label M_EXIT branches to, releases scratch space and returns
+        MACRO
+        M_END
+        ASSERT _InFunc
+_InFunc SETL {FALSE}
+_End$_F
+
+        ;// Restore the stack pointer to its value before the scratch space was claimed
+        IF _SBytes!=0
+            _M_OPC ADD, sp, sp, _SBytes
+        ENDIF
+        _M_RET
+        ENDFUNC
+
+        ;// Reset the global stack tracking variables back to their 
+        ;// initial values, and increment the function count
+_SBytes        SETA 0
+_F             SETA _F+1
+        MEND
+
+                
+;//==========================================================================
+;// Debug Macros
+;//==========================================================================
+
+        GBLL    DEBUG_ON
+DEBUG_ON SETL   {FALSE}                 ;// when TRUE, M_PRINTF emits its printf call
+        GBLL    DEBUG_STALLS_ON
+DEBUG_STALLS_ON SETL {FALSE}            ;// when TRUE, M_STALL emits its stall instructions
+        
+        ;//==========================================================================
+        ;// Debug call to printf (emits nothing unless DEBUG_ON is TRUE)
+        ;//  M_PRINTF $format, $val0, $val1, $val2
+        ;//
+        ;// Examples:
+        ;//  M_PRINTF "x=%08x\n", r0
+        ;//
+        ;// This macro preserves the value of all registers including the
+        ;// flags.
+        ;//==========================================================================
+
+        MACRO
+        M_PRINTF  $format, $val0, $val1, $val2
+        IF DEBUG_ON
+        
+        IMPORT  printf
+        LCLA    nArgs
+nArgs	SETA    0
+        
+        ;// save registers so we don't corrupt them
+        STMFD   sp!, {r0-r12, lr}
+        
+        ;// Drop stack by 16 bytes: workspace for up to three values
+        SUB     sp, sp, #16
+        
+        ;// Save registers we need to print to the stack, counting them in nArgs
+        IF "$val2" <> ""
+            ASSERT "$val1" <> ""
+            STR    $val2, [sp, #8]
+nArgs       SETA   nArgs+1
+        ENDIF
+        IF "$val1" <> ""
+            ASSERT "$val0" <> ""
+            STR    $val1, [sp, #4]
+nArgs	    SETA   nArgs+1
+        ENDIF
+        IF "$val0"<>""
+            STR    $val0, [sp]
+nArgs	    SETA   nArgs+1
+        ENDIF
+        
+        ;// Now we are safe to corrupt registers: r0 = format string, r1-r3 = values
+        ADR     r0, %FT00
+        IF nArgs=1
+          LDR   r1, [sp]
+        ENDIF
+        IF nArgs=2
+          LDMIA sp, {r1,r2}
+        ENDIF
+        IF nArgs=3
+          LDMIA sp, {r1,r2,r3}
+        ENDIF
+        
+        ;// print the values
+        MRS     r4, cpsr        ;// preserve flags in r4 across the call
+        BL      printf
+        MSR     cpsr_f, r4      ;// restore flags
+        B       %FT01
+00      ;// string to print
+        DCB     "$format", 0
+        ALIGN
+01      ;// Finished: release the 16 byte workspace
+        ADD     sp, sp, #16
+        ;// Restore registers
+        LDMFD	sp!, {r0-r12,lr}
+
+        ENDIF   ;// DEBUG_ON
+        MEND
+
+
+        ;// Stall Simulation Macro (emits nothing unless DEBUG_STALLS_ON is TRUE)
+        ;// Each argument names a platform and a stall count; filler instructions
+        ;//  are inserted for the platform whose variant flag is set
+        MACRO
+        M_STALL $plat1stall, $plat2stall, $plat3stall, $plat4stall, $plat5stall, $plat6stall
+        IF DEBUG_STALLS_ON
+            _M_STALL_SUB $plat1stall    
+            _M_STALL_SUB $plat2stall    
+            _M_STALL_SUB $plat3stall    
+            _M_STALL_SUB $plat4stall    
+            _M_STALL_SUB $plat5stall    
+            _M_STALL_SUB $plat6stall    
+        ENDIF
+        MEND
+        
+        MACRO
+        _M_STALL_SUB $platstall
+        IF "$platstall"!=""
+            LCLA _pllen
+            LCLS _pl
+            LCLL _pllog                 ;// declared but not used in this macro
+_pllen      SETA :LEN:"$platstall"
+_pl         SETS "$platstall":LEFT:(_pllen - 2)     ;// platform name = argument minus its last two characters
+            IF :DEF:$_pl
+                IF $_pl
+                    LCLS _st
+                    LCLA _stnum
+_st                 SETS "$platstall":RIGHT:1       ;// stall count = last character of the argument
+_stnum              SETA $_st
+                    WHILE _stnum>0
+			MOV sp, sp              ;// one filler instruction per stall
+_stnum                  SETA _stnum - 1
+                    WEND
+                ENDIF
+            ENDIF
+        ENDIF
+        MEND
+        
+        
+        
+;//==========================================================================
+;// Endian Invariance Macros
+;// 
+;// The idea behind these macros is that if an array of halfwords is
+;// loaded as words then the SMUL00 macro will multiply
+;// array elements 0 regardless of the endianness of the
+;// system. For little endian SMUL00=SMULBB, for big
+;// endian SMUL00=SMULTT and similarly for other packed operations.
+;//
+;//==========================================================================
+
+        MACRO
+        LIBI4   $comli, $combi, $a, $b, $c, $d, $cc
+        IF {ENDIAN}="big"               ;// big endian build: emit the second mnemonic
+        $combi.$cc $a, $b, $c, $d
+        ELSE                            ;// otherwise emit the first (little endian) mnemonic
+        $comli.$cc $a, $b, $c, $d
+        ENDIF
+        MEND
+        
+        MACRO
+        LIBI3   $comli, $combi, $a, $b, $c, $cc
+        IF {ENDIAN}="big"               ;// big endian build: emit the second mnemonic
+        $combi.$cc $a, $b, $c
+        ELSE                            ;// otherwise emit the first (little endian) mnemonic
+        $comli.$cc $a, $b, $c
+        ENDIF
+        MEND
+        
+        ;// SMLAxy macros: x and y select the halfword of the 2nd and 3rd operand; 0/1 = array element 0/1 (swapped for big endian), B/T = fixed bottom/top
+        
+        MACRO
+        SMLA00  $a, $b, $c, $d, $cc
+        LIBI4 SMLABB, SMLATT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA01  $a, $b, $c, $d, $cc
+        LIBI4 SMLABT, SMLATB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA0B  $a, $b, $c, $d, $cc
+        LIBI4 SMLABB, SMLATB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA0T  $a, $b, $c, $d, $cc
+        LIBI4 SMLABT, SMLATT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA10  $a, $b, $c, $d, $cc
+        LIBI4 SMLATB, SMLABT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA11  $a, $b, $c, $d, $cc
+        LIBI4 SMLATT, SMLABB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA1B  $a, $b, $c, $d, $cc
+        LIBI4 SMLATB, SMLABB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA1T  $a, $b, $c, $d, $cc
+        LIBI4 SMLATT, SMLABT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAB0  $a, $b, $c, $d, $cc
+        LIBI4 SMLABB, SMLABT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAB1  $a, $b, $c, $d, $cc
+        LIBI4 SMLABT, SMLABB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAT0  $a, $b, $c, $d, $cc
+        LIBI4 SMLATB, SMLATT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAT1  $a, $b, $c, $d, $cc
+        LIBI4 SMLATT, SMLATB, $a, $b, $c, $d, $cc
+        MEND
+        
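+        ;// SMULxy macros: for x and y, 0 selects B (little-endian) or T (big-endian), 1 selects T or B; B and T are used as written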
+        
+        MACRO
+        SMUL00  $a, $b, $c, $cc
+        LIBI3 SMULBB, SMULTT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL01  $a, $b, $c, $cc
+        LIBI3 SMULBT, SMULTB, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL0B  $a, $b, $c, $cc
+        LIBI3 SMULBB, SMULTB, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL0T  $a, $b, $c, $cc
+        LIBI3 SMULBT, SMULTT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL10  $a, $b, $c, $cc
+        LIBI3 SMULTB, SMULBT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL11  $a, $b, $c, $cc
+        LIBI3 SMULTT, SMULBB, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL1B  $a, $b, $c, $cc
+        LIBI3 SMULTB, SMULBB, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL1T  $a, $b, $c, $cc
+        LIBI3 SMULTT, SMULBT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMULB0  $a, $b, $c, $cc
+        LIBI3 SMULBB, SMULBT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMULB1  $a, $b, $c, $cc
+        LIBI3 SMULBT, SMULBB, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMULT0  $a, $b, $c, $cc
+        LIBI3 SMULTB, SMULTT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMULT1  $a, $b, $c, $cc
+        LIBI3 SMULTT, SMULTB, $a, $b, $c, $cc
+        MEND
+        
+        ;// SMLAWx, SMULWx macros: x = 0 selects B (little-endian) or T (big-endian); x = 1 selects T or B
+        
+        MACRO
+        SMLAW0  $a, $b, $c, $d, $cc
+        LIBI4 SMLAWB, SMLAWT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAW1  $a, $b, $c, $d, $cc
+        LIBI4 SMLAWT, SMLAWB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMULW0  $a, $b, $c, $cc
+        LIBI3 SMULWB, SMULWT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMULW1  $a, $b, $c, $cc
+        LIBI3 SMULWT, SMULWB, $a, $b, $c, $cc
+        MEND
+
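+        ;// SMLALxy macros: for x and y, 0 selects B (little-endian) or T (big-endian), 1 selects T or B; B and T are used as written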
+
+
+        MACRO
+        SMLAL00  $a, $b, $c, $d, $cc
+        LIBI4 SMLALBB, SMLALTT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAL01  $a, $b, $c, $d, $cc
+        LIBI4 SMLALBT, SMLALTB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAL0B  $a, $b, $c, $d, $cc
+        LIBI4 SMLALBB, SMLALTB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAL0T  $a, $b, $c, $d, $cc
+        LIBI4 SMLALBT, SMLALTT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAL10  $a, $b, $c, $d, $cc
+        LIBI4 SMLALTB, SMLALBT, $a, $b, $c, $d, $cc
+        MEND
+
+        MACRO
+        SMLAL11  $a, $b, $c, $d, $cc
+        LIBI4 SMLALTT, SMLALBB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAL1B  $a, $b, $c, $d, $cc
+        LIBI4 SMLALTB, SMLALBB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAL1T  $a, $b, $c, $d, $cc
+        LIBI4 SMLALTT, SMLALBT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLALB0  $a, $b, $c, $d, $cc
+        LIBI4 SMLALBB, SMLALBT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLALB1  $a, $b, $c, $d, $cc
+        LIBI4 SMLALBT, SMLALBB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLALT0  $a, $b, $c, $d, $cc
+        LIBI4 SMLALTB, SMLALTT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLALT1  $a, $b, $c, $d, $cc
+        LIBI4 SMLALTT, SMLALTB, $a, $b, $c, $d, $cc
+        MEND
+        
+  ENDIF ;// ARMCOMM_S_H
+            
+  END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h
new file mode 100644
index 0000000..f629f72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h
@@ -0,0 +1,274 @@
+/* 
+ * 
+ * File Name:  armOMX_ReleaseVersion.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * This file allows a version of the OMX DL libraries to be built where some or
+ * all of the function names can be given a user specified suffix. 
+ *
+ * You might want to use it where:
+ *
+ * - you want to rename a function "out of the way" so that you could replace
+ *   a function with a different version (the original version would still be
+ *   in the library just with a different name - so you could debug the new
+ *   version by comparing it to the output of the old)
+ *
+ * - you want to rename all the functions to versions with a suffix so that 
+ *   you can include two versions of the library and choose between functions
+ *   at runtime.
+ *
+ *     e.g. omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8
+ * 
+ */
+
+  
+#ifndef _armOMX_H_
+#define _armOMX_H_
+
+
+/* We need to define these two macros in order to expand and concatenate the names */
+#define OMXCAT2BAR(A, B) omx ## A ## B
+#define OMXCATBAR(A, B) OMXCAT2BAR(A, B)
+
+/* Define the suffix to add to all functions - the default is no suffix */
+#define BARE_SUFFIX 
+
+
+
+/* Define what happens to the bare suffix-less functions, down to the sub-domain accuracy */
+#define OMXACAAC_SUFFIX    BARE_SUFFIX   
+#define OMXACMP3_SUFFIX    BARE_SUFFIX
+#define OMXICJP_SUFFIX     BARE_SUFFIX
+#define OMXIPBM_SUFFIX     BARE_SUFFIX
+#define OMXIPCS_SUFFIX     BARE_SUFFIX
+#define OMXIPPP_SUFFIX     BARE_SUFFIX
+#define OMXSP_SUFFIX       BARE_SUFFIX
+#define OMXVCCOMM_SUFFIX   BARE_SUFFIX
+#define OMXVCM4P10_SUFFIX  BARE_SUFFIX
+#define OMXVCM4P2_SUFFIX   BARE_SUFFIX
+
+
+
+
+/* Define what each bare, un-suffixed OpenMAX API function name is to be renamed to */
+#define omxACAAC_DecodeChanPairElt                        OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeDatStrElt                          OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeFillElt                            OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeIsStereo_S32                       OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsPNS_S32_I                        OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsStereo_S32_I                     OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodePrgCfgElt                          OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeTNS_S32_I                          OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DeinterleaveSpectrum_S32                 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_EncodeTNS_S32_I                          OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermPredict_S32                      OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermReconstruct_S32_I                OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTFwd_S32                              OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTInv_S32_S16                          OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX)
+#define omxACAAC_NoiselessDecode                          OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX)
+#define omxACAAC_QuantInv_S32_I                           OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADIFHeader                         OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADTSFrameHeader                    OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX)
+
+
+#define omxACMP3_HuffmanDecode_S32                        OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfb_S32                     OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfbMbp_S32                  OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_MDCTInv_S32                              OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantize_S32_I                         OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantizeSfb_S32_I                      OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_SynthPQMF_S32_S16                        OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackFrameHeader                        OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackScaleFactors_S8                    OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackSideInfo                           OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX)
+
+#define omxICJP_CopyExpand_U8_C3                          OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16                                OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16_I                              OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16                                OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16_I                              OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_Multiple_S16                  OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16                           OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16_I                         OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwdTableInit                      OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_Multiple_S16                  OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16                           OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16_I                         OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInvTableInit                      OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffman8x8_Direct_S16_C1            OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecGetBufSize_U8            OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecInit_U8                  OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1         OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecGetBufSize_U8            OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecInit_U8                  OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+
+#define omxIPBM_AddC_U8_C1R_Sfs                           OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C1R                               OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C3R                               OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX)
+#define omxIPBM_Mirror_U8_C1R                             OMXCATBAR(IPBM_Mirror_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_MulC_U8_C1R_Sfs                           OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+
+#define omxIPCS_ColorTwistQ14_U8_C3R                      OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R      OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R      OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R      OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R       OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R       OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R       OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R             OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszRot_U8_P3R                     OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R             OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R      OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R       OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R             OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R          OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszRot_U8_P3R                     OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R            OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R      OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R                OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R       OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR888LS_MCU_S16_U8_P3C3R       OMXCATBAR(IPCS_YCbCr444ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R        OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R           OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R               OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R             OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R      OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR888_U8_C3R                   OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX)
+
+#define omxIPPP_Deblock_HorEdge_U8_I                      OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_Deblock_VerEdge_U8_I                      OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterFIR_U8_C1R                          OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterMedian_U8_C1R                       OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_GetCentralMoment_S64                      OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_GetSpatialMoment_S64                      OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentGetStateSize                        OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentInit                                OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C1R                            OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C3R                            OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX)
+
+#define omxSP_BlockExp_S16                                OMXCATBAR(SP_BlockExp_S16, OMXSP_SUFFIX)
+#define omxSP_BlockExp_S32                                OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX)
+#define omxSP_Copy_S16                                    OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16                                 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16_Sfs                             OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC16_Sfs                        OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC32_Sfs                        OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S16S32_Sfs                    OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S32_Sfs                       OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC16                        OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC32                        OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S16S32                      OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S32                         OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC16                              OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC32                              OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S16S32                            OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S32                               OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32_Sfs                       OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32S16_Sfs                    OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC16_Sfs                        OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC32_Sfs                        OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32                            OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32_I                          OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16                              OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_I                            OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_ISfs                         OMXCATBAR(SP_FIR_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_Sfs                          OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16                           OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_I                         OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_ISfs                      OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_Sfs                       OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16                        OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16_I                      OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16                              OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16_I                            OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16                     OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16_I                   OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16                           OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16_I                         OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX)
+
+#define omxVCCOMM_Average_16x                             OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Average_8x                              OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock                OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock_SAD            OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy16x16                               OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy8x8                                 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ExpandFrame_I                           OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_LimitMVToRect                           OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_16x                                 OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_8x                                  OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX)
+
+#define omxVCM4P10_Average_4x                             OMXCATBAR(VCM4P10_Average_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Half                        OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Integer                     OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Quarter                     OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockChroma_I                        OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockLuma_I                          OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC        OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeCoeffsToPairCAVLC                OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I       OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I       OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I         OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I         OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_GetVLCInfo                             OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateChroma                      OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfHor_Luma                OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfVer_Luma                OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateLuma                        OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_ChromaDC           OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_LumaDC             OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformResidualAndAdd             OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEGetBufSize                           OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEInit                                 OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MotionEstimationMB                     OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_16x16                     OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_4x4                       OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntraChroma_8x8                  OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SAD_4x                                 OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_16x                            OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_4x                             OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_8x                             OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SATD_4x4                               OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SubAndTransformQDQResidual             OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantChromaDCFromPair       OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantLumaDCFromPair         OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_ChromaDC                OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_LumaDC                  OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX)
+
+#define omxVCM4P2_BlockMatch_Half_16x16                   OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Half_8x8                     OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_16x16                OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_8x8                  OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DCT8x8blk                               OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Inter                   OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Intra                   OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodePadMV_PVOP                        OMXCATBAR(VCM4P2_DecodePadMV_PVOP, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_Inter                   OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC              OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC              OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeMV                                OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_Inter                   OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC              OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC              OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_FindMVpred                              OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_IDCT8x8blk                              OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MCReconBlock                            OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEGetBufSize                            OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEInit                                  OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MotionEstimationMB                      OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_PredictReconCoefIntra                   OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInter_I                            OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantIntra_I                            OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvInter_I                         OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvIntra_I                         OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_inter                 OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_intra                 OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX)
+
+
+#endif /* _armOMX_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h
new file mode 100644
index 0000000..8b295a6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h
@@ -0,0 +1,252 @@
+/**
+ * File: omxtypes.h
+ * Brief: Defines basic Data types used in OpenMAX v1.0.2 header files.
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. 
+ *
+ * These materials are protected by copyright laws and contain material 
+ * proprietary to the Khronos Group, Inc.  You may use these materials 
+ * for implementing Khronos specifications, without altering or removing 
+ * any trademark, copyright or other notice from the specification.
+ * 
+ * Khronos Group makes no, and expressly disclaims any, representations 
+ * or warranties, express or implied, regarding these materials, including, 
+ * without limitation, any implied warranties of merchantability or fitness 
+ * for a particular purpose or non-infringement of any intellectual property. 
+ * Khronos Group makes no, and expressly disclaims any, warranties, express 
+ * or implied, regarding the correctness, accuracy, completeness, timeliness, 
+ * and reliability of these materials. 
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters, 
+ * Contributors or Members or their respective partners, officers, directors, 
+ * employees, agents or representatives be liable for any damages, whether 
+ * direct, indirect, special or consequential damages for lost revenues, 
+ * lost profits, or otherwise, arising from or in connection with these 
+ * materials.
+ * 
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc. 
+ *
+ */
+  
+#ifndef _OMXTYPES_H_
+#define _OMXTYPES_H_
+
+#include <limits.h> 
+
+#define OMX_IN
+#define OMX_OUT
+#define OMX_INOUT
+
+
+typedef enum {
+    
+    /* Mandatory return codes - use cases are explicitly described for each function */
+    OMX_Sts_NoErr                    =  0,    /* No error, the function completed successfully */
+    OMX_Sts_Err                      = -2,    /* Unknown/unspecified error */    
+    OMX_Sts_InvalidBitstreamValErr   = -182,  /* Invalid value detected during bitstream processing */    
+    OMX_Sts_MemAllocErr              = -9,    /* Not enough memory allocated for the operation */
+    OMX_StsACAAC_GainCtrErr    	     = -159,  /* AAC: Unsupported gain control data detected */
+    OMX_StsACAAC_PrgNumErr           = -167,  /* AAC: Invalid number of elements for one program   */
+    OMX_StsACAAC_CoefValErr          = -163,  /* AAC: Invalid quantized coefficient value          */     
+    OMX_StsACAAC_MaxSfbErr           = -162,  /* AAC: Invalid maxSfb value in relation to numSwb */    
+	OMX_StsACAAC_PlsDataErr		     = -160,  /* AAC: pulse escape sequence data error */
+
+    /* Optional return codes - use cases are explicitly described for each function*/
+    OMX_Sts_BadArgErr                = -5,    /* Bad Arguments */
+
+    OMX_StsACAAC_TnsNumFiltErr       = -157,  /* AAC: Invalid number of TNS filters  */
+    OMX_StsACAAC_TnsLenErr           = -156,  /* AAC: Invalid TNS region length  */   
+    OMX_StsACAAC_TnsOrderErr         = -155,  /* AAC: Invalid order of TNS filter  */                  
+    OMX_StsACAAC_TnsCoefResErr       = -154,  /* AAC: Invalid bit-resolution for TNS filter coefficients  */
+    OMX_StsACAAC_TnsCoefErr          = -153,  /* AAC: Invalid TNS filter coefficients  */                  
+    OMX_StsACAAC_TnsDirectErr        = -152,  /* AAC: Invalid TNS filter direction  */  
+
+    OMX_StsICJP_JPEGMarkerErr        = -183,  /* JPEG marker encountered within an entropy-coded block; */
+                                              /* Huffman decoding operation terminated early.           */
+    OMX_StsICJP_JPEGMarker           = -181,  /* JPEG marker encountered; Huffman decoding */
+                                              /* operation terminated early.                         */
+    OMX_StsIPPP_ContextMatchErr      = -17,   /* Context parameter doesn't match to the operation */
+
+    OMX_StsSP_EvenMedianMaskSizeErr  = -180,  /* Even size of the Median Filter mask was replaced by the odd one */
+
+    OMX_Sts_MaximumEnumeration       = INT_MAX  /*Placeholder, forces enum of size OMX_INT*/
+    
+ } OMXResult;          /** Return value or error value returned from a function. Identical to OMX_INT */
+
+ 
+/* OMX_U8 */
+#if UCHAR_MAX == 0xff
+typedef unsigned char OMX_U8;
+#elif USHRT_MAX == 0xff 
+typedef unsigned short int OMX_U8; 
+#else
+#error OMX_U8 undefined
+#endif 
+
+ 
+/* OMX_S8 */
+#if SCHAR_MAX == 0x7f 
+typedef signed char OMX_S8;
+#elif SHRT_MAX == 0x7f 
+typedef signed short int OMX_S8; 
+#else
+#error OMX_S8 undefined
+#endif
+ 
+ 
+/* OMX_U16 */
+#if USHRT_MAX == 0xffff
+typedef unsigned short int OMX_U16;
+#elif UINT_MAX == 0xffff
+typedef unsigned int OMX_U16; 
+#else
+#error OMX_U16 undefined
+#endif
+
+
+/* OMX_S16 */
+#if SHRT_MAX == 0x7fff 
+typedef signed short int OMX_S16;
+#elif INT_MAX == 0x7fff 
+typedef signed int OMX_S16; 
+#else
+#error OMX_S16 undefined
+#endif
+
+
+/* OMX_U32: unsigned 32-bit integer; falls back to unsigned long when unsigned int is not 32 bits wide */
+#if UINT_MAX == 0xffffffff
+typedef unsigned int OMX_U32;
+#elif ULONG_MAX == 0xffffffff
+typedef unsigned long int OMX_U32; 
+#else
+#error OMX_U32 undefined
+#endif
+
+
+/* OMX_S32 */
+#if INT_MAX == 0x7fffffff
+typedef signed int OMX_S32;
+#elif LONG_MAX == 0x7fffffff
+typedef long signed int OMX_S32; 
+#else
+#error OMX_S32 undefined
+#endif
+
+
+/* OMX_U64 & OMX_S64 */
+#if defined( _WIN32 ) || defined ( _WIN64 )
+    typedef __int64 OMX_S64; /** Signed 64-bit integer */
+    typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */
+    #define OMX_MIN_S64			(0x8000000000000000i64)
+    #define OMX_MIN_U64			(0x0000000000000000i64)
+    #define OMX_MAX_S64			(0x7FFFFFFFFFFFFFFFi64)
+    #define OMX_MAX_U64			(0xFFFFFFFFFFFFFFFFi64)
+#else
+    typedef long long OMX_S64; /** Signed 64-bit integer */
+    typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */
+    #define OMX_MIN_S64			(-0x7FFFFFFFFFFFFFFFLL-1) /* MAX negated minus one keeps the constant signed; 0x8000000000000000LL is unsigned */
+    #define OMX_MIN_U64			(0x0000000000000000LL)
+    #define OMX_MAX_S64			(0x7FFFFFFFFFFFFFFFLL)
+    #define OMX_MAX_U64			(0xFFFFFFFFFFFFFFFFLL)
+#endif
+
+
+/* OMX_SC8 */
+typedef struct
+{
+  OMX_S8 Re; /** Real part */
+  OMX_S8 Im; /** Imaginary part */	
+	
+} OMX_SC8; /** Signed 8-bit complex number */
+
+
+/* OMX_SC16 */
+typedef struct
+{
+  OMX_S16 Re; /** Real part */
+  OMX_S16 Im; /** Imaginary part */	
+	
+} OMX_SC16; /** Signed 16-bit complex number */
+
+
+/* OMX_SC32 */
+typedef struct
+{
+  OMX_S32 Re; /** Real part */
+  OMX_S32 Im; /** Imaginary part */	
+	
+} OMX_SC32; /** Signed 32-bit complex number */
+
+
+/* OMX_SC64 */
+typedef struct
+{
+  OMX_S64 Re; /** Real part */
+  OMX_S64 Im; /** Imaginary part */	
+	
+} OMX_SC64; /** Signed 64-bit complex number */
+
+
+/* OMX_F32 */
+typedef float OMX_F32; /** Single precision floating point,IEEE 754 */
+
+
+/* OMX_F64 */
+typedef double OMX_F64; /** Double precision floating point,IEEE 754 */
+
+
+/* OMX_INT */
+typedef int OMX_INT; /** signed integer corresponding to machine word length, has maximum signed value INT_MAX*/
+
+
+#define OMX_MIN_S8  	   	(-128)
+#define OMX_MIN_U8  		0
+#define OMX_MIN_S16		 	(-32768)
+#define OMX_MIN_U16			0
+#define OMX_MIN_S32			(-2147483647-1)
+#define OMX_MIN_U32			0
+
+#define OMX_MAX_S8			(127)
+#define OMX_MAX_U8			(255)
+#define OMX_MAX_S16			(32767)
+#define OMX_MAX_U16			(0xFFFF)
+#define OMX_MAX_S32			(2147483647)
+#define OMX_MAX_U32			(0xFFFFFFFF)
+
+typedef void OMXVoid;
+
+#ifndef NULL
+#define NULL ((void*)0)
+#endif
+
+/** Defines the geometric position and size of a rectangle, 
+  * where x,y defines the coordinates of the top left corner
+  * of the rectangle, with dimensions width in the x-direction 
+  * and height in the y-direction */
+typedef struct {
+	OMX_INT x;      /** x-coordinate of top left corner of rectangle */
+	OMX_INT y;      /** y-coordinate of top left corner of rectangle */
+	OMX_INT width;  /** Width in the x-direction. */
+	OMX_INT height; /** Height in the y-direction. */
+}OMXRect;
+
+
+/** Defines the geometric position of a point, */
+typedef struct 
+{
+ OMX_INT x; /** x-coordinate */
+ OMX_INT y;	/** y-coordinate */
+	
+} OMXPoint;
+
+
+/** Defines the dimensions of a rectangle, or region of interest in an image */
+typedef struct 
+{
+ OMX_INT width;  /** Width of the rectangle, in the x-direction */
+ OMX_INT height; /** Height of the rectangle, in the y-direction */
+	
+} OMXSize;
+
+#endif /* _OMXTYPES_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h
new file mode 100644
index 0000000..8d24b65
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h
@@ -0,0 +1,77 @@
+;//
+;// 
+;// File Name:  omxtypes_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+;// Mandatory return codes - use cases are explicitly described for each function 
+OMX_Sts_NoErr                    EQU  0    ;// No error the function completed successfully 
+OMX_Sts_Err                      EQU -2    ;// Unknown/unspecified error     
+OMX_Sts_InvalidBitstreamValErr   EQU -182  ;// Invalid value detected during bitstream processing     
+OMX_Sts_MemAllocErr              EQU -9    ;// Not enough memory allocated for the operation 
+OMX_StsACAAC_GainCtrErr    	     EQU -159  ;// AAC: Unsupported gain control data detected 
+OMX_StsACAAC_PrgNumErr           EQU -167  ;// AAC: Invalid number of elements for one program   
+OMX_StsACAAC_CoefValErr          EQU -163  ;// AAC: Invalid quantized coefficient value               
+OMX_StsACAAC_MaxSfbErr           EQU -162  ;// AAC: Invalid maxSfb value in relation to numSwb     
+OMX_StsACAAC_PlsDataErr		     EQU -160  ;// AAC: pulse escape sequence data error 
+
+;// Optional return codes - use cases are explicitly described for each function
+OMX_Sts_BadArgErr                EQU -5    ;// Bad Arguments 
+
+OMX_StsACAAC_TnsNumFiltErr       EQU -157  ;// AAC: Invalid number of TNS filters  
+OMX_StsACAAC_TnsLenErr           EQU -156  ;// AAC: Invalid TNS region length     
+OMX_StsACAAC_TnsOrderErr         EQU -155  ;// AAC: Invalid order of TNS filter                    
+OMX_StsACAAC_TnsCoefResErr       EQU -154  ;// AAC: Invalid bit-resolution for TNS filter coefficients  
+OMX_StsACAAC_TnsCoefErr          EQU -153  ;// AAC: Invalid TNS filter coefficients                    
+OMX_StsACAAC_TnsDirectErr        EQU -152  ;// AAC: Invalid TNS filter direction    
+
+OMX_StsICJP_JPEGMarkerErr        EQU -183  ;// JPEG marker encountered within an entropy-coded block; 
+                                            ;// Huffman decoding operation terminated early.           
+OMX_StsICJP_JPEGMarker           EQU -181  ;// JPEG marker encountered; Huffman decoding 
+                                            ;// operation terminated early.                         
+OMX_StsIPPP_ContextMatchErr      EQU -17   ;// Context parameter doesn't match to the operation 
+
+OMX_StsSP_EvenMedianMaskSizeErr  EQU -180  ;// Even size of the Median Filter mask was replaced by the odd one 
+
+OMX_Sts_MaximumEnumeration       EQU 0x7FFFFFFF
+
+
+
+OMX_MIN_S8      EQU 	   	(-128)
+OMX_MIN_U8  	EQU     	0
+OMX_MIN_S16		EQU      	(-32768)
+OMX_MIN_U16		EQU	        0
+
+
+OMX_MIN_S32		EQU	(-2147483647-1)
+OMX_MIN_U32		EQU	0
+
+OMX_MAX_S8		EQU	(127)
+OMX_MAX_U8		EQU	(255)
+OMX_MAX_S16		EQU	(32767)
+OMX_MAX_U16		EQU	(0xFFFF)
+OMX_MAX_S32		EQU	(2147483647)
+OMX_MAX_U32		EQU	(0xFFFFFFFF)
+
+OMX_VC_UPPER    EQU 0x1                 ;// Used by the PredictIntra functions   
+OMX_VC_LEFT     EQU 0x2                 ;// Used by the PredictIntra functions 
+OMX_VC_UPPER_RIGHT    EQU 0x40          ;// Used by the PredictIntra functions   
+
+NULL    EQU 0
+
+;// Structures
+
+    INCLUDE     armCOMM_s.h
+
+    M_STRUCT    OMXPoint                ;// Assembler view of the OMXPoint structure
+    M_FIELD     x, 4                    ;// x-coordinate
+    M_FIELD     y, 4                    ;// y-coordinate
+    M_ENDSTRUCT
+
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl
new file mode 100755
index 0000000..1ae7005
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl
@@ -0,0 +1,111 @@
+#!/usr/bin/perl
+#
+# 
+# File Name:  build_vc.pl
+# OpenMAX DL: v1.0.2
+# Revision:   9641
+# Date:       Thursday, February 7, 2008
+# 
+# (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+# 
+# 
+#
+# This file builds the OpenMAX DL vc domain library omxVC.a (written to the lib directory).
+#
+
+use File::Spec;
+use strict;
+
+my ($CC, $CC_OPTS, $AS, $AS_OPTS, $LIB, $LIB_OPTS, $LIB_TYPE);
+
+# Compiler, assembler and archiver commands with their options.
+$CC       = 'armcc';
+$CC_OPTS  = '--no_unaligned_access --cpu ARM1136J-S -c';
+$AS       = 'armasm';
+$AS_OPTS  = '--no_unaligned_access --cpu ARM1136J-S';
+# $LIB      = 'armlink';
+# $LIB_OPTS = '--partial -o';
+# $LIB_TYPE = '.o';
+$LIB      = 'armar';
+$LIB_OPTS = '--create -r';
+$LIB_TYPE = '.a';
+
+#------------------------
+
+my (@headerlist, @filelist, $hd, $file, $command, $objlist, $libfile, $h);
+my $failures = 0;    # number of build commands that returned an error
+
+# Define the list of directories containing included header files.
+@headerlist = qw(api vc/api vc/m4p2/api vc/m4p10/api);
+
+# Define the list of source files to compile.
+open(my $files, '<', 'filelist_vc.txt') or die("Can't open source file list\n");
+@filelist = <$files>;
+close($files);
+
+# Fix the file separators in the header paths
+foreach $h (@headerlist)
+{
+    $h = File::Spec->canonpath($h);
+}
+
+# Create the include path to be passed to the compiler
+$hd = '-I' . join(' -I', @headerlist);
+
+# Create the build directories "obj" and "lib" (if they are not there already)
+mkdir "obj", 0777 if (! -d "obj");
+mkdir "lib", 0777 if (! -d "lib");
+
+$objlist = '';
+
+# Compile or assemble each listed file; other file types are ignored.
+foreach $file (@filelist)
+{
+    my ($f, $base, $ext, $objfile, $tool);
+
+    chomp($file);
+    $file = File::Spec->canonpath($file);
+
+    (undef, undef, $f) = File::Spec->splitpath($file);
+    if(($base, $ext) = $f =~ /(.+)\.(\w)$/)
+    {
+        $objfile = File::Spec->catfile('obj', $base.'.o');
+
+        # Pick the tool from the single-character file extension.
+        $tool = ($ext eq 'c') ? $CC.' '.$CC_OPTS
+              : ($ext eq 's') ? $AS.' '.$AS_OPTS
+              :                 undef;
+
+        if(defined $tool)
+        {
+            $objlist .= "$objfile ";
+            $command = $tool.' '.$hd.' -o '.$objfile.' '.$file;
+            print "$command\n";
+            # Keep going after a failure so every error is shown, but
+            # remember it: the build must not be reported as successful.
+            if(system($command) != 0)
+            {
+                print STDERR "Command failed: $command\n";
+                $failures++;
+            }
+        }
+        else
+        {
+            print "Ignoring file: $f\n";
+        }
+    }
+    else
+    {
+        die "No file extension found: $f\n";
+    }
+}
+
+# Do the final link stage to create the libraries.
+$libfile = File::Spec->catfile('lib', 'omxVC'.$LIB_TYPE);
+$command = $LIB.' '.$LIB_OPTS.' '.$libfile.' '.$objlist;
+print "$command\n";
+$failures++ if (system($command) != 0);
+
+# Report the outcome through the exit status as well as the message.
+die "Build failed: $failures command(s) returned an error\n" if ($failures);
+print "Build successful\n";
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt
new file mode 100644
index 0000000..0f1623f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt
@@ -0,0 +1,74 @@
+./api/armCOMM.h
+./api/armCOMM_BitDec_s.h
+./api/armCOMM_Bitstream.h
+./api/armCOMM_IDCT_s.h
+./api/armCOMM_IDCTTable.h
+./api/armCOMM_MaskTable.h
+./api/armCOMM_s.h
+./api/armCOMM_Version.h
+./api/armOMX_ReleaseVersion.h
+./api/omxtypes.h
+./api/omxtypes_s.h
+./src/armCOMM_IDCTTable.c
+./src/armCOMM_MaskTable.c
+./vc/api/armVC.h
+./vc/api/armVCCOMM_s.h
+./vc/api/omxVC.h
+./vc/api/omxVC_s.h
+./vc/comm/src/omxVCCOMM_Copy16x16_s.s
+./vc/comm/src/omxVCCOMM_Copy8x8_s.s
+./vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
+./vc/m4p10/api/armVCM4P10_CAVLCTables.h
+./vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_CAVLCTables.c
+./vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
+./vc/m4p10/src/armVCM4P10_DequantTables_s.s
+./vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_QuantTables_s.s
+./vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
+./vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
+./vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+./vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+./vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+./vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
+./vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+./vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+./vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+./vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+./vc/m4p2/src/armVCM4P2_Clip8_s.s
+./vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+./vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+./vc/m4p2/src/armVCM4P2_Lookup_Tables.c
+./vc/m4p2/src/armVCM4P2_SetPredDir_s.s
+./vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+./vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+./vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
+./vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
+./vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
+./vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
+./vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
+./vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
+./vc/src/armVC_Version.c
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c
new file mode 100644
index 0000000..e572a89
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c
@@ -0,0 +1,936 @@
+/**
+ * 
+ * File Name:  armCOMM.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Defines Common APIs used across OpenMAX API's
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+
+/***********************************************************************/
+                /* Miscellaneous Arithmetic operations */
+
+/**
+ * Function: armRoundFloatToS16
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer, with halves
+ * rounded away from zero, and converts the result to a short int.
+ *
+ * Remarks:
+ * No saturation is applied; the rounded value is simply cast.
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16 format
+ *
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value)
+{
+    /* Bias by half a unit in the direction of the sign of the input;
+     * the integer cast then truncates towards zero. */
+    return (Value > 0) ? (OMX_S16)(Value + .5)
+                       : (OMX_S16)(Value - .5);
+}
+
+/**
+ * Function: armRoundFloatToS32
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer, with halves
+ * rounded away from zero, and converts the result to an int.
+ *
+ * Remarks:
+ * No saturation is applied; the rounded value is simply cast.
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S32 format
+ *
+ */
+
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value)
+{
+    /* Bias by half a unit in the direction of the sign of the input;
+     * the integer cast then truncates towards zero. */
+    return (Value > 0) ? (OMX_S32)(Value + .5)
+                       : (OMX_S32)(Value - .5);
+}
+/**
+ * Function: armSatRoundFloatToS16
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer, with halves
+ * rounded away from zero, and saturates the result to the range of a
+ * short int before converting it.
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16 format, limited to the range
+ *       OMX_MIN_S16 .. OMX_MAX_S16
+ *
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value)
+{
+    const OMX_S16 upper = (OMX_S16)OMX_MAX_S16;
+    const OMX_S16 lower = (OMX_S16)OMX_MIN_S16;
+
+    if (Value > 0)
+    {
+        /* Positive input: add the rounding bias, then clamp at the top
+         * of the range before truncating. */
+        Value += 0.5;
+        if (Value > upper)
+        {
+            return upper;
+        }
+        return (OMX_S16)Value;
+    }
+
+    /* Zero or negative input: subtract the rounding bias, then clamp at
+     * the bottom of the range before truncating. */
+    Value -= 0.5;
+    if (Value < lower)
+    {
+        return lower;
+    }
+    return (OMX_S16)Value;
+}
+
+/**
+ * Function: armSatRoundFloatToS32
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer, with halves
+ * rounded away from zero, and saturates the result to the range of an
+ * int before converting it.
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S32 format, limited to the range
+ *       OMX_MIN_S32 .. OMX_MAX_S32
+ *
+ */
+
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value)
+{
+    const OMX_S32 upper = (OMX_S32)OMX_MAX_S32;
+    const OMX_S32 lower = (OMX_S32)OMX_MIN_S32;
+
+    if (Value > 0)
+    {
+        /* Positive input: add the rounding bias, then clamp at the top
+         * of the range before truncating. */
+        Value += 0.5;
+        if (Value > upper)
+        {
+            return upper;
+        }
+        return (OMX_S32)Value;
+    }
+
+    /* Zero or negative input: subtract the rounding bias, then clamp at
+     * the bottom of the range before truncating. */
+    Value -= 0.5;
+    if (Value < lower)
+    {
+        return lower;
+    }
+    return (OMX_S32)Value;
+}
+
+/**
+ * Function: armSatRoundFloatToU16
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer and saturates
+ * it to the range of an unsigned short int: 0 .. OMX_MAX_U16.
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U16 format
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    /* Saturate at zero explicitly: casting an out-of-range negative
+     * double to an unsigned type is undefined behaviour in C. */
+    if(Value < 0)
+        return 0;
+    if(Value > (OMX_U16)OMX_MAX_U16 )
+        return (OMX_U16)OMX_MAX_U16;
+
+    return (OMX_U16)Value;
+}
+
+/**
+ * Function: armSatRoundFloatToU32
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer and saturates
+ * it to the range of an unsigned int: 0 .. OMX_MAX_U32.
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U32 format
+ */
+
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    /* Saturate at zero explicitly: casting an out-of-range negative
+     * double to an unsigned type is undefined behaviour in C. */
+    if(Value < 0)
+        return 0;
+    if(Value > (OMX_U32)OMX_MAX_U32 )
+        return (OMX_U32)OMX_MAX_U32;
+
+    return (OMX_U32)Value;
+}
+
+/**
+ * Function: armRoundFloatToS64
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer, with halves
+ * rounded away from zero, and converts the result to a 64 bit int.
+ *
+ * Remarks:
+ * No saturation is applied; the rounded value is simply cast.
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S64 format
+ *
+ */
+
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value)
+{
+    /* Bias by half a unit in the direction of the sign of the input;
+     * the integer cast then truncates towards zero. */
+    return (Value > 0) ? (OMX_S64)(Value + .5)
+                       : (OMX_S64)(Value - .5);
+}
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Classifies a 16-bit signed value by its sign (the signum function):
+ * returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative
+ *
+ * Remarks:
+ * Exactly one of the three values above is returned for every input.
+ *
+ * Parameters:
+ * [in]     var     Variable to be checked
+ *
+ * Return Value:
+ * OMX_INT --   returns 1 if it is Positive
+ *              returns 0 if it is 0
+ *              returns -1 if it is Negative
+ *
+ */
+
+OMX_INT armSignCheck (
+    OMX_S16 var
+)
+
+{
+    OMX_INT result = 0;     /* stays 0 when var is exactly zero */
+
+    if (var > 0)
+    {
+        /* strictly positive */
+        result = 1;
+    }
+    else if (var < 0)
+    {
+        /* strictly negative */
+        result = -1;
+    }
+
+    return result;
+}
+
+/**
+ * Function: armClip
+ *
+ * Description: Limits src to the closed range [min, max]
+ *
+ *
+ * Remarks:
+ * The upper bound is tested before the lower bound, so when min > max
+ * the result is max for src > max and min for every other src.
+ *
+ * Parameters:
+ * [in] min     lower bound
+ * [in] max     upper bound
+ * [in] src     variable to be clipped
+ *
+ * Return Value:
+ * OMX_S32 --   returns clipped value
+ *
+ */
+
+OMX_S32 armClip (
+    OMX_INT min,
+    OMX_INT max,
+    OMX_S32 src
+)
+
+{
+    /* Check the upper bound first, then the lower bound. */
+    if (src > max)
+    {
+        return max;
+    }
+
+    return (src < min) ? min : src;
+}
+
+/**
+ * Function: armClip_F32
+ *
+ * Description: Limits a float src to the closed range [min, max]
+ *
+ *
+ * Remarks:
+ * The upper bound is tested before the lower bound, so when min > max
+ * the result is max for src > max and min for every other src.
+ *
+ * Parameters:
+ * [in] min     lower bound
+ * [in] max     upper bound
+ * [in] src     variable to be clipped
+ *
+ * Return Value:
+ * OMX_F32 --   returns clipped value
+ *
+ */
+
+OMX_F32 armClip_F32 (
+    OMX_F32 min,
+    OMX_F32 max,
+    OMX_F32 src
+)
+
+{
+    /* Check the upper bound first, then the lower bound. */
+    if (src > max)
+    {
+        return max;
+    }
+
+    return (src < min) ? min : src;
+}
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description: Divides a float value by 2^shift, rounds the quotient
+ * by adding 0.5 and truncating, and saturates it to the unsigned value
+ * range for satBits. The second parameter acts like "shifting" the
+ * corresponding integer value to the right. Inputs that are zero or
+ * negative return 0.
+ *
+ * Parameters:
+ * [in] v          Number to be operated upon
+ * [in] shift      Divides the input "v" by "2^shift"
+ * [in] satBits    Final range is [0, 2^satBits)
+ *
+ * Return Value:
+ * OMX_U32 --   returns "shifted" saturated value
+ *
+ */
+
+OMX_U32 armShiftSat_F32(OMX_F32 v, OMX_INT shift, OMX_INT satBits)
+{
+    const OMX_U32 limit = ((OMX_U32)(-1)) >> (32-satBits); /* 2^satBits - 1 */
+    const OMX_F32 divisor = (OMX_F32)(1 << shift);         /* 2^shift */
+    OMX_F32 scaled;
+    OMX_U32 rounded;
+
+    /* Zero and negative inputs give the bottom of the range. */
+    if(v <= 0)
+        return 0;
+
+    /* Scale, then round by adding a half before the truncating cast. */
+    scaled = v / divisor;
+    scaled = (OMX_F32)(scaled + 0.5);
+    rounded = (OMX_U32)scaled;
+
+    return (rounded > limit) ? limit : rounded;
+}
+
+/**
+ * Functions: armSwapElem
+ *
+ * Description:
+ * Exchanges the contents of two elements, byte by byte. Each element
+ * is <elemSize> bytes long and starts at the given pointer location.
+ *
+ * Return Value:
+ * OMXResult -- Error status from the function
+ */
+OMXResult armSwapElem(
+        OMX_U8 *pBuf1,
+        OMX_U8 *pBuf2,
+        OMX_INT elemSize
+       )
+{
+    OMX_INT offset;
+    OMX_U8 saved;
+    armRetArgErrIf(!pBuf1 || !pBuf2, OMX_Sts_BadArgErr);
+    /* Three-step exchange through a one-byte temporary. */
+    for(offset = 0; offset < elemSize; offset++)
+    {
+        saved = pBuf1[offset];
+        pBuf1[offset] = pBuf2[offset];
+        pBuf2[offset] = saved;
+    }
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Returns the middle value of three numbers
+ * Remarks:
+ * Built from two armMin and two armMax operations; nothing is sorted.
+ *
+ * Parameters:
+ * [in] fEntry     First entry
+ * [in] sEntry     Second entry
+ * [in] tEntry     Third entry
+ *
+ * Return Value:
+ * OMX_S32 --   returns the median value
+ */
+
+OMX_S32 armMedianOf3 (
+    OMX_S32 fEntry,
+    OMX_S32 sEntry,
+    OMX_S32 tEntry
+)
+{
+    /* Order the first pair, cap the third entry by the larger of the
+     * pair, then take the larger of that and the smaller of the pair. */
+    const OMX_S32 pairLow  = armMin (fEntry, sEntry);
+    const OMX_S32 pairHigh = armMax (fEntry, sEntry);
+
+    return armMax (pairLow, armMin (pairHigh, tEntry));
+}
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Counts the bits needed to represent an unsigned value
+ *
+ * Parameters:
+ * [in] value    Positive value
+ *
+ * Return Value:
+ * OMX_U8 --     Returns the minimum number of bits required to represent the positive value.
+ *               This is the smallest k>=0 such that the value is less than (1<<k).
+ *               A value of 0 therefore gives 0.
+ */
+
+OMX_U8 armLogSize (
+    OMX_U16 value
+)
+{
+    OMX_U8 bits = 0;
+    while (value > 0)
+    {
+        bits++;
+        value >>= 1;
+    }
+    return bits;
+}
+
+/***********************************************************************/
+                /* Saturating Arithmetic operations */
+
+/**
+ * Function :armSatAdd_S32()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ *   If the exact sum does not fit in 32 bits the result is clamped to
+ *   OMX_MAX_S32 (positive overflow) or OMX_MIN_S32 (negative overflow).
+ *
+ * Remarks:
+ *   The range check is done before the addition, using only operations
+ *   that cannot themselves overflow, because signed overflow is
+ *   undefined behaviour in C.
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ *
+ *
+ **/
+
+OMX_S32 armSatAdd_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    /*
+     * Overflow is only possible when both operands have the same sign.
+     * For a positive Value2 the sum fits iff Value1 <= MAX - Value2;
+     * for a negative Value2 it fits iff Value1 >= MIN - Value2.
+     * Neither bound expression can overflow.
+     */
+    if (Value2 > 0)
+    {
+        if (Value1 > OMX_MAX_S32 - Value2)
+        {
+            /*Result has saturated in positive side*/
+            return OMX_MAX_S32;
+        }
+    }
+    else if (Value2 < 0)
+    {
+        if (Value1 < OMX_MIN_S32 - Value2)
+        {
+            /*Result has saturated in negative side*/
+            return OMX_MIN_S32;
+        }
+    }
+
+    /*Result has not saturated*/
+    return Value1 + Value2;
+}
+
+/**
+ * Function :armSatAdd_S64()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ *   If the exact sum does not fit in 64 bits the result is clamped to
+ *   OMX_MAX_S64 (positive overflow) or OMX_MIN_S64 (negative overflow).
+ *
+ * Remarks:
+ *   The range check is done before the addition, using only operations
+ *   that cannot themselves overflow, because signed overflow is
+ *   undefined behaviour in C.
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ *
+ *
+ **/
+
+OMX_S64 armSatAdd_S64(OMX_S64 Value1,OMX_S64 Value2)
+{
+    /* Smallest OMX_S64, spelled so that the expression stays signed. */
+    const OMX_S64 minS64 = -OMX_MAX_S64 - 1;
+
+    /*
+     * Overflow is only possible when both operands have the same sign.
+     * Test for it before adding: signed overflow is undefined in C.
+     */
+    if (Value2 > 0)
+    {
+        if (Value1 > OMX_MAX_S64 - Value2)
+        {
+            /*Result has saturated in positive side*/
+            return OMX_MAX_S64;
+        }
+    }
+    else if (Value2 < 0)
+    {
+        if (Value1 < minS64 - Value2)
+        {
+            /*Result has saturated in negative side*/
+            return minS64;
+        }
+    }
+
+    /*Result has not saturated*/
+    return Value1 + Value2;
+}
+
+/** Function :armSatSub_S32()
+ *
+ * Description :
+ *     Returns the result of saturated subtraction of the two inputs Value1, Value2
+ *     (Value1 - Value2)
+ *
+ *     If the exact difference does not fit in 32 bits the result is
+ *     clamped to OMX_MAX_S32 (positive overflow) or OMX_MIN_S32
+ *     (negative overflow).
+ *
+ * Remarks:
+ *     The range check is done before the subtraction, using only
+ *     operations that cannot themselves overflow, because signed
+ *     overflow is undefined behaviour in C.
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ *
+ **/
+
+OMX_S32 armSatSub_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    /*
+     * Overflow is only possible when the operands have opposite signs.
+     * Test for it before subtracting: signed overflow is undefined in C.
+     */
+    if (Value2 < 0)
+    {
+        if (Value1 > OMX_MAX_S32 + Value2)
+        {
+            /*Result has saturated in positive side*/
+            return OMX_MAX_S32;
+        }
+    }
+    else if (Value2 > 0)
+    {
+        if (Value1 < OMX_MIN_S32 + Value2)
+        {
+            /*Result has saturated in negative side*/
+            return OMX_MIN_S32;
+        }
+    }
+
+    /*Result has not saturated*/
+    return Value1 - Value2;
+}
+
+/**
+ * Function :armSatMac_S32()
+ *
+ * Description :
+ *     Multiplies Value1 by Value2 and adds the product to Mac using
+ *     saturated addition (armSatAdd_S32).
+ *
+ * Parameters:
+ * [in] Mac          Accumulator
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ *
+ *
+ **/
+
+OMX_S32 armSatMac_S32(OMX_S32 Mac,OMX_S16 Value1,OMX_S16 Value2)
+{
+    const OMX_S32 product = (OMX_S32)(Value1*Value2);
+
+    /* Saturation is applied by the addition, not by the product. */
+    return armSatAdd_S32( Mac , product );
+}
+
+/**
+ * Function :armSatMac_S16S32_S32
+ *
+ * Description :
+ *   Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac
+ *
+ *   mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Parametrs:
+ * [in] delayElem    First 32 bit Operand
+ * [in] filTap       Second 16 bit Operand
+ * [in] mac          Result of MAC operation
+ *
+ * Return:
+ * [out]  mac        Result of operation
+ *    
+ **/
+ 
+OMX_S32 armSatMac_S16S32_S32(OMX_S32 mac, OMX_S32 delayElem, OMX_S16 filTap )
+{
+    /* Adds delayElem*filTap to mac; the product is clamped to 32 bits before the add */
+    OMX_S32 result;
+    /* Probe: armSatMulS16S32_S32 yields the product scaled down by 2^16 */
+    result = armSatMulS16S32_S32(filTap,delayElem); 
+    /* A probe outside the S16 range means the full product does not fit in 32 bits */
+    if ( result > OMX_MAX_S16 )
+    {
+        result = OMX_MAX_S32;
+    }
+    else if( result < OMX_MIN_S16 )
+    {
+        result = OMX_MIN_S32;
+    }
+    else
+    {
+        result = delayElem * filTap;
+    }
+    /* Accumulate through armSatAdd_S32 */
+    mac = armSatAdd_S32(mac,result);
+    
+    return mac;
+}
+
+
+/**
+ * Function :armSatRoundRightShift_S32_S16
+ *
+ * Description :
+ *   Returns the result of rounded right shift operation of input by the scalefactor
+ *
+ *   output = Saturate_in_16Bits( RoundedRightShift( input , shift ) ), a negative shift shifts left
+ *
+ * Parameters:
+ * [in] input       The input to be operated on
+ * [in] shift       The shift number
+ *
+ * Return:
+ * [out]            Result of operation
+ *    
+ **/
+
+
+OMX_S16 armSatRoundRightShift_S32_S16(OMX_S32 input, OMX_INT shift)
+{
+    /* A right shift is expressed as a left shift by the negated amount */
+    const OMX_S32 Shifted = armSatRoundLeftShift_S32(input,-shift);
+
+    /* Clamp to the upper 16-bit bound */
+    if ( Shifted > OMX_MAX_S16 )
+    {
+        return (OMX_S16)OMX_MAX_S16;
+    }
+
+    /* Clamp to the lower 16-bit bound */
+    if ( Shifted < OMX_MIN_S16 )
+    {
+        return (OMX_S16)OMX_MIN_S16;
+    }
+    return (OMX_S16)Shifted;
+}
+
+/**
+ * Function :armSatRoundLeftShift_S32()
+ *
+ * Description :
+ *     Returns the result of saturating left-shift operation on input
+ *     Or rounded Right shift if the input Shift is negative.
+ *     
+ * Parameters:
+ * [in] Value        Operand
+ * [in] Shift        Operand for shift operation
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+OMX_S32 armSatRoundLeftShift_S32(OMX_S32 Value, OMX_INT Shift)
+{
+    OMX_INT Count;
+
+    if (Shift >= 0)
+    {
+        /* Left shift: add the value to itself through armSatAdd_S32, once per bit */
+        for (Count = Shift; Count > 0; Count--)
+        {
+            Value = armSatAdd_S32(Value, Value);
+        }
+        return Value;
+    }
+
+    /* Right shift: add half of the divisor first so that the result is rounded */
+    Count = -Shift;
+    Value = armSatAdd_S32(Value, (1 << (Count - 1)));
+    return Value >> Count;
+}
+
+/**
+ * Function :armSatRoundLeftShift_S64()
+ *
+ * Description :
+ *     Returns the result of saturating left-shift operation on input
+ *     Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value        Operand
+ * [in] Shift        Operand for shift operation
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+ 
+OMX_S64 armSatRoundLeftShift_S64(OMX_S64 Value, OMX_INT Shift)
+{
+    OMX_INT Remaining = Shift;
+
+    if (Remaining < 0)
+    {
+        /* Negative shift: rounded right shift by the negated amount */
+        Remaining = -Remaining;
+        Value = armSatAdd_S64(Value, ((OMX_S64)1 << (Remaining - 1)));
+        return Value >> Remaining;
+    }
+    /* Non-negative shift: add the value to itself through armSatAdd_S64, once per bit */
+    while (Remaining > 0)
+    {
+        Value = armSatAdd_S64(Value, Value);
+        Remaining--;
+    }
+    return Value;
+}
+
+/**
+ * Function :armSatMulS16S32_S32()
+ *
+ * Description :
+ *     Returns the result of a S16 data type multiplied with an S32 data type
+ *     in a S32 container
+ *
+ * Parameters:
+ * [in] input1       Operand 1
+ * [in] input2       Operand 2
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+
+OMX_S32 armSatMulS16S32_S32(OMX_S16 input1,OMX_S32 input2)
+{
+    OMX_S16 hi2,lo1;
+    OMX_U16 lo2;
+    /* temp1/temp2 are the partial products of the two 16-bit halves of input2 */
+    OMX_S32 temp1,temp2;
+    OMX_S32 result;
+
+    lo1  = input1;
+    /* Split input2; the low half is masked in unsigned form (signed << 16 is undefined) */
+    hi2  = ( input2 >>  16 );
+    lo2  = (OMX_U16)( (OMX_U32)input2 & 0xFFFF );
+    /* (hi2*2^16 + lo2) * lo1 / 2^16: only the low partial product needs scaling */
+    temp1 = hi2 * lo1;
+    temp2 = ( lo2* lo1 ) >> 16;
+
+    result =  armSatAdd_S32(temp1,temp2);
+
+    return result;
+}
+
+/**
+ * Function :armSatMulS32S32_S32()
+ *
+ * Description :
+ *     Returns the result of a S32 data type multiplied with an S32 data type
+ *     in a S32 container
+ *
+ * Parameters:
+ * [in] input1       Operand 1
+ * [in] input2       Operand 2
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+OMX_S32 armSatMulS32S32_S32(OMX_S32 input1,OMX_S32 input2)
+{
+    OMX_S16 hi1,hi2;
+    OMX_U16 lo1,lo2;
+
+    OMX_S32 temp1,temp2,temp3;
+    OMX_S32 result;
+    /* Low halves are masked in unsigned form: << 16 on a signed value is undefined */
+    hi1  = ( input1 >>  16 );
+    lo1  = (OMX_U16)( (OMX_U32)input1 & 0xFFFF );
+
+    hi2  = ( input2 >>  16 );
+    lo2  = (OMX_U16)( (OMX_U32)input2 & 0xFFFF );
+    /* Partial products; the lo1*lo2 term is not accumulated */
+    temp1 =   hi1 * hi2;
+    temp2 = ( hi1* lo2 ) >> 16;
+    temp3 = ( hi2* lo1 ) >> 16;
+
+    result = armSatAdd_S32(temp1,temp2);
+    result = armSatAdd_S32(result,temp3);
+
+    return result;
+}
+
+/**
+ * Function :armIntDivAwayFromZero()
+ *
+ * Description : Integer division with rounding to the nearest integer. 
+ *               Half-integer values are rounded away from zero
+ *               unless otherwise specified. For example 3//2 is rounded 
+ *               to 2, and -3//2 is rounded to -2.
+ *
+ * Parameters:
+ * [in] Num        Operand 1
+ * [in] Deno       Operand 2
+ *
+ * Return:
+ * [out]             Result of operation Num//Deno
+ *    
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno)
+{
+    /* The quotient is formed in floating point so its fraction is available */
+    const OMX_F64 Quotient = ((OMX_F64)Num)/((OMX_F64)Deno);
+    OMX_F64 Rounded;
+    /* Move the quotient half a unit away from zero ... */
+    if (Quotient >= 0)
+    {
+        Rounded = Quotient + 0.5;
+    }
+    else
+    {
+        Rounded = Quotient - 0.5;
+    }
+    /* ... so that the truncating cast lands on the nearest integer */
+    return (OMX_S32)(Rounded);
+}
+
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c
new file mode 100644
index 0000000..9ef9319
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c
@@ -0,0 +1,329 @@
+/**
+ * 
+ * File Name:  armCOMM_Bitstream.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Defines bitstream encode and decode functions common to all codecs
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+
+/***************************************
+ * Fixed bit length Decode
+ ***************************************/
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream pointer
+ *
+ * Parameters:
+ * [in]     **ppBitStream
+ * [in]     *pOffset
+ * [in]     N=1...32
+ *
+ * Returns  Value
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+    /* Read next 32 bits from stream (five bytes are touched, whatever N is). */
+    /* Bytes are widened to OMX_U32 first: a promoted int shifted by 24 is undefined for bytes >= 0x80 */
+    Value = ((OMX_U32)pBitStream[0] << 24 ) | ((OMX_U32)pBitStream[1] << 16)  | ((OMX_U32)pBitStream[2] << 8 ) | (pBitStream[3]) ;
+    Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset));
+
+    /* Return N bits; neither *ppBitStream nor *pOffset is modified */
+    return Value >> (32-N);
+}
+
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream
+ *    
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ * [in]     N=1..32
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ * Returns  Value
+ */
+
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+    /* Nothing to read: return 0 and leave the stream position untouched */
+    if(N == 0)
+    {
+      return 0;
+    }
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+    /* Read next 32 bits from stream (five bytes are touched, whatever N is). */
+    /* Bytes are widened to OMX_U32 first: a promoted int shifted by 24 is undefined for bytes >= 0x80 */
+    Value = ((OMX_U32)pBitStream[0] << 24 ) | ((OMX_U32)pBitStream[1] << 16)  | ((OMX_U32)pBitStream[2] << 8 ) | (pBitStream[3]) ;
+    Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset));
+
+    /* Advance bitstream pointer by N bits */
+    Offset += N;
+    *ppBitStream = pBitStream + (Offset>>3);
+    *pOffset = Offset & 7;
+
+    /* Return N bits */
+    return Value >> (32-N);
+}
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align the pointer *ppBitStream to the next byte boundary
+ *
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ *
+ **/
+ 
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset)
+{
+    /* Offset of zero (or less): the pointer is left where it is */
+    if(*pOffset <= 0) return;
+
+    (*ppBitStream)++;
+    *pOffset = 0;
+}
+
+/** 
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Skip N bits from the value at *ppBitStream
+ *
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ * [in]     N
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ *
+ **/
+
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N)
+{
+    /* Bit position after the skip, counted from the start of the current byte */
+    const OMX_INT BitPos = *pOffset + N;
+
+    /* The whole-byte part moves the pointer, */
+    /* the low three bits become the new bit offset */
+    *ppBitStream += (BitPos>>3);
+    *pOffset = BitPos & 7;
+}
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of variable length symbol (max size 32 bits) read from
+ * the bit stream pointed by *ppBitStream at *pOffset by using the table
+ * pointed by pCodeBook
+ * 
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ * [in]     pCodeBook
+ * 
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ *
+ * Returns : Code Book Index if successful. 
+ *         : ARM_NO_CODEBOOK_INDEX = -1 if search fails.
+ **/
+#ifndef C_OPTIMIZED_IMPLEMENTATION 
+
+OMX_U16 armUnPackVLC32(
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pOffset,
+    const ARM_VLC32 *pCodeBook
+)
+{    
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+    OMX_INT Index;
+
+    armAssert(Offset>=0 && Offset<=7);
+    /* Bytes are widened to OMX_U32 first: a promoted int shifted by 24 is undefined for bytes >= 0x80 */
+    /* Read next 32 bits from stream */
+    Value = ((OMX_U32)pBitStream[0] << 24 ) | ((OMX_U32)pBitStream[1] << 16)  | ((OMX_U32)pBitStream[2] << 8 ) | (pBitStream[3]) ;
+    Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset));
+
+    /* Search through the codebook; an entry with codeLen == 0 terminates it */
+    for (Index=0; pCodeBook->codeLen != 0; Index++)
+    {
+        if (pCodeBook->codeWord == (Value >> (32 - pCodeBook->codeLen)))
+        {
+            Offset       = Offset + pCodeBook->codeLen;
+            *ppBitStream = pBitStream + (Offset >> 3) ;
+            *pOffset     = Offset & 7;
+            /* The stream has been advanced past the matched code word */
+            return Index;
+        }        
+        pCodeBook++;
+    }
+
+    /* No code match found: the stream position is left unchanged */
+    return ARM_NO_CODEBOOK_INDEX;
+}
+
+#endif
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack a code word of codeLength bits into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream     pointer to the pointer to the current byte 
+ *                      in the bit stream.
+ * [in] pOffset         pointer to the bit position in the byte 
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in] codeWord        Code word that need to be inserted in to the
+ *                          bitstream
+ * [in] codeLength      Length of the code word valid range 1...32
+ *
+ * [out] ppBitStream    *ppBitStream is updated after the block is encoded,
+ *                          so that it points to the current byte in the bit
+ *                          stream buffer.
+ * [out] pOffset        *pOffset is updated so that it points to the
+ *                          current bit position in the byte pointed by
+ *                          *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+ 
+OMXResult armPackBits (
+    OMX_U8  **ppBitStream, 
+    OMX_INT *pOffset,
+    OMX_U32 codeWord, 
+    OMX_INT codeLength 
+)
+{
+    OMX_U8  *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+    /* Value holds the byte that is currently being assembled */
+    /* checking argument validity */
+    armRetArgErrIf(Offset < 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf(Offset > 7, OMX_Sts_BadArgErr);
+    armRetArgErrIf(codeLength < 1, OMX_Sts_BadArgErr);
+    armRetArgErrIf(codeLength > 32, OMX_Sts_BadArgErr);
+    /* The code word is left-justified in 32 bits before it is merged in */
+    /* Prepare the first byte (its top Offset bits are kept as they are) */
+    codeWord = codeWord << (32-codeLength);
+    Value = (pBitStream[0] >> (8-Offset)) << (8-Offset);
+    Value = Value | (codeWord >> (24+Offset));
+    /* Each pass stores one finished byte and left-aligns the remaining code bits */
+    /* Write out whole bytes */
+    while (8-Offset <= codeLength)
+    {
+        *pBitStream++ = (OMX_U8)Value;
+        codeWord   = codeWord  << (8-Offset);
+        codeLength = codeLength - (8-Offset);
+        Offset = 0;
+        Value = codeWord >> 24;
+    }
+    /* This store is unconditional, even when no code bits remain */
+    /* Write out final partial byte */
+    *pBitStream  = (OMX_U8)Value;
+    *ppBitStream = pBitStream;
+    *pOffset = Offset + codeLength;
+    
+    return  OMX_Sts_NoErr;
+}
+ 
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte 
+ *                      in the bit stream.
+ * [in]	pBitOffset	    pointer to the bit position in the byte 
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in]	 code     		VLC code word that need to be inserted in to the
+ *                      bitstream
+ *
+ * [out] ppBitStream	*ppBitStream is updated after the block is encoded,
+ *	                    so that it points to the current byte in the bit
+ *						stream buffer.
+ * [out] pBitOffset		*pBitOffset is updated so that it points to the
+ *						current bit position in the byte pointed by
+ *						*ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+ 
+OMXResult armPackVLC32 (OMX_U8 **ppBitStream, OMX_INT *pBitOffset, ARM_VLC32 code)
+{
+    /* A VLC entry carries its own bit pattern (codeWord) and its own */
+    /* length (codeLen), so packing it is a plain armPackBits call. */
+    /* armPackBits checks the offset and the length and produces the */
+    /* status code that is returned here unchanged. */
+    return armPackBits(ppBitStream, pBitOffset, code.codeWord, code.codeLen);
+}
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c
new file mode 100644
index 0000000..9e4679c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c
@@ -0,0 +1,60 @@
+/**
+ * 
+ * File Name:  armCOMM_IDCTTable.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *   
+ * File: armCOMM_IDCTTable.c
+ * Brief: Defines Tables used in IDCT computation
+ *
+ */
+
+#include "armCOMM_IDCTTable.h"
+
+     /*  Table of s(u)*A(u)*A(v)/16 at Q15
+      *  s(u)=1.0 0 <= u <= 5
+      *  s(6)=2.0
+      *  s(7)=4.0
+      *  A(0) = 2*sqrt(2)
+      *  A(u) = 4*cos(u*pi/16)  for (u!=0)
+	  */
+	  
+__align(4) const OMX_U16 armCOMM_IDCTPreScale [64] = /* 64 Q15 scale factors, listed 8 per line */
+{
+    0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1,
+    0x58c5, 0x7b21, 0x73fc, 0x6862, 0x58c5, 0x45bf, 0x6016, 0x61f8,
+    0x539f, 0x73fc, 0x6d41, 0x6254, 0x539f, 0x41b3, 0x5a82, 0x5c48,
+    0x4b42, 0x6862, 0x6254, 0x587e, 0x4b42, 0x3b21, 0x5175, 0x530d,
+    0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1,
+    0x3249, 0x45bf, 0x41b3, 0x3b21, 0x3249, 0x2782, 0x366d, 0x377e,
+    0x22a3, 0x300b, 0x2d41, 0x28ba, 0x22a3, 0x1b37, 0x257e, 0x263a,
+    0x11a8, 0x187e, 0x1712, 0x14c3, 0x11a8, 0x0de0, 0x131d, 0x137d    
+};
+    /* Above array armCOMM_IDCTPreScale,  in Q23 format */
+const OMX_U32 armCOMM_IDCTPreScaleU32 [64] = /* 64 Q23 scale factors; 0x400000 here matches 0x4000 at Q15 */
+{
+    0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157,
+    0x58c543, 0x7b20d8, 0x73fbfc, 0x686214, 0x58c543, 0x45bf1f, 0x6015a5, 0x61f78b,
+    0x539eba, 0x73fbfc, 0x6d413d, 0x6253a6, 0x539eba, 0x41b328, 0x5a827a, 0x5c4869,
+    0x4b418c, 0x686214, 0x6253a6, 0x587de3, 0x4b418c, 0x3b20d8, 0x5174e0, 0x530d69,
+    0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157,
+    0x3248d4, 0x45bf1f, 0x41b328, 0x3b20d8, 0x3248d4, 0x27821d, 0x366d72, 0x377e6b,
+    0x22a2f5, 0x300ad3, 0x2d413d, 0x28ba70, 0x22a2f5, 0x1b36b9, 0x257d86, 0x26398d,
+    0x11a856, 0x187de3, 0x17121a, 0x14c35a, 0x11a856, 0x0ddf9b, 0x131cc7, 0x137ca2
+};
+   
+const OMX_U16 armCOMM_IDCTCoef [4] = /* trigonometric constants, value = entry / 2^15 */
+{
+    0x5a82, /* InvSqrt2: 1/sqrt(2), about 0.7071 */
+    0x30fc, /* SinPIBy8: sin(pi/8), about 0.3827 */
+    0x7642, /* CosPIBy8: cos(pi/8), about 0.9239 */
+    0x0000  /* fourth entry is zero; presumably padding - TODO confirm */
+};
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c
new file mode 100644
index 0000000..3241db2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c
@@ -0,0 +1,45 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  armCOMM_MaskTable.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Mask Table to mask the end of array.
+ * 
+ */
+ 
+#include "omxtypes.h"
+
+#define MaskTableSize 72
+
+const OMX_U16 armCOMM_qMaskTable16[MaskTableSize] = /* 9 rows of 8: row k starts with k entries of 0xFFFF, the rest are 0 */
+{
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF    
+};
+
+const OMX_U8 armCOMM_qMaskTable8[MaskTableSize] = /* 9 rows of 8: row k starts with k entries of 0xFF, the rest are 0 */
+{
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+        0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+        0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+        0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00,  
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00,  
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF    
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h
new file mode 100644
index 0000000..7fa7716
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h
@@ -0,0 +1,1153 @@
+/**
+ * 
+ * File Name:  armVC.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File: armVideo.h
+ * Brief: Declares API's/Basic Data types used across the OpenMAX Video domain
+ *
+ */
+
+
+#ifndef _armVideo_H_
+#define _armVideo_H_
+
+#include "omxVC.h"
+#include "armCOMM_Bitstream.h"
+
+/**
+ * ARM specific state structure to hold Motion Estimation information.
+ */
+ 
+struct m4p2_MESpec
+{
+    OMXVCM4P2MEParams MEParams;   /* motion estimation parameters */
+    OMXVCM4P2MEMode   MEMode;     /* motion estimation mode */
+};
+
+struct m4p10_MESpec
+{
+    OMXVCM4P10MEParams MEParams;  /* motion estimation parameters */
+    OMXVCM4P10MEMode   MEMode;    /* motion estimation mode */
+};
+
+typedef struct m4p2_MESpec  ARMVCM4P2_MESpec;   /* M4P2 flavour of the state structure */
+typedef struct m4p10_MESpec ARMVCM4P10_MESpec;  /* M4P10 flavour of the state structure */
+
+/**
+ * Function: armVCM4P2_CompareMV
+ *
+ * Description:
+ * Performs comparison of motion vectors and SAD's to decide the
+ * best MV and SAD
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]     mvX     x coordinate of the candidate motion vector
+ * [in]     mvY     y coordinate of the candidate motion vector
+ * [in]     candSAD Candidate SAD
+ * [in]     bestMVX x coordinate of the best motion vector
+ * [in]     bestMVY y coordinate of the best motion vector
+ * [in]     bestSAD best SAD
+ *
+ * Return Value:
+ * OMX_INT -- 1 to indicate that the current sad is the best
+ *            0 to indicate that it is NOT the best SAD
+ */
+
+OMX_INT armVCM4P2_CompareMV (
+    OMX_S16 mvX,      /* [in] x coordinate of the candidate motion vector */
+    OMX_S16 mvY,      /* [in] y coordinate of the candidate motion vector */
+    OMX_INT candSAD,  /* [in] candidate SAD */
+    OMX_S16 bestMVX,  /* [in] x coordinate of the best motion vector */
+    OMX_S16 bestMVY,  /* [in] y coordinate of the best motion vector */
+    OMX_INT bestSAD); /* [in] best SAD; the return value is 1 if the current SAD is the best, else 0 */
+
+/**
+ * Function: armVCM4P2_ACDCPredict
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+ * to the function call, prediction direction (predDir) should be selected
+ * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDst     pointer to the coefficient buffer which contains
+ *                          the quantized coefficient residuals (PQF) of the
+ *                          current block
+ * [in] pPredBufRow pointer to the coefficient row buffer
+ * [in] pPredBufCol pointer to the coefficient column buffer
+ * [in] curQP       quantization parameter of the current block. curQP
+ *                          may equal to predQP especially when the current
+ *                          block and the predictor block are in the same
+ *                          macroblock.
+ * [in] predQP      quantization parameter of the predictor block
+ * [in] predDir     indicates the prediction direction which takes one
+ *                          of the following values:
+ *                          OMX_VIDEO_HORIZONTAL    predict horizontally
+ *                          OMX_VIDEO_VERTICAL      predict vertically
+ * [in] ACPredFlag  a flag indicating if AC prediction should be
+ *                          performed. It is equal to ac_pred_flag in the bit
+ *                          stream syntax of MPEG-4
+ * [in] videoComp   video component type (luminance, chrominance or
+ *                          alpha) of the current block
+ * [in] flag        This flag selects whether the function is used to
+ *                  calculate PQF (set 1, prediction) or QF (set 0, reconstruction)
+ * [out]    pPreACPredict   pointer to the predicted coefficients buffer.
+ *                          Filled ONLY if it is not NULL
+ * [out]    pSrcDst     pointer to the coefficient buffer which contains
+ *                          the quantized coefficients (QF) of the current
+ *                          block
+ * [out]    pPredBufRow pointer to the updated coefficient row buffer
+ * [out]    pPredBufCol pointer to the updated coefficient column buffer
+ * [out]    pSumErr     pointer to the updated sum of the difference
+ *                      between predicted and unpredicted coefficients
+ *                      If this is NULL, do not update
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_ACDCPredict(
+     OMX_S16 * pSrcDst,                    /* [in/out] PQF residuals in, QF coefficients out */
+     OMX_S16 * pPreACPredict,              /* [out] predicted coefficients; filled only if not NULL */
+     OMX_S16 * pPredBufRow,                /* [in/out] coefficient row buffer */
+     OMX_S16 * pPredBufCol,                /* [in/out] coefficient column buffer */
+     OMX_INT curQP,                        /* [in] quantization parameter of the current block */
+     OMX_INT predQP,                       /* [in] quantization parameter of the predictor block */
+     OMX_INT predDir,                      /* [in] OMX_VIDEO_HORIZONTAL or OMX_VIDEO_VERTICAL */
+     OMX_INT ACPredFlag,                   /* [in] whether AC prediction should be performed */
+     OMXVCM4P2VideoComponent  videoComp,   /* [in] video component type of the current block */
+     OMX_U8 flag,                          /* [in] 1: calculate PQF (prediction), 0: calculate QF (reconstruction) */
+     OMX_INT *pSumErr                      /* [out] updated sum of differences; not updated if NULL */
+);
+
+/**
+ * Function: armVCM4P2_SetPredDir
+ *
+ * Description:
+ * Detects the prediction direction
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] blockIndex  block index indicating the component type and
+ *                          position as defined in subclause 6.1.3.8, of ISO/IEC
+ *                          14496-2. Furthermore, indexes 6 to 9 indicate the
+ *                          alpha blocks spatially corresponding to luminance
+ *                          blocks 0 to 3 in the same macroblock.
+ * [in] pCoefBufRow pointer to the coefficient row buffer
+ * [in] pQpBuf      pointer to the quantization parameter buffer
+ * [out]    predQP      quantization parameter of the predictor block
+ * [out]    predDir     indicates the prediction direction which takes one
+ *                          of the following values:
+ *                          OMX_VIDEO_HORIZONTAL    predict horizontally
+ *                          OMX_VIDEO_VERTICAL      predict vertically
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_SetPredDir(
+     OMX_INT blockIndex,      /* [in] block index (component type and position) */
+     OMX_S16 *pCoefBufRow,    /* [in] coefficient row buffer */
+     OMX_S16 *pCoefBufCol,    /* not listed in the comment block of this prototype; presumably the coefficient column buffer - TODO confirm */
+     OMX_INT *predDir,        /* [out] prediction direction */
+     OMX_INT *predQP,         /* [out] quantization parameter of the predictor block */
+     const OMX_U8 *pQpBuf     /* [in] quantization parameter buffer */
+);
+
+/**
+ * Function: armVCM4P2_EncodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs zigzag scanning and VLC encoding for one intra block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream     pointer to the pointer to the current byte in
+ *                              the bit stream
+ * [in] pBitOffset      pointer to the bit position in the byte pointed
+ *                              by *ppBitStream. Valid within 0 to 7.
+ * [in] pQDctBlkCoef    pointer to the quantized DCT coefficient
+ * [in] predDir         AC prediction direction, which is used to decide
+ *                              the zigzag scan pattern. This takes one of the
+ *                              following values:
+ *                              OMX_VIDEO_NONE          AC prediction not used.
+ *                                                      Performs classical zigzag
+ *                                                      scan.
+ *                              OMX_VIDEO_HORIZONTAL    Horizontal prediction.
+ *                                                      Performs alternate-vertical
+ *                                                      zigzag scan.
+ *                              OMX_VIDEO_VERTICAL      Vertical prediction.
+ *                                                      Performs alternate-horizontal
+ *                                                      zigzag scan.
+ * [in] pattern         block pattern which is used to decide whether
+ *                              this block is encoded
+ * [in] start           start indicates whether the encoding begins with 0th element
+ *                      or 1st.
+ * [out]    ppBitStream     *ppBitStream is updated after the block is encoded,
+ *                              so that it points to the current byte in the bit
+ *                              stream buffer.
+ * [out]    pBitOffset      *pBitOffset is updated so that it points to the
+ *                              current bit position in the byte pointed by
+ *                              *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_EncodeVLCZigzag_Intra(
+     OMX_U8 **ppBitStream,         /* [in/out] current byte in the bit stream */
+     OMX_INT *pBitOffset,          /* [in/out] bit position within that byte, 0 to 7 */
+     const OMX_S16 *pQDctBlkCoef,  /* [in] quantized DCT coefficients */
+     OMX_U8 predDir,               /* [in] AC prediction direction; decides the zigzag scan pattern */
+     OMX_U8 pattern,               /* [in] block pattern; decides whether this block is encoded */
+     OMX_INT shortVideoHeader,     /* not listed in the comment block of this prototype; presumably the short_video_header flag - TODO confirm */
+     OMX_U8 start                  /* [in] whether the encoding begins with the 0th or the 1st element */
+);
+
+/**
+ * Function: armVCM4P2_DecodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream     pointer to the pointer to the current byte in
+ *                              the bitstream buffer
+ * [in] pBitOffset      pointer to the bit position in the byte pointed
+ *                              to by *ppBitStream. *pBitOffset is valid within
+ *                              [0-7].
+ * [in] predDir         AC prediction direction which is used to decide
+ *                              the zigzag scan pattern. It takes one of the
+ *                              following values:
+ *                              OMX_VIDEO_NONE  AC prediction not used;
+ *                                              perform classical zigzag scan;
+ *                              OMX_VIDEO_HORIZONTAL    Horizontal prediction;
+ *                                                      perform alternate-vertical
+ *                                                      zigzag scan;
+ *                              OMX_VIDEO_VERTICAL      Vertical prediction;
+ *                                                      thus perform
+ *                                                      alternate-horizontal
+ *                                                      zigzag scan.
+ * [in] videoComp       video component type (luminance, chrominance or
+ *                              alpha) of the current block
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ *                           and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start           start indicates whether the encoding begins with 0th element
+ *                      or 1st.
+ * [out]    ppBitStream     *ppBitStream is updated after the block is
+ *                              decoded, so that it points to the current byte
+ *                              in the bit stream buffer
+ * [out]    pBitOffset      *pBitOffset is updated so that it points to the
+ *                              current bit position in the byte pointed by
+ *                              *ppBitStream
+ * [out]    pDst            pointer to the coefficient buffer of current
+ *                              block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_DecodeVLCZigzag_Intra(
+     const OMX_U8 ** ppBitStream,  /* [in/out] current byte in the bitstream buffer */
+     OMX_INT * pBitOffset,         /* [in/out] bit position within that byte, 0 to 7 */
+     OMX_S16 * pDst,               /* [out] coefficient buffer of the current block; should be 32-bit aligned */
+     OMX_U8 predDir,               /* [in] AC prediction direction; decides the zigzag scan pattern */
+     OMX_INT shortVideoHeader,     /* [in] short_video_header flag: 0 -> escape modes 0-3, 1 -> escape mode 4 */
+     OMX_U8  start                 /* [in] start element, 0th or 1st; NOTE: videoComp is documented for this prototype but is not a parameter */
+);
+
+/**
+ * Function: armVCM4P2_FillVLDBuffer
+ *
+ * Description:
+ * Performs filling of the coefficient buffer according to the run, level
+ * and sign, also updates the index
+ * 
+ * Parameters:
+ * [in]  storeRun        Stored Run value (count of zeros)   
+ * [in]  storeLevel      Stored Level value (non-zero value)
+ * [in]  sign            Flag indicating the sign of level
+ * [in]  last            status of the last flag
+ * [in]  index           pointer to coefficient index in 8x8 matrix
+ * [out] index           pointer to updated coefficient index in 8x8 
+ *                       matrix
+ * [in]  pZigzagTable    pointer to the zigzag tables
+ * [out] pDst            pointer to the coefficient buffer of current
+ *                       block. Should be 32-bit aligned
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLDBuffer(
+    OMX_U32 storeRun,             /* [in] stored run value (count of zeros) */
+    OMX_S16 * pDst,               /* [out] coefficient buffer of the current block; should be 32-bit aligned */
+    OMX_S16 storeLevel,           /* [in] stored level value (non-zero value) */
+    OMX_U8  sign,                 /* [in] flag indicating the sign of level */
+    OMX_U8  last,                 /* [in] status of the last flag */
+    OMX_U8  * index,              /* [in/out] coefficient index in the 8x8 matrix */
+    const OMX_U8 * pZigzagTable   /* [in] zigzag tables */
+);
+
+/**
+ * Function: armVCM4P2_GetVLCBits
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and 
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte in
+ *								the bit stream
+ * [in]	pBitOffset		pointer to the bit position in the byte pointed
+ *								by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ *                           and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start           start indicates whether the encoding begins with 
+ *                      0th element or 1st.
+ * [in/out] pLast       pointer to last status flag
+ * [in] runBeginSingleLevelEntriesL0      The run value from which level 
+ *                                        will be equal to 1: last == 0
+ * [in] IndexBeginSingleLevelEntriesL0    Array index in the VLC table 
+ *                                        pointing to the  
+ *                                        runBeginSingleLevelEntriesL0 
+ * [in] runBeginSingleLevelEntriesL1      The run value from which level 
+ *                                        will be equal to 1: last == 1
+ * [in] IndexBeginSingleLevelEntriesL1    Array index in the VLC table 
+ *                                        pointing to the  
+ *                                        runBeginSingleLevelEntriesL1 
+ * [in] pRunIndexTableL0    Run Index table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0         VLC table for last == 0
+ * [in] pRunIndexTableL1    Run Index table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1         VLC table for last == 1
+ * [in] pLMAXTableL0        Level MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1        Level MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0        Run MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1        Run MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [out]pDst			    pointer to the coefficient buffer of current
+ *							block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_GetVLCBits (
+              const OMX_U8 **ppBitStream,
+              OMX_INT * pBitOffset,
+			  OMX_S16 * pDst,
+			  OMX_INT shortVideoHeader,
+			  OMX_U8    start,			  
+			  OMX_U8  * pLast,
+			  OMX_U8    runBeginSingleLevelEntriesL0,
+			  OMX_U8    maxIndexForMultipleEntriesL0,
+			  OMX_U8    maxRunForMultipleEntriesL1,
+			  OMX_U8    maxIndexForMultipleEntriesL1,
+              const OMX_U8  * pRunIndexTableL0,
+              const ARM_VLC32 *pVlcTableL0,
+			  const OMX_U8  * pRunIndexTableL1,
+              const ARM_VLC32 *pVlcTableL1,
+              const OMX_U8  * pLMAXTableL0,
+              const OMX_U8  * pLMAXTableL1,
+              const OMX_U8  * pRMAXTableL0,
+              const OMX_U8  * pRMAXTableL1,
+              const OMX_U8  * pZigzagTable
+);
+
+/**
+ * Function: armVCM4P2_PutVLCBits
+ *
+ * Description:
+ * Checks the type of Escape Mode and puts the encoded bits for the 
+ * quantized DCT coefficients.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	 ppBitStream      pointer to the pointer to the current byte in
+ *						  the bit stream
+ * [in]	 pBitOffset       pointer to the bit position in the byte pointed
+ *                        by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ *                           and escape mode 4 is used when shortVideoHeader==1.
+ * [in]  start            start indicates whether the encoding begins with 
+ *                        0th element or 1st.
+ * [in]  maxStoreRunL0    Max store possible (considering last and inter/intra)
+ *                        for last = 0
+ * [in]  maxStoreRunL1    Max store possible (considering last and inter/intra)
+ *                        for last = 1
+ * [in]  maxRunForMultipleEntriesL0 
+ *                        The run value after which level 
+ *                        will be equal to 1: 
+ *                        (considering last and inter/intra status) for last = 0
+ * [in]  maxRunForMultipleEntriesL1 
+ *                        The run value after which level 
+ *                        will be equal to 1: 
+ *                        (considering last and inter/intra status) for last = 1
+ * [in]  pRunIndexTableL0 Run Index table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in]  pVlcTableL0      VLC table for last == 0
+ * [in]  pRunIndexTableL1 Run Index table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in]  pVlcTableL1      VLC table for last == 1
+ * [in]  pLMAXTableL0     Level MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in]  pLMAXTableL1     Level MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in]  pRMAXTableL0     Run MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in]  pRMAXTableL1     Run MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in]  pQDctBlkCoef     pointer to the quantized DCT coefficient
+ * [out] ppBitStream      *ppBitStream is updated after the block is encoded
+ *                        so that it points to the current byte in the bit
+ *                        stream buffer.
+ * [out] pBitOffset       *pBitOffset is updated so that it points to the
+ *                        current bit position in the byte pointed by
+ *                        *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+
+OMXResult armVCM4P2_PutVLCBits (
+              OMX_U8 **ppBitStream,
+              OMX_INT * pBitOffset,
+              const OMX_S16 *pQDctBlkCoef,
+              OMX_INT shortVideoHeader,
+              OMX_U8 start,
+              OMX_U8 maxStoreRunL0,
+              OMX_U8 maxStoreRunL1,
+              OMX_U8  maxRunForMultipleEntriesL0,
+              OMX_U8  maxRunForMultipleEntriesL1,
+              const OMX_U8  * pRunIndexTableL0,
+              const ARM_VLC32 *pVlcTableL0,
+			  const OMX_U8  * pRunIndexTableL1,
+              const ARM_VLC32 *pVlcTableL1,
+              const OMX_U8  * pLMAXTableL0,
+              const OMX_U8  * pLMAXTableL1,
+              const OMX_U8  * pRMAXTableL0,
+              const OMX_U8  * pRMAXTableL1,
+              const OMX_U8  * pZigzagTable
+);
+/**
+ * Function: armVCM4P2_FillVLCBuffer
+ *
+ * Description:
+ * Calculates the VLC bits depending on the escape type and inserts 
+ * them into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	 ppBitStream		pointer to the pointer to the current byte in
+ *	                        the bit stream
+ * [in]	 pBitOffset         pointer to the bit position in the byte pointed
+ *                          by *ppBitStream. Valid within 0 to 7
+ * [in]  run                Run value (count of zeros) to be encoded  
+ * [in]  level              Level value (non-zero value) to be encoded
+ * [in]  runPlus            Calculated as runPlus = run - (RMAX + 1)  
+ * [in]  levelPlus          Calculated as 
+ *                          levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in]  fMode              Flag indicating the escape modes
+ * [in]  last               status of the last flag
+ * [in]  maxRunForMultipleEntries 
+ *                          The run value after which level will be equal to 1: 
+ *                          (considering last and inter/intra status)
+ * [in]  pRunIndexTable     Run Index table defined in
+ *                          armVCM4P2_Huff_Tables_VLC.h
+ * [in]  pVlcTable          VLC table defined in armVCM4P2_Huff_Tables_VLC.h
+ * [out] ppBitStream		*ppBitStream is updated after the block is encoded
+ *                          so that it points to the current byte in the bit
+ *                          stream buffer.
+ * [out] pBitOffset         *pBitOffset is updated so that it points to the
+ *                          current bit position in the byte pointed by
+ *                          *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLCBuffer (
+              OMX_U8 **ppBitStream,
+              OMX_INT * pBitOffset,
+              OMX_U32 run,
+              OMX_S16 level, 
+			  OMX_U32 runPlus,
+              OMX_S16 levelPlus, 
+              OMX_U8  fMode,
+			  OMX_U8  last,
+              OMX_U8  maxRunForMultipleEntries, 
+              const OMX_U8  *pRunIndexTable,
+              const ARM_VLC32 *pVlcTable
+);
+
+/**
+ * Function: armVCM4P2_CheckVLCEscapeMode
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and 
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] run             Run value (count of zeros) to be encoded  
+ * [in] level           Level value (non-zero value) to be encoded
+ * [in] runPlus         Calculated as runPlus = run - (RMAX + 1)  
+ * [in] levelPlus       Calculated as 
+ *                      levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] maxStoreRun     Max store possible (considering last and inter/intra)
+ * [in] maxRunForMultipleEntries 
+ *                      The run value after which level 
+ *                      will be equal to 1: 
+ *                      (considering last and inter/intra status)
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ *                           and escape mode 4 is used when shortVideoHeader==1.
+ * [in] pRunIndexTable  Run Index table defined in 
+ *                      armVCM4P2_Huff_Tables_VLC.c
+ *                      (considering last and inter/intra status)
+ *
+ *                      
+ * Return Value:
+ * Returns an Escape mode which can take values from 0 to 3
+ * 0 --> no escape mode, 1 --> escape type 1,
+ * 2 --> escape type 2, 3 --> escape type 3, check section 7.4.1.3
+ * in the MPEG ISO standard.
+ *
+ */
+
+OMX_U8 armVCM4P2_CheckVLCEscapeMode(
+     OMX_U32 run,
+     OMX_U32 runPlus,
+     OMX_S16 level,
+     OMX_S16 levelPlus,
+     OMX_U8  maxStoreRun,
+     OMX_U8  maxRunForMultipleEntries,
+     OMX_INT shortVideoHeader,
+     const OMX_U8  *pRunIndexTable
+);
+
+
+/**
+ * Function: armVCM4P2_BlockMatch_Integer
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated minimum SAD.  
+ * Both the input and output motion vectors are represented using half-pixel units, and 
+ * therefore a shift left or right by 1 bit may be required, respectively, to match the 
+ * input or output MVs with other functions that either generate output MVs or expect 
+ * input MVs represented using integer pixel units. 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	pSrcRefBuf		pointer to the reference Y plane; points to the reference MB that 
+ *                    corresponds to the location of the current macroblock in the current 
+ *                    plane.
+ * [in]	refWidth		  width of the reference plane
+ * [in]	pRefRect		  pointer to the valid rectangle in the reference plane, relative to the image origin. 
+ *                    It is not limited to the image boundary, but depends on the padding. For example, 
+ *                    if you pad 4 pixels outside the image border, then the value for the left border 
+ *                    can be -4
+ * [in]	pSrcCurrBuf		pointer to the current macroblock extracted from original plane (linear array, 
+ *                    256 entries); must be aligned on an 8-byte boundary.
+ * [in] pCurrPointPos	position of the current macroblock in the current plane
+ * [in] pSrcPreMV		  pointer to predicted motion vector; NULL indicates no predicted MV
+ * [in] pSrcPreSAD		pointer to SAD associated with the predicted MV (referenced by pSrcPreMV)
+ * [in] searchRange		search range for the 16X16 integer block search, in full-pixel units; the search range 
+ *                    is the same in all directions. It is inclusive of the boundary and specified in 
+ *                    terms of integer pixel units. NOTE(review): no such parameter in the prototype below.
+ * [in] pMESpec			  vendor-specific motion estimation specification structure; must have been allocated 
+ *                    and then initialized using omxVCM4P2_MEInit prior to calling the block matching 
+ *                    function.
+ * [in] BlockSize     MacroBlock Size i.e either 16x16 or 8x8.
+ * [out]	pDstMV			pointer to estimated MV
+ * [out]	pDstSAD			pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error.
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Integer(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pCurrPointPos,
+     const OMXVCMotionVector *pSrcPreMV,
+     const OMX_INT *pSrcPreSAD,
+     void *pMESpec,
+     OMXVCMotionVector *pDstMV,
+     OMX_INT *pDstSAD,
+     OMX_U8 BlockSize
+);
+
+/**
+ * Function: armVCM4P2_BlockMatch_Half
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution.  Returns the estimated 
+ * motion vector and associated minimum SAD.  This function estimates the half-pixel 
+ * motion vector by interpolating the integer resolution motion vector referenced 
+ * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated 
+ * externally.  The input parameters pSrcRefBuf and pSearchPointRefPos should be 
+ * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16.  
+ * The function BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	pSrcRefBuf		pointer to the reference Y plane; points to the reference MB 
+ *                    that corresponds to the location of the current macroblock in 
+ *                    the	current plane.
+ * [in]	refWidth		  width of the reference plane
+ * [in]	pRefRect		  reference plane valid region rectangle
+ * [in]	pSrcCurrBuf		pointer to the current macroblock extracted from original plane 
+ *                    (linear array, 256 entries); must be aligned on an 8-byte boundary. 
+ * [in]	pSearchPointRefPos	position of the starting point for half pixel search (specified 
+ *                          in terms of integer pixel units) in the reference plane.
+ * [in]	rndVal			  rounding control bit for half pixel motion estimation; 
+ *                    0=rounding control disabled; 1=rounding control enabled
+ * [in]	pSrcDstMV		pointer to the initial MV estimate; typically generated during a prior 
+ *                  16X16 integer search and its unit is half pixel.
+ * [in] BlockSize     MacroBlock Size i.e either 16x16 or 8x8.
+ * [out]pSrcDstMV		pointer to estimated MV
+ * [out]pDstSAD			pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Half(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pSearchPointRefPos,
+     OMX_INT rndVal,
+     OMXVCMotionVector *pSrcDstMV,
+     OMX_INT *pDstSAD,
+     OMX_U8 BlockSize
+);
+/**
+ * Function: armVCM4P2_PadMV
+ *
+ * Description:
+ * Performs motion vector padding for a macroblock.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDstMV       pointer to motion vector buffer of the current
+ *                              macroblock
+ * [in] pTransp         pointer to transparent status buffer of the
+ *                              current macroblock
+ * [out]    pSrcDstMV       pointer to motion vector buffer in which the
+ *                              motion vectors have been padded
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_PadMV(
+     OMXVCMotionVector * pSrcDstMV,
+     OMX_U8 * pTransp
+);
+
+/* 
+ * H.264 Specific Declarations 
+ */
+/* Defines */
+#define ARM_M4P10_Q_OFFSET        (15)
+
+
+/* Dequant tables */
+
+extern const OMX_U8 armVCM4P10_PosToVCol4x4[16];
+extern const OMX_U8 armVCM4P10_PosToVCol2x2[4];
+extern const OMX_U8 armVCM4P10_VMatrix[6][3];
+extern const OMX_U32 armVCM4P10_MFMatrix[6][3];
+
+
+/*
+ * Description:
+ * This function performs the work required by the OpenMAX
+ * DecodeCoeffsToPair function and DecodeChromaDCCoeffsToPair.
+ * Since most of the code is common we share it here.
+ *
+ * Parameters:
+ * [in]	ppBitStream		Double pointer to current byte in bit stream buffer
+ * [in]	pOffset			Pointer to current bit position in the byte pointed
+ *								to by *ppBitStream
+ * [in]	sMaxNumCoeff	Maximum number of non-zero coefficients in current
+ *								block (4,15 or 16)
+ * [in]	nTable          Table number (0 to 4) according to the five columns
+ *                      of Table 9-5 in the H.264 spec
+ * [out]	ppBitStream		*ppBitStream is updated after each block is decoded
+ * [out]	pOffset			*pOffset is updated after each block is decoded
+ * [out]	pNumCoeff		Pointer to the number of nonzero coefficients in
+ *								this block
+ * [out]	ppPosCoefbuf	Double pointer to destination residual
+ *								coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+
+ */
+
+OMXResult armVCM4P10_DecodeCoeffsToPair(
+     const OMX_U8** ppBitStream,
+     OMX_S32* pOffset,
+     OMX_U8* pNumCoeff,
+     OMX_U8**ppPosCoefbuf,
+     OMX_INT nTable,
+     OMX_INT sMaxNumCoeff        
+ );
+
+/*
+ * Description:
+ * Perform DC style intra prediction, averaging upper and left block
+ *
+ * Parameters:
+ * [in]	pSrcLeft		Pointer to the buffer of 4 left coefficients:
+ *								p[x, y] (x = -1, y = 0..3)
+ * [in]	pSrcAbove		Pointer to the buffer of 4 above coefficients:
+ *								p[x,y] (x = 0..3, y = -1)
+ * [in]	leftStep		Step of left coefficient buffer
+ * [in]	dstStep			Step of the destination buffer
+ * [in]	availability	Neighboring 16x16 MB availability flag
+ * [out]	pDst			Pointer to the destination buffer
+ *
+ * Return Value:
+ * None
+ */
+
+void armVCM4P10_PredictIntraDC4x4(
+     const OMX_U8* pSrcLeft,
+     const OMX_U8 *pSrcAbove,
+     OMX_U8* pDst,
+     OMX_INT leftStep,
+     OMX_INT dstStep,
+     OMX_S32 availability        
+);
+
+/*
+ * Description
+ * Unpack a 4x4 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in]	ppSrc	Double pointer to residual coefficient-position pair
+ *						buffer output by CAVLC decoding
+ * [out]	ppSrc	*ppSrc is updated to the start of next non empty block
+ * [out]	pDst	Pointer to unpacked 4x4 block
+ */
+
+void armVCM4P10_UnpackBlock4x4(
+     const OMX_U8 **ppSrc,
+     OMX_S16* pDst
+);
+
+/*
+ * Description
+ * Unpack a 2x2 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in]	ppSrc	Double pointer to residual coefficient-position pair
+ *						buffer output by CAVLC decoding
+ * [out]	ppSrc	*ppSrc is updated to the start of next non empty block
+ * [out]	pDst	Pointer to unpacked 2x2 block
+ */
+
+void armVCM4P10_UnpackBlock2x2(
+     const OMX_U8 **ppSrc,
+     OMX_S16* pDst
+);
+
+/*
+ * Description
+ * Deblock one boundary pixel
+ *
+ * Parameters:
+ * [in]	pQ0         Pointer to pixel q0
+ * [in] Step        Step between pixels q0 and q1
+ * [in] tC0         Edge threshold value
+ * [in] alpha       alpha threshold value
+ * [in] beta        beta threshold value
+ * [in] bS          deblocking strength
+ * [in] ChromaFlag  True for chroma blocks
+ * [out] pQ0        Deblocked pixels
+ * 
+ */
+
+void armVCM4P10_DeBlockPixel(
+    OMX_U8 *pQ0,    /* [in/out] pointer to pixel q0; receives the deblocked pixels */
+    int Step,       /* step between pixels q0 and q1 */
+    int tC0,        /* edge threshold value */
+    int alpha,      /* alpha threshold value */
+    int beta,       /* beta threshold value */
+    int bS,         /* deblocking strength */
+    int ChromaFlag  /* true for chroma blocks */
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfHor_Luma
+ *
+ * Description:
+ * This function performs interpolation for horizontal 1/2-pel positions
+ *
+ * Remarks:
+ *
+ *	[in]	pSrc			Pointer to top-left corner of block used to interpolate 
+ *												in the reconstructed frame plane
+ *	[in]	iSrcStep	Step of the source buffer.
+ *	[in]	iDstStep	Step of the destination(interpolation) buffer.
+ *	[in]	iWidth		Width of the current block
+ *	[in]	iHeight		Height of the current block
+ *	[out]	pDst	    Pointer to the interpolation buffer of the 1/2-pel 
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfHor_Luma(
+        const OMX_U8*		pSrc, 
+		OMX_U32 	iSrcStep, 
+		OMX_U8* 	pDst, 
+		OMX_U32 	iDstStep, 
+		OMX_U32 	iWidth, 
+		OMX_U32 	iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfVer_Luma
+ * 
+ * Description:
+ * This function performs interpolation for vertical 1/2-pel positions 
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ *	[in]	pSrc			Pointer to top-left corner of block used to interpolate 
+ *												in the reconstructed frame plane
+ *	[in]	iSrcStep	Step of the source buffer.
+ *	[in]	iDstStep	Step of the destination(interpolation) buffer.
+ *	[in]	iWidth		Width of the current block
+ *	[in]	iHeight		Height of the current block
+ *	[out]	pDst    	Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfVer_Luma(	
+	 const OMX_U8* 	pSrc, 
+	 OMX_U32 	iSrcStep, 
+ 	 OMX_U8* 	pDst,
+ 	 OMX_U32 	iDstStep, 
+ 	 OMX_U32 	iWidth, 
+ 	 OMX_U32 	iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfDiag_Luma
+ * 
+ * Description:
+ * This function performs interpolation for (1/2, 1/2)  positions 
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc        Pointer to top-left corner of block used to interpolate 
+ *                      in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [out]   pDst        Pointer to the interpolation buffer of the (1/2,1/2)-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfDiag_Luma(  
+        const OMX_U8*     pSrc, 
+        OMX_U32     iSrcStep, 
+        OMX_U8*     pDst, 
+        OMX_U32     iDstStep,
+        OMX_U32     iWidth, 
+        OMX_U32     iHeight
+);
+
+/*
+ * Description:
+ * Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in]  pSrc		Source 4x4 block
+ * [out] pDst		Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+/*
+ * Description:
+ * Forward Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in]  pSrc		Source 4x4 block
+ * [out] pDst		Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+OMX_INT armVCM4P10_CompareMotionCostToMV (  /* NOTE(review): undocumented in this header; meaning of the return value not shown here - confirm against the implementation */
+    OMX_S16  mvX,               /* presumably x component of the candidate MV - TODO confirm */
+    OMX_S16  mvY,               /* presumably y component of the candidate MV - TODO confirm */
+    OMXVCMotionVector diffMV,   /* passed by value; presumably the differential MV - TODO confirm */
+    OMX_INT candSAD,            /* presumably SAD of the candidate - TODO confirm */
+    OMXVCMotionVector *bestMV,  /* non-const pointer; presumably updated with the best MV - TODO confirm */
+    OMX_U32 nLamda,             /* presumably the lambda weighting factor for the MV cost - TODO confirm */
+    OMX_S32 *pBestCost);        /* non-const pointer; presumably in/out best cost so far - TODO confirm */
+
+/**
+ * Function: armVCCOMM_SAD
+ *
+ * Description:
+ * This function calculates the SAD for NxM blocks.
+ *
+ * Remarks:
+ *
+ * [in]		pSrcOrg		Pointer to the original block
+ * [in]		iStepOrg	Step of the original block buffer
+ * [in]		pSrcRef		Pointer to the reference block
+ * [in]		iStepRef	Step of the reference block buffer
+ * [in]		iHeight		Height of the block
+ * [in]		iWidth		Width of the block
+ * [out]	pDstSAD		Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCCOMM_SAD(	
+	const OMX_U8* 	pSrcOrg,
+	OMX_U32 	iStepOrg,
+	const OMX_U8* 	pSrcRef,
+	OMX_U32 	iStepRef,
+	OMX_S32*	pDstSAD,
+	OMX_U32		iHeight,
+	OMX_U32		iWidth);
+
+/**
+ * Function: armVCCOMM_Average
+ *
+ * Description:
+ * This function calculates the average of two blocks and stores the result.
+ *
+ * Remarks:
+ *
+ *	[in]	pPred0			Pointer to the top-left corner of reference block 0
+ *	[in]	pPred1			Pointer to the top-left corner of reference block 1
+ *	[in]	iPredStep0	    Step of reference block 0
+ *	[in]	iPredStep1	    Step of reference block 1
+ *	[in]	iDstStep 		Step of the destination buffer
+ *	[in]	iWidth			Width of the blocks
+ *	[in]	iHeight			Height of the blocks
+ *	[out]	pDstPred		Pointer to the destination buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCCOMM_Average (
+	 const OMX_U8* 	    pPred0,
+	 const OMX_U8* 	    pPred1,	
+	 OMX_U32		iPredStep0,
+	 OMX_U32		iPredStep1,
+	 OMX_U8*		pDstPred,
+	 OMX_U32		iDstStep, 
+	 OMX_U32		iWidth,
+	 OMX_U32		iHeight
+);
+
+/**
+ * Function: armVCM4P10_SADQuar
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the 
+ * average of the other two (pSrcRef0 and pSrcRef1)
+ *
+ * Remarks:
+ *
+ * [in]		pSrc				Pointer to the original block
+ * [in]		pSrcRef0		Pointer to reference block 0
+ * [in]		pSrcRef1		Pointer to reference block 1
+ * [in]		iSrcStep 		Step of the original block buffer
+ * [in]		iRefStep0		Step of reference block 0 
+ * [in]		iRefStep1 	Step of reference block 1 
+ * [in]		iHeight			Height of the block
+ * [in]		iWidth			Width of the block
+ * [out]	pDstSAD			Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCM4P10_SADQuar(
+	const OMX_U8* 	pSrc,
+    const OMX_U8* 	pSrcRef0,
+	const OMX_U8* 	pSrcRef1,	
+    OMX_U32 	iSrcStep,
+    OMX_U32		iRefStep0,
+    OMX_U32		iRefStep1,
+    OMX_U32*	pDstSAD,
+    OMX_U32     iHeight,
+    OMX_U32     iWidth
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Chroma
+ *
+ * Description:
+ * This function performs interpolation for chroma components.
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc            Pointer to top-left corner of block used to 
+ *                                              interpolate in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [in]    dx              Fractional part of horizontal motion vector 
+ *                                              component in 1/8 pixel unit (0~7) 
+ *  [in]    dy              Fractional part of vertical motion vector 
+ *                                              component in 1/8 pixel unit (0~7)
+ *  [out]   pDst            Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCM4P10_Interpolate_Chroma(
+        OMX_U8      *pSrc,
+        OMX_U32     iSrcStep,
+        OMX_U8      *pDst,
+        OMX_U32     iDstStep,
+        OMX_U32     iWidth,
+        OMX_U32     iHeight,
+        OMX_U32     dx,
+        OMX_U32     dy
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Luma
+ *
+ * Description:
+ * This function performs interpolation for luma components.
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc            Pointer to top-left corner of block used to 
+ *                                              interpolate in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [in]    dx              Fractional part of horizontal motion vector 
+ *                                              component in 1/4 pixel unit (0~3) 
+ *  [in]    dy              Fractional part of vertical motion vector 
+ *                                              component in 1/4 pixel unit (0~3) 
+ *  [out]   pDst            Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+ OMXResult armVCM4P10_Interpolate_Luma(
+     const OMX_U8     *pSrc,
+     OMX_U32    iSrcStep,
+     OMX_U8     *pDst,
+     OMX_U32    iDstStep,
+     OMX_U32    iWidth,
+     OMX_U32    iHeight,
+     OMX_U32    dx,
+     OMX_U32    dy
+);
+
+/**
+ * Function: armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer,
+ * perform dequantisation and integer inverse transformation for 4x4 block of
+ * residuals and update the pair buffer pointer to next non-empty block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppSrc		Double pointer to residual coefficient-position
+ *							pair buffer output by CAVLC decoding
+ * [in]	pDC			Pointer to the DC coefficient of this block, NULL
+ *							if it doesn't exist
+ * [in]	QP			Quantization parameter
+ * [in] AC          Flag indicating if at least one non-zero coefficient exists
+ * [out]	pDst		pointer to the reconstructed 4x4 block data
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx(
+     OMX_U8 **ppSrc,
+     OMX_S16 *pDst,
+     OMX_INT QP,
+     OMX_S16* pDC,
+     int AC
+);
+
+#endif  /*_armVideo_H_*/
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h
new file mode 100644
index 0000000..7f0a9b8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h
@@ -0,0 +1,72 @@
+;//
+;// 
+;// File Name:  armVCCOMM_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// ARM optimized OpenMAX VC header file
+;// 
+;// Formula used:
+;// MACRO for calculating median for three values.
+
+
+
+    IF :LNOT::DEF:ARMVCCOMM_S_H
+        INCLUDE armCOMM_s.h
+    M_VARIANTS      CortexA8, ARM1136JS
+    
+    IF ARM1136JS :LOR: CortexA8 
+     
+     ;///*
+     ;// * Macro: M_MEDIAN3
+     ;// *
+     ;// * Description: Finds the median of three numbers
+     ;// * 
+     ;// * Remarks:
+     ;// *
+     ;// * Parameters:
+     ;// * [in] x     First entry for the list of three numbers.
+     ;// * [in] y     Second entry for the list of three numbers.
+     ;// *            Input value may be corrupted at the end of
+     ;// *            the execution of this macro.
+     ;// * [in] z     Third entry of the list of three numbers.
+     ;// *            Input value corrupted at the end of the 
+     ;// *            execution of this macro.
+     ;// * [in] t     Temporary scratch  register.
+     ;// * [out]z     Median of the three numbers.       
+     ;// */
+     
+     MACRO
+
+     M_MEDIAN3 $x, $y, $z, $t 
+     
+     SUBS  $t, $y, $z; // t = y - z, sets flags: signed test (y < z)
+     ADDLT $z, $z, $t; //  if (y < z): z' = z + (y - z) = y
+     SUBLT $y, $y, $t; //  if (y < z): y' = y - (y - z) = z, swap complete
+
+     ;// Now z' <= y', so there are three cases for the
+     ;// median value, depending on x.
+
+     ;// 1) x <= z'      <= y'      : median value is z'
+     ;// 2)      z' <= x <= y'      : median value is x
+     ;// 3)      z'      <= y' <= x : median value is y'
+
+     CMP   $z, $x;     // if ( x > z' ), z' = min(y,z)
+     MOVLT $z, $x;     //   ans = x 
+
+     CMP   $x, $y;     // if ( x > y' ), y' = max(y,z)
+     MOVGT $z, $y;     //   ans = y' = max(y,z)
+     
+     MEND
+    ENDIF      
+    
+    
+        
+    ENDIF ;// ARMVCCOMM_S_H
+
+ END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h
new file mode 100644
index 0000000..7b3cc72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h
@@ -0,0 +1,4381 @@
+/**
+ * File: omxVC.h
+ * Brief: OpenMAX DL v1.0.2 - Video Coding library
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. 
+ *
+ * These materials are protected by copyright laws and contain material 
+ * proprietary to the Khronos Group, Inc.  You may use these materials 
+ * for implementing Khronos specifications, without altering or removing 
+ * any trademark, copyright or other notice from the specification.
+ * 
+ * Khronos Group makes no, and expressly disclaims any, representations 
+ * or warranties, express or implied, regarding these materials, including, 
+ * without limitation, any implied warranties of merchantability or fitness 
+ * for a particular purpose or non-infringement of any intellectual property. 
+ * Khronos Group makes no, and expressly disclaims any, warranties, express 
+ * or implied, regarding the correctness, accuracy, completeness, timeliness, 
+ * and reliability of these materials. 
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters, 
+ * Contributors or Members or their respective partners, officers, directors, 
+ * employees, agents or representatives be liable for any damages, whether 
+ * direct, indirect, special or consequential damages for lost revenues, 
+ * lost profits, or otherwise, arising from or in connection with these 
+ * materials.
+ * 
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc. 
+ *
+ */
+
+/* *****************************************************************************************/
+
+#ifndef _OMXVC_H_
+#define _OMXVC_H_
+
+#include "omxtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* 6.1.1.1 Motion Vectors  */
+/* In omxVC, motion vectors are represented as follows:  */
+
+typedef struct {
+    OMX_S16 dx;
+    OMX_S16 dy;
+} OMXVCMotionVector;
+
+
+
+/**
+ * Function:  omxVCCOMM_Average_8x   (6.1.3.1.1)
+ *
+ * Description:
+ * This function calculates the average of two 8x4, 8x8, or 8x16 blocks.  The 
+ * result is rounded according to (a+b+1)/2.  The block average function can 
+ * be used in conjunction with half-pixel interpolation to obtain quarter 
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. 
+ *
+ * Input Arguments:
+ *   
+ *   pPred0     - Pointer to the top-left corner of reference block 0 
+ *   pPred1     - Pointer to the top-left corner of reference block 1 
+ *   iPredStep0 - Step of reference block 0 
+ *   iPredStep1 - Step of reference block 1 
+ *   iDstStep   - Step of the destination buffer. 
+ *   iHeight    - Height of the blocks 
+ *
+ * Output Arguments:
+ *   
+ *   pDstPred - Pointer to the destination buffer. 8-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -   one or more of the following pointers is NULL: pPred0, pPred1, or 
+ *              pDstPred. 
+ *    -   pDstPred is not aligned on an 8-byte boundary. 
+ *    -   iPredStep0 <= 0 or iPredStep0 is not a multiple of 8. 
+ *    -   iPredStep1 <= 0 or iPredStep1 is not a multiple of 8. 
+ *    -   iDstStep   <= 0 or iDstStep is not a multiple of 8. 
+ *    -   iHeight is not 4, 8, or 16. 
+ *
+ */
+OMXResult omxVCCOMM_Average_8x (
+    const OMX_U8 *pPred0,
+    const OMX_U8 *pPred1,
+    OMX_U32 iPredStep0,
+    OMX_U32 iPredStep1,
+    OMX_U8 *pDstPred,
+    OMX_U32 iDstStep,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_Average_16x   (6.1.3.1.2)
+ *
+ * Description:
+ * This function calculates the average of two 16x16 or 16x8 blocks.  The 
+ * result is rounded according to (a+b+1)/2.  The block average function can 
+ * be used in conjunction with half-pixel interpolation to obtain quarter 
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. 
+ *
+ * Input Arguments:
+ *   
+ *   pPred0 - Pointer to the top-left corner of reference block 0 
+ *   pPred1 - Pointer to the top-left corner of reference block 1 
+ *   iPredStep0 - Step of reference block 0 
+ *   iPredStep1 - Step of reference block 1 
+ *   iDstStep - Step of the destination buffer 
+ *   iHeight - Height of the blocks 
+ *
+ * Output Arguments:
+ *   
+ *   pDstPred - Pointer to the destination buffer. 16-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -   one or more of the following pointers is NULL: pPred0, pPred1, or 
+ *              pDstPred. 
+ *    -   pDstPred is not aligned on a 16-byte boundary. 
+ *    -   iPredStep0 <= 0 or iPredStep0 is not a multiple of 16. 
+ *    -   iPredStep1 <= 0 or iPredStep1 is not a multiple of 16. 
+ *    -   iDstStep <= 0 or iDstStep is not a multiple of 16. 
+ *    -   iHeight is not 8 or 16. 
+ *
+ */
+OMXResult omxVCCOMM_Average_16x (
+    const OMX_U8 *pPred0,
+    const OMX_U8 *pPred1,
+    OMX_U32 iPredStep0,
+    OMX_U32 iPredStep1,
+    OMX_U8 *pDstPred,
+    OMX_U32 iDstStep,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_ExpandFrame_I   (6.1.3.2.1)
+ *
+ * Description:
+ * This function expands a reconstructed frame in-place.  The unexpanded 
+ * source frame should be stored in a plane buffer with sufficient space 
+ * pre-allocated for edge expansion, and the input frame should be located in 
+ * the plane buffer center.  This function executes the pixel expansion by 
+ * replicating source frame edge pixel intensities in the empty pixel 
+ * locations (expansion region) between the source frame edge and the plane 
+ * buffer edge.  The width/height of the expansion regions on the 
+ * horizontal/vertical edges is controlled by the parameter iExpandPels. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDstPlane - pointer to the top-left corner of the frame to be 
+ *            expanded; must be aligned on an 8-byte boundary. 
+ *   iFrameWidth - frame width; must be a multiple of 8. 
+ *   iFrameHeight - frame height; must be a multiple of 8. 
+ *   iExpandPels - number of pixels to be expanded in the horizontal and 
+ *            vertical directions; must be a multiple of 8. 
+ *   iPlaneStep - distance, in bytes, between the start of consecutive lines 
+ *            in the plane buffer; must be larger than or equal to 
+ *            (iFrameWidth + 2 * iExpandPels). 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstPlane -Pointer to the top-left corner of the frame (NOT the 
+ *            top-left corner of the plane); must be aligned on an 8-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -    pSrcDstPlane is NULL. 
+ *    -    pSrcDstPlane is not aligned on an 8-byte boundary. 
+ *    -    one of the following parameters is either equal to zero or is a 
+ *              non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or 
+ *              iExpandPels. 
+ *    -    iPlaneStep < (iFrameWidth + 2 * iExpandPels). 
+ *
+ */
+OMXResult omxVCCOMM_ExpandFrame_I (
+    OMX_U8 *pSrcDstPlane,
+    OMX_U32 iFrameWidth,
+    OMX_U32 iFrameHeight,
+    OMX_U32 iExpandPels,
+    OMX_U32 iPlaneStep
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_Copy8x8   (6.1.3.3.1)
+ *
+ * Description:
+ * Copies the reference 8x8 block to the current block. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the reference block in the source frame; must be 
+ *            aligned on an 8-byte boundary. 
+ *   step - distance between the starts of consecutive lines in the reference 
+ *            frame, in bytes; must be a multiple of 8 and must be larger than 
+ *            or equal to 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination block; must be aligned on an 8-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -   one or more of the following pointers is NULL: pSrc, pDst 
+ *    -   one or more of the following pointers is not aligned on an 8-byte 
+ *              boundary: pSrc, pDst 
+ *    -    step <8 or step is not a multiple of 8. 
+ *
+ */
+OMXResult omxVCCOMM_Copy8x8 (
+    const OMX_U8 *pSrc,
+    OMX_U8 *pDst,
+    OMX_INT step
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_Copy16x16   (6.1.3.3.2)
+ *
+ * Description:
+ * Copies the reference 16x16 macroblock to the current macroblock. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the reference macroblock in the source frame; must be 
+ *            aligned on a 16-byte boundary. 
+ *   step - distance between the starts of consecutive lines in the reference 
+ *            frame, in bytes; must be a multiple of 16 and must be larger 
+ *            than or equal to 16. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination macroblock; must be aligned on a 
+ *            16-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -   one or more of the following pointers is NULL: pSrc, pDst 
+ *    -   one or more of the following pointers is not aligned on a 16-byte 
+ *              boundary: pSrc, pDst 
+ *    -    step <16 or step is not a multiple of 16. 
+ *
+ */
+OMXResult omxVCCOMM_Copy16x16 (
+    const OMX_U8 *pSrc,
+    OMX_U8 *pDst,
+    OMX_INT step
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_ComputeTextureErrorBlock_SAD   (6.1.4.1.1)
+ *
+ * Description:
+ * Computes texture error of the block; also returns SAD. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the source plane; must be aligned on an 8-byte 
+ *            boundary. 
+ *   srcStep - step of the source plane 
+ *   pSrcRef - pointer to the reference buffer, an 8x8 block; must be aligned 
+ *            on an 8-byte boundary. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination buffer, an 8x8 block; must be aligned 
+ *            on an 8-byte boundary. 
+ *   pDstSAD - pointer to the Sum of Absolute Differences (SAD) value 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments 
+ *    -    At least one of the following 
+ *         pointers is NULL: pSrc, pSrcRef, pDst and pDstSAD. 
+ *    -    pSrc is not 8-byte aligned. 
+ *    -    srcStep <= 0 or srcStep is not a multiple of 8. 
+ *    -    pSrcRef is not 8-byte aligned. 
+ *    -    pDst is not 8-byte aligned. 
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD (
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    const OMX_U8 *pSrcRef,
+    OMX_S16 *pDst,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_ComputeTextureErrorBlock   (6.1.4.1.2)
+ *
+ * Description:
+ * Computes the texture error of the block. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the source plane. This should be aligned on an 8-byte 
+ *            boundary. 
+ *   srcStep - step of the source plane 
+ *   pSrcRef - pointer to the reference buffer, an 8x8 block. This should be 
+ *            aligned on an 8-byte boundary. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination buffer, an 8x8 block. This should be 
+ *            aligned on an 8-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL: 
+ *         pSrc, pSrcRef, pDst. 
+ *    -    pSrc is not 8-byte aligned. 
+ *    -    srcStep <= 0 or srcStep is not a multiple of 8. 
+ *    -    pSrcRef is not 8-byte aligned. 
+ *    -    pDst is not 8-byte aligned 
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock (
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    const OMX_U8 *pSrcRef,
+    OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_LimitMVToRect   (6.1.4.1.3)
+ *
+ * Description:
+ * Limits the motion vector associated with the current block/macroblock to 
+ * prevent the motion compensated block/macroblock from moving outside a 
+ * bounding rectangle as shown in Figure 6-1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcMV - pointer to the motion vector associated with the current block 
+ *            or macroblock 
+ *   pRectVOPRef - pointer to the bounding rectangle 
+ *   Xcoord, Ycoord  - coordinates of the current block or macroblock 
+ *   size - size of the current block or macroblock; must be equal to 8 or 
+ *            16. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMV - pointer to the limited motion vector 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the 
+ *              following conditions is true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcMV, pDstMV, or pRectVOPRef. 
+ *    -    size is not equal to either 8 or 16. 
+ *    -    the width or height of the bounding rectangle is less than 
+ *         twice the block size.
+ */
+OMXResult omxVCCOMM_LimitMVToRect (
+    const OMXVCMotionVector *pSrcMV,
+    OMXVCMotionVector *pDstMV,
+    const OMXRect *pRectVOPRef,
+    OMX_INT Xcoord,
+    OMX_INT Ycoord,
+    OMX_INT size
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_SAD_16x   (6.1.4.1.4)
+ *
+ * Description:
+ * This function calculates the SAD for 16x16 and 16x8 blocks. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg - Pointer to the original block; must be aligned on a 16-byte 
+ *             boundary. 
+ *   iStepOrg - Step of the original block buffer 
+ *   pSrcRef  - Pointer to the reference block 
+ *   iStepRef - Step of the reference block buffer 
+ *   iHeight  - Height of the block 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer to the resulting SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the 
+ *              following conditions is true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcOrg, pDstSAD, or pSrcRef 
+ *    -    pSrcOrg is not 16-byte aligned. 
+ *    -    iStepOrg  <= 0 or iStepOrg is not a multiple of 16 
+ *    -    iStepRef <= 0 or iStepRef is not a multiple of 16 
+ *    -    iHeight is not 8 or 16 
+ *
+ */
+OMXResult omxVCCOMM_SAD_16x (
+    const OMX_U8 *pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8 *pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_S32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_SAD_8x   (6.1.4.1.5)
+ *
+ * Description:
+ * This function calculates the SAD for 8x16, 8x8, 8x4 blocks. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg  - Pointer to the original block; must be aligned on an 8-byte 
+ *              boundary. 
+ *   iStepOrg - Step of the original block buffer 
+ *   pSrcRef  - Pointer to the reference block 
+ *   iStepRef - Step of the reference block buffer 
+ *   iHeight  - Height of the block 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer to the resulting SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the 
+ *              following conditions is true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcOrg, pDstSAD, or pSrcRef 
+ *    -    pSrcOrg is not 8-byte aligned. 
+ *    -    iStepOrg  <= 0 or iStepOrg is not a multiple of 8 
+ *    -    iStepRef <= 0 or iStepRef is not a multiple of 8 
+ *    -    iHeight is not 4, 8 or 16 
+ *
+ */
+OMXResult omxVCCOMM_SAD_8x (
+    const OMX_U8 *pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8 *pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_S32*pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/* 6.2.1.1 Direction  */
+/* The direction enumerator is used with functions that perform AC/DC prediction and zig-zag scan.  */
+
+enum {
+    OMX_VC_NONE       = 0,
+    OMX_VC_HORIZONTAL = 1,
+    OMX_VC_VERTICAL   = 2 
+};
+
+
+
+/* 6.2.1.2 Bilinear Interpolation  */
+/* The bilinear interpolation enumerator is used with motion estimation, motion compensation, and reconstruction functions.  */
+
+enum {
+    OMX_VC_INTEGER_PIXEL = 0, /* case a */
+    OMX_VC_HALF_PIXEL_X  = 1, /* case b */
+    OMX_VC_HALF_PIXEL_Y  = 2, /* case c */
+    OMX_VC_HALF_PIXEL_XY = 3  /* case d */ 
+};
+
+
+
+/* 6.2.1.3 Neighboring Macroblock Availability  */
+/* Neighboring macroblock availability is indicated using the following flags:   */
+
+enum {
+    OMX_VC_UPPER = 1,        /** above macroblock is available */
+    OMX_VC_LEFT = 2,         /** left macroblock is available */
+    OMX_VC_CENTER = 4,
+    OMX_VC_RIGHT = 8,
+    OMX_VC_LOWER = 16,
+    OMX_VC_UPPER_LEFT = 32,  /** above-left macroblock is available */
+    OMX_VC_UPPER_RIGHT = 64, /** above-right macroblock is available */
+    OMX_VC_LOWER_LEFT = 128,
+    OMX_VC_LOWER_RIGHT = 256 
+};
+
+
+
+/* 6.2.1.4 Video Components  */
+/* A data type that enumerates video components is defined as follows:  */
+
+typedef enum {
+    OMX_VC_LUMINANCE,    /** Luminance component */
+    OMX_VC_CHROMINANCE   /** chrominance component */ 
+} OMXVCM4P2VideoComponent;
+
+
+
+/* 6.2.1.5 MacroblockTypes  */
+/* A data type that enumerates macroblock types is defined as follows:  */
+
+typedef enum {
+    OMX_VC_INTER     = 0, /** P picture or P-VOP */
+    OMX_VC_INTER_Q   = 1, /** P picture or P-VOP */
+    OMX_VC_INTER4V   = 2, /** P picture or P-VOP */
+    OMX_VC_INTRA     = 3, /** I and P picture, I- and P-VOP */
+    OMX_VC_INTRA_Q   = 4, /** I and P picture, I- and P-VOP */
+    OMX_VC_INTER4V_Q = 5  /** P picture or P-VOP (H.263)*/
+} OMXVCM4P2MacroblockType;
+
+
+
+/* 6.2.1.6 Coordinates  */
+/* Coordinates are represented as follows:  */
+
+typedef struct {
+    OMX_INT x;
+    OMX_INT y;
+} OMXVCM4P2Coordinate;
+
+
+
+/* 6.2.1.7 Motion Estimation Algorithms  */
+/* A data type that enumerates motion estimation search methods is defined as follows:  */
+
+typedef enum {
+    OMX_VC_M4P2_FAST_SEARCH = 0,  /** Fast motion search */
+    OMX_VC_M4P2_FULL_SEARCH = 1   /** Full motion search */ 
+} OMXVCM4P2MEMode;
+
+
+
+/* 6.2.1.8 Motion Estimation Parameters  */
+/* A data structure containing control parameters for 
+ * motion estimation functions is defined as follows:  
+ */
+
+typedef struct {
+    OMX_INT searchEnable8x8;     /** enables 8x8 search */
+    OMX_INT halfPelSearchEnable; /** enables half-pel resolution */
+    OMX_INT searchRange;         /** search range */
+    OMX_INT rndVal;              /** rounding control; 0-disabled, 1-enabled*/
+} OMXVCM4P2MEParams;
+
+
+
+/* 6.2.1.9 Macroblock Information   */
+/* A data structure containing macroblock parameters for 
+ * motion estimation functions is defined as follows:  
+ */
+
+typedef struct {
+    OMX_S32 sliceId;                 /* slice number */
+    OMXVCM4P2MacroblockType mbType;  /* MB type: OMX_VC_INTRA, OMX_VC_INTER, or OMX_VC_INTER4V */
+    OMX_S32 qp;                      /* quantization parameter */
+    OMX_U32 cbpy;                    /* CBP Luma */
+    OMX_U32 cbpc;                    /* CBP Chroma */
+    OMXVCMotionVector pMV0[2][2];    /* motion vector, represented using 1/2-pel units, 
+                                      * pMV0[blocky][blockx] (blocky = 0~1, blockx = 0~1) 
+                                      */
+    OMXVCMotionVector pMVPred[2][2]; /* motion vector prediction, represented using 1/2-pel units, 
+                                      * pMVPred[blocky][blockx] (blocky = 0~1, blockx = 0~1) 
+                                      */
+    OMX_U8 pPredDir[2][2];           /* AC prediction direction: 
+                                      *   OMX_VC_NONE, OMX_VC_VERTICAL, OMX_VC_HORIZONTAL 
+                                      */
+} OMXVCM4P2MBInfo, *OMXVCM4P2MBInfoPtr;
+
+
+
+/**
+ * Function:  omxVCM4P2_FindMVpred   (6.2.3.1.1)
+ *
+ * Description:
+ * Predicts a motion vector for the current block using the procedure 
+ * specified in [ISO14496-2], subclause 7.6.5.  The resulting predicted MV is 
+ * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then 
+ * the set of three MV candidates used for prediction is also returned, 
+ * otherwise pDstMVPredME is NULL upon return. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcMVCurMB - pointer to the MV buffer associated with the current Y 
+ *            macroblock; a value of NULL indicates unavailability. 
+ *   pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated 
+ *            with the MB located to the left of the current MB; set to NULL 
+ *            if there is no MB to the left. 
+ *   pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated 
+ *            with the MB located above the current MB; set to NULL if there 
+ *            is no MB located above the current MB. 
+ *   pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated 
+ *            with the MB located to the right and above the current MB; set 
+ *            to NULL if there is no MB located to the above-right. 
+ *   iBlk - the index of block in the current macroblock 
+ *   pDstMVPredME - MV candidate return buffer;  if set to NULL then 
+ *            prediction candidate MVs are not returned and pDstMVPredME will 
+ *            be NULL upon function return; if pDstMVPredME is non-NULL then it 
+ *            must point to a buffer containing sufficient space for three 
+ *            return MVs. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMVPred - pointer to the predicted motion vector 
+ *   pDstMVPredME - if non-NULL upon input then pDstMVPredME  points upon 
+ *            return to a buffer containing the three motion vector candidates 
+ *            used for prediction as specified in [ISO14496-2], subclause 
+ *            7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL 
+ *            upon output. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -    the pointer pDstMVPred is NULL 
+ *    -    the parameter iBlk does not fall into the range 0 <= iBlk<=3 
+ *
+ */
+OMXResult omxVCM4P2_FindMVpred (
+    const OMXVCMotionVector *pSrcMVCurMB,
+    const OMXVCMotionVector *pSrcCandMV1,
+    const OMXVCMotionVector *pSrcCandMV2,
+    const OMXVCMotionVector *pSrcCandMV3,
+    OMXVCMotionVector *pDstMVPred,
+    OMXVCMotionVector *pDstMVPredME,
+    OMX_INT iBlk
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_IDCT8x8blk   (6.2.3.2.1)
+ *
+ * Description:
+ * Computes a 2D inverse DCT for a single 8x8 block, as defined in 
+ * [ISO14496-2]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the start of the linearly arranged IDCT input buffer; 
+ *            must be aligned on a 16-byte boundary.  According to 
+ *            [ISO14496-2], the input coefficient values should lie within the 
+ *            range [-2048, 2047]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the start of the linearly arranged IDCT output buffer; 
+ *            must be aligned on a 16-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrc or pDst is NULL. 
+ *    -    pSrc or pDst is not 16-byte aligned. 
+ *
+ */
+OMXResult omxVCM4P2_IDCT8x8blk (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_MEGetBufSize   (6.2.4.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification 
+ * structure for the following motion estimation functions: 
+ * BlockMatch_Integer_8x8, BlockMatch_Integer_16x16, and MotionEstimationMB. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P2MEMode 
+ *   pMEParams - motion estimation parameters 
+ *
+ * Output Arguments:
+ *   
+ *   pSize - pointer to the number of bytes required for the specification 
+ *            structure 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - one or more of the following is true: 
+ *    -    an invalid value was specified for the parameter MEmode 
+ *    -    a negative or zero value was specified for the 
+ *         parameter pMEParams->searchRange 
+ *
+ */
+OMXResult omxVCM4P2_MEGetBufSize (
+    OMXVCM4P2MEMode MEmode,
+    const OMXVCM4P2MEParams *pMEParams,
+    OMX_U32 *pSize
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_MEInit   (6.2.4.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the 
+ * following motion estimation functions:  BlockMatch_Integer_8x8, 
+ * BlockMatch_Integer_16x16, and MotionEstimationMB. Memory for the 
+ * specification structure *pMESpec must be allocated prior to calling the 
+ * function, and should be aligned on a 4-byte boundary.  Following 
+ * initialization by this function, the vendor-specific structure *pMESpec 
+ * should contain an implementation-specific representation of all motion 
+ * estimation parameters received via the structure pMEParams, for example  
+ * rndVal, searchRange, etc.  The number of bytes required for the 
+ * specification structure can be determined using the function 
+ * omxVCM4P2_MEGetBufSize. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P2MEMode 
+ *   pMEParams - motion estimation parameters 
+ *   pMESpec - pointer to the uninitialized ME specification structure 
+ *
+ * Output Arguments:
+ *   
+ *   pMESpec - pointer to the initialized ME specification structure 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - one or more of the following is true: 
+ *    -    an invalid value was specified for the parameter MEmode 
+ *    -    a negative or zero value was specified for the 
+ *         parameter pMEParams->searchRange 
+ *
+ */
+OMXResult omxVCM4P2_MEInit (
+    OMXVCM4P2MEMode MEmode,
+    const OMXVCM4P2MEParams*pMEParams,
+    void *pMESpec
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Integer_16x16   (6.2.4.2.1)
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated 
+ * minimum SAD. Both the input and output motion vectors are represented using 
+ * half-pixel units, and therefore a shift left or right by 1 bit may be 
+ * required, respectively, to match the input or output MVs with other 
+ * functions that either generate output MVs or expect input MVs represented 
+ * using integer pixel units. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            MB that corresponds to the location of the current macroblock in 
+ *            the current plane. 
+ *   refWidth - width of the reference plane 
+ *   pRefRect - pointer to the valid reference plane rectangle; coordinates 
+ *            are specified relative to the image origin.  Rectangle 
+ *            boundaries may extend beyond image boundaries if the image has 
+ *            been padded.  For example, if padding extends 4 pixels beyond 
+ *            frame border, then the value for the left border could be set to 
+ *            -4. 
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock 
+ *            buffer extracted from the original plane (linear array, 256 
+ *            entries); must be aligned on a 16-byte boundary.  The number of 
+ *            bytes between lines (step) is 16. 
+ *   pCurrPointPos - position of the current macroblock in the current plane 
+ *   pSrcPreMV - pointer to predicted motion vector; NULL indicates no 
+ *            predicted MV 
+ *   pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced 
+ *            by pSrcPreMV); may be set to NULL if unavailable. 
+ *   pMESpec - vendor-specific motion estimation specification structure; 
+ *            must have been allocated and then initialized using 
+ *            omxVCM4P2_MEInit prior to calling the block matching function. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMV - pointer to estimated MV 
+ *   pDstSAD - pointer to minimum SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following 
+ *              conditions is true: 
+ *    -    at least one of the following pointers is NULL: pSrcRefBuf, 
+ *              pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or 
+ *              pMESpec, or 
+ *    -    pSrcCurrBuf is not 16-byte aligned 
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_16x16 (
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    const OMXVCMotionVector*pSrcPreMV,
+    const OMX_INT *pSrcPreSAD,
+    void *pMESpec,
+    OMXVCMotionVector*pDstMV,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Integer_8x8   (6.2.4.2.2)
+ *
+ * Description:
+ * Performs an 8x8 block search; estimates motion vector and associated 
+ * minimum SAD.  Both the input and output motion vectors are represented 
+ * using half-pixel units, and therefore a shift left or right by 1 bit may be 
+ * required, respectively, to match the input or output MVs with other 
+ * functions that either generate output MVs or expect input MVs represented 
+ * using integer pixel units. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            block that corresponds to the location of the current 8x8 block 
+ *            in the current plane. 
+ *   refWidth - width of the reference plane 
+ *   pRefRect - pointer to the valid reference plane rectangle; coordinates 
+ *            are specified relative to the image origin.  Rectangle 
+ *            boundaries may extend beyond image boundaries if the image has 
+ *            been padded. 
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock 
+ *            buffer extracted from the original plane (linear array, 128 
+ *            entries); must be aligned on an 8-byte boundary.  The number of 
+ *            bytes between lines (step) is 16 bytes. 
+ *   pCurrPointPos - position of the current block in the current plane 
+ *   pSrcPreMV - pointer to predicted motion vector; NULL indicates no 
+ *            predicted MV 
+ *   pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced 
+ *            by pSrcPreMV); may be set to NULL if unavailable. 
+ *   pMESpec - vendor-specific motion estimation specification structure; 
+ *            must have been allocated and then initialized using 
+ *            omxVCM4P2_MEInit prior to calling the block matching function. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMV - pointer to estimated MV 
+ *   pDstSAD - pointer to minimum SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following 
+ *              conditions is true: 
+ *    -    at least one of the following pointers is NULL: pSrcRefBuf, 
+ *              pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or 
+ *              pMESpec, or 
+ *    -    pSrcCurrBuf is not 8-byte aligned 
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_8x8 (
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    const OMXVCMotionVector *pSrcPreMV,
+    const OMX_INT *pSrcPreSAD,
+    void *pMESpec,
+    OMXVCMotionVector *pDstMV,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Half_16x16   (6.2.4.2.3)
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution.  Returns the
+ * estimated motion vector and associated minimum SAD.  This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally.  The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function
+ * BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ *            macroblock that corresponds to the location of the current
+ *            macroblock in the current plane.
+ *   refWidth - width of the reference plane
+ *   pRefRect - reference plane valid region rectangle
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock
+ *            buffer extracted from the original plane (linear array, 256
+ *            entries); must be aligned on a 16-byte boundary.  The number of
+ *            bytes between lines (step) is 16.
+ *   pSearchPointRefPos - position of the starting point for half pixel
+ *            search (specified in terms of integer pixel units) in the
+ *            reference plane, i.e., the reference position pointed to by the
+ *            predicted motion vector.
+ *   rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ *   pSrcDstMV - pointer to the initial MV estimate; typically generated
+ *            during a prior 16x16 integer search; specified in terms of
+ *            half-pixel units.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDstMV - pointer to estimated MV
+ *   pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following
+ *              conditions is true:
+ *    -    at least one of the following pointers is NULL: pSrcRefBuf,
+ *         pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV, or
+ *    -    pSrcCurrBuf is not 16-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_16x16 (
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pSearchPointRefPos,
+    OMX_INT rndVal,
+    OMXVCMotionVector *pSrcDstMV,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Half_8x8   (6.2.4.2.4)
+ *
+ * Description:
+ * Performs an 8x8 block match with half-pixel resolution. Returns the
+ * estimated motion vector and associated minimum SAD.  This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally.  The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function
+ * BlockMatch_Integer_8x8 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ *            block that corresponds to the location of the current 8x8 block
+ *            in the current plane.
+ *   refWidth - width of the reference plane
+ *   pRefRect - reference plane valid region rectangle
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock
+ *            buffer extracted from the original plane (linear array, 128
+ *            entries); must be aligned on an 8-byte boundary.  The number of
+ *            bytes between lines (step) is 16.
+ *   pSearchPointRefPos - position of the starting point for half pixel
+ *            search (specified in terms of integer pixel units) in the
+ *            reference plane.
+ *   rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ *   pSrcDstMV - pointer to the initial MV estimate; typically generated
+ *            during a prior 8x8 integer search, specified in terms of
+ *            half-pixel units.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDstMV - pointer to estimated MV
+ *   pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following
+ *              conditions is true:
+ *    -    at least one of the following pointers is NULL:
+ *         pSrcRefBuf, pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV
+ *    -    pSrcCurrBuf is not 8-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_8x8 (
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pSearchPointRefPos,
+    OMX_INT rndVal,
+    OMXVCMotionVector *pSrcDstMV,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_MotionEstimationMB   (6.2.4.3.1)
+ *
+ * Description:
+ * Performs motion search for a 16x16 macroblock.  Selects best motion search
+ * strategy from among inter-1MV, inter-4MV, and intra modes.  Supports
+ * integer and half pixel resolution.
+ *
+ * Input Arguments:
+ *
+ *   pSrcCurrBuf - pointer to the top-left corner of the current MB in the
+ *            original picture plane; must be aligned on a 16-byte boundary.
+ *            The function does not expect source data outside the region
+ *            bounded by the MB to be available; for example it is not
+ *            necessary for the caller to guarantee the availability of
+ *            pSrcCurrBuf[-srcCurrStep], i.e., the row of pixels above the MB
+ *            to be processed.
+ *   srcCurrStep - width of the original picture plane, in terms of full
+ *            pixels; must be a multiple of 16.
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ *            plane location corresponding to the location of the current
+ *            macroblock in the current plane; must be aligned on a 16-byte
+ *            boundary.
+ *   srcRefStep - width of the reference picture plane, in terms of full
+ *            pixels; must be a multiple of 16.
+ *   pRefRect - reference plane valid region rectangle, specified relative to
+ *            the image origin
+ *   pCurrPointPos - position of the current macroblock in the current plane
+ *   pMESpec - pointer to the vendor-specific motion estimation specification
+ *            structure; must be allocated and then initialized using
+ *            omxVCM4P2_MEInit prior to calling this function.
+ *   pMBInfo - array, of dimension four, containing pointers to information
+ *            associated with four nearby MBs:
+ *            -   pMBInfo[0] - pointer to left MB information
+ *            -   pMBInfo[1] - pointer to top MB information
+ *            -   pMBInfo[2] - pointer to top-left MB information
+ *            -   pMBInfo[3] - pointer to top-right MB information
+ *            Any pointer in the array may be set equal to NULL if the
+ *            corresponding MB doesn't exist.  For each MB, the following
+ *            structure members are used:
+ *            -   mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER,
+ *                or OMX_VC_INTER4V
+ *            -   pMV0[2][2] - estimated motion vectors; represented
+ *                in 1/2 pixel units
+ *            -   sliceID - number of the slice to which the MB belongs
+ *   pSrcDstMBCurr - pointer to information structure for the current MB.
+ *            The following entries should be set prior to calling the
+ *            function: sliceID - the number of the slice to which the
+ *            current MB belongs.  The structure elements cbpy and cbpc are
+ *            ignored.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDstMBCurr - pointer to updated information structure for the current
+ *            MB after MB-level motion estimation has been completed.  The
+ *            following structure members are updated by the ME function:
+ *              -  mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or
+ *                 OMX_VC_INTER4V.
+ *              -  pMV0[2][2] - estimated motion vectors; represented in
+ *                 terms of 1/2 pel units.
+ *              -  pMVPred[2][2] - predicted motion vectors; represented
+ *                 in terms of 1/2 pel units.
+ *            The structure members cbpy and cbpc are not updated by the
+ *            function.
+ *   pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs
+ *            for INTER4V
+ *   pDstBlockSAD - pointer to an array of SAD values for each of the four
+ *            8x8 luma blocks in the MB.  The block SADs are in scan order for
+ *            each MB.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the
+ *              following conditions is true:
+ *    -    at least one of the following pointers is NULL: pSrcCurrBuf,
+ *              pSrcRefBuf, pRefRect, pCurrPointPos, pMBInter, pMBIntra,
+ *              pSrcDstMBCurr, or pDstSAD.
+ *              NOTE(review): pMBInter and pMBIntra are not parameters of
+ *              this prototype; confirm which pointers (e.g. pMESpec,
+ *              pMBInfo, pDstBlockSAD) are actually checked.
+ *
+ */
+OMXResult omxVCM4P2_MotionEstimationMB (
+    const OMX_U8 *pSrcCurrBuf,
+    OMX_S32 srcCurrStep,
+    const OMX_U8 *pSrcRefBuf,
+    OMX_S32 srcRefStep,
+    const OMXRect*pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    void *pMESpec,
+    const OMXVCM4P2MBInfoPtr *pMBInfo,
+    OMXVCM4P2MBInfo *pSrcDstMBCurr,
+    OMX_U16 *pDstSAD,
+    OMX_U16 *pDstBlockSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DCT8x8blk   (6.2.4.4.1)
+ *
+ * Description:
+ * Computes a 2D forward DCT for a single 8x8 block, as defined in
+ * [ISO14496-2].
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the start of the linearly arranged input buffer of
+ *            OMX_S16 values; must be aligned on a 16-byte boundary.  Input
+ *            values (pixel intensities) are valid in the range [-255,255].
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the start of the linearly arranged output buffer of
+ *            OMX_S16 values; must be aligned on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments, returned if:
+ *    -    pSrc or pDst is NULL.
+ *    -    pSrc or pDst is not 16-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_DCT8x8blk (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_QuantIntra_I   (6.2.4.4.2)
+ *
+ * Description:
+ * Performs quantization on intra block coefficients. This function supports
+ * bits_per_pixel == 8.
+ *
+ * Input Arguments:
+ *
+ *   pSrcDst - pointer to the input intra block coefficients; must be aligned
+ *            on a 16-byte boundary.
+ *   QP - quantization parameter (quantizer_scale).
+ *   blockIndex - block index indicating the component type and position,
+ *            valid in the range 0 to 5, as defined in [ISO14496-2], subclause
+ *            6.1.3.8.
+ *   shortVideoHeader - binary flag indicating presence of
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDst - pointer to the output (quantized) intra block coefficients.
+ *            When shortVideoHeader==1, AC coefficients are saturated on the
+ *            interval [-127, 127], and DC coefficients are saturated on the
+ *            interval [1, 254].  When shortVideoHeader==0, AC coefficients
+ *            are saturated on the interval [-2047, 2047].
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrcDst is NULL.
+ *    -    blockIndex < 0 or blockIndex >= 10
+ *         NOTE(review): the argument description above gives the valid
+ *         range as 0 to 5; confirm which bound applies.
+ *    -    QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_QuantIntra_I (
+    OMX_S16 *pSrcDst,
+    OMX_U8 QP,
+    OMX_INT blockIndex,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_QuantInter_I   (6.2.4.4.3)
+ *
+ * Description:
+ * Performs quantization on an inter coefficient block; supports
+ * bits_per_pixel == 8.
+ *
+ * Input Arguments:
+ *
+ *   pSrcDst - pointer to the input inter block coefficients; must be aligned
+ *            on a 16-byte boundary.
+ *   QP - quantization parameter (quantizer_scale)
+ *   shortVideoHeader - binary flag indicating presence of short_video_header;
+ *            shortVideoHeader==1 selects linear intra DC mode, and
+ *            shortVideoHeader==0 selects non linear intra DC mode.
+ *            NOTE(review): the "intra DC mode" wording matches the intra
+ *            quantization function; confirm how it applies to inter blocks.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDst - pointer to the output (quantized) inter block coefficients.
+ *            When shortVideoHeader==1, AC coefficients are saturated on the
+ *            interval [-127, 127], and DC coefficients are saturated on the
+ *            interval [1, 254].  When shortVideoHeader==0, AC coefficients
+ *            are saturated on the interval [-2047, 2047].
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrcDst is NULL.
+ *    -    QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_QuantInter_I (
+    OMX_S16 *pSrcDst,
+    OMX_U8 QP,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_TransRecBlockCoef_intra   (6.2.4.4.4)
+ *
+ * Description:
+ * Quantizes the DCT coefficients, implements intra block AC/DC coefficient
+ * prediction, and reconstructs the current intra block texture for prediction
+ * on the next frame.  Quantized row and column coefficients are returned in
+ * the updated coefficient buffers.
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the pixels of current intra block; must be aligned on
+ *            an 8-byte boundary.
+ *   pPredBufRow - pointer to the coefficient row buffer containing
+ *            ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16.
+ *            Coefficients are organized into blocks of eight as described
+ *            below (Internal Prediction Coefficient Update Procedures).  The
+ *            DC coefficient is first, and the remaining buffer locations
+ *            contain the quantized AC coefficients. Each group of eight row
+ *            buffer elements combined with one element eight elements ahead
+ *            contains the coefficient predictors of the neighboring block
+ *            that is spatially above or to the left of the block currently to
+ *            be decoded. A negative-valued DC coefficient indicates that this
+ *            neighboring block is not INTRA-coded or out of bounds, and
+ *            therefore the AC and DC coefficients are invalid.  Pointer must
+ *            be aligned on an 8-byte boundary.
+ *   pPredBufCol - pointer to the prediction coefficient column buffer
+ *            containing 16 elements of type OMX_S16. Coefficients are
+ *            organized as described in section 6.2.2.5.  Pointer must be
+ *            aligned on an 8-byte boundary.
+ *   pSumErr - pointer to a flag indicating whether or not AC prediction is
+ *            required; AC prediction is enabled if *pSumErr >= 0, but the
+ *            value is not used for coefficient prediction, i.e., the sum of
+ *            absolute differences starts from 0 for each call to this
+ *            function.  Otherwise AC prediction is disabled if *pSumErr < 0.
+ *   blockIndex - block index indicating the component type and position, as
+ *            defined in [ISO14496-2], subclause 6.1.3.8.
+ *   curQp - quantization parameter of the macroblock to which the current
+ *            block belongs
+ *   pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0]
+ *            contains the quantization parameter associated with the 8x8
+ *            block left of the current block (QPa), and pQpBuf[1] contains
+ *            the quantization parameter associated with the 8x8 block above
+ *            the current block (QPc).  In the event that the corresponding
+ *            block is outside of the VOP bound, the Qp value will not affect
+ *            the intra prediction process, as described in [ISO14496-2],
+ *            sub-clause 7.4.3.3, "Adaptive AC Coefficient Prediction".
+ *   srcStep - width of the source buffer; must be a multiple of 8.
+ *   dstStep - width of the reconstructed destination buffer; must be a
+ *            multiple of 16.
+ *   shortVideoHeader - binary flag indicating presence of
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains
+ *            the predicted DC coefficient; the remaining entries contain the
+ *            quantized AC coefficients (without prediction).  The pointer
+ *            pDst must be aligned on a 16-byte boundary.
+ *   pRec - pointer to the reconstructed texture; must be aligned on an
+ *            8-byte boundary.
+ *   pPredBufRow - pointer to the updated coefficient row buffer
+ *   pPredBufCol - pointer to the updated coefficient column buffer
+ *   pPreACPredict - if prediction is enabled, the parameter points to the
+ *            start of the buffer containing the coefficient differences for
+ *            VLC encoding. The entry pPreACPredict[0] indicates prediction
+ *            direction for the current block and takes one of the following
+ *            values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or
+ *            OMX_VC_VERTICAL.  The entries
+ *            pPreACPredict[1]-pPreACPredict[7] contain predicted AC
+ *            coefficients.  If prediction is disabled (*pSumErr<0) then the
+ *            contents of this buffer are undefined upon return from the
+ *            function
+ *   pSumErr - pointer to the value of the accumulated AC coefficient errors,
+ *            i.e., sum of the absolute differences between predicted and
+ *            unpredicted AC coefficients
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: pSrc, pDst, pRec,
+ *         pPredBufRow, pPredBufCol, pQpBuf, pPreACPredict, pSumErr.
+ *    -    blockIndex < 0 or blockIndex >= 10;
+ *    -    curQp <= 0 or curQp >= 32.
+ *    -    srcStep, or dstStep <= 0 or not a multiple of 8.
+ *         NOTE(review): dstStep is described above as a multiple of 16;
+ *         confirm which constraint is enforced.
+ *    -    pDst is not 16-byte aligned.
+ *    -    At least one of the following pointers is not 8-byte aligned:
+ *         pSrc, pRec.
+ *
+ *  Note: The coefficient buffers must be updated in accordance with the
+ *        update procedures defined in section 6.2.2.
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_intra (
+    const OMX_U8 *pSrc,
+    OMX_S16 *pDst,
+    OMX_U8 *pRec,
+    OMX_S16 *pPredBufRow,
+    OMX_S16 *pPredBufCol,
+    OMX_S16 *pPreACPredict,
+    OMX_INT *pSumErr,
+    OMX_INT blockIndex,
+    OMX_U8 curQp,
+    const OMX_U8 *pQpBuf,
+    OMX_INT srcStep,
+    OMX_INT dstStep,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_TransRecBlockCoef_inter   (6.2.4.4.5)
+ *
+ * Description:
+ * Implements DCT, and quantizes the DCT coefficients of the inter block
+ * while reconstructing the texture residual. There is no boundary check for
+ * the bit stream buffer.
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the residuals to be encoded; must be aligned on a
+ *            16-byte boundary.
+ *   QP - quantization parameter.
+ *   shortVideoHeader - binary flag indicating presence of short_video_header;
+ *                      shortVideoHeader==1 selects linear intra DC mode, and
+ *                      shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the quantized DCT coefficients buffer; must be aligned
+ *            on a 16-byte boundary.
+ *   pRec - pointer to the reconstructed texture residuals; must be aligned
+ *            on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is either NULL or
+ *         not 16-byte aligned:
+ *            - pSrc
+ *            - pDst
+ *            - pRec
+ *    -    QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_inter (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst,
+    OMX_S16 *pRec,
+    OMX_U8 QP,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeVLCZigzag_IntraDCVLC   (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block.  Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding".
+ *
+ * Input Arguments:
+ *
+ *   ppBitStream - double pointer to the current byte in the bitstream
+ *   pBitOffset - pointer to the bit position in the byte pointed to by
+ *            *ppBitStream. Valid within 0 to 7.
+ *   pQDctBlkCoef - pointer to the quantized DCT coefficient
+ *   predDir - AC prediction direction, which is used to decide the zigzag
+ *            scan pattern; takes one of the following values:
+ *            -  OMX_VC_NONE - AC prediction not used.
+ *                             Performs classical zigzag scan.
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction.
+ *                             Performs alternate-vertical zigzag scan.
+ *            -  OMX_VC_VERTICAL - Vertical prediction.
+ *                             Performs alternate-horizontal zigzag scan.
+ *   pattern - block pattern which is used to decide whether this block is
+ *            encoded
+ *   shortVideoHeader - binary flag indicating presence of
+ *            short_video_header; escape modes 0-3 are used if
+ *            shortVideoHeader==0, and escape mode 4 is used when
+ *            shortVideoHeader==1.
+ *   videoComp - video component type (luminance, chrominance) of the current
+ *            block
+ *
+ * Output Arguments:
+ *
+ *   ppBitStream - *ppBitStream is updated after the block is encoded, so
+ *            that it points to the current byte in the bit stream buffer.
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit
+ *            position in the byte pointed to by *ppBitStream.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream,
+ *              *ppBitStream, pBitOffset, pQDctBlkCoef.
+ *    -    *pBitOffset < 0, or *pBitOffset > 7.
+ *    -    predDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ *         OMX_VC_VERTICAL.
+ *    -    videoComp is not one component of enum OMXVCM4P2VideoComponent.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMX_S16 *pQDctBlkCoef,
+    OMX_U8 predDir,
+    OMX_U8 pattern,
+    OMX_INT shortVideoHeader,
+    OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeVLCZigzag_IntraACVLC   (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block.  Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding".
+ *
+ * Input Arguments:
+ *
+ *   ppBitStream - double pointer to the current byte in the bitstream
+ *   pBitOffset - pointer to the bit position in the byte pointed to by
+ *            *ppBitStream. Valid within 0 to 7.
+ *   pQDctBlkCoef - pointer to the quantized DCT coefficient
+ *   predDir - AC prediction direction, which is used to decide the zigzag
+ *            scan pattern; takes one of the following values:
+ *            -  OMX_VC_NONE - AC prediction not used.
+ *                             Performs classical zigzag scan.
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction.
+ *                             Performs alternate-vertical zigzag scan.
+ *            -  OMX_VC_VERTICAL - Vertical prediction.
+ *                             Performs alternate-horizontal zigzag scan.
+ *   pattern - block pattern which is used to decide whether this block is
+ *            encoded
+ *   shortVideoHeader - binary flag indicating presence of
+ *            short_video_header; escape modes 0-3 are used if
+ *            shortVideoHeader==0, and escape mode 4 is used when
+ *            shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ *   ppBitStream - *ppBitStream is updated after the block is encoded, so
+ *            that it points to the current byte in the bit stream buffer.
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit
+ *            position in the byte pointed to by *ppBitStream.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream,
+ *              *ppBitStream, pBitOffset, pQDctBlkCoef.
+ *    -    *pBitOffset < 0, or *pBitOffset > 7.
+ *    -    predDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ *         OMX_VC_VERTICAL.
+ *    NOTE(review): the original text also listed a "VideoComp" check, but
+ *    this prototype takes no videoComp parameter; confirm against the spec.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMX_S16 *pQDctBlkCoef,
+    OMX_U8 predDir,
+    OMX_U8 pattern,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeVLCZigzag_Inter   (6.2.4.5.3)
+ *
+ * Description:
+ * Performs classical zigzag scanning and VLC encoding for one inter block.
+ *
+ * Input Arguments:
+ *
+ *   ppBitStream - pointer to the pointer to the current byte in the bit
+ *            stream
+ *   pBitOffset - pointer to the bit position in the byte pointed to by
+ *            *ppBitStream. Valid within 0 to 7
+ *   pQDctBlkCoef - pointer to the quantized DCT coefficient
+ *   pattern - block pattern which is used to decide whether this block is
+ *            encoded
+ *   shortVideoHeader - binary flag indicating presence of
+ *            short_video_header; escape modes 0-3 are used if
+ *            shortVideoHeader==0, and escape mode 4 is used when
+ *            shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ *   ppBitStream - *ppBitStream is updated after the block is encoded so that
+ *            it points to the current byte in the bit stream buffer.
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit
+ *            position in the byte pointed to by *ppBitStream.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream,
+ *              *ppBitStream, pBitOffset, pQDctBlkCoef
+ *    -    *pBitOffset < 0, or *pBitOffset > 7.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_Inter (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMX_S16 *pQDctBlkCoef,
+    OMX_U8 pattern,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeMV   (6.2.4.5.4)
+ *
+ * Description:
+ * Predicts a motion vector for the current macroblock, encodes the
+ * difference, and writes the output to the stream buffer. The input MVs
+ * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie
+ * within the ranges associated with the input parameter fcodeForward, as
+ * described in [ISO14496-2], subclause 7.6.3.  This function provides a
+ * superset of the functionality associated with the function
+ * omxVCM4P2_FindMVpred.
+ *
+ * Input Arguments:
+ *
+ *   ppBitStream - double pointer to the current byte in the bitstream buffer
+ *   pBitOffset - pointer to the index of the first free (next available) bit
+ *            in the stream buffer referenced by *ppBitStream; the index is
+ *            valid in the range 0 to 7.
+ *   pMVCurMB - pointer to the current macroblock motion vector; a value of
+ *            NULL indicates unavailability.
+ *            NOTE(review): the return-value section below lists a NULL
+ *            pMVCurMB as a bad argument; confirm whether NULL is accepted.
+ *   pSrcMVLeftMB - pointer to the source left macroblock motion vector; a
+ *            value of NULL indicates unavailability.
+ *   pSrcMVUpperMB - pointer to source upper macroblock motion vector; a
+ *            value of NULL indicates unavailability.
+ *   pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a
+ *            value of NULL indicates unavailability.
+ *   fcodeForward - an integer with values from 1 to 7; used in encoding
+ *            motion vectors related to search range, as described in
+ *            [ISO14496-2], subclause 7.6.3.
+ *   MBType - macro block type, valid in the range 0 to 5
+ *
+ * Output Arguments:
+ *
+ *   ppBitStream - updated pointer to the current byte in the bit stream
+ *            buffer
+ *   pBitOffset - updated index of the next available bit position in stream
+ *            buffer referenced by *ppBitStream
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream,
+ *              *ppBitStream, pBitOffset, pMVCurMB
+ *    -    *pBitOffset < 0, or *pBitOffset > 7.
+ *    -    fcodeForward <= 0, or fcodeForward > 7, or MBType < 0.
+ *
+ */
+OMXResult omxVCM4P2_EncodeMV (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMXVCMotionVector *pMVCurMB,
+    const OMXVCMotionVector*pSrcMVLeftMB,
+    const OMXVCMotionVector *pSrcMVUpperMB,
+    const OMXVCMotionVector *pSrcMVUpperRightMB,
+    OMX_INT fcodeForward,
+    OMXVCM4P2MacroblockType MBType
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodePadMV_PVOP   (6.2.5.1.1)
+ *
+ * Description:
+ * Decodes and pads the four motion vectors associated with a non-intra P-VOP
+ * macroblock.  For macroblocks of type OMX_VC_INTER4V, the output MV is
+ * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for
+ * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to
+ * all four output MV buffer entries.
+ *
+ * Input Arguments:
+ *
+ *   ppBitStream - pointer to the pointer to the current byte in the bit
+ *            stream buffer
+ *   pBitOffset - pointer to the bit position in the byte pointed to by
+ *            *ppBitStream. *pBitOffset is valid within [0-7].
+ *   pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the
+ *            motion vector buffers of the macroblocks spatially at the left,
+ *            upper, and upper-right side of the current macroblock,
+ *            respectively; a value of NULL indicates unavailability.  Note:
+ *            Any neighborhood macroblock outside the current VOP or video
+ *            packet or outside the current GOB (when short_video_header is
+ *            "1") for which gob_header_empty is "0" is treated as
+ *            transparent, according to [ISO14496-2], subclause 7.6.5.
+ *   fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream
+ *            syntax
+ *   MBType - the type of the current macroblock. If MBType is not equal to
+ *            OMX_VC_INTER4V, the destination motion vector buffer is still
+ *            filled with the same decoded vector.
+ *
+ * Output Arguments:
+ *
+ *   ppBitStream - *ppBitStream is updated after the block is decoded, so
+ *            that it points to the current byte in the bit stream buffer
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit
+ *            position in the byte pointed to by *ppBitStream
+ *   pDstMVCurMB - pointer to the motion vector buffer for the current
+ *            macroblock; contains four decoded motion vectors
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL:
+ *         ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB
+ *    -    *pBitOffset exceeds [0,7]
+ *    -    fcodeForward exceeds (0,7]
+ *    -    MBType less than zero
+ *    -    motion vector buffer is not 4-byte aligned.
+ *    OMX_Sts_Err - status error
+ *
+ */
+OMXResult omxVCM4P2_DecodePadMV_PVOP (
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMXVCMotionVector *pSrcMVLeftMB,
+    OMXVCMotionVector*pSrcMVUpperMB,
+    OMXVCMotionVector *pSrcMVUpperRightMB,
+    OMXVCMotionVector*pDstMVCurMB,
+    OMX_INT fcodeForward,
+    OMXVCM4P2MacroblockType MBType
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeVLCZigzag_IntraDCVLC   (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients 
+ * for one intra block.  Two versions of the function (DCVLC and ACVLC) are 
+ * provided in order to support the two different methods of processing DC 
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4,  Intra DC 
+ * Coefficient Decoding for the Case of Switched VLC Encoding.  
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the 
+ *            bitstream buffer 
+ *   pBitOffset - pointer to the bit position in the current byte referenced 
+ *            by *ppBitStream.  The parameter *pBitOffset is valid in the 
+ *            range [0-7]. 
+ *            Bit Position in one byte:  |Most      Least| 
+ *                    *pBitOffset        |0 1 2 3 4 5 6 7| 
+ *   predDir - AC prediction direction; used to select the zigzag scan 
+ *            pattern; takes one of the following values: 
+ *            -  OMX_VC_NONE - AC prediction not used; 
+ *                             performs classical zigzag scan. 
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction; 
+ *                             performs alternate-vertical zigzag scan; 
+ *            -  OMX_VC_VERTICAL - Vertical prediction; 
+ *                             performs alternate-horizontal zigzag scan. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *   videoComp - video component type (luminance or chrominance) of the 
+ *            current block 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded such 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated such that it points to the current 
+ *            bit position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the coefficient buffer of current block; must be 
+ *            4-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pDst
+ *    -    *pBitOffset exceeds [0,7]
+ *    -    predDir exceeds [0,2]
+ *    -    pDst is not 4-byte aligned 
+ *    OMX_Sts_Err - if:
+ *    -    In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 
+ *    -    At least one of mark bits equals zero 
+ *    -    Illegal stream encountered; code cannot be located in VLC table 
+ *    -    Forbidden code encountered in the VLC FLC table. 
+ *    -    The number of coefficients is greater than 64 
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMX_S16 *pDst,
+    OMX_U8 predDir,
+    OMX_INT shortVideoHeader,
+    OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeVLCZigzag_IntraACVLC   (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients 
+ * for one intra block.  Two versions of the function (DCVLC and ACVLC) are 
+ * provided in order to support the two different methods of processing DC 
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4,  Intra DC 
+ * Coefficient Decoding for the Case of Switched VLC Encoding.  
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the 
+ *            bitstream buffer 
+ *   pBitOffset - pointer to the bit position in the current byte referenced 
+ *            by *ppBitStream.  The parameter *pBitOffset is valid in the 
+ *            range [0-7]. 
+ *            Bit Position in one byte:  |Most      Least| 
+ *                    *pBitOffset        |0 1 2 3 4 5 6 7| 
+ *   predDir - AC prediction direction; used to select the zigzag scan 
+ *            pattern; takes one of the following values: 
+ *            -  OMX_VC_NONE - AC prediction not used; performs classical zigzag scan. 
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction; performs alternate-vertical zigzag scan; 
+ *            -  OMX_VC_VERTICAL - Vertical prediction; performs alternate-horizontal zigzag scan. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *   videoComp - video component type (luminance or chrominance) of the 
+ *            current block 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded such 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated such that it points to the current 
+ *            bit position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the coefficient buffer of current block; must be 
+ *            4-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments: at least one of the following 
+ *              pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst; 
+ *              or at least one of the following conditions is true: 
+ *              *pBitOffset exceeds [0,7], predDir exceeds [0,2], or pDst is 
+ *              not 4-byte aligned 
+ *    OMX_Sts_Err - in DecodeVLCZigzag_IntraDCVLC, dc_size > 12; at least one 
+ *              of mark bits equals zero; illegal stream encountered (code 
+ *              cannot be located in VLC table); forbidden code encountered in 
+ *              the VLC FLC table; or the number of coefficients is greater than 64 
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC (
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMX_S16 *pDst,
+    OMX_U8 predDir,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeVLCZigzag_Inter   (6.2.5.2.3)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one inter-coded block. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - double pointer to the current byte in the stream buffer 
+ *   pBitOffset - pointer to the next available bit in the current stream 
+ *            byte referenced by *ppBitStream. The parameter *pBitOffset is 
+ *            valid within the range [0-7]. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded such 
+ *            that it points to the current byte in the stream buffer 
+ *   pBitOffset - *pBitOffset is updated after decoding such that it points 
+ *            to the next available bit in the stream byte referenced by 
+ *            *ppBitStream 
+ *   pDst - pointer to the coefficient buffer of current block; must be 
+ *            4-byte aligned. 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pDst
+ *    -    pDst is not 4-byte aligned
+ *    -   *pBitOffset exceeds [0,7]
+ *    OMX_Sts_Err - status error, if:
+ *    -    At least one mark bit is equal to zero 
+ *    -    Encountered an illegal stream code that cannot be found in the VLC table 
+ *    -    Encountered an illegal code in the VLC FLC table 
+ *    -    The number of coefficients is greater than 64 
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_Inter (
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMX_S16 *pDst,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_QuantInvIntra_I   (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded 
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to 
+ * the range [-2048, 2047]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input (quantized) intra/inter block; must be 
+ *            aligned on a 16-byte boundary. 
+ *   QP - quantization parameter (quantizer_scale) 
+ *   videoComp - video component type of the current block. Takes one of the 
+ *            following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra 
+ *            version only). 
+ *   shortVideoHeader - binary flag indicating presence of short_video_header 
+ *            (intra version only). 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the output (dequantized) intra/inter block 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; one or more of the following is 
+ *              true: 
+ *    -    pSrcDst is NULL 
+ *    -    QP <= 0 or QP > 31 
+ *    -    videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. 
+ *
+ */
+OMXResult omxVCM4P2_QuantInvIntra_I (
+    OMX_S16 *pSrcDst,
+    OMX_INT QP,
+    OMXVCM4P2VideoComponent videoComp,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_QuantInvInter_I   (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded 
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to 
+ * the range [-2048, 2047]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input (quantized) intra/inter block; must be 
+ *            aligned on a 16-byte boundary. 
+ *   QP - quantization parameter (quantizer_scale) 
+ *   videoComp - video component type of the current block. Takes one of the 
+ *            following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra 
+ *            version only). 
+ *   shortVideoHeader - binary flag indicating presence of short_video_header 
+ *            (intra version only). 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the output (dequantized) intra/inter block 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; one or more of the following is 
+ *              true: 
+ *    -    pSrcDst is NULL 
+ *    -    QP <= 0 or QP > 31 
+ *    -    videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. 
+ *
+ */
+OMXResult omxVCM4P2_QuantInvInter_I (
+    OMX_S16 *pSrcDst,
+    OMX_INT QP
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeBlockCoef_Intra   (6.2.5.4.1)
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inversely 
+ * zigzag positioning, and IDCT, with appropriate clipping on each step, are 
+ * performed on the coefficients. The results are then placed in the output 
+ * frame/plane on a pixel basis.  Note: This function will be used only when 
+ * at least one non-zero AC coefficient of current block exists in the bit 
+ * stream. The DC only condition will be handled in another function. 
+ *
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the bit 
+ *            stream buffer. There is no boundary check for the bit stream 
+ *            buffer. 
+ *   pBitOffset - pointer to the bit position in the byte pointed to by 
+ *            *ppBitStream. *pBitOffset is valid within [0-7]. 
+ *   step - width of the destination plane 
+ *   pCoefBufRow - pointer to the coefficient row buffer; must be aligned on 
+ *            an 8-byte boundary. 
+ *   pCoefBufCol - pointer to the coefficient column buffer; must be aligned 
+ *            on an 8-byte boundary. 
+ *   curQP - quantization parameter of the macroblock which the current block 
+ *            belongs to 
+ *   pQPBuf - pointer to the quantization parameter buffer 
+ *   blockIndex - block index indicating the component type and position as 
+ *            defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5. 
+ *   intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a 
+ *            mechanism to switch between two VLC for coding of Intra DC 
+ *            coefficients as per [ISO14496-2], Table 6-21. 
+ *   ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if 
+ *            the ac coefficients of the first row or first column are 
+ *            differentially coded for intra coded macroblock. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC 
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded, so 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the block in the destination plane; must be aligned on 
+ *            an 8-byte boundary. 
+ *   pCoefBufRow - pointer to the updated coefficient row buffer. 
+ *   pCoefBufCol - pointer to the updated coefficient column buffer  Note: 
+ *            The coefficient buffers must be updated in accordance with the 
+ *            update procedure defined in section 6.2.2. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol, 
+ *         pQPBuf, pDst. 
+ *    -    *pBitOffset exceeds [0,7] 
+ *    -    curQP exceeds (1, 31)
+ *    -    blockIndex exceeds [0,5]
+ *    -    step is not a multiple of 8
+ *    -    a pointer alignment requirement was violated. 
+ *    OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_IntraDCVLC/IntraACVLC.  
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra (
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMX_U8 *pDst,
+    OMX_INT step,
+    OMX_S16 *pCoefBufRow,
+    OMX_S16 *pCoefBufCol,
+    OMX_U8 curQP,
+    const OMX_U8 *pQPBuf,
+    OMX_INT blockIndex,
+    OMX_INT intraDCVLC,
+    OMX_INT ACPredFlag,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeBlockCoef_Inter   (6.2.5.4.2)
+ *
+ * Description:
+ * Decodes the INTER block coefficients. This function performs inverse 
+ * quantization, inverse zigzag positioning, and IDCT (with appropriate 
+ * clipping on each step) on the coefficients. The results (residuals) are 
+ * placed in a contiguous array of 64 elements. For INTER block, the output 
+ * buffer holds the residuals for further reconstruction. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the bit 
+ *            stream buffer. There is no boundary check for the bit stream 
+ *            buffer. 
+ *   pBitOffset - pointer to the bit position in the byte pointed to by 
+ *            *ppBitStream. *pBitOffset is valid within [0-7] 
+ *   QP - quantization parameter 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC 
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded, so 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the decoded residual buffer (a contiguous array of 64 
+ *            elements of OMX_S16 data type); must be aligned on a 16-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pDst 
+ *    -    *pBitOffset exceeds [0,7]
+ *    -    QP <= 0. 
+ *    -    pDst is not 16-byte aligned 
+ *    OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter. 
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter (
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMX_S16 *pDst,
+    OMX_INT QP,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_PredictReconCoefIntra   (6.2.5.4.3)
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block.  Prior 
+ * to the function call, prediction direction (predDir) should be selected as 
+ * specified in [ISO14496-2], subclause 7.4.3.1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the coefficient buffer which contains the quantized 
+ *            coefficient residuals (PQF) of the current block; must be 
+ *            aligned on a 4-byte boundary.  The output coefficients are 
+ *            saturated to the range [-2048, 2047]. 
+ *   pPredBufRow - pointer to the coefficient row buffer; must be aligned on 
+ *            a 4-byte boundary. 
+ *   pPredBufCol - pointer to the coefficient column buffer; must be aligned 
+ *            on a 4-byte boundary. 
+ *   curQP - quantization parameter of the current block. curQP may be equal 
+ *            to predQP, especially when the current block and the predictor 
+ *            block are in the same macroblock. 
+ *   predQP - quantization parameter of the predictor block 
+ *   predDir - indicates the prediction direction which takes one of the 
+ *            following values: OMX_VC_HORIZONTAL - predict horizontally 
+ *            OMX_VC_VERTICAL - predict vertically 
+ *   ACPredFlag - a flag indicating if AC prediction should be performed. It 
+ *            is equal to ac_pred_flag in the bit stream syntax of MPEG-4 
+ *   videoComp - video component type (luminance or chrominance) of the 
+ *            current block 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the coefficient buffer which contains the quantized 
+ *            coefficients (QF) of the current block 
+ *   pPredBufRow - pointer to the updated coefficient row buffer 
+ *   pPredBufCol - pointer to the updated coefficient column buffer  Note: 
+ *            Buffer update: Update the AC prediction buffer (both row and 
+ *            column buffer). 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *        -    At least one of the pointers is NULL: 
+ *              pSrcDst, pPredBufRow, or pPredBufCol. 
+ *        -    curQP <= 0, 
+ *        -    predQP <= 0, 
+ *        -    curQP >31, 
+ *        -    predQP > 31, 
+ *        -    predDir exceeds [1,2]
+ *        -    pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned. 
+ *
+ */
+OMXResult omxVCM4P2_PredictReconCoefIntra (
+    OMX_S16 *pSrcDst,
+    OMX_S16 *pPredBufRow,
+    OMX_S16 *pPredBufCol,
+    OMX_INT curQP,
+    OMX_INT predQP,
+    OMX_INT predDir,
+    OMX_INT ACPredFlag,
+    OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_MCReconBlock   (6.2.5.5.1)
+ *
+ * Description:
+ * Performs motion compensation prediction for an 8x8 block using 
+ * interpolation described in [ISO14496-2], subclause 7.6.2. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the block in the reference plane. 
+ *   srcStep - distance between the start of consecutive lines in the 
+ *            reference plane, in bytes; must be a multiple of 8. 
+ *   dstStep - distance between the start of consecutive lines in the 
+ *            destination plane, in bytes; must be a multiple of 8. 
+ *   pSrcResidue - pointer to a buffer containing the 16-bit prediction 
+ *            residuals; must be 16-byte aligned. If the pointer is NULL, then 
+ *            no prediction is done, only motion compensation, i.e., the block 
+ *            is moved with interpolation. 
+ *   predictType - bilinear interpolation type, as defined in section 
+ *            6.2.1.2. 
+ *   rndVal - rounding control parameter: 0 - disabled; 1 - enabled. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination buffer; must be 8-byte aligned.  If 
+ *            prediction residuals are added then output intensities are 
+ *            clipped to the range [0,255]. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -    pDst is not 8-byte aligned. 
+ *    -    pSrcResidue is not 16-byte aligned. 
+ *    -    one or more of the following pointers is NULL: pSrc or pDst. 
+ *    -    either srcStep or dstStep is not a multiple of 8. 
+ *    -    invalid type specified for the parameter predictType. 
+ *    -    the parameter rndVal is not equal either to 0 or 1. 
+ *
+ */
+OMXResult omxVCM4P2_MCReconBlock (
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    const OMX_S16 *pSrcResidue,
+    OMX_U8 *pDst,
+    OMX_INT dstStep,
+    OMX_INT predictType,
+    OMX_INT rndVal
+);
+
+
+
+/* 6.3.1.1 Intra 16x16 Prediction Modes  */
+/* A data type that enumerates intra_16x16 macroblock prediction modes is defined as follows:  */
+
+typedef enum {
+    OMX_VC_16X16_VERT = 0,  /** Intra_16x16_Vertical */
+    OMX_VC_16X16_HOR = 1,   /** Intra_16x16_Horizontal */
+    OMX_VC_16X16_DC = 2,    /** Intra_16x16_DC */
+    OMX_VC_16X16_PLANE = 3  /** Intra_16x16_Plane */ 
+} OMXVCM4P10Intra16x16PredMode;
+
+
+
+/* 6.3.1.2 Intra 4x4 Prediction Modes  */
+/* A data type that enumerates intra_4x4 macroblock prediction modes is defined as follows:  */
+
+typedef enum {
+    OMX_VC_4X4_VERT = 0,     /** Intra_4x4_Vertical */
+    OMX_VC_4X4_HOR = 1,      /** Intra_4x4_Horizontal */
+    OMX_VC_4X4_DC = 2,       /** Intra_4x4_DC */
+    OMX_VC_4X4_DIAG_DL = 3,  /** Intra_4x4_Diagonal_Down_Left */
+    OMX_VC_4X4_DIAG_DR = 4,  /** Intra_4x4_Diagonal_Down_Right */
+    OMX_VC_4X4_VR = 5,       /** Intra_4x4_Vertical_Right */
+    OMX_VC_4X4_HD = 6,       /** Intra_4x4_Horizontal_Down */
+    OMX_VC_4X4_VL = 7,       /** Intra_4x4_Vertical_Left */
+    OMX_VC_4X4_HU = 8        /** Intra_4x4_Horizontal_Up */ 
+} OMXVCM4P10Intra4x4PredMode;
+
+
+
+/* 6.3.1.3 Chroma Prediction Modes  */
+/* A data type that enumerates intra chroma prediction modes is defined as follows:  */
+
+typedef enum {
+    OMX_VC_CHROMA_DC = 0,    /** Intra_Chroma_DC */
+    OMX_VC_CHROMA_HOR = 1,   /** Intra_Chroma_Horizontal */
+    OMX_VC_CHROMA_VERT = 2,  /** Intra_Chroma_Vertical */
+    OMX_VC_CHROMA_PLANE = 3  /** Intra_Chroma_Plane */ 
+} OMXVCM4P10IntraChromaPredMode;
+
+
+
+/* 6.3.1.4 Motion Estimation Modes  */
+/* A data type that enumerates H.264 motion estimation modes is defined as follows:  */
+
+typedef enum {
+    OMX_VC_M4P10_FAST_SEARCH = 0, /** Fast motion search */
+    OMX_VC_M4P10_FULL_SEARCH = 1  /** Full motion search */ 
+} OMXVCM4P10MEMode;
+
+
+
+/* 6.3.1.5 Macroblock Types  */
+/* A data type that enumerates H.264 macroblock types is defined as follows:  */
+
+typedef enum {
+    OMX_VC_P_16x16  = 0, /* defined by [ISO14496-10] */
+    OMX_VC_P_16x8  = 1,
+    OMX_VC_P_8x16  = 2,
+    OMX_VC_P_8x8  = 3,
+    OMX_VC_PREF0_8x8  = 4,
+    OMX_VC_INTER_SKIP  = 5,
+    OMX_VC_INTRA_4x4  = 8, /* note: values 6 and 7 are not assigned by this enumeration */
+    OMX_VC_INTRA_16x16  = 9,
+    OMX_VC_INTRA_PCM = 10 
+} OMXVCM4P10MacroblockType;
+
+
+
+/* 6.3.1.6 Sub-Macroblock Types  */
+/* A data type that enumerates H.264 sub-macroblock types is defined as follows:  */
+
+typedef enum {
+    OMX_VC_SUB_P_8x8 = 0, /* defined by [ISO14496-10] */
+    OMX_VC_SUB_P_8x4 = 1,
+    OMX_VC_SUB_P_4x8 = 2,
+    OMX_VC_SUB_P_4x4 = 3 
+} OMXVCM4P10SubMacroblockType;
+
+
+
+/* 6.3.1.7 Variable Length Coding (VLC) Information  */
+
+typedef struct {
+    OMX_U8 uTrailing_Ones;      /* Number of trailing ones; 3 at most */
+    OMX_U8 uTrailing_One_Signs; /* Signs of the trailing ones */
+    OMX_U8 uNumCoeffs;          /* Total number of non-zero coefs, including trailing ones */
+    OMX_U8 uTotalZeros;         /* Total number of zero coefs */
+    OMX_S16 iLevels[16];        /* Levels of non-zero coefs, in reverse zig-zag order */
+    OMX_U8 uRuns[16];           /* Runs for levels and trailing ones, in reverse zig-zag order */
+} OMXVCM4P10VLCInfo;
+
+
+
+/* 6.3.1.8 Macroblock Information  */
+
+typedef struct {
+    OMX_S32 sliceId;                          /* slice number */
+    OMXVCM4P10MacroblockType mbType;          /* MB type */
+    OMXVCM4P10SubMacroblockType subMBType[4]; /* sub-block type */
+    OMX_S32 qpy;                              /* qp for luma */
+    OMX_S32 qpc;                              /* qp for chroma */
+    OMX_U32 cbpy;                             /* CBP Luma */
+    OMX_U32 cbpc;                             /* CBP Chroma */
+    OMXVCMotionVector pMV0[4][4]; /* motion vector, represented using 1/4-pel units, pMV0[blocky][blockx] (blocky = 0~3, blockx = 0~3) */
+    OMXVCMotionVector pMVPred[4][4]; /* motion vector prediction, represented using 1/4-pel units, pMVPred[blocky][blockx] (blocky = 0~3, blockx = 0~3) */
+    OMX_U8 pRefL0Idx[4];                      /* reference picture indices */
+    OMXVCM4P10Intra16x16PredMode Intra16x16PredMode; /* best intra 16x16 prediction mode */
+    OMXVCM4P10Intra4x4PredMode pIntra4x4PredMode[16]; /* best intra 4x4 prediction mode for each block; blocks indexed as for pMV0 above */
+} OMXVCM4P10MBInfo, *OMXVCM4P10MBInfoPtr;
+
+
+
+/* 6.3.1.9 Motion Estimation Parameters  */
+
+typedef struct {
+    OMX_S32 blockSplitEnable8x8; /* enables 16x8, 8x16, 8x8 */
+    OMX_S32 blockSplitEnable4x4; /* enable splitting of 8x4, 4x8, 4x4 blocks */
+    OMX_S32 halfSearchEnable;    /* presumably 1=enable, 0=disable half-pel search -- TODO confirm */
+    OMX_S32 quarterSearchEnable; /* presumably 1=enable, 0=disable quarter-pel search -- TODO confirm */
+    OMX_S32 intraEnable4x4;      /* 1=enable, 0=disable */
+    OMX_S32 searchRange16x16;    /* integer pixel units */
+    OMX_S32 searchRange8x8;      /* presumably integer pixel units, as for searchRange16x16 -- TODO confirm */
+    OMX_S32 searchRange4x4;      /* presumably integer pixel units, as for searchRange16x16 -- TODO confirm */
+} OMXVCM4P10MEParams;
+
+
+
+/**
+ * Function:  omxVCM4P10_PredictIntra_4x4   (6.3.3.1.1)
+ *
+ * Description:
+ * Perform Intra_4x4 prediction for luma samples. If the upper-right block is 
+ * not available, then duplication work should be handled inside the function. 
+ * Users need not define them outside. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcLeft -  Pointer to the buffer of 4 left pixels: 
+ *                  p[x, y] (x = -1, y = 0..3) 
+ *   pSrcAbove - Pointer to the buffer of 8 above pixels: 
+ *                  p[x,y] (x = 0..7, y =-1); 
+ *               must be aligned on a 4-byte boundary. 
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) 
+ *   leftStep - Step of left pixel buffer; must be a multiple of 4. 
+ *   dstStep - Step of the destination buffer; must be a multiple of 4. 
+ *   predMode - Intra_4x4 prediction mode. 
+ *   availability - Neighboring 4x4 block availability flag, refer to 
+ *             "Neighboring Macroblock Availability" . 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to the destination buffer; must be aligned on a 4-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pDst is NULL. 
+ *    dstStep < 4, or dstStep is not a multiple of 4. 
+ *    leftStep is not a multiple of 4. 
+ *    predMode is not in the valid range of enumeration 
+ *              OMXVCM4P10Intra4x4PredMode. 
+ *    predMode is OMX_VC_4X4_VERT, but availability doesn't set OMX_VC_UPPER 
+ *              indicating p[x,-1] (x = 0..3) is not available. 
+ *    predMode is OMX_VC_4X4_HOR, but availability doesn't set OMX_VC_LEFT 
+ *              indicating p[-1,y] (y = 0..3) is not available. 
+ *    predMode is OMX_VC_4X4_DIAG_DL, but availability doesn't set 
+ *              OMX_VC_UPPER indicating p[x, -1] (x = 0..3) is not available. 
+ *    predMode is OMX_VC_4X4_DIAG_DR, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not 
+ *              available. 
+ *    predMode is OMX_VC_4X4_VR, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not 
+ *              available. 
+ *    predMode is OMX_VC_4X4_HD, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not 
+ *              available. 
+ *    predMode is OMX_VC_4X4_VL, but availability doesn't set OMX_VC_UPPER 
+ *              indicating p[x,-1] (x = 0..3) is not available. 
+ *    predMode is OMX_VC_4X4_HU, but availability doesn't set OMX_VC_LEFT 
+ *              indicating p[-1,y] (y = 0..3) is not available. 
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL. 
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL. 
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. 
+ *    either pSrcAbove or pDst is not aligned on a 4-byte boundary.  
+ *
+ * Note: 
+ *     pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if 
+ *     they are not used by intra prediction as implied in predMode. 
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_4x4 (
+    const OMX_U8 *pSrcLeft,
+    const OMX_U8 *pSrcAbove,
+    const OMX_U8 *pSrcAboveLeft,
+    OMX_U8 *pDst,
+    OMX_INT leftStep,
+    OMX_INT dstStep,
+    OMXVCM4P10Intra4x4PredMode predMode,
+    OMX_S32 availability
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_PredictIntra_16x16   (6.3.3.1.2)
+ *
+ * Description:
+ * Perform Intra_16x16 prediction for luma samples. If the upper-right block 
+ * is not available, then duplication work should be handled inside the 
+ * function. Users need not define them outside. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y = 
+ *            0..15) 
+ *   pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15, 
+ *            y= -1); must be aligned on a 16-byte boundary. 
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) 
+ *   leftStep - Step of left pixel buffer; must be a multiple of 16. 
+ *   dstStep - Step of the destination buffer; must be a multiple of 16. 
+ *   predMode - Intra_16x16 prediction mode, please refer to section 3.4.1. 
+ *   availability - Neighboring 16x16 MB availability flag. Refer to 
+ *                  section 3.4.4. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst -Pointer to the destination buffer; must be aligned on a 16-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pDst is NULL. 
+ *    dstStep < 16 or dstStep is not a multiple of 16. 
+ *    leftStep is not a multiple of 16. 
+ *    predMode is not in the valid range of enumeration 
+ *              OMXVCM4P10Intra16x16PredMode 
+ *    predMode is OMX_VC_16X16_VERT, but availability doesn't set 
+ *              OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available. 
+ *    predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT 
+ *              indicating p[-1,y] (y = 0..15) is not available. 
+ *    predMode is OMX_VC_16X16_PLANE, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1](x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not 
+ *              available. 
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL. 
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL. 
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. 
+ *    either pSrcAbove or pDst is not aligned on a 16-byte boundary.  
+ *
+ * Note: 
+ *     pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if 
+ *     they are not used by the intra prediction implied by predMode. 
+ * Note: 
+ *     OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction. 
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_16x16 (
+    const OMX_U8 *pSrcLeft,  /* in: 16 left pixels p[-1,y], y = 0..15 */
+    const OMX_U8 *pSrcAbove,  /* in: 16 above pixels p[x,-1], x = 0..15; 16-byte aligned */
+    const OMX_U8 *pSrcAboveLeft,  /* in: above-left pixel p[-1,-1] */
+    OMX_U8 *pDst,  /* out: destination buffer; 16-byte aligned */
+    OMX_INT leftStep,  /* in: step of the left pixel buffer; multiple of 16 */
+    OMX_INT dstStep,  /* in: step of the destination buffer; multiple of 16 */
+    OMXVCM4P10Intra16x16PredMode predMode,  /* in: Intra_16x16 prediction mode */
+    OMX_S32 availability  /* in: neighboring 16x16 MB availability flags */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_PredictIntraChroma_8x8   (6.3.3.1.3)
+ *
+ * Description:
+ * Performs intra prediction for chroma samples. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y= 
+ *            0..7). 
+ *   pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y 
+ *            = -1); must be aligned on an 8-byte boundary. 
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) 
+ *   leftStep - Step of left pixel buffer; must be a multiple of 8. 
+ *   dstStep - Step of the destination buffer; must be a multiple of 8. 
+ *   predMode - Intra chroma prediction mode, please refer to section 3.4.3. 
+ *   availability - Neighboring chroma block availability flag, please refer 
+ *            to  "Neighboring Macroblock Availability". 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to the destination buffer; must be aligned on an 8-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If any of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pDst is NULL. 
+ *    dstStep < 8 or dstStep is not a multiple of 8. 
+ *    leftStep is not a multiple of 8. 
+ *    predMode is not in the valid range of enumeration 
+ *              OMXVCM4P10IntraChromaPredMode. 
+ *    predMode is OMX_VC_CHROMA_VERT, but availability doesn't set 
+ *              OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available. 
+ *    predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT 
+ *              indicating p[-1,y] (y = 0..7) is not available. 
+ *    predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1](x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not 
+ *              available. 
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL. 
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL. 
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. 
+ *    either pSrcAbove or pDst is not aligned on an 8-byte boundary.  
+ *
+ *  Note: pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if 
+ *  they are not used by the intra prediction implied by predMode. 
+ *
+ *  Note: OMX_VC_UPPER_RIGHT is not used in intra chroma prediction. 
+ *
+ */
+OMXResult omxVCM4P10_PredictIntraChroma_8x8 (
+    const OMX_U8 *pSrcLeft,  /* in: 8 left pixels p[-1,y], y = 0..7 */
+    const OMX_U8 *pSrcAbove,  /* in: 8 above pixels p[x,-1], x = 0..7; 8-byte aligned */
+    const OMX_U8 *pSrcAboveLeft,  /* in: above-left pixel p[-1,-1] */
+    OMX_U8 *pDst,  /* out: destination buffer; 8-byte aligned */
+    OMX_INT leftStep,  /* in: step of the left pixel buffer; multiple of 8 */
+    OMX_INT dstStep,  /* in: step of the destination buffer; multiple of 8 */
+    OMXVCM4P10IntraChromaPredMode predMode,  /* in: intra chroma prediction mode */
+    OMX_S32 availability  /* in: neighboring chroma block availability flags */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateLuma   (6.3.3.2.1)
+ *
+ * Description:
+ * Performs quarter-pixel interpolation for inter luma MB. It is assumed that 
+ * the frame is already padded when calling this function. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the source reference frame buffer 
+ *   srcStep - reference frame step, in bytes; must be a multiple of roi.width 
+ *   dstStep - destination frame step, in bytes; must be a multiple of 
+ *            roi.width 
+ *   dx - Fractional part of horizontal motion vector component in 1/4 pixel 
+ *            unit; valid in the range [0,3] 
+ *   dy - Fractional part of vertical motion vector y component in 1/4 pixel 
+ *            unit; valid in the range [0,3] 
+ *   roi - Dimension of the interpolation region; the parameters roi.width and 
+ *            roi.height must be equal to either 4, 8, or 16. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to the destination frame buffer: 
+ *          if roi.width==4,  4-byte alignment required 
+ *          if roi.width==8,  8-byte alignment required 
+ *          if roi.width==16, 16-byte alignment required 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pSrc or pDst is NULL. 
+ *    srcStep or dstStep < roi.width. 
+ *    dx or dy is out of range [0,3]. 
+ *    roi.width or roi.height is out of range {4, 8, 16}. 
+ *    roi.width is equal to 4, but pDst is not 4 byte aligned. 
+ *    roi.width is equal to 8 or 16, but pDst is not 8 byte aligned. 
+ *    srcStep or dstStep is not a multiple of 8. 
+ *
+ */
+OMXResult omxVCM4P10_InterpolateLuma (
+    const OMX_U8 *pSrc,  /* in: source reference frame buffer */
+    OMX_S32 srcStep,  /* in: reference frame step, in bytes */
+    OMX_U8 *pDst,  /* out: destination frame buffer; alignment depends on roi.width */
+    OMX_S32 dstStep,  /* in: destination frame step, in bytes */
+    OMX_S32 dx,  /* in: fractional horizontal MV component, 1/4-pixel units, [0,3] */
+    OMX_S32 dy,  /* in: fractional vertical MV component, 1/4-pixel units, [0,3] */
+    OMXSize roi  /* in: interpolation region; width and height each 4, 8, or 16 */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateChroma   (6.3.3.2.2)
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc -Pointer to the source reference frame buffer 
+ *   srcStep -Reference frame step in bytes 
+ *   dstStep -Destination frame step in bytes; must be a multiple of 
+ *            roi.width. 
+ *   dx -Fractional part of horizontal motion vector component in 1/8 pixel 
+ *            unit; valid in the range [0,7] 
+ *   dy -Fractional part of vertical motion vector component in 1/8 pixel 
+ *            unit; valid in the range [0,7] 
+ *   roi -Dimension of the interpolation region; the parameters roi.width and 
+ *            roi.height must be equal to either 2, 4, or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst -Pointer to the destination frame buffer:
+ *         if roi.width==2,  2-byte alignment required 
+ *         if roi.width==4,  4-byte alignment required 
+ *         if roi.width==8, 8-byte alignment required 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pSrc or pDst is NULL. 
+ *    srcStep or dstStep < 8. 
+ *    dx or dy is out of range [0-7]. 
+ *    roi.width or roi.height is out of range {2,4,8}. 
+ *    roi.width is equal to 2, but pDst is not 2-byte aligned. 
+ *    roi.width is equal to 4, but pDst is not 4-byte aligned. 
+ *    roi.width is equal to 8, but pDst is not 8 byte aligned. 
+ *    srcStep or dstStep is not a multiple of 8. 
+ *
+ */
+OMXResult omxVCM4P10_InterpolateChroma (
+    const OMX_U8 *pSrc,  /* in: source reference frame buffer */
+    OMX_S32 srcStep,  /* in: reference frame step, in bytes */
+    OMX_U8 *pDst,  /* out: destination frame buffer; alignment depends on roi.width */
+    OMX_S32 dstStep,  /* in: destination frame step, in bytes */
+    OMX_S32 dx,  /* in: fractional horizontal MV component, 1/8-pixel units, [0,7] */
+    OMX_S32 dy,  /* in: fractional vertical MV component, 1/8-pixel units, [0,7] */
+    OMXSize roi  /* in: interpolation region; width and height each 2, 4, or 8 */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingLuma_VerEdge_I   (6.3.3.3.1)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the luma 
+ * macroblock (16x16). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the input macroblock; must be 16-byte aligned. 
+ *   srcdstStep -Step of the arrays; must be a multiple of 16. 
+ *   pAlpha -Array of size 2 of alpha thresholds (the first item is the alpha 
+ *            threshold for the external vertical edge, and the second item is 
+ *            for the internal vertical edge); per [ISO14496-10] alpha values 
+ *            must be in the range [0,255]. 
+ *   pBeta -Array of size 2 of beta thresholds (the first item is the beta 
+ *            threshold for the external vertical edge, and the second item is 
+ *            for the internal vertical edge); per [ISO14496-10] beta values 
+ *            must be in the range [0,18]. 
+ *   pThresholds -Array of size 16 of Thresholds (TC0) (values for the left 
+ *            edge of each 4x4 block, arranged in vertical block order); must 
+ *            be aligned on a 4-byte boundary.  Per [ISO14496-10] values must 
+ *            be in the range [0,25]. 
+ *   pBS -Array of size 16 of BS parameters (arranged in vertical block 
+ *            order); valid in the range [0,4] with the following 
+ *            restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) 
+ *            pBS[i]== 4 if and only if pBS[i^3]== 4.  Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst -Pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    Either of the pointers in pSrcDst, pAlpha, pBeta, pThresholds, or pBS 
+ *              is NULL. 
+ *    Either pThresholds or pBS is not aligned on a 4-byte boundary. 
+ *    pSrcDst is not 16-byte aligned. 
+ *    srcdstStep is not a multiple of 16. 
+ *    pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. 
+ *    pBeta[0] and/or pBeta[1] is outside the range [0,18]. 
+ *    One or more entries in the table pThresholds[0..15] is outside of the 
+ *              range [0,25]. 
+ *    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && 
+ *              pBS[i^3]!=4) for 0<=i<=3. 
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I (
+    OMX_U8 *pSrcDst,  /* in/out: macroblock, filtered in place; 16-byte aligned */
+    OMX_S32 srcdstStep,  /* in: step of the arrays; multiple of 16 */
+    const OMX_U8 *pAlpha,  /* in: 2 alpha thresholds: external, internal vertical edge */
+    const OMX_U8 *pBeta,  /* in: 2 beta thresholds: external, internal vertical edge */
+    const OMX_U8 *pThresholds,  /* in: 16 TC0 thresholds; 4-byte aligned */
+    const OMX_U8 *pBS  /* in: 16 BS parameters; 4-byte aligned */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I   (6.3.3.3.2)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four horizontal edges of the luma 
+ * macroblock (16x16). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 16-byte aligned. 
+ *   srcdstStep - step of the arrays; must be a multiple of 16. 
+ *   pAlpha - array of size 2 of alpha thresholds (the first item is the alpha 
+ *            threshold for the external horizontal edge, and the second item 
+ *            is for the internal horizontal edge); per [ISO14496-10] alpha 
+ *            values must be in the range [0,255]. 
+ *   pBeta - array of size 2 of beta thresholds (the first item is the beta 
+ *            threshold for the external horizontal edge, and the second item 
+ *            is for the internal horizontal edge). Per [ISO14496-10] beta 
+ *            values must be in the range [0,18]. 
+ *   pThresholds - array of size 16 containing thresholds, TC0, for the top 
+ *            horizontal edge of each 4x4 block, arranged in horizontal block 
+ *            order; must be aligned on a 4-byte boundary.  Per [ISO14496-10] 
+ *            values must be in the range [0,25]. 
+ *   pBS - array of size 16 of BS parameters (arranged in horizontal block 
+ *            order); valid in the range [0,4] with the following 
+ *            restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) 
+ *            pBS[i]== 4 if and only if pBS[i^3]== 4.  Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst -Pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr, if one of the following cases occurs: 
+ *    -    one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds, or pBS. 
+ *    -    either pThresholds or pBS is not aligned on a 4-byte boundary. 
+ *    -    pSrcDst is not 16-byte aligned. 
+ *    -    srcdstStep is not a multiple of 16. 
+ *    -    pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. 
+ *    -    pBeta[0] and/or pBeta[1] is outside the range [0,18]. 
+ *    -    One or more entries in the table pThresholds[0..15] is 
+ *         outside of the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *              (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I (
+    OMX_U8 *pSrcDst,  /* in/out: macroblock, filtered in place; 16-byte aligned */
+    OMX_S32 srcdstStep,  /* in: step of the arrays; multiple of 16 */
+    const OMX_U8 *pAlpha,  /* in: 2 alpha thresholds, one per edge type */
+    const OMX_U8 *pBeta,  /* in: 2 beta thresholds: external, internal horizontal edge */
+    const OMX_U8 *pThresholds,  /* in: 16 TC0 thresholds; 4-byte aligned */
+    const OMX_U8 *pBS  /* in: 16 BS parameters; 4-byte aligned */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingChroma_VerEdge_I   (6.3.3.3.3)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the chroma 
+ * macroblock (8x8). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the input macroblock; must be 8-byte aligned. 
+ *   srcdstStep - Step of the arrays; must be a multiple of 8. 
+ *   pAlpha - Array of size 2 of alpha thresholds (the first item is alpha 
+ *            threshold for external vertical edge, and the second item is for 
+ *            internal vertical edge); per [ISO14496-10] alpha values must be 
+ *            in the range [0,255]. 
+ *   pBeta - Array of size 2 of beta thresholds (the first item is the beta 
+ *            threshold for the external vertical edge, and the second item is 
+ *            for the internal vertical edge); per [ISO14496-10] beta values 
+ *            must be in the range [0,18]. 
+ *   pThresholds - Array of size 8 containing thresholds, TC0, for the left 
+ *            vertical edge of each 4x2 chroma block, arranged in vertical 
+ *            block order; must be aligned on a 4-byte boundary.  Per 
+ *            [ISO14496-10] values must be in the range [0,25]. 
+ *   pBS - Array of size 16 of BS parameters (values for each 2x2 chroma 
+ *            block, arranged in vertical block order). This is the same pBS 
+ *            array passed to omxVCM4P10_FilterDeblockingLuma_VerEdge_I; 
+ *            valid in the range [0,4] with the following restrictions: i) 
+ *            pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and 
+ *            only if pBS[i^3]== 4.  Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst -Pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds, or pBS. 
+ *    -    pSrcDst is not 8-byte aligned. 
+ *    -    srcdstStep is not a multiple of 8. 
+ *    -    pThresholds is not 4-byte aligned. 
+ *    -    pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. 
+ *    -    pBeta[0] and/or pBeta[1] is outside the range [0,18]. 
+ *    -    One or more entries in the table pThresholds[0..7] is outside 
+ *         of the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *         pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *         (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *    -    pBS is not 4-byte aligned. 
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I (
+    OMX_U8 *pSrcDst,  /* in/out: macroblock, filtered in place; 8-byte aligned */
+    OMX_S32 srcdstStep,  /* in: step of the arrays; multiple of 8 */
+    const OMX_U8 *pAlpha,  /* in: 2 alpha thresholds: external, internal vertical edge */
+    const OMX_U8 *pBeta,  /* in: 2 beta thresholds: external, internal vertical edge */
+    const OMX_U8 *pThresholds,  /* in: 8 TC0 thresholds; 4-byte aligned */
+    const OMX_U8 *pBS  /* in: 16 BS parameters; 4-byte aligned */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I   (6.3.3.3.4)
+ *
+ * Description:
+ * Performs in-place deblock filtering on the horizontal edges of the chroma 
+ * macroblock (8x8). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 8-byte aligned. 
+ *   srcdstStep - array step; must be a multiple of 8. 
+ *   pAlpha - array of size 2 containing alpha thresholds; the first element 
+ *            contains the threshold for the external horizontal edge, and the 
+ *            second element contains the threshold for internal horizontal 
+ *            edge.  Per [ISO14496-10] alpha values must be in the range 
+ *            [0,255]. 
+ *   pBeta - array of size 2 containing beta thresholds; the first element 
+ *            contains the threshold for the external horizontal edge, and the 
+ *            second element contains the threshold for the internal 
+ *            horizontal edge.  Per [ISO14496-10] beta values must be in the 
+ *            range [0,18]. 
+ *   pThresholds - array of size 8 containing thresholds, TC0, for the top 
+ *            horizontal edge of each 2x4 chroma block, arranged in horizontal 
+ *            block order; must be aligned on a 4-byte boundary.  Per 
+ *            [ISO14496-10] values must be in the range [0,25]. 
+ *   pBS - array of size 16 containing BS parameters for each 2x2 chroma 
+ *            block, arranged in horizontal block order; valid in the range 
+ *            [0,4] with the following restrictions: i) pBS[i]== 4 may occur 
+ *            only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4. 
+ *            Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst -Pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr, if one of the following cases occurs: 
+ *    -    any of the following pointers is NULL: 
+ *         pSrcDst, pAlpha, pBeta, pThresholds, or pBS. 
+ *    -    pSrcDst is not 8-byte aligned. 
+ *    -    srcdstStep is not a multiple of 8. 
+ *    -    pThresholds is not 4-byte aligned. 
+ *    -    pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. 
+ *    -    pBeta[0] and/or pBeta[1] is outside the range [0,18]. 
+ *    -    One or more entries in the table pThresholds[0..7] is outside 
+ *         of the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *              (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ *    -    pBS is not 4-byte aligned. 
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I (
+    OMX_U8 *pSrcDst,  /* in/out: macroblock, filtered in place; 8-byte aligned */
+    OMX_S32 srcdstStep,  /* in: array step; multiple of 8 */
+    const OMX_U8 *pAlpha,  /* in: 2 alpha thresholds: external, internal horizontal edge */
+    const OMX_U8 *pBeta,  /* in: 2 beta thresholds: external, internal horizontal edge */
+    const OMX_U8 *pThresholds,  /* in: 8 TC0 thresholds; 4-byte aligned */
+    const OMX_U8 *pBS  /* in: 16 BS parameters; 4-byte aligned */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DeblockLuma_I   (6.3.3.3.5)
+ *
+ * Description:
+ * This function performs in-place deblock filtering on the horizontal and 
+ * vertical edges of a luma macroblock (16x16). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 16-byte aligned. 
+ *   srcdstStep - image width; must be a multiple of 16. 
+ *   pAlpha - pointer to a 2x2 table of alpha thresholds, organized as 
+ *            follows: {external vertical edge, internal vertical edge, 
+ *            external horizontal edge, internal horizontal edge }.  Per 
+ *            [ISO14496-10] alpha values must be in the range [0,255]. 
+ *   pBeta - pointer to a 2x2 table of beta thresholds, organized as follows: 
+ *            {external vertical edge, internal vertical edge, external 
+ *            horizontal edge, internal horizontal edge }.  Per [ISO14496-10] 
+ *            beta values must be in the range [0,18]. 
+ *   pThresholds - pointer to a 16x2 table of threshold (TC0), organized as 
+ *            follows: {values for the left or above edge of each 4x4 block, 
+ *            arranged in vertical block order and then in horizontal block 
+ *            order}; must be aligned on a 4-byte boundary.  Per [ISO14496-10] 
+ *            values must be in the range [0,25]. 
+ *   pBS - pointer to a 16x2 table of BS parameters arranged in scan block 
+ *            order for vertical edges and then horizontal edges; valid in the 
+ *            range [0,4] with the following restrictions: i) pBS[i]== 4 may 
+ *            occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 
+ *            4. Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments 
+ *    -     one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds or pBS. 
+ *    -    pSrcDst is not 16-byte aligned. 
+ *    -    either pThresholds or pBS is not aligned on a 4-byte boundary. 
+ *    -    one or more entries in the table pAlpha[0..3] is outside the range 
+ *              [0,255]. 
+ *    -    one or more entries in the table pBeta[0..3] is outside the range 
+ *              [0,18]. 
+ *    -    one or more entries in the table pThresholds[0..31] is outside of 
+ *              the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *             (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *    -    srcdstStep is not a multiple of 16. 
+ *
+ */
+OMXResult omxVCM4P10_DeblockLuma_I (
+    OMX_U8 *pSrcDst,  /* in/out: macroblock, filtered in place; 16-byte aligned */
+    OMX_S32 srcdstStep,  /* in: image width; multiple of 16 */
+    const OMX_U8 *pAlpha,  /* in: 2x2 table of alpha thresholds (vertical, then horizontal) */
+    const OMX_U8 *pBeta,  /* in: 2x2 table of beta thresholds (vertical, then horizontal) */
+    const OMX_U8 *pThresholds,  /* in: 16x2 table of TC0 thresholds; 4-byte aligned */
+    const OMX_U8 *pBS  /* in: 16x2 table of BS parameters; 4-byte aligned */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DeblockChroma_I   (6.3.3.3.6)
+ *
+ * Description:
+ * Performs in-place deblocking filtering on all edges of the chroma 
+ * macroblock (8x8). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 8-byte aligned. 
+ *   srcdstStep - step of the arrays; must be a multiple of 8. 
+ *   pAlpha - pointer to a 2x2 array of alpha thresholds, organized as 
+ *            follows: {external vertical edge, internal vertical edge, 
+ *            external horizontal edge, internal horizontal edge }.  Per 
+ *            [ISO14496-10] alpha values must be in the range [0,255]. 
+ *   pBeta - pointer to a 2x2 array of Beta Thresholds, organized as follows: 
+ *            { external vertical edge, internal vertical edge, external 
+ *            horizontal edge, internal horizontal edge }.  Per [ISO14496-10] 
+ *            beta values must be in the range [0,18]. 
+ *   pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left 
+ *            or above edge of each 4x2 or 2x4 block, arranged in vertical 
+ *            block order and then in horizontal block order); must be aligned 
+ *            on a 4-byte boundary. Per [ISO14496-10] values must be in the 
+ *            range [0,25]. 
+ *   pBS - array of size 16x2 of BS parameters (arranged in scan block order 
+ *            for vertical edges and then horizontal edges); valid in the 
+ *            range [0,4] with the following restrictions: i) pBS[i]== 4 may 
+ *            occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 
+ *            4.  Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments 
+ *    -   one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds, or pBS. 
+ *    -   pSrcDst is not 8-byte aligned. 
+ *    -   either pThresholds or pBS is not 4-byte aligned. 
+ *    -   one or more entries in the table pAlpha[0..3] is outside the range 
+ *              [0,255]. 
+ *    -   one or more entries in the table pBeta[0..3] is outside the range 
+ *              [0,18]. 
+ *    -   one or more entries in the table pThresholds[0..15] is outside of 
+ *              the range [0,25]. 
+ *    -   pBS is out of range, i.e., one of the following conditions is true: 
+ *            pBS[i]<0, pBS[i]>4, pBS[i]==4  for i>=4, or 
+ *            (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *    -   srcdstStep is not a multiple of 8. 
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I (
+    OMX_U8 *pSrcDst,  /* in/out: macroblock, filtered in place; 8-byte aligned */
+    OMX_S32 srcdstStep,  /* in: step of the arrays; multiple of 8 */
+    const OMX_U8 *pAlpha,  /* in: 2x2 array of alpha thresholds (vertical, then horizontal) */
+    const OMX_U8 *pBeta,  /* in: 2x2 array of beta thresholds (vertical, then horizontal) */
+    const OMX_U8 *pThresholds,  /* in: 8x2 array of TC0 thresholds; 4-byte aligned */
+    const OMX_U8 *pBS  /* in: 16x2 array of BS parameters; 4-byte aligned */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC   (6.3.4.1.1)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse raster scan for a 2x2 block of 
+ * ChromaDCLevel.  The decoded coefficients in the packed position-coefficient 
+ * buffer are stored in reverse zig-zag order, i.e., the first buffer element 
+ * contains the last non-zero position-coefficient pair of the block. Within 
+ * each position-coefficient pair, the position entry indicates the 
+ * raster-scan position of the coefficient, while the coefficient entry 
+ * contains the coefficient value. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - Double pointer to current byte in bit stream buffer 
+ *   pOffset - Pointer to current bit position in the byte pointed to by 
+ *            *ppBitStream; valid in the range [0,7]. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after each block is decoded 
+ *   pOffset - *pOffset is updated after each block is decoded 
+ *   pNumCoeff - Pointer to the number of nonzero coefficients in this block 
+ *   ppPosCoefBuf - Double pointer to destination residual 
+ *            coefficient-position pair buffer.  Buffer position 
+ *            (*ppPosCoefBuf) is updated upon return, unless there are only 
+ *            zero coefficients in the currently decoded block.  In this case 
+ *            the caller is expected to bypass the transform/dequantization of 
+ *            the empty blocks. 
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppBitStream or pOffset is NULL. 
+ *    -    ppPosCoefBuf or pNumCoeff is NULL. 
+ *    OMX_Sts_Err - if one of the following is true: 
+ *    -    an illegal code is encountered in the bitstream 
+ *
+ */
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+    const OMX_U8 **ppBitStream,  /* in/out: current byte in the bit stream buffer */
+    OMX_S32*pOffset,  /* in/out: bit position within the byte at *ppBitStream, [0,7] */
+    OMX_U8 *pNumCoeff,  /* out: number of nonzero coefficients in this block */
+    OMX_U8 **ppPosCoefbuf  /* out: position-coefficient pair buffer (ppPosCoefBuf in the comment above) */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DecodeCoeffsToPairCAVLC   (6.3.4.1.2)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of 
+ * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. Inverse 
+ * field scan is not supported. The decoded coefficients in the packed 
+ * position-coefficient buffer are stored in reverse zig-zag order, i.e., the 
+ * first buffer element contains the last non-zero position-coefficient pair of 
+ * the block. Within each position-coefficient pair, the position entry 
+ * indicates the raster-scan position of the coefficient, while the 
+ * coefficient entry contains the coefficient value. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream -Double pointer to current byte in bit stream buffer 
+ *   pOffset - Pointer to current bit position in the byte pointed to by 
+ *            *ppBitStream; valid in the range [0,7]. 
+ *   sMaxNumCoeff - Maximum number of non-zero coefficients in the current 
+ *            block 
+ *   sVLCSelect - VLC table selector, obtained from the number of non-zero 
+ *            coefficients contained in the above and left 4x4 blocks.  It is 
+ *            equivalent to the variable nC described in H.264 standard Table 
+ *            9-5, except its value can't be less than zero. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after each block is decoded. 
+ *   pOffset - *pOffset is updated after each block is decoded 
+ *   pNumCoeff - Pointer to the number of nonzero coefficients in this block 
+ *   ppPosCoefBuf - Double pointer to destination residual 
+ *            coefficient-position pair buffer.  Buffer position 
+ *            (*ppPosCoefBuf) is updated upon return, unless there are only 
+ *            zero coefficients in the currently decoded block.  In this case 
+ *            the caller is expected to bypass the transform/dequantization of 
+ *            the empty blocks. 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppBitStream or pOffset is NULL. 
+ *    -    ppPosCoefBuf or pNumCoeff is NULL. 
+ *    -    sMaxNumCoeff is not equal to either 15 or 16. 
+ *    -    sVLCSelect is less than 0. 
+ *
+ *    OMX_Sts_Err - if one of the following is true: 
+ *    -    an illegal code is encountered in the bitstream 
+ *
+ */
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC (
+    const OMX_U8 **ppBitStream,  /* in/out: current byte in the bit stream buffer */
+    OMX_S32 *pOffset,  /* in/out: bit position within the byte at *ppBitStream, [0,7] */
+    OMX_U8 *pNumCoeff,  /* out: number of nonzero coefficients in this block */
+    OMX_U8 **ppPosCoefbuf,  /* out: position-coefficient pair buffer (ppPosCoefBuf in the comment above) */
+    OMX_INT sVLCSelect,  /* in: VLC table selector; must not be less than 0 */
+    OMX_INT sMaxNumCoeff  /* in: maximum non-zero coefficients in the block; 15 or 16 */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_TransformDequantLumaDCFromPair   (6.3.4.2.1)
+ *
+ * Description:
+ * Reconstructs the 4x4 LumaDC block from the coefficient-position pair 
+ * buffer, performs integer inverse transformation and dequantization for 4x4 
+ * LumaDC coefficients, and updates the pair buffer pointer to the next 
+ * non-empty block. 
+ *
+ * Input Arguments:
+ *   
+ *   ppSrc - Double pointer to residual coefficient-position pair buffer 
+ *            output by CAVLC decoding 
+ *   QP - Quantization parameter QpY 
+ *
+ * Output Arguments:
+ *   
+ *   ppSrc - *ppSrc is updated to the start of next non-empty block 
+ *   pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must 
+ *            be aligned on an 8-byte boundary. 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppSrc or pDst is NULL. 
+ *    -    pDst is not 8 byte aligned. 
+ *    -    QP is not in the range of [0-51]. 
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantLumaDCFromPair (
+    const OMX_U8 **ppSrc,  /* in/out: coefficient-position pair buffer; advanced to the next non-empty block */
+    OMX_S16 *pDst,  /* out: reconstructed 4x4 LumaDC coefficients; 8-byte aligned */
+    OMX_INT QP  /* in: quantization parameter QpY, range [0,51] */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_TransformDequantChromaDCFromPair   (6.3.4.2.2)
+ *
+ * Description:
+ * Reconstruct the 2x2 ChromaDC block from coefficient-position pair buffer, 
+ * perform integer inverse transformation, and dequantization for 2x2 chroma 
+ * DC coefficients, and update the pair buffer pointer to next non-empty 
+ * block. 
+ *
+ * Input Arguments:
+ *   
+ *   ppSrc - Double pointer to residual coefficient-position pair buffer 
+ *            output by CAVLC decoding 
+ *   QP - Quantization parameter QpC 
+ *
+ * Output Arguments:
+ *   
+ *   ppSrc - *ppSrc is updated to the start of next non empty block 
+ *   pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer; 
+ *            must be aligned on a 4-byte boundary. 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppSrc or pDst is NULL. 
+ *    -    pDst is not 4-byte aligned. 
+ *    -    QP is not in the range of [0-51]. 
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantChromaDCFromPair (
+    const OMX_U8 **ppSrc,
+    OMX_S16 *pDst,
+    OMX_INT QP
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DequantTransformResidualFromPairAndAdd   (6.3.4.2.3)
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer, 
+ * perform dequantization and integer inverse transformation for 4x4 block of 
+ * residuals with previous intra prediction or motion compensation data, and 
+ * update the pair buffer pointer to next non-empty block. If pDC == NULL, 
+ * there are 16 non-zero AC coefficients at most in the packed buffer starting 
+ * from 4x4 block position 0; if pDC != NULL, there are 15 non-zero AC 
+ * coefficients at most in the packed buffer starting from 4x4 block position 
+ * 1. 
+ *
+ * Input Arguments:
+ *   
+ *   ppSrc - Double pointer to residual coefficient-position pair buffer 
+ *            output by CAVLC decoding 
+ *   pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte 
+ *            boundary 
+ *   predStep - Predicted frame step size in bytes; must be a multiple of 4 
+ *   dstStep - Destination frame step in bytes; must be a multiple of 4 
+ *   pDC - Pointer to the DC coefficient of this block, NULL if it doesn't 
+ *            exist 
+ *   QP - Quantization parameter.  It should be QpC in chroma 4x4 block 
+ *            decoding, otherwise it should be QpY. 
+ *   AC - Flag indicating if at least one non-zero AC coefficient exists 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the reconstructed 4x4 block data; must be aligned on a 
+ *            4-byte boundary 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    pPred or pDst is NULL. 
+ *    -    pPred or pDst is not 4-byte aligned. 
+ *    -    predStep or dstStep is not a multiple of 4. 
+ *    -    AC !=0 and Qp is not in the range of [0-51] or ppSrc == NULL. 
+ *    -    AC ==0 && pDC ==NULL. 
+ *
+ */
+OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd (
+    const OMX_U8 **ppSrc,
+    const OMX_U8 *pPred,
+    const OMX_S16 *pDC,
+    OMX_U8 *pDst,
+    OMX_INT predStep,
+    OMX_INT dstStep,
+    OMX_INT QP,
+    OMX_INT AC
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_MEGetBufSize   (6.3.5.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification 
+ * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer 
+ * and MotionEstimationMB. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P10MEMode 
+ *   pMEParams - motion estimation parameters 
+ *
+ * Output Arguments:
+ *   
+ *   pSize - pointer to the number of bytes required for the motion 
+ *            estimation specification structure 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    pMEParams or pSize is NULL. 
+ *    -    an invalid MEMode is specified. 
+ *
+ */
+OMXResult omxVCM4P10_MEGetBufSize (
+    OMXVCM4P10MEMode MEmode,
+    const OMXVCM4P10MEParams *pMEParams,
+    OMX_U32 *pSize
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_MEInit   (6.3.5.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the 
+ * omxVCM4P10 motion estimation functions:  BlockMatch_Integer and 
+ * MotionEstimationMB. Memory for the specification structure *pMESpec must be 
+ * allocated prior to calling the function, and should be aligned on a 4-byte 
+ * boundary.  The number of bytes required for the specification structure can 
+ * be determined using the function omxVCM4P10_MEGetBufSize. Following 
+ * initialization by this function, the vendor-specific structure *pMESpec 
+ * should contain an implementation-specific representation of all motion 
+ * estimation parameters received via the structure pMEParams, for example  
+ * searchRange16x16, searchRange8x8, etc. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P10MEMode 
+ *   pMEParams - motion estimation parameters 
+ *   pMESpec - pointer to the uninitialized ME specification structure 
+ *
+ * Output Arguments:
+ *   
+ *   pMESpec - pointer to the initialized ME specification structure 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    pMEParams or pMESpec is NULL. 
+ *    -    an invalid value was specified for the parameter MEmode 
+ *    -    a negative or zero value was specified for one of the search ranges 
+ *         (e.g.,  pMEParams->searchRange8x8, pMEParams->searchRange16x16, etc.) 
+ *    -    either in isolation or in combination, one or more of the enables or 
+ *         search ranges in the structure *pMEParams were configured such 
+ *         that the requested behavior fails to comply with [ISO14496-10]. 
+ *
+ */
+OMXResult omxVCM4P10_MEInit (
+    OMXVCM4P10MEMode MEmode,
+    const OMXVCM4P10MEParams *pMEParams,
+    void *pMESpec
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_BlockMatch_Integer   (6.3.5.2.1)
+ *
+ * Description:
+ * Performs integer block match.  Returns best MV and associated cost. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrgY - Pointer to the top-left corner of the current block:
+ *            If iBlockWidth==4,  4-byte alignment required. 
+ *            If iBlockWidth==8,  8-byte alignment required. 
+ *            If iBlockWidth==16, 16-byte alignment required. 
+ *   pSrcRefY - Pointer to the top-left corner of the co-located block in the 
+ *            reference picture: 
+ *            If iBlockWidth==4,  4-byte alignment required.  
+ *            If iBlockWidth==8,  8-byte alignment required.  
+ *            If iBlockWidth==16, 16-byte alignment required. 
+ *   nSrcOrgStep - Stride of the original picture plane, expressed in terms 
+ *            of integer pixels; must be a multiple of iBlockWidth. 
+ *   nSrcRefStep - Stride of the reference picture plane, expressed in terms 
+ *            of integer pixels 
+ *   pRefRect - pointer to the valid reference rectangle inside the reference 
+ *            picture plane 
+ *   pCurrPointPos - position of the current block in the current plane 
+ *   iBlockWidth - Width of the current block, expressed in terms of integer 
+ *            pixels; must be equal to either 4, 8, or 16. 
+ *   iBlockHeight - Height of the current block, expressed in terms of 
+ *            integer pixels; must be equal to either 4, 8, or 16. 
+ *   nLamda - Lamda factor; used to compute motion cost 
+ *   pMVPred - Predicted MV; used to compute motion cost, expressed in terms 
+ *            of 1/4-pel units 
+ *   pMVCandidate - Candidate MV; used to initialize the motion search, 
+ *            expressed in terms of integer pixels 
+ *   pMESpec - pointer to the ME specification structure 
+ *
+ * Output Arguments:
+ *   
+ *   pBestMV - Best MV resulting from integer search, expressed in terms 
+ *            of 1/4-pel units 
+ *   pBestCost - Motion cost associated with the best MV; computed as 
+ *            SAD+Lamda*BitsUsedByMV 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    any of the following pointers are NULL:
+ *         pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec. 
+ *    -    Either iBlockWidth or iBlockHeight are values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Integer (
+    const OMX_U8 *pSrcOrgY,
+    OMX_S32 nSrcOrgStep,
+    const OMX_U8 *pSrcRefY,
+    OMX_S32 nSrcRefStep,
+    const OMXRect *pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    OMX_U8 iBlockWidth,
+    OMX_U8 iBlockHeight,
+    OMX_U32 nLamda,
+    const OMXVCMotionVector *pMVPred,
+    const OMXVCMotionVector *pMVCandidate,
+    OMXVCMotionVector *pBestMV,
+    OMX_S32 *pBestCost,
+    void *pMESpec
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_BlockMatch_Half   (6.3.5.2.2)
+ *
+ * Description:
+ * Performs a half-pel block match using results from a prior integer search. 
+ *  Returns the best MV and associated cost.  This function estimates the 
+ * half-pixel motion vector by interpolating the integer resolution motion 
+ * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial 
+ * integer MV is generated externally.  The function 
+ * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrgY - Pointer to the current position in original picture plane:
+ *              If iBlockWidth==4,  4-byte alignment required. 
+ *              If iBlockWidth==8,  8-byte alignment required. 
+ *              If iBlockWidth==16, 16-byte alignment required. 
+ *   pSrcRefY - Pointer to the top-left corner of the co-located block in the 
+ *            reference picture:  
+ *              If iBlockWidth==4,  4-byte alignment required.  
+ *              If iBlockWidth==8,  8-byte alignment required.  
+ *              If iBlockWidth==16, 16-byte alignment required. 
+ *   nSrcOrgStep - Stride of the original picture plane in terms of full 
+ *            pixels; must be a multiple of iBlockWidth. 
+ *   nSrcRefStep - Stride of the reference picture plane in terms of full 
+ *            pixels 
+ *   iBlockWidth - Width of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   iBlockHeight - Height of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   nLamda - Lamda factor, used to compute motion cost 
+ *   pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to 
+ *            compute motion cost 
+ *   pSrcDstBestMV - The best MV resulting from a prior integer search, 
+ *            represented in terms of 1/4-pel units 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstBestMV - Best MV resulting from the half-pel search, expressed in 
+ *            terms of 1/4-pel units 
+ *   pBestCost - Motion cost associated with the best MV; computed as 
+ *            SAD+Lamda*BitsUsedByMV 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    any of the following pointers is NULL: pSrcOrgY, pSrcRefY, 
+ *              pSrcDstBestMV, pMVPred, pBestCost 
+ *    -    iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Half (
+    const OMX_U8 *pSrcOrgY,
+    OMX_S32 nSrcOrgStep,
+    const OMX_U8 *pSrcRefY,
+    OMX_S32 nSrcRefStep,
+    OMX_U8 iBlockWidth,
+    OMX_U8 iBlockHeight,
+    OMX_U32 nLamda,
+    const OMXVCMotionVector *pMVPred,
+    OMXVCMotionVector *pSrcDstBestMV,
+    OMX_S32 *pBestCost
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_BlockMatch_Quarter   (6.3.5.2.3)
+ *
+ * Description:
+ * Performs a quarter-pel block match using results from a prior half-pel 
+ * search.  Returns the best MV and associated cost.  This function estimates 
+ * the quarter-pixel motion vector by interpolating the half-pel resolution 
+ * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the 
+ * initial half-pel MV is generated externally.  The function 
+ * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrgY - Pointer to the current position in original picture plane:
+ *            If iBlockWidth==4,  4-byte alignment required. 
+ *            If iBlockWidth==8,  8-byte alignment required. 
+ *            If iBlockWidth==16, 16-byte alignment required. 
+ *   pSrcRefY - Pointer to the top-left corner of the co-located block in the 
+ *            reference picture:
+ *            If iBlockWidth==4,  4-byte alignment required.  
+ *            If iBlockWidth==8,  8-byte alignment required.  
+ *            If iBlockWidth==16, 16-byte alignment required. 
+ *   nSrcOrgStep - Stride of the original picture plane in terms of full 
+ *            pixels; must be a multiple of iBlockWidth. 
+ *   nSrcRefStep - Stride of the reference picture plane in terms of full 
+ *            pixels 
+ *   iBlockWidth - Width of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   iBlockHeight - Height of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   nLamda - Lamda factor, used to compute motion cost 
+ *   pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to 
+ *            compute motion cost 
+ *   pSrcDstBestMV - The best MV resulting from a prior half-pel search, 
+ *            represented in terms of 1/4 pel units 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstBestMV - Best MV resulting from the quarter-pel search, expressed 
+ *            in terms of 1/4-pel units 
+ *   pBestCost - Motion cost associated with the best MV; computed as 
+ *            SAD+Lamda*BitsUsedByMV 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    One or more of the following pointers is NULL: 
+ *         pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost 
+ *    -    iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Quarter (
+    const OMX_U8 *pSrcOrgY,
+    OMX_S32 nSrcOrgStep,
+    const OMX_U8 *pSrcRefY,
+    OMX_S32 nSrcRefStep,
+    OMX_U8 iBlockWidth,
+    OMX_U8 iBlockHeight,
+    OMX_U32 nLamda,
+    const OMXVCMotionVector *pMVPred,
+    OMXVCMotionVector *pSrcDstBestMV,
+    OMX_S32 *pBestCost
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_MotionEstimationMB   (6.3.5.3.1)
+ *
+ * Description:
+ * Performs MB-level motion estimation and selects best motion estimation 
+ * strategy from the set of modes supported in baseline profile [ISO14496-10]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcCurrBuf - Pointer to the current position in original picture plane; 
+ *            16-byte alignment required 
+ *   pSrcRefBufList - Pointer to an array with 16 entries.  Each entry points 
+ *            to the top-left corner of the co-located MB in a reference 
+ *            picture.  The array is filled from low-to-high with valid 
+ *            reference frame pointers; the unused high entries should be set 
+ *            to NULL.  Ordering of the reference frames should follow 
+ *            [ISO14496-10] subclause 8.2.4  Decoding Process for Reference 
+ *            Picture Lists.   The entries must be 16-byte aligned. 
+ *   pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the 
+ *            reconstructed picture; must be 16-byte aligned. 
+ *   SrcCurrStep - Width of the original picture plane in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   SrcRefStep - Width of the reference picture plane in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   SrcRecStep - Width of the reconstructed picture plane in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   pRefRect - Pointer to the valid reference rectangle; relative to the 
+ *            image origin. 
+ *   pCurrPointPos - Position of the current macroblock in the current plane. 
+ *   Lambda - Lagrange factor for computing the cost function 
+ *   pMESpec - Pointer to the motion estimation specification structure; must 
+ *            have been allocated and initialized prior to calling this 
+ *            function. 
+ *   pMBInter - Array, of dimension four, containing pointers to information 
+ *            associated with four adjacent type INTER MBs (Left, Top, 
+ *            Top-Left, Top-Right). Any pointer in the array may be set equal 
+ *            to NULL if the corresponding MB doesn't exist or is not of type 
+ *            INTER. 
+ *            -  pMBInter[0] - Pointer to left MB information 
+ *            -  pMBInter[1] - Pointer to top MB information 
+ *            -  pMBInter[2] - Pointer to top-left MB information 
+ *            -  pMBInter[3] - Pointer to top-right MB information 
+ *   pMBIntra - Array, of dimension four, containing pointers to information 
+ *            associated with four adjacent type INTRA MBs (Left, Top, 
+ *            Top-Left, Top-Right). Any pointer in the array may be set equal 
+ *            to NULL if the corresponding MB doesn't exist or is not of type 
+ *            INTRA. 
+ *            -  pMBIntra[0] - Pointer to left MB information 
+ *            -  pMBIntra[1] - Pointer to top MB information 
+ *            -  pMBIntra[2] - Pointer to top-left MB information 
+ *            -  pMBIntra[3] - Pointer to top-right MB information 
+ *   pSrcDstMBCurr - Pointer to information structure for the current MB.  
+ *            The following entries should be set prior to calling the 
+ *            function:  sliceID - the number of the slice to which the 
+ *            current MB belongs. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstCost - Pointer to the minimum motion cost for the current MB. 
+ *   pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma 
+ *            4x4 blocks in each MB.  The block SADs are in scan order for 
+ *            each MB.  For implementations that cannot compute the SAD values 
+ *            individually, the maximum possible value (0xffff) is returned 
+ *            for each of the 16 block SAD entries. 
+ *   pSrcDstMBCurr - Pointer to updated information structure for the current 
+ *            MB after MB-level motion estimation has been completed.  The 
+ *            following fields are updated by the ME function.   The following 
+ *            parameter set quantifies the MB-level ME search results: 
+ *            -  MbType 
+ *            -  subMBType[4] 
+ *            -  pMV0[4][4] 
+ *            -  pMVPred[4][4] 
+ *            -  pRefL0Idx[4] 
+ *            -  Intra16x16PredMode 
+ *            -  pIntra4x4PredMode[4][4] 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -   One or more of the following pointers is NULL: pSrcCurrBuf, 
+ *           pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec, 
+ *           pMBInter, pMBIntra,pSrcDstMBCurr, pDstCost, pSrcRefBufList[0] 
+ *    -    SrcRefStep, SrcRecStep are not multiples of 16 
+ *    -    iBlockWidth or iBlockHeight are values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_MotionEstimationMB (
+    const OMX_U8 *pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    const OMX_U8 *pSrcRefBufList[15],
+    OMX_S32 SrcRefStep,
+    const OMX_U8 *pSrcRecBuf,
+    OMX_S32 SrcRecStep,
+    const OMXRect *pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    OMX_U32 Lambda,
+    void *pMESpec,
+    const OMXVCM4P10MBInfoPtr *pMBInter,
+    const OMXVCM4P10MBInfoPtr *pMBIntra,
+    OMXVCM4P10MBInfoPtr pSrcDstMBCurr,
+    OMX_INT *pDstCost,
+    OMX_U16 *pDstBlockSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SAD_4x   (6.3.5.4.1)
+ *
+ * Description:
+ * This function calculates the SAD for 4x8 and 4x4 blocks. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg -Pointer to the original block; must be aligned on a 4-byte 
+ *            boundary. 
+ *   iStepOrg -Step of the original block buffer; must be a multiple of 4. 
+ *   pSrcRef -Pointer to the reference block 
+ *   iStepRef -Step of the reference block buffer 
+ *   iHeight -Height of the block; must be equal to either 4 or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD -Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    One or more of the following pointers is NULL: 
+ *         pSrcOrg, pSrcRef, or pDstSAD 
+ *    -    iHeight is not equal to either 4 or 8. 
+ *    -    iStepOrg is not a multiple of 4 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SAD_4x (
+    const OMX_U8 *pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8 *pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_S32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SADQuar_4x   (6.3.5.4.2)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average 
+ * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks.  Rounding 
+ * is applied according to the convention (a+b+1)>>1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the original block; must be aligned on a 4-byte 
+ *            boundary. 
+ *   pSrcRef0 - Pointer to reference block 0 
+ *   pSrcRef1 - Pointer to reference block 1 
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 4. 
+ *   iRefStep0 - Step of reference block 0 
+ *   iRefStep1 - Step of reference block 1 
+ *   iHeight - Height of the block; must be equal to either 4 or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    iHeight is not equal to either 4 or 8. 
+ *    -    One or more of the following pointers is NULL: pSrc, pSrcRef0, 
+ *              pSrcRef1, pDstSAD. 
+ *    -    iSrcStep is not a multiple of 4 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_4x (
+    const OMX_U8 *pSrc,
+    const OMX_U8 *pSrcRef0,
+    const OMX_U8 *pSrcRef1,
+    OMX_U32 iSrcStep,
+    OMX_U32 iRefStep0,
+    OMX_U32 iRefStep1,
+    OMX_U32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SADQuar_8x   (6.3.5.4.3)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average 
+ * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks.  
+ * Rounding is applied according to the convention (a+b+1)>>1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the original block; must be aligned on an 8-byte 
+ *            boundary. 
+ *   pSrcRef0 - Pointer to reference block 0 
+ *   pSrcRef1 - Pointer to reference block 1 
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 8. 
+ *   iRefStep0 - Step of reference block 0 
+ *   iRefStep1 - Step of reference block 1 
+ *   iHeight - Height of the block; must be equal to either 4, 8, or 16. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    iHeight is not equal to either 4, 8, or 16. 
+ *    -    One or more of the following pointers is NULL: pSrc, pSrcRef0, 
+ *              pSrcRef1, pDstSAD. 
+ *    -    iSrcStep is not a multiple of 8 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_8x (
+    const OMX_U8 *pSrc,
+    const OMX_U8 *pSrcRef0,
+    const OMX_U8 *pSrcRef1,
+    OMX_U32 iSrcStep,
+    OMX_U32 iRefStep0,
+    OMX_U32 iRefStep1,
+    OMX_U32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SADQuar_16x   (6.3.5.4.4)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average 
+ * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks.  
+ * Rounding is applied according to the convention (a+b+1)>>1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the original block; must be aligned on a 16-byte 
+ *            boundary. 
+ *   pSrcRef0 - Pointer to reference block 0 
+ *   pSrcRef1 - Pointer to reference block 1 
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 16 
+ *   iRefStep0 - Step of reference block 0 
+ *   iRefStep1 - Step of reference block 1 
+ *   iHeight - Height of the block; must be equal to either 8 or 16 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD -Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    iHeight is not equal to either 8 or 16. 
+ *    -    One or more of the following pointers is NULL: pSrc, pSrcRef0, 
+ *              pSrcRef1, pDstSAD. 
+ *    -    iSrcStep is not a multiple of 16 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_16x (
+    const OMX_U8 *pSrc,
+    const OMX_U8 *pSrcRef0,
+    const OMX_U8 *pSrcRef1,
+    OMX_U32 iSrcStep,
+    OMX_U32 iRefStep0,
+    OMX_U32 iRefStep1,
+    OMX_U32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SATD_4x4   (6.3.5.4.5)
+ *
+ * Description:
+ * This function calculates the sum of absolute transform differences (SATD) 
+ * for a 4x4 block by applying a Hadamard transform to the difference block 
+ * and then calculating the sum of absolute coefficient values. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg - Pointer to the original block; must be aligned on a 4-byte 
+ *            boundary 
+ *   iStepOrg - Step of the original block buffer; must be a multiple of 4 
+ *   pSrcRef - Pointer to the reference block; must be aligned on a 4-byte 
+ *            boundary 
+ *   iStepRef - Step of the reference block buffer; must be a multiple of 4 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - pointer to the resulting SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcOrg, pSrcRef, or pDstSAD 
+ *    -    either pSrcOrg or pSrcRef is not aligned on a 4-byte boundary 
+ *    -    iStepOrg <= 0 or iStepOrg is not a multiple of 4 
+ *    -    iStepRef <= 0 or iStepRef is not a multiple of 4 
+ *
+ */
+OMXResult omxVCM4P10_SATD_4x4 (
+    const OMX_U8 *pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8 *pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_U32 *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateHalfHor_Luma   (6.3.5.5.1)
+ *
+ * Description:
+ * This function performs interpolation for two horizontal 1/2-pel positions - 
+ * (-1/2, 0) and (1/2, 0) - around a full-pel position. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the top-left corner of the block used to interpolate in 
+ *            the reconstruction frame plane. 
+ *   iSrcStep - Step of the source buffer. 
+ *   iDstStep - Step of the destination(interpolation) buffer; must be a 
+ *            multiple of iWidth. 
+ *   iWidth - Width of the current block; must be equal to either 4, 8, or 16 
+ *   iHeight - Height of the current block; must be equal to 4, 8, or 16 
+ *
+ * Output Arguments:
+ *   
+ *   pDstLeft - Pointer to the interpolation buffer of the left 1/2-pel 
+ *            position (-1/2, 0) 
+ *                 If iWidth==4,  4-byte alignment required. 
+ *                 If iWidth==8,  8-byte alignment required. 
+ *                 If iWidth==16, 16-byte alignment required. 
+ *   pDstRight - Pointer to the interpolation buffer of the right 1/2-pel 
+ *            position (1/2, 0) 
+ *                 If iWidth==4,  4-byte alignment required. 
+ *                 If iWidth==8,  8-byte alignment required. 
+ *                 If iWidth==16, 16-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *             pSrc, pDstLeft, or pDstRight 
+ *    -    iWidth or iHeight have values other than 4, 8, or 16 
+ *    -    iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary 
+ *    -    iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on a 8-byte boundary 
+ *    -    iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary 
+ *    -    any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfHor_Luma (
+    const OMX_U8 *pSrc,
+    OMX_U32 iSrcStep,
+    OMX_U8 *pDstLeft,
+    OMX_U8 *pDstRight,
+    OMX_U32 iDstStep,
+    OMX_U32 iWidth,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateHalfVer_Luma   (6.3.5.5.2)
+ *
+ * Description:
+ * This function performs interpolation for two vertical 1/2-pel positions - 
+ * (0, -1/2) and (0, 1/2) - around a full-pel position. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to top-left corner of block used to interpolate in the 
+ *            reconstructed frame plane 
+ *   iSrcStep - Step of the source buffer. 
+ *   iDstStep - Step of the destination (interpolation) buffer; must be a 
+ *            multiple of iWidth. 
+ *   iWidth - Width of the current block; must be equal to either 4, 8, or 16 
+ *   iHeight - Height of the current block; must be equal to either 4, 8, or 16 
+ *
+ * Output Arguments:
+ *   
+ *   pDstUp - Pointer to the interpolation buffer of the 1/2-pel position 
+ *            above the current full-pel position (0, -1/2) 
+ *                If iWidth==4, 4-byte alignment required. 
+ *                If iWidth==8, 8-byte alignment required. 
+ *                If iWidth==16, 16-byte alignment required. 
+ *   pDstDown - Pointer to the interpolation buffer of the 1/2-pel position 
+ *            below the current full-pel position (0, 1/2) 
+ *                If iWidth==4, 4-byte alignment required. 
+ *                If iWidth==8, 8-byte alignment required. 
+ *                If iWidth==16, 16-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrc, pDstUp, or pDstDown 
+ *    -    iWidth or iHeight have values other than 4, 8, or 16 
+ *    -    iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary 
+ *    -    iWidth==8 but pDstUp and/or pDstDown is/are not aligned on a 8-byte boundary 
+ *    -    iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfVer_Luma (
+    const OMX_U8 *pSrc,
+    OMX_U32 iSrcStep,
+    OMX_U8 *pDstUp,
+    OMX_U8 *pDstDown,
+    OMX_U32 iDstStep,
+    OMX_U32 iWidth,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_Average_4x   (6.3.5.5.3)
+ *
+ * Description:
+ * This function calculates the average of two 4x4 or 4x8 blocks.  The result 
+ * is rounded according to (a+b+1)/2. 
+ *
+ * Input Arguments:
+ *   
+ *   pPred0 - Pointer to the top-left corner of reference block 0 
+ *   pPred1 - Pointer to the top-left corner of reference block 1 
+ *   iPredStep0 - Step of reference block 0; must be a multiple of 4. 
+ *   iPredStep1 - Step of reference block 1; must be a multiple of 4. 
+ *   iDstStep - Step of the destination buffer; must be a multiple of 4. 
+ *   iHeight - Height of the blocks; must be either 4 or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstPred - Pointer to the destination buffer. 4-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *           pPred0, pPred1, or pDstPred 
+ *    -    pDstPred is not aligned on a 4-byte boundary 
+ *    -    iPredStep0 <= 0 or iPredStep0 is not a multiple of 4 
+ *    -    iPredStep1 <= 0 or iPredStep1 is not a multiple of 4 
+ *    -    iDstStep <= 0 or iDstStep is not a multiple of 4 
+ *    -    iHeight is not equal to either 4 or 8 
+ *
+ */
+OMXResult omxVCM4P10_Average_4x (
+    const OMX_U8 *pPred0,
+    const OMX_U8 *pPred1,
+    OMX_U32 iPredStep0,
+    OMX_U32 iPredStep1,
+    OMX_U8 *pDstPred,
+    OMX_U32 iDstStep,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_TransformQuant_ChromaDC   (6.3.5.6.1)
+ *
+ * Description:
+ * This function performs 2x2 Hadamard transform of chroma DC coefficients 
+ * and then quantizes the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the 2x2 array of chroma DC coefficients.  8-byte 
+ *            alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *   bIntra - Indicate whether this is an INTRA block. 1-INTRA, 0-INTER 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - Pointer to transformed and quantized coefficients.  8-byte 
+ *            alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *             pSrcDst 
+ *    -    pSrcDst is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_ChromaDC (
+    OMX_S16 *pSrcDst,
+    OMX_U32 iQP,
+    OMX_U8 bIntra
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_TransformQuant_LumaDC   (6.3.5.6.2)
+ *
+ * Description:
+ * This function performs a 4x4 Hadamard transform of luma DC coefficients 
+ * and then quantizes the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the 4x4 array of luma DC coefficients.  16-byte 
+ *            alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - Pointer to transformed and quantized coefficients.  16-byte 
+ *             alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: pSrcDst 
+ *    -    pSrcDst is not aligned on a 16-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_LumaDC (
+    OMX_S16 *pSrcDst,
+    OMX_U32 iQP
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InvTransformDequant_LumaDC   (6.3.5.6.3)
+ *
+ * Description:
+ * This function performs inverse 4x4 Hadamard transform and then dequantizes 
+ * the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and 
+ *            quantized coefficients.  16 byte alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to inverse-transformed and dequantized coefficients.  
+ *            16-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: pSrc 
+ *    -    pSrc or pDst is not aligned on a 16-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_LumaDC (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst,
+    OMX_U32 iQP
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InvTransformDequant_ChromaDC   (6.3.5.6.4)
+ *
+ * Description:
+ * This function performs inverse 2x2 Hadamard transform and then dequantizes 
+ * the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and 
+ *            quantized coefficients.  8 byte alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to inverse-transformed and dequantized coefficients.  
+ *            8-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: pSrc 
+ *    -    pSrc or pDst is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_ChromaDC (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst,
+    OMX_U32 iQP
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InvTransformResidualAndAdd   (6.3.5.7.1)
+ *
+ * Description:
+ * This function performs an inverse 4x4 integer transformation to produce 
+ * the difference signal and then adds the difference to the prediction to get 
+ * the reconstructed signal. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcPred - Pointer to prediction signal.  4-byte alignment required. 
+ *   pDequantCoeff - Pointer to the transformed coefficients.  8-byte 
+ *            alignment required. 
+ *   iSrcPredStep - Step of the prediction buffer; must be a multiple of 4. 
+ *   iDstReconStep - Step of the destination reconstruction buffer; must be a 
+ *            multiple of 4. 
+ *   bAC - Indicates whether there are AC coefficients in the coefficient 
+ *            matrix. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstRecon -Pointer to the destination reconstruction buffer.  4-byte 
+ *            alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrcPred, pDequantCoeff, pDstRecon 
+ *    -    pSrcPred is not aligned on a 4-byte boundary 
+ *    -    iSrcPredStep or iDstReconStep is not a multiple of 4. 
+ *    -    pDequantCoeff is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_InvTransformResidualAndAdd (
+    const OMX_U8 *pSrcPred,
+    const OMX_S16 *pDequantCoeff,
+    OMX_U8 *pDstRecon,
+    OMX_U32 iSrcPredStep,
+    OMX_U32 iDstReconStep,
+    OMX_U8 bAC
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SubAndTransformQDQResidual   (6.3.5.8.1)
+ *
+ * Description:
+ * This function subtracts the prediction signal from the original signal to 
+ * produce the difference signal and then performs a 4x4 integer transform and 
+ * quantization. The quantized transformed coefficients are stored as 
+ * pDstQuantCoeff. This function can also output dequantized coefficients or 
+ * unquantized DC coefficients optionally by setting the pointers 
+ * pDstDeQuantCoeff, pDCCoeff. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg - Pointer to original signal. 4-byte alignment required. 
+ *   pSrcPred - Pointer to prediction signal. 4-byte alignment required. 
+ *   iSrcOrgStep - Step of the original signal buffer; must be a multiple of 
+ *            4. 
+ *   iSrcPredStep - Step of the prediction signal buffer; must be a multiple 
+ *            of 4. 
+ *   pNumCoeff -Number of non-zero coefficients after quantization. If this 
+ *            parameter is not required, it is set to NULL. 
+ *   nThreshSAD - Zero-block early detection threshold. If this parameter is 
+ *            not required, it is set to 0. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *   bIntra - Indicates whether this is an INTRA block, either 1-INTRA or 
+ *            0-INTER 
+ *
+ * Output Arguments:
+ *   
+ *   pDstQuantCoeff - Pointer to the quantized transformed coefficients.  
+ *            8-byte alignment required. 
+ *   pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients 
+ *            if this parameter is not equal to NULL.  8-byte alignment 
+ *            required. 
+ *   pDCCoeff - Pointer to the unquantized DC coefficient if this parameter 
+ *            is not equal to NULL. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff, 
+ *            pDstDeQuantCoeff, pDCCoeff 
+ *    -    pSrcOrg is not aligned on a 4-byte boundary 
+ *    -    pSrcPred is not aligned on a 4-byte boundary 
+ *    -    iSrcOrgStep is not a multiple of 4 
+ *    -    iSrcPredStep is not a multiple of 4 
+ *    -    pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_SubAndTransformQDQResidual (
+    const OMX_U8 *pSrcOrg,
+    const OMX_U8 *pSrcPred,
+    OMX_U32 iSrcOrgStep,
+    OMX_U32 iSrcPredStep,
+    OMX_S16 *pDstQuantCoeff,
+    OMX_S16 *pDstDeQuantCoeff,
+    OMX_S16 *pDCCoeff,
+    OMX_S8 *pNumCoeff,
+    OMX_U32 nThreshSAD,
+    OMX_U32 iQP,
+    OMX_U8 bIntra
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_GetVLCInfo   (6.3.5.9.1)
+ *
+ * Description:
+ * This function extracts run-length encoding (RLE) information from the 
+ * coefficient matrix.  The results are returned in an OMXVCM4P10VLCInfo 
+ * structure. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcCoeff - pointer to the transform coefficient matrix.  8-byte 
+ *            alignment required. 
+ *   pScanMatrix - pointer to the scan order definition matrix.  For a luma 
+ *            block the scan matrix should follow [ISO14496-10] section 8.5.4, 
+ *            and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 
+ *            10, 7, 11, 14, 15.  For a chroma block, the scan matrix should 
+ *            contain the values 0, 1, 2, 3. 
+ *   bAC - indicates presence of a DC coefficient; 0 = DC coefficient 
+ *            present, 1= DC coefficient absent. 
+ *   MaxNumCoef - specifies the number of coefficients contained in the 
+ *            transform coefficient matrix, pSrcCoeff. The value should be 16 
+ *            for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The 
+ *            value should be 4 for blocks of type CHROMADC. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstVLCInfo - pointer to structure that stores information for 
+ *            run-length coding. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrcCoeff, pScanMatrix, pDstVLCInfo 
+ *    -    pSrcCoeff is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_GetVLCInfo (
+    const OMX_S16 *pSrcCoeff,
+    const OMX_U8 *pScanMatrix,
+    OMX_U8 bAC,
+    OMX_U32 MaxNumCoef,
+    OMXVCM4P10VLCInfo*pDstVLCInfo
+);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /** end of #define _OMXVC_H_ */
+
+/** EOF */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h
new file mode 100644
index 0000000..be974d5
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h
@@ -0,0 +1,129 @@
+;/******************************************************************************
+;// Copyright (c) 1999-2005 The Khronos Group Inc. All Rights Reserved
+;//
+;//
+;//
+;//
+;//
+;//
+;//
+;//
+;******************************************************************************/
+
+;/** =============== Structure Definition for Sample Generation ============== */
+;/** transparent status */
+
+;enum {
+OMX_VIDEO_TRANSPARENT	EQU 0;	/** Wholly transparent */
+OMX_VIDEO_PARTIAL		EQU 1;	/** Partially transparent */
+OMX_VIDEO_OPAQUE		EQU 2;	/** Opaque */
+;}
+
+;/** direction */
+;enum {
+OMX_VIDEO_NONE			EQU 0;
+OMX_VIDEO_HORIZONTAL	EQU 1;
+OMX_VIDEO_VERTICAL		EQU 2;
+;}
+
+;/** bilinear interpolation type */
+;enum {
+OMX_VIDEO_INTEGER_PIXEL EQU 0;	/** case "a" */
+OMX_VIDEO_HALF_PIXEL_X  EQU 1;	/** case "b" */
+OMX_VIDEO_HALF_PIXEL_Y  EQU 2;	/** case "c" */
+OMX_VIDEO_HALF_PIXEL_XY EQU 3;	/** case "d" */
+;}
+
+;enum {
+OMX_UPPER  				EQU 1;			/** set if the above macroblock is available */
+OMX_LEFT   				EQU 2;			/** set if the left macroblock is available */
+OMX_CENTER 				EQU 4;
+OMX_RIGHT				EQU 8;
+OMX_LOWER  				EQU	16;
+OMX_UPPER_LEFT  		EQU 32;		/** set if the above-left macroblock is available */
+OMX_UPPER_RIGHT 		EQU 64;		/** set if the above-right macroblock is available */
+OMX_LOWER_LEFT  		EQU 128;
+OMX_LOWER_RIGHT 		EQU 256
+;}
+
+;enum {
+OMX_VIDEO_LUMINANCE  	EQU 0;	/** Luminance component */
+OMX_VIDEO_CHROMINANCE  	EQU 1;	/** chrominance component */
+OMX_VIDEO_ALPHA  		EQU 2;			/** Alpha component */
+;}
+
+;enum {
+OMX_VIDEO_INTER			EQU 0;	/** P picture or P-VOP */
+OMX_VIDEO_INTER_Q		EQU 1;	/** P picture or P-VOP */
+OMX_VIDEO_INTER4V		EQU 2;	/** P picture or P-VOP */
+OMX_VIDEO_INTRA			EQU 3;	/** I and P picture; I- and P-VOP */
+OMX_VIDEO_INTRA_Q		EQU 4;	/** I and P picture; I- and P-VOP */
+OMX_VIDEO_INTER4V_Q		EQU 5;	/** P picture or P-VOP (H.263)*/
+OMX_VIDEO_DIRECT		EQU 6;	/** B picture or B-VOP (MPEG-4 only) */
+OMX_VIDEO_INTERPOLATE	EQU 7;	/** B picture or B-VOP */
+OMX_VIDEO_BACKWARD		EQU 8;	/** B picture or B-VOP */
+OMX_VIDEO_FORWARD		EQU 9;	/** B picture or B-VOP */
+OMX_VIDEO_NOTCODED		EQU 10;	/** B picture or B-VOP */
+;}
+
+;enum {
+OMX_16X16_VERT 			EQU 0;		/** Intra_16x16_Vertical (prediction mode) */
+OMX_16X16_HOR 			EQU 1;		/** Intra_16x16_Horizontal (prediction mode) */
+OMX_16X16_DC 			EQU 2;		/** Intra_16x16_DC (prediction mode) */
+OMX_16X16_PLANE 		EQU 3;	/** Intra_16x16_Plane (prediction mode) */
+;}
+
+;enum {
+OMX_4x4_VERT 			EQU 0;		/** Intra_4x4_Vertical (prediction mode) */
+OMX_4x4_HOR  			EQU 1;		/** Intra_4x4_Horizontal (prediction mode) */
+OMX_4x4_DC   			EQU 2;		/** Intra_4x4_DC (prediction mode) */
+OMX_4x4_DIAG_DL 		EQU 3;	/** Intra_4x4_Diagonal_Down_Left (prediction mode) */
+OMX_4x4_DIAG_DR 		EQU 4;	/** Intra_4x4_Diagonal_Down_Right (prediction mode) */
+OMX_4x4_VR 				EQU 5;			/** Intra_4x4_Vertical_Right (prediction mode) */
+OMX_4x4_HD 				EQU 6;			/** Intra_4x4_Horizontal_Down (prediction mode) */
+OMX_4x4_VL 				EQU 7;			/** Intra_4x4_Vertical_Left (prediction mode) */
+OMX_4x4_HU 				EQU 8;			/** Intra_4x4_Horizontal_Up (prediction mode) */
+;}
+
+;enum {
+OMX_CHROMA_DC 			EQU 0;		/** Intra_Chroma_DC (prediction mode) */
+OMX_CHROMA_HOR 			EQU 1;		/** Intra_Chroma_Horizontal (prediction mode) */
+OMX_CHROMA_VERT 		EQU 2;	/** Intra_Chroma_Vertical (prediction mode) */
+OMX_CHROMA_PLANE 		EQU 3;	/** Intra_Chroma_Plane (prediction mode) */
+;}
+
+;typedef	struct {	
+x	EQU	0;
+y	EQU	4;
+;}OMXCoordinate;
+
+;typedef struct {
+dx	EQU	0;
+dy	EQU	2;
+;}OMXMotionVector;
+
+;typedef struct {
+xx		EQU	0;
+yy		EQU	4;
+width	EQU	8;
+height	EQU	12;
+;}OMXiRect;
+
+;typedef enum {
+OMX_VC_INTER         EQU 0;        /** P picture or P-VOP */
+OMX_VC_INTER_Q       EQU 1;       /** P picture or P-VOP */
+OMX_VC_INTER4V       EQU 2;       /** P picture or P-VOP */
+OMX_VC_INTRA         EQU 3;        /** I and P picture, I- and P-VOP */
+OMX_VC_INTRA_Q       EQU 4;       /** I and P picture, I- and P-VOP */
+OMX_VC_INTER4V_Q     EQU 5;    /** P picture or P-VOP (H.263)*/
+;} OMXVCM4P2MacroblockType;
+
+;enum {
+OMX_VC_NONE          EQU 0
+OMX_VC_HORIZONTAL    EQU 1
+OMX_VC_VERTICAL      EQU 2 
+;};
+
+
+	END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s
new file mode 100644
index 0000000..2663a70
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s
@@ -0,0 +1,148 @@
+ ;/**
+ ; * Function: omxVCCOMM_Copy16x16
+ ; *
+ ; * Description:
+ ; * Copies the reference 16x16 block to the current block.
+ ; * Parameters:
+ ; * [in] pSrc         - pointer to the reference block in the source frame; must be aligned on a 16-byte boundary.
+ ; * [in] step         - distance between the starts of consecutive lines in the reference frame, in bytes;
+ ; *                     must be a multiple of 16 and must be larger than or equal to 16.
+ ; * [out] pDst        - pointer to the destination block; must be aligned on an 8-byte boundary.
+ ; * Return Value:
+ ; * OMX_Sts_NoErr     - no error
+ ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions:
+ ; *                   - one or more of the following pointers is NULL:  pSrc, pDst
+ ; *                   - one or more of the following pointers is not aligned on a 16-byte boundary:  pSrc, pDst
+ ; *                   - step <16 or step is not a multiple of 16.  
+ ; */
+
+   INCLUDE omxtypes_s.h
+   
+     
+     M_VARIANTS ARM1136JS
+     
+
+
+
+     IF ARM1136JS
+
+;//Input Arguments
+pSrc    RN 0        ;// source block pointer, advanced as rows are read
+pDst    RN 1        ;// destination pointer, advanced 8 bytes per store
+step    RN 2        ;// source row stride in bytes; r2 is reused as X0 below
+
+;//Local Variables
+Count   RN 14       ;// step-8: post-increment that moves pSrc from mid-row to the next row
+X0      RN 2        ;// first register of the LDRD/STRD pair for bytes 0-7 of a row
+X1      RN 4        ;// first register of the LDRD/STRD pair for bytes 8-15 of a row
+
+Return  RN 0        ;// status code returned in r0 (same register as pSrc)
+     
+     M_START omxVCCOMM_Copy16x16,r5
+        ;// Copies 16 rows of 16 bytes; the destination is written contiguously.
+        ;// No argument checks are performed here; the status is always OMX_Sts_NoErr.
+        ;// step must be consumed before the first LDRD into X0, which overwrites r2.
+        SUB   Count,step,#8                 ;//Count=step-8
+        LDRD  X0,[pSrc],#8                  ;//pSrc after loading pSrc=pSrc+8
+        LDRD  X1,[pSrc],Count               ;//pSrc after loading pSrc=pSrc+step
+        
+        ;// store the row already held in X0/X1 while loading the next 16 bytes (15 such groups)
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8 
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+        
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+        
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+        
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+        
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+       
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+        
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8              
+        LDRD  X0,[pSrc],#8 
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+        
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8 
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8 
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8 
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8 
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8 
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8 
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+       
+        ;// loading 16 bytes and storing
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],#8 
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+
+        STRD  X0,[pDst],#8               ;// store the 16th (last) row, bytes 0-7
+        MOV   Return,#OMX_Sts_NoErr      ;// r0 (pSrc) is free once the last row is loaded
+        STRD  X1,[pDst],#8               ;// store the 16th (last) row, bytes 8-15
+
+       
+        M_END
+        ENDIF
+        
+        END
+       
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s
new file mode 100644
index 0000000..993873c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s
@@ -0,0 +1,72 @@
+ ;/**
+ ; * Function: omxVCCOMM_Copy8x8
+ ; *
+ ; * Description:
+ ; * Copies the reference 8x8 block to the current block.
+ ; * Parameters:
+ ; * [in] pSrc         - pointer to the reference block in the source frame; must be aligned on an 8-byte boundary.
+ ; * [in] step         - distance between the starts of consecutive lines in the reference frame, in bytes;
+ ; *                     must be a multiple of 8 and must be larger than or equal to 8.
+ ; * [out] pDst        - pointer to the destination block; must be aligned on an 8-byte boundary.
+ ; * Return Value:
+ ; * OMX_Sts_NoErr     - no error
+ ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions:
+ ; *                   - one or more of the following pointers is NULL:  pSrc, pDst
+ ; *                   - one or more of the following pointers is not aligned on an 8-byte boundary:  pSrc, pDst
+ ; *                   - step <8 or step is not a multiple of 8.  
+ ; */
+
+   INCLUDE omxtypes_s.h
+   
+     
+     M_VARIANTS ARM1136JS
+     
+
+
+
+     IF ARM1136JS
+
+;//Input Arguments
+pSrc    RN 0        ;// source block pointer, advanced by one row per load
+pDst    RN 1        ;// destination pointer, advanced 8 bytes per store
+step    RN 2        ;// source row stride in bytes; r2 is reused as X0 below
+
+;//Local Variables
+Count   RN 14       ;// copy of step, needed because LDRD into X0 overwrites r2
+X0      RN 2        ;// first register of the LDRD/STRD pair holding rows 0, 2, 4, 6
+X1      RN 4        ;// first register of the LDRD/STRD pair holding rows 1, 3, 5, 7
+Return  RN 0        ;// status code returned in r0 (same register as pSrc)
+     M_START omxVCCOMM_Copy8x8,r5
+        ;// Copies 8 rows of 8 bytes; the destination is written contiguously.
+        ;// No argument checks are performed here; the status is always OMX_Sts_NoErr.
+        ;// Each store is interleaved with the load of a later row.
+        MOV   Count,step                 ;//Count=step 
+        
+        LDRD  X0,[pSrc],Count            ;//pSrc after loading : pSrc=pSrc+step
+        LDRD  X1,[pSrc],Count
+        
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],Count 
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+        
+        STRD  X0,[pDst],#8               
+        LDRD  X0,[pSrc],Count
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+        
+        STRD  X0,[pDst],#8              
+        LDRD  X0,[pSrc],Count
+        STRD  X1,[pDst],#8               
+        LDRD  X1,[pSrc],Count
+        
+        STRD  X0,[pDst],#8               ;// store row 6
+        MOV   Return,#OMX_Sts_NoErr      ;// r0 (pSrc) is free once the last row is loaded
+        STRD  X1,[pDst],#8               ;// store row 7
+        
+        
+        M_END
+        ENDIF
+        
+        END
+        
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
new file mode 100644
index 0000000..02b4b08
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
@@ -0,0 +1,189 @@
+;//
+;// 
+;// File Name:  omxVCCOMM_ExpandFrame_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// Description:
+;// This function will Expand Frame boundary pixels into Plane
+;// 
+;// 
+
+;// Include standard headers
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+
+;// Import symbols required from other files
+;// (For example tables)
+    
+  
+;// Set debugging level        
+DEBUG_ON    SETL {FALSE}
+
+
+    
+
+
+
+        
+;// Guarding implementation by the processor name
+    
+    IF  ARM1136JS
+    
+;//Input Registers
+
+pSrcDstPlane    RN  0
+iFrameWidth     RN  1
+iFrameHeight    RN  2    
+iExpandPels     RN  3
+
+
+;//Output Registers
+
+result          RN  0
+
+;//Local Scratch Registers
+;// r5-r7, r10-r12 and r14 carry different names in the top/bottom pass and the left/right pass
+iPlaneStep      RN  4
+pTop            RN  5
+pBottom         RN  6
+pBottomIndex    RN  7
+x               RN  8
+y               RN  9
+tempTop         RN  10
+tempBot         RN  11
+ColStep         RN  12
+pLeft           RN  5
+pRight          RN  6
+pRightIndex     RN  7
+tempLeft1       RN  10
+tempRight1      RN  11
+tempLeft2       RN  14
+tempRight2      RN  2                               ;// same register as iFrameHeight
+indexY          RN  14
+RowStep         RN  12
+expandTo4bytes  RN  1                               ;// copy a byte to 4 bytes of a word
+    
+        ;// Allocate stack memory required by the function
+        ;// Pass 1 replicates the first and last frame rows into the rows above and below the frame.
+        ;// Pass 2 replicates the first and last pixel of every row (borders included) to the left and right.
+        ;// Write function header
+        M_START omxVCCOMM_ExpandFrame_I,r11
+        ;// No argument checks are performed here; the status is always OMX_Sts_NoErr.
+        ;// Define stack arguments
+        M_ARG   iPlaneStepOnStack, 4
+        ;// NOTE(review): data moves in 4-byte words and rows are handled in pairs - confirm the caller guarantees suitable sizes.
+        ;// Load argument from the stack
+        M_LDR   iPlaneStep, iPlaneStepOnStack
+        
+        MUL     pTop,iExpandPels,iPlaneStep                     ;// byte size of iExpandPels rows
+        MLA     pBottom,iFrameHeight,iPlaneStep,pSrcDstPlane    ;// first row below the frame
+        SUB     x,iFrameWidth,#4                                ;// offset of the last 4-byte column
+        MOV     indexY,pTop                                     ;// keep iExpandPels*iPlaneStep for pass 2
+        ADD     ColStep,indexY,#4                               ;// rewind amount: the border rows plus one 4-byte column
+        SUB     pBottomIndex,pBottom,iPlaneStep                 ;// last row of the frame
+        SUB     pTop,pSrcDstPlane,pTop                          ;// first border row above the frame
+        
+        ;// start both write pointers at the last 4-byte column; the x loop walks right to left
+        ADD     pTop,pTop,x
+        ADD     pBottom,pBottom,x
+
+        ;//------------------------------------------------------------------------
+        ;// The following improves upon the C implementation
+        ;// The x and y loops are interchanged: This ensures that the values of
+        ;// pSrcDstPlane [x] and pSrcDstPlane [(iFrameHeight - 1) * iPlaneStep + x] 
+        ;// which depend only on loop variable 'x' are loaded once and used in 
+        ;// multiple stores in the 'Y' loop
+        ;//------------------------------------------------------------------------
+
+        ;// xloop
+ExpandFrameTopBotXloop
+        
+        LDR     tempTop,[pSrcDstPlane,x]
+        ;//------------------------------------------------------------------------
+        ;// pSrcDstPlane [(iFrameHeight - 1) * iPlaneStep + x] is simplified as:
+        ;// pSrcDstPlane + (iFrameHeight * iPlaneStep) - iPlaneStep + x ==
+        ;// pBottom - iPlaneStep + x == pBottomIndex [x]
+        ;// The value of pBottomIndex is calculated above this 'x' loop
+        ;//------------------------------------------------------------------------
+        LDR     tempBot,[pBottomIndex,x]
+        
+        ;// yloop
+        MOV     y,iExpandPels
+
+ExpandFrameTopBotYloop        
+        SUBS    y,y,#1
+        M_STR   tempTop,[pTop],iPlaneStep                       ;// write 4 bytes, then step down one row
+        M_STR   tempBot,[pBottom],iPlaneStep
+        BGT     ExpandFrameTopBotYloop
+        
+        SUBS    x,x,#4
+        SUB     pTop,pTop,ColStep                               ;// back to the first border row, 4 bytes further left
+        SUB     pBottom,pBottom,ColStep
+        BGE     ExpandFrameTopBotXloop
+        
+        
+        ;// y loop
+        ;// The product is already calculated above : Reuse
+        ;//MUL     indexY,iExpandPels,iPlaneStep      
+      
+        SUB     pSrcDstPlane,pSrcDstPlane,indexY                ;// move up to the first border row written in pass 1
+        SUB     pLeft,pSrcDstPlane,iExpandPels                  ;// pLeft->points to the top left of the expanded block
+        ADD     pRight,pSrcDstPlane,iFrameWidth                 ;// first byte to the right of the frame row
+        SUB     pRightIndex,pRight,#1 
+        
+        ADD     y,iFrameHeight,iExpandPels,LSL #1               ;// row count: frame plus top and bottom borders
+        LDR     expandTo4bytes,=0x01010101                      ;// overwrites iFrameWidth (r1), which is not read again
+        
+        RSB     RowStep,iExpandPels,iPlaneStep,LSL #1           ;// 2*iPlaneStep - iExpandPels
+
+        ;// The Y Loop is unrolled twice
+ExpandFrameLeftRightYloop  
+        LDRB    tempLeft2,[pSrcDstPlane,iPlaneStep]             ;// PreLoad the values
+        LDRB    tempRight2,[pRightIndex,iPlaneStep]
+        M_LDRB  tempLeft1,[pSrcDstPlane],iPlaneStep,LSL #1      ;// PreLoad the values
+        M_LDRB  tempRight1,[pRightIndex],iPlaneStep,LSL #1
+              
+        SUB     x,iExpandPels,#4
+        MUL     tempLeft2,tempLeft2,expandTo4bytes              ;// Copy the single byte to 4 bytes
+        MUL     tempRight2,tempRight2,expandTo4bytes
+        MUL     tempLeft1,tempLeft1,expandTo4bytes              ;// Copy the single byte to 4 bytes
+        MUL     tempRight1,tempRight1,expandTo4bytes
+        
+        
+        ;// x loop
+ExpandFrameLeftRightXloop        
+        SUBS    x,x,#4
+        STR     tempLeft2,[pLeft,iPlaneStep]                     ;// Store the 4 bytes at one go
+        STR     tempRight2,[pRight,iPlaneStep]
+        STR     tempLeft1,[pLeft],#4                             ;// Store the 4 bytes at one go
+        STR     tempRight1,[pRight],#4
+        BGE     ExpandFrameLeftRightXloop
+        
+        SUBS    y,y,#2
+        ADD     pLeft,pLeft,RowStep                              ;// pLeft and pRight were advanced during the x loop; RowStep moves them on to the next row pair
+        ADD     pRight,pRight,RowStep
+        BGT     ExpandFrameLeftRightYloop
+        
+                        
+        ;// Set return value
+          
+        MOV         result,#OMX_Sts_NoErr  
+End             
+      
+        ;// Write function tail
+        
+        M_END
+        
+    ENDIF                                                    ;//ARM1136JS        
+ 
+            
+    END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h
new file mode 100644
index 0000000..4340f2a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h
@@ -0,0 +1,30 @@
+/* ----------------------------------------------------------------
+ * 
+ * 
+ * File Name:  armVCM4P10_CAVLCTables.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Header file for optimized H.264 CAVLC tables
+ * 
+ */
+ 
+#ifndef ARMVCM4P10_CAVLCTABLES_H
+#define ARMVCM4P10_CAVLCTABLES_H
+  
+/* CAVLC tables */
+
+extern const OMX_U16 *armVCM4P10_CAVLCCoeffTokenTables[18];
+extern const OMX_U16 *armVCM4P10_CAVLCTotalZeroTables[15];
+extern const OMX_U16 *armVCM4P10_CAVLCTotalZeros2x2Tables[3];
+extern const OMX_U16 *armVCM4P10_CAVLCRunBeforeTables[15];
+extern const OMX_U8 armVCM4P10_ZigZag_4x4[16];
+extern const OMX_U8 armVCM4P10_ZigZag_2x2[4];
+extern const OMX_S8 armVCM4P10_SuffixToLevel[7];
+
+#endif
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
new file mode 100644
index 0000000..b2cd9d1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
@@ -0,0 +1,222 @@
+;//
+;// 
+;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+
+;// Functions:
+;//     armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe  
+;//
+;// Implements Average of 4x4 with equation c = (a+b+1)>>1.
+;// First operand will be at offset ALIGNMENT from aligned address
+;// Second operand will be at an aligned location, which is also used as the
+;// output destination (pDst), e.g. for vertical interpolation.
+;// This function handles 4 bytes per row in the horizontal direction.
+;//
+;// Registers used as input for this function
+;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 its step size
+;//
+;// Registers preserved for top level function
+;// r4,r5,r6,r8,r9,r14
+;//
+;// Registers modified by the function
+;// r7,r10,r11,r12
+;//
+;// Output registers
+;// r2 - pointer to the aligned location
+;// r3 - step size to this aligned location
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+
+        EXPORT armVCM4P10_Average_4x4_Align0_unsafe
+        EXPORT armVCM4P10_Average_4x4_Align2_unsafe
+        EXPORT armVCM4P10_Average_4x4_Align3_unsafe
+
+DEBUG_ON    SETL {FALSE}
+
+;// Declare input registers (r2/r3 carry two names each: the second operand is also the output)
+pPred0          RN 0    ;// first operand
+iPredStep0      RN 1    ;// row step applied to pPred0
+pPred1          RN 2    ;// second operand
+iPredStep1      RN 3    ;// row step applied to pPred1
+pDstPred        RN 2    ;// output pointer - same register as pPred1
+iDstStep        RN 3    ;// output step - same register as iPredStep1
+
+;// Declare other intermediate registers
+iPredA0         RN 10   ;// row data read through pPred0
+iPredA1         RN 11
+iPredB0         RN 12   ;// row data read through pPred1
+iPredB1         RN 14
+Temp1           RN 4    ;// shares r4 with ResultB
+Temp2           RN 5    ;// shares r5 with ResultA
+ResultA         RN 5
+ResultB         RN 4
+r0x80808080     RN 7    ;// loaded with the constant 0x80808080 in each function
+
+    IF ARM1136JS
+        
+        ;// This function calculates the average of a 4x4 block:
+        ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4.
+        ;// The result is stored through pDstPred, i.e. over the pPred1 block (both are r2).
+        ;// Function header (M_START ..., r6 presumably saves r4-r6 and lr - see armCOMM_s.h)
+        M_START armVCM4P10_Average_4x4_Align0_unsafe, r6
+
+        ;// Code start: constant used for the per-byte "+ 128" step below
+        LDR         r0x80808080, =0x80808080
+
+        ;// 1st load: two rows of each operand (A via pPred0, B via pPred1)
+        M_LDR       iPredB0, [pPred1]
+        M_LDR       iPredA0, [pPred0], iPredStep0        
+        M_LDR       iPredB1, [pPred1, iPredStep1]
+        M_LDR       iPredA1, [pPred0], iPredStep0
+
+        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
+        MVN         iPredB0, iPredB0                    ;// 255-b in every byte
+        MVN         iPredB1, iPredB1
+        UHSUB8      ResultA, iPredA0, iPredB0           ;// (a-(255-b))/2 per byte
+        UHSUB8      ResultB, iPredA1, iPredB1
+        EOR         ResultA, ResultA, r0x80808080       ;// the "+ 128" per byte
+        M_STR       ResultA, [pDstPred], iDstStep        
+        EOR         ResultB, ResultB, r0x80808080
+        M_STR       ResultB, [pDstPred], iDstStep        
+        ;// 2nd load: next two rows. pPred1 shares r2 with pDstPred, so the post-indexed
+        ;// stores above have presumably already advanced it by two rows.
+        M_LDR       iPredA0, [pPred0], iPredStep0        
+        M_LDR       iPredB0, [pPred1]
+        M_LDR       iPredA1, [pPred0], iPredStep0
+        M_LDR       iPredB1, [pPred1, iPredStep1]
+
+        MVN         iPredB0, iPredB0                    ;// same averaging sequence as above
+        UHSUB8      ResultA, iPredA0, iPredB0
+        MVN         iPredB1, iPredB1
+        UHSUB8      ResultB, iPredA1, iPredB1
+        EOR         ResultA, ResultA, r0x80808080        
+        M_STR       ResultA, [pDstPred], iDstStep        
+        EOR         ResultB, ResultB, r0x80808080
+        M_STR       ResultB, [pDstPred], iDstStep                
+End0
+        M_END
+
+        ;// This function calculates the average of a 4x4 block:
+        ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4.
+        ;// The result is stored through pDstPred, i.e. over the pPred1 block (both are r2).
+        ;// Function header (M_START ..., r6 presumably saves r4-r6 and lr - see armCOMM_s.h)
+        M_START armVCM4P10_Average_4x4_Align2_unsafe, r6
+
+        ;// Code start: constant used for the per-byte "+ 128" step below
+        LDR         r0x80808080, =0x80808080
+
+        ;// 1st load: two rows; each A row needs the word at pPred0 and the word after it
+        LDR         Temp1, [pPred0, #4]
+        M_LDR       iPredA0, [pPred0], iPredStep0        
+        M_LDR       iPredB0, [pPred1]
+        M_LDR       iPredB1, [pPred1, iPredStep1]
+        M_LDR       Temp2, [pPred0, #4]
+        M_LDR       iPredA1, [pPred0], iPredStep0
+        MVN         iPredB0, iPredB0                    ;// 255-b in every byte
+        MVN         iPredB1, iPredB1        
+        MOV         iPredA0, iPredA0, LSR #16           ;// upper halfword of the first word
+        ORR         iPredA0, iPredA0, Temp1, LSL #16    ;// joined with lower halfword of the next
+        MOV         iPredA1, iPredA1, LSR #16           ;// (bytes 2..5 of the row, assuming little-endian)
+        ORR         iPredA1, iPredA1, Temp2, LSL #16
+
+        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
+        UHSUB8      ResultA, iPredA0, iPredB0           ;// (a-(255-b))/2 per byte
+        UHSUB8      ResultB, iPredA1, iPredB1
+        EOR         ResultA, ResultA, r0x80808080       ;// the "+ 128" per byte
+        M_STR       ResultA, [pDstPred], iDstStep        
+        EOR         ResultB, ResultB, r0x80808080
+        M_STR       ResultB, [pDstPred], iDstStep        
+        ;// 2nd load: next two rows. pPred1 shares r2 with pDstPred, so the post-indexed
+        ;// stores above have presumably already advanced it by two rows.
+        LDR         Temp1, [pPred0, #4]
+        M_LDR         iPredA0, [pPred0], iPredStep0        
+        LDR         iPredB0, [pPred1]
+        LDR         iPredB1, [pPred1, iPredStep1]
+        LDR         Temp2, [pPred0, #4]
+        M_LDR         iPredA1, [pPred0], iPredStep0
+        MVN         iPredB0, iPredB0
+        MVN         iPredB1, iPredB1
+        MOV         iPredA0, iPredA0, LSR #16           ;// same 2-byte realignment as above
+        ORR         iPredA0, iPredA0, Temp1, LSL #16        
+        MOV         iPredA1, iPredA1, LSR #16
+        ORR         iPredA1, iPredA1, Temp2, LSL #16
+
+        UHSUB8      ResultA, iPredA0, iPredB0
+        UHSUB8      ResultB, iPredA1, iPredB1
+        EOR         ResultA, ResultA, r0x80808080        
+        M_STR       ResultA, [pDstPred], iDstStep        
+        EOR         ResultB, ResultB, r0x80808080
+        M_STR       ResultB, [pDstPred], iDstStep                
+End2
+        M_END
+
+
+        ;// This function calculates the average of a 4x4 block:
+        ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4.
+        ;// The result is stored through pDstPred, i.e. over the pPred1 block (both are r2).
+        ;// Function header (M_START ..., r6 presumably saves r4-r6 and lr - see armCOMM_s.h)
+        M_START armVCM4P10_Average_4x4_Align3_unsafe, r6
+
+        ;// Code start: constant used for the per-byte "+ 128" step below
+        LDR         r0x80808080, =0x80808080
+
+        ;// 1st load: two rows; each A row needs the word at pPred0 and the word after it
+        LDR         Temp1, [pPred0, #4]
+        M_LDR       iPredA0, [pPred0], iPredStep0        
+        LDR         iPredB0, [pPred1]
+        LDR         iPredB1, [pPred1, iPredStep1]
+        LDR         Temp2, [pPred0, #4]
+        M_LDR       iPredA1, [pPred0], iPredStep0
+
+        MVN         iPredB0, iPredB0                    ;// 255-b in every byte
+        MVN         iPredB1, iPredB1
+        MOV         iPredA0, iPredA0, LSR #24           ;// top byte of the first word
+        ORR         iPredA0, iPredA0, Temp1, LSL #8     ;// joined with low 3 bytes of the next
+        MOV         iPredA1, iPredA1, LSR #24           ;// (bytes 3..6 of the row, assuming little-endian)
+        ORR         iPredA1, iPredA1, Temp2, LSL #8
+        UHSUB8      ResultA, iPredA0, iPredB0           ;// (a-(255-b))/2 per byte
+        UHSUB8      ResultB, iPredA1, iPredB1
+        EOR         ResultA, ResultA, r0x80808080       ;// + 128 per byte, giving (a+b+1)/2
+        M_STR       ResultA, [pDstPred], iDstStep        
+        EOR         ResultB, ResultB, r0x80808080
+        M_STR       ResultB, [pDstPred], iDstStep        
+        ;// 2nd load: next two rows. pPred1 shares r2 with pDstPred, so the post-indexed
+        ;// stores above have presumably already advanced it by two rows.
+        LDR         Temp1, [pPred0, #4]
+        M_LDR       iPredA0, [pPred0], iPredStep0        
+        LDR         iPredB0, [pPred1]
+        LDR         iPredB1, [pPred1, iPredStep1]
+        LDR         Temp2, [pPred0, #4]
+        M_LDR       iPredA1, [pPred0], iPredStep0
+
+        MVN         iPredB0, iPredB0
+        MVN         iPredB1, iPredB1
+        MOV         iPredA0, iPredA0, LSR #24           ;// same 3-byte realignment as above
+        ORR         iPredA0, iPredA0, Temp1, LSL #8        
+        MOV         iPredA1, iPredA1, LSR #24
+        ORR         iPredA1, iPredA1, Temp2, LSL #8
+
+        UHSUB8      ResultA, iPredA0, iPredB0
+        UHSUB8      ResultB, iPredA1, iPredB1
+        EOR         ResultA, ResultA, r0x80808080        
+        M_STR       ResultA, [pDstPred], iDstStep        
+        EOR         ResultB, ResultB, r0x80808080
+        M_STR       ResultB, [pDstPred], iDstStep                
+End3
+        M_END
+
+    ENDIF
+    
+    END
+    
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c
new file mode 100644
index 0000000..17fe518
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c
@@ -0,0 +1,327 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  armVCM4P10_CAVLCTables.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Optimized CAVLC tables for H.264
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVCM4P10_CAVLCTables.h"
+
+/* 4x4 DeZigZag table */
+
+const OMX_U8 armVCM4P10_ZigZag_4x4[16] =
+{
+    0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+/* 2x2 DeZigZag table */
+
+const OMX_U8 armVCM4P10_ZigZag_2x2[4] =
+{
+    0, 1, 2, 3
+};
+
+
+/*
+ * Suffix To Level table
+ * We increment the suffix length if (each line restates the one above it):
+ * ((LevelCode>>1)+1)>(3<<(SuffixLength-1)) && SuffixLength<6
+ * (LevelCode>>1)>=(3<<(SuffixLength-1))    && SuffixLength<6
+ *  LevelCode    >= 3<<SuffixLength         && SuffixLength<6
+ * (LevelCode+2) >= (3<<SuffixLength)+2     && SuffixLength<6  (right-hand side is what is stored)
+ */
+const OMX_S8 armVCM4P10_SuffixToLevel[7] =
+{
+    (3<<1)+2,       /* index 0: presumably SuffixLength=0, same threshold as 1 - TODO confirm */
+    (3<<1)+2,       /* SuffixLength=1 */
+    (3<<2)+2,       /* SuffixLength=2 */
+    (3<<3)+2,       /* SuffixLength=3 */
+    (3<<4)+2,       /* SuffixLength=4 */
+    (3<<5)+2,       /* SuffixLength=5 */
+    -1              /* SuffixLength=6 - never increment */
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_0[132] = {
+    0x0020, 0x0100, 0x2015, 0x2015, 0x400b, 0x400b, 0x400b, 0x400b,
+    0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+    0x0028, 0x00f0, 0x00f8, 0x0027, 0x0030, 0x00d8, 0x00e0, 0x00e8,
+    0x0038, 0x00a0, 0x00c8, 0x00d0, 0x0040, 0x0068, 0x0090, 0x0098,
+    0x0048, 0x0050, 0x0058, 0x0060, 0x27ff, 0x27ff, 0x206b, 0x206b,
+    0x0081, 0x0085, 0x0083, 0x0079, 0x0087, 0x007d, 0x007b, 0x0071,
+    0x007f, 0x0075, 0x0073, 0x0069, 0x0070, 0x0078, 0x0080, 0x0088,
+    0x2077, 0x2077, 0x206d, 0x206d, 0x2063, 0x2063, 0x2061, 0x2061,
+    0x206f, 0x206f, 0x2065, 0x2065, 0x205b, 0x205b, 0x2059, 0x2059,
+    0x0067, 0x005d, 0x0053, 0x0051, 0x005f, 0x0055, 0x004b, 0x0049,
+    0x00a8, 0x00b0, 0x00b8, 0x00c0, 0x2041, 0x2041, 0x204d, 0x204d,
+    0x2043, 0x2043, 0x2039, 0x2039, 0x2057, 0x2057, 0x2045, 0x2045,
+    0x203b, 0x203b, 0x2031, 0x2031, 0x204f, 0x204f, 0x203d, 0x203d,
+    0x2033, 0x2033, 0x2029, 0x2029, 0x0047, 0x0035, 0x002b, 0x0021,
+    0x203f, 0x203f, 0x202d, 0x202d, 0x2023, 0x2023, 0x2019, 0x2019,
+    0x0037, 0x0025, 0x001b, 0x0011, 0x202f, 0x202f, 0x201d, 0x201d,
+    0x0013, 0x0009, 0x201f, 0x201f
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_1[128] = {
+    0x0020, 0x00e8, 0x00f0, 0x00f8, 0x0027, 0x001f, 0x2015, 0x2015,
+    0x400b, 0x400b, 0x400b, 0x400b, 0x4001, 0x4001, 0x4001, 0x4001,
+    0x0028, 0x00d0, 0x00d8, 0x00e0, 0x0030, 0x0098, 0x00c0, 0x00c8,
+    0x0038, 0x0060, 0x0088, 0x0090, 0x0040, 0x0048, 0x0050, 0x0058,
+    0x27ff, 0x27ff, 0x207f, 0x207f, 0x0087, 0x0085, 0x0083, 0x0081,
+    0x007b, 0x0079, 0x007d, 0x0073, 0x2075, 0x2075, 0x2071, 0x2071,
+    0x0068, 0x0070, 0x0078, 0x0080, 0x2077, 0x2077, 0x206d, 0x206d,
+    0x206b, 0x206b, 0x2069, 0x2069, 0x206f, 0x206f, 0x2065, 0x2065,
+    0x2063, 0x2063, 0x2061, 0x2061, 0x0059, 0x005d, 0x005b, 0x0051,
+    0x0067, 0x0055, 0x0053, 0x0049, 0x00a0, 0x00a8, 0x00b0, 0x00b8,
+    0x205f, 0x205f, 0x204d, 0x204d, 0x204b, 0x204b, 0x2041, 0x2041,
+    0x2057, 0x2057, 0x2045, 0x2045, 0x2043, 0x2043, 0x2039, 0x2039,
+    0x204f, 0x204f, 0x203d, 0x203d, 0x203b, 0x203b, 0x2031, 0x2031,
+    0x0029, 0x0035, 0x0033, 0x0021, 0x2047, 0x2047, 0x202d, 0x202d,
+    0x202b, 0x202b, 0x2019, 0x2019, 0x003f, 0x0025, 0x0023, 0x0011,
+    0x0037, 0x001d, 0x001b, 0x0009, 0x202f, 0x202f, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_2[112] = {
+    0x0020, 0x0088, 0x00b0, 0x00b8, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+    0x003f, 0x0037, 0x002f, 0x0027, 0x001f, 0x0015, 0x000b, 0x0001,
+    0x0028, 0x0050, 0x0078, 0x0080, 0x0030, 0x0038, 0x0040, 0x0048,
+    0x07ff, 0x0081, 0x0087, 0x0085, 0x0083, 0x0079, 0x007f, 0x007d,
+    0x007b, 0x0071, 0x0077, 0x0075, 0x0073, 0x0069, 0x206b, 0x206b,
+    0x0058, 0x0060, 0x0068, 0x0070, 0x2061, 0x2061, 0x206d, 0x206d,
+    0x2063, 0x2063, 0x2059, 0x2059, 0x206f, 0x206f, 0x2065, 0x2065,
+    0x205b, 0x205b, 0x2051, 0x2051, 0x0067, 0x005d, 0x0053, 0x0049,
+    0x005f, 0x0055, 0x004b, 0x0041, 0x0090, 0x0098, 0x00a0, 0x00a8,
+    0x2039, 0x2039, 0x2031, 0x2031, 0x204d, 0x204d, 0x2029, 0x2029,
+    0x2057, 0x2057, 0x2045, 0x2045, 0x2043, 0x2043, 0x2021, 0x2021,
+    0x0019, 0x003d, 0x003b, 0x0011, 0x004f, 0x0035, 0x0033, 0x0009,
+    0x202b, 0x202b, 0x202d, 0x202d, 0x2023, 0x2023, 0x2025, 0x2025,
+    0x201b, 0x201b, 0x2047, 0x2047, 0x201d, 0x201d, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_3[80] = {
+    0x0020, 0x0028, 0x0030, 0x0038, 0x0040, 0x0048, 0x0050, 0x0058,
+    0x0060, 0x0068, 0x0070, 0x0078, 0x0080, 0x0088, 0x0090, 0x0098,
+    0x0009, 0x000b, 0x07ff, 0x0001, 0x0011, 0x0013, 0x0015, 0x07ff,
+    0x0019, 0x001b, 0x001d, 0x001f, 0x0021, 0x0023, 0x0025, 0x0027,
+    0x0029, 0x002b, 0x002d, 0x002f, 0x0031, 0x0033, 0x0035, 0x0037,
+    0x0039, 0x003b, 0x003d, 0x003f, 0x0041, 0x0043, 0x0045, 0x0047,
+    0x0049, 0x004b, 0x004d, 0x004f, 0x0051, 0x0053, 0x0055, 0x0057,
+    0x0059, 0x005b, 0x005d, 0x005f, 0x0061, 0x0063, 0x0065, 0x0067,
+    0x0069, 0x006b, 0x006d, 0x006f, 0x0071, 0x0073, 0x0075, 0x0077,
+    0x0079, 0x007b, 0x007d, 0x007f, 0x0081, 0x0083, 0x0085, 0x0087
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_4[32] = {
+    0x0020, 0x0038, 0x2015, 0x2015, 0x4001, 0x4001, 0x4001, 0x4001,
+    0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b,
+    0x0028, 0x0030, 0x0021, 0x0019, 0x2027, 0x2027, 0x0025, 0x0023,
+    0x201d, 0x201d, 0x201b, 0x201b, 0x0011, 0x001f, 0x0013, 0x0009
+};
+
+const OMX_U16 * armVCM4P10_CAVLCCoeffTokenTables[18] = { /* indexed by nC; several nC values share a table */
+    armVCM4P10_CAVLCCoeffTokenTables_0, /* nC=0 */
+    armVCM4P10_CAVLCCoeffTokenTables_0, /* nC=1 */
+    armVCM4P10_CAVLCCoeffTokenTables_1, /* nC=2 */
+    armVCM4P10_CAVLCCoeffTokenTables_1, /* nC=3 */
+    armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=4 */
+    armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=5 */
+    armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=6 */
+    armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=7 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=8 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=9 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=10 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=11 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=12 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=13 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=14 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=15 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=16 */
+    armVCM4P10_CAVLCCoeffTokenTables_4  /* nC=-1, stored at index 17 (presumably the chroma case) */
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_0[40] = {
+    0x0020, 0x0048, 0x0009, 0x0007, 0x2005, 0x2005, 0x2003, 0x2003,
+    0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+    0x0028, 0x0040, 0x0011, 0x000f, 0x0030, 0x0038, 0x0019, 0x0017,
+    0x27ff, 0x27ff, 0x201f, 0x201f, 0x201d, 0x201d, 0x201b, 0x201b,
+    0x2015, 0x2015, 0x2013, 0x2013, 0x200d, 0x200d, 0x200b, 0x200b
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_1[24] = {
+    0x0020, 0x0028, 0x0011, 0x000f, 0x000d, 0x000b, 0x2009, 0x2009,
+    0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001,
+    0x001d, 0x001b, 0x0019, 0x0017, 0x2015, 0x2015, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_2[24] = {
+    0x0020, 0x0028, 0x0011, 0x000b, 0x0009, 0x0001, 0x200f, 0x200f,
+    0x200d, 0x200d, 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003,
+    0x001b, 0x0017, 0x2019, 0x2019, 0x2015, 0x2015, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_3[24] = {
+    0x0020, 0x0028, 0x0013, 0x000f, 0x0007, 0x0005, 0x2011, 0x2011,
+    0x200d, 0x200d, 0x200b, 0x200b, 0x2009, 0x2009, 0x2003, 0x2003,
+    0x2019, 0x2019, 0x2017, 0x2017, 0x2015, 0x2015, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_4[20] = {
+    0x0020, 0x0015, 0x0011, 0x0005, 0x0003, 0x0001, 0x200f, 0x200f,
+    0x200d, 0x200d, 0x200b, 0x200b, 0x2009, 0x2009, 0x2007, 0x2007,
+    0x2017, 0x2017, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_5[20] = {
+    0x0020, 0x0011, 0x2013, 0x2013, 0x200f, 0x200f, 0x200d, 0x200d,
+    0x200b, 0x200b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2005, 0x2005,
+    0x0015, 0x0001, 0x2003, 0x2003
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_6[20] = {
+    0x0020, 0x000f, 0x2011, 0x2011, 0x200d, 0x200d, 0x2009, 0x2009,
+    0x2007, 0x2007, 0x2005, 0x2005, 0x400b, 0x400b, 0x400b, 0x400b,
+    0x0013, 0x0001, 0x2003, 0x2003
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_7[20] = {
+    0x0020, 0x0003, 0x200f, 0x200f, 0x200d, 0x200d, 0x2007, 0x2007,
+    0x400b, 0x400b, 0x400b, 0x400b, 0x4009, 0x4009, 0x4009, 0x4009,
+    0x0011, 0x0001, 0x2005, 0x2005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_8[20] = {
+    0x0020, 0x0005, 0x200b, 0x200b, 0x400d, 0x400d, 0x400d, 0x400d,
+    0x4009, 0x4009, 0x4009, 0x4009, 0x4007, 0x4007, 0x4007, 0x4007,
+    0x0003, 0x0001, 0x200f, 0x200f
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_9[20] = {
+    0x0020, 0x000d, 0x2005, 0x2005, 0x400b, 0x400b, 0x400b, 0x400b,
+    0x4009, 0x4009, 0x4009, 0x4009, 0x4007, 0x4007, 0x4007, 0x4007,
+    0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_10[16] = {
+    0x0001, 0x0003, 0x2005, 0x2005, 0x2007, 0x2007, 0x200b, 0x200b,
+    0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_11[16] = {
+    0x0001, 0x0003, 0x2009, 0x2009, 0x4005, 0x4005, 0x4005, 0x4005,
+    0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_12[16] = {
+    0x2001, 0x2001, 0x2003, 0x2003, 0x4007, 0x4007, 0x4007, 0x4007,
+    0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_13[16] = {
+    0x4001, 0x4001, 0x4001, 0x4001, 0x4003, 0x4003, 0x4003, 0x4003,
+    0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_14[16] = {
+    0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+    0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003
+};
+
+const OMX_U16 * armVCM4P10_CAVLCTotalZeroTables[15] = {
+    armVCM4P10_CAVLCTotalZeroTables_0,
+    armVCM4P10_CAVLCTotalZeroTables_1,
+    armVCM4P10_CAVLCTotalZeroTables_2,
+    armVCM4P10_CAVLCTotalZeroTables_3,
+    armVCM4P10_CAVLCTotalZeroTables_4,
+    armVCM4P10_CAVLCTotalZeroTables_5,
+    armVCM4P10_CAVLCTotalZeroTables_6,
+    armVCM4P10_CAVLCTotalZeroTables_7,
+    armVCM4P10_CAVLCTotalZeroTables_8,
+    armVCM4P10_CAVLCTotalZeroTables_9,
+    armVCM4P10_CAVLCTotalZeroTables_10,
+    armVCM4P10_CAVLCTotalZeroTables_11,
+    armVCM4P10_CAVLCTotalZeroTables_12,
+    armVCM4P10_CAVLCTotalZeroTables_13,
+    armVCM4P10_CAVLCTotalZeroTables_14
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_0[16] = {
+    0x2007, 0x2007, 0x2005, 0x2005, 0x4003, 0x4003, 0x4003, 0x4003,
+    0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_1[16] = {
+    0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003,
+    0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_2[16] = {
+    0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003,
+    0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+const OMX_U16 * armVCM4P10_CAVLCTotalZeros2x2Tables[3] = {
+    armVCM4P10_CAVLCTotalZeros2x2Tables_0,
+    armVCM4P10_CAVLCTotalZeros2x2Tables_1,
+    armVCM4P10_CAVLCTotalZeros2x2Tables_2
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_0[8] = {
+    0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_1[8] = {
+    0x2005, 0x2005, 0x2003, 0x2003, 0x4001, 0x4001, 0x4001, 0x4001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_2[8] = {
+    0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_3[8] = {
+    0x0009, 0x0007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_4[8] = {
+    0x000b, 0x0009, 0x0007, 0x0005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_5[8] = {
+    0x0003, 0x0005, 0x0009, 0x0007, 0x000d, 0x000b, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_6[24] = {
+    0x0010, 0x000d, 0x000b, 0x0009, 0x0007, 0x0005, 0x0003, 0x0001,
+    0x0018, 0x0011, 0x200f, 0x200f, 0x0020, 0x0015, 0x2013, 0x2013,
+    0x0028, 0x0019, 0x2017, 0x2017, 0x07ff, 0x001d, 0x201b, 0x201b
+};
+
+/* Tables 7 to 14 are duplicates of table 6 */
+
+const OMX_U16 * armVCM4P10_CAVLCRunBeforeTables[15] = {
+    armVCM4P10_CAVLCRunBeforeTables_0,  /* ZerosLeft=1 */
+    armVCM4P10_CAVLCRunBeforeTables_1,
+    armVCM4P10_CAVLCRunBeforeTables_2,
+    armVCM4P10_CAVLCRunBeforeTables_3,
+    armVCM4P10_CAVLCRunBeforeTables_4,
+    armVCM4P10_CAVLCRunBeforeTables_5,  /* ZerosLeft=6 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=7 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=8 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=9 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=10 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=11 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=12 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=13 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=14 */
+    armVCM4P10_CAVLCRunBeforeTables_6   /* ZerosLeft=15 */
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
new file mode 100644
index 0000000..dcbcd00
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
@@ -0,0 +1,20 @@
+;//
+;// 
+;// File Name:  armVCM4P10_DeblockingChroma_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+
+
+
+        END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
new file mode 100644
index 0000000..14b37fe
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
@@ -0,0 +1,366 @@
+;//
+;// 
+;// File Name:  armVCM4P10_DeblockingLuma_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+
+
+
+    IF  ARM1136JS
+
+MASK_1  EQU 0x01010101
+
+;// Declare input registers
+
+pQ0        RN 0
+StepArg    RN 1
+tC0Arg     RN 2
+alpha      RN 6
+
+beta       RN 14
+bS         RN 14
+tC0        RN 14
+ptC0       RN 1
+
+;// Declare Local/Temporary variables
+
+;// Pixels
+p_0     RN 3 
+p_1     RN 5  
+p_2     RN 4  
+p_3     RN 2  
+q_0     RN 8  
+q_1     RN 9  
+q_2     RN 10 
+q_3     RN 12 
+
+
+;// Filtering
+
+ap0q0   RN 1  
+filt    RN 2
+        
+m00     RN 7
+m01     RN 11
+
+apflg   RN 0 
+aqflg   RN 6
+
+tC      RN 1
+
+
+;//Declarations for bSLT4 kernel
+
+pos     RN 7
+neg     RN 12
+
+P0a     RN 1   
+P1a     RN 8   
+Q0a     RN 7  
+Q1a     RN 4   
+
+u1      RN 3   
+max     RN 12
+min     RN 2   
+               
+                
+                
+;//Declarations for bSGE4 kernel
+
+q_3b    RN 9   
+p_3b    RN 0
+apqflg  RN 12
+
+P0b     RN 6
+P1b     RN 7 
+P2b     RN 1
+
+Q0b     RN 9 
+Q1b     RN 0 
+Q2b     RN 2
+
+;// Miscellaneous
+
+a       RN 0
+t0      RN 3 
+t1      RN 12
+t2      RN 7
+t3      RN 11
+t4      RN 4   
+t5      RN 1   
+t8      RN 6   
+t9      RN 14  
+t10     RN 5   
+t11     RN 9   
+
+;// Register usage for - armVCM4P10_DeblockingLumabSLT4_unsafe()
+;//
+;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2)
+;//        - 2 - filt, 0 - apflg, 6 - aqflg
+;//        - 11 - m01, 7 - tC0
+;//         
+;// Outputs - 1,8,7,11 - Output Pixels(P0a,P1a,Q0a,Q1a)
+;//
+;// Registers Corrupted - 0-3,5-12,14
+;// NOTE(review): r4 is also written below (as t4 and Q1a) but is not listed - confirm
+
+        M_START armVCM4P10_DeblockingLumabSLT4_unsafe, lr
+
+        ;// Since beta <= 18 and alpha <= 255 we know
+        ;// -254 <= p0-q0 <= 254
+        ;//  -17 <= q1-q0 <= 17
+        ;//  -17 <= p1-p0 <= 17
+
+        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
+        ;// 
+        ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
+        ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
+        ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
+        
+        USUB8   t1, p_1, p_0
+        MUL     tC0, t2, m01            ;// r7 (t2) holds the tC0 input; presumably replicated to all bytes
+        
+        USUB8   t2, q_1, q_0
+        SSUB8   t1, t1, t2
+
+        USUB8   t2, p_0, q_0
+        AND     t2, t2, m01
+        SHSUB8  t1, t1, t2
+        UHSUB8  t5, p_0, q_0
+        SSUB8   t1, t1, t2
+        SHSUB8  t1, t1, t5
+        MOV     m00, #0                 ;// m00 reuses r7 now that t2 is no longer needed
+        SADD8   t1, t1, m01
+        SHSUB8  t1, t1, t5
+        
+        ;// tC = tC0
+        ;// if (ap < beta) tC++;
+        ;// if (aq < beta) tC++;
+        USUB8   t5, filt, m01           ;// only the GE flags matter: set where filt >= m01
+        SEL     tC0, tC0, m00           ;// tC0 in those bytes, 0 elsewhere
+        UQADD8  tC, tC0, apflg
+        SSUB8   t1, t1, m00             ;// t1 - 0: sets GE where t1 is non-negative
+        UQADD8  tC, tC, aqflg
+
+        ;// Split into positive and negative part and clip 
+        SEL     pos, t1, m00            ;// pos = t1 where t1 >= 0, else 0
+        USUB8   neg, pos, t1            ;// neg = pos - t1
+        USUB8   t3, pos, tC             ;// GE where pos >= tC (t3 itself is scratch)
+        SEL     pos, tC, pos            ;// pos limited to tC
+        USUB8   t3, neg, tC
+        SEL     neg, tC, neg            ;// neg limited to tC
+        
+        ;//Reload m01 (t3 shares r11 with m01 and was overwritten by the clipping above)
+        LDR     m01,=MASK_1
+
+        UQADD8  P0a, p_0, pos
+        UQSUB8  Q0a, q_0, pos
+        UQSUB8  P0a, P0a, neg
+        UQADD8  Q0a, Q0a, neg
+        
+        ;// Choose to store the filtered
+        ;// value or the original pixel
+        USUB8   t1, filt, m01    
+        SEL     P0a, P0a, p_0
+        SEL     Q0a, Q0a, q_0
+    
+        ;// delta = (p2 + ((p0+q0+1)>>1) - (p1<<1))>>1;
+        ;// u1 = (p0 + q0 + 1)>>1
+        ;// u1 = ( (q_0 - p_0')>>1 ) ^ 0x80
+        MVN     p_0, p_0
+        UHSUB8  u1, q_0, p_0 
+        UQADD8  max, p_1, tC0
+        EOR     u1, u1, m01 ,LSL #7     ;// m01 << 7 supplies the 0x80 per byte
+    
+        ;// Calculate A = (p2+u1)>>1 
+        ;// Then delta = Clip3( -tC0, tC0, A - p1)
+
+        ;// Clip P1
+        UHADD8  P1a, p_2, u1
+        UQSUB8  min, p_1, tC0           ;// min reuses r2 (filt is not read again)
+        USUB8   t4, P1a, max
+        SEL     P1a, max, P1a
+        USUB8   t4, P1a, min
+        SEL     P1a, P1a, min
+
+        ;// Clip Q1
+        UHADD8  Q1a, q_2, u1
+        UQADD8  max, q_1, tC0
+        UQSUB8  min, q_1, tC0
+        USUB8   t0, Q1a, max
+        SEL     Q1a, max, Q1a
+        USUB8   t0, Q1a, min
+        SEL     Q1a, Q1a, min
+        
+        ;// Choose to store the filtered
+        ;// value or the original pixel
+        USUB8   t0, apflg, m01
+        SEL     P1a, P1a, p_1
+        USUB8   t0, aqflg, m01
+        SEL     t3, Q1a, q_1            ;// Q1 result leaves in r11 (t3), as listed under Outputs
+        
+        M_END
+
+;// Register usage for - armVCM4P10_DeblockingLumabSGE4_unsafe()
+;//
+;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2)
+;//        - 2 - filt, 0 - apflg,aqflg
+;//        - 1 - ap0q0, 6 - alpha
+;//        - 7 - m00, 11 - m01
+;//         
+;// Outputs - 6,7,1,9,0,2 - Output Pixels(P0b,P1b,P2b, Q0b,Q1b,Q2b)
+;// 
+;// Registers Corrupted - 0-3,5-12,14
+;// NOTE(review): r4 is also written below (as t4) but is not listed - confirm
+        M_START armVCM4P10_DeblockingLumabSGE4_unsafe, lr
+    
+        ;// apflg = apflg && |p0-q0|<((alpha>>2)+2) 
+        ;// aqflg = aqflg && |p0-q0|<((alpha>>2)+2)   (both flags travel together in r0)
+        ;// Stack arguments: pQ_3 and pP_3 are loaded further down into q_3b and p_3b
+        M_ARG   pDummy,4
+        M_ARG   pQ_3,4
+        M_ARG   pP_3,4
+        
+        UHADD8  alpha, alpha, m00       ;// halves alpha per byte (m00 presumably all-zero)
+        USUB8   t9, p_2, p_0    ;//t9 = dp2p0
+        UHADD8  alpha, alpha, m00       ;// second halving: alpha>>2
+        ADD     alpha, alpha, m01, LSL #1        
+        USUB8   ap0q0, ap0q0, alpha     ;// GE where ap0q0 >= the threshold
+        SEL     apqflg, m00, apflg      ;// flags cleared in those bytes, kept elsewhere
+
+        ;// P0 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3 
+        ;//    = ((p2-p0) + 2*(p1-p0) + (q1-q0) + 3*(q0-p0) + 8*p0 + 4)>>3
+        ;//    = p0 + (((p2-p0) + 2*(p1-p0) + (q1-q0) - 3*(p0-q0) + 4)>>3)
+
+        ;// P1 = (p2 + p1 + q0 + p0 + 2)>>2
+        ;//    = p0 + (((p2-p0) + (p1-p0) - (p0-q0) + 2)>>2)
+        
+        ;// P2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3
+        ;//    = (2*(p3-p0) + 3*(p2-p0) + (p1-p0) - (p0-q0) + 8*p0 + 4)>>3
+        ;//    = p0 + (((p3-p0) + (p2-p0) + t2 + 2)>>2)
+
+        ;// Compute P0b
+        USUB8   t2, p_0, q_0            ;// t2 reuses r7, so the m00 input is gone from here on
+        SSUB8   t5, t9, t2           
+
+        USUB8   t8, q_1, q_0         
+        SHADD8  t8, t5, t8
+
+        USUB8   t9, p_1, p_0         
+        SADD8   t8, t8, t9
+        SHSUB8  t8, t8, t2
+        SHADD8  t5, t5, t9
+        SHADD8  t8, t8, m01
+        SHADD8  t9, t5, m01
+        SADD8   P0b, p_0, t8         
+        ;// P0b ready
+        
+        ;// Compute P1b
+        M_LDR   p_3b, pP_3
+        SADD8   P1b, p_0, t9         
+        ;// P1b ready
+        
+        ;// Compute P2b
+        USUB8   t9, p_2, p_0         
+        SADD8   t5, t5, t9
+        UHSUB8  t9, p_3b, p_0        
+        EOR     a, p_3b, p_0         
+        AND     a, a, m01
+        SHADD8  t5, t5, a
+        UHADD8  a, p_0, q_1
+        SADD8   t5, t5, m01
+        SHADD8  t5, t5, t9
+        MVN     t9, p_1
+        SADD8   P2b, p_0, t5         
+        ;// P2b ready
+        
+        UHSUB8  a, a, t9
+        ORR     t9, apqflg, m01
+        USUB8   t9, apqflg, t9          ;// GE where apqflg >= (apqflg | m01)
+
+        EOR     a, a, m01, LSL #7
+        SEL     P0b, P0b, a
+        SEL     P1b, P1b, p_1
+        SEL     P2b, P2b, p_2
+
+        USUB8   t4, filt, m01           ;// GE where filt >= m01
+        SEL     P0b, P0b, p_0           ;// original p0 kept in the other bytes
+
+        
+        ;// Q0 = (q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3 
+        ;//    = ((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 8*q0 + 4)>>3
+        ;//    = q0 + (((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 4)>>3)
+
+        ;// Q1 = (q2 + q1 + p0 + q0 + 2)>>2
+        ;//    = q0 + (((q2-q0) + (q1-q0) + (p0-q0) + 2)>>2)
+
+        ;// Q2 = (2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3
+        ;//    = (2*(q3-q0) + 3*(q2-q0) + (q1-q0) + (p0-q0) + 8*q0 + 4)>>3
+        ;//    = q0 + (((q3-q0) + (q2-q0) + t2 + 2)>>2)
+
+
+        ;// Compute Q0b Q1b
+        USUB8   t4, q_2, q_0           
+        USUB8   a, p_0, q_0
+        USUB8   t9, p_1, p_0
+        SADD8   t0, t4, a
+        SHADD8  t9, t0, t9
+        UHADD8  t10, q_0, p_1
+        SADD8   t9, t9, a
+        USUB8   a, q_1, q_0
+        SHADD8  t9, t9, a
+        SHADD8  t0, t0, a
+        SHADD8  t9, t9, m01
+        SHADD8  a, t0, m01
+        SADD8   t9, q_0, t9            
+        ;// Q0b ready - t9
+        
+        MOV     t4, #0                  ;// local zero (r7 no longer holds m00)
+        UHADD8  apqflg, apqflg, t4      ;// halves every flag byte
+        
+        SADD8   Q1b, q_0, a 
+        ;// Q1b ready
+       
+        USUB8   t4, apqflg, m01         ;// GE where the halved flag byte >= m01
+        SEL     Q1b, Q1b, q_1
+        MVN     t11, q_1
+        UHSUB8  t10, t10, t11
+        M_LDR   q_3b, pQ_3
+        EOR     t10, t10, m01, LSL #7
+        SEL     t9, t9, t10            
+        
+        ;// Compute Q2b
+        USUB8   t4, q_2, q_0
+        SADD8   t4, t0, t4
+        EOR     t0, q_3b, q_0 
+        AND     t0, t0, m01
+        SHADD8  t4, t4, t0
+        UHSUB8  t10, q_3b, q_0
+        SADD8   t4, t4, m01
+        SHADD8  t4, t4, t10
+
+        USUB8   t10, filt, m01
+        SEL     Q0b, t9, q_0
+
+        SADD8   t4, q_0, t4            
+        ;// Q2b ready - t4
+
+        USUB8   t10, apqflg, m01
+        SEL     Q2b, t4, q_2
+
+        M_END
+    
+    ENDIF
+
+        END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
new file mode 100644
index 0000000..ac448a0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
@@ -0,0 +1,325 @@
+;//
+;// 
+;// File Name:  armVCM4P10_DecodeCoeffsToPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        INCLUDE armCOMM_BitDec_s.h
+        
+        IMPORT armVCM4P10_CAVLCCoeffTokenTables
+        IMPORT armVCM4P10_CAVLCTotalZeroTables
+        IMPORT armVCM4P10_CAVLCTotalZeros2x2Tables
+        IMPORT armVCM4P10_CAVLCRunBeforeTables
+        IMPORT armVCM4P10_SuffixToLevel
+        IMPORT armVCM4P10_ZigZag_4x4
+        IMPORT armVCM4P10_ZigZag_2x2
+        
+        M_VARIANTS ARM1136JS
+        
+;//DEBUG_ON    SETL {TRUE}
+        
+LAST_COEFF               EQU 0x20        ;// End of block flag
+TWO_BYTE_COEFF           EQU 0x10
+
+;// Declare input registers
+
+ppBitStream     RN 0
+pOffset         RN 1
+pNumCoeff       RN 2
+ppPosCoefbuf    RN 3
+nC              RN 4 ;// number of coeffs or 17 for chroma
+sMaxNumCoeff    RN 5
+
+;// Declare inner loop registers
+
+;// Level loop
+Count           RN 0
+TrailingOnes    RN 1
+pLevel          RN 2
+LevelSuffix     RN 3
+SuffixLength    RN 4
+TotalCoeff      RN 5
+
+pVLDTable       RN 6
+Symbol          RN 7
+T1              RN 8
+T2              RN 9
+RBitStream      RN 10
+RBitBuffer      RN 11
+RBitCount       RN 12
+lr              RN 14
+
+;// Run loop
+Count           RN 0
+ZerosLeft       RN 1
+pLevel          RN 2
+ppRunTable      RN 3
+pRun            RN 4
+TotalCoeff      RN 5
+
+pVLDTable       RN 6
+Symbol          RN 7
+T1              RN 8
+T2              RN 9
+RBitStream      RN 10
+RBitBuffer      RN 11
+RBitCount       RN 12
+lr              RN 14
+
+;// Fill in coefficients loop
+pPosCoefbuf     RN 0
+temp            RN 1
+pLevel          RN 2
+ppPosCoefbuf    RN 3
+pRun            RN 4
+TotalCoeff      RN 5
+pZigZag         RN 6
+
+T1              RN 8
+T2              RN 9
+RBitStream      RN 10
+RBitBuffer      RN 11
+RBitCount       RN 12
+CoeffNum        RN 14
+
+
+
+    IF ARM1136JS
+        
+        ;// Allocate stack memory required by the function
+        M_ALLOC4 pppBitStream, 4
+        M_ALLOC4 ppOffset, 4
+        M_ALLOC4 pppPosCoefbuf, 4
+        M_ALLOC4 ppLevel, 16*2
+        M_ALLOC4 ppRun, 16
+        
+        ;// Write function header
+        M_START armVCM4P10_DecodeCoeffsToPair, r11
+        ;// Decodes one CAVLC block: coeff token, trailing ones, levels, total zeros and runs, then writes the coefficients out at *ppPosCoefbuf
+        ;// Define stack arguments
+        M_ARG   pNC, 4                      ;// stack argument loaded into nC; indexes the coeff token table list
+        M_ARG   pSMaxNumCoeff,4             ;// stack argument: sMaxNumCoeff (compared against 4 and 15 below)
+        
+        ;// Code start        
+        M_BD_INIT0 ppBitStream, pOffset, RBitStream, RBitBuffer, RBitCount
+        LDR        pVLDTable, =armVCM4P10_CAVLCCoeffTokenTables
+        M_LDR      nC, pNC
+        
+        M_BD_INIT1 T1, T2, lr
+        LDR     pVLDTable, [pVLDTable, nC, LSL #2]  ;// Find VLD table    
+        
+        M_BD_INIT2 T1, T2, lr
+
+        ;// Decode Symbol = TotalCoeff*4 + TrailingOnes
+        M_BD_VLD  Symbol, T1, T2, pVLDTable, 4, 2
+    
+        MOVS    TotalCoeff, Symbol, LSR #2    ;// TotalCoeff = Symbol>>2, Z set when it is zero
+        STRB    TotalCoeff, [pNumCoeff]    ;// *pNumCoeff = TotalCoeff (stored before the range check below)
+        M_PRINTF "TotalCoeff=%d\n", TotalCoeff
+        BEQ.W   EndNoError                  ;// Finished if no coefficients
+
+        CMP     Symbol, #17*4               ;// i.e. TotalCoeff >= 17
+        BGE.W   EndBadSymbol                ;// Error if bad symbol
+        
+        ;// Save bitstream pointers
+        M_STR   ppBitStream,  pppBitStream
+        M_STR   pOffset,      ppOffset
+        M_STR   ppPosCoefbuf, pppPosCoefbuf                
+        
+        ;// Decode Trailing Ones
+        ANDS    TrailingOnes, Symbol, #3    ;// low two bits of Symbol
+        M_ADR   pLevel, ppLevel            
+        M_PRINTF "TrailingOnes=%d\n", TrailingOnes
+        BEQ     TrailingOnesDone    
+        MOV     Count, TrailingOnes
+TrailingOnesLoop    
+        M_BD_READ8 Symbol, 1, T1            ;// read one bit into Symbol
+        SUBS    Count, Count, #1
+        MOV     T1, #1
+        SUB     T1, T1, Symbol, LSL #1      ;// T1 = 1 - 2*Symbol: +1 or -1
+        M_PRINTF "Level=%d\n", T1
+        STRH    T1, [pLevel], #2
+        BGT     TrailingOnesLoop            ;// uses the flags of SUBS Count (MOV/SUB/STRH do not set flags)
+TrailingOnesDone    
+    
+        ;// Decode level values    
+        SUBS    Count, TotalCoeff, TrailingOnes     ;// Number of levels to read
+        BEQ     DecodeRuns                          ;// None left
+        
+        MOV     SuffixLength, #1
+        CMP     TotalCoeff, #10
+        MOVLE   SuffixLength, #0    ;// SuffixLength = (TotalCoeff>10) ? 1 : 0
+        CMP     TrailingOnes, #3    ;// if (TrailingOnes<3)
+        MOVLT   TrailingOnes, #4    ;// then TrailingOnes = +4
+        MOVGE   TrailingOnes, #2    ;// else TrailingOnes = +2
+        MOVGE   SuffixLength, #0    ;//      SuffixLength = 0
+        
+LevelLoop
+        M_BD_CLZ16 Symbol, T1, T2   ;// Symbol=LevelPrefix
+        CMP     Symbol,#16
+        BGE     EndBadSymbol
+        
+        MOVS    lr, SuffixLength    ;// if LevelSuffixSize==0
+        TEQEQ   Symbol, #14         ;//   and  LevelPrefix==14
+        MOVEQ   lr, #4              ;//   then LevelSuffixSize=4
+        TEQ     Symbol, #15         ;// if LevelPrefix==15
+        MOVEQ   lr, #12             ;//   then LevelSuffixSize=12
+        
+        TEQEQ   SuffixLength,#0     ;// if LevelPrefix==15 and SuffixLength==0
+        ADDEQ   Symbol,Symbol,#15   ;//   then Symbol += 15
+        
+        TEQ     lr, #0              ;// if LevelSuffixSize==0
+        BEQ     LevelCodeRead       ;// LevelCode = LevelPrefix
+        
+        M_BD_VREAD16 LevelSuffix, lr, T1, T2  ;// Read Level Suffix
+        
+        MOV     Symbol, Symbol, LSL SuffixLength    ;// Symbol = (Symbol << SuffixLength)
+        ADD     Symbol, LevelSuffix, Symbol         ;//          + LevelSuffix
+             
+LevelCodeRead        
+        ;// Symbol = LevelCode
+        ADD     Symbol, Symbol, TrailingOnes ;// +4 if level cannot be +/-1, +2 o/w
+        MOV     TrailingOnes, #2
+        MOVS    T1, Symbol, LSR #1          ;// T1 = Symbol>>1, carry = bit 0 of Symbol
+        RSBCS   T1, T1, #0                  ;// If Symbol odd then negate
+        M_PRINTF "Level=%d\n", T1
+        STRH    T1, [pLevel], #2            ;// Store level.
+        
+        LDR     T2, =armVCM4P10_SuffixToLevel
+        LDRSB   T1, [T2, SuffixLength]      ;// Find increment level        
+        TEQ     SuffixLength, #0            ;// a SuffixLength of 0 (already used for the lookup above)
+        MOVEQ   SuffixLength, #1            ;//   becomes 1
+        CMP     Symbol, T1                  ;// if Symbol >= T1 (unsigned compare)
+        ADDCS   SuffixLength, SuffixLength, #1        ;//   then SuffixLength++
+        SUBS    Count, Count, #1        
+        BGT     LevelLoop
+        
+DecodeRuns        
+        ;// Find number of zeros
+        M_LDR   T1, pSMaxNumCoeff           ;// sMaxNumCoeff
+        SUB     Count, TotalCoeff, #1       ;// Number of runs excluding last
+        SUBS    ZerosLeft, T1, TotalCoeff   ;// Maximum number of zeros there could be
+        M_ADR   pRun, ppRun
+        MOV     CoeffNum,TotalCoeff
+        SUB     CoeffNum,CoeffNum,#1        ;// CoeffNum = TotalCoeff-1
+        BEQ     NoZerosLeft                 ;// tests the SUBS ZerosLeft result (MOV/SUB do not set flags; assumes M_ADR preserves them - TODO confirm)
+        
+        ;// Unpack number of zeros from bitstream
+        TEQ     T1, #4                      ;// sMaxNumCoeff==4 selects the 2x2 total-zeros tables
+        LDREQ   pVLDTable, =(armVCM4P10_CAVLCTotalZeros2x2Tables-4)
+        LDRNE   pVLDTable, =(armVCM4P10_CAVLCTotalZeroTables-4)
+        LDR     pVLDTable, [pVLDTable, TotalCoeff, LSL #2]  ;// the -4 bias makes TotalCoeff==1 select the first pointer
+        
+        M_BD_VLD  Symbol, T1, T2, pVLDTable, 4, 2 ;// Symbol = ZerosLeft
+        CMP     Symbol,#16
+        BGE     EndBadSymbol
+
+        LDR     ppRunTable, =(armVCM4P10_CAVLCRunBeforeTables-4)
+        M_ADR   pRun, ppRun
+        MOVS    ZerosLeft, Symbol
+
+        ADD     CoeffNum,CoeffNum,ZerosLeft        ;// CoeffNum = TotalCoeff-1+ZerosLeft
+
+        BEQ     NoZerosLeft                 ;// tests the MOVS ZerosLeft result (ADD does not set flags)
+        
+        ;// Decode runs while zeros are left and more than one coefficient
+RunLoop 
+        SUBS    Count, Count, #1
+        LDR     pVLDTable, [ppRunTable, ZerosLeft, LSL#2]   ;// run table selected by ZerosLeft
+        BLT     LastRun                     ;// Count went negative: only the final run remains
+        M_BD_VLD  Symbol, T1, T2, pVLDTable, 3, 2 ;// Symbol = Run
+        CMP     Symbol,#15         
+        BGE     EndBadSymbol        
+
+        SUBS    ZerosLeft, ZerosLeft, Symbol
+        M_PRINTF "Run=%d\n", Symbol
+        STRB    Symbol, [pRun], #1
+        BGT     RunLoop                     ;// continue while ZerosLeft > 0
+        
+        ;// Decode runs while no zeros are left
+NoZerosLeft 
+        SUBS    Count, Count, #1
+        M_PRINTF "Run=%d\n", ZerosLeft
+        STRGEB  ZerosLeft, [pRun], #1       ;// store the current ZerosLeft as the run while Count >= 0
+        BGT     NoZerosLeft
+
+LastRun        
+        ;// Final run length is remaining zeros
+        M_PRINTF "LastRun=%d\n", ZerosLeft
+        STRB    ZerosLeft, [pRun], #1        
+        
+        ;// Write coefficients to output array
+        M_LDR   T1, pSMaxNumCoeff                    ;// sMaxNumCoeff
+        TEQ     T1, #15                     ;// sMaxNumCoeff==15:
+        ADDEQ   CoeffNum,CoeffNum,#1        ;//   positions start one later (presumably the DC slot is skipped - TODO confirm)
+        
+
+        SUB     pRun,pRun,TotalCoeff        ;// rewind pRun: TotalCoeff bytes were written
+        SUB     pLevel,pLevel,TotalCoeff  
+        SUB     pLevel,pLevel,TotalCoeff   ;// rewind pLevel: 2*TotalCoeff bytes were written
+
+        M_LDR   ppPosCoefbuf, pppPosCoefbuf
+        LDR     pPosCoefbuf, [ppPosCoefbuf]
+        TEQ     T1, #4                      ;// sMaxNumCoeff==4 selects the 2x2 zig-zag table
+        LDREQ   pZigZag, =armVCM4P10_ZigZag_2x2
+        LDRNE   pZigZag, =armVCM4P10_ZigZag_4x4
+
+        
+        
+OutputLoop
+        
+        LDRB    T2, [pRun],#1               ;// T2 = run for this coefficient
+        LDRB    T1, [pZigZag, CoeffNum]     ;// T1 = zig-zag table entry for index CoeffNum
+        SUB     CoeffNum, CoeffNum, #1      ;// Skip Non zero
+        SUB     CoeffNum, CoeffNum, T2      ;// Skip Zero run
+        
+        LDRSH   T2, [pLevel],#2             ;// T2 = signed 16-bit level
+        
+        SUBS    TotalCoeff, TotalCoeff, #1       
+        ORREQ   T1, T1, #LAST_COEFF         ;// flag the final coefficient of the block
+        
+        ADD     temp, T2, #128
+        CMP     temp, #256                  ;// unsigned: carry set when level is outside [-128,127]
+        ORRCS   T1, T1, #TWO_BYTE_COEFF
+
+        
+        TEQ     TotalCoeff, #0              ;// Preserves carry        
+        
+        M_PRINTF "Output=%02x %04x\n", T1, T2
+        STRB    T1, [pPosCoefbuf], #1       ;// first byte: T1 (table entry plus flags)
+        STRB    T2, [pPosCoefbuf], #1       ;// low byte of the level
+        MOV     T2, T2, LSR #8
+        STRCSB  T2, [pPosCoefbuf], #1                ;// high byte only for two-byte levels
+        BNE     OutputLoop                  ;// Z comes from the TEQ TotalCoeff above
+        
+        ;// Finished
+        STR     pPosCoefbuf, [ppPosCoefbuf] ;// write back the advanced output pointer
+        M_LDR   ppBitStream, pppBitStream
+        M_LDR   pOffset, ppOffset
+        B       EndNoError
+            
+EndBadSymbol
+        MOV     r0, #OMX_Sts_Err
+        B       End    ;// error path bypasses M_BD_FINI and the output pointer write-back
+        
+EndNoError
+        ;// Finished reading from the bitstream                
+        M_BD_FINI ppBitStream, pOffset
+        
+        ;// Set return value
+        MOV     r0, #OMX_Sts_NoErr    
+End
+        M_END
+    
+    ENDIF
+    
+    END
+    
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s
new file mode 100644
index 0000000..b16f188
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s
@@ -0,0 +1,123 @@
+;//
+;// 
+;// File Name:  armVCM4P10_DequantTables_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        
+
+         INCLUDE omxtypes_s.h
+         INCLUDE armCOMM_s.h
+     
+         EXPORT armVCM4P10_QPDivTable
+         EXPORT armVCM4P10_VMatrixQPModTable
+         EXPORT armVCM4P10_PosToVCol4x4
+         EXPORT armVCM4P10_PosToVCol2x2
+         EXPORT armVCM4P10_VMatrix 
+         EXPORT armVCM4P10_QPModuloTable
+         EXPORT armVCM4P10_VMatrixU16
+         
+;// Define the processor variants supported by this file
+         
+         M_VARIANTS ARM1136JS
+           
+         
+;// Guarding implementation by the processor name
+
+    
+    IF ARM1136JS :LOR: CortexA8
+           
+ 
+         M_TABLE armVCM4P10_PosToVCol4x4   ;// 16 bytes with values 0..2 (presumably a VMatrix column index per 4x4 position - TODO confirm)
+         DCB  0, 2, 0, 2
+         DCB  2, 1, 2, 1
+         DCB  0, 2, 0, 2
+         DCB  2, 1, 2, 1
+
+
+         M_TABLE armVCM4P10_PosToVCol2x2   ;// 4 bytes, same values as the top-left 2x2 of the 4x4 table
+         DCB  0, 2
+         DCB  2, 1
+
+
+         M_TABLE armVCM4P10_VMatrix        ;// 6 rows x 3 bytes; column 0 matches one period of armVCM4P10_VMatrixQPModTable (row = QP%6)
+         DCB  10, 16, 13
+         DCB  11, 18, 14
+         DCB  13, 20, 16
+         DCB  14, 23, 18
+         DCB  16, 25, 20
+         DCB  18, 29, 23
+
+;//-------------------------------------------------------
+;// This table evaluates the expression [(INT)(QP/6)],
+;// for values of QP from 0 to 51 (inclusive). 
+;//-------------------------------------------------------
+
+         M_TABLE armVCM4P10_QPDivTable     ;// byte table indexed by QP; 9 rows of 6 = 54 entries
+         DCB  0,  0,  0,  0,  0,  0        ;// QP  0.. 5
+         DCB  1,  1,  1,  1,  1,  1        ;// QP  6..11
+         DCB  2,  2,  2,  2,  2,  2        ;// QP 12..17
+         DCB  3,  3,  3,  3,  3,  3        ;// QP 18..23
+         DCB  4,  4,  4,  4,  4,  4        ;// QP 24..29
+         DCB  5,  5,  5,  5,  5,  5        ;// QP 30..35
+         DCB  6,  6,  6,  6,  6,  6        ;// QP 36..41
+         DCB  7,  7,  7,  7,  7,  7        ;// QP 42..47
+         DCB  8,  8,  8,  8,  8,  8        ;// QP 48..53 (the last two entries lie beyond the documented 0..51 range)
+    
+;//----------------------------------------------------
+;// This table contains armVCM4P10_VMatrix[QP%6][0] entries,
+;// for values of QP from 0 to 51 (inclusive). 
+;//----------------------------------------------------
+
+         M_TABLE armVCM4P10_VMatrixQPModTable  ;// byte table indexed by QP; the 6-entry period is repeated 9 times (54 entries)
+         DCB 10, 11, 13, 14, 16, 18        ;// column 0 of armVCM4P10_VMatrix for QP%6 = 0..5
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+    
+;//-------------------------------------------------------
+;// This table evaluates the modulus expression [QP%6]*6,
+;// for values of QP from 0 to 51 (inclusive). 
+;//-------------------------------------------------------
+
+         M_TABLE armVCM4P10_QPModuloTable  ;// byte table indexed by QP; the 6-entry period is repeated 9 times (54 entries)
+         DCB 0, 6, 12, 18, 24, 30          ;// (QP%6)*6 for QP%6 = 0..5
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+        
+;//-------------------------------------------------------
+;// This table contains the individual byte values stored as
+;// halfwords. This avoids unpacking inside the function
+;//-------------------------------------------------------
+        
+         M_TABLE armVCM4P10_VMatrixU16     ;// same 6x3 values as armVCM4P10_VMatrix, emitted with DCW (16 bits each)
+         DCW 10, 16, 13 
+         DCW 11, 18, 14
+         DCW 13, 20, 16
+         DCW 14, 23, 18
+         DCW 16, 25, 20
+         DCW 18, 29, 23 
+         
+    ENDIF                                                           ;//ARM1136JS            
+
+
+                           
+    
+         END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
new file mode 100644
index 0000000..82b9542
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
@@ -0,0 +1,236 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+
+        EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+
+DEBUG_ON    SETL {FALSE}
+
+    IF ARM1136JS 
+
+;// Declare input registers
+pSrc            RN 0
+srcStep         RN 1
+pDst            RN 8
+iHeight         RN 9
+
+;// Declare inner loop registers
+x               RN 7
+x0              RN 7
+x1              RN 10
+x2              RN 11
+Scratch         RN 12
+
+;// Function: 
+;//     armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+;//
+;// Implements copy from an arbitrary aligned source memory location (pSrc) to a 4 byte aligned
+;// destination pointed by (pDst) for horizontal interpolation.
+;// This function needs to copy 9 bytes in horizontal direction. 
+;//
+;// Registers used as input for this function
+;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy
+;//
+;// Registers preserved for top level function
+;// r2,r3,r4,r5,r6
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the new aligned location which will be used as pSrc
+;// r1 - step size to this aligned location
+
+        ;// Function header
+        M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe     
+        ;// Copies iHeight rows from pSrc (any byte alignment) to pDst as 3 words per row, then returns the copy in r0/r1
+        ;// Copy pDst to scratch
+        MOV     Scratch, pDst               ;// remembered so r0 can be pointed at the copy at CopyEnd
+
+StartAlignedStackCopy
+        AND     x, pSrc, #3                 ;// x = pSrc modulo 4
+        BIC     pSrc, pSrc, #3              ;// round pSrc down to a multiple of 4
+        
+        M_SWITCH x                          ;// one copy loop per value of x (0..3)
+        M_CASE   Copy0toAligned
+        M_CASE   Copy1toAligned
+        M_CASE   Copy2toAligned
+        M_CASE   Copy3toAligned
+        M_ENDSWITCH
+
+Copy0toAligned  
+        LDM     pSrc, {x0, x1, x2}          ;// load 3 words (12 bytes) of the row
+        SUBS    iHeight, iHeight, #1
+        ADD     pSrc, pSrc, srcStep         ;// next source row
+        
+        ;// One cycle stall
+
+        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
+        BGT     Copy0toAligned              ;// flags from SUBS iHeight (ADD/STM do not set flags)
+        B       CopyEnd  
+      
+Copy1toAligned        
+        LDM     pSrc, {x0, x1, x2}
+        SUBS    iHeight, iHeight, #1
+        ADD     pSrc, pSrc, srcStep
+        
+        ;// One cycle stall
+
+        MOV     x0, x0, LSR #8              ;// discard the low 8 bits (1 leading byte, assuming little-endian loads)
+        ORR     x0, x0, x1, LSL #24         ;// top 8 bits come from the low 8 bits of the next word
+        MOV     x1, x1, LSR #8
+        ORR     x1, x1, x2, LSL #24
+        MOV     x2, x2, LSR #8              ;// top bits of the third word are left zero
+        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
+        BGT     Copy1toAligned
+        B       CopyEnd  
+
+Copy2toAligned        
+        LDM     pSrc, {x0, x1, x2}
+        SUBS    iHeight, iHeight, #1
+        ADD     pSrc, pSrc, srcStep
+        
+        ;// One cycle stall
+
+        MOV     x0, x0, LSR #16             ;// same merge as above, shifting by 16 bits
+        ORR     x0, x0, x1, LSL #16
+        MOV     x1, x1, LSR #16
+        ORR     x1, x1, x2, LSL #16
+        MOV     x2, x2, LSR #16
+        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
+        BGT     Copy2toAligned
+        B       CopyEnd  
+
+Copy3toAligned        
+        LDM     pSrc, {x0, x1, x2}
+        SUBS    iHeight, iHeight, #1
+        ADD     pSrc, pSrc, srcStep
+        
+        ;// One cycle stall
+
+        MOV     x0, x0, LSR #24             ;// same merge as above, shifting by 24 bits
+        ORR     x0, x0, x1, LSL #8
+        MOV     x1, x1, LSR #24
+        ORR     x1, x1, x2, LSL #8
+        MOV     x2, x2, LSR #24             ;// only 8 bits of the third word survive
+        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
+        BGT     Copy3toAligned
+
+CopyEnd  
+        
+        MOV     pSrc, Scratch               ;// r0 = start of the copy (the pDst value on entry)
+        MOV     srcStep, #12                ;// r1 = 12, the number of bytes stored per row
+
+        M_END
+    
+
+;// Function:
+;//     armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+;//
+;// Implements copy from an arbitrary aligned source memory location (pSrc) to an aligned
+;// destination pointed by (pDst) for vertical interpolation.
+;// This function needs to copy 4 bytes in horizontal direction 
+;//
+;// Registers used as input for this function
+;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy
+;//
+;// Registers preserved for top level function
+;// r2,r3,r4,r5,r6
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the new aligned location which will be used as pSrc
+;// r1 - step size to this aligned location
+
+        ;// Function header
+        M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe     
+        ;// Copies iHeight rows of 4 bytes from pSrc (any byte alignment) to pDst, one word per row, then returns the copy in r0/r1
+        ;// Copy the rows at pSrc to the aligned buffer at pDst
+StartVAlignedStackCopy
+        AND     x, pSrc, #3                 ;// x = pSrc modulo 4
+        BIC     pSrc, pSrc, #3                        ;// round pSrc down to a multiple of 4
+        
+        
+        M_SWITCH x                          ;// one copy loop per value of x (0..3)
+        M_CASE   Copy0toVAligned
+        M_CASE   Copy1toVAligned
+        M_CASE   Copy2toVAligned
+        M_CASE   Copy3toVAligned
+        M_ENDSWITCH
+        
+Copy0toVAligned  
+        M_LDR   x0, [pSrc], srcStep         ;// load the row word, then step pSrc to the next row
+        SUBS    iHeight, iHeight, #1
+        
+        ;// One cycle stall
+
+        STR     x0, [pDst], #4                              ;// Store aligned output row
+        BGT     Copy0toVAligned             ;// flags from SUBS iHeight (STR does not set flags)
+        B       CopyVEnd  
+      
+Copy1toVAligned        
+        LDR     x1, [pSrc, #4]              ;// second word of the row, read before pSrc is stepped
+        M_LDR   x0, [pSrc], srcStep
+        SUBS    iHeight, iHeight, #1        
+        
+        ;// One cycle stall
+
+        MOV     x1, x1, LSL #24             ;// low 8 bits of the second word become the top 8 bits
+        ORR     x0, x1, x0, LSR #8          ;// merged with the upper 24 bits of the first word
+        STR     x0, [pDst], #4                              ;// Store aligned output row
+        BGT     Copy1toVAligned
+        B       CopyVEnd  
+
+Copy2toVAligned        
+        LDR     x1, [pSrc, #4]
+        M_LDR   x0, [pSrc], srcStep
+        SUBS    iHeight, iHeight, #1        
+        
+        ;// One cycle stall
+
+        MOV     x1, x1, LSL #16             ;// same merge as above, split 16/16 bits
+        ORR     x0, x1, x0, LSR #16
+        STR     x0, [pDst], #4                              ;// Store aligned output row
+        BGT     Copy2toVAligned
+        B       CopyVEnd  
+
+Copy3toVAligned        
+        LDR     x1, [pSrc, #4]
+        M_LDR   x0, [pSrc], srcStep
+        SUBS    iHeight, iHeight, #1        
+        
+        ;// One cycle stall
+
+        MOV     x1, x1, LSL #8              ;// same merge as above, split 24/8 bits
+        ORR     x0, x1, x0, LSR #24
+        STR     x0, [pDst], #4                              ;// Store aligned output row
+        BGT     Copy3toVAligned
+
+CopyVEnd  
+
+        SUB     pSrc, pDst, #28             ;// r0 = 28 bytes (7 stored rows) before the end of the copy
+        MOV     srcStep, #4                 ;// r1 = 4, the number of bytes stored per row
+
+        M_END
+
+
+    ENDIF
+
+    END
+    
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
new file mode 100644
index 0000000..bc0b6ec
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
@@ -0,0 +1,149 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+;// Function:
+;//     armVCM4P10_InterpolateLuma_Copy4x4_unsafe 
+;//
+;// Implements copy from an arbitrary aligned source memory location (pSrc) to an aligned
+;// destination pointed by (pDst)
+;//
+;// Registers preserved for top level function
+;// r1,r3,r4,r5,r6,r7,r10,r11,r14
+;//
+;// Registers modified by the function
+;// r0,r2,r8,r9,r12
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+
+        EXPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+        
+;// Declare input registers
+pSrc            RN 0
+srcStep         RN 1
+pDst            RN 2
+dstStep         RN 3
+
+;// Declare other intermediate registers
+x0              RN 4
+x1              RN 5
+x2              RN 8
+x3              RN 9
+Temp            RN 12
+
+    IF ARM1136JS
+
+        M_START armVCM4P10_InterpolateLuma_Copy4x4_unsafe, r6
+        ;// Copies four rows of one word each from pSrc (any byte alignment, stride srcStep) to pDst (stride dstStep)
+Copy4x4Start
+        ;// Do Copy and branch to EndOfInterpolation
+        AND     Temp, pSrc, #3              ;// Temp = pSrc modulo 4
+        BIC     pSrc, pSrc, #3                        ;// round pSrc down to a multiple of 4
+
+        M_SWITCH Temp                       ;// one unrolled copy per value of Temp (0..3)
+        M_CASE  Copy4x4Align0
+        M_CASE  Copy4x4Align1
+        M_CASE  Copy4x4Align2
+        M_CASE  Copy4x4Align3
+        M_ENDSWITCH
+
+Copy4x4Align0
+        M_LDR   x0, [pSrc], srcStep         ;// row 0 (loads and stores are interleaved)
+        M_LDR   x1, [pSrc], srcStep         ;// row 1
+        M_STR   x0, [pDst], dstStep
+        M_LDR   x2, [pSrc], srcStep         ;// row 2
+        M_STR   x1, [pDst], dstStep
+        M_LDR   x3, [pSrc], srcStep         ;// row 3
+        M_STR   x2, [pDst], dstStep
+        M_STR   x3, [pDst], dstStep
+        B       Copy4x4End  
+
+Copy4x4Align1
+        LDR     x1, [pSrc, #4]              ;// row 0: second word, read before pSrc is stepped
+        M_LDR   x0, [pSrc], srcStep         ;// row 0: first word, then step to the next row
+        LDR     x3, [pSrc, #4]              ;// row 1: second word
+        M_LDR   x2, [pSrc], srcStep         ;// row 1: first word
+        MOV     x0, x0, LSR #8              ;// discard the low 8 bits of the first word
+        ORR     x0, x0, x1, LSL #24         ;// top 8 bits come from the low 8 bits of the second word
+        M_STR   x0, [pDst], dstStep
+        MOV     x2, x2, LSR #8
+        ORR     x2, x2, x3, LSL #24
+        LDR     x1, [pSrc, #4]              ;// row 2
+        M_LDR   x0, [pSrc], srcStep
+        M_STR   x2, [pDst], dstStep
+        LDR     x3, [pSrc, #4]              ;// row 3
+        M_LDR   x2, [pSrc], srcStep
+        MOV     x0, x0, LSR #8
+        ORR     x0, x0, x1, LSL #24
+        M_STR   x0, [pDst], dstStep
+        MOV     x2, x2, LSR #8
+        ORR     x2, x2, x3, LSL #24
+        M_STR   x2, [pDst], dstStep
+        B       Copy4x4End  
+      
+Copy4x4Align2
+        LDR     x1, [pSrc, #4]              ;// rows 0 and 1, merged with a 16/16 bit split
+        M_LDR   x0, [pSrc], srcStep
+        LDR     x3, [pSrc, #4]
+        M_LDR   x2, [pSrc], srcStep
+        MOV     x0, x0, LSR #16
+        ORR     x0, x0, x1, LSL #16
+        M_STR   x0, [pDst], dstStep
+        MOV     x2, x2, LSR #16
+        ORR     x2, x2, x3, LSL #16
+        M_STR   x2, [pDst], dstStep        
+
+        LDR     x1, [pSrc, #4]              ;// rows 2 and 3
+        M_LDR   x0, [pSrc], srcStep
+        LDR     x3, [pSrc, #4]
+        M_LDR   x2, [pSrc], srcStep
+        MOV     x0, x0, LSR #16
+        ORR     x0, x0, x1, LSL #16
+        M_STR   x0, [pDst], dstStep
+        MOV     x2, x2, LSR #16
+        ORR     x2, x2, x3, LSL #16
+        M_STR   x2, [pDst], dstStep        
+        B       Copy4x4End  
+
+Copy4x4Align3 
+        LDR     x1, [pSrc, #4]              ;// rows 0 and 1, merged with an 8/24 bit split
+        M_LDR   x0, [pSrc], srcStep
+        LDR     x3, [pSrc, #4]
+        M_LDR   x2, [pSrc], srcStep
+        MOV     x0, x0, LSR #24
+        ORR     x0, x0, x1, LSL #8
+        M_STR   x0, [pDst], dstStep
+        MOV     x2, x2, LSR #24
+        ORR     x2, x2, x3, LSL #8
+        M_STR   x2, [pDst], dstStep
+
+        LDR     x1, [pSrc, #4]              ;// rows 2 and 3
+        M_LDR   x0, [pSrc], srcStep
+        LDR     x3, [pSrc, #4]
+        M_LDR   x2, [pSrc], srcStep
+        MOV     x0, x0, LSR #24
+        ORR     x0, x0, x1, LSL #8
+        M_STR   x0, [pDst], dstStep
+        MOV     x2, x2, LSR #24
+        ORR     x2, x2, x3, LSL #8
+        M_STR   x2, [pDst], dstStep
+        B       Copy4x4End  ;// the target label follows immediately
+
+Copy4x4End
+        M_END
+
+    ENDIF
+
+    END
+    
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
new file mode 100644
index 0000000..66cfe5e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
@@ -0,0 +1,178 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+
+        EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+        EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+
+;// Functions: 
+;//     armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and
+;//     armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe 
+;//
+;// Implements re-arrangement of data from temporary buffer to a buffer pointed by pBuf.
+;// This will do the conversion of data from 16 bit to 8 bit and it also
+;// removes the offset and checks for saturation.
+;//
+;// Registers used as input for this function
+;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer
+;//
+;// Registers preserved for top level function
+;// r4,r5,r6,r8,r9,r14
+;//
+;// Registers modified by the function
+;// r7,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the destination location
+;// r1 - step size to this destination location
+
+
+DEBUG_ON    SETL {FALSE}
+        
+MASK            EQU 0x80808080  ;// Mask is used to implement (a+b+1)/2
+
+;// Declare input registers
+
+pSrc0           RN 0
+srcStep0        RN 1
+
+;// Declare other intermediate registers
+Temp1           RN 4
+Temp2           RN 5
+Temp3           RN 10
+Temp4           RN 11
+pBuf            RN 7
+r0x0fe00fe0     RN 6
+r0x00ff00ff     RN 12
+Count           RN 14
+ValueA0         RN 10
+ValueA1         RN 11
+
+    IF ARM1136JS
+
+
+        ;// Function header
+        M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6
+        ;// Four passes: each reads 4 words at pSrc0, reduces every halfword to 8 bits and stores 8 bytes at pBuf
+        ;// Code start     
+        MOV         Count, #4   
+        LDR         r0x0fe00fe0, =0x0fe00fe0
+        LDR         r0x00ff00ff, =0x00ff00ff        
+LoopStart1
+        LDR         Temp4, [pSrc0, #12]
+        LDR         Temp3, [pSrc0, #8]        
+        LDR         Temp2, [pSrc0, #4]
+        M_LDR       Temp1, [pSrc0], srcStep0              ;// first word last, then step pSrc0 by srcStep0
+        UQSUB16     Temp4, Temp4, r0x0fe00fe0        ;// per halfword: subtract 0x0fe0, saturating at 0
+        UQSUB16     Temp3, Temp3, r0x0fe00fe0                 
+        UQSUB16     Temp2, Temp2, r0x0fe00fe0        
+        UQSUB16     Temp1, Temp1, r0x0fe00fe0                 
+        USAT16      Temp4, #13, Temp4       ;// per halfword: clamp to 13 bits
+        USAT16      Temp3, #13, Temp3                          
+        USAT16      Temp2, #13, Temp2
+        USAT16      Temp1, #13, Temp1                                  
+        AND         Temp4, r0x00ff00ff, Temp4, LSR #5         ;// per halfword: >>5, keeping 8 bits
+        AND         Temp3, r0x00ff00ff, Temp3, LSR #5         
+        AND         Temp2, r0x00ff00ff, Temp2, LSR #5         
+        AND         Temp1, r0x00ff00ff, Temp1, LSR #5         
+        ORR         ValueA1, Temp3, Temp4, LSL #8             ;// must come first: ValueA0 aliases Temp3 (r10)
+        ORR         ValueA0, Temp1, Temp2, LSL #8             ;// interleave the bytes of Temp1 and Temp2
+        SUBS        Count, Count, #1                   
+        STRD        ValueA0, [pBuf], #8 ;// store the ValueA0/ValueA1 register pair (8 bytes)
+        BGT         LoopStart1              ;// flags from SUBS Count (STRD does not set flags)
+End1
+        SUB        pSrc0, pBuf, #32        ;// r0 = start of the 32 bytes just written
+        MOV        srcStep0, #8            ;// r1 = 8, the number of bytes stored per pass
+
+        M_END
+
+
+        ;// Function header
+        M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6
+        ;// Two passes of two source rows each: halfwords are reduced to 8 bits and re-packed into words at pBuf
+        ;// Code start        
+        LDR         r0x0fe00fe0, =0x0fe00fe0
+        LDR         r0x00ff00ff, =0x00ff00ff
+        MOV         Count, #2
+
+LoopStart    
+        LDR         Temp4, [pSrc0, #12]
+        LDR         Temp3, [pSrc0, #8]        
+        LDR         Temp2, [pSrc0, #4]
+        M_LDR       Temp1, [pSrc0], srcStep0    ;// first word last, then step pSrc0 by srcStep0
+        
+        UQSUB16     Temp4, Temp4, r0x0fe00fe0        ;// per halfword: subtract 0x0fe0, saturating at 0
+        UQSUB16     Temp3, Temp3, r0x0fe00fe0                 
+        UQSUB16     Temp2, Temp2, r0x0fe00fe0        
+        UQSUB16     Temp1, Temp1, r0x0fe00fe0                 
+        
+        USAT16      Temp4, #13, Temp4       ;// per halfword: clamp to 13 bits
+        USAT16      Temp3, #13, Temp3                          
+        USAT16      Temp2, #13, Temp2
+        USAT16      Temp1, #13, Temp1
+                                  
+        AND         Temp4, r0x00ff00ff, Temp4, LSR #5         ;// per halfword: >>5, keeping 8 bits
+        AND         Temp3, r0x00ff00ff, Temp3, LSR #5         
+        AND         Temp2, r0x00ff00ff, Temp2, LSR #5         
+        AND         Temp1, r0x00ff00ff, Temp1, LSR #5         
+        ORR         ValueA1, Temp3, Temp4, LSL #8        ;// [d2 c2 d0 c0]             
+        ORR         ValueA0, Temp1, Temp2, LSL #8        ;// [b2 a2 b0 a0]         
+                    
+        PKHBT       Temp1, ValueA0, ValueA1, LSL #16     ;// [d0 c0 b0 a0]
+
+        STR         Temp1, [pBuf], #8       ;// store at pBuf, then pBuf += 8
+        PKHTB       Temp2, ValueA1, ValueA0, ASR #16     ;// [d2 c2 b2 a2]
+        STR         Temp2, [pBuf], #-4      ;// store 8 bytes on, then pBuf -= 4 (net +4 for this row)
+
+        LDR         Temp4, [pSrc0, #12]     ;// second source row of this pass
+        LDR         Temp3, [pSrc0, #8]        
+        LDR         Temp2, [pSrc0, #4]
+        M_LDR       Temp1, [pSrc0], srcStep0
+        
+        UQSUB16     Temp4, Temp4, r0x0fe00fe0        
+        UQSUB16     Temp3, Temp3, r0x0fe00fe0                 
+        UQSUB16     Temp2, Temp2, r0x0fe00fe0        
+        UQSUB16     Temp1, Temp1, r0x0fe00fe0                 
+        
+        USAT16      Temp4, #13, Temp4
+        USAT16      Temp3, #13, Temp3                          
+        USAT16      Temp2, #13, Temp2
+        USAT16      Temp1, #13, Temp1
+                                  
+        AND         Temp4, r0x00ff00ff, Temp4, LSR #5         
+        AND         Temp3, r0x00ff00ff, Temp3, LSR #5         
+        AND         Temp2, r0x00ff00ff, Temp2, LSR #5         
+        AND         Temp1, r0x00ff00ff, Temp1, LSR #5         
+        ORR         ValueA1, Temp3, Temp4, LSL #8        ;// [d2 c2 d0 c0]             
+        ORR         ValueA0, Temp1, Temp2, LSL #8        ;// [b2 a2 b0 a0]         
+                    
+        PKHBT       Temp1, ValueA0, ValueA1, LSL #16     ;// [d0 c0 b0 a0]
+        SUBS        Count, Count, #1
+        STR         Temp1, [pBuf], #8       ;// fills the word skipped by the first row's stores
+        PKHTB       Temp2, ValueA1, ValueA0, ASR #16     ;// [d2 c2 b2 a2]
+        STR         Temp2, [pBuf], #4       ;// pBuf is now 16 bytes past its value at LoopStart
+        
+        BGT         LoopStart               ;// flags from SUBS Count (STR/PKHTB do not set flags)
+End2
+        SUB         pSrc0, pBuf, #32-8      ;// r0 = 24 bytes back from the end, i.e. 8 bytes into the 32 written
+        MOV         srcStep0, #4
+
+        M_END
+
+    ENDIF
+    
+    END
+    
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
new file mode 100644
index 0000000..851ff6a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
@@ -0,0 +1,296 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        EXPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+
+        M_VARIANTS ARM1136JS
+
+
+
+    IF ARM1136JS 
+
+
+        M_ALLOC8 ppDstArgs, 8                           ;// Caller's pDst/dstStep while r2/r3 address the intermediate buffer
+        M_ALLOC8 pTempResult1, 8                        ;// Acc0/Acc1 kept from an even-Counter pass
+        M_ALLOC8 pTempResult2, 8                        ;// Acc2/Acc3 kept from an even-Counter pass
+        M_ALLOC4 ppSrc, 4                               ;// NOTE(review): never referenced in this function
+        M_ALLOC4 ppDst, 4                               ;// NOTE(review): never referenced in this function
+        M_ALLOC4 pDstStep, 4                            ;// dstStep spill (r3 doubles as Temp1)
+        M_ALLOC4 pSrcStep, 4                            ;// srcStep spill (r1 doubles as Temp2/ValI)
+        M_ALLOC4 pCounter, 4                            ;// Second-pass loop count (r11 doubles as ValHF/r0x0001fc00)
+
+        ;// Function header
+        ;// Function: 
+        ;//     armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+        ;//
+        ;// Implements diagonal interpolation for a block of size 4x4. Input and output should 
+        ;// be aligned. 
+        ;// Pass 1 filters each source row horizontally into the r8 buffer; pass 2 filters that buffer vertically.
+        ;// Registers used as input for this function
+        ;// r0,r1,r2,r3, r8 where r0,r2 are the source/destination pointers, r1,r3 their step sizes, r8 intermediate-buf pointer
+        ;//
+        ;// Registers preserved for top level function
+        ;// r0,r1,r2,r3,r4,r5,r6,r14
+        ;//
+        ;// Registers modified by the function
+        ;// r7,r8,r9,r10,r11,r12
+        ;//
+        ;// Output registers
+        ;// None. r2/r3 are restored; NOTE(review): r0/r1 exit as the intermediate buffer base and 16, not as passed in
+
+        M_START armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe, r6
+        
+;// Declare input registers
+pSrc            RN 0
+srcStep         RN 1
+pDst            RN 2
+dstStep         RN 3
+
+;// Declare inner loop registers (several names below alias the same physical register)
+Acc0            RN 4
+Acc1            RN 5
+Acc2            RN 6
+Acc3            RN 7
+
+ValA            RN 4
+ValB            RN 5
+ValC            RN 6
+ValD            RN 7
+ValE            RN 8
+ValF            RN 9
+ValG            RN 12
+ValH            RN 14
+ValI            RN 1                                    ;// Shares r1 with srcStep
+
+Temp1           RN 3                                    ;// Shares r3 with dstStep
+Temp2           RN 1                                    ;// Shares r1 with srcStep
+Temp3           RN 12
+Temp4           RN 7
+Temp5           RN 5
+r0x0fe00fe0     RN 3                                    ;// [0 (16*255 - 16) 0 (16*255 - 16)]
+r0x00ff00ff     RN 10                                   ;// [0 255 0 255] where 255 is offset
+Counter         RN 11
+pInterBuf       RN 8                                    ;// Intermediate buffer pointer passed in r8
+
+ValCA           RN 8
+ValDB           RN 9
+ValGE           RN 10
+ValHF           RN 11
+r0x00140001     RN 12                                   ;// Packed multipliers [20 1]
+r0x0014fffb     RN 14                                   ;// Packed multipliers [20 -5]
+
+r0x0001fc00     RN 11                                   ;// (0xff * 16 * 32) - 512
+
+Accx            RN 8                                    ;// Stored accumulators reloaded from pTempResult1/2
+Accy            RN 9
+Temp6           RN 14
+
+        M_STRD      pDst, dstStep, ppDstArgs            ;// Park the caller's destination; r2/r3 now describe the buffer
+
+        MOV         pDst, pInterBuf                
+        MOV         dstStep, #16                        ;// Byte stride between intermediate buffer rows
+
+        ;// Counter = 4: HeightLoop repeats while Counter >= 0 (BPL), i.e. 5 passes of two source rows each
+        MOV         Counter, #4
+        M_STR       dstStep, pDstStep        
+        M_STR       srcStep, pSrcStep        
+        LDR         r0x00ff00ff, =0x00ff00ff               ;// [0 255 0 255] 255 is offset to avoid negative results 
+
+HeightLoop
+NextTwoRowsLoop
+        LDR     ValD, [pSrc, srcStep]                   ;// Load row 1 [d1 c1 b1 a1]
+        LDR     ValA, [pSrc], #4                        ;// Load row 0 [d0 c0 b0 a0]
+        LDR     ValH, [pSrc, srcStep]                   ;// Load  [h1 g1 f1 e1]        
+        LDR     ValE, [pSrc], #4                        ;// Load  [h0 g0 f0 e0]
+        LDRB    Temp2, [pSrc, srcStep]                  ;// Load row 1 byte [i1] (Temp2 = r1 overwrites srcStep)
+        LDRB    Temp1, [pSrc], #-8                      ;// Load row 0 byte [i0]; rewind pSrc to the row start
+        
+        PKHBT   ValB, ValA, ValD, LSL #16               ;// [b1 a1 b0 a0]
+        PKHTB   ValD, ValD, ValA, ASR #16               ;// [d1 c1 d0 c0]
+        UXTAB16 ValA, r0x00ff00ff, ValB                 ;// [00 a1 00 a0] + [0 255 0 255]
+        UXTAB16 ValC, r0x00ff00ff, ValD                 ;// [00 c1 00 c0] + [0 255 0 255]
+        PKHBT   ValI, Temp1, Temp2, LSL #16             ;// [00 i1 00 i0]            
+        PKHBT   ValF, ValE, ValH, LSL #16               ;// [f1 e1 f0 e0]
+        PKHTB   ValH, ValH, ValE, ASR #16               ;// [h1 g1 h0 g0]
+        UXTAB16 ValE, r0x00ff00ff, ValF                 ;// [00 e1 00 e0] + [0 255 0 255]
+
+        ;// Calculate Acc0
+        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
+        UXTAB16 Temp1, ValC, ValD, ROR #8               ;// (c + 255) + d
+        UXTAB16 Temp3, ValE, ValB, ROR #8               ;// (e + 255) + b
+        RSB     Temp1, Temp3, Temp1, LSL #2             ;// 4*(c + d) - (b + e), carrying 3*255
+        UXTAB16 Acc0, ValA, ValF, ROR #8                ;// (a + 255) + f
+        ADD     Temp1, Temp1, Temp1, LSL #2             ;// 5 * [4*(c + d) - (b + e)]
+        ADD     Acc0, Acc0, Temp1                       ;// Acc0 = filter sum + 16*255 in each halfword
+
+        ;// Calculate Acc1
+        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
+        UXTAB16 Temp1, ValE, ValD, ROR #8
+        UXTAB16 Temp3, ValC, ValF, ROR #8
+        RSB     Temp1, Temp3, Temp1, LSL #2                        
+        UXTAB16 ValG, r0x00ff00ff, ValH                 ;// [00 g1 00 g0] + [0 255 0 255]
+        ADD     Temp1, Temp1, Temp1, LSL #2        
+        UXTAB16 Acc1, ValG, ValB, ROR #8
+        ADD     Acc1, Acc1, Temp1        
+
+        UXTAB16 Acc2, ValC, ValH, ROR #8                ;// (c + 255) + h, first term of Acc2
+        ADD     ValI, r0x00ff00ff, ValI                 ;// [00 i1 00 i0] + [0 255 0 255]        
+        
+        ;// Calculate Acc2
+        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
+        UXTAB16 Temp1, ValG, ValD, ROR #8
+        UXTAB16 Acc3, ValI, ValD, ROR #8                ;// (i + 255) + d, first term of Acc3
+        UXTAB16 Temp2, ValE, ValF, ROR #8
+        
+        RSB     Temp1, Temp1, Temp2, LSL #2        
+        UXTAB16 Temp2, ValG, ValF, ROR #8
+        ADD     Temp1, Temp1, Temp1, LSL #2        
+        ADD     Acc2, Acc2, Temp1        
+
+        ;// Calculate Acc3
+        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
+        UXTAB16 Temp1, ValE, ValH, ROR #8
+        RSB     Temp1, Temp1, Temp2, LSL #2
+        ADD     Temp1, Temp1, Temp1, LSL #2        
+        ADD     Acc3, Acc3, Temp1
+        
+        M_LDR   dstStep, pDstStep                       ;// r3/r1 were used as Temp1/Temp2 above; reload the steps
+        M_LDR   srcStep, pSrcStep
+
+        ;// If Counter is even store Acc0-Acc3 in a temporary buffer
+        ;// If Counter is odd store Acc0-Acc3 and previous Acc0-Acc3 in the intermediate buf 
+        ANDS        Temp3, Counter, #1
+        BEQ         NoProcessing        
+        
+        ;// Packing previous and current Acc0-Acc3 values
+        M_LDRD      Accx, Accy, pTempResult1
+        PKHBT       Temp6, Accx, Acc0, LSL #16          ;//[0 a2 0 a0] = [0 a3 0 a2] [0 a1 0 a0]
+        PKHTB       Acc0, Acc0, Accx, ASR #16           ;//[0 a3 0 a1] = [0 a1 0 a0] [0 a3 0 a2] 
+        STR         Acc0, [pDst, dstStep]                        
+        STR         Temp6, [pDst], #4                   
+        PKHBT       Temp6, Accy, Acc1, LSL #16          ;//[0 b2 0 b0] = [0 b3 0 b2] [0 b1 0 b0]
+        PKHTB       Acc1, Acc1, Accy, ASR #16            ;//[0 b3 0 b1] = [0 b1 0 b0] [0 b3 0 b2]
+        M_LDRD      Accx, Accy, pTempResult2
+        STR         Acc1, [pDst, dstStep]                        
+        STR         Temp6, [pDst], #4                   
+        
+        PKHBT       Temp6, Accx, Acc2, LSL #16          ;//[0 c2 0 c0] = [0 c3 0 c2] [0 c1 0 c0]
+        PKHTB       Acc2, Acc2, Accx, ASR #16            ;//[0 c3 0 c1] = [0 c1 0 c0] [0 c3 0 c2]
+        STR         Acc2, [pDst, dstStep]                        
+        STR         Temp6, [pDst], #4                   
+        PKHBT       Temp6, Accy, Acc3, LSL #16          ;//[0 d2 0 d0] = [0 d3 0 d2] [0 d1 0 d0]
+        PKHTB       Acc3, Acc3, Accy, ASR #16            ;//[0 d3 0 d1] = [0 d1 0 d0] [0 d3 0 d2]
+        STR         Acc3, [pDst, dstStep]                        
+        STR         Temp6, [pDst], #-12
+        ADD         pDst, pDst, dstStep, LSL #1         ;// Two buffer rows were written; step past both
+        B           AfterStore
+
+NoProcessing
+        M_STRD      Acc0, Acc1, pTempResult1
+        M_STRD      Acc2, Acc3, pTempResult2
+AfterStore
+        SUBS        Counter, Counter, #1                ;// 5 passes (Counter 4..0) x 2 rows = 10 source rows
+        ADD         pSrc, pSrc, srcStep, LSL #1
+        BPL         HeightLoop
+
+        STR         Acc0, [pDst], #4                    ;//[0 a1 0 a0] of the last pass, stored unpacked
+        STR         Acc1, [pDst], #4
+        STR         Acc2, [pDst], #4
+        STR         Acc3, [pDst], #-12                  ;// Net pDst change of these four stores is zero
+        
+        ;//
+        ;// Vertical interpolation of the intermediate buffer using multiplication
+        ;//
+    
+        SUB         pSrc, pDst, dstStep, LSL #2         ;// Rewind 4 buffer rows (64 bytes) to the buffer start
+        MOV         srcStep, #16
+        M_LDRD      pDst, dstStep, ppDstArgs
+
+        MOV         Counter, #4
+        LDR         r0x0014fffb, =0x0014fffb
+        LDR         r0x00140001, =0x00140001
+
+HeightLoop1
+        M_STR       Counter, pCounter                   ;// r11 is reused as ValHF / r0x0001fc00 below
+
+        M_LDR       ValCA, [pSrc], srcStep               ;// Load  [0 c 0 a]
+        M_LDR       ValDB, [pSrc], srcStep               ;// Load  [0 d 0 b]
+        M_LDR       ValGE, [pSrc], srcStep               ;// Load  [0 g 0 e]
+        M_LDR       ValHF, [pSrc], srcStep               ;// Load  [0 h 0 f]
+
+
+        ;// a..i are the nine first-pass results (offset included) of one column, top to bottom:
+        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f,  Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
+        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h,  Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
+        ;// SMUAD/SMLAD add bottom*bottom and top*top; the X forms swap the halves of the second operand
+
+        SMUAD       Acc0, ValCA, r0x00140001            ;// Acc0  = a*1 + c*20
+        SMUAD       Acc1, ValDB, r0x00140001            ;// Acc1  = b*1 + d*20
+        SMUADX      Acc2, ValGE, r0x0014fffb            ;// Acc2  = e*20 + g*(-5)
+        SMUAD       Acc3, ValGE, r0x0014fffb            ;// Acc3  = e*(-5) + g*20
+
+        SMLAD       Acc0, ValDB, r0x0014fffb, Acc0      ;// Acc0 += b*(-5) + d*20
+        SMLADX      Acc1, ValGE, r0x00140001, Acc1      ;// Acc1 += e*20 + g*1
+        SMLADX      Acc2, ValHF, r0x00140001, Acc2      ;// Acc2 += f*20 + h*1
+        SMLADX      Acc3, ValHF, r0x0014fffb, Acc3      ;// Acc3 += f*20 + h*(-5)
+
+        SMLABB      Acc0, ValGE, r0x0014fffb, Acc0      ;// Acc0 += e*(-5)
+        SMLATB      Acc1, ValCA, r0x0014fffb, Acc1      ;// Acc1 += c*(-5)
+        SMLATB      Acc2, ValCA, r0x00140001, Acc2      ;// Acc2 += c*1
+        SMLATB      Acc3, ValDB, r0x00140001, Acc3      ;// Acc3 += d*1
+
+        LDRH        ValCA, [pSrc], #4                   ;// Load [i]; pSrc moves on by one 4-byte buffer word
+        SMLABB      Acc0, ValHF, r0x00140001, Acc0      ;// Acc0 += f*1
+        SMLABB      Acc1, ValHF, r0x0014fffb, Acc1      ;// Acc1 += f*(-5)
+        SMLATB      Acc2, ValDB, r0x0014fffb, Acc2      ;// Acc2 += d*(-5)
+        SMLABB      Acc3, ValCA, r0x00140001, Acc3      ;// Acc3 += i*1
+        
+        LDR         r0x0001fc00, =0x0001fc00            ;// (0xff * 16 * 32) - 512
+        SUB         Acc0, Acc0, r0x0001fc00        
+        SUB         Acc1, Acc1, r0x0001fc00        
+        SUB         Acc2, Acc2, r0x0001fc00        
+        SUB         Acc3, Acc3, r0x0001fc00        
+
+        USAT        Acc0, #18, Acc0                     ;// Clamp to 18 unsigned bits; LSR #10 below leaves 8
+        USAT        Acc1, #18, Acc1
+        USAT        Acc2, #18, Acc2
+        USAT        Acc3, #18, Acc3
+        
+        MOV         Acc0, Acc0, LSR #10
+        M_STRB      Acc0, [pDst], dstStep               ;// Four results go down one destination column
+        MOV         Acc1, Acc1, LSR #10
+        M_STRB      Acc1, [pDst], dstStep
+        MOV         Acc2, Acc2, LSR #10
+        M_STRB      Acc2, [pDst], dstStep
+        MOV         Acc3, Acc3, LSR #10
+        M_STRB      Acc3, [pDst], dstStep
+
+
+        M_LDR       Counter, pCounter
+        SUB         pDst, pDst, dstStep, LSL #2         ;// Back to the top destination row ...
+        SUB         pSrc, pSrc, srcStep, LSL #2         ;// Undo the four row steps; net pSrc change is +4
+        ADD         pDst, pDst, #1                      ;// ... and one byte right for the next column
+        SUBS        Counter, Counter, #1
+        BGT         HeightLoop1
+End
+        SUB         pDst, pDst, #4                      ;// Undo the four +1 column steps
+        SUB         pSrc, pSrc, #16                     ;// Undo the four +4 steps: back to the buffer start
+
+        M_END
+    
+    ENDIF
+    
+    END
+    
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
new file mode 100644
index 0000000..2f48e13
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
@@ -0,0 +1,276 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+
+        M_VARIANTS ARM1136JS
+
+    
+    
+    
+
+    IF ARM1136JS 
+        
+        M_ALLOC8 ppDstArgs, 8                           ;// Caller's pDst/dstStep while r2/r3 address the intermediate buffer
+        M_ALLOC4 ppSrc, 4                               ;// pSrc at the top of the current 4-column strip
+        M_ALLOC4 ppDst, 4                               ;// pDst at the top of the current 4-column strip
+        M_ALLOC4 pCounter, 4                            ;// Second-pass loop count (r8 doubles as ValCA)
+
+        ;// Function header
+        ;// Function:
+        ;//     armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+        ;//
+        ;// Implements diagonal interpolation for a block of size 4x4. Input and output should 
+        ;// be aligned. 
+        ;// Pass 1 filters source columns vertically into the r8 buffer; pass 2 filters that buffer horizontally.
+        ;// Registers used as input for this function
+        ;// r0,r1,r2,r3, r8 where r0,r2 are the source/destination pointers, r1,r3 their step sizes, r8 intermediate-buf pointer
+        ;//
+        ;// Registers preserved for top level function
+        ;// r0,r1,r2,r3,r4,r5,r6,r14
+        ;//
+        ;// Registers modified by the function
+        ;// r7,r8,r9,r10,r11,r12
+        ;//
+        ;// Output registers
+        ;// None. r2/r3 are restored; NOTE(review): r0/r1 exit as the intermediate buffer base and 24, not as passed in
+
+        M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r6
+
+;// Declare input registers
+pSrc            RN 0
+srcStep         RN 1
+pDst            RN 2
+dstStep         RN 3
+
+;// Declare inner loop registers (several names below alias the same physical register)
+ValA            RN 5
+ValA0           RN 4
+ValA1           RN 5
+ValAF0          RN 4
+ValAF1          RN 5
+
+ValB            RN 11
+
+ValC            RN 5
+ValC0           RN 4
+ValC1           RN 5
+ValCD0          RN 12
+ValCD1          RN 14
+ValCF0          RN 4
+ValCF1          RN 5
+
+ValD            RN 10
+
+ValE            RN 7
+ValE0           RN 6
+ValE1           RN 7
+ValEB0          RN 10
+ValEB1          RN 11
+ValED0          RN 6
+ValED1          RN 7
+
+ValF            RN 10
+
+ValG            RN 14
+ValG0           RN 12
+ValG1           RN 14
+ValGB0          RN 12
+ValGB1          RN 14
+
+Acc0            RN 4
+Acc1            RN 5
+Acc2            RN 6
+Acc3            RN 7
+
+Temp            RN 7
+Step            RN 6
+
+pInterBuf       RN 8                                        ;// Intermediate buffer pointer passed in r8
+Counter         RN 8                                        ;// Shares r8 with pInterBuf
+r0x00ff00ff     RN 9                                        ;// [0 255 0 255] where 255 is offset
+r0x0001fc00     RN 10                                       ;// (0xff * 16 * 32) - 512
+
+    
+;// Declare second-pass (multiply) registers
+ValCA           RN 8
+ValDB           RN 9
+ValGE           RN 10
+ValHF           RN 11
+r0x00140001     RN 12                                       ;// Packed multipliers [20 1]
+r0x0014fffb     RN 14                                       ;// Packed multipliers [20 -5]
+
+r0x00000200     RN 12                                       ;// NOTE(review): not referenced in this function
+r0x000000ff     RN 12                                       ;// NOTE(review): not referenced in this function
+        
+        M_STRD      pDst, dstStep, ppDstArgs            ;// Park the caller's destination; r2/r3 now describe the buffer
+        MOV         pDst, pInterBuf                
+        MOV         dstStep, #24                        ;// Byte stride between intermediate buffer rows
+
+        ;// Set up counter of format, [0]  [0]  [1 (height)]  [8 (width)]; r8 is free once pInterBuf is copied
+        MOV         Counter, #1
+        MOV         Temp, #8                                                        
+        ADD         Counter, Temp, Counter, LSL #8        ;// [0 0 H W]                        
+        
+        LDR         r0x00ff00ff, =0x00ff00ff                ;// [0 255 0 255] 255 is offset to avoid negative results 
+WidthLoop
+        M_STR       pSrc, ppSrc
+        M_STR       pDst, ppDst
+HeightLoop
+TwoRowsLoop
+        M_LDR       ValC, [pSrc], srcStep                   ;// Load  [c3 c2 c1 c0]
+        M_LDR       ValD, [pSrc], srcStep                   ;// Load  [d3 d2 d1 d0]
+        M_LDR       ValE, [pSrc], srcStep                   ;// Load  [e3 e2 e1 e0]        
+        SUB         pSrc, pSrc, srcStep, LSL #2             ;// 3 rows forward, 4 back: pSrc -> row b
+        UXTAB16     ValC0, r0x00ff00ff, ValC                ;// [0 c2 0 c0] + [0 255 0 255]
+        UXTAB16     ValC1, r0x00ff00ff, ValC, ROR #8        ;// [0 c3 0 c1] + [0 255 0 255]        
+        LDR         ValB, [pSrc]                            ;// Load  [b3 b2 b1 b0]        
+        UXTAB16     ValE0, r0x00ff00ff, ValE                ;// [0 e2 0 e0] + [0 255 0 255]
+        UXTAB16     ValE1, r0x00ff00ff, ValE, ROR #8        ;// [0 e3 0 e1] + [0 255 0 255]        
+        UXTAB16     ValCD0, ValC0, ValD                     ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0]
+        UXTAB16     ValCD1, ValC1, ValD, ROR #8             ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1]                                
+        UXTAB16     ValEB0, ValE0, ValB                     ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0]
+        RSB         ValCD0, ValEB0, ValCD0, LSL #2          ;// 4*(Off+C+D) - (Off+B+E)
+        
+        LDR         ValD, [pSrc, srcStep, LSL #1]                       ;// Reload [d3 d2 d1 d0] (r10 was reused as ValEB0)
+        UXTAB16     ValEB1, ValE1, ValB, ROR #8             ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1]                                               
+        RSB         ValCD1, ValEB1, ValCD1, LSL #2                
+        
+        UXTAB16     ValED0, ValE0, ValD                     ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0]
+        UXTAB16     ValED1, ValE1, ValD, ROR #8             ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1]                                                       
+        LDR         ValF, [pSrc, srcStep, LSL #2]           ;// Load  [f3 f2 f1 f0]
+        M_LDR       ValB, [pSrc], srcStep                   ;// Load  [b3 b2 b1 b0]                
+        ADD         ValCD0, ValCD0, ValCD0, LSL #2          ;// 5 * [4*(Off+C+D) - (Off+B+E)]
+        ADD         ValCD1, ValCD1, ValCD1, LSL #2                          
+        UXTAB16     ValCF1, ValC1, ValF, ROR #8             ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1]                                
+        UXTAB16     ValCF0, ValC0, ValF                     ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0]        
+        RSB         ValED1, ValCF1, ValED1, LSL #2        
+        
+        SUB         ValA, pSrc, srcStep, LSL #1             ;// ValA briefly holds the address of row a
+        LDR         ValA, [ValA]                            ;// Load  [a3 a2 a1 a0]
+        RSB         ValED0, ValCF0, ValED0, LSL #2          ;// 4*(Off+E+D) - (Off+C+F)        
+        ADD         ValED1, ValED1, ValED1, LSL #2          
+        ADD         ValED0, ValED0, ValED0, LSL #2          ;// 5 * [4*(Off+E+D) - (Off+C+F)]
+        UXTAB16     ValA0, r0x00ff00ff, ValA                ;// [0 a2 0 a0] + [0 255 0 255]
+        UXTAB16     ValA1, r0x00ff00ff, ValA, ROR #8        ;// [0 a3 0 a1] + [0 255 0 255]
+        UXTAB16     ValAF0, ValA0, ValF                     ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0]
+        UXTAB16     ValAF1, ValA1, ValF, ROR #8             ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1]                                        
+        ADD         Acc1, ValCD1, ValAF1        
+        
+        LDR         ValG, [pSrc, srcStep, LSL #2]           ;// Load  [g3 g2 g1 g0]
+        ADD         Acc0, ValCD0, ValAF0                    ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E)        
+        STR         Acc1, [pDst, #4]                        ;// Store odd-column results of the first output row
+        M_STR       Acc0, [pDst], dstStep                   ;// Store even-column results; step to the next buffer row
+        UXTAB16     ValG0, r0x00ff00ff, ValG                ;// [0 g2 0 g0] + [0 255 0 255]
+        UXTAB16     ValG1, r0x00ff00ff, ValG, ROR #8        ;// [0 g3 0 g1] + [0 255 0 255]
+        UXTAB16     ValGB0, ValG0, ValB                     ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0]
+        UXTAB16     ValGB1, ValG1, ValB, ROR #8             ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1]                        
+        ADD         Acc2, ValED0, ValGB0                    ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F)
+        ADD         Acc3, ValED1, ValGB1        
+        
+        STR         Acc3, [pDst, #4]                        ;// Store odd-column results of the second output row
+        M_STR       Acc2, [pDst], dstStep                   ;// Store even-column results; step to the next buffer row
+        
+        SUBS        Counter, Counter, #1 << 8               ;// Height field 1 -> 0 -> negative: two passes, four rows
+        ADD         pSrc, pSrc, srcStep, LSL #1
+        BPL         HeightLoop
+        
+        M_LDR       pSrc, ppSrc
+        M_LDR       pDst, ppDst
+        ADDS        Counter, Counter, #(1 << 8)-4           ;// Width -= 4; runs three 4-column strips (width 12)
+        ADD         pSrc, pSrc, #4
+        ADD         pDst, pDst, #8
+        ADD         Counter, Counter, #1<<8                 ;// Re-arm the height field; ADD leaves the flags for BPL
+        BPL         WidthLoop
+    
+        ;//
+        ;// Horizontal interpolation using multiplication
+        ;//
+    
+        SUB         pSrc, pDst, #24                     ;// pDst ended 24 bytes past the buffer start
+        MOV         srcStep, #24
+        M_LDRD      pDst, dstStep, ppDstArgs
+
+        MOV         Counter, #4
+        LDR         r0x0014fffb, =0x0014fffb
+        LDR         r0x00140001, =0x00140001
+
+HeightLoop1
+        M_STR       Counter, pCounter                   ;// r8 is reused as ValCA below
+
+
+        LDR         ValCA, [pSrc], #4                   ;// Load  [0 c 0 a]
+        LDR         ValDB, [pSrc], #4                   ;// Load  [0 d 0 b]
+        LDR         ValGE, [pSrc], #4                   ;// Load  [0 g 0 e]
+        LDR         ValHF, [pSrc], #4                   ;// Load  [0 h 0 f]
+
+        ;// a..i are the nine first-pass results (offset included) of one buffer row, left to right:
+        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f,  Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
+        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h,  Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
+        ;// SMUAD/SMLAD add bottom*bottom and top*top; the X forms swap the halves of the second operand
+        SMUAD       Acc0, ValCA, r0x00140001            ;// Acc0  = a*1 + c*20
+        SMUAD       Acc1, ValDB, r0x00140001            ;// Acc1  = b*1 + d*20
+        SMUADX      Acc2, ValGE, r0x0014fffb            ;// Acc2  = e*20 + g*(-5)
+        SMUAD       Acc3, ValGE, r0x0014fffb            ;// Acc3  = e*(-5) + g*20
+
+        SMLAD       Acc0, ValDB, r0x0014fffb, Acc0      ;// Acc0 += b*(-5) + d*20
+        SMLADX      Acc1, ValGE, r0x00140001, Acc1      ;// Acc1 += e*20 + g*1
+        SMLADX      Acc2, ValHF, r0x00140001, Acc2      ;// Acc2 += f*20 + h*1
+        SMLADX      Acc3, ValHF, r0x0014fffb, Acc3      ;// Acc3 += f*20 + h*(-5)
+
+        SMLABB      Acc0, ValGE, r0x0014fffb, Acc0      ;// Acc0 += e*(-5)
+        SMLATB      Acc1, ValCA, r0x0014fffb, Acc1      ;// Acc1 += c*(-5)
+        SMLATB      Acc2, ValCA, r0x00140001, Acc2      ;// Acc2 += c*1
+        SMLATB      Acc3, ValDB, r0x00140001, Acc3      ;// Acc3 += d*1
+
+        LDRH        ValCA, [pSrc], #8                   ;// Load [i]; 8 = srcStep - 16
+        SMLABB      Acc0, ValHF, r0x00140001, Acc0      ;// Acc0 += f*1
+        SMLABB      Acc1, ValHF, r0x0014fffb, Acc1      ;// Acc1 += f*(-5)
+        SMLATB      Acc2, ValDB, r0x0014fffb, Acc2      ;// Acc2 += d*(-5)
+        SMLABB      Acc3, ValCA, r0x00140001, Acc3      ;// Acc3 += i*1
+        
+        LDR         r0x0001fc00, =0x0001fc00            ;// (0xff * 16 * 32) - 512
+        SUB         Acc0, Acc0, r0x0001fc00        
+        SUB         Acc1, Acc1, r0x0001fc00        
+        SUB         Acc2, Acc2, r0x0001fc00        
+        SUB         Acc3, Acc3, r0x0001fc00        
+
+        USAT        Acc0, #18, Acc0                     ;// Clamp to 18 unsigned bits; LSR #10 below leaves 8
+        USAT        Acc1, #18, Acc1
+        USAT        Acc2, #18, Acc2
+        USAT        Acc3, #18, Acc3
+        
+        MOV         Acc0, Acc0, LSR #10
+        MOV         Acc1, Acc1, LSR #10
+        MOV         Acc2, Acc2, LSR #10
+        MOV         Acc3, Acc3, LSR #10
+
+        M_LDR       Counter, pCounter        
+        ORR         Acc0, Acc0, Acc1, LSL #8            ;// Pack the four bytes as [Acc3 Acc2 Acc1 Acc0]
+        ORR         Acc2, Acc2, Acc3, LSL #8
+        SUBS        Counter, Counter, #1
+        ORR         Acc0, Acc0, Acc2, LSL #16
+        M_STR       Acc0, [pDst], dstStep               ;// One output row per pass
+        BGT         HeightLoop1
+End
+        SUB         pDst, pDst, dstStep, LSL #2         ;// Undo the four row steps
+        SUB         pSrc, pSrc, srcStep, LSL #2         ;// Undo 4 x 24 bytes: back to the buffer start
+
+        M_END
+    
+    ENDIF
+    
+    END
+    
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
new file mode 100644
index 0000000..6690ced
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
@@ -0,0 +1,239 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+        
+        EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+
+DEBUG_ON    SETL {FALSE}
+
+
+    IF ARM1136JS
+
+;// Function: 
+;//     armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+;//
+;// Implements horizontal interpolation for a block of size 4x4. Input and output should 
+;// be aligned. 
+;// Two rows are filtered per loop pass (Counter = 2), one row in each 16-bit lane pair.
+;// Registers used as input for this function
+;// r0,r1,r2,r3 where r0,r2 are the source/destination pointers and r1,r3 the corresponding step sizes
+;//
+;// Registers preserved for top level function
+;// r0,r1,r2,r3,r4,r5,r6,r14
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// None. Function will preserve r0-r3
+
+
+;// Declare input registers
+pSrc            RN 0
+srcStep         RN 1
+pDst            RN 2
+dstStep         RN 3
+
+;// Declare inner loop registers (several names below alias the same physical register)
+Acc0            RN 4
+Acc1            RN 5
+Acc2            RN 6
+Acc3            RN 7
+
+ValA            RN 4
+ValB            RN 5
+ValC            RN 6
+ValD            RN 7
+ValE            RN 8
+ValF            RN 9
+ValG            RN 12
+ValH            RN 14
+ValI            RN 1                                    ;// Shares r1 with srcStep
+
+Temp1           RN 3                                    ;// Shares r3 with dstStep
+Temp2           RN 1                                    ;// Shares r1 with srcStep
+Temp3           RN 12
+Temp4           RN 7                                    ;// NOTE(review): not referenced in this function
+Temp5           RN 5
+r0x0fe00fe0     RN 3                                    ;// [0 (16*255 - 16) 0 (16*255 - 16)]
+r0x00ff00ff     RN 10                                   ;// [0 255 0 255] where 255 is offset
+Counter         RN 11
+
+Height          RN 3                                    ;// NOTE(review): not referenced in this function
+
+        M_ALLOC4 pDstStep, 4                            ;// dstStep spill (r3 doubles as Temp1/r0x0fe00fe0)
+        M_ALLOC4 pSrcStep, 4                            ;// srcStep spill (r1 doubles as Temp2/ValI)
+
+        ;// Function header
+        M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r6
+        
+        MOV     Counter, #2                             ;// Two passes of two rows each
+        M_STR   dstStep, pDstStep        
+        M_STR   srcStep, pSrcStep        
+        LDR     r0x00ff00ff, =0x00ff00ff               ;// [0 255 0 255] 255 is offset to avoid negative results 
+
+NextTwoRowsLoop
+        LDR     ValD, [pSrc, srcStep]                   ;// Load row 1 [d1 c1 b1 a1]
+        LDR     ValA, [pSrc], #4                        ;// Load row 0 [d0 c0 b0 a0]
+        LDR     ValH, [pSrc, srcStep]                   ;// Load  [h1 g1 f1 e1]        
+        LDR     ValE, [pSrc], #4                        ;// Load  [h0 g0 f0 e0]
+        LDRB    Temp2, [pSrc, srcStep]                  ;// Load row 1 byte [i1] (Temp2 = r1 overwrites srcStep)
+        LDRB    Temp1, [pSrc], #-8                      ;// Load row 0 byte [i0]; rewind pSrc to the row start
+        
+        PKHBT   ValB, ValA, ValD, LSL #16               ;// [b1 a1 b0 a0]
+        PKHTB   ValD, ValD, ValA, ASR #16               ;// [d1 c1 d0 c0]
+        UXTAB16 ValA, r0x00ff00ff, ValB                 ;// [00 a1 00 a0] + [0 255 0 255]
+        UXTAB16 ValC, r0x00ff00ff, ValD                 ;// [00 c1 00 c0] + [0 255 0 255]
+        PKHBT   ValI, Temp1, Temp2, LSL #16             ;// [00 i1 00 i0]            
+        PKHBT   ValF, ValE, ValH, LSL #16               ;// [f1 e1 f0 e0]
+        PKHTB   ValH, ValH, ValE, ASR #16               ;// [h1 g1 h0 g0]
+        UXTAB16 ValE, r0x00ff00ff, ValF                 ;// [00 e1 00 e0] + [0 255 0 255]
+
+        ;// Calculate Acc0
+        ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
+        UXTAB16 Temp1, ValC, ValD, ROR #8               ;// (c + 255) + d
+        UXTAB16 Temp3, ValE, ValB, ROR #8               ;// (e + 255) + b
+        RSB     Temp1, Temp3, Temp1, LSL #2             ;// 4*(c + d) - (b + e), carrying 3*255
+        UXTAB16 Acc0, ValA, ValF, ROR #8                ;// (a + 255) + f
+        ADD     Temp1, Temp1, Temp1, LSL #2             ;// 5 * [4*(c + d) - (b + e)]
+        ADD     Acc0, Acc0, Temp1                       ;// Acc0 = filter sum + 16*255 in each halfword
+
+        ;// Calculate Acc1
+        ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
+        UXTAB16 Temp1, ValE, ValD, ROR #8
+        UXTAB16 Temp3, ValC, ValF, ROR #8
+        RSB     Temp1, Temp3, Temp1, LSL #2                        
+        UXTAB16 ValG, r0x00ff00ff, ValH                 ;// [00 g1 00 g0] + [0 255 0 255]
+        ADD     Temp1, Temp1, Temp1, LSL #2        
+        UXTAB16 Acc1, ValG, ValB, ROR #8
+        ADD     Acc1, Acc1, Temp1        
+
+        LDR     r0x0fe00fe0, =0x0fe00fe0                ;// 0x0fe00fe0 = (16 * Offset) - 16 where Offset is 255        
+        UXTAB16 Acc2, ValC, ValH, ROR #8                ;// (c + 255) + h, first term of Acc2
+        ADD     ValI, r0x00ff00ff, ValI                 ;// [00 i1 00 i0] + [0 255 0 255]        
+        UQSUB16 Acc0, Acc0, r0x0fe00fe0                    
+        UQSUB16 Acc1, Acc1, r0x0fe00fe0
+        USAT16  Acc0, #13, Acc0                         ;// Clamp to 13 unsigned bits; LSR #5 below leaves 8
+        USAT16  Acc1, #13, Acc1        
+        
+        ;// Calculate Acc2
+        ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
+        UXTAB16 Temp1, ValG, ValD, ROR #8               ;// r3 is reused as Temp1: the constant is reloaded below
+        UXTAB16 Acc3, ValI, ValD, ROR #8                ;// (i + 255) + d, first term of Acc3
+        UXTAB16 Temp2, ValE, ValF, ROR #8
+        AND     Acc1, r0x00ff00ff, Acc1, LSR #5
+        AND     Acc0, r0x00ff00ff, Acc0, LSR #5
+        ORR     Acc0, Acc0, Acc1, LSL #8                ;// Acc0 = [b1 a1 b0 a0] results; r5 is now free for Temp5
+        RSB     Temp5, Temp1, Temp2, LSL #2        
+        UXTAB16 Temp2, ValG, ValF, ROR #8
+        ADD     Temp5, Temp5, Temp5, LSL #2        
+        ADD     Acc2, Acc2, Temp5        
+
+        ;// Calculate Acc3
+        ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
+        UXTAB16 Temp5, ValE, ValH, ROR #8
+        RSB     Temp5, Temp5, Temp2, LSL #2
+        LDR     r0x0fe00fe0, =0x0fe00fe0                ;// Reload: r3 was overwritten as Temp1
+        ADD     Temp5, Temp5, Temp5, LSL #2        
+        ADD     Acc3, Acc3, Temp5
+        
+        UQSUB16 Acc3, Acc3, r0x0fe00fe0        
+        UQSUB16 Acc2, Acc2, r0x0fe00fe0        
+        USAT16  Acc3, #13, Acc3
+        USAT16  Acc2, #13, Acc2        
+
+        M_LDR   dstStep, pDstStep                       ;// r3 held the constant / Temp1; reload the step
+        AND     Acc3, r0x00ff00ff, Acc3, LSR #5
+        AND     Acc2, r0x00ff00ff, Acc2, LSR #5
+        ORR     Acc2, Acc2, Acc3, LSL #8                ;// Acc2 = [d1 c1 d0 c0] results
+        
+        SUBS    Counter, Counter, #1
+        M_LDR   srcStep, pSrcStep
+        PKHBT   Acc1, Acc0, Acc2, LSL #16               ;// Row 0 results [d0 c0 b0 a0]
+        M_STR   Acc1, [pDst], dstStep                   ;// Store result1
+        PKHTB   Acc2, Acc2, Acc0, ASR #16               ;// Row 1 results [d1 c1 b1 a1]
+        M_STR   Acc2, [pDst], dstStep                   ;// Store result2
+        ADD     pSrc, pSrc, srcStep, LSL #1
+        
+        BGT     NextTwoRowsLoop
+End
+        SUB     pDst, pDst, dstStep, LSL #2             ;// Undo the four row steps
+        SUB     pSrc, pSrc, srcStep, LSL #2             ;// Undo 2 passes x 2 rows
+
+        M_END
+    
+    ENDIF
+
+    END
+    
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
new file mode 100644
index 0000000..007cd0d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
@@ -0,0 +1,185 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+       
+        M_VARIANTS ARM1136JS
+       
+        EXPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+
+
+    
+    IF ARM1136JS
+    
+        ;// Function header
+
+        ;// Function: 
+        ;//     armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe  
+        ;//
+        ;// Vertical interpolation of a 4x4 block with the 6-tap filter (1,-5,20,20,-5,1),
+        ;// +16 rounding and >>5, saturated to 8 bits. Input and output should be aligned.
+        ;//
+        ;// Registers used as input for this function
+        ;// r0 = pSrc (source), r1 = srcStep, r2 = pDst (destination), r3 = dstStep
+        ;//
+        ;// Registers preserved for top level function
+        ;// r0,r1,r2,r3,r4,r5,r6,r14
+        ;//
+        ;// Registers modified by the function
+        ;// r7,r8,r9,r10,r11,r12
+        ;//
+        ;// Output registers
+        ;// None. pSrc and pDst are stepped back at End, so r0-r3 keep their entry values
+        M_START armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe, r6
+
+;// Declare input registers
+pSrc            RN 0
+srcStep         RN 1
+pDst            RN 2
+dstStep         RN 3
+
+;// Declare inner loop registers (many names share one physical register; each is live only briefly)
+ValA            RN 5
+ValA0           RN 4
+ValA1           RN 5
+ValAF0          RN 4
+ValAF1          RN 5
+
+ValB            RN 11
+
+ValC            RN 5
+ValC0           RN 4
+ValC1           RN 5
+ValCD0          RN 12
+ValCD1          RN 14
+ValCF0          RN 4
+ValCF1          RN 5
+
+ValD            RN 10
+
+ValE            RN 7
+ValE0           RN 6
+ValE1           RN 7
+ValEB0          RN 10
+ValEB1          RN 11
+ValED0          RN 6
+ValED1          RN 7
+
+ValF            RN 10
+
+ValG            RN 14
+ValG0           RN 12
+ValG1           RN 14
+ValGB0          RN 12
+ValGB1          RN 14
+
+Acc0            RN 4
+Acc1            RN 5
+Acc2            RN 6
+Acc3            RN 7
+
+Temp            RN 7                                        ;// Not referenced in the code below
+Height          RN 3                                        ;// Not referenced in the code below
+Step            RN 6                                        ;// Not referenced in the code below
+
+Counter         RN 8
+r0x00ff00ff     RN 9                                        ;// [0 255 0 255] where 255 is offset
+r0x0fe00fe0     RN 10                                       ;// [0 (16*255 - 16) 0 (16*255 - 16)]
+
+        
+        LDR         r0x00ff00ff, =0x00ff00ff                ;// [0 255 0 255] 255 is offset to avoid negative results 
+        MOV         Counter, #2                             ;// Two passes; each pass produces two output rows
+        
+TwoRowsLoop
+        M_LDR       ValC, [pSrc], srcStep                   ;// Load  [c3 c2 c1 c0]
+        M_LDR       ValD, [pSrc], srcStep                   ;// Load  [d3 d2 d1 d0]
+        M_LDR       ValE, [pSrc], srcStep                   ;// Load  [e3 e2 e1 e0]        
+        SUB         pSrc, pSrc, srcStep, LSL #2                ;// Back 4 rows: pSrc now addresses row b (one above c)
+        LDR         ValB, [pSrc]                            ;// Load  [b3 b2 b1 b0]        
+        UXTAB16     ValC0, r0x00ff00ff, ValC                ;// [0 c2 0 c0] + [0 255 0 255]
+        UXTAB16     ValC1, r0x00ff00ff, ValC, ROR #8        ;// [0 c3 0 c1] + [0 255 0 255]        
+        
+        UXTAB16     ValE0, r0x00ff00ff, ValE                ;// [0 e2 0 e0] + [0 255 0 255]
+        UXTAB16     ValE1, r0x00ff00ff, ValE, ROR #8        ;// [0 e3 0 e1] + [0 255 0 255]        
+        UXTAB16     ValCD0, ValC0, ValD                     ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0]
+        UXTAB16     ValCD1, ValC1, ValD, ROR #8             ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1]                                
+        UXTAB16     ValEB0, ValE0, ValB                     ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0]
+        RSB         ValCD0, ValEB0, ValCD0, LSL #2          ;// 4*(Off+C+D) - (Off+B+E)
+        
+        LDR         ValD, [pSrc, srcStep, LSL #1]                       ;// Reload [d3 d2 d1 d0]: r10 was reused by ValEB0
+        UXTAB16     ValEB1, ValE1, ValB, ROR #8             ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1]                                               
+        RSB         ValCD1, ValEB1, ValCD1, LSL #2                ;// 4*(Off+C+D) - (Off+B+E) for the odd bytes
+        ;// One cycle stall
+        UXTAB16     ValED0, ValE0, ValD                     ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0]
+        UXTAB16     ValED1, ValE1, ValD, ROR #8             ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1]                                               
+        
+        LDR         ValF, [pSrc, srcStep, LSL #2]           ;// Load  [f3 f2 f1 f0]
+        M_LDR       ValB, [pSrc], srcStep                   ;// Reload [b3 b2 b1 b0] (r11 was reused by ValEB1); pSrc moves to row c
+        ADD         ValCD0, ValCD0, ValCD0, LSL #2          ;// 5 * [4*(Off+C+D) - (Off+B+E)]
+        ADD         ValCD1, ValCD1, ValCD1, LSL #2                          ;// Same for the odd bytes
+        UXTAB16     ValCF1, ValC1, ValF, ROR #8             ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1]                                
+        UXTAB16     ValCF0, ValC0, ValF                     ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0]
+        RSB         ValED1, ValCF1, ValED1, LSL #2        ;// 4*(Off+E+D) - (Off+C+F) for the odd bytes
+        
+        SUB         ValA, pSrc, srcStep, LSL #1             ;// Address of row a (two rows above c); ValA doubles as the pointer
+        LDR         ValA, [ValA]                            ;// Load  [a3 a2 a1 a0]
+        RSB         ValED0, ValCF0, ValED0, LSL #2          ;// 4*(Off+E+D) - (Off+C+F)        
+        ADD         ValED1, ValED1, ValED1, LSL #2          ;// 5 * [4*(Off+E+D) - (Off+C+F)] for the odd bytes
+        ADD         ValED0, ValED0, ValED0, LSL #2          ;// 5 * [4*(Off+E+D) - (Off+C+F)]
+        UXTAB16     ValA0, r0x00ff00ff, ValA                ;// [0 a2 0 a0] + [0 255 0 255]
+        UXTAB16     ValA1, r0x00ff00ff, ValA, ROR #8        ;// [0 a3 0 a1] + [0 255 0 255]
+        UXTAB16     ValAF0, ValA0, ValF                     ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0]
+        UXTAB16     ValAF1, ValA1, ValF, ROR #8             ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1]                                
+        
+        LDR         r0x0fe00fe0, =0x0fe00fe0                ;// [0 (16*255 - 16) 0 (16*255 - 16)]; r10 (ValF) is no longer needed
+        ADD         Acc1, ValCD1, ValAF1        ;// Acc1 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E), odd bytes
+        
+        LDR         ValG, [pSrc, srcStep, LSL #2]           ;// Load  [g3 g2 g1 g0]
+        ADD         Acc0, ValCD0, ValAF0                    ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E)        
+        UQSUB16     Acc1, Acc1, r0x0fe00fe0                 ;// Acc1 -= (16*Off - 16)
+        UQSUB16     Acc0, Acc0, r0x0fe00fe0        ;// Removes the offset, leaves +16 rounding, clamps at 0
+        UXTAB16     ValG0, r0x00ff00ff, ValG                ;// [0 g2 0 g0] + [0 255 0 255]
+        UXTAB16     ValG1, r0x00ff00ff, ValG, ROR #8        ;// [0 g3 0 g1] + [0 255 0 255]
+        UXTAB16     ValGB0, ValG0, ValB                     ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0]
+        UXTAB16     ValGB1, ValG1, ValB, ROR #8             ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1]                        
+        ADD         Acc2, ValED0, ValGB0                    ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F)
+        ADD         Acc3, ValED1, ValGB1        ;// Same sum for the odd bytes
+        UQSUB16     Acc3, Acc3, r0x0fe00fe0                 ;// Acc3 -= (16*Off - 16)
+        UQSUB16     Acc2, Acc2, r0x0fe00fe0        ;// Acc2 -= (16*Off - 16)
+        USAT16      Acc1, #13, Acc1                         ;// Saturate to 8+5 = 13 bits
+        USAT16      Acc0, #13, Acc0
+        USAT16      Acc3, #13, Acc3        
+        USAT16      Acc2, #13, Acc2
+        AND         Acc1, r0x00ff00ff, Acc1, LSR #5         ;// >>5 and mask: odd bytes of the first output row
+        AND         Acc0, r0x00ff00ff, Acc0, LSR #5         ;// >>5 and mask: even bytes of the first output row
+        ORR         Acc0, Acc0, Acc1, LSL #8                ;// Interleave into one word: first output row
+        AND         Acc3, r0x00ff00ff, Acc3, LSR #5         ;// >>5 and mask: odd bytes of the second output row
+        AND         Acc2, r0x00ff00ff, Acc2, LSR #5         ;// >>5 and mask: even bytes of the second output row
+        
+        M_STR       Acc0, [pDst], dstStep                   ;// Store result & adjust pointer
+        ORR         Acc2, Acc2, Acc3, LSL #8                ;// Interleave into one word: second output row
+        M_STR       Acc2, [pDst], dstStep                   ;// Store result & adjust pointer                                       
+        ADD         pSrc, pSrc, srcStep, LSL #1             ;// Net effect of one pass: pSrc moves down two rows
+        
+        SUBS        Counter, Counter, #1
+        BGT         TwoRowsLoop
+End
+        SUB     pDst, pDst, dstStep, LSL #2                 ;// Undo the four row stores: pDst back to its entry value
+        SUB     pSrc, pSrc, srcStep, LSL #2                 ;// Undo two passes of two rows: pSrc back to its entry value
+
+        M_END
+    
+    ENDIF
+    
+    END
+    
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
new file mode 100644
index 0000000..b1ad17c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
@@ -0,0 +1,273 @@
+;//
+;// 
+;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+        
+    IF ARM1136JS
+
+;// input registers (r0-r3 arrive in registers; iWidth, iHeight, dx, dy are loaded from stack arguments below)
+
+pSrc                 RN 0
+iSrcStep             RN 1
+pDst                 RN 2
+iDstStep             RN 3
+iWidth               RN 4
+iHeight              RN 5
+dx                   RN 6
+dy                   RN 7
+
+
+;// local variable registers (several names alias the same physical register)
+temp                 RN 11
+r0x20                RN 12
+tmp0x20              RN 14
+return               RN 0
+dxPlusdy             RN 10                                  ;// Not referenced in the code below
+EightMinusdx         RN 8 
+EightMinusdy         RN 9
+dxEightMinusdx       RN 8
+BACoeff              RN 6                                   ;// Overwrites dx once the coefficients are packed
+DCCoeff              RN 7                                   ;// Overwrites dy once the coefficients are packed
+                     
+iDstStepx2MinusWidth RN 8
+iSrcStepx2MinusWidth RN 9
+iSrcStep1            RN 10
+
+pSrc1                RN 1                                   ;// Reuses the iSrcStep register
+pSrc2                RN 8
+pDst1                RN 8
+pDst2                RN 12
+                     
+pix00                RN 8
+pix01                RN 9
+pix10                RN 10
+pix11                RN 11
+
+Out0100              RN 8  
+Out1110              RN 10 
+
+x00                  RN 8
+x01                  RN 10
+x02                  RN 12
+x10                  RN 9
+x11                  RN 11
+x12                  RN 14
+x20                  RN 10
+x21                  RN 12
+x22                  RN 14
+                     
+x01x00               RN 8  
+x02x01               RN 10 
+x11x10               RN 9  
+x12x11               RN 11 
+x21x20               RN 10 
+x22x21               RN 12 
+                     
+OutRow00             RN 12
+OutRow01             RN 14
+OutRow10             RN 10
+OutRow11             RN 12
+                     
+OutRow0100           RN 12
+OutRow1110           RN 12
+                     
+;//-----------------------------------------------------------------------------------------------
+;// armVCM4P10_Interpolate_Chroma_asm starts
+;//-----------------------------------------------------------------------------------------------
+        ;// Each output = (A*x(r,c) + B*x(r,c+1) + C*x(r+1,c) + D*x(r+1,c+1) + 32) >> 6; 2x2 outputs per inner pass
+        ;// Write function header
+        M_START armVCM4P10_Interpolate_Chroma, r11
+        
+        ;// Define stack arguments
+        M_ARG   Width,      4
+        M_ARG   Height,     4
+        M_ARG   Dx,         4
+        M_ARG   Dy,         4
+        
+        ;// Load argument from the stack
+        ;// M_STALL ARM1136JS=4
+        
+        M_LDR   iWidth,  Width  
+        M_LDR   iHeight, Height  
+        M_LDR   dx,      Dx 
+        M_LDR   dy,      Dy
+        
+        ;// EightMinusdx = 8 - dx
+        ;// EightMinusdy = 8 - dy
+        
+        ;// ACoeff = EightMinusdx * EightMinusdy
+        ;// BCoeff = dx * EightMinusdy
+        ;// CCoeff = EightMinusdx * dy
+        ;// DCoeff = dx * dy
+        
+        ADD     pSrc1, pSrc, iSrcStep                       ;// pSrc1 = row below pSrc; the step register is consumed here
+        SUB     temp, iWidth, #1
+        RSB     EightMinusdx, dx, #8 
+        RSB     EightMinusdy, dy, #8
+        CMN     dx,dy                                       ;// Z set when dx + dy == 0; consumed by the BEQ below (nothing in between sets flags)
+        ADD     dxEightMinusdx, EightMinusdx, dx, LSL #16   ;// Pack [dx : 8-dx] as two halfwords
+        ORR     iWidth, iWidth, temp, LSL #16               ;// Pack [width-1 : width]; top half is the inner loop counter
+        
+        ;// Packed Coeffs.
+        
+        MUL     BACoeff, dxEightMinusdx, EightMinusdy       ;// [BCoeff : ACoeff]
+        MUL     DCCoeff, dxEightMinusdx, dy        ;// [DCoeff : CCoeff]
+        
+        
+        ;// Checking either of dx and dy being non-zero
+        
+        BEQ     MVIsZero                                    ;// Taken when dx + dy == 0 (presumably dx, dy >= 0, so both are zero)
+        
+;// Pixel layout:
+;//
+;//   x00 x01 x02
+;//   x10 x11 x12
+;//   x20 x21 x22
+
+;// If fractional mv is not (0, 0)
+        
+OuterLoopMVIsNotZero
+
+InnerLoopMVIsNotZero
+            
+                LDRB    x00, [pSrc,  #+0]                   
+                LDRB    x10, [pSrc1, #+0]                   
+                LDRB    x01, [pSrc,  #+1]                  
+                LDRB    x11, [pSrc1, #+1]                  
+                LDRB    x02, [pSrc,  #+2]!                   ;// Writeback: pSrc advances by 2 pixels
+                LDRB    x12, [pSrc1, #+2]!                   ;// Writeback: pSrc1 advances by 2 pixels
+                
+                ORR     x01x00, x00, x01, LSL #16        ;// Pack horizontal neighbours as halfword pairs for SMLAD
+                ;// M_STALL ARM1136JS=1
+                ORR     x02x01, x01, x02, LSL #16        
+                MOV     r0x20,  #32                         ;// Rounding constant (overwrites x02, already packed)
+                ORR     x11x10, x10, x11, LSL #16    
+                ORR     x12x11, x11, x12, LSL #16        
+                
+                SMLAD   x01x00, x01x00, BACoeff, r0x20      ;// A*x00 + B*x01 + 32
+                SMLAD   x02x01, x02x01, BACoeff, r0x20                ;// A*x01 + B*x02 + 32
+                
+                ;// iWidth packed with MSB (top 16 bits) 
+                ;// as inner loop counter value i.e 
+                ;// (iWidth -1) and LSB (lower 16 bits)
+                ;// as original width
+                
+                SUBS    iWidth, iWidth, #1<<17              ;// Counter -= 2; flags are consumed by the BGT at the loop end
+                
+                SMLAD   OutRow00, x11x10, DCCoeff, x01x00            ;// + C*x10 + D*x11
+                SMLAD   OutRow01, x12x11, DCCoeff, x02x01            ;// + C*x11 + D*x12
+                
+                RSB     pSrc2, pSrc, pSrc1, LSL #1          ;// pSrc2 = 2*pSrc1 - pSrc: one row below pSrc1
+                
+                MOV     OutRow00, OutRow00, LSR #6
+                MOV     OutRow01, OutRow01, LSR #6
+                
+                LDRB    x20,[pSrc2, #-2]                    ;// Negative offsets: the pointers were already advanced by 2
+                
+                ORR     OutRow0100, OutRow00, OutRow01, LSL #8
+                STRH    OutRow0100, [pDst], #2              ;// Two pixels of the upper output row
+                
+                LDRB    x21,[pSrc2, #-1]
+                LDRB    x22,[pSrc2, #+0]
+                
+                ADD     pDst1, pDst, iDstStep               ;// Lower output row (pDst is already advanced by 2)
+                
+                ;// M_STALL ARM1136JS=1
+                                
+                ORR     x21x20, x20, x21, LSL #16
+                ORR     x22x21, x21, x22, LSL #16     
+                
+                MOV     tmp0x20, #32                        ;// Rounding constant again (r12 was reused since the first one)
+                
+                ;// Reusing the packed data x11x10 and x12x11
+                
+                SMLAD   x11x10,  x11x10,  BACoeff, tmp0x20
+                SMLAD   x12x11,  x12x11,  BACoeff, tmp0x20
+                SMLAD   OutRow10, x21x20, DCCoeff, x11x10            
+                SMLAD   OutRow11, x22x21, DCCoeff, x12x11
+                
+                MOV     OutRow10, OutRow10, LSR #6
+                MOV     OutRow11, OutRow11, LSR #6
+                
+                ;// M_STALL ARM1136JS=1
+               
+                ORR     OutRow1110, OutRow10, OutRow11, LSL #8
+                
+                STRH    OutRow1110, [pDst1, #-2]            ;// Two pixels of the lower output row
+                
+                BGT     InnerLoopMVIsNotZero                ;// Uses the flags from the SUBS on iWidth above
+                
+                SUBS    iHeight, iHeight, #2                ;// Two rows done; flags are consumed by the BGT below
+                ADD     iWidth, iWidth, #1<<16              ;// Counter half ended at -1 (for even widths): restores plain width
+                RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1
+                SUB     iSrcStep1, pSrc1, pSrc              ;// Recover the source step from the two row pointers
+                SUB     temp, iWidth, #1
+                RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1
+                ADD     pDst, pDst, iDstStepx2MinusWidth    ;// Start of the next pair of rows
+                ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
+                ADD     pSrc, pSrc, iSrcStepx2MinusWidth
+                ORR     iWidth, iWidth, temp, LSL #16       ;// Re-pack [width-1 : width] for the next inner loop
+                BGT     OuterLoopMVIsNotZero
+                MOV     return,  #OMX_Sts_NoErr
+                M_EXIT
+
+;// If fractional mv is (0, 0)
+
+MVIsZero
+                ;// M_STALL ARM1136JS=4
+OuterLoopMVIsZero
+
+InnerLoopMVIsZero
+                                      
+                LDRB    pix00, [pSrc],  #+1
+                LDRB    pix01, [pSrc],  #+1
+                LDRB    pix10, [pSrc1], #+1
+                LDRB    pix11, [pSrc1], #+1
+                
+                ADD     pDst2,  pDst, iDstStep              ;// Lower output row
+                SUBS    iWidth, iWidth, #1<<17                ;// Counter -= 2; flags are consumed by the BGT below
+                
+                ORR     Out0100, pix00, pix01, LSL #8 
+                ORR     Out1110, pix10, pix11, LSL #8
+                
+                STRH    Out0100, [pDst],  #2                ;// Plain copy: two pixels of the upper row
+                STRH    Out1110, [pDst2], #2                ;// Plain copy: two pixels of the lower row
+                
+                BGT     InnerLoopMVIsZero
+                
+                SUBS    iHeight, iHeight, #2                ;// Two rows done; flags are consumed by the BGT below
+                ADD     iWidth, iWidth, #1<<16              ;// Counter half ended at -1 (for even widths): restores plain width
+                RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1
+                SUB     iSrcStep1, pSrc1, pSrc              ;// Recover the source step from the two row pointers
+                SUB     temp, iWidth, #1
+                RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1
+                ADD     pDst, pDst, iDstStepx2MinusWidth    ;// Start of the next pair of rows
+                ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
+                ADD     pSrc, pSrc, iSrcStepx2MinusWidth
+                ORR     iWidth, iWidth, temp, LSL #16       ;// Re-pack [width-1 : width] for the next inner loop
+                BGT     OuterLoopMVIsZero
+                MOV     return,  #OMX_Sts_NoErr
+                M_END
+
+        ENDIF ;// ARM1136JS
+
+        
+        END
+
+;//-----------------------------------------------------------------------------------------------
+;// armVCM4P10_Interpolate_Chroma_asm ends
+;//-----------------------------------------------------------------------------------------------
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s
new file mode 100644
index 0000000..f962f70
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s
@@ -0,0 +1,74 @@
+;//
+;// 
+;// File Name:  armVCM4P10_QuantTables_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;// Description:
+;// This file contains quantization tables
+;// 
+;// 
+
+         INCLUDE omxtypes_s.h
+         INCLUDE armCOMM_s.h
+     
+         
+         EXPORT armVCM4P10_MFMatrixQPModTable
+         EXPORT armVCM4P10_QPDivIntraTable
+         EXPORT armVCM4P10_QPDivPlusOneTable  
+         
+;//--------------------------------------------------------------
+;// This table contains armVCM4P10_MFMatrix [iQP % 6][0] entries,
+;// for values of iQP from 0 to 51 (inclusive). 
+;//--------------------------------------------------------------
+
+         M_TABLE armVCM4P10_MFMatrixQPModTable
+         DCW 13107, 11916, 10082, 9362, 8192, 7282          ;// iQP  0.. 5 (16-bit entries, one per iQP)
+         DCW 13107, 11916, 10082, 9362, 8192, 7282          ;// iQP  6..11
+         DCW 13107, 11916, 10082, 9362, 8192, 7282          ;// iQP 12..17
+         DCW 13107, 11916, 10082, 9362, 8192, 7282          ;// iQP 18..23
+         DCW 13107, 11916, 10082, 9362, 8192, 7282          ;// iQP 24..29
+         DCW 13107, 11916, 10082, 9362, 8192, 7282          ;// iQP 30..35
+         DCW 13107, 11916, 10082, 9362, 8192, 7282          ;// iQP 36..41
+         DCW 13107, 11916, 10082, 9362, 8192, 7282          ;// iQP 42..47
+         DCW 13107, 11916, 10082, 9362, 8192, 7282          ;// iQP 48..53 (52 and 53 lie beyond the documented range)
+         
+;//---------------------------------------------------------------
+;// This table contains ARM_M4P10_Q_OFFSET + 1 + (iQP / 6) values,
+;// for values of iQP from 0 to 51 (inclusive). 
+;//---------------------------------------------------------------
+
+         M_TABLE armVCM4P10_QPDivPlusOneTable
+         DCB 16, 16, 16, 16, 16, 16                         ;// iQP  0.. 5 (byte entries; value = 16 + iQP/6)
+         DCB 17, 17, 17, 17, 17, 17                         ;// iQP  6..11
+         DCB 18, 18, 18, 18, 18, 18                         ;// iQP 12..17
+         DCB 19, 19, 19, 19, 19, 19                         ;// iQP 18..23
+         DCB 20, 20, 20, 20, 20, 20                         ;// iQP 24..29
+         DCB 21, 21, 21, 21, 21, 21                         ;// iQP 30..35
+         DCB 22, 22, 22, 22, 22, 22                         ;// iQP 36..41
+         DCB 23, 23, 23, 23, 23, 23                         ;// iQP 42..47
+         DCB 24, 24, 24, 24, 24, 24                         ;// iQP 48..53 (52 and 53 lie beyond the documented range)
+
+;//------------------------------------------------------------------
+;// This table contains (1 << QbitsPlusOne) / 3 Values (Intra case) ,
+;// for values of iQP from 0 to 51 (inclusive). 
+;//------------------------------------------------------------------
+    
+         M_TABLE armVCM4P10_QPDivIntraTable, 2
+         DCD 21845, 21845, 21845, 21845, 21845, 21845       ;// iQP  0.. 5: floor(2^16 / 3) (32-bit entries)
+         DCD 43690, 43690, 43690, 43690, 43690, 43690       ;// iQP  6..11: floor(2^17 / 3)
+         DCD 87381, 87381, 87381, 87381, 87381, 87381       ;// iQP 12..17: floor(2^18 / 3)
+         DCD 174762, 174762, 174762, 174762, 174762, 174762 ;// iQP 18..23: floor(2^19 / 3)
+         DCD 349525, 349525, 349525, 349525, 349525, 349525 ;// iQP 24..29: floor(2^20 / 3)
+         DCD 699050, 699050, 699050, 699050, 699050, 699050 ;// iQP 30..35: floor(2^21 / 3)
+         DCD 1398101, 1398101, 1398101, 1398101, 1398101, 1398101 ;// iQP 36..41: floor(2^22 / 3)
+         DCD 2796202, 2796202, 2796202, 2796202, 2796202, 2796202 ;// iQP 42..47: floor(2^23 / 3)
+         DCD 5592405, 5592405, 5592405, 5592405, 5592405, 5592405                ;// iQP 48..53: floor(2^24 / 3); 52, 53 beyond documented range
+         
+         
+         END
+         
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
new file mode 100644
index 0000000..241d188
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
@@ -0,0 +1,407 @@
+;//
+;// 
+;// File Name:  armVCM4P10_TransformResidual4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// Description:
+;// Transform Residual 4x4 Coefficients
+;// 
+;// 
+
+        
+;// Include standard headers
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+        
+;// Import symbols required from other files
+;// (For example tables)
+    
+        
+        
+        
+;// Set debugging level        
+;//DEBUG_ON    SETL {TRUE}
+
+
+
+;// Guarding implementation by the processor name
+    
+    IF  ARM1136JS 
+    
+;//Input Registers
+pDst                RN  0
+pSrc                RN  1
+
+;//Output Registers (none declared)
+
+
+;//Local Scratch Registers (names below alias a small set of physical registers, stage by stage)
+
+;// Packed Input pixels
+in00                RN  2                   ;// Src[0] & Src[1] 
+in02                RN  3                   ;// Src[2] & Src[3]
+in10                RN  4                   ;// Src[4] & Src[5]
+in12                RN  5                   ;// Src[6] & Src[7]
+in20                RN  6                   ;// Src[8] & Src[9]
+in22                RN  7                   ;// Src[10] & Src[11]
+in30                RN  8                   ;// Src[12] & Src[13]
+in32                RN  9                   ;// Src[14] & Src[15]
+
+;// Transpose for Row operations (Rows to cols)
+trRow00             RN  2
+trRow10             RN  10
+trRow02             RN  3
+trRow12             RN  5
+trRow20             RN  11
+trRow30             RN  12
+trRow32             RN  14
+trRow22             RN  7
+
+;// Intermediate calculations
+e0                  RN  4                   
+e1                  RN  6
+e2                  RN  8
+e3                  RN  9
+constZero           RN  1                   ;// Reuses the pSrc register once the block has been loaded
+
+;// Row operated pixels
+rowOp00             RN  2
+rowOp10             RN  10
+rowOp20             RN  11
+rowOp30             RN  12
+rowOp02             RN  3
+rowOp12             RN  5
+rowOp22             RN  7
+rowOp32             RN  14
+
+;// Transpose for column operations
+trCol00             RN  2                   
+trCol02             RN  3                   
+trCol10             RN  4                   
+trCol12             RN  5                   
+trCol20             RN  6                   
+trCol22             RN  7                   
+trCol30             RN  8                   
+trCol32             RN  9  
+
+;// Intermediate calculations
+g0                  RN  10
+g1                  RN  11
+g2                  RN  12
+g3                  RN  14   
+
+;// Column operated pixels
+colOp00             RN  2                   
+colOp02             RN  3                   
+colOp10             RN  4                   
+colOp12             RN  5                   
+colOp20             RN  6                   
+colOp22             RN  7                   
+colOp30             RN  8                   
+colOp32             RN  9  
+
+
+temp1               RN  10                  ;// Temporary scratch variables (temp1 is not referenced below)
+const1              RN  11      
+const2              RN  12
+mask                RN  14
+
+;// Output pixels
+out00               RN  2                   
+out02               RN  3                   
+out10               RN  4                   
+out12               RN  5                   
+out20               RN  6                   
+out22               RN  7                   
+out30               RN  8                   
+out32               RN  9  
+      
+       
+       
+    ;// Allocate stack memory required by the function (no allocation is declared here)
+        
+
+    ;// Write function header
+        M_START armVCM4P10_TransformResidual4x4,r11
+        
+        ;******************************************************************
+        ;// The strategy used in implementing the transform is as follows:*
+        ;// Load the 4x4 block into 8 registers                           *  
+        ;// Transpose the 4x4 matrix                                      *  
+        ;// Perform the row operations (on columns) using SIMD            *  
+        ;// Transpose the 4x4 result matrix                               *  
+        ;// Perform the column operations                                 *
+        ;// Store the 4x4 block at one go                                 *  
+        ;******************************************************************
+
+        ;// Load all the 4x4 pixels (sixteen 16-bit values, two per register)
+        
+        LDMIA   pSrc,{in00,in02,in10,in12,in20,in22,in30,in32}
+        
+        MOV       constZero,#0                                     ;// SHADD16 x,0 gives x>>1 per halfword; r1 (pSrc) is free after the LDMIA
+        ;LDR       constZero,=0x00000000  
+        
+        ;*****************************************************************
+        ;//
+        ;// Transpose the matrix in order to perform row ops as column ops
+        ;// Input:   in[][] = original matrix
+        ;// Output:  trRow[][]= transposed matrix
+        ;// Step1: Obtain the LL part of the transposed matrix
+        ;// Step2: Obtain the HL part
+        ;// step3: Obtain the LH part
+        ;// Step4: Obtain the HH part
+        ;//
+        ;*****************************************************************
+        
+        ;// LL 2x2 transposed matrix 
+        ;//   d0 d1 - -
+        ;//   d4 d5 - -
+        ;//   -  -  - -
+        ;//   -  -  - -
+        
+        PKHTB   trRow10,in10,in00,ASR #16               ;// transposed [5 4] = source [5:1] (hi:lo halfword)
+        PKHBT   trRow00,in00,in10,LSL #16               ;// transposed [1 0] = source [4:0]
+        
+        ;// HL 2x2 transposed matrix  
+        ;//    -   -   - -
+        ;//    -   -   - -
+        ;//    d8  d9  - -
+        ;//   d12 d13  - -
+        
+         
+         PKHTB   trRow30,in12,in02,ASR #16              ;// transposed [13 12] = source [7:3]
+         PKHBT   trRow20,in02,in12,LSL #16              ;// transposed [9 8] = source [6:2]
+        
+        ;// LH 2x2 transposed matrix 
+        ;//   - - d2 d3 
+        ;//   - - d6 d7 
+        ;//   - - -  -
+        ;//   - - -  -
+        
+        PKHBT   trRow02,in20,in30,LSL #16               ;// transposed [3 2] = source [12:8]
+        PKHTB   trRow12,in30,in20,ASR #16               ;// transposed [7 6] = source [13:9]
+        
+        
+        
+         
+        ;// HH 2x2 transposed matrix  
+        ;//    - -   -   -
+        ;//    - -   -   -
+        ;//    - -  d10 d11
+        ;//    - -  d14 d15
+        
+        PKHTB   trRow32,in32,in22,ASR #16               ;// transposed [15 14] = source [15:11]
+        PKHBT   trRow22,in22,in32,LSL #16               ;// transposed [11 10] = source [14:10]
+       
+        
+        ;**************************************** 
+        ;// Row Operations (Performed on columns)
+        ;**************************************** 
+        
+        
+        ;// SIMD operations on first two columns(two rows of the original matrix)
+        
+        
+        SADD16      e0, trRow00,trRow20                   ;//  e0 = d0 + d2 
+        SSUB16    e1, trRow00,trRow20                   ;//  e1 = d0 - d2  
+        SHADD16   e2, trRow10,constZero                 ;// (d1>>1) constZero is a register holding 0
+        SHADD16   e3, trRow30,constZero                 ;// (d3>>1); both halvings issued first to avoid pipeline stalls
+        SSUB16    e2, e2, trRow30                       ;//  e2 = (d1>>1) - d3  
+        SADD16    e3, e3, trRow10                       ;//  e3 = d1 + (d3>>1)  
+        SADD16    rowOp00, e0, e3                       ;//  f0 = e0 + e3  
+        SADD16    rowOp10, e1, e2                       ;//  f1 = e1 + e2  
+        SSUB16    rowOp20, e1, e2                       ;//  f2 = e1 - e2  
+        SSUB16    rowOp30, e0, e3                       ;//  f3 = e0 - e3
+        
+        ;// SIMD operations on next two columns(next two rows of the original matrix)
+        
+        SADD16      e0, trRow02,trRow22                   ;//  e0 = d0 + d2
+        SSUB16    e1, trRow02,trRow22                   ;//  e1 = d0 - d2
+        SHADD16   e2, trRow12,constZero                 ;// (d1>>1) constZero is a register holding 0
+        SHADD16   e3, trRow32,constZero                 ;// (d3>>1)
+        SSUB16    e2, e2, trRow32                       ;//  e2 = (d1>>1) - d3
+        SADD16    e3, e3, trRow12                       ;//  e3 = d1 + (d3>>1)
+        SADD16    rowOp02, e0, e3                       ;//  f0 = e0 + e3
+        SADD16    rowOp12, e1, e2                       ;//  f1 = e1 + e2
+        SSUB16    rowOp22, e1, e2                       ;//  f2 = e1 - e2
+        SSUB16    rowOp32, e0, e3                       ;//  f3 = e0 - e3
+        
+        
+        ;*****************************************************************
+        ;// Transpose the resultant matrix
+        ;// Input:  rowOp[][]
+        ;// Output: trCol[][] 
+        ;*****************************************************************
+        
+        ;// LL 2x2 transposed matrix 
+        ;//   d0 d1 - -
+        ;//   d4 d5 - -
+        ;//   -  -  - -
+        ;//   -  -  - -
+        
+        PKHTB   trCol10,rowOp10,rowOp00,ASR #16           ;// transposed [5 4] = rowOp [5:1] (hi:lo halfword)
+        PKHBT   trCol00,rowOp00,rowOp10,LSL #16           ;// transposed [1 0] = rowOp [4:0]
+        
+        ;// HL 2x2 transposed matrix  
+        ;//    -   -   - -
+        ;//    -   -   - -
+        ;//    d8  d9  - -
+        ;//   d12 d13  - -
+        
+         
+         PKHTB   trCol30,rowOp12,rowOp02,ASR #16          ;// transposed [13 12] = rowOp [7:3]
+         PKHBT   trCol20,rowOp02,rowOp12,LSL #16          ;// transposed [9 8] = rowOp [6:2]
+        
+        ;// LH 2x2 transposed matrix 
+        ;//   - - d2 d3 
+        ;//   - - d6 d7 
+        ;//   - - -  -
+        ;//   - - -  -
+        
+        PKHBT   trCol02,rowOp20,rowOp30,LSL #16           ;// transposed [3 2] = rowOp [12:8]
+        PKHTB   trCol12,rowOp30,rowOp20,ASR #16           ;// transposed [7 6] = rowOp [13:9]
+        
+        
+        
+         
+        ;// HH 2x2 transposed matrix  
+        ;//    - -   -   -
+        ;//    - -   -   -
+        ;//    - -  d10 d11
+        ;//    - -  d14 d15
+        
+        PKHTB   trCol32,rowOp32,rowOp22,ASR #16            ;// transposed [15 14] = rowOp [15:11]
+        PKHBT   trCol22,rowOp22,rowOp32,LSL #16            ;// transposed [11 10] = rowOp [14:10]
+       
+        
+        ;******************************* 
+        ;// Column Operations 
+        ;******************************* 
+        
+        
+        ;// SIMD operations on first two columns
+        
+          
+        SADD16      g0, trCol00,trCol20                     ;// g0 = f0 + f2
+        SSUB16    g1, trCol00,trCol20                     ;// g1 = f0 - f2
+        SHADD16   g2, trCol10,constZero                     ;// (f1>>1) constZero is a register holding 0
+        SHADD16   g3, trCol30,constZero                     ;// (f3>>1)
+        SSUB16    g2, g2, trCol30                         ;// g2 = (f1>>1) - f3
+        SADD16    g3, g3, trCol10                         ;// g3 = f1 + (f3>>1)
+        SADD16    colOp00, g0, g3                         ;// h0 = g0 + g3
+        SADD16    colOp10, g1, g2                         ;// h1 = g1 + g2
+        SSUB16    colOp20, g1, g2                         ;// h2 = g1 - g2
+        SSUB16    colOp30, g0, g3                         ;// h3 = g0 - g3
+        
+        ;// SIMD operations on next two columns
+        
+        SADD16      g0, trCol02,trCol22                     ;// g0 = f0 + f2
+        SSUB16    g1, trCol02,trCol22                     ;// g1 = f0 - f2
+        SHADD16   g2, trCol12,constZero                     ;// (f1>>1) constZero is a register holding 0
+        SHADD16   g3, trCol32,constZero                     ;// (f3>>1)
+        SSUB16    g2, g2, trCol32                         ;// g2 = (f1>>1) - f3
+        SADD16    g3, g3, trCol12                         ;// g3 = f1 + (f3>>1)
+        SADD16    colOp02, g0, g3                         ;// h0 = g0 + g3
+        SADD16    colOp12, g1, g2                         ;// h1 = g1 + g2
+        SSUB16    colOp22, g1, g2                         ;// h2 = g1 - g2
+        SSUB16    colOp32, g0, g3                         ;// h3 = g0 - g3
+        
+        
+             
+                  
+             
+        ;************************************************
+        ;// Calculate final value (colOp[i][j] + 32)>>6
+        ;************************************************
+        
+        ;// const1: Serves dual purpose 
+        ;// (1) Add #32 to both the lower and higher 16bits of the SIMD result 
+        ;// (2) Convert the lower 16 bit value to an unsigned number (Add 32768)
+        
+        LDR     const1, =0x00208020             ;// High half: +32. Low half: +32 +32768
+        
+        LDR     mask, =0xffff03ff                       ;// Clears bits 15..10, where the 32-bit ASR #6 drops the high half's low 6 bits
+        
+        ;// const2(#512): used to convert the lower 16bit number back to signed value 
+      
+        MOV     const2,#0x200                           ;// const2 = 2^9 = 32768 >> 6 (high half of const2 is 0)
+        
+        ;// First Row 
+        
+        SADD16    colOp00, colOp00, const1              ;// Add rounding (and the low-half bias)
+        SADD16    colOp02, colOp02, const1
+        AND     colOp00, mask, colOp00, ASR #6          ;// One 32-bit shift serves both halfwords; mask removes the spill
+        AND     colOp02, mask, colOp02, ASR #6
+        SSUB16  out00,colOp00,const2                    ;// Remove the shifted bias from the low half
+        SSUB16  out02,colOp02,const2    
+        
+
+        ;// Second Row
+        
+        SADD16    colOp10, colOp10, const1
+        SADD16    colOp12, colOp12, const1
+        AND     colOp10, mask, colOp10, ASR #6
+        AND     colOp12, mask, colOp12, ASR #6
+        SSUB16  out10,colOp10,const2
+        SSUB16  out12,colOp12,const2    
+        
+        
+        ;// Third Row
+        
+        SADD16    colOp20, colOp20, const1
+        SADD16    colOp22, colOp22, const1
+        AND     colOp20, mask, colOp20, ASR #6
+        AND     colOp22, mask, colOp22, ASR #6
+        SSUB16  out20,colOp20,const2
+        SSUB16  out22,colOp22,const2
+        
+        
+        ;// Fourth Row   
+        
+        SADD16    colOp30, colOp30, const1
+        SADD16    colOp32, colOp32, const1
+        AND     colOp30, mask, colOp30, ASR #6
+        AND     colOp32, mask, colOp32, ASR #6
+        SSUB16  out30,colOp30,const2
+        SSUB16  out32,colOp32,const2
+        
+        
+        
+                
+        ;***************************
+        ;// Store all the 4x4 pixels
+        ;***************************
+        
+        STMIA   pDst,{out00,out02,out10,out12,out20,out22,out30,out32}
+        
+                               
+       
+        ;// No return value is written here; r0 still holds pDst (STMIA has no writeback)
+        
+End                
+
+        
+        ;// Write function tail
+        M_END
+        
+    ENDIF                                                           ;//ARM1136JS    
+    
+    
+
+
+
+
+
+;// Guarding implementation by the processor name
+    
+            
+    END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
new file mode 100644
index 0000000..ad16d9cb0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
@@ -0,0 +1,92 @@
+;//
+;// 
+;// File Name:  armVCM4P10_UnpackBlock4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+;// Define the processor variants supported by this file
+
+        M_VARIANTS ARM1136JS
+        
+                       
+        IF ARM1136JS
+        
+;//--------------------------------------
+;// Input Arguments and their scope/usage
+;//--------------------------------------
+ppSrc           RN 0    ;// Persistent variable: address of the stream pointer (loaded at entry, written back at exit)
+pDst            RN 1    ;// Persistent variable: destination buffer; 32 bytes are zeroed, then halfwords are stored
+
+;//--------------------------------
+;// Variables and their scope/usage
+;//--------------------------------
+pSrc            RN 2    ;// Persistent variable: running read pointer into the packed stream
+Flag            RN 3    ;// Control byte: bits[3:0] = destination index, bit 4 = 16-bit value follows, bit 5 = last entry
+Value           RN 4    ;// Unpacked coefficient; also the first register of the zero-fill STRD pair
+Value2          RN 5    ;// High byte of a 16-bit coefficient; also the second register of the zero-fill STRD pair
+strOffset       RN 6    ;// Byte offset into <pDst> at which <Value> is stored
+cstOffset       RN 7    ;// Constant 31, used to mask (Flag << 1)
+
+;// armVCM4P10_UnpackBlock4x4: zero 16 halfwords at <pDst>, then scatter the packed (index, value) entries read via <ppSrc>
+        M_START armVCM4P10_UnpackBlock4x4, r7
+        
+        LDR     pSrc, [ppSrc]                       ;// Load pSrc
+        MOV     cstOffset, #31                      ;// To be used in the loop, to compute offset
+        
+        ;//-----------------------------------------------------------------------
+        ;// First, zero all the coefficient values in the <pDst> buffer
+        ;//-----------------------------------------------------------------------
+        
+        MOV      Value,  #0                         ;// Initialize the zero value
+        MOV      Value2, #0                         ;// Initialize the zero value
+        LDRB     Flag,  [pSrc], #1                  ;// Preload <Flag> before <unpackLoop>
+        
+        STRD     Value, [pDst, #0]                  ;// pDst[0]  = pDst[1]  = pDst[2]  = pDst[3]  = 0
+        STRD     Value, [pDst, #8]                  ;// pDst[4]  = pDst[5]  = pDst[6]  = pDst[7]  = 0
+        STRD     Value, [pDst, #16]                 ;// pDst[8]  = pDst[9]  = pDst[10] = pDst[11] = 0
+        STRD     Value, [pDst, #24]                 ;// pDst[12] = pDst[13] = pDst[14] = pDst[15] = 0
+        
+        ;//----------------------------------------------------------------------------
+        ;// The loop below parses and unpacks the input stream. The C-model has 
+        ;// a somewhat complicated logic for sign extension.  But in the v6 version,
+        ;// that can be easily taken care by loading the data from <pSrc> stream as 
+        ;// SIGNED byte/halfword. So, based on the first TST instruction, 8-bits or 
+        ;// 16-bits are read.
+        ;//
+        ;// Next, to compute the offset, where the unpacked value needs to be stored,
+        ;// we modify the computation to perform [(Flag & 15) << 1] as [(Flag << 1) & 31]
+        ;// This results in a saving of one cycle.
+        ;//----------------------------------------------------------------------------
+        
+unpackLoop
+        TST      Flag,  #0x10                        ;// Computing (Flag & 0x10): NE selects the 16-bit path, EQ the 8-bit path
+        LDRSBNE  Value2,[pSrc,#1]                    ;// Load byte wise to avoid unaligned access (sign-extended high byte)
+        LDRBNE   Value, [pSrc], #2                   ;// Zero-extended low byte; pSrc advances past both bytes
+        AND      strOffset, cstOffset, Flag, LSL #1  ;// strOffset = (Flag & 15) << 1;
+        LDRSBEQ  Value, [pSrc], #1                   ;// Value = (OMX_S8) *pSrc++, sign-extended 8-bit value
+        ORRNE    Value,Value,Value2, LSL #8          ;// Value = low byte | (sign-extended high byte << 8)
+        
+        TST      Flag,  #0x20                        ;// Computing (Flag & 0x20) to check, if we're done
+        LDRBEQ   Flag,  [pSrc], #1                   ;// Flag  = (OMX_U8) *pSrc++, for next iteration
+        STRH     Value, [pDst, strOffset]            ;// Store <Value> at offset <strOffset>
+        BEQ      unpackLoop                          ;// Loop again while bit 5 of the Flag just tested was clear
+        
+        STR      pSrc, [ppSrc]                       ;// Update the bitstream pointer
+        M_END
+    
+    ENDIF
+    
+    
+    
+    END
+    
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
new file mode 100644
index 0000000..c2e6b601
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
@@ -0,0 +1,88 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_DeblockChroma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 intra chroma deblock
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DeblockChroma_I
+ *
+ * Description:
+ * Performs deblocking filtering on all edges of the chroma macroblock (16x16).
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	pSrcDst         pointer to the input macroblock. Must be 8-byte aligned.
+ * [in]	srcdstStep      Step of the arrays
+ * [in]	pAlpha          pointer to a 2x2 array of alpha thresholds, organized as follows: { external
+ *                          vertical edge, internal  vertical edge, external
+ *                         horizontal edge, internal horizontal edge }
+ * [in]	pBeta			pointer to a 2x2 array of beta thresholds, organized as follows: { external
+ *                              vertical edge, internal vertical edge, external  horizontal edge,
+ *                              internal  horizontal edge }
+ * [in]	pThresholds		Array of size 8x2 of Thresholds (TC0) (values for the left or
+ *                               above edge of each 4x2 or 2x4 block, arranged in  vertical block order
+ *                               and then in  horizontal block order)
+ * [in]	pBS				array of size 16x2 of BS parameters (arranged in scan block order for vertical edges and then horizontal edges);
+ *                         valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4.  Must be 4-byte aligned.
+ * [out]	pSrcDst		pointer to filtered output macroblock
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *   - Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds, or pBS is NULL.
+ *   - pSrcDst is not 8-byte aligned.
+ *   - either pThresholds or pBS is not 4-byte aligned.
+ *   - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3.
+ *   - srcdstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I(
+    OMX_U8       *pSrcDst,
+    OMX_S32       srcdstStep,
+    const OMX_U8 *pAlpha,
+    const OMX_U8 *pBeta,
+    const OMX_U8 *pThresholds,
+    const OMX_U8 *pBS
+)
+{
+    OMXResult verStatus;
+
+    /* Argument checks: each one is issued through armRetArgErrIf with OMX_Sts_BadArgErr. */
+    armRetArgErrIf(NULL == pSrcDst,                 OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcDst),     OMX_Sts_BadArgErr);
+    armRetArgErrIf((srcdstStep & 7) != 0,           OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pAlpha,                  OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pBeta,                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pThresholds,             OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pBS,                     OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pBS),         OMX_Sts_BadArgErr);
+
+    /* Pass 1: vertical edges, using the tables from their first entry. */
+    verStatus = omxVCM4P10_FilterDeblockingChroma_VerEdge_I(
+        pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+    armRetArgErrIf(verStatus != OMX_Sts_NoErr, verStatus)
+
+    /* Pass 2: horizontal edges; each table pointer is advanced past the entries passed to pass 1. */
+    return omxVCM4P10_FilterDeblockingChroma_HorEdge_I(
+        pSrcDst, srcdstStep, &pAlpha[2], &pBeta[2], &pThresholds[8], &pBS[16]);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
new file mode 100644
index 0000000..60238622
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
@@ -0,0 +1,91 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_DeblockLuma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 luma deblock
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+ 
+
+/**
+ * Function: omxVCM4P10_DeblockLuma_I
+ *
+ * Description:
+ * This function performs deblock filtering on the horizontal and vertical edges of a luma macroblock
+ * (16x16).
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	pSrcDst         pointer to the input macroblock. Must be 8-byte aligned.
+ * [in]	srcdstStep      image width
+ * [in]	pAlpha          pointer to a 2x2 table of alpha thresholds, organized as follows: { external
+ *                             vertical edge, internal vertical edge, external horizontal
+ *                             edge, internal horizontal edge }
+ * [in]	pBeta			pointer to a 2x2 table of beta thresholds, organized as follows: { external
+ *                              vertical edge, internal vertical edge, external  horizontal edge,
+ *                              internal  horizontal edge }
+ * [in]	pThresholds		pointer to a 16x2 table of threshold (TC0), organized as follows: { values for
+ *                              the  left or above edge of each 4x4 block, arranged in  vertical block order
+ *                              and then in horizontal block order)
+ * [in]	pBS				 pointer to a 16x2 table of BS parameters arranged in scan block order for vertical edges and then horizontal edges;
+ *                               valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4.  Must be 4-byte aligned.
+ * [out]	pSrcDst		pointer to filtered output macroblock.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *    - Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds or pBS is NULL.
+ *    - pSrcDst is not 8-byte aligned.
+ *    - srcdstStep is not a multiple of 8
+ *    - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3.
+ *    - either pThresholds or pBS is not 4-byte aligned.
+ *
+ */
+
+OMXResult omxVCM4P10_DeblockLuma_I(
+    OMX_U8       *pSrcDst,
+    OMX_S32       srcdstStep,
+    const OMX_U8 *pAlpha,
+    const OMX_U8 *pBeta,
+    const OMX_U8 *pThresholds,
+    const OMX_U8 *pBS
+)
+{
+    OMXResult verStatus;
+
+    /* Argument checks: each one is issued through armRetArgErrIf with OMX_Sts_BadArgErr. */
+    armRetArgErrIf(NULL == pSrcDst,                 OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcDst),     OMX_Sts_BadArgErr);
+    armRetArgErrIf((srcdstStep & 7) != 0,           OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pAlpha,                  OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pBeta,                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pThresholds,             OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pBS,                     OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pBS),         OMX_Sts_BadArgErr);
+
+    /* Pass 1: vertical edges, using the tables from their first entry. */
+    verStatus = omxVCM4P10_FilterDeblockingLuma_VerEdge_I(
+        pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+    armRetArgErrIf(verStatus != OMX_Sts_NoErr, verStatus)
+
+    /* Pass 2: horizontal edges; each table pointer is advanced past the entries passed to pass 1. */
+    return omxVCM4P10_FilterDeblockingLuma_HorEdge_I(
+        pSrcDst, srcdstStep, &pAlpha[2], &pBeta[2], &pThresholds[16], &pBS[16]);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
new file mode 100644
index 0000000..a19f277
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
@@ -0,0 +1,62 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 decode coefficients module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse raster scan for 2x2 block of 
+ * ChromaDCLevel. The decoded coefficients in packed position-coefficient 
+ * buffer are stored in increasing raster scan order, namely position order.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		Double pointer to current byte in bit stream
+ *								buffer
+ * [in]	pOffset			Pointer to current bit position in the byte 
+ *								pointed to by *ppBitStream
+ * [out]	ppBitStream		*ppBitStream is updated after each block is decoded
+ * [out]	pOffset			*pOffset is updated after each block is decoded
+ * [out]	pNumCoeff		Pointer to the number of nonzero coefficients
+ *								in this block
+ * [out]	ppPosCoefbuf	Double pointer to destination residual
+ *								coefficient-position pair buffer
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC(
+    const OMX_U8 **ppBitStream,
+    OMX_S32       *pOffset,
+    OMX_U8        *pNumCoeff,
+    OMX_U8       **ppPosCoefbuf)
+{
+    /* Fixed trailing arguments for the 2x2 chroma DC block (names presumed from the generic wrapper). */
+    const OMX_INT sVLCSelect   = 17;
+    const OMX_INT sMaxNumCoeff = 4;
+    return armVCM4P10_DecodeCoeffsToPair(
+        ppBitStream, pOffset, pNumCoeff, ppPosCoefbuf, sVLCSelect, sMaxNumCoeff);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
new file mode 100644
index 0000000..99bb4ce
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
@@ -0,0 +1,68 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 decode coefficients module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of 
+ * Intra16x16DCLevel, Intra16x16ACLevel,LumaLevel, and ChromaACLevel. 
+ * Inverse field scan is not supported. The decoded coefficients in packed 
+ * position-coefficient buffer are stored in increasing zigzag order instead 
+ * of position order.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		Double pointer to current byte in bit stream buffer
+ * [in]	pOffset			Pointer to current bit position in the byte pointed
+ *								to by *ppBitStream
+ * [in]	sMaxNumCoeff	Maximum number of non-zero coefficients in current
+ *								block
+ * [in]	sVLCSelect		VLC table selector, obtained from number of non-zero
+ *								AC coefficients of above and left 4x4 blocks. It is 
+ *								equivalent to the variable nC described in H.264 standard 
+ *								table 9-5, except its value can't be less than zero.
+ * [out]	ppBitStream		*ppBitStream is updated after each block is decoded
+ * [out]	pOffset			*pOffset is updated after each block is decoded
+ * [out]	pNumCoeff		Pointer to the number of nonzero coefficients in
+ *								this block
+ * [out]	ppPosCoefbuf	Double pointer to destination residual
+ *								coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC(
+    const OMX_U8 **ppBitStream,
+    OMX_S32       *pOffset,
+    OMX_U8        *pNumCoeff,
+    OMX_U8       **ppPosCoefbuf,
+    OMX_INT        sVLCSelect,
+    OMX_INT        sMaxNumCoeff)
+{
+    /* Thin wrapper: every argument is forwarded unchanged and the callee's result is returned. */
+    return armVCM4P10_DecodeCoeffsToPair(
+        ppBitStream, pOffset, pNumCoeff, ppPosCoefbuf, sVLCSelect, sMaxNumCoeff);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
new file mode 100644
index 0000000..2b71486
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
@@ -0,0 +1,480 @@
+;//
+;// (c) Copyright 2007 ARM Limited. All Rights Reserved.
+;//
+;// Description:
+;// H.264 inverse quantize and transform module
+;// 
+;// 
+
+        
+
+;// Include standard headers
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+;// Import symbols required from other files
+;// (For example tables)
+    
+        IMPORT armVCM4P10_UnpackBlock4x4
+        IMPORT armVCM4P10_TransformResidual4x4
+        IMPORT armVCM4P10_QPDivTable
+        IMPORT armVCM4P10_VMatrixU16
+        IMPORT armVCM4P10_QPModuloTable 
+        
+    M_VARIANTS ARM1136JS, ARM1136JS_U
+        
+;// Set debugging level        
+;//DEBUG_ON    SETL {TRUE}
+
+
+;// Static Function: armVCM4P10_DequantLumaAC4x4
+
+;// Guarding implementation by the processor name
+    
+    IF  ARM1136JS 
+;// armVCM4P10_DequantLumaAC4x4 (ARM1136JS variant): multiplies the 16 halfwords at <pSrcDst> in place by shifted V-matrix entries
+;//Input Registers
+pSrcDst       RN  0                 ;// Block of 16 halfwords, loaded and stored back as 8 words
+QP            RN  1                 ;// Index used for both the QP modulo and QP div table lookups
+
+
+;//Output Registers
+
+
+;//Local Scratch Registers
+pQPdiv          RN  4               ;// Address of armVCM4P10_QPDivTable
+pQPmod          RN  5               ;// Address of armVCM4P10_QPModuloTable
+pVRow           RN  2               ;// Address of armVCM4P10_VMatrixU16, advanced by <QPmod> via writeback
+QPmod           RN  6               ;// Signed byte read from the modulo table, used as a byte offset
+shift           RN  3               ;// Signed byte read from the div table, used as the LSL amount
+rowLuma01       RN  1               ;// Packed [pVRow[1]|pVRow[0]]; reuses r1 once <QP> is no longer needed
+rowLuma23       RN  4               ;// pVRow[2] in the bottom halfword; reuses r4 once <pQPdiv> is no longer needed
+
+SrcDst00        RN  5               ;// SrcDst<y><x>: word holding elements x and x+1 of row y (r5-r12, filled by LDMIA)
+SrcDst02        RN  6
+SrcDst10        RN  7
+SrcDst12        RN  8
+SrcDst20        RN  9
+SrcDst22        RN  10
+SrcDst30        RN  11
+SrcDst32        RN  12
+
+temp1           RN  2               ;// Product for the top halfword; reuses r2 once <pVRow> is no longer needed
+temp2           RN  3               ;// Product for the top halfword; reuses r3 once <shift> is no longer needed
+temp3           RN  14              ;// Scratch in r14 (lr)
+    
+    
+        ;// Allocate stack memory required by the function
+        
+        ;// Write function header
+        M_START armVCM4P10_DequantLumaAC4x4,r11
+         
+        LDR    pQPmod,=armVCM4P10_QPModuloTable
+        LDR    pQPdiv,=armVCM4P10_QPDivTable        
+        LDR    pVRow,=armVCM4P10_VMatrixU16
+         
+        LDRSB  QPmod,[pQPmod,QP]                    ;// (QP%6) * 6
+        LDRSB  shift,[pQPdiv,QP]                    ;// Shift = QP / 6
+                
+        LDRH    rowLuma01,[pVRow,QPmod]!             ;// rowLuma01 = [00|0a]
+        LDRH    temp3,[pVRow,#2]                     ;// temp3     = [00|0b]   
+        LDRH    rowLuma23,[pVRow,#4]                 ;// rowLuma23 = [00|0c] 
+        ORR     rowLuma01,rowLuma01,temp3,LSL #16    ;// rowLuma01 = [0b|0a]   
+        
+        ;// Load all the 16 'src' values
+        LDMIA   pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}
+        
+        
+        ;//*********************************************************************************************
+        ;//
+        ;// 'Shift' ranges between [0,8] 
+        ;// So we can shift the packed rowLuma values [0b|0a] with a single LSL operation
+        ;//
+        ;//*********************************************************************************************
+        
+        LSL    rowLuma01,rowLuma01,shift
+        LSL    rowLuma23,rowLuma23,shift
+        
+        
+        ;//**********************************************************************************************
+        ;//
+        ;// The idea is to unroll the Loop completely
+        ;// All the 16 src values are loaded at once into 8 registers : SrcDst<y><x> (above)
+        ;// 0<= armVCM4P10_PosToVCol4x4[i] <=2 for any 'i<16' 
+        ;// So the only values of pVRow[i] that need to be loaded are for i=0,1,2
+        ;// These 3 values are loaded into rowLuma01 and rowLuma23 (above)
+        ;// We first calculate pVRow[armVCM4P10_PosToVCol4x4[i]]) << Shift which fits into 16 bits (above)
+        ;// Then the product pSrcDst[i] * (pVRow[armVCM4P10_PosToVCol4x4[i]] << Shift) is calculated
+        ;// Here we interleave the PKHBT operations for various rows to avoid pipeline stalls
+        ;// 
+        ;// We then pack the two 16 bit multiplication result into a word and store at one go
+        ;//
+        ;//**********************************************************************************************
+        
+        
+        ;// Row 1
+        
+        
+        SMULTB  temp1,SrcDst00,rowLuma23                    ;// pSrcDst[1] * (pVRow[2]<<Shift) 
+        SMULBB  SrcDst00,SrcDst00,rowLuma01                 ;// pSrcDst[0] * (pVRow[0]<<Shift)  
+        
+        SMULTB  temp2,SrcDst02,rowLuma23                    ;// pSrcDst[3] * (pVRow[2]<<Shift) 
+        SMULBB  SrcDst02,SrcDst02,rowLuma01                 ;// pSrcDst[2] * (pVRow[0]<<Shift)
+        
+        PKHBT   SrcDst00,SrcDst00,temp1,LSL #16             ;// Pack the first two product values
+        
+                
+        ;// Row 2
+        SMULTT  temp1,SrcDst10,rowLuma01                    ;// pSrcDst[5] * (pVRow[1]<<Shift)
+        SMULBB  SrcDst10,SrcDst10,rowLuma23                 ;// pSrcDst[4] * (pVRow[2]<<Shift)
+        
+        PKHBT   SrcDst02,SrcDst02,temp2,LSL #16             ;// Pack the next two product values
+        SMULTT  temp2,SrcDst12,rowLuma01                    ;// pSrcDst[7] * (pVRow[1]<<Shift)
+        SMULBB  SrcDst12,SrcDst12,rowLuma23                    ;// pSrcDst[6] * (pVRow[2]<<Shift)
+        
+        PKHBT   SrcDst10,SrcDst10,temp1,LSL #16             ;// Pack the next two product values
+        
+               
+        ;// Row 3    
+        
+        SMULTB  temp1,SrcDst20,rowLuma23                    ;// pSrcDst[9] * (pVRow[2]<<Shift)         
+        SMULBB  SrcDst20,SrcDst20,rowLuma01                    ;// pSrcDst[8] * (pVRow[0]<<Shift)  
+       
+        PKHBT   SrcDst12,SrcDst12,temp2,LSL #16               ;// Pack the next two product values
+        SMULTB  temp2,SrcDst22,rowLuma23                    ;// pSrcDst[11] * (pVRow[2]<<Shift) 
+        SMULBB  SrcDst22,SrcDst22,rowLuma01                    ;// pSrcDst[10] * (pVRow[0]<<Shift)
+                                                            
+        PKHBT   SrcDst20,SrcDst20,temp1,LSL #16             ;// Pack the next two product values
+        
+        
+                        
+        ;// Row 4   
+        
+        SMULTT  temp1,SrcDst30,rowLuma01                    ;// pSrcDst[13] * (pVRow[1]<<Shift)
+        SMULBB  SrcDst30,SrcDst30,rowLuma23                    ;// pSrcDst[12] * (pVRow[2]<<Shift)
+        
+        SMULTT  temp3,SrcDst32,rowLuma01                    ;// pSrcDst[15] * (pVRow[1]<<Shift)
+        SMULBB  SrcDst32,SrcDst32,rowLuma23                    ;// pSrcDst[14] * (pVRow[2]<<Shift)
+       
+        PKHBT   SrcDst22,SrcDst22,temp2,LSL #16             ;// Pack the remaining product values
+        PKHBT   SrcDst30,SrcDst30,temp1,LSL #16
+        PKHBT   SrcDst32,SrcDst32,temp3,LSL #16
+        
+        
+        STMIA   pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}
+        
+        
+        ;// Set return value
+          
+           
+      
+        ;// Write function tail
+        M_END
+        
+    ENDIF                                                    ;//ARM1136JS        
+ 
+
+;// Guarding implementation by the processor name
+    
+    IF  ARM1136JS_U
+;// armVCM4P10_DequantLumaAC4x4 (ARM1136JS_U variant): same computation, but V-matrix entries are fetched with word loads (LDR)
+;//Input Registers
+pSrcDst       RN  0                 ;// Block of 16 halfwords, loaded and stored back as 8 words
+QP            RN  1                 ;// Index used for both the QP modulo and QP div table lookups
+
+
+;//Output Registers
+
+
+;//Local Scratch Registers
+pQPdiv          RN  4               ;// Address of armVCM4P10_QPDivTable
+pQPmod          RN  5               ;// Address of armVCM4P10_QPModuloTable
+pVRow           RN  2               ;// Address of armVCM4P10_VMatrixU16, advanced by <QPmod> via writeback
+QPmod           RN  6               ;// Signed byte read from the modulo table, used as a byte offset
+shift           RN  3               ;// Signed byte read from the div table, used as the LSL amount
+rowLuma01       RN  1               ;// Packed [pVRow[1]|pVRow[0]]; reuses r1 once <QP> is no longer needed
+rowLuma23       RN  4               ;// Packed [pVRow[3]|pVRow[2]]; reuses r4 once <pQPdiv> is no longer needed
+
+SrcDst00        RN  5               ;// SrcDst<y><x>: word holding elements x and x+1 of row y (r5-r12, filled by LDMIA)
+SrcDst02        RN  6
+SrcDst10        RN  7
+SrcDst12        RN  8
+SrcDst20        RN  9
+SrcDst22        RN  10
+SrcDst30        RN  11
+SrcDst32        RN  12
+
+temp1           RN  2               ;// Product for the top halfword; reuses r2 once <pVRow> is no longer needed
+temp2           RN  3               ;// Product for the top halfword; reuses r3 once <shift> is no longer needed
+temp3           RN  14              ;// Scratch in r14 (lr)
+    
+    
+        ;// Allocate stack memory required by the function
+        
+        ;// Write function header
+        M_START armVCM4P10_DequantLumaAC4x4,r11
+         
+        LDR    pQPmod,=armVCM4P10_QPModuloTable
+        LDR    pQPdiv,=armVCM4P10_QPDivTable        
+        LDR    pVRow,=armVCM4P10_VMatrixU16
+         
+        LDRSB  QPmod,[pQPmod,QP]                    ;// (QP%6) * 6
+        LDRSB  shift,[pQPdiv,QP]                    ;// Shift = QP / 6
+                
+        LDR    rowLuma01,[pVRow,QPmod]!             ;// rowLuma01 = [0b|0a]
+        LDR    rowLuma23,[pVRow,#4]                 ;// rowLuma23 = [0d|0c]    
+        ;// NOTE(review): only the bottom halfword (c) of rowLuma23 is consumed; every multiply that reads it uses the xB form
+        ;// Load all the 16 'src' values
+        LDMIA   pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}
+        
+        
+        ;//*********************************************************************************************
+        ;//
+        ;// 'Shift' ranges between [0,8] 
+        ;// So we can shift the packed rowLuma values [0b|0a] with a single LSL operation
+        ;//
+        ;//*********************************************************************************************
+        
+        LSL    rowLuma01,rowLuma01,shift
+        LSL    rowLuma23,rowLuma23,shift
+        
+        
+        ;//**********************************************************************************************
+        ;//
+        ;// The idea is to unroll the Loop completely
+        ;// All the 16 src values are loaded at once into 8 registers : SrcDst<y><x> (above)
+        ;// 0<= armVCM4P10_PosToVCol4x4[i] <=2 for any 'i<16' 
+        ;// So the only values of pVRow[i] that need to be loaded are for i=0,1,2
+        ;// These 3 values are loaded into rowLuma01 and rowLuma23 (above)
+        ;// We first calculate pVRow[armVCM4P10_PosToVCol4x4[i]]) << Shift which fits into 16 bits (above)
+        ;// Then the product pSrcDst[i] * (pVRow[armVCM4P10_PosToVCol4x4[i]] << Shift) is calculated
+        ;// Here we interleave the PKHBT operations for various rows to avoid pipeline stalls
+        ;// 
+        ;// We then pack the two 16 bit multiplication result into a word and store at one go
+        ;//
+        ;//**********************************************************************************************
+        
+        
+        ;// Row 1
+        
+        
+        SMULTB  temp1,SrcDst00,rowLuma23                    ;// pSrcDst[1] * (pVRow[2]<<Shift) 
+        SMULBB  SrcDst00,SrcDst00,rowLuma01                 ;// pSrcDst[0] * (pVRow[0]<<Shift)  
+        
+        SMULTB  temp2,SrcDst02,rowLuma23                    ;// pSrcDst[3] * (pVRow[2]<<Shift) 
+        SMULBB  SrcDst02,SrcDst02,rowLuma01                 ;// pSrcDst[2] * (pVRow[0]<<Shift)
+        
+        PKHBT   SrcDst00,SrcDst00,temp1,LSL #16             ;// Pack the first two product values
+        
+                
+        ;// Row 2
+        SMULTT  temp1,SrcDst10,rowLuma01                    ;// pSrcDst[5] * (pVRow[1]<<Shift)
+        SMULBB  SrcDst10,SrcDst10,rowLuma23                 ;// pSrcDst[4] * (pVRow[2]<<Shift)
+        
+        PKHBT   SrcDst02,SrcDst02,temp2,LSL #16             ;// Pack the next two product values
+        SMULTT  temp2,SrcDst12,rowLuma01                    ;// pSrcDst[7] * (pVRow[1]<<Shift)
+        SMULBB  SrcDst12,SrcDst12,rowLuma23                    ;// pSrcDst[6] * (pVRow[2]<<Shift)
+        
+        PKHBT   SrcDst10,SrcDst10,temp1,LSL #16             ;// Pack the next two product values
+        
+               
+        ;// Row 3    
+        
+        SMULTB  temp1,SrcDst20,rowLuma23                    ;// pSrcDst[9] * (pVRow[2]<<Shift)         
+        SMULBB  SrcDst20,SrcDst20,rowLuma01                    ;// pSrcDst[8] * (pVRow[0]<<Shift)  
+       
+        PKHBT   SrcDst12,SrcDst12,temp2,LSL #16               ;// Pack the next two product values
+        SMULTB  temp2,SrcDst22,rowLuma23                    ;// pSrcDst[11] * (pVRow[2]<<Shift) 
+        SMULBB  SrcDst22,SrcDst22,rowLuma01                    ;// pSrcDst[10] * (pVRow[0]<<Shift)
+                                                            
+        PKHBT   SrcDst20,SrcDst20,temp1,LSL #16             ;// Pack the next two product values
+        
+        
+                        
+        ;// Row 4   
+        
+        SMULTT  temp1,SrcDst30,rowLuma01                    ;// pSrcDst[13] * (pVRow[1]<<Shift)
+        SMULBB  SrcDst30,SrcDst30,rowLuma23                    ;// pSrcDst[12] * (pVRow[2]<<Shift)
+        
+        SMULTT  temp3,SrcDst32,rowLuma01                    ;// pSrcDst[15] * (pVRow[1]<<Shift)
+        SMULBB  SrcDst32,SrcDst32,rowLuma23                    ;// pSrcDst[14] * (pVRow[2]<<Shift)
+       
+        PKHBT   SrcDst22,SrcDst22,temp2,LSL #16             ;// Pack the remaining product values
+        PKHBT   SrcDst30,SrcDst30,temp1,LSL #16
+        PKHBT   SrcDst32,SrcDst32,temp3,LSL #16
+        
+        
+        STMIA   pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}
+        
+        
+        ;// Set return value
+          
+           
+      
+        ;// Write function tail
+        M_END
+        
+    ENDIF                                                    ;//ARM1136JS_U        
+
+
+
+
+
+;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd            
+    
+;// Guarding implementation by the processor name
+    
+    IF  ARM1136JS
+;// Builds a 4x4 block of 16-bit deltas in a stack buffer (AC path or DC-only path), adds it to <pPred>, clips to 8 bits, stores to <pDst>
+;//Input Registers
+ppSrc       RN  0                   ;// Passed through in r0 to armVCM4P10_UnpackBlock4x4 (AC path only)
+pPred       RN  1                   ;// Prediction rows, read one word per row
+pDC         RN  2                   ;// Optional on the AC path (tested against 0); always dereferenced on the DC path
+pDst        RN  3                   ;// Output rows, written one word per row
+   
+
+;//Output Registers
+result      RN  0                   ;// Set to OMX_Sts_NoErr before returning
+
+;//Local Scratch Registers
+pDelta      RN  4                   ;// Address of the 32-byte stack buffer <pBuffer>
+pDeltaTmp   RN  6                   ;// Not referenced in this function
+AC          RN  5                   ;//Load from stack
+pPredTemp   RN  7                   ;// Copy of <pPred> that survives the calls below
+pDCTemp     RN  8                   ;// Copy of <pDC> that survives the calls below
+pDstTemp    RN  9                   ;// Copy of <pDst> that survives the calls below
+pDeltaArg1  RN  1                   ;// r1 argument slot for the helper calls
+pDeltaArg0  RN  0                   ;// r0 argument slot for the helper calls
+QP          RN  1                   ;//Load from stack
+DCval       RN  10  
+DCvalCopy   RN  11                  ;// Second register of the STRD pair on the DC path
+predstep    RN  1                   ;// Byte stride added to <pPredTemp> each row
+dstStep     RN  10                  ;// Byte stride added to <pDstTemp> each row
+ycounter    RN  0                   ;// Row counter, counts down from 4
+PredVal1    RN  3
+PredVal2    RN  5
+DeltaVal1   RN  2
+DeltaVal2   RN  11
+PredVal     RN  8                   ;// Reuses r8 once <pDCTemp> is no longer needed
+tmpDeltaVal RN  6
+sum1        RN  12
+sum2        RN  14
+    
+    
+           
+    ;// Allocate stack memory required by the function
+        M_ALLOC8 pBuffer, 32
+               
+
+    ;// Write function header
+        M_START omxVCM4P10_DequantTransformResidualFromPairAndAdd,r11
+        
+        ;// Define stack arguments
+        M_ARG   predStepOnStack, 4
+        M_ARG   dstStepOnStack,4
+        M_ARG   QPOnStack, 4
+        M_ARG   ACOnStack,4
+  
+        
+        M_ADR   pDelta,pBuffer 
+        M_LDR   AC,ACOnStack 
+        
+         
+        ;// Save registers r1,r2,r3 before function call    
+        MOV     pPredTemp,pPred
+        MOV     pDCTemp,pDC
+        MOV     pDstTemp,pDst
+        
+        CMP     AC,#0                                       ;// AC == 0 selects the DC-only path
+        BEQ     DCcase
+        MOV     pDeltaArg1,pDelta                           ;// Set up r1 for armVCM4P10_UnpackBlock4x4
+    
+        BL      armVCM4P10_UnpackBlock4x4
+    
+        M_LDR   QP,QPOnStack                                ;// Set up r1 for DequantLumaAC4x4
+        MOV     pDeltaArg0,pDelta                           ;// Set up r0 for DequantLumaAC4x4
+
+        BL      armVCM4P10_DequantLumaAC4x4
+        
+        
+        CMP     pDCTemp,#0                                  ;// Was a DC pointer supplied?
+        LDRSHNE DCval,[pDCTemp]                             ;// If so, load the DC value ...
+        MOV     pDeltaArg0,pDelta                           ;// Set up r0 for armVCM4P10_TransformResidual4x4
+        MOV     pDeltaArg1,pDelta                           ;// Set up r1 for armVCM4P10_TransformResidual4x4
+        STRHNE  DCval,[pDelta]                              ;// ... and overwrite pDelta[0] with it
+        
+        BL      armVCM4P10_TransformResidual4x4
+        B       OutDCcase 
+        
+
+DCcase
+        LDRSH   DCval,[pDCTemp]                            ;// DC-only path: <pDCTemp> is dereferenced without a NULL test
+        ADD     DCval,DCval,#32                            ;// Add 32 before the shift by 6 (rounding)
+        ASR     DCval,DCval,#6                             ;// DCval = (DC + 32) >> 6
+        PKHBT   DCval,DCval,DCval,LSL #16                  ;// Duplicating the Lower halfword
+        MOV     DCvalCopy, DCval                           ;// Needed for STRD
+        STRD    DCval, [pDelta, #0]                        ;// pDelta[0]  = pDelta[1]  = pDelta[2]  = pDelta[3] = DCval
+        STRD    DCval, [pDelta, #8]                        ;// pDelta[4]  = pDelta[5]  = pDelta[6]  = pDelta[7] = DCval
+        STRD    DCval, [pDelta, #16]                       ;// pDelta[8]  = pDelta[9]  = pDelta[10] = pDelta[11] = DCval
+        STRD    DCval, [pDelta, #24]                       ;// pDelta[12] = pDelta[13] = pDelta[14] = pDelta[15] = DCval
+        
+               
+OutDCcase      
+        M_LDR   predstep,predStepOnStack
+        M_LDR   dstStep,dstStepOnStack
+        
+        LDMIA   pDelta!,{tmpDeltaVal,DeltaVal2}             ;// Pre load
+        MOV     ycounter,#4                                 ;// Counter for the PredPlusDeltaLoop
+        LDR     PredVal,[pPredTemp]                         ;// Pre load
+
+PredPlusDeltaLoop
+        
+        ;// The flags set by SUBS below are consumed by LDRGT, LDMGTIA and BGT; nothing in between updates them
+        SUBS    ycounter,ycounter,#1
+        ADD     pPredTemp,pPredTemp,predstep                ;// Increment pPred ptr
+        
+        PKHBT   DeltaVal1,tmpDeltaVal,DeltaVal2,LSL #16     ;// DeltaVal1 = [C A]   
+        PKHTB   DeltaVal2,DeltaVal2,tmpDeltaVal,ASR #16     ;// DeltaVal2 = [D B]
+        
+        UXTB16  PredVal1,PredVal                            ;// PredVal1 = [0c0a]
+        UXTB16  PredVal2,PredVal,ROR #8                     ;// PredVal2 = [0d0b]
+        
+        LDRGT   PredVal,[pPredTemp]                         ;// Pre load (skipped after the last row)
+        
+        QADD16  sum2,DeltaVal2,PredVal2                     ;// Add and saturate to 16 bits
+        QADD16  sum1,DeltaVal1,PredVal1
+        
+        USAT16  sum2,#8,sum2                                ;// armClip(0,255,sum2)
+        USAT16  sum1,#8,sum1
+        
+        LDMGTIA   pDelta!,{tmpDeltaVal,DeltaVal2}           ;// Pre load (skipped after the last row)
+          
+        ORR     sum1,sum1,sum2,LSL #8                       ;// sum1 = [dcba]
+        STR     sum1,[pDstTemp]
+        
+        ADD     pDstTemp,pDstTemp,dstStep                   ;// Increment pDst ptr
+        BGT     PredPlusDeltaLoop  
+        
+        
+        ;// Set return value
+        MOV     result,#OMX_Sts_NoErr
+        
+End                
+
+        
+        ;// Write function tail
+        
+        M_END
+        
+    ENDIF                                                    ;//ARM1136JS   
+    
+    
+;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd            
+    
+;// Guarding implementation by the processor name
+    
+    
+         
+            
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
new file mode 100644
index 0000000..6d960f0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
@@ -0,0 +1,336 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+        
+        IF ARM1136JS
+        
+MASK_0      EQU 0x00000000                  ;// all-zero word (value moved into m00)
+MASK_1      EQU 0x01010101                  ;// 0x01 in every byte lane: loaded into m01 and used as the multiplier that copies alpha/beta into all four lanes
+LOOP_COUNT  EQU 0x50000000                  ;// doubled by ADDS once per iteration: carry-out every 2nd doubling ends LoopX, zero after the 4th ends LoopY
+
+;// Declare input registers (several names below share one physical register; their live ranges must not overlap)
+
+pSrcDst     RN 0
+srcdstStep  RN 1
+pAlphaArg   RN 2
+pBetaArg    RN 3
+
+pThresholds RN 6                            ;// same register as alpha/alpha0, P_0 and t4
+pBS         RN 9                            ;// same register as beta1, q_1 and neg
+pQ0         RN 0                            ;// same register as pSrcDst
+bS          RN 10                           ;// same register as a and t1
+
+alpha       RN 6
+alpha0      RN 6
+alpha1      RN 8                            ;// same register as q_0 and XY
+
+beta        RN 7
+beta0       RN 7
+beta1       RN 9
+
+;// Declare Local/Temporary variables
+
+;// Pixels (one word holds four neighbouring pixels of a row)
+p_0         RN 3  
+p_1         RN 5  
+q_0         RN 8  
+q_1         RN 9  
+
+;// Filtering
+
+dp0q0       RN 12                           ;// the three difference temporaries share r12 with tC, tC1 and t2
+dp1p0       RN 12
+dq1q0       RN 12
+
+ap0q0       RN 4  
+filt        RN 2                            ;// same register as pAlphaArg
+        
+m00         RN 14                           ;// same register as t3
+m01         RN 11
+            
+pQ0         RN 0                            ;// repeats the pQ0 alias declared above
+Step        RN 1
+            
+;// Output
+            
+P_0         RN 6
+Q_0         RN 7 
+
+;//Declarations for bSLT4 kernel
+
+tC          RN 12
+tC0         RN 5                            ;// same register as p_1, pos and t5
+tC1         RN 12
+pos         RN 5
+neg         RN 9
+
+;//Declarations for bSGE4 kernel (none of its own; it uses the shared temporaries below)
+
+
+;// Miscellaneous
+XY          RN 8                            ;// loop counter, see LOOP_COUNT
+
+a           RN 10
+t1          RN 10
+t2          RN 12
+t3          RN 14
+t4          RN 6
+t5          RN 5
+
+        
+        ;// Allocate stack memory: scratch slots for values whose registers are reused inside the loop
+        M_ALLOC4 ppThresholds,4                 ;// running pThresholds pointer
+        M_ALLOC8 pAlphaBeta0,8                  ;// replicated alpha/beta pair that is reloaded at the end of every iteration
+        M_ALLOC8 pAlphaBeta1,8                  ;// replicated alpha/beta pair built from pAlpha[1]/pBeta[1]
+        M_ALLOC8 pXYBS,4                        ;// loop counter XY; written and read together with pBS through M_STRD/M_LDRD
+        M_ALLOC4 ppBS,4                         ;// running pBS; NOTE(review): presumably laid out directly after pXYBS so the pair accesses reach it - confirm in armCOMM_s.h
+        
+        ;// Function header: filters in place, 2 LoopY passes of 2 LoopX passes, one word of 4 pixels per row per pass
+        M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r11    ;// NOTE(review): r11 presumably names the highest register the prologue saves - confirm in armCOMM_s.h
+        
+        ;//Input arguments on the stack
+        M_ARG   ppThresholdsArg, 4
+        M_ARG   ppBSArg, 4
+        
+        LDRB    alpha1, [pAlphaArg,#1]          ;// alpha1 = pAlpha[1]
+        LDRB    beta1,  [pBetaArg,#1]           ;// beta1  = pBeta[1]
+        M_LDR   pThresholds, ppThresholdsArg    ;// fetch the pThresholds stack argument
+        LDR     a,=MASK_1                       ;// a = 0x01010101, byte-replication multiplier
+        LDRB    beta0,  [pBetaArg]              ;// beta0  = pBeta[0]
+        M_STR   pThresholds, ppThresholds       ;// save it now: r6 is about to become alpha0
+        LDRB    alpha0, [pAlphaArg]             ;// alpha0 = pAlpha[0]
+
+        MUL     alpha1, alpha1, a               ;// copy each 8-bit value into all four byte lanes
+        MUL     beta1, beta1, a
+        MUL     alpha0, alpha0, a
+        MUL     beta0, beta0, a
+
+        M_STRD  alpha1, beta1, pAlphaBeta1
+        M_LDR   pBS, ppBSArg                    ;// pBS reuses r9 (beta1), which was stored on the previous line
+        M_STRD  alpha0, beta0, pAlphaBeta0
+
+        LDR     XY,=LOOP_COUNT                  ;// XY reuses r8 (alpha1), already stored
+        M_STRD  XY, pBS, pXYBS
+
+        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// pQ0 = pSrcDst - 2*srcdstStep, the row that is loaded as p_1
+LoopY
+LoopX
+;//---------------Load Pixels (LoopY and LoopX label the same address)-------------------
+        LDRH    bS, [pBS], #2                   ;// two bS bytes for this group of 4 pixels; pBS += 2
+        
+        M_STR   pBS, ppBS                       ;// saved because r9 is reloaded as q_1 below
+        M_LDR   p_1, [pQ0],srcdstStep           ;// each load post-increments pQ0 by one row
+
+        CMP     bS, #0                          ;// Z = both bS bytes are zero; consumed by BEQ NoFilterBS0 (the loads in between leave the flags alone)
+        
+        M_LDR   p_0, [pQ0],srcdstStep
+        M_LDR   q_0, [pQ0],srcdstStep
+        M_LDR   q_1, [pQ0]                      ;// no post-increment: pQ0 stays on the q_1 row (start + 3 rows)
+        LDR     m01, =MASK_1                ;//  01010101 mask 
+        BEQ     NoFilterBS0
+
+        
+        ;// p_0 = [r3p0 r2p0 r1p0 r0p0]
+        ;// p_1 = [r3p1 r2p1 r1p1 r0p1]
+        ;// q_0 = [r3q0 r2q0 r1q0 r0q0]
+        ;// q_1 = [r3q1 r2q1 r1q1 r0q1]
+
+;//--------------Filtering Decision (filt keeps 0x01 in every byte lane that is to be filtered)-------------------
+        MOV     m00, #MASK_0                ;//  00000000 mask 
+
+        MOV     filt, m01                       ;// start with all four lanes enabled
+        TST     bS, #0xff00
+        MOVEQ   filt, filt, LSR #16             ;// second bS byte is 0: drop the two upper lanes
+        TST     bS, #0xff
+        MOVEQ   filt, filt, LSL #16             ;// first bS byte is 0: drop the two lower lanes
+        TST     bS, #4                          ;// Z = bit 2 of the first bS byte clear; consumed by BEQ bSLT4 below (USUB8/SEL do not write N,Z,C,V)
+
+        
+        ;// Check |p0-q0|<Alpha 
+        USUB8   dp0q0, p_0, q_0 
+        USUB8   a, q_0, p_0                     ;// GE[lane] set where q0 >= p0
+        SEL     ap0q0, a, dp0q0                 ;// ap0q0 = |p0-q0| per lane
+        USUB8   a, ap0q0, alpha                 ;// GE[lane] set where |p0-q0| >= alpha
+        SEL     filt, m00, filt                 ;// clear those lanes of filt
+                
+        ;// Check |p1-p0|<Beta 
+        USUB8   dp1p0, p_1, p_0
+        USUB8   a, p_0, p_1
+        SEL     a, a, dp1p0                     ;// a = |p1-p0| per lane
+        USUB8   a, a, beta
+        SEL     filt, m00, filt                 ;// clear lanes where |p1-p0| >= beta
+
+        ;// Check |q1-q0|<Beta 
+        USUB8   dq1q0, q_1, q_0
+        USUB8   a, q_0, q_1
+        SEL     a, a, dq1q0                     ;// a = |q1-q0| per lane
+        USUB8   a, a, beta
+        SEL     filt, m00, filt                 ;// clear lanes where |q1-q0| >= beta
+
+        BEQ     bSLT4                           ;// Z still comes from TST bS, #4 above
+;//-------------------Filter--------------------
+bSGE4        
+        ;//---------bSGE4 Execution---------------
+        CMP     filt, #0                        ;// Z = no lane left to filter; consumed by BEQ NoFilterFilt0
+
+        M_LDR   pThresholds, ppThresholds
+
+        ;// Compute P0b
+        UHADD8  t1, p_0, q_1                    ;// t1 = (p0 + q1) >> 1 per lane
+        BEQ     NoFilterFilt0
+        MVN     t2, p_1                         ;// t2 = 255 - p1 per lane
+        UHSUB8  t1, t1, t2                      ;// with the EOR below this yields (p0 + q1 + 2*p1 + 2) >> 2
+        USUB8   t2, filt, m01                   ;// only the GE flags matter: set in the lanes where filt is 0x01
+        EOR     t1, t1, m01, LSL #7             ;// flip bit 7 of every lane (m01 << 7 = 0x80808080)
+
+        ADD     pThresholds,pThresholds, #2     ;// step over the two threshold bytes of this group (not read on this path)
+        
+        ;// Compute Q0b 
+        UHADD8  t2, q_0, p_1
+        MVN     t3, q_1
+        UHSUB8  t2, t2, t3                      ;// same scheme as above with p and q swapped
+        M_STR   pThresholds, ppThresholds       ;// saved before r6 is overwritten by P_0
+        SEL     P_0, t1, p_0                    ;// filtered value in enabled lanes, original p_0 elsewhere
+        EOR     t2, t2, m01, LSL #7
+        SEL     Q_0, t2, q_0
+
+        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// back from the q_1 row to the p_0 row for the store
+        B       StoreResultAndExit
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterFilt0
+NoFilterBS0
+        M_LDR   pThresholds, ppThresholds
+        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// pQ0 is on the q_1 row here; these two SUBs go back 3 rows
+        SUB     pQ0, pQ0, srcdstStep
+        ADD     pQ0, pQ0, #4                    ;// next group of 4 pixels
+        ADD     pThresholds, pThresholds, #2    ;// keep the threshold pointer in step with the skipped group
+
+        ;// Load counter for LoopX
+        M_LDRD  XY, pBS, pXYBS
+        M_STR   pThresholds, ppThresholds       ;// stored before r6 is reloaded as alpha
+        M_LDRD  alpha, beta, pAlphaBeta0
+
+        ;// Align the pointer
+        ADDS    XY, XY, XY                      ;// double the counter: C ends LoopX, Z (tested at ExitLoopY) ends LoopY
+        M_STR   XY, pXYBS
+        BCC     LoopY                           ;// same address as LoopX
+        B       ExitLoopY
+        
+bSLT4         
+        ;//---------bSLT4 Execution---------------
+        M_LDR   pThresholds, ppThresholds
+        CMP     filt, #0                        ;// Z = no lane left to filter; consumed by BEQ NoFilterFilt0
+        
+        ;// Since beta <= 18 and alpha <= 255 we know
+        ;// -254 <= p0-q0 <= 254
+        ;//  -17 <= q1-q0 <= 17
+        ;//  -17 <= p1-p0 <= 17
+
+        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
+        ;// 
+        ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
+        ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
+        ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
+
+        USUB8   t1, p_1, p_0                    ;// t1 = p1-p0 per lane (r10: bS is no longer needed)
+        USUB8   t2, q_1, q_0                    ;// t2 = q1-q0 per lane
+        BEQ     NoFilterFilt0
+        
+        LDRB    tC0, [pThresholds],#1           ;// first threshold byte (r5: p_1 has been consumed)
+        SSUB8   t1, t1, t2                      ;// t1 = (p1-p0) - (q1-q0)
+        LDRB    tC1, [pThresholds],#1           ;// second threshold byte (r12: t2 has been consumed)
+        M_STR   pThresholds, ppThresholds       ;// saved before r6 is reused as t4
+        UHSUB8  t4, p_0, q_0                    ;// t4 = (p0-q0) >> 1 per lane
+        ORR     tC, tC0, tC1, LSL #16           ;// tC = [0 tC1 0 tC0]
+        USUB8   t5, p_0, q_0
+        AND     t5, t5, m01                     ;// t5 = bit 0 of (p0-q0) per lane
+        SHSUB8  t1, t1, t5                      ;// the halving steps from here to the last SHSUB8 evaluate A (formula above) in 8-bit lanes
+        ORR     tC, tC, LSL #8                  ;// tC = [tC1 tC1 tC0 tC0]: one threshold per pair of lanes
+        SSUB8   t1, t1, t5
+        SHSUB8  t1, t1, t4
+        UQADD8  tC, tC, m01                     ;// tC = threshold + 1 per lane (saturating)
+        SADD8   t1, t1, m01
+        USUB8   t5, filt, m01                   ;// only the GE flags matter: set in the lanes where filt is 0x01
+        SHSUB8  t1, t1, t4
+        SEL     tC, tC, m00                     ;// tC = 0 in lanes that are not filtered
+
+        ;// Split into positive and negative part and clip 
+
+        SSUB8   t1, t1, m00                     ;// subtracting 0 just sets GE where t1 >= 0 (signed)
+        SEL     pos, t1, m00                    ;// pos = t1 where non-negative, else 0
+        USUB8   neg, pos, t1                    ;// neg = -t1 where t1 is negative, else 0
+        USUB8   t3, pos, tC                     ;// GE where pos >= tC
+        SEL     pos, tC, pos                    ;// pos = min(pos, tC)
+        USUB8   t3, neg, tC
+        SEL     neg, tC, neg                    ;// neg = min(neg, tC)
+        UQADD8  P_0, p_0, pos                   ;// P_0 = p0 + pos - neg, Q_0 = q0 - pos + neg, saturating at each step
+        UQSUB8  Q_0, q_0, pos
+        UQSUB8  P_0, P_0, neg
+        UQADD8  Q_0, Q_0, neg
+        
+        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// back from the q_1 row to the p_0 row for the store
+
+        ;// Choose to store the filtered
+        ;// value or the original pixel
+        USUB8   t1, filt, m01                   ;// GE set in the lanes where filt is 0x01
+        SEL     P_0, P_0, p_0
+        SEL     Q_0, Q_0, q_0
+    
+StoreResultAndExit
+
+        ;//---------Store result---------------
+
+        ;// P_0 = [r3p0 r2p0 r1p0 r0p0]  (same lane order as p_0 at load time)
+        ;// Q_0 = [r3q0 r2q0 r1q0 r0q0]  (same lane order as q_0 at load time)
+
+        M_STR   P_0, [pQ0], srcdstStep          ;// write the p_0 row, move down to the q_0 row
+        STR     Q_0, [pQ0], #4                  ;// write the q_0 row, step 4 pixels to the right
+
+        M_LDRD  XY, pBS, pXYBS
+        M_LDRD  alpha, beta, pAlphaBeta0        ;// r6/r7 held P_0/Q_0 until the stores above
+
+        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// up two rows: the p_1 row of the next group
+
+        ADDS    XY, XY, XY                      ;// double the counter: C ends LoopX, Z (tested below) ends LoopY
+        M_STR   XY, pXYBS
+        BCC     LoopX
+
+;//-------- Common Exit of LoopY -----------------
+        ;// Align the pointers 
+
+ExitLoopY
+        ADD     pBS, pBS, #4                    ;// skip 4 bS bytes before the second pass
+        M_LDRD  alpha, beta, pAlphaBeta1
+        SUB     pQ0, pQ0, #8                    ;// undo the two 4-pixel steps of LoopX
+        ADD     pQ0, pQ0, srcdstStep, LSL #2    ;// move 4 rows down
+        M_STRD  alpha, beta, pAlphaBeta0        ;// the second pass therefore reloads alpha1/beta1 from pAlphaBeta0
+
+        BNE     LoopY                           ;// Z still comes from the last ADDS XY; nothing since then writes the flags
+        MOV     r0, #OMX_Sts_NoErr
+
+;//-----------------End Filter--------------------
+        M_END
+
+    ENDIF        
+
+        END
+        
+        
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
new file mode 100644
index 0000000..00c8354
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
@@ -0,0 +1,437 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+
+        IF ARM1136JS
+        
+        
+MASK_0      EQU 0x00000000                  ;// all-zero word (value moved into m00)
+MASK_1      EQU 0x01010101                  ;// 0x01 in every byte lane: loaded into m01 and used as the multiplier that copies alpha/beta into all four lanes
+MASK_2      EQU 0x0000ff00                  ;// selects bits 15:8; loaded into mask for the unpacking step
+LOOP_COUNT  EQU 0x50000000                  ;// doubled by ADDS once per iteration: carry-out every 2nd doubling ends LoopX, zero after the 4th ends LoopY
+
+;// Declare input registers (several names below share one physical register; their live ranges must not overlap)
+
+pSrcDst     RN 0
+srcdstStep  RN 1
+pAlphaArg   RN 2
+pBetaArg    RN 3
+
+pThresholds RN 6                            ;// same register as alpha/alpha0, P_0 and t4
+pBS         RN 9                            ;// same register as beta1, q_1, row5 and neg
+pQ0         RN 0                            ;// same register as pSrcDst
+bS          RN 2                            ;// same register as row0, tunpk0, tunpk7 and filt
+bSTemp      RN 10                           ;// second copy of bS, re-read after r2 has been reused; same register as a and t1
+
+alpha       RN 6
+alpha0      RN 6
+alpha1      RN 8                            ;// same register as q_0, row4 and XY
+
+beta        RN 7
+beta0       RN 7
+beta1       RN 9
+
+;// Declare Local/Temporary variables
+
+;// Pixels (one word holds the same pixel position of four consecutive rows)
+p_0         RN 3  
+p_1         RN 5  
+q_0         RN 8  
+q_1         RN 9  
+
+;// Unpacking
+mask        RN 11                           ;// same register as m01
+
+row0        RN 2
+row1        RN 4
+row2        RN 5
+row3        RN 3
+
+row4        RN 8
+row5        RN 9
+row6        RN 10
+row7        RN 12
+
+tunpk0      RN 2
+tunpk2      RN 10
+tunpk3      RN 12
+
+tunpk4      RN 4
+tunpk5      RN 5
+tunpk6      RN 14
+tunpk7      RN 2 
+
+;// Filtering
+
+dp0q0       RN 12                           ;// the three difference temporaries share r12 with tC, tC1 and t2
+dp1p0       RN 12
+dq1q0       RN 12
+
+ap0q0       RN 4  
+filt        RN 2
+        
+m00         RN 14                           ;// same register as t3 and tunpk6
+m01         RN 11
+            
+pQ0         RN 0                            ;// repeats the pQ0 alias declared above
+Step        RN 1
+            
+;// Output
+            
+P_0         RN 6
+Q_0         RN 7 
+
+;//Declarations for bSLT4 kernel
+
+tC          RN 12
+tC0         RN 5                            ;// same register as p_1, pos and t5
+tC1         RN 12
+pos         RN 5
+neg         RN 9
+
+;//Declarations for bSGE4 kernel (none of its own; it uses the shared temporaries below)
+
+
+;// Miscellaneous
+XY          RN 8                            ;// loop counter, see LOOP_COUNT
+
+a           RN 10
+t1          RN 10
+t2          RN 12
+t3          RN 14
+t4          RN 6
+t5          RN 5
+
+
+        ;// Allocate stack memory: scratch slots for values whose registers are reused inside the loop
+        M_ALLOC4 ppThresholds,4                 ;// running pThresholds pointer
+        M_ALLOC8 pAlphaBeta0,8                  ;// replicated alpha/beta pair from pAlpha[0]/pBeta[0]; reloaded at ExitLoopY
+        M_ALLOC8 pAlphaBeta1,8                  ;// replicated alpha/beta pair from pAlpha[1]/pBeta[1]; reloaded after every LoopX iteration
+        M_ALLOC8 pXYBS,4                        ;// loop counter XY; written and read together with pBS through M_STRD/M_LDRD
+        M_ALLOC4 ppBS,4                         ;// running pBS; NOTE(review): presumably laid out directly after pXYBS so the pair accesses reach it - confirm in armCOMM_s.h
+        
+        ;// Function header: filters in place, 2 LoopY passes of 2 LoopX passes, 4 rows of p1 p0 | q0 q1 per pass
+        M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r11    ;// NOTE(review): r11 presumably names the highest register the prologue saves - confirm in armCOMM_s.h
+        
+        ;//Input arguments on the stack
+        M_ARG   ppThresholdsArg, 4
+        M_ARG   ppBSArg, 4
+        
+        LDRB    alpha1, [pAlphaArg,#1]          ;// alpha1 = pAlpha[1]
+        LDRB    beta1,  [pBetaArg,#1]           ;// beta1  = pBeta[1]
+        M_LDR   pThresholds, ppThresholdsArg    ;// fetch the pThresholds stack argument
+        LDR     a,=MASK_1                       ;// a = 0x01010101, byte-replication multiplier
+        LDRB    beta0,  [pBetaArg]              ;// beta0  = pBeta[0]
+        M_STR   pThresholds, ppThresholds       ;// save it now: r6 is about to become alpha0
+        LDRB    alpha0, [pAlphaArg]             ;// alpha0 = pAlpha[0]
+
+        MUL     alpha1, alpha1, a               ;// copy each 8-bit value into all four byte lanes
+        MUL     beta1, beta1, a
+        MUL     alpha0, alpha0, a
+        MUL     beta0, beta0, a
+
+        M_STRD  alpha1, beta1, pAlphaBeta1
+        M_LDR   pBS, ppBSArg                    ;// pBS reuses r9 (beta1), which was stored on the previous line
+        M_STRD  alpha0, beta0, pAlphaBeta0
+
+        LDR     XY,=LOOP_COUNT                  ;// XY reuses r8 (alpha1), already stored
+        M_STRD  XY, pBS, pXYBS
+        
+        
+LoopY
+LoopX
+;//---------------Load Pixels (LoopY and LoopX label the same address)-------------------
+
+;//----------------Pack q0-q1-----------------------
+        LDRH    bS, [pBS], #8                   ;// two bS bytes for these 4 rows; pBS += 8
+        LDR     mask, =MASK_2
+
+        M_LDRH  row4, [pQ0], srcdstStep         ;// two bytes starting at pQ0, then down one row
+        CMP     bS, #0                          ;// Z = both bS bytes are zero; consumed by BEQ.W NoFilterBS0
+        M_STR   pBS, ppBS                       ;// saved because r9 is reloaded as row5
+        M_LDRH  row5, [pQ0], srcdstStep
+        BEQ.W   NoFilterBS0                     ;// taken with pQ0 two rows below its value at loop entry
+        LDRH    row6, [pQ0]
+        LDRH    row7, [pQ0, srcdstStep]
+
+        ;// row4 = [0 0 r0q1 r0q0]  (halfword load: the byte at pQ0, q0, lands in bits 7:0 - assumes little-endian data)
+        ;// row5 = [0 0 r1q1 r1q0]
+        ;// row6 = [0 0 r2q1 r2q0]
+        ;// row7 = [0 0 r3q1 r3q0]
+
+        AND     tunpk4, mask, row4              ;// bits 15:8 of row4 stay in place
+        AND     tunpk5, mask, row4, LSL#8       ;// bits 7:0 of row4 moved up to bits 15:8
+        UXTAB   tunpk4, tunpk4, row5, ROR#8     ;// add bits 15:8 of row5 as the low byte
+        UXTAB   tunpk5, tunpk5, row5            ;// add bits 7:0 of row5 as the low byte
+        AND     tunpk6, mask, row6
+        AND     tunpk7, mask, row6, LSL#8
+        UXTAB   tunpk6, tunpk6, row7, ROR#8
+        UXTAB   tunpk7, tunpk7, row7
+
+        ;// tunpk4 = [0 0 r0q1 r1q1]  (packed into q_1 below)
+        ;// tunpk5 = [0 0 r0q0 r1q0]  (packed into q_0 below)
+        ;// tunpk6 = [0 0 r2q1 r3q1]
+        ;// tunpk7 = [0 0 r2q0 r3q0]
+
+        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// undo the two row post-increments
+        SUB     pQ0, pQ0, #2                    ;// two bytes to the left: the p1/p0 pair of row 0
+
+        PKHBT   q_1, tunpk6, tunpk4, LSL#16
+        PKHBT   q_0, tunpk7, tunpk5, LSL#16
+
+        ;// q_0 = [r0q0 r1q0 r2q0 r3q0]
+        ;// q_1 = [r0q1 r1q1 r2q1 r3q1]
+
+
+;//----------------Pack p0-p1-----------------------
+
+        M_LDRH  row0, [pQ0], srcdstStep         ;// row0 reuses r2, so bS is lost here and re-read into bSTemp below
+        M_LDRH  row1, [pQ0], srcdstStep          
+        LDRH    row2, [pQ0]
+        LDRH    row3, [pQ0, srcdstStep]
+        
+        ;// row0 = [0 0 r0p0 r0p1]
+        ;// row1 = [0 0 r1p0 r1p1]
+        ;// row2 = [0 0 r2p0 r2p1]
+        ;// row3 = [0 0 r3p0 r3p1]
+
+        AND     tunpk2, mask, row0
+        AND     tunpk6, mask, row0, LSL#8
+        UXTAB   tunpk2, tunpk2, row1, ROR#8
+        UXTAB   tunpk6, tunpk6, row1
+
+        AND     tunpk0, mask, row2              ;// tunpk0 reuses r2: row0 has been consumed
+        AND     tunpk3, mask, row2, LSL#8
+        UXTAB   tunpk0, tunpk0, row3, ROR#8
+        UXTAB   tunpk3, tunpk3, row3
+
+        ;// tunpk2 = [0 0 r0p0 r1p0]
+        ;// tunpk6 = [0 0 r0p1 r1p1]
+        ;// tunpk0 = [0 0 r2p0 r3p0]
+        ;// tunpk3 = [0 0 r2p1 r3p1]
+
+        PKHBT   p_0, tunpk0, tunpk2, LSL#16
+        M_LDR   bSTemp, ppBS                    ;// bSTemp = saved pBS (r10: tunpk2 has been consumed)
+        PKHBT   p_1, tunpk3, tunpk6, LSL#16
+
+        ;// p_0 = [r0p0 r1p0 r2p0 r3p0]
+        ;// p_1 = [r0p1 r1p1 r2p1 r3p1]
+
+;//--------------Filtering Decision (filt keeps 0x01 in every byte lane that is to be filtered)-------------------
+        USUB8   dp0q0, p_0, q_0 
+        LDR     m01, =MASK_1
+        LDRH    bSTemp, [bSTemp ,#-8]           ;// re-read the bS pair of this iteration (the saved pBS is already 8 bytes further on)
+        MOV     m00, #MASK_0                ;//  00000000 mask 
+        
+        MOV     filt, m01                       ;// start with all four lanes enabled
+        TST     bSTemp, #0xff00
+        MOVEQ   filt, filt, LSL #16             ;// second bS byte is 0: drop the two lower lanes (rows 2 and 3)
+        TST     bSTemp, #0xff
+        MOVEQ   filt, filt, LSR #16             ;// first bS byte is 0: drop the two upper lanes (rows 0 and 1)
+        TST     bSTemp, #4                      ;// Z = bit 2 of the first bS byte clear; consumed by BEQ bSLT4 below (USUB8/SEL do not write N,Z,C,V)
+
+        ;// Check |p0-q0|<Alpha 
+        USUB8   a, q_0, p_0                     ;// GE[lane] set where q0 >= p0 (a reuses r10: bSTemp is no longer needed)
+        SEL     ap0q0, a, dp0q0                 ;// ap0q0 = |p0-q0| per lane
+        USUB8   a, ap0q0, alpha                 ;// GE[lane] set where |p0-q0| >= alpha
+        SEL     filt, m00, filt                 ;// clear those lanes of filt
+        
+        ;// Check |p1-p0|<Beta 
+        USUB8   dp1p0, p_1, p_0
+        USUB8   a, p_0, p_1
+        SEL     a, a, dp1p0                     ;// a = |p1-p0| per lane
+        USUB8   a, a, beta
+        SEL     filt, m00, filt                 ;// clear lanes where |p1-p0| >= beta
+
+        ;// Check |q1-q0|<Beta 
+        USUB8   dq1q0, q_1, q_0
+        USUB8   a, q_0, q_1
+        SEL     a, a, dq1q0                     ;// a = |q1-q0| per lane
+        USUB8   a, a, beta
+        SEL     filt, m00, filt                 ;// clear lanes where |q1-q0| >= beta
+
+        BEQ     bSLT4                           ;// Z still comes from TST bSTemp, #4 above
+;//-------------------Filter--------------------
+bSGE4        
+        ;//---------bSGE4 Execution---------------
+        CMP     filt, #0                        ;// Z = no lane left to filter; consumed by BEQ NoFilterFilt0
+
+        M_LDR   pThresholds, ppThresholds
+
+        ;// Compute P0b
+        UHADD8  t1, p_0, q_1                    ;// t1 = (p0 + q1) >> 1 per lane
+        BEQ     NoFilterFilt0
+        MVN     t2, p_1                         ;// t2 = 255 - p1 per lane
+        UHSUB8  t1, t1, t2                      ;// with the EOR below this yields (p0 + q1 + 2*p1 + 2) >> 2
+        USUB8   t2, filt, m01                   ;// only the GE flags matter: set in the lanes where filt is 0x01
+        EOR     t1, t1, m01, LSL #7             ;// flip bit 7 of every lane (m01 << 7 = 0x80808080)
+
+        ADD     pThresholds,pThresholds, #4     ;// same +4 advance as the bSLT4 path (thresholds are not read here)
+        
+        ;// Compute Q0b 
+        UHADD8  t2, q_0, p_1
+        MVN     t3, q_1
+        UHSUB8  t2, t2, t3                      ;// same scheme as above with p and q swapped
+        M_STR   pThresholds, ppThresholds       ;// saved before r6 is overwritten by P_0
+        SEL     P_0, t1, p_0                    ;// filtered value in enabled lanes, original p_0 elsewhere
+        EOR     t2, t2, m01, LSL #7
+        SEL     Q_0, t2, q_0
+
+        B       StoreResultAndExit
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterFilt0
+        ADD     pQ0, pQ0, #2                    ;// undo the 2-byte step to the p side, so both entries continue identically
+NoFilterBS0
+        M_LDR   pThresholds, ppThresholds
+        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// back up the two rows that were post-incremented
+        ADD     pQ0, pQ0, #4                    ;// 4 bytes to the right for the next LoopX iteration
+        ADD     pThresholds, pThresholds, #4    ;// keep the threshold pointer in step with the skipped group
+        ;// Load counter for LoopX
+        M_LDRD  XY, pBS, pXYBS
+        M_STR   pThresholds, ppThresholds       ;// stored before r6 is reloaded as alpha
+        M_LDRD  alpha, beta, pAlphaBeta1
+
+        ;// Align the pointer
+        ADDS    XY, XY, XY                      ;// double the counter: C ends LoopX, Z (tested at ExitLoopY) ends LoopY
+        M_STR   XY, pXYBS
+        BCC     LoopY                           ;// same address as LoopX
+        B       ExitLoopY
+        
+bSLT4        
+        ;//---------bSLT4 Execution---------------
+        M_LDR   pThresholds, ppThresholds
+        CMP     filt, #0                        ;// Z = no lane left to filter; consumed by BEQ NoFilterFilt0
+        
+
+        ;// Since beta <= 18 and alpha <= 255 we know
+        ;// -254 <= p0-q0 <= 254
+        ;//  -17 <= q1-q0 <= 17
+        ;//  -17 <= p1-p0 <= 17
+
+        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
+        ;// 
+        ;//    Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
+        ;//                = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
+        ;//                = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
+
+        USUB8   t1, p_1, p_0                    ;// t1 = p1-p0 per lane
+        USUB8   t2, q_1, q_0                    ;// t2 = q1-q0 per lane
+        BEQ     NoFilterFilt0
+        
+        LDRB    tC0, [pThresholds], #1          ;// first threshold byte (r5: p_1 has been consumed)
+        SSUB8   t1, t1, t2                      ;// t1 = (p1-p0) - (q1-q0)
+        LDRB    tC1, [pThresholds], #3          ;// second threshold byte; pointer ends 4 bytes past where it started
+        M_STR   pThresholds, ppThresholds       ;// saved before r6 is reused as t4
+        UHSUB8  t4, p_0, q_0                    ;// t4 = (p0-q0) >> 1 per lane
+        ORR     tC, tC1, tC0, LSL #16           ;// tC = [0 tC0 0 tC1]: tC0 goes to the upper lanes (rows 0 and 1)
+        USUB8   t5, p_0, q_0
+        AND     t5, t5, m01                     ;// t5 = bit 0 of (p0-q0) per lane
+        SHSUB8  t1, t1, t5                      ;// the halving steps from here to the last SHSUB8 evaluate A (formula above) in 8-bit lanes
+        ORR     tC, tC, LSL #8                  ;// tC = [tC0 tC0 tC1 tC1]: one threshold per pair of lanes
+        SSUB8   t1, t1, t5
+        SHSUB8  t1, t1, t4
+        UQADD8  tC, tC, m01                     ;// tC = threshold + 1 per lane (saturating)
+        SADD8   t1, t1, m01
+        USUB8   t5, filt, m01                   ;// only the GE flags matter: set in the lanes where filt is 0x01
+        SHSUB8  t1, t1, t4
+        SEL     tC, tC, m00                     ;// tC = 0 in lanes that are not filtered
+
+        ;// Split into positive and negative part and clip 
+
+        SSUB8   t1, t1, m00                     ;// subtracting 0 just sets GE where t1 >= 0 (signed)
+        SEL     pos, t1, m00                    ;// pos = t1 where non-negative, else 0
+        USUB8   neg, pos, t1                    ;// neg = -t1 where t1 is negative, else 0
+        USUB8   t3, pos, tC                     ;// GE where pos >= tC
+        SEL     pos, tC, pos                    ;// pos = min(pos, tC)
+        USUB8   t3, neg, tC
+        SEL     neg, tC, neg                    ;// neg = min(neg, tC)
+        UQADD8  P_0, p_0, pos                   ;// P_0 = p0 + pos - neg, Q_0 = q0 - pos + neg, saturating at each step
+        UQSUB8  Q_0, q_0, pos
+        UQSUB8  P_0, P_0, neg
+        UQADD8  Q_0, Q_0, neg
+        
+        ;// Choose to store the filtered
+        ;// value or the original pixel
+        USUB8   t1, filt, m01                   ;// GE set in the lanes where filt is 0x01
+        SEL     P_0, P_0, p_0
+        SEL     Q_0, Q_0, q_0
+    
+StoreResultAndExit
+
+        ;//---------Store result (byte stores: only p0 and q0 of each row are written)---------------
+
+        ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
+        ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]
+
+        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// back to row 0 (still 2 bytes left of the loop-entry pointer)
+        ADD        pQ0, pQ0, #1                 ;// now on the p0 byte of row 0; q0 is at [pQ0, #1]
+ 
+        MOV     t1, Q_0, LSR #24                ;// row 0 lives in the top byte
+        STRB    t1, [pQ0, #1]
+        MOV     t1, P_0, LSR #24
+        M_STRB  t1, [pQ0], srcdstStep           ;// store p0 of row 0, move down to row 1
+
+        MOV     t1, Q_0, LSR #16                ;// STRB writes only the low byte, i.e. row 1
+        STRB    t1, [pQ0, #1]
+        MOV     t1, P_0, LSR #16
+        M_STRB  t1, [pQ0], srcdstStep           ;// store p0 of row 1, move down to row 2
+
+        MOV     t1, P_0, LSR #8
+        STRB    t1, [pQ0]                       ;// p0 of row 2
+        STRB    P_0, [pQ0, srcdstStep]          ;// p0 of row 3 (low byte of P_0)
+        MOV     t1, Q_0, LSR #8
+        STRB    t1, [pQ0, #1]!                  ;// q0 of row 2; writeback leaves pQ0 on the q0 column
+        STRB    Q_0, [pQ0, srcdstStep]          ;// q0 of row 3
+
+        M_LDRD  XY, pBS, pXYBS
+        M_LDRD  alpha, beta, pAlphaBeta1        ;// r6/r7 held P_0/Q_0 until the stores above
+
+        SUB     pQ0, pQ0, srcdstStep, LSL #1    ;// back to row 0
+        ADD     pQ0, pQ0, #4                    ;// 4 bytes to the right for the next LoopX iteration
+
+        ADDS    XY, XY, XY                      ;// double the counter: C ends LoopX, Z (tested below) ends LoopY
+        M_STR   XY, pXYBS
+        BCC     LoopX
+
+;//-------- Common Exit of LoopY -----------------
+        ;// Align the pointers 
+
+ExitLoopY
+
+        M_LDR   pThresholds, ppThresholds
+        SUB     pQ0, pQ0, #8                    ;// undo the two 4-byte steps of LoopX
+        ADD     pQ0, pQ0, srcdstStep, LSL #2    ;// move 4 rows down
+        SUB     pBS, pBS, #14                   ;// LoopX advanced pBS by 2*8; net +2 for the second pass
+        SUB     pThresholds, pThresholds, #6    ;// LoopX advanced pThresholds by 2*4; net +2 for the second pass
+        M_STR   pThresholds, ppThresholds
+
+        M_LDRD  alpha, beta, pAlphaBeta0        ;// first LoopX iteration of the next pass uses alpha0/beta0 again
+
+        BNE     LoopY                           ;// Z still comes from the last ADDS XY; nothing since then writes the flags
+        MOV     r0, #OMX_Sts_NoErr
+;//-----------------End Filter--------------------
+
+        M_END
+
+        ENDIF        
+
+        END
+        
+        
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
new file mode 100644
index 0000000..1b84080
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
@@ -0,0 +1,331 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+        IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
+        IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe
+
+    
+
+    IF ARM1136JS
+
+
+MASK_0      EQU 0x00000000                  ;// all-zero word (value moved into m00)
+MASK_1      EQU 0x01010101                  ;// 0x01 in every byte lane: loaded into m01 and used as the multiplier that copies alpha/beta into all four lanes
+MASK_2      EQU 0xff00ff00                  ;// not referenced by the code of this routine
+LOOP_COUNT  EQU 0x11110000                  ;// doubled by ADDS once per iteration: carry-out every 4th doubling ends LoopX, zero after the 16th ends LoopY
+
+;// Declare input registers (several names below share one physical register; their live ranges must not overlap)
+
+pSrcDst     RN 0
+srcdstStep  RN 1
+pAlphaArg   RN 2
+pBetaArg    RN 3
+
+pThresholds RN 14                           ;// same register as m00, t2, maska and maskb
+pBS         RN 9                            ;// same register as beta1, q_1 and Q0b
+pQ0         RN 0                            ;// same register as pSrcDst, a, apflg/apqflg and Q1b
+bS          RN 2                            ;// same register as p_3, filt and Q2b
+
+alpha       RN 6
+alpha0      RN 6
+alpha1      RN 8                            ;// same register as q_0, XY and P1a
+
+beta        RN 7
+beta0       RN 7
+beta1       RN 9
+
+;// Declare Local/Temporary variables
+
+;// Pixels (one word holds four neighbouring pixels of a row)
+p_0         RN 3  
+p_1         RN 5  
+p_2         RN 4  
+p_3         RN 2  
+q_0         RN 8  
+q_1         RN 9  
+q_2         RN 10 
+q_3         RN 12 
+
+;// Filtering
+
+dp0q0       RN 12                           ;// the five difference temporaries share r12 with q_3 and t1
+dp1p0       RN 12
+dq1q0       RN 12
+dp2p0       RN 12
+dq2q0       RN 12
+            
+ap0q0       RN 1                            ;// same register as srcdstStep, which is kept in the pStep stack slot
+filt        RN 2
+        
+m00         RN 14
+m01         RN 11
+            
+apflg       RN 0 
+aqflg       RN 6
+apqflg      RN 0
+            
+
+;//Declarations for bSLT4 kernel
+
+tC0         RN 7
+ptC0        RN 1
+
+pQ0a        RN 0    
+Stepa       RN 1    
+maska       RN 14
+            
+P0a         RN 1                            ;// read after the bSLT4 helper call and copied out before r1 is reloaded as Stepa
+P1a         RN 8
+Q0a         RN 7
+Q1a         RN 11
+            
+;//Declarations for bSGE4 kernel
+
+pQ0b        RN 0
+Stepb       RN 1
+maskb       RN 14
+            
+P0b         RN 6
+P1b         RN 7
+P2b         RN 1                            ;// read after the bSGE4 helper call and copied out before r1 is reloaded as Stepb
+P3b         RN 3
+            
+Q0b         RN 9 
+Q1b         RN 0                            ;// read after the bSGE4 helper call and copied out before r0 is reloaded as pQ0b
+Q2b         RN 2 
+Q3b         RN 3 
+
+;// Miscellaneous
+XY          RN 8                            ;// loop counter, see LOOP_COUNT
+t0          RN 3 
+t1          RN 12
+t2          RN 14
+t7          RN 7
+t4          RN 4
+t5          RN 1  
+t8          RN 6  
+a           RN 0
+
+            
+
+        
+        ;// Allocate stack memory: scratch slots for values whose registers are reused inside the loop
+        M_ALLOC4 ppThresholds,4                 ;// running pThresholds pointer
+        M_ALLOC4 pQ_3,4                         ;// copy of the q_3 row word; NOTE(review): never read in this file, presumably used by the bSGE4 helper - confirm
+        M_ALLOC4 pP_3,4                         ;// copy of the p_3 row word; NOTE(review): never read in this file, presumably used by the bSGE4 helper - confirm
+        M_ALLOC8 pAlphaBeta0,8                  ;// replicated alpha/beta pair that is reloaded at the end of every iteration
+        M_ALLOC8 pAlphaBeta1,8                  ;// replicated alpha/beta pair built from pAlpha[1]/pBeta[1]
+        M_ALLOC8 pXYBS,4                        ;// loop counter XY; written and read together with pBS through M_STRD/M_LDRD
+        M_ALLOC4 ppBS,4                         ;// running pBS; NOTE(review): presumably laid out directly after pXYBS so the pair accesses reach it - confirm in armCOMM_s.h
+        M_ALLOC8 ppQ0Step,4                     ;// pQ0 at the start of the iteration; read together with pStep through M_LDRD
+        M_ALLOC4 pStep,4                        ;// srcdstStep; NOTE(review): presumably laid out directly after ppQ0Step - confirm in armCOMM_s.h
+        
+        ;// Function header: filters in place, 4 LoopY passes of 4 LoopX passes, one word of 4 pixels per row per pass
+        M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11      ;// NOTE(review): r11 presumably names the highest register the prologue saves - confirm in armCOMM_s.h
+        
+        ;//Input arguments on the stack
+        M_ARG   ppThresholdsArg, 4
+        M_ARG   ppBSArg, 4
+
+        LDR     t4,=MASK_1                      ;// t4 = 0x01010101, byte-replication multiplier
+
+        LDRB    alpha0, [pAlphaArg]             ;// alpha0 = pAlpha[0]
+        LDRB    beta0,  [pBetaArg]              ;// beta0  = pBeta[0]
+        LDRB    alpha1, [pAlphaArg,#1]          ;// alpha1 = pAlpha[1]
+        LDRB    beta1,  [pBetaArg,#1]           ;// beta1  = pBeta[1]
+
+        MUL     alpha0, alpha0, t4              ;// copy each 8-bit value into all four byte lanes
+        MUL     beta0, beta0, t4
+        MUL     alpha1, alpha1, t4
+        MUL     beta1, beta1, t4
+
+        M_STRD  alpha0, beta0, pAlphaBeta0
+        M_STRD  alpha1, beta1, pAlphaBeta1
+
+        LDR     XY,=LOOP_COUNT                  ;// XY reuses r8 (alpha1), already stored
+        M_LDR   pBS, ppBSArg                    ;// pBS reuses r9 (beta1), already stored
+        M_LDR   pThresholds, ppThresholdsArg
+        M_STR   srcdstStep, pStep               ;// r1 is reused as ap0q0 inside the loop
+        M_STRD  XY, pBS, pXYBS
+        SUB     pQ0, pQ0, srcdstStep, LSL #2    ;// pQ0 = pSrcDst - 4*srcdstStep, the row that is loaded as p_3
+        M_STR   pThresholds, ppThresholds       ;// r14 is reused as m00 inside the loop
+LoopY
+LoopX
+;//---------------Load Pixels (LoopY and LoopX label the same address)-------------------
+        M_STR   pQ0, ppQ0Step                   ;// remember the p_3 row address; r0 is reused as a scratch register below
+        M_LDR   p_3, [pQ0], srcdstStep          ;// each load post-increments pQ0 by one row
+        M_LDR   p_2, [pQ0], srcdstStep
+        M_STR   p_3, pP_3                       ;// saved because r2 is reloaded as bS on the next line
+        LDRB    bS, [pBS], #1                   ;// one bS byte for this group of 4 pixels; pBS += 1
+        M_STR   pBS, ppBS                       ;// saved because r9 is reloaded as q_1 below
+        M_LDR   p_1, [pQ0], srcdstStep
+        CMP     bS, #0                          ;// Z consumed by BEQ NoFilterBS0 (the loads in between leave the flags alone)
+        M_LDR   p_0, [pQ0], srcdstStep
+        M_LDR   q_0, [pQ0], srcdstStep
+        M_LDR   q_1, [pQ0], srcdstStep
+        M_LDR   q_2, [pQ0], srcdstStep
+        M_LDR   q_3, [pQ0], srcdstStep          ;// pQ0 is now 8 rows below the saved start
+        BEQ     NoFilterBS0
+        CMP     bS, #4                          ;// flags consumed by BLT bSLT4 at the end of the decision code (USUB8/SEL/MOV do not write N,Z,C,V)
+        M_STR   q_3, pQ_3                       ;// saved because r12 is reused for the difference temporaries
+
+;//--------------Filtering Decision (filt keeps 0x01 in every byte lane that is to be filtered)-------------------
+        LDR     m01, =MASK_1                ;//  01010101 mask 
+        MOV     m00, #MASK_0                ;//  00000000 mask 
+
+        ;// Check |p0-q0|<Alpha 
+        USUB8   dp0q0, p_0, q_0 
+        USUB8   a, q_0, p_0                     ;// GE[lane] set where q0 >= p0 (a overwrites r0 = pQ0, saved in ppQ0Step)
+        SEL     ap0q0, a, dp0q0                 ;// ap0q0 = |p0-q0| per lane (overwrites r1 = srcdstStep, saved in pStep)
+        USUB8   a, ap0q0, alpha                 ;// GE[lane] set where |p0-q0| >= alpha
+        SEL     filt, m00, m01                  ;// filt = 0 in those lanes, 0x01 in the others
+        
+        ;// Check |p1-p0|<Beta 
+        USUB8   dp1p0, p_1, p_0
+        USUB8   a, p_0, p_1
+        SEL     a, a, dp1p0                     ;// a = |p1-p0| per lane
+        USUB8   a, a, beta
+        SEL     filt, m00, filt                 ;// clear lanes where |p1-p0| >= beta
+
+        ;// Check |q1-q0|<Beta 
+        USUB8   dq1q0, q_1, q_0
+        USUB8   a, q_0, q_1
+        SEL     a, a, dq1q0                     ;// a = |q1-q0| per lane
+        USUB8   a, a, beta
+        SEL     filt, m00, filt                 ;// clear lanes where |q1-q0| >= beta
+
+        ;// Check ap<Beta 
+        USUB8   dp2p0, p_2, p_0
+        USUB8   a, p_0, p_2
+        SEL     a, a, dp2p0                     ;// a = |p2-p0| per lane
+        USUB8   a, a, beta
+        SEL     apflg, m00, filt            ;// apflg = filt && (ap<beta)
+
+        ;// Check aq<Beta 
+        USUB8   dq2q0, q_2, q_0
+        USUB8   t2, q_0, q_2                    ;// t2 reuses r14: m00 is not needed any more
+        SEL     t2, t2, dq2q0                   ;// t2 = |q2-q0| per lane
+        USUB8   t2, t2, beta                    ;// GE[lane] set where aq >= beta; consumed by the first SEL of either kernel
+        MOV     t7,#0                           ;// zero operand for that SEL (t7 reuses r7: beta is not needed any more)
+
+        BLT     bSLT4                           ;// flags still come from CMP bS, #4
+;//-------------------Filter--------------------
+bSGE4        
+        ;//---------bSGE4 Execution---------------
+        SEL     t1, t7, filt            ;// aqflg = filt && (aq<beta) 
+        CMP     filt, #0                        ;// Z = no lane left to filter
+        ORR     apqflg, apflg, t1, LSL #1       ;// per lane: bit 0 = ap flag, bit 1 = aq flag
+        M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// only when nothing is filtered: restore r0/r1 for the skip path
+        BEQ     NoFilterFilt0
+
+        BL      armVCM4P10_DeblockingLumabSGE4_unsafe   ;// external kernel; the code below reads its results from P0b/P1b/P2b/Q0b/Q1b/Q2b
+        
+        ;//---------Store result---------------
+        M_LDR   pThresholds, ppThresholds
+        MOV     p_2, Q1b                        ;// move Q1b out of r0 before r0 is reloaded as pQ0b
+        MOV     p_1, P2b                        ;// move P2b out of r1 before r1 is reloaded as Stepb
+        M_LDRD  pQ0b, Stepb, ppQ0Step           ;// pQ0b = address of the p_3 row, Stepb = row stride
+        ADD     pThresholds, #1                 ;// one threshold byte per group (not read on this path)
+        M_STR   pThresholds, ppThresholds
+        M_STR   p_1, [pQ0b, Stepb]!             ;// row p2 <- P2b (pre-indexed with writeback: one row down per store)
+        M_STR   P1b, [pQ0b, Stepb]!             ;// row p1
+        M_STR   P0b, [pQ0b, Stepb]!             ;// row p0
+        M_STR   Q0b, [pQ0b, Stepb]!             ;// row q0
+        STR     p_2, [pQ0b, Stepb]              ;// row q1 <- Q1b
+        STR     Q2b, [pQ0b, Stepb, LSL #1]      ;// row q2
+
+
+        M_LDRD  XY, pBS, pXYBS
+        SUB     pQ0, pQ0b, Stepb, LSL #2        ;// pQ0b sits on the q0 row; 4 rows up is the p_3 row again
+        ADD     pQ0, pQ0, #4                    ;// next group of 4 pixels
+        M_LDRD  alpha, beta, pAlphaBeta0
+        ADDS    XY, XY, XY                      ;// double the counter: C ends LoopX, Z (tested at ExitLoopY) ends LoopY
+        M_STR   XY, pXYBS
+        BCC     LoopX
+        B       ExitLoopY
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterBS0
+        SUB     pQ0, pQ0, srcdstStep, LSL #3    ;// undo the 8 row post-increments of the loads
+NoFilterFilt0
+        ADD     pQ0, pQ0, #4                    ;// next group of 4 pixels (pQ0 is the p_3 row address on both entries)
+        ;// Load counter for LoopX
+        M_LDRD  XY, pBS, pXYBS
+        M_LDR   pThresholds, ppThresholds
+        M_LDRD  alpha, beta, pAlphaBeta0
+
+        ;// Align the pointers
+        ADDS    XY, XY, XY                      ;// double the counter: C ends LoopX, Z (tested at ExitLoopY) ends LoopY
+        ADD     pThresholds, pThresholds, #1    ;// step over the threshold byte of the skipped group
+        M_STR   pThresholds, ppThresholds
+        M_STR   XY, pXYBS
+        BCC     LoopX
+        B       ExitLoopY
+
+bSLT4        
+        ;//---------bSLT4 Execution---------------
+        SEL     aqflg, t7, filt            ;// aqflg = filt && (aq<beta) 
+        M_LDR   ptC0, ppThresholds
+        CMP     filt, #0                        ;// Z = no lane left to filter
+        M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// only when nothing is filtered: restore r0/r1 for the skip path
+        BEQ     NoFilterFilt0
+        
+        LDRB    tC0, [ptC0], #1                 ;// threshold byte of this group; pointer += 1
+        M_STR   ptC0, ppThresholds
+
+        BL      armVCM4P10_DeblockingLumabSLT4_unsafe   ;// external kernel; the code below reads its results from P0a/P1a/Q0a/Q1a
+
+        ;//---------Store result---------------
+        MOV     p_2, P0a                        ;// move P0a out of r1 before r1 is reloaded as Stepa
+        M_LDRD  pQ0a, Stepa, ppQ0Step           ;// pQ0a = address of the p_3 row, Stepa = row stride
+        M_STR   P1a, [pQ0a, Stepa, LSL #1]!     ;// row p1 (two rows below p_3), with writeback
+        M_STR   p_2, [pQ0a, Stepa]!             ;// row p0 <- P0a
+        M_STR   Q0a, [pQ0a, Stepa]!             ;// row q0
+        STR     Q1a, [pQ0a, Stepa]              ;// row q1
+       
+        ;// Load counter
+        M_LDRD  XY, pBS, pXYBS
+        M_LDRD  alpha, beta, pAlphaBeta0
+
+        SUB     pQ0, pQ0a, Stepa, LSL #2        ;// pQ0a sits on the q0 row; 4 rows up is the p_3 row again
+        ADD     pQ0, pQ0, #4                    ;// next group of 4 pixels
+
+        ADDS    XY, XY, XY                      ;// double the counter: C ends LoopX, Z (tested below) ends LoopY
+        M_STR   XY, pXYBS
+        BCC     LoopX
+        
+;//-------- Common Exit of LoopY -----------------
+        ;// Align the pointers 
+ExitLoopY
+        M_LDRD  alpha, beta, pAlphaBeta1
+        SUB     pQ0, pQ0, #16                   ;// undo the four 4-pixel steps of LoopX
+        ADD     pQ0, pQ0, srcdstStep, LSL #2    ;// move 4 rows down
+        M_STRD  alpha, beta, pAlphaBeta0        ;// every later pass therefore reloads alpha1/beta1 from pAlphaBeta0
+
+        BNE     LoopY                           ;// Z still comes from the last ADDS XY; nothing since then writes the flags
+        MOV     r0, #OMX_Sts_NoErr
+;//-----------------End Filter--------------------
+        M_END
+
+    ENDIF        
+        
+
+        END
+        
+        
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
new file mode 100644
index 0000000..417ddc2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
@@ -0,0 +1,550 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+        IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
+        IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe
+        
+    
+    IF ARM1136JS
+
+MASK_0      EQU 0x00000000   ;// all-zero word; loaded into m00 and used as the zero operand of SEL
+MASK_1      EQU 0x01010101   ;// 0x01 in every byte: multiplier that replicates alpha/beta bytes, and the m01 operand of SEL
+MASK_2      EQU 0xff00ff00   ;// keeps bytes 1 and 3 of a word in the AND steps of the pack/unpack code
+LOOP_COUNT  EQU 0x11110000   ;// XY start value; doubled with ADDS per LoopX pass: carry-out on every 4th pass, zero after 16
+
+;// Declare input registers (several names below deliberately alias the same physical register)
+
+pSrcDst     RN 0
+srcdstStep  RN 1
+pAlphaArg   RN 2
+pBetaArg    RN 3
+
+pThresholds RN 14
+pBS         RN 9
+pQ0         RN 0              ;// same register as pSrcDst
+bS          RN 2              ;// same register as pAlphaArg
+
+alpha       RN 6
+alpha0      RN 6              ;// same register as alpha
+alpha1      RN 8
+
+beta        RN 7
+beta0       RN 7              ;// same register as beta
+beta1       RN 9              ;// same register as pBS
+
+;// Declare Local/Temporary variables
+
+;// Pixels
+p_0         RN 3  
+p_1         RN 5  
+p_2         RN 4  
+p_3         RN 2  
+q_0         RN 8  
+q_1         RN 9  
+q_2         RN 10 
+q_3         RN 12 
+
+;// Unpacking
+mask        RN 11 
+
+row0        RN 2
+row1        RN 4
+row2        RN 5
+row3        RN 3
+
+row4        RN 8
+row5        RN 9
+row6        RN 10
+row7        RN 12
+row8        RN 14
+row9        RN 7
+            
+tunpk0      RN 8
+tunpk1      RN 9
+tunpk2      RN 10
+tunpk3      RN 12
+tunpk4      RN 0
+
+tunpk5      RN 1
+tunpk6      RN 14
+tunpk7      RN 2 
+tunpk8      RN 5 
+tunpk9      RN 6 
+ 
+
+;// Filtering
+
+dp0q0       RN 12
+dp1p0       RN 12
+dq1q0       RN 12
+dp2p0       RN 12
+dq2q0       RN 12
+            
+ap0q0       RN 1  
+filt        RN 2
+        
+m00         RN 14
+m01         RN 11
+            
+apflg       RN 0 
+aqflg       RN 6
+apqflg      RN 0
+            
+
+;//Declarations for bSLT4 kernel
+
+tC0         RN 7
+ptC0        RN 1
+
+pQ0a        RN 0    
+Stepa       RN 1    
+maska       RN 14
+            
+P0a         RN 1
+P1a         RN 8
+Q0a         RN 7
+Q1a         RN 11
+            
+;//Declarations for bSGE4 kernel
+
+pQ0b        RN 0
+Stepb       RN 1
+maskb       RN 14
+            
+P0b         RN 6
+P1b         RN 7
+P2b         RN 1
+P3b         RN 3
+            
+Q0b         RN 9 
+Q1b         RN 0   
+Q2b         RN 2 
+Q3b         RN 3 
+
+;// Miscellaneous
+XY          RN 8
+t0          RN 3 
+t1          RN 12
+t2          RN 14
+t7          RN 7
+t4          RN 4
+t5          RN 1  
+t8          RN 6  
+a           RN 0
+
+            
+
+        ;// Allocate stack memory (M_ALLOC4 / M_ALLOC8 are macros from armCOMM_s.h)
+        M_ALLOC4 ppThresholds,4          ;// running copy of the pThresholds pointer
+        M_ALLOC4 pQ_3,4                  ;// packed q_3 column, stored before the filter call and reloaded after it
+        M_ALLOC4 pP_3,4                  ;// packed p_3 column
+        M_ALLOC8 pAlphaBeta0,8           ;// replicated alpha0 / beta0 pair
+        M_ALLOC8 pAlphaBeta1,8           ;// replicated alpha1 / beta1 pair
+        M_ALLOC8 pXYBS,4                 ;// loop counter XY; read/written together with pBS via M_LDRD / M_STRD
+        M_ALLOC4 ppBS,4                  ;// NOTE(review): presumably the second word of the XY/pBS pair - confirm against armCOMM_s.h
+        M_ALLOC8 ppQ0Step,4              ;// saved pQ0; read together with srcdstStep via M_LDRD
+        M_ALLOC4 pStep,4                 ;// NOTE(review): presumably the second word of the pQ0/srcdstStep pair - confirm
+        
+        ;// Function header
+        M_START omxVCM4P10_FilterDeblockingLuma_VerEdge_I, r11
+        
+        ;//Input arguments on the stack
+        M_ARG   ppThresholdsArg, 4
+        M_ARG   ppBSArg, 4
+        
+        LDR     t4,=MASK_1
+        ;// Read byte [0] and byte [1] of the alpha and beta arrays
+        LDRB    alpha0, [pAlphaArg]
+        LDRB    beta0,  [pBetaArg]
+        LDRB    alpha1, [pAlphaArg,#1]
+        LDRB    beta1,  [pBetaArg,#1]
+        ;// byte * 0x01010101 copies the byte into all four byte lanes of the word
+        MUL     alpha0, alpha0, t4
+        MUL     beta0, beta0, t4
+        MUL     alpha1, alpha1, t4
+        MUL     beta1, beta1, t4
+
+        M_STRD  alpha0, beta0, pAlphaBeta0
+        M_STRD  alpha1, beta1, pAlphaBeta1
+        ;// Initialise the loop state kept on the stack (XY aliases alpha1 in r8, pBS aliases beta1 in r9)
+        LDR     XY,=LOOP_COUNT
+        M_LDR   pBS, ppBSArg
+        M_LDR   pThresholds, ppThresholdsArg
+        M_STR   srcdstStep, pStep
+        M_STRD  XY, pBS, pXYBS
+        M_STR   pThresholds, ppThresholds
+        ;// Step back 4 bytes: LoopY loads the p pixels at pQ0, LoopX loads the q pixels at pQ0+4
+        SUB     pQ0, pQ0, #4
+LoopY
+;//---------------Load Pixels-------------------
+;// Reached at function entry, from ExitLoopY (BNE) and from the no-filter path (BCC); each time the p columns are reloaded from memory.
+;//----------------Pack p0-p3-----------------------
+        LDR     mask, =MASK_2
+        ;// Load one word from each of four consecutive rows, then put pQ0 back on the first row
+        M_LDR   row0, [pQ0], srcdstStep          
+        M_LDR   row1, [pQ0], srcdstStep          
+        LDR     row2, [pQ0]
+        LDR     row3, [pQ0, srcdstStep]
+        SUB     pQ0, pQ0, srcdstStep, LSL #1
+        
+        ;// row0 = [r0p0 r0p1 r0p2 r0p3]
+        ;// row1 = [r1p0 r1p1 r1p2 r1p3]
+        ;// row2 = [r2p0 r2p1 r2p2 r2p3]
+        ;// row3 = [r3p0 r3p1 r3p2 r3p3]
+        ;// Transpose step 1: interleave bytes of row pairs (AND keeps two bytes, UXTAB16 adds the other row's bytes)
+        AND     tunpk0, mask, row0
+        AND     tunpk6, mask, row0, LSL#8
+        UXTAB16 tunpk0, tunpk0, row1, ROR#8
+        UXTAB16 tunpk6, tunpk6, row1
+        AND     tunpk2, mask, row2
+        AND     tunpk3, mask, row2, LSL#8
+        UXTAB16 tunpk2, tunpk2, row3, ROR#8
+        UXTAB16 tunpk3, tunpk3, row3
+
+        ;// tunpk0 = [r0p0 r1p0 r0p2 r1p2]
+        ;// tunpk6 = [r0p1 r1p1 r0p3 r1p3]
+        ;// tunpk2 = [r2p0 r3p0 r2p2 r3p2]
+        ;// tunpk3 = [r2p1 r3p1 r2p3 r3p3]
+        ;// Transpose step 2: combine halfwords so each register holds one pixel column for all four rows
+        PKHTB   p_0, tunpk0, tunpk2, ASR#16
+        PKHTB   p_1, tunpk6, tunpk3, ASR#16
+        PKHBT   p_2, tunpk2, tunpk0, LSL#16
+        PKHBT   p_3, tunpk3, tunpk6, LSL#16
+
+
+        ;// p_0 = [r0p0 r1p0 r2p0 r3p0]
+        ;// p_1 = [r0p1 r1p1 r2p1 r3p1]
+        ;// p_2 = [r0p2 r1p2 r2p2 r3p2]
+        ;// p_3 = [r0p3 r1p3 r2p3 r3p3]
+        ;// p_3 shares r2 with bS/row0, so it is parked on the stack before LoopX reuses r2
+        M_STR   p_3, pP_3
+
+;//----------------Pack q0-q3-----------------------
+LoopX
+        LDRB    bS, [pBS], #4                   ;// read one bS byte, advance pBS by 4
+        M_STR   pQ0, ppQ0Step                   ;// save pQ0 before the write-back load below moves it
+        LDR     mask, =MASK_2
+        CMP     bS, #0
+        M_STR   pBS, ppBS
+
+        LDR     row4, [pQ0, #4]!                ;// first q row; pQ0 += 4 (write-back)
+        BEQ.W   NoFilterBS0                     ;// bS == 0 (flags from the CMP above): nothing to filter
+        M_LDR   row5, [pQ0, srcdstStep]!
+        M_LDR   row6, [pQ0, srcdstStep]!
+        M_LDR   row7, [pQ0, srcdstStep]
+
+        ;// row4 = [r0q3 r0q2 r0q1 r0q0]
+        ;// row5 = [r1q3 r1q2 r1q1 r1q0]
+        ;// row6 = [r2q3 r2q2 r2q1 r2q0]
+        ;// row7 = [r3q3 r3q2 r3q1 r3q0]
+    
+        AND     tunpk4, mask, row4
+        CMP     bS, #4                          ;// result is consumed by BLT bSLT4 after the filtering decision; bS (r2) is overwritten below as tunpk7
+        AND     tunpk5, mask, row4, LSL#8
+        UXTAB16 tunpk4, tunpk4, row5, ROR#8
+        UXTAB16 tunpk5, tunpk5, row5
+        AND     tunpk6, mask, row6
+        AND     tunpk7, mask, row6, LSL#8
+        UXTAB16 tunpk6, tunpk6, row7, ROR#8
+        UXTAB16 tunpk7, tunpk7, row7
+
+        ;// tunpk4 = [r0q0 r1q0 r0q2 r1q2]
+        ;// tunpk5 = [r0q1 r1q1 r0q3 r1q3]
+        ;// tunpk6 = [r2q0 r3q0 r2q2 r3q2]
+        ;// tunpk7 = [r2q1 r3q1 r2q3 r3q3]
+
+        PKHTB   q_3, tunpk4, tunpk6, ASR#16
+        PKHTB   q_2, tunpk5, tunpk7, ASR#16
+        PKHBT   q_1, tunpk6, tunpk4, LSL#16
+        M_STR   q_3, pQ_3                       ;// q_3 shares r12 with the dp0q0..dq2q0 temporaries, so keep a copy on the stack
+        PKHBT   q_0, tunpk7, tunpk5, LSL#16
+
+
+        ;// q_0 = [r0q0 r1q0 r2q0 r3q0]
+        ;// q_1 = [r0q1 r1q1 r2q1 r3q1]
+        ;// q_2 = [r0q2 r1q2 r2q2 r3q2]
+        ;// q_3 = [r0q3 r1q3 r2q3 r3q3]
+
+
+;//--------------Filtering Decision -------------------
+        LDR     m01, =MASK_1                ;//  01010101 mask 
+        MOV     m00, #MASK_0                ;//  00000000 mask 
+        ;// Pattern below: two USUB8 + SEL form a per-byte absolute difference; a further USUB8 against the threshold sets GE per byte and SEL zeroes the lanes where diff >= threshold.
+        ;// Check |p0-q0|<Alpha 
+        USUB8   dp0q0, p_0, q_0 
+        USUB8   a, q_0, p_0
+        SEL     ap0q0, a, dp0q0             ;// ap0q0 = |p0-q0| per byte
+        USUB8   a, ap0q0, alpha
+        SEL     filt, m00, m01              ;// filt byte = 0x01 where |p0-q0| < alpha, else 0x00
+        
+        ;// Check |p1-p0|<Beta 
+        USUB8   dp1p0, p_1, p_0
+        USUB8   a, p_0, p_1
+        SEL     a, a, dp1p0
+        USUB8   a, a, beta
+        SEL     filt, m00, filt             ;// clear lanes where |p1-p0| >= beta
+
+        ;// Check |q1-q0|<Beta 
+        USUB8   dq1q0, q_1, q_0
+        USUB8   a, q_0, q_1
+        SEL     a, a, dq1q0
+        USUB8   a, a, beta
+        SEL     filt, m00, filt             ;// clear lanes where |q1-q0| >= beta
+
+        ;// Check ap<Beta 
+        USUB8   dp2p0, p_2, p_0
+        USUB8   a, p_0, p_2
+        SEL     a, a, dp2p0
+        USUB8   a, a, beta
+        SEL     apflg, m00, filt            ;// apflg = filt && (ap<beta)
+
+        ;// Check aq<Beta 
+        USUB8   dq2q0, q_2, q_0
+        USUB8   t2, q_0, q_2                ;// t2 shares r14 with m00, so the zero mask is gone from here on
+        SEL     t2, t2, dq2q0
+        USUB8   t2, t2, beta                ;// GE flags stay live for the SEL at the top of bSGE4 / bSLT4
+        MOV     t7,#0                       ;// replacement zero operand; t7 shares r7 with beta, which is reloaded from the stack later
+        
+
+        BLT     bSLT4        ;// tests the CMP bS, #4 issued during q packing (no instruction in between writes N/Z/C/V, assuming M_STR does not)
+;//-------------------Filter--------------------
+bSGE4        
+        ;//---------bSGE4 Execution---------------
+        SEL     t1, t7, filt            ;// aqflg = filt && (aq<beta) 
+        CMP     filt, #0
+        ORR     apqflg, apflg, t1, LSL #1   ;// per byte: bit0 = ap<beta flag, bit1 = aq<beta flag
+        M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// only when filt == 0 (presumably a conditional load): restore r0/r1 for the no-filter path
+        BEQ     NoFilterFilt0
+
+        BL      armVCM4P10_DeblockingLumabSGE4_unsafe   ;// external routine; its register contract is not visible in this file
+        
+        ;//---------Store result---------------
+
+        LDR     maskb,=MASK_2
+
+        ;// P0b = [r0p0 r1p0 r2p0 r3p0]
+        ;// P1b = [r0p1 r1p1 r2p1 r3p1]
+        ;// P2b = [r0p2 r1p2 r2p2 r3p2]
+        ;// P3b = [r0p3 r1p3 r2p3 r3p3]
+
+        M_LDR   P3b, pP_3   ;// fetch the p_3 column saved in LoopY / previous pass
+        M_STR   Q0b, pP_3   ;// slot now holds Q0b; NOTE(review): appears to serve as p_3 for the next LoopX pass - confirm
+
+        ;//------Pack p0-p3------
+        AND     tunpk0, maskb, P0b
+        AND     tunpk2, maskb, P0b, LSL#8
+        UXTAB16 tunpk0, tunpk0, P1b, ROR#8
+        UXTAB16 tunpk2, tunpk2, P1b
+
+        AND     tunpk3, maskb, P2b
+        AND     tunpk8, maskb, P2b, LSL#8
+        UXTAB16 tunpk3, tunpk3, P3b, ROR#8
+        UXTAB16 tunpk8, tunpk8, P3b
+
+        ;// tunpk0 = [r0p0 r0p1 r2p0 r2p1]
+        ;// tunpk2 = [r1p0 r1p1 r3p0 r3p1]
+        ;// tunpk3 = [r0p2 r0p3 r2p2 r2p3]
+        ;// tunpk8 = [r1p2 r1p3 r3p2 r3p3]
+        ;// Q1b lives in r0 and P2b in r1; both are consumed/moved before r0/r1 are reloaded as pQ0b/Stepb
+        MOV     p_2, Q1b
+        M_LDRD  pQ0b, Stepb, ppQ0Step
+
+        PKHTB   row9, tunpk0, tunpk3, ASR#16
+        PKHBT   row7, tunpk3, tunpk0, LSL#16
+        PKHTB   row3, tunpk2, tunpk8, ASR#16
+        PKHBT   row6, tunpk8, tunpk2, LSL#16
+
+        ;// row9 = [r0p0 r0p1 r0p2 r0p3]
+        ;// row3 = [r1p0 r1p1 r1p2 r1p3]
+        ;// row7 = [r2p0 r2p1 r2p2 r2p3]
+        ;// row6 = [r3p0 r3p1 r3p2 r3p3]
+
+        M_STR   row9, [pQ0b], Stepb             ;// row 0, then pQ0b moves to row 1
+        STR     row7, [pQ0b, Stepb]             ;// row 2
+        STR     row6, [pQ0b, Stepb, LSL #1]     ;// row 3
+        STR     row3, [pQ0b], #4                ;// row 1, then pQ0b moves 4 bytes right to the q pixels
+        
+        M_LDR   Q3b, pQ_3                       ;// Q3b is r3 (also p_0): reloaded only after row3 (r3) has been stored
+
+        ;// Q0b = [r0q0 r1q0 r2q0 r3q0]
+        ;// Q1b = [r0q1 r1q1 r2q1 r3q1]
+        ;// Q2b = [r0q2 r1q2 r2q2 r3q2]
+        ;// Q3b = [r0q3 r1q3 r2q3 r3q3]
+
+        ;//------Pack q0-q3------
+        AND     tunpk0, maskb, p_2              ;// p_2 holds the copy of Q1b made above
+        AND     tunpk2, maskb, p_2, LSL#8
+        UXTAB16 tunpk0, tunpk0, Q0b, ROR#8
+        UXTAB16 tunpk2, tunpk2, Q0b
+
+        AND     tunpk3, maskb, Q3b
+        AND     tunpk8, maskb, Q3b, LSL#8
+        UXTAB16 tunpk3, tunpk3, Q2b, ROR#8
+        UXTAB16 tunpk8, tunpk8, Q2b
+
+        ;// tunpk0 = [r0q1 r0q0 r2q1 r2q0]
+        ;// tunpk2 = [r1q1 r1q0 r3q1 r3q0]
+        ;// tunpk3 = [r0q3 r0q2 r2q3 r2q2]
+        ;// tunpk8 = [r1q3 r1q2 r3q3 r3q2]
+
+        PKHTB   row8, tunpk3, tunpk0, ASR#16
+        PKHBT   row7, tunpk0, tunpk3, LSL#16
+        PKHTB   row4, tunpk8, tunpk2, ASR#16
+        PKHBT   row6, tunpk2, tunpk8, LSL#16
+
+        ;// row8 = [r0q0 r0q1 r0q2 r0q3]
+        ;// row4 = [r1q0 r1q1 r1q2 r1q3]
+        ;// row7 = [r2q0 r2q1 r2q2 r2q3]
+        ;// row6 = [r3q0 r3q1 r3q2 r3q3]
+
+        STR     row4, [pQ0b]                    ;// row 1
+        STR     row7, [pQ0b, Stepb]             ;// row 2
+        STR     row6, [pQ0b, Stepb, LSL #1]     ;// row 3
+
+        SUB     pQ0, pQ0b, Stepb                ;// back to row 0; pQ0 is now 4 bytes right of where this pass started
+        MOV     p_1, Q2b
+
+        STR     row8, [pQ0]                     ;// row 0
+        ;// Reload loop state; row8 shared r14 with pThresholds and has been stored above
+        M_LDRD  XY, pBS, pXYBS
+        M_LDR   pThresholds, ppThresholds
+        M_LDRD  alpha, beta, pAlphaBeta1
+
+        ADDS    XY, XY, XY                      ;// shift the counter left by one; C and Z drive the two branches below / in ExitLoopY
+        ADD     pThresholds, #4
+        M_STR   pThresholds, ppThresholds
+        M_STR   XY, pXYBS
+        BCC     LoopX                           ;// no carry-out: continue along the same four rows
+        B       ExitLoopY
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+;// NoFilterFilt0 is entered with pQ0 restored from ppQ0Step (the value saved before the +4 write-back load in LoopX)
+NoFilterFilt0
+        ADD     pQ0, pQ0, #4                    ;// redo the +4 so both entry points continue from the same position
+NoFilterBS0
+        ;// Load counter for LoopX
+        M_LDRD  XY, pBS, pXYBS
+        M_LDR   pThresholds, ppThresholds
+        M_LDRD  alpha, beta, pAlphaBeta1
+
+        ;// Align the pointer
+        ADDS    XY, XY, XY
+        ADD     pThresholds, pThresholds, #4
+        M_STR   pThresholds, ppThresholds
+        M_STR   XY, pXYBS
+        BCC     LoopY                           ;// to LoopY, not LoopX: the p columns are reloaded and repacked from memory at the advanced pQ0
+        B       ExitLoopY
+        
+bSLT4        
+        ;//---------bSLT4 Execution---------------
+        SEL     aqflg, t7, filt            ;// aqflg = filt && (aq<beta) 
+        M_LDR   ptC0, ppThresholds
+        CMP     filt, #0
+        M_LDRD  pQ0, srcdstStep, ppQ0Step, EQ   ;// only when filt == 0 (presumably a conditional load): restore r0/r1 for the no-filter path
+        BEQ     NoFilterFilt0
+        
+        LDRB    tC0, [ptC0], #4                 ;// read one threshold byte, advance the pointer by 4
+        M_STR   ptC0, ppThresholds
+
+        BL      armVCM4P10_DeblockingLumabSLT4_unsafe   ;// external routine; its register contract is not visible in this file
+
+        ;//---------Store result---------------
+        ;//--------Pack p1,p0,q1,q0------------
+        
+        ;//Load destination pointer
+        LDR     maska,=MASK_2
+        M_STR   Q0a, pP_3                       ;// NOTE(review): appears to become p_3 for the next LoopX pass - confirm
+        MOV     p_1, q_2
+
+        ;// P1a = [r0p1 r1p1 r2p1 r3p1]
+        ;// P0a = [r0p0 r1p0 r2p0 r3p0]
+        ;// Q0a = [r0q0 r1q0 r2q0 r3q0]
+        ;// Q1a = [r0q1 r1q1 r2q1 r3q1]
+
+        AND     tunpk1, maska, P0a
+        AND     tunpk2, maska, P0a, LSL#8
+        UXTAB16 tunpk1, tunpk1, P1a, ROR#8
+        UXTAB16 tunpk2, tunpk2, P1a
+        ;// P0a shares r1 with Stepa; it has been consumed above, so r0/r1 can be reloaded now
+        M_LDRD  pQ0a, Stepa, ppQ0Step
+
+        AND     tunpk9, maska, Q1a
+        AND     tunpk3, maska, Q1a, LSL#8
+        UXTAB16 tunpk9, tunpk9, Q0a, ROR#8
+        UXTAB16 tunpk3, tunpk3, Q0a
+
+        ;// tunpk1 = [r0p0 r0p1 r2p0 r2p1]
+        ;// tunpk2 = [r1p0 r1p1 r3p0 r3p1]
+        ;// tunpk9 = [r0q1 r0q0 r2q1 r2q0]
+        ;// tunpk3 = [r1q1 r1q0 r3q1 r3q0]
+
+        MOV     t4, tunpk1, LSR #16
+        MOV     t0, tunpk9, LSR #16
+        ;// Only two bytes either side of the edge are written (halfword stores at byte offsets +2 and +4 of each row)
+        STRH    t4,[pQ0a, #2]!          ;//Stores [r0p0 r0p1]
+        STRH    t0,[pQ0a, #2]           ;//Stores [r0q0 r0q1]
+
+        MOV     t4, tunpk2, LSR #16
+        MOV     t0, tunpk3, LSR #16
+
+        M_STRH  t4,[pQ0a, Stepa]!       ;//Stores [r1p0 r1p1]
+        STRH    t0,[pQ0a, #2]           ;//Stores [r1q0 r1q1]
+        
+        M_STRH  tunpk1,[pQ0a, Stepa]!   ;//Stores [r2p0 r2p1]
+        STRH    tunpk2,[pQ0a, Stepa]    ;//Stores [r3p0 r3p1]
+        STRH    tunpk9,[pQ0a, #2]!        ;//Stores [r2q0 r2q1]
+        STRH    tunpk3,[pQ0a, Stepa]    ;//Stores [r3q0 r3q1]
+        ;// pQ0a has moved by +4 bytes and +2 rows through the write-backs above; undo the rows only
+        SUB     pQ0, pQ0a, Stepa, LSL #1
+
+        ;// Load counter
+        M_LDRD  XY, pBS, pXYBS
+
+        ;// Reload Pixels
+        M_LDR   p_0, pQ_3               ;// the saved q_3 column is loaded into p_0
+        MOV     p_2, Q1a
+                
+        M_LDRD  alpha, beta, pAlphaBeta1
+
+        ADDS    XY, XY, XY              ;// shift the counter left by one; C and Z drive the branches below
+        M_STR   XY, pXYBS
+        BCC     LoopX                   ;// no carry-out: continue along the same four rows, else fall through to the exit code
+        
+;//-------- Common Exit of LoopY -----------------
+        ;// Align the pointers (the load below runs only on fall-through from bSLT4; the other paths load pThresholds before branching to ExitLoopY)
+        M_LDR   pThresholds, ppThresholds
+ExitLoopY
+        SUB     pQ0, pQ0, #16                   ;// back 16 bytes ...
+        ADD     pQ0, pQ0, srcdstStep, LSL #2    ;// ... and down 4 rows
+        SUB     pBS, pBS, #15                   ;// with the four +4 steps taken in LoopX this is a net +1
+        SUB     pThresholds, pThresholds, #15   ;// likewise a net +1 after four +4 steps
+        M_STR   pThresholds, ppThresholds
+
+        M_LDRD  alpha, beta, pAlphaBeta0        ;// the next LoopY pass starts with the alpha0/beta0 pair; later passes load pAlphaBeta1
+
+        BNE     LoopY                           ;// Z comes from the last ADDS XY, XY, XY (assuming the M_ macros leave the flags alone)
+        MOV     r0, #OMX_Sts_NoErr
+
+        M_END
+;//-----------------End Filter--------------------
+
+    ENDIF        
+        
+        END
+        
+        
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
new file mode 100644
index 0000000..de835bd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
@@ -0,0 +1,79 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_InterpolateChroma.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate 1/8 Pixel interpolation for Chroma Block
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P10_InterpolateChroma,
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	pSrc	Pointer to the source reference frame buffer
+ * [in]	srcStep Reference frame step in byte
+ * [in]	dstStep Destination frame step in byte. Must be multiple of roi.width.
+ * [in]	dx		Fractional part of horizontal motion vector component
+ *						in 1/8 pixel unit;valid in the range [0,7]
+ * [in]	dy		Fractional part of vertical motion vector component
+ *						in 1/8 pixel unit;valid in the range [0,7]
+ * [in]	roi		Dimension of the interpolation region;the parameters roi.width and roi.height must
+ *                      be equal to either 2, 4, or 8.
+ * [out]	pDst	Pointer to the destination frame buffer.
+ *                   if roi.width==2,  2-byte alignment required
+ *                   if roi.width==4,  4-byte alignment required
+ *                   if roi.width==8,  8-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns OMX_Sts_BadArgErr:
+ *	pSrc or pDst is NULL.
+ *	srcStep or dstStep < 8.
+ *	dx or dy is out of range [0-7].
+ *	roi.width or roi.height is out of range {2,4,8}.
+ *	roi.width is equal to 2, but pDst is not 2-byte aligned.
+ *	roi.width is equal to 4, but pDst is not 4-byte aligned.
+ *	roi.width is equal to 8, but pDst is not 8 byte aligned.
+ *	srcStep or dstStep is not a multiple of 8.
+ *
+ */
+
+OMXResult omxVCM4P10_InterpolateChroma (
+     const OMX_U8* pSrc, OMX_S32 srcStep,
+     OMX_U8* pDst, OMX_S32 dstStep,
+     OMX_S32 dx, OMX_S32 dy,
+     OMXSize roi
+ )
+{
+    /* The helper is called with a non-const source pointer and receives
+     * the ROI as separate width/height values placed ahead of dx/dy. */
+    OMX_U8* pRef = (OMX_U8*)pSrc;
+
+    return armVCM4P10_Interpolate_Chroma(pRef, srcStep, pDst, dstStep, roi.width, roi.height, dx, dy);
+}
+
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
new file mode 100644
index 0000000..cf611a3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
@@ -0,0 +1,426 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_InterpolateLuma_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+;// Function:
+;//     omxVCM4P10_InterpolateLuma
+;//
+;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
+;// Performs quarter pel interpolation of inter luma MB.
+;// It's assumed that the frame is already padded when calling this function.
+;// Parameters:
+;// [in]    pSrc        Pointer to the source reference frame buffer
+;// [in]    srcStep     Reference frame step in byte
+;// [in]    dstStep     Destination frame step in byte. Must be multiple of roi.width
+;// [in]    dx          Fractional part of horizontal motion vector
+;//                         component in 1/4 pixel unit; valid in the range [0,3]
+;// [in]    dy          Fractional part of vertical motion vector
+;//                         component in 1/4 pixel unit; valid in the range [0,3]
+;// [in]    roi         Dimension of the interpolation region;the parameters roi.width and roi.height must
+;//                         be equal to either 4, 8, or 16.
+;// [out]   pDst        Pointer to the destination frame buffer.
+;//                   if roi.width==4,  4-byte alignment required
+;//                   if roi.width==8,  8-byte alignment required
+;//                   if roi.width==16, 16-byte alignment required
+;//
+;// Return Value:
+;// If the function runs without error, it returns OMX_Sts_NoErr.
+;// It is assumed that the following conditions are satisfied before calling this function:
+;//     pSrc and pDst are not NULL.
+;//     srcStep and dstStep are >= roi.width.
+;//     dx and dy are in the range [0-3].
+;//     roi.width and roi.height are each one of {4, 8, 16}.
+;//     If roi.width is equal to 4, pDst is 4 byte aligned.
+;//     If roi.width is equal to 8, pDst is 8 byte aligned.
+;//     If roi.width is equal to 16, pDst is 16 byte aligned.
+;//     srcStep and dstStep are multiples of 8.
+;//
+;//
+
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+        EXPORT omxVCM4P10_InterpolateLuma
+        
+    IF ARM1136JS
+        IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        IMPORT armVCM4P10_Average_4x4_Align0_unsafe
+        IMPORT armVCM4P10_Average_4x4_Align2_unsafe
+        IMPORT armVCM4P10_Average_4x4_Align3_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+    ENDIF
+
+    IF ARM1136JS
+        IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+    ENDIF
+    
+    
+
+;// Declare input registers
+pSrc            RN 0
+srcStep         RN 1
+pDst            RN 2
+dstStep         RN 3
+iHeight         RN 4
+iWidth          RN 5
+
+;// Declare other intermediate registers
+idx             RN 6
+idy             RN 7
+index           RN 6
+Temp            RN 12
+pArgs           RN 11
+
+
+        ;// End of CortexA8
+                    
+;//-------------------------------------------------------------------------------------------------------------------------    
+;//-------------------------------------------------------------------------------------------------------------------------    
+    IF ARM1136JS
+
+
+        M_ALLOC4 ppDst, 8                               ;// not referenced between M_START and M_END in this file
+        M_ALLOC4 ppSrc, 8                               ;// not referenced between M_START and M_END in this file
+        M_ALLOC4 ppArgs, 16                             ;// holds {pSrc, srcStep, pDst, dstStep} for the current 4x4 block
+        M_ALLOC4 pBuffer, 120                           ;// 120 = 12x10
+        M_ALLOC8 pInterBuf, 120                         ;// 120 = 12*5*2
+        M_ALLOC8 pTempBuf, 32                           ;// 32 =  8*4
+        
+        ;// Function header
+        ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time.
+        ;// Depending on the values of motion vector fractional parts (dx,dy), one out of 16 cases will be processed.
+        ;// Registers r4, r5, r6 to be preserved by internal unsafe functions
+        ;// r4 - iHeight
+        ;// r5 - iWidth
+        ;// r6 - index
+        M_START omxVCM4P10_InterpolateLuma, r11
+
+;// Declare other intermediate registers (idx..pArgs repeat the declarations made before the IF block)
+idx             RN 6
+idy             RN 7
+index           RN 6
+Temp            RN 12
+pArgs           RN 11
+
+pBuf            RN 8
+Height          RN 9 
+bufStep         RN 9
+        
+        ;// Define stack arguments
+        M_ARG   ptridx, 4
+        M_ARG   ptridy, 4        
+        M_ARG   ptrWidth, 4
+        M_ARG   ptrHeight, 4        
+
+        ;// Load structure elements of roi 
+        M_LDR   idx, ptridx
+        M_LDR   idy, ptridy
+        M_LDR   iWidth, ptrWidth
+        M_LDR   iHeight, ptrHeight
+        
+        M_PRINTF "roi.width %d\n", iWidth
+        M_PRINTF "roi.height %d\n", iHeight
+
+        ADD     index, idx, idy, LSL #2                 ;//  [index] = [idy][idx]
+        M_ADR   pArgs, ppArgs
+        ;// index is computed once; both loop labels below share one entry point, so every 4x4 block runs the same case
+InterpolateLuma
+Block4x4WidthLoop
+Block4x4HeightLoop
+
+        STM     pArgs, {pSrc,srcStep,pDst,dstStep}      ;// snapshot of the block arguments; reloaded with LDM inside some cases and at Block4x4LoopEnd
+        M_ADR   pBuf, pBuffer                           
+
+        ;// switch table using motion vector as index
+        M_SWITCH index, L
+        M_CASE  Case_0
+        M_CASE  Case_1
+        M_CASE  Case_2
+        M_CASE  Case_3
+        M_CASE  Case_4
+        M_CASE  Case_5
+        M_CASE  Case_6
+        M_CASE  Case_7
+        M_CASE  Case_8
+        M_CASE  Case_9
+        M_CASE  Case_a
+        M_CASE  Case_b
+        M_CASE  Case_c
+        M_CASE  Case_d
+        M_CASE  Case_e
+        M_CASE  Case_f
+        M_ENDSWITCH
+
+Case_0
+        ;// Case G (index 0; with index = idx + idy*4 this is dx=0, dy=0, assuming M_SWITCH selects the Nth M_CASE)
+        M_PRINTF "Case 0 \n"
+
+        BL      armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+        B       Block4x4LoopEnd
+
+Case_1
+        ;// Case a (index 1: dx=1, dy=0)
+        M_PRINTF "Case 1 \n"
+
+        SUB     pSrc, pSrc, #2                  ;// start two pixels to the left of the block
+        MOV     Height, #4
+        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        BL      armVCM4P10_Average_4x4_Align2_unsafe
+        B       Block4x4LoopEnd
+Case_2
+        ;// Case b (index 2: dx=2, dy=0) - same as Case_1 without the final averaging call
+        M_PRINTF "Case 2 \n"
+        
+        SUB     pSrc, pSrc, #2
+        MOV     Height, #4
+        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe        
+        B       Block4x4LoopEnd
+Case_3
+        ;// Case c (index 3: dx=3, dy=0) - as Case_1 but averaging with the Align3 helper
+        M_PRINTF "Case 3 \n"
+
+        SUB     pSrc, pSrc, #2
+        MOV     Height, #4
+        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        BL      armVCM4P10_Average_4x4_Align3_unsafe
+        B       Block4x4LoopEnd
+Case_4
+        ;// Case d (index 4; with index = idx + idy*4 this is dx=0, dy=1, assuming M_SWITCH selects the Nth M_CASE)
+        M_PRINTF "Case 4 \n"
+
+        SUB     pSrc, pSrc, srcStep, LSL #1     ;// start two rows above the block
+        MOV     Height, #9
+        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        BL      armVCM4P10_Average_4x4_Align0_unsafe
+
+        B       Block4x4LoopEnd
+Case_5
+        ;// Case e (index 5: dx=1, dy=1) - horizontal half-pel into pTempBuf, vertical half-pel into pDst, then average the two
+        M_PRINTF "Case 5 \n"
+
+        SUB     pSrc, pSrc, #2
+        MOV     Height, #4
+        M_ADR   pDst, pTempBuf                  ;// first pass writes to the temporary buffer ...
+        MOV     dstStep, #4                     ;// ... with a 4-byte row step
+        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        M_ADR   pArgs, ppArgs
+        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// restore the block arguments saved by STM at the loop top
+        SUB     pSrc, pSrc, srcStep, LSL #1
+        M_ADR   pBuf, pBuffer                           
+        MOV     Height, #9
+        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        M_ADR   pSrc, pTempBuf                  ;// the averaging call gets the first-pass result as its source
+        MOV     srcStep, #4
+        BL      armVCM4P10_Average_4x4_Align0_unsafe
+        
+
+        B       Block4x4LoopEnd
+Case_6
+        ;// Case f (index 6: dx=2, dy=1)
+        M_PRINTF "Case 6 \n"
+
+        SUB     pSrc, pSrc, #2
+        SUB     pSrc, pSrc, srcStep, LSL #1
+        MOV     Height, #9
+        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        M_ADR   pBuf, pInterBuf
+        BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+        M_ADR   idy, pTempBuf                   ;// r7 (idy) is reused to hold the pTempBuf address; index in r6 is unaffected
+        BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe    
+        BL      armVCM4P10_Average_4x4_Align0_unsafe
+        B       Block4x4LoopEnd
+Case_7
+        ;// Case g (index 7: dx=3, dy=1) - as Case_5, but the vertical pass starts one pixel further right
+        M_PRINTF "Case 7 \n"
+        
+        SUB     pSrc, pSrc, #2
+        MOV     Height, #4
+        M_ADR   pDst, pTempBuf
+        MOV     dstStep, #4
+        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        M_ADR   pArgs, ppArgs
+        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// restore the block arguments saved by STM at the loop top
+        SUB     pSrc, pSrc, srcStep, LSL #1
+        ADD     pSrc, pSrc, #1
+        M_ADR   pBuf, pBuffer                           
+        MOV     Height, #9
+        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        M_ADR   pSrc, pTempBuf
+        MOV     srcStep, #4
+        BL      armVCM4P10_Average_4x4_Align0_unsafe
+
+        B       Block4x4LoopEnd
+Case_8
+        ;// Case h (index 8; with index = idx + idy*4 this is dx=0, dy=2, assuming M_SWITCH selects the Nth M_CASE)
+        M_PRINTF "Case 8 \n"
+
+        SUB     pSrc, pSrc, srcStep, LSL #1     ;// start two rows above the block
+        MOV     Height, #9
+        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        B       Block4x4LoopEnd
+Case_9
+        ;// Case i (index 9: dx=1, dy=2)
+        M_PRINTF "Case 9 \n"
+
+        SUB     pSrc, pSrc, #2
+        SUB     pSrc, pSrc, srcStep, LSL #1
+        MOV     Height, #9
+        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        ADD     pSrc, pSrc, srcStep, LSL #1
+        M_ADR   pBuf, pInterBuf
+        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+        M_ADR   idy, pTempBuf                   ;// r7 (idy) is reused to hold the pTempBuf address; index in r6 is unaffected
+        BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe    
+        BL      armVCM4P10_Average_4x4_Align2_unsafe
+        B       Block4x4LoopEnd
+Case_a
+        ;// Case j (index 10: dx=2, dy=2) - same as Case_9 up to the HalfDiagVerHor call, with no copy or averaging afterwards
+        M_PRINTF "Case a \n"
+
+        SUB     pSrc, pSrc, #2
+        SUB     pSrc, pSrc, srcStep, LSL #1
+        MOV     Height, #9
+        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        ADD     pSrc, pSrc, srcStep, LSL #1
+        M_ADR   pBuf, pInterBuf
+        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+        B       Block4x4LoopEnd
+Case_b
+        ;// Case k (index 11: dx=3, dy=2) - as Case_9 but averaging with the Align3 helper
+        M_PRINTF "Case b \n"
+        SUB     pSrc, pSrc, #2
+        SUB     pSrc, pSrc, srcStep, LSL #1
+        MOV     Height, #9
+        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        ADD     pSrc, pSrc, srcStep, LSL #1
+        M_ADR   pBuf, pInterBuf
+        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+        M_ADR   idy, pTempBuf                   ;// r7 (idy) is reused to hold the pTempBuf address
+        BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe    
+        BL      armVCM4P10_Average_4x4_Align3_unsafe
+        B       Block4x4LoopEnd
+Case_c
+        ;// Case n (index 12; with index = idx + idy*4 this is dx=0, dy=3, assuming M_SWITCH selects the Nth M_CASE)
+        M_PRINTF "Case c \n"
+
+        SUB     pSrc, pSrc, srcStep, LSL #1
+        MOV     Height, #9
+        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        ADD     pSrc, pSrc, srcStep                     ;// Update pSrc to one row down
+        BL      armVCM4P10_Average_4x4_Align0_unsafe
+        B       Block4x4LoopEnd
+Case_d
+        ;// Case p (index 13: dx=1, dy=3) - as Case_5, but the horizontal pass starts one row lower
+        M_PRINTF "Case d \n"
+        SUB     pSrc, pSrc, #2
+        ADD     pSrc, pSrc, srcStep
+        MOV     Height, #4
+        M_ADR   pDst, pTempBuf                  ;// first pass writes to the temporary buffer with a 4-byte row step
+        MOV     dstStep, #4
+        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        M_ADR   pArgs, ppArgs
+        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// restore the block arguments saved by STM at the loop top
+        SUB     pSrc, pSrc, srcStep, LSL #1
+        M_ADR   pBuf, pBuffer                           
+        MOV     Height, #9
+        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        M_ADR   pSrc, pTempBuf                  ;// the averaging call gets the first-pass result as its source
+        MOV     srcStep, #4
+        BL      armVCM4P10_Average_4x4_Align0_unsafe
+        B       Block4x4LoopEnd
+Case_e
+        ;// Case q (index 14: dx=2, dy=3) - as Case_6, with pSrc advanced by 4 before the averaging call
+        M_PRINTF "Case e \n"
+        
+        SUB     pSrc, pSrc, #2
+        SUB     pSrc, pSrc, srcStep, LSL #1
+        MOV     Height, #9
+        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        M_ADR   pBuf, pInterBuf
+        BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+        M_ADR   idy, pTempBuf                   ;// r7 (idy) is reused to hold the pTempBuf address; index in r6 is unaffected
+        BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+        ADD     pSrc, pSrc, #4    
+        BL      armVCM4P10_Average_4x4_Align0_unsafe
+
+        B       Block4x4LoopEnd
+Case_f
+        ;// Case r (index 15: dx=3, dy=3) - Case_d's horizontal pass combined with Case_7's vertical pass; falls through into Block4x4LoopEnd
+        M_PRINTF "Case f \n"
+        SUB     pSrc, pSrc, #2
+        ADD     pSrc, pSrc, srcStep
+        MOV     Height, #4
+        M_ADR   pDst, pTempBuf
+        MOV     dstStep, #4
+        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        M_ADR   pArgs, ppArgs
+        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// restore the block arguments saved by STM at the loop top
+        SUB     pSrc, pSrc, srcStep, LSL #1
+        ADD     pSrc, pSrc, #1
+        M_ADR   pBuf, pBuffer                           
+        MOV     Height, #9
+        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        M_ADR   pSrc, pTempBuf
+        MOV     srcStep, #4
+        BL      armVCM4P10_Average_4x4_Align0_unsafe
+
+Block4x4LoopEnd
+        ;// Common tail of every case: step to the next 4x4 block, first across the row of blocks, then down
+        ;// Width Loop
+        SUBS    iWidth, iWidth, #4                  ;// flags are consumed by the BGT below (assuming M_ADR leaves them alone)
+        M_ADR   pArgs, ppArgs
+        LDM     pArgs, {pSrc,srcStep,pDst,dstStep}  ;// Load arguments
+        ADD     pSrc, pSrc, #4      
+        ADD     pDst, pDst, #4
+        BGT     Block4x4WidthLoop                   ;// the loop top stores the advanced pointers back into ppArgs
+
+        ;// Height Loop
+        SUBS    iHeight, iHeight, #4                ;// flags are consumed by the BGT below (assuming M_LDR / M_ADR leave them alone)
+        M_LDR   iWidth, ptrWidth                    ;// restore the full width from the stack argument
+        M_ADR   pArgs, ppArgs
+        ADD     pSrc, pSrc, srcStep, LSL #2      
+        ADD     pDst, pDst, dstStep, LSL #2
+        SUB     pSrc, pSrc, iWidth                  ;// undo the horizontal advance made by the width loop
+        SUB     pDst, pDst, iWidth
+        BGT     Block4x4HeightLoop
+
+EndOfInterpolation
+        MOV     r0, #0
+        M_END
+
+    ENDIF
+                    
+
+    END
+    
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
new file mode 100644
index 0000000..34fedd8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
@@ -0,0 +1,494 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_PredictIntraChroma_8x8_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+  
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        EXPORT armVCM4P10_pIndexTable8x8
+        
+;// Define the processor variants supported by this file
+         
+         M_VARIANTS ARM1136JS
+     
+     AREA table, DATA    
+;//-------------------------------------------------------
+;// This table implements a C-style switch-case in asm by
+;// the method of two levels of indexing.
+;//-------------------------------------------------------
+
+    M_TABLE armVCM4P10_pIndexTable8x8
+    DCD  OMX_VC_CHROMA_DC,     OMX_VC_CHROMA_HOR      ;// entries 0, 1: case-handler addresses; the dispatch loads pc from entry [predMode]
+    DCD  OMX_VC_CHROMA_VERT,   OMX_VC_CHROMA_PLANE    ;// entries 2, 3
+    
+    M_TABLE armVCM4P10_MultiplierTableChroma8x8,1
+    DCW   3, 2, 1,4                                   ;// NOTE(review): this table is not referenced by any code in this file - confirm whether it is still needed
+    DCW  -3,-2,-1,0
+    DCW   1, 2, 3,4
+    
+    IF ARM1136JS
+  
+;//--------------------------------------------
+;// Constants
+;//--------------------------------------------  
+
+BLK_SIZE        EQU 0x8                              ;// Loop count loaded into y and outerCount
+MUL_CONST0      EQU 0x01010101                       ;// Multiplying a byte value by this replicates it into all four bytes
+MASK_CONST      EQU 0x00FF00FF                       ;// Keeps the low byte of each halfword (used when packing plane-mode pixels)
+MUL_CONST1      EQU 0x80808080                       ;// Word stored by TST_COUNT0 when neither UPPER nor LEFT is available
+
+;//--------------------------------------------
+;// Scratch variable
+;//--------------------------------------------
+y               RN 12   ;// written once in the prologue; shares r12 with tVal12 and c
+pc              RN 15   ;// program counter; loaded from the index table to dispatch on predMode
+return          RN 0    ;// status value placed in r0 before exit; shares r0 with pSrcLeft
+pSrcLeft2       RN 1    ;// pSrcLeft + leftStep; shares r1 with pSrcAbove
+pDst2           RN 2    ;// pDst + dstStep (HOR case); shares r2 with pSrcAboveLeft
+sum1            RN 6    ;// shares r6 with tVal6 and predMode
+sum2            RN 7    ;// shares r7 with tVal7 and availability
+pTable          RN 9    ;// address of armVCM4P10_pIndexTable8x8
+dstStepx2       RN 11   ;// shares r11 with r0x00FF00FF
+leftStepx2      RN 14   ;// shares r14 with outerCount, tVal14 and b
+outerCount      RN 14   ;// row counter of LOOP_PLANE
+r0x01010101     RN 10   ;// holds MUL_CONST0
+r0x00FF00FF     RN 11   ;// holds MASK_CONST
+
+tVal0           RN 0    
+tVal1           RN 1    
+tVal2           RN 2    
+tVal3           RN 3    
+tVal4           RN 4    ;// same register as leftStep
+tVal5           RN 5    ;// same register as dstStep
+tVal6           RN 6    
+tVal7           RN 7    
+tVal8           RN 8    
+tVal9           RN 9    
+tVal10          RN 10   
+tVal11          RN 11   
+tVal12          RN 12   
+tVal14          RN 14   
+
+b               RN 14   ;// plane-mode step between adjacent columns
+c               RN 12   ;// plane-mode step between adjacent rows
+
+p2p0            RN 0    ;// plane-mode accumulators: two columns per register, one per halfword
+p3p1            RN 1    
+p6p4            RN 2    
+p7p5            RN 4    
+
+pp2pp0          RN 6    ;// plane-mode pixels after clipping, one per halfword
+pp3pp1          RN 7    
+pp6pp4          RN 8    
+pp7pp5          RN 9    
+
+p3210           RN 10   ;// packed output word for columns 0-3
+p7654           RN 10   ;// packed output word for columns 4-7 (same register as p3210)
+
+;//--------------------------------------------
+;// Input Arguments
+;//--------------------------------------------
+pSrcLeft        RN 0    ;// input pointer
+pSrcAbove       RN 1    ;// input pointer
+pSrcAboveLeft   RN 2    ;// input pointer
+pDst            RN 3    ;// output pointer
+leftStep        RN 4    ;// input variable
+dstStep         RN 5    ;// input variable
+predMode        RN 6    ;// input variable
+availability    RN 7    ;// input variable
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntraChroma_8x8 starts
+;//-----------------------------------------------------------------------------------------------
+        
+        ;// Function entry. pSrcLeft, pSrcAbove, pSrcAboveLeft and pDst are the r0-r3 aliases; the other four arguments come from the stack
+        M_START omxVCM4P10_PredictIntraChroma_8x8, r11
+        
+        ;// Define stack arguments (4 bytes each)
+        M_ARG    LeftStep,     4
+        M_ARG    DstStep,      4
+        M_ARG    PredMode,     4
+        M_ARG    Availability, 4
+        
+        ;// M_STALL ARM1136JS=4
+        
+        LDR      pTable,=armVCM4P10_pIndexTable8x8   ;// Load index table for switch case
+        
+        
+        ;// Load arguments from the stack
+        M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg 
+        M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg 
+        M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg         
+        M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg 
+        
+        MOV      y, #BLK_SIZE                        ;// NOTE(review): no case below reads y before r12 is overwritten (as tVal12 or c)
+        LDR      pc, [pTable, predMode, LSL #2]      ;// Jump to table entry [predMode]; NOTE(review): predMode is not range-checked here
+
+OMX_VC_CHROMA_DC
+        AND      availability, availability,#(OMX_VC_UPPER + OMX_VC_LEFT) ;// keep only the UPPER and LEFT flags
+        CMP      availability, #(OMX_VC_UPPER + OMX_VC_LEFT) ;// equal only when both UPPER and LEFT are available
+        LDR      r0x01010101, =MUL_CONST0            ;// byte-replication multiplier, also used by TST_UPPER and TST_LEFT
+        BNE      TST_UPPER                           ;// Jump to Upper if not both
+        LDM      pSrcAbove,{tVal8,tVal9}             ;// tVal 8 to 9 = pSrcAbove[0 to 7]
+        
+        ADD      leftStepx2, leftStep,leftStep       ;// leftStepx2 = 2 * leftStep
+        ADD      pSrcLeft2, pSrcLeft, leftStep       ;// pSrcLeft2 = pSrcLeft + leftStep (r1: pSrcAbove is no longer needed)
+        
+        ;// M_STALL ARM1136JS=1
+       
+        UXTB16   tVal7, tVal8                        ;// pSrcAbove[0, 2]
+        UXTB16   tVal8, tVal8, ROR #8                ;// pSrcAbove[1, 3]
+        UADD16   sum1, tVal7, tVal8                  ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+        
+        UXTB16   tVal7, tVal9                        ;// pSrcAbove[4, 6]
+        UXTB16   tVal9, tVal9, ROR #8                ;// pSrcAbove[5, 7]
+        UADD16   sum2, tVal7, tVal9                  ;// pSrcAbove[4, 6] + pSrcAbove[5, 7]
+        ADD      sum1, sum1, sum1, LSR #16           ;// sum(pSrcAbove[0] to pSrcAbove[3])
+        ADD      sum2, sum2, sum2, LSR #16           ;// sum(pSrcAbove[4] to pSrcAbove[7])
+        UXTH     sum1, sum1                          ;// upsum1 (Clear the top junk bits)
+        UXTH     sum2, sum2                          ;// upsum2 (Clear the top junk bits)
+        
+        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[0]
+        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[1]
+        M_LDRB   tVal4, [pSrcLeft],  +leftStepx2     ;// tVal4 = pSrcLeft[2] (r4: leftStep is no longer needed)
+        M_LDRB   tVal12,[pSrcLeft2], +leftStepx2     ;// tVal12= pSrcLeft[3]
+        ADD      tVal2, tVal8, tVal9                 ;// tVal2 = tVal8 + tVal9
+        
+        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[4]
+        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[5]
+        ADD      tVal14, tVal4, tVal12               ;// tVal14 = tVal4 + tVal12 (overwrites leftStepx2 in r14; the loads below use no stride)
+        
+        LDRB     tVal4, [pSrcLeft]                   ;// tVal4 = pSrcLeft[6]
+        LDRB     tVal12,[pSrcLeft2]                  ;// tVal12= pSrcLeft[7]
+        ADD      tVal8, tVal8, tVal9                 ;// tVal8 = tVal8 + tVal9
+        ADD      tVal2, tVal2, tVal14                ;// leftsum1  = sum(pSrcLeft[0] to pSrcLeft[3])
+        ADD      tVal4, tVal4, tVal12                ;// tVal4 = tVal4 + tVal12
+        ADD      tVal14, tVal8, tVal4                ;// leftsum2  = sum(pSrcLeft[4] to pSrcLeft[7])
+        ADD      tVal8, tVal14, #2                   ;// tVal8 = leftsum2 + 2
+        ADD      tVal9, sum2,   #2                   ;// tVal9 = upsum2 + 2
+        ADD      sum1,  sum1, tVal2                  ;// sum1 = upsum1 + leftsum1
+        ADD      sum2,  sum2, tVal14                 ;// sum2 = upsum2 + leftsum2
+        ADD      sum1, sum1, #4                      ;// (sum1 + 4)
+        ADD      sum2, sum2, #4                      ;// (sum2 + 4)
+        MOV      sum1,  sum1,  LSR #3                ;// (sum1 + 4)>>3
+        MOV      tVal9, tVal9, LSR #2                ;// (upsum2 + 2)>>2
+        MOV      tVal8, tVal8, LSR #2                ;// (leftsum2 + 2)>>2
+        MOV      sum2,  sum2,  LSR #3                ;// (sum2 + 4)>>3
+        
+        MUL      tVal0, sum1, r0x01010101            ;// rows 0-3, columns 0-3: (upsum1 + leftsum1 + 4)>>3 in every byte
+        MUL      tVal1, tVal9,r0x01010101            ;// rows 0-3, columns 4-7: (upsum2 + 2)>>2 in every byte
+        MUL      tVal8, tVal8,r0x01010101            ;// rows 4-7, columns 0-3: (leftsum2 + 2)>>2 in every byte
+        MUL      tVal9, sum2, r0x01010101            ;// rows 4-7, columns 4-7: (upsum2 + leftsum2 + 4)>>3 in every byte
+        
+        M_STRD   tVal0, tVal1, [pDst], dstStep       ;// row 0 = tVal 0 to 1
+        M_STRD   tVal0, tVal1, [pDst], dstStep       ;// row 1 = tVal 0 to 1
+        M_STRD   tVal0, tVal1, [pDst], dstStep       ;// row 2 = tVal 0 to 1
+        M_STRD   tVal0, tVal1, [pDst], dstStep       ;// row 3 = tVal 0 to 1
+                                       
+        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 4 = tVal 8 to 9
+        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 5 = tVal 8 to 9
+        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 6 = tVal 8 to 9
+        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 7 = tVal 8 to 9
+        MOV      return, #OMX_Sts_NoErr
+        M_EXIT
+        
+TST_UPPER
+        ;// DC prediction when only the row above is available (availability was masked to UPPER/LEFT by the DC entry code)
+        ;// M_STALL ARM1136JS=3
+        
+        CMP      availability, #OMX_VC_UPPER         ;// if(availability == OMX_VC_UPPER), i.e. UPPER without LEFT
+        
+        BNE      TST_LEFT                            ;// Jump to Left if not upper
+        LDM      pSrcAbove,{tVal8,tVal9}             ;// tVal 8 to 9 = pSrcAbove[0 to 7]
+        
+        ;// M_STALL ARM1136JS=3
+        
+        UXTB16   tVal7, tVal8                        ;// pSrcAbove[0, 2]
+        UXTB16   tVal8, tVal8, ROR #8                ;// pSrcAbove[1, 3]
+        UADD16   sum1,  tVal7, tVal8                 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+        
+        UXTB16   tVal7, tVal9                        ;// pSrcAbove[4, 6]
+        UXTB16   tVal9, tVal9, ROR #8                ;// pSrcAbove[5, 7]
+        UADD16   sum2,  tVal7, tVal9                 ;// pSrcAbove[4, 6] + pSrcAbove[5, 7]
+        
+        ADD      sum1, sum1, sum1, LSR #16           ;// sum(pSrcAbove[0] to pSrcAbove[3])
+        ADD      sum2, sum2, sum2, LSR #16           ;// sum(pSrcAbove[4] to pSrcAbove[7])
+        
+        UXTH     sum1, sum1                          ;// upsum1 (Clear the top junk bits)
+        UXTH     sum2, sum2                          ;// upsum2 (Clear the top junk bits)
+        
+        ADD      sum1, sum1, #2                      ;// sum1 + 2
+        ADD      sum2, sum2, #2                      ;// sum2 + 2
+        
+        MOV      sum1, sum1, LSR #2                  ;// (sum1 + 2)>>2
+        MOV      sum2, sum2, LSR #2                  ;// (sum2 + 2)>>2
+        
+        MUL      sum1, sum1,r0x01010101              ;// replicate the val in all the bytes (sum1 is r6, i.e. tVal6)
+        MUL      sum2, sum2,r0x01010101              ;// replicate the val in all the bytes (sum2 is r7, i.e. tVal7)
+        
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 0 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 1 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 2 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 3 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 4 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 5 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 6 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 7 = tVal 6 to 7
+        MOV      return, #OMX_Sts_NoErr
+        M_EXIT
+        
+TST_LEFT 
+        ;// M_STALL ARM1136JS=3       
+        ;// DC prediction when only the left column is available
+        CMP      availability, #OMX_VC_LEFT          ;// if(availability == OMX_VC_LEFT), i.e. LEFT without UPPER
+        BNE      TST_COUNT0                          ;// neither neighbour available
+        ADD      leftStepx2, leftStep,leftStep       ;// leftStepx2 = 2 * leftStep
+        ADD      pSrcLeft2, pSrcLeft, leftStep       ;// pSrcLeft2 = pSrcLeft + leftStep
+        
+        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[0]
+        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[1]
+        M_LDRB   tVal4, [pSrcLeft],  +leftStepx2     ;// tVal4 = pSrcLeft[2] (r4: leftStep is no longer needed)
+        M_LDRB   tVal12,[pSrcLeft2], +leftStepx2     ;// tVal12= pSrcLeft[3]
+        
+        ADD      tVal6, tVal8, tVal9                 ;// tVal6 = tVal8 + tVal9
+        
+        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = pSrcLeft[4]
+        ADD      tVal7, tVal4, tVal12                ;// tVal7 = tVal4 + tVal12
+        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = pSrcLeft[5]
+        M_LDRB   tVal4, [pSrcLeft],  +leftStepx2     ;// tVal4 = pSrcLeft[6]
+        M_LDRB   tVal12,[pSrcLeft2], +leftStepx2     ;// tVal12= pSrcLeft[7]
+        
+        ADD      tVal8, tVal8, tVal9                 ;// tVal8 = tVal8 + tVal9
+        ADD      sum1,  tVal6, tVal7                 ;// sum1  = sum(pSrcLeft[0] to pSrcLeft[3])
+        ADD      tVal4, tVal4, tVal12                ;// tVal4 = tVal4 + tVal12
+        ADD      sum2,  tVal8, tVal4                 ;// sum2  = sum(pSrcLeft[4] to pSrcLeft[7])
+        
+        ADD      sum1, sum1, #2                      ;// sum1 + 2
+        ADD      sum2, sum2, #2                      ;// sum2 + 2
+        
+        MOV      sum1, sum1, LSR #2                  ;// (sum1 + 2)>>2
+        MOV      sum2, sum2, LSR #2                  ;// (sum2 + 2)>>2
+        
+        MUL      tVal6, sum1,r0x01010101             ;// replicate the val in all the bytes
+        MUL      tVal8, sum2,r0x01010101             ;// replicate the val in all the bytes
+        
+        ;// M_STALL ARM1136JS=1
+        MOV      tVal7,tVal6                         ;// tVal7 = replicated sum1 (columns 4-7 of rows 0-3)
+        MOV      tVal9,tVal8                         ;// tVal9 = replicated sum2 (columns 4-7 of rows 4-7)
+        
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 0 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 1 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 2 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 3 = tVal 6 to 7
+        
+        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 4 = tVal 8 to 9
+        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 5 = tVal 8 to 9
+        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 6 = tVal 8 to 9
+        M_STRD   tVal8, tVal9, [pDst], dstStep       ;// row 7 = tVal 8 to 9
+        
+        MOV      return, #OMX_Sts_NoErr
+        M_EXIT                                       ;// Macro to exit midway-break frm case
+
+TST_COUNT0
+        LDR      sum1, =MUL_CONST1                  ;// neither UPPER nor LEFT available: fill value 0x80808080 (sum1 is r6, i.e. tVal6)
+        
+        ;// M_STALL ARM1136JS=2
+        
+        MOV      tVal7, sum1                         ;// tVal7 = sum1, so the STRD pair writes the fill value to all 8 bytes of a row
+        
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 0 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 1 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 2 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 3 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 4 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 5 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 6 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 7 = tVal 6 to 7
+        
+        MOV      return, #OMX_Sts_NoErr
+        M_EXIT                                       ;// Macro to exit midway-break frm case
+
+OMX_VC_CHROMA_HOR
+        ;// Horizontal prediction: every output row is filled with that row's left-neighbour byte
+        ;// M_STALL ARM1136JS=2 
+        ;// Even rows are handled through pSrcLeft/pDst, odd rows through pSrcLeft2/pDst2
+        ADD      pSrcLeft2, pSrcLeft, leftStep       ;// pSrcLeft2 = pSrcLeft + leftStep
+        ADD      leftStepx2, leftStep, leftStep      ;// leftStepx2 = leftStep * 2
+        ADD      pDst2, pDst, dstStep                ;// pDst2 = pDst + dstStep
+        ADD      dstStepx2, dstStep, dstStep         ;// double dstStep
+        SUB      dstStepx2, dstStepx2, #4            ;// double dstStep minus 4 (the first store of each row already advances by 4)
+        LDR      r0x01010101, =MUL_CONST0            ;// Const to repeat the byte in reg 4 times
+        M_LDRB   tVal6, [pSrcLeft], +leftStepx2      ;// tVal6 = pSrcLeft[0]
+        M_LDRB   tVal7, [pSrcLeft2],+leftStepx2      ;// tVal7 = pSrcLeft[1]
+        M_LDRB   tVal8, [pSrcLeft], +leftStepx2      ;// tVal8 = pSrcLeft[2]
+        M_LDRB   tVal9, [pSrcLeft2],+leftStepx2      ;// tVal9 = pSrcLeft[3]
+        MUL      tVal6, tVal6, r0x01010101           ;// replicate the val in all the bytes
+        MUL      tVal7, tVal7, r0x01010101           ;// replicate the val in all the bytes
+        MUL      tVal8, tVal8, r0x01010101           ;// replicate the val in all the bytes
+        MUL      tVal9, tVal9, r0x01010101           ;// replicate the val in all the bytes
+        STR      tVal6, [pDst],  #+4                 ;// row 0: store {tVal6} at pDst [0 to 3] 
+        STR      tVal7, [pDst2], #+4                 ;// row 1: store {tVal7} at pDst2[0 to 3]
+        M_STR    tVal6, [pDst],  dstStepx2           ;// row 0: store {tVal6} at pDst [4 to 7]
+        M_STR    tVal7, [pDst2], dstStepx2           ;// row 1: store {tVal7} at pDst2[4 to 7]
+        STR      tVal8, [pDst],  #+4                 ;// row 2: store {tVal8} at pDst [0 to 3]
+        STR      tVal9, [pDst2], #+4                 ;// row 3: store {tVal9} at pDst2[0 to 3]
+        M_STR    tVal8, [pDst],  dstStepx2           ;// row 2: store {tVal8} at pDst [4 to 7]
+        M_STR    tVal9, [pDst2], dstStepx2           ;// row 3: store {tVal9} at pDst2[4 to 7]
+        M_LDRB   tVal6, [pSrcLeft], +leftStepx2      ;// tVal6 = pSrcLeft[4]
+        M_LDRB   tVal7, [pSrcLeft2],+leftStepx2      ;// tVal7 = pSrcLeft[5]
+        M_LDRB   tVal8, [pSrcLeft], +leftStepx2      ;// tVal8 = pSrcLeft[6]
+        M_LDRB   tVal9, [pSrcLeft2],+leftStepx2      ;// tVal9 = pSrcLeft[7]
+        MUL      tVal6, tVal6, r0x01010101           ;// replicate the val in all the bytes
+        MUL      tVal7, tVal7, r0x01010101           ;// replicate the val in all the bytes
+        MUL      tVal8, tVal8, r0x01010101           ;// replicate the val in all the bytes
+        MUL      tVal9, tVal9, r0x01010101           ;// replicate the val in all the bytes
+        STR      tVal6, [pDst],  #+4                 ;// row 4: store {tVal6} at pDst [0 to 3] 
+        STR      tVal7, [pDst2], #+4                 ;// row 5: store {tVal7} at pDst2[0 to 3]
+        M_STR    tVal6, [pDst],  dstStepx2           ;// row 4: store {tVal6} at pDst [4 to 7]
+        M_STR    tVal7, [pDst2], dstStepx2           ;// row 5: store {tVal7} at pDst2[4 to 7]
+        STR      tVal8, [pDst],  #+4                 ;// row 6: store {tVal8} at pDst [0 to 3]
+        STR      tVal9, [pDst2], #+4                 ;// row 7: store {tVal9} at pDst2[0 to 3]
+        M_STR    tVal8, [pDst],  dstStepx2           ;// row 6: store {tVal8} at pDst [4 to 7]
+        M_STR    tVal9, [pDst2], dstStepx2           ;// row 7: store {tVal9} at pDst2[4 to 7]
+        MOV      return, #OMX_Sts_NoErr
+        M_EXIT
+        
+OMX_VC_CHROMA_VERT
+        ;// Vertical prediction: the 8 bytes at pSrcAbove are copied into each of the 8 output rows
+        ;// M_STALL ARM1136JS=4        
+        
+        LDMIA    pSrcAbove, {tVal6,tVal7}            ;// tVal 6 to 7 = pSrcAbove[0 to 7]
+        MOV      return, #OMX_Sts_NoErr              ;// r0 (pSrcLeft) is not used by this case, so the status can be set early
+        
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 0 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 1 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 2 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 3 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 4 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 5 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 6 = tVal 6 to 7
+        M_STRD   tVal6, tVal7, [pDst], dstStep       ;// row 7 = tVal 6 to 7
+
+        M_EXIT                                       ;// Macro to exit midway-break frm case
+        
+OMX_VC_CHROMA_PLANE
+        ;// Plane prediction: pDst[y][x] = clip13(a + 16 + b*(x-3) + c*(y-3)) >> 5 for x, y = 0..7
+        ;// M_STALL ARM1136JS=3
+        ;// a = 16*(pSrcAbove[7] + pSrcLeft[7*leftStep]); b = (17*H + 16)>>5; c = (17*V + 16)>>5
+        RSB      tVal14, leftStep, leftStep, LSL #3  ;// 7*leftStep
+        LDRB     tVal7, [pSrcAbove, #+7]             ;// pSrcAbove[7]
+        LDRB     tVal6, [pSrcLeft, +tVal14]          ;// pSrcLeft[7*leftStep]
+        LDRB     tVal8, [pSrcAboveLeft]              ;// pSrcAboveLeft[0]
+        LDRB     tVal9, [pSrcAbove, #+6 ]            ;// pSrcAbove[6]
+        LDRB     tVal10,[pSrcAbove]                  ;// pSrcAbove[0]
+        ADD      tVal2, tVal7, tVal6                 ;// pSrcAbove[7] + pSrcLeft[7*leftStep] (r2: pSrcAboveLeft already consumed)
+        SUB      tVal6, tVal6, tVal8                 ;// V0 = pSrcLeft[7*leftStep] - pSrcAboveLeft[0]
+        SUB      tVal7, tVal7, tVal8                 ;// H0 = pSrcAbove[7] - pSrcAboveLeft[0]        
+        LSL      tVal2, tVal2, #4                    ;// a = 16 * (pSrcAbove[7] + pSrcLeft[7*leftStep])
+        ADD      tVal2, tVal2, #16                   ;// a + 16
+        SUB      tVal9, tVal9,tVal10                 ;// pSrcAbove[6] - pSrcAbove[0]
+        LDRB     tVal8, [pSrcAbove,#+5]              ;// pSrcAbove[5]
+        LDRB     tVal10,[pSrcAbove,#+1]              ;// pSrcAbove[1]
+        ADD      tVal9, tVal9, tVal9, LSL #1         ;// H1 = 3 * (pSrcAbove[6] - pSrcAbove[0])
+        ADD      tVal7, tVal9, tVal7, LSL #2         ;// H = H1 + 4*H0
+        SUB      tVal8, tVal8, tVal10                ;// pSrcAbove[5] - pSrcAbove[1]
+        LDRB     tVal9, [pSrcAbove,#+4]              ;// pSrcAbove[4]
+        LDRB     tVal10,[pSrcAbove,#+2]              ;// pSrcAbove[2]
+        ADD      tVal7, tVal7, tVal8, LSL #1         ;// H = H + 2*(pSrcAbove[5] - pSrcAbove[1])
+        SUB      tVal11, tVal14,leftStep             ;// 6*leftStep
+        ADD      tVal11, pSrcLeft, tVal11            ;// pSrcLeft + 6*leftStep (walks upwards)
+        MOV      tVal12, pSrcLeft                    ;// pSrcLeft (walks downwards)
+        SUB      tVal9, tVal9, tVal10                ;// pSrcAbove[4] - pSrcAbove[2]
+        ADD      tVal7, tVal7, tVal9                 ;// H = H + (pSrcAbove[4] - pSrcAbove[2])
+        M_LDRB   tVal8, [tVal11],-leftStep           ;// pSrcLeft[6*leftStep]
+        M_LDRB   tVal10,[tVal12],+leftStep           ;// pSrcLeft[0]
+        ADD      tVal7, tVal7, tVal7, LSL #4         ;// 17 * H
+        ADD      tVal7, tVal7, #16                   ;// 17 * H + 16
+        SUB      tVal8, tVal8, tVal10                ;// pSrcLeft[6*leftStep] - pSrcLeft[0]
+        ASR      b, tVal7, #5                        ;// b = (17 * H + 16) >> 5 (r14: the 7*leftStep value is no longer needed)
+        ADD      tVal8, tVal8, tVal8, LSL #1         ;// V1 = 3 * (pSrcLeft[6*leftStep] - pSrcLeft[0])
+        ADD      tVal6, tVal8, tVal6, LSL #2         ;// V = V1 + 4*V0
+        M_LDRB   tVal8, [tVal11],-leftStep           ;// pSrcLeft[5*leftStep]
+        M_LDRB   tVal10,[tVal12],+leftStep           ;// pSrcLeft[leftStep]
+        ADD      tVal7, b, b, LSL #1                 ;// 3*b
+        SUB      tVal2, tVal2, tVal7                 ;// a + 16 - 3*b
+        SUB      tVal7, tVal8, tVal10                ;// pSrcLeft[5*leftStep] - pSrcLeft[leftStep]
+        M_LDRB   tVal8, [tVal11],-leftStep           ;// pSrcLeft[4*leftStep]
+        M_LDRB   tVal10,[tVal12],+leftStep           ;// pSrcLeft[2*leftStep]        
+        ADD      tVal6, tVal6, tVal7, LSL #1         ;// V = V + 2*(pSrcLeft[5*leftStep] - pSrcLeft[leftStep])
+        LDR      r0x00FF00FF, =MASK_CONST            ;// r0x00FF00FF = 0x00FF00FF (r11: the tVal11 pointer is no longer needed)
+        SUB      tVal7, tVal8, tVal10                ;// pSrcLeft[4*leftStep] - pSrcLeft[2*leftStep]
+        ADD      tVal6, tVal6, tVal7                 ;// V = V + (pSrcLeft[4*leftStep] - pSrcLeft[2*leftStep])
+        SUB      dstStep, dstStep, #4                ;// dstStep - 4 (the first store of each row already advances pDst by 4)
+        ADD      tVal6, tVal6, tVal6, LSL #4         ;// 17*V
+        ADD      tVal6, tVal6, #16                   ;// 17*V + 16
+        
+        ;// M_STALL ARM1136JS=1
+        
+        ASR      c, tVal6, #5                        ;// c = (17*V + 16)>>5
+        
+        ;// M_STALL ARM1136JS=1
+        
+        ADD      tVal6, c, c, LSL #1                 ;// 3*c
+        UXTH     c, c                                ;// only in half word
+        SUB      tVal6, tVal2, tVal6                 ;// d = a - 3*b - 3*c + 16 (value at x = 0, y = 0; may be negative)
+        ORR      c, c, c, LSL #16                    ;// {c,c}
+        ADD      tVal7, b, b                         ;// 2b
+        ADD      tVal2, tVal6, tVal7                 ;// pp2 = d + 2*b
+        ADD      tVal7, tVal7, b                     ;// 3b
+        PKHBT    p2p0,   tVal6,  tVal2,  LSL #16     ;// p2p0 = pack {p2, p0}; PKHBT takes only the low halfword of d, so a negative d cannot overwrite p2 with sign bits
+        UXTH     b, b
+        UXTH     tVal7, tVal7
+        ORR      b, b, b, LSL #16                    ;// {b,b}
+        ORR      tVal7, tVal7, tVal7, LSL #16        ;// {3b,3b}
+        SADD16   p3p1,   p2p0, b                     ;// p3p1   = p2p0 + {b,b}
+        SADD16   p6p4,   p3p1, tVal7                 ;// p6p4   = p3p1 + {3b,3b}
+        SADD16   p7p5,   p6p4, b                     ;// p7p5   = p6p4 + {b,b}
+        MOV      outerCount, #BLK_SIZE               ;// Outer Loop Count (r14: b is no longer needed)
+        ;// Each pass writes one row, then steps the four unclipped accumulators by {c,c}
+LOOP_PLANE        
+        ;// Clip into the pp* scratch registers so the accumulators keep their exact values for the next row
+        USAT16   pp7pp5, #13, p7p5                    ;// clip13(p7) clip13(p5)
+        USAT16   pp6pp4, #13, p6p4                    ;// clip13(p6) clip13(p4)
+        USAT16   pp3pp1, #13, p3p1                    ;// clip13(p3) clip13(p1)
+        USAT16   pp2pp0, #13, p2p0                    ;// clip13(p2) clip13(p0)
+        
+        AND      pp7pp5, r0x00FF00FF, pp7pp5, ASR #5  ;// clip8(p7) clip8(p5)
+        AND      pp6pp4, r0x00FF00FF, pp6pp4, ASR #5  ;// clip8(p6) clip8(p4)
+        AND      pp3pp1, r0x00FF00FF, pp3pp1, ASR #5  ;// clip8(p3) clip8(p1)
+        AND      pp2pp0, r0x00FF00FF, pp2pp0, ASR #5  ;// clip8(p2) clip8(p0)
+        
+        SUBS     outerCount, outerCount, #1           ;// outerCount-- (sets the flags tested by BNE below)
+      
+        ORR      p3210, pp2pp0, pp3pp1, LSL #8        ;// pack {p3,p2, p1, p0}
+        STR      p3210, [pDst], #4                    ;// store {pDst[0] to pDst[3]}  
+        
+        ORR      p7654, pp6pp4, pp7pp5, LSL #8        ;// pack {p7,p6, p5, p4}
+        M_STR    p7654, [pDst], dstStep               ;// store {pDst[4] to pDst[7]}
+
+        SADD16   p7p5,   p7p5,   c                    ;// {p7 + c}, {p5 + c}
+        SADD16   p6p4,   p6p4,   c                    ;// {p6 + c}, {p4 + c}
+        SADD16   p3p1,   p3p1,   c                    ;// {p3 + c}, {p1 + c}
+        SADD16   p2p0,   p2p0,   c                    ;// {p2 + c}, {p0 + c}
+      
+        BNE      LOOP_PLANE                           ;// Loop for 8 times
+        MOV      return, #OMX_Sts_NoErr
+        M_END
+        
+        ENDIF ;// ARM1136JS
+        
+        
+        
+        END
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntraChroma_8x8 ends
+;//-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
new file mode 100644
index 0000000..1557208
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
@@ -0,0 +1,501 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_PredictIntra_16x16_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS    
+  
+;//-------------------------------------------------------
+;// Jump table that implements a C-style switch in asm:
+;// predMode indexes the table and the selected entry is loaded into pc.
+;//-------------------------------------------------------
+
+    M_TABLE armVCM4P10_pIndexTable16x16
+    DCD  OMX_VC_16X16_VERT, OMX_VC_16X16_HOR     ;// entries 0 and 1
+    DCD  OMX_VC_16X16_DC,   OMX_VC_16X16_PLANE   ;// entries 2 and 3
+    
+    IF ARM1136JS
+
+;//--------------------------------------------
+;// Constants 
+;//--------------------------------------------  
+BLK_SIZE        EQU 0x10          ;// 16: number of rows handled per block
+MUL_CONST0      EQU 0x01010101    ;// byte * this value = that byte repeated in all 4 bytes
+MUL_CONST1      EQU 0x00060004    ;// not referenced anywhere in this file
+MUL_CONST2      EQU 0x00070005    ;// not referenced anywhere in this file
+MUL_CONST3      EQU 0x00030001    ;// not referenced anywhere in this file
+MASK_CONST      EQU 0x00FF00FF    ;// keeps the low byte of each halfword
+
+;//--------------------------------------------
+;// Scratch variable
+;//--------------------------------------------
+y               RN 12   ;// row loop counter (VERT, HOR, DC)
+pc              RN 15   ;// program counter; loaded from the jump table to dispatch
+
+return          RN 0    ;// status value set just before each exit
+innerCount      RN 0    ;// not referenced in this file
+outerCount      RN 1    ;// not referenced in this file
+pSrcLeft2       RN 1    ;// second left-column pointer (DC); same register as pSrcAbove
+pDst2           RN 2    ;// second output-row pointer; same register as pSrcAboveLeft
+sum             RN 6    ;// DC accumulator; same register as predMode and tVal6
+pTable          RN 9    ;// address of armVCM4P10_pIndexTable16x16
+temp1           RN 10   ;// PLANE loop scratch (even pixel pair)
+temp2           RN 12   ;// PLANE loop scratch (odd pixel pair)
+cMul1           RN 11   ;// not referenced in this file
+cMul2           RN 12   ;// not referenced in this file
+count           RN 12   ;// DC: number of available neighbours (0, 1 or 2)
+dstStepx2       RN 11   ;// 2*dstStep (VERT, DC); 2*dstStep - 12 in HOR
+leftStepx2      RN 14   ;// 2*leftStep (DC)
+r0x01010101     RN 10   ;// holds MUL_CONST0 (HOR)
+r0x00FF00FF     RN 11   ;// holds MASK_CONST; its top 4 bits also count rows in LOOP_PLANE
+
+tVal0           RN 0    ;// tValN is a generic name for register rN
+tVal1           RN 1    
+tVal2           RN 2    
+tVal3           RN 3    ;// not referenced in this file (r3 is used as pDst)
+tVal4           RN 4    
+tVal5           RN 5    ;// not referenced in this file (r5 is used as dstStep)
+tVal6           RN 6    
+tVal7           RN 7    
+tVal8           RN 8    
+tVal9           RN 9    
+tVal10          RN 10   
+tVal11          RN 11   
+tVal12          RN 12   
+tVal14          RN 14   
+
+b               RN 12   ;// not referenced by this name; PLANE keeps b in tVal12
+c               RN 14   ;// PLANE: packed {c, c} added to every pair once per row
+
+p2p0            RN 0    ;// pXpY = two 16-bit plane values, pX in the upper halfword
+p3p1            RN 1    
+p6p4            RN 2    
+p7p5            RN 4    
+p10p8           RN 6    
+p11p9           RN 7    
+p14p12          RN 8    
+p15p13          RN 9    
+
+p3210           RN 10   ;// p3210 .. p15141312 are not referenced in this file
+p7654           RN 10   
+p111098         RN 10   
+p15141312       RN 10   
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+pSrcLeft        RN 0    ;// input pointer: left neighbour column, one byte every leftStep
+pSrcAbove       RN 1    ;// input pointer: the 16 bytes of the row above
+pSrcAboveLeft   RN 2    ;// input pointer: the above-left corner byte
+pDst            RN 3    ;// output pointer: predicted block, rows dstStep apart
+leftStep        RN 4    ;// input variable, loaded from the stack in the prologue
+dstStep         RN 5    ;// input variable, loaded from the stack in the prologue
+predMode        RN 6    ;// input variable, loaded from the stack; index into the jump table
+availability    RN 7    ;// input variable, loaded from the stack; OMX_VC_UPPER / OMX_VC_LEFT flags
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_16x16 starts
+;//-----------------------------------------------------------------------------------------------
+        
+        ;// Write function header
+        M_START omxVCM4P10_PredictIntra_16x16, r11   ;// function entry (M_START is a macro from armCOMM_s.h)
+        
+        ;// Name the four 4-byte arguments that are passed on the stack
+        M_ARG    LeftStep,     4
+        M_ARG    DstStep,      4
+        M_ARG    PredMode,     4
+        M_ARG    Availability, 4
+        
+        ;// M_STALL ARM1136JS=4
+        
+        LDR      pTable,=armVCM4P10_pIndexTable16x16 ;// pTable = address of the per-mode jump table
+        
+        ;// Load the stack arguments into registers
+        M_LDR    predMode, PredMode                  ;// predMode     = PredMode stack argument
+        M_LDR    leftStep, LeftStep                  ;// leftStep     = LeftStep stack argument
+        M_LDR    dstStep,  DstStep                   ;// dstStep      = DstStep stack argument
+        M_LDR    availability, Availability          ;// availability = Availability stack argument
+        
+        MOV      y, #BLK_SIZE                        ;// y = 16 rows; consumed by the VERT case
+        LDR      pc, [pTable, predMode, LSL #2]      ;// jump to pTable[predMode]; predMode is not range-checked here
+        
+OMX_VC_16X16_VERT
+        LDM      pSrcAbove, {tVal6,tVal7,tVal8,tVal9};// tVal6..tVal9 = pSrcAbove[0 to 15] (four words)
+        ADD      dstStepx2, dstStep, dstStep         ;// dstStepx2 = 2 * dstStep
+        ADD      pDst2, pDst, dstStep                ;// pDst2 = pDst + dstStep (second row)
+        
+        ;// M_STALL ARM1136JS=2                       ;// Stall outside the loop
+
+LOOP_VERT
+        STM      pDst, {tVal6,tVal7,tVal8,tVal9}     ;// even row: 16 bytes copied from the row above
+        SUBS     y, y, #2                            ;// y -= 2 (two rows are written per pass)
+        ADD      pDst, pDst, dstStepx2               ;// pDst  += 2 * dstStep
+        STM      pDst2, {tVal6,tVal7,tVal8,tVal9}    ;// odd row: the same 16 bytes
+        ADD      pDst2, pDst2, dstStepx2             ;// pDst2 += 2 * dstStep
+        BNE      LOOP_VERT                           ;// 8 passes (y: 16 -> 0); flags come from the SUBS
+        MOV      return, #OMX_Sts_NoErr              ;// return OMX_Sts_NoErr
+        M_EXIT
+
+        
+OMX_VC_16X16_HOR
+        
+        ;// M_STALL ARM1136JS=6 
+               
+        LDR      r0x01010101, =MUL_CONST0            ;// multiplier that copies one byte into all 4 bytes
+        MOV      y, #4                               ;// 4 passes, each writing 4 rows
+        M_LDRB   tVal6, [pSrcLeft], +leftStep        ;// tVal6 = left pixel of row 0; pSrcLeft += leftStep
+        ADD      pDst2, pDst, dstStep                ;// pDst2 = pDst + dstStep (row 1)
+        M_LDRB   tVal7, [pSrcLeft], +leftStep        ;// tVal7 = left pixel of row 1; pSrcLeft += leftStep
+        ADD      dstStepx2, dstStep, dstStep         ;// dstStepx2 = 2 * dstStep ...
+        SUB      dstStepx2, dstStepx2, #12           ;// ... minus the 12 bytes stepped by three #+4 stores
+       
+LOOP_HOR        
+        M_LDRB   tVal8, [pSrcLeft], +leftStep        ;// tVal8 = left pixel of the 3rd row of this pass
+        MUL      tVal6, tVal6, r0x01010101           ;// replicate the 1st-row pixel into all 4 bytes
+        M_LDRB   tVal9, [pSrcLeft], +leftStep        ;// tVal9 = left pixel of the 4th row of this pass
+        MUL      tVal7, tVal7, r0x01010101           ;// replicate the 2nd-row pixel into all 4 bytes
+        SUBS     y, y, #1                            ;// y--; nothing below changes the flags before BNE
+        STR      tVal6, [pDst],  #+4                 ;// 1st row, bytes 0 to 3
+        STR      tVal7, [pDst2], #+4                 ;// 2nd row, bytes 0 to 3
+        STR      tVal6, [pDst],  #+4                 ;// 1st row, bytes 4 to 7
+        STR      tVal7, [pDst2], #+4                 ;// 2nd row, bytes 4 to 7
+        MUL      tVal8, tVal8, r0x01010101           ;// replicate the 3rd-row pixel into all 4 bytes
+        STR      tVal6, [pDst],  #+4                 ;// 1st row, bytes 8 to 11
+        STR      tVal7, [pDst2], #+4                 ;// 2nd row, bytes 8 to 11
+        MUL      tVal9, tVal9, r0x01010101           ;// replicate the 4th-row pixel into all 4 bytes
+        M_STR    tVal6, [pDst], dstStepx2            ;// 1st row, bytes 12 to 15; pDst moves on to the 3rd row
+        M_STR    tVal7, [pDst2], dstStepx2           ;// 2nd row, bytes 12 to 15; pDst2 moves on to the 4th row
+        STR      tVal8, [pDst],  #+4                 ;// 3rd row, bytes 0 to 3
+        STR      tVal9, [pDst2], #+4                 ;// 4th row, bytes 0 to 3
+        STR      tVal8, [pDst],  #+4                 ;// 3rd row, bytes 4 to 7
+        STR      tVal9, [pDst2], #+4                 ;// 4th row, bytes 4 to 7
+        STR      tVal8, [pDst],  #+4                 ;// 3rd row, bytes 8 to 11
+        STR      tVal9, [pDst2], #+4                 ;// 4th row, bytes 8 to 11
+        M_STR    tVal8, [pDst], dstStepx2            ;// 3rd row, bytes 12 to 15; pDst moves on to the next pass
+        M_LDRB   tVal6, [pSrcLeft], +leftStep        ;// preload next pass, 1st row. NOTE(review): last pass reads pSrcLeft[16*leftStep], unused
+        M_STR    tVal9, [pDst2], dstStepx2           ;// 4th row, bytes 12 to 15; pDst2 moves on to the next pass
+        M_LDRB   tVal7, [pSrcLeft], +leftStep        ;// preload next pass, 2nd row. NOTE(review): last pass reads pSrcLeft[17*leftStep], unused
+        BNE      LOOP_HOR                            ;// 4 passes in total (y: 4 -> 0)
+        MOV      return, #OMX_Sts_NoErr              ;// return OMX_Sts_NoErr
+        M_EXIT
+        
+OMX_VC_16X16_DC
+        
+        ;// M_STALL ARM1136JS=2
+        
+        MOV      count, #0                           ;// count = number of available neighbours so far
+        TST      availability, #OMX_VC_UPPER         ;// if(availability & OMX_VC_UPPER)
+        BEQ      TST_LEFT                            ;// upper row not available: skip its sum
+        LDM      pSrcAbove,{tVal8,tVal9,tVal10,tVal11};// tVal 8 to 11 = pSrcAbove[0 to 15]
+        ADD      count, count, #1                    ;// upper row is available: count++
+        
+        ;// M_STALL ARM1136JS=2
+        
+        UXTB16   tVal2, tVal8                        ;// pSrcAbove[0, 2] as two halfwords
+        UXTB16   tVal6, tVal9                        ;// pSrcAbove[4, 6]
+        UADD16   tVal2, tVal2, tVal6                 ;// pSrcAbove[0, 2] + pSrcAbove[4, 6]
+        UXTB16   tVal8, tVal8, ROR #8                ;// pSrcAbove[1, 3]
+        UXTB16   tVal9, tVal9, ROR #8                ;// pSrcAbove[5, 7]
+        UADD16   tVal8, tVal8, tVal9                 ;// pSrcAbove[1, 3] + pSrcAbove[5, 7]
+        UADD16   tVal2, tVal2, tVal8                 ;// two partial sums that together cover pSrcAbove[0 to 7]
+        
+        UXTB16   tVal8, tVal10                       ;// pSrcAbove[8, 10]
+        UXTB16   tVal9, tVal11                       ;// pSrcAbove[12, 14]
+        UADD16   tVal8, tVal8, tVal9                 ;// pSrcAbove[8, 10] + pSrcAbove[12, 14]
+        UXTB16   tVal10, tVal10, ROR #8              ;// pSrcAbove[9, 11]
+        UXTB16   tVal11, tVal11, ROR #8              ;// pSrcAbove[13, 15]
+        UADD16   tVal10, tVal10, tVal11              ;// pSrcAbove[9, 11] + pSrcAbove[13, 15]
+        UADD16   tVal8, tVal8, tVal10                ;// two partial sums that together cover pSrcAbove[8 to 15]
+        
+        UADD16   tVal2, tVal2, tVal8                 ;// two partial sums that together cover pSrcAbove[0 to 15]
+        
+        ;// M_STALL ARM1136JS=1
+        
+        ADD      tVal2, tVal2, tVal2, LSR #16        ;// low halfword = full upper sum; high halfword keeps a partial sum
+        
+        ;// M_STALL ARM1136JS=1
+        
+        UXTH     sum, tVal2                          ;// sum = upper sum (low halfword only)
+        
+TST_LEFT        
+        TST      availability, #OMX_VC_LEFT          ;// if(availability & OMX_VC_LEFT)
+        BEQ      TST_COUNT                           ;// left column not available: skip its sum
+        ADD      leftStepx2, leftStep,leftStep       ;// leftStepx2 = 2 * leftStep
+        ADD      pSrcLeft2, pSrcLeft, leftStep       ;// pSrcLeft2 = pSrcLeft + leftStep (odd rows)
+        
+        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = left pixel, row 0
+        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = left pixel, row 1
+        M_LDRB   tVal10, [pSrcLeft], +leftStepx2     ;// tVal10= left pixel, row 2
+        M_LDRB   tVal11, [pSrcLeft2],+leftStepx2     ;// tVal11= left pixel, row 3
+        ADD      tVal7, tVal8, tVal9                 ;// tVal7 = rows 0 + 1
+        ADD      count, count, #1                    ;// left column is available: count++
+        ADD      tVal6, tVal10, tVal11               ;// tVal6 = rows 2 + 3 (overwrites sum; upper sum stays in tVal2)
+        
+        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = left pixel, row 4
+        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = left pixel, row 5
+        M_LDRB   tVal10, [pSrcLeft], +leftStepx2     ;// tVal10= left pixel, row 6
+        M_LDRB   tVal11, [pSrcLeft2],+leftStepx2     ;// tVal11= left pixel, row 7
+        ADD      sum, tVal7, tVal6                   ;// sum = rows 0 to 3
+        ADD      tVal8, tVal8, tVal9                 ;// tVal8 = rows 4 + 5
+        ADD      tVal10, tVal10, tVal11              ;// tVal10= rows 6 + 7
+        ADD      tVal7, tVal8, tVal10                ;// tVal7 = rows 4 to 7
+        
+        
+        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = left pixel, row 8
+        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = left pixel, row 9
+        M_LDRB   tVal10, [pSrcLeft], +leftStepx2     ;// tVal10= left pixel, row 10
+        M_LDRB   tVal11, [pSrcLeft2],+leftStepx2     ;// tVal11= left pixel, row 11
+        ADD      sum, sum, tVal7                     ;// sum = rows 0 to 7
+        ADD      tVal8, tVal8, tVal9                 ;// tVal8 = rows 8 + 9
+        ADD      tVal10, tVal10, tVal11              ;// tVal10= rows 10 + 11
+        ADD      tVal7, tVal8, tVal10                ;// tVal7 = rows 8 to 11
+        
+        
+        M_LDRB   tVal8, [pSrcLeft],  +leftStepx2     ;// tVal8 = left pixel, row 12
+        M_LDRB   tVal9, [pSrcLeft2], +leftStepx2     ;// tVal9 = left pixel, row 13
+        M_LDRB   tVal10, [pSrcLeft], +leftStepx2     ;// tVal10= left pixel, row 14
+        M_LDRB   tVal11, [pSrcLeft2],+leftStepx2     ;// tVal11= left pixel, row 15
+        ADD      sum, sum, tVal7                     ;// sum = rows 0 to 11
+        ADD      tVal8, tVal8, tVal9                 ;// tVal8 = rows 12 + 13
+        ADD      tVal10, tVal10, tVal11              ;// tVal10= rows 14 + 15
+        ADD      tVal7, tVal8, tVal10                ;// tVal7 = rows 12 to 15
+        ADD      sum, sum, tVal7                     ;// sum = rows 0 to 15 (full left sum)
+
+TST_COUNT        
+        CMP      count, #0                           ;// if(count == 0)
+        MOVEQ    sum, #128                           ;// no neighbours: DC value is 128
+        BEQ      TST_COUNT0                          ;// and no rounding or shift is needed
+        CMP      count, #1                           ;// EQ: one neighbour, NE: both neighbours
+        ADDEQ    sum, sum, #8                        ;// one neighbour: sum += 8 (rounding for >> 4)
+        ADDNE    sum, sum, tVal2                     ;// both: sum = left sum + upper sum (tVal2 high halfword is junk)
+        ADDNE    sum, sum, #16                       ;// both: sum += 16 (rounding for >> 5)
+        
+        ;// M_STALL ARM1136JS=1
+        
+        UXTH     sum, sum                            ;// keep the low halfword; drops the junk from tVal2
+        
+        ;// M_STALL ARM1136JS=1
+        
+        LSREQ    sum, sum, #4                        ;// sum >> 4 if(count == 1)
+        
+        ;// M_STALL ARM1136JS=1
+        
+        LSRNE    sum, sum, #5                        ;// sum >> 5 if(count == 2)
+
+TST_COUNT0
+        
+        ;// M_STALL ARM1136JS=1
+        
+        ORR      sum, sum, sum, LSL #8               ;// DC byte replicated into both bytes of a halfword
+        
+        ;// M_STALL ARM1136JS=1
+        
+        ORR      tVal6, sum, sum, LSL #16            ;// tVal6 = DC byte replicated into all 4 bytes
+        CPY      tVal7, tVal6                        ;// tVal7 = tVal6
+        CPY      tVal8, tVal6                        ;// tVal8 = tVal6
+        CPY      tVal9, tVal6                        ;// tVal9 = tVal6
+        ADD      dstStepx2, dstStep, dstStep         ;// dstStepx2 = 2 * dstStep
+        ADD      pDst2, pDst, dstStep                ;// pDst2 = pDst + dstStep (second row)
+        MOV      y, #BLK_SIZE                        ;// y = 16 rows (r12 was used as count above)
+        
+LOOP_DC        
+        STM      pDst, {tVal6,tVal7,tVal8,tVal9}     ;// even row: 16 bytes of the DC value
+        SUBS     y, y, #2                            ;// y -= 2 (two rows are written per pass)
+        ADD      pDst, pDst, dstStepx2               ;// pDst  += 2 * dstStep
+        STM      pDst2, {tVal6,tVal7,tVal8,tVal9}    ;// odd row: 16 bytes of the DC value
+        ADD      pDst2, pDst2, dstStepx2             ;// pDst2 += 2 * dstStep
+        BNE      LOOP_DC                             ;// 8 passes (y: 16 -> 0)
+        
+        MOV      return, #OMX_Sts_NoErr              ;// return OMX_Sts_NoErr
+        M_EXIT
+
+OMX_VC_16X16_PLANE
+        ;// pred(x,y) = clip8((d + b*x + c*y) >> 5), d = a + 16 - 7*b - 7*c
+        ;// M_STALL ARM1136JS=3
+        RSB      tVal14, leftStep, leftStep, LSL #4  ;// tVal14 = 15*leftStep
+        
+        ;// M_STALL ARM1136JS=2
+        LDRB     tVal10, [pSrcLeft,  tVal14]         ;// tVal10 = pSrcLeft[15*leftStep]
+        LDRB     tVal11, [pSrcAboveLeft]             ;// tVal11 = pSrcAboveLeft[0]
+        LDRB     tVal12, [pSrcAbove, #15]            ;// tVal12 = pSrcAbove[15]
+
+        ADD      tVal2,  tVal12,  tVal10             ;// tVal2  = pSrcAbove[15] + pSrcLeft[15*leftStep]
+        SUB      tVal10, tVal10,  tVal11             ;// tVal10 = V0 = pSrcLeft[15*leftStep] - pSrcAboveLeft[0]
+        SUB      tVal11, tVal12,  tVal11             ;// tVal11 = H0 = pSrcAbove[15] - pSrcAboveLeft[0]
+        MOV      tVal2,  tVal2,   LSL #4             ;// tVal2  = a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep])
+
+        MOV     tVal11, tVal11, LSL #3              ;// H = 8 * (pSrcAbove[15] - pSrcAboveLeft[0])
+        LDRB    tVal6, [pSrcAbove, #0]
+        LDRB    tVal7, [pSrcAbove, #14]
+        SUB     tVal8, tVal7, tVal6
+        RSB     tVal8, tVal8, tVal8, LSL #3         ;// 7 * (pSrcAbove[14] - pSrcAbove[0])
+        ADD     tVal11, tVal11, tVal8               ;// H += that term
+        LDRB    tVal6, [pSrcAbove, #1]
+        LDRB    tVal7, [pSrcAbove, #13]
+        SUB     tVal8, tVal7, tVal6
+        ADD     tVal8, tVal8, tVal8                 ;// 2 * (pSrcAbove[13] - pSrcAbove[1])
+        ADD     tVal8, tVal8, tVal8, LSL #1         ;// 6 * (pSrcAbove[13] - pSrcAbove[1])
+        ADD     tVal11, tVal11, tVal8               ;// H += that term
+        LDRB    tVal6, [pSrcAbove, #2]
+        LDRB    tVal7, [pSrcAbove, #12]
+        SUB     tVal8, tVal7, tVal6
+        ADD     tVal8, tVal8, tVal8, LSL #2         ;// 5 * (pSrcAbove[12] - pSrcAbove[2])
+        ADD     tVal11, tVal11, tVal8               ;// H += that term
+        LDRB    tVal6, [pSrcAbove, #3]
+        LDRB    tVal7, [pSrcAbove, #11]
+        SUB     tVal8, tVal7, tVal6
+        ADD     tVal11, tVal11, tVal8, LSL #2       ;// H += 4 * (pSrcAbove[11] - pSrcAbove[3])
+        LDRB    tVal6, [pSrcAbove, #4]
+        LDRB    tVal7, [pSrcAbove, #10]
+        SUB     tVal8, tVal7, tVal6
+        ADD     tVal8, tVal8, tVal8, LSL #1         ;// 3 * (pSrcAbove[10] - pSrcAbove[4])
+        ADD     tVal11, tVal11, tVal8               ;// H += that term
+        LDRB    tVal6, [pSrcAbove, #5]
+        LDRB    tVal7, [pSrcAbove, #9]
+        SUB     tVal8, tVal7, tVal6
+        ADD     tVal11, tVal11, tVal8, LSL #1       ;// H += 2 * (pSrcAbove[9] - pSrcAbove[5])
+        LDRB    tVal6, [pSrcAbove, #6]
+        LDRB    tVal7, [pSrcAbove, #8]
+        SUB     tVal8, tVal7, tVal6                 ;// 1 * (pSrcAbove[8] - pSrcAbove[6])
+        ADD     tVal7, tVal11, tVal8                ;// tVal7 = H (complete)
+
+        ADD      tVal2,  tVal2,   #16                ;// tVal2  = a + 16
+        MOV      tVal1,  pSrcLeft                    ;// tVal1  = pSrcLeft (walks down from row 0; pSrcAbove is dead)
+        SUB      tVal9,  tVal14,   leftStep          ;// tVal9  = 14*leftStep
+        ADD      tVal9,  pSrcLeft, tVal9             ;// tVal9  = pSrcLeft + 14*leftStep (walks up from row 14)
+        
+        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[14*leftStep]
+        M_LDRB   tVal11, [tVal1], +leftStep          ;// tVal11 = pSrcLeft[0]
+        ADD      tVal7,  tVal7,  tVal7,  LSL #2      ;// tVal7  = 5 * H
+        ADD      tVal7,  tVal7,  #32                 ;// tVal7  = 5 * H + 32
+        SUB      tVal8,  tVal8,  tVal11              ;// tVal8  = pSrcLeft[14*leftStep] - pSrcLeft[0]
+        ASR      tVal12, tVal7,  #6                  ;// tVal12 = b = (5 * H + 32) >> 6
+        
+        RSB      tVal8,  tVal8,  tVal8,  LSL #3      ;// tVal8  = V1 = 7* (pSrcLeft[14*leftStep]-pSrcLeft[0])
+        ADD      tVal6,  tVal8,  tVal10, LSL #3      ;// tVal6  = V = 8*V0 + V1
+        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[13*leftStep]
+        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[leftStep]
+        RSB      tVal7,  tVal12,  tVal12,  LSL #3    ;// tVal7  = 7*b
+        SUB      tVal2,  tVal2,   tVal7              ;// tVal2  = a + 16 - 7*b
+        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[13*leftStep] - pSrcLeft[leftStep]
+        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[12*leftStep]
+        ADD      tVal7,  tVal7,   tVal7              ;// tVal7  = 2 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
+        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[2*leftStep]        
+        ADD      tVal7,  tVal7,   tVal7,  LSL #1     ;// tVal7  = 6 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
+        ADD      tVal6,  tVal6,   tVal7              ;// tVal6  = V = V + V2
+        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep]
+        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[11*leftStep]
+        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[3*leftStep]
+        ADD      tVal7,  tVal7,   tVal7,  LSL #2     ;// tVal7  = 5 * (pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep])
+        ADD      tVal6,  tVal6,   tVal7              ;// tVal6  = V = V + V3
+        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[11*leftStep] - pSrcLeft[3*leftStep]
+        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[10*leftStep]
+        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[4*leftStep]
+        ADD      tVal6,  tVal6,   tVal7,  LSL #2     ;// tVal6  = V = V + V4 (4 * the row 11 / row 3 difference)
+        SUB      dstStep, dstStep, #16               ;// dstStep -= 16: the row loop already steps pDst by 16 bytes
+        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep]
+        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[9*leftStep]
+        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[5*leftStep]
+        ADD      tVal7,  tVal7,   tVal7,  LSL #1     ;// tVal7  = 3 * (pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep])
+        ADD      tVal6,  tVal6,   tVal7              ;// tVal6  = V = V + V5
+        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[9*leftStep] - pSrcLeft[5*leftStep]
+        M_LDRB   tVal8,  [tVal9], -leftStep          ;// tVal8  = pSrcLeft[8*leftStep]
+        M_LDRB   tVal10, [tVal1], +leftStep          ;// tVal10 = pSrcLeft[6*leftStep]
+        ADD      tVal6,  tVal6,   tVal7,  LSL #1     ;// tVal6  = V = V + V6 (2 * the row 9 / row 5 difference)
+        
+        ;// M_STALL ARM1136JS=1
+        SUB      tVal7,  tVal8,   tVal10             ;// tVal7  = pSrcLeft[8*leftStep] - pSrcLeft[6*leftStep]
+        ADD      tVal6,  tVal6,   tVal7              ;// tVal6  = V = V + V7 (complete)
+        
+        ;// M_STALL ARM1136JS=1
+        ADD      tVal6,  tVal6,   tVal6,  LSL #2     ;// tVal6  = 5*V
+        ADD      tVal6,  tVal6,   #32                ;// tVal6  = 5*V + 32
+        
+        ;// M_STALL ARM1136JS=1
+        ASR      tVal14, tVal6,   #6                 ;// tVal14 = c = (5*V + 32)>>6
+        
+        ;// M_STALL ARM1136JS=1
+        RSB      tVal6,  tVal14,  tVal14, LSL #3     ;// tVal6  = 7*c
+        UXTH     tVal14, tVal14                      ;// tVal14 = c with the upper halfword cleared
+        ADD      tVal10, tVal12,  tVal12             ;// tVal10 = 2*b
+        ORR      tVal14, tVal14,  tVal14, LSL #16    ;// tVal14 = {c  ,  c}
+        SUB      tVal6,  tVal2,   tVal6              ;// tVal6  = d = a - 7*b - 7*c + 16 (signed, may be negative)
+        ADD      tVal1,  tVal6,   tVal10             ;// tVal1  = pp2 = d + 2*b
+        ADD      tVal10, tVal10,  tVal12             ;// tVal10 = 3*b
+        PKHBT    tVal0,  tVal6,   tVal1,  LSL #16    ;// tVal0  = p2p0 = {p2, p0}; only d[15:0] is used, so a negative d cannot spill sign bits into p2
+        UXTH     tVal12, tVal12                      ;// tVal12 = b with the upper halfword cleared
+        UXTH     tVal10, tVal10                      ;// tVal10 = 3*b with the upper halfword cleared
+        ORR      tVal12, tVal12,  tVal12, LSL #16    ;// tVal12 = {b  ,  b}
+        ORR      tVal10, tVal10,  tVal10, LSL #16    ;// tVal10 = {3b , 3b}
+        SADD16   tVal1,  tVal0,   tVal12             ;// tVal1  = p3p1   = p2p0   + {b,b}
+        SADD16   tVal2,  tVal1,   tVal10             ;// tVal2  = p6p4   = p3p1   + {3b,3b}
+        SADD16   tVal4,  tVal2,   tVal12             ;// tVal4  = p7p5   = p6p4   + {b,b}
+        SADD16   tVal6,  tVal4,   tVal10             ;// tVal6  = p10p8  = p7p5   + {3b,3b}
+        SADD16   tVal7,  tVal6,   tVal12             ;// tVal7  = p11p9  = p10p8  + {b,b}
+        SADD16   tVal8,  tVal7,   tVal10             ;// tVal8  = p14p12 = p11p9  + {3b,3b}
+        SADD16   tVal9,  tVal8,   tVal12             ;// tVal9  = p15p13 = p14p12 + {b,b}
+        LDR      r0x00FF00FF,     =MASK_CONST        ;// r0x00FF00FF = 0x00FF00FF; top 4 bits start at 0 and count rows
+        
+LOOP_PLANE        
+
+        USAT16   temp2, #13, p3p1                     ;// clamp p3, p1 to 0..8191
+        USAT16   temp1, #13, p2p0                     ;// clamp p2, p0 to 0..8191
+        SADD16   p3p1,   p3p1,   c                    ;// next row: {p3 + c, p1 + c}
+        SADD16   p2p0,   p2p0,   c                    ;// next row: {p2 + c, p0 + c}
+        AND      temp2, r0x00FF00FF, temp2, ASR #5    ;// >> 5 and keep one byte per halfword; counter bits mask nothing set
+        AND      temp1, r0x00FF00FF, temp1, ASR #5    ;// same for p2, p0
+        ORR      temp1, temp1, temp2, LSL #8          ;// pack bytes {p3, p2, p1, p0}
+        STR      temp1, [pDst], #4                    ;// store columns 0 to 3
+        
+        USAT16   temp2, #13, p7p5                     ;// clamp p7, p5 to 0..8191
+        USAT16   temp1, #13, p6p4                     ;// clamp p6, p4 to 0..8191
+        SADD16   p7p5,   p7p5,   c                    ;// next row: {p7 + c, p5 + c}
+        SADD16   p6p4,   p6p4,   c                    ;// next row: {p6 + c, p4 + c}
+        AND      temp2, r0x00FF00FF, temp2, ASR #5    ;// >> 5 and keep one byte per halfword
+        AND      temp1, r0x00FF00FF, temp1, ASR #5    ;// same for p6, p4
+        ORR      temp1, temp1, temp2, LSL #8          ;// pack bytes {p7, p6, p5, p4}
+        STR      temp1, [pDst], #4                    ;// store columns 4 to 7
+        
+        USAT16   temp2, #13, p11p9                    ;// clamp p11, p9 to 0..8191
+        USAT16   temp1, #13, p10p8                    ;// clamp p10, p8 to 0..8191
+        SADD16   p11p9,  p11p9,  c                    ;// next row: {p11 + c, p9 + c}
+        SADD16   p10p8,  p10p8,  c                    ;// next row: {p10 + c, p8 + c}
+        AND      temp2, r0x00FF00FF, temp2, ASR #5    ;// >> 5 and keep one byte per halfword
+        AND      temp1, r0x00FF00FF, temp1, ASR #5    ;// same for p10, p8
+        ORR      temp1, temp1, temp2, LSL #8          ;// pack bytes {p11, p10, p9, p8}
+        STR      temp1, [pDst], #4                    ;// store columns 8 to 11
+        
+        USAT16   temp2, #13, p15p13                   ;// clamp p15, p13 to 0..8191
+        USAT16   temp1, #13, p14p12                   ;// clamp p14, p12 to 0..8191
+        SADD16   p15p13, p15p13, c                    ;// next row: {p15 + c, p13 + c}
+        SADD16   p14p12, p14p12, c                    ;// next row: {p14 + c, p12 + c}
+        AND      temp2, r0x00FF00FF, temp2, ASR #5    ;// >> 5 and keep one byte per halfword
+        AND      temp1, r0x00FF00FF, temp1, ASR #5    ;// same for p14, p12
+        ORR      temp1, temp1, temp2, LSL #8          ;// pack bytes {p15, p14, p13, p12}
+        STR      temp1, [pDst], #4                    ;// store columns 12 to 15
+        
+        ADDS     r0x00FF00FF, r0x00FF00FF, #1<<28     ;// Loop counter value in top 4 bits; carry is set on the 16th add
+        
+        ADD      pDst, pDst, dstStep                  ;// pDst += dstStep - 16: start of the next row (flags untouched)
+        
+        BCC      LOOP_PLANE                           ;// Loop for 16 times
+        MOV      return, #OMX_Sts_NoErr               ;// return OMX_Sts_NoErr
+        M_END
+        
+        ENDIF ;// ARM1136JS
+
+            
+        END
+;-----------------------------------------------------------------------------------------------
+; omxVCM4P10_PredictIntra_16x16 ends
+;-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
new file mode 100644
index 0000000..a90f4604
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
@@ -0,0 +1,567 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_PredictIntra_4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+;// Define the processor variants supported by this file
+         
+         M_VARIANTS ARM1136JS
+        
+;//-------------------------------------------------------
+;// Jump table implementing a C-style switch in asm: entry N holds the
+;// address of the case label that is branched to when predMode == N.
+;//-------------------------------------------------------
+
+    M_TABLE armVCM4P10_pSwitchTable4x4
+    DCD  OMX_VC_4x4_VERT,     OMX_VC_4x4_HOR      ;// entries 0, 1
+    DCD  OMX_VC_4x4_DC,       OMX_VC_4x4_DIAG_DL  ;// entries 2, 3
+    DCD  OMX_VC_4x4_DIAG_DR,  OMX_VC_4x4_VR       ;// entries 4, 5
+    DCD  OMX_VC_4x4_HD,       OMX_VC_4x4_VL       ;// entries 6, 7
+    DCD  OMX_VC_4x4_HU                            ;// entry 8 (last)
+    
+    IF ARM1136JS
+  
+;//--------------------------------------------
+;// Constants
+;//--------------------------------------------
+BLK_SIZE              EQU 0x8                 ;// NOTE(review): appears unused in this file - confirm before removing
+MUL_CONST0            EQU 0x01010101          ;// byte * MUL_CONST0 replicates the byte into all 4 byte lanes
+ADD_CONST1            EQU 0x80808080          ;// 128 in every byte lane; added with UADD8 as the final "+128" step
+
+;//--------------------------------------------
+;// Scratch variables (several names alias one register - compare the RN numbers)
+;//--------------------------------------------
+return          RN 0    ;// status code; same register as pSrcLeft, r0x80808080 and tVal0
+pTable          RN 9    ;// jump table base; same register as tVal9 / Out3 / Left3
+pc              RN 15
+r0x01010101     RN 1    ;// holds MUL_CONST0; same register as pSrcAbove and tVal1
+r0x80808080     RN 0    ;// holds ADD_CONST1; same register as pSrcLeft and return
+
+tVal0           RN 0
+tVal1           RN 1
+tVal2           RN 2
+tVal4           RN 4
+tVal6           RN 6
+tVal7           RN 7
+tVal8           RN 8
+tVal9           RN 9
+tVal10          RN 10
+tVal11          RN 11
+tVal12          RN 12
+tVal14          RN 14
+
+Out0            RN 6    ;// Out0..Out3: the four 4-byte output rows (r6..r9, shared with Left0..Left3)
+Out1            RN 7
+Out2            RN 8
+Out3            RN 9
+
+Left0           RN 6    ;// Left0..Left3: bytes read through pSrcLeft (r6..r9, shared with Out0..Out3)
+Left1           RN 7
+Left2           RN 8
+Left3           RN 9
+
+Above0123       RN 12   ;// word loaded from pSrcAbove[0 to 3]; same register as tVal12
+Above4567       RN 14   ;// word loaded from pSrcAbove[4 to 7]; same register as tVal14
+
+AboveLeft       RN 10   ;// byte loaded from pSrcAboveLeft; same register as tVal10
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+pSrcLeft        RN 0    ;// input pointer
+pSrcAbove       RN 1    ;// input pointer
+pSrcAboveLeft   RN 2    ;// input pointer
+pDst            RN 3    ;// output pointer
+leftStep        RN 4    ;// input variable (loaded from the stack; r4 is reused as tVal4)
+dstStep         RN 5    ;// input variable (loaded from the stack)
+predMode        RN 6    ;// input variable (loaded from the stack; r6 is reused as Left0/Out0/tVal6)
+availability    RN 7    ;// input variable (loaded from the stack; r7 is reused as Left1/Out1/tVal7)
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_4x4 starts
+;//-----------------------------------------------------------------------------------------------
+        ;// Dispatches on predMode to one of nine case labels; each case stores four 4-byte rows through pDst and sets return to OMX_Sts_NoErr
+        ;// Write function header
+        M_START omxVCM4P10_PredictIntra_4x4, r11
+        
+        ;// Define stack arguments
+        M_ARG    LeftStep,     4
+        M_ARG    DstStep,      4
+        M_ARG    PredMode,     4
+        M_ARG    Availability, 4
+        
+        ;// M_STALL ARM1136JS=4
+        
+        LDR      pTable,=armVCM4P10_pSwitchTable4x4  ;// Load index table for switch case
+        
+        ;// Load argument from the stack
+        M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg
+        M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg
+        M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg
+        M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg
+        ;// NOTE(review): predMode is not range-checked before it indexes the 9-entry table
+        LDR      pc, [pTable, predMode, LSL #2]      ;// Branch to the case label stored at pTable[predMode]
+
+OMX_VC_4x4_VERT
+        ;// Case: copy the 4 bytes at pSrcAbove into each of the 4 output rows
+        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = pSrcAbove[0 to 3]
+        M_STR    Above0123,  [pDst],  dstStep        ;// output row 0 (pixels 0  to 3)  = Above0123
+        M_STR    Above0123,  [pDst],  dstStep        ;// output row 1 (pixels 4  to 7)  = Above0123
+        M_STR    Above0123,  [pDst],  dstStep        ;// output row 2 (pixels 8  to 11) = Above0123
+        STR      Above0123,  [pDst]                  ;// output row 3 (pixels 12 to 15) = Above0123
+        MOV      return, #OMX_Sts_NoErr
+        M_EXIT                                      ;// Macro to exit the function mid-way (the "break" of this case)
+
+OMX_VC_4x4_HOR
+        ;// Case: output row N is the byte LeftN replicated four times
+        ;// M_STALL ARM1136JS=6 
+        
+        LDR      r0x01010101,  =MUL_CONST0           ;// Const to repeat the byte in reg 4 times (overwrites pSrcAbove, unused in this case)
+        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
+        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
+        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
+        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
+        MUL      Out0,   Left0,   r0x01010101        ;// replicate the val in all the bytes
+        MUL      Out1,   Left1,   r0x01010101        ;// replicate the val in all the bytes
+        MUL      Out2,   Left2,   r0x01010101        ;// replicate the val in all the bytes
+        MUL      Out3,   Left3,   r0x01010101        ;// replicate the val in all the bytes
+        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
+        M_STR    Out1,   [pDst],  dstStep            ;// store {Out1} at pDst [4  to 7 ]
+        M_STR    Out2,   [pDst],  dstStep            ;// store {Out2} at pDst [8  to 11]
+        STR      Out3,   [pDst]                      ;// store {Out3} at pDst [12 to 15]
+        MOV      return, #OMX_Sts_NoErr              ;// r0 (pSrcLeft) is no longer needed here
+        M_EXIT                                       ;// Macro to exit the function mid-way (the "break" of this case)
+        
+OMX_VC_4x4_DC
+        ;// Case: fill the block with the rounded mean of the upper row and/or left column, whichever is flagged available (128 if neither)
+        ;// M_STALL ARM1136JS=6
+        
+        AND      availability,  availability,  #(OMX_VC_UPPER + OMX_VC_LEFT)  ;// keep only the UPPER and LEFT flags
+        CMP      availability,  #(OMX_VC_UPPER + OMX_VC_LEFT)                 ;// are both set?
+        BNE      UpperOrLeftOrNoneAvailable          ;// Jump to Upper if not both
+        LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]
+        
+        ;// M_STALL ARM1136JS=1
+        
+        UXTB16   tVal7,  Above0123                   ;// pSrcAbove[0, 2]
+        UXTB16   tVal6,  Above0123,  ROR #8          ;// pSrcAbove[1, 3]
+        UADD16   tVal11, tVal6,   tVal7              ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
+        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
+        ADD      tVal11, tVal11,  LSR #16            ;// sum(pSrcAbove[0] to pSrcAbove[3])
+        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
+        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
+        UXTH     tVal11, tVal11                      ;// upsum1 (Clear the top junk bits)
+        ADD      tVal6,  Left0,  Left1               ;// tVal6 = Left0 + Left1
+        ADD      tVal7,  Left2,  Left3               ;// tVal7 = Left2 + Left3
+        ADD      tVal6,  tVal6,  tVal7               ;// tVal6 = tVal6 + tVal7
+        ADD      Out0,   tVal6,  tVal11              ;// Out0  = tVal6 + tVal11
+        ADD      Out0,   Out0,   #4                  ;// Out0  = Out0 + 4
+        LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101
+        MOV      Out0,   Out0,  LSR #3               ;// Out0 = (Out0 + 4)>>3
+        
+        ;// M_STALL ARM1136JS=1
+        
+        MUL      Out0,   Out0,  r0x01010101          ;// replicate the val in all the bytes
+        
+        ;// M_STALL ARM1136JS=1
+        
+        MOV      return,  #OMX_Sts_NoErr
+        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
+        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
+        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
+        STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
+        M_EXIT                                       ;// Macro to exit the function mid-way (the "break" of this case)
+        
+UpperOrLeftOrNoneAvailable
+        ;// M_STALL ARM1136JS=3
+        
+        CMP      availability,  #OMX_VC_UPPER        ;// only UPPER set? (availability was masked to UPPER|LEFT above)
+        BNE      LeftOrNoneAvailable                 ;// Jump to Left if not upper
+        LDR      Above0123,  [pSrcAbove]             ;// Above0123  = pSrcAbove[0 to 3]
+        
+        ;// M_STALL ARM1136JS=3
+        
+        UXTB16   tVal7,  Above0123                   ;// pSrcAbove[0, 2]
+        UXTB16   tVal6,  Above0123,  ROR #8          ;// pSrcAbove[1, 3]
+        UADD16   Out0,   tVal6,  tVal7               ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+        LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101
+        ADD      Out0,   Out0,   LSR #16             ;// sum(pSrcAbove[0] to pSrcAbove[3])
+        
+        ;// M_STALL ARM1136JS=1
+        
+        UXTH     Out0,   Out0                        ;// upsum1 (Clear the top junk bits)
+        ADD      Out0,   Out0,   #2                  ;// Out0  = Out0 + 2
+        
+        ;// M_STALL ARM1136JS=1
+        
+        MOV      Out0,   Out0,   LSR #2              ;// Out0  = (Out0 + 2)>>2
+        
+        ;// M_STALL ARM1136JS=1
+        
+        MUL      Out0,   Out0,   r0x01010101         ;// replicate the val in all the bytes
+        
+        ;// M_STALL ARM1136JS=1
+        
+        MOV      return, #OMX_Sts_NoErr
+        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
+        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
+        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
+        STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
+        
+        M_EXIT                                       ;// Macro to exit the function mid-way (the "break" of this case)
+        
+LeftOrNoneAvailable        
+        ;// M_STALL ARM1136JS=3
+        
+        LDR      r0x01010101,   =MUL_CONST0          ;// 0x01010101 (loaded before the branch because NoneAvailable uses it too)
+        CMP      availability, #OMX_VC_LEFT          ;// only LEFT set?
+        BNE      NoneAvailable
+        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
+        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
+        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
+        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
+        ADD      Out0,   Left0,  Left1               ;// Out0  = Left0 + Left1
+        
+        ;// M_STALL ARM1136JS=1
+        
+        ADD      Out1,   Left2,  Left3               ;// Out1  = Left2 + Left3
+        ADD      Out0,   Out0,   Out1                ;// Out0  = Out0  + Out1
+        ADD      Out0,   Out0,   #2                  ;// Out0  = Out0 + 2
+        
+        ;// M_STALL ARM1136JS=1
+        
+        MOV      Out0,   Out0,   LSR #2              ;// Out0  = (Out0 + 2)>>2
+        
+        ;// M_STALL ARM1136JS=1
+        
+        MUL      Out0,   Out0,   r0x01010101         ;// replicate the val in all the bytes
+        
+        ;// M_STALL ARM1136JS=1
+        
+        MOV      return, #OMX_Sts_NoErr
+        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
+        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
+        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
+        STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
+        M_EXIT                                       ;// Macro to exit the function mid-way (the "break" of this case)
+
+NoneAvailable
+        MOV      Out0,   #128                        ;// Out0 = 128 when neither UPPER nor LEFT is available (count == 0)
+        
+        ;// M_STALL ARM1136JS=5
+        
+        MUL      Out0,   Out0,  r0x01010101          ;// replicate the val in all the bytes
+        
+        ;// M_STALL ARM1136JS=1
+        
+        MOV      return, #OMX_Sts_NoErr
+        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [0  to 3 ]
+        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [4  to 7 ]
+        M_STR    Out0,   [pDst],  dstStep            ;// store {Out0} at pDst [8  to 11]
+        STR      Out0,   [pDst]                      ;// store {Out0} at pDst [12 to 15]
+        M_EXIT                                       ;// Macro to exit the function mid-way (the "break" of this case)
+        
+OMX_VC_4x4_DIAG_DL
+        
+        ;//------------------------------------------------------------------
+        ;// f = (a+2*b+c+2)>>2
+        ;// Calculate as:
+        ;// d = (a + c )>>1
+        ;// e = (d - b')>>1
+        ;// f = e + 128
+        ;//------------------------------------------------------------------
+        ;// b' = ~b = 255-b (MVN), so per byte lane e + 128 == (d + b + 1)>>1 (UHADD8 -> UHSUB8 -> UADD8)
+        ;// M_STALL ARM1136JS=3
+        
+        TST      availability, #OMX_VC_UPPER_RIGHT   ;// flags are consumed by the BNE below (LDMIA/LDR do not change them)
+        LDMIA    pSrcAbove,  {Above0123, Above4567}  ;// Above0123, Above4567 = pSrcAbove[0 to 7] (loaded unconditionally)
+        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
+        BNE      DLUpperRightAvailable
+        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
+        MOV      tVal7,  Above0123,  LSR #24         ;// {00,  00,  00,  U3 }
+        MOV      tVal11, tVal7,  LSL #24             ;// {U3,  00,  00,  00 }
+        MUL      Out3,   tVal7,  r0x01010101         ;// {U3,  U3,  U3,  U3 }
+        MOV      tVal8,  Above0123,  LSR #16         ;// {00,  00,  U3,  U2 }
+        MOV      tVal10, Above0123,  LSR #8          ;// {00,  U3,  U2,  U1 }
+        MVN      tVal10, tVal10                      ;// {00', U3', U2', U1'}
+        UHADD8   tVal8,  tVal8,  Above0123           ;// {xx,  xx,  d1,  d0 }
+        UHADD8   tVal6,  Above0123,  tVal9           ;// {xx,  d2,  xx,  xx } (tVal9 is Out3 = {U3, U3, U3, U3})
+        UHSUB8   tVal8,  tVal8,  tVal10              ;// {xx,  xx,  e1,  e0 }
+        UHSUB8   tVal6,  tVal6,  tVal10              ;// {xx,  e2,  xx,  xx }
+        UADD8    tVal8,  tVal8,  r0x80808080         ;// {xx,  xx,  f1,  f0 }
+        UADD8    tVal6,  tVal6,  r0x80808080         ;// {xx,  f2,  xx,  xx }
+        
+        ;// M_STALL ARM1136JS=1
+        
+        PKHBT    tVal6,  tVal8,  tVal6               ;// {xx,  f2,  f1,  f0 }
+        BIC      tVal6,  tVal6,  #0xFF000000         ;// {00,  f2,  f1,  f0 }
+        ORR      Out0,   tVal6,  tVal11              ;// {U3,  f2,  f1,  f0 }
+        
+        ;// M_STALL ARM1136JS=1
+        
+        PKHTB    Out1,   Out3,   Out0,  ASR #8       ;// {U3,  U3,  f2,  f1 }
+        MOV      return, #OMX_Sts_NoErr              ;// safe: r0x80808080 (same register) was last used above
+        PKHTB    Out2,   Out3,   Out1,  ASR #8       ;// {U3,  U3,  U3,  f2 }
+        
+        M_STR    Out0,   [pDst], dstStep             ;// store {U3, f2, f1, f0} at pDst[3  to 0 ]
+        M_STR    Out1,   [pDst], dstStep             ;// store {U3, U3, f2, f1} at pDst[7  to 4 ]
+        M_STR    Out2,   [pDst], dstStep             ;// store {U3, U3, U3, f2} at pDst[11 to 8 ]
+        STR      Out3,   [pDst]                      ;// store {U3, U3, U3, U3} at pDst[15 to 12]
+        M_EXIT                                       ;// Macro to exit the function mid-way (the "break" of this case)
+
+DLUpperRightAvailable        
+        
+        MOV      tVal8,  Above0123,  LSR #24         ;// {00,  00,  00,  U3 }
+        MOV      tVal9,  Above0123,  LSR #16         ;// {00,  00,  U3,  U2 }
+        MOV      tVal10, Above0123,  LSR #8          ;// {00,  U3,  U2,  U1 }
+        ORR      tVal8,  tVal8,  Above4567, LSL #8   ;// {U6,  U5,  U4,  U3 }
+        ORR      tVal10, tVal10, Above4567, LSL #24  ;// {U4,  U3,  U2,  U1 }
+        PKHBT    tVal9,  tVal9,  Above4567, LSL #16  ;// {U5,  U4,  U3,  U2 }
+        MVN      tVal1,  tVal8                       ;// {U6', U5', U4', U3'}
+        MVN      tVal10, tVal10                      ;// {U4', U3', U2', U1'}
+        MVN      tVal2,  Above4567                   ;// {U7', U6', U5', U4'}
+        UHADD8   tVal6,  Above0123,  tVal9           ;// {d3,  d2,  d1,  d0 }
+        UHADD8   tVal9,  tVal9,  Above4567           ;// {d5,  d4,  d3,  d2 }
+        UHADD8   tVal8,  Above4567,  tVal8           ;// {d6,  xx,  xx,  xx }
+        UHSUB8   tVal6,  tVal6,  tVal10              ;// {e3,  e2,  e1,  e0 }
+        UHSUB8   tVal12, tVal9,  tVal1               ;// {e5,  e4,  e3,  e2 }
+        UHSUB8   tVal8,  tVal8,  tVal2               ;// {e6,  xx,  xx,  xx }
+        UADD8    Out0,   tVal6,  r0x80808080         ;// {f3,  f2,  f1,  f0 }
+        UADD8    tVal9,  tVal8,  r0x80808080         ;// {f6,  xx,  xx,  xx }
+        UADD8    Out2,   tVal12, r0x80808080         ;// {f5,  f4,  f3,  f2 }
+        MOV      tVal7,  Out0,   LSR #8              ;// {00,  f3,  f2,  f1 }
+        AND      tVal9,  tVal9,  #0xFF000000         ;// {f6,  00,  00,  00 }
+        PKHBT    Out1,   tVal7,  Out2,  LSL #8       ;// {f4,  f3,  f2,  f1 }
+        ORR      Out3,   tVal9,  Out2,  LSR #8       ;// {f6,  f5,  f4,  f3 }
+        M_STR    Out0,   [pDst], dstStep             ;// store {f3 to f0} at pDst[3  to 0 ]
+        M_STR    Out1,   [pDst], dstStep             ;// store {f4 to f1} at pDst[7  to 4 ]
+        M_STR    Out2,   [pDst], dstStep             ;// store {f5 to f2} at pDst[11 to 8 ]
+        STR      Out3,   [pDst]                      ;// store {f6 to f3} at pDst[15 to 12]
+        MOV      return, #OMX_Sts_NoErr
+        M_EXIT                                       ;// Macro to exit the function mid-way (the "break" of this case)
+        
+
+OMX_VC_4x4_DIAG_DR
+        ;// Case: builds the rows from the left column, the above-left byte and the upper row, using the UHADD8 / MVN+UHSUB8 / UADD8 filter sequence
+        ;// M_STALL ARM1136JS=4
+        
+        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
+        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
+        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
+        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
+        LDRB     AboveLeft, [pSrcAboveLeft]          ;// AboveLeft = pSrcAboveLeft[0]
+        ORR      tVal7,  Left1,  Left0,  LSL #8      ;// tVal7 = 00 00 L0 L1
+        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
+        LDR      r0x80808080, =ADD_CONST1            ;// 0x80808080 (overwrites pSrcLeft; all left bytes are already loaded)
+        ORR      tVal8,  Left3,  Left2,  LSL #8      ;// tVal8 = 00 00 L2 L3
+        PKHBT    tVal7,  tVal8,  tVal7,  LSL #16     ;// tVal7 = L0 L1 L2 L3
+        MOV      tVal8,  Above0123,  LSL #8          ;// tVal8 = U2 U1 U0 00
+        MOV      tVal9,  tVal7,  LSR #8              ;// tVal9 = 00 L0 L1 L2
+        ORR      tVal8,  tVal8,  AboveLeft           ;// tVal8 = U2 U1 U0 UL
+        ORR      tVal9,  tVal9,  AboveLeft, LSL #24  ;// tVal9 = UL L0 L1 L2
+        MOV      tVal10, Above0123,  LSL #24         ;// tVal10= U0 00 00 00 (overwrites AboveLeft, no longer needed)
+        UXTB     tVal11, tVal7,  ROR #24             ;// tVal11= 00 00 00 L0
+        ORR      tVal10, tVal10, tVal9,  LSR #8      ;// tVal10= U0 UL L0 L1
+        ORR      tVal11, tVal11, tVal8,  LSL #8      ;// tVal11= U1 U0 UL L0
+        UHADD8   tVal11, Above0123,  tVal11          ;// tVal11= d1 d0 dL g0
+        UHADD8   tVal10, tVal7,  tVal10              ;// tVal10= g0 g1 g2 g3
+        MVN      tVal8,  tVal8                       ;// tVal8 = U2'U1'U0'UL'
+        MVN      tVal9,  tVal9                       ;// tVal9 = UL'L0'L1'L2'
+        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= e1 e0 eL h0
+        UHSUB8   tVal10, tVal10, tVal9               ;// tVal10= h0 h1 h2 h3
+        UADD8    Out3,   tVal10, r0x80808080         ;// Out3  = i0 i1 i2 i3
+        UADD8    Out0,   tVal11, r0x80808080         ;// Out0  = f1 f0 fL i0
+        UXTH     tVal11, Out3,   ROR #8              ;// tVal11= 00 00 i1 i2
+        MOV      tVal7,  Out0,   LSL #8              ;// tVal7 = f0 fL i0 00
+        ORR      Out1,   tVal7,  tVal11,  LSR #8     ;// Out1  = f0 fL i0 i1
+        PKHBT    Out2,   tVal11, Out0,    LSL #16    ;// Out2  = fL i0 i1 i2
+        M_STR    Out0,   [pDst], dstStep             ;// store {f1 to i0} at pDst[3  to 0 ]
+        M_STR    Out1,   [pDst], dstStep             ;// store {f0 to i1} at pDst[7  to 4 ]
+        M_STR    Out2,   [pDst], dstStep             ;// store {fL to i2} at pDst[11 to 8 ]
+        STR      Out3,   [pDst]                      ;// store {i0 to i3} at pDst[15 to 12]
+        MOV      return,  #OMX_Sts_NoErr
+        M_EXIT                                       ;// Macro to exit the function mid-way (the "break" of this case)
+
+OMX_VC_4x4_VR
+
+        ;// M_STALL ARM1136JS=4
+        ;// Case: uses the upper row, the above-left byte and Left0..Left2 only (pSrcLeft[3] is not read)
+        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
+        LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
+        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0     = 00 00 00 L0
+        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1     = 00 00 00 L1
+        LDRB     Left2,  [pSrcLeft]                  ;// Left2     = 00 00 00 L2
+        MOV      tVal0,  Above0123,  LSL #8          ;// tVal0     = U2 U1 U0 00 (overwrites pSrcLeft, already consumed)
+        MOV      tVal9,  Above0123                   ;// tVal9     = U3 U2 U1 U0
+        ORR      tVal14, tVal0,   AboveLeft          ;// tVal14    = U2 U1 U0 UL
+        MVN      tVal11, tVal14                      ;// tVal11    = U2'U1'U0'UL'
+        MOV      tVal2,  tVal14,  LSL #8             ;// tVal2     = U1 U0 UL 00
+        UHSUB8   tVal1,  Above0123,  tVal11          ;// tVal1     = d2 d1 d0 dL
+        UHADD8   tVal10, AboveLeft, Left1            ;// tVal10    = 00 00 00 j1
+        MVN      tVal4,  Left0                       ;// tVal4     = 00 00 00 L0'
+        UHSUB8   tVal4,  tVal10,  tVal4              ;// tVal4     = 00 00 00 k1
+        ORR      tVal12, tVal0,   Left0              ;// tVal12    = U2 U1 U0 L0
+        ORR      tVal14, tVal2,   Left0              ;// tVal14    = U1 U0 UL L0
+        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080 (overwrites tVal0, last read on the line for tVal12)
+        UHADD8   tVal10, tVal9,   tVal14             ;// tVal10    = g3 g2 g1 g0
+        UADD8    Out0,   tVal1,   r0x80808080        ;// Out0      = e2 e1 e0 eL
+        UHSUB8   tVal10, tVal10,  tVal11             ;// tVal10    = h3 h2 h1 h0
+        M_STR    Out0,   [pDst],  dstStep            ;// store {e2 to eL} at pDst[3  to 0 ]
+        MOV      tVal1,  tVal14,  LSL #8             ;// tVal1     = U0 UL L0 00
+        MOV      tVal6,  Out0,    LSL #8             ;// tVal6     = e1 e0 eL 00
+        ORR      tVal2,  tVal2,   Left1              ;// tVal2     = U1 U0 UL L1
+        UADD8    tVal4,  tVal4,   r0x80808080        ;// tVal4     = 00 00 00 l1
+        UADD8    Out1,   tVal10,  r0x80808080        ;// Out1      = i3 i2 i1 i0
+        MVN      tVal2,  tVal2                       ;// tVal2     = U1'U0'UL'L1'
+        ORR      tVal1,  tVal1,   Left2              ;// tVal1     = U0 UL L0 L2
+        ORR      Out2,   tVal6,   tVal4              ;// Out2      = e1 e0 eL l1
+        UHADD8   tVal1,  tVal1,   tVal12             ;// tVal1     = g2 g1 g0 j2
+        M_STR    Out1,   [pDst],  dstStep            ;// store {i3 to i0} at pDst[7  to 4 ]
+        M_STR    Out2,   [pDst],  dstStep            ;// store {e1 to l1} at pDst[11 to 8 ]
+        UHSUB8   tVal9,  tVal1,   tVal2              ;// tVal9     = h2 h1 h0 k2
+        UADD8    Out3,   tVal9,   r0x80808080        ;// Out3      = i2 i1 i0 l2
+        STR      Out3,   [pDst]                      ;// store {i2 to l2} at pDst[15 to 12]
+        MOV      return,  #OMX_Sts_NoErr
+        M_EXIT                                       ;// Macro to exit the function mid-way (the "break" of this case)
+        
+OMX_VC_4x4_HD
+        ;// Case: row 0 is stored early; rows 1..3 are interleaved from the halfword-lane results (e = 2-tap, l = 3-tap values in the comments)
+        ;// M_STALL ARM1136JS=4
+        
+        LDR      Above0123,  [pSrcAbove]             ;// Above0123 = U3 U2 U1 U0
+        LDRB     AboveLeft,  [pSrcAboveLeft]         ;// AboveLeft = 00 00 00 UL
+        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = 00 00 00 L0
+        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = 00 00 00 L1
+        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = 00 00 00 L2
+        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = 00 00 00 L3
+        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
+        ORR      tVal2,  AboveLeft, Above0123, LSL #8;// tVal2 = U2 U1 U0 UL
+        MVN      tVal1,  Left0                       ;// tVal1 = 00 00 00 L0'
+        ORR      tVal4,  Left0,  tVal2,  LSL #8      ;// tVal4 = U1 U0 UL L0
+        MVN      tVal2,  tVal2                       ;// tVal2 = U2'U1'U0'UL'
+        UHADD8   tVal4,  tVal4,  Above0123           ;// tVal4 = g3 g2 g1 g0
+        UHSUB8   tVal1,  AboveLeft,  tVal1           ;// tVal1 = 00 00 00 dL
+        UHSUB8   tVal4,  tVal4,  tVal2               ;// tVal4 = h3 h2 h1 h0
+        UADD8    tVal1,  tVal1,  r0x80808080         ;// tVal1 = 00 00 00 eL
+        UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = i3 i2 i1 i0
+        ORR      tVal2,  Left0,  AboveLeft,  LSL #16 ;// tVal2 = 00 UL 00 L0
+        MOV      tVal4,  tVal4,  LSL #8              ;// tVal4 = i2 i1 i0 00
+        ORR      tVal11, Left1,  Left0,  LSL #16     ;// tVal11= 00 L0 00 L1
+        ORR      tVal7,  Left2,  Left1,  LSL #16     ;// tVal7 = 00 L1 00 L2 (overwrites Left1)
+        ORR      tVal10, Left3,  Left2,  LSL #16     ;// tVal10= 00 L2 00 L3 (overwrites AboveLeft)
+        ORR      Out0,   tVal4,  tVal1               ;// Out0  = i2 i1 i0 eL (overwrites Left0)
+        M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ]
+        MOV      tVal4,  Out0,   LSL #16             ;// tVal4 = i1 i0 00 00
+        UHADD8   tVal2,  tVal2,  tVal7               ;// tVal2 = 00 j1 00 j2
+        UHADD8   tVal6,  tVal11, tVal10              ;// tVal6 = 00 j2 00 j3 (overwrites Out0, already stored)
+        MVN      tVal12, tVal11                      ;// tVal12= 00 L0'00 L1'
+        MVN      tVal14, tVal7                       ;// tVal14= 00 L1'00 L2'
+        UHSUB8   tVal2,  tVal2,  tVal12              ;// tVal2 = 00 k1 00 k2
+        UHSUB8   tVal8,  tVal7,  tVal12              ;// tVal8 = 00 d1 00 d2
+        UHSUB8   tVal11, tVal6,  tVal14              ;// tVal11= 00 k2 00 k3
+        UHSUB8   tVal9,  tVal10, tVal14              ;// tVal9 = 00 d2 00 d3
+        UADD8    tVal2,  tVal2,  r0x80808080         ;// tVal2 = 00 l1 00 l2
+        UADD8    tVal8,  tVal8,  r0x80808080         ;// tVal8 = 00 e1 00 e2
+        UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 l2 00 l3
+        UADD8    tVal9,  tVal9,  r0x80808080         ;// tVal9 = 00 e2 00 e3
+        ORR      Out2,   tVal8,  tVal2,  LSL #8      ;// Out2  = l1 e1 l2 e2
+        ORR      Out3,   tVal9,  tVal11, LSL #8      ;// Out3  = l2 e2 l3 e3
+        PKHTB    Out1,   tVal4,  Out2,   ASR #16     ;// Out1  = i1 i0 l1 e1
+        M_STR    Out1,   [pDst], dstStep             ;// store {Out1}  at pDst [4  to 7 ]
+        M_STR    Out2,   [pDst], dstStep             ;// store {Out2}  at pDst [8  to 11]
+        STR      Out3,   [pDst]                      ;// store {Out3}  at pDst [12 to 15]
+        MOV      return,  #OMX_Sts_NoErr
+        M_EXIT                                       ;// Macro to exit the function mid-way (the "break" of this case)
+        
+OMX_VC_4x4_VL
+        ;// Case: uses pSrcAbove[0 to 7]; when OMX_VC_UPPER_RIGHT is not set, bytes 4..7 are replaced by U3 replicated
+        ;// M_STALL ARM1136JS=3
+        
+        LDMIA    pSrcAbove, {Above0123, Above4567}   ;// Above0123, Above4567 = pSrcAbove[0 to 7] (loaded unconditionally)
+        TST      availability, #OMX_VC_UPPER_RIGHT   ;// flags are consumed by the MULEQ below (LDR/MOV do not change them)
+        LDR      r0x80808080,  =ADD_CONST1           ;// 0x80808080
+        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
+        MOV      tVal11, Above0123,  LSR #24         ;// tVal11= 00 00 00 U3
+        MULEQ    Above4567, tVal11, r0x01010101      ;// Above4567 = U3 U3 U3 U3 (only when upper-right is not available)
+        MOV      tVal9,  Above0123,  LSR #8          ;// tVal9 = 00 U3 U2 U1
+        MVN      tVal10, Above0123                   ;// tVal10= U3'U2'U1'U0'
+        ORR      tVal2,  tVal9,  Above4567,  LSL #24 ;// tVal2 = U4 U3 U2 U1
+        UHSUB8   tVal8,  tVal2,  tVal10              ;// tVal8 = d4 d3 d2 d1
+        UADD8    Out0,   tVal8,  r0x80808080         ;// Out0 = e4 e3 e2 e1
+        M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ]
+        MOV      tVal9,  tVal9,  LSR #8              ;// tVal9 = 00 00 U3 U2
+        MOV      tVal10, Above4567,  LSL #8          ;// tVal10= U6 U5 U4 00
+        PKHBT    tVal9,  tVal9,  Above4567, LSL #16  ;// tVal9 = U5 U4 U3 U2
+        ORR      tVal10, tVal10, tVal11              ;// tVal10= U6 U5 U4 U3
+        UHADD8   tVal11, tVal9,  Above0123           ;// tVal11= g5 g4 g3 g2
+        UHADD8   tVal14, tVal2,  tVal10              ;// tVal14= g6 g5 g4 g3
+        MVN      tVal8,  tVal2                       ;// tVal8 = U4'U3'U2'U1'
+        MVN      tVal7,  tVal9                       ;// tVal7 = U5'U4'U3'U2'
+        UHSUB8   tVal12, tVal9,  tVal8               ;// tVal12= d5 d4 d3 d2
+        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= h5 h4 h3 h2
+        UHSUB8   tVal2,  tVal14, tVal7               ;// tVal2 = h6 h5 h4 h3
+        UADD8    Out1,   tVal11, r0x80808080         ;// Out1  = i5 i4 i3 i2
+        UADD8    Out2,   tVal12, r0x80808080         ;// Out2  = e5 e4 e3 e2
+        UADD8    Out3,   tVal2,  r0x80808080         ;// Out3  = i6 i5 i4 i3
+        M_STR    Out1,   [pDst], dstStep             ;// store {Out1} at pDst [4  to 7 ]
+        M_STR    Out2,   [pDst], dstStep             ;// store {Out2} at pDst [8  to 11]
+        M_STR    Out3,   [pDst], dstStep             ;// store {Out3} at pDst [12 to 15] (pDst is not used again in this case; other cases use plain STR here)
+        MOV      return, #OMX_Sts_NoErr
+        M_EXIT                                       ;// Macro to exit the function mid-way (the "break" of this case)
+        
+OMX_VC_4x4_HU
+        ;// Case: uses the left column only; the last row is L3 replicated. This is the final case, so it ends with M_END instead of M_EXIT
+        ;// M_STALL ARM1136JS=2
+        
+        LDR      r0x01010101,  =MUL_CONST0           ;// 0x01010101
+        M_LDRB   Left0,  [pSrcLeft],  leftStep       ;// Left0 = pSrcLeft[0]
+        M_LDRB   Left1,  [pSrcLeft],  leftStep       ;// Left1 = pSrcLeft[1]
+        M_LDRB   Left2,  [pSrcLeft],  leftStep       ;// Left2 = pSrcLeft[2]
+        LDRB     Left3,  [pSrcLeft]                  ;// Left3 = pSrcLeft[3]
+        MOV      r0x80808080,  r0x01010101, LSL #7   ;// 0x80808080 (0x01010101 << 7; overwrites pSrcLeft, already consumed)
+        ORR      tVal6,  Left0,  Left1,  LSL #16     ;// tVal6 = 00 L1 00 L0
+        ORR      tVal7,  Left1,  Left2,  LSL #16     ;// tVal7 = 00 L2 00 L1
+        ORR      tVal11, Left2,  Left3,  LSL #16     ;// tVal11= 00 L3 00 L2
+        MUL      Out3,   Left3,  r0x01010101         ;// Out3  = L3 L3 L3 L3
+        MVN      tVal8,  tVal7                       ;// tVal8 = 00 L2'00 L1'
+        MVN      tVal10, tVal11                      ;// tVal10= 00 L3'00 L2'
+        UHADD8   tVal4,  tVal6,  tVal11              ;// tVal4 = 00 g3 00 g2
+        UXTB16   tVal12, Out3                        ;// tVal12= 00 L3 00 L3
+        UHSUB8   tVal4,  tVal4,  tVal8               ;// tVal4 = 00 h3 00 h2
+        UHSUB8   tVal6,  tVal6,  tVal8               ;// tVal6 = 00 d2 00 d1
+        UHSUB8   tVal11, tVal11, tVal8               ;// tVal11= 00 d3 00 d2
+        UHADD8   tVal12, tVal12, tVal7               ;// tVal12= 00 g4 00 g3
+        UADD8    tVal4,  tVal4,  r0x80808080         ;// tVal4 = 00 i3 00 i2
+        UHSUB8   tVal12, tVal12, tVal10              ;// tVal12= 00 h4 00 h3
+        UADD8    tVal8,  tVal6,  r0x80808080         ;// tVal8 = 00 e2 00 e1
+        UADD8    tVal11, tVal11, r0x80808080         ;// tVal11= 00 e3 00 e2
+        UADD8    tVal12, tVal12, r0x80808080         ;// tVal12= 00 i4 00 i3
+        ORR      Out0,   tVal8,  tVal4,  LSL #8      ;// Out0  = i3 e2 i2 e1
+        ORR      Out1,   tVal11, tVal12, LSL #8      ;// Out1  = i4 e3 i3 e2
+        M_STR    Out0,   [pDst], dstStep             ;// store {Out0}  at pDst [0  to 3 ]
+        PKHTB    Out2,   Out3,   Out1,   ASR #16     ;// Out2  = L3 L3 i4 e3
+        M_STR    Out1,   [pDst], dstStep             ;// store {Out1}  at pDst [4  to 7 ]
+        M_STR    Out2,   [pDst], dstStep             ;// store {Out2}  at pDst [8  to 11]
+        STR      Out3,   [pDst]                      ;// store {Out3}  at pDst [12 to 15]
+        MOV      return,  #OMX_Sts_NoErr
+        M_END
+
+        ENDIF ;// ARM1136JS
+        
+        
+        END
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_4x4 ends
+;//-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
new file mode 100644
index 0000000..53597a8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
@@ -0,0 +1,128 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        IMPORT armVCM4P10_QPDivTable
+        IMPORT armVCM4P10_VMatrixQPModTable
+            
+        M_VARIANTS ARM1136JS
+    
+
+    IF ARM1136JS
+
+;//--------------------------------------
+;// Declare input registers
+;//--------------------------------------
+ppSrc       RN 0
+pDst        RN 1
+QP          RN 2
+
+;//--------------------------------
+;// Scratch variable for Unpack2x2 
+;//--------------------------------
+pSrc        RN 9
+Value       RN 4
+Value2      RN 5
+Flag        RN 6
+strOffset   RN 7
+cstOffset   RN 8
+
+;//--------------------------------
+;// Scratch variable
+;//--------------------------------
+r0w0        RN  3
+r0w1        RN  4
+
+c0w0        RN  5
+c1w0        RN  6
+
+return      RN  0
+pQPDivTable RN  5
+pQPModTable    RN  6
+Shift        RN  9
+Scale        RN  2
+
+Temp1       RN  3
+Temp2       RN  4
+Temp3       RN  7
+Temp4       RN  8
+
+        ;// Write function header
+        M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9
+        ;// Unpacks the (flag, value) pairs at *ppSrc into pDst, applies the 2x2 butterfly and
+        ;// the QP-table based scaling in place, updates *ppSrc and returns OMX_Sts_NoErr in r0.
+        LDR     pSrc, [ppSrc]                        ;// pSrc = *ppSrc (current bitstream position)
+        MOV     cstOffset, #31                       ;// Mask applied to (Flag << 1) to form the store offset
+        ;// NOTE(review): mask 31 permits byte offsets up to 30 but only 8 bytes are zeroed below - confirm pDst size
+        ;//-----------------------------------------------------------------------
+        ;// Firstly, fill all the coefficient values on the <pDst> buffer by zero
+        ;//-----------------------------------------------------------------------
+        
+        MOV      Value,  #0                          ;// Initialize the zero value
+        MOV      Value2,  #0                         ;// Initialize the zero value
+        LDRB     Flag,  [pSrc], #1                   ;// Preload <Flag> before <unpackLoop>
+        STRD     Value, [pDst, #0]                   ;// pDst[0]  = pDst[1]  = pDst[2]  = pDst[3]  = 0
+        
+
+unpackLoop
+        TST      Flag,  #0x10                        ;// (Flag & 0x10) != 0 selects a two-byte value
+        LDRSBNE  Value2,[pSrc,#1]                    ;// Two-byte case: sign-extended high byte
+        LDRBNE   Value, [pSrc], #2                   ;// Two-byte case: low byte; byte loads avoid unaligned access
+        AND      strOffset, cstOffset, Flag, LSL #1  ;// strOffset = (Flag << 1) & 31 = (Flag & 15) << 1
+        LDRSBEQ  Value, [pSrc], #1                   ;// One-byte case: Value = sign-extended *pSrc++
+        ORRNE    Value,Value,Value2, LSL #8          ;// Two-byte case: Value = low | (high << 8)
+        
+        TST      Flag,  #0x20                        ;// (Flag & 0x20) != 0 marks the last pair
+        LDRBEQ   Flag,  [pSrc], #1                   ;// Not last: Flag = *pSrc++ for the next iteration
+        STRH     Value, [pDst, strOffset]            ;// Store low 16 bits of <Value> at byte offset <strOffset>
+        BEQ      unpackLoop                          ;// Loop while the last-pair bit is clear (flags from TST #0x20)
+        
+        LDMIA    pDst, {r0w0, r0w1}                  ;// r0w0 = |c1|c0| & r0w1 = |c3|c2|
+
+
+        STR      pSrc, [ppSrc]                       ;// Update the bitstream pointer
+        
+        LDR      pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
+        LDR      pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer
+        
+        SADDSUBX r0w0, r0w0,  r0w0                   ;// [ c00+c01, c00-c01 ]
+        SADDSUBX r0w1, r0w1,  r0w1                   ;// [ c10+c11, c10-c11 ]
+        
+        LDRSB    Shift, [pQPDivTable, QP]            ;// Shift = pQPDivTable[QP]
+        LDRSB    Scale, [pQPModTable, QP]            ;// Scale = pQPModTable[QP] (overwrites QP, same register)
+        
+        SADD16   c0w0, r0w0, r0w1                    ;// [ d00+d10, d01+d11 ]
+        SSUB16   c1w0, r0w0, r0w1                    ;// [ d00-d10, d01-d11 ]
+        
+        LSL      Scale, Scale, Shift                 ;// Scale = Scale << Shift
+        
+        SMULTB   Temp2, c0w0,  Scale                 ;// Temp2 = T(c0w0) * Scale
+        SMULTB   Temp4, c1w0,  Scale                 ;// Temp4 = T(c1w0) * Scale
+        SMULBB   Temp1, c0w0,  Scale                 ;// Temp1 = B(c0w0) * Scale
+        SMULBB   Temp3, c1w0,  Scale                 ;// Temp3 = B(c1w0) * Scale
+        MOV      Temp2, Temp2, ASR #1                ;// Temp2 = Temp2 >> 1 (Temp1's >> 1 is folded into PKHBT LSL #15)
+        MOV      Temp4, Temp4, ASR #1                ;// Temp4 = Temp4 >> 1 (Temp3's >> 1 is folded into PKHBT LSL #15)
+        PKHBT    c0w0,  Temp2, Temp1, LSL #15        ;// c0w0  = | Temp1 >> 1 | Temp2 |
+        PKHBT    c1w0,  Temp4, Temp3, LSL #15        ;// c1w0  = | Temp3 >> 1 | Temp4 |
+        STMIA    pDst, {c0w0, c1w0}                  ;// Storing all the coefficients at once
+        MOV      return, #OMX_Sts_NoErr
+        M_END
+        
+    ENDIF ;// ARM1136JS
+    
+    
+    
+    
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
new file mode 100644
index 0000000..73caec2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
@@ -0,0 +1,469 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// Description:
+;// H.264 inverse quantize and transform module
+;// 
+;// 
+
+;// Include standard headers
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+;// Import/Export symbols required from/to other files
+;// (For example tables)
+        
+        IMPORT armVCM4P10_UnpackBlock4x4 
+        IMPORT armVCM4P10_QPDivTable
+        IMPORT armVCM4P10_VMatrixQPModTable
+        
+        M_VARIANTS ARM1136JS
+
+;// Set debugging level        
+;//DEBUG_ON    SETL {TRUE}
+
+
+;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4
+    
+
+;// Guarding implementation by the processor name
+    
+    IF  ARM1136JS 
+
+
+;//Input Registers
+pData               RN  0
+QP                  RN  1
+
+;//Output Registers
+
+
+;//Local Scratch Registers
+
+;// Packed Input pixels
+in00                RN  2                   ;// Src[0] & Src[1] 
+in02                RN  3                   ;// Src[2] & Src[3]
+in10                RN  4                   ;// Src[4] & Src[5]
+in12                RN  5                   ;// Src[6] & Src[7]
+in20                RN  6                   ;// Src[8] & Src[9]
+in22                RN  7                   ;// Src[10] & Src[11]
+in30                RN  8                   ;// Src[12] & Src[13]
+in32                RN  9                   ;// Src[14] & Src[15]
+
+;// Transpose for Row operations (Rows to cols)
+trRow00             RN  2
+trRow10             RN  10
+trRow02             RN  3
+trRow12             RN  5
+trRow20             RN  11
+trRow30             RN  12
+trRow32             RN  14
+trRow22             RN  7
+
+;// Intermediate calculations
+rowSum1             RN  4
+rowSum2             RN  6
+rowDiff1            RN  8
+rowDiff2            RN  9
+
+
+;// Row operated pixels
+rowOp00             RN  2
+rowOp10             RN  10
+rowOp20             RN  11
+rowOp30             RN  12
+rowOp02             RN  3
+rowOp12             RN  5
+rowOp22             RN  7
+rowOp32             RN  14
+
+;// Transpose for column operations
+trCol00             RN  2                   
+trCol02             RN  3                   
+trCol10             RN  4                   
+trCol12             RN  5                   
+trCol20             RN  6                   
+trCol22             RN  7                   
+trCol30             RN  8                   
+trCol32             RN  9  
+
+;// Intermediate calculations
+colSum1             RN  10
+colSum2             RN  11
+colDiff1            RN  12
+colDiff2            RN  14
+
+
+;// Column operated pixels
+colOp00             RN  2                   
+colOp02             RN  3                   
+colOp10             RN  4                   
+colOp12             RN  5                   
+colOp20             RN  6                   
+colOp22             RN  7                   
+colOp30             RN  8                   
+colOp32             RN  9  
+
+;// Temporary scratch variables
+pQPDivTable         RN  0
+pQPModTable         RN  11
+Shift               RN  10
+Scale               RN  14
+Round               RN  0
+
+temp1               RN  10
+temp2                RN  11
+temp3               RN  12
+temp4               RN  1
+
+
+
+;// InvTransformed and Dequantized pixels
+out00               RN  2                   
+out02               RN  3                   
+out10               RN  4                   
+out12               RN  5                   
+out20               RN  6                   
+out22               RN  7                   
+out30               RN  8                   
+out32               RN  9  
+      
+        
+
+       
+    ;// Allocate stack memory required by the function
+        M_ALLOC4    pDataOnStack, 4
+
+    ;// Write function header
+        M_START armVCM4P10_InvTransformDequantLumaDC4x4,r11
+        
+        ;******************************************************************
+        ;// The strategy used in implementing the transform is as follows:*
+        ;// Load the 4x4 block into 8 registers                           *  
+        ;// Transpose the 4x4 matrix                                      *  
+        ;// Perform the row operations (on columns) using SIMD            *  
+        ;// Transpose the 4x4 result matrix                               *  
+        ;// Perform the column operations                                 *
+        ;// Store the 4x4 block at one go                                 *  
+        ;******************************************************************
+
+        ;// Load all the 4x4 pixels
+        
+        LDMIA   pData,{in00,in02,in10,in12,in20,in22,in30,in32}
+        
+        ;//*****************************************************************
+        ;//
+        ;// Transpose the matrix in order to perform row ops as column ops
+        ;// Input:   in[][] = original matrix
+        ;// Output:  trRow[][]= transposed matrix
+        ;// Step1: Obtain the LL part of the transposed matrix
+        ;// Step2: Obtain the HL part
+        ;// step3: Obtain the LH part
+        ;// Step4: Obtain the HH part
+        ;//
+        ;//*****************************************************************
+        
+        ;// LL 2x2 transposed matrix 
+        ;//   d0 d1 - -
+        ;//   d4 d5 - -
+        ;//   -  -  - -
+        ;//   -  -  - -
+        
+        PKHTB   trRow10,in10,in00,ASR #16               ;// [5 4] = [f5:f1]    
+        PKHBT   trRow00,in00,in10,LSL #16               ;// [1 0] = [f4:f0]  
+        
+        ;// HL 2x2 transposed matrix  
+        ;//    -   -   - -
+        ;//    -   -   - -
+        ;//    d8  d9  - -
+        ;//   d12 d13  - -
+        
+         
+         PKHTB   trRow30,in12,in02,ASR #16              ;// [13 12] = [7 3]
+         PKHBT   trRow20,in02,in12,LSL #16              ;// [9 8] = [6 2] 
+        
+        ;// LH 2x2 transposed matrix 
+        ;//   - - d2 d3 
+        ;//   - - d6 d7 
+        ;//   - - -  -
+        ;//   - - -  -
+        
+        PKHBT   trRow02,in20,in30,LSL #16               ;// [3 2] = [f12:f8]  
+        PKHTB   trRow12,in30,in20,ASR #16               ;// [7 6] = [f13:f9] 
+        
+        
+        
+         
+        ;// HH 2x2 transposed matrix  
+        ;//    - -   -   -
+        ;//    - -   -   -
+        ;//    - -  d10 d11
+        ;//    - -  d14 d15
+        
+        PKHTB   trRow32,in32,in22,ASR #16               ;// [15 14] = [15 11]
+        PKHBT   trRow22,in22,in32,LSL #16               ;// [11 10] = [14 10]
+       
+        
+        ;**************************************** 
+        ;// Row Operations (Performed on columns)
+        ;**************************************** 
+        
+        
+        ;// SIMD operations on first two columns(two rows of the original matrix)
+        
+        SADD16      rowSum1,trRow00,trRow10                ;// (c0+c1)
+        SADD16      rowSum2,trRow20,trRow30                ;// (c2+c3)
+        SSUB16      rowDiff1,trRow00,trRow10               ;// (c0-c1)
+        SSUB16      rowDiff2,trRow20,trRow30               ;// (c2-c3)
+        SADD16      rowOp00,rowSum1,rowSum2                ;// (c0+c1+c2+c3)
+        SSUB16      rowOp10,rowSum1,rowSum2                ;// (c0+c1-c2-c3)
+        SSUB16      rowOp20,rowDiff1,rowDiff2              ;// (c0-c1-c2+c3)
+        SADD16      rowOp30,rowDiff1,rowDiff2              ;// (c0-c1+c2-c3)
+        
+                
+        ;// SIMD operations on next two columns(next two rows of the original matrix)
+        
+        SADD16      rowSum1,trRow02,trRow12                ;// (c0+c1)
+        SADD16      rowSum2,trRow22,trRow32                ;// (c2+c3)
+        SSUB16      rowDiff1,trRow02,trRow12               ;// (c0-c1)
+        SSUB16      rowDiff2,trRow22,trRow32               ;// (c2-c3)
+        SADD16      rowOp02,rowSum1,rowSum2                ;// (c0+c1+c2+c3)
+        SSUB16      rowOp12,rowSum1,rowSum2                ;// (c0+c1-c2-c3)
+        SSUB16      rowOp22,rowDiff1,rowDiff2              ;// (c0-c1-c2+c3)
+        SADD16      rowOp32,rowDiff1,rowDiff2              ;// (c0-c1+c2-c3)
+        
+        
+        
+        ;*****************************************************************
+        ;// Transpose the resultant matrix
+        ;// Input:  rowOp[][]
+        ;// Output: trCol[][] 
+        ;*****************************************************************
+        
+        ;// LL 2x2 transposed matrix 
+        ;//   d0 d1 - -
+        ;//   d4 d5 - -
+        ;//   -  -  - -
+        ;//   -  -  - -
+        
+        PKHTB   trCol10,rowOp10,rowOp00,ASR #16           ;// [5 4] = [f5:f1]
+        PKHBT   trCol00,rowOp00,rowOp10,LSL #16           ;// [1 0] = [f4:f0]  
+        
+        ;// HL 2x2 transposed matrix  
+        ;//    -   -   - -
+        ;//    -   -   - -
+        ;//    d8  d9  - -
+        ;//   d12 d13  - -
+        
+         
+         PKHTB   trCol30,rowOp12,rowOp02,ASR #16          ;// [13 12] = [7 3]
+         PKHBT   trCol20,rowOp02,rowOp12,LSL #16          ;// [9 8] = [6 2] 
+        
+        ;// LH 2x2 transposed matrix 
+        ;//   - - d2 d3 
+        ;//   - - d6 d7 
+        ;//   - - -  -
+        ;//   - - -  -
+        
+        PKHBT   trCol02,rowOp20,rowOp30,LSL #16           ;// [3 2] = [f12:f8]  
+        PKHTB   trCol12,rowOp30,rowOp20,ASR #16           ;// [7 6] = [f13:f9] 
+        
+        
+        
+         
+        ;// HH 2x2 transposed matrix  
+        ;//    - -   -   -
+        ;//    - -   -   -
+        ;//    - -  d10 d11
+        ;//    - -  d14 d15
+        
+        PKHTB   trCol32,rowOp32,rowOp22,ASR #16            ;// [15 14] = [15 11]
+        PKHBT   trCol22,rowOp22,rowOp32,LSL #16            ;// [11 10] = [14 10]
+       
+        
+        ;******************************* 
+        ;// Column Operations 
+        ;******************************* 
+        
+        ;//--------------------------------------------------------------------------------------
+        ;// Store pData(RN0) on stack and restore it only at the final store back
+        ;// This frees up a register (RN0) which is used to reduce number of intermediate stalls 
+        ;//--------------------------------------------------------------------------------------
+        M_STR       pData,pDataOnStack
+        
+        
+        ;// SIMD operations on first two columns(two rows of the original matrix)
+                
+        SADD16      colSum1,trCol00,trCol10                ;// (c0+c1)
+        SADD16      colSum2,trCol20,trCol30                ;// (c2+c3)
+        SSUB16      colDiff1,trCol00,trCol10               ;// (c0-c1)
+        SSUB16      colDiff2,trCol20,trCol30               ;// (c2-c3)
+        SADD16      colOp00,colSum1,colSum2                ;// (c0+c1+c2+c3)
+        SSUB16      colOp10,colSum1,colSum2                ;// (c0+c1-c2-c3)
+        SSUB16      colOp20,colDiff1,colDiff2              ;// (c0-c1-c2+c3)
+        SADD16      colOp30,colDiff1,colDiff2              ;// (c0-c1+c2-c3)
+        
+                
+        ;// SIMD operations on next two columns(next two rows of the original matrix)
+        
+        LDR         pQPDivTable, =armVCM4P10_QPDivTable    ;// QP Division look-up-table base pointer
+        SADD16      colSum1,trCol02,trCol12                ;// (c0+c1)
+        SADD16      colSum2,trCol22,trCol32                ;// (c2+c3)
+        SSUB16      colDiff1,trCol02,trCol12               ;// (c0-c1)
+        SSUB16      colDiff2,trCol22,trCol32               ;// (c2-c3)
+        SADD16      colOp02,colSum1,colSum2                ;// (c0+c1+c2+c3)
+        SSUB16      colOp12,colSum1,colSum2                ;// (c0+c1-c2-c3)
+        LDR         pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer
+        LDRSB       Shift, [pQPDivTable, QP]               ;// Shift = pQPDivTable[QP]
+        SSUB16      colOp22,colDiff1,colDiff2              ;// (c0-c1-c2+c3)
+        SADD16      colOp32,colDiff1,colDiff2              ;// (c0-c1+c2-c3)
+        
+               
+        LDRSB       Scale, [pQPModTable, QP]               ;// Scale = pQPModTable[QP] 
+        
+        ;//----------------------------------------------------------------------
+        ;//
+        ;// <Dequantize> improves on the c-reference code
+        ;// Both the  cases i.e., Shift>=0 and Shift<0 cases are covered together
+        ;// We do not subtract 2 from Shift as in C reference, instead perform a
+        ;// Scale << Shift once in the beginning and do a right shift by a 
+        ;// constant 2 after the Multiplication. The value of Round would be 2 
+        ;// 
+        ;// By doing this we avoid the Branches required and also 
+        ;// reduce the code size substantially
+        ;// 
+        ;//----------------------------------------------------------------------
+        
+        MOV         Round, #2                               ;// Round = 2 (reuses r0; pData is saved on the stack)
+        LSL         Scale, Scale, Shift                     ;// Scale = Scale << Shift
+                
+        
+        ;// Row 1
+        SMLABB  temp1, colOp00, Scale, Round                ;// temp1 = B(colOp00) * Scale + Round
+        SMLABB  temp3, colOp02, Scale, Round                ;// temp3 = B(colOp02) * Scale + Round
+        SMLATB  temp2, colOp00, Scale, Round                ;// temp2 = T(colOp00) * Scale + Round
+        SMLATB  temp4, colOp02, Scale, Round                ;// temp4 = T(colOp02) * Scale + Round
+        
+        ASR     temp1, temp1, #2                            ;// temp1 = temp1 >> 2
+        ASR     temp3, temp3, #2                            ;// temp3 = temp3 >> 2
+        PKHBT   out00,  temp1, temp2, LSL #14               ;// out00 = | temp2 >> 2 | temp1 | (LSL #14 folds the >> 2)
+        PKHBT   out02,  temp3, temp4, LSL #14               ;// out02 = | temp4 >> 2 | temp3 |
+        
+        
+        ;// Row 2
+        SMLABB  temp1, colOp10, Scale, Round                ;// temp1 = B(colOp10) * Scale + Round
+        SMLABB  temp3, colOp12, Scale, Round                ;// temp3 = B(colOp12) * Scale + Round
+        SMLATB  temp2, colOp10, Scale, Round                ;// temp2 = T(colOp10) * Scale + Round
+        SMLATB  temp4, colOp12, Scale, Round                ;// temp4 = T(colOp12) * Scale + Round
+        
+        ASR     temp1, temp1, #2                            ;// temp1 = temp1 >> 2
+        ASR     temp3, temp3, #2                            ;// temp3 = temp3 >> 2
+        PKHBT   out10,  temp1, temp2, LSL #14               ;// out10 = | temp2 >> 2 | temp1 |
+        PKHBT   out12,  temp3, temp4, LSL #14               ;// out12 = | temp4 >> 2 | temp3 |
+        
+        ;// Row 3
+        SMLABB  temp1, colOp20, Scale, Round                ;// temp1 = B(colOp20) * Scale + Round
+        SMLABB  temp3, colOp22, Scale, Round                ;// temp3 = B(colOp22) * Scale + Round
+        SMLATB  temp2, colOp20, Scale, Round                ;// temp2 = T(colOp20) * Scale + Round
+        SMLATB  temp4, colOp22, Scale, Round                ;// temp4 = T(colOp22) * Scale + Round
+        
+        ASR     temp1, temp1, #2                            ;// temp1 = temp1 >> 2 
+        ASR     temp3, temp3, #2                            ;// temp3 = temp3 >> 2
+        PKHBT   out20,  temp1, temp2, LSL #14               ;// out20 = | temp2 >> 2 | temp1 |
+        PKHBT   out22,  temp3, temp4, LSL #14               ;// out22 = | temp4 >> 2 | temp3 |
+        
+        ;// Row 4
+        SMLABB  temp1, colOp30, Scale, Round                ;// temp1 = B(colOp30) * Scale + Round
+        SMLABB  temp3, colOp32, Scale, Round                ;// temp3 = B(colOp32) * Scale + Round
+        SMLATB  temp2, colOp30, Scale, Round                ;// temp2 = T(colOp30) * Scale + Round
+        SMLATB  temp4, colOp32, Scale, Round                ;// temp4 = T(colOp32) * Scale + Round
+        
+        M_LDR   pData,pDataOnStack                          ;// Restore pData (r0); Round is no longer needed
+        ASR     temp1, temp1, #2                            ;// temp1 = temp1 >> 2
+        ASR     temp3, temp3, #2                            ;// temp3 = temp3 >> 2
+        PKHBT   out30,  temp1, temp2, LSL #14               ;// out30 = | temp2 >> 2 | temp1 |
+        PKHBT   out32,  temp3, temp4, LSL #14               ;// out32 = | temp4 >> 2 | temp3 |
+        
+        
+        
+        ;***************************
+        ;// Store all the 4x4 pixels
+        ;***************************
+
+store_coeff
+        
+        STMIA   pData,{out00,out02,out10,out12,out20,out22,out30,out32}
+        
+                               
+       
+        ;// No return value is set explicitly by this static helper
+        
+       
+        ;// Write function tail
+        M_END        
+        
+    ENDIF                                                           ;//ARM1136JS        
+    
+
+;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4
+
+;// Guarding implementation by the processor name
+    
+        
+
+
+;// Function: omxVCM4P10_TransformDequantLumaDCFromPair
+    
+;//Input Registers
+ppSrc               RN  0
+pDst                RN  1
+QPR2                RN  2
+
+;//Output Registers
+result              RN  0
+
+;//Local Scratch Registers
+pDstR4              RN  4
+pDstR0              RN  0
+QPR1                RN  1
+QPR5                RN  5
+
+;// Guarding implementation by the processor name
+    
+    IF ARM1136JS
+       
+    ;// Allocate stack memory required by the function
+        
+
+    ;// Write function header
+        M_START omxVCM4P10_TransformDequantLumaDCFromPair,r5
+        ;// Calls armVCM4P10_UnpackBlock4x4, then armVCM4P10_InvTransformDequantLumaDC4x4(pDst, QP)
+        MOV     pDstR4,pDst                         ;// Keep pDst (r1) in r4 across the first call
+        MOV     QPR5,QPR2                           ;// Keep QP (r2) in r5 across the first call
+        BL      armVCM4P10_UnpackBlock4x4
+        
+        MOV     pDstR0,pDstR4                       ;// r0 = pDst, first argument of the second call
+        MOV     QPR1,QPR5                           ;// r1 = QP, second argument of the second call
+        BL      armVCM4P10_InvTransformDequantLumaDC4x4
+                               
+       
+        ;// Set return value
+        MOV     result,#OMX_Sts_NoErr        
+       
+        ;// Write function tail
+        M_END
+        
+            
+    ENDIF                                                           ;//ARM1136JS  
+    
+
+    END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
new file mode 100644
index 0000000..22115d3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
@@ -0,0 +1,37 @@
+/**
+ * 
+ * File Name:  armVCM4P2_Huff_Tables_VLC.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ *
+ * File:        armVCM4P2_Huff_Tables_VLC.h
+ * Description: Declares tables used for Huffman coding and decoding 
+ *              in MP4P2 codec.
+ *
+ */
+ 
+#ifndef _OMXHUFFTAB_H_
+#define _OMXHUFFTAB_H_
+
+/* External table declarations only; the definitions live in another translation unit. */
+extern const OMX_U16 armVCM4P2_IntraVlcL0L1[200];
+
+
+extern const OMX_U16 armVCM4P2_InterVlcL0L1[200];
+
+extern const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64];
+//extern const OMX_U16 armVCM4P2_aIntraDCChromaIndex[32];
+extern const OMX_U16 armVCM4P2_aVlcMVD[124];
+
+extern const OMX_U8 armVCM4P2_InterL0L1LMAX[73];
+extern const OMX_U8 armVCM4P2_InterL0L1RMAX[35];
+extern const OMX_U8 armVCM4P2_IntraL0L1LMAX[53];
+extern const OMX_U8 armVCM4P2_IntraL0L1RMAX[40];
+
+#endif /* _OMXHUFFTAB_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
new file mode 100644
index 0000000..d5f865c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
@@ -0,0 +1,25 @@
+/**
+ * 
+ * File Name:  armVCM4P2_ZigZag_Tables.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ *
+ * File:        armVCM4P2_ZigZag_Tables.h
+ * Description: Declares Tables used for Zigzag scan in MP4P2 codec.
+ *
+ */
+ 
+#ifndef _OMXZIGZAGTAB_H
+#define _OMXZIGZAGTAB_H
+
+extern const OMX_U8 armVCM4P2_aClassicalZigzagScan [192];
+//extern const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64];
+//extern const OMX_U8 armVCM4P2_aVerticalZigzagScan [64];
+
+#endif /* _OMXZIGZAGTAB_H */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s
new file mode 100644
index 0000000..7801e57
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s
@@ -0,0 +1,75 @@
+; /**
+; * 
+; * File Name:  armVCM4P2_Clip8_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains module for Clipping 16 bit value to [0,255] Range
+; */ 
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      
+
+      M_VARIANTS ARM1136JS
+
+        
+     
+      IF ARM1136JS
+ 
+;//Input Arguments
+
+pSrc                 RN 0
+pDst                 RN 1
+step                 RN 2
+
+;// Local variables
+
+x0                   RN 3
+x1                   RN 4
+x2                   RN 5
+x3                   RN 6
+
+Count                RN 14
+     
+        
+        M_START armVCM4P2_Clip8,r6
+        ;// Saturates 8 rows of eight 16-bit samples read from pSrc to [0,255] and stores
+        ;// them as bytes at pDst; pSrc advances contiguously, pDst advances by <step> per row.
+        MOV          Count,#8
+loop
+
+        LDMIA        pSrc!,{x0, x1}
+        SUBS         Count,Count, #1          ;// count down; these flags are consumed by BGT below
+        LDMIA        pSrc!,{x2, x3}
+        USAT16       x0, #8, x0                 ;// clip two samples to [0,255]
+        USAT16       x1, #8, x1                 ;// clip two samples to [0,255]
+        STRB         x0, [pDst]
+        MOV          x0, x0, LSR #16
+        STRB         x0, [pDst,#1]
+        STRB         x1, [pDst,#2]
+        MOV          x1, x1, LSR #16
+        STRB         x1, [pDst,#3]
+                
+        USAT16       x2, #8, x2                 ;// clip two samples to [0,255]
+        USAT16       x3, #8, x3                 ;// clip two samples to [0,255]
+        STRB         x2, [pDst,#4]
+        MOV          x2, x2, LSR #16
+        STRB         x2, [pDst,#5]
+        STRB         x3, [pDst,#6]
+        MOV          x3, x3, LSR #16
+        STRB         x3, [pDst,#7]
+        ADD          pDst,pDst,step             ;// Increment pDst by step value
+         
+        BGT          loop                       ;// Loop while Count > 0 (8 rows x 8 samples = 64 in total)
+
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
new file mode 100644
index 0000000..9e30900
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
@@ -0,0 +1,398 @@
+;/**
+; * 
+; * File Name:  armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter, intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_AC_unsafe
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan 
+; *
+; * 
+; *
+; * 
+; */
+
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      INCLUDE armCOMM_BitDec_s.h
+
+
+      M_VARIANTS ARM1136JS
+
+     
+
+
+
+     IF ARM1136JS
+     
+        
+
+
+
+;//Input Arguments
+
+ppBitStream          RN 0
+pBitOffset           RN 1
+pDst                 RN 2
+shortVideoHeader     RN 3
+
+
+;//Local Variables
+
+Return               RN 0
+
+pVlcTableL0L1        RN 4
+pLMAXTableL0L1       RN 4
+pRMAXTableL0L1       RN 4
+pZigzagTable         RN 4
+
+ftype                RN 0
+temp3                RN 4
+temp                 RN 5
+Count                RN 6
+Escape               RN 5
+
+;// armVCM4P2_FillVLDBuffer
+zigzag               RN 0
+storeLevel           RN 1
+temp2                RN 4
+temp1                RN 5
+sign                 RN 5
+Last                 RN 7
+storeRun             RN 14
+
+
+packRetIndex         RN 5
+
+
+markerbit            RN 5
+
+;// Scratch Registers
+
+RBitStream           RN 8
+RBitBuffer           RN 9
+RBitCount            RN 10
+
+T1                   RN 11
+T2                   RN 12
+LR                   RN 14        
+        
+
+
+        M_ALLOC4        pppBitStream,4
+        M_ALLOC4        ppOffset,4
+        M_ALLOC4        pLinkRegister,4       
+        ;// The three stack slots above save ppBitStream, pBitOffset and LR, because r0, r1 and r14 are reused below
+        M_START armVCM4P2_DecodeVLCZigzag_AC_unsafe
+        ;// NOTE(review): Count (r6) is not initialised in this routine; presumably supplied by the caller - confirm
+        ;// get the table addresses from stack       
+        M_ARG           ppVlcTableL0L1,4
+        M_ARG           ppLMAXTableL0L1,4
+        M_ARG           ppRMAXTableL0L1,4
+        M_ARG           ppZigzagTable,4
+        
+        ;// Store ALL zeros at pDst
+        
+        MOV             temp1,#0                                        ;// Zero the four registers (temp2, temp1, Last, LR) written by each STM below
+        MOV             Last,#0
+        M_STR           LR,pLinkRegister                                ;// Store Link Register on Stack
+        MOV             temp2,#0
+        MOV             LR,#0          
+        
+        ;// Initialize the bit-decoder macros, interleaved with 8 x 16-byte zero stores (128 bytes) to pDst 
+  
+        STM             pDst!,{temp2,temp1,Last,LR}                   
+        M_BD_INIT0      ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount  
+        STM             pDst!,{temp2,temp1,Last,LR}
+        M_BD_INIT1      T1, T2, T2
+        STM             pDst!,{temp2,temp1,Last,LR}
+        M_BD_INIT2      T1, T2, T2
+        STM             pDst!,{temp2,temp1,Last,LR}
+        M_STR           ppBitStream,pppBitStream                        ;// Store ppBitstream on stack                         
+        STM             pDst!,{temp2,temp1,Last,LR}
+        M_STR           pBitOffset,ppOffset                             ;// Store pBitOffset on stack
+        STM             pDst!,{temp2,temp1,Last,LR}
+        
+        STM             pDst!,{temp2,temp1,Last,LR}
+        STM             pDst!,{temp2,temp1,Last,LR}
+ 
+        
+        SUB             pDst,pDst,#128                                  ;// Restore pDst (undo the 8 x 16-byte post-increments)
+
+        ;// The armVCM4P2_GetVLCBits begins
+
+getVLCbits
+        
+        M_BD_LOOK8      Escape,7                                        ;// Load Escape Value
+        LSR             Escape,Escape,#25                                                  
+        CMP             Escape,#3                                       ;// check for escape mode
+        MOVNE           ftype,#0
+        BNE             notEscapemode                                   ;// Branch if not in Escape mode 3
+
+        M_BD_VSKIP8     #7,T1
+        CMP             shortVideoHeader,#0                             ;// Check shortVideoHeader flag to know the type of Escape mode
+        BEQ             endFillVLD                                      ;// shortVideoHeader==0: escape type is read from the following bits
+        
+        ;// Escape Mode 4 (shortVideoHeader != 0): fixed-length Last(1), Run(6), Level(8)
+
+        M_BD_READ8      Last,1,T1
+        M_BD_READ8      storeRun,6,T1
+        M_BD_READ8      storeLevel,8,T1
+
+           
+        ;// Check whether the Reserved values for Level are used and Exit with an Error Message if it is so
+
+        TEQ             storeLevel,#0
+        TEQNE           storeLevel,#128                    
+        BEQ             ExitError
+
+        ADD             temp2,storeRun,Count
+        CMP             temp2,#64
+        BGE             ExitError                                       ;// error if Count+storeRun >= 64
+        
+        
+        ;// Load address of zigzagTable
+        
+        M_LDR           pZigzagTable,ppZigzagTable                      ;// Loading the Address of Zigzag table
+               
+                
+        ;// armVCM4P2_FillVLDBuffer
+                
+        SXTB            storeLevel,storeLevel                           ;// Sign Extend storeLevel to 32 bits
+                              
+        
+        ;// To Reflect Runlength
+
+        ADD             Count,Count,storeRun
+        LDRB            zigzag,[pZigzagTable,Count]
+        ADD             Count,Count,#1
+        STRH            storeLevel,[pDst,zigzag]                        ;// store Level
+        ;// NOTE(review): branches to ExitOk even when Last==0, in which case ExitOk returns OMX_Sts_Err - confirm intended
+        B               ExitOk
+       
+        
+
+endFillVLD
+        
+               
+        ;// Load Ftype( Escape Mode) value based on the two successive bits in the bitstream
+     
+        M_BD_READ8      temp1,1,T1           
+        CMP             temp1,#0    
+        MOVEQ           ftype,#1
+        BEQ             notEscapemode
+        M_BD_READ8      temp1,1,T1
+        CMP             temp1,#1
+        MOVEQ           ftype,#3
+        MOVNE           ftype,#2
+        
+
+notEscapemode
+
+        ;// Load optimized packed VLC table with last=0 and Last=1
+        
+        M_LDR           pVlcTableL0L1,ppVlcTableL0L1                    ;// Load Combined VLC Table
+                
+       
+        CMP             ftype,#3                                        ;// If ftype >= 3 perform Fixed Length Decoding (Escape Mode 3)
+        BGE             EscapeMode3                                     ;// Else continue normal VLC Decoding
+        
+        ;// Variable length decoding, "armUnPackVLC32" 
+        
+        
+        M_BD_VLD        packRetIndex,T1,T2,pVlcTableL0L1,4,2
+        
+        
+        LDR             temp3,=0xFFF
+        
+        CMP             packRetIndex,temp3                              ;// Check for invalid symbol
+        BEQ             ExitError                                       ;// if invalid symbol occurs exit with an error message
+        
+        AND             Last,packRetIndex,#2                            ;// Get Last from packed Index (Last is 0 or 2 at this point)
+              
+         
+        
+
+        LSR             storeRun,packRetIndex,#7                        ;// Get Run Value from Packed index
+        AND             storeLevel,packRetIndex,#0x7c                   ;// storeLevel=packRetIndex[2-6],storeLevel[0-1]=0 
+                                                                        
+     
+        M_LDR           pLMAXTableL0L1,ppLMAXTableL0L1                  ;// Load LMAX table
+              
+       
+        LSR             storeLevel,storeLevel,#2                        ;// Level value
+
+        CMP             ftype,#1                                    
+        BNE             ftype2
+        
+        ;// ftype==1; Escape mode =1
+          
+        
+        ADD            temp1, pLMAXTableL0L1, Last, LSL#4              ;// Last flag set (Last==2): 2<<4 adds 32 to the table address
+        LDRB            temp1,[temp1,storeRun]
+
+       
+        ADD             storeLevel,temp1,storeLevel                     
+
+ftype2
+
+        ;// ftype =2; Escape mode =2
+        
+        M_LDR           pRMAXTableL0L1,ppRMAXTableL0L1                  ;// Load RMAX Table 
+                
+        CMP             ftype,#2
+        BNE             FillVLDL1
+                  
+        ADD            temp1, pRMAXTableL0L1, Last, LSL#4               ;// Last flag set (Last==2): 2<<4 adds 32 to the table address
+        SUB             temp2,storeLevel,#1
+        LDRB            temp1,[temp1,temp2]
+
+       
+        ADD             storeRun,storeRun,#1
+        ADD             storeRun,temp1
+        
+FillVLDL1        
+            
+                
+        ;// armVCM4P2_FillVLDBuffer
+
+        M_LDR           pZigzagTable,ppZigzagTable                     ;// Load address of zigzagTable 
+                
+        M_BD_READ8      sign,1,T1
+
+        CMP             sign,#1
+        RSBEQ           storeLevel,storeLevel,#0                       ;// Negate the level when the sign bit is 1
+ 
+        ADD             temp1,storeRun,Count                           ;// Exit with an error message if Run + Count exceeds 63
+        CMP             temp1,#64
+        BGE             ExitError
+
+      
+        
+        
+              
+        
+        ;// To Reflect Runlength
+
+        ADD             Count,Count,storeRun
+ 
+storeLevelL1
+        
+        LDRB            zigzag,[pZigzagTable,Count]
+        CMP             Last,#2                                         ;// Check if the Level val is Last non zero val
+        ADD             Count,Count,#1
+        LSR             Last,Last,#1                                    ;// Normalise Last from 0/2 to 0/1 (ExitOk compares it with 1)
+        STRH            storeLevel,[pDst,zigzag]                  
+           
+        BNE             end
+        
+        B               ExitOk
+ 
+
+
+        ;// Fixed Length Decoding Escape Mode 3
+
+EscapeMode3
+
+        M_BD_READ8      Last,1,T1
+        M_BD_READ8      storeRun,6,T1
+        
+        ADD             temp2,storeRun,Count                            ;// Exit with an error message if Run + Count exceeds 63
+        CMP             temp2,#64
+        BGE             ExitError
+
+        M_BD_READ8      markerbit,1,T1
+        TEQ             markerbit,#0                                    ;// Exit with an error message if marker bit is zero
+        BEQ             ExitError
+        
+        M_BD_READ16     storeLevel,12,T1
+
+        TST             storeLevel,#0x800                               ;// test if the level is negative
+        SUBNE           storeLevel,storeLevel,#4096
+        CMP             storeLevel,#0
+        CMPNE           storeLevel,#-2048
+        BEQ             ExitError                                       ;// Exit with an error message if Level==0 or  -2048 
+
+        M_LDR           pZigzagTable,ppZigzagTable                      ;// Load address of zigzagTable
+              
+        M_BD_READ8      markerbit,1,T1
+        ;// The second marker bit is consumed but its value is not checked
+
+        ;// armVCM4P2_FillVLDBuffer ( Sign not used as storeLevel is preprocessed)
+            
+               
+
+        ;// To Reflect Run Length
+
+        ADD             Count,Count,storeRun
+
+
+ 
+storeLevelLast
+        
+        LDRB            zigzag,[pZigzagTable,Count]
+        CMP             Last,#1
+        ADD             Count,Count,#1
+        STRH            storeLevel,[pDst,zigzag]                          
+                
+        BNE             end 
+      
+        B               ExitOk
+        
+end
+
+        CMP             Count,#64                                       ;//Run the Loop until Count reaches 64
+
+        BLT             getVLCbits
+
+        
+ExitOk
+        ;// Exit When VLC Decoding is done Successfully 
+   
+        ;// Loading ppBitStream and pBitOffset from stack
+        
+        CMP             Last,#1                                         ;// Result is NoErr only if the final coefficient had Last==1
+        M_LDR           ppBitStream,pppBitStream
+        M_LDR           pBitOffset,ppOffset
+        ;// NOTE(review): MOVEQ/MOVNE below use the flags of the CMP above; assumes M_LDR/M_BD_FINI leave them intact - confirm
+        ;//Ending the macro
+
+        M_BD_FINI       ppBitStream,pBitOffset
+             
+        MOVEQ           Return,#OMX_Sts_NoErr
+        MOVNE           Return,#OMX_Sts_Err
+        M_LDR           LR,pLinkRegister                               ;// Load the Link Register Back
+        B               exit2
+
+ExitError
+        ;// Exit When an Error occurs 
+
+        M_LDR           ppBitStream,pppBitStream
+        M_LDR           pBitOffset,ppOffset
+        ;//Ending the macro
+
+        M_BD_FINI       ppBitStream,pBitOffset
+        M_LDR           LR,pLinkRegister
+        MOV             Return,#OMX_Sts_Err
+
+exit2
+       
+
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
new file mode 100644
index 0000000..ba4d058
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
@@ -0,0 +1,211 @@
+ /**
+ * 
+ * File Name:  armVCM4P2_Huff_Tables_VLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File:        armVCM4P2_Huff_Tables_VLC.c
+ * Description: Contains all the Huffman tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armCOMM_Bitstream.h"
+
+
+
+
+// Contains optimized and Packed VLC tables with Last=0 and Last=1
+
+//              optimized Packed VLC table Entry Format 
+//              ---------------------------------------
+// 
+//        15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+//       +------------------------------------------------+
+//       |  Len   |       Run       |     Level    |L | 1 |
+//       +------------------------------------------------+
+//       |                Offset                      | 0 |
+//       +------------------------------------------------+
+// If the table entry is a leaf entry then bit 0 set:
+//    Len    = Number of bits overread  (0 to 7)  3 bits
+//    Run    = RunLength of the Symbol  (0 to 63) 6 bits
+//    Level  = Level of the Symbol      (0 to 31) 5 bits
+//    L      = Last Value of the Symbol (0 or 1)  1 bit
+//
+// If the table entry is an internal node then bit 0 is clear:
+//    Offset = Number of (16-bit) half words from the table
+//             start to the next table node
+//
+// The table is accessed by successive lookups on the
+// next Step bits of the input bitstream until a leaf node
+// is obtained. The Step sizes are supplied to the VLD macro.
+
+// The VLC tables used for Intra and non-Intra coefficients in non-Escape mode
+// contain symbols with both Last=0 and Last=1.
+// If a symbol is not found in the table it will be coded as 0xFFF
+// Packed VLC table for Inter coefficients: 200 halfword entries, Last=0 and Last=1 symbols combined.
+// NOTE(review): 0xb759 (twice) and 0x0b35 break the pattern of their neighbouring entries - confirm against the reference table.
+const OMX_U16 armVCM4P2_InterVlcL0L1[200] = {
+    0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x1b09,
+    0x4009, 0x4009, 0x4009, 0x4009, 0x2109, 0x2109, 0x0209, 0x0011,
+    0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+    0x3fff, 0x3fff, 0x0038, 0x0040, 0x2115, 0x2115, 0x201d, 0x201d,
+    0x2059, 0x2059, 0x2051, 0x2051, 0x1c0d, 0x1b0d, 0x1a0d, 0x190d,
+    0x0911, 0x0811, 0x0711, 0x0611, 0x0511, 0x0319, 0x0219, 0x0121,
+    0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+    0x2061, 0x2061, 0x2129, 0x2129, 0x3709, 0x3709, 0x3809, 0x3809,
+    0x3d0d, 0x3d0d, 0x3e0d, 0x3e0d, 0x3f0d, 0x3f0d, 0x200d, 0x200d,
+    0x0098, 0x00a0, 0x00a8, 0x00b0, 0x0131, 0x0221, 0x0419, 0x0519,
+    0x0619, 0x0a11, 0x1909, 0x1a09, 0x210d, 0x220d, 0x230d, 0x240d,
+    0x250d, 0x260d, 0x270d, 0x280d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+    0x0049, 0x0041, 0x380d, 0x380d, 0x370d, 0x370d, 0x360d, 0x360d,
+    0x350d, 0x350d, 0x340d, 0x340d, 0x330d, 0x330d, 0x320d, 0x320d,
+    0x00e8, 0x00f0, 0x00f8, 0x0100, 0x310d, 0x310d, 0x2015, 0x2015,
+    0x3609, 0x3609, 0x3509, 0x3509, 0x3409, 0x3409, 0x3309, 0x3309,
+    0x3209, 0x3209, 0x3109, 0x3109, 0x0110, 0x0130, 0x0138, 0x0140,
+    0x0118, 0x0120, 0x0128, 0x100d, 0x3009, 0x3009, 0x2f09, 0x2f09,
+    0x2411, 0x2411, 0x2311, 0x2311, 0x2039, 0x2039, 0x2031, 0x2031,
+    0x0f0d, 0x0e0d, 0x0d0d, 0x0c0d, 0x0b0d, 0x0a0d, 0x090d, 0x0e09,
+    0x0d09, 0x0211, 0x0119, 0x0029, 0x0150, 0x0158, 0x0160, 0x0168,
+    0x280d, 0x280d, 0x270d, 0x270d, 0x260d, 0x260d, 0x250d, 0x250d,
+    0x2c09, 0x2c09, 0xb759, 0xb759, 0x2a09, 0x2a09, 0x2021, 0x2021,
+    0x040d, 0x030d, 0x0b35, 0x010d, 0x0909, 0x0809, 0x0709, 0x0609,
+    0x0111, 0x0019, 0x2509, 0x2509, 0x2409, 0x2409, 0x2309, 0x2309
+};
+// Packed VLC table for Intra coefficients: 200 halfword entries, Last=0 and Last=1 symbols combined.
+// NOTE(review): 0x1bb5 and 0xd3f9 break the pattern of their neighbouring entries - confirm against the reference table.
+const OMX_U16 armVCM4P2_IntraVlcL0L1[200] = {
+    0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x0f09,
+    0x4009, 0x4009, 0x4009, 0x4009, 0x2011, 0x2011, 0x0109, 0x0019,
+    0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+    0x3fff, 0x3fff, 0x0038, 0x0040, 0x203d, 0x203d, 0x2035, 0x2035,
+    0x20b1, 0x20b1, 0x20a9, 0x20a9, 0x0215, 0x011d, 0x002d, 0x0d09,
+    0x0519, 0x0811, 0x0419, 0x0321, 0x0221, 0x0139, 0x00a1, 0x0099,
+    0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+    0x20b9, 0x20b9, 0x20c1, 0x20c1, 0x2141, 0x2141, 0x2911, 0x2911,
+    0x2315, 0x2315, 0x2415, 0x2415, 0x2f0d, 0x2f0d, 0x300d, 0x300d,
+    0x0098, 0x00a0, 0x00a8, 0x00b0, 0x00c9, 0x00d1, 0x00d9, 0x0149,
+    0x0619, 0x0151, 0x0229, 0x0719, 0x0e09, 0x0045, 0x0515, 0x0615,
+    0x110d, 0x120d, 0x130d, 0x140d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+    0x0091, 0x0089, 0x2e0d, 0x2e0d, 0x2d0d, 0x2d0d, 0x2c0d, 0x2c0d,
+    0x2b0d, 0x2b0d, 0x2a0d, 0x2a0d, 0x2115, 0x2115, 0x2025, 0x2025,
+    0x00e8, 0x00f0, 0x00f8, 0x0100, 0x2c09, 0x2c09, 0x2b09, 0x2b09,
+    0x2711, 0x2711, 0x2611, 0x2611, 0x2511, 0x2511, 0x2319, 0x2319,
+    0x2219, 0x2219, 0x2131, 0x2131, 0x0110, 0x0130, 0x0138, 0x0140,
+    0x0118, 0x0120, 0x0128, 0x080d, 0x2129, 0x2129, 0x2081, 0x2081,
+    0x2411, 0x2411, 0x2079, 0x2079, 0x2071, 0x2071, 0x2069, 0x2069,
+    0x1bb5, 0x060d, 0x001d, 0xd3f9, 0x0909, 0x0809, 0x090d, 0x0311,
+    0x0121, 0x0061, 0x0059, 0x0051, 0x0150, 0x0158, 0x0160, 0x0168,
+    0x240d, 0x240d, 0x230d, 0x230d, 0x2609, 0x2609, 0x250d, 0x250d,
+    0x2709, 0x2709, 0x2211, 0x2211, 0x2119, 0x2119, 0x2049, 0x2049,
+    0x0015, 0x0509, 0x020d, 0x010d, 0x0409, 0x0309, 0x0041, 0x0039,
+    0x0111, 0x0031, 0x2209, 0x2209, 0x2029, 0x2029, 0x2021, 0x2021
+};
+// Two 32-entry packed VLC sub-tables for Intra DC decoding; presumably luma first, then chroma (per the name) - confirm.
+const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64] = {
+    0x0020, 0x000b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2001, 0x2001,
+    0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003,
+    0x0028, 0x000f, 0x200d, 0x200d, 0x0030, 0x0013, 0x2011, 0x2011,
+    0x0038, 0x0017, 0x2015, 0x2015, 0x3fff, 0x3fff, 0x2019, 0x2019,
+    /* second sub-table starts at index 32 */
+	0x0020, 0x0009, 0x2007, 0x2007, 0x4005, 0x4005, 0x4005, 0x4005,
+    0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001,
+    0x0028, 0x000d, 0x200b, 0x200b, 0x0030, 0x0011, 0x200f, 0x200f,
+    0x0038, 0x0015, 0x2013, 0x2013, 0x1fff, 0x0019, 0x2017, 0x2017
+};
+// Packed VLC table with 124 halfword entries; presumably used for motion vector difference (MVD) decoding,
+// per the array name - confirm against the caller.
+const OMX_U16 armVCM4P2_aVlcMVD[124] = {
+    0x0010, 0x00f0, 0x0043, 0x003f, 0x4041, 0x4041, 0x4041, 0x4041,
+    0x0018, 0x00d8, 0x0047, 0x003b, 0x0020, 0x0080, 0x00a8, 0x00d0,
+    0x0028, 0x0048, 0x0070, 0x0078, 0x1fff, 0x0030, 0x0038, 0x0040,
+    0x0081, 0x0001, 0x007f, 0x0003, 0x207d, 0x207d, 0x2005, 0x2005,
+    0x207b, 0x207b, 0x2007, 0x2007, 0x0050, 0x0058, 0x0060, 0x0068,
+    0x2079, 0x2079, 0x2009, 0x2009, 0x2077, 0x2077, 0x200b, 0x200b,
+    0x2075, 0x2075, 0x200d, 0x200d, 0x2073, 0x2073, 0x200f, 0x200f,
+    0x0071, 0x0011, 0x006f, 0x0013, 0x006d, 0x0015, 0x006b, 0x0017,
+    0x0088, 0x0090, 0x0098, 0x00a0, 0x0069, 0x0019, 0x0067, 0x001b,
+    0x0065, 0x001d, 0x0063, 0x001f, 0x0061, 0x0021, 0x005f, 0x0023,
+    0x005d, 0x0025, 0x005b, 0x0027, 0x00b0, 0x00b8, 0x00c0, 0x00c8,
+    0x0059, 0x0029, 0x0057, 0x002b, 0x2055, 0x2055, 0x202d, 0x202d,
+    0x2053, 0x2053, 0x202f, 0x202f, 0x2051, 0x2051, 0x2031, 0x2031,
+    0x204f, 0x204f, 0x2033, 0x2033, 0x00e0, 0x00e8, 0x0049, 0x0039,
+    0x204d, 0x204d, 0x2035, 0x2035, 0x204b, 0x204b, 0x2037, 0x2037,
+    0x2045, 0x2045, 0x203d, 0x203d
+};
+
+/* LMAX table for Inter blocks: entries [0..31] are the Last=0 part, entries [32..72] the Last=1 part.
+   armVCM4P2_InterL0L1LMAX[27-31] are zero padding so that the Last=1 part starts at offset 32.
+   NOTE(review): this header used to say "non Inter" and "Level - 1 Indexed"; the array name says Inter,
+   and an LMAX table is presumably indexed by run - confirm against the decoder.
+*/
+const OMX_U8 armVCM4P2_InterL0L1LMAX[73] = 
+{
+   12,  6,  4,  3,  3,  3,  3,  2, 
+    2,  2,  2,  1,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1,
+    1,  1,  1,  0,  0,  0,  0,  0,
+    3,  2,  1,  1,  1,  1,  1,  1, 
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1
+};
+
+/* RMAX table for Inter blocks (Last == 0 followed by Last == 1), indexed by Level - 1.
+   Entries [0..31] are the Last=0 part and entries [32..34] the Last=1 part;
+   armVCM4P2_InterL0L1RMAX[12-31] are zero padding so that the Last=1 part starts at offset 32. */
+
+
+const OMX_U8 armVCM4P2_InterL0L1RMAX[35] = 
+{
+   26, 10,  6,  2,  1,  1,   
+    0,  0,  0,  0,  0,  0,
+	0,	0,	0,	0,	0,	0,
+	0,	0,	0,	0,	0,	0,
+	0,	0,	0,	0,
+    0,  0,  0,  0,  40,  1,  0
+};
+
+/* LMAX table for Intra blocks: entries [0..31] are the Last=0 part, entries [32..52] the Last=1 part.
+   armVCM4P2_IntraL0L1LMAX[15-31] are zero padding so that the Last=1 part starts at offset 32.
+   NOTE(review): this header used to say "non Intra" and "Level - 1 Indexed"; the array name says Intra,
+   and an LMAX table is presumably indexed by run - confirm against the decoder.
+*/
+const OMX_U8 armVCM4P2_IntraL0L1LMAX[53] = 
+{
+   27, 10,  5,  4,  3,  3,  3,  
+    3,  2,  2,  1,  1,  1,  1,  1,	0,
+	0,	0,	0,	0,	0,	0,	0,	0,
+	0,	0,	0,	0,	0,	0,	0,	0,
+
+	8,  3,  2,  2,  2,  2,  2,  1, 
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1
+};
+
+
+/* RMAX table for Intra blocks (Last == 0 followed by Last == 1), indexed by Level - 1.
+   Entries [0..31] are the Last=0 part and entries [32..39] the Last=1 part;
+   armVCM4P2_IntraL0L1RMAX[27-31] are zero padding so that the Last=1 part starts at offset 32. */
+
+
+const OMX_U8 armVCM4P2_IntraL0L1RMAX[40] =
+{
+   14,  9,  7,  3,  2,  1,	1,  
+    1,  1,  1,  0,  0,  0, 	0,  
+    0,  0,  0,  0,  0,  0,  0,  
+    0,  0,  0,  0,  0,  0,  0,
+	0,	0,	0,	0,
+	
+	20,  6,  1,  0,  0,  0,  0,  0
+
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
new file mode 100644
index 0000000..25cf8db
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
@@ -0,0 +1,75 @@
+ /**
+ * 
+ * File Name:  armVCM4P2_Lookup_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File:        armVCM4P2_Lookup_Tables.c
+ * Description: Contains all the Lookup tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+    /* * Table Entries contain Dc Scaler values (two 32-entry halves: indices 0-31 and 32-63)
+       * armVCM4P2_DCScaler[i]= 8           for i=1  to  4 and i=33 to 36
+       *                      = 2*i         for i=5  to  8
+       *                      = i+8         for i=9  to  24
+       *                      = 2*i-16      for i=25 to  31
+       *                      = (i-32+13)/2 for i=37 to  56 (integer division)
+       *                      = i-6-32      for i=57 to  63
+       *                      = 255         for i=0 and i=32
+       */
+       
+const OMX_U8 armVCM4P2_DCScaler[64]={
+	0xff, 0x8,  0x8,  0x8,  0x8,  0xa,  0xc,  0xe,  
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e,
+    0xff, 0x8,  0x8,  0x8,  0x8,  0x9,  0x9,  0xa,  
+    0xa,  0xb,  0xb,  0xc,  0xc,  0xd,  0xd,  0xe,  
+    0xe,  0xf,  0xf,  0x10, 0x10, 0x11, 0x11, 0x12, 
+    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+
+};
+
+              
+     /*  Table entries contain the reciprocals of 1 to 63, scaled by 32767
+      *  armVCM4P2_Reciprocal_QP_S16[i]=round(32767/i)   for i=1 to 63
+      *  armVCM4P2_Reciprocal_QP_S16[0]= 0               (placeholder; 1/0 is undefined)
+      */
+
+const OMX_S16 armVCM4P2_Reciprocal_QP_S16[64]={
+	0x0000,0x7fff,0x4000,0x2aaa,0x2000,0x1999,0x1555,0x1249,
+    0x1000,0x0e39,0x0ccd,0x0ba3,0x0aab,0x09d9,0x0925,0x0888,
+    0x0800,0x0787,0x071c,0x06bd,0x0666,0x0618,0x05d1,0x0591,
+    0x0555,0x051f,0x04ec,0x04be,0x0492,0x046a,0x0444,0x0421,
+    0x0400,0x03e1,0x03c4,0x03a8,0x038e,0x0376,0x035e,0x0348,
+    0x0333,0x031f,0x030c,0x02fa,0x02e9,0x02d8,0x02c8,0x02b9,
+    0x02ab,0x029d,0x028f,0x0282,0x0276,0x026a,0x025f,0x0254,
+    0x0249,0x023f,0x0235,0x022b,0x0222,0x0219,0x0211,0x0208
+	   
+};
+     
+      /* Table entries contain the reciprocals of 1 to 63, scaled by 131071
+       * armVCM4P2_Reciprocal_QP_S32[i]=round(131071/i)  for i=1 to 63
+       * armVCM4P2_Reciprocal_QP_S32[0]= 0               (placeholder; 1/0 is undefined)
+       */
+
+const OMX_S32 armVCM4P2_Reciprocal_QP_S32[64]={
+	0x00000000,0x0001ffff,0x00010000,0x0000aaaa, 0x00008000, 0x00006666, 0x00005555, 0x00004924,
+    0x00004000,0x000038e3,0x00003333,0x00002e8c, 0x00002aab, 0x00002762, 0x00002492, 0x00002222,
+    0x00002000,0x00001e1e,0x00001c72,0x00001af2, 0x0000199a, 0x00001861, 0x00001746, 0x00001643,
+    0x00001555,0x0000147b,0x000013b1,0x000012f6, 0x00001249, 0x000011a8, 0x00001111, 0x00001084,
+    0x00001000,0x00000f84,0x00000f0f,0x00000ea1, 0x00000e39, 0x00000dd6, 0x00000d79, 0x00000d21,
+    0x00000ccd,0x00000c7d,0x00000c31,0x00000be8, 0x00000ba3, 0x00000b61, 0x00000b21, 0x00000ae5,
+    0x00000aab,0x00000a73,0x00000a3d,0x00000a0a, 0x000009d9, 0x000009a9, 0x0000097b, 0x0000094f,
+    0x00000925,0x000008fb,0x000008d4,0x000008ae, 0x00000889, 0x00000865, 0x00000842, 0x00000820
+	
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
new file mode 100644
index 0000000..3f92d85
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
@@ -0,0 +1,104 @@
+;//
+;// 
+;// File Name:  armVCM4P2_SetPredDir_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+; **
+; * Function: armVCM4P2_SetPredDir
+; *
+; * Description:
+; * Performs detecting the prediction direction
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] blockIndex  block index indicating the component type and
+; *                          position as defined in subclause 6.1.3.8, of ISO/IEC
+; *                          14496-2. Furthermore, indexes 6 to 9 indicate the
+; *                          alpha blocks spatially corresponding to luminance
+; *                          blocks 0 to 3 in the same macroblock.
+; * [in] pCoefBufRow pointer to the coefficient row buffer
+; * [in] pQpBuf      pointer to the quantization parameter buffer
+; * [out]predQP      quantization parameter of the predictor block
+; * [out]predDir     indicates the prediction direction which takes one
+; *                  of the following values:
+; *                  OMX_VC_HORIZONTAL    predict horizontally
+; *                  OMX_VC_VERTICAL      predict vertically
+; *
+; * Return Value:
+; * Standard OMXResult result. See enumeration for possible result codes.
+; *
+; */
+
+       INCLUDE omxtypes_s.h
+       INCLUDE armCOMM_s.h
+       INCLUDE omxVC_s.h
+
+
+       M_VARIANTS ARM1136JS
+
+
+       IF ARM1136JS
+ 
+;// Input Arguments
+BlockIndex         RN 0
+pCoefBufRow        RN 1
+pCoefBufCol        RN 2
+predDir            RN 3
+predQP             RN 4
+pQpBuf             RN 5
+
+;// Local Variables
+
+Return             RN 0
+blockDCLeft        RN 6  
+blockDCTop         RN 7
+blockDCTopLeft     RN 8
+temp1              RN 9
+temp2              RN 14
+
+       M_START    armVCM4P2_SetPredDir,r9
+       ;// Compares |left - topLeft| with |topLeft - top| and stores the chosen predDir and predQP
+       M_ARG       ppredQP,4
+       M_ARG       ppQpBuf,4
+    
+       LDRH        blockDCTopLeft,[pCoefBufRow,#-16]    ;// halfword 16 bytes before pCoefBufRow
+       LDRH        blockDCLeft,[pCoefBufCol]
+       
+       TEQ         BlockIndex,#3
+       LDREQH      blockDCTop,[pCoefBufCol,#-16]        ;// block 3 takes its top value relative to pCoefBufCol
+       LDRNEH      blockDCTop,[pCoefBufRow]             ;// all other blocks read it from pCoefBufRow
+       ;// NOTE(review): LDRH zero-extends; this assumes the stored DC values are non-negative - confirm
+       SUBS        temp1,blockDCLeft,blockDCTopLeft
+       RSBLT       temp1,temp1,#0                       ;// temp1 = |left - topLeft|
+       SUBS        temp2,blockDCTopLeft,blockDCTop
+       RSBLT       temp2,temp2,#0                       ;// temp2 = |topLeft - top|
+      
+       M_LDR       pQpBuf,ppQpBuf
+       M_LDR       predQP,ppredQP
+       CMP         temp1,temp2                          ;// LT -> vertical, GE -> horizontal (flags used by all conditionals below)
+       MOV         temp2,#OMX_VC_VERTICAL
+       LDRLTB      temp1,[pQpBuf,#1]                    ;// vertical: predQP comes from pQpBuf[1]
+       STRLT       temp2,[predDir]
+       STRLT       temp1,[predQP]
+       MOV         temp2,#OMX_VC_HORIZONTAL           
+       LDRGEB      temp1,[pQpBuf]                       ;// horizontal: predQP comes from pQpBuf[0]
+       STRGE       temp2,[predDir]
+       MOV         Return,#OMX_Sts_NoErr
+       STRGE       temp1,[predQP] 
+
+         
+    
+       M_END
+ 
+       ENDIF
+
+       END    
+    
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
new file mode 100644
index 0000000..ed17f9b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
@@ -0,0 +1,61 @@
+/**
+ * 
+ * File Name:  armVCM4P2_Zigzag_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File:        armVCM4P2_ZigZag_Tables.c
+ * Description: Contains the zigzag tables
+ *
+ */
+
+#include "omxtypes.h"
+
+/* Classical, Vertical and Horizontal zigzag scan tables in one array (64 entries each, in that order).
+ * Every entry is double the reference zigzag index, presumably so it can be used as a byte offset into 16-bit data.
+ */
+
+const OMX_U8 armVCM4P2_aClassicalZigzagScan [192] = 
+{
+     0,  2,  16, 32,  18,  4,  6, 20,
+    34, 48, 64, 50, 36, 22,  8,  10,
+    24, 38, 52, 66, 80, 96, 82, 68,
+    54, 40, 26,  12,  14, 28, 42, 56, 
+    70, 84, 98, 112, 114, 100, 86, 72,
+    58, 44, 30, 46, 60, 74, 88, 102,
+    116, 118, 104, 90, 76, 62, 78, 92,
+    106, 120, 122, 108, 94, 110, 124, 126,
+    /* Vertical scan (entries 64..127) */
+	0,  16, 32, 48,  2,  18,  4, 20,
+    34, 50, 64, 80, 96, 112, 114, 98,
+    82, 66, 52, 36,  6, 22,  8, 24,
+    38, 54, 68, 84, 100, 116, 70, 86,
+    102, 118, 40, 56,  10, 26,  12, 28,
+    42, 58, 72, 88, 104, 120, 74, 90, 
+    106, 122, 44, 60,  14, 30, 46, 62,
+    76, 92, 108, 124, 78, 94, 110, 126,
+    /* Horizontal scan (entries 128..191) */
+    0,  2,  4,  6,  16,  18, 32, 34,
+    20, 22,  8,  10,  12,  14, 30, 28,
+    26, 24, 38, 36, 48, 50, 64, 66,
+    52, 54, 40, 42, 44, 46, 56, 58,
+    60, 62, 68, 70, 80, 82, 96, 98,
+    84, 86, 72, 74, 76, 78, 88, 90, 
+    92, 94, 100, 102, 112, 114, 116, 118,
+    104, 106, 108, 110, 120, 122, 124, 126
+
+
+};
+
+
+
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
new file mode 100644
index 0000000..b63d295
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
@@ -0,0 +1,102 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_DecodeBlockCoef_Inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for inter reconstruction
+ * 
+ */
+ 
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Inter
+ *
+ * Description:
+ * Decodes the INTER block coefficients. Inverse quantization, inversely zigzag
+ * positioning and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results (residuals) are placed in a contiguous array
+ * of 64 elements. For INTER block, the output buffer holds the residuals for
+ * further reconstruction.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte in
+ *								the bit stream buffer. There is no boundary
+ *								check for the bit stream buffer.
+ * [in]	pBitOffset		pointer to the bit position in the byte pointed
+ *								to by *ppBitStream. *pBitOffset is valid within
+ *								[0-7]
+ * [in]	QP				quantization parameter
+ * [in] shortVideoHeader    a flag indicating presence of short_video_header;
+ *                           shortVideoHeader==1 indicates using quantization method defined in short
+ *                           video header mode, and shortVideoHeader==0 indicates normal quantization method.
+ * [out] ppBitStream 	*ppBitStream is updated after the block is decoded, so that it points to the
+ *                      current byte in the bit stream buffer.
+ * [out] pBitOffset		*pBitOffset is updated so that it points to the current bit position in the
+ *                      byte pointed by *ppBitStream
+ * [out] pDst			pointer to the decoded residual buffer (a contiguous array of 64 elements of
+ *                      OMX_S16 data type). Must be 16-byte aligned.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *   - At least one of the following pointers is Null: ppBitStream, *ppBitStream, pBitOffset , pDst
+ *   - At least one of the below case:
+ *   - *pBitOffset exceeds [0,7], QP <= 0;
+ *	 - pDst not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT * pBitOffset,
+     OMX_S16 * pDst,
+     OMX_INT QP,
+     OMX_INT shortVideoHeader
+)
+{
+    /* Scratch storage for one block of 64 coefficients. The 15 spare
+       elements leave room to move the start up to a 16-byte boundary. */
+    OMX_S16 coefStorage[79];
+    /* Aligned view of coefStorage; every stage below works on it */
+    OMX_S16 *pCoef = armAlignTo16Bytes(coefStorage);
+    OMXResult status;
+
+    /* Stage 1: VLC decode and inverse zigzag scan into pCoef. Each stage's
+       status is checked with armRetDataErrIf before the next one runs. */
+    status = omxVCM4P2_DecodeVLCZigzag_Inter(
+        ppBitStream, pBitOffset, pCoef, shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 2: inverse quantization of pCoef with parameter QP */
+    status = omxVCM4P2_QuantInvInter_I(pCoef, QP);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 3: 8x8 inverse DCT from pCoef into the caller's buffer */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pDst);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* All three stages passed. The bit stream arguments were only handed
+       to stage 1, so they are left where that stage stopped. */
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
new file mode 100644
index 0000000..c609a60
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
@@ -0,0 +1,208 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_DecodeBlockCoef_Intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for intra reconstruction
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Intra
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inversely zigzag
+ * positioning, and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results are then placed in the output frame/plane on
+ * a pixel basis. For INTRA block, the output values are clipped to [0, 255] and
+ * written to corresponding block buffer within the destination plane.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte in
+ *								the bit stream buffer. There is no boundary
+ *								check for the bit stream buffer.
+ * [in]	pBitOffset		pointer to the bit position in the byte pointed
+ *								to by *ppBitStream. *pBitOffset is valid within
+ *								[0-7].
+ * [in]	step			width of the destination plane
+ * [in/out]	pCoefBufRow		[in]  pointer to the coefficient row buffer
+ *                        [out] updated coefficient row buffer
+ * [in/out]	pCoefBufCol		[in]  pointer to the coefficient column buffer
+ *                        [out] updated coefficient column buffer
+ * [in]	curQP			quantization parameter of the macroblock which
+ *								the current block belongs to
+ * [in]	pQpBuf		 Pointer to a 2-element QP array. pQpBuf[0] holds the QP of the 8x8 block left to
+ *                   the current block(QPa). pQpBuf[1] holds the QP of the 8x8 block just above the
+ *                   current block(QPc).
+ *                   Note, in case the corresponding block is out of VOP bound, the QP value will have
+ *                   no effect to the intra-prediction process. Refer to subclause  "7.4.3.3 Adaptive
+ *                   ac coefficient prediction" of ISO/IEC 14496-2(MPEG4 Part2) for accurate description.
+ * [in]	blockIndex		block index indicating the component type and
+ *								position as defined in subclause 6.1.3.8,
+ *								Figure 6-5 of ISO/IEC 14496-2. 
+ * [in]	intraDCVLC		a code determined by intra_dc_vlc_thr and QP.
+ *								This allows a mechanism to switch between two VLC
+ *								for coding of Intra DC coefficients as per Table
+ *								6-21 of ISO/IEC 14496-2. 
+ * [in]	ACPredFlag		a flag equal to ac_pred_flag (of luminance) indicating
+ *								if the ac coefficients of the first row or first
+ *								column are differentially coded for intra coded
+ *								macroblock.
+ * [in] shortVideoHeader    a flag indicating presence of short_video_header;
+ *                           shortVideoHeader==1 selects linear intra DC mode,
+ *							and shortVideoHeader==0 selects nonlinear intra DC mode.
+ * [out]	ppBitStream		*ppBitStream is updated after the block is
+ *								decoded, so that it points to the current byte
+ *								in the bit stream buffer
+ * [out]	pBitOffset		*pBitOffset is updated so that it points to the
+ *								current bit position in the byte pointed by
+ *								*ppBitStream
+ * [out]	pDst			pointer to the block in the destination plane.
+ *								pDst should be 16-byte aligned.
+ * [out]	pCoefBufRow		pointer to the updated coefficient row buffer.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *   -	At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset,
+ *                                                      pCoefBufRow, pCoefBufCol, pQPBuf, pDst.
+ *      or
+ *   -  At least one of the below case: *pBitOffset exceeds [0,7], curQP exceeds (1, 31),
+ *      blockIndex exceeds [0,9], step is not the multiple of 8, intraDCVLC is zero while
+ *      blockIndex greater than 5.
+ *      or
+ *   -	pDst is not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT *pBitOffset,
+     OMX_U8 *pDst,
+     OMX_INT step,
+     OMX_S16 *pCoefBufRow,
+     OMX_S16 *pCoefBufCol,
+     OMX_U8 curQP,
+     const OMX_U8 *pQPBuf,
+     OMX_INT blockIndex,
+     OMX_INT intraDCVLC,
+     OMX_INT ACPredFlag,
+	 OMX_INT shortVideoHeader
+ )
+{
+    /* Pipeline: (1) VLC decode + inverse zigzag, (2) DC/AC prediction,
+       (3) inverse quantization, (4) inverse DCT, (5) clip and store.
+       Stages 1-4 report a status that is checked with armRetDataErrIf
+       before the next stage runs (the macro is defined outside this
+       file; presumably it returns the status when the check fires). */
+
+    /* Scratch space for two blocks of 64 values. Each array carries 15
+       spare elements so its start can be moved to a 16-byte boundary. */
+    OMX_S16 coefStorage[79];
+    OMX_S16 residStorage[79];
+    OMX_S16 *pCoef = armAlignTo16Bytes(coefStorage);    /* stages 1-4 input */
+    OMX_S16 *pResid = armAlignTo16Bytes(residStorage);  /* stage 4 output */
+
+    OMX_INT dcPredDir;      /* direction written by armVCM4P2_SetPredDir */
+    OMX_INT acPredDir;      /* direction handed to the VLC decoders */
+    OMX_INT neighbourQP;    /* QP written by armVCM4P2_SetPredDir */
+    OMXVCM4P2VideoComponent component;
+    OMXResult status;
+
+    /* Let armVCM4P2_SetPredDir pick the prediction direction and the
+       matching QP from the row/column buffers and pQPBuf. */
+    armVCM4P2_SetPredDir(
+        blockIndex,
+        pCoefBufRow,
+        pCoefBufCol,
+        &dcPredDir,
+        &neighbourQP,
+        pQPBuf);
+
+    /* With ACPredFlag clear the VLC decoders get OMX_VC_NONE; otherwise
+       they get the direction chosen above. */
+    acPredDir = (ACPredFlag == 0) ? OMX_VC_NONE : dcPredDir;
+
+    /* Block indices 0..3 select luminance, every other index selects
+       chrominance. */
+    component = (blockIndex <= 3) ? OMX_VC_LUMINANCE : OMX_VC_CHROMINANCE;
+
+    /* Stage 1: VLC decode and inverse zigzag into pCoef. intraDCVLC picks
+       the decoder; only the DC-VLC variant takes the component. */
+    if (intraDCVLC == 1)
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(
+            ppBitStream,
+            pBitOffset,
+            pCoef,
+            acPredDir,
+            shortVideoHeader,
+            component);
+    }
+    else
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraACVLC(
+            ppBitStream,
+            pBitOffset,
+            pCoef,
+            acPredDir,
+            shortVideoHeader);
+    }
+    /* One shared check covers whichever decoder ran. */
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 2: DC/AC prediction on pCoef, using the caller's row and
+       column coefficient buffers. */
+    status = omxVCM4P2_PredictReconCoefIntra(
+        pCoef,
+        pCoefBufRow,
+        pCoefBufCol,
+        curQP,
+        neighbourQP,
+        dcPredDir,
+        ACPredFlag,
+        component);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 3: inverse quantization of pCoef (single buffer, so the
+       result is read back from pCoef by the next stage). */
+    status = omxVCM4P2_QuantInvIntra_I(
+        pCoef,
+        curQP,
+        component,
+        shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 4: 8x8 inverse DCT from pCoef into pResid. */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pResid);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 5: place the linear block into the destination plane,
+       clipping each value to 0..255. */
+    armVCM4P2_Clip8(pResid, pDst, step);
+
+    /* All stages completed; the bit stream arguments were only handed
+       to stage 1, so they are left where that stage stopped. */
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
new file mode 100644
index 0000000..a1861da
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
@@ -0,0 +1,364 @@
+; **********
+; * 
+; * File Name:  omxVCM4P2_DecodePadMV_PVOP_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; * 
+; **
+; * Function: omxVCM4P2_DecodePadMV_PVOP
+; *
+; * Description:
+; * Decodes and pads four motion vectors of the non-intra macroblock in P-VOP.
+; * The motion vector padding process is specified in subclause 7.6.1.6 of
+; * ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; *
+; * Parameters:
+; * [in]    ppBitStream        pointer to the pointer to the current byte in
+; *                            the bit stream buffer
+; * [in]    pBitOffset         pointer to the bit position in the byte pointed
+; *                            to by *ppBitStream. *pBitOffset is valid within
+; *                            [0-7].
+; * [in]    pSrcMVLeftMB       pointers to the motion vector buffers of the
+; *                           macroblocks spatially at the left side of the current macroblock
+; *                     respectively.
+; * [in]    pSrcMVUpperMB      pointers to the motion vector buffers of the
+; *                     macroblocks spatially at the upper side of the current macroblock
+; *                     respectively.
+; * [in]    pSrcMVUpperRightMB pointers to the motion vector buffers of the
+; *                     macroblocks spatially at the upper-right side of the current macroblock
+; *                     respectively.
+; * [in]    fcodeForward       a code equal to vop_fcode_forward in MPEG-4
+; *                     bit stream syntax
+; * [in]    MBType         the type of the current macroblock. If MBType
+; *                     is not equal to OMX_VC_INTER4V, the destination
+; *                     motion vector buffer is still filled with the
+; *                     same decoded vector.
+; * [out]   ppBitStream         *ppBitStream is updated after the block is decoded,
+; *                     so that it points to the current byte in the bit
+; *                     stream buffer
+; * [out]   pBitOffset         *pBitOffset is updated so that it points to the
+; *                     current bit position in the byte pointed by
+; *                     *ppBitStream
+; * [out]   pDstMVCurMB         pointer to the motion vector buffer of the current
+; *                     macroblock which contains four decoded motion vectors
+; *
+; * Return Value:
+; * OMX_Sts_NoErr -no error
+; * 
+; *                     
+; * OMX_Sts_Err - status error
+; *
+; *
+     
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        INCLUDE armCOMM_BitDec_s.h
+        INCLUDE omxVC_s.h
+        
+       M_VARIANTS ARM1136JS
+       
+                
+
+
+        IF ARM1136JS
+
+;//Input Arguments
+;// The last four (r4-r7) are loaded by the function body from their M_ARG stack slots
+ppBitStream           RN 0
+pBitOffset            RN 1
+pSrcMVLeftMB          RN 2
+pSrcMVUpperMB         RN 3
+pSrcMVUpperRightMB    RN 4
+pDstMVCurMB           RN 5
+fcodeForward          RN 6
+MBType                RN 7
+
+;//Local Variables
+;// Many names below alias the same register; a name is only valid until another alias of that register is written
+zero                  RN 4
+one                   RN 4
+scaleFactor           RN 1
+
+
+Return                RN 0
+
+VlcMVD                RN 0
+index                 RN 4
+Count                 RN 7                                   ;// same register as MBType
+
+mvHorData             RN 4
+mvHorResidual         RN 0
+
+mvVerData             RN 4             
+mvVerResidual         RN 0
+
+temp                  RN 1                                   ;// same register as scaleFactor, Range and diffMVdy
+
+temp1                 RN 3
+High                  RN 4
+Low                   RN 2
+Range                 RN 1                                   ;// same register as temp
+
+BlkCount              RN 14                                  ;// same register as LR; kept in pBlkCount across the BL
+
+diffMVdx              RN 0
+diffMVdy              RN 1
+
+;// Scratch Registers
+;// RBitStream/RBitCount/RBitBuffer are the registers handed to M_BD_INIT0; T1/T2 are passed to the M_BD_* macros as temporaries
+RBitStream            RN 8
+RBitCount             RN 9
+RBitBuffer            RN 10
+
+T1                    RN 11
+T2                    RN 12
+LR                    RN 14
+
+       IMPORT          armVCM4P2_aVlcMVD
+       IMPORT          omxVCM4P2_FindMVpred
+
+       ;// Allocate stack memory        
+       ;// First group: values passed on the stack to omxVCM4P2_FindMVpred; the remaining slots save values used by the loop
+       M_ALLOC4        ppDstMVCurMB,4
+       M_ALLOC4        pDstMVPredME,4
+       M_ALLOC4        pBlkCount,4
+       
+       M_ALLOC4        pppBitStream,4
+       M_ALLOC4        ppBitOffset,4
+       M_ALLOC4        ppSrcMVLeftMB,4
+       M_ALLOC4        ppSrcMVUpperMB,4
+       
+       M_ALLOC4        pdiffMVdx,4
+       M_ALLOC4        pdiffMVdy,4
+       M_ALLOC4        pHigh,4
+       
+              
+
+
+       M_START   omxVCM4P2_DecodePadMV_PVOP,r11
+
+       M_ARG           pSrcMVUpperRightMBonStack,4           ;// stack argument: pSrcMVUpperRightMB
+       M_ARG           pDstMVCurMBonStack,4                  ;// stack argument: pDstMVCurMB
+       M_ARG           fcodeForwardonStack,4                 ;// stack argument: fcodeForward
+       M_ARG           MBTypeonStack,4                       ;// stack argument: MBType
+
+       ;// Intra macroblocks (OMX_VC_INTRA / OMX_VC_INTRA_Q): all four entries of pDstMVCurMB
+       ;// are set to zero and the function leaves through ExitOK with OMX_Sts_NoErr, skipping
+       ;// M_BD_FINI. The conditional stores are interleaved with the M_BD_INIT macros, which
+       ;// presumably leave the flags from TEQ/TEQNE untouched.
+       ;// Initializing the BitStream Macro
+
+       M_BD_INIT0      ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+       M_LDR           MBType,MBTypeonStack                  ;// Load MBType from stack
+       M_LDR           pDstMVCurMB,pDstMVCurMBonStack        ;// Load pDstMVCurMB from stack
+       MOV             zero,#0
+
+       TEQ             MBType,#OMX_VC_INTRA                  ;// Check if MBType=OMX_VC_INTRA
+       TEQNE           MBType,#OMX_VC_INTRA_Q                ;// check if MBType=OMX_VC_INTRA_Q
+       STREQ           zero,[pDstMVCurMB]                    ;// Intra: pDstMVCurMB[0]=0
+       M_BD_INIT1      T1, T2, T2
+       STREQ           zero,[pDstMVCurMB,#4]                 ;// Intra: pDstMVCurMB[1]=0
+       M_BD_INIT2      T1, T2, T2
+       STREQ           zero,[pDstMVCurMB,#8]                 ;// Intra: pDstMVCurMB[2]=0
+       MOVEQ           Return,#OMX_Sts_NoErr
+       MOV             BlkCount,#0
+       STREQ           zero,[pDstMVCurMB,#12]                ;// Intra: pDstMVCurMB[3]=0
+
+       BEQ             ExitOK
+
+       TEQ             MBType,#OMX_VC_INTER4V                ;// Check if MBType=OMX_VC_INTER4V
+       TEQNE           MBType,#OMX_VC_INTER4V_Q              ;// Check if MBType=OMX_VC_INTER4V_Q
+       MOVEQ           Count,#4                              ;// four vectors to decode
+       ;// NOTE(review): Count and MBType are both r7, so after Count=4 the test below compares the value 4 - confirm 4 is neither INTER nor INTER_Q
+       TEQ             MBType,#OMX_VC_INTER                  ;// Check if MBType=OMX_VC_INTER
+       TEQNE           MBType,#OMX_VC_INTER_Q                ;// Check if MBType=OMX_VC_INTER_Q
+       MOVEQ           Count,#1                              ;// one vector to decode
+
+       M_LDR           fcodeForward,fcodeForwardonStack      ;// Load fcodeForward  from stack
+
+       ;// Storing the values temporarily on stack
+
+       M_STR           ppBitStream,pppBitStream
+       M_STR           pBitOffset,ppBitOffset
+
+
+       SUB             temp,fcodeForward,#1                  ;// temp=fcodeForward-1
+       MOV             one,#1
+       M_STR           pSrcMVLeftMB,ppSrcMVLeftMB
+       LSL             scaleFactor,one,temp                  ;// scaleFactor=1<<(fcodeForward-1)
+       M_STR           pSrcMVUpperMB,ppSrcMVUpperMB
+       LSL             scaleFactor,scaleFactor,#5
+       M_STR           scaleFactor,pHigh                     ;// [pHigh]=32*scaleFactor
+
+       ;// VLD Decoding
+
+
+Loop
+
+       LDR             VlcMVD, =armVCM4P2_aVlcMVD        ;// Load the optimized MVD VLC table
+
+       ;// Horizontal Data and Residual calculation
+
+       LDR             temp,=0xFFF                           ;// value compared against index to detect an invalid symbol
+       M_BD_VLD        index,T1,T2,VlcMVD,3,2                ;// variable length decoding using the macro
+
+       TEQ             index,temp
+       BEQ             ExitError                             ;// Exit with an error if the decoded symbol is an invalid symbol
+
+       SUB             mvHorData,index,#32                   ;// mvHorData=index-32
+       MOV             mvHorResidual,#1                      ;// mvHorResidual=1
+       CMP             fcodeForward,#1
+       TEQNE           mvHorData,#0
+       MOVEQ           diffMVdx,mvHorData                    ;// if scaleFactor=1(fcodeForward=1) or mvHorData=0 diffMVdx=mvHorData
+       BEQ             VerticalData
+       ;// NOTE(review): the product below uses fcodeForward while the shortcut above is described via scaleFactor=1<<(fcodeForward-1); confirm against ISO/IEC 14496-2
+       SUB             temp,fcodeForward,#1
+       M_BD_VREAD8     mvHorResidual,temp,T1,T2              ;// get mvHorResidual from bitstream if fcodeForward>1 and mvHorData!=0
+
+       CMP             mvHorData,#0                          ;// flags from this compare also drive the final RSBLT
+       RSBLT           mvHorData,mvHorData,#0                ;// mvHorData=abs(mvHorData)
+       SUB             mvHorResidual,mvHorResidual,fcodeForward
+       SMLABB          diffMVdx,mvHorData,fcodeForward,mvHorResidual ;// diffMVdx=abs(mvHorData)*fcodeForward+mvHorResidual-fcodeForward
+       ADD             diffMVdx,diffMVdx,#1
+       RSBLT           diffMVdx,diffMVdx,#0                  ;// negate if mvHorData was negative
+
+       ;// Vertical Data and Residual calculation
+
+VerticalData
+
+       M_STR           diffMVdx,pdiffMVdx                    ;// Store the diffMVdx on stack
+       LDR             VlcMVD, =armVCM4P2_aVlcMVD        ;// Loading the address of optimized VLC tables
+
+       LDR             temp,=0xFFF
+       M_BD_VLD        index,T1,T2,VlcMVD,3,2                ;// VLC decoding using the macro
+
+       TEQ             index,temp
+       BEQ             ExitError                             ;// Exit with an error if an invalid symbol occurs
+
+       SUB             mvVerData,index,#32                   ;// mvVerData=index-32
+       MOV             mvVerResidual,#1
+       CMP             fcodeForward,#1
+       TEQNE           mvVerData,#0
+       MOVEQ           diffMVdy,mvVerData                    ;// diffMVdy = mvVerData if scaleFactor=1(fcodeForward=1) or mvVerData=0
+       BEQ             FindMVPred
+
+       SUB             temp,fcodeForward,#1
+       M_BD_VREAD8     mvVerResidual,temp,T1,T2              ;// Get mvVerResidual from bit stream if fcodeForward>1 and mvVerData!=0
+
+
+       CMP             mvVerData,#0                          ;// flags from this compare also drive the final RSBLT
+       RSBLT           mvVerData,mvVerData,#0                ;// mvVerData=abs(mvVerData)
+       SUB             mvVerResidual,mvVerResidual,fcodeForward
+       SMLABB          diffMVdy,mvVerData,fcodeForward,mvVerResidual ;// diffMVdy=abs(mvVerData)*fcodeForward+mvVerResidual-fcodeForward
+       ADD             diffMVdy,diffMVdy,#1
+       RSBLT           diffMVdy,diffMVdy,#0                  ;// negate if mvVerData was negative
+
+       ;//Calling the Function omxVCM4P2_FindMVpred
+
+FindMVPred
+
+       M_STR           diffMVdy,pdiffMVdy
+       ADD             temp,pDstMVCurMB,BlkCount,LSL #2      ;// temp=&pDstMVCurMB[BlkCount]
+       M_STR           temp,ppDstMVCurMB                     ;// store temp on stack for passing as an argument to FindMVPred
+
+       MOV             temp,#0
+       M_STR           temp,pDstMVPredME                     ;// Pass pDstMVPredME=NULL as an argument
+       M_STR           BlkCount,pBlkCount                    ;// Pass BlkCount as argument through stack
+
+       MOV             temp,pSrcMVLeftMB                     ;// temp (RN 1)=pSrcMVLeftMB
+       M_LDR           pSrcMVUpperRightMB,pSrcMVUpperRightMBonStack
+       MOV             pSrcMVLeftMB,pSrcMVUpperMB            ;// pSrcMVLeftMB ( RN 2) = pSrcMVUpperMB
+       MOV             ppBitStream,pDstMVCurMB               ;// ppBitStream  ( RN 0) = pDstMVCurMB
+       MOV             pSrcMVUpperMB,pSrcMVUpperRightMB      ;// pSrcMVUpperMB( RN 3) = pSrcMVUpperRightMB
+       BL              omxVCM4P2_FindMVpred              ;// Branch to subroutine omxVCM4P2_FindMVpred
+
+       ;// Store Horizontal Motion Vector
+
+       M_LDR           BlkCount,pBlkCount                    ;// Load BlkCount from stack
+       M_LDR           High,pHigh                            ;// High=32*scaleFactor
+       LSL             temp1,BlkCount,#2                     ;// temp1=BlkCount*4
+       M_LDR           diffMVdx,pdiffMVdx                    ;// Load diffMVdx
+
+       LDRSH           temp,[pDstMVCurMB,temp1]              ;// temp=pDstMVCurMB[BlkCount] (first halfword)
+
+
+       RSB             Low,High,#0                           ;// Low = -32*scaleFactor
+       ADD             diffMVdx,temp,diffMVdx                ;// diffMVdx=pDstMVCurMB[BlkCount]+diffMVdx
+       ADD             Range,High,High                       ;// Range=64*ScaleFactor (overwrites temp, same register)
+       SUB             High,High,#1                          ;// High= 32*scaleFactor-1
+
+       CMP             diffMVdx,Low                          ;// If diffMVdx<Low
+       ADDLT           diffMVdx,diffMVdx,Range               ;// diffMVdx+=Range
+
+       CMP             diffMVdx,High
+       SUBGT           diffMVdx,diffMVdx,Range               ;// If diffMVdx > High diffMVdx-=Range
+       STRH            diffMVdx,[pDstMVCurMB,temp1]
+
+       ;// Store Vertical
+       ;// The stored value is loaded into T2, not temp: temp and Range are both r1 and Range is still needed for the wrap below
+       ADD             temp1,temp1,#2                        ;// temp1=4*BlkCount+2
+       M_LDR           diffMVdx,pdiffMVdy                    ;// Load diffMVdy
+       LDRSH           T2,[pDstMVCurMB,temp1]                ;// T2=pDstMVCurMB[BlkCount] (second halfword)
+       ADD             BlkCount,BlkCount,#1                  ;// BlkCount=BlkCount+1
+       ADD             diffMVdx,T2,diffMVdx
+       CMP             diffMVdx,Low
+       ADDLT           diffMVdx,diffMVdx,Range               ;// If diffMVdy<Low  diffMVdy+=Range
+       CMP             diffMVdx,High
+       SUBGT           diffMVdx,diffMVdx,Range               ;// If diffMVdy > High diffMVdy-=Range
+       STRH            diffMVdx,[pDstMVCurMB,temp1]
+
+       CMP             BlkCount,Count
+       M_LDR           pSrcMVLeftMB,ppSrcMVLeftMB            ;// reload r2, used as Low above
+       M_LDR           pSrcMVUpperMB,ppSrcMVUpperMB          ;// reload r3, used as temp1 above
+
+       BLT             Loop                                  ;// If BlkCount<Count Continue the Loop
+
+
+       ;// If MBType=OMX_VC_INTER or MBtype=OMX_VC_INTER_Q copy pDstMVCurMB[0] to
+       ;// pDstMVCurMB[1], pDstMVCurMB[2], pDstMVCurMB[3] 
+
+       M_LDR           MBType,MBTypeonStack                  ;// r7 was used as Count, reload MBType
+
+       TEQ             MBType,#OMX_VC_INTER
+       TEQNE           MBType,#OMX_VC_INTER_Q
+       LDREQ           temp,[pDstMVCurMB]
+       M_LDR           ppBitStream,pppBitStream
+       STREQ           temp,[pDstMVCurMB,#4]
+
+       STREQ           temp,[pDstMVCurMB,#8]
+       STREQ           temp,[pDstMVCurMB,#12]
+
+
+       M_LDR           pBitOffset,ppBitOffset
+       ;//Ending the macro
+       M_BD_FINI       ppBitStream,pBitOffset                 ;// Finishing the Macro
+
+
+       MOV             Return,#OMX_Sts_NoErr
+       B               ExitOK
+
+ExitError
+
+       M_LDR           ppBitStream,pppBitStream
+       M_LDR           pBitOffset,ppBitOffset
+       ;//Ending the macro
+       M_BD_FINI       ppBitStream,pBitOffset
+
+       MOV             Return,#OMX_Sts_Err
+
+ExitOK             
+
+       M_END
+       ENDIF
+       END
+
+
+   
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
new file mode 100644
index 0000000..c43b253
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
@@ -0,0 +1,132 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_Inter
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one inter coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]    ppBitStream        pointer to the pointer to the current byte in
+; *                    the bitstream buffer
+; * [in]    pBitOffset        pointer to the bit position in the byte pointed
+; *                    to by *ppBitStream. *pBitOffset is valid within    [0-7].
+; * [in] shortVideoHeader     binary flag indicating presence of short_video_header;
+; *                           escape modes 0-3 are used if shortVideoHeader==0,
+; *                           and escape mode 4 is used when shortVideoHeader==1.
+; * [out]    ppBitStream        *ppBitStream is updated after the block is
+; *                    decoded, so that it points to the current byte
+; *                    in the bit stream buffer
+; * [out]    pBitOffset        *pBitOffset is updated so that it points to the
+; *                    current bit position in the byte pointed by
+; *                    *ppBitStream
+; * [out]    pDst            pointer to the coefficient buffer of current
+; *                    block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; *   -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; *   -pDst is not 16-byte aligned, or
+; *   -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; *   -At least one mark bit is equal to zero
+; *   -Encountered an illegal stream code that cannot be found in the VLC table
+; *   -Encountered and illegal code in the VLC FLC table
+; *   -The number of coefficients is greater than 64
+; *
+; */
+
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      INCLUDE armCOMM_BitDec_s.h
+
+
+      M_VARIANTS ARM1136JS
+
+     
+
+
+
+     IF ARM1136JS
+     
+        ;// Import various tables needed for the function
+
+        
+        IMPORT          armVCM4P2_InterVlcL0L1             ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+                                                               ;// Packed in Run:Level:Last format
+        IMPORT          armVCM4P2_InterL0L1LMAX            ;// Contains LMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_InterL0L1RMAX            ;// Contains RMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_aClassicalZigzagScan     ;// contains classical Zigzag table entries with double the original values
+        IMPORT          armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+
+
+;//Input Arguments
+;// None of the instructions below names r0-r3 as a destination before the BL
+ppBitStream          RN 0
+pBitOffset           RN 1
+pDst                 RN 2
+shortVideoHeader     RN 3
+
+;//Local Variables
+;// The four table pointers all alias r4, so each address is stored to its stack slot before the next one is loaded
+Return               RN 0
+
+pVlcTableL0L1        RN 4
+pLMAXTableL0L1       RN 4
+pRMAXTableL0L1       RN 4
+pZigzagTable         RN 4
+Count                RN 6
+
+
+        
+        ;// Allocate stack memory to store the VLC,Zigzag,LMAX and RMAX tables
+        ;// NOTE(review): the slot order is presumably what armVCM4P2_DecodeVLCZigzag_AC_unsafe expects - confirm before reordering
+        
+        M_ALLOC4        ppVlcTableL0L1,4
+        M_ALLOC4        ppLMAXTableL0L1,4
+        M_ALLOC4        ppRMAXTableL0L1,4
+        M_ALLOC4        ppZigzagTable,4
+        
+        
+        M_START omxVCM4P2_DecodeVLCZigzag_Inter,r12
+
+        ;// Wrapper: publish the inter table addresses in the stack slots, set Count to 0,
+        ;// then hand over to the shared AC decoding routine.
+        
+        LDR             pZigzagTable, =armVCM4P2_aClassicalZigzagScan       ;// Load zigzag table
+        M_STR           pZigzagTable,ppZigzagTable                              ;// Store zigzag table on stack to pass as argument to unsafe function
+        LDR             pVlcTableL0L1, =armVCM4P2_InterVlcL0L1              ;// Load optimized VLC table with both L=0 and L=1 entries
+        M_STR           pVlcTableL0L1,ppVlcTableL0L1                            ;// Store optimized VLC table address on stack
+        LDR             pLMAXTableL0L1, =armVCM4P2_InterL0L1LMAX            ;// Load Interleaved L=0 and L=1 LMAX Tables
+        M_STR           pLMAXTableL0L1,ppLMAXTableL0L1                          ;// Store LMAX table address on stack
+        LDR             pRMAXTableL0L1, =armVCM4P2_InterL0L1RMAX            ;// Load Interleaved L=0 and L=1 RMAX Tables
+        MOV             Count,#0                                                ;// set start=0
+        M_STR           pRMAXTableL0L1,ppRMAXTableL0L1                          ;// store RMAX table address on stack
+                
+
+        BL              armVCM4P2_DecodeVLCZigzag_AC_unsafe                 ;// call Unsafe Function for VLC Zigzag Decoding
+         
+        ;// No instruction follows the call, so whatever the unsafe routine left in Return (r0) reaches M_END unchanged
+
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
new file mode 100644
index 0000000..166729e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
@@ -0,0 +1,136 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]    ppBitStream        pointer to the pointer to the current byte in
+; *                    the bitstream buffer
+; * [in]    pBitOffset        pointer to the bit position in the byte pointed
+; *                    to by *ppBitStream. *pBitOffset is valid within    [0-7].
+; * [in] shortVideoHeader     binary flag indicating presence of short_video_header;
+; *                           escape modes 0-3 are used if shortVideoHeader==0,
+; *                           and escape mode 4 is used when shortVideoHeader==1.
+; * [out]    ppBitStream        *ppBitStream is updated after the block is
+; *                    decoded, so that it points to the current byte
+; *                    in the bit stream buffer
+; * [out]    pBitOffset        *pBitOffset is updated so that it points to the
+; *                    current bit position in the byte pointed by
+; *                    *ppBitStream
+; * [out]    pDst            pointer to the coefficient buffer of current
+; *                    block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; *   -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; *   -pDst is not 16-byte aligned, or
+; *   -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; *   -At least one mark bit is equal to zero
+; *   -Encountered an illegal stream code that cannot be found in the VLC table
+; *   -Encountered an illegal code in the VLC FLC table
+; *   -The number of coefficients is greater than 64
+; *
+; */
+
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      INCLUDE armCOMM_BitDec_s.h
+
+
+      M_VARIANTS ARM1136JS
+
+     
+
+
+
+     IF ARM1136JS
+     ;// Everything up to the matching ENDIF is assembled only for the ARM1136JS variant
+        ;// Import the tables and the shared decoding routine used by this function
+
+        
+        IMPORT          armVCM4P2_IntraVlcL0L1             ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+                                                               ;// Packed in Run:Level:Last format
+        IMPORT          armVCM4P2_IntraL0L1LMAX            ;// Contains LMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_IntraL0L1RMAX            ;// Contains RMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_aClassicalZigzagScan     ;// contains classical Zigzag table entries with double the original values
+        IMPORT          armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Shared AC decoding routine called at the end of this function
+
+;//Input Arguments
+
+ppBitStream          RN 0
+pBitOffset           RN 1
+pDst                 RN 2
+PredDir              RN 3                  ;// r3 on entry; consumed when the zigzag table is selected
+shortVideoHeader     RN 3                  ;// Reuses r3: loaded from the stack just before the call
+
+;//Local Variables
+
+Return               RN 0
+
+pVlcTableL0L1        RN 4                  ;// The four table pointers share r4: each address is
+pLMAXTableL0L1       RN 4                  ;// stored to its stack slot before the next one is loaded
+pRMAXTableL0L1       RN 4
+pZigzagTable         RN 4
+Count                RN 6                  ;// Start index, set to 0 below (presumably read by the AC decoder -- confirm)
+
+
+        
+        ;// Allocate four 4-byte stack slots for the optimized VLC, Zigzag, RMAX and LMAX table addresses
+     
+        M_ALLOC4        ppVlcTableL0L1,4
+        M_ALLOC4        ppLMAXTableL0L1,4
+        M_ALLOC4        ppRMAXTableL0L1,4
+        M_ALLOC4        ppZigzagTable,4
+
+        ;// Entry point: fill the stack slots with the Intra table addresses, then call the shared AC decoder
+        M_START omxVCM4P2_DecodeVLCZigzag_IntraACVLC,r12
+
+        M_ARG           shortVideoHeaderonStack,4                             ;// Declare the stack-passed input argument (shortVideoHeader)
+
+        LDR             pZigzagTable, =armVCM4P2_aClassicalZigzagScan     ;// Load Address of the Zigzag table    
+        ADD             pZigzagTable, pZigzagTable, PredDir, LSL #6           ;// Offset by PredDir*64 bytes to select the zigzag table for this PredDir
+        ;// PredDir (r3) is not needed after this point, so r3 can be reloaded with shortVideoHeader later
+        M_STR           pZigzagTable,ppZigzagTable                            ;// Store Zigzag table address on stack
+        LDR             pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1            ;// Load optimized packed VLC Table with both L=0 and L=1 entries
+        M_STR           pVlcTableL0L1,ppVlcTableL0L1                          ;// Store VLC Table address on stack
+        LDR             pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX          ;// Load LMAX Table
+        M_STR           pLMAXTableL0L1,ppLMAXTableL0L1                        ;// Store LMAX Table address on Stack
+        LDR             pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX          ;// Load RMAX Table
+        MOV             Count,#0                                              ;// Set Start=0        
+        
+        M_STR           pRMAXTableL0L1,ppRMAXTableL0L1                        ;// Store RMAX Table address on stack
+              
+
+        ;// r3 now switches meaning from PredDir to shortVideoHeader
+        M_LDR           shortVideoHeader,shortVideoHeaderonStack              ;// get the Input Argument from stack
+
+        BL              armVCM4P2_DecodeVLCZigzag_AC_unsafe               ;// Call Unsafe Function
+
+
+        ;// No instruction runs between the call and M_END
+        
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
new file mode 100644
index 0000000..d19cb13
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
@@ -0,0 +1,224 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]    ppBitStream        pointer to the pointer to the current byte in
+; *                    the bitstream buffer
+; * [in]    pBitOffset        pointer to the bit position in the byte pointed
+; *                    to by *ppBitStream. *pBitOffset is valid within    [0-7].
+; * [in] shortVideoHeader     binary flag indicating presence of short_video_header;
+; *                           escape modes 0-3 are used if shortVideoHeader==0,
+; *                           and escape mode 4 is used when shortVideoHeader==1.
+; * [out]    ppBitStream        *ppBitStream is updated after the block is
+; *                    decoded, so that it points to the current byte
+; *                    in the bit stream buffer
+; * [out]    pBitOffset        *pBitOffset is updated so that it points to the
+; *                    current bit position in the byte pointed by
+; *                    *ppBitStream
+; * [out]    pDst            pointer to the coefficient buffer of current
+; *                    block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; *   -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; *   -pDst is not 16-byte aligned, or
+; *   -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; *   -At least one mark bit is equal to zero
+; *   -Encountered an illegal stream code that cannot be found in the VLC table
+; *   -Encountered an illegal code in the VLC FLC table
+; *   -The number of coefficients is greater than 64
+; *
+; */
+
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      INCLUDE armCOMM_BitDec_s.h
+
+
+      M_VARIANTS ARM1136JS
+
+     
+      
+
+
+      IF ARM1136JS :LOR: CortexA8
+
+      ;// NOTE(review): M_VARIANTS above lists only ARM1136JS -- confirm the CortexA8 term is intended
+        ;// Import the tables and the shared decoding routine used by this function
+
+        
+        IMPORT          armVCM4P2_IntraVlcL0L1             ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+                                                               ;// Packed in Run:Level:Last format
+        IMPORT          armVCM4P2_IntraL0L1LMAX            ;// Contains LMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_IntraL0L1RMAX            ;// Contains RMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_aClassicalZigzagScan     ;// Contains Classical, Horizontal, Vertical Zigzag table entries with double the original values
+        IMPORT          armVCM4P2_aIntraDCLumaChromaIndex  ;// Contains Optimized DCLuma and DCChroma Index table Entries
+        
+
+        IMPORT          armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Shared AC decoding routine called after the DC coefficient is decoded
+
+;//Input Arguments
+
+ppBitStream          RN 0
+pBitOffset           RN 1
+pDst                 RN 2
+PredDir              RN 3                  ;// r3 on entry; consumed when the zigzag table is selected
+shortVideoHeader     RN 3                  ;// Reuses r3: loaded from the stack just before the call
+videoComp            RN 5                  ;// Loaded from the stack; shares r5 with temp1
+;//Local Variables
+
+Return               RN 0
+
+pDCLumaChromaIndex   RN 4                  ;// Shares r4 with the table pointers used in the AC stage
+pDCChromaIndex       RN 7
+pVlcTableL0L1        RN 4
+pLMAXTableL0L1       RN 4
+pRMAXTableL0L1       RN 4
+pZigzagTable         RN 4
+Count                RN 6                  ;// Shares r6 with DCValueSize; set only after the DC stage is finished
+DCValueSize          RN 6
+powOfSize            RN 7
+temp1                RN 5
+
+
+;// Scratch Registers
+
+RBitStream           RN 8
+RBitBuffer           RN 9
+RBitCount            RN 10
+
+T1                   RN 11
+T2                   RN 12
+DCVal                RN 14                 ;// r14 is the link register, which BL overwrites: DCVal is kept on the stack across the call
+
+        
+        ;// Allocate 4-byte stack slots for the VLC, Zigzag, RMAX and LMAX table addresses and for the decoded DC value
+     
+        M_ALLOC4        ppVlcTableL0L1,4
+        M_ALLOC4        ppLMAXTableL0L1,4
+        M_ALLOC4        ppRMAXTableL0L1,4
+        M_ALLOC4        ppZigzagTable,4
+        M_ALLOC4        pDCCoeff,4
+        
+
+        ;// Entry point: decode the DC coefficient here, then call the shared AC decoder with Start=1
+        M_START omxVCM4P2_DecodeVLCZigzag_IntraDCVLC,r12
+
+        M_ARG           shortVideoHeaderonStack,4                                  ;// Declare stack-passed argument (shortVideoHeader)
+        M_ARG           videoComponstack,4                                         ;// Declare stack-passed argument (videoComp)
+
+        
+        ;// Decode DC Coefficient
+
+        
+        LDR             pDCLumaChromaIndex, =armVCM4P2_aIntraDCLumaChromaIndex ;// Load Optimized VLC Table for Luminance and Chrominance
+
+        ;// Initialize the bitstream reader; the three INIT macro steps are interleaved with unrelated instructions
+
+        M_BD_INIT0      ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+        M_LDR           videoComp,videoComponstack                                 
+        M_BD_INIT1      T1, T2, T2
+        ADD             pDCLumaChromaIndex,pDCLumaChromaIndex,videoComp, LSL #6             
+        M_BD_INIT2      T1, T2, T2
+    
+        ;// The ADD above offsets the table pointer by videoComp*64 bytes to select the table for this component
+        M_BD_VLD        DCValueSize,T1,T2,pDCLumaChromaIndex,4,2                    ;// VLC Decode using optimized Luminance and Chrominance VLC Table
+
+    
+       
+
+DecodeDC
+        ;// A decoded size above 12 is treated as an error
+        CMP             DCValueSize,#12     
+        BGT             ExitError
+        
+        CMP             DCValueSize,#0
+        MOVEQ           DCVal,#0                                                    ;// If DCValueSize is zero then DC coeff =0
+        BEQ             ACDecode                                                    ;// Branch to perform AC Coeff Decoding
+        
+        M_BD_VREAD16    DCVal,DCValueSize,T1,T2                                     ;// Get DC Value From Bit stream
+         
+        ;// Values read below 2^(DCValueSize-1) are mapped to negatives: DCVal = DCVal + 1 - 2^DCValueSize
+        MOV             powOfSize,#1                                                
+        LSL             powOfSize,DCValueSize                                       ;// powOfSize=pow(2,DCValueSize)
+        CMP             DCVal,powOfSize,LSR #1                                      ;// Compare DCVal with powOfSize/2 
+        ADDLT           DCVal,DCVal,#1
+        SUBLT           DCVal,DCVal,powOfSize                                       ;// If Lessthan powOfSize/2 DCVal=DCVal-powOfSize+1
+                                                                                    ;// Else DCVal= fetchbits from bit stream
+
+CheckDCValueSize
+        
+        CMP             DCValueSize,#8                                              ;// If DCValueSize greater than 8 check marker bit
+
+        BLE             ACDecode
+
+        M_BD_READ8      temp1,1,T1                                                  ;// Read the single marker bit
+        TEQ             temp1,#0                                                    ;// If Marker bit is zero Exit with an Error Message
+        BEQ             ExitError
+
+        
+
+        ;// Decode AC Coefficient
+
+ACDecode
+
+        M_STR           DCVal,pDCCoeff                                             ;// Store Decoded DC Coeff on Stack
+        M_BD_FINI       ppBitStream,pBitOffset                                     ;// Terminating the Bit stream Macro
+         
+        LDR             pZigzagTable, =armVCM4P2_aClassicalZigzagScan          ;// Load Zigzag table address   
+        ADD             pZigzagTable, pZigzagTable, PredDir, LSL #6                ;// Offset by PredDir*64 bytes to select the zigzag table for this PredDir                
+       
+        M_STR           pZigzagTable,ppZigzagTable                                 ;// Store zigzag table on stack
+        LDR             pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1                 ;// Load Optimized VLC Table With both Last=0 and Last=1 Entries
+        M_STR           pVlcTableL0L1,ppVlcTableL0L1                               ;// Store Optimized VLC Table on stack
+        LDR             pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX               ;// Load LMAX Table
+        M_STR           pLMAXTableL0L1,ppLMAXTableL0L1                             ;// Store LMAX table on stack
+        LDR             pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX               ;// Load RMAX Table
+        MOV             Count,#1                                                   ;// Set Start =1        
+        
+        M_STR           pRMAXTableL0L1,ppRMAXTableL0L1                             ;// Store RMAX Table on Stack
+        
+        ;// r3 now switches meaning from PredDir to shortVideoHeader
+        M_LDR           shortVideoHeader,shortVideoHeaderonStack                   ;// Load the Input Argument From Stack
+        
+        BL              armVCM4P2_DecodeVLCZigzag_AC_unsafe                    ;// Call the Unsafe Function
+
+        M_LDR           DCVal,pDCCoeff                                             ;// Get the Decoded DC Value From Stack
+        STRH            DCVal,[pDst]                                               ;// Store the DC value as the 16-bit coefficient at pDst
+        B               ExitOK
+        
+        ;// NOTE(review): the STRH above relies on pDst (r2) being unchanged by the call -- confirm against the callee
+
+ExitError
+ 
+        M_BD_FINI       ppBitStream,pBitOffset                                     ;// Terminating the Bit Stream Macro in case of an Error
+        MOV             Return,#OMX_Sts_Err                                        ;// Exit with an Error Message 
+ExitOK
+      
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
new file mode 100644
index 0000000..a4bfa71
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
@@ -0,0 +1,194 @@
+;//
+;// 
+;// File Name:  omxVCM4P2_FindMVpred_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+;// Function:
+;//     omxVCM4P2_FindMVpred
+;//
+        ;// Include headers
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        INCLUDE armVCCOMM_s.h
+
+        ;// Define cpu variants
+        M_VARIANTS ARM1136JS
+        
+        
+        IF ARM1136JS
+        ;// Table holding the addresses of the four per-block case labels defined below
+        M_TABLE armVCM4P2_pBlkIndexTable
+        DCD  OMXVCBlk0, OMXVCBlk1
+        DCD  OMXVCBlk2, OMXVCBlk3
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+        ;// r0-r3 arrive in registers; pDstMVPred, pDstMVPredME and iBlk are loaded from the stack below
+pSrcMVCurMB            RN 0
+pSrcCandMV1            RN 1
+pSrcCandMV2            RN 2
+pSrcCandMV3            RN 3
+pDstMVPred             RN 4
+pDstMVPredME           RN 5
+iBlk                   RN 6
+
+pTable                 RN 4
+CandMV                 RN 12
+
+pCandMV1               RN 7
+pCandMV2               RN 8
+pCandMV3               RN 9
+
+CandMV1dx              RN 0 
+CandMV1dy              RN 1 
+CandMV2dx              RN 2
+CandMV2dy              RN 3
+CandMV3dx              RN 10
+CandMV3dy              RN 11
+
+temp                   RN 14
+
+zero                   RN 14
+return                 RN 0
+        
+; ----------------------------------------------
+; Main routine
+; ----------------------------------------------        
+
+        M_ALLOC4 MV, 4
+        ;// MV is a 4-byte stack slot holding an all-zero MV, used for candidates whose pointer is NULL
+        ;// Function header 
+        M_START omxVCM4P2_FindMVpred, r11
+        
+        ;// Define stack arguments
+        M_ARG   ppDstMVPred,  4
+        M_ARG   ppDstMVPredME, 4
+        M_ARG   Blk, 4
+        
+        M_ADR CandMV, MV
+        MOV   zero, #0
+        M_LDR iBlk, Blk
+        
+        ;// Set the default value for these
+        ;// to be used if pSrcCandMV[1|2|3] == NULL
+        MOV   pCandMV1, CandMV
+        MOV   pCandMV2, CandMV
+        MOV   pCandMV3, CandMV
+    
+        STR   zero, [CandMV]
+
+        ;// Branch to the case based on blk number
+        M_SWITCH iBlk
+        M_CASE   OMXVCBlk0      ;// iBlk=0
+        M_CASE   OMXVCBlk1      ;// iBlk=1
+        M_CASE   OMXVCBlk2      ;// iBlk=2
+        M_CASE   OMXVCBlk3      ;// iBlk=3
+        M_ENDSWITCH
+        ;// Each MV entry is 4 bytes (dx, dy halfwords), so byte offsets 4/8/12 select entries 1/2/3
+OMXVCBlk0
+        CMP   pSrcCandMV1, #0
+        ADDNE pCandMV1, pSrcCandMV1, #4
+        
+        CMP   pSrcCandMV2, #0
+        ADDNE pCandMV2, pSrcCandMV2, #8
+
+        CMP   pSrcCandMV3, #0
+        ADDNE pCandMV3, pSrcCandMV3, #8
+        CMPEQ pSrcCandMV1, #0
+        ;// EQ here means candidates 3 and 1 are both NULL: use candidate 2 for all three
+        MOVEQ pCandMV3, pCandMV2
+        MOVEQ pCandMV1, pCandMV2
+                
+        CMP   pSrcCandMV1, #0
+        CMPEQ pSrcCandMV2, #0
+        ;// EQ here means candidates 1 and 2 are both NULL: use candidate 3 for all three
+        MOVEQ pCandMV1, pCandMV3
+        MOVEQ pCandMV2, pCandMV3
+        
+        CMP   pSrcCandMV2, #0
+        CMPEQ pSrcCandMV3, #0
+        ;// EQ here means candidates 2 and 3 are both NULL: use candidate 1 for all three
+        MOVEQ pCandMV2, pCandMV1
+        MOVEQ pCandMV3, pCandMV1
+        
+        B     BlkEnd
+    
+OMXVCBlk1
+        MOV   pCandMV1, pSrcMVCurMB
+        CMP   pSrcCandMV3, #0
+        ADDNE pCandMV3, pSrcCandMV3, #8
+        
+        CMP   pSrcCandMV2, #0
+        ADDNE pCandMV2, pSrcCandMV2, #12
+    
+        CMPEQ pSrcCandMV3, #0
+        ;// EQ here means candidates 2 and 3 are both NULL: use candidate 1 (entry 0 of the current MB) for all three
+        MOVEQ pCandMV2, pCandMV1
+        MOVEQ pCandMV3, pCandMV1
+            
+        B     BlkEnd
+
+OMXVCBlk2
+        CMP   pSrcCandMV1, #0
+        MOV   pCandMV2, pSrcMVCurMB
+        ADD   pCandMV3, pSrcMVCurMB, #4
+        ADDNE pCandMV1, pSrcCandMV1, #12
+        B     BlkEnd
+
+OMXVCBlk3
+        ADD   pCandMV1, pSrcMVCurMB, #8
+        MOV   pCandMV2, pSrcMVCurMB
+        ADD   pCandMV3, pSrcMVCurMB, #4
+    
+BlkEnd
+
+        ;// Load the dx halfword of each selected candidate (NULL candidates point at the zeroed MV slot).
+        ;// From here on r0-r3 hold MV components and no longer the input pointers.
+        LDRSH CandMV1dx, [pCandMV1], #2
+        LDRSH CandMV2dx, [pCandMV2], #2
+        LDRSH CandMV3dx, [pCandMV3], #2
+    
+        ;// Load argument from the stack
+        M_LDR pDstMVPredME, ppDstMVPredME
+
+        LDRSH CandMV1dy, [pCandMV1]
+        LDRSH CandMV2dy, [pCandMV2]
+        LDRSH CandMV3dy, [pCandMV3]
+
+        CMP pDstMVPredME, #0        
+
+        ;// Store the candidate MV's into the pDstMVPredME (only when it is non-NULL), 
+        ;// these can be used in the fast algorithm if implemented 
+
+        STRHNE CandMV1dx, [pDstMVPredME], #2
+        STRHNE CandMV1dy, [pDstMVPredME], #2        
+        STRHNE CandMV2dx, [pDstMVPredME], #2
+        STRHNE CandMV2dy, [pDstMVPredME], #2
+        STRHNE CandMV3dx, [pDstMVPredME], #2
+        STRHNE CandMV3dy, [pDstMVPredME]
+           
+        ; Find the median of the 3 candidate MV's
+        M_MEDIAN3 CandMV1dx, CandMV2dx, CandMV3dx, temp
+
+        ;// Load argument from the stack
+        M_LDR pDstMVPred, ppDstMVPred
+
+        M_MEDIAN3 CandMV1dy, CandMV2dy, CandMV3dy, temp
+        ;// The stores below take each median from the third operand register of M_MEDIAN3
+        STRH CandMV3dx, [pDstMVPred], #2
+        STRH CandMV3dy, [pDstMVPred]
+
+        MOV return, #OMX_Sts_NoErr
+    
+        M_END
+    ENDIF ;// ARM1136JS
+    
+    END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
new file mode 100644
index 0000000..bfeb540
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
@@ -0,0 +1,73 @@
+;//
+;// 
+;// File Name:  omxVCM4P2_IDCT8x8blk_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+;// Function:
+;//     omxVCM4P2_IDCT8x8blk
+;//
+        ;// Include headers
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        ;// Define cpu variants
+        M_VARIANTS ARM1136JS
+
+        INCLUDE armCOMM_IDCT_s.h        
+        
+        IMPORT armCOMM_IDCTPreScale
+        ;//
+        ;// Function prototype
+        ;//
+        ;//     OMXResult
+        ;//     omxVCM4P2_IDCT8x8blk(const OMX_S16* pSrc,
+        ;//                                       OMX_S16* pDst)
+        ;//    
+        
+    IF ARM1136JS :LOR: CortexA8
+        M_ALLOC4  ppDest, 4
+        M_ALLOC4  pStride, 4
+        M_ALLOC8  pBlk, 2*8*8
+    ENDIF
+    ;// NOTE(review): the stack slots above are not referenced in this file; presumably M_IDCT uses them -- confirm in armCOMM_IDCT_s.h
+    IF ARM1136JS
+        M_START omxVCM4P2_IDCT8x8blk, r11
+    ENDIF
+    
+        
+    IF ARM1136JS :LOR: CortexA8
+        
+;// Declare input registers
+pSrc            RN 0
+pDst            RN 1
+
+;// Declare other intermediate registers
+Result          RN 0
+
+;// Prototype for macro M_IDCT
+;// pSrc            RN 0  ;// source data buffer
+;// Stride          RN 1  ;// destination stride in bytes
+;// pDest           RN 2  ;// destination data buffer
+;// pScale          RN 3  ;// pointer to scaling table
+
+pSrc    RN 0    
+Stride  RN 1    
+pDest   RN 2    
+pScale  RN 3    
+        ;// pDst and Stride both name r1, so the destination pointer is first copied to pDest (r2)
+        MOV         pDest, pDst
+        LDR         pScale, =armCOMM_IDCTPreScale        
+        M_IDCT      s9, s16, 16      ;// NOTE(review): argument meanings are defined by M_IDCT in armCOMM_IDCT_s.h -- confirm
+        MOV         Result, #OMX_Sts_NoErr
+        M_END       
+    ENDIF  
+        ;// ARM1136JS :LOR: CortexA8
+
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
new file mode 100644
index 0000000..20965bf
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
@@ -0,0 +1,713 @@
+;//
+;// 
+;// File Name:  omxVCM4P2_MCReconBlock_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// Description:
+;//
+;//
+
+;// Include standard headers
+    INCLUDE omxtypes_s.h
+    INCLUDE armCOMM_s.h
+
+;// Import symbols required from other files
+
+    M_VARIANTS ARM1136JS
+
+;// ***************************************************************************
+;// ARM1136JS implementation
+;// ***************************************************************************
+    IF  ARM1136JS
+    
+;// ***************************************************************************
+;// MACRO DEFINITIONS
+;// ***************************************************************************
+    ;// Description:
+    ;//
+    ;//   dest[j] = (x[j] + y[j] + round) >> 1,   j=0..3
+    ;//
+    ;// Similar to UHADD8 instruction, but with a rounding value of 1 added to
+    ;// each sum before dividing by two, if round is 1
+    ;//
+    ;// Syntax:
+    ;// M_UHADD8R   $dest, $x, $y, $round, $mask
+    ;//
+    ;// Inputs:
+    ;// $x        four packed bytes,   x[3] :  x[2]  :  x[1]  :  x[0]
+    ;// $y        four packed bytes,   y[3] :  y[2]  :  y[1]  :  y[0]
+    ;// $round    0 if no rounding to be added, 1 if rounding to be done
+    ;// $mask     some register set to 0x80808080
+    ;//
+    ;// Outputs:
+    ;// $dest     four packed bytes,   z[3] :  z[2]  :  z[1]  :  z[0]
+
+    MACRO
+    M_UHADD8R   $dest, $x, $y, $round, $mask
+    IF $round = 1                           ;// Rounding variant, selected at assembly time
+        IF  $dest /= $y                     ;// $y stays intact while $dest is used as scratch
+            MVN         $dest, $x           ;// dest = NOT x, i.e. 255 - x[j] in every byte
+            UHSUB8      $dest, $y, $dest    ;// dest[j] = (y[j] - (255 - x[j])) >> 1
+            EOR         $dest, $dest, $mask ;// With mask 0x80808080: flip bit 7 of each byte, giving (x[j] + y[j] + 1) >> 1
+        ELSE                                ;// $dest is $y: complement $y instead so that $x stays intact
+            MVN         $dest, $y
+            UHSUB8      $dest, $x, $dest
+            EOR         $dest, $dest, $mask
+        ENDIF
+    ELSE
+        UHADD8      $dest, $x, $y           ;// No rounding: plain per-byte halving add
+    ENDIF
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Load 8 bytes from $pSrc (aligned or unaligned locations)
+    ;//
+    ;// Syntax:
+    ;// M_LOAD_X    $pSrc, $srcStep, $out0, $out1, $scratch, $offset
+    ;// 
+    ;// Inputs:
+    ;// $pSrc       4 byte aligned source pointer to an address just less than 
+    ;//             or equal to the data location
+    ;// $srcStep    The stride on source
+    ;// $scratch    A scratch register, used internally for temp calculations
+    ;// $offset     Difference of source data location to the source pointer
+    ;//             Use when $offset != 0 (unaligned load)
+    ;//
+    ;// Outputs:
+    ;// $pSrc       In case the macro accepts stride, it increments the pSrc by 
+    ;//             that value, else unchanged
+    ;// $out0       four packed bytes,   z[3] :  z[2]  :  z[1]  :  z[0]
+    ;// $out1       four packed bytes,   z[7] :  z[6]  :  z[5]  :  z[4]
+    ;//
+    ;// Note: {$out0, $out1, $scratch} should be registers with ascending
+    ;// register numbering. In case offset is 0, $scratch is not modified.
+
+    MACRO
+    M_LOAD_X    $pSrc, $srcStep, $out0, $out1, $scratch, $offset
+        IF $offset = 0                      ;// Aligned case: the two loaded words are the result
+            LDM         $pSrc, {$out0, $out1}
+            ADD         $pSrc, $pSrc, $srcStep          ;// Advance the source pointer by the stride
+        ELSE                                ;// Unaligned case: the 8 bytes span three words
+            LDM         $pSrc, {$out0, $out1, $scratch} 
+            ADD         $pSrc, $pSrc, $srcStep          ;// Advance the source pointer by the stride
+            ;// Shift out the $offset low bytes of each word and refill the top from the next word
+            MOV         $out0, $out0, LSR #8 * $offset
+            ORR         $out0, $out0, $out1, LSL #(32 - 8 * ($offset))
+            MOV         $out1, $out1, LSR #8 * $offset
+            ORR         $out1, $out1, $scratch, LSL #(32 - 8 * ($offset))
+        ENDIF
+    MEND
+
+;// ***************************************************************************
+    ;// Description:
+    ;// Loads three words for X interpolation and updates the pointer to the next row.
+    ;// For X interpolation, given a source pointer truncated to 4-byte alignment,
+    ;// three contiguous words are always required from there to get the
+    ;// nine bytes from the source pointer for filtering. 
+    ;//
+    ;// Syntax:
+    ;// M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3
+    ;// 
+    ;// Inputs:
+    ;// $pSrc       4 byte aligned source pointer to an address just less than 
+    ;//             or equal to the data location
+    ;//
+    ;// $srcStep    The stride on source
+    ;//
+    ;// $offset     Difference of source data location to the source pointer
+    ;//             Use when $offset != 0 (unaligned load)
+    ;//
+    ;// Outputs:
+    ;// $pSrc       Incremented by $srcStep
+    ;//
+    ;// $word0, $word1, $word2, $word3
+    ;//             Three of these are outputs based on the $offset parameter. 
+    ;//             The outputs are specifically generated to be processed by 
+    ;//             the M_EXT_XINT macro. Following is the illustration to show 
+    ;//             how the nine bytes are spanned for different offsets from 
+    ;//             notTruncatedForAlignmentSourcePointer.
+    ;//
+    ;//              ------------------------------------------------------
+    ;//             | Offset | Aligned Ptr | word0 | word1 | word2 | word3 |
+    ;//             |------------------------------------------------------|
+    ;//             |    0   |       0     | 0123  | 4567  | 8xxx  |       |
+    ;//             |    1   |      -1     | x012  | 3456  | 78xx  |       |
+    ;//             |    2   |      -2     | xx01  | 2345  | 678x  |       |
+    ;//             |    3   |      -3     | xxx0  |       | 1234  | 5678  |
+    ;//              ------------------------------------------------------
+    ;// 
+    ;//             where the numbering (0-8) designates the 9 bytes from the
+    ;//             start of a particular row. The illustration doesn't take into 
+    ;//             account the positioning of bytes within the word, and the 
+    ;//             macro combination with M_EXT_XINT will work only in 
+    ;//             little-endian environments
+    ;// 
+    ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending
+    ;// register numbering
+
+    MACRO
+    M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3
+        IF $offset /= 3                     ;// Offsets 0-2: the three words land in word0..word2
+            LDM         $pSrc, {$word0, $word1, $word2}
+        ELSE                                ;// Offset 3: word1 is skipped so that word2/word3 need no further work in M_EXT_XINT
+            LDM         $pSrc, {$word0, $word2, $word3}
+        ENDIF
+        ADD         $pSrc, $pSrc, $srcStep  ;// Advance the source pointer by the stride
+    MEND
+
+;// ***************************************************************************
+    ;// Description:
+    ;// Extract four registers of four pixels for X interpolation 
+    ;// 
+    ;// Syntax:
+    ;// M_EXT_XINT $offset, $word0, $word1, $word2, $word3
+    ;// 
+    ;// Inputs:
+    ;// $offset     Difference of source data location to the source pointer
+    ;//             Use when $offset != 0 (unaligned load)
+    ;// 
+    ;// $word0, $word1, $word2, $word3
+    ;//             Three of these are inputs based on the $offset parameter. 
+    ;//             The inputs are specifically selected to be processed by 
+    ;//             the M_EXT_XINT macro.
+    ;//
+    ;//              ------------------------------------------------------
+    ;//             | Offset | Aligned Ptr | word0 | word1 | word2 | word3 |
+    ;//             |------------------------------------------------------|
+    ;//             |    0   |       0     | 0123  | 4567  | 8xxx  | yyyy  |
+    ;//             |    1   |      -1     | x012  | 3456  | 78xx  | yyyy  |
+    ;//             |    2   |      -2     | xx01  | 2345  | 678x  | yyyy  |
+    ;//             |    3   |      -3     | xxx0  | yyyy  | 1234  | 5678  |
+    ;//              ------------------------------------------------------
+    ;// 
+    ;// Outputs:
+    ;// $word0, $word1, $word2, $word3
+    ;//             Bytes from the original source pointer (not truncated for
+    ;//             4 byte alignment) as shown in the table. 
+    ;//              -------------------------------
+    ;//             | word0 | word1 | word2 | word3 |
+    ;//             |-------------------------------|
+    ;//             | 0123  | 4567  | 1234  | 5678  |
+    ;//              -------------------------------
+    ;//
+    ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending
+    ;// register numbering
+
+    MACRO
+    M_EXT_XINT $offset, $word0, $word1, $word2, $word3
+        IF $offset = 0
+            ; $word0 and $word1 are ok
+            ; $word2, $word3 are just 8 shifted versions
+            MOV         $word3, $word1, LSR #8              ;// word3 = bytes 5,6,7 ...
+            ORR         $word3, $word3, $word2, LSL #24     ;// ... plus byte 8 from the low byte of word2
+            MOV         $word2, $word0, LSR #8              ;// word2 = bytes 1,2,3 ...
+            ORR         $word2, $word2, $word1, LSL #24     ;// ... plus byte 4 from the low byte of word1
+        ELIF $offset = 3
+            ; $word2 and $word3 are ok (taken care while loading itself)
+            ; set $word0 & $word1
+            MOV         $word0, $word0, LSR #24             ;// Byte 0 is the top byte of the loaded word0
+            ORR         $word0, $word0, $word2, LSL #8      ;// word0 = bytes 0,1,2,3
+            MOV         $word1, $word2, LSR #24             ;// Byte 4 is the top byte of word2
+            ORR         $word1, $word1, $word3, LSL #8      ;// word1 = bytes 4,5,6,7
+        ELSE
+            MOV         $word0, $word0, LSR #8 * $offset
+            ORR         $word0, $word0, $word1, LSL #(32 - 8 * ($offset))
+            MOV         $word1, $word1, LSR #8 * $offset
+            ORR         $word1, $word1, $word2, LSL #(32 - 8 * ($offset))
+            ;// word0/word1 now hold bytes 0-3 / 4-7; derive the copies shifted by one byte (bytes 1-4 / 5-8)
+            MOV         $word3, $word1, LSR #8
+            ORR         $word3, $word3, $word2, LSL #(32 - 8 * (($offset)+1))
+            MOV         $word2, $word0, LSR #8
+            ORR         $word2, $word2, $word1, LSL #24
+        ENDIF
+    MEND
+
+;// ***************************************************************************
+    ;// Description:
+    ;// Computes half-sum and xor of two inputs and puts them in the input 
+    ;// registers in that order
+    ;//
+    ;// Syntax:
+    ;// M_HSUM_XOR      $v0, $v1, $tmp
+    ;// 
+    ;// Inputs:
+    ;// $v0         a, first input
+    ;// $v1         b, second input
+    ;// $tmp        scratch register
+    ;// 
+    ;// Outputs:
+    ;// $v0         (a + b)/2
+    ;// $v1         a ^ b
+
+    MACRO
+    M_HSUM_XOR      $v0, $v1, $tmp
+        UHADD8      $tmp, $v0, $v1     ;// tmp = (a + b) >> 1 in each of the 4 byte lanes
+        EOR         $v1, $v0, $v1      ;// l0 = a ^ b (second input overwritten, first still intact)
+        MOV         $v0, $tmp          ;// s0 = half-sum moved into the first input register
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Calculates average of 4 values (a,b,c,d) for HalfPixelXY predict type in 
+    ;// mcReconBlock module. Very specific to the implementation of 
+    ;// M_MCRECONBLOCK_HalfPixelXY done here. Uses "tmp" as scratch register and 
+    ;// "yMask" for mask variable "0x1010101x" set in it. In yMask 4 lsbs are 
+    ;// not significant and are used by the callee for row counter (y)
+    ;//
+    ;// Some points to note are:
+    ;// 1. Input is pair of pair-averages and Xors
+    ;// 2. $sum1 and $lsb1 are not modified and hence can be reused in another 
+    ;//    running average
+    ;// 3. Output is in the first argument
+    ;//
+    ;// Syntax:
+    ;// M_AVG4         $sum0, $lsb0, $sum1, $lsb1, $rndVal
+    ;// 
+    ;// Inputs:
+    ;// $sum0       (a + b) >> 1, where a and b are 1st and 2nd inputs to be averaged
+    ;// $lsb0       (a ^ b)
+    ;// $sum1       (c + d) >> 1. Not modified
+    ;// $lsb1       (c ^ d)       Not modified
+    ;// $rndVal     Assembler Variable. 0 for rounding, 1 for no rounding
+    ;// 
+    ;// Outputs:
+    ;// $sum0       (a + b + c + d + 1) / 4 : If no rounding
+    ;//             (a + b + c + d + 2) / 4 : If rounding
+
+    MACRO
+    M_AVG4          $sum0, $lsb0, $sum1, $lsb1, $rndVal
+        LCLS OP1
+        LCLS OP2
+        IF $rndVal = 0 ;// rounding case: carry-in is (s0 ^ s1) | (e0 & e1)
+OP1 SETS "AND"
+OP2 SETS "ORR"
+        ELSE           ;// Not rounding case: carry-in is (s0 ^ s1) & (e0 | e1)
+OP1 SETS "ORR"
+OP2 SETS "AND"
+        ENDIF
+        
+        LCLS lsb2
+        LCLS sum2
+        LCLS dest
+    
+lsb2  SETS "tmp"
+sum2  SETS "$lsb0"
+dest  SETS "$sum0"
+
+        $OP1        $lsb0, $lsb0, $lsb1          ;// e0 = e0 & e1 (rounding) or e0 | e1 (no rounding)
+        EOR         $lsb2, $sum0, $sum1          ;// e2 = s0 ^ s1, written to the named register tmp
+        $OP2        $lsb2, $lsb2, $lsb0          ;// e2 = e2 | e0 (rounding) or e2 & e0 (no rounding)
+        AND         $lsb2, $lsb2, yMask, LSR # 4 ;// e2 = e2 & mask; the shift drops the row-counter nibble of yMask
+        UHADD8      $sum2, $sum0, $sum1          ;// s2 = (s0 + s1)/2, reusing the first lsb register (its old value is consumed)
+        UADD8       $dest, $sum2, $lsb2          ;// dest =  s2 + e2, result returned in the first sum register
+    MEND
+;// ***************************************************************************
+;// Motion compensation handler macros
+;// ***************************************************************************
+    ;// Description:
+    ;// Implement motion compensation routines using the named registers in 
+    ;// callee function. Each of the following 4 macros implements one of the
+    ;// 4 predict types. Each is instantiated for up to 8 cases, i.e. all the
+    ;// combinations of 4 source alignment offsets and 2 rounding flag values
+    ;//
+    ;// Syntax:
+    ;// M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+    ;// M_MCRECONBLOCK_HalfPixelX   $rndVal, $offset
+    ;// M_MCRECONBLOCK_HalfPixelY   $rndVal, $offset
+    ;// M_MCRECONBLOCK_HalfPixelXY  $rndVal, $offset
+    ;// 
+    ;// Inputs:
+    ;// $rndVal     Assembler Variable. 0 for rounding, 1 for no rounding
+    ;// $offset     $pSrc MOD 4 value. Offset from 4 byte aligned location.
+    ;// 
+    ;// Outputs:
+    ;// Outputs come in the named registers of the callee functions
+    ;// The macro loads the data from the source pointer, processes it and 
+    ;// stores in the destination pointer. Does the whole prediction cycle
+    ;// of Motion Compensation routine for a particular predictType
+    ;// After this only residue addition to the predicted values remain
+
+    MACRO
+    M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+    ;// Algorithmic Description:
+    ;// This handles motion compensation for IntegerPixel predictType. Both
+    ;// rounding cases are handled by the same code base. It is just a copy
+    ;// from source to destination. Two lines are done per loop to reduce 
+    ;// stalls. Loop has been software pipelined as well for that purpose.
+    ;// 
+    ;// M_LOAD_X loads a whole row in two registers and then they are stored
+    
+CaseIntegerPixelRnd0Offset$offset
+CaseIntegerPixelRnd1Offset$offset
+    M_LOAD_X    pSrc, srcStep, tmp1, tmp2, tmp3, $offset  ;// Pre-load row 0 (stored from tmp1, tmp2 below)
+    M_LOAD_X    pSrc, srcStep, tmp3, tmp4, tmp5, $offset  ;// Pre-load row 1 (stored from tmp3, tmp4 below)
+YloopIntegerPixelOffset$offset
+    SUBS        y, y, #2                                  ;// two rows per iteration; flags are consumed by BGT below
+    STRD        tmp1, tmp2, [pDst], dstStep               ;// store the rows loaded by the previous iteration
+    STRD        tmp3, tmp4, [pDst], dstStep
+    M_LOAD_X    pSrc, srcStep, tmp1, tmp2, tmp3, $offset  ;// load next two rows; on the last pass these are
+    M_LOAD_X    pSrc, srcStep, tmp3, tmp4, tmp5, $offset  ;// loaded but never stored (pipelining side effect)
+    BGT         YloopIntegerPixelOffset$offset
+
+    B           SwitchPredictTypeEnd                      ;// continue with the common residue addition
+    MEND
+;// ***************************************************************************
+    MACRO
+    M_MCRECONBLOCK_HalfPixelX $rndVal, $offset
+    ;// Algorithmic Description:
+    ;// This handles motion compensation for HalfPixelX predictType. The two
+    ;// rounding cases are handled by the different code base and spanned by 
+    ;// different macro calls. Loop has been software pipelined to reduce 
+    ;// stalls.
+    ;// 
+    ;// Filtering involves averaging a pixel with the next horizontal pixel.
+    ;// M_LOAD_XINT and M_EXT_XINT combination generate 4 registers, 2 with 
+    ;// all pixels in a row with 4 pixel in each register and another 2
+    ;// registers with pixels corresponding to one horizontally shifted pixel
+    ;// corresponding to the initial row pixels. These are set of packed 
+    ;// registers appropriate to do 4 lane SIMD.
+    ;// After that M_UHADD8R macro does the averaging taking care of the 
+    ;// rounding as required
+    
+CaseHalfPixelXRnd$rndVal.Offset$offset
+    IF $rndVal = 0
+        LDR mask, =0x80808080                               ;// mask shares r11 with rndVal, which is no longer needed here
+    ENDIF
+
+    M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4  ;// Pre-load first row
+YloopHalfPixelXRnd$rndVal.Offset$offset
+    SUBS        y, y, #1                                    ;// one row per iteration; flags are consumed by BGT below
+    M_EXT_XINT  $offset, tmp1, tmp2, tmp3, tmp4             ;// tmp1,tmp2 = row; tmp3,tmp4 = row shifted by one pixel
+    M_UHADD8R   tmp5, tmp1, tmp3, (1-$rndVal), mask         ;// average pixels 0..3 with their right neighbours
+    M_UHADD8R   tmp6, tmp2, tmp4, (1-$rndVal), mask         ;// average pixels 4..7 with their right neighbours
+    STRD        tmp5, tmp6, [pDst], dstStep
+    M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4  ;// load next row (loaded but unused on the last pass)
+    BGT         YloopHalfPixelXRnd$rndVal.Offset$offset
+
+    B           SwitchPredictTypeEnd                        ;// continue with the common residue addition
+    MEND
+;// ***************************************************************************
+    MACRO
+    M_MCRECONBLOCK_HalfPixelY $rndVal, $offset
+    ;// Algorithmic Description:
+    ;// This handles motion compensation for HalfPixelY predictType. The two
+    ;// rounding cases are handled by the different code base and spanned by 
+    ;// different macro calls. PreLoading is used to avoid reload of same data. 
+    ;// 
+    ;// Filtering involves averaging a pixel with the next vertical pixel.
+    ;// M_LOAD_X generates 2 registers with all pixels in a row with 4 pixel in 
+    ;// each register. These are set of packed registers appropriate to do 
+    ;// 4 lane SIMD. After that M_UHADD8R macro does the averaging taking care 
+    ;// of the rounding as required
+    
+CaseHalfPixelYRnd$rndVal.Offset$offset
+    IF $rndVal = 0
+        LDR mask, =0x80808080                            ;// mask shares r11 with rndVal, which is no longer needed here
+    ENDIF
+
+    M_LOAD_X    pSrc, srcStep, tmp1, tmp2, tmp5, $offset ;// Pre-load
+YloopHalfPixelYRnd$rndVal.Offset$offset
+    SUBS        y, y, #2                                 ;// two rows per iteration; flags are consumed by BGT below
+    ;// Processing one line
+    M_LOAD_X    pSrc, srcStep, tmp3, tmp4, tmp5, $offset ;// row below the one held in tmp1, tmp2
+    M_UHADD8R   tmp1, tmp1, tmp3, (1-$rndVal), mask      ;// average with the row below, pixels 0..3
+    M_UHADD8R   tmp2, tmp2, tmp4, (1-$rndVal), mask      ;// average with the row below, pixels 4..7
+    STRD        tmp1, tmp2, [pDst], dstStep
+    ;// Processing another line
+    M_LOAD_X    pSrc, srcStep, tmp1, tmp2, tmp5, $offset ;// next row; also serves as the upper row of the next pass
+    M_UHADD8R   tmp3, tmp3, tmp1, (1-$rndVal), mask
+    M_UHADD8R   tmp4, tmp4, tmp2, (1-$rndVal), mask
+    STRD        tmp3, tmp4, [pDst], dstStep
+
+    BGT         YloopHalfPixelYRnd$rndVal.Offset$offset
+
+    B           SwitchPredictTypeEnd                     ;// continue with the common residue addition
+    MEND
+;// ***************************************************************************
+    MACRO
+    M_MCRECONBLOCK_HalfPixelXY $rndVal, $offset
+    ;// Algorithmic Description:
+    ;// This handles motion compensation for HalfPixelXY predictType. The two
+    ;// rounding cases are handled by the different code base and spanned by 
+    ;// different macro calls. PreLoading is used to avoid reload of same data. 
+    ;// 
+    ;// Filtering involves averaging a pixel with the next vertical, horizontal 
+    ;// and right-down diagonal pixels. Just as in HalfPixelX case, M_LOAD_XINT
+    ;// and M_EXT_XINT combination generates 4 registers with a row and its
+    ;// 1 pixel right shifted version, with 4 pixels in one register. Another 
+    ;// call of that macro-combination gets another row. Then M_HSUM_XOR is 
+    ;// called to get mutual half-sum and xor combinations of a row with its
+    ;// shifted version as they are inputs to the M_AVG4 macro which computes
+    ;// the 4 element average with rounding. Note that it is the half-sum/xor 
+    ;// values that are preserved for next row as they can be re-used in the 
+    ;// next call to the M_AVG4 and saves recomputation.
+    ;// Due to lack of register, the row counter and a masking value required 
+    ;// in M_AVG4 are packed into a single register yMask where the last nibble
+    ;// holds the row counter values and rest holds the masking variable left 
+    ;// shifted by 4
+    
+CaseHalfPixelXYRnd$rndVal.Offset$offset
+    LDR         yMask, =((0x01010101 << 4) + 8)         ;// mask in the upper bits, row counter = 8 in the low nibble
+
+    M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+    M_EXT_XINT  $offset, t00, t01, t10, t11
+    M_HSUM_XOR  t00, t10, tmp               ;// s0, l0
+    M_HSUM_XOR  t01, t11, tmp               ;// s0', l0'
+
+YloopHalfPixelXYRnd$rndVal.Offset$offset
+    ;// Processing one line
+    ;// t00, t01, t10, t11 required from previous loop
+    M_LOAD_XINT pSrc, srcStep, $offset, t20, t21, t30, t31 ;// Load c, c', d, d'
+    SUB         yMask, yMask, #2            ;// two rows per iteration; flags deliberately not updated here
+    M_EXT_XINT  $offset, t20, t21, t30, t31
+    M_HSUM_XOR  t20, t30, tmp               ;// s1, l1
+    M_HSUM_XOR  t21, t31, tmp               ;// s1', l1'
+    M_AVG4      t00, t10, t20, t30, $rndVal ;// s0, l0, s1, l1
+    M_AVG4      t01, t11, t21, t31, $rndVal ;// s0', l0', s1', l1'
+    STRD        t00, t01, [pDst], dstStep   ;// store the average
+    
+    ;// Processing another line
+    ;// t20, t21, t30, t31 required from above
+    M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+    TST         yMask, #7                   ;// Z is set once the row counter bits reach zero
+    M_EXT_XINT  $offset, t00, t01, t10, t11
+    M_HSUM_XOR  t00, t10, tmp
+    M_HSUM_XOR  t01, t11, tmp
+    M_AVG4      t20, t30, t00, t10, $rndVal
+    M_AVG4      t21, t31, t01, t11, $rndVal
+    STRD        t20, t21, [pDst], dstStep
+
+    BNE         YloopHalfPixelXYRnd$rndVal.Offset$offset ;// test Z only: TST leaves V untouched, so GT would depend on a stale V
+
+    IF $offset/=3 :LOR: $rndVal/=1          ;// the last handler emitted falls through to SwitchPredictTypeEnd
+        B           SwitchPredictTypeEnd
+    ENDIF
+    MEND
+;// ***************************************************************************
+;// Motion compensation handler macros end here
+;// ***************************************************************************
+    ;// Description:
+    ;// Populates all 4 kinds of offsets "cases" for each predictType and rndVal
+    ;// combination in the "switch" to prediction processing code segment
+    ;//
+    ;// Syntax:
+    ;// M_CASE_OFFSET $rnd, $predictType
+    ;// 
+    ;// Inputs:
+    ;// $rnd            0 for rounding, 1 for no rounding
+    ;// $predictType    The prediction mode
+    ;// 
+    ;// Outputs:
+    ;// Populated list of "M_CASE"s for the "M_SWITCH" macro
+
+    MACRO
+    M_CASE_OFFSET $rnd, $predictType
+        M_CASE      Case$predictType.Rnd$rnd.Offset0   ;// source pointer already 4 byte aligned
+        M_CASE      Case$predictType.Rnd$rnd.Offset1   ;// source pointer 1 byte past alignment
+        M_CASE      Case$predictType.Rnd$rnd.Offset2   ;// source pointer 2 bytes past alignment
+        M_CASE      Case$predictType.Rnd$rnd.Offset3   ;// source pointer 3 bytes past alignment
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Populates all 2 kinds of rounding "cases" for each predictType in the 
+    ;// "switch" to prediction processing code segment
+    ;//
+    ;// Syntax:
+    ;// M_CASE_MCRECONBLOCK $predictType
+    ;// 
+    ;// Inputs:
+    ;// $predictType    The prediction mode
+    ;// 
+    ;// Outputs:
+    ;// Populated list of "M_CASE_OFFSET" macros
+
+    MACRO
+    M_CASE_MCRECONBLOCK $predictType
+        M_CASE_OFFSET  0, $predictType ;// 0 for rounding: emits the 4 offset cases for Rnd0
+        M_CASE_OFFSET  1, $predictType ;// 1 for no rounding: emits the 4 offset cases for Rnd1
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Populates all 8 kinds of rounding and offset combinations handling macros 
+    ;// for the specified predictType. In case of "IntegerPixel" predictType, 
+    ;// rounding is not required so same code segment handles both cases
+    ;//
+    ;// Syntax:
+    ;// M_MCRECONBLOCK    $predictType
+    ;// 
+    ;// Inputs:
+    ;// $predictType    The prediction mode
+    ;// 
+    ;// Outputs:
+    ;// Populated list of "M_MCRECONBLOCK_<predictType>" macros for specified 
+    ;// predictType. Each 
+    ;//                 M_MCRECONBLOCK_<predictType> $rnd, $offset 
+    ;// is an code segment (starting with a label indicating the predictType, 
+    ;// rounding and offset combination)
+    ;// Four calls of this macro with the 4 prediction modes populate the handlers
+    ;// for all 32 case labels (IntegerPixel shares one handler per offset)
+
+    MACRO
+    M_MCRECONBLOCK $predictType
+        M_MCRECONBLOCK_$predictType 0, 0 ;// arguments are rndVal, offset
+        M_MCRECONBLOCK_$predictType 0, 1
+        M_MCRECONBLOCK_$predictType 0, 2
+        M_MCRECONBLOCK_$predictType 0, 3
+    IF "$predictType" /= "IntegerPixel" ;// If not IntegerPixel then rounding makes a difference
+        M_MCRECONBLOCK_$predictType 1, 0 ;// separate code for the no rounding case
+        M_MCRECONBLOCK_$predictType 1, 1
+        M_MCRECONBLOCK_$predictType 1, 2
+        M_MCRECONBLOCK_$predictType 1, 3
+    ENDIF
+    MEND
+;// ***************************************************************************
+;// Input/Output Registers
+pSrc                  RN 0
+srcStep               RN 1
+arg_pSrcResidue       RN 2    ;// r2 on entry; saved to the stack before r2 is reused as dstStep
+pSrcResidue           RN 12   ;// reloaded from the stack for the residue addition
+pDst                  RN 3
+dstStep               RN 2    ;// shares r2 with arg_pSrcResidue
+predictType           RN 10   ;// also holds the combined switch index
+rndVal                RN 11
+mask                  RN 11   ;// shares r11 with rndVal
+
+;// Local Scratch Registers
+zero                  RN 12
+y                     RN 14   ;// row counter
+
+tmp1                  RN 4
+tmp2                  RN 5
+tmp3                  RN 6
+tmp4                  RN 7
+tmp5                  RN 8
+tmp6                  RN 9
+tmp7                  RN 10
+tmp8                  RN 11
+tmp9                  RN 12
+
+t00                   RN 4    ;// t00..t31 alias r4..r11 and are used by the HalfPixelXY handlers
+t01                   RN 5
+t10                   RN 6
+t11                   RN 7
+t20                   RN 8
+t21                   RN 9
+t30                   RN 10   ;// shares r10 with predictType
+t31                   RN 11   ;// shares r11 with rndVal and mask
+tmp                   RN 12   ;// scratch named explicitly inside M_AVG4
+
+yMask                 RN 14   ;// shares r14 with y: packed row counter and mask
+
+dst                   RN 1    ;// shares r1 with srcStep; used in the residue addition
+return                RN 0    ;// shares r0 with pSrc
+
+    ;// Allocate memory on stack
+    M_ALLOC4    Stk_pDst,           4
+    M_ALLOC4    Stk_pSrcResidue,    4
+    ;// Function header
+    M_START     omxVCM4P2_MCReconBlock, r11
+    ;// Define stack arguments
+    M_ARG       Arg_dstStep,        4
+    M_ARG       Arg_predictType,    4
+    M_ARG       Arg_rndVal,         4
+    ;// Save on stack (both are needed again for the residue addition)
+    M_STR       pDst, Stk_pDst
+    M_STR       arg_pSrcResidue, Stk_pSrcResidue
+    ;// Load argument from the stack (dstStep overwrites r2, saved above)
+    M_LDR       dstStep, Arg_dstStep
+    M_LDR       predictType, Arg_predictType
+    M_LDR       rndVal, Arg_rndVal
+    
+    MOV         y, #8                                   ;// row counter
+    
+    AND         tmp1, pSrc, #3                          ;// tmp1 = offset of pSrc from 4 byte alignment
+    ORR         predictType, tmp1, predictType, LSL #3  ;// switch index = predictType*8 + offset
+    ORR         predictType, predictType, rndVal, LSL #2 ;//              + rndVal*4
+    ;// Truncating source pointer to align to 4 byte location
+    BIC         pSrc, pSrc, #3
+
+    ;// Implementation takes care of all combinations of different 
+    ;// predictTypes, rounding cases and source pointer offsets to alignment 
+    ;// of 4 bytes in different code bases unless one of these parameter wasn't 
+    ;// making any difference to the implementation. Below M_CASE_MCRECONBLOCK
+    ;// macros branch into 8 M_CASE macros for all combinations of the 2 
+    ;// rounding cases and 4 offsets of the pSrc pointer to the 4 byte 
+    ;// alignment. 
+    M_SWITCH    predictType
+        M_CASE_MCRECONBLOCK IntegerPixel
+        M_CASE_MCRECONBLOCK HalfPixelX
+        M_CASE_MCRECONBLOCK HalfPixelY
+        M_CASE_MCRECONBLOCK HalfPixelXY
+    M_ENDSWITCH
+
+    ;// The M_MCRECONBLOCK macros populate the code bases by calling all 8 
+    ;// particular macros (4 in case of IntegerPixel as rounding makes no 
+    ;// difference there) to generate the code for all cases of rounding and 
+    ;// offsets. LTORG is used to segment the code as code size bloated beyond 
+    ;// 4KB.
+    M_MCRECONBLOCK IntegerPixel
+    M_MCRECONBLOCK HalfPixelX
+    LTORG
+    M_MCRECONBLOCK HalfPixelY
+    M_MCRECONBLOCK HalfPixelXY
+SwitchPredictTypeEnd
+
+    ;// Residue Addition
+    ;// This is done in 2 lane SIMD though loads are further optimized and
+    ;// 4 bytes are loaded in case of destination buffer. Algorithmic 
+    ;// details are in inlined comments
+    M_LDR       pSrcResidue, Stk_pSrcResidue
+    CMP         pSrcResidue, #0
+    BEQ         pSrcResidueConditionEnd    ;// NULL residue pointer: prediction only
+pSrcResidueNotNull    
+    M_LDR       pDst, Stk_pDst             ;// rewind to the start of the predicted block
+    MOV         y, #8
+    SUB         dstStep, dstStep, #4       ;// the first store of each row already advances pDst by 4
+Yloop_pSrcResidueNotNull
+    SUBS        y, y, #1                   ;// flags are consumed by BGT at the end of the loop
+    LDR         dst, [pDst]                ;// dst = [dcba]
+    LDMIA       pSrcResidue!, {tmp1, tmp2} ;// tmp1=[B A] tmp2=[D C] (16 bit residues, high:low)
+    PKHBT       tmp3, tmp1, tmp2, LSL #16  ;// Deltaval1 = [C A]
+    PKHTB       tmp4, tmp2, tmp1, ASR #16  ;// DeltaVal2 = [D B]
+    UXTB16      tmp1, dst                  ;// tmp1 = [0c0a]
+    UXTB16      tmp2, dst, ROR #8          ;// tmp2 = [0d0b]
+    QADD16      tmp1, tmp1, tmp3           ;// Add and saturate to 16 bits
+    QADD16      tmp2, tmp2, tmp4
+    USAT16      tmp1, #8, tmp1             ;// armClip(0, 255, tmp1)
+    USAT16      tmp2, #8, tmp2             ;// armClip(0, 255, tmp2)
+    ORR         tmp1, tmp1, tmp2, LSL #8   ;// tmp1 = [dcba]
+    STR         tmp1, [pDst], #4           ;// store pixels 0..3, move on to pixels 4..7
+    
+    LDR         dst, [pDst]                ;// same sequence for pixels 4..7 of the row
+    LDMIA       pSrcResidue!, {tmp1, tmp2}
+    PKHBT       tmp3, tmp1, tmp2, LSL #16
+    PKHTB       tmp4, tmp2, tmp1, ASR #16
+    UXTB16      tmp1, dst
+    UXTB16      tmp2, dst, ROR #8
+    QADD16      tmp1, tmp1, tmp3
+    QADD16      tmp2, tmp2, tmp4
+    USAT16      tmp1, #8, tmp1
+    USAT16      tmp2, #8, tmp2
+    ORR         tmp1, tmp1, tmp2, LSL #8
+    STR         tmp1, [pDst], dstStep      ;// dstStep was reduced by 4 above, so this lands on the next row
+    
+    BGT         Yloop_pSrcResidueNotNull
+pSrcResidueConditionEnd
+
+    MOV         return, #OMX_Sts_NoErr
+
+    M_END
+    ENDIF ;// ARM1136JS
+
+;// ***************************************************************************
+;// CortexA8 implementation
+;// ***************************************************************************
+    END
+;// ***************************************************************************
+;// omxVCM4P2_MCReconBlock ends
+;// ***************************************************************************
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
new file mode 100644
index 0000000..213444a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
@@ -0,0 +1,283 @@
+; **********
+; * 
+; * File Name:  omxVCM4P2_PredictReconCoefIntra_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; * 
+; * Description:
+; * Contains module for DC/AC coefficient prediction
+; *
+; * 
+; * Function: omxVCM4P2_PredictReconCoefIntra
+; *
+; * Description:
+; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+; * to the function call, prediction direction (predDir) should be selected
+; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]  pSrcDst      pointer to the coefficient buffer which contains the 
+; *                    quantized coefficient residuals (PQF) of the current 
+; *                    block; must be aligned on a 4-byte boundary. The 
+; *                    output coefficients are saturated to the range 
+; *                    [-2048, 2047].
+; * [in]  pPredBufRow  pointer to the coefficient row buffer; must be aligned
+; *                    on a 4-byte boundary.
+; * [in]  pPredBufCol  pointer to the coefficient column buffer; must be 
+; *                    aligned on a 4-byte boundary.
+; * [in]  curQP        quantization parameter of the current block. curQP may 
+; *                    equal to predQP especially when the current block and 
+; *                    the predictor block are in the same macroblock.
+; * [in]  predQP       quantization parameter of the predictor block
+; * [in]  predDir      indicates the prediction direction which takes one
+; *                    of the following values:
+; *                    OMX_VIDEO_HORIZONTAL    predict horizontally
+; *                    OMX_VIDEO_VERTICAL        predict vertically
+; * [in]  ACPredFlag   a flag indicating if AC prediction should be
+; *                    performed. It is equal to ac_pred_flag in the bit
+; *                    stream syntax of MPEG-4
+; * [in]  videoComp    video component type (luminance, chrominance or
+; *                    alpha) of the current block
+; * [out] pSrcDst      pointer to the coefficient buffer which contains
+; *                    the quantized coefficients (QF) of the current
+; *                    block
+; * [out] pPredBufRow  pointer to the updated coefficient row buffer
+; * [out] pPredBufCol  pointer to the updated coefficient column buffer
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - Bad arguments 
+; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
+; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31, 
+; *   predQP > 31, predDir exceeds [1,2].
+; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not 
+; *   4-byte aligned.
+; *
+; *********
+     
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+       M_VARIANTS ARM1136JS
+       
+             
+
+       IMPORT        armVCM4P2_Reciprocal_QP_S32
+       IMPORT        armVCM4P2_Reciprocal_QP_S16
+       IMPORT        armVCM4P2_DCScaler
+       
+
+
+        IF ARM1136JS
+
+
+;// Input Arguments
+
+pSrcDst          RN 0
+pPredBufRow      RN 1
+pPredBufCol      RN 2
+curQP            RN 3
+QP               RN 3   ;// same register as curQP
+predQP           RN 4   ;// loaded from the stack only after dcScaler (also r4) is no longer needed
+predDir          RN 5
+ACPredFlag       RN 6
+videoComp        RN 7  
+
+;// Local Variables
+
+temp2            RN 5   ;// shares r5 with predDir; used in the horizontal AC loop
+negCurQP         RN 7   ;// r7 is reused in turn for videoComp, negdcScaler and negCurQP
+negdcScaler      RN 7
+tempPred         RN 8
+
+dcScaler         RN 4
+CoeffTable       RN 9   ;// shares r9 with absCoeffDC (DC part) and shortVideoHeader
+absCoeffDC       RN 9
+temp3            RN 6   ;// shares r6 with ACPredFlag and absCoeffAC
+absCoeffAC       RN 6
+
+shortVideoHeader RN 9
+predCoeffTable   RN 10  ;// shares r10 with Count
+Count            RN 10
+temp1            RN 12  ;// shares r12 with index
+index            RN 12
+Rem              RN 14
+temp             RN 11
+Return           RN 0   ;// shares r0 with pSrcDst
+
+       
+
+       M_START   omxVCM4P2_PredictReconCoefIntra,r12
+       
+       ;// Assigning pointers to Input arguments on Stack
+    
+       M_ARG           predQPonStack,4  
+       M_ARG           predDironStack,4
+       M_ARG           ACPredFlagonStack,4
+       M_ARG           videoComponStack,4
+       
+       ;// DC Prediction
+
+       M_LDR           videoComp,videoComponStack                     ;// Load videoComp From Stack               
+       
+       M_LDR           predDir,predDironStack                         ;// Load Prediction direction
+       
+       ;// dcScaler Calculation
+
+       LDR             index, =armVCM4P2_DCScaler
+       ADD             index,index,videoComp,LSL #5                   ;// 32 byte entries per video component
+       LDRB            dcScaler,[index,QP]
+           
+    
+calDCVal
+      
+       
+       LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S16   ;// Loading the table with entries 32767/(1 to 63) 
+      
+       CMP             predDir,#2                                     ;// Check if the Prediction direction is vertical
+
+       ;// Calculate temp pred by performing Division
+            
+       LDREQSH         absCoeffDC,[pPredBufRow]                       ;// If vertical load the coeff from Row Prediction Buffer
+       LDRNESH         absCoeffDC,[pPredBufCol]                       ;// If horizontal load the coeff from column Prediction Buffer
+       
+       RSB             negdcScaler,dcScaler,#0                        ;// negdcScaler=-dcScaler  
+       
+       MOV             temp1,absCoeffDC                               ;// temp1=prediction coeff
+       CMP             temp1,#0
+       RSBLT           absCoeffDC,temp1,#0                            ;//absCoeffDC=abs(temp1)
+       
+       ADD             temp,dcScaler,dcScaler                         ;// byte offset of the 16 bit table entry
+       LDRH            temp,[predCoeffTable,temp]                     ;// Load value from coeff table for performing division using multiplication
+       
+       SMULBB          tempPred,temp,absCoeffDC                       ;// tempPred=pPredBufRow(Col)[0]*32767/dcScaler
+       ADD             temp3,dcScaler,#1
+       LSR             tempPred,tempPred,#15                          ;// tempPred=pPredBufRow(Col)[0]/dcScaler          
+       LSR             temp3,temp3,#1                                 ;// temp3=round(dcScaler/2)
+       
+       MLA             Rem,negdcScaler,tempPred,absCoeffDC            ;// Rem = pPredBufRow(Col)[0]-tempPred*dcScaler      
+       
+       
+       LDRH            temp,[pPredBufCol]                             ;// temp=old pPredBufCol[0], copied to the row buffer below
+       CMP             Rem,temp3                                      
+       ADDGE           tempPred,#1                                    ;// If Rem>=round(dcScaler/2);tempPred=tempPred+1
+       CMP             temp1,#0
+       RSBLT           tempPred,tempPred,#0                           ;// if pPredBufRow(Col)[0]<0; tempPred=-tempPred
+             
+       
+       STRH            temp,[pPredBufRow,#-16]                        ;// pPredBufRow[-8]=old pPredBufCol[0]
+
+       LDRH            temp,[pSrcDst]                                 ;// temp=pSrcDst[0]
+       M_LDR           ACPredFlag,ACPredFlagonStack
+       ADD             temp,temp,tempPred                             ;// temp=pSrcDst[0]+tempPred
+       SSAT16          temp,#12,temp                                  ;// clip temp to [-2048,2047]
+       
+       SMULBB          temp1,temp,dcScaler                            ;// temp1=clipped(pSrcDst[0])*dcScaler           
+       M_LDR           predQP,predQPonStack                           ;// overwrites dcScaler (same register), no longer needed
+       STRH            temp,[pSrcDst]                                 
+       CMP             ACPredFlag,#1                                  ;// Check if the AC prediction flag is set or not
+       STRH            temp1,[pPredBufCol]                            ;// store temp1 to pPredBufCol
+ 
+       ;// AC Prediction
+
+              
+       BNE             Exit                                           ;// If not set Exit
+       
+       LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S32   ;// Loading the table with entries 0x1ffff/(1 to 63)
+       MOV             temp1,#4
+       MUL             temp1,curQP,temp1                              ;// byte offset of the 32 bit table entry
+       CMP             predDir,#2                                     ;// Check the Prediction direction
+       RSB             negCurQP,curQP,#0                                  
+       LDR             CoeffTable,[predCoeffTable,temp1]              ;// CoeffTable=0x1ffff/curQP
+       ADD             curQP,curQP,#1                                 ;// curQP=curQP+1
+       LSR             curQP,curQP,#1                                 ;// curQP=round(curQP/2)                
+       MOV             Count,#2                                       ;// Initializing the Loop Count
+       BNE             Horizontal                                     ;// If the Prediction direction is horizontal branch to Horizontal
+
+       
+
+loop1       
+       ;// Calculate tempPred
+       
+       LDRSH           absCoeffAC,[pPredBufRow,Count]                 ;// absCoeffAC=pPredBufRow[i], 1=<i<=7
+       MOV             temp1,absCoeffAC
+       CMP             temp1,#0                                       ;// compare pPredBufRow[i] with zero, 1=<i<=7
+       RSBLT           absCoeffAC,temp1,#0                            ;// absCoeffAC= abs(pPredBufRow[i])
+                                            
+       SMULBB          absCoeffAC,absCoeffAC,predQP                   ;// absCoeffAC=abs(pPredBufRow[i])*predQP
+       MUL             tempPred,absCoeffAC,CoeffTable                 ;// tempPred=pPredBufRow[i]*predQP*0x1ffff/curQP
+       LSR             tempPred,tempPred,#17          
+             
+       MLA             Rem,negCurQP,tempPred,absCoeffAC               ;// Rem=abs(pPredBufRow[i])*predQP-tempPred*curQP
+       LDRH            temp,[pSrcDst,Count]                           ;// temp=pSrcDst[i],1<=i<8
+       
+       CMP             Rem,curQP
+       ADDGE           tempPred,#1                                    ;// if Rem>=round(curQP/2); tempPred=tempPred+1
+       CMP             temp1,#0
+       RSBLT           tempPred,tempPred,#0                           ;// if pPredBufRow[i]<0 ; tempPred=-tempPred
+              
+       ;// Update source and Row Prediction buffers
+       
+       ADD             temp,temp,tempPred                             ;// temp=tempPred+pSrcDst[i]
+       SSAT16          temp,#12,temp                                  ;// Clip temp to [-2048,2047]
+       STRH            temp,[pSrcDst,Count]
+       STRH            temp,[pPredBufRow,Count]                       ;// pPredBufRow[i]=temp
+       ADD             Count,Count,#2                                 ;// i=i+1
+       CMP             Count,#16                                      ;// compare if i=8
+       BLT             loop1
+       B               Exit                                           ;// Branch to exit
+
+Horizontal
+
+       MOV             Count,#16                                      ;// Initializing i=8
+
+loop2  
+     
+       LSR             temp2,Count,#3                                 ;// temp2=byte offset of pPredBufCol[i>>3]
+       
+       ;// Calculate tempPred
+       
+       LDRSH           absCoeffAC,[pPredBufCol,temp2]                 ;// absCoeffAC=pPredBufCol[i>>3]; signed load so the sign tests below work
+       MOV             temp1,absCoeffAC
+       CMP             temp1,#0                                       ;// compare pPredBufCol[i>>3] with zero
+       RSBLT           absCoeffAC,temp1,#0                            ;// absCoeffAC=abs(pPredBufCol[i>>3])
+                                      
+       SMULBB          absCoeffAC,absCoeffAC,predQP                   ;// absCoeffAC=abs(pPredBufCol[i>>3])*predQP
+       MUL             tempPred,absCoeffAC,CoeffTable                 ;// tempPred=pPredBufCol[i>>3]*predQP*0x1ffff/curQP
+       LSR             tempPred,tempPred,#17                          ;// tempPred=pPredBufCol[i>>3]*predQP/curQP
+       
+       MLA             Rem,negCurQP,tempPred,absCoeffAC               ;// Rem=abs(pPredBufCol[i>>3])*predQP-tempPred*curQP
+       LDRH            temp,[pSrcDst,Count]                           ;// temp=pSrcDst[i]
+       
+       CMP             Rem,curQP                                      ;// Compare Rem with round(curQP/2)
+       ADDGE           tempPred,#1                                    ;// tempPred=tempPred+1 if Rem>=round(curQP/2)
+       CMP             temp1,#0
+       RSBLT           tempPred,tempPred,#0                           ;// if pPredBufCol[i>>3]<0 tempPred=-tempPred
+       
+       ;// Update source and Column Prediction buffers
+       
+       ADD             temp,temp,tempPred                             ;// temp=pSrcDst[i]+tempPred
+       SSAT16          temp,#12,temp                                  ;// Clip temp to [-2048,2047]
+       STRH            temp,[pSrcDst,Count]                           ;// pSrcDst[i]= clipped value
+       STRH            temp,[pPredBufCol,temp2]                       ;// pPredBufCol[i>>3]=temp
+       ADD             Count,Count,#16                                ;// i=i+8
+       CMP             Count,#128                                     ;// compare i with 64
+       BLT             loop2
+
+             
+Exit
+  
+       MOV             Return,#OMX_Sts_NoErr 
+
+       M_END
+       ENDIF
+       END
+
+
+   
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
new file mode 100644
index 0000000..c9591cb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
@@ -0,0 +1,141 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_QuantInvInter_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for inter reconstruction
+; * 
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvInter_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst          pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in] QP              quantization parameter (quantiser_scale)
+; * [in] videoComp      (Intra version only.) Video component type of the
+; *                  current block. Takes one of the following flags:
+; *                  OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; *                  OMX_VC_ALPHA.
+; * [in] shortVideoHeader (Intra version only.) a flag indicating presence of short_video_header;
+; *                       shortVideoHeader==1 selects linear intra DC mode,
+; *                  and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out]    pSrcDst      pointer to the output (dequantized) intra/inter block.  Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; *    - If pSrcDst is NULL or is not 16-byte aligned.
+; *      or
+; *    - If QP <= 0.
+; *      or
+; *    - (Intra version only.) videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+; */
+
+   INCLUDE omxtypes_s.h
+   INCLUDE armCOMM_s.h
+
+   M_VARIANTS ARM1136JS
+
+         
+
+     IF ARM1136JS
+
+;//Input Arguments
+pSrcDst            RN 0
+QP                 RN 1
+
+;//Local Variables
+Return             RN 0
+Count              RN 4      
+tempVal21          RN 2
+tempVal43          RN 3
+QP1                RN 5
+X2                 RN 6
+X3                 RN 14
+Result1            RN 8
+Result2            RN 9
+two                RN 7
+
+    M_START omxVCM4P2_QuantInvInter_I,r9
+       ;// In-place inverse quantisation of the 64 16-bit coefficients at pSrcDst, four per loop pass
+        MOV      Count,#64                ;// Count = number of coefficients still to process
+        TST      QP,#1                    ;// Z set when QP is even; consumed by SUBEQ/MOVNE below
+        LDRD     tempVal21,[pSrcDst]      ;// Loads first two values of pSrcDst to tempVal21,
+                                          ;// next two values to tempVal43
+        SUBEQ    QP1,QP,#1                ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even
+        MOVNE    QP1,QP
+        MOV      two,#2                   ;// constant 2 used by the SMULBB/SMULTB doubling steps
+        
+        
+
+Loop
+        ;// Per coefficient v: result = sign(v)*(2*|v|*QP + QP1) when v != 0, and 0 when v == 0.
+        ;// The flags of each CMP drive the conditional RSBxx/SMLABBNE that follow it.
+        SMULBB   X2,tempVal21,two         ;// X2= first val(lower 16 bits of tempVal21)*2
+        CMP      X2,#0
+        
+        RSBLT    X2,X2,#0                 ;// X2=2*absoluteval(first val)
+        SMLABBNE X2,QP,X2,QP1             ;// X2=2*absval(first val)*QP+QP if QP is odd 
+                                          ;// X2=2*absval(first val)*QP+QP-1 if QP is even 
+        SMULTB   X3,tempVal21,two         ;// X3= second val(top 16 bits of tempVal21)*2
+        RSBLT    X2,X2,#0                 ;// restore the sign of the first val (flags still from CMP X2)
+        
+        CMP      X3,#0
+               
+        RSBLT    X3,X3,#0                 ;// X3=2*absoluteval(second val)
+        SMLABBNE X3,QP,X3,QP1             ;// X3=2*absval(second val)*QP+QP1 when second val is non-zero
+        ;// NOTE(review): PKHBT keeps only bits [0-15] of X2/X3, so SSAT16 clips an already truncated value - confirm 2*|v|*QP+QP1 always fits in 16 bits
+        RSBLT    X3,X3,#0                 ;// restore the sign of the second val
+        PKHBT    Result1,X2,X3,LSL #16    ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15]
+        SMULBB   X2,tempVal43,two         ;// X2= third val(lower 16 bits of tempVal43)*2
+        SSAT16   Result1,#12,Result1      ;// clip to range [-2048,2047]
+        CMP      X2,#0
+       
+        
+               
+        RSBLE    X2,X2,#0                 ;// LE instead of LT: negating a zero X2 still leaves zero
+        SMLABBNE X2,QP,X2,QP1
+        SMULTB   X3,tempVal43,two         ;// X3= fourth val(top 16 bits of tempVal43)*2
+        RSBLT    X2,X2,#0
+        CMP      X3,#0
+        
+        LDRD     tempVal21,[pSrcDst,#8]   ;// Load next four Values to tempVal21,tempVal43
+                ;// NOTE(review): on the final pass this LDRD reads the 8 bytes just past the 64-coefficient block - confirm that is acceptable
+        RSBLT    X3,X3,#0
+        SMLABBNE X3,QP,X3,QP1
+        RSBLT    X3,X3,#0
+        PKHBT    Result2,X2,X3,LSL #16    ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15]
+        SSAT16   Result2,#12,Result2      ;// clip to range [-2048,2047]
+        
+        SUBS     Count,Count,#4           ;// Decrement Count by 4; the flags set here are tested by BGT below
+        STRD     Result1,[pSrcDst],#8     ;// Store Result1,Result2 (four values) and advance pSrcDst by 8 bytes
+        
+        
+               
+        BGT      Loop
+        
+        MOV      Return,#OMX_Sts_NoErr    ;// no argument check is visible in this routine; it returns OMX_Sts_NoErr
+        
+        M_END
+        ENDIF        
+        END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
new file mode 100644
index 0000000..6328e01
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
@@ -0,0 +1,188 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_QuantInvIntra_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for inter reconstruction
+; * 
+; *
+; *
+; *
+; *
+; * 
+; * Function: omxVCM4P2_QuantInvIntra_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]    pSrcDst        pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in]    QP            quantization parameter (quantiser_scale)
+; * [in]    videoComp          (Intra version only.) Video component type of the
+; *                    current block. Takes one of the following flags:
+; *                    OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; *                    OMX_VC_ALPHA.
+; * [in]    shortVideoHeader  a flag indicating presence of short_video_header;
+; *                           shortVideoHeader==1 selects linear intra DC mode,
+; *                    and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out]    pSrcDst        pointer to the output (dequantized) intra/inter block.  Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; *    -    If pSrcDst is NULL or is not 16-byte aligned.
+; *      or
+; *    - If QP <= 0.
+; *      or
+; *    - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+; */
+
+   INCLUDE omxtypes_s.h
+   INCLUDE armCOMM_s.h
+   
+   M_VARIANTS ARM1136JS
+   
+   
+   IMPORT        armVCM4P2_DCScaler
+ 
+         
+
+     IF ARM1136JS
+
+;//Input Arguments
+pSrcDst            RN 0
+QP                 RN 1
+videoComp          RN 2
+shortVideoHeader   RN 3
+
+;//Local Variables
+Return             RN 0
+dcScaler           RN 4
+temp               RN 12
+index              RN 6
+      
+tempVal21          RN 4
+tempVal43          RN 5
+QP1                RN 6
+X2                 RN 7
+X3                 RN 14
+Result1            RN 8
+Result2            RN 9
+two                RN 10
+Count              RN 11
+
+
+
+   
+    M_START omxVCM4P2_QuantInvIntra_I,r11
+
+
+        ;// In-place inverse quantisation of a 64-coefficient block: DC via dcScaler, the rest via the QP loop
+        ;// Perform Inverse Quantization for DC coefficient
+
+        TEQ       shortVideoHeader,#0      ;// Test if short Video Header flag =0             
+        MOVNE     dcScaler,#8              ;// if shortVideoHeader is non zero dcScaler=8
+        BNE       calDCVal
+        LDR       index, =armVCM4P2_DCScaler ;// index = address of the imported DC scaler table
+      ADD       index,index,videoComp,LSL #5 ;// index += videoComp*32 (presumably one 32-byte row per component - confirm)
+      LDRB      dcScaler,[index,QP]          ;// dcScaler = byte at index + QP
+
+
+        ;//M_CalDCScalar  shortVideoHeader,videoComp, QP
+
+calDCVal
+
+        LDRH     temp,[pSrcDst]           ;// temp = quantized DC coefficient (first halfword of the block)
+        SMULBB   temp,temp,dcScaler       ;// dcCoeff = dcScaler * Quantized DC coefficient(from memory)
+        SSAT     temp,#12,temp            ;// Saturating to 12 bits
+        ;// dcScaler (r4) and index (r6) are not used past this point; their registers are reused as tempVal21 and QP1
+
+        MOV      Count,#64                ;// Count = number of coefficients still to process
+        TST      QP,#1                    ;// Z set when QP is even; consumed by SUBEQ/MOVNE below
+        LDRD     tempVal21,[pSrcDst]      ;// Loads first two values of pSrcDst to tempVal21,
+                                          ;// next two values to tempVal43
+        SUBEQ    QP1,QP,#1                ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even
+        MOVNE    QP1,QP
+        MOV      two,#2                   ;// constant 2 used by the SMULBB/SMULTB doubling steps
+
+
+                
+        
+
+Loop
+        ;// Per coefficient v: result = sign(v)*(2*|v|*QP + QP1) when v != 0, and 0 when v == 0.
+        ;// Element 0 is processed here as well; it is overwritten with the DC result after the loop.
+        SMULBB   X2,tempVal21,two         ;// X2= first val(lower 16 bits of tempVal21)*2
+        CMP      X2,#0
+        
+        RSBLT    X2,X2,#0                 ;// X2=2*absoluteval(first val)
+        SMLABBNE X2,QP,X2,QP1             ;// X2=2*absval(first val)*QP+QP if QP is odd 
+                                          ;// X2=2*absval(first val)*QP+QP-1 if QP is even 
+        SMULTB   X3,tempVal21,two         ;// X3= second val(top 16 bits of tempVal21)*2
+        RSBLT    X2,X2,#0                 ;// restore the sign of the first val (flags still from CMP X2)
+        
+        CMP      X3,#0
+               
+        RSBLT    X3,X3,#0                 ;// X3=2*absoluteval(second val)
+        SMLABBNE X3,QP,X3,QP1             ;// X3=2*absval(second val)*QP+QP1 when second val is non-zero
+        ;// NOTE(review): PKHBT keeps only bits [0-15] of X2/X3, so SSAT16 clips an already truncated value - confirm 2*|v|*QP+QP1 always fits in 16 bits
+        RSBLT    X3,X3,#0                 ;// restore the sign of the second val
+        PKHBT    Result1,X2,X3,LSL #16    ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15]
+        SMULBB   X2,tempVal43,two         ;// X2= third val(lower 16 bits of tempVal43)*2
+        SSAT16   Result1,#12,Result1      ;// clip to range [-2048,2047]
+        CMP      X2,#0
+       
+        
+               
+        RSBLE    X2,X2,#0                 ;// LE instead of LT: negating a zero X2 still leaves zero
+        SMLABBNE X2,QP,X2,QP1
+        SMULTB   X3,tempVal43,two         ;// X3= fourth val(top 16 bits of tempVal43)*2
+        RSBLT    X2,X2,#0
+        CMP      X3,#0
+        
+        LDRD     tempVal21,[pSrcDst,#8]   ;// Load next four Values to tempVal21,tempVal43
+                ;// NOTE(review): on the final pass this LDRD reads the 8 bytes just past the 64-coefficient block - confirm that is acceptable
+        RSBLT    X3,X3,#0
+        SMLABBNE X3,QP,X3,QP1
+        RSBLT    X3,X3,#0
+        PKHBT    Result2,X2,X3,LSL #16    ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15]
+        SSAT16   Result2,#12,Result2      ;// clip to range [-2048,2047]
+        
+        SUBS     Count,Count,#4           ;// Decrement Count by 4; the flags set here are tested by BGT below
+        STRD     Result1,[pSrcDst],#8     ;// Store Result1,Result2 (four values) and advance pSrcDst by 8 bytes
+        
+        
+               
+        BGT      Loop
+
+        SUB      pSrcDst,pSrcDst,#128     ;// rewind the 16 passes * 8 bytes so pSrcDst points at element 0 again
+        
+        ;// Storing the Inverse Quantized DC coefficient
+
+        STRH     temp,[pSrcDst],#2        ;// element 0 = saturated DC value computed before the loop
+        
+  
+        
+        MOV      Return,#OMX_Sts_NoErr    ;// no argument check is visible in this routine; it returns OMX_Sts_NoErr
+             
+        
+        
+        
+        M_END
+        ENDIF        
+        END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c
new file mode 100644
index 0000000..5d93681
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c
@@ -0,0 +1,6 @@
+#include "omxtypes.h"
+#include "armCOMM_Version.h"
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS /* the description string is only defined when this macro is set */
+const char * const omxVC_VersionDescription = "ARM OpenMAX DL v" ARM_VERSION_STRING "   Rel=" OMX_ARM_RELEASE_TAG "   Arch=" OMX_ARM_BUILD_ARCHITECTURE "   Tools="  OMX_ARM_BUILD_TOOLCHAIN ;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_DELIVERY.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_DELIVERY.TXT
new file mode 100755
index 0000000..cc2d70a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_DELIVERY.TXT
@@ -0,0 +1,63 @@
+The contents of this transaction was created by Hedley Francis
+of ARM on 19-Feb-2008.
+
+It contains the ARM data versions listed below.
+
+This data, unless otherwise stated, is ARM Proprietary and access to it
+is subject to the agreements indicated below.
+
+If you experience problems with this data, please contact ARM support
+quoting transaction reference <97414>.
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+- OX002-SW-98010-r0p0-00bet1
+  Video codecs - optimised code
+  V7 code release for Hantro (Ver 1.0.2)
+  internal access
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+This transaction contains deliverables which are designated as being of
+beta release status (BET).
+
+Beta release status has a particular meaning to ARM of which the recipient
+must be aware. Beta is a pre-release status indicating that the deliverable
+so described is believed to robustly demonstrate specified behaviour, to be
+consistent across its included aspects and be ready for general deployment.
+But Beta also indicates that pre-release reliability trials are ongoing and
+that it is possible residual defects or errors in operation, consistency
+and documentation may still be encountered. The recipient should consider
+this position when using this Beta material supplied. ARM will normally
+attempt to provide fixes or a work-around for defects identified by the
+recipient, but the provision or timeliness of this support cannot be
+guaranteed. ARM shall not be responsible for direct or consequential
+damages as a result of encountering one or more of these residual defects.
+By accepting a Beta release, the recipient agrees to these constraints and
+to providing reasonable information to ARM to enable the replication of the
+defects identified by the recipient. The specific Beta version supplied
+will not be supported after release of a later or higher status version.
+It should be noted that Support for the Beta release of the deliverable
+will only be provided by ARM to a recipient who has a current support and
+maintenance contract for the deliverable.
+
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+In addition to the data versions listed above, this transaction contains
+two additional files at the top level.
+
+The first is this file, ARM_DELIVERY_97414.TXT, which is the delivery
+note.
+
+The second is ARM_MANIFEST_97414.TXT which contains a manifest of all the
+files included in this transaction, together with their checksums.
+
+The checksums provided are calculated using the RSA Data Security, Inc.
+MD5 Message-Digest Algorithm.
+
+The checksums can be used to verify the integrity of this data using the
+"md5sum" tool (which is part of the GNU "textutils" package) by running:
+
+  % md5sum --check ARM_MANIFEST_97414.TXT
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_MANIFEST.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_MANIFEST.TXT
new file mode 100755
index 0000000..8310f67
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_MANIFEST.TXT
@@ -0,0 +1,91 @@
+				  OX002-SW-98010-r0p0-00bet1/
+				  OX002-SW-98010-r0p0-00bet1/api/
+e049791cfab6060a08cbac7b3ad767d6  OX002-SW-98010-r0p0-00bet1/api/armCOMM_s.h
+ed798face25497b2703ede736d6d52b6  OX002-SW-98010-r0p0-00bet1/api/omxtypes_s.h
+4eebd63af087376811d6749f0646b864  OX002-SW-98010-r0p0-00bet1/api/armCOMM_BitDec_s.h
+43cf46c2cf2fe1f93c615b57bcbe4809  OX002-SW-98010-r0p0-00bet1/api/armCOMM.h
+8f248ceaac8f602e277a521b679dcbbe  OX002-SW-98010-r0p0-00bet1/api/armCOMM_IDCTTable.h
+8ac5fa80ea98e391f5730a375280b5bd  OX002-SW-98010-r0p0-00bet1/api/armCOMM_Version.h
+3a2f420ddf6a1b950470bd0f5ebd5c62  OX002-SW-98010-r0p0-00bet1/api/armCOMM_IDCT_s.h
+511c0bb534fe223599e2c84eff24c9ed  OX002-SW-98010-r0p0-00bet1/api/armCOMM_MaskTable.h
+8971932d56eed6b1ad1ba507f0bff5f0  OX002-SW-98010-r0p0-00bet1/api/armCOMM_Bitstream.h
+f87fedd9ca432fefa757008176864ef8  OX002-SW-98010-r0p0-00bet1/api/armOMX.h
+8e49899a428822c36ef9dd94e0e05f18  OX002-SW-98010-r0p0-00bet1/api/omxtypes.h
+323008b72e9f04099a8cb42e99a1face  OX002-SW-98010-r0p0-00bet1/build_vc.pl
+e72d96c0a415459748df9807f3dae72f  OX002-SW-98010-r0p0-00bet1/filelist_vc.txt
+				  OX002-SW-98010-r0p0-00bet1/src/
+5eeae659a29477f5c52296d24afffd3c  OX002-SW-98010-r0p0-00bet1/src/armCOMM_IDCTTable.c
+d64cdcf38f7749dc7f77465e5b7d356d  OX002-SW-98010-r0p0-00bet1/src/armCOMM_MaskTable.c
+				  OX002-SW-98010-r0p0-00bet1/vc/
+				  OX002-SW-98010-r0p0-00bet1/vc/m4p10/
+				  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/
+e7e0c320978564a7c9b2c723749a98d6  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CAVLCTables.c
+4adcd0df081990bdfc4729041a2a9152  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+852e0404142965dc1f3aa7f00ee5127b  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
+7054151c5bfea6b5e74feee86b2d7b01  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+5f7213a4f37627b3c58f6294ba477e30  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DequantTables_s.s
+32ff4b8be62e2f0f3e764b83c1e5e2fd  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+d066e3c81d82616f37ec1810ea49e7b7  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
+fe629a3e9d55395a6098bdf2431b5f02  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
+5b13fb954b7679de20076bb6a7f4ee1d  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
+01ba60eff66ea49a4f833ce6279f8e2f  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+fa1072cf1d17e9666c9f1e215fa302b1  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+db387b9e66d32787f47ef9cf0347da2a  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+ea537e4e2ad03a1940981055fa3ace01  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+29a4283885b9473a3550a81eff2559d2  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+2ddcaf60a8ea1e6e6b77737f768bfb9d  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_QuantTables_s.s
+c3002aad5600f872b70a5d7fe3915846  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+a2900f2c47f1c61d20bd6c1eda33d6d4  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+c921df73397a32c947dc996ba6858553  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
+3769e14f2fc3f514d025fe6ab73ff67a  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+c029d1cebea0a09e1d235a37e2155002  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+076a033f8161750a685756f9f51f04c9  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+c5b5d22842822e6e5e31094882cbeb46  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
+f6bdf6d914a4a1479f524951a3409846  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+ebeb0713a9b2ea25986360ef262138c4  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+78ed9ea200faa7be665445a713859af1  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+c2d995f787b6f44ef10c751c12d1935f  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+40bed679a9f6e0d3efe216b7d4a9cf45  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
+4a52b3e9e268b8a8f07829bf500d03af  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
+11249f8a98c5d4b84cb5575b0e37ca9c  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
+2513b60559ba71ae495c6053fb779fa9  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
+2fb1ee17c36e3c1469c170f6dac11bf1  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
+cc4a6f32db0b72a91d3f278f6855df69  OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+				  OX002-SW-98010-r0p0-00bet1/vc/m4p10/api/
+6e530ddaa7c2b57ffe88162c020cb662  OX002-SW-98010-r0p0-00bet1/vc/m4p10/api/armVCM4P10_CAVLCTables.h
+				  OX002-SW-98010-r0p0-00bet1/vc/m4p2/
+				  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/
+bec6de348b113438498867b869001622  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Clip8_s.s
+dba9824e959b21d401cac925e68a11a6  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+dfa7e5b58027be3542dda0593b77b2d3  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
+4fba4c431a783a78a2eb6497a94ac967  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+39991961179ca03b6381b6e653b1f14b  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
+1b0b2990c2669dfb87cf6b810611c01b  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+1c9b87abf3283e957816b3937c680701  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
+4fe1afca659a9055fc1172e58f78a506  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+2ea067f0436f91ba1351edaf411cb4ea  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
+6ce363aadc9d65c308b40cca8902e4f6  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
+bf212f786772aed2bc705d22ff4e74f5  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
+293a48a648a3085456e6665bb7366fad  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
+2bb47ed9c9e25c5709c6d9b4ad39a38a  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
+437dfa204508850d61d4b87091446e9f  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+bc9778898dd41101dc0fb0139eaf83cc  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+fc191eeae43f8ce735dbd311cc7bcb8d  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
+a0d85f4f517c945a4c9317ac021f2d08  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+386020dee8b725c7fe2526f1fc211d7d  OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+				  OX002-SW-98010-r0p0-00bet1/vc/m4p2/api/
+4624e7c838e10a249abcc3d3f4f40748  OX002-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+65e1057d04e2cb844559dc9f6e09795a  OX002-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+				  OX002-SW-98010-r0p0-00bet1/vc/src/
+e627b3346b0dc9aff14446005ce0fa43  OX002-SW-98010-r0p0-00bet1/vc/src/armVC_Version.c
+				  OX002-SW-98010-r0p0-00bet1/vc/api/
+7ca94b1c33ac0211e17d38baadd7d1dd  OX002-SW-98010-r0p0-00bet1/vc/api/armVC.h
+12cf7596edbbf6048b626d15e8d0ed48  OX002-SW-98010-r0p0-00bet1/vc/api/omxVC.h
+11726e286a81257cb45f5547fb4d374c  OX002-SW-98010-r0p0-00bet1/vc/api/omxVC_s.h
+a5b2af605c319cd2491319e430741377  OX002-SW-98010-r0p0-00bet1/vc/api/armVCCOMM_s.h
+				  OX002-SW-98010-r0p0-00bet1/vc/comm/
+				  OX002-SW-98010-r0p0-00bet1/vc/comm/src/
+1f81187b48487a8ea6dbc327648e3e4f  OX002-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy16x16_s.s
+936d3f2038a6f8613ec25e50cc601fe8  OX002-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy8x8_s.s
+8f6708a249130962e0bc5c044ac6dd93  OX002-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
+aab7713414428e95de0ba799a2679b36  ARM_DELIVERY_97414.TXT
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM.h
new file mode 100755
index 0000000..64c1958
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM.h
@@ -0,0 +1,785 @@
+/**
+ * 
+ * File Name:  armCOMM.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *   
+ * File: armCOMM.h
+ * Brief: Declares Common APIs/Data Types used across OpenMAX API's
+ *
+ */
+ 
+  
+#ifndef _armCommon_H_
+#define _armCommon_H_
+
+#include "omxtypes.h"
+
+typedef struct
+{
+  OMX_F32 Re; /** Real part */
+  OMX_F32 Im; /** Imaginary part */	
+        
+} OMX_FC32; /** Single precision floating point complex number (Re, Im pair of OMX_F32) */
+
+typedef struct
+{
+  OMX_F64 Re; /** Real part */
+  OMX_F64 Im; /** Imaginary part */	
+        
+} OMX_FC64; /** Double precision floating point complex number (Re, Im pair of OMX_F64) */
+
+
+/* Array of 64 OMX_S16 values; used by both IP and IC domains for 8x8 JPEG blocks. */
+typedef OMX_S16 ARM_BLOCK8x8[64];
+
+
+#include "armOMX.h"
+
+#define  armPI (OMX_F64)(3.1415926535897932384626433832795)
+
+/***********************************************************************/
+
+/* Compiler extensions */
+#ifdef ARM_DEBUG
+/* debug version: armError prints the message and exits, armWarn only prints, armAssert maps to assert() */
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#define armError(str) {printf("%s\n", (str)); exit(-1);}
+#define armWarn(str) {printf("%s\n", (str));}
+#define armIgnore(a) ((void)(a))
+#define armAssert(a) assert(a)
+#else 
+/* release version: each helper only evaluates its argument and discards the result */
+#define armError(str) ((void) (str))
+#define armWarn(str)  ((void) (str))
+#define armIgnore(a)  ((void) (a))
+#define armAssert(a)  ((void) (a))
+#endif /* ARM_DEBUG */
+
+/* Arithmetic operations (macros: an argument may be evaluated twice, so pass expressions without side effects) */
+
+#define armMin(a,b)             ( (a) > (b) ?  (b):(a) )
+#define armMax(a,b)             ( (a) > (b) ?  (a):(b) )
+#define armAbs(a)               ( (a) <  0  ? -(a):(a) )
+
+/* Alignment operation: advance Ptr to the next N-byte boundary (the mask arithmetic assumes N is a power of two) */
+
+#define armAlignToBytes(Ptr,N)      ((Ptr) + ( (((N)-(int)(Ptr))&((N)-1)) / sizeof(*(Ptr)) ))
+#define armAlignTo2Bytes(Ptr)       armAlignToBytes(Ptr,2)
+#define armAlignTo4Bytes(Ptr)       armAlignToBytes(Ptr,4)
+#define armAlignTo8Bytes(Ptr)       armAlignToBytes(Ptr,8)
+#define armAlignTo16Bytes(Ptr)      armAlignToBytes(Ptr,16)
+
+/* Error and Alignment check */
+
+#define armRetArgErrIf(condition, code)  if(condition) { return (code); }
+#define armRetDataErrIf(condition, code) if(condition) { return (code); }
+
+#ifndef ALIGNMENT_DOESNT_MATTER
+#define armIsByteAligned(Ptr,N)     ((((int)(Ptr)) % N)==0)
+#define armNotByteAligned(Ptr,N)    ((((int)(Ptr)) % N)!=0)
+#else
+#define armIsByteAligned(Ptr,N)     (1)
+#define armNotByteAligned(Ptr,N)    (0)
+#endif
+
+#define armIs2ByteAligned(Ptr)      armIsByteAligned(Ptr,2)
+#define armIs4ByteAligned(Ptr)      armIsByteAligned(Ptr,4)
+#define armIs8ByteAligned(Ptr)      armIsByteAligned(Ptr,8)
+#define armIs16ByteAligned(Ptr)     armIsByteAligned(Ptr,16)
+
+#define armNot2ByteAligned(Ptr)     armNotByteAligned(Ptr,2)
+#define armNot4ByteAligned(Ptr)     armNotByteAligned(Ptr,4)
+#define armNot8ByteAligned(Ptr)     armNotByteAligned(Ptr,8)
+#define armNot16ByteAligned(Ptr)    armNotByteAligned(Ptr,16)
+#define armNot32ByteAligned(Ptr)    armNotByteAligned(Ptr,32)
+
+/**
+ * Function: armRoundFloatToS16/armRoundFloatToS32/armRoundFloatToS64
+ *
+ * Description:
+ * Converts a double precision value into an OMX_S16/OMX_S32/OMX_S64 integer after rounding
+ *
+ * Parameters:
+ * [in]  Value                 Double precision (OMX_F64) value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32/OMX_S64 format
+ *
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value);
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToS16/armSatRoundFloatToS32
+ *
+ * Description:
+ * Converts a double precision value into an OMX_S16/OMX_S32 integer after rounding and saturation
+ *
+ * Parameters:
+ * [in]  Value                 Double precision (OMX_F64) value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32 format
+ *
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToU16/armSatRoundFloatToU32
+ *
+ * Description:
+ * Converts a double precision value into an OMX_U16/OMX_U32 unsigned integer after rounding and saturation
+ *
+ * Parameters:
+ * [in]  Value                 Double precision (OMX_F64) value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U16/OMX_U32 format
+ *
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value);
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value);
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Checks the sign of a variable:
+ * returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] var     Variable (OMX_S16) to be checked
+ *
+ * Return Value:
+ * OMX_INT --   returns 1 if it is Positive
+ *              returns 0 if it is 0
+ *              returns -1 if it is Negative 
+ */ 
+ 
+OMX_INT armSignCheck (OMX_S16 var);
+
+/**
+ * Function: armClip
+ *
+ * Description: Clips the input between MAX and MIN value
+ * 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] min     lower bound
+ * [in] max     upper bound
+ * [in] src     variable to be clipped
+ *
+ * Return Value:
+ * OMX_S32 --   returns clipped value
+ */ 
+ 
+OMX_S32 armClip (
+        OMX_INT min,
+        OMX_INT max, 
+        OMX_S32 src
+        );
+
+/**
+ * Function: armClip_F32
+ *
+ * Description: Clips the input between MAX and MIN value
+ * 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] min     lower bound
+ * [in] max     upper bound
+ * [in] src     variable to be clipped
+ *
+ * Return Value:
+ * OMX_F32 --   returns clipped value
+ */ 
+ 
+OMX_F32 armClip_F32 (
+        OMX_F32 min,
+        OMX_F32 max, 
+        OMX_F32 src
+        );
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description: Divides a float value by 2^shift and 
+ * saturates it for unsigned value range for satBits.
+ * Second parameter is like "shifting" the corresponding 
+ * integer value. Takes care of rounding while clipping the final 
+ * value.
+ *
+ * Parameters:
+ * [in] v          Number to be operated upon
+ * [in] shift      Divides the input "v" by "2^shift"
+ * [in] satBits    Final range is [0, 2^satBits)
+ *
+ * Return Value:
+ * OMX_U32 --   returns "shifted" saturated value
+ */ 
+ 
+OMX_U32 armShiftSat_F32(
+        OMX_F32 v, 
+        OMX_INT shift, 
+        OMX_INT satBits
+        );
+
+/**
+ * Function: armSwapElem
+ *
+ * Description:
+ * This function swaps two elements at the specified pointer locations (pBuf1, pBuf2).
+ * The size of each element could be anything as specified by <elemSize>
+ *
+ * Return Value:
+ * OMXResult -- Error status from the function
+ */
+OMXResult armSwapElem(OMX_U8 *pBuf1, OMX_U8 *pBuf2, OMX_INT elemSize);
+
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Finds the median of three numbers
+ * 
+ * Remarks:
+ *
+ * Parameters:
+ * [in] fEntry     First entry
+ * [in] sEntry     Second entry
+ * [in] tEntry     Third entry
+ *
+ * Return Value:
+ * OMX_S32 --   returns the median value
+ */ 
+ 
+OMX_S32 armMedianOf3 (
+    OMX_S32 fEntry,
+    OMX_S32 sEntry, 
+    OMX_S32 tEntry 
+    );
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Finds the size of a positive value and returns it
+ * 
+ * Remarks:
+ *
+ * Parameters:
+ * [in] value    Positive value (OMX_U16)
+ *
+ * Return Value:
+ * OMX_U8 --   returns the size of the positive value
+ */ 
+ 
+OMX_U8 armLogSize (
+    OMX_U16 value 
+    );    
+
+/***********************************************************************/
+                /* Saturating Arithmetic operations */
+
+/**
+ * Function :armSatAdd_S32()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ * 
+ *    
+ **/
+
+OMX_S32 armSatAdd_S32(
+                OMX_S32 Value1,
+                OMX_S32 Value2
+                );
+
+/**
+ * Function :armSatAdd_S64()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ * 
+ *    
+ **/
+
+OMX_S64 armSatAdd_S64(
+                OMX_S64 Value1,
+                OMX_S64 Value2
+                );
+
+/** Function: armSatSub_S32()
+ * 
+ * Description:
+ *     Returns the result of the saturated subtraction of the two inputs Value1 and Value2.
+ *
+ * Parameters:
+ * [in] Value1       First operand
+ * [in] Value2       Second operand
+ *
+ * Return:
+ * [out]             Result of operation
+ * 
+ **/
+
+OMX_S32 armSatSub_S32(
+                    OMX_S32 Value1,
+                    OMX_S32 Value2
+                    );
+
+/**
+ * Function: armSatMac_S32()
+ *
+ * Description:
+ *     Returns the result of the multiplication of Value1 and Value2 and the
+ *     subsequent saturated accumulation with Mac.
+ *
+ * Parameters (in signature order):
+ * [in] Mac          Accumulator
+ * [in] Value1       First operand
+ * [in] Value2       Second operand
+ *
+ * Return:
+ * [out]             Result of operation
+ **/
+
+OMX_S32 armSatMac_S32(
+                    OMX_S32 Mac,
+                    OMX_S16 Value1,
+                    OMX_S16 Value2
+                    );
+
+/**
+ * Function: armSatMac_S16S32_S32
+ *
+ * Description:
+ *   Returns the result of the saturated MAC operation on the three inputs mac, delayElem, filTap:
+ *
+ *   mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Parameters (in signature order):
+ * [in] mac          Accumulator that the product is added to
+ * [in] delayElem    First operand (32 bit)
+ * [in] filTap       Second operand (16 bit)
+ *
+ * Return:
+ * [out]  mac        Result of operation
+ *    
+ **/
+ 
+OMX_S32 armSatMac_S16S32_S32(
+                        OMX_S32 mac, 
+                        OMX_S32 delayElem, 
+                        OMX_S16 filTap );
+
+/**
+ * Function: armSatRoundRightShift_S32_S16
+ *
+ * Description:
+ *   Returns the result of the rounded right shift of input by scaleFactor:
+ *
+ *   output = Saturate_in_16Bits( RightShift( Round(input), scaleFactor ) )
+ *
+ * Parameters:
+ * [in] input       The input to be operated on
+ * [in] scaleFactor The shift amount
+ *
+ * Return:
+ * [out]            Result of operation
+ *    
+ **/
+
+
+OMX_S16 armSatRoundRightShift_S32_S16(
+                        OMX_S32 input, 
+                        OMX_INT scaleFactor);
+
+/**
+ * Function: armSatRoundLeftShift_S32()
+ *
+ * Description:
+ *     Returns the result of a saturating left shift of Value by shift,
+ *     or of a rounded right shift if shift is negative.
+ *
+ * Parameters:
+ * [in] Value        Operand
+ * [in] shift        Shift amount (negative means a rounded right shift)
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+ 
+OMX_S32 armSatRoundLeftShift_S32(
+                        OMX_S32 Value,
+                        OMX_INT shift
+                        );
+
+/**
+ * Function: armSatRoundLeftShift_S64()
+ *
+ * Description:
+ *     Returns the result of a saturating left shift of Value by shift,
+ *     or of a rounded right shift if shift is negative.
+ *
+ * Parameters:
+ * [in] Value        Operand
+ * [in] shift        Shift amount (negative means a rounded right shift)
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+ 
+OMX_S64 armSatRoundLeftShift_S64(
+                        OMX_S64 Value,
+                        OMX_INT shift
+                        );
+
+/**
+ * Function: armSatMulS16S32_S32()
+ *
+ * Description:
+ *     Returns the result of an S16 data type multiplied with an S32 data type
+ *     in an S32 container.
+ *
+ * Parameters:
+ * [in] input1       Operand 1 (S16)
+ * [in] input2       Operand 2 (S32)
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+
+OMX_S32 armSatMulS16S32_S32(
+                    OMX_S16 input1,
+                    OMX_S32 input2);
+
+/**
+ * Function: armSatMulS32S32_S32()
+ *
+ * Description:
+ *     Returns the result of an S32 data type multiplied with an S32 data type
+ *     in an S32 container.
+ *
+ * Parameters:
+ * [in] input1       Operand 1 (S32)
+ * [in] input2       Operand 2 (S32)
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+OMX_S32 armSatMulS32S32_S32(
+                    OMX_S32 input1,
+                    OMX_S32 input2);
+
+
+/**
+ * Function: armIntDivAwayFromZero()
+ *
+ * Description: Integer division with rounding to the nearest integer. 
+ *               Half-integer values are rounded away from zero
+ *               unless otherwise specified. For example 3//2 is rounded 
+ *               to 2, and -3//2 is rounded to -2.
+ *
+ * Parameters:
+ * [in] Num        Operand 1 (the value being divided)
+ * [in] Deno       Operand 2 (the value divided by)
+ *
+ * Return:
+ * [out]             Result of operation Num//Deno
+ *    
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno);
+
+
+/***********************************************************************/
+/*
+ * Debugging macros
+ *
+ */
+
+
+/*
+ * Definition of output stream - change to stderr if necessary
+ */
+#define DEBUG_STREAM stdout
+
+/*
+ * Debug printf macros, one for each argument count (0 to 14); add more if
+ * needed. They expand to nothing unless DEBUG_ON is defined.
+ */
+#ifdef DEBUG_ON
+#include <stdio.h>
+
+#define DEBUG_PRINTF_0(a)                                               fprintf(DEBUG_STREAM, a)
+#define DEBUG_PRINTF_1(a, b)                                            fprintf(DEBUG_STREAM, a, b)
+#define DEBUG_PRINTF_2(a, b, c)                                         fprintf(DEBUG_STREAM, a, b, c)
+#define DEBUG_PRINTF_3(a, b, c, d)                                      fprintf(DEBUG_STREAM, a, b, c, d)
+#define DEBUG_PRINTF_4(a, b, c, d, e)                                   fprintf(DEBUG_STREAM, a, b, c, d, e)
+#define DEBUG_PRINTF_5(a, b, c, d, e, f)                                fprintf(DEBUG_STREAM, a, b, c, d, e, f)
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g)                             fprintf(DEBUG_STREAM, a, b, c, d, e, f, g)
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h)                          fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h)
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i)                       fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i)
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j)                    fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j)
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k)                fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k)
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l)             fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l)
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m)          fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m)
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n)       fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)    fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
+#else /* DEBUG_ON */
+#define DEBUG_PRINTF_0(a)                                  
+#define DEBUG_PRINTF_1(a, b)                               
+#define DEBUG_PRINTF_2(a, b, c)                            
+#define DEBUG_PRINTF_3(a, b, c, d)                         
+#define DEBUG_PRINTF_4(a, b, c, d, e)                      
+#define DEBUG_PRINTF_5(a, b, c, d, e, f)                   
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g)                
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h)             
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i)          
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j)       
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k)    
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l)             
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m)          
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n)      
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)   
+#endif /* DEBUG_ON */
+
+
+/*
+ * Domain and sub domain definitions
+ *
+ * In order to turn on debug for an entire domain or sub-domain
+ * at compile time, one of the DEBUG_DOMAIN_* below may be defined,
+ * which will activate debug in all of the defines it contains.
+ */
+
+#ifdef DEBUG_DOMAIN_AC
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT_MPEG4
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT
+#define DEBUG_OMXACAAC_DECODEDATSTRELT
+#define DEBUG_OMXACAAC_DECODEFILLELT
+#define DEBUG_OMXACAAC_DECODEISSTEREO_S32
+#define DEBUG_OMXACAAC_DECODEMSPNS_S32
+#define DEBUG_OMXACAAC_DECODEMSSTEREO_S32_I
+#define DEBUG_OMXACAAC_DECODEPRGCFGELT
+#define DEBUG_OMXACAAC_DECODETNS_S32_I
+#define DEBUG_OMXACAAC_DEINTERLEAVESPECTRUM_S32
+#define DEBUG_OMXACAAC_ENCODETNS_S32_I
+#define DEBUG_OMXACAAC_LONGTERMPREDICT_S32
+#define DEBUG_OMXACAAC_LONGTERMRECONSTRUCT_S32
+#define DEBUG_OMXACAAC_MDCTFWD_S32
+#define DEBUG_OMXACAAC_MDCTINV_S32_S16
+#define DEBUG_OMXACAAC_NOISELESSDECODE
+#define DEBUG_OMXACAAC_QUANTINV_S32_I
+#define DEBUG_OMXACAAC_UNPACKADIFHEADER
+#define DEBUG_OMXACAAC_UNPACKADTSFRAMEHEADER
+#define DEBUG_OMXACMP3_HUFFMANDECODESFBMBP_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODESFB_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODE_S32
+#define DEBUG_OMXACMP3_MDCTINV_S32
+#define DEBUG_OMXACMP3_REQUANTIZESFB_S32_I
+#define DEBUG_OMXACMP3_REQUANTIZE_S32_I
+#define DEBUG_OMXACMP3_SYNTHPQMF_S32_S16
+#define DEBUG_OMXACMP3_UNPACKFRAMEHEADER
+#define DEBUG_OMXACMP3_UNPACKSCALEFACTORS_S8
+#define DEBUG_OMXACMP3_UNPACKSIDEINFO
+#endif /* DEBUG_DOMAIN_AC */
+
+
+#ifdef DEBUG_DOMAIN_VC
+#define DEBUG_OMXVCM4P10_AVERAGE_16X
+#define DEBUG_OMXVCM4P10_AVERAGE_4X
+#define DEBUG_OMXVCM4P10_AVERAGE_8X
+#define DEBUG_OMXVCM4P10_DEBLOCKCHROMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DEBLOCKLUMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DECODECHROMADCCOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DECODECOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DEQUANTTRANSFORMACFROMPAIR_U8_S16_C1_DLX
+#define DEBUG_OMXVCM4P10_EXPANDFRAME
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_PREDICTINTRACHROMA8X8_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_16X16_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_4X4_U8_C1R
+#define DEBUG_OMXVCM4P10_SADQUAR_16X
+#define DEBUG_OMXVCM4P10_SADQUAR_4X
+#define DEBUG_OMXVCM4P10_SADQUAR_8X
+#define DEBUG_OMXVCM4P10_SAD_16X
+#define DEBUG_OMXVCM4P10_SAD_4X
+#define DEBUG_OMXVCM4P10_SAD_8X
+#define DEBUG_OMXVCM4P10_SATD_4X4
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTCHROMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTLUMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_CHROMADC
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_LUMADC
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_8X8
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_8X8
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_SAD_U8_S16
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_U8_S16
+#define DEBUG_OMXVCM4P2_DCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_DECODEBLOCKCOEF_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEPADMV_PVOP
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEMV_U8_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_FINDMVPRED
+#define DEBUG_OMXVCM4P2_IDCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_LIMITMVTORECT
+#define DEBUG_OMXVCM4P2_MOTIONESTIMATIONMB
+#define DEBUG_OMXVCM4P2_PADMBGRAY_U8
+#define DEBUG_OMXVCM4P2_PADMBHORIZONTAL_U8
+#define DEBUG_OMXVCM4P2_PADMBVERTICAL_U8
+#define DEBUG_OMXVCM4P2_PADMV
+#define DEBUG_OMXVCM4P2_QUANTINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINTRA_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTRA_S16_I
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTER
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTRA
+#endif /* DEBUG_DOMAIN_VC */
+
+
+#ifdef DEBUG_DOMAIN_IC
+/* To be filled in */
+#endif /* DEBUG_DOMAIN_IC */
+
+
+#ifdef DEBUG_DOMAIN_SP
+#define DEBUG_OMXACSP_DOTPROD_S16
+#define DEBUG_OMXACSP_BLOCKEXP_S16
+#define DEBUG_OMXACSP_BLOCKEXP_S32
+#define DEBUG_OMXACSP_COPY_S16
+#define DEBUG_OMXACSP_DOTPROD_S16 /* NOTE: also defined at the top of this group; the identical redefinition is harmless */
+#define DEBUG_OMXACSP_DOTPROD_S16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S16S32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S32_SFS
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC16
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S16_S32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S32
+#define DEBUG_OMXACSP_FFTINIT_C_SC16
+#define DEBUG_OMXACSP_FFTINIT_C_SC32
+#define DEBUG_OMXACSP_FFTINIT_R_S16_S32
+#define DEBUG_OMXACSP_FFTINIT_R_S32
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32S16_SFS
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32_I
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIR_DIRECT_S16
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_DIRECT_S16
+#endif /* DEBUG_DOMAIN_SP */
+
+
+#ifdef DEBUG_DOMAIN_IP
+#define DEBUG_OMXIPBM_ADDC_U8_C1R_SFS
+#define DEBUG_OMXIPBM_COPY_U8_C1R
+#define DEBUG_OMXIPBM_COPY_U8_C3R
+#define DEBUG_OMXIPBM_MIRROR_U8_C1R
+#define DEBUG_OMXIPBM_MULC_U8_C1R_SFS
+#define DEBUG_OMXIPCS_COLORTWISTQ14_U8_C3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR420LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR422LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR444LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR420LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR422LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR444LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_YCBCR420RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422RSZCSCROTRGB_U8_C2R
+#define DEBUG_OMXIPCS_YCBCR422RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565_U8_U16_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB_U8_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_C2P3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR444TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR444TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB_U8_C3R
+#define DEBUG_OMXIPPP_GETCENTRALMOMENT_S64
+#define DEBUG_OMXIPPP_GETSPATIALMOMENT_S64
+#define DEBUG_OMXIPPP_MOMENTGETSTATESIZE_S64
+#define DEBUG_OMXIPPP_MOMENTINIT_S64
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C1R
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C3R
+#endif /* DEBUG_DOMAIN_IP */
+
+
+#endif /* _armCommon_H_ */
+
+/*End of File*/
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_BitDec_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_BitDec_s.h
new file mode 100755
index 0000000..c738f72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_BitDec_s.h
@@ -0,0 +1,670 @@
+;//
+;// 
+;// File Name:  armCOMM_BitDec_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;// 
+;// OpenMAX optimized bitstream decode module
+;//
+;// You must include armCOMM_s.h before including this file
+;//
+;// This module provides macros to perform assembly optimized fixed and
+;// variable length decoding from a read-only bitstream. The variable
+;// length decode modules take as input a pointer to a table of 16-bit
+;// entries of the following format.
+;//
+;// VLD Table Entry format
+;//
+;//        15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+;//       +------------------------------------------------+
+;//       |  Len   |               Symbol              | 1 |
+;//       +------------------------------------------------+
+;//       |                Offset                      | 0 |
+;//       +------------------------------------------------+
+;//
+;// If the table entry is a leaf entry then bit 0 is set:
+;//    Len    = Number of bits overread (0 to 7)
+;//    Symbol = Symbol payload (unsigned 12 bits)
+;//
+;// If the table entry is an internal node then bit 0 is clear:
+;//    Offset = Number of (16-bit) half words from the table
+;//             start to the next table node
+;//
+;// The table is traversed by successive lookups on the
+;// next Step bits of the input bitstream until a leaf node
+;// is reached. The Step sizes are supplied to the VLD macro.
+;//
+;// USAGE:
+;//
+;// To use any of the macros in this package, first call:
+;//
+;//    M_BD_INIT ppBitStream, pBitOffset, pBitStream, RBitBuffer, RBitCount, Tmp
+;//
+;// This caches the current bitstream position and next available
+;// bits in registers pBitStream, RBitBuffer, RBitCount. These registers
+;// are reserved for use by the bitstream decode package until you
+;// call M_BD_FINI.
+;//
+;// Next call the following macro(s) as many times as you need:
+;//
+;//    M_BD_LOOK8       - Look ahead constant 1<=N<=8  bits into the bitstream
+;//    M_BD_LOOK16      - Look ahead constant 1<=N<=16 bits into the bitstream
+;//    M_BD_READ8       - Read constant 1<=N<=8  bits from the bitstream
+;//    M_BD_READ16      - Read constant 1<=N<=16 bits from the bitstream
+;//    M_BD_VREAD8      - Read variable 1<=N<=8  bits from the bitstream
+;//    M_BD_VREAD16     - Read variable 1<=N<=16 bits from the bitstream
+;//    M_BD_VLD         - Perform variable length decode using lookup table
+;//
+;// Finally call the macro:
+;//
+;//    M_BD_FINI ppBitStream, pBitOffset
+;//
+;// This writes the bitstream state back to memory.
+;//
+;// The three bitstream cache register names are assigned to the following global
+;// variables:
+;//
+
+        GBLS    pBitStream  ;// Register name for pBitStream
+        GBLS    BitBuffer   ;// Register name for BitBuffer
+        GBLS    BitCount    ;// Register name for BitCount
+   
+;//        
+;// These register variables must have a certain defined state on entry to every bitstream
+;// macro (except M_BD_INIT) and on exit from every bitstream macro (except M_BD_FINI).
+;// The state may depend on implementation.
+;//
+;// For the default (ARM11) implementation the following hold:
+;//    pBitStream - points to the first byte not held in the BitBuffer
+;//    BitBuffer  - is a cache of (4 bytes) 32 bits, bit 31 the first bit
+;//    BitCount   - is offset (from the top bit) to the next unused bitstream bit
+;//    0<=BitCount<=15 (so BitBuffer holds at least 17 unused bits)
+;//
+;//
+
+        ;// Bitstream Decode initialise
+        ;//
+        ;// Initialises the bitstream decode global registers from
+        ;// bitstream pointers. The work is split into 3 macros
+        ;// (M_BD_INIT0, M_BD_INIT1, M_BD_INIT2) to enable scheduling.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $ppBitStream    - pointer to pointer to the next bitstream byte
+        ;// $pBitOffset     - pointer to the number of bits used in the current byte (0..7)
+        ;// $RBitStream     - register to use for pBitStream (can be $ppBitStream)
+        ;// $RBitBuffer     - register to use for BitBuffer
+        ;// $RBitCount      - register to use for BitCount   (can be $pBitOffset)
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// $T1,$T2,$T3     - registers that must be preserved between calls to
+        ;//                   M_BD_INIT1 and M_BD_INIT2
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_INIT0  $ppBitStream, $pBitOffset, $RBitStream, $RBitBuffer, $RBitCount
+        ;// bind the global register-name strings to the registers chosen by the caller
+pBitStream  SETS "$RBitStream"
+BitBuffer   SETS "$RBitBuffer"
+BitCount    SETS "$RBitCount"        
+        
+        ;// load the bitstream byte pointer and the bit offset from memory
+        LDR     $pBitStream, [$ppBitStream]
+        LDR     $BitCount, [$pBitOffset]
+        MEND
+        ;// Second init step: fetch the first three bitstream bytes (T3 is not used)
+        MACRO
+        M_BD_INIT1  $T1, $T2, $T3
+        LDRB    $T2, [$pBitStream, #2]              ;// T2 = third byte
+        LDRB    $T1, [$pBitStream, #1]              ;// T1 = second byte
+        LDRB    $BitBuffer,  [$pBitStream], #3      ;// BitBuffer = first byte, then pBitStream += 3
+        ADD     $BitCount, $BitCount, #8            ;// only 24 bits get loaded, so the top 8 buffer bits count as used
+        MEND
+        ;// Third init step: merge the three fetched bytes into BitBuffer (T3 is not used)
+        MACRO
+        M_BD_INIT2  $T1, $T2, $T3
+        ORR     $T2, $T2, $T1, LSL #8                   ;// T2 = (second byte << 8) OR third byte
+        ORR     $BitBuffer, $T2, $BitBuffer, LSL #16    ;// BitBuffer = (first byte << 16) OR T2
+        MEND    
+        
+        ;//
+        ;// Look ahead fixed 1<=N<=8 bits without consuming any bits
+        ;// The next bits will be placed at bit 31..24 of destination register
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits to look at (only range-checked by the ASSERT)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the upcoming bitstream bits, left-aligned (next bit at bit 31)
+        ;//                   (this macro takes no scratch register)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_LOOK8  $Symbol, $N
+        ASSERT  ($N>=1):LAND:($N<=8)
+        MOV     $Symbol, $BitBuffer, LSL $BitCount  ;// shift out the BitCount already-used bits
+        MEND
+        
+        ;//
+        ;// Look ahead fixed 1<=N<=16 bits without consuming any bits
+        ;// The next bits will be placed at bit 31..16 of destination register
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits to look at (only range-checked by the ASSERT)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the upcoming bitstream bits, left-aligned (next bit at bit 31)
+        ;// $T1             - scratch register parameter; not touched by this implementation
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_LOOK16  $Symbol, $N, $T1
+        ASSERT  ($N >= 1):LAND:($N <= 16)
+        MOV     $Symbol, $BitBuffer, LSL $BitCount  ;// shift out the BitCount already-used bits
+        MEND
+        
+        ;//
+        ;// Skips fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits to skip
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $T1             - corrupted temp/scratch register (holds the refill byte)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_SKIP8 $N, $T1
+        ASSERT  ($N>=1):LAND:($N<=8)        
+        SUBS    $BitCount, $BitCount, #(8-$N)       ;// BitCount += N-8; carry set when the result is >= 0
+        LDRCSB  $T1, [$pBitStream], #1              ;// carry set: fetch one refill byte
+        ADDCC   $BitCount, $BitCount, #8            ;// carry clear: no refill, undo the -8 (flags unchanged)
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8 ;// carry set: shift the new byte into the buffer
+        MEND
+        
+        
+        ;//
+        ;// Read fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits to read
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the next N bits of the bitstream (right-aligned)
+        ;// $T1             - corrupted temp/scratch register (holds the refill byte)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_READ8 $Symbol, $N, $T1
+        ASSERT  ($N>=1):LAND:($N<=8)                
+        MOVS    $Symbol, $BitBuffer, LSL $BitCount  ;// left-align the unread bits (flags are overwritten by the SUBS)
+        SUBS    $BitCount, $BitCount, #(8-$N)       ;// BitCount += N-8; carry set when the result is >= 0
+        LDRCSB  $T1, [$pBitStream], #1              ;// carry set: fetch one refill byte
+        ADDCC   $BitCount, $BitCount, #8            ;// carry clear: no refill, undo the -8 (flags unchanged)
+        MOV     $Symbol, $Symbol, LSR #(32-$N)      ;// keep only the top N bits, right-aligned
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8 ;// carry set: shift the new byte into the buffer
+        MEND
+
+        ;//
+        ;// Read fixed 1<=N<=16 bits from the bitstream, advancing the bitstream pointer
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits to read
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the next N bits of the bitstream (right-aligned); must differ from $T1
+        ;// $T1             - corrupted temp/scratch register (holds the refill bytes)
+        ;// $T2             - scratch register parameter; not touched by this implementation
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_READ16 $Symbol, $N, $T1, $T2
+        ASSERT  ($N>=1):LAND:($N<=16)
+        ASSERT  $Symbol<>$T1
+        IF ($N<=8)
+            M_BD_READ8  $Symbol, $N, $T1
+        ELSE        
+            ;// N>8 so at least one byte can always be refilled; the first load is unconditional
+            LDRB    $T1, [$pBitStream], #1            
+            MOVS    $Symbol, $BitBuffer, LSL $BitCount          ;// left-align the unread bits of the old buffer
+            ORR     $BitBuffer, $T1, $BitBuffer, LSL #8         ;// shift the first refill byte in
+            SUBS    $BitCount, $BitCount, #(16-$N)              ;// BitCount += N-16; carry set when the result is >= 0
+            LDRCSB  $T1, [$pBitStream], #1                      ;// carry set: fetch a second refill byte
+            MOV     $Symbol, $Symbol, LSR #(32-$N)              ;// keep only the top N bits, right-aligned
+            ADDCC   $BitCount, $BitCount, #8                    ;// carry clear: only one byte refilled, undo 8
+            ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8         ;// carry set: shift the second byte in
+        ENDIF
+        MEND
+        
+        ;//
+        ;// Skip variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits. 1<=N<=8
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $T1             - corrupted temp/scratch register (holds the refill byte)
+        ;//                   (this macro takes no second scratch register)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_VSKIP8 $N, $T1
+        ADD     $BitCount, $BitCount, $N            ;// account for the N skipped bits
+        SUBS    $BitCount, $BitCount, #8            ;// tentatively consume a byte; carry set when the result is >= 0
+        LDRCSB  $T1, [$pBitStream], #1              ;// carry set: fetch one refill byte
+        ADDCC   $BitCount, $BitCount, #8            ;// carry clear: no refill, undo the -8 (flags unchanged)
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8 ;// carry set: shift the new byte into the buffer
+        MEND        
+        
+        ;//
+        ;// Skip variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - number of bits. 1<=N<=16
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $T1             - corrupted temp/scratch register (holds the refill bytes)
+        ;// $T2             - scratch register parameter; not touched by this implementation
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_VSKIP16 $N, $T1, $T2
+        ADD     $BitCount, $BitCount, $N            ;// account for the N skipped bits
+        SUBS    $BitCount, $BitCount, #8            ;// tentatively consume a byte; carry set when the result is >= 0
+        LDRCSB  $T1, [$pBitStream], #1              ;// carry set: fetch the first refill byte
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8 ;// carry set: shift it into the buffer
+        SUBCSS  $BitCount, $BitCount, #8            ;// after a first refill, try a second one (updates the flags)
+        LDRCSB  $T1, [$pBitStream], #1              ;// carry still set: fetch the second refill byte
+        ADDCC   $BitCount, $BitCount, #8            ;// carry clear: undo the last -8 (flags unchanged)
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8 ;// carry set: shift the second byte in
+        MEND        
+
+        ;//
+        ;// Read variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - register holding the number of bits to read. 1<=N<=8
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the next N bits of the bitstream (right-aligned)
+        ;// $T1             - corrupted temp/scratch register (holds the refill byte)
+        ;// $T2             - corrupted temp/scratch register (holds 32-N)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_VREAD8 $Symbol, $N, $T1, $T2
+        MOV     $Symbol, $BitBuffer, LSL $BitCount  ;// left-align the unread bits
+        ADD     $BitCount, $BitCount, $N            ;// account for the N bits being read
+        SUBS    $BitCount, $BitCount, #8            ;// tentatively consume a byte; carry set when the result is >= 0
+        LDRCSB  $T1, [$pBitStream], #1              ;// carry set: fetch one refill byte
+        RSB     $T2, $N, #32                        ;// T2 = 32 - N
+        ADDCC   $BitCount, $BitCount, #8            ;// carry clear: no refill, undo the -8 (flags unchanged)
+        MOV     $Symbol, $Symbol, LSR $T2           ;// keep only the top N bits, right-aligned
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8 ;// carry set: shift the new byte into the buffer
+        MEND
+
+
+        ;//
+        ;// Read variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $N              - register holding the number of bits to read. 1<=N<=16
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the next N bits of the bitstream (right-aligned)
+        ;// $T1             - corrupted temp/scratch register (holds the refill bytes)
+        ;// $T2             - corrupted temp/scratch register (holds 32-N)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_VREAD16 $Symbol, $N, $T1, $T2
+        MOV     $Symbol, $BitBuffer, LSL $BitCount  ;// left-align the unread bits
+        ADD     $BitCount, $BitCount, $N            ;// account for the N bits being read
+        SUBS    $BitCount, $BitCount, #8            ;// tentatively consume a byte; carry set when the result is >= 0
+        LDRCSB  $T1, [$pBitStream], #1              ;// carry set: fetch the first refill byte
+        RSB     $T2, $N, #32                        ;// T2 = 32 - N
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8 ;// carry set: shift the first byte in
+        SUBCSS  $BitCount, $BitCount, #8            ;// after a first refill, try a second one (updates the flags)
+        LDRCSB  $T1, [$pBitStream], #1              ;// carry still set: fetch the second refill byte
+        ADDCC   $BitCount, $BitCount, #8            ;// carry clear: undo the last -8 (flags unchanged)
+        MOV     $Symbol, $Symbol, LSR $T2           ;// keep only the top N bits, right-aligned
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8 ;// carry set: shift the second byte in
+        MEND
+
+
+        ;//
+        ;// Decode a code of the form 0000...001 where there
+        ;// are N zeros before the 1 and N<=15 (code length<=16)
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the number of zeros before the next 1
+        ;//                   >=16 is an illegal code
+        ;// $T1             - corrupted temp/scratch register
+        ;// $T2             - corrupted temp/scratch register
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//        
+        MACRO
+        M_BD_CLZ16 $Symbol, $T1, $T2
+        MOVS    $Symbol, $BitBuffer, LSL $BitCount   ;// left align next bits
+        CLZ     $Symbol, $Symbol                     ;// count zeros before the next 1
+        ADD     $BitCount, $BitCount, $Symbol        ;// account for the zeros
+        SUBS    $BitCount, $BitCount, #7        ;// length is Symbol+1; CS if a byte can be refilled
+        LDRCSB  $T1, [$pBitStream], #1               ;// load first refill byte
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// merge in first refill byte
+        SUBCSS  $BitCount, $BitCount, #8             ;// CS if a second byte can be refilled
+        LDRCSB  $T1, [$pBitStream], #1               ;// load second refill byte
+        ADDCC   $BitCount, $BitCount, #8             ;// undo the -8 of the refill check that failed
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// merge in second refill byte
+        MEND  
+
+        ;//
+        ;// Decode a code of the form 1111...110 where there
+        ;// are N ones before the 0 and N<=15 (code length<=16)
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - the number of ones before the next 0
+        ;//                   >=16 is an illegal code
+        ;// $T1             - corrupted temp/scratch register
+        ;// $T2             - corrupted temp/scratch register
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//        
+        MACRO
+        M_BD_CLO16 $Symbol, $T1, $T2
+        MOV     $Symbol, $BitBuffer, LSL $BitCount   ;// left align next bits
+        MVN     $Symbol, $Symbol                     ;// invert: leading ones become leading zeros
+        CLZ     $Symbol, $Symbol                     ;// count ones before the next 0
+        ADD     $BitCount, $BitCount, $Symbol        ;// account for the ones
+        SUBS    $BitCount, $BitCount, #7        ;// length is Symbol+1; CS if a byte can be refilled
+        LDRCSB  $T1, [$pBitStream], #1               ;// load first refill byte
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// merge in first refill byte
+        SUBCSS  $BitCount, $BitCount, #8             ;// CS if a second byte can be refilled
+        LDRCSB  $T1, [$pBitStream], #1               ;// load second refill byte
+        ADDCC   $BitCount, $BitCount, #8             ;// undo the -8 of the refill check that failed
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// merge in second refill byte
+        MEND  
+
+
+        ;//
+        ;// Variable Length Decode module
+        ;//
+        ;// Decodes one VLD Symbol from a bitstream and refill the bitstream
+        ;// buffer.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $pVLDTable      - pointer to VLD decode table of 16-bit entries.
+        ;//                   The format is described above at the start of
+        ;//                   this file.
+        ;// $S0             - The number of bits to look up for the first step
+        ;//                   1<=$S0<=8
+        ;// $S1             - The number of bits to look up for each subsequent
+        ;//                   step 1<=$S1<=$S0.
+        ;//
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// 
+        ;// Output Registers:
+        ;//
+        ;// $Symbol         - decoded VLD symbol value
+        ;// $T1             - corrupted temp/scratch register
+        ;// $T2             - corrupted temp/scratch register
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_VLD $Symbol, $T1, $T2, $pVLDTable, $S0, $S1
+        ASSERT (1<=$S0):LAND:($S0<=8)
+        ASSERT (1<=$S1):LAND:($S1<=$S0)
+        
+        ;// Note 0<=BitCount<=15 on entry and exit
+        
+        MOVS    $T1, $BitBuffer, LSL $BitCount       ;// left align next bits
+        MOVS    $Symbol, #(2<<$S0)-2                 ;// create mask
+        AND     $Symbol, $Symbol, $T1, LSR #(31-$S0) ;// 2*(next $S0 bits)
+        SUBS    $BitCount, $BitCount, #8             ;// CS if buffer can be filled
+01
+        LDRCSB  $T1, [$pBitStream], #1               ;// load refill byte
+        LDRH    $Symbol, [$pVLDTable, $Symbol]       ;// load table entry
+        ADDCC   $BitCount, $BitCount, #8             ;// refill not possible
+        ADD     $BitCount, $BitCount, #$S0           ;// assume $S0 bits used
+        ORRCS   $BitBuffer, $T1, $BitBuffer, LSL #8  ;// merge in refill byte
+        MOVS    $T1, $Symbol, LSR #1                 ;// CS=leaf entry
+        BCS     %FT02                                ;// leaf: finish decode at 02
+        
+        MOVS    $T1, $BitBuffer, LSL $BitCount       ;// left align next bit
+        IF (2*$S0-$S1<=8)
+            ;// Can combine refill check and -S0+S1 and keep $BitCount<=15
+            SUBS    $BitCount, $BitCount, #8+($S0-$S1)
+        ELSE
+            ;// Separate refill check and -S0+S1 offset
+            SUBS  $BitCount, $BitCount, #8           ;// CS if buffer can be filled
+            SUB   $BitCount, $BitCount, #($S0-$S1)   ;// apply -S0+S1, flags unchanged
+        ENDIF
+        ADD     $Symbol, $Symbol, $T1, LSR #(31-$S1) ;// add 2*(next $S1 bits) to
+        BIC     $Symbol, $Symbol, #1                 ;//   table offset
+        B       %BT01                                ;// load next table entry
+02
+        ;// BitCount range now depends on the route here
+        ;// if (first step)       S0 <= BitCount <= 7+S0        <=15
+        ;// else if (2*S0-S1<=8)  S0 <= BitCount <= 7+(2*S0-S1) <=15
+        ;// else                  S1 <= BitCount <= 7+S1        <=15
+        
+        SUB     $BitCount, $BitCount, $Symbol, LSR#13 ;// subtract entry[15:13] (presumably unused lookup bits - confirm with table format)
+        BIC     $Symbol, $T1, #0xF000                 ;// $T1=entry>>1; clear bits 12-15 => Symbol=entry[12:1]
+        MEND
+        
+
+        ;// Add an offset number of bits
+        ;//
+        ;// Outputs destination byte and bit index values which corresponds to an offset number of bits 
+        ;// from the current location. This is used to compare bitstream positions using M_BD_CMP.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $Offset         - Offset to be added in bits.
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// $ByteIndex      - Destination pBitStream pointer after adding the Offset. 
+        ;//                   This value will be 4 byte ahead and needs to subtract by 4 to get exact 
+        ;//                   pointer (as in M_BD_FINI). But for using with M_BD_CMP subtract is not needed.
+        ;// $BitIndex       - Destination BitCount after the addition of Offset number of bits
+        ;//
+        MACRO
+        M_BD_ADD  $ByteIndex, $BitIndex, $Offset
+
+        ;// ($ByteIndex,$BitIndex) = Current position + $Offset bits
+        ADD     $Offset, $Offset, $BitCount                ;// NOTE: $Offset is overwritten with $Offset+$BitCount
+        AND     $BitIndex, $Offset, #7                     ;// bit index = low 3 bits of the total
+        ADD     $ByteIndex, $pBitStream, $Offset, ASR #3   ;// byte pointer = $pBitStream + total/8
+        MEND
+
+        ;// Move bitstream pointers to the location given
+        ;//
+        ;// Outputs destination byte and bit index values which corresponds to  
+        ;// the current location given (calculated using M_BD_ADD). 
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;// $ByteIndex      - Destination pBitStream pointer after move. 
+        ;//                   This value will be 4 byte ahead and needs to subtract by 4 to get exact 
+        ;//                   pointer (as in M_BD_FINI).
+        ;// $BitIndex       - Destination BitCount after the move
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// $pBitStream     \ 
+        ;//                  } See description above.  
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_MOV  $ByteIndex, $BitIndex
+
+        ;// ($pBitStream, $BitCount) = ($ByteIndex,$BitIndex)
+        MOV     $BitCount, $BitIndex        ;// only these two registers are written;
+        MOV     $pBitStream, $ByteIndex     ;// $BitBuffer is not touched by this macro
+        MEND
+
+        ;// Bitstream Compare
+        ;//
+        ;// Compares bitstream position with that of a destination position. Destination position 
+        ;// is held in two input registers which are calculated using M_BD_ADD macro
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $ByteIndex      - Destination pBitStream pointer, (4 byte ahead as described in M_BD_ADD)
+        ;// $BitIndex       - Destination BitCount
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// FLAGS           - GE if destination is reached, LT if destination is ahead
+        ;// $T1             - corrupted temp/scratch register
+        ;//
+        MACRO
+        M_BD_CMP  $ByteIndex, $BitIndex, $T1
+        
+        ;// Return flags set by (current position)-($ByteIndex,$BitIndex)
+        ;// so GE means that we have reached the indicated position
+
+        ADD         $T1, $pBitStream, $BitCount, LSR #3   ;// current byte position
+        CMP         $T1, $ByteIndex                       ;// compare byte positions
+        AND         $T1, $BitCount, #7                    ;// current bit position within the byte
+        CMPEQ       $T1, $BitIndex                        ;// only if bytes are equal: compare bit positions
+        MEND
+
+        
+        ;// Bitstream Decode finalise
+        ;//
+        ;// Writes back the bitstream state to the bitstream pointers
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } See description above.
+        ;// $BitCount       / 
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// $ppBitStream    - pointer to pointer to the next bitstream byte
+        ;// $pBitOffset     - pointer to the number of bits used in the current byte (0..7)
+        ;// $pBitStream     \ 
+        ;// $BitBuffer       } these register are corrupted
+        ;// $BitCount       / 
+        ;//
+        MACRO
+        M_BD_FINI  $ppBitStream, $pBitOffset
+        
+        ;// Advance pointer by the number of free bits in the buffer
+        ADD     $pBitStream, $pBitStream, $BitCount, LSR#3   ;// whole bytes: $BitCount/8
+        AND     $BitCount, $BitCount, #7                     ;// remaining bit offset 0..7
+        
+        ;// Now move back 32 bits to reach the first unused (unread) bit
+        SUB     $pBitStream, $pBitStream, #4
+        
+        ;// Store out bitstream state
+        STR     $BitCount, [$pBitOffset]                     ;// *pBitOffset = bit offset
+        STR     $pBitStream, [$ppBitStream]                  ;// *ppBitStream = byte pointer
+        MEND
+        
+        END
+        
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Bitstream.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Bitstream.h
new file mode 100755
index 0000000..b699034
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Bitstream.h
@@ -0,0 +1,212 @@
+/**
+ * 
+ * File Name:  armCOMM_Bitstream.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File: armCOMM_Bitstream.h
+ * Brief: Declares common API's/Data types used across the OpenMax Encoders/Decoders.
+ *
+ */
+
+#ifndef _armCodec_H_
+#define _armCodec_H_
+
+#include "omxtypes.h"
+
+typedef struct {
+    OMX_U8   codeLen;   /* presumably the length of codeWord in bits -- confirm against users */
+    OMX_U32	 codeWord;  /* VLC code word (up to 32 bits) */
+} ARM_VLC32;
+
+/* The above should be renamed as "ARM_VLC32" */
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream pointer
+ *
+ * Parameters:
+ * [in]     **ppBitStream  bitstream pointer (not advanced by this call)
+ * [in]     *pOffset       bit offset (presumably 0..7 within the current byte -- confirm)
+ * [in]     N              number of bits to look ahead, 1...32
+ *
+ * Returns  the next N bits of the bitstream
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream
+ *    
+ * Parameters:
+ * [in]     *ppBitStream   current bitstream pointer
+ * [in]     *pOffset       current bit offset (presumably 0..7 -- confirm)
+ * [in]     N              number of bits to read, 1..32
+ *
+ * [out]    *ppBitStream   updated bitstream pointer
+ * [out]    *pOffset       updated bit offset
+ * Returns  the N bits read
+ */
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align the pointer *ppBitStream to the next byte boundary
+ *
+ * Parameters:
+ * [in]     *ppBitStream   current bitstream pointer
+ * [in]     *pOffset       current bit offset
+ *
+ * [out]    *ppBitStream   updated bitstream pointer
+ * [out]    *pOffset       updated bit offset
+ *
+ **/
+ 
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset);
+
+/** 
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Skip N bits from the value at *ppBitStream
+ *
+ * Parameters:
+ * [in]     *ppBitStream   current bitstream pointer
+ * [in]     *pOffset       current bit offset
+ * [in]     N              number of bits to skip
+ *
+ * [out]    *ppBitStream   updated bitstream pointer
+ * [out]    *pOffset       updated bit offset
+ *
+ **/
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N);
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of variable length symbol (max size 32 bits) read from
+ * the bit stream pointed by *ppBitStream at *pOffset by using the table
+ * pointed by pCodeBook
+ * 
+ * Parameters:
+ * [in]     **ppBitStream
+ * [in]     *pOffset
+ * [in]     pCodeBook
+ * 
+ * [out]    **ppBitStream
+ * [out]    *pOffset
+ *
+ * Returns : Code Book Index if successful. 
+ *         : "ARM_NO_CODEBOOK_INDEX = 0xFFFF" if search fails.
+ **/
+
+#define ARM_NO_CODEBOOK_INDEX ((OMX_U16)(0xFFFF)) /* armUnPackVLC32() result when the search fails; parenthesized so it expands as one operand */
+
+OMX_U16 armUnPackVLC32(
+    const OMX_U8 **ppBitStream,   /* [in,out] bitstream pointer */
+    OMX_INT *pOffset,             /* [in,out] bit offset */
+    const ARM_VLC32 *pCodeBook    /* [in] code book table to search */
+);
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack a code word of codeLength bits into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]  ppBitStream    pointer to the pointer to the current byte 
+ *                      in the bit stream.
+ * [in]  pOffset        pointer to the bit position in the byte 
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in]  codeWord       Code word that needs to be inserted into the
+ *                      bitstream
+ * [in]  codeLength     Length of the code word, valid range 1...32
+ *
+ * [out] ppBitStream    *ppBitStream is updated after the block is encoded,
+ *                      so that it points to the current byte in the bit
+ *                      stream buffer.
+ * [out] pOffset        *pOffset is updated so that it points to the
+ *                      current bit position in the byte pointed by
+ *                      *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+ 
+OMXResult armPackBits (
+    OMX_U8  **ppBitStream, 
+    OMX_INT *pOffset,
+    OMX_U32 codeWord, 
+    OMX_INT codeLength 
+);
+ 
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]  ppBitStream    pointer to the pointer to the current byte 
+ *                      in the bit stream.
+ * [in]  pBitOffset     pointer to the bit position in the byte 
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in]  code           VLC code word that needs to be inserted into the
+ *                      bitstream
+ *
+ * [out] ppBitStream    *ppBitStream is updated after the block is encoded,
+ *                      so that it points to the current byte in the bit
+ *                      stream buffer.
+ * [out] pBitOffset     *pBitOffset is updated so that it points to the
+ *                      current bit position in the byte pointed by
+ *                      *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+ 
+OMXResult armPackVLC32 (
+    OMX_U8 **ppBitStream, 
+    OMX_INT *pBitOffset,
+    ARM_VLC32 code 
+);
+
+#endif      /*_armCodec_H_*/
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCTTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCTTable.h
new file mode 100755
index 0000000..e0cfdaa
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCTTable.h
@@ -0,0 +1,40 @@
+/**
+ *
+ * 
+ * File Name:  armCOMM_IDCTTable.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File         : armCOMM_IDCTTable.h
+ * Description  : Contains declarations of tables for IDCT calculation.
+ *
+ */
+  
+#ifndef _armCOMM_IDCTTable_H_
+#define _armCOMM_IDCTTable_H_
+
+#include "omxtypes.h"
+
+     /*  Table of s(u)*A(u)*A(v)/16 at Q15
+      *  s(u)=1.0 0 <= u <= 5
+      *  s(6)=2.0
+      *  s(7)=4.0
+      *  A(0) = 2*sqrt(2)
+      *  A(u) = 4*cos(u*pi/16)  for (u!=0)
+	  */
+extern const OMX_U16 armCOMM_IDCTPreScale [64];
+extern const OMX_U16 armCOMM_IDCTCoef [4];
+
+#endif /* _armCOMM_IDCTTable_H_ */
+
+
+/* End of File */
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCT_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCT_s.h
new file mode 100755
index 0000000..0baa087
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCT_s.h
@@ -0,0 +1,1451 @@
+;//
+;// This confidential and proprietary software may be used only as
+;// authorised by a licensing agreement from ARM Limited
+;//   (C) COPYRIGHT 2004 ARM Limited
+;//       ALL RIGHTS RESERVED
+;// The entire notice above must be reproduced on all authorised
+;// copies and copies may only be made to the extent permitted
+;// by a licensing agreement from ARM Limited.
+;//
+;// IDCT_s.s
+;//
+;// Inverse DCT module
+;//
+;// 
+;// ALGORITHM DESCRIPTION
+;//
+;// The 8x8 2D IDCT is performed by calculating a 1D IDCT for each
+;// column and then a 1D IDCT for each row.
+;//
+;// The 8-point 1D IDCT is defined by
+;//   f(x) = (C(0)*T(0)*c(0,x) + ... + C(7)*T(7)*c(7,x))/2
+;//
+;//   C(u) = 1/sqrt(2) if u=0 or 1 if u!=0
+;//   c(u,x) = cos( (2x+1)*u*pi/16 )
+;//
+;// We compute the 8-point 1D IDCT using the reverse of
+;// the Arai-Agui-Nakajima flow graph which we split into
+;// 5 stages named in reverse order to identify with the
+;// forward DCT. Direct inversion of the forward formulae
+;// in file FDCT_s.s gives:
+;//
+;// IStage 5:   j(u) = T(u)*A(u)  [ A(u)=4*C(u)*c(u,0) ]
+;//             [ A(0) = 2*sqrt(2)
+;//               A(u) = 4*cos(u*pi/16)  for (u!=0) ]
+;//
+;// IStage 4:   i0 = j0             i1 = j4
+;//             i3 = (j2+j6)/2      i2 = (j2-j6)/2
+;//             i7 = (j5+j3)/2      i4 = (j5-j3)/2
+;//             i5 = (j1+j7)/2      i6 = (j1-j7)/2
+;//
+;// IStage 3:   h0 = (i0+i1)/2      h1 = (i0-i1)/2
+;//             h2 = (i2*sqrt2)-i3  h3 = i3
+;//             h4 =  cos(pi/8)*i4 + sin(pi/8)*i6
+;//             h6 = -sin(pi/8)*i4 + cos(pi/8)*i6
+;//             [ The above two lines rotate by -(pi/8) ]
+;//             h5 = (i5-i7)/sqrt2  h7 = (i5+i7)/2 
+;//             
+;// IStage 2:   g0 = (h0+h3)/2      g3 = (h0-h3)/2
+;//             g1 = (h1+h2)/2      g2 = (h1-h2)/2
+;//             g7 = h7             g6 = h6 - h7
+;//             g5 = h5 - g6        g4 = h4 - g5
+;//
+;// IStage 1:   f0 = (g0+g7)/2      f7 = (g0-g7)/2
+;//             f1 = (g1+g6)/2      f6 = (g1-g6)/2
+;//             f2 = (g2+g5)/2      f5 = (g2-g5)/2
+;//             f3 = (g3+g4)/2      f4 = (g3-g4)/2
+;//
+;// Note that most coefficients are halved 3 times during the
+;// above calculation. We can rescale the algorithm dividing
+;// the input by 8 to remove the halvings.
+;//
+;// IStage 5:   j(u) = T(u)*A(u)/8
+;//
+;// IStage 4:   i0 = j0             i1 = j4
+;//             i3 = j2 + j6        i2 = j2 - j6
+;//             i7 = j5 + j3        i4 = j5 - j3
+;//             i5 = j1 + j7        i6 = j1 - j7
+;//
+;// IStage 3:   h0 = i0 + i1        h1 = i0 - i1
+;//             h2 = (i2*sqrt2)-i3  h3 = i3
+;//             h4 = 2*( cos(pi/8)*i4 + sin(pi/8)*i6)
+;//             h6 = 2*(-sin(pi/8)*i4 + cos(pi/8)*i6)
+;//             h5 = (i5-i7)*sqrt2  h7 = i5 + i7 
+;//             
+;// IStage 2:   g0 = h0 + h3        g3 = h0 - h3
+;//             g1 = h1 + h2        g2 = h1 - h2
+;//             g7 = h7             g6 = h6 - h7
+;//             g5 = h5 - g6        g4 = h4 - g5
+;//
+;// IStage 1:   f0 = g0 + g7        f7 = g0 - g7
+;//             f1 = g1 + g6        f6 = g1 - g6
+;//             f2 = g2 + g5        f5 = g2 - g5
+;//             f3 = g3 + g4        f4 = g3 - g4
+;//
+;// Note:
+;// 1. The scaling by A(u)/8 can often be combined with inverse
+;//    quantization. The column and row scalings can be combined.
+;// 2. The flowgraph in the AAN paper has h4,g6 negated compared
+;//    to the above code but is otherwise identical.
+;// 3. The rotation by -pi/8 can be performed using three multiplies
+;//    Eg  c*i4+s*i6 = (i6-i4)*s + (c+s)*i4
+;//       -s*i4+c*i6 = (i6-i4)*s + (c-s)*i6
+;// 4. If |T(u)|<=1 then from the IDCT definition,
+;//    |f(x)| <= ((1/sqrt2) + |c(1,x)| + .. + |c(7,x)|)/2
+;//            = ((1/sqrt2) + cos(pi/16) + ... + cos(7*pi/16))/2
+;//            = ((1/sqrt2) + (cot(pi/32)-1)/2)/2
+;//            = (1 + cos(pi/16) + cos(2pi/16) + cos(3pi/16))/sqrt(2)
+;//            = (approx)2.64
+;//    So the max gain of the 2D IDCT is ~x7.0 = 3 bits.
+;//    The table below shows input patterns generating the maximum
+;//    value of |f(x)| for input in the range |T(u)|<=1. M=-1, P=+1
+;//    InputPattern      Max |f(x)|
+;//      PPPPPPPP        |f0| =  2.64
+;//      PPPMMMMM        |f1| =  2.64
+;//      PPMMMPPP        |f2| =  2.64
+;//      PPMMPPMM        |f3| =  2.64
+;//      PMMPPMMP        |f4| =  2.64
+;//      PMMPMMPM        |f5| =  2.64
+;//      PMPPMPMP        |f6| =  2.64
+;//      PMPMPMPM        |f7| =  2.64
+;//   Note that this input pattern is the transpose of the
+;//   corresponding max input pattern for the FDCT.
+
+;// Arguments
+
+pSrc    RN 0    ;// source data buffer
+Stride  RN 1    ;// destination stride in bytes
+pDest   RN 2    ;// destination data buffer
+pScale  RN 3    ;// pointer to scaling table
+
+
+        ;// DCT Inverse Macro
+        ;// The DCT code should be parametrized according
+        ;// to the following inputs:
+        ;// $outsize = "u8"  :  8-bit unsigned data saturated (0 to +255)
+        ;//            "s9"  : 16-bit signed data saturated to 9-bit (-256 to +255)
+        ;//            "s16" : 16-bit signed data not saturated (max size ~+/-14273)
+        ;// $inscale = "s16" : signed 16-bit aan-scale table, Q15 format, with 4 byte alignment
+        ;//            "s32" : signed 32-bit aan-scale table, Q23 format, with 4 byte alignment
+        ;//
+        ;// Inputs:
+        ;// pSrc   = r0 = Pointer to input data
+        ;//               Range is -256 to +255 (9-bit)
+        ;// Stride = r1 = Stride between input lines
+        ;// pDest  = r2 = Pointer to output data
+        ;// pScale = r3 = Pointer to aan-scale table in the format defined by $inscale
+        
+        
+        
+        MACRO
+        M_IDCT  $outsize, $inscale, $stride
+        LCLA    SHIFT
+        
+        
+        IF ARM1136JS
+        
+;// REGISTER ALLOCATION
+;// This is hard since we have 8 values, 9 free registers and each
+;// butterfly requires a temporary register. We also want to 
+;// maintain register order so we can use LDM/STM. The table below
+;// summarises the register allocation that meets all these criteria.
+;// a=1stcol, b=2ndcol, f,g,h,i are dataflow points described above.
+;//
+;// r1  a01     g0  h0
+;// r4  b01 f0  g1  h1  i0
+;// r5  a23 f1  g2      i1
+;// r6  b23 f2  g3  h2  i2
+;// r7  a45 f3      h3  i3
+;// r8  b45 f4  g4  h4  i4
+;// r9  a67 f5  g5  h5  i5
+;// r10 b67 f6  g6  h6  i6
+;// r11     f7  g7  h7  i7
+;//
+ra01    RN 1
+rb01    RN 4
+ra23    RN 5
+rb23    RN 6
+ra45    RN 7
+rb45    RN 8
+ra67    RN 9
+rb67    RN 10
+rtmp    RN 11
+csPiBy8 RN 12   ;// [ (Sin(pi/8)@Q15), (Cos(pi/8)@Q15) ]
+LoopRR2 RN 14   ;// [ LoopNumber<<13 , (1/Sqrt(2))@Q15 ]
+;// Transpose allocation
+xft     RN ra01
+xf0     RN rb01
+xf1     RN ra23
+xf2     RN rb23
+xf3     RN ra45
+xf4     RN rb45
+xf5     RN ra67
+xf6     RN rb67
+xf7     RN rtmp
+;// IStage 1 allocation
+xg0     RN xft
+xg1     RN xf0
+xg2     RN xf1
+xg3     RN xf2
+xgt     RN xf3
+xg4     RN xf4
+xg5     RN xf5
+xg6     RN xf6
+xg7     RN xf7
+;// IStage 2 allocation
+xh0     RN xg0
+xh1     RN xg1
+xht     RN xg2
+xh2     RN xg3
+xh3     RN xgt
+xh4     RN xg4
+xh5     RN xg5
+xh6     RN xg6
+xh7     RN xg7
+;// IStage 3,4 allocation
+xit     RN xh0
+xi0     RN xh1
+xi1     RN xht
+xi2     RN xh2
+xi3     RN xh3
+xi4     RN xh4
+xi5     RN xh5
+xi6     RN xh6
+xi7     RN xh7
+        
+        M_STR   pDest,  ppDest
+        IF "$stride"="s"
+            M_STR   Stride, pStride
+        ENDIF
+        M_ADR   pDest,  pBlk
+        LDR     csPiBy8, =0x30fc7642
+        LDR     LoopRR2, =0x00005a82
+  
+v6_idct_col$_F
+        ;// Load even values
+        LDR     xi4, [pSrc], #4  ;// j0
+        LDR     xi5, [pSrc, #4*16-4]  ;// j4
+        LDR     xi6, [pSrc, #2*16-4]  ;// j2
+        LDR     xi7, [pSrc, #6*16-4]  ;// j6
+        
+        ;// Scale Even Values
+        IF "$inscale"="s16" ;// 16x16 mul
+SHIFT       SETA    12
+            LDR     xi0, [pScale], #4
+            LDR     xi1, [pScale, #4*16-4]        
+            LDR     xi2, [pScale, #2*16-4]
+            MOV     xit, #1<<(SHIFT-1)
+            SMLABB  xi3, xi0, xi4, xit
+            SMLATT  xi4, xi0, xi4, xit
+            SMLABB  xi0, xi1, xi5, xit
+            SMLATT  xi5, xi1, xi5, xit
+            MOV     xi3, xi3, ASR #SHIFT
+            PKHBT   xi4, xi3, xi4, LSL #(16-SHIFT)
+            LDR     xi3, [pScale, #6*16-4]
+            SMLABB  xi1, xi2, xi6, xit
+            SMLATT  xi6, xi2, xi6, xit
+            MOV     xi0, xi0, ASR #SHIFT
+            PKHBT   xi5, xi0, xi5, LSL #(16-SHIFT)
+            SMLABB  xi2, xi3, xi7, xit
+            SMLATT  xi7, xi3, xi7, xit
+            MOV     xi1, xi1, ASR #SHIFT
+            PKHBT   xi6, xi1, xi6, LSL #(16-SHIFT)
+            MOV     xi2, xi2, ASR #SHIFT
+            PKHBT   xi7, xi2, xi7, LSL #(16-SHIFT)
+        ENDIF
+        IF "$inscale"="s32" ;// 32x16 mul
+SHIFT       SETA    (12+8-16)
+            MOV     xit, #1<<(SHIFT-1)
+            LDR     xi0, [pScale], #8
+            LDR     xi1, [pScale, #0*32+4-8]
+            LDR     xi2, [pScale, #4*32-8]
+            LDR     xi3, [pScale, #4*32+4-8]            
+            SMLAWB  xi0, xi0, xi4, xit
+            SMLAWT  xi1, xi1, xi4, xit
+            SMLAWB  xi2, xi2, xi5, xit
+            SMLAWT  xi3, xi3, xi5, xit            
+            MOV     xi0, xi0, ASR #SHIFT
+            PKHBT   xi4, xi0, xi1, LSL #(16-SHIFT)
+            MOV     xi2, xi2, ASR #SHIFT            
+            PKHBT   xi5, xi2, xi3, LSL #(16-SHIFT)
+            LDR     xi0, [pScale, #2*32-8]
+            LDR     xi1, [pScale, #2*32+4-8]
+            LDR     xi2, [pScale, #6*32-8]
+            LDR     xi3, [pScale, #6*32+4-8]            
+            SMLAWB  xi0, xi0, xi6, xit
+            SMLAWT  xi1, xi1, xi6, xit
+            SMLAWB  xi2, xi2, xi7, xit
+            SMLAWT  xi3, xi3, xi7, xit            
+            MOV     xi0, xi0, ASR #SHIFT
+            PKHBT   xi6, xi0, xi1, LSL #(16-SHIFT)
+            MOV     xi2, xi2, ASR #SHIFT            
+            PKHBT   xi7, xi2, xi3, LSL #(16-SHIFT)
+        ENDIF
+                
+        ;// Load odd values
+        LDR     xi0, [pSrc, #1*16-4]      ;// j1
+        LDR     xi1, [pSrc, #7*16-4]      ;// j7
+        LDR     xi2, [pSrc, #5*16-4]      ;// j5
+        LDR     xi3, [pSrc, #3*16-4]      ;// j3
+        
+        IF  {TRUE}
+            ;// shortcut if odd values 0
+            TEQ     xi0, #0
+            TEQEQ   xi1, #0
+            TEQEQ   xi2, #0
+            TEQEQ   xi3, #0
+            BEQ     v6OddZero$_F
+        ENDIF
+        
+        ;// Store scaled even values
+        STMIA   pDest, {xi4, xi5, xi6, xi7}
+        
+        ;// Scale odd values
+        IF "$inscale"="s16"
+            ;// Perform AAN Scale
+            LDR     xi4, [pScale, #1*16-4]
+            LDR     xi5, [pScale, #7*16-4]        
+            LDR     xi6, [pScale, #5*16-4]
+            SMLABB  xi7, xi0, xi4, xit
+            SMLATT  xi0, xi0, xi4, xit
+            SMLABB  xi4, xi1, xi5, xit
+            SMLATT  xi1, xi1, xi5, xit
+            MOV     xi7, xi7, ASR #SHIFT
+            PKHBT   xi0, xi7, xi0, LSL #(16-SHIFT)
+            LDR     xi7, [pScale, #3*16-4]
+            SMLABB  xi5, xi2, xi6, xit
+            SMLATT  xi2, xi2, xi6, xit
+            MOV     xi4, xi4, ASR #SHIFT
+            PKHBT   xi1, xi4, xi1, LSL #(16-SHIFT)
+            SMLABB  xi6, xi3, xi7, xit
+            SMLATT  xi3, xi3, xi7, xit
+            MOV     xi5, xi5, ASR #SHIFT
+            PKHBT   xi2, xi5, xi2, LSL #(16-SHIFT)
+            MOV     xi6, xi6, ASR #SHIFT
+            PKHBT   xi3, xi6, xi3, LSL #(16-SHIFT)
+        ENDIF
+        IF "$inscale"="s32" ;// 32x16 mul
+            LDR     xi4, [pScale, #1*32-8]
+            LDR     xi5, [pScale, #1*32+4-8]
+            LDR     xi6, [pScale, #7*32-8]
+            LDR     xi7, [pScale, #7*32+4-8]            
+            SMLAWB  xi4, xi4, xi0, xit
+            SMLAWT  xi5, xi5, xi0, xit
+            SMLAWB  xi6, xi6, xi1, xit
+            SMLAWT  xi7, xi7, xi1, xit            
+            MOV     xi4, xi4, ASR #SHIFT
+            PKHBT   xi0, xi4, xi5, LSL #(16-SHIFT)
+            MOV     xi6, xi6, ASR #SHIFT            
+            PKHBT   xi1, xi6, xi7, LSL #(16-SHIFT)
+            LDR     xi4, [pScale, #5*32-8]
+            LDR     xi5, [pScale, #5*32+4-8]
+            LDR     xi6, [pScale, #3*32-8]
+            LDR     xi7, [pScale, #3*32+4-8]            
+            SMLAWB  xi4, xi4, xi2, xit
+            SMLAWT  xi5, xi5, xi2, xit
+            SMLAWB  xi6, xi6, xi3, xit
+            SMLAWT  xi7, xi7, xi3, xit            
+            MOV     xi4, xi4, ASR #SHIFT
+            PKHBT   xi2, xi4, xi5, LSL #(16-SHIFT)
+            MOV     xi6, xi6, ASR #SHIFT            
+            PKHBT   xi3, xi6, xi7, LSL #(16-SHIFT)
+        ENDIF
+        
+        LDR     xit, =0x00010001        ;// rounding constant
+        SADD16 xi5, xi0, xi1           ;// (j1+j7)/2
+        SHADD16 xi5, xi5, xit
+        
+        SSUB16  xi6, xi0, xi1           ;// j1-j7
+        SADD16 xi7, xi2, xi3           ;// (j5+j3)/2
+        SHADD16 xi7, xi7, xit
+        
+        SSUB16  xi4, xi2, xi3           ;// j5-j3
+        
+        SSUB16  xi3, xi5, xi7           ;// (i5-i7)/2
+        
+        PKHBT   xi0, xi6, xi4, LSL#16   ;// [i4,i6] row a
+        PKHTB   xi1, xi4, xi6, ASR#16   ;// [i4,i6] row b
+        
+        SMUADX  xi2, xi0, csPiBy8       ;// rowa by [c,s]
+        SMUADX  xi4, xi1, csPiBy8       ;// rowb by [c,s]
+        SMUSD   xi0, xi0, csPiBy8       ;// rowa by [-s,c]   
+        SMUSD   xi6, xi1, csPiBy8       ;// rowb by [-s,c]
+                
+        SMULBB  xi1, xi3, LoopRR2
+        SMULTB  xi3, xi3, LoopRR2
+                
+        PKHTB   xh4, xi4, xi2, ASR#16   ;// h4/4
+        PKHTB   xh6, xi6, xi0, ASR#16   ;// h6/4
+        SHADD16 xh7, xi5, xi7           ;// (i5+i7)/4
+                
+        ;// xi0,xi1,xi2,xi3 now free
+        ;// IStage 4,3, rows 2to3 x1/2
+        
+        MOV     xi3, xi3, LSL #1
+        PKHTB   xh5, xi3, xi1, ASR#15   ;// h5/4
+        LDRD    xi0, [pDest, #8]        ;// j2,j6 scaled
+                
+        ;// IStage 2, rows4to7
+        SSUB16  xg6, xh6, xh7
+        SSUB16  xg5, xh5, xg6        
+        SSUB16  xg4, xh4, xg5
+                
+        SSUB16  xi2, xi0, xi1           ;// (j2-j6)
+        
+        SHADD16 xi3, xi0, xi1           ;// (j2+j6)/2
+        
+        SMULBB  xi0, xi2, LoopRR2
+        SMULTB  xi2, xi2, LoopRR2
+        
+        MOV     xi2, xi2, LSL #1
+        PKHTB   xh2, xi2, xi0, ASR#15   ;// i2*sqrt(2)/4
+        
+        ;// xi0, xi1 now free
+        ;// IStage 4,3 rows 0to1 x 1/2
+        LDRD    xi0, [pDest]            ;// j0, j4 scaled
+        SSUB16  xh2, xh2, xi3
+        ADDS    LoopRR2, LoopRR2, #2<<29    ;// done two rows
+        
+        SHADD16 xh0, xi0, xi1
+        SHSUB16 xh1, xi0, xi1                
+        
+        ;// IStage 2 rows 0to3 x 1/2
+        SHSUB16 xg2, xh1, xh2
+        SHADD16 xg1, xh1, xh2
+        SHSUB16 xg3, xh0, xh3
+        SHADD16 xg0, xh0, xh3
+        
+        ;// IStage 1 all rows
+        SADD16  xf3, xg3, xg4
+        SSUB16  xf4, xg3, xg4
+        SADD16  xf2, xg2, xg5
+        SSUB16  xf5, xg2, xg5
+        SADD16  xf1, xg1, xg6
+        SSUB16  xf6, xg1, xg6
+        SADD16  xf0, xg0, xg7
+        SSUB16  xf7, xg0, xg7
+        
+        ;// Transpose, store and loop
+        PKHBT   ra01, xf0, xf1, LSL #16
+        PKHTB   rb01, xf1, xf0, ASR #16
+        
+        PKHBT   ra23, xf2, xf3, LSL #16
+        PKHTB   rb23, xf3, xf2, ASR #16
+        
+        PKHBT   ra45, xf4, xf5, LSL #16
+        PKHTB   rb45, xf5, xf4, ASR #16
+        
+        PKHBT   ra67, xf6, xf7, LSL #16
+        STMIA   pDest!, {ra01, ra23, ra45, ra67}      
+        PKHTB   rb67, xf7, xf6, ASR #16
+        STMIA   pDest!, {rb01, rb23, rb45, rb67}                              
+        BCC     v6_idct_col$_F
+        
+        SUB     pSrc, pDest, #(64*2)
+        M_LDR   pDest, ppDest
+        IF "$stride"="s"
+            M_LDR   pScale, pStride 
+        ENDIF
+        B       v6_idct_row$_F
+        
+v6OddZero$_F
+        SSUB16  xi2, xi6, xi7           ;// (j2-j6)
+        SHADD16 xi3, xi6, xi7           ;// (j2+j6)/2
+        
+        SMULBB  xi0, xi2, LoopRR2
+        SMULTB  xi2, xi2, LoopRR2
+        
+        MOV     xi2, xi2, LSL #1
+        PKHTB   xh2, xi2, xi0, ASR#15   ;// i2*sqrt(2)/4
+        SSUB16  xh2, xh2, xi3
+        
+        ;// xi0, xi1 now free
+        ;// IStage 4,3 rows 0to1 x 1/2
+        
+        SHADD16 xh0, xi4, xi5
+        SHSUB16 xh1, xi4, xi5                
+        
+        ;// IStage 2 rows 0to3 x 1/2
+        SHSUB16 xg2, xh1, xh2
+        SHADD16 xg1, xh1, xh2
+        SHSUB16 xg3, xh0, xh3
+        SHADD16 xg0, xh0, xh3
+               
+        ;// IStage 1 all rows
+        MOV  xf3, xg3
+        MOV  xf4, xg3
+        MOV  xf2, xg2
+        MOV  xf5, xg2
+        MOV  xf1, xg1
+        MOV  xf6, xg1
+        MOV  xf0, xg0
+        MOV  xf7, xg0
+        
+        ;// Transpose
+        PKHBT   ra01, xf0, xf1, LSL #16
+        PKHTB   rb01, xf1, xf0, ASR #16
+        
+        PKHBT   ra23, xf2, xf3, LSL #16
+        PKHTB   rb23, xf3, xf2, ASR #16
+        
+        PKHBT   ra45, xf4, xf5, LSL #16
+        PKHTB   rb45, xf5, xf4, ASR #16
+        
+        PKHBT   ra67, xf6, xf7, LSL #16
+        PKHTB   rb67, xf7, xf6, ASR #16
+                
+        STMIA   pDest!, {ra01, ra23, ra45, ra67}      
+        ADDS    LoopRR2, LoopRR2, #2<<29    ;// done two rows
+        STMIA   pDest!, {rb01, rb23, rb45, rb67}      
+        
+        BCC     v6_idct_col$_F
+        SUB     pSrc, pDest, #(64*2)
+        M_LDR   pDest, ppDest
+        IF "$stride"="s"
+            M_LDR   pScale, pStride 
+        ENDIF
+               
+        
+v6_idct_row$_F
+        ;// IStage 4,3, rows4to7 x1/4
+        LDR     xit, =0x00010001        ;// rounding constant
+        LDR     xi0, [pSrc, #1*16]      ;// j1
+        LDR     xi1, [pSrc, #7*16]      ;// 4*j7
+        LDR     xi2, [pSrc, #5*16]      ;// j5
+        LDR     xi3, [pSrc, #3*16]      ;// j3
+        
+        SHADD16 xi1, xi1, xit           ;// 2*j7
+        SHADD16 xi1, xi1, xit           ;// j7                
+        
+        SHADD16 xi5, xi0, xi1           ;// (j1+j7)/2
+        SSUB16  xi6, xi0, xi1           ;// j1-j7
+        SHADD16 xi7, xi2, xi3           ;// (j5+j3)/2
+        SSUB16  xi4, xi2, xi3           ;// j5-j3
+        
+        SSUB16  xi3, xi5, xi7           ;// (i5-i7)/2
+        
+        PKHBT   xi0, xi6, xi4, LSL#16   ;// [i4,i6] row a
+        PKHTB   xi1, xi4, xi6, ASR#16   ;// [i4,i6] row b
+        
+        SMUADX  xi2, xi0, csPiBy8       ;// rowa by [c,s]
+        SMUADX  xi4, xi1, csPiBy8       ;// rowb by [c,s]
+        SMUSD   xi0, xi0, csPiBy8       ;// rowa by [-s,c]   
+        SMUSD   xi6, xi1, csPiBy8       ;// rowb by [-s,c]
+                
+        SMULBB  xi1, xi3, LoopRR2
+        SMULTB  xi3, xi3, LoopRR2
+                
+        PKHTB   xh4, xi4, xi2, ASR#16   ;// h4/4
+        PKHTB   xh6, xi6, xi0, ASR#16   ;// h6/4
+        SHADD16 xh7, xi5, xi7           ;// (i5+i7)/4
+        
+        MOV     xi3, xi3, LSL #1
+        PKHTB   xh5, xi3, xi1, ASR#15   ;// h5/4
+               
+        ;// xi0,xi1,xi2,xi3 now free
+        ;// IStage 4,3, rows 2to3 x1/2
+        
+        LDR     xi0, [pSrc, #2*16]      ;// j2
+        LDR     xi1, [pSrc, #6*16]      ;// 2*j6
+        
+        ;// IStage 2, rows4to7
+        SSUB16  xg6, xh6, xh7
+        SSUB16  xg5, xh5, xg6
+        SSUB16  xg4, xh4, xg5
+        
+        SHADD16 xi1, xi1, xit           ;// j6
+        SSUB16  xi2, xi0, xi1           ;// (j2-j6)        
+        SHADD16 xi3, xi0, xi1           ;// (j2+j6)/2
+        
+        SMULBB  xi0, xi2, LoopRR2
+        SMULTB  xi2, xi2, LoopRR2
+        
+        MOV     xi2, xi2, LSL #1
+        
+        PKHTB   xh2, xi2, xi0, ASR#15   ;// i2*sqrt(2)/4
+        
+        ;// xi0, xi1 now free
+        ;// IStage 4,3 rows 0to1 x 1/2
+        LDR     xi1, [pSrc, #4*16]      ;// j4
+        LDR     xi0, [pSrc], #4         ;// j0
+
+        SSUB16  xh2, xh2, xi3
+        ADDS    LoopRR2, LoopRR2, #2<<29    ;// done two rows
+        
+        ADD     xi0, xi0, xit, LSL #2   ;// ensure correct round
+        SHADD16 xh0, xi0, xi1           ;// of DC result
+        SHSUB16 xh1, xi0, xi1
+                
+        ;// IStage 2 rows 0to3 x 1/2
+        SHSUB16 xg2, xh1, xh2
+        SHADD16 xg1, xh1, xh2
+        SHSUB16 xg3, xh0, xh3
+        SHADD16 xg0, xh0, xh3
+        
+        ;// IStage 1 all rows
+        SHADD16 xf3, xg3, xg4
+        SHSUB16 xf4, xg3, xg4
+        SHADD16 xf2, xg2, xg5
+        SHSUB16 xf5, xg2, xg5
+        SHADD16 xf1, xg1, xg6
+        SHSUB16 xf6, xg1, xg6
+        SHADD16 xf0, xg0, xg7
+        SHSUB16 xf7, xg0, xg7
+        
+        ;// Saturate
+        IF ("$outsize"="u8")
+            USAT16  xf0, #8, xf0
+            USAT16  xf1, #8, xf1
+            USAT16  xf2, #8, xf2
+            USAT16  xf3, #8, xf3
+            USAT16  xf4, #8, xf4
+            USAT16  xf5, #8, xf5
+            USAT16  xf6, #8, xf6
+            USAT16  xf7, #8, xf7        
+        ENDIF
+        IF ("$outsize"="s9")
+            SSAT16  xf0, #9, xf0
+            SSAT16  xf1, #9, xf1
+            SSAT16  xf2, #9, xf2
+            SSAT16  xf3, #9, xf3
+            SSAT16  xf4, #9, xf4
+            SSAT16  xf5, #9, xf5
+            SSAT16  xf6, #9, xf6
+            SSAT16  xf7, #9, xf7        
+        ENDIF
+        
+        ;// Transpose to Row, Pack and store
+        IF ("$outsize"="u8")
+            ORR     xf0, xf0, xf1, LSL #8 ;// [ b1 b0 a1 a0 ]
+            ORR     xf2, xf2, xf3, LSL #8 ;// [ b3 b2 a3 a2 ]
+            ORR     xf4, xf4, xf5, LSL #8 ;// [ b5 b4 a5 a4 ]
+            ORR     xf6, xf6, xf7, LSL #8 ;// [ b7 b6 a7 a6 ]
+            PKHBT   ra01, xf0, xf2, LSL #16
+            PKHTB   rb01, xf2, xf0, ASR #16
+            PKHBT   ra23, xf4, xf6, LSL #16
+            PKHTB   rb23, xf6, xf4, ASR #16
+            STMIA   pDest, {ra01, ra23}
+            IF "$stride"="s"
+                ADD     pDest, pDest, pScale
+                STMIA   pDest, {rb01, rb23}
+                ADD     pDest, pDest, pScale
+            ELSE                
+                ADD     pDest, pDest, #($stride)
+                STMIA   pDest, {rb01, rb23}
+                ADD     pDest, pDest, #($stride)
+            ENDIF
+        ENDIF
+        IF ("$outsize"="s9"):LOR:("$outsize"="s16")        
+            PKHBT   ra01, xf0, xf1, LSL #16
+            PKHTB   rb01, xf1, xf0, ASR #16
+        
+            PKHBT   ra23, xf2, xf3, LSL #16
+            PKHTB   rb23, xf3, xf2, ASR #16
+            
+            PKHBT   ra45, xf4, xf5, LSL #16
+            PKHTB   rb45, xf5, xf4, ASR #16
+            
+            PKHBT   ra67, xf6, xf7, LSL #16
+            PKHTB   rb67, xf7, xf6, ASR #16
+            
+            STMIA   pDest, {ra01, ra23, ra45, ra67}      
+            IF "$stride"="s"
+                ADD     pDest, pDest, pScale
+                STMIA   pDest, {rb01, rb23, rb45, rb67}      
+                ADD     pDest, pDest, pScale
+            ELSE                
+                ADD     pDest, pDest, #($stride)
+                STMIA   pDest, {rb01, rb23, rb45, rb67}      
+                ADD     pDest, pDest, #($stride)
+            ENDIF
+        ENDIF
+        
+        BCC     v6_idct_row$_F
+        ENDIF ;// ARM1136JS
+
+
+        IF CortexA8
+        
+Src0            EQU  7              
+Src1            EQU  8              
+Src2            EQU  9              
+Src3            EQU  10              
+Src4            EQU  11              
+Src5            EQU  12              
+Src6            EQU  13
+Src7            EQU  14
+Tmp             EQU  15
+
+qXj0            QN Src0.S16 
+qXj1            QN Src1.S16
+qXj2            QN Src2.S16
+qXj3            QN Src3.S16
+qXj4            QN Src4.S16
+qXj5            QN Src5.S16
+qXj6            QN Src6.S16
+qXj7            QN Src7.S16
+qXjt            QN Tmp.S16
+
+dXj0lo          DN (Src0*2).S16
+dXj0hi          DN (Src0*2+1).S16
+dXj1lo          DN (Src1*2).S16
+dXj1hi          DN (Src1*2+1).S16
+dXj2lo          DN (Src2*2).S16
+dXj2hi          DN (Src2*2+1).S16
+dXj3lo          DN (Src3*2).S16
+dXj3hi          DN (Src3*2+1).S16
+dXj4lo          DN (Src4*2).S16
+dXj4hi          DN (Src4*2+1).S16
+dXj5lo          DN (Src5*2).S16
+dXj5hi          DN (Src5*2+1).S16
+dXj6lo          DN (Src6*2).S16
+dXj6hi          DN (Src6*2+1).S16
+dXj7lo          DN (Src7*2).S16
+dXj7hi          DN (Src7*2+1).S16
+dXjtlo          DN (Tmp*2).S16
+dXjthi          DN (Tmp*2+1).S16
+
+qXi0            QN qXj0
+qXi1            QN qXj4
+qXi2            QN qXj2
+qXi3            QN qXj7
+qXi4            QN qXj5
+qXi5            QN qXjt
+qXi6            QN qXj1
+qXi7            QN qXj6
+qXit            QN qXj3
+
+dXi0lo          DN dXj0lo
+dXi0hi          DN dXj0hi
+dXi1lo          DN dXj4lo
+dXi1hi          DN dXj4hi
+dXi2lo          DN dXj2lo
+dXi2hi          DN dXj2hi
+dXi3lo          DN dXj7lo
+dXi3hi          DN dXj7hi
+dXi4lo          DN dXj5lo
+dXi4hi          DN dXj5hi
+dXi5lo          DN dXjtlo
+dXi5hi          DN dXjthi
+dXi6lo          DN dXj1lo
+dXi6hi          DN dXj1hi
+dXi7lo          DN dXj6lo
+dXi7hi          DN dXj6hi
+dXitlo          DN dXj3lo
+dXithi          DN dXj3hi
+
+qXh0            QN qXit
+qXh1            QN qXi0
+qXh2            QN qXi2
+qXh3            QN qXi3
+qXh4            QN qXi7
+qXh5            QN qXi5
+qXh6            QN qXi4
+qXh7            QN qXi1
+qXht            QN qXi6
+
+dXh0lo          DN dXitlo
+dXh0hi          DN dXithi
+dXh1lo          DN dXi0lo
+dXh1hi          DN dXi0hi
+dXh2lo          DN dXi2lo
+dXh2hi          DN dXi2hi
+dXh3lo          DN dXi3lo
+dXh3hi          DN dXi3hi
+dXh4lo          DN dXi7lo
+dXh4hi          DN dXi7hi
+dXh5lo          DN dXi5lo
+dXh5hi          DN dXi5hi
+dXh6lo          DN dXi4lo
+dXh6hi          DN dXi4hi
+dXh7lo          DN dXi1lo
+dXh7hi          DN dXi1hi
+dXhtlo          DN dXi6lo
+dXhthi          DN dXi6hi
+
+qXg0            QN qXh2
+qXg1            QN qXht
+qXg2            QN qXh1
+qXg3            QN qXh0
+qXg4            QN qXh4
+qXg5            QN qXh5
+qXg6            QN qXh6
+qXg7            QN qXh7
+qXgt            QN qXh3
+
+qXf0            QN qXg6
+qXf1            QN qXg5
+qXf2            QN qXg4
+qXf3            QN qXgt
+qXf4            QN qXg3
+qXf5            QN qXg2
+qXf6            QN qXg1
+qXf7            QN qXg0
+qXft            QN qXg7
+
+
+qXt0            QN 1.S32
+qXt1            QN 2.S32
+qT0lo           QN 1.S32         
+qT0hi           QN 2.S32         
+qT1lo           QN 3.S32         
+qT1hi           QN 4.S32         
+qScalelo        QN 5.S32        ;// used to read post scale values
+qScalehi        QN 6.S32
+qTemp0          QN 5.S32         
+qTemp1          QN 6.S32    
+
+
+Scale1          EQU 6
+Scale2          EQU 15
+qScale1         QN Scale1.S16     
+qScale2         QN Scale2.S16     
+dScale1lo       DN (Scale1*2).S16     
+dScale1hi       DN (Scale1*2+1).S16
+dScale2lo       DN (Scale2*2).S16     
+dScale2hi       DN (Scale2*2+1).S16
+
+dCoefs          DN 0.S16        ;// Scale coefficients in format {[0] [C] [S] [InvSqrt2]}
+InvSqrt2        DN dCoefs[0]    ;// 1/sqrt(2) in Q15
+S               DN dCoefs[1]    ;// Sin(PI/8) in Q15
+C               DN dCoefs[2]    ;// Cos(PI/8) in Q15
+
+pTemp           RN 12
+
+                
+        IMPORT  armCOMM_IDCTCoef
+                    
+        VLD1        {qXj0,qXj1}, [pSrc @64]!
+        VLD1        {qXj2,qXj3}, [pSrc @64]!
+        VLD1        {qXj4,qXj5}, [pSrc @64]!
+        VLD1        {qXj6,qXj7}, [pSrc @64]!
+        
+        ;// Load PreScale and multiply with Src
+        ;// IStage 4
+        
+        IF "$inscale"="s16"                         ;// 16X16 Mul
+            M_IDCT_PRESCALE16
+        ENDIF
+        
+        IF "$inscale"="s32"                         ;// 32X32 ,ul
+            M_IDCT_PRESCALE32
+        ENDIF
+
+        ;// IStage 3
+        VQDMULH     qXi2, qXi2, InvSqrt2            ;// i2/sqrt(2)
+        VHADD       qXh0, qXi0, qXi1                ;// (i0+i1)/2
+        VHSUB       qXh1, qXi0, qXi1                ;// (i0-i1)/2
+        VHADD       qXh7, qXi5, qXi7                ;// (i5+i7)/4
+        VSUB        qXh5, qXi5, qXi7                ;// (i5-i7)/2
+        VQDMULH     qXh5, qXh5, InvSqrt2            ;// h5/sqrt(2)
+        VSUB        qXh2, qXi2, qXi3                ;// h2, h3
+
+        VMULL       qXt0, dXi4lo, C                 ;// c*i4
+        VMLAL       qXt0, dXi6lo, S                 ;// c*i4+s*i6
+        VMULL       qXt1, dXi4hi, C
+        VMLAL       qXt1, dXi6hi, S
+        VSHRN       dXh4lo, qXt0, #16               ;// h4
+        VSHRN       dXh4hi, qXt1, #16
+        
+        VMULL       qXt0, dXi6lo, C                 ;// c*i6
+        VMLSL       qXt0, dXi4lo, S                 ;// -s*i4 + c*h6
+        VMULL       qXt1, dXi6hi, C
+        VMLSL       qXt1, dXi4hi, S
+        VSHRN       dXh6lo, qXt0, #16               ;// h6
+        VSHRN       dXh6hi, qXt1, #16
+        
+        ;// IStage 2
+        VSUB        qXg6, qXh6, qXh7
+        VSUB        qXg5, qXh5, qXg6
+        VSUB        qXg4, qXh4, qXg5
+        VHADD       qXg1, qXh1, qXh2        ;// (h1+h2)/2
+        VHSUB       qXg2, qXh1, qXh2        ;// (h1-h2)/2
+        VHADD       qXg0, qXh0, qXh3        ;// (h0+h3)/2
+        VHSUB       qXg3, qXh0, qXh3        ;// (h0-h3)/2
+
+        ;// IStage 1 all rows
+        VADD        qXf3, qXg3, qXg4        
+        VSUB        qXf4, qXg3, qXg4        
+        VADD        qXf2, qXg2, qXg5        
+        VSUB        qXf5, qXg2, qXg5        
+        VADD        qXf1, qXg1, qXg6
+        VSUB        qXf6, qXg1, qXg6        
+        VADD        qXf0, qXg0, qXg7
+        VSUB        qXf7, qXg0, qXg7      
+
+        ;// Transpose, store and loop
+XTR0            EQU Src5
+XTR1            EQU Tmp
+XTR2            EQU Src6
+XTR3            EQU Src7
+XTR4            EQU Src3
+XTR5            EQU Src0
+XTR6            EQU Src1
+XTR7            EQU Src2
+XTRt            EQU Src4
+                
+qA0             QN  XTR0.S32  ;// for XTRpose
+qA1             QN  XTR1.S32
+qA2             QN  XTR2.S32
+qA3             QN  XTR3.S32
+qA4             QN  XTR4.S32
+qA5             QN  XTR5.S32
+qA6             QN  XTR6.S32
+qA7             QN  XTR7.S32
+
+dB0             DN  XTR0*2+1      ;// for using VSWP
+dB1             DN  XTR1*2+1
+dB2             DN  XTR2*2+1
+dB3             DN  XTR3*2+1
+dB4             DN  XTR4*2
+dB5             DN  XTR5*2
+dB6             DN  XTR6*2
+dB7             DN  XTR7*2
+
+          
+        VTRN        qXf0, qXf1
+        VTRN        qXf2, qXf3
+        VTRN        qXf4, qXf5
+        VTRN        qXf6, qXf7
+        VTRN        qA0, qA2
+        VTRN        qA1, qA3
+        VTRN        qA4, qA6
+        VTRN        qA5, qA7        
+        VSWP        dB0, dB4
+        VSWP        dB1, dB5
+        VSWP        dB2, dB6
+        VSWP        dB3, dB7
+        
+
+qYj0            QN qXf0
+qYj1            QN qXf1
+qYj2            QN qXf2
+qYj3            QN qXf3
+qYj4            QN qXf4
+qYj5            QN qXf5
+qYj6            QN qXf6
+qYj7            QN qXf7
+qYjt            QN qXft
+
+dYj0lo          DN (XTR0*2).S16
+dYj0hi          DN (XTR0*2+1).S16
+dYj1lo          DN (XTR1*2).S16
+dYj1hi          DN (XTR1*2+1).S16
+dYj2lo          DN (XTR2*2).S16
+dYj2hi          DN (XTR2*2+1).S16
+dYj3lo          DN (XTR3*2).S16
+dYj3hi          DN (XTR3*2+1).S16
+dYj4lo          DN (XTR4*2).S16
+dYj4hi          DN (XTR4*2+1).S16
+dYj5lo          DN (XTR5*2).S16
+dYj5hi          DN (XTR5*2+1).S16
+dYj6lo          DN (XTR6*2).S16
+dYj6hi          DN (XTR6*2+1).S16
+dYj7lo          DN (XTR7*2).S16
+dYj7hi          DN (XTR7*2+1).S16
+dYjtlo          DN (XTRt*2).S16
+dYjthi          DN (XTRt*2+1).S16
+
+qYi0            QN qYj0
+qYi1            QN qYj4
+qYi2            QN qYj2
+qYi3            QN qYj7
+qYi4            QN qYj5
+qYi5            QN qYjt
+qYi6            QN qYj1
+qYi7            QN qYj6
+qYit            QN qYj3
+
+dYi0lo          DN dYj0lo
+dYi0hi          DN dYj0hi
+dYi1lo          DN dYj4lo
+dYi1hi          DN dYj4hi
+dYi2lo          DN dYj2lo
+dYi2hi          DN dYj2hi
+dYi3lo          DN dYj7lo
+dYi3hi          DN dYj7hi
+dYi4lo          DN dYj5lo
+dYi4hi          DN dYj5hi
+dYi5lo          DN dYjtlo
+dYi5hi          DN dYjthi
+dYi6lo          DN dYj1lo
+dYi6hi          DN dYj1hi
+dYi7lo          DN dYj6lo
+dYi7hi          DN dYj6hi
+dYitlo          DN dYj3lo
+dYithi          DN dYj3hi
+
+qYh0            QN qYit
+qYh1            QN qYi0
+qYh2            QN qYi2
+qYh3            QN qYi3
+qYh4            QN qYi7
+qYh5            QN qYi5
+qYh6            QN qYi4
+qYh7            QN qYi1
+qYht            QN qYi6
+
+dYh0lo          DN dYitlo
+dYh0hi          DN dYithi
+dYh1lo          DN dYi0lo
+dYh1hi          DN dYi0hi
+dYh2lo          DN dYi2lo
+dYh2hi          DN dYi2hi
+dYh3lo          DN dYi3lo
+dYh3hi          DN dYi3hi
+dYh4lo          DN dYi7lo
+dYh4hi          DN dYi7hi
+dYh5lo          DN dYi5lo
+dYh5hi          DN dYi5hi
+dYh6lo          DN dYi4lo
+dYh6hi          DN dYi4hi
+dYh7lo          DN dYi1lo
+dYh7hi          DN dYi1hi
+dYhtlo          DN dYi6lo
+dYhthi          DN dYi6hi
+
+qYg0            QN qYh2
+qYg1            QN qYht
+qYg2            QN qYh1
+qYg3            QN qYh0
+qYg4            QN qYh4
+qYg5            QN qYh5
+qYg6            QN qYh6
+qYg7            QN qYh7
+qYgt            QN qYh3
+
+qYf0            QN qYg6
+qYf1            QN qYg5
+qYf2            QN qYg4
+qYf3            QN qYgt
+qYf4            QN qYg3
+qYf5            QN qYg2
+qYf6            QN qYg1
+qYf7            QN qYg0
+qYft            QN qYg7
+
+        VRSHR       qYj7, qYj7, #2
+        VRSHR       qYj6, qYj6, #1
+        
+        VHADD       qYi5, qYj1, qYj7        ;// i5 = (j1+j7)/2
+        VSUB        qYi6, qYj1, qYj7        ;// i6 = j1-j7
+        VHADD       qYi3, qYj2, qYj6        ;// i3 = (j2+j6)/2
+        VSUB        qYi2, qYj2, qYj6        ;// i2 = j2-j6
+        VHADD       qYi7, qYj5, qYj3        ;// i7 = (j5+j3)/2
+        VSUB        qYi4, qYj5, qYj3        ;// i4 = j5-j3
+
+        VQDMULH     qYi2, qYi2, InvSqrt2    ;// i2/sqrt(2)
+        ;// IStage 4,3 rows 0to1 x 1/2
+        
+        MOV         pTemp, #0x4             ;// ensure correct round
+        VDUP        qScale1, pTemp           ;// of DC result
+        VADD        qYi0, qYi0, qScale1
+        
+        VHADD       qYh0, qYi0, qYi1        ;// (i0+i1)/2
+        VHSUB       qYh1, qYi0, qYi1        ;// (i0-i1)/2
+
+        VHADD       qYh7, qYi5, qYi7        ;// (i5+i7)/4
+        VSUB        qYh5, qYi5, qYi7        ;// (i5-i7)/2
+        VSUB        qYh2, qYi2, qYi3        ;// h2, h3
+        VQDMULH     qYh5, qYh5, InvSqrt2    ;// h5/sqrt(2)
+
+        VMULL       qXt0, dYi4lo, C         ;// c*i4
+        VMLAL       qXt0, dYi6lo, S         ;// c*i4+s*i6
+        VMULL       qXt1, dYi4hi, C
+        VMLAL       qXt1, dYi6hi, S
+        VSHRN       dYh4lo, qXt0, #16       ;// h4
+        VSHRN       dYh4hi, qXt1, #16
+        
+        VMULL       qXt0, dYi6lo, C         ;// c*i6
+        VMLSL       qXt0, dYi4lo, S         ;// -s*i4 + c*h6
+        VMULL       qXt1, dYi6hi, C
+        VMLSL       qXt1, dYi4hi, S
+        VSHRN       dYh6lo, qXt0, #16       ;// h6
+        VSHRN       dYh6hi, qXt1, #16
+        
+        VSUB        qYg6, qYh6, qYh7
+        VSUB        qYg5, qYh5, qYg6
+        VSUB        qYg4, qYh4, qYg5
+        
+        ;// IStage 2 rows 0to3 x 1/2
+        VHADD       qYg1, qYh1, qYh2        ;// (h1+h2)/2
+        VHSUB       qYg2, qYh1, qYh2        ;// (h1-h2)/2
+        VHADD       qYg0, qYh0, qYh3        ;// (h0+h3)/2
+        VHSUB       qYg3, qYh0, qYh3        ;// (h0-h3)/2
+        
+
+        ;// IStage 1 all rows
+        VHADD        qYf3, qYg3, qYg4        
+        VHSUB        qYf4, qYg3, qYg4        
+        VHADD        qYf2, qYg2, qYg5        
+        VHSUB        qYf5, qYg2, qYg5        
+        VHADD        qYf1, qYg1, qYg6
+        VHSUB        qYf6, qYg1, qYg6        
+        VHADD        qYf0, qYg0, qYg7
+        VHSUB        qYf7, qYg0, qYg7      
+
+YTR0            EQU Src0
+YTR1            EQU Src4
+YTR2            EQU Src1
+YTR3            EQU Src2
+YTR4            EQU Src7
+YTR5            EQU Src5
+YTR6            EQU Tmp
+YTR7            EQU Src6
+YTRt            EQU Src3
+
+qC0             QN  YTR0.S32                ;// for YTRpose
+qC1             QN  YTR1.S32
+qC2             QN  YTR2.S32
+qC3             QN  YTR3.S32
+qC4             QN  YTR4.S32
+qC5             QN  YTR5.S32
+qC6             QN  YTR6.S32
+qC7             QN  YTR7.S32
+
+dD0             DN  YTR0*2+1                ;// for using VSWP
+dD1             DN  YTR1*2+1
+dD2             DN  YTR2*2+1
+dD3             DN  YTR3*2+1
+dD4             DN  YTR4*2
+dD5             DN  YTR5*2
+dD6             DN  YTR6*2
+dD7             DN  YTR7*2
+          
+        VTRN        qYf0, qYf1
+        VTRN        qYf2, qYf3
+        VTRN        qYf4, qYf5
+        VTRN        qYf6, qYf7
+        VTRN        qC0, qC2
+        VTRN        qC1, qC3
+        VTRN        qC4, qC6
+        VTRN        qC5, qC7        
+        VSWP        dD0, dD4
+        VSWP        dD1, dD5
+        VSWP        dD2, dD6
+        VSWP        dD3, dD7
+
+        
+dYf0U8          DN YTR0*2.U8
+dYf1U8          DN YTR1*2.U8
+dYf2U8          DN YTR2*2.U8
+dYf3U8          DN YTR3*2.U8
+dYf4U8          DN YTR4*2.U8
+dYf5U8          DN YTR5*2.U8
+dYf6U8          DN YTR6*2.U8
+dYf7U8          DN YTR7*2.U8
+        
+        ;//
+        ;// Do saturation if outsize is other than S16
+        ;//
+        
+        IF ("$outsize"="u8")
+            ;// Output range [0-255]
+            VQMOVN            dYf0U8, qYf0
+            VQMOVN            dYf1U8, qYf1
+            VQMOVN            dYf2U8, qYf2
+            VQMOVN            dYf3U8, qYf3
+            VQMOVN            dYf4U8, qYf4
+            VQMOVN            dYf5U8, qYf5
+            VQMOVN            dYf6U8, qYf6
+            VQMOVN            dYf7U8, qYf7
+        ENDIF
+        
+        IF ("$outsize"="s9")
+            ;// Output range [-256 to +255]
+            VQSHL            qYf0, qYf0, #16-9
+            VQSHL            qYf1, qYf1, #16-9
+            VQSHL            qYf2, qYf2, #16-9
+            VQSHL            qYf3, qYf3, #16-9
+            VQSHL            qYf4, qYf4, #16-9
+            VQSHL            qYf5, qYf5, #16-9
+            VQSHL            qYf6, qYf6, #16-9
+            VQSHL            qYf7, qYf7, #16-9
+            
+            VSHR             qYf0, qYf0, #16-9
+            VSHR             qYf1, qYf1, #16-9
+            VSHR             qYf2, qYf2, #16-9
+            VSHR             qYf3, qYf3, #16-9
+            VSHR             qYf4, qYf4, #16-9
+            VSHR             qYf5, qYf5, #16-9
+            VSHR             qYf6, qYf6, #16-9
+            VSHR             qYf7, qYf7, #16-9
+        ENDIF
+
+        ;// Store output depending on the Stride size
+        IF "$stride"="s"
+            VST1        qYf0, [pDest @64], Stride
+            VST1        qYf1, [pDest @64], Stride
+            VST1        qYf2, [pDest @64], Stride
+            VST1        qYf3, [pDest @64], Stride
+            VST1        qYf4, [pDest @64], Stride
+            VST1        qYf5, [pDest @64], Stride
+            VST1        qYf6, [pDest @64], Stride
+            VST1        qYf7, [pDest @64]            
+        ELSE
+            IF ("$outsize"="u8")
+                VST1        dYf0U8, [pDest @64], #8
+                VST1        dYf1U8, [pDest @64], #8
+                VST1        dYf2U8, [pDest @64], #8
+                VST1        dYf3U8, [pDest @64], #8
+                VST1        dYf4U8, [pDest @64], #8
+                VST1        dYf5U8, [pDest @64], #8
+                VST1        dYf6U8, [pDest @64], #8
+                VST1        dYf7U8, [pDest @64]
+            ELSE
+                ;// ("$outsize"="s9") or ("$outsize"="s16")
+                VST1        qYf0, [pDest @64], #16
+                VST1        qYf1, [pDest @64], #16
+                VST1        qYf2, [pDest @64], #16
+                VST1        qYf3, [pDest @64], #16
+                VST1        qYf4, [pDest @64], #16
+                VST1        qYf5, [pDest @64], #16
+                VST1        qYf6, [pDest @64], #16
+                VST1        qYf7, [pDest @64]
+            ENDIF
+        
+        ENDIF
+
+
+
+        ENDIF ;// CortexA8
+
+
+
+        MEND        
+
+        ;// Scale TWO input rows with TWO rows of 16 bit scale values
+        ;//
+        ;// This macro is used by M_IDCT_PRESCALE16 to pre-scale two rows of
+        ;// input (eight S16 values each) with two rows of scale values. It also
+        ;// loads the next two scale rows from pScale when $LastRow is "0".
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// $dAlo           - Input D register with first four S16 values of row n
+        ;// $dAhi           - Input D register with next four S16 values of row n
+        ;// $dBlo           - Input D register with first four S16 values of row n+1
+        ;// $dBhi           - Input D register with next four S16 values of row n+1
+        ;// pScale          - Pointer to next row of scale values
+        ;// qT0lo           - Temporary scratch register (S32 products, row n lo)
+        ;// qT0hi           - Temporary scratch register (S32 products, row n hi)
+        ;// qT1lo           - Temporary scratch register (S32 products, row n+1 lo)
+        ;// qT1hi           - Temporary scratch register (S32 products, row n+1 hi)
+        ;// dScale1lo       - Scale values of row n (first four S16)
+        ;// dScale1hi       - Scale values of row n (next four S16)
+        ;// dScale2lo       - Scale values of row n+1 (first four S16)
+        ;// dScale2hi       - Scale values of row n+1 (next four S16)
+        ;//
+        ;// Input Flag
+        ;//
+        ;// $LastRow        - "0" if more rows follow (next scale rows are loaded)
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// $dAlo           - Scaled output values (first four S16 of row n)
+        ;// $dAhi           - Scaled output values (next four S16 of row n)
+        ;// $dBlo           - Scaled output values (first four S16 of row n+1)
+        ;// $dBhi           - Scaled output values (next four S16 of row n+1)
+        ;// qScale1         - Scale values for row n+2 (only if $LastRow is "0")
+        ;// qScale2         - Scale values for row n+3 (only if $LastRow is "0")
+        ;// pScale          - Advanced by 32 bytes (only if $LastRow is "0")
+        ;//
+        MACRO
+        M_IDCT_SCALE16 $dAlo, $dAhi, $dBlo, $dBhi, $LastRow
+        VMULL       qT0lo, $dAlo, dScale1lo     ;// row n   lo: S16 x S16 -> S32 products
+        VMULL       qT0hi, $dAhi, dScale1hi     ;// row n   hi
+        VMULL       qT1lo, $dBlo, dScale2lo     ;// row n+1 lo
+        VMULL       qT1hi, $dBhi, dScale2hi     ;// row n+1 hi
+        IF "$LastRow"="0"                       ;// reload only after the VMULLs above have read the old scales
+            VLD1        qScale1, [pScale], #16  ;// Load scale for row n+2
+            VLD1        qScale2, [pScale], #16  ;// Load scale for row n+3
+        ENDIF
+        VQRSHRN       $dAlo, qT0lo, #12        ;// round, shift right by 12, saturate to S16
+        VQRSHRN       $dAhi, qT0hi, #12        ;// results overwrite the input registers
+        VQRSHRN       $dBlo, qT1lo, #12        ;// same narrowing for row n+1 lo
+        VQRSHRN       $dBhi, qT1hi, #12        ;// same narrowing for row n+1 hi
+        MEND
+
+        ;// Scale 8x8 block input values with 16 bit scale values
+        ;//
+        ;// This macro pre-scales an 8x8 block of input, two rows at a time, and
+        ;// also does the first stage (IStage 4) transformations of the IDCT.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// dXjnlo          - n th input D register with first four S16 values
+        ;// dXjnhi          - n th input D register with next four S16 values
+        ;// qXjn            - n th input Q register with eight S16 values
+        ;// pScale          - Pointer to scale values (advanced by 8 rows * 16 bytes)
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// qXin            - n th output Q register with eight S16 output values of 1st stage
+        ;//                   (pSrc and dCoefs are also overwritten, see below)
+        MACRO
+        M_IDCT_PRESCALE16
+        VLD1        qScale1, [pScale], #16      ;// Load Pre scale for row 0
+        VLD1        qScale2, [pScale], #16      ;// Load Pre scale for row 1
+        M_IDCT_SCALE16 dXj0lo, dXj0hi, dXj1lo, dXj1hi, 0        ;// Pre scale row 0 & 1
+        M_IDCT_SCALE16 dXj2lo, dXj2hi, dXj3lo, dXj3hi, 0        ;// Pre scale row 2 & 3
+        M_IDCT_SCALE16 dXj4lo, dXj4hi, dXj5lo, dXj5hi, 0        ;// Pre scale row 4 & 5
+        M_IDCT_SCALE16 dXj6lo, dXj6hi, dXj7lo, dXj7hi, 1        ;// Pre scale row 6 & 7, last pair: no further scale load
+        VHADD       qXi5, qXj1, qXj7            ;// (j1+j7)/2; qXi5 shares Q15 with qScale2, which is no longer needed
+        VSUB        qXi6, qXj1, qXj7            ;// j1-j7; qXi6 overwrites qXj1 in place
+        LDR         pSrc, =armCOMM_IDCTCoef ;// pSrc is overwritten: address of DCT inverse AAN constants
+        VHADD       qXi3, qXj2, qXj6            ;// (j2+j6)/2; qXi3 overwrites qXj7, already consumed above
+        VSUB        qXi2, qXj2, qXj6            ;// j2-j6; qXi2 overwrites qXj2 in place
+        VLDR        dCoefs, [pSrc]              ;// Load DCT inverse AAN constants
+        VHADD       qXi7, qXj5, qXj3            ;// (j5+j3)/2; qXi7 overwrites qXj6, already consumed above
+        VSUB        qXi4, qXj5, qXj3            ;// j5-j3; qXi4 overwrites qXj5 in place
+        MEND    
+        
+        
+        ;// Scale 8x8 block input values with 32 bit scale values
+        ;//
+        ;// This macro is used to pre-scale block of 8x8 input.
+        ;// This also does the first stage transformations of the IDCT.
+        ;//
+        ;// Input Registers:
+        ;//
+        ;// dXjnlo          - n th input D register with first four S16 values
+        ;// dXjnhi          - n th input D register with next four S16 values
+        ;// qXjn            - n th input Q register with eight S16 values
+        ;// pScale          - Pointer to 32bit scale values in Q23 format
+        ;//
+        ;// Output Registers:
+        ;//
+        ;// dXinlo          - n th output D register with first four S16 output values of 1st stage
+        ;// dXinhi          - n th output D register with next four S16 output values of 1st stage
+        ;//
+        MACRO
+        M_IDCT_PRESCALE32
+qScale0lo       QN 0.S32
+qScale0hi       QN 1.S32
+qScale1lo       QN 2.S32
+qScale1hi       QN 3.S32
+qScale2lo       QN qScale1lo
+qScale2hi       QN qScale1hi
+qScale3lo       QN qScale1lo
+qScale3hi       QN qScale1hi
+qScale4lo       QN qScale1lo
+qScale4hi       QN qScale1hi
+qScale5lo       QN qScale0lo
+qScale5hi       QN qScale0hi
+qScale6lo       QN qScale0lo
+qScale6hi       QN qScale0hi
+qScale7lo       QN qScale0lo
+qScale7hi       QN qScale0hi
+
+qSrc0lo         QN 4.S32
+qSrc0hi         QN 5.S32
+qSrc1lo         QN 6.S32
+qSrc1hi         QN Src4.S32
+qSrc2lo         QN qSrc0lo
+qSrc2hi         QN qSrc0hi
+qSrc3lo         QN qSrc0lo
+qSrc3hi         QN qSrc0hi
+qSrc4lo         QN qSrc0lo
+qSrc4hi         QN qSrc0hi
+qSrc5lo         QN qSrc1lo
+qSrc5hi         QN qSrc1hi
+qSrc6lo         QN qSrc1lo
+qSrc6hi         QN qSrc1hi
+qSrc7lo         QN qSrc0lo
+qSrc7hi         QN qSrc0hi
+
+qRes17lo        QN qScale0lo
+qRes17hi        QN qScale0hi
+qRes26lo        QN qScale0lo
+qRes26hi        QN qScale0hi
+qRes53lo        QN qScale0lo
+qRes53hi        QN qScale0hi
+
+            ADD         pTemp, pScale, #4*8*7           ;// Address of  pScale[7]
+            
+            ;// Row 0
+            VLD1        {qScale0lo, qScale0hi}, [pScale]!
+            VSHLL       qSrc0lo, dXj0lo, #(12-1)
+            VSHLL       qSrc0hi, dXj0hi, #(12-1)            
+            VLD1        {qScale1lo, qScale1hi}, [pScale]!
+            VQRDMULH    qSrc0lo, qScale0lo, qSrc0lo
+            VQRDMULH    qSrc0hi, qScale0hi, qSrc0hi
+            VLD1        {qScale7lo, qScale7hi}, [pTemp]!
+            VSHLL       qSrc1lo, dXj1lo, #(12-1)
+            VSHLL       qSrc1hi, dXj1hi, #(12-1)            
+            VMOVN       dXi0lo, qSrc0lo                 ;// Output i0
+            VMOVN       dXi0hi, qSrc0hi
+            VSHLL       qSrc7lo, dXj7lo, #(12-1)
+            VSHLL       qSrc7hi, dXj7hi, #(12-1)
+            SUB         pTemp, pTemp, #((16*2)+(4*8*1))
+            VQRDMULH    qSrc1lo, qScale1lo, qSrc1lo
+            VQRDMULH    qSrc1hi, qScale1hi, qSrc1hi
+            VQRDMULH    qSrc7lo, qScale7lo, qSrc7lo
+            VQRDMULH    qSrc7hi, qScale7hi, qSrc7hi
+            VLD1        {qScale2lo, qScale2hi}, [pScale]!
+
+            ;// Row 1 & 7
+            VHADD       qRes17lo, qSrc1lo, qSrc7lo      ;// (j1+j7)/2
+            VHADD       qRes17hi, qSrc1hi, qSrc7hi      ;// (j1+j7)/2
+            VMOVN       dXi5lo, qRes17lo                ;// Output i5
+            VMOVN       dXi5hi, qRes17hi              
+            VSUB        qRes17lo, qSrc1lo, qSrc7lo      ;// j1-j7
+            VSUB        qRes17hi, qSrc1hi, qSrc7hi      ;// j1-j7
+            VMOVN       dXi6lo, qRes17lo                ;// Output i6
+            VMOVN       dXi6hi, qRes17hi      
+            VSHLL       qSrc2lo, dXj2lo, #(12-1)
+            VSHLL       qSrc2hi, dXj2hi, #(12-1)
+            VLD1        {qScale6lo, qScale6hi}, [pTemp]!
+            VSHLL       qSrc6lo, dXj6lo, #(12-1)
+            VSHLL       qSrc6hi, dXj6hi, #(12-1)
+            SUB         pTemp, pTemp, #((16*2)+(4*8*1))
+            VQRDMULH    qSrc2lo, qScale2lo, qSrc2lo
+            VQRDMULH    qSrc2hi, qScale2hi, qSrc2hi
+            VQRDMULH    qSrc6lo, qScale6lo, qSrc6lo
+            VQRDMULH    qSrc6hi, qScale6hi, qSrc6hi
+            VLD1        {qScale3lo, qScale3hi}, [pScale]!
+
+            ;// Row 2 & 6
+            VHADD       qRes26lo, qSrc2lo, qSrc6lo      ;// (j2+j6)/2
+            VHADD       qRes26hi, qSrc2hi, qSrc6hi      ;// (j2+j6)/2
+            VMOVN       dXi3lo, qRes26lo                ;// Output i3
+            VMOVN       dXi3hi, qRes26hi              
+            VSUB        qRes26lo, qSrc2lo, qSrc6lo      ;// j2-j6
+            VSUB        qRes26hi, qSrc2hi, qSrc6hi      ;// j2-j6
+            VMOVN       dXi2lo, qRes26lo                ;// Output i2
+            VMOVN       dXi2hi, qRes26hi      
+            VSHLL       qSrc3lo, dXj3lo, #(12-1)
+            VSHLL       qSrc3hi, dXj3hi, #(12-1)
+            VLD1        {qScale5lo, qScale5hi}, [pTemp]!
+            VSHLL       qSrc5lo, dXj5lo, #(12-1)
+            VSHLL       qSrc5hi, dXj5hi, #(12-1)
+            VQRDMULH    qSrc3lo, qScale3lo, qSrc3lo
+            VQRDMULH    qSrc3hi, qScale3hi, qSrc3hi
+            VQRDMULH    qSrc5lo, qScale5lo, qSrc5lo
+            VQRDMULH    qSrc5hi, qScale5hi, qSrc5hi
+            
+            ;// Row 3 & 5
+            VHADD       qRes53lo, qSrc5lo, qSrc3lo      ;// (j5+j3)/2
+            VHADD       qRes53hi, qSrc5hi, qSrc3hi      ;// (j5+j3)/2
+            SUB         pSrc, pSrc, #16*2*2
+            VMOVN       dXi7lo, qRes53lo                ;// Output i7
+            VMOVN       dXi7hi, qRes53hi              
+            VSUB        qRes53lo, qSrc5lo, qSrc3lo      ;// j5-j3
+            VSUB        qRes53hi, qSrc5hi, qSrc3hi      ;// j5-j3
+            VLD1        qXj4, [pSrc @64]
+            VMOVN       dXi4lo, qRes53lo                ;// Output i4
+            VMOVN       dXi4hi, qRes53hi                              
+            VSHLL       qSrc4lo, dXj4lo, #(12-1)
+            VSHLL       qSrc4hi, dXj4hi, #(12-1)
+            VLD1        {qScale4lo, qScale4hi}, [pScale]            
+            LDR         pSrc, =armCOMM_IDCTCoef     ;// Address of DCT inverse AAN constants
+            VQRDMULH    qSrc4lo, qScale4lo, qSrc4lo
+            VQRDMULH    qSrc4hi, qScale4hi, qSrc4hi
+            VLDR        dCoefs, [pSrc]                  ;// Load DCT inverse AAN constants
+            ;// Row 4
+            VMOVN       dXi1lo, qSrc4lo                 ;// Output i1
+            VMOVN       dXi1hi, qSrc4hi              
+        
+        MEND
+                                                
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_MaskTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_MaskTable.h
new file mode 100755
index 0000000..51118fd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_MaskTable.h
@@ -0,0 +1,27 @@
+/**
+ * 
+ * File Name:  armCOMM_MaskTable.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Mask Table to mask the end of array
+ */
+ 
+
+
+#ifndef _ARMCOMM_MASKTABLE_H_
+#define _ARMCOMM_MASKTABLE_H_
+
+#define MaskTableSize 72  /* Number of entries in each mask table declared below */
+  
+/* Mask tables, one with 16-bit and one with 8-bit entries (declared only, not defined, here).
+   NOTE(review): OMX_U16/OMX_U8 are not declared in this header - presumably the includer provides them first; confirm. */
+extern const OMX_U16 armCOMM_qMaskTable16[MaskTableSize];
+extern const OMX_U8 armCOMM_qMaskTable8[MaskTableSize];
+
+#endif /* _ARMCOMM_MASKTABLE_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Version.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Version.h
new file mode 100755
index 0000000..41b3e1e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Version.h
@@ -0,0 +1,43 @@
+/* Guard the header against multiple inclusion. */
+#ifndef __ARM_COMM_VERSION_H__
+#define __ARM_COMM_VERSION_H__
+
+
+/* The following line should be in omxtypes.h but hasn't been approved by OpenMAX yet */
+#define OMX_VERSION 102
+
+/* Two-level expansion: ARM_INDIRECT expands its argument first, then ARM_QUOTE turns the result into a string. */
+#define ARM_QUOTE(a) #a
+#define ARM_INDIRECT(A) ARM_QUOTE(A)
+
+/* Convert the OMX_VERSION number into a string ("102" here) that can be used, for example, to print it out. */
+#define ARM_VERSION_STRING ARM_INDIRECT(OMX_VERSION)
+
+
+/* Turns on ARM version/release/build strings in each domain. It is defined unconditionally here. */
+#define ARM_INCLUDE_VERSION_DESCRIPTIONS
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS
+  extern const char * const omxAC_VersionDescription;  /* One description string per domain; defined elsewhere */
+  extern const char * const omxIC_VersionDescription;
+  extern const char * const omxIP_VersionDescription;
+  extern const char * const omxSP_VersionDescription;
+  extern const char * const omxVC_VersionDescription;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
+
+
+/* The following entries should be automatically updated by the release script */
+/* They are used in the ARM version strings defined for each domain.             */
+
+/* The release tag associated with this release of the library. - used for source and object releases */
+#define OMX_ARM_RELEASE_TAG  "r1p0-00bet0"
+
+/* The ARM architecture used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_ARCHITECTURE "ARM Architecture V7 with NEON"
+
+/* The ARM Toolchain used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_TOOLCHAIN    "ARM RVCT 3.1"
+
+
+#endif /* __ARM_COMM_VERSION_H__ */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_s.h
new file mode 100755
index 0000000..0956bd1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_s.h
@@ -0,0 +1,1157 @@
+;//
+;// 
+;// File Name:  armCOMM_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// ARM optimized OpenMAX common header file
+;//
+
+;// Protect against multiple inclusion
+ IF :LNOT::DEF:ARMCOMM_S_H
+ GBLL ARMCOMM_S_H
+
+        REQUIRE8            ;// Requires 8-byte stack alignment
+        PRESERVE8           ;// Preserves 8-byte stack alignment
+        
+        GBLL    ARM_ERRORCHECK
+ARM_ERRORCHECK  SETL {FALSE}
+
+;// Globals (assembly-time state shared by the macros in this file)
+
+        GBLS    _RRegList   ;// R saved register list
+        GBLS    _DRegList   ;// D saved register list
+        GBLS    _Variant    ;// Selected processor variant
+        GBLS    _CPU        ;// CPU name
+        GBLS    _Struct     ;// Structure name
+        
+        GBLL    _InFunc     ;// Inside function assembly flag
+        GBLL    _SwLong     ;// Long switch flag
+        
+        GBLA    _RBytes     ;// Number of register bytes on stack
+        GBLA    _SBytes     ;// Number of scratch bytes on stack 
+        GBLA    _ABytes     ;// Stack offset of next argument
+        GBLA    _Workspace  ;// Stack offset of scratch workspace
+        GBLA    _F          ;// Function number (appended to per-function symbol names)
+        GBLA    _StOff      ;// Struct offset
+        GBLA    _SwNum      ;// Switch number
+        GBLS    _32         ;// Symbol suffix for 32 byte alignment
+        GBLS    _16         ;// Symbol suffix for 16 byte alignment
+        
+_InFunc         SETL    {FALSE}
+_SBytes         SETA    0
+_F              SETA    0
+_SwNum          SETA    0
+_32             SETS    "ALIGN32"
+_16             SETS    "ALIGN16"
+
+;/////////////////////////////////////////////////////////
+;// Override the tools settings of the CPU if the symbol
+;// OVERRIDECPU is defined, otherwise use the CPU name
+;// reported by the assembler settings ({CPU}).
+;/////////////////////////////////////////////////////////
+
+       IF :DEF: OVERRIDECPU
+_CPU       SETS  OVERRIDECPU
+       ELSE
+_CPU       SETS    {CPU}       
+       ENDIF
+
+
+
+;/////////////////////////////////////////////////////////
+;// Work out which code to build
+;/////////////////////////////////////////////////////////
+
+        IF :DEF:ARM1136JS:LOR::DEF:CortexA8:LOR::DEF:ARM_GENERIC
+            INFO 1,"Please switch to using M_VARIANTS"      ;// Legacy selection symbols were defined before this header
+        ENDIF
+
+        ;// Define and reset (to FALSE) all officially recognised variants
+        MACRO
+        _M_DEF_VARIANTS
+        _M_DEF_VARIANT ARM926EJS
+        _M_DEF_VARIANT ARM1136JS
+        _M_DEF_VARIANT ARM1136JS_U
+        _M_DEF_VARIANT CortexA8
+        _M_DEF_VARIANT ARM7TDMI
+        MEND
+        
+        MACRO
+        _M_DEF_VARIANT $var
+        GBLL $var               ;// Build flag for this variant
+        GBLL _ok$var            ;// Marker symbol: its existence is what _M_VARIANT checks
+$var    SETL {FALSE}            ;// Variant starts out (or is reset to) not selected
+        MEND        
+        
+
+        ;// Variant declaration
+        ;//
+        ;// $v0..$v7 list the code variants this source file implements
+        ;// (unused slots may be left empty). The macro picks the first
+        ;// listed variant in the fall back row of the configured CPU
+        ;// (_CPU), leaves only that variant's flag set to TRUE and
+        ;// leaves the chosen name in _Variant.
+        MACRO
+        M_VARIANTS $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7        
+        ;// Set to TRUE variants that are supported
+        _M_DEF_VARIANTS
+        _M_VARIANT $v0
+        _M_VARIANT $v1
+        _M_VARIANT $v2
+        _M_VARIANT $v3
+        _M_VARIANT $v4
+        _M_VARIANT $v5
+        _M_VARIANT $v6
+        _M_VARIANT $v7
+        
+        ;// Look for first available variant to match a CPU
+        ;// _M_TRY cpu, variant fall back list
+_Variant SETS ""                
+        _M_TRY ARM926EJ-S,   ARM926EJS
+        _M_TRY ARM1176JZ-S,  ARM1136JS
+        _M_TRY ARM1176JZF-S, ARM1136JS
+        _M_TRY ARM1156T2-S,  ARM1136JS
+        _M_TRY ARM1156T2F-S, ARM1136JS
+        _M_TRY ARM1136J-S,   ARM1136JS
+        _M_TRY ARM1136JF-S,  ARM1136JS
+        _M_TRY MPCore,       ARM1136JS
+        _M_TRY falcon-vfp, ARM1136JS
+        _M_TRY falcon-full-neon, CortexA8
+        _M_TRY Cortex-A8NoNeon, ARM1136JS
+        _M_TRY Cortex-A8,    CortexA8, ARM1136JS
+        _M_TRY Cortex-R4,    ARM1136JS
+        _M_TRY ARM7TDMI,     ARM7TDMI       ;// A row with no variant listed could never select anything
+        
+        ;// Select the correct variant: clear every flag, then set only the chosen one
+        _M_DEF_VARIANTS
+        IF _Variant=""
+            INFO 1, "No match found for CPU '$_CPU'"
+        ELSE
+$_Variant   SETL {TRUE}
+        ENDIF
+        MEND
+        
+        ;// Register a variant as available (an empty argument is ignored)
+        MACRO
+        _M_VARIANT $var
+        IF "$var"=""
+            MEXIT
+        ENDIF
+        IF :LNOT::DEF:_ok$var               ;// Marker is only declared for names known to _M_DEF_VARIANT
+            INFO 1, "Unrecognized variant '$var'"
+        ENDIF
+$var    SETL {TRUE}
+        MEND
+        
+        ;// For a given CPU, see if any of the variants supporting
+        ;// this CPU are available. The first available variant is
+        ;// chosen (its name is left in _Variant)
+        MACRO
+        _M_TRY $cpu, $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7
+        IF "$cpu"<>_CPU                     ;// Row is for a different CPU: nothing to do
+            MEXIT
+        ENDIF
+        _M_TRY1 $v0
+        _M_TRY1 $v1
+        _M_TRY1 $v2
+        _M_TRY1 $v3
+        _M_TRY1 $v4
+        _M_TRY1 $v5
+        _M_TRY1 $v6
+        _M_TRY1 $v7
+        ;// Check a match was found
+        IF _Variant=""
+            INFO 1, "No variant match found for CPU '$_CPU'"
+        ENDIF
+        MEND
+        
+        MACRO
+        _M_TRY1 $var
+        IF "$var"=""                        ;// Unused slot in the fall back list
+            MEXIT
+        ENDIF
+        IF (_Variant=""):LAND:$var          ;// Nothing chosen yet and this variant is flagged available
+_Variant SETS "$var"
+        ENDIF
+        MEND
+        
+;////////////////////////////////////////////////////////
+;// Structure definition
+;////////////////////////////////////////////////////////
+
+        ;// Begin declaring a structure of given name (fields are added with M_FIELD)
+        MACRO
+        M_STRUCT $sname
+_Struct SETS "$sname"           ;// Name used as the prefix of every field symbol
+_StOff  SETA 0                  ;// Fields start at offset 0
+        MEND
+        
+        ;// Declare a structure field at the next offset that is a multiple of $size
+        ;// The field offset symbol is called <structure name>_$fname
+        ;// $size   = the size of each entry, must be power of 2 
+        ;// $number = (if provided) the number of entries for an array
+        MACRO
+        M_FIELD $fname, $size, $number
+        IF (_StOff:AND:($size-1))!=0                            ;// Not yet aligned for this field
+_StOff      SETA _StOff + ($size - (_StOff:AND:($size-1)))      ;// Pad up to the next multiple
+        ENDIF
+$_Struct._$fname EQU _StOff
+        IF "$number"<>""
+_StOff      SETA _StOff + $size*$number                         ;// Array field
+        ELSE
+_StOff      SETA _StOff + $size                                 ;// Single entry
+        ENDIF
+        MEND
+        
+        
+        MACRO
+        M_ENDSTRUCT
+sizeof_$_Struct EQU _StOff      ;// Size is the offset after the last field (no tail padding is added)
+_Struct SETS ""                 ;// No structure is open any more
+        MEND
+
+;//////////////////////////////////////////////////////////
+;// Switch and table macros
+;//////////////////////////////////////////////////////////
+
+        ;// Start a relative switch table with register to switch on.
+        ;// Must be followed directly by the M_CASE entries, in index order.
+        ;// $v = the register to switch on
+        ;// $s = if specified must be "L" to indicate long
+        ;//      this allows a greater range to the case code
+        MACRO
+        M_SWITCH $v, $s
+        ASSERT "$s"="":LOR:"$s"="L"
+_SwLong SETL {FALSE}
+        IF "$s"="L"
+_SwLong     SETL {TRUE}
+        ENDIF
+_SwNum  SETA _SwNum+1                   ;// New number so this table gets its own label
+        IF {CONFIG}=16
+            ;// Thumb
+            IF _SwLong
+                TBH [pc, $v, LSL#1]     ;// Halfword table entries
+            ELSE
+                TBB [pc, $v]            ;// Byte table entries
+            ENDIF
+_Switch$_SwNum
+        ELSE
+            ;// ARM
+            ADD pc, pc, $v, LSL #2      ;// pc reads as this instruction + 8, so index 0 lands just after the NOP
+            NOP
+        ENDIF
+        MEND
+        
+        ;// Add a case to the switch statement (one table entry per call)
+        MACRO
+        M_CASE  $label
+        IF {CONFIG}=16
+            ;// Thumb
+            IF _SwLong
+                DCW ($label - _Switch$_SwNum)/2     ;// Offset from the table start, in halfwords
+            ELSE
+                DCB ($label - _Switch$_SwNum)/2     ;// Offset from the table start, in halfwords
+            ENDIF
+        ELSE
+            ;// ARM
+            B   $label                              ;// Table entry is a branch instruction
+        ENDIF
+        MEND
+        
+        ;// End of switch statement: realign to 2 bytes after the table entries
+        MACRO
+        M_ENDSWITCH
+        ALIGN 2
+        MEND       
+
+
+;////////////////////////////////////////////////////////
+;// Data area allocation
+;////////////////////////////////////////////////////////
+
+        ;// Constant table allocator macro (not allowed inside a function)
+        ;//
+        ;// Switches to a read-only data area and defines the label $name there.
+        ;// $name is symbol through which the table can be accessed.
+        ;// $align is the optional alignment of the table, log2 of 
+        ;//  the byte alignment - $align=4 is 16 byte aligned
+        MACRO
+        M_TABLE  $name, $align
+        ASSERT :LNOT:_InFunc
+        IF "$align"=""
+            AREA |.constdata|, READONLY, DATA
+        ELSE
+            ;// AREAs inherit the alignment of the first declaration.
+            ;// Therefore for each alignment size we must have an area
+            ;// of a different name.
+            AREA constdata_a$align, READONLY, DATA, ALIGN=$align
+            
+            ;// We also force alignment incase we are tagging onto
+            ;// an already started area.
+            ALIGN (1<<$align)
+        ENDIF
+$name
+        MEND
+        
+;/////////////////////////////////////////////////////
+;// Macros to allocate space on the stack
+;//
+;// These all assume that the stack is 8-byte aligned
+;// at entry to the function, which means that the 
+;// 32-byte alignment macro needs to work in a
+;// bit more of a special way...
+;/////////////////////////////////////////////////////
+
+        
+
+
+        ;// Allocate 1-byte aligned scratch area of name
+        ;// $name size $size bytes (must be used before M_START).
+        MACRO
+        M_ALLOC1  $name, $size
+        ASSERT :LNOT:_InFunc
+$name$_F   EQU _SBytes              ;// Offset of the area within the scratch space
+_SBytes SETA _SBytes + ($size)
+        MEND
+            
+        ;// Allocate 2-byte aligned scratch area of name
+        ;// $name size $size bytes (must be used before M_START).
+        MACRO
+        M_ALLOC2  $name, $size
+        ASSERT :LNOT:_InFunc
+        IF (_SBytes:AND:1)!=0
+_SBytes     SETA _SBytes + (2 - (_SBytes:AND:1))    ;// Pad up to a multiple of 2
+        ENDIF
+$name$_F   EQU _SBytes              ;// Offset of the area within the scratch space
+_SBytes SETA _SBytes + ($size)
+        MEND
+            
+        ;// Allocate 4-byte aligned scratch area of name
+        ;// $name size $size bytes (must be used before M_START).
+        MACRO
+        M_ALLOC4  $name, $size
+        ASSERT :LNOT:_InFunc
+        IF (_SBytes:AND:3)!=0
+_SBytes     SETA _SBytes + (4 - (_SBytes:AND:3))    ;// Pad up to a multiple of 4
+        ENDIF
+$name$_F   EQU _SBytes              ;// Offset of the area within the scratch space
+_SBytes SETA _SBytes + ($size)
+        MEND
+            
+        ;// Allocate 8-byte aligned scratch area of name
+        ;// $name size $size bytes (must be used before M_START).
+        MACRO
+        M_ALLOC8  $name, $size
+        ASSERT :LNOT:_InFunc
+        IF (_SBytes:AND:7)!=0
+_SBytes     SETA _SBytes + (8 - (_SBytes:AND:7))    ;// Pad up to a multiple of 8
+        ENDIF
+$name$_F   EQU _SBytes              ;// Offset of the area within the scratch space
+_SBytes SETA _SBytes + ($size)
+        MEND        
+
+        
+        ;// Allocate a 16-byte alignable scratch area of name $name, size $size bytes.
+        ;// The area starts 8-byte aligned and is padded by 8 bytes ($size+8 in total);
+        ;// the padding lets M_ADR16 round the address down to a 16-byte boundary.
+        
+        MACRO
+        M_ALLOC16  $name, $size
+        ASSERT :LNOT:_InFunc
+        IF (_SBytes:AND:7)!=0
+_SBytes     SETA _SBytes + (8 - (_SBytes:AND:7))    ;// Pad up to a multiple of 8
+        ENDIF
+$name$_F$_16   EQU (_SBytes + 8)    ;// Symbol carries the ALIGN16 suffix and points past the padding
+_SBytes SETA _SBytes + ($size) + 8
+        MEND        
+        
+        ;// Allocate a 32-byte alignable scratch area of name $name, size $size bytes.
+        ;// The area starts 8-byte aligned and is padded by 24 bytes ($size+24 in total);
+        ;// the padding lets M_ADR32 round the address down to a 32-byte boundary.
+        
+        MACRO
+        M_ALLOC32  $name, $size
+        ASSERT :LNOT:_InFunc
+        IF (_SBytes:AND:7)!=0
+_SBytes     SETA _SBytes + (8 - (_SBytes:AND:7))    ;// Pad up to a multiple of 8
+        ENDIF
+$name$_F$_32   EQU (_SBytes + 24)   ;// Symbol carries the ALIGN32 suffix and points past the padding
+_SBytes SETA _SBytes + ($size) + 24
+        MEND        
+        
+        
+        
+        
+        ;// Argument Declaration Macro (only valid between M_START and M_END)
+        ;//
+        ;// Allocate an argument name $name
+        ;// size $size bytes
+        MACRO
+        M_ARG     $name, $size
+        ASSERT _InFunc
+$name$_F    EQU _ABytes             ;// Stack offset of this argument
+_ABytes SETA _ABytes + ($size)      ;// Next argument follows directly
+        MEND        
+        
+;///////////////////////////////////////////////
+;// Macros to access stacked variables
+;///////////////////////////////////////////////
+
+        ;// Macro to perform a data processing operation with a constant second
+        ;// operand, using a second instruction when the constant needs more than 8 bits
+        MACRO
+        _M_OPC $op,$rd,$rn,$const
+        LCLA    _sh
+        LCLA    _cst
+_sh     SETA    0
+_cst    SETA    $const
+        IF _cst=0
+        $op $rd, $rn, #_cst
+            MEXIT
+        ENDIF
+        WHILE (_cst:AND:3)=0                        ;// Strip trailing zero bit pairs
+_cst        SETA _cst>>2
+_sh         SETA _sh+2                              ;// Number of bits stripped so far
+        WEND
+        $op $rd, $rn, #(_cst:AND:0x000000FF)<<_sh   ;// Lowest 8 significant bits, shifted back in place
+        IF _cst>=256
+            $op $rd, $rd, #(_cst:AND:0xFFFFFF00)<<_sh   ;// NOTE(review): presumably the rest always fits one immediate - confirm
+        ENDIF
+        MEND
+
+        ;// Macro to perform a data access operation
+        ;// Such as LDR or STR
+        ;// The addressing mode is modified such that
+        ;// 1. If no address is given then the name is taken
+        ;//    as a stack offset
+        ;// 2. If the addressing mode is not available for the
+        ;//    state being assembled for (eg Thumb) then a suitable
+        ;//    addressing mode is substituted.
+        ;//
+        ;// On Entry:
+        ;// $i = Instruction to perform (eg "LDRB")
+        ;// $a = Required byte alignment
+        ;// $r = Register(s) to transfer (eg "r1")
+        ;// $a0,$a1,$a2,$a3. Addressing mode and condition. One of:
+        ;//     label {,cc}
+        ;//     [base]                    {,,,cc}
+        ;//     [base, offset]{!}         {,,cc}
+        ;//     [base, offset, shift]{!}  {,cc}
+        ;//     [base], offset            {,,cc}
+        ;//     [base], offset, shift     {,cc}
+        MACRO
+        _M_DATA $i,$a,$r,$a0,$a1,$a2,$a3
+        IF "$a0":LEFT:1="["
+            IF "$a1"=""
+                $i$a3   $r, $a0                     ;// Plain [base] form
+            ELSE
+                IF "$a0":RIGHT:1="]"                ;// [base], offset ... : post indexed
+                    IF "$a2"=""
+                        _M_POSTIND $i$a3, "$r", $a0, $a1
+                    ELSE
+                        _M_POSTIND $i$a3, "$r", $a0, "$a1,$a2"
+                    ENDIF
+                ELSE                                ;// [base, offset ... : pre indexed or offset form
+                    IF "$a2"=""
+                        _M_PREIND  $i$a3, "$r", $a0, $a1
+                    ELSE
+                        _M_PREIND  $i$a3, "$r", $a0, "$a1,$a2"
+                    ENDIF
+                ENDIF
+            ENDIF
+        ELSE
+            LCLA    _Offset
+_Offset     SETA    _Workspace + $a0$_F             ;// Named stack slot of the current function
+            ASSERT  (_Offset:AND:($a-1))=0
+            $i$a1   $r, [sp, #_Offset]              ;// In this form the second argument is the condition
+        ENDIF
+        MEND
+        
+        ;// Handle post indexed load/stores
+        ;// op  reg, [base], offset   (split into access + ADD/SUB when building Thumb)
+        MACRO
+        _M_POSTIND $i,$r,$a0,$a1
+        LCLS _base
+        LCLS _offset
+        IF {CONFIG}=16 ;// Thumb
+_base       SETS ("$a0":LEFT:(:LEN:"$a0"-1)):RIGHT:(:LEN:"$a0"-2)   ;// remove []
+_offset     SETS "$a1"
+            IF _offset:LEFT:1="+"
+_offset         SETS _offset:RIGHT:(:LEN:_offset-1)     ;// Drop an explicit plus sign
+            ENDIF
+            $i  $r, $a0                                 ;// Access at [base] first
+            IF _offset:LEFT:1="-"
+_offset         SETS _offset:RIGHT:(:LEN:_offset-1)     ;// Negative offset: drop the sign and subtract
+                SUB $_base, $_base, $_offset
+            ELSE                
+                ADD $_base, $_base, $_offset
+            ENDIF
+        ELSE ;// ARM
+            $i  $r, $a0, $a1
+        ENDIF
+        MEND
+        
+        ;// Handle pre indexed load/store
+        ;// op  reg, [base, offset]{!}   (write back is done by a separate ADD when building Thumb)
+        MACRO
+        _M_PREIND $i,$r,$a0,$a1
+        LCLS _base
+        LCLS _offset
+        IF ({CONFIG}=16):LAND:(("$a1":RIGHT:2)="]!")
+_base       SETS "$a0":RIGHT:(:LEN:("$a0")-1)       ;// Drop the leading bracket
+_offset     SETS "$a1":LEFT:(:LEN:("$a1")-2)        ;// Drop the trailing bracket and write back mark
+            $i $r, [$_base, $_offset]
+            ADD $_base, $_base, $_offset
+        ELSE
+            $i  $r, $a0, $a1
+        ENDIF
+        MEND
+
+        ;// Load unsigned byte (LDRB) from a named stack slot or an explicit address
+        MACRO
+        M_LDRB  $r,$a0,$a1,$a2,$a3
+        _M_DATA "LDRB",1,$r,$a0,$a1,$a2,$a3     ;// 1 = required byte alignment of a stack slot
+        MEND
+        
+        ;// Load signed byte (LDRSB) from a named stack slot or an explicit address
+        MACRO
+        M_LDRSB $r,$a0,$a1,$a2,$a3
+        _M_DATA "LDRSB",1,$r,$a0,$a1,$a2,$a3    ;// 1 = required byte alignment of a stack slot
+        MEND
+        
+        ;// Store byte (STRB) to a named stack slot or an explicit address
+        MACRO
+        M_STRB  $r,$a0,$a1,$a2,$a3
+        _M_DATA "STRB",1,$r,$a0,$a1,$a2,$a3     ;// 1 = required byte alignment of a stack slot
+        MEND
+        
+        ;// Load unsigned half word (LDRH) from a named stack slot or an explicit address
+        MACRO
+        M_LDRH  $r,$a0,$a1,$a2,$a3
+        _M_DATA "LDRH",2,$r,$a0,$a1,$a2,$a3     ;// 2 = required byte alignment of a stack slot
+        MEND
+        
+        ;// Load signed half word (LDRSH) from a named stack slot or an explicit address
+        MACRO
+        M_LDRSH $r,$a0,$a1,$a2,$a3
+        _M_DATA "LDRSH",2,$r,$a0,$a1,$a2,$a3    ;// 2 = required byte alignment of a stack slot
+        MEND
+        
+        ;// Store half word (STRH) to a named stack slot or an explicit address
+        MACRO
+        M_STRH  $r,$a0,$a1,$a2,$a3
+        _M_DATA "STRH",2,$r,$a0,$a1,$a2,$a3     ;// 2 = required byte alignment of a stack slot
+        MEND
+
+        ;// Load word (LDR) from a named stack slot or an explicit address
+        MACRO
+        M_LDR   $r,$a0,$a1,$a2,$a3
+        _M_DATA "LDR",4,$r,$a0,$a1,$a2,$a3      ;// 4 = required byte alignment of a stack slot
+        MEND
+        
+        ;// Store word (STR) to a named stack slot or an explicit address
+        MACRO
+        M_STR   $r,$a0,$a1,$a2,$a3
+        _M_DATA "STR",4,$r,$a0,$a1,$a2,$a3      ;// 4 = required byte alignment of a stack slot
+        MEND
+
+        ;// Load double word (LDRD, register pair) from a named stack slot or an explicit address
+        MACRO
+        M_LDRD  $r0,$r1,$a0,$a1,$a2,$a3
+        _M_DATA "LDRD",8,"$r0,$r1",$a0,$a1,$a2,$a3      ;// 8 = required byte alignment of a stack slot
+        MEND
+                
+        ;// Store double word (STRD, register pair) to a named stack slot or an explicit address
+        MACRO
+        M_STRD  $r0,$r1,$a0,$a1,$a2,$a3
+        _M_DATA "STRD",8,"$r0,$r1",$a0,$a1,$a2,$a3      ;// 8 = required byte alignment of a stack slot
+        MEND
+        
+        ;// Get absolute address of stack allocated location: $a = sp + offset of $b ($cc = optional condition)
+        MACRO
+        M_ADR   $a, $b, $cc
+        _M_OPC  ADD$cc, $a, sp, (_Workspace + $b$_F)
+        MEND
+        
+        ;// Get absolute address of a location allocated with M_ALLOC16 and align the address to 16 bytes
+        MACRO
+        M_ADR16 $a, $b, $cc
+            _M_OPC  ADD$cc, $a, sp, (_Workspace + $b$_F$_16)
+        
+            ;// Now align $a to 16 bytes (rounds down by clearing the low 4 bits)
+            BIC$cc  $a,$a,#0x0F
+        MEND
+        
+        ;// Get absolute address of a location allocated with M_ALLOC32 and align the address to 32 bytes
+        MACRO
+        M_ADR32 $a, $b, $cc
+            _M_OPC  ADD$cc, $a, sp, (_Workspace + $b$_F$_32)
+        
+            ;// Now align $a to 32 bytes (rounds down by clearing the low 5 bits)
+            BIC$cc  $a,$a,#0x1F
+        MEND
+
+;//////////////////////////////////////////////////////////
+;// Function header and footer macros
+;//////////////////////////////////////////////////////////      
+        
+        ;// Function Header Macro    
+        ;// Generates the function prologue
+        ;// Note that functions should all be "stack-moves-once"
+        ;// The M_START and M_END macros should be the only places
+        ;// where the stack moves.
+        ;//    
+        ;// $name  = function name
+        ;// $rreg  = ""   don't stack any registers
+        ;//          "lr" stack "r4,lr" (same register list as "r4")
+        ;//          "rN" stack registers "r4-rN,lr" (odd N is rounded up to N+1)
+        ;// $dreg  = ""   don't stack any D registers
+        ;//          "dN" stack registers "d8-dN"
+        ;//
+        ;// Note: ARM Architecture procedure call standard AAPCS
+        ;// states that r4-r11, sp, d8-d15 must be preserved by
+        ;// a compliant function.
+        MACRO
+        M_START $name, $rreg, $dreg
+        ASSERT :LNOT:_InFunc
+        ASSERT "$name"!=""
+_InFunc SETL {TRUE}
+_RBytes SETA 0
+_Workspace SETA 0
+
+        ;// Create an area for the function        
+        AREA    |.text|, CODE
+        EXPORT  $name
+$name   FUNCTION
+        
+        ;// Save R registers
+        _M_GETRREGLIST $rreg
+        IF _RRegList<>""
+            STMFD   sp!, {$_RRegList, lr}
+        ENDIF
+                
+        ;// Save D registers
+        _M_GETDREGLIST  $dreg        
+        IF _DRegList<>""
+            VSTMFD  sp!, {$_DRegList}
+        ENDIF            
+            
+                    
+        ;// Ensure size claimed on stack is 8-byte aligned
+        IF ((_SBytes:AND:7)!=0)
+_SBytes     SETA _SBytes + (8 - (_SBytes:AND:7))
+        ENDIF
+        
+        IF (_SBytes!=0)
+            _M_OPC SUB, sp, sp, _SBytes         ;// Claim the scratch space declared with M_ALLOCn
+        ENDIF
+        
+        
+_ABytes SETA _SBytes + _RBytes - _Workspace     ;// First M_ARG offset: above the scratch space and saved registers
+
+                        
+        ;// Print function name if debug enabled
+        M_PRINTF "$name\n",
+        MEND
+        
+        ;// Work out a list of R saved registers (lr is added by the caller) and count their bytes, lr included
+        MACRO
+        _M_GETRREGLIST $rreg
+        IF "$rreg"=""
+_RRegList   SETS ""
+            MEXIT
+        ENDIF        
+        IF "$rreg"="lr":LOR:"$rreg"="r4"
+_RRegList   SETS "r4"
+_RBytes     SETA _RBytes+8                  ;// r4 + lr
+            MEXIT
+        ENDIF
+        IF "$rreg"="r5":LOR:"$rreg"="r6"
+_RRegList   SETS "r4-r6"
+_RBytes     SETA _RBytes+16                 ;// 3 registers + lr
+            MEXIT
+        ENDIF
+        IF "$rreg"="r7":LOR:"$rreg"="r8"
+_RRegList   SETS "r4-r8"
+_RBytes     SETA _RBytes+24                 ;// 5 registers + lr
+            MEXIT
+        ENDIF
+        IF "$rreg"="r9":LOR:"$rreg"="r10"
+_RRegList   SETS "r4-r10"
+_RBytes     SETA _RBytes+32                 ;// 7 registers + lr
+            MEXIT
+        ENDIF
+        IF "$rreg"="r11":LOR:"$rreg"="r12"
+_RRegList   SETS "r4-r12"
+_RBytes     SETA _RBytes+40                 ;// 9 registers + lr
+            MEXIT
+        ENDIF
+        INFO 1, "Unrecognized saved r register limit '$rreg'"
+        MEND        
+        
+        ;// Work out a list of D saved registers (d8 up to the given limit) and add 8 bytes per register to _RBytes
+        MACRO
+        _M_GETDREGLIST $dreg
+        IF "$dreg"=""
+_DRegList   SETS ""
+            MEXIT
+        ENDIF        
+        IF "$dreg"="d8"
+_DRegList   SETS "d8"
+_RBytes     SETA _RBytes+8
+            MEXIT
+        ENDIF
+        IF "$dreg"="d9"
+_DRegList   SETS "d8-d9"
+_RBytes     SETA _RBytes+16
+            MEXIT
+        ENDIF
+        IF "$dreg"="d10"
+_DRegList   SETS "d8-d10"
+_RBytes     SETA _RBytes+24
+            MEXIT
+        ENDIF
+        IF "$dreg"="d11"
+_DRegList   SETS "d8-d11"
+_RBytes     SETA _RBytes+32
+            MEXIT
+        ENDIF
+        IF "$dreg"="d12"
+_DRegList   SETS "d8-d12"
+_RBytes     SETA _RBytes+40
+            MEXIT
+        ENDIF
+        IF "$dreg"="d13"
+_DRegList   SETS "d8-d13"
+_RBytes     SETA _RBytes+48
+            MEXIT
+        ENDIF
+        IF "$dreg"="d14"
+_DRegList   SETS "d8-d14"
+_RBytes     SETA _RBytes+56
+            MEXIT
+        ENDIF
+        IF "$dreg"="d15"
+_DRegList   SETS "d8-d15"
+_RBytes     SETA _RBytes+64
+            MEXIT
+        ENDIF
+        INFO 1, "Unrecognized saved d register limit '$dreg'"
+        MEND
+        
+        ;// Produce function return instructions ($cc = optional condition); restores what M_START saved
+        MACRO
+        _M_RET $cc
+        IF _DRegList<>""
+            VPOP$cc {$_DRegList}                ;// D registers were pushed last, so they come off first
+        ENDIF
+        IF _RRegList=""
+            BX$cc lr                            ;// Nothing was stacked: plain return
+        ELSE
+            LDM$cc.FD sp!, {$_RRegList, pc}     ;// Loading the stacked lr into pc returns
+        ENDIF
+        MEND        
+        
+        ;// Early Function Exit Macro (only valid between M_START and M_END)
+        ;// $cc = condition to exit with
+        ;// (Example: M_EXIT EQ)
+        MACRO
+        M_EXIT  $cc
+        ASSERT  _InFunc
+        IF  _SBytes!=0
+            ;// Restore stack frame and exit (via the label M_END defines)
+            B$cc  _End$_F
+        ELSE
+            ;// Can return directly
+            _M_RET $cc
+        ENDIF        
+        MEND        
+
+        ;// Function Footer Macro        
+        ;// Generates the function epilogue and defines the exit label used by M_EXIT
+        MACRO
+        M_END
+        ASSERT _InFunc
+_InFunc SETL {FALSE}
+_End$_F
+
+        ;// Restore the stack pointer to its original value on function entry
+        IF _SBytes!=0
+            _M_OPC ADD, sp, sp, _SBytes
+        ENDIF
+        _M_RET
+        ENDFUNC
+
+        ;// Reset the global stack tracking variables back to their 
+        ;// initial values, and increment the function count
+_SBytes        SETA 0
+_F             SETA _F+1
+        MEND
+
+                
+;//==========================================================================
+;// Debug Macros
+;//==========================================================================
+
+        GBLL    DEBUG_ON                ;// When TRUE, M_PRINTF emits its printf call
+DEBUG_ON SETL   {FALSE}
+        GBLL    DEBUG_STALLS_ON
+DEBUG_STALLS_ON SETL {FALSE}
+        
+        ;//==========================================================================
+        ;// Debug call to printf (expands to nothing unless DEBUG_ON is TRUE)
+        ;//  M_PRINTF $format, $val0, $val1, $val2
+        ;//
+        ;// Examples:
+        ;//  M_PRINTF "x=%08x\n", r0
+        ;//
+        ;// This macro preserves the value of r0-r12, lr and the flags.
+        ;// NOTE(review): VFP/NEON registers are not saved here - confirm callers do not rely on them.
+        ;//==========================================================================
+
+        MACRO
+        M_PRINTF  $format, $val0, $val1, $val2
+        IF DEBUG_ON
+        
+        IMPORT  printf
+        LCLA    nArgs
+nArgs	SETA    0
+        
+        ;// save registers so we don't corrupt them
+        STMFD   sp!, {r0-r12, lr}
+        
+        ;// Drop stack to give us some workspace
+        SUB     sp, sp, #16
+        
+        ;// Save registers we need to print to the stack
+        IF "$val2" <> ""
+            ASSERT "$val1" <> ""
+            STR    $val2, [sp, #8]
+nArgs       SETA   nArgs+1
+        ENDIF
+        IF "$val1" <> ""
+            ASSERT "$val0" <> ""
+            STR    $val1, [sp, #4]
+nArgs	    SETA   nArgs+1
+        ENDIF
+        IF "$val0"<>""
+            STR    $val0, [sp]
+nArgs	    SETA   nArgs+1
+        ENDIF
+        
+        ;// Now we are safe to corrupt registers
+        ADR     r0, %FT00               ;// r0 = address of the format string placed below
+        IF nArgs=1
+          LDR   r1, [sp]
+        ENDIF
+        IF nArgs=2
+          LDMIA sp, {r1,r2}
+        ENDIF
+        IF nArgs=3
+          LDMIA sp, {r1,r2,r3}
+        ENDIF
+        
+        ;// print the values
+        MRS     r4, cpsr        ;// preserve flags
+        BL      printf
+        MSR     cpsr_f, r4      ;// restore flags
+        B       %FT01           ;// Jump over the string data
+00      ;// string to print
+        DCB     "$format", 0
+        ALIGN
+01      ;// Finished
+        ADD     sp, sp, #16
+        ;// Restore registers
+        LDMFD	sp!, {r0-r12,lr}
+
+        ENDIF   ;// DEBUG_ON
+        MEND
+
+
+        ;// Stall Simulation Macro
+        ;// Inserts a given number of NOPs for the currently defined platform.
+        ;//  Takes up to six stall specifiers, each handled by _M_STALL_SUB; emits nothing unless DEBUG_STALLS_ON is {TRUE}
+        MACRO
+        M_STALL $plat1stall, $plat2stall, $plat3stall, $plat4stall, $plat5stall, $plat6stall
+        IF DEBUG_STALLS_ON
+            _M_STALL_SUB $plat1stall    
+            _M_STALL_SUB $plat2stall    
+            _M_STALL_SUB $plat3stall    
+            _M_STALL_SUB $plat4stall    
+            _M_STALL_SUB $plat5stall    
+            _M_STALL_SUB $plat6stall    
+        ENDIF
+        MEND
+        
+        MACRO
+        _M_STALL_SUB $platstall
+        IF "$platstall"!=""                 ;// an empty (unused) specifier emits nothing
+            LCLA _pllen
+            LCLS _pl
+            LCLL _pllog                     ;// NOTE(review): declared but not used anywhere in this macro
+_pllen      SETA :LEN:"$platstall"
+_pl         SETS "$platstall":LEFT:(_pllen - 2)     ;// all but the last two characters: the name of the symbol to test
+            IF :DEF:$_pl                    ;// a specifier naming an undefined symbol is skipped
+                IF $_pl                     ;// ... as is one whose symbol evaluates to false
+                    LCLS _st
+                    LCLA _stnum
+_st                 SETS "$platstall":RIGHT:1        ;// last character only, so the count is a single digit
+_stnum              SETA $_st               ;// the digit substituted as a numeric value
+                    WHILE _stnum>0
+			MOV sp, sp                  ;// filler instruction, one per requested stall
+_stnum                  SETA _stnum - 1
+                    WEND
+                ENDIF
+            ENDIF
+        ENDIF
+        MEND
+        
+        
+        
+;//==========================================================================
+;// Endian Invariance Macros
+;// 
+;// The idea behind these macros is that if an array is
+;// loaded as words then the SMUL00 macro will multiply
+;// array elements 0 regardless of the endianness of the
+;// system. For little endian SMUL00=SMULBB, for big
+;// endian SMUL00=SMULTT and similarly for other packed operations.
+;//
+;//==========================================================================
+        ;// LIBI4: emit one of two four-operand instructions, chosen by the build endianness
+        MACRO
+        LIBI4   $comli, $combi, $a, $b, $c, $d, $cc
+        IF {ENDIAN}="big"                   ;// big-endian build: use the second mnemonic
+        $combi.$cc $a, $b, $c, $d
+        ELSE                                ;// otherwise: use the first mnemonic
+        $comli.$cc $a, $b, $c, $d
+        ENDIF
+        MEND
+        ;// LIBI3: emit one of two three-operand instructions, chosen by the build endianness
+        MACRO
+        LIBI3   $comli, $combi, $a, $b, $c, $cc
+        IF {ENDIAN}="big"                   ;// big-endian build: use the second mnemonic
+        $combi.$cc $a, $b, $c
+        ELSE                                ;// otherwise: use the first mnemonic
+        $comli.$cc $a, $b, $c
+        ENDIF
+        MEND
+        
+        ;// SMLAxy macros: x and y pick the halfword of the 2nd and 3rd operands.
+        ;// 0/1 = packed array element (B/T on little endian, T/B on big endian); B/T = fixed halfword, passed through
+        MACRO
+        SMLA00  $a, $b, $c, $d, $cc
+        LIBI4 SMLABB, SMLATT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA01  $a, $b, $c, $d, $cc
+        LIBI4 SMLABT, SMLATB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA0B  $a, $b, $c, $d, $cc
+        LIBI4 SMLABB, SMLATB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA0T  $a, $b, $c, $d, $cc
+        LIBI4 SMLABT, SMLATT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA10  $a, $b, $c, $d, $cc
+        LIBI4 SMLATB, SMLABT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA11  $a, $b, $c, $d, $cc
+        LIBI4 SMLATT, SMLABB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA1B  $a, $b, $c, $d, $cc
+        LIBI4 SMLATB, SMLABB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLA1T  $a, $b, $c, $d, $cc
+        LIBI4 SMLATT, SMLABT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAB0  $a, $b, $c, $d, $cc
+        LIBI4 SMLABB, SMLABT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAB1  $a, $b, $c, $d, $cc
+        LIBI4 SMLABT, SMLABB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAT0  $a, $b, $c, $d, $cc
+        LIBI4 SMLATB, SMLATT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAT1  $a, $b, $c, $d, $cc
+        LIBI4 SMLATT, SMLATB, $a, $b, $c, $d, $cc
+        MEND
+        
+        ;// SMULxy macros: x and y pick the halfword of the 2nd and 3rd operands.
+        ;// 0/1 = packed array element (B/T on little endian, T/B on big endian); B/T = fixed halfword, passed through
+        MACRO
+        SMUL00  $a, $b, $c, $cc
+        LIBI3 SMULBB, SMULTT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL01  $a, $b, $c, $cc
+        LIBI3 SMULBT, SMULTB, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL0B  $a, $b, $c, $cc
+        LIBI3 SMULBB, SMULTB, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL0T  $a, $b, $c, $cc
+        LIBI3 SMULBT, SMULTT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL10  $a, $b, $c, $cc
+        LIBI3 SMULTB, SMULBT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL11  $a, $b, $c, $cc
+        LIBI3 SMULTT, SMULBB, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL1B  $a, $b, $c, $cc
+        LIBI3 SMULTB, SMULBB, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMUL1T  $a, $b, $c, $cc
+        LIBI3 SMULTT, SMULBT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMULB0  $a, $b, $c, $cc
+        LIBI3 SMULBB, SMULBT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMULB1  $a, $b, $c, $cc
+        LIBI3 SMULBT, SMULBB, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMULT0  $a, $b, $c, $cc
+        LIBI3 SMULTB, SMULTT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMULT1  $a, $b, $c, $cc
+        LIBI3 SMULTT, SMULTB, $a, $b, $c, $cc
+        MEND
+        
+        ;// SMLAWx, SMULWx macros: the trailing 0/1 is the packed array element of the 3rd operand
+        ;// (0 maps to B on little endian and T on big endian; 1 maps the other way round)
+        MACRO
+        SMLAW0  $a, $b, $c, $d, $cc
+        LIBI4 SMLAWB, SMLAWT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAW1  $a, $b, $c, $d, $cc
+        LIBI4 SMLAWT, SMLAWB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMULW0  $a, $b, $c, $cc
+        LIBI3 SMULWB, SMULWT, $a, $b, $c, $cc
+        MEND
+        
+        MACRO
+        SMULW1  $a, $b, $c, $cc
+        LIBI3 SMULWT, SMULWB, $a, $b, $c, $cc
+        MEND
+
+        ;// SMLALxy macros: x and y pick the halfword of the 3rd and 4th operands.
+        ;// 0/1 = packed array element (B/T on little endian, T/B on big endian);
+        ;// B/T = fixed halfword, passed through unchanged
+        MACRO
+        SMLAL00  $a, $b, $c, $d, $cc
+        LIBI4 SMLALBB, SMLALTT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAL01  $a, $b, $c, $d, $cc
+        LIBI4 SMLALBT, SMLALTB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAL0B  $a, $b, $c, $d, $cc
+        LIBI4 SMLALBB, SMLALTB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAL0T  $a, $b, $c, $d, $cc
+        LIBI4 SMLALBT, SMLALTT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAL10  $a, $b, $c, $d, $cc
+        LIBI4 SMLALTB, SMLALBT, $a, $b, $c, $d, $cc
+        MEND
+
+        MACRO
+        SMLAL11  $a, $b, $c, $d, $cc
+        LIBI4 SMLALTT, SMLALBB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAL1B  $a, $b, $c, $d, $cc
+        LIBI4 SMLALTB, SMLALBB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLAL1T  $a, $b, $c, $d, $cc
+        LIBI4 SMLALTT, SMLALBT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLALB0  $a, $b, $c, $d, $cc
+        LIBI4 SMLALBB, SMLALBT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLALB1  $a, $b, $c, $d, $cc
+        LIBI4 SMLALBT, SMLALBB, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLALT0  $a, $b, $c, $d, $cc
+        LIBI4 SMLALTB, SMLALTT, $a, $b, $c, $d, $cc
+        MEND
+        
+        MACRO
+        SMLALT1  $a, $b, $c, $d, $cc
+        LIBI4 SMLALTT, SMLALTB, $a, $b, $c, $d, $cc
+        MEND
+        
+  ENDIF ;// ARMCOMM_S_H
+            
+  END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armOMX.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armOMX.h
new file mode 100755
index 0000000..7a68d14
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armOMX.h
@@ -0,0 +1,274 @@
+/* 
+ * 
+ * File Name:  armOMX_ReleaseVersion.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * This file allows a version of the OMX DL libraries to be built where some or
+ * all of the function names can be given a user specified suffix. 
+ *
+ * You might want to use it where:
+ *
+ * - you want to rename a function "out of the way" so that you could replace
+ *   a function with a different version (the original version would still be
+ *   in the library just with a different name - so you could debug the new
+ *   version by comparing it to the output of the old)
+ *
+ * - you want to rename all the functions to versions with a suffix so that 
+ *   you can include two versions of the library and choose between functions
+ *   at runtime.
+ *
+ *     e.g. omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8
+ * 
+ */
+
+  
+#ifndef _armOMX_H_
+#define _armOMX_H_
+
+
+/* Two macro levels are needed: OMXCATBAR expands its arguments (e.g. a *_SUFFIX macro) first, then OMXCAT2BAR pastes "omx", A and B together */
+#define OMXCAT2BAR(A, B) omx ## A ## B
+#define OMXCATBAR(A, B) OMXCAT2BAR(A, B)
+
+/* Define the suffix to add to all functions - the default is no suffix */
+#define BARE_SUFFIX 
+
+
+
+/* Per-sub-domain suffixes, each defaulting to BARE_SUFFIX: change one entry to rename only that sub-domain's functions */
+#define OMXACAAC_SUFFIX    BARE_SUFFIX   
+#define OMXACMP3_SUFFIX    BARE_SUFFIX
+#define OMXICJP_SUFFIX     BARE_SUFFIX
+#define OMXIPBM_SUFFIX     BARE_SUFFIX
+#define OMXIPCS_SUFFIX     BARE_SUFFIX
+#define OMXIPPP_SUFFIX     BARE_SUFFIX
+#define OMXSP_SUFFIX       BARE_SUFFIX
+#define OMXVCCOMM_SUFFIX   BARE_SUFFIX
+#define OMXVCM4P10_SUFFIX  BARE_SUFFIX
+#define OMXVCM4P2_SUFFIX   BARE_SUFFIX
+
+
+
+
+/* Define what each bare, un-suffixed OpenMAX API function name is to be renamed to */
+#define omxACAAC_DecodeChanPairElt                        OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeDatStrElt                          OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeFillElt                            OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeIsStereo_S32                       OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsPNS_S32_I                        OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsStereo_S32_I                     OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodePrgCfgElt                          OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeTNS_S32_I                          OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DeinterleaveSpectrum_S32                 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_EncodeTNS_S32_I                          OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermPredict_S32                      OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermReconstruct_S32_I                OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTFwd_S32                              OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTInv_S32_S16                          OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX)
+#define omxACAAC_NoiselessDecode                          OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX)
+#define omxACAAC_QuantInv_S32_I                           OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADIFHeader                         OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADTSFrameHeader                    OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX)
+
+
+#define omxACMP3_HuffmanDecode_S32                        OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfb_S32                     OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfbMbp_S32                  OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_MDCTInv_S32                              OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantize_S32_I                         OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantizeSfb_S32_I                      OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_SynthPQMF_S32_S16                        OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackFrameHeader                        OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackScaleFactors_S8                    OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackSideInfo                           OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX)
+
+#define omxICJP_CopyExpand_U8_C3                          OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16                                OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16_I                              OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16                                OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16_I                              OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_Multiple_S16                  OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16                           OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16_I                         OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwdTableInit                      OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_Multiple_S16                  OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16                           OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16_I                         OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInvTableInit                      OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffman8x8_Direct_S16_C1            OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecGetBufSize_U8            OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecInit_U8                  OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1         OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecGetBufSize_U8            OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecInit_U8                  OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+
+#define omxIPBM_AddC_U8_C1R_Sfs                           OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C1R                               OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C3R                               OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX)
+#define omxIPBM_Mirror_U8_C1R                             OMXCATBAR(IPBM_Mirror_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_MulC_U8_C1R_Sfs                           OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+
+#define omxIPCS_ColorTwistQ14_U8_C3R                      OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R      OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R      OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R      OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R       OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R       OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R       OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R             OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszRot_U8_P3R                     OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R             OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R      OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R       OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R             OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R          OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszRot_U8_P3R                     OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R            OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R      OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R                OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R       OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R       OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) /* NOTE(review): exact duplicate of the previous #define; harmless identical redefinition, possibly a copy/paste slip - confirm against the OpenMAX DL function list */
+#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R        OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R           OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R               OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R             OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R      OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR888_U8_C3R                   OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX)
+
+#define omxIPPP_Deblock_HorEdge_U8_I                      OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_Deblock_VerEdge_U8_I                      OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterFIR_U8_C1R                          OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterMedian_U8_C1R                       OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_GetCentralMoment_S64                      OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_GetSpatialMoment_S64                      OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentGetStateSize                        OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentInit                                OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C1R                            OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C3R                            OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX)
+
+#define omxSP_BlockExp_S16                                OMXCATBAR(SP_BlockExp_S16, OMXSP_SUFFIX)
+#define omxSP_BlockExp_S32                                OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX)
+#define omxSP_Copy_S16                                    OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16                                 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16_Sfs                             OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC16_Sfs                        OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC32_Sfs                        OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S16S32_Sfs                    OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S32_Sfs                       OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC16                        OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC32                        OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S16S32                      OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S32                         OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC16                              OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC32                              OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S16S32                            OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S32                               OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32_Sfs                       OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32S16_Sfs                    OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC16_Sfs                        OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC32_Sfs                        OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32                            OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32_I                          OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16                              OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_I                            OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_ISfs                         OMXCATBAR(SP_FIR_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_Sfs                          OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16                           OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_I                         OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_ISfs                      OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_Sfs                       OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16                        OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16_I                      OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16                              OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16_I                            OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16                     OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16_I                   OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16                           OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16_I                         OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX)
+
+#define omxVCCOMM_Average_16x                             OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Average_8x                              OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock                OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock_SAD            OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy16x16                               OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy8x8                                 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ExpandFrame_I                           OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_LimitMVToRect                           OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_16x                                 OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_8x                                  OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX)
+
+#define omxVCM4P10_Average_4x                             OMXCATBAR(VCM4P10_Average_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Half                        OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Integer                     OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Quarter                     OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockChroma_I                        OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockLuma_I                          OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC        OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeCoeffsToPairCAVLC                OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I       OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I       OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I         OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I         OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_GetVLCInfo                             OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateChroma                      OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfHor_Luma                OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfVer_Luma                OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateLuma                        OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_ChromaDC           OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_LumaDC             OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformResidualAndAdd             OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEGetBufSize                           OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEInit                                 OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MotionEstimationMB                     OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_16x16                     OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_4x4                       OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntraChroma_8x8                  OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SAD_4x                                 OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_16x                            OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_4x                             OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_8x                             OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SATD_4x4                               OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SubAndTransformQDQResidual             OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantChromaDCFromPair       OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantLumaDCFromPair         OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_ChromaDC                OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_LumaDC                  OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX)
+
+#define omxVCM4P2_BlockMatch_Half_16x16                   OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Half_8x8                     OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_16x16                OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_8x8                  OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DCT8x8blk                               OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Inter                   OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Intra                   OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodePadMV_PVOP                        OMXCATBAR(VCM4P2_DecodePadMV_PVOP, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_Inter                   OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC              OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC              OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeMV                                OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_Inter                   OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC              OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC              OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_FindMVpred                              OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_IDCT8x8blk                              OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MCReconBlock                            OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEGetBufSize                            OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEInit                                  OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MotionEstimationMB                      OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_PredictReconCoefIntra                   OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInter_I                            OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantIntra_I                            OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvInter_I                         OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvIntra_I                         OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_inter                 OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_intra                 OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX)
+
+
+#endif /* _armOMX_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes.h
new file mode 100755
index 0000000..8b295a6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes.h
@@ -0,0 +1,252 @@
+/**
+ * File: omxtypes.h
+ * Brief: Defines basic Data types used in OpenMAX v1.0.2 header files.
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. 
+ *
+ * These materials are protected by copyright laws and contain material 
+ * proprietary to the Khronos Group, Inc.  You may use these materials 
+ * for implementing Khronos specifications, without altering or removing 
+ * any trademark, copyright or other notice from the specification.
+ * 
+ * Khronos Group makes no, and expressly disclaims any, representations 
+ * or warranties, express or implied, regarding these materials, including, 
+ * without limitation, any implied warranties of merchantability or fitness 
+ * for a particular purpose or non-infringement of any intellectual property. 
+ * Khronos Group makes no, and expressly disclaims any, warranties, express 
+ * or implied, regarding the correctness, accuracy, completeness, timeliness, 
+ * and reliability of these materials. 
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters, 
+ * Contributors or Members or their respective partners, officers, directors, 
+ * employees, agents or representatives be liable for any damages, whether 
+ * direct, indirect, special or consequential damages for lost revenues, 
+ * lost profits, or otherwise, arising from or in connection with these 
+ * materials.
+ * 
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc. 
+ *
+ */
+  
+#ifndef _OMXTYPES_H_
+#define _OMXTYPES_H_
+
+#include <limits.h> 
+
+#define OMX_IN
+#define OMX_OUT
+#define OMX_INOUT
+
+
+typedef enum {
+    /* Status codes returned by OpenMAX DL functions: zero means success, every other code listed before the placeholder is negative. */
+    /* Mandatory return codes - use cases are explicitly described for each function */
+    OMX_Sts_NoErr                    =  0,    /* No error, the function completed successfully */
+    OMX_Sts_Err                      = -2,    /* Unknown/unspecified error */    
+    OMX_Sts_InvalidBitstreamValErr   = -182,  /* Invalid value detected during bitstream processing */    
+    OMX_Sts_MemAllocErr              = -9,    /* Not enough memory allocated for the operation */
+    OMX_StsACAAC_GainCtrErr    	     = -159,  /* AAC: Unsupported gain control data detected */
+    OMX_StsACAAC_PrgNumErr           = -167,  /* AAC: Invalid number of elements for one program   */
+    OMX_StsACAAC_CoefValErr          = -163,  /* AAC: Invalid quantized coefficient value          */     
+    OMX_StsACAAC_MaxSfbErr           = -162,  /* AAC: Invalid maxSfb value in relation to numSwb */    
+	OMX_StsACAAC_PlsDataErr		     = -160,  /* AAC: pulse escape sequence data error */
+
+    /* Optional return codes - use cases are explicitly described for each function */
+    OMX_Sts_BadArgErr                = -5,    /* Bad Arguments */
+
+    OMX_StsACAAC_TnsNumFiltErr       = -157,  /* AAC: Invalid number of TNS filters  */
+    OMX_StsACAAC_TnsLenErr           = -156,  /* AAC: Invalid TNS region length  */   
+    OMX_StsACAAC_TnsOrderErr         = -155,  /* AAC: Invalid order of TNS filter  */                  
+    OMX_StsACAAC_TnsCoefResErr       = -154,  /* AAC: Invalid bit-resolution for TNS filter coefficients  */
+    OMX_StsACAAC_TnsCoefErr          = -153,  /* AAC: Invalid TNS filter coefficients  */                  
+    OMX_StsACAAC_TnsDirectErr        = -152,  /* AAC: Invalid TNS filter direction  */  
+
+    OMX_StsICJP_JPEGMarkerErr        = -183,  /* JPEG marker encountered within an entropy-coded block; */
+                                              /* Huffman decoding operation terminated early.           */
+    OMX_StsICJP_JPEGMarker           = -181,  /* JPEG marker encountered; Huffman decoding */
+                                              /* operation terminated early.                         */
+    OMX_StsIPPP_ContextMatchErr      = -17,   /* Context parameter does not match the operation */
+
+    OMX_StsSP_EvenMedianMaskSizeErr  = -180,  /* Even size of the Median Filter mask was replaced by the odd one */
+
+    OMX_Sts_MaximumEnumeration       = INT_MAX  /* Placeholder, forces enum of size OMX_INT */
+    
+ } OMXResult;          /** Return value or error value returned from a function. Identical to OMX_INT */
+
+ 
+/* OMX_U8 */
+#if UCHAR_MAX == 0xff
+typedef unsigned char OMX_U8;
+#elif USHRT_MAX == 0xff 
+typedef unsigned short int OMX_U8; 
+#else
+#error OMX_U8 undefined
+#endif 
+
+ 
+/* OMX_S8 */
+#if SCHAR_MAX == 0x7f 
+typedef signed char OMX_S8;
+#elif SHRT_MAX == 0x7f 
+typedef signed short int OMX_S8; 
+#else
+#error OMX_S8 undefined
+#endif
+ 
+ 
+/* OMX_U16 */
+#if USHRT_MAX == 0xffff
+typedef unsigned short int OMX_U16;
+#elif UINT_MAX == 0xffff
+typedef unsigned int OMX_U16; 
+#else
+#error OMX_U16 undefined
+#endif
+
+
+/* OMX_S16 */
+#if SHRT_MAX == 0x7fff 
+typedef signed short int OMX_S16;
+#elif INT_MAX == 0x7fff 
+typedef signed int OMX_S16; 
+#else
+#error OMX_S16 undefined
+#endif
+
+
+/* OMX_U32: unsigned 32-bit integer (unsigned int, else unsigned long when that is the 32-bit type) */
+#if UINT_MAX == 0xffffffff
+typedef unsigned int OMX_U32;
+#elif ULONG_MAX == 0xffffffff
+typedef unsigned long int OMX_U32;
+#else
+#error OMX_U32 undefined
+#endif
+
+
+/* OMX_S32 */
+#if INT_MAX == 0x7fffffff
+typedef signed int OMX_S32;
+#elif LONG_MAX == 0x7fffffff
+typedef long signed int OMX_S32; 
+#else
+#error OMX_S32 undefined
+#endif
+
+
+/* OMX_U64 & OMX_S64 */
+#if defined( _WIN32 ) || defined ( _WIN64 )
+    typedef __int64 OMX_S64; /** Signed 64-bit integer */
+    typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */
+    #define OMX_MIN_S64			(0x8000000000000000i64)
+    #define OMX_MIN_U64			(0x0000000000000000i64)
+    #define OMX_MAX_S64			(0x7FFFFFFFFFFFFFFFi64)
+    #define OMX_MAX_U64			(0xFFFFFFFFFFFFFFFFi64)
+#else
+    typedef long long OMX_S64; /** Signed 64-bit integer */
+    typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */
+    #define OMX_MIN_S64			(0x8000000000000000LL)
+    #define OMX_MIN_U64			(0x0000000000000000LL)
+    #define OMX_MAX_S64			(0x7FFFFFFFFFFFFFFFLL)
+    #define OMX_MAX_U64			(0xFFFFFFFFFFFFFFFFLL)
+#endif
+
+
+/* OMX_SC8: complex number whose real and imaginary parts are each an OMX_S8 */
+typedef struct
+{
+  OMX_S8 Re; /** Real part */
+  OMX_S8 Im; /** Imaginary part */	
+	
+} OMX_SC8; /** Signed 8-bit complex number */
+
+
+/* OMX_SC16: complex number whose real and imaginary parts are each an OMX_S16 */
+typedef struct
+{
+  OMX_S16 Re; /** Real part */
+  OMX_S16 Im; /** Imaginary part */	
+	
+} OMX_SC16; /** Signed 16-bit complex number */
+
+
+/* OMX_SC32: complex number whose real and imaginary parts are each an OMX_S32 */
+typedef struct
+{
+  OMX_S32 Re; /** Real part */
+  OMX_S32 Im; /** Imaginary part */	
+	
+} OMX_SC32; /** Signed 32-bit complex number */
+
+
+/* OMX_SC64: complex number whose real and imaginary parts are each an OMX_S64 */
+typedef struct
+{
+  OMX_S64 Re; /** Real part */
+  OMX_S64 Im; /** Imaginary part */	
+	
+} OMX_SC64; /** Signed 64-bit complex number */
+
+
+/* OMX_F32 */
+typedef float OMX_F32; /** Single precision floating point,IEEE 754 */
+
+
+/* OMX_F64 */
+typedef double OMX_F64; /** Double precision floating point,IEEE 754 */
+
+
+/* OMX_INT */
+typedef int OMX_INT; /** signed integer corresponding to machine word length, has maximum signed value INT_MAX*/
+
+
+#define OMX_MIN_S8  	   	(-128)
+#define OMX_MIN_U8  		0
+#define OMX_MIN_S16		 	(-32768)
+#define OMX_MIN_U16			0
+#define OMX_MIN_S32			(-2147483647-1)
+#define OMX_MIN_U32			0
+
+#define OMX_MAX_S8			(127)
+#define OMX_MAX_U8			(255)
+#define OMX_MAX_S16			(32767)
+#define OMX_MAX_U16			(0xFFFF)
+#define OMX_MAX_S32			(2147483647)
+#define OMX_MAX_U32			(0xFFFFFFFF)
+
+typedef void OMXVoid;
+
+#ifndef NULL
+#define NULL ((void*)0)
+#endif
+
+/** Defines the geometric position and size of a rectangle,
+  * where (x, y) gives the coordinates of the top left corner
+  * of the rectangle, with dimensions width in the x-direction
+  * and height in the y-direction */
+typedef struct {
+	OMX_INT x;      /** x-coordinate of top left corner of rectangle */
+	OMX_INT y;      /** y-coordinate of top left corner of rectangle */
+	OMX_INT width;  /** Width in the x-direction. */
+	OMX_INT height; /** Height in the y-direction. */
+}OMXRect;
+
+
+/** Defines the geometric position of a point as an (x, y) coordinate pair */
+typedef struct 
+{
+ OMX_INT x; /** x-coordinate */
+ OMX_INT y;	/** y-coordinate */
+	
+} OMXPoint;
+
+
+/** Defines the dimensions of a rectangle, or of a region of interest in an image, as a width/height pair */
+typedef struct 
+{
+ OMX_INT width;  /** Width of the rectangle, in the x-direction */
+ OMX_INT height; /** Height of the rectangle, in the y-direction */
+	
+} OMXSize;
+
+#endif /* _OMXTYPES_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes_s.h
new file mode 100755
index 0000000..48703d1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes_s.h
@@ -0,0 +1,77 @@
+;//
+;// 
+;// File Name:  omxtypes_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+;// Mandatory return codes - use cases are explicitly described for each function 
+OMX_Sts_NoErr                    EQU  0    ;// No error the function completed successfully 
+OMX_Sts_Err                      EQU -2    ;// Unknown/unspecified error     
+OMX_Sts_InvalidBitstreamValErr   EQU -182  ;// Invalid value detected during bitstream processing     
+OMX_Sts_MemAllocErr              EQU -9    ;// Not enough memory allocated for the operation 
+OMX_StsACAAC_GainCtrErr    	     EQU -159  ;// AAC: Unsupported gain control data detected 
+OMX_StsACAAC_PrgNumErr           EQU -167  ;// AAC: Invalid number of elements for one program   
+OMX_StsACAAC_CoefValErr          EQU -163  ;// AAC: Invalid quantized coefficient value               
+OMX_StsACAAC_MaxSfbErr           EQU -162  ;// AAC: Invalid maxSfb value in relation to numSwb     
+OMX_StsACAAC_PlsDataErr		     EQU -160  ;// AAC: pulse escape sequence data error 
+
+;// Optional return codes - use cases are explicitly described for each function
+OMX_Sts_BadArgErr                EQU -5    ;// Bad Arguments 
+
+OMX_StsACAAC_TnsNumFiltErr       EQU -157  ;// AAC: Invalid number of TNS filters  
+OMX_StsACAAC_TnsLenErr           EQU -156  ;// AAC: Invalid TNS region length     
+OMX_StsACAAC_TnsOrderErr         EQU -155  ;// AAC: Invalid order of TNS filter                    
+OMX_StsACAAC_TnsCoefResErr       EQU -154  ;// AAC: Invalid bit-resolution for TNS filter coefficients  
+OMX_StsACAAC_TnsCoefErr          EQU -153  ;// AAC: Invalid TNS filter coefficients                    
+OMX_StsACAAC_TnsDirectErr        EQU -152  ;// AAC: Invalid TNS filter direction    
+
+OMX_StsICJP_JPEGMarkerErr        EQU -183  ;// JPEG marker encountered within an entropy-coded block; 
+                                            ;// Huffman decoding operation terminated early.           
+OMX_StsICJP_JPEGMarker           EQU -181  ;// JPEG marker encountered; Huffman decoding 
+                                            ;// operation terminated early.                         
+OMX_StsIPPP_ContextMatchErr      EQU -17   ;// Context parameter doesn't match to the operation 
+
+OMX_StsSP_EvenMedianMaskSizeErr  EQU -180  ;// Even size of the Median Filter mask was replaced by the odd one 
+
+OMX_Sts_MaximumEnumeration       EQU 0x7FFFFFFF
+
+
+
+OMX_MIN_S8      EQU 	   	(-128)
+OMX_MIN_U8  	EQU     	0
+OMX_MIN_S16		EQU      	(-32768)
+OMX_MIN_U16		EQU	        0
+
+
+OMX_MIN_S32		EQU	(-2147483647-1)
+OMX_MIN_U32		EQU	0
+
+OMX_MAX_S8		EQU	(127)
+OMX_MAX_U8		EQU	(255)
+OMX_MAX_S16		EQU	(32767)
+OMX_MAX_U16		EQU	(0xFFFF)
+OMX_MAX_S32		EQU	(2147483647)
+OMX_MAX_U32		EQU	(0xFFFFFFFF)
+
+OMX_VC_UPPER    EQU 0x1                 ;// Used by the PredictIntra functions   
+OMX_VC_LEFT     EQU 0x2                 ;// Used by the PredictIntra functions 
+OMX_VC_UPPER_RIGHT    EQU 0x40          ;// Used by the PredictIntra functions   
+
+NULL    EQU 0
+
+;// Structures
+
+    INCLUDE     armCOMM_s.h
+
+    M_STRUCT    OMXPoint
+    M_FIELD     x, 4
+    M_FIELD     y, 4
+    M_ENDSTRUCT
+
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/build_vc.pl b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/build_vc.pl
new file mode 100755
index 0000000..649e74c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/build_vc.pl
@@ -0,0 +1,113 @@
+#!/usr/bin/perl
+#
+# 
+# File Name:  build_vc.pl
+# OpenMAX DL: v1.0.2
+# Revision:   12290
+# Date:       Wednesday, April 9, 2008
+# 
+# (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+# 
+# 
+#
+# This file builds the OpenMAX DL vc domain library omxVC.o.
+#
+
+use File::Spec;
+use strict;
+
+my ($CC, $CC_OPTS, $AS, $AS_OPTS, $LIB, $LIB_OPTS, $LIB_TYPE);
+
+$CC       = 'armcc';
+$CC_OPTS  = '--no_unaligned_access --cpu Cortex-A8 -c';
+$AS       = 'armasm';
+$AS_OPTS  = '--no_unaligned_access --cpu Cortex-A8';
+# $LIB      = 'armlink';
+# $LIB_OPTS = '--partial -o';
+# $LIB_TYPE = '.o';
+$LIB      = 'armar';
+$LIB_OPTS = '--create -r';
+$LIB_TYPE = '.a';
+
+#------------------------
+
+my (@headerlist, @filelist, $hd, $file, $ofile, $command, $objlist, $libfile, $h);
+
+# Define the list of directories containing included header files.
+@headerlist = qw(api vc/api vc/m4p2/api vc/m4p10/api);
+
+# Define the list of source files to compile (lexical handle, 3-argument open, OS error included in the message).
+open(my $files, '<', 'filelist_vc.txt') or die("Can't open source file list: $!\n");
+@filelist = <$files>;
+close($files);
+
+# Fix the file separators in the header paths
+foreach $h (@headerlist)
+{
+        $h = File::Spec->canonpath($h);
+}
+
+# Create the include path to be passed to the compiler
+$hd = '-I' . join(' -I', @headerlist);
+
+# Create the build directories "/lib/" and "/obj/" (if they are not there already)
+mkdir "obj", 0777 if (! -d "obj");
+mkdir "lib", 0777 if (! -d "lib");
+
+$objlist = '';
+
+# Compile each file, stopping at the first tool failure
+foreach $file (@filelist)
+{
+	my ($f, $base, $ext, $objfile, $tool, $tool_opts);
+
+	# Strip every end-of-line character from the path itself (not just from
+	# the file name) so that a CRLF file list does not leak "\r" into the
+	# command line.
+	$file =~ s/[\n\f\r]//g;
+	next if ($file eq '');
+	$file = File::Spec->canonpath($file);
+
+	(undef, undef, $f) = File::Spec->splitpath($file);
+
+	# Only single-character extensions are recognised (.c, .s, .h, ...).
+	(($base, $ext) = $f =~ /(.+)\.(\w)$/)
+		or die "No file extension found: $f\n";
+
+	# Select the tool for this kind of source; anything else is skipped.
+	if($ext eq 'c')
+	{
+		($tool, $tool_opts) = ($CC, $CC_OPTS);
+	}
+	elsif($ext eq 's')
+	{
+		($tool, $tool_opts) = ($AS, $AS_OPTS);
+	}
+	else
+	{
+		print "Ignoring file: $f\n";
+		next;
+	}
+
+	$objfile  = File::Spec->catfile('obj', $base.'.o');
+	$objlist .= "$objfile ";
+	$command  = $tool.' '.$tool_opts.' '.$hd.' -o '.$objfile.' '.$file;
+	print "$command\n";
+
+	# A failed compile must abort the build rather than reach the archive step.
+	(system($command) == 0)
+		or die "Command failed: $command\n";
+}
+
+# Do the final link stage to create the library; a failing archiver ends the script with an error.
+$libfile = File::Spec->catfile('lib', 'omxVC'.$LIB_TYPE);
+$command = $LIB.' '.$LIB_OPTS.' '.$libfile.' '.$objlist;
+print "$command\n";
+(system($command) == 0) or die "Library creation failed: $command\n";
+print "Build successful\n";
+
+
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/filelist_vc.txt b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/filelist_vc.txt
new file mode 100755
index 0000000..8db8eeb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/filelist_vc.txt
@@ -0,0 +1,75 @@
+./api/armCOMM.h
+./api/armCOMM_BitDec_s.h
+./api/armCOMM_Bitstream.h
+./api/armCOMM_IDCT_s.h
+./api/armCOMM_IDCTTable.h
+./api/armCOMM_MaskTable.h
+./api/armCOMM_s.h
+./api/armCOMM_Version.h
+./api/armOMX_ReleaseVersion.h
+./api/omxtypes.h
+./api/omxtypes_s.h
+./src/armCOMM_IDCTTable.c
+./src/armCOMM_MaskTable.c
+./vc/api/armVC.h
+./vc/api/armVCCOMM_s.h
+./vc/api/omxVC.h
+./vc/api/omxVC_s.h
+./vc/comm/src/omxVCCOMM_Copy16x16_s.s
+./vc/comm/src/omxVCCOMM_Copy8x8_s.s
+./vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
+./vc/m4p10/api/armVCM4P10_CAVLCTables.h
+./vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_CAVLCTables.c
+./vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
+./vc/m4p10/src/armVCM4P10_DequantTables_s.s
+./vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_QuantTables_s.s
+./vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
+./vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
+./vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+./vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+./vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+./vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
+./vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+./vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+./vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+./vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+./vc/m4p2/src/armVCM4P2_Clip8_s.s
+./vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+./vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+./vc/m4p2/src/armVCM4P2_Lookup_Tables.c
+./vc/m4p2/src/armVCM4P2_SetPredDir_s.s
+./vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+./vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+./vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
+./vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
+./vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
+./vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
+./vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
+./vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
+./vc/src/armVC_Version.c
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM.c
new file mode 100755
index 0000000..e572a89
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM.c
@@ -0,0 +1,936 @@
+/**
+ * 
+ * File Name:  armCOMM.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Defines Common APIs used across OpenMAX API's
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+
+/***********************************************************************/
+                /* Miscellaneous Arithmetic operations */
+
+/**
+ * Function: armRoundFloatToS16
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halfway cases
+ * away from zero) and converts it to a short int, without saturation
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16 format
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value)
+{
+    /*
+     * Bias the input by half a unit toward its own sign so that the
+     * truncating cast below rounds to nearest, halves away from zero.
+     * The range of the input is not checked here.
+     */
+    OMX_F64 bias = (Value > 0) ? .5 : -.5;
+
+    return (OMX_S16)(Value + bias);
+}
+
+/**
+ * Function: armRoundFloatToS32
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halfway cases
+ * away from zero) and converts it to an int, without saturation
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S32 format
+ */
+
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value)
+{
+    /*
+     * Bias the input by half a unit toward its own sign so that the
+     * truncating cast below rounds to nearest, halves away from zero.
+     * The range of the input is not checked here.
+     */
+    OMX_F64 bias = (Value > 0) ? .5 : -.5;
+
+    return (OMX_S32)(Value + bias);
+}
+/**
+ * Function: armSatRoundFloatToS16
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halfway cases
+ * away from zero) and saturates it to the range of a short int
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16 format
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value)
+{
+    const OMX_F64 upperBound = (OMX_S16)OMX_MAX_S16;
+    const OMX_F64 lowerBound = (OMX_S16)OMX_MIN_S16;
+    OMX_F64 rounded;
+
+    /*
+     * Strictly positive inputs are biased by +0.5 and can therefore
+     * only exceed the upper bound.
+     */
+    if (Value > 0)
+    {
+        rounded = Value + 0.5;
+        if (rounded > upperBound)
+        {
+            return (OMX_S16)OMX_MAX_S16;
+        }
+        return (OMX_S16)rounded;
+    }
+
+    /* All other inputs are biased by -0.5 and checked against the lower bound. */
+    rounded = Value - 0.5;
+    if (rounded < lowerBound)
+    {
+        return (OMX_S16)OMX_MIN_S16;
+    }
+
+    return (OMX_S16)rounded;
+}
+
+/**
+ * Function: armSatRoundFloatToS32
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halfway cases
+ * away from zero) and saturates it to the range of an int
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S32 format
+ */
+
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value)
+{
+    const OMX_F64 upperBound = (OMX_S32)OMX_MAX_S32;
+    const OMX_F64 lowerBound = (OMX_S32)OMX_MIN_S32;
+    OMX_F64 rounded;
+
+    /*
+     * Strictly positive inputs are biased by +0.5 and can therefore
+     * only exceed the upper bound.
+     */
+    if (Value > 0)
+    {
+        rounded = Value + 0.5;
+        if (rounded > upperBound)
+        {
+            return (OMX_S32)OMX_MAX_S32;
+        }
+        return (OMX_S32)rounded;
+    }
+
+    /* All other inputs are biased by -0.5 and checked against the lower bound. */
+    rounded = Value - 0.5;
+    if (rounded < lowerBound)
+    {
+        return (OMX_S32)OMX_MIN_S32;
+    }
+
+    return (OMX_S32)rounded;
+}
+
+/**
+ * Function: armSatRoundFloatToU16
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer and saturates it
+ * to the range of an unsigned short int; negative results saturate to 0
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U16 format
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    /* Converting a negative or too-large double to an unsigned type is
+     * undefined behaviour in C, so saturate at both ends of the range. */
+    if (Value < 0)
+        return 0;
+    if (Value > (OMX_U16)OMX_MAX_U16)
+        return (OMX_U16)OMX_MAX_U16;
+
+    return (OMX_U16)Value;
+}
+
+/**
+ * Function: armSatRoundFloatToU32
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer and saturates it
+ * to the range of an unsigned int; negative results saturate to 0
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U32 format
+ */
+
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    /* Converting a negative or too-large double to an unsigned type is
+     * undefined behaviour in C, so saturate at both ends of the range. */
+    if (Value < 0)
+        return 0;
+    if (Value > (OMX_U32)OMX_MAX_U32)
+        return (OMX_U32)OMX_MAX_U32;
+
+    return (OMX_U32)Value;
+}
+
+/**
+ * Function: armRoundFloatToS64
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halfway cases
+ * away from zero) and converts it to a 64 bit int, without saturation
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S64 format
+ */
+
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value)
+{
+    /*
+     * Bias the input by half a unit toward its own sign so that the
+     * truncating cast below rounds to nearest, halves away from zero.
+     * The range of the input is not checked here.
+     */
+    OMX_F64 bias = (Value > 0) ? .5 : -.5;
+
+    return (OMX_S64)(Value + bias);
+}
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Reports the sign of a 16-bit value:
+ * returns 1 if it is positive
+ * returns 0 if it is 0
+ * returns -1 if it is negative
+ *
+ * Remarks:
+ * Exactly one of the three values is returned for every input.
+ *
+ * Parameters:
+ * [in]  var     Variable to be checked
+ *
+ * Return Value:
+ * OMX_INT --   returns 1 if it is positive
+ *              returns 0 if it is 0
+ *              returns -1 if it is negative
+ */
+
+OMX_INT armSignCheck (
+    OMX_S16 var
+)
+
+{
+    /* Test each sign in turn and return as soon as one matches;
+     * strictly positive inputs come first. */
+    if (var > 0)
+    {
+        return 1;
+    }
+
+    /* Strictly negative input. */
+    if (var < 0)
+    {
+        return -1;
+    }
+
+    /* Neither of the above, so the input is zero. */
+    return 0;
+}
+
+/**
+ * Function: armClip
+ *
+ * Description: Clips the input to the range [min, max]
+ *
+ * Remarks:
+ * The upper bound is tested first, so max is returned whenever src
+ * exceeds it, even if the bounds are passed in the wrong order.
+ *
+ * Parameters:
+ * [in] min     lower bound
+ * [in] max     upper bound
+ * [in] src     variable to be clipped
+ *
+ * Return Value: OMX_S32 --   returns clipped value
+ */
+
+OMX_S32 armClip (
+    OMX_INT min,
+    OMX_INT max,
+    OMX_S32 src
+)
+
+{
+    /* Values above the range collapse onto the upper bound (this test
+     * comes first, so it wins if the bounds are crossed). */
+    if (src > max)
+    {
+        return max;
+    }
+
+    /* Values below the range collapse onto the lower bound; anything
+     * else is already inside the range and passes through unchanged. */
+    return (src < min) ? min : src;
+}
+
+/**
+ * Function: armClip_F32
+ *
+ * Description: Clips the floating point input to the range [min, max]
+ *
+ * Remarks:
+ * The upper bound is tested first, so max is returned whenever src
+ * exceeds it, even if the bounds are passed in the wrong order.
+ *
+ * Parameters:
+ * [in] min     lower bound
+ * [in] max     upper bound
+ * [in] src     variable to be clipped
+ *
+ * Return Value: OMX_F32 --   returns clipped value
+ */
+
+OMX_F32 armClip_F32 (
+    OMX_F32 min,
+    OMX_F32 max,
+    OMX_F32 src
+)
+
+{
+    /* Values above the range collapse onto the upper bound (this test
+     * comes first, so it wins if the bounds are crossed). */
+    if (src > max)
+    {
+        return max;
+    }
+
+    /* Values below the range collapse onto the lower bound; anything
+     * else fails both comparisons and passes through unchanged. */
+    return (src < min) ? min : src;
+}
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description: Divides a float value by 2^shift, rounds the quotient
+ * and saturates it to the unsigned range selected by satBits.
+ * Second parameter is like "shifting" the corresponding
+ * integer value. Non-positive inputs give 0; satBits <= 0 gives 0 and
+ * satBits >= 32 selects the full OMX_U32 range.
+ *
+ * Parameters:
+ * [in] v          Number to be operated upon
+ * [in] shift      Divides "v" by "2^shift"; (1 << shift) must fit in an int
+ * [in] satBits    Final range is [0, 2^satBits)
+ *
+ * Return Value:
+ * OMX_U32 --   returns "shifted" saturated value
+ */
+
+OMX_U32 armShiftSat_F32(OMX_F32 v, OMX_INT shift, OMX_INT satBits)
+{
+    OMX_U32 allOnes = (OMX_U32)(-1);
+    OMX_U32 maxV, vInt;
+    OMX_F32 vShifted, vRounded, shiftDiv = (OMX_F32)(1 << shift);
+
+    if (v <= 0 || satBits <= 0)     /* nothing above zero is representable */
+        return 0;
+
+    /* A shift count of 32 or more is undefined, so clamp satBits to 32. */
+    maxV = (satBits >= 32) ? allOnes : (allOnes >> (32 - satBits));
+
+    vShifted = v / shiftDiv;
+    vRounded = (OMX_F32)(vShifted + 0.5);
+    if (vRounded >= 4294967296.0f)  /* 2^32: too large to cast to OMX_U32 */
+        return maxV;
+    vInt = (OMX_U32)vRounded;
+    return (vInt > maxV) ? maxV : vInt;
+}
+
+/**
+ * Function: armSwapElem
+ *
+ * Description:
+ * Swaps two elements at the specified pointer locations, byte by byte.
+ * The size of each element, in bytes, is given by <elemSize>
+ *
+ * Return Value:
+ * OMXResult -- Error status from the function
+ */
+OMXResult armSwapElem(
+        OMX_U8 *pBuf1,
+        OMX_U8 *pBuf2,
+        OMX_INT elemSize
+       )
+{
+    OMX_INT remaining;
+    OMX_U8 saved;
+    armRetArgErrIf(!pBuf1 || !pBuf2, OMX_Sts_BadArgErr);
+    /* Walk both buffers in step, exchanging one byte per pass. */
+    for (remaining = elemSize; remaining > 0; remaining--)
+    {
+        saved    = *pBuf1;
+        *pBuf1++ = *pBuf2;
+        *pBuf2++ = saved;
+    }
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Finds the median of three numbers
+ *
+ * Remarks: computed as max(min(f, s), min(max(f, s), t))
+ *
+ * Parameters:
+ * [in] fEntry     First entry
+ * [in] sEntry     Second entry
+ * [in] tEntry     Third entry
+ *
+ * Return Value:
+ * OMX_S32 --   returns the median value
+ */
+
+OMX_S32 armMedianOf3 (
+    OMX_S32 fEntry,
+    OMX_S32 sEntry,
+    OMX_S32 tEntry
+)
+{
+    /* Order the first two entries, then clamp the third between them. */
+    OMX_S32 low    = armMin (fEntry, sEntry);
+    OMX_S32 high   = armMax (fEntry, sEntry);
+    OMX_S32 capped = armMin (high, tEntry);
+
+    return (armMax (low, capped));
+}
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Finds the size, in bits, of a positive value
+ *
+ * Remarks: an input of 0 yields 0
+ *
+ * Parameters:
+ * [in] value    Positive value
+ *
+ * Return Value:
+ * OMX_U8 --     Returns the minimum number of bits required to represent the positive value.
+ *               This is the smallest k>=0 such that the value is less than (1<<k).
+ */
+
+OMX_U8 armLogSize (
+    OMX_U16 value
+)
+{
+    OMX_U8 bitCount = 0;
+
+    /* One pass per significant bit of the input. */
+    for (; value != 0; value >>= 1)
+        bitCount++;
+    return bitCount;
+}
+
+/***********************************************************************/
+                /* Saturating Arithmetic operations */
+
+/**
+ * Function :armSatAdd_S32()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Remarks:
+ *   The sum is only formed once it is known to fit: signed overflow is
+ *   undefined behaviour in C, so overflow cannot be detected reliably by
+ *   inspecting the sign of a sum that has already wrapped.
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Value1 + Value2, clamped to [OMX_MIN_S32, OMX_MAX_S32]
+ **/
+
+OMX_S32 armSatAdd_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    OMX_S32 Result;
+
+    if (Value2 > 0)
+    {
+        /*
+         * Adding a positive value can only overflow upwards.  The
+         * headroom OMX_MAX_S32 - Value2 cannot itself overflow here.
+         */
+        if (Value1 > OMX_MAX_S32 - Value2)
+        {
+            /*Result has saturated in positive side*/
+            return OMX_MAX_S32;
+        }
+    }
+    else if (Value2 < 0)
+    {
+        /*
+         * Adding a negative value can only overflow downwards.  The
+         * headroom OMX_MIN_S32 - Value2 cannot itself overflow here.
+         */
+        if (Value1 < OMX_MIN_S32 - Value2)
+        {
+            /*Result has saturated in negative side*/
+            return OMX_MIN_S32;
+        }
+    }
+
+    /*Result has not saturated*/
+    Result = Value1 + Value2;
+    return Result;
+}
+
+/**
+ * Function :armSatAdd_S64()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Remarks:
+ *   The sum is only formed once it is known to fit, because signed
+ *   overflow is undefined behaviour in C.
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Value1 + Value2, clamped to the OMX_S64 range
+ **/
+
+OMX_S64 armSatAdd_S64(OMX_S64 Value1,OMX_S64 Value2)
+{
+    /* The lower limit is derived from the upper one so that both
+     * limits are guaranteed to have the signed type OMX_S64. */
+    const OMX_S64 maxS64 = OMX_MAX_S64;
+    const OMX_S64 minS64 = -maxS64 - 1;
+
+    if (Value2 > 0)
+    {
+        /*
+         * Adding a positive value can only overflow upwards.  The
+         * headroom maxS64 - Value2 cannot itself overflow here.
+         */
+        if (Value1 > maxS64 - Value2)
+        {
+            /*Result has saturated in positive side*/
+            return maxS64;
+        }
+    }
+    else if (Value2 < 0)
+    {
+        /*
+         * Adding a negative value can only overflow downwards.  The
+         * headroom minS64 - Value2 cannot itself overflow here.
+         */
+        if (Value1 < minS64 - Value2)
+        {
+            /*Result has saturated in negative side*/
+            return minS64;
+        }
+    }
+
+    /*Result has not saturated*/
+    return Value1 + Value2;
+}
+
+/** Function :armSatSub_S32()
+ *
+ * Description :
+ *     Returns the result of saturated subtraction of the two inputs Value1, Value2
+ *
+ * Remarks:
+ *     The difference is only formed once it is known to fit, because
+ *     signed overflow is undefined behaviour in C.
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Value1 - Value2, clamped to [OMX_MIN_S32, OMX_MAX_S32]
+ **/
+
+OMX_S32 armSatSub_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    OMX_S32 Result;
+
+    if (Value2 < 0)
+    {
+        /*
+         * Subtracting a negative value can only overflow upwards.  The
+         * limit OMX_MAX_S32 + Value2 cannot itself overflow here.
+         */
+        if (Value1 > OMX_MAX_S32 + Value2)
+        {
+            /*Result has saturated in positive side*/
+            return OMX_MAX_S32;
+        }
+    }
+    else if (Value2 > 0)
+    {
+        /*
+         * Subtracting a positive value can only overflow downwards.  The
+         * limit OMX_MIN_S32 + Value2 cannot itself overflow here.
+         */
+        if (Value1 < OMX_MIN_S32 + Value2)
+        {
+            /*Result has saturated in negative side*/
+            return OMX_MIN_S32;
+        }
+    }
+
+    /*Result has not saturated*/
+    Result = Value1 - Value2;
+    return Result;
+}
+
+/**
+ * Function :armSatMac_S32()
+ *
+ * Description :
+ *     Multiplies Value1 by Value2 and adds the product to Mac with
+ *     saturation (via armSatAdd_S32)
+ *
+ * Parameters:
+ * [in] Mac          Accumulator
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ **/
+
+OMX_S32 armSatMac_S32(OMX_S32 Mac,OMX_S16 Value1,OMX_S16 Value2)
+{
+    /* Both operands undergo the usual integer promotions before the
+     * multiplication; the product is then handed to the saturating
+     * adder together with the accumulator. */
+    OMX_S32 product = (OMX_S32)(Value1*Value2);
+
+    return armSatAdd_S32(Mac, product);
+}
+
+/**
+ * Function :armSatMac_S16S32_S32
+ *
+ * Description :
+ *   Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac
+ *
+ *   mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Parameters:
+ * [in] delayElem    First 32 bit Operand
+ * [in] filTap       Second 16 bit Operand
+ * [in] mac          Result of MAC operation
+ *
+ * Return:
+ * [out]  mac        Result of operation
+ *    
+ **/
+ 
+OMX_S32 armSatMac_S16S32_S32(OMX_S32 mac, OMX_S32 delayElem, OMX_S16 filTap )
+{
+    /* Helper result is used purely as a range probe for the product */
+    const OMX_S32 probe = armSatMulS16S32_S32(filTap,delayElem);
+    OMX_S32 product;
+    if (probe > OMX_MAX_S16)
+    {
+        /* Probe above the 16 bit range: pin to the 32 bit maximum */
+        product = OMX_MAX_S32;
+    }
+    else if (probe < OMX_MIN_S16)
+    {
+        /* Probe below the 16 bit range: pin to the 32 bit minimum */
+        product = OMX_MIN_S32;
+    }
+    else
+    {
+        /* Probe in range: take the plain 32 bit product */
+        product = delayElem * filTap;
+    }
+
+    /* Accumulate through the saturating 32 bit add */
+    return armSatAdd_S32(mac,product);
+}
+
+
+/**
+ * Function :armSatRoundRightShift_S32_S16
+ *
+ * Description :
+ *   Returns the result of rounded right shift operation of input by the scalefactor
+ *
+ *   output = Saturate_in_16Bits( RoundedRightShift( input, shift ) )
+ *
+ * Parameters:
+ * [in] input       The input to be operated on
+ * [in] shift The shift number
+ *
+ * Return:
+ * [out]            Result of operation
+ *    
+ **/
+
+
+OMX_S16 armSatRoundRightShift_S32_S16(OMX_S32 input, OMX_INT shift)
+{
+    /* A right shift by 'shift' is delegated as a left shift by '-shift' */
+    const OMX_S32 shifted = armSatRoundLeftShift_S32(input,-shift);
+
+    /* Clamp into the signed 16 bit range before narrowing */
+    if (shifted > OMX_MAX_S16)
+    {
+        return (OMX_S16)OMX_MAX_S16;
+    }
+
+    if (shifted < OMX_MIN_S16)
+    {
+        return (OMX_S16)OMX_MIN_S16;
+    }
+
+    return (OMX_S16)shifted;
+}
+
+/**
+ * Function :armSatRoundLeftShift_S32()
+ *
+ * Description :
+ *     Returns the result of saturating left-shift operation on input
+ *     Or rounded Right shift if the input Shift is negative.
+ *     
+ * Parameters:
+ * [in] Value        Operand
+ * [in] Shift        Operand for shift operation
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+OMX_S32 armSatRoundLeftShift_S32(OMX_S32 Value, OMX_INT Shift)
+{
+    OMX_INT i;
+    if (Shift <= -32)
+    {
+        return 0;   /* no bits survive; a shift by >= 32 is undefined in C */
+    }
+    if (Shift < 0)
+    {
+        Shift = -Shift;     /* rounded right shift: add half an LSB first */
+        Value = armSatAdd_S32(Value, (1 << (Shift - 1)));
+        return Value >> Shift;
+    }
+    for (i = 0; i < Shift && i < 32; i++)   /* 32 doublings reach a limit */
+    {
+        Value = armSatAdd_S32(Value, Value);    /* saturating Value * 2 */
+    }
+    return Value;
+}
+
+/**
+ * Function :armSatRoundLeftShift_S64()
+ *
+ * Description :
+ *     Returns the result of saturating left-shift operation on input
+ *     Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value        Operand
+ * [in] shift        Operand for shift operation
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+ 
+OMX_S64 armSatRoundLeftShift_S64(OMX_S64 Value, OMX_INT Shift)
+{
+    OMX_INT i;
+    if (Shift <= -64)
+    {
+        return 0;   /* no bits survive; a shift by >= 64 is undefined in C */
+    }
+    if (Shift < 0)
+    {
+        Shift = -Shift;     /* rounded right shift: add half an LSB first */
+        Value = armSatAdd_S64(Value, ((OMX_S64)1 << (Shift - 1)));
+        return Value >> Shift;
+    }
+    for (i = 0; i < Shift && i < 64; i++)   /* 64 doublings reach a limit */
+    {
+        Value = armSatAdd_S64(Value, Value);    /* saturating Value * 2 */
+    }
+    return Value;
+}
+
+/**
+ * Function :armSatMulS16S32_S32()
+ *
+ * Description :
+ *     Returns the result of a S16 data type multiplied with an S32 data type
+ *     in a S32 container
+ *
+ * Parameters:
+ * [in] input1       Operand 1
+ * [in] input2       Operand 2
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+
+OMX_S32 armSatMulS16S32_S32(OMX_S16 input1,OMX_S32 input2)
+{
+    OMX_S16 hi2,lo1;
+    OMX_U16 lo2;
+    OMX_S32 temp1,temp2;
+
+    lo1  = input1;
+
+    /* Split input2 into a signed upper and an unsigned lower 16 bit half.
+     * The lower half is masked out of the unsigned value, because left
+     * shifting a negative signed input2 is undefined behaviour in C. */
+    hi2  = ( input2 >>  16 );
+    lo2  = (OMX_U16)( (OMX_U32)input2 & 0xFFFF );
+
+    /* Partial products: upper half, and lower half scaled back by 2^16 */
+    temp1 = hi2 * lo1;
+    temp2 = ( lo2* lo1 ) >> 16;
+
+    return armSatAdd_S32(temp1,temp2);
+}
+
+/**
+ * Function :armSatMulS32S32_S32()
+ *
+ * Description :
+ *     Returns the result of a S32 data type multiplied with an S32 data type
+ *     in a S32 container
+ *
+ * Parameters:
+ * [in] input1       Operand 1
+ * [in] input2       Operand 2
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+OMX_S32 armSatMulS32S32_S32(OMX_S32 input1,OMX_S32 input2)
+{
+    OMX_S16 hi1,hi2;
+    OMX_U16 lo1,lo2;
+    OMX_S32 temp1,temp2,temp3;
+
+    /* Split each operand into a signed upper and an unsigned lower 16 bit
+     * half. The lower halves are masked out of the unsigned value, because
+     * left shifting a negative signed operand is undefined behaviour in C. */
+    hi1  = ( input1 >>  16 );
+    lo1  = (OMX_U16)( (OMX_U32)input1 & 0xFFFF );
+
+    hi2  = ( input2 >>  16 );
+    lo2  = (OMX_U16)( (OMX_U32)input2 & 0xFFFF );
+
+    /* Partial products; the lo1*lo2 partial product is not included */
+    temp1 =   hi1 * hi2;
+    temp2 = ( hi1* lo2 ) >> 16;
+    temp3 = ( hi2* lo1 ) >> 16;
+
+    /* Sum the three terms, saturating at each step */
+    return armSatAdd_S32(armSatAdd_S32(temp1,temp2),temp3);
+}
+
+/**
+ * Function :armIntDivAwayFromZero()
+ *
+ * Description : Integer division with rounding to the nearest integer. 
+ *               Half-integer values are rounded away from zero
+ *               unless otherwise specified. For example 3//2 is rounded 
+ *               to 2, and -3//2 is rounded to -2.
+ *
+ * Parameters:
+ * [in] Num        Operand 1
+ * [in] Deno       Operand 2
+ *
+ * Return:
+ * [out]             Result of operation input1//input2
+ *    
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno)
+{
+    /* Quotient evaluated in OMX_F64 */
+    const OMX_F64 quotient = ((OMX_F64)Num)/((OMX_F64)Deno);
+
+    /*
+     * Push the quotient half a unit away from zero; the truncating
+     * conversion to OMX_S32 then yields the nearest integer, with exact
+     * halves rounded away from zero.
+     */
+    const OMX_F64 biased = (quotient >= 0) ? (quotient + 0.5)
+                                           : (quotient - 0.5);
+
+    /* NOTE(review): Deno == 0 is not guarded in this function -
+     * confirm that callers never pass it */
+    return (OMX_S32)(biased);
+}
+
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_Bitstream.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_Bitstream.c
new file mode 100755
index 0000000..9ef9319
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_Bitstream.c
@@ -0,0 +1,329 @@
+/**
+ * 
+ * File Name:  armCOMM_Bitstream.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Defines bitstream encode and decode functions common to all codecs
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+
+/***************************************
+ * Fixed bit length Decode
+ ***************************************/
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream pointer
+ *
+ * Parameters:
+ * [in]     **ppBitStream
+ * [in]     *pOffset
+ * [in]     N=1...32
+ *
+ * Returns  Value
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+
+    /* Read next 32 bits; widen each byte first so that << 24 cannot overflow int */
+    Value = ((OMX_U32)pBitStream[0] << 24 ) | ((OMX_U32)pBitStream[1] << 16)  | ((OMX_U32)pBitStream[2] << 8 ) | (pBitStream[3]) ;
+    Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset));   /* drop the Offset bits already consumed */
+
+    /* Return N bits, taken from the top of the 32 bit window; the stream position is not written back */
+    return Value >> (32-N);
+}
+
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream
+ *    
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ * [in]     N=1..32
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ * Returns  Value
+ */
+
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+    /* Nothing requested: return 0 and leave the stream position untouched */
+    if(N == 0)
+    {
+      return 0;
+    }
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+
+    /* Read next 32 bits; widen each byte first so that << 24 cannot overflow int */
+    Value = ((OMX_U32)pBitStream[0] << 24 ) | ((OMX_U32)pBitStream[1] << 16)  | ((OMX_U32)pBitStream[2] << 8 ) | (pBitStream[3]) ;
+    Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset));   /* drop the Offset bits already consumed */
+
+    /* Advance bitstream pointer by N bits */
+    Offset += N;
+    *ppBitStream = pBitStream + (Offset>>3);
+    *pOffset = Offset & 7;
+
+    /* Return N bits, taken from the top of the 32 bit window */
+    return Value >> (32-N);
+}
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align the pointer *ppBitStream to the next byte boundary
+ *
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ *
+ **/
+ 
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset)
+{
+    /* Offset is not positive: leave the stream position as it is */
+    if (*pOffset <= 0) return;
+    /* Otherwise discard the rest of the current byte */
+    *ppBitStream = *ppBitStream + 1;
+    *pOffset = 0;
+}
+
+/** 
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Skip N bits from the value at *ppBitStream
+ *
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ * [in]     N
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ *
+ **/
+
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N)
+{
+    /* Bit position after the skip, counted from the start of the current byte */
+    const OMX_INT bitPos = *pOffset + N;
+
+    /* Whole bytes move the stream pointer ... */
+    *ppBitStream = *ppBitStream + (bitPos>>3);
+    /* ... and the low three bits become the new in-byte offset */
+    *pOffset = bitPos & 7;
+}
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of variable length symbol (max size 32 bits) read from
+ * the bit stream pointed by *ppBitStream at *pOffset by using the table
+ * pointed by pCodeBook
+ * 
+ * Parameters:
+ * [in]     **ppBitStream
+ * [in]     *pOffset
+ * [in]     pCodeBook
+ * 
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ *
+ * Returns : Code Book Index if successful.
+ *         : ARM_NO_CODEBOOK_INDEX = -1 if search fails.
+ **/
+#ifndef C_OPTIMIZED_IMPLEMENTATION 
+
+OMX_U16 armUnPackVLC32(
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pOffset,
+    const ARM_VLC32 *pCodeBook
+)
+{    
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+    OMX_INT Index;
+        
+    armAssert(Offset>=0 && Offset<=7);
+
+    /* Read next 32 bits; widen each byte first so that << 24 cannot overflow int */
+    Value = ((OMX_U32)pBitStream[0] << 24 ) | ((OMX_U32)pBitStream[1] << 16)  | ((OMX_U32)pBitStream[2] << 8 ) | (pBitStream[3]) ;
+    Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset));   /* drop the Offset bits already consumed */
+
+    /* Search through the codebook; an entry with codeLen == 0 ends the table */
+    for (Index=0; pCodeBook->codeLen != 0; Index++)
+    {
+        if (pCodeBook->codeWord == (Value >> (32 - pCodeBook->codeLen)))
+        {
+            Offset       = Offset + pCodeBook->codeLen;
+            *ppBitStream = pBitStream + (Offset >> 3) ;
+            *pOffset     = Offset & 7;
+            /* Stream position now sits just past the matched code word */
+            return Index;
+        }        
+        pCodeBook++;
+    }
+
+    /* No code match found; the stream position is left unchanged */
+    return ARM_NO_CODEBOOK_INDEX;
+}
+
+#endif
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream     pointer to the pointer to the current byte 
+ *                      in the bit stream.
+ * [in] pOffset         pointer to the bit position in the byte 
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in] codeWord        Code word that need to be inserted in to the
+ *                          bitstream
+ * [in] codeLength      Length of the code word valid range 1...32
+ *
+ * [out] ppBitStream    *ppBitStream is updated after the block is encoded,
+ *                          so that it points to the current byte in the bit
+ *                          stream buffer.
+ * [out] pOffset        *pOffset is updated so that it points to the
+ *                          current bit position in the byte pointed by
+ *                          *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+ 
+OMXResult armPackBits (
+    OMX_U8  **ppBitStream, 
+    OMX_INT *pOffset,
+    OMX_U32 codeWord, 
+    OMX_INT codeLength 
+)
+{
+    OMX_U8  *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+        
+    /* checking argument validity: Offset must be 0..7 and codeLength 1..32 */
+    armRetArgErrIf(Offset < 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf(Offset > 7, OMX_Sts_BadArgErr);
+    armRetArgErrIf(codeLength < 1, OMX_Sts_BadArgErr);
+    armRetArgErrIf(codeLength > 32, OMX_Sts_BadArgErr);
+
+    /* Prepare the first byte */
+    codeWord = codeWord << (32-codeLength);                 /* left-align the code word in 32 bits */
+    Value = (pBitStream[0] >> (8-Offset)) << (8-Offset);    /* keep the top Offset bits already in the byte */
+    Value = Value | (codeWord >> (24+Offset));              /* fill the remaining bits from the top of the code word */
+
+    /* Write out whole bytes: loop while the remaining code fills the rest of the current byte */
+    while (8-Offset <= codeLength)
+    {
+        *pBitStream++ = (OMX_U8)Value;
+        codeWord   = codeWord  << (8-Offset);       /* discard the bits just written */
+        codeLength = codeLength - (8-Offset);
+        Offset = 0;                                 /* every later byte starts at bit 0 */
+        Value = codeWord >> 24;                     /* next 8 bits of the code word */
+    }
+
+    /* Write out final partial byte; the stream pointer stays on this byte */
+    *pBitStream  = (OMX_U8)Value;
+    *ppBitStream = pBitStream;
+    *pOffset = Offset + codeLength;     /* bits now used in the current byte */
+    
+    return  OMX_Sts_NoErr;
+}
+ 
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte 
+ *                      in the bit stream.
+ * [in]	pBitOffset	    pointer to the bit position in the byte 
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in]	 code     		VLC code word that need to be inserted in to the
+ *                      bitstream
+ *
+ * [out] ppBitStream	*ppBitStream is updated after the block is encoded,
+ *	                    so that it points to the current byte in the bit
+ *						stream buffer.
+ * [out] pBitOffset		*pBitOffset is updated so that it points to the
+ *						current bit position in the byte pointed by
+ *						*ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+ 
+OMXResult armPackVLC32 (OMX_U8 **ppBitStream, OMX_INT *pBitOffset, ARM_VLC32 code)
+{
+    /* Unpack the code book entry into the generic bit packer's arguments */
+    const OMX_U32 word   = code.codeWord;
+    const OMX_INT length = code.codeLen;
+
+    return armPackBits(ppBitStream, pBitOffset, word, length);
+}
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_IDCTTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_IDCTTable.c
new file mode 100755
index 0000000..3f5e279
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_IDCTTable.c
@@ -0,0 +1,60 @@
+/**
+ * 
+ * File Name:  armCOMM_IDCTTable.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *   
+ * File: armCOMM_IDCTTable.c
+ * Brief: Defines Tables used in IDCT computation
+ *
+ */
+
+#include "armCOMM_IDCTTable.h"
+
+     /*  Table of s(u)*A(u)*A(v)/16 at Q15, stored row by row as [v][u]
+      *  (u is the column index, so the s(u) scaling shows in columns 6 and 7)
+      *  s(u)=1.0 0 <= u <= 5
+      *  s(6)=2.0
+      *  s(7)=4.0
+      *  A(0) = 2*sqrt(2)
+      *  A(u) = 4*cos(u*pi/16)  for (u!=0)
+      */
+__align(4) const OMX_U16 armCOMM_IDCTPreScale [64] =
+{
+    0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1,
+    0x58c5, 0x7b21, 0x73fc, 0x6862, 0x58c5, 0x45bf, 0x6016, 0x61f8,
+    0x539f, 0x73fc, 0x6d41, 0x6254, 0x539f, 0x41b3, 0x5a82, 0x5c48,
+    0x4b42, 0x6862, 0x6254, 0x587e, 0x4b42, 0x3b21, 0x5175, 0x530d,
+    0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1,
+    0x3249, 0x45bf, 0x41b3, 0x3b21, 0x3249, 0x2782, 0x366d, 0x377e,
+    0x22a3, 0x300b, 0x2d41, 0x28ba, 0x22a3, 0x1b37, 0x257e, 0x263a,
+    0x11a8, 0x187e, 0x1712, 0x14c3, 0x11a8, 0x0de0, 0x131d, 0x137d    
+};
+    /* Same scale factors as armCOMM_IDCTPreScale, held in Q23 format (0.5 == 0x400000) */
+const OMX_U32 armCOMM_IDCTPreScaleU32 [64] =
+{
+    0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157,
+    0x58c543, 0x7b20d8, 0x73fbfc, 0x686214, 0x58c543, 0x45bf1f, 0x6015a5, 0x61f78b,
+    0x539eba, 0x73fbfc, 0x6d413d, 0x6253a6, 0x539eba, 0x41b328, 0x5a827a, 0x5c4869,
+    0x4b418c, 0x686214, 0x6253a6, 0x587de3, 0x4b418c, 0x3b20d8, 0x5174e0, 0x530d69,
+    0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157,
+    0x3248d4, 0x45bf1f, 0x41b328, 0x3b20d8, 0x3248d4, 0x27821d, 0x366d72, 0x377e6b,
+    0x22a2f5, 0x300ad3, 0x2d413d, 0x28ba70, 0x22a2f5, 0x1b36b9, 0x257d86, 0x26398d,
+    0x11a856, 0x187de3, 0x17121a, 0x14c35a, 0x11a856, 0x0ddf9b, 0x131cc7, 0x137ca2
+};
+   
+const OMX_U16 armCOMM_IDCTCoef [4] =   /* constants in Q15 (value * 32768) */
+{
+    0x5a82, /* InvSqrt2: 1/sqrt(2) */
+    0x30fc, /* SinPIBy8: sin(pi/8) */
+    0x7642, /* CosPIBy8: cos(pi/8) */
+    0x0000  /* fourth entry is zero - presumably padding; TODO confirm */
+};
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_MaskTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_MaskTable.c
new file mode 100755
index 0000000..09f88c3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_MaskTable.c
@@ -0,0 +1,45 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  armCOMM_MaskTable.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Mask Table to mask the end of array.
+ * 
+ */
+ 
+#include "omxtypes.h"
+
+#define MaskTableSize 72
+
+const OMX_U16 armCOMM_qMaskTable16[MaskTableSize] =  /* laid out as 9 rows of 8; row k has its first k entries set to 0xFFFF */
+{
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000,
+        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF    
+};
+
+const OMX_U8 armCOMM_qMaskTable8[MaskTableSize] =  /* laid out as 9 rows of 8; row k has its first k entries set to 0xFF */
+{
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+        0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+        0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
+        0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00,  
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00,  
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF    
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVC.h
new file mode 100755
index 0000000..35b510b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVC.h
@@ -0,0 +1,1153 @@
+/**
+ * 
+ * File Name:  armVC.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File: armVC.h
+ * Brief: Declares API's/Basic Data types used across the OpenMAX Video domain
+ *
+ */
+
+
+#ifndef _armVideo_H_
+#define _armVideo_H_
+
+#include "omxVC.h"
+#include "armCOMM_Bitstream.h"
+
+/**
+ * ARM specific state structures to hold Motion Estimation information.
+ * Each one pairs the OMX ME parameter block with the OMX ME mode for its API family.
+ */
+struct m4p2_MESpec                  /* uses the OMXVCM4P2 types */
+{
+    OMXVCM4P2MEParams MEParams;
+    OMXVCM4P2MEMode   MEMode;
+};
+
+struct m4p10_MESpec                 /* uses the OMXVCM4P10 types */
+{
+    OMXVCM4P10MEParams MEParams;
+    OMXVCM4P10MEMode   MEMode;
+};
+
+typedef struct m4p2_MESpec  ARMVCM4P2_MESpec;
+typedef struct m4p10_MESpec ARMVCM4P10_MESpec;
+
+/**
+ * Function: armVCM4P2_CompareMV
+ *
+ * Description:
+ * Performs comparison of motion vectors and SADs to decide the
+ * best MV and SAD
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]     mvX     x coordinate of the candidate motion vector
+ * [in]     mvY     y coordinate of the candidate motion vector
+ * [in]     candSAD Candidate SAD
+ * [in]     bestMVX x coordinate of the best motion vector
+ * [in]     bestMVY y coordinate of the best motion vector
+ * [in]     bestSAD best SAD
+ *
+ * Return Value:
+ * OMX_INT -- 1 to indicate that the current SAD is the best
+ *            0 to indicate that it is NOT the best SAD
+ */
+
+OMX_INT armVCM4P2_CompareMV (
+    OMX_S16 mvX,
+    OMX_S16 mvY,
+    OMX_INT candSAD,
+    OMX_S16 bestMVX,
+    OMX_S16 bestMVY,
+    OMX_INT bestSAD);
+
+/**
+ * Function: armVCM4P2_ACDCPredict
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+ * to the function call, prediction direction (predDir) should be selected
+ * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDst     pointer to the coefficient buffer which contains
+ *                          the quantized coefficient residuals (PQF) of the
+ *                          current block
+ * [in] pPredBufRow pointer to the coefficient row buffer
+ * [in] pPredBufCol pointer to the coefficient column buffer
+ * [in] curQP       quantization parameter of the current block. curQP
+ *                          may equal to predQP especially when the current
+ *                          block and the predictor block are in the same
+ *                          macroblock.
+ * [in] predQP      quantization parameter of the predictor block
+ * [in] predDir     indicates the prediction direction which takes one
+ *                          of the following values:
+ *                          OMX_VIDEO_HORIZONTAL    predict horizontally
+ *                          OMX_VIDEO_VERTICAL      predict vertically
+ * [in] ACPredFlag  a flag indicating if AC prediction should be
+ *                          performed. It is equal to ac_pred_flag in the bit
+ *                          stream syntax of MPEG-4
+ * [in] videoComp   video component type (luminance, chrominance or
+ *                          alpha) of the current block
+ * [in] flag        This flag selects whether the function is used to
+ *                  calculate PQF (set 1, prediction) or QF (set 0, reconstruction)
+ * [out]    pPreACPredict   pointer to the predicted coefficients buffer.
+ *                          Filled ONLY if it is not NULL
+ * [out]    pSrcDst     pointer to the coefficient buffer which contains
+ *                          the quantized coefficients (QF) of the current
+ *                          block
+ * [out]    pPredBufRow pointer to the updated coefficient row buffer
+ * [out]    pPredBufCol pointer to the updated coefficient column buffer
+ * [out]    pSumErr     pointer to the updated sum of the difference
+ *                      between predicted and unpredicted coefficients
+ *                      If this is NULL, do not update
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_ACDCPredict(
+     OMX_S16 * pSrcDst,                     /* in/out */
+     OMX_S16 * pPreACPredict,               /* out; filled only if not NULL */
+     OMX_S16 * pPredBufRow,                 /* in/out */
+     OMX_S16 * pPredBufCol,                 /* in/out */
+     OMX_INT curQP,
+     OMX_INT predQP,
+     OMX_INT predDir,
+     OMX_INT ACPredFlag,
+     OMXVCM4P2VideoComponent  videoComp,
+     OMX_U8 flag,                           /* 1: prediction (PQF), 0: reconstruction (QF) */
+     OMX_INT *pSumErr                       /* out; not updated if NULL */
+);
+
+/**
+ * Function: armVCM4P2_SetPredDir
+ *
+ * Description:
+ * Performs detecting the prediction direction
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] blockIndex  block index indicating the component type and
+ *                          position as defined in subclause 6.1.3.8, of ISO/IEC
+ *                          14496-2. Furthermore, indexes 6 to 9 indicate the
+ *                          alpha blocks spatially corresponding to luminance
+ *                          blocks 0 to 3 in the same macroblock.
+ * [in] pCoefBufRow pointer to the coefficient row buffer
+ * [in] pQpBuf      pointer to the quantization parameter buffer
+ * [out]    predQP      quantization parameter of the predictor block
+ * [out]    predDir     indicates the prediction direction which takes one
+ *                          of the following values:
+ *                          OMX_VIDEO_HORIZONTAL    predict horizontally
+ *                          OMX_VIDEO_VERTICAL      predict vertically
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_SetPredDir(
+     OMX_INT blockIndex,
+     OMX_S16 *pCoefBufRow,
+     OMX_S16 *pCoefBufCol,      /* NOTE(review): missing from the parameter notes; presumably the coefficient column buffer - confirm */
+     OMX_INT *predDir,          /* out */
+     OMX_INT *predQP,           /* out */
+     const OMX_U8 *pQpBuf
+);
+
+/**
+ * Function: armVCM4P2_EncodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs zigzag scanning and VLC encoding for one intra block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream     pointer to the pointer to the current byte in
+ *                              the bit stream
+ * [in] pBitOffset      pointer to the bit position in the byte pointed
+ *                              by *ppBitStream. Valid within 0 to 7.
+ * [in] pQDctBlkCoef    pointer to the quantized DCT coefficient
+ * [in] predDir         AC prediction direction, which is used to decide
+ *                              the zigzag scan pattern. This takes one of the
+ *                              following values:
+ *                              OMX_VIDEO_NONE          AC prediction not used.
+ *                                                      Performs classical zigzag
+ *                                                      scan.
+ *                              OMX_VIDEO_HORIZONTAL    Horizontal prediction.
+ *                                                      Performs alternate-vertical
+ *                                                      zigzag scan.
+ *                              OMX_VIDEO_VERTICAL      Vertical prediction.
+ *                                                      Performs alternate-horizontal
+ *                                                      zigzag scan.
+ * [in] pattern         block pattern which is used to decide whether
+ *                              this block is encoded
+ * [in] start           start indicates whether the encoding begins with 0th element
+ *                      or 1st.
+ * [out]    ppBitStream     *ppBitStream is updated after the block is encoded,
+ *                              so that it points to the current byte in the bit
+ *                              stream buffer.
+ * [out]    pBitOffset      *pBitOffset is updated so that it points to the
+ *                              current bit position in the byte pointed by
+ *                              *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_EncodeVLCZigzag_Intra(
+     OMX_U8 **ppBitStream,                  /* in/out */
+     OMX_INT *pBitOffset,                   /* in/out */
+     const OMX_S16 *pQDctBlkCoef,
+     OMX_U8 predDir,
+     OMX_U8 pattern,
+     OMX_INT shortVideoHeader,              /* NOTE(review): missing from the parameter notes; presumably the short_video_header flag described for armVCM4P2_DecodeVLCZigzag_Intra - confirm */
+     OMX_U8 start
+);
+
+/**
+ * Function: armVCM4P2_DecodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream     pointer to the pointer to the current byte in
+ *                              the bitstream buffer
+ * [in] pBitOffset      pointer to the bit position in the byte pointed
+ *                              to by *ppBitStream. *pBitOffset is valid within
+ *                              [0-7].
+ * [in] predDir         AC prediction direction which is used to decide
+ *                              the zigzag scan pattern. It takes one of the
+ *                              following values:
+ *                              OMX_VIDEO_NONE  AC prediction not used;
+ *                                              perform classical zigzag scan;
+ *                              OMX_VIDEO_HORIZONTAL    Horizontal prediction;
+ *                                                      perform alternate-vertical
+ *                                                      zigzag scan;
+ *                              OMX_VIDEO_VERTICAL      Vertical prediction;
+ *                                                      thus perform
+ *                                                      alternate-horizontal
+ *                                                      zigzag scan.
+ * [in] videoComp       video component type (luminance, chrominance or
+ *                              alpha) of the current block
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ *                           and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start           start indicates whether the decoding begins with 0th element
+ *                      or 1st.
+ * [out]    ppBitStream     *ppBitStream is updated after the block is
+ *                              decoded, so that it points to the current byte
+ *                              in the bit stream buffer
+ * [out]    pBitOffset      *pBitOffset is updated so that it points to the
+ *                              current bit position in the byte pointed by
+ *                              *ppBitStream
+ * [out]    pDst            pointer to the coefficient buffer of current
+ *                              block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_DecodeVLCZigzag_Intra(     /* NOTE(review): the parameter notes list videoComp, which this prototype does not take */
+     const OMX_U8 ** ppBitStream,           /* in/out */
+     OMX_INT * pBitOffset,                  /* in/out */
+     OMX_S16 * pDst,                        /* out */
+     OMX_U8 predDir,
+     OMX_INT shortVideoHeader, 
+     OMX_U8  start
+);
+
+/**
+ * Function: armVCM4P2_FillVLDBuffer
+ *
+ * Description:
+ * Performs filling of the coefficient buffer according to the run, level
+ * and sign, also updates the index
+ * 
+ * Parameters:
+ * [in]  storeRun        Stored Run value (count of zeros)   
+ * [in]  storeLevel      Stored Level value (non-zero value)
+ * [in]  sign            Flag indicating the sign of level
+ * [in]  last            status of the last flag
+ * [in]  pIndex          pointer to coefficient index in 8x8 matrix
+ * [out] pIndex          pointer to updated coefficient index in 8x8 
+ *                       matrix
+ * [in]  pZigzagTable    pointer to the zigzag tables
+ * [out] pDst            pointer to the coefficient buffer of current
+ *                       block. Should be 32-bit aligned
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLDBuffer(
+    OMX_U32 storeRun,
+    OMX_S16 * pDst,                 /* out */
+    OMX_S16 storeLevel,
+    OMX_U8  sign,
+    OMX_U8  last,
+    OMX_U8  * index,                /* in/out; NOTE(review): the parameter notes call this pIndex */
+    const OMX_U8 * pZigzagTable
+);
+
+/**
+ * Function: armVCM4P2_GetVLCBits
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and 
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte in
+ *								the bit stream
+ * [in]	pBitOffset		pointer to the bit position in the byte pointed
+ *								by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ *                           and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start           start indicates whether the encoding begins with 
+ *                      0th element or 1st.
+ * [in/out] pLast       pointer to last status flag
+ * [in] runBeginSingleLevelEntriesL0      The run value from which level 
+ *                                        will be equal to 1: last == 0
+ * [in] IndexBeginSingleLevelEntriesL0    Array index in the VLC table 
+ *                                        pointing to the  
+ *                                        runBeginSingleLevelEntriesL0 
+ * [in] runBeginSingleLevelEntriesL1      The run value from which level 
+ *                                        will be equal to 1: last == 1
+ * [in] IndexBeginSingleLevelEntriesL1    Array index in the VLC table 
+ *                                        pointing to the  
+ *                                        runBeginSingleLevelEntriesL1 
+ * [in] pRunIndexTableL0    Run Index table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0         VLC table for last == 0
+ * [in] pRunIndexTableL1    Run Index table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1         VLC table for last == 1
+ * [in] pLMAXTableL0        Level MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1        Level MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0        Run MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1        Run MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [out]pDst			    pointer to the coefficient buffer of current
+ *							block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_GetVLCBits (
+              const OMX_U8 **ppBitStream,
+              OMX_INT * pBitOffset,
+			  OMX_S16 * pDst,
+			  OMX_INT shortVideoHeader,
+			  OMX_U8    start,			  
+			  OMX_U8  * pLast,
+			  OMX_U8    runBeginSingleLevelEntriesL0,
+			  OMX_U8    maxIndexForMultipleEntriesL0,
+			  OMX_U8    maxRunForMultipleEntriesL1,
+			  OMX_U8    maxIndexForMultipleEntriesL1,
+              const OMX_U8  * pRunIndexTableL0,
+              const ARM_VLC32 *pVlcTableL0,
+			  const OMX_U8  * pRunIndexTableL1,
+              const ARM_VLC32 *pVlcTableL1,
+              const OMX_U8  * pLMAXTableL0,
+              const OMX_U8  * pLMAXTableL1,
+              const OMX_U8  * pRMAXTableL0,
+              const OMX_U8  * pRMAXTableL1,
+              const OMX_U8  * pZigzagTable
+);
+
+/**
+ * Function: armVCM4P2_PutVLCBits
+ *
+ * Description:
+ * Checks the type of Escape Mode and put encoded bits for 
+ * quantized DCT coefficients.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	 ppBitStream      pointer to the pointer to the current byte in
+ *						  the bit stream
+ * [in]	 pBitOffset       pointer to the bit position in the byte pointed
+ *                        by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ *                           and escape mode 4 is used when shortVideoHeader==1.
+ * [in]  start            start indicates whether the encoding begins with 
+ *                        0th element or 1st.
+ * [in]  maxStoreRunL0    Max store possible (considering last and inter/intra)
+ *                        for last = 0
+ * [in]  maxStoreRunL1    Max store possible (considering last and inter/intra)
+ *                        for last = 1
+ * [in]  maxRunForMultipleEntriesL0 
+ *                        The run value after which level 
+ *                        will be equal to 1: 
+ *                        (considering last and inter/intra status) for last = 0
+ * [in]  maxRunForMultipleEntriesL1 
+ *                        The run value after which level 
+ *                        will be equal to 1: 
+ *                        (considering last and inter/intra status) for last = 1
+ * [in]  pRunIndexTableL0 Run Index table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in]  pVlcTableL0      VLC table for last == 0
+ * [in]  pRunIndexTableL1 Run Index table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in]  pVlcTableL1      VLC table for last == 1
+ * [in]  pLMAXTableL0     Level MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in]  pLMAXTableL1     Level MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in]  pRMAXTableL0     Run MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in]  pRMAXTableL1     Run MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [out] pQDctBlkCoef     pointer to the quantized DCT coefficient
+ * [out] ppBitStream      *ppBitStream is updated after the block is encoded
+ *                        so that it points to the current byte in the bit
+ *                        stream buffer.
+ * [out] pBitOffset       *pBitOffset is updated so that it points to the
+ *                        current bit position in the byte pointed by
+ *                        *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+
+OMXResult armVCM4P2_PutVLCBits (
+              OMX_U8 **ppBitStream,
+              OMX_INT * pBitOffset,
+              const OMX_S16 *pQDctBlkCoef,
+              OMX_INT shortVideoHeader,
+              OMX_U8 start,
+              OMX_U8 maxStoreRunL0,
+              OMX_U8 maxStoreRunL1,
+              OMX_U8  maxRunForMultipleEntriesL0,
+              OMX_U8  maxRunForMultipleEntriesL1,
+              const OMX_U8  * pRunIndexTableL0,
+              const ARM_VLC32 *pVlcTableL0,
+			  const OMX_U8  * pRunIndexTableL1,
+              const ARM_VLC32 *pVlcTableL1,
+              const OMX_U8  * pLMAXTableL0,
+              const OMX_U8  * pLMAXTableL1,
+              const OMX_U8  * pRMAXTableL0,
+              const OMX_U8  * pRMAXTableL1,
+              const OMX_U8  * pZigzagTable
+);
+/**
+ * Function: armVCM4P2_FillVLCBuffer
+ *
+ * Description:
+ * Calculates the VLC bits depending on the escape type and inserts 
+ * them into the bitstream.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	 ppBitStream		pointer to the pointer to the current byte in
+ *	                        the bit stream
+ * [in]	 pBitOffset         pointer to the bit position in the byte pointed
+ *                          by *ppBitStream. Valid within 0 to 7
+ * [in]  run                Run value (count of zeros) to be encoded  
+ * [in]  level              Level value (non-zero value) to be encoded
+ * [in]  runPlus            Calculated as runPlus = run - (RMAX + 1)  
+ * [in]  levelPlus          Calculated as 
+ *                          levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in]  fMode              Flag indicating the escape modes
+ * [in]  last               status of the last flag
+ * [in]  maxRunForMultipleEntries 
+ *                          The run value after which level will be equal to 1: 
+ *                          (considering last and inter/intra status)
+ * [in]  pRunIndexTable     Run Index table defined in
+ *                          armVCM4P2_Huff_tables_VLC.h
+ * [in]  pVlcTable          VLC table defined in armVCM4P2_Huff_tables_VLC.h
+ * [out] ppBitStream		*ppBitStream is updated after the block is encoded
+ *                          so that it points to the current byte in the bit
+ *                          stream buffer.
+ * [out] pBitOffset         *pBitOffset is updated so that it points to the
+ *                          current bit position in the byte pointed by
+ *                          *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLCBuffer (
+              OMX_U8 **ppBitStream,
+              OMX_INT * pBitOffset,
+              OMX_U32 run,
+              OMX_S16 level, 
+			  OMX_U32 runPlus,
+              OMX_S16 levelPlus, 
+              OMX_U8  fMode,
+			  OMX_U8  last,
+              OMX_U8  maxRunForMultipleEntries, 
+              const OMX_U8  *pRunIndexTable,
+              const ARM_VLC32 *pVlcTable
+);
+
+/**
+ * Function: armVCM4P2_CheckVLCEscapeMode
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and 
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] run             Run value (count of zeros) to be encoded  
+ * [in] level           Level value (non-zero value) to be encoded
+ * [in] runPlus         Calculated as runPlus = run - (RMAX + 1)  
+ * [in] levelPlus       Calculated as 
+ *                      levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] maxStoreRun     Max store possible (considering last and inter/intra)
+ * [in] maxRunForMultipleEntries 
+ *                      The run value after which level 
+ *                      will be equal to 1: 
+ *                      (considering last and inter/intra status)
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ *                           and escape mode 4 is used when shortVideoHeader==1.
+ * [in] pRunIndexTable  Run Index table defined in 
+ *                      armVCM4P2_Huff_Tables_VLC.c
+ *                      (considering last and inter/intra status)
+ *
+ *                      
+ * Return Value:
+ * Returns an Escape mode which can take values from 0 to 3
+ * 0 --> no escape mode, 1 --> escape type 1,
+ * 2 --> escape type 2, 3 --> escape type 3, check section 7.4.1.3
+ * in the MPEG ISO standard.
+ *
+ */
+
+OMX_U8 armVCM4P2_CheckVLCEscapeMode(
+     OMX_U32 run,
+     OMX_U32 runPlus,
+     OMX_S16 level,
+     OMX_S16 levelPlus,
+     OMX_U8  maxStoreRun,
+     OMX_U8  maxRunForMultipleEntries,
+     OMX_INT shortVideoHeader,
+     const OMX_U8  *pRunIndexTable
+);
+
+
+/**
+ * Function: armVCM4P2_BlockMatch_Integer
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated minimum SAD.  
+ * Both the input and output motion vectors are represented using half-pixel units, and 
+ * therefore a shift left or right by 1 bit may be required, respectively, to match the 
+ * input or output MVs with other functions that either generate output MVs or expect 
+ * input MVs represented using integer pixel units. 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	pSrcRefBuf		pointer to the reference Y plane; points to the reference MB that 
+ *                    corresponds to the location of the current macroblock in the current 
+ *                    plane.
+ * [in]	refWidth		  width of the reference plane
+ * [in]	pRefRect		  pointer to the valid rectangle in the reference plane, relative to the image origin. 
+ *                    It is not limited to the image boundary, but depends on the padding. For example, 
+ *                    if you pad 4 pixels outside the image border, then the value for left border 
+ *                    can be -4
+ * [in]	pSrcCurrBuf		pointer to the current macroblock extracted from original plane (linear array, 
+ *                    256 entries); must be aligned on an 8-byte boundary.
+ * [in] pCurrPointPos	position of the current macroblock in the current plane
+ * [in] pSrcPreMV		  pointer to predicted motion vector; NULL indicates no predicted MV
+ * [in] pSrcPreSAD		pointer to SAD associated with the predicted MV (referenced by pSrcPreMV)
+ * [in] searchRange		search range for the 16x16 integer block, in full-pixel units; the search range 
+ *                    is the same in all directions. It is inclusive of the boundary and specified in 
+ *                    terms of integer pixel units.
+ * [in] pMESpec			  vendor-specific motion estimation specification structure; must have been allocated 
+ *                    and then initialized using omxVCM4P2_MEInit prior to calling the block matching 
+ *                    function.
+ * [in] BlockSize     MacroBlock Size i.e either 16x16 or 8x8.
+ * [out]	pDstMV			pointer to estimated MV
+ * [out]	pDstSAD			pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error.
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Integer(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pCurrPointPos,
+     const OMXVCMotionVector *pSrcPreMV,
+     const OMX_INT *pSrcPreSAD,
+     void *pMESpec,
+     OMXVCMotionVector *pDstMV,
+     OMX_INT *pDstSAD,
+     OMX_U8 BlockSize
+);
+
+/**
+ * Function: armVCM4P2_BlockMatch_Half
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution.  Returns the estimated 
+ * motion vector and associated minimum SAD.  This function estimates the half-pixel 
+ * motion vector by interpolating the integer resolution motion vector referenced 
+ * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated 
+ * externally.  The input parameters pSrcRefBuf and pSearchPointRefPos should be 
+ * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16.  
+ * The function BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	pSrcRefBuf		pointer to the reference Y plane; points to the reference MB 
+ *                    that corresponds to the location of the current macroblock in 
+ *                    the	current plane.
+ * [in]	refWidth		  width of the reference plane
+ * [in]	pRefRect		  reference plane valid region rectangle
+ * [in]	pSrcCurrBuf		pointer to the current macroblock extracted from original plane 
+ *                    (linear array, 256 entries); must be aligned on an 8-byte boundary. 
+ * [in]	pSearchPointRefPos	position of the starting point for half pixel search (specified 
+ *                          in terms of integer pixel units) in the reference plane.
+ * [in]	rndVal			  rounding control bit for half pixel motion estimation; 
+ *                    0=rounding control disabled; 1=rounding control enabled
+ * [in]	pSrcDstMV		pointer to the initial MV estimate; typically generated during a prior 
+ *                  16X16 integer search and its unit is half pixel.
+ * [in] BlockSize     MacroBlock Size i.e either 16x16 or 8x8.
+ * [out]pSrcDstMV		pointer to estimated MV
+ * [out]pDstSAD			pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Half(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pSearchPointRefPos,
+     OMX_INT rndVal,
+     OMXVCMotionVector *pSrcDstMV,
+     OMX_INT *pDstSAD,
+     OMX_U8 BlockSize
+);
+/**
+ * Function: armVCM4P2_PadMV
+ *
+ * Description:
+ * Performs motion vector padding for a macroblock.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDstMV       pointer to motion vector buffer of the current
+ *                              macroblock
+ * [in] pTransp         pointer to transparent status buffer of the
+ *                              current macroblock
+ * [out]    pSrcDstMV       pointer to motion vector buffer in which the
+ *                              motion vectors have been padded
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_PadMV(
+     OMXVCMotionVector * pSrcDstMV,
+     OMX_U8 * pTransp
+);
+
+/* 
+ * H.264 Specific Declarations 
+ */
+/* Defines */
+#define ARM_M4P10_Q_OFFSET        (15)
+
+
+/* Dequant tables */
+
+extern const OMX_U8 armVCM4P10_PosToVCol4x4[16];
+extern const OMX_U8 armVCM4P10_PosToVCol2x2[4];
+extern const OMX_U8 armVCM4P10_VMatrix[6][3];
+extern const OMX_U32 armVCM4P10_MFMatrix[6][3];
+
+
+/*
+ * Description:
+ * This function performs the work required by the OpenMAX
+ * DecodeCoeffsToPair and DecodeChromaDCCoeffsToPair functions.
+ * Since most of the code is common we share it here.
+ *
+ * Parameters:
+ * [in]	ppBitStream		Double pointer to current byte in bit stream buffer
+ * [in]	pOffset			Pointer to current bit position in the byte pointed
+ *								to by *ppBitStream
+ * [in]	sMaxNumCoeff	Maximum number of non-zero coefficients in current
+ *								block (4,15 or 16)
+ * [in]	nTable          Table number (0 to 4) according to the five columns
+ *                      of Table 9-5 in the H.264 spec
+ * [out]	ppBitStream		*ppBitStream is updated after each block is decoded
+ * [out]	pOffset			*pOffset is updated after each block is decoded
+ * [out]	pNumCoeff		Pointer to the number of nonzero coefficients in
+ *								this block
+ * [out]	ppPosCoefbuf	Double pointer to destination residual
+ *								coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+
+ */
+
+OMXResult armVCM4P10_DecodeCoeffsToPair(
+     const OMX_U8** ppBitStream,
+     OMX_S32* pOffset,
+     OMX_U8* pNumCoeff,
+     OMX_U8**ppPosCoefbuf,
+     OMX_INT nTable,
+     OMX_INT sMaxNumCoeff        
+ );
+
+/*
+ * Description:
+ * Perform DC style intra prediction, averaging upper and left block
+ *
+ * Parameters:
+ * [in]	pSrcLeft		Pointer to the buffer of 16 left coefficients:
+ *								p[x, y] (x = -1, y = 0..3)
+ * [in]	pSrcAbove		Pointer to the buffer of 16 above coefficients:
+ *								p[x,y] (x = 0..3, y = -1)
+ * [in]	leftStep		Step of left coefficient buffer
+ * [in]	dstStep			Step of the destination buffer
+ * [in]	availability	Neighboring 16x16 MB availability flag
+ * [out]	pDst			Pointer to the destination buffer
+ *
+ * Return Value:
+ * None
+ */
+
+void armVCM4P10_PredictIntraDC4x4(
+     const OMX_U8* pSrcLeft,
+     const OMX_U8 *pSrcAbove,
+     OMX_U8* pDst,
+     OMX_INT leftStep,
+     OMX_INT dstStep,
+     OMX_S32 availability        
+);
+
+/*
+ * Description
+ * Unpack a 4x4 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in]	ppSrc	Double pointer to residual coefficient-position pair
+ *						buffer output by CAVLC decoding
+ * [out]	ppSrc	*ppSrc is updated to the start of next non empty block
+ * [out]	pDst	Pointer to unpacked 4x4 block
+ */
+
+void armVCM4P10_UnpackBlock4x4(
+     const OMX_U8 **ppSrc,
+     OMX_S16* pDst
+);
+
+/*
+ * Description
+ * Unpack a 2x2 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in]	ppSrc	Double pointer to residual coefficient-position pair
+ *						buffer output by CAVLC decoding
+ * [out]	ppSrc	*ppSrc is updated to the start of next non empty block
+ * [out]	pDst	Pointer to unpacked 2x2 block
+ */
+
+void armVCM4P10_UnpackBlock2x2(
+     const OMX_U8 **ppSrc,
+     OMX_S16* pDst
+);
+
+/*
+ * Description
+ * Deblock one boundary pixel
+ *
+ * Parameters:
+ * [in]	pQ0         Pointer to pixel q0
+ * [in] Step        Step between pixels q0 and q1
+ * [in] tC0         Edge threshold value
+ * [in] alpha       alpha threshold value
+ * [in] beta        beta threshold value
+ * [in] bS          deblocking strength
+ * [in] ChromaFlag  True for chroma blocks
+ * [out] pQ0        Deblocked pixels
+ * 
+ */
+
+void armVCM4P10_DeBlockPixel(
+    OMX_U8 *pQ0,    /* pointer to the pixel q0 */
+    int Step,       /* step between pixels q0 and q1 */
+    int tC0,        /* edge threshold value */
+    int alpha,      /* alpha */
+    int beta,       /* beta */
+    int bS,         /* deblocking strength */
+    int ChromaFlag
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfHor_Luma
+ *
+ * Description:
+ * This function performs interpolation for horizontal 1/2-pel positions
+ *
+ * Remarks:
+ *
+ *	[in]	pSrc			Pointer to top-left corner of block used to interpolate 
+ *													in the reconstructed frame plane
+ *	[in]	iSrcStep	Step of the source buffer.
+ *	[in]	iDstStep	Step of the destination(interpolation) buffer.
+ *	[in]	iWidth		Width of the current block
+ *	[in]	iHeight		Height of the current block
+ *	[out]	pDst	    Pointer to the interpolation buffer of the 1/2-pel 
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfHor_Luma(
+        const OMX_U8*		pSrc, 
+		OMX_U32 	iSrcStep, 
+		OMX_U8* 	pDst, 
+		OMX_U32 	iDstStep, 
+		OMX_U32 	iWidth, 
+		OMX_U32 	iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfVer_Luma
+ * 
+ * Description:
+ * This function performs interpolation for vertical 1/2-pel positions 
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ *	[in]	pSrc			Pointer to top-left corner of block used to interpolate 
+ *												in the reconstructed frame plane
+ *	[in]	iSrcStep	Step of the source buffer.
+ *	[in]	iDstStep	Step of the destination(interpolation) buffer.
+ *	[in]	iWidth		Width of the current block
+ *	[in]	iHeight		Height of the current block
+ *	[out]	pDst    	Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfVer_Luma(	
+	 const OMX_U8* 	pSrc, 
+	 OMX_U32 	iSrcStep, 
+ 	 OMX_U8* 	pDst,
+ 	 OMX_U32 	iDstStep, 
+ 	 OMX_U32 	iWidth, 
+ 	 OMX_U32 	iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfDiag_Luma
+ * 
+ * Description:
+ * This function performs interpolation for (1/2, 1/2)  positions 
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc        Pointer to top-left corner of block used to interpolate 
+ *                      in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [out]   pDst        Pointer to the interpolation buffer of the (1/2,1/2)-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfDiag_Luma(  
+        const OMX_U8*     pSrc, 
+        OMX_U32     iSrcStep, 
+        OMX_U8*     pDst, 
+        OMX_U32     iDstStep,
+        OMX_U32     iWidth, 
+        OMX_U32     iHeight
+);
+
+/*
+ * Description:
+ * Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in]  pSrc		Source 4x4 block
+ * [out] pDst		Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+/*
+ * Description:
+ * Forward Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in]  pSrc		Source 4x4 block
+ * [out] pDst		Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+OMX_INT armVCM4P10_CompareMotionCostToMV (
+    OMX_S16  mvX,
+    OMX_S16  mvY,
+    OMXVCMotionVector diffMV, 
+    OMX_INT candSAD, 
+    OMXVCMotionVector *bestMV, 
+    OMX_U32 nLamda,
+    OMX_S32 *pBestCost);
+
+/**
+ * Function: armVCCOMM_SAD
+ *
+ * Description:
+ * This function calculates the SAD for NxM blocks.
+ *
+ * Remarks:
+ *
+ * [in]		pSrcOrg		Pointer to the original block
+ * [in]		iStepOrg	Step of the original block buffer
+ * [in]		pSrcRef		Pointer to the reference block
+ * [in]		iStepRef	Step of the reference block buffer
+ * [in]		iHeight		Height of the block
+ * [in]		iWidth		Width of the block
+ * [out]	pDstSAD		Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCCOMM_SAD(	
+	const OMX_U8* 	pSrcOrg,
+	OMX_U32 	iStepOrg,
+	const OMX_U8* 	pSrcRef,
+	OMX_U32 	iStepRef,
+	OMX_S32*	pDstSAD,
+	OMX_U32		iHeight,
+	OMX_U32		iWidth);
+
+/**
+ * Function: armVCCOMM_Average
+ *
+ * Description:
+ * This function calculates the average of two blocks and stores the result.
+ *
+ * Remarks:
+ *
+ *	[in]	pPred0			Pointer to the top-left corner of reference block 0
+ *	[in]	pPred1			Pointer to the top-left corner of reference block 1
+ *	[in]	iPredStep0	    Step of reference block 0
+ *	[in]	iPredStep1	    Step of reference block 1
+ *	[in]	iDstStep 		Step of the destination buffer
+ *	[in]	iWidth			Width of the blocks
+ *	[in]	iHeight			Height of the blocks
+ *	[out]	pDstPred		Pointer to the destination buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCCOMM_Average (
+	 const OMX_U8* 	    pPred0,
+	 const OMX_U8* 	    pPred1,	
+	 OMX_U32		iPredStep0,
+	 OMX_U32		iPredStep1,
+	 OMX_U8*		pDstPred,
+	 OMX_U32		iDstStep, 
+	 OMX_U32		iWidth,
+	 OMX_U32		iHeight
+);
+
+/**
+ * Function: armVCM4P10_SADQuar
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the 
+ * average of the other two (pSrcRef0 and pSrcRef1)
+ *
+ * Remarks:
+ *
+ * [in]		pSrc				Pointer to the original block
+ * [in]		pSrcRef0		Pointer to reference block 0
+ * [in]		pSrcRef1		Pointer to reference block 1
+ * [in]		iSrcStep 		Step of the original block buffer
+ * [in]		iRefStep0		Step of reference block 0 
+ * [in]		iRefStep1 	Step of reference block 1 
+ * [in]		iHeight			Height of the block
+ * [in]		iWidth			Width of the block
+ * [out]	pDstSAD			Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCM4P10_SADQuar(
+	const OMX_U8* 	pSrc,
+    const OMX_U8* 	pSrcRef0,
+	const OMX_U8* 	pSrcRef1,	
+    OMX_U32 	iSrcStep,
+    OMX_U32		iRefStep0,
+    OMX_U32		iRefStep1,
+    OMX_U32*	pDstSAD,
+    OMX_U32     iHeight,
+    OMX_U32     iWidth
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Chroma
+ *
+ * Description:
+ * This function performs interpolation for chroma components.
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc            Pointer to top-left corner of block used to 
+ *                                              interpolate in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [in]    dx              Fractional part of horizontal motion vector 
+ *                                              component in 1/8 pixel unit (0~7) 
+ *  [in]    dy              Fractional part of vertical motion vector 
+ *                                              component in 1/8 pixel unit (0~7)
+ *  [out]   pDst            Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCM4P10_Interpolate_Chroma(
+        OMX_U8      *pSrc,
+        OMX_U32     iSrcStep,
+        OMX_U8      *pDst,
+        OMX_U32     iDstStep,
+        OMX_U32     iWidth,
+        OMX_U32     iHeight,
+        OMX_U32     dx,
+        OMX_U32     dy
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Luma
+ *
+ * Description:
+ * This function performs interpolation for luma components.
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc            Pointer to top-left corner of block used to 
+ *                                              interpolate in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [in]    dx              Fractional part of horizontal motion vector 
+ *                                              component in 1/4 pixel unit (0~3) 
+ *  [in]    dy              Fractional part of vertical motion vector 
+ *                                              component in 1/4 pixel unit (0~3) 
+ *  [out]   pDst            Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+ OMXResult armVCM4P10_Interpolate_Luma(
+     const OMX_U8     *pSrc,
+     OMX_U32    iSrcStep,
+     OMX_U8     *pDst,
+     OMX_U32    iDstStep,
+     OMX_U32    iWidth,
+     OMX_U32    iHeight,
+     OMX_U32    dx,
+     OMX_U32    dy
+);
+
+/**
+ * Function: armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer,
+ * perform dequantisation and integer inverse transformation for 4x4 block of
+ * residuals and update the pair buffer pointer to next non-empty block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppSrc		Double pointer to residual coefficient-position
+ *							pair buffer output by CAVLC decoding
+ * [in]	pDC			Pointer to the DC coefficient of this block, NULL
+ *							if it doesn't exist
+ * [in]	QP			Quantization parameter
+ * [in] AC          Flag indicating if at least one non-zero coefficient exists
+ * [out]	pDst		pointer to the reconstructed 4x4 block data
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx(
+     OMX_U8 **ppSrc,
+     OMX_S16 *pDst,
+     OMX_INT QP,
+     OMX_S16* pDC,
+     int AC
+);
+
+#endif  /*_armVideo_H_*/
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVCCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVCCOMM_s.h
new file mode 100755
index 0000000..32a0166
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVCCOMM_s.h
@@ -0,0 +1,72 @@
+;//
+;// 
+;// File Name:  armVCCOMM_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// ARM optimized OpenMAX VC header file
+;// 
+;// Formula used:
+;// MACRO for calculating median for three values.
+
+
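+;// NOTE(review): ARMVCCOMM_S_H is tested by the IF below but no GBLL/definition of it is visible in this file; confirm the include guard is effective.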
+    IF :LNOT::DEF:ARMVCCOMM_S_H
+        INCLUDE armCOMM_s.h
+    M_VARIANTS      CortexA8, ARM1136JS
+    
+    IF ARM1136JS :LOR: CortexA8 
+     
+     ;///*
+     ;// * Macro: M_MEDIAN3
+     ;// *
+     ;// * Description: Finds the median of three numbers
+     ;// * 
+     ;// * Remarks:
+     ;// *
+     ;// * Parameters:
+     ;// * [in] x     First entry for the list of three numbers.
+     ;// * [in] y     Second entry for the list of three numbers.
+     ;// *            Input value may be corrupted at the end of
+     ;// *            the execution of this macro.
+     ;// * [in] z     Third entry of the list of three numbers.
+     ;// *            Input value corrupted at the end of the 
+     ;// *            execution of this macro.
+     ;// * [in] t     Temporary scratch  register.
+     ;// * [out]z     Median of the three numbers.       
+     ;// */
+     
+     MACRO
+
+     M_MEDIAN3 $x, $y, $z, $t 
+     
+     SUBS  $t, $y, $z; // if (y < z)
+     ADDLT $z, $z, $t; //  swap y and z
+     SUBLT $y, $y, $t;
+
+     ;// Now z' <= y', so there are three cases for the
+     ;// median value, depending on x.
+
+     ;// 1) x <= z'      <= y'      : median value is z'
+     ;// 2)      z' <= x <= y'      : median value is x
+     ;// 3)      z'      <= y' <= x : median value is y'
+
+     CMP   $z, $x;     // if ( x > min(y,z) )
+     MOVLT $z, $x;     // ans = x 
+
+     CMP   $x, $y;     // if ( x > max(y,z) )
+     MOVGT $z, $y;     // ans = max(y,z)
+     
+     MEND
+    ENDIF      
+    
+    
+        
+    ENDIF ;// ARMVCCOMM_S_H
+
+ END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC.h
new file mode 100755
index 0000000..7b3cc72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC.h
@@ -0,0 +1,4381 @@
+/**
+ * File: omxVC.h
+ * Brief: OpenMAX DL v1.0.2 - Video Coding library
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. 
+ *
+ * These materials are protected by copyright laws and contain material 
+ * proprietary to the Khronos Group, Inc.  You may use these materials 
+ * for implementing Khronos specifications, without altering or removing 
+ * any trademark, copyright or other notice from the specification.
+ * 
+ * Khronos Group makes no, and expressly disclaims any, representations 
+ * or warranties, express or implied, regarding these materials, including, 
+ * without limitation, any implied warranties of merchantability or fitness 
+ * for a particular purpose or non-infringement of any intellectual property. 
+ * Khronos Group makes no, and expressly disclaims any, warranties, express 
+ * or implied, regarding the correctness, accuracy, completeness, timeliness, 
+ * and reliability of these materials. 
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters, 
+ * Contributors or Members or their respective partners, officers, directors, 
+ * employees, agents or representatives be liable for any damages, whether 
+ * direct, indirect, special or consequential damages for lost revenues, 
+ * lost profits, or otherwise, arising from or in connection with these 
+ * materials.
+ * 
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc. 
+ *
+ */
+
+/* *****************************************************************************************/
+
+#ifndef _OMXVC_H_
+#define _OMXVC_H_
+
+#include "omxtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* 6.1.1.1 Motion Vectors  */
+/* In omxVC, motion vectors are represented as follows:  */
+
+typedef struct {
+    OMX_S16 dx;
+    OMX_S16 dy;
+} OMXVCMotionVector;
+
+
+
+/**
+ * Function:  omxVCCOMM_Average_8x   (6.1.3.1.1)
+ *
+ * Description:
+ * This function calculates the average of two 8x4, 8x8, or 8x16 blocks.  The 
+ * result is rounded according to (a+b+1)/2.  The block average function can 
+ * be used in conjunction with half-pixel interpolation to obtain quarter 
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. 
+ *
+ * Input Arguments:
+ *   
+ *   pPred0     - Pointer to the top-left corner of reference block 0 
+ *   pPred1     - Pointer to the top-left corner of reference block 1 
+ *   iPredStep0 - Step of reference block 0 
+ *   iPredStep1 - Step of reference block 1 
+ *   iDstStep   - Step of the destination buffer. 
+ *   iHeight    - Height of the blocks 
+ *
+ * Output Arguments:
+ *   
+ *   pDstPred - Pointer to the destination buffer. 8-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -   one or more of the following pointers is NULL: pPred0, pPred1, or 
+ *              pDstPred. 
+ *    -   pDstPred is not aligned on an 8-byte boundary. 
+ *    -   iPredStep0 <= 0 or iPredStep0 is not a multiple of 8. 
+ *    -   iPredStep1 <= 0 or iPredStep1 is not a multiple of 8. 
+ *    -   iDstStep   <= 0 or iDstStep is not a multiple of 8. 
+ *    -   iHeight is not 4, 8, or 16. 
+ *
+ */
+OMXResult omxVCCOMM_Average_8x (
+    const OMX_U8 *pPred0,
+    const OMX_U8 *pPred1,
+    OMX_U32 iPredStep0,
+    OMX_U32 iPredStep1,
+    OMX_U8 *pDstPred,
+    OMX_U32 iDstStep,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_Average_16x   (6.1.3.1.2)
+ *
+ * Description:
+ * This function calculates the average of two 16x16 or 16x8 blocks.  The 
+ * result is rounded according to (a+b+1)/2.  The block average function can 
+ * be used in conjunction with half-pixel interpolation to obtain quarter 
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. 
+ *
+ * Input Arguments:
+ *   
+ *   pPred0 - Pointer to the top-left corner of reference block 0 
+ *   pPred1 - Pointer to the top-left corner of reference block 1 
+ *   iPredStep0 - Step of reference block 0 
+ *   iPredStep1 - Step of reference block 1 
+ *   iDstStep - Step of the destination buffer 
+ *   iHeight - Height of the blocks 
+ *
+ * Output Arguments:
+ *   
+ *   pDstPred - Pointer to the destination buffer. 16-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -   one or more of the following pointers is NULL: pPred0, pPred1, or 
+ *              pDstPred. 
+ *    -   pDstPred is not aligned on a 16-byte boundary. 
+ *    -   iPredStep0 <= 0 or iPredStep0 is not a multiple of 16. 
+ *    -   iPredStep1 <= 0 or iPredStep1 is not a multiple of 16. 
+ *    -   iDstStep <= 0 or iDstStep is not a multiple of 16. 
+ *    -   iHeight is not 8 or 16. 
+ *
+ */
+OMXResult omxVCCOMM_Average_16x (
+    const OMX_U8 *pPred0,
+    const OMX_U8 *pPred1,
+    OMX_U32 iPredStep0,
+    OMX_U32 iPredStep1,
+    OMX_U8 *pDstPred,
+    OMX_U32 iDstStep,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_ExpandFrame_I   (6.1.3.2.1)
+ *
+ * Description:
+ * This function expands a reconstructed frame in-place.  The unexpanded 
+ * source frame should be stored in a plane buffer with sufficient space 
+ * pre-allocated for edge expansion, and the input frame should be located in 
+ * the plane buffer center.  This function executes the pixel expansion by 
+ * replicating source frame edge pixel intensities in the empty pixel 
+ * locations (expansion region) between the source frame edge and the plane 
+ * buffer edge.  The width/height of the expansion regions on the 
+ * horizontal/vertical edges is controlled by the parameter iExpandPels. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDstPlane - pointer to the top-left corner of the frame to be 
+ *            expanded; must be aligned on an 8-byte boundary. 
+ *   iFrameWidth - frame width; must be a multiple of 8. 
+ *   iFrameHeight - frame height; must be a multiple of 8. 
+ *   iExpandPels - number of pixels to be expanded in the horizontal and 
+ *            vertical directions; must be a multiple of 8. 
+ *   iPlaneStep - distance, in bytes, between the start of consecutive lines 
+ *            in the plane buffer; must be larger than or equal to 
+ *            (iFrameWidth + 2 * iExpandPels). 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstPlane - Pointer to the top-left corner of the frame (NOT the 
+ *            top-left corner of the plane); must be aligned on an 8-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -    pSrcDstPlane is NULL. 
+ *    -    pSrcDstPlane is not aligned on an 8-byte boundary. 
+ *    -    one of the following parameters is either equal to zero or is a 
+ *              non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or 
+ *              iExpandPels. 
+ *    -    iPlaneStep < (iFrameWidth + 2 * iExpandPels). 
+ *
+ */
+OMXResult omxVCCOMM_ExpandFrame_I (
+    OMX_U8 *pSrcDstPlane,
+    OMX_U32 iFrameWidth,
+    OMX_U32 iFrameHeight,
+    OMX_U32 iExpandPels,
+    OMX_U32 iPlaneStep
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_Copy8x8   (6.1.3.3.1)
+ *
+ * Description:
+ * Copies the reference 8x8 block to the current block. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the reference block in the source frame; must be 
+ *            aligned on an 8-byte boundary. 
+ *   step - distance between the starts of consecutive lines in the reference 
+ *            frame, in bytes; must be a multiple of 8 and must be larger than 
+ *            or equal to 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination block; must be aligned on an 8-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -   one or more of the following pointers is NULL: pSrc, pDst 
+ *    -   one or more of the following pointers is not aligned on an 8-byte 
+ *              boundary: pSrc, pDst 
+ *    -    step <8 or step is not a multiple of 8. 
+ *
+ */
+OMXResult omxVCCOMM_Copy8x8 (
+    const OMX_U8 *pSrc,
+    OMX_U8 *pDst,
+    OMX_INT step
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_Copy16x16   (6.1.3.3.2)
+ *
+ * Description:
+ * Copies the reference 16x16 macroblock to the current macroblock. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the reference macroblock in the source frame; must be 
+ *            aligned on a 16-byte boundary. 
+ *   step - distance between the starts of consecutive lines in the reference 
+ *            frame, in bytes; must be a multiple of 16 and must be larger 
+ *            than or equal to 16. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination macroblock; must be aligned on a 
+ *            16-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -   one or more of the following pointers is NULL: pSrc, pDst 
+ *    -   one or more of the following pointers is not aligned on a 16-byte 
+ *              boundary: pSrc, pDst 
+ *    -    step <16 or step is not a multiple of 16. 
+ *
+ */
+OMXResult omxVCCOMM_Copy16x16 (
+    const OMX_U8 *pSrc,
+    OMX_U8 *pDst,
+    OMX_INT step
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_ComputeTextureErrorBlock_SAD   (6.1.4.1.1)
+ *
+ * Description:
+ * Computes texture error of the block; also returns SAD. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the source plane; must be aligned on an 8-byte 
+ *            boundary. 
+ *   srcStep - step of the source plane 
+ *   pSrcRef - pointer to the reference buffer, an 8x8 block; must be aligned 
+ *            on an 8-byte boundary. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination buffer, an 8x8 block; must be aligned 
+ *            on an 8-byte boundary. 
+ *   pDstSAD - pointer to the Sum of Absolute Differences (SAD) value 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments 
+ *    -    At least one of the following 
+ *         pointers is NULL: pSrc, pSrcRef, pDst and pDstSAD. 
+ *    -    pSrc is not 8-byte aligned. 
+ *    -    srcStep <= 0 or srcStep is not a multiple of 8. 
+ *    -    pSrcRef is not 8-byte aligned. 
+ *    -    pDst is not 8-byte aligned. 
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD (
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    const OMX_U8 *pSrcRef,
+    OMX_S16 *pDst,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_ComputeTextureErrorBlock   (6.1.4.1.2)
+ *
+ * Description:
+ * Computes the texture error of the block. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the source plane. This should be aligned on an 8-byte 
+ *            boundary. 
+ *   srcStep - step of the source plane 
+ *   pSrcRef - pointer to the reference buffer, an 8x8 block. This should be 
+ *            aligned on an 8-byte boundary. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination buffer, an 8x8 block. This should be 
+ *            aligned on an 8-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL: 
+ *         pSrc, pSrcRef, pDst. 
+ *    -    pSrc is not 8-byte aligned. 
+ *    -    srcStep <= 0 or srcStep is not a multiple of 8. 
+ *    -    pSrcRef is not 8-byte aligned. 
+ *    -    pDst is not 8-byte aligned 
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock (
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    const OMX_U8 *pSrcRef,
+    OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_LimitMVToRect   (6.1.4.1.3)
+ *
+ * Description:
+ * Limits the motion vector associated with the current block/macroblock to 
+ * prevent the motion compensated block/macroblock from moving outside a 
+ * bounding rectangle as shown in Figure 6-1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcMV - pointer to the motion vector associated with the current block 
+ *            or macroblock 
+ *   pRectVOPRef - pointer to the bounding rectangle 
+ *   Xcoord, Ycoord  - coordinates of the current block or macroblock 
+ *   size - size of the current block or macroblock; must be equal to 8 or 
+ *            16. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMV - pointer to the limited motion vector 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the 
+ *              following conditions is true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcMV, pDstMV, or pRectVOPRef. 
+ *    -    size is not equal to either 8 or 16. 
+ *    -    the width or height of the bounding rectangle is less than 
+ *         twice the block size.
+ */
+OMXResult omxVCCOMM_LimitMVToRect (
+    const OMXVCMotionVector *pSrcMV,
+    OMXVCMotionVector *pDstMV,
+    const OMXRect *pRectVOPRef,
+    OMX_INT Xcoord,
+    OMX_INT Ycoord,
+    OMX_INT size
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_SAD_16x   (6.1.4.1.4)
+ *
+ * Description:
+ * This function calculates the SAD for 16x16 and 16x8 blocks. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg - Pointer to the original block; must be aligned on a 16-byte 
+ *             boundary. 
+ *   iStepOrg - Step of the original block buffer 
+ *   pSrcRef  - Pointer to the reference block 
+ *   iStepRef - Step of the reference block buffer 
+ *   iHeight  - Height of the block 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer to the resulting SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the 
+ *              following conditions is true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcOrg, pDstSAD, or pSrcRef 
+ *    -    pSrcOrg is not 16-byte aligned. 
+ *    -    iStepOrg  <= 0 or iStepOrg is not a multiple of 16 
+ *    -    iStepRef <= 0 or iStepRef is not a multiple of 16 
+ *    -    iHeight is not 8 or 16 
+ *
+ */
+OMXResult omxVCCOMM_SAD_16x (
+    const OMX_U8 *pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8 *pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_S32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_SAD_8x   (6.1.4.1.5)
+ *
+ * Description:
+ * This function calculates the SAD for 8x16, 8x8, 8x4 blocks. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg  - Pointer to the original block; must be aligned on an 8-byte 
+ *              boundary. 
+ *   iStepOrg - Step of the original block buffer 
+ *   pSrcRef  - Pointer to the reference block 
+ *   iStepRef - Step of the reference block buffer 
+ *   iHeight  - Height of the block 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer to the resulting SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the 
+ *              following conditions is true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcOrg, pDstSAD, or pSrcRef 
+ *    -    pSrcOrg is not 8-byte aligned. 
+ *    -    iStepOrg  <= 0 or iStepOrg is not a multiple of 8 
+ *    -    iStepRef <= 0 or iStepRef is not a multiple of 8 
+ *    -    iHeight is not 4, 8 or 16 
+ *
+ */
+OMXResult omxVCCOMM_SAD_8x (
+    const OMX_U8 *pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8 *pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_S32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/* 6.2.1.1 Direction  */
+/* The direction enumerator is used with functions that perform AC/DC prediction and zig-zag scan.  */
+
+enum {
+    OMX_VC_NONE       = 0,
+    OMX_VC_HORIZONTAL = 1,
+    OMX_VC_VERTICAL   = 2 
+};
+
+
+
+/* 6.2.1.2 Bilinear Interpolation  */
+/* The bilinear interpolation enumerator is used with motion estimation, motion compensation, and reconstruction functions.  */
+
+enum {
+    OMX_VC_INTEGER_PIXEL = 0, /* case a */
+    OMX_VC_HALF_PIXEL_X  = 1, /* case b */
+    OMX_VC_HALF_PIXEL_Y  = 2, /* case c */
+    OMX_VC_HALF_PIXEL_XY = 3  /* case d */ 
+};
+
+
+
+/* 6.2.1.3 Neighboring Macroblock Availability  */
+/* Neighboring macroblock availability is indicated using the following flags:   */
+
+enum {
+    OMX_VC_UPPER = 1,        /** above macroblock is available */
+    OMX_VC_LEFT = 2,         /** left macroblock is available */
+    OMX_VC_CENTER = 4,
+    OMX_VC_RIGHT = 8,
+    OMX_VC_LOWER = 16,
+    OMX_VC_UPPER_LEFT = 32,  /** above-left macroblock is available */
+    OMX_VC_UPPER_RIGHT = 64, /** above-right macroblock is available */
+    OMX_VC_LOWER_LEFT = 128,
+    OMX_VC_LOWER_RIGHT = 256 
+};
+
+
+
+/* 6.2.1.4 Video Components  */
+/* A data type that enumerates video components is defined as follows:  */
+
+typedef enum {
+    OMX_VC_LUMINANCE,    /** Luminance component */
+    OMX_VC_CHROMINANCE   /** chrominance component */ 
+} OMXVCM4P2VideoComponent;
+
+
+
+/* 6.2.1.5 MacroblockTypes  */
+/* A data type that enumerates macroblock types is defined as follows:  */
+
+typedef enum {
+    OMX_VC_INTER     = 0, /** P picture or P-VOP */
+    OMX_VC_INTER_Q   = 1, /** P picture or P-VOP */
+    OMX_VC_INTER4V   = 2, /** P picture or P-VOP */
+    OMX_VC_INTRA     = 3, /** I and P picture, I- and P-VOP */
+    OMX_VC_INTRA_Q   = 4, /** I and P picture, I- and P-VOP */
+    OMX_VC_INTER4V_Q = 5  /** P picture or P-VOP (H.263)*/
+} OMXVCM4P2MacroblockType;
+
+
+
+/* 6.2.1.6 Coordinates  */
+/* Coordinates are represented as follows:  */
+
+typedef struct {
+    OMX_INT x;
+    OMX_INT y;
+} OMXVCM4P2Coordinate;
+
+
+
+/* 6.2.1.7 Motion Estimation Algorithms  */
+/* A data type that enumerates motion estimation search methods is defined as follows:  */
+
+typedef enum {
+    OMX_VC_M4P2_FAST_SEARCH = 0,  /** Fast motion search */
+    OMX_VC_M4P2_FULL_SEARCH = 1   /** Full motion search */ 
+} OMXVCM4P2MEMode;
+
+
+
+/* 6.2.1.8 Motion Estimation Parameters  */
+/* A data structure containing control parameters for 
+ * motion estimation functions is defined as follows:  
+ */
+
+typedef struct {
+    OMX_INT searchEnable8x8;     /** enables 8x8 search */
+    OMX_INT halfPelSearchEnable; /** enables half-pel resolution */
+    OMX_INT searchRange;         /** search range */
+    OMX_INT rndVal;              /** rounding control; 0-disabled, 1-enabled*/
+} OMXVCM4P2MEParams;
+
+
+
+/* 6.2.1.9 Macroblock Information   */
+/* A data structure containing macroblock parameters for 
+ * motion estimation functions is defined as follows:  
+ */
+
+typedef struct {
+    OMX_S32 sliceId;                 /* slice number */
+    OMXVCM4P2MacroblockType mbType;  /* MB type: OMX_VC_INTRA, OMX_VC_INTER, or OMX_VC_INTER4V */
+    OMX_S32 qp;                      /* quantization parameter */
+    OMX_U32 cbpy;                    /* CBP Luma */
+    OMX_U32 cbpc;                    /* CBP Chroma */
+    OMXVCMotionVector pMV0[2][2];    /* motion vector, represented using 1/2-pel units, 
+                                      * pMV0[blocky][blockx] (blocky = 0~1, blockx = 0~1) 
+                                      */
+    OMXVCMotionVector pMVPred[2][2]; /* motion vector prediction, represented using 1/2-pel units, 
+                                      * pMVPred[blocky][blockx] (blocky = 0~1, blockx = 0~1) 
+                                      */
+    OMX_U8 pPredDir[2][2];           /* AC prediction direction: 
+                                      *   OMX_VC_NONE, OMX_VC_VERTICAL, OMX_VC_HORIZONTAL 
+                                      */
+} OMXVCM4P2MBInfo, *OMXVCM4P2MBInfoPtr;
+
+
+
+/**
+ * Function:  omxVCM4P2_FindMVpred   (6.2.3.1.1)
+ *
+ * Description:
+ * Predicts a motion vector for the current block using the procedure 
+ * specified in [ISO14496-2], subclause 7.6.5.  The resulting predicted MV is 
+ * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then 
+ * the set of three MV candidates used for prediction is also returned; 
+ * otherwise pDstMVPredME is NULL upon return. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcMVCurMB - pointer to the MV buffer associated with the current Y 
+ *            macroblock; a value of NULL indicates unavailability. 
+ *   pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated 
+ *            with the MB located to the left of the current MB; set to NULL 
+ *            if there is no MB to the left. 
+ *   pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated 
+ *            with the MB located above the current MB; set to NULL if there 
+ *            is no MB located above the current MB. 
+ *   pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated 
+ *            with the MB located to the right and above the current MB; set 
+ *            to NULL if there is no MB located to the above-right. 
+ *   iBlk - the index of block in the current macroblock 
+ *   pDstMVPredME - MV candidate return buffer;  if set to NULL then 
+ *            prediction candidate MVs are not returned and pDstMVPredME will 
+ *            be NULL upon function return; if pDstMVPredME is non-NULL then it 
+ *            must point to a buffer containing sufficient space for three 
+ *            return MVs. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMVPred - pointer to the predicted motion vector 
+ *   pDstMVPredME - if non-NULL upon input then pDstMVPredME  points upon 
+ *            return to a buffer containing the three motion vector candidates 
+ *            used for prediction as specified in [ISO14496-2], subclause 
+ *            7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL 
+ *            upon output. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -    the pointer pDstMVPred is NULL 
+ *    -    the parameter iBlk does not fall into the range 0 <= iBlk<=3 
+ *
+ */
+OMXResult omxVCM4P2_FindMVpred (
+    const OMXVCMotionVector *pSrcMVCurMB,
+    const OMXVCMotionVector *pSrcCandMV1,
+    const OMXVCMotionVector *pSrcCandMV2,
+    const OMXVCMotionVector *pSrcCandMV3,
+    OMXVCMotionVector *pDstMVPred,
+    OMXVCMotionVector *pDstMVPredME,
+    OMX_INT iBlk
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_IDCT8x8blk   (6.2.3.2.1)
+ *
+ * Description:
+ * Computes a 2D inverse DCT for a single 8x8 block, as defined in 
+ * [ISO14496-2]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the start of the linearly arranged IDCT input buffer; 
+ *            must be aligned on a 16-byte boundary.  According to 
+ *            [ISO14496-2], the input coefficient values should lie within the 
+ *            range [-2048, 2047]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the start of the linearly arranged IDCT output buffer; 
+ *            must be aligned on a 16-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrc or pDst is NULL. 
+ *    -    pSrc or pDst is not 16-byte aligned. 
+ *
+ */
+OMXResult omxVCM4P2_IDCT8x8blk (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_MEGetBufSize   (6.2.4.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification 
+ * structure for the following motion estimation functions: 
+ * BlockMatch_Integer_8x8, BlockMatch_Integer_16x16, and MotionEstimationMB. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P2MEMode 
+ *   pMEParams - motion estimation parameters 
+ *
+ * Output Arguments:
+ *   
+ *   pSize - pointer to the number of bytes required for the specification 
+ *            structure 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - one or more of the following is true: 
+ *    -    an invalid value was specified for the parameter MEmode 
+ *    -    a negative or zero value was specified for the 
+ *         parameter pMEParams->searchRange 
+ *
+ */
+OMXResult omxVCM4P2_MEGetBufSize (
+    OMXVCM4P2MEMode MEmode,
+    const OMXVCM4P2MEParams *pMEParams,
+    OMX_U32 *pSize
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_MEInit   (6.2.4.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the 
+ * following motion estimation functions:  BlockMatch_Integer_8x8, 
+ * BlockMatch_Integer_16x16, and MotionEstimationMB. Memory for the 
+ * specification structure *pMESpec must be allocated prior to calling the 
+ * function, and should be aligned on a 4-byte boundary.  Following 
+ * initialization by this function, the vendor-specific structure *pMESpec 
+ * should contain an implementation-specific representation of all motion 
+ * estimation parameters received via the structure pMEParams, for example 
+ * rndVal, searchRange, etc.  The number of bytes required for the 
+ * specification structure can be determined using the function 
+ * omxVCM4P2_MEGetBufSize. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P2MEMode 
+ *   pMEParams - motion estimation parameters 
+ *   pMESpec - pointer to the uninitialized ME specification structure 
+ *
+ * Output Arguments:
+ *   
+ *   pMESpec - pointer to the initialized ME specification structure 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - one or more of the following is true: 
+ *    -    an invalid value was specified for the parameter MEmode 
+ *    -    a negative or zero value was specified for the 
+ *         parameter pMEParams->searchRange 
+ *
+ */
+OMXResult omxVCM4P2_MEInit (
+    OMXVCM4P2MEMode MEmode,
+    const OMXVCM4P2MEParams *pMEParams,
+    void *pMESpec
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Integer_16x16   (6.2.4.2.1)
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated 
+ * minimum SAD. Both the input and output motion vectors are represented using 
+ * half-pixel units, and therefore a shift left or right by 1 bit may be 
+ * required, respectively, to match the input or output MVs with other 
+ * functions that either generate output MVs or expect input MVs represented 
+ * using integer pixel units. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            MB that corresponds to the location of the current macroblock in 
+ *            the current plane. 
+ *   refWidth - width of the reference plane 
+ *   pRefRect - pointer to the valid reference plane rectangle; coordinates 
+ *            are specified relative to the image origin.  Rectangle 
+ *            boundaries may extend beyond image boundaries if the image has 
+ *            been padded.  For example, if padding extends 4 pixels beyond 
+ *            frame border, then the value for the left border could be set to 
+ *            -4. 
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock 
+ *            buffer extracted from the original plane (linear array, 256 
+ *            entries); must be aligned on a 16-byte boundary.  The number of 
+ *            bytes between lines (step) is 16. 
+ *   pCurrPointPos - position of the current macroblock in the current plane 
+ *   pSrcPreMV - pointer to predicted motion vector; NULL indicates no 
+ *            predicted MV 
+ *   pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced 
+ *            by pSrcPreMV); may be set to NULL if unavailable. 
+ *   pMESpec - vendor-specific motion estimation specification structure; 
+ *            must have been allocated and then initialized using 
+ *            omxVCM4P2_MEInit prior to calling the block matching function. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMV - pointer to estimated MV 
+ *   pDstSAD - pointer to minimum SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following 
+ *              conditions is true: 
+ *    -    at least one of the following pointers is NULL: pSrcRefBuf, 
+ *              pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or 
+ *              pMESpec, or 
+ *    -    pSrcCurrBuf is not 16-byte aligned 
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_16x16 (
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    const OMXVCMotionVector *pSrcPreMV,
+    const OMX_INT *pSrcPreSAD,
+    void *pMESpec,
+    OMXVCMotionVector *pDstMV,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Integer_8x8   (6.2.4.2.2)
+ *
+ * Description:
+ * Performs an 8x8 block search; estimates motion vector and associated 
+ * minimum SAD.  Both the input and output motion vectors are represented 
+ * using half-pixel units, and therefore a shift left or right by 1 bit may be 
+ * required, respectively, to match the input or output MVs with other 
+ * functions that either generate output MVs or expect input MVs represented 
+ * using integer pixel units. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            block that corresponds to the location of the current 8x8 block 
+ *            in the current plane. 
+ *   refWidth - width of the reference plane 
+ *   pRefRect - pointer to the valid reference plane rectangle; coordinates 
+ *            are specified relative to the image origin.  Rectangle 
+ *            boundaries may extend beyond image boundaries if the image has 
+ *            been padded. 
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock 
+ *            buffer extracted from the original plane (linear array, 128 
+ *            entries); must be aligned on an 8-byte boundary.  The number of 
+ *            bytes between lines (step) is 16 bytes. 
+ *   pCurrPointPos - position of the current block in the current plane 
+ *   pSrcPreMV - pointer to predicted motion vector; NULL indicates no 
+ *            predicted MV 
+ *   pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced 
+ *            by pSrcPreMV); may be set to NULL if unavailable. 
+ *   pMESpec - vendor-specific motion estimation specification structure; 
+ *            must have been allocated and then initialized using 
+ *            omxVCM4P2_MEInit prior to calling the block matching function. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMV - pointer to estimated MV 
+ *   pDstSAD - pointer to minimum SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following 
+ *              conditions is true: 
+ *    -    at least one of the following pointers is NULL: pSrcRefBuf, 
+ *              pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or 
+ *              pMESpec, or 
+ *    -    pSrcCurrBuf is not 8-byte aligned 
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_8x8 (
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    const OMXVCMotionVector *pSrcPreMV,
+    const OMX_INT *pSrcPreSAD,
+    void *pMESpec,
+    OMXVCMotionVector *pDstMV,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Half_16x16   (6.2.4.2.3)
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution.  Returns the
+ * estimated motion vector and associated minimum SAD.  This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally.  The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function
+ * BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ *            macroblock that corresponds to the location of the current
+ *            macroblock in the current plane.
+ *   refWidth - width of the reference plane
+ *   pRefRect - reference plane valid region rectangle
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock
+ *            buffer extracted from the original plane (linear array, 256
+ *            entries); must be aligned on a 16-byte boundary.  The number of
+ *            bytes between lines (step) is 16.
+ *   pSearchPointRefPos - position of the starting point for half pixel
+ *            search (specified in terms of integer pixel units) in the
+ *            reference plane, i.e., the reference position pointed to by the
+ *            predicted motion vector.
+ *   rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ *   pSrcDstMV - pointer to the initial MV estimate; typically generated
+ *            during a prior 16x16 integer search; specified in terms of
+ *            half-pixel units.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDstMV - pointer to estimated MV
+ *   pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following
+ *              conditions is true:
+ *    -    at least one of the following pointers is NULL: pSrcRefBuf,
+ *         pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV, or
+ *    -    pSrcCurrBuf is not 16-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_16x16 (
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pSearchPointRefPos,
+    OMX_INT rndVal,
+    OMXVCMotionVector *pSrcDstMV,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Half_8x8   (6.2.4.2.4)
+ *
+ * Description:
+ * Performs an 8x8 block match with half-pixel resolution. Returns the
+ * estimated motion vector and associated minimum SAD.  This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally.  The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function
+ * BlockMatch_Integer_8x8 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ *            block that corresponds to the location of the current 8x8 block
+ *            in the current plane.
+ *   refWidth - width of the reference plane
+ *   pRefRect - reference plane valid region rectangle
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock
+ *            buffer extracted from the original plane (linear array, 128
+ *            entries); must be aligned on an 8-byte boundary.  The number of
+ *            bytes between lines (step) is 16.
+ *   pSearchPointRefPos - position of the starting point for half pixel
+ *            search (specified in terms of integer pixel units) in the
+ *            reference plane.
+ *   rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ *   pSrcDstMV - pointer to the initial MV estimate; typically generated
+ *            during a prior 8x8 integer search, specified in terms of
+ *            half-pixel units.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDstMV - pointer to estimated MV
+ *   pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following
+ *              conditions is true:
+ *    -    at least one of the following pointers is NULL:
+ *         pSrcRefBuf, pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV, or
+ *    -    pSrcCurrBuf is not 8-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_8x8 (
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pSearchPointRefPos,
+    OMX_INT rndVal,
+    OMXVCMotionVector *pSrcDstMV,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_MotionEstimationMB   (6.2.4.3.1)
+ *
+ * Description:
+ * Performs motion search for a 16x16 macroblock.  Selects best motion search
+ * strategy from among inter-1MV, inter-4MV, and intra modes.  Supports
+ * integer and half pixel resolution.
+ *
+ * Input Arguments:
+ *
+ *   pSrcCurrBuf - pointer to the top-left corner of the current MB in the
+ *            original picture plane; must be aligned on a 16-byte boundary.
+ *            The function does not expect source data outside the region
+ *            bounded by the MB to be available; for example it is not
+ *            necessary for the caller to guarantee the availability of
+ *            pSrcCurrBuf[-srcCurrStep], i.e., the row of pixels above the MB
+ *            to be processed.
+ *   srcCurrStep - width of the original picture plane, in terms of full
+ *            pixels; must be a multiple of 16.
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ *            plane location corresponding to the location of the current
+ *            macroblock in the current plane; must be aligned on a 16-byte
+ *            boundary.
+ *   srcRefStep - width of the reference picture plane, in terms of full
+ *            pixels; must be a multiple of 16.
+ *   pRefRect - reference plane valid region rectangle, specified relative to
+ *            the image origin
+ *   pCurrPointPos - position of the current macroblock in the current plane
+ *   pMESpec - pointer to the vendor-specific motion estimation specification
+ *            structure; must be allocated and then initialized using
+ *            omxVCM4P2_MEInit prior to calling this function.
+ *   pMBInfo - array, of dimension four, containing pointers to information
+ *            associated with four nearby MBs:
+ *            -   pMBInfo[0] - pointer to left MB information
+ *            -   pMBInfo[1] - pointer to top MB information
+ *            -   pMBInfo[2] - pointer to top-left MB information
+ *            -   pMBInfo[3] - pointer to top-right MB information
+ *            Any pointer in the array may be set equal to NULL if the
+ *            corresponding MB doesn't exist.  For each MB, the following structure
+ *            members are used:
+ *            -   mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER, or
+ *                OMX_VC_INTER4V
+ *            -   pMV0[2][2] - estimated motion vectors; represented
+ *                in 1/2 pixel units
+ *            -   sliceID - number of the slice to which the MB belongs
+ *   pSrcDstMBCurr - pointer to information structure for the current MB.
+ *            The following entries should be set prior to calling the
+ *            function: sliceID - the number of the slice to which the
+ *            current MB belongs.  The structure elements cbpy and cbpc are
+ *            ignored.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDstMBCurr - pointer to updated information structure for the current
+ *            MB after MB-level motion estimation has been completed.  The
+ *            following structure members are updated by the ME function:
+ *              -  mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or
+ *                 OMX_VC_INTER4V.
+ *              -  pMV0[2][2] - estimated motion vectors; represented in
+ *                 terms of 1/2 pel units.
+ *              -  pMVPred[2][2] - predicted motion vectors; represented
+ *                 in terms of 1/2 pel units.
+ *            The structure members cbpy and cbpc are not updated by the function.
+ *   pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs
+ *            for INTER4V
+ *   pDstBlockSAD - pointer to an array of SAD values for each of the four
+ *            8x8 luma blocks in the MB.  The block SADs are in scan order for
+ *            each MB.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the
+ *              following conditions is true:
+ *    -    at least one of the following pointers is NULL: pSrcCurrBuf,
+ *              pSrcRefBuf, pRefRect, pCurrPointPos, pMBInter, pMBIntra,
+ *              pSrcDstMBCurr, or pDstSAD.
+ *    NOTE(review): pMBInter and pMBIntra are not parameters of this prototype.
+ */
+OMXResult omxVCM4P2_MotionEstimationMB (
+    const OMX_U8 *pSrcCurrBuf,
+    OMX_S32 srcCurrStep,
+    const OMX_U8 *pSrcRefBuf,
+    OMX_S32 srcRefStep,
+    const OMXRect*pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    void *pMESpec,
+    const OMXVCM4P2MBInfoPtr *pMBInfo,
+    OMXVCM4P2MBInfo *pSrcDstMBCurr,
+    OMX_U16 *pDstSAD,
+    OMX_U16 *pDstBlockSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DCT8x8blk   (6.2.4.4.1)
+ *
+ * Description:
+ * Computes a 2D forward DCT for a single 8x8 block, as defined in
+ * [ISO14496-2].
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the start of the linearly arranged input buffer of
+ *            OMX_S16 values; must be aligned on a 16-byte boundary.  Input
+ *            values (pixel intensities) are valid in the range [-255,255].
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the start of the linearly arranged output buffer of
+ *            OMX_S16 values; must be aligned on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments, returned if:
+ *    -    pSrc or pDst is NULL.
+ *    -    pSrc or pDst is not 16-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_DCT8x8blk (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_QuantIntra_I   (6.2.4.4.2)
+ *
+ * Description:
+ * Performs quantization on intra block coefficients. This function supports
+ * bits_per_pixel == 8.
+ *
+ * Input Arguments:
+ *
+ *   pSrcDst - pointer to the input intra block coefficients; must be aligned
+ *            on a 16-byte boundary.
+ *   QP - quantization parameter (quantizer_scale).
+ *   blockIndex - block index indicating the component type and position,
+ *            valid in the range 0 to 5, as defined in [ISO14496-2], subclause
+ *            6.1.3.8.
+ *   shortVideoHeader - binary flag indicating presence of
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDst - pointer to the output (quantized) intra block coefficients.
+ *            When shortVideoHeader==1, AC coefficients are saturated on the
+ *            interval [-127, 127], and DC coefficients are saturated on the
+ *            interval [1, 254].  When shortVideoHeader==0, AC coefficients
+ *            are saturated on the interval [-2047, 2047].
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrcDst is NULL.
+ *    -    blockIndex < 0 or blockIndex >= 10
+ *    -    QP <= 0 or QP >= 32.
+ *    NOTE(review): valid blockIndex range (0 to 5) disagrees with ">= 10" above.
+ */
+OMXResult omxVCM4P2_QuantIntra_I (
+    OMX_S16 *pSrcDst,
+    OMX_U8 QP,
+    OMX_INT blockIndex,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_QuantInter_I   (6.2.4.4.3)
+ *
+ * Description:
+ * Performs quantization on an inter coefficient block; supports
+ * bits_per_pixel == 8.
+ *
+ * Input Arguments:
+ *
+ *   pSrcDst - pointer to the input inter block coefficients; must be aligned
+ *            on a 16-byte boundary.
+ *   QP - quantization parameter (quantizer_scale).
+ *   shortVideoHeader - binary flag indicating presence of short_video_header;
+ *            shortVideoHeader==1 selects linear intra DC mode, and
+ *            shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDst - pointer to the output (quantized) inter block coefficients.
+ *            When shortVideoHeader==1, AC coefficients are saturated on the
+ *            interval [-127, 127], and DC coefficients are saturated on the
+ *            interval [1, 254].  When shortVideoHeader==0, AC coefficients
+ *            are saturated on the interval [-2047, 2047].
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrcDst is NULL.
+ *    -    QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_QuantInter_I (
+    OMX_S16 *pSrcDst,
+    OMX_U8 QP,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_TransRecBlockCoef_intra   (6.2.4.4.4)
+ *
+ * Description:
+ * Quantizes the DCT coefficients, implements intra block AC/DC coefficient
+ * prediction, and reconstructs the current intra block texture for prediction
+ * on the next frame.  Quantized row and column coefficients are returned in
+ * the updated coefficient buffers.
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the pixels of current intra block; must be aligned on
+ *            an 8-byte boundary.
+ *   pPredBufRow - pointer to the coefficient row buffer containing
+ *            ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16.
+ *            Coefficients are organized into blocks of eight as described
+ *            below (Internal Prediction Coefficient Update Procedures).  The
+ *            DC coefficient is first, and the remaining buffer locations
+ *            contain the quantized AC coefficients. Each group of eight row
+ *            buffer elements combined with one element eight elements ahead
+ *            contains the coefficient predictors of the neighboring block
+ *            that is spatially above or to the left of the block currently to
+ *            be decoded. A negative-valued DC coefficient indicates that this
+ *            neighboring block is not INTRA-coded or out of bounds, and
+ *            therefore the AC and DC coefficients are invalid.  Pointer must
+ *            be aligned on an 8-byte boundary.
+ *   pPredBufCol - pointer to the prediction coefficient column buffer
+ *            containing 16 elements of type OMX_S16. Coefficients are
+ *            organized as described in section 6.2.2.5.  Pointer must be
+ *            aligned on an 8-byte boundary.
+ *   pSumErr - pointer to a flag indicating whether or not AC prediction is
+ *            required; AC prediction is enabled if *pSumErr >=0, but the
+ *            value is not used for coefficient prediction, i.e., the sum of
+ *            absolute differences starts from 0 for each call to this
+ *            function.  Otherwise AC prediction is disabled if *pSumErr < 0.
+ *   blockIndex - block index indicating the component type and position, as
+ *            defined in [ISO14496-2], subclause 6.1.3.8.
+ *   curQp - quantization parameter of the macroblock to which the current
+ *            block belongs
+ *   pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0]
+ *            contains the quantization parameter associated with the 8x8
+ *            block left of the current block (QPa), and pQpBuf[1] contains
+ *            the quantization parameter associated with the 8x8 block above
+ *            the current block (QPc).  In the event that the corresponding
+ *            block is outside of the VOP bound, the Qp value will not affect
+ *            the intra prediction process, as described in [ISO14496-2],
+ *            sub-clause 7.4.3.3, "Adaptive AC Coefficient Prediction".
+ *   srcStep - width of the source buffer; must be a multiple of 8.
+ *   dstStep - width of the reconstructed destination buffer; must be a
+ *            multiple of 16.
+ *   shortVideoHeader - binary flag indicating presence of
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains
+ *            the predicted DC coefficient; the remaining entries contain the
+ *            quantized AC coefficients (without prediction).  The pointer
+ *            pDst must be aligned on a 16-byte boundary.
+ *   pRec - pointer to the reconstructed texture; must be aligned on an
+ *            8-byte boundary.
+ *   pPredBufRow - pointer to the updated coefficient row buffer
+ *   pPredBufCol - pointer to the updated coefficient column buffer
+ *   pPreACPredict - if prediction is enabled, the parameter points to the
+ *            start of the buffer containing the coefficient differences for
+ *            VLC encoding. The entry pPreACPredict[0] indicates prediction
+ *            direction for the current block and takes one of the following
+ *            values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or
+ *            OMX_VC_VERTICAL.  The entries
+ *            pPreACPredict[1]-pPreACPredict[7] contain predicted AC
+ *            coefficients.  If prediction is disabled (*pSumErr<0) then the
+ *            contents of this buffer are undefined upon return from the
+ *            function.
+ *   pSumErr - pointer to the value of the accumulated AC coefficient errors,
+ *            i.e., sum of the absolute differences between predicted and
+ *            unpredicted AC coefficients
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: pSrc, pDst, pRec,
+ *         pPredBufRow, pPredBufCol, pQpBuf, pPreACPredict, pSumErr.
+ *    -    blockIndex < 0 or blockIndex >= 10;
+ *    -    curQp <= 0 or curQp >= 32.
+ *    -    srcStep, or dstStep <= 0 or not a multiple of 8.
+ *    -    pDst is not 16-byte aligned.
+ *    -    At least one of the following pointers is not 8-byte aligned:
+ *         pSrc, pRec.
+ *
+ *  Note: The coefficient buffers must be updated in accordance with the
+ *        update procedures defined in section 6.2.2.
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_intra (
+    const OMX_U8 *pSrc,
+    OMX_S16 *pDst,
+    OMX_U8 *pRec,
+    OMX_S16 *pPredBufRow,
+    OMX_S16 *pPredBufCol,
+    OMX_S16 *pPreACPredict,
+    OMX_INT *pSumErr,
+    OMX_INT blockIndex,
+    OMX_U8 curQp,
+    const OMX_U8 *pQpBuf,
+    OMX_INT srcStep,
+    OMX_INT dstStep,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_TransRecBlockCoef_inter   (6.2.4.4.5)
+ *
+ * Description:
+ * Implements DCT, and quantizes the DCT coefficients of the inter block
+ * while reconstructing the texture residual. There is no boundary check for
+ * the bit stream buffer.
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the residuals to be encoded; must be aligned on a
+ *            16-byte boundary.
+ *   QP - quantization parameter.
+ *   shortVideoHeader - binary flag indicating presence of short_video_header;
+ *                      shortVideoHeader==1 selects linear intra DC mode, and
+ *                      shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the quantized DCT coefficients buffer; must be aligned
+ *            on a 16-byte boundary.
+ *   pRec - pointer to the reconstructed texture residuals; must be aligned
+ *            on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is either NULL or
+ *         not 16-byte aligned:
+ *            - pSrc
+ *            - pDst
+ *            - pRec
+ *    -    QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_inter (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst,
+    OMX_S16 *pRec,
+    OMX_U8 QP,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeVLCZigzag_IntraDCVLC   (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block.  Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding".
+ *
+ * Input Arguments:
+ *
+ *   ppBitStream - double pointer to the current byte in the bitstream
+ *   pBitOffset - pointer to the bit position in the byte pointed to by
+ *            *ppBitStream. Valid within 0 to 7.
+ *   pQDctBlkCoef - pointer to the quantized DCT coefficient
+ *   predDir - AC prediction direction, which is used to decide the zigzag
+ *            scan pattern; takes one of the following values:
+ *            -  OMX_VC_NONE - AC prediction not used.
+ *                             Performs classical zigzag scan.
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction.
+ *                             Performs alternate-vertical zigzag scan.
+ *            -  OMX_VC_VERTICAL - Vertical prediction.
+ *                             Performs alternate-horizontal zigzag scan.
+ *   pattern - block pattern which is used to decide whether this block is
+ *            encoded
+ *   shortVideoHeader - binary flag indicating presence of
+ *            short_video_header; escape modes 0-3 are used if
+ *            shortVideoHeader==0, and escape mode 4 is used when
+ *            shortVideoHeader==1.
+ *   videoComp - video component type (luminance, chrominance) of the current
+ *            block
+ *
+ * Output Arguments:
+ *
+ *   ppBitStream - *ppBitStream is updated after the block is encoded, so
+ *            that it points to the current byte in the bit stream buffer.
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit
+ *            position in the byte pointed to by *ppBitStream.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream,
+ *              *ppBitStream, pBitOffset, pQDctBlkCoef.
+ *    -    *pBitOffset < 0, or *pBitOffset > 7.
+ *    -    predDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ *         OMX_VC_VERTICAL.
+ *    -    videoComp is not one component of enum OMXVCM4P2VideoComponent.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMX_S16 *pQDctBlkCoef,
+    OMX_U8 predDir,
+    OMX_U8 pattern,
+    OMX_INT shortVideoHeader,
+    OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeVLCZigzag_IntraACVLC   (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block.  Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding".
+ *
+ * Input Arguments:
+ *
+ *   ppBitStream - double pointer to the current byte in the bitstream
+ *   pBitOffset - pointer to the bit position in the byte pointed to by
+ *            *ppBitStream. Valid within 0 to 7.
+ *   pQDctBlkCoef - pointer to the quantized DCT coefficient
+ *   predDir - AC prediction direction, which is used to decide the zigzag
+ *            scan pattern; takes one of the following values:
+ *            -  OMX_VC_NONE - AC prediction not used.
+ *                             Performs classical zigzag scan.
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction.
+ *                             Performs alternate-vertical zigzag scan.
+ *            -  OMX_VC_VERTICAL - Vertical prediction.
+ *                             Performs alternate-horizontal zigzag scan.
+ *   pattern - block pattern which is used to decide whether this block is
+ *            encoded
+ *   shortVideoHeader - binary flag indicating presence of
+ *            short_video_header; escape modes 0-3 are used if
+ *            shortVideoHeader==0, and escape mode 4 is used when
+ *            shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ *   ppBitStream - *ppBitStream is updated after the block is encoded, so
+ *            that it points to the current byte in the bit stream buffer.
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit
+ *            position in the byte pointed to by *ppBitStream.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream,
+ *              *ppBitStream, pBitOffset, pQDctBlkCoef.
+ *    -    *pBitOffset < 0, or *pBitOffset > 7.
+ *    -    predDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ *         OMX_VC_VERTICAL.
+ *    -    VideoComp is not one component of enum OMXVCM4P2VideoComponent.
+ *    NOTE(review): this prototype has no videoComp parameter; confirm the bullet above.
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMX_S16 *pQDctBlkCoef,
+    OMX_U8 predDir,
+    OMX_U8 pattern,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeVLCZigzag_Inter   (6.2.4.5.3)
+ *
+ * Description:
+ * Performs classical zigzag scanning and VLC encoding for one inter block.
+ *
+ * Input Arguments:
+ *
+ *   ppBitStream - pointer to the pointer to the current byte in the bit
+ *            stream
+ *   pBitOffset - pointer to the bit position in the byte pointed to by
+ *            *ppBitStream. Valid within 0 to 7
+ *   pQDctBlkCoef - pointer to the quantized DCT coefficient
+ *   pattern - block pattern which is used to decide whether this block is
+ *            encoded
+ *   shortVideoHeader - binary flag indicating presence of
+ *            short_video_header; escape modes 0-3 are used if
+ *            shortVideoHeader==0, and escape mode 4 is used when
+ *            shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ *   ppBitStream - *ppBitStream is updated after the block is encoded so that
+ *            it points to the current byte in the bit stream buffer.
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit
+ *            position in the byte pointed to by *ppBitStream.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream,
+ *              *ppBitStream, pBitOffset, pQDctBlkCoef
+ *    -    *pBitOffset < 0, or *pBitOffset > 7.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_Inter (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMX_S16 *pQDctBlkCoef,
+    OMX_U8 pattern,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeMV   (6.2.4.5.4)
+ *
+ * Description:
+ * Predicts a motion vector for the current macroblock, encodes the
+ * difference, and writes the output to the stream buffer. The input MVs
+ * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie
+ * within the ranges associated with the input parameter fcodeForward, as
+ * described in [ISO14496-2], subclause 7.6.3.  This function provides a
+ * superset of the functionality associated with the function
+ * omxVCM4P2_FindMVpred.
+ *
+ * Input Arguments:
+ *
+ *   ppBitStream - double pointer to the current byte in the bitstream buffer
+ *   pBitOffset - pointer to the index of the first free (next available) bit
+ *            in the buffer referenced by *ppBitStream, valid in the range 0 to 7.
+ *   pMVCurMB - pointer to the current macroblock motion vector; a value of
+ *            NULL indicates unavailability.
+ *   pSrcMVLeftMB - pointer to the source left macroblock motion vector; a
+ *            value of NULL indicates unavailability.
+ *   pSrcMVUpperMB - pointer to source upper macroblock motion vector; a
+ *            value of NULL indicates unavailability.
+ *   pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a
+ *            value of NULL indicates unavailability.
+ *   fcodeForward - an integer with values from 1 to 7; used in encoding
+ *            motion vectors related to search range, as described in
+ *            [ISO14496-2], subclause 7.6.3.
+ *   MBType - macro block type, valid in the range 0 to 5
+ *
+ * Output Arguments:
+ *
+ *   ppBitStream - updated pointer to the current byte in the bit stream
+ *            buffer
+ *   pBitOffset - updated index of the next available bit position in stream
+ *            buffer referenced by *ppBitStream
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream,
+ *              *ppBitStream, pBitOffset, pMVCurMB
+ *    -    *pBitOffset < 0, or *pBitOffset > 7.
+ *    -    fcodeForward <= 0, or fcodeForward > 7, or MBType < 0.
+ *
+ */
+OMXResult omxVCM4P2_EncodeMV (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMXVCMotionVector *pMVCurMB,
+    const OMXVCMotionVector*pSrcMVLeftMB,
+    const OMXVCMotionVector *pSrcMVUpperMB,
+    const OMXVCMotionVector *pSrcMVUpperRightMB,
+    OMX_INT fcodeForward,
+    OMXVCM4P2MacroblockType MBType
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodePadMV_PVOP   (6.2.5.1.1)
+ *
+ * Description:
+ * Decodes and pads the four motion vectors associated with a non-intra P-VOP 
+ * macroblock.  For macroblocks of type OMX_VC_INTER4V, the output MV is 
+ * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for 
+ * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to 
+ * all four output MV buffer entries. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the bit 
+ *            stream buffer 
+ *   pBitOffset - pointer to the bit position in the byte pointed to by 
+ *            *ppBitStream. *pBitOffset is valid within [0-7]. 
+ *   pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the 
+ *            motion vector buffers of the macroblocks specially at the left, 
+ *            upper, and upper-right side of the current macroblock, 
+ *            respectively; a value of NULL indicates unavailability.  Note: 
+ *            Any neighborhood macroblock outside the current VOP or video 
+ *            packet or outside the current GOB (when short_video_header is 
+ *             1 ) for which gob_header_empty is  0  is treated as 
+ *            transparent, according to [ISO14496-2], subclause 7.6.5. 
+ *   fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream 
+ *            syntax 
+ *   MBType - the type of the current macroblock. If MBType is not equal to 
+ *            OMX_VC_INTER4V, the destination motion vector buffer is still 
+ *            filled with the same decoded vector. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded, so 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream 
+ *   pDstMVCurMB - pointer to the motion vector buffer for the current 
+ *            macroblock; contains four decoded motion vectors 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB 
+ *    -    *pBitOffset exceeds [0,7]
+ *    -    fcodeForward exceeds (0,7]
+ *    -    MBType less than zero
+ *    -    motion vector buffer is not 4-byte aligned. 
+ *    OMX_Sts_Err - status error 
+ *
+ */
+OMXResult omxVCM4P2_DecodePadMV_PVOP (
+    const OMX_U8 **ppBitStream,            /* in/out: *ppBitStream is updated after decoding */
+    OMX_INT *pBitOffset,                   /* in/out: bit position, *pBitOffset within [0,7] */
+    OMXVCMotionVector *pSrcMVLeftMB,       /* in: left neighbor MVs; NULL = unavailable */
+    OMXVCMotionVector*pSrcMVUpperMB,       /* in: upper neighbor MVs; NULL = unavailable */
+    OMXVCMotionVector *pSrcMVUpperRightMB, /* in: upper-right neighbor MVs; NULL = unavailable */
+    OMXVCMotionVector*pDstMVCurMB,         /* out: four decoded motion vectors */
+    OMX_INT fcodeForward,                  /* in: vop_fcode_forward; valid range is [1,7] */
+    OMXVCM4P2MacroblockType MBType         /* in: type of the current macroblock; must be >= 0 */
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeVLCZigzag_IntraDCVLC   (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients 
+ * for one intra block.  Two versions of the function (DCVLC and ACVLC) are 
+ * provided in order to support the two different methods of processing DC 
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC 
+ * Coefficient Decoding for the Case of Switched VLC Encoding." 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the 
+ *            bitstream buffer 
+ *   pBitOffset - pointer to the bit position in the current byte referenced 
+ *            by *ppBitStream.  The parameter *pBitOffset is valid in the 
+ *            range [0-7]. 
+ *            Bit Position in one byte:  |Most      Least| 
+ *                    *pBitOffset        |0 1 2 3 4 5 6 7| 
+ *   predDir - AC prediction direction; used to select the zigzag scan 
+ *            pattern; takes one of the following values: 
+ *            -  OMX_VC_NONE - AC prediction not used; 
+ *                             performs classical zigzag scan. 
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction; 
+ *                             performs alternate-vertical zigzag scan; 
+ *            -  OMX_VC_VERTICAL - Vertical prediction; 
+ *                             performs alternate-horizontal zigzag scan. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *   videoComp - video component type (luminance or chrominance) of the 
+ *            current block 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded such 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated such that it points to the current 
+ *            bit position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the coefficient buffer of current block; must be 
+ *            4-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pDst
+ *    -    *pBitOffset exceeds [0,7]
+ *    -    predDir exceeds [0,2]
+ *    -    pDst is not 4-byte aligned 
+ *    OMX_Sts_Err - if:
+ *    -    In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 
+ *    -    At least one of mark bits equals zero 
+ *    -    Illegal stream encountered; code cannot be located in VLC table 
+ *    -    Forbidden code encountered in the VLC FLC table. 
+ *    -    The number of coefficients is greater than 64 
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (
+    const OMX_U8 **ppBitStream,       /* in/out: *ppBitStream is updated after decoding */
+    OMX_INT *pBitOffset,              /* in/out: bit position, *pBitOffset within [0,7] */
+    OMX_S16 *pDst,                    /* out: coefficient buffer; must be 4-byte aligned */
+    OMX_U8 predDir,                   /* in: OMX_VC_NONE, OMX_VC_HORIZONTAL or OMX_VC_VERTICAL */
+    OMX_INT shortVideoHeader,         /* in: 0 = escape modes 0-3, 1 = escape mode 4 */
+    OMXVCM4P2VideoComponent videoComp /* in: luminance or chrominance */
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeVLCZigzag_IntraACVLC   (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients 
+ * for one intra block.  Two versions of the function (DCVLC and ACVLC) are 
+ * provided in order to support the two different methods of processing DC 
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC 
+ * Coefficient Decoding for the Case of Switched VLC Encoding." 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the 
+ *            bitstream buffer 
+ *   pBitOffset - pointer to the bit position in the current byte referenced 
+ *            by *ppBitStream; *pBitOffset is valid in the range [0-7]. 
+ *            Bit Position in one byte:  |Most      Least| 
+ *                    *pBitOffset        |0 1 2 3 4 5 6 7| 
+ *   predDir - AC prediction direction; used to select the zigzag scan 
+ *            pattern; takes one of the following values: OMX_VC_NONE - AC 
+ *            prediction not used; performs classical zigzag scan. 
+ *            OMX_VC_HORIZONTAL - Horizontal prediction; performs 
+ *            alternate-vertical zigzag scan; OMX_VC_VERTICAL - Vertical 
+ *            prediction; performs alternate-horizontal zigzag scan. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *   videoComp - video component type (luminance or chrominance) of the 
+ *            current block (IntraDCVLC version only) 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded such 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated such that it points to the current 
+ *            bit position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the coefficient buffer of current block; must be 
+ *            4-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *    -    ppBitStream, *ppBitStream, pBitOffset, or pDst is NULL 
+ *    -    *pBitOffset exceeds [0,7], predDir exceeds [0,2], or pDst is not 4-byte aligned 
+ *    OMX_Sts_Err - if:
+ *    -    In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 
+ *    -    At least one of mark bits equals zero 
+ *    -    Illegal stream encountered; code cannot be located in VLC table 
+ *    -    Forbidden code encountered in the VLC FLC table 
+ *    -    The number of coefficients is greater than 64 
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC (
+    const OMX_U8 **ppBitStream, /* in/out: *ppBitStream is updated after decoding */
+    OMX_INT *pBitOffset,        /* in/out: bit position, *pBitOffset within [0,7] */
+    OMX_S16 *pDst,              /* out: coefficient buffer; must be 4-byte aligned */
+    OMX_U8 predDir,             /* in: OMX_VC_NONE, OMX_VC_HORIZONTAL or OMX_VC_VERTICAL */
+    OMX_INT shortVideoHeader    /* in: 0 = escape modes 0-3, 1 = escape mode 4 */
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeVLCZigzag_Inter   (6.2.5.2.3)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one inter-coded block. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - double pointer to the current byte in the stream buffer 
+ *   pBitOffset - pointer to the next available bit in the current stream 
+ *            byte referenced by *ppBitStream. The parameter *pBitOffset is 
+ *            valid within the range [0-7]. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded such 
+ *            that it points to the current byte in the stream buffer 
+ *   pBitOffset - *pBitOffset is updated after decoding such that it points 
+ *            to the next available bit in the stream byte referenced by 
+ *            *ppBitStream 
+ *   pDst - pointer to the coefficient buffer of current block; must be 
+ *            4-byte aligned. 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pDst
+ *    -    pDst is not 4-byte aligned
+ *    -   *pBitOffset exceeds [0,7]
+ *    OMX_Sts_Err - status error, if:
+ *    -    At least one mark bit is equal to zero 
+ *    -    Encountered an illegal stream code that cannot be found in the VLC table 
+ *    -    Encountered an illegal code in the VLC FLC table 
+ *    -    The number of coefficients is greater than 64 
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_Inter (
+    const OMX_U8 **ppBitStream, /* in/out: *ppBitStream is updated after decoding */
+    OMX_INT *pBitOffset,        /* in/out: next available bit, *pBitOffset within [0,7] */
+    OMX_S16 *pDst,              /* out: coefficient buffer; must be 4-byte aligned */
+    OMX_INT shortVideoHeader    /* in: 0 = escape modes 0-3, 1 = escape mode 4 */
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_QuantInvIntra_I   (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded 
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to 
+ * the range [-2048, 2047]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input (quantized) intra/inter block; must be 
+ *            aligned on a 16-byte boundary. 
+ *   QP - quantization parameter (quantizer_scale) 
+ *   videoComp - video component type of the current block. Takes one of the 
+ *            following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra 
+ *            version only). 
+ *   shortVideoHeader - binary flag indicating presence of short_video_header 
+ *            (intra version only). 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the output (dequantized) intra/inter block 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; one or more of the following is 
+ *              true: 
+ *    -    pSrcDst is NULL 
+ *    -    QP <= 0 or QP > 31 
+ *    -    videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. 
+ *
+ */
+OMXResult omxVCM4P2_QuantInvIntra_I (
+    OMX_S16 *pSrcDst,                  /* in/out: quantized block in, dequantized block out; 16-byte aligned */
+    OMX_INT QP,                        /* in: quantization parameter (quantizer_scale) */
+    OMXVCM4P2VideoComponent videoComp, /* in: OMX_VC_LUMINANCE or OMX_VC_CHROMINANCE */
+    OMX_INT shortVideoHeader           /* in: binary flag, presence of short_video_header */
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_QuantInvInter_I   (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded 
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to 
+ * the range [-2048, 2047]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input (quantized) intra/inter block; must be 
+ *            aligned on a 16-byte boundary. 
+ *   QP - quantization parameter (quantizer_scale) 
+ *   videoComp - video component type of the current block. Takes one of the 
+ *            following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra 
+ *            version only). 
+ *   shortVideoHeader - binary flag indicating presence of short_video_header 
+ *            (intra version only). 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the output (dequantized) intra/inter block 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; one or more of the following is 
+ *              true: 
+ *    -    pSrcDst is NULL 
+ *    -    QP <= 0 or QP > 31 
+ *    -    (intra version only) videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. 
+ *
+ */
+OMXResult omxVCM4P2_QuantInvInter_I (
+    OMX_S16 *pSrcDst, /* in/out: quantized block in, dequantized block out; 16-byte aligned */
+    OMX_INT QP        /* in: quantization parameter (quantizer_scale) */
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeBlockCoef_Intra   (6.2.5.4.1)
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inverse 
+ * zigzag positioning, and IDCT, with appropriate clipping on each step, are 
+ * performed on the coefficients. The results are then placed in the output 
+ * frame/plane on a pixel basis.  Note: This function will be used only when 
+ * at least one non-zero AC coefficient of current block exists in the bit 
+ * stream. The DC only condition will be handled in another function. 
+ *
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the bit 
+ *            stream buffer. There is no boundary check for the bit stream 
+ *            buffer. 
+ *   pBitOffset - pointer to the bit position in the byte pointed to by 
+ *            *ppBitStream. *pBitOffset is valid within [0-7]. 
+ *   step - width of the destination plane 
+ *   pCoefBufRow - pointer to the coefficient row buffer; must be aligned on 
+ *            an 8-byte boundary. 
+ *   pCoefBufCol - pointer to the coefficient column buffer; must be aligned 
+ *            on an 8-byte boundary. 
+ *   curQP - quantization parameter of the macroblock which the current block 
+ *            belongs to 
+ *   pQPBuf - pointer to the quantization parameter buffer 
+ *   blockIndex - block index indicating the component type and position as 
+ *            defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5. 
+ *   intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a 
+ *            mechanism to switch between two VLC for coding of Intra DC 
+ *            coefficients as per [ISO14496-2], Table 6-21. 
+ *   ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if 
+ *            the ac coefficients of the first row or first column are 
+ *            differentially coded for intra coded macroblock. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC 
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded, so 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the block in the destination plane; must be aligned on 
+ *            an 8-byte boundary. 
+ *   pCoefBufRow - pointer to the updated coefficient row buffer. 
+ *   pCoefBufCol - pointer to the updated coefficient column buffer  Note: 
+ *            The coefficient buffers must be updated in accordance with the 
+ *            update procedure defined in section 6.2.2. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol, 
+ *         pQPBuf, pDst. 
+ *    -    *pBitOffset exceeds [0,7] 
+ *    -    curQP exceeds (1, 31)
+ *    -    blockIndex exceeds [0,5]
+ *    -    step is not a multiple of 8
+ *    -    a pointer alignment requirement was violated. 
+ *    OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Intra.  
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra (
+    const OMX_U8 **ppBitStream, /* in/out: *ppBitStream is updated after decoding; no boundary check */
+    OMX_INT *pBitOffset,        /* in/out: bit position, *pBitOffset within [0,7] */
+    OMX_U8 *pDst,               /* out: block in the destination plane; 8-byte aligned */
+    OMX_INT step,               /* in: width of the destination plane; multiple of 8 */
+    OMX_S16 *pCoefBufRow,       /* in/out: coefficient row buffer; 8-byte aligned */
+    OMX_S16 *pCoefBufCol,       /* in/out: coefficient column buffer; 8-byte aligned */
+    OMX_U8 curQP,               /* in: QP of the macroblock the current block belongs to */
+    const OMX_U8 *pQPBuf,       /* in: quantization parameter buffer */
+    OMX_INT blockIndex,         /* in: component type and position; valid range is [0,5] */
+    OMX_INT intraDCVLC,         /* in: code determined by intra_dc_vlc_thr and QP */
+    OMX_INT ACPredFlag,         /* in: equal to ac_pred_flag (of luminance) */
+    OMX_INT shortVideoHeader    /* in: 1 = linear intra DC mode, 0 = non-linear intra DC mode */
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeBlockCoef_Inter   (6.2.5.4.2)
+ *
+ * Description:
+ * Decodes the INTER block coefficients. This function performs inverse 
+ * quantization, inverse zigzag positioning, and IDCT (with appropriate 
+ * clipping on each step) on the coefficients. The results (residuals) are 
+ * placed in a contiguous array of 64 elements. For INTER block, the output 
+ * buffer holds the residuals for further reconstruction. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the bit 
+ *            stream buffer. There is no boundary check for the bit stream 
+ *            buffer. 
+ *   pBitOffset - pointer to the bit position in the byte pointed to by 
+ *            *ppBitStream. *pBitOffset is valid within [0-7] 
+ *   QP - quantization parameter 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC 
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded, so 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the decoded residual buffer (a contiguous array of 64 
+ *            elements of OMX_S16 data type); must be aligned on a 16-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset , pDst 
+ *    -    *pBitOffset exceeds [0,7]
+ *    -    QP <= 0. 
+ *    -    pDst is not 16-byte aligned 
+ *    OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter . 
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter (
+    const OMX_U8 **ppBitStream, /* in/out: *ppBitStream is updated after decoding; no boundary check */
+    OMX_INT *pBitOffset,        /* in/out: bit position, *pBitOffset within [0,7] */
+    OMX_S16 *pDst,              /* out: 64 OMX_S16 residuals, contiguous; 16-byte aligned */
+    OMX_INT QP,                 /* in: quantization parameter; must be > 0 */
+    OMX_INT shortVideoHeader    /* in: binary flag, presence of short_video_header */
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_PredictReconCoefIntra   (6.2.5.4.3)
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block.  Prior 
+ * to the function call, prediction direction (predDir) should be selected as 
+ * specified in [ISO14496-2], subclause 7.4.3.1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the coefficient buffer which contains the quantized 
+ *            coefficient residuals (PQF) of the current block; must be 
+ *            aligned on a 4-byte boundary.  The output coefficients are 
+ *            saturated to the range [-2048, 2047]. 
+ *   pPredBufRow - pointer to the coefficient row buffer; must be aligned on 
+ *            a 4-byte boundary. 
+ *   pPredBufCol - pointer to the coefficient column buffer; must be aligned 
+ *            on a 4-byte boundary. 
+ *   curQP - quantization parameter of the current block. curQP may equal 
+ *            predQP, especially when the current block and the predictor block 
+ *            are in the same macroblock. 
+ *   predQP - quantization parameter of the predictor block 
+ *   predDir - indicates the prediction direction which takes one of the 
+ *            following values: OMX_VC_HORIZONTAL - predict horizontally 
+ *            OMX_VC_VERTICAL - predict vertically 
+ *   ACPredFlag - a flag indicating if AC prediction should be performed. It 
+ *            is equal to ac_pred_flag in the bit stream syntax of MPEG-4 
+ *   videoComp - video component type (luminance or chrominance) of the 
+ *            current block 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the coefficient buffer which contains the quantized 
+ *            coefficients (QF) of the current block 
+ *   pPredBufRow - pointer to the updated coefficient row buffer 
+ *   pPredBufCol - pointer to the updated coefficient column buffer  Note: 
+ *            Buffer update: Update the AC prediction buffer (both row and 
+ *            column buffer). 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *        -    At least one of the pointers is NULL: 
+ *              pSrcDst, pPredBufRow, or pPredBufCol. 
+ *        -    curQP <= 0, 
+ *        -    predQP <= 0, 
+ *        -    curQP >31, 
+ *        -    predQP > 31, 
+ *        -    predDir exceeds [1,2]
+ *        -    pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned. 
+ *
+ */
+OMXResult omxVCM4P2_PredictReconCoefIntra (
+    OMX_S16 *pSrcDst,                 /* in/out: PQF residuals in, QF coefficients out; 4-byte aligned */
+    OMX_S16 *pPredBufRow,             /* in/out: coefficient row buffer; 4-byte aligned */
+    OMX_S16 *pPredBufCol,             /* in/out: coefficient column buffer; 4-byte aligned */
+    OMX_INT curQP,                    /* in: QP of the current block; valid range is [1,31] */
+    OMX_INT predQP,                   /* in: QP of the predictor block; valid range is [1,31] */
+    OMX_INT predDir,                  /* in: OMX_VC_HORIZONTAL or OMX_VC_VERTICAL */
+    OMX_INT ACPredFlag,               /* in: equal to ac_pred_flag in the bit stream syntax */
+    OMXVCM4P2VideoComponent videoComp /* in: luminance or chrominance */
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_MCReconBlock   (6.2.5.5.1)
+ *
+ * Description:
+ * Performs motion compensation prediction for an 8x8 block using 
+ * interpolation described in [ISO14496-2], subclause 7.6.2. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the block in the reference plane. 
+ *   srcStep - distance between the start of consecutive lines in the 
+ *            reference plane, in bytes; must be a multiple of 8. 
+ *   dstStep - distance between the start of consecutive lines in the 
+ *            destination plane, in bytes; must be a multiple of 8. 
+ *   pSrcResidue - pointer to a buffer containing the 16-bit prediction 
+ *            residuals; must be 16-byte aligned. If the pointer is NULL, then 
+ *            no prediction is done, only motion compensation, i.e., the block 
+ *            is moved with interpolation. 
+ *   predictType - bilinear interpolation type, as defined in section 
+ *            6.2.1.2. 
+ *   rndVal - rounding control parameter: 0 - disabled; 1 - enabled. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination buffer; must be 8-byte aligned.  If 
+ *            prediction residuals are added then output intensities are 
+ *            clipped to the range [0,255]. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -    pDst is not 8-byte aligned. 
+ *    -    pSrcResidue is not 16-byte aligned. 
+ *    -    one or more of the following pointers is NULL: pSrc or pDst. 
+ *    -    either srcStep or dstStep is not a multiple of 8. 
+ *    -    invalid type specified for the parameter predictType. 
+ *    -    the parameter rndVal is not equal either to 0 or 1. 
+ *
+ */
+OMXResult omxVCM4P2_MCReconBlock (
+    const OMX_U8 *pSrc,         /* in: block in the reference plane; must not be NULL */
+    OMX_INT srcStep,            /* in: reference plane line distance in bytes; multiple of 8 */
+    const OMX_S16 *pSrcResidue, /* in: 16-bit residuals, 16-byte aligned; NULL = motion compensation only */
+    OMX_U8 *pDst,               /* out: destination buffer; 8-byte aligned, must not be NULL */
+    OMX_INT dstStep,            /* in: destination plane line distance in bytes; multiple of 8 */
+    OMX_INT predictType,        /* in: bilinear interpolation type */
+    OMX_INT rndVal              /* in: rounding control, 0 = disabled, 1 = enabled */
+);
+
+
+
+/* 6.3.1.1 Intra 16x16 Prediction Modes  */
+/* A data type that enumerates intra_16x16 macroblock prediction modes is defined as follows:  */
+
+typedef enum {
+    OMX_VC_16X16_VERT = 0,  /**< Intra_16x16_Vertical */
+    OMX_VC_16X16_HOR = 1,   /**< Intra_16x16_Horizontal */
+    OMX_VC_16X16_DC = 2,    /**< Intra_16x16_DC */
+    OMX_VC_16X16_PLANE = 3  /**< Intra_16x16_Plane */ 
+} OMXVCM4P10Intra16x16PredMode;
+
+
+
+/* 6.3.1.2 Intra 4x4 Prediction Modes  */
+/* A data type that enumerates intra_4x4 macroblock prediction modes is defined as follows:  */
+
+typedef enum {
+    OMX_VC_4X4_VERT = 0,     /**< Intra_4x4_Vertical */
+    OMX_VC_4X4_HOR = 1,      /**< Intra_4x4_Horizontal */
+    OMX_VC_4X4_DC = 2,       /**< Intra_4x4_DC */
+    OMX_VC_4X4_DIAG_DL = 3,  /**< Intra_4x4_Diagonal_Down_Left */
+    OMX_VC_4X4_DIAG_DR = 4,  /**< Intra_4x4_Diagonal_Down_Right */
+    OMX_VC_4X4_VR = 5,       /**< Intra_4x4_Vertical_Right */
+    OMX_VC_4X4_HD = 6,       /**< Intra_4x4_Horizontal_Down */
+    OMX_VC_4X4_VL = 7,       /**< Intra_4x4_Vertical_Left */
+    OMX_VC_4X4_HU = 8        /**< Intra_4x4_Horizontal_Up */ 
+} OMXVCM4P10Intra4x4PredMode;
+
+
+
+/* 6.3.1.3 Chroma Prediction Modes  */
+/* A data type that enumerates intra chroma prediction modes is defined as follows:  */
+
+typedef enum {
+    OMX_VC_CHROMA_DC = 0,    /**< Intra_Chroma_DC */
+    OMX_VC_CHROMA_HOR = 1,   /**< Intra_Chroma_Horizontal */
+    OMX_VC_CHROMA_VERT = 2,  /**< Intra_Chroma_Vertical */
+    OMX_VC_CHROMA_PLANE = 3  /**< Intra_Chroma_Plane */ 
+} OMXVCM4P10IntraChromaPredMode;
+
+
+
+/* 6.3.1.4 Motion Estimation Modes  */
+/* A data type that enumerates H.264 motion estimation modes is defined as follows:  */
+
+typedef enum {
+    OMX_VC_M4P10_FAST_SEARCH = 0, /**< Fast motion search */
+    OMX_VC_M4P10_FULL_SEARCH = 1  /**< Full motion search */ 
+} OMXVCM4P10MEMode;
+
+
+
+/* 6.3.1.5 Macroblock Types  */
+/* A data type that enumerates H.264 macroblock types is defined as follows:  */
+
+typedef enum {
+    OMX_VC_P_16x16  = 0, /* macroblock types are defined by [ISO14496-10] */
+    OMX_VC_P_16x8  = 1,
+    OMX_VC_P_8x16  = 2,
+    OMX_VC_P_8x8  = 3,
+    OMX_VC_PREF0_8x8  = 4,
+    OMX_VC_INTER_SKIP  = 5,
+    OMX_VC_INTRA_4x4  = 8, /* NOTE: values 6 and 7 are not assigned in this enumeration */
+    OMX_VC_INTRA_16x16  = 9,
+    OMX_VC_INTRA_PCM = 10 
+} OMXVCM4P10MacroblockType;
+
+
+
+/* 6.3.1.6 Sub-Macroblock Types  */
+/* A data type that enumerates H.264 sub-macroblock types is defined as follows:  */
+
+typedef enum {
+    OMX_VC_SUB_P_8x8 = 0, /* sub-macroblock types are defined by [ISO14496-10] */
+    OMX_VC_SUB_P_8x4 = 1,
+    OMX_VC_SUB_P_4x8 = 2,
+    OMX_VC_SUB_P_4x4 = 3 
+} OMXVCM4P10SubMacroblockType;
+
+
+
+/* 6.3.1.7 Variable Length Coding (VLC) Information  */
+
+typedef struct {
+    OMX_U8 uTrailing_Ones;      /* Number of trailing ones; 3 at most */
+    OMX_U8 uTrailing_One_Signs; /* Signs of the trailing ones; NOTE(review): bit packing is not specified here */
+    OMX_U8 uNumCoeffs;          /* Total number of non-zero coefs, including trailing ones */
+    OMX_U8 uTotalZeros;         /* Total number of zero coefs */
+    OMX_S16 iLevels[16];        /* Levels of non-zero coefs, in reverse zig-zag order */
+    OMX_U8 uRuns[16];           /* Runs for levels and trailing ones, in reverse zig-zag order */
+} OMXVCM4P10VLCInfo;
+
+
+
+/* 6.3.1.8 Macroblock Information  */
+
+typedef struct {
+    OMX_S32 sliceId;                          /* slice number */
+    OMXVCM4P10MacroblockType mbType;          /* MB type */
+    OMXVCM4P10SubMacroblockType subMBType[4]; /* sub-block type */
+    OMX_S32 qpy;                              /* qp for luma */
+    OMX_S32 qpc;                              /* qp for chroma */
+    OMX_U32 cbpy;                             /* CBP Luma */
+    OMX_U32 cbpc;                             /* CBP Chroma */
+    OMXVCMotionVector pMV0[4][4]; /* motion vector, represented using 1/4-pel units, pMV0[blocky][blockx] (blocky = 0~3, blockx = 0~3) */
+    OMXVCMotionVector pMVPred[4][4]; /* motion vector prediction, represented using 1/4-pel units, pMVPred[blocky][blockx] (blocky = 0~3, blockx = 0~3) */
+    OMX_U8 pRefL0Idx[4];                      /* reference picture indices */
+    OMXVCM4P10Intra16x16PredMode Intra16x16PredMode; /* best intra 16x16 prediction mode */
+    OMXVCM4P10Intra4x4PredMode pIntra4x4PredMode[16]; /* best intra 4x4 prediction mode for each block; NOTE(review): block order presumably follows pMV0 above - confirm the flat [16] ordering */
+} OMXVCM4P10MBInfo, *OMXVCM4P10MBInfoPtr;
+
+
+
+/* 6.3.1.9 Motion Estimation Parameters  */
+
+typedef struct {
+    OMX_S32 blockSplitEnable8x8; /* enables 16x8, 8x16, 8x8 */
+    OMX_S32 blockSplitEnable4x4; /* enable splitting of 8x4, 4x8, 4x4 blocks */
+    OMX_S32 halfSearchEnable;    /* presumably 1=enable, 0=disable half-pel search; TODO confirm */
+    OMX_S32 quarterSearchEnable; /* presumably 1=enable, 0=disable quarter-pel search; TODO confirm */
+    OMX_S32 intraEnable4x4;      /* 1=enable, 0=disable */
+    OMX_S32 searchRange16x16;    /* integer pixel units */
+    OMX_S32 searchRange8x8;      /* presumably integer pixel units, as for 16x16; TODO confirm */
+    OMX_S32 searchRange4x4;      /* presumably integer pixel units, as for 16x16; TODO confirm */
+} OMXVCM4P10MEParams;
+
+
+
+/**
+ * Function:  omxVCM4P10_PredictIntra_4x4   (6.3.3.1.1)
+ *
+ * Description:
+ * Perform Intra_4x4 prediction for luma samples. If the upper-right block is 
+ * not available, then duplication work should be handled inside the function. 
+ * Users need not define them outside. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcLeft -  Pointer to the buffer of 4 left pixels: 
+ *                  p[x, y] (x = -1, y = 0..3) 
+ *   pSrcAbove - Pointer to the buffer of 8 above pixels: 
+ *                  p[x,y] (x = 0..7, y =-1); 
+ *               must be aligned on a 4-byte boundary. 
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) 
+ *   leftStep - Step of left pixel buffer; must be a multiple of 4. 
+ *   dstStep - Step of the destination buffer; must be a multiple of 4. 
+ *   predMode - Intra_4x4 prediction mode. 
+ *   availability - Neighboring 4x4 block availability flag, refer to 
+ *             "Neighboring Macroblock Availability" . 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to the destination buffer; must be aligned on a 4-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pDst is NULL. 
+ *    dstStep < 4, or dstStep is not a multiple of 4. 
+ *    leftStep is not a multiple of 4. 
+ *    predMode is not in the valid range of enumeration 
+ *              OMXVCM4P10Intra4x4PredMode. 
+ *    predMode is OMX_VC_4X4_VERT, but availability doesn't set OMX_VC_UPPER 
+ *              indicating p[x,-1] (x = 0..3) is not available. 
+ *    predMode is OMX_VC_4X4_HOR, but availability doesn't set OMX_VC_LEFT 
+ *              indicating p[-1,y] (y = 0..3) is not available. 
+ *    predMode is OMX_VC_4X4_DIAG_DL, but availability doesn't set 
+ *              OMX_VC_UPPER indicating p[x, -1] (x = 0..3) is not available. 
+ *    predMode is OMX_VC_4X4_DIAG_DR, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not 
+ *              available. 
+ *    predMode is OMX_VC_4X4_VR, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not 
+ *              available. 
+ *    predMode is OMX_VC_4X4_HD, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not 
+ *              available. 
+ *    predMode is OMX_VC_4X4_VL, but availability doesn't set OMX_VC_UPPER 
+ *              indicating p[x,-1] (x = 0..3) is not available. 
+ *    predMode is OMX_VC_4X4_HU, but availability doesn't set OMX_VC_LEFT 
+ *              indicating p[-1,y] (y = 0..3) is not available. 
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL. 
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL. 
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. 
+ *    either pSrcAbove or pDst is not aligned on a 4-byte boundary.  
+ *
+ * Note: 
+ *     pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if 
+ *     they are not used by intra prediction as implied in predMode. 
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_4x4 (
+    const OMX_U8 *pSrcLeft,      /* in: 4 left pixels, p[-1, y] (y = 0..3) */
+    const OMX_U8 *pSrcAbove,     /* in: 8 above pixels, p[x, -1] (x = 0..7); 4-byte aligned */
+    const OMX_U8 *pSrcAboveLeft, /* in: above-left pixel, p[-1, -1] */
+    OMX_U8 *pDst,                /* out: destination buffer; 4-byte aligned, must not be NULL */
+    OMX_INT leftStep,            /* in: step of the left pixel buffer; multiple of 4 */
+    OMX_INT dstStep,             /* in: step of the destination buffer; multiple of 4, at least 4 */
+    OMXVCM4P10Intra4x4PredMode predMode, /* in: Intra_4x4 prediction mode */
+    OMX_S32 availability         /* in: neighboring 4x4 block availability flags */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_PredictIntra_16x16   (6.3.3.1.2)
+ *
+ * Description:
+ * Perform Intra_16x16 prediction for luma samples. If the upper-right block 
+ * is not available, then duplication work should be handled inside the 
+ * function. Users need not define them outside. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y = 
+ *            0..15) 
+ *   pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15, 
+ *            y= -1); must be aligned on a 16-byte boundary. 
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) 
+ *   leftStep - Step of left pixel buffer; must be a multiple of 16. 
+ *   dstStep - Step of the destination buffer; must be a multiple of 16. 
+ *   predMode - Intra_16x16 prediction mode, please refer to section 3.4.1. 
+ *   availability - Neighboring 16x16 MB availability flag. Refer to 
+ *                  section 3.4.4. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst -Pointer to the destination buffer; must be aligned on a 16-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pDst is NULL. 
+ *    dstStep < 16 or dstStep is not a multiple of 16. 
+ *    leftStep is not a multiple of 16. 
+ *    predMode is not in the valid range of enumeration 
+ *              OMXVCM4P10Intra16x16PredMode 
+ *    predMode is OMX_VC_16X16_VERT, but availability doesn't set 
+ *              OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available. 
+ *    predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT 
+ *              indicating p[-1,y] (y = 0..15) is not available. 
+ *    predMode is OMX_VC_16X16_PLANE, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1](x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not 
+ *              available. 
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL. 
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL. 
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. 
+ *    either pSrcAbove or pDst is not aligned on a 16-byte boundary.  
+ *
+ * Note: 
+ *     pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if 
+ *     they are not used by intra prediction implied in predMode. 
+ * Note: 
+ *     OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction. 
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_16x16 (
+    const OMX_U8 *pSrcLeft,
+    const OMX_U8 *pSrcAbove,
+    const OMX_U8 *pSrcAboveLeft,
+    OMX_U8 *pDst,
+    OMX_INT leftStep,
+    OMX_INT dstStep,
+    OMXVCM4P10Intra16x16PredMode predMode,
+    OMX_S32 availability
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_PredictIntraChroma_8x8   (6.3.3.1.3)
+ *
+ * Description:
+ * Performs intra prediction for chroma samples. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y= 
+ *            0..7). 
+ *   pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y 
+ *            = -1); must be aligned on an 8-byte boundary. 
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) 
+ *   leftStep - Step of left pixel buffer; must be a multiple of 8. 
+ *   dstStep - Step of the destination buffer; must be a multiple of 8. 
+ *   predMode - Intra chroma prediction mode, please refer to section 3.4.3. 
+ *   availability - Neighboring chroma block availability flag, please refer 
+ *            to  "Neighboring Macroblock Availability". 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to the destination buffer; must be aligned on an 8-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If any of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pDst is NULL. 
+ *    dstStep < 8 or dstStep is not a multiple of 8. 
+ *    leftStep is not a multiple of 8. 
+ *    predMode is not in the valid range of enumeration 
+ *              OMXVCM4P10IntraChromaPredMode. 
+ *    predMode is OMX_VC_CHROMA_VERT, but availability doesn't set 
+ *              OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available. 
+ *    predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT 
+ *              indicating p[-1,y] (y = 0..7) is not available. 
+ *    predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1](x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not 
+ *              available. 
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL. 
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL. 
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. 
+ *    either pSrcAbove or pDst is not aligned on an 8-byte boundary.  
+ *
+ *  Note: pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if 
+ *  they are not used by intra prediction implied in predMode. 
+ *
+ *  Note: OMX_VC_UPPER_RIGHT is not used in intra chroma prediction. 
+ *
+ */
+OMXResult omxVCM4P10_PredictIntraChroma_8x8 (
+    const OMX_U8 *pSrcLeft,
+    const OMX_U8 *pSrcAbove,
+    const OMX_U8 *pSrcAboveLeft,
+    OMX_U8 *pDst,
+    OMX_INT leftStep,
+    OMX_INT dstStep,
+    OMXVCM4P10IntraChromaPredMode predMode,
+    OMX_S32 availability
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateLuma   (6.3.3.2.1)
+ *
+ * Description:
+ * Performs quarter-pixel interpolation for inter luma MB. It is assumed that 
+ * the frame is already padded when calling this function. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the source reference frame buffer 
+ *   srcStep - reference frame step, in bytes; must be a multiple of roi.width 
+ *   dstStep - destination frame step, in bytes; must be a multiple of 
+ *            roi.width 
+ *   dx - Fractional part of horizontal motion vector component in 1/4 pixel 
+ *            unit; valid in the range [0,3] 
+ *   dy - Fractional part of vertical motion vector y component in 1/4 pixel 
+ *            unit; valid in the range [0,3] 
+ *   roi - Dimension of the interpolation region; the parameters roi.width and 
+ *            roi.height must be equal to either 4, 8, or 16. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to the destination frame buffer: 
+ *          if roi.width==4,  4-byte alignment required 
+ *          if roi.width==8,  8-byte alignment required 
+ *          if roi.width==16, 16-byte alignment required 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pSrc or pDst is NULL. 
+ *    srcStep or dstStep < roi.width. 
+ *    dx or dy is out of range [0,3]. 
+ *    roi.width or roi.height is out of range {4, 8, 16}. 
+ *    roi.width is equal to 4, but pDst is not 4 byte aligned. 
+ *    roi.width is equal to 8 or 16, but pDst is not 8 byte aligned. 
+ *    srcStep or dstStep is not a multiple of 8. 
+ *
+ */
+OMXResult omxVCM4P10_InterpolateLuma (
+    const OMX_U8 *pSrc,
+    OMX_S32 srcStep,
+    OMX_U8 *pDst,
+    OMX_S32 dstStep,
+    OMX_S32 dx,
+    OMX_S32 dy,
+    OMXSize roi
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateChroma   (6.3.3.2.2)
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc -Pointer to the source reference frame buffer 
+ *   srcStep -Reference frame step in bytes 
+ *   dstStep -Destination frame step in bytes; must be a multiple of 
+ *            roi.width. 
+ *   dx -Fractional part of horizontal motion vector component in 1/8 pixel 
+ *            unit; valid in the range [0,7] 
+ *   dy -Fractional part of vertical motion vector component in 1/8 pixel 
+ *            unit; valid in the range [0,7] 
+ *   roi -Dimension of the interpolation region; the parameters roi.width and 
+ *            roi.height must be equal to either 2, 4, or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst -Pointer to the destination frame buffer:
+ *         if roi.width==2,  2-byte alignment required 
+ *         if roi.width==4,  4-byte alignment required 
+ *         if roi.width==8, 8-byte alignment required 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pSrc or pDst is NULL. 
+ *    srcStep or dstStep < 8. 
+ *    dx or dy is out of range [0-7]. 
+ *    roi.width or roi.height is out of range {2,4,8}. 
+ *    roi.width is equal to 2, but pDst is not 2-byte aligned. 
+ *    roi.width is equal to 4, but pDst is not 4-byte aligned. 
+ *    roi.width is equal to 8, but pDst is not 8 byte aligned. 
+ *    srcStep or dstStep is not a multiple of 8. 
+ *
+ */
+OMXResult omxVCM4P10_InterpolateChroma (
+    const OMX_U8 *pSrc,
+    OMX_S32 srcStep,
+    OMX_U8 *pDst,
+    OMX_S32 dstStep,
+    OMX_S32 dx,
+    OMX_S32 dy,
+    OMXSize roi
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingLuma_VerEdge_I   (6.3.3.3.1)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the luma 
+ * macroblock (16x16). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the input macroblock; must be 16-byte aligned. 
+ *   srcdstStep -Step of the arrays; must be a multiple of 16. 
+ *   pAlpha -Array of size 2 of alpha thresholds (the first item is the alpha 
+ *            threshold for the external vertical edge, and the second item is 
+ *            for the internal vertical edge); per [ISO14496-10] alpha values 
+ *            must be in the range [0,255]. 
+ *   pBeta -Array of size 2 of beta thresholds (the first item is the beta 
+ *            threshold for the external vertical edge, and the second item is 
+ *            for the internal vertical edge); per [ISO14496-10] beta values 
+ *            must be in the range [0,18]. 
+ *   pThresholds -Array of size 16 of Thresholds (TC0) (values for the left 
+ *            edge of each 4x4 block, arranged in vertical block order); must 
+ *            be aligned on a 4-byte boundary.  Per [ISO14496-10] values must 
+ *            be in the range [0,25]. 
+ *   pBS -Array of size 16 of BS parameters (arranged in vertical block 
+ *            order); valid in the range [0,4] with the following 
+ *            restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) 
+ *            pBS[i]== 4 if and only if pBS[i^3]== 4.  Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst -Pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds, or pBS 
+ *              is NULL. 
+ *    Either pThresholds or pBS is not aligned on a 4-byte boundary. 
+ *    pSrcDst is not 16-byte aligned. 
+ *    srcdstStep is not a multiple of 16. 
+ *    pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. 
+ *    pBeta[0] and/or pBeta[1] is outside the range [0,18]. 
+ *    One or more entries in the table pThresholds[0..15] is outside of the 
+ *              range [0,25]. 
+ *    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && 
+ *              pBS[i^3]!=4) for 0<=i<=3. 
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I (
+    OMX_U8 *pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8 *pAlpha,
+    const OMX_U8 *pBeta,
+    const OMX_U8 *pThresholds,
+    const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I   (6.3.3.3.2)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four horizontal edges of the luma 
+ * macroblock (16x16). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 16-byte aligned. 
+ *   srcdstStep - step of the arrays; must be a multiple of 16. 
+ *   pAlpha - array of size 2 of alpha thresholds (the first item is the alpha 
+ *            threshold for the external horizontal edge, and the second item 
+ *            is for the internal horizontal edge); per [ISO14496-10] alpha 
+ *            values must be in the range [0,255]. 
+ *   pBeta - array of size 2 of beta thresholds (the first item is the beta 
+ *            threshold for the external horizontal edge, and the second item 
+ *            is for the internal horizontal edge). Per [ISO14496-10] beta 
+ *            values must be in the range [0,18]. 
+ *   pThresholds - array of size 16 containing thresholds, TC0, for the top 
+ *            horizontal edge of each 4x4 block, arranged in horizontal block 
+ *            order; must be aligned on a 4-byte boundary.  Per [ISO14496-10] 
+ *            values must be in the range [0,25]. 
+ *   pBS - array of size 16 of BS parameters (arranged in horizontal block 
+ *            order); valid in the range [0,4] with the following 
+ *            restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) 
+ *            pBS[i]== 4 if and only if pBS[i^3]== 4.  Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst -Pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr, if one of the following cases occurs: 
+ *    -    one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds, or pBS. 
+ *    -    either pThresholds or pBS is not aligned on a 4-byte boundary. 
+ *    -    pSrcDst is not 16-byte aligned. 
+ *    -    srcdstStep is not a multiple of 16. 
+ *    -    pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. 
+ *    -    pBeta[0] and/or pBeta[1] is outside the range [0,18]. 
+ *    -    One or more entries in the table pThresholds[0..15] is 
+ *         outside of the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *              (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I (
+    OMX_U8 *pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8 *pAlpha,
+    const OMX_U8 *pBeta,
+    const OMX_U8 *pThresholds,
+    const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingChroma_VerEdge_I   (6.3.3.3.3)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the chroma 
+ * macroblock (8x8). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the input macroblock; must be 8-byte aligned. 
+ *   srcdstStep - Step of the arrays; must be a multiple of 8. 
+ *   pAlpha - Array of size 2 of alpha thresholds (the first item is alpha 
+ *            threshold for external vertical edge, and the second item is for 
+ *            internal vertical edge); per [ISO14496-10] alpha values must be 
+ *            in the range [0,255]. 
+ *   pBeta - Array of size 2 of beta thresholds (the first item is the beta 
+ *            threshold for the external vertical edge, and the second item is 
+ *            for the internal vertical edge); per [ISO14496-10] beta values 
+ *            must be in the range [0,18]. 
+ *   pThresholds - Array of size 8 containing thresholds, TC0, for the left 
+ *            vertical edge of each 4x2 chroma block, arranged in vertical 
+ *            block order; must be aligned on a 4-byte boundary.  Per 
+ *            [ISO14496-10] values must be in the range [0,25]. 
+ *   pBS - Array of size 16 of BS parameters (values for each 2x2 chroma 
+ *            block, arranged in vertical block order). This parameter is the 
+ *            same as the pBS parameter passed into FilterDeblockLuma_VerEdge; 
+ *            valid in the range [0,4] with the following restrictions: i) 
+ *            pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and 
+ *            only if pBS[i^3]== 4.  Must be 4 byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst -Pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds, or pBS. 
+ *    -    pSrcDst is not 8-byte aligned. 
+ *    -    srcdstStep is not a multiple of 8. 
+ *    -    pThresholds is not 4-byte aligned. 
+ *    -    pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. 
+ *    -    pBeta[0] and/or pBeta[1] is outside the range [0,18]. 
+ *    -    One or more entries in the table pThresholds[0..7] is outside 
+ *         of the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *         pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *         (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *    -    pBS is not 4-byte aligned. 
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I (
+    OMX_U8 *pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8 *pAlpha,
+    const OMX_U8 *pBeta,
+    const OMX_U8 *pThresholds,
+    const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I   (6.3.3.3.4)
+ *
+ * Description:
+ * Performs in-place deblock filtering on the horizontal edges of the chroma 
+ * macroblock (8x8). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 8-byte aligned. 
+ *   srcdstStep - array step; must be a multiple of 8. 
+ *   pAlpha - array of size 2 containing alpha thresholds; the first element 
+ *            contains the threshold for the external horizontal edge, and the 
+ *            second element contains the threshold for internal horizontal 
+ *            edge.  Per [ISO14496-10] alpha values must be in the range 
+ *            [0,255]. 
+ *   pBeta - array of size 2 containing beta thresholds; the first element 
+ *            contains the threshold for the external horizontal edge, and the 
+ *            second element contains the threshold for the internal 
+ *            horizontal edge.  Per [ISO14496-10] beta values must be in the 
+ *            range [0,18]. 
+ *   pThresholds - array of size 8 containing thresholds, TC0, for the top 
+ *            horizontal edge of each 2x4 chroma block, arranged in horizontal 
+ *            block order; must be aligned on a 4-byte boundary.  Per 
+ *            [ISO14496-10] values must be in the range [0,25]. 
+ *   pBS - array of size 16 containing BS parameters for each 2x2 chroma 
+ *            block, arranged in horizontal block order; valid in the range 
+ *            [0,4] with the following restrictions: i) pBS[i]== 4 may occur 
+ *            only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4. 
+ *            Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst -Pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr, if one of the following cases occurs: 
+ *    -    any of the following pointers is NULL: 
+ *         pSrcDst, pAlpha, pBeta, pThresholds, or pBS. 
+ *    -    pSrcDst is not 8-byte aligned. 
+ *    -    srcdstStep is not a multiple of 8. 
+ *    -    pThresholds is not 4-byte aligned. 
+ *    -    pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. 
+ *    -    pBeta[0] and/or pBeta[1] is outside the range [0,18]. 
+ *    -    One or more entries in the table pThresholds[0..7] is outside 
+ *         of the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *              (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ *    -    pBS is not 4-byte aligned. 
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I (
+    OMX_U8 *pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8 *pAlpha,
+    const OMX_U8 *pBeta,
+    const OMX_U8 *pThresholds,
+    const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DeblockLuma_I   (6.3.3.3.5)
+ *
+ * Description:
+ * This function performs in-place deblock filtering on the horizontal and 
+ * vertical edges of a luma macroblock (16x16). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 16-byte aligned. 
+ *   srcdstStep - image width; must be a multiple of 16. 
+ *   pAlpha - pointer to a 2x2 table of alpha thresholds, organized as 
+ *            follows: {external vertical edge, internal vertical edge, 
+ *            external horizontal edge, internal horizontal edge }.  Per 
+ *            [ISO14496-10] alpha values must be in the range [0,255]. 
+ *   pBeta - pointer to a 2x2 table of beta thresholds, organized as follows: 
+ *            {external vertical edge, internal vertical edge, external 
+ *            horizontal edge, internal horizontal edge }.  Per [ISO14496-10] 
+ *            beta values must be in the range [0,18]. 
+ *   pThresholds - pointer to a 16x2 table of threshold (TC0), organized as 
+ *            follows: {values for the left or above edge of each 4x4 block, 
+ *            arranged in vertical block order and then in horizontal block 
+ *            order}; must be aligned on a 4-byte boundary.  Per [ISO14496-10] 
+ *            values must be in the range [0,25]. 
+ *   pBS - pointer to a 16x2 table of BS parameters arranged in scan block 
+ *            order for vertical edges and then horizontal edges; valid in the 
+ *            range [0,4] with the following restrictions: i) pBS[i]== 4 may 
+ *            occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 
+ *            4. Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments 
+ *    -     one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds or pBS. 
+ *    -    pSrcDst is not 16-byte aligned. 
+ *    -    either pThresholds or pBS is not aligned on a 4-byte boundary. 
+ *    -    one or more entries in the table pAlpha[0..3] is outside the range 
+ *              [0,255]. 
+ *    -    one or more entries in the table pBeta[0..3] is outside the range 
+ *              [0,18]. 
+ *    -    one or more entries in the table pThresholds[0..31] is outside of 
+ *              the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *             (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *    -    srcdstStep is not a multiple of 16. 
+ *
+ */
+OMXResult omxVCM4P10_DeblockLuma_I (
+    OMX_U8 *pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8 *pAlpha,
+    const OMX_U8 *pBeta,
+    const OMX_U8 *pThresholds,
+    const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DeblockChroma_I   (6.3.3.3.6)
+ *
+ * Description:
+ * Performs in-place deblocking filtering on all edges of the chroma 
+ * macroblock (8x8). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 8-byte aligned. 
+ *   srcdstStep - step of the arrays; must be a multiple of 8. 
+ *   pAlpha - pointer to a 2x2 array of alpha thresholds, organized as 
+ *            follows: {external vertical edge, internal vertical edge, 
+ *            external horizontal edge, internal horizontal edge }.  Per 
+ *            [ISO14496-10] alpha values must be in the range [0,255]. 
+ *   pBeta - pointer to a 2x2 array of Beta Thresholds, organized as follows: 
+ *            { external vertical edge, internal vertical edge, external 
+ *            horizontal edge, internal horizontal edge }.  Per [ISO14496-10] 
+ *            beta values must be in the range [0,18]. 
+ *   pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left 
+ *            or above edge of each 4x2 or 2x4 block, arranged in vertical 
+ *            block order and then in horizontal block order); must be aligned 
+ *            on a 4-byte boundary. Per [ISO14496-10] values must be in the 
+ *            range [0,25]. 
+ *   pBS - array of size 16x2 of BS parameters (arranged in scan block order 
+ *            for vertical edges and then horizontal edges); valid in the 
+ *            range [0,4] with the following restrictions: i) pBS[i]== 4 may 
+ *            occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 
+ *            4.  Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments 
+ *    -   one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds, or pBS. 
+ *    -   pSrcDst is not 8-byte aligned. 
+ *    -   either pThresholds or pBS is not 4-byte aligned. 
+ *    -   one or more entries in the table pAlpha[0..3] is outside the range 
+ *              [0,255]. 
+ *    -   one or more entries in the table pBeta[0..3] is outside the range 
+ *              [0,18]. 
+ *    -   one or more entries in the table pThresholds[0..15] is outside of 
+ *              the range [0,25]. 
+ *    -   pBS is out of range, i.e., one of the following conditions is true: 
+ *            pBS[i]<0, pBS[i]>4, pBS[i]==4  for i>=4, or 
+ *            (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *    -   srcdstStep is not a multiple of 8. 
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I (
+    OMX_U8 *pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8 *pAlpha,
+    const OMX_U8 *pBeta,
+    const OMX_U8 *pThresholds,
+    const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC   (6.3.4.1.1)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse raster scan for a 2x2 block of 
+ * ChromaDCLevel.  The decoded coefficients in the packed position-coefficient 
+ * buffer are stored in reverse zig-zag order, i.e., the first buffer element 
+ * contains the last non-zero position-coefficient pair of the block. Within 
+ * each position-coefficient pair, the position entry indicates the 
+ * raster-scan position of the coefficient, while the coefficient entry 
+ * contains the coefficient value. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - Double pointer to current byte in bit stream buffer 
+ *   pOffset - Pointer to current bit position in the byte pointed to by 
+ *            *ppBitStream; valid in the range [0,7]. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after each block is decoded 
+ *   pOffset - *pOffset is updated after each block is decoded 
+ *   pNumCoeff - Pointer to the number of nonzero coefficients in this block 
+ *   ppPosCoefBuf - Double pointer to destination residual 
+ *            coefficient-position pair buffer.  Buffer position 
+ *            (*ppPosCoefBuf) is updated upon return, unless there are only 
+ *            zero coefficients in the currently decoded block.  In this case 
+ *            the caller is expected to bypass the transform/dequantization of 
+ *            the empty blocks. 
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppBitStream or pOffset is NULL. 
+ *    -    ppPosCoefBuf or pNumCoeff is NULL. 
+ *    OMX_Sts_Err - if one of the following is true: 
+ *    -    an illegal code is encountered in the bitstream 
+ *
+ */
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+    const OMX_U8 **ppBitStream,
+    OMX_S32*pOffset,
+    OMX_U8 *pNumCoeff,
+    OMX_U8 **ppPosCoefbuf
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DecodeCoeffsToPairCAVLC   (6.3.4.1.2)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of 
+ * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. Inverse 
+ * field scan is not supported. The decoded coefficients in the packed 
+ * position-coefficient buffer are stored in reverse zig-zag order, i.e., the 
+ * first buffer element contains the last non-zero position-coefficient pair of 
+ * the block. Within each position-coefficient pair, the position entry 
+ * indicates the raster-scan position of the coefficient, while the 
+ * coefficient entry contains the coefficient value. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream -Double pointer to current byte in bit stream buffer 
+ *   pOffset - Pointer to current bit position in the byte pointed to by 
+ *            *ppBitStream; valid in the range [0,7]. 
+ *   sMaxNumCoeff - Maximum number of non-zero coefficients in the current 
+ *            block 
+ *   sVLCSelect - VLC table selector, obtained from the number of non-zero 
+ *            coefficients contained in the above and left 4x4 blocks.  It is 
+ *            equivalent to the variable nC described in H.264 standard Table 
+ *            9-5, except its value can't be less than zero. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after each block is decoded. 
+ *   pOffset - *pOffset is updated after each block is decoded 
+ *   pNumCoeff - Pointer to the number of nonzero coefficients in this block 
+ *   ppPosCoefBuf - Double pointer to destination residual 
+ *            coefficient-position pair buffer.  Buffer position 
+ *            (*ppPosCoefBuf) is updated upon return, unless there are only 
+ *            zero coefficients in the currently decoded block.  In this case 
+ *            the caller is expected to bypass the transform/dequantization of 
+ *            the empty blocks. 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppBitStream or pOffset is NULL. 
+ *    -    ppPosCoefBuf or pNumCoeff is NULL. 
+ *    -    sMaxNumCoeff is not equal to either 15 or 16. 
+ *    -    sVLCSelect is less than 0. 
+ *
+ *    OMX_Sts_Err - if one of the following is true: 
+ *    -    an illegal code is encountered in the bitstream 
+ *
+ */
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC (
+    const OMX_U8 **ppBitStream,
+    OMX_S32 *pOffset,
+    OMX_U8 *pNumCoeff,
+    OMX_U8 **ppPosCoefbuf,
+    OMX_INT sVLCSelect,
+    OMX_INT sMaxNumCoeff
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_TransformDequantLumaDCFromPair   (6.3.4.2.1)
+ *
+ * Description:
+ * Reconstructs the 4x4 LumaDC block from the coefficient-position pair 
+ * buffer, performs integer inverse transformation and dequantization for 
+ * 4x4 LumaDC coefficients, and updates the pair buffer pointer to the next 
+ * non-empty block. 
+ *
+ * Input Arguments:
+ *   
+ *   ppSrc - Double pointer to residual coefficient-position pair buffer 
+ *            output by CAVLC decoding 
+ *   QP - Quantization parameter QpY 
+ *
+ * Output Arguments:
+ *   
+ *   ppSrc - *ppSrc is updated to the start of next non empty block 
+ *   pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must 
+ *            be aligned on an 8-byte boundary. 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppSrc or pDst is NULL. 
+ *    -    pDst is not 8 byte aligned. 
+ *    -    QP is not in the range of [0-51]. 
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantLumaDCFromPair (
+    const OMX_U8 **ppSrc,
+    OMX_S16 *pDst,
+    OMX_INT QP
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_TransformDequantChromaDCFromPair   (6.3.4.2.2)
+ *
+ * Description:
+ * Reconstruct the 2x2 ChromaDC block from coefficient-position pair buffer, 
+ * perform integer inverse transformation, and dequantization for 2x2 chroma 
+ * DC coefficients, and update the pair buffer pointer to next non-empty 
+ * block. 
+ *
+ * Input Arguments:
+ *   
+ *   ppSrc - Double pointer to residual coefficient-position pair buffer 
+ *            output by CAVLC decoding 
+ *   QP - Quantization parameter QpC 
+ *
+ * Output Arguments:
+ *   
+ *   ppSrc - *ppSrc is updated to the start of next non empty block 
+ *   pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer; 
+ *            must be aligned on a 4-byte boundary. 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppSrc or pDst is NULL. 
+ *    -    pDst is not 4-byte aligned. 
+ *    -    QP is not in the range of [0-51]. 
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantChromaDCFromPair (
+    const OMX_U8 **ppSrc,
+    OMX_S16 *pDst,
+    OMX_INT QP
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DequantTransformResidualFromPairAndAdd   (6.3.4.2.3)
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer, 
+ * perform dequantization and integer inverse transformation for 4x4 block of 
+ * residuals with previous intra prediction or motion compensation data, and 
+ * update the pair buffer pointer to next non-empty block. If pDC == NULL, 
+ * there are 16 non-zero AC coefficients at most in the packed buffer starting 
+ * from 4x4 block position 0; if pDC != NULL, there are 15 non-zero AC 
+ * coefficients at most in the packed buffer starting from 4x4 block position 
+ * 1. 
+ *
+ * Input Arguments:
+ *   
+ *   ppSrc - Double pointer to residual coefficient-position pair buffer 
+ *            output by CAVLC decoding 
+ *   pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte 
+ *            boundary 
+ *   predStep - Predicted frame step size in bytes; must be a multiple of 4 
+ *   dstStep - Destination frame step in bytes; must be a multiple of 4 
+ *   pDC - Pointer to the DC coefficient of this block, NULL if it doesn't 
+ *            exist 
+ *   QP - Quantization parameter.  It should be QpC in chroma 4x4 block 
+ *            decoding, otherwise it should be QpY. 
+ *   AC - Flag indicating if at least one non-zero AC coefficient exists 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the reconstructed 4x4 block data; must be aligned on a 
+ *            4-byte boundary 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    pPred or pDst is NULL. 
+ *    -    pPred or pDst is not 4-byte aligned. 
+ *    -    predStep or dstStep is not a multiple of 4. 
+ *    -    AC !=0 and Qp is not in the range of [0-51] or ppSrc == NULL. 
+ *    -    AC ==0 && pDC ==NULL. 
+ *
+ */
+OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd (
+    const OMX_U8 **ppSrc,
+    const OMX_U8 *pPred,
+    const OMX_S16 *pDC,
+    OMX_U8 *pDst,
+    OMX_INT predStep,
+    OMX_INT dstStep,
+    OMX_INT QP,
+    OMX_INT AC
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_MEGetBufSize   (6.3.5.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification 
+ * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer 
+ * and MotionEstimationMB. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P10MEMode 
+ *   pMEParams -motion estimation parameters 
+ *
+ * Output Arguments:
+ *   
+ *   pSize - pointer to the number of bytes required for the motion 
+ *            estimation specification structure 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    pMEParams or pSize is NULL. 
+ *    -    an invalid MEMode is specified. 
+ *
+ */
+OMXResult omxVCM4P10_MEGetBufSize (
+    OMXVCM4P10MEMode MEmode,
+    const OMXVCM4P10MEParams *pMEParams,
+    OMX_U32 *pSize
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_MEInit   (6.3.5.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the 
+ * omxVCM4P10 motion estimation functions:  BlockMatch_Integer and 
+ * MotionEstimationMB. Memory for the specification structure *pMESpec must be 
+ * allocated prior to calling the function, and should be aligned on a 4-byte 
+ * boundary.  The number of bytes required for the specification structure can 
+ * be determined using the function omxVCM4P10_MEGetBufSize. Following 
+ * initialization by this function, the vendor-specific structure *pMESpec 
+ * should contain an implementation-specific representation of all motion 
+ * estimation parameters received via the structure pMEParams, for example  
+ * searchRange16x16, searchRange8x8, etc. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P10MEMode 
+ *   pMEParams - motion estimation parameters 
+ *   pMESpec - pointer to the uninitialized ME specification structure 
+ *
+ * Output Arguments:
+ *   
+ *   pMESpec - pointer to the initialized ME specification structure 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    pMEParams or pMESpec is NULL. 
+ *    -    an invalid value was specified for the parameter MEmode 
+ *    -    a negative or zero value was specified for one of the search ranges 
+ *         (e.g., pMEParams->searchRange8x8, pMEParams->searchRange16x16, etc.) 
+ *    -    either in isolation or in combination, one or more of the enables or 
+ *         search ranges in the structure *pMEParams were configured such 
+ *         that the requested behavior fails to comply with [ISO14496-10]. 
+ *
+ */
+OMXResult omxVCM4P10_MEInit (
+    OMXVCM4P10MEMode MEmode,
+    const OMXVCM4P10MEParams *pMEParams,
+    void *pMESpec
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_BlockMatch_Integer   (6.3.5.2.1)
+ *
+ * Description:
+ * Performs integer block match.  Returns best MV and associated cost. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrgY - Pointer to the top-left corner of the current block:
+ *            If iBlockWidth==4,  4-byte alignment required. 
+ *            If iBlockWidth==8,  8-byte alignment required. 
+ *            If iBlockWidth==16, 16-byte alignment required. 
+ *   pSrcRefY - Pointer to the top-left corner of the co-located block in the 
+ *            reference picture: 
+ *            If iBlockWidth==4,  4-byte alignment required.  
+ *            If iBlockWidth==8,  8-byte alignment required.  
+ *            If iBlockWidth==16, 16-byte alignment required. 
+ *   nSrcOrgStep - Stride of the original picture plane, expressed in terms 
+ *            of integer pixels; must be a multiple of iBlockWidth. 
+ *   nSrcRefStep - Stride of the reference picture plane, expressed in terms 
+ *            of integer pixels 
+ *   pRefRect - pointer to the valid reference rectangle inside the reference 
+ *            picture plane 
+ *   pCurrPointPos - position of the current block in the current plane 
+ *   iBlockWidth - Width of the current block, expressed in terms of integer 
+ *            pixels; must be equal to either 4, 8, or 16. 
+ *   iBlockHeight - Height of the current block, expressed in terms of 
+ *            integer pixels; must be equal to either 4, 8, or 16. 
+ *   nLamda - Lamda factor; used to compute motion cost 
+ *   pMVPred - Predicted MV; used to compute motion cost, expressed in terms 
+ *            of 1/4-pel units 
+ *   pMVCandidate - Candidate MV; used to initialize the motion search, 
+ *            expressed in terms of integer pixels 
+ *   pMESpec - pointer to the ME specification structure 
+ *
+ * Output Arguments:
+ *   
+ *   pBestMV - Best MV resulting from integer search, expressed in terms 
+ *            of 1/4-pel units 
+ *   pBestCost - Motion cost associated with the best MV; computed as 
+ *            SAD+Lamda*BitsUsedByMV 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    any of the following pointers are NULL:
+ *         pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec. 
+ *    -    Either iBlockWidth or iBlockHeight are values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Integer (
+    const OMX_U8 *pSrcOrgY,
+    OMX_S32 nSrcOrgStep,
+    const OMX_U8 *pSrcRefY,
+    OMX_S32 nSrcRefStep,
+    const OMXRect *pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    OMX_U8 iBlockWidth,
+    OMX_U8 iBlockHeight,
+    OMX_U32 nLamda,
+    const OMXVCMotionVector *pMVPred,
+    const OMXVCMotionVector *pMVCandidate,
+    OMXVCMotionVector *pBestMV,
+    OMX_S32 *pBestCost,
+    void *pMESpec
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_BlockMatch_Half   (6.3.5.2.2)
+ *
+ * Description:
+ * Performs a half-pel block match using results from a prior integer search. 
+ *  Returns the best MV and associated cost.  This function estimates the 
+ * half-pixel motion vector by interpolating the integer resolution motion 
+ * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial 
+ * integer MV is generated externally.  The function 
+ * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrgY - Pointer to the current position in original picture plane:
+ *              If iBlockWidth==4,  4-byte alignment required. 
+ *              If iBlockWidth==8,  8-byte alignment required. 
+ *              If iBlockWidth==16, 16-byte alignment required. 
+ *   pSrcRefY - Pointer to the top-left corner of the co-located block in the 
+ *            reference picture:  
+ *              If iBlockWidth==4,  4-byte alignment required.  
+ *              If iBlockWidth==8,  8-byte alignment required.  
+ *              If iBlockWidth==16, 16-byte alignment required. 
+ *   nSrcOrgStep - Stride of the original picture plane in terms of full 
+ *            pixels; must be a multiple of iBlockWidth. 
+ *   nSrcRefStep - Stride of the reference picture plane in terms of full 
+ *            pixels 
+ *   iBlockWidth - Width of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   iBlockHeight - Height of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   nLamda - Lamda factor, used to compute motion cost 
+ *   pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to 
+ *            compute motion cost 
+ *   pSrcDstBestMV - The best MV resulting from a prior integer search, 
+ *            represented in terms of 1/4-pel units 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstBestMV - Best MV resulting from the half-pel search, expressed in 
+ *            terms of 1/4-pel units 
+ *   pBestCost - Motion cost associated with the best MV; computed as 
+ *            SAD+Lamda*BitsUsedByMV 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    any of the following pointers is NULL: pSrcOrgY, pSrcRefY, 
+ *              pSrcDstBestMV, pMVPred, pBestCost 
+ *    -    iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Half (
+    const OMX_U8 *pSrcOrgY,
+    OMX_S32 nSrcOrgStep,
+    const OMX_U8 *pSrcRefY,
+    OMX_S32 nSrcRefStep,
+    OMX_U8 iBlockWidth,
+    OMX_U8 iBlockHeight,
+    OMX_U32 nLamda,
+    const OMXVCMotionVector *pMVPred,
+    OMXVCMotionVector *pSrcDstBestMV,
+    OMX_S32 *pBestCost
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_BlockMatch_Quarter   (6.3.5.2.3)
+ *
+ * Description:
+ * Performs a quarter-pel block match using results from a prior half-pel 
+ * search.  Returns the best MV and associated cost.  This function estimates 
+ * the quarter-pixel motion vector by interpolating the half-pel resolution 
+ * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the 
+ * initial half-pel MV is generated externally.  The function 
+ * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrgY - Pointer to the current position in original picture plane:
+ *            If iBlockWidth==4,  4-byte alignment required. 
+ *            If iBlockWidth==8,  8-byte alignment required. 
+ *            If iBlockWidth==16, 16-byte alignment required. 
+ *   pSrcRefY - Pointer to the top-left corner of the co-located block in the 
+ *            reference picture:
+ *            If iBlockWidth==4,  4-byte alignment required.  
+ *            If iBlockWidth==8,  8-byte alignment required.  
+ *            If iBlockWidth==16, 16-byte alignment required. 
+ *   nSrcOrgStep - Stride of the original picture plane in terms of full 
+ *            pixels; must be a multiple of iBlockWidth. 
+ *   nSrcRefStep - Stride of the reference picture plane in terms of full 
+ *            pixels 
+ *   iBlockWidth - Width of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   iBlockHeight - Height of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   nLamda - Lamda factor, used to compute motion cost 
+ *   pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to 
+ *            compute motion cost 
+ *   pSrcDstBestMV - The best MV resulting from a prior half-pel search, 
+ *            represented in terms of 1/4 pel units 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstBestMV - Best MV resulting from the quarter-pel search, expressed 
+ *            in terms of 1/4-pel units 
+ *   pBestCost - Motion cost associated with the best MV; computed as 
+ *            SAD+Lamda*BitsUsedByMV 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    One or more of the following pointers is NULL: 
+ *         pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost 
+ *    -    iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Quarter (
+    const OMX_U8 *pSrcOrgY,
+    OMX_S32 nSrcOrgStep,
+    const OMX_U8 *pSrcRefY,
+    OMX_S32 nSrcRefStep,
+    OMX_U8 iBlockWidth,
+    OMX_U8 iBlockHeight,
+    OMX_U32 nLamda,
+    const OMXVCMotionVector *pMVPred,
+    OMXVCMotionVector *pSrcDstBestMV,
+    OMX_S32 *pBestCost
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_MotionEstimationMB   (6.3.5.3.1)
+ *
+ * Description:
+ * Performs MB-level motion estimation and selects best motion estimation 
+ * strategy from the set of modes supported in baseline profile [ISO14496-10]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcCurrBuf - Pointer to the current position in original picture plane; 
+ *            16-byte alignment required 
+ *   pSrcRefBufList - Pointer to an array with 16 entries.  Each entry points 
+ *            to the top-left corner of the co-located MB in a reference 
+ *            picture.  The array is filled from low-to-high with valid 
+ *            reference frame pointers; the unused high entries should be set 
+ *            to NULL.  Ordering of the reference frames should follow 
+ *            [ISO14496-10] subclause 8.2.4  Decoding Process for Reference 
+ *            Picture Lists.   The entries must be 16-byte aligned. 
+ *   pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the 
+ *            reconstructed picture; must be 16-byte aligned. 
+ *   SrcCurrStep - Width of the original picture plane in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   SrcRefStep - Width of the reference picture plane in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   SrcRecStep - Width of the reconstructed picture plane in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   pRefRect - Pointer to the valid reference rectangle; relative to the 
+ *            image origin. 
+ *   pCurrPointPos - Position of the current macroblock in the current plane. 
+ *   Lambda - Lagrange factor for computing the cost function 
+ *   pMESpec - Pointer to the motion estimation specification structure; must 
+ *            have been allocated and initialized prior to calling this 
+ *            function. 
+ *   pMBInter - Array, of dimension four, containing pointers to information 
+ *            associated with four adjacent type INTER MBs (Left, Top, 
+ *            Top-Left, Top-Right). Any pointer in the array may be set equal 
+ *            to NULL if the corresponding MB doesn't exist or is not of type 
+ *            INTER. 
+ *            -  pMBInter[0] - Pointer to left MB information 
+ *            -  pMBInter[1] - Pointer to top MB information 
+ *            -  pMBInter[2] - Pointer to top-left MB information 
+ *            -  pMBInter[3] - Pointer to top-right MB information 
+ *   pMBIntra - Array, of dimension four, containing pointers to information 
+ *            associated with four adjacent type INTRA MBs (Left, Top, 
+ *            Top-Left, Top-Right). Any pointer in the array may be set equal 
+ *            to NULL if the corresponding MB doesn't exist or is not of type 
+ *            INTRA. 
+ *            -  pMBIntra[0] - Pointer to left MB information 
+ *            -  pMBIntra[1] - Pointer to top MB information 
+ *            -  pMBIntra[2] - Pointer to top-left MB information 
+ *            -  pMBIntra[3] - Pointer to top-right MB information 
+ *   pSrcDstMBCurr - Pointer to information structure for the current MB.  
+ *            The following entries should be set prior to calling the 
+ *            function:  sliceID - the number of the slice to which the 
+ *            current MB belongs. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstCost - Pointer to the minimum motion cost for the current MB. 
+ *   pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma 
+ *            4x4 blocks in each MB.  The block SADs are in scan order for 
+ *            each MB.  For implementations that cannot compute the SAD values 
+ *            individually, the maximum possible value (0xffff) is returned 
+ *            for each of the 16 block SAD entries. 
+ *   pSrcDstMBCurr - Pointer to updated information structure for the current 
+ *            MB after MB-level motion estimation has been completed.  The 
+ *            following fields are updated by the ME function.   The following 
+ *            parameter set quantifies the MB-level ME search results: 
+ *            -  MbType 
+ *            -  subMBType[4] 
+ *            -  pMV0[4][4] 
+ *            -  pMVPred[4][4] 
+ *            -  pRefL0Idx[4] 
+ *            -  Intra16x16PredMode 
+ *            -  pIntra4x4PredMode[4][4] 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -   One or more of the following pointers is NULL: pSrcCurrBuf, 
+ *           pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec, 
+ *           pMBInter, pMBIntra,pSrcDstMBCurr, pDstCost, pSrcRefBufList[0] 
+ *    -    SrcRefStep, SrcRecStep are not multiples of 16 
+ *    -    iBlockWidth or iBlockHeight are values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_MotionEstimationMB (
+    const OMX_U8 *pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    const OMX_U8 *pSrcRefBufList[15],
+    OMX_S32 SrcRefStep,
+    const OMX_U8 *pSrcRecBuf,
+    OMX_S32 SrcRecStep,
+    const OMXRect *pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    OMX_U32 Lambda,
+    void *pMESpec,
+    const OMXVCM4P10MBInfoPtr *pMBInter,
+    const OMXVCM4P10MBInfoPtr *pMBIntra,
+    OMXVCM4P10MBInfoPtr pSrcDstMBCurr,
+    OMX_INT *pDstCost,
+    OMX_U16 *pDstBlockSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SAD_4x   (6.3.5.4.1)
+ *
+ * Description:
+ * This function calculates the SAD for 4x8 and 4x4 blocks. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg -Pointer to the original block; must be aligned on a 4-byte 
+ *            boundary. 
+ *   iStepOrg -Step of the original block buffer; must be a multiple of 4. 
+ *   pSrcRef -Pointer to the reference block 
+ *   iStepRef -Step of the reference block buffer 
+ *   iHeight -Height of the block; must be equal to either 4 or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD -Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    One or more of the following pointers is NULL: 
+ *         pSrcOrg, pSrcRef, or pDstSAD 
+ *    -    iHeight is not equal to either 4 or 8. 
+ *    -    iStepOrg is not a multiple of 4 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SAD_4x (
+    const OMX_U8 *pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8 *pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_S32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SADQuar_4x   (6.3.5.4.2)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average 
+ * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks.  Rounding 
+ * is applied according to the convention (a+b+1)>>1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the original block; must be aligned on a 4-byte 
+ *            boundary. 
+ *   pSrcRef0 - Pointer to reference block 0 
+ *   pSrcRef1 - Pointer to reference block 1 
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 4. 
+ *   iRefStep0 - Step of reference block 0 
+ *   iRefStep1 - Step of reference block 1 
+ *   iHeight - Height of the block; must be equal to either 4 or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    iHeight is not equal to either 4 or 8. 
+ *    -    One or more of the following pointers is NULL: pSrc, pSrcRef0, 
+ *              pSrcRef1, pDstSAD. 
+ *    -    iSrcStep is not a multiple of 4 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_4x (
+    const OMX_U8 *pSrc,
+    const OMX_U8 *pSrcRef0,
+    const OMX_U8 *pSrcRef1,
+    OMX_U32 iSrcStep,
+    OMX_U32 iRefStep0,
+    OMX_U32 iRefStep1,
+    OMX_U32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SADQuar_8x   (6.3.5.4.3)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average 
+ * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks.  
+ * Rounding is applied according to the convention (a+b+1)>>1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the original block; must be aligned on an 8-byte 
+ *            boundary. 
+ *   pSrcRef0 - Pointer to reference block 0 
+ *   pSrcRef1 - Pointer to reference block 1 
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 8. 
+ *   iRefStep0 - Step of reference block 0 
+ *   iRefStep1 - Step of reference block 1 
+ *   iHeight - Height of the block; must be equal to either 4, 8, or 16. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    iHeight is not equal to either 4, 8, or 16. 
+ *    -    One or more of the following pointers is NULL: pSrc, pSrcRef0, 
+ *              pSrcRef1, pDstSAD. 
+ *    -    iSrcStep is not a multiple of 8 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_8x (
+    const OMX_U8 *pSrc,
+    const OMX_U8 *pSrcRef0,
+    const OMX_U8 *pSrcRef1,
+    OMX_U32 iSrcStep,
+    OMX_U32 iRefStep0,
+    OMX_U32 iRefStep1,
+    OMX_U32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SADQuar_16x   (6.3.5.4.4)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average 
+ * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks.  
+ * Rounding is applied according to the convention (a+b+1)>>1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the original block; must be aligned on a 16-byte 
+ *            boundary. 
+ *   pSrcRef0 - Pointer to reference block 0 
+ *   pSrcRef1 - Pointer to reference block 1 
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 16 
+ *   iRefStep0 - Step of reference block 0 
+ *   iRefStep1 - Step of reference block 1 
+ *   iHeight - Height of the block; must be equal to either 8 or 16 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD -Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    iHeight is not equal to either 8 or 16. 
+ *    -    One or more of the following pointers is NULL: pSrc, pSrcRef0, 
+ *              pSrcRef1, pDstSAD. 
+ *    -    iSrcStep is not a multiple of 16 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_16x (
+    const OMX_U8 *pSrc,
+    const OMX_U8 *pSrcRef0,
+    const OMX_U8 *pSrcRef1,
+    OMX_U32 iSrcStep,
+    OMX_U32 iRefStep0,
+    OMX_U32 iRefStep1,
+    OMX_U32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SATD_4x4   (6.3.5.4.5)
+ *
+ * Description:
+ * This function calculates the sum of absolute transform differences (SATD) 
+ * for a 4x4 block by applying a Hadamard transform to the difference block 
+ * and then calculating the sum of absolute coefficient values. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg - Pointer to the original block; must be aligned on a 4-byte 
+ *            boundary 
+ *   iStepOrg - Step of the original block buffer; must be a multiple of 4 
+ *   pSrcRef - Pointer to the reference block; must be aligned on a 4-byte 
+ *            boundary 
+ *   iStepRef - Step of the reference block buffer; must be a multiple of 4 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - pointer to the resulting SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcOrg, pSrcRef, or pDstSAD 
+ *    -    either pSrcOrg or pSrcRef is not aligned on a 4-byte boundary 
+ *    -    iStepOrg <= 0 or iStepOrg is not a multiple of 4 
+ *    -    iStepRef <= 0 or iStepRef is not a multiple of 4 
+ *
+ */
+OMXResult omxVCM4P10_SATD_4x4 (
+    const OMX_U8 *pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8 *pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_U32 *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateHalfHor_Luma   (6.3.5.5.1)
+ *
+ * Description:
+ * This function performs interpolation for two horizontal 1/2-pel positions - 
+ * (-1/2, 0) and (1/2, 0) - around a full-pel position. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the top-left corner of the block used to interpolate in 
+ *            the reconstruction frame plane. 
+ *   iSrcStep - Step of the source buffer. 
+ *   iDstStep - Step of the destination(interpolation) buffer; must be a 
+ *            multiple of iWidth. 
+ *   iWidth - Width of the current block; must be equal to either 4, 8, or 16 
+ *   iHeight - Height of the current block; must be equal to 4, 8, or 16 
+ *
+ * Output Arguments:
+ *   
+ *   pDstLeft - Pointer to the interpolation buffer of the left 1/2-pel position 
+ *            (-1/2, 0) 
+ *                 If iWidth==4,  4-byte alignment required. 
+ *                 If iWidth==8,  8-byte alignment required. 
+ *                 If iWidth==16, 16-byte alignment required. 
+ *   pDstRight - Pointer to the interpolation buffer of the right 1/2-pel 
+ *            position (1/2, 0) 
+ *                 If iWidth==4,  4-byte alignment required. 
+ *                 If iWidth==8,  8-byte alignment required. 
+ *                 If iWidth==16, 16-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *             pSrc, pDstLeft, or pDstRight 
+ *    -    iWidth or iHeight have values other than 4, 8, or 16 
+ *    -    iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary 
+ *    -    iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on a 8-byte boundary 
+ *    -    iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary 
+ *    -    any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfHor_Luma (
+    const OMX_U8 *pSrc,
+    OMX_U32 iSrcStep,
+    OMX_U8 *pDstLeft,
+    OMX_U8 *pDstRight,
+    OMX_U32 iDstStep,
+    OMX_U32 iWidth,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateHalfVer_Luma   (6.3.5.5.2)
+ *
+ * Description:
+ * This function performs interpolation for two vertical 1/2-pel positions - 
+ * (0, -1/2) and (0, 1/2) - around a full-pel position. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to top-left corner of block used to interpolate in the 
+ *            reconstructed frame plane 
+ *   iSrcStep - Step of the source buffer. 
+ *   iDstStep - Step of the destination (interpolation) buffer; must be a 
+ *            multiple of iWidth. 
+ *   iWidth - Width of the current block; must be equal to either 4, 8, or 16 
+ *   iHeight - Height of the current block; must be equal to either 4, 8, or 16 
+ *
+ * Output Arguments:
+ *   
+ *   pDstUp - Pointer to the interpolation buffer of the 1/2-pel position above 
+ *            the current full-pel position (0, -1/2) 
+ *                If iWidth==4, 4-byte alignment required. 
+ *                If iWidth==8, 8-byte alignment required. 
+ *                If iWidth==16, 16-byte alignment required. 
+ *   pDstDown - Pointer to the interpolation buffer of the 1/2-pel position below 
+ *            the current full-pel position (0, 1/2) 
+ *                If iWidth==4, 4-byte alignment required. 
+ *                If iWidth==8, 8-byte alignment required. 
+ *                If iWidth==16, 16-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrc, pDstUp, or pDstDown 
+ *    -    iWidth or iHeight have values other than 4, 8, or 16 
+ *    -    iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary 
+ *    -    iWidth==8 but pDstUp and/or pDstDown is/are not aligned on a 8-byte boundary 
+ *    -    iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfVer_Luma (
+    const OMX_U8 *pSrc,
+    OMX_U32 iSrcStep,
+    OMX_U8 *pDstUp,
+    OMX_U8 *pDstDown,
+    OMX_U32 iDstStep,
+    OMX_U32 iWidth,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_Average_4x   (6.3.5.5.3)
+ *
+ * Description:
+ * This function calculates the average of two 4x4 or 4x8 blocks.  The result 
+ * is rounded according to (a+b+1)/2. 
+ *
+ * Input Arguments:
+ *   
+ *   pPred0 - Pointer to the top-left corner of reference block 0 
+ *   pPred1 - Pointer to the top-left corner of reference block 1 
+ *   iPredStep0 - Step of reference block 0; must be a multiple of 4. 
+ *   iPredStep1 - Step of reference block 1; must be a multiple of 4. 
+ *   iDstStep - Step of the destination buffer; must be a multiple of 4. 
+ *   iHeight - Height of the blocks; must be either 4 or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstPred - Pointer to the destination buffer. 4-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *           pPred0, pPred1, or pDstPred 
+ *    -    pDstPred is not aligned on a 4-byte boundary 
+ *    -    iPredStep0 <= 0 or iPredStep0 is not a multiple of 4 
+ *    -    iPredStep1 <= 0 or iPredStep1 is not a multiple of 4 
+ *    -    iDstStep <= 0 or iDstStep is not a multiple of 4 
+ *    -    iHeight is not equal to either 4 or 8 
+ *
+ */
+OMXResult omxVCM4P10_Average_4x (
+    const OMX_U8 *pPred0,
+    const OMX_U8 *pPred1,
+    OMX_U32 iPredStep0,
+    OMX_U32 iPredStep1,
+    OMX_U8 *pDstPred,
+    OMX_U32 iDstStep,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_TransformQuant_ChromaDC   (6.3.5.6.1)
+ *
+ * Description:
+ * This function performs 2x2 Hadamard transform of chroma DC coefficients 
+ * and then quantizes the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the 2x2 array of chroma DC coefficients.  8-byte 
+ *            alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *   bIntra - Indicates whether this is an INTRA block. 1-INTRA, 0-INTER 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - Pointer to transformed and quantized coefficients.  8-byte 
+ *            alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *             pSrcDst 
+ *    -    pSrcDst is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_ChromaDC (
+    OMX_S16 *pSrcDst,
+    OMX_U32 iQP,
+    OMX_U8 bIntra
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_TransformQuant_LumaDC   (6.3.5.6.2)
+ *
+ * Description:
+ * This function performs a 4x4 Hadamard transform of luma DC coefficients 
+ * and then quantizes the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the 4x4 array of luma DC coefficients.  16-byte 
+ *            alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - Pointer to transformed and quantized coefficients.  16-byte 
+ *             alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: pSrcDst 
+ *    -    pSrcDst is not aligned on a 16-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_LumaDC (
+    OMX_S16 *pSrcDst,
+    OMX_U32 iQP
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InvTransformDequant_LumaDC   (6.3.5.6.3)
+ *
+ * Description:
+ * This function performs inverse 4x4 Hadamard transform and then dequantizes 
+ * the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and 
+ *            quantized coefficients.  16 byte alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to inverse-transformed and dequantized coefficients.  
+ *            16-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: pSrc 
+ *    -    pSrc or pDst is not aligned on a 16-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_LumaDC (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst,
+    OMX_U32 iQP
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InvTransformDequant_ChromaDC   (6.3.5.6.4)
+ *
+ * Description:
+ * This function performs inverse 2x2 Hadamard transform and then dequantizes 
+ * the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and 
+ *            quantized coefficients.  8 byte alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to inverse-transformed and dequantized coefficients.  
+ *            8-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: pSrc 
+ *    -    pSrc or pDst is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_ChromaDC (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst,
+    OMX_U32 iQP
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InvTransformResidualAndAdd   (6.3.5.7.1)
+ *
+ * Description:
+ * This function performs an inverse 4x4 integer transformation to produce 
+ * the difference signal and then adds the difference to the prediction to get 
+ * the reconstructed signal. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcPred - Pointer to prediction signal.  4-byte alignment required. 
+ *   pDequantCoeff - Pointer to the transformed coefficients.  8-byte 
+ *            alignment required. 
+ *   iSrcPredStep - Step of the prediction buffer; must be a multiple of 4. 
+ *   iDstReconStep - Step of the destination reconstruction buffer; must be a 
+ *            multiple of 4. 
+ *   bAC - Indicates whether there are AC coefficients in the coefficients 
+ *            matrix. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstRecon -Pointer to the destination reconstruction buffer.  4-byte 
+ *            alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrcPred, pDequantCoeff, pDstRecon 
+ *    -    pSrcPred is not aligned on a 4-byte boundary 
+ *    -    iSrcPredStep or iDstReconStep is not a multiple of 4. 
+ *    -    pDequantCoeff is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_InvTransformResidualAndAdd (
+    const OMX_U8 *pSrcPred,
+    const OMX_S16 *pDequantCoeff,
+    OMX_U8 *pDstRecon,
+    OMX_U32 iSrcPredStep,
+    OMX_U32 iDstReconStep,
+    OMX_U8 bAC
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SubAndTransformQDQResidual   (6.3.5.8.1)
+ *
+ * Description:
+ * This function subtracts the prediction signal from the original signal to 
+ * produce the difference signal and then performs a 4x4 integer transform and 
+ * quantization. The quantized transformed coefficients are stored as 
+ * pDstQuantCoeff. This function can also output dequantized coefficients or 
+ * unquantized DC coefficients optionally by setting the pointers 
+ * pDstDeQuantCoeff, pDCCoeff. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg - Pointer to original signal. 4-byte alignment required. 
+ *   pSrcPred - Pointer to prediction signal. 4-byte alignment required. 
+ *   iSrcOrgStep - Step of the original signal buffer; must be a multiple of 
+ *            4. 
+ *   iSrcPredStep - Step of the prediction signal buffer; must be a multiple 
+ *            of 4. 
+ *   pNumCoeff -Number of non-zero coefficients after quantization. If this 
+ *            parameter is not required, it is set to NULL. 
+ *   nThreshSAD - Zero-block early detection threshold. If this parameter is 
+ *            not required, it is set to 0. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *   bIntra - Indicates whether this is an INTRA block, either 1-INTRA or 
+ *            0-INTER 
+ *
+ * Output Arguments:
+ *   
+ *   pDstQuantCoeff - Pointer to the quantized transformed coefficients.  
+ *            8-byte alignment required. 
+ *   pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients 
+ *            if this parameter is not equal to NULL.  8-byte alignment 
+ *            required. 
+ *   pDCCoeff - Pointer to the unquantized DC coefficient if this parameter 
+ *            is not equal to NULL. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff, 
+ *            pDstDeQuantCoeff, pDCCoeff 
+ *    -    pSrcOrg is not aligned on a 4-byte boundary 
+ *    -    pSrcPred is not aligned on a 4-byte boundary 
+ *    -    iSrcOrgStep is not a multiple of 4 
+ *    -    iSrcPredStep is not a multiple of 4 
+ *    -    pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_SubAndTransformQDQResidual (
+    const OMX_U8 *pSrcOrg,
+    const OMX_U8 *pSrcPred,
+    OMX_U32 iSrcOrgStep,
+    OMX_U32 iSrcPredStep,
+    OMX_S16 *pDstQuantCoeff,
+    OMX_S16 *pDstDeQuantCoeff,
+    OMX_S16 *pDCCoeff,
+    OMX_S8 *pNumCoeff,
+    OMX_U32 nThreshSAD,
+    OMX_U32 iQP,
+    OMX_U8 bIntra
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_GetVLCInfo   (6.3.5.9.1)
+ *
+ * Description:
+ * This function extracts run-length encoding (RLE) information from the 
+ * coefficient matrix.  The results are returned in an OMXVCM4P10VLCInfo 
+ * structure. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcCoeff - pointer to the transform coefficient matrix.  8-byte 
+ *            alignment required. 
+ *   pScanMatrix - pointer to the scan order definition matrix.  For a luma 
+ *            block the scan matrix should follow [ISO14496-10] section 8.5.4, 
+ *            and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 
+ *            10, 7, 11, 14, 15.  For a chroma block, the scan matrix should 
+ *            contain the values 0, 1, 2, 3. 
+ *   bAC - indicates presence of a DC coefficient; 0 = DC coefficient 
+ *            present, 1= DC coefficient absent. 
+ *   MaxNumCoef - specifies the number of coefficients contained in the 
+ *            transform coefficient matrix, pSrcCoeff. The value should be 16 
+ *            for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The 
+ *            value should be 4 for blocks of type CHROMADC. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstVLCInfo - pointer to structure that stores information for 
+ *            run-length coding. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrcCoeff, pScanMatrix, pDstVLCInfo 
+ *    -    pSrcCoeff is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_GetVLCInfo (
+    const OMX_S16 *pSrcCoeff,
+    const OMX_U8 *pScanMatrix,
+    OMX_U8 bAC,
+    OMX_U32 MaxNumCoef,
+    OMXVCM4P10VLCInfo*pDstVLCInfo
+);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /** end of #define _OMXVC_H_ */
+
+/** EOF */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC_s.h
new file mode 100755
index 0000000..89f3040
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC_s.h
@@ -0,0 +1,129 @@
+;/******************************************************************************
+;// Copyright (c) 1999-2005 The Khronos Group Inc. All Rights Reserved
+;//
+;//
+;//
+;//
+;//
+;//
+;//
+;//
+;******************************************************************************/
+
+;/** =============== Structure Definition for Sample Generation ============== */
+;/** transparent status */
+
+;enum {
+OMX_VIDEO_TRANSPARENT	EQU 0;	/** Wholly transparent */
+OMX_VIDEO_PARTIAL		EQU 1;	/** Partially transparent */
+OMX_VIDEO_OPAQUE		EQU 2;	/** Opaque */
+;}
+
+;/** direction */
+;enum {
+OMX_VIDEO_NONE			EQU 0;
+OMX_VIDEO_HORIZONTAL	EQU 1;
+OMX_VIDEO_VERTICAL		EQU 2;
+;}
+
+;/** bilinear interpolation type */
+;enum {
+OMX_VIDEO_INTEGER_PIXEL EQU 0;	/** case a */
+OMX_VIDEO_HALF_PIXEL_X  EQU 1;	/** case b */
+OMX_VIDEO_HALF_PIXEL_Y  EQU 2;	/** case c */
+OMX_VIDEO_HALF_PIXEL_XY EQU 3;	/** case d */
+;}
+
+;enum {
+OMX_UPPER  				EQU 1;		/** set if the above macroblock is available */
+OMX_LEFT   				EQU 2;		/** set if the left macroblock is available */
+OMX_CENTER 				EQU 4;
+OMX_RIGHT				EQU 8;
+OMX_LOWER  				EQU	16;
+OMX_UPPER_LEFT  		EQU 32;		/** set if the above-left macroblock is available */
+OMX_UPPER_RIGHT 		EQU 64;		/** set if the above-right macroblock is available */
+OMX_LOWER_LEFT  		EQU 128;
+OMX_LOWER_RIGHT 		EQU 256
+;}
+
+;enum {
+OMX_VIDEO_LUMINANCE  	EQU 0;	/** Luminance component */
+OMX_VIDEO_CHROMINANCE  	EQU 1;	/** chrominance component */
+OMX_VIDEO_ALPHA  		EQU 2;	/** Alpha component */
+;}
+
+;enum {
+OMX_VIDEO_INTER			EQU 0;	/** P picture or P-VOP */
+OMX_VIDEO_INTER_Q		EQU 1;	/** P picture or P-VOP */
+OMX_VIDEO_INTER4V		EQU 2;	/** P picture or P-VOP */
+OMX_VIDEO_INTRA			EQU 3;	/** I and P picture; I- and P-VOP */
+OMX_VIDEO_INTRA_Q		EQU 4;	/** I and P picture; I- and P-VOP */
+OMX_VIDEO_INTER4V_Q		EQU 5;	/** P picture or P-VOP (H.263)*/
+OMX_VIDEO_DIRECT		EQU 6;	/** B picture or B-VOP (MPEG-4 only) */
+OMX_VIDEO_INTERPOLATE	EQU 7;	/** B picture or B-VOP */
+OMX_VIDEO_BACKWARD		EQU 8;	/** B picture or B-VOP */
+OMX_VIDEO_FORWARD		EQU 9;	/** B picture or B-VOP */
+OMX_VIDEO_NOTCODED		EQU 10;	/** B picture or B-VOP */
+;}
+
+;enum {
+OMX_16X16_VERT 			EQU 0;		/** Intra_16x16_Vertical (prediction mode) */
+OMX_16X16_HOR 			EQU 1;		/** Intra_16x16_Horizontal (prediction mode) */
+OMX_16X16_DC 			EQU 2;		/** Intra_16x16_DC (prediction mode) */
+OMX_16X16_PLANE 		EQU 3;	/** Intra_16x16_Plane (prediction mode) */
+;}
+
+;enum {
+OMX_4x4_VERT 			EQU 0;		/** Intra_4x4_Vertical (prediction mode) */
+OMX_4x4_HOR  			EQU 1;		/** Intra_4x4_Horizontal (prediction mode) */
+OMX_4x4_DC   			EQU 2;		/** Intra_4x4_DC (prediction mode) */
+OMX_4x4_DIAG_DL 		EQU 3;	/** Intra_4x4_Diagonal_Down_Left (prediction mode) */
+OMX_4x4_DIAG_DR 		EQU 4;	/** Intra_4x4_Diagonal_Down_Right (prediction mode) */
+OMX_4x4_VR 				EQU 5;			/** Intra_4x4_Vertical_Right (prediction mode) */
+OMX_4x4_HD 				EQU 6;			/** Intra_4x4_Horizontal_Down (prediction mode) */
+OMX_4x4_VL 				EQU 7;			/** Intra_4x4_Vertical_Left (prediction mode) */
+OMX_4x4_HU 				EQU 8;			/** Intra_4x4_Horizontal_Up (prediction mode) */
+;}
+
+;enum {
+OMX_CHROMA_DC 			EQU 0;		/** Intra_Chroma_DC (prediction mode) */
+OMX_CHROMA_HOR 			EQU 1;		/** Intra_Chroma_Horizontal (prediction mode) */
+OMX_CHROMA_VERT 		EQU 2;	/** Intra_Chroma_Vertical (prediction mode) */
+OMX_CHROMA_PLANE 		EQU 3;	/** Intra_Chroma_Plane (prediction mode) */
+;}
+
+;typedef	struct {	
+x	EQU	0;	/** presumably the byte offset of field x in OMXCoordinate - confirm against the C struct */
+y	EQU	4;	/** presumably the byte offset of field y */
+;}OMXCoordinate;
+
+;typedef struct {
+dx	EQU	0;	/** presumably the byte offset of field dx in OMXMotionVector - confirm against the C struct */
+dy	EQU	2;	/** presumably the byte offset of field dy */
+;}OMXMotionVector;
+
+;typedef struct {
+xx		EQU	0;	/** presumably the byte offset of the rectangle's x field (named xx here, x is already defined above) - confirm */
+yy		EQU	4;	/** presumably the byte offset of the rectangle's y field (named yy here, y is already defined above) - confirm */
+width	EQU	8;	/** presumably the byte offset of field width */
+height	EQU	12;	/** presumably the byte offset of field height */
+;}OMXiRect;
+
+;typedef enum {
+OMX_VC_INTER         EQU 0;        /** P picture or P-VOP */
+OMX_VC_INTER_Q       EQU 1;       /** P picture or P-VOP */
+OMX_VC_INTER4V       EQU 2;       /** P picture or P-VOP */
+OMX_VC_INTRA         EQU 3;        /** I and P picture, I- and P-VOP */
+OMX_VC_INTRA_Q       EQU 4;       /** I and P picture, I- and P-VOP */
+OMX_VC_INTER4V_Q     EQU 5;    /** P picture or P-VOP (H.263)*/
+;} OMXVCM4P2MacroblockType;
+
+;enum {
+OMX_VC_NONE          EQU 0
+OMX_VC_HORIZONTAL    EQU 1
+OMX_VC_VERTICAL      EQU 2 
+;};
+
+
+	END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy16x16_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy16x16_s.s
new file mode 100755
index 0000000..296d59d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy16x16_s.s
@@ -0,0 +1,95 @@
+ ;/**
+ ; * Function: omxVCCOMM_Copy16x16
+ ; *
+ ; * Description:
+ ; * Copies the reference 16x16 block to the current block; the 16 destination rows are written back-to-back (256 bytes).
+ ; * Parameters:
+ ; * [in] pSrc         - pointer to the reference block in the source frame; must be aligned on a 16-byte boundary.
+ ; * [in] step         - distance between the starts of consecutive lines in the reference frame, in bytes;
+ ; *                     must be a multiple of 16 and must be larger than or equal to 16.
+ ; * [out] pDst        - pointer to the destination block; must be aligned on a 16-byte boundary (every store uses @128).
+ ; * Return Value:
+ ; * OMX_Sts_NoErr     - no error (the only status this code sets; the arguments are not validated here)
+ ; * OMX_Sts_BadArgErr - bad arguments; specified by the API for any of the following conditions:
+ ; *                   - one or more of the following pointers is NULL:  pSrc, pDst
+ ; *                   - one or more of the following pointers is not aligned on a 16-byte boundary:  pSrc, pDst
+ ; *                   - step <16 or step is not a multiple of 16.  
+ ; */
+
+   INCLUDE omxtypes_s.h
+   
+     
+     M_VARIANTS CortexA8
+     
+     IF CortexA8
+     
+     
+ ;//Input Arguments
+pSrc    RN 0        ;// r0: source pointer, advanced by step after every row load
+pDst    RN 1        ;// r1: destination pointer, advanced by the stored size after every store
+step    RN 2        ;// r2: source row stride in bytes
+
+;//Local Variables
+Return  RN 0        ;// r0 again: pSrc is no longer needed when the status is written
+;// Neon Registers
+
+X0      DN D0.S8 
+X1      DN D1.S8 
+X2      DN D2.S8
+X3      DN D3.S8
+X4      DN D4.S8
+X5      DN D5.S8
+X6      DN D6.S8
+X7      DN D7.S8 
+ 
+     M_START omxVCCOMM_Copy16x16
+         
+        
+        VLD1  {X0,X1},[pSrc@128],step       ;// Load 16 bytes from 16 byte aligned pSrc and pSrc=pSrc + step after loading
+        VLD1  {X2,X3},[pSrc@128],step       ;// Source rows 0-3 are now held in D0-D7
+        VLD1  {X4,X5},[pSrc@128],step
+        VLD1  {X6,X7},[pSrc@128],step
+        
+        VST1  {X0,X1,X2,X3},[pDst@128]!     ;// Store 32 bytes to 16 byte aligned pDst   
+        VST1  {X4,X5,X6,X7},[pDst@128]!     ;// pDst has advanced by 64 bytes (4 rows of 16)
+               
+         
+        VLD1  {X0,X1},[pSrc@128],step       ;// Source rows 4-7
+        VLD1  {X2,X3},[pSrc@128],step
+        VLD1  {X4,X5},[pSrc@128],step
+        VLD1  {X6,X7},[pSrc@128],step
+        
+        VST1  {X0,X1,X2,X3},[pDst@128]!
+        VST1  {X4,X5,X6,X7},[pDst@128]!
+         
+      
+        VLD1  {X0,X1},[pSrc@128],step       ;// Source rows 8-11
+        VLD1  {X2,X3},[pSrc@128],step
+        VLD1  {X4,X5},[pSrc@128],step
+        VLD1  {X6,X7},[pSrc@128],step
+        
+        VST1  {X0,X1,X2,X3},[pDst@128]!              
+        VST1  {X4,X5,X6,X7},[pDst@128]!        
+        
+        
+        VLD1  {X0,X1},[pSrc@128],step       ;// Source rows 12-15
+        VLD1  {X2,X3},[pSrc@128],step
+        VLD1  {X4,X5},[pSrc@128],step
+        VLD1  {X6,X7},[pSrc@128],step
+        
+        VST1  {X0,X1,X2,X3},[pDst@128]!
+        VST1  {X4,X5,X6,X7},[pDst@128]!
+
+        
+        MOV   Return,#OMX_Sts_NoErr         ;// Status is unconditionally "no error"
+
+     
+        
+        M_END
+        ENDIF
+
+
+
+        
+        END
+       
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy8x8_s.s
new file mode 100755
index 0000000..db9e5ef
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy8x8_s.s
@@ -0,0 +1,70 @@
+ ;/**
+ ; * Function: omxVCCOMM_Copy8x8
+ ; *
+ ; * Description:
+ ; * Copies the reference 8x8 block to the current block; the 8 destination rows are written back-to-back (64 bytes).
+ ; * Parameters:
+ ; * [in] pSrc         - pointer to the reference block in the source frame; must be aligned on an 8-byte boundary.
+ ; * [in] step         - distance between the starts of consecutive lines in the reference frame, in bytes;
+ ; *                     must be a multiple of 8 and must be larger than or equal to 8.
+ ; * [out] pDst        - pointer to the destination block; must be aligned on an 8-byte boundary.
+ ; * Return Value:
+ ; * OMX_Sts_NoErr     - no error (the only status this code sets; the arguments are not validated here)
+ ; * OMX_Sts_BadArgErr - bad arguments; specified by the API for any of the following conditions:
+ ; *                   - one or more of the following pointers is NULL:  pSrc, pDst
+ ; *                   - one or more of the following pointers is not aligned on an 8-byte boundary:  pSrc, pDst
+ ; *                   - step <8 or step is not a multiple of 8.  
+ ; */
+
+   INCLUDE omxtypes_s.h
+   
+     
+     M_VARIANTS CortexA8
+     
+     IF CortexA8
+     
+     
+ ;//Input Arguments
+pSrc    RN 0        ;// r0: source pointer, advanced by step after every row load
+pDst    RN 1        ;// r1: destination pointer, advanced by 16 bytes after every store
+step    RN 2        ;// r2: source row stride in bytes
+
+;//Local Variables
+Count   RN 3        ;// declared but not referenced below (the copy is fully unrolled)
+Return  RN 0        ;// r0 again: pSrc is no longer needed when the status is written
+;// Neon Registers
+
+X0      DN D0.S8 
+X1      DN D1.S8
+X2      DN D2.S8
+X3      DN D3.S8
+     M_START omxVCCOMM_Copy8x8
+        
+            
+        
+        VLD1  {X0},[pSrc],step            ;// Load 8 bytes (row 0), pSrc=pSrc+step after load; no alignment qualifier is given here
+        VLD1  {X1},[pSrc],step            ;// Row 1
+        VLD1  {X2},[pSrc],step            ;// Row 2
+        VLD1  {X3},[pSrc],step            ;// Row 3
+        
+        VST1  {X0,X1},[pDst]!            ;// Store 16 bytes (two rows) to pDst, pDst=pDst+16 after store
+        VST1  {X2,X3},[pDst]!              
+        
+        VLD1  {X0},[pSrc],step            ;// Rows 4-7
+        VLD1  {X1},[pSrc],step
+        VLD1  {X2},[pSrc],step
+        VLD1  {X3},[pSrc],step
+        
+        VST1  {X0,X1},[pDst]!              
+        VST1  {X2,X3},[pDst]!             
+                
+        MOV   Return,#OMX_Sts_NoErr       ;// Status is unconditionally "no error"
+             
+        M_END
+        ENDIF
+
+
+
+        
+        END
+        
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
new file mode 100755
index 0000000..5c5b7d8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
@@ -0,0 +1,236 @@
+;//
+;// 
+;// File Name:  omxVCCOMM_ExpandFrame_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// Description:
+;// Replicates the boundary pixels of a frame into the surrounding plane area:
+;// iExpandPels rows above/below the frame first, then iExpandPels columns left/right
+;// of every row, including the four corner regions.
+
+;// Include standard headers
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS CortexA8
+
+;// Import symbols required from other files
+;// (For example tables)
+    
+  
+;// Set debugging level        
+DEBUG_ON    SETL {FALSE}
+
+
+    
+    IF CortexA8
+    
+        M_START omxVCCOMM_ExpandFrame_I,r11
+
+;//Input registers
+
+pSrcDstPlane    RN  0       ;// r0: address of the frame's top-left pixel inside the plane
+iFrameWidth     RN  1       ;// r1: frame width (added to byte pointers, so one byte per pixel)
+iFrameHeight    RN  2       ;// r2: frame height; reused as the row counter of HorLoop22
+iExpandPels     RN  3       ;// r3: number of pixels to add on every side (E in the comments below)
+iPlaneStep      RN  4       ;// r4: plane row stride, loaded from the stack argument
+pTop            RN  5       ;// r5: read pointer walking along the first frame row
+pBot            RN  6       ;// r6: read pointer walking along the last frame row
+pDstTop         RN  7       ;// r7: write pointer in the region above the frame
+pDstBot         RN  8       ;// r8: write pointer in the region below the frame
+pLeft           RN  5       ;// r5 reused: first pixel of the current frame row
+pRight          RN  6       ;// r6 reused: last pixel of the current frame row
+pDstLeft        RN  9       ;// r9: write pointer in the left margin
+pDstRight       RN  10      ;// r10: write pointer in the right margin
+Offset          RN  11      ;// r11: E*Step for the top/bottom pass, Step-E for the side passes
+Temp            RN  14      ;// r14: scratch and outer loop counter
+Counter         RN  12      ;// r12: inner loop counter
+Tmp             RN  7       ;// not referenced in this function
+;//Output registers
+
+result          RN  0       ;// alias of r0; the status below is written to r0 directly
+;// Neon registers
+qData0          QN  0.U8
+qData1          QN  1.U8
+dData0          DN  0.U8
+dData1          DN  1.U8
+dData2          DN  2.U8
+dData3          DN  3.U8
+
+        ;// Define stack arguments
+        M_ARG       pPlaneStep, 4
+        
+        ;// Load argument from the stack
+        M_LDR       iPlaneStep, pPlaneStep
+        
+        SUB         pTop, pSrcDstPlane, #0              ;// Top row pointer of the frame
+        MUL         Offset, iExpandPels, iPlaneStep     ;// E*Step        
+        SUB         Temp, iFrameHeight, #1              ;// H-1
+        MUL         Temp, iPlaneStep, Temp              ;// (H-1)*Step
+        ADD         pBot, Temp, pSrcDstPlane            ;// BPtr = TPtr + (H-1)*Step
+        MOV         Temp, iFrameWidth                   ;// Outer loop counter
+        
+        ;// Check if pSrcDstPlane and iPlaneStep are 16 byte aligned
+        TST         pSrcDstPlane, #0xf
+        TSTEQ       iPlaneStep, #0xf        
+        BNE         Hor8Loop00                          ;// Either one has low bits set: take the @64 path
+        
+        ;//
+        ;// Copy top and bottom region of the plane as follows
+        ;// top region = top row elements from the frame
+        ;// bottom region = last row elements from the frame
+        ;//
+
+        ;// Case for 16 byte alignment
+Hor16Loop00
+        SUB         pDstTop, pTop, Offset               ;// E rows above the current 16 columns
+        VLD1        qData0, [pTop @128]!                ;// 16 pixels of the first frame row, pTop += 16
+        MOV         Counter, iExpandPels                ;// Inner loop counter
+        ADD         pDstBot, pBot, iPlaneStep           ;// First row below the frame
+        VLD1        qData1, [pBot @128]!                ;// 16 pixels of the last frame row, pBot += 16
+Ver16Loop0
+        VST1        qData0, [pDstTop @128], iPlaneStep  ;// 8 rows above the frame per pass
+        VST1        qData0, [pDstTop @128], iPlaneStep
+        VST1        qData0, [pDstTop @128], iPlaneStep
+        VST1        qData0, [pDstTop @128], iPlaneStep
+        VST1        qData0, [pDstTop @128], iPlaneStep
+        VST1        qData0, [pDstTop @128], iPlaneStep
+        VST1        qData0, [pDstTop @128], iPlaneStep
+        VST1        qData0, [pDstTop @128], iPlaneStep
+        SUBS        Counter, Counter, #8                ;// 8 rows done; assumes E is a multiple of 8 - TODO confirm
+        VST1        qData1, [pDstBot @128], iPlaneStep  ;// 8 rows below the frame per pass (NEON stores leave the flags alone)
+        VST1        qData1, [pDstBot @128], iPlaneStep
+        VST1        qData1, [pDstBot @128], iPlaneStep
+        VST1        qData1, [pDstBot @128], iPlaneStep
+        VST1        qData1, [pDstBot @128], iPlaneStep
+        VST1        qData1, [pDstBot @128], iPlaneStep
+        VST1        qData1, [pDstBot @128], iPlaneStep
+        VST1        qData1, [pDstBot @128], iPlaneStep        
+        BGT         Ver16Loop0
+
+        SUBS        Temp, Temp, #16                     ;// Next 16 columns of the frame width
+        BGT         Hor16Loop00
+        B           EndAlignedLoop
+        
+        ;// Case for 8 byte alignment
+Hor8Loop00
+        SUB         pDstTop, pTop, Offset               ;// E rows above the current 16 columns
+        VLD1        qData0, [pTop @64]!                 ;// 16 pixels of the first frame row, pTop += 16
+        MOV         Counter, iExpandPels                ;// Inner loop counter
+        ADD         pDstBot, pBot, iPlaneStep           ;// First row below the frame
+        VLD1        qData1, [pBot @64]!                 ;// 16 pixels of the last frame row, pBot += 16
+Ver8Loop0
+        VST1        qData0, [pDstTop @64], iPlaneStep   ;// 8 rows above the frame per pass
+        VST1        qData0, [pDstTop @64], iPlaneStep
+        VST1        qData0, [pDstTop @64], iPlaneStep
+        VST1        qData0, [pDstTop @64], iPlaneStep
+        VST1        qData0, [pDstTop @64], iPlaneStep
+        VST1        qData0, [pDstTop @64], iPlaneStep
+        VST1        qData0, [pDstTop @64], iPlaneStep
+        VST1        qData0, [pDstTop @64], iPlaneStep
+        SUBS        Counter, Counter, #8                ;// 8 rows done; assumes E is a multiple of 8 - TODO confirm
+        VST1        qData1, [pDstBot @64], iPlaneStep   ;// 8 rows below the frame per pass
+        VST1        qData1, [pDstBot @64], iPlaneStep
+        VST1        qData1, [pDstBot @64], iPlaneStep
+        VST1        qData1, [pDstBot @64], iPlaneStep
+        VST1        qData1, [pDstBot @64], iPlaneStep
+        VST1        qData1, [pDstBot @64], iPlaneStep
+        VST1        qData1, [pDstBot @64], iPlaneStep
+        VST1        qData1, [pDstBot @64], iPlaneStep        
+        BGT         Ver8Loop0
+
+        SUBS        Temp, Temp, #16                     ;// Next 16 columns of the frame width
+        BGT         Hor8Loop00
+
+EndAlignedLoop
+        ADD         Temp, pSrcDstPlane, iFrameWidth     ;// One past the last pixel of frame row 0
+        SUB         pDstRight, Temp, Offset             ;// E rows above, immediately right of the frame
+        SUB         pRight, Temp, #1                    ;// Last pixel of frame row 0
+        SUB         pDstLeft, pSrcDstPlane, Offset      ;// E rows above ...
+        SUB         pDstLeft, pDstLeft, iExpandPels     ;// ... and E pixels left of the frame
+        ADD         pLeft, pSrcDstPlane, #0             ;// First pixel of frame row 0
+        
+        VLD1        {dData0 []}, [pLeft], iPlaneStep        ;// Top-Left corner pixel from frame duplicated in dData0
+        SUB         Offset, iPlaneStep, iExpandPels         ;// Step-E: pointer advance still needed after E bytes were written
+        VLD1        {dData1 []}, [pRight], iPlaneStep       ;// Top-Right corner pixel from frame duplicated in dData1
+        MOV         Temp, iExpandPels                       ;// Row counter for HorLoop11
+
+        ;//
+        ;// Copy top-left and top-right region of the plane as follows
+        ;// top-left region = top-left corner pixel from the frame
+        ;// top-right region = top-right corner pixel from the frame
+        ;// (E+1 passes: the E rows above the frame plus the side margins of frame row 0)
+HorLoop11
+        MOV         Counter, iExpandPels
+VerLoop1
+        VST1        dData0, [pDstLeft], #8              ;// 8 margin pixels per pass on each side
+        SUBS        Counter, Counter, #8
+        VST1        dData1, [pDstRight], #8        
+        BGT         VerLoop1
+
+        SUBS        Temp, Temp, #1
+        ADD         pDstLeft, pDstLeft, Offset          ;// Next row (ADD without S keeps the flags from SUBS)
+        ADD         pDstRight, pDstRight, Offset
+        BPL         HorLoop11                           ;// BPL, not BGT: runs E+1 times
+
+        SUB         iFrameHeight, iFrameHeight, #1      ;// Frame row 0 was handled above; H-1 rows remain
+        ;//
+        ;// Copy left and right region of the plane as follows
+        ;// Left region = copy the row with left start pixel from the frame
+        ;// Right region = copy the row with right end pixel from the frame
+        ;//
+HorLoop22
+        VLD1        {dData0 []}, [pLeft], iPlaneStep    ;// First pixel of this row duplicated, pLeft moves to the next row
+        MOV         Counter, iExpandPels
+        VLD1        {dData1 []}, [pRight], iPlaneStep   ;// Last pixel of this row duplicated, pRight moves to the next row
+VerLoop2
+        VST1        dData0, [pDstLeft], #8
+        SUBS        Counter, Counter, #8
+        VST1        dData1, [pDstRight], #8        
+        BGT         VerLoop2
+
+        SUBS        iFrameHeight, iFrameHeight, #1
+        ADD         pDstLeft, pDstLeft, Offset
+        ADD         pDstRight, pDstRight, Offset
+        BGT         HorLoop22
+                
+        MOV         Temp, iExpandPels                   ;// Row counter for HorLoop33
+        ;//
+        ;// Copy bottom-left and bottom-right region of the plane as follows
+        ;// bottom-left region = bottom-left corner pixel from the frame
+        ;// bottom-right region = bottom-right corner pixel from the frame
+        ;// (dData0/dData1 still hold the pixels loaded by the final HorLoop22 pass)
+HorLoop33
+        MOV         Counter, iExpandPels
+VerLoop3
+        VST1        dData0, [pDstLeft], #8
+        SUBS        Counter, Counter, #8
+        VST1        dData1, [pDstRight], #8        
+        BGT         VerLoop3
+
+        SUBS        Temp, Temp, #1
+        ADD         pDstLeft, pDstLeft, Offset
+        ADD         pDstRight, pDstRight, Offset
+        BGT         HorLoop33                           ;// BGT: runs E times
+End
+        MOV         r0, #OMX_Sts_NoErr                  ;// Status is unconditionally "no error"
+        
+        M_END    
+    
+    ENDIF
+
+
+
+        
+;// Guarding implementation by the processor name
+    
+ 
+            
+    END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/api/armVCM4P10_CAVLCTables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/api/armVCM4P10_CAVLCTables.h
new file mode 100755
index 0000000..547a2d9
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/api/armVCM4P10_CAVLCTables.h
@@ -0,0 +1,30 @@
+/* ----------------------------------------------------------------
+ * 
+ * 
+ * File Name:  armVCM4P10_CAVLCTables.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Header file for optimized H.264 CAVLC tables
+ * 
+ */
+ 
+#ifndef ARMVCM4P10_CAVLCTABLES_H
+#define ARMVCM4P10_CAVLCTABLES_H
+  
+/* CAVLC tables */
+
+extern const OMX_U16 *armVCM4P10_CAVLCCoeffTokenTables[18];
+extern const OMX_U16 *armVCM4P10_CAVLCTotalZeroTables[15];
+extern const OMX_U16 *armVCM4P10_CAVLCTotalZeros2x2Tables[3];
+extern const OMX_U16 *armVCM4P10_CAVLCRunBeforeTables[15];
+extern const OMX_U8 armVCM4P10_ZigZag_4x4[16];
+extern const OMX_U8 armVCM4P10_ZigZag_2x2[4];
+extern const OMX_S8 armVCM4P10_SuffixToLevel[7];
+
+#endif
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
new file mode 100755
index 0000000..4f0892d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
@@ -0,0 +1,222 @@
+;//
+;// 
+;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+
+;// Functions:
+;//     armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe  
+;//
+;// Implements Average of 4x4 with equation c = (a+b+1)>>1.
+;// First operand will be at offset ALIGNMENT from aligned address
+;// Second operand will be at aligned location and will be used as output.
+;// destination pointed by (pDst) for vertical interpolation.
+;// This function needs to copy 4 bytes in horizontal direction 
+;//
+;// Registers used as input for this function
+;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 the step size
+;//
+;// Registers preserved for top level function
+;// r4,r5,r6,r8,r9,r14
+;//
+;// Registers modified by the function
+;// r7,r10,r11,r12
+;//
+;// Output registers
+;// r2 - pointer to the aligned location
+;// r3 - step size to this aligned location
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+
+        EXPORT armVCM4P10_Average_4x4_Align0_unsafe
+        EXPORT armVCM4P10_Average_4x4_Align2_unsafe
+        EXPORT armVCM4P10_Average_4x4_Align3_unsafe
+
+DEBUG_ON    SETL {FALSE}
+
+;// Declare input registers
+pPred0          RN 0
+iPredStep0      RN 1
+pPred1          RN 2
+iPredStep1      RN 3
+pDstPred        RN 2
+iDstStep        RN 3
+
+;// Declare other intermediate registers
+iPredA0         RN 10
+iPredA1         RN 11
+iPredB0         RN 12
+iPredB1         RN 14
+Temp1           RN 4
+Temp2           RN 5
+ResultA         RN 5
+ResultB         RN 4
+r0x80808080     RN 7
+
+    IF ARM1136JS
+        
+        ;// Byte-wise rounded average c = (a+b+1)>>1 of a 4x4 block, written over operand b
+        ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4
+
+        ;// Function header
+        M_START armVCM4P10_Average_4x4_Align0_unsafe, r6
+
+        ;// Code start        
+        LDR         r0x80808080, =0x80808080    ;// 0x80 in every byte lane: the "+128" term below
+
+        ;// 1st load: rows 0 and 1 of both operands
+        M_LDR       iPredB0, [pPred1]
+        M_LDR       iPredA0, [pPred0], iPredStep0        
+        M_LDR       iPredB1, [pPred1, iPredStep1]
+        M_LDR       iPredA1, [pPred0], iPredStep0
+
+        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
+        MVN         iPredB0, iPredB0            ;// 255-b in each byte
+        MVN         iPredB1, iPredB1
+        UHSUB8      ResultA, iPredA0, iPredB0   ;// (a-(255-b))/2 per byte
+        UHSUB8      ResultB, iPredA1, iPredB1
+        EOR         ResultA, ResultA, r0x80808080   ;// +128 per byte (flip bit 7)
+        M_STR       ResultA, [pDstPred], iDstStep        
+        EOR         ResultB, ResultB, r0x80808080
+        M_STR       ResultB, [pDstPred], iDstStep        
+        
+        ;// 2nd load: rows 2 and 3 (pPred1 and pDstPred are both r2, already advanced by the two stores)
+        M_LDR       iPredA0, [pPred0], iPredStep0        
+        M_LDR       iPredB0, [pPred1]
+        M_LDR       iPredA1, [pPred0], iPredStep0
+        M_LDR       iPredB1, [pPred1, iPredStep1]
+
+        MVN         iPredB0, iPredB0
+        UHSUB8      ResultA, iPredA0, iPredB0
+        MVN         iPredB1, iPredB1
+        UHSUB8      ResultB, iPredA1, iPredB1
+        EOR         ResultA, ResultA, r0x80808080        
+        M_STR       ResultA, [pDstPred], iDstStep        
+        EOR         ResultB, ResultB, r0x80808080
+        M_STR       ResultB, [pDstPred], iDstStep                
+End0
+        M_END
+
+        ;// Byte-wise rounded average c = (a+b+1)>>1 of a 4x4 block, written over operand b
+        ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4
+
+        ;// Function header
+        M_START armVCM4P10_Average_4x4_Align2_unsafe, r6
+
+        ;// Code start        
+        LDR         r0x80808080, =0x80808080    ;// 0x80 in every byte lane: the "+128" term below
+
+        ;// 1st load: rows 0 and 1; each pPred0 row is read as two words, [pPred0] and [pPred0+4]
+        LDR         Temp1, [pPred0, #4]         ;// 2nd word of row 0, read before pPred0 is stepped
+        M_LDR       iPredA0, [pPred0], iPredStep0        
+        M_LDR       iPredB0, [pPred1]
+        M_LDR       iPredB1, [pPred1, iPredStep1]
+        M_LDR       Temp2, [pPred0, #4]
+        M_LDR       iPredA1, [pPred0], iPredStep0
+        MVN         iPredB0, iPredB0            ;// 255-b in each byte
+        MVN         iPredB1, iPredB1        
+        MOV         iPredA0, iPredA0, LSR #16   ;// upper halfword of 1st word -> low half
+        ORR         iPredA0, iPredA0, Temp1, LSL #16        
+        MOV         iPredA1, iPredA1, LSR #16
+        ORR         iPredA1, iPredA1, Temp2, LSL #16    ;// lower halfword of 2nd word -> high half
+
+        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
+        UHSUB8      ResultA, iPredA0, iPredB0
+        UHSUB8      ResultB, iPredA1, iPredB1
+        EOR         ResultA, ResultA, r0x80808080   ;// +128 per byte (flip bit 7)
+        M_STR       ResultA, [pDstPred], iDstStep        
+        EOR         ResultB, ResultB, r0x80808080
+        M_STR       ResultB, [pDstPred], iDstStep        
+        
+        ;// 2nd load: rows 2 and 3 (pPred1 and pDstPred are both r2, already advanced by the two stores)
+        LDR         Temp1, [pPred0, #4]
+        M_LDR         iPredA0, [pPred0], iPredStep0        
+        LDR         iPredB0, [pPred1]
+        LDR         iPredB1, [pPred1, iPredStep1]
+        LDR         Temp2, [pPred0, #4]
+        M_LDR         iPredA1, [pPred0], iPredStep0
+        MVN         iPredB0, iPredB0
+        MVN         iPredB1, iPredB1
+        MOV         iPredA0, iPredA0, LSR #16
+        ORR         iPredA0, iPredA0, Temp1, LSL #16        
+        MOV         iPredA1, iPredA1, LSR #16
+        ORR         iPredA1, iPredA1, Temp2, LSL #16
+
+        UHSUB8      ResultA, iPredA0, iPredB0
+        UHSUB8      ResultB, iPredA1, iPredB1
+        EOR         ResultA, ResultA, r0x80808080        
+        M_STR       ResultA, [pDstPred], iDstStep        
+        EOR         ResultB, ResultB, r0x80808080
+        M_STR       ResultB, [pDstPred], iDstStep                
+End2
+        M_END
+
+
+        ;// Byte-wise rounded average c = (a+b+1)>>1 of a 4x4 block, written over operand b
+        ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4
+
+        ;// Function header
+        M_START armVCM4P10_Average_4x4_Align3_unsafe, r6
+
+        ;// Code start        
+        LDR         r0x80808080, =0x80808080    ;// 0x80 in every byte lane: the "+128" term below
+
+        ;// 1st load: rows 0 and 1; each pPred0 row is read as two words, [pPred0] and [pPred0+4]
+        LDR         Temp1, [pPred0, #4]         ;// 2nd word of row 0, read before pPred0 is stepped
+        M_LDR       iPredA0, [pPred0], iPredStep0        
+        LDR         iPredB0, [pPred1]
+        LDR         iPredB1, [pPred1, iPredStep1]
+        LDR         Temp2, [pPred0, #4]
+        M_LDR       iPredA1, [pPred0], iPredStep0
+
+        MVN         iPredB0, iPredB0            ;// 255-b in each byte
+        MVN         iPredB1, iPredB1
+        MOV         iPredA0, iPredA0, LSR #24   ;// top byte of 1st word -> byte 0
+        ORR         iPredA0, iPredA0, Temp1, LSL #8                
+        MOV         iPredA1, iPredA1, LSR #24
+        ORR         iPredA1, iPredA1, Temp2, LSL #8     ;// low three bytes of 2nd word -> bytes 1-3
+        UHSUB8      ResultA, iPredA0, iPredB0   ;// (a-(255-b))/2 per byte
+        UHSUB8      ResultB, iPredA1, iPredB1
+        EOR         ResultA, ResultA, r0x80808080   ;// +128 per byte (flip bit 7)
+        M_STR       ResultA, [pDstPred], iDstStep        
+        EOR         ResultB, ResultB, r0x80808080
+        M_STR       ResultB, [pDstPred], iDstStep        
+        
+        ;// 2nd load: rows 2 and 3 (pPred1 and pDstPred are both r2, already advanced by the two stores)
+        LDR         Temp1, [pPred0, #4]
+        M_LDR       iPredA0, [pPred0], iPredStep0        
+        LDR         iPredB0, [pPred1]
+        LDR         iPredB1, [pPred1, iPredStep1]
+        LDR         Temp2, [pPred0, #4]
+        M_LDR       iPredA1, [pPred0], iPredStep0
+
+        MVN         iPredB0, iPredB0
+        MVN         iPredB1, iPredB1
+        MOV         iPredA0, iPredA0, LSR #24
+        ORR         iPredA0, iPredA0, Temp1, LSL #8        
+        MOV         iPredA1, iPredA1, LSR #24
+        ORR         iPredA1, iPredA1, Temp2, LSL #8
+
+        UHSUB8      ResultA, iPredA0, iPredB0
+        UHSUB8      ResultB, iPredA1, iPredB1
+        EOR         ResultA, ResultA, r0x80808080        
+        M_STR       ResultA, [pDstPred], iDstStep        
+        EOR         ResultB, ResultB, r0x80808080
+        M_STR       ResultB, [pDstPred], iDstStep                
+End3
+        M_END
+
+    ENDIF
+    
+    END
+    
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_CAVLCTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_CAVLCTables.c
new file mode 100755
index 0000000..137495d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_CAVLCTables.c
@@ -0,0 +1,327 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  armVCM4P10_CAVLCTables.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Optimized CAVLC tables for H.264
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVCM4P10_CAVLCTables.h"
+
+/* 4x4 DeZigZag table */
+
+const OMX_U8 armVCM4P10_ZigZag_4x4[16] =
+{
+    0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+/* 2x2 DeZigZag table */
+
+const OMX_U8 armVCM4P10_ZigZag_2x2[4] =
+{
+    0, 1, 2, 3
+};
+
+
+/*
+ * Suffix To Level table
+ * We increment the suffix length if (each line is an equivalent rewrite of the one above)
+ * ((LevelCode>>1)+1)>(3<<(SuffixLength-1)) && SuffixLength<6
+ * (LevelCode>>1)>=(3<<(SuffixLength-1))    && SuffixLength<6
+ *  LevelCode    >= 3<<SuffixLength         && SuffixLength<6
+ * (LevelCode+2) >= (3<<SuffixLength)+2     && SuffixLength<6
+ */
+const OMX_S8 armVCM4P10_SuffixToLevel[7] =
+{
+    (3<<1)+2,       /* index 0 (presumably SuffixLength=0): same threshold as 1 */
+    (3<<1)+2,       /* SuffixLength=1 */
+    (3<<2)+2,       /* SuffixLength=2 */
+    (3<<3)+2,       /* SuffixLength=3 */
+    (3<<4)+2,       /* SuffixLength=4 */
+    (3<<5)+2,       /* SuffixLength=5 */
+    -1              /* SuffixLength=6 - never increment */
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_0[132] = {
+    0x0020, 0x0100, 0x2015, 0x2015, 0x400b, 0x400b, 0x400b, 0x400b,
+    0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+    0x0028, 0x00f0, 0x00f8, 0x0027, 0x0030, 0x00d8, 0x00e0, 0x00e8,
+    0x0038, 0x00a0, 0x00c8, 0x00d0, 0x0040, 0x0068, 0x0090, 0x0098,
+    0x0048, 0x0050, 0x0058, 0x0060, 0x27ff, 0x27ff, 0x206b, 0x206b,
+    0x0081, 0x0085, 0x0083, 0x0079, 0x0087, 0x007d, 0x007b, 0x0071,
+    0x007f, 0x0075, 0x0073, 0x0069, 0x0070, 0x0078, 0x0080, 0x0088,
+    0x2077, 0x2077, 0x206d, 0x206d, 0x2063, 0x2063, 0x2061, 0x2061,
+    0x206f, 0x206f, 0x2065, 0x2065, 0x205b, 0x205b, 0x2059, 0x2059,
+    0x0067, 0x005d, 0x0053, 0x0051, 0x005f, 0x0055, 0x004b, 0x0049,
+    0x00a8, 0x00b0, 0x00b8, 0x00c0, 0x2041, 0x2041, 0x204d, 0x204d,
+    0x2043, 0x2043, 0x2039, 0x2039, 0x2057, 0x2057, 0x2045, 0x2045,
+    0x203b, 0x203b, 0x2031, 0x2031, 0x204f, 0x204f, 0x203d, 0x203d,
+    0x2033, 0x2033, 0x2029, 0x2029, 0x0047, 0x0035, 0x002b, 0x0021,
+    0x203f, 0x203f, 0x202d, 0x202d, 0x2023, 0x2023, 0x2019, 0x2019,
+    0x0037, 0x0025, 0x001b, 0x0011, 0x202f, 0x202f, 0x201d, 0x201d,
+    0x0013, 0x0009, 0x201f, 0x201f
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_1[128] = {
+    0x0020, 0x00e8, 0x00f0, 0x00f8, 0x0027, 0x001f, 0x2015, 0x2015,
+    0x400b, 0x400b, 0x400b, 0x400b, 0x4001, 0x4001, 0x4001, 0x4001,
+    0x0028, 0x00d0, 0x00d8, 0x00e0, 0x0030, 0x0098, 0x00c0, 0x00c8,
+    0x0038, 0x0060, 0x0088, 0x0090, 0x0040, 0x0048, 0x0050, 0x0058,
+    0x27ff, 0x27ff, 0x207f, 0x207f, 0x0087, 0x0085, 0x0083, 0x0081,
+    0x007b, 0x0079, 0x007d, 0x0073, 0x2075, 0x2075, 0x2071, 0x2071,
+    0x0068, 0x0070, 0x0078, 0x0080, 0x2077, 0x2077, 0x206d, 0x206d,
+    0x206b, 0x206b, 0x2069, 0x2069, 0x206f, 0x206f, 0x2065, 0x2065,
+    0x2063, 0x2063, 0x2061, 0x2061, 0x0059, 0x005d, 0x005b, 0x0051,
+    0x0067, 0x0055, 0x0053, 0x0049, 0x00a0, 0x00a8, 0x00b0, 0x00b8,
+    0x205f, 0x205f, 0x204d, 0x204d, 0x204b, 0x204b, 0x2041, 0x2041,
+    0x2057, 0x2057, 0x2045, 0x2045, 0x2043, 0x2043, 0x2039, 0x2039,
+    0x204f, 0x204f, 0x203d, 0x203d, 0x203b, 0x203b, 0x2031, 0x2031,
+    0x0029, 0x0035, 0x0033, 0x0021, 0x2047, 0x2047, 0x202d, 0x202d,
+    0x202b, 0x202b, 0x2019, 0x2019, 0x003f, 0x0025, 0x0023, 0x0011,
+    0x0037, 0x001d, 0x001b, 0x0009, 0x202f, 0x202f, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_2[112] = {
+    0x0020, 0x0088, 0x00b0, 0x00b8, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+    0x003f, 0x0037, 0x002f, 0x0027, 0x001f, 0x0015, 0x000b, 0x0001,
+    0x0028, 0x0050, 0x0078, 0x0080, 0x0030, 0x0038, 0x0040, 0x0048,
+    0x07ff, 0x0081, 0x0087, 0x0085, 0x0083, 0x0079, 0x007f, 0x007d,
+    0x007b, 0x0071, 0x0077, 0x0075, 0x0073, 0x0069, 0x206b, 0x206b,
+    0x0058, 0x0060, 0x0068, 0x0070, 0x2061, 0x2061, 0x206d, 0x206d,
+    0x2063, 0x2063, 0x2059, 0x2059, 0x206f, 0x206f, 0x2065, 0x2065,
+    0x205b, 0x205b, 0x2051, 0x2051, 0x0067, 0x005d, 0x0053, 0x0049,
+    0x005f, 0x0055, 0x004b, 0x0041, 0x0090, 0x0098, 0x00a0, 0x00a8,
+    0x2039, 0x2039, 0x2031, 0x2031, 0x204d, 0x204d, 0x2029, 0x2029,
+    0x2057, 0x2057, 0x2045, 0x2045, 0x2043, 0x2043, 0x2021, 0x2021,
+    0x0019, 0x003d, 0x003b, 0x0011, 0x004f, 0x0035, 0x0033, 0x0009,
+    0x202b, 0x202b, 0x202d, 0x202d, 0x2023, 0x2023, 0x2025, 0x2025,
+    0x201b, 0x201b, 0x2047, 0x2047, 0x201d, 0x201d, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_3[80] = {
+    0x0020, 0x0028, 0x0030, 0x0038, 0x0040, 0x0048, 0x0050, 0x0058,
+    0x0060, 0x0068, 0x0070, 0x0078, 0x0080, 0x0088, 0x0090, 0x0098,
+    0x0009, 0x000b, 0x07ff, 0x0001, 0x0011, 0x0013, 0x0015, 0x07ff,
+    0x0019, 0x001b, 0x001d, 0x001f, 0x0021, 0x0023, 0x0025, 0x0027,
+    0x0029, 0x002b, 0x002d, 0x002f, 0x0031, 0x0033, 0x0035, 0x0037,
+    0x0039, 0x003b, 0x003d, 0x003f, 0x0041, 0x0043, 0x0045, 0x0047,
+    0x0049, 0x004b, 0x004d, 0x004f, 0x0051, 0x0053, 0x0055, 0x0057,
+    0x0059, 0x005b, 0x005d, 0x005f, 0x0061, 0x0063, 0x0065, 0x0067,
+    0x0069, 0x006b, 0x006d, 0x006f, 0x0071, 0x0073, 0x0075, 0x0077,
+    0x0079, 0x007b, 0x007d, 0x007f, 0x0081, 0x0083, 0x0085, 0x0087
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_4[32] = {
+    0x0020, 0x0038, 0x2015, 0x2015, 0x4001, 0x4001, 0x4001, 0x4001,
+    0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b,
+    0x0028, 0x0030, 0x0021, 0x0019, 0x2027, 0x2027, 0x0025, 0x0023,
+    0x201d, 0x201d, 0x201b, 0x201b, 0x0011, 0x001f, 0x0013, 0x0009
+};
+
+const OMX_U16 * armVCM4P10_CAVLCCoeffTokenTables[18] = {
+    armVCM4P10_CAVLCCoeffTokenTables_0, /* nC=0 */
+    armVCM4P10_CAVLCCoeffTokenTables_0, /* nC=1 */
+    armVCM4P10_CAVLCCoeffTokenTables_1, /* nC=2 */
+    armVCM4P10_CAVLCCoeffTokenTables_1, /* nC=3 */
+    armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=4 */
+    armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=5 */
+    armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=6 */
+    armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=7 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=8 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=9 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=10 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=11 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=12 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=13 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=14 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=15 */
+    armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=16 */
+    armVCM4P10_CAVLCCoeffTokenTables_4  /* nC=-1 */
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_0[40] = {
+    0x0020, 0x0048, 0x0009, 0x0007, 0x2005, 0x2005, 0x2003, 0x2003,
+    0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+    0x0028, 0x0040, 0x0011, 0x000f, 0x0030, 0x0038, 0x0019, 0x0017,
+    0x27ff, 0x27ff, 0x201f, 0x201f, 0x201d, 0x201d, 0x201b, 0x201b,
+    0x2015, 0x2015, 0x2013, 0x2013, 0x200d, 0x200d, 0x200b, 0x200b
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_1[24] = {
+    0x0020, 0x0028, 0x0011, 0x000f, 0x000d, 0x000b, 0x2009, 0x2009,
+    0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001,
+    0x001d, 0x001b, 0x0019, 0x0017, 0x2015, 0x2015, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_2[24] = {
+    0x0020, 0x0028, 0x0011, 0x000b, 0x0009, 0x0001, 0x200f, 0x200f,
+    0x200d, 0x200d, 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003,
+    0x001b, 0x0017, 0x2019, 0x2019, 0x2015, 0x2015, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_3[24] = {
+    0x0020, 0x0028, 0x0013, 0x000f, 0x0007, 0x0005, 0x2011, 0x2011,
+    0x200d, 0x200d, 0x200b, 0x200b, 0x2009, 0x2009, 0x2003, 0x2003,
+    0x2019, 0x2019, 0x2017, 0x2017, 0x2015, 0x2015, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_4[20] = {
+    0x0020, 0x0015, 0x0011, 0x0005, 0x0003, 0x0001, 0x200f, 0x200f,
+    0x200d, 0x200d, 0x200b, 0x200b, 0x2009, 0x2009, 0x2007, 0x2007,
+    0x2017, 0x2017, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_5[20] = {
+    0x0020, 0x0011, 0x2013, 0x2013, 0x200f, 0x200f, 0x200d, 0x200d,
+    0x200b, 0x200b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2005, 0x2005,
+    0x0015, 0x0001, 0x2003, 0x2003
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_6[20] = {
+    0x0020, 0x000f, 0x2011, 0x2011, 0x200d, 0x200d, 0x2009, 0x2009,
+    0x2007, 0x2007, 0x2005, 0x2005, 0x400b, 0x400b, 0x400b, 0x400b,
+    0x0013, 0x0001, 0x2003, 0x2003
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_7[20] = {
+    0x0020, 0x0003, 0x200f, 0x200f, 0x200d, 0x200d, 0x2007, 0x2007,
+    0x400b, 0x400b, 0x400b, 0x400b, 0x4009, 0x4009, 0x4009, 0x4009,
+    0x0011, 0x0001, 0x2005, 0x2005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_8[20] = {
+    0x0020, 0x0005, 0x200b, 0x200b, 0x400d, 0x400d, 0x400d, 0x400d,
+    0x4009, 0x4009, 0x4009, 0x4009, 0x4007, 0x4007, 0x4007, 0x4007,
+    0x0003, 0x0001, 0x200f, 0x200f
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_9[20] = {
+    0x0020, 0x000d, 0x2005, 0x2005, 0x400b, 0x400b, 0x400b, 0x400b,
+    0x4009, 0x4009, 0x4009, 0x4009, 0x4007, 0x4007, 0x4007, 0x4007,
+    0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_10[16] = {
+    0x0001, 0x0003, 0x2005, 0x2005, 0x2007, 0x2007, 0x200b, 0x200b,
+    0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_11[16] = {
+    0x0001, 0x0003, 0x2009, 0x2009, 0x4005, 0x4005, 0x4005, 0x4005,
+    0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_12[16] = {
+    0x2001, 0x2001, 0x2003, 0x2003, 0x4007, 0x4007, 0x4007, 0x4007,
+    0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_13[16] = {
+    0x4001, 0x4001, 0x4001, 0x4001, 0x4003, 0x4003, 0x4003, 0x4003,
+    0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_14[16] = {
+    0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+    0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003
+};
+
+const OMX_U16 * armVCM4P10_CAVLCTotalZeroTables[15] = {
+    armVCM4P10_CAVLCTotalZeroTables_0,
+    armVCM4P10_CAVLCTotalZeroTables_1,
+    armVCM4P10_CAVLCTotalZeroTables_2,
+    armVCM4P10_CAVLCTotalZeroTables_3,
+    armVCM4P10_CAVLCTotalZeroTables_4,
+    armVCM4P10_CAVLCTotalZeroTables_5,
+    armVCM4P10_CAVLCTotalZeroTables_6,
+    armVCM4P10_CAVLCTotalZeroTables_7,
+    armVCM4P10_CAVLCTotalZeroTables_8,
+    armVCM4P10_CAVLCTotalZeroTables_9,
+    armVCM4P10_CAVLCTotalZeroTables_10,
+    armVCM4P10_CAVLCTotalZeroTables_11,
+    armVCM4P10_CAVLCTotalZeroTables_12,
+    armVCM4P10_CAVLCTotalZeroTables_13,
+    armVCM4P10_CAVLCTotalZeroTables_14
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_0[16] = {
+    0x2007, 0x2007, 0x2005, 0x2005, 0x4003, 0x4003, 0x4003, 0x4003,
+    0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_1[16] = {
+    0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003,
+    0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_2[16] = {
+    0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003,
+    0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+const OMX_U16 * armVCM4P10_CAVLCTotalZeros2x2Tables[3] = {
+    armVCM4P10_CAVLCTotalZeros2x2Tables_0,
+    armVCM4P10_CAVLCTotalZeros2x2Tables_1,
+    armVCM4P10_CAVLCTotalZeros2x2Tables_2
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_0[8] = {
+    0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_1[8] = {
+    0x2005, 0x2005, 0x2003, 0x2003, 0x4001, 0x4001, 0x4001, 0x4001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_2[8] = {
+    0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_3[8] = {
+    0x0009, 0x0007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_4[8] = {
+    0x000b, 0x0009, 0x0007, 0x0005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_5[8] = {
+    0x0003, 0x0005, 0x0009, 0x0007, 0x000d, 0x000b, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_6[24] = {
+    0x0010, 0x000d, 0x000b, 0x0009, 0x0007, 0x0005, 0x0003, 0x0001,
+    0x0018, 0x0011, 0x200f, 0x200f, 0x0020, 0x0015, 0x2013, 0x2013,
+    0x0028, 0x0019, 0x2017, 0x2017, 0x07ff, 0x001d, 0x201b, 0x201b
+};
+
+/* Tables 7 to 14 are duplicates of table 6 */
+
+const OMX_U16 * armVCM4P10_CAVLCRunBeforeTables[15] = {
+    armVCM4P10_CAVLCRunBeforeTables_0,  /* ZerosLeft=1 */
+    armVCM4P10_CAVLCRunBeforeTables_1,
+    armVCM4P10_CAVLCRunBeforeTables_2,
+    armVCM4P10_CAVLCRunBeforeTables_3,
+    armVCM4P10_CAVLCRunBeforeTables_4,
+    armVCM4P10_CAVLCRunBeforeTables_5,  /* ZerosLeft=6 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=7 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=8 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=9 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=10 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=11 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=12 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=13 */
+    armVCM4P10_CAVLCRunBeforeTables_6,  /* ZerosLeft=14 */
+    armVCM4P10_CAVLCRunBeforeTables_6   /* ZerosLeft=15 */
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
new file mode 100755
index 0000000..4c3a77c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
@@ -0,0 +1,198 @@
+;//
+;// 
+;// File Name:  armVCM4P10_DeblockingChroma_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS CortexA8
+
+
+    IF  CortexA8
+        
+pAlpha      RN 2
+pBeta       RN 3
+
+pThresholds RN 5
+pBS         RN 4
+bS3210      RN 6
+
+;// Pixels
+dP_0        DN D4.U8
+dP_1        DN D5.U8  
+dP_2        DN D6.U8  
+dP_3        DN D7.U8  
+dQ_0        DN D8.U8  
+dQ_1        DN D9.U8  
+dQ_2        DN D10.U8 
+dQ_3        DN D11.U8 
+
+
+;// Filtering Decision
+dAlpha      DN D0.U8
+dBeta       DN D2.U8
+
+dFilt       DN D16.U8
+dAqflg      DN D12.U8
+dApflg      DN D17.U8 
+
+dAp0q0      DN D13.U8
+
+;// bSLT4
+dTC3210     DN D18.U8   
+dTCs        DN D31.S8
+dTC         DN D31.U8
+
+dMask_0     DN D14.U8
+dMask_1     DN D15.U8    
+dMask_4     DN D26.U16
+
+dTemp       DN D28.U8
+dDummy      DN D17.U8
+
+;// Computing P0,Q0
+qDq0p0      QN Q10.S16
+qDp1q1      QN Q11.S16
+qDelta      QN Q10.S16  ; reuse qDq0p0
+dDelta      DN D20.S8
+
+
+;// Computing P1,Q1
+qP_0n       QN Q14.S16
+qQ_0n       QN Q12.S16
+
+dQ_0n       DN D24.U8
+dP_0n       DN D29.U8
+
+;// bSGE4
+
+dHSp0q1     DN D13.U8
+dHSq0p1     DN D31.U8   
+
+dBS3210     DN D28.U16
+
+dP_0t       DN D13.U8   ;dHSp0q1        
+dQ_0t       DN D31.U8   ;Temp1        
+
+dP_0n       DN D29.U8
+dQ_0n       DN D24.U8   ;Temp2        
+
+;// Register usage for - armVCM4P10_DeblockingChromabSLT4_unsafe
+;//
+;// Inputs - Pixels             - p0-p1: D4-D5, q0-q1: D8-D9
+;//        - Filter masks       - filt: D16, dMask_0: D14, dMask_1: D15
+;//        - Additional Params  - pThresholds: r5, pAlpha: r2, pBeta: r3
+;//         
+;// Outputs - Pixels            - P0: D29, Q0: D24
+;//         - Additional Params - pThresholds: r5 (advanced by 4), alpha: D0, beta: D2
+
+;// Registers Corrupted         - D18-D31
+
+
+        M_START armVCM4P10_DeblockingChromabSLT4_unsafe
+
+        
+        ;dTC3210 -18
+        ;dTemp-28
+
+        VLD1        d18.U32[0], [pThresholds]! ;// 4 threshold bytes -> low word of dTC3210; pThresholds += 4
+
+        ;// delta = (((q0-p0)<<2) + (p1-q1) + 4) >> 3;
+        ;// dDelta = (qDp1q1 >> 2 + qDq0p0 + 1)>> 1
+
+        ;// qDp1q1-11
+        ;// qDq0p0-10
+        VSUBL       qDp1q1, dP_1, dQ_1      
+        VMOV        dTemp, dTC3210
+        VSUBL       qDq0p0, dQ_0, dP_0      
+        VSHR        qDp1q1, qDp1q1, #2      
+        VZIP.8      dTC3210, dTemp          ;// interleave with its copy: each threshold byte now appears twice
+    
+        ;// qDelta-qDq0p0-10
+
+        ;// dTC = dTC3210 + dMask_1
+
+        ;// dTC3210-18
+        ;// dTemp-28
+        ;// dTC-31
+        VBIF        dTC3210, dMask_0, dFilt     ;// bits where dFilt is clear take dMask_0 instead
+        VRHADD      qDelta, qDp1q1, qDq0p0  
+        VADD        dTC, dTC3210, dMask_1
+        VQMOVN      dDelta, qDelta
+        ;// dDelta-d20
+
+        ;// dDelta = armClip(-dTC, dTC, dDelta);
+        VLD1        {dAlpha[]}, [pAlpha]
+        VMIN        dDelta, dDelta, dTCs
+        VNEG        dTCs, dTCs
+        VLD1        {dBeta[]}, [pBeta]
+        ;1
+        VMAX        dDelta, dDelta, dTCs
+
+        ;// dP_0n - 29
+        ;// dQ_0n - 24
+
+        ;// pQ0[-1*Step] = (OMX_U8)armClip(0, 255, dP_0 + delta);
+        ;// pQ0[0*Step] = (OMX_U8)armClip(0, 255, dQ_0 - delta);
+
+        ;// dP_0n = (OMX_U8)armClip(0, 255, dP_0 + dDelta);
+        ;// dQ_0n = (OMX_U8)armClip(0, 255, dQ_0 - dDelta);
+        
+        ;// qP_0n - 14
+        ;// qQ_0n - 12
+        
+        VMOVL       qP_0n, dP_0
+        VMOVL       qQ_0n, dQ_0
+
+        ;1
+        VADDW       qP_0n, qP_0n, dDelta
+        VSUBW       qQ_0n, qQ_0n, dDelta
+        
+        VQMOVUN     dP_0n, qP_0n
+        VQMOVUN     dQ_0n, qQ_0n
+
+        M_END
+
+;// Register usage for - armVCM4P10_DeblockingChromabSGE4_unsafe()
+;//
+;// Inputs - Pixels             - p0-p1: D4-D5, q0-q1: D8-D9
+;//        - Additional Params  - pAlpha: r2, pBeta: r3, pThresholds: r5
+;//
+;// Outputs - Pixels            - P0: D13 (dP_0t), Q0: D31 (dQ_0t)
+;//         - Additional Params - alpha: D0, beta: D2, pThresholds: r5 (advanced by 4)
+
+;// Registers Corrupted         - D0, D2, D13, D31
+
+        M_START armVCM4P10_DeblockingChromabSGE4_unsafe
+    
+        ;dHSq0p1 - 31
+        ;dHSp0q1 - 13
+        VHADD       dHSp0q1, dP_0, dQ_1         ;// (p0 + q1) >> 1
+        VHADD       dHSq0p1, dQ_0, dP_1         ;// (q0 + p1) >> 1
+
+        ;// Prepare the bS mask
+
+        ;// dHSp0q1-13
+        ;// dP_0t-dHSp0q1-13
+        ;// dHSq0p1-31
+        ;// dQ_0t-Temp1-31
+        VLD1        {dAlpha[]}, [pAlpha]
+        ADD         pThresholds, pThresholds, #4    ;// step past 4 threshold bytes that this path does not read
+        VLD1        {dBeta[]}, [pBeta]
+
+        VRHADD      dP_0t, dHSp0q1, dP_1        ;// (((p0 + q1) >> 1) + p1 + 1) >> 1
+        VRHADD      dQ_0t, dHSq0p1, dQ_1        ;// (((q0 + p1) >> 1) + q1 + 1) >> 1
+        
+        M_END
+        
+        ENDIF  
+
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
new file mode 100755
index 0000000..0afe4fd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
@@ -0,0 +1,396 @@
+;//
+;// 
+;// File Name:  armVCM4P10_DeblockingLuma_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS CortexA8
+
+
+    IF  CortexA8
+        
+pThresholds RN 5
+
+;// Pixels
+dP_0        DN D4.U8
+dP_1        DN D5.U8  
+dP_2        DN D6.U8  
+dP_3        DN D7.U8  
+dQ_0        DN D8.U8  
+dQ_1        DN D9.U8  
+dQ_2        DN D10.U8 
+dQ_3        DN D11.U8 
+
+
+;// Filtering Decision
+dAlpha      DN D0.U8
+
+dFilt       DN D16.U8
+dAqflg      DN D12.U8
+dApflg      DN D17.U8 
+
+dAp0q0      DN D13.U8
+
+;// bSLT4
+dTC0        DN D18.U8   
+dTC1        DN D19.U8   
+dTC01       DN D18.U8   
+
+dTCs        DN D31.S8
+dTC         DN D31.U8
+
+dMask_0     DN D14.U8
+dMask_1     DN D15.U8    
+
+dTemp       DN D19.U8
+
+;// Computing P0,Q0
+qDq0p0      QN Q10.S16
+qDp1q1      QN Q11.S16
+qDelta      QN Q10.S16  ; reuse qDq0p0
+dDelta      DN D20.S8
+
+
+;// Computing P1,Q1
+dRp0q0      DN D24.U8
+
+dMaxP       DN D23.U8
+dMinP       DN D22.U8
+
+dMaxQ       DN D19.U8
+dMinQ       DN D21.U8
+
+dDeltaP     DN D26.U8
+dDeltaQ     DN D27.U8
+
+qP_0n       QN Q14.S16
+qQ_0n       QN Q12.S16
+
+dQ_0n       DN D24.U8
+dQ_1n       DN D25.U8
+dP_0n       DN D29.U8
+dP_1n       DN D30.U8
+
+;// bSGE4
+
+qSp0q0      QN Q10.U16
+
+qSp2q1      QN Q11.U16
+qSp0q0p1    QN Q12.U16
+qSp3p2      QN Q13.U16
+dHSp0q1     DN D28.U8
+
+qSq2p1      QN Q11.U16
+qSp0q0q1    QN Q12.U16
+qSq3q2      QN Q13.U16  ;!!
+dHSq0p1     DN D28.U8   ;!!
+
+qTemp1      QN Q11.U16  ;!!;qSp2q1 
+qTemp2      QN Q12.U16  ;!!;qSp0q0p1        
+
+dP_0t       DN D28.U8   ;!!;dHSp0q1        
+dQ_0t       DN D22.U8   ;!!;Temp1        
+
+dP_0n       DN D29.U8
+dP_1n       DN D30.U8
+dP_2n       DN D31.U8
+
+dQ_0n       DN D24.U8   ;!!;Temp2        
+dQ_1n       DN D25.U8   ;!!;Temp2        
+dQ_2n       DN D28.U8   ;!!;dQ_0t        
+
+;// Register usage for - armVCM4P10_DeblockingLumabSLT4_unsafe
+;// Luma edge filter, bS < 4 case: clips delta to +/-dTC and P1/Q1 to +/-dTC01 around p1/q1.
+;// Inputs - Pixels             - p0-p3: D4-D7, q0-q3: D8-D11
+;//        - Filter masks       - filt: D16, aqflg: D12, apflg: D17
+;//        - Additional Params  - pThresholds: r5
+;//        - Also read          - dMask_0: D14, dMask_1: D15
+;// Outputs - Pixels            - P0-P1: D29-D30, Q0-Q1: D24-D25
+;//         - Additional Params - pThresholds: r5
+
+;// Registers Corrupted         - D18-D31
+
+
+        M_START armVCM4P10_DeblockingLumabSLT4_unsafe
+
+        
+        ;// qDp1q1-11
+        VSUBL       qDp1q1, dP_1, dQ_1      ;// p1 - q1, widened to 16 bits
+        VLD1        {dTC0[]}, [pThresholds]!    ;// one byte to all lanes, pointer post-incremented
+        ;// qDq0p0-10
+        VSUBL       qDq0p0, dQ_0, dP_0      ;// q0 - p0, widened to 16 bits
+        VLD1        {dTC1[]}, [pThresholds]!    ;// next byte to all lanes, pointer post-incremented
+
+        ;// dRp0q0-24
+        VSHR        qDp1q1, qDp1q1, #2      
+    
+        ;// dTC01 = lanes 0-3 taken from dTC0, lanes 4-7 taken from dTC1
+        ;// dTC01-18
+        VEXT        dTC01, dTC0, dTC1, #4
+        ;// dTemp-19
+        VAND        dTemp, dApflg, dMask_1
+        
+        VBIF        dTC01, dMask_0, dFilt   ;// lanes with dFilt clear take dMask_0 instead
+    
+
+        ;// delta = (((q0-p0)<<2) + (p1-q1) + 4) >> 3;
+        ;// dDelta = ((qDp1q1 >> 2) + qDq0p0 + 1) >> 1
+
+        ;// qDelta-qDq0p0-10
+        VRHADD      qDelta, qDp1q1, qDq0p0  
+        VRHADD      dRp0q0, dP_0, dQ_0      ;// (p0 + q0 + 1) >> 1
+        VADD        dTC, dTC01, dTemp
+
+        ;// dTC = dTC01 + (dApflg & dMask_1) + (dAqflg & dMask_1)
+        
+        VAND        dTemp, dAqflg, dMask_1
+        VQADD       dMaxP, dP_1, dTC01      
+        VQMOVN      dDelta, qDelta
+        VADD        dTC, dTC, dTemp
+
+        ;// dMaxP = QADD(dP_1, dTC01)
+        ;// dMinP = QSUB(dP_1, dTC01)
+ 
+        ;// dMaxP-d23
+        ;// dMinP-d22
+        VQSUB       dMinP, dP_1, dTC01      
+
+        ;// dDelta-d20
+
+        ;// dMaxQ = QADD(dQ_1, dTC01)
+        ;// dMinQ = QSUB(dQ_1, dTC01)
+ 
+        ;// dMaxQ-19
+        ;// dMinQ-21
+        VQADD       dMaxQ, dQ_1, dTC01
+        VHADD       dDeltaP, dRp0q0, dP_2   
+        VMIN        dDelta, dDelta, dTCs
+
+        ;// dDelta = armClip(-dTCs, dTCs, dDelta): VMIN above, VMAX below once dTCs is negated
+        VNEG        dTCs, dTCs
+        
+        VQSUB       dMinQ, dQ_1, dTC01
+
+        ;// delta = (p2 + ((p0+q0+1)>>1) - (p1<<1))>>1;
+        ;// delta = armClip(-tC0, tC0, delta);
+        ;// pQ0[-2*Step] = (OMX_U8)(p1 + delta);
+
+        ;// dDeltaP = (dP_2 + dRp0q0)>>1;
+        ;// dP_1n = armClip(dP_1 - dTC01, dP_1 + dTC01, dDeltaP);
+        ;// dP_1n = armClip(MinP, MaxP, dDeltaP);
+        
+        ;// delta = (q2 + ((p0+q0+1)>>1) - (q1<<1))>>1;
+        ;// delta = armClip(-tC0, tC0, delta);
+        ;// pQ0[1*Step] = (OMX_U8)(q1 + delta);
+
+        ;// dDeltaQ = (dQ_2 + dRp0q0)>>1;
+        ;// dQ_1n = armClip(dQ_1 - dTC01, dQ_1 + dTC01, dDeltaQ);
+        ;// dQ_1n = armClip(MinQ, MaxQ, dDeltaQ);
+        
+        ;// dDeltaP-26
+        VHADD       dDeltaQ, dRp0q0, dQ_2   
+
+        ;// dDeltaQ-27
+        
+        ;// dP_0n - 29
+        ;// dP_1n - 30
+        ;// dQ_0n - 24
+        ;// dQ_1n - 25
+        
+        ;// delta = (q2 + ((p0+q0+1)>>1) - (q1<<1))>>1;
+        ;// dDeltaQ = (dQ_2 + dRp0q0)>>1;
+
+        VMAX        dP_1n, dDeltaP, dMinP   
+        VMAX        dDelta, dDelta, dTCs
+
+        ;// pQ0[-1*Step] = (OMX_U8)armClip(0, 255, dP_0 + delta);
+        ;// pQ0[0*Step] = (OMX_U8)armClip(0, 255, dQ_0 - delta);
+
+        ;// dP_0n = (OMX_U8)armClip(0, 255, dP_0 + dDelta);
+        ;// dQ_0n = (OMX_U8)armClip(0, 255, dQ_0 - dDelta);
+        
+        ;// qP_0n - 14
+        ;// qQ_0n - 12
+        
+        VMOVL       qP_0n, dP_0
+        VMOVL       qQ_0n, dQ_0
+
+        VADDW       qP_0n, qP_0n, dDelta    ;// p0 + delta in 16 bits
+        VSUBW       qQ_0n, qQ_0n, dDelta    ;// q0 - delta in 16 bits
+        
+        VQMOVUN     dP_0n, qP_0n            ;// saturate back to unsigned 8 bits
+        VQMOVUN     dQ_0n, qQ_0n            ;// saturate back to unsigned 8 bits
+        
+        VMAX        dQ_1n, dDeltaQ, dMinQ
+
+        VMIN        dP_1n, dP_1n, dMaxP
+        VMIN        dQ_1n, dQ_1n, dMaxQ
+        VBIF        dP_0n, dP_0, dFilt      ;// keep original p0 where dFilt is clear
+
+        VBIF        dP_1n, dP_1, dApflg     ;// keep original p1 where dApflg is clear
+        VBIF        dQ_0n, dQ_0, dFilt      ;// keep original q0 where dFilt is clear
+        VBIF        dQ_1n, dQ_1, dAqflg     ;// keep original q1 where dAqflg is clear
+
+        M_END
+
+;// Register usage for - armVCM4P10_DeblockingLumabSGE4_unsafe()
+;// Luma edge filter, bS >= 4 case: P0-P2 / Q0-Q2 from the long filter where the ap/aq tests hold.
+;// Inputs - Pixels             - p0-p3: D4-D7, q0-q3: D8-D11
+;//        - Filter masks       - filt: D16, aqflg: D12, apflg: D17
+;//        - Additional Params  - alpha: D0, dMask_1: D15
+;//        - Also read          - dAp0q0: D13
+;// Outputs - Pixels            - P0-P2: D29-D31, Q0-Q2: D24,D25,D28
+
+;// Registers Corrupted         - D18-D31
+
+        M_START armVCM4P10_DeblockingLumabSGE4_unsafe
+    
+
+        ;// ap<beta && armAbs(p0-q0)<((alpha>>2)+2)        
+        ;// aq<beta && armAbs(p0-q0)<((alpha>>2)+2)        
+
+        ;// ( dApflg & dAp0q0 < (dAlpha >> 2 + 2) )
+        ;// ( dAqflg & dAp0q0 < (dAlpha >> 2 + 2) )
+
+        ;// ( dApflg = dApflg & dAp0q0 < (dTemp + dMask_1 + dMask_1) )
+        ;// ( dAqflg = dAqflg & dAp0q0 < (dTemp + dMask_1 + dMask_1) )
+
+        ;// P Filter
+
+        VSHR        dTemp, dAlpha, #2
+        VADD        dTemp, dTemp, dMask_1
+
+        ;// qSp0q0-10
+        VADDL       qSp0q0, dQ_0, dP_0      
+        VADD        dTemp, dTemp, dMask_1
+
+        ;// qSp2q1-11
+        ;// qSp0q0p1-12
+        VADDL       qSp2q1, dP_2, dQ_1      
+        VADDW       qSp0q0p1, qSp0q0, dP_1  
+
+        VCGT        dTemp, dTemp, dAp0q0    ;// lane mask: (dTemp > dAp0q0)
+        VSHR        qSp2q1, #1              
+
+        ;// pQ0[-1*Step] = (OMX_U8)((p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3);
+        ;// pQ0[-1*Step] = ( ( (p0 + q0 + p1) + (p2 + q1)>>1 ) >> 1 + 1 ) >> 1
+
+        ;// dP_0n = ( ( (qSp0q0 + dP_1) + qSp2q1>>1 ) >> 1 + 1 ) >> 1
+        ;// dP_0n = ( ( qSp0q0p1 + qSp2q1>>1 ) >> 1 + 1 ) >> 1
+        ;// dP_0n = ( qTemp1 + 1 ) >> 1
+        
+        ;// pQ0[-2*Step] = (OMX_U8)((p2 + p1 + p0 + q0 + 2)>>2);
+        
+        ;// dP_1n = (OMX_U8)((dP_2 + qSp0q0p1 + 2)>>2);
+        ;// dP_1n = (OMX_U8)((qTemp2 + 2)>>2);
+        
+        ;// pQ0[-3*Step] = (OMX_U8)((2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3);
+        ;// pQ0[-3*Step] = (OMX_U8)(( (p3 + p2) + (p1 + p0 + q0 + p2) >> 1 + 2)>>2);
+
+        ;// dP_2n = (OMX_U8)(( qSp3p2 + (dP_2 + qSp0q0p1) >> 1 + 2) >> 2);
+        ;// dP_2n = (OMX_U8)(( qSp3p2 + qTemp2 >> 1 + 2) >> 2);
+
+        ;// qTemp1-qSp2q1-11
+        ;// qTemp2-qSp0q0p1-12
+        VHADD       qTemp1, qSp0q0p1, qSp2q1
+        VADDW       qTemp2, qSp0q0p1, dP_2  
+        
+        ;// qSp3p2-13
+        VADDL       qSp3p2, dP_3, dP_2      
+
+        VAND        dApflg, dApflg, dTemp
+        VHADD       dHSp0q1, dP_0, dQ_1     
+        VSRA        qSp3p2, qTemp2, #1      
+        ;// dHSp0q1-28
+        VAND        dAqflg, dAqflg, dTemp
+
+        ;// dP_0n-29
+        ;// dP_0t-dHSp0q1-28
+        VQRSHRN     dP_0n, qTemp1, #1
+        VRHADD      dP_0t, dHSp0q1, dP_1    
+
+        ;// dP_1n-30
+        VQRSHRN     dP_1n, qTemp2, #2
+        
+        VADDL       qSq2p1, dQ_2, dP_1          
+        VADDW       qSp0q0q1, qSp0q0, dQ_1      
+        
+        VBIF        dP_0n, dP_0t, dApflg    ;// use dP_0t where dApflg is clear
+
+        ;// Q Filter
+
+        ;// pQ0[0*Step] = (OMX_U8)((q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3);
+        ;// pQ0[0*Step] = ( ( (p0 + q0 + q1) + (q2 + p1)>>1 ) >> 1 + 1 ) >> 1
+
+        ;// dQ_0n = ( ( (qSp0q0 + dQ_1) + qSq2p1>>1 ) >> 1 + 1 ) >> 1
+        ;// dQ_0n = ( ( qSp0q0q1 + qSq2p1>>1 ) >> 1 + 1 ) >> 1
+        ;// dQ_0n = ( qTemp1 + 1 ) >> 1
+        
+        ;// pQ0[1*Step] = (OMX_U8)((q2 + q1 + q0 + p0 + 2)>>2);
+        
+        ;// dQ_1n = (OMX_U8)((dQ_2 + qSp0q0q1 + 2)>>2);
+        ;// dQ_1n = (OMX_U8)((qTemp2 + 2)>>2);
+        
+        ;// pQ0[2*Step] = (OMX_U8)((2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3);
+        ;// pQ0[2*Step] = (OMX_U8)(( (q3 + q2) + (q1 + p0 + q0 + q2) >> 1 + 2)>>2);
+
+        ;// dQ_2n = (OMX_U8)(( qSq3q2 + (dQ_2 + qSp0q0q1) >> 1 + 2) >> 2);
+        ;// dQ_2n = (OMX_U8)(( qSq3q2 + qTemp2 >> 1 + 2) >> 2);
+
+        ;// qTemp1-qSp2q1-11
+        ;// qTemp2-qSp0q0p1-12
+        ;// qSq2p1-11
+        ;// qSp0q0q1-12
+
+
+        ;// qTemp2-qSp0q0p1-12
+        ;// qTemp1-qSq2p1-11
+        ;// qSq3q2-13
+        ;// dP_2n-31
+        
+        VQRSHRN     dP_2n, qSp3p2, #2
+        VADDL       qSq3q2, dQ_3, dQ_2          
+
+        VSHR        qSq2p1, #1                  
+
+        VHADD       qTemp1, qSp0q0q1, qSq2p1
+        VADDW       qTemp2, qSp0q0q1, dQ_2      
+
+        ;// dHSq0p1-28
+        VHADD       dHSq0p1, dQ_0, dP_1         
+
+        VBIF        dP_0n, dP_0, dFilt      ;// keep original p0 where dFilt is clear
+        VBIF        dP_1n, dP_1, dApflg     ;// keep original p1 where dApflg is clear
+
+        VSRA        qSq3q2, qTemp2, #1          
+
+        ;// dQ_1-Temp2-25
+        ;// dQ_0-Temp2-24
+        VQRSHRN     dQ_1n, qTemp2, #2
+        VQRSHRN     dQ_0n, qTemp1, #1
+
+        ;// dQ_0t-Temp1-22
+        VRHADD      dQ_0t, dHSq0p1, dQ_1
+        VBIF        dQ_1n, dQ_1, dAqflg         ;// keep original q1 where dAqflg is clear
+
+        VBIF        dP_2n, dP_2, dApflg         ;// keep original p2 where dApflg is clear
+        VBIF        dQ_0n, dQ_0t, dAqflg        ;// use dQ_0t where dAqflg is clear
+        VQRSHRN     dQ_2n, qSq3q2, #2
+        VBIF        dQ_0n, dQ_0, dFilt          ;// keep original q0 where dFilt is clear
+        VBIF        dQ_2n, dQ_2, dAqflg         ;// keep original q2 where dAqflg is clear
+
+        M_END
+        
+    ENDIF  
+
+
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
new file mode 100755
index 0000000..10a89e9
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
@@ -0,0 +1,325 @@
+;//
+;// 
+;// File Name:  armVCM4P10_DecodeCoeffsToPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        INCLUDE armCOMM_BitDec_s.h
+        
+        IMPORT armVCM4P10_CAVLCCoeffTokenTables
+        IMPORT armVCM4P10_CAVLCTotalZeroTables
+        IMPORT armVCM4P10_CAVLCTotalZeros2x2Tables
+        IMPORT armVCM4P10_CAVLCRunBeforeTables
+        IMPORT armVCM4P10_SuffixToLevel
+        IMPORT armVCM4P10_ZigZag_4x4
+        IMPORT armVCM4P10_ZigZag_2x2
+        
+        M_VARIANTS ARM1136JS
+        
+;//DEBUG_ON    SETL {TRUE}
+        
+LAST_COEFF               EQU 0x20        ;// End of block flag
+TWO_BYTE_COEFF           EQU 0x10
+
+;// Declare input registers
+
+ppBitStream     RN 0
+pOffset         RN 1
+pNumCoeff       RN 2
+ppPosCoefbuf    RN 3
+nC              RN 4 ;// number of coeffs or 17 for chroma
+sMaxNumCoeff    RN 5
+
+;// Declare inner loop registers
+
+;// Level loop
+Count           RN 0
+TrailingOnes    RN 1
+pLevel          RN 2
+LevelSuffix     RN 3
+SuffixLength    RN 4
+TotalCoeff      RN 5
+
+pVLDTable       RN 6
+Symbol          RN 7
+T1              RN 8
+T2              RN 9
+RBitStream      RN 10
+RBitBuffer      RN 11
+RBitCount       RN 12
+lr              RN 14
+
+;// Run loop
+Count           RN 0
+ZerosLeft       RN 1
+pLevel          RN 2
+ppRunTable      RN 3
+pRun            RN 4
+TotalCoeff      RN 5
+
+pVLDTable       RN 6
+Symbol          RN 7
+T1              RN 8
+T2              RN 9
+RBitStream      RN 10
+RBitBuffer      RN 11
+RBitCount       RN 12
+lr              RN 14
+
+;// Fill in coefficients loop
+pPosCoefbuf     RN 0
+temp            RN 1
+pLevel          RN 2
+ppPosCoefbuf    RN 3
+pRun            RN 4
+TotalCoeff      RN 5
+pZigZag         RN 6
+
+T1              RN 8
+T2              RN 9
+RBitStream      RN 10
+RBitBuffer      RN 11
+RBitCount       RN 12
+CoeffNum        RN 14
+
+
+
+    IF ARM1136JS
+        ;// CAVLC block decode: coeff token, trailing ones, levels, runs -> (position, value) pairs
+        ;// Allocate stack memory required by the function
+        M_ALLOC4 pppBitStream, 4
+        M_ALLOC4 ppOffset, 4
+        M_ALLOC4 pppPosCoefbuf, 4
+        M_ALLOC4 ppLevel, 16*2
+        M_ALLOC4 ppRun, 16
+        
+        ;// Write function header
+        M_START armVCM4P10_DecodeCoeffsToPair, r11
+        
+        ;// Define stack arguments
+        M_ARG   pNC, 4
+        M_ARG   pSMaxNumCoeff,4
+        
+        ;// Code start        
+        M_BD_INIT0 ppBitStream, pOffset, RBitStream, RBitBuffer, RBitCount
+        LDR        pVLDTable, =armVCM4P10_CAVLCCoeffTokenTables
+        M_LDR      nC, pNC
+        
+        M_BD_INIT1 T1, T2, lr
+        LDR     pVLDTable, [pVLDTable, nC, LSL #2]  ;// Find VLD table    
+        
+        M_BD_INIT2 T1, T2, lr
+
+        ;// Decode Symbol = TotalCoeff*4 + TrailingOnes
+        M_BD_VLD  Symbol, T1, T2, pVLDTable, 4, 2
+    
+        MOVS    TotalCoeff, Symbol, LSR #2    
+        STRB    TotalCoeff, [pNumCoeff]    
+        M_PRINTF "TotalCoeff=%d\n", TotalCoeff
+        BEQ.W   EndNoError                  ;// Finished if no coefficients
+
+        CMP     Symbol, #17*4
+        BGE.W   EndBadSymbol                ;// Error if bad symbol
+        
+        ;// Save bitstream pointers
+        M_STR   ppBitStream,  pppBitStream
+        M_STR   pOffset,      ppOffset
+        M_STR   ppPosCoefbuf, pppPosCoefbuf                
+        
+        ;// Decode Trailing Ones
+        ANDS    TrailingOnes, Symbol, #3
+        M_ADR   pLevel, ppLevel            
+        M_PRINTF "TrailingOnes=%d\n", TrailingOnes
+        BEQ     TrailingOnesDone    
+        MOV     Count, TrailingOnes
+TrailingOnesLoop    
+        M_BD_READ8 Symbol, 1, T1
+        SUBS    Count, Count, #1
+        MOV     T1, #1
+        SUB     T1, T1, Symbol, LSL #1      ;// T1 = 1 - 2*Symbol, i.e. +1 or -1
+        M_PRINTF "Level=%d\n", T1
+        STRH    T1, [pLevel], #2
+        BGT     TrailingOnesLoop
+TrailingOnesDone    
+    
+        ;// Decode level values    
+        SUBS    Count, TotalCoeff, TrailingOnes     ;// Number of levels to read
+        BEQ     DecodeRuns                          ;// None left
+        
+        MOV     SuffixLength, #1
+        CMP     TotalCoeff, #10
+        MOVLE   SuffixLength, #0
+        CMP     TrailingOnes, #3    ;// if (TrailingOnes<3)
+        MOVLT   TrailingOnes, #4    ;// then TrailingOnes = +4
+        MOVGE   TrailingOnes, #2    ;// else TrailingOnes = +2
+        MOVGE   SuffixLength, #0    ;//      SuffixLength = 0
+        
+LevelLoop
+        M_BD_CLZ16 Symbol, T1, T2   ;// Symbol=LevelPrefix
+        CMP     Symbol,#16
+        BGE     EndBadSymbol
+        
+        MOVS    lr, SuffixLength    ;// if LevelSuffixSize==0
+        TEQEQ   Symbol, #14         ;//   and  LevelPrefix==14
+        MOVEQ   lr, #4              ;//   then LevelSuffixSize=4
+        TEQ     Symbol, #15         ;// if LevelPrefix==15
+        MOVEQ   lr, #12             ;//   then LevelSuffixSize=12
+        
+        TEQEQ   SuffixLength,#0     ;// if LevelPrefix==15 and SuffixLength==0
+        ADDEQ   Symbol,Symbol,#15   ;//   then Symbol += 15
+        
+        TEQ     lr, #0              ;// if LevelSuffixSize==0
+        BEQ     LevelCodeRead       ;// LevelCode = LevelPrefix
+        
+        M_BD_VREAD16 LevelSuffix, lr, T1, T2  ;// Read Level Suffix
+        
+        MOV     Symbol, Symbol, LSL SuffixLength
+        ADD     Symbol, LevelSuffix, Symbol
+             
+LevelCodeRead        
+        ;// Symbol = LevelCode
+        ADD     Symbol, Symbol, TrailingOnes ;// +4 if level cannot be +/-1, +2 o/w
+        MOV     TrailingOnes, #2
+        MOVS    T1, Symbol, LSR #1
+        RSBCS   T1, T1, #0                  ;// If Symbol odd then negate
+        M_PRINTF "Level=%d\n", T1
+        STRH    T1, [pLevel], #2            ;// Store level.
+        
+        LDR     T2, =armVCM4P10_SuffixToLevel
+        LDRSB   T1, [T2, SuffixLength]      ;// Find increment level        
+        TEQ     SuffixLength, #0
+        MOVEQ   SuffixLength, #1
+        CMP     Symbol, T1
+        ADDCS   SuffixLength, SuffixLength, #1        
+        SUBS    Count, Count, #1        
+        BGT     LevelLoop
+        
+DecodeRuns        
+        ;// Find number of zeros
+        M_LDR   T1, pSMaxNumCoeff           ;// sMaxNumCoeff
+        SUB     Count, TotalCoeff, #1       ;// Number of runs excluding last
+        SUBS    ZerosLeft, T1, TotalCoeff   ;// Maximum number of zeros there could be
+        M_ADR   pRun, ppRun
+        MOV     CoeffNum,TotalCoeff
+        SUB     CoeffNum,CoeffNum,#1
+        BEQ     NoZerosLeft
+        
+        ;// Unpack number of zeros from bitstream
+        TEQ     T1, #4        
+        LDREQ   pVLDTable, =(armVCM4P10_CAVLCTotalZeros2x2Tables-4)
+        LDRNE   pVLDTable, =(armVCM4P10_CAVLCTotalZeroTables-4)
+        LDR     pVLDTable, [pVLDTable, TotalCoeff, LSL #2]
+        
+        M_BD_VLD  Symbol, T1, T2, pVLDTable, 4, 2 ;// Symbol = ZerosLeft
+        CMP     Symbol,#16
+        BGE     EndBadSymbol
+
+        LDR     ppRunTable, =(armVCM4P10_CAVLCRunBeforeTables-4)
+        M_ADR   pRun, ppRun
+        MOVS    ZerosLeft, Symbol
+
+        ADD     CoeffNum,CoeffNum,ZerosLeft        
+
+        BEQ     NoZerosLeft
+        
+        ;// Decode runs while zeros are left and more than one coefficient
+RunLoop 
+        SUBS    Count, Count, #1
+        LDR     pVLDTable, [ppRunTable, ZerosLeft, LSL#2]
+        BLT     LastRun
+        M_BD_VLD  Symbol, T1, T2, pVLDTable, 3, 2 ;// Symbol = Run
+        CMP     Symbol,#15         
+        BGE     EndBadSymbol        
+
+        SUBS    ZerosLeft, ZerosLeft, Symbol
+        M_PRINTF "Run=%d\n", Symbol
+        STRB    Symbol, [pRun], #1
+        BGT     RunLoop
+        
+        ;// Decode runs while no zeros are left
+NoZerosLeft 
+        SUBS    Count, Count, #1
+        M_PRINTF "Run=%d\n", ZerosLeft
+        STRGEB  ZerosLeft, [pRun], #1
+        BGT     NoZerosLeft
+
+LastRun        
+        ;// Final run length is remaining zeros
+        M_PRINTF "LastRun=%d\n", ZerosLeft
+        STRB    ZerosLeft, [pRun], #1        
+        
+        ;// Write coefficients to output array
+        M_LDR   T1, pSMaxNumCoeff                    ;// sMaxNumCoeff
+        TEQ     T1, #15
+        ADDEQ   CoeffNum,CoeffNum,#1
+        
+
+        SUB     pRun,pRun,TotalCoeff        ;// rewind pRun by TotalCoeff bytes
+        SUB     pLevel,pLevel,TotalCoeff  
+        SUB     pLevel,pLevel,TotalCoeff    ;// rewind pLevel by 2*TotalCoeff bytes
+
+        M_LDR   ppPosCoefbuf, pppPosCoefbuf
+        LDR     pPosCoefbuf, [ppPosCoefbuf]
+        TEQ     T1, #4
+        LDREQ   pZigZag, =armVCM4P10_ZigZag_2x2
+        LDRNE   pZigZag, =armVCM4P10_ZigZag_4x4
+
+        
+        
+OutputLoop
+        
+        LDRB    T2, [pRun],#1
+        LDRB    T1, [pZigZag, CoeffNum]
+        SUB     CoeffNum, CoeffNum, #1      ;// Skip Non zero
+        SUB     CoeffNum, CoeffNum, T2      ;// Skip Zero run
+        
+        LDRSH   T2, [pLevel],#2
+        
+        SUBS    TotalCoeff, TotalCoeff, #1       
+        ORREQ   T1, T1, #LAST_COEFF
+        
+        ADD     temp, T2, #128
+        CMP     temp, #256                  ;// carry set when level is outside [-128, 127]
+        ORRCS   T1, T1, #TWO_BYTE_COEFF
+
+        
+        TEQ     TotalCoeff, #0              ;// Preserves carry        
+        
+        M_PRINTF "Output=%02x %04x\n", T1, T2
+        STRB    T1, [pPosCoefbuf], #1       ;// position/flags byte
+        STRB    T2, [pPosCoefbuf], #1       ;// low byte of level
+        MOV     T2, T2, LSR #8
+        STRCSB  T2, [pPosCoefbuf], #1       ;// high byte of level, two-byte case only
+        BNE     OutputLoop
+        
+        ;// Finished
+        STR     pPosCoefbuf, [ppPosCoefbuf]
+        M_LDR   ppBitStream, pppBitStream
+        M_LDR   pOffset, ppOffset
+        B       EndNoError
+            
+EndBadSymbol
+        MOV     r0, #OMX_Sts_Err
+        B       End                         ;// error path skips M_BD_FINI
+        
+EndNoError
+        ;// Finished reading from the bitstream                
+        M_BD_FINI ppBitStream, pOffset
+        
+        ;// Set return value
+        MOV     r0, #OMX_Sts_NoErr    
+End
+        M_END
+    
+    ENDIF
+    
+    END
+    
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DequantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DequantTables_s.s
new file mode 100755
index 0000000..2761600
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DequantTables_s.s
@@ -0,0 +1,123 @@
+;//
+;// 
+;// File Name:  armVCM4P10_DequantTables_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        
+
+         INCLUDE omxtypes_s.h
+         INCLUDE armCOMM_s.h
+     
+         EXPORT armVCM4P10_QPDivTable
+         EXPORT armVCM4P10_VMatrixQPModTable
+         EXPORT armVCM4P10_PosToVCol4x4
+         EXPORT armVCM4P10_PosToVCol2x2
+         EXPORT armVCM4P10_VMatrix 
+         EXPORT armVCM4P10_QPModuloTable
+         EXPORT armVCM4P10_VMatrixU16
+         
+;// Define the processor variants supported by this file
+         
+         M_VARIANTS CortexA8
+           
+         
+;// Guarding implementation by the processor name
+
+    
+    IF CortexA8
+           
+ 
+         M_TABLE armVCM4P10_PosToVCol4x4
+         DCB  0, 2, 0, 2
+         DCB  2, 1, 2, 1
+         DCB  0, 2, 0, 2
+         DCB  2, 1, 2, 1
+
+
+         M_TABLE armVCM4P10_PosToVCol2x2
+         DCB  0, 2
+         DCB  2, 1
+
+
+         M_TABLE armVCM4P10_VMatrix
+         DCB  10, 16, 13
+         DCB  11, 18, 14
+         DCB  13, 20, 16
+         DCB  14, 23, 18
+         DCB  16, 25, 20
+         DCB  18, 29, 23
+
+;//-------------------------------------------------------
+;// This table evaluates the expression [(INT)(QP/6)],
+;// for values of QP from 0 to 51 (inclusive). 
+;//-------------------------------------------------------
+
+         M_TABLE armVCM4P10_QPDivTable
+         DCB  0,  0,  0,  0,  0,  0
+         DCB  1,  1,  1,  1,  1,  1
+         DCB  2,  2,  2,  2,  2,  2
+         DCB  3,  3,  3,  3,  3,  3
+         DCB  4,  4,  4,  4,  4,  4
+         DCB  5,  5,  5,  5,  5,  5
+         DCB  6,  6,  6,  6,  6,  6
+         DCB  7,  7,  7,  7,  7,  7
+         DCB  8,  8,  8,  8,  8,  8
+    
+;//----------------------------------------------------
+;// This table contains armVCM4P10_VMatrix[QP%6][0] entries,
+;// for values of QP from 0 to 51 (inclusive). 
+;//----------------------------------------------------
+
+         M_TABLE armVCM4P10_VMatrixQPModTable
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+         DCB 10, 11, 13, 14, 16, 18
+    
+;//-------------------------------------------------------
+;// This table evaluates the modulus expression [QP%6]*6,
+;// for values of QP from 0 to 51 (inclusive). 
+;//-------------------------------------------------------
+
+         M_TABLE armVCM4P10_QPModuloTable
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+         DCB 0, 6, 12, 18, 24, 30
+        
+;//-------------------------------------------------------
+;// This table contains the individual byte values stored as
+;// halfwords. This avoids unpacking inside the function
+;//-------------------------------------------------------
+        
+         M_TABLE armVCM4P10_VMatrixU16
+         DCW 10, 16, 13 
+         DCW 11, 18, 14
+         DCW 13, 20, 16
+         DCW 14, 23, 18
+         DCW 16, 25, 20
+         DCW 18, 29, 23 
+         
+    ENDIF                                                           ;//CortexA8            
+
+
+                           
+    
+         END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
new file mode 100755
index 0000000..6e912d7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
@@ -0,0 +1,236 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+
+        EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+        EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+
+DEBUG_ON    SETL {FALSE}
+
+    IF ARM1136JS 
+
+;// Declare input registers
+pSrc            RN 0
+srcStep         RN 1
+pDst            RN 8
+iHeight         RN 9
+
+;// Declare inner loop registers
+x               RN 7
+x0              RN 7
+x1              RN 10
+x2              RN 11
+Scratch         RN 12
+
+;// Function: 
+;//     armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+;//
+;// Implements copy from an arbitrarily aligned source memory location (pSrc) to a 4 byte aligned
+;// destination pointed by (pDst) for horizontal interpolation.
+;// This function needs to copy 9 bytes in horizontal direction. 
+;//
+;// Registers used as input for this function
+;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy
+;//
+;// Registers preserved for top level function
+;// r2,r3,r4,r5,r6
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the new aligned location which will be used as pSrc
+;// r1 - step size to this aligned location
+
+        ;// Function header
+        M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe     
+        
+        ;// Copy pDst to scratch
+        MOV     Scratch, pDst
+
+StartAlignedStackCopy
+        AND     x, pSrc, #3                             ;// x = byte offset of pSrc within its word
+        BIC     pSrc, pSrc, #3                          ;// round pSrc down to a word boundary
+        
+        M_SWITCH x
+        M_CASE   Copy0toAligned
+        M_CASE   Copy1toAligned
+        M_CASE   Copy2toAligned
+        M_CASE   Copy3toAligned
+        M_ENDSWITCH
+
+Copy0toAligned  
+        LDM     pSrc, {x0, x1, x2}
+        SUBS    iHeight, iHeight, #1
+        ADD     pSrc, pSrc, srcStep
+        
+        ;// One cycle stall
+
+        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
+        BGT     Copy0toAligned
+        B       CopyEnd  
+      
+Copy1toAligned        
+        LDM     pSrc, {x0, x1, x2}
+        SUBS    iHeight, iHeight, #1
+        ADD     pSrc, pSrc, srcStep
+        
+        ;// One cycle stall
+
+        MOV     x0, x0, LSR #8                          ;// shift the three words down by 8 bits,
+        ORR     x0, x0, x1, LSL #24                     ;// filling each top byte from the next word
+        MOV     x1, x1, LSR #8
+        ORR     x1, x1, x2, LSL #24
+        MOV     x2, x2, LSR #8
+        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
+        BGT     Copy1toAligned
+        B       CopyEnd  
+
+Copy2toAligned        
+        LDM     pSrc, {x0, x1, x2}
+        SUBS    iHeight, iHeight, #1
+        ADD     pSrc, pSrc, srcStep
+        
+        ;// One cycle stall
+
+        MOV     x0, x0, LSR #16                         ;// shift the three words down by 16 bits,
+        ORR     x0, x0, x1, LSL #16                     ;// filling the top half from the next word
+        MOV     x1, x1, LSR #16
+        ORR     x1, x1, x2, LSL #16
+        MOV     x2, x2, LSR #16
+        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
+        BGT     Copy2toAligned
+        B       CopyEnd  
+
+Copy3toAligned        
+        LDM     pSrc, {x0, x1, x2}
+        SUBS    iHeight, iHeight, #1
+        ADD     pSrc, pSrc, srcStep
+        
+        ;// One cycle stall
+
+        MOV     x0, x0, LSR #24                         ;// shift the three words down by 24 bits,
+        ORR     x0, x0, x1, LSL #8                      ;// filling the top 24 bits from the next word
+        MOV     x1, x1, LSR #24
+        ORR     x1, x1, x2, LSL #8
+        MOV     x2, x2, LSR #24
+        STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
+        BGT     Copy3toAligned
+
+CopyEnd  
+        
+        MOV     pSrc, Scratch                           ;// return the original pDst as the new pSrc
+        MOV     srcStep, #12                            ;// each copied row is 12 bytes (3 words)
+
+        M_END
+    
+
+;// Function:
+;//     armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+;//
+;// Implements copy from an arbitrarily aligned source memory location (pSrc) to an aligned
+;// destination pointed by (pDst) for vertical interpolation.
+;// This function needs to copy 4 bytes in horizontal direction 
+;//
+;// Registers used as input for this function
+;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy
+;//
+;// Registers preserved for top level function
+;// r2,r3,r4,r5,r6
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the new aligned location which will be used as pSrc
+;// r1 - step size to this aligned location
+
+        ;// Function header
+        M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe     
+        
+        ;// Copy pSrc to stack
+StartVAlignedStackCopy
+        AND     x, pSrc, #3                                 ;// x = byte offset of pSrc within its word
+        BIC     pSrc, pSrc, #3                              ;// round pSrc down to a word boundary
+        
+        
+        M_SWITCH x
+        M_CASE   Copy0toVAligned
+        M_CASE   Copy1toVAligned
+        M_CASE   Copy2toVAligned
+        M_CASE   Copy3toVAligned
+        M_ENDSWITCH
+        
+Copy0toVAligned  
+        M_LDR   x0, [pSrc], srcStep
+        SUBS    iHeight, iHeight, #1
+        
+        ;// One cycle stall
+
+        STR     x0, [pDst], #4                              ;// Store aligned output row
+        BGT     Copy0toVAligned
+        B       CopyVEnd  
+      
+Copy1toVAligned        
+        LDR     x1, [pSrc, #4]
+        M_LDR   x0, [pSrc], srcStep
+        SUBS    iHeight, iHeight, #1        
+        
+        ;// One cycle stall
+
+        MOV     x1, x1, LSL #24                             ;// low byte of next word -> top byte
+        ORR     x0, x1, x0, LSR #8                          ;// merged with upper 24 bits of first word
+        STR     x0, [pDst], #4                              ;// Store aligned output row
+        BGT     Copy1toVAligned
+        B       CopyVEnd  
+
+Copy2toVAligned        
+        LDR     x1, [pSrc, #4]
+        M_LDR   x0, [pSrc], srcStep
+        SUBS    iHeight, iHeight, #1        
+        
+        ;// One cycle stall
+
+        MOV     x1, x1, LSL #16                             ;// low half of next word -> top half
+        ORR     x0, x1, x0, LSR #16                         ;// merged with upper half of first word
+        STR     x0, [pDst], #4                              ;// Store aligned output row
+        BGT     Copy2toVAligned
+        B       CopyVEnd  
+
+Copy3toVAligned        
+        LDR     x1, [pSrc, #4]
+        M_LDR   x0, [pSrc], srcStep
+        SUBS    iHeight, iHeight, #1        
+        
+        ;// One cycle stall
+
+        MOV     x1, x1, LSL #8                              ;// low 24 bits of next word -> top 24 bits
+        ORR     x0, x1, x0, LSR #24                         ;// merged with top byte of first word
+        STR     x0, [pDst], #4                              ;// Store aligned output row
+        BGT     Copy3toVAligned
+
+CopyVEnd  
+        ;// NOTE(review): #28 = 7 rows of 4 bytes back from the end of the copy; row count assumed - confirm
+        SUB     pSrc, pDst, #28
+        MOV     srcStep, #4                                 ;// each copied row is 4 bytes
+
+        M_END
+
+
+    ENDIF
+
+    END
+    
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
new file mode 100755
index 0000000..d2758912
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
@@ -0,0 +1,149 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+;// Function:
+;//     armVCM4P10_InterpolateLuma_Copy4x4_unsafe 
+;//
+;// Implements a copy from an arbitrarily aligned source memory location (pSrc) to an aligned
+;// destination pointed to by (pDst)
+;//
+;// Registers preserved for top level function
+;// r1,r3,r4,r5,r6,r7,r10,r11,r14
+;//
+;// Registers modified by the function
+;// r0,r2,r8,r9,r12
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+
+        EXPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+        
+;// Declare input registers
+pSrc            RN 0    ;// r0: source address; the function clears its low two bits before loading
+srcStep         RN 1    ;// r1: step applied to pSrc after each row load
+pDst            RN 2    ;// r2: destination address
+dstStep         RN 3    ;// r3: step applied to pDst after each row store
+
+;// Declare other intermediate registers
+x0              RN 4    ;// r4: first word of a row
+x1              RN 5    ;// r5: word following x0 (unaligned cases); a whole row in the aligned case
+x2              RN 8    ;// r8: as x0, for the row after
+x3              RN 9    ;// r9: as x1, for the row after
+Temp            RN 12   ;// r12: pSrc & 3, used as the switch index
+
+    IF ARM1136JS
+
+        M_START armVCM4P10_InterpolateLuma_Copy4x4_unsafe, r6
+        ;// Copies four rows of one 32-bit word each from pSrc (any byte alignment) to pDst.
+Copy4x4Start
+        ;// Split pSrc into its byte offset within a word (Temp) and a word-aligned address
+        AND     Temp, pSrc, #3
+        BIC     pSrc, pSrc, #3                        
+        ;// Select the copy routine by Temp (presumably case k is taken when Temp == k; see armCOMM_s.h)
+        M_SWITCH Temp
+        M_CASE  Copy4x4Align0
+        M_CASE  Copy4x4Align1
+        M_CASE  Copy4x4Align2
+        M_CASE  Copy4x4Align3
+        M_ENDSWITCH
+
+Copy4x4Align0                                               ;// No shifting: each row is one loaded word
+        M_LDR   x0, [pSrc], srcStep                         ;// Row 0
+        M_LDR   x1, [pSrc], srcStep                         ;// Row 1
+        M_STR   x0, [pDst], dstStep
+        M_LDR   x2, [pSrc], srcStep                         ;// Row 2
+        M_STR   x1, [pDst], dstStep
+        M_LDR   x3, [pSrc], srcStep                         ;// Row 3
+        M_STR   x2, [pDst], dstStep
+        M_STR   x3, [pDst], dstStep
+        B       Copy4x4End  
+
+Copy4x4Align1                                               ;// Each row = (word0 >> 8) | (word1 << 24)
+        LDR     x1, [pSrc, #4]                              ;// Row 0, second word
+        M_LDR   x0, [pSrc], srcStep                         ;// Row 0, first word; step pSrc to next row
+        LDR     x3, [pSrc, #4]                              ;// Row 1, second word
+        M_LDR   x2, [pSrc], srcStep                         ;// Row 1, first word; step pSrc to next row
+        MOV     x0, x0, LSR #8
+        ORR     x0, x0, x1, LSL #24
+        M_STR   x0, [pDst], dstStep                         ;// Store row 0
+        MOV     x2, x2, LSR #8
+        ORR     x2, x2, x3, LSL #24
+        LDR     x1, [pSrc, #4]                              ;// Row 2, second word
+        M_LDR   x0, [pSrc], srcStep                         ;// Row 2, first word
+        M_STR   x2, [pDst], dstStep                         ;// Store row 1, placed between the row 2 and row 3 loads
+        LDR     x3, [pSrc, #4]                              ;// Row 3, second word
+        M_LDR   x2, [pSrc], srcStep                         ;// Row 3, first word
+        MOV     x0, x0, LSR #8
+        ORR     x0, x0, x1, LSL #24
+        M_STR   x0, [pDst], dstStep                         ;// Store row 2
+        MOV     x2, x2, LSR #8
+        ORR     x2, x2, x3, LSL #24
+        M_STR   x2, [pDst], dstStep                         ;// Store row 3
+        B       Copy4x4End  
+      
+Copy4x4Align2                                               ;// Each row = (word0 >> 16) | (word1 << 16)
+        LDR     x1, [pSrc, #4]                              ;// Row 0, second word
+        M_LDR   x0, [pSrc], srcStep                         ;// Row 0, first word; step pSrc to next row
+        LDR     x3, [pSrc, #4]                              ;// Row 1, second word
+        M_LDR   x2, [pSrc], srcStep                         ;// Row 1, first word; step pSrc to next row
+        MOV     x0, x0, LSR #16
+        ORR     x0, x0, x1, LSL #16
+        M_STR   x0, [pDst], dstStep                         ;// Store row 0
+        MOV     x2, x2, LSR #16
+        ORR     x2, x2, x3, LSL #16
+        M_STR   x2, [pDst], dstStep                         ;// Store row 1
+
+        LDR     x1, [pSrc, #4]                              ;// Rows 2 and 3: same sequence as rows 0 and 1
+        M_LDR   x0, [pSrc], srcStep
+        LDR     x3, [pSrc, #4]
+        M_LDR   x2, [pSrc], srcStep
+        MOV     x0, x0, LSR #16
+        ORR     x0, x0, x1, LSL #16
+        M_STR   x0, [pDst], dstStep                         ;// Store row 2
+        MOV     x2, x2, LSR #16
+        ORR     x2, x2, x3, LSL #16
+        M_STR   x2, [pDst], dstStep                         ;// Store row 3
+        B       Copy4x4End  
+
+Copy4x4Align3                                               ;// Each row = (word0 >> 24) | (word1 << 8)
+        LDR     x1, [pSrc, #4]                              ;// Row 0, second word
+        M_LDR   x0, [pSrc], srcStep                         ;// Row 0, first word; step pSrc to next row
+        LDR     x3, [pSrc, #4]                              ;// Row 1, second word
+        M_LDR   x2, [pSrc], srcStep                         ;// Row 1, first word; step pSrc to next row
+        MOV     x0, x0, LSR #24
+        ORR     x0, x0, x1, LSL #8
+        M_STR   x0, [pDst], dstStep                         ;// Store row 0
+        MOV     x2, x2, LSR #24
+        ORR     x2, x2, x3, LSL #8
+        M_STR   x2, [pDst], dstStep                         ;// Store row 1
+
+        LDR     x1, [pSrc, #4]                              ;// Rows 2 and 3: same sequence as rows 0 and 1
+        M_LDR   x0, [pSrc], srcStep
+        LDR     x3, [pSrc, #4]
+        M_LDR   x2, [pSrc], srcStep
+        MOV     x0, x0, LSR #24
+        ORR     x0, x0, x1, LSL #8
+        M_STR   x0, [pDst], dstStep                         ;// Store row 2
+        MOV     x2, x2, LSR #24
+        ORR     x2, x2, x3, LSL #8
+        M_STR   x2, [pDst], dstStep                         ;// Store row 3
+        B       Copy4x4End                                  ;// Target is the very next label
+
+Copy4x4End
+        M_END
+
+    ENDIF
+
+    END
+    
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
new file mode 100755
index 0000000..4e5a39d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
@@ -0,0 +1,178 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+
+        EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+        EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+
+;// Functions: 
+;//     armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and
+;//     armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe 
+;//
+;// Implements re-arrangement of data from a temporary buffer to a buffer pointed to by pBuf.
+;// This converts the data from 16 bit to 8 bit; it also
+;// removes the offset and checks for saturation.
+;//
+;// Registers used as input for this function
+;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer
+;//
+;// Registers preserved for top level function
+;// r4,r5,r6,r8,r9,r14
+;//
+;// Registers modified by the function
+;// r7,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the destination location
+;// r1 - step size to this destination location
+
+
+DEBUG_ON    SETL {FALSE}
+        
+MASK            EQU 0x80808080  ;// Mask is used to implement (a+b+1)/2
+
+;// Declare input registers
+
+pSrc0           RN 0    ;// r0: read pointer on entry; both functions overwrite it before M_END
+srcStep0        RN 1    ;// r1: step applied to pSrc0 after each row; overwritten before M_END
+
+;// Declare other intermediate registers
+Temp1           RN 4    ;// r4: input word at [pSrc0]
+Temp2           RN 5    ;// r5: input word at [pSrc0, #4]
+Temp3           RN 10   ;// r10: input word at [pSrc0, #8]
+Temp4           RN 11   ;// r11: input word at [pSrc0, #12]
+pBuf            RN 7    ;// r7: write pointer for the packed bytes
+r0x0fe00fe0     RN 6    ;// r6: loaded with the constant 0x0fe00fe0
+r0x00ff00ff     RN 12   ;// r12: loaded with the constant 0x00ff00ff
+Count           RN 14   ;// r14 is used as the loop counter
+ValueA0         RN 10   ;// Same register as Temp3
+ValueA1         RN 11   ;// Same register as Temp4
+
+    IF ARM1136JS
+
+
+        ;// HorDiagCopy: turns four rows of eight 16-bit values at pSrc0 into 32 packed bytes at pBuf
+        M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6
+        ;// On exit r0 = pBuf - 32 (first byte written by the loop) and r1 = 8 (bytes written per row)
+        ;// Code start     
+        MOV         Count, #4                             ;// Four rows
+        LDR         r0x0fe00fe0, =0x0fe00fe0              ;// Offset 0x0fe0 in both halfwords
+        LDR         r0x00ff00ff, =0x00ff00ff              ;// Keeps the low byte of each halfword
+LoopStart1
+        LDR         Temp4, [pSrc0, #12]
+        LDR         Temp3, [pSrc0, #8]        
+        LDR         Temp2, [pSrc0, #4]
+        M_LDR       Temp1, [pSrc0], srcStep0              ;// Last load also steps pSrc0 to the next row
+        UQSUB16     Temp4, Temp4, r0x0fe00fe0             ;// Subtract the offset per halfword, clamping at 0
+        UQSUB16     Temp3, Temp3, r0x0fe00fe0                 
+        UQSUB16     Temp2, Temp2, r0x0fe00fe0        
+        UQSUB16     Temp1, Temp1, r0x0fe00fe0                 
+        USAT16      Temp4, #13, Temp4                     ;// Clamp each halfword to 0..0x1fff
+        USAT16      Temp3, #13, Temp3                          
+        USAT16      Temp2, #13, Temp2
+        USAT16      Temp1, #13, Temp1                                  
+        AND         Temp4, r0x00ff00ff, Temp4, LSR #5     ;// (halfword >> 5) & 0xff: one byte per halfword
+        AND         Temp3, r0x00ff00ff, Temp3, LSR #5         
+        AND         Temp2, r0x00ff00ff, Temp2, LSR #5         
+        AND         Temp1, r0x00ff00ff, Temp1, LSR #5         
+        ORR         ValueA1, Temp3, Temp4, LSL #8         ;// Must come first: ValueA0 shares r10 with Temp3
+        ORR         ValueA0, Temp1, Temp2, LSL #8         ;// Temp2 bytes fill the odd byte lanes
+        SUBS        Count, Count, #1                      ;// Sets the flags tested by BGT below
+        STRD        ValueA0, [pBuf], #8                   ;// Store r10:r11, advance pBuf by 8
+        BGT         LoopStart1
+End1
+        SUB        pSrc0, pBuf, #32                       ;// 4 iterations * 8 bytes back
+        MOV        srcStep0, #8
+
+        M_END
+
+
+        ;// VerDiagCopy: as HorDiagCopy, but regroups the halfword lanes and writes 4-byte words to pBuf
+        M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6
+        ;// On exit r0 = pBuf - 24 and r1 = 4
+        ;// Code start        
+        LDR         r0x0fe00fe0, =0x0fe00fe0             ;// Offset 0x0fe0 in both halfwords
+        LDR         r0x00ff00ff, =0x00ff00ff             ;// Keeps the low byte of each halfword
+        MOV         Count, #2                            ;// Two iterations, two input rows each
+
+LoopStart    
+        LDR         Temp4, [pSrc0, #12]
+        LDR         Temp3, [pSrc0, #8]        
+        LDR         Temp2, [pSrc0, #4]
+        M_LDR       Temp1, [pSrc0], srcStep0             ;// Last load also steps pSrc0 to the next row
+        
+        UQSUB16     Temp4, Temp4, r0x0fe00fe0            ;// Subtract the offset per halfword, clamping at 0
+        UQSUB16     Temp3, Temp3, r0x0fe00fe0                 
+        UQSUB16     Temp2, Temp2, r0x0fe00fe0        
+        UQSUB16     Temp1, Temp1, r0x0fe00fe0                 
+        
+        USAT16      Temp4, #13, Temp4                    ;// Clamp each halfword to 0..0x1fff
+        USAT16      Temp3, #13, Temp3                          
+        USAT16      Temp2, #13, Temp2
+        USAT16      Temp1, #13, Temp1
+                                  
+        AND         Temp4, r0x00ff00ff, Temp4, LSR #5    ;// (halfword >> 5) & 0xff: one byte per halfword
+        AND         Temp3, r0x00ff00ff, Temp3, LSR #5         
+        AND         Temp2, r0x00ff00ff, Temp2, LSR #5         
+        AND         Temp1, r0x00ff00ff, Temp1, LSR #5         
+        ORR         ValueA1, Temp3, Temp4, LSL #8        ;// [d2 c2 d0 c0]; first, as ValueA0 shares r10 with Temp3
+        ORR         ValueA0, Temp1, Temp2, LSL #8        ;// [b2 a2 b0 a0]         
+                    
+        PKHBT       Temp1, ValueA0, ValueA1, LSL #16     ;// [d0 c0 b0 a0]
+
+        STR         Temp1, [pBuf], #8                    ;// Written at pBuf+0; pBuf -> +8
+        PKHTB       Temp2, ValueA1, ValueA0, ASR #16     ;// [d2 c2 b2 a2]
+        STR         Temp2, [pBuf], #-4                   ;// Written at +8; pBuf steps back to +4
+
+        LDR         Temp4, [pSrc0, #12]                  ;// Second input row of this iteration
+        LDR         Temp3, [pSrc0, #8]        
+        LDR         Temp2, [pSrc0, #4]
+        M_LDR       Temp1, [pSrc0], srcStep0
+        
+        UQSUB16     Temp4, Temp4, r0x0fe00fe0        
+        UQSUB16     Temp3, Temp3, r0x0fe00fe0                 
+        UQSUB16     Temp2, Temp2, r0x0fe00fe0        
+        UQSUB16     Temp1, Temp1, r0x0fe00fe0                 
+        
+        USAT16      Temp4, #13, Temp4
+        USAT16      Temp3, #13, Temp3                          
+        USAT16      Temp2, #13, Temp2
+        USAT16      Temp1, #13, Temp1
+                                  
+        AND         Temp4, r0x00ff00ff, Temp4, LSR #5         
+        AND         Temp3, r0x00ff00ff, Temp3, LSR #5         
+        AND         Temp2, r0x00ff00ff, Temp2, LSR #5         
+        AND         Temp1, r0x00ff00ff, Temp1, LSR #5         
+        ORR         ValueA1, Temp3, Temp4, LSL #8        ;// [d2 c2 d0 c0]             
+        ORR         ValueA0, Temp1, Temp2, LSL #8        ;// [b2 a2 b0 a0]         
+                    
+        PKHBT       Temp1, ValueA0, ValueA1, LSL #16     ;// [d0 c0 b0 a0]
+        SUBS        Count, Count, #1                     ;// Sets the flags tested by BGT; the stores leave them alone
+        STR         Temp1, [pBuf], #8                    ;// Written at +4; pBuf -> +12
+        PKHTB       Temp2, ValueA1, ValueA0, ASR #16     ;// [d2 c2 b2 a2]
+        STR         Temp2, [pBuf], #4                    ;// Written at +12; pBuf -> +16 for the next iteration
+        
+        BGT         LoopStart
+End2
+        SUB         pSrc0, pBuf, #32-8                   ;// pBuf advanced 2*16 = 32, so r0 = initial pBuf + 8. NOTE(review): confirm callers expect this
+        MOV         srcStep0, #4
+
+        M_END
+
+    ENDIF
+    
+    END
+    
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
new file mode 100755
index 0000000..d1684cb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
@@ -0,0 +1,313 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        EXPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+
+        M_VARIANTS CortexA8
+
+    IF CortexA8
+
+        M_START armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe, r11
+;// 6-tap (1,-5,20,20,-5,1) filter along each of nine rows, then down those results; output bytes end in dAcc0..dAcc3
+;// Declare input registers
+pSrc            RN 0
+srcStep         RN 1
+pDst            RN 2                            ;// Not referenced in this function
+dstStep         RN 3                            ;// Not referenced in this function
+
+;// Declare Neon registers
+dCoeff5         DN 30.S16
+dCoeff20        DN 31.S16
+qCoeff5         QN 14.S32
+qCoeff20        QN 15.S32                       ;// Q15 is D30:D31, i.e. it overlays dCoeff5 and dCoeff20
+        
+qSrc01          QN 0.U8
+dSrc0           DN 0.U8
+dSrc1           DN 1.U8                
+                
+dSrcb           DN 4.U8
+dSrcc           DN 2.U8
+dSrcd           DN 3.U8
+dSrce           DN 5.U8
+dSrcf           DN 1.U8                         ;// Same D register as dSrc1
+
+qSrcb           QN 2.S16
+qSrcc           QN 1.S16
+dSrcB           DN 4.S16                        ;// Low half of qSrcb
+dSrcC           DN 2.S16                        ;// Low half of qSrcc
+
+qRes0           QN 5.S16
+qRes1           QN 6.S16
+qRes2           QN 7.S16
+qRes3           QN 8.S16
+qRes4           QN 9.S16
+qRes5           QN 10.S16
+qRes6           QN 11.S16
+qRes7           QN 12.S16
+qRes8           QN 13.S16
+    
+dRes0           DN 10.S16                       ;// dResN is the low half of qResN
+dRes1           DN 12.S16
+dRes2           DN 14.S16
+dRes3           DN 16.S16
+dRes4           DN 18.S16
+dRes5           DN 20.S16
+dRes6           DN 22.S16
+dRes7           DN 24.S16
+dRes8           DN 26.S16
+    
+qAcc01          QN 5.S32
+qAcc23          QN 6.S32
+qAcc45          QN 2.S32
+qAcc67          QN 3.S32
+qSumBE          QN 0.S32
+qSumCD          QN 1.S32
+
+dTempAcc0       DN 0.U16
+dTempAcc1       DN 2.U16
+dTempAcc2       DN 4.U16
+dTempAcc3       DN 6.U16
+
+qTAcc0          QN 0.U16
+qTAcc1          QN 1.U16
+qTAcc2          QN 2.U16
+qTAcc3          QN 3.U16
+
+dAcc0           DN 0.U8
+dAcc1           DN 2.U8
+dAcc2           DN 4.U8
+dAcc3           DN 6.U8
+
+dTmp0           DN 8.S16
+dTmp1           DN 9.S16
+qTmp0           QN 4.S32
+
+        VLD1        qSrc01, [pSrc], srcStep     ;// [a0 a1 a2 a3 ..]
+        VMOV        dCoeff20, #20
+        VMOV        dCoeff5, #5
+
+        ;// Row0
+        VEXT        dSrcb, dSrc0, dSrc1, #1     ;// [b0 b1 b2 b3 ..]
+        VEXT        dSrcc, dSrc0, dSrc1, #2
+        VEXT        dSrcd, dSrc0, dSrc1, #3
+        VEXT        dSrce, dSrc0, dSrc1, #4
+        VEXT        dSrcf, dSrc0, dSrc1, #5     ;// [f0 f1 f2 f3 ..]; last VEXT, as it overwrites dSrc1
+        VADDL       qSrcc, dSrcc, dSrcd         ;// c+d
+        VADDL       qSrcb, dSrcb, dSrce         ;// b+e
+        VADDL       qRes0, dSrc0, dSrcf         ;// Acc=a+f
+        VLD1        qSrc01, [pSrc], srcStep     ;// Load the next row: [a0 a1 a2 a3 ..]
+        VMLA        dRes0, dSrcC, dCoeff20      ;// Acc += 20*(c+d)
+;        VMLS        dRes0, dSrcB, dCoeff5       ;// Acc -= 5*(b+e)
+        VMUL        dTmp0, dSrcB, dCoeff5       ;// Tmp = 5*(b+e), subtracted from Acc below
+        
+        ;// Row1
+        VEXT        dSrcb, dSrc0, dSrc1, #1     ;// [b0 b1 b2 b3 ..]
+        VEXT        dSrcc, dSrc0, dSrc1, #2
+        VEXT        dSrcd, dSrc0, dSrc1, #3
+        VEXT        dSrce, dSrc0, dSrc1, #4
+        VEXT        dSrcf, dSrc0, dSrc1, #5     ;// [f0 f1 f2 f3 ..]
+        VADDL       qSrcc, dSrcc, dSrcd         ;// c+d
+        VADDL       qSrcb, dSrcb, dSrce         ;// b+e
+        VADDL       qRes1, dSrc0, dSrcf         ;// Acc=a+f
+        VLD1        qSrc01, [pSrc], srcStep     ;// Load the next row: [a0 a1 a2 a3 ..]
+        
+        VSUB        dRes0, dRes0, dTmp0         ;// Row0: Acc -= 5*(b+e), using Tmp from the previous row
+        
+        VMLA        dRes1, dSrcC, dCoeff20      ;// Acc += 20*(c+d)
+;        VMLS        dRes1, dSrcB, dCoeff5       ;// Acc -= 5*(b+e)
+        VMUL        dTmp0, dSrcB, dCoeff5       ;// Tmp = 5*(b+e), subtracted from Acc below
+
+        ;// Row2
+        VEXT        dSrcb, dSrc0, dSrc1, #1     ;// [b0 b1 b2 b3 ..]
+        VEXT        dSrcc, dSrc0, dSrc1, #2
+        VEXT        dSrcd, dSrc0, dSrc1, #3
+        VEXT        dSrce, dSrc0, dSrc1, #4
+        VEXT        dSrcf, dSrc0, dSrc1, #5     ;// [f0 f1 f2 f3 ..]
+        VADDL       qSrcc, dSrcc, dSrcd         ;// c+d
+        VADDL       qSrcb, dSrcb, dSrce         ;// b+e
+        VADDL       qRes2, dSrc0, dSrcf         ;// Acc=a+f
+        VLD1        qSrc01, [pSrc], srcStep     ;// Load the next row: [a0 a1 a2 a3 ..]
+        
+        VSUB        dRes1, dRes1, dTmp0         ;// Row1: Acc -= 5*(b+e)
+
+        VMLA        dRes2, dSrcC, dCoeff20      ;// Acc += 20*(c+d)
+;        VMLS        dRes2, dSrcB, dCoeff5       ;// Acc -= 5*(b+e)
+        VMUL        dTmp0, dSrcB, dCoeff5       ;// Tmp = 5*(b+e), subtracted from Acc below
+
+        ;// Row3
+        VEXT        dSrcb, dSrc0, dSrc1, #1     ;// [b0 b1 b2 b3 ..]
+        VEXT        dSrcc, dSrc0, dSrc1, #2
+        VEXT        dSrcd, dSrc0, dSrc1, #3
+        VEXT        dSrce, dSrc0, dSrc1, #4
+        VEXT        dSrcf, dSrc0, dSrc1, #5     ;// [f0 f1 f2 f3 ..]
+        VADDL       qSrcc, dSrcc, dSrcd         ;// c+d
+        VADDL       qSrcb, dSrcb, dSrce         ;// b+e
+        VADDL       qRes3, dSrc0, dSrcf         ;// Acc=a+f
+        VLD1        qSrc01, [pSrc], srcStep     ;// Load the next row: [a0 a1 a2 a3 ..]
+
+        VSUB        dRes2, dRes2, dTmp0         ;// Row2: Acc -= 5*(b+e)
+
+        VMLA        dRes3, dSrcC, dCoeff20      ;// Acc += 20*(c+d)
+;        VMLS        dRes3, dSrcB, dCoeff5       ;// Acc -= 5*(b+e)
+        VMUL        dTmp0, dSrcB, dCoeff5       ;// Tmp = 5*(b+e), subtracted from Acc below
+
+        ;// Row4
+        VEXT        dSrcb, dSrc0, dSrc1, #1     ;// [b0 b1 b2 b3 ..]
+        VEXT        dSrcc, dSrc0, dSrc1, #2
+        VEXT        dSrcd, dSrc0, dSrc1, #3
+        VEXT        dSrce, dSrc0, dSrc1, #4
+        VEXT        dSrcf, dSrc0, dSrc1, #5     ;// [f0 f1 f2 f3 ..]
+        VADDL       qSrcc, dSrcc, dSrcd         ;// c+d
+        VADDL       qSrcb, dSrcb, dSrce         ;// b+e
+        VADDL       qRes4, dSrc0, dSrcf         ;// Acc=a+f
+        VLD1        qSrc01, [pSrc], srcStep     ;// Load the next row: [a0 a1 a2 a3 ..]
+
+        VSUB        dRes3, dRes3, dTmp0         ;// Row3: Acc -= 5*(b+e)
+
+        VMLA        dRes4, dSrcC, dCoeff20      ;// Acc += 20*(c+d)
+;        VMLS        dRes4, dSrcB, dCoeff5       ;// Acc -= 5*(b+e)
+        VMUL        dTmp0, dSrcB, dCoeff5       ;// Tmp = 5*(b+e), subtracted from Acc below
+
+        ;// Row5
+        VEXT        dSrcb, dSrc0, dSrc1, #1     ;// [b0 b1 b2 b3 ..]
+        VEXT        dSrcc, dSrc0, dSrc1, #2
+        VEXT        dSrcd, dSrc0, dSrc1, #3
+        VEXT        dSrce, dSrc0, dSrc1, #4
+        VEXT        dSrcf, dSrc0, dSrc1, #5     ;// [f0 f1 f2 f3 ..]
+        VADDL       qSrcc, dSrcc, dSrcd         ;// c+d
+        VADDL       qSrcb, dSrcb, dSrce         ;// b+e
+        VADDL       qRes5, dSrc0, dSrcf         ;// Acc=a+f
+        VLD1        qSrc01, [pSrc], srcStep     ;// Load the next row: [a0 a1 a2 a3 ..]
+
+        VSUB        dRes4, dRes4, dTmp0         ;// Row4: Acc -= 5*(b+e)
+
+        VMLA        dRes5, dSrcC, dCoeff20      ;// Acc += 20*(c+d)
+;        VMLS        dRes5, dSrcB, dCoeff5       ;// Acc -= 5*(b+e)
+        VMUL        dTmp0, dSrcB, dCoeff5       ;// Tmp = 5*(b+e), subtracted from Acc below
+
+        ;// Row6
+        VEXT        dSrcb, dSrc0, dSrc1, #1     ;// [b0 b1 b2 b3 ..]
+        VEXT        dSrcc, dSrc0, dSrc1, #2
+        VEXT        dSrcd, dSrc0, dSrc1, #3
+        VEXT        dSrce, dSrc0, dSrc1, #4
+        VEXT        dSrcf, dSrc0, dSrc1, #5     ;// [f0 f1 f2 f3 ..]
+        VADDL       qSrcc, dSrcc, dSrcd         ;// c+d
+        VADDL       qSrcb, dSrcb, dSrce         ;// b+e
+        VADDL       qRes6, dSrc0, dSrcf         ;// Acc=a+f
+        VLD1        qSrc01, [pSrc], srcStep     ;// Load the next row: [a0 a1 a2 a3 ..]
+
+        VSUB        dRes5, dRes5, dTmp0         ;// Row5: Acc -= 5*(b+e)
+
+        VMLA        dRes6, dSrcC, dCoeff20      ;// Acc += 20*(c+d)
+;        VMLS        dRes6, dSrcB, dCoeff5       ;// Acc -= 5*(b+e)
+        VMUL        dTmp0, dSrcB, dCoeff5       ;// Tmp = 5*(b+e), subtracted from Acc below
+
+        ;// Row7
+        VEXT        dSrcb, dSrc0, dSrc1, #1     ;// [b0 b1 b2 b3 ..]
+        VEXT        dSrcc, dSrc0, dSrc1, #2
+        VEXT        dSrcd, dSrc0, dSrc1, #3
+        VEXT        dSrce, dSrc0, dSrc1, #4
+        VEXT        dSrcf, dSrc0, dSrc1, #5     ;// [f0 f1 f2 f3 ..]
+        VADDL       qSrcc, dSrcc, dSrcd         ;// c+d
+        VADDL       qSrcb, dSrcb, dSrce         ;// b+e
+        VADDL       qRes7, dSrc0, dSrcf         ;// Acc=a+f
+        VLD1        qSrc01, [pSrc], srcStep     ;// Load the ninth and last row: [a0 a1 a2 a3 ..]
+
+        VSUB        dRes6, dRes6, dTmp0         ;// Row6: Acc -= 5*(b+e)
+
+        VMLA        dRes7, dSrcC, dCoeff20      ;// Acc += 20*(c+d)
+;        VMLS        dRes7, dSrcB, dCoeff5       ;// Acc -= 5*(b+e)
+        VMUL        dTmp0, dSrcB, dCoeff5       ;// Tmp = 5*(b+e), subtracted from Acc below
+
+        ;// Row8
+        VEXT        dSrcb, dSrc0, dSrc1, #1     ;// [b0 b1 b2 b3 ..]
+        VEXT        dSrcc, dSrc0, dSrc1, #2
+        VEXT        dSrcd, dSrc0, dSrc1, #3
+        VEXT        dSrce, dSrc0, dSrc1, #4
+        VEXT        dSrcf, dSrc0, dSrc1, #5     ;// [f0 f1 f2 f3 ..]
+        VADDL       qSrcc, dSrcc, dSrcd         ;// c+d
+        VADDL       qSrcb, dSrcb, dSrce         ;// b+e
+        VADDL       qRes8, dSrc0, dSrcf         ;// Acc=a+f
+
+        VSUB        dRes7, dRes7, dTmp0         ;// Row7: Acc -= 5*(b+e)
+
+        VMLA        dRes8, dSrcC, dCoeff20      ;// Acc += 20*(c+d)
+;        VMLS        dRes8, dSrcB, dCoeff5       ;// Acc -= 5*(b+e)
+        VMUL        dTmp0, dSrcB, dCoeff5       ;// Tmp = 5*(b+e); last use of the 16-bit coefficients
+
+        VMOV        qCoeff20, #20               ;// Overwrites D30:D31, so dCoeff5/dCoeff20 are dead from here
+        VMOV        qCoeff5, #5
+
+        ;// Col0: second filter pass over row results 0..5
+        VADDL       qAcc01, dRes0, dRes5        ;// Acc = a+f
+        VADDL       qSumCD, dRes2, dRes3        ;// c+d
+        VADDL       qSumBE, dRes1, dRes4        ;// b+e
+
+        VSUB        dRes8, dRes8, dTmp0         ;// Row8: Acc -= 5*(b+e)
+
+        VMLA        qAcc01, qSumCD, qCoeff20    ;// Acc += 20*(c+d)
+;        VMLS        qAcc01, qSumBE, qCoeff5     ;// Acc -= 5*(b+e)
+        VMUL        qTmp0, qSumBE, qCoeff5     ;// Tmp = 5*(b+e), subtracted from Acc below
+
+        ;// Col1: row results 1..6
+        VADDL       qAcc23, dRes1, dRes6        ;// Acc = a+f
+        VADDL       qSumCD, dRes3, dRes4        ;// c+d
+        VADDL       qSumBE, dRes2, dRes5        ;// b+e
+        VMLA        qAcc23, qSumCD, qCoeff20    ;// Acc += 20*(c+d)
+
+        VSUB        qAcc01, qAcc01, qTmp0       ;// Col0: Acc -= 5*(b+e)
+
+;        VMLS        qAcc23, qSumBE, qCoeff5     ;// Acc -= 5*(b+e)
+        VMUL        qTmp0, qSumBE, qCoeff5     ;// Tmp = 5*(b+e), subtracted from Acc below
+
+        ;// Col2: row results 2..7
+        VADDL       qAcc45, dRes2, dRes7        ;// Acc = a+f
+        VADDL       qSumCD, dRes4, dRes5        ;// c+d
+        VADDL       qSumBE, dRes3, dRes6        ;// b+e
+        VMLA        qAcc45, qSumCD, qCoeff20    ;// Acc += 20*(c+d)
+
+        VSUB        qAcc23, qAcc23, qTmp0       ;// Col1: Acc -= 5*(b+e)
+
+;        VMLS        qAcc45, qSumBE, qCoeff5     ;// Acc -= 5*(b+e)
+        VMUL        qTmp0, qSumBE, qCoeff5     ;// Tmp = 5*(b+e), subtracted from Acc below
+        
+        ;// Col3: row results 3..8
+        VADDL       qAcc67, dRes3, dRes8        ;// Acc = a+f
+        VADDL       qSumCD, dRes5, dRes6        ;// c+d
+        VADDL       qSumBE, dRes4, dRes7        ;// b+e
+        VMLA        qAcc67, qSumCD, qCoeff20    ;// Acc += 20*(c+d)
+
+        VSUB        qAcc45, qAcc45, qTmp0       ;// Col2: Acc -= 5*(b+e)
+
+        VMLS        qAcc67, qSumBE, qCoeff5     ;// Acc -= 5*(b+e); the only place VMLS is used directly
+
+        VQRSHRUN    dTempAcc0, qAcc01, #10      ;// Rounding shift right by 10, saturating to unsigned 16 bit
+        VQRSHRUN    dTempAcc1, qAcc23, #10
+        VQRSHRUN    dTempAcc2, qAcc45, #10
+        VQRSHRUN    dTempAcc3, qAcc67, #10
+        
+        VQMOVN      dAcc0, qTAcc0               ;// Saturating narrow to 8 bit; low four bytes come from dTempAccN
+        VQMOVN      dAcc1, qTAcc1
+        VQMOVN      dAcc2, qTAcc2
+        VQMOVN      dAcc3, qTAcc3
+                
+        M_END
+    
+    ENDIF
+
+
+    
+    END
+    
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
new file mode 100755
index 0000000..7bc091f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
@@ -0,0 +1,266 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+
+        M_VARIANTS CortexA8
+
+    IF CortexA8
+        M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r11
+;// 6-tap (1,-5,20,20,-5,1) filter down nine rows first, then along each filtered row; output bytes end in dAcc0..dAcc3
+;// Declare input registers
+pSrc            RN 0
+srcStep         RN 1
+pDst            RN 2                                         ;// Not referenced in this function
+dstStep         RN 3                                         ;// Not referenced in this function
+
+;// Declare Neon registers
+dTCoeff5        DN 30.U8                                     ;// Same D register as dCoeff5
+dTCoeff20       DN 31.U8                                     ;// Same D register as dCoeff20
+dCoeff5         DN 30.S16
+dCoeff20        DN 31.S16
+
+qSrcA01         QN 0.U8
+qSrcB23         QN 1.U8
+qSrcC45         QN 2.U8
+qSrcD67         QN 3.U8
+qSrcE89         QN 4.U8
+qSrcF1011       QN 5.U8
+qSrcG1213       QN 6.U8
+qSrcH1415       QN 7.U8
+qSrcI1617       QN 8.U8
+
+dSrcA0          DN 0.U8                                      ;// Low halves of the row registers above
+dSrcB2          DN 2.U8
+dSrcC4          DN 4.U8
+dSrcD6          DN 6.U8
+dSrcE8          DN 8.U8
+dSrcF10         DN 10.U8
+dSrcG12         DN 12.U8
+dSrcH14         DN 14.U8
+dSrcI16         DN 16.U8
+
+dSrcA1          DN 1.U8                                      ;// High halves of the row registers above
+dSrcB3          DN 3.U8
+dSrcC5          DN 5.U8
+dSrcD7          DN 7.U8
+dSrcE9          DN 9.U8
+dSrcF11         DN 11.U8
+dSrcG13         DN 13.U8
+dSrcH15         DN 15.U8
+dSrcI17         DN 17.U8
+
+qTempP01        QN 9.S16
+qTempQ01        QN 10.S16
+qTempR01        QN 11.S16
+qTempS01        QN 12.S16
+
+qTempP23        QN 0.S16                                     ;// Q0..Q3 are reused: they also hold source rows A..D
+qTempQ23        QN 1.S16
+qTempR23        QN 2.S16
+qTempS23        QN 3.S16
+
+dTempP0         DN 18.S16
+dTempP1         DN 19.S16
+dTempP2         DN 0.S16
+
+dTempQ0         DN 20.S16
+dTempQ1         DN 21.S16
+dTempQ2         DN 2.S16
+
+dTempR0         DN 22.S16
+dTempR1         DN 23.S16
+dTempR2         DN 4.S16
+
+dTempS0         DN 24.S16
+dTempS1         DN 25.S16
+dTempS2         DN 6.S16
+ 
+dTempB0         DN 26.S16
+dTempC0         DN 27.S16
+dTempD0         DN 28.S16
+dTempF0         DN 29.S16
+
+dTempAcc0       DN 0.U16
+dTempAcc1       DN 2.U16
+dTempAcc2       DN 4.U16
+dTempAcc3       DN 6.U16
+
+dAcc0           DN 0.U8
+dAcc1           DN 2.U8
+dAcc2           DN 4.U8
+dAcc3           DN 6.U8
+
+qAcc0           QN 0.S32                                     ;// Q0..Q3 again: qAccN replaces qTemp{P,Q,R,S}23
+qAcc1           QN 1.S32
+qAcc2           QN 2.S32
+qAcc3           QN 3.S32
+
+qTAcc0          QN 0.U16
+qTAcc1          QN 1.U16
+qTAcc2          QN 2.U16
+qTAcc3          QN 3.U16                
+
+qTmp            QN 4.S16
+dTmp            DN 8.S16                                     ;// Same D register as dSrcE8
+
+        VLD1        qSrcA01, [pSrc], srcStep                 ;// [a0 a1 a2 a3 .. a15]   
+        ADD         r12, pSrc, srcStep, LSL #2               ;// Second read pointer, four rows below pSrc (row F)
+        VMOV        dTCoeff5, #5
+        VMOV        dTCoeff20, #20
+        VLD1        qSrcF1011, [r12], srcStep                ;// Row F, read through r12
+        VLD1        qSrcB23, [pSrc], srcStep                 ;// [b0 b1 b2 b3 .. b15]
+        
+        VLD1        qSrcG1213, [r12], srcStep                ;// Row G
+        VADDL       qTempP01, dSrcA0, dSrcF10                ;// P = A+F, low eight bytes of the row
+        VLD1        qSrcC45, [pSrc], srcStep                 ;// [c0 c1 c2 c3 .. c15]
+        VADDL       qTempP23, dSrcA1, dSrcF11                ;// P = A+F, high eight bytes; overwrites row A (Q0)
+        VLD1        qSrcD67, [pSrc], srcStep                 ;// Row D
+        VADDL       qTempQ01, dSrcB2, dSrcG12                ;// Q = B+G, low half
+        VLD1        qSrcE89, [pSrc], srcStep                 ;// Row E
+        
+        ;//t0
+        VMLAL       qTempP01, dSrcC4, dTCoeff20              ;// P += 20*C
+        
+        VLD1        qSrcH1415, [r12], srcStep                ;// Row H
+
+        VMLAL       qTempP23, dSrcC5, dTCoeff20
+        
+        VLD1        qSrcI1617, [r12], srcStep                 ;// [i0 i1 i2 i3 .. ]
+        
+        VMLAL       qTempP01, dSrcD6, dTCoeff20              ;// P += 20*D
+        VMLAL       qTempQ01, dSrcD6, dTCoeff20              ;// Q += 20*D
+        VMLSL       qTempP23, dSrcB3, dTCoeff5
+        
+        VADDL       qTempR01, dSrcC4, dSrcH14                ;// R = C+H, low half
+        
+        VMLSL       qTempP01, dSrcB2, dTCoeff5               ;// P -= 5*B
+
+        VADDL       qTempQ23, dSrcB3, dSrcG13                ;// Q = B+G, high half; overwrites row B (Q1)
+
+        VMLAL       qTempP23, dSrcD7, dTCoeff20
+        VMLAL       qTempQ01, dSrcE8, dTCoeff20              ;// Q += 20*E
+
+        VMLSL       qTempP01, dSrcE8, dTCoeff5               ;// P -= 5*E: P low half complete
+        VMLAL       qTempQ23, dSrcD7, dTCoeff20
+
+        VMLSL       qTempP23, dSrcE9, dTCoeff5               ;// P high half complete
+
+        ;//t1
+
+        VMLAL       qTempR01, dSrcE8, dTCoeff20              ;// R += 20*E
+        VMLSL       qTempQ01, dSrcC4, dTCoeff5               ;// Q -= 5*C
+        VMLSL       qTempQ23, dSrcC5, dTCoeff5
+        VADDL       qTempR23, dSrcC5, dSrcH15                ;// R = C+H, high half; overwrites row C (Q2)
+
+        VMLAL       qTempR01, dSrcF10, dTCoeff20             ;// R += 20*F
+        VMLSL       qTempQ01, dSrcF10, dTCoeff5              ;// Q -= 5*F: Q low half complete
+        VMLAL       qTempQ23, dSrcE9, dTCoeff20
+        VMLAL       qTempR23, dSrcE9, dTCoeff20
+        VADDL       qTempS01, dSrcD6, dSrcI16                ;// S = D+I, low half
+
+
+        VMLSL       qTempR01, dSrcD6, dTCoeff5               ;// R -= 5*D
+        VMLSL       qTempQ23, dSrcF11, dTCoeff5              ;// Q high half complete
+        VMLSL       qTempR23, dSrcD7, dTCoeff5
+
+        ;//t2
+        VADDL       qTempS23, dSrcD7, dSrcI17                ;// S = D+I, high half; overwrites row D (Q3)
+        VMLAL       qTempS01, dSrcF10, dTCoeff20             ;// S += 20*F
+        VMLSL       qTempR01, dSrcG12, dTCoeff5              ;// R -= 5*G: R low half complete
+        VMLSL       qTempR23, dSrcG13, dTCoeff5
+
+        VMLAL       qTempS23, dSrcF11, dTCoeff20
+        VMLAL       qTempS01, dSrcG12, dTCoeff20             ;// S += 20*G
+        VEXT        dTempB0, dTempP0, dTempP1, #1            ;// Second pass on P starts: elements 1..4 (b)
+        VMLAL       qTempR23, dSrcF11, dTCoeff20             ;// R high half complete
+
+
+        ;//t3
+        VMLAL       qTempS23, dSrcG13, dTCoeff20
+        VMLSL       qTempS01, dSrcE8, dTCoeff5               ;// S -= 5*E
+        VEXT        dTempC0, dTempP0, dTempP1, #2            ;// P elements 2..5 (c)
+        VMOV        dCoeff20, #20                            ;// D31 now holds 16-bit lanes; dTCoeff20 is not used after this
+        VMLSL       qTempS23, dSrcE9, dTCoeff5
+        VMLSL       qTempS01, dSrcH14, dTCoeff5              ;// S -= 5*H: S low half complete
+        VEXT        dTempF0, dTempP1, dTempP2, #1            ;// P elements 5..8 (f); read before qAcc0 overwrites dTempP2
+        VEXT        dTempD0, dTempP0, dTempP1, #3            ;// P elements 3..6 (d)
+        VMLSL       qTempS23, dSrcH15, dTCoeff5              ;// S high half complete; last use of dTCoeff5
+        
+        VADDL       qAcc0, dTempP0, dTempF0                  ;// Acc0 = a+f
+        VADD        dTempC0, dTempC0, dTempD0                ;// c+d
+        ;//h 
+        VMOV        dCoeff5, #5                              ;// D30 now holds 16-bit lanes
+        
+        ;// res0
+        VADD        dTempB0, dTempB0, dTempP1                ;// b+e (dTempP1 is elements 4..7)
+        VMLAL       qAcc0, dTempC0, dCoeff20                 ;// Acc0 += 20*(c+d)
+        VEXT        dTempC0, dTempQ0, dTempQ1, #2            ;// Row Q: c
+        VEXT        dTempD0, dTempQ0, dTempQ1, #3            ;// Row Q: d
+        VEXT        dTempF0, dTempQ1, dTempQ2, #1            ;// Row Q: f
+        VMLSL       qAcc0, dTempB0, dCoeff5                  ;// Acc0 -= 5*(b+e)
+
+        ;// res1
+        VEXT        dTempB0, dTempQ0, dTempQ1, #1            ;// Row Q: b
+        VADDL       qAcc1, dTempQ0, dTempF0                  ;// Acc1 = a+f
+        VADD        dTempC0, dTempC0, dTempD0                ;// c+d
+        VADD        dTempB0, dTempB0, dTempQ1                ;// b+e
+        VEXT        dTempD0, dTempR0, dTempR1, #3            ;// Row R: d
+        VMLAL       qAcc1, dTempC0, dCoeff20                 ;// Acc1 += 20*(c+d)
+        VEXT        dTempF0, dTempR1, dTempR2, #1            ;// Row R: f
+        VEXT        dTempC0, dTempR0, dTempR1, #2            ;// Row R: c
+        VEXT        dTmp, dTempR0, dTempR1, #1               ;// Row R: b, kept in dTmp while dTempB0 is still pending
+        VADDL       qAcc2, dTempR0, dTempF0                  ;// Acc2 = a+f
+        VMLSL       qAcc1, dTempB0, dCoeff5                  ;// Acc1 -= 5*(b+e)
+;        VEXT        dTempB0, dTempR0, dTempR1, #1
+        VADD        dTempC0, dTempC0, dTempD0                ;// c+d
+        
+        ;// res2
+        VADD        dTempB0, dTmp, dTempR1                   ;// b+e
+        VEXT        dTempD0, dTempS0, dTempS1, #3            ;// Row S: d
+        VMLAL       qAcc2, dTempC0, dCoeff20                 ;// Acc2 += 20*(c+d)
+;        VADD        dTempB0, dTempB0, dTempR1
+        
+        ;// res3
+        VEXT        dTempC0, dTempS0, dTempS1, #2            ;// Row S: c
+        VEXT        dTempF0, dTempS1, dTempS2, #1            ;// Row S: f
+        VADD        dTempC0, dTempC0, dTempD0                ;// c+d
+        VEXT        dTmp, dTempS0, dTempS1, #1               ;// Row S: b
+        VADDL       qAcc3, dTempS0, dTempF0                  ;// Acc3 = a+f
+        VMLSL       qAcc2, dTempB0, dCoeff5                  ;// Acc2 -= 5*(b+e)
+        VMLAL       qAcc3, dTempC0, dCoeff20                 ;// Acc3 += 20*(c+d)
+        VADD        dTmp, dTmp, dTempS1                      ;// b+e
+        VMLSL       qAcc3, dTmp, dCoeff5                     ;// Acc3 -= 5*(b+e)
+                
+        VQRSHRUN    dTempAcc0, qAcc0, #10                    ;// Rounding shift right by 10, saturating to unsigned 16 bit
+        VQRSHRUN    dTempAcc1, qAcc1, #10
+        VQRSHRUN    dTempAcc2, qAcc2, #10
+        VQRSHRUN    dTempAcc3, qAcc3, #10
+
+        VQMOVN      dAcc0, qTAcc0                            ;// Saturating narrow to 8 bit; low four bytes come from dTempAccN
+        VQMOVN      dAcc1, qTAcc1
+        VQMOVN      dAcc2, qTAcc2
+        VQMOVN      dAcc3, qTAcc3
+        
+        M_END
+    
+    ENDIF
+    
+    
+    
+
+    
+    END
+    
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
new file mode 100755
index 0000000..babe8ad
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
@@ -0,0 +1,228 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS CortexA8
+        
+        EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+
+DEBUG_ON    SETL {FALSE}
+
+    IF CortexA8
+        
+        M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r11
+;// Horizontal 6-tap filter over four rows: Acc = (a+f) + dCoeff20*(c+d) - dCoeff5*(b+e), then a rounding narrow by 5 bits. Results stay in dAcc0/2/4/6; this routine performs no stores.
+;// Declare input registers
+pSrc            RN 0                            ;// Source address; post-incremented by srcStep on each row load
+srcStep         RN 1                            ;// Increment applied to pSrc after each row load
+pDst            RN 2                            ;// Declared but not referenced in this routine
+dstStep         RN 3                            ;// Declared but not referenced in this routine
+
+;// Declare Neon registers
+dCoeff5         DN 30.S16                       ;// Read-only here (never written); presumably preloaded with 5 by the caller - TODO confirm
+dCoeff20        DN 31.S16                       ;// Read-only here (never written); presumably preloaded with 20 by the caller - TODO confirm
+
+qSrcA01         QN 11.U8                        ;// One 16-byte source row per Q register (rows A..D)
+qSrcB01         QN 12.U8
+qSrcC01         QN 13.U8
+qSrcD01         QN 14.U8
+
+dSrcA0          DN 22.U8                        ;// D22:D23 are the two halves of Q11 (qSrcA01)
+dSrcA1          DN 23.U8
+dSrcB0          DN 24.U8                        ;// D24:D25 are the two halves of Q12 (qSrcB01)
+dSrcB1          DN 25.U8
+dSrcC0          DN 26.U8                        ;// D26:D27 are the two halves of Q13 (qSrcC01)
+dSrcC1          DN 27.U8
+dSrcD0          DN 28.U8                        ;// D28:D29 are the two halves of Q14 (qSrcD01)
+dSrcD1          DN 29.U8
+
+dSrcb           DN 12.U8                        ;// Row shifted by 1 byte (VEXT #1); rebuilt for every row
+dSrce           DN 13.U8                        ;// Row shifted by 4 bytes (VEXT #4); rebuilt for every row
+dSrcf           DN 10.U8                        ;// Row shifted by 5 bytes (VEXT #5); rebuilt for every row
+
+dSrc0c          DN 14.U8                        ;// Row N shifted by 2 bytes (VEXT #2), one register per row
+dSrc1c          DN 16.U8
+dSrc2c          DN 18.U8
+dSrc3c          DN 20.U8
+                   
+dSrc0d          DN 15.U8                        ;// Row N shifted by 3 bytes (VEXT #3), one register per row
+dSrc1d          DN 17.U8
+dSrc2d          DN 19.U8
+dSrc3d          DN 21.U8
+
+qTemp01         QN 4.S16                        ;// Widened c+d sum
+qTemp23         QN 6.S16                        ;// Widened b+e sum; Q6 = D12:D13, so it overwrites dSrcb/dSrce in place
+dTemp0          DN 8.S16                        ;// Low half of qTemp01; also reused to hold dCoeff5*(b+e)
+dTemp2          DN 12.S16                       ;// Low half of qTemp23
+
+qRes01          QN 11.S16                       ;// Accumulators reuse the Q registers of the source rows A..D
+qRes23          QN 12.S16
+qRes45          QN 13.S16
+qRes67          QN 14.S16
+
+dRes0           DN 22.S16                       ;// Low half of the matching qResNN accumulator
+dRes2           DN 24.S16
+dRes4           DN 26.S16
+dRes6           DN 28.S16
+
+dAcc0           DN 22.U8                        ;// Narrowed 8-bit results, written by the final VQRSHRUN
+dAcc2           DN 24.U8
+dAcc4           DN 26.U8
+dAcc6           DN 28.U8
+
+dResult0        DN 22.U32                       ;// U32 views of the result registers; not referenced in this routine
+dResult2        DN 24.U32
+dResult4        DN 26.U32
+dResult6        DN 28.U32
+
+        VLD1        qSrcA01, [pSrc], srcStep    ;// Load A register [a0 a1 a2 a3 ..] (first row)
+        ;// One cycle stall
+        VEXT        dSrcf, dSrcA0, dSrcA1, #5   ;// [f0 f1 f2 f3 ..]
+        VEXT        dSrcb, dSrcA0, dSrcA1, #1   ;// [b0 b1 b2 b3 ..]
+;        VLD1        qSrcB01, [pSrc], srcStep    ;// Load B register [a0 a1 a2 a3 ..]
+        VEXT        dSrc0c, dSrcA0, dSrcA1, #2  ;// [c0 c1 c2 c3 ..]
+        VEXT        dSrc0d, dSrcA0, dSrcA1, #3  ;// [d0 d1 d2 d3 ..]
+        VEXT        dSrce, dSrcA0, dSrcA1, #4   ;// [e0 e1 e2 e3 ..]
+        VADDL       qRes01, dSrcA0, dSrcf       ;// Acc=a+f (overwrites row A, which is no longer needed)
+        VADDL       qTemp01, dSrc0c, dSrc0d     ;// c+d
+        VADDL       qTemp23, dSrcb, dSrce       ;// b+e
+        
+        VLD1        qSrcB01, [pSrc], srcStep    ;// Load B register [a0 a1 a2 a3 ..] (second row)
+;        VLD1        qSrcC01, [pSrc], srcStep    ;// Load C register [a0 a1 a2 a3 ..]           
+        VMLA        dRes0, dTemp0, dCoeff20     ;// Acc += 20*(c+d)
+;        VMLS        dRes0, dTemp2, dCoeff5      ;// Acc -= 5*(b+e)
+        VMUL        dTemp0, dTemp2, dCoeff5 ;// TeRi: Temp = 5*(b+e); subtracted from Acc by the VSUB below
+        
+        VEXT        dSrcf, dSrcB0, dSrcB1, #5   ;// [f0 f1 f2 f3 ..]
+        VEXT        dSrcb, dSrcB0, dSrcB1, #1   ;// [b0 b1 b2 b3 ..]
+        VEXT        dSrc1c, dSrcB0, dSrcB1, #2  ;// [c0 c1 c2 c3 ..]
+        VEXT        dSrc1d, dSrcB0, dSrcB1, #3  ;// [d0 d1 d2 d3 ..]
+        VEXT        dSrce, dSrcB0, dSrcB1, #4   ;// [e0 e1 e2 e3 ..]
+        VADDL       qRes23, dSrcB0, dSrcf       ;// Acc=a+f
+
+        VSUB        dRes0, dRes0, dTemp0    ;// TeRi: row 0 Acc -= 5*(b+e); must precede the next write to qTemp01
+
+        VADDL       qTemp01, dSrc1c, dSrc1d     ;// c+d
+        VADDL       qTemp23, dSrcb, dSrce       ;// b+e
+        
+        VLD1        qSrcC01, [pSrc], srcStep    ;// Load C register [a0 a1 a2 a3 ..] (third row)
+;        VLD1        qSrcD01, [pSrc], srcStep    ;// Load D register [a0 a1 a2 a3 ..]  
+        
+        VMLA        dRes2, dTemp0, dCoeff20     ;// Acc += 20*(c+d)
+;        VMLS        dRes2, dTemp2, dCoeff5      ;// Acc -= 5*(b+e)
+        VMUL        dTemp0, dTemp2, dCoeff5 ;// TeRi: Temp = 5*(b+e); subtracted from Acc by the VSUB below
+
+        VEXT        dSrcf, dSrcC0, dSrcC1, #5   ;// [f0 f1 f2 f3 ..]
+        VEXT        dSrcb, dSrcC0, dSrcC1, #1   ;// [b0 b1 b2 b3 ..]
+        VEXT        dSrc2c, dSrcC0, dSrcC1, #2  ;// [c0 c1 c2 c3 ..]
+        VEXT        dSrc2d, dSrcC0, dSrcC1, #3  ;// [d0 d1 d2 d3 ..]
+        VEXT        dSrce, dSrcC0, dSrcC1, #4   ;// [e0 e1 e2 e3 ..]
+        VADDL       qRes45, dSrcC0, dSrcf       ;// Acc=a+f
+        
+        VSUB        dRes2, dRes2, dTemp0  ;// TeRi: row 1 Acc -= 5*(b+e)
+        
+        VADDL       qTemp01, dSrc2c, dSrc2d     ;// c+d
+        VADDL       qTemp23, dSrcb, dSrce       ;// b+e
+
+        VLD1        qSrcD01, [pSrc], srcStep    ;// Load D register [a0 a1 a2 a3 ..] (fourth row)
+
+        VMLA        dRes4, dTemp0, dCoeff20     ;// Acc += 20*(c+d)
+;        VMLS        dRes4, dTemp2, dCoeff5      ;// Acc -= 5*(b+e)
+        VMUL        dTemp0, dTemp2, dCoeff5      ;// TeRi: Temp = 5*(b+e); subtracted from Acc by the VSUB below
+        
+
+        VEXT        dSrcf, dSrcD0, dSrcD1, #5   ;// [f0 f1 f2 f3 ..]
+        VEXT        dSrcb, dSrcD0, dSrcD1, #1   ;// [b0 b1 b2 b3 ..]
+        VEXT        dSrc3c, dSrcD0, dSrcD1, #2  ;// [c0 c1 c2 c3 ..]
+        VEXT        dSrc3d, dSrcD0, dSrcD1, #3  ;// [d0 d1 d2 d3 ..]
+        VEXT        dSrce, dSrcD0, dSrcD1, #4   ;// [e0 e1 e2 e3 ..]
+        VADDL       qRes67, dSrcD0, dSrcf       ;// Acc=a+f
+
+        VSUB        dRes4, dRes4, dTemp0 ;// TeRi: row 2 Acc -= 5*(b+e)
+
+        VADDL       qTemp01, dSrc3c, dSrc3d     ;// c+d
+        VADDL       qTemp23, dSrcb, dSrce       ;// b+e
+        VMLA        dRes6, dTemp0, dCoeff20     ;// Acc += 20*(c+d)
+        VMLS        dRes6, dTemp2, dCoeff5      ;// Acc -= 5*(b+e) (last row uses VMLS directly, no VMUL/VSUB split)
+;// Only the low D half (lanes 0-3) of each accumulator received the 20*/5* terms; lanes 4-7 still hold just a+f.
+        VQRSHRUN    dAcc0, qRes01, #5           ;// Acc = Sat ((Acc + 16) / 32)
+        VQRSHRUN    dAcc2, qRes23, #5           ;// Acc = Sat ((Acc + 16) / 32)
+        VQRSHRUN    dAcc4, qRes45, #5           ;// Acc = Sat ((Acc + 16) / 32)
+        VQRSHRUN    dAcc6, qRes67, #5           ;// Acc = Sat ((Acc + 16) / 32)
+        
+        M_END
+    
+    ENDIF
+
+
+    END
+    
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
new file mode 100755
index 0000000..89c90aa
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
@@ -0,0 +1,134 @@
+;//
+;// 
+;// File Name:  armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+       
+        M_VARIANTS CortexA8
+       
+        EXPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+
+    IF CortexA8
+        
+        M_START armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe, r11
+;// Vertical 6-tap filter: reads nine 8-byte rows and forms four output rows, Acc = (a+f) - dCoeff5*(b+e) + dCoeff20*(c+d), narrowed with rounding by 5 bits. Results stay in dAcc0..dAcc3; no stores here.
+;// Declare input registers
+pSrc            RN 0                            ;// Source address; post-incremented by srcStep on each row load
+srcStep         RN 1                            ;// Row increment for both pSrc and Temp
+pDst            RN 2                            ;// Declared but not referenced in this routine
+dstStep         RN 3                            ;// Declared but not referenced in this routine
+
+Temp            RN 12                           ;// Second row pointer, used for rows 5..8
+
+;// Declare Neon registers
+dCoeff5         DN 30.S16                       ;// Read-only here (never written); presumably preloaded with 5 by the caller - TODO confirm
+dCoeff20        DN 31.S16                       ;// Read-only here (never written); presumably preloaded with 20 by the caller - TODO confirm
+
+dSrc0           DN 7.U8                         ;// dSrc0..dSrc8 hold the nine consecutive source rows
+dSrc1           DN 8.U8
+dSrc2           DN 9.U8
+dSrc3           DN 10.U8
+dSrc4           DN 11.U8
+dSrc5           DN 12.U8
+dSrc6           DN 13.U8
+dSrc7           DN 14.U8
+dSrc8           DN 15.U8
+
+qSumBE01        QN 8.S16                        ;// Widened b+e sum, recomputed for every output row
+qSumCD01        QN 9.S16                        ;// Widened c+d sum, recomputed for every output row
+dSumBE0         DN 16.S16                       ;// Low half of qSumBE01 (Q8 = D16:D17)
+dSumCD0         DN 18.S16                       ;// Low half of qSumCD01 (Q9 = D18:D19)
+
+qAcc01          QN 0.S16                        ;// One accumulator per output row
+qAcc23          QN 1.S16
+qAcc45          QN 2.S16
+qAcc67          QN 3.S16
+
+dRes0           DN 0.S16                        ;// Low half of the matching qAccNN accumulator
+dRes1           DN 2.S16
+dRes2           DN 4.S16
+dRes3           DN 6.S16
+
+dAcc0           DN 0.U8                         ;// Narrowed 8-bit results, written by the final VQRSHRUN
+dAcc1           DN 2.U8
+dAcc2           DN 4.U8
+dAcc3           DN 6.U8        
+        
+
+dTmp0           DN 20.S16                       ;// Hold dCoeff20*(c+d) for rows 0..2 until the deferred VADDs
+dTmp1           DN 21.S16
+dTmp2           DN 22.S16
+dTmp3           DN 23.S16                       ;// Only referenced by commented-out code below
+
+
+        VLD1        dSrc0, [pSrc], srcStep     ;// [a0 a1 a2 a3 .. ] 
+        ADD         Temp, pSrc, srcStep, LSL #2 ;// pSrc now addresses the second row, so Temp = sixth row (dSrc5)
+        VLD1        dSrc1, [pSrc], srcStep     ;// [b0 b1 b2 b3 .. ]
+        ;// One cycle stall
+        VLD1        dSrc5, [Temp], srcStep     ;// Sixth row, the 'f' tap of output row 0
+        ;// One cycle stall
+        VLD1        dSrc2, [pSrc], srcStep     ;// [c0 c1 c2 c3 .. ]
+        VADDL       qAcc01, dSrc0, dSrc5       ;// Acc = a+f
+        VLD1        dSrc3, [pSrc], srcStep     ;// Fourth row
+        ;// One cycle stall
+        VLD1        dSrc6, [Temp], srcStep ;// TeRi: seventh row
+        
+        VLD1        dSrc4, [pSrc], srcStep     ;// Fifth row
+        VLD1        dSrc7, [Temp], srcStep ;// TeRi: eighth row
+        VADDL       qSumBE01, dSrc1, dSrc4     ;// b+e
+        VADDL       qSumCD01, dSrc2, dSrc3     ;// c+d        
+        VLD1        dSrc8, [Temp], srcStep ;// TeRi: ninth row
+        VMLS        dRes0, dSumBE0, dCoeff5    ;// Acc -= 5*(b+e)
+;        VMLA        dRes0, dSumCD0, dCoeff20   ;// Acc += 20*(c+d)
+        VMUL        dTmp0, dSumCD0, dCoeff20   ;// Tmp0 = 20*(c+d); added to Acc by a VADD further down
+        
+;        VLD1        dSrc6, [Temp], srcStep
+        VADDL       qSumBE01, dSrc2, dSrc5     ;// b+e
+        VADDL       qSumCD01, dSrc3, dSrc4     ;// c+d
+        VADDL       qAcc23, dSrc1, dSrc6       ;// Acc = a+f
+        VMLS        dRes1, dSumBE0, dCoeff5    ;// Acc -= 5*(b+e)
+;        VMLA        dRes1, dSumCD0, dCoeff20   ;// Acc += 20*(c+d)
+        VMUL        dTmp1, dSumCD0, dCoeff20   ;// Tmp1 = 20*(c+d); added to Acc by a VADD further down
+
+;        VLD1        dSrc7, [Temp], srcStep
+        VADDL       qSumBE01, dSrc3, dSrc6     ;// b+e
+        VADDL       qSumCD01, dSrc4, dSrc5     ;// c+d
+        VADDL       qAcc45, dSrc2, dSrc7       ;// Acc = a+f
+        VMLS        dRes2, dSumBE0, dCoeff5    ;// Acc -= 5*(b+e)
+;        VMLA        dRes2, dSumCD0, dCoeff20   ;// Acc += 20*(c+d)
+        VMUL        dTmp2, dSumCD0, dCoeff20   ;// Tmp2 = 20*(c+d); added to Acc by a VADD further down
+
+;        VLD1        dSrc8, [Temp], srcStep     ;// [i0 i1 i2 i3 .. ]        
+        VADDL       qSumBE01, dSrc4, dSrc7     ;// b+e
+        VADDL       qAcc67, dSrc3, dSrc8       ;// Acc = a+f
+        VADDL       qSumCD01, dSrc5, dSrc6     ;// c+d
+        VMLS        dRes3, dSumBE0, dCoeff5    ;// Acc -= 5*(b+e)
+        VADD        dRes0, dRes0, dTmp0        ;// Row 0: Acc += 20*(c+d)
+        VADD        dRes1, dRes1, dTmp1        ;// Row 1: Acc += 20*(c+d)
+        VADD        dRes2, dRes2, dTmp2        ;// Row 2: Acc += 20*(c+d)
+        VMLA        dRes3, dSumCD0, dCoeff20   ;// Acc += 20*(c+d)
+;        VMUL        dTmp3, dSumCD0, dCoeff20   ;// Acc += 20*(c+d)
+;        VADD        dRes3, dRes3, dTmp3
+;// Only the low D half (lanes 0-3) of each accumulator received the 5*/20* terms; lanes 4-7 still hold just a+f.
+        VQRSHRUN    dAcc0, qAcc01, #5          ;// Saturating unsigned narrow with rounding: (Acc + 16) >> 5
+        VQRSHRUN    dAcc1, qAcc23, #5        
+        VQRSHRUN    dAcc2, qAcc45, #5        
+        VQRSHRUN    dAcc3, qAcc67, #5        
+
+        M_END
+    
+    ENDIF
+
+    
+    
+    END
+    
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
new file mode 100755
index 0000000..0f0ec78
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
@@ -0,0 +1,318 @@
+;//
+;// 
+;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS CortexA8
+        
+
+    IF CortexA8
+
+    M_TABLE armVCM4P10_WidthBranchTableMVIsNotZero      
+    ;// Jump table read by "LDR pc, [pTable, iWidth, LSL #1]": byte offset = iWidth*2, so iWidth 2/4/8 selects word 1/2/4.
+    DCD   WidthIs2MVIsNotZero, WidthIs2MVIsNotZero      ;// Words 0 and 1 (iWidth = 2 reads word 1)
+    DCD   WidthIs4MVIsNotZero, WidthIs4MVIsNotZero      ;// Words 2 and 3 (iWidth = 4 reads word 2)
+    DCD   WidthIs8MVIsNotZero                           ;// Word 4 (iWidth = 8)
+    
+    M_TABLE armVCM4P10_WidthBranchTableMVIsZero      
+    ;// Same layout as the table above; selected when the CMN dx,dy test in the function sets EQ.
+    DCD   WidthIs2MVIsZero, WidthIs2MVIsZero            ;// Words 0 and 1 (iWidth = 2 reads word 1)
+    DCD   WidthIs4MVIsZero, WidthIs4MVIsZero            ;// Words 2 and 3 (iWidth = 4 reads word 2)
+    DCD   WidthIs8MVIsZero                              ;// Word 4 (iWidth = 8)
+    
+    
+;// input registers (r0-r3 are used as named; r4-r7 are filled from the stack arguments by M_LDRD in the function)
+
+pSrc                 RN 0
+iSrcStep             RN 1
+pDst                 RN 2
+iDstStep             RN 3
+iWidth               RN 4
+iHeight              RN 5
+dx                   RN 6
+dy                   RN 7
+
+;// local variable registers
+pc                   RN 15                      ;// Written directly to perform the table branch
+return               RN 0                       ;// Shares r0 with pSrc; only written on exit
+EightMinusdx         RN 8 
+EightMinusdy         RN 9
+
+ACoeff               RN 12
+BCoeff               RN 9                       ;// Shares r9 with EightMinusdy, so EightMinusdy is lost once BCoeff is written
+CCoeff               RN 8                       ;// Shares r8 with EightMinusdx, so EightMinusdx is lost once CCoeff is written
+DCoeff               RN 6                       ;// Shares r6 with dx, so dx is lost once DCoeff is written
+
+pTable               RN 11                      ;// Address of the selected width branch table
+
+Step1                RN 10                      ;// Set to the constant 1 in the function
+SrcStepMinus1        RN 14                      ;// Set to iSrcStep - Step1 in the function
+
+dACoeff              DN D12.U8                  ;// Byte-replicated copies of ACoeff..DCoeff (VDUP)
+dBCoeff              DN D13.U8
+dCCoeff              DN D14.U8
+dDCoeff              DN D15.U8
+
+dRow0a               DN D0.U8                   ;// 'a' = 8 bytes loaded at the row address, 'b' = 8 bytes loaded one byte further
+dRow0b               DN D1.U8
+dRow1a               DN D2.U8
+dRow1b               DN D3.U8
+
+qRow0a               QN Q2.S16                  ;// Accumulator for the first output row of an iteration
+qRow0b               QN Q3.S16                  ;// Accumulator for the second output row of an iteration
+
+;//dIndex               DN    D16.U8                 
+qRow1a               QN Q11.S16                 ;// Accumulator for the third output row (8-wide path only)
+qRow1b               QN Q12.S16                 ;// Accumulator for the fourth output row (8-wide path only)
+
+dRow2a               DN D16.U8
+dRow2b               DN D17.U8
+dRow3a               DN D18.U8
+dRow3b               DN D19.U8
+
+qOutRow2             QN Q11.U16                 ;// Unsigned view of qRow1a
+qOutRow3             QN Q12.U16                 ;// Unsigned view of qRow1b
+dOutRow2             DN D20.U8
+dOutRow3             DN D21.U8
+dOutRow2U64          DN D20.U64
+dOutRow3U64          DN D21.U64
+
+qOutRow0             QN Q2.U16                  ;// Unsigned view of qRow0a
+qOutRow1             QN Q3.U16                  ;// Unsigned view of qRow0b
+dOutRow0             DN D8.U8
+dOutRow1             DN D9.U8
+
+dOutRow0U64          DN D8.U64                  ;// U64/U32/U16 views of D8/D9 for the 8-, 4- and 2-byte stores
+dOutRow1U64          DN D9.U64
+
+dOutRow0U32          DN D8.U32
+dOutRow1U32          DN D9.U32
+
+dOutRow0U16          DN D8.U16
+dOutRow1U16          DN D9.U16
+
+
+dOut0U64             DN D0.U64                  ;// Views of D0..D3 (the dRow0a/0b/1a/1b registers) used by the MV-is-zero copy paths
+dOut1U64             DN D1.U64
+
+dOut00U32            DN D0.U32
+dOut01U32            DN D1.U32
+dOut10U32            DN D2.U32
+dOut11U32            DN D3.U32
+
+dOut0U16             DN D0.U16
+dOut1U16             DN D1.U16
+
+;//-----------------------------------------------------------------------------------------------
+;// armVCM4P10_Interpolate_Chroma_asm starts
+;//-----------------------------------------------------------------------------------------------
+        
+        ;// Write function header
+        M_START armVCM4P10_Interpolate_Chroma, r11, d15
+        ;// Weighted 2x2 interpolation: out = (A*x00 + B*x01 + C*x10 + D*x11 + 32) >> 6, or a plain copy when dx + dy == 0.
+        ;// Define stack arguments
+        M_ARG   Width,      4
+        M_ARG   Height,     4
+        M_ARG   Dx,         4
+        M_ARG   Dy,         4
+        
+        ;// Load argument from the stack
+        ;// M_STALL ARM1136JS=4
+        
+        M_LDRD   dx, dy, Dx
+        M_LDRD   iWidth, iHeight, Width
+        
+        ;// EightMinusdx = 8 - dx
+        ;// EightMinusdy = 8 - dy
+        
+        ;// ACoeff = EightMinusdx * EightMinusdy
+        ;// BCoeff = dx * EightMinusdy
+        ;// CCoeff = EightMinusdx * dy
+        ;// DCoeff = dx * dy
+        
+        RSB     EightMinusdx, dx, #8 
+        RSB     EightMinusdy, dy, #8
+        CMN     dx,dy                                   ;// Flags from dx + dy: EQ when the sum is zero
+        MOV     Step1, #1
+        LDREQ   pTable, =armVCM4P10_WidthBranchTableMVIsZero
+        SUB     SrcStepMinus1, iSrcStep, Step1          ;// Step1 + SrcStepMinus1 = iSrcStep, so an a/b load pair advances one row
+        LDRNE   pTable, =armVCM4P10_WidthBranchTableMVIsNotZero
+        
+        VLD1    dRow0a, [pSrc], Step1                   ;// 0a
+        ;// The multiplies below are order-sensitive: BCoeff, CCoeff and DCoeff overwrite EightMinusdy, EightMinusdx and dx.
+        SMULBB  ACoeff, EightMinusdx, EightMinusdy
+        SMULBB  BCoeff, dx, EightMinusdy
+        VLD1    dRow0b, [pSrc], SrcStepMinus1           ;// 0b
+        SMULBB  CCoeff, EightMinusdx, dy
+        SMULBB  DCoeff, dx, dy
+        
+        VDUP    dACoeff, ACoeff                         ;// Replicate each coefficient into every byte lane
+        VDUP    dBCoeff, BCoeff
+        VDUP    dCCoeff, CCoeff
+        VDUP    dDCoeff, DCoeff
+        
+        LDR     pc, [pTable, iWidth, LSL #1]      ;// Branch to the case based on iWidth
+        
+;// Pixel layout:
+;//
+;//   x00 x01 x02
+;//   x10 x11 x12
+;//   x20 x21 x22
+
+;// If fractional mv is not (0, 0)
+WidthIs8MVIsNotZero
+;// Four output rows of 8 bytes per iteration; row 0 (dRow0a/dRow0b) is already loaded on entry.
+                VLD1   dRow1a, [pSrc], Step1            ;// 1a
+                VMULL  qRow0a, dRow0a, dACoeff
+                VLD1   dRow1b, [pSrc], SrcStepMinus1    ;// 1b
+                VMULL  qRow0b, dRow1a, dACoeff
+                VLD1   dRow2a, [pSrc], Step1            ;// 2a
+                VMLAL  qRow0a, dRow0b, dBCoeff
+                VLD1   dRow2b, [pSrc], SrcStepMinus1    ;// 2b
+                VMULL  qRow1a, dRow2a, dACoeff
+                VMLAL  qRow0b, dRow1b, dBCoeff
+                VLD1   dRow3a, [pSrc], Step1            ;// 3a
+                VMLAL  qRow0a, dRow1a, dCCoeff
+                VMLAL  qRow1a, dRow2b, dBCoeff
+                VMULL  qRow1b, dRow3a, dACoeff
+                VLD1   dRow3b, [pSrc], SrcStepMinus1    ;// 3b
+                VMLAL  qRow0b, dRow2a, dCCoeff
+                VLD1   dRow0a, [pSrc], Step1            ;// 0a (fifth row: bottom of output row 3, top of the next iteration)
+                VMLAL  qRow1b, dRow3b, dBCoeff
+                VMLAL  qRow1a, dRow3a, dCCoeff
+                VMLAL  qRow0a, dRow1b, dDCoeff
+                VLD1   dRow0b, [pSrc], SrcStepMinus1    ;// 0b
+                VMLAL  qRow1b, dRow0a, dCCoeff
+                VMLAL  qRow0b, dRow2b, dDCoeff
+                VMLAL  qRow1a, dRow3b, dDCoeff
+                
+                
+                SUBS   iHeight, iHeight, #4             ;// Four rows done; these flags feed the BGT below
+                VMLAL  qRow1b, dRow0b, dDCoeff
+
+                VQRSHRN dOutRow0, qOutRow0, #6          ;// Saturating narrow with rounding: (sum + 32) >> 6
+                VQRSHRN dOutRow1, qOutRow1, #6
+                VQRSHRN dOutRow2, qOutRow2, #6
+                VST1   dOutRow0U64, [pDst], iDstStep
+                VQRSHRN dOutRow3, qOutRow3, #6
+                
+                VST1   dOutRow1U64, [pDst], iDstStep  
+                VST1   dOutRow2U64, [pDst], iDstStep
+                VST1   dOutRow3U64, [pDst], iDstStep  
+                
+
+                BGT     WidthIs8MVIsNotZero
+                MOV     return,  #OMX_Sts_NoErr
+                M_EXIT
+
+WidthIs4MVIsNotZero
+;// Two output rows per iteration; only the first 4 bytes of each narrowed row are stored.
+                VLD1   dRow1a, [pSrc], Step1
+                VMULL  qRow0a, dRow0a, dACoeff
+                VMULL  qRow0b, dRow1a, dACoeff
+                VLD1   dRow1b, [pSrc], SrcStepMinus1
+                VMLAL  qRow0a, dRow0b, dBCoeff
+                VMLAL  qRow0b, dRow1b, dBCoeff
+                VLD1   dRow0a, [pSrc], Step1            ;// Third row: bottom of output row 1, top of the next iteration
+                VMLAL  qRow0a, dRow1a, dCCoeff
+                VMLAL  qRow0b, dRow0a, dCCoeff
+                VLD1   dRow0b, [pSrc], SrcStepMinus1
+                SUBS   iHeight, iHeight, #2             ;// Two rows done; these flags feed the BGT below
+                VMLAL  qRow0b, dRow0b, dDCoeff
+                VMLAL  qRow0a, dRow1b, dDCoeff
+                
+                VQRSHRN dOutRow1, qOutRow1, #6          ;// Saturating narrow with rounding: (sum + 32) >> 6
+                VQRSHRN dOutRow0, qOutRow0, #6
+                
+                VST1   dOutRow0U32[0], [pDst], iDstStep ;// Store lane 0 only (4 bytes)
+                VST1   dOutRow1U32[0], [pDst], iDstStep  
+                
+                BGT     WidthIs4MVIsNotZero
+                MOV     return,  #OMX_Sts_NoErr
+                M_EXIT
+
+WidthIs2MVIsNotZero
+;// Same arithmetic as the 4-wide case; only the first 2 bytes of each narrowed row are stored.
+                VLD1   dRow1a, [pSrc], Step1
+                VMULL  qRow0a, dRow0a, dACoeff
+                VMULL  qRow0b, dRow1a, dACoeff
+                VLD1   dRow1b, [pSrc], SrcStepMinus1
+                VMLAL  qRow0a, dRow0b, dBCoeff
+                VMLAL  qRow0b, dRow1b, dBCoeff
+                VLD1   dRow0a, [pSrc], Step1            ;// Third row: bottom of output row 1, top of the next iteration
+                VMLAL  qRow0a, dRow1a, dCCoeff
+                VMLAL  qRow0b, dRow0a, dCCoeff
+                VLD1   dRow0b, [pSrc], SrcStepMinus1
+                SUBS   iHeight, iHeight, #2             ;// Two rows done; these flags feed the BGT below
+                VMLAL  qRow0b, dRow0b, dDCoeff
+                VMLAL  qRow0a, dRow1b, dDCoeff
+                
+                VQRSHRN dOutRow1, qOutRow1, #6          ;// Saturating narrow with rounding: (sum + 32) >> 6
+                VQRSHRN dOutRow0, qOutRow0, #6
+                
+                VST1   dOutRow0U16[0], [pDst], iDstStep ;// Store lane 0 only (2 bytes)
+                VST1   dOutRow1U16[0], [pDst], iDstStep  
+
+                BGT     WidthIs2MVIsNotZero 
+                MOV     return,  #OMX_Sts_NoErr
+                M_EXIT                
+                
+;// If fractional mv is (0, 0): plain copy, two rows per iteration
+WidthIs8MVIsZero
+                SUB     pSrc, pSrc, iSrcStep            ;// Undo the prologue's a/b loads (Step1 + SrcStepMinus1) so the loop restarts at row 0
+
+WidthIs8LoopMVIsZero
+                VLD1    dRow0a, [pSrc], iSrcStep
+                SUBS    iHeight, iHeight, #2
+                VLD1    dRow0b, [pSrc], iSrcStep
+                VST1    dOut0U64, [pDst], iDstStep      ;// dOut0U64/dOut1U64 are D0/D1, i.e. the rows just loaded
+                VST1    dOut1U64, [pDst], iDstStep
+                BGT     WidthIs8LoopMVIsZero
+
+                MOV     return,  #OMX_Sts_NoErr
+                M_EXIT
+
+WidthIs4MVIsZero                
+                VLD1    dRow0b, [pSrc], iSrcStep        ;// Next row into D1; D0 still holds the row loaded before it
+                
+                SUBS    iHeight, iHeight, #2
+                
+                VST1    dOut00U32[0], [pDst], iDstStep  ;// Store 4 bytes of D0
+                VLD1    dRow0a, [pSrc], iSrcStep        ;// Preload D0 for the next iteration
+                VST1    dOut01U32[0], [pDst], iDstStep  ;// Store 4 bytes of D1
+                
+                BGT     WidthIs4MVIsZero 
+                MOV     return,  #OMX_Sts_NoErr
+                M_EXIT
+                
+WidthIs2MVIsZero                
+                VLD1    dRow0b, [pSrc], iSrcStep        ;// Next row into D1; D0 still holds the row loaded before it
+                SUBS    iHeight, iHeight, #2
+                
+                VST1    dOut0U16[0], [pDst], iDstStep   ;// Store 2 bytes of D0
+                VLD1    dRow0a, [pSrc], iSrcStep        ;// Preload D0 for the next iteration
+                VST1    dOut1U16[0], [pDst], iDstStep   ;// Store 2 bytes of D1
+                
+                BGT     WidthIs2MVIsZero 
+                MOV     return,  #OMX_Sts_NoErr                                
+                M_END
+                    
+        ENDIF ;// CortexA8
+        
+        END
+
+;//-----------------------------------------------------------------------------------------------
+;// armVCM4P10_Interpolate_Chroma_asm ends
+;//-----------------------------------------------------------------------------------------------
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_QuantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_QuantTables_s.s
new file mode 100755
index 0000000..7e2642b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_QuantTables_s.s
@@ -0,0 +1,74 @@
+;//
+;// 
+;// File Name:  armVCM4P10_QuantTables_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;// Description:
+;// This file contains quantization tables
+;// 
+;// 
+
+         INCLUDE omxtypes_s.h
+         INCLUDE armCOMM_s.h
+     
+         
+         EXPORT armVCM4P10_MFMatrixQPModTable
+         EXPORT armVCM4P10_QPDivIntraTable
+         EXPORT armVCM4P10_QPDivPlusOneTable  
+         
+;//--------------------------------------------------------------
+;// This table contains armVCM4P10_MFMatrix [iQP % 6][0] entries,
+;// for values of iQP from 0 to 51 (inclusive). 
+;//--------------------------------------------------------------
+;// Stored as 9 rows x 6 halfwords (DCW) = 54 entries, two more than the 52 iQP values; the six values repeat with period 6.
+         M_TABLE armVCM4P10_MFMatrixQPModTable
+         DCW 13107, 11916, 10082, 9362, 8192, 7282
+         DCW 13107, 11916, 10082, 9362, 8192, 7282
+         DCW 13107, 11916, 10082, 9362, 8192, 7282
+         DCW 13107, 11916, 10082, 9362, 8192, 7282
+         DCW 13107, 11916, 10082, 9362, 8192, 7282
+         DCW 13107, 11916, 10082, 9362, 8192, 7282
+         DCW 13107, 11916, 10082, 9362, 8192, 7282
+         DCW 13107, 11916, 10082, 9362, 8192, 7282
+         DCW 13107, 11916, 10082, 9362, 8192, 7282
+         
+;//---------------------------------------------------------------
+;// This table contains ARM_M4P10_Q_OFFSET + 1 + (iQP / 6) values,
+;// for values of iQP from 0 to 51 (inclusive). 
+;//---------------------------------------------------------------
+;// Byte (DCB) entries: each row repeats 16 + row index six times, 54 entries in all. NOTE(review): this implies ARM_M4P10_Q_OFFSET is 15 - confirm against its definition.
+         M_TABLE armVCM4P10_QPDivPlusOneTable
+         DCB 16, 16, 16, 16, 16, 16
+         DCB 17, 17, 17, 17, 17, 17
+         DCB 18, 18, 18, 18, 18, 18
+         DCB 19, 19, 19, 19, 19, 19
+         DCB 20, 20, 20, 20, 20, 20
+         DCB 21, 21, 21, 21, 21, 21
+         DCB 22, 22, 22, 22, 22, 22
+         DCB 23, 23, 23, 23, 23, 23
+         DCB 24, 24, 24, 24, 24, 24
+
+;//------------------------------------------------------------------
+;// This table contains (1 << QbitsPlusOne) / 3 Values (Intra case) ,
+;// for values of iQP from 0 to 51 (inclusive). 
+;//------------------------------------------------------------------
+;// Word (DCD) entries equal to floor(2^(16 + iQP/6) / 3), each repeated six times. NOTE(review): the ", 2" macro argument is presumably an alignment - confirm in armCOMM_s.h.
+         M_TABLE armVCM4P10_QPDivIntraTable, 2
+         DCD 21845, 21845, 21845, 21845, 21845, 21845
+         DCD 43690, 43690, 43690, 43690, 43690, 43690
+         DCD 87381, 87381, 87381, 87381, 87381, 87381
+         DCD 174762, 174762, 174762, 174762, 174762, 174762
+         DCD 349525, 349525, 349525, 349525, 349525, 349525
+         DCD 699050, 699050, 699050, 699050, 699050, 699050
+         DCD 1398101, 1398101, 1398101, 1398101, 1398101, 1398101
+         DCD 2796202, 2796202, 2796202, 2796202, 2796202, 2796202
+         DCD 5592405, 5592405, 5592405, 5592405, 5592405, 5592405                
+         
+         
+         END
+         
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
new file mode 100755
index 0000000..ee9c339
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
@@ -0,0 +1,186 @@
+;//
+;// 
+;// File Name:  armVCM4P10_TransformResidual4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// Description:
+;// Transform Residual 4x4 Coefficients
+;// 
+;// 
+
+        
+;// Include standard headers
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS CortexA8
+        
+;// Import symbols required from other files
+;// (For example tables)
+    
+        
+        
+        
+;// Set debugging level        
+;//DEBUG_ON    SETL {TRUE}
+
+
+
+;// Guarding implementation by the processor name
+    
+    
+    
+
+
+
+
+
+;// Guarding implementation by the processor name
+    
+    IF  CortexA8
+
+;// ARM Registers
+    
+;//Input Registers
+pDst                RN  0                       ;// Address the 4x4 result is stored to
+pSrc                RN  1                       ;// Address the 4x4 input is loaded from
+
+
+;// Neon Registers (D0-D3 are reused for input, first-pass output and final output; D5-D8 for both sets of intermediates)
+      
+;// Packed Input pixels
+dIn0                DN  D0.S16       
+dIn1                DN  D1.S16       
+dIn2                DN  D2.S16       
+dIn3                DN  D3.S16
+
+;// Intermediate calculations       
+dZero               DN  D4.S16                  ;// Holds 0; second operand of the VHADD halving adds
+de0                 DN  D5.S16
+de1                 DN  D6.S16
+de2                 DN  D7.S16
+de3                 DN  D8.S16
+dIn1RS              DN  D7.S16                  ;// Same register as de2: halved dIn1, consumed when de2 is written
+dIn3RS              DN  D8.S16                  ;// Same register as de3: halved dIn3, consumed when de3 is written
+df0                 DN  D0.S16
+df1                 DN  D1.S16
+df2                 DN  D2.S16
+df3                 DN  D3.S16
+qf01                QN  Q0.32                   ;// D0:D1 viewed as 32-bit elements for the transpose
+qf23                QN  Q1.32                   ;// D2:D3 viewed as 32-bit elements for the transpose
+dg0                 DN  D5.S16
+dg1                 DN  D6.S16
+dg2                 DN  D7.S16
+dg3                 DN  D8.S16
+df1RS               DN  D7.S16                  ;// Same register as dg2: halved df1
+df3RS               DN  D8.S16                  ;// Same register as dg3: halved df3
+
+;// Output pixels
+dh0                 DN  D0.S16
+dh1                 DN  D1.S16
+dh2                 DN  D2.S16
+dh3                 DN  D3.S16
+
+       
+    ;// Allocate stack memory required by the function
+        
+
+    ;// Write function header
+        M_START armVCM4P10_TransformResidual4x4, ,d8
+        ;// Reads sixteen 16-bit values from pSrc, applies the butterfly below in both directions, and writes (x + 32) >> 6 to pDst.
+        ;******************************************************************
+        ;// The strategy used in implementing the transform is as follows:*
+        ;// Load the 4x4 block into 4 D registers (VLD4 de-interleaves)   *
+        ;// Transpose the 4x4 matrix (already done by the VLD4 load)      *
+        ;// Perform the row operations (on columns) using SIMD            *
+        ;// Transpose the 4x4 result matrix                               *
+        ;// Perform the column operations                                 *
+        ;// Store the 4x4 block at one go                                 *
+        ;******************************************************************
+
+        ;// Load all the 4x4 pixels in transposed form
+        
+        VLD4    {dIn0,dIn1,dIn2,dIn3},[pSrc]
+        
+        VMOV    dZero,#0                                    ;// Used to right shift by 1 (as the second operand of VHADD)
+        
+        
+        ;**************************************** 
+        ;// Row Operations (Performed on columns)
+        ;**************************************** 
+        
+        
+        VADD        de0,dIn0,dIn2                       ;//  e0 = d0 + d2 
+        VSUB        de1,dIn0,dIn2                        ;//  e1 = d0 - d2 
+        VHADD       dIn1RS,dIn1,dZero                   ;// (d1>>1): halving add with dZero, a register holding 0
+        VHADD       dIn3RS,dIn3,dZero                   ;// (d3>>1)
+        VSUB        de2,dIn1RS,dIn3                     ;//  e2 = (d1>>1) - d3 
+        VADD        de3,dIn1,dIn3RS                        ;//  e3 = d1 + (d3>>1) 
+        VADD        df0,de0,de3                         ;//  f0 = e0 + e3
+        VADD        df1,de1,de2                            ;//  f1 = e1 + e2
+        VSUB        df2,de1,de2                            ;//  f2 = e1 - e2
+        VSUB        df3,de0,de3                            ;//  f3 = e0 - e3
+        
+        
+        
+        ;*****************************************************************
+        ;// Transpose the resultant matrix
+        ;*****************************************************************
+        
+        VTRN    df0,df1                                     ;// 16-bit element swap within D0/D1
+        VTRN    df2,df3                                     ;// 16-bit element swap within D2/D3
+        VTRN    qf01,qf23                                   ;// 32-bit element swap between Q0 and Q1 completes the 4x4 transpose
+        
+        
+        ;******************************* 
+        ;// Column Operations 
+        ;******************************* 
+        
+        
+        VADD        dg0,df0,df2                         ;//  g0 = f0 + f2 
+        VSUB        dg1,df0,df2                            ;//  g1 = f0 - f2 
+        VHADD       df1RS,df1,dZero                     ;// (f1>>1): halving add with dZero, a register holding 0
+        VHADD       df3RS,df3,dZero                     ;// (f3>>1)
+        VSUB        dg2,df1RS,df3                       ;//  g2 = (f1>>1) - f3 
+        VADD        dg3,df1,df3RS                        ;//  g3 = f1 + (f3>>1) 
+        VADD        dh0,dg0,dg3                         ;//  h0 = g0 + g3
+        VADD        dh1,dg1,dg2                            ;//  h1 = g1 + g2
+        VSUB        dh2,dg1,dg2                            ;//  h2 = g1 - g2
+        VSUB        dh3,dg0,dg3                            ;//  h3 = g0 - g3
+        
+             
+        ;************************************************
+        ;// Calculate final value (colOp[i][j] + 32)>>6
+        ;************************************************
+        
+        VRSHR       dh0,#6                              ;// Rounding right shift: (h + 32) >> 6
+        VRSHR       dh1,#6
+        VRSHR       dh2,#6
+        VRSHR       dh3,#6
+        
+                
+        ;***************************
+        ;// Store all the 4x4 pixels
+        ;***************************
+        
+        VST1   {dh0,dh1,dh2,dh3},[pDst]
+            
+        
+        ;// Set return value (note: no instruction here actually writes one)
+        
+End                
+
+        
+        ;// Write function tail
+        M_END
+        
+    ENDIF                                                           ;//CortexA8            
+            
+    END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
new file mode 100755
index 0000000..4c52e22
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
@@ -0,0 +1,92 @@
+;//
+;// 
+;// File Name:  armVCM4P10_UnpackBlock4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+;// Define the processor variants supported by this file
+
+        M_VARIANTS ARM1136JS
+        
+                       
+        IF ARM1136JS
+        
+;//--------------------------------------
+;// Input Arguments and their scope/usage
+;//--------------------------------------
+ppSrc           RN 0    ;// Persistent variable: address of the packed-stream pointer (updated on exit)
+pDst            RN 1    ;// Persistent variable: destination buffer, zero-filled (32 bytes) then patched
+
+;//--------------------------------
+;// Variables and their scope/usage
+;//--------------------------------
+pSrc            RN 2    ;// Persistent variable: running read pointer into the packed stream
+Flag            RN 3    ;// Control byte: bits 0-3 = index, 0x10 = 16-bit value follows, 0x20 = last entry
+Value           RN 4    ;// Unpacked coefficient (also the zero source for the initial fill)
+Value2          RN 5    ;// High byte of a 16-bit coefficient (also zeroed for the initial STRD fill)
+strOffset       RN 6    ;// Byte offset into <pDst> for the current coefficient
+cstOffset       RN 7    ;// Constant mask (31) applied to (Flag << 1)
+
+        
+        M_START armVCM4P10_UnpackBlock4x4, r7
+        
+        LDR     pSrc, [ppSrc]                       ;// Load pSrc
+        MOV     cstOffset, #31                      ;// To be used in the loop, to compute offset
+        
+        ;//-----------------------------------------------------------------------
+        ;// Firstly, fill all the coefficient values in the <pDst> buffer with zero
+        ;//-----------------------------------------------------------------------
+        
+        MOV      Value,  #0                         ;// Initialize the zero value
+        MOV      Value2, #0                         ;// Initialize the zero value
+        LDRB     Flag,  [pSrc], #1                  ;// Preload <Flag> before <unpackLoop>
+        
+        STRD     Value, [pDst, #0]                  ;// pDst[0]  = pDst[1]  = pDst[2]  = pDst[3]  = 0
+        STRD     Value, [pDst, #8]                  ;// pDst[4]  = pDst[5]  = pDst[6]  = pDst[7]  = 0
+        STRD     Value, [pDst, #16]                 ;// pDst[8]  = pDst[9]  = pDst[10] = pDst[11] = 0
+        STRD     Value, [pDst, #24]                 ;// pDst[12] = pDst[13] = pDst[14] = pDst[15] = 0
+        
+        ;//----------------------------------------------------------------------------
+        ;// The loop below parses and unpacks the input stream. The C-model has 
+        ;// a somewhat complicated logic for sign extension.  But in the v6 version,
+        ;// that can be easily taken care by loading the data from <pSrc> stream as 
+        ;// SIGNED byte/halfword. So, based on the first TST instruction, 8-bits or 
+        ;// 16-bits are read.
+        ;//
+        ;// Next, to compute the offset, where the unpacked value needs to be stored,
+        ;// we modify the computation to perform [(Flag & 15) << 1] as [(Flag << 1) & 31]
+        ;// This results in a saving of one cycle.
+        ;//----------------------------------------------------------------------------
+        
+unpackLoop
+        TST      Flag,  #0x10                        ;// Computing (Flag & 0x10): NE => 16-bit value, EQ => 8-bit value
+        LDRSBNE  Value2,[pSrc,#1]                    ;// Load byte wise to avoid unaligned access (high byte, sign-extended)
+        LDRBNE   Value, [pSrc], #2                   ;// Low byte (zero-extended); pSrc += 2
+        AND      strOffset, cstOffset, Flag, LSL #1  ;// strOffset = (Flag & 15) << 1;
+        LDRSBEQ  Value, [pSrc], #1                   ;// Value = (OMX_S8)  *pSrc++  (sign-extended byte)
+        ORRNE    Value,Value,Value2, LSL #8          ;// Value = low byte | (sign-extended high byte << 8)
+        
+        TST      Flag,  #0x20                        ;// Computing (Flag & 0x20) to check, if we're done
+        LDRBEQ   Flag,  [pSrc], #1                   ;// Flag  = (OMX_U8) *pSrc++, for next iteration
+        STRH     Value, [pDst, strOffset]            ;// Store <Value> at offset <strOffset>
+        BEQ      unpackLoop                          ;// Loop while (Flag & 0x20) was clear (flags still from the TST above)
+        
+        STR      pSrc, [ppSrc]                       ;// Update the bitstream pointer
+        M_END
+    
+    ENDIF
+    
+    
+    
+    END
+    
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
new file mode 100755
index 0000000..40d4d5e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
@@ -0,0 +1,88 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_DeblockChroma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 intra chroma deblock
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DeblockChroma_I
+ *
+ * Description:
+ * Performs deblocking filtering on all edges of the chroma macroblock (16x16).
+ *
+ * Remarks:
+ * Vertical edges are filtered first; horizontal edges are filtered only if that pass returned OMX_Sts_NoErr.
+ * Parameters:
+ * [in]	pSrcDst         pointer to the input macroblock. Must be 8-byte aligned.
+ * [in]	srcdstStep      Step of the arrays
+ * [in]	pAlpha          pointer to a 2x2 array of alpha thresholds, organized as follows: { external
+ *                          vertical edge, internal  vertical edge, external
+ *                         horizontal edge, internal horizontal edge }
+ * [in]	pBeta			pointer to a 2x2 array of beta thresholds, organized as follows: { external
+ *                              vertical edge, internal vertical edge, external  horizontal edge,
+ *                              internal  horizontal edge }
+ * [in]	pThresholds		Array of size  8x2 of Thresholds (TC0) (values for the left or
+ *                               above edge of each 4x2 or 2x4 block, arranged in  vertical block order
+ *                               and then in  horizontal block order)
+ * [in]	pBS				array of size 16x2 of BS parameters (arranged in scan block order for vertical edges and then horizontal edges);
+ *                         valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4.  Must be 4-byte aligned.
+ * [out]	pSrcDst		pointer to filtered output macroblock
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *   - Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds, or pBS is NULL.
+ *   - pSrcDst is not 8-byte aligned.
+ *   - either pThresholds or pBS is not 4-byte aligned.
+ *   - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3.
+ *   - srcdstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I(
+	OMX_U8* pSrcDst, 
+	OMX_S32 srcdstStep, 
+	const OMX_U8* pAlpha, 
+	const OMX_U8* pBeta, 
+	const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+)
+{
+    OMXResult errorCode;
+    
+    armRetArgErrIf(pSrcDst == NULL,                 OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcDst),     OMX_Sts_BadArgErr);
+    armRetArgErrIf(srcdstStep & 7,                  OMX_Sts_BadArgErr);
+    armRetArgErrIf(pAlpha == NULL,                  OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta == NULL,                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(pThresholds == NULL,             OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBS == NULL,                     OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pBS),         OMX_Sts_BadArgErr);
+    /* Pass 1: vertical edges, using the first half of each parameter table. */
+    errorCode = omxVCM4P10_FilterDeblockingChroma_VerEdge_I(
+        pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+
+    armRetArgErrIf(errorCode != OMX_Sts_NoErr, errorCode) /* stop if the vertical pass failed */
+    /* Pass 2: horizontal edges, using the second half of each table (+2, +2, +8, +16). */
+    errorCode = omxVCM4P10_FilterDeblockingChroma_HorEdge_I(
+        pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+8, pBS+16);
+
+    return errorCode;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
new file mode 100755
index 0000000..619365f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
@@ -0,0 +1,91 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_DeblockLuma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 luma deblock
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+ 
+
+/**
+ * Function: omxVCM4P10_DeblockLuma_I
+ *
+ * Description:
+ * This function performs deblock filtering of the horizontal and vertical edges of a luma macroblock
+ * (16x16).
+ *
+ * Remarks:
+ * Vertical edges are filtered first; horizontal edges are filtered only if that pass returned OMX_Sts_NoErr.
+ * Parameters:
+ * [in]	pSrcDst         pointer to the input macroblock. Must be 8-byte aligned.
+ * [in]	srcdstStep      image width
+ * [in]	pAlpha          pointer to a 2x2 table of alpha thresholds, organized as follows: { external
+ *                             vertical edge, internal vertical edge, external horizontal
+ *                             edge, internal horizontal edge }
+ * [in]	pBeta			pointer to a 2x2 table of beta thresholds, organized as follows: { external
+ *                              vertical edge, internal vertical edge, external  horizontal edge,
+ *                              internal  horizontal edge }
+ * [in]	pThresholds		pointer to a 16x2 table of threshold (TC0), organized as follows: { values for
+ *                              the  left or above edge of each 4x4 block, arranged in  vertical block order
+ *                              and then in horizontal block order }
+ * [in]	pBS				 pointer to a 16x2 table of BS parameters arranged in scan block order for vertical edges and then horizontal edges;
+ *                               valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4.  Must be 4-byte aligned.
+ * [out]	pSrcDst		pointer to filtered output macroblock.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *    - Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds or pBS is NULL.
+ *    - pSrcDst is not 8-byte aligned.
+ *    - srcdstStep is not a multiple of 8
+ *    - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3.
+ *    - either pThresholds or pBS is not 4-byte aligned.
+ *
+ */
+
+OMXResult omxVCM4P10_DeblockLuma_I(
+	OMX_U8* pSrcDst, 
+	OMX_S32 srcdstStep, 
+	const OMX_U8* pAlpha, 
+	const OMX_U8* pBeta, 
+	const OMX_U8* pThresholds, 
+	const OMX_U8 *pBS
+)
+{
+    OMXResult errorCode;
+    
+    armRetArgErrIf(pSrcDst == NULL,             OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(srcdstStep & 7,              OMX_Sts_BadArgErr);    
+    armRetArgErrIf(pAlpha == NULL,              OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta == NULL,               OMX_Sts_BadArgErr);
+    armRetArgErrIf(pThresholds == NULL,         OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBS == NULL,                     OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pBS),         OMX_Sts_BadArgErr);
+    /* Pass 1: vertical edges, using the first half of each parameter table. */
+    errorCode = omxVCM4P10_FilterDeblockingLuma_VerEdge_I(
+        pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+
+    armRetArgErrIf(errorCode != OMX_Sts_NoErr, errorCode) /* stop if the vertical pass failed */
+    /* Pass 2: horizontal edges, using the second half of each table (+2, +2, +16, +16). */
+    errorCode = omxVCM4P10_FilterDeblockingLuma_HorEdge_I(
+        pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+16, pBS+16);
+
+    return errorCode;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
new file mode 100755
index 0000000..4e871bf
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
@@ -0,0 +1,62 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 decode coefficients module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse raster scan for 2x2 block of 
+ * ChromaDCLevel. The decoded coefficients in packed position-coefficient 
+ * buffer are stored in increasing raster scan order, namely position order.
+ *
+ * Remarks:
+ * Thin wrapper: forwards to armVCM4P10_DecodeCoeffsToPair with the two trailing arguments fixed to 17 and 4.
+ * Parameters:
+ * [in]	ppBitStream		Double pointer to current byte in bit stream
+ *								buffer
+ * [in]	pOffset			Pointer to current bit position in the byte 
+ *								pointed to by *ppBitStream
+ * [out]	ppBitStream		*ppBitStream is updated after each block is decoded
+ * [out]	pOffset			*pOffset is updated after each block is decoded
+ * [out]	pNumCoeff		Pointer to the number of nonzero coefficients
+ *								in this block
+ * [out]	ppPosCoefbuf	Double pointer to destination residual
+ *								coefficient-position pair buffer
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+     const OMX_U8** ppBitStream,
+     OMX_S32* pOffset,
+     OMX_U8* pNumCoeff,
+     OMX_U8** ppPosCoefbuf        
+ )
+
+{
+    return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff,
+                                         ppPosCoefbuf, 17, 4); /* NOTE(review): presumably 17 = chroma-DC table selector, 4 = max coeffs of a 2x2 block; confirm */
+
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
new file mode 100755
index 0000000..b29e576
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
@@ -0,0 +1,68 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 decode coefficients module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of 
+ * Intra16x16DCLevel, Intra16x16ACLevel,LumaLevel, and ChromaACLevel. 
+ * Inverse field scan is not supported. The decoded coefficients in packed 
+ * position-coefficient buffer are stored in increasing zigzag order instead 
+ * of position order.
+ *
+ * Remarks:
+ * Thin wrapper: forwards every argument unchanged, in signature order, to armVCM4P10_DecodeCoeffsToPair.
+ * Parameters:
+ * [in]	ppBitStream		Double pointer to current byte in bit stream buffer
+ * [in]	pOffset			Pointer to current bit position in the byte pointed
+ *								to by *ppBitStream
+ * [in]	sMaxNumCoeff	Maximum number of non-zero coefficients in current
+ *								block
+ * [in]	sVLCSelect		VLC table selector, obtained from number of non-zero
+ *								AC coefficients of above and left 4x4 blocks. It is 
+ *								equivalent to the variable nC described in H.264 standard 
+ *								table 9-5, except its value can't be less than zero.
+ * [out]	ppBitStream		*ppBitStream is updated after each block is decoded
+ * [out]	pOffset			*pOffset is updated after each block is decoded
+ * [out]	pNumCoeff		Pointer to the number of nonzero coefficients in
+ *								this block
+ * [out]	ppPosCoefbuf	Double pointer to destination residual
+ *								coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC(
+     const OMX_U8** ppBitStream,
+     OMX_S32* pOffset,
+     OMX_U8* pNumCoeff,
+     OMX_U8**ppPosCoefbuf,
+     OMX_INT sVLCSelect,
+     OMX_INT sMaxNumCoeff        
+ )
+{
+    return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff,
+                                         ppPosCoefbuf, sVLCSelect, sMaxNumCoeff);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
new file mode 100755
index 0000000..485a488
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
@@ -0,0 +1,396 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// Description:
+;// H.264 inverse quantize and transform module
+;// 
+;// 
+
+        
+
+;// Include standard headers
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+;// Import symbols required from other files
+;// (For example tables)
+    
+        IMPORT armVCM4P10_UnpackBlock4x4
+        IMPORT armVCM4P10_TransformResidual4x4
+        IMPORT armVCM4P10_QPDivTable
+        IMPORT armVCM4P10_VMatrixU16
+        IMPORT armVCM4P10_QPModuloTable 
+        
+        M_VARIANTS CortexA8
+        
+;// Set debugging level        
+;//DEBUG_ON    SETL {TRUE}
+
+
+;// Static Function: armVCM4P10_DequantLumaAC4x4
+
+;// Guarding implementation by the processor name
+    
+ 
+
+;// Guarding implementation by the processor name
+    
+
+
+
+
+
+;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd            
+    
+;// Guarding implementation by the processor name
+    
+    
+    
+;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd            
+    
+;// Guarding implementation by the processor name
+    
+    IF  CortexA8
+    
+
+;// ARM Registers
+
+;//Input Registers
+ppSrc       RN  0
+pPred       RN  1
+pDC         RN  2
+pDst        RN  3
+   
+
+;//Output Registers
+result      RN  0
+
+;//Local Scratch Registers
+
+;//Registers used in armVCM4P10_DequantLumaAC4x4
+pQPdiv      RN  10
+pQPmod      RN  11
+pVRow       RN  2
+QPmod       RN  12
+shift       RN  14
+index0      RN  1 
+index1      RN  10 
+
+;//Registers used in DequantTransformResidualFromPairAndAdd
+pDelta      RN  4
+pDeltaTmp   RN  6
+AC          RN  5                   ;//Load from stack
+pPredTemp   RN  7
+pDCTemp     RN  8
+pDstTemp    RN  9
+pDeltaArg1  RN  1
+pDeltaArg0  RN  0
+QP          RN  1                   ;//Load from stack
+DCval       RN  10  
+predstep    RN  1
+dstStep     RN  10
+PredVal1    RN  3
+PredVal2    RN  5
+
+
+
+
+;// Neon Registers
+
+;// Registers used in armVCM4P10_DequantLumaAC4x4
+
+dVmatrix            DN  D6.8  
+dindexRow0          DN  D7.32 
+dindexRow1          DN  D9.32 
+dByteIndexRow0      DN  D7.8
+dByteIndexRow1      DN  D9.8
+dVRow0              DN  D8.8  
+dVRow1              DN  D4.8
+dVRow0U16           DN  D8.U16
+dVRow1U16           DN  D4.U16
+dVRow2U16           DN  D8.U16
+dVRow3U16           DN  D4.U16
+
+dShift              DN  D5.U16
+dSrcRow0            DN  D0.I16
+dSrcRow1            DN  D1.I16
+dSrcRow2            DN  D2.I16    
+dSrcRow3            DN  D3.I16
+dDqntRow0           DN  D0.I16  
+dDqntRow1           DN  D1.I16 
+dDqntRow2           DN  D2.I16 
+dDqntRow3           DN  D3.I16  
+
+;// Registers used in TransformResidual4x4
+
+;// Packed Input pixels
+dIn0                DN  D0.S16       
+dIn1                DN  D1.S16       
+dIn2                DN  D2.S16       
+dIn3                DN  D3.S16
+qIn01               QN  Q0.32
+qIn23               QN  Q1.32
+
+;// Intermediate calculations       
+dZero               DN  D4.S16
+de0                 DN  D5.S16
+de1                 DN  D6.S16
+de2                 DN  D7.S16
+de3                 DN  D8.S16
+dIn1RS              DN  D7.S16
+dIn3RS              DN  D8.S16
+df0                 DN  D0.S16
+df1                 DN  D1.S16
+df2                 DN  D2.S16
+df3                 DN  D3.S16
+qf01                QN  Q0.32
+qf23                QN  Q1.32
+dg0                 DN  D5.S16
+dg1                 DN  D6.S16
+dg2                 DN  D7.S16
+dg3                 DN  D8.S16
+df1RS               DN  D7.S16
+df3RS               DN  D8.S16
+
+;// Output pixels
+dh0                 DN  D0.S16
+dh1                 DN  D1.S16
+dh2                 DN  D2.S16
+dh3                 DN  D3.S16 
+
+;// Registers used in DequantTransformResidualFromPairAndAdd
+
+dDeltaRow0          DN  D0.S16
+dDeltaRow1          DN  D1.S16
+dDeltaRow2          DN  D2.S16
+dDeltaRow3          DN  D3.S16
+qDeltaRow01         QN  Q0.S16
+qDeltaRow23         QN  Q1.S16
+
+dPredValRow01       DN  D4.U8
+dPredValRow23       DN  D5.U8
+
+qSumRow01           QN  Q3.S16
+qSumRow23           QN  Q4.S16
+dDstRow01           DN  D0.U8
+dDstRow23           DN  D1.U8
+dDstRow0            DN  D0.32[0]
+dDstRow1            DN  D0.32[1]
+dDstRow2            DN  D1.32[0]
+dDstRow3            DN  D1.32[1]
+    
+           
+    ;// Allocate stack memory required by the function (32-byte scratch buffer that receives the unpacked 4x4 block)
+        M_ALLOC8 pBuffer, 32
+               
+
+    ;// Write function header
+        M_START omxVCM4P10_DequantTransformResidualFromPairAndAdd,r11,d9
+        
+        ;// Define stack arguments
+        M_ARG   predStepOnStack, 4
+        M_ARG   dstStepOnStack,4
+        M_ARG   QPOnStack, 4
+        M_ARG   ACOnStack,4
+  
+        
+        M_ADR   pDelta,pBuffer 
+        M_LDR   AC,ACOnStack 
+        
+         
+        ;// Save registers r1,r2,r3 before function call    
+        MOV     pPredTemp,pPred
+        MOV     pDCTemp,pDC
+        MOV     pDstTemp,pDst
+        
+        CMP     AC,#0                                       ;// AC == 0 -> take the DC-only path
+        BEQ     DCcase
+        MOV     pDeltaArg1,pDelta                           ;// Set up r1 for armVCM4P10_UnpackBlock4x4
+    
+        BL      armVCM4P10_UnpackBlock4x4
+    
+        ;//--------------------------------------------------------
+        ;// armVCM4P10_DequantLumaAC4x4 : static function inlined
+        ;//--------------------------------------------------------
+        
+        ;//BL      armVCM4P10_DequantLumaAC4x4
+        M_LDR   QP,QPOnStack                                ;// Set up r1 for armVCM4P10_DequantLumaAC4x4
+                
+        LDR    pQPmod,=armVCM4P10_QPModuloTable
+        LDR    pQPdiv,=armVCM4P10_QPDivTable        
+        LDR    pVRow,=armVCM4P10_VMatrixU16
+        
+        
+        LDRSB  QPmod,[pQPmod,QP]                    ;// (QP%6) * 6
+        LDRSB  shift,[pQPdiv,QP]                    ;// Shift = QP / 6
+                
+        LDR    index1,=0x03020504 
+        LDR    index0,=0x05040100                   ;// Indexes into dVmatrix
+        ADD    pVRow,pVRow,QPmod
+        VDUP   dindexRow0,index0 
+        VDUP   dindexRow1,index1
+        VDUP   dShift,shift 
+        
+        ;// Load all 4x4 pVRow[] values
+        VLD1   dVmatrix,[pVRow]                     ;// dVmatrix = [0d|0c|0b|0a]
+        
+        
+        VTBL   dVRow0,dVmatrix,dByteIndexRow0       ;// row0 = row2 = [pVRow[2] | pVRow[0] | pVRow[2] | pVRow[0]]
+        VTBL   dVRow1,dVmatrix,dByteIndexRow1       ;// row1 = row3 = [pVRow[1] | pVRow[2] | pVRow[1] | pVRow[2]]
+        CMP     pDCTemp,#0                          ;// pDC NULL? Flags are consumed by LDRSHNE / VMOVNE below
+        ;// Load all the 4x4 'src' values  
+        VLD1   { dSrcRow0,dSrcRow1,dSrcRow2,dSrcRow3 },[pDelta] 
+        
+        VSHL   dVRow0U16,dVRow0U16,dShift 
+        VSHL   dVRow1U16,dVRow1U16,dShift 
+        LDRSHNE DCval,[pDCTemp]                     ;// DCval = *pDC, only when pDC is non-NULL
+        
+        
+        ;// Multiply src[] with pVRow[]
+        VMUL    dDqntRow0,dSrcRow0,dVRow0U16
+        VMUL    dDqntRow1,dSrcRow1,dVRow1U16
+        VMUL    dDqntRow2,dSrcRow2,dVRow2U16
+        VMUL    dDqntRow3,dSrcRow3,dVRow3U16
+        
+        
+        
+        ;//-------------------------------------------------------------
+        ;// TransformResidual4x4 : Inlined to avoid Load/Stores
+        ;//-------------------------------------------------------------
+        
+        
+        ;//BL      armVCM4P10_TransformResidual4x4
+        ;//STRHNE  DCval,[pDelta]
+        VMOVNE    dIn0[0],DCval                     ;// Replace coefficient [0] with DCval when pDC is non-NULL
+        
+        
+        
+        ;//*****************************************************************
+        ;// Transpose the input pixels : perform Row ops as Col ops
+        ;//*****************************************************************
+        
+        VTRN    dIn0,dIn1
+        VTRN    dIn2,dIn3
+        VTRN    qIn01,qIn23 
+         
+        
+        VMOV    dZero,#0                                    ;// Used to right shift by 1 
+        
+        
+        ;//**************************************** 
+        ;// Row Operations (Performed on columns)
+        ;//**************************************** 
+        
+        
+        VADD        de0,dIn0,dIn2                       ;//  e0 = d0 + d2 
+        VSUB        de1,dIn0,dIn2                        ;//  e1 = d0 - d2 
+        VHADD       dIn1RS,dIn1,dZero                   ;// (d1>>1): halving add with dZero, a register holding 0
+        VHADD       dIn3RS,dIn3,dZero                   ;// (d3>>1)
+        VSUB        de2,dIn1RS,dIn3                     ;//  e2 = (d1>>1) - d3 
+        VADD        de3,dIn1,dIn3RS                        ;//  e3 = d1 + (d3>>1) 
+        VADD        df0,de0,de3                         ;//  f0 = e0 + e3
+        VADD        df1,de1,de2                            ;//  f1 = e1 + e2
+        VSUB        df2,de1,de2                            ;//  f2 = e1 - e2
+        VSUB        df3,de0,de3                            ;//  f3 = e0 - e3
+        
+        
+        
+        ;//*****************************************************************
+        ;// Transpose the resultant matrix
+        ;//*****************************************************************
+        
+        VTRN    df0,df1
+        VTRN    df2,df3
+        VTRN    qf01,qf23 
+        
+        
+        ;//******************************* 
+        ;// Column Operations 
+        ;//******************************* 
+        
+        
+        VADD        dg0,df0,df2                         ;//  g0 = f0 + f2 
+        VSUB        dg1,df0,df2                            ;//  g1 = f0 - f2 
+        VHADD       df1RS,df1,dZero                     ;// (f1>>1): halving add with dZero, a register holding 0
+        VHADD       df3RS,df3,dZero                     ;// (f3>>1)
+        VSUB        dg2,df1RS,df3                       ;//  g2 = (f1>>1) - f3 
+        VADD        dg3,df1,df3RS                        ;//  g3 = f1 + (f3>>1) 
+        VADD        dh0,dg0,dg3                         ;//  h0 = g0 + g3
+        VADD        dh1,dg1,dg2                            ;//  h1 = g1 + g2
+        VSUB        dh2,dg1,dg2                            ;//  h2 = g1 - g2
+        VSUB        dh3,dg0,dg3                            ;//  h3 = g0 - g3
+        
+             
+        ;//************************************************
+        ;// Calculate final value (colOp[i][j] + 32)>>6
+        ;//************************************************
+        
+        VRSHR       dh0,#6
+        VRSHR       dh1,#6
+        VRSHR       dh2,#6
+        VRSHR       dh3,#6
+        
+               
+        B       OutDCcase 
+        
+
+DCcase
+        ;// Calculate the Transformed DCvalue : (DCval+32)>>6
+        LDRSH   DCval,[pDCTemp] 
+        ADD     DCval,DCval,#32 
+        ASR     DCval,DCval,#6
+        
+        VDUP    dDeltaRow0, DCval                       ;// pDelta[0]  = pDelta[1]  = pDelta[2]  = pDelta[3] = DCval
+        VDUP    dDeltaRow1, DCval                        ;// pDelta[4]  = pDelta[5]  = pDelta[6]  = pDelta[7] = DCval
+        VDUP    dDeltaRow2, DCval                        ;// pDelta[8]  = pDelta[9]  = pDelta[10] = pDelta[11] = DCval
+        VDUP    dDeltaRow3, DCval                        ;// pDelta[12] = pDelta[13] = pDelta[14] = pDelta[15] = DCval
+            
+                
+OutDCcase      
+        M_LDR   predstep,predStepOnStack
+        M_LDR   dstStep,dstStepOnStack
+        
+        LDR     PredVal1,[pPredTemp],predstep           ;// 4 prediction bytes of row 0, then advance by predstep
+        LDR     PredVal2,[pPredTemp],predstep           ;// 4 prediction bytes of row 1, then advance by predstep
+        VMOV    dPredValRow01,PredVal1,PredVal2
+        
+        LDR     PredVal1,[pPredTemp],predstep           ;// 4 prediction bytes of row 2, then advance by predstep
+        LDR     PredVal2,[pPredTemp]                    ;// 4 prediction bytes of row 3
+        VMOV    dPredValRow23,PredVal1,PredVal2
+ 
+        
+        VADDW   qSumRow01,qDeltaRow01,dPredValRow01     ;// Widening add: 16-bit delta + 8-bit prediction (rows 0,1)
+        VADDW   qSumRow23,qDeltaRow23,dPredValRow23     ;// Widening add: 16-bit delta + 8-bit prediction (rows 2,3)
+        VQMOVUN dDstRow01,qSumRow01                     ;// Saturate the sums to unsigned 8-bit
+        VQMOVUN dDstRow23,qSumRow23
+        
+ 
+        VST1    dDstRow0,[pDstTemp],dstStep             ;// Store one 32-bit lane (4 pixels) per row, stepping by dstStep
+        VST1    dDstRow1,[pDstTemp],dstStep
+        VST1    dDstRow2,[pDstTemp],dstStep
+        VST1    dDstRow3,[pDstTemp]
+        
+        ;// Set return value
+        MOV     result,#OMX_Sts_NoErr
+        
+End                
+
+        
+        ;// Write function tail
+        
+        M_END
+        
+    ENDIF                                                    ;//CORTEXA8   
+    
+         
+            
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
new file mode 100644
index 0000000..4606197
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
@@ -0,0 +1,202 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS CortexA8
+
+        IF CortexA8
+
+        IMPORT  armVCM4P10_DeblockingChromabSGE4_unsafe
+        IMPORT  armVCM4P10_DeblockingChromabSLT4_unsafe
+        
+LOOP_COUNT  EQU 0x40000000      ;// seed for XY: doubled by ADDS once per LoopY pass, becomes zero after the second pass
+MASK_3      EQU 0x03030303      ;// bits 0-1 of each of the four bS bytes
+MASK_4      EQU 0x04040404      ;// bit 2 of each of the four bS bytes
+
+;// Function arguments
+
+pSrcDst     RN 0
+srcdstStep  RN 1
+pAlpha      RN 2
+pBeta       RN 3
+
+pThresholds RN 5                ;// loaded from stack argument ppThresholds
+pBS         RN 4                ;// loaded from stack argument ppBS
+bS3210      RN 6                ;// one 32-bit word (four bS bytes) read from pBS per LoopY pass
+
+;// Loop 
+
+XY          RN 7                ;// loop counter, seeded with LOOP_COUNT
+
+;// Pixels
+dP_0        DN D4.U8
+dP_1        DN D5.U8  
+dP_2        DN D6.U8  
+dQ_0        DN D8.U8  
+dQ_1        DN D9.U8  
+dQ_2        DN D10.U8 
+
+;// Filtering Decision
+dAlpha      DN D0.U8
+dBeta       DN D2.U8
+
+dFilt       DN D16.U8
+dAqflg      DN D12.U8           ;// same register as dAp1p0
+dApflg      DN D17.U8           ;// same register as dAq2q0
+
+dAp0q0      DN D13.U8           ;// same register as dP_0t
+dAp1p0      DN D12.U8
+dAq1q0      DN D18.U8
+dAp2p0      DN D19.U8           ;// same register as dTemp
+dAq2q0      DN D17.U8
+
+qBS3210     QN Q13.U16          ;// Q13 is the register pair D26:D27
+dBS3210     DN D26              ;// low half of qBS3210
+dMask_bs    DN D27              ;// high half of qBS3210
+dFilt_bs    DN D26.U16          ;// same register as dBS3210, viewed as four 16-bit lanes
+
+;// bSLT4
+dMask_0     DN D14.U8
+dMask_1     DN D15.U8    
+dMask_4     DN D1.U16
+
+Mask_4      RN 8
+Mask_3      RN 9
+
+dTemp       DN D19.U8
+
+;// Result
+dP_0t       DN D13.U8   ;// not written in this file; presumably produced by the bSGE4 helper - confirm
+dQ_0t       DN D31.U8   
+
+dP_0n       DN D29.U8   ;// not written in this file before the VBIT; presumably produced by the bSLT4 helper - confirm
+dQ_0n       DN D24.U8
+
+        
+        ;// Function header: in-place chroma filter entry point; LoopY below runs twice (see LOOP_COUNT), one bS word per pass
+        M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r9, d15
+        
+        ;//Arguments on the stack
+        M_ARG   ppThresholds, 4
+        M_ARG   ppBS, 4
+        
+        ;// d0-dAlpha_0
+        ;// d2-dBeta_0
+
+        ;load alpha1,beta1 somewhere to avoid more loads
+        VLD1        {dAlpha[]}, [pAlpha]!                ;// replicate the first alpha byte into every lane; pAlpha is written back past it
+        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1 ;// back up two rows ...
+        SUB         pSrcDst, pSrcDst, srcdstStep         ;// ... and one more: LoopY starts loading three rows above the incoming pSrcDst
+        VLD1        {dBeta[]}, [pBeta]!                  ;// same for beta
+        
+        M_LDR       pBS, ppBS
+        M_LDR       pThresholds, ppThresholds 
+
+        LDR         Mask_3, =MASK_3
+        LDR         Mask_4, =MASK_4
+
+        VMOV        dMask_0, #0                          ;// dMask_0/dMask_1 are not read again in this file; presumably used by the imported helpers - confirm
+        VMOV        dMask_1, #1     
+        VMOV        dMask_4, #4                          ;// 16-bit lanes holding 4, used by the VTST below
+        
+        LDR         XY, =LOOP_COUNT
+
+        ;// p0-p3 - d4-d7
+        ;// q0-q3 - d8-d11
+LoopY        
+        LDR         bS3210, [pBS], #8                    ;// four bS bytes for this pass; pBS advances 8 bytes
+        
+        VLD1        dP_2, [pSrcDst], srcdstStep          ;// six consecutive rows are loaded: p2,p1,p0,q0,q1,q2
+        ;1
+        VLD1        dP_1, [pSrcDst], srcdstStep
+        CMP         bS3210, #0                           ;// flags are consumed by BEQ NoFilterBS0 below (NEON ops in between leave them alone)
+        VLD1        dP_0, [pSrcDst], srcdstStep
+        ;1
+        VLD1        dQ_0, [pSrcDst], srcdstStep
+        VABD        dAp2p0, dP_2, dP_0                   ;// |p2 - p0|
+        VLD1        dQ_1, [pSrcDst], srcdstStep
+        VABD        dAp0q0, dP_0, dQ_0                   ;// |p0 - q0|
+        VLD1        dQ_2, [pSrcDst], srcdstStep          ;// pSrcDst now points six rows below where this pass started
+        BEQ         NoFilterBS0                          ;// all four bS bytes are zero: nothing to filter on this pass
+
+        VABD        dAp1p0, dP_1, dP_0                   ;// |p1 - p0|
+        VABD        dAq1q0, dQ_1, dQ_0                   ;// |q1 - q0|
+
+        VCGT        dFilt, dAlpha, dAp0q0                ;// dFilt = alpha > |p0 - q0|
+        VMOV.U32    dBS3210[0], bS3210                   ;// move the four bS bytes into the low word of D26
+        VMAX        dAp1p0, dAq1q0, dAp1p0               ;// max(|q1 - q0|, |p1 - p0|)
+        VMOVL       qBS3210, dBS3210.U8                  ;// widen each bS byte to 16 bits, so one bS spans two byte lanes
+        VABD        dAq2q0, dQ_2, dQ_0                   ;// |q2 - q0|
+        VCGT        dMask_bs.S16, dBS3210.S16, #0        ;// all-ones in the 16-bit lanes whose bS > 0
+
+        VCGT        dAp1p0, dBeta, dAp1p0                ;// beta > max(|q1 - q0|, |p1 - p0|)
+        VCGT        dAp2p0, dBeta, dAp2p0                ;// beta > |p2 - p0|
+        
+        VAND        dFilt, dMask_bs.U8                   ;// drop lanes whose bS is zero
+
+        TST         bS3210, Mask_3                       ;// NE if any bS byte has bit 0 or 1 set; consumed by the first BLNE
+
+        VCGT        dAq2q0, dBeta, dAq2q0                ;// beta > |q2 - q0|
+        VAND        dFilt, dFilt, dAp1p0                 ;// final per-lane filter enable
+
+        VAND        dAqflg, dFilt, dAq2q0
+        VAND        dApflg, dFilt, dAp2p0
+        
+        ;// bS < 4 Filtering
+        BLNE        armVCM4P10_DeblockingChromabSLT4_unsafe
+
+        TST         bS3210, Mask_4                       ;// NE if any bS byte has bit 2 set; consumed by the second BLNE
+
+        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// six rows were loaded; minus four leaves pSrcDst on the p0 row
+        VTST        dFilt_bs, dFilt_bs, dMask_4          ;// all-ones in the 16-bit lanes whose bS has bit 2 set
+
+        ;// bS == 4 Filtering
+        BLNE        armVCM4P10_DeblockingChromabSGE4_unsafe
+                    
+        VBIT        dP_0n, dP_0t, dFilt_bs               ;// take dP_0t in the lanes selected by dFilt_bs
+        VBIT        dQ_0n, dQ_0t, dFilt_bs
+        
+        VBIF        dP_0n, dP_0, dFilt                   ;// restore the unfiltered pixel wherever dFilt is clear
+        VBIF        dQ_0n, dQ_0, dFilt  
+
+        ;// Result Storage
+        VST1        dP_0n, [pSrcDst], srcdstStep
+        ADDS        XY, XY, XY                           ;// Z is set on the second pass; consumed by BNE below
+        VST1        dQ_0n, [pSrcDst], srcdstStep         ;// pSrcDst ends four rows below where this pass started
+
+        BNE         LoopY                                ;// NOTE(review): unlike NoFilterBS0, this path has no visible alpha/beta reload or pThresholds advance - presumably in the helpers; confirm
+        
+        MOV         r0, #OMX_Sts_NoErr
+
+        M_EXIT
+        
+NoFilterBS0
+
+        VLD1        {dAlpha[]}, [pAlpha]                 ;// pAlpha was advanced at entry, so this reads the second alpha byte
+        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1 ;// six rows loaded minus two: net advance of four rows, same as the filtered path
+        ADDS        XY, XY, XY                           ;// Z is set on the second pass; consumed by BNE below
+        VLD1        {dBeta[]}, [pBeta]
+        ADD         pThresholds, pThresholds, #4         ;// skip the four threshold bytes belonging to this pass
+        BNE         LoopY        
+
+        MOV         r0, #OMX_Sts_NoErr
+        M_END
+        
+        ENDIF
+        
+
+        END
+        
+        
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
new file mode 100644
index 0000000..18e6c1d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
@@ -0,0 +1,282 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS CortexA8
+
+        IF CortexA8
+
+        IMPORT  armVCM4P10_DeblockingChromabSGE4_unsafe
+        IMPORT  armVCM4P10_DeblockingChromabSLT4_unsafe
+        
+LOOP_COUNT  EQU 0x40000000      ;// seed for XY: doubled by ADDS once per LoopY pass, becomes zero after the second pass
+MASK_3      EQU 0x03030303      ;// bits 0-1 of each of the four bS bytes
+MASK_4      EQU 0x04040404      ;// bit 2 of each of the four bS bytes
+
+;// Function arguments
+
+pSrcDst     RN 0
+srcdstStep  RN 1
+pAlpha      RN 2
+pBeta       RN 3
+
+pThresholds RN 5                ;// loaded from stack argument ppThresholds
+pBS         RN 4                ;// loaded from stack argument ppBS
+bS3210      RN 6                ;// one 32-bit word (four bS bytes) read from pBS per LoopY pass
+pSrcDst_P   RN 10               ;// same register as pTmp
+pSrcDst_Q   RN 12               ;// same register as pTmp2
+
+pTmp        RN 10
+pTmp2       RN 12
+step        RN 14               ;// r14 is the link register: BL overwrites it, so step is recomputed after the helper calls
+
+;// Loop 
+
+XY          RN 7                ;// loop counter, seeded with LOOP_COUNT
+
+;// Rows input
+dRow0       DN D7.U8
+dRow1       DN D8.U8            ;// same register as dQ_0
+dRow2       DN D5.U8            ;// same register as dP_1
+dRow3       DN D10.U8           ;// same register as dQ_2
+dRow4       DN D6.U8            ;// same register as dP_2
+dRow5       DN D9.U8            ;// same register as dQ_1
+dRow6       DN D4.U8            ;// same register as dP_0
+dRow7       DN D11.U8 
+
+
+;// Pixels
+dP_0        DN D4.U8
+dP_1        DN D5.U8  
+dP_2        DN D6.U8  
+dQ_0        DN D8.U8  
+dQ_1        DN D9.U8  
+dQ_2        DN D10.U8 
+
+;// Filtering Decision
+dAlpha      DN D0.U8
+dBeta       DN D2.U8
+
+dFilt       DN D16.U8
+dAqflg      DN D12.U8           ;// same register as dAp1p0
+dApflg      DN D17.U8           ;// same register as dAq2q0
+
+dAp0q0      DN D13.U8           ;// same register as dP_0t
+dAp1p0      DN D12.U8
+dAq1q0      DN D18.U8
+dAp2p0      DN D19.U8           ;// same register as dTemp
+dAq2q0      DN D17.U8
+
+qBS3210     QN Q13.U16          ;// Q13 is the register pair D26:D27
+dBS3210     DN D26              ;// low half of qBS3210
+dMask_bs    DN D27              ;// high half of qBS3210
+dFilt_bs    DN D26.U16          ;// same register as dBS3210, viewed as four 16-bit lanes
+
+;// bSLT4
+dMask_0     DN D14.U8
+dMask_1     DN D15.U8    
+dMask_4     DN D1.U16
+
+Mask_4      RN 8
+Mask_3      RN 9
+
+dTemp       DN D19.U8
+
+;// Result
+dP_0t       DN D13.U8   ;// not written in this file; presumably produced by the bSGE4 helper - confirm
+dQ_0t       DN D31.U8   
+
+dP_0n       DN D29.U8   ;// not written in this file before the VBIT; presumably produced by the bSLT4 helper - confirm
+dQ_0n       DN D24.U8
+
+        
+        ;// Function header: in-place chroma filter entry point; LoopY below runs twice (see LOOP_COUNT), each pass loads 8 rows of 8 bytes and transposes them
+        M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r12, d15
+        
+        ;//Arguments on the stack
+        M_ARG   ppThresholds, 4
+        M_ARG   ppBS, 4
+        
+        ;// d0-dAlpha_0
+        ;// d2-dBeta_0
+
+        ;load alpha1,beta1 somewhere to avoid more loads
+        VLD1        {dAlpha[]}, [pAlpha]!                ;// replicate the first alpha byte into every lane; pAlpha is written back past it
+        SUB         pSrcDst, pSrcDst, #4                 ;// start each 8-byte row load four bytes to the left of the incoming pSrcDst
+        VLD1        {dBeta[]}, [pBeta]!                  ;// same for beta
+        
+        M_LDR       pBS, ppBS
+        M_LDR       pThresholds, ppThresholds 
+
+        LDR         Mask_4, =MASK_4
+        LDR         Mask_3, =MASK_3
+
+        ;dMask_0-14
+        ;dMask_1-15
+        ;dMask_4-1
+
+        VMOV        dMask_0, #0                          ;// dMask_0/dMask_1 are not read again in this file; presumably used by the imported helpers - confirm
+        VMOV        dMask_1, #1     
+        VMOV        dMask_4, #4                          ;// 16-bit lanes holding 4, used by the VTST below
+        
+        LDR         XY, =LOOP_COUNT
+
+        ;// p0-p3 - d4-d7
+        ;// q0-q3 - d8-d11
+
+
+LoopY        
+        LDR         bS3210, [pBS], #8                    ;// four bS bytes for this pass; pBS advances 8 bytes
+        ADD         pTmp, pSrcDst, srcdstStep            ;// pTmp walks the odd rows, pSrcDst the even rows
+        ADD         step, srcdstStep, srcdstStep         ;// step = 2 * srcdstStep
+        
+        ;1
+        VLD1        dRow0, [pSrcDst], step
+        ;1
+        VLD1        dRow1, [pTmp], step
+        VLD1        dRow2, [pSrcDst], step
+        VLD1        dRow3, [pTmp], step
+        VLD1        dRow4, [pSrcDst], step
+        VLD1        dRow5, [pTmp], step
+        VLD1        dRow6, [pSrcDst], step               ;// pSrcDst has now advanced by 8 * srcdstStep in total
+        VLD1        dRow7, [pTmp], step
+        
+        
+        ;// dRow0 = [q3r0 q2r0 q1r0 q0r0 p0r0 p1r0 p2r0 p3r0]
+        ;// dRow1 = [q3r1 q2r1 q1r1 q0r1 p0r1 p1r1 p2r1 p3r1]
+        ;// dRow2 = [q3r2 q2r2 q1r2 q0r2 p0r2 p1r2 p2r2 p3r2]
+        ;// dRow3 = [q3r3 q2r3 q1r3 q0r3 p0r3 p1r3 p2r3 p3r3]
+        ;// dRow4 = [q3r4 q2r4 q1r4 q0r4 p0r4 p1r4 p2r4 p3r4]
+        ;// dRow5 = [q3r5 q2r5 q1r5 q0r5 p0r5 p1r5 p2r5 p3r5]
+        ;// dRow6 = [q3r6 q2r6 q1r6 q0r6 p0r6 p1r6 p2r6 p3r6]
+        ;// dRow7 = [q3r7 q2r7 q1r7 q0r7 p0r7 p1r7 p2r7 p3r7]
+
+        ;// 8x8 Transpose
+        VZIP.8      dRow0, dRow1
+        VZIP.8      dRow2, dRow3
+        VZIP.8      dRow4, dRow5
+        VZIP.8      dRow6, dRow7
+
+        VZIP.16     dRow0, dRow2
+        VZIP.16     dRow1, dRow3
+        VZIP.16     dRow4, dRow6
+        VZIP.16     dRow5, dRow7
+
+        VZIP.32     dRow0, dRow4
+        VZIP.32     dRow2, dRow6
+        VZIP.32     dRow3, dRow7
+        VZIP.32     dRow1, dRow5
+
+
+        ;Realign the pointers
+
+        CMP         bS3210, #0                           ;// flags are consumed by BEQ NoFilterBS0 below
+        VABD        dAp2p0, dP_2, dP_0                   ;// |p2 - p0|
+        VABD        dAp0q0, dP_0, dQ_0                   ;// |p0 - q0|
+        BEQ         NoFilterBS0                          ;// all four bS bytes are zero: nothing to filter on this pass
+
+        VABD        dAp1p0, dP_1, dP_0                   ;// |p1 - p0|
+        VABD        dAq1q0, dQ_1, dQ_0                   ;// |q1 - q0|
+
+        VMOV.U32    dBS3210[0], bS3210                   ;// move the four bS bytes into the low word of D26
+        VCGT        dFilt, dAlpha, dAp0q0                ;// dFilt = alpha > |p0 - q0|
+        VMAX        dAp1p0, dAq1q0, dAp1p0               ;// max(|q1 - q0|, |p1 - p0|)
+        VMOVL       qBS3210, dBS3210.U8                  ;// widen each bS byte to 16 bits, so one bS spans two byte lanes
+        VABD        dAq2q0, dQ_2, dQ_0                   ;// |q2 - q0|
+        VCGT        dMask_bs.S16, dBS3210.S16, #0        ;// all-ones in the 16-bit lanes whose bS > 0
+
+        VCGT        dAp1p0, dBeta, dAp1p0                ;// beta > max(|q1 - q0|, |p1 - p0|)
+        VCGT        dAp2p0, dBeta, dAp2p0                ;// beta > |p2 - p0|
+        VAND        dFilt, dMask_bs.U8                   ;// drop lanes whose bS is zero
+
+        TST         bS3210, Mask_3                       ;// NE if any bS byte has bit 0 or 1 set; consumed by the first BLNE
+
+        VCGT        dAq2q0, dBeta, dAq2q0                ;// beta > |q2 - q0|
+        VAND        dFilt, dFilt, dAp1p0                 ;// final per-lane filter enable
+
+        VAND        dAqflg, dFilt, dAq2q0
+        VAND        dApflg, dFilt, dAp2p0
+
+        ;// bS < 4 Filtering
+        BLNE        armVCM4P10_DeblockingChromabSLT4_unsafe
+
+        TST         bS3210, Mask_4                       ;// NE if any bS byte has bit 2 set; consumed by the second BLNE
+
+        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #3 ;// undo the 8-row advance made by the loads
+        VTST        dFilt_bs, dFilt_bs, dMask_4          ;// all-ones in the 16-bit lanes whose bS has bit 2 set
+
+        ;// bS == 4 Filtering
+        BLNE        armVCM4P10_DeblockingChromabSGE4_unsafe
+
+        VBIT        dP_0n, dP_0t, dFilt_bs               ;// take dP_0t in the lanes selected by dFilt_bs
+        VBIT        dQ_0n, dQ_0t, dFilt_bs
+
+        ;// Result Storage
+        ADD         pSrcDst_P, pSrcDst, #3               ;// byte 3 of each 8-byte row receives the p0 result
+        VBIF        dP_0n, dP_0, dFilt                   ;// restore the unfiltered pixel wherever dFilt is clear
+        
+        ADD         pTmp2, pSrcDst_P, srcdstStep         ;// odd-row pointer for the p0 column
+        ADD         step, srcdstStep, srcdstStep         ;// recompute step: r14 may have been overwritten by the BLNE calls above
+        VBIF        dQ_0n, dQ_0, dFilt  
+
+        ADDS        XY, XY, XY                           ;// Z is set on the second pass; consumed by BNE LoopY below
+        
+        VST1        {dP_0n[0]}, [pSrcDst_P], step        ;// one byte per row: lane i goes to row i
+        VST1        {dP_0n[1]}, [pTmp2], step
+        VST1        {dP_0n[2]}, [pSrcDst_P], step
+        VST1        {dP_0n[3]}, [pTmp2], step
+        VST1        {dP_0n[4]}, [pSrcDst_P], step
+        VST1        {dP_0n[5]}, [pTmp2], step
+        VST1        {dP_0n[6]}, [pSrcDst_P], step
+        VST1        {dP_0n[7]}, [pTmp2], step
+        
+        ADD         pSrcDst_Q, pSrcDst, #4               ;// byte 4 of each row receives the q0 result (reuses r12)
+        ADD         pTmp, pSrcDst_Q, srcdstStep          ;// odd-row pointer for the q0 column (reuses r10)
+        
+        VST1        {dQ_0n[0]}, [pSrcDst_Q], step
+        VST1        {dQ_0n[1]}, [pTmp], step
+        VST1        {dQ_0n[2]}, [pSrcDst_Q], step
+        VST1        {dQ_0n[3]}, [pTmp], step
+        VST1        {dQ_0n[4]}, [pSrcDst_Q], step
+        VST1        {dQ_0n[5]}, [pTmp], step
+        VST1        {dQ_0n[6]}, [pSrcDst_Q], step
+        VST1        {dQ_0n[7]}, [pTmp], step
+        
+        ADD         pSrcDst, pSrcDst, #4                 ;// next pass starts four bytes to the right, same rows
+
+        BNE         LoopY                                ;// NOTE(review): unlike NoFilterBS0, this path has no visible alpha/beta reload or pThresholds advance - presumably in the helpers; confirm
+        
+        MOV         r0, #OMX_Sts_NoErr
+        
+        M_EXIT
+        
+NoFilterBS0
+        VLD1        {dAlpha[]}, [pAlpha]                 ;// pAlpha was advanced at entry, so this reads the second alpha byte
+        ADD         pSrcDst, pSrcDst, #4                 ;// four bytes to the right ...
+        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #3 ;// ... and back up the 8 rows consumed by the loads
+        ADDS        XY, XY, XY                           ;// Z is set on the second pass; consumed by BNE below
+        VLD1        {dBeta[]}, [pBeta]
+        ADD         pThresholds, pThresholds, #4         ;// skip the four threshold bytes belonging to this pass
+        BNE         LoopY        
+
+        MOV         r0, #OMX_Sts_NoErr
+
+        M_END
+        
+        ENDIF
+
+
+        END
+        
+        
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
new file mode 100755
index 0000000..0c3f4f2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
@@ -0,0 +1,288 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS CortexA8
+
+        IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
+        IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe
+
+        IF CortexA8
+
+LOOP_COUNT  EQU 0x55000000      ;// seed for XY: ADDS doubles it; carry-out on every 2nd doubling ends LoopX, zero on the 8th ends LoopY
+
+
+;// Function arguments
+
+pSrcDst     RN 0
+srcdstStep  RN 1
+pAlpha      RN 2
+pBeta       RN 3
+
+pThresholds RN 5                ;// loaded from stack argument ppThresholds
+pBS         RN 4                ;// loaded from stack argument ppBS
+bS10        RN 12               ;// one halfword (two bS bytes) read from pBS per LoopX pass
+
+pAlpha_0    RN 2                ;// same register as pAlpha
+pBeta_0     RN 3                ;// same register as pBeta
+
+pAlpha_1    RN 7                ;// set to pAlpha_0 + 1 at function entry
+pBeta_1     RN 8                ;// set to pBeta_0 + 1 at function entry
+
+
+
+;// Loop 
+
+XY          RN 9                ;// loop counter, seeded with LOOP_COUNT
+
+pTmp        RN 6
+step        RN 10               ;// set to 2 * srcdstStep at function entry
+
+;// Pixels
+dP_0        DN D4.U8
+dP_1        DN D5.U8  
+dP_2        DN D6.U8  
+dP_3        DN D7.U8  
+dQ_0        DN D8.U8  
+dQ_1        DN D9.U8  
+dQ_2        DN D10.U8 
+dQ_3        DN D11.U8 
+
+
+;// Filtering Decision
+dAlpha      DN D0.U8
+dBeta       DN D2.U8
+
+dFilt       DN D16.U8
+dAqflg      DN D12.U8           ;// same register as dAp1p0
+dApflg      DN D17.U8           ;// same register as dAq2q0
+
+dAp0q0      DN D13.U8
+dAp1p0      DN D12.U8
+dAq1q0      DN D18.U8
+dAp2p0      DN D19.U8
+dAq2q0      DN D17.U8
+
+;// bSLT4 (dTC* and dTemp are not referenced by the code in this file; presumably they mirror the helper's register use - confirm)
+dTC0        DN D18.U8   
+dTC1        DN D19.U8   
+dTC01       DN D18.U8   
+
+dTCs        DN D31.S8
+dTC         DN D31.U8
+
+dMask_0     DN D14.U8
+dMask_1     DN D15.U8    
+
+Mask_0      RN 11               ;// core register holding 0, moved into halves of dFilt by VMOVEQ
+
+dTemp       DN D19.U8
+
+;// Computing P0,Q0
+qDq0p0      QN Q10.S16
+qDp1q1      QN Q11.S16
+qDelta      QN Q10.S16  ; reuse qDq0p0
+dDelta      DN D20.S8
+
+
+;// Computing P1,Q1
+dRp0q0      DN D24.U8
+
+dMaxP       DN D23.U8
+dMinP       DN D22.U8
+
+dMaxQ       DN D19.U8
+dMinQ       DN D21.U8
+
+dDeltaP     DN D26.U8
+dDeltaQ     DN D27.U8
+
+qP_0n       QN Q14.S16
+qQ_0n       QN Q12.S16
+
+dQ_0n       DN D24.U8           ;// these four result aliases are declared again, with the same registers, in the bSGE4 group below
+dQ_1n       DN D25.U8
+dP_0n       DN D29.U8
+dP_1n       DN D30.U8
+
+;// bSGE4
+
+qSp0q0      QN Q10.U16
+
+qSp2q1      QN Q11.U16
+qSp0q0p1    QN Q12.U16
+qSp3p2      QN Q13.U16
+dHSp0q1     DN D28.U8
+
+qSq2p1      QN Q11.U16
+qSp0q0q1    QN Q12.U16
+qSq3q2      QN Q13.U16  ;!!
+dHSq0p1     DN D28.U8   ;!!
+
+qTemp1      QN Q11.U16  ;!!;qSp2q1 
+qTemp2      QN Q12.U16  ;!!;qSp0q0p1        
+
+dP_0t       DN D28.U8   ;!!;dHSp0q1        
+dQ_0t       DN D22.U8   ;!!;Temp1        
+
+dP_0n       DN D29.U8
+dP_1n       DN D30.U8
+dP_2n       DN D31.U8
+
+dQ_0n       DN D24.U8   ;!!;Temp2        
+dQ_1n       DN D25.U8   ;!!;Temp2        
+dQ_2n       DN D28.U8   ;!!;dQ_0t        
+
+        
+        ;// Function header: in-place luma filter entry point; LoopY runs 4 times and LoopX twice per LoopY pass (see LOOP_COUNT), 8 pixel columns per LoopX pass
+        M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11, d15
+        
+        ;//Arguments on the stack
+        M_ARG   ppThresholds, 4
+        M_ARG   ppBS, 4
+        
+        ;// d0-dAlpha_0
+        ;// d2-dBeta_0
+
+        ADD         pAlpha_1, pAlpha_0, #1               ;// address of the second alpha byte
+        ADD         pBeta_1, pBeta_0, #1                 ;// address of the second beta byte
+        
+        VLD1        {dAlpha[]}, [pAlpha_0]               ;// the first LoopY pass uses the first alpha byte, replicated to every lane
+        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// start four rows above the incoming pSrcDst (the dP_3 row)
+        VLD1        {dBeta[]}, [pBeta_0] 
+        
+        M_LDR       pBS, ppBS
+        M_LDR       pThresholds, ppThresholds 
+
+        MOV         Mask_0,#0
+
+        ;dMask_0-14
+        ;dMask_1-15
+
+        VMOV        dMask_0, #0                          ;// dMask_0/dMask_1 are not read again in this file; presumably used by the imported helpers - confirm
+        VMOV        dMask_1, #1     
+        
+        ADD         step, srcdstStep, srcdstStep         ;// step = 2 * srcdstStep
+
+        LDR         XY,=LOOP_COUNT
+
+        ;// p0-p3 - d4-d7
+        ;// q0-q3 - d8-d11
+LoopY        
+LoopX        
+        LDRH        bS10, [pBS], #2                      ;// two bS bytes for this 8-column pass
+        ADD         pTmp, pSrcDst, srcdstStep            ;// pTmp walks the odd rows, pSrcDst the even rows
+        CMP         bS10, #0
+        BEQ         NoFilterBS0                          ;// both bS bytes are zero: skip this pass without loading pixels
+
+        VLD1        dP_3, [pSrcDst], step
+        VLD1        dP_2, [pTmp], step
+        VLD1        dP_1, [pSrcDst], step
+        VLD1        dP_0, [pTmp], step
+        VLD1        dQ_0, [pSrcDst], step
+        VABD        dAp1p0, dP_0, dP_1                   ;// |p0 - p1|
+        VLD1        dQ_1, [pTmp]
+        VABD        dAp0q0, dQ_0, dP_0                   ;// |q0 - p0|
+        VLD1        dQ_2, [pSrcDst], srcdstStep          ;// pSrcDst is now on the 8th row (dQ_3), loaded further down
+        
+        VABD        dAq1q0, dQ_1, dQ_0                   ;// |q1 - q0|
+        VABD        dAp2p0, dP_2, dP_0                   ;// |p2 - p0|
+        VCGT        dFilt, dAlpha, dAp0q0                ;// dFilt = alpha > |q0 - p0|
+
+        TST         bS10, #0xff                          ;// EQ if the first bS byte is zero; consumed by the VMOVEQ two lines below
+        VMAX        dAp1p0, dAq1q0, dAp1p0               ;// max(|q1 - q0|, |p0 - p1|)
+        VABD        dAq2q0, dQ_2, dQ_0                   ;// |q2 - q0|
+
+        VMOVEQ.U32  dFilt[0], Mask_0                     ;// first bS is zero: clear the filter enable for columns 0-3
+        TST         bS10, #0xff00                        ;// EQ if the second bS byte is zero
+
+        VCGT        dAp2p0, dBeta, dAp2p0                ;// beta > |p2 - p0|
+        VCGT        dAp1p0, dBeta, dAp1p0                ;// beta > max(|q1 - q0|, |p0 - p1|)
+
+        VMOVEQ.U32  dFilt[1], Mask_0                     ;// second bS is zero: clear the filter enable for columns 4-7
+
+        VCGT        dAq2q0, dBeta, dAq2q0                ;// beta > |q2 - q0|
+        VLD1        dQ_3, [pSrcDst]
+        VAND        dFilt, dFilt, dAp1p0                 ;// final per-lane filter enable
+        TST         bS10, #4                             ;// only bit 2 of the first bS byte is tested; consumed by BNE bSGE4
+
+        VAND        dAqflg, dFilt, dAq2q0
+        VAND        dApflg, dFilt, dAp2p0
+    
+        BNE         bSGE4        
+bSLT4
+        ;// bS < 4 Filtering
+        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// back up four rows ...
+        SUB         pSrcDst, pSrcDst, srcdstStep         ;// ... plus one: from the dQ_3 row to the dP_1 row
+
+        BL          armVCM4P10_DeblockingLumabSLT4_unsafe
+
+        ;// Result Storage
+        VST1        dP_1n, [pSrcDst], srcdstStep
+        VST1        dP_0n, [pSrcDst], srcdstStep
+        SUB         pTmp, pSrcDst, srcdstStep, LSL #2    ;// pTmp = row where this pass started (the dP_3 row)
+        VST1        dQ_0n, [pSrcDst], srcdstStep
+        ADDS        XY, XY, XY                           ;// C ends LoopX (BCC below), Z ends LoopY (BNE at ExitLoopY)
+        VST1        dQ_1n, [pSrcDst]
+        ADD         pSrcDst, pTmp, #8                    ;// next pass: same rows, eight columns to the right
+
+        BCC         LoopX                                ;// NOTE(review): pThresholds is not advanced on this path here - presumably done inside the bSLT4 helper; confirm
+        B           ExitLoopY        
+
+NoFilterBS0
+        ADD         pSrcDst, pSrcDst, #8                 ;// nothing was loaded, so only step eight columns to the right
+        ADDS        XY, XY, XY                           ;// C ends LoopX, Z ends LoopY
+        ADD         pThresholds, pThresholds, #2         ;// skip the two threshold bytes belonging to this pass
+        BCC         LoopX
+        B           ExitLoopY        
+bSGE4        
+        ;// bS >= 4 Filtering
+        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// back up four rows ...
+        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1 ;// ... plus two: from the dQ_3 row to the dP_2 row
+        BL          armVCM4P10_DeblockingLumabSGE4_unsafe
+
+        ;// Result Storage
+        VST1        dP_2n, [pSrcDst], srcdstStep
+        VST1        dP_1n, [pSrcDst], srcdstStep
+        VST1        dP_0n, [pSrcDst], srcdstStep
+        SUB         pTmp, pSrcDst, srcdstStep, LSL #2    ;// pTmp = row where this pass started (the dP_3 row)
+        VST1        dQ_0n, [pSrcDst], srcdstStep
+        ADDS        XY,XY,XY                             ;// C ends LoopX, Z ends LoopY
+        VST1        dQ_1n, [pSrcDst], srcdstStep
+        ADD         pThresholds, pThresholds, #2         ;// skip the two threshold bytes belonging to this pass
+        VST1        dQ_2n, [pSrcDst]
+        
+        ADD         pSrcDst, pTmp, #8                    ;// next pass: same rows, eight columns to the right
+        BCC         LoopX
+
+ExitLoopY        
+
+        SUB         pSrcDst, pSrcDst, #16                ;// undo the two 8-column advances of LoopX ...
+        VLD1        {dAlpha[]}, [pAlpha_1]               ;// every LoopY pass after the first uses the second alpha/beta byte
+        ADD         pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// ... and move four rows down for the next LoopY pass
+        VLD1        {dBeta[]}, [pBeta_1]
+        BNE         LoopY                                ;// Z still comes from the last ADDS XY (nothing in between sets flags)
+
+        MOV         r0, #OMX_Sts_NoErr
+
+        M_END
+        
+    ENDIF
+    
+
+        
+
+        END
+        
+        
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
new file mode 100755
index 0000000..e6fbb34
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
@@ -0,0 +1,436 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS CortexA8
+
+        IMPORT  armVCM4P10_DeblockingLumabSLT4_unsafe
+        IMPORT  armVCM4P10_DeblockingLumabSGE4_unsafe
+        
+        IF CortexA8
+
+LOOP_COUNT  EQU 0x11000000      ;// seed for XY: ADDS doubles it; carry-out on every 4th doubling ends LoopX, zero on the 8th ends LoopY
+
+
+;// Function arguments
+
+pSrcDst     RN 0
+srcdstStep  RN 1
+pAlpha      RN 2
+pBeta       RN 3
+
+pThresholds RN 5                ;// loaded from stack argument ppThresholds
+pBS         RN 4                ;// loaded from stack argument ppBS
+bS10        RN 12               ;// one halfword (two bS bytes) read from pBS per LoopX pass
+
+pAlpha_0    RN 2                ;// same register as pAlpha
+pBeta_0     RN 3                ;// same register as pBeta
+
+pAlpha_1    RN 7                ;// set to pAlpha_0 + 1 at function entry
+pBeta_1     RN 8                ;// set to pBeta_0 + 1 at function entry
+
+pTmp        RN 10
+pTmpStep    RN 11               ;// set to 2 * srcdstStep at function entry
+
+;// Loop 
+
+XY          RN 9                ;// loop counter, seeded with LOOP_COUNT
+
+;// Rows input
+dRow0       DN D7.U8
+dRow1       DN D8.U8  
+dRow2       DN D5.U8  
+dRow3       DN D10.U8  
+dRow4       DN D6.U8  
+dRow5       DN D9.U8  
+dRow6       DN D4.U8 
+dRow7       DN D11.U8 
+
+;// dRow0 - dP_3, dRow1 - dQ_0, dRow2 - dP_1, dRow3 - dQ_2
+;// dRow4 - dP_2, dRow5 - dQ_1, dRow6 - dP_0, dRow7 - dQ_3
+
+;// Rows output
+dRown0      DN D7.U8
+dRown1      DN D24.U8
+dRown2      DN D30.U8
+dRown3      DN D10.U8
+dRown4      DN D6.U8
+dRown5      DN D25.U8
+dRown6      DN D29.U8
+dRown7      DN D11.U8
+
+;// dP_0n       DN D29.U8
+;// dP_1n       DN D30.U8
+;// dP_2n       DN D31.U8
+;// 
+;// dQ_0n       DN D24.U8   ;!!;Temp2        
+;// dQ_1n       DN D25.U8   ;!!;Temp2        
+;// dQ_2n       DN D28.U8   ;!!;dQ_0t        
+;// 
+;// dRown0 - dP_3,  dRown1 - dQ_0n
+;// dRown2 - dP_1n, dRown3 - dQ_2
+;// dRown4 - dP_2,  dRown5 - dQ_1n
+;// dRown6 - dP_0n, dRown7 - dQ_3
+
+dRow0n      DN D7.U8
+dRow1n      DN D24.U8
+dRow2n      DN D30.U8
+dRow3n      DN D28.U8
+dRow4n      DN D31.U8
+dRow5n      DN D25.U8
+dRow6n      DN D29.U8
+dRow7n      DN D11.U8
+
+;// dRow0n - dP_3,  dRow1n - dQ_0n, dRow2n - dP_1n, dRow3n - dQ_2n
+;// dRow4n - dP_2n, dRow5n - dQ_1n, dRow6n - dP_0n, dRow7n - dQ_3
+
+;// Pixels
+dP_0        DN D4.U8
+dP_1        DN D5.U8  
+dP_2        DN D6.U8  
+dP_3        DN D7.U8  
+dQ_0        DN D8.U8  
+dQ_1        DN D9.U8  
+dQ_2        DN D10.U8 
+dQ_3        DN D11.U8 
+
+
+;// Filtering Decision
+dAlpha      DN D0.U8
+dBeta       DN D2.U8
+
+dFilt       DN D16.U8
+dAqflg      DN D12.U8           ;// same register as dAp1p0
+dApflg      DN D17.U8           ;// same register as dAq2q0
+
+dAp0q0      DN D13.U8
+dAp1p0      DN D12.U8
+dAq1q0      DN D18.U8
+dAp2p0      DN D19.U8
+dAq2q0      DN D17.U8
+
+;// bSLT4 (dTC* and dTemp are not referenced by the code in this file; presumably they mirror the helper's register use - confirm)
+dTC0        DN D18.U8   
+dTC1        DN D19.U8   
+dTC01       DN D18.U8   
+
+dTCs        DN D31.S8
+dTC         DN D31.U8
+
+dMask_0     DN D14.U8
+dMask_1     DN D15.U8    
+
+Mask_0      RN 6                ;// core register holding 0, moved into halves of dFilt by VMOVEQ
+
+dTemp       DN D19.U8
+
+;// Computing P0,Q0
+qDq0p0      QN Q10.S16
+qDp1q1      QN Q11.S16
+qDelta      QN Q10.S16  ; reuse qDq0p0
+dDelta      DN D20.S8
+
+
+;// Computing P1,Q1
+dRp0q0      DN D24.U8
+
+dMaxP       DN D23.U8
+dMinP       DN D22.U8
+
+dMaxQ       DN D19.U8
+dMinQ       DN D21.U8
+
+dDeltaP     DN D26.U8
+dDeltaQ     DN D27.U8
+
+qP_0n       QN Q14.S16
+qQ_0n       QN Q12.S16
+
+dQ_0n       DN D24.U8           ;// these four result aliases are declared again, with the same registers, in the bSGE4 group below
+dQ_1n       DN D25.U8
+dP_0n       DN D29.U8
+dP_1n       DN D30.U8
+
+;// bSGE4
+
+qSp0q0      QN Q10.U16
+
+qSp2q1      QN Q11.U16
+qSp0q0p1    QN Q12.U16
+qSp3p2      QN Q13.U16
+dHSp0q1     DN D28.U8
+
+qSq2p1      QN Q11.U16
+qSp0q0q1    QN Q12.U16
+qSq3q2      QN Q13.U16  ;!!
+dHSq0p1     DN D28.U8   ;!!
+
+qTemp1      QN Q11.U16  ;!!;qSp2q1 
+qTemp2      QN Q12.U16  ;!!;qSp0q0p1        
+
+dP_0t       DN D28.U8   ;!!;dHSp0q1        
+dQ_0t       DN D22.U8   ;!!;Temp1        
+
+dP_0n       DN D29.U8
+dP_1n       DN D30.U8
+dP_2n       DN D31.U8
+
+dQ_0n       DN D24.U8   ;!!;Temp2        
+dQ_1n       DN D25.U8   ;!!;Temp2        
+dQ_2n       DN D28.U8   ;!!;dQ_0t        
+
+        
+        ;// Function header: in-place luma filter entry point; LoopY runs twice and LoopX 4 times per LoopY pass (see LOOP_COUNT), 8 rows per LoopX pass
+        M_START omxVCM4P10_FilterDeblockingLuma_VerEdge_I, r11, d15
+        
+        ;//Arguments on the stack
+        M_ARG   ppThresholds, 4
+        M_ARG   ppBS, 4
+        
+        ;// d0-dAlpha_0
+        ;// d2-dBeta_0
+
+        ADD         pAlpha_1, pAlpha_0, #1               ;// address of the second alpha byte
+        ADD         pBeta_1, pBeta_0, #1                 ;// address of the second beta byte
+        
+        VLD1        {dAlpha[]}, [pAlpha_0]               ;// the first LoopX pass of each LoopY pass uses the first alpha byte
+        SUB         pSrcDst, pSrcDst, #4                 ;// start each 8-byte row load four bytes to the left of the incoming pSrcDst
+        VLD1        {dBeta[]}, [pBeta_0] 
+        
+        M_LDR       pBS, ppBS
+        M_LDR       pThresholds, ppThresholds 
+
+        MOV         Mask_0,#0
+
+        ;dMask_0-14
+        ;dMask_1-15
+
+        VMOV        dMask_0, #0                          ;// dMask_0/dMask_1 are not read again in this file; presumably used by the imported helpers - confirm
+        VMOV        dMask_1, #1     
+
+        LDR         XY,=LOOP_COUNT
+    
+        ADD         pTmpStep, srcdstStep, srcdstStep     ;// pTmpStep = 2 * srcdstStep
+
+        ;// p0-p3 - d4-d7
+        ;// q0-q3 - d8-d11
+LoopY        
+LoopX        
+        LDRH        bS10, [pBS], #4                      ;// two bS bytes for these 8 rows; pBS advances 4 bytes per LoopX pass
+
+        CMP         bS10, #0
+        BEQ         NoFilterBS0                          ;// both bS bytes are zero: skip this pass without loading pixels
+
+        ;// Load 8 rows of data
+        ADD         pTmp, pSrcDst, srcdstStep            ;// pTmp walks the odd rows, pSrcDst the even rows
+        VLD1        dRow0, [pSrcDst], pTmpStep
+        VLD1        dRow1, [pTmp], pTmpStep
+        VLD1        dRow2, [pSrcDst], pTmpStep
+        VZIP.8      dRow0, dRow1                         ;// the transpose is interleaved with the remaining loads
+        VLD1        dRow3, [pTmp], pTmpStep
+        VLD1        dRow4, [pSrcDst], pTmpStep
+        VZIP.8      dRow2, dRow3
+        VLD1        dRow5, [pTmp], pTmpStep
+        VLD1        dRow6, [pSrcDst], pTmpStep           ;// pSrcDst has now advanced by 8 * srcdstStep in total
+        VLD1        dRow7, [pTmp], pTmpStep
+        VZIP.8      dRow4, dRow5
+        VZIP.16     dRow1, dRow3
+    
+
+        ;// dRow0 = [q3r0 q2r0 q1r0 q0r0 p0r0 p1r0 p2r0 p3r0]
+        ;// dRow1 = [q3r1 q2r1 q1r1 q0r1 p0r1 p1r1 p2r1 p3r1]
+        ;// dRow2 = [q3r2 q2r2 q1r2 q0r2 p0r2 p1r2 p2r2 p3r2]
+        ;// dRow3 = [q3r3 q2r3 q1r3 q0r3 p0r3 p1r3 p2r3 p3r3]
+        ;// dRow4 = [q3r4 q2r4 q1r4 q0r4 p0r4 p1r4 p2r4 p3r4]
+        ;// dRow5 = [q3r5 q2r5 q1r5 q0r5 p0r5 p1r5 p2r5 p3r5]
+        ;// dRow6 = [q3r6 q2r6 q1r6 q0r6 p0r6 p1r6 p2r6 p3r6]
+        ;// dRow7 = [q3r7 q2r7 q1r7 q0r7 p0r7 p1r7 p2r7 p3r7]
+
+        ;// 8x8 Transpose
+
+        VZIP.8      dRow6, dRow7
+
+        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #3 ;// undo the 8-row advance made by the loads
+        VZIP.16     dRow0, dRow2
+        VZIP.16     dRow5, dRow7
+        
+
+        VZIP.16     dRow4, dRow6
+        VZIP.32     dRow1, dRow5
+        VZIP.32     dRow2, dRow6
+        VZIP.32     dRow3, dRow7
+        VZIP.32     dRow0, dRow4
+        
+
+        ;// dRow0 - dP_3, dRow1 - dQ_0, dRow2 - dP_1, dRow3 - dQ_2
+        ;// dRow4 - dP_2, dRow5 - dQ_1, dRow6 - dP_0, dRow7 - dQ_3
+
+        ;// dQ_0 = [q0r7 q0r6 q0r5 q0r4 q0r3 q0r2 q0r1 q0r0]
+        ;// dQ_1 = [q1r7 q1r6 q1r5 q1r4 q1r3 q1r2 q1r1 q1r0]
+        ;// dQ_2 = [q2r7 q2r6 q2r5 q2r4 q2r3 q2r2 q2r1 q2r0]
+        ;// dQ_3 = [q3r7 q3r6 q3r5 q3r4 q3r3 q3r2 q3r1 q3r0]
+
+        ;// dP_0 = [p0r7 p0r6 p0r5 p0r4 p0r3 p0r2 p0r1 p0r0]
+        ;// dP_1 = [p1r7 p1r6 p1r5 p1r4 p1r3 p1r2 p1r1 p1r0]
+        ;// dP_2 = [p2r7 p2r6 p2r5 p2r4 p2r3 p2r2 p2r1 p2r0]
+        ;// dP_3 = [p3r7 p3r6 p3r5 p3r4 p3r3 p3r2 p3r1 p3r0]
+
+        VABD        dAp0q0, dP_0, dQ_0                   ;// |p0 - q0|
+        VABD        dAp1p0, dP_1, dP_0                   ;// |p1 - p0|
+
+        VABD        dAq1q0, dQ_1, dQ_0                   ;// |q1 - q0|
+        VABD        dAp2p0, dP_2, dP_0                   ;// |p2 - p0|
+        
+        TST         bS10, #0xff                          ;// EQ if the first bS byte is zero; consumed by the first VMOVEQ below
+        VCGT        dFilt, dAlpha, dAp0q0                ;// dFilt = alpha > |p0 - q0|
+
+        VMAX        dAp1p0, dAq1q0, dAp1p0               ;// max(|q1 - q0|, |p1 - p0|)
+        VABD        dAq2q0, dQ_2, dQ_0                   ;// |q2 - q0|
+
+        VMOVEQ.U32  dFilt[0], Mask_0                     ;// first bS is zero: clear the filter enable for rows 0-3
+        TST         bS10, #0xff00                        ;// EQ if the second bS byte is zero
+
+        VCGT        dAp2p0, dBeta, dAp2p0                ;// beta > |p2 - p0|
+        VCGT        dAp1p0, dBeta, dAp1p0                ;// beta > max(|q1 - q0|, |p1 - p0|)
+
+        VMOVEQ.U32  dFilt[1], Mask_0                     ;// second bS is zero: clear the filter enable for rows 4-7
+
+        VCGT        dAq2q0, dBeta, dAq2q0                ;// beta > |q2 - q0|
+        VAND        dFilt, dFilt, dAp1p0                 ;// final per-lane filter enable
+        TST         bS10, #4                             ;// only bit 2 of the first bS byte is tested; consumed by BNE bSGE4
+
+        VAND        dAqflg, dFilt, dAq2q0
+        VAND        dApflg, dFilt, dAp2p0
+    
+        BNE         bSGE4        
+bSLT4
+        ;// bS < 4 Filtering
+
+        BL          armVCM4P10_DeblockingLumabSLT4_unsafe
+
+        ;// Transpose
+
+        VZIP.8      dP_3,  dP_2  
+        VZIP.8      dP_1n, dP_0n
+        VZIP.8      dQ_0n, dQ_1n
+        VZIP.8      dQ_2,  dQ_3
+
+        
+        VZIP.16     dP_3,  dP_1n
+        ADD         pTmp, pSrcDst, srcdstStep            ;// odd-row pointer for the stores below
+        VZIP.16     dQ_0n, dQ_2
+        VZIP.16     dQ_1n, dQ_3
+        VZIP.16     dP_2,  dP_0n
+
+        VZIP.32     dP_3,  dQ_0n
+        VZIP.32     dP_1n, dQ_2
+        VZIP.32     dP_2,  dQ_1n
+        VZIP.32     dP_0n, dQ_3
+
+        ;// dRown0 - dP_3,  dRown1 - dQ_0n
+        ;// dRown2 - dP_1n, dRown3 - dQ_2
+        ;// dRown4 - dP_2,  dRown5 - dQ_1n
+        ;// dRown6 - dP_0n, dRown7 - dQ_3
+
+        VST1        dRown0, [pSrcDst], pTmpStep
+        VST1        dRown1, [pTmp], pTmpStep
+        VST1        dRown2, [pSrcDst], pTmpStep
+        VST1        dRown3, [pTmp], pTmpStep
+        ;1
+        VST1        dRown4, [pSrcDst], pTmpStep
+        VST1        dRown5, [pTmp], pTmpStep
+        ADDS        XY, XY, XY                           ;// C ends LoopX (BCC below), Z ends LoopY (BNE at ExitLoopY)
+        VST1        dRown6, [pSrcDst], pTmpStep
+        ADD         pThresholds, pThresholds, #2         ;// +2 here versus +4 on the other two paths; presumably the bSLT4 helper advances the rest - confirm
+        VST1        dRown7, [pTmp], srcdstStep           ;// the post-increment amount does not matter: pTmp is recomputed before its next use
+
+        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #3 ;// undo the 8-row advance made by the stores ...
+        VLD1        {dAlpha[]}, [pAlpha_1]               ;// later LoopX passes use the second alpha/beta byte
+        ADD         pSrcDst, pSrcDst, #4                 ;// ... and move four bytes to the right
+        VLD1        {dBeta[]}, [pBeta_1]
+
+        BCC         LoopX
+        B           ExitLoopY        
+
+NoFilterBS0
+        ADD         pSrcDst, pSrcDst, #4                 ;// nothing was loaded, so only step four bytes to the right
+        ADDS        XY, XY, XY                           ;// C ends LoopX, Z ends LoopY
+        VLD1        {dAlpha[]}, [pAlpha_1]               ;// later LoopX passes use the second alpha/beta byte
+        ADD         pThresholds, pThresholds, #4         ;// same 4-byte stride as pBS
+        VLD1        {dBeta[]}, [pBeta_1]
+        BCC         LoopX
+        B           ExitLoopY        
+bSGE4        
+        ;// bS >= 4 Filtering
+        
+        BL          armVCM4P10_DeblockingLumabSGE4_unsafe
+
+        ;// Transpose
+
+        VZIP.8      dP_3,  dP_2n   
+        VZIP.8      dP_1n, dP_0n
+        VZIP.8      dQ_0n, dQ_1n
+        VZIP.8      dQ_2n, dQ_3
+
+        VZIP.16     dP_3,  dP_1n
+        ADD         pTmp, pSrcDst, srcdstStep            ;// odd-row pointer for the stores below
+        VZIP.16     dQ_0n, dQ_2n
+        VZIP.16     dQ_1n, dQ_3
+        VZIP.16     dP_2n, dP_0n
+
+        VZIP.32     dP_3,  dQ_0n
+        VZIP.32     dP_1n, dQ_2n
+        VZIP.32     dP_2n, dQ_1n
+        VZIP.32     dP_0n, dQ_3
+
+        ;// dRow0n - dP_3,  dRow1n - dQ_0n, dRow2n - dP_1n, dRow3n - dQ_2n
+        ;// dRow4n - dP_2n, dRow5n - dQ_1n, dRow6n - dP_0n, dRow7n - dQ_3
+        
+        VST1        dRow0n, [pSrcDst], pTmpStep
+        VST1        dRow1n, [pTmp], pTmpStep
+        VST1        dRow2n, [pSrcDst], pTmpStep
+        VST1        dRow3n, [pTmp], pTmpStep
+        VST1        dRow4n, [pSrcDst], pTmpStep
+        VST1        dRow5n, [pTmp], pTmpStep
+        ADDS        XY,XY,XY                             ;// C ends LoopX, Z ends LoopY
+        VST1        dRow6n, [pSrcDst], pTmpStep
+        ADD         pThresholds, pThresholds, #4         ;// same 4-byte stride as pBS
+        VST1        dRow7n, [pTmp], pTmpStep
+
+        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #3 ;// undo the 8-row advance made by the stores ...
+        VLD1        {dAlpha[]}, [pAlpha_1]               ;// later LoopX passes use the second alpha/beta byte
+        ADD         pSrcDst, pSrcDst, #4                 ;// ... and move four bytes to the right
+        VLD1        {dBeta[]}, [pBeta_1]
+
+        BCC         LoopX
+
+ExitLoopY        
+        SUB         pBS, pBS, #14                        ;// four LoopX passes advanced pBS by 16; net +2 for the next LoopY pass
+        SUB         pThresholds, pThresholds, #14        ;// same net +2 adjustment, given a 4-byte advance per LoopX pass
+        SUB         pSrcDst, pSrcDst, #16                ;// undo the four 4-byte advances of LoopX ...
+        VLD1        {dAlpha[]}, [pAlpha_0]               ;// the next LoopY pass starts again with the first alpha/beta byte
+        ADD         pSrcDst, pSrcDst, srcdstStep, LSL #3 ;// ... and move eight rows down
+        VLD1        {dBeta[]}, [pBeta_0]
+        BNE         LoopY                                ;// Z still comes from the last ADDS XY (nothing in between sets flags)
+
+        MOV         r0, #OMX_Sts_NoErr
+
+        M_END
+        
+    ENDIF
+    
+        
+        END
+        
+        
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
new file mode 100755
index 0000000..3ce41be
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
@@ -0,0 +1,79 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_InterpolateChroma.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate 1/8 Pixel interpolation for Chroma Block
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P10_InterpolateChroma,
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	pSrc	Pointer to the source reference frame buffer
+ * [in]	srcStep Reference frame step in bytes
+ * [in]	dstStep Destination frame step in bytes. Must be a multiple of roi.width.
+ * [in]	dx		Fractional part of horizontal motion vector component
+ *						in 1/8 pixel unit;valid in the range [0,7]
+ * [in]	dy		Fractional part of vertical motion vector component
+ *						in 1/8 pixel unit;valid in the range [0,7]
+ * [in]	roi		Dimension of the interpolation region;the parameters roi.width and roi.height must
+ *                      be equal to either 2, 4, or 8.
+ * [out]	pDst	Pointer to the destination frame buffer.
+ *                   if roi.width==2,  2-byte alignment required
+ *                   if roi.width==4,  4-byte alignment required
+ *                   if roi.width==8,  8-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns OMX_Sts_BadArgErr:
+ *	pSrc or pDst is NULL.
+ *	srcStep or dstStep < 8.
+ *	dx or dy is out of range [0-7].
+ *	roi.width or roi.height is out of range {2,4,8}.
+ *	roi.width is equal to 2, but pDst is not 2-byte aligned.
+ *	roi.width is equal to 4, but pDst is not 4-byte aligned.
+ *	roi.width is equal to 8, but pDst is not 8 byte aligned.
+ *	srcStep or dstStep is not a multiple of 8.
+ *
+ */
+
+OMXResult omxVCM4P10_InterpolateChroma (
+     const OMX_U8* pSrc,
+     OMX_S32 srcStep,
+     OMX_U8* pDst,
+     OMX_S32 dstStep,
+     OMX_S32 dx,
+     OMX_S32 dy,
+     OMXSize roi
+ )
+{   /* Thin wrapper: unpack roi and forward every argument to the ARM helper. */
+    OMX_U8 *pRef = (OMX_U8 *)pSrc;  /* same cast as before: the helper is called with OMX_U8* */
+    return armVCM4P10_Interpolate_Chroma(pRef, srcStep, pDst, dstStep, roi.width, roi.height, dx, dy);
+}
+
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
new file mode 100755
index 0000000..942ebc6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
@@ -0,0 +1,553 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_InterpolateLuma_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+;// Function:
+;//     omxVCM4P10_InterpolateLuma
+;//
+;// This function implements omxVCM4P10_InterpolateLuma in ARM NEON (Cortex-A8) assembly.
+;// Performs quarter pel interpolation of inter luma MB.
+;// It's assumed that the frame is already padded when calling this function.
+;// Parameters:
+;// [in]    pSrc        Pointer to the source reference frame buffer
+;// [in]    srcStep     Reference frame step in byte
+;// [in]    dstStep     Destination frame step in byte. Must be multiple of roi.width
+;// [in]    dx          Fractional part of horizontal motion vector
+;//                         component in 1/4 pixel unit; valid in the range [0,3]
+;// [in]    dy          Fractional part of vertical motion vector
+;//                         component in 1/4 pixel unit; valid in the range [0,3]
+;// [in]    roi         Dimension of the interpolation region;the parameters roi.width and roi.height must
+;//                         be equal to either 4, 8, or 16.
+;// [out]   pDst        Pointer to the destination frame buffer.
+;//                   if roi.width==4,  4-byte alignment required
+;//                   if roi.width==8,  8-byte alignment required
+;//                   if roi.width==16, 16-byte alignment required
+;//
+;// Return Value:
+;// If the function runs without error, it returns OMX_Sts_NoErr.
+;// It is assumed that the following conditions are satisfied before calling this function:
+;//     pSrc and pDst are not NULL.
+;//     srcStep and dstStep are >= roi.width.
+;//     dx and dy are in the range [0,3].
+;//     roi.width and roi.height are each one of {4, 8, 16}.
+;//     If roi.width is equal to 4, pDst is 4-byte aligned.
+;//     If roi.width is equal to 8, pDst is 8-byte aligned.
+;//     If roi.width is equal to 16, pDst is 16-byte aligned.
+;//     srcStep and dstStep are multiples of 8.
+;//
+;//
+
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS CortexA8
+
+        EXPORT omxVCM4P10_InterpolateLuma
+        
+
+    IF CortexA8
+        IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+        IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+    ENDIF
+    
+    
+
+;// Declare input registers (iHeight/iWidth are filled from stack arguments by M_LDR in the prologue)
+pSrc            RN 0    ;// source pointer for the current 4x4 block
+srcStep         RN 1    ;// source row step
+pDst            RN 2    ;// destination pointer for the current 4x4 block
+dstStep         RN 3    ;// destination row step
+iHeight         RN 4    ;// rows still to process; decremented by 4 per row of blocks
+iWidth          RN 5    ;// columns still to process in the current row of blocks; decremented by 4 per block
+
+;// Declare other intermediate registers
+idx             RN 6    ;// dx as loaded from the stack
+idy             RN 7    ;// dy as loaded from the stack
+index           RN 6    ;// idx + 4*idy; shares r6 with idx, which is not read again once index is formed
+Temp            RN 12   ;// second row pointer (base + 2*step) for the interleaved row loads/stores
+pArgs           RN 11   ;// address of the ppArgs scratch area holding {pSrc,srcStep,pDst,dstStep}
+
+
+    IF CortexA8
+
+        ;//
+        ;// Interpolation of luma is implemented by processing blocks of pixels, 4x4 at a time.
+        ;//
+        M_ALLOC4    ppArgs, 16                      ;// scratch area; 16 matches the four words saved by STM pArgs,{pSrc,srcStep,pDst,dstStep}
+        ;// NOTE(review): M_ALLOC4/M_START are macros from armCOMM_s.h; presumably "r11, d15" preserves r4-r11 and d8-d15 -- confirm
+        ;// Function header
+        M_START omxVCM4P10_InterpolateLuma, r11, d15
+
+pSrcBK          RN 8        ;// copy of the block's pSrc, used by the cases that call two helpers
+
+;// Declare Neon registers (many names below alias the same physical D/Q register on purpose)
+dCoeff5         DN 30.S16   ;// set to 5 in every lane before the block loop
+dCoeff20        DN 31.S16   ;// set to 20 in every lane before the block loop
+
+;// Registers used for implementing Horizontal interpolation
+dSrc0c          DN 14.U8
+dSrc1c          DN 16.U8
+dSrc2c          DN 18.U8
+dSrc3c          DN 20.U8                   
+dSrc0d          DN 15.U8
+dSrc1d          DN 17.U8
+dSrc2d          DN 19.U8
+dSrc3d          DN 21.U8
+dAccH0          DN 22.U8
+dAccH1          DN 24.U8
+dAccH2          DN 26.U8
+dAccH3          DN 28.U8
+dResultH0       DN 22.U32   ;// dResultH0-3 are dAccH0-3 viewed as 32-bit lanes (for the 4-byte row stores)
+dResultH1       DN 24.U32
+dResultH2       DN 26.U32
+dResultH3       DN 28.U32
+
+;// Registers used for implementing Vertical interpolation
+dSrc0           DN 9.U8
+dSrc1           DN 10.U8
+dSrc2           DN 11.U8
+dSrc3           DN 12.U8
+dSrc4           DN 13.U8
+dAccV0          DN 0.U8
+dAccV1          DN 2.U8
+dAccV2          DN 4.U8
+dAccV3          DN 6.U8
+dResultV0       DN 0.U32    ;// dResultV0-3 are dAccV0-3 viewed as 32-bit lanes
+dResultV1       DN 2.U32
+dResultV2       DN 4.U32
+dResultV3       DN 6.U32
+        
+;// Registers used for implementing Diagonal interpolation
+dTAcc0          DN 0.U8     ;// dTAcc0-3 / dTRes0-3 share D0,D2,D4,D6 with dAccV0-3
+dTAcc1          DN 2.U8
+dTAcc2          DN 4.U8
+dTAcc3          DN 6.U8
+dTRes0          DN 0.32
+dTRes1          DN 2.32
+dTRes2          DN 4.32
+dTRes3          DN 6.32
+dTResult0       DN 14.U8    ;// dTResult0-3 share D14,D16,D18,D20 with dSrc0c-3c
+dTResult1       DN 16.U8
+dTResult2       DN 18.U8
+dTResult3       DN 20.U8
+dTempP0         DN 18.S16   ;// dTempX0/dTempX1 are the low/high halves of the matching qTempX01 below
+dTempP1         DN 19.S16
+dTempQ0         DN 20.S16
+dTempQ1         DN 21.S16
+dTempR0         DN 22.S16
+dTempR1         DN 23.S16
+dTempS0         DN 24.S16
+dTempS1         DN 25.S16
+qTempP01        QN 9.S16    ;// Q9  = D18:D19
+qTempQ01        QN 10.S16   ;// Q10 = D20:D21
+qTempR01        QN 11.S16   ;// Q11 = D22:D23
+qTempS01        QN 12.S16   ;// Q12 = D24:D25
+
+;// Intermediate values for averaging (qRes4-6 are the same registers as qTempP01/Q01/R01)
+qRes2           QN 7.S16
+qRes3           QN 8.S16
+qRes4           QN 9.S16
+qRes5           QN 10.S16
+qRes6           QN 11.S16
+       
+;// For implementing copy: dDst0-3 are dSrc0-3 (D9-D12) viewed as 32-bit lanes
+dDst0            DN 9.32
+dDst1            DN 10.32
+dDst2            DN 11.32
+dDst3            DN 12.32
+
+        ;// Define stack arguments (declared in the order dx, dy, width, height)
+        M_ARG       ptridx, 4
+        M_ARG       ptridy, 4        
+        M_ARG       ptrWidth, 4
+        M_ARG       ptrHeight, 4        
+
+        ;// Load dx, dy and the roi width/height from the stack arguments
+        M_LDR       idx, ptridx
+        M_LDR       idy, ptridy
+        M_LDR       iWidth, ptrWidth
+        M_LDR       iHeight, ptrHeight
+        ;// index selects one of the 16 fractional positions; it stays constant for every 4x4 block
+        ADD         index, idx, idy, LSL #2                 ;//  [index] = [idy][idx], i.e. idx + 4*idy
+        M_ADR       pArgs, ppArgs                           ;// pArgs -> scratch area used to save/restore the block arguments
+                    
+        ;// Move coefficients into Neon registers; NOTE(review): not read in this file, presumably consumed by the _unsafe helpers
+        VMOV        dCoeff20, #20
+        VMOV        dCoeff5, #5
+                                        
+Block4x4WidthLoop
+Block4x4HeightLoop
+        ;// Both loops re-enter here: save the block arguments, then dispatch on the fractional position
+        STM         pArgs, {pSrc,srcStep,pDst,dstStep}      ;// restored by LDM at Block4x4LoopEnd; the cases below modify pSrc/pDst
+                                                            
+        ;// switch table using motion vector as index
+        ADD         pc, pc, index, LSL #2                   ;// NOTE(review): assumes ARM state, where pc reads as this instruction + 8
+        B           Case_f                                  ;// padding slot under that assumption (index 0 lands on the next entry)
+        B           Case_0        
+        B           Case_1        
+        B           Case_2        
+        B           Case_3        
+        B           Case_4        
+        B           Case_5        
+        B           Case_6        
+        B           Case_7        
+        B           Case_8        
+        B           Case_9        
+        B           Case_a        
+        B           Case_b        
+        B           Case_c        
+        B           Case_d
+        B           Case_e        
+        B           Case_f
+                    
+Case_0                
+        ;// Case G (index 0: dx=0, dy=0) - no interpolation, the 4x4 block is copied
+        M_PRINTF "Case 0 \n"
+        
+        ;// Loads a 4x4 block of .8 and stores as .32 (each VLD1 fills a whole D register; only lane 0 is stored)
+        ADD         Temp, pSrc, srcStep, LSL #1             ;// Temp -> source row 2
+        VLD1        dSrc0, [pSrc], srcStep                  ;// row 0, then pSrc -> row 1
+        VLD1        dSrc2, [Temp], srcStep                  ;// row 2, then Temp -> row 3
+        VLD1        dSrc1, [pSrc]                           ;// row 1
+        VLD1        dSrc3, [Temp]                           ;// row 3
+        
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dDst0[0], [pDst], dstStep               ;// dDstN names the same D register as dSrcN
+        VST1        dDst2[0], [Temp], dstStep
+        VST1        dDst1[0], [pDst]
+        VST1        dDst3[0], [Temp]
+        M_ADR       pArgs, ppArgs
+        B           Block4x4LoopEnd
+Case_1
+        ;// Case a (index 1: dx=1, dy=0) - horizontal helper result averaged with dSrc0c-3c
+        M_PRINTF "Case 1 \n"
+        ;// NOTE(review): relies on the helper's register contract (results in dAccH0-3, pixels left in dSrc0c-3c) -- confirm
+        SUB         pSrc, pSrc, #2                          ;// helper is entered two bytes to the left of the block
+        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe        
+        VRHADD      dAccH0, dAccH0, dSrc0c                  ;// rounding halving add, per row
+        VRHADD      dAccH2, dAccH2, dSrc2c
+        VRHADD      dAccH1, dAccH1, dSrc1c
+        VRHADD      dAccH3, dAccH3, dSrc3c
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dResultH0[0], [pDst], dstStep           ;// dResultHn is dAccHn as 32-bit lanes; lane 0 = 4 bytes
+        VST1        dResultH2[0], [Temp], dstStep
+        VST1        dResultH1[0], [pDst]
+        VST1        dResultH3[0], [Temp]
+        M_ADR       pArgs, ppArgs
+        B           Block4x4LoopEnd        
+Case_2
+        ;// Case b (index 2: dx=2, dy=0) - horizontal helper output is stored unmodified
+        M_PRINTF "Case 2 \n"
+        ;// NOTE(review): assumes the helper returns the four rows in dAccH0-3 (aliased by dResultH0-3) -- confirm
+        SUB         pSrc, pSrc, #2        
+        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dResultH0[0], [pDst], dstStep
+        VST1        dResultH2[0], [Temp], dstStep
+        VST1        dResultH1[0], [pDst]
+        VST1        dResultH3[0], [Temp]
+        M_ADR       pArgs, ppArgs
+        B           Block4x4LoopEnd        
+Case_3
+        ;// Case c (index 3: dx=3, dy=0) - as Case_1 but averaged with dSrc0d-3d instead of dSrc0c-3c
+        M_PRINTF "Case 3 \n"
+        ;// NOTE(review): relies on the helper's register contract (results in dAccH0-3, pixels left in dSrc0d-3d) -- confirm
+        SUB         pSrc, pSrc, #2                          ;// helper is entered two bytes to the left of the block
+        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe        
+        VRHADD      dAccH0, dAccH0, dSrc0d                  ;// rounding halving add, per row
+        VRHADD      dAccH2, dAccH2, dSrc2d
+        VRHADD      dAccH1, dAccH1, dSrc1d
+        VRHADD      dAccH3, dAccH3, dSrc3d
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dResultH0[0], [pDst], dstStep
+        VST1        dResultH2[0], [Temp], dstStep
+        VST1        dResultH1[0], [pDst]
+        VST1        dResultH3[0], [Temp]
+        M_ADR       pArgs, ppArgs
+        B           Block4x4LoopEnd        
+Case_4
+        ;// Case d (index 4: dx=0, dy=1) - vertical helper result averaged with dSrc0-3
+        M_PRINTF "Case 4 \n"
+        ;// NOTE(review): relies on the helper's register contract (results in dAccV0-3, rows left in dSrc0-3) -- confirm
+        SUB         pSrc, pSrc, srcStep, LSL #1             ;// helper is entered two rows above the block
+        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe        
+        VRHADD      dAccV0, dAccV0, dSrc0                   ;// rounding halving add, per row
+        VRHADD      dAccV2, dAccV2, dSrc2
+        VRHADD      dAccV1, dAccV1, dSrc1
+        VRHADD      dAccV3, dAccV3, dSrc3
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dResultV0[0], [pDst], dstStep           ;// dResultVn is dAccVn as 32-bit lanes; lane 0 = 4 bytes
+        VST1        dResultV2[0], [Temp], dstStep
+        VST1        dResultV1[0], [pDst]
+        VST1        dResultV3[0], [Temp]
+        M_ADR       pArgs, ppArgs
+        B           Block4x4LoopEnd        
+Case_5
+        ;// Case e (index 5: dx=1, dy=1) - average of the vertical and horizontal helper results
+        M_PRINTF "Case 5 \n"
+        ;// NOTE(review): assumes HalfHor leaves dAccV0-3 (D0,D2,D4,D6) from the earlier HalfVer call intact -- confirm
+        MOV         pSrcBK, pSrc                            ;// keep the block origin; pSrc is repositioned for each helper
+        SUB         pSrc, pSrc, srcStep, LSL #1             ;// vertical helper: two rows above the block
+        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe        
+        SUB         pSrc, pSrcBK, #2                        ;// horizontal helper: two bytes left of the block origin
+        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe        
+        VRHADD      dAccH0, dAccH0, dAccV0                  ;// rounding halving add, per row
+        VRHADD      dAccH2, dAccH2, dAccV2
+        VRHADD      dAccH1, dAccH1, dAccV1
+        VRHADD      dAccH3, dAccH3, dAccV3        
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dResultH0[0], [pDst], dstStep
+        VST1        dResultH2[0], [Temp], dstStep
+        VST1        dResultH1[0], [pDst]
+        VST1        dResultH3[0], [Temp]
+        
+        M_ADR       pArgs, ppArgs
+        B       Block4x4LoopEnd        
+Case_6
+        ;// Case f (index 6: dx=2, dy=1) - diagonal result averaged with rows narrowed from qRes2-5
+        M_PRINTF "Case 6 \n"
+        ;// NOTE(review): assumes the helper returns 8-bit rows in dTAcc0-3 and 16-bit intermediates in qRes2-5 -- confirm
+        SUB         pSrc, pSrc, srcStep, LSL #1             ;// helper is entered two rows above ...
+        SUB         pSrc, pSrc, #2                          ;// ... and two bytes left of the block
+        BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+        VQRSHRUN    dTResult0, qRes2, #5                    ;// saturating rounding >>5, narrowed to unsigned 8-bit
+        VQRSHRUN    dTResult1, qRes3, #5        
+        VQRSHRUN    dTResult2, qRes4, #5        
+        VQRSHRUN    dTResult3, qRes5, #5        
+        VRHADD      dTAcc0, dTAcc0, dTResult0               ;// rounding halving add, per row
+        VRHADD      dTAcc2, dTAcc2, dTResult2
+        VRHADD      dTAcc1, dTAcc1, dTResult1
+        VRHADD      dTAcc3, dTAcc3, dTResult3
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dTRes0[0], [pDst], dstStep              ;// dTResn is dTAccn as 32-bit lanes; lane 0 = 4 bytes
+        VST1        dTRes2[0], [Temp], dstStep
+        VST1        dTRes1[0], [pDst]
+        VST1        dTRes3[0], [Temp]
+        
+        M_ADR       pArgs, ppArgs
+        B       Block4x4LoopEnd        
+Case_7
+        ;// Case g (index 7: dx=3, dy=1) - as Case_5, but the vertical helper starts one byte to the right
+        M_PRINTF "Case 7 \n"
+        MOV         pSrcBK, pSrc                            ;// keep the block origin; pSrc is repositioned for each helper
+        ADD         pSrc, pSrc, #1                          ;// vertical helper: one byte right ...
+        SUB         pSrc, pSrc, srcStep, LSL #1             ;// ... and two rows above the block
+        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe        
+        SUB         pSrc, pSrcBK, #2                        ;// horizontal helper: two bytes left of the block origin
+        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe        
+        VRHADD      dAccH0, dAccH0, dAccV0                  ;// NOTE(review): assumes HalfHor leaves dAccV0-3 intact -- confirm
+        VRHADD      dAccH2, dAccH2, dAccV2
+        VRHADD      dAccH1, dAccH1, dAccV1
+        VRHADD      dAccH3, dAccH3, dAccV3        
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dResultH0[0], [pDst], dstStep
+        VST1        dResultH2[0], [Temp], dstStep
+        VST1        dResultH1[0], [pDst]
+        VST1        dResultH3[0], [Temp]
+        
+        M_ADR       pArgs, ppArgs
+        B       Block4x4LoopEnd
+Case_8
+        ;// Case h (index 8: dx=0, dy=2) - vertical helper output is stored unmodified
+        M_PRINTF "Case 8 \n"
+        ;// NOTE(review): assumes the helper returns the four rows in dAccV0-3 (aliased by dResultV0-3) -- confirm
+        SUB         pSrc, pSrc, srcStep, LSL #1             ;// helper is entered two rows above the block
+        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe        
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dResultV0[0], [pDst], dstStep
+        VST1        dResultV2[0], [Temp], dstStep
+        VST1        dResultV1[0], [pDst]
+        VST1        dResultV3[0], [Temp]
+        M_ADR       pArgs, ppArgs
+        B           Block4x4LoopEnd
+Case_9
+        ;// Case i (index 9: dx=1, dy=2) - diagonal result averaged with rows narrowed from qTempP01..S01
+        M_PRINTF "Case 9 \n"
+        SUB         pSrc, pSrc, srcStep, LSL #1             ;// helper is entered two rows above ...
+        SUB         pSrc, pSrc, #2                          ;// ... and two bytes left of the block
+        BL          armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+        VEXT        dTempP0, dTempP0, dTempP1, #2           ;// low half := elements 2.. of the dTempX0:dTempX1 pair
+        VEXT        dTempQ0, dTempQ0, dTempQ1, #2
+        VEXT        dTempR0, dTempR0, dTempR1, #2
+        VEXT        dTempS0, dTempS0, dTempS1, #2
+        ;// Keep this order: dTResult2/3 are D18/D20, the low halves of qTempP01/qTempQ01, which must be read first
+        VQRSHRUN    dTResult0, qTempP01, #5                 ;// saturating rounding >>5, narrowed to unsigned 8-bit
+        VQRSHRUN    dTResult1, qTempQ01, #5        
+        VQRSHRUN    dTResult2, qTempR01, #5        
+        VQRSHRUN    dTResult3, qTempS01, #5        
+        ;// NOTE(review): assumes the helper left the 8-bit diagonal rows in dTAcc0-3 -- confirm
+        VRHADD      dTAcc0, dTAcc0, dTResult0
+        VRHADD      dTAcc2, dTAcc2, dTResult2
+        VRHADD      dTAcc1, dTAcc1, dTResult1
+        VRHADD      dTAcc3, dTAcc3, dTResult3
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dTRes0[0], [pDst], dstStep
+        VST1        dTRes2[0], [Temp], dstStep
+        VST1        dTRes1[0], [pDst]
+        VST1        dTRes3[0], [Temp]
+        M_ADR       pArgs, ppArgs
+        B       Block4x4LoopEnd
+Case_a
+        ;// Case j (index 10: dx=2, dy=2) - diagonal helper output is stored unmodified
+        M_PRINTF "Case a \n"
+        ;// NOTE(review): assumes the helper returns the four rows in D0,D2,D4,D6 (dTRes0-3) -- confirm
+        SUB         pSrc, pSrc, srcStep, LSL #1             ;// helper is entered two rows above ...
+        SUB         pSrc, pSrc, #2                          ;// ... and two bytes left of the block
+        BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dTRes0[0], [pDst], dstStep
+        VST1        dTRes2[0], [Temp], dstStep
+        VST1        dTRes1[0], [pDst]
+        VST1        dTRes3[0], [Temp]
+        M_ADR       pArgs, ppArgs
+        B       Block4x4LoopEnd
+Case_b
+        ;// Case k (index 11: dx=3, dy=2) - as Case_9 but the VEXT offset is 3 instead of 2
+        M_PRINTF "Case b \n"
+        SUB         pSrc, pSrc, srcStep, LSL #1             ;// helper is entered two rows above ...
+        SUB         pSrc, pSrc, #2                          ;// ... and two bytes left of the block
+        BL          armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+        VEXT        dTempP0, dTempP0, dTempP1, #3           ;// low half := elements 3.. of the dTempX0:dTempX1 pair
+        VEXT        dTempQ0, dTempQ0, dTempQ1, #3
+        VEXT        dTempR0, dTempR0, dTempR1, #3
+        VEXT        dTempS0, dTempS0, dTempS1, #3
+        ;// Keep this order: dTResult2/3 are D18/D20, the low halves of qTempP01/qTempQ01, which must be read first
+        VQRSHRUN    dTResult0, qTempP01, #5                 ;// saturating rounding >>5, narrowed to unsigned 8-bit
+        VQRSHRUN    dTResult1, qTempQ01, #5        
+        VQRSHRUN    dTResult2, qTempR01, #5        
+        VQRSHRUN    dTResult3, qTempS01, #5        
+        ;// NOTE(review): assumes the helper left the 8-bit diagonal rows in dTAcc0-3 -- confirm
+        VRHADD      dTAcc0, dTAcc0, dTResult0
+        VRHADD      dTAcc2, dTAcc2, dTResult2
+        VRHADD      dTAcc1, dTAcc1, dTResult1
+        VRHADD      dTAcc3, dTAcc3, dTResult3
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dTRes0[0], [pDst], dstStep
+        VST1        dTRes2[0], [Temp], dstStep
+        VST1        dTRes1[0], [pDst]
+        VST1        dTRes3[0], [Temp]
+        M_ADR       pArgs, ppArgs
+        B       Block4x4LoopEnd
+Case_c
+        ;// Case n (index 12: dx=0, dy=3) - as Case_4 but averaged with dSrc1-4 instead of dSrc0-3
+        M_PRINTF "Case c \n"
+        ;// NOTE(review): relies on the helper's register contract (results in dAccV0-3, rows left in dSrc1-4) -- confirm
+        SUB         pSrc, pSrc, srcStep, LSL #1             ;// helper is entered two rows above the block
+        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe        
+        VRHADD      dAccV0, dAccV0, dSrc1                   ;// rounding halving add, per row
+        VRHADD      dAccV2, dAccV2, dSrc3
+        VRHADD      dAccV1, dAccV1, dSrc2
+        VRHADD      dAccV3, dAccV3, dSrc4
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dResultV0[0], [pDst], dstStep
+        VST1        dResultV2[0], [Temp], dstStep
+        VST1        dResultV1[0], [pDst]
+        VST1        dResultV3[0], [Temp]
+        M_ADR       pArgs, ppArgs
+        B           Block4x4LoopEnd
+Case_d
+        ;// Case p (index 13: dx=1, dy=3) - as Case_5, but the horizontal helper starts one row lower
+        M_PRINTF "Case d \n"
+        ;// NOTE(review): assumes HalfHor leaves dAccV0-3 (D0,D2,D4,D6) from the earlier HalfVer call intact -- confirm
+        MOV         pSrcBK, pSrc                            ;// keep the block origin; pSrc is repositioned for each helper
+        SUB         pSrc, pSrc, srcStep, LSL #1             ;// vertical helper: two rows above the block
+        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe        
+        ADD         pSrc, pSrcBK, srcStep                   ;// horizontal helper: one row below the origin ...
+        SUB         pSrc, pSrc, #2                          ;// ... and two bytes to the left
+        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe        
+        VRHADD      dAccH0, dAccH0, dAccV0                  ;// rounding halving add, per row
+        VRHADD      dAccH2, dAccH2, dAccV2
+        VRHADD      dAccH1, dAccH1, dAccV1
+        VRHADD      dAccH3, dAccH3, dAccV3        
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dResultH0[0], [pDst], dstStep
+        VST1        dResultH2[0], [Temp], dstStep
+        VST1        dResultH1[0], [pDst]
+        VST1        dResultH3[0], [Temp]
+        M_ADR       pArgs, ppArgs
+        B       Block4x4LoopEnd
+Case_e
+        ;// Case q (index 14: dx=2, dy=3) - as Case_6 but narrowing qRes3-6 instead of qRes2-5
+        M_PRINTF "Case e \n"
+        ;// NOTE(review): assumes the helper returns 8-bit rows in dTAcc0-3 and 16-bit intermediates in qRes3-6 -- confirm
+        SUB         pSrc, pSrc, srcStep, LSL #1             ;// helper is entered two rows above ...
+        SUB         pSrc, pSrc, #2                          ;// ... and two bytes left of the block
+        BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+        VQRSHRUN    dTResult0, qRes3, #5                    ;// saturating rounding >>5, narrowed to unsigned 8-bit
+        VQRSHRUN    dTResult1, qRes4, #5        
+        VQRSHRUN    dTResult2, qRes5, #5        
+        VQRSHRUN    dTResult3, qRes6, #5        
+        ;// Keep the order above: each dTResultN (D14,D16,D18,D20) is the low half of the Q register read one step earlier
+        VRHADD      dTAcc0, dTAcc0, dTResult0
+        VRHADD      dTAcc2, dTAcc2, dTResult2
+        VRHADD      dTAcc1, dTAcc1, dTResult1
+        VRHADD      dTAcc3, dTAcc3, dTResult3
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dTRes0[0], [pDst], dstStep
+        VST1        dTRes2[0], [Temp], dstStep
+        VST1        dTRes1[0], [pDst]
+        VST1        dTRes3[0], [Temp]
+        M_ADR       pArgs, ppArgs
+        B       Block4x4LoopEnd
+Case_f
+        ;// Case r (index 15: dx=3, dy=3) - vertical helper one byte right, horizontal helper one row down, then averaged
+        M_PRINTF "Case f \n"
+        MOV         pSrcBK, pSrc                            ;// keep the block origin; pSrc is repositioned for each helper
+        ADD         pSrc, pSrc, #1                          ;// vertical helper: one byte right ...
+        SUB         pSrc, pSrc, srcStep, LSL #1             ;// ... and two rows above the block
+        BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe        
+        ADD         pSrc, pSrcBK, srcStep                   ;// horizontal helper: one row below the origin ...
+        SUB         pSrc, pSrc, #2                          ;// ... and two bytes to the left
+        BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe        
+        VRHADD      dAccH0, dAccH0, dAccV0                  ;// NOTE(review): assumes HalfHor leaves dAccV0-3 intact -- confirm
+        VRHADD      dAccH2, dAccH2, dAccV2
+        VRHADD      dAccH1, dAccH1, dAccV1
+        VRHADD      dAccH3, dAccH3, dAccV3        
+        ADD         Temp, pDst, dstStep, LSL #1             ;// Temp -> destination row 2
+        VST1        dResultH0[0], [pDst], dstStep
+        VST1        dResultH2[0], [Temp], dstStep
+        VST1        dResultH1[0], [pDst]
+        VST1        dResultH3[0], [Temp]
+        M_ADR       pArgs, ppArgs                           ;// last case: falls through to Block4x4LoopEnd
+
+
+Block4x4LoopEnd
+        ;// Common tail of every case: restore the block arguments and step to the next 4x4 block
+        ;// Width Loop
+        ;// (pArgs was already re-derived with M_ADR by the case that got here)
+        LDM         pArgs, {pSrc,srcStep,pDst,dstStep}  ;// Load arguments
+        SUBS        iWidth, iWidth, #4                  ;// sets the flags tested by BGT below
+        ADD         pSrc, pSrc, #4                      ;// next block: 4 bytes to the right
+        ADD         pDst, pDst, #4
+        BGT         Block4x4WidthLoop
+                    
+        ;// Height Loop (pSrc/pDst have advanced by the full width at this point)
+        SUBS        iHeight, iHeight, #4                ;// sets the flags tested by the final BGT
+        M_LDR       iWidth, ptrWidth                    ;// reload width; NOTE(review): M_LDR/M_ADR must not alter flags -- confirm
+        M_ADR       pArgs, ppArgs
+        ADD         pSrc, pSrc, srcStep, LSL #2         ;// down 4 rows
+        ADD         pDst, pDst, dstStep, LSL #2
+        SUB         pSrc, pSrc, iWidth                  ;// back to the first column
+        SUB         pDst, pDst, iWidth
+        BGT         Block4x4HeightLoop
+
+EndOfInterpolation
+        MOV         r0, #0                              ;// return 0; NOTE(review): presumably OMX_Sts_NoErr -- confirm
+        M_END       
+
+    ENDIF  
+        ;// End of CortexA8
+                    
+    END
+    
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
new file mode 100755
index 0000000..3a60705
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
@@ -0,0 +1,436 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_PredictIntraChroma_8x8_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+  
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        EXPORT armVCM4P10_pIndexTable8x8
+        
+;// Define the processor variants supported by this file
+         
+         M_VARIANTS CortexA8
+     
+     AREA table, DATA    
+;//-------------------------------------------------------
+;// This table implements a C-style switch/case in asm by
+;// the method of two levels of indexing.
+;//-------------------------------------------------------
+
+    M_TABLE armVCM4P10_pIndexTable8x8
+    DCD  OMX_VC_CHROMA_DC,     OMX_VC_CHROMA_HOR 
+    DCD  OMX_VC_CHROMA_VERT,   OMX_VC_CHROMA_PLANE  
+    
+    M_TABLE armVCM4P10_MultiplierTableChroma8x8,1
+    DCW   3, 2, 1,4 
+    DCW  -3,-2,-1,0
+    DCW   1, 2, 3,4
+    
+        
+        
+    IF CortexA8
+
+;//--------------------------------------------
+;// Scratch variable
+;//--------------------------------------------
+ 
+pc              RN 15   
+return          RN 0  
+pTable          RN 8 
+  
+;//--------------------------------------------
+;// Input Arguments
+;//--------------------------------------------
+pSrcLeft        RN 0    ;// input pointer
+pSrcAbove       RN 1    ;// input pointer
+pSrcAboveLeft   RN 2    ;// input pointer
+pDst            RN 3    ;// output pointer
+leftStep        RN 4    ;// input variable
+dstStep         RN 5    ;// input variable
+predMode        RN 6    ;// input variable
+availability    RN 7    ;// input variable
+pMultiplierTable    RN  2     
+
+pTmp            RN 9
+step            RN 10
+
+;//---------------------
+;// Neon Registers
+;//---------------------
+
+;// OMX_VC_CHROMA_HOR
+
+dLeftVal0       DN  D0.8
+dLeftVal1       DN  D1.8
+dLeftVal2       DN  D2.8
+dLeftVal3       DN  D3.8
+dLeftVal4       DN  D4.8
+dLeftVal5       DN  D5.8
+dLeftVal6       DN  D6.8
+dLeftVal7       DN  D7.8
+
+;// OMX_VC_CHROMA_VERT
+
+dAboveVal       DN  D0.U8
+
+;// OMX_VC_CHROMA_DC
+
+dLeftVal        DN  D1.U8
+dSumAboveValU16 DN  D2.U16
+dSumAboveValU32 DN  D3.U32
+dSumAboveValU8  DN  D3.U8
+dSumLeftValU16  DN  D2.U16
+dSumLeftValU32  DN  D1.U32
+dSumLeftValU8   DN  D1.U8
+dSumAboveLeft   DN  D2.U32
+dSumAboveLeftU8 DN  D2.U8
+dIndexRow0U8    DN  D5.U8
+dIndexRow0      DN  D5.U64
+dIndexRow4U8    DN  D6.U8
+dIndexRow4      DN  D6.U64
+dDstRow0        DN  D0.U8
+dDstRow4        DN  D4.U8
+dConst128U8     DN  D0.U8
+
+;// OMX_VC_CHROMA_PLANE
+
+dRevAboveVal    DN  D3.U8  
+dRevAboveValU64 DN  D3.U64  
+dAboveLeftVal   DN  D2.U8
+qAbove7minus0   QN  Q3.S16 
+qAboveDiff      QN  Q2.S16 
+dIndex          DN  D8.U8  
+dDiffAboveU8    DN  D9.U8  
+dDiffAboveS16   DN  D9.S16 
+dAboveDiff0U8   DN  D4.U8  
+dAboveDiff0U64  DN  D4.U64
+dAbove7minus0U8 DN  D6.U8  
+dMultiplier     DN  D10.S16 
+dHorPred        DN  D11.S16 
+dRevLeftVal     DN  D3.U8
+dRevLeftValU64  DN  D3.U64
+qLeft7minus0    QN  Q7.S16
+qLeftDiff       QN  Q6.S16
+dDiffLeftU8     DN  D16.U8
+dDiffLeftS16    DN  D16.S16
+dLeftDiff0U8    DN  D12.U8
+dLeftDiff0U64   DN  D12.U64
+dLeft7minus0U8  DN  D14.U8
+dVerPred        DN  D3.S16 
+dHVValS16       DN  D3.S16
+dHVValS32       DN  D3.S32
+dHVTempS32      DN  D2.S32
+qA              QN  Q0.S16
+qB              QN  Q2.S16
+qC              QN  Q3.S16
+qMultiplier     QN  Q5.S16
+dMultiplier0    DN  D10.S16
+dMultiplier1    DN  D11.S16
+qC0             QN  Q0.S16
+qC1             QN  Q1.S16
+qC2             QN  Q4.S16
+qC3             QN  Q5.S16
+qC4             QN  Q6.S16
+qC5             QN  Q7.S16
+qC6             QN  Q8.S16
+qC7             QN  Q9.S16
+qSum0           QN  Q0.S16
+qSum1           QN  Q1.S16
+qSum2           QN  Q4.S16
+qSum3           QN  Q5.S16
+qSum4           QN  Q6.S16
+qSum5           QN  Q7.S16
+qSum6           QN  Q8.S16
+qSum7           QN  Q9.S16
+dSum0           DN  D0.U8
+dSum1           DN  D1.U8
+dSum2           DN  D2.U8
+dSum3           DN  D3.U8
+dSum4           DN  D4.U8
+dSum5           DN  D5.U8
+dSum6           DN  D6.U8
+dSum7           DN  D7.U8
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntraChroma_8x8 starts
+;//-----------------------------------------------------------------------------------------------
+        
+        ;// Write function header
+        M_START omxVCM4P10_PredictIntraChroma_8x8, r10, d15
+        
+        ;// Define stack arguments
+        M_ARG    LeftStep,     4
+        M_ARG    DstStep,      4
+        M_ARG    PredMode,     4
+        M_ARG    Availability, 4
+        
+        LDR      pTable,=armVCM4P10_pIndexTable8x8   ;// Load index table for switch case
+        
+        ;// Load argument from the stack
+        M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg 
+        M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg 
+        M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg         
+        M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg 
+        
+        
+        LDR      pc, [pTable, predMode, LSL #2]      ;// Branch to the case based on preMode
+
+OMX_VC_CHROMA_DC
+        
+        TST     availability, #OMX_VC_LEFT
+        BEQ     DCChroma8x8LeftNotAvailable
+
+        ADD     pTmp, pSrcLeft, leftStep
+        ADD     step, leftStep, leftStep
+
+        ;// Load Left Edge
+        VLD1    {dLeftVal[0]},[pSrcLeft],step               ;// pSrcLeft[0*leftStep]
+        VLD1    {dLeftVal[1]},[pTmp],step                   ;// pSrcLeft[1*leftStep]
+        VLD1    {dLeftVal[2]},[pSrcLeft],step               ;// pSrcLeft[2*leftStep]
+        VLD1    {dLeftVal[3]},[pTmp],step                   ;// pSrcLeft[3*leftStep]
+        VLD1    {dLeftVal[4]},[pSrcLeft],step               ;// pSrcLeft[4*leftStep]
+        VLD1    {dLeftVal[5]},[pTmp],step                   ;// pSrcLeft[5*leftStep]
+        VLD1    {dLeftVal[6]},[pSrcLeft],step               ;// pSrcLeft[6*leftStep]
+        VLD1    {dLeftVal[7]},[pTmp]                        ;// pSrcLeft[7*leftStep]      
+        
+        TST     availability, #OMX_VC_UPPER
+        BEQ     DCChroma8x8LeftOnlyAvailable
+
+        ;// Load Upper Edge also
+        VLD1     dAboveVal,[pSrcAbove]                      ;// pSrcAbove[0 to 7]  
+        
+        MOV      return, #OMX_Sts_NoErr                     ;// returnNoError
+        
+        VPADDL   dSumAboveValU16, dAboveVal                 ;// pSrcAbove[ 6+7 | 4+5 | 2+3 | 0+1 ]             
+        VPADDL   dSumAboveValU32, dSumAboveValU16           ;// pSrcAbove[ 4+5+6+7 |  0+1+2+3 ] 
+                
+        VPADDL   dSumLeftValU16, dLeftVal                   ;// pSrcLeft[ 6+7 | 4+5 | 2+3 | 0+1 ]             
+        VPADDL   dSumLeftValU32, dSumLeftValU16             ;// pSrcLeft[ 4+5+6+7 |  0+1+2+3 ]             
+        
+        VADD     dSumAboveLeft,dSumAboveValU32,dSumLeftValU32
+        VRSHR    dSumAboveLeft,dSumAboveLeft,#3             ;// Sum = (Sum + 4) >> 3
+        VRSHR    dSumAboveValU32,dSumAboveValU32,#2         ;// Sum = (Sum + 2) >> 2
+        VRSHR    dSumLeftValU32,dSumLeftValU32,#2           ;// Sum = (Sum + 2) >> 2
+        
+        VMOV     dIndexRow0U8,#0x0c                         
+        VMOV     dIndexRow4U8,#0x04
+        VSHL     dIndexRow0,dIndexRow0,#32                  ;// index0 = 0x0c0c0c0c00000000 
+        VSHR     dIndexRow4,dIndexRow4,#32                  ;// index4 = 0x0000000004040404
+        VADD     dIndexRow4U8,dIndexRow4U8,dIndexRow0U8     ;// index4 = 0x0c0c0c0c04040404
+        VTBL     dDstRow0,{dSumAboveLeftU8,dSumAboveValU8},dIndexRow0U8
+        VTBL     dDstRow4,{dSumLeftValU8,dSumAboveLeftU8},dIndexRow4U8
+ 
+DCChroma8x8LeftStore       
+        ADD     pTmp, pDst, dstStep
+        ADD     step, dstStep, dstStep
+        
+        VST1     dDstRow0,[pDst],step                    ;// pDst[0*dstStep+x] :0<= x <= 7
+        VST1     dDstRow0,[pTmp],step                    ;// pDst[1*dstStep+x] :0<= x <= 7
+        VST1     dDstRow0,[pDst],step                    ;// pDst[2*dstStep+x] :0<= x <= 7
+        VST1     dDstRow0,[pTmp],step                    ;// pDst[3*dstStep+x] :0<= x <= 7
+        VST1     dDstRow4,[pDst],step                    ;// pDst[4*dstStep+x] :0<= x <= 7
+        VST1     dDstRow4,[pTmp],step                    ;// pDst[5*dstStep+x] :0<= x <= 7
+        VST1     dDstRow4,[pDst],step                    ;// pDst[6*dstStep+x] :0<= x <= 7
+        VST1     dDstRow4,[pTmp]                         ;// pDst[7*dstStep+x] :0<= x <= 7
+
+        M_EXIT
+        
+
+DCChroma8x8LeftOnlyAvailable
+
+        MOV      return, #OMX_Sts_NoErr
+        
+        VPADDL   dSumLeftValU16, dLeftVal                   ;// pSrcLeft[ 6+7 | 4+5 | 2+3 | 0+1 ]             
+        VPADDL   dSumLeftValU32, dSumLeftValU16             ;// pSrcLeft[ 4+5+6+7 |  0+1+2+3 ]   
+        VRSHR    dSumLeftValU32,dSumLeftValU32,#2           ;// Sum = (Sum + 2) >> 2
+        
+        VDUP     dDstRow0,dSumLeftValU8[0]
+        VDUP     dDstRow4,dSumLeftValU8[4]
+        
+        B        DCChroma8x8LeftStore  
+        
+
+DCChroma8x8LeftNotAvailable
+                 
+        TST     availability, #OMX_VC_UPPER
+        BEQ     DCChroma8x8NoneAvailable
+
+        ;// Load Upper Edge 
+        VLD1     dAboveVal,[pSrcAbove]                      ;// pSrcAbove[0 to 7]  
+        MOV      return, #OMX_Sts_NoErr                     ;// returnNoError
+        
+        VPADDL   dSumAboveValU16, dAboveVal                 ;// pSrcAbove[ 6+7 | 4+5 | 2+3 | 0+1 ]             
+        VPADDL   dSumAboveValU32, dSumAboveValU16           ;// pSrcAbove[ 4+5+6+7 |  0+1+2+3 ] 
+        VRSHR    dSumAboveValU32,dSumAboveValU32,#2         ;// Sum = (Sum + 2) >> 2
+        VMOV     dIndexRow0U8,#0x04
+        VSHL     dIndexRow0,dIndexRow0,#32                  ;// index = 0x0404040400000000
+        VTBL     dDstRow0,{dSumAboveValU8},dIndexRow0U8 
+        
+        B        DCChroma8x8UpperStore
+        
+
+DCChroma8x8NoneAvailable        
+        
+        VMOV     dConst128U8,#0x80                          ;// 0x8080808080808080 if(count == 0)
+        MOV      return, #OMX_Sts_NoErr                     ;// returnNoError
+
+DCChroma8x8UpperStore        
+        
+        ADD     pTmp, pDst, dstStep
+        ADD     step, dstStep, dstStep
+        
+        VST1     dDstRow0,[pDst],step                    ;// pDst[0*dstStep+x] :0<= x <= 7
+        VST1     dDstRow0,[pTmp],step                    ;// pDst[1*dstStep+x] :0<= x <= 7
+        VST1     dDstRow0,[pDst],step                    ;// pDst[2*dstStep+x] :0<= x <= 7
+        VST1     dDstRow0,[pTmp],step                    ;// pDst[3*dstStep+x] :0<= x <= 7
+        VST1     dDstRow0,[pDst],step                    ;// pDst[4*dstStep+x] :0<= x <= 7
+        VST1     dDstRow0,[pTmp],step                    ;// pDst[5*dstStep+x] :0<= x <= 7
+        VST1     dDstRow0,[pDst],step                    ;// pDst[6*dstStep+x] :0<= x <= 7
+        VST1     dDstRow0,[pTmp]                         ;// pDst[7*dstStep+x] :0<= x <= 7
+        
+        M_EXIT
+
+
+OMX_VC_CHROMA_VERT
+        
+        VLD1     dAboveVal,[pSrcAbove]                      ;// pSrcAbove[x]      :0<= x <= 7   
+        MOV      return, #OMX_Sts_NoErr
+        
+        B        DCChroma8x8UpperStore
+        
+
+OMX_VC_CHROMA_HOR
+        
+        ADD     pTmp, pSrcLeft, leftStep
+        ADD     step, leftStep, leftStep
+        
+        VLD1    {dLeftVal0[]},[pSrcLeft],step           ;// pSrcLeft[0*leftStep]
+        VLD1    {dLeftVal1[]},[pTmp],step               ;// pSrcLeft[1*leftStep]
+        VLD1    {dLeftVal2[]},[pSrcLeft],step           ;// pSrcLeft[2*leftStep]
+        VLD1    {dLeftVal3[]},[pTmp],step               ;// pSrcLeft[3*leftStep]
+        VLD1    {dLeftVal4[]},[pSrcLeft],step           ;// pSrcLeft[4*leftStep]
+        VLD1    {dLeftVal5[]},[pTmp],step               ;// pSrcLeft[5*leftStep]
+        VLD1    {dLeftVal6[]},[pSrcLeft],step           ;// pSrcLeft[6*leftStep]
+        VLD1    {dLeftVal7[]},[pTmp]                    ;// pSrcLeft[7*leftStep]
+        
+        B        DCChroma8x8PlaneStore
+        
+        
+OMX_VC_CHROMA_PLANE
+        ADD     pTmp, pSrcLeft, leftStep
+        ADD     step, leftStep, leftStep
+        
+        VLD1    dAboveVal,[pSrcAbove]                       ;// pSrcAbove[x]      :0<= x <= 7   
+        VLD1    dAboveLeftVal[0],[pSrcAboveLeft]
+        
+        VLD1    {dLeftVal[0]},[pSrcLeft],step               ;// pSrcLeft[0*leftStep]
+        VLD1    {dLeftVal[1]},[pTmp],step                   ;// pSrcLeft[1*leftStep]
+        VLD1    {dLeftVal[2]},[pSrcLeft],step               ;// pSrcLeft[2*leftStep]
+        VLD1    {dLeftVal[3]},[pTmp],step                   ;// pSrcLeft[3*leftStep]
+        VLD1    {dLeftVal[4]},[pSrcLeft],step               ;// pSrcLeft[4*leftStep]
+        VLD1    {dLeftVal[5]},[pTmp],step                   ;// pSrcLeft[5*leftStep]
+        VLD1    {dLeftVal[6]},[pSrcLeft],step               ;// pSrcLeft[6*leftStep]
+        VLD1    {dLeftVal[7]},[pTmp]                        ;// pSrcLeft[7*leftStep] 
+        
+        
+        VREV64  dRevAboveVal,dAboveVal                      ;// Reverse order of bytes = pSrcAbove[0:1:2:3:4:5:6:7]
+        VSUBL   qAbove7minus0,dRevAboveVal,dAboveLeftVal    ;// qAbove7minus0[0] = pSrcAbove[7] - pSrcAboveLeft[0]
+        VSHR    dRevAboveValU64,dRevAboveValU64,#8          ;// pSrcAbove[X:0:1:2:3:4:5:6]
+        VSUBL   qAboveDiff,dRevAboveVal,dAboveVal           ;// pSrcAbove[6] - pSrcAbove[0]
+                                                            ;// pSrcAbove[5] - pSrcAbove[1]
+                                                            ;// pSrcAbove[4] - pSrcAbove[2]
+        
+        VREV64  dRevLeftVal,dLeftVal                        ;// Reverse order of bytes = pSrcLeft[0:1:2:3:4:5:6:7]
+        VSUBL   qLeft7minus0,dRevLeftVal,dAboveLeftVal      ;// qAbove7minus0[0] = pSrcLeft[7] - pSrcAboveLeft[0]
+        VSHR    dRevLeftValU64,dRevLeftValU64,#8            ;// pSrcLeft[X:0:1:2:3:4:5:6]
+        VSUBL   qLeftDiff,dRevLeftVal,dLeftVal              ;// pSrcLeft[6] - pSrcLeft[0]
+                                                            ;// pSrcLeft[5] - pSrcLeft[1]
+                                                            ;// pSrcLeft[4] - pSrcLeft[2]
+        
+        LDR     pMultiplierTable,=armVCM4P10_MultiplierTableChroma8x8   ;// Used to calculate Hval & Vval  
+        VSHL    dAboveDiff0U64,dAboveDiff0U64,#16  
+        VEXT    dDiffAboveU8,dAboveDiff0U8,dAbove7minus0U8,#2           ;// pSrcAbove[ 7-0 | 4-2 | 5-1 | 6-0 ]
+        VLD1    dMultiplier,[pMultiplierTable]! 
+        VSHL    dLeftDiff0U64,dLeftDiff0U64,#16  
+        VEXT    dDiffLeftU8,dLeftDiff0U8,dLeft7minus0U8,#2              ;// pSrcLeft[ 7-0 | 4-2 | 5-1 | 6-0 ]                                                   
+                                                                    
+        
+        VMUL    dHorPred,dDiffAboveS16,dMultiplier                      ;// pSrcAbove[ 4*(7-0) | 1*(4-2) | 2*(5-1) | 3*(6-0) ]
+        VMUL    dVerPred,dDiffLeftS16,dMultiplier
+        VPADD   dHVValS16,dHorPred,dVerPred
+        
+        
+        VPADDL  dHVValS32,dHVValS16                                     ;// [V|H] in 32 bits each
+        VSHL    dHVTempS32,dHVValS32,#4                                 ;// 17*H = 16*H + H = (H<<4)+H
+        VADD    dHVValS32,dHVValS32,dHVTempS32                          ;// [ 17*V  | 17*H ]in 32 bits each
+        VLD1    {dMultiplier0,dMultiplier1},[pMultiplierTable]          ;// qMultiplier = [ 4|3|2|1|0|-1|-2|-3 ]  
+        VRSHR   dHVValS32,dHVValS32,#5                                  ;// [c|b] in 16bits each
+        VADDL   qA,dAboveVal,dLeftVal
+        VDUP    qA,qA[7]
+        VSHL    qA,qA,#4                                                ;// [a|a|a|a|a|a|a|a]
+        VDUP    qB,dHVValS16[0]                                         ;// [b|b|b|b|b|b|b|b]
+        VDUP    qC,dHVValS16[2]                                         ;// [c|c|c|c|c|c|c|c]
+        
+        
+        VMUL    qB,qB,qMultiplier
+        VMUL    qC,qC,qMultiplier
+        VADD    qB,qB,qA 
+        
+        VDUP    qC0,qC[0]
+        VDUP    qC1,qC[1]
+        VDUP    qC2,qC[2]
+        VDUP    qC3,qC[3]
+        VDUP    qC4,qC[4]
+        VDUP    qC5,qC[5]
+        VDUP    qC6,qC[6]
+        VDUP    qC7,qC[7]
+        
+        VADD    qSum0,qB,qC0
+        VADD    qSum1,qB,qC1
+        VADD    qSum2,qB,qC2
+        VADD    qSum3,qB,qC3
+        VADD    qSum4,qB,qC4
+        VADD    qSum5,qB,qC5
+        VADD    qSum6,qB,qC6
+        VADD    qSum7,qB,qC7
+        
+        VQRSHRUN dSum0,qSum0,#5                         ;// (OMX_U8)armClip(0,255,(Sum+16)>>5)
+        VQRSHRUN dSum1,qSum1,#5
+        VQRSHRUN dSum2,qSum2,#5
+        VQRSHRUN dSum3,qSum3,#5
+        VQRSHRUN dSum4,qSum4,#5
+        VQRSHRUN dSum5,qSum5,#5
+        VQRSHRUN dSum6,qSum6,#5
+        VQRSHRUN dSum7,qSum7,#5      
+
+DCChroma8x8PlaneStore        
+        ADD     pTmp, pDst, dstStep
+        ADD     step, dstStep, dstStep
+        
+        VST1    dSum0,[pDst],step                    ;// pDst[0*dstStep+x] :0<= x <= 7
+        VST1    dSum1,[pTmp],step                    ;// pDst[1*dstStep+x] :0<= x <= 7
+        VST1    dSum2,[pDst],step                    ;// pDst[2*dstStep+x] :0<= x <= 7
+        VST1    dSum3,[pTmp],step                    ;// pDst[3*dstStep+x] :0<= x <= 7
+        VST1    dSum4,[pDst],step                    ;// pDst[4*dstStep+x] :0<= x <= 7
+        VST1    dSum5,[pTmp],step                    ;// pDst[5*dstStep+x] :0<= x <= 7
+        VST1    dSum6,[pDst],step                    ;// pDst[6*dstStep+x] :0<= x <= 7
+        VST1    dSum7,[pTmp]                         ;// pDst[7*dstStep+x] :0<= x <= 7       
+        
+        MOV     return, #OMX_Sts_NoErr
+        M_END
+        
+        ENDIF ;// CortexA8
+        
+        END
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntraChroma_8x8 ends
+;//-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
new file mode 100755
index 0000000..e9c0eee
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
@@ -0,0 +1,424 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_PredictIntra_16x16_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS CortexA8
+     
+  
+;//-------------------------------------------------------
+;// This table implements a C-style switch statement in asm
+;// by the method of two levels of indexing.
+;//-------------------------------------------------------
+
+    M_TABLE armVCM4P10_pIndexTable16x16          ;// code addresses of the four cases; entry predMode is loaded into pc
+    DCD  OMX_VC_16X16_VERT, OMX_VC_16X16_HOR     ;// entries 0 and 1
+    DCD  OMX_VC_16X16_DC,   OMX_VC_16X16_PLANE   ;// entries 2 and 3
+    
+
+    IF CortexA8
+
+    M_TABLE armVCM4P10_MultiplierTable16x16,1    ;// read by the plane case in three consecutive 8-halfword loads
+    DCW   7,  6,  5,  4,  3,  2,  1,  8          ;// weights multiplied with the above/left edge differences (H and V sums)
+    DCW   0,  1,  2,  3,  4,  5,  6,  7          ;// column indices x = 0..7,  multiplied by b
+    DCW   8,  9, 10, 11, 12, 13, 14, 15          ;// column indices x = 8..15, multiplied by b
+        
+;//--------------------------------------------
+;// Constants 
+;//--------------------------------------------  
+BLK_SIZE        EQU 0x10                         ;// 16: initial value of the row counter y
+MUL_CONST0      EQU 0x01010101                   ;// NOTE: MUL_CONST0..3 and MASK_CONST are not referenced anywhere in this file
+MUL_CONST1      EQU 0x00060004
+MUL_CONST2      EQU 0x00070005
+MUL_CONST3      EQU 0x00030001
+MASK_CONST      EQU 0x00FF00FF
+
+;//--------------------------------------------
+;// Scratch variable
+;//--------------------------------------------
+y               RN 12   ;// row counter, initialised to BLK_SIZE before the mode dispatch
+pc              RN 15   ;// program counter; loaded directly from the index table to dispatch
+
+return          RN 0    ;// status value set before exit; shares r0 with pSrcLeft
+pTable          RN 9    ;// address of armVCM4P10_pIndexTable16x16
+count           RN 11   ;// DC case: number of edges summed (0..2); shares r11 with pTmp2
+pMultTable      RN 9    ;// plane case: address of armVCM4P10_MultiplierTable16x16; shares r9 with pTable
+;//--------------------------------------------
+;// Neon registers (several names overlay the same physical register)
+;//--------------------------------------------
+qAbove          QN Q0.U8                         ;// 16 bytes loaded from pSrcAbove
+qLeft           QN Q1.U8                         ;// left-edge bytes (one replicated byte in the horizontal case)
+qSum8           QN Q0.U16                        ;// DC: 8 pairwise sums of the 16 edge bytes
+dSum80          DN D0.U16
+dSum81          DN D1.U16
+dSum4           DN D0.U16                        ;// DC: 4 partial sums
+dSum2           DN D0.U32                        ;// DC: 2 partial sums
+dSum1           DN D0.U64
+qOut            QN Q3.U8                         ;// output row; Q3 is D6/D7, the same registers as dSumLeft/dSumAbove and dOut0/dOut1
+dSumLeft        DN D6.U64                        ;// DC: sum of the 16 left pixels
+dSumAbove       DN D7.U64                        ;// DC: sum of the 16 above pixels
+dSum            DN D8.U64                        ;// DC: rounded mean
+dSum0           DN D8.U8[0]                      ;// DC: byte 0 of dSum, broadcast into qOut
+
+qH              QN Q11.S32                       ;// plane: 32-bit partial products for H
+qV              QN Q12.S32                       ;// plane: 32-bit partial products for V
+qA              QN Q11.S16                       ;// plane: a term; written after b and c have been copied out of Q11
+qB              QN Q6.S16
+qC              QN Q7.S16
+
+qB0             QN Q5.S16
+qB1             QN Q6.S16                        ;// same register as qB
+dA1             DN D23.S16                       ;// upper half of qA
+
+dH0             DN D22.S32
+dH1             DN D23.S32
+dV0             DN D24.S32
+dV1             DN D25.S32
+
+qHV             QN Q11.S64
+qHV0            QN Q11.S32
+qHV1            QN Q12.S64
+
+dHV00           DN D22.S32
+dHV01           DN D23.S32
+
+dHV0            DN D22.S16[0]                    ;// plane: low halfword of b
+dHV1            DN D23.S16[0]                    ;// plane: low halfword of c
+dHV10           DN D24.S64
+dHV11           DN D25.S64
+
+qSum0           QN Q0.S16                        ;// plane: running row values for x = 0..7
+qSum1           QN Q1.S16                        ;// plane: running row values for x = 8..15
+
+dOut0           DN D6.U8                         ;// low half of qOut
+dOut1           DN D7.U8                         ;// high half of qOut
+
+dLeft0          DN D2.U8                         ;// low half of qLeft
+dLeft1          DN D3.U8                         ;// high half of qLeft
+qConst          QN Q13.S16
+
+dAbove0         DN D0.U8                         ;// low half of qAbove
+dAbove1         DN D1.U8                         ;// high half of qAbove
+
+dRevLeft64      DN D12.U64
+dRevLeft        DN D12.U8
+dRevAbove64     DN D5.U64
+dRevAbove       DN D5.U8
+qLeftDiff       QN Q8.S16
+dLeftDiff1      DN D17.S16
+dLeftDiff64     DN D17.S64
+qDiffLeft       QN Q8.S16
+qDiffAbove      QN Q4.S16
+dAboveDiff1     DN D9.S16
+dAboveDiff64    DN D9.S64
+qAboveDiff      QN Q4.S16
+
+dAboveLeft      DN D4.U8                         ;// only lane 0 is loaded (from pSrcAboveLeft)
+
+dDiffLeft0      DN D16.S16
+dDiffLeft1      DN D17.S16
+dDiffAbove0     DN D8.S16
+dDiffAbove1     DN D9.S16
+
+qLeft15minus0   QN Q7.S16                        ;// same register as qC
+dLeft15minus0   DN D14.S16
+qAbove15minus0  QN Q3.S16                        ;// same register as qOut
+dAbove15minus0  DN D6.S16
+
+qMultiplier     QN Q10.S16
+qMultiplier0    QN Q10.S16
+qMultiplier1    QN Q12.S16                       ;// same register as qHV1; loaded only after dHV10/dHV11 have been summed
+dMultiplier0    DN D20.S16
+dMultiplier1    DN D21.S16
+
+dBPlusCMult7    DN D1.S64                        ;// plane: 7*(b+c); same register as dAbove1
+dBPlusCMult7S16 DN D1.S16
+
+qTmp            QN Q0.U8                         ;// horizontal case: second replicated left byte; same register as qAbove
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+pSrcLeft        RN 0    ;// input pointer
+pSrcAbove       RN 1    ;// input pointer
+pSrcAboveLeft   RN 2    ;// input pointer
+pDst            RN 3    ;// output pointer
+leftStep        RN 4    ;// input variable (loaded from the stack in the prologue)
+dstStep         RN 5    ;// input variable (loaded from the stack in the prologue)
+predMode        RN 6    ;// input variable (loaded from the stack in the prologue)
+availability    RN 7    ;// input variable (loaded from the stack in the prologue)
+
+pTmp            RN 8    ;// second row pointer, kept one row ahead of pSrcLeft or pDst
+step            RN 10   ;// two-row stride: 2*leftStep or 2*dstStep
+pTmp2           RN 11   ;// horizontal case: second destination pointer; shares r11 with count
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_16x16 starts
+;//-----------------------------------------------------------------------------------------------
+        
+        ;// Write function header
+        M_START omxVCM4P10_PredictIntra_16x16, r11, d15     ;// NOTE(review): r11/d15 presumably bound the registers the macro preserves - confirm in armCOMM_s.h
+        ;// pSrcLeft, pSrcAbove, pSrcAboveLeft and pDst are used straight from r0-r3.
+        ;// Define stack arguments
+        M_ARG    LeftStep,     4
+        M_ARG    DstStep,      4
+        M_ARG    PredMode,     4
+        M_ARG    Availability, 4
+        
+        ;// M_STALL ARM1136JS=4
+        
+        LDR      pTable,=armVCM4P10_pIndexTable16x16 ;// Load index table for switch case
+        
+        ;// Load arguments from the stack
+        M_LDR    predMode, PredMode                  ;// Arg predMode loaded from stack to reg 
+        M_LDR    leftStep, LeftStep                  ;// Arg leftStep loaded from stack to reg 
+        M_LDR    dstStep,  DstStep                   ;// Arg dstStep loaded from stack to reg         
+        M_LDR    availability, Availability          ;// Arg availability loaded from stack to reg
+        ;// NOTE(review): predMode is not range-checked here; the table has exactly four entries (0..3).
+        MOV      y, #BLK_SIZE                        ;// Row counter used by the horizontal and plane loops
+        LDR      pc, [pTable, predMode, LSL #2]      ;// Branch to the case selected by predMode (4-byte table entries)
+        
+OMX_VC_16X16_VERT                                    ;// Vertical: every output row is a copy of pSrcAbove[0..15]
+        VLD1    qAbove,  [pSrcAbove]                 ;// load the 16 pixels above the block
+        ADD     pTmp, pDst, dstStep                  ;// pTmp writes the odd rows, pDst the even rows
+        ADD     step, dstStep, dstStep               ;// each pointer advances two rows per store
+        VST1    qAbove, [pDst], step                 ;// row 0
+        VST1    qAbove, [pTmp], step                 ;// row 1
+        VST1    qAbove, [pDst], step                 ;// row 2
+        VST1    qAbove, [pTmp], step                 ;// row 3
+        VST1    qAbove, [pDst], step                 ;// row 4
+        VST1    qAbove, [pTmp], step                 ;// row 5
+        VST1    qAbove, [pDst], step                 ;// row 6
+        VST1    qAbove, [pTmp], step                 ;// row 7
+        VST1    qAbove, [pDst], step                 ;// row 8
+        VST1    qAbove, [pTmp], step                 ;// row 9
+        VST1    qAbove, [pDst], step                 ;// row 10
+        VST1    qAbove, [pTmp], step                 ;// row 11
+        VST1    qAbove, [pDst], step                 ;// row 12
+        VST1    qAbove, [pTmp], step                 ;// row 13
+        VST1    qAbove, [pDst]                       ;// row 14 (no post-increment needed)
+        VST1    qAbove, [pTmp]                       ;// row 15
+        MOV     return, #OMX_Sts_NoErr               ;// returnNoError
+        M_EXIT
+        
+OMX_VC_16X16_HOR                                          ;// Horizontal: row r is filled with pSrcLeft[r*leftStep]
+        ADD     pTmp, pSrcLeft, leftStep                  ;// pTmp reads the odd left pixels, pSrcLeft the even ones
+        ADD     leftStep, leftStep, leftStep              ;// leftStep is doubled in place: two rows per load
+        ADD     pTmp2, pDst, dstStep                      ;// pTmp2 writes the odd rows, pDst the even rows
+        ADD     dstStep, dstStep, dstStep                 ;// dstStep is doubled in place: two rows per store
+LoopHor                                                   ;// each pass produces 8 rows (4 even + 4 odd)
+        VLD1     {qLeft[]}, [pSrcLeft], leftStep          ;// load one left byte and replicate it to all 16 lanes
+        VLD1     {qTmp[]}, [pTmp], leftStep       
+        SUBS     y, y, #8                                 ;// 8 rows per pass; flags are consumed by the BNE below
+        VST1     qLeft, [pDst], dstStep
+        VST1     qTmp, [pTmp2], dstStep
+        VLD1     {qLeft[]}, [pSrcLeft], leftStep       
+        VLD1     {qTmp[]}, [pTmp], leftStep       
+        VST1     qLeft, [pDst], dstStep
+        VST1     qTmp, [pTmp2], dstStep
+        VLD1     {qLeft[]}, [pSrcLeft], leftStep       
+        VLD1     {qTmp[]}, [pTmp], leftStep       
+        VST1     qLeft, [pDst], dstStep
+        VST1     qTmp, [pTmp2], dstStep
+        VLD1     {qLeft[]}, [pSrcLeft], leftStep       
+        VLD1     {qTmp[]}, [pTmp], leftStep       
+        VST1     qLeft, [pDst], dstStep
+        VST1     qTmp, [pTmp2], dstStep
+        
+        BNE      LoopHor                                  ;// Two passes in total (y: 16 -> 8 -> 0), 16 rows
+        MOV      return, #OMX_Sts_NoErr
+        M_EXIT
+        
+OMX_VC_16X16_DC                                            ;// DC: fill the block with the rounded mean of the available edges, or 128
+        MOV      count, #0                                 ;// count = number of edges summed so far
+        TST      availability, #OMX_VC_LEFT
+        BEQ      UpperOrNoneAvailable                      ;// left column unavailable: skip the left sum
+
+        ADD     pTmp, pSrcLeft, leftStep                   ;// pTmp reads the odd left pixels, pSrcLeft the even ones
+        ADD     step, leftStep, leftStep
+        ;// Gather the 16 left pixels into the 16 lanes of qLeft
+        VLD1    {qLeft[0]}, [pSrcLeft],step    
+        VLD1    {qLeft[1]}, [pTmp],step   
+        VLD1    {qLeft[2]}, [pSrcLeft],step   
+        VLD1    {qLeft[3]}, [pTmp],step
+        VLD1    {qLeft[4]}, [pSrcLeft],step   
+        VLD1    {qLeft[5]}, [pTmp],step   
+        VLD1    {qLeft[6]}, [pSrcLeft],step    
+        VLD1    {qLeft[7]}, [pTmp],step
+        VLD1    {qLeft[8]}, [pSrcLeft],step    
+        VLD1    {qLeft[9]}, [pTmp],step   
+        VLD1    {qLeft[10]},[pSrcLeft],step   
+        VLD1    {qLeft[11]},[pTmp],step    
+        VLD1    {qLeft[12]},[pSrcLeft],step   
+        VLD1    {qLeft[13]},[pTmp],step   
+        VLD1    {qLeft[14]},[pSrcLeft],step    
+        VLD1    {qLeft[15]},[pTmp] 
+        
+        VPADDL   qSum8, qLeft                              ;// 16 bytes  -> 8 halfword sums
+        ADD     count, count, #1                           ;// left edge counted
+        VPADD    dSum4, dSum80, dSum81                     ;// 8 sums    -> 4 halfword sums
+        VPADDL   dSum2, dSum4                              ;// 4 sums    -> 2 word sums
+        VPADDL   dSumLeft, dSum2                           ;// 2 sums    -> total of the left column
+        VRSHR    dSum, dSumLeft, #4                        ;// left-only DC = (sum + 8) >> 4
+        
+UpperOrNoneAvailable
+        TST      availability,  #OMX_VC_UPPER              ;// if(availability & #OMX_VC_UPPER)
+        BEQ      BothOrNoneAvailable                       ;// row above unavailable: skip the upper sum
+        VLD1     qAbove, [pSrcAbove]
+        ADD      count, count, #1                          ;// if upper inc count by 1
+        VPADDL   qSum8, qAbove                             ;// same reduction as for the left column
+        VPADD    dSum4, dSum80, dSum81
+        VPADDL   dSum2, dSum4
+        VPADDL   dSumAbove, dSum2                          ;// total of the row above
+        VRSHR    dSum, dSumAbove, #4                       ;// upper-only DC = (sum + 8) >> 4
+        
+BothOrNoneAvailable
+        CMP      count, #2                                  ;// check if both available
+        BNE      NoneAvailable                              ;// taken for count 0 or 1; with count 1 dSum already holds the single-edge mean
+        VADD     dSum, dSumAbove, dSumLeft
+        VRSHR    dSum, dSum, #5                             ;// both edges: DC = (sumAbove + sumLeft + 16) >> 5
+        
+        ;// Common tail for all DC variants (the label name notwithstanding)
+NoneAvailable
+        VDUP     qOut, dSum0                                ;// broadcast the DC byte; replaced below when count == 0
+        CMP      count, #0                                  ;// check if none available
+        ADD      pTmp, pDst, dstStep                        ;// ADD leaves the flags from the CMP intact
+        ADD      step, dstStep, dstStep
+        BNE      LoopDC
+        VMOV     qOut, #128                                 ;// no edge available: fill with 128
+LoopDC                                                      ;// not a loop: 16 unrolled row stores
+        VST1    qOut, [pDst], step
+        VST1    qOut, [pTmp], step
+        VST1    qOut, [pDst], step
+        VST1    qOut, [pTmp], step
+        VST1    qOut, [pDst], step
+        VST1    qOut, [pTmp], step
+        VST1    qOut, [pDst], step
+        VST1    qOut, [pTmp], step
+        VST1    qOut, [pDst], step
+        VST1    qOut, [pTmp], step
+        VST1    qOut, [pDst], step
+        VST1    qOut, [pTmp], step
+        VST1    qOut, [pDst], step
+        VST1    qOut, [pTmp], step
+        VST1    qOut, [pDst], step
+        VST1    qOut, [pTmp], step
+        MOV     return, #OMX_Sts_NoErr
+        M_EXIT
+
+OMX_VC_16X16_PLANE                                          ;// Plane: pred(x,y) = clip((a + b*(x-7) + c*(y-7) + 16) >> 5)
+        LDR     pMultTable, =armVCM4P10_MultiplierTable16x16
+        VLD1    qAbove, [pSrcAbove]                         ;// pSrcAbove[x]      :0<= x <= 15
+        VLD1    dAboveLeft[0],[pSrcAboveLeft]               ;// only lane 0 of dAboveLeft is loaded
+        ADD     pTmp, pSrcLeft, leftStep
+        ADD     step, leftStep, leftStep
+        VLD1    {qLeft[0]},  [pSrcLeft],step                ;// gather pSrcLeft[r*leftStep], r = 0..15, into lanes 0..15
+        VLD1    {qLeft[1]},  [pTmp],step      
+        VLD1    {qLeft[2]},  [pSrcLeft],step  
+        VLD1    {qLeft[3]},  [pTmp],step       
+        VLD1    {qLeft[4]},  [pSrcLeft],step  
+        VLD1    {qLeft[5]},  [pTmp],step      
+        VLD1    {qLeft[6]},  [pSrcLeft],step   
+        VLD1    {qLeft[7]},  [pTmp],step
+        VLD1    {qLeft[8]},  [pSrcLeft],step   
+        VLD1    {qLeft[9]},  [pTmp],step      
+        VLD1    {qLeft[10]}, [pSrcLeft],step  
+        VLD1    {qLeft[11]}, [pTmp],step       
+        VLD1    {qLeft[12]}, [pSrcLeft],step  
+        VLD1    {qLeft[13]}, [pTmp],step      
+        VLD1    {qLeft[14]}, [pSrcLeft],step   
+        VLD1    {qLeft[15]}, [pTmp]   
+
+        VREV64  dRevAbove, dAbove1                          ;// pSrcAbove[15:14:13:12:11:10:9:8] 
+        VSUBL   qAbove15minus0, dRevAbove, dAboveLeft       ;// qAbove15minus0[0] = pSrcAbove[15] - pSrcAboveLeft[0] 
+        VSHR    dRevAbove64, dRevAbove64, #8                ;// pSrcAbove[14:13:12:11:10:9:8:X] 
+        VSUBL   qAboveDiff, dRevAbove, dAbove0              ;// lanes 0..6 = pSrcAbove[14-i] - pSrcAbove[i]; lane 7 is unused
+        
+        VSHL    dAboveDiff64, dAboveDiff64, #16             ;// shift the unused top lane out of the upper half
+        VEXT    dDiffAbove1, dAboveDiff1, dAbove15minus0, #1  ;// upper half = [10-4 | 9-5 | 8-6 | 15-aboveLeft]
+
+        VREV64  dRevLeft,dLeft1                             ;// pSrcLeft[15:14:13:12:11:10:9:8] 
+        VSUBL   qLeft15minus0,dRevLeft, dAboveLeft          ;// qLeft15minus0[0] = pSrcLeft[15] - pSrcAboveLeft[0] 
+        VSHR    dRevLeft64, dRevLeft64, #8                  ;// pSrcLeft[14:13:12:11:10:9:8:X] 
+        VSUBL   qLeftDiff,dRevLeft, dLeft0                  ;// lanes 0..6 = pSrcLeft[14-i] - pSrcLeft[i]; lane 7 is unused
+        
+        ;// Multiplier lanes 0..7 = 7,6,5,4,3,2,1,8
+        VLD1    qMultiplier, [pMultTable]!                  
+        
+        VSHL    dLeftDiff64, dLeftDiff64, #16               ;// same lane shuffle as for the above row
+        VEXT    dDiffLeft1, dLeftDiff1, dLeft15minus0, #1     
+        
+        VMULL   qH,dDiffAbove0, dMultiplier0                ;// weighted above differences, lanes 0..3
+        VMULL   qV,dDiffLeft0,  dMultiplier0                ;// weighted left differences, lanes 0..3
+        VMLAL   qH,dDiffAbove1, dMultiplier1                ;// accumulate lanes 4..7
+        VMLAL   qV,dDiffLeft1,  dMultiplier1
+        
+        VPADD   dHV00,dH1,dH0                               ;// pairwise partial sums of H
+        VPADD   dHV01,dV1,dV0                               ;// pairwise partial sums of V
+        VPADDL  qHV, qHV0                                   ;// [V | H] as one 64-bit total each
+        VSHL    qHV1,qHV,#2                                 ;// 4*H, 4*V
+        VADD    qHV,qHV,qHV1                                ;// 5*H, 5*V
+        
+        ;// HV = [c = ((5*V+32)>>6) | b = ((5*H+32)>>6)]
+        VRSHR   qHV,qHV,#6
+        
+        ;// HV1 = [c*7|b*7]
+        VSHL    qHV1,qHV,#3
+        VSUB    qHV1,qHV1,qHV                             
+        
+        ;// Multiplier0 = [0|1|2|...|7]
+        VLD1    qMultiplier0, [pMultTable]!    
+        VDUP    qB, dHV0                                    ;// [b|b|b|b|b|b|b|b]
+        VDUP    qC, dHV1                                    ;// [c|c|c|c|c|c|c|c]
+        
+        VADDL   qA,dAbove1,dLeft1                           ;// lane 7 = pSrcAbove[15] + pSrcLeft[15]
+        VSHL    qA,qA, #4                                   ;// a = 16 * (pSrcAbove[15] + pSrcLeft[15])
+        VDUP    qA,dA1[3]                                   ;// [a|a|a|a|a|a|a|a]
+        VADD    dBPlusCMult7, dHV10, dHV11                  ;// 7*b + 7*c
+        
+        ;// Multiplier1 = [8|9|10|...|15]
+        VLD1    qMultiplier1, [pMultTable]
+        ;// Const = a - 7*(b+c)
+        VDUP    qConst, dBPlusCMult7S16[0]
+        VSUB    qConst, qA, qConst
+        
+        ;// B0 = [0*b|1*b|2*b|3*b|......|7*b]
+        VMUL    qB0,qB,qMultiplier0
+        
+        ;// B1 = [8*b|9*b|10*b|11*b|....|15*b]
+        VMUL    qB1,qB,qMultiplier1
+        
+        VADD    qSum0, qB0, qConst                          ;// row 0, x = 0..7:  b*x + a - 7*(b+c)
+        VADD    qSum1, qB1, qConst                          ;// row 0, x = 8..15
+        
+        ;// Loops for 16 times
+LoopPlane       
+        ;// (b*x + c*y + C)>>5
+        VQRSHRUN dOut0, qSum0,#5                            ;// round, shift and saturate to unsigned 8 bits
+        VQRSHRUN dOut1, qSum1,#5      
+        SUBS     y, y, #1
+        VST1     qOut,[pDst],dstStep
+        VADD     qSum0,qSum0,qC                             ;// next row: add c
+        VADD     qSum1,qSum1,qC 
+        BNE      LoopPlane
+        
+        MOV      return, #OMX_Sts_NoErr
+
+        M_END
+        
+        ENDIF ;// CortexA8
+            
+        END
+;-----------------------------------------------------------------------------------------------
+; omxVCM4P10_PredictIntra_16x16 ends
+;-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
new file mode 100755
index 0000000..39eb8a4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
@@ -0,0 +1,531 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_PredictIntra_4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+;// Define the processor variants supported by this file
+         
+         M_VARIANTS CortexA8
+        
+;//-------------------------------------------------------
+;// This table implements a C-style switch statement in asm
+;// by the method of two levels of indexing.
+;//-------------------------------------------------------
+
+    M_TABLE armVCM4P10_pSwitchTable4x4           ;// code addresses of the nine cases; entry predMode is loaded into pc
+    DCD  OMX_VC_4x4_VERT,     OMX_VC_4x4_HOR     ;// entries 0 and 1
+    DCD  OMX_VC_4x4_DC,       OMX_VC_4x4_DIAG_DL ;// entries 2 and 3
+    DCD  OMX_VC_4x4_DIAG_DR,  OMX_VC_4x4_VR      ;// entries 4 and 5
+    DCD  OMX_VC_4x4_HD,       OMX_VC_4x4_VL      ;// entries 6 and 7
+    DCD  OMX_VC_4x4_HU                           ;// entry 8
+    
+        
+        IF CortexA8
+        
+;//--------------------------------------------
+;// Scratch variable
+;//--------------------------------------------
+return          RN 0    ;// status value; shares r0 with pSrcLeft
+pTable          RN 8    ;// address of armVCM4P10_pSwitchTable4x4
+pc              RN 15   ;// program counter; loaded directly from the switch table to dispatch
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+pSrcLeft        RN 0    ;// input pointer
+pSrcAbove       RN 1    ;// input pointer
+pSrcAboveLeft   RN 2    ;// input pointer
+pDst            RN 3    ;// output pointer
+leftStep        RN 4    ;// input variable (loaded from the stack)
+dstStep         RN 5    ;// input variable (loaded from the stack)
+predMode        RN 6    ;// input variable (loaded from the stack)
+availability    RN 7    ;// input variable (loaded from the stack)
+pDst1           RN 1    ;// shares r1 with pSrcAbove
+pDst2           RN 4    ;// shares r4 with leftStep
+pDst3           RN 6    ;// shares r6 with predMode
+
+pSrcTmp         RN 9    ;// pSrcLeft + leftStep: reads the odd left pixels
+srcStep         RN 10   ;// 2 * leftStep
+pDstTmp         RN 11   ;// pDst + dstStep: writes the odd rows
+dstep           RN 12   ;// 2 * dstStep
+
+;//-------------------
+;// Neon registers
+;//-------------------
+
+;// OMX_VC_4x4_VERT
+dAboveU32       DN  D0.U32                       ;// lane 0 holds the 4-byte row written by DCPredict4x4VertStore
+
+;// OMX_VC_4x4_HOR
+dLeftVal0       DN  D0.8
+dLeftVal1       DN  D1.8
+dLeftVal2       DN  D2.8
+dLeftVal3       DN  D3.8
+dLeftVal0U32    DN  D0.U32
+dLeftVal1U32    DN  D1.U32
+dLeftVal2U32    DN  D2.U32
+dLeftVal3U32    DN  D3.U32
+
+;// OMX_VC_4x4_DC
+dLeftVal        DN  D0.U8
+dLeftValU32     DN  D0.U32
+dSumAboveLeftU16  DN  D1.U16
+dSumAboveLeftU32  DN  D1.U32
+dSumAboveLeftU64  DN  D1.U64
+dSumAboveLeftU8 DN  D1.U8
+dSum            DN  D0.U8                        ;// same register as dAboveU32, so the DC paths reuse DCPredict4x4VertStore
+
+dSumLeftValU16  DN  D1.U16
+dSumLeftValU32  DN  D1.U32
+dSumLeftValU64  DN  D1.U64
+dSumLeftValU8   DN  D1.U8
+
+dAboveVal       DN  D0.U8
+dSumAboveValU16  DN  D1.U16
+dSumAboveValU32  DN  D1.U32
+dSumAboveValU64  DN  D1.U64
+dSumAboveValU8   DN  D1.U8
+dConst128U8     DN  D0.U8
+
+
+;//OMX_VC_4x4_DIAG_DL
+
+dAbove          DN  D0.U8
+dU7             DN  D2.U8
+dU3             DN  D2.U8                        ;// same register as dU7
+dAbove0         DN  D3.U8
+dAbove1         DN  D4.U8
+dAbove2         DN  D5.U8
+dTmp            DN  D6.U8
+dTmp0           DN  D7.U8
+dTmp1           DN  D8.U8
+dTmp2            DN  D9.U8
+dTmp3            DN  D10.U8
+dTmpU32         DN  D6.U32
+
+
+;//OMX_VC_4x4_DIAG_DR
+dLeft           DN  D1.U8
+dUL             DN  D2.U8
+
+;//OMX_VC_4x4_VR
+dLeft0          DN  D1.U8
+dLeft1          DN  D2.U8
+dEven0          DN  D3.U8
+dEven1          DN  D4.U8
+dEven2          DN  D5.U8
+dOdd0           DN  D6.U8
+dOdd1           DN  D11.U8
+dOdd2           DN  D12.U8
+dTmp3U32        DN  D10.U32    
+dTmp2U32        DN  D9.U32
+
+
+;//OMX_VC_4x4_HD
+dTmp1U64        DN  D8.U64
+dTmp0U64        DN  D7.U64
+dTmpU64         DN  D6.U64
+dTmpU32         DN  D6.U32                       ;// NOTE: repeats the identical declaration in the DIAG_DL group
+dTmp1U32        DN  D8.U32
+
+;//OMX_VC_4x4_HU
+dL3             DN  D2.U8
+dLeftHU0        DN  D3.U8
+dLeftHU1        DN  D4.U8
+dLeftHU2        DN  D5.U8
+dTmp0U32        DN  D7.U32
+
+
+
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_4x4 starts
+;//-----------------------------------------------------------------------------------------------
+        
+        ;// Write function header
+        M_START omxVCM4P10_PredictIntra_4x4, r12,d12
+        
+        ;// Define stack arguments
+        M_ARG    LeftStep,     4
+        M_ARG    DstStep,      4
+        M_ARG    PredMode,     4
+        M_ARG    Availability, 4
+        
+                
+        LDR      pTable,=armVCM4P10_pSwitchTable4x4  ;// Load index table for switch case
+        
+        ;// Load argument from the stack
+        M_LDRD   predMode,availability,PredMode     ;// Arg predMode & availability loaded from stack to reg 
+        M_LDRD   leftStep,dstStep,LeftStep          ;// Arg leftStep & dstStep loaded from stack to reg 
+        
+        
+        LDR      pc, [pTable, predMode, LSL #2]      ;// Branch to the case based on preMode
+
+
+OMX_VC_4x4_HOR
+        
+        ADD     pSrcTmp, pSrcLeft, leftStep
+        ADD     srcStep, leftStep, leftStep
+        ;// Load Left Edge
+        VLD1    {dLeftVal0[]},[pSrcLeft],srcStep           ;// pSrcLeft[0*leftStep]
+        VLD1    {dLeftVal1[]},[pSrcTmp],srcStep            ;//    pSrcLeft[1*leftStep]
+        VLD1    {dLeftVal2[]},[pSrcLeft]                   ;//    pSrcLeft[2*leftStep]
+        VLD1    {dLeftVal3[]},[pSrcTmp]                    ;//    pSrcLeft[3*leftStep]
+        
+        ADD     pDstTmp, pDst, dstStep
+        ADD     dstep, dstStep, dstStep
+        
+        VST1    dLeftVal0U32[0],[pDst],dstep                ;// pDst[0*dstStep+x] :0<= x <= 7
+        VST1    dLeftVal1U32[0],[pDstTmp],dstep             ;// pDst[1*dstStep+x] :0<= x <= 7
+        VST1    dLeftVal2U32[0],[pDst]                      ;// pDst[2*dstStep+x] :0<= x <= 7
+        VST1    dLeftVal3U32[0],[pDstTmp]                   ;// pDst[3*dstStep+x] :0<= x <= 7
+        
+        B        ExitPredict4x4                             ;// Branch to exit code
+        
+OMX_VC_4x4_VERT
+        
+        ;// Load Upper Edge
+        VLD1     dAboveU32[0],[pSrcAbove]
+        ADD     pDstTmp, pDst, dstStep
+        ADD     dstep, dstStep, dstStep
+        
+DCPredict4x4VertStore         
+        
+        VST1     dAboveU32[0],[pDst],dstep
+        VST1     dAboveU32[0],[pDstTmp],dstep
+        VST1     dAboveU32[0],[pDst]
+        VST1     dAboveU32[0],[pDstTmp]
+
+        B        ExitPredict4x4                             ;// Branch to exit code
+
+OMX_VC_4x4_DC
+        
+        
+        TST     availability, #OMX_VC_LEFT
+        BEQ     DCPredict4x4LeftNotAvailable
+
+        ADD     pSrcTmp, pSrcLeft, leftStep
+        ADD     srcStep, leftStep, leftStep
+        ;// Load Left Edge
+        VLD1    {dLeftVal[0]},[pSrcLeft],srcStep            ;// pSrcLeft[0*leftStep]
+        VLD1    {dLeftVal[1]},[pSrcTmp],srcStep             ;//    pSrcLeft[1*leftStep]
+        VLD1    {dLeftVal[2]},[pSrcLeft]                    ;//    pSrcLeft[2*leftStep]
+        VLD1    {dLeftVal[3]},[pSrcTmp]                     ;//    pSrcLeft[3*leftStep]
+        
+        TST     availability, #OMX_VC_UPPER
+        BEQ     DCPredict4x4LeftOnlyAvailable
+
+        ;// Load Upper Edge also
+        VLD1     dLeftValU32[1],[pSrcAbove]                 ;// pSrcAbove[0 to 3]
+        MOV      return, #OMX_Sts_NoErr
+        
+        VPADDL   dSumAboveLeftU16, dLeftVal                 ;// [pSrcAbove[2+3 | 0+1] | pSrcLeft[2+3 | 0+1]]             
+        VPADDL   dSumAboveLeftU32, dSumAboveLeftU16         ;// [pSrcAbove[2+3+0+1] | pSrcLeft[2+3+0+1]] 
+        VPADDL   dSumAboveLeftU64, dSumAboveLeftU32         ;// [pSrcAbove[2+3+0+1] + pSrcLeft[2+3+0+1]]                          
+        VRSHR    dSumAboveLeftU64,dSumAboveLeftU64,#3       ;// Sum = (Sum + 4) >> 3
+        ADD     pDstTmp, pDst, dstStep
+        ADD     dstep, dstStep, dstStep
+        VDUP     dSum,dSumAboveLeftU8[0]
+        
+        B        DCPredict4x4VertStore  
+        
+DCPredict4x4LeftOnlyAvailable
+
+        MOV      return, #OMX_Sts_NoErr                     ;// returnNoError
+        
+        VPADDL   dSumLeftValU16, dLeftVal                   ;// [ XX | pSrcLeft[2+3 | 0+1]]             
+        VPADDL   dSumLeftValU32, dSumLeftValU16             ;// [ XXXX | pSrcLeft[2+3+0+1]] 
+        
+        VRSHR    dSumLeftValU32,dSumLeftValU32,#2           ;// Sum = (Sum + 2) >> 2
+        ADD     pDstTmp, pDst, dstStep
+        ADD     dstep, dstStep, dstStep
+        VDUP     dSum,dSumLeftValU8[0]
+        
+        B        DCPredict4x4VertStore   
+        
+DCPredict4x4LeftNotAvailable
+                 
+        TST     availability, #OMX_VC_UPPER
+        BEQ     DCPredict4x4NoneAvailable
+
+        ;// Load Upper Edge 
+        VLD1     dAboveU32[0],[pSrcAbove]                   ;// pSrcAbove[0 to 3]  
+        MOV      return, #OMX_Sts_NoErr
+        
+        VPADDL   dSumAboveValU16, dAboveVal                 ;// [ XX | pSrcAbove[2+3 | 0+1]]             
+        VPADDL   dSumAboveValU32, dSumAboveValU16           ;// [ XXXX | pSrcAbove[2+3+0+1]] 
+        
+        VRSHR    dSumAboveValU32,dSumAboveValU32,#2         ;// Sum = (Sum + 2) >> 2
+        ADD     pDstTmp, pDst, dstStep
+        ADD     dstep, dstStep, dstStep
+        VDUP     dSum,dSumAboveValU8[0]
+        
+        B        DCPredict4x4VertStore   
+        
+DCPredict4x4NoneAvailable        
+        
+        VMOV     dConst128U8,#0x80                          ;// 0x8080808080808080 if(count == 0)
+        MOV      return, #OMX_Sts_NoErr
+        
+        ADD     pDstTmp, pDst, dstStep
+        ADD     dstep, dstStep, dstStep
+        B        DCPredict4x4VertStore   
+        
+        
+        
+OMX_VC_4x4_DIAG_DL
+        
+        TST     availability, #OMX_VC_UPPER_RIGHT
+        BEQ     DiagDLUpperRightNotAvailable
+       
+        VLD1    dAbove0,[pSrcAbove]                     ;// [U7|U6|U5|U4|U3|U2|U1|U0] 
+        VDUP    dU7, dAbove0[7]                         ;// [U7|U7|U7|U7|U7|U7|U7|U7]
+        VEXT    dAbove1, dAbove0, dU7, #1               ;// [U7|U7|U6|U5|U4|U3|U2|U1]
+        VEXT    dAbove2, dAbove0, dU7, #2               ;// [U7|U7|U7|U6|U5|U4|U3|U2] 
+        B       DiagDLPredict4x4Store         
+       
+DiagDLUpperRightNotAvailable
+        VLD1    dAboveU32[1],[pSrcAbove]                ;// [U3|U2|U1|U0|-|-|-|-] 
+        VDUP    dU3, dAbove[7]                          ;// [U3 U3 U3 U3 U3 U3 U3 U3]
+
+        VEXT    dAbove0, dAbove, dU3, #4                ;// [U3 U3 U3 U3 U3 U2 U1 U0]
+        VEXT    dAbove1, dAbove, dU3, #5                ;// [U3 U3 U3 U3 U3 U3 U2 U1]
+        VEXT    dAbove2, dAbove, dU3, #6                ;// [U3 U3 U3 U3 U3 U3 U3 U2]
+       
+DiagDLPredict4x4Store  
+        
+        VHADD   dTmp, dAbove0, dAbove2
+        VRHADD  dTmp, dTmp, dAbove1                     ;// (a+2*b+c+2)>>2
+        
+
+        VST1    dTmpU32[0],[pDst],dstStep
+        VEXT    dTmp,dTmp,dTmp,#1
+        VST1    dTmpU32[0],[pDst],dstStep
+        VEXT    dTmp,dTmp,dTmp,#1
+        VST1    dTmpU32[0],[pDst],dstStep
+        VEXT    dTmp,dTmp,dTmp,#1
+        VST1    dTmpU32[0],[pDst]
+        
+        B        ExitPredict4x4                         ;// Branch to exit code
+        
+
+OMX_VC_4x4_DIAG_DR
+        
+        
+        ;// Load U0,U1,U2,U3
+        
+        VLD1    dAboveU32[0],[pSrcAbove]                ;// [X|X|X|X|U3|U2|U1|U0]
+                
+        ;// Load UL,L0,L1,L2,L3                         ;// dLeft = [UL|L0|L1|L2|L3|X|X|X]    
+        VLD1    {dLeft[7]},[pSrcAboveLeft]              
+        ADD     pSrcTmp, pSrcLeft, leftStep
+        ADD     srcStep, leftStep, leftStep
+        ADD     pDst1,pDst,dstStep
+        
+        VLD1    {dLeft[6]},[pSrcLeft],srcStep           ;// pSrcLeft[0*leftStep]
+        VLD1    {dLeft[5]},[pSrcTmp],srcStep            ;// pSrcLeft[1*leftStep]
+        VLD1    {dLeft[4]},[pSrcLeft]                   ;// pSrcLeft[2*leftStep]
+        VLD1    {dLeft[3]},[pSrcTmp]                    ;// pSrcLeft[3*leftStep]
+        
+        
+        VEXT    dAbove0,dLeft,dAbove,#3                 ;// [U2|U1|U0|UL|L0|L1|L2|L3]   
+        ADD     pDst2,pDst1,dstStep
+        VEXT    dAbove1,dLeft,dAbove,#4                 ;// [U3|U2|U1|U0|UL|L0|L1|L2]   
+        ADD     pDst3,pDst2,dstStep
+        VEXT    dAbove2,dLeft,dAbove,#5                 ;// [ X|U3|U2|U1|U0|UL|L0|L1]   
+        
+        VHADD   dTmp, dAbove0, dAbove2
+        VRHADD  dTmp, dTmp, dAbove1                     ;// (a+2*b+c+2)>>2
+        
+        
+        VST1    dTmpU32[0],[pDst3]                      ;// Store pTmp[0],[1],[2],[3] @ pDst3
+        VEXT    dTmp,dTmp,dTmp,#1
+        VST1    dTmpU32[0],[pDst2]                      ;// Store pTmp[1],[2],[3],[4] @ pDst2
+        VEXT    dTmp,dTmp,dTmp,#1
+        VST1    dTmpU32[0],[pDst1]                      ;// Store pTmp[2],[3],[4],[5] @ pDst1
+        VEXT    dTmp,dTmp,dTmp,#1
+        VST1    dTmpU32[0],[pDst]                       ;// Store pTmp[3],[4],[5],[6] @ pDst
+        
+        B        ExitPredict4x4                         ;// Branch to exit code
+
+OMX_VC_4x4_VR
+
+        
+        ;// Load UL,U0,U1,U2,U3
+        VLD1    dAboveU32[0],[pSrcAbove]
+        VLD1    dAbove[7],[pSrcAboveLeft]               ;// [UL|X|X|X|U3|U2|U1|U0]
+        
+        ;// Load L0,L1,L2                               ;// dLeft0 = [L0|L2|X|X|X|X|X|X]
+                                                        ;// dLeft1 = [L1| X|X|X|X|X|X|X]    
+        VLD1    {dLeft0[7]},[pSrcLeft],leftStep         ;// pSrcLeft[0*leftStep]
+        VLD1    {dLeft1[7]},[pSrcLeft],leftStep         ;// pSrcLeft[1*leftStep]
+        VLD1    {dLeft0[6]},[pSrcLeft]                  ;// pSrcLeft[2*leftStep]
+        
+        
+        VEXT    dOdd2,dAbove,dAbove,#7                  ;// [ x x x U3 U2 U1 U0 UL ]
+        VEXT    dEven0,dLeft0,dOdd2,#6                  ;// [ x x x U1 U0 UL L0 L2 ]
+        VEXT    dEven1,dLeft1,dOdd2,#7                  ;// [ x x x U2 U1 U0 UL L1 ]
+        VEXT    dEven2,dLeft0,dAbove,#7                 ;// [ x x x U3 U2 U1 U0 L0 ]
+        VEXT    dOdd0,dLeft1,dAbove,#7                  ;// [ x x x U3 U2 U1 U0 L1 ]
+        VEXT    dOdd1,dLeft0,dOdd2,#7                   ;// [ x x x U2 U1 U0 UL L0 ]
+        
+        VHADD   dTmp1, dOdd0, dOdd2
+        VRHADD  dTmp1, dTmp1, dOdd1                     ;// Tmp[ x x x 9 7 5 3 1 ]
+        
+        VHADD   dTmp0, dEven0, dEven2
+        VRHADD  dTmp0, dTmp0, dEven1                    ;// Tmp[ x x x 8 6 4 2 0 ]
+        
+        
+        VEXT    dTmp3,dTmp1,dTmp1,#1                    ;// Tmp[ x x x x 9 7 5 3 ] 
+        ADD     pDstTmp, pDst, dstStep
+        ADD     dstep, dstStep, dstStep
+        VEXT    dTmp2,dTmp0,dTmp0,#1                    ;// Tmp[ x x x x 8 6 4 2 ]
+        
+        
+        VST1    dTmp3U32[0],[pDst],dstep                ;// Tmp[9],[7],[5],[3]
+        VST1    dTmp2U32[0],[pDstTmp],dstep             ;// Tmp[8],[6],[4],[2]
+        VST1    dTmp1U32[0],[pDst],dstep                ;// Tmp[7],[5],[3],[1]
+        VST1    dTmp0U32[0],[pDstTmp]                   ;// Tmp[6],[4],[2],[0]
+        
+        B        ExitPredict4x4                         ;// Branch to exit code
+        
+OMX_VC_4x4_HD
+        
+        
+        ;// Load U0,U1,U2,U3
+        VLD1    dAbove,[pSrcAbove]                      ;//dAboveLeftVal = [U7|U6|U5|U4|U3|U2|U1|U0]
+        
+        ;// Load UL,L0,L1,L2,L3                         ;// dLeft = [UL|L0|L1|L2|L3|X|X|X] 
+        VLD1    {dLeft[7]},[pSrcAboveLeft]   
+        ADD     pSrcTmp, pSrcLeft, leftStep
+        ADD     srcStep, leftStep, leftStep
+        
+        VLD1    {dLeft[6]},[pSrcLeft],srcStep           ;// pSrcLeft[0*leftStep]
+        VLD1    {dLeft[5]},[pSrcTmp],srcStep            ;// pSrcLeft[1*leftStep]
+        VLD1    {dLeft[4]},[pSrcLeft]                   ;// pSrcLeft[2*leftStep]
+        VLD1    {dLeft[3]},[pSrcTmp]                    ;// pSrcLeft[3*leftStep]
+        
+        VEXT    dAbove0,dLeft,dAbove,#3                 ;// [ U2|U1|U0|UL|L0|L1|L2|L3 ]  
+        VEXT    dAbove1,dLeft,dAbove,#2                 ;// [ U1|U0|UL|L0|L1|L2|L3|X ]   
+        VEXT    dAbove2,dLeft,dAbove,#1                 ;// [ U0|UL|L0|L1|L2|L3|X|X ]     
+        
+        VHADD   dTmp0, dAbove0, dAbove2
+        VRHADD  dTmp0, dTmp0, dAbove1                   ;// Tmp[ 0 | 1 | 2 | 4 | 6 | 8 | X | X ]
+        
+        
+        VRHADD  dTmp1, dAbove1, dAbove0                 ;// (a+b+1)>>1
+        VSHL    dTmp1U64,dTmp1U64,#24                   ;// Tmp[ 3|5| 7 |9 | X | X | X | X ]
+        
+        
+        VSHL    dTmpU64,dTmp0U64,#16                    ;// Tmp[ 2|4|6|8| X | X | X | X ]
+        VZIP    dTmp1,dTmp                              ;// dTmp = [ 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 ]
+        VEXT    dTmp0,dTmp0,dTmp0,#6                    ;// Tmp[  X| X| X| X| X| X| 0 | 1 ]
+        VEXT    dTmp1,dTmp,dTmp0,#2                     ;// Tmp[ 0 | 1 | 2 | 3 | 4 | 5 | 6 |7 ]
+       
+        ADD     pDstTmp, pDst, dstStep
+        ADD     dstep, dstStep, dstStep
+        
+        VST1    dTmp1U32[1],[pDst],dstep                ;// Store pTmp[0|1|2|3]
+        VST1    dTmpU32[1],[pDstTmp],dstep              ;// Store pTmp[2|3|4|5]
+        VST1    dTmp1U32[0],[pDst]                      ;// Store pTmp[4|5|6|7]
+        VST1    dTmpU32[0],[pDstTmp]                    ;// Store pTmp[6|7|8|9]
+        
+        B        ExitPredict4x4                         ;// Branch to exit code
+        
+OMX_VC_4x4_VL
+
+        
+        TST     availability, #OMX_VC_UPPER_RIGHT
+        BEQ     DiagVLUpperRightNotAvailable
+       
+        VLD1    dAbove0,[pSrcAbove]                      ;// [U7|U6|U5|U4|U3|U2|U1|U0] 
+        VEXT    dAbove1,dAbove0,dAbove0,#1               ;// [ X|U7|U6|U5|U4|U3|U2|U1]
+        VEXT    dAbove2,dAbove1,dAbove1,#1               ;// [ X| X|U7|U6|U5|U4|U3|U2]
+        
+        B       DiagVLPredict4x4Store         
+       
+DiagVLUpperRightNotAvailable
+        VLD1    dAboveU32[1],[pSrcAbove]                 ;// [U3|U2|U1|U0|-|-|-|-] 
+        VDUP    dU3, dAbove[7]                           ;// [U3 U3 U3 U3 U3 U3 U3 U3]
+
+        VEXT    dAbove0, dAbove, dU3, #4                 ;// [U3 U3 U3 U3 U3 U2 U1 U0]
+        VEXT    dAbove1, dAbove, dU3, #5                 ;// [U3 U3 U3 U3 U3 U3 U2 U1]
+        VEXT    dAbove2, dAbove, dU3, #6                 ;// [U3 U3 U3 U3 U3 U3 U3 U2]
+       
+DiagVLPredict4x4Store  
+        
+        VRHADD  dTmp0, dAbove1, dAbove0                 ;// (a+b+1)>>1
+                                                        ;// Tmp[ X| X| X| 8| 6| 4| 2| 0 ]
+        
+        VHADD   dTmp3, dAbove0, dAbove2
+        VRHADD  dTmp3, dTmp3, dAbove1                   ;// (a+2*b+c+2)>>2
+                                                        ;// Tmp[ X| X| X| 9| 7| 5| 3| 1 ]
+                                                         
+        VEXT    dTmp1,dTmp0,dTmp0,#1                    ;// Tmp[ X| X| X| X| 8| 6| 4| 2 ]
+        ADD     pDstTmp, pDst, dstStep
+        ADD     dstep, dstStep, dstStep
+        VEXT    dTmp2,dTmp3,dTmp1,#1                    ;// Tmp[ X| X| X| X| 9| 7| 5| 3 ]
+        
+        VST1    dTmp0U32[0],[pDst],dstep                ;// Tmp[6],[4],[2],[0]
+        VST1    dTmp3U32[0],[pDstTmp],dstep             ;// Tmp[7],[5],[3],[1]
+        VST1    dTmp1U32[0],[pDst]                      ;// Tmp[8],[6],[4],[2]
+        VST1    dTmp2U32[0],[pDstTmp]                   ;// Tmp[9],[7],[5],[3]
+        
+        B        ExitPredict4x4                         ;// Branch to exit code
+        
+OMX_VC_4x4_HU
+        ADD     pSrcTmp, pSrcLeft, leftStep
+        ADD     srcStep, leftStep, leftStep
+
+        ;// Load Left Edge                              ;// [L3|L2|L1|L0|X|X|X|X]
+        VLD1    {dLeft[4]},[pSrcLeft],srcStep           ;// pSrcLeft[0*leftStep]
+        VLD1    {dLeft[5]},[pSrcTmp],srcStep            ;// pSrcLeft[1*leftStep]
+        VLD1    {dLeft[6]},[pSrcLeft]                   ;// pSrcLeft[2*leftStep]
+        VLD1    {dLeft[7]},[pSrcTmp]                    ;// pSrcLeft[3*leftStep]
+        
+        VDUP    dL3,dLeft[7]                            ;// [L3|L3|L3|L3|L3|L3|L3|L3]
+        
+        VEXT    dLeftHU0,dLeft,dL3,#4                   ;// [L3|L3|L3|L3|L3|L2|L1|L0]
+        VEXT    dLeftHU1,dLeft,dL3,#5                   ;// [L3|L3|L3|L3|L3|L3|L2|L1]
+        VEXT    dLeftHU2,dLeft,dL3,#6                   ;// [L3|L3|L3|L3|L3|L3|L3|L2]
+        
+        VHADD   dTmp0, dLeftHU0, dLeftHU2
+        VRHADD  dTmp0, dTmp0, dLeftHU1                  ;// Tmp[ L3 | L3 | L3 | L3 | L3 | 5 | 3 | 1 ]
+        
+        VRHADD  dTmp1, dLeftHU1, dLeftHU0               ;// (a+b+1)>>1 
+                                                        ;//  Tmp[ L3 | L3 | L3 | L3 | L3 | 4 | 2 | 0 ]
+                                                      
+        VZIP    dTmp1,dTmp0                             ;// dTmp1 = Tmp[7| 6| 5| 4| 3| 2| 1| 0]  
+                                                        ;// dTmp0 = [L3|L3|L3|L3|L3|L3|L3|L3]
+                                                                                                                            
+        
+        VST1    dTmp1U32[0],[pDst],dstStep              ;// [3|2|1|0] 
+        VEXT    dTmp1,dTmp1,dTmp1,#2
+        VST1    dTmp1U32[0],[pDst],dstStep              ;// [5|4|3|2] 
+        VEXT    dTmp1,dTmp1,dTmp1,#2
+        VST1    dTmp1U32[0],[pDst],dstStep              ;// [7|6|5|4]  
+        VST1    dTmp0U32[0],[pDst]                      ;// [9|8|7|6] 
+        
+        
+ExitPredict4x4
+        
+        MOV      return,  #OMX_Sts_NoErr
+        M_END
+
+        ENDIF ;// CortexA8
+        
+        END
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_4x4 ends
+;//-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
new file mode 100755
index 0000000..e394339
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
@@ -0,0 +1,140 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+        
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        IMPORT armVCM4P10_QPDivTable
+        IMPORT armVCM4P10_VMatrixQPModTable
+            
+        M_VARIANTS CortexA8
+    
+
+    
+    
+    IF CortexA8
+
+;// ARM Registers
+;//--------------------------------------
+;// Declare input registers
+;//--------------------------------------
+ppSrc       RN 0                                    ;// in: address of the stream pointer (read at entry, advanced pointer stored back)
+pDst        RN 1                                    ;// in: coefficient buffer (cleared, filled by the unpack loop, then overwritten with the result)
+QP          RN 2                                    ;// in: byte index into both QP look-up tables
+
+;//-------------------------------------------------------------
+;// Scratch registers for the unpack loop (inlined Unpack2x2)
+;//-------------------------------------------------------------
+pSrc        RN 9                                    ;// current read position in the packed stream
+Value       RN 4                                    ;// unpacked coefficient
+Value2      RN 5                                    ;// high byte of a two-byte coefficient
+Flag        RN 6                                    ;// control byte: bits 0-3 index, 0x10 two-byte value, 0x20 last entry
+strOffset   RN 7                                    ;// byte offset of the store into pDst
+cstOffset   RN 8                                    ;// constant mask 31
+
+;//-------------------------------------------------------------
+;// Scratch registers for the transform (r4-r6, r9, r2 and r0 are reused after the unpack loop)
+;//-------------------------------------------------------------
+r0w0        RN  3
+r0w1        RN  4                                   ;// same register as Value
+
+c0w0        RN  5                                   ;// same register as Value2 and pQPDivTable
+c1w0        RN  6                                   ;// same register as Flag and pQPModTable
+
+return      RN  0                                   ;// same register as ppSrc
+pQPDivTable RN  5
+pQPModTable    RN  6
+Shift        RN  9                                  ;// same register as pSrc
+Scale        RN  2                                  ;// same register as QP
+
+
+
+;// Neon Registers
+
+dZero       DN  D0.U16                              ;// D0 viewed as unsigned halfwords
+dInvTrCoeff DN  D0.S16                              ;// D0 again, viewed as signed halfwords
+dScale      DN  D1.S16
+qDqntCoeff  QN  Q1.S32                              ;// Q1 is the D2/D3 pair
+dDqntCoeff  DN  D2.S16                              ;// low half of Q1
+
+
+        ;// omxVCM4P10_TransformDequantChromaDCFromPair: unpack the packed stream at *ppSrc into pDst, apply a 2x2 add/subtract butterfly, scale, halve and store back; returns OMX_Sts_NoErr
+        M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9
+        ;// NOTE(review): M_START/M_END are macros from armCOMM_s.h (not shown); the r9 argument presumably asks for r4-r9 to be preserved - confirm
+        LDR     pSrc, [ppSrc]                        ;// pSrc = *ppSrc
+        VMOV    dZero, #0
+        MOV     cstOffset, #31                       ;// Mask used in the loop to form the store offset
+        
+        ;//-----------------------------------------------------------------------
+        ;// First, clear the four 16-bit coefficients in the <pDst> buffer
+        ;//-----------------------------------------------------------------------
+        
+        VST1    dZero,[pDst]                         ;// pDst[0]  = pDst[1]  = pDst[2]  = pDst[3]  = 0
+        LDRB     Flag,  [pSrc], #1                   ;// Preload <Flag> before <unpackLoop>
+
+
+unpackLoop
+        TST      Flag,  #0x10                        ;// Flag & 0x10: set (NE) = two-byte value, clear (EQ) = one-byte value
+        LDRSBNE  Value2,[pSrc,#1]                    ;// Two-byte case: sign-extended high byte
+        LDRBNE   Value, [pSrc], #2                   ;// Two-byte case: low byte; loaded byte-wise to avoid unaligned access
+        AND      strOffset, cstOffset, Flag, LSL #1  ;// strOffset = (Flag << 1) & 31, i.e. (Flag & 15) << 1
+        LDRSBEQ  Value, [pSrc], #1                   ;// One-byte case: Value = sign-extended byte, pSrc++
+        ORRNE    Value,Value,Value2, LSL #8          ;// Two-byte case: Value = low | (high << 8)
+        
+        TST      Flag,  #0x20                        ;// Flag & 0x20: set means this was the last entry
+        LDRBEQ   Flag,  [pSrc], #1                   ;// Not last: fetch the next Flag byte, pSrc++
+        STRH     Value, [pDst, strOffset]            ;// Store the low 16 bits of <Value> at byte offset <strOffset>
+        BEQ      unpackLoop                          ;// Loop while the 0x20 bit was clear (flags are still those of the TST above)
+        
+        ;//--------------------------------------------------
+        ;//InvTransformDC2x2: Inlined (Implemented in ARM V6)
+        ;//--------------------------------------------------
+        
+        LDMIA    pDst, {r0w0, r0w1}                  ;// r0w0 = |c1|c0| & r0w1 = |c3|c2|
+
+        STR      pSrc, [ppSrc]                       ;// Update the bitstream pointer
+        
+        LDR      pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
+        LDR      pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer
+        
+        SADDSUBX r0w0, r0w0,  r0w0                   ;// [top, bottom] = [ c00+c01, c00-c01 ]
+        SADDSUBX r0w1, r0w1,  r0w1                   ;// [top, bottom] = [ c10+c11, c10-c11 ]
+        
+        LDRSB    Shift, [pQPDivTable, QP]            ;// Shift = pQPDivTable[QP]
+        LDRSB    Scale, [pQPModTable, QP]            ;// Scale = pQPModTable[QP] (overwrites QP, same register)
+        
+        SADD16   c0w0, r0w0, r0w1                    ;// [ d00+d10, d01+d11 ] (reuses the pQPDivTable register)
+        SSUB16   c1w0, r0w0, r0w1                    ;// [ d00-d10, d01-d11 ] (reuses the pQPModTable register)
+        
+        ;//-------------------------------------------------
+        ;//DequantChromaDC2x2: Inlined (Neon Implementation)
+        ;//-------------------------------------------------
+        
+        LSL      Scale, Scale, Shift                 ;// Scale = Scale << Shift
+        VMOV     dInvTrCoeff, c0w0, c1w0             ;// D0 = { c0w0 (low word), c1w0 (high word) }
+        VREV32   dInvTrCoeff,dInvTrCoeff             ;// Swap the two halfwords inside each 32-bit word
+        VDUP     dScale,Scale                        ;// Broadcast Scale to every 16-bit lane
+        
+        VMULL    qDqntCoeff,dInvTrCoeff,dScale       ;// 32-bit products: coefficient * Scale
+        VSHRN    dDqntCoeff,qDqntCoeff,#1            ;// (coefficient * Scale) >> 1, narrowed back to 16 bits
+        
+        
+        VST1     dDqntCoeff,[pDst]                   ;// Storing all the coefficients at once
+        
+        MOV      return, #OMX_Sts_NoErr
+        M_END
+        
+    ENDIF ;// CortexA8
+    
+    
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
new file mode 100755
index 0000000..2529959
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
@@ -0,0 +1,264 @@
+;//
+;// 
+;// File Name:  omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// Description:
+;// H.264 inverse quantize and transform module
+;// 
+;// 
+
+;// Include standard headers
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+;// Import/Export symbols required from/to other files
+;// (For example tables)
+        
+        IMPORT armVCM4P10_UnpackBlock4x4 
+        IMPORT armVCM4P10_QPDivTable
+        IMPORT armVCM4P10_VMatrixQPModTable
+        
+        M_VARIANTS CortexA8
+
+;// Set debugging level        
+;//DEBUG_ON    SETL {TRUE}
+
+
+;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4
+    
+
+;// Guarding implementation by the processor name
+    
+    
+
+;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4
+
+;// Guarding implementation by the processor name
+    
+    IF  CortexA8
+    
+;//Input Registers
+pData               RN  0                           ;// in/out: 4x4 block of 16-bit values, transformed in place
+QP                  RN  1                           ;// in: byte index into both QP look-up tables
+
+
+;//Local Scratch Registers
+
+;// ARM Registers
+
+pQPDivTable         RN  2
+pQPModTable         RN  3
+Shift               RN  4
+Scale               RN  5
+
+;// NEON Registers
+;// NOTE: every stage below reuses D0-D7, so the names of different stages alias each other
+;// Packed Input pixels
+dIn0                DN  D0.S16
+dIn1                DN  D1.S16
+dIn2                DN  D2.S16
+dIn3                DN  D3.S16   
+
+;// Intermediate calculations
+dRowSum1            DN  D4.S16
+dRowSum2            DN  D5.S16
+dRowDiff1           DN  D6.S16
+dRowDiff2           DN  D7.S16
+
+;// Row operated pixels (same registers as dIn0-dIn3)
+dRowOp0             DN  D0.S16
+dRowOp1                DN  D1.S16
+dRowOp2                DN  D2.S16
+dRowOp3                DN  D3.S16
+qRowOp01            QN  Q0.32                       ;// D0/D1 pair viewed as 32-bit elements
+qRowOp23            QN  Q1.32                       ;// D2/D3 pair viewed as 32-bit elements
+
+;// Intermediate calculations (same registers as dRowSum1/2 and dRowDiff1/2)
+dColSum1            DN  D4.S16
+dColSum2            DN  D5.S16
+dColDiff1           DN  D6.S16
+dColDiff2           DN  D7.S16
+
+;// Column operated pixels (same registers as dRowOp0-dRowOp3)
+dColOp0             DN  D0.S16
+dColOp1                DN  D1.S16
+dColOp2                DN  D2.S16
+dColOp3                DN  D3.S16
+
+;// Temporary scratch variables
+
+dScale              DN  D5.S16                      ;// same register as dRowSum2/dColSum2
+qRound0             QN  Q3.S32                      ;// D6/D7 pair, i.e. overlaps dColDiff1/dColDiff2
+qRound1             QN  Q4.S32
+qRound2             QN  Q5.S32
+qRound3             QN  Q6.S32
+
+;// InvTransformed and Dequantized pixels (same registers as dColOp0-dColOp3)
+dOut0               DN  D0.S16
+dOut1                DN  D1.S16
+dOut2                DN  D2.S16
+dOut3                DN  D3.S16
+
+       
+    ;// Allocate stack memory required by the function
+        
+
+    ;// armVCM4P10_InvTransformDequantLumaDC4x4: in-place 4x4 add/subtract transform of the 16-bit block at pData, then scale by (pQPModTable[QP] << pQPDivTable[QP]), add 2 and shift right by 2
+    M_START armVCM4P10_InvTransformDequantLumaDC4x4,r5,d13
+    ;// NOTE(review): M_START/M_END are macros from armCOMM_s.h (not shown); r5,d13 presumably name the highest core/NEON registers to preserve - confirm
+    ;******************************************************************
+    ;// The strategy used in implementing the transform is as follows:*
+    ;// Load the 4x4 block into 4 D-registers                         *  
+    ;// Transpose the 4x4 matrix                                      *  
+    ;// Perform the row operations (on columns) using SIMD            *  
+    ;// Transpose the 4x4 result matrix                               *  
+    ;// Perform the column operations                                 *
+    ;******************************************************************
+
+        ;// Load all the 4x4 pixels in Transposed form
+        ;// (VLD4 de-interleaves: dIn<k> receives elements k, k+4, k+8 and k+12)
+        VLD4    {dIn0,dIn1,dIn2,dIn3},[pData]
+        LDR     pQPDivTable, =armVCM4P10_QPDivTable        ;// QP Division look-up-table base pointer
+        LDR     pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer
+        
+        ;**************************************** 
+        ;// Row Operations (Performed on columns)
+        ;**************************************** 
+        ;// Scale factor calculation is done using ARM instructions
+        ;// interleaved with the NEON instructions in order to dual-issue
+        
+        VADD    dRowSum1,dIn0,dIn1                     ;// in0 + in1
+        VADD    dRowSum2,dIn2,dIn3                     ;// in2 + in3
+        VSUB    dRowDiff1,dIn0,dIn1                    ;// in0 - in1
+        LDRSB   Shift, [pQPDivTable, QP]               ;// ARM CODE: Shift = pQPDivTable[QP]
+        VSUB    dRowDiff2,dIn2,dIn3                    ;// in2 - in3
+        LDRSB   Scale, [pQPModTable, QP]               ;// ARM CODE: Scale = pQPModTable[QP] 
+        VADD    dRowOp0,dRowSum1,dRowSum2              ;// (in0+in1) + (in2+in3)
+        VSUB    dRowOp1,dRowSum1,dRowSum2              ;// (in0+in1) - (in2+in3)
+        VSUB    dRowOp2,dRowDiff1,dRowDiff2            ;// (in0-in1) - (in2-in3)
+        LSL     Scale, Scale, Shift                    ;// ARM CODE: Scale = Scale << Shift
+        VADD    dRowOp3,dRowDiff1,dRowDiff2            ;// (in0-in1) + (in2-in3)
+        
+        ;****************************************
+        ;// Transpose the resultant matrix
+        ;****************************************
+        
+        VTRN    dRowOp0,dRowOp1                        ;// 16-bit element step on registers 0 and 1
+        VTRN    dRowOp2,dRowOp3                        ;// 16-bit element step on registers 2 and 3
+        VTRN    qRowOp01,qRowOp23                      ;// 32-bit element step across the two pairs completes the 4x4 transpose
+        
+        ;**************************************** 
+        ;// Column Operations 
+        ;**************************************** 
+        
+        VADD    dColSum1,dRowOp0,dRowOp1               ;// same add/subtract pattern as the row pass
+        VADD    dColSum2,dRowOp2,dRowOp3
+        VSUB    dColDiff1,dRowOp0,dRowOp1
+        VSUB    dColDiff2,dRowOp2,dRowOp3
+        VADD    dColOp0,dColSum1,dColSum2
+        VSUB    dColOp1,dColSum1,dColSum2
+        VSUB    dColOp2,dColDiff1,dColDiff2
+        VADD    dColOp3,dColDiff1,dColDiff2
+        
+        ;//----------------------------------------------------------------------
+        ;//
+        ;// <Dequantize> improves on the c-reference code
+        ;// Both cases, i.e. Shift>=0 and Shift<0, are covered together
+        ;// We do not subtract 2 from Shift as in C reference, instead perform a
+        ;// Scale << Shift once in the beginning and do a right shift by a 
+        ;// constant 2 after the Multiplication. The value of Round would be 2 
+        ;// 
+        ;// By doing this we avoid the branches required and also 
+        ;// reduce the code size substantially
+        ;// 
+        ;//----------------------------------------------------------------------
+        
+        
+        VDUP    dScale, Scale                            ;// ARM -> NEON  copy 'scale' to vector (D5: dColSum2 is no longer needed)
+               
+                
+        VMOV    qRound0,#2                               ;// Set the Round Value (Q3 = D6/D7: dColDiff1/2 are no longer needed)
+        VMOV    qRound1,#2
+        VMOV    qRound2,#2
+        VMOV    qRound3,#2
+        
+        VMLAL   qRound0,dColOp0,dScale                   ;// pDst[i] * Scale + Round 
+        VMLAL   qRound1,dColOp1,dScale
+        VMLAL   qRound2,dColOp2,dScale
+        VMLAL   qRound3,dColOp3,dScale
+        
+        VSHRN   dOut0,qRound0,#2                          ;// Right shift by 2 & (OMX_S16)Value
+        VSHRN   dOut1,qRound1,#2
+        VSHRN   dOut2,qRound2,#2
+        VSHRN   dOut3,qRound3,#2
+        
+        ;***************************
+        ;// Store all the 4x4 pixels
+        ;***************************
+        
+        VST1  {dOut0,dOut1,dOut2,dOut3}, [pData]          ;// Write the 16 results back over the input block
+
+        
+        ;// Set return value: nothing is written here; none of the instructions above modifies r0 (pData)
+        
+        ;// Write function tail
+        M_END        
+        
+    ENDIF                                                           ;//CORTEXA8   
+        
+
+
+;// Function: omxVCM4P10_TransformDequantLumaDCFromPair
+    
+;//Input Registers
+ppSrc               RN  0                           ;// in: r0, left untouched for the first call
+pDst                RN  1                           ;// in: r1, left untouched for the first call
+QPR2                RN  2                           ;// in: QP as received in r2
+
+;//Output Registers
+result              RN  0                           ;// status code returned in r0
+
+;//Local Scratch Registers
+pDstR4              RN  4                           ;// copy of pDst kept in r4 across the first call
+pDstR0              RN  0                           ;// pDst moved into r0 as first argument of the second call
+QPR1                RN  1                           ;// QP moved into r1 as second argument of the second call
+QPR5                RN  5                           ;// copy of QP kept in r5 across the first call
+
+;// Guarding implementation by the processor name
+    
+    IF CortexA8
+       
+    ;// omxVCM4P10_TransformDequantLumaDCFromPair: call armVCM4P10_UnpackBlock4x4, then
+    ;// armVCM4P10_InvTransformDequantLumaDC4x4(pDst, QP); always returns OMX_Sts_NoErr
+
+    ;// Write function header
+        M_START omxVCM4P10_TransformDequantLumaDCFromPair,r5
+        
+        MOV     pDstR4,pDst                         ;// Keep pDst (r1) in r4 across the call
+        MOV     QPR5,QPR2                           ;// Keep QP (r2) in r5 across the call
+        BL      armVCM4P10_UnpackBlock4x4           ;// Called with r0 = ppSrc, r1 = pDst unchanged; imported routine, presumably unpacks the stream into pDst - confirm
+        
+        MOV     pDstR0,pDstR4                       ;// r0 = pDst (relies on the callee having preserved r4)
+        MOV     QPR1,QPR5                           ;// r1 = QP (relies on the callee having preserved r5)
+        BL      armVCM4P10_InvTransformDequantLumaDC4x4
+                               
+       
+        ;// Set return value
+        MOV     result,#OMX_Sts_NoErr        
+       
+        ;// Write function tail
+        M_END
+        
+            
+    ENDIF                                                           ;//CortexA8
+    
+
+    END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S
new file mode 100644
index 0000000..aca2df4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S
@@ -0,0 +1,134 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_Average_4x4_Align0_unsafe
+    .func   armVCM4P10_Average_4x4_Align0_unsafe
+armVCM4P10_Average_4x4_Align0_unsafe:          /* 4 rows of 4 bytes: [r2] = rounded byte-wise average of [r0] and [r2]; r0 steps by r1, r2 steps by r3 */
+    PUSH     {r4-r6,lr}                        /* NOTE(review): r7, r10 and r11 are written below but not saved here - presumably scratch under the "_unsafe" convention; confirm with callers */
+    LDR      r7, =0x80808080                   /* bit 7 of every byte, used by the EORs below */
+    LDR      r12,[r2,#0]                       /* row 0 word from r2 */
+    LDR      r10,[r0],r1                       /* row 0 word from r0, then r0 += r1 */
+    LDR      lr,[r2,r3]                        /* row 1 word from r2 + r3 */
+    LDR      r11,[r0],r1                       /* row 1 word from r0, then r0 += r1 */
+    MVN      r12,r12                           /* ~b = 255 - b in every byte */
+    MVN      lr,lr
+    UHSUB8   r5,r10,r12                        /* per byte: (a - (255 - b)) >> 1 */
+    UHSUB8   r4,r11,lr
+    EOR      r5,r5,r7                          /* flipping bit 7 adds 128 (mod 256), which yields (a + b + 1) >> 1 */
+    STR      r5,[r2],r3                        /* store row 0, then r2 += r3 */
+    EOR      r4,r4,r7
+    STR      r4,[r2],r3                        /* store row 1, then r2 += r3 */
+    LDR      r10,[r0],r1                       /* rows 2 and 3: same computation */
+    LDR      r12,[r2,#0]
+    LDR      r11,[r0],r1
+    LDR      lr,[r2,r3]
+    MVN      r12,r12
+    UHSUB8   r5,r10,r12
+    MVN      lr,lr
+    UHSUB8   r4,r11,lr
+    EOR      r5,r5,r7
+    STR      r5,[r2],r3                        /* store row 2 */
+    EOR      r4,r4,r7
+    STR      r4,[r2],r3                        /* store row 3 */
+    POP      {r4-r6,pc}                        /* restore r4-r6 and return */
+    .endfunc
+
+    .global armVCM4P10_Average_4x4_Align2_unsafe
+    .func   armVCM4P10_Average_4x4_Align2_unsafe
+armVCM4P10_Average_4x4_Align2_unsafe:          /* as the Align0 variant, but each source row is taken from bits 16..47 of the two words at [r0] and [r0,#4] */
+    PUSH     {r4-r6,lr}                        /* NOTE(review): r7, r10 and r11 are written below but not saved here - presumably scratch under the "_unsafe" convention; confirm with callers */
+    LDR      r7, =0x80808080                   /* bit 7 of every byte, used by the EORs below */
+    LDR      r4,[r0,#4]                        /* row 0: second source word */
+    LDR      r10,[r0],r1                       /* row 0: first source word, then r0 += r1 */
+    LDR      r12,[r2,#0]                       /* row 0 word from r2 */
+    LDR      lr,[r2,r3]                        /* row 1 word from r2 + r3 */
+    LDR      r5,[r0,#4]                        /* row 1: second source word */
+    LDR      r11,[r0],r1                       /* row 1: first source word, then r0 += r1 */
+    MVN      r12,r12                           /* ~b = 255 - b in every byte */
+    MVN      lr,lr
+    LSR      r10,r10,#16
+    ORR      r10,r10,r4,LSL #16                /* r10 = (first >> 16) | (second << 16); with little-endian loads these are source bytes 2..5 */
+    LSR      r11,r11,#16
+    ORR      r11,r11,r5,LSL #16
+    UHSUB8   r5,r10,r12                        /* per byte: (a - (255 - b)) >> 1 */
+    UHSUB8   r4,r11,lr
+    EOR      r5,r5,r7                          /* flipping bit 7 adds 128 (mod 256), which yields (a + b + 1) >> 1 */
+    STR      r5,[r2],r3                        /* store row 0, then r2 += r3 */
+    EOR      r4,r4,r7
+    STR      r4,[r2],r3                        /* store row 1, then r2 += r3 */
+    LDR      r4,[r0,#4]                        /* rows 2 and 3: same computation */
+    LDR      r10,[r0],r1
+    LDR      r12,[r2,#0]
+    LDR      lr,[r2,r3]
+    LDR      r5,[r0,#4]
+    LDR      r11,[r0],r1
+    MVN      r12,r12
+    MVN      lr,lr
+    LSR      r10,r10,#16
+    ORR      r10,r10,r4,LSL #16
+    LSR      r11,r11,#16
+    ORR      r11,r11,r5,LSL #16
+    UHSUB8   r5,r10,r12
+    UHSUB8   r4,r11,lr
+    EOR      r5,r5,r7
+    STR      r5,[r2],r3                        /* store row 2 */
+    EOR      r4,r4,r7
+    STR      r4,[r2],r3                        /* store row 3 */
+    POP      {r4-r6,pc}                        /* restore r4-r6 and return */
+    .endfunc
+
+    .global armVCM4P10_Average_4x4_Align3_unsafe
+    .func   armVCM4P10_Average_4x4_Align3_unsafe
+armVCM4P10_Average_4x4_Align3_unsafe:          /* as the Align0 variant, but each source row is taken from bits 24..55 of the two words at [r0] and [r0,#4] */
+    PUSH     {r4-r6,lr}                        /* NOTE(review): r7, r10 and r11 are written below but not saved here - presumably scratch under the "_unsafe" convention; confirm with callers */
+    LDR      r7, =0x80808080                   /* bit 7 of every byte, used by the EORs below */
+    LDR      r4,[r0,#4]                        /* row 0: second source word */
+    LDR      r10,[r0],r1                       /* row 0: first source word, then r0 += r1 */
+    LDR      r12,[r2,#0]                       /* row 0 word from r2 */
+    LDR      lr,[r2,r3]                        /* row 1 word from r2 + r3 */
+    LDR      r5,[r0,#4]                        /* row 1: second source word */
+    LDR      r11,[r0],r1                       /* row 1: first source word, then r0 += r1 */
+    MVN      r12,r12                           /* ~b = 255 - b in every byte */
+    MVN      lr,lr
+    LSR      r10,r10,#24
+    ORR      r10,r10,r4,LSL #8                 /* r10 = (first >> 24) | (second << 8); with little-endian loads these are source bytes 3..6 */
+    LSR      r11,r11,#24
+    ORR      r11,r11,r5,LSL #8
+    UHSUB8   r5,r10,r12                        /* per byte: (a - (255 - b)) >> 1 */
+    UHSUB8   r4,r11,lr
+    EOR      r5,r5,r7                          /* flipping bit 7 adds 128 (mod 256), which yields (a + b + 1) >> 1 */
+    STR      r5,[r2],r3                        /* store row 0, then r2 += r3 */
+    EOR      r4,r4,r7
+    STR      r4,[r2],r3                        /* store row 1, then r2 += r3 */
+    LDR      r4,[r0,#4]                        /* rows 2 and 3: same computation */
+    LDR      r10,[r0],r1
+    LDR      r12,[r2,#0]
+    LDR      lr,[r2,r3]
+    LDR      r5,[r0,#4]
+    LDR      r11,[r0],r1
+    MVN      r12,r12
+    MVN      lr,lr
+    LSR      r10,r10,#24
+    ORR      r10,r10,r4,LSL #8
+    LSR      r11,r11,#24
+    ORR      r11,r11,r5,LSL #8
+    UHSUB8   r5,r10,r12
+    UHSUB8   r4,r11,lr
+    EOR      r5,r5,r7
+    STR      r5,[r2],r3                        /* store row 2 */
+    EOR      r4,r4,r7
+    STR      r4,[r2],r3                        /* store row 3 */
+    POP      {r4-r6,pc}                        /* restore r4-r6 and return */
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S
new file mode 100644
index 0000000..b9ee221
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S
@@ -0,0 +1,54 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_DeblockingChromabSLT4_unsafe
+    .func   armVCM4P10_DeblockingChromabSLT4_unsafe
+armVCM4P10_DeblockingChromabSLT4_unsafe:    /* Register-based helper. Reads d4,d5,d8,d9 (byte vectors), d14,d15,d16, r5 (-> 4 bytes), r2, r3. Writes d29 and d24 as results; also d0, d2, d18, d20-d25, d28, d31 and r5. NOTE(review): presumably d4/d5 = p0/p1 and d8/d9 = q0/q1 of the H.264 bS<4 chroma filter - confirm against caller */
+    VLD1.32  {d18[0]},[r5]!    /* load 4 bytes into the low word of d18; r5 += 4 */
+    VSUBL.U8 q11,d5,d9    /* q11 = d5 - d9, widened to 16 bit */
+    VMOV     d28,d18
+    VSUBL.U8 q10,d8,d4    /* q10 = d8 - d4, widened to 16 bit */
+    VSHR.S16 q11,q11,#2    /* (d5 - d9) >> 2, arithmetic */
+    VZIP.8   d18,d28    /* interleave d18 with its copy: each loaded byte now fills two adjacent lanes of d18 */
+    VBIF     d18,d14,d16    /* where bits of d16 are 0, take the bits of d14 instead */
+    VRHADD.S16 q10,q11,q10    /* delta = (((d5 - d9) >> 2) + (d8 - d4) + 1) >> 1 */
+    VADD.I8  d31,d18,d15    /* clamp bound = d18 + d15 */
+    VQMOVN.S16 d20,q10    /* narrow delta to signed 8 bit with saturation */
+    VLD1.8   {d0[]},[r2]    /* broadcast the byte at [r2] into d0; not consumed in this routine */
+    VMIN.S8  d20,d20,d31    /* delta = min(delta, bound) */
+    VNEG.S8  d31,d31
+    VLD1.8   {d2[]},[r3]    /* broadcast the byte at [r3] into d2; not consumed in this routine */
+    VMAX.S8  d20,d20,d31    /* delta = max(delta, -bound) */
+    VMOVL.U8 q14,d4
+    VMOVL.U8 q12,d8
+    VADDW.S8 q14,q14,d20    /* d4 + delta in 16 bit */
+    VSUBW.S8 q12,q12,d20    /* d8 - delta in 16 bit */
+    VQMOVUN.S16 d29,q14    /* d29 = d4 + delta saturated to 0..255 */
+    VQMOVUN.S16 d24,q12    /* d24 = d8 - delta saturated to 0..255 */
+    BX       lr
+    .endfunc
+
+    .global armVCM4P10_DeblockingChromabSGE4_unsafe
+    .func   armVCM4P10_DeblockingChromabSGE4_unsafe
+armVCM4P10_DeblockingChromabSGE4_unsafe:    /* Register-based helper. Reads d4,d5,d8,d9, r2, r3, r5. Writes d13 and d31 (filtered values), d0, d2 and r5 */
+    VHADD.U8 d13,d4,d9    /* (d4 + d9) >> 1 */
+    VHADD.U8 d31,d8,d5    /* (d8 + d5) >> 1 */
+    VLD1.8   {d0[]},[r2]    /* broadcast the byte at [r2] into d0; not consumed in this routine */
+    ADD      r5,r5,#4    /* advance r5 by 4 bytes without reading them */
+    VLD1.8   {d2[]},[r3]    /* broadcast the byte at [r3] into d2; not consumed in this routine */
+    VRHADD.U8 d13,d13,d5    /* d13 = (((d4 + d9) >> 1) + d5 + 1) >> 1 */
+    VRHADD.U8 d31,d31,d9    /* d31 = (((d8 + d5) >> 1) + d9 + 1) >> 1 */
+    BX       lr
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S
new file mode 100644
index 0000000..47f3d44
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S
@@ -0,0 +1,102 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_DeblockingLumabSLT4_unsafe
+    .func   armVCM4P10_DeblockingLumabSLT4_unsafe
+armVCM4P10_DeblockingLumabSLT4_unsafe:    /* Register-based helper. Reads d4,d5,d6,d8,d9,d10 (byte vectors), d12,d14,d15,d16,d17 and r5 (-> 2 bytes). Results: d29, d30, d24, d25. NOTE(review): presumably d12/d16/d17 are per-lane filter masks prepared by the caller - confirm */
+    VSUBL.U8 q11,d5,d9    /* q11 = d5 - d9, widened to 16 bit */
+    VLD1.8   {d18[]},[r5]!    /* broadcast first byte at [r5] to all lanes of d18; r5 += 1 */
+    VSUBL.U8 q10,d8,d4    /* q10 = d8 - d4, widened to 16 bit */
+    VLD1.8   {d19[]},[r5]!    /* broadcast second byte to all lanes of d19; r5 += 1 */
+    VSHR.S16 q11,q11,#2
+    VEXT.8   d18,d18,d19,#4    /* d18 = first byte in lanes 0-3, second byte in lanes 4-7 */
+    VAND     d19,d17,d15
+    VBIF     d18,d14,d16    /* where bits of d16 are 0, take the bits of d14 instead */
+    VRHADD.S16 q10,q11,q10    /* delta = (((d5 - d9) >> 2) + (d8 - d4) + 1) >> 1 */
+    VRHADD.U8 d24,d4,d8    /* (d4 + d8 + 1) >> 1 */
+    VADD.I8  d31,d18,d19    /* bound = d18 + (d17 & d15) ... */
+    VAND     d19,d12,d15
+    VQADD.U8 d23,d5,d18    /* upper limit for the d5 replacement: d5 + d18, saturating */
+    VQMOVN.S16 d20,q10    /* narrow delta to signed 8 bit with saturation */
+    VADD.I8  d31,d31,d19    /* ... + (d12 & d15) */
+    VQSUB.U8 d22,d5,d18    /* lower limit for the d5 replacement: d5 - d18, saturating */
+    VQADD.U8 d19,d9,d18    /* upper limit for the d9 replacement */
+    VHADD.U8 d26,d24,d6    /* candidate for d5: (((d4 + d8 + 1) >> 1) + d6) >> 1 */
+    VMIN.S8  d20,d20,d31    /* delta = min(delta, bound) */
+    VNEG.S8  d31,d31
+    VQSUB.U8 d21,d9,d18    /* lower limit for the d9 replacement */
+    VHADD.U8 d27,d24,d10    /* candidate for d9: (((d4 + d8 + 1) >> 1) + d10) >> 1 */
+    VMAX.U8  d30,d26,d22
+    VMAX.S8  d20,d20,d31    /* delta = max(delta, -bound) */
+    VMOVL.U8 q14,d4
+    VMOVL.U8 q12,d8    /* overwrites d24/d25; d24 was already consumed above */
+    VADDW.S8 q14,q14,d20    /* d4 + delta in 16 bit */
+    VSUBW.S8 q12,q12,d20    /* d8 - delta in 16 bit */
+    VQMOVUN.S16 d29,q14    /* d29 = d4 + delta saturated to 0..255 */
+    VQMOVUN.S16 d24,q12    /* d24 = d8 - delta saturated to 0..255 */
+    VMAX.U8  d25,d27,d21
+    VMIN.U8  d30,d30,d23    /* d30 = d26 clamped to [d5 - d18, d5 + d18] */
+    VMIN.U8  d25,d25,d19    /* d25 = d27 clamped to [d9 - d18, d9 + d18] */
+    VBIF     d29,d4,d16    /* keep original d4 where bits of d16 are 0 */
+    VBIF     d30,d5,d17    /* keep original d5 where bits of d17 are 0 */
+    VBIF     d24,d8,d16    /* keep original d8 where bits of d16 are 0 */
+    VBIF     d25,d9,d12    /* keep original d9 where bits of d12 are 0 */
+    BX       lr
+    .endfunc
+
+    .global armVCM4P10_DeblockingLumabSGE4_unsafe
+    .func   armVCM4P10_DeblockingLumabSGE4_unsafe
+armVCM4P10_DeblockingLumabSGE4_unsafe:    /* Register-based helper. Reads d0, d4-d11, d12, d13, d15, d16, d17. Results: d29, d30, d31 and d24, d25, d28; d12 and d17 are modified. NOTE(review): presumably the H.264 bS==4 luma filter with d4-d7 on one side of the edge and d8-d11 on the other - confirm against caller */
+    VSHR.U8  d19,d0,#2
+    VADD.I8  d19,d19,d15
+    VADDL.U8 q10,d8,d4    /* q10 = d8 + d4, widened to 16 bit */
+    VADD.I8  d19,d19,d15    /* d19 = (d0 >> 2) + 2*d15 */
+    VADDL.U8 q11,d6,d9    /* q11 = d6 + d9 */
+    VADDW.U8 q12,q10,d5    /* q12 = d4 + d5 + d8 */
+    VCGT.U8  d19,d19,d13    /* d19 = all-ones lanes where (d0 >> 2) + 2*d15 > d13 */
+    VSHR.U16 q11,q11,#1
+    VHADD.U16 q11,q12,q11    /* q11 = (q12 + ((d6 + d9) >> 1)) >> 1 */
+    VADDW.U8 q12,q12,d6    /* q12 = d4 + d5 + d6 + d8 */
+    VADDL.U8 q13,d7,d6    /* q13 = d7 + d6 */
+    VAND     d17,d17,d19    /* restrict mask d17 by the comparison result */
+    VHADD.U8 d28,d4,d9    /* (d4 + d9) >> 1 */
+    VSRA.U16 q13,q12,#1    /* q13 += q12 >> 1 */
+    VAND     d12,d12,d19    /* restrict mask d12 by the comparison result */
+    VQRSHRN.U16 d29,q11,#1    /* rounding narrow -> first candidate for d29 */
+    VRHADD.U8 d28,d28,d5    /* alternative: (((d4 + d9) >> 1) + d5 + 1) >> 1 */
+    VQRSHRN.U16 d30,q12,#2    /* d30 = (d4 + d5 + d6 + d8 + 2) >> 2, saturated */
+    VADDL.U8 q11,d10,d5    /* second half: same pattern with the d8-d11 side */
+    VADDW.U8 q12,q10,d9    /* q12 = d4 + d8 + d9 */
+    VBIF     d29,d28,d17    /* use the alternative where bits of d17 are 0 */
+    VQRSHRN.U16 d31,q13,#2
+    VADDL.U8 q13,d11,d10    /* q13 = d11 + d10 */
+    VSHR.U16 q11,q11,#1
+    VHADD.U16 q11,q12,q11
+    VADDW.U8 q12,q12,d10    /* q12 = d4 + d8 + d9 + d10 */
+    VHADD.U8 d28,d8,d5    /* (d8 + d5) >> 1 */
+    VBIF     d29,d4,d16    /* keep original d4 where bits of d16 are 0 */
+    VBIF     d30,d5,d17    /* keep original d5 where bits of d17 are 0 */
+    VSRA.U16 q13,q12,#1    /* q13 += q12 >> 1 */
+    VQRSHRN.U16 d25,q12,#2    /* d25 = (d4 + d8 + d9 + d10 + 2) >> 2; overwrites the high half of q12 after q12 was consumed */
+    VQRSHRN.U16 d24,q11,#1
+    VRHADD.U8 d22,d28,d9    /* alternative: (((d8 + d5) >> 1) + d9 + 1) >> 1 */
+    VBIF     d25,d9,d12    /* keep original d9 where bits of d12 are 0 */
+    VBIF     d31,d6,d17    /* keep original d6 where bits of d17 are 0 */
+    VBIF     d24,d22,d12    /* use the alternative where bits of d12 are 0 */
+    VQRSHRN.U16 d28,q13,#2
+    VBIF     d24,d8,d16    /* keep original d8 where bits of d16 are 0 */
+    VBIF     d28,d10,d12    /* keep original d10 where bits of d12 are 0 */
+    BX       lr
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S
new file mode 100644
index 0000000..e68bd8e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S
@@ -0,0 +1,272 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_DecodeCoeffsToPair
+    .func   armVCM4P10_DecodeCoeffsToPair
+armVCM4P10_DecodeCoeffsToPair:    /* Table-driven bitstream decode into packed (position, value) records. In: r0 -> stream byte pointer, r1 -> bit offset, r2 -> count byte (written), r3 -> output byte pointer, plus two stack arguments. Returns r0 = 0 on success or -2 on an invalid code; on success *r0 and *r1 are updated. NOTE(review): presumably H.264 CAVLC residual decoding - confirm against the C caller */
+    PUSH     {r4-r12,lr}    /* 10 registers = 0x28 bytes */
+    SUB      sp,sp,#0x40    /* scratch frame; the stack arguments are now at sp+0x68 and sp+0x6c */
+    LDR      r10,[r0,#0]    /* r10 = stream byte pointer */
+    LDR      r12,[r1,#0]    /* r12 = bit offset */
+    LDR      r6, =armVCM4P10_CAVLCCoeffTokenTables
+    LDR      r4,[sp,#0x68]    /* first stack argument: index of the table to use */
+    LDRB     r9,[r10,#2]
+    LDRB     r8,[r10,#1]
+    LDRB     r11,[r10],#3    /* r10 += 3 */
+    ADD      r12,r12,#8
+    LDR      r6,[r6,r4,LSL #2]    /* r6 = selected table pointer */
+    ORR      r9,r9,r8,LSL #8
+    ORR      r11,r9,r11,LSL #16    /* r11 = first three stream bytes, earliest byte most significant */
+    LSLS     r8,r11,r12    /* move the next unread bit up to bit 31 */
+    MOVS     r7,#0x1e
+    AND      r7,r7,r8,LSR #27    /* r7 = next 4 bits * 2 = byte offset of a halfword table entry */
+    SUBS     r12,r12,#8    /* carry set (no borrow) selects the byte refill below */
+L0x44:
+    BCC      L1    /* borrow: skip the refill */
+    LDRB     r8,[r10],#1    /* fetch the next stream byte */
+L1:
+    LDRH     r7,[r6,r7]    /* table entry */
+    ADDCC    r12,r12,#8    /* no refill: undo the subtraction */
+    ADD      r12,r12,#4
+    ORRCS    r11,r8,r11,LSL #8    /* refill: shift the fetched byte into the bit window */
+    LSRS     r8,r7,#1    /* bit 0 of the entry -> carry: set means final entry */
+    BCS      L0x74
+    LSLS     r8,r11,r12    /* not final: use further stream bits to index the next table level */
+    SUBS     r12,r12,#0xa
+    ADD      r7,r7,r8,LSR #29
+    BIC      r7,r7,#1    /* keep the offset halfword-aligned */
+    B        L0x44
+L0x74:
+    SUB      r12,r12,r7,LSR #13    /* entry bits 13-15 adjust the bit position */
+    BIC      r7,r8,#0xf000    /* decoded value = entry bits 1-12 */
+    LSRS     r5,r7,#2    /* r5 = value >> 2 */
+    STRB     r5,[r2,#0]    /* report r5 through the r2 output byte */
+    BEQ      L0x344    /* r5 == 0: nothing more to decode */
+    CMP      r7,#0x44
+    BGE      L0x33c    /* value >= 0x44 (r5 >= 17): invalid */
+    STR      r0,[sp,#0]    /* save r0, r1, r3 so the registers can be reused */
+    STR      r1,[sp,#4]
+    STR      r3,[sp,#8]
+    ANDS     r1,r7,#3    /* r1 = low 2 bits of the value; NOTE(review): presumably the trailing-ones count */
+    ADD      r2,sp,#0xc    /* r2 = halfword value buffer in the scratch frame */
+    BEQ      L0xd8
+    MOV      r0,r1    /* loop count */
+L0xac:
+    LSLS     r7,r11,r12
+    SUBS     r12,r12,#7
+    BCC      L2
+    LDRB     r8,[r10],#1
+L2:
+    ADDCC    r12,r12,#8
+    LSR      r7,r7,#31    /* r7 = next stream bit */
+    ORRCS    r11,r8,r11,LSL #8
+    SUBS     r0,r0,#1
+    MOV      r8,#1
+    SUB      r8,r8,r7,LSL #1    /* value = 1 - 2*bit, i.e. +1 or -1 */
+    STRH     r8,[r2],#2
+    BGT      L0xac    /* uses the flags from SUBS r0 above */
+L0xd8:
+    SUBS     r0,r5,r1    /* r0 = values still to decode */
+    BEQ      L0x1b8
+    MOV      r4,#1    /* r4 = 1 if r5 > 10 and r1 < 3, else 0 */
+    CMP      r5,#0xa
+    MOVLE    r4,#0
+    CMP      r1,#3
+    MOVLT    r1,#4    /* r1 = 4 when fewer than three +/-1 values were read, else 2; added to the first code below */
+    MOVGE    r1,#2
+    MOVGE    r4,#0
+L0xfc:
+    LSLS     r7,r11,r12
+    CLZ      r7,r7    /* r7 = number of leading zero bits (prefix length) */
+    ADD      r12,r12,r7
+    SUBS     r12,r12,#7
+    BCC      L3
+    LDRB     r8,[r10],#1    /* up to two refill bytes may be needed here */
+    ORR      r11,r8,r11,LSL #8
+    SUBS     r12,r12,#8
+    BCC      L3
+    LDRB     r8,[r10],#1
+L3:
+    ADDCC    r12,r12,#8
+    ORRCS    r11,r8,r11,LSL #8
+    CMP      r7,#0x10
+    BGE      L0x33c    /* prefix of 16 or more zero bits: invalid */
+    MOVS     lr,r4    /* lr = number of suffix bits to read */
+    TEQEQ    r7,#0xe    /* r4 == 0 and prefix == 14: 4 suffix bits */
+    MOVEQ    lr,#4
+    TEQ      r7,#0xf    /* prefix == 15: 12 suffix bits */
+    MOVEQ    lr,#0xc
+    TEQEQ    r4,#0
+    ADDEQ    r7,r7,#0xf    /* prefix == 15 with r4 == 0: add 15 to the prefix */
+    TEQ      lr,#0
+    BEQ      L0x184    /* no suffix bits */
+    LSL      r3,r11,r12
+    ADD      r12,r12,lr
+    SUBS     r12,r12,#8
+    RSB      r9,lr,#0x20    /* shift that right-aligns the lr suffix bits */
+    BCC      L4
+    LDRB     r8,[r10],#1
+    ORR      r11,r8,r11,LSL #8
+    SUBS     r12,r12,#8
+    BCC      L4
+    LDRB     r8,[r10],#1
+L4:
+    ADDCC    r12,r12,#8
+    LSR      r3,r3,r9    /* r3 = suffix value */
+    ORRCS    r11,r8,r11,LSL #8
+    LSL      r7,r7,r4
+    ADD      r7,r3,r7    /* code = (prefix << r4) + suffix */
+L0x184:
+    ADD      r7,r7,r1    /* add the offset (4 or 2) */
+    MOV      r1,#2    /* later codes always use offset 2 */
+    LSRS     r8,r7,#1    /* magnitude = code >> 1; low bit -> carry */
+    RSBCS    r8,r8,#0    /* odd code: negate */
+    STRH     r8,[r2],#2
+    LDR      r9, =armVCM4P10_SuffixToLevel
+    LDRSB    r8,[r9,r4]    /* threshold for the current r4 */
+    TEQ      r4,#0
+    MOVEQ    r4,#1
+    CMP      r7,r8
+    ADDCS    r4,r4,#1    /* code >= threshold (unsigned): use one more suffix bit next time */
+    SUBS     r0,r0,#1
+    BGT      L0xfc
+L0x1b8:
+    LDR      r8,[sp,#0x6c]    /* second stack argument */
+    SUB      r0,r5,#1
+    SUBS     r1,r8,r5    /* r1 = second stack argument - r5 */
+    ADD      r4,sp,#0x2c    /* r4 = byte run buffer in the scratch frame */
+    MOV      lr,r5
+    SUB      lr,lr,#1    /* lr = r5 - 1 */
+    BEQ      L0x2b0    /* r5 equals the second stack argument: skip the table decode */
+    TEQ      r8,#4
+    LDREQ    r6, =(armVCM4P10_CAVLCTotalZeros2x2Tables - 4)    /* the -4 bias lets r5 = 1 index the first pointer */
+    LDRNE    r6, =(armVCM4P10_CAVLCTotalZeroTables - 4)
+    LDR      r6,[r6,r5,LSL #2]
+    LSLS     r8,r11,r12    /* same table-walk pattern as at L0x44 */
+    MOVS     r7,#0x1e
+    AND      r7,r7,r8,LSR #27
+    SUBS     r12,r12,#8
+L0x1f4:
+    BCC      L5
+    LDRB     r8,[r10],#1
+L5:
+    LDRH     r7,[r6,r7]
+    ADDCC    r12,r12,#8
+    ADD      r12,r12,#4
+    ORRCS    r11,r8,r11,LSL #8
+    LSRS     r8,r7,#1
+    BCS      L0x224
+    LSLS     r8,r11,r12
+    SUBS     r12,r12,#0xa
+    ADD      r7,r7,r8,LSR #29
+    BIC      r7,r7,#1
+    B        L0x1f4
+L0x224:
+    SUB      r12,r12,r7,LSR #13
+    BIC      r7,r8,#0xf000
+    CMP      r7,#0x10
+    BGE      L0x33c    /* decoded value >= 16: invalid */
+    LDR      r3, =(armVCM4P10_CAVLCRunBeforeTables - 4)
+    ADD      r4,sp,#0x2c
+    MOVS     r1,r7    /* r1 = decoded total */
+    ADD      lr,lr,r1    /* lr = r5 - 1 + total */
+    BEQ      L0x2b0    /* total is zero */
+L0x248:
+    SUBS     r0,r0,#1
+    LDR      r6,[r3,r1,LSL #2]    /* table chosen by the remaining total r1 */
+    BLT      L0x2bc    /* counter r0 went negative: go store the final run */
+    LSLS     r8,r11,r12    /* table walk with a 3-bit first index */
+    MOVS     r7,#0xe
+    AND      r7,r7,r8,LSR #28
+    SUBS     r12,r12,#8
+L0x264:
+    BCC      L6
+    LDRB     r8,[r10],#1
+L6:
+    LDRH     r7,[r6,r7]
+    ADDCC    r12,r12,#8
+    ADD      r12,r12,#3
+    ORRCS    r11,r8,r11,LSL #8
+    LSRS     r8,r7,#1
+    BCS      L0x294
+    LSLS     r8,r11,r12
+    SUBS     r12,r12,#9
+    ADD      r7,r7,r8,LSR #29
+    BIC      r7,r7,#1
+    B        L0x264
+L0x294:
+    SUB      r12,r12,r7,LSR #13
+    BIC      r7,r8,#0xf000
+    CMP      r7,#0xf
+    BGE      L0x33c    /* decoded run >= 15: invalid */
+    SUBS     r1,r1,r7    /* remaining total */
+    STRB     r7,[r4],#1    /* append the run */
+    BGT      L0x248    /* continue while the remaining total is positive */
+L0x2b0:
+    SUBS     r0,r0,#1
+    BLT      L7
+    STRB     r1,[r4],#1    /* store r1 as the run for each remaining entry */
+L7:
+    BGT      L0x2b0    /* STRB leaves the flags alone, so this still tests SUBS r0 */
+L0x2bc:
+    STRB     r1,[r4],#1    /* final run */
+    LDR      r8,[sp,#0x6c]
+    TEQ      r8,#0xf
+    ADDEQ    lr,lr,#1    /* second stack argument == 15: positions start one higher */
+    SUB      r4,r4,r5    /* rewind the run buffer by r5 bytes */
+    SUB      r2,r2,r5    /* rewind the value buffer by 2*r5 bytes */
+    SUB      r2,r2,r5
+    LDR      r3,[sp,#8]    /* saved r3 argument */
+    LDR      r0,[r3,#0]    /* r0 = output byte pointer */
+    TEQ      r8,#4
+    LDREQ    r6, =armVCM4P10_ZigZag_2x2
+    LDRNE    r6, =armVCM4P10_ZigZag_4x4
+L0x2ec:
+    LDRB     r9,[r4],#1    /* run */
+    LDRB     r8,[r6,lr]    /* r8 = table byte for position lr */
+    SUB      lr,lr,#1
+    SUB      lr,lr,r9    /* next position = lr - 1 - run */
+    LDRSH    r9,[r2],#2    /* value */
+    SUBS     r5,r5,#1
+    ORREQ    r8,r8,#0x20    /* flag 0x20 on the last record */
+    ADD      r1,r9,#0x80
+    CMP      r1,#0x100
+    ORRCS    r8,r8,#0x10    /* flag 0x10: value outside -128..127, stored as two bytes */
+    TEQ      r5,#0    /* sets Z for BNE below; carry from CMP is preserved */
+    STRB     r8,[r0],#1    /* position/flag byte */
+    STRB     r9,[r0],#1    /* low byte of the value */
+    LSR      r9,r9,#8
+    BCC      L8
+    STRB     r9,[r0],#1    /* high byte, only for two-byte values */
+L8:
+    BNE      L0x2ec
+    STR      r0,[r3,#0]    /* write back the advanced output pointer */
+    LDR      r0,[sp,#0]    /* restore the r0 and r1 arguments */
+    LDR      r1,[sp,#4]
+    B        L0x344
+L0x33c:
+    MVN      r0,#1    /* error return value -2 */
+    B        L0x35c
+L0x344:
+    ADD      r10,r10,r12,LSR #3    /* convert the bit window position back to a byte pointer ... */
+    AND      r12,r12,#7    /* ... and a bit offset of 0..7 */
+    SUB      r10,r10,#4
+    STR      r12,[r1,#0]    /* write back the bit offset */
+    STR      r10,[r0,#0]    /* write back the stream byte pointer */
+    MOV      r0,#0    /* success */
+L0x35c:
+    ADD      sp,sp,#0x40    /* release the scratch frame */
+    POP      {r4-r12,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DequantTables_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DequantTables_s.S
new file mode 100644
index 0000000..44eb428
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DequantTables_s.S
@@ -0,0 +1,103 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .section .rodata
+    .align 4
+
+
+    .global armVCM4P10_QPDivTable    /* all seven tables below are exported */
+    .global armVCM4P10_VMatrixQPModTable
+    .global armVCM4P10_PosToVCol4x4
+    .global armVCM4P10_PosToVCol2x2
+    .global armVCM4P10_VMatrix
+    .global armVCM4P10_QPModuloTable
+    .global armVCM4P10_VMatrixU16
+
+armVCM4P10_PosToVCol4x4:    /* 16 bytes, values 0..2; NOTE(review): presumably maps a 4x4 position to a column of armVCM4P10_VMatrix - confirm */
+    .byte  0, 2, 0, 2
+    .byte  2, 1, 2, 1
+    .byte  0, 2, 0, 2
+    .byte  2, 1, 2, 1
+
+armVCM4P10_PosToVCol2x2:    /* 4 bytes; same values as the top-left 2x2 corner of the table above */
+    .byte  0, 2
+    .byte  2, 1
+
+armVCM4P10_VMatrix:    /* 6 rows of 3 bytes; the same values are repeated as halfwords in armVCM4P10_VMatrixU16 */
+    .byte  10, 16, 13
+    .byte  11, 18, 14
+    .byte  13, 20, 16
+    .byte  14, 23, 18
+    .byte  16, 25, 20
+    .byte  18, 29, 23
+
+;//-------------------------------------------------------
+;// This table evaluates the expression [(INT)(QP/6)],
+;// for values of QP from 0 to 51 (inclusive).
+;//-------------------------------------------------------
+
+armVCM4P10_QPDivTable:    /* 54 byte entries (9 rows of 6): entry i holds i / 6 */
+    .byte  0,  0,  0,  0,  0,  0
+    .byte  1,  1,  1,  1,  1,  1
+    .byte  2,  2,  2,  2,  2,  2
+    .byte  3,  3,  3,  3,  3,  3
+    .byte  4,  4,  4,  4,  4,  4
+    .byte  5,  5,  5,  5,  5,  5
+    .byte  6,  6,  6,  6,  6,  6
+    .byte  7,  7,  7,  7,  7,  7
+    .byte  8,  8,  8,  8,  8,  8
+
+;//----------------------------------------------------
+;// This table contains armVCM4P10_VMatrix[QP%6][0] entries,
+;// for values of QP from 0 to 51 (inclusive).
+;//----------------------------------------------------
+
+armVCM4P10_VMatrixQPModTable:    /* 54 byte entries (9 rows of 6): each row repeats the first column of armVCM4P10_VMatrix */
+    .byte 10, 11, 13, 14, 16, 18
+    .byte 10, 11, 13, 14, 16, 18
+    .byte 10, 11, 13, 14, 16, 18
+    .byte 10, 11, 13, 14, 16, 18
+    .byte 10, 11, 13, 14, 16, 18
+    .byte 10, 11, 13, 14, 16, 18
+    .byte 10, 11, 13, 14, 16, 18
+    .byte 10, 11, 13, 14, 16, 18
+    .byte 10, 11, 13, 14, 16, 18
+
+;//-------------------------------------------------------
+;// This table evaluates the modulus expression [QP%6]*6,
+;// for values of QP from 0 to 51 (inclusive).
+;//-------------------------------------------------------
+
+armVCM4P10_QPModuloTable:    /* 54 byte entries (9 rows of 6): entry i holds (i % 6) * 6 */
+    .byte 0, 6, 12, 18, 24, 30
+    .byte 0, 6, 12, 18, 24, 30
+    .byte 0, 6, 12, 18, 24, 30
+    .byte 0, 6, 12, 18, 24, 30
+    .byte 0, 6, 12, 18, 24, 30
+    .byte 0, 6, 12, 18, 24, 30
+    .byte 0, 6, 12, 18, 24, 30
+    .byte 0, 6, 12, 18, 24, 30
+    .byte 0, 6, 12, 18, 24, 30
+
+;//-------------------------------------------------------
+;// This table contains the individual byte values stored as
+;// halfwords. This avoids unpacking inside the function
+;//-------------------------------------------------------
+
+armVCM4P10_VMatrixU16:    /* 6 rows of 3 halfwords; same values as armVCM4P10_VMatrix */
+    .hword 10, 16, 13
+    .hword 11, 18, 14
+    .hword 13, 20, 16
+    .hword 14, 23, 18
+    .hword 16, 25, 20
+    .hword 18, 29, 23
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S
new file mode 100644
index 0000000..37bc69b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S
@@ -0,0 +1,123 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+    .func   armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+armVCM4P10_InterpolateLuma_HorAlign9x_unsafe:    /* Copies r9 rows from a possibly unaligned source into a buffer with 12 bytes per row. In: r0 = source, r1 = source step, r8 = buffer, r9 = row count. Out: r0 = buffer start, r1 = 12. Overwrites r7-r12 without saving them */
+    MOV      r12,r8    /* remember the buffer start for the return value */
+    AND      r7,r0,#3    /* r7 = byte misalignment of the source (0..3) */
+    BIC      r0,r0,#3    /* round the source down to a word boundary */
+    ADD      pc,pc,r7,LSL #2    /* in ARM state pc reads as this instruction + 8, i.e. the first B below; r7 selects one of the four */
+    NOP    /* filler so that pc + 8 lands on the branch table */
+    B        Copy0toAligned
+    B        Copy1toAligned
+    B        Copy2toAligned
+    B        Copy3toAligned
+Copy0toAligned:
+    LDM      r0,{r7,r10,r11}    /* three words = 12 bytes of the row */
+    SUBS     r9,r9,#1
+    ADD      r0,r0,r1    /* next source row */
+    STM      r8!,{r7,r10,r11}    /* store 12 bytes; r8 += 12 */
+    BGT      Copy0toAligned
+    B        CopyEnd
+Copy1toAligned:
+    LDM      r0,{r7,r10,r11}
+    SUBS     r9,r9,#1
+    ADD      r0,r0,r1
+    LSR      r7,r7,#8    /* drop the 1 leading byte (little-endian byte order assumed) ... */
+    ORR      r7,r7,r10,LSL #24    /* ... and pull in the low byte of the next word */
+    LSR      r10,r10,#8
+    ORR      r10,r10,r11,LSL #24
+    LSR      r11,r11,#8    /* top byte of the third word is zero-filled: 11 valid bytes */
+    STM      r8!,{r7,r10,r11}
+    BGT      Copy1toAligned
+    B        CopyEnd
+Copy2toAligned:
+    LDM      r0,{r7,r10,r11}
+    SUBS     r9,r9,#1
+    ADD      r0,r0,r1
+    LSR      r7,r7,#16    /* drop 2 leading bytes */
+    ORR      r7,r7,r10,LSL #16
+    LSR      r10,r10,#16
+    ORR      r10,r10,r11,LSL #16
+    LSR      r11,r11,#16    /* 10 valid bytes */
+    STM      r8!,{r7,r10,r11}
+    BGT      Copy2toAligned
+    B        CopyEnd
+Copy3toAligned:
+    LDM      r0,{r7,r10,r11}
+    SUBS     r9,r9,#1
+    ADD      r0,r0,r1
+    LSR      r7,r7,#24    /* drop 3 leading bytes */
+    ORR      r7,r7,r10,LSL #8
+    LSR      r10,r10,#24
+    ORR      r10,r10,r11,LSL #8
+    LSR      r11,r11,#24    /* 9 valid bytes */
+    STM      r8!,{r7,r10,r11}
+    BGT      Copy3toAligned
+CopyEnd:
+    MOV      r0,r12    /* return the start of the aligned copy ... */
+    MOV      r1,#0xc    /* ... and its row step of 12 bytes */
+    BX       lr
+    .endfunc
+
+    .global armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+    .func   armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+armVCM4P10_InterpolateLuma_VerAlign4x_unsafe:    /* Copies r9 rows of 4 bytes from a possibly unaligned source into a buffer with 4 bytes per row. In: r0 = source, r1 = source step, r8 = buffer, r9 = row count. Out: r0 = end of buffer - 28, r1 = 4. Overwrites r7-r10 without saving them */
+    AND      r7,r0,#3    /* r7 = byte misalignment of the source (0..3) */
+    BIC      r0,r0,#3    /* round the source down to a word boundary */
+    ADD      pc,pc,r7,LSL #2    /* in ARM state pc reads as this instruction + 8, i.e. the first B below; r7 selects one of the four */
+    NOP    /* filler so that pc + 8 lands on the branch table */
+    B        Copy0toVAligned
+    B        Copy1toVAligned
+    B        Copy2toVAligned
+    B        Copy3toVAligned
+Copy0toVAligned:
+    LDR      r7,[r0],r1    /* one word per row; r0 += r1 */
+    SUBS     r9,r9,#1
+    STR      r7,[r8],#4    /* r8 += 4 */
+    BGT      Copy0toVAligned
+    B        CopyVEnd
+Copy1toVAligned:
+    LDR      r10,[r0,#4]    /* upper word */
+    LDR      r7,[r0],r1    /* lower word; r0 += r1 */
+    SUBS     r9,r9,#1
+    LSL      r10,r10,#24
+    ORR      r7,r10,r7,LSR #8    /* bytes 1-3 of the lower word, then byte 0 of the upper word (little-endian byte order assumed) */
+    STR      r7,[r8],#4
+    BGT      Copy1toVAligned
+    B        CopyVEnd
+Copy2toVAligned:
+    LDR      r10,[r0,#4]
+    LDR      r7,[r0],r1
+    SUBS     r9,r9,#1
+    LSL      r10,r10,#16
+    ORR      r7,r10,r7,LSR #16    /* bytes 2-3 of the lower word, then bytes 0-1 of the upper word */
+    STR      r7,[r8],#4
+    BGT      Copy2toVAligned
+    B        CopyVEnd
+Copy3toVAligned:
+    LDR      r10,[r0,#4]
+    LDR      r7,[r0],r1
+    SUBS     r9,r9,#1
+    LSL      r10,r10,#8
+    ORR      r7,r10,r7,LSR #24    /* byte 3 of the lower word, then bytes 0-2 of the upper word */
+    STR      r7,[r8],#4
+    BGT      Copy3toVAligned
+CopyVEnd:
+    SUB      r0,r8,#0x1c    /* r0 = 28 bytes before the end of the copy; NOTE(review): this is the third copied row only if r9 was 9 on entry - confirm against caller */
+    MOV      r1,#4    /* row step of the aligned copy */
+    BX       lr
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S
new file mode 100644
index 0000000..fe92201
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S
@@ -0,0 +1,105 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+    .func   armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+armVCM4P10_InterpolateLuma_Copy4x4_unsafe:    /* Copies 4 rows of 4 bytes from a possibly unaligned source. In: r0 = source, r1 = source step, r2 = destination, r3 = destination step. r0 and r2 are advanced past the four rows */
+    PUSH     {r4-r6,lr}    /* only r4-r6 and lr are saved; r8, r9 and r12 are also overwritten below */
+    AND      r12,r0,#3    /* r12 = byte misalignment of the source (0..3) */
+    BIC      r0,r0,#3    /* round the source down to a word boundary */
+    ADD      pc,pc,r12,LSL #2    /* in ARM state pc reads as this instruction + 8, i.e. the first B below; r12 selects one of the four */
+    NOP    /* filler so that pc + 8 lands on the branch table */
+    B        Copy4x4Align0
+    B        Copy4x4Align1
+    B        Copy4x4Align2
+    B        Copy4x4Align3
+Copy4x4Align0:    /* aligned source: plain word copies */
+    LDR      r4,[r0],r1
+    LDR      r5,[r0],r1
+    STR      r4,[r2],r3
+    LDR      r8,[r0],r1
+    STR      r5,[r2],r3
+    LDR      r9,[r0],r1
+    STR      r8,[r2],r3
+    STR      r9,[r2],r3
+    B        Copy4x4End
+Copy4x4Align1:    /* source starts at byte 1 of the word (little-endian byte order assumed) */
+    LDR      r5,[r0,#4]    /* row 0: upper word */
+    LDR      r4,[r0],r1    /* row 0: lower word; r0 += r1 */
+    LDR      r9,[r0,#4]    /* row 1: upper word */
+    LDR      r8,[r0],r1    /* row 1: lower word */
+    LSR      r4,r4,#8
+    ORR      r4,r4,r5,LSL #24    /* bytes 1-3 of the lower word, then byte 0 of the upper word */
+    STR      r4,[r2],r3
+    LSR      r8,r8,#8
+    ORR      r8,r8,r9,LSL #24
+    LDR      r5,[r0,#4]    /* row 2 */
+    LDR      r4,[r0],r1
+    STR      r8,[r2],r3
+    LDR      r9,[r0,#4]    /* row 3 */
+    LDR      r8,[r0],r1
+    LSR      r4,r4,#8
+    ORR      r4,r4,r5,LSL #24
+    STR      r4,[r2],r3
+    LSR      r8,r8,#8
+    ORR      r8,r8,r9,LSL #24
+    STR      r8,[r2],r3
+    B        Copy4x4End
+Copy4x4Align2:    /* source starts at byte 2 of the word */
+    LDR      r5,[r0,#4]
+    LDR      r4,[r0],r1
+    LDR      r9,[r0,#4]
+    LDR      r8,[r0],r1
+    LSR      r4,r4,#16
+    ORR      r4,r4,r5,LSL #16    /* bytes 2-3 of the lower word, then bytes 0-1 of the upper word */
+    STR      r4,[r2],r3
+    LSR      r8,r8,#16
+    ORR      r8,r8,r9,LSL #16
+    STR      r8,[r2],r3
+    LDR      r5,[r0,#4]    /* rows 2 and 3 */
+    LDR      r4,[r0],r1
+    LDR      r9,[r0,#4]
+    LDR      r8,[r0],r1
+    LSR      r4,r4,#16
+    ORR      r4,r4,r5,LSL #16
+    STR      r4,[r2],r3
+    LSR      r8,r8,#16
+    ORR      r8,r8,r9,LSL #16
+    STR      r8,[r2],r3
+    B        Copy4x4End
+Copy4x4Align3:    /* source starts at byte 3 of the word */
+    LDR      r5,[r0,#4]
+    LDR      r4,[r0],r1
+    LDR      r9,[r0,#4]
+    LDR      r8,[r0],r1
+    LSR      r4,r4,#24
+    ORR      r4,r4,r5,LSL #8    /* byte 3 of the lower word, then bytes 0-2 of the upper word */
+    STR      r4,[r2],r3
+    LSR      r8,r8,#24
+    ORR      r8,r8,r9,LSL #8
+    STR      r8,[r2],r3
+    LDR      r5,[r0,#4]    /* rows 2 and 3 */
+    LDR      r4,[r0],r1
+    LDR      r9,[r0,#4]
+    LDR      r8,[r0],r1
+    LSR      r4,r4,#24
+    ORR      r4,r4,r5,LSL #8
+    STR      r4,[r2],r3
+    LSR      r8,r8,#24
+    ORR      r8,r8,r9,LSL #8
+    STR      r8,[r2],r3
+Copy4x4End:
+    POP      {r4-r6,pc}    /* restore and return */
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S
new file mode 100644
index 0000000..544abe8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S
@@ -0,0 +1,107 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+    .func   armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe:    /* Converts 4 rows of eight 16-bit values to byte-in-halfword form and writes 8 bytes per row at r7. In: r0 = source, r1 = source step, r7 = destination. Out: r0 = start of the 32 bytes written, r1 = 8 */
+    PUSH     {r4-r6,lr}    /* only r4-r6 and lr are saved; r10-r12 are also overwritten below */
+    MOV      lr,#4    /* row counter */
+    LDR      r6, =0xfe00fe0    /* 0x0fe0 in each halfword: offset removed from every input value */
+    LDR      r12, =0xff00ff    /* keeps the low byte of each halfword */
+LoopStart1:
+    LDR      r11,[r0,#0xc]    /* four words = eight halfwords of this row */
+    LDR      r10,[r0,#8]
+    LDR      r5,[r0,#4]
+    LDR      r4,[r0],r1    /* r0 += r1 */
+    UQSUB16  r11,r11,r6    /* per halfword: subtract 0x0fe0, saturating at 0 */
+    UQSUB16  r10,r10,r6
+    UQSUB16  r5,r5,r6
+    UQSUB16  r4,r4,r6
+    USAT16   r11,#13,r11    /* per halfword: saturate to 13 unsigned bits */
+    USAT16   r10,#13,r10
+    USAT16   r5,#13,r5
+    USAT16   r4,#13,r4
+    AND      r11,r12,r11,LSR #5    /* per halfword: >> 5, leaving an 8-bit value in the low byte */
+    AND      r10,r12,r10,LSR #5
+    AND      r5,r12,r5,LSR #5
+    AND      r4,r12,r4,LSR #5
+    ORR      r11,r10,r11,LSL #8    /* fill the empty high bytes of r10 with the bytes from r11 */
+    ORR      r10,r4,r5,LSL #8    /* fill the empty high bytes of r4 with the bytes from r5 */
+    SUBS     lr,lr,#1
+    STRD     r10,r11,[r7],#8    /* store 8 bytes; r7 += 8 (STRD leaves the flags alone) */
+    BGT      LoopStart1
+    SUB      r0,r7,#0x20    /* 4 rows * 8 bytes were written */
+    MOV      r1,#8    /* row step of the written data */
+    POP      {r4-r6,pc}
+    .endfunc
+
+    .global armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+    .func   armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe:    /* Same per-halfword conversion as the HorDiagCopy routine, but the two output words of each row are repacked with PKHBT/PKHTB and stored 8 bytes apart. In: r0 = source, r1 = source step, r7 = destination. Out: r0 = r7 - 24 (32 bytes are written in total), r1 = 4 */
+    PUSH     {r4-r6,lr}    /* only r4-r6 and lr are saved; r10-r12 are also overwritten below */
+    LDR      r6, =0xfe00fe0    /* 0x0fe0 in each halfword: offset removed from every input value */
+    LDR      r12, =0xff00ff    /* keeps the low byte of each halfword */
+    MOV      lr,#2    /* two iterations, two rows each */
+LoopStart:
+    LDR      r11,[r0,#0xc]    /* first row of the pair */
+    LDR      r10,[r0,#8]
+    LDR      r5,[r0,#4]
+    LDR      r4,[r0],r1
+    UQSUB16  r11,r11,r6    /* per halfword: subtract 0x0fe0, saturating at 0 */
+    UQSUB16  r10,r10,r6
+    UQSUB16  r5,r5,r6
+    UQSUB16  r4,r4,r6
+    USAT16   r11,#13,r11    /* per halfword: saturate to 13 unsigned bits */
+    USAT16   r10,#13,r10
+    USAT16   r5,#13,r5
+    USAT16   r4,#13,r4
+    AND      r11,r12,r11,LSR #5    /* per halfword: >> 5, leaving an 8-bit value in the low byte */
+    AND      r10,r12,r10,LSR #5
+    AND      r5,r12,r5,LSR #5
+    AND      r4,r12,r4,LSR #5
+    ORR      r11,r10,r11,LSL #8
+    ORR      r10,r4,r5,LSL #8
+    PKHBT    r4,r10,r11,LSL #16    /* r4 = low half of r10 | low half of r11 << 16 */
+    STR      r4,[r7],#8    /* store at r7; r7 += 8 */
+    PKHTB    r5,r11,r10,ASR #16    /* r5 = high half of r11 | high half of r10 >> 16 */
+    STR      r5,[r7],#-4    /* store 8 bytes further on; r7 steps back to start + 4 */
+    LDR      r11,[r0,#0xc]    /* second row of the pair: same conversion */
+    LDR      r10,[r0,#8]
+    LDR      r5,[r0,#4]
+    LDR      r4,[r0],r1
+    UQSUB16  r11,r11,r6
+    UQSUB16  r10,r10,r6
+    UQSUB16  r5,r5,r6
+    UQSUB16  r4,r4,r6
+    USAT16   r11,#13,r11
+    USAT16   r10,#13,r10
+    USAT16   r5,#13,r5
+    USAT16   r4,#13,r4
+    AND      r11,r12,r11,LSR #5
+    AND      r10,r12,r10,LSR #5
+    AND      r5,r12,r5,LSR #5
+    AND      r4,r12,r4,LSR #5
+    ORR      r11,r10,r11,LSL #8
+    ORR      r10,r4,r5,LSL #8
+    PKHBT    r4,r10,r11,LSL #16
+    SUBS     lr,lr,#1
+    STR      r4,[r7],#8    /* store at start + 4; r7 = start + 12 */
+    PKHTB    r5,r11,r10,ASR #16
+    STR      r5,[r7],#4    /* store at start + 12; r7 = start + 16 for the next pair */
+    BGT      LoopStart    /* uses the flags from SUBS lr; STR leaves them alone */
+    SUB      r0,r7,#0x18    /* r7 has advanced 32 bytes, so r0 = first written byte + 8 */
+    MOV      r1,#4
+    POP      {r4-r6,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S
new file mode 100644
index 0000000..a330972
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S
@@ -0,0 +1,164 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+    .func   armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe:    /* Applies a 6-tap (1,-5,20,20,-5,1) filter horizontally to 9 source rows, then the same taps vertically across those rows, giving 4 rows of 4 bytes in the low words of d0, d2, d4, d6. In: r0 = source, r1 = source step */
+    PUSH     {r4-r12,lr}
+    VLD1.8   {d0,d1},[r0],r1    /* row 0: 16 source bytes; r0 += r1 */
+    VMOV.I16 d31,#0x14    /* 20 in each 16-bit lane */
+    VMOV.I16 d30,#0x5    /* 5 in each 16-bit lane */
+    VEXT.8   d4,d0,d1,#1    /* row shifted by 1 byte */
+    VEXT.8   d2,d0,d1,#2    /* shifted by 2 */
+    VEXT.8   d3,d0,d1,#3    /* shifted by 3 */
+    VEXT.8   d5,d0,d1,#4    /* shifted by 4 */
+    VEXT.8   d1,d0,d1,#5    /* shifted by 5 */
+    VADDL.U8 q1,d2,d3    /* x[2] + x[3] (16 bit) */
+    VADDL.U8 q2,d4,d5    /* x[1] + x[4] */
+    VADDL.U8 q5,d0,d1    /* x[0] + x[5] */
+    VLD1.8   {d0,d1},[r0],r1    /* row 1 loaded early; row 0 is finished below */
+    VMLA.I16 d10,d2,d31    /* d10 += 20 * (x[2] + x[3]) */
+    VMUL.I16 d8,d4,d30    /* 5 * (x[1] + x[4]), subtracted from d10 after the next load */
+    VEXT.8   d4,d0,d1,#1
+    VEXT.8   d2,d0,d1,#2
+    VEXT.8   d3,d0,d1,#3
+    VEXT.8   d5,d0,d1,#4
+    VEXT.8   d1,d0,d1,#5
+    VADDL.U8 q1,d2,d3
+    VADDL.U8 q2,d4,d5
+    VADDL.U8 q6,d0,d1    /* row 1 accumulates in d12 */
+    VLD1.8   {d0,d1},[r0],r1    /* row 2 */
+    VSUB.I16 d10,d10,d8    /* row 0 horizontal result complete in d10 */
+    VMLA.I16 d12,d2,d31
+    VMUL.I16 d8,d4,d30
+    VEXT.8   d4,d0,d1,#1
+    VEXT.8   d2,d0,d1,#2
+    VEXT.8   d3,d0,d1,#3
+    VEXT.8   d5,d0,d1,#4
+    VEXT.8   d1,d0,d1,#5
+    VADDL.U8 q1,d2,d3
+    VADDL.U8 q2,d4,d5
+    VADDL.U8 q7,d0,d1    /* row 2 accumulates in d14 */
+    VLD1.8   {d0,d1},[r0],r1    /* row 3 */
+    VSUB.I16 d12,d12,d8
+    VMLA.I16 d14,d2,d31
+    VMUL.I16 d8,d4,d30
+    VEXT.8   d4,d0,d1,#1
+    VEXT.8   d2,d0,d1,#2
+    VEXT.8   d3,d0,d1,#3
+    VEXT.8   d5,d0,d1,#4
+    VEXT.8   d1,d0,d1,#5
+    VADDL.U8 q1,d2,d3
+    VADDL.U8 q2,d4,d5
+    VADDL.U8 q8,d0,d1    /* row 3 accumulates in d16 */
+    VLD1.8   {d0,d1},[r0],r1    /* row 4 */
+    VSUB.I16 d14,d14,d8
+    VMLA.I16 d16,d2,d31
+    VMUL.I16 d8,d4,d30
+    VEXT.8   d4,d0,d1,#1
+    VEXT.8   d2,d0,d1,#2
+    VEXT.8   d3,d0,d1,#3
+    VEXT.8   d5,d0,d1,#4
+    VEXT.8   d1,d0,d1,#5
+    VADDL.U8 q1,d2,d3
+    VADDL.U8 q2,d4,d5
+    VADDL.U8 q9,d0,d1    /* row 4 accumulates in d18 */
+    VLD1.8   {d0,d1},[r0],r1    /* row 5 */
+    VSUB.I16 d16,d16,d8
+    VMLA.I16 d18,d2,d31
+    VMUL.I16 d8,d4,d30
+    VEXT.8   d4,d0,d1,#1
+    VEXT.8   d2,d0,d1,#2
+    VEXT.8   d3,d0,d1,#3
+    VEXT.8   d5,d0,d1,#4
+    VEXT.8   d1,d0,d1,#5
+    VADDL.U8 q1,d2,d3
+    VADDL.U8 q2,d4,d5
+    VADDL.U8 q10,d0,d1    /* row 5 accumulates in d20 */
+    VLD1.8   {d0,d1},[r0],r1    /* row 6 */
+    VSUB.I16 d18,d18,d8
+    VMLA.I16 d20,d2,d31
+    VMUL.I16 d8,d4,d30
+    VEXT.8   d4,d0,d1,#1
+    VEXT.8   d2,d0,d1,#2
+    VEXT.8   d3,d0,d1,#3
+    VEXT.8   d5,d0,d1,#4
+    VEXT.8   d1,d0,d1,#5
+    VADDL.U8 q1,d2,d3
+    VADDL.U8 q2,d4,d5
+    VADDL.U8 q11,d0,d1    /* row 6 accumulates in d22 */
+    VLD1.8   {d0,d1},[r0],r1    /* row 7 */
+    VSUB.I16 d20,d20,d8
+    VMLA.I16 d22,d2,d31
+    VMUL.I16 d8,d4,d30
+    VEXT.8   d4,d0,d1,#1
+    VEXT.8   d2,d0,d1,#2
+    VEXT.8   d3,d0,d1,#3
+    VEXT.8   d5,d0,d1,#4
+    VEXT.8   d1,d0,d1,#5
+    VADDL.U8 q1,d2,d3
+    VADDL.U8 q2,d4,d5
+    VADDL.U8 q12,d0,d1    /* row 7 accumulates in d24 */
+    VLD1.8   {d0,d1},[r0],r1    /* row 8, the last source row */
+    VSUB.I16 d22,d22,d8
+    VMLA.I16 d24,d2,d31
+    VMUL.I16 d8,d4,d30
+    VEXT.8   d4,d0,d1,#1
+    VEXT.8   d2,d0,d1,#2
+    VEXT.8   d3,d0,d1,#3
+    VEXT.8   d5,d0,d1,#4
+    VEXT.8   d1,d0,d1,#5
+    VADDL.U8 q1,d2,d3
+    VADDL.U8 q2,d4,d5
+    VADDL.U8 q13,d0,d1    /* row 8 accumulates in d26 */
+    VSUB.I16 d24,d24,d8
+    VMLA.I16 d26,d2,d31
+    VMUL.I16 d8,d4,d30
+    VMOV.I32 q15,#0x14    /* vertical pass: 20 in each 32-bit lane */
+    VMOV.I32 q14,#0x5    /* 5 in each 32-bit lane */
+    VADDL.S16 q5,d10,d20    /* output row 0: h[0] + h[5] */
+    VADDL.S16 q1,d14,d16    /* h[2] + h[3] */
+    VADDL.S16 q0,d12,d18    /* h[1] + h[4] */
+    VSUB.I16 d26,d26,d8    /* finish the horizontal result of row 8 */
+    VMLA.I32 q5,q1,q15    /* += 20 * (h[2] + h[3]) */
+    VMUL.I32 q4,q0,q14    /* 5 * (h[1] + h[4]), subtracted below */
+    VADDL.S16 q6,d12,d22    /* output row 1: h[1] + h[6] */
+    VADDL.S16 q1,d16,d18
+    VADDL.S16 q0,d14,d20
+    VMLA.I32 q6,q1,q15
+    VSUB.I32 q5,q5,q4    /* output row 0 complete */
+    VMUL.I32 q4,q0,q14
+    VADDL.S16 q2,d14,d24    /* output row 2: h[2] + h[7] */
+    VADDL.S16 q1,d18,d20
+    VADDL.S16 q0,d16,d22
+    VMLA.I32 q2,q1,q15
+    VSUB.I32 q6,q6,q4    /* output row 1 complete */
+    VMUL.I32 q4,q0,q14
+    VADDL.S16 q3,d16,d26    /* output row 3: h[3] + h[8] */
+    VADDL.S16 q1,d20,d22
+    VADDL.S16 q0,d18,d24
+    VMLA.I32 q3,q1,q15
+    VSUB.I32 q2,q2,q4    /* output row 2 complete */
+    VMLS.I32 q3,q0,q14    /* output row 3 complete */
+    VQRSHRUN.S32 d0,q5,#10    /* rounding shift right by 10, saturated to unsigned 16 bit */
+    VQRSHRUN.S32 d2,q6,#10
+    VQRSHRUN.S32 d4,q2,#10
+    VQRSHRUN.S32 d6,q3,#10
+    VQMOVN.U16 d0,q0    /* narrow to unsigned 8 bit with saturation; row 0 is in the low 4 bytes of d0 */
+    VQMOVN.U16 d2,q1
+    VQMOVN.U16 d4,q2
+    VQMOVN.U16 d6,q3
+    POP      {r4-r12,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S
new file mode 100644
index 0000000..991c33f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S
@@ -0,0 +1,119 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+    .func   armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe:    /* Applies 6-tap (1,-5,20,20,-5,1) filtering vertically over 9 source rows of 16 bytes, then horizontally on the 16-bit results, giving 4 rows of 4 bytes in the low words of d0, d2, d4, d6. In: r0 = source, r1 = source step */
+    PUSH     {r4-r12,lr}
+    VLD1.8   {d0,d1},[r0],r1    /* row 0; r0 += r1 */
+    ADD      r12,r0,r1,LSL #2    /* r12 = row 5: second load pointer so rows 1-4 and 5-8 load interleaved */
+    VMOV.I8  d30,#0x5    /* 5 in each byte lane */
+    VMOV.I8  d31,#0x14    /* 20 in each byte lane */
+    VLD1.8   {d10,d11},[r12],r1    /* row 5 */
+    VLD1.8   {d2,d3},[r0],r1    /* row 1 */
+    VLD1.8   {d12,d13},[r12],r1    /* row 6 */
+    VADDL.U8 q9,d0,d10    /* output row 0, bytes 0-7: row 0 + row 5 */
+    VLD1.8   {d4,d5},[r0],r1    /* row 2 */
+    VADDL.U8 q0,d1,d11    /* output row 0, bytes 8-15 */
+    VLD1.8   {d6,d7},[r0],r1    /* row 3 */
+    VADDL.U8 q10,d2,d12    /* output row 1: row 1 + row 6 */
+    VLD1.8   {d8,d9},[r0],r1    /* row 4 */
+    VMLAL.U8 q9,d4,d31    /* += 20 * row 2 */
+    VLD1.8   {d14,d15},[r12],r1    /* row 7 */
+    VMLAL.U8 q0,d5,d31
+    VLD1.8   {d16,d17},[r12],r1    /* row 8 */
+    VMLAL.U8 q9,d6,d31    /* += 20 * row 3 */
+    VMLAL.U8 q10,d6,d31
+    VMLSL.U8 q0,d3,d30    /* -= 5 * row 1 */
+    VADDL.U8 q11,d4,d14    /* output row 2: row 2 + row 7 */
+    VMLSL.U8 q9,d2,d30
+    VADDL.U8 q1,d3,d13    /* overwrites row 1 (q1); d2 and d3 were consumed above */
+    VMLAL.U8 q0,d7,d31
+    VMLAL.U8 q10,d8,d31
+    VMLSL.U8 q9,d8,d30    /* -= 5 * row 4 */
+    VMLAL.U8 q1,d7,d31
+    VMLSL.U8 q0,d9,d30
+    VMLAL.U8 q11,d8,d31
+    VMLSL.U8 q10,d4,d30
+    VMLSL.U8 q1,d5,d30
+    VADDL.U8 q2,d5,d15    /* overwrites row 2 (q2) */
+    VMLAL.U8 q11,d10,d31
+    VMLSL.U8 q10,d10,d30
+    VMLAL.U8 q1,d9,d31
+    VMLAL.U8 q2,d9,d31
+    VADDL.U8 q12,d6,d16    /* output row 3: row 3 + row 8 */
+    VMLSL.U8 q11,d6,d30
+    VMLSL.U8 q1,d11,d30
+    VMLSL.U8 q2,d7,d30
+    VADDL.U8 q3,d7,d17    /* overwrites row 3 (q3) */
+    VMLAL.U8 q12,d10,d31
+    VMLSL.U8 q11,d12,d30
+    VMLSL.U8 q2,d13,d30
+    VMLAL.U8 q3,d11,d31
+    VMLAL.U8 q12,d12,d31
+    VEXT.8   d26,d18,d19,#2    /* horizontal pass begins: row 0 values shifted by 1 element */
+    VMLAL.U8 q2,d11,d31
+    VMLAL.U8 q3,d13,d31
+    VMLSL.U8 q12,d8,d30
+    VEXT.8   d27,d18,d19,#4    /* shifted by 2 elements */
+    VMOV.I16 d31,#0x14    /* switch the constant to 20 per 16-bit lane */
+    VMLSL.U8 q3,d9,d30
+    VMLSL.U8 q12,d14,d30
+    VEXT.8   d29,d19,d0,#2    /* shifted by 5 elements */
+    VEXT.8   d28,d18,d19,#6    /* shifted by 3 elements */
+    VMLSL.U8 q3,d15,d30
+    VADDL.S16 q0,d18,d29    /* v[0] + v[5] in 32 bit */
+    VADD.I16 d27,d27,d28    /* v[2] + v[3] */
+    VMOV.I16 d30,#0x5    /* switch the constant to 5 per 16-bit lane */
+    VADD.I16 d26,d26,d19    /* v[1] + v[4] */
+    VMLAL.S16 q0,d27,d31    /* += 20 * (v[2] + v[3]) */
+    VEXT.8   d27,d20,d21,#4    /* output row 1: same pattern on q10 */
+    VEXT.8   d28,d20,d21,#6
+    VEXT.8   d29,d21,d2,#2
+    VMLSL.S16 q0,d26,d30    /* -= 5 * (v[1] + v[4]); output row 0 complete in q0 */
+    VEXT.8   d26,d20,d21,#2
+    VADDL.S16 q1,d20,d29
+    VADD.I16 d27,d27,d28
+    VADD.I16 d26,d26,d21
+    VEXT.8   d28,d22,d23,#6    /* output row 2: same pattern on q11 */
+    VMLAL.S16 q1,d27,d31
+    VEXT.8   d29,d23,d4,#2
+    VEXT.8   d27,d22,d23,#4
+    VEXT.8   d8,d22,d23,#2
+    VADDL.S16 q2,d22,d29
+    VMLSL.S16 q1,d26,d30    /* output row 1 complete in q1 */
+    VADD.I16 d27,d27,d28
+    VADD.I16 d26,d8,d23
+    VEXT.8   d28,d24,d25,#6    /* output row 3: same pattern on q12 */
+    VMLAL.S16 q2,d27,d31
+    VEXT.8   d27,d24,d25,#4
+    VEXT.8   d29,d25,d6,#2
+    VADD.I16 d27,d27,d28
+    VEXT.8   d8,d24,d25,#2
+    VADDL.S16 q3,d24,d29
+    VMLSL.S16 q2,d26,d30    /* output row 2 complete in q2 */
+    VMLAL.S16 q3,d27,d31
+    VADD.I16 d8,d8,d25
+    VMLSL.S16 q3,d8,d30    /* output row 3 complete in q3 */
+    VQRSHRUN.S32 d0,q0,#10    /* rounding shift right by 10, saturated to unsigned 16 bit */
+    VQRSHRUN.S32 d2,q1,#10
+    VQRSHRUN.S32 d4,q2,#10
+    VQRSHRUN.S32 d6,q3,#10
+    VQMOVN.U16 d0,q0    /* narrow to unsigned 8 bit with saturation; row 0 is in the low 4 bytes of d0 */
+    VQMOVN.U16 d2,q1
+    VQMOVN.U16 d4,q2
+    VQMOVN.U16 d6,q3
+    POP      {r4-r12,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S
new file mode 100644
index 0000000..40e141b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S
@@ -0,0 +1,72 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+    .func   armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe:  /* Horizontal 6-tap filter of 4 rows read from [r0] (stride r1); 8-bit results are left in d22, d24, d26, d28 */
+    PUSH     {r4-r12,lr}  /* d30/d31 are read as 16-bit weights but not set here: omxVCM4P10_InterpolateLuma (later in this patch) loads d30=0x5, d31=0x14 before the BL */
+    VLD1.8   {d22,d23},[r0],r1  /* row 0: 16 source bytes x[0..15], then r0 += r1 */
+    VEXT.8   d10,d22,d23,#5  /* d10 = row bytes starting at offset 5 */
+    VEXT.8   d12,d22,d23,#1  /* offset 1 */
+    VEXT.8   d14,d22,d23,#2  /* offset 2; d14 is not written again in this routine (the caller averages d22 with it) */
+    VEXT.8   d15,d22,d23,#3  /* offset 3 */
+    VEXT.8   d13,d22,d23,#4  /* offset 4 */
+    VADDL.U8 q11,d22,d10  /* q11 = x[i] + x[i+5], widened to 16 bits */
+    VADDL.U8 q4,d14,d15  /* q4 = x[i+2] + x[i+3] */
+    VADDL.U8 q6,d12,d13  /* q6 = x[i+1] + x[i+4] */
+    VLD1.8   {d24,d25},[r0],r1  /* row 1 load, interleaved with the row 0 arithmetic */
+    VMLA.I16 d22,d8,d31  /* low 4 lanes only: += (x[i+2] + x[i+3]) * d31 */
+    VMUL.I16 d8,d12,d30  /* d8 = (x[i+1] + x[i+4]) * d30; subtracted from d22 at the VSUB below */
+    VEXT.8   d10,d24,d25,#5  /* row 1: same shifted copies; offsets 2/3 go to d16/d17 */
+    VEXT.8   d12,d24,d25,#1
+    VEXT.8   d16,d24,d25,#2
+    VEXT.8   d17,d24,d25,#3
+    VEXT.8   d13,d24,d25,#4
+    VADDL.U8 q12,d24,d10
+    VSUB.I16 d22,d22,d8  /* finish row 0 before q4 (d8,d9) is overwritten on the next line */
+    VADDL.U8 q4,d16,d17
+    VADDL.U8 q6,d12,d13
+    VLD1.8   {d26,d27},[r0],r1  /* row 2 */
+    VMLA.I16 d24,d8,d31
+    VMUL.I16 d8,d12,d30
+    VEXT.8   d10,d26,d27,#5  /* row 2: offsets 2/3 go to d18/d19 */
+    VEXT.8   d12,d26,d27,#1
+    VEXT.8   d18,d26,d27,#2
+    VEXT.8   d19,d26,d27,#3
+    VEXT.8   d13,d26,d27,#4
+    VADDL.U8 q13,d26,d10
+    VSUB.I16 d24,d24,d8  /* finish row 1 */
+    VADDL.U8 q4,d18,d19
+    VADDL.U8 q6,d12,d13
+    VLD1.8   {d28,d29},[r0],r1  /* row 3; r0 ends four rows past its entry value */
+    VMLA.I16 d26,d8,d31
+    VMUL.I16 d8,d12,d30
+    VEXT.8   d10,d28,d29,#5  /* row 3: offsets 2/3 go to d20/d21 */
+    VEXT.8   d12,d28,d29,#1
+    VEXT.8   d20,d28,d29,#2
+    VEXT.8   d21,d28,d29,#3
+    VEXT.8   d13,d28,d29,#4
+    VADDL.U8 q14,d28,d10
+    VSUB.I16 d26,d26,d8  /* finish row 2 */
+    VADDL.U8 q4,d20,d21
+    VADDL.U8 q6,d12,d13
+    VMLA.I16 d28,d8,d31  /* last row: no further load to overlap, so accumulate and subtract directly */
+    VMLS.I16 d28,d12,d30
+    VQRSHRUN.S16 d22,q11,#5  /* rounding shift right by 5 with unsigned 8-bit saturation; only the low 4 lanes of each q received the weighted terms */
+    VQRSHRUN.S16 d24,q12,#5
+    VQRSHRUN.S16 d26,q13,#5
+    VQRSHRUN.S16 d28,q14,#5
+    POP      {r4-r12,pc}  /* core registers restored; NEON registers written above (including d8-d15) are not saved by this routine */
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S
new file mode 100644
index 0000000..955846f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S
@@ -0,0 +1,58 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+    .func   armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe:  /* Vertical 6-tap filter: reads 9 rows from [r0] (stride r1) and leaves 4 filtered 8-bit rows in d0, d2, d4, d6 */
+    PUSH     {r4-r12,lr}  /* d30/d31 are read as 16-bit weights but are not set in this routine */
+    VLD1.8   {d7},[r0],r1  /* row 0 */
+    ADD      r12,r0,r1,LSL #2  /* r0 is already one row in, so r12 -> row 5 */
+    VLD1.8   {d8},[r0],r1  /* row 1 */
+    VLD1.8   {d12},[r12],r1  /* row 5 */
+    VLD1.8   {d9},[r0],r1  /* row 2 */
+    VADDL.U8 q0,d7,d12  /* q0 = row0 + row5, widened to 16 bits */
+    VLD1.8   {d10},[r0],r1  /* row 3 */
+    VLD1.8   {d13},[r12],r1  /* row 6 */
+    VLD1.8   {d11},[r0],r1  /* row 4 */
+    VLD1.8   {d14},[r12],r1  /* row 7 */
+    VADDL.U8 q8,d8,d11  /* q8 = row1 + row4 */
+    VADDL.U8 q9,d9,d10  /* q9 = row2 + row3 */
+    VLD1.8   {d15},[r12],r1  /* row 8 */
+    VMLS.I16 d0,d16,d30  /* output row 0, low 4 lanes: -= (row1 + row4) * d30 */
+    VMUL.I16 d20,d18,d31  /* (row2 + row3) * d31, added into d0 further down */
+    VADDL.U8 q8,d9,d12  /* output row 1: row2 + row5 */
+    VADDL.U8 q9,d10,d11  /* row3 + row4 */
+    VADDL.U8 q1,d8,d13  /* row1 + row6 */
+    VMLS.I16 d2,d16,d30
+    VMUL.I16 d21,d18,d31
+    VADDL.U8 q8,d10,d13  /* output row 2: row3 + row6 */
+    VADDL.U8 q9,d11,d12  /* row4 + row5 */
+    VADDL.U8 q2,d9,d14  /* row2 + row7 */
+    VMLS.I16 d4,d16,d30
+    VMUL.I16 d22,d18,d31
+    VADDL.U8 q8,d11,d14  /* output row 3: row4 + row7 */
+    VADDL.U8 q3,d10,d15  /* row3 + row8 */
+    VADDL.U8 q9,d12,d13  /* row5 + row6 */
+    VMLS.I16 d6,d16,d30
+    VADD.I16 d0,d0,d20  /* add the deferred d31-weighted terms for output rows 0..2 */
+    VADD.I16 d2,d2,d21
+    VADD.I16 d4,d4,d22
+    VMLA.I16 d6,d18,d31  /* output row 3 accumulates its d31-weighted term directly */
+    VQRSHRUN.S16 d0,q0,#5  /* rounding shift right by 5 with unsigned 8-bit saturation; only the low 4 lanes of each q received the weighted terms */
+    VQRSHRUN.S16 d2,q1,#5
+    VQRSHRUN.S16 d4,q2,#5
+    VQRSHRUN.S16 d6,q3,#5
+    POP      {r4-r12,pc}  /* core registers restored; NEON registers written above (including d8-d15) are not saved by this routine */
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S
new file mode 100644
index 0000000..66520da
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S
@@ -0,0 +1,175 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+
+    .section .rodata
+    .align 4
+
+armVCM4P10_WidthBranchTableMVIsNotZero:  /* jump table read by "LDR pc,[r11,r4,LSL #1]" in armVCM4P10_Interpolate_Chroma: byte offset is 2*r4, so r4 = 2, 4, 8 select word 1, 2, 4 */
+    .word   WidthIs2MVIsNotZero, WidthIs2MVIsNotZero  /* words 0-1 */
+    .word   WidthIs4MVIsNotZero, WidthIs4MVIsNotZero  /* words 2-3 */
+    .word   WidthIs8MVIsNotZero  /* word 4 */
+
+armVCM4P10_WidthBranchTableMVIsZero:  /* same layout; selected when the two fraction arguments sum to zero */
+    .word   WidthIs2MVIsZero, WidthIs2MVIsZero  /* words 0-1 */
+    .word   WidthIs4MVIsZero, WidthIs4MVIsZero  /* words 2-3 */
+    .word   WidthIs8MVIsZero  /* word 4 */
+
+    .text
+
+    .global armVCM4P10_Interpolate_Chroma
+    .func   armVCM4P10_Interpolate_Chroma
+armVCM4P10_Interpolate_Chroma:  /* Bilinear (4-weight) chroma interpolation, or a plain copy when both fractions are zero. r0 = source, r1 = source stride, r2 = destination, r3 = destination stride; four more arguments on the stack. Returns 0 in r0. */
+    PUSH     {r4-r12,lr}
+    VPUSH    {d8-d15}  /* 40 + 64 bytes pushed, so the first stack argument is now at [sp,#0x68] */
+    LDRD     r6,r7,[sp,#0x70]  /* 3rd/4th stack arguments: the two fractions used to build the weights (presumably dx, dy in 1/8 units - confirm against the C prototype) */
+    LDRD     r4,r5,[sp,#0x68]  /* 1st/2nd stack arguments: r4 indexes the width jump table, r5 is the row counter */
+    RSB      r8,r6,#8  /* r8 = 8 - r6 */
+    RSB      r9,r7,#8  /* r9 = 8 - r7 */
+    CMN      r6,r7  /* Z is set when r6 + r7 == 0; the flags survive until LDREQ/LDRNE below */
+    MOV      r10,#1
+    LDREQ    r11, =armVCM4P10_WidthBranchTableMVIsZero
+    SUB      lr,r1,r10  /* lr = stride - 1, so a "+1 then +lr" pair of loads advances exactly one row */
+    LDRNE    r11, =armVCM4P10_WidthBranchTableMVIsNotZero
+    VLD1.8   {d0},[r0],r10  /* d0 = row 0 at column 0 */
+    SMULBB   r12,r8,r9  /* r12 = (8 - r6) * (8 - r7) */
+    SMULBB   r9,r6,r9  /* r9  = r6 * (8 - r7) */
+    VLD1.8   {d1},[r0],lr  /* d1 = row 0 at column 1; r0 now points at row 1 */
+    SMULBB   r8,r8,r7  /* r8  = (8 - r6) * r7 */
+    SMULBB   r6,r6,r7  /* r6  = r6 * r7 */
+    VDUP.8   d12,r12  /* d12..d15 = the four weights, replicated per byte */
+    VDUP.8   d13,r9
+    VDUP.8   d14,r8
+    VDUP.8   d15,r6
+    LDR      pc,[r11,r4,LSL #1]  /* computed jump: table word at byte offset 2*r4 */
+
+WidthIs8MVIsNotZero:  /* 8 bytes wide, 4 output rows per iteration; d0/d1 hold the current top row on entry */
+    VLD1.8   {d2},[r0],r10
+    VMULL.U8 q2,d0,d12  /* output row n = d12*top + d13*top(+1) + d14*bottom + d15*bottom(+1) */
+    VLD1.8   {d3},[r0],lr
+    VMULL.U8 q3,d2,d12
+    VLD1.8   {d16},[r0],r10
+    VMLAL.U8 q2,d1,d13
+    VLD1.8   {d17},[r0],lr
+    VMULL.U8 q11,d16,d12
+    VMLAL.U8 q3,d3,d13
+    VLD1.8   {d18},[r0],r10
+    VMLAL.U8 q2,d2,d14
+    VMLAL.U8 q11,d17,d13
+    VMULL.U8 q12,d18,d12
+    VLD1.8   {d19},[r0],lr
+    VMLAL.U8 q3,d16,d14
+    VLD1.8   {d0},[r0],r10  /* this row is the bottom row now and the top row of the next iteration */
+    VMLAL.U8 q12,d19,d13
+    VMLAL.U8 q11,d18,d14
+    VMLAL.U8 q2,d3,d15
+    VLD1.8   {d1},[r0],lr
+    VMLAL.U8 q12,d0,d14
+    VMLAL.U8 q3,d17,d15
+    VMLAL.U8 q11,d19,d15
+    SUBS     r5,r5,#4  /* four rows done; flags are consumed by BGT below (NEON ops in between do not touch them) */
+    VMLAL.U8 q12,d1,d15
+    VQRSHRN.U16 d8,q2,#6  /* rounding shift right by 6, narrowed to 8 bits with saturation */
+    VQRSHRN.U16 d9,q3,#6
+    VQRSHRN.U16 d20,q11,#6
+    VST1.64  {d8},[r2],r3
+    VQRSHRN.U16 d21,q12,#6
+    VST1.64  {d9},[r2],r3
+    VST1.64  {d20},[r2],r3
+    VST1.64  {d21},[r2],r3
+    BGT      WidthIs8MVIsNotZero
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+
+WidthIs4MVIsNotZero:  /* same arithmetic, 2 output rows per iteration, 4 bytes stored per row */
+    VLD1.8   {d2},[r0],r10
+    VMULL.U8 q2,d0,d12
+    VMULL.U8 q3,d2,d12
+    VLD1.8   {d3},[r0],lr
+    VMLAL.U8 q2,d1,d13
+    VMLAL.U8 q3,d3,d13
+    VLD1.8   {d0},[r0],r10  /* bottom row of this pair, top row of the next iteration */
+    VMLAL.U8 q2,d2,d14
+    VMLAL.U8 q3,d0,d14
+    VLD1.8   {d1},[r0],lr
+    SUBS     r5,r5,#2
+    VMLAL.U8 q3,d1,d15
+    VMLAL.U8 q2,d3,d15
+    VQRSHRN.U16 d9,q3,#6
+    VQRSHRN.U16 d8,q2,#6
+    VST1.32  {d8[0]},[r2],r3  /* only the first 4 result bytes are written */
+    VST1.32  {d9[0]},[r2],r3
+    BGT      WidthIs4MVIsNotZero
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+
+WidthIs2MVIsNotZero:  /* identical to the width-4 loop except that 2 bytes are stored per row */
+    VLD1.8   {d2},[r0],r10
+    VMULL.U8 q2,d0,d12
+    VMULL.U8 q3,d2,d12
+    VLD1.8   {d3},[r0],lr
+    VMLAL.U8 q2,d1,d13
+    VMLAL.U8 q3,d3,d13
+    VLD1.8   {d0},[r0],r10
+    VMLAL.U8 q2,d2,d14
+    VMLAL.U8 q3,d0,d14
+    VLD1.8   {d1},[r0],lr
+    SUBS     r5,r5,#2
+    VMLAL.U8 q3,d1,d15
+    VMLAL.U8 q2,d3,d15
+    VQRSHRN.U16 d9,q3,#6
+    VQRSHRN.U16 d8,q2,#6
+    VST1.16  {d8[0]},[r2],r3  /* only the first 2 result bytes are written */
+    VST1.16  {d9[0]},[r2],r3
+    BGT      WidthIs2MVIsNotZero
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+
+WidthIs8MVIsZero:  /* straight copy, 8 bytes wide */
+    SUB      r0,r0,r1  /* undo the one-row advance made by the two loads before the jump */
+WidthIs8LoopMVIsZero:
+    VLD1.8   {d0},[r0],r1
+    SUBS     r5,r5,#2  /* two rows per iteration */
+    VLD1.8   {d1},[r0],r1
+    VST1.64  {d0},[r2],r3
+    VST1.64  {d1},[r2],r3
+    BGT      WidthIs8LoopMVIsZero
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+
+WidthIs4MVIsZero:  /* straight copy, 4 bytes wide; d0 already holds row 0 and r0 points at row 1 */
+    VLD1.8   {d1},[r0],r1
+    SUBS     r5,r5,#2
+    VST1.32  {d0[0]},[r2],r3
+    VLD1.8   {d0},[r0],r1  /* preload for the next iteration; on the final pass this reads one row past the last row stored */
+    VST1.32  {d1[0]},[r2],r3
+    BGT      WidthIs4MVIsZero
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+
+WidthIs2MVIsZero:  /* straight copy, 2 bytes wide; same preload scheme as the width-4 copy */
+    VLD1.8   {d1},[r0],r1
+    SUBS     r5,r5,#2
+    VST1.16  {d0[0]},[r2],r3
+    VLD1.8   {d0},[r0],r1  /* preload for the next iteration; on the final pass this reads one row past the last row stored */
+    VST1.16  {d1[0]},[r2],r3
+    BGT      WidthIs2MVIsZero
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_QuantTables_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_QuantTables_s.S
new file mode 100644
index 0000000..f5d6d1f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_QuantTables_s.S
@@ -0,0 +1,68 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .section .rodata
+    .align 4
+
+    .global armVCM4P10_MFMatrixQPModTable
+    .global armVCM4P10_QPDivIntraTable
+    .global armVCM4P10_QPDivPlusOneTable
+
+;//------------------------------------------------------------------
+;// This table contains (1 << QbitsPlusOne) / 3 values (Intra case),
+;// for values of iQP from 0 to 51 (inclusive).
+;//------------------------------------------------------------------
+
+
+armVCM4P10_QPDivIntraTable:  /* 32-bit entries, 6 per row; each row doubles the previous one. NOTE(review): 8 rows = 48 entries, while the comment above says iQP 0..51 - confirm whether a ninth row is missing */
+    .word 21845, 21845, 21845, 21845, 21845, 21845  /* floor(2^16 / 3) */
+    .word 43690, 43690, 43690, 43690, 43690, 43690  /* floor(2^17 / 3) */
+    .word 87381, 87381, 87381, 87381, 87381, 87381  /* floor(2^18 / 3) */
+    .word 174762, 174762, 174762, 174762, 174762, 174762  /* floor(2^19 / 3) */
+    .word 349525, 349525, 349525, 349525, 349525, 349525  /* floor(2^20 / 3) */
+    .word 699050, 699050, 699050, 699050, 699050, 699050  /* floor(2^21 / 3) */
+    .word 1398101, 1398101, 1398101, 1398101, 1398101, 1398101  /* floor(2^22 / 3) */
+    .word 2796202, 2796202, 2796202, 2796202, 2796202, 2796202  /* floor(2^23 / 3) */
+
+
+;//--------------------------------------------------------------
+;// This table contains armVCM4P10_MFMatrix [iQP % 6][0] entries,
+;// for values of iQP from 0 to 51 (inclusive).
+;//--------------------------------------------------------------
+
+armVCM4P10_MFMatrixQPModTable:  /* 16-bit entries: the same 6-value row repeated 9 times (54 entries), so entry i depends only on i % 6 */
+    .hword 13107, 11916, 10082, 9362, 8192, 7282  /* entries 0-5 */
+    .hword 13107, 11916, 10082, 9362, 8192, 7282  /* entries 6-11 */
+    .hword 13107, 11916, 10082, 9362, 8192, 7282  /* entries 12-17 */
+    .hword 13107, 11916, 10082, 9362, 8192, 7282  /* entries 18-23 */
+    .hword 13107, 11916, 10082, 9362, 8192, 7282  /* entries 24-29 */
+    .hword 13107, 11916, 10082, 9362, 8192, 7282  /* entries 30-35 */
+    .hword 13107, 11916, 10082, 9362, 8192, 7282  /* entries 36-41 */
+    .hword 13107, 11916, 10082, 9362, 8192, 7282  /* entries 42-47 */
+    .hword 13107, 11916, 10082, 9362, 8192, 7282  /* entries 48-53 */
+
+;//---------------------------------------------------------------
+;// This table contains ARM_M4P10_Q_OFFSET + 1 + (iQP / 6) values,
+;// for values of iQP from 0 to 51 (inclusive).
+;//---------------------------------------------------------------
+
+armVCM4P10_QPDivPlusOneTable:  /* byte entries: value 16 + (i / 6) for i = 0..53 (9 rows of 6) */
+    .byte 16, 16, 16, 16, 16, 16  /* entries 0-5 */
+    .byte 17, 17, 17, 17, 17, 17  /* entries 6-11 */
+    .byte 18, 18, 18, 18, 18, 18  /* entries 12-17 */
+    .byte 19, 19, 19, 19, 19, 19  /* entries 18-23 */
+    .byte 20, 20, 20, 20, 20, 20  /* entries 24-29 */
+    .byte 21, 21, 21, 21, 21, 21  /* entries 30-35 */
+    .byte 22, 22, 22, 22, 22, 22  /* entries 36-41 */
+    .byte 23, 23, 23, 23, 23, 23  /* entries 42-47 */
+    .byte 24, 24, 24, 24, 24, 24  /* entries 48-53 */
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S
new file mode 100644
index 0000000..c24d717
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S
@@ -0,0 +1,52 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_TransformResidual4x4
+    .func   armVCM4P10_TransformResidual4x4
+armVCM4P10_TransformResidual4x4:  /* Two-pass 4x4 butterfly transform on sixteen 16-bit values: reads 32 bytes at [r1], writes 32 bytes at [r0] */
+    VPUSH    {d8}  /* d8 is used as scratch below and restored before returning */
+    VLD4.16  {d0,d1,d2,d3},[r1]  /* de-interleaving load: element k of memory goes to register k % 4, lane k / 4 */
+    VMOV.I16 d4,#0  /* zero operand so that VHADD acts as a plain halving (x >> 1) */
+    VADD.I16 d5,d0,d2  /* first pass: d5 = d0 + d2 */
+    VSUB.I16 d6,d0,d2  /* d6 = d0 - d2 */
+    VHADD.S16 d7,d1,d4  /* d7 = d1 >> 1 */
+    VHADD.S16 d8,d3,d4  /* d8 = d3 >> 1 */
+    VSUB.I16 d7,d7,d3  /* d7 = (d1 >> 1) - d3 */
+    VADD.I16 d8,d1,d8  /* d8 = d1 + (d3 >> 1) */
+    VADD.I16 d0,d5,d8
+    VADD.I16 d1,d6,d7
+    VSUB.I16 d2,d6,d7
+    VSUB.I16 d3,d5,d8
+    VTRN.16  d0,d1  /* 4x4 transpose of the 16-bit lanes so the second pass runs along the other dimension */
+    VTRN.16  d2,d3
+    VTRN.32  q0,q1
+    VADD.I16 d5,d0,d2  /* second pass: same butterfly as above */
+    VSUB.I16 d6,d0,d2
+    VHADD.S16 d7,d1,d4
+    VHADD.S16 d8,d3,d4
+    VSUB.I16 d7,d7,d3
+    VADD.I16 d8,d1,d8
+    VADD.I16 d0,d5,d8
+    VADD.I16 d1,d6,d7
+    VSUB.I16 d2,d6,d7
+    VSUB.I16 d3,d5,d8
+    VRSHR.S16 d0,d0,#6  /* rounding arithmetic shift right by 6 */
+    VRSHR.S16 d1,d1,#6
+    VRSHR.S16 d2,d2,#6
+    VRSHR.S16 d3,d3,#6
+    VST1.16  {d0,d1,d2,d3},[r0]  /* d0..d3 stored contiguously */
+    VPOP     {d8}
+    BX       lr  /* r0 is left unchanged (still the destination pointer) */
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S
new file mode 100644
index 0000000..c552f8d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S
@@ -0,0 +1,40 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_UnpackBlock4x4
+    .func   armVCM4P10_UnpackBlock4x4
+armVCM4P10_UnpackBlock4x4:  /* Expands a packed (flag byte, value) stream into a zero-filled block of sixteen 16-bit values. r0 = address of the stream pointer (updated on exit), r1 = 32-byte destination */
+    PUSH     {r4-r8,lr}
+    LDR      r2,[r0,#0]  /* r2 = current stream position */
+    MOV      r7,#0x1f  /* mask used to turn a flag byte into a halfword byte offset */
+    MOV      r4,#0
+    MOV      r5,#0
+    LDRB     r3,[r2],#1  /* first flag byte */
+    STRD     r4,r5,[r1,#0]  /* clear all 32 destination bytes */
+    STRD     r4,r5,[r1,#8]
+    STRD     r4,r5,[r1,#0x10]
+    STRD     r4,r5,[r1,#0x18]
+unpackLoop:
+    TST      r3,#0x10  /* flag bit 4 set: a 2-byte value follows; clear: a 1-byte signed value follows */
+    LDRNESB  r5,[r2,#1]  /* 2-byte case: high byte, sign-extended */
+    LDRNEB   r4,[r2],#2  /* 2-byte case: low byte, then skip both bytes */
+    AND      r6,r7,r3,LSL #1  /* r6 = 2 * (flag & 0xf): byte offset of the target coefficient */
+    LDREQSB  r4,[r2],#1  /* 1-byte case: sign-extended value */
+    ORRNE    r4,r4,r5,LSL #8  /* 2-byte case: combine low and high bytes */
+    TST      r3,#0x20  /* flag bit 5 set: this was the last entry */
+    LDREQB   r3,[r2],#1  /* otherwise fetch the next flag byte */
+    STRH     r4,[r1,r6]  /* store the value (does not alter the flags tested by BEQ) */
+    BEQ      unpackLoop
+    STR      r2,[r0,#0]  /* write back the advanced stream pointer */
+    POP      {r4-r8,pc}
+    .endfunc
+    .end
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S
new file mode 100644
index 0000000..ba61059
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S
@@ -0,0 +1,67 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global omxVCM4P10_DeblockLuma_I
+    .func   omxVCM4P10_DeblockLuma_I
+omxVCM4P10_DeblockLuma_I:  /* Validates its six arguments, then runs the vertical-edge luma filter followed by the horizontal-edge luma filter. r0-r3 plus two stack arguments; returns a status code in r0 */
+    PUSH     {r4-r9,lr}
+    MOVS     r6,r0  /* keep argument 1; Z is set if it is NULL and is tested by the first BEQ below */
+    SUB      sp,sp,#0xc  /* 28 + 12 bytes: outgoing stack arguments live at [sp,#0] and [sp,#4] */
+    MOV      r9,r1
+    MOV      r7,r2
+    MOV      r8,r3
+    LDR      r4,[sp,#0x28]  /* 1st stack argument */
+    LDR      r5,[sp,#0x2c]  /* 2nd stack argument */
+    BEQ      L0x58  /* argument 1 is NULL */
+    TST      r6,#7
+    TSTEQ    r9,#7
+    BNE      L0x58  /* argument 1 or argument 2 (r1) is not a multiple of 8 */
+    CMP      r7,#0
+    CMPNE    r8,#0
+    CMPNE    r4,#0
+    BEQ      L0x58  /* argument 3, argument 4 or the 1st stack argument is NULL */
+    TST      r4,#3
+    BNE      L0x58  /* 1st stack argument is not 4-byte aligned */
+    CMP      r5,#0
+    BEQ      L0x58  /* 2nd stack argument is NULL */
+    TST      r5,#3
+    BEQ      L0x64  /* 2nd stack argument is 4-byte aligned: all checks passed */
+L0x58:
+    MVN      r0,#4  /* r0 = ~4 = -5 (presumably OMX_Sts_BadArgErr - confirm against the OpenMAX DL headers) */
+L0x5c:
+    ADD      sp,sp,#0xc
+    POP      {r4-r9,pc}
+L0x64:
+    STR      r4,[sp,#0]  /* forward all six arguments unchanged to the vertical-edge filter */
+    MOV      r3,r8
+    STR      r5,[sp,#4]
+    MOV      r2,r7
+    MOV      r1,r9
+    MOV      r0,r6
+    BL       omxVCM4P10_FilterDeblockingLuma_VerEdge_I
+    CMP      r0,#0
+    BNE      L0x5c  /* non-zero status: return it without running the horizontal pass */
+    ADD      r3,r5,#0x10  /* horizontal pass uses both stack-argument arrays offset by 16 bytes ... */
+    ADD      r2,r4,#0x10
+    STR      r3,[sp,#4]
+    STR      r2,[sp,#0]
+    ADD      r3,r8,#2  /* ... and arguments 3 and 4 offset by 2 bytes */
+    ADD      r2,r7,#2
+    MOV      r1,r9
+    MOV      r0,r6
+    BL       omxVCM4P10_FilterDeblockingLuma_HorEdge_I
+    ADD      sp,sp,#0xc  /* the callee's r0 is returned as-is */
+    POP      {r4-r9,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S
new file mode 100644
index 0000000..be21ee7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S
@@ -0,0 +1,119 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global omxVCM4P10_DequantTransformResidualFromPairAndAdd
+    .func   omxVCM4P10_DequantTransformResidualFromPairAndAdd
+omxVCM4P10_DequantTransformResidualFromPairAndAdd:  /* Unpacks, scales and transforms a 4x4 residual block (or expands a single DC value), adds it to a 4x4 prediction and stores the saturated 8-bit result. Returns 0 in r0 */
+    PUSH     {r4-r12,lr}
+    VPUSH    {d8-d9}
+    SUB      sp,sp,#0x20  /* 32-byte scratch block; 40 + 16 + 32 bytes put the first stack argument at [sp,#0x58] */
+    ADD      r4,sp,#0  /* r4 = scratch block address */
+    LDR      r5,[sp,#0x64]  /* 4th stack argument: zero selects the DC-only path at L0x114 */
+    MOV      r7,r1  /* r7 = prediction pointer (argument 2) */
+    MOV      r8,r2  /* r8 = pointer to a 16-bit DC value (argument 3); may be NULL on the full path */
+    MOV      r9,r3  /* r9 = destination pointer (argument 4) */
+    CMP      r5,#0
+    BEQ      L0x114
+    MOV      r1,r4
+    BL       armVCM4P10_UnpackBlock4x4  ;// r0 is still argument 1; r1 = scratch block receiving sixteen 16-bit values
+    LDR      r1,[sp,#0x60]  /* 3rd stack argument: index into the two tables below (presumably QP - confirm) */
+    LDR      r11, =armVCM4P10_QPModuloTable
+    LDR      r10, =armVCM4P10_QPDivTable
+    LDR      r2,  =armVCM4P10_VMatrixU16
+    LDRSB    r12,[r11,r1]  /* byte offset into armVCM4P10_VMatrixU16 */
+    LDRSB    lr,[r10,r1]  /* per-lane left-shift amount */
+    LDR      r10, =0x3020504  /* VTBL byte indices 4,5,2,3 -> halfwords 2,1 */
+    LDR      r1, =0x5040100  /* VTBL byte indices 0,1,4,5 -> halfwords 0,2 */
+    ADD      r2,r2,r12
+    VDUP.32  d7,r1
+    VDUP.32  d9,r10
+    VDUP.16  d5,lr
+    VLD1.8   {d6},[r2]  /* 8 bytes of scale factors */
+    VTBL.8   d8,{d6},d7  /* d8 = halfwords {0,2,0,2} of d6 */
+    VTBL.8   d4,{d6},d9  /* d4 = halfwords {2,1,2,1} of d6 */
+    CMP      r8,#0  /* flags stay valid until VMOVNE below: NEON instructions do not modify them */
+    VLD1.16  {d0,d1,d2,d3},[r4]  /* the unpacked block */
+    VSHL.U16 d8,d8,d5
+    VSHL.U16 d4,d4,d5
+    BEQ      L1
+    LDRSH    r10,[r8,#0]  /* DC pointer present: fetch the DC value */
+L1:
+    VMUL.I16 d0,d0,d8  /* scale: d0/d2 use d8, d1/d3 use d4 */
+    VMUL.I16 d1,d1,d4
+    VMUL.I16 d2,d2,d8
+    VMUL.I16 d3,d3,d4
+    VMOVNE.16 d0[0],r10  /* DC pointer present: the supplied DC replaces the scaled first element */
+    VTRN.16  d0,d1  /* 4x4 transpose of the 16-bit lanes */
+    VTRN.16  d2,d3
+    VTRN.32  q0,q1
+    VMOV.I16 d4,#0  /* zero operand so that VHADD acts as a plain halving */
+    VADD.I16 d5,d0,d2  /* first butterfly pass (same sequence as armVCM4P10_TransformResidual4x4) */
+    VSUB.I16 d6,d0,d2
+    VHADD.S16 d7,d1,d4
+    VHADD.S16 d8,d3,d4
+    VSUB.I16 d7,d7,d3
+    VADD.I16 d8,d1,d8
+    VADD.I16 d0,d5,d8
+    VADD.I16 d1,d6,d7
+    VSUB.I16 d2,d6,d7
+    VSUB.I16 d3,d5,d8
+    VTRN.16  d0,d1  /* transpose again for the second pass */
+    VTRN.16  d2,d3
+    VTRN.32  q0,q1
+    VADD.I16 d5,d0,d2  /* second butterfly pass */
+    VSUB.I16 d6,d0,d2
+    VHADD.S16 d7,d1,d4
+    VHADD.S16 d8,d3,d4
+    VSUB.I16 d7,d7,d3
+    VADD.I16 d8,d1,d8
+    VADD.I16 d0,d5,d8
+    VADD.I16 d1,d6,d7
+    VSUB.I16 d2,d6,d7
+    VSUB.I16 d3,d5,d8
+    VRSHR.S16 d0,d0,#6  /* rounding arithmetic shift right by 6 */
+    VRSHR.S16 d1,d1,#6
+    VRSHR.S16 d2,d2,#6
+    VRSHR.S16 d3,d3,#6
+    B        L0x130
+L0x114:
+    LDRSH    r10,[r8,#0]  /* DC-only path: r8 is dereferenced without a NULL check */
+    ADD      r10,r10,#0x20
+    ASR      r10,r10,#6  /* (dc + 32) >> 6 */
+    VDUP.16  d0,r10  /* every residual sample gets that value */
+    VDUP.16  d1,r10
+    VDUP.16  d2,r10
+    VDUP.16  d3,r10
+L0x130:
+    LDR      r1,[sp,#0x58]  /* 1st stack argument: prediction stride */
+    LDR      r10,[sp,#0x5c]  /* 2nd stack argument: destination stride */
+    LDR      r3,[r7],r1  /* four prediction bytes per row, four rows */
+    LDR      r5,[r7],r1
+    VMOV     d4,r3,r5
+    LDR      r3,[r7],r1
+    LDR      r5,[r7,#0]
+    VMOV     d5,r3,r5
+    VADDW.U8 q3,q0,d4  /* residual + zero-extended prediction */
+    VADDW.U8 q4,q1,d5
+    VQMOVUN.S16 d0,q3  /* saturate to unsigned 8 bits */
+    VQMOVUN.S16 d1,q4
+    VST1.32  {d0[0]},[r9],r10  /* four bytes per destination row */
+    VST1.32  {d0[1]},[r9],r10
+    VST1.32  {d1[0]},[r9],r10
+    VST1.32  {d1[1]},[r9]
+    MOV      r0,#0
+    ADD      sp,sp,#0x20
+    VPOP     {d8-d9}
+    POP      {r4-r12,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S
new file mode 100644
index 0000000..79ba538
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S
@@ -0,0 +1,87 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global omxVCM4P10_FilterDeblockingChroma_HorEdge_I
+    .func   omxVCM4P10_FilterDeblockingChroma_HorEdge_I
+omxVCM4P10_FilterDeblockingChroma_HorEdge_I:  /* Filters two horizontal edges, 8 pixels wide and 4 rows apart. r0 = pixel pointer at the first edge, r1 = stride, r2/r3 = two byte-threshold arrays, two more pointers on the stack. Returns 0 in r0 */
+    PUSH     {r4-r10,lr}
+    VPUSH    {d8-d15}  /* 32 + 64 bytes pushed: first stack argument is at [sp,#0x60] */
+    VLD1.8   {d0[]},[r2]!  /* d0 = first byte at r2 replicated (presumably alpha - confirm); r2 then points at the second byte */
+    SUB      r0,r0,r1,LSL #1
+    SUB      r0,r0,r1  /* r0 = three rows above the edge */
+    VLD1.8   {d2[]},[r3]!  /* d2 = first byte at r3 replicated (presumably beta - confirm) */
+    LDR      r4,[sp,#0x64]  /* 2nd stack argument: strength bytes, read 4 at a time */
+    LDR      r5,[sp,#0x60]  /* 1st stack argument: not read in this routine, only advanced on the skip path; presumably consumed by the helper */
+    LDR      r9, =0x3030303  /* mask: any strength byte with bit 0 or 1 set */
+    LDR      r8, =0x4040404  /* mask: any strength byte with bit 2 set */
+    VMOV.I8  d14,#0  /* d14/d15 are not read in this routine (presumably constants for the helpers) */
+    VMOV.I8  d15,#0x1
+    VMOV.I16 d1,#0x4
+    MOV      r7,#0x40000000  /* loop counter: two doublings reach zero, giving two iterations */
+L0x38:
+    LDR      r6,[r4],#8  /* four strength bytes for this edge; the next edge's bytes are 8 further on */
+    VLD1.8   {d6},[r0],r1  /* six rows: d6,d5,d4 above the edge, d8,d9,d10 below */
+    VLD1.8   {d5},[r0],r1
+    CMP      r6,#0
+    VLD1.8   {d4},[r0],r1
+    VLD1.8   {d8},[r0],r1
+    VABD.U8  d19,d6,d4
+    VLD1.8   {d9},[r0],r1
+    VABD.U8  d13,d4,d8  /* |d4 - d8|: difference across the edge */
+    VLD1.8   {d10},[r0],r1
+    BEQ      L0xe4  /* all four strengths are zero: nothing to filter */
+    VABD.U8  d12,d5,d4
+    VABD.U8  d18,d9,d8
+    VCGT.U8  d16,d0,d13  /* d0 > difference across the edge */
+    VMOV.32  d26[0],r6
+    VMAX.U8  d12,d18,d12
+    VMOVL.U8 q13,d26  /* widen the four strength bytes to 16-bit lanes (two pixels per lane) */
+    VABD.U8  d17,d10,d8
+    VCGT.S16 d27,d26,#0  /* lanes with non-zero strength */
+    VCGT.U8  d12,d2,d12  /* d2 > both one-step differences next to the edge */
+    VCGT.U8  d19,d2,d19
+    VAND     d16,d16,d27
+    TST      r6,r9
+    VCGT.U8  d17,d2,d17
+    VAND     d16,d16,d12  /* d16 = per-pixel "filter this pixel" mask */
+    VAND     d12,d16,d17
+    VAND     d17,d16,d19
+    BLNE     armVCM4P10_DeblockingChromabSLT4_unsafe  /* called only if some strength has bit 0 or 1 set */
+    TST      r6,r8
+    SUB      r0,r0,r1,LSL #2  /* back to the row just above the edge (the d4 row) */
+    VTST.16  d26,d26,d1  /* lanes whose strength has bit 2 set */
+    BLNE     armVCM4P10_DeblockingChromabSGE4_unsafe  /* called only if some strength has bit 2 set */
+    VBIT     d29,d13,d26  /* merge: where bit 2 is set take d13/d31 (presumably the second helper's output), elsewhere keep d29/d24 */
+    VBIT     d24,d31,d26
+    VBIF     d29,d4,d16  /* pixels outside the filter mask keep their original values */
+    VBIF     d24,d8,d16
+    VST1.8   {d29},[r0],r1  /* row above the edge */
+    ADDS     r7,r7,r7
+    VST1.8   {d24},[r0],r1  /* row below the edge; r0 is now three rows above the next edge */
+    BNE      L0x38  /* NOTE(review): d0/d2 are not reloaded on this path, so the second edge reuses the first threshold bytes unless a helper changes them - confirm */
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r10,pc}
+L0xe4:
+    VLD1.8   {d0[]},[r2]  /* skip path: switch to the second threshold bytes */
+    SUB      r0,r0,r1,LSL #1  /* six rows were read; step back two to sit three rows above the next edge */
+    ADDS     r7,r7,r7
+    VLD1.8   {d2[]},[r3]
+    ADD      r5,r5,#4  /* skip this edge's 4 bytes in the 1st-stack-argument array */
+    BNE      L0x38
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r10,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S
new file mode 100644
index 0000000..dcdddbe
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S
@@ -0,0 +1,123 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global omxVCM4P10_FilterDeblockingChroma_VerEdge_I
+    .func   omxVCM4P10_FilterDeblockingChroma_VerEdge_I
+omxVCM4P10_FilterDeblockingChroma_VerEdge_I:  /* Filters two vertical edges, 8 rows tall and 4 columns apart. r0 = pixel pointer at the first edge, r1 = stride, r2/r3 = two byte-threshold arrays, two more pointers on the stack. Returns 0 in r0 */
+    PUSH     {r4-r12,lr}
+    VPUSH    {d8-d15}  /* 40 + 64 bytes pushed: first stack argument is at [sp,#0x68] */
+    VLD1.8   {d0[]},[r2]!  /* d0 = first byte at r2 replicated (presumably alpha - confirm); r2 then points at the second byte */
+    SUB      r0,r0,#4  /* start four columns left of the edge */
+    VLD1.8   {d2[]},[r3]!  /* d2 = first byte at r3 replicated (presumably beta - confirm) */
+    LDR      r4,[sp,#0x6c]  /* 2nd stack argument: strength bytes, read 4 at a time */
+    LDR      r5,[sp,#0x68]  /* 1st stack argument: not read in this routine, only advanced on the skip path; presumably consumed by the helper */
+    LDR      r8, =0x4040404  /* mask: any strength byte with bit 2 set */
+    LDR      r9, =0x3030303  /* mask: any strength byte with bit 0 or 1 set */
+    VMOV.I8  d14,#0  /* d14/d15 are not read in this routine (presumably constants for the helpers) */
+    VMOV.I8  d15,#0x1
+    VMOV.I16 d1,#0x4
+    MOV      r7,#0x40000000  /* loop counter: two doublings reach zero, giving two iterations */
+L0x34:
+    LDR      r6,[r4],#8  /* four strength bytes for this edge */
+    ADD      r10,r0,r1  /* r0 walks even rows, r10 odd rows, both stepping by 2 * stride */
+    ADD      lr,r1,r1
+    VLD1.8   {d7},[r0],lr  /* load 8 rows of 8 bytes */
+    VLD1.8   {d8},[r10],lr
+    VLD1.8   {d5},[r0],lr
+    VLD1.8   {d10},[r10],lr
+    VLD1.8   {d6},[r0],lr
+    VLD1.8   {d9},[r10],lr
+    VLD1.8   {d4},[r0],lr
+    VLD1.8   {d11},[r10],lr
+    VZIP.8   d7,d8  /* 8x8 byte transpose so each register holds one pixel column */
+    VZIP.8   d5,d10
+    VZIP.8   d6,d9
+    VZIP.8   d4,d11
+    VZIP.16  d7,d5
+    VZIP.16  d8,d10
+    VZIP.16  d6,d4
+    VZIP.16  d9,d11
+    VTRN.32  d7,d6
+    VTRN.32  d5,d4
+    VTRN.32  d10,d11
+    VTRN.32  d8,d9
+    CMP      r6,#0
+    VABD.U8  d19,d6,d4
+    VABD.U8  d13,d4,d8  /* |d4 - d8|: difference across the edge */
+    BEQ      L0x170  /* all four strengths are zero: nothing to filter */
+    VABD.U8  d12,d5,d4
+    VABD.U8  d18,d9,d8
+    VMOV.32  d26[0],r6
+    VCGT.U8  d16,d0,d13  /* d0 > difference across the edge */
+    VMAX.U8  d12,d18,d12
+    VMOVL.U8 q13,d26  /* widen the four strength bytes to 16-bit lanes (two pixels per lane) */
+    VABD.U8  d17,d10,d8
+    VCGT.S16 d27,d26,#0  /* lanes with non-zero strength */
+    VCGT.U8  d12,d2,d12  /* d2 > both one-step differences next to the edge */
+    VCGT.U8  d19,d2,d19
+    VAND     d16,d16,d27
+    TST      r6,r9
+    VCGT.U8  d17,d2,d17
+    VAND     d16,d16,d12  /* d16 = per-pixel "filter this pixel" mask */
+    VAND     d12,d16,d17
+    VAND     d17,d16,d19
+    BLNE     armVCM4P10_DeblockingChromabSLT4_unsafe  /* called only if some strength has bit 0 or 1 set */
+    TST      r6,r8
+    SUB      r0,r0,r1,LSL #3  /* undo the 8-row advance of the loads */
+    VTST.16  d26,d26,d1  /* lanes whose strength has bit 2 set */
+    BLNE     armVCM4P10_DeblockingChromabSGE4_unsafe  /* called only if some strength has bit 2 set */
+    VBIT     d29,d13,d26  /* merge: where bit 2 is set take d13/d31 (presumably the second helper's output), elsewhere keep d29/d24 */
+    VBIT     d24,d31,d26
+    ADD      r10,r0,#3  /* column just left of the edge */
+    VBIF     d29,d4,d16  /* pixels outside the filter mask keep their original values */
+    ADD      r12,r10,r1
+    ADD      lr,r1,r1  /* recomputed because the BL instructions above overwrite lr */
+    VBIF     d24,d8,d16
+    ADDS     r7,r7,r7  /* flags are consumed by BNE after the stores */
+    VST1.8   {d29[0]},[r10],lr  /* write the left column back one byte per row */
+    VST1.8   {d29[1]},[r12],lr
+    VST1.8   {d29[2]},[r10],lr
+    VST1.8   {d29[3]},[r12],lr
+    VST1.8   {d29[4]},[r10],lr
+    VST1.8   {d29[5]},[r12],lr
+    VST1.8   {d29[6]},[r10],lr
+    VST1.8   {d29[7]},[r12],lr
+    ADD      r12,r0,#4  /* column just right of the edge */
+    ADD      r10,r12,r1
+    VST1.8   {d24[0]},[r12],lr
+    VST1.8   {d24[1]},[r10],lr
+    VST1.8   {d24[2]},[r12],lr
+    VST1.8   {d24[3]},[r10],lr
+    VST1.8   {d24[4]},[r12],lr
+    VST1.8   {d24[5]},[r10],lr
+    VST1.8   {d24[6]},[r12],lr
+    VST1.8   {d24[7]},[r10],lr
+    ADD      r0,r0,#4  /* next edge is four columns to the right */
+    BNE      L0x34  /* NOTE(review): d0/d2 are not reloaded on this path, so the second edge reuses the first threshold bytes unless a helper changes them - confirm */
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+L0x170:
+    VLD1.8   {d0[]},[r2]  /* skip path: switch to the second threshold bytes */
+    ADD      r0,r0,#4  /* next edge, four columns to the right ... */
+    SUB      r0,r0,r1,LSL #3  /* ... and back up the 8 rows consumed by the loads */
+    ADDS     r7,r7,r7
+    VLD1.8   {d2[]},[r3]
+    ADD      r5,r5,#4  /* skip this edge's 4 bytes in the 1st-stack-argument array */
+    BNE      L0x34
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S
new file mode 100644
index 0000000..9755899
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S
@@ -0,0 +1,107 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global omxVCM4P10_FilterDeblockingLuma_HorEdge_I
+    .func   omxVCM4P10_FilterDeblockingLuma_HorEdge_I
+omxVCM4P10_FilterDeblockingLuma_HorEdge_I:  /* Filters four horizontal edges, 4 rows apart, each handled as two 8-pixel halves. r0 = pixel pointer at the first edge, r1 = stride, r2/r3 = two byte-threshold arrays, two more pointers on the stack. Returns 0 in r0 */
+    PUSH     {r4-r12,lr}
+    VPUSH    {d8-d15}  /* 40 + 64 bytes pushed: first stack argument is at [sp,#0x68] */
+    ADD      r7,r2,#1  /* r7/r8 = addresses of the second threshold bytes, used after the first edge */
+    ADD      r8,r3,#1
+    VLD1.8   {d0[]},[r2]  /* d0 = first byte at r2 replicated (presumably alpha - confirm) */
+    SUB      r0,r0,r1,LSL #2  /* r0 = four rows above the edge */
+    VLD1.8   {d2[]},[r3]  /* d2 = first byte at r3 replicated (presumably beta - confirm) */
+    LDR      r4,[sp,#0x6c]  /* 2nd stack argument: strength bytes, read 2 at a time */
+    LDR      r5,[sp,#0x68]  /* 1st stack argument: not read in this routine, only advanced; presumably consumed by the bSLT4 helper */
+    MOV      r11,#0
+    VMOV.I8  d14,#0  /* d14/d15 are not read in this routine (presumably constants for the helpers) */
+    VMOV.I8  d15,#0x1
+    ADD      r10,r1,r1  /* r10 = 2 * stride */
+    MOV      r9,#0x55000000  /* loop counter: each doubling sets carry on every second pass (end of an edge) and reaches zero after 8 passes */
+L0x38:
+    LDRH     r12,[r4],#2  /* two strength bytes, one per group of 4 pixels */
+    ADD      r6,r0,r1  /* r0 walks even rows, r6 odd rows */
+    CMP      r12,#0
+    BEQ      L0xe4  /* both strengths zero: skip this half */
+    VLD1.8   {d7},[r0],r10  /* eight rows: d7,d6,d5,d4 above the edge, d8,d9,d10,d11 below */
+    VLD1.8   {d6},[r6],r10
+    VLD1.8   {d5},[r0],r10
+    VLD1.8   {d4},[r6],r10
+    VLD1.8   {d8},[r0],r10
+    VABD.U8  d12,d4,d5
+    VLD1.8   {d9},[r6]
+    VABD.U8  d13,d8,d4  /* |d8 - d4|: difference across the edge */
+    VLD1.8   {d10},[r0],r1
+    VABD.U8  d18,d9,d8
+    VABD.U8  d19,d6,d4
+    VCGT.U8  d16,d0,d13  /* d0 > difference across the edge */
+    TST      r12,#0xff
+    VMAX.U8  d12,d18,d12
+    VABD.U8  d17,d10,d8
+    VMOVEQ.32 d16[0],r11  /* first strength byte is zero: clear the mask for pixels 0-3 */
+    TST      r12,#0xff00
+    VCGT.U8  d19,d2,d19
+    VCGT.U8  d12,d2,d12  /* d2 > both one-step differences next to the edge */
+    VMOVEQ.32 d16[1],r11  /* second strength byte is zero: clear the mask for pixels 4-7 */
+    VCGT.U8  d17,d2,d17
+    VLD1.8   {d11},[r0]  /* eighth row; r0 is left 7 rows below its loop-entry value */
+    VAND     d16,d16,d12  /* d16 = per-pixel "filter this pixel" mask */
+    TST      r12,#4
+    VAND     d12,d16,d17
+    VAND     d17,d16,d19
+    BNE      L0xf8  /* bit 2 of the first strength byte selects the bSGE4 helper */
+    SUB      r0,r0,r1,LSL #2
+    SUB      r0,r0,r1  /* back 5 rows: r0 -> the d5 row (two above the edge) */
+    BL       armVCM4P10_DeblockingLumabSLT4_unsafe
+    VST1.8   {d30},[r0],r1  /* store two rows above and two rows below the edge */
+    VST1.8   {d29},[r0],r1
+    SUB      r6,r0,r1,LSL #2  /* r6 = loop-entry address of this half */
+    VST1.8   {d24},[r0],r1
+    ADDS     r9,r9,r9  /* flags are consumed by BCC below and, via Z, by BNE at L0x130 */
+    VST1.8   {d25},[r0]
+    ADD      r0,r6,#8  /* next half: 8 pixels to the right */
+    BCC      L0x38
+    B        L0x130
+L0xe4:
+    ADD      r0,r0,#8  /* skipped half: advance pixel pointer ... */
+    ADDS     r9,r9,r9
+    ADD      r5,r5,#2  /* ... and skip 2 bytes in the 1st-stack-argument array */
+    BCC      L0x38
+    B        L0x130
+L0xf8:
+    SUB      r0,r0,r1,LSL #2
+    SUB      r0,r0,r1,LSL #1  /* back 6 rows: r0 -> the d6 row (three above the edge) */
+    BL       armVCM4P10_DeblockingLumabSGE4_unsafe
+    VST1.8   {d31},[r0],r1  /* store three rows above and three rows below the edge */
+    VST1.8   {d30},[r0],r1
+    VST1.8   {d29},[r0],r1
+    SUB      r6,r0,r1,LSL #2  /* r6 = loop-entry address of this half */
+    VST1.8   {d24},[r0],r1
+    ADDS     r9,r9,r9
+    VST1.8   {d25},[r0],r1
+    ADD      r5,r5,#2  /* r5 is advanced explicitly here, unlike the bSLT4 path */
+    VST1.8   {d28},[r0]
+    ADD      r0,r6,#8
+    BCC      L0x38
+L0x130:
+    SUB      r0,r0,#0x10  /* end of an edge: back to the left column ... */
+    VLD1.8   {d0[]},[r7]  /* remaining edges use the second threshold bytes */
+    ADD      r0,r0,r1,LSL #2  /* ... and down four rows to the next edge */
+    VLD1.8   {d2[]},[r8]
+    BNE      L0x38  /* Z comes from the last ADDS r9: set only after the eighth pass */
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S
new file mode 100644
index 0000000..66cc32ea
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S
@@ -0,0 +1,157 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global omxVCM4P10_FilterDeblockingLuma_VerEdge_I
+    .func   omxVCM4P10_FilterDeblockingLuma_VerEdge_I
+omxVCM4P10_FilterDeblockingLuma_VerEdge_I:  /* Filters four vertical edges, 4 columns apart, in two bands of 8 rows. r0 = pixel pointer at the first edge, r1 = stride, r2/r3 = two byte-threshold arrays, two more pointers on the stack. Returns 0 in r0 */
+    PUSH     {r4-r12,lr}
+    VPUSH    {d8-d15}  /* 40 + 64 bytes pushed: first stack argument is at [sp,#0x68] */
+    ADD      r7,r2,#1  /* r7/r8 = addresses of the second threshold bytes, used for edges after the first in each band */
+    ADD      r8,r3,#1
+    VLD1.8   {d0[]},[r2]  /* d0 = first byte at r2 replicated (presumably alpha - confirm) */
+    SUB      r0,r0,#4  /* start four columns left of the edge */
+    VLD1.8   {d2[]},[r3]  /* d2 = first byte at r3 replicated (presumably beta - confirm) */
+    LDR      r4,[sp,#0x6c]  /* 2nd stack argument: strength bytes, read 2 at a time */
+    LDR      r5,[sp,#0x68]  /* 1st stack argument: not read in this routine, only advanced; presumably consumed by the bSLT4 helper */
+    MOV      r6,#0
+    VMOV.I8  d14,#0  /* d14/d15 are not read in this routine (presumably constants for the helpers) */
+    VMOV.I8  d15,#0x1
+    MOV      r9,#0x11000000  /* loop counter: each doubling sets carry on every fourth pass (end of a band) and reaches zero after 8 passes */
+    ADD      r11,r1,r1  /* r11 = 2 * stride */
+L0x38:
+    LDRH     r12,[r4],#4  /* two strength bytes for this edge; the next edge's pair is 4 bytes on */
+    CMP      r12,#0
+    BEQ      L0x160  /* both strengths zero: skip this edge */
+    ADD      r10,r0,r1  /* r0 walks even rows, r10 odd rows */
+    VLD1.8   {d7},[r0],r11  /* load 8 rows of 8 bytes, interleaved with the 8x8 byte transpose */
+    VLD1.8   {d8},[r10],r11
+    VLD1.8   {d5},[r0],r11
+    VZIP.8   d7,d8
+    VLD1.8   {d10},[r10],r11
+    VLD1.8   {d6},[r0],r11
+    VZIP.8   d5,d10
+    VLD1.8   {d9},[r10],r11
+    VLD1.8   {d4},[r0],r11
+    VLD1.8   {d11},[r10],r11
+    VZIP.8   d6,d9
+    VZIP.16  d8,d10
+    VZIP.8   d4,d11
+    SUB      r0,r0,r1,LSL #3  /* undo the 8-row advance of the loads */
+    VZIP.16  d7,d5
+    VZIP.16  d9,d11
+    VZIP.16  d6,d4
+    VTRN.32  d8,d9
+    VTRN.32  d5,d4
+    VTRN.32  d10,d11
+    VTRN.32  d7,d6  /* after the transpose each register holds one pixel column */
+    VABD.U8  d13,d4,d8  /* |d4 - d8|: difference across the edge */
+    VABD.U8  d12,d5,d4
+    VABD.U8  d18,d9,d8
+    VABD.U8  d19,d6,d4
+    TST      r12,#0xff
+    VCGT.U8  d16,d0,d13  /* d0 > difference across the edge */
+    VMAX.U8  d12,d18,d12
+    VABD.U8  d17,d10,d8
+    VMOVEQ.32 d16[0],r6  /* first strength byte is zero: clear the mask for rows 0-3 */
+    TST      r12,#0xff00
+    VCGT.U8  d19,d2,d19
+    VCGT.U8  d12,d2,d12  /* d2 > both one-step differences next to the edge */
+    VMOVEQ.32 d16[1],r6  /* second strength byte is zero: clear the mask for rows 4-7 */
+    VCGT.U8  d17,d2,d17
+    VAND     d16,d16,d12  /* d16 = per-pixel "filter this pixel" mask */
+    TST      r12,#4
+    VAND     d12,d16,d17
+    VAND     d17,d16,d19
+    BNE      L0x17c  /* bit 2 of the first strength byte selects the bSGE4 helper */
+    BL       armVCM4P10_DeblockingLumabSLT4_unsafe
+    VZIP.8   d7,d6  /* transpose back to row order: columns d7,d6,d30,d29 | d24,d25,d10,d11 */
+    VZIP.8   d30,d29
+    VZIP.8   d24,d25
+    VZIP.8   d10,d11
+    VZIP.16  d7,d30
+    ADD      r10,r0,r1
+    VZIP.16  d24,d10
+    VZIP.16  d25,d11
+    VZIP.16  d6,d29
+    VTRN.32  d7,d24
+    VTRN.32  d30,d10
+    VTRN.32  d6,d25
+    VTRN.32  d29,d11
+    VST1.8   {d7},[r0],r11  /* write the 8 rows back */
+    VST1.8   {d24},[r10],r11
+    VST1.8   {d30},[r0],r11
+    VST1.8   {d10},[r10],r11
+    VST1.8   {d6},[r0],r11
+    VST1.8   {d25},[r10],r11
+    ADDS     r9,r9,r9  /* flags are consumed by BCC below and, via Z, by BNE at L0x1f0 */
+    VST1.8   {d29},[r0],r11
+    ADD      r5,r5,#2  /* only +2 here versus +4 on the other two paths (presumably the bSLT4 helper advances r5 by the rest - confirm) */
+    VST1.8   {d11},[r10],r1  /* post-increment differs from the bSGE4 path (r1 vs r11); r10 is recomputed before its next use */
+    SUB      r0,r0,r1,LSL #3
+    VLD1.8   {d0[]},[r7]  /* later edges in this band use the second threshold bytes */
+    ADD      r0,r0,#4  /* next edge: four columns to the right */
+    VLD1.8   {d2[]},[r8]
+    BCC      L0x38
+    B        L0x1f0
+L0x160:
+    ADD      r0,r0,#4  /* skipped edge: advance pixel pointer ... */
+    ADDS     r9,r9,r9
+    VLD1.8   {d0[]},[r7]
+    ADD      r5,r5,#4  /* ... and skip 4 bytes in the 1st-stack-argument array */
+    VLD1.8   {d2[]},[r8]
+    BCC      L0x38
+    B        L0x1f0
+L0x17c:
+    BL       armVCM4P10_DeblockingLumabSGE4_unsafe
+    VZIP.8   d7,d31  /* transpose back to row order: columns d7,d31,d30,d29 | d24,d25,d28,d11 */
+    VZIP.8   d30,d29
+    VZIP.8   d24,d25
+    VZIP.8   d28,d11
+    VZIP.16  d7,d30
+    ADD      r10,r0,r1
+    VZIP.16  d24,d28
+    VZIP.16  d25,d11
+    VZIP.16  d31,d29
+    VTRN.32  d7,d24
+    VTRN.32  d30,d28
+    VTRN.32  d31,d25
+    VTRN.32  d29,d11
+    VST1.8   {d7},[r0],r11  /* write the 8 rows back */
+    VST1.8   {d24},[r10],r11
+    VST1.8   {d30},[r0],r11
+    VST1.8   {d28},[r10],r11
+    VST1.8   {d31},[r0],r11
+    VST1.8   {d25},[r10],r11
+    ADDS     r9,r9,r9
+    VST1.8   {d29},[r0],r11
+    ADD      r5,r5,#4
+    VST1.8   {d11},[r10],r11
+    SUB      r0,r0,r1,LSL #3
+    VLD1.8   {d0[]},[r7]
+    ADD      r0,r0,#4
+    VLD1.8   {d2[]},[r8]
+    BCC      L0x38
+L0x1f0:
+    SUB      r4,r4,#0xe  /* end of a band: 4 edges advanced r4 by 16, so the net step is +2 (the next band's strength pair) */
+    SUB      r5,r5,#0xe  /* same net +2 adjustment for the 1st-stack-argument array */
+    SUB      r0,r0,#0x10  /* back to the first edge column ... */
+    VLD1.8   {d0[]},[r2]  /* the first edge of the next band uses the first threshold bytes again */
+    ADD      r0,r0,r1,LSL #3  /* ... and down 8 rows */
+    VLD1.8   {d2[]},[r3]
+    BNE      L0x38  /* Z comes from the last ADDS r9: set only after the eighth pass */
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S
new file mode 100644
index 0000000..76c3d7d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S
@@ -0,0 +1,323 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global omxVCM4P10_InterpolateLuma
+    .func   omxVCM4P10_InterpolateLuma
+omxVCM4P10_InterpolateLuma:    /* Processes 4x4 blocks: reads through r0 (stride r1), writes through r2 (stride r3), returns 0 in r0. NOTE(review): stack arguments are presumably dx, dy, width, height of the OpenMAX DL prototype - confirm */
+    PUSH     {r4-r12,lr}    /* 10 core registers = 40 bytes */
+    VPUSH    {d8-d15}    /* 8 doubleword registers = 64 bytes */
+    SUB      sp,sp,#0x10    /* 16-byte scratch area that holds r0-r3 across each block */
+    LDR      r6,[sp,#0x78]    /* 1st stack argument: 0x78 = 40 + 64 + 16 bytes pushed above */
+    LDR      r7,[sp,#0x7c]    /* 2nd stack argument */
+    LDR      r5,[sp,#0x80]    /* 3rd stack argument: column counter, decremented by 4 at L0x434 */
+    LDR      r4,[sp,#0x84]    /* 4th stack argument: row counter, decremented by 4 at L0x434 */
+    ADD      r6,r6,r7,LSL #2    /* dispatch index = 1st + 4 * 2nd stack argument */
+    ADD      r11,sp,#0    /* r11 -> scratch area */
+    VMOV.I16 d31,#0x14    /* 20 in every 16-bit lane; not read in this function. NOTE(review): presumably a filter constant for the *_unsafe helpers - confirm */
+    VMOV.I16 d30,#0x5    /* 5 in every 16-bit lane; same remark as d31 */
+L0x2c:    /* start of one 4x4 block */
+    STM      r11,{r0-r3}    /* remember the block pointers/strides; reloaded at L0x434 */
+    ADD      pc,pc,r6,LSL #2    /* computed jump: pc reads as this address + 8, so index 0 selects the second B below */
+    B        L0x3f0    /* filler slot at +4; the computed jump cannot select it for a non-negative index */
+    B        L0x78    /* index 0 */
+    B        L0xa8    /* index 1 */
+    B        L0xdc    /* index 2 */
+    B        L0x100    /* index 3 */
+    B        L0x134    /* index 4 */
+    B        L0x168    /* index 5 */
+    B        L0x1a8    /* index 6 */
+    B        L0x1f0    /* index 7 */
+    B        L0x234    /* index 8 */
+    B        L0x258    /* index 9 */
+    B        L0x2b0    /* index 10 */
+    B        L0x2d8    /* index 11 */
+    B        L0x330    /* index 12 */
+    B        L0x364    /* index 13 */
+    B        L0x3a8    /* index 14 */
+    B        L0x3f0    /* index 15 */
+L0x78:    /* index 0: plain copy of four rows */
+    ADD      r12,r0,r1,LSL #1    /* r12 -> source row 2 */
+    VLD1.8   {d9},[r0],r1    /* row 0 */
+    VLD1.8   {d11},[r12],r1    /* row 2 */
+    VLD1.8   {d10},[r0]    /* row 1 */
+    VLD1.8   {d12},[r12]    /* row 3 */
+    ADD      r12,r2,r3,LSL #1    /* r12 -> destination row 2 */
+    VST1.32  {d9[0]},[r2],r3    /* only the low 4 bytes of each row are written */
+    VST1.32  {d11[0]},[r12],r3
+    VST1.32  {d10[0]},[r2]
+    VST1.32  {d12[0]},[r12]
+    ADD      r11,sp,#0    /* re-point r11 at the scratch area for the LDM at L0x434 */
+    B        L0x434
+L0xa8:    /* index 1: HalfHor helper output rounding-averaged with d14/d16/d18/d20 */
+    SUB      r0,r0,#2    /* helper is called two bytes to the left of the block */
+    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+    VRHADD.U8 d22,d22,d14    /* row 0 */
+    VRHADD.U8 d26,d26,d18    /* row 2 */
+    VRHADD.U8 d24,d24,d16    /* row 1 */
+    VRHADD.U8 d28,d28,d20    /* row 3 */
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d22[0]},[r2],r3
+    VST1.32  {d26[0]},[r12],r3
+    VST1.32  {d24[0]},[r2]
+    VST1.32  {d28[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0xdc:    /* index 2: stores d22/d26/d24/d28 exactly as the HalfHor helper leaves them */
+    SUB      r0,r0,#2
+    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d22[0]},[r2],r3
+    VST1.32  {d26[0]},[r12],r3
+    VST1.32  {d24[0]},[r2]
+    VST1.32  {d28[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0x100:    /* index 3: like index 1 but averaged with d15/d17/d19/d21 */
+    SUB      r0,r0,#2
+    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+    VRHADD.U8 d22,d22,d15
+    VRHADD.U8 d26,d26,d19
+    VRHADD.U8 d24,d24,d17
+    VRHADD.U8 d28,d28,d21
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d22[0]},[r2],r3
+    VST1.32  {d26[0]},[r12],r3
+    VST1.32  {d24[0]},[r2]
+    VST1.32  {d28[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0x134:    /* index 4: HalfVer helper output (d0/d2/d4/d6) rounding-averaged with d9/d10/d11/d12 */
+    SUB      r0,r0,r1,LSL #1    /* helper is called two rows above the block */
+    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+    VRHADD.U8 d0,d0,d9    /* row 0 */
+    VRHADD.U8 d4,d4,d11    /* row 2 */
+    VRHADD.U8 d2,d2,d10    /* row 1 */
+    VRHADD.U8 d6,d6,d12    /* row 3 */
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d0[0]},[r2],r3
+    VST1.32  {d4[0]},[r12],r3
+    VST1.32  {d2[0]},[r2]
+    VST1.32  {d6[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0x168:    /* index 5: average of the HalfVer and HalfHor helper outputs */
+    MOV      r8,r0    /* keep the block address for the second helper call */
+    SUB      r0,r0,r1,LSL #1
+    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+    SUB      r0,r8,#2
+    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+    VRHADD.U8 d22,d22,d0
+    VRHADD.U8 d26,d26,d4
+    VRHADD.U8 d24,d24,d2
+    VRHADD.U8 d28,d28,d6
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d22[0]},[r2],r3
+    VST1.32  {d26[0]},[r12],r3
+    VST1.32  {d24[0]},[r2]
+    VST1.32  {d28[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0x1a8:    /* index 6: HalfDiagHorVer output (d0/d2/d4/d6) averaged with q7..q10 narrowed by a rounding shift of 5 */
+    SUB      r0,r0,r1,LSL #1
+    SUB      r0,r0,#2
+    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+    VQRSHRUN.S16 d14,q7,#5    /* signed 16-bit -> unsigned saturated 8-bit */
+    VQRSHRUN.S16 d16,q8,#5
+    VQRSHRUN.S16 d18,q9,#5
+    VQRSHRUN.S16 d20,q10,#5
+    VRHADD.U8 d0,d0,d14
+    VRHADD.U8 d4,d4,d18
+    VRHADD.U8 d2,d2,d16
+    VRHADD.U8 d6,d6,d20
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d0[0]},[r2],r3
+    VST1.32  {d4[0]},[r12],r3
+    VST1.32  {d2[0]},[r2]
+    VST1.32  {d6[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0x1f0:    /* index 7: like index 5 but the HalfVer helper starts one byte to the right */
+    MOV      r8,r0
+    ADD      r0,r0,#1
+    SUB      r0,r0,r1,LSL #1
+    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+    SUB      r0,r8,#2
+    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+    VRHADD.U8 d22,d22,d0
+    VRHADD.U8 d26,d26,d4
+    VRHADD.U8 d24,d24,d2
+    VRHADD.U8 d28,d28,d6
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d22[0]},[r2],r3
+    VST1.32  {d26[0]},[r12],r3
+    VST1.32  {d24[0]},[r2]
+    VST1.32  {d28[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0x234:    /* index 8: stores d0/d4/d2/d6 exactly as the HalfVer helper leaves them */
+    SUB      r0,r0,r1,LSL #1
+    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d0[0]},[r2],r3
+    VST1.32  {d4[0]},[r12],r3
+    VST1.32  {d2[0]},[r2]
+    VST1.32  {d6[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0x258:    /* index 9: HalfDiagVerHor output averaged with q9..q12 realigned by 4 bytes and narrowed */
+    SUB      r0,r0,r1,LSL #1
+    SUB      r0,r0,#2
+    BL       armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+    VEXT.8   d18,d18,d19,#4    /* drop the first two 16-bit lanes of each q register */
+    VEXT.8   d20,d20,d21,#4
+    VEXT.8   d22,d22,d23,#4
+    VEXT.8   d24,d24,d25,#4
+    VQRSHRUN.S16 d14,q9,#5
+    VQRSHRUN.S16 d16,q10,#5
+    VQRSHRUN.S16 d18,q11,#5
+    VQRSHRUN.S16 d20,q12,#5
+    VRHADD.U8 d0,d0,d14
+    VRHADD.U8 d4,d4,d18
+    VRHADD.U8 d2,d2,d16
+    VRHADD.U8 d6,d6,d20
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d0[0]},[r2],r3
+    VST1.32  {d4[0]},[r12],r3
+    VST1.32  {d2[0]},[r2]
+    VST1.32  {d6[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0x2b0:    /* index 10: stores d0/d4/d2/d6 exactly as the HalfDiagHorVer helper leaves them */
+    SUB      r0,r0,r1,LSL #1
+    SUB      r0,r0,#2
+    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d0[0]},[r2],r3
+    VST1.32  {d4[0]},[r12],r3
+    VST1.32  {d2[0]},[r2]
+    VST1.32  {d6[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0x2d8:    /* index 11: like index 9 but realigned by 6 bytes (three 16-bit lanes) */
+    SUB      r0,r0,r1,LSL #1
+    SUB      r0,r0,#2
+    BL       armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+    VEXT.8   d18,d18,d19,#6
+    VEXT.8   d20,d20,d21,#6
+    VEXT.8   d22,d22,d23,#6
+    VEXT.8   d24,d24,d25,#6
+    VQRSHRUN.S16 d14,q9,#5
+    VQRSHRUN.S16 d16,q10,#5
+    VQRSHRUN.S16 d18,q11,#5
+    VQRSHRUN.S16 d20,q12,#5
+    VRHADD.U8 d0,d0,d14
+    VRHADD.U8 d4,d4,d18
+    VRHADD.U8 d2,d2,d16
+    VRHADD.U8 d6,d6,d20
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d0[0]},[r2],r3
+    VST1.32  {d4[0]},[r12],r3
+    VST1.32  {d2[0]},[r2]
+    VST1.32  {d6[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0x330:    /* index 12: like index 4 but averaged with d10/d11/d12/d13 (one register further) */
+    SUB      r0,r0,r1,LSL #1
+    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+    VRHADD.U8 d0,d0,d10    /* row 0 */
+    VRHADD.U8 d4,d4,d12    /* row 2 */
+    VRHADD.U8 d2,d2,d11    /* row 1 */
+    VRHADD.U8 d6,d6,d13    /* row 3 */
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d0[0]},[r2],r3
+    VST1.32  {d4[0]},[r12],r3
+    VST1.32  {d2[0]},[r2]
+    VST1.32  {d6[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0x364:    /* index 13: like index 5 but the HalfHor helper starts one row lower */
+    MOV      r8,r0
+    SUB      r0,r0,r1,LSL #1
+    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+    ADD      r0,r8,r1
+    SUB      r0,r0,#2
+    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+    VRHADD.U8 d22,d22,d0
+    VRHADD.U8 d26,d26,d4
+    VRHADD.U8 d24,d24,d2
+    VRHADD.U8 d28,d28,d6
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d22[0]},[r2],r3
+    VST1.32  {d26[0]},[r12],r3
+    VST1.32  {d24[0]},[r2]
+    VST1.32  {d28[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0x3a8:    /* index 14: like index 6 but narrows q8..q11 (one register further) */
+    SUB      r0,r0,r1,LSL #1
+    SUB      r0,r0,#2
+    BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+    VQRSHRUN.S16 d14,q8,#5
+    VQRSHRUN.S16 d16,q9,#5    /* overwrites the low half of q8, which was already consumed above */
+    VQRSHRUN.S16 d18,q10,#5    /* likewise for q9 */
+    VQRSHRUN.S16 d20,q11,#5    /* likewise for q10 */
+    VRHADD.U8 d0,d0,d14
+    VRHADD.U8 d4,d4,d18
+    VRHADD.U8 d2,d2,d16
+    VRHADD.U8 d6,d6,d20
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d0[0]},[r2],r3
+    VST1.32  {d4[0]},[r12],r3
+    VST1.32  {d2[0]},[r2]
+    VST1.32  {d6[0]},[r12]
+    ADD      r11,sp,#0
+    B        L0x434
+L0x3f0:    /* index 15: HalfVer one byte to the right, HalfHor one row lower, then averaged */
+    MOV      r8,r0
+    ADD      r0,r0,#1
+    SUB      r0,r0,r1,LSL #1
+    BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+    ADD      r0,r8,r1
+    SUB      r0,r0,#2
+    BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+    VRHADD.U8 d22,d22,d0
+    VRHADD.U8 d26,d26,d4
+    VRHADD.U8 d24,d24,d2
+    VRHADD.U8 d28,d28,d6
+    ADD      r12,r2,r3,LSL #1
+    VST1.32  {d22[0]},[r2],r3
+    VST1.32  {d26[0]},[r12],r3
+    VST1.32  {d24[0]},[r2]
+    VST1.32  {d28[0]},[r12]
+    ADD      r11,sp,#0
+L0x434:    /* common tail of every case: advance to the next 4x4 block */
+    LDM      r11,{r0-r3}    /* restore the values saved at L0x2c */
+    SUBS     r5,r5,#4    /* four columns done */
+    ADD      r0,r0,#4    /* next block to the right (source) */
+    ADD      r2,r2,#4    /* next block to the right (destination) */
+    BGT      L0x2c    /* more columns left in this row of blocks */
+    SUBS     r4,r4,#4    /* four rows done; these flags feed the BGT below (nothing in between sets flags) */
+    LDR      r5,[sp,#0x80]    /* reload the column count (3rd stack argument) */
+    ADD      r11,sp,#0
+    ADD      r0,r0,r1,LSL #2    /* down four source rows */
+    ADD      r2,r2,r3,LSL #2    /* down four destination rows */
+    SUB      r0,r0,r5    /* rewind the source pointer by the column count */
+    SUB      r2,r2,r5    /* rewind the destination pointer by the column count */
+    BGT      L0x2c    /* more rows of blocks left */
+    MOV      r0,#0    /* return value 0 */
+    ADD      sp,sp,#0x10    /* release the scratch area */
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S
new file mode 100644
index 0000000..0d49e4b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S
@@ -0,0 +1,217 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .section .rodata
+    .align 4
+
+armVCM4P10_pIndexTable8x8:    /* handler addresses; omxVCM4P10_PredictIntraChroma_8x8 loads pc from entry r6 */
+    .word  OMX_VC_CHROMA_DC,     OMX_VC_CHROMA_HOR    /* entries 0 and 1 */
+    .word  OMX_VC_CHROMA_VERT,   OMX_VC_CHROMA_PLANE    /* entries 2 and 3 */
+
+armVCM4P10_MultiplierTableChroma8x8:    /* 16-bit constants read by the OMX_VC_CHROMA_PLANE handler */
+    .hword   3, 2, 1,4    /* first load (d10): weights applied to the four difference lanes */
+    .hword  -3,-2,-1,0    /* second load (d10,d11): per-lane multipliers -3..4, lower half */
+    .hword   1, 2, 3,4    /* upper half of the -3..4 multipliers */
+
+
+    .text
+    .global omxVCM4P10_PredictIntraChroma_8x8
+    .func   omxVCM4P10_PredictIntraChroma_8x8
+omxVCM4P10_PredictIntraChroma_8x8:    /* Writes an 8x8 byte block through r3 from samples read through r0, r1 and r2; returns 0 in r0. NOTE(review): arguments presumably follow the OpenMAX DL prototype (pSrcLeft, pSrcAbove, pSrcAboveLeft, pDst, leftStep, dstStep, predMode, availability) - confirm */
+    PUSH     {r4-r10,lr}    /* 8 core registers = 32 bytes */
+    VPUSH    {d8-d15}    /* 64 bytes, so stack arguments start at sp + 0x60 */
+    LDR      r8, =armVCM4P10_pIndexTable8x8
+    LDR      r6,[sp,#0x68]    /* 3rd stack argument: index into the handler table */
+    LDR      r4,[sp,#0x60]    /* 1st stack argument: byte stride for reads through r0 */
+    LDR      r5,[sp,#0x64]    /* 2nd stack argument: byte stride for writes through r3 */
+    LDR      r7,[sp,#0x6c]    /* 4th stack argument: flag bits tested with #1 and #2 in the DC handler */
+    LDR      pc,[r8,r6,LSL #2]    /* jump to the handler selected by r6 */
+OMX_VC_CHROMA_DC:    /* table entry 0 */
+    TST      r7,#2
+    BEQ      L0xe8    /* bit 1 clear: the column read through r0 is not used */
+    ADD      r9,r0,r4    /* r9 walks odd rows, r0 even rows */
+    ADD      r10,r4,r4    /* step of two rows */
+    VLD1.8   {d1[0]},[r0],r10    /* gather eight bytes, one per row, into d1 */
+    VLD1.8   {d1[1]},[r9],r10
+    VLD1.8   {d1[2]},[r0],r10
+    VLD1.8   {d1[3]},[r9],r10
+    VLD1.8   {d1[4]},[r0],r10
+    VLD1.8   {d1[5]},[r9],r10
+    VLD1.8   {d1[6]},[r0],r10
+    VLD1.8   {d1[7]},[r9]
+    TST      r7,#1
+    BEQ      L0xcc    /* bit 0 clear: the row at r1 is not used */
+    VLD1.8   {d0},[r1]    /* eight bytes of the row at r1 */
+    MOV      r0,#0    /* return value, set early */
+    VPADDL.U8 d2,d0
+    VPADDL.U16 d3,d2    /* d3 = {sum of row bytes 0-3, sum of row bytes 4-7} */
+    VPADDL.U8 d2,d1
+    VPADDL.U16 d1,d2    /* d1 = {sum of column bytes 0-3, sum of column bytes 4-7} */
+    VADD.I32 d2,d3,d1    /* combined sums of eight samples each */
+    VRSHR.U32 d2,d2,#3    /* rounded mean of 8 */
+    VRSHR.U32 d3,d3,#2    /* rounded mean of 4 (row only) */
+    VRSHR.U32 d1,d1,#2    /* rounded mean of 4 (column only) */
+    VMOV.I8  d5,#0xc
+    VMOV.I8  d6,#0x4
+    VSHL.I64 d5,d5,#32    /* d5 bytes = 0,0,0,0,12,12,12,12 */
+    VSHR.U64 d6,d6,#32    /* d6 bytes = 4,4,4,4,0,0,0,0 */
+    VADD.I8  d6,d6,d5    /* d6 bytes = 4,4,4,4,12,12,12,12 */
+    VTBL.8   d0,{d2-d3},d5    /* rows 0-3: left half = combined mean, right half = mean of row bytes 4-7 */
+    VTBL.8   d4,{d1-d2},d6    /* rows 4-7: left half = mean of column bytes 4-7, right half = combined mean */
+L0x9c:    /* store d0 to rows 0-3 and d4 to rows 4-7 */
+    ADD      r9,r3,r5    /* r9 walks odd destination rows */
+    ADD      r10,r5,r5
+    VST1.8   {d0},[r3],r10
+    VST1.8   {d0},[r9],r10
+    VST1.8   {d0},[r3],r10
+    VST1.8   {d0},[r9],r10
+    VST1.8   {d4},[r3],r10
+    VST1.8   {d4},[r9],r10
+    VST1.8   {d4},[r3],r10
+    VST1.8   {d4},[r9]
+    VPOP     {d8-d15}
+    POP      {r4-r10,pc}
+L0xcc:    /* only the column read through r0 is used */
+    MOV      r0,#0
+    VPADDL.U8 d2,d1
+    VPADDL.U16 d1,d2
+    VRSHR.U32 d1,d1,#2    /* rounded means of column bytes 0-3 and 4-7 */
+    VDUP.8   d0,d1[0]    /* rows 0-3 */
+    VDUP.8   d4,d1[4]    /* rows 4-7 */
+    B        L0x9c
+L0xe8:    /* column not used */
+    TST      r7,#1
+    BEQ      L0x114    /* neither flag bit set */
+    VLD1.8   {d0},[r1]
+    MOV      r0,#0
+    VPADDL.U8 d2,d0
+    VPADDL.U16 d3,d2
+    VRSHR.U32 d3,d3,#2    /* rounded means of row bytes 0-3 and 4-7 */
+    VMOV.I8  d5,#0x4
+    VSHL.I64 d5,d5,#32    /* d5 bytes = 0,0,0,0,4,4,4,4 */
+    VTBL.8   d0,{d3},d5    /* left half = first mean, right half = second mean */
+    B        L0x11c
+L0x114:
+    VMOV.I8  d0,#0x80    /* fixed fill value 128 */
+    MOV      r0,#0
+L0x11c:    /* store d0 to all eight rows */
+    ADD      r9,r3,r5
+    ADD      r10,r5,r5
+    VST1.8   {d0},[r3],r10
+    VST1.8   {d0},[r9],r10
+    VST1.8   {d0},[r3],r10
+    VST1.8   {d0},[r9],r10
+    VST1.8   {d0},[r3],r10
+    VST1.8   {d0},[r9],r10
+    VST1.8   {d0},[r3],r10
+    VST1.8   {d0},[r9]
+    VPOP     {d8-d15}
+    POP      {r4-r10,pc}
+OMX_VC_CHROMA_VERT:    /* table entry 2: every output row is the eight bytes at r1 */
+    VLD1.8   {d0},[r1]
+    MOV      r0,#0
+    B        L0x11c
+OMX_VC_CHROMA_HOR:    /* table entry 1: each output row is one byte from the r0 column, replicated */
+    ADD      r9,r0,r4
+    ADD      r10,r4,r4
+    VLD1.8   {d0[]},[r0],r10    /* load-and-replicate: row 0 */
+    VLD1.8   {d1[]},[r9],r10    /* row 1 */
+    VLD1.8   {d2[]},[r0],r10
+    VLD1.8   {d3[]},[r9],r10
+    VLD1.8   {d4[]},[r0],r10
+    VLD1.8   {d5[]},[r9],r10
+    VLD1.8   {d6[]},[r0],r10
+    VLD1.8   {d7[]},[r9]
+    B        L0x28c
+OMX_VC_CHROMA_PLANE:    /* table entry 3: linear gradient built from the row at r1, the column at r0 and the byte at r2 */
+    ADD      r9,r0,r4
+    ADD      r10,r4,r4
+    VLD1.8   {d0},[r1]    /* d0 = row samples */
+    VLD1.8   {d2[0]},[r2]    /* d2 byte 0 = the single sample at r2 */
+    VLD1.8   {d1[0]},[r0],r10    /* d1 = column samples, gathered one per row */
+    VLD1.8   {d1[1]},[r9],r10
+    VLD1.8   {d1[2]},[r0],r10
+    VLD1.8   {d1[3]},[r9],r10
+    VLD1.8   {d1[4]},[r0],r10
+    VLD1.8   {d1[5]},[r9],r10
+    VLD1.8   {d1[6]},[r0],r10
+    VLD1.8   {d1[7]},[r9]
+    VREV64.8 d3,d0    /* row samples in reverse order */
+    VSUBL.U8 q3,d3,d2    /* lane 0 = row[7] - sample at r2 (only lane 0 is used later) */
+    VSHR.U64 d3,d3,#8    /* reversed row shifted down one byte */
+    VSUBL.U8 q2,d3,d0    /* lanes 0-2 = row[6]-row[0], row[5]-row[1], row[4]-row[2] */
+    VREV64.8 d3,d1    /* same differences for the column samples */
+    VSUBL.U8 q7,d3,d2
+    VSHR.U64 d3,d3,#8
+    VSUBL.U8 q6,d3,d1
+    LDR      r2, =armVCM4P10_MultiplierTableChroma8x8
+    VSHL.I64 d4,d4,#16    /* make room so the VEXT below appends lane 0 of d6 as the fourth lane */
+    VEXT.8   d9,d4,d6,#2    /* d9 = four row differences in the order weighted by 3,2,1,4 */
+    VLD1.16  {d10},[r2]!    /* weights 3,2,1,4; r2 advances to the next table row */
+    VSHL.I64 d12,d12,#16
+    VEXT.8   d16,d12,d14,#2    /* d16 = four column differences, same layout */
+    VMUL.I16 d11,d9,d10
+    VMUL.I16 d3,d16,d10
+    VPADD.I16 d3,d11,d3
+    VPADDL.S16 d3,d3    /* d3 = {weighted row sum, weighted column sum} as 32-bit values */
+    VSHL.I32 d2,d3,#4
+    VADD.I32 d3,d3,d2    /* multiply both sums by 17 */
+    VLD1.16  {d10,d11},[r2]    /* q5 = -3,-2,-1,0,1,2,3,4 */
+    VRSHR.S32 d3,d3,#5    /* rounded shift: the two gradient slopes */
+    VADDL.U8 q0,d0,d1    /* row + column samples, widened */
+    VDUP.16  q0,d1[3]    /* broadcast row[7] + column[7] */
+    VSHL.I16 q0,q0,#4    /* times 16: base term */
+    VDUP.16  q2,d3[0]    /* slope derived from the row samples */
+    VDUP.16  q3,d3[2]    /* slope derived from the column samples */
+    VMUL.I16 q2,q2,q5    /* per-column offsets */
+    VMUL.I16 q3,q3,q5    /* per-row offsets, one per lane */
+    VADD.I16 q2,q2,q0    /* base + per-column offsets, shared by all rows */
+    VDUP.16  q0,d6[0]    /* broadcast each per-row offset ... */
+    VDUP.16  q1,d6[1]
+    VDUP.16  q4,d6[2]
+    VDUP.16  q5,d6[3]
+    VDUP.16  q6,d7[0]
+    VDUP.16  q7,d7[1]
+    VDUP.16  q8,d7[2]
+    VDUP.16  q9,d7[3]
+    VADD.I16 q0,q2,q0    /* ... and add it to the shared row */
+    VADD.I16 q1,q2,q1
+    VADD.I16 q4,q2,q4
+    VADD.I16 q5,q2,q5
+    VADD.I16 q6,q2,q6
+    VADD.I16 q7,q2,q7
+    VADD.I16 q8,q2,q8
+    VADD.I16 q9,q2,q9
+    VQRSHRUN.S16 d0,q0,#5    /* rounded shift by 5 with saturation to unsigned 8-bit: rows 0-7 in d0-d7 */
+    VQRSHRUN.S16 d1,q1,#5
+    VQRSHRUN.S16 d2,q4,#5
+    VQRSHRUN.S16 d3,q5,#5
+    VQRSHRUN.S16 d4,q6,#5
+    VQRSHRUN.S16 d5,q7,#5
+    VQRSHRUN.S16 d6,q8,#5
+    VQRSHRUN.S16 d7,q9,#5
+L0x28c:    /* store d0-d7 as rows 0-7 */
+    ADD      r9,r3,r5
+    ADD      r10,r5,r5
+    VST1.8   {d0},[r3],r10
+    VST1.8   {d1},[r9],r10
+    VST1.8   {d2},[r3],r10
+    VST1.8   {d3},[r9],r10
+    VST1.8   {d4},[r3],r10
+    VST1.8   {d5},[r9],r10
+    VST1.8   {d6},[r3],r10
+    VST1.8   {d7},[r9]
+    MOV      r0,#0    /* return value 0 */
+    VPOP     {d8-d15}
+    POP      {r4-r10,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S
new file mode 100644
index 0000000..53268f6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S
@@ -0,0 +1,239 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+
+    .section .rodata
+    .align 4
+;//-------------------------------------------------------
+;// This table for implementing switch case of C in asm by
+;// the method of two levels of indexing.
+;//-------------------------------------------------------
+
+armVCM4P10_pIndexTable16x16:    /* handler addresses; omxVCM4P10_PredictIntra_16x16 loads pc from entry r6 */
+    .word  OMX_VC_16X16_VERT, OMX_VC_16X16_HOR    /* entries 0 and 1 */
+    .word  OMX_VC_16X16_DC,   OMX_VC_16X16_PLANE    /* entries 2 and 3 */
+
+
+
+armVCM4P10_MultiplierTable16x16:    /* 16-bit constants read by the OMX_VC_16X16_PLANE handler */
+    .hword   7,  6,  5,  4,  3,  2,  1,  8    /* first load: weights for the eight difference lanes */
+    .hword   0,  1,  2,  3,  4,  5,  6,  7    /* second load: column multipliers 0..7 */
+    .hword   8,  9, 10, 11, 12, 13, 14, 15    /* third load: column multipliers 8..15 */
+
+    .text
+
+    .global omxVCM4P10_PredictIntra_16x16
+    .func   omxVCM4P10_PredictIntra_16x16
+omxVCM4P10_PredictIntra_16x16:    /* Writes a 16x16 byte block through r3 from samples read through r0, r1 and r2; returns 0 in r0. NOTE(review): arguments presumably follow the OpenMAX DL prototype (pSrcLeft, pSrcAbove, pSrcAboveLeft, pDst, leftStep, dstStep, predMode, availability) - confirm */
+    PUSH     {r4-r12,lr}    /* 10 core registers = 40 bytes */
+    VPUSH    {d8-d15}    /* 64 bytes, so stack arguments start at sp + 0x68 */
+    LDR      r9, =armVCM4P10_pIndexTable16x16
+    LDR      r6,[sp,#0x70]    /* 3rd stack argument: index into the handler table */
+    LDR      r4,[sp,#0x68]    /* 1st stack argument: byte stride for reads through r0 */
+    LDR      r5,[sp,#0x6c]    /* 2nd stack argument: byte stride for writes through r3 */
+    LDR      r7,[sp,#0x74]    /* 4th stack argument: flag bits tested with #1 and #2 in the DC handler */
+    MOV      r12,#0x10    /* row counter (16) used by the HOR and PLANE loops */
+    LDR      pc,[r9,r6,LSL #2]    /* jump to the handler selected by r6 */
+OMX_VC_16X16_VERT:    /* table entry 0: every output row is the 16 bytes at r1 */
+    VLD1.8   {d0,d1},[r1]
+    ADD      r8,r3,r5    /* r8 walks odd rows, r3 even rows */
+    ADD      r10,r5,r5    /* step of two rows */
+    VST1.8   {d0,d1},[r3],r10
+    VST1.8   {d0,d1},[r8],r10
+    VST1.8   {d0,d1},[r3],r10
+    VST1.8   {d0,d1},[r8],r10
+    VST1.8   {d0,d1},[r3],r10
+    VST1.8   {d0,d1},[r8],r10
+    VST1.8   {d0,d1},[r3],r10
+    VST1.8   {d0,d1},[r8],r10
+    VST1.8   {d0,d1},[r3],r10
+    VST1.8   {d0,d1},[r8],r10
+    VST1.8   {d0,d1},[r3],r10
+    VST1.8   {d0,d1},[r8],r10
+    VST1.8   {d0,d1},[r3],r10
+    VST1.8   {d0,d1},[r8],r10
+    VST1.8   {d0,d1},[r3]    /* rows 14 and 15: no post-increment needed */
+    VST1.8   {d0,d1},[r8]
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+OMX_VC_16X16_HOR:    /* table entry 1: each output row is one byte from the r0 column, replicated 16 times */
+    ADD      r8,r0,r4    /* r8 reads odd rows */
+    ADD      r4,r4,r4    /* source step of two rows */
+    ADD      r11,r3,r5    /* r11 writes odd rows */
+    ADD      r5,r5,r5    /* destination step of two rows */
+L0x8c:    /* eight rows per iteration, two iterations */
+    VLD1.8   {d2[],d3[]},[r0],r4    /* load-and-replicate one byte across 16 lanes */
+    VLD1.8   {d0[],d1[]},[r8],r4
+    SUBS     r12,r12,#8    /* flags consumed by the BNE at the end of the loop body */
+    VST1.8   {d2,d3},[r3],r5
+    VST1.8   {d0,d1},[r11],r5
+    VLD1.8   {d2[],d3[]},[r0],r4
+    VLD1.8   {d0[],d1[]},[r8],r4
+    VST1.8   {d2,d3},[r3],r5
+    VST1.8   {d0,d1},[r11],r5
+    VLD1.8   {d2[],d3[]},[r0],r4
+    VLD1.8   {d0[],d1[]},[r8],r4
+    VST1.8   {d2,d3},[r3],r5
+    VST1.8   {d0,d1},[r11],r5
+    VLD1.8   {d2[],d3[]},[r0],r4
+    VLD1.8   {d0[],d1[]},[r8],r4
+    VST1.8   {d2,d3},[r3],r5
+    VST1.8   {d0,d1},[r11],r5
+    BNE      L0x8c
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+OMX_VC_16X16_DC:    /* table entry 2: fill with a rounded mean of whichever inputs the flag bits in r7 select */
+    MOV      r11,#0    /* counts how many of the two inputs were used */
+    TST      r7,#2
+    BEQ      L0x14c    /* bit 1 clear: skip the column read through r0 */
+    ADD      r8,r0,r4
+    ADD      r10,r4,r4
+    VLD1.8   {d2[0]},[r0],r10    /* gather 16 bytes, one per row, into q1 */
+    VLD1.8   {d2[1]},[r8],r10
+    VLD1.8   {d2[2]},[r0],r10
+    VLD1.8   {d2[3]},[r8],r10
+    VLD1.8   {d2[4]},[r0],r10
+    VLD1.8   {d2[5]},[r8],r10
+    VLD1.8   {d2[6]},[r0],r10
+    VLD1.8   {d2[7]},[r8],r10
+    VLD1.8   {d3[0]},[r0],r10
+    VLD1.8   {d3[1]},[r8],r10
+    VLD1.8   {d3[2]},[r0],r10
+    VLD1.8   {d3[3]},[r8],r10
+    VLD1.8   {d3[4]},[r0],r10
+    VLD1.8   {d3[5]},[r8],r10
+    VLD1.8   {d3[6]},[r0],r10
+    VLD1.8   {d3[7]},[r8]
+    VPADDL.U8 q0,q1
+    ADD      r11,r11,#1
+    VPADD.I16 d0,d0,d1
+    VPADDL.U16 d0,d0
+    VPADDL.U32 d6,d0    /* d6 = sum of the 16 column bytes */
+    VRSHR.U64 d8,d6,#4    /* rounded mean of 16; replaced below if the row is used too */
+L0x14c:
+    TST      r7,#1
+    BEQ      L0x170    /* bit 0 clear: skip the row at r1 */
+    VLD1.8   {d0,d1},[r1]
+    ADD      r11,r11,#1
+    VPADDL.U8 q0,q0
+    VPADD.I16 d0,d0,d1
+    VPADDL.U16 d0,d0
+    VPADDL.U32 d7,d0    /* d7 = sum of the 16 row bytes */
+    VRSHR.U64 d8,d7,#4    /* rounded mean of 16 */
+L0x170:
+    CMP      r11,#2
+    BNE      L0x180
+    VADD.I64 d8,d7,d6    /* both inputs used: sum of all 32 bytes */
+    VRSHR.U64 d8,d8,#5    /* rounded mean of 32 */
+L0x180:
+    VDUP.8   q3,d8[0]    /* broadcast the mean; overwritten just below when no input was used */
+    CMP      r11,#0
+    ADD      r8,r3,r5
+    ADD      r10,r5,r5
+    BNE      L0x198
+    VMOV.I8  q3,#0x80    /* no input used: fixed fill value 128 */
+L0x198:    /* store q3 to all 16 rows */
+    VST1.8   {d6,d7},[r3],r10
+    VST1.8   {d6,d7},[r8],r10
+    VST1.8   {d6,d7},[r3],r10
+    VST1.8   {d6,d7},[r8],r10
+    VST1.8   {d6,d7},[r3],r10
+    VST1.8   {d6,d7},[r8],r10
+    VST1.8   {d6,d7},[r3],r10
+    VST1.8   {d6,d7},[r8],r10
+    VST1.8   {d6,d7},[r3],r10
+    VST1.8   {d6,d7},[r8],r10
+    VST1.8   {d6,d7},[r3],r10
+    VST1.8   {d6,d7},[r8],r10
+    VST1.8   {d6,d7},[r3],r10
+    VST1.8   {d6,d7},[r8],r10
+    VST1.8   {d6,d7},[r3],r10
+    VST1.8   {d6,d7},[r8],r10
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+OMX_VC_16X16_PLANE:    /* table entry 3: linear gradient built from the row at r1, the column at r0 and the byte at r2 */
+    LDR      r9, =armVCM4P10_MultiplierTable16x16
+    VLD1.8   {d0,d1},[r1]    /* q0 = 16 row samples */
+    VLD1.8   {d4[0]},[r2]    /* d4 byte 0 = the single sample at r2 */
+    ADD      r8,r0,r4
+    ADD      r10,r4,r4
+    VLD1.8   {d2[0]},[r0],r10    /* q1 = 16 column samples, gathered one per row */
+    VLD1.8   {d2[1]},[r8],r10
+    VLD1.8   {d2[2]},[r0],r10
+    VLD1.8   {d2[3]},[r8],r10
+    VLD1.8   {d2[4]},[r0],r10
+    VLD1.8   {d2[5]},[r8],r10
+    VLD1.8   {d2[6]},[r0],r10
+    VLD1.8   {d2[7]},[r8],r10
+    VLD1.8   {d3[0]},[r0],r10
+    VLD1.8   {d3[1]},[r8],r10
+    VLD1.8   {d3[2]},[r0],r10
+    VLD1.8   {d3[3]},[r8],r10
+    VLD1.8   {d3[4]},[r0],r10
+    VLD1.8   {d3[5]},[r8],r10
+    VLD1.8   {d3[6]},[r0],r10
+    VLD1.8   {d3[7]},[r8]
+    VREV64.8 d5,d1    /* row samples 8-15 in reverse order */
+    VSUBL.U8 q3,d5,d4    /* lane 0 = row[15] - sample at r2 (only lane 0 is used later) */
+    VSHR.U64 d5,d5,#8
+    VSUBL.U8 q4,d5,d0    /* lanes 0-6 = row[14-i] - row[i] */
+    VSHL.I64 d9,d9,#16    /* discard lane 7 and make room ... */
+    VEXT.8   d9,d9,d6,#2    /* ... so lane 0 of q3 becomes the eighth difference */
+    VREV64.8 d12,d3    /* same differences for the column samples */
+    VSUBL.U8 q7,d12,d4
+    VSHR.U64 d12,d12,#8
+    VSUBL.U8 q8,d12,d2
+    VLD1.16  {d20,d21},[r9]!    /* weights 7,6,5,4,3,2,1,8 */
+    VSHL.I64 d17,d17,#16
+    VEXT.8   d17,d17,d14,#2
+    VMULL.S16 q11,d8,d20    /* weighted row differences */
+    VMULL.S16 q12,d16,d20    /* weighted column differences */
+    VMLAL.S16 q11,d9,d21
+    VMLAL.S16 q12,d17,d21
+    VPADD.I32 d22,d23,d22
+    VPADD.I32 d23,d25,d24
+    VPADDL.S32 q11,q11    /* d22 = weighted row sum, d23 = weighted column sum (64-bit) */
+    VSHL.I64 q12,q11,#2
+    VADD.I64 q11,q11,q12    /* times 5 */
+    VRSHR.S64 q11,q11,#6    /* rounded shift: the two gradient slopes */
+    VSHL.I64 q12,q11,#3
+    VSUB.I64 q12,q12,q11    /* 7 * slope for each direction */
+    VLD1.16  {d20,d21},[r9]!    /* q10 = 0..7 */
+    VDUP.16  q6,d22[0]    /* slope derived from the row samples */
+    VDUP.16  q7,d23[0]    /* slope derived from the column samples; added once per output row below */
+    VADDL.U8 q11,d1,d3
+    VSHL.I16 q11,q11,#4
+    VDUP.16  q11,d23[3]    /* broadcast 16 * (row[15] + column[15]) */
+    VADD.I64 d1,d24,d25    /* 7 * (sum of the two slopes) */
+    VLD1.16  {d24,d25},[r9]    /* q12 = 8..15 */
+    VDUP.16  q13,d1[0]
+    VSUB.I16 q13,q11,q13    /* base term minus 7 * (sum of slopes) */
+    VMUL.I16 q5,q6,q10    /* per-column offsets for columns 0-7 */
+    VMUL.I16 q6,q6,q12    /* per-column offsets for columns 8-15 */
+    VADD.I16 q0,q5,q13    /* first output row before scaling */
+    VADD.I16 q1,q6,q13
+L0x2d4:    /* one output row per iteration, 16 iterations */
+    VQRSHRUN.S16 d6,q0,#5    /* rounded shift by 5 with saturation to unsigned 8-bit */
+    VQRSHRUN.S16 d7,q1,#5
+    SUBS     r12,r12,#1
+    VST1.8   {d6,d7},[r3],r5
+    VADD.I16 q0,q0,q7    /* step to the next row */
+    VADD.I16 q1,q1,q7
+    BNE      L0x2d4
+    MOV      r0,#0
+    VPOP     {d8-d15}
+    POP      {r4-r12,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S
new file mode 100644
index 0000000..aa6d7ef
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S
@@ -0,0 +1,261 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+
+    .section .rodata
+    .align 4
+
+armVCM4P10_pSwitchTable4x4:    /* handler addresses; omxVCM4P10_PredictIntra_4x4 loads pc from entry r6 */
+    .word OMX_VC_4x4_VERT,     OMX_VC_4x4_HOR    /* entries 0 and 1 */
+    .word OMX_VC_4x4_DC,       OMX_VC_4x4_DIAG_DL    /* entries 2 and 3 */
+    .word OMX_VC_4x4_DIAG_DR,  OMX_VC_4x4_VR    /* entries 4 and 5 */
+    .word OMX_VC_4x4_HD,       OMX_VC_4x4_VL    /* entries 6 and 7 */
+    .word OMX_VC_4x4_HU    /* entry 8 */
+
+    .text
+
+    .global omxVCM4P10_PredictIntra_4x4
+    .func   omxVCM4P10_PredictIntra_4x4
+omxVCM4P10_PredictIntra_4x4:    /* Writes a 4x4 byte block through r3 from samples read through r0, r1 and r2; returns 0 in r0. NOTE(review): arguments presumably follow the OpenMAX DL prototype (pSrcLeft, pSrcAbove, pSrcAboveLeft, pDst, leftStep, dstStep, predMode, availability) - confirm */
+    PUSH     {r4-r12,lr}    /* 10 core registers = 40 bytes */
+    VPUSH    {d8-d12}    /* 5 doubleword registers = 40 bytes, so stack arguments start at sp + 0x50 */
+    LDR      r8, =armVCM4P10_pSwitchTable4x4
+    LDRD     r6,r7,[sp,#0x58]    /* r6 = 3rd stack argument (handler index), r7 = 4th (flag bits) */
+    LDRD     r4,r5,[sp,#0x50]    /* r4 = 1st stack argument (stride for r0 reads), r5 = 2nd (stride for r3 writes) */
+    LDR      pc,[r8,r6,LSL #2]    /* jump to the handler selected by r6 */
+OMX_VC_4x4_HOR:    /* table entry 1: each output row is one byte from the r0 column, replicated */
+    ADD      r9,r0,r4    /* r9 reads odd rows */
+    ADD      r10,r4,r4
+    VLD1.8   {d0[]},[r0],r10
+    VLD1.8   {d1[]},[r9],r10
+    VLD1.8   {d2[]},[r0]
+    VLD1.8   {d3[]},[r9]
+    ADD      r11,r3,r5    /* r11 writes odd rows */
+    ADD      r12,r5,r5
+    VST1.32  {d0[0]},[r3],r12
+    VST1.32  {d1[0]},[r11],r12
+    VST1.32  {d2[0]},[r3]
+    VST1.32  {d3[0]},[r11]
+    B        L0x348
+OMX_VC_4x4_VERT:    /* table entry 0: every output row is the four bytes at r1 */
+    VLD1.32  {d0[0]},[r1]
+    ADD      r11,r3,r5
+    ADD      r12,r5,r5
+L0x58:    /* store the low 4 bytes of d0 to all four rows; expects r11 and r12 already set */
+    VST1.32  {d0[0]},[r3],r12
+    VST1.32  {d0[0]},[r11],r12
+    VST1.32  {d0[0]},[r3]
+    VST1.32  {d0[0]},[r11]
+    B        L0x348
+OMX_VC_4x4_DC:    /* table entry 2: fill with a rounded mean of whichever inputs the flag bits in r7 select */
+    TST      r7,#2
+    BEQ      L0xdc    /* bit 1 clear: the column read through r0 is not used */
+    ADD      r9,r0,r4
+    ADD      r10,r4,r4
+    VLD1.8   {d0[0]},[r0],r10    /* four column bytes into the low half of d0 */
+    VLD1.8   {d0[1]},[r9],r10
+    VLD1.8   {d0[2]},[r0]
+    VLD1.8   {d0[3]},[r9]
+    TST      r7,#1
+    BEQ      L0xbc    /* bit 0 clear: the row at r1 is not used */
+    VLD1.32  {d0[1]},[r1]    /* four row bytes into the high half of d0 */
+    MOV      r0,#0
+    VPADDL.U8 d1,d0
+    VPADDL.U16 d1,d1
+    VPADDL.U32 d1,d1    /* sum of all eight bytes */
+    VRSHR.U64 d1,d1,#3    /* rounded mean of 8 */
+    ADD      r11,r3,r5
+    ADD      r12,r5,r5
+    VDUP.8   d0,d1[0]
+    B        L0x58
+L0xbc:    /* column only */
+    MOV      r0,#0
+    VPADDL.U8 d1,d0
+    VPADDL.U16 d1,d1    /* low 32-bit lane = sum of the four column bytes */
+    VRSHR.U32 d1,d1,#2    /* rounded mean of 4 */
+    ADD      r11,r3,r5
+    ADD      r12,r5,r5
+    VDUP.8   d0,d1[0]
+    B        L0x58
+L0xdc:    /* column not used */
+    TST      r7,#1
+    BEQ      L0x108    /* neither flag bit set */
+    VLD1.32  {d0[0]},[r1]
+    MOV      r0,#0
+    VPADDL.U8 d1,d0
+    VPADDL.U16 d1,d1
+    VRSHR.U32 d1,d1,#2    /* rounded mean of the four row bytes */
+    ADD      r11,r3,r5
+    ADD      r12,r5,r5
+    VDUP.8   d0,d1[0]
+    B        L0x58
+L0x108:
+    VMOV.I8  d0,#0x80    /* fixed fill value 128 */
+    MOV      r0,#0
+    ADD      r11,r3,r5
+    ADD      r12,r5,r5
+    B        L0x58
+OMX_VC_4x4_DIAG_DL:    /* table entry 3: 3-tap filtered row samples, each output row shifted one byte further */
+    TST      r7,#0x40
+    BEQ      L0x138    /* bit 6 clear: use only four bytes at r1 and pad with the last one */
+    VLD1.8   {d3},[r1]    /* eight bytes at r1 */
+    VDUP.8   d2,d3[7]
+    VEXT.8   d4,d3,d2,#1    /* samples shifted by one, padded with the last byte */
+    VEXT.8   d5,d3,d2,#2    /* samples shifted by two */
+    B        L0x14c
+L0x138:
+    VLD1.32  {d0[1]},[r1]    /* four bytes at r1 into the high half of d0 */
+    VDUP.8   d2,d0[7]    /* last of those four bytes, replicated */
+    VEXT.8   d3,d0,d2,#4    /* four samples followed by padding */
+    VEXT.8   d4,d0,d2,#5
+    VEXT.8   d5,d0,d2,#6
+L0x14c:
+    VHADD.U8 d6,d3,d5    /* (a + c) >> 1 ... */
+    VRHADD.U8 d6,d6,d4    /* ... then rounding average with b: the (a + 2b + c + 2) >> 2 filter */
+    VST1.32  {d6[0]},[r3],r5
+    VEXT.8   d6,d6,d6,#1    /* rotate by one byte for the next row */
+    VST1.32  {d6[0]},[r3],r5
+    VEXT.8   d6,d6,d6,#1
+    VST1.32  {d6[0]},[r3],r5
+    VEXT.8   d6,d6,d6,#1
+    VST1.32  {d6[0]},[r3]
+    B        L0x348
+OMX_VC_4x4_DIAG_DR:    /* table entry 4: 3-tap filter over column (reversed), corner and row samples; rows written bottom-up */
+    VLD1.32  {d0[0]},[r1]    /* four row bytes */
+    VLD1.8   {d1[7]},[r2]    /* corner byte at r2 */
+    ADD      r9,r0,r4
+    ADD      r10,r4,r4
+    ADD      r1,r3,r5    /* r1 is reused as the address of output row 1 */
+    VLD1.8   {d1[6]},[r0],r10    /* column bytes 0-3 stored in descending lanes 6..3 */
+    VLD1.8   {d1[5]},[r9],r10
+    VLD1.8   {d1[4]},[r0]
+    VLD1.8   {d1[3]},[r9]
+    VEXT.8   d3,d1,d0,#3
+    ADD      r4,r1,r5    /* r4 is reused as the address of output row 2 (stride no longer needed) */
+    VEXT.8   d4,d1,d0,#4
+    ADD      r6,r4,r5    /* r6 = address of output row 3 */
+    VEXT.8   d5,d1,d0,#5
+    VHADD.U8 d6,d3,d5
+    VRHADD.U8 d6,d6,d4    /* 3-tap filter centred on d4 */
+    VST1.32  {d6[0]},[r6]    /* row 3 */
+    VEXT.8   d6,d6,d6,#1
+    VST1.32  {d6[0]},[r4]    /* row 2 */
+    VEXT.8   d6,d6,d6,#1
+    VST1.32  {d6[0]},[r1]    /* row 1 */
+    VEXT.8   d6,d6,d6,#1
+    VST1.32  {d6[0]},[r3]    /* row 0 */
+    B        L0x348
+OMX_VC_4x4_VR:    /* table entry 5 */
+    VLD1.32  {d0[0]},[r1]    /* four row bytes */
+    VLD1.8   {d0[7]},[r2]    /* corner byte at r2 */
+    VLD1.8   {d1[7]},[r0],r4    /* column byte 0 */
+    VLD1.8   {d2[7]},[r0],r4    /* column byte 1 */
+    VLD1.8   {d1[6]},[r0]    /* column byte 2 */
+    VEXT.8   d12,d0,d0,#7    /* corner byte followed by the row bytes */
+    VEXT.8   d3,d1,d12,#6
+    VEXT.8   d4,d2,d12,#7
+    VEXT.8   d5,d1,d0,#7
+    VEXT.8   d6,d2,d0,#7
+    VEXT.8   d11,d1,d12,#7
+    VHADD.U8 d8,d6,d12    /* from lane 1 on d6 equals d12, so the filter reduces to a rounding 2-tap average there */
+    VRHADD.U8 d8,d8,d11
+    VHADD.U8 d7,d3,d5
+    VRHADD.U8 d7,d7,d4    /* 3-tap filter centred on d4 */
+    VEXT.8   d10,d8,d8,#1    /* row 0 = d8 shifted by one byte */
+    ADD      r11,r3,r5
+    ADD      r12,r5,r5
+    VEXT.8   d9,d7,d7,#1    /* row 1 = d7 shifted by one byte */
+    VST1.32  {d10[0]},[r3],r12    /* row 0 */
+    VST1.32  {d9[0]},[r11],r12    /* row 1 */
+    VST1.32  {d8[0]},[r3],r12    /* row 2 */
+    VST1.32  {d7[0]},[r11]    /* row 3 */
+    B        L0x348
+OMX_VC_4x4_HD:    /* table entry 6 */
+    VLD1.8   {d0},[r1]    /* reads eight bytes at r1 */
+    VLD1.8   {d1[7]},[r2]    /* corner byte at r2 */
+    ADD      r9,r0,r4
+    ADD      r10,r4,r4
+    VLD1.8   {d1[6]},[r0],r10    /* column bytes 0-3 stored in descending lanes 6..3 */
+    VLD1.8   {d1[5]},[r9],r10
+    VLD1.8   {d1[4]},[r0]
+    VLD1.8   {d1[3]},[r9]
+    VEXT.8   d3,d1,d0,#3
+    VEXT.8   d4,d1,d0,#2
+    VEXT.8   d5,d1,d0,#1
+    VHADD.U8 d7,d3,d5
+    VRHADD.U8 d7,d7,d4    /* 3-tap filtered values */
+    VRHADD.U8 d8,d4,d3    /* 2-tap rounding averages */
+    VSHL.I64 d8,d8,#24    /* align both vectors ... */
+    VSHL.I64 d6,d7,#16
+    VZIP.8   d8,d6    /* ... and interleave them byte by byte; d6 then holds rows 3 and 1 */
+    VEXT.8   d7,d7,d7,#6
+    VEXT.8   d8,d6,d7,#2    /* d8 then holds rows 2 and 0 */
+    ADD      r11,r3,r5
+    ADD      r12,r5,r5
+    VST1.32  {d8[1]},[r3],r12    /* row 0 */
+    VST1.32  {d6[1]},[r11],r12    /* row 1 */
+    VST1.32  {d8[0]},[r3]    /* row 2 */
+    VST1.32  {d6[0]},[r11]    /* row 3 */
+    B        L0x348
+OMX_VC_4x4_VL:    /* table entry 7 */
+    TST      r7,#0x40
+    BEQ      L0x2b4    /* bit 6 clear: use only four bytes at r1 and pad with the last one */
+    VLD1.8   {d3},[r1]
+    VEXT.8   d4,d3,d3,#1
+    VEXT.8   d5,d4,d4,#1
+    B        L0x2c8
+L0x2b4:
+    VLD1.32  {d0[1]},[r1]
+    VDUP.8   d2,d0[7]
+    VEXT.8   d3,d0,d2,#4
+    VEXT.8   d4,d0,d2,#5
+    VEXT.8   d5,d0,d2,#6
+L0x2c8:
+    VRHADD.U8 d7,d4,d3    /* 2-tap rounding averages: row 0 */
+    VHADD.U8 d10,d3,d5
+    VRHADD.U8 d10,d10,d4    /* 3-tap filtered values: row 1 */
+    VEXT.8   d8,d7,d7,#1    /* row 2 = row 0 shifted by one byte */
+    ADD      r11,r3,r5
+    ADD      r12,r5,r5
+    VEXT.8   d9,d10,d8,#1    /* row 3 = row 1 shifted by one byte (low four bytes) */
+    VST1.32  {d7[0]},[r3],r12
+    VST1.32  {d10[0]},[r11],r12
+    VST1.32  {d8[0]},[r3]
+    VST1.32  {d9[0]},[r11]
+    B        L0x348
+OMX_VC_4x4_HU:    /* table entry 8: uses only the column read through r0 */
+    ADD      r9,r0,r4
+    ADD      r10,r4,r4
+    VLD1.8   {d1[4]},[r0],r10    /* column bytes 0-3 into lanes 4..7 */
+    VLD1.8   {d1[5]},[r9],r10
+    VLD1.8   {d1[6]},[r0]
+    VLD1.8   {d1[7]},[r9]
+    VDUP.8   d2,d1[7]    /* last column byte, replicated as padding */
+    VEXT.8   d3,d1,d2,#4
+    VEXT.8   d4,d1,d2,#5
+    VEXT.8   d5,d1,d2,#6
+    VHADD.U8 d7,d3,d5
+    VRHADD.U8 d7,d7,d4    /* 3-tap filtered values */
+    VRHADD.U8 d8,d4,d3    /* 2-tap rounding averages */
+    VZIP.8   d8,d7    /* interleave averages and filtered values */
+    VST1.32  {d8[0]},[r3],r5    /* row 0 */
+    VEXT.8   d8,d8,d8,#2    /* each following row starts two bytes later */
+    VST1.32  {d8[0]},[r3],r5    /* row 1 */
+    VEXT.8   d8,d8,d8,#2
+    VST1.32  {d8[0]},[r3],r5    /* row 2 */
+    VST1.32  {d7[0]},[r3]    /* row 3: four copies of the last column byte */
+L0x348:    /* common exit */
+    MOV      r0,#0    /* return value 0 */
+    VPOP     {d8-d12}
+    POP      {r4-r12,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S
new file mode 100644
index 0000000..28a89cb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S
@@ -0,0 +1,54 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global omxVCM4P10_TransformDequantChromaDCFromPair
+    .func   omxVCM4P10_TransformDequantChromaDCFromPair
+omxVCM4P10_TransformDequantChromaDCFromPair:    /* r0 -> byte-stream pointer (read, advanced, written back); r1 -> four 16-bit outputs; r2 indexes the two QP tables; returns 0 in r0 */
+    push    {r4-r10, lr}
+    ldr     r9, [r0,#0]    /* r9 = current stream position */
+    vmov.i16    d0, #0
+    mov     r8, #0x1f    /* mask for the byte offset computed in the loop */
+    vst1.16    {d0}, [r1]    /* clear the four output halfwords */
+    ldrb    r6, [r9], #1    /* first header byte */
+unpackLoop:    /* one (header, value) pair per iteration */
+    tst     r6, #0x10    /* header bit 4 set: value is two bytes, otherwise one signed byte */
+    ldrnesb r5, [r9, #1]    /* two-byte form: high byte, sign-extended */
+    ldrneb  r4, [r9], #2    /* two-byte form: low byte, then skip both */
+    and     r7, r8, r6, lsl #1    /* byte offset of the target halfword = (header << 1) & 0x1f */
+    ldreqsb r4, [r9], #1    /* one-byte form: signed value */
+    orrne   r4, r4, r5, lsl #8    /* two-byte form: combine low and high bytes */
+    tst     r6, #0x20    /* header bit 5 set: this was the last pair */
+    ldreqb  r6, [r9], #1    /* otherwise fetch the next header byte */
+    strh    r4, [r1, r7]    /* does not alter the flags tested by the beq below */
+    beq     unpackLoop
+    ldmia   r1, {r3, r4}    /* reload the four halfwords as two words */
+    str     r9, [r0, #0]    /* write the advanced stream position back */
+    ldr     r5, =armVCM4P10_QPDivTable
+    ldr     r6, =armVCM4P10_VMatrixQPModTable
+    saddsubx        r3, r3, r3    /* top = sum, bottom = difference of the two halfwords in r3 */
+    saddsubx        r4, r4, r4    /* same for r4 */
+    ldrsb   r9, [r5, r2]    /* shift amount from the first table */
+    ldrsb   r2, [r6, r2]    /* multiplier from the second table */
+    sadd16  r5, r3, r4    /* halfword-wise sums ... */
+    ssub16  r6, r3, r4    /* ... and differences: all four sign combinations of the inputs */
+    lsl     r2, r2, r9    /* scale factor = multiplier << shift */
+    vmov    d0, r5, r6
+    vrev32.16  d0, d0    /* swap the halfwords within each word to restore output order */
+    vdup.16    d1, r2
+    vmull.s16   q1, d0, d1    /* 32-bit products with the scale factor */
+    vshrn.i32   d2, q1, #1    /* halve and narrow back to 16 bits */
+    vst1.16    {d2}, [r1]
+    mov     r0, #0    /* return value 0 */
+    pop     {r4-r10, pc}
+    .endfunc
+
+    .end
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S
new file mode 100644
index 0000000..a3a0715
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S
@@ -0,0 +1,76 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+    .eabi_attribute 24, 1
+    .eabi_attribute 25, 1
+
+    .arm
+    .fpu neon
+    .text
+
+    .global armVCM4P10_InvTransformDequantLumaDC4x4
+    .func   armVCM4P10_InvTransformDequantLumaDC4x4
+armVCM4P10_InvTransformDequantLumaDC4x4:    /* r0 -> sixteen 16-bit values, transformed and scaled in place; r1 indexes the two QP tables */
+    PUSH     {r4-r6,lr}
+    VPUSH    {d8-d13}
+    VLD4.16  {d0,d1,d2,d3},[r0]    /* de-interleaving load: d0..d3 each receive every fourth halfword */
+    LDR      r2, =armVCM4P10_QPDivTable
+    LDR      r3, =armVCM4P10_VMatrixQPModTable
+    VADD.I16 d4,d0,d1    /* first butterfly pass (add/subtract pairs) */
+    VADD.I16 d5,d2,d3
+    VSUB.I16 d6,d0,d1
+    LDRSB    r4,[r2,r1]    /* shift amount from the first table */
+    VSUB.I16 d7,d2,d3
+    LDRSB    r5,[r3,r1]    /* multiplier from the second table */
+    VADD.I16 d0,d4,d5
+    VSUB.I16 d1,d4,d5
+    VSUB.I16 d2,d6,d7
+    LSL      r5,r5,r4    /* scale factor = multiplier << shift */
+    VADD.I16 d3,d6,d7
+    VTRN.16  d0,d1    /* transpose the 4x4 matrix of halfwords */
+    VTRN.16  d2,d3
+    VTRN.32  q0,q1
+    VADD.I16 d4,d0,d1    /* second butterfly pass on the transposed data */
+    VADD.I16 d5,d2,d3
+    VSUB.I16 d6,d0,d1
+    VSUB.I16 d7,d2,d3
+    VADD.I16 d0,d4,d5
+    VSUB.I16 d1,d4,d5
+    VSUB.I16 d2,d6,d7
+    VADD.I16 d3,d6,d7
+    VDUP.16  d5,r5
+    VMOV.I32 q3,#0x2    /* accumulators preset to 2 = rounding term for the shift by 2 below */
+    VMOV.I32 q4,#0x2
+    VMOV.I32 q5,#0x2
+    VMOV.I32 q6,#0x2
+    VMLAL.S16 q3,d0,d5    /* value * scale + 2 */
+    VMLAL.S16 q4,d1,d5
+    VMLAL.S16 q5,d2,d5
+    VMLAL.S16 q6,d3,d5
+    VSHRN.I32 d0,q3,#2    /* shift right by 2 and narrow to 16 bits */
+    VSHRN.I32 d1,q4,#2
+    VSHRN.I32 d2,q5,#2
+    VSHRN.I32 d3,q6,#2
+    VST1.16  {d0,d1,d2,d3},[r0]    /* write the result back over the input */
+    VPOP     {d8-d13}
+    POP      {r4-r6,pc}
+    .endfunc
+
+.global omxVCM4P10_TransformDequantLumaDCFromPair
+.func   omxVCM4P10_TransformDequantLumaDCFromPair
+omxVCM4P10_TransformDequantLumaDCFromPair:    /* Calls armVCM4P10_UnpackBlock4x4 with the incoming r0/r1, then armVCM4P10_InvTransformDequantLumaDC4x4 on (r1, r2); returns 0 in r0 */
+    PUSH     {r4-r6,lr}
+    MOV      r4,r1    /* keep the 2nd argument across the first call */
+    MOV      r5,r2    /* keep the 3rd argument across the first call */
+    BL       armVCM4P10_UnpackBlock4x4    /* r0 and r1 are passed through unchanged */
+    MOV      r0,r4    /* original 2nd argument becomes the buffer to transform */
+    MOV      r1,r5    /* original 3rd argument becomes the table index */
+    BL       armVCM4P10_InvTransformDequantLumaDC4x4
+    MOV      r0,#0    /* return value 0 */
+    POP      {r4-r6,pc}
+    .endfunc
+
+    .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
new file mode 100755
index 0000000..74b5505
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
@@ -0,0 +1,37 @@
+/**
+ * 
+ * File Name:  armVCM4P2_Huff_Tables_VLC.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ *
+ * File:        armVCM4P2_Huff_Tables_VLC.h
+ * Description: Declares tables used for Huffman coding and decoding
+ *              in MP4P2 codec.
+ *
+ */
+ 
+#ifndef _OMXHUFFTAB_H_
+#define _OMXHUFFTAB_H_
+
+
+/* Packed VLC tables; each one holds the symbols for both Last=0 and Last=1. */
+extern const OMX_U16 armVCM4P2_IntraVlcL0L1[200];
+extern const OMX_U16 armVCM4P2_InterVlcL0L1[200];
+
+extern const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64];
+//extern const OMX_U16 armVCM4P2_aIntraDCChromaIndex[32];
+extern const OMX_U16 armVCM4P2_aVlcMVD[124];
+
+/* LMAX/RMAX tables; the Last=0 part is zero padded so the Last=1 part starts at index 32. */
+extern const OMX_U8 armVCM4P2_InterL0L1LMAX[73];
+extern const OMX_U8 armVCM4P2_InterL0L1RMAX[35];
+extern const OMX_U8 armVCM4P2_IntraL0L1LMAX[53];
+extern const OMX_U8 armVCM4P2_IntraL0L1RMAX[40];
+
+#endif /* _OMXHUFFTAB_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
new file mode 100755
index 0000000..e95203a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
@@ -0,0 +1,25 @@
+/**
+ * 
+ * File Name:  armVCM4P2_ZigZag_Tables.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ *
+ * File:        armVCM4P2_Zigzag_Tables.h
+ * Description: Declares Tables used for Zigzag scan in MP4P2 codec.
+ *
+ */
+ 
+#ifndef _OMXZIGZAGTAB_H
+#define _OMXZIGZAGTAB_H
+/* NOTE(review): 192 = 3 * 64 -- presumably the three 64-entry scans share this one table; confirm. */
+extern const OMX_U8 armVCM4P2_aClassicalZigzagScan [192];
+//extern const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64];
+//extern const OMX_U8 armVCM4P2_aVerticalZigzagScan [64];
+
+#endif /* _OMXZIGZAGTAB_H */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Clip8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Clip8_s.s
new file mode 100755
index 0000000..95fe6d2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Clip8_s.s
@@ -0,0 +1,82 @@
+; /**
+; * 
+; * File Name:  armVCM4P2_Clip8_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   12290
+; * Date:       Wednesday, April 9, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains module for Clipping 16 bit value to [0,255] Range
+; */ 
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      
+
+      M_VARIANTS CortexA8
+
+      IF CortexA8
+;//Input Arguments
+
+pSrc                 RN 0
+pDst                 RN 1
+step                 RN 2
+
+;// Neon Registers
+
+qx0                  QN  Q0.S16                  
+dx00                 DN  D0.S16
+dx01                 DN  D1.S16
+qx1                  QN  Q1.S16
+dx10                 DN  D2.S16
+dx11                 DN  D3.S16
+
+qx2                  QN  Q2.S16                  
+dx20                 DN  D4.S16
+dx21                 DN  D5.S16
+qx3                  QN  Q3.S16
+dx30                 DN  D6.S16
+dx31                 DN  D7.S16
+
+
+dclip0               DN  D0.U8
+dclip1               DN  D2.U8 
+dclip2               DN  D4.U8
+dclip3               DN  D6.U8
+ 
+       M_START armVCM4P2_Clip8
+       ;// Saturates 64 signed 16-bit values at pSrc to bytes; writes 8 rows of 8 bytes to pDst, advancing pDst by step per row
+       VLD1          {dx00,dx01,dx10,dx11},[pSrc]!          ;// Load 16 entries from pSrc
+       VLD1          {dx20,dx21,dx30,dx31},[pSrc]!          ;// Load next 16 entries from pSrc  
+       VQSHRUN       dclip0,qx0,#0                          ;// dclip0[i]=clip qx0[i] to [0,255]
+       VQSHRUN       dclip1,qx1,#0                          ;// dclip1[i]=clip qx1[i] to [0,255]
+       VST1          {dclip0},[pDst],step                   ;// store 8 bytes and pDst=pDst+step
+       VST1          {dclip1},[pDst],step                   ;// store 8 bytes and pDst=pDst+step
+       VQSHRUN       dclip2,qx2,#0                          ;// dclip2[i]=clip qx2[i] to [0,255]
+       VQSHRUN       dclip3,qx3,#0                          ;// dclip3[i]=clip qx3[i] to [0,255]
+       VST1          {dclip2},[pDst],step                   ;// store 8 bytes and pDst=pDst+step
+       VST1          {dclip3},[pDst],step                   ;// store 8 bytes and pDst=pDst+step
+       ;// Second half: same sequence for the remaining 32 entries
+       VLD1          {dx00,dx01,dx10,dx11},[pSrc]!          ;// Load 16 entries from pSrc
+       VLD1          {dx20,dx21,dx30,dx31},[pSrc]!          ;// Load next 16 entries from pSrc  
+       VQSHRUN       dclip0,qx0,#0                          ;// dclip0[i]=clip qx0[i] to [0,255]
+       VQSHRUN       dclip1,qx1,#0                          ;// dclip1[i]=clip qx1[i] to [0,255]
+       VST1          {dclip0},[pDst],step                   ;// store 8 bytes and pDst=pDst+step
+       VST1          {dclip1},[pDst],step                   ;// store 8 bytes and pDst=pDst+step
+       VQSHRUN       dclip2,qx2,#0                          ;// dclip2[i]=clip qx2[i] to [0,255]
+       VQSHRUN       dclip3,qx3,#0                          ;// dclip3[i]=clip qx3[i] to [0,255]
+       VST1          {dclip2},[pDst],step                   ;// store 8 bytes and pDst=pDst+step
+       VST1          {dclip3},[pDst],step                   ;// store 8 bytes and pDst=pDst+step
+
+
+       
+        M_END
+        ENDIF
+        
+     
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
new file mode 100755
index 0000000..e4a7f33
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
@@ -0,0 +1,398 @@
+;/**
+; * 
+; * File Name:  armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   12290
+; * Date:       Wednesday, April 9, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter, intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_AC_unsafe
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan 
+; *
+; * 
+; *
+; * 
+; */
+
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      INCLUDE armCOMM_BitDec_s.h
+
+
+      M_VARIANTS ARM1136JS
+
+     
+
+
+
+     IF ARM1136JS
+     
+        
+
+
+
+;//Input Arguments
+
+ppBitStream          RN 0
+pBitOffset           RN 1
+pDst                 RN 2
+shortVideoHeader     RN 3
+
+
+;//Local Variables
+
+Return               RN 0
+
+pVlcTableL0L1        RN 4
+pLMAXTableL0L1       RN 4
+pRMAXTableL0L1       RN 4
+pZigzagTable         RN 4
+
+ftype                RN 0
+temp3                RN 4
+temp                 RN 5
+Count                RN 6
+Escape               RN 5
+
+;// armVCM4P2_FillVLDBuffer
+zigzag               RN 0
+storeLevel           RN 1
+temp2                RN 4
+temp1                RN 5
+sign                 RN 5
+Last                 RN 7
+storeRun             RN 14
+
+
+packRetIndex         RN 5
+
+
+markerbit            RN 5
+
+;// Scratch Registers
+
+RBitStream           RN 8
+RBitBuffer           RN 9
+RBitCount            RN 10
+
+T1                   RN 11
+T2                   RN 12
+LR                   RN 14        
+        
+
+
+        M_ALLOC4        pppBitStream,4                                  ;// stack slot that keeps ppBitStream
+        M_ALLOC4        ppOffset,4                                      ;// stack slot that keeps pBitOffset
+        M_ALLOC4        pLinkRegister,4                                 ;// stack slot that keeps LR (r14 is reused as storeRun)
+        
+        M_START armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+        ;// get the table addresses from stack       
+        M_ARG           ppVlcTableL0L1,4
+        M_ARG           ppLMAXTableL0L1,4
+        M_ARG           ppRMAXTableL0L1,4
+        M_ARG           ppZigzagTable,4
+        
+        ;// Prepare four zeroed registers; they are the source for clearing pDst below
+        ;// NOTE(review): Count (r6) is never written before its first use - presumably preset by the caller; confirm
+        MOV             temp1,#0                                        ;// temp1 (r5) = 0
+        MOV             Last,#0
+        M_STR           LR,pLinkRegister                                ;// Store Link Register on Stack
+        MOV             temp2,#0
+        MOV             LR,#0          
+        
+        ;// Initialize the bit decoder macros, interleaved with storing zeros to pDst (8 x 16 bytes)
+  
+        STM             pDst!,{temp2,temp1,Last,LR}                   
+        M_BD_INIT0      ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount  
+        STM             pDst!,{temp2,temp1,Last,LR}
+        M_BD_INIT1      T1, T2, T2
+        STM             pDst!,{temp2,temp1,Last,LR}
+        M_BD_INIT2      T1, T2, T2
+        STM             pDst!,{temp2,temp1,Last,LR}
+        M_STR           ppBitStream,pppBitStream                        ;// Store ppBitstream on stack                         
+        STM             pDst!,{temp2,temp1,Last,LR}
+        M_STR           pBitOffset,ppOffset                             ;// Store pBitOffset on stack
+        STM             pDst!,{temp2,temp1,Last,LR}
+        
+        STM             pDst!,{temp2,temp1,Last,LR}
+        STM             pDst!,{temp2,temp1,Last,LR}
+ 
+        
+        SUB             pDst,pDst,#128                                  ;// Restore pDst (undo the 8 x 16 byte advance)
+
+        ;// The armVCM4P2_GetVLCBits begins
+
+getVLCbits
+        
+        M_BD_LOOK8      Escape,7                                        ;// Look at the next 7 bits (escape code candidate)
+        LSR             Escape,Escape,#25                               ;// keep only the top 7 bits of the 32-bit value
+        CMP             Escape,#3                                       ;// 0000011b is the escape code
+        MOVNE           ftype,#0                                        ;// ftype 0: ordinary VLC symbol
+        BNE             notEscapemode                                   ;// Branch if the next code is not the escape code
+
+        M_BD_VSKIP8     #7,T1                                           ;// step over the 7-bit escape code
+        CMP             shortVideoHeader,#0                             ;// Check shortVideoHeader flag to know the type of Escape mode
+        BEQ             endFillVLD                                      ;// shortVideoHeader == 0: read the escape type bits
+        
+        ;// Escape Mode 4 (shortVideoHeader != 0): 1-bit Last, 6-bit Run, 8-bit Level
+
+        M_BD_READ8      Last,1,T1
+        M_BD_READ8      storeRun,6,T1
+        M_BD_READ8      storeLevel,8,T1
+
+           
+        ;// Check whether the Reserved values for Level are used and Exit with an Error Message if it is so
+
+        TEQ             storeLevel,#0
+        TEQNE           storeLevel,#128                    
+        BEQ             ExitError                                       ;// Level == 0 or Level == 128 is rejected
+
+        ADD             temp2,storeRun,Count
+        CMP             temp2,#64
+        BGE             ExitError                                       ;// error if Count+storeRun >= 64
+        
+        
+        ;// Load address of zigzagTable
+        
+        M_LDR           pZigzagTable,ppZigzagTable                      ;// Loading the Address of Zigzag table
+               
+                
+        ;// armVCM4P2_FillVLDBuffer
+                
+        SXTB            storeLevel,storeLevel                           ;// Sign Extend the 8-bit storeLevel to 32 bits
+                              
+        
+        ;// Advance Count by the run length
+
+        ADD             Count,Count,storeRun
+        LDRB            zigzag,[pZigzagTable,Count]                     ;// table entry is used directly as the byte offset into pDst
+        ADD             Count,Count,#1
+        STRH            storeLevel,[pDst,zigzag]                        ;// store Level
+              
+        B               ExitOk                                          ;// NOTE(review): taken even when Last == 0, and ExitOk then returns OMX_Sts_Err - confirm intended
+       
+        
+
+endFillVLD
+        
+               
+        ;// Derive ftype from the next one or two bits: 0 -> ftype 1, 10 -> ftype 2, 11 -> ftype 3
+     
+        M_BD_READ8      temp1,1,T1           
+        CMP             temp1,#0    
+        MOVEQ           ftype,#1
+        BEQ             notEscapemode
+        M_BD_READ8      temp1,1,T1
+        CMP             temp1,#1
+        MOVEQ           ftype,#3
+        MOVNE           ftype,#2
+        
+
+notEscapemode
+
+        ;// Load optimized packed VLC table with last=0 and Last=1
+        
+        M_LDR           pVlcTableL0L1,ppVlcTableL0L1                    ;// Load Combined VLC Table
+                
+       
+        CMP             ftype,#3                                        ;// If ftype >= 3 perform Fixed Length Decoding (Escape Mode 3)
+        BGE             EscapeMode3                                     ;// Else continue normal VLC Decoding
+        
+        ;// Variable length decoding, "armUnPackVLC32" 
+        
+        
+        M_BD_VLD        packRetIndex,T1,T2,pVlcTableL0L1,4,2
+        
+        
+        LDR             temp3,=0xFFF
+        
+        CMP             packRetIndex,temp3                              ;// Check for invalid symbol
+        BEQ             ExitError                                       ;// if invalid symbol occurs exit with an error message
+        
+        AND             Last,packRetIndex,#2                            ;// Get Last from packed Index (Last is 0 or 2 here)
+              
+         
+        
+
+        LSR             storeRun,packRetIndex,#7                        ;// Get Run Value from Packed index
+        AND             storeLevel,packRetIndex,#0x7c                   ;// storeLevel=packRetIndex[2-6],storeLevel[0-1]=0 
+                                                                        
+     
+        M_LDR           pLMAXTableL0L1,ppLMAXTableL0L1                  ;// Load LMAX table
+              
+       
+        LSR             storeLevel,storeLevel,#2                        ;// Level value
+
+        CMP             ftype,#1                                    
+        BNE             ftype2
+        
+        ;// ftype==1; Escape mode =1
+          
+        
+        ADD            temp1, pLMAXTableL0L1, Last, LSL#4              ;// If Last is set (value 2) add 32 to table address
+        LDRB            temp1,[temp1,storeRun]                          ;// LMAX entry selected by the run
+
+       
+        ADD             storeLevel,temp1,storeLevel                     ;// level = level + LMAX
+
+ftype2
+
+        ;// ftype =2; Escape mode =2
+        
+        M_LDR           pRMAXTableL0L1,ppRMAXTableL0L1                  ;// Load RMAX Table 
+                
+        CMP             ftype,#2
+        BNE             FillVLDL1
+                  
+        ADD            temp1, pRMAXTableL0L1, Last, LSL#4               ;// If Last is set (value 2) add 32 to table address
+        SUB             temp2,storeLevel,#1                             ;// index is level - 1 (temp2 shares r4 with the table base, already folded into temp1)
+        LDRB            temp1,[temp1,temp2]
+
+       
+        ADD             storeRun,storeRun,#1
+        ADD             storeRun,temp1                                  ;// run = run + RMAX + 1
+        
+FillVLDL1        
+            
+                
+        ;// armVCM4P2_FillVLDBuffer
+
+        M_LDR           pZigzagTable,ppZigzagTable                     ;// Load address of zigzagTable 
+                
+        M_BD_READ8      sign,1,T1
+
+        CMP             sign,#1
+        RSBEQ           storeLevel,storeLevel,#0                        ;// negate the level when the sign bit is 1
+ 
+        ADD             temp1,storeRun,Count                           ;// Exit with an error message if Run + Count exceeds 63
+        CMP             temp1,#64
+        BGE             ExitError
+
+      
+        
+        
+              
+        
+        ;// Advance Count by the run length
+
+        ADD             Count,Count,storeRun
+ 
+storeLevelL1
+        
+        LDRB            zigzag,[pZigzagTable,Count]
+        CMP             Last,#2                                         ;// Check if the Level val is Last non zero val (flags are used by BNE below)
+        ADD             Count,Count,#1
+        LSR             Last,Last,#1                                    ;// turn Last from 0/2 into 0/1, the form ExitOk tests
+        STRH            storeLevel,[pDst,zigzag]                  
+           
+        BNE             end                                             ;// not the last coefficient: continue the loop
+        
+        B               ExitOk
+ 
+
+
+        ;// Fixed Length Decoding Escape Mode 3
+
+EscapeMode3
+
+        M_BD_READ8      Last,1,T1
+        M_BD_READ8      storeRun,6,T1
+        
+        ADD             temp2,storeRun,Count                            ;// Exit with an error message if Run + Count exceeds 63
+        CMP             temp2,#64
+        BGE             ExitError
+
+        M_BD_READ8      markerbit,1,T1
+        TEQ             markerbit,#0                                    ;// Exit with an error message if marker bit is zero
+        BEQ             ExitError
+        
+        M_BD_READ16     storeLevel,12,T1
+
+        TST             storeLevel,#0x800                               ;// test if the level is negative
+        SUBNE           storeLevel,storeLevel,#4096                     ;// sign extend the 12-bit value
+        CMP             storeLevel,#0
+        CMPNE           storeLevel,#-2048
+        BEQ             ExitError                                       ;// Exit with an error message if Level==0 or  -2048 
+
+        M_LDR           pZigzagTable,ppZigzagTable                      ;// Load address of zigzagTable
+              
+        M_BD_READ8      markerbit,1,T1                                  ;// second marker bit is read but its value is not checked
+           
+
+        ;// armVCM4P2_FillVLDBuffer ( Sign not used as storeLevel is preprocessed)
+            
+               
+
+        ;// Advance Count by the run length
+
+        ADD             Count,Count,storeRun
+
+
+ 
+storeLevelLast
+        
+        LDRB            zigzag,[pZigzagTable,Count]
+        CMP             Last,#1                                         ;// flags are used by BNE below
+        ADD             Count,Count,#1
+        STRH            storeLevel,[pDst,zigzag]                          
+                
+        BNE             end 
+      
+        B               ExitOk
+        
+end
+
+        CMP             Count,#64                                       ;// Run the loop until Count reaches 64
+
+        BLT             getVLCbits
+
+        
+ExitOk
+        ;// Reached after a symbol with Last set, after Escape Mode 4, or when Count reaches 64
+   
+        ;// Loading ppBitStream and pBitOffset from stack
+        
+        CMP             Last,#1                                         ;// result is consumed by MOVEQ/MOVNE below (presumably the macros in between keep the flags)
+        M_LDR           ppBitStream,pppBitStream
+        M_LDR           pBitOffset,ppOffset
+
+        ;//Ending the macro
+
+        M_BD_FINI       ppBitStream,pBitOffset
+             
+        MOVEQ           Return,#OMX_Sts_NoErr                           ;// Last == 1: success
+        MOVNE           Return,#OMX_Sts_Err                             ;// otherwise: error
+        M_LDR           LR,pLinkRegister                               ;// Load the Link Register Back
+        B               exit2
+
+ExitError
+        ;// Exit When an Error occurs 
+
+        M_LDR           ppBitStream,pppBitStream
+        M_LDR           pBitOffset,ppOffset
+        ;//Ending the macro
+
+        M_BD_FINI       ppBitStream,pBitOffset
+        M_LDR           LR,pLinkRegister
+        MOV             Return,#OMX_Sts_Err
+
+exit2
+       
+
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
new file mode 100755
index 0000000..38af975
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
@@ -0,0 +1,211 @@
+ /**
+ * 
+ * File Name:  armVCM4P2_Huff_Tables_VLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File:        armVCM4P2_Huff_Tables_VLC.c
+ * Description: Contains all the Huffman tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armCOMM_Bitstream.h"
+
+
+
+
+// Contains optimized and Packed VLC tables with Last=0 and Last=1
+
+//              optimized Packed VLC table Entry Format 
+//              ---------------------------------------
+// 
+//        15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+//       +------------------------------------------------+
+//       |  Len   |       Run       |     Level    |L | 1 |
+//       +------------------------------------------------+
+//       |                Offset                      | 0 |
+//       +------------------------------------------------+
+// If the table entry is a leaf entry then bit 0 set:
+//    Len    = Number of bits overread  (0 to 7)  3 bits
+//    Run    = RunLength of the Symbol  (0 to 63) 6 bits
+//    Level  = Level of the Symbol      (0 to 31) 5 bits
+//    L      = Last Value of the Symbol (0 or 1)  1 bit
+//
+// If the table entry is an internal node then bit 0 is clear:
+//    Offset = Number of (16-bit) half words from the table
+//             start to the next table node
+//
+// The table is accessed by successive lookup up on the
+// next Step bits of the input bitstream until a leaf node
+// is obtained. The Step sizes are supplied to the VLD macro.
+
+// The VLC tables used for Intra and non-Intra coefficients in non-Escape mode
+// contain symbols with both Last=0 and Last=1.
+// If a symbol is not found in the table it will be coded as 0xFFF
+ 
+
+const OMX_U16 armVCM4P2_InterVlcL0L1[200] = {
+    0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x1b09,
+    0x4009, 0x4009, 0x4009, 0x4009, 0x2109, 0x2109, 0x0209, 0x0011,
+    0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+    0x3fff, 0x3fff, 0x0038, 0x0040, 0x2115, 0x2115, 0x201d, 0x201d,
+    0x2059, 0x2059, 0x2051, 0x2051, 0x1c0d, 0x1b0d, 0x1a0d, 0x190d,
+    0x0911, 0x0811, 0x0711, 0x0611, 0x0511, 0x0319, 0x0219, 0x0121,
+    0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+    0x2061, 0x2061, 0x2129, 0x2129, 0x3709, 0x3709, 0x3809, 0x3809,
+    0x3d0d, 0x3d0d, 0x3e0d, 0x3e0d, 0x3f0d, 0x3f0d, 0x200d, 0x200d,
+    0x0098, 0x00a0, 0x00a8, 0x00b0, 0x0131, 0x0221, 0x0419, 0x0519,
+    0x0619, 0x0a11, 0x1909, 0x1a09, 0x210d, 0x220d, 0x230d, 0x240d,
+    0x250d, 0x260d, 0x270d, 0x280d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+    0x0049, 0x0041, 0x380d, 0x380d, 0x370d, 0x370d, 0x360d, 0x360d,
+    0x350d, 0x350d, 0x340d, 0x340d, 0x330d, 0x330d, 0x320d, 0x320d,
+    0x00e8, 0x00f0, 0x00f8, 0x0100, 0x310d, 0x310d, 0x2015, 0x2015,
+    0x3609, 0x3609, 0x3509, 0x3509, 0x3409, 0x3409, 0x3309, 0x3309,
+    0x3209, 0x3209, 0x3109, 0x3109, 0x0110, 0x0130, 0x0138, 0x0140,
+    0x0118, 0x0120, 0x0128, 0x100d, 0x3009, 0x3009, 0x2f09, 0x2f09,
+    0x2411, 0x2411, 0x2311, 0x2311, 0x2039, 0x2039, 0x2031, 0x2031,
+    0x0f0d, 0x0e0d, 0x0d0d, 0x0c0d, 0x0b0d, 0x0a0d, 0x090d, 0x0e09,
+    0x0d09, 0x0211, 0x0119, 0x0029, 0x0150, 0x0158, 0x0160, 0x0168,
+    0x280d, 0x280d, 0x270d, 0x270d, 0x260d, 0x260d, 0x250d, 0x250d,
+    0x2c09, 0x2c09, 0xb759, 0xb759, 0x2a09, 0x2a09, 0x2021, 0x2021,
+    0x040d, 0x030d, 0x0b35, 0x010d, 0x0909, 0x0809, 0x0709, 0x0609,
+    0x0111, 0x0019, 0x2509, 0x2509, 0x2409, 0x2409, 0x2309, 0x2309
+};
+
+
+const OMX_U16 armVCM4P2_IntraVlcL0L1[200] = {
+    0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x0f09,
+    0x4009, 0x4009, 0x4009, 0x4009, 0x2011, 0x2011, 0x0109, 0x0019,
+    0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+    0x3fff, 0x3fff, 0x0038, 0x0040, 0x203d, 0x203d, 0x2035, 0x2035,
+    0x20b1, 0x20b1, 0x20a9, 0x20a9, 0x0215, 0x011d, 0x002d, 0x0d09,
+    0x0519, 0x0811, 0x0419, 0x0321, 0x0221, 0x0139, 0x00a1, 0x0099,
+    0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+    0x20b9, 0x20b9, 0x20c1, 0x20c1, 0x2141, 0x2141, 0x2911, 0x2911,
+    0x2315, 0x2315, 0x2415, 0x2415, 0x2f0d, 0x2f0d, 0x300d, 0x300d,
+    0x0098, 0x00a0, 0x00a8, 0x00b0, 0x00c9, 0x00d1, 0x00d9, 0x0149,
+    0x0619, 0x0151, 0x0229, 0x0719, 0x0e09, 0x0045, 0x0515, 0x0615,
+    0x110d, 0x120d, 0x130d, 0x140d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+    0x0091, 0x0089, 0x2e0d, 0x2e0d, 0x2d0d, 0x2d0d, 0x2c0d, 0x2c0d,
+    0x2b0d, 0x2b0d, 0x2a0d, 0x2a0d, 0x2115, 0x2115, 0x2025, 0x2025,
+    0x00e8, 0x00f0, 0x00f8, 0x0100, 0x2c09, 0x2c09, 0x2b09, 0x2b09,
+    0x2711, 0x2711, 0x2611, 0x2611, 0x2511, 0x2511, 0x2319, 0x2319,
+    0x2219, 0x2219, 0x2131, 0x2131, 0x0110, 0x0130, 0x0138, 0x0140,
+    0x0118, 0x0120, 0x0128, 0x080d, 0x2129, 0x2129, 0x2081, 0x2081,
+    0x2411, 0x2411, 0x2079, 0x2079, 0x2071, 0x2071, 0x2069, 0x2069,
+    0x1bb5, 0x060d, 0x001d, 0xd3f9, 0x0909, 0x0809, 0x090d, 0x0311,
+    0x0121, 0x0061, 0x0059, 0x0051, 0x0150, 0x0158, 0x0160, 0x0168,
+    0x240d, 0x240d, 0x230d, 0x230d, 0x2609, 0x2609, 0x250d, 0x250d,
+    0x2709, 0x2709, 0x2211, 0x2211, 0x2119, 0x2119, 0x2049, 0x2049,
+    0x0015, 0x0509, 0x020d, 0x010d, 0x0409, 0x0309, 0x0041, 0x0039,
+    0x0111, 0x0031, 0x2209, 0x2209, 0x2029, 0x2029, 0x2021, 0x2021
+};
+
+const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64] = {
+    0x0020, 0x000b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2001, 0x2001,
+    0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003,
+    0x0028, 0x000f, 0x200d, 0x200d, 0x0030, 0x0013, 0x2011, 0x2011,
+    0x0038, 0x0017, 0x2015, 0x2015, 0x3fff, 0x3fff, 0x2019, 0x2019,
+
+	0x0020, 0x0009, 0x2007, 0x2007, 0x4005, 0x4005, 0x4005, 0x4005,
+    0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001,
+    0x0028, 0x000d, 0x200b, 0x200b, 0x0030, 0x0011, 0x200f, 0x200f,
+    0x0038, 0x0015, 0x2013, 0x2013, 0x1fff, 0x0019, 0x2017, 0x2017
+};
+
+
+const OMX_U16 armVCM4P2_aVlcMVD[124] = {
+    0x0010, 0x00f0, 0x0043, 0x003f, 0x4041, 0x4041, 0x4041, 0x4041,
+    0x0018, 0x00d8, 0x0047, 0x003b, 0x0020, 0x0080, 0x00a8, 0x00d0,
+    0x0028, 0x0048, 0x0070, 0x0078, 0x1fff, 0x0030, 0x0038, 0x0040,
+    0x0081, 0x0001, 0x007f, 0x0003, 0x207d, 0x207d, 0x2005, 0x2005,
+    0x207b, 0x207b, 0x2007, 0x2007, 0x0050, 0x0058, 0x0060, 0x0068,
+    0x2079, 0x2079, 0x2009, 0x2009, 0x2077, 0x2077, 0x200b, 0x200b,
+    0x2075, 0x2075, 0x200d, 0x200d, 0x2073, 0x2073, 0x200f, 0x200f,
+    0x0071, 0x0011, 0x006f, 0x0013, 0x006d, 0x0015, 0x006b, 0x0017,
+    0x0088, 0x0090, 0x0098, 0x00a0, 0x0069, 0x0019, 0x0067, 0x001b,
+    0x0065, 0x001d, 0x0063, 0x001f, 0x0061, 0x0021, 0x005f, 0x0023,
+    0x005d, 0x0025, 0x005b, 0x0027, 0x00b0, 0x00b8, 0x00c0, 0x00c8,
+    0x0059, 0x0029, 0x0057, 0x002b, 0x2055, 0x2055, 0x202d, 0x202d,
+    0x2053, 0x2053, 0x202f, 0x202f, 0x2051, 0x2051, 0x2031, 0x2031,
+    0x204f, 0x204f, 0x2033, 0x2033, 0x00e0, 0x00e8, 0x0049, 0x0039,
+    0x204d, 0x204d, 0x2035, 0x2035, 0x204b, 0x204b, 0x2037, 0x2037,
+    0x2045, 0x2045, 0x203d, 0x203d
+};
+
+/* LMAX table for Inter blocks (Last == 0 and Last == 1)
+   Indexed by Run; the Last == 1 entries start at index 32
+   padded armVCM4P2_InterL0L1LMAX[27-31] with zeros to access entries for Last=1 effectively
+
+*/
+const OMX_U8 armVCM4P2_InterL0L1LMAX[73] = 
+{
+   12,  6,  4,  3,  3,  3,  3,  2, 
+    2,  2,  2,  1,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1,
+    1,  1,  1,  0,  0,  0,  0,  0,
+    3,  2,  1,  1,  1,  1,  1,  1, 
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1
+};
+
+/* RMAX table for Inter blocks (Last == 0 and Last == 1)
+   Level - 1 Indexed; the Last == 1 entries start at index 32
+ padded armVCM4P2_InterL0L1RMAX[12-31] with zeros to access entries for Last=1 table effectively */
+
+
+const OMX_U8 armVCM4P2_InterL0L1RMAX[35] = 
+{
+   26, 10,  6,  2,  1,  1,   
+    0,  0,  0,  0,  0,  0,
+	0,	0,	0,	0,	0,	0,
+	0,	0,	0,	0,	0,	0,
+	0,	0,	0,	0,
+    0,  0,  0,  0,  40,  1,  0
+};
+
+/* LMAX table for Intra blocks (Last == 0 and Last == 1)
+   Indexed by Run; the Last == 1 entries start at index 32
+   padded armVCM4P2_IntraL0L1LMAX[15-31] with zeros to access entries for Last=1 effectively
+
+*/
+const OMX_U8 armVCM4P2_IntraL0L1LMAX[53] = 
+{
+   27, 10,  5,  4,  3,  3,  3,  
+    3,  2,  2,  1,  1,  1,  1,  1,	0,
+	0,	0,	0,	0,	0,	0,	0,	0,
+	0,	0,	0,	0,	0,	0,	0,	0,
+
+	8,  3,  2,  2,  2,  2,  2,  1, 
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1
+};
+
+
+/* RMAX table for Intra blocks (Last == 0 and Last == 1)
+   Level - 1 Indexed; the Last == 1 entries start at index 32
+ padded armVCM4P2_IntraL0L1RMAX[27-31] with zeros to access entries for Last=1 table effectively */
+
+
+const OMX_U8 armVCM4P2_IntraL0L1RMAX[40] =
+{
+   14,  9,  7,  3,  2,  1,	1,  
+    1,  1,  1,  0,  0,  0, 	0,  
+    0,  0,  0,  0,  0,  0,  0,  
+    0,  0,  0,  0,  0,  0,  0,
+	0,	0,	0,	0,
+	
+	20,  6,  1,  0,  0,  0,  0,  0
+
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Lookup_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
new file mode 100755
index 0000000..6948f80
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
@@ -0,0 +1,75 @@
+ /**
+ * 
+ * File Name:  armVCM4P2_Lookup_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File:        armVCM4P2_Lookup_Tables.c
+ * Description: Contains all the Lookup tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+    /* * Table Entries contain Dc Scaler values
+       * armVCM4P2_DCScaler[i]= 8           for i=1  to  4 and i=33 to 36
+       *                      = 2*i         for i=5  to  8
+       *                      = i+8         for i=9  to  24
+       *                      = 2*i-16      for i=25 to  31
+       *                      = (i-32+13)/2 for i=37 to  56
+       *                      = i-6-32      for i=57 to  63
+       *                      = 255         for i=0 and i=32
+       */
+       
+const OMX_U8 armVCM4P2_DCScaler[64]={
+	0xff, 0x8,  0x8,  0x8,  0x8,  0xa,  0xc,  0xe,  
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e,
+    0xff, 0x8,  0x8,  0x8,  0x8,  0x9,  0x9,  0xa,  
+    0xa,  0xb,  0xb,  0xc,  0xc,  0xd,  0xd,  0xe,  
+    0xe,  0xf,  0xf,  0x10, 0x10, 0x11, 0x11, 0x12, 
+    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+
+};
+
+              
+     /*  Table Entries Contain reciprocal of 1 to 63
+      *  armVCM4P2_Reciprocal_QP_S16[i]=round(32767/i)
+      *  armVCM4P2_Reciprocal_QP_S16[0]= 0
+      */
+
+const OMX_S16 armVCM4P2_Reciprocal_QP_S16[64]={
+	0x0000,0x7fff,0x4000,0x2aaa,0x2000,0x1999,0x1555,0x1249,
+    0x1000,0x0e39,0x0ccd,0x0ba3,0x0aab,0x09d9,0x0925,0x0888,
+    0x0800,0x0787,0x071c,0x06bd,0x0666,0x0618,0x05d1,0x0591,
+    0x0555,0x051f,0x04ec,0x04be,0x0492,0x046a,0x0444,0x0421,
+    0x0400,0x03e1,0x03c4,0x03a8,0x038e,0x0376,0x035e,0x0348,
+    0x0333,0x031f,0x030c,0x02fa,0x02e9,0x02d8,0x02c8,0x02b9,
+    0x02ab,0x029d,0x028f,0x0282,0x0276,0x026a,0x025f,0x0254,
+    0x0249,0x023f,0x0235,0x022b,0x0222,0x0219,0x0211,0x0208
+	   
+};
+     
+      /* Table Entries Contain reciprocal of 1 to 63
+       * armVCM4P2_Reciprocal_QP_S32[i]=round(131071/i)
+       * armVCM4P2_Reciprocal_QP_S32[0]= 0
+       */
+
+const OMX_S32 armVCM4P2_Reciprocal_QP_S32[64]={
+	0x00000000,0x0001ffff,0x00010000,0x0000aaaa, 0x00008000, 0x00006666, 0x00005555, 0x00004924,
+    0x00004000,0x000038e3,0x00003333,0x00002e8c, 0x00002aab, 0x00002762, 0x00002492, 0x00002222,
+    0x00002000,0x00001e1e,0x00001c72,0x00001af2, 0x0000199a, 0x00001861, 0x00001746, 0x00001643,
+    0x00001555,0x0000147b,0x000013b1,0x000012f6, 0x00001249, 0x000011a8, 0x00001111, 0x00001084,
+    0x00001000,0x00000f84,0x00000f0f,0x00000ea1, 0x00000e39, 0x00000dd6, 0x00000d79, 0x00000d21,
+    0x00000ccd,0x00000c7d,0x00000c31,0x00000be8, 0x00000ba3, 0x00000b61, 0x00000b21, 0x00000ae5,
+    0x00000aab,0x00000a73,0x00000a3d,0x00000a0a, 0x000009d9, 0x000009a9, 0x0000097b, 0x0000094f,
+    0x00000925,0x000008fb,0x000008d4,0x000008ae, 0x00000889, 0x00000865, 0x00000842, 0x00000820
+	
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_SetPredDir_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
new file mode 100755
index 0000000..44f2460
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
@@ -0,0 +1,104 @@
+;//
+;// 
+;// File Name:  armVCM4P2_SetPredDir_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+; **
+; * Function: armVCM4P2_SetPredDir
+; *
+; * Description:
+; * Detects the prediction direction and selects the predictor QP
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] blockIndex  block index indicating the component type and
+; *                          position as defined in subclause 6.1.3.8, of ISO/IEC
+; *                          14496-2. Furthermore, indexes 6 to 9 indicate the
+; *                          alpha blocks spatially corresponding to luminance
+; *                          blocks 0 to 3 in the same macroblock.
+; * [in] pCoefBufRow pointer to the coefficient row buffer
+; * [in] pQpBuf      pointer to the quantization parameter buffer
+; * [out]predQP      quantization parameter of the predictor block
+; * [out]predDir     indicates the prediction direction which takes one
+; *                  of the following values:
+; *                  OMX_VC_HORIZONTAL    predict horizontally
+; *                  OMX_VC_VERTICAL      predict vertically
+; *
+; * Return Value:
+; * Standard OMXResult result. See enumeration for possible result codes.
+; *
+; */
+
+       INCLUDE omxtypes_s.h
+       INCLUDE armCOMM_s.h
+       INCLUDE omxVC_s.h
+
+
+       M_VARIANTS ARM1136JS
+
+
+       IF ARM1136JS
+ 
+;// Input Arguments
+BlockIndex         RN 0
+pCoefBufRow        RN 1
+pCoefBufCol        RN 2
+predDir            RN 3                  ;// pointer: receives the prediction direction
+predQP             RN 4                  ;// pointer: loaded from stack argument ppredQP
+pQpBuf             RN 5                  ;// pointer: loaded from stack argument ppQpBuf
+
+;// Local Variables
+
+Return             RN 0                  ;// same register as BlockIndex
+blockDCLeft        RN 6  
+blockDCTop         RN 7
+blockDCTopLeft     RN 8
+temp1              RN 9
+temp2              RN 14
+
+       M_START    armVCM4P2_SetPredDir,r9
+
+       M_ARG       ppredQP,4             ;// stack argument holding the predQP pointer
+       M_ARG       ppQpBuf,4             ;// stack argument holding the pQpBuf pointer
+       ;// Fetch the three neighbouring DC values as halfwords
+       LDRH        blockDCTopLeft,[pCoefBufRow,#-16]   ;// halfword 16 bytes before pCoefBufRow
+       LDRH        blockDCLeft,[pCoefBufCol]           ;// first halfword at pCoefBufCol
+       ;// NOTE(review): LDRH zero-extends - confirm the stored DC values are never negative
+       TEQ         BlockIndex,#3
+       LDREQH      blockDCTop,[pCoefBufCol,#-16]       ;// block 3: halfword 16 bytes before pCoefBufCol
+       LDRNEH      blockDCTop,[pCoefBufRow]            ;// other blocks: first halfword at pCoefBufRow
+             
+       SUBS        temp1,blockDCLeft,blockDCTopLeft
+       RSBLT       temp1,temp1,#0                      ;// temp1 = |blockDCLeft - blockDCTopLeft|
+       SUBS        temp2,blockDCTopLeft,blockDCTop
+       RSBLT       temp2,temp2,#0                      ;// temp2 = |blockDCTopLeft - blockDCTop|
+      
+       M_LDR       pQpBuf,ppQpBuf
+       M_LDR       predQP,ppredQP
+       CMP         temp1,temp2                         ;// these flags drive every LT/GE instruction below
+       MOV         temp2,#OMX_VC_VERTICAL
+       LDRLTB      temp1,[pQpBuf,#1]                   ;// temp1 < temp2: take pQpBuf[1]
+       STRLT       temp2,[predDir]                     ;//   *predDir = OMX_VC_VERTICAL
+       STRLT       temp1,[predQP]                      ;//   *predQP  = pQpBuf[1]
+       MOV         temp2,#OMX_VC_HORIZONTAL           
+       LDRGEB      temp1,[pQpBuf]                      ;// temp1 >= temp2: take pQpBuf[0]
+       STRGE       temp2,[predDir]                     ;//   *predDir = OMX_VC_HORIZONTAL
+       MOV         Return,#OMX_Sts_NoErr               ;// always returns OMX_Sts_NoErr
+       STRGE       temp1,[predQP]                      ;//   *predQP  = pQpBuf[0]
+
+         
+    
+       M_END
+ 
+       ENDIF
+
+       END    
+    
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
new file mode 100755
index 0000000..21fa715
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
@@ -0,0 +1,61 @@
+/**
+ * 
+ * File Name:  armVCM4P2_Zigzag_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File:        armVCM4P2_ZigZag_Tables.c
+ * Description: Contains the zigzag tables
+ *
+ */
+
+#include "omxtypes.h"
+
+/* Zigzag scan tables. Every entry is double the index found in the reference
+ * zigzag table. The classical, vertical and horizontal zigzag scans are stored
+ * back to back in one array, 64 entries each. */
+
+const OMX_U8 armVCM4P2_aClassicalZigzagScan [192] = 
+{
+    /* Entries 0..63: classical zigzag scan */
+      0,   2,  16,  32,  18,   4,   6,  20,
+     34,  48,  64,  50,  36,  22,   8,  10,
+     24,  38,  52,  66,  80,  96,  82,  68,
+     54,  40,  26,  12,  14,  28,  42,  56,
+     70,  84,  98, 112, 114, 100,  86,  72,
+     58,  44,  30,  46,  60,  74,  88, 102,
+    116, 118, 104,  90,  76,  62,  78,  92,
+    106, 120, 122, 108,  94, 110, 124, 126,
+    /* Entries 64..127: vertical zigzag scan */
+      0,  16,  32,  48,   2,  18,   4,  20,
+     34,  50,  64,  80,  96, 112, 114,  98,
+     82,  66,  52,  36,   6,  22,   8,  24,
+     38,  54,  68,  84, 100, 116,  70,  86,
+    102, 118,  40,  56,  10,  26,  12,  28,
+     42,  58,  72,  88, 104, 120,  74,  90,
+    106, 122,  44,  60,  14,  30,  46,  62,
+     76,  92, 108, 124,  78,  94, 110, 126,
+    /* Entries 128..191: horizontal zigzag scan */
+      0,   2,   4,   6,  16,  18,  32,  34,
+     20,  22,   8,  10,  12,  14,  30,  28,
+     26,  24,  38,  36,  48,  50,  64,  66,
+     52,  54,  40,  42,  44,  46,  56,  58,
+     60,  62,  68,  70,  80,  82,  96,  98,
+     84,  86,  72,  74,  76,  78,  88,  90,
+     92,  94, 100, 102, 112, 114, 116, 118,
+    104, 106, 108, 110, 120, 122, 124, 126
+
+};
+
+
+
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
new file mode 100755
index 0000000..796ad6e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
@@ -0,0 +1,102 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_DecodeBlockCoef_Inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for inter reconstruction
+ * 
+ */
+ 
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Inter
+ *
+ * Description:
+ * Decodes the INTER block coefficients. Inverse quantization, inversely zigzag
+ * positioning and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results (residuals) are placed in a contiguous array
+ * of 64 elements. For INTER block, the output buffer holds the residuals for
+ * further reconstruction.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte in
+ *								the bit stream buffer. There is no boundary
+ *								check for the bit stream buffer.
+ * [in]	pBitOffset		pointer to the bit position in the byte pointed
+ *								to by *ppBitStream. *pBitOffset is valid within
+ *								[0-7]
+ * [in]	QP				quantization parameter
+ * [in] shortVideoHeader    a flag indicating presence of short_video_header;
+ *                           shortVideoHeader==1 indicates using quantization method defined in short
+ *                           video header mode, and shortVideoHeader==0 indicates normal quantization method.
+ * [out] ppBitStream 	*ppBitStream is updated after the block is decoded, so that it points to the
+ *                      current byte in the bit stream buffer.
+ * [out] pBitOffset		*pBitOffset is updated so that it points to the current bit position in the
+ *                      byte pointed by *ppBitStream
+ * [out] pDst			pointer to the decoded residual buffer (a contiguous array of 64 elements of
+ *                      OMX_S16 data type). Must be 16-byte aligned.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *   - At least one of the following pointers is Null: ppBitStream, *ppBitStream, pBitOffset , pDst
+ *   - At least one of the below case:
+ *   - *pBitOffset exceeds [0,7], QP <= 0;
+ *	 - pDst not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT * pBitOffset,
+     OMX_S16 * pDst,
+     OMX_INT QP,
+     OMX_INT shortVideoHeader
+)
+{
+    /* Scratch storage: 64 coefficients are used; the 15 spare elements
+       give room to move the start up to a 16-byte boundary */
+    OMX_S16 scratch[79];
+    OMX_S16 *pCoef;
+    OMXResult status;
+
+    /* Working pointer: the 16-byte aligned start chosen inside the
+       scratch storage */
+    pCoef = armAlignTo16Bytes(scratch);
+
+    /* Stage 1: VLD and zigzag into the scratch storage */
+    status = omxVCM4P2_DecodeVLCZigzag_Inter(
+        ppBitStream, pBitOffset, pCoef, shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 2: dequantization of the scratch storage */
+    status = omxVCM4P2_QuantInvInter_I(pCoef, QP);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 3: inverse transform from the scratch storage into pDst */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pDst);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* No stage reported an error */
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
new file mode 100755
index 0000000..b28657c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
@@ -0,0 +1,214 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_DecodeBlockCoef_Intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   12290
+ * Date:       Wednesday, April 9, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for intra reconstruction
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/* Function for saturating 16 bit values to the [0,255] range and  */
+/* writing out as 8 bit values.  Does 64 entries                   */
+void armVCM4P2_Clip8(OMX_S16 *pSrc, OMX_U8 *pDst, OMX_INT dstStep );
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Intra
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inversely zigzag
+ * positioning, and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results are then placed in the output frame/plane on
+ * a pixel basis. For INTRA block, the output values are clipped to [0, 255] and
+ * written to corresponding block buffer within the destination plane.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte in
+ *								the bit stream buffer. There is no boundary
+ *								check for the bit stream buffer.
+ * [in]	pBitOffset		pointer to the bit position in the byte pointed
+ *								to by *ppBitStream. *pBitOffset is valid within
+ *								[0-7].
+ * [in]	step			width of the destination plane
+ * [in/out]	pCoefBufRow		[in]  pointer to the coefficient row buffer
+ *                        [out] updated coefficient row buffer
+ * [in/out]	pCoefBufCol		[in]  pointer to the coefficient column buffer
+ *                        [out] updated coefficient column buffer
+ * [in]	curQP			quantization parameter of the macroblock which
+ *								the current block belongs to
+ * [in]	pQpBuf		 Pointer to a 2-element QP array. pQpBuf[0] holds the QP of the 8x8 block left to
+ *                   the current block(QPa). pQpBuf[1] holds the QP of the 8x8 block just above the
+ *                   current block(QPc).
+ *                   Note, in case the corresponding block is out of VOP bound, the QP value will have
+ *                   no effect to the intra-prediction process. Refer to subclause  "7.4.3.3 Adaptive
+ *                   ac coefficient prediction" of ISO/IEC 14496-2(MPEG4 Part2) for accurate description.
+ * [in]	blockIndex		block index indicating the component type and
+ *								position as defined in subclause 6.1.3.8,
+ *								Figure 6-5 of ISO/IEC 14496-2. 
+ * [in]	intraDCVLC		a code determined by intra_dc_vlc_thr and QP.
+ *								This allows a mechanism to switch between two VLC
+ *								for coding of Intra DC coefficients as per Table
+ *								6-21 of ISO/IEC 14496-2. 
+ * [in]	ACPredFlag		a flag equal to ac_pred_flag (of luminance) indicating
+ *								if the ac coefficients of the first row or first
+ *								column are differentially coded for intra coded
+ *								macroblock.
+ * [in] shortVideoHeader    a flag indicating presence of short_video_header;
+ *                           shortVideoHeader==1 selects linear intra DC mode,
+ *							and shortVideoHeader==0 selects nonlinear intra DC mode.
+ * [out]	ppBitStream		*ppBitStream is updated after the block is
+ *								decoded, so that it points to the current byte
+ *								in the bit stream buffer
+ * [out]	pBitOffset		*pBitOffset is updated so that it points to the
+ *								current bit position in the byte pointed by
+ *								*ppBitStream
+ * [out]	pDst			pointer to the block in the destination plane.
+ *								pDst should be 16-byte aligned.
+ * [out]	pCoefBufRow		pointer to the updated coefficient row buffer.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *   -	At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset,
+ *                                                      pCoefBufRow, pCoefBufCol, pQPBuf, pDst.
+ *      or
+ *   -  At least one of the below case: *pBitOffset exceeds [0,7], curQP exceeds (1, 31),
+ *      blockIndex exceeds [0,9], step is not the multiple of 8, intraDCVLC is zero while
+ *      blockIndex greater than 5.
+ *      or
+ *   -	pDst is not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT *pBitOffset,
+     OMX_U8 *pDst,
+     OMX_INT step,
+     OMX_S16 *pCoefBufRow,
+     OMX_S16 *pCoefBufCol,
+     OMX_U8 curQP,
+     const OMX_U8 *pQPBuf,
+     OMX_INT blockIndex,
+     OMX_INT intraDCVLC,
+     OMX_INT ACPredFlag,
+	 OMX_INT shortVideoHeader
+ )
+{
+    /* Two scratch arrays. Each holds more than the 64 values that are
+       needed so that a 16-byte aligned start can be picked inside it. */
+    OMX_S16 coefStore[79];
+    OMX_S16 idctStore[79];
+    OMX_S16 *pCoef;
+    OMX_S16 *pIdctOut;
+
+    /* Outputs of armVCM4P2_SetPredDir */
+    OMX_INT dcDir;
+    OMX_INT neighbourQP;
+
+    /* Direction handed to the VLC decoders (OMX_VC_NONE when AC
+       prediction is off) */
+    OMX_INT acDir;
+
+    OMXVCM4P2VideoComponent component;
+    OMXResult status;
+
+    /* Pick the aligned working pointers inside the scratch arrays */
+    pCoef    = armAlignTo16Bytes(coefStore);
+    pIdctOut = armAlignTo16Bytes(idctStore);
+
+    /* Obtain the prediction direction and the predictor block's QP */
+    armVCM4P2_SetPredDir(
+        blockIndex,
+        pCoefBufRow,
+        pCoefBufCol,
+        &dcDir,
+        &neighbourQP,
+        pQPBuf);
+
+    /* The AC direction mirrors the DC direction, except that it is
+       forced to OMX_VC_NONE when ACPredFlag is zero */
+    acDir = (ACPredFlag == 0) ? OMX_VC_NONE : dcDir;
+
+    /* Block indexes 0..3 are treated as luminance, all others as
+       chrominance */
+    component = (blockIndex <= 3) ? OMX_VC_LUMINANCE : OMX_VC_CHROMINANCE;
+
+    /* VLD and zigzag. intraDCVLC == 1 selects the decoder that also
+       takes the video component; any other value selects the AC VLC
+       decoder. */
+    if (intraDCVLC != 1)
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraACVLC(
+            ppBitStream,
+            pBitOffset,
+            pCoef,
+            acDir,
+            shortVideoHeader);
+        armRetDataErrIf((status != OMX_Sts_NoErr), status);
+    }
+    else
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(
+            ppBitStream,
+            pBitOffset,
+            pCoef,
+            acDir,
+            shortVideoHeader,
+            component);
+        armRetDataErrIf((status != OMX_Sts_NoErr), status);
+    }
+
+    /* AC DC prediction against the row and column coefficient buffers */
+    status = omxVCM4P2_PredictReconCoefIntra(
+        pCoef,
+        pCoefBufRow,
+        pCoefBufCol,
+        curQP,
+        neighbourQP,
+        dcDir,
+        ACPredFlag,
+        component);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Dequantization of the coefficient block */
+    status = omxVCM4P2_QuantInvIntra_I(
+        pCoef,
+        curQP,
+        component,
+        shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Inverse transform into the second scratch array */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pIdctOut);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Clip the transformed block to 0..255 and place it into the
+       destination plane */
+    armVCM4P2_Clip8(pIdctOut, pDst, step);
+
+    /* No stage reported an error */
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
new file mode 100755
index 0000000..cc16f5a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
@@ -0,0 +1,364 @@
+; **********
+; * 
+; * File Name:  omxVCM4P2_DecodePadMV_PVOP_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   12290
+; * Date:       Wednesday, April 9, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; * 
+; **
+; * Function: omxVCM4P2_DecodePadMV_PVOP
+; *
+; * Description:
+; * Decodes and pads four motion vectors of the non-intra macroblock in P-VOP.
+; * The motion vector padding process is specified in subclause 7.6.1.6 of
+; * ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; *
+; * Parameters:
+; * [in]    ppBitStream        pointer to the pointer to the current byte in
+; *                            the bit stream buffer
+; * [in]    pBitOffset         pointer to the bit position in the byte pointed
+; *                            to by *ppBitStream. *pBitOffset is valid within
+; *                            [0-7].
+; * [in]    pSrcMVLeftMB       pointer to the motion vector buffer of the
+; *                     macroblock spatially at the left side of the current
+; *                     macroblock.
+; * [in]    pSrcMVUpperMB      pointer to the motion vector buffer of the
+; *                     macroblock spatially at the upper side of the current
+; *                     macroblock.
+; * [in]    pSrcMVUpperRightMB pointer to the motion vector buffer of the
+; *                     macroblock spatially at the upper-right side of the
+; *                     current macroblock.
+; * [in]    fcodeForward       a code equal to vop_fcode_forward in MPEG-4
+; *                     bit stream syntax
+; * [in]    MBType         the type of the current macroblock. If MBType
+; *                     is not equal to OMX_VC_INTER4V, the destination
+; *                     motion vector buffer is still filled with the
+; *                     same decoded vector.
+; * [out]   ppBitStream         *ppBitStream is updated after the block is decoded,
+; *                     so that it points to the current byte in the bit
+; *                     stream buffer
+; * [out]   pBitOffset         *pBitOffset is updated so that it points to the
+; *                     current bit position in the byte pointed by
+; *                     *ppBitStream
+; * [out]   pDstMVCurMB         pointer to the motion vector buffer of the current
+; *                     macroblock which contains four decoded motion vectors
+; *
+; * Return Value:
+; * OMX_Sts_NoErr -no error
+; * 
+; *                     
+; * OMX_Sts_Err - status error
+; *
+; *
+     
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        INCLUDE armCOMM_BitDec_s.h
+        INCLUDE omxVC_s.h
+        
+       M_VARIANTS ARM1136JS
+       
+                
+
+
+        IF ARM1136JS
+
+;//Input Arguments
+;// (r0-r3 arrive in registers; the M_ARG entries after M_START name the stack arguments)
+ppBitStream           RN 0
+pBitOffset            RN 1
+pSrcMVLeftMB          RN 2
+pSrcMVUpperMB         RN 3
+pSrcMVUpperRightMB    RN 4
+pDstMVCurMB           RN 5
+fcodeForward          RN 6
+MBType                RN 7
+
+;//Local Variables
+;// NOTE: many of the names below share one physical register (same RN number)
+zero                  RN 4
+one                   RN 4
+scaleFactor           RN 1
+
+
+Return                RN 0
+
+VlcMVD                RN 0
+index                 RN 4
+Count                 RN 7                  ;// same register as MBType
+
+mvHorData             RN 4
+mvHorResidual         RN 0
+
+mvVerData             RN 4             
+mvVerResidual         RN 0
+
+temp                  RN 1                  ;// same register as Range, scaleFactor and diffMVdy
+
+temp1                 RN 3
+High                  RN 4
+Low                   RN 2
+Range                 RN 1
+
+BlkCount              RN 14                 ;// same register as LR; saved to pBlkCount before the BL
+
+diffMVdx              RN 0
+diffMVdy              RN 1
+
+;// Scratch Registers
+
+RBitStream            RN 8
+RBitCount             RN 9
+RBitBuffer            RN 10
+
+T1                    RN 11
+T2                    RN 12
+LR                    RN 14
+
+       IMPORT          armVCM4P2_aVlcMVD
+       IMPORT          omxVCM4P2_FindMVpred
+
+       ;// Allocate stack memory        
+       ;// First three slots: values stored just before the BL to omxVCM4P2_FindMVpred
+       M_ALLOC4        ppDstMVCurMB,4
+       M_ALLOC4        pDstMVPredME,4
+       M_ALLOC4        pBlkCount,4
+       ;// Saved copies of the incoming register arguments
+       M_ALLOC4        pppBitStream,4
+       M_ALLOC4        ppBitOffset,4
+       M_ALLOC4        ppSrcMVLeftMB,4
+       M_ALLOC4        ppSrcMVUpperMB,4
+       ;// Decoded differentials and 32*scaleFactor
+       M_ALLOC4        pdiffMVdx,4
+       M_ALLOC4        pdiffMVdy,4
+       M_ALLOC4        pHigh,4
+       
+              
+
+
+       M_START   omxVCM4P2_DecodePadMV_PVOP,r11
+
+       M_ARG           pSrcMVUpperRightMBonStack,4           ;// stack argument: pSrcMVUpperRightMB
+       M_ARG           pDstMVCurMBonStack,4                  ;// stack argument: pDstMVCurMB
+       M_ARG           fcodeForwardonStack,4                 ;// stack argument: fcodeForward
+       M_ARG           MBTypeonStack,4                       ;// stack argument: MBType
+
+       ;// Intra macroblocks (OMX_VC_INTRA / OMX_VC_INTRA_Q): all four 4-byte
+       ;// entries of pDstMVCurMB are cleared and OMX_Sts_NoErr is returned.
+       ;// NOTE(review): the STREQ/MOVEQ/BEQ sequence below assumes that
+       ;// M_BD_INIT1 and M_BD_INIT2 leave the condition flags untouched.
+       ;// Initializing the BitStream Macro
+
+       M_BD_INIT0      ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+       M_LDR           MBType,MBTypeonStack                  ;// Load MBType from stack
+       M_LDR           pDstMVCurMB,pDstMVCurMBonStack        ;// Load pDstMVCurMB from stack
+       MOV             zero,#0
+
+       TEQ             MBType,#OMX_VC_INTRA                  ;// Check if MBType=OMX_VC_INTRA
+       TEQNE           MBType,#OMX_VC_INTRA_Q                ;// check if MBType=OMX_VC_INTRA_Q
+       STREQ           zero,[pDstMVCurMB]                    ;// pDstMVCurMB[0]=0
+       M_BD_INIT1      T1, T2, T2
+       STREQ           zero,[pDstMVCurMB,#4]                 ;// pDstMVCurMB[1]=0
+       M_BD_INIT2      T1, T2, T2
+       STREQ           zero,[pDstMVCurMB,#8]                 ;// pDstMVCurMB[2]=0
+       MOVEQ           Return,#OMX_Sts_NoErr
+       MOV             BlkCount,#0
+       STREQ           zero,[pDstMVCurMB,#12]                ;// pDstMVCurMB[3]=0
+
+       BEQ             ExitOK
+
+       TEQ             MBType,#OMX_VC_INTER4V                ;// Check if MBType=OMX_VC_INTER4V
+       TEQNE           MBType,#OMX_VC_INTER4V_Q              ;// Check if MBType=OMX_VC_INTER4V_Q
+       MOVEQ           Count,#4                              ;// Count shares r7 with MBType
+
+       TEQ             MBType,#OMX_VC_INTER                  ;// Check if MBType=OMX_VC_INTER
+       TEQNE           MBType,#OMX_VC_INTER_Q                ;// Check if MBType=OMX_VC_INTER_Q
+       MOVEQ           Count,#1
+
+       M_LDR           fcodeForward,fcodeForwardonStack      ;// Load fcodeForward  from stack
+
+       ;// Storing the values temporarily on stack
+
+       M_STR           ppBitStream,pppBitStream
+       M_STR           pBitOffset,ppBitOffset
+
+
+       SUB             temp,fcodeForward,#1                  ;// temp=fcodeForward-1
+       MOV             one,#1
+       M_STR           pSrcMVLeftMB,ppSrcMVLeftMB
+       LSL             scaleFactor,one,temp                  ;// scaleFactor=1<<(fcodeForward-1)
+       M_STR           pSrcMVUpperMB,ppSrcMVUpperMB
+       LSL             scaleFactor,scaleFactor,#5
+       M_STR           scaleFactor,pHigh                     ;// [pHigh]=32*scaleFactor
+
+       ;// VLD Decoding
+
+
+Loop
+
+       LDR             VlcMVD, =armVCM4P2_aVlcMVD        ;// Load the optimized MVD VLC table
+       ;// Horizontal Data and Residual calculation
+
+       LDR             temp,=0xFFF                           ;// value that marks an invalid symbol (see TEQ below)
+       M_BD_VLD        index,T1,T2,VlcMVD,3,2                ;// variable length decoding using the macro
+
+       TEQ             index,temp
+       BEQ             ExitError                             ;// Exit with an error if the decoded symbol is invalid
+
+       SUB             mvHorData,index,#32                   ;// mvHorData=index-32
+       MOV             mvHorResidual,#1                      ;// mvHorResidual=1
+       CMP             fcodeForward,#1
+       TEQNE           mvHorData,#0
+       MOVEQ           diffMVdx,mvHorData                    ;// if scaleFactor=1(fcodeForward=1) or mvHorData=0 diffMVdx=mvHorData
+       BEQ             VerticalData
+
+       SUB             temp,fcodeForward,#1
+       M_BD_VREAD8     mvHorResidual,temp,T1,T2              ;// get mvHorResidual from bitstream if fcodeForward>1 and mvHorData!=0
+
+       M_LDR           temp,pHigh                            ;// temp=32*scaleFactor, stored on the stack before the loop
+       MOV             temp,temp,LSR #5                      ;// temp=scaleFactor=1<<(fcodeForward-1)
+       CMP             mvHorData,#0                          ;// flags kept for the final RSBLT (no S-suffixed op in between)
+       RSBLT           mvHorData,mvHorData,#0                ;// mvHorData=abs(mvHorData)
+       SUB             mvHorResidual,mvHorResidual,temp
+       SMLABB          diffMVdx,mvHorData,temp,mvHorResidual ;// diffMVdx=(abs(mvHorData)-1)*scaleFactor+mvHorResidual
+       ADD             diffMVdx,diffMVdx,#1                  ;// ... +1
+       RSBLT           diffMVdx,diffMVdx,#0                  ;// negate when mvHorData was negative
+       ;// Vertical Data and Residual calculation
+
+VerticalData
+
+       M_STR           diffMVdx,pdiffMVdx                    ;// Store the diffMVdx on stack
+       LDR             VlcMVD, =armVCM4P2_aVlcMVD        ;// Loading the address of optimized VLC tables
+
+       LDR             temp,=0xFFF                           ;// value that marks an invalid symbol (see TEQ below)
+       M_BD_VLD        index,T1,T2,VlcMVD,3,2                ;// VLC decoding using the macro
+
+       TEQ             index,temp
+       BEQ             ExitError                             ;// Exit with an error if an invalid symbol occurs
+
+       SUB             mvVerData,index,#32                   ;// mvVerData=index-32
+       MOV             mvVerResidual,#1
+       CMP             fcodeForward,#1
+       TEQNE           mvVerData,#0
+       MOVEQ           diffMVdy,mvVerData                    ;// diffMVdy = mvVerData if scaleFactor=1(fcodeForward=1) or mvVerData=0
+       BEQ             FindMVPred
+
+       SUB             temp,fcodeForward,#1
+       M_BD_VREAD8     mvVerResidual,temp,T1,T2              ;// Get mvVerResidual from bit stream if fcodeForward>1 and mvVerData!=0
+       M_LDR           temp,pHigh                            ;// temp=32*scaleFactor, stored on the stack before the loop
+       MOV             temp,temp,LSR #5                      ;// temp=scaleFactor=1<<(fcodeForward-1)
+       CMP             mvVerData,#0                          ;// flags kept for the final RSBLT (no S-suffixed op in between)
+       RSBLT           mvVerData,mvVerData,#0                ;// mvVerData=abs(mvVerData)
+       SUB             mvVerResidual,mvVerResidual,temp
+       SMLABB          diffMVdy,mvVerData,temp,mvVerResidual ;// diffMVdy=(abs(mvVerData)-1)*scaleFactor+mvVerResidual (diffMVdy and temp are both r1)
+       ADD             diffMVdy,diffMVdy,#1                  ;// ... +1
+       RSBLT           diffMVdy,diffMVdy,#0                  ;// negate when mvVerData was negative
+
+       ;//Calling the Function omxVCM4P2_FindMVpred
+        
+FindMVPred
+
+       M_STR           diffMVdy,pdiffMVdy
+       ADD             temp,pDstMVCurMB,BlkCount,LSL #2      ;// temp=&pDstMVCurMB[BlkCount]
+       M_STR           temp,ppDstMVCurMB                     ;// store temp on stack for passing as an argument to FindMVPred
+
+       MOV             temp,#0
+       M_STR           temp,pDstMVPredME                     ;// Pass pDstMVPredME=NULL as an argument
+       M_STR           BlkCount,pBlkCount                    ;// Pass BlkCount as argument through stack (BlkCount lives in LR)
+
+       MOV             temp,pSrcMVLeftMB                     ;// temp (RN 1)=pSrcMVLeftMB
+       M_LDR           pSrcMVUpperRightMB,pSrcMVUpperRightMBonStack
+       MOV             pSrcMVLeftMB,pSrcMVUpperMB            ;// pSrcMVLeftMB ( RN 2) = pSrcMVUpperMB
+       MOV             ppBitStream,pDstMVCurMB               ;// ppBitStream  ( RN 0) = pDstMVCurMB
+       MOV             pSrcMVUpperMB,pSrcMVUpperRightMB      ;// pSrcMVUpperMB( RN 3) = pSrcMVUpperRightMB
+       BL              omxVCM4P2_FindMVpred              ;// Branch to subroutine omxVCM4P2_FindMVpred
+
+       ;// Store Horizontal Motion Vector
+
+       M_LDR           BlkCount,pBlkCount                    ;// Load BlkCount from stack
+       M_LDR           High,pHigh                            ;// High=32*scaleFactor
+       LSL             temp1,BlkCount,#2                     ;// temp1=BlkCount*4
+       M_LDR           diffMVdx,pdiffMVdx                    ;// Load diffMVdx
+
+       LDRSH           temp,[pDstMVCurMB,temp1]              ;// temp=pDstMVCurMB[BlkCount].dx
+
+       RSB             Low,High,#0                           ;// Low = -32*scaleFactor
+       ADD             diffMVdx,temp,diffMVdx                ;// diffMVdx=pDstMVCurMB[BlkCount]+diffMVdx
+       ADD             Range,High,High                       ;// Range=64*ScaleFactor
+       SUB             High,High,#1                          ;// High= 32*scaleFactor-1
+
+       CMP             diffMVdx,Low                          ;// If diffMVdx<Low
+       ADDLT           diffMVdx,diffMVdx,Range               ;// diffMVdx+=Range
+
+       CMP             diffMVdx,High
+       SUBGT           diffMVdx,diffMVdx,Range               ;// If diffMVdx > High diffMVdx-=Range
+       STRH            diffMVdx,[pDstMVCurMB,temp1]
+
+       ;// Store Vertical (the diffMVdx register now carries the vertical value)
+       ADD             temp1,temp1,#2                        ;// temp1=4*BlkCount+2
+       M_LDR           diffMVdx,pdiffMVdy                    ;// Load diffMVdy
+       LDRSH           temp,[pDstMVCurMB,temp1]              ;// temp=pDstMVCurMB[BlkCount].dy; temp and Range are both r1
+       ADD             BlkCount,BlkCount,#1                  ;// BlkCount=BlkCount+1
+       ADD             diffMVdx,temp,diffMVdx
+       ADD             Range,High,High                       ;// Rebuild Range after temp overwrote r1: High is 32*scaleFactor-1 here,
+       ADD             Range,Range,#2                        ;// so 2*High+2 = 64*scaleFactor
+       CMP             diffMVdx,Low
+       ADDLT           diffMVdx,diffMVdx,Range               ;// If diffMVdy<Low  diffMVdy+=Range
+       CMP             diffMVdx,High
+       SUBGT           diffMVdx,diffMVdx,Range               ;// If diffMVdy > High diffMVdy-=Range
+       STRH            diffMVdx,[pDstMVCurMB,temp1]
+
+       CMP             BlkCount,Count
+       M_LDR           pSrcMVLeftMB,ppSrcMVLeftMB
+       M_LDR           pSrcMVUpperMB,ppSrcMVUpperMB
+
+       BLT             Loop                                  ;// If BlkCount<Count Continue the Loop
+
+
+       ;// If MBType=OMX_VC_INTER or MBtype=OMX_VC_INTER_Q copy pDstMVCurMB[0] to
+       ;// pDstMVCurMB[1], pDstMVCurMB[2], pDstMVCurMB[3] 
+
+       M_LDR           MBType,MBTypeonStack                  ;// reload: r7 may have been overwritten through Count
+
+       TEQ             MBType,#OMX_VC_INTER                                       
+       TEQNE           MBType,#OMX_VC_INTER_Q                            
+       LDREQ           temp,[pDstMVCurMB]                    ;// temp=32-bit word at pDstMVCurMB (entry 0)
+       M_LDR           ppBitStream,pppBitStream
+       STREQ           temp,[pDstMVCurMB,#4]                 ;// entry 1 = entry 0
+       
+       STREQ           temp,[pDstMVCurMB,#8]                 ;// entry 2 = entry 0
+       STREQ           temp,[pDstMVCurMB,#12]                ;// entry 3 = entry 0
+       
+       
+       M_LDR           pBitOffset,ppBitOffset
+       ;//Ending the macro
+       M_BD_FINI       ppBitStream,pBitOffset                 ;// Finishing the Macro       
+
+       
+       MOV             Return,#OMX_Sts_NoErr
+       B               ExitOK
+ 
+ExitError
+       ;// Reached from the BEQ ExitError branches taken when a decoded index equals 0xFFF
+       M_LDR           ppBitStream,pppBitStream
+       M_LDR           pBitOffset,ppBitOffset
+       ;//Ending the macro
+       M_BD_FINI       ppBitStream,pBitOffset
+       
+       MOV             Return,#OMX_Sts_Err
+
+ExitOK             
+       ;// Also the target of the intra early-out (BEQ ExitOK) taken before the loop
+       M_END
+       ENDIF
+       END
+
+
+   
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
new file mode 100755
index 0000000..7208c21
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
@@ -0,0 +1,132 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   12290
+; * Date:       Wednesday, April 9, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_Inter
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one inter coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]    ppBitStream        pointer to the pointer to the current byte in
+; *                    the bitstream buffer
+; * [in]    pBitOffset        pointer to the bit position in the byte pointed
+; *                    to by *ppBitStream. *pBitOffset is valid within    [0-7].
+; * [in] shortVideoHeader     binary flag indicating presence of short_video_header;
+; *                           escape modes 0-3 are used if shortVideoHeader==0,
+; *                           and escape mode 4 is used when shortVideoHeader==1.
+; * [out]    ppBitStream        *ppBitStream is updated after the block is
+; *                    decoded, so that it points to the current byte
+; *                    in the bit stream buffer
+; * [out]    pBitOffset        *pBitOffset is updated so that it points to the
+; *                    current bit position in the byte pointed by
+; *                    *ppBitStream
+; * [out]    pDst            pointer to the coefficient buffer of current
+; *                    block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; *   -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; *   -pDst is not 16-byte aligned, or
+; *   -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; *   -At least one mark bit is equal to zero
+; *   -Encountered an illegal stream code that cannot be found in the VLC table
+; *   -Encountered an illegal code in the VLC FLC table
+; *   -The number of coefficients is greater than 64
+; *
+; */
+
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      INCLUDE armCOMM_BitDec_s.h
+
+
+      M_VARIANTS ARM1136JS
+
+     
+
+
+
+     IF ARM1136JS
+     
+        ;// Import various tables needed for the function
+
+        
+        IMPORT          armVCM4P2_InterVlcL0L1             ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+                                                               ;// Packed in Run:Level:Last format
+        IMPORT          armVCM4P2_InterL0L1LMAX            ;// Contains LMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_InterL0L1RMAX            ;// Contains RMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_aClassicalZigzagScan     ;// contains classical Zigzag table entries with double the original values
+        IMPORT          armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+
+
+;//Input Arguments
+
+ppBitStream          RN 0
+pBitOffset           RN 1
+pDst                 RN 2
+shortVideoHeader     RN 3
+
+;//Local Variables
+
+Return               RN 0
+
+pVlcTableL0L1        RN 4
+pLMAXTableL0L1       RN 4
+pRMAXTableL0L1       RN 4
+pZigzagTable         RN 4
+Count                RN 6
+
+
+        
+        ;// Allocate stack memory to store the VLC,Zigzag,LMAX and RMAX tables
+     
+        
+        M_ALLOC4        ppVlcTableL0L1,4
+        M_ALLOC4        ppLMAXTableL0L1,4
+        M_ALLOC4        ppRMAXTableL0L1,4
+        M_ALLOC4        ppZigzagTable,4
+        
+        
+        M_START omxVCM4P2_DecodeVLCZigzag_Inter,r12
+        ;// Wrapper for inter blocks: stores the Inter VLC/LMAX/RMAX and zigzag
+        ;// table addresses in their stack slots, sets Count to 0 and calls the
+        ;// AC decoder. All four table pointers alias r4, so each address is
+        ;// stored to its slot before the next LDR overwrites the register.
+        LDR             pZigzagTable, =armVCM4P2_aClassicalZigzagScan       ;// Load zigzag table
+        M_STR           pZigzagTable,ppZigzagTable                              ;// Store zigzag table on stack to pass as argument to unsafe function
+        LDR             pVlcTableL0L1, =armVCM4P2_InterVlcL0L1              ;// Load optimized VLC table with both L=0 and L=1 entries
+        M_STR           pVlcTableL0L1,ppVlcTableL0L1                            ;// Store optimized VLC table address on stack
+        LDR             pLMAXTableL0L1, =armVCM4P2_InterL0L1LMAX            ;// Load Interleaved L=0 and L=1 LMAX Tables
+        M_STR           pLMAXTableL0L1,ppLMAXTableL0L1                          ;// Store LMAX table address on stack
+        LDR             pRMAXTableL0L1, =armVCM4P2_InterL0L1RMAX            ;// Load Interleaved L=0 and L=1 RMAX Tables
+        MOV             Count,#0                                                ;// set start=0
+        M_STR           pRMAXTableL0L1,ppRMAXTableL0L1                          ;// store RMAX table address on stack
+        ;// r0-r3 (ppBitStream, pBitOffset, pDst, shortVideoHeader) are not
+        ;// written by the instructions above.
+        BL              armVCM4P2_DecodeVLCZigzag_AC_unsafe                 ;// call Unsafe Function for VLC Zigzag Decoding
+        ;// Return (r0) is not written after the call.
+        ;// NOTE(review): presumably the status left in r0 by the unsafe
+        ;// function is what the caller receives - confirm.
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
new file mode 100755
index 0000000..9a37ec9
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
@@ -0,0 +1,136 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   12290
+; * Date:       Wednesday, April 9, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block (AC coefficients).
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]    ppBitStream        pointer to the pointer to the current byte in
+; *                    the bitstream buffer
+; * [in]    pBitOffset        pointer to the bit position in the byte pointed
+; *                    to by *ppBitStream. *pBitOffset is valid within    [0-7].
+; * [in] shortVideoHeader     binary flag indicating presence of short_video_header;
+; *                           escape modes 0-3 are used if shortVideoHeader==0,
+; *                           and escape mode 4 is used when shortVideoHeader==1.
+; * [out]    ppBitStream        *ppBitStream is updated after the block is
+; *                    decoded, so that it points to the current byte
+; *                    in the bit stream buffer
+; * [out]    pBitOffset        *pBitOffset is updated so that it points to the
+; *                    current bit position in the byte pointed by
+; *                    *ppBitStream
+; * [out]    pDst            pointer to the coefficient buffer of current
+; *                    block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; *   -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; *   -pDst is not 16-byte aligned, or
+; *   -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; *   -At least one mark bit is equal to zero
+; *   -Encountered an illegal stream code that cannot be found in the VLC table
+; *   -Encountered an illegal code in the VLC FLC table
+; *   -The number of coefficients is greater than 64
+; *
+; */
+
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      INCLUDE armCOMM_BitDec_s.h
+
+
+      M_VARIANTS ARM1136JS
+
+     
+
+
+
+     IF ARM1136JS
+     
+        ;// Import various tables needed for the function
+
+        
+        IMPORT          armVCM4P2_IntraVlcL0L1             ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+                                                               ;// Packed in Run:Level:Last format
+        IMPORT          armVCM4P2_IntraL0L1LMAX            ;// Contains LMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_IntraL0L1RMAX            ;// Contains RMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_aClassicalZigzagScan     ;// contains classical Zigzag table entries with double the original values
+        IMPORT          armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+;//Input Arguments
+
+ppBitStream          RN 0
+pBitOffset           RN 1
+pDst                 RN 2
+PredDir              RN 3
+shortVideoHeader     RN 3
+
+;//Local Variables
+
+Return               RN 0
+
+pVlcTableL0L1        RN 4
+pLMAXTableL0L1       RN 4
+pRMAXTableL0L1       RN 4
+pZigzagTable         RN 4
+Count                RN 6
+
+
+        
+        ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses 
+     
+        M_ALLOC4        ppVlcTableL0L1,4
+        M_ALLOC4        ppLMAXTableL0L1,4
+        M_ALLOC4        ppRMAXTableL0L1,4
+        M_ALLOC4        ppZigzagTable,4
+
+        
+        M_START omxVCM4P2_DecodeVLCZigzag_IntraACVLC,r12
+        ;// Wrapper for intra AC coefficients: picks a zigzag table from PredDir, stores the Intra table addresses on the stack, sets Count to 0 and calls the AC decoder
+        M_ARG           shortVideoHeaderonStack,4                             ;// Stack-passed input argument: shortVideoHeader
+        ;// PredDir and shortVideoHeader both alias r3: PredDir is consumed by the ADD below before shortVideoHeader is loaded into r3
+        LDR             pZigzagTable, =armVCM4P2_aClassicalZigzagScan     ;// Load Address of the Zigzag table
+        ADD             pZigzagTable, pZigzagTable, PredDir, LSL #6           ;// Select the zigzag table: base + PredDir*64 bytes
+       
+        M_STR           pZigzagTable,ppZigzagTable                            ;// Store Zigzag table address on stack
+        LDR             pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1            ;// Load optimized packed VLC Table with both L=0 and L=1 entries
+        M_STR           pVlcTableL0L1,ppVlcTableL0L1                          ;// Store VLC Table address on stack
+        LDR             pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX          ;// Load LMAX Table
+        M_STR           pLMAXTableL0L1,ppLMAXTableL0L1                        ;// Store LMAX Table address on Stack
+        LDR             pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX          ;// Load RMAX Table
+        MOV             Count,#0                                              ;// Set Start=0
+        
+        M_STR           pRMAXTableL0L1,ppRMAXTableL0L1                        ;// Store RMAX Table address on stack
+              
+
+        ;// r3 now switches role from PredDir to shortVideoHeader
+        M_LDR           shortVideoHeader,shortVideoHeaderonStack              ;// get the Input Argument from stack
+
+        BL              armVCM4P2_DecodeVLCZigzag_AC_unsafe               ;// Call Unsafe Function
+
+        ;// Return (r0) is not written after the call.
+        ;// NOTE(review): presumably the status left in r0 by the unsafe function is what the caller receives - confirm.
+        
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
new file mode 100755
index 0000000..778aaf2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
@@ -0,0 +1,224 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   12290
+; * Date:       Wednesday, April 9, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block (DC and AC coefficients).
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]    ppBitStream        pointer to the pointer to the current byte in
+; *                    the bitstream buffer
+; * [in]    pBitOffset        pointer to the bit position in the byte pointed
+; *                    to by *ppBitStream. *pBitOffset is valid within    [0-7].
+; * [in] shortVideoHeader     binary flag indicating presence of short_video_header;
+; *                           escape modes 0-3 are used if shortVideoHeader==0,
+; *                           and escape mode 4 is used when shortVideoHeader==1.
+; * [out]    ppBitStream        *ppBitStream is updated after the block is
+; *                    decoded, so that it points to the current byte
+; *                    in the bit stream buffer
+; * [out]    pBitOffset        *pBitOffset is updated so that it points to the
+; *                    current bit position in the byte pointed by
+; *                    *ppBitStream
+; * [out]    pDst            pointer to the coefficient buffer of current
+; *                    block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; *   -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; *   -pDst is not 16-byte aligned, or
+; *   -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; *   -At least one mark bit is equal to zero
+; *   -Encountered an illegal stream code that cannot be found in the VLC table
+; *   -Encountered an illegal code in the VLC FLC table
+; *   -The number of coefficients is greater than 64
+; *
+; */
+
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      INCLUDE armCOMM_BitDec_s.h
+
+
+      M_VARIANTS CortexA8
+
+     
+      
+
+
+      IF CortexA8
+
+     
+        ;// Import various tables needed for the function
+
+        
+        IMPORT          armVCM4P2_IntraVlcL0L1             ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+                                                               ;// Packed in Run:Level:Last format
+        IMPORT          armVCM4P2_IntraL0L1LMAX            ;// Contains LMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_IntraL0L1RMAX            ;// Contains RMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_aClassicalZigzagScan     ;// contains CLassical, Horizontal, Vertical Zigzag table entries with double the original values
+        IMPORT          armVCM4P2_aIntraDCLumaChromaIndex  ;// Contains Optimized DCLuma and DCChroma Index table Entries
+        
+
+        IMPORT          armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+;//Input Arguments
+
+ppBitStream          RN 0
+pBitOffset           RN 1
+pDst                 RN 2
+PredDir              RN 3
+shortVideoHeader     RN 3
+videoComp            RN 5
+;//Local Variables
+
+Return               RN 0
+
+pDCLumaChromaIndex   RN 4
+pDCChromaIndex       RN 7
+pVlcTableL0L1        RN 4
+pLMAXTableL0L1       RN 4
+pRMAXTableL0L1       RN 4
+pZigzagTable         RN 4
+Count                RN 6
+DCValueSize          RN 6
+powOfSize            RN 7
+temp1                RN 5
+
+
+;// Scratch Registers
+
+RBitStream           RN 8
+RBitBuffer           RN 9
+RBitCount            RN 10
+
+T1                   RN 11
+T2                   RN 12
+DCVal                RN 14
+
+        
+        ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses 
+     
+        M_ALLOC4        ppVlcTableL0L1,4
+        M_ALLOC4        ppLMAXTableL0L1,4
+        M_ALLOC4        ppRMAXTableL0L1,4
+        M_ALLOC4        ppZigzagTable,4
+        M_ALLOC4        pDCCoeff,4
+        
+
+        
+        M_START omxVCM4P2_DecodeVLCZigzag_IntraDCVLC,r12
+        ;// Decodes the intra DC coefficient inline, then calls the AC decoder with Count=1 and finally writes the DC value to pDst[0]
+        M_ARG           shortVideoHeaderonStack,4                                  ;// Stack-passed input argument: shortVideoHeader
+        M_ARG           videoComponstack,4                                         ;// Stack-passed input argument: videoComp
+
+        
+        ;// Decode DC Coefficient
+
+        
+        LDR             pDCLumaChromaIndex, =armVCM4P2_aIntraDCLumaChromaIndex ;// Load Optimized VLC Table for Luminance and Chrominance
+
+        ;// Initializing the Bitstream Macro
+
+        M_BD_INIT0      ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+        M_LDR           videoComp,videoComponstack                                 ;// Load the videoComp argument from the stack
+        M_BD_INIT1      T1, T2, T2
+        ADD             pDCLumaChromaIndex,pDCLumaChromaIndex,videoComp, LSL #6    ;// Offset the table base by videoComp*64 bytes
+        M_BD_INIT2      T1, T2, T2
+    
+        
+        M_BD_VLD        DCValueSize,T1,T2,pDCLumaChromaIndex,4,2                    ;// VLC Decode using optimized Luminance and Chrominance VLC Table
+
+    
+       
+
+DecodeDC
+                         
+        CMP             DCValueSize,#12                                             ;// A DCValueSize above 12 is treated as an error
+        BGT             ExitError
+        
+        CMP             DCValueSize,#0
+        MOVEQ           DCVal,#0                                                    ;// If DCValueSize is zero then DC coeff =0
+        BEQ             ACDecode                                                    ;// Branch to perform AC Coeff Decoding
+        
+        M_BD_VREAD16    DCVal,DCValueSize,T1,T2                                     ;// Get DC Value From Bit stream
+         
+
+        MOV             powOfSize,#1                                                ;// Start from 1; shifted left by DCValueSize below
+        LSL             powOfSize,DCValueSize                                       ;// powOfSize=pow(2,DCValueSize)
+        CMP             DCVal,powOfSize,LSR #1                                      ;// Compare DCVal with powOfSize/2 
+        ADDLT           DCVal,DCVal,#1
+        SUBLT           DCVal,DCVal,powOfSize                                       ;// If Lessthan powOfSize/2 DCVal=DCVal-powOfSize+1
+                                                                                    ;// Else DCVal= fetchbits from bit stream
+
+CheckDCValueSize
+        
+        CMP             DCValueSize,#8                                              ;// If DCValueSize greater than 8 check marker bit
+
+        BLE             ACDecode
+
+        M_BD_READ8      temp1,1,T1                                                  ;// Presumably reads 1 bit (the marker bit) into temp1 - confirm against the macro
+        TEQ             temp1,#0                                                    ;// If Marker bit is zero Exit with an Error Message
+        BEQ             ExitError
+
+        
+
+        ;// Decode AC Coefficient
+
+ACDecode
+        ;// DCVal is r14 (lr), which the BL below overwrites, so it is kept in the pDCCoeff stack slot across the call
+        M_STR           DCVal,pDCCoeff                                             ;// Store Decoded DC Coeff on Stack
+        M_BD_FINI       ppBitStream,pBitOffset                                     ;// Terminating the Bit stream Macro
+         
+        LDR             pZigzagTable, =armVCM4P2_aClassicalZigzagScan          ;// Load Zigzag table address
+        ADD             pZigzagTable, pZigzagTable, PredDir, LSL #6                ;// Modify the Zigzag table address based on PredDir (base + PredDir*64 bytes)
+       
+        M_STR           pZigzagTable,ppZigzagTable                                 ;// Store zigzag table on stack
+        LDR             pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1                 ;// Load Optimized VLC Table With both Last=0 and Last=1 Entries
+        M_STR           pVlcTableL0L1,ppVlcTableL0L1                               ;// Store Optimized VLC Table on stack
+        LDR             pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX               ;// Load LMAX Table
+        M_STR           pLMAXTableL0L1,ppLMAXTableL0L1                             ;// Store LMAX table on stack
+        LDR             pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX               ;// Load RMAX Table
+        MOV             Count,#1                                                   ;// Set Start =1 (Count shares r6 with DCValueSize, which is no longer needed)
+        
+        M_STR           pRMAXTableL0L1,ppRMAXTableL0L1                             ;// Store RMAX Table on Stack
+        
+        ;// r3 switches role here from PredDir to shortVideoHeader (both alias r3)
+        M_LDR           shortVideoHeader,shortVideoHeaderonStack                   ;// Load the Input Argument From Stack
+        
+        BL              armVCM4P2_DecodeVLCZigzag_AC_unsafe                    ;// Call the Unsafe Function
+        ;// NOTE(review): the DC value is stored without testing the status in r0, and pDst (r2) is assumed to survive the call - confirm
+        M_LDR           DCVal,pDCCoeff                                             ;// Get the Decoded DC Value From Stack
+        STRH            DCVal,[pDst]                                               ;// Store the DC Value 
+        B               ExitOK
+        
+              
+
+ExitError
+ 
+        M_BD_FINI       ppBitStream,pBitOffset                                     ;// Terminating the Bit Stream Macro in case of an Error
+        MOV             Return,#OMX_Sts_Err                                        ;// Exit with an Error Message 
+ExitOK
+      
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
new file mode 100755
index 0000000..caf7121
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
@@ -0,0 +1,194 @@
+;//
+;// 
+;// File Name:  omxVCM4P2_FindMVpred_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+;// Function:
+;//     omxVCM4P2_FindMVpred
+;//
+        ;// Include headers
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        INCLUDE armVCCOMM_s.h
+
+        ;// Define cpu variants
+        M_VARIANTS CortexA8
+        
+        
+        IF CortexA8
+        
+        M_TABLE armVCM4P2_pBlkIndexTable
+        DCD  OMXVCBlk0, OMXVCBlk1
+        DCD  OMXVCBlk2, OMXVCBlk3
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+        
+pSrcMVCurMB            RN 0
+pSrcCandMV1            RN 1
+pSrcCandMV2            RN 2
+pSrcCandMV3            RN 3
+pDstMVPred             RN 4
+pDstMVPredME           RN 5
+iBlk                   RN 6
+
+pTable                 RN 4
+CandMV                 RN 12
+
+pCandMV1               RN 7
+pCandMV2               RN 8
+pCandMV3               RN 9
+
+CandMV1dx              RN 0 
+CandMV1dy              RN 1 
+CandMV2dx              RN 2
+CandMV2dy              RN 3
+CandMV3dx              RN 10
+CandMV3dy              RN 11
+
+temp                   RN 14
+
+zero                   RN 14
+return                 RN 0
+        
+; ----------------------------------------------
+; Main routine
+; ----------------------------------------------        
+
+        M_ALLOC4 MV, 4
+        
+        ;// Function header 
+        M_START omxVCM4P2_FindMVpred, r11
+        ;// Picks three candidate motion vectors for block iBlk, optionally copies them to pDstMVPredME, and stores a median-based predictor to pDstMVPred
+        ;// Define stack arguments
+        M_ARG   ppDstMVPred,  4
+        M_ARG   ppDstMVPredME, 4
+        M_ARG   Blk, 4
+        ;// CandMV points at the 4-byte stack slot MV; it is zeroed below and acts as the default candidate
+        M_ADR CandMV, MV
+        MOV   zero, #0
+        M_LDR iBlk, Blk
+        
+        ;// Set the default value for these
+        ;// to be used if pSrcCandMV[1|2|3] == NULL
+        MOV   pCandMV1, CandMV
+        MOV   pCandMV2, CandMV
+        MOV   pCandMV3, CandMV
+    
+        STR   zero, [CandMV]
+
+        ;// Branch to the case based on blk number
+        M_SWITCH iBlk
+        M_CASE   OMXVCBlk0      ;// iBlk=0
+        M_CASE   OMXVCBlk1      ;// iBlk=1
+        M_CASE   OMXVCBlk2      ;// iBlk=2
+        M_CASE   OMXVCBlk3      ;// iBlk=3
+        M_ENDSWITCH
+        ;// Block 0: each candidate comes from its neighbour array (byte offsets +4, +8, +8) when that pointer is non-NULL
+OMXVCBlk0
+        CMP   pSrcCandMV1, #0
+        ADDNE pCandMV1, pSrcCandMV1, #4
+        
+        CMP   pSrcCandMV2, #0
+        ADDNE pCandMV2, pSrcCandMV2, #8
+
+        CMP   pSrcCandMV3, #0
+        ADDNE pCandMV3, pSrcCandMV3, #8
+        CMPEQ pSrcCandMV1, #0
+        ;// pSrcCandMV3 and pSrcCandMV1 both NULL: use candidate 2 for all three
+        MOVEQ pCandMV3, pCandMV2
+        MOVEQ pCandMV1, pCandMV2
+                
+        CMP   pSrcCandMV1, #0
+        CMPEQ pSrcCandMV2, #0
+        ;// pSrcCandMV1 and pSrcCandMV2 both NULL: use candidate 3 for all three
+        MOVEQ pCandMV1, pCandMV3
+        MOVEQ pCandMV2, pCandMV3
+        
+        CMP   pSrcCandMV2, #0
+        CMPEQ pSrcCandMV3, #0
+        ;// pSrcCandMV2 and pSrcCandMV3 both NULL: use candidate 1 for all three
+        MOVEQ pCandMV2, pCandMV1
+        MOVEQ pCandMV3, pCandMV1
+        
+        B     BlkEnd
+        ;// Block 1: candidate 1 is pSrcMVCurMB+0; candidates 2 and 3 come from pSrcCandMV2+12 and pSrcCandMV3+8 when non-NULL
+OMXVCBlk1
+        MOV   pCandMV1, pSrcMVCurMB
+        CMP   pSrcCandMV3, #0
+        ADDNE pCandMV3, pSrcCandMV3, #8
+        
+        CMP   pSrcCandMV2, #0
+        ADDNE pCandMV2, pSrcCandMV2, #12
+    
+        CMPEQ pSrcCandMV3, #0
+        ;// pSrcCandMV2 and pSrcCandMV3 both NULL: candidates 2 and 3 fall back to candidate 1
+        MOVEQ pCandMV2, pCandMV1
+        MOVEQ pCandMV3, pCandMV1
+            
+        B     BlkEnd
+        ;// Block 2: candidates 2 and 3 are pSrcMVCurMB+0 and +4; candidate 1 is pSrcCandMV1+12 when non-NULL, else the zero default
+OMXVCBlk2
+        CMP   pSrcCandMV1, #0
+        MOV   pCandMV2, pSrcMVCurMB
+        ADD   pCandMV3, pSrcMVCurMB, #4
+        ADDNE pCandMV1, pSrcCandMV1, #12
+        B     BlkEnd
+        ;// Block 3: all three candidates come from the current MB (pSrcMVCurMB+8, +0, +4)
+OMXVCBlk3
+        ADD   pCandMV1, pSrcMVCurMB, #8
+        MOV   pCandMV2, pSrcMVCurMB
+        ADD   pCandMV3, pSrcMVCurMB, #4
+    
+BlkEnd
+        ;// From here r0-r3 are reused as CandMV1dx/dy and CandMV2dx/dy, so the input pointers are no longer available
+        ;// Using the transparency info, zero out the candidate MV if necessary.
+        ;// NOTE(review): the code below only loads the candidates; no zeroing is visible here
+        LDRSH CandMV1dx, [pCandMV1], #2
+        LDRSH CandMV2dx, [pCandMV2], #2
+        LDRSH CandMV3dx, [pCandMV3], #2
+    
+        ;// Load argument from the stack
+        M_LDR pDstMVPredME, ppDstMVPredME
+
+        LDRSH CandMV1dy, [pCandMV1]
+        LDRSH CandMV2dy, [pCandMV2]
+        LDRSH CandMV3dy, [pCandMV3]
+
+        CMP pDstMVPredME, #0        
+
+        ;// Store the candidate MV's into the pDstMVPredME, 
+        ;// these can be used in the fast algorithm if implemented 
+        ;// (skipped when pDstMVPredME is NULL)
+        STRHNE CandMV1dx, [pDstMVPredME], #2
+        STRHNE CandMV1dy, [pDstMVPredME], #2        
+        STRHNE CandMV2dx, [pDstMVPredME], #2
+        STRHNE CandMV2dy, [pDstMVPredME], #2
+        STRHNE CandMV3dx, [pDstMVPredME], #2
+        STRHNE CandMV3dy, [pDstMVPredME]
+           
+        ; Find the median of the 3 candidate MV's
+        M_MEDIAN3 CandMV1dx, CandMV2dx, CandMV3dx, temp
+
+        ;// Load argument from the stack
+        M_LDR pDstMVPred, ppDstMVPred
+
+        M_MEDIAN3 CandMV1dy, CandMV2dy, CandMV3dy, temp
+        ;// NOTE(review): CandMV3dx/dy are what get stored, so M_MEDIAN3 presumably leaves the median in its third operand - confirm against the macro
+        STRH CandMV3dx, [pDstMVPred], #2
+        STRH CandMV3dy, [pDstMVPred]
+
+        MOV return, #OMX_Sts_NoErr
+    
+        M_END
+    ENDIF ;// ARM1136JS :LOR: CortexA8
+    
+    END
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
new file mode 100755
index 0000000..b5e3d0d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
@@ -0,0 +1,73 @@
+;//
+;// 
+;// File Name:  omxVCM4P2_IDCT8x8blk_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+;// Function:
+;//     omxVCM4P2_IDCT8x8blk
+;//
+        ;// Include headers
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        ;// Define cpu variants
+        M_VARIANTS CortexA8
+
+        INCLUDE armCOMM_IDCT_s.h        
+        
+        IMPORT armCOMM_IDCTPreScale
+        ;//
+        ;// Function prototype
+        ;//
+        ;//     OMXResult
+        ;//     omxVCM4P2_IDCT8x8blk(const OMX_S16* pSrc,
+        ;//                                       OMX_S16* pDst)
+        ;//    
+        
+    IF CortexA8
+        M_ALLOC4  ppDest, 4
+        M_ALLOC4  pStride, 4
+        M_ALLOC8  pBlk, 2*8*8                   ;// 2*8*8 = 128 bytes of stack
+    ENDIF
+    
+    
+    IF CortexA8
+        M_START omxVCM4P2_IDCT8x8blk, r11, d15
+    ENDIF
+        
+    IF CortexA8
+        
+;// Declare input registers
+pSrc            RN 0
+pDst            RN 1
+
+;// Declare other intermediate registers
+Result          RN 0
+
+;// Prototype for macro M_IDCT
+;// pSrc            RN 0  ;// source data buffer
+;// Stride          RN 1  ;// destination stride in bytes
+;// pDest           RN 2  ;// destination data buffer
+;// pScale          RN 3  ;// pointer to scaling table
+
+pSrc    RN 0    
+Stride  RN 1    
+pDest   RN 2    
+pScale  RN 3    
+        ;// Stride aliases pDst (both r1), so pDst is copied into pDest (r2) first
+        MOV         pDest, pDst
+        LDR         pScale, =armCOMM_IDCTPreScale        
+        M_IDCT      s9, s16, 16                 ;// NOTE(review): Stride (r1) is not set explicitly here; the macro arguments presumably fix the formats/stride - confirm against armCOMM_IDCT_s.h
+        MOV         Result, #OMX_Sts_NoErr      ;// Always reports success
+        M_END       
+    ENDIF  
+        ;// CortexA8
+
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
new file mode 100755
index 0000000..dd00df5
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
@@ -0,0 +1,444 @@
+;//
+;// 
+;// File Name:  omxVCM4P2_MCReconBlock_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   12290
+;// Date:       Wednesday, April 9, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// Description:
+;//
+;//
+
+;// Include standard headers
+    INCLUDE omxtypes_s.h
+    INCLUDE armCOMM_s.h
+
+;// Import symbols required from other files
+
+    M_VARIANTS CortexA8
+
+;// ***************************************************************************
+;// ARM1136JS implementation
+;// ***************************************************************************
+
+;// ***************************************************************************
+;// CortexA8 implementation
+;// ***************************************************************************
+    IF  CortexA8
+;// ***************************************************************************
+;// MACRO DEFINITIONS
+;// ***************************************************************************
+    ;// Description:
+    ;// Handles the "IntegerPixel" predictType: no interpolation is needed, so
+    ;// 8 rows of 8 bytes are simply copied from pSrc to pDst. Both rounding
+    ;// cases share this code, which is why both case labels are defined here.
+    ;// Syntax:
+    ;// M_MCRECONBLOCK_IntegerPixel
+    ;// 
+    ;// Inputs: None
+    ;// Outputs: None
+
+    MACRO 
+    M_MCRECONBLOCK_IntegerPixel
+CaseIntegerPixel_Rnd0
+CaseIntegerPixel_Rnd1
+
+    VLD1        dRow0, [pSrc], srcStep          ;// load 8 rows; pSrc advances by srcStep after each load
+    VLD1        dRow1, [pSrc], srcStep
+    VLD1        dRow2, [pSrc], srcStep
+    VLD1        dRow3, [pSrc], srcStep
+    VLD1        dRow4, [pSrc], srcStep
+    VLD1        dRow5, [pSrc], srcStep
+    VLD1        dRow6, [pSrc], srcStep
+    VLD1        dRow7, [pSrc], srcStep
+
+    VST1        dRow0, [pDst@64], dstStep       ;// store 8 rows with a 64-bit alignment hint; pDst advances by dstStep
+    VST1        dRow1, [pDst@64], dstStep
+    VST1        dRow2, [pDst@64], dstStep
+    VST1        dRow3, [pDst@64], dstStep
+    VST1        dRow4, [pDst@64], dstStep
+    VST1        dRow5, [pDst@64], dstStep
+    VST1        dRow6, [pDst@64], dstStep
+    VST1        dRow7, [pDst@64], dstStep
+
+    B           SwitchPredictTypeEnd            ;// jump over the remaining switch cases
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Does interpolation for the "HalfPixelX" predictType. Each output pixel
+    ;// is the average of a source pixel and the pixel to its right. The
+    ;// parameter "$rndVal" selects the rounding behaviour by choosing the
+    ;// opcode at assembly time: "VRHADD" (rounding halving add) when it is 0,
+    ;// "VHADD" (truncating halving add) otherwise.
+    ;//
+    ;// Syntax:
+    ;// M_MCRECONBLOCK_HalfPixelX $rndVal
+    ;// 
+    ;// Inputs: 
+    ;//     $rndVal: 0 selects VRHADD (round), any other value selects VHADD
+    ;// Outputs: None
+
+    MACRO 
+    M_MCRECONBLOCK_HalfPixelX $rndVal
+
+    LCLS M_VHADDR                                        ;// local string variable holding the averaging opcode
+    IF $rndVal = 0
+M_VHADDR SETS "VRHADD"
+    ELSE
+M_VHADDR SETS "VHADD"
+    ENDIF
+
+CaseHalfPixelX_Rnd$rndVal
+
+    VLD1        {dRow0, dRow0Shft}, [pSrc], srcStep      ;// load 16 bytes: the row plus the 8 bytes to its right
+    VEXT        dRow0Shft, dRow0, dRow0Shft, #1          ;// shifted row = bytes 1..8 of those 16
+    VLD1        {dRow1, dRow1Shft}, [pSrc], srcStep
+    VEXT        dRow1Shft, dRow1, dRow1Shft, #1
+    VLD1        {dRow2, dRow2Shft}, [pSrc], srcStep
+    VEXT        dRow2Shft, dRow2, dRow2Shft, #1
+    VLD1        {dRow3, dRow3Shft}, [pSrc], srcStep
+    VEXT        dRow3Shft, dRow3, dRow3Shft, #1
+    VLD1        {dRow4, dRow4Shft}, [pSrc], srcStep
+    VEXT        dRow4Shft, dRow4, dRow4Shft, #1
+    VLD1        {dRow5, dRow5Shft}, [pSrc], srcStep
+    VEXT        dRow5Shft, dRow5, dRow5Shft, #1
+    VLD1        {dRow6, dRow6Shft}, [pSrc], srcStep
+    VEXT        dRow6Shft, dRow6, dRow6Shft, #1
+    VLD1        {dRow7, dRow7Shft}, [pSrc], srcStep
+    VEXT        dRow7Shft, dRow7, dRow7Shft, #1
+    $M_VHADDR   dRow0, dRow0, dRow0Shft
+    $M_VHADDR   dRow1, dRow1, dRow1Shft
+    VST1        dRow0, [pDst@64], dstStep                ;// each store is issued between the averages of later rows
+    $M_VHADDR   dRow2, dRow2, dRow2Shft
+    VST1        dRow1, [pDst@64], dstStep
+    $M_VHADDR   dRow3, dRow3, dRow3Shft
+    VST1        dRow2, [pDst@64], dstStep
+    $M_VHADDR   dRow4, dRow4, dRow4Shft
+    VST1        dRow3, [pDst@64], dstStep
+    $M_VHADDR   dRow5, dRow5, dRow5Shft
+    VST1        dRow4, [pDst@64], dstStep
+    $M_VHADDR   dRow6, dRow6, dRow6Shft
+    VST1        dRow5, [pDst@64], dstStep
+    $M_VHADDR   dRow7, dRow7, dRow7Shft
+    VST1        dRow6, [pDst@64], dstStep
+    VST1        dRow7, [pDst@64], dstStep
+    
+    B           SwitchPredictTypeEnd                     ;// jump over the remaining switch cases
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Does interpolation for the "HalfPixelY" predictType. Each output pixel
+    ;// is the average of a source pixel and the pixel directly below it, so
+    ;// nine source rows are read to produce eight output rows. The parameter
+    ;// "$rndVal" selects the opcode at assembly time: "VRHADD" (rounding
+    ;// halving add) when it is 0, "VHADD" (truncating halving add) otherwise.
+    ;//
+    ;// Syntax:
+    ;// M_MCRECONBLOCK_HalfPixelY $rndVal
+    ;// 
+    ;// Inputs: 
+    ;//     $rndVal: 0 selects VRHADD (round), any other value selects VHADD
+    ;// Outputs: None
+
+    MACRO 
+    M_MCRECONBLOCK_HalfPixelY $rndVal
+
+    LCLS M_VHADDR                               ;// local string variable holding the averaging opcode
+    IF $rndVal = 0
+M_VHADDR SETS "VRHADD"
+    ELSE
+M_VHADDR SETS "VHADD"
+    ENDIF
+
+CaseHalfPixelY_Rnd$rndVal
+    VLD1        dRow0, [pSrc], srcStep          ;// load rows 0..7; pSrc advances by srcStep after each load
+    VLD1        dRow1, [pSrc], srcStep
+    VLD1        dRow2, [pSrc], srcStep
+    VLD1        dRow3, [pSrc], srcStep
+    VLD1        dRow4, [pSrc], srcStep
+    VLD1        dRow5, [pSrc], srcStep
+    VLD1        dRow6, [pSrc], srcStep
+    VLD1        dRow7, [pSrc], srcStep
+    $M_VHADDR   dRow0, dRow0, dRow1
+    VLD1        dRow8, [pSrc], srcStep          ;// ninth row: the row below row 7
+    $M_VHADDR   dRow1, dRow1, dRow2
+    VST1        dRow0, [pDst@64], dstStep       ;// each store is issued between the averages of later rows
+    $M_VHADDR   dRow2, dRow2, dRow3
+    VST1        dRow1, [pDst@64], dstStep
+    $M_VHADDR   dRow3, dRow3, dRow4
+    VST1        dRow2, [pDst@64], dstStep
+    $M_VHADDR   dRow4, dRow4, dRow5
+    VST1        dRow3, [pDst@64], dstStep
+    $M_VHADDR   dRow5, dRow5, dRow6
+    VST1        dRow4, [pDst@64], dstStep
+    $M_VHADDR   dRow6, dRow6, dRow7
+    VST1        dRow5, [pDst@64], dstStep
+    $M_VHADDR   dRow7, dRow7, dRow8
+    VST1        dRow6, [pDst@64], dstStep
+    VST1        dRow7, [pDst@64], dstStep
+
+    B           SwitchPredictTypeEnd            ;// jump over the remaining switch cases
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Does interpolation for the case of "HalfPixelXY" predictType. Both 
+    ;// rounding cases are handled by this single code path. 
+    ;// Typical computation for a row goes like this
+    ;//     1. VLD1        {dRow0, dRow0Shft}, [pSrc], srcStep ;// Load the row and next 8 bytes
+    ;//     2. VEXT        dRow0Shft, dRow0, dRow0Shft, #1     ;// Generate the shifted row
+    ;//     3. VADDL       qSum0, dRow0, dRow0Shft             ;// Generate the sum of row and shifted row
+    ;//     5. VADD        qSum0, qSum0, qSum1                 ;// Add to the sum of next row (odd row sum has rounding value added to it)
+    ;//     6. VSHRN       dRow0, qSum0, #2                    ;// Divide by 4
+    ;//     7. VST1        dRow0, [pDst@64], dstStep           ;// Store
+    ;// Odd rows undergo the following extra computation after step 3
+    ;//     4. VADD        qSum1, qSum1, qRound
+    ;// Every final sum contains exactly one odd row sum, so the rounding value
+    ;// is added 4 times instead of 8 (overall saves 4 instructions).
+    ;// qSum6, qSum7 & qSum8 reuse Q0-Q2, the registers of dRow0..dRow2Shft. Overall 
+    ;// scheduling takes care of this and also minimizes stalls. Rounding value was 
+    ;// modified in ARM register rndVal (originally used for rounding flag) before the 
+    ;// switch. It is then populated into all lanes in this macro. No branching out to 
+    ;// label "SwitchPredictTypeEnd" is required in the end of the macro as these 
+    ;// are the last of switch cases.
+    ;// 
+    ;// Syntax:
+    ;// M_MCRECONBLOCK_HalfPixelXY
+    ;// 
+    ;// Inputs: None
+    ;// Outputs: None
+
+    MACRO 
+    M_MCRECONBLOCK_HalfPixelXY
+
+CaseHalfPixelXY_Rnd0
+CaseHalfPixelXY_Rnd1
+    VLD1        {dRow0, dRow0Shft}, [pSrc], srcStep
+    VDUP        qRound, rndVal                          ;// broadcast the rounding constant prepared before the switch
+    VLD1        {dRow1, dRow1Shft}, [pSrc], srcStep
+    VEXT        dRow0Shft, dRow0, dRow0Shft, #1
+    VLD1        {dRow2, dRow2Shft}, [pSrc], srcStep
+    VEXT        dRow1Shft, dRow1, dRow1Shft, #1
+    VLD1        {dRow3, dRow3Shft}, [pSrc], srcStep
+    VEXT        dRow2Shft, dRow2, dRow2Shft, #1
+    VLD1        {dRow4, dRow4Shft}, [pSrc], srcStep
+    VADDL       qSum0, dRow0, dRow0Shft
+    VLD1        {dRow5, dRow5Shft}, [pSrc], srcStep
+    VADDL       qSum1, dRow1, dRow1Shft
+    VLD1        {dRow6, dRow6Shft}, [pSrc], srcStep
+    VEXT        dRow3Shft, dRow3, dRow3Shft, #1
+    VLD1        {dRow7, dRow7Shft}, [pSrc], srcStep
+    VEXT        dRow4Shft, dRow4, dRow4Shft, #1
+    VLD1        {dRow8, dRow8Shft}, [pSrc], srcStep     ;// ninth row: the row below row 7
+    VADD        qSum1, qSum1, qRound                    ;// odd row sums (1, 3, 5, 7) carry the rounding constant
+    VADDL       qSum2, dRow2, dRow2Shft
+    VEXT        dRow5Shft, dRow5, dRow5Shft, #1
+    VADD        qSum0, qSum0, qSum1
+    VADDL       qSum3, dRow3, dRow3Shft
+    VEXT        dRow6Shft, dRow6, dRow6Shft, #1
+    VADD        qSum1, qSum1, qSum2
+    VSHRN       dRow0, qSum0, #2
+    VADDL       qSum4, dRow4, dRow4Shft
+    VSHRN       dRow1, qSum1, #2
+    VADD        qSum3, qSum3, qRound
+    VADDL       qSum5, dRow5, dRow5Shft
+    VST1        dRow0, [pDst@64], dstStep
+    VEXT        dRow7Shft, dRow7, dRow7Shft, #1
+    VST1        dRow1, [pDst@64], dstStep
+    VEXT        dRow8Shft, dRow8, dRow8Shft, #1
+    VADD        qSum5, qSum5, qRound
+    VADD        qSum2, qSum2, qSum3
+    VADD        qSum3, qSum3, qSum4
+    VADD        qSum4, qSum4, qSum5
+    VSHRN       dRow2, qSum2, #2
+    VSHRN       dRow3, qSum3, #2
+    VSHRN       dRow4, qSum4, #2
+    VADDL       qSum6, dRow6, dRow6Shft                 ;// qSum6 reuses Q0: output row 0 was already stored above
+    VADDL       qSum7, dRow7, dRow7Shft                 ;// qSum7 reuses Q1: output row 1 was already stored above
+    VST1        dRow2, [pDst@64], dstStep
+    VADDL       qSum8, dRow8, dRow8Shft                 ;// qSum8 reuses Q2: output row 2 was stored on the previous line
+    VADD        qSum7, qSum7, qRound
+    VST1        dRow3, [pDst@64], dstStep
+    VST1        dRow4, [pDst@64], dstStep
+    VADD        qSum5, qSum5, qSum6
+    VADD        qSum6, qSum6, qSum7
+    VADD        qSum7, qSum7, qSum8
+    VSHRN       dRow5, qSum5, #2
+    VSHRN       dRow6, qSum6, #2
+    VSHRN       dRow7, qSum7, #2
+    VST1        dRow5, [pDst@64], dstStep
+    VST1        dRow6, [pDst@64], dstStep
+    VST1        dRow7, [pDst@64], dstStep
+
+    MEND
+;// ***************************************************************************
+
+;// Input/Output Registers (r4-r6 are loaded from stack arguments by the function body)
+pSrc                  RN 0
+srcStep               RN 1
+pSrcResidue           RN 2
+pDst                  RN 3
+dstStep               RN 4
+predictType           RN 5
+rndVal                RN 6
+
+;// Local Scratch Registers (both reuse r0, the pSrc register)
+pDstCopy              RN 0
+return                RN 0
+
+;// Neon Registers: source rows (dRowN) and their one-byte-shifted companions (dRowNShft)
+dRow0                 DN D0.U8
+dRow0Shft             DN D1.U8
+dRow1                 DN D2.U8
+dRow1Shft             DN D3.U8
+dRow2                 DN D4.U8
+dRow2Shft             DN D5.U8
+dRow3                 DN D6.U8
+dRow3Shft             DN D7.U8
+dRow4                 DN D8.U8
+dRow4Shft             DN D9.U8
+dRow5                 DN D10.U8
+dRow5Shft             DN D11.U8
+dRow6                 DN D12.U8
+dRow6Shft             DN D13.U8
+dRow7                 DN D14.U8
+dRow7Shft             DN D15.U8
+dRow8                 DN D16.U8
+dRow8Shft             DN D17.U8
+
+
+qSum0                 QN Q9.U16
+qSum1                 QN Q10.U16
+qSum2                 QN Q11.U16
+qSum3                 QN Q12.U16
+qSum4                 QN Q13.U16
+qSum5                 QN Q14.U16
+qSum6                 QN Q0.U16         ;// Q0-Q2 overlap D0-D5, i.e. dRow0..dRow2Shft
+qSum7                 QN Q1.U16
+qSum8                 QN Q2.U16
+
+qRound                QN Q15.U16
+
+dDst0                 DN D0.U8          ;// dDst0-7 (D0-D7) are used by the residue-add stage
+dDst1                 DN D1.U8
+dDst2                 DN D2.U8
+dDst3                 DN D3.U8
+dDst4                 DN D4.U8
+dDst5                 DN D5.U8
+dDst6                 DN D6.U8
+dDst7                 DN D7.U8
+
+qRes0                 QN Q4.S16         ;// qRes0-7 (Q4-Q11) receive the signed 16-bit residue rows
+qRes1                 QN Q5.S16
+qRes2                 QN Q6.S16
+qRes3                 QN Q7.S16
+qRes4                 QN Q8.S16
+qRes5                 QN Q9.S16
+qRes6                 QN Q10.S16
+qRes7                 QN Q11.S16
+
+    ;// Function header
+    M_START     omxVCM4P2_MCReconBlock, r6, d15
+    ;// Define stack arguments
+    M_ARG       Arg_dstStep,        4
+    M_ARG       Arg_predictType,    4
+    M_ARG       Arg_rndVal,         4
+    ;// Load argument from the stack
+    M_LDR       dstStep, Arg_dstStep
+    M_LDR       predictType, Arg_predictType
+    M_LDR       rndVal, Arg_rndVal
+    ADD         predictType, rndVal, predictType, LSL #1   ;// switch index = predictType*2 + rndVal
+    RSB         rndVal, rndVal, #2              ;// rndVal = 2 - rndVal: rounding constant used by HalfPixelXY
+    
+    ;// Dispatch on the combined index computed above.
+    ;// NOTE(review): assumes M_SWITCH maps index 0..7 to the M_CASE entries in
+    ;// listed order - confirm in armCOMM_s.h. The corresponding labels (e.g.
+    ;// CaseIntegerPixel_Rnd0) are embedded in the macros following M_ENDSWITCH.
+    ;// "M_MCRECONBLOCK_IntegerPixel" and "M_MCRECONBLOCK_HalfPixelXY" each define 
+    ;// both rounding labels, while "M_MCRECONBLOCK_HalfPixelX" and 
+    ;// "M_MCRECONBLOCK_HalfPixelY" are expanded twice, once per rounding case.
+    ;// All these together implement the interpolation functionality
+    
+    M_SWITCH    predictType
+        M_CASE      CaseIntegerPixel_Rnd0
+        M_CASE      CaseIntegerPixel_Rnd1
+        M_CASE      CaseHalfPixelX_Rnd0
+        M_CASE      CaseHalfPixelX_Rnd1
+        M_CASE      CaseHalfPixelY_Rnd0
+        M_CASE      CaseHalfPixelY_Rnd1
+        M_CASE      CaseHalfPixelXY_Rnd0
+        M_CASE      CaseHalfPixelXY_Rnd1
+    M_ENDSWITCH
+
+    M_MCRECONBLOCK_IntegerPixel
+    M_MCRECONBLOCK_HalfPixelX 0
+    M_MCRECONBLOCK_HalfPixelX 1
+    M_MCRECONBLOCK_HalfPixelY 0
+    M_MCRECONBLOCK_HalfPixelY 1
+    M_MCRECONBLOCK_HalfPixelXY
+SwitchPredictTypeEnd
+
+    ;// After interpolation is done, the residue needs to be added. This is done 
+    ;// only when the "pSrcResidue" parameter of the function is not NULL.
+    ;// The code below is completely unrolled: each predicted row and its 
+    ;// residue row are loaded, added with saturation to 8 bits, and the 
+    ;// result is stored back to the destination.
+    
+    CMP         pSrcResidue, #0
+    SUBNE       pDst, pDst, dstStep, LSL #3     ;// Restoring pDst: rewind the 8 post-incremented rows
+    MOVNE       pDstCopy, pDst                  ;// second pointer (r0) used for the write-back stores
+    BEQ         pSrcResidueConditionEnd         ;// NULL residue: nothing to add
+pSrcResidueNotNull    
+    VLD1        dDst0, [pDst@64], dstStep       ;// predicted row (8 x U8)
+    VLD1        qRes0, [pSrcResidue@128]!       ;// matching residue row (8 x S16); pointer advances by 16 bytes
+    VLD1        dDst1, [pDst@64], dstStep
+    VLD1        qRes1, [pSrcResidue@128]!
+    VLD1        dDst2, [pDst@64], dstStep
+    VLD1        qRes2, [pSrcResidue@128]!
+    VADDW       qRes0, qRes0, dDst0             ;// residue + widened predicted pixel
+    VLD1        dDst3, [pDst@64], dstStep
+    VADDW       qRes1, qRes1, dDst1
+    VLD1        qRes3, [pSrcResidue@128]!
+    VADDW       qRes2, qRes2, dDst2
+    VLD1        dDst4, [pDst@64], dstStep
+    VQMOVUN     dDst0, qRes0                    ;// saturate the signed 16-bit sum to unsigned 8 bits
+    VLD1        qRes4, [pSrcResidue@128]!
+    VADDW       qRes3, qRes3, dDst3
+    VLD1        dDst5, [pDst@64], dstStep
+    VQMOVUN     dDst1, qRes1
+    VLD1        qRes5, [pSrcResidue@128]!
+    VADDW       qRes4, qRes4, dDst4
+    VLD1        dDst6, [pDst@64], dstStep
+    VQMOVUN     dDst2, qRes2
+    VLD1        qRes6, [pSrcResidue@128]!
+    VADDW       qRes5, qRes5, dDst5
+    VLD1        dDst7, [pDst@64], dstStep
+    VQMOVUN     dDst3, qRes3
+    VLD1        qRes7, [pSrcResidue@128]!
+    VADDW       qRes6, qRes6, dDst6
+    VST1        dDst0, [pDstCopy@64], dstStep   ;// write rows back through the copy of pDst
+    VQMOVUN     dDst4, qRes4
+    VST1        dDst1, [pDstCopy@64], dstStep
+    VADDW       qRes7, qRes7, dDst7
+    VST1        dDst2, [pDstCopy@64], dstStep
+    VQMOVUN     dDst5, qRes5
+    VST1        dDst3, [pDstCopy@64], dstStep
+    VQMOVUN     dDst6, qRes6
+    VST1        dDst4, [pDstCopy@64], dstStep
+    VQMOVUN     dDst7, qRes7
+    VST1        dDst5, [pDstCopy@64], dstStep
+    VST1        dDst6, [pDstCopy@64], dstStep
+    VST1        dDst7, [pDstCopy@64], dstStep
+    
+pSrcResidueConditionEnd
+    MOV         return, #OMX_Sts_NoErr          ;// set the return value to OMX_Sts_NoErr
+
+    M_END
+    ENDIF ;// CortexA8
+    END
+;// ***************************************************************************
+;// omxVCM4P2_MCReconBlock ends
+;// ***************************************************************************
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
new file mode 100755
index 0000000..a73f64a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
@@ -0,0 +1,320 @@
+; **********
+; * 
+; * File Name:  omxVCM4P2_PredictReconCoefIntra_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   12290
+; * Date:       Wednesday, April 9, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; * 
+; * Description:
+; * Contains module for DC/AC coefficient prediction
+; *
+; * 
+; * Function: omxVCM4P2_PredictReconCoefIntra
+; *
+; * Description:
+; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+; * to the function call, prediction direction (predDir) should be selected
+; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]  pSrcDst      pointer to the coefficient buffer which contains the 
+; *                    quantized coefficient residuals (PQF) of the current 
+; *                    block; must be aligned on a 4-byte boundary. The 
+; *                    output coefficients are saturated to the range 
+; *                    [-2048, 2047].
+; * [in]  pPredBufRow  pointer to the coefficient row buffer; must be aligned
+; *                    on a 4-byte boundary.
+; * [in]  pPredBufCol  pointer to the coefficient column buffer; must be 
+; *                    aligned on a 4-byte boundary.
+; * [in]  curQP        quantization parameter of the current block. curQP may 
+; *                    be equal to predQP, especially when the current block 
+; *                    and the predictor block are in the same macroblock.
+; * [in]  predQP       quantization parameter of the predictor block
+; * [in]  predDir      indicates the prediction direction which takes one
+; *                    of the following values:
+; *                    OMX_VIDEO_HORIZONTAL    predict horizontally
+; *                    OMX_VIDEO_VERTICAL        predict vertically
+; * [in]  ACPredFlag   a flag indicating if AC prediction should be
+; *                    performed. It is equal to ac_pred_flag in the bit
+; *                    stream syntax of MPEG-4
+; * [in]  videoComp    video component type (luminance, chrominance or
+; *                    alpha) of the current block
+; * [out] pSrcDst      pointer to the coefficient buffer which contains
+; *                    the quantized coefficients (QF) of the current
+; *                    block
+; * [out] pPredBufRow  pointer to the updated coefficient row buffer
+; * [out] pPredBufCol  pointer to the updated coefficient column buffer
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - Bad arguments 
+; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
+; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31, 
+; *   predQP > 31, predDir is outside [1,2].
+; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not 
+; *   4-byte aligned.
+; *
+; *********
+     
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+       M_VARIANTS CortexA8
+       
+             
+
+       IMPORT        armVCM4P2_Reciprocal_QP_S32
+       IMPORT        armVCM4P2_Reciprocal_QP_S16
+       IMPORT        armVCM4P2_DCScaler
+       
+        IF CortexA8
+;// Input Arguments (predQP, predDir, ACPredFlag and videoComp are loaded from the stack by the function body)
+
+pSrcDst          RN 0
+pPredBufRow      RN 1
+pPredBufCol      RN 2
+curQP            RN 3
+QP               RN 3          ;// second name for curQP (same register)
+predQP           RN 4
+predDir          RN 5
+ACPredFlag       RN 6
+videoComp        RN 7
+
+;// Local Variables (several names share one register; compare the RN numbers)
+
+shortVideoHeader RN 4          ;// declared but not referenced by the code in this file
+dcScaler         RN 4          ;// shares r4 with predQP
+index            RN 6
+predCoeffTable   RN 7          ;// shares r7 with videoComp
+temp1            RN 6          ;// shares r6 with index and ACPredFlag
+temp2            RN 9
+temp             RN 14
+Const            RN 8
+temppPredColBuf  RN 8
+tempPred         RN 9          ;// shares r9 with temp2
+
+absCoeffDC       RN 8
+negdcScaler      RN 10
+Rem              RN 11
+temp3            RN 12
+
+dcRowbufCoeff    RN 10         ;// shares r10 with negdcScaler
+dcColBuffCoeff   RN 11         ;// shares r11 with Rem
+Return           RN 0
+
+;//NEON Registers
+
+qPredRowBuf       QN Q0.S16
+dPredRowBuf0      DN D0.S16
+dPredRowBuf1      DN D1.S16
+
+
+
+
+qCoeffTab         QN Q1.S32
+
+qPredQP           QN Q2.S16
+dPredQP0          DN D4.S16
+dPredQP1          DN D5.S16
+
+
+qtemp1            QN Q3.S32    ;// qtemp1 and qtemp are the same register (Q3) viewed as S32 and S16
+qtemp             QN Q3.S16
+
+dtemp0            DN D6.S16    ;// D6/D7 are the two halves of Q3
+dtemp1            DN D7.S16
+
+dtemp2            DN D8.S16
+dtemp3            DN D9.S16
+
+dtemp4            DN D2.S16    ;// D2-D5 overlap qCoeffTab (Q1) and qPredQP (Q2)
+dtemp5            DN D3.S16
+dtemp6            DN D4.S16
+dtemp7            DN D5.S16
+ 
+qtempPred1        QN Q5.S32    ;// qtempPred1 and qtempPred are the same register (Q5) viewed as S32 and S16
+qtempPred         QN Q5.S16
+
+dtempPred0        DN D10.S16   ;// D10/D11 are the two halves of Q5
+dtempPred1        DN D11.S16
+ 
+  
+
+      M_START   omxVCM4P2_PredictReconCoefIntra,r11,d11
+
+      ;// Name the stack-passed arguments (predQP, predDir, ACPredFlag, videoComp), 4 bytes each
+    
+      M_ARG           predQPonStack,4  
+      M_ARG           predDironStack,4
+      M_ARG           ACPredFlagonStack,4
+      M_ARG           videoComponStack,4
+      
+      ;// DC Prediction
+
+      M_LDR           videoComp,videoComponStack                     ;// Load videoComp From Stack
+            
+      M_LDR           predDir,predDironStack                         ;// Load Prediction direction
+      ;// DC Scaler calculation: dcScaler = byte at armVCM4P2_DCScaler + videoComp*32 + QP
+      LDR             index, =armVCM4P2_DCScaler
+      ADD             index,index,videoComp,LSL #5
+      LDRB            dcScaler,[index,QP]
+
+       
+      LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S16   ;// Loading the table with entries 32767/(1 to 63) 
+      CMP             predDir,#2                                     ;// Check if the Prediction direction is vertical
+
+      ;// Calculate tempPred
+            
+      LDREQSH         absCoeffDC,[pPredBufRow]                       ;// If vertical load the coeff from Row Prediction Buffer
+      LDRNESH         absCoeffDC,[pPredBufCol]                       ;// If horizontal load the coeff from column Prediction Buffer
+      
+      RSB             negdcScaler,dcScaler,#0                        ;// negdcScaler=-dcScaler   
+      MOV             temp1,absCoeffDC                               ;// Keep the signed prediction coeff in temp1 for the sign checks
+      CMP             temp1,#0                                       
+      RSBLT           absCoeffDC,temp1,#0                            ;// calculate absolute val of prediction coeff
+      
+      ADD             temp,dcScaler,dcScaler                         ;// temp=2*dcScaler: byte offset of a 16-bit table entry
+      LDRH            temp,[predCoeffTable,temp]                     ;// Load value from coeff table for performing division using multiplication
+      SMULBB          tempPred,temp,absCoeffDC                       ;// tempPred=abs(pPredBufRow(Col)[0])*32767/dcScaler
+      ADD             temp3,dcScaler,#1
+      LSR             tempPred,tempPred,#15                          ;// tempPred=abs(pPredBufRow(Col)[0])/dcScaler                  
+      LSR             temp3,temp3,#1                                 ;// temp3=round(dcScaler/2)           
+      MLA             Rem,negdcScaler,tempPred,absCoeffDC            ;// Remainder Rem=abs(pPredBufRow(Col)[0])-tempPred*dcScaler
+      
+      LDRH            dcRowbufCoeff,[pPredBufCol]                    ;// read pPredBufCol[0] before it is overwritten below
+      
+      CMP             Rem,temp3                                      ;// compare Rem with (dcScaler/2)
+      ADDGE           tempPred,#1                                    ;// tempPred=tempPred+1 if Rem>=(dcScaler/2)
+      CMP             temp1,#0
+      RSBLT           tempPred,tempPred,#0                           ;// tempPred=-tempPred if the prediction coeff was negative
+       
+      STRH            dcRowbufCoeff,[pPredBufRow,#-16]               ;// store that value 16 bytes before pPredBufRow
+       
+
+      LDRH            temp,[pSrcDst]                                 ;// temp=pSrcDst[0]
+      ADD             temp,temp,tempPred                             ;// temp=pSrcDst[0]+tempPred
+      SSAT16          temp,#12,temp                                  ;// clip temp to [-2048,2047]
+      SMULBB          dcColBuffCoeff,temp,dcScaler                   ;// dcColBuffCoeff=clipped(pSrcDst[0])*dcScaler           
+      M_LDR           ACPredFlag,ACPredFlagonStack
+      STRH            dcColBuffCoeff,[pPredBufCol]                   ;// pPredBufCol[0]=scaled DC coefficient
+      
+
+       ;// AC Prediction
+      
+      M_LDR           predQP,predQPonStack
+      
+      CMP             ACPredFlag,#1                                  ;// Check if the AC prediction flag is set or not
+      BNE             Exit                                           ;// If not set Exit
+      CMP             predDir,#2                                     ;// Check the Prediction direction (flags are consumed by BNE Horizontal below)
+      LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S32   ;// Loading the table with entries 0x1ffff/(1 to 63) 
+      MOV             Const,#4
+      MUL             curQP,curQP,Const                              ;// curQP=4*curQP
+      VDUP            dPredQP0,predQP                                ;// broadcast predQP to all 16-bit lanes of dPredQP0
+      LDR             temp2,[predCoeffTable,curQP]                   ;// temp2=0x1ffff/curQP
+      VDUP            qCoeffTab,temp2                                ;// broadcast it to all 32-bit lanes of qCoeffTab
+      BNE             Horizontal                                     ;// If the Prediction direction is horizontal branch to Horizontal
+      
+     
+      
+      ;// Vertical
+      ;//Calculating tempPred
+
+      VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufRow]      ;// Loading pPredBufRow[i]:i=0 to 7
+      
+      VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i]: i=0 to 3
+      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=0 to 3
+      
+      VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i] : i=4 to 7      
+
+      VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP) : i=0 to 3
+      VSHRN           dPredQP1,qtempPred1,#0                         ;// narrow qtempPred1[i] to 16 bits; parked in dPredQP1 because Q5 is reused next
+      
+      
+      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=4 to 7
+      VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP)  : i=4 to 7
+      VLD1            {dtemp0,dtemp1},[pSrcDst]                      ;//Loading pSrcDst[i] : i=0 to 7
+      VSHRN           dtempPred1,qtempPred1,#0                       ;// narrow qtempPred1[i] to 16 bits
+      VMOV            dtempPred0,dPredQP1                            ;// bring back the parked first half: qtempPred now holds i=0 to 7
+      
+      ;//updating source and row prediction buffer contents      
+      VADD            qtemp,qtemp,qtempPred                          ;//pSrcDst[i]=pSrcDst[i]+qtempPred[i]: i=0 to 7 
+      VQSHL           qtemp,qtemp,#4                                 ;//Clip to [-2048,2047]: saturating shift left ...
+      LDRH            dcRowbufCoeff,[pPredBufRow]                    ;//Loading Dc Value of Row Prediction buffer
+      VSHR            qtemp,qtemp,#4                                 ;// ... followed by shift right by the same amount
+      
+      VST1            {dtemp0,dtemp1},[pSrcDst]                      ;//storing back the updated values 
+      VST1            {dtemp0,dtemp1},[pPredBufRow]                  ;//storing back the updated row prediction values                      
+      STRH            dcRowbufCoeff,[pPredBufRow]                    ;// restore pPredBufRow[0] (DC), which the vector store above overwrote
+      
+      B               Exit
+
+Horizontal
+
+      ;// Calculating tempPred
+
+            
+
+      VLD1            {dPredRowBuf0,dPredRowBuf1},[pPredBufCol]      ;// Loading pPredBufCol[i]:i=0 to 7
+      VMULL           qtemp1,dPredRowBuf0,dPredQP0                   ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i]: i=0 to 3
+      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=0 to 3
+      
+      VMULL           qtemp1,dPredRowBuf1,dPredQP0                   ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i] : i=4 to 7      
+
+      VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP) : i=0 to 3
+      VSHRN           dPredQP1,qtempPred1,#0                         ;// narrow qtempPred1[i] to 16 bits; parked in dPredQP1 because Q5 is reused next
+      
+      
+      VMUL            qtempPred1,qtemp1,qCoeffTab                    ;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=4 to 7
+      
+      MOV             temppPredColBuf,pPredBufCol                    ;// keep the original pointer; pPredBufCol is post-incremented below
+      VRSHR           qtempPred1,qtempPred1,#17                      ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP)  : i=4 to 7
+      VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Loading coefficients Interleaving by 4
+      VSHRN           dtempPred1,qtempPred1,#0                       ;// narrow qtempPred1[i] to 16 bits
+      VMOV            dtempPred0,dPredQP1                            ;// bring back the parked first half
+      
+      ;// Updating source and column prediction buffer contents     
+      ADD             temp2,pSrcDst,#32                              ;// temp2 = pSrcDst + 32 bytes (16 coefficients further)
+      VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]          ;// Loading next 16 coefficients Interleaving by 4
+      VUZP            dtemp0,dtemp4                                  ;// dtemp0 now holds every 8th coefficient (offsets 0,8,16,24)
+      VADD            dtemp0,dtemp0,dtempPred0                       ;// Adding tempPred to coeffs
+      VQSHL           dtemp0,dtemp0,#4                               ;// Clip to [-2048,2047]
+      VSHR            dtemp0,dtemp0,#4
+      VST1            {dtemp0},[pPredBufCol]!                        ;// Updating Prediction column buffer
+      VZIP            dtemp0,dtemp4                                  ;// undo the VUZP so the VLD4 layout is restored for the stores
+      VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst]        ;// Updating source coeffs         
+      VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]!
+      
+      MOV             temp1,temp2                                    ;// remember the address of the next 16 coefficients for the store below
+      VLD4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp2]!         ;// Loading  coefficients Interleaving by 4
+      
+      VLD4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]
+      VUZP            dtemp0,dtemp4                                  ;// dtemp0 now holds every 8th coefficient of this half
+      VADD            dtemp0,dtemp0,dtempPred1
+      VQSHL           dtemp0,dtemp0,#4                               ;// Clip to [-2048,2047]
+      VSHR            dtemp0,dtemp0,#4
+      VST1            {dtemp0},[pPredBufCol]!
+      VZIP            dtemp0,dtemp4
+      VST4            {dtemp0,dtemp1,dtemp2,dtemp3},[temp1]
+      STRH            dcColBuffCoeff,[temppPredColBuf]               ;// restore column-buffer entry 0 with the DC value from the DC section
+      VST4            {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]
+      
+Exit
+
+      STRH            temp,[pSrcDst]                                 ;// pSrcDst[0] = temp, last written by the SSAT16 in the DC section
+          
+ 
+      MOV             Return,#OMX_Sts_NoErr 
+ 
+      M_END
+      ENDIF
+
+
+       END
+
+
+   
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
new file mode 100755
index 0000000..bd0ad1f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
@@ -0,0 +1,162 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_QuantInvInter_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   12290
+; * Date:       Wednesday, April 9, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for inter reconstruction
+; * 
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvInter_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst          pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in] QP              quantization parameter (quantiser_scale)
+; * [in] videoComp      (Intra version only.) Video component type of the
+; *                  current block. Takes one of the following flags:
+; *                  OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; *                  OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; *                       shortVideoHeader==1 selects linear intra DC mode,
+; *                  and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out]    pSrcDst      pointer to the output (dequantized) intra/inter block.  Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; *    - If pSrcDst is NULL or is not 16-byte aligned.
+; *      or
+; *    - If QP <= 0.
+; *      or
+; *    - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+; */
+
+   INCLUDE omxtypes_s.h
+   INCLUDE armCOMM_s.h
+
+   M_VARIANTS CortexA8
+
+     IF CortexA8
+     
+     
+;//Input Arguments
+pSrcDst            RN 0                           ;// In/out: block of 64 16-bit coefficients, processed in place
+QP                 RN 1                           ;// Quantization parameter; decremented below when it is even
+     
+
+;//Local Variables
+Count              RN 3                           ;// Coefficients still to be processed
+doubleQP           RN 4                           ;// 2*QP
+Return             RN 0                           ;// Status code; shares r0 with pSrcDst
+;// Neon registers
+;// Several names alias one register: qSign0/qResult0 are both Q3 and qSign1/qResult1
+;// are both Q4, so the widened sign term is reused in place as the VMLAL accumulator.
+dQP10              DN D0.S32[0]
+qQP1               QN Q0.S32
+
+dQP1               DN D0.S16
+dMinusQP1          DN D1.S16
+
+dCoeff0            DN D2.S16
+dCoeff1            DN D3.S16   
+
+qResult0           QN Q3.S32
+dResult0           DN D7.S16
+qSign0             QN Q3.S32
+dSign0             DN D6.S16
+
+qResult1           QN Q4.S32
+dResult1           DN D8.S16
+qSign1             QN Q4.S32
+dSign1             DN D8.S16
+
+d2QP0              DN D10.S32[0]
+q2QP0              QN Q5.S32
+d2QP               DN D10.S16
+
+dZero0             DN D11.S16
+dZero1             DN D12.S16
+dConst0            DN D13.S16
+
+;// Entry point. Each output is sign(c)*QP1 + c*2*QP clipped to [-2048,2047], or 0 when c == 0.
+     M_START omxVCM4P2_QuantInvInter_I,r4,d13
+;// NOTE(review): r4,d13 presumably tell M_START which callee-saved registers to preserve - confirm in armCOMM_s.h
+         
+         
+         ADD      doubleQP,QP,QP                   ;// doubleQP= 2*QP
+         VMOV     d2QP0,doubleQP                   ;// Put doubleQP in 32-bit lane 0 of D10
+         VDUP     q2QP0,d2QP0                      ;// Broadcast doubleQP to all four 32-bit lanes
+         TST      QP,#1                            ;// Z flag set when QP is even
+         VLD1     {dCoeff0,dCoeff1},[pSrcDst]      ;// Load first 8 values to Coeff0,Coeff1
+         SUBEQ    QP,QP,#1                         ;// QP even: use QP-1
+         VMOV     dQP10,QP                         ;// If QP is even then QP1=QP-1 else QP1=QP     
+         MOV      Count,#64                        ;// 64 coefficients, 8 per loop pass
+         VDUP     qQP1,dQP10                       ;// Broadcast QP1 to all four 32-bit lanes
+         VSHRN    d2QP,q2QP0,#0                    ;// 2*QP narrowed to 16-bit lanes
+         VEOR     dConst0,dConst0,dConst0          ;// dConst0 = 0
+         VSHRN    dQP1,qQP1,#0                     ;// QP1 truncated to 16 bits
+         VSUB     dMinusQP1,dConst0,dQP1           ;// dMinusQP1=-QP1
+
+Loop                       
+         
+        ;//Performing Inverse Quantization
+         
+         VCLT     dSign0,dCoeff0, #0               ;// Mask of lanes where Coeff0 < 0
+         VCLT     dSign1,dCoeff1, #0               ;// Mask of lanes where Coeff1 < 0
+         VCEQ     dZero0,dCoeff0,#0                ;// Mask of lanes where Coeff0 == 0
+         VBSL     dSign0,dMinusQP1,dQP1            ;// dSign0 = -QP1 if Coeff0< 0 else QP1
+         VCEQ     dZero1,dCoeff1,#0                ;// Mask of lanes where Coeff1 == 0
+         VBSL     dSign1,dMinusQP1,dQP1            ;// dSign1 = -QP1 if Coeff1< 0 else QP1
+         VMOVL    qSign0,dSign0                    ;// Sign extend to 32 bits; qSign0 is qResult0 (Q3)
+         VMOVL    qSign1,dSign1                    ;// Sign extend to 32 bits; qSign1 is qResult1 (Q4)
+         VMLAL    qResult0,dCoeff0,d2QP            ;// qResult0[i] = -QP1 + Coeff0[i]*2*QP  if Coeff0[i] < 0
+                                                   ;// qResult0[i] =  QP1 + Coeff0[i]*2*QP  otherwise
+         VMLAL    qResult1,dCoeff1,d2QP            ;// qResult1[i] = -QP1 + Coeff1[i]*2*QP  if Coeff1[i] < 0
+                                                   ;// qResult1[i] =  QP1 + Coeff1[i]*2*QP  otherwise
+         ;// Clip Result to [-2048,2047]                     
+         ;// Saturating shift left by 20, then shift right by 4+16 = 20 in total
+         VQSHL    qResult0,qResult0,#20            ;// clip to [-2048,2047]
+         VQSHL    qResult1,qResult1,#20
+                 
+         VSHR     qResult0,qResult0,#4  
+         VSHR     qResult1,qResult1,#4
+         VSHRN    dResult0,qResult0,#16            ;// Narrow the clipped Value to Halfword
+         VSHRN    dResult1,qResult1,#16 
+         VBIT     dResult0,dConst0,dZero0          ;// Force 0 where the input coefficient was 0
+         VBIT     dResult1,dConst0,dZero1          ;// Force 0 where the input coefficient was 0
+         
+         VST1     {dResult0,dResult1},[pSrcDst]!   ;// Store the result
+         SUBS     Count,Count,#8
+         VLD1     {dCoeff0,dCoeff1},[pSrcDst]      ;// Preload next 8 values. NOTE(review): last pass reads 16 bytes past the block
+         
+         
+         BGT      Loop
+
+         MOV      Return,#OMX_Sts_NoErr
+
+
+         M_END
+         ENDIF
+         
+
+        END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
new file mode 100755
index 0000000..e00591f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
@@ -0,0 +1,210 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_QuantInvIntra_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   12290
+; * Date:       Wednesday, April 9, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for intra reconstruction
+; * 
+; *
+; *
+; *
+; *
+; * 
+; * Function: omxVCM4P2_QuantInvIntra_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]    pSrcDst        pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in]    QP            quantization parameter (quantiser_scale)
+; * [in]    videoComp          (Intra version only.) Video component type of the
+; *                    current block. Takes one of the following flags:
+; *                    OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; *                    OMX_VC_ALPHA.
+; * [in]    shortVideoHeader  a flag indicating presence of short_video_header;
+; *                           shortVideoHeader==1 selects linear intra DC mode,
+; *                    and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out]    pSrcDst        pointer to the output (dequantized) intra/inter block.  Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; *    -    If pSrcDst is NULL or is not 16-byte aligned.
+; *      or
+; *    - If QP <= 0.
+; *      or
+; *    - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+; */
+
+   INCLUDE omxtypes_s.h
+   INCLUDE armCOMM_s.h
+   
+   M_VARIANTS CortexA8
+   
+   
+   IMPORT        armVCM4P2_DCScaler
+ 
+     IF CortexA8
+     
+     
+;//Input Arguments
+pSrcDst            RN 0                           ;// In/out: block of 64 16-bit coefficients, processed in place
+QP                 RN 1                           ;// Quantization parameter; decremented below when it is even
+videoComp          RN 2                           ;// Selects a 32-byte row of armVCM4P2_DCScaler
+shortVideoHeader   RN 3                           ;// Non-zero: fixed dcScaler of 8; zero: table lookup
+     
+
+;//Local Variables
+;// r4 and r5 each carry two names: the DC-phase value is dead before the AC-phase name is written
+dcScaler           RN 4                           ;// DC scale factor; last read by SMULBB
+temp               RN 14                          ;// Dequantized DC value, held in r14 across the loop. NOTE(review): assumes M_START/M_END preserve lr - confirm
+index              RN 5                           ;// Address into armVCM4P2_DCScaler
+
+
+Count              RN 5                           ;// Coefficients still to be processed
+doubleQP           RN 4                           ;// 2*QP
+Return             RN 0                           ;// Status code; shares r0 with pSrcDst
+
+
+;// Neon registers
+;// Several names alias one register: qSign0/qResult0 are both Q3 and qSign1/qResult1
+;// are both Q4, so the widened sign term is reused in place as the VMLAL accumulator.
+dQP10              DN D0.S32[0]
+qQP1               QN Q0.S32
+
+dQP1               DN D0.S16
+dMinusQP1          DN D1.S16
+
+dCoeff0            DN D2.S16
+dCoeff1            DN D3.S16   
+
+qResult0           QN Q3.S32
+dResult0           DN D7.S16
+qSign0             QN Q3.S32
+dSign0             DN D6.S16
+
+qResult1           QN Q4.S32
+dResult1           DN D8.S16
+qSign1             QN Q4.S32
+dSign1             DN D8.S16
+
+d2QP0              DN D10.S32[0]
+q2QP0              QN Q5.S32
+d2QP               DN D10.S16
+
+dZero0             DN D11.S16
+dZero1             DN D4.S16
+dConst0            DN D5.S16
+
+
+;// Entry point. Element 0 (DC) becomes sat12(coeff0 * dcScaler). Every other element
+;// becomes sign(c)*QP1 + c*2*QP clipped to [-2048,2047], or 0 when c == 0.
+;// NOTE(review): r5,d11 presumably tell M_START which callee-saved registers to preserve - confirm in armCOMM_s.h
+     
+     M_START omxVCM4P2_QuantInvIntra_I,r5,d11
+
+
+        ;// Perform Inverse Quantization for DC coefficient
+
+        TEQ       shortVideoHeader,#0      ;// Test if short Video Header flag =0             
+        MOVNE     dcScaler,#8              ;// if shortVideoHeader is non zero dcScaler=8
+        BNE       calDCVal
+        
+        LDR       index, =armVCM4P2_DCScaler
+      ADD       index,index,videoComp,LSL #5   ;// index += videoComp*32
+      LDRB      dcScaler,[index,QP]            ;// dcScaler = byte at index + QP
+
+        ;//M_CalDCScalar  shortVideoHeader,videoComp, QP
+
+calDCVal
+
+        LDRH     temp,[pSrcDst]           ;// Quantized DC coefficient (element 0)
+        SMULBB   temp,temp,dcScaler       ;// dcCoeff = dcScaler * Quantized DC coefficient(from memory)
+        SSAT     temp,#12,temp            ;// Saturating to 12 bits
+      
+
+
+        ;// Perform Inverse Quantization for Ac Coefficients
+        ;// The loop also runs over element 0; that slot is overwritten with temp after the loop
+         
+         
+         ADD      doubleQP,QP,QP                   ;// doubleQP= 2*QP
+         VMOV     d2QP0,doubleQP                   ;// Put doubleQP in 32-bit lane 0 of D10
+         VDUP     q2QP0,d2QP0                      ;// Broadcast doubleQP to all four 32-bit lanes
+         TST      QP,#1                            ;// Z flag set when QP is even
+         VLD1     {dCoeff0,dCoeff1},[pSrcDst]      ;// Load first 8 values to Coeff0,Coeff1
+         SUBEQ    QP,QP,#1                         ;// QP even: use QP-1
+         VMOV     dQP10,QP                         ;// If QP is even then QP1=QP-1 else QP1=QP     
+         MOV      Count,#64                        ;// 64 coefficients, 8 per loop pass
+         VDUP     qQP1,dQP10                       ;// Broadcast QP1 to all four 32-bit lanes
+         VSHRN    d2QP,q2QP0,#0                    ;// 2*QP narrowed to 16-bit lanes
+         VEOR     dConst0,dConst0,dConst0          ;// dConst0 = 0
+         VSHRN    dQP1,qQP1,#0                     ;// QP1 truncated to 16 bits
+         VSUB     dMinusQP1,dConst0,dQP1           ;// dMinusQP1=-QP1
+
+Loop                       
+         
+        ;//Performing Inverse Quantization
+         
+         VCLT     dSign0,dCoeff0, #0               ;// Mask of lanes where Coeff0 < 0
+         VCLT     dSign1,dCoeff1, #0               ;// Mask of lanes where Coeff1 < 0
+         VCEQ     dZero0,dCoeff0,#0                ;// Mask of lanes where Coeff0 == 0
+         VBSL     dSign0,dMinusQP1,dQP1            ;// dSign0 = -QP1 if Coeff0< 0 else QP1
+         VCEQ     dZero1,dCoeff1,#0                ;// Mask of lanes where Coeff1 == 0
+         VBSL     dSign1,dMinusQP1,dQP1            ;// dSign1 = -QP1 if Coeff1< 0 else QP1
+         VMOVL    qSign0,dSign0                    ;// Sign extend to 32 bits; qSign0 is qResult0 (Q3)
+         VMOVL    qSign1,dSign1                    ;// Sign extend to 32 bits; qSign1 is qResult1 (Q4)
+         VMLAL    qResult0,dCoeff0,d2QP            ;// qResult0[i] = -QP1 + Coeff0[i]*2*QP  if Coeff0[i] < 0
+                                                   ;// qResult0[i] =  QP1 + Coeff0[i]*2*QP  otherwise
+         VMLAL    qResult1,dCoeff1,d2QP            ;// qResult1[i] = -QP1 + Coeff1[i]*2*QP  if Coeff1[i] < 0
+                                                   ;// qResult1[i] =  QP1 + Coeff1[i]*2*QP  otherwise
+         ;// Clip Result to [-2048,2047]                     
+         ;// Saturating shift left by 20, then shift right by 4+16 = 20 in total
+         VQSHL    qResult0,qResult0,#20            ;// clip to [-2048,2047]
+         VQSHL    qResult1,qResult1,#20
+                 
+         VSHR     qResult0,qResult0,#4  
+         VSHR     qResult1,qResult1,#4
+         VSHRN    dResult0,qResult0,#16            ;// Narrow the clipped Value to Halfword
+         VSHRN    dResult1,qResult1,#16 
+         VBIT     dResult0,dConst0,dZero0          ;// Force 0 where the input coefficient was 0
+         VBIT     dResult1,dConst0,dZero1          ;// Force 0 where the input coefficient was 0
+         
+         VST1     {dResult0,dResult1},[pSrcDst]!   ;// Store the result
+         SUBS     Count,Count,#8
+         VLD1     {dCoeff0,dCoeff1},[pSrcDst]      ;// Preload next 8 values. NOTE(review): last pass reads 16 bytes past the block
+         
+         
+         BGT      Loop
+         
+         SUB      pSrcDst,pSrcDst,#128             ;// Rewind to element 0 (8 passes x 16 bytes)
+         
+         ;// Store the Inverse quantized Dc coefficient
+         
+         STRH     temp,[pSrcDst],#2                ;// Replaces the loop's result for element 0
+        
+         MOV      Return,#OMX_Sts_NoErr
+         
+
+
+         M_END
+         ENDIF
+         
+
+        END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/src/armVC_Version.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/src/armVC_Version.c
new file mode 100755
index 0000000..5d93681
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/src/armVC_Version.c
@@ -0,0 +1,6 @@
+#include "omxtypes.h"
+#include "armCOMM_Version.h"
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS /* The version banner is only compiled in when this macro is defined. */
+const char * const omxVC_VersionDescription = "ARM OpenMAX DL v" ARM_VERSION_STRING "   Rel=" OMX_ARM_RELEASE_TAG "   Arch=" OMX_ARM_BUILD_ARCHITECTURE "   Tools="  OMX_ARM_BUILD_TOOLCHAIN ; /* One constant string built by adjacent string-literal concatenation; the ARM_ and OMX_ macros must expand to string literals (presumably from armCOMM_Version.h or the build flags - confirm). */
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_DELIVERY.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_DELIVERY.TXT
new file mode 100644
index 0000000..7801f3dd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_DELIVERY.TXT
@@ -0,0 +1,63 @@
+The contents of this transaction was created by Hedley Francis
+of ARM on 19-Feb-2008.
+
+It contains the ARM data versions listed below.
+
+This data, unless otherwise stated, is ARM Proprietary and access to it
+is subject to the agreements indicated below.
+
+If you experience problems with this data, please contact ARM support
+quoting transaction reference <97412>.
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+- OX000-SW-98010-r0p0-00bet1
+  Video codecs - sample code
+  Sample code release for Hantro (Ver 1.0.2)
+  internal access
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+This transaction contains deliverables which are designated as being of
+beta release status (BET).
+
+Beta release status has a particular meaning to ARM of which the recipient
+must be aware. Beta is a pre-release status indicating that the deliverable
+so described is believed to robustly demonstrate specified behaviour, to be
+consistent across its included aspects and be ready for general deployment.
+But Beta also indicates that pre-release reliability trials are ongoing and
+that it is possible residual defects or errors in operation, consistency
+and documentation may still be encountered. The recipient should consider
+this position when using this Beta material supplied. ARM will normally
+attempt to provide fixes or a work-around for defects identified by the
+recipient, but the provision or timeliness of this support cannot be
+guaranteed. ARM shall not be responsible for direct or consequential
+damages as a result of encountering one or more of these residual defects.
+By accepting a Beta release, the recipient agrees to these constraints and
+to providing reasonable information to ARM to enable the replication of the
+defects identified by the recipient. The specific Beta version supplied
+will not be supported after release of a later or higher status version.
+It should be noted that Support for the Beta release of the deliverable
+will only be provided by ARM to a recipient who has a current support and
+maintenance contract for the deliverable.
+
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+In addition to the data versions listed above, this transaction contains
+two additional files at the top level.
+
+The first is this file, ARM_DELIVERY_97412.TXT, which is the delivery
+note.
+
+The second is ARM_MANIFEST_97412.TXT which contains a manifest of all the
+files included in this transaction, together with their checksums.
+
+The checksums provided are calculated using the RSA Data Security, Inc.
+MD5 Message-Digest Algorithm.
+
+The checksums can be used to verify the integrity of this data using the
+"md5sum" tool (which is part of the GNU "textutils" package) by running:
+
+  % md5sum --check ARM_MANIFEST_97412.TXT
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_MANIFEST.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_MANIFEST.TXT
new file mode 100644
index 0000000..8e01b1e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_MANIFEST.TXT
@@ -0,0 +1,140 @@
+				  OX000-SW-98010-r0p0-00bet1/
+				  OX000-SW-98010-r0p0-00bet1/api/
+8971932d56eed6b1ad1ba507f0bff5f0  OX000-SW-98010-r0p0-00bet1/api/armCOMM_Bitstream.h
+e88ec84e122534092b90c67841549d6f  OX000-SW-98010-r0p0-00bet1/api/armCOMM_Version.h
+43cf46c2cf2fe1f93c615b57bcbe4809  OX000-SW-98010-r0p0-00bet1/api/armCOMM.h
+f87fedd9ca432fefa757008176864ef8  OX000-SW-98010-r0p0-00bet1/api/armOMX.h
+8e49899a428822c36ef9dd94e0e05f18  OX000-SW-98010-r0p0-00bet1/api/omxtypes.h
+a06983abb39c476b081e87ea271361a5  OX000-SW-98010-r0p0-00bet1/build_vc.pl
+c01f8b93ab73d8c00ddf2499f01da5ff  OX000-SW-98010-r0p0-00bet1/filelist_vc.txt
+				  OX000-SW-98010-r0p0-00bet1/src/
+26e2ff3f633764eb720deb340978dc2d  OX000-SW-98010-r0p0-00bet1/src/armCOMM_Bitstream.c
+79aa23d9817efd11d0c4c2be36ec1e5c  OX000-SW-98010-r0p0-00bet1/src/armCOMM.c
+				  OX000-SW-98010-r0p0-00bet1/vc/
+				  OX000-SW-98010-r0p0-00bet1/vc/m4p10/
+				  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/
+e45297704d72302d4a947d0798c666fb  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CAVLCTables.c
+205dfafe1fe7bb160bf36d2600e1100a  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+bf92641e8548577b77e04e03ec04c358  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_MEGetBufSize.c
+f5ee6f7be822d87471cef3b1801dbfc2  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c
+28110b3a13cecf4f216d10bcc761c401  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c
+9a1a25245c975d641e1c6378834aea4d  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c
+3a643eaaaeb12e8d274dc59a7357a586  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c
+4c4de5973a6b74250ce91ac0b317a617  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c
+4ecdbe9193aaba1f9bb0e24c938b34f9  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c
+66e912f8c88f6019cba3ede27150a407  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateLuma.c
+266da42f4e3015e67b2cbb58169d437f  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c
+d905247eeaa52d4e2cf5f6bc3f61b348  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c
+5b29448db0495cd1717a4b925f13377c  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c
+f6451df27f6dcc99036b4b1253c23bb6  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c
+892787d850eef09dc2148d45b416b062  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c
+33da1c01a31f47c0f3aea9a7a5eaa9be  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c
+e9fb11b066775283dcfeae8d12a6c97a  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c
+add97bec08e5e1a538aa8607168e61ba  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c
+b695ecfc917b39470d1f40773b923972  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c
+51bc596fd2ff61ad5450d7138461f4a1  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_MEInit.c
+dc6baa0a388dc5ea8ff65c24b179e670  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_GetVLCInfo.c
+a5499902996576f6712849db34d5ad65  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c
+0c3b76745d53e74a8e64e80def31faba  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c
+4f2742ba5a3c2208f53bc0f6a443be14  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SADQuar_4x.c
+b4ae2dc948e8ca64831fe3bbfbd89523  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+e15118cbe372db7cadba225c9456f189  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_SADQuar.c
+623cf336cfce7d0174f4e54072456f33  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c
+89e452c80e30357cadfb04c05b6fe00c  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c
+3a5551cc54e85bbe34fc966c7dc00f1c  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c
+114030fa0d8f00af6d3289f47a5e85bf  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+9e373ab296fb85bb45565a6c384f6ed8  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DequantTables.c
+2d200f7cc230f302da48c589da42c02f  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c
+ea3f1d1d1507b55610b1349c7b5946e8  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c
+bd2bf1743aef2a9396545ed025362be2  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_Average_4x.c
+ca68e809567bf89044631b67d228c7ce  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+77caf2b5cbee96d360a919f27e1f14f4  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_QuantTables.c
+26081e384ec627fedad474a0e7dad877  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SADQuar_8x.c
+1c83ae9207a54944936f4a63c665bd99  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c
+4c36e04db20200f4ec72e5aba57446fd  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SATD_4x4.c
+f75b7c5a80d8bf33e315380e4ef0ab8a  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c
+488925bb7aeeae0ccf93ec44af9fce35  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SADQuar_16x.c
+c91a5345b5f877b3831ed1abcc60d579  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c
+35515a115a32fcac8479072a9a5b0db9  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Luma.c
+fdcf4622bc5f0ae75bdb0a51dcd03397  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c
+74c9278177400a1f7cc6d799a8c8ab34  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SAD_4x.c
+56aa2d506d0cfdb4ebd366c07adb2d85  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeBlockPixel.c
+36b2165fd4d2a7f3f3e1f8daff4f94e5  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c
+4b6b1b933fc7bc8f14a184c02c028085  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_TransformResidual4x4.c
+cf0ff093a9b372dd3271e3e5c28984d4  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c
+9ccad9f894fbd32194f5b53da217072a  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c
+4943a7a2df7e9d700675f8c1debf4d90  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c
+29e4a7f38f8c2e8246ed756db03c012e  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c
+27bc64e7c18da0aab9c987a388f61608  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+859185614bb9d0013861e454d7b918f2  OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c
+				  OX000-SW-98010-r0p0-00bet1/vc/m4p10/api/
+63e3b64b96cc42a235c04f3a0f991316  OX000-SW-98010-r0p0-00bet1/vc/m4p10/api/armVCM4P10_CAVLCTables.h
+				  OX000-SW-98010-r0p0-00bet1/vc/m4p2/
+				  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/
+0aae4f683d8903cba9956d3301ed9ffe  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_ACDCPredict.c
+8d6c1b44915329165df643081cc11a97  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MCReconBlock.c
+0435eca930eacda0f2a59e843d405eff  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c
+9a82dd0b1f05f798567436a009d02969  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c
+e1e24646c4bd03f5df78295452dd4eb2  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c
+746e6b334e4a26d4a9bfae6d735826f6  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DCT8x8blk.c
+8b1d87b74d80ff13a16215b61d5e52ba  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c
+309358d357baafc38d2b37bf1e9768a9  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c
+cc77c7242b53c153f8d09527583f2771  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_FindMVpred.c
+7cd8e7796017e3dd00b494d34f629f3f  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MEGetBufSize.c
+a4905cb5f8d4b244454ee4f60d18358b  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_PutVLCBits.c
+5596b31e433222c1e4860deebfa98ef2  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DCT_Table.c
+365d072be6eab201f6e040058a3bacfc  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c
+78ed2212585b0cca75913a473b2ec430  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c
+50b2d8da8f20f6b1d39b8d3df38af55d  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_EncodeMV.c
+4a851a2ad6d357cdc233d9c0bf475e02  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_BlockMatch_Half.c
+0d6d63878f2827e00e5f85b1e8e26017  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+48b865a983fe5bf3075eddf652950722  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c
+5f48fa7941835c46ac767e63fc29403b  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c
+bbaf454b64b32b2c42a76a7ec393d977  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_CompareMV.c
+eebff772f87a414436c5c5286f2cd213  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MEInit.c
+65ae242eb8cb6d1027677c8ef8f77ca0  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_FillVLCBuffer.c
+125642b1ea0c1256d79af1e0ddecae93  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInter_I.c
+ce24ba3d83da4cb791485d3128268bf6  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c
+09bc09a2e6fd962e719944582e38a8fd  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+6b0ee7a116471a4dadbe5bc8dbf425b0  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_FillVLDBuffer.c
+21322dca027c28353e3e7eb8f3620062  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c
+ef353d83244288d8c37e0f70249177cc  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_GetVLCBits.c
+541de824f8aebe4a5cac6f15da943efa  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c
+0b40b154b591c7f8842cffe4042d17c5  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+2ffcec88d3fcb372543a8f4508ea1ac6  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantIntra_I.c
+e06d85ca000afcbb50580f98f0203ac8  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c
+ae82b6fcfcf731a61d70e1aa42e6277a  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c
+1d04395e231b597562257e98cda6cfb0  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+72c0a36327b6b9b436d3bce7c896c520  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c
+1b65aa7f311124ea6fb47e384ec06a50  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c
+714957104a6ef71341fbe6a9ec65c136  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_SetPredDir.c
+86493f0ee853f653354a7389f1727f73  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c
+5de8afcfb3052968794782a7c3a0b41a  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c
+50bcc228cc660dbda037725309de3f8b  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c
+4f5cfa1ecc668913dde94e3caf97a2e1  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c
+c2ec804ddf64ee841146e39c3a783451  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c
+4087f6a827912ee5b45ed4217f1a6d77  OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c
+				  OX000-SW-98010-r0p0-00bet1/vc/m4p2/api/
+5c711702dddcec85298003860d760cec  OX000-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_DCT_Table.h
+1b92c94b785c03ec76d4fae2f2bbdb8a  OX000-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+ad9c6986d2a3200dd5e1f6103a54a99b  OX000-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+				  OX000-SW-98010-r0p0-00bet1/vc/src/
+e627b3346b0dc9aff14446005ce0fa43  OX000-SW-98010-r0p0-00bet1/vc/src/armVC_Version.c
+				  OX000-SW-98010-r0p0-00bet1/vc/api/
+7ca94b1c33ac0211e17d38baadd7d1dd  OX000-SW-98010-r0p0-00bet1/vc/api/armVC.h
+12cf7596edbbf6048b626d15e8d0ed48  OX000-SW-98010-r0p0-00bet1/vc/api/omxVC.h
+				  OX000-SW-98010-r0p0-00bet1/vc/comm/
+				  OX000-SW-98010-r0p0-00bet1/vc/comm/src/
+3a6df0085736cbcbe2e3f45d08af4221  OX000-SW-98010-r0p0-00bet1/vc/comm/src/armVCCOMM_Average.c
+0bf3cb52863c829b28c0352835170211  OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy8x8.c
+538b62f510b5a8bdced4a39fa12d9a23  OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c
+66993edd9d441bf3b5b6c912f6400b6e  OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ExpandFrame_I.c
+8e526a9007eb0d43ebf362c498b37415  OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_LimitMVToRect.c
+87f8f26e6e9178df0ab7419334d5a3db  OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_SAD_16x.c
+1a8577646132ad9b63a1477fdaec2464  OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Average_16x.c
+48529c4f70c7e954e832eece1aee57bd  OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_SAD_8x.c
+252977764d4f38282b6a56c59ccf4f09  OX000-SW-98010-r0p0-00bet1/vc/comm/src/armVCCOMM_SAD.c
+cc78cfaed9502c2e0282c91fb95eeac4  OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Average_8x.c
+e468751c15a581ebd22da031e22117d1  OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy16x16.c
+3f448d191eaeb82ecb7e27ef8ba27875  OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c
+b1291c307808631fa833684abb9c34ce  ARM_DELIVERY_97412.TXT
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM.h
new file mode 100644
index 0000000..2ed86a4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM.h
@@ -0,0 +1,785 @@
+/**
+ * 
+ * File Name:  armCOMM.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *   
+ * File: armCOMM.h
+ * Brief: Declares Common APIs/Data Types used across OpenMAX API's
+ *
+ */
+ 
+  
+#ifndef _armCommon_H_
+#define _armCommon_H_
+
+#include "omxtypes.h"
+
+typedef struct
+{
+  OMX_F32 Re; /** Real part */
+  OMX_F32 Im; /** Imaginary part */	
+        
+} OMX_FC32; /** single precision floating point complex number */
+
+typedef struct
+{
+  OMX_F64 Re; /** Real part */
+  OMX_F64 Im; /** Imaginary part */	
+        
+} OMX_FC64; /** double precision floating point complex number */
+
+
+/* Used by both IP and IC domains for 8x8 JPEG blocks. */
+typedef OMX_S16 ARM_BLOCK8x8[64];
+
+
+#include "armOMX.h"
+
+#define  armPI (OMX_F64)(3.1415926535897932384626433832795)
+
+/***********************************************************************/
+
+/* Compiler extensions */
+#ifdef ARM_DEBUG
+/* debug version: messages are printed verbatim through a fixed "%s" format; armError then exits with -1 */
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#define armError(str) {printf("%s", (str)); printf("\n"); exit(-1);}
+#define armWarn(str) {printf("%s", (str)); printf("\n");}
+#define armIgnore(a) ((void)(a))
+#define armAssert(a) assert(a)
+#else 
+/* release version: each macro only evaluates its argument and discards the result */
+#define armError(str) ((void) (str))
+#define armWarn(str)  ((void) (str))
+#define armIgnore(a)  ((void) (a))
+#define armAssert(a)  ((void) (a))
+#endif /* ARM_DEBUG */
+
+/* Arithmetic operations */
+
+#define armMin(a,b)             ( (a) > (b) ?  (b):(a) )
+#define armMax(a,b)             ( (a) > (b) ?  (a):(b) )
+#define armAbs(a)               ( (a) <  0  ? -(a):(a) )
+
+/* Alignment operation: round Ptr up to the next N-byte boundary (the mask requires N to be a power of two) */
+
+#define armAlignToBytes(Ptr,N)      ((Ptr) + ( (((N)-(int)(Ptr))&((N)-1)) / sizeof(*(Ptr)) ))
+#define armAlignTo2Bytes(Ptr)       armAlignToBytes(Ptr,2)
+#define armAlignTo4Bytes(Ptr)       armAlignToBytes(Ptr,4)
+#define armAlignTo8Bytes(Ptr)       armAlignToBytes(Ptr,8)
+#define armAlignTo16Bytes(Ptr)      armAlignToBytes(Ptr,16)
+
+/* Error and Alignment check */
+
+#define armRetArgErrIf(condition, code)  if(condition) { return (code); }
+#define armRetDataErrIf(condition, code) if(condition) { return (code); }
+
+#ifndef ALIGNMENT_DOESNT_MATTER
+#define armIsByteAligned(Ptr,N)     ((((int)(Ptr)) % (N))==0)
+#define armNotByteAligned(Ptr,N)    ((((int)(Ptr)) % (N))!=0)
+#else /* ALIGNMENT_DOESNT_MATTER: report every pointer as aligned */
+#define armIsByteAligned(Ptr,N)     (1)
+#define armNotByteAligned(Ptr,N)    (0)
+#endif /* ALIGNMENT_DOESNT_MATTER */
+
+#define armIs2ByteAligned(Ptr)      armIsByteAligned(Ptr,2)
+#define armIs4ByteAligned(Ptr)      armIsByteAligned(Ptr,4)
+#define armIs8ByteAligned(Ptr)      armIsByteAligned(Ptr,8)
+#define armIs16ByteAligned(Ptr)     armIsByteAligned(Ptr,16)
+
+#define armNot2ByteAligned(Ptr)     armNotByteAligned(Ptr,2)
+#define armNot4ByteAligned(Ptr)     armNotByteAligned(Ptr,4)
+#define armNot8ByteAligned(Ptr)     armNotByteAligned(Ptr,8)
+#define armNot16ByteAligned(Ptr)    armNotByteAligned(Ptr,16)
+#define armNot32ByteAligned(Ptr)    armNotByteAligned(Ptr,32)
+
+/**
+ * Function: armRoundFloatToS16/armRoundFloatToS32/armRoundFloatToS64
+ *
+ * Description:
+ * Converts a double precision value into a 16/32/64-bit signed integer after rounding
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32/OMX_S64 format
+ *
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value);
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToS16/armSatRoundFloatToS32
+ *
+ * Description:
+ * Converts a double precision value into a short int/int after rounding and saturation
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32 format
+ *
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToU16/armSatRoundFloatToU32
+ *
+ * Description:
+ * Converts a double precision value into an unsigned short int/int after rounding and saturation
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U16/OMX_U32 format
+ *
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value);
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value);
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Checks the sign of a variable:
+ * returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	    var     Variable to be checked
+ *
+ * Return Value:
+ * OMX_INT --   returns 1 if it is Positive
+ *              returns 0 if it is 0
+ *              returns -1 if it is Negative 
+ */ 
+ 
+OMX_INT armSignCheck (OMX_S16 var);
+
+/**
+ * Function: armClip
+ *
+ * Description: Clips the input between MAX and MIN value
+ * 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] min     lower bound
+ * [in] max     upper bound
+ * [in] src     variable to be clipped
+ *
+ * Return Value:
+ * OMX_S32 --   returns clipped value
+ */ 
+ 
+OMX_S32 armClip (
+        OMX_INT min,
+        OMX_INT max, 
+        OMX_S32 src
+        );
+
+/**
+ * Function: armClip_F32
+ *
+ * Description: Clips the input between MAX and MIN value
+ * 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] min     lower bound
+ * [in] max     upper bound
+ * [in] src     variable to be clipped
+ *
+ * Return Value:
+ * OMX_F32 --   returns clipped value
+ */ 
+ 
+OMX_F32 armClip_F32 (
+        OMX_F32 min,
+        OMX_F32 max, 
+        OMX_F32 src
+        );
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description: Divides a float value by 2^shift and 
+ * saturates it for unsigned value range for satBits.
+ * Second parameter is like "shifting" the corresponding 
+ * integer value. Takes care of rounding while clipping the final 
+ * value.
+ *
+ * Parameters:
+ * [in] v          Number to be operated upon
+ * [in] shift      Divides the input "v" by "2^shift"
+ * [in] satBits    Final range is [0, 2^satBits)
+ *
+ * Return Value:
+ * OMX_U32 --   returns "shifted" saturated value
+ */ 
+ 
+OMX_U32 armShiftSat_F32(
+        OMX_F32 v, 
+        OMX_INT shift, 
+        OMX_INT satBits
+        );
+
+/**
+ * Functions: armSwapElem
+ *
+ * Description:
+ * This function swaps two elements at the specified pointer locations.
+ * The size of each element could be anything as specified by <elemSize>
+ *
+ * Return Value:
+ * OMXResult -- Error status from the function
+ */
+OMXResult armSwapElem(OMX_U8 *pBuf1, OMX_U8 *pBuf2, OMX_INT elemSize);
+
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Finds the median of three numbers
+ * 
+ * Remarks:
+ *
+ * Parameters:
+ * [in] fEntry     First entry
+ * [in] sEntry     second entry
+ * [in] tEntry     Third entry
+ *
+ * Return Value:
+ * OMX_S32 --   returns the median value
+ */ 
+ 
+OMX_S32 armMedianOf3 (
+    OMX_S32 fEntry,
+    OMX_S32 sEntry, 
+    OMX_S32 tEntry 
+    );
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Finds the size of a positive value and returns the same
+ * 
+ * Remarks:
+ *
+ * Parameters:
+ * [in] value    Positive value
+ *
+ * Return Value:
+ * OMX_U8 --   returns the size of the positive value
+ */ 
+ 
+OMX_U8 armLogSize (
+    OMX_U16 value 
+    );    
+
+/***********************************************************************/
+                /* Saturating Arithmetic operations */
+
+/**
+ * Function :armSatAdd_S32()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ * 
+ *    
+ **/
+
+OMX_S32 armSatAdd_S32(
+                OMX_S32 Value1,
+                OMX_S32 Value2
+                );
+
+/**
+ * Function :armSatAdd_S64()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ * 
+ *    
+ **/
+
+OMX_S64 armSatAdd_S64(
+                OMX_S64 Value1,
+                OMX_S64 Value2
+                );
+
+/** Function :armSatSub_S32()
+ * 
+ * Description :
+ *     Returns the result of saturated subtraction of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ * 
+ **/
+
+OMX_S32 armSatSub_S32(
+                    OMX_S32 Value1,
+                    OMX_S32 Value2
+                    );
+
+/**
+ * Function :armSatMac_S32()
+ *
+ * Description :
+ *     Returns the result of multiplication of Value1 and Value2 and subsequent saturated
+ *     accumulation with Mac
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ * [in] Mac          Accumulator
+ *
+ * Return:
+ * [out]             Result of operation
+ **/
+
+OMX_S32 armSatMac_S32(
+                    OMX_S32 Mac,
+                    OMX_S16 Value1,
+                    OMX_S16 Value2
+                    );
+
+/**
+ * Function :armSatMac_S16S32_S32
+ *
+ * Description :
+ *   Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac
+ *
+ *   mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Parameters:
+ * [in] delayElem    First 32 bit Operand
+ * [in] filTap       Second 16 bit Operand
+ * [in] mac          Result of MAC operation
+ *
+ * Return:
+ * [out]  mac        Result of operation
+ *    
+ **/
+ 
+OMX_S32 armSatMac_S16S32_S32(
+                        OMX_S32 mac, 
+                        OMX_S32 delayElem, 
+                        OMX_S16 filTap );
+
+/**
+ * Function :armSatRoundRightShift_S32_S16
+ *
+ * Description :
+ *   Returns the result of rounded right shift operation of input by the scalefactor
+ *
+ *   output = Saturate_in_16Bits( RightShift( Round(input), scaleFactor ) )
+ *
+ * Parameters:
+ * [in] input       The input to be operated on
+ * [in] scaleFactor The shift number
+ *
+ * Return:
+ * [out]            Result of operation
+ *    
+ **/
+
+
+OMX_S16 armSatRoundRightShift_S32_S16(
+                        OMX_S32 input, 
+                        OMX_INT scaleFactor);
+
+/**
+ * Function :armSatRoundLeftShift_S32()
+ *
+ * Description :
+ *     Returns the result of saturating left-shift operation on input
+ *     Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value        Operand
+ * [in] shift        Operand for shift operation
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+ 
+OMX_S32 armSatRoundLeftShift_S32(
+                        OMX_S32 Value,
+                        OMX_INT shift
+                        );
+
+/**
+ * Function :armSatRoundLeftShift_S64()
+ *
+ * Description :
+ *     Returns the result of saturating left-shift operation on input
+ *     Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value        Operand
+ * [in] shift        Operand for shift operation
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+ 
+OMX_S64 armSatRoundLeftShift_S64(
+                        OMX_S64 Value,
+                        OMX_INT shift
+                        );
+
+/**
+ * Function :armSatMulS16S32_S32()
+ *
+ * Description :
+ *     Returns the result of a S16 data type multiplied with an S32 data type
+ *     in a S32 container
+ *
+ * Parameters:
+ * [in] input1       Operand 1
+ * [in] input2       Operand 2
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+
+OMX_S32 armSatMulS16S32_S32(
+                    OMX_S16 input1,
+                    OMX_S32 input2);
+
+/**
+ * Function :armSatMulS32S32_S32()
+ *
+ * Description :
+ *     Returns the result of a S32 data type multiplied with an S32 data type
+ *     in a S32 container
+ *
+ * Parameters:
+ * [in] input1       Operand 1
+ * [in] input2       Operand 2
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+OMX_S32 armSatMulS32S32_S32(
+                    OMX_S32 input1,
+                    OMX_S32 input2);
+
+
+/**
+ * Function :armIntDivAwayFromZero()
+ *
+ * Description : Integer division with rounding to the nearest integer. 
+ *               Half-integer values are rounded away from zero
+ *               unless otherwise specified. For example 3//2 is rounded 
+ *               to 2, and -3//2 is rounded to -2.
+ *
+ * Parameters:
+ * [in] Num        Operand 1
+ * [in] Deno       Operand 2
+ *
+ * Return:
+ * [out]             Result of operation Num//Deno
+ *    
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno);
+
+
+/***********************************************************************/
+/*
+ * Debugging macros
+ *
+ */
+
+
+/*
+ * Definition of output stream - change to stderr if necessary
+ */
+#define DEBUG_STREAM stdout
+
+/*
+ * Debug printf macros, one for each argument count.
+ * Add more if needed.
+ */
+#ifdef DEBUG_ON
+#include <stdio.h>
+
+#define DEBUG_PRINTF_0(a)                                               fprintf(DEBUG_STREAM, a)
+#define DEBUG_PRINTF_1(a, b)                                            fprintf(DEBUG_STREAM, a, b)
+#define DEBUG_PRINTF_2(a, b, c)                                         fprintf(DEBUG_STREAM, a, b, c)
+#define DEBUG_PRINTF_3(a, b, c, d)                                      fprintf(DEBUG_STREAM, a, b, c, d)
+#define DEBUG_PRINTF_4(a, b, c, d, e)                                   fprintf(DEBUG_STREAM, a, b, c, d, e)
+#define DEBUG_PRINTF_5(a, b, c, d, e, f)                                fprintf(DEBUG_STREAM, a, b, c, d, e, f)
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g)                             fprintf(DEBUG_STREAM, a, b, c, d, e, f, g)
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h)                          fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h)
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i)                       fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i)
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j)                    fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j)
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k)                fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k)
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l)             fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l)
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m)          fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m)
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n)       fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)    fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
+#else /* DEBUG_ON */
+#define DEBUG_PRINTF_0(a)                                  
+#define DEBUG_PRINTF_1(a, b)                               
+#define DEBUG_PRINTF_2(a, b, c)                            
+#define DEBUG_PRINTF_3(a, b, c, d)                         
+#define DEBUG_PRINTF_4(a, b, c, d, e)                      
+#define DEBUG_PRINTF_5(a, b, c, d, e, f)                   
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g)                
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h)             
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i)          
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j)       
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k)    
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l)             
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m)          
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n)      
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)   
+#endif /* DEBUG_ON */
+
+
+/*
+ * Domain and sub domain definitions
+ *
+ * In order to turn on debug for an entire domain or sub-domain
+ * at compile time, one of the DEBUG_DOMAIN_* below may be defined,
+ * which will activate debug in all of the defines it contains.
+ */
+
+#ifdef DEBUG_DOMAIN_AC
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT_MPEG4
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT
+#define DEBUG_OMXACAAC_DECODEDATSTRELT
+#define DEBUG_OMXACAAC_DECODEFILLELT
+#define DEBUG_OMXACAAC_DECODEISSTEREO_S32
+#define DEBUG_OMXACAAC_DECODEMSPNS_S32
+#define DEBUG_OMXACAAC_DECODEMSSTEREO_S32_I
+#define DEBUG_OMXACAAC_DECODEPRGCFGELT
+#define DEBUG_OMXACAAC_DECODETNS_S32_I
+#define DEBUG_OMXACAAC_DEINTERLEAVESPECTRUM_S32
+#define DEBUG_OMXACAAC_ENCODETNS_S32_I
+#define DEBUG_OMXACAAC_LONGTERMPREDICT_S32
+#define DEBUG_OMXACAAC_LONGTERMRECONSTRUCT_S32
+#define DEBUG_OMXACAAC_MDCTFWD_S32
+#define DEBUG_OMXACAAC_MDCTINV_S32_S16
+#define DEBUG_OMXACAAC_NOISELESSDECODE
+#define DEBUG_OMXACAAC_QUANTINV_S32_I
+#define DEBUG_OMXACAAC_UNPACKADIFHEADER
+#define DEBUG_OMXACAAC_UNPACKADTSFRAMEHEADER
+#define DEBUG_OMXACMP3_HUFFMANDECODESFBMBP_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODESFB_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODE_S32
+#define DEBUG_OMXACMP3_MDCTINV_S32
+#define DEBUG_OMXACMP3_REQUANTIZESFB_S32_I
+#define DEBUG_OMXACMP3_REQUANTIZE_S32_I
+#define DEBUG_OMXACMP3_SYNTHPQMF_S32_S16
+#define DEBUG_OMXACMP3_UNPACKFRAMEHEADER
+#define DEBUG_OMXACMP3_UNPACKSCALEFACTORS_S8
+#define DEBUG_OMXACMP3_UNPACKSIDEINFO
+#endif /* DEBUG_DOMAIN_AC */
+
+
+#ifdef DEBUG_DOMAIN_VC
+#define DEBUG_OMXVCM4P10_AVERAGE_16X
+#define DEBUG_OMXVCM4P10_AVERAGE_4X
+#define DEBUG_OMXVCM4P10_AVERAGE_8X
+#define DEBUG_OMXVCM4P10_DEBLOCKCHROMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DEBLOCKLUMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DECODECHROMADCCOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DECODECOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DEQUANTTRANSFORMACFROMPAIR_U8_S16_C1_DLX
+#define DEBUG_OMXVCM4P10_EXPANDFRAME
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_PREDICTINTRACHROMA8X8_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_16X16_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_4X4_U8_C1R
+#define DEBUG_OMXVCM4P10_SADQUAR_16X
+#define DEBUG_OMXVCM4P10_SADQUAR_4X
+#define DEBUG_OMXVCM4P10_SADQUAR_8X
+#define DEBUG_OMXVCM4P10_SAD_16X
+#define DEBUG_OMXVCM4P10_SAD_4X
+#define DEBUG_OMXVCM4P10_SAD_8X
+#define DEBUG_OMXVCM4P10_SATD_4X4
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTCHROMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTLUMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_CHROMADC
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_LUMADC
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_8X8
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_8X8
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_SAD_U8_S16
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_U8_S16
+#define DEBUG_OMXVCM4P2_DCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_DECODEBLOCKCOEF_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEPADMV_PVOP
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEMV_U8_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_FINDMVPRED
+#define DEBUG_OMXVCM4P2_IDCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_LIMITMVTORECT
+#define DEBUG_OMXVCM4P2_MOTIONESTIMATIONMB
+#define DEBUG_OMXVCM4P2_PADMBGRAY_U8
+#define DEBUG_OMXVCM4P2_PADMBHORIZONTAL_U8
+#define DEBUG_OMXVCM4P2_PADMBVERTICAL_U8
+#define DEBUG_OMXVCM4P2_PADMV
+#define DEBUG_OMXVCM4P2_QUANTINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINTRA_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTRA_S16_I
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTER
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTRA
+#endif /* DEBUG_DOMAIN_VC */
+
+
+#ifdef DEBUG_DOMAIN_IC
+/* To be filled in */
+#endif /* DEBUG_DOMAIN_IC */
+
+
+#ifdef DEBUG_DOMAIN_SP
+#define DEBUG_OMXACSP_DOTPROD_S16
+#define DEBUG_OMXACSP_BLOCKEXP_S16
+#define DEBUG_OMXACSP_BLOCKEXP_S32
+#define DEBUG_OMXACSP_COPY_S16
+#define DEBUG_OMXACSP_DOTPROD_S16
+#define DEBUG_OMXACSP_DOTPROD_S16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S16S32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S32_SFS
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC16
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S16_S32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S32
+#define DEBUG_OMXACSP_FFTINIT_C_SC16
+#define DEBUG_OMXACSP_FFTINIT_C_SC32
+#define DEBUG_OMXACSP_FFTINIT_R_S16_S32
+#define DEBUG_OMXACSP_FFTINIT_R_S32
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32S16_SFS
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32_I
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIR_DIRECT_S16
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_DIRECT_S16
+#endif /* DEBUG_DOMAIN_SP */
+
+
+#ifdef DEBUG_DOMAIN_IP
+#define DEBUG_OMXIPBM_ADDC_U8_C1R_SFS
+#define DEBUG_OMXIPBM_COPY_U8_C1R
+#define DEBUG_OMXIPBM_COPY_U8_C3R
+#define DEBUG_OMXIPBM_MIRROR_U8_C1R
+#define DEBUG_OMXIPBM_MULC_U8_C1R_SFS
+#define DEBUG_OMXIPCS_COLORTWISTQ14_U8_C3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR420LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR422LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR444LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR420LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR422LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR444LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_YCBCR420RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422RSZCSCROTRGB_U8_C2R
+#define DEBUG_OMXIPCS_YCBCR422RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565_U8_U16_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB_U8_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_C2P3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR444TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR444TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB_U8_C3R
+#define DEBUG_OMXIPPP_GETCENTRALMOMENT_S64
+#define DEBUG_OMXIPPP_GETSPATIALMOMENT_S64
+#define DEBUG_OMXIPPP_MOMENTGETSTATESIZE_S64
+#define DEBUG_OMXIPPP_MOMENTINIT_S64
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C1R
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C3R
+#endif /* DEBUG_DOMAIN_IP */
+
+
+#endif /* _armCommon_H_ */
+
+/*End of File*/
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Bitstream.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Bitstream.h
new file mode 100644
index 0000000..4f9bc3b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Bitstream.h
@@ -0,0 +1,212 @@
+/**
+ * 
+ * File Name:  armCOMM_Bitstream.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File: armCOMM_Bitstream.h
+ * Brief: Declares common API's/Data types used across the OpenMax Encoders/Decoders.
+ *
+ */
+
+#ifndef _armCodec_H_
+#define _armCodec_H_
+
+#include "omxtypes.h"
+
+typedef struct {
+    OMX_U8   codeLen;
+    OMX_U32	 codeWord;
+} ARM_VLC32;
+
+/* The above should be renamed as "ARM_VLC32" */
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream pointer
+ *
+ * Parameters:
+ * [in]     **ppBitStream
+ * [in]     *pOffset
+ * [in]     N=1...32
+ *
+ * Returns  Value
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream
+ *    
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ * [in]     N=1..32
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ * Returns  Value
+ */
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align the pointer *ppBitStream to the next byte boundary
+ *
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ *
+ **/
+ 
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset);
+
+/** 
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Skip N bits from the value at *ppBitStream
+ *
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ * [in]     N
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ *
+ **/
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N);
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of variable length symbol (max size 32 bits) read from
+ * the bit stream pointed by *ppBitStream at *pOffset by using the table
+ * pointed by pCodeBook
+ * 
+ * Parameters:
+ * [in]     **ppBitStream
+ * [in]     *pOffset
+ * [in]     pCodeBook
+ * 
+ * [out]    **ppBitStream
+ * [out]    *pOffset
+ *
+ * Returns : Code Book Index if successful. 
+ *         : "ARM_NO_CODEBOOK_INDEX = 0xFFFF" if search fails.
+ **/
+
+#define ARM_NO_CODEBOOK_INDEX (OMX_U16)(0xFFFF)
+
+OMX_U16 armUnPackVLC32(
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pOffset,
+    const ARM_VLC32 *pCodeBook
+);
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte 
+ *                      in the bit stream.
+ * [in]	pOffset	        pointer to the bit position in the byte 
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in]	codeWord		Code word that needs to be inserted into the
+ *                          bitstream
+ * [in]	codeLength		Length of the code word valid range 1...32
+ *
+ * [out] ppBitStream	*ppBitStream is updated after the block is encoded,
+ *	                        so that it points to the current byte in the bit
+ *							stream buffer.
+ * [out] pOffset		*pOffset is updated so that it points to the
+ *							current bit position in the byte pointed by
+ *							*ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+ 
+OMXResult armPackBits (
+    OMX_U8  **ppBitStream, 
+    OMX_INT *pOffset,
+    OMX_U32 codeWord, 
+    OMX_INT codeLength 
+);
+ 
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte 
+ *                      in the bit stream.
+ * [in]	pBitOffset	    pointer to the bit position in the byte 
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in]	 code     		VLC code word that needs to be inserted into the
+ *                      bitstream
+ *
+ * [out] ppBitStream	*ppBitStream is updated after the block is encoded,
+ *	                    so that it points to the current byte in the bit
+ *						stream buffer.
+ * [out] pBitOffset		*pBitOffset is updated so that it points to the
+ *						current bit position in the byte pointed by
+ *						*ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+ 
+OMXResult armPackVLC32 (
+    OMX_U8 **ppBitStream, 
+    OMX_INT *pBitOffset,
+    ARM_VLC32 code 
+);
+
+#endif      /*_armCodec_H_*/
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Version.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Version.h
new file mode 100644
index 0000000..e99a450
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Version.h
@@ -0,0 +1,43 @@
+/* Guard the header against multiple inclusion. */
+#ifndef __ARM_COMM_VERSION_H__
+#define __ARM_COMM_VERSION_H__
+
+
+/* The following line should be in omxtypes.h but hasn't been approved by OpenMAX yet */
+#define OMX_VERSION 102
+
+/* We need to define these macros in order to convert a #define number into a #define string. */
+#define ARM_QUOTE(a) #a
+#define ARM_INDIRECT(A) ARM_QUOTE(A)
+
+/* Convert the OMX_VERSION number into a string that can be used, for example, to print it out. */
+#define ARM_VERSION_STRING ARM_INDIRECT(OMX_VERSION)
+
+
+/* Define this in order to turn on ARM version/release/build strings in each domain */
+#define ARM_INCLUDE_VERSION_DESCRIPTIONS
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS
+  extern const char * const omxAC_VersionDescription;
+  extern const char * const omxIC_VersionDescription;
+  extern const char * const omxIP_VersionDescription;
+  extern const char * const omxSP_VersionDescription;
+  extern const char * const omxVC_VersionDescription;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
+
+
+/* The following entries should be automatically updated by the release script */
+/* They are used in the ARM version strings defined for each domain.             */
+
+/* The release tag associated with this release of the library. - used for source and object releases */
+#define OMX_ARM_RELEASE_TAG  "r0p0-00bet1"
+
+/* The ARM architecture used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_ARCHITECTURE "ANSI C"
+
+/* The ARM Toolchain used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_TOOLCHAIN    "ARM RVCT 3.1"
+
+
+#endif /* __ARM_COMM_VERSION_H__ */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armOMX.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armOMX.h
new file mode 100644
index 0000000..f629f72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armOMX.h
@@ -0,0 +1,274 @@
+/* 
+ * 
+ * File Name:  armOMX.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * This file allows a version of the OMX DL libraries to be built where some or
+ * all of the function names can be given a user specified suffix. 
+ *
+ * You might want to use it where:
+ *
+ * - you want to rename a function "out of the way" so that you could replace
+ *   a function with a different version (the original version would still be
+ *   in the library just with a different name - so you could debug the new
+ *   version by comparing it to the output of the old)
+ *
+ * - you want to rename all the functions to versions with a suffix so that 
+ *   you can include two versions of the library and choose between functions
+ *   at runtime.
+ *
+ *     e.g. omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8
+ * 
+ */
+
+  
+#ifndef _armOMX_H_
+#define _armOMX_H_
+
+
+/* We need to define these two macros in order to expand and concatenate the names */
+#define OMXCAT2BAR(A, B) omx ## A ## B
+#define OMXCATBAR(A, B) OMXCAT2BAR(A, B)
+
+/* Define the suffix to add to all functions - the default is no suffix */
+#define BARE_SUFFIX 
+
+
+
+/* Define what happens to the bare suffix-less functions, down to the sub-domain accuracy */
+#define OMXACAAC_SUFFIX    BARE_SUFFIX   
+#define OMXACMP3_SUFFIX    BARE_SUFFIX
+#define OMXICJP_SUFFIX     BARE_SUFFIX
+#define OMXIPBM_SUFFIX     BARE_SUFFIX
+#define OMXIPCS_SUFFIX     BARE_SUFFIX
+#define OMXIPPP_SUFFIX     BARE_SUFFIX
+#define OMXSP_SUFFIX       BARE_SUFFIX
+#define OMXVCCOMM_SUFFIX   BARE_SUFFIX
+#define OMXVCM4P10_SUFFIX  BARE_SUFFIX
+#define OMXVCM4P2_SUFFIX   BARE_SUFFIX
+
+
+
+
+/* Define what each bare, un-suffixed OpenMAX API function name is to be renamed to */
+#define omxACAAC_DecodeChanPairElt                        OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeDatStrElt                          OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeFillElt                            OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeIsStereo_S32                       OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsPNS_S32_I                        OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsStereo_S32_I                     OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodePrgCfgElt                          OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeTNS_S32_I                          OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DeinterleaveSpectrum_S32                 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_EncodeTNS_S32_I                          OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermPredict_S32                      OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermReconstruct_S32_I                OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTFwd_S32                              OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTInv_S32_S16                          OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX)
+#define omxACAAC_NoiselessDecode                          OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX)
+#define omxACAAC_QuantInv_S32_I                           OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADIFHeader                         OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADTSFrameHeader                    OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX)
+
+
+#define omxACMP3_HuffmanDecode_S32                        OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfb_S32                     OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfbMbp_S32                  OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_MDCTInv_S32                              OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantize_S32_I                         OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantizeSfb_S32_I                      OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_SynthPQMF_S32_S16                        OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackFrameHeader                        OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackScaleFactors_S8                    OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackSideInfo                           OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX)
+
+#define omxICJP_CopyExpand_U8_C3                          OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16                                OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16_I                              OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16                                OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16_I                              OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_Multiple_S16                  OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16                           OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16_I                         OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwdTableInit                      OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_Multiple_S16                  OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16                           OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16_I                         OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInvTableInit                      OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffman8x8_Direct_S16_C1            OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecGetBufSize_U8            OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecInit_U8                  OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1         OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecGetBufSize_U8            OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecInit_U8                  OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+
+#define omxIPBM_AddC_U8_C1R_Sfs                           OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C1R                               OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C3R                               OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX)
+#define omxIPBM_Mirror_U8_C1R                             OMXCATBAR(IPBM_Mirror_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_MulC_U8_C1R_Sfs                           OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+
+#define omxIPCS_ColorTwistQ14_U8_C3R                      OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R      OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R      OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R      OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R       OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R       OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R       OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R             OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszRot_U8_P3R                     OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R             OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R      OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R       OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R             OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R          OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszRot_U8_P3R                     OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R            OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R      OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R                OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R       OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR888LS_MCU_S16_U8_P3C3R       OMXCATBAR(IPCS_YCbCr444ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R        OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R           OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R               OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R             OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R      OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR888_U8_C3R                   OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX)
+
+#define omxIPPP_Deblock_HorEdge_U8_I                      OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_Deblock_VerEdge_U8_I                      OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterFIR_U8_C1R                          OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterMedian_U8_C1R                       OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_GetCentralMoment_S64                      OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_GetSpatialMoment_S64                      OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentGetStateSize                        OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentInit                                OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C1R                            OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C3R                            OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX)
+
+#define omxSP_BlockExp_S16                                OMXCATBAR(SP_BlockExp_S16, OMXSP_SUFFIX)
+#define omxSP_BlockExp_S32                                OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX)
+#define omxSP_Copy_S16                                    OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16                                 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16_Sfs                             OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC16_Sfs                        OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC32_Sfs                        OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S16S32_Sfs                    OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S32_Sfs                       OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC16                        OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC32                        OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S16S32                      OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S32                         OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC16                              OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC32                              OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S16S32                            OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S32                               OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32_Sfs                       OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32S16_Sfs                    OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC16_Sfs                        OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC32_Sfs                        OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32                            OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32_I                          OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16                              OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_I                            OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_ISfs                         OMXCATBAR(SP_FIR_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_Sfs                          OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16                           OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_I                         OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_ISfs                      OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_Sfs                       OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16                        OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16_I                      OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16                              OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16_I                            OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16                     OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16_I                   OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16                           OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16_I                         OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX)
+
+#define omxVCCOMM_Average_16x                             OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Average_8x                              OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock                OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock_SAD            OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy16x16                               OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy8x8                                 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ExpandFrame_I                           OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_LimitMVToRect                           OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_16x                                 OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_8x                                  OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX)
+
+#define omxVCM4P10_Average_4x                             OMXCATBAR(VCM4P10_Average_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Half                        OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Integer                     OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Quarter                     OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockChroma_I                        OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockLuma_I                          OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC        OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeCoeffsToPairCAVLC                OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I       OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I       OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I         OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I         OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_GetVLCInfo                             OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateChroma                      OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfHor_Luma                OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfVer_Luma                OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateLuma                        OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_ChromaDC           OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_LumaDC             OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformResidualAndAdd             OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEGetBufSize                           OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEInit                                 OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MotionEstimationMB                     OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_16x16                     OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_4x4                       OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntraChroma_8x8                  OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SAD_4x                                 OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_16x                            OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_4x                             OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_8x                             OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SATD_4x4                               OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SubAndTransformQDQResidual             OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantChromaDCFromPair       OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantLumaDCFromPair         OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_ChromaDC                OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_LumaDC                  OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX)
+
+#define omxVCM4P2_BlockMatch_Half_16x16                   OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Half_8x8                     OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_16x16                OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_8x8                  OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DCT8x8blk                               OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Inter                   OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Intra                   OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodePadMV_PVOP                        OMXCATBAR(VCM4P2_DecodePadMV_PVOP, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_Inter                   OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC              OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC              OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeMV                                OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_Inter                   OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC              OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC              OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_FindMVpred                              OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_IDCT8x8blk                              OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MCReconBlock                            OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEGetBufSize                            OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEInit                                  OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MotionEstimationMB                      OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_PredictReconCoefIntra                   OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInter_I                            OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantIntra_I                            OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvInter_I                         OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvIntra_I                         OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_inter                 OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_intra                 OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX)
+
+
+#endif /* _armOMX_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/omxtypes.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/omxtypes.h
new file mode 100644
index 0000000..8b295a6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/omxtypes.h
@@ -0,0 +1,252 @@
+/**
+ * File: omxtypes.h
+ * Brief: Defines basic Data types used in OpenMAX v1.0.2 header files.
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. 
+ *
+ * These materials are protected by copyright laws and contain material 
+ * proprietary to the Khronos Group, Inc.  You may use these materials 
+ * for implementing Khronos specifications, without altering or removing 
+ * any trademark, copyright or other notice from the specification.
+ * 
+ * Khronos Group makes no, and expressly disclaims any, representations 
+ * or warranties, express or implied, regarding these materials, including, 
+ * without limitation, any implied warranties of merchantability or fitness 
+ * for a particular purpose or non-infringement of any intellectual property. 
+ * Khronos Group makes no, and expressly disclaims any, warranties, express 
+ * or implied, regarding the correctness, accuracy, completeness, timeliness, 
+ * and reliability of these materials. 
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters, 
+ * Contributors or Members or their respective partners, officers, directors, 
+ * employees, agents or representatives be liable for any damages, whether 
+ * direct, indirect, special or consequential damages for lost revenues, 
+ * lost profits, or otherwise, arising from or in connection with these 
+ * materials.
+ * 
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc. 
+ *
+ */
+  
+#ifndef _OMXTYPES_H_
+#define _OMXTYPES_H_
+
+#include <limits.h> 
+
+#define OMX_IN
+#define OMX_OUT
+#define OMX_INOUT
+
+
+typedef enum {
+    /* Status codes returned by OpenMAX DL functions: 0 is success, every other code below is negative (INT_MAX is only a size placeholder). */
+    /* Mandatory return codes - use cases are explicitly described for each function */
+    OMX_Sts_NoErr                    =  0,    /* No error, the function completed successfully */
+    OMX_Sts_Err                      = -2,    /* Unknown/unspecified error */
+    OMX_Sts_InvalidBitstreamValErr   = -182,  /* Invalid value detected during bitstream processing */
+    OMX_Sts_MemAllocErr              = -9,    /* Not enough memory allocated for the operation */
+    OMX_StsACAAC_GainCtrErr    	     = -159,  /* AAC: Unsupported gain control data detected */
+    OMX_StsACAAC_PrgNumErr           = -167,  /* AAC: Invalid number of elements for one program   */
+    OMX_StsACAAC_CoefValErr          = -163,  /* AAC: Invalid quantized coefficient value          */
+    OMX_StsACAAC_MaxSfbErr           = -162,  /* AAC: Invalid maxSfb value in relation to numSwb */
+	OMX_StsACAAC_PlsDataErr		     = -160,  /* AAC: Pulse escape sequence data error */
+
+    /* Optional return codes - use cases are explicitly described for each function */
+    OMX_Sts_BadArgErr                = -5,    /* Bad Arguments */
+
+    OMX_StsACAAC_TnsNumFiltErr       = -157,  /* AAC: Invalid number of TNS filters  */
+    OMX_StsACAAC_TnsLenErr           = -156,  /* AAC: Invalid TNS region length  */
+    OMX_StsACAAC_TnsOrderErr         = -155,  /* AAC: Invalid order of TNS filter  */
+    OMX_StsACAAC_TnsCoefResErr       = -154,  /* AAC: Invalid bit-resolution for TNS filter coefficients  */
+    OMX_StsACAAC_TnsCoefErr          = -153,  /* AAC: Invalid TNS filter coefficients  */
+    OMX_StsACAAC_TnsDirectErr        = -152,  /* AAC: Invalid TNS filter direction  */
+
+    OMX_StsICJP_JPEGMarkerErr        = -183,  /* JPEG marker encountered within an entropy-coded block; */
+                                              /* Huffman decoding operation terminated early.           */
+    OMX_StsICJP_JPEGMarker           = -181,  /* JPEG marker encountered; Huffman decoding */
+                                              /* operation terminated early.                         */
+    OMX_StsIPPP_ContextMatchErr      = -17,   /* Context parameter doesn't match the operation */
+
+    OMX_StsSP_EvenMedianMaskSizeErr  = -180,  /* An even Median Filter mask size was replaced by an odd one */
+
+    OMX_Sts_MaximumEnumeration       = INT_MAX  /* Placeholder, forces enum of size OMX_INT */
+    
+ } OMXResult;          /** Return value or error value returned from a function. Identical to OMX_INT */
+
+ 
+/* OMX_U8 */
+#if UCHAR_MAX == 0xff
+typedef unsigned char OMX_U8;
+#elif USHRT_MAX == 0xff 
+typedef unsigned short int OMX_U8; 
+#else
+#error OMX_U8 undefined
+#endif 
+
+ 
+/* OMX_S8 */
+#if SCHAR_MAX == 0x7f 
+typedef signed char OMX_S8;
+#elif SHRT_MAX == 0x7f 
+typedef signed short int OMX_S8; 
+#else
+#error OMX_S8 undefined
+#endif
+ 
+ 
+/* OMX_U16 */
+#if USHRT_MAX == 0xffff
+typedef unsigned short int OMX_U16;
+#elif UINT_MAX == 0xffff
+typedef unsigned int OMX_U16; 
+#else
+#error OMX_U16 undefined
+#endif
+
+
+/* OMX_S16 */
+#if SHRT_MAX == 0x7fff 
+typedef signed short int OMX_S16;
+#elif INT_MAX == 0x7fff 
+typedef signed int OMX_S16; 
+#else
+#error OMX_S16 undefined
+#endif
+
+
+/* OMX_U32: unsigned 32-bit integer; picks unsigned int, else unsigned long, by comparing each type's *unsigned* maximum against 0xffffffff */
+#if UINT_MAX == 0xffffffff
+typedef unsigned int OMX_U32;
+#elif ULONG_MAX == 0xffffffff
+typedef unsigned long int OMX_U32;
+#else
+#error OMX_U32 undefined
+#endif
+
+
+/* OMX_S32 */
+#if INT_MAX == 0x7fffffff
+typedef signed int OMX_S32;
+#elif LONG_MAX == 0x7fffffff
+typedef long signed int OMX_S32; 
+#else
+#error OMX_S32 undefined
+#endif
+
+
+/* OMX_U64 & OMX_S64: 64-bit integer types and their limits (__int64 spelling on Windows, long long elsewhere) */
+#if defined( _WIN32 ) || defined ( _WIN64 )
+    typedef __int64 OMX_S64; /** Signed 64-bit integer */
+    typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */
+    #define OMX_MIN_S64			(-0x7FFFFFFFFFFFFFFFi64 - 1)	/* written as -MAX-1 so the constant is signed and negative */
+    #define OMX_MIN_U64			(0x0000000000000000i64)
+    #define OMX_MAX_S64			(0x7FFFFFFFFFFFFFFFi64)
+    #define OMX_MAX_U64			(0xFFFFFFFFFFFFFFFFui64)	/* unsigned suffix: the value does not fit a signed 64-bit type */
+#else
+    typedef long long OMX_S64; /** Signed 64-bit integer */
+    typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */
+    #define OMX_MIN_S64			(-0x7FFFFFFFFFFFFFFFLL - 1)	/* 0x8000000000000000LL would be typed unsigned long long (+2^63) */
+    #define OMX_MIN_U64			(0x0000000000000000LL)
+    #define OMX_MAX_S64			(0x7FFFFFFFFFFFFFFFLL)
+    #define OMX_MAX_U64			(0xFFFFFFFFFFFFFFFFULL)	/* unsigned suffix: the value does not fit long long */
+#endif
+
+
+/* OMX_SC8 */
+typedef struct
+{
+  OMX_S8 Re; /** Real part */
+  OMX_S8 Im; /** Imaginary part */	
+	
+} OMX_SC8; /** Signed 8-bit complex number */
+
+
+/* OMX_SC16 */
+typedef struct
+{
+  OMX_S16 Re; /** Real part */
+  OMX_S16 Im; /** Imaginary part */	
+	
+} OMX_SC16; /** Signed 16-bit complex number */
+
+
+/* OMX_SC32 */
+typedef struct
+{
+  OMX_S32 Re; /** Real part */
+  OMX_S32 Im; /** Imaginary part */	
+	
+} OMX_SC32; /** Signed 32-bit complex number */
+
+
+/* OMX_SC64 */
+typedef struct
+{
+  OMX_S64 Re; /** Real part */
+  OMX_S64 Im; /** Imaginary part */	
+	
+} OMX_SC64; /** Signed 64-bit complex number */
+
+
+/* OMX_F32 */
+typedef float OMX_F32; /** Single precision floating point,IEEE 754 */
+
+
+/* OMX_F64 */
+typedef double OMX_F64; /** Double precision floating point,IEEE 754 */
+
+
+/* OMX_INT */
+typedef int OMX_INT; /** signed integer corresponding to machine word length, has maximum signed value INT_MAX*/
+
+
+#define OMX_MIN_S8  	   	(-128)
+#define OMX_MIN_U8  		0
+#define OMX_MIN_S16		 	(-32768)
+#define OMX_MIN_U16			0
+#define OMX_MIN_S32			(-2147483647-1)
+#define OMX_MIN_U32			0
+
+#define OMX_MAX_S8			(127)
+#define OMX_MAX_U8			(255)
+#define OMX_MAX_S16			(32767)
+#define OMX_MAX_U16			(0xFFFF)
+#define OMX_MAX_S32			(2147483647)
+#define OMX_MAX_U32			(0xFFFFFFFF)
+
+typedef void OMXVoid;
+
+#ifndef NULL
+#define NULL ((void*)0)
+#endif
+
+/** Defines the geometric position and size of a rectangle, 
+  * where x,y defines the coordinates of the top left corner
+  * of the rectangle, with dimensions width in the x-direction 
+  * and height in the y-direction */
+typedef struct {
+	OMX_INT x;      /** x-coordinate of top left corner of rectangle */
+	OMX_INT y;      /** y-coordinate of top left corner of rectangle */
+	OMX_INT width;  /** Width in the x-direction. */
+	OMX_INT height; /** Height in the y-direction. */
+}OMXRect;
+
+
+/** Defines the geometric position of a point. */
+typedef struct 
+{
+ OMX_INT x; /** x-coordinate */
+ OMX_INT y;	/** y-coordinate */
+	
+} OMXPoint;
+
+
+/** Defines the dimensions of a rectangle, or region of interest in an image */
+typedef struct 
+{
+ OMX_INT width;  /** Width of the rectangle, in the x-direction */
+ OMX_INT height; /** Height of the rectangle, in the y-direction */
+	
+} OMXSize;
+
+#endif /* _OMXTYPES_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/build_vc.pl b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/build_vc.pl
new file mode 100755
index 0000000..f0b43e01
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/build_vc.pl
@@ -0,0 +1,111 @@
+#!/usr/bin/perl
+#
+# 
+# File Name:  build_vc.pl
+# OpenMAX DL: v1.0.2
+# Revision:   9641
+# Date:       Thursday, February 7, 2008
+# 
+# (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+# 
+# 
+#
+# This file builds the OpenMAX DL vc domain library (omxVC.a as configured below).
+#
+
+use File::Spec;
+use strict;
+
+my ($CC, $CC_OPTS, $AS, $AS_OPTS, $LIB, $LIB_OPTS, $LIB_TYPE);
+# Tool names and the options handed to them on every invocation.
+$CC       = 'armcc';      # used for '.c' files
+$CC_OPTS  = '--no_unaligned_access --cpu ARM7TDMI -c';
+$AS       = 'armasm';     # used for '.s' files
+$AS_OPTS  = '--no_unaligned_access --cpu ARM7TDMI';
+# $LIB      = 'armlink';   # disabled alternative: armlink producing a '.o'
+# $LIB_OPTS = '--partial -o';
+# $LIB_TYPE = '.o';
+$LIB      = 'armar';      # active setting: armar producing a '.a'
+$LIB_OPTS = '--create -r';
+$LIB_TYPE = '.a';         # suffix appended to 'omxVC' for the output file
+
+#------------------------
+
+my (@headerlist, @filelist, $hd, $file, $ofile, $command, $objlist, $libfile, $h);
+
+# Define the list of directories containing included header files.
+@headerlist = qw(api vc/api vc/m4p2/api vc/m4p10/api);
+
+# Read the list of source files (lexical handle, 3-argument open, reason on failure).
+open(my $files, '<', 'filelist_vc.txt') or die("Can't open source file list: $!\n");
+@filelist = <$files>;
+close($files);
+
+# Fix the file separators in the header paths
+foreach $h (@headerlist)
+{
+        $h = File::Spec->canonpath($h);
+}
+
+# Create the include path to be passed to the compiler
+$hd = '-I' . join(' -I', @headerlist);
+
+# Create the build directories "lib" and "obj" if missing; stop if that fails
+(-d "obj") or mkdir("obj", 0777) or die("Can't create directory obj: $!\n");
+(-d "lib") or mkdir("lib", 0777) or die("Can't create directory lib: $!\n");
+
+$objlist = '';
+
+# Compile (.c) or assemble (.s) each listed file; any other
+# single-character extension (such as .h) is reported and skipped.
+foreach $file (@filelist)
+{
+	my ($f, $base, $ext, $objfile, $tool);
+
+	chomp($file);
+	$file = File::Spec->canonpath($file);
+
+	(undef, undef, $f) = File::Spec->splitpath($file);
+	if(($base, $ext) = $f =~ /(.+)\.(\w)$/)
+	{
+		$objfile = File::Spec->catfile('obj', $base.'.o');
+
+		# Pick the tool and its options from the extension.
+		$tool = ($ext eq 'c') ? $CC.' '.$CC_OPTS
+		      : ($ext eq 's') ? $AS.' '.$AS_OPTS
+		      :                 undef;
+
+		if(defined $tool)
+		{
+			$objlist .= "$objfile ";
+			$command = $tool.' '.$hd.' -o '.$objfile.' '.$file;
+			print "$command\n";
+
+			# Stop at the first failure: carrying on could archive a
+			# missing or stale object and still report success.
+			(system($command) == 0)
+				or die "Command failed (status $?): $command\n";
+		}
+		else
+		{
+			print "Ignoring file: $f\n";
+		}
+	}
+	else
+	{
+		die "No file extension found: $f\n";
+	}
+}
+
+# Do the final link stage to create the libraries.
+$libfile = File::Spec->catfile('lib', 'omxVC'.$LIB_TYPE);
+$command = $LIB.' '.$LIB_OPTS.' '.$libfile.' '.$objlist;
+print "$command\n";
+# Report failure through the exit status so calling scripts can detect it.
+(system($command) == 0) or die "Build failed (status $?): $command\n";
+print "Build successful\n";
+
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/filelist_vc.txt b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/filelist_vc.txt
new file mode 100644
index 0000000..66f34ac
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/filelist_vc.txt
@@ -0,0 +1,123 @@
+./api/armCOMM.h
+./api/armCOMM_Bitstream.h
+./api/armCOMM_Version.h
+./api/armOMX_ReleaseVersion.h
+./api/omxtypes.h
+./src/armCOMM.c
+./src/armCOMM_Bitstream.c
+./vc/api/armVC.h
+./vc/api/omxVC.h
+./vc/comm/src/armVCCOMM_Average.c
+./vc/comm/src/armVCCOMM_SAD.c
+./vc/comm/src/omxVCCOMM_Average_16x.c
+./vc/comm/src/omxVCCOMM_Average_8x.c
+./vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c
+./vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c
+./vc/comm/src/omxVCCOMM_Copy16x16.c
+./vc/comm/src/omxVCCOMM_Copy8x8.c
+./vc/comm/src/omxVCCOMM_ExpandFrame_I.c
+./vc/comm/src/omxVCCOMM_LimitMVToRect.c
+./vc/comm/src/omxVCCOMM_SAD_16x.c
+./vc/comm/src/omxVCCOMM_SAD_8x.c
+./vc/m4p10/api/armVCM4P10_CAVLCTables.h
+./vc/m4p10/src/armVCM4P10_CAVLCTables.c
+./vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c
+./vc/m4p10/src/armVCM4P10_DeBlockPixel.c
+./vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c
+./vc/m4p10/src/armVCM4P10_DequantTables.c
+./vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c
+./vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c
+./vc/m4p10/src/armVCM4P10_Interpolate_Luma.c
+./vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c
+./vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c
+./vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c
+./vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c
+./vc/m4p10/src/armVCM4P10_QuantTables.c
+./vc/m4p10/src/armVCM4P10_SADQuar.c
+./vc/m4p10/src/armVCM4P10_TransformResidual4x4.c
+./vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c
+./vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c
+./vc/m4p10/src/omxVCM4P10_Average_4x.c
+./vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c
+./vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c
+./vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c
+./vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+./vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+./vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c
+./vc/m4p10/src/omxVCM4P10_GetVLCInfo.c
+./vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+./vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c
+./vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c
+./vc/m4p10/src/omxVCM4P10_InterpolateLuma.c
+./vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c
+./vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c
+./vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c
+./vc/m4p10/src/omxVCM4P10_MEGetBufSize.c
+./vc/m4p10/src/omxVCM4P10_MEInit.c
+./vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c
+./vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c
+./vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c
+./vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c
+./vc/m4p10/src/omxVCM4P10_SAD_4x.c
+./vc/m4p10/src/omxVCM4P10_SADQuar_16x.c
+./vc/m4p10/src/omxVCM4P10_SADQuar_4x.c
+./vc/m4p10/src/omxVCM4P10_SADQuar_8x.c
+./vc/m4p10/src/omxVCM4P10_SATD_4x4.c
+./vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c
+./vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c
+./vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c
+./vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c
+./vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c
+./vc/m4p2/api/armVCM4P2_DCT_Table.h
+./vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+./vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+./vc/m4p2/src/armVCM4P2_ACDCPredict.c
+./vc/m4p2/src/armVCM4P2_BlockMatch_Half.c
+./vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c
+./vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c
+./vc/m4p2/src/armVCM4P2_CompareMV.c
+./vc/m4p2/src/armVCM4P2_DCT_Table.c
+./vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c
+./vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c
+./vc/m4p2/src/armVCM4P2_FillVLCBuffer.c
+./vc/m4p2/src/armVCM4P2_FillVLDBuffer.c
+./vc/m4p2/src/armVCM4P2_GetVLCBits.c
+./vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+./vc/m4p2/src/armVCM4P2_PutVLCBits.c
+./vc/m4p2/src/armVCM4P2_SetPredDir.c
+./vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+./vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c
+./vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c
+./vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c
+./vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c
+./vc/m4p2/src/omxVCM4P2_DCT8x8blk.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+./vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c
+./vc/m4p2/src/omxVCM4P2_EncodeMV.c
+./vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c
+./vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c
+./vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c
+./vc/m4p2/src/omxVCM4P2_FindMVpred.c
+./vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c
+./vc/m4p2/src/omxVCM4P2_MCReconBlock.c
+./vc/m4p2/src/omxVCM4P2_MEGetBufSize.c
+./vc/m4p2/src/omxVCM4P2_MEInit.c
+./vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c
+./vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c
+./vc/m4p2/src/omxVCM4P2_QuantInter_I.c
+./vc/m4p2/src/omxVCM4P2_QuantIntra_I.c
+./vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c
+./vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c
+./vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c
+./vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c
+./vc/src/armVC_Version.c
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM.c
new file mode 100644
index 0000000..e572a89
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM.c
@@ -0,0 +1,936 @@
+/**
+ * 
+ * File Name:  armCOMM.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Defines Common APIs used across OpenMAX API's
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+
+/***********************************************************************/
+                /* Miscellaneous Arithmetic operations */
+
+/**
+ * Function: armRoundFloatToS16
+ *
+ * Description:
+ * Converts a double precision value into a short int after rounding
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16 format
+ *
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value)
+{
+    /* Shift the input half a unit away from zero so that the truncating
+     * conversion below rounds to nearest. Only strictly positive inputs
+     * are shifted upwards; zero and negatives are shifted downwards. */
+    OMX_F64 nudged;
+
+    nudged = (Value > 0) ? (Value + .5) : (Value - .5);
+
+    return (OMX_S16)nudged;
+}
+
+/**
+ * Function: armRoundFloatToS32
+ *
+ * Description:
+ * Converts a double precision value into a int after rounding
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S32 format
+ *
+ */
+
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value)
+{
+    /* Half-unit bias carrying the sign of the input (zero is treated
+     * like a negative input). Adding it and then letting the cast
+     * truncate toward zero rounds to nearest, with half-way cases
+     * going away from zero. */
+    const OMX_F64 bias = (Value > 0) ? .5 : -.5;
+
+    return (OMX_S32)(Value + bias);
+
+}
+/**
+ * Function: armSatRoundFloatToS16
+ *
+ * Description:
+ * Converts a double precision value into a short int after rounding and saturation
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16 format
+ *
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value)
+{
+    const OMX_S16 upper = (OMX_S16)OMX_MAX_S16;
+    const OMX_S16 lower = (OMX_S16)OMX_MIN_S16;
+    OMX_S16 result;
+    if (Value > 0)
+    {
+        /* Push the magnitude up by one half before truncating;
+         * anything beyond the top of the range sticks at the maximum. */
+        Value += 0.5;
+        result = upper;
+        if (!(Value > upper))
+        {
+            result = (OMX_S16)Value;
+        }
+    }
+    else
+    {
+        /* Zero and negative inputs are pushed the other way and stick
+         * at the minimum. */
+        Value -= 0.5;
+        result = lower;
+        if (!(Value < lower))
+        {
+            result = (OMX_S16)Value;
+        }
+    }
+    return result;
+}
+
+/**
+ * Function: armSatRoundFloatToS32
+ *
+ * Description:
+ * Converts a double precision value into a int after rounding and saturation
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S32 format
+ *
+ */
+
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value)
+{
+    /* Inputs that are not strictly positive are handled first: they
+     * are moved down by one half and limited at the bottom of range. */
+    if (!(Value > 0))
+    {
+        Value -= 0.5;
+
+        if (Value < (OMX_S32)OMX_MIN_S32)
+        {
+            /* Below the smallest representable value */
+            return (OMX_S32)OMX_MIN_S32;
+        }
+
+        return (OMX_S32)Value;
+    }
+
+    /* Strictly positive inputs are moved up by one half and limited
+     * at the top of the range. */
+    Value += 0.5;
+
+    if (Value > (OMX_S32)OMX_MAX_S32)
+    {
+        /* Above the largest representable value */
+        return (OMX_S32)OMX_MAX_S32;
+    }
+
+    return (OMX_S32)Value;
+}
+
+/**
+ * Function: armSatRoundFloatToU16
+ *
+ * Description:
+ * Converts a double precision value into a unsigned short int after rounding and saturation
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U16 format
+ *
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value)
+{
+    /* Round to nearest by adding one half before truncating */
+    Value += 0.5;
+
+    if(Value > (OMX_U16)OMX_MAX_U16 )
+    {
+        return (OMX_U16)OMX_MAX_U16;
+    }
+
+    /* Saturate low: casting a value <= -1 to unsigned is undefined in C */
+    return (Value > 0) ? (OMX_U16)Value : (OMX_U16)0;
+}
+
+/**
+ * Function: armSatRoundFloatToU32
+ *
+ * Description:
+ * Converts a double precision value into a unsigned int after rounding and saturation
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U32 format
+ *
+ */
+
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value)
+{
+    /* Round to nearest by adding one half before truncating */
+    Value += 0.5;
+
+    if(Value > (OMX_U32)OMX_MAX_U32 )
+    {
+        return (OMX_U32)OMX_MAX_U32;
+    }
+
+    /* Saturate low: casting a value <= -1 to unsigned is undefined in C */
+    return (Value > 0) ? (OMX_U32)Value : (OMX_U32)0;
+}
+
+/**
+ * Function: armRoundFloatToS64
+ *
+ * Description:
+ * Converts a double precision value into a 64 bit int after rounding
+ *
+ * Parameters:
+ * [in]  Value                 Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S64 format
+ *
+ */
+
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value)
+{
+    /* Strictly positive inputs gain one half, all others lose one half;
+     * the conversion to OMX_S64 then discards the fraction. */
+    if (!(Value > 0))
+    {
+        return (OMX_S64)(Value - .5);
+    }
+
+    return (OMX_S64)(Value + .5);
+}
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Checks the sign of a variable:
+ * returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	    var     Variable to be checked
+ *
+ * Return Value:
+ * OMX_INT --   returns 1 if it is Positive
+ *              returns 0 if it is 0
+ *              returns -1 if it is Negative 
+ */ 
+
+OMX_INT armSignCheck (
+    OMX_S16 var
+)
+
+{
+    /* Three-way classification of the input, reported as -1, 0 or 1.
+     * Each case returns directly instead of going through a local. */
+    if (var < 0)
+    {
+        /* Negative input */
+        return -1;
+    }
+
+    if (var > 0)
+    {
+        /* Strictly positive input */
+        return 1;
+    }
+
+    /* Neither of the above: the input is exactly zero */
+    return 0;
+}
+
+/**
+ * Function: armClip
+ *
+ * Description: Clips the input between MAX and MIN value
+ * 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] Min     lower bound
+ * [in] Max     upper bound
+ * [in] src     variable to be clipped
+ *
+ * Return Value:
+ * OMX_S32 --   returns clipped value
+ */ 
+ 
+OMX_S32 armClip (
+    OMX_INT min,
+    OMX_INT max, 
+    OMX_S32 src 
+)
+ 
+{
+    /* The upper bound is tested first, so whenever src exceeds max
+     * the result is max, even for a caller that passes min > max */
+    if (src > max)
+    {
+        return max;
+    }
+
+    /* Otherwise raise values below the lower bound up to min and
+     * pass everything else through untouched */
+    return (src < min) ? min : src;
+}
+
+/**
+ * Function: armClip_F32
+ *
+ * Description: Clips the input between MAX and MIN value
+ * 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] Min     lower bound
+ * [in] Max     upper bound
+ * [in] src     variable to be clipped
+ *
+ * Return Value:
+ * OMX_F32 --   returns clipped value
+ */ 
+ 
+OMX_F32 armClip_F32 (
+    OMX_F32 min,
+    OMX_F32 max, 
+    OMX_F32 src 
+)
+ 
+{
+    /* max is checked first and wins outright; min is only consulted
+     * for values that did not exceed max. A value for which both
+     * comparisons are false is handed back unchanged. */
+
+    if (!(src > max))
+    {
+        return (src < min) ? min : src;
+    }
+
+    return max;
+}
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description: Divides a float value by 2^shift and 
+ * saturates it for unsigned value range for satBits.
+ * Second parameter is like "shifting" the corresponding 
+ * integer value. Takes care of rounding while clipping the final 
+ * value.
+ *
+ * Parameters:
+ * [in] v          Number to be operated upon
+ * [in] shift      Divides the input "v" by "2^shift"
+ * [in] satBits    Final range is [0, 2^satBits)
+ *
+ * Return Value:
+ * OMX_U32 --   returns "shifted" saturated value
+ */ 
+ 
+OMX_U32 armShiftSat_F32(OMX_F32 v, OMX_INT shift, OMX_INT satBits) 
+{
+    /* Largest permitted result: the low satBits bits all set */
+    OMX_U32 allOnes = (OMX_U32)(-1);
+    OMX_U32 maxV = allOnes >> (32-satBits);
+    OMX_F32 vShifted, vRounded, shiftDiv = (OMX_F32)(1 << shift);
+    
+    /* Zero and negative inputs clamp to the bottom of the range */
+    if(v <= 0)
+        return 0;
+    
+    vShifted = v / shiftDiv;
+    vRounded = (OMX_F32)(vShifted + 0.5);
+    /* Saturate in the float domain first: converting a float that does
+     * not fit in OMX_U32 to an integer is undefined behaviour in C */
+    if(vRounded >= (OMX_F32)maxV)
+        return maxV;
+    return (OMX_U32)vRounded;
+}
+
+/**
+ * Functions: armSwapElem
+ *
+ * Description:
+ * This function swaps two elements at the specified pointer locations.
+ * The size of each element could be anything as specified by <elemSize>
+ *
+ * Return Value:
+ * OMXResult -- Error status from the function
+ */
+OMXResult armSwapElem(
+        OMX_U8 *pBuf1,
+        OMX_U8 *pBuf2,
+        OMX_INT elemSize
+       )
+{
+    OMX_INT idx;
+    OMX_U8 saved;
+    armRetArgErrIf(!pBuf1 || !pBuf2, OMX_Sts_BadArgErr);
+    /* Exchange the two elements one byte at a time */
+    for(idx = 0; idx < elemSize; idx++)
+    {
+        saved      = pBuf1[idx];
+        pBuf1[idx] = pBuf2[idx];
+        pBuf2[idx] = saved;
+    }
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Finds the median of three numbers
+ * 
+ * Remarks:
+ *
+ * Parameters:
+ * [in] fEntry     First entry
+ * [in] sEntry     second entry
+ * [in] tEntry     Third entry
+ *
+ * Return Value:
+ * OMX_S32 --   returns the median value
+ */ 
+ 
+OMX_S32 armMedianOf3 (
+    OMX_S32 fEntry,
+    OMX_S32 sEntry, 
+    OMX_S32 tEntry 
+)
+{
+    OMX_S32 lowPair  = armMin (fEntry, sEntry);
+    OMX_S32 highPair = armMax (fEntry, sEntry);
+
+    /* The median is the third entry limited to [lowPair, highPair] */
+    highPair = armMin (highPair, tEntry);
+    return (armMax (lowPair, highPair));
+}
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Finds the size of a positive value and returns the same
+ * 
+ * Remarks:
+ *
+ * Parameters:
+ * [in] value    Positive value
+ *
+ * Return Value:
+ * OMX_U8 --     Returns the minimum number of bits required to represent the positive value. 
+                 This is the smallest k>=0 such that value is less than (1<<k).
+ */ 
+ 
+OMX_U8 armLogSize (
+    OMX_U16 value 
+)
+{
+    OMX_U8 bits = 0;
+
+    /* Count one bit for every halving it takes to reach zero */
+    for (; value != 0; value >>= 1)
+        bits++;
+    return bits;
+}
+
+/***********************************************************************/
+                /* Saturating Arithmetic operations */
+
+/**
+ * Function :armSatAdd_S32()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ * 
+ *    
+ **/
+ 
+OMX_S32 armSatAdd_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    OMX_S32 Result;
+
+    /*
+     * Decide whether the sum fits BEFORE adding. Signed overflow is
+     * undefined behaviour in C, so forming the sum first and inspecting
+     * its sign afterwards is not reliable: an optimising compiler may
+     * assume the addition never wraps and drop the check.
+     */
+    if (Value2 > 0)
+    {
+        /* Adding a positive amount can only overflow upwards */
+        if (Value1 > OMX_MAX_S32 - Value2)
+        {
+            /*Result has saturated in positive side*/
+            return OMX_MAX_S32;
+        }
+    }
+    else if (Value2 < 0)
+    {
+        /* Adding a negative amount can only overflow downwards */
+        if (Value1 < OMX_MIN_S32 - Value2)
+        {
+            /*Result has saturated in negative side*/
+            return OMX_MIN_S32;
+        }
+    }
+
+    /* Reaching this point means the sum is representable (this also
+     * covers Value2 == 0, which needs no check) */
+    /*Result has not saturated*/
+    Result = Value1 + Value2;
+
+    return Result;
+}
+
+/**
+ * Function :armSatAdd_S64()
+ *
+ * Description :
+ *   Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ * 
+ *    
+ **/
+ 
+OMX_S64 armSatAdd_S64(OMX_S64 Value1,OMX_S64 Value2)
+{
+    OMX_S64 Result;
+
+    /*
+     * Decide whether the sum fits BEFORE adding. Signed overflow is
+     * undefined behaviour in C, so forming the sum first and inspecting
+     * its sign afterwards is not reliable: an optimising compiler may
+     * assume the addition never wraps and drop the check.
+     */
+    if (Value2 > 0)
+    {
+        /* Adding a positive amount can only overflow upwards */
+        if (Value1 > OMX_MAX_S64 - Value2)
+        {
+            /*Result has saturated in positive side*/
+            Result = OMX_MAX_S64;
+            return Result;
+        }
+    }
+    else if (Value2 < 0)
+    {
+        /* Adding a negative amount can only overflow downwards */
+        if (Value1 < OMX_MIN_S64 - Value2)
+        {
+            /*Result has saturated in negative side*/
+            return OMX_MIN_S64;
+        }
+    }
+
+    /* Reaching this point means the sum is representable (this also
+     * covers Value2 == 0, which needs no check) */
+    /*Result has not saturated*/
+    Result = Value1 + Value2;
+
+    return Result;
+}
+
+/** Function :armSatSub_S32()
+ * 
+ * Description :
+ *     Returns the result of saturated subtraction of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ *
+ * Return:
+ * [out]             Result of operation
+ * 
+ **/
+
+OMX_S32 armSatSub_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    OMX_S32 Result;
+
+    /*
+     * Decide whether the difference fits BEFORE subtracting. Signed
+     * overflow is undefined behaviour in C, so forming the difference
+     * first and inspecting its sign afterwards is not reliable: an
+     * optimising compiler may assume it never wraps and drop the check.
+     */
+    if (Value2 < 0)
+    {
+        /* Subtracting a negative amount can only overflow upwards */
+        if (Value1 > OMX_MAX_S32 + Value2)
+        {
+            /*Result has saturated in positive side*/
+            return OMX_MAX_S32;
+        }
+    }
+    else if (Value2 > 0)
+    {
+        /* Subtracting a positive amount can only overflow downwards */
+        if (Value1 < OMX_MIN_S32 + Value2)
+        {
+            /*Result has saturated in negative side*/
+            return OMX_MIN_S32;
+        }
+    }
+
+    /* Reaching this point means the difference is representable (this
+     * also covers Value2 == 0, which needs no check) */
+    /*Result has not saturated*/
+    Result = Value1 - Value2;
+
+    return Result;
+}
+
+/**
+ * Function :armSatMac_S32()
+ *
+ * Description :
+ *     Returns the result of Multiplication of Value1 and Value2 and subsequent saturated
+ *     accumulation with Mac
+ *
+ * Parameters:
+ * [in] Value1       First Operand
+ * [in] Value2       Second Operand
+ * [in] Mac          Accumulator
+ *
+ * Return:
+ * [out]             Result of operation
+ **/
+
+OMX_S32 armSatMac_S32(OMX_S32 Mac,OMX_S16 Value1,OMX_S16 Value2)
+{
+    /* The 16-bit operands are promoted to int before the multiply, so
+     * the product itself is formed without saturation; only the
+     * accumulation into Mac saturates. */
+    const OMX_S32 product = (OMX_S32)(Value1*Value2);
+
+    return armSatAdd_S32( Mac , product );
+}
+
+/**
+ * Function :armSatMac_S16S32_S32
+ *
+ * Description :
+ *   Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac
+ *
+ *   mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Parameters:
+ * [in] delayElem    First 32 bit Operand
+ * [in] filTap       Second 16 bit Operand
+ * [in] mac          Result of MAC operation
+ *
+ * Return:
+ * [out]  mac        Result of operation
+ *    
+ **/
+ 
+OMX_S32 armSatMac_S16S32_S32(OMX_S32 mac, OMX_S32 delayElem, OMX_S16 filTap )
+{
+    /* Adds the 32-bit saturated product delayElem * filTap to mac with saturation */
+    OMX_S32 result;
+    /* armSatMulS16S32_S32 yields the product scaled down by 2^16; used here only as a range probe */
+    result = armSatMulS16S32_S32(filTap,delayElem); 
+    /* A scaled product outside the 16-bit range means the full product does not fit in 32 bits */
+    if ( result > OMX_MAX_S16 )
+    {
+        result = OMX_MAX_S32;
+    }
+    else if( result < OMX_MIN_S16 )
+    {
+        result = OMX_MIN_S32;
+    }
+    else
+    {
+        result = delayElem * filTap;
+    }
+    /* Accumulate with saturation */
+    mac = armSatAdd_S32(mac,result);
+    
+    return mac;
+}
+
+
+/**
+ * Function :armSatRoundRightShift_S32_S16
+ *
+ * Description :
+ *   Returns the result of rounded right shift operation of input by the scalefactor
+ *
+ *   output = Saturate_in_16Bits( RoundedRightShift( input , shift ) )
+ *
+ * Parameters:
+ * [in] input       The input to be operated on
+ * [in] shift The shift number
+ *
+ * Return:
+ * [out]            Result of operation
+ *    
+ **/
+
+
+OMX_S16 armSatRoundRightShift_S32_S16(OMX_S32 input, OMX_INT shift)
+{
+    /* A rounded right shift is a left shift by the negated amount */
+    const OMX_S32 shifted = armSatRoundLeftShift_S32(input,-shift);
+    OMX_S16 narrowed;
+
+    /* Narrow to 16 bits, pinning out-of-range values to the limits */
+    if ( shifted > OMX_MAX_S16 )
+    {
+        narrowed = (OMX_S16)OMX_MAX_S16;
+    }
+    else
+    {
+        narrowed = (shifted < OMX_MIN_S16) ? (OMX_S16)OMX_MIN_S16
+                                           : (OMX_S16)shifted;
+    }
+    return narrowed;
+}
+
+/**
+ * Function :armSatRoundLeftShift_S32()
+ *
+ * Description :
+ *     Returns the result of saturating left-shift operation on input
+ *     Or rounded Right shift if the input Shift is negative.
+ *     
+ * Parameters:
+ * [in] Value        Operand
+ * [in] Shift        Operand for shift operation
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+OMX_S32 armSatRoundLeftShift_S32(OMX_S32 Value, OMX_INT Shift)
+{
+    OMX_INT rightBy;
+    if (Shift >= 0)
+    {
+        /* Left shift: double the value Shift times so that every
+         * step saturates instead of overflowing */
+        while (Shift-- > 0)
+        {
+            Value = armSatAdd_S32(Value, Value);
+        }
+        return Value;
+    }
+
+    /* Right shift: add half of the divisor first so the result rounds */
+    rightBy = -Shift;
+    Value = armSatAdd_S32(Value, (1 << (rightBy - 1)));
+    return Value >> rightBy;
+}
+
+/**
+ * Function :armSatRoundLeftShift_S64()
+ *
+ * Description :
+ *     Returns the result of saturating left-shift operation on input
+ *     Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value        Operand
+ * [in] shift        Operand for shift operation
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+ 
+OMX_S64 armSatRoundLeftShift_S64(OMX_S64 Value, OMX_INT Shift)
+{
+    OMX_INT remaining;
+    if (Shift < 0)
+    {
+        const OMX_INT rightBy = -Shift;
+
+        /* Rounded right shift: add half of the divisor, then shift */
+        Value = armSatAdd_S64(Value, ((OMX_S64)1 << (rightBy - 1)));
+        return Value >> rightBy;
+    }
+
+    /* Left shift by repeated saturating doubling */
+    for (remaining = Shift; remaining > 0; remaining--)
+    {
+        Value = armSatAdd_S64(Value, Value);
+    }
+    return Value;
+}
+
+/**
+ * Function :armSatMulS16S32_S32()
+ *
+ * Description :
+ *     Returns the result of a S16 data type multiplied with an S32 data type
+ *     in a S32 container
+ *
+ * Parameters:
+ * [in] input1       Operand 1
+ * [in] input2       Operand 2
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+
+OMX_S32 armSatMulS16S32_S32(OMX_S16 input1,OMX_S32 input2)
+{
+    OMX_S16 hi2,lo1;
+    OMX_U16 lo2;
+    OMX_S32 temp1,temp2;
+    OMX_S32 result;
+    lo1  = input1;
+    /* Split input2 into a signed high half and an unsigned low half.
+     * The low half is masked out of the unsigned representation:
+     * left-shifting a signed value (input2 << 16) can overflow, which
+     * is undefined behaviour in C. */
+    hi2  = ( input2 >>  16 );
+    lo2  = (OMX_U16)( (OMX_U32)input2 & 0xFFFF );
+    
+    /* Partial products of (input1 * input2) scaled down by 2^16 */
+    temp1 = hi2 * lo1;
+    temp2 = ( lo2* lo1 ) >> 16;
+    result =  armSatAdd_S32(temp1,temp2);
+    return result;
+}
+
+/**
+ * Function :armSatMulS32S32_S32()
+ *
+ * Description :
+ *     Returns the result of a S32 data type multiplied with an S32 data type
+ *     in a S32 container
+ *
+ * Parameters:
+ * [in] input1       Operand 1
+ * [in] input2       Operand 2
+ *
+ * Return:
+ * [out]             Result of operation
+ *    
+ **/
+
+OMX_S32 armSatMulS32S32_S32(OMX_S32 input1,OMX_S32 input2)
+{
+    OMX_S16 hi1,hi2;
+    OMX_U16 lo1,lo2;
+    OMX_S32 temp1,temp2,temp3;
+    OMX_S32 result;
+
+    /* Split both operands into a signed high half and an unsigned low
+     * half. The low halves are masked out of the unsigned
+     * representation: left-shifting a signed value (x << 16) can
+     * overflow, which is undefined behaviour in C. */
+    hi1  = ( input1 >>  16 );
+    lo1  = (OMX_U16)( (OMX_U32)input1 & 0xFFFF );
+    hi2  = ( input2 >>  16 );
+    lo2  = (OMX_U16)( (OMX_U32)input2 & 0xFFFF );
+    temp1 =   hi1 * hi2;
+    temp2 = ( hi1* lo2 ) >> 16;
+    temp3 = ( hi2* lo1 ) >> 16;
+
+    result = armSatAdd_S32(temp1,temp2);
+    result = armSatAdd_S32(result,temp3);
+    return result;
+}
+
+/**
+ * Function :armIntDivAwayFromZero()
+ *
+ * Description : Integer division with rounding to the nearest integer. 
+ *               Half-integer values are rounded away from zero
+ *               unless otherwise specified. For example 3//2 is rounded 
+ *               to 2, and -3//2 is rounded to -2.
+ *
+ * Parameters:
+ * [in] Num        Operand 1
+ * [in] Deno       Operand 2
+ *
+ * Return:
+ * [out]             Result of operation input1//input2
+ *    
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno)
+{
+    /* Divide in double precision so the fractional part survives */
+    OMX_F64 quotient = ((OMX_F64)Num)/((OMX_F64)Deno);
+
+    /* Move half a unit away from zero; the truncating conversion in
+     * the return statement then yields round-half-away-from-zero */
+    if (quotient >= 0)
+    {
+        quotient += 0.5;
+    }
+    else
+    {
+        quotient -= 0.5;
+    }
+    return (OMX_S32)(quotient);
+}
+
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM_Bitstream.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM_Bitstream.c
new file mode 100644
index 0000000..9ef9319
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM_Bitstream.c
@@ -0,0 +1,329 @@
+/**
+ * 
+ * File Name:  armCOMM_Bitstream.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Defines bitstream encode and decode functions common to all codecs
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+
+/***************************************
+ * Fixed bit length Decode
+ ***************************************/
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream pointer
+ *
+ * Parameters:
+ * [in]     **ppBitStream
+ * [in]     *pOffset
+ * [in]     N=1...32
+ *
+ * Returns  Value
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+    /* Read next 32 bits; bytes are widened before shifting so that
+     * "<< 24" never shifts into the sign bit of a promoted int */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] <<  8) |  (OMX_U32)pBitStream[3];
+    /* The fifth byte is needed, and read, only when not byte-aligned */
+    if (Offset != 0) Value = (Value << Offset) | (pBitStream[4] >> (8-Offset));
+    return Value >> (32-N);   /* Return N bits */
+}
+
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream
+ *    
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ * [in]     N=1..32
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ * Returns  Value
+ */
+
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    if(N == 0) return 0;   /* nothing to read, stream position untouched */
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+
+    /* Read next 32 bits; bytes are widened before shifting so that
+     * "<< 24" never shifts into the sign bit of a promoted int */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] <<  8) |  (OMX_U32)pBitStream[3];
+    /* The fifth byte is needed, and read, only when not byte-aligned */
+    if (Offset != 0) Value = (Value << Offset) | (pBitStream[4] >> (8-Offset));
+
+    /* Advance bitstream pointer by N bits */
+    Offset += N;
+    *ppBitStream = pBitStream + (Offset>>3);
+    *pOffset = Offset & 7;
+
+    /* Return N bits */
+    return Value >> (32-N);
+}
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align the pointer *ppBitStream to the next byte boundary
+ *
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ *
+ **/
+ 
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset)
+{
+    /* Offset not positive: the position is left exactly as it is */
+    if(*pOffset <= 0) return;
+    /* Otherwise drop the rest of the current byte */
+    ++(*ppBitStream);
+    *pOffset = 0;
+}
+
+/** 
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Skip N bits from the value at *ppBitStream
+ *
+ * Parameters:
+ * [in]     *ppBitStream
+ * [in]     *pOffset
+ * [in]     N
+ *
+ * [out]    *ppBitStream
+ * [out]    *pOffset
+ *
+ **/
+
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N)
+{
+    /* New position, counted in bits from the start of the current byte */
+    const OMX_INT bitPos = *pOffset + N;
+
+    /* Whole bytes move the stream pointer ... */
+    *ppBitStream += (bitPos>>3);
+    /* ... and the low three bits become the new in-byte offset */
+    *pOffset = bitPos & 7;
+}
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of variable length symbol (max size 32 bits) read from
+ * the bit stream pointed by *ppBitStream at *pOffset by using the table
+ * pointed by pCodeBook
+ * 
+ * Parameters:
+ * [in]     *pBitStream
+ * [in]     *pOffset
+ * [in]     pCodeBook
+ * 
+ * [out]    *pBitStream
+ * [out]    *pOffset
+ *
+ * Returns : Code Book Index if successful. 
+ *         : ARM_NO_CODEBOOK_INDEX = -1 if search fails.
+ **/
+#ifndef C_OPTIMIZED_IMPLEMENTATION 
+
+OMX_U16 armUnPackVLC32(
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pOffset,
+    const ARM_VLC32 *pCodeBook
+)
+{    
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+    OMX_INT Index;
+    armAssert(Offset>=0 && Offset<=7);
+
+    /* Read next 32 bits; bytes are widened before shifting so that
+     * "<< 24" never shifts into the sign bit of a promoted int */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] <<  8) |  (OMX_U32)pBitStream[3];
+    /* The fifth byte is needed, and read, only when not byte-aligned */
+    if (Offset != 0) Value = (Value << Offset) | (pBitStream[4] >> (8-Offset));
+
+    /* Search through the codebook; an entry with codeLen 0 ends the table */
+    for (Index=0; pCodeBook->codeLen != 0; Index++)
+    {
+        if (pCodeBook->codeWord == (Value >> (32 - pCodeBook->codeLen)))
+        {
+            /* Match: consume codeLen bits and report the entry's position */
+            Offset       = Offset + pCodeBook->codeLen;
+            *ppBitStream = pBitStream + (Offset >> 3) ;
+            *pOffset     = Offset & 7;
+            return Index;
+        }        
+        pCodeBook++;
+    }
+    return ARM_NO_CODEBOOK_INDEX;   /* No code match found */
+}
+
+#endif
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream     pointer to the pointer to the current byte 
+ *                      in the bit stream.
+ * [in] pOffset         pointer to the bit position in the byte 
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in] codeWord        Code word that need to be inserted in to the
+ *                          bitstream
+ * [in] codeLength      Length of the code word valid range 1...32
+ *
+ * [out] ppBitStream    *ppBitStream is updated after the block is encoded,
+ *                          so that it points to the current byte in the bit
+ *                          stream buffer.
+ * [out] pBitOffset     *pBitOffset is updated so that it points to the
+ *                          current bit position in the byte pointed by
+ *                          *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+ 
+OMXResult armPackBits (
+    OMX_U8  **ppBitStream,
+    OMX_INT *pOffset,
+    OMX_U32 codeWord,
+    OMX_INT codeLength
+)
+{
+    OMX_U8  *pOut   = *ppBitStream;
+    OMX_INT  bitPos = *pOffset;
+    OMX_INT  room;              /* free bits left in the byte being built */
+    OMX_U32  curByte;           /* byte under construction                */
+
+    armRetArgErrIf(bitPos < 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf(bitPos > 7, OMX_Sts_BadArgErr);
+    armRetArgErrIf(codeLength < 1, OMX_Sts_BadArgErr);
+    armRetArgErrIf(codeLength > 32, OMX_Sts_BadArgErr);
+
+    /* Left-justify the code word, then start the first output byte from the
+     * bitPos bits already stored there followed by the leading code bits */
+    codeWord <<= (32 - codeLength);
+    room     = 8 - bitPos;
+    curByte  = ((pOut[0] >> room) << room) | (codeWord >> (24 + bitPos));
+
+    /* Flush every byte that the remaining code bits fill completely */
+    for (; room <= codeLength; room = 8)
+    {
+        *pOut++     = (OMX_U8)curByte;
+        codeWord  <<= room;
+        codeLength -= room;
+        bitPos      = 0;
+        curByte     = codeWord >> 24;
+    }
+
+    /* Store the trailing partial byte and publish the updated position */
+    *pOut        = (OMX_U8)curByte;
+    *ppBitStream = pOut;
+    *pOffset     = bitPos + codeLength;
+    return OMX_Sts_NoErr;
+}
+ 
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte 
+ *                      in the bit stream.
+ * [in]	pBitOffset	    pointer to the bit position in the byte 
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in]	 code     		VLC code word that need to be inserted in to the
+ *                      bitstream
+ *
+ * [out] ppBitStream	*ppBitStream is updated after the block is encoded,
+ *	                    so that it points to the current byte in the bit
+ *						stream buffer.
+ * [out] pBitOffset		*pBitOffset is updated so that it points to the
+ *						current bit position in the byte pointed by
+ *						*ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+ 
+OMXResult armPackVLC32 (OMX_U8 **ppBitStream, OMX_INT *pBitOffset, ARM_VLC32 code)
+{
+    /* A VLC entry is a (codeWord, codeLen) pair, so writing it is a fixed
+     * length write of codeLen bits; armPackBits range-checks the length */
+    const OMX_U32 bits  = code.codeWord;
+    const OMX_INT nBits = code.codeLen;
+    return armPackBits(ppBitStream, pBitOffset, bits, nBits);
+}
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/armVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/armVC.h
new file mode 100644
index 0000000..7fa7716
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/armVC.h
@@ -0,0 +1,1153 @@
+/**
+ * 
+ * File Name:  armVC.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File: armVideo.h
+ * Brief: Declares API's/Basic Data types used across the OpenMAX Video domain
+ *
+ */
+
+
+#ifndef _armVideo_H_
+#define _armVideo_H_
+
+#include "omxVC.h"
+#include "armCOMM_Bitstream.h"
+
+/**
+ * ARM specific state structure to hold Motion Estimation information.
+ */
+ 
+/* M4P2 variant: bundles the motion estimation parameters (MEParams) with
+ * the motion estimation mode (MEMode). */
+typedef struct m4p2_MESpec
+{
+    OMXVCM4P2MEParams MEParams;
+    OMXVCM4P2MEMode   MEMode;
+} ARMVCM4P2_MESpec;
+
+/* M4P10 variant of the same parameters/mode pair. */
+typedef struct m4p10_MESpec
+{
+    OMXVCM4P10MEParams MEParams;
+    OMXVCM4P10MEMode   MEMode;
+} ARMVCM4P10_MESpec;
+
+/**
+ * Function: armVCM4P2_CompareMV
+ *
+ * Description:
+ * Performs comparison of motion vectors and SADs to decide the
+ * best MV and SAD
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]     mvX     x coordinate of the candidate motion vector
+ * [in]     mvY     y coordinate of the candidate motion vector
+ * [in]     candSAD Candidate SAD
+ * [in]     bestMVX x coordinate of the best motion vector
+ * [in]     bestMVY y coordinate of the best motion vector
+ * [in]     bestSAD best SAD
+ *
+ * Return Value:
+ * OMX_INT -- 1 to indicate that the current sad is the best
+ *            0 to indicate that it is NOT the best SAD
+ */
+
+OMX_INT armVCM4P2_CompareMV (
+    OMX_S16 mvX,
+    OMX_S16 mvY,
+    OMX_INT candSAD,
+    OMX_S16 bestMVX,
+    OMX_S16 bestMVY,
+    OMX_INT bestSAD);
+
+/**
+ * Function: armVCM4P2_ACDCPredict
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+ * to the function call, prediction direction (predDir) should be selected
+ * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDst     pointer to the coefficient buffer which contains
+ *                          the quantized coefficient residuals (PQF) of the
+ *                          current block
+ * [in] pPredBufRow pointer to the coefficient row buffer
+ * [in] pPredBufCol pointer to the coefficient column buffer
+ * [in] curQP       quantization parameter of the current block. curQP
+ *                          may equal to predQP especially when the current
+ *                          block and the predictor block are in the same
+ *                          macroblock.
+ * [in] predQP      quantization parameter of the predictor block
+ * [in] predDir     indicates the prediction direction which takes one
+ *                          of the following values:
+ *                          OMX_VIDEO_HORIZONTAL    predict horizontally
+ *                          OMX_VIDEO_VERTICAL      predict vertically
+ * [in] ACPredFlag  a flag indicating if AC prediction should be
+ *                          performed. It is equal to ac_pred_flag in the bit
+ *                          stream syntax of MPEG-4
+ * [in] videoComp   video component type (luminance, chrominance or
+ *                          alpha) of the current block
+ * [in] flag        This flag selects whether the function is used to
+ *                  calculate PQF (set 1, prediction) or QF (set 0, reconstruction)
+ * [out]    pPreACPredict   pointer to the predicted coefficients buffer.
+ *                          Filled ONLY if it is not NULL
+ * [out]    pSrcDst     pointer to the coefficient buffer which contains
+ *                          the quantized coefficients (QF) of the current
+ *                          block
+ * [out]    pPredBufRow pointer to the updated coefficient row buffer
+ * [out]    pPredBufCol pointer to the updated coefficient column buffer
+ * [out]    pSumErr     pointer to the updated sum of the difference
+ *                      between predicted and unpredicted coefficients
+ *                      If this is NULL, do not update
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_ACDCPredict(
+     OMX_S16 * pSrcDst,
+     OMX_S16 * pPreACPredict,
+     OMX_S16 * pPredBufRow,
+     OMX_S16 * pPredBufCol,
+     OMX_INT curQP,
+     OMX_INT predQP,
+     OMX_INT predDir,
+     OMX_INT ACPredFlag,
+     OMXVCM4P2VideoComponent  videoComp,
+     OMX_U8 flag,
+     OMX_INT *pSumErr
+);
+
+/**
+ * Function: armVCM4P2_SetPredDir
+ *
+ * Description:
+ * Performs detecting the prediction direction
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] blockIndex  block index indicating the component type and
+ *                          position as defined in subclause 6.1.3.8, of ISO/IEC
+ *                          14496-2. Furthermore, indexes 6 to 9 indicate the
+ *                          alpha blocks spatially corresponding to luminance
+ *                          blocks 0 to 3 in the same macroblock.
+ * [in] pCoefBufRow pointer to the coefficient row buffer
+ * [in] pQpBuf      pointer to the quantization parameter buffer
+ * [out]    predQP      quantization parameter of the predictor block
+ * [out]    predDir     indicates the prediction direction which takes one
+ *                          of the following values:
+ *                          OMX_VIDEO_HORIZONTAL    predict horizontally
+ *                          OMX_VIDEO_VERTICAL      predict vertically
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_SetPredDir(
+     OMX_INT blockIndex,
+     OMX_S16 *pCoefBufRow,
+     OMX_S16 *pCoefBufCol,
+     OMX_INT *predDir,
+     OMX_INT *predQP,
+     const OMX_U8 *pQpBuf
+);
+
+/**
+ * Function: armVCM4P2_EncodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs zigzag scanning and VLC encoding for one intra block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream     pointer to the pointer to the current byte in
+ *                              the bit stream
+ * [in] pBitOffset      pointer to the bit position in the byte pointed
+ *                              by *ppBitStream. Valid within 0 to 7.
+ * [in] pQDctBlkCoef    pointer to the quantized DCT coefficient
+ * [in] predDir         AC prediction direction, which is used to decide
+ *                              the zigzag scan pattern. This takes one of the
+ *                              following values:
+ *                              OMX_VIDEO_NONE          AC prediction not used.
+ *                                                      Performs classical zigzag
+ *                                                      scan.
+ *                              OMX_VIDEO_HORIZONTAL    Horizontal prediction.
+ *                                                      Performs alternate-vertical
+ *                                                      zigzag scan.
+ *                              OMX_VIDEO_VERTICAL      Vertical prediction.
+ *                                                      Performs alternate-horizontal
+ *                                                      zigzag scan.
+ * [in] pattern         block pattern which is used to decide whether
+ *                              this block is encoded
+ * [in] start           start indicates whether the encoding begins with 0th element
+ *                      or 1st.
+ * [out]    ppBitStream     *ppBitStream is updated after the block is encoded,
+ *                              so that it points to the current byte in the bit
+ *                              stream buffer.
+ * [out]    pBitOffset      *pBitOffset is updated so that it points to the
+ *                              current bit position in the byte pointed by
+ *                              *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_EncodeVLCZigzag_Intra(
+     OMX_U8 **ppBitStream,
+     OMX_INT *pBitOffset,
+     const OMX_S16 *pQDctBlkCoef,
+     OMX_U8 predDir,
+     OMX_U8 pattern,
+     OMX_INT shortVideoHeader,
+     OMX_U8 start
+);
+
+/**
+ * Function: armVCM4P2_DecodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream     pointer to the pointer to the current byte in
+ *                              the bitstream buffer
+ * [in] pBitOffset      pointer to the bit position in the byte pointed
+ *                              to by *ppBitStream. *pBitOffset is valid within
+ *                              [0-7].
+ * [in] predDir         AC prediction direction which is used to decide
+ *                              the zigzag scan pattern. It takes one of the
+ *                              following values:
+ *                              OMX_VIDEO_NONE  AC prediction not used;
+ *                                              perform classical zigzag scan;
+ *                              OMX_VIDEO_HORIZONTAL    Horizontal prediction;
+ *                                                      perform alternate-vertical
+ *                                                      zigzag scan;
+ *                              OMX_VIDEO_VERTICAL      Vertical prediction;
+ *                                                      thus perform
+ *                                                      alternate-horizontal
+ *                                                      zigzag scan.
+ * [in] videoComp       video component type (luminance, chrominance or
+ *                              alpha) of the current block
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ *                           and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start           start indicates whether the encoding begins with 0th element
+ *                      or 1st.
+ * [out]    ppBitStream     *ppBitStream is updated after the block is
+ *                              decoded, so that it points to the current byte
+ *                              in the bit stream buffer
+ * [out]    pBitOffset      *pBitOffset is updated so that it points to the
+ *                              current bit position in the byte pointed by
+ *                              *ppBitStream
+ * [out]    pDst            pointer to the coefficient buffer of current
+ *                              block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_DecodeVLCZigzag_Intra(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT * pBitOffset,
+     OMX_S16 * pDst,
+     OMX_U8 predDir,
+     OMX_INT shortVideoHeader, 
+     OMX_U8  start
+);
+
+/**
+ * Function: armVCM4P2_FillVLDBuffer
+ *
+ * Description:
+ * Performs filling of the coefficient buffer according to the run, level
+ * and sign, also updates the index
+ * 
+ * Parameters:
+ * [in]  storeRun        Stored Run value (count of zeros)   
+ * [in]  storeLevel      Stored Level value (non-zero value)
+ * [in]  sign            Flag indicating the sign of level
+ * [in]  last            status of the last flag
+ * [in]  pIndex          pointer to coefficient index in 8x8 matrix
+ * [out] pIndex          pointer to updated coefficient index in 8x8 
+ *                       matrix
+ * [in]  pZigzagTable    pointer to the zigzag tables
+ * [out] pDst            pointer to the coefficient buffer of current
+ *                       block. Should be 32-bit aligned
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLDBuffer(
+    OMX_U32 storeRun,
+    OMX_S16 * pDst,
+    OMX_S16 storeLevel,
+    OMX_U8  sign,
+    OMX_U8  last,
+    OMX_U8  * index,
+    const OMX_U8 * pZigzagTable
+);
+
+/**
+ * Function: armVCM4P2_GetVLCBits
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and 
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte in
+ *								the bit stream
+ * [in]	pBitOffset		pointer to the bit position in the byte pointed
+ *								by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ *                           and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start           start indicates whether the encoding begins with 
+ *                      0th element or 1st.
+ * [in/out] pLast       pointer to last status flag
+ * [in] runBeginSingleLevelEntriesL0      The run value from which level 
+ *                                        will be equal to 1: last == 0
+ * [in] IndexBeginSingleLevelEntriesL0    Array index in the VLC table 
+ *                                        pointing to the  
+ *                                        runBeginSingleLevelEntriesL0 
+ * [in] runBeginSingleLevelEntriesL1      The run value from which level 
+ *                                        will be equal to 1: last == 1
+ * [in] IndexBeginSingleLevelEntriesL1    Array index in the VLC table 
+ *                                        pointing to the  
+ *                                        runBeginSingleLevelEntriesL1 
+ * [in] pRunIndexTableL0    Run Index table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0         VLC table for last == 0
+ * [in] pRunIndexTableL1    Run Index table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1         VLC table for last == 1
+ * [in] pLMAXTableL0        Level MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1        Level MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0        Run MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1        Run MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [out]pDst			    pointer to the coefficient buffer of current
+ *							block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_GetVLCBits (
+              const OMX_U8 **ppBitStream,
+              OMX_INT * pBitOffset,
+			  OMX_S16 * pDst,
+			  OMX_INT shortVideoHeader,
+			  OMX_U8    start,			  
+			  OMX_U8  * pLast,
+			  OMX_U8    runBeginSingleLevelEntriesL0,
+			  OMX_U8    maxIndexForMultipleEntriesL0,
+			  OMX_U8    maxRunForMultipleEntriesL1,
+			  OMX_U8    maxIndexForMultipleEntriesL1,
+              const OMX_U8  * pRunIndexTableL0,
+              const ARM_VLC32 *pVlcTableL0,
+			  const OMX_U8  * pRunIndexTableL1,
+              const ARM_VLC32 *pVlcTableL1,
+              const OMX_U8  * pLMAXTableL0,
+              const OMX_U8  * pLMAXTableL1,
+              const OMX_U8  * pRMAXTableL0,
+              const OMX_U8  * pRMAXTableL1,
+              const OMX_U8  * pZigzagTable
+);
+
+/**
+ * Function: armVCM4P2_PutVLCBits
+ *
+ * Description:
+ * Checks the type of Escape Mode and put encoded bits for 
+ * quantized DCT coefficients.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	 ppBitStream      pointer to the pointer to the current byte in
+ *						  the bit stream
+ * [in]	 pBitOffset       pointer to the bit position in the byte pointed
+ *                        by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ *                           and escape mode 4 is used when shortVideoHeader==1.
+ * [in]  start            start indicates whether the encoding begins with 
+ *                        0th element or 1st.
+ * [in]  maxStoreRunL0    Max store possible (considering last and inter/intra)
+ *                        for last = 0
+ * [in]  maxStoreRunL1    Max store possible (considering last and inter/intra)
+ *                        for last = 1
+ * [in]  maxRunForMultipleEntriesL0 
+ *                        The run value after which level 
+ *                        will be equal to 1: 
+ *                        (considering last and inter/intra status) for last = 0
+ * [in]  maxRunForMultipleEntriesL1 
+ *                        The run value after which level 
+ *                        will be equal to 1: 
+ *                        (considering last and inter/intra status) for last = 1
+ * [in]  pRunIndexTableL0 Run Index table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in]  pVlcTableL0      VLC table for last == 0
+ * [in]  pRunIndexTableL1 Run Index table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in]  pVlcTableL1      VLC table for last == 1
+ * [in]  pLMAXTableL0     Level MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in]  pLMAXTableL1     Level MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in]  pRMAXTableL0     Run MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in]  pRMAXTableL1     Run MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [out] pQDctBlkCoef     pointer to the quantized DCT coefficient
+ * [out] ppBitStream      *ppBitStream is updated after the block is encoded
+ *                        so that it points to the current byte in the bit
+ *                        stream buffer.
+ * [out] pBitOffset       *pBitOffset is updated so that it points to the
+ *                        current bit position in the byte pointed by
+ *                        *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+
+OMXResult armVCM4P2_PutVLCBits (
+              OMX_U8 **ppBitStream,
+              OMX_INT * pBitOffset,
+              const OMX_S16 *pQDctBlkCoef,
+              OMX_INT shortVideoHeader,
+              OMX_U8 start,
+              OMX_U8 maxStoreRunL0,
+              OMX_U8 maxStoreRunL1,
+              OMX_U8  maxRunForMultipleEntriesL0,
+              OMX_U8  maxRunForMultipleEntriesL1,
+              const OMX_U8  * pRunIndexTableL0,
+              const ARM_VLC32 *pVlcTableL0,
+			  const OMX_U8  * pRunIndexTableL1,
+              const ARM_VLC32 *pVlcTableL1,
+              const OMX_U8  * pLMAXTableL0,
+              const OMX_U8  * pLMAXTableL1,
+              const OMX_U8  * pRMAXTableL0,
+              const OMX_U8  * pRMAXTableL1,
+              const OMX_U8  * pZigzagTable
+);
+/**
+ * Function: armVCM4P2_FillVLCBuffer
+ *
+ * Description:
+ * Performs calculating the VLC bits depending on the escape type and insert 
+ * the same in the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	 ppBitStream		pointer to the pointer to the current byte in
+ *	                        the bit stream
+ * [in]	 pBitOffset         pointer to the bit position in the byte pointed
+ *                          by *ppBitStream. Valid within 0 to 7
+ * [in]  run                Run value (count of zeros) to be encoded  
+ * [in]  level              Level value (non-zero value) to be encoded
+ * [in]  runPlus            Calculated as runPlus = run - (RMAX + 1)  
+ * [in]  levelPlus          Calculated as 
+ *                          levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in]  fMode              Flag indicating the escape modes
+ * [in]  last               status of the last flag
+ * [in]  maxRunForMultipleEntries 
+ *                          The run value after which level will be equal to 1: 
+ *                          (considering last and inter/intra status)
+ * [in]  pRunIndexTable     Run Index table defined in
+ *                          armVCM4P2_Huff_tables_VLC.h
+ * [in]  pVlcTable          VLC table defined in armVCM4P2_Huff_tables_VLC.h
+ * [out] ppBitStream		*ppBitStream is updated after the block is encoded
+ *                          so that it points to the current byte in the bit
+ *                          stream buffer.
+ * [out] pBitOffset         *pBitOffset is updated so that it points to the
+ *                          current bit position in the byte pointed by
+ *                          *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLCBuffer (
+              OMX_U8 **ppBitStream,
+              OMX_INT * pBitOffset,
+              OMX_U32 run,
+              OMX_S16 level, 
+			  OMX_U32 runPlus,
+              OMX_S16 levelPlus, 
+              OMX_U8  fMode,
+			  OMX_U8  last,
+              OMX_U8  maxRunForMultipleEntries, 
+              const OMX_U8  *pRunIndexTable,
+              const ARM_VLC32 *pVlcTable
+);
+
+/**
+ * Function: armVCM4P2_CheckVLCEscapeMode
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and 
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] run             Run value (count of zeros) to be encoded  
+ * [in] level           Level value (non-zero value) to be encoded
+ * [in] runPlus         Calculated as runPlus = run - (RMAX + 1)  
+ * [in] levelPlus       Calculated as 
+ *                      levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] maxStoreRun     Max store possible (considering last and inter/intra)
+ * [in] maxRunForMultipleEntries 
+ *                      The run value after which level 
+ *                      will be equal to 1: 
+ *                      (considering last and inter/intra status)
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ *                           and escape mode 4 is used when shortVideoHeader==1.
+ * [in] pRunIndexTable  Run Index table defined in 
+ *                      armVCM4P2_Huff_Tables_VLC.c
+ *                      (considering last and inter/intra status)
+ *
+ *                      
+ * Return Value:
+ * Returns an Escape mode which can take values from 0 to 3
+ * 0 --> no escape mode, 1 --> escape type 1,
+ * 2 --> escape type 2, 3 --> escape type 3, check section 7.4.1.3
+ * in the MPEG ISO standard.
+ *
+ */
+
+OMX_U8 armVCM4P2_CheckVLCEscapeMode(
+     OMX_U32 run,
+     OMX_U32 runPlus,
+     OMX_S16 level,
+     OMX_S16 levelPlus,
+     OMX_U8  maxStoreRun,
+     OMX_U8  maxRunForMultipleEntries,
+     OMX_INT shortVideoHeader,
+     const OMX_U8  *pRunIndexTable
+);
+
+
+/**
+ * Function: armVCM4P2_BlockMatch_Integer
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated minimum SAD.  
+ * Both the input and output motion vectors are represented using half-pixel units, and 
+ * therefore a shift left or right by 1 bit may be required, respectively, to match the 
+ * input or output MVs with other functions that either generate output MVs or expect 
+ * input MVs represented using integer pixel units. 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	pSrcRefBuf		pointer to the reference Y plane; points to the reference MB that 
+ *                    corresponds to the location of the current macroblock in the current 
+ *                    plane.
+ * [in]	refWidth		  width of the reference plane
+ * [in]	pRefRect		  pointer to the valid rectangle in the reference plane, relative to the image origin.
+ *                    It is not limited to the image boundary, but depends on the padding. For example,
+ *                    if you pad 4 pixels outside the image border, then the value for left border 
+ *                    can be -4
+ * [in]	pSrcCurrBuf		pointer to the current macroblock extracted from original plane (linear array, 
+ *                    256 entries); must be aligned on an 8-byte boundary.
+ * [in] pCurrPointPos	position of the current macroblock in the current plane
+ * [in] pSrcPreMV		  pointer to predicted motion vector; NULL indicates no predicted MV
+ * [in] pSrcPreSAD		pointer to SAD associated with the predicted MV (referenced by pSrcPreMV)
+ * [in] searchRange		search range for the 16x16 integer block search, in full-pixel units; the search range
+ *                    is the same in all directions. It is inclusive of the boundary and specified in
+ *                    terms of integer pixel units.
+ * [in] pMESpec			  vendor-specific motion estimation specification structure; must have been allocated 
+ *                    and then initialized using omxVCM4P2_MEInit prior to calling the block matching 
+ *                    function.
+ * [in] BlockSize     MacroBlock Size i.e either 16x16 or 8x8.
+ * [out]	pDstMV			pointer to estimated MV
+ * [out]	pDstSAD			pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error.
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Integer(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pCurrPointPos,
+     const OMXVCMotionVector *pSrcPreMV,
+     const OMX_INT *pSrcPreSAD,
+     void *pMESpec,
+     OMXVCMotionVector *pDstMV,
+     OMX_INT *pDstSAD,
+     OMX_U8 BlockSize
+);
+
+/**
+ * Function: armVCM4P2_BlockMatch_Half
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution.  Returns the estimated 
+ * motion vector and associated minimum SAD.  This function estimates the half-pixel 
+ * motion vector by interpolating the integer resolution motion vector referenced 
+ * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated 
+ * externally.  The input parameters pSrcRefBuf and pSearchPointRefPos should be 
+ * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16.  
+ * The function BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	pSrcRefBuf		pointer to the reference Y plane; points to the reference MB 
+ *                    that corresponds to the location of the current macroblock in 
+ *                    the	current plane.
+ * [in]	refWidth		  width of the reference plane
+ * [in]	pRefRect		  reference plane valid region rectangle
+ * [in]	pSrcCurrBuf		pointer to the current macroblock extracted from original plane 
+ *                    (linear array, 256 entries); must be aligned on an 8-byte boundary. 
+ * [in]	pSearchPointRefPos	position of the starting point for half pixel search (specified 
+ *                          in terms of integer pixel units) in the reference plane.
+ * [in]	rndVal			  rounding control bit for half pixel motion estimation; 
+ *                    0=rounding control disabled; 1=rounding control enabled
+ * [in]	pSrcDstMV		pointer to the initial MV estimate; typically generated during a prior 
+ *                  16X16 integer search and its unit is half pixel.
+ * [in] BlockSize     MacroBlock Size i.e either 16x16 or 8x8.
+ * [out]pSrcDstMV		pointer to estimated MV
+ * [out]pDstSAD			pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Half(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pSearchPointRefPos,
+     OMX_INT rndVal,
+     OMXVCMotionVector *pSrcDstMV,
+     OMX_INT *pDstSAD,
+     OMX_U8 BlockSize
+);
+/**
+ * Function: armVCM4P2_PadMV
+ *
+ * Description:
+ * Performs motion vector padding for a macroblock.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDstMV       pointer to motion vector buffer of the current
+ *                              macroblock
+ * [in] pTransp         pointer to transparent status buffer of the
+ *                              current macroblock
+ * [out]    pSrcDstMV       pointer to motion vector buffer in which the
+ *                              motion vectors have been padded
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_PadMV(
+     OMXVCMotionVector * pSrcDstMV,
+     OMX_U8 * pTransp
+);
+
+/* 
+ * H.264 Specific Declarations 
+ */
+/* Defines */
+#define ARM_M4P10_Q_OFFSET        (15)
+
+
+/* Dequant tables */
+
+extern const OMX_U8 armVCM4P10_PosToVCol4x4[16];
+extern const OMX_U8 armVCM4P10_PosToVCol2x2[4];
+extern const OMX_U8 armVCM4P10_VMatrix[6][3];
+extern const OMX_U32 armVCM4P10_MFMatrix[6][3];
+
+
+/*
+ * Description:
+ * This function performs the work required by both the OpenMAX
+ * DecodeCoeffsToPair and DecodeChromaDCCoeffsToPair functions.
+ * Since most of the code is common, it is shared here.
+ *
+ * Parameters:
+ * [in]  ppBitStream   Double pointer to current byte in bit stream buffer
+ * [in]  pOffset       Pointer to current bit position in the byte pointed
+ *                     to by *ppBitStream
+ * [in]  sMaxNumCoeff  Maximum number of non-zero coefficients in current
+ *                     block (4, 15 or 16)
+ * [in]  nTable        Table number (0 to 4) according to the five columns
+ *                     of Table 9-5 in the H.264 spec
+ * [out] ppBitStream   *ppBitStream is updated after each block is decoded
+ * [out] pOffset       *pOffset is updated after each block is decoded
+ * [out] pNumCoeff     Pointer to the number of nonzero coefficients in
+ *                     this block
+ * [out] ppPosCoefbuf  Double pointer to destination residual
+ *                     coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P10_DecodeCoeffsToPair(
+     const OMX_U8** ppBitStream,
+     OMX_S32* pOffset,
+     OMX_U8* pNumCoeff,
+     OMX_U8**ppPosCoefbuf,
+     OMX_INT nTable,
+     OMX_INT sMaxNumCoeff        
+ );
+
+/*
+ * Description:
+ * Perform DC style intra prediction, averaging upper and left block
+ *
+ * Parameters:
+ * [in]  pSrcLeft      Pointer to the buffer of 4 left coefficients:
+ *                     p[x, y] (x = -1, y = 0..3)
+ * [in]  pSrcAbove     Pointer to the buffer of 4 above coefficients:
+ *                     p[x, y] (x = 0..3, y = -1)
+ * [in]  leftStep      Step of left coefficient buffer
+ * [in]  dstStep       Step of the destination buffer
+ * [in]  availability  Neighboring 16x16 MB availability flag
+ * [out] pDst          Pointer to the destination buffer
+ *
+ * Return Value:
+ * None
+ */
+
+void armVCM4P10_PredictIntraDC4x4(
+     const OMX_U8* pSrcLeft,
+     const OMX_U8 *pSrcAbove,
+     OMX_U8* pDst,
+     OMX_INT leftStep,
+     OMX_INT dstStep,
+     OMX_S32 availability        
+);
+
+/*
+ * Description:
+ * Unpack a 4x4 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in]  ppSrc  Double pointer to residual coefficient-position pair
+ *              buffer output by CAVLC decoding
+ * [out] ppSrc  *ppSrc is updated to the start of next non empty block
+ * [out] pDst   Pointer to unpacked 4x4 block
+ */
+
+void armVCM4P10_UnpackBlock4x4(
+     const OMX_U8 **ppSrc,
+     OMX_S16* pDst
+);
+
+/*
+ * Description:
+ * Unpack a 2x2 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in]  ppSrc  Double pointer to residual coefficient-position pair
+ *              buffer output by CAVLC decoding
+ * [out] ppSrc  *ppSrc is updated to the start of next non empty block
+ * [out] pDst   Pointer to unpacked 2x2 block
+ */
+
+void armVCM4P10_UnpackBlock2x2(
+     const OMX_U8 **ppSrc,
+     OMX_S16* pDst
+);
+
+/*
+ * Description:
+ * Deblock one boundary pixel
+ *
+ * Parameters:
+ * [in]  pQ0         Pointer to pixel q0
+ * [in]  Step        Step between pixels q0 and q1
+ * [in]  tC0         Edge threshold value
+ * [in]  alpha       alpha threshold value
+ * [in]  beta        beta threshold value
+ * [in]  bS          deblocking strength
+ * [in]  ChromaFlag  True for chroma blocks
+ * [out] pQ0         Deblocked pixels
+ *
+ */
+
+void armVCM4P10_DeBlockPixel(
+    OMX_U8 *pQ0,    /* pointer to the pixel q0 */
+    int Step,       /* step between pixels q0 and q1 */
+    int tC0,        /* edge threshold value */
+    int alpha,      /* alpha threshold value */
+    int beta,       /* beta threshold value */
+    int bS,         /* deblocking strength */
+    int ChromaFlag  /* true for chroma blocks */
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfHor_Luma
+ *
+ * Description:
+ * This function performs interpolation for horizontal 1/2-pel positions
+ *
+ * Remarks:
+ *
+ *	[in]	pSrc			Pointer to top-left corner of block used to interpolate
+ *							in the reconstructed frame plane
+ *	[in]	iSrcStep	Step of the source buffer.
+ *	[in]	iDstStep	Step of the destination(interpolation) buffer.
+ *	[in]	iWidth		Width of the current block
+ *	[in]	iHeight		Height of the current block
+ *	[out]	pDst	    Pointer to the interpolation buffer of the 1/2-pel 
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfHor_Luma(
+        const OMX_U8*		pSrc, 
+		OMX_U32 	iSrcStep, 
+		OMX_U8* 	pDst, 
+		OMX_U32 	iDstStep, 
+		OMX_U32 	iWidth, 
+		OMX_U32 	iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfVer_Luma
+ * 
+ * Description:
+ * This function performs interpolation for vertical 1/2-pel positions 
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ *	[in]	pSrc			Pointer to top-left corner of block used to interpolate 
+ *												in the reconstructed frame plane
+ *	[in]	iSrcStep	Step of the source buffer.
+ *	[in]	iDstStep	Step of the destination(interpolation) buffer.
+ *	[in]	iWidth		Width of the current block
+ *	[in]	iHeight		Height of the current block
+ *	[out]	pDst    	Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfVer_Luma(	
+	 const OMX_U8* 	pSrc, 
+	 OMX_U32 	iSrcStep, 
+ 	 OMX_U8* 	pDst,
+ 	 OMX_U32 	iDstStep, 
+ 	 OMX_U32 	iWidth, 
+ 	 OMX_U32 	iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfDiag_Luma
+ * 
+ * Description:
+ * This function performs interpolation for (1/2, 1/2)  positions 
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc        Pointer to top-left corner of block used to interpolate 
+ *                      in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [out]   pDst        Pointer to the interpolation buffer of the (1/2,1/2)-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfDiag_Luma(  
+        const OMX_U8*     pSrc, 
+        OMX_U32     iSrcStep, 
+        OMX_U8*     pDst, 
+        OMX_U32     iDstStep,
+        OMX_U32     iWidth, 
+        OMX_U32     iHeight
+);
+
+/*
+ * Description:
+ * Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in]  pSrc		Source 4x4 block
+ * [out] pDst		Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+/*
+ * Description:
+ * Forward Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in]  pSrc		Source 4x4 block
+ * [out] pDst		Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+OMX_INT armVCM4P10_CompareMotionCostToMV (
+    OMX_S16  mvX,
+    OMX_S16  mvY,
+    OMXVCMotionVector diffMV, 
+    OMX_INT candSAD, 
+    OMXVCMotionVector *bestMV, 
+    OMX_U32 nLamda,
+    OMX_S32 *pBestCost);
+
+/**
+ * Function: armVCCOMM_SAD
+ *
+ * Description:
+ * This function calculates the SAD for NxM blocks.
+ *
+ * Remarks:
+ *
+ * [in]		pSrcOrg		Pointer to the original block
+ * [in]		iStepOrg	Step of the original block buffer
+ * [in]		pSrcRef		Pointer to the reference block
+ * [in]		iStepRef	Step of the reference block buffer
+ * [in]		iHeight		Height of the block
+ * [in]		iWidth		Width of the block
+ * [out]	pDstSAD		Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCCOMM_SAD(	
+	const OMX_U8* 	pSrcOrg,
+	OMX_U32 	iStepOrg,
+	const OMX_U8* 	pSrcRef,
+	OMX_U32 	iStepRef,
+	OMX_S32*	pDstSAD,
+	OMX_U32		iHeight,
+	OMX_U32		iWidth);
+
+/**
+ * Function: armVCCOMM_Average
+ *
+ * Description:
+ * This function calculates the average of two blocks and stores the result.
+ *
+ * Remarks:
+ *
+ *	[in]	pPred0			Pointer to the top-left corner of reference block 0
+ *	[in]	pPred1			Pointer to the top-left corner of reference block 1
+ *	[in]	iPredStep0	    Step of reference block 0
+ *	[in]	iPredStep1	    Step of reference block 1
+ *	[in]	iDstStep 		Step of the destination buffer
+ *	[in]	iWidth			Width of the blocks
+ *	[in]	iHeight			Height of the blocks
+ *	[out]	pDstPred		Pointer to the destination buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCCOMM_Average (
+	 const OMX_U8* 	    pPred0,
+	 const OMX_U8* 	    pPred1,	
+	 OMX_U32		iPredStep0,
+	 OMX_U32		iPredStep1,
+	 OMX_U8*		pDstPred,
+	 OMX_U32		iDstStep, 
+	 OMX_U32		iWidth,
+	 OMX_U32		iHeight
+);
+
+/**
+ * Function: armVCM4P10_SADQuar
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the 
+ * average of the other two (pSrcRef0 and pSrcRef1)
+ *
+ * Remarks:
+ *
+ * [in]		pSrc				Pointer to the original block
+ * [in]		pSrcRef0		Pointer to reference block 0
+ * [in]		pSrcRef1		Pointer to reference block 1
+ * [in]		iSrcStep 		Step of the original block buffer
+ * [in]		iRefStep0		Step of reference block 0 
+ * [in]		iRefStep1 	Step of reference block 1 
+ * [in]		iHeight			Height of the block
+ * [in]		iWidth			Width of the block
+ * [out]	pDstSAD			Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCM4P10_SADQuar(
+	const OMX_U8* 	pSrc,
+    const OMX_U8* 	pSrcRef0,
+	const OMX_U8* 	pSrcRef1,	
+    OMX_U32 	iSrcStep,
+    OMX_U32		iRefStep0,
+    OMX_U32		iRefStep1,
+    OMX_U32*	pDstSAD,
+    OMX_U32     iHeight,
+    OMX_U32     iWidth
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Chroma
+ *
+ * Description:
+ * This function performs interpolation for chroma components.
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc            Pointer to top-left corner of block used to 
+ *                                              interpolate in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [in]    dx              Fractional part of horizontal motion vector 
+ *                                              component in 1/8 pixel unit (0~7) 
+ *  [in]    dy              Fractional part of vertical motion vector 
+ *                                              component in 1/8 pixel unit (0~7)
+ *  [out]   pDst            Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCM4P10_Interpolate_Chroma(
+        OMX_U8      *pSrc,
+        OMX_U32     iSrcStep,
+        OMX_U8      *pDst,
+        OMX_U32     iDstStep,
+        OMX_U32     iWidth,
+        OMX_U32     iHeight,
+        OMX_U32     dx,
+        OMX_U32     dy
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Luma
+ *
+ * Description:
+ * This function performs interpolation for luma components.
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc            Pointer to top-left corner of block used to 
+ *                                              interpolate in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [in]    dx              Fractional part of horizontal motion vector 
+ *                                              component in 1/4 pixel unit (0~3) 
+ *  [in]    dy              Fractional part of vertical motion vector 
+ *                                              component in 1/4 pixel unit (0~3) 
+ *  [out]   pDst            Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+ OMXResult armVCM4P10_Interpolate_Luma(
+     const OMX_U8     *pSrc,
+     OMX_U32    iSrcStep,
+     OMX_U8     *pDst,
+     OMX_U32    iDstStep,
+     OMX_U32    iWidth,
+     OMX_U32    iHeight,
+     OMX_U32    dx,
+     OMX_U32    dy
+);
+
+/**
+ * Function: armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer,
+ * perform dequantisation and integer inverse transformation for 4x4 block of
+ * residuals and update the pair buffer pointer to next non-empty block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]  ppSrc  Double pointer to residual coefficient-position
+ *              pair buffer output by CAVLC decoding
+ * [in]  pDC    Pointer to the DC coefficient of this block, NULL
+ *              if it doesn't exist
+ * [in]  QP     Quantization parameter
+ * [in]  AC     Flag indicating if at least one non-zero coefficient exists
+ * [out] pDst   pointer to the reconstructed 4x4 block data
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx(
+     OMX_U8 **ppSrc,
+     OMX_S16 *pDst,
+     OMX_INT QP,
+     OMX_S16* pDC,
+     int AC
+);
+
+#endif  /*_armVideo_H_*/
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/omxVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/omxVC.h
new file mode 100644
index 0000000..7b3cc72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/omxVC.h
@@ -0,0 +1,4381 @@
+/**
+ * File: omxVC.h
+ * Brief: OpenMAX DL v1.0.2 - Video Coding library
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. 
+ *
+ * These materials are protected by copyright laws and contain material 
+ * proprietary to the Khronos Group, Inc.  You may use these materials 
+ * for implementing Khronos specifications, without altering or removing 
+ * any trademark, copyright or other notice from the specification.
+ * 
+ * Khronos Group makes no, and expressly disclaims any, representations 
+ * or warranties, express or implied, regarding these materials, including, 
+ * without limitation, any implied warranties of merchantability or fitness 
+ * for a particular purpose or non-infringement of any intellectual property. 
+ * Khronos Group makes no, and expressly disclaims any, warranties, express 
+ * or implied, regarding the correctness, accuracy, completeness, timeliness, 
+ * and reliability of these materials. 
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters, 
+ * Contributors or Members or their respective partners, officers, directors, 
+ * employees, agents or representatives be liable for any damages, whether 
+ * direct, indirect, special or consequential damages for lost revenues, 
+ * lost profits, or otherwise, arising from or in connection with these 
+ * materials.
+ * 
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc. 
+ *
+ */
+
+/* *****************************************************************************************/
+
+#ifndef _OMXVC_H_
+#define _OMXVC_H_
+
+#include "omxtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* 6.1.1.1 Motion Vectors  */
+/* In omxVC, motion vectors are represented as follows:  */
+
+typedef struct {
+    OMX_S16 dx;
+    OMX_S16 dy;
+} OMXVCMotionVector;
+
+
+
+/**
+ * Function:  omxVCCOMM_Average_8x   (6.1.3.1.1)
+ *
+ * Description:
+ * This function calculates the average of two 8x4, 8x8, or 8x16 blocks.  The 
+ * result is rounded according to (a+b+1)/2.  The block average function can 
+ * be used in conjunction with half-pixel interpolation to obtain quarter 
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. 
+ *
+ * Input Arguments:
+ *   
+ *   pPred0     - Pointer to the top-left corner of reference block 0 
+ *   pPred1     - Pointer to the top-left corner of reference block 1 
+ *   iPredStep0 - Step of reference block 0 
+ *   iPredStep1 - Step of reference block 1 
+ *   iDstStep   - Step of the destination buffer. 
+ *   iHeight    - Height of the blocks 
+ *
+ * Output Arguments:
+ *   
+ *   pDstPred - Pointer to the destination buffer. 8-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -   one or more of the following pointers is NULL: pPred0, pPred1, or 
+ *              pDstPred. 
+ *    -   pDstPred is not aligned on an 8-byte boundary. 
+ *    -   iPredStep0 <= 0 or iPredStep0 is not a multiple of 8. 
+ *    -   iPredStep1 <= 0 or iPredStep1 is not a multiple of 8. 
+ *    -   iDstStep   <= 0 or iDstStep is not a multiple of 8. 
+ *    -   iHeight is not 4, 8, or 16. 
+ *
+ */
+OMXResult omxVCCOMM_Average_8x (
+    const OMX_U8 *pPred0,
+    const OMX_U8 *pPred1,
+    OMX_U32 iPredStep0,
+    OMX_U32 iPredStep1,
+    OMX_U8 *pDstPred,
+    OMX_U32 iDstStep,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_Average_16x   (6.1.3.1.2)
+ *
+ * Description:
+ * This function calculates the average of two 16x16 or 16x8 blocks.  The 
+ * result is rounded according to (a+b+1)/2.  The block average function can 
+ * be used in conjunction with half-pixel interpolation to obtain quarter 
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. 
+ *
+ * Input Arguments:
+ *   
+ *   pPred0 - Pointer to the top-left corner of reference block 0 
+ *   pPred1 - Pointer to the top-left corner of reference block 1 
+ *   iPredStep0 - Step of reference block 0 
+ *   iPredStep1 - Step of reference block 1 
+ *   iDstStep - Step of the destination buffer 
+ *   iHeight - Height of the blocks 
+ *
+ * Output Arguments:
+ *   
+ *   pDstPred - Pointer to the destination buffer. 16-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -   one or more of the following pointers is NULL: pPred0, pPred1, or 
+ *              pDstPred. 
+ *    -   pDstPred is not aligned on a 16-byte boundary. 
+ *    -   iPredStep0 <= 0 or iPredStep0 is not a multiple of 16. 
+ *    -   iPredStep1 <= 0 or iPredStep1 is not a multiple of 16. 
+ *    -   iDstStep <= 0 or iDstStep is not a multiple of 16. 
+ *    -   iHeight is not 8 or 16. 
+ *
+ */
+OMXResult omxVCCOMM_Average_16x (
+    const OMX_U8 *pPred0,
+    const OMX_U8 *pPred1,
+    OMX_U32 iPredStep0,
+    OMX_U32 iPredStep1,
+    OMX_U8 *pDstPred,
+    OMX_U32 iDstStep,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_ExpandFrame_I   (6.1.3.2.1)
+ *
+ * Description:
+ * This function expands a reconstructed frame in-place.  The unexpanded 
+ * source frame should be stored in a plane buffer with sufficient space 
+ * pre-allocated for edge expansion, and the input frame should be located in 
+ * the plane buffer center.  This function executes the pixel expansion by 
+ * replicating source frame edge pixel intensities in the empty pixel 
+ * locations (expansion region) between the source frame edge and the plane 
+ * buffer edge.  The width/height of the expansion regions on the 
+ * horizontal/vertical edges is controlled by the parameter iExpandPels. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDstPlane - pointer to the top-left corner of the frame to be 
+ *            expanded; must be aligned on an 8-byte boundary. 
+ *   iFrameWidth - frame width; must be a multiple of 8. 
+ *   iFrameHeight - frame height; must be a multiple of 8. 
+ *   iExpandPels - number of pixels to be expanded in the horizontal and 
+ *            vertical directions; must be a multiple of 8. 
+ *   iPlaneStep - distance, in bytes, between the start of consecutive lines 
+ *            in the plane buffer; must be larger than or equal to 
+ *            (iFrameWidth + 2 * iExpandPels). 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstPlane - Pointer to the top-left corner of the frame (NOT the 
+ *            top-left corner of the plane); must be aligned on an 8-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -    pSrcDstPlane is NULL. 
+ *    -    pSrcDstPlane is not aligned on an 8-byte boundary. 
+ *    -    one of the following parameters is either equal to zero or is a 
+ *              non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or 
+ *              iExpandPels. 
+ *    -    iPlaneStep < (iFrameWidth + 2 * iExpandPels). 
+ *
+ */
+OMXResult omxVCCOMM_ExpandFrame_I (
+    OMX_U8 *pSrcDstPlane,
+    OMX_U32 iFrameWidth,
+    OMX_U32 iFrameHeight,
+    OMX_U32 iExpandPels,
+    OMX_U32 iPlaneStep
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_Copy8x8   (6.1.3.3.1)
+ *
+ * Description:
+ * Copies the reference 8x8 block to the current block. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the reference block in the source frame; must be 
+ *            aligned on an 8-byte boundary. 
+ *   step - distance between the starts of consecutive lines in the reference 
+ *            frame, in bytes; must be a multiple of 8 and must be larger than 
+ *            or equal to 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination block; must be aligned on an 8-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -   one or more of the following pointers is NULL: pSrc, pDst 
+ *    -   one or more of the following pointers is not aligned on an 8-byte 
+ *              boundary: pSrc, pDst 
+ *    -    step <8 or step is not a multiple of 8. 
+ *
+ */
+OMXResult omxVCCOMM_Copy8x8 (
+    const OMX_U8 *pSrc,
+    OMX_U8 *pDst,
+    OMX_INT step
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_Copy16x16   (6.1.3.3.2)
+ *
+ * Description:
+ * Copies the reference 16x16 macroblock to the current macroblock. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the reference macroblock in the source frame; must be 
+ *            aligned on a 16-byte boundary. 
+ *   step - distance between the starts of consecutive lines in the reference 
+ *            frame, in bytes; must be a multiple of 16 and must be larger 
+ *            than or equal to 16. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination macroblock; must be aligned on a 
+ *            16-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -   one or more of the following pointers is NULL: pSrc, pDst 
+ *    -   one or more of the following pointers is not aligned on a 16-byte 
+ *              boundary: pSrc, pDst 
+ *    -    step <16 or step is not a multiple of 16. 
+ *
+ */
+OMXResult omxVCCOMM_Copy16x16 (
+    const OMX_U8 *pSrc,
+    OMX_U8 *pDst,
+    OMX_INT step
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_ComputeTextureErrorBlock_SAD   (6.1.4.1.1)
+ *
+ * Description:
+ * Computes texture error of the block; also returns SAD. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the source plane; must be aligned on an 8-byte 
+ *            boundary. 
+ *   srcStep - step of the source plane 
+ *   pSrcRef - pointer to the reference buffer, an 8x8 block; must be aligned 
+ *            on an 8-byte boundary. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination buffer, an 8x8 block; must be aligned 
+ *            on an 8-byte boundary. 
+ *   pDstSAD - pointer to the Sum of Absolute Differences (SAD) value 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments 
+ *    -    At least one of the following 
+ *         pointers is NULL: pSrc, pSrcRef, pDst and pDstSAD. 
+ *    -    pSrc is not 8-byte aligned. 
+ *    -    srcStep <= 0 or srcStep is not a multiple of 8. 
+ *    -    pSrcRef is not 8-byte aligned. 
+ *    -    pDst is not 8-byte aligned. 
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD (
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    const OMX_U8 *pSrcRef,
+    OMX_S16 *pDst,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_ComputeTextureErrorBlock   (6.1.4.1.2)
+ *
+ * Description:
+ * Computes the texture error of the block. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the source plane. This should be aligned on an 8-byte 
+ *            boundary. 
+ *   srcStep - step of the source plane 
+ *   pSrcRef - pointer to the reference buffer, an 8x8 block. This should be 
+ *            aligned on an 8-byte boundary. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination buffer, an 8x8 block. This should be 
+ *            aligned on an 8-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL: 
+ *         pSrc, pSrcRef, pDst. 
+ *    -    pSrc is not 8-byte aligned. 
+ *    -    srcStep <= 0 or srcStep is not a multiple of 8. 
+ *    -    pSrcRef is not 8-byte aligned. 
+ *    -    pDst is not 8-byte aligned 
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock (
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    const OMX_U8 *pSrcRef,
+    OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_LimitMVToRect   (6.1.4.1.3)
+ *
+ * Description:
+ * Limits the motion vector associated with the current block/macroblock to 
+ * prevent the motion compensated block/macroblock from moving outside a 
+ * bounding rectangle as shown in Figure 6-1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcMV - pointer to the motion vector associated with the current block 
+ *            or macroblock 
+ *   pRectVOPRef - pointer to the bounding rectangle 
+ *   Xcoord, Ycoord  - coordinates of the current block or macroblock 
+ *   size - size of the current block or macroblock; must be equal to 8 or 
+ *            16. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMV - pointer to the limited motion vector 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the 
+ *              following conditions is true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcMV, pDstMV, or pRectVOPRef. 
+ *    -    size is not equal to either 8 or 16. 
+ *    -    the width or height of the bounding rectangle is less than 
+ *         twice the block size.
+ */
+OMXResult omxVCCOMM_LimitMVToRect (
+    const OMXVCMotionVector *pSrcMV,
+    OMXVCMotionVector *pDstMV,
+    const OMXRect *pRectVOPRef,
+    OMX_INT Xcoord,
+    OMX_INT Ycoord,
+    OMX_INT size
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_SAD_16x   (6.1.4.1.4)
+ *
+ * Description:
+ * This function calculates the SAD for 16x16 and 16x8 blocks. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg - Pointer to the original block; must be aligned on a 16-byte 
+ *             boundary. 
+ *   iStepOrg - Step of the original block buffer 
+ *   pSrcRef  - Pointer to the reference block 
+ *   iStepRef - Step of the reference block buffer 
+ *   iHeight  - Height of the block 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer of result SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the 
+ *              following conditions is true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcOrg, pDstSAD, or pSrcRef 
+ *    -    pSrcOrg is not 16-byte aligned. 
+ *    -    iStepOrg  <= 0 or iStepOrg is not a multiple of 16 
+ *    -    iStepRef <= 0 or iStepRef is not a multiple of 16 
+ *    -    iHeight is not 8 or 16 
+ *
+ */
+OMXResult omxVCCOMM_SAD_16x (
+    const OMX_U8 *pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8 *pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_S32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCCOMM_SAD_8x   (6.1.4.1.5)
+ *
+ * Description:
+ * This function calculates the SAD for 8x16, 8x8, 8x4 blocks. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg  - Pointer to the original block; must be aligned on an 8-byte 
+ *              boundary. 
+ *   iStepOrg - Step of the original block buffer 
+ *   pSrcRef  - Pointer to the reference block 
+ *   iStepRef - Step of the reference block buffer 
+ *   iHeight  - Height of the block 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer of result SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the 
+ *              following conditions is true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcOrg, pDstSAD, or pSrcRef 
+ *    -    pSrcOrg is not 8-byte aligned. 
+ *    -    iStepOrg  <= 0 or iStepOrg is not a multiple of 8 
+ *    -    iStepRef <= 0 or iStepRef is not a multiple of 8 
+ *    -    iHeight is not 4, 8 or 16 
+ *
+ */
+OMXResult omxVCCOMM_SAD_8x (
+    const OMX_U8 *pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8 *pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_S32*pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/* 6.2.1.1 Direction  */
+/* The direction enumerator is used with functions that perform AC/DC prediction and zig-zag scan.  */
+
+enum {
+    OMX_VC_NONE       = 0,
+    OMX_VC_HORIZONTAL = 1,
+    OMX_VC_VERTICAL   = 2 
+};
+
+
+
+/* 6.2.1.2 Bilinear Interpolation  */
+/* The bilinear interpolation enumerator is used with motion estimation, motion compensation, and reconstruction functions.  */
+
+enum {
+    OMX_VC_INTEGER_PIXEL = 0, /* case a */
+    OMX_VC_HALF_PIXEL_X  = 1, /* case b */
+    OMX_VC_HALF_PIXEL_Y  = 2, /* case c */
+    OMX_VC_HALF_PIXEL_XY = 3  /* case d */ 
+};
+
+
+
+/* 6.2.1.3 Neighboring Macroblock Availability  */
+/* Neighboring macroblock availability is indicated using the following flags:   */
+
+enum {
+    OMX_VC_UPPER = 1,        /** above macroblock is available */
+    OMX_VC_LEFT = 2,         /** left macroblock is available */
+    OMX_VC_CENTER = 4,
+    OMX_VC_RIGHT = 8,
+    OMX_VC_LOWER = 16,
+    OMX_VC_UPPER_LEFT = 32,  /** above-left macroblock is available */
+    OMX_VC_UPPER_RIGHT = 64, /** above-right macroblock is available */
+    OMX_VC_LOWER_LEFT = 128,
+    OMX_VC_LOWER_RIGHT = 256 
+};
+
+
+
+/* 6.2.1.4 Video Components  */
+/* A data type that enumerates video components is defined as follows:  */
+
+typedef enum {
+    OMX_VC_LUMINANCE,    /** Luminance component */
+    OMX_VC_CHROMINANCE   /** chrominance component */ 
+} OMXVCM4P2VideoComponent;
+
+
+
+/* 6.2.1.5 Macroblock Types  */
+/* A data type that enumerates macroblock types is defined as follows:  */
+
+typedef enum {
+    OMX_VC_INTER     = 0, /** P picture or P-VOP */
+    OMX_VC_INTER_Q   = 1, /** P picture or P-VOP */
+    OMX_VC_INTER4V   = 2, /** P picture or P-VOP */
+    OMX_VC_INTRA     = 3, /** I and P picture, I- and P-VOP */
+    OMX_VC_INTRA_Q   = 4, /** I and P picture, I- and P-VOP */
+    OMX_VC_INTER4V_Q = 5  /** P picture or P-VOP (H.263) */
+} OMXVCM4P2MacroblockType;
+
+
+
+/* 6.2.1.6 Coordinates  */
+/* Coordinates (e.g. the pCurrPointPos and pSearchPointRefPos arguments of the block-matching functions) are represented as follows:  */
+
+typedef struct {
+    OMX_INT x;
+    OMX_INT y;
+} OMXVCM4P2Coordinate;
+
+
+
+/* 6.2.1.7 Motion Estimation Algorithms  */
+/* A data type that enumerates motion estimation search methods (the MEmode argument of omxVCM4P2_MEGetBufSize and omxVCM4P2_MEInit) is defined as follows:  */
+
+typedef enum {
+    OMX_VC_M4P2_FAST_SEARCH = 0,  /** Fast motion search */
+    OMX_VC_M4P2_FULL_SEARCH = 1   /** Full motion search */ 
+} OMXVCM4P2MEMode;
+
+
+
+/* 6.2.1.8 Motion Estimation Parameters  */
+/* A data structure containing control parameters for 
+ * motion estimation functions is defined as follows:  
+ */
+
+typedef struct {
+    OMX_INT searchEnable8x8;     /** enables 8x8 search */
+    OMX_INT halfPelSearchEnable; /** enables half-pel resolution */
+    OMX_INT searchRange;         /** search range; omxVCM4P2_MEGetBufSize and omxVCM4P2_MEInit reject values <= 0 */
+    OMX_INT rndVal;              /** rounding control; 0-disabled, 1-enabled */
+} OMXVCM4P2MEParams;
+
+
+
+/* 6.2.1.9 Macroblock Information   */
+/* A data structure containing macroblock parameters for 
+ * motion estimation functions is defined as follows:  
+ */
+
+typedef struct {
+    OMX_S32 sliceId;                 /* slice number */
+    OMXVCM4P2MacroblockType mbType;  /* MB type: OMX_VC_INTRA, OMX_VC_INTER, or OMX_VC_INTER4V */
+    OMX_S32 qp;                      /* quantization parameter */
+    OMX_U32 cbpy;                    /* CBP Luma */
+    OMX_U32 cbpc;                    /* CBP Chroma */
+    OMXVCMotionVector pMV0[2][2];    /* motion vector, represented using 1/2-pel units, 
+                                      * pMV0[blocky][blockx] (blocky = 0~1, blockx = 0~1) 
+                                      */
+    OMXVCMotionVector pMVPred[2][2]; /* motion vector prediction, represented using 1/2-pel units, 
+                                      * pMVPred[blocky][blockx] (blocky = 0~1, blockx = 0~1) 
+                                      */
+    OMX_U8 pPredDir[2][2];           /* AC prediction direction: 
+                                      *   OMX_VC_NONE, OMX_VC_VERTICAL, OMX_VC_HORIZONTAL 
+                                      */
+} OMXVCM4P2MBInfo, *OMXVCM4P2MBInfoPtr;
+
+
+
+/**
+ * Function:  omxVCM4P2_FindMVpred   (6.2.3.1.1)
+ *
+ * Description:
+ * Predicts a motion vector for the current block using the procedure 
+ * specified in [ISO14496-2], subclause 7.6.5.  The resulting predicted MV is 
+ * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then 
+ * the set of three MV candidates used for prediction is also returned, 
+ * otherwise pDstMVPredME is NULL upon return. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcMVCurMB - pointer to the MV buffer associated with the current Y 
+ *            macroblock; a value of NULL indicates unavailability. 
+ *   pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated 
+ *            with the MB located to the left of the current MB; set to NULL 
+ *            if there is no MB to the left. 
+ *   pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated 
+ *            with the MB located above the current MB; set to NULL if there 
+ *            is no MB located above the current MB. 
+ *   pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated 
+ *            with the MB located to the right and above the current MB; set 
+ *            to NULL if there is no MB located to the above-right. 
+ *   iBlk - the index of block in the current macroblock 
+ *   pDstMVPredME - MV candidate return buffer;  if set to NULL then 
+ *            prediction candidate MVs are not returned and pDstMVPredME will 
+ *            be NULL upon function return; if pDstMVPredME is non-NULL then it 
+ *            must point to a buffer containing sufficient space for three 
+ *            return MVs. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMVPred - pointer to the predicted motion vector 
+ *   pDstMVPredME - if non-NULL upon input then pDstMVPredME points upon 
+ *            return to a buffer containing the three motion vector candidates 
+ *            used for prediction as specified in [ISO14496-2], subclause 
+ *            7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL 
+ *            upon output. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -    the pointer pDstMVPred is NULL 
+ *    -    the parameter iBlk does not fall into the range 0 <= iBlk <= 3 
+ *
+ */
+OMXResult omxVCM4P2_FindMVpred (
+    const OMXVCMotionVector *pSrcMVCurMB,
+    const OMXVCMotionVector *pSrcCandMV1,
+    const OMXVCMotionVector *pSrcCandMV2,
+    const OMXVCMotionVector *pSrcCandMV3,
+    OMXVCMotionVector *pDstMVPred,
+    OMXVCMotionVector *pDstMVPredME,
+    OMX_INT iBlk
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_IDCT8x8blk   (6.2.3.2.1)
+ *
+ * Description:
+ * Computes a 2D inverse DCT for a single 8x8 block, as defined in 
+ * [ISO14496-2].  The forward transform is omxVCM4P2_DCT8x8blk. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the start of the linearly arranged IDCT input buffer; 
+ *            must be aligned on a 16-byte boundary.  According to 
+ *            [ISO14496-2], the input coefficient values should lie within the 
+ *            range [-2048, 2047]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the start of the linearly arranged IDCT output buffer; 
+ *            must be aligned on a 16-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; one or more of the following is true: 
+ *    -    pSrc or pDst is NULL. 
+ *    -    pSrc or pDst is not 16-byte aligned. 
+ *
+ */
+OMXResult omxVCM4P2_IDCT8x8blk (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_MEGetBufSize   (6.2.4.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification structure 
+ * for the following motion estimation functions: omxVCM4P2_BlockMatch_Integer_8x8, 
+ * omxVCM4P2_BlockMatch_Integer_16x16, and omxVCM4P2_MotionEstimationMB. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P2MEMode 
+ *   pMEParams - motion estimation parameters 
+ *
+ * Output Arguments:
+ *   
+ *   pSize - pointer to the number of bytes required for the specification 
+ *            structure 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - one or more of the following is true: 
+ *    -    an invalid value was specified for the parameter MEmode 
+ *    -    a negative or zero value was specified for the 
+ *         parameter pMEParams->searchRange 
+ *
+ */
+OMXResult omxVCM4P2_MEGetBufSize (
+    OMXVCM4P2MEMode MEmode,
+    const OMXVCM4P2MEParams *pMEParams,
+    OMX_U32 *pSize
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_MEInit   (6.2.4.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the 
+ * following motion estimation functions:  omxVCM4P2_BlockMatch_Integer_8x8, 
+ * omxVCM4P2_BlockMatch_Integer_16x16, and omxVCM4P2_MotionEstimationMB. Memory for the 
+ * specification structure *pMESpec must be allocated prior to calling the 
+ * function, and should be aligned on a 4-byte boundary.  Following 
+ * initialization by this function, the vendor-specific structure *pMESpec 
+ * should contain an implementation-specific representation of all motion 
+ * estimation parameters received via the structure pMEParams, for example  
+ * rndVal, searchRange, etc.  The number of bytes required for the 
+ * specification structure can be determined using the function 
+ * omxVCM4P2_MEGetBufSize. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P2MEMode 
+ *   pMEParams - motion estimation parameters 
+ *   pMESpec - pointer to the uninitialized ME specification structure 
+ *
+ * Output Arguments:
+ *   
+ *   pMESpec - pointer to the initialized ME specification structure 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - one or more of the following is true: 
+ *    -    an invalid value was specified for the parameter MEmode 
+ *    -    a negative or zero value was specified for the 
+ *         parameter pMEParams->searchRange 
+ *
+ */
+OMXResult omxVCM4P2_MEInit (
+    OMXVCM4P2MEMode MEmode,
+    const OMXVCM4P2MEParams*pMEParams,
+    void *pMESpec
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Integer_16x16   (6.2.4.2.1)
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated 
+ * minimum SAD. Both the input and output motion vectors are represented using 
+ * half-pixel units, and therefore a shift left or right by 1 bit may be 
+ * required, respectively, to match the input or output MVs with other 
+ * functions that either generate output MVs or expect input MVs represented 
+ * using integer pixel units. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            MB that corresponds to the location of the current macroblock in 
+ *            the current plane. 
+ *   refWidth - width of the reference plane 
+ *   pRefRect - pointer to the valid reference plane rectangle; coordinates 
+ *            are specified relative to the image origin.  Rectangle 
+ *            boundaries may extend beyond image boundaries if the image has 
+ *            been padded.  For example, if padding extends 4 pixels beyond 
+ *            frame border, then the value for the left border could be set to 
+ *            -4. 
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock 
+ *            buffer extracted from the original plane (linear array, 256 
+ *            entries); must be aligned on a 16-byte boundary.  The number of 
+ *            bytes between lines (step) is 16. 
+ *   pCurrPointPos - position of the current macroblock in the current plane 
+ *   pSrcPreMV - pointer to predicted motion vector; NULL indicates no 
+ *            predicted MV 
+ *   pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced 
+ *            by pSrcPreMV); may be set to NULL if unavailable. 
+ *   pMESpec - vendor-specific motion estimation specification structure; 
+ *            must have been allocated and then initialized using 
+ *            omxVCM4P2_MEInit prior to calling the block matching function. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMV - pointer to estimated MV 
+ *   pDstSAD - pointer to minimum SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following 
+ *              conditions is true: 
+ *    -    at least one of the following pointers is NULL: pSrcRefBuf, 
+ *              pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or 
+ *              pMESpec, or 
+ *    -    pSrcCurrBuf is not 16-byte aligned 
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_16x16 (
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    const OMXVCMotionVector*pSrcPreMV,
+    const OMX_INT *pSrcPreSAD,
+    void *pMESpec,
+    OMXVCMotionVector*pDstMV,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Integer_8x8   (6.2.4.2.2)
+ *
+ * Description:
+ * Performs an 8x8 block search; estimates motion vector and associated 
+ * minimum SAD.  Both the input and output motion vectors are represented 
+ * using half-pixel units, and therefore a shift left or right by 1 bit may be 
+ * required, respectively, to match the input or output MVs with other 
+ * functions that either generate output MVs or expect input MVs represented 
+ * using integer pixel units. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            block that corresponds to the location of the current 8x8 block 
+ *            in the current plane. 
+ *   refWidth - width of the reference plane 
+ *   pRefRect - pointer to the valid reference plane rectangle; coordinates 
+ *            are specified relative to the image origin.  Rectangle 
+ *            boundaries may extend beyond image boundaries if the image has 
+ *            been padded. 
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock 
+ *            buffer extracted from the original plane (linear array, 128 
+ *            entries); must be aligned on an 8-byte boundary.  The number of 
+ *            bytes between lines (step) is 16 bytes. 
+ *   pCurrPointPos - position of the current block in the current plane 
+ *   pSrcPreMV - pointer to predicted motion vector; NULL indicates no 
+ *            predicted MV 
+ *   pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced 
+ *            by pSrcPreMV); may be set to NULL if unavailable. 
+ *   pMESpec - vendor-specific motion estimation specification structure; 
+ *            must have been allocated and then initialized using 
+ *            omxVCM4P2_MEInit prior to calling the block matching function. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMV - pointer to estimated MV 
+ *   pDstSAD - pointer to minimum SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following 
+ *              conditions is true: 
+ *    -    at least one of the following pointers is NULL: pSrcRefBuf, 
+ *              pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or 
+ *              pMESpec, or 
+ *    -    pSrcCurrBuf is not 8-byte aligned 
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_8x8 (
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    const OMXVCMotionVector *pSrcPreMV,
+    const OMX_INT *pSrcPreSAD,
+    void *pMESpec,
+    OMXVCMotionVector *pDstMV,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Half_16x16   (6.2.4.2.3)
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution.  Returns the 
+ * estimated motion vector and associated minimum SAD.  This function 
+ * estimates the half-pixel motion vector by interpolating the integer 
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., 
+ * the initial integer MV is generated externally.  The input parameters 
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of 
+ * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function 
+ * BlockMatch_Integer_16x16 may be used for integer motion estimation. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            macroblock that corresponds to the location of the current 
+ *            macroblock in the current plane. 
+ *   refWidth - width of the reference plane 
+ *   pRefRect - reference plane valid region rectangle 
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock 
+ *            buffer extracted from the original plane (linear array, 256 
+ *            entries); must be aligned on a 16-byte boundary.  The number of 
+ *            bytes between lines (step) is 16. 
+ *   pSearchPointRefPos - position of the starting point for half pixel 
+ *            search (specified in terms of integer pixel units) in the 
+ *            reference plane, i.e., the reference position pointed to by the 
+ *            predicted motion vector. 
+ *   rndVal - rounding control parameter: 0 - disabled; 1 - enabled. 
+ *   pSrcDstMV - pointer to the initial MV estimate; typically generated 
+ *            during a prior 16x16 integer search; specified in terms of 
+ *            half-pixel units. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstMV - pointer to estimated MV 
+ *   pDstSAD - pointer to minimum SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following 
+ *              conditions is true: 
+ *    -    at least one of the following pointers is NULL: pSrcRefBuf, 
+ *         pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV.
+ *    -    pSrcCurrBuf is not 16-byte aligned 
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_16x16 (
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pSearchPointRefPos,
+    OMX_INT rndVal,
+    OMXVCMotionVector *pSrcDstMV,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Half_8x8   (6.2.4.2.4)
+ *
+ * Description:
+ * Performs an 8x8 block match with half-pixel resolution. Returns the 
+ * estimated motion vector and associated minimum SAD.  This function 
+ * estimates the half-pixel motion vector by interpolating the integer 
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., 
+ * the initial integer MV is generated externally.  The input parameters 
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of 
+ * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function 
+ * BlockMatch_Integer_8x8 may be used for integer motion estimation. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            block that corresponds to the location of the current 8x8 block 
+ *            in the current plane. 
+ *   refWidth - width of the reference plane 
+ *   pRefRect - reference plane valid region rectangle 
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock 
+ *            buffer extracted from the original plane (linear array, 128 
+ *            entries); must be aligned on an 8-byte boundary.  The number of 
+ *            bytes between lines (step) is 16. 
+ *   pSearchPointRefPos - position of the starting point for half pixel 
+ *            search (specified in terms of integer pixel units) in the 
+ *            reference plane. 
+ *   rndVal - rounding control parameter: 0 - disabled; 1 - enabled. 
+ *   pSrcDstMV - pointer to the initial MV estimate; typically generated 
+ *            during a prior 8x8 integer search, specified in terms of 
+ *            half-pixel units. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstMV - pointer to estimated MV 
+ *   pDstSAD - pointer to minimum SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following 
+ *              conditions is true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcRefBuf, pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV
+ *    -    pSrcCurrBuf is not 8-byte aligned 
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_8x8 (
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pSearchPointRefPos,
+    OMX_INT rndVal,
+    OMXVCMotionVector *pSrcDstMV,
+    OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_MotionEstimationMB   (6.2.4.3.1)
+ *
+ * Description:
+ * Performs motion search for a 16x16 macroblock.  Selects best motion search 
+ * strategy from among inter-1MV, inter-4MV, and intra modes.  Supports 
+ * integer and half pixel resolution. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcCurrBuf - pointer to the top-left corner of the current MB in the 
+ *            original picture plane; must be aligned on a 16-byte boundary.  
+ *            The function does not expect source data outside the region 
+ *            bounded by the MB to be available; for example it is not 
+ *            necessary for the caller to guarantee the availability of 
+ *            pSrcCurrBuf[-srcCurrStep], i.e., the row of pixels above the MB 
+ *            to be processed. 
+ *   srcCurrStep - width of the original picture plane, in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            plane location corresponding to the location of the current 
+ *            macroblock in the current plane; must be aligned on a 16-byte 
+ *            boundary. 
+ *   srcRefStep - width of the reference picture plane, in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   pRefRect - reference plane valid region rectangle, specified relative to 
+ *            the image origin 
+ *   pCurrPointPos - position of the current macroblock in the current plane 
+ *   pMESpec - pointer to the vendor-specific motion estimation specification 
+ *            structure; must be allocated and then initialized using 
+ *            omxVCM4P2_MEInit prior to calling this function. 
+ *   pMBInfo - array, of dimension four, containing pointers to information 
+ *            associated with four nearby MBs: 
+ *            -   pMBInfo[0] - pointer to left MB information 
+ *            -   pMBInfo[1] - pointer to top MB information 
+ *            -   pMBInfo[2] - pointer to top-left MB information 
+ *            -   pMBInfo[3] - pointer to top-right MB information 
+ *            Any pointer in the array may be set equal to NULL if the 
+ *            corresponding MB doesn't exist.  For each MB, the following structure 
+ *            members are used:    
+ *            -   mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER, or 
+ *                OMX_VC_INTER4V 
+ *            -   pMV0[2][2] - estimated motion vectors; represented 
+ *                in 1/2 pixel units 
+ *            -   sliceId - number of the slice to which the MB belongs 
+ *   pSrcDstMBCurr - pointer to information structure for the current MB.  
+ *            The following entries should be set prior to calling the 
+ *            function: sliceId - the number of the slice to which the 
+ *            current MB belongs.  The structure elements cbpy and cbpc are 
+ *            ignored. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstMBCurr - pointer to updated information structure for the current 
+ *            MB after MB-level motion estimation has been completed.  The 
+ *            following structure members are updated by the ME function:   
+ *              -  mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or 
+ *                 OMX_VC_INTER4V. 
+ *              -  pMV0[2][2] - estimated motion vectors; represented in 
+ *                 terms of 1/2 pel units. 
+ *              -  pMVPred[2][2] - predicted motion vectors; represented 
+ *                 in terms of 1/2 pel units. 
+ *            The structure members cbpy and cbpc are not updated by the function. 
+ *   pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs 
+ *            for INTER4V 
+ *   pDstBlockSAD - pointer to an array of SAD values for each of the four 
+ *            8x8 luma blocks in the MB.  The block SADs are in scan order for 
+ *            each MB. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the 
+ *              following conditions is true: 
+ *    -    at least one of the following pointers is NULL: pSrcCurrBuf, 
+ *              pSrcRefBuf, pRefRect, pCurrPointPos, pMBInter, pMBIntra, 
+ *              pSrcDstMBCurr, or pDstSAD. 
+ *    NOTE(review): pMBInter and pMBIntra are not parameters of this prototype; confirm the intended names. 
+ */
+OMXResult omxVCM4P2_MotionEstimationMB (
+    const OMX_U8 *pSrcCurrBuf,
+    OMX_S32 srcCurrStep,
+    const OMX_U8 *pSrcRefBuf,
+    OMX_S32 srcRefStep,
+    const OMXRect*pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    void *pMESpec,
+    const OMXVCM4P2MBInfoPtr *pMBInfo,
+    OMXVCM4P2MBInfo *pSrcDstMBCurr,
+    OMX_U16 *pDstSAD,
+    OMX_U16 *pDstBlockSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DCT8x8blk   (6.2.4.4.1)
+ *
+ * Description:
+ * Computes a 2D forward DCT for a single 8x8 block, as defined in 
+ * [ISO14496-2].  The inverse transform is omxVCM4P2_IDCT8x8blk. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the start of the linearly arranged input buffer; must 
+ *            be aligned on a 16-byte boundary.  Input values (pixel 
+ *            intensities) are valid in the range [-255,255]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the start of the linearly arranged output buffer; must 
+ *            be aligned on a 16-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; one or more of the following is true: 
+ *    -    pSrc or pDst is NULL. 
+ *    -    pSrc or pDst is not 16-byte aligned. 
+ *
+ */
+OMXResult omxVCM4P2_DCT8x8blk (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_QuantIntra_I   (6.2.4.4.2)
+ *
+ * Description:
+ * Performs quantization on intra block coefficients. This function supports 
+ * bits_per_pixel == 8. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input intra block coefficients; must be aligned 
+ *            on a 16-byte boundary. 
+ *   QP - quantization parameter (quantizer_scale). 
+ *   blockIndex - block index indicating the component type and position, 
+ *            valid in the range 0 to 5, as defined in [ISO14496-2], subclause 
+ *            6.1.3.8. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC 
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the output (quantized) intra block coefficients.  
+ *            When shortVideoHeader==1, AC coefficients are saturated on the 
+ *            interval [-127, 127], and DC coefficients are saturated on the 
+ *            interval [1, 254].  When shortVideoHeader==0, AC coefficients 
+ *            are saturated on the interval [-2047, 2047]. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrcDst is NULL. 
+ *    -    blockIndex < 0 or blockIndex >= 10 
+ *    -    QP <= 0 or QP >= 32. 
+ *    NOTE(review): blockIndex is documented above as valid in 0 to 5, but the check listed here is >= 10; confirm. 
+ */
+OMXResult omxVCM4P2_QuantIntra_I (
+    OMX_S16 *pSrcDst,
+    OMX_U8 QP,
+    OMX_INT blockIndex,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_QuantInter_I   (6.2.4.4.3)
+ *
+ * Description:
+ * Performs quantization on an inter coefficient block; supports 
+ * bits_per_pixel == 8. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input inter block coefficients; must be aligned 
+ *            on a 16-byte boundary. 
+ *   QP - quantization parameter (quantizer_scale) 
+ *   shortVideoHeader - binary flag indicating presence of short_video_header; 
+ *            shortVideoHeader==1 selects linear intra DC mode, and 
+ *            shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the output (quantized) inter block coefficients.  
+ *            When shortVideoHeader==1, AC coefficients are saturated on the 
+ *            interval [-127, 127], and DC coefficients are saturated on the 
+ *            interval [1, 254].  When shortVideoHeader==0, AC coefficients 
+ *            are saturated on the interval [-2047, 2047]. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrcDst is NULL. 
+ *    -    QP <= 0 or QP >= 32. 
+ *
+ */
+OMXResult omxVCM4P2_QuantInter_I (
+    OMX_S16 *pSrcDst,
+    OMX_U8 QP,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_TransRecBlockCoef_intra   (6.2.4.4.4)
+ *
+ * Description:
+ * Quantizes the DCT coefficients, implements intra block AC/DC coefficient 
+ * prediction, and reconstructs the current intra block texture for prediction 
+ * on the next frame.  Quantized row and column coefficients are returned in 
+ * the updated coefficient buffers. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the pixels of current intra block; must be aligned on 
+ *            an 8-byte boundary. 
+ *   pPredBufRow - pointer to the coefficient row buffer containing 
+ *            ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16. 
+ *            Coefficients are organized into blocks of eight as described 
+ *            below (Internal Prediction Coefficient Update Procedures).  The 
+ *            DC coefficient is first, and the remaining buffer locations 
+ *            contain the quantized AC coefficients. Each group of eight row 
+ *            buffer elements combined with one element eight elements ahead 
+ *            contains the coefficient predictors of the neighboring block 
+ *            that is spatially above or to the left of the block currently to 
+ *            be decoded. A negative-valued DC coefficient indicates that this 
+ *            neighboring block is not INTRA-coded or out of bounds, and 
+ *            therefore the AC and DC coefficients are invalid.  Pointer must 
+ *            be aligned on an 8-byte boundary. 
+ *   pPredBufCol - pointer to the prediction coefficient column buffer 
+ *            containing 16 elements of type OMX_S16. Coefficients are 
+ *            organized as described in section 6.2.2.5.  Pointer must be 
+ *            aligned on an 8-byte boundary. 
+ *   pSumErr - pointer to a flag indicating whether or not AC prediction is 
+ *            required; AC prediction is enabled if *pSumErr >=0, but the 
+ *            value is not used for coefficient prediction, i.e., the sum of 
+ *            absolute differences starts from 0 for each call to this 
+ *            function.  Otherwise AC prediction is disabled if *pSumErr < 0 . 
+ *   blockIndex - block index indicating the component type and position, as 
+ *            defined in [ISO14496-2], subclause 6.1.3.8. 
+ *   curQp - quantization parameter of the macroblock to which the current 
+ *            block belongs 
+ *   pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0] 
+ *            contains the quantization parameter associated with the 8x8 
+ *            block left of the current block (QPa), and pQpBuf[1] contains 
+ *            the quantization parameter associated with the 8x8 block above 
+ *            the current block (QPc).  In the event that the corresponding 
+ *            block is outside of the VOP bound, the Qp value will not affect 
+ *            the intra prediction process, as described in [ISO14496-2], 
+ *            sub-clause 7.4.3.3,  Adaptive AC Coefficient Prediction.  
+ *   srcStep - width of the source buffer; must be a multiple of 8. 
+ *   dstStep - width of the reconstructed destination buffer; must be a 
+ *            multiple of 16. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC 
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains 
+ *            the predicted DC coefficient; the remaining entries contain the 
+ *            quantized AC coefficients (without prediction).  The pointer 
+ *            pDst must be aligned on a 16-byte boundary. 
+ *   pRec - pointer to the reconstructed texture; must be aligned on an 
+ *            8-byte boundary. 
+ *   pPredBufRow - pointer to the updated coefficient row buffer 
+ *   pPredBufCol - pointer to the updated coefficient column buffer 
+ *   pPreACPredict - if prediction is enabled, the parameter points to the 
+ *            start of the buffer containing the coefficient differences for 
+ *            VLC encoding. The entry pPreACPredict[0] indicates prediction 
+ *            direction for the current block and takes one of the following 
+ *            values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or 
+ *            OMX_VC_VERTICAL.  The entries 
+ *            pPreACPredict[1]-pPreACPredict[7] contain predicted AC 
+ *            coefficients.  If prediction is disabled (*pSumErr<0) then the 
+ *            contents of this buffer are undefined upon return from the 
+ *            function. 
+ *   pSumErr - pointer to the value of the accumulated AC coefficient errors, 
+ *            i.e., sum of the absolute differences between predicted and 
+ *            unpredicted AC coefficients 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: pSrc, pDst, pRec, 
+ *         pPredBufRow, pPredBufCol, pQpBuf, pPreACPredict, pSumErr. 
+ *    -    blockIndex < 0 or blockIndex >= 10. 
+ *    -    curQp <= 0 or curQp >= 32. 
+ *    -    srcStep or dstStep <= 0, or not a multiple of 8. 
+ *    -    pDst is not 16-byte aligned. 
+ *    -    At least one of the following pointers is not 8-byte aligned: 
+ *         pSrc, pRec.  
+ *
+ *  Note: The coefficient buffers must be updated in accordance with the 
+ *        update procedures defined in section 6.2.2. 
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_intra (
+    const OMX_U8 *pSrc,         /* must be 8-byte aligned */
+    OMX_S16 *pDst,              /* quantized DCT coefficients; 16-byte aligned */
+    OMX_U8 *pRec,               /* reconstructed texture; 8-byte aligned */
+    OMX_S16 *pPredBufRow,       /* updated coefficient row buffer */
+    OMX_S16 *pPredBufCol,       /* updated coefficient column buffer */
+    OMX_S16 *pPreACPredict,     /* [0] = prediction direction, then AC differences */
+    OMX_INT *pSumErr,           /* in: < 0 disables AC prediction; out: accumulated AC error */
+    OMX_INT blockIndex,         /* block index per [ISO14496-2], subclause 6.1.3.8 */
+    OMX_U8 curQp,               /* QP of the macroblock containing the current block */
+    const OMX_U8 *pQpBuf,       /* pQpBuf[0] = QPa (left block), pQpBuf[1] = QPc (above) */
+    OMX_INT srcStep,            /* source buffer width; multiple of 8 */
+    OMX_INT dstStep,            /* reconstructed destination buffer width; multiple of 16 */
+    OMX_INT shortVideoHeader    /* 1: linear intra DC mode; 0: non linear intra DC mode */
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_TransRecBlockCoef_inter   (6.2.4.4.5)
+ *
+ * Description:
+ * Implements DCT, and quantizes the DCT coefficients of the inter block 
+ * while reconstructing the texture residual. There is no boundary check for 
+ * the bit stream buffer. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the residuals to be encoded; must be aligned on a 
+ *            16-byte boundary. 
+ *   QP - quantization parameter. 
+ *   shortVideoHeader - binary flag indicating presence of short_video_header; 
+ *                      shortVideoHeader==1 selects linear intra DC mode, and 
+ *                      shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the quantized DCT coefficients buffer; must be aligned 
+ *            on a 16-byte boundary. 
+ *   pRec - pointer to the reconstructed texture residuals; must be aligned 
+ *            on a 16-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is either NULL or 
+ *         not 16-byte aligned: 
+ *            - pSrc 
+ *            - pDst
+ *            - pRec
+ *    -    QP <= 0 or QP >= 32. 
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_inter (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst,
+    OMX_S16 *pRec,
+    OMX_U8 QP,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeVLCZigzag_IntraDCVLC   (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one 
+ * intra block.  Two versions of the function (DCVLC and ACVLC) are provided 
+ * in order to support the two different methods of processing DC 
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC 
+ * Coefficient Decoding for the Case of Switched VLC Encoding".  
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - double pointer to the current byte in the bitstream 
+ *   pBitOffset - pointer to the bit position in the byte pointed by 
+ *            *ppBitStream. Valid within 0 to 7. 
+ *   pQDctBlkCoef - pointer to the quantized DCT coefficient 
+ *   predDir - AC prediction direction, which is used to decide the zigzag 
+ *            scan pattern; takes one of the following values: 
+ *            -  OMX_VC_NONE - AC prediction not used.  
+ *                             Performs classical zigzag scan. 
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction.  
+ *                             Performs alternate-vertical zigzag scan. 
+ *            -  OMX_VC_VERTICAL - Vertical prediction.  
+ *                             Performs alternate-horizontal zigzag scan. 
+ *   pattern - block pattern which is used to decide whether this block is 
+ *            encoded 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *   videoComp - video component type (luminance, chrominance) of the current 
+ *            block 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is encoded, so 
+ *            that it points to the current byte in the bit stream buffer. 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream, 
+ *              *ppBitStream, pBitOffset, pQDctBlkCoef. 
+ *    -    *pBitOffset < 0, or *pBitOffset > 7. 
+ *    -    predDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or 
+ *         OMX_VC_VERTICAL. 
+ *    -    videoComp is not one component of enum OMXVCM4P2VideoComponent. 
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMX_S16 *pQDctBlkCoef,
+    OMX_U8 predDir,
+    OMX_U8 pattern,
+    OMX_INT shortVideoHeader,
+    OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeVLCZigzag_IntraACVLC   (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one 
+ * intra block.  Two versions of the function (DCVLC and ACVLC) are provided 
+ * in order to support the two different methods of processing DC 
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC 
+ * Coefficient Decoding for the Case of Switched VLC Encoding".  
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - double pointer to the current byte in the bitstream 
+ *   pBitOffset - pointer to the bit position in the byte pointed by 
+ *            *ppBitStream. Valid within 0 to 7. 
+ *   pQDctBlkCoef - pointer to the quantized DCT coefficient 
+ *   predDir - AC prediction direction, which is used to decide the zigzag 
+ *            scan pattern; takes one of the following values: 
+ *            -  OMX_VC_NONE - AC prediction not used.  
+ *                             Performs classical zigzag scan. 
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction.  
+ *                             Performs alternate-vertical zigzag scan. 
+ *            -  OMX_VC_VERTICAL - Vertical prediction.  
+ *                             Performs alternate-horizontal zigzag scan. 
+ *   pattern - block pattern which is used to decide whether this block is 
+ *            encoded 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is encoded, so 
+ *            that it points to the current byte in the bit stream buffer. 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream, 
+ *              *ppBitStream, pBitOffset, pQDctBlkCoef. 
+ *    -    *pBitOffset < 0, or *pBitOffset > 7. 
+ *    -    predDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or 
+ *         OMX_VC_VERTICAL. 
+ *    -    videoComp is not a valid OMXVCM4P2VideoComponent (DCVLC version only). 
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMX_S16 *pQDctBlkCoef,
+    OMX_U8 predDir,
+    OMX_U8 pattern,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeVLCZigzag_Inter   (6.2.4.5.3)
+ *
+ * Description:
+ * Performs classical zigzag scanning and VLC encoding for one inter block. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the bit 
+ *            stream 
+ *   pBitOffset - pointer to the bit position in the byte pointed by 
+ *            *ppBitStream. Valid within 0 to 7 
+ *   pQDctBlkCoef - pointer to the quantized DCT coefficient 
+ *   pattern - block pattern which is used to decide whether this block is 
+ *            encoded 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is encoded so that 
+ *            it points to the current byte in the bit stream buffer. 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream, 
+ *              *ppBitStream, pBitOffset, pQDctBlkCoef. 
+ *    -    *pBitOffset < 0, or *pBitOffset > 7. 
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_Inter (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMX_S16 *pQDctBlkCoef,
+    OMX_U8 pattern,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeMV   (6.2.4.5.4)
+ *
+ * Description:
+ * Predicts a motion vector for the current macroblock, encodes the 
+ * difference, and writes the output to the stream buffer. The input MVs 
+ * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie 
+ * within the ranges associated with the input parameter fcodeForward, as 
+ * described in [ISO14496-2], subclause 7.6.3.  This function provides a 
+ * superset of the functionality associated with the function 
+ * omxVCM4P2_FindMVpred. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - double pointer to the current byte in the bitstream buffer 
+ *   pBitOffset - index of the first free (next available) bit in the stream 
+ *            buffer referenced by *ppBitStream, valid in the range 0 to 7. 
+ *   pMVCurMB - pointer to the current macroblock motion vector; a value of 
+ *            NULL indicates unavailability. 
+ *   pSrcMVLeftMB - pointer to the source left macroblock motion vector; a 
+ *            value of NULL indicates unavailability. 
+ *   pSrcMVUpperMB - pointer to source upper macroblock motion vector; a 
+ *            value of NULL indicates unavailability. 
+ *   pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a 
+ *            value of NULL indicates unavailability. 
+ *   fcodeForward - an integer with values from 1 to 7; used in encoding 
+ *            motion vectors related to search range, as described in 
+ *            [ISO14496-2], subclause 7.6.3. 
+ *   MBType - macro block type, valid in the range 0 to 5 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - updated pointer to the current byte in the bit stream 
+ *            buffer 
+ *   pBitOffset - updated index of the next available bit position in stream 
+ *            buffer referenced by *ppBitStream 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream, 
+ *              *ppBitStream, pBitOffset, pMVCurMB 
+ *    -    *pBitOffset < 0, or *pBitOffset > 7. 
+ *    -    fcodeForward <= 0, or fcodeForward > 7, or MBType < 0. 
+ *
+ */
+OMXResult omxVCM4P2_EncodeMV (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMXVCMotionVector *pMVCurMB,
+    const OMXVCMotionVector*pSrcMVLeftMB,
+    const OMXVCMotionVector *pSrcMVUpperMB,
+    const OMXVCMotionVector *pSrcMVUpperRightMB,
+    OMX_INT fcodeForward,
+    OMXVCM4P2MacroblockType MBType
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodePadMV_PVOP   (6.2.5.1.1)
+ *
+ * Description:
+ * Decodes and pads the four motion vectors associated with a non-intra P-VOP 
+ * macroblock.  For macroblocks of type OMX_VC_INTER4V, the output MV is 
+ * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for 
+ * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to 
+ * all four output MV buffer entries. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the bit 
+ *            stream buffer 
+ *   pBitOffset - pointer to the bit position in the byte pointed to by 
+ *            *ppBitStream. *pBitOffset is valid within [0-7]. 
+ *   pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the 
+ *            motion vector buffers of the macroblocks specially at the left, 
+ *            upper, and upper-right side of the current macroblock, 
+ *            respectively; a value of NULL indicates unavailability.  Note: 
+ *            Any neighborhood macroblock outside the current VOP or video 
+ *            packet or outside the current GOB (when short_video_header is 
+ *            "1") for which gob_header_empty is "0" is treated as 
+ *            transparent, according to [ISO14496-2], subclause 7.6.5. 
+ *   fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream 
+ *            syntax 
+ *   MBType - the type of the current macroblock. If MBType is not equal to 
+ *            OMX_VC_INTER4V, the destination motion vector buffer is still 
+ *            filled with the same decoded vector. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded, so 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream 
+ *   pDstMVCurMB - pointer to the motion vector buffer for the current 
+ *            macroblock; contains four decoded motion vectors 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB 
+ *    -    *pBitOffset exceeds [0,7]
+ *    -    fcodeForward exceeds (0,7]
+ *    -    MBType less than zero
+ *    -    motion vector buffer is not 4-byte aligned. 
+ *    OMX_Sts_Err - status error 
+ *
+ */
+OMXResult omxVCM4P2_DecodePadMV_PVOP (
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMXVCMotionVector *pSrcMVLeftMB,
+    OMXVCMotionVector*pSrcMVUpperMB,
+    OMXVCMotionVector *pSrcMVUpperRightMB,
+    OMXVCMotionVector*pDstMVCurMB,
+    OMX_INT fcodeForward,
+    OMXVCM4P2MacroblockType MBType
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeVLCZigzag_IntraDCVLC   (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients 
+ * for one intra block.  Two versions of the function (DCVLC and ACVLC) are 
+ * provided in order to support the two different methods of processing DC 
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC 
+ * Coefficient Decoding for the Case of Switched VLC Encoding".  
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the 
+ *            bitstream buffer 
+ *   pBitOffset - pointer to the bit position in the current byte referenced 
+ *            by *ppBitStream.  The parameter *pBitOffset is valid in the 
+ *            range [0-7]. 
+ *            Bit Position in one byte:  |Most      Least| 
+ *                    *pBitOffset        |0 1 2 3 4 5 6 7| 
+ *   predDir - AC prediction direction; used to select the zigzag scan 
+ *            pattern; takes one of the following values: 
+ *            -  OMX_VC_NONE - AC prediction not used; 
+ *                             performs classical zigzag scan. 
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction; 
+ *                             performs alternate-vertical zigzag scan; 
+ *            -  OMX_VC_VERTICAL - Vertical prediction; 
+ *                             performs alternate-horizontal zigzag scan. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *   videoComp - video component type (luminance or chrominance) of the 
+ *            current block 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded such 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated such that it points to the current 
+ *            bit position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the coefficient buffer of current block; must be 
+ *            4-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pDst
+ *    -    *pBitOffset exceeds [0,7]
+ *    -    predDir exceeds [0,2]
+ *    -    pDst is not 4-byte aligned 
+ *    OMX_Sts_Err - if:
+ *    -    In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 
+ *    -    At least one of mark bits equals zero 
+ *    -    Illegal stream encountered; code cannot be located in VLC table 
+ *    -    Forbidden code encountered in the VLC FLC table. 
+ *    -    The number of coefficients is greater than 64 
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMX_S16 *pDst,
+    OMX_U8 predDir,
+    OMX_INT shortVideoHeader,
+    OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeVLCZigzag_IntraACVLC   (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients 
+ * for one intra block.  Two versions of the function (DCVLC and ACVLC) are 
+ * provided in order to support the two different methods of processing DC 
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC 
+ * Coefficient Decoding for the Case of Switched VLC Encoding".  
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the 
+ *            bitstream buffer 
+ *   pBitOffset - pointer to the bit position in the current byte referenced 
+ *            by *ppBitStream; *pBitOffset is valid in the range [0-7]. 
+ *            Bit Position in one byte:  |Most      Least| 
+ *                    *pBitOffset        |0 1 2 3 4 5 6 7| 
+ *   predDir - AC prediction direction; selects the zigzag scan pattern; one of: 
+ *            -  OMX_VC_NONE - AC prediction not used; classical zigzag scan. 
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction; performs 
+ *                             alternate-vertical zigzag scan. 
+ *            -  OMX_VC_VERTICAL - Vertical prediction; performs 
+ *                             alternate-horizontal zigzag scan. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *   videoComp - video component type (luminance or chrominance) of the 
+ *            current block (IntraDCVLC version only). 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded such 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated such that it points to the current 
+ *            bit position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the coefficient buffer of current block; must be 
+ *            4-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments: at least one of the following 
+ *              pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst; 
+ *              or at least one of the following conditions is true: 
+ *              *pBitOffset exceeds [0,7], predDir exceeds [0,2], or pDst is 
+ *              not 4-byte aligned. 
+ *    OMX_Sts_Err - in DecodeVLCZigzag_IntraDCVLC, dc_size > 12; at least one 
+ *              of mark bits equals zero; illegal stream encountered (code 
+ *              cannot be located in VLC table); forbidden code encountered in 
+ *              the VLC FLC table; or the number of coefficients is greater than 64. 
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC (
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMX_S16 *pDst,
+    OMX_U8 predDir,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeVLCZigzag_Inter   (6.2.5.2.3)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one inter-coded block. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - double pointer to the current byte in the stream buffer 
+ *   pBitOffset - pointer to the next available bit in the current stream 
+ *            byte referenced by *ppBitStream. The parameter *pBitOffset is 
+ *            valid within the range [0-7]. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded such 
+ *            that it points to the current byte in the stream buffer 
+ *   pBitOffset - *pBitOffset is updated after decoding such that it points 
+ *            to the next available bit in the stream byte referenced by 
+ *            *ppBitStream 
+ *   pDst - pointer to the coefficient buffer of current block; must be 
+ *            4-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pDst
+ *    -    pDst is not 4-byte aligned
+ *    -    *pBitOffset exceeds [0,7]
+ *    OMX_Sts_Err - status error, if:
+ *    -    At least one mark bit is equal to zero 
+ *    -    Encountered an illegal stream code that cannot be found in the VLC table 
+ *    -    Encountered an illegal code in the VLC FLC table 
+ *    -    The number of coefficients is greater than 64 
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_Inter (
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMX_S16 *pDst,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_QuantInvIntra_I   (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded 
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to 
+ * the range [-2048, 2047]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input (quantized) intra/inter block; must be 
+ *            aligned on a 16-byte boundary. 
+ *   QP - quantization parameter (quantizer_scale) 
+ *   videoComp - video component type of the current block. Takes one of the 
+ *            following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra 
+ *            version only). 
+ *   shortVideoHeader - binary flag indicating presence of short_video_header 
+ *            (intra version only). 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the output (dequantized) intra/inter block 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; one or more of the following is 
+ *              true: 
+ *    -    pSrcDst is NULL 
+ *    -    QP <= 0 or QP >= 31 (NOTE: other functions here use QP >= 32; confirm) 
+ *    -    videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. 
+ *
+ */
+OMXResult omxVCM4P2_QuantInvIntra_I (
+    OMX_S16 *pSrcDst,
+    OMX_INT QP,
+    OMXVCM4P2VideoComponent videoComp,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_QuantInvInter_I   (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded 
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to 
+ * the range [-2048, 2047]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input (quantized) intra/inter block; must be 
+ *            aligned on a 16-byte boundary. 
+ *   QP - quantization parameter (quantizer_scale) 
+ *   videoComp - video component type of the current block. Takes one of the 
+ *            following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra 
+ *            version only). 
+ *   shortVideoHeader - binary flag indicating presence of short_video_header 
+ *            (intra version only). 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the output (dequantized) intra/inter block 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; one or more of the following is 
+ *              true: 
+ *    -    pSrcDst is NULL 
+ *    -    QP <= 0 or QP >= 31 (NOTE: other functions here use QP >= 32; confirm) 
+ *    -    videoComp is not OMX_VC_LUMINANCE or OMX_VC_CHROMINANCE (intra only). 
+ *
+ */
+OMXResult omxVCM4P2_QuantInvInter_I (
+    OMX_S16 *pSrcDst,
+    OMX_INT QP
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeBlockCoef_Intra   (6.2.5.4.1)
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inverse 
+ * zigzag positioning, and IDCT, with appropriate clipping on each step, are 
+ * performed on the coefficients. The results are then placed in the output 
+ * frame/plane on a pixel basis.  Note: This function will be used only when 
+ * at least one non-zero AC coefficient of current block exists in the bit 
+ * stream. The DC only condition will be handled in another function. 
+ *
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the bit 
+ *            stream buffer. There is no boundary check for the bit stream 
+ *            buffer. 
+ *   pBitOffset - pointer to the bit position in the byte pointed to by 
+ *            *ppBitStream. *pBitOffset is valid within [0-7]. 
+ *   step - width of the destination plane 
+ *   pCoefBufRow - pointer to the coefficient row buffer; must be aligned on 
+ *            an 8-byte boundary. 
+ *   pCoefBufCol - pointer to the coefficient column buffer; must be aligned 
+ *            on an 8-byte boundary. 
+ *   curQP - quantization parameter of the macroblock which the current block 
+ *            belongs to 
+ *   pQPBuf - pointer to the quantization parameter buffer 
+ *   blockIndex - block index indicating the component type and position as 
+ *            defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5. 
+ *   intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a 
+ *            mechanism to switch between two VLC for coding of Intra DC 
+ *            coefficients as per [ISO14496-2], Table 6-21. 
+ *   ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if 
+ *            the ac coefficients of the first row or first column are 
+ *            differentially coded for intra coded macroblock. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC 
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded, so 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the block in the destination plane; must be aligned on 
+ *            an 8-byte boundary. 
+ *   pCoefBufRow - pointer to the updated coefficient row buffer. 
+ *   pCoefBufCol - pointer to the updated coefficient column buffer.  Note: 
+ *            The coefficient buffers must be updated in accordance with the 
+ *            update procedure defined in section 6.2.2. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol, 
+ *         pQPBuf, pDst. 
+ *    -    *pBitOffset exceeds [0,7] 
+ *    -    curQP exceeds (1, 31)
+ *    -    blockIndex exceeds [0,5]
+ *    -    step is not a multiple of 8
+ *    -    a pointer alignment requirement was violated. 
+ *    OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Intra.  
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra (
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMX_U8 *pDst,
+    OMX_INT step,
+    OMX_S16 *pCoefBufRow,
+    OMX_S16 *pCoefBufCol,
+    OMX_U8 curQP,
+    const OMX_U8 *pQPBuf,
+    OMX_INT blockIndex,
+    OMX_INT intraDCVLC,
+    OMX_INT ACPredFlag,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeBlockCoef_Inter   (6.2.5.4.2)
+ *
+ * Description:
+ * Decodes the INTER block coefficients. This function performs inverse 
+ * quantization, inverse zigzag positioning, and IDCT (with appropriate 
+ * clipping on each step) on the coefficients. The results (residuals) are 
+ * placed in a contiguous array of 64 elements. For INTER block, the output 
+ * buffer holds the residuals for further reconstruction. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the bit 
+ *            stream buffer. There is no boundary check for the bit stream 
+ *            buffer. 
+ *   pBitOffset - pointer to the bit position in the byte pointed to by 
+ *            *ppBitStream. *pBitOffset is valid within [0-7] 
+ *   QP - quantization parameter 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC 
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded, so 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the decoded residual buffer (a contiguous array of 64 
+ *            elements of OMX_S16 data type); must be aligned on a 16-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pDst 
+ *    -    *pBitOffset exceeds [0,7]
+ *    -    QP <= 0. 
+ *    -    pDst is not 16-byte aligned 
+ *    OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter. 
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter (
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMX_S16 *pDst,
+    OMX_INT QP,
+    OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_PredictReconCoefIntra   (6.2.5.4.3)
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block.  Prior
+ * to the function call, prediction direction (predDir) should be selected as
+ * specified in [ISO14496-2], subclause 7.4.3.1.
+ *
+ * Input Arguments:
+ *
+ *   pSrcDst - pointer to the coefficient buffer which contains the quantized
+ *            coefficient residuals (PQF) of the current block; must be
+ *            aligned on a 4-byte boundary.  The output coefficients are
+ *            saturated to the range [-2048, 2047].
+ *   pPredBufRow - pointer to the coefficient row buffer; must be aligned on
+ *            a 4-byte boundary.
+ *   pPredBufCol - pointer to the coefficient column buffer; must be aligned
+ *            on a 4-byte boundary.
+ *   curQP - quantization parameter of the current block. curQP may be equal
+ *            to predQP, especially when the current block and the predictor
+ *            block are in the same macroblock.
+ *   predQP - quantization parameter of the predictor block
+ *   predDir - indicates the prediction direction which takes one of the
+ *            following values: OMX_VC_HORIZONTAL - predict horizontally;
+ *            OMX_VC_VERTICAL - predict vertically
+ *   ACPredFlag - a flag indicating if AC prediction should be performed. It
+ *            is equal to ac_pred_flag in the bit stream syntax of MPEG-4
+ *   videoComp - video component type (luminance or chrominance) of the
+ *            current block
+ *
+ * Output Arguments:
+ *
+ *   pSrcDst - pointer to the coefficient buffer which contains the quantized
+ *            coefficients (QF) of the current block
+ *   pPredBufRow - pointer to the updated coefficient row buffer
+ *   pPredBufCol - pointer to the updated coefficient column buffer
+ *   Note - Buffer update: Update the AC prediction buffer (both row and
+ *            column buffer).
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *        -    At least one of the pointers is NULL:
+ *              pSrcDst, pPredBufRow, or pPredBufCol.
+ *        -    curQP <= 0,
+ *        -    predQP <= 0,
+ *        -    curQP > 31,
+ *        -    predQP > 31,
+ *        -    predDir is outside the range [1,2]
+ *        -    pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_PredictReconCoefIntra (
+    OMX_S16 *pSrcDst,
+    OMX_S16 *pPredBufRow,
+    OMX_S16 *pPredBufCol,
+    OMX_INT curQP,
+    OMX_INT predQP,
+    OMX_INT predDir,
+    OMX_INT ACPredFlag,
+    OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function:  omxVCM4P2_MCReconBlock   (6.2.5.5.1)
+ *
+ * Description:
+ * Performs motion compensation prediction for an 8x8 block using
+ * interpolation described in [ISO14496-2], subclause 7.6.2.
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the block in the reference plane.
+ *   srcStep - distance between the start of consecutive lines in the
+ *            reference plane, in bytes; must be a multiple of 8.
+ *   dstStep - distance between the start of consecutive lines in the
+ *            destination plane, in bytes; must be a multiple of 8.
+ *   pSrcResidue - pointer to a buffer containing the 16-bit prediction
+ *            residuals; must be 16-byte aligned. If the pointer is NULL, then
+ *            no prediction is done, only motion compensation, i.e., the block
+ *            is moved with interpolation.
+ *   predictType - bilinear interpolation type, as defined in section
+ *            6.2.1.2.
+ *   rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the destination buffer; must be 8-byte aligned.  If
+ *            prediction residuals are added then output intensities are
+ *            clipped to the range [0,255].
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ *              conditions:
+ *    -    pDst is not 8-byte aligned.
+ *    -    pSrcResidue is not 16-byte aligned.
+ *    -    one or more of the following pointers is NULL: pSrc or pDst.
+ *    -    either srcStep or dstStep is not a multiple of 8.
+ *    -    invalid type specified for the parameter predictType.
+ *    -    the parameter rndVal is not equal to either 0 or 1.
+ *
+ */
+OMXResult omxVCM4P2_MCReconBlock (
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    const OMX_S16 *pSrcResidue,
+    OMX_U8 *pDst,
+    OMX_INT dstStep,
+    OMX_INT predictType,
+    OMX_INT rndVal
+);
+
+
+
+/* 6.3.1.1 Intra 16x16 Prediction Modes  */
+/* Enumerates the intra_16x16 macroblock prediction modes; this is the type of the predMode argument of omxVCM4P10_PredictIntra_16x16.  */
+
+typedef enum {
+    OMX_VC_16X16_VERT = 0,  /**< Intra_16x16_Vertical */
+    OMX_VC_16X16_HOR = 1,   /**< Intra_16x16_Horizontal */
+    OMX_VC_16X16_DC = 2,    /**< Intra_16x16_DC */
+    OMX_VC_16X16_PLANE = 3  /**< Intra_16x16_Plane */
+} OMXVCM4P10Intra16x16PredMode;
+
+
+
+/* 6.3.1.2 Intra 4x4 Prediction Modes  */
+/* Enumerates the intra_4x4 macroblock prediction modes; this is the type of the predMode argument of omxVCM4P10_PredictIntra_4x4.  */
+
+typedef enum {
+    OMX_VC_4X4_VERT = 0,     /**< Intra_4x4_Vertical */
+    OMX_VC_4X4_HOR = 1,      /**< Intra_4x4_Horizontal */
+    OMX_VC_4X4_DC = 2,       /**< Intra_4x4_DC */
+    OMX_VC_4X4_DIAG_DL = 3,  /**< Intra_4x4_Diagonal_Down_Left */
+    OMX_VC_4X4_DIAG_DR = 4,  /**< Intra_4x4_Diagonal_Down_Right */
+    OMX_VC_4X4_VR = 5,       /**< Intra_4x4_Vertical_Right */
+    OMX_VC_4X4_HD = 6,       /**< Intra_4x4_Horizontal_Down */
+    OMX_VC_4X4_VL = 7,       /**< Intra_4x4_Vertical_Left */
+    OMX_VC_4X4_HU = 8        /**< Intra_4x4_Horizontal_Up */
+} OMXVCM4P10Intra4x4PredMode;
+
+
+
+/* 6.3.1.3 Chroma Prediction Modes  */
+/* Enumerates the intra chroma prediction modes; this is the type of the predMode argument of omxVCM4P10_PredictIntraChroma_8x8.  */
+
+typedef enum {
+    OMX_VC_CHROMA_DC = 0,    /**< Intra_Chroma_DC */
+    OMX_VC_CHROMA_HOR = 1,   /**< Intra_Chroma_Horizontal */
+    OMX_VC_CHROMA_VERT = 2,  /**< Intra_Chroma_Vertical */
+    OMX_VC_CHROMA_PLANE = 3  /**< Intra_Chroma_Plane */
+} OMXVCM4P10IntraChromaPredMode;
+
+
+
+/* 6.3.1.4 Motion Estimation Modes  */
+/* Enumerates the H.264 motion estimation (search) modes.  */
+
+typedef enum {
+    OMX_VC_M4P10_FAST_SEARCH = 0, /**< Fast motion search */
+    OMX_VC_M4P10_FULL_SEARCH = 1  /**< Full motion search */
+} OMXVCM4P10MEMode;
+
+
+
+/* 6.3.1.5 Macroblock Types  */
+/* Enumerates the H.264 macroblock types; this is the type of the mbType field of OMXVCM4P10MBInfo.  */
+
+typedef enum {
+    OMX_VC_P_16x16  = 0, /* all values are defined by [ISO14496-10] */
+    OMX_VC_P_16x8  = 1,
+    OMX_VC_P_8x16  = 2,
+    OMX_VC_P_8x8  = 3,
+    OMX_VC_PREF0_8x8  = 4,
+    OMX_VC_INTER_SKIP  = 5,
+    OMX_VC_INTRA_4x4  = 8, /* note: values 6 and 7 are not assigned in this enumeration */
+    OMX_VC_INTRA_16x16  = 9,
+    OMX_VC_INTRA_PCM = 10 
+} OMXVCM4P10MacroblockType;
+
+
+
+/* 6.3.1.6 Sub-Macroblock Types  */
+/* Enumerates the H.264 sub-macroblock types; this is the element type of the subMBType[4] field of OMXVCM4P10MBInfo.  */
+
+typedef enum {
+    OMX_VC_SUB_P_8x8 = 0, /* all values are defined by [ISO14496-10] */
+    OMX_VC_SUB_P_8x4 = 1,
+    OMX_VC_SUB_P_4x8 = 2,
+    OMX_VC_SUB_P_4x4 = 3 
+} OMXVCM4P10SubMacroblockType;
+
+
+
+/* 6.3.1.7 Variable Length Coding (VLC) Information  */
+
+typedef struct {
+    OMX_U8 uTrailing_Ones;      /* Number of trailing ones; 3 at most */
+    OMX_U8 uTrailing_One_Signs; /* Signs of the trailing ones (NOTE(review): bit layout not shown here - confirm) */
+    OMX_U8 uNumCoeffs;          /* Total number of non-zero coefs, including trailing ones */
+    OMX_U8 uTotalZeros;         /* Total number of zero coefs */
+    OMX_S16 iLevels[16];        /* Levels of non-zero coefs, in reverse zig-zag order */
+    OMX_U8 uRuns[16];           /* Runs for levels and trailing ones, in reverse zig-zag order */
+} OMXVCM4P10VLCInfo;
+
+
+
+/* 6.3.1.8 Macroblock Information  */
+
+typedef struct {
+    OMX_S32 sliceId;                          /* slice number */
+    OMXVCM4P10MacroblockType mbType;          /* MB type */
+    OMXVCM4P10SubMacroblockType subMBType[4]; /* sub-block type */
+    OMX_S32 qpy;                              /* qp for luma */
+    OMX_S32 qpc;                              /* qp for chroma */
+    OMX_U32 cbpy;                             /* CBP Luma */
+    OMX_U32 cbpc;                             /* CBP Chroma */
+    OMXVCMotionVector pMV0[4][4]; /* motion vector, represented using 1/4-pel units, pMV0[blocky][blockx] (blocky = 0~3, blockx = 0~3) */
+    OMXVCMotionVector pMVPred[4][4]; /* motion vector prediction, represented using 1/4-pel units, pMVPred[blocky][blockx] (blocky = 0~3, blockx = 0~3) */
+    OMX_U8 pRefL0Idx[4];                      /* reference picture indices */
+    OMXVCM4P10Intra16x16PredMode Intra16x16PredMode; /* best intra 16x16 prediction mode */
+    OMXVCM4P10Intra4x4PredMode pIntra4x4PredMode[16]; /* best intra 4x4 prediction mode for each of the 16 blocks; block order said to follow pMV0 above (NOTE(review): exact flattening of [blocky][blockx] not shown here - confirm) */
+} OMXVCM4P10MBInfo, *OMXVCM4P10MBInfoPtr;
+
+
+
+/* 6.3.1.9 Motion Estimation Parameters  */
+
+typedef struct {
+    OMX_S32 blockSplitEnable8x8; /* enables splitting into 16x8, 8x16, 8x8 blocks */
+    OMX_S32 blockSplitEnable4x4; /* enables splitting into 8x4, 4x8, 4x4 blocks */
+    OMX_S32 halfSearchEnable;    /* NOTE(review): presumably enables half-pel search - confirm */
+    OMX_S32 quarterSearchEnable; /* NOTE(review): presumably enables quarter-pel search - confirm */
+    OMX_S32 intraEnable4x4;      /* 1=enable, 0=disable */
+    OMX_S32 searchRange16x16;    /* integer pixel units */
+    OMX_S32 searchRange8x8;      /* NOTE(review): presumably integer pixel units, as for searchRange16x16 - confirm */
+    OMX_S32 searchRange4x4;      /* NOTE(review): presumably integer pixel units, as for searchRange16x16 - confirm */
+} OMXVCM4P10MEParams;
+
+
+
+/**
+ * Function:  omxVCM4P10_PredictIntra_4x4   (6.3.3.1.1)
+ *
+ * Description:
+ * Perform Intra_4x4 prediction for luma samples. If the upper-right block is
+ * not available, then duplication work should be handled inside the function.
+ * Users need not define them outside.
+ *
+ * Input Arguments:
+ *
+ *   pSrcLeft -  Pointer to the buffer of 4 left pixels:
+ *                  p[x, y] (x = -1, y = 0..3)
+ *   pSrcAbove - Pointer to the buffer of 8 above pixels:
+ *                  p[x,y] (x = 0..7, y =-1);
+ *               must be aligned on a 4-byte boundary.
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ *   leftStep - Step of left pixel buffer; must be a multiple of 4.
+ *   dstStep - Step of the destination buffer; must be a multiple of 4.
+ *   predMode - Intra_4x4 prediction mode (OMXVCM4P10Intra4x4PredMode).
+ *   availability - Neighboring 4x4 block availability flag, refer to
+ *             "Neighboring Macroblock Availability" .
+ *
+ * Output Arguments:
+ *
+ *   pDst - Pointer to the destination buffer; must be aligned on a 4-byte
+ *            boundary.
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr.
+ *    If one of the following cases occurs, the function returns
+ *              OMX_Sts_BadArgErr:
+ *    pDst is NULL.
+ *    dstStep < 4, or dstStep is not a multiple of 4.
+ *    leftStep is not a multiple of 4.
+ *    predMode is not in the valid range of enumeration
+ *              OMXVCM4P10Intra4x4PredMode.
+ *    predMode is OMX_VC_4X4_VERT, but availability doesn't set OMX_VC_UPPER
+ *              indicating p[x,-1] (x = 0..3) is not available.
+ *    predMode is OMX_VC_4X4_HOR, but availability doesn't set OMX_VC_LEFT
+ *              indicating p[-1,y] (y = 0..3) is not available.
+ *    predMode is OMX_VC_4X4_DIAG_DL, but availability doesn't set
+ *              OMX_VC_UPPER indicating p[x, -1] (x = 0..3) is not available.
+ *    predMode is OMX_VC_4X4_DIAG_DR, but availability doesn't set
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ *              available.
+ *    predMode is OMX_VC_4X4_VR, but availability doesn't set
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ *              available.
+ *    predMode is OMX_VC_4X4_HD, but availability doesn't set
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ *              available.
+ *    predMode is OMX_VC_4X4_VL, but availability doesn't set OMX_VC_UPPER
+ *              indicating p[x,-1] (x = 0..3) is not available.
+ *    predMode is OMX_VC_4X4_HU, but availability doesn't set OMX_VC_LEFT
+ *              indicating p[-1,y] (y = 0..3) is not available.
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ *    either pSrcAbove or pDst is not aligned on a 4-byte boundary.
+ *
+ * Note:
+ *     pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ *     they are not used by intra prediction as implied in predMode.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_4x4 (
+    const OMX_U8 *pSrcLeft,
+    const OMX_U8 *pSrcAbove,
+    const OMX_U8 *pSrcAboveLeft,
+    OMX_U8 *pDst,
+    OMX_INT leftStep,
+    OMX_INT dstStep,
+    OMXVCM4P10Intra4x4PredMode predMode,
+    OMX_S32 availability
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_PredictIntra_16x16   (6.3.3.1.2)
+ *
+ * Description:
+ * Perform Intra_16x16 prediction for luma samples. If the upper-right block
+ * is not available, then duplication work should be handled inside the
+ * function. Users need not define them outside.
+ *
+ * Input Arguments:
+ *
+ *   pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y =
+ *            0..15)
+ *   pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15,
+ *            y= -1); must be aligned on a 16-byte boundary.
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ *   leftStep - Step of left pixel buffer; must be a multiple of 16.
+ *   dstStep - Step of the destination buffer; must be a multiple of 16.
+ *   predMode - Intra_16x16 prediction mode; see OMXVCM4P10Intra16x16PredMode.
+ *   availability - Neighboring 16x16 MB availability flag. Refer to
+ *                  section 3.4.4.
+ *
+ * Output Arguments:
+ *
+ *   pDst - Pointer to the destination buffer; must be aligned on a 16-byte
+ *            boundary.
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr.
+ *    If one of the following cases occurs, the function returns
+ *              OMX_Sts_BadArgErr:
+ *    pDst is NULL.
+ *    dstStep < 16, or dstStep is not a multiple of 16.
+ *    leftStep is not a multiple of 16.
+ *    predMode is not in the valid range of enumeration
+ *              OMXVCM4P10Intra16x16PredMode
+ *    predMode is OMX_VC_16X16_VERT, but availability doesn't set
+ *              OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available.
+ *    predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT
+ *              indicating p[-1,y] (y = 0..15) is not available.
+ *    predMode is OMX_VC_16X16_PLANE, but availability doesn't set
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ *              p[x,-1](x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not
+ *              available.
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ *    either pSrcAbove or pDst is not aligned on a 16-byte boundary.
+ *
+ * Note:
+ *     pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ *     they are not used by intra prediction implied in predMode.
+ * Note:
+ *     OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_16x16 (
+    const OMX_U8 *pSrcLeft,
+    const OMX_U8 *pSrcAbove,
+    const OMX_U8 *pSrcAboveLeft,
+    OMX_U8 *pDst,
+    OMX_INT leftStep,
+    OMX_INT dstStep,
+    OMXVCM4P10Intra16x16PredMode predMode,
+    OMX_S32 availability
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_PredictIntraChroma_8x8   (6.3.3.1.3)
+ *
+ * Description:
+ * Performs intra prediction for chroma samples.
+ *
+ * Input Arguments:
+ *
+ *   pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y=
+ *            0..7).
+ *   pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y
+ *            = -1); must be aligned on an 8-byte boundary.
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ *   leftStep - Step of left pixel buffer; must be a multiple of 8.
+ *   dstStep - Step of the destination buffer; must be a multiple of 8.
+ *   predMode - Intra chroma prediction mode; see OMXVCM4P10IntraChromaPredMode.
+ *   availability - Neighboring chroma block availability flag, please refer
+ *            to  "Neighboring Macroblock Availability".
+ *
+ * Output Arguments:
+ *
+ *   pDst - Pointer to the destination buffer; must be aligned on an 8-byte
+ *            boundary.
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr.
+ *    If any of the following cases occurs, the function returns
+ *              OMX_Sts_BadArgErr:
+ *    pDst is NULL.
+ *    dstStep < 8 or dstStep is not a multiple of 8.
+ *    leftStep is not a multiple of 8.
+ *    predMode is not in the valid range of enumeration
+ *              OMXVCM4P10IntraChromaPredMode.
+ *    predMode is OMX_VC_CHROMA_VERT, but availability doesn't set
+ *              OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available.
+ *    predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT
+ *              indicating p[-1,y] (y = 0..7) is not available.
+ *    predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ *              p[x,-1](x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not
+ *              available.
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ *    either pSrcAbove or pDst is not aligned on an 8-byte boundary.
+ *
+ *  Note: pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ *  they are not used by intra prediction implied in predMode.
+ *
+ *  Note: OMX_VC_UPPER_RIGHT is not used in intra chroma prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntraChroma_8x8 (
+    const OMX_U8 *pSrcLeft,
+    const OMX_U8 *pSrcAbove,
+    const OMX_U8 *pSrcAboveLeft,
+    OMX_U8 *pDst,
+    OMX_INT leftStep,
+    OMX_INT dstStep,
+    OMXVCM4P10IntraChromaPredMode predMode,
+    OMX_S32 availability
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateLuma   (6.3.3.2.1)
+ *
+ * Description:
+ * Performs quarter-pixel interpolation for inter luma MB. It is assumed that
+ * the frame is already padded when calling this function.
+ *
+ * Input Arguments:
+ *
+ *   pSrc - Pointer to the source reference frame buffer
+ *   srcStep - reference frame step, in bytes; must be a multiple of roi.width
+ *   dstStep - destination frame step, in bytes; must be a multiple of
+ *            roi.width
+ *   dx - Fractional part of horizontal motion vector component in 1/4 pixel
+ *            unit; valid in the range [0,3]
+ *   dy - Fractional part of vertical motion vector component in 1/4 pixel
+ *            unit; valid in the range [0,3]
+ *   roi - Dimension of the interpolation region; the parameters roi.width and
+ *            roi.height must be equal to either 4, 8, or 16.
+ *
+ * Output Arguments:
+ *
+ *   pDst - Pointer to the destination frame buffer:
+ *          if roi.width==4,  4-byte alignment required
+ *          if roi.width==8,  8-byte alignment required
+ *          if roi.width==16, 16-byte alignment required
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr.
+ *    If one of the following cases occurs, the function returns
+ *              OMX_Sts_BadArgErr:
+ *    pSrc or pDst is NULL.
+ *    srcStep or dstStep < roi.width.
+ *    dx or dy is out of range [0,3].
+ *    roi.width or roi.height is out of range {4, 8, 16}.
+ *    roi.width is equal to 4, but pDst is not 4-byte aligned.
+ *    roi.width is equal to 8 or 16, but pDst is not 8-byte aligned (NOTE(review): 16-byte alignment is required above for roi.width==16 - confirm which applies).
+ *    srcStep or dstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_InterpolateLuma (
+    const OMX_U8 *pSrc,
+    OMX_S32 srcStep,
+    OMX_U8 *pDst,
+    OMX_S32 dstStep,
+    OMX_S32 dx,
+    OMX_S32 dy,
+    OMXSize roi
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateChroma   (6.3.3.2.2)
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB.
+ *
+ * Input Arguments:
+ *
+ *   pSrc - Pointer to the source reference frame buffer
+ *   srcStep - Reference frame step in bytes
+ *   dstStep - Destination frame step in bytes; must be a multiple of
+ *            roi.width.
+ *   dx - Fractional part of horizontal motion vector component in 1/8 pixel
+ *            unit; valid in the range [0,7]
+ *   dy - Fractional part of vertical motion vector component in 1/8 pixel
+ *            unit; valid in the range [0,7]
+ *   roi - Dimension of the interpolation region; the parameters roi.width and
+ *            roi.height must be equal to either 2, 4, or 8.
+ *
+ * Output Arguments:
+ *
+ *   pDst - Pointer to the destination frame buffer:
+ *         if roi.width==2,  2-byte alignment required
+ *         if roi.width==4,  4-byte alignment required
+ *         if roi.width==8,  8-byte alignment required
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr.
+ *    If one of the following cases occurs, the function returns
+ *              OMX_Sts_BadArgErr:
+ *    pSrc or pDst is NULL.
+ *    srcStep or dstStep < 8.
+ *    dx or dy is out of range [0,7].
+ *    roi.width or roi.height is out of range {2,4,8}.
+ *    roi.width is equal to 2, but pDst is not 2-byte aligned.
+ *    roi.width is equal to 4, but pDst is not 4-byte aligned.
+ *    roi.width is equal to 8, but pDst is not 8-byte aligned.
+ *    srcStep or dstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_InterpolateChroma (
+    const OMX_U8 *pSrc,
+    OMX_S32 srcStep,
+    OMX_U8 *pDst,
+    OMX_S32 dstStep,
+    OMX_S32 dx,
+    OMX_S32 dy,
+    OMXSize roi
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingLuma_VerEdge_I   (6.3.3.3.1)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the luma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ *   pSrcDst - Pointer to the input macroblock; must be 16-byte aligned.
+ *   srcdstStep - Step of the arrays; must be a multiple of 16.
+ *   pAlpha - Array of size 2 of alpha thresholds (the first item is the alpha
+ *            threshold for the external vertical edge, and the second item is
+ *            for the internal vertical edge); per [ISO14496-10] alpha values
+ *            must be in the range [0,255].
+ *   pBeta - Array of size 2 of beta thresholds (the first item is the beta
+ *            threshold for the external vertical edge, and the second item is
+ *            for the internal vertical edge); per [ISO14496-10] beta values
+ *            must be in the range [0,18].
+ *   pThresholds - Array of size 16 of Thresholds (TC0) (values for the left
+ *            edge of each 4x4 block, arranged in vertical block order); must
+ *            be aligned on a 4-byte boundary.  Per [ISO14496-10] values must
+ *            be in the range [0,25].
+ *   pBS - Array of size 16 of BS parameters (arranged in vertical block
+ *            order); valid in the range [0,4] with the following
+ *            restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii)
+ *            pBS[i]== 4 if and only if pBS[i^3]== 4.  Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDst - Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr.
+ *    If one of the following cases occurs, the function returns
+ *              OMX_Sts_BadArgErr:
+ *    Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds, or pBS
+ *              is NULL.
+ *    Either pThresholds or pBS is not aligned on a 4-byte boundary.
+ *    pSrcDst is not 16-byte aligned.
+ *    srcdstStep is not a multiple of 16.
+ *    pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ *    pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ *    One or more entries in the table pThresholds[0..15] is outside of the
+ *              range [0,25].
+ *    pBS is out of range, i.e., one of the following conditions is true:
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 &&
+ *              pBS[i^3]!=4) for 0<=i<=3.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I (
+    OMX_U8 *pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8 *pAlpha,
+    const OMX_U8 *pBeta,
+    const OMX_U8 *pThresholds,
+    const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I   (6.3.3.3.2)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four horizontal edges of the luma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ *   pSrcDst - pointer to the input macroblock; must be 16-byte aligned.
+ *   srcdstStep - step of the arrays; must be a multiple of 16.
+ *   pAlpha - array of size 2 of alpha thresholds (the first item is the alpha
+ *            threshold for the external horizontal edge, and the second item
+ *            is for the internal horizontal edge); per [ISO14496-10] alpha
+ *            values must be in the range [0,255].
+ *   pBeta - array of size 2 of beta thresholds (the first item is the beta
+ *            threshold for the external horizontal edge, and the second item
+ *            is for the internal horizontal edge). Per [ISO14496-10] beta
+ *            values must be in the range [0,18].
+ *   pThresholds - array of size 16 containing thresholds, TC0, for the top
+ *            horizontal edge of each 4x4 block, arranged in horizontal block
+ *            order; must be aligned on a 4-byte boundary.  Per [ISO14496-10]
+ *            values must be in the range [0,25].
+ *   pBS - array of size 16 of BS parameters (arranged in horizontal block
+ *            order); valid in the range [0,4] with the following
+ *            restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii)
+ *            pBS[i]== 4 if and only if pBS[i^3]== 4.  Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDst - Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *
+ *    OMX_Sts_BadArgErr, if one of the following cases occurs:
+ *    -    one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ *              pBeta, pThresholds, or pBS.
+ *    -    either pThresholds or pBS is not aligned on a 4-byte boundary.
+ *    -    pSrcDst is not 16-byte aligned.
+ *    -    srcdstStep is not a multiple of 16.
+ *    -    pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ *    -    pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ *    -    One or more entries in the table pThresholds[0..15] is
+ *         outside of the range [0,25].
+ *    -    pBS is out of range, i.e., one of the following conditions is true:
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ *              (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I (
+    OMX_U8 *pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8 *pAlpha,
+    const OMX_U8 *pBeta,
+    const OMX_U8 *pThresholds,
+    const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingChroma_VerEdge_I   (6.3.3.3.3)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ *   pSrcDst - Pointer to the input macroblock; must be 8-byte aligned.
+ *   srcdstStep - Step of the arrays; must be a multiple of 8.
+ *   pAlpha - Array of size 2 of alpha thresholds (the first item is the alpha
+ *            threshold for the external vertical edge, and the second item is
+ *            for the internal vertical edge); per [ISO14496-10] alpha values
+ *            must be in the range [0,255].
+ *   pBeta - Array of size 2 of beta thresholds (the first item is the beta
+ *            threshold for the external vertical edge, and the second item is
+ *            for the internal vertical edge); per [ISO14496-10] beta values
+ *            must be in the range [0,18].
+ *   pThresholds - Array of size 8 containing thresholds, TC0, for the left
+ *            vertical edge of each 4x2 chroma block, arranged in vertical
+ *            block order; must be aligned on a 4-byte boundary.  Per
+ *            [ISO14496-10] values must be in the range [0,25].
+ *   pBS - Array of size 16 of BS parameters (values for each 2x2 chroma
+ *            block, arranged in vertical block order). This parameter is the
+ *            same as the pBS parameter passed into
+ *            omxVCM4P10_FilterDeblockingLuma_VerEdge_I; valid in the range
+ *            [0,4] with the following restrictions: i) pBS[i]== 4 may occur
+ *            only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4.  Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDst - Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ *    -    one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ *              pBeta, pThresholds, or pBS.
+ *    -    pSrcDst is not 8-byte aligned.
+ *    -    srcdstStep is not a multiple of 8.
+ *    -    pThresholds is not 4-byte aligned.
+ *    -    pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ *    -    pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ *    -    One or more entries in the table pThresholds[0..7] is outside
+ *         of the range [0,25].
+ *    -    pBS is out of range, i.e., one of the following conditions is true:
+ *         pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ *         (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ *    -    pBS is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I (
+    OMX_U8 *pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8 *pAlpha,
+    const OMX_U8 *pBeta,
+    const OMX_U8 *pThresholds,
+    const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I   (6.3.3.3.4)
+ *
+ * Description:
+ * Performs in-place deblock filtering on the horizontal edges of the chroma 
+ * macroblock (8x8). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 8-byte aligned. 
+ *   srcdstStep - array step; must be a multiple of 8. 
+ *   pAlpha - array of size 2 containing alpha thresholds; the first element 
+ *            contains the threshold for the external horizontal edge, and the 
+ *            second element contains the threshold for internal horizontal 
+ *            edge.  Per [ISO14496-10] alpha values must be in the range 
+ *            [0,255]. 
+ *   pBeta - array of size 2 containing beta thresholds; the first element 
+ *            contains the threshold for the external horizontal edge, and the 
+ *            second element contains the threshold for the internal 
+ *            horizontal edge.  Per [ISO14496-10] beta values must be in the 
+ *            range [0,18]. 
+ *   pThresholds - array of size 8 containing thresholds, TC0, for the top 
+ *            horizontal edge of each 2x4 chroma block, arranged in horizontal 
+ *            block order; must be aligned on a 4-byte boundary.  Per 
+ *            [ISO14496-10] values must be in the range [0,25]. 
+ *   pBS - array of size 16 containing BS parameters for each 2x2 chroma 
+ *            block, arranged in horizontal block order; valid in the range 
+ *            [0,4] with the following restrictions: i) pBS[i]== 4 may occur 
+ *            only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4. 
+ *            Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst -Pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr, if one of the following cases occurs: 
+ *    -    any of the following pointers is NULL: 
+ *         pSrcDst, pAlpha, pBeta, pThresholds, or pBS. 
+ *    -    pSrcDst is not 8-byte aligned. 
+ *    -    srcdstStep is not a multiple of 8. 
+ *    -    pThresholds is not 4-byte aligned. 
+ *    -    pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. 
+ *    -    pBeta[0] and/or pBeta[1] is outside the range [0,18]. 
+ *    -    One or more entries in the table pThresholds[0..7] is outside 
+ *         of the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *              (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ *    -    pBS is not 4-byte aligned. 
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I (
+    OMX_U8 *pSrcDst,  /* in/out: chroma macroblock, filtered in place; 8-byte aligned */
+    OMX_S32 srcdstStep,  /* in: array step; must be a multiple of 8 */
+    const OMX_U8 *pAlpha,  /* in: 2 alpha thresholds {external, internal}, each in [0,255] */
+    const OMX_U8 *pBeta,  /* in: 2 beta thresholds {external, internal}, each in [0,18] */
+    const OMX_U8 *pThresholds,  /* in: 8 TC0 values, each in [0,25]; 4-byte aligned */
+    const OMX_U8 *pBS  /* in: 16 BS values, each in [0,4]; 4-byte aligned */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DeblockLuma_I   (6.3.3.3.5)
+ *
+ * Description:
+ * This function performs in-place deblock filtering of the horizontal and
+ * vertical edges of a luma macroblock (16x16). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 16-byte aligned. 
+ *   srcdstStep - image width; must be a multiple of 16. 
+ *   pAlpha - pointer to a 2x2 table of alpha thresholds, organized as 
+ *            follows: {external vertical edge, internal vertical edge, 
+ *            external horizontal edge, internal horizontal edge }.  Per 
+ *            [ISO14496-10] alpha values must be in the range [0,255]. 
+ *   pBeta - pointer to a 2x2 table of beta thresholds, organized as follows: 
+ *            {external vertical edge, internal vertical edge, external 
+ *            horizontal edge, internal horizontal edge }.  Per [ISO14496-10] 
+ *            beta values must be in the range [0,18]. 
+ *   pThresholds - pointer to a 16x2 table of threshold (TC0), organized as 
+ *            follows: {values for the left or above edge of each 4x4 block, 
+ *            arranged in vertical block order and then in horizontal block 
+ *            order}; must be aligned on a 4-byte boundary.  Per [ISO14496-10] 
+ *            values must be in the range [0,25]. 
+ *   pBS - pointer to a 16x2 table of BS parameters arranged in scan block 
+ *            order for vertical edges and then horizontal edges; valid in the 
+ *            range [0,4] with the following restrictions: i) pBS[i]== 4 may 
+ *            occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 
+ *            4. Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments 
+ *    -     one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds or pBS. 
+ *    -    pSrcDst is not 16-byte aligned. 
+ *    -    either pThresholds or pBS is not aligned on a 4-byte boundary. 
+ *    -    one or more entries in the table pAlpha[0..3] is outside the range 
+ *              [0,255]. 
+ *    -    one or more entries in the table pBeta[0..3] is outside the range 
+ *              [0,18]. 
+ *    -    one or more entries in the table pThresholds[0..31] is outside of
+ *              the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *             (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *    -    srcdstStep is not a multiple of 16. 
+ *
+ */
+OMXResult omxVCM4P10_DeblockLuma_I (
+    OMX_U8 *pSrcDst,  /* in/out: luma macroblock, filtered in place; 16-byte aligned */
+    OMX_S32 srcdstStep,  /* in: image width; must be a multiple of 16 */
+    const OMX_U8 *pAlpha,  /* in: 2x2 table of alpha thresholds, each in [0,255] */
+    const OMX_U8 *pBeta,  /* in: 2x2 table of beta thresholds, each in [0,18] */
+    const OMX_U8 *pThresholds,  /* in: 16x2 table of TC0, each in [0,25]; 4-byte aligned */
+    const OMX_U8 *pBS  /* in: 16x2 table of BS values, each in [0,4]; 4-byte aligned */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DeblockChroma_I   (6.3.3.3.6)
+ *
+ * Description:
+ * Performs in-place deblocking filtering on all edges of the chroma 
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 8-byte aligned. 
+ *   srcdstStep - step of the arrays; must be a multiple of 8. 
+ *   pAlpha - pointer to a 2x2 array of alpha thresholds, organized as 
+ *            follows: {external vertical edge, internal vertical edge, 
+ *            external horizontal edge, internal horizontal edge }.  Per 
+ *            [ISO14496-10] alpha values must be in the range [0,255]. 
+ *   pBeta - pointer to a 2x2 array of Beta Thresholds, organized as follows: 
+ *            { external vertical edge, internal vertical edge, external 
+ *            horizontal edge, internal horizontal edge }.  Per [ISO14496-10] 
+ *            beta values must be in the range [0,18]. 
+ *   pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left 
+ *            or above edge of each 4x2 or 2x4 block, arranged in vertical 
+ *            block order and then in horizontal block order); must be aligned 
+ *            on a 4-byte boundary. Per [ISO14496-10] values must be in the 
+ *            range [0,25]. 
+ *   pBS - array of size 16x2 of BS parameters (arranged in scan block order 
+ *            for vertical edges and then horizontal edges); valid in the 
+ *            range [0,4] with the following restrictions: i) pBS[i]== 4 may 
+ *            occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 
+ *            4.  Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments 
+ *    -   one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds, or pBS. 
+ *    -   pSrcDst is not 8-byte aligned. 
+ *    -   either pThresholds or pBS is not 4-byte aligned. 
+ *    -   one or more entries in the table pAlpha[0..3] is outside the range 
+ *              [0,255]. 
+ *    -   one or more entries in the table pBeta[0..3] is outside the range 
+ *              [0,18]. 
+ *    -   one or more entries in the table pThresholds[0..15] is outside of
+ *              the range [0,25]. 
+ *    -   pBS is out of range, i.e., one of the following conditions is true: 
+ *            pBS[i]<0, pBS[i]>4, pBS[i]==4  for i>=4, or 
+ *            (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *    -   srcdstStep is not a multiple of 8. 
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I (
+    OMX_U8 *pSrcDst,  /* in/out: chroma macroblock, filtered in place; 8-byte aligned */
+    OMX_S32 srcdstStep,  /* in: step of the arrays; must be a multiple of 8 */
+    const OMX_U8 *pAlpha,  /* in: 2x2 array of alpha thresholds, each in [0,255] */
+    const OMX_U8 *pBeta,  /* in: 2x2 array of beta thresholds, each in [0,18] */
+    const OMX_U8 *pThresholds,  /* in: 8x2 array of TC0, each in [0,25]; 4-byte aligned */
+    const OMX_U8 *pBS  /* in: 16x2 array of BS values, each in [0,4]; 4-byte aligned */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC   (6.3.4.1.1)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse raster scan for a 2x2 block of 
+ * ChromaDCLevel.  The decoded coefficients in the packed position-coefficient 
+ * buffer are stored in reverse zig-zag order, i.e., the first buffer element 
+ * contains the last non-zero position-coefficient pair of the block. Within
+ * each position-coefficient pair, the position entry indicates the 
+ * raster-scan position of the coefficient, while the coefficient entry 
+ * contains the coefficient value. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - Double pointer to current byte in bit stream buffer 
+ *   pOffset - Pointer to current bit position in the byte pointed to by 
+ *            *ppBitStream; valid in the range [0,7]. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after each block is decoded 
+ *   pOffset - *pOffset is updated after each block is decoded 
+ *   pNumCoeff - Pointer to the number of nonzero coefficients in this block 
+ *   ppPosCoefBuf - Double pointer to destination residual 
+ *            coefficient-position pair buffer.  Buffer position 
+ *            (*ppPosCoefBuf) is updated upon return, unless there are only 
+ *            zero coefficients in the currently decoded block.  In this case 
+ *            the caller is expected to bypass the transform/dequantization of 
+ *            the empty blocks. 
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppBitStream or pOffset is NULL. 
+ *    -    ppPosCoefBuf or pNumCoeff is NULL. 
+ *    OMX_Sts_Err - if one of the following is true: 
+ *    -    an illegal code is encountered in the bitstream 
+ *
+ */
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+    const OMX_U8 **ppBitStream,  /* in/out: current byte in the bit stream; updated after decoding */
+    OMX_S32*pOffset,  /* in/out: bit position within **ppBitStream, in [0,7]; updated after decoding */
+    OMX_U8 *pNumCoeff,  /* out: number of nonzero coefficients in this block */
+    OMX_U8 **ppPosCoefbuf  /* out: position-coefficient pair buffer (spelled ppPosCoefBuf in the description) */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DecodeCoeffsToPairCAVLC   (6.3.4.1.2)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of 
+ * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. Inverse 
+ * field scan is not supported. The decoded coefficients in the packed 
+ * position-coefficient buffer are stored in reverse zig-zag order, i.e., the 
+ * first buffer element contains the last non-zero position-coefficient pair of
+ * the block. Within each position-coefficient pair, the position entry 
+ * indicates the raster-scan position of the coefficient, while the 
+ * coefficient entry contains the coefficient value. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream -Double pointer to current byte in bit stream buffer 
+ *   pOffset - Pointer to current bit position in the byte pointed to by 
+ *            *ppBitStream; valid in the range [0,7]. 
+ *   sMaxNumCoeff - Maximum number of non-zero coefficients in the current
+ *            block 
+ *   sVLCSelect - VLC table selector, obtained from the number of non-zero 
+ *            coefficients contained in the above and left 4x4 blocks.  It is 
+ *            equivalent to the variable nC described in H.264 standard Table
+ *            9-5, except its value can't be less than zero.
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after each block is decoded
+ *   pOffset - *pOffset is updated after each block is decoded
+ *   pNumCoeff - Pointer to the number of nonzero coefficients in this block
+ *   ppPosCoefBuf - Double pointer to destination residual
+ *            coefficient-position pair buffer.  Buffer position
+ *            (*ppPosCoefBuf) is updated upon return, unless there are only
+ *            zero coefficients in the currently decoded block.  In this case
+ *            the caller is expected to bypass the transform/dequantization of
+ *            the empty blocks.
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppBitStream or pOffset is NULL. 
+ *    -    ppPosCoefBuf or pNumCoeff is NULL. 
+ *    -    sMaxNumCoeff is not equal to either 15 or 16. 
+ *    -    sVLCSelect is less than 0. 
+ *
+ *    OMX_Sts_Err - if one of the following is true: 
+ *    -    an illegal code is encountered in the bitstream 
+ *
+ */
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC (
+    const OMX_U8 **ppBitStream,  /* in/out: current byte in the bit stream; updated after decoding */
+    OMX_S32 *pOffset,  /* in/out: bit position within **ppBitStream, in [0,7]; updated after decoding */
+    OMX_U8 *pNumCoeff,  /* out: number of nonzero coefficients in this block */
+    OMX_U8 **ppPosCoefbuf,  /* out: position-coefficient pair buffer (spelled ppPosCoefBuf in the description) */
+    OMX_INT sVLCSelect,  /* in: VLC table selector; must not be negative */
+    OMX_INT sMaxNumCoeff  /* in: must be 15 or 16 */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_TransformDequantLumaDCFromPair   (6.3.4.2.1)
+ *
+ * Description:
+ * Reconstructs the 4x4 LumaDC block from the coefficient-position pair 
+ * buffer, performs integer inverse transformation and dequantization for
+ * 4x4 LumaDC coefficients, and updates the pair buffer pointer to the next
+ * non-empty block.
+ *
+ * Input Arguments:
+ *   
+ *   ppSrc - Double pointer to residual coefficient-position pair buffer 
+ *            output by CAVLC decoding
+ *   QP - Quantization parameter QpY 
+ *
+ * Output Arguments:
+ *   
+ *   ppSrc - *ppSrc is updated to the start of next non empty block 
+ *   pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must 
+ *            be aligned on a 8-byte boundary. 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppSrc or pDst is NULL. 
+ *    -    pDst is not 8 byte aligned. 
+ *    -    QP is not in the range of [0-51]. 
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantLumaDCFromPair (
+    const OMX_U8 **ppSrc,  /* in/out: coefficient-position pair buffer; advanced to the next non-empty block */
+    OMX_S16 *pDst,  /* out: reconstructed 4x4 LumaDC coefficients; 8-byte aligned */
+    OMX_INT QP  /* in: quantization parameter QpY, in [0,51] */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_TransformDequantChromaDCFromPair   (6.3.4.2.2)
+ *
+ * Description:
+ * Reconstruct the 2x2 ChromaDC block from coefficient-position pair buffer, 
+ * perform integer inverse transformation, and dequantization for 2x2 chroma 
+ * DC coefficients, and update the pair buffer pointer to next non-empty 
+ * block. 
+ *
+ * Input Arguments:
+ *   
+ *   ppSrc - Double pointer to residual coefficient-position pair buffer 
+ *            output by CAVLC decoding
+ *   QP - Quantization parameter QpC 
+ *
+ * Output Arguments:
+ *   
+ *   ppSrc - *ppSrc is updated to the start of next non empty block 
+ *   pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer; 
+ *            must be aligned on a 4-byte boundary. 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppSrc or pDst is NULL. 
+ *    -    pDst is not 4-byte aligned. 
+ *    -    QP is not in the range of [0-51]. 
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantChromaDCFromPair (
+    const OMX_U8 **ppSrc,  /* in/out: coefficient-position pair buffer; advanced to the next non-empty block */
+    OMX_S16 *pDst,  /* out: reconstructed 2x2 ChromaDC coefficients; 4-byte aligned */
+    OMX_INT QP  /* in: quantization parameter QpC, in [0,51] */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_DequantTransformResidualFromPairAndAdd   (6.3.4.2.3)
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer, 
+ * perform dequantization and integer inverse transformation for 4x4 block of 
+ * residuals with previous intra prediction or motion compensation data, and 
+ * update the pair buffer pointer to next non-empty block. If pDC == NULL, 
+ * there are 16 non-zero AC coefficients at most in the packed buffer starting
+ * from 4x4 block position 0; if pDC != NULL, there are 15 non-zero AC
+ * coefficients at most in the packed buffer starting from 4x4 block position
+ * 1. 
+ *
+ * Input Arguments:
+ *   
+ *   ppSrc - Double pointer to residual coefficient-position pair buffer 
+ *            output by CAVLC decoding
+ *   pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte 
+ *            boundary 
+ *   predStep - Predicted frame step size in bytes; must be a multiple of 4 
+ *   dstStep - Destination frame step in bytes; must be a multiple of 4 
+ *   pDC - Pointer to the DC coefficient of this block, NULL if it doesn't 
+ *            exist 
+ *   QP - QP Quantization parameter.  It should be QpC in chroma 4x4 block 
+ *            decoding, otherwise it should be QpY. 
+ *   AC - Flag indicating if at least one non-zero AC coefficient exists 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the reconstructed 4x4 block data; must be aligned on a 
+ *            4-byte boundary 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    pPred or pDst is NULL. 
+ *    -    pPred or pDst is not 4-byte aligned. 
+ *    -    predStep or dstStep is not a multiple of 4. 
+ *    -    AC !=0 and QP is not in the range of [0-51] or ppSrc == NULL.
+ *    -    AC ==0 && pDC ==NULL. 
+ *
+ */
+OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd (
+    const OMX_U8 **ppSrc,  /* in/out: coefficient-position pair buffer; advanced to the next non-empty block */
+    const OMX_U8 *pPred,  /* in: predicted 4x4 block; 4-byte aligned */
+    const OMX_S16 *pDC,  /* in: DC coefficient of this block, or NULL if it doesn't exist */
+    OMX_U8 *pDst,  /* out: reconstructed 4x4 block; 4-byte aligned */
+    OMX_INT predStep,  /* in: predicted frame step in bytes; multiple of 4 */
+    OMX_INT dstStep,  /* in: destination frame step in bytes; multiple of 4 */
+    OMX_INT QP,  /* in: QpC for chroma 4x4 blocks, otherwise QpY */
+    OMX_INT AC  /* in: flag, set when at least one non-zero AC coefficient exists */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_MEGetBufSize   (6.3.5.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification 
+ * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer 
+ * and MotionEstimationMB. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P10MEMode 
+ *   pMEParams -motion estimation parameters 
+ *
+ * Output Arguments:
+ *   
+ *   pSize - pointer to the number of bytes required for the motion 
+ *            estimation specification structure 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    pMEParams or pSize is NULL. 
+ *    -    an invalid MEMode is specified. 
+ *
+ */
+OMXResult omxVCM4P10_MEGetBufSize (
+    OMXVCM4P10MEMode MEmode,  /* in: motion estimation mode */
+    const OMXVCM4P10MEParams *pMEParams,  /* in: motion estimation parameters; must not be NULL */
+    OMX_U32 *pSize  /* out: bytes required for the ME specification structure; must not be NULL */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_MEInit   (6.3.5.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the 
+ * omxVCM4P10 motion estimation functions:  BlockMatch_Integer and 
+ * MotionEstimationMB. Memory for the specification structure *pMESpec must be 
+ * allocated prior to calling the function, and should be aligned on a 4-byte 
+ * boundary.  The number of bytes required for the specification structure can 
+ * be determined using the function omxVCM4P10_MEGetBufSize. Following 
+ * initialization by this function, the vendor-specific structure *pMESpec 
+ * should contain an implementation-specific representation of all motion 
+ * estimation parameters received via the structure pMEParams, for example  
+ * searchRange16x16, searchRange8x8, etc. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P10MEMode 
+ *   pMEParams - motion estimation parameters 
+ *   pMESpec - pointer to the uninitialized ME specification structure 
+ *
+ * Output Arguments:
+ *   
+ *   pMESpec - pointer to the initialized ME specification structure 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    pMEParams or pMESpec is NULL.
+ *    -    an invalid value was specified for the parameter MEmode 
+ *    -    a negative or zero value was specified for one of the search ranges 
+ *         (e.g., pMEParams->searchRange8x8, pMEParams->searchRange16x16, etc.)
+ *    -    either in isolation or in combination, one or more of the enables or 
+ *         search ranges in the structure *pMEParams were configured such 
+ *         that the requested behavior fails to comply with [ISO14496-10]. 
+ *
+ */
+OMXResult omxVCM4P10_MEInit (
+    OMXVCM4P10MEMode MEmode,  /* in: motion estimation mode */
+    const OMXVCM4P10MEParams *pMEParams,  /* in: motion estimation parameters (search ranges, enables) */
+    void *pMESpec  /* in/out: caller-allocated ME specification structure; initialized on return */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_BlockMatch_Integer   (6.3.5.2.1)
+ *
+ * Description:
+ * Performs integer block match.  Returns best MV and associated cost. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrgY - Pointer to the top-left corner of the current block:
+ *            If iBlockWidth==4,  4-byte alignment required. 
+ *            If iBlockWidth==8,  8-byte alignment required. 
+ *            If iBlockWidth==16, 16-byte alignment required. 
+ *   pSrcRefY - Pointer to the top-left corner of the co-located block in the 
+ *            reference picture: 
+ *            If iBlockWidth==4,  4-byte alignment required.  
+ *            If iBlockWidth==8,  8-byte alignment required.  
+ *            If iBlockWidth==16, 16-byte alignment required. 
+ *   nSrcOrgStep - Stride of the original picture plane, expressed in terms 
+ *            of integer pixels; must be a multiple of iBlockWidth. 
+ *   nSrcRefStep - Stride of the reference picture plane, expressed in terms 
+ *            of integer pixels 
+ *   pRefRect - pointer to the valid reference rectangle inside the reference 
+ *            picture plane 
+ *   pCurrPointPos - position of the current block in the current plane
+ *   iBlockWidth - Width of the current block, expressed in terms of integer 
+ *            pixels; must be equal to either 4, 8, or 16. 
+ *   iBlockHeight - Height of the current block, expressed in terms of 
+ *            integer pixels; must be equal to either 4, 8, or 16. 
+ *   nLamda - Lamda factor; used to compute motion cost 
+ *   pMVPred - Predicted MV; used to compute motion cost, expressed in terms 
+ *            of 1/4-pel units 
+ *   pMVCandidate - Candidate MV; used to initialize the motion search, 
+ *            expressed in terms of integer pixels 
+ *   pMESpec - pointer to the ME specification structure 
+ *
+ * Output Arguments:
+ *   
+ *   pBestMV - Best MV resulting from integer search, expressed in terms
+ *            of 1/4-pel units 
+ *   pBestCost - Motion cost associated with the best MV; computed as 
+ *            SAD+Lamda*BitsUsedByMV 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    any of the following pointers are NULL:
+ *         pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec. 
+ *    -    Either iBlockWidth or iBlockHeight are values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Integer (
+    const OMX_U8 *pSrcOrgY,  /* in: top-left corner of the current block; aligned to iBlockWidth bytes */
+    OMX_S32 nSrcOrgStep,  /* in: original-plane stride in integer pixels; multiple of iBlockWidth */
+    const OMX_U8 *pSrcRefY,  /* in: co-located block in the reference picture; aligned to iBlockWidth bytes */
+    OMX_S32 nSrcRefStep,  /* in: reference-plane stride in integer pixels */
+    const OMXRect *pRefRect,  /* in: valid reference rectangle inside the reference plane */
+    const OMXVCM4P2Coordinate *pCurrPointPos,  /* in: position of the current block in the current plane */
+    OMX_U8 iBlockWidth,  /* in: 4, 8, or 16 */
+    OMX_U8 iBlockHeight,  /* in: 4, 8, or 16 */
+    OMX_U32 nLamda,  /* in: Lamda factor used to compute motion cost */
+    const OMXVCMotionVector *pMVPred,  /* in: predicted MV, in 1/4-pel units */
+    const OMXVCMotionVector *pMVCandidate,  /* in: candidate MV that seeds the search, in integer pixels */
+    OMXVCMotionVector *pBestMV,  /* out: best MV from the integer search, in 1/4-pel units */
+    OMX_S32 *pBestCost,  /* out: motion cost of the best MV (SAD+Lamda*BitsUsedByMV) */
+    void *pMESpec  /* in: ME specification structure */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_BlockMatch_Half   (6.3.5.2.2)
+ *
+ * Description:
+ * Performs a half-pel block match using results from a prior integer search. 
+ *  Returns the best MV and associated cost.  This function estimates the 
+ * half-pixel motion vector by interpolating the integer resolution motion 
+ * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial 
+ * integer MV is generated externally.  The function 
+ * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrgY - Pointer to the current position in original picture plane:
+ *              If iBlockWidth==4,  4-byte alignment required. 
+ *              If iBlockWidth==8,  8-byte alignment required. 
+ *              If iBlockWidth==16, 16-byte alignment required. 
+ *   pSrcRefY - Pointer to the top-left corner of the co-located block in the 
+ *            reference picture:  
+ *              If iBlockWidth==4,  4-byte alignment required.  
+ *              If iBlockWidth==8,  8-byte alignment required.  
+ *              If iBlockWidth==16, 16-byte alignment required. 
+ *   nSrcOrgStep - Stride of the original picture plane in terms of full 
+ *            pixels; must be a multiple of iBlockWidth. 
+ *   nSrcRefStep - Stride of the reference picture plane in terms of full 
+ *            pixels 
+ *   iBlockWidth - Width of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   iBlockHeight - Height of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   nLamda - Lamda factor, used to compute motion cost 
+ *   pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to 
+ *            compute motion cost 
+ *   pSrcDstBestMV - The best MV resulting from a prior integer search, 
+ *            represented in terms of 1/4-pel units 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstBestMV - Best MV resulting from the half-pel search, expressed in 
+ *            terms of 1/4-pel units 
+ *   pBestCost - Motion cost associated with the best MV; computed as 
+ *            SAD+Lamda*BitsUsedByMV 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    any of the following pointers is NULL: pSrcOrgY, pSrcRefY, 
+ *              pSrcDstBestMV, pMVPred, pBestCost 
+ *    -    iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Half (
+    const OMX_U8 *pSrcOrgY,  /* in: current position in the original plane; aligned to iBlockWidth bytes */
+    OMX_S32 nSrcOrgStep,  /* in: original-plane stride in full pixels; multiple of iBlockWidth */
+    const OMX_U8 *pSrcRefY,  /* in: co-located block in the reference picture; aligned to iBlockWidth bytes */
+    OMX_S32 nSrcRefStep,  /* in: reference-plane stride in full pixels */
+    OMX_U8 iBlockWidth,  /* in: 4, 8, or 16 */
+    OMX_U8 iBlockHeight,  /* in: 4, 8, or 16 */
+    OMX_U32 nLamda,  /* in: Lamda factor used to compute motion cost */
+    const OMXVCMotionVector *pMVPred,  /* in: predicted MV, in 1/4-pel units */
+    OMXVCMotionVector *pSrcDstBestMV,  /* in/out: integer-search MV in, half-pel best MV out; 1/4-pel units */
+    OMX_S32 *pBestCost  /* out: motion cost of the best MV (SAD+Lamda*BitsUsedByMV) */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_BlockMatch_Quarter   (6.3.5.2.3)
+ *
+ * Description:
+ * Performs a quarter-pel block match using results from a prior half-pel 
+ * search.  Returns the best MV and associated cost.  This function estimates 
+ * the quarter-pixel motion vector by interpolating the half-pel resolution 
+ * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the 
+ * initial half-pel MV is generated externally.  The function 
+ * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrgY - Pointer to the current position in original picture plane:
+ *            If iBlockWidth==4,  4-byte alignment required. 
+ *            If iBlockWidth==8,  8-byte alignment required. 
+ *            If iBlockWidth==16, 16-byte alignment required. 
+ *   pSrcRefY - Pointer to the top-left corner of the co-located block in the 
+ *            reference picture:
+ *            If iBlockWidth==4,  4-byte alignment required.  
+ *            If iBlockWidth==8,  8-byte alignment required.  
+ *            If iBlockWidth==16, 16-byte alignment required. 
+ *   nSrcOrgStep - Stride of the original picture plane in terms of full 
+ *            pixels; must be a multiple of iBlockWidth. 
+ *   nSrcRefStep - Stride of the reference picture plane in terms of full 
+ *            pixels 
+ *   iBlockWidth - Width of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   iBlockHeight - Height of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   nLamda - Lamda factor, used to compute motion cost 
+ *   pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to 
+ *            compute motion cost 
+ *   pSrcDstBestMV - The best MV resulting from a prior half-pel search, 
+ *            represented in terms of 1/4 pel units 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstBestMV - Best MV resulting from the quarter-pel search, expressed 
+ *            in terms of 1/4-pel units 
+ *   pBestCost - Motion cost associated with the best MV; computed as 
+ *            SAD+Lamda*BitsUsedByMV 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    One or more of the following pointers is NULL: 
+ *         pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost 
+ *    -    iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Quarter (
+    const OMX_U8 *pSrcOrgY,  /* in: current position in the original plane; aligned to iBlockWidth bytes */
+    OMX_S32 nSrcOrgStep,  /* in: original-plane stride in full pixels; multiple of iBlockWidth */
+    const OMX_U8 *pSrcRefY,  /* in: co-located block in the reference picture; aligned to iBlockWidth bytes */
+    OMX_S32 nSrcRefStep,  /* in: reference-plane stride in full pixels */
+    OMX_U8 iBlockWidth,  /* in: 4, 8, or 16 */
+    OMX_U8 iBlockHeight,  /* in: 4, 8, or 16 */
+    OMX_U32 nLamda,  /* in: Lamda factor used to compute motion cost */
+    const OMXVCMotionVector *pMVPred,  /* in: predicted MV, in 1/4-pel units */
+    OMXVCMotionVector *pSrcDstBestMV,  /* in/out: half-pel-search MV in, quarter-pel best MV out; 1/4-pel units */
+    OMX_S32 *pBestCost  /* out: motion cost of the best MV (SAD+Lamda*BitsUsedByMV) */
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_MotionEstimationMB   (6.3.5.3.1)
+ *
+ * Description:
+ * Performs MB-level motion estimation and selects best motion estimation 
+ * strategy from the set of modes supported in baseline profile [ISO14496-10]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcCurrBuf - Pointer to the current position in original picture plane; 
+ *            16-byte alignment required 
+ *   pSrcRefBufList - Pointer to an array with 16 entries.  Each entry points 
+ *            to the top-left corner of the co-located MB in a reference 
+ *            picture.  The array is filled from low-to-high with valid 
+ *            reference frame pointers; the unused high entries should be set 
+ *            to NULL.  Ordering of the reference frames should follow 
+ *            [ISO14496-10] subclause 8.2.4  Decoding Process for Reference 
+ *            Picture Lists.   The entries must be 16-byte aligned. 
+ *   pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the 
+ *            reconstructed picture; must be 16-byte aligned. 
+ *   SrcCurrStep - Width of the original picture plane in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   SrcRefStep - Width of the reference picture plane in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   SrcRecStep - Width of the reconstructed picture plane in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   pRefRect - Pointer to the valid reference rectangle; relative to the 
+ *            image origin. 
+ *   pCurrPointPos - Position of the current macroblock in the current plane. 
+ *   Lambda - Lagrange factor for computing the cost function 
+ *   pMESpec - Pointer to the motion estimation specification structure; must 
+ *            have been allocated and initialized prior to calling this 
+ *            function. 
+ *   pMBInter - Array, of dimension four, containing pointers to information 
+ *            associated with four adjacent type INTER MBs (Left, Top, 
+ *            Top-Left, Top-Right). Any pointer in the array may be set equal 
+ *            to NULL if the corresponding MB doesn't exist or is not of type
+ *            INTER. 
+ *            -  pMBInter[0] - Pointer to left MB information 
+ *            -  pMBInter[1] - Pointer to top MB information 
+ *            -  pMBInter[2] - Pointer to top-left MB information 
+ *            -  pMBInter[3] - Pointer to top-right MB information 
+ *   pMBIntra - Array, of dimension four, containing pointers to information 
+ *            associated with four adjacent type INTRA MBs (Left, Top, 
+ *            Top-Left, Top-Right). Any pointer in the array may be set equal 
+ *            to NULL if the corresponding MB doesn't exist or is not of type 
+ *            INTRA. 
+ *            -  pMBIntra[0] - Pointer to left MB information 
+ *            -  pMBIntra[1] - Pointer to top MB information 
+ *            -  pMBIntra[2] - Pointer to top-left MB information 
+ *            -  pMBIntra[3] - Pointer to top-right MB information 
+ *   pSrcDstMBCurr - Pointer to information structure for the current MB.  
+ *            The following entries should be set prior to calling the 
+ *            function:  sliceID - the number of the slice to which the 
+ *            current MB belongs. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstCost - Pointer to the minimum motion cost for the current MB. 
+ *   pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma 
+ *            4x4 blocks in each MB.  The block SADs are in scan order for 
+ *            each MB.  For implementations that cannot compute the SAD values 
+ *            individually, the maximum possible value (0xffff) is returned 
+ *            for each of the 16 block SAD entries. 
+ *   pSrcDstMBCurr - Pointer to updated information structure for the current 
+ *            MB after MB-level motion estimation has been completed.  The 
+ *            following fields are updated by the ME function.   The following 
+ *            parameter set quantifies the MB-level ME search results: 
+ *            -  MbType 
+ *            -  subMBType[4] 
+ *            -  pMV0[4][4] 
+ *            -  pMVPred[4][4] 
+ *            -  pRefL0Idx[4] 
+ *            -  Intra16x16PredMode 
+ *            -  pIntra4x4PredMode[4][4] 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -   One or more of the following pointers is NULL: pSrcCurrBuf, 
+ *           pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec, 
+ *           pMBInter, pMBIntra,pSrcDstMBCurr, pDstCost, pSrcRefBufList[0] 
+ *    -    SrcRefStep, SrcRecStep are not multiples of 16 
+ *    -    iBlockWidth or iBlockHeight are values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_MotionEstimationMB (
+    const OMX_U8 *pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    const OMX_U8 *pSrcRefBufList[15],
+    OMX_S32 SrcRefStep,
+    const OMX_U8 *pSrcRecBuf,
+    OMX_S32 SrcRecStep,
+    const OMXRect *pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    OMX_U32 Lambda,
+    void *pMESpec,
+    const OMXVCM4P10MBInfoPtr *pMBInter,
+    const OMXVCM4P10MBInfoPtr *pMBIntra,
+    OMXVCM4P10MBInfoPtr pSrcDstMBCurr,
+    OMX_INT *pDstCost,
+    OMX_U16 *pDstBlockSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SAD_4x   (6.3.5.4.1)
+ *
+ * Description:
+ * This function calculates the SAD for 4x8 and 4x4 blocks. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg -Pointer to the original block; must be aligned on a 4-byte 
+ *            boundary. 
+ *   iStepOrg -Step of the original block buffer; must be a multiple of 4. 
+ *   pSrcRef -Pointer to the reference block 
+ *   iStepRef -Step of the reference block buffer 
+ *   iHeight -Height of the block; must be equal to either 4 or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD -Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    One or more of the following pointers is NULL: 
+ *         pSrcOrg, pSrcRef, or pDstSAD 
+ *    -    iHeight is not equal to either 4 or 8. 
+ *    -    iStepOrg is not a multiple of 4 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SAD_4x (
+    const OMX_U8 *pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8 *pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_S32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SADQuar_4x   (6.3.5.4.2)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average 
+ * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks.  Rounding 
+ * is applied according to the convention (a+b+1)>>1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the original block; must be aligned on a 4-byte 
+ *            boundary. 
+ *   pSrcRef0 - Pointer to reference block 0 
+ *   pSrcRef1 - Pointer to reference block 1 
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 4. 
+ *   iRefStep0 - Step of reference block 0 
+ *   iRefStep1 - Step of reference block 1 
+ *   iHeight - Height of the block; must be equal to either 4 or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    iHeight is not equal to either 4 or 8. 
+ *    -    One or more of the following pointers is NULL: pSrc, pSrcRef0, 
+ *              pSrcRef1, pDstSAD. 
+ *    -    iSrcStep is not a multiple of 4 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_4x (
+    const OMX_U8 *pSrc,
+    const OMX_U8 *pSrcRef0,
+    const OMX_U8 *pSrcRef1,
+    OMX_U32 iSrcStep,
+    OMX_U32 iRefStep0,
+    OMX_U32 iRefStep1,
+    OMX_U32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SADQuar_8x   (6.3.5.4.3)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average 
+ * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks.  
+ * Rounding is applied according to the convention (a+b+1)>>1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the original block; must be aligned on an 8-byte 
+ *            boundary. 
+ *   pSrcRef0 - Pointer to reference block 0 
+ *   pSrcRef1 - Pointer to reference block 1 
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 8. 
+ *   iRefStep0 - Step of reference block 0 
+ *   iRefStep1 - Step of reference block 1 
+ *   iHeight - Height of the block; must be equal to either 4, 8, or 16. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    iHeight is not equal to either 4, 8, or 16. 
+ *    -    One or more of the following pointers is NULL: pSrc, pSrcRef0, 
+ *              pSrcRef1, pDstSAD. 
+ *    -    iSrcStep is not a multiple of 8 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_8x (
+    const OMX_U8 *pSrc,
+    const OMX_U8 *pSrcRef0,
+    const OMX_U8 *pSrcRef1,
+    OMX_U32 iSrcStep,
+    OMX_U32 iRefStep0,
+    OMX_U32 iRefStep1,
+    OMX_U32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SADQuar_16x   (6.3.5.4.4)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average 
+ * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks.  
+ * Rounding is applied according to the convention (a+b+1)>>1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the original block; must be aligned on a 16-byte 
+ *            boundary. 
+ *   pSrcRef0 - Pointer to reference block 0 
+ *   pSrcRef1 - Pointer to reference block 1 
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 16 
+ *   iRefStep0 - Step of reference block 0 
+ *   iRefStep1 - Step of reference block 1 
+ *   iHeight - Height of the block; must be equal to either 8 or 16 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD -Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    iHeight is not equal to either 8 or 16. 
+ *    -    One or more of the following pointers is NULL: pSrc, pSrcRef0, 
+ *              pSrcRef1, pDstSAD. 
+ *    -    iSrcStep is not a multiple of 16 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_16x (
+    const OMX_U8 *pSrc,
+    const OMX_U8 *pSrcRef0,
+    const OMX_U8 *pSrcRef1,
+    OMX_U32 iSrcStep,
+    OMX_U32 iRefStep0,
+    OMX_U32 iRefStep1,
+    OMX_U32 *pDstSAD,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SATD_4x4   (6.3.5.4.5)
+ *
+ * Description:
+ * This function calculates the sum of absolute transform differences (SATD) 
+ * for a 4x4 block by applying a Hadamard transform to the difference block 
+ * and then calculating the sum of absolute coefficient values. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg - Pointer to the original block; must be aligned on a 4-byte 
+ *            boundary 
+ *   iStepOrg - Step of the original block buffer; must be a multiple of 4 
+ *   pSrcRef - Pointer to the reference block; must be aligned on a 4-byte 
+ *            boundary 
+ *   iStepRef - Step of the reference block buffer; must be a multiple of 4 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - pointer to the resulting SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcOrg, pSrcRef, or pDstSAD 
+ *    -    either pSrcOrg or pSrcRef is not aligned on a 4-byte boundary 
+ *    -    iStepOrg <= 0 or iStepOrg is not a multiple of 4 
+ *    -    iStepRef <= 0 or iStepRef is not a multiple of 4 
+ *
+ */
+OMXResult omxVCM4P10_SATD_4x4 (
+    const OMX_U8 *pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8 *pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_U32 *pDstSAD
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateHalfHor_Luma   (6.3.5.5.1)
+ *
+ * Description:
+ * This function performs interpolation for two horizontal 1/2-pel positions - 
+ * (-1/2, 0) and (1/2, 0) - around a full-pel position. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the top-left corner of the block used to interpolate in 
+ *            the reconstruction frame plane. 
+ *   iSrcStep - Step of the source buffer. 
+ *   iDstStep - Step of the destination(interpolation) buffer; must be a 
+ *            multiple of iWidth. 
+ *   iWidth - Width of the current block; must be equal to either 4, 8, or 16 
+ *   iHeight - Height of the current block; must be equal to 4, 8, or 16 
+ *
+ * Output Arguments:
+ *   
+ *   pDstLeft -Pointer to the interpolation buffer of the left 1/2-pel position 
+ *            (-1/2, 0) 
+ *                 If iWidth==4,  4-byte alignment required. 
+ *                 If iWidth==8,  8-byte alignment required. 
+ *                 If iWidth==16, 16-byte alignment required. 
+ *   pDstRight -Pointer to the interpolation buffer of the right 1/2-pel 
+ *            position (1/2, 0) 
+ *                 If iWidth==4,  4-byte alignment required. 
+ *                 If iWidth==8,  8-byte alignment required. 
+ *                 If iWidth==16, 16-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *             pSrc, pDstLeft, or pDstRight 
+ *    -    iWidth or iHeight have values other than 4, 8, or 16 
+ *    -    iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary 
+ *    -    iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on a 8-byte boundary 
+ *    -    iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary 
+ *    -    any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfHor_Luma (
+    const OMX_U8 *pSrc,
+    OMX_U32 iSrcStep,
+    OMX_U8 *pDstLeft,
+    OMX_U8 *pDstRight,
+    OMX_U32 iDstStep,
+    OMX_U32 iWidth,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateHalfVer_Luma   (6.3.5.5.2)
+ *
+ * Description:
+ * This function performs interpolation for two vertical 1/2-pel positions - 
+ * (0, -1/2) and (0, 1/2) - around a full-pel position. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to top-left corner of block used to interpolate in the 
+ *            reconstructed frame plane 
+ *   iSrcStep - Step of the source buffer. 
+ *   iDstStep - Step of the destination (interpolation) buffer; must be a 
+ *            multiple of iWidth. 
+ *   iWidth - Width of the current block; must be equal to either 4, 8, or 16 
+ *   iHeight - Height of the current block; must be equal to either 4, 8, or 16 
+ *
+ * Output Arguments:
+ *   
+ *   pDstUp -Pointer to the interpolation buffer of the 1/2-pel position above 
+ *            the current full-pel position (0, -1/2) 
+ *                If iWidth==4, 4-byte alignment required. 
+ *                If iWidth==8, 8-byte alignment required. 
+ *                If iWidth==16, 16-byte alignment required. 
+ *   pDstDown -Pointer to the interpolation buffer of the 1/2-pel position below 
+ *            the current full-pel position (0, 1/2) 
+ *                If iWidth==4, 4-byte alignment required. 
+ *                If iWidth==8, 8-byte alignment required. 
+ *                If iWidth==16, 16-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrc, pDstUp, or pDstDown 
+ *    -    iWidth or iHeight have values other than 4, 8, or 16 
+ *    -    iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary 
+ *    -    iWidth==8 but pDstUp and/or pDstDown is/are not aligned on a 8-byte boundary 
+ *    -    iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfVer_Luma (
+    const OMX_U8 *pSrc,
+    OMX_U32 iSrcStep,
+    OMX_U8 *pDstUp,
+    OMX_U8 *pDstDown,
+    OMX_U32 iDstStep,
+    OMX_U32 iWidth,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_Average_4x   (6.3.5.5.3)
+ *
+ * Description:
+ * This function calculates the average of two 4x4, 4x8 blocks.  The result 
+ * is rounded according to (a+b+1)/2. 
+ *
+ * Input Arguments:
+ *   
+ *   pPred0 - Pointer to the top-left corner of reference block 0 
+ *   pPred1 - Pointer to the top-left corner of reference block 1 
+ *   iPredStep0 - Step of reference block 0; must be a multiple of 4. 
+ *   iPredStep1 - Step of reference block 1; must be a multiple of 4. 
+ *   iDstStep - Step of the destination buffer; must be a multiple of 4. 
+ *   iHeight - Height of the blocks; must be either 4 or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstPred - Pointer to the destination buffer. 4-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *           pPred0, pPred1, or pDstPred 
+ *    -    pDstPred is not aligned on a 4-byte boundary 
+ *    -    iPredStep0 <= 0 or iPredStep0 is not a multiple of 4 
+ *    -    iPredStep1 <= 0 or iPredStep1 is not a multiple of 4 
+ *    -    iDstStep <= 0 or iDstStep is not a multiple of 4 
+ *    -    iHeight is not equal to either 4 or 8 
+ *
+ */
+OMXResult omxVCM4P10_Average_4x (
+    const OMX_U8 *pPred0,
+    const OMX_U8 *pPred1,
+    OMX_U32 iPredStep0,
+    OMX_U32 iPredStep1,
+    OMX_U8 *pDstPred,
+    OMX_U32 iDstStep,
+    OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_TransformQuant_ChromaDC   (6.3.5.6.1)
+ *
+ * Description:
+ * This function performs 2x2 Hadamard transform of chroma DC coefficients 
+ * and then quantizes the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the 2x2 array of chroma DC coefficients.  8-byte 
+ *            alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *   bIntra - Indicate whether this is an INTRA block. 1-INTRA, 0-INTER 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - Pointer to transformed and quantized coefficients.  8-byte 
+ *            alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *             pSrcDst 
+ *    -    pSrcDst is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_ChromaDC (
+    OMX_S16 *pSrcDst,
+    OMX_U32 iQP,
+    OMX_U8 bIntra
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_TransformQuant_LumaDC   (6.3.5.6.2)
+ *
+ * Description:
+ * This function performs a 4x4 Hadamard transform of luma DC coefficients 
+ * and then quantizes the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the 4x4 array of luma DC coefficients.  16-byte 
+ *            alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - Pointer to transformed and quantized coefficients.  16-byte 
+ *             alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: pSrcDst 
+ *    -    pSrcDst is not aligned on a 16-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_LumaDC (
+    OMX_S16 *pSrcDst,
+    OMX_U32 iQP
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InvTransformDequant_LumaDC   (6.3.5.6.3)
+ *
+ * Description:
+ * This function performs inverse 4x4 Hadamard transform and then dequantizes 
+ * the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and 
+ *            quantized coefficients.  16 byte alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to inverse-transformed and dequantized coefficients.  
+ *            16-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: pSrc 
+ *    -    pSrc or pDst is not aligned on a 16-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_LumaDC (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst,
+    OMX_U32 iQP
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InvTransformDequant_ChromaDC   (6.3.5.6.4)
+ *
+ * Description:
+ * This function performs inverse 2x2 Hadamard transform and then dequantizes 
+ * the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and 
+ *            quantized coefficients.  8 byte alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to inverse-transformed and dequantized coefficients.  
+ *            8-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: pSrc 
+ *    -    pSrc or pDst is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_ChromaDC (
+    const OMX_S16 *pSrc,
+    OMX_S16 *pDst,
+    OMX_U32 iQP
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_InvTransformResidualAndAdd   (6.3.5.7.1)
+ *
+ * Description:
+ * This function performs an inverse 4x4 integer transformation to produce 
+ * the difference signal and then adds the difference to the prediction to get 
+ * the reconstructed signal. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcPred - Pointer to prediction signal.  4-byte alignment required. 
+ *   pDequantCoeff - Pointer to the transformed coefficients.  8-byte 
+ *            alignment required. 
+ *   iSrcPredStep - Step of the prediction buffer; must be a multiple of 4. 
+ *   iDstReconStep - Step of the destination reconstruction buffer; must be a 
+ *            multiple of 4. 
+ *   bAC - Indicates whether there are AC coefficients in the coefficients 
+ *            matrix. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstRecon -Pointer to the destination reconstruction buffer.  4-byte 
+ *            alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrcPred, pDequantCoeff, pDstRecon 
+ *    -    pSrcPred is not aligned on a 4-byte boundary 
+ *    -    iSrcPredStep or iDstReconStep is not a multiple of 4. 
+ *    -    pDequantCoeff is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_InvTransformResidualAndAdd (
+    const OMX_U8 *pSrcPred,
+    const OMX_S16 *pDequantCoeff,
+    OMX_U8 *pDstRecon,
+    OMX_U32 iSrcPredStep,
+    OMX_U32 iDstReconStep,
+    OMX_U8 bAC
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_SubAndTransformQDQResidual   (6.3.5.8.1)
+ *
+ * Description:
+ * This function subtracts the prediction signal from the original signal to 
+ * produce the difference signal and then performs a 4x4 integer transform and 
+ * quantization. The quantized transformed coefficients are stored as 
+ * pDstQuantCoeff. This function can also output dequantized coefficients or 
+ * unquantized DC coefficients optionally by setting the pointers 
+ * pDstDeQuantCoeff, pDCCoeff. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg - Pointer to original signal. 4-byte alignment required. 
+ *   pSrcPred - Pointer to prediction signal. 4-byte alignment required. 
+ *   iSrcOrgStep - Step of the original signal buffer; must be a multiple of 
+ *            4. 
+ *   iSrcPredStep - Step of the prediction signal buffer; must be a multiple 
+ *            of 4. 
+ *   pNumCoeff -Number of non-zero coefficients after quantization. If this 
+ *            parameter is not required, it is set to NULL. 
+ *   nThreshSAD - Zero-block early detection threshold. If this parameter is 
+ *            not required, it is set to 0. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *   bIntra - Indicates whether this is an INTRA block, either 1-INTRA or 
+ *            0-INTER 
+ *
+ * Output Arguments:
+ *   
+ *   pDstQuantCoeff - Pointer to the quantized transformed coefficients.  
+ *            8-byte alignment required. 
+ *   pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients 
+ *            if this parameter is not equal to NULL.  8-byte alignment 
+ *            required. 
+ *   pDCCoeff - Pointer to the unquantized DC coefficient if this parameter 
+ *            is not equal to NULL. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff, 
+ *            pDstDeQuantCoeff, pDCCoeff 
+ *    -    pSrcOrg is not aligned on a 4-byte boundary 
+ *    -    pSrcPred is not aligned on a 4-byte boundary 
+ *    -    iSrcOrgStep is not a multiple of 4 
+ *    -    iSrcPredStep is not a multiple of 4 
+ *    -    pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_SubAndTransformQDQResidual (
+    const OMX_U8 *pSrcOrg,
+    const OMX_U8 *pSrcPred,
+    OMX_U32 iSrcOrgStep,
+    OMX_U32 iSrcPredStep,
+    OMX_S16 *pDstQuantCoeff,
+    OMX_S16 *pDstDeQuantCoeff,
+    OMX_S16 *pDCCoeff,
+    OMX_S8 *pNumCoeff,
+    OMX_U32 nThreshSAD,
+    OMX_U32 iQP,
+    OMX_U8 bIntra
+);
+
+
+
+/**
+ * Function:  omxVCM4P10_GetVLCInfo   (6.3.5.9.1)
+ *
+ * Description:
+ * This function extracts run-length encoding (RLE) information from the 
+ * coefficient matrix.  The results are returned in an OMXVCM4P10VLCInfo 
+ * structure. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcCoeff - pointer to the transform coefficient matrix.  8-byte 
+ *            alignment required. 
+ *   pScanMatrix - pointer to the scan order definition matrix.  For a luma 
+ *            block the scan matrix should follow [ISO14496-10] section 8.5.4, 
+ *            and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 
+ *            10, 7, 11, 14, 15.  For a chroma block, the scan matrix should 
+ *            contain the values 0, 1, 2, 3. 
+ *   bAC - indicates presence of a DC coefficient; 0 = DC coefficient 
+ *            present, 1= DC coefficient absent. 
+ *   MaxNumCoef - specifies the number of coefficients contained in the 
+ *            transform coefficient matrix, pSrcCoeff. The value should be 16 
+ *            for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The 
+ *            value should be 4 for blocks of type CHROMADC. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstVLCInfo - pointer to structure that stores information for 
+ *            run-length coding. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrcCoeff, pScanMatrix, pDstVLCInfo 
+ *    -    pSrcCoeff is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_GetVLCInfo (
+    const OMX_S16 *pSrcCoeff,
+    const OMX_U8 *pScanMatrix,
+    OMX_U8 bAC,
+    OMX_U32 MaxNumCoef,
+    OMXVCM4P10VLCInfo*pDstVLCInfo
+);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /** end of #define _OMXVC_H_ */
+
+/** EOF */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_Average.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_Average.c
new file mode 100644
index 0000000..1e51077
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_Average.c
@@ -0,0 +1,78 @@
+/**
+ * 
+ * File Name:  armVCCOMM_Average.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate Average of two blocks of size iWidth X iHeight
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: armVCCOMM_Average
+ *
+ * Description:
+ * This function calculates the average of two blocks and stores the result.
+ *
+ * Remarks:
+ *
+ *	[in]	pPred0			Pointer to the top-left corner of reference block 0
+ *	[in]	pPred1			Pointer to the top-left corner of reference block 1
+ *	[in]	iPredStep0	    Step of reference block 0
+ *	[in]	iPredStep1	    Step of reference block 1
+ *	[in]	iDstStep 		Step of the destination buffer
+ *	[in]	iWidth			Width of the blocks
+ *	[in]	iHeight			Height of the blocks
+ *	[out]	pDstPred		Pointer to the destination buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCCOMM_Average (
+	 const OMX_U8* 	    pPred0,
+	 const OMX_U8* 	    pPred1,	
+	 OMX_U32		iPredStep0,
+	 OMX_U32		iPredStep1,
+	 OMX_U8*		pDstPred,
+	 OMX_U32		iDstStep, 
+	 OMX_U32		iWidth,
+	 OMX_U32		iHeight
+)
+{
+    OMX_U32     x, y;   /* x = column offset within a row, y = row index */
+
+    /* Validate the three buffer pointers only; steps, iWidth and iHeight are used as given */
+    armRetArgErrIf(pPred0 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pPred1 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstPred == NULL, OMX_Sts_BadArgErr)
+    /* Per byte: dst = (pred0 + pred1 + 1) >> 1; each buffer is indexed with its own step */
+    for (y = 0; y < iHeight; y++)
+    {
+        for (x = 0; x < iWidth; x++)
+        {   /* first operand is cast to OMX_U32, so the sum is formed in that type before the shift */
+            pDstPred [y * iDstStep + x] = 
+                (OMX_U8)(((OMX_U32)pPred0 [y * iPredStep0 + x] + 
+                                  pPred1 [y * iPredStep1 + x] + 1) >> 1);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_SAD.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_SAD.c
new file mode 100644
index 0000000..d41ac9a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_SAD.c
@@ -0,0 +1,75 @@
+/**
+ * 
+ * File Name:  armVCCOMM_SAD.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate SAD for NxM blocks
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: armVCCOMM_SAD
+ *
+ * Description:
+ * This function calculates the SAD for NxM blocks.
+ *
+ * Remarks:
+ *
+ * [in]		pSrcOrg		Pointer to the original block
+ * [in]		iStepOrg	Step of the original block buffer
+ * [in]		pSrcRef		Pointer to the reference block
+ * [in]		iStepRef	Step of the reference block buffer
+ * [in]		iHeight		Height of the block
+ * [in]		iWidth		Width of the block
+ * [out]	pDstSAD		Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCCOMM_SAD(	
+	const OMX_U8* 	pSrcOrg,
+	OMX_U32 	iStepOrg,
+	const OMX_U8* 	pSrcRef,
+	OMX_U32 	iStepRef,
+	OMX_S32*	pDstSAD,
+	OMX_U32		iHeight,
+	OMX_U32		iWidth
+)
+{
+    OMX_INT     x, y;   /* NOTE(review): OMX_INT indices are compared with OMX_U32 bounds below */
+    
+    /* Validate the three pointers only; steps, iHeight and iWidth are used as given */
+    armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr)
+    /* Zero the output, then accumulate armAbs(org - ref) over iHeight rows of iWidth samples */
+    *pDstSAD = 0;
+    for (y = 0; y < iHeight; y++)
+    {
+        for (x = 0; x < iWidth; x++)
+        {
+            *pDstSAD += armAbs(pSrcOrg [(y * iStepOrg) + x] - 
+                       pSrcRef [(y * iStepRef) + x]);
+        }
+    }
+    
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_16x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_16x.c
new file mode 100644
index 0000000..6d1447e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_16x.c
@@ -0,0 +1,86 @@
+/**
+ * 
+ * File Name:  omxVCCOMM_Average_16x.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate Average of two 16x16 or 16x8 blocks
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCCOMM_Average_16x   (6.1.3.1.2)
+ *
+ * Description:
+ * Averages two 16-pixel-wide blocks (16x16 or 16x8) sample by sample,
+ * rounding each result as (a+b+1)/2.  Combined with half-pixel
+ * interpolation this yields quarter-pixel motion estimates; see
+ * [ISO14496-10], subclause 8.4.2.2.1.
+ *
+ * Input Arguments:
+ *
+ *   pPred0 - top-left corner of reference block 0
+ *   pPred1 - top-left corner of reference block 1
+ *   iPredStep0 - step of reference block 0
+ *   iPredStep1 - step of reference block 1
+ *   iDstStep - step of the destination buffer
+ *   iHeight - block height, 8 or 16
+ *
+ * Output Arguments:
+ *
+ *   pDstPred - destination buffer; must be 16-byte aligned.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments; returned when any of the following
+ *              holds:
+ *    -   pPred0, pPred1 or pDstPred is NULL.
+ *    -   pDstPred is not aligned on a 16-byte boundary.
+ *    -   iPredStep0 is zero or not a multiple of 16.
+ *    -   iPredStep1 is zero or not a multiple of 16.
+ *    -   iDstStep is zero or not a multiple of 16.
+ *    -   iHeight is neither 8 nor 16.
+ *    Otherwise the result of armVCCOMM_Average is returned unchanged.
+ *
+ */
+ OMXResult omxVCCOMM_Average_16x (
+	 const OMX_U8* 	    pPred0,
+	 const OMX_U8* 	    pPred1,
+	 OMX_U32		iPredStep0,
+	 OMX_U32		iPredStep1,
+	 OMX_U8*		pDstPred,
+	 OMX_U32		iDstStep,
+	 OMX_U32		iHeight
+)
+{
+    /* pointers: all required, destination 16-byte aligned */
+    armRetArgErrIf((pPred0 == NULL) || (pPred1 == NULL) || (pDstPred == NULL),
+                   OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot16ByteAligned(pDstPred), OMX_Sts_BadArgErr)
+    /* geometry: height 8 or 16, steps non-zero multiples of 16 */
+    armRetArgErrIf(!((iHeight == 8) || (iHeight == 16)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iPredStep0 & 15) || (iPredStep0 == 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iPredStep1 & 15) || (iPredStep1 == 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iDstStep & 15) || (iDstStep == 0), OMX_Sts_BadArgErr)
+
+    return armVCCOMM_Average(pPred0, pPred1, iPredStep0, iPredStep1,
+                             pDstPred, iDstStep, 16, iHeight);
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_8x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_8x.c
new file mode 100644
index 0000000..17b1326
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_8x.c
@@ -0,0 +1,87 @@
+/**
+ * 
+ * File Name:  omxVCCOMM_Average_8x.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate Average of two 8x4 or 8x8 or 8x16 blocks
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCCOMM_Average_8x   (6.1.3.1.1)
+ *
+ * Description:
+ * Averages two 8-pixel-wide blocks (8x4, 8x8 or 8x16) sample by sample,
+ * rounding each result as (a+b+1)/2.  Combined with half-pixel
+ * interpolation this yields quarter-pixel motion estimates; see
+ * [ISO14496-10], subclause 8.4.2.2.1.
+ *
+ * Input Arguments:
+ *
+ *   pPred0     - top-left corner of reference block 0
+ *   pPred1     - top-left corner of reference block 1
+ *   iPredStep0 - step of reference block 0
+ *   iPredStep1 - step of reference block 1
+ *   iDstStep   - step of the destination buffer
+ *   iHeight    - block height: 4, 8 or 16
+ *
+ * Output Arguments:
+ *
+ *   pDstPred - destination buffer; must be 8-byte aligned.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments; returned when any of the following
+ *              holds:
+ *    -   pPred0, pPred1 or pDstPred is NULL.
+ *    -   pDstPred is not aligned on an 8-byte boundary.
+ *    -   iPredStep0 is zero or not a multiple of 8.
+ *    -   iPredStep1 is zero or not a multiple of 8.
+ *    -   iDstStep is zero or not a multiple of 8.
+ *    -   iHeight is not 4, 8, or 16.
+ *    Otherwise the result of armVCCOMM_Average is returned unchanged.
+ *
+ */
+ OMXResult omxVCCOMM_Average_8x (
+	 const OMX_U8* 	    pPred0,
+	 const OMX_U8* 	    pPred1,
+	 OMX_U32		iPredStep0,
+	 OMX_U32		iPredStep1,
+	 OMX_U8*		pDstPred,
+	 OMX_U32		iDstStep,
+	 OMX_U32		iHeight
+)
+{
+    /* pointers: all required, destination 8-byte aligned */
+    armRetArgErrIf((pPred0 == NULL) || (pPred1 == NULL) || (pDstPred == NULL),
+                   OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pDstPred), OMX_Sts_BadArgErr)
+    /* geometry: height 4, 8 or 16, steps non-zero multiples of 8 */
+    armRetArgErrIf(!((iHeight == 4) || (iHeight == 8) || (iHeight == 16)),
+                   OMX_Sts_BadArgErr)
+    armRetArgErrIf((iPredStep0 & 7) || (iPredStep0 == 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iPredStep1 & 7) || (iPredStep1 == 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iDstStep & 7) || (iDstStep == 0), OMX_Sts_BadArgErr)
+    return armVCCOMM_Average(pPred0, pPred1, iPredStep0, iPredStep1,
+                             pDstPred, iDstStep, 8, iHeight);
+}
+
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c
new file mode 100644
index 0000000..e559adf
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c
@@ -0,0 +1,88 @@
+/**
+ * 
+ * File Name:  omxVCCOMM_ComputeTextureErrorBlock.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains module computing the error for a MB of size 8x8
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCCOMM_ComputeTextureErrorBlock   (6.1.4.1.2)
+ *
+ * Description:
+ * Subtracts an 8x8 reference block from an 8x8 region of the source
+ * plane and stores the signed differences (the texture error).
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the 8x8 region in the source plane; must be
+ *            aligned on an 8-byte boundary.
+ *   srcStep - step of the source plane; a positive multiple of 8
+ *   pSrcRef - pointer to the reference buffer, 64 contiguous samples;
+ *            must be aligned on an 8-byte boundary.
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the destination buffer, 64 contiguous values;
+ *            must be aligned on an 8-byte boundary.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrc, pSrcRef or pDst is NULL.
+ *    -    pSrc is not 8-byte aligned.
+ *    -    pSrcRef is not 8-byte aligned.
+ *    -    pDst is not 8-byte aligned.
+ *    -    srcStep <= 0 or srcStep is not a multiple of 8.
+ *
+ */
+
+OMXResult omxVCCOMM_ComputeTextureErrorBlock(
+     const OMX_U8 *pSrc,
+     OMX_INT srcStep,
+     const OMX_U8 *pSrcRef,
+     OMX_S16 * pDst
+)
+{
+    const OMX_U8 *pRow;
+    OMX_INT     i;
+
+    /* All three buffers must be present and 8-byte aligned */
+    armRetArgErrIf((pSrc == NULL) || (pSrcRef == NULL) || (pDst == NULL),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pSrcRef), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    /* The source step must be a positive multiple of 8 */
+    armRetArgErrIf((srcStep <= 0) || (srcStep & 7), OMX_Sts_BadArgErr);
+
+    /* pSrcRef and pDst are packed 8x8 blocks; the source row advances
+     * by srcStep after every 8 samples */
+    for (i = 0, pRow = pSrc; i < 64; i++)
+    {
+        pDst[i] = pRow[i & 7] - pSrcRef[i];
+        if ((i & 7) == 7)
+        {
+            pRow += srcStep;
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c
new file mode 100644
index 0000000..c4731aad
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c
@@ -0,0 +1,93 @@
+/**
+ * 
+ * File Name:  omxVCCOMM_ComputeTextureErrorBlock_SAD.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains module computing the error for a MB of size 8x8
+ * 
+ */
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function:  omxVCCOMM_ComputeTextureErrorBlock_SAD   (6.1.4.1.1)
+ *
+ * Description:
+ * Subtracts an 8x8 reference block from an 8x8 region of the source
+ * plane, stores the signed differences, and sums their magnitudes (SAD).
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the 8x8 region in the source plane; must be
+ *            aligned on an 8-byte boundary.
+ *   srcStep - step of the source plane; a positive multiple of 8
+ *   pSrcRef - pointer to the reference buffer, 64 contiguous samples;
+ *            must be aligned on an 8-byte boundary.
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the destination buffer, 64 contiguous values;
+ *            must be aligned on an 8-byte boundary.
+ *   pDstSAD - receives the Sum of Absolute Differences (SAD) value
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments
+ *    -    pSrc, pSrcRef, pDst or pDstSAD is NULL.
+ *    -    pSrc is not 8-byte aligned.
+ *    -    pSrcRef is not 8-byte aligned.
+ *    -    pDst is not 8-byte aligned.
+ *    -    srcStep <= 0 or srcStep is not a multiple of 8.
+ *
+ */
+
+OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD(
+     const OMX_U8 *pSrc,
+     OMX_INT srcStep,
+     const OMX_U8 *pSrcRef,
+     OMX_S16 * pDst,
+     OMX_INT *pDstSAD
+)
+{
+    const OMX_U8 *pRow;
+    OMX_INT     i;
+
+    /* Every pointer is required; the block buffers are 8-byte aligned */
+    armRetArgErrIf((pSrc == NULL) || (pSrcRef == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pDst == NULL) || (pDstSAD == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pSrcRef), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    /* The source step must be a positive multiple of 8 */
+    armRetArgErrIf((srcStep <= 0) || (srcStep & 7), OMX_Sts_BadArgErr);
+
+    /* pSrcRef and pDst are packed 8x8 blocks; the source row advances
+     * by srcStep after every 8 samples */
+    *pDstSAD = 0;
+    for (i = 0, pRow = pSrc; i < 64; i++)
+    {
+        pDst[i] = pRow[i & 7] - pSrcRef[i];
+        *pDstSAD += armAbs(pDst[i]);
+        if ((i & 7) == 7)
+        {
+            pRow += srcStep;
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy16x16.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy16x16.c
new file mode 100644
index 0000000..4857024
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy16x16.c
@@ -0,0 +1,79 @@
+/**
+ * 
+ * File Name:  omxVCCOMM_Copy16x16.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * MPEG4 16x16 Copy module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCCOMM_Copy16x16   (6.1.3.3.2)
+ *
+ * Description:
+ * Copies a 16x16 reference macroblock into a packed 256-byte destination.
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the reference macroblock in the source frame; must be
+ *            aligned on a 16-byte boundary.
+ *   step - distance between the starts of consecutive lines in the reference
+ *            frame, in bytes; must be a multiple of 16 and must be larger
+ *            than or equal to 16.
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the destination macroblock, written as 256 contiguous
+ *            bytes; must be aligned on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ *              conditions:
+ *    -   pSrc or pDst is NULL
+ *    -   pSrc or pDst is not aligned on a 16-byte
+ *              boundary
+ *    -   step < 16 or step is not a multiple of 16.
+ *
+ */
+
+OMXResult omxVCCOMM_Copy16x16(
+		const OMX_U8 *pSrc,
+		OMX_U8 *pDst,
+		OMX_INT step)
+ {
+    OMX_INT row, col;
+
+    /* Both pointers must be non-NULL and 16-byte aligned */
+    armRetArgErrIf((pSrc == NULL) || (pDst == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    /* The line step must be a multiple of 16 no smaller than 16 */
+    armRetArgErrIf(((step < 16) || (step % 16)), OMX_Sts_BadArgErr);
+
+    /* Source rows are 'step' bytes apart; the destination is written as
+     * 16 consecutive rows of 16 bytes */
+    for (row = 0; row < 16; row++)
+    {
+        for (col = 0; col < 16; col++)
+        {
+            pDst[(row * 16) + col] = pSrc[(row * step) + col];
+        }
+    }
+
+    return OMX_Sts_NoErr;
+ }
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy8x8.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy8x8.c
new file mode 100644
index 0000000..a4f9dde
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy8x8.c
@@ -0,0 +1,79 @@
+/**
+ * 
+ * File Name:  omxVCCOMM_Copy8x8.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * MPEG4 8x8 Copy module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCCOMM_Copy8x8   (6.1.3.3.1)
+ *
+ * Description:
+ * Copies an 8x8 reference block into a packed 64-byte destination.
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the reference block in the source frame; must be
+ *            aligned on an 8-byte boundary.
+ *   step - distance between the starts of consecutive lines in the reference
+ *            frame, in bytes; must be a multiple of 8 and must be larger than
+ *            or equal to 8.
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the destination block, written as 64 contiguous
+ *            bytes; must be aligned on an 8-byte boundary.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ *              conditions:
+ *    -   pSrc or pDst is NULL
+ *    -   pSrc or pDst is not aligned on an 8-byte
+ *              boundary
+ *    -   step < 8 or step is not a multiple of 8.
+ *
+ */
+
+OMXResult omxVCCOMM_Copy8x8(
+		const OMX_U8 *pSrc,
+		OMX_U8 *pDst,
+		OMX_INT step)
+ {
+    OMX_INT row, col;
+
+    /* Both pointers must be non-NULL and 8-byte aligned */
+    armRetArgErrIf((pSrc == NULL) || (pDst == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    /* The line step must be a multiple of 8 no smaller than 8 */
+    armRetArgErrIf(((step < 8) || (step % 8)), OMX_Sts_BadArgErr);
+
+    /* Source rows are 'step' bytes apart; the destination is written as
+     * 8 consecutive rows of 8 bytes */
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            pDst[(row * 8) + col] = pSrc[(row * step) + col];
+        }
+    }
+
+    return OMX_Sts_NoErr;
+ }
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ExpandFrame_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ExpandFrame_I.c
new file mode 100644
index 0000000..9536df7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ExpandFrame_I.c
@@ -0,0 +1,126 @@
+/**
+ * 
+ * File Name:  omxVCCOMM_ExpandFrame_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will Expand Frame boundary pixels into Plane
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCCOMM_ExpandFrame_I   (6.1.3.2.1)
+ *
+ * Description:
+ * This function expands a reconstructed frame in-place.  The unexpanded
+ * source frame should be stored in a plane buffer with sufficient space
+ * pre-allocated for edge expansion, and the input frame should be located in
+ * the plane buffer center.  This function executes the pixel expansion by
+ * replicating source frame edge pixel intensities in the empty pixel
+ * locations (expansion region) between the source frame edge and the plane
+ * buffer edge.  The width/height of the expansion regions on the
+ * horizontal/vertical edges is controlled by the parameter iExpandPels.
+ *
+ * Input Arguments:
+ *
+ *   pSrcDstPlane - pointer to the top-left corner of the frame to be
+ *            expanded; must be aligned on an 8-byte boundary.
+ *   iFrameWidth - frame width; must be a multiple of 8.
+ *   iFrameHeight - frame height; must be a multiple of 8.
+ *   iExpandPels - number of pixels to be expanded in the horizontal and
+ *            vertical directions; must be a multiple of 8.
+ *   iPlaneStep - distance, in bytes, between the start of consecutive lines
+ *            in the plane buffer; must be larger than or equal to
+ *            (iFrameWidth + 2 * iExpandPels).
+ *
+ * Output Arguments:
+ *
+ *   pSrcDstPlane - pointer to the top-left corner of the frame (NOT the
+ *            top-left corner of the plane); must be aligned on an 8-byte
+ *            boundary.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ *              conditions:
+ *    -    pSrcDstPlane is NULL.
+ *    -    pSrcDstPlane is not aligned on an 8-byte boundary.
+ *    -    one of the following parameters is either equal to zero or is a
+ *              non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or
+ *              iExpandPels.
+ *    -    iPlaneStep < (iFrameWidth + 2 * iExpandPels).
+ *
+ */
+OMXResult omxVCCOMM_ExpandFrame_I(
+	OMX_U8*	pSrcDstPlane,
+	OMX_U32	iFrameWidth,
+	OMX_U32	iFrameHeight,
+	OMX_U32	iExpandPels,
+	OMX_U32	iPlaneStep
+)
+{
+    OMX_INT     x, y;
+    OMX_INT     iStep;
+    OMX_U8      *pRow, *pLeft, *pRight;
+    OMX_U8*     pTop;
+    OMX_U8*     pBottom;
+
+    /* check for argument error */
+    armRetArgErrIf(pSrcDstPlane == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pSrcDstPlane), OMX_Sts_BadArgErr)
+    armRetArgErrIf(iFrameWidth == 0 || iFrameWidth & 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iFrameHeight== 0 || iFrameHeight & 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iExpandPels == 0 || iExpandPels & 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iPlaneStep == 0 || iPlaneStep & 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iPlaneStep < (iFrameWidth + 2 * iExpandPels),
+                   OMX_Sts_BadArgErr)
+
+    /* Top and Bottom: replicate the first and last frame rows */
+    pTop = pSrcDstPlane - (iExpandPels * iPlaneStep);
+    pBottom = pSrcDstPlane + (iFrameHeight * iPlaneStep);
+
+    for (y = 0; y < (OMX_INT)iExpandPels; y++)
+    {
+        for (x = 0; x < (OMX_INT)iFrameWidth; x++)
+        {
+            pTop [y * iPlaneStep + x] = pSrcDstPlane [x];
+            pBottom [y * iPlaneStep + x] =
+                pSrcDstPlane [(iFrameHeight - 1) * iPlaneStep + x];
+        }
+    }
+
+    /* Left, Right and Corners: y starts negative, so the row offset must be
+     * signed; y * iPlaneStep is evaluated as unsigned and becomes a huge
+     * positive index on targets whose pointers are wider than OMX_U32 */
+    iStep = (OMX_INT)iPlaneStep;
+    for (y = -(OMX_INT)iExpandPels; y < (OMX_INT)(iFrameHeight + iExpandPels); y++)
+    {
+        pRow = pSrcDstPlane + (y * iStep);
+        pLeft = pRow - iExpandPels;
+        pRight = pRow + iFrameWidth;
+        for (x = 0; x < (OMX_INT)iExpandPels; x++)
+        {
+            pLeft [x] = pRow [0];
+            pRight [x] = pRow [iFrameWidth - 1];
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_LimitMVToRect.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_LimitMVToRect.c
new file mode 100644
index 0000000..af04582
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_LimitMVToRect.c
@@ -0,0 +1,81 @@
+/**
+ * 
+ * File Name:  omxVCCOMM_LimitMVToRect.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains module for limiting the MV
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCCOMM_LimitMVToRect   (6.1.4.1.3)
+ *
+ * Description:
+ * Clamps the motion vector of the current block/macroblock so that the
+ * motion compensated block/macroblock stays inside a bounding rectangle
+ * (Figure 6-1).  dx is limited to the range
+ * [2*x - Xcoord, 2*x + width - Xcoord - size]; dy is limited the same
+ * way using y, height and Ycoord.
+ *
+ * Input Arguments:
+ *
+ *   pSrcMV - pointer to the motion vector to be limited
+ *   pRectVOPRef - pointer to the bounding rectangle
+ *   Xcoord, Ycoord  - coordinates of the current block or macroblock
+ *   size - size of the current block or macroblock; must be 8 or 16.
+ *
+ * Output Arguments:
+ *
+ *   pDstMV - pointer to the limited motion vector
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the
+ *              following conditions is true:
+ *    -    pSrcMV, pDstMV, or pRectVOPRef is NULL.
+ *    -    size is not equal to either 8 or 16.
+ *    -    the width or height of the bounding rectangle is less than
+ *         twice the block size.
+ *
+ */
+OMXResult omxVCCOMM_LimitMVToRect(
+     const OMXVCMotionVector * pSrcMV,
+     OMXVCMotionVector *pDstMV,
+     const OMXRect * pRectVOPRef,
+     OMX_INT Xcoord,
+     OMX_INT Ycoord,
+     OMX_INT size
+)
+{
+    /* All three structures are dereferenced below */
+    armRetArgErrIf((pSrcMV == NULL) || (pDstMV == NULL) ||
+                   (pRectVOPRef == NULL), OMX_Sts_BadArgErr);
+    /* Block size 8 or 16, inside a rectangle at least twice that size */
+    armRetArgErrIf(!((size == 8) || (size == 16)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pRectVOPRef->width < (2* size)) ||
+                   (pRectVOPRef->height < (2* size)), OMX_Sts_BadArgErr);
+    /* Clamp each component: raise to the lower bound, then cap it */
+    pDstMV->dx = armMin (
+        armMax (pSrcMV->dx, 2*pRectVOPRef->x - Xcoord),
+        (2*pRectVOPRef->x + pRectVOPRef->width - Xcoord - size));
+    pDstMV->dy = armMin (
+        armMax (pSrcMV->dy, 2*pRectVOPRef->y - Ycoord),
+        (2*pRectVOPRef->y + pRectVOPRef->height - Ycoord - size));
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_16x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_16x.c
new file mode 100644
index 0000000..0f0cedb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_16x.c
@@ -0,0 +1,80 @@
+/**
+ * 
+ * File Name:  omxVCCOMM_SAD_16x.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate SAD for 16x16 and 16x8 blocks
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCCOMM_SAD_16x   (6.1.4.1.4)
+ *
+ * Description:
+ * Validates its arguments and computes the SAD of a 16x16 or 16x8 block.
+ *
+ * Input Arguments:
+ *
+ *   pSrcOrg - Pointer to the original block; must be aligned on a 16-byte
+ *             boundary.
+ *   iStepOrg - Step of the original block buffer
+ *   pSrcRef  - Pointer to the reference block
+ *   iStepRef - Step of the reference block buffer
+ *   iHeight  - Height of the block, 8 or 16
+ *
+ * Output Arguments:
+ *
+ *   pDstSAD - Pointer to the resulting SAD
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the
+ *              following conditions is true:
+ *    -    at least one of the following pointers is NULL:
+ *         pSrcOrg, pDstSAD, or pSrcRef
+ *    -    pSrcOrg is not 16-byte aligned.
+ *    -    iStepOrg is zero or not a multiple of 16
+ *    -    iStepRef is zero or not a multiple of 16
+ *    -    iHeight is not 8 or 16
+ *
+ */
+OMXResult omxVCCOMM_SAD_16x(
+			const OMX_U8* 	pSrcOrg,
+			OMX_U32 	iStepOrg,
+			const OMX_U8* 	pSrcRef,
+			OMX_U32 	iStepRef,
+			OMX_S32*	pDstSAD,
+			OMX_U32		iHeight
+)
+{
+    /* pointers: all required, original block 16-byte aligned */
+    armRetArgErrIf((pSrcOrg == NULL) || (pSrcRef == NULL) || (pDstSAD == NULL),
+                   OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot16ByteAligned(pSrcOrg), OMX_Sts_BadArgErr)
+    /* geometry: height 8 or 16, steps non-zero multiples of 16 */
+    armRetArgErrIf(!((iHeight == 16) || (iHeight == 8)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iStepOrg & 15) || (iStepOrg == 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iStepRef & 15) || (iStepRef == 0), OMX_Sts_BadArgErr)
+
+    return armVCCOMM_SAD(pSrcOrg, iStepOrg, pSrcRef, iStepRef,
+                         pDstSAD, iHeight, 16);
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_8x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_8x.c
new file mode 100644
index 0000000..1421d99
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_8x.c
@@ -0,0 +1,80 @@
+/**
+ * 
+ * File Name:  omxVCCOMM_SAD_8x.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate SAD for 8x16, 8x8, 8x4 blocks
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCCOMM_SAD_8x   (6.1.4.1.5)
+ *
+ * Description:
+ * Validates its arguments and computes the SAD of an 8x16, 8x8 or 8x4 block.
+ *
+ * Input Arguments:
+ *
+ *   pSrcOrg  - Pointer to the original block; must be aligned on an 8-byte
+ *              boundary.
+ *   iStepOrg - Step of the original block buffer
+ *   pSrcRef  - Pointer to the reference block
+ *   iStepRef - Step of the reference block buffer
+ *   iHeight  - Height of the block: 4, 8 or 16
+ *
+ * Output Arguments:
+ *
+ *   pDstSAD - Pointer to the resulting SAD
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the
+ *              following conditions is true:
+ *    -    at least one of the following pointers is NULL:
+ *         pSrcOrg, pDstSAD, or pSrcRef
+ *    -    pSrcOrg is not 8-byte aligned.
+ *    -    iStepOrg is zero or not a multiple of 8
+ *    -    iStepRef is zero or not a multiple of 8
+ *    -    iHeight is not 4, 8 or 16
+ *
+ */
+OMXResult omxVCCOMM_SAD_8x(
+	const OMX_U8* 	pSrcOrg,
+	OMX_U32 	iStepOrg,
+	const OMX_U8* 	pSrcRef,
+	OMX_U32 	iStepRef,
+	OMX_S32*	pDstSAD,
+	OMX_U32		iHeight
+)
+{
+    /* pointers: all required, original block 8-byte aligned */
+    armRetArgErrIf((pSrcOrg == NULL) || (pSrcRef == NULL) || (pDstSAD == NULL),
+                   OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pSrcOrg), OMX_Sts_BadArgErr)
+    /* geometry: height 4, 8 or 16, steps non-zero multiples of 8 */
+    armRetArgErrIf(!((iHeight == 16) || (iHeight == 8) || (iHeight == 4)),
+                   OMX_Sts_BadArgErr)
+    armRetArgErrIf((iStepOrg & 7) || (iStepOrg == 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iStepRef & 7) || (iStepRef == 0), OMX_Sts_BadArgErr)
+    return armVCCOMM_SAD(pSrcOrg, iStepOrg, pSrcRef, iStepRef,
+                         pDstSAD, iHeight, 8);
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/api/armVCM4P10_CAVLCTables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/api/armVCM4P10_CAVLCTables.h
new file mode 100644
index 0000000..8d18a8f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/api/armVCM4P10_CAVLCTables.h
@@ -0,0 +1,34 @@
+/* ----------------------------------------------------------------
+ * 
+ * 
+ * File Name:  armVCM4P10_CAVLCTables.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * ----------------------------------------------------------------
+ * File:     armVCM4P10_CAVLCTables.h
+ * ----------------------------------------------------------------
+ * 
+ * Header file for ARM implementation of OpenMAX VCM4P10
+ * 
+ */
+ 
+#ifndef ARMVCM4P10_CAVLCTABLES_H
+#define ARMVCM4P10_CAVLCTABLES_H
+  
+/* CAVLC tables; OMX_U8 and ARM_VLC32 must be declared before inclusion */
+
+extern const OMX_U8 armVCM4P10_CAVLCTrailingOnes[62];           /* code -> TrailingOnes */
+extern const OMX_U8 armVCM4P10_CAVLCTotalCoeff[62];             /* code -> TotalCoeff */
+extern const ARM_VLC32 *armVCM4P10_CAVLCCoeffTokenTables[5];    /* 5 table pointers */
+extern const ARM_VLC32 armVCM4P10_CAVLCLevelPrefix[17];         /* 17 VLC entries */
+extern const ARM_VLC32 *armVCM4P10_CAVLCTotalZeroTables[15];    /* 15 table pointers */
+extern const ARM_VLC32 *armVCM4P10_CAVLCTotalZeros2x2Tables[3]; /* 3 table pointers */
+extern const ARM_VLC32 *armVCM4P10_CAVLCRunBeforeTables[7];     /* 7 table pointers */
+
+#endif /* ARMVCM4P10_CAVLCTABLES_H */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CAVLCTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CAVLCTables.c
new file mode 100644
index 0000000..f4e36ad
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CAVLCTables.c
@@ -0,0 +1,703 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  armVCM4P10_CAVLCTables.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * CAVLC tables for H.264
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM_Bitstream.h"
+#include "armVC.h"
+#include "armVCM4P10_CAVLCTables.h"
+
+/* Tables mapping a code to TrailingOnes and TotalCoeff */
+
+const OMX_U8 armVCM4P10_CAVLCTrailingOnes[62] = {   /* indexed by the decoded coeff_token index */
+ 0,             /* each row below pairs with the same row of armVCM4P10_CAVLCTotalCoeff */
+ 0, 1,
+ 0, 1, 2,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3
+};
+
+const OMX_U8 armVCM4P10_CAVLCTotalCoeff[62] = {   /* indexed by the decoded coeff_token index */
+ 0,             /* one row per TotalCoeff value, 0 up to 16 */
+ 1, 1,
+ 2, 2, 2,
+ 3, 3, 3, 3,
+ 4, 4, 4, 4,
+ 5, 5, 5, 5,
+ 6, 6, 6, 6,
+ 7, 7, 7, 7,
+ 8, 8, 8, 8,
+ 9, 9, 9, 9,
+ 10, 10, 10, 10,
+ 11, 11, 11, 11,
+ 12, 12, 12, 12,
+ 13, 13, 13, 13,
+ 14, 14, 14, 14,
+ 15, 15, 15, 15,
+ 16, 16, 16, 16
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken0[63] = {   /* coeff_token codes selected by nTable 0: 62 codes, then a {0,0} entry */
+    {  1, 0x0001 },   /* NOTE(review): pairs look like { code length in bits, code value } - confirm against ARM_VLC32 */
+    {  6, 0x0005 },
+    {  2, 0x0001 },
+    {  8, 0x0007 },
+    {  6, 0x0004 },
+    {  3, 0x0001 },
+    {  9, 0x0007 },
+    {  8, 0x0006 },
+    {  7, 0x0005 },
+    {  5, 0x0003 },
+    { 10, 0x0007 },
+    {  9, 0x0006 },
+    {  8, 0x0005 },
+    {  6, 0x0003 },
+    { 11, 0x0007 },
+    { 10, 0x0006 },
+    {  9, 0x0005 },
+    {  7, 0x0004 },
+    { 13, 0x000f },
+    { 11, 0x0006 },
+    { 10, 0x0005 },
+    {  8, 0x0004 },
+    { 13, 0x000b },
+    { 13, 0x000e },
+    { 11, 0x0005 },
+    {  9, 0x0004 },
+    { 13, 0x0008 },
+    { 13, 0x000a },
+    { 13, 0x000d },
+    { 10, 0x0004 },
+    { 14, 0x000f },
+    { 14, 0x000e },
+    { 13, 0x0009 },
+    { 11, 0x0004 },
+    { 14, 0x000b },
+    { 14, 0x000a },
+    { 14, 0x000d },
+    { 13, 0x000c },
+    { 15, 0x000f },
+    { 15, 0x000e },
+    { 14, 0x0009 },
+    { 14, 0x000c },
+    { 15, 0x000b },
+    { 15, 0x000a },
+    { 15, 0x000d },
+    { 14, 0x0008 },
+    { 16, 0x000f },
+    { 15, 0x0001 },
+    { 15, 0x0009 },
+    { 15, 0x000c },
+    { 16, 0x000b },
+    { 16, 0x000e },
+    { 16, 0x000d },
+    { 15, 0x0008 },
+    { 16, 0x0007 },
+    { 16, 0x000a },
+    { 16, 0x0009 },
+    { 16, 0x000c },
+    { 16, 0x0004 },
+    { 16, 0x0006 },
+    { 16, 0x0005 },
+    { 16, 0x0008 },
+    {  0, 0x0000 }    /* presumably the end-of-table marker for armUnPackVLC32 - confirm */
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken1[63] = {   /* coeff_token codes selected by nTable 1: 62 codes, then a {0,0} entry */
+    {  2, 0x0003 },
+    {  6, 0x000b },
+    {  2, 0x0002 },
+    {  6, 0x0007 },
+    {  5, 0x0007 },
+    {  3, 0x0003 },
+    {  7, 0x0007 },
+    {  6, 0x000a },
+    {  6, 0x0009 },
+    {  4, 0x0005 },
+    {  8, 0x0007 },
+    {  6, 0x0006 },
+    {  6, 0x0005 },
+    {  4, 0x0004 },
+    {  8, 0x0004 },
+    {  7, 0x0006 },
+    {  7, 0x0005 },
+    {  5, 0x0006 },
+    {  9, 0x0007 },
+    {  8, 0x0006 },
+    {  8, 0x0005 },
+    {  6, 0x0008 },
+    { 11, 0x000f },
+    {  9, 0x0006 },
+    {  9, 0x0005 },
+    {  6, 0x0004 },
+    { 11, 0x000b },
+    { 11, 0x000e },
+    { 11, 0x000d },
+    {  7, 0x0004 },
+    { 12, 0x000f },
+    { 11, 0x000a },
+    { 11, 0x0009 },
+    {  9, 0x0004 },
+    { 12, 0x000b },
+    { 12, 0x000e },
+    { 12, 0x000d },
+    { 11, 0x000c },
+    { 12, 0x0008 },
+    { 12, 0x000a },
+    { 12, 0x0009 },
+    { 11, 0x0008 },
+    { 13, 0x000f },
+    { 13, 0x000e },
+    { 13, 0x000d },
+    { 12, 0x000c },
+    { 13, 0x000b },
+    { 13, 0x000a },
+    { 13, 0x0009 },
+    { 13, 0x000c },
+    { 13, 0x0007 },
+    { 14, 0x000b },
+    { 13, 0x0006 },
+    { 13, 0x0008 },
+    { 14, 0x0009 },
+    { 14, 0x0008 },
+    { 14, 0x000a },
+    { 13, 0x0001 },
+    { 14, 0x0007 },
+    { 14, 0x0006 },
+    { 14, 0x0005 },
+    { 14, 0x0004 },
+    {  0, 0x0000 }    /* presumably the end-of-table marker for armUnPackVLC32 - confirm */
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken2[63] = {   /* coeff_token codes selected by nTable 2: 62 codes, then a {0,0} entry */
+    {  4, 0x000f },
+    {  6, 0x000f },
+    {  4, 0x000e },
+    {  6, 0x000b },
+    {  5, 0x000f },
+    {  4, 0x000d },
+    {  6, 0x0008 },
+    {  5, 0x000c },
+    {  5, 0x000e },
+    {  4, 0x000c },
+    {  7, 0x000f },
+    {  5, 0x000a },
+    {  5, 0x000b },
+    {  4, 0x000b },
+    {  7, 0x000b },
+    {  5, 0x0008 },
+    {  5, 0x0009 },
+    {  4, 0x000a },
+    {  7, 0x0009 },
+    {  6, 0x000e },
+    {  6, 0x000d },
+    {  4, 0x0009 },
+    {  7, 0x0008 },
+    {  6, 0x000a },
+    {  6, 0x0009 },
+    {  4, 0x0008 },
+    {  8, 0x000f },
+    {  7, 0x000e },
+    {  7, 0x000d },
+    {  5, 0x000d },
+    {  8, 0x000b },
+    {  8, 0x000e },
+    {  7, 0x000a },
+    {  6, 0x000c },
+    {  9, 0x000f },
+    {  8, 0x000a },
+    {  8, 0x000d },
+    {  7, 0x000c },
+    {  9, 0x000b },
+    {  9, 0x000e },
+    {  8, 0x0009 },
+    {  8, 0x000c },
+    {  9, 0x0008 },
+    {  9, 0x000a },
+    {  9, 0x000d },
+    {  8, 0x0008 },
+    { 10, 0x000d },
+    {  9, 0x0007 },
+    {  9, 0x0009 },
+    {  9, 0x000c },
+    { 10, 0x0009 },
+    { 10, 0x000c },
+    { 10, 0x000b },
+    { 10, 0x000a },
+    { 10, 0x0005 },
+    { 10, 0x0008 },
+    { 10, 0x0007 },
+    { 10, 0x0006 },
+    { 10, 0x0001 },
+    { 10, 0x0004 },
+    { 10, 0x0003 },
+    { 10, 0x0002 },
+    {  0, 0x0000 }    /* presumably the end-of-table marker for armUnPackVLC32 - confirm */
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken3[63] = {   /* coeff_token codes selected by nTable 3: 62 codes, then a {0,0} entry */
+    {  6, 0x0003 },   /* every entry in this table has 6 as its first field */
+    {  6, 0x0000 },
+    {  6, 0x0001 },
+    {  6, 0x0004 },
+    {  6, 0x0005 },
+    {  6, 0x0006 },
+    {  6, 0x0008 },
+    {  6, 0x0009 },
+    {  6, 0x000a },
+    {  6, 0x000b },
+    {  6, 0x000c },
+    {  6, 0x000d },
+    {  6, 0x000e },
+    {  6, 0x000f },
+    {  6, 0x0010 },
+    {  6, 0x0011 },
+    {  6, 0x0012 },
+    {  6, 0x0013 },
+    {  6, 0x0014 },
+    {  6, 0x0015 },
+    {  6, 0x0016 },
+    {  6, 0x0017 },
+    {  6, 0x0018 },
+    {  6, 0x0019 },
+    {  6, 0x001a },
+    {  6, 0x001b },
+    {  6, 0x001c },
+    {  6, 0x001d },
+    {  6, 0x001e },
+    {  6, 0x001f },
+    {  6, 0x0020 },
+    {  6, 0x0021 },
+    {  6, 0x0022 },
+    {  6, 0x0023 },
+    {  6, 0x0024 },
+    {  6, 0x0025 },
+    {  6, 0x0026 },
+    {  6, 0x0027 },
+    {  6, 0x0028 },
+    {  6, 0x0029 },
+    {  6, 0x002a },
+    {  6, 0x002b },
+    {  6, 0x002c },
+    {  6, 0x002d },
+    {  6, 0x002e },
+    {  6, 0x002f },
+    {  6, 0x0030 },
+    {  6, 0x0031 },
+    {  6, 0x0032 },
+    {  6, 0x0033 },
+    {  6, 0x0034 },
+    {  6, 0x0035 },
+    {  6, 0x0036 },
+    {  6, 0x0037 },
+    {  6, 0x0038 },
+    {  6, 0x0039 },
+    {  6, 0x003a },
+    {  6, 0x003b },
+    {  6, 0x003c },
+    {  6, 0x003d },
+    {  6, 0x003e },
+    {  6, 0x003f },
+    {  0, 0x0000 }    /* presumably the end-of-table marker for armUnPackVLC32 - confirm */
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken4[15] = {   /* coeff_token codes selected by nTable 4: 14 codes, then a {0,0} entry */
+    {  2, 0x0001 },   /* 14 codes reach only the first 14 index-table entries, i.e. TotalCoeff 0..4 */
+    {  6, 0x0007 },
+    {  1, 0x0001 },
+    {  6, 0x0004 },
+    {  6, 0x0006 },
+    {  3, 0x0001 },
+    {  6, 0x0003 },
+    {  7, 0x0003 },
+    {  7, 0x0002 },
+    {  6, 0x0005 },
+    {  6, 0x0002 },
+    {  8, 0x0003 },
+    {  8, 0x0002 },
+    {  7, 0x0000 },
+    {  0, 0x0000 }    /* presumably the end-of-table marker for armUnPackVLC32 - confirm */
+};
+
+
+const ARM_VLC32 *armVCM4P10_CAVLCCoeffTokenTables[5] = {   /* indexed by nTable in armVCM4P10_DecodeCoeffsToPair */
+     armVCM4P10_CAVLCCoeffToken0,   /* nTable 0 */
+     armVCM4P10_CAVLCCoeffToken1,   /* nTable 1 */
+     armVCM4P10_CAVLCCoeffToken2,   /* nTable 2 */
+     armVCM4P10_CAVLCCoeffToken3,   /* nTable 3 */
+     armVCM4P10_CAVLCCoeffToken4    /* nTable 4: the short 14-code table */
+};
+
+/* Table for level_prefix */
+
+const ARM_VLC32 armVCM4P10_CAVLCLevelPrefix[17] = {   /* level_prefix codes for prefixes 0..15, then a {0,0} entry */
+    {  1, 1},   /* NOTE(review): looks like { code length, code value }, i.e. N-1 zero bits then a 1 - confirm */
+    {  2, 1},
+    {  3, 1},
+    {  4, 1},
+    {  5, 1},
+    {  6, 1},
+    {  7, 1},
+    {  8, 1},
+    {  9, 1},
+    { 10, 1},
+    { 11, 1},
+    { 12, 1},
+    { 13, 1},
+    { 14, 1},
+    { 15, 1},
+    { 16, 1},
+    {  0, 0}    /* presumably the end-of-table marker for armUnPackVLC32 - confirm */
+};
+
+/* Tables for total_zeros */
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros1[17] = {   /* total_zeros codes used when TotalCoeff == 1 */
+    {  1, 0x0001 },   /* entry index is the decoded total_zeros value */
+    {  3, 0x0003 },
+    {  3, 0x0002 },
+    {  4, 0x0003 },
+    {  4, 0x0002 },
+    {  5, 0x0003 },
+    {  5, 0x0002 },
+    {  6, 0x0003 },
+    {  6, 0x0002 },
+    {  7, 0x0003 },
+    {  7, 0x0002 },
+    {  8, 0x0003 },
+    {  8, 0x0002 },
+    {  9, 0x0003 },
+    {  9, 0x0002 },
+    {  9, 0x0001 },
+    {  0, 0x0000 }    /* presumably the end-of-table marker for armUnPackVLC32 - confirm */
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros2[16] = {   /* total_zeros codes used when TotalCoeff == 2 */
+    {  3, 0x0007 },
+    {  3, 0x0006 },
+    {  3, 0x0005 },
+    {  3, 0x0004 },
+    {  3, 0x0003 },
+    {  4, 0x0005 },
+    {  4, 0x0004 },
+    {  4, 0x0003 },
+    {  4, 0x0002 },
+    {  5, 0x0003 },
+    {  5, 0x0002 },
+    {  6, 0x0003 },
+    {  6, 0x0002 },
+    {  6, 0x0001 },
+    {  6, 0x0000 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros3[15] = {   /* total_zeros codes used when TotalCoeff == 3 */
+    {  4, 0x0005 },
+    {  3, 0x0007 },
+    {  3, 0x0006 },
+    {  3, 0x0005 },
+    {  4, 0x0004 },
+    {  4, 0x0003 },
+    {  3, 0x0004 },
+    {  3, 0x0003 },
+    {  4, 0x0002 },
+    {  5, 0x0003 },
+    {  5, 0x0002 },
+    {  6, 0x0001 },
+    {  5, 0x0001 },
+    {  6, 0x0000 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros4[14] = {   /* total_zeros codes used when TotalCoeff == 4 */
+    {  5, 0x0003 },
+    {  3, 0x0007 },
+    {  4, 0x0005 },
+    {  4, 0x0004 },
+    {  3, 0x0006 },
+    {  3, 0x0005 },
+    {  3, 0x0004 },
+    {  4, 0x0003 },
+    {  3, 0x0003 },
+    {  4, 0x0002 },
+    {  5, 0x0002 },
+    {  5, 0x0001 },
+    {  5, 0x0000 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros5[13] = {   /* total_zeros codes used when TotalCoeff == 5 */
+    {  4, 0x0005 },
+    {  4, 0x0004 },
+    {  4, 0x0003 },
+    {  3, 0x0007 },
+    {  3, 0x0006 },
+    {  3, 0x0005 },
+    {  3, 0x0004 },
+    {  3, 0x0003 },
+    {  4, 0x0002 },
+    {  5, 0x0001 },
+    {  4, 0x0001 },
+    {  5, 0x0000 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros6[12] = {   /* total_zeros codes used when TotalCoeff == 6 */
+    {  6, 0x0001 },
+    {  5, 0x0001 },
+    {  3, 0x0007 },
+    {  3, 0x0006 },
+    {  3, 0x0005 },
+    {  3, 0x0004 },
+    {  3, 0x0003 },
+    {  3, 0x0002 },
+    {  4, 0x0001 },
+    {  3, 0x0001 },
+    {  6, 0x0000 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros7[11] = {   /* total_zeros codes used when TotalCoeff == 7 */
+    {  6, 0x0001 },
+    {  5, 0x0001 },
+    {  3, 0x0005 },
+    {  3, 0x0004 },
+    {  3, 0x0003 },
+    {  2, 0x0003 },
+    {  3, 0x0002 },
+    {  4, 0x0001 },
+    {  3, 0x0001 },
+    {  6, 0x0000 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros8[10] = {   /* total_zeros codes used when TotalCoeff == 8 */
+    {  6, 0x0001 },
+    {  4, 0x0001 },
+    {  5, 0x0001 },
+    {  3, 0x0003 },
+    {  2, 0x0003 },
+    {  2, 0x0002 },
+    {  3, 0x0002 },
+    {  3, 0x0001 },
+    {  6, 0x0000 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros9[9] = {   /* total_zeros codes used when TotalCoeff == 9 */
+    {  6, 0x0001 },
+    {  6, 0x0000 },
+    {  4, 0x0001 },
+    {  2, 0x0003 },
+    {  2, 0x0002 },
+    {  3, 0x0001 },
+    {  2, 0x0001 },
+    {  5, 0x0001 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros10[8] = {   /* total_zeros codes used when TotalCoeff == 10 */
+    {  5, 0x0001 },
+    {  5, 0x0000 },
+    {  3, 0x0001 },
+    {  2, 0x0003 },
+    {  2, 0x0002 },
+    {  2, 0x0001 },
+    {  4, 0x0001 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros11[7] = {   /* total_zeros codes used when TotalCoeff == 11 */
+    {  4, 0x0000 },
+    {  4, 0x0001 },
+    {  3, 0x0001 },
+    {  3, 0x0002 },
+    {  1, 0x0001 },
+    {  3, 0x0003 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros12[6] = {   /* total_zeros codes used when TotalCoeff == 12 */
+    {  4, 0x0000 },
+    {  4, 0x0001 },
+    {  2, 0x0001 },
+    {  1, 0x0001 },
+    {  3, 0x0001 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros13[5] = {   /* total_zeros codes used when TotalCoeff == 13 */
+    {  3, 0x0000 },
+    {  3, 0x0001 },
+    {  1, 0x0001 },
+    {  2, 0x0001 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros14[4] = {   /* total_zeros codes used when TotalCoeff == 14 */
+    {  2, 0x0000 },
+    {  2, 0x0001 },
+    {  1, 0x0001 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros15[3] = {   /* total_zeros codes used when TotalCoeff == 15 */
+    {  1, 0x0000 },
+    {  1, 0x0001 },
+    {  0, 0x0000 }
+};
+
+const ARM_VLC32 *armVCM4P10_CAVLCTotalZeroTables[15] = {   /* indexed by TotalCoeff-1 in armVCM4P10_DecodeCoeffsToPair */
+     armVCM4P10_CAVLCTotalZeros1, 
+     armVCM4P10_CAVLCTotalZeros2,
+     armVCM4P10_CAVLCTotalZeros3, 
+     armVCM4P10_CAVLCTotalZeros4, 
+     armVCM4P10_CAVLCTotalZeros5, 
+     armVCM4P10_CAVLCTotalZeros6, 
+     armVCM4P10_CAVLCTotalZeros7, 
+     armVCM4P10_CAVLCTotalZeros8, 
+     armVCM4P10_CAVLCTotalZeros9, 
+     armVCM4P10_CAVLCTotalZeros10, 
+     armVCM4P10_CAVLCTotalZeros11, 
+     armVCM4P10_CAVLCTotalZeros12, 
+     armVCM4P10_CAVLCTotalZeros13, 
+     armVCM4P10_CAVLCTotalZeros14, 
+     armVCM4P10_CAVLCTotalZeros15 
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros2x2_1[5] = {   /* total_zeros codes for sMaxNumCoeff == 4, TotalCoeff == 1 */
+    {  1, 1 },   /* entry index is the decoded total_zeros value */
+    {  2, 1 },
+    {  3, 1 },
+    {  3, 0 },
+    {  0, 0 }    /* presumably the end-of-table marker for armUnPackVLC32 - confirm */
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros2x2_2[4] = {   /* total_zeros codes for sMaxNumCoeff == 4, TotalCoeff == 2 */
+    {  1, 1 },
+    {  2, 1 },
+    {  2, 0 },
+    {  0, 0 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros2x2_3[3] = {   /* total_zeros codes for sMaxNumCoeff == 4, TotalCoeff == 3 */
+    {  1, 1 },
+    {  1, 0 },
+    {  0, 0 }
+};
+
+const ARM_VLC32 *armVCM4P10_CAVLCTotalZeros2x2Tables[3] = {   /* indexed by TotalCoeff-1 when sMaxNumCoeff == 4 */
+     armVCM4P10_CAVLCTotalZeros2x2_1, 
+     armVCM4P10_CAVLCTotalZeros2x2_2, 
+     armVCM4P10_CAVLCTotalZeros2x2_3
+};
+
+
+/* Tables for run_before */
+
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore1[3] = {   /* run_before codes used when 1 zero is left */
+    {  1, 0x0001 },   /* entry index is the decoded run value */
+    {  1, 0x0000 },
+    {  0, 0x0000 }    /* presumably the end-of-table marker for armUnPackVLC32 - confirm */
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore2[4] = {   /* run_before codes used when 2 zeros are left */
+    {  1, 0x0001 },
+    {  2, 0x0001 },
+    {  2, 0x0000 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore3[5] = {   /* run_before codes used when 3 zeros are left */
+    {  2, 0x0003 },
+    {  2, 0x0002 },
+    {  2, 0x0001 },
+    {  2, 0x0000 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore4[6] = {   /* run_before codes used when 4 zeros are left */
+    {  2, 0x0003 },
+    {  2, 0x0002 },
+    {  2, 0x0001 },
+    {  3, 0x0001 },
+    {  3, 0x0000 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore5[7] = {   /* run_before codes used when 5 zeros are left */
+    {  2, 0x0003 },
+    {  2, 0x0002 },
+    {  3, 0x0003 },
+    {  3, 0x0002 },
+    {  3, 0x0001 },
+    {  3, 0x0000 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore6[8] = {   /* run_before codes used when 6 zeros are left */
+    {  2, 0x0003 },
+    {  3, 0x0000 },
+    {  3, 0x0001 },
+    {  3, 0x0003 },
+    {  3, 0x0002 },
+    {  3, 0x0005 },
+    {  3, 0x0004 },
+    {  0, 0x0000 }
+};
+
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore7[16] = {   /* run_before codes shared by every case with more than 6 zeros left */
+    {  3, 0x0007 },   /* 15 codes, so this table can yield runs 0..14 regardless of how many zeros are left */
+    {  3, 0x0006 },
+    {  3, 0x0005 },
+    {  3, 0x0004 },
+    {  3, 0x0003 },
+    {  3, 0x0002 },
+    {  3, 0x0001 },
+    {  4, 0x0001 },
+    {  5, 0x0001 },
+    {  6, 0x0001 },
+    {  7, 0x0001 },
+    {  8, 0x0001 },
+    {  9, 0x0001 },
+    { 10, 0x0001 },
+    { 11, 0x0001 },
+    {  0, 0x0000 }
+};
+
+const ARM_VLC32 *armVCM4P10_CAVLCRunBeforeTables[7] = {   /* indexed by min(zeros left, 7)-1 in armVCM4P10_DecodeCoeffsToPair */
+     armVCM4P10_CAVLCRunBefore1, 
+     armVCM4P10_CAVLCRunBefore2, 
+     armVCM4P10_CAVLCRunBefore3, 
+     armVCM4P10_CAVLCRunBefore4, 
+     armVCM4P10_CAVLCRunBefore5, 
+     armVCM4P10_CAVLCRunBefore6, 
+     armVCM4P10_CAVLCRunBefore7
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c
new file mode 100644
index 0000000..e4bedc2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c
@@ -0,0 +1,133 @@
+/**
+ * 
+ * File Name:  armVCM4P10_CompareMotionCostToMV.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Description:
+ * Contains module for comparing motion vectors and SAD's to decide 
+ * the best MV and SAD
+ *
+ */
+  
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P10_ExpGolBitsUsed
+ *
+ * Description:
+ * Computes the length of the signed Exp-Golomb code for a given value.
+ * Only the length is computed; no code word is produced.
+ *
+ * Remarks:
+ * val is first mapped to an unsigned code number: a positive val maps
+ * to 2*|val| - 1, zero or a negative val maps to 2*|val|.
+ * armLogSize supplies the size of (codeNum + 1).
+ *
+ * Parameters:
+ * [in]  val    Signed number for which the Exp-Golomb code length
+ *             has to be calculated
+ *
+ * Return Value:
+ *             (2 * armLogSize (codeNum + 1)) - 1, i.e. the length of
+ *             the Exp-Golomb code for val
+ */
+
+static OMX_U16 armVCM4P10_ExpGolBitsUsed (OMX_S16 val)
+{
+    OMX_U16 magnitude = armAbs (val);
+    OMX_U16 codeNum;
+    OMX_U16 codeLength;
+
+    /* Signed -> unsigned mapping: positive values take the odd code
+     * numbers, zero and negative values take the even ones */
+    codeNum = (val > 0) ? (2 * magnitude) - 1 : 2 * magnitude;
+
+    /* Twice the size of (codeNum + 1), less one, is the code length */
+    codeLength = (2 * armLogSize (codeNum + 1)) - 1;
+
+    return codeLength;
+}
+                
+
+/**
+ * Function: armVCM4P10_CompareMotionCostToMV
+ *
+ * Description:
+ * Decides whether a candidate motion vector beats the best one found so
+ * far and, if it does, records it together with its motion cost.
+ *
+ * Remarks:
+ * The candidate cost is candSAD + nLamda * (bits used by diffMV.dx +
+ * bits used by diffMV.dy). A lower cost always wins; on an exact tie
+ * the candidate wins only when mvX^2 + mvY^2 is strictly smaller than
+ * the same sum for the current best vector.
+ *
+ * Parameters:
+ * [in]      mvX        x coordinate of the candidate motion vector in 1/4 pel units
+ * [in]      mvY        y coordinate of the candidate motion vector in 1/4 pel units
+ * [in]      diffMV     differential MV
+ * [in]      candSAD    candidate SAD
+ * [in]      nLamda     lambda factor; used to compute the motion cost
+ * [in,out]  bestMV     best MV up to the previous iteration; overwritten
+ *                      with (mvX, mvY) when the candidate wins
+ * [in,out]  pBestCost  best motion cost so far; overwritten with the
+ *                      candidate cost when the candidate wins
+ *
+ * Return Value:
+ * OMX_INT -- 1 to indicate that the candidate became the new best
+ *            0 to indicate that the previous best was kept
+ */
+
+OMX_INT armVCM4P10_CompareMotionCostToMV (
+    OMX_S16  mvX,
+    OMX_S16  mvY,
+    OMXVCMotionVector diffMV,
+    OMX_INT candSAD,
+    OMXVCMotionVector *bestMV,
+    OMX_U32 nLamda,
+    OMX_S32 *pBestCost
+)
+{
+    OMX_U16 mvBits;
+    OMX_S32 candCost;
+    OMX_INT candWins;
+
+    /* Bits needed to code both components of the differential MV */
+    mvBits  = armVCM4P10_ExpGolBitsUsed (diffMV.dx);
+    mvBits += armVCM4P10_ExpGolBitsUsed (diffMV.dy);
+
+    /* Motion cost = SAD + lambda * bits */
+    candCost = candSAD + (nLamda * mvBits);
+
+    if (candCost != *pBestCost)
+    {
+        candWins = (candCost < *pBestCost);
+    }
+    else
+    {
+        /* Equal cost: the shorter motion vector is preferred */
+        candWins = ( (mvX * mvX + mvY * mvY) <
+            ((bestMV->dx * bestMV->dx) + (bestMV->dy * bestMV->dy)) );
+    }
+
+    if (!candWins)
+    {
+        return 0;
+    }
+    *pBestCost = candCost;
+    bestMV->dx = mvX;
+    bestMV->dy = mvY;
+    return 1;
+}
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DeBlockPixel.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DeBlockPixel.c
new file mode 100644
index 0000000..f4fb1d9
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DeBlockPixel.c
@@ -0,0 +1,151 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  armVCM4P10_DeBlockPixel.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * H.264 luma deblock module
+ * 
+ */
+
+#ifdef DEBUG_ARMVCM4P10_DEBLOCKPIXEL
+#undef DEBUG_ON
+#define DEBUG_ON
+#endif /* DEBUG_ARMVCM4P10_DEBLOCKPIXEL */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description
+ * Deblock the pixels around one position on a block boundary. Nothing is
+ * written when bS is 0 or when a gradient test against alpha/beta fails.
+ *
+ * Parameters:
+ * [in]  pQ0         Pointer to pixel q0
+ * [in]  Step        Step between pixels q0 and q1
+ * [in]  tC0         Edge threshold value
+ * [in]  alpha       alpha threshold value
+ * [in]  beta        beta threshold value
+ * [in]  bS          deblocking strength
+ * [in]  ChromaFlag  True for chroma blocks
+ * [out] pQ0         Deblocked pixels
+ */
+
+void armVCM4P10_DeBlockPixel(
+    OMX_U8 *pQ0,    /* pointer to the pixel q0 */
+    int Step,       /* step between pixels q0 and q1 */
+    int tC0,        /* edge threshold value */
+    int alpha,      /* alpha */
+    int beta,       /* beta */
+    int bS,         /* deblocking strength */
+    int ChromaFlag
+)
+{
+    int p3, p2, p1, p0, q0, q1, q2, q3;
+    int ap, aq, delta;
+    int isLuma, smallStep;
+
+    /* Strength 0: the edge is not filtered at all */
+    if (bS==0)
+    {
+        return;
+    }
+
+    /* Fetch four pixels on each side; p0 and q0 touch the boundary */
+    p0 = pQ0[-1*Step];    q0 = pQ0[ 0*Step];
+    p1 = pQ0[-2*Step];    q1 = pQ0[ 1*Step];
+    p2 = pQ0[-3*Step];    q2 = pQ0[ 2*Step];
+    p3 = pQ0[-4*Step];    q3 = pQ0[ 3*Step];
+
+    /* Filter only while all three gradients stay below their
+     * thresholds; otherwise leave the pixels unchanged */
+    if (!(armAbs(p0-q0)<alpha && armAbs(p1-p0)<beta && armAbs(q1-q0)<beta))
+    {
+        DEBUG_PRINTF_10("DeBlockPixel: %02x %02x %02x %02x | %02x %02x %02x %02x alpha=%d beta=%d\n",
+            p3, p2, p1, p0, q0, q1, q2, q3, alpha, beta);
+        return;
+    }
+
+    ap = armAbs(p2 - p0);
+    aq = armAbs(q2 - q0);
+    isLuma = (ChromaFlag==0);
+
+    if (bS >= 4)
+    {
+        /* bS==4 */
+        smallStep = (armAbs(p0-q0) < ((alpha>>2)+2));
+
+        /* p side: up to three pixels for luma, p0 only otherwise */
+        if (isLuma && smallStep && ap<beta)
+        {
+            pQ0[-1*Step] = (OMX_U8)((p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3);
+            pQ0[-2*Step] = (OMX_U8)((p2 + p1 + p0 + q0 + 2)>>2);
+            pQ0[-3*Step] = (OMX_U8)((2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3);
+        }
+        else
+        {
+            pQ0[-1*Step] = (OMX_U8)((2*p1 + p0 + q1 + 2)>>2);
+        }
+
+        /* q side: up to three pixels for luma, q0 only otherwise */
+        if (isLuma && smallStep && aq<beta)
+        {
+            pQ0[ 0*Step] = (OMX_U8)((q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3);
+            pQ0[ 1*Step] = (OMX_U8)((q2 + q1 + p0 + q0 + 2)>>2);
+            pQ0[ 2*Step] = (OMX_U8)((2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3);
+        }
+        else
+        {
+            pQ0[ 0*Step] = (OMX_U8)((2*q1 + q0 + p1 + 2)>>2);
+        }
+    }
+    else
+    {
+        /* bS below 4: the clipping range starts at tC0 and widens by
+         * one for chroma, or once per side with ap/aq < beta for luma */
+        int tC = tC0;
+
+        if (isLuma)
+        {
+            tC += (ap < beta);
+            tC += (aq < beta);
+        }
+        else
+        {
+            tC++;
+        }
+
+        delta = armClip(-tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4) >> 3));
+
+        pQ0[-1*Step] = (OMX_U8)armClip(0, 255, p0 + delta);
+        pQ0[ 0*Step] = (OMX_U8)armClip(0, 255, q0 - delta);
+
+        if (isLuma && ap<beta)
+        {
+            delta = armClip(-tC0, tC0, ((p2 + ((p0+q0+1)>>1) - (p1<<1))>>1));
+            pQ0[-2*Step] = (OMX_U8)(p1 + delta);
+        }
+
+        if (isLuma && aq<beta)
+        {
+            delta = armClip(-tC0, tC0, ((q2 + ((p0+q0+1)>>1) - (q1<<1))>>1));
+            pQ0[ 1*Step] = (OMX_U8)(q1 + delta);
+        }
+    }
+
+    DEBUG_PRINTF_13("DeBlockPixel: %02x %02x %02x %02x | %02x %02x %02x %02x bS=%d -> %02x %02x %02x %02x\n",
+        p3, p2, p1, p0, q0, q1, q2, q3, bS,
+        pQ0[-2*Step], pQ0[-1*Step],pQ0[0*Step],pQ0[1*Step]);
+
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c
new file mode 100644
index 0000000..7616add
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c
@@ -0,0 +1,267 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  armVCM4P10_DecodeCoeffsToPair.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * H.264 decode coefficients module
+ * 
+ */
+ 
+#ifdef DEBUG_ARMVCM4P10_DECODECOEFFSTOPAIR
+#undef DEBUG_ON
+#define DEBUG_ON
+#endif
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+#include "armVCM4P10_CAVLCTables.h"
+
+/* 4x4 DeZigZag table: looked up with a coefficient's scan position to get the position byte stored in the pair buffer */
+
+static const OMX_U8 armVCM4P10_ZigZag[16] =
+{
+    0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+/*
+ * Description:
+ * This function performs the work required by the OpenMAX
+ * DecodeCoeffsToPair function and DecodeChromaDCCoeffsToPair.
+ * Since most of the code is common we share it here.
+ *
+ * Remarks:
+ * Decoded counts are validated before they are used to derive
+ * coefficient positions: TotalCoeff and TotalCoeff + total_zeros must not
+ * exceed sMaxNumCoeff, and no run_before may exceed the zeros still left.
+ * A stream that violates this is rejected with OMX_Sts_Err.
+ *
+ * Parameters:
+ * [in]   ppBitStream   Double pointer to current byte in bit stream buffer
+ * [in]   pOffset       Pointer to current bit position in the byte pointed
+ *                      to by *ppBitStream
+ * [in]   sMaxNumCoeff  Maximum number of non-zero coefficients in current
+ *                      block (4,15 or 16)
+ * [in]   nTable        Table number (0 to 4) according to the five columns
+ *                      of Table 9-5 in the H.264 spec
+ * [out]  ppBitStream   *ppBitStream is updated after each block is decoded
+ * [out]  pOffset       *pOffset is updated after each block is decoded
+ * [out]  pNumCoeff     Pointer to the number of nonzero coefficients in
+ *                      this block
+ * [out]  ppPosCoefbuf  Double pointer to destination residual
+ *                      coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ */
+
+OMXResult armVCM4P10_DecodeCoeffsToPair(
+     const OMX_U8** ppBitStream,
+     OMX_S32* pOffset,
+     OMX_U8* pNumCoeff,
+     OMX_U8  **ppPosCoefbuf,
+     OMX_INT nTable,
+     OMX_INT sMaxNumCoeff
+ )
+{
+    int CoeffToken, TotalCoeff, TrailingOnes;
+    int Level, LevelCode, LevelPrefix, LevelSuffix, LevelSuffixSize;
+    int SuffixLength, Run, ZerosLeft, CoeffNum;
+    int i, Flags;
+    OMX_U8 *pPosCoefbuf = *ppPosCoefbuf;
+    OMX_S16 pLevel[16];
+    OMX_U8  pRun[16];
+
+    CoeffToken = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCCoeffTokenTables[nTable]);
+    armRetDataErrIf(CoeffToken == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err);
+
+    TrailingOnes = armVCM4P10_CAVLCTrailingOnes[CoeffToken];
+    TotalCoeff   = armVCM4P10_CAVLCTotalCoeff[CoeffToken];
+    *pNumCoeff   = (OMX_U8)TotalCoeff;
+
+    DEBUG_PRINTF_2("TotalCoeff = %d, TrailingOnes = %d\n", TotalCoeff, TrailingOnes);
+
+    /* A corrupt stream can announce more coefficients than the block has
+     * positions; reject it before any position is derived from the count */
+    armRetDataErrIf(TotalCoeff > sMaxNumCoeff, OMX_Sts_Err);
+
+    if (TotalCoeff == 0)
+    {
+        /* Nothing to do */
+        return OMX_Sts_NoErr;
+    }
+
+    /* Decode trailing ones */
+    for (i=TotalCoeff-1; i>=TotalCoeff-TrailingOnes; i--)
+    {
+        Level = armGetBits(ppBitStream, pOffset, 1) ? -1 : +1;
+        pLevel[i] = (OMX_S16)Level;
+
+        DEBUG_PRINTF_2("Level[%d] = %d\n", i, pLevel[i]);
+    }
+
+    /* Decode (non zero) level values */
+    SuffixLength = 0;
+    if (TotalCoeff>10 && TrailingOnes<3)
+    {
+        SuffixLength=1;
+    }
+    for ( ; i>=0; i--)
+    {
+        LevelPrefix = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCLevelPrefix);
+        armRetDataErrIf(LevelPrefix == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err);
+
+        LevelSuffixSize = SuffixLength;
+        if (LevelPrefix==14 && SuffixLength==0)
+        {
+            LevelSuffixSize = 4;
+        }
+        if (LevelPrefix==15)
+        {
+            LevelSuffixSize = 12;
+        }
+
+        LevelSuffix = 0;
+        if (LevelSuffixSize > 0)
+        {
+            LevelSuffix = armGetBits(ppBitStream, pOffset, LevelSuffixSize);
+        }
+
+        LevelCode = (LevelPrefix << SuffixLength) + LevelSuffix;
+        if (LevelPrefix==15 && SuffixLength==0)
+        {
+            LevelCode += 15;
+        }
+
+        /* LevelCode = 2*(magnitude-1) + sign */
+        if (i==TotalCoeff-1-TrailingOnes && TrailingOnes<3)
+        {
+            /* Level magnitude can't be 1 */
+            LevelCode += 2;
+        }
+        if (LevelCode & 1)
+        {
+            /* 2a+1 maps to -a-1 */
+            Level = (-LevelCode-1)>>1;
+        }
+        else
+        {
+            /* 2a+0 maps to +a+1 */
+            Level = (LevelCode+2)>>1;
+        }
+        pLevel[i] = (OMX_S16)Level;
+
+        DEBUG_PRINTF_2("Level[%d] = %d\n", i, pLevel[i]);
+
+        if (SuffixLength==0)
+        {
+            SuffixLength=1;
+        }
+        if ( ((LevelCode>>1)+1)>(3<<(SuffixLength-1)) && SuffixLength<6 )
+        {
+            SuffixLength++;
+        }
+    }
+
+    /* Decode run values */
+    ZerosLeft = 0;
+    if (TotalCoeff < sMaxNumCoeff)
+    {
+        /* Decode TotalZeros VLC */
+        if (sMaxNumCoeff==4)
+        {
+            ZerosLeft = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCTotalZeros2x2Tables[TotalCoeff-1]);
+        }
+        else
+        {
+            ZerosLeft = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCTotalZeroTables[TotalCoeff-1]);
+        }
+        armRetDataErrIf(ZerosLeft == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err);
+
+        /* Zeros and coefficients together must fit in the block, otherwise
+         * the positions computed below fall outside the DeZigZag table */
+        armRetDataErrIf(ZerosLeft > sMaxNumCoeff - TotalCoeff, OMX_Sts_Err);
+    }
+
+    DEBUG_PRINTF_1("TotalZeros = %d\n", ZerosLeft);
+
+    CoeffNum = ZerosLeft + TotalCoeff - 1;
+
+    for (i=TotalCoeff-1; i>0; i--)
+    {
+        Run = 0;
+        if (ZerosLeft > 0)
+        {
+            int Table = ZerosLeft;
+            if (Table > 6)
+            {
+                Table = 7;
+            }
+            Run = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCRunBeforeTables[Table-1]);
+            armRetDataErrIf(Run == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err);
+
+            /* The table shared by all counts above 6 can yield runs up to
+             * 14, so a run longer than the zeros left has to be rejected */
+            armRetDataErrIf(Run > ZerosLeft, OMX_Sts_Err);
+        }
+        pRun[i] = (OMX_U8)Run;
+
+        DEBUG_PRINTF_2("Run[%d] = %d\n", i, pRun[i]);
+
+        ZerosLeft -= Run;
+    }
+    pRun[0] = (OMX_U8)ZerosLeft;
+
+    DEBUG_PRINTF_1("Run[0] = %d\n", pRun[0]);
+
+    /* Fill in coefficients */
+    if (sMaxNumCoeff==15)
+    {
+        CoeffNum++; /* Skip the DC position */
+    }
+
+    for (i=(TotalCoeff-1); i>=0; i--)
+    {
+        Level = pLevel[i];
+
+        DEBUG_PRINTF_2("Coef[%d] = %d\n", CoeffNum, Level);
+
+        Flags = CoeffNum;
+        CoeffNum -= (pRun[i]+1);
+        if (sMaxNumCoeff>4)
+        {
+            /* Perform 4x4 DeZigZag */
+            Flags = armVCM4P10_ZigZag[Flags];
+        }
+        if (i==0)
+        {
+            /* End of block flag */
+            Flags += 0x20;
+        }
+        if (Level<-128 || Level>127)
+        {
+            /* Overflow flag */
+            Flags += 0x10;
+        }
+
+        *pPosCoefbuf++ = (OMX_U8)(Flags);
+        *pPosCoefbuf++ = (OMX_U8)(Level & 0xFF);
+        if (Flags & 0x10)
+        {
+            *pPosCoefbuf++ = (OMX_U8)(Level>>8);
+        }
+    }
+
+    *ppPosCoefbuf = pPosCoefbuf;
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DequantTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DequantTables.c
new file mode 100644
index 0000000..d9c2541
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DequantTables.c
@@ -0,0 +1,45 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  armVCM4P10_DequantTables.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 inverse quantize tables
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+#include "armVC.h"
+
+
+const OMX_U8 armVCM4P10_PosToVCol4x4[16] =   /* one entry per position of a 4x4 block; values are 0, 1 or 2 */
+{
+    0, 2, 0, 2,   /* NOTE(review): presumably the column of armVCM4P10_VMatrix to use for each position - confirm at the users */
+    2, 1, 2, 1,
+    0, 2, 0, 2,
+    2, 1, 2, 1
+};
+
+const OMX_U8 armVCM4P10_PosToVCol2x2[4] =   /* one entry per position of a 2x2 block; values are 0, 1 or 2 */
+{
+    0, 2,   /* NOTE(review): presumably the column of armVCM4P10_VMatrix to use for each position - confirm at the users */
+    2, 1
+};
+
+const OMX_U8 armVCM4P10_VMatrix[6][3] =   /* six rows of three scale factors */
+{
+    { 10, 16, 13 },   /* NOTE(review): presumably row = QP % 6 and column = a PosToVCol value - confirm at the users */
+    { 11, 18, 14 },
+    { 13, 20, 16 },
+    { 14, 23, 18 },
+    { 16, 25, 20 },
+    { 18, 29, 23 }
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c
new file mode 100644
index 0000000..93d54c3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c
@@ -0,0 +1,78 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  armVCM4P10_FwdTransformResidual4x4.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 transform module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+ 
+/*
+ * Description:
+ * Forward Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in]  pSrc		Source 4x4 block
+ * [out] pDst		Destination 4x4 block
+ *
+ */
+void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc)
+{
+    int i;
+    /* Forward 4x4 transform in two passes: rows first, then columns (pSrc is only read in pass 1) */
+    /* Pass 1: 1-D transform of each row of 4 coefficients, pSrc -> pDst */
+    for (i=0; i<16; i+=4)
+    {
+        int d0 = pSrc[i+0];
+        int d1 = pSrc[i+1];
+        int d2 = pSrc[i+2];
+        int d3 = pSrc[i+3];
+        int e0 = d0 + d3;           /* outer pair: sum        */
+        int e1 = d0 - d3;           /* outer pair: difference */
+        int e2 = d1 + d2;           /* inner pair: sum        */
+        int e3 = d1 - d2;           /* inner pair: difference */
+        int f0 = e0 + e2;
+        int f1 = (e1 * 2) + e3;     /* "* 2", not "<< 1": e1 may be negative and left-shifting a negative int is undefined in C */
+        int f2 = e0 - e2;
+        int f3 = e1 - (e3 * 2);     /* same reason as f1 */
+        pDst[i+0] = (OMX_S16)f0;
+        pDst[i+1] = (OMX_S16)f1;
+        pDst[i+2] = (OMX_S16)f2;
+        pDst[i+3] = (OMX_S16)f3;
+    }
+
+    /* Pass 2: the same transform down each column (stride 4), in place in pDst */
+    for (i=0; i<4; i++)
+    {
+        int f0 = pDst[i+0];
+        int f1 = pDst[i+4];
+        int f2 = pDst[i+8];
+        int f3 = pDst[i+12];
+        int g0 = f0 + f3;
+        int g1 = f0 - f3;
+        int g2 = f1 + f2;
+        int g3 = f1 - f2;
+        int h0 = g0 + g2;
+        int h1 = (g1 * 2) + g3;     /* multiplication for the same reason as in pass 1 */
+        int h2 = g0 - g2;
+        int h3 = g1 - (g3 * 2);
+        pDst[i+0] = (OMX_S16) h0;
+        pDst[i+4] = (OMX_S16) h1;
+        pDst[i+8] = (OMX_S16) h2;
+        pDst[i+12] = (OMX_S16) h3;
+    }
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c
new file mode 100644
index 0000000..8732f4f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c
@@ -0,0 +1,106 @@
+/**
+ * 
+ * File Name:  armVCM4P10_InterpolateHalfDiag_Luma.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function helps calculate half-pel luma interpolation
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+
+/**
+ * Function: armVCM4P10_InterpolateHalfDiag_Luma
+ * 
+ * Description:
+ * This function performs interpolation for (1/2, 1/2)  positions 
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc        Pointer to top-left corner of block used to interpolate 
+ *                      in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [out]   pDst        Pointer to the interpolation buffer of the (1/2,1/2)-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfDiag_Luma(  
+        const OMX_U8*     pSrc, 
+        OMX_U32     iSrcStep, 
+        OMX_U8*     pDst, 
+        OMX_U32     iDstStep,
+        OMX_U32     iWidth, 
+        OMX_U32     iHeight
+)
+{
+    OMX_S32     HalfCoeff, pos;
+    OMX_S16     Buf [21 * 16];  /* 21 rows by 16 pixels per row */
+    OMX_U32     y, x;
+
+    /* check for argument error; a block larger than 16x16 would overrun Buf */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iWidth > 16, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iHeight > 16, OMX_Sts_BadArgErr)
+
+    /*
+     * Pass 1: horizontal 6-tap filter (1, -5, 20, 20, -5, 1), kept unrounded, for
+     * source rows -2 .. iHeight + 2.  Buf is a 2D array of size (iWidth)X(iHeight + 5).
+     * The offset is computed in signed arithmetic because rows -2 and -1 lie before pSrc.
+     */
+    for (y = 0; y < iHeight + 5; y++)
+    {
+        for (x = 0; x < iWidth; x++)
+        {
+            pos = ((OMX_S32)y - 2) * (OMX_S32)iSrcStep + (OMX_S32)x;
+            HalfCoeff = 
+                pSrc [pos - 2] - 
+                5 * pSrc [pos - 1] + 
+                20 * pSrc [pos] + 
+                20 * pSrc [pos + 1] - 
+                5 * pSrc [pos + 2] + 
+                pSrc [pos + 3];
+            Buf [y * iWidth + x] = (OMX_S16)HalfCoeff;
+        } /* x */
+    } /* y */
+
+    /* Pass 2: the same 6-tap filter vertically over Buf, then round, divide by 1024 and clip to 0..255 */
+    for (y = 0; y < iHeight; y++)
+    {
+        for (x = 0; x < iWidth; x++)
+        {
+            pos = y * iWidth + x;
+            HalfCoeff = 
+                Buf [pos] - 
+                5 * Buf [pos + 1 * iWidth] + 
+                20 * Buf [pos + 2 * iWidth] + 
+                20 * Buf [pos + 3 * iWidth] - 
+                5 * Buf [pos + 4 * iWidth] + 
+                Buf [pos + 5 * iWidth];
+            HalfCoeff = (HalfCoeff + 512) >> 10;
+            HalfCoeff = armClip(0, 255, HalfCoeff);
+            pDst [y * iDstStep + x] = (OMX_U8) HalfCoeff;
+        }
+    }
+        
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c
new file mode 100644
index 0000000..89c0079
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c
@@ -0,0 +1,82 @@
+/**
+ * 
+ * File Name:  armVCM4P10_InterpolateHalfHor_Luma.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function helps calculate half-pel luma interpolation
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: armVCM4P10_InterpolateHalfHor_Luma
+ *
+ * Description:
+ * This function performs interpolation for horizontal 1/2-pel positions
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc        Pointer to top-left corner of block used to interpolate 
+ *                      in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [out]   pDst        Pointer to the interpolation buffer of the 1/2-pel 
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfHor_Luma(
+        const OMX_U8*     pSrc, 
+        OMX_U32     iSrcStep, 
+        OMX_U8*     pDst, 
+        OMX_U32     iDstStep, 
+        OMX_U32     iWidth, 
+        OMX_U32     iHeight
+)
+{
+    OMX_INT     col, row;
+    OMX_S32     Filtered, centre;
+
+    /* Reject NULL buffers before touching any sample */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
+
+    for (row = 0; row < iHeight; row++)
+    {
+        for (col = 0; col < iWidth; col++)
+        {
+            /* Index of the full-pel sample just left of the half-pel position */
+            centre = row * iSrcStep + col;
+
+            /* 6-tap filter (1, -5, 20, 20, -5, 1) across centre-2 .. centre+3 */
+            Filtered  = pSrc [centre - 2] + pSrc [centre + 3];
+            Filtered -= 5 * (pSrc [centre - 1] + pSrc [centre + 2]);
+            Filtered += 20 * (pSrc [centre] + pSrc [centre + 1]);
+
+            /* Round to nearest, divide by 32 and saturate to 0..255 */
+            Filtered = (Filtered + 16) >> 5;
+            Filtered = armClip(0, 255, Filtered);
+
+            pDst [row * iDstStep + col] = Filtered;
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c
new file mode 100644
index 0000000..f7ecfc5
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c
@@ -0,0 +1,84 @@
+/**
+ * 
+ * File Name:  armVCM4P10_InterpolateHalfVer_Luma.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function helps calculate half-pel luma interpolation
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: armVCM4P10_InterpolateHalfVer_Luma
+ * 
+ * Description:
+ * This function performs interpolation for vertical 1/2-pel positions 
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc        Pointer to top-left corner of block used to interpolate 
+ *                      in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [out]   pDst        Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfVer_Luma(   
+     const OMX_U8*    pSrc, 
+     OMX_U32    iSrcStep, 
+     OMX_U8*    pDst,
+     OMX_U32    iDstStep, 
+     OMX_U32    iWidth, 
+     OMX_U32    iHeight
+)
+{
+    OMX_S32     HalfCoeff, pos;
+    OMX_INT     y, x;
+    const OMX_S32 srcStep = (OMX_S32)iSrcStep;  /* signed: "pos - 2 * iSrcStep" in unsigned math wraps to a huge index with 64-bit pointers */
+
+    /* check for argument error */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
+
+    for (y = 0; y < iHeight; y++)
+    {
+        for (x = 0; x < iWidth; x++)
+        {
+            pos = y * srcStep + x;  /* full-pel sample directly above the half-pel position */
+            HalfCoeff =             /* 6-tap filter (1, -5, 20, 20, -5, 1) over rows -2 .. +3 */
+                pSrc [pos - 2 * srcStep] - 
+                5 * pSrc [pos - 1 * srcStep] + 
+                20 * pSrc [pos] + 
+                20 * pSrc [pos + 1 * srcStep] - 
+                5 * pSrc [pos + 2 * srcStep] + 
+                pSrc [pos + 3 * srcStep];
+
+            HalfCoeff = (HalfCoeff + 16) >> 5;          /* round and divide by 32 */
+            HalfCoeff = armClip(0, 255, HalfCoeff);     /* saturate to 8 bits */
+
+            pDst [y * iDstStep + x] = (OMX_U8) HalfCoeff;
+        }
+    }
+    
+    return OMX_Sts_NoErr;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c
new file mode 100644
index 0000000..1507d23
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c
@@ -0,0 +1,109 @@
+/**
+ * 
+ * File Name:  armVCM4P10_Interpolate_Chroma.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate interpolation for chroma components
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P10_Interpolate_Chroma
+ *
+ * Description:
+ * This function performs interpolation for chroma components.
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc            Pointer to top-left corner of block used to 
+ *                                              interpolate in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [in]    dx              Fractional part of horizontal motion vector 
+ *                                              component in 1/8 pixel unit (0~7) 
+ *  [in]    dy              Fractional part of vertical motion vector 
+ *                                              component in 1/8 pixel unit (0~7)
+ *  [out]   pDst            Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCM4P10_Interpolate_Chroma(
+        OMX_U8      *pSrc,
+        OMX_U32     iSrcStep,
+        OMX_U8      *pDst,
+        OMX_U32     iDstStep,
+        OMX_U32     iWidth,
+        OMX_U32     iHeight,
+        OMX_U32     dx,
+        OMX_U32     dy
+)
+{
+    OMX_U32     wTopLeft, wTopRight, wBotLeft, wBotRight;
+    OMX_U32     srcTop, srcBot, col, row;
+
+    /* validate pointers, 1/8-pel fractions, steps and block size */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(dx > 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(dy > 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iSrcStep == 0, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iDstStep == 0, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iWidth == 0, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iHeight == 0, OMX_Sts_BadArgErr)
+
+    /* fractional mv is (0, 0): plain block copy */
+    if (dx == 0 && dy == 0)
+    {
+        for (row = 0; row < iHeight; row++)
+        {
+            for (col = 0; col < iWidth; col++)
+            {
+                pDst [row * iDstStep + col] = pSrc [row * iSrcStep + col];
+            }
+        }
+    }
+    else
+    {
+        wTopLeft  = (8 - dx) * (8 - dy);    /* bilinear weights of the 2x2 neighbourhood; the four sum to 64 */
+        wTopRight = dx * (8 - dy);
+        wBotLeft  = (8 - dx) * dy;
+        wBotRight = dx * dy;
+
+        for (row = 0; row < iHeight; row++)
+        {
+            srcTop = row * iSrcStep;        /* offset of the current source row */
+            srcBot = srcTop + iSrcStep;     /* offset of the row below it       */
+            for (col = 0; col < iWidth; col++)
+            {
+                pDst [row * iDstStep + col] = (
+                    wTopLeft  * pSrc [srcTop + col] +
+                    wTopRight * pSrc [srcTop + col + 1] +
+                    wBotLeft  * pSrc [srcBot + col] +
+                    wBotRight * pSrc [srcBot + col + 1] +
+                    32) >> 6;
+            }
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Luma.c
new file mode 100644
index 0000000..89978dd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Luma.c
@@ -0,0 +1,195 @@
+/**
+ * 
+ * File Name:  armVCM4P10_Interpolate_Luma.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate interpolation for luma components
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: armM4P10_Copy
+ *
+ * Description:
+ * This function copies a block of data from source to destination
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc            Pointer to top-left corner of block
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination  buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [out]   pDst            Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+static OMXResult armM4P10_Copy(  
+    const OMX_U8*     pSrc,
+    OMX_U32     iSrcStep,
+    OMX_U8*     pDst,
+    OMX_U32     iDstStep, 
+    OMX_U32     iWidth,
+    OMX_U32     iHeight
+)
+{
+    OMX_U32     row = 0, col;
+
+    while (row < iHeight)
+    {
+        /* copy iWidth bytes of this row; each buffer advances by its own step */
+        for (col = 0; col < iWidth; col++)
+            pDst [row * iDstStep + col] = pSrc [row * iSrcStep + col];
+        row++;
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armVCM4P10_Interpolate_Luma
+ *
+ * Description:
+ * This function performs interpolation for luma components.
+ *
+ * Remarks:
+ *
+ *  [in]    pSrc            Pointer to top-left corner of block used to 
+ *                                              interpolate in the reconstructed frame plane
+ *  [in]    iSrcStep    Step of the source buffer.
+ *  [in]    iDstStep    Step of the destination(interpolation) buffer.
+ *  [in]    iWidth      Width of the current block
+ *  [in]    iHeight     Height of the current block
+ *  [in]    dx              Fractional part of horizontal motion vector 
+ *                                              component in 1/4 pixel unit (0~3) 
+ *  [in]    dy              Fractional part of vertical motion vector 
+ *                                              component in 1/4 pixel unit (0~3) 
+ *  [out]   pDst            Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+ OMXResult armVCM4P10_Interpolate_Luma(
+     const OMX_U8     *pSrc,
+     OMX_U32    iSrcStep,
+     OMX_U8     *pDst,
+     OMX_U32    iDstStep,
+     OMX_U32    iWidth,
+     OMX_U32    iHeight,
+     OMX_U32    dx,
+     OMX_U32    dy
+)
+{
+    OMX_U8      pBuf1 [16*16];
+    const OMX_U8      *pSrcHalfHor = pSrc;
+    const OMX_U8      *pSrcHalfVer = pSrc;
+    OMX_INT     xQuarter, yQuarter;
+
+    /* validate pointers and quarter-pel fractions */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(dx > 3, OMX_Sts_BadArgErr)
+    armRetArgErrIf(dy > 3, OMX_Sts_BadArgErr)
+
+    /* fractions 1 and 3 need an extra averaging step in that direction */
+    xQuarter = (dx == 1 || dx == 3);
+    yQuarter = (dy == 1 || dy == 3);
+
+    /* a fraction of 3 takes its second operand from the next column / next row */
+    if (dx == 3)
+    {
+        pSrcHalfVer += 1;
+    }
+    if (dy == 3)
+    {
+        pSrcHalfHor += iSrcStep;
+    }
+
+    /* Dispatch on sample position; pixels are named 'a' to 's' as in the H.264 standard */
+    if (dy == 0)
+    {
+        if (dx == 0)
+        {
+            /* G: full-pel position, plain copy */
+            armM4P10_Copy(pSrc, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+        }
+        else
+        {
+            /* a, b, c: horizontal half-pel, averaged with a full-pel sample for dx = 1, 3 */
+            armVCM4P10_InterpolateHalfHor_Luma
+                (pSrcHalfHor, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+            if (xQuarter)
+            {
+                armVCCOMM_Average 
+                    (pDst, pSrcHalfVer, iDstStep, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+            }
+        }
+    }
+    else if (dx == 0)
+    {
+        /* d, h, n: vertical half-pel, averaged with a full-pel sample for dy = 1, 3 */
+        armVCM4P10_InterpolateHalfVer_Luma
+            (pSrcHalfVer, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+        if (yQuarter)
+        {
+            armVCCOMM_Average 
+                (pDst, pSrcHalfHor, iDstStep, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+        }
+    }
+    else if (dx == 2 || dy == 2)
+    {
+        /* j: diagonal half-pel is always needed in this branch */
+        armVCM4P10_InterpolateHalfDiag_Luma
+            (pSrc, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+
+        if (xQuarter)
+        {
+            /* i, k: average j with the vertical half-pel held in pBuf1 */
+            armVCM4P10_InterpolateHalfVer_Luma
+                (pSrcHalfVer, iSrcStep, pBuf1, iWidth, iWidth, iHeight);
+            armVCCOMM_Average 
+                (pDst, pBuf1, iDstStep, iWidth, pDst, iDstStep, iWidth, iHeight);
+        }
+        if (yQuarter)
+        {
+            /* f, q: average j with the horizontal half-pel held in pBuf1 */
+            armVCM4P10_InterpolateHalfHor_Luma
+                (pSrcHalfHor, iSrcStep, pBuf1, iWidth, iWidth, iHeight);
+            armVCCOMM_Average 
+                (pDst, pBuf1, iDstStep, iWidth, pDst, iDstStep, iWidth, iHeight);
+        }
+    }
+    else /* dx=1,3 and dy=1,3 */
+    {
+        /* e, g, p, r: average of the horizontal (pBuf1) and vertical (pDst) half-pels */
+        armVCM4P10_InterpolateHalfHor_Luma
+            (pSrcHalfHor, iSrcStep, pBuf1, iWidth, iWidth, iHeight);
+        armVCM4P10_InterpolateHalfVer_Luma
+            (pSrcHalfVer, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+        armVCCOMM_Average 
+            (pBuf1, pDst, iWidth, iDstStep, pDst, iDstStep, iWidth, iHeight);
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c
new file mode 100644
index 0000000..b713073
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c
@@ -0,0 +1,88 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  armVCM4P10_PredictIntraDC4x4.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 4x4 intra prediction module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description:
+ * Perform DC style intra prediction, averaging upper and left block
+ *
+ * Parameters:
+ * [in]	pSrcLeft		Pointer to the buffer of 4 left coefficients:
+ *								p[x, y] (x = -1, y = 0..3)
+ * [in]	pSrcAbove		Pointer to the buffer of 4 above coefficients:
+ *								p[x,y] (x = 0..3, y = -1)
+ * [in]	leftStep		Step of left coefficient buffer
+ * [in]	dstStep			Step of the destination buffer
+ * [in]	availability	Neighboring 16x16 MB availability flag
+ * [out]	pDst			Pointer to the destination buffer
+ *
+ * Return Value:
+ * None
+ */
+
+void armVCM4P10_PredictIntraDC4x4(
+     const OMX_U8* pSrcLeft,
+     const OMX_U8 *pSrcAbove,
+     OMX_U8* pDst,
+     OMX_INT leftStep,
+     OMX_INT dstStep,
+     OMX_S32 availability        
+)
+{
+    int x, y, DcValue = 0, NumSides = 0;
+
+    /* Add up the 4 left neighbours, one per row of the left buffer */
+    if (availability & OMX_VC_LEFT)
+    {
+        DcValue += pSrcLeft[0] + pSrcLeft[leftStep];
+        DcValue += pSrcLeft[2*leftStep] + pSrcLeft[3*leftStep];
+        NumSides++;
+    }
+    /* Add up the 4 neighbours above */
+    if (availability & OMX_VC_UPPER)
+    {
+        DcValue += pSrcAbove[0] + pSrcAbove[1] + pSrcAbove[2] + pSrcAbove[3];
+        NumSides++;
+    }
+
+    switch (NumSides)
+    {
+    case 0:
+        DcValue = 128;                  /* neither side available: fixed value */
+        break;
+    case 1:
+        DcValue = (DcValue + 2) >> 2;   /* rounded mean of 4 samples */
+        break;
+    default: /* both sides */
+        DcValue = (DcValue + 4) >> 3;   /* rounded mean of 8 samples */
+        break;
+    }
+
+    /* Fill the whole 4x4 destination with the DC value */
+    for (y=0; y<4; y++)
+    {
+        for (x=0; x<4; x++)
+        {
+            pDst[y*dstStep+x] = (OMX_U8)DcValue;
+        }
+    }
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_QuantTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_QuantTables.c
new file mode 100644
index 0000000..f0b5bb0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_QuantTables.c
@@ -0,0 +1,31 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  armVCM4P10_QuantTables.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 quantize tables
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+
+const OMX_U32 armVCM4P10_MFMatrix[6][3] = /* 6 rows x 3 columns of multipliers; presumably row = QP % 6 and column = coefficient position class - TODO confirm against callers */
+{
+    {13107, 5243, 8066},    /* row 0 */
+    {11916, 4660, 7490},    /* row 1 */
+    {10082, 4194, 6554},    /* row 2 */
+    { 9362, 3647, 5825},    /* row 3 */
+    { 8192, 3355, 5243},    /* row 4 */
+    { 7282, 2893, 4559}     /* row 5 */
+}; 
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_SADQuar.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_SADQuar.c
new file mode 100644
index 0000000..a41e04b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_SADQuar.c
@@ -0,0 +1,84 @@
+/**
+ * 
+ * File Name:  armVCM4P10_SADQuar.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate SAD of pSrc with average of two Ref blocks
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P10_SADQuar
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the 
+ * average of the other two (pSrcRef0 and pSrcRef1)
+ *
+ * Remarks:
+ *
+ * [in]		pSrc				Pointer to the original block
+ * [in]		pSrcRef0		Pointer to reference block 0
+ * [in]		pSrcRef1		Pointer to reference block 1
+ * [in]		iSrcStep 		Step of the original block buffer
+ * [in]		iRefStep0		Step of reference block 0 
+ * [in]		iRefStep1 	Step of reference block 1 
+ * [in]		iHeight			Height of the block
+ * [in]		iWidth			Width of the block
+ * [out]	pDstSAD			Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCM4P10_SADQuar(
+    const OMX_U8*   pSrc,
+    const OMX_U8*   pSrcRef0,
+    const OMX_U8*   pSrcRef1,
+    OMX_U32     iSrcStep,
+    OMX_U32     iRefStep0,
+    OMX_U32     iRefStep1,
+    OMX_U32*    pDstSAD,
+    OMX_U32     iHeight,
+    OMX_U32     iWidth
+)
+{
+    OMX_INT     col, row;
+    OMX_S32     Total = 0, Pred;
+
+    /* all three inputs and the result pointer are required */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef0 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef1 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr)
+
+    for (row = 0; row < iHeight; row++)
+    {
+        for (col = 0; col < iWidth; col++)
+        {
+            /* rounded average of the two references, then |source - average| */
+            Pred = (pSrcRef0 [row * iRefStep0 + col] + pSrcRef1 [row * iRefStep1 + col] + 1) >> 1;
+            Total += armAbs(pSrc [row * iSrcStep + col] - Pred);
+        }
+    }
+
+    *pDstSAD = Total;
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_TransformResidual4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_TransformResidual4x4.c
new file mode 100644
index 0000000..f9f756a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_TransformResidual4x4.c
@@ -0,0 +1,80 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  armVCM4P10_TransformResidual4x4.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 transform module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+ 
+/*
+ * Description:
+ * Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in]  pSrc		Source 4x4 block
+ * [out] pDst		Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc)
+{
+    int k;
+
+    /* Horizontal pass: 1-D transform of each row of 4 coefficients, pSrc -> pDst */
+    for (k=0; k<16; k+=4)
+    {
+        int c0 = pSrc[k+0];
+        int c1 = pSrc[k+1];
+        int c2 = pSrc[k+2];
+        int c3 = pSrc[k+3];
+
+        /* even part from c0/c2, odd part from c1/c3 (one operand halved by >>1) */
+        int evenSum  = c0 + c2;
+        int evenDiff = c0 - c2;
+        int oddDiff  = (c1>>1) - c3;
+        int oddSum   = c1 + (c3>>1);
+
+        /* butterfly; all four inputs were read before pDst is written */
+        pDst[k+0] = (OMX_S16)(evenSum + oddSum);
+        pDst[k+1] = (OMX_S16)(evenDiff + oddDiff);
+        pDst[k+2] = (OMX_S16)(evenDiff - oddDiff);
+        pDst[k+3] = (OMX_S16)(evenSum - oddSum);
+    }
+
+    /* Vertical pass: same transform down each column of pDst (stride 4), in place */
+    for (k=0; k<4; k++)
+    {
+        int r0 = pDst[k+0];
+        int r1 = pDst[k+4];
+        int r2 = pDst[k+8];
+        int r3 = pDst[k+12];
+
+        /* even / odd parts, formed exactly as in the horizontal pass */
+        int evenSum  = r0 + r2;
+        int evenDiff = r0 - r2;
+        int oddDiff  = (r1>>1) - r3;
+        int oddSum   = r1 + (r3>>1);
+
+        /* butterfly, then round to nearest and scale down by 64 */
+        pDst[k+0] = (OMX_S16)((evenSum + oddSum + 32)>>6);
+        pDst[k+4] = (OMX_S16)((evenDiff + oddDiff + 32)>>6);
+        pDst[k+8] = (OMX_S16)((evenDiff - oddDiff + 32)>>6);
+        pDst[k+12] = (OMX_S16)((evenSum - oddSum + 32)>>6);
+    }
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c
new file mode 100644
index 0000000..dda49f6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c
@@ -0,0 +1,78 @@
+/* ----------------------------------------------------------------
+ * 
+ * 
+ * File Name:  armVCM4P10_UnpackBlock2x2.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 inverse quantize and transform helper module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+
+/*
+ * Description
+ * Unpack a 2x2 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in]	ppSrc	Double pointer to residual coefficient-position pair
+ *						buffer output by CAVLC decoding
+ * [out]	ppSrc	*ppSrc is updated to the start of next non empty block
+ * [out]	pDst	Pointer to unpacked 2x2 block
+ */
+
+void armVCM4P10_UnpackBlock2x2(
+     const OMX_U8 **ppSrc,
+     OMX_S16* pDst
+)
+{
+    const OMX_U8 *pSrc = *ppSrc;
+    int i;
+    int Flag, Value;
+
+    for (i=0; i<4; i++)     /* entries that are not coded stay zero */
+    {
+        pDst[i] = 0;
+    }
+
+    do
+    {
+        Flag  = *pSrc++;    /* header byte: bit 4 = 16-bit value, bit 5 = last entry, low bits = position */
+        if (Flag & 0x10)
+        {
+            /* 16 bit, low byte first, sign-extended */
+            Value = *pSrc++;
+            Value = Value | ((*pSrc++)<<8);
+            if (Value & 0x8000)
+            {
+                Value -= 0x10000;
+            }
+        }
+        else
+        {
+            /* 8 bit, sign-extended */
+            Value = *pSrc++;
+            if (Value & 0x80)
+            {
+                Value -= 0x100;
+            }
+        }
+        i = Flag & 3;       /* a 2x2 block has only 4 entries: a corrupt position must not write past pDst[3] */
+        pDst[i] = (OMX_S16)Value;
+    }
+    while ((Flag & 0x20)==0);   /* bit 5 set marks the last entry of the block */
+
+    *ppSrc = pSrc;          /* hand the advanced read position back to the caller */
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c
new file mode 100644
index 0000000..3c0dcbd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c
@@ -0,0 +1,78 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  armVCM4P10_UnpackBlock4x4.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 inverse quantize and transform helper module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+
+/*
+ * Description
+ * Unpack a 4x4 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in]	ppSrc	Double pointer to residual coefficient-position pair
+ *						buffer output by CAVLC decoding
+ * [out]	ppSrc	*ppSrc is updated to the start of next non empty block
+ * [out]	pDst	Pointer to unpacked 4x4 block
+ */
+
+void armVCM4P10_UnpackBlock4x4(
+     const OMX_U8 **ppSrc,
+     OMX_S16* pDst
+)
+{
+    const OMX_U8 *pIn = *ppSrc;
+    int Header, Coeff, Pos;
+
+    /* Start from an all-zero block; only coded coefficients are stored below */
+    for (Pos=0; Pos<16; Pos++)
+    {
+        pDst[Pos] = 0;
+    }
+
+    /*
+     * Entry = header byte + value.  Header bits 0-3: index in the block,
+     * bit 4: value is 16 bit (low byte first) rather than 8 bit,
+     * bit 5: last entry of the block.
+     */
+    for (;;)
+    {
+        Header = *pIn++;
+        Coeff  = *pIn++;
+        if (Header & 0x10)
+        {
+            /* second byte is the high half; sign-extend from 16 bits */
+            Coeff |= (*pIn++)<<8;
+            Coeff = (Coeff ^ 0x8000) - 0x8000;
+        }
+        else
+        {
+            /* sign-extend from 8 bits */
+            Coeff = (Coeff ^ 0x80) - 0x80;
+        }
+        pDst[Header & 15] = (OMX_S16)Coeff;
+        if (Header & 0x20)
+        {
+            break;
+        }
+    }
+
+    *ppSrc = pIn;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_Average_4x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_Average_4x.c
new file mode 100644
index 0000000..ac0d523
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_Average_4x.c
@@ -0,0 +1,84 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_Average_4x.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate Average of two 4x4 or 4x8 blocks
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_Average_4x   (6.3.5.5.3)
+ *
+ * Description:
+ * This function calculates the average of two 4x4, 4x8 blocks.  The result 
+ * is rounded according to (a+b+1)/2. 
+ *
+ * Input Arguments:
+ *   
+ *   pPred0 - Pointer to the top-left corner of reference block 0 
+ *   pPred1 - Pointer to the top-left corner of reference block 1 
+ *   iPredStep0 - Step of reference block 0; must be a multiple of 4. 
+ *   iPredStep1 - Step of reference block 1; must be a multiple of 4. 
+ *   iDstStep - Step of the destination buffer; must be a multiple of 4. 
+ *   iHeight - Height of the blocks; must be either 4 or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstPred - Pointer to the destination buffer. 4-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *           pPred0, pPred1, or pDstPred 
+ *    -    pDstPred is not aligned on a 4-byte boundary 
+ *    -    iPredStep0 <= 0 or iPredStep0 is not a multiple of 4 
+ *    -    iPredStep1 <= 0 or iPredStep1 is not a multiple of 4 
+ *    -    iDstStep <= 0 or iDstStep is not a multiple of 4 
+ *    -    iHeight is not equal to either 4 or 8 
+ *
+ */
+ OMXResult omxVCM4P10_Average_4x (
+     const OMX_U8 *pPred0,
+     const OMX_U8 *pPred1,
+     OMX_U32 iPredStep0,
+     OMX_U32 iPredStep1,
+     OMX_U8 *pDstPred,
+     OMX_U32 iDstStep,
+     OMX_U32 iHeight
+)
+{
+    /* Every pointer must be non-NULL and the destination 4-byte aligned */
+    armRetArgErrIf((pPred0 == NULL) || (pPred1 == NULL), OMX_Sts_BadArgErr)
+    armRetArgErrIf((pDstPred == NULL) || (armNot4ByteAligned(pDstPred)), OMX_Sts_BadArgErr)
+    /* Each step must be a non-zero multiple of 4 */
+    armRetArgErrIf((iPredStep0 == 0) || ((iPredStep0 & 3) != 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iPredStep1 == 0) || ((iPredStep1 & 3) != 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iDstStep == 0) || ((iDstStep & 3) != 0), OMX_Sts_BadArgErr)
+    /* Only heights 4 and 8 are accepted by this entry point */
+    armRetArgErrIf(!((iHeight == 4) || (iHeight == 8)), OMX_Sts_BadArgErr)
+
+    /* The literal 4 is presumably the block width expected by the common helper */
+    return armVCCOMM_Average(pPred0, pPred1, iPredStep0, iPredStep1, pDstPred, iDstStep, 4, iHeight);
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c
new file mode 100644
index 0000000..c490e10
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c
@@ -0,0 +1,191 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_BlockMatch_Half.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules for half pel Block matching, 
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+    
+/**
+ * Function:  omxVCM4P10_BlockMatch_Half   (6.3.5.2.2)
+ *
+ * Description:
+ * Performs a half-pel block match using results from a prior integer search. 
+ *  Returns the best MV and associated cost.  This function estimates the 
+ * half-pixel motion vector by interpolating the integer resolution motion 
+ * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial 
+ * integer MV is generated externally.  The function 
+ * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrgY - Pointer to the current position in original picture plane. If 
+ *            iBlockWidth==4,  4-byte alignment required. If iBlockWidth==8,  
+ *            8-byte alignment required. If iBlockWidth==16, 16-byte alignment 
+ *            required. 
+ *   pSrcRefY - Pointer to the top-left corner of the co-located block in the 
+ *            reference picture  If iBlockWidth==4,  4-byte alignment 
+ *            required.  If iBlockWidth==8,  8-byte alignment required.  If 
+ *            iBlockWidth==16, 16-byte alignment required. 
+ *   nSrcOrgStep - Stride of the original picture plane in terms of full 
+ *            pixels; must be a multiple of iBlockWidth. 
+ *   nSrcRefStep - Stride of the reference picture plane in terms of full 
+ *            pixels 
+ *   iBlockWidth - Width of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   iBlockHeight - Height of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   nLamda - Lamda factor, used to compute motion cost 
+ *   pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to 
+ *            compute motion cost 
+ *   pSrcDstBestMV - The best MV resulting from a prior integer search, 
+ *            represented in terms of 1/4-pel units 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstBestMV - Best MV resulting from the half-pel search, expressed in 
+ *            terms of 1/4-pel units 
+ *   pBestCost - Motion cost associated with the best MV; computed as 
+ *            SAD+Lamda*BitsUsedByMV 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    any of the following pointers is NULL: pSrcOrgY, pSrcRefY, 
+ *              pSrcDstBestMV, pMVPred, pBestCost 
+ *    -    iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+ 
+OMXResult omxVCM4P10_BlockMatch_Half(
+    const OMX_U8* pSrcOrgY,
+    OMX_S32 nSrcOrgStep,
+    const OMX_U8* pSrcRefY,
+    OMX_S32 nSrcRefStep,
+    OMX_U8 iBlockWidth,
+    OMX_U8 iBlockHeight,
+    OMX_U32 nLamda,
+    const OMXVCMotionVector* pMVPred,
+    OMXVCMotionVector* pSrcDstBestMV,
+    OMX_S32* pBestCost
+)
+{
+    /* Definitions and Initializations */
+    OMX_INT     candSAD;
+    /* Reference and original block pointers used for the current candidate */
+    const OMX_U8      *pTempSrcRefY, *pTempSrcOrgY;
+    OMX_S16     x, y;
+    OMXVCMotionVector diffMV, candMV, integerMV;
+    /* Interpolated candidate block; 256 bytes cover the largest (16x16) block */
+    OMX_U8      interpolY[256];
+
+    /* Argument error checks: pointers first */
+    armRetArgErrIf(pSrcOrgY == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcRefY == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMVPred == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcDstBestMV == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBestCost == NULL, OMX_Sts_BadArgErr);
+    /*
+     * The block size is validated before it is used as a divisor: with
+     * iBlockWidth == 0 the stride check below would divide by zero instead
+     * of reporting a bad argument.
+     */
+    armRetArgErrIf(((iBlockWidth!=4)&&(iBlockWidth!=8)&&(iBlockWidth!=16)) , OMX_Sts_BadArgErr);
+    armRetArgErrIf(((iBlockHeight!=4)&&(iBlockHeight!=8)&&(iBlockHeight!=16)) , OMX_Sts_BadArgErr);
+    armRetArgErrIf((nSrcOrgStep % iBlockWidth), OMX_Sts_BadArgErr);
+
+    /* Both planes must be aligned to the block width */
+    armRetArgErrIf((iBlockWidth ==  4) && (!armIs4ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth ==  8) && (!armIs8ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth ==  4) && (!armIs4ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth ==  8) && (!armIs8ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+
+    /* Initialize to max value as a start point */
+    *pBestCost = 0x7fffffff;
+
+    /*
+     * Keep a copy of the incoming integer MV: pSrcDstBestMV is handed to the
+     * cost comparison below as the output for the best candidate.
+     */
+    integerMV.dx = pSrcDstBestMV->dx;
+    integerMV.dy = pSrcDstBestMV->dy;
+
+    /* Candidates are the 3x3 grid of half-pel offsets around the integer MV */
+    for (y = -1; y <= 1; y++)
+    {
+        for (x = -1; x <= 1; x++)
+        {
+            /* Block addressed by the integer MV (stored in 1/4-pel units) */
+            pTempSrcRefY = pSrcRefY + (nSrcRefStep * (integerMV.dy/4)) + (integerMV.dx/4);
+            /*
+             * Only non-negative fractions are passed to the interpolator,
+             * so a negative offset starts one pixel (or row) earlier and
+             * uses the same +1/2 fraction from there.
+             */
+            if (x < 0)
+            {
+                pTempSrcRefY = pTempSrcRefY + x;
+            }
+            if (y < 0)
+            {
+                pTempSrcRefY = pTempSrcRefY + (y * nSrcRefStep);
+            }
+            pTempSrcOrgY = pSrcOrgY;
+
+            /* Candidate MV in 1/4-pel units: one half-pel step is 2 units */
+            candMV.dx = integerMV.dx + x * 2;
+            candMV.dy = integerMV.dy + y * 2;
+
+            /* Interpolate half pel for the current position */
+            armVCM4P10_Interpolate_Luma(
+                        pTempSrcRefY,
+                        nSrcRefStep,
+                        interpolY,
+                        iBlockWidth,
+                        iBlockWidth,
+                        iBlockHeight,
+                        armAbs(x) * 2,
+                        armAbs(y) * 2);
+
+            /* SAD between the original block and the interpolated candidate */
+            armVCCOMM_SAD(
+                        pTempSrcOrgY,
+                        nSrcOrgStep,
+                        interpolY,
+                        iBlockWidth,
+                        &candSAD,
+                        iBlockHeight,
+                        iBlockWidth);
+
+            /* Difference to the predicted MV, passed on for the motion cost */
+            diffMV.dx = candMV.dx - pMVPred->dx;
+            diffMV.dy = candMV.dy - pMVPred->dy;
+
+            /* Result calculations */
+            armVCM4P10_CompareMotionCostToMV (candMV.dx, candMV.dy, diffMV, candSAD, pSrcDstBestMV, nLamda, pBestCost);
+        } /* End of x- axis */
+    } /* End of y-axis */
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c
new file mode 100644
index 0000000..f7764e1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c
@@ -0,0 +1,196 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_BlockMatch_Integer.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P10_BlockMatch_Integer   (6.3.5.2.1)
+ *
+ * Description:
+ * Performs integer block match.  Returns best MV and associated cost. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrgY - Pointer to the top-left corner of the current block. If 
+ *            iBlockWidth==4,  4-byte alignment required. If iBlockWidth==8,  
+ *            8-byte alignment required. If iBlockWidth==16, 16-byte alignment 
+ *            required. 
+ *   pSrcRefY - Pointer to the top-left corner of the co-located block in the 
+ *            reference picture. If iBlockWidth==4,  4-byte alignment 
+ *            required.  If iBlockWidth==8,  8-byte alignment required.  If 
+ *            iBlockWidth==16, 16-byte alignment required. 
+ *   nSrcOrgStep - Stride of the original picture plane, expressed in terms 
+ *            of integer pixels; must be a multiple of iBlockWidth. 
+ *   nSrcRefStep - Stride of the reference picture plane, expressed in terms 
+ *            of integer pixels 
+ *   pRefRect - pointer to the valid reference rectangle inside the reference 
+ *            picture plane 
+ *   pCurrPointPos - position of the current block in the current plane 
+ *   iBlockWidth - Width of the current block, expressed in terms of integer 
+ *            pixels; must be equal to either 4, 8, or 16. 
+ *   iBlockHeight - Height of the current block, expressed in terms of 
+ *            integer pixels; must be equal to either 4, 8, or 16. 
+ *   nLamda - Lamda factor; used to compute motion cost 
+ *   pMVPred - Predicted MV; used to compute motion cost, expressed in terms 
+ *            of 1/4-pel units 
+ *   pMVCandidate - Candidate MV; used to initialize the motion search, 
+ *            expressed in terms of integer pixels 
+ *   pMESpec - pointer to the ME specification structure 
+ *
+ * Output Arguments:
+ *   
+ *   pDstBestMV - Best MV resulting from integer search, expressed in terms 
+ *            of 1/4-pel units 
+ *   pBestCost - Motion cost associated with the best MV; computed as 
+ *            SAD+Lamda*BitsUsedByMV 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    any of the following pointers are NULL:
+ *         pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec. 
+ *    -    Either iBlockWidth or iBlockHeight are values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+ 
+ OMXResult omxVCM4P10_BlockMatch_Integer (
+     const OMX_U8 *pSrcOrgY,
+     OMX_S32 nSrcOrgStep,
+     const OMX_U8 *pSrcRefY,
+     OMX_S32 nSrcRefStep,
+     const OMXRect *pRefRect,
+     const OMXVCM4P2Coordinate *pCurrPointPos,
+     OMX_U8 iBlockWidth,
+     OMX_U8 iBlockHeight,
+     OMX_U32 nLamda,
+     const OMXVCMotionVector *pMVPred,
+     const OMXVCMotionVector *pMVCandidate,
+     OMXVCMotionVector *pBestMV,
+     OMX_S32 *pBestCost,
+     void *pMESpec
+)
+{
+    /* Definitions and Initializations */
+    OMX_INT candSAD;
+    /* Search window in integer pixels: x in [-fromX, toX], y in [-fromY, toY] */
+    OMX_INT fromX, toX, fromY, toY;
+    const OMX_U8 *pTempSrcRefY, *pTempSrcOrgY;
+    OMX_S16 x, y;
+    OMXVCMotionVector diffMV;
+    OMX_S32 nSearchRange;
+    ARMVCM4P10_MESpec *armMESpec = (ARMVCM4P10_MESpec *) pMESpec;
+
+    /* Argument error checks */
+    armRetArgErrIf((iBlockWidth ==  4) && (!armIs4ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth ==  8) && (!armIs8ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth ==  4) && (!armIs4ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth ==  8) && (!armIs8ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcOrgY == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcRefY == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMVPred == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMVCandidate == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBestMV == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBestCost == NULL, OMX_Sts_BadArgErr);
+    /* These three are dereferenced below, so NULL must be rejected here too */
+    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(((iBlockWidth!=4)&&(iBlockWidth!=8)&&(iBlockWidth!=16)) , OMX_Sts_BadArgErr);
+    armRetArgErrIf(((iBlockHeight!=4)&&(iBlockHeight!=8)&&(iBlockHeight!=16)) , OMX_Sts_BadArgErr);
+
+    /* The search range is selected by the block width only */
+    if(iBlockWidth == 4)
+    {
+        nSearchRange = armMESpec->MEParams.searchRange4x4;
+    }
+    else if(iBlockWidth == 8)
+    {
+        nSearchRange = armMESpec->MEParams.searchRange8x8;
+    }
+    else
+    {
+        nSearchRange = armMESpec->MEParams.searchRange16x16;
+    }
+
+    /* Start from the full range, then clip each side to the reference rectangle */
+    fromX = nSearchRange;
+    toX   = nSearchRange;
+    fromY = nSearchRange;
+    toY   = nSearchRange;
+
+    if ((pCurrPointPos->x - nSearchRange) < pRefRect->x)
+    {
+        fromX =  pCurrPointPos->x - pRefRect->x;
+    }
+
+    if ((pCurrPointPos->x + iBlockWidth + nSearchRange) > (pRefRect->x + pRefRect->width))
+    {
+        toX   = pRefRect->width - (pCurrPointPos->x - pRefRect->x) - iBlockWidth;
+    }
+
+    if ((pCurrPointPos->y - nSearchRange) < pRefRect->y)
+    {
+        fromY = pCurrPointPos->y - pRefRect->y;
+    }
+
+    /*
+     * The bottom edge is clipped with the block and rectangle HEIGHT; using
+     * the width here gives a wrong window for non-square blocks or rectangles.
+     */
+    if ((pCurrPointPos->y + iBlockHeight + nSearchRange) > (pRefRect->y + pRefRect->height))
+    {
+        toY   = pRefRect->height - (pCurrPointPos->y - pRefRect->y) - iBlockHeight;
+    }
+
+    pBestMV->dx = -fromX * 4;
+    pBestMV->dy = -fromY * 4;
+    /* Initialize to max value as a start point */
+    *pBestCost = 0x7fffffff;
+
+    /* Looping on y- axis */
+    for (y = -fromY; y <= toY; y++)
+    {
+        /* Looping on x- axis */
+        for (x = -fromX; x <= toX; x++)
+        {
+            /* Positioning the pointer at the candidate block in the reference */
+            pTempSrcRefY = pSrcRefY + (nSrcRefStep * y) + x;
+            pTempSrcOrgY = pSrcOrgY;
+
+            /* Calculate the SAD */
+            armVCCOMM_SAD(pTempSrcOrgY, nSrcOrgStep, pTempSrcRefY, nSrcRefStep,
+                          &candSAD, iBlockHeight, iBlockWidth);
+
+            /* Offsets are integer pixels; times 4 gives the 1/4-pel units of pMVPred */
+            diffMV.dx = (x * 4) - pMVPred->dx;
+            diffMV.dy = (y * 4) - pMVPred->dy;
+            /* Result calculations */
+            armVCM4P10_CompareMotionCostToMV ((x * 4), (y * 4), diffMV, candSAD, pBestMV, nLamda, pBestCost);
+        } /* End of x- axis */
+    } /* End of y-axis */
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c
new file mode 100644
index 0000000..513ee25
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c
@@ -0,0 +1,199 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_BlockMatch_Quarter.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules for quarter pel Block matching, 
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+ 
+    
+/**
+ * Function:  omxVCM4P10_BlockMatch_Quarter   (6.3.5.2.3)
+ *
+ * Description:
+ * Performs a quarter-pel block match using results from a prior half-pel 
+ * search.  Returns the best MV and associated cost.  This function estimates 
+ * the quarter-pixel motion vector by interpolating the half-pel resolution 
+ * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the 
+ * initial half-pel MV is generated externally.  The function 
+ * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrgY - Pointer to the current position in original picture plane. If 
+ *            iBlockWidth==4,  4-byte alignment required. If iBlockWidth==8,  
+ *            8-byte alignment required. If iBlockWidth==16, 16-byte alignment 
+ *            required. 
+ *   pSrcRefY - Pointer to the top-left corner of the co-located block in the 
+ *            reference picture  If iBlockWidth==4,  4-byte alignment 
+ *            required.  If iBlockWidth==8,  8-byte alignment required.  If 
+ *            iBlockWidth==16, 16-byte alignment required. 
+ *   nSrcOrgStep - Stride of the original picture plane in terms of full 
+ *            pixels; must be a multiple of iBlockWidth. 
+ *   nSrcRefStep - Stride of the reference picture plane in terms of full 
+ *            pixels 
+ *   iBlockWidth - Width of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   iBlockHeight - Height of the current block in terms of full pixels; must 
+ *            be equal to either 4, 8, or 16. 
+ *   nLamda - Lamda factor, used to compute motion cost 
+ *   pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to 
+ *            compute motion cost 
+ *   pSrcDstBestMV - The best MV resulting from a prior half-pel search, 
+ *            represented in terms of 1/4 pel units 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstBestMV - Best MV resulting from the quarter-pel search, expressed 
+ *            in terms of 1/4-pel units 
+ *   pBestCost - Motion cost associated with the best MV; computed as 
+ *            SAD+Lamda*BitsUsedByMV 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    One or more of the following pointers is NULL: 
+ *         pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost 
+ *    -    iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+ 
+OMXResult omxVCM4P10_BlockMatch_Quarter(
+    const OMX_U8* pSrcOrgY,
+    OMX_S32 nSrcOrgStep,
+    const OMX_U8* pSrcRefY,
+    OMX_S32 nSrcRefStep,
+    OMX_U8 iBlockWidth,
+    OMX_U8 iBlockHeight,
+    OMX_U32 nLamda,
+    const OMXVCMotionVector* pMVPred,
+    OMXVCMotionVector* pSrcDstBestMV,
+    OMX_S32* pBestCost
+)
+{
+    /* Definitions and Initializations */
+    OMX_INT     candSAD;
+    /* Reference and original block pointers used for the current candidate */
+    const OMX_U8      *pTempSrcRefY, *pTempSrcOrgY;
+    OMX_S16     x, y;
+    OMXVCMotionVector diffMV, candMV, initialMV;
+    /* Interpolated candidate block; 256 bytes cover the largest (16x16) block */
+    OMX_U8      interpolY[256];
+    /* Fractional (1/4-pel) part of the candidate position in each direction */
+    OMX_S32     pelPosX, pelPosY;
+
+    /* Argument error checks: pointers first */
+    armRetArgErrIf(pSrcOrgY == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcRefY == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMVPred == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcDstBestMV == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBestCost == NULL, OMX_Sts_BadArgErr);
+    /*
+     * The block size is validated before it is used as a divisor: with
+     * iBlockWidth == 0 the stride check below would divide by zero instead
+     * of reporting a bad argument.
+     */
+    armRetArgErrIf(((iBlockWidth!=4)&&(iBlockWidth!=8)&&(iBlockWidth!=16)) , OMX_Sts_BadArgErr);
+    armRetArgErrIf(((iBlockHeight!=4)&&(iBlockHeight!=8)&&(iBlockHeight!=16)) , OMX_Sts_BadArgErr);
+    armRetArgErrIf((nSrcOrgStep % iBlockWidth), OMX_Sts_BadArgErr);
+
+    /* Both planes must be aligned to the block width */
+    armRetArgErrIf((iBlockWidth ==  4) && (!armIs4ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth ==  8) && (!armIs8ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth ==  4) && (!armIs4ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth ==  8) && (!armIs8ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+
+    /* Initialize to max value as a start point */
+    *pBestCost = 0x7fffffff;
+
+    /*
+     * Keep a copy of the incoming MV: pSrcDstBestMV is handed to the cost
+     * comparison below as the output for the best candidate.
+     */
+    initialMV.dx = pSrcDstBestMV->dx;
+    initialMV.dy = pSrcDstBestMV->dy;
+
+    /* Candidates are the 3x3 grid of quarter-pel offsets around the initial MV */
+    for (y = -1; y <= 1; y++)
+    {
+        for (x = -1; x <= 1; x++)
+        {
+            /* Positioning the pointer at the integer part of the initial MV */
+            pTempSrcRefY = pSrcRefY + (nSrcRefStep * (initialMV.dy/4)) + (initialMV.dx/4);
+
+            /*
+             * Fractional part of the candidate. A quarter-pel fraction only
+             * has the values 0..3, so anything outside is folded back by
+             * moving the reference pointer one whole pixel (or row). The
+             * upper fold is reached when the incoming fraction is 3.
+             */
+            pelPosX = (initialMV.dx % 4) + x;
+            if (pelPosX < 0)
+            {
+                pTempSrcRefY = pTempSrcRefY - 1;
+                pelPosX += 4;
+            }
+            else if (pelPosX > 3)
+            {
+                pTempSrcRefY = pTempSrcRefY + 1;
+                pelPosX -= 4;
+            }
+            pelPosY = (initialMV.dy % 4) + y;
+            if (pelPosY < 0)
+            {
+                pTempSrcRefY = pTempSrcRefY - (1 * nSrcRefStep);
+                pelPosY += 4;
+            }
+            else if (pelPosY > 3)
+            {
+                pTempSrcRefY = pTempSrcRefY + (1 * nSrcRefStep);
+                pelPosY -= 4;
+            }
+            pTempSrcOrgY = pSrcOrgY;
+
+            /* Prepare cand MV (1/4-pel units, one step per offset) */
+            candMV.dx = initialMV.dx + x;
+            candMV.dy = initialMV.dy + y;
+
+            /* Interpolate quarter pel for the current position */
+            armVCM4P10_Interpolate_Luma(
+                        pTempSrcRefY, nSrcRefStep, interpolY, iBlockWidth,
+                        iBlockWidth, iBlockHeight, pelPosX, pelPosY);
+
+            /* SAD between the original block and the interpolated candidate */
+            armVCCOMM_SAD(pTempSrcOrgY, nSrcOrgStep, interpolY, iBlockWidth,
+                          &candSAD, iBlockHeight, iBlockWidth);
+
+            /* Difference to the predicted MV, passed on for the motion cost */
+            diffMV.dx = candMV.dx - pMVPred->dx;
+            diffMV.dy = candMV.dy - pMVPred->dy;
+
+            /* Result calculations */
+            armVCM4P10_CompareMotionCostToMV (
+                        candMV.dx, candMV.dy, diffMV, candSAD, pSrcDstBestMV, nLamda, pBestCost);
+        } /* End of x- axis */
+    } /* End of y-axis */
+
+    return OMX_Sts_NoErr;
+
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
new file mode 100644
index 0000000..a07b1bb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
@@ -0,0 +1,107 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_DeblockChroma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 intra chroma deblock
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_DeblockChroma_I   (6.3.3.3.6)
+ *
+ * Description:
+ * Performs in-place deblocking filtering on all edges of the chroma 
+ * macroblock (16x16). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 8-byte aligned. 
+ *   srcdstStep - step of the arrays; must be a multiple of 8. 
+ *   pAlpha - pointer to a 2x2 array of alpha thresholds, organized as 
+ *            follows: {external vertical edge, internal vertical edge, 
+ *            external horizontal edge, internal horizontal edge }.  Per 
+ *            [ISO14496-10] alpha values must be in the range [0,255]. 
+ *   pBeta - pointer to a 2x2 array of Beta Thresholds, organized as follows: 
+ *            { external vertical edge, internal vertical edge, external 
+ *            horizontal edge, internal horizontal edge }.  Per [ISO14496-10] 
+ *            beta values must be in the range [0,18]. 
+ *   pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left 
+ *            or above edge of each 4x2 or 2x4 block, arranged in vertical 
+ *            block order and then in horizontal block order); must be aligned 
+ *            on a 4-byte boundary. Per [ISO14496-10] values must be in the 
+ *            range [0,25]. 
+ *   pBS - array of size 16x2 of BS parameters (arranged in scan block order 
+ *            for vertical edges and then horizontal edges); valid in the 
+ *            range [0,4] with the following restrictions: i) pBS[i]== 4 may 
+ *            occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 
+ *            4.  Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments 
+ *    -    one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds, or pBS. pSrcDst is not 8-byte aligned. 
+ *              either pThresholds or pBS is not 4-byte aligned. 
+ *    -   one or more entries in the table pAlpha[0..3] is outside the range 
+ *              [0,255]. 
+ *    -   one or more entries in the table pBeta[0..3] is outside the range 
+ *              [0,18]. 
+ *    -   one or more entries in the table pThresholds[0..15]is outside of 
+ *              the range [0,25]. 
+ *    -   pBS is out of range, i.e., one of the following conditions is true: 
+ *            pBS[i]<0, pBS[i]>4, pBS[i]==4  for i>=4, or 
+ *            (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *    -   srcdstStep is not a multiple of 8. 
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+)
+{
+    OMXResult status;
+
+    /* Reject NULL pointers, then misaligned buffers and a step not divisible by 8 */
+    armRetArgErrIf((pSrcDst == NULL) || (pAlpha == NULL) || (pBeta == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pThresholds == NULL) || (pBS == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf((armNot8ByteAligned(pSrcDst)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((armNot4ByteAligned(pThresholds)) || (armNot4ByteAligned(pBS)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((srcdstStep & 7) != 0, OMX_Sts_BadArgErr);
+
+    /* Vertical edges are filtered first, with the start of each parameter table */
+    status = omxVCM4P10_FilterDeblockingChroma_VerEdge_I(
+        pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+    armRetArgErrIf(status != OMX_Sts_NoErr, status)
+
+    /*
+     * Horizontal edges use the second half of each table: skip 2 alpha and
+     * beta entries, 8 thresholds and 16 BS values. Its status is the result.
+     */
+    status = omxVCM4P10_FilterDeblockingChroma_HorEdge_I(
+        pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+8, pBS+16);
+    return status;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
new file mode 100644
index 0000000..1f3a646
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
@@ -0,0 +1,109 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_DeblockLuma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 luma deblock
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+ 
+
+/**
+ * Function:  omxVCM4P10_DeblockLuma_I   (6.3.3.3.5)
+ *
+ * Description:
+ * This function performs in-place deblock filtering of the horizontal and
+ * vertical edges of a luma macroblock (16x16). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 16-byte aligned. 
+ *   srcdstStep - image width; must be a multiple of 16. 
+ *   pAlpha - pointer to a 2x2 table of alpha thresholds, organized as 
+ *            follows: {external vertical edge, internal vertical edge, 
+ *            external horizontal edge, internal horizontal edge }.  Per 
+ *            [ISO14496-10] alpha values must be in the range [0,255]. 
+ *   pBeta - pointer to a 2x2 table of beta thresholds, organized as follows: 
+ *            {external vertical edge, internal vertical edge, external 
+ *            horizontal edge, internal horizontal edge }.  Per [ISO14496-10] 
+ *            beta values must be in the range [0,18]. 
+ *   pThresholds - pointer to a 16x2 table of threshold (TC0), organized as 
+ *            follows: {values for the left or above edge of each 4x4 block, 
+ *            arranged in vertical block order and then in horizontal block 
+ *            order}; must be aligned on a 4-byte boundary.  Per [ISO14496-10] 
+ *            values must be in the range [0,25]. 
+ *   pBS - pointer to a 16x2 table of BS parameters arranged in scan block 
+ *            order for vertical edges and then horizontal edges; valid in the 
+ *            range [0,4] with the following restrictions: i) pBS[i]== 4 may 
+ *            occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 
+ *            4. Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments 
+ *    -     one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds or pBS. pSrcDst is not 16-byte aligned. 
+ *              either pThresholds or pBS is not aligned on a 4-byte boundary. 
+ *    -    one or more entries in the table pAlpha[0..3] is outside the range 
+ *              [0,255]. 
+ *    -    one or more entries in the table pBeta[0..3] is outside the range 
+ *              [0,18]. 
+ *    -    one or more entries in the table pThresholds[0..31]is outside of 
+ *              the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *             (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *    -    srcdstStep is not a multiple of 16. 
+ *
+ */
+
+OMXResult omxVCM4P10_DeblockLuma_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+)
+{
+    OMXResult status;
+
+    /* The macroblock pointer and all four tables must be supplied. */
+    armRetArgErrIf((pSrcDst == NULL) || (pAlpha == NULL) || (pBeta == NULL),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((pThresholds == NULL) || (pBS == NULL),
+                   OMX_Sts_BadArgErr);
+    /* 16-byte aligned block and stride, 4-byte aligned pThresholds and pBS. */
+    armRetArgErrIf((armNot16ByteAligned(pSrcDst)) || ((srcdstStep & 15) != 0),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((armNot4ByteAligned(pThresholds)) ||
+                   (armNot4ByteAligned(pBS)), OMX_Sts_BadArgErr);
+
+    /* Vertical edges first, reading each table from its start. */
+    status = omxVCM4P10_FilterDeblockingLuma_VerEdge_I(
+        pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+    armRetArgErrIf(status != OMX_Sts_NoErr, status)
+
+    /* Horizontal edges read the tables at offsets +2, +2, +16 and +16. */
+    return omxVCM4P10_FilterDeblockingLuma_HorEdge_I(
+        pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+16, pBS+16);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
new file mode 100644
index 0000000..830ddc7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
@@ -0,0 +1,86 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 decode coefficients module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC   (6.3.4.1.1)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse raster scan for a 2x2 block of 
+ * ChromaDCLevel.  The decoded coefficients in the packed position-coefficient 
+ * buffer are stored in reverse zig-zag order, i.e., the first buffer element 
+ * contains the last non-zero position-coefficient pair of the block. Within
+ * each position-coefficient pair, the position entry indicates the 
+ * raster-scan position of the coefficient, while the coefficient entry 
+ * contains the coefficient value. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - Double pointer to current byte in bit stream buffer 
+ *   pOffset - Pointer to current bit position in the byte pointed to by 
+ *            *ppBitStream; valid in the range [0,7]. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after each block is decoded 
+ *   pOffset - *pOffset is updated after each block is decoded 
+ *   pNumCoeff - Pointer to the number of nonzero coefficients in this block 
+ *   ppPosCoefBuf - Double pointer to destination residual 
+ *            coefficient-position pair buffer.  Buffer position 
+ *            (*ppPosCoefBuf) is updated upon return, unless there are only 
+ *            zero coefficients in the currently decoded block.  In this case 
+ *            the caller is expected to bypass the transform/dequantization of 
+ *            the empty blocks. 
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppBitStream or pOffset is NULL. 
+ *    -    ppPosCoefBuf or pNumCoeff is NULL. 
+ *    OMX_Sts_Err - if one of the following is true: 
+ *    -    an illegal code is encountered in the bitstream 
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+     const OMX_U8** ppBitStream,
+     OMX_S32* pOffset,
+     OMX_U8* pNumCoeff,
+     OMX_U8** ppPosCoefbuf
+ )
+{
+    /* Bitstream cursor: both pointer levels must be valid. */
+    armRetArgErrIf((ppBitStream == NULL) || (*ppBitStream == NULL),
+                   OMX_Sts_BadArgErr);
+    /* The bit offset must be present and lie in [0,7]. */
+    armRetArgErrIf((pOffset == NULL) || (*pOffset < 0) || (*pOffset > 7),
+                   OMX_Sts_BadArgErr);
+    /* Output locations: coefficient count and pair-buffer cursor. */
+    armRetArgErrIf(pNumCoeff == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((ppPosCoefbuf == NULL) || (*ppPosCoefbuf == NULL),
+                   OMX_Sts_BadArgErr);
+    /* Delegate to the shared decoder; presumably table 4, 4-coefficient limit. */
+    return armVCM4P10_DecodeCoeffsToPair(
+        ppBitStream, pOffset, pNumCoeff, ppPosCoefbuf, 4, 4);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
new file mode 100644
index 0000000..7e83d1e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
@@ -0,0 +1,117 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 decode coefficients module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_DecodeCoeffsToPairCAVLC   (6.3.4.1.2)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of 
+ * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. Inverse 
+ * field scan is not supported. The decoded coefficients in the packed 
+ * position-coefficient buffer are stored in reverse zig-zag order, i.e., the 
+ * first buffer element contains the last non-zero position-coefficient pair of
+ * the block. Within each position-coefficient pair, the position entry 
+ * indicates the raster-scan position of the coefficient, while the 
+ * coefficient entry contains the coefficient value. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream -Double pointer to current byte in bit stream buffer 
+ *   pOffset - Pointer to current bit position in the byte pointed to by 
+ *            *ppBitStream; valid in the range [0,7]. 
+ *   sMaxNumCoeff - Maximum the number of non-zero coefficients in current 
+ *            block 
+ *   sVLCSelect - VLC table selector, obtained from the number of non-zero 
+ *            coefficients contained in the above and left 4x4 blocks.  It is 
+ *            equivalent to the variable nC described in H.264 standard table 
+ *            9-5, except its value can't be less than zero.
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after each block is decoded.  
+ *            Buffer position (*ppPosCoefBuf) is updated upon return, unless 
+ *            there are only zero coefficients in the currently decoded block. 
+ *             In this case the caller is expected to bypass the 
+ *            transform/dequantization of the empty blocks. 
+ *   pOffset - *pOffset is updated after each block is decoded 
+ *   pNumCoeff - Pointer to the number of nonzero coefficients in this block 
+ *   ppPosCoefBuf - Double pointer to destination residual 
+ *            coefficient-position pair buffer 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppBitStream or pOffset is NULL. 
+ *    -    ppPosCoefBuf or pNumCoeff is NULL. 
+ *    -    sMaxNumCoeff is not equal to either 15 or 16. 
+ *    -    sVLCSelect is less than 0. 
+ *
+ *    OMX_Sts_Err - if one of the following is true: 
+ *    -    an illegal code is encountered in the bitstream 
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC(
+     const OMX_U8** ppBitStream,
+     OMX_S32* pOffset,
+     OMX_U8* pNumCoeff,
+     OMX_U8**ppPosCoefbuf,
+     OMX_INT sVLCSelect,
+     OMX_INT sMaxNumCoeff
+ )
+{
+    int tableIndex;
+
+    /* Bitstream cursor: both pointer levels must be valid. */
+    armRetArgErrIf((ppBitStream == NULL) || (*ppBitStream == NULL),
+                   OMX_Sts_BadArgErr);
+
+    /* The bit offset must be present and lie in [0,7]. */
+    armRetArgErrIf((pOffset == NULL) || (*pOffset < 0) || (*pOffset > 7),
+                   OMX_Sts_BadArgErr);
+
+    /* Output locations: coefficient count and pair-buffer cursor. */
+    armRetArgErrIf(pNumCoeff == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((ppPosCoefbuf == NULL) || (*ppPosCoefbuf == NULL),
+                   OMX_Sts_BadArgErr);
+
+    /* Selector must be non-negative; coefficient limit must be 15 or 16. */
+    armRetArgErrIf(sVLCSelect < 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf((sMaxNumCoeff < 15) || (sMaxNumCoeff > 16),
+                   OMX_Sts_BadArgErr);
+
+    /*
+     * Map the selector onto a VLC table number:
+     *   0..1        -> table 0
+     *   2..3        -> table 1
+     *   4..7        -> table 2
+     *   8 and above -> table 3
+     */
+    tableIndex = (sVLCSelect < 2) ? 0
+               : (sVLCSelect < 4) ? 1
+               : (sVLCSelect < 8) ? 2
+               : 3;
+
+    return armVCM4P10_DecodeCoeffsToPair(
+        ppBitStream, pOffset, pNumCoeff, ppPosCoefbuf, tableIndex, sMaxNumCoeff);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c
new file mode 100644
index 0000000..ed5a158
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c
@@ -0,0 +1,145 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_DequantTransformResidualFromPairAndAdd.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 inverse quantize and transform module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description:
+ * Dequantize a 4x4 block of coefficients in place: each entry is multiplied
+ * by its armVCM4P10_VMatrix[QP%6] weight and then scaled by 2^(QP/6).
+ * QP must be non-negative; the caller below checks it against [0,51].
+ */
+static void DequantLumaAC4x4(
+     OMX_S16* pSrcDst,
+     OMX_INT QP
+)
+{
+    const OMX_U8 *pVRow = &armVCM4P10_VMatrix[QP%6][0];
+    /* Multiply instead of "<<": shifting a negative value is undefined in C */
+    const OMX_S32 Scale = (OMX_S32)1 << (QP / 6);
+    int i;
+
+    for (i=0; i<16; i++)
+    {
+        OMX_S32 Value = pSrcDst[i] * pVRow[armVCM4P10_PosToVCol4x4[i]];
+        pSrcDst[i] = (OMX_S16)(Value * Scale);
+    }
+}
+
+/**
+ * Function:  omxVCM4P10_DequantTransformResidualFromPairAndAdd   (6.3.4.2.3)
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer, 
+ * perform dequantization and integer inverse transformation for 4x4 block of 
+ * residuals with previous intra prediction or motion compensation data, and 
+ * update the pair buffer pointer to next non-empty block. If pDC == NULL, 
+ * there are 16 non-zero AC coefficients at most in the packed buffer starting
+ * from 4x4 block position 0; If pDC != NULL, there are 15 non-zero AC
+ * coefficients at most in the packed buffer starting from 4x4 block position
+ * 1. 
+ *
+ * Input Arguments:
+ *   
+ *   ppSrc - Double pointer to residual coefficient-position pair buffer 
+ *            output by CAVLC decoding
+ *   pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte 
+ *            boundary 
+ *   predStep - Predicted frame step size in bytes; must be a multiple of 4 
+ *   dstStep - Destination frame step in bytes; must be a multiple of 4 
+ *   pDC - Pointer to the DC coefficient of this block, NULL if it doesn't 
+ *            exist 
+ *   QP - QP Quantization parameter.  It should be QpC in chroma 4x4 block 
+ *            decoding, otherwise it should be QpY. 
+ *   AC - Flag indicating if at least one non-zero AC coefficient exists 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the reconstructed 4x4 block data; must be aligned on a 
+ *            4-byte boundary 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    pPred or pDst is NULL. 
+ *    -    pPred or pDst is not 4-byte aligned. 
+ *    -    predStep or dstStep is not a multiple of 4. 
+ *    -    AC !=0 and Qp is not in the range of [0-51] or ppSrc == NULL. 
+ *    -    AC ==0 && pDC ==NULL. 
+ *
+ */
+
+OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd(
+     const OMX_U8 **ppSrc,
+     const OMX_U8 *pPred,
+     const OMX_S16 *pDC,
+     OMX_U8 *pDst,
+     OMX_INT predStep,
+     OMX_INT dstStep,
+     OMX_INT QP,
+     OMX_INT AC
+)
+{
+    OMX_S16 scratch[16+4];
+    OMX_S16 *pResidual;
+    int idx, row, col;
+
+    /* Blocks must be non-NULL and 4-byte aligned; strides multiples of 4. */
+    armRetArgErrIf((pPred == NULL) || (pDst == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf((armNot4ByteAligned(pPred)) || (armNot4ByteAligned(pDst)),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((predStep & 3) || (dstStep & 3), OMX_Sts_BadArgErr);
+    /* AC data needs QP in [0,51] and a pair buffer; otherwise a DC value. */
+    armRetArgErrIf((AC != 0) && ((QP < 0) || (QP > 51)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((AC != 0) && ((ppSrc == NULL) || (*ppSrc == NULL)),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((AC == 0) && (pDC == NULL), OMX_Sts_BadArgErr);
+
+    /* Work on a zero-filled block inside the aligned scratch area. */
+    pResidual = armAlignTo8Bytes(scratch);
+    for (idx = 0; idx < 16; idx++)
+    {
+        pResidual[idx] = 0;
+    }
+    /* Unpack and dequantize the AC coefficients when present. */
+    if (AC != 0)
+    {
+        armVCM4P10_UnpackBlock4x4(ppSrc, pResidual);
+        DequantLumaAC4x4(pResidual, QP);
+    }
+    /* A supplied DC value replaces entry 0. */
+    if (pDC != NULL)
+    {
+        pResidual[0] = pDC[0];
+    }
+    armVCM4P10_TransformResidual4x4(pResidual, pResidual);
+    /* Add the residual to the prediction, clip to [0,255] and store. */
+    for (idx = 0; idx < 16; idx++)
+    {
+        row = idx >> 2;
+        col = idx & 3;
+        pDst[row*dstStep+col] = (OMX_U8)armClip(0,255,pPred[row*predStep+col] + pResidual[idx]);
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c
new file mode 100644
index 0000000..75edee2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c
@@ -0,0 +1,130 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 chroma deblock module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I   (6.3.3.3.4)
+ *
+ * Description:
+ * Performs in-place deblock filtering on the horizontal edges of the chroma 
+ * macroblock (8x8). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 8-byte aligned. 
+ *   srcdstStep - array step; must be a multiple of 8. 
+ *   pAlpha - array of size 2 containing alpha thresholds; the first element 
+ *            contains the threshold for the external horizontal edge, and the 
+ *            second element contains the threshold for internal horizontal 
+ *            edge.  Per [ISO14496-10] alpha values must be in the range 
+ *            [0,255]. 
+ *   pBeta - array of size 2 containing beta thresholds; the first element 
+ *            contains the threshold for the external horizontal edge, and the 
+ *            second element contains the threshold for the internal 
+ *            horizontal edge.  Per [ISO14496-10] beta values must be in the 
+ *            range [0,18]. 
+ *   pThresholds - array of size 8 containing thresholds, TC0, for the top 
+ *            horizontal edge of each 2x4 chroma block, arranged in horizontal 
+ *            block order; must be aligned on a 4-byte boundary.  Per 
+ *            [ISO14496-10] values must be in the range [0,25]. 
+ *   pBS - array of size 16 containing BS parameters for each 2x2 chroma 
+ *            block, arranged in horizontal block order; valid in the range 
+ *            [0,4] with the following restrictions: i) pBS[i]== 4 may occur 
+ *            only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4. 
+ *            Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst -Pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr, if one of the following cases occurs: 
+ *    -    any of the following pointers is NULL: 
+ *         pSrcDst, pAlpha, pBeta, pThresholds, or pBS. 
+ *    -    pSrcDst is not 8-byte aligned. 
+ *    -    srcdstStep is not a multiple of 8. 
+ *    -    pThresholds is not 4-byte aligned. 
+ *    -    pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. 
+ *    -    pBeta[0] and/or pBeta[1] is outside the range [0,18]. 
+ *    -    One or more entries in the table pThresholds[0..7] is outside 
+ *         of the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *              (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ *    -    pBS is not 4-byte aligned. 
+ *
+ */
+
+OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I(
+     OMX_U8* pSrcDst,
+     OMX_S32 srcdstStep,
+     const OMX_U8* pAlpha,
+     const OMX_U8* pBeta,
+     const OMX_U8* pThresholds,
+     const OMX_U8 *pBS
+ )
+{
+    int edge, col, bsIdx;
+
+    /* The macroblock pointer and all four tables must be supplied. */
+    armRetArgErrIf((pSrcDst == NULL) || (pAlpha == NULL) || (pBeta == NULL),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((pThresholds == NULL) || (pBS == NULL),
+                   OMX_Sts_BadArgErr);
+    /* 8-byte aligned block and stride, 4-byte aligned pThresholds and pBS. */
+    armRetArgErrIf((armNot8ByteAligned(pSrcDst)) || ((srcdstStep & 7) != 0),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((armNot4ByteAligned(pThresholds)) ||
+                   (armNot4ByteAligned(pBS)), OMX_Sts_BadArgErr);
+
+    /* edge 0 is the row at Y=0 (pAlpha[0]/pBeta[0]); edge 1 is Y=4 ([1]). */
+    for (edge = 0; edge < 2; edge++)
+    {
+        OMX_U8 *pRow = pSrcDst + (4*edge)*srcdstStep;
+        for (col = 0; col < 8; col++)
+        {
+            bsIdx = (col >> 1) + 8*edge;
+
+            /* Strength above 4 is invalid; 4 is only allowed at index 0..3 */
+            armRetArgErrIf(pBS[bsIdx] > 4, OMX_Sts_BadArgErr)
+            armRetArgErrIf((bsIdx > 3) && (pBS[bsIdx] == 4), OMX_Sts_BadArgErr)
+            /* NOTE(review): header says pBS[i^3], code pairs with i^1 */
+            armRetArgErrIf((bsIdx < 4) &&
+                           (pBS[bsIdx] == 4) && (pBS[bsIdx^1] != 4),
+                           OMX_Sts_BadArgErr)
+
+            /* Filter horizontal edge with q0 at (col, 4*edge) */
+            armVCM4P10_DeBlockPixel(
+                pRow + col,
+                srcdstStep,
+                pThresholds[(col >> 1) + 4*edge],
+                pAlpha[edge], pBeta[edge],
+                pBS[bsIdx],
+                1);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c
new file mode 100644
index 0000000..10b2592
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c
@@ -0,0 +1,131 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 deblocking module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingChroma_VerEdge_I   (6.3.3.3.3)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the chroma 
+ * macroblock (8x8). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the input macroblock; must be 8-byte aligned. 
+ *   srcdstStep - Step of the arrays; must be a multiple of 8. 
+ *   pAlpha - Array of size 2 of alpha thresholds (the first item is alpha 
+ *            threshold for external vertical edge, and the second item is for 
+ *            internal vertical edge); per [ISO14496-10] alpha values must be 
+ *            in the range [0,255]. 
+ *   pBeta - Array of size 2 of beta thresholds (the first item is the beta 
+ *            threshold for the external vertical edge, and the second item is 
+ *            for the internal vertical edge); per [ISO14496-10] beta values 
+ *            must be in the range [0,18]. 
+ *   pThresholds - Array of size 8 containing thresholds, TC0, for the left 
+ *            vertical edge of each 4x2 chroma block, arranged in vertical 
+ *            block order; must be aligned on a 4-byte boundary.  Per 
+ *            [ISO14496-10] values must be in the range [0,25]. 
+ *   pBS - Array of size 16 of BS parameters (values for each 2x2 chroma 
+ *            block, arranged in vertical block order). This parameter is the 
+ *            same as the pBS parameter passed into FilterDeblockLuma_VerEdge;
+ *            valid in the range [0,4] with the following restrictions: i) 
+ *            pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and 
+ *            only if pBS[i^3]== 4.  Must be 4 byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst -Pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds, or pBS. 
+ *    -    pSrcDst is not 8-byte aligned. 
+ *    -    srcdstStep is not a multiple of 8. 
+ *    -    pThresholds is not 4-byte aligned. 
+ *    -    pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. 
+ *    -    pBeta[0] and/or pBeta[1] is outside the range [0,18]. 
+ *    -    One or more entries in the table pThresholds[0..7] is outside 
+ *         of the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *         pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *         (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *    -    pBS is not 4-byte aligned. 
+ *
+ */
+
+OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I(
+     OMX_U8* pSrcDst,
+     OMX_S32 srcdstStep,
+     const OMX_U8* pAlpha,
+     const OMX_U8* pBeta,
+     const OMX_U8* pThresholds,
+     const OMX_U8 *pBS
+ )
+{
+    int edge, row, bsIdx;
+
+    /* The macroblock pointer and all four tables must be supplied. */
+    armRetArgErrIf((pSrcDst == NULL) || (pAlpha == NULL) || (pBeta == NULL),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((pThresholds == NULL) || (pBS == NULL),
+                   OMX_Sts_BadArgErr);
+    /* 8-byte aligned block and stride, 4-byte aligned pThresholds and pBS. */
+    armRetArgErrIf((armNot8ByteAligned(pSrcDst)) || ((srcdstStep & 7) != 0),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((armNot4ByteAligned(pThresholds)) ||
+                   (armNot4ByteAligned(pBS)), OMX_Sts_BadArgErr);
+    /* Both beta thresholds are limited to 18. */
+    armRetArgErrIf((pBeta[0] > 18) || (pBeta[1] > 18), OMX_Sts_BadArgErr);
+
+    /* edge 0 is the column at X=0 (pAlpha[0]/pBeta[0]); edge 1 is X=4 ([1]). */
+    for (edge = 0; edge < 2; edge++)
+    {
+        for (row = 0; row < 8; row++)
+        {
+            bsIdx = (row >> 1) + 8*edge;
+
+            /* Strength above 4 is invalid; 4 is only allowed at index 0..3 */
+            armRetArgErrIf(pBS[bsIdx] > 4, OMX_Sts_BadArgErr);
+            armRetArgErrIf((bsIdx > 3) && (pBS[bsIdx] == 4), OMX_Sts_BadArgErr);
+            /* A strength of 4 must also appear at index bsIdx^3 */
+            armRetArgErrIf((pBS[bsIdx] == 4) && (pBS[bsIdx^3] != 4),
+                           OMX_Sts_BadArgErr);
+            /* The TC0 entry at index row must not exceed 25 */
+            armRetArgErrIf(pThresholds[row] > 25, OMX_Sts_BadArgErr);
+
+            /* Filter vertical edge with q0 at (4*edge, row) */
+            armVCM4P10_DeBlockPixel(
+                pSrcDst + row*srcdstStep + 4*edge,
+                1,
+                pThresholds[(row >> 1) + 4*edge],
+                pAlpha[edge], pBeta[edge],
+                pBS[bsIdx],
+                1);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c
new file mode 100644
index 0000000..30a37da
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c
@@ -0,0 +1,125 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 luma deblock module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingLuma_HorEdge_I   (6.3.3.3.2)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four horizontal edges of the luma 
+ * macroblock (16x16). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input macroblock; must be 16-byte aligned. 
+ *   srcdstStep - step of the arrays; must be a multiple of 16.
+ *   pAlpha - array of size 2 of alpha thresholds (the first item is the alpha 
+ *            threshold for the external horizontal edge, and the second item is
+ *            for the internal horizontal edge); per [ISO14496-10] alpha 
+ *            values must be in the range [0,255]. 
+ *   pBeta - array of size 2 of beta thresholds (the first item is the beta 
+ *            threshold for the external horizontal edge, and the second item 
+ *            is for the internal horizontal edge). Per [ISO14496-10] beta 
+ *            values must be in the range [0,18]. 
+ *   pThresholds - array of size 16 containing thresholds, TC0, for the top 
+ *            horizontal edge of each 4x4 block, arranged in horizontal block 
+ *            order; must be aligned on a 4-byte boundary.  Per [ISO14496-10]
+ *            values must be in the range [0,25]. 
+ *   pBS - array of size 16 of BS parameters (arranged in horizontal block 
+ *            order); valid in the range [0,4] with the following 
+ *            restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) 
+ *            pBS[i]== 4 if and only if pBS[i^3]== 4.  Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst -Pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr, if the function runs without error.
+ * 
+ *    OMX_Sts_BadArgErr, if one of the following cases occurs: 
+ *    -    one or more of the following pointers is NULL: pSrcDst, pAlpha, 
+ *              pBeta, pThresholds, or pBS. 
+ *    -    either pThresholds or pBS is not aligned on a 4-byte boundary. 
+ *    -    pSrcDst is not 16-byte aligned. 
+ *    -    srcdstStep is not a multiple of 16. 
+ *    -    pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. 
+ *    -    pBeta[0] and/or pBeta[1] is outside the range [0,18]. 
+ *    -    One or more entries in the table pThresholds[0..15] is 
+ *         outside of the range [0,25]. 
+ *    -    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or 
+ *              (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. 
+ *
+ */
+
+OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I(
+     OMX_U8* pSrcDst,
+     OMX_S32 srcdstStep,
+     const OMX_U8* pAlpha,
+     const OMX_U8* pBeta,
+     const OMX_U8* pThresholds,
+     const OMX_U8 *pBS
+ )
+{
+    int edge, col, blk;
+
+    /* The macroblock pointer and all four tables must be supplied. */
+    armRetArgErrIf((pSrcDst == NULL) || (pAlpha == NULL) || (pBeta == NULL),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((pThresholds == NULL) || (pBS == NULL),
+                   OMX_Sts_BadArgErr);
+    /* NOTE(review): header asks for 16-byte alignment/stride; code tests 8 */
+    armRetArgErrIf((armNot8ByteAligned(pSrcDst)) || ((srcdstStep & 7) != 0),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((armNot4ByteAligned(pThresholds)) ||
+                   (armNot4ByteAligned(pBS)), OMX_Sts_BadArgErr);
+
+    /* Edges at Y = 0, 4, 8, 12; only edge 0 uses pAlpha[0]/pBeta[0]. */
+    for (edge = 0; edge < 4; edge++)
+    {
+        const int inner = (edge != 0);
+        for (col = 0; col < 16; col++)
+        {
+            blk = (col >> 2) + 4*edge;
+            /* Strength above 4 is invalid; 4 is only allowed at index 0..3 */
+            armRetArgErrIf(pBS[blk] > 4, OMX_Sts_BadArgErr)
+            armRetArgErrIf((blk > 3) && (pBS[blk] == 4), OMX_Sts_BadArgErr)
+            /* NOTE(review): header says pBS[i^3], code pairs with i^1 */
+            armRetArgErrIf((blk < 4) &&
+                           (pBS[blk] == 4) && (pBS[blk^1] != 4),
+                           OMX_Sts_BadArgErr)
+
+            /* Filter horizontal edge with q0 at (col, 4*edge) */
+            armVCM4P10_DeBlockPixel(
+                pSrcDst + (4*edge)*srcdstStep + col,
+                srcdstStep,
+                pThresholds[blk],
+                pAlpha[inner], pBeta[inner],
+                pBS[blk],
+                0);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c
new file mode 100644
index 0000000..8733427
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c
@@ -0,0 +1,128 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 luma deblock module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_FilterDeblockingLuma_VerEdge_I   (6.3.3.3.1)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the luma 
+ * macroblock (16x16). 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the input macroblock; must be 16-byte aligned. 
+ *   srcdstStep -Step of the arrays; must be a multiple of 16. 
+ *   pAlpha -Array of size 2 of alpha thresholds (the first item is the alpha 
+ *            threshold for the external vertical edge, and the second item is 
+ *            for the internal vertical edge); per [ISO14496-10] alpha values 
+ *            must be in the range [0,255]. 
+ *   pBeta -Array of size 2 of beta thresholds (the first item is the beta 
+ *            threshold for the external vertical edge, and the second item is 
+ *            for the internal vertical edge); per [ISO14496-10] beta values 
+ *            must be in the range [0,18]. 
+ *   pThresholds -Array of size 16 of Thresholds (TC0) (values for the left 
+ *            edge of each 4x4 block, arranged in vertical block order); must 
+ *            be aligned on a 4-byte boundary.  Per [ISO14496-10] values must 
+ *            be in the range [0,25]. 
+ *   pBS -Array of size 16 of BS parameters (arranged in vertical block 
+ *            order); valid in the range [0,4] with the following 
+ *            restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) 
+ *            pBS[i]== 4 if and only if pBS[i^3]== 4.  Must be 4-byte aligned. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst -Pointer to filtered output macroblock. 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    Either of the pointers in pSrcDst, pAlpha, pBeta, pThresholds, or pBS 
+ *              is NULL. 
+ *    Either pThresholds or pBS is not aligned on a 4-byte boundary. 
+ *    pSrcDst is not 16-byte aligned. 
+ *    srcdstStep is not a multiple of 16. 
+ *    pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. 
+ *    pBeta[0] and/or pBeta[1] is outside the range [0,18]. 
+ *    One or more entries in the table pThresholds[0..15]is outside of the 
+ *              range [0,25]. 
+ *    pBS is out of range, i.e., one of the following conditions is true: 
+ *              pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && 
+ *              pBS[i^3]!=4) for 0<=i<=3. 
+ *
+ */
+
+OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I(
+     OMX_U8* pSrcDst,
+     OMX_S32 srcdstStep,
+     const OMX_U8* pAlpha,
+     const OMX_U8* pBeta,
+     const OMX_U8* pThresholds,
+     const OMX_U8 *pBS
+ )
+{
+    int X, Y, I, Internal=0;
+
+    armRetArgErrIf(pSrcDst == NULL,             OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot16ByteAligned(pSrcDst),OMX_Sts_BadArgErr);
+    armRetArgErrIf(srcdstStep & 15,             OMX_Sts_BadArgErr);
+    armRetArgErrIf(pAlpha == NULL,              OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta == NULL,               OMX_Sts_BadArgErr);
+    armRetArgErrIf(pThresholds == NULL,         OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBS == NULL,                     OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pBS),         OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta[0] > 18,  OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta[1] > 18,  OMX_Sts_BadArgErr);
+
+    /* Validate every BS/TC0 entry up front: no pixel is modified on bad input */
+    for (I=0; I<16; I++)
+    {
+        armRetArgErrIf(pBS[I] > 4, OMX_Sts_BadArgErr);
+        /* BS==4 is legal only for entries 0..3, and only if entry I^3 is 4 too */
+        armRetArgErrIf((pBS[I] == 4) && (I > 3), OMX_Sts_BadArgErr);
+        armRetArgErrIf((pBS[I] == 4) && (pBS[I^3] != 4), OMX_Sts_BadArgErr);
+        armRetArgErrIf(pThresholds[I] > 25, OMX_Sts_BadArgErr);
+    }
+
+    /* X walks the four vertical edges; only X==0 uses pAlpha[0]/pBeta[0] */
+    for (X=0; X<16; X+=4, Internal=1)
+    {
+        for (Y=0; Y<16; Y++)
+        {
+            /* Table index: 4-row group (Y>>2) within edge column (X>>2) */
+            I = (Y>>2)+4*(X>>2);
+            /* Filter vertical edge with q0 at (X,Y) */
+            armVCM4P10_DeBlockPixel(
+                pSrcDst + Y*srcdstStep + X,
+                1,
+                pThresholds[I],
+                pAlpha[Internal],
+                pBeta[Internal],
+                pBS[I],
+                0);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_GetVLCInfo.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_GetVLCInfo.c
new file mode 100644
index 0000000..81c59d6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_GetVLCInfo.c
@@ -0,0 +1,192 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_GetVLCInfo.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * 
+ * This function extracts run-length encoding (RLE) information
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_GetVLCInfo   (6.3.5.9.1)
+ *
+ * Description:
+ * This function extracts run-length encoding (RLE) information from the 
+ * coefficient matrix.  The results are returned in an OMXVCM4P10VLCInfo 
+ * structure. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcCoeff - pointer to the transform coefficient matrix.  8-byte 
+ *            alignment required. 
+ *   pScanMatrix - pointer to the scan order definition matrix.  For a luma 
+ *            block the scan matrix should follow [ISO14496-10] section 8.5.4, 
+ *            and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 
+ *            10, 7, 11, 14, 15.  For a chroma block, the scan matrix should 
+ *            contain the values 0, 1, 2, 3. 
+ *   bAC - indicates presence of a DC coefficient; 0 = DC coefficient 
+ *            present, 1= DC coefficient absent. 
+ *   MaxNumCoef - specifies the number of coefficients contained in the 
+ *            transform coefficient matrix, pSrcCoeff. The value should be 16 
+ *            for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The 
+ *            value should be 4 for blocks of type CHROMADC. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstVLCInfo - pointer to structure that stores information for 
+ *            run-length coding. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrcCoeff, pScanMatrix, pDstVLCInfo 
+ *    -    pSrcCoeff is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_GetVLCInfo (
+	const OMX_S16*		    pSrcCoeff,
+	const OMX_U8*			    pScanMatrix,
+	OMX_U8			    bAC,
+	OMX_U32			    MaxNumCoef,
+	OMXVCM4P10VLCInfo*	pDstVLCInfo
+)
+{
+    OMX_INT     i, MinIndex;
+    OMX_S32     Value;
+    OMX_U32     Mask = 4, RunBefore;   /* Mask: sign bit of the next trailing one */
+    OMX_S16     *pLevel;
+    OMX_U8      *pRun;
+    OMX_S16     Buf [16];              /* coefficients gathered in scan order */
+
+    /* check for argument error */
+    armRetArgErrIf(pSrcCoeff == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pSrcCoeff), OMX_Sts_BadArgErr)
+    armRetArgErrIf(pScanMatrix == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstVLCInfo == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(bAC > 1, OMX_Sts_BadArgErr)
+    armRetArgErrIf(MaxNumCoef > 16, OMX_Sts_BadArgErr)
+
+    /* Initialize RLE Info structure */
+    pDstVLCInfo->uTrailing_Ones = 0;
+    pDstVLCInfo->uTrailing_One_Signs = 0;
+    pDstVLCInfo->uNumCoeffs = 0;
+    pDstVLCInfo->uTotalZeros = 0;
+
+    for (i = 0; i < 16; i++)
+    {
+        pDstVLCInfo->iLevels [i] = 0;
+        pDstVLCInfo->uRuns [i] = 0;
+    }
+    /* Scanning starts at position 1 when bAC == 0 and MaxNumCoef == 15 */
+    MinIndex = (bAC == 0 && MaxNumCoef == 15) ? 1 : 0;
+    for (i = MinIndex; i < (MaxNumCoef + MinIndex); i++)
+    {        
+        /* Scan */
+        Buf [i - MinIndex] = pSrcCoeff [pScanMatrix [i]];
+    }
+
+    /* skip zeros at the end; i >= 0 is tested first so Buf [-1] is never read */
+    i = MaxNumCoef - 1;
+    while (i >= 0 && !Buf [i])
+    {
+        i--;
+    }
+    /* No non-zero coefficient at all: the cleared structure is the result */
+    if (i < 0)
+    {
+        return OMX_Sts_NoErr;
+    }
+
+    /* Fill RLE Info structure */
+    pLevel = pDstVLCInfo->iLevels;
+    pRun = pDstVLCInfo->uRuns;
+    RunBefore = 0;
+
+    /* Handle the last non-zero coefficient in scan order separately */
+    pDstVLCInfo->uNumCoeffs++;
+    Value = Buf [i];
+    if (Value == 1 || Value == -1)
+    {
+        pDstVLCInfo->uTrailing_Ones++;
+        
+        pDstVLCInfo->uTrailing_One_Signs |= 
+            Value == -1 ? Mask : 0;
+        Mask >>= 1;
+    }
+    else
+    {
+        Value -= (Value > 0 ? 1 : -1);  /* stored with magnitude reduced by 1 */
+        *pLevel++ = Value;
+        Mask = 0;                       /* no trailing ones can follow */
+    }
+
+    /* Remaining coefficients, walking backwards through the scan */
+    while (--i >= 0)
+    {
+        Value = Buf [i];
+        if (Value)
+        {
+            pDstVLCInfo->uNumCoeffs++;
+
+            /* Mask is non-zero only while fewer than 3 trailing ones were seen */
+            if (Mask &&
+                (Value == 1 || 
+                 Value == -1))
+            {
+                pDstVLCInfo->uTrailing_Ones++;
+                
+                pDstVLCInfo->uTrailing_One_Signs |= 
+                    Value == -1 ? Mask : 0;
+                Mask >>= 1;
+                *pRun++ = RunBefore;
+                RunBefore = 0;
+            }
+            else
+            {
+                /* Fewer than 3 trailing ones: this level loses 1 in magnitude */
+                if (Mask)
+                {
+                    Mask = 0;
+                    Value -= (Value > 0 ? 1 : -1);
+                }
+                *pLevel++ = Value;
+                *pRun++ = RunBefore;
+                RunBefore = 0;
+            }
+        }
+        else
+        {
+            pDstVLCInfo->uTotalZeros++;
+            RunBefore++;
+        }        
+    }
+    
+    /* Flush the zeros that precede the first non-zero coefficient in the scan */
+    if (RunBefore)
+    {
+        *pRun++ = RunBefore;
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
new file mode 100644
index 0000000..8824de2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
@@ -0,0 +1,99 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_InterpolateChroma.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate 1/8 Pixel interpolation for Chroma Block
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateChroma   (6.3.3.2.2)
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc -Pointer to the source reference frame buffer 
+ *   srcStep -Reference frame step in bytes 
+ *   dstStep -Destination frame step in bytes; must be a multiple of 
+ *            roi.width. 
+ *   dx -Fractional part of horizontal motion vector component in 1/8 pixel 
+ *            unit; valid in the range [0,7] 
+ *   dy -Fractional part of vertical motion vector component in 1/8 pixel 
+ *            unit; valid in the range [0,7] 
+ *   roi -Dimension of the interpolation region; the parameters roi.width and 
+ *            roi.height must be equal to either 2, 4, or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst -Pointer to the destination frame buffer if roi.width==2,  2-byte 
+ *            alignment required if roi.width==4,  4-byte alignment required 
+ *            if roi.width==8, 8-byte alignment required 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pSrc or pDst is NULL. 
+ *    srcStep or dstStep < 8. 
+ *    dx or dy is out of range [0-7]. 
+ *    roi.width or roi.height is out of range {2,4,8}. 
+ *    roi.width is equal to 2, but pDst is not 2-byte aligned. 
+ *    roi.width is equal to 4, but pDst is not 4-byte aligned. 
+ *    roi.width is equal to 8, but pDst is not 8 byte aligned. 
+ *    srcStep or dstStep is not a multiple of 8. 
+ *
+ */
+
+OMXResult omxVCM4P10_InterpolateChroma (
+     const OMX_U8* pSrc,
+     OMX_S32 srcStep,
+     OMX_U8* pDst,
+     OMX_S32 dstStep,
+     OMX_S32 dx,
+     OMX_S32 dy,
+     OMXSize roi
+ )
+{
+    /* Source and destination buffers are mandatory */
+    armRetArgErrIf((pSrc == NULL) || (pDst == NULL), OMX_Sts_BadArgErr)
+    /* Steps must be at least 8 and a multiple of 8 */
+    armRetArgErrIf((srcStep < 8) || (srcStep & 7), OMX_Sts_BadArgErr)
+    armRetArgErrIf((dstStep < 8) || (dstStep & 7), OMX_Sts_BadArgErr)
+    /* Eighth-pel fractions lie in [0,7] */
+    armRetArgErrIf((dx < 0) || (dx > 7), OMX_Sts_BadArgErr)
+    armRetArgErrIf((dy < 0) || (dy > 7), OMX_Sts_BadArgErr)
+    /* Region is 2, 4 or 8 samples in each dimension */
+    armRetArgErrIf(!((roi.width == 2) || (roi.width == 4) || (roi.width == 8)), OMX_Sts_BadArgErr)
+    armRetArgErrIf(!((roi.height == 2) || (roi.height == 4) || (roi.height == 8)), OMX_Sts_BadArgErr)
+    /* Destination alignment must equal the region width */
+    armRetArgErrIf((roi.width == 2) && armNot2ByteAligned(pDst), OMX_Sts_BadArgErr)
+    armRetArgErrIf((roi.width == 4) && armNot4ByteAligned(pDst), OMX_Sts_BadArgErr)
+    armRetArgErrIf((roi.width == 8) && armNot8ByteAligned(pDst), OMX_Sts_BadArgErr)
+
+    /* All arguments accepted: hand over to the internal chroma interpolator */
+    return armVCM4P10_Interpolate_Chroma(
+        (OMX_U8*)pSrc, srcStep, pDst, dstStep, roi.width, roi.height, dx, dy);
+}
+
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c
new file mode 100644
index 0000000..ef0befa
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c
@@ -0,0 +1,124 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_InterpolateHalfHor_Luma.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate Half horizontal luma interpolation
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_InterpolateHalfHor_Luma   (6.3.5.5.1)
+ *
+ * Description:
+ * This function performs interpolation for two horizontal 1/2-pel positions 
+ * (-1/2,0) and (1/2, 0) - around a full-pel position. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the top-left corner of the block used to interpolate in 
+ *            the reconstruction frame plane. 
+ *   iSrcStep - Step of the source buffer. 
+ *   iDstStep - Step of the destination(interpolation) buffer; must be a 
+ *            multiple of iWidth. 
+ *   iWidth - Width of the current block; must be equal to either 4, 8, or 16 
+ *   iHeight - Height of the current block; must be equal to 4, 8, or 16 
+ *
+ * Output Arguments:
+ *   
+ *   pDstLeft -Pointer to the interpolation buffer of the left 1/2-pel position 
+ *            (-1/2, 0) 
+ *                 If iWidth==4,  4-byte alignment required. 
+ *                 If iWidth==8,  8-byte alignment required. 
+ *                 If iWidth==16, 16-byte alignment required. 
+ *   pDstRight -Pointer to the interpolation buffer of the right 1/2-pel 
+ *            position (1/2, 0) 
+ *                 If iWidth==4,  4-byte alignment required. 
+ *                 If iWidth==8,  8-byte alignment required. 
+ *                 If iWidth==16, 16-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *             pSrc, pDstLeft, or pDstRight 
+ *    -    iWidth or iHeight have values other than 4, 8, or 16 
+ *    -    iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary 
+ *    -    iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on a 8-byte boundary 
+ *    -    iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary 
+ *    -    any alignment restrictions are violated 
+ *
+ */
+
+OMXResult omxVCM4P10_InterpolateHalfHor_Luma(
+        const OMX_U8*     pSrc, 
+        OMX_U32     iSrcStep, 
+        OMX_U8*     pDstLeft, 
+        OMX_U8*     pDstRight, 
+        OMX_U32     iDstStep, 
+        OMX_U32     iWidth, 
+        OMX_U32     iHeight
+)
+{
+    OMXResult   RetValue;    
+
+    /* check for argument error; EITHER misaligned destination is rejected */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstLeft == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstRight == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 4) && 
+                   (armNot4ByteAligned(pDstLeft) ||
+                    armNot4ByteAligned(pDstRight)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 8) && 
+                   (armNot8ByteAligned(pDstLeft) ||
+                    armNot8ByteAligned(pDstRight)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 16) && 
+                   (armNot16ByteAligned(pDstLeft) ||
+                    armNot16ByteAligned(pDstRight)), OMX_Sts_BadArgErr)
+    /* block dimensions are restricted to 4, 8 or 16 */
+    armRetArgErrIf((iHeight != 16) && (iHeight != 8)&& (iHeight != 4), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth != 16) && (iWidth != 8)&& (iWidth != 4), OMX_Sts_BadArgErr)
+    /* left plane (-1/2, 0): same routine, source shifted one pixel to the left */
+    RetValue = armVCM4P10_InterpolateHalfHor_Luma (
+        pSrc - 1,     
+        iSrcStep, 
+        pDstLeft,     
+        iDstStep, 
+        iWidth,   
+        iHeight);
+
+    if (RetValue != OMX_Sts_NoErr)
+    {
+        return RetValue;
+    }
+    /* right plane (1/2, 0): source unshifted */
+    RetValue = armVCM4P10_InterpolateHalfHor_Luma (
+        pSrc,     
+        iSrcStep, 
+        pDstRight,     
+        iDstStep, 
+        iWidth,   
+        iHeight);
+
+    return RetValue;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c
new file mode 100644
index 0000000..3560ff8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c
@@ -0,0 +1,123 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_InterpolateHalfVer_Luma.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate Half vertical luma interpolation
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+
+/**
+ * Function:  omxVCM4P10_InterpolateHalfVer_Luma   (6.3.5.5.2)
+ *
+ * Description:
+ * This function performs interpolation for two vertical 1/2-pel positions - 
+ * (0, -1/2) and (0, 1/2) - around a full-pel position. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to top-left corner of block used to interpolate in the 
+ *            reconstructed frame plane 
+ *   iSrcStep - Step of the source buffer. 
+ *   iDstStep - Step of the destination (interpolation) buffer; must be a 
+ *            multiple of iWidth. 
+ *   iWidth - Width of the current block; must be equal to either 4, 8, or 16 
+ *   iHeight - Height of the current block; must be equal to either 4, 8, or 16 
+ *
+ * Output Arguments:
+ *   
+ *   pDstUp -Pointer to the interpolation buffer of the 1/2-pel position above 
+ *            the current full-pel position (0, -1/2) 
+ *                If iWidth==4, 4-byte alignment required. 
+ *                If iWidth==8, 8-byte alignment required. 
+ *                If iWidth==16, 16-byte alignment required. 
+ *   pDstDown -Pointer to the interpolation buffer of the 1/2-pel position below 
+ *            the current full-pel position (0, 1/2) 
+ *                If iWidth==4, 4-byte alignment required. 
+ *                If iWidth==8, 8-byte alignment required. 
+ *                If iWidth==16, 16-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrc, pDstUp, or pDstDown 
+ *    -    iWidth or iHeight have values other than 4, 8, or 16 
+ *    -    iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary 
+ *    -    iWidth==8 but pDstUp and/or pDstDown is/are not aligned on a 8-byte boundary 
+ *    -    iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary 
+ *
+ */
+ OMXResult omxVCM4P10_InterpolateHalfVer_Luma(  
+     const OMX_U8*    pSrc, 
+     OMX_U32    iSrcStep, 
+     OMX_U8*    pDstUp, 
+     OMX_U8*    pDstDown, 
+     OMX_U32    iDstStep, 
+     OMX_U32    iWidth, 
+     OMX_U32    iHeight
+)
+{
+    OMXResult   RetValue;
+
+    /* check for argument error; EITHER misaligned destination is rejected */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstUp == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstDown == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 4) && 
+                   (armNot4ByteAligned(pDstUp) ||
+                    armNot4ByteAligned(pDstDown)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 8) && 
+                   (armNot8ByteAligned(pDstUp) ||
+                    armNot8ByteAligned(pDstDown)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 16) && 
+                   (armNot16ByteAligned(pDstUp) ||
+                    armNot16ByteAligned(pDstDown)), OMX_Sts_BadArgErr)
+    /* block dimensions are restricted to 4, 8 or 16 */
+    armRetArgErrIf((iHeight != 16) && (iHeight != 8)&& (iHeight != 4), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth != 16) && (iWidth != 8)&& (iWidth != 4), OMX_Sts_BadArgErr)
+    /* upper plane (0, -1/2): same routine, source shifted one row up */
+    RetValue = armVCM4P10_InterpolateHalfVer_Luma(  
+        pSrc - iSrcStep, 
+        iSrcStep, 
+        pDstUp,
+        iDstStep, 
+        iWidth, 
+        iHeight);
+    
+    if (RetValue != OMX_Sts_NoErr)
+    {
+        return RetValue;
+    }
+    /* lower plane (0, 1/2): source unshifted */
+    RetValue = armVCM4P10_InterpolateHalfVer_Luma(  
+        pSrc, 
+        iSrcStep, 
+        pDstDown,
+        iDstStep, 
+        iWidth, 
+        iHeight);
+    
+    return RetValue;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateLuma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateLuma.c
new file mode 100644
index 0000000..d233735
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateLuma.c
@@ -0,0 +1,99 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_InterpolateLuma.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function performs quarter-pixel luma interpolation
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P10_InterpolateLuma   (6.3.3.2.1)
+ *
+ * Description:
+ * Performs quarter-pixel interpolation for inter luma MB. It is assumed that 
+ * the frame is already padded when calling this function. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc -Pointer to the source reference frame buffer 
+ *   srcStep -reference frame step, in bytes; must be a multiple of roi.width 
+ *   dstStep -destination frame step, in bytes; must be a multiple of 
+ *            roi.width 
+ *   dx -Fractional part of horizontal motion vector component in 1/4 pixel 
+ *            unit; valid in the range [0,3] 
+ *   dy -Fractional part of vertical motion vector y component in 1/4 pixel 
+ *            unit; valid in the range [0,3] 
+ *   roi -Dimension of the interpolation region; the parameters roi.width and 
+ *            roi.height must be equal to either 4, 8, or 16. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst -Pointer to the destination frame buffer if roi.width==4,  4-byte 
+ *            alignment required if roi.width==8,  8-byte alignment required 
+ *            if roi.width==16, 16-byte alignment required 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pSrc or pDst is NULL. 
+ *    srcStep or dstStep < roi.width. 
+ *    dx or dy is out of range [0,3]. 
+ *    roi.width or roi.height is out of range {4, 8, 16}. 
+ *    roi.width is equal to 4, but pDst is not 4 byte aligned. 
+ *    roi.width is 8 or 16, but pDst is not 8- or 16-byte aligned, respectively. 
+ *    srcStep or dstStep is not a multiple of 8. 
+ *
+ */
+
+OMXResult omxVCM4P10_InterpolateLuma (
+     const OMX_U8* pSrc,
+     OMX_S32 srcStep,
+     OMX_U8* pDst,
+     OMX_S32 dstStep,
+     OMX_S32 dx,
+     OMX_S32 dy,
+     OMXSize roi
+ )
+{
+    /* Source and destination buffers are mandatory */
+    armRetArgErrIf((pSrc == NULL) || (pDst == NULL), OMX_Sts_BadArgErr)
+    /* Steps must cover one row of the region and be a multiple of 8 */
+    armRetArgErrIf((srcStep < roi.width) || (srcStep & 7), OMX_Sts_BadArgErr)
+    armRetArgErrIf((dstStep < roi.width) || (dstStep & 7), OMX_Sts_BadArgErr)
+    /* Quarter-pel fractions lie in [0,3] */
+    armRetArgErrIf((dx < 0) || (dx > 3), OMX_Sts_BadArgErr)
+    armRetArgErrIf((dy < 0) || (dy > 3), OMX_Sts_BadArgErr)
+    /* Region is 4, 8 or 16 samples in each dimension */
+    armRetArgErrIf(!((roi.width == 4) || (roi.width == 8) || (roi.width == 16)), OMX_Sts_BadArgErr)
+    armRetArgErrIf(!((roi.height == 4) || (roi.height == 8) || (roi.height == 16)), OMX_Sts_BadArgErr)
+    /* Destination alignment must equal the region width */
+    armRetArgErrIf((roi.width == 4) && armNot4ByteAligned(pDst), OMX_Sts_BadArgErr)
+    armRetArgErrIf((roi.width == 8) && armNot8ByteAligned(pDst), OMX_Sts_BadArgErr)
+    armRetArgErrIf((roi.width == 16) && armNot16ByteAligned(pDst), OMX_Sts_BadArgErr)
+
+    /* All arguments accepted: hand over to the internal luma interpolator */
+    return armVCM4P10_Interpolate_Luma(
+        pSrc, srcStep, pDst, dstStep, roi.width, roi.height, dx, dy);
+
+}
+
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c
new file mode 100644
index 0000000..92ba031
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c
@@ -0,0 +1,102 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_InvTransformDequant_ChromaDC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate the inverse 2x2 Hadamard transform of chroma DC
+ * coefficients and dequantize the result
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P10_InvTransformDequant_ChromaDC   (6.3.5.6.4)
+ *
+ * Description:
+ * This function performs inverse 2x2 Hadamard transform and then dequantizes 
+ * the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and 
+ *            quantized coefficients.  8 byte alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to inverse-transformed and dequantized coefficients.  
+ *            8-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: pSrc, pDst 
+ *    -    pSrc or pDst is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_ChromaDC(
+	const OMX_S16* 	pSrc,
+	OMX_S16*	pDst,
+	OMX_U32		iQP
+)
+{
+    OMX_INT     i, j;
+    OMX_S32     m[2][2];
+    OMX_S32     QPer, V00, Value;
+
+    /* check for argument error */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr)
+
+    /* Inv Hadamard Transform for 2x2 block */
+    m[0][0] = pSrc[0] + pSrc[1] +  pSrc[2] + pSrc[3];
+    m[0][1] = pSrc[0] - pSrc[1] +  pSrc[2] - pSrc[3];
+    m[1][0] = pSrc[0] + pSrc[1] -  pSrc[2] - pSrc[3];
+    m[1][1] = pSrc[0] - pSrc[1] -  pSrc[2] + pSrc[3];
+
+    /* Dequantization */
+    /* Scaling: factor V00 is selected by iQP % 6, the power of two by iQP / 6 */
+    QPer = iQP / 6;
+    V00 = armVCM4P10_VMatrix [iQP % 6][0];
+
+    for (j = 0; j < 2; j++)
+    {
+        for (i = 0; i < 2; i++)
+        {
+            if (QPer < 1)
+            {
+                Value = (m[j][i] * V00) >> 1;
+            }
+            else
+            {
+                /* multiply by 2^(QPer-1); "<<" on a negative product is undefined */
+                Value = (m[j][i] * V00) * (1 << (QPer - 1));
+            }
+            pDst[j * 2 + i] = (OMX_S16) Value;  /* narrowed to 16 bits */
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c
new file mode 100644
index 0000000..a3b1200
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c
@@ -0,0 +1,128 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_InvTransformDequant_LumaDC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate 4x4 hadamard transform of luma DC coefficients 
+ * and quantization
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_InvTransformDequant_LumaDC   (6.3.5.6.3)
+ *
+ * Description:
+ * This function performs inverse 4x4 Hadamard transform and then dequantizes 
+ * the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and 
+ *            quantized coefficients.  16 byte alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to inverse-transformed and dequantized coefficients.  
+ *            16-byte alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: pSrc, pDst 
+ *    -    pSrc or pDst is not aligned on a 16-byte boundary 
+ *    -    iQP is greater than 51 
+ */
+OMXResult omxVCM4P10_InvTransformDequant_LumaDC(	
+	const OMX_S16* 	pSrc,
+	OMX_S16*	pDst,
+	OMX_U32		iQP
+)
+{
+    OMX_INT     Row, Col;
+    OMX_S32     Tmp[4][4], Coef[4][4], Scaled;
+    OMX_S32     QPer, Scale;
+
+    /* Reject NULL pointers, an out-of-range QP and misaligned buffers */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot16ByteAligned(pSrc), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot16ByteAligned(pDst), OMX_Sts_BadArgErr)
+
+    /* Inverse 4x4 Hadamard transform of the luma DC block */
+    /* Pass 1: transform each source row (a, b, c, d) */
+    for (Row = 0; Row < 4; Row++)
+    {
+        const OMX_S16 *pRow = pSrc + Row * 4;
+
+        Tmp[Row][0] = pRow[0] + pRow[2]; /* a+c */
+        Tmp[Row][1] = pRow[1] + pRow[3]; /* b+d */
+        Tmp[Row][2] = pRow[0] - pRow[2]; /* a-c */
+        Tmp[Row][3] = pRow[1] - pRow[3]; /* b-d */
+
+        Coef[Row][0] = Tmp[Row][0] + Tmp[Row][1]; /* a+b+c+d */
+        Coef[Row][1] = Tmp[Row][2] + Tmp[Row][3]; /* a+b-c-d */
+        Coef[Row][2] = Tmp[Row][2] - Tmp[Row][3]; /* a-b-c+d */
+        Coef[Row][3] = Tmp[Row][0] - Tmp[Row][1]; /* a-b+c-d */
+    }
+
+    /* Pass 2: apply the same butterflies down each column */
+    for (Col = 0; Col < 4; Col++)
+    {
+        Tmp[0][Col] = Coef[0][Col] + Coef[2][Col];
+        Tmp[1][Col] = Coef[1][Col] + Coef[3][Col];
+        Tmp[2][Col] = Coef[0][Col] - Coef[2][Col];
+        Tmp[3][Col] = Coef[1][Col] - Coef[3][Col];
+
+        Coef[0][Col] = Tmp[0][Col] + Tmp[1][Col];
+        Coef[1][Col] = Tmp[2][Col] + Tmp[3][Col];
+        Coef[2][Col] = Tmp[2][Col] - Tmp[3][Col];
+        Coef[3][Col] = Tmp[0][Col] - Tmp[1][Col];
+    }
+
+    /* Dequantization: the scale factor comes from VMatrix row iQP % 6 */
+    Scale = armVCM4P10_VMatrix [iQP % 6][0];
+    QPer = iQP / 6;
+
+    for (Row = 0; Row < 4; Row++)
+    {
+        for (Col = 0; Col < 4; Col++)
+        {
+            Scaled = Coef[Row][Col] * Scale;
+
+            if (QPer >= 2)
+            {
+                Scaled = Scaled * (1 << (QPer - 2));
+            }
+            else
+            {
+                /* Rounded right shift by (2 - QPer) */
+                Scaled = (Scaled + (1 << (1 - QPer))) >> (2 - QPer);
+            }
+
+            pDst[Row * 4 + Col] = (OMX_S16) Scaled;
+        }
+    }
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c
new file mode 100644
index 0000000..3303997
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c
@@ -0,0 +1,124 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_InvTransformResidualAndAdd.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will inverse integer 4x4 transform
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_InvTransformResidualAndAdd   (6.3.5.7.1)
+ *
+ * Description:
+ * This function performs inverse an 4x4 integer transformation to produce 
+ * the difference signal and then adds the difference to the prediction to get 
+ * the reconstructed signal. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcPred - Pointer to prediction signal.  4-byte alignment required. 
+ *   pDequantCoeff - Pointer to the transformed coefficients.  8-byte 
+ *            alignment required. 
+ *   iSrcPredStep - Step of the prediction buffer; must be a multiple of 4. 
+ *   iDstReconStep - Step of the destination reconstruction buffer; must be a 
+ *            multiple of 4. 
+ *   bAC - Indicate whether there is AC coefficients in the coefficients 
+ *            matrix. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstRecon -Pointer to the destination reconstruction buffer.  4-byte 
+ *            alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrcPred, pDequantCoeff, pDstRecon 
+ *    -    pSrcPred or pDstRecon is not aligned on a 4-byte boundary 
+ *    -    iSrcPredStep or iDstReconStep is zero or not a multiple of 4. 
+ *    -    pDequantCoeff is not aligned on an 8-byte boundary 
+ *    -    bAC is greater than 1 
+ */
+OMXResult omxVCM4P10_InvTransformResidualAndAdd(
+	const OMX_U8* 	pSrcPred, 
+	const OMX_S16* 	pDequantCoeff, 
+	OMX_U8* 	pDstRecon,
+	OMX_U32 	iSrcPredStep, 
+	OMX_U32		iDstReconStep, 
+	OMX_U8		bAC
+)
+{
+    OMX_INT     Idx, Row, Col;
+    OMX_S16     Coef[16], Residual[16];
+    OMX_S32     Pixel;
+
+    /* Validate pointers, alignment, the bAC flag and both step values */
+    armRetArgErrIf(pSrcPred == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot4ByteAligned(pSrcPred), OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDequantCoeff == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pDequantCoeff), OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstRecon == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot4ByteAligned(pDstRecon), OMX_Sts_BadArgErr)
+    armRetArgErrIf(bAC > 1, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iSrcPredStep == 0 || iSrcPredStep & 3, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iDstReconStep == 0 || iDstReconStep & 3, OMX_Sts_BadArgErr)
+
+    /*
+     * Gather the coefficients to transform. Without AC coefficients only
+     * the DC term is read from pDequantCoeff; the other 15 inputs are zero.
+     */
+    for (Idx = 0; Idx < 16; Idx++)
+    {
+        if (bAC || Idx == 0)
+        {
+            Coef[Idx] = pDequantCoeff [Idx];
+        }
+        else
+        {
+            Coef[Idx] = 0;
+        }
+    }
+
+    /* Inverse 4x4 residual transform: Coef -> Residual */
+    armVCM4P10_TransformResidual4x4 (Residual, Coef);
+
+    for (Row = 0; Row < 4; Row++)
+    {
+        const OMX_U8 *pPredRow = pSrcPred + Row * iSrcPredStep;
+        OMX_U8 *pReconRow = pDstRecon + Row * iDstReconStep;
+
+        for (Col = 0; Col < 4; Col++)
+        {
+            /* Add the prediction to the residual */
+            Pixel = (OMX_S32) Residual [Row * 4 + Col] + pPredRow [Col];
+            /* Saturate the sum to the OMX_U8 range */
+            Pixel = armClip (0, 255, Pixel);
+
+            pReconRow [Col] = (OMX_U8) Pixel;
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEGetBufSize.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEGetBufSize.c
new file mode 100644
index 0000000..8c3a5c3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEGetBufSize.c
@@ -0,0 +1,70 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_MEGetBufSize.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Initialization modules for the vendor specific Motion Estimation structure.
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P10_MEGetBufSize   (6.3.5.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification 
+ * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer 
+ * and MotionEstimationMB. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P10MEMode 
+ *   pMEParams -motion estimation parameters 
+ *
+ * Output Arguments:
+ *   
+ *   pSize - pointer to the number of bytes required for the motion 
+ *            estimation specification structure 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    pMEParams or pSize is NULL. 
+ *    -    an invalid MEMode is specified. 
+ *
+ */
+
+OMXResult omxVCM4P10_MEGetBufSize(
+    OMXVCM4P10MEMode MEMode,
+    const OMXVCM4P10MEParams *pMEParams,
+    OMX_U32 *pSize
+    )
+{
+    /* Both pointers are mandatory; the mode must be one of the two searches */
+    armRetArgErrIf(!pMEParams, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!pSize, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((MEMode == OMX_VC_M4P10_FAST_SEARCH) ||
+                     (MEMode == OMX_VC_M4P10_FULL_SEARCH)), OMX_Sts_BadArgErr);
+    /* Every search range has to be strictly positive */
+    armRetArgErrIf(!((pMEParams->searchRange16x16 > 0) &&
+                     (pMEParams->searchRange8x8 > 0) &&
+                     (pMEParams->searchRange4x4 > 0)), OMX_Sts_BadArgErr);
+    *pSize = (OMX_INT) sizeof(ARMVCM4P10_MESpec);
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEInit.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEInit.c
new file mode 100644
index 0000000..58ecc88
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEInit.c
@@ -0,0 +1,92 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_MEInit.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Initialization modules for the vendor specific Motion Estimation structure.
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P10_MEInit   (6.3.5.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the 
+ * omxVCM4P10 motion estimation functions:  BlockMatch_Integer and 
+ * MotionEstimationMB. Memory for the specification structure *pMESpec must be 
+ * allocated prior to calling the function, and should be aligned on a 4-byte 
+ * boundary.  The number of bytes required for the specification structure can 
+ * be determined using the function omxVCM4P10_MEGetBufSize. Following 
+ * initialization by this function, the vendor-specific structure *pMESpec 
+ * should contain an implementation-specific representation of all motion 
+ * estimation parameters received via the structure pMEParams, for example  
+ * searchRange16x16, searchRange8x8, etc. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P10MEMode 
+ *   pMEParams - motion estimation parameters 
+ *   pMESpec - pointer to the uninitialized ME specification structure 
+ *
+ * Output Arguments:
+ *   
+ *   pMESpec - pointer to the initialized ME specification structure 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    pMEParams or pMESpec is NULL. 
+ *    -    an invalid value was specified for the parameter MEmode 
+ *    -    a negative or zero value was specified for one of the search ranges 
+ *         (e.g.,  pMEParams->searchRange8x8, pMEParams->searchRange16x16, etc.) 
+ *    -    either in isolation or in combination, one or more of the enables or 
+ *         search ranges in the structure *pMEParams were configured such 
+ *         that the requested behavior fails to comply with [ISO14496-10]. 
+ *
+ */
+
+OMXResult omxVCM4P10_MEInit(
+        OMXVCM4P10MEMode MEMode,
+        const OMXVCM4P10MEParams *pMEParams,
+        void *pMESpec
+       )
+{
+    ARMVCM4P10_MESpec *pSpec;
+    armRetArgErrIf(!pMEParams, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!pMESpec, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((MEMode == OMX_VC_M4P10_FAST_SEARCH) ||
+                     (MEMode == OMX_VC_M4P10_FULL_SEARCH)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((pMEParams->searchRange16x16 > 0) &&
+                     (pMEParams->searchRange8x8 > 0) &&
+                     (pMEParams->searchRange4x4 > 0)), OMX_Sts_BadArgErr);
+
+    /* Store the mode and a copy of every caller-supplied parameter */
+    pSpec = (ARMVCM4P10_MESpec *) pMESpec;
+    pSpec->MEMode                       = MEMode;
+    pSpec->MEParams.searchRange16x16    = pMEParams->searchRange16x16;
+    pSpec->MEParams.searchRange8x8      = pMEParams->searchRange8x8;
+    pSpec->MEParams.searchRange4x4      = pMEParams->searchRange4x4;
+    pSpec->MEParams.blockSplitEnable8x8 = pMEParams->blockSplitEnable8x8;
+    pSpec->MEParams.blockSplitEnable4x4 = pMEParams->blockSplitEnable4x4;
+    pSpec->MEParams.halfSearchEnable    = pMEParams->halfSearchEnable;
+    pSpec->MEParams.quarterSearchEnable = pMEParams->quarterSearchEnable;
+    pSpec->MEParams.intraEnable4x4      = pMEParams->intraEnable4x4;
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c
new file mode 100644
index 0000000..33dbf3f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c
@@ -0,0 +1,1892 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_MotionEstimationMB.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function performs MB level motion estimation
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+#define  ARM_VCM4P10_MAX_FRAMES     (15)
+#define  ARM_VCM4P10_MAX_4x4_SAD		(0xffff)	/* SAD stored when a 4x4 block cannot be evaluated */
+#define  ARM_VCM4P10_MAX_MODE_VALUE     (0xffffffff)
+#define  ARM_VCM4P10_MAX_MODES          (16)
+#define  ARM_VCM4P10_MB_BLOCK_SIZE      (16)
+#define  ARM_VCM4P10_MEDIAN(a,b,c)      (((a)>(b))?(((a)>(c))?(((b)>(c))?(b):(c)):(a)):(((b)>(c))?(((a)>(c))?(a):(c)):(b)))
+#define  ARM_VCM4P10_SHIFT_QP           (12)
+
+#define  ARM_VCM4P10_MVPRED_MEDIAN      (0)
+#define  ARM_VCM4P10_MVPRED_L           (1)
+#define  ARM_VCM4P10_MVPRED_U           (2)
+#define  ARM_VCM4P10_MVPRED_UR          (3)
+
+/* 4x4 block size, cost ceiling and the marker for an unavailable block */
+#define ARM_VCM4P10_BLOCK_SIZE          (4)
+#define ARM_VCM4P10_MAX_COST            (1 << 30)
+#define  ARM_VCM4P10_INVALID_BLOCK      (-2)
+
+
+/**
+ * Function: armVCM4P10_CalculateBlockSAD
+ *
+ * Description:
+ *    Builds the predicted macroblock for the encoding mode described by
+ * pSrcMBInfo in a temporary 16x16 buffer and writes the SAD of each 4x4
+ * block against the current macroblock to pDstBlockSAD, in scan order.
+ *
+ * Remarks: motion vectors are split into floor(mv / 4) pels and (mv & 3).
+ *
+ * Parameters:
+ * [in] pSrcMBInfo    - MB mode description to evaluate; NULL gives maximum SADs
+ * [in] pSrcCurrBuf   - Pointer to the current macroblock
+ * [in] SrcCurrStep   - Step of pSrcCurrBuf
+ * [in] pSrcRefBufList- Reference buffers, indexed by pSrcMBInfo->pRefL0Idx[]
+ * [in] SrcRefStep    - Step of the reference buffers
+ * [in] pSrcRecBuf    - Reconstructed buffer used as source for intra prediction
+ * [in] SrcRecStep    - Step of pSrcRecBuf
+ * [in] pRefRect      - Not referenced by this function
+ * [in] pCurrPointPos - Position of the current MB; used for Intra 16x16 availability
+ * [in] pMBInter      - Neighbouring inter MBs ([0] left, [1] up, [2] up-left)
+ * [in] pMBIntra      - Neighbouring intra MBs (same order; [3] is also read)
+ * [out] pDstBlockSAD - pointer to 16 element array for SAD corresponding to 4x4 blocks;
+ *                      every entry is ARM_VCM4P10_MAX_4x4_SAD when the mode
+ *                      cannot be evaluated
+ * Return Value:
+ * OMX_Sts_NoErr (always)
+ *
+ */
+
+static OMXResult armVCM4P10_CalculateBlockSAD(
+	OMXVCM4P10MBInfo *pSrcMBInfo, 
+    const OMX_U8 *pSrcCurrBuf,                                  
+	OMX_S32 SrcCurrStep, 
+	const OMX_U8 *pSrcRefBufList[ARM_VCM4P10_MAX_FRAMES],
+	OMX_S32 SrcRefStep,
+	const OMX_U8 *pSrcRecBuf, 
+	OMX_S32 SrcRecStep,
+	const OMXRect *pRefRect,
+	const OMXVCM4P2Coordinate *pCurrPointPos,
+	const OMXVCM4P10MBInfoPtr *pMBInter, 
+	const OMXVCM4P10MBInfoPtr *pMBIntra,
+	OMX_U16 *pDstBlockSAD)
+{
+	OMX_INT		InvalidSAD = 0;
+	OMX_INT		i;
+
+	OMX_U8		Buffer [16*16 + 15];
+	OMX_U8		*pTempDstBuf;
+	OMX_S32		TempDstStep;
+	OMX_U8		*pTempRefBuf;
+	OMX_S32		TempRefStep; 
+
+	/* Temporary buffer to store the predicted mb coefficients */
+	pTempDstBuf = armAlignTo16Bytes(Buffer);
+	TempDstStep = 16;
+
+	/* Update pDstBlockSAD if MB is a valid type */
+	if (pSrcMBInfo)
+	{
+	    OMX_U32     Width=0, Height=0, MaxXPart, MaxYPart,MaxSubXPart,MaxSubYPart;
+	    
+		/* Depending on type of MB, do prediction and fill temp buffer */
+		switch (pSrcMBInfo->mbType)
+		{
+		case OMX_VC_P_16x16:
+				Width = 16;
+				Height = 16;
+				break;
+		case OMX_VC_P_16x8:
+				Width = 16;
+				Height = 8;
+				break;
+		case OMX_VC_P_8x16:
+				Width = 8;
+				Height = 16;
+				break;
+		case OMX_VC_P_8x8:
+				Width = 8;
+				Height = 8;
+				break;
+		case OMX_VC_INTRA_4x4:
+			{
+				/* Create predicted MB Intra4x4 mode */
+				OMX_S32     PredIntra4x4Mode [5][9];
+				OMX_S32		x, y, Block8x8, Block4x4, BlockX, BlockY;
+				OMX_U8      pSrcYBuff [(16*3)*(16*2)];
+				OMX_U8		*pSrcY;
+				OMX_S32     StepSrcY;
+				OMX_S32		availability;
+
+				for (y = 0; y < 5; y++)
+				{
+					for (x = 0; x < 9; x++)
+					{
+						/* 
+						 * Initialize with value of ARM_VCM4P10_INVALID_BLOCK, to mean this 
+						 * 4x4 block is not available 
+						 */
+						PredIntra4x4Mode [y][x] = ARM_VCM4P10_INVALID_BLOCK;
+					}
+				}
+
+				/* Replace ARM_VCM4P10_INVALID_BLOCK value with available MBs values*/
+				for (x = 0; x < 4; x++)
+				{
+					/* Store values of b0, b1, b2, b3 */
+					if (pMBIntra[1] != NULL)
+					{
+						PredIntra4x4Mode [0][x + 1] = 
+							pMBIntra[1]->pIntra4x4PredMode[3*4 + x];            
+					}
+			        
+					/* Store values of d0, d1, d2, d3 */
+					if (pMBIntra[3] != NULL)
+					{
+						PredIntra4x4Mode [0][x + 5] = 
+							pMBIntra[3]->pIntra4x4PredMode[3*4 + x];
+					}
+				}
+		    
+				/* Store values of c3 */
+				if (pMBIntra[2] != NULL)
+				{
+					PredIntra4x4Mode [0][0] = pMBIntra[2]->pIntra4x4PredMode[15];
+				}
+		    
+				for (y = 0; y < 4; y++)
+				{
+					/* Store values of a0, a1, a2, a3 */
+					if (pMBIntra[0] != NULL)
+					{
+						PredIntra4x4Mode [y + 1][0] = 
+							pMBIntra[0]->pIntra4x4PredMode[y*4 + 3];
+					}
+				}
+		        
+				/*
+				 * Copy a 48x32 window of the reconstructed buffer, starting 16 rows
+				 * above and 16 columns left of the current MB, into pSrcYBuff.
+				 */
+			    
+				pSrcY = pSrcYBuff;
+				StepSrcY = 16 * 3;
+				for (y = 0; y < (16 * 2); y++)
+				{
+					for (x = 0; x < (16 * 3); x++)
+					{
+						pSrcY [StepSrcY * y + x] = 
+							pSrcRecBuf [SrcRecStep * (y - 16) + x - 16];
+					}
+				}
+
+		    
+				/* for each 8x8 block */
+				for (Block8x8 = 0; Block8x8 < 4; Block8x8++)
+				{
+					/* for each 4x4 block inside 8x8 block */
+					for (Block4x4 = 0; Block4x4 < 4; Block4x4++)
+					{
+						/* Get block cordinates from 8x8 block index and 4x4 block index */
+						BlockX = ((Block8x8 & 1) << 1) + (Block4x4 & 1);
+						BlockY = ((Block8x8 >> 1) << 1) + (Block4x4 >> 1);
+					    
+						/* Add offset to point to start of current MB in the array pIntra4x4PredMode */
+						x = BlockX + 1;
+						y = BlockY + 1;
+
+						availability = 0;
+
+						/* Check for availability of LEFT Block */
+						if (PredIntra4x4Mode [y][x - 1] != ARM_VCM4P10_INVALID_BLOCK)
+						{
+							availability |= OMX_VC_LEFT;        
+						}
+
+						/* Check for availability of UPPER Block */
+						if (PredIntra4x4Mode [y - 1][x] != ARM_VCM4P10_INVALID_BLOCK)
+						{
+							availability |= OMX_VC_UPPER;        
+						}
+
+						/* Check for availability of UPPER LEFT Block */
+						if (PredIntra4x4Mode [y - 1][x - 1] != ARM_VCM4P10_INVALID_BLOCK)
+						{
+							availability |= OMX_VC_UPPER_LEFT;        
+						}
+						
+						PredIntra4x4Mode [y][x] = pSrcMBInfo->pIntra4x4PredMode[BlockY*4+BlockX];
+						x = BlockX * 4;
+						y = BlockY * 4;
+
+						pSrcY = pSrcYBuff + 16 * StepSrcY + 16 + y * StepSrcY + x;
+
+						omxVCM4P10_PredictIntra_4x4(
+							 pSrcY - 1,
+							 pSrcY - StepSrcY,
+							 pSrcY - StepSrcY - 1,
+							 pTempDstBuf + x + y * TempDstStep,
+							 StepSrcY,
+							 TempDstStep,
+							 pSrcMBInfo->pIntra4x4PredMode[BlockY*4+BlockX],
+							 availability);
+
+						/* Copy the predicted block back so later blocks predict from it; BlockX/BlockY are reused as loop counters */
+						for (BlockY=0;BlockY<4;BlockY++)
+						{
+							for(BlockX=0;BlockX<4;BlockX++)
+							{
+								pSrcY [BlockY * StepSrcY + BlockX] = 
+									(OMX_U8)(*(pTempDstBuf + x + y * TempDstStep + BlockY * TempDstStep + BlockX));
+							}
+						}
+
+					}
+				}
+				break;
+			}
+		case OMX_VC_INTRA_16x16:
+			{
+				OMX_U32     MBPosX = pCurrPointPos->x >> 4;        
+				OMX_U32     MBPosY = pCurrPointPos->y >> 4;        
+				OMX_U32		availability = 0;
+
+				/* Check for availability of LEFT MB */
+				if ((MBPosX != 0) && (pMBIntra [0] != 0 || pMBInter [0] != 0))
+				{
+					availability |= OMX_VC_LEFT;        
+				}
+
+				/* Check for availability of UP MB */
+				if ((MBPosY != 0) && (pMBIntra [1] != 0 || pMBInter [1] != 0))
+				{
+					availability |= OMX_VC_UPPER;        
+				}
+
+				/* Check for availability of UP-LEFT MB */
+				if ((MBPosX > 0) && (MBPosY > 0) && 
+					(pMBIntra [2] != 0 || pMBInter [2] != 0))
+				{
+					availability |= OMX_VC_UPPER_LEFT;        
+				}
+
+				omxVCM4P10_PredictIntra_16x16(
+						pSrcRecBuf - 1, 
+						pSrcRecBuf - SrcRecStep, 
+						pSrcRecBuf - SrcRecStep - 1, 
+						pTempDstBuf, 
+						SrcRecStep, 
+						TempDstStep, 
+						pSrcMBInfo->Intra16x16PredMode, 
+						availability);
+				
+				break;
+			}
+
+		case OMX_VC_INTER_SKIP:
+		case OMX_VC_PREF0_8x8:
+		case OMX_VC_INTRA_PCM:
+		default:
+			/* These cases will update pDstBlockSAD with MAX value */
+			InvalidSAD = 1;
+			break;
+		}
+
+		/* INTER MB */
+		if ((pSrcMBInfo->mbType == OMX_VC_P_16x16) ||
+			(pSrcMBInfo->mbType == OMX_VC_P_8x16) ||
+			(pSrcMBInfo->mbType == OMX_VC_P_16x8) ||
+			(pSrcMBInfo->mbType == OMX_VC_P_8x8))
+		{
+        	const OMX_U8		*pTempSrcBuf;
+        	OMX_S32		TempSrcStep;
+        	OMX_S32		mvx,mvy;
+        	OMX_U32		PartX, PartY, SubPartX, SubPartY;
+        	
+			TempSrcStep = SrcRefStep;
+
+			MaxXPart = 16/Width;
+			MaxYPart = 16/Height;
+
+
+			for (PartY = 0; PartY < MaxYPart; PartY++)
+			{
+				for (PartX = 0; PartX < MaxXPart; PartX++)
+				{
+
+					pTempSrcBuf = pSrcRefBufList[pSrcMBInfo->pRefL0Idx[PartY * 2 + PartX]];
+
+					if (MaxXPart == 2 && MaxYPart == 2)
+					{
+        				switch (pSrcMBInfo->subMBType[PartY*2+PartX])
+        				{
+        				    case OMX_VC_SUB_P_8x8:
+								Width = 8;
+								Height = 8;
+            				    break;
+        				    case OMX_VC_SUB_P_8x4:
+								Width = 8;
+								Height = 4;
+            				    break;
+        				    case OMX_VC_SUB_P_4x8:
+								Width = 4;
+								Height = 8;
+            				    break;
+        				    case OMX_VC_SUB_P_4x4:
+								Width = 4;
+								Height = 4;
+            				    break;
+        				    default:
+								/* Default */
+								Width = 4;
+								Height = 4;
+        				    break;
+        				}
+					
+    				    MaxSubXPart = 8/Width;
+    				    MaxSubYPart = 8/Height;
+
+						for (SubPartY = 0; SubPartY < MaxSubYPart; SubPartY++)
+						{
+							for (SubPartX = 0; SubPartX < MaxSubXPart; SubPartX++)
+							{
+								mvx = pSrcMBInfo->pMV0 [2*PartY + SubPartY][2*PartX + SubPartX].dx;	/* quarter-pel units */
+								mvy = pSrcMBInfo->pMV0 [2*PartY + SubPartY][2*PartX + SubPartX].dy;	/* integer part below is floor(mv / 4), in signed arithmetic */
+								armVCM4P10_Interpolate_Luma(
+									pTempSrcBuf + ((OMX_S32)(8*PartX + 4*SubPartX) + (mvx - (mvx & 3)) / 4) + ((OMX_S32)(8*PartY + 4*SubPartY) + (mvy - (mvy & 3)) / 4) * TempSrcStep,
+									TempSrcStep,
+									pTempDstBuf + (8*PartX + 4*SubPartX) + (8*PartY + 4*SubPartY) * TempDstStep,
+									TempDstStep,
+									Width,
+									Height,
+									mvx & 3,
+									mvy & 3
+									);
+							}
+						}
+					}
+					else
+					{
+
+						mvx = pSrcMBInfo->pMV0 [2*PartY][2*PartX].dx;	/* quarter-pel units */
+						mvy = pSrcMBInfo->pMV0 [2*PartY][2*PartX].dy;	/* integer part below is floor(mv / 4), in signed arithmetic */
+						armVCM4P10_Interpolate_Luma(
+							pTempSrcBuf + ((OMX_S32)(8*PartX) + (mvx - (mvx & 3)) / 4) + ((OMX_S32)(8*PartY) + (mvy - (mvy & 3)) / 4) * TempSrcStep,
+							TempSrcStep,
+							pTempDstBuf + (8*PartX) + (8*PartY) * TempDstStep,
+							TempDstStep,
+							Width,
+							Height,
+							mvx & 3,
+							mvy & 3
+							);
+
+					}
+				}
+			}
+		}
+	}
+	else
+	{
+		InvalidSAD = 1;
+	}
+
+	/* Calculate SAD from predicted buffer */
+	if (!InvalidSAD)
+	{
+	    OMX_U32     x8x8, y8x8, x4x4, y4x4, Block8x8, Block4x4;
+	    OMX_S32     SAD;
+	    
+		pTempRefBuf = pTempDstBuf;
+		TempRefStep = 16;
+
+		/* SAD for each 4x4 block in scan order */
+		for (Block8x8 = 0; Block8x8 < 4; Block8x8++)
+		{
+			x8x8 = 8*(Block8x8 & 1);
+			y8x8 = 8*(Block8x8 >> 1);
+			for (Block4x4 = 0; Block4x4 < 4; Block4x4++)
+			{
+				x4x4 = 4*(Block4x4 & 1);
+				y4x4 = 4*(Block4x4 >> 1);
+
+				armVCCOMM_SAD(	
+					pSrcCurrBuf + (x8x8 + x4x4) + (y8x8 + y4x4) * SrcCurrStep, 
+					SrcCurrStep,
+					pTempRefBuf + (x8x8 + x4x4) + (y8x8 + y4x4) * TempRefStep,
+					TempRefStep,
+    				&SAD,
+    				4, /* Height */
+    				4); /* Width */
+                *(pDstBlockSAD + 4 * Block8x8 + Block4x4) = (SAD < 0x7fff) ? (OMX_U16) SAD : ARM_VCM4P10_MAX_4x4_SAD;	/* saturate with the 16-bit maximum */
+ 			}
+		}
+	}
+	else
+	{
+		/* Fill SADs with max values and return*/
+		for (i = 0; i < 16; i++)
+		{
+			pDstBlockSAD [i] = ARM_VCM4P10_MAX_4x4_SAD;
+		}
+	}
+	return OMX_Sts_NoErr;
+}
+
+
+
+/**
+ * Function: armVCM4P10_Mode4x4Decision
+ *
+ * Description:
+ *    Intra 4x4 Mode decision by calculating cost for all possible modes and
+ * choosing the best mode
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcCurrBuf    - Pointer to the start of current Macroblock
+ * [in] SrcCurrStep - Step size of the pointer pSrcCurrBuf
+ * [in/out] pSrcDstMBCurr - Pointer to the OMXVCM4P10MBInfo which will be updated for
+ *                    field pIntra4x4PredMode of the current block.
+ * [in] Block8x8    - Index 8x8 block in which current 4x4 block belongs
+ * [in] Block4x4    - Index of current 4x4 block
+ * [in/out] pPredIntra4x4SrcY - Pointer to current block location in buffer 
+ *                    with reconstructed values. This will be modified by this
+ *                    function with best mode predicted values 
+ * [in] StepPredIntra4x4SrcY  - Step size of the pointer pPredIntra4x4SrcY
+ * [in] pIntra4x4PredMode     - Array of Intra 4x4 prediction mode for the MB.
+ *                              Current MB modes starts at [1,1].
+ * [out] pBestCost  - Cost for the Best Intra 4x4 mode
+ * Return Value:
+ * None
+ *
+ */
+static OMXVoid armVCM4P10_Mode4x4Decision (
+    const OMX_U8* pSrcCurrBuf,   
+    OMX_S32 SrcCurrStep,
+    OMXVCM4P10MBInfo *pSrcDstMBCurr,
+    OMX_S32 Block8x8,
+    OMX_S32 Block4x4,
+    OMX_U8  *pPredIntra4x4SrcY,
+    OMX_S32 StepPredIntra4x4SrcY,
+    OMX_S32 pIntra4x4PredMode [][9],
+    OMX_S32 *pBestCost
+)
+{
+    OMX_S32     Row, Col, PelX, PelY, BlockX, BlockY, Mode;
+    OMX_S32     GridX, GridY;
+    OMX_S32     Cost, BestCost;
+    OMX_S32     Neighbours = 0;
+    OMX_S32     StepSrcY = StepPredIntra4x4SrcY;
+    OMX_U8      *pBlockY;
+    OMX_U8      pCandidate [4*4];
+    OMXResult   Ret = OMX_Sts_Err;
+
+    /* 4x4 block coordinates inside the MB, from the 8x8 and 4x4 indices */
+    BlockX = ((Block8x8 & 1) << 1) + (Block4x4 & 1);
+    BlockY = ((Block8x8 >> 1) << 1) + (Block4x4 >> 1);
+
+    /* The mode array has a one-entry border: the current MB starts at [1][1] */
+    GridX = BlockX + 1;
+    GridY = BlockY + 1;
+
+    /* A neighbour is usable unless its entry still holds the invalid marker */
+    if (pIntra4x4PredMode [GridY][GridX - 1] != ARM_VCM4P10_INVALID_BLOCK)
+    {
+        Neighbours |= OMX_VC_LEFT;
+    }
+    if (pIntra4x4PredMode [GridY - 1][GridX] != ARM_VCM4P10_INVALID_BLOCK)
+    {
+        Neighbours |= OMX_VC_UPPER;
+    }
+    if (pIntra4x4PredMode [GridY - 1][GridX - 1] != ARM_VCM4P10_INVALID_BLOCK)
+    {
+        Neighbours |= OMX_VC_UPPER_LEFT;
+    }
+
+    /* Pixel offset of the block inside the macroblock */
+    PelX = BlockX * 4;
+    PelY = BlockY * 4;
+
+    /* Top-left sample of this block in the reconstruction buffer */
+    pBlockY = pPredIntra4x4SrcY + StepSrcY * PelY + PelX;
+
+    BestCost = ARM_VCM4P10_MAX_COST;
+    Cost = BestCost;
+
+    /* Try all nine prediction modes and keep the cheapest one */
+    for (Mode = 0; Mode < 9; Mode++)
+    {
+        Ret = omxVCM4P10_PredictIntra_4x4(
+             pBlockY - 1,
+             pBlockY - StepSrcY,
+             pBlockY - StepSrcY - 1,
+             pCandidate,
+             StepSrcY,
+             4,
+             (OMXVCM4P10Intra4x4PredMode) Mode,
+             Neighbours);
+
+        /* Skip modes the predictor rejects */
+        if (Ret != OMX_Sts_NoErr)
+        {
+            continue;
+        }
+
+        armVCCOMM_SAD(
+            pSrcCurrBuf + (PelY * SrcCurrStep) + PelX,
+            SrcCurrStep,
+            pCandidate,
+            4,
+            &Cost,
+            4,
+            4);
+
+        if (Cost >= BestCost)
+        {
+            continue;
+        }
+
+        /* New best: record the mode and store its prediction in the buffer */
+        BestCost = Cost;
+        pIntra4x4PredMode [GridY][GridX] = (OMXVCM4P10Intra4x4PredMode) Mode;
+        pSrcDstMBCurr->pIntra4x4PredMode [BlockY * 4 + BlockX] =
+            (OMXVCM4P10Intra4x4PredMode) Mode;
+
+        for (Row = 0; Row < 4; Row++)
+        {
+            for (Col = 0; Col < 4; Col++)
+            {
+                pBlockY [StepSrcY * Row + Col] = pCandidate [4 * Row + Col];
+            }
+        }
+    }
+
+    *pBestCost = BestCost;
+    return;
+}
+
+/**
+ * Function: armVCM4P10_SetMotionVectorPredictor
+ *
+ * Description:
+ *    Predicts the motion vector of one inter block from its left, upper and
+ *    upper-right neighbours.
+ * Parameters:
+ * [in] BlockStartX - Start X index of the block inside the MB, in integer pels
+ * [in] BlockStartY - Start Y index of the block inside the MB, in integer pels
+ * [in] BlockSizex  - Width of current block
+ * [in] BlockSizey  - Height of current block
+ * [in] RefFrame    - Index of the reference frame for prediction
+ * [in] pRefFrArr   - Reference indices, one per 8x8 block; current MB starts at [2][2]
+ * [in] pMVArr      - Neighbouring MVs, one per 4x4 block; current MB starts at [4][4]
+ * [out] pMVPred    - Pointer to predicted MV
+ * Remarks:
+ *    ARM_VCM4P10_INVALID_BLOCK in pRefFrArr marks a block that cannot be used.
+ * Return Value:
+ * None
+ *
+ */
+static OMXVoid armVCM4P10_SetMotionVectorPredictor(
+    OMX_U32 BlockStartX, 
+    OMX_U32 BlockStartY,
+    OMX_U32 BlockSizex,
+    OMX_U32 BlockSizey,
+    OMX_S32 RefFrame,
+    OMX_S32 pRefFrArr[][6], 
+    OMXVCMotionVector pMVArr[][12],
+    OMXVCMotionVector *pMVPred
+)
+{
+    OMX_S32     RFrameL;       /* Left */
+    OMX_S32     RFrameU;       /* Up */
+    OMX_S32     RFrameUR;      /* Up-Right */
+
+    OMX_S32     BlockX, BlockY, BlockXFr, BlockYFr, MVPredType;
+    OMX_S32     BlockXPlusOff, BlockXPlusOffFr, BlockXMin1Fr, BlockYMin1Fr;
+    /* Block position in 4x4 units inside pMVArr */
+    BlockX = 4 + (BlockStartX >> 2);
+    BlockY = 4 + (BlockStartY >> 2); 
+    BlockXPlusOff = BlockX + (BlockSizex >> 2);
+    /* Same positions in 8x8 units, for indexing pRefFrArr */
+    BlockXFr = BlockX >> 1;  
+    BlockYFr = BlockY >> 1;  
+    BlockXMin1Fr = (BlockX - 1) >> 1;  
+    BlockYMin1Fr = (BlockY - 1) >> 1;  
+    BlockXPlusOffFr = BlockXPlusOff >> 1;
+
+    MVPredType = ARM_VCM4P10_MVPRED_MEDIAN;
+
+    RFrameL = pRefFrArr [BlockYFr][BlockXMin1Fr];
+    RFrameU = pRefFrArr [BlockYMin1Fr][BlockXFr];
+    RFrameUR = pRefFrArr [BlockYMin1Fr][BlockXPlusOffFr];
+    /* Upper-right unusable: test the upper-left reference index instead */
+    if (RFrameUR == ARM_VCM4P10_INVALID_BLOCK)
+    {
+        RFrameUR = pRefFrArr [BlockYMin1Fr][BlockXMin1Fr];
+    }
+
+    /* 
+     * Prediction if only one of the neighbors uses the reference frame
+     * we are checking
+     */
+
+    if (RFrameL == RefFrame && RFrameU != RefFrame && RFrameUR != RefFrame)
+    {
+        MVPredType = ARM_VCM4P10_MVPRED_L;
+    }
+    else if(RFrameL != RefFrame && RFrameU == RefFrame && RFrameUR != RefFrame)
+    {
+        MVPredType = ARM_VCM4P10_MVPRED_U;
+    }
+    else if(RFrameL != RefFrame && RFrameU != RefFrame && RFrameUR == RefFrame)
+    {
+        MVPredType = ARM_VCM4P10_MVPRED_UR;
+    }
+
+    /* Directional predictions for 8x16 and 16x8 partitions */
+    else if(BlockSizex == 8 && BlockSizey == 16)
+    {
+        if(BlockStartX == 0)
+        {
+            if(RFrameL == RefFrame)
+            {
+                MVPredType = ARM_VCM4P10_MVPRED_L;
+            }
+        }
+        else
+        {
+            if (RFrameUR == RefFrame)
+            {
+                MVPredType = ARM_VCM4P10_MVPRED_UR;
+            }
+        }
+    }
+    else if(BlockSizex == 16 && BlockSizey == 8)
+    {
+        if(BlockStartY == 0)
+        {
+            if(RFrameU == RefFrame)
+            {
+                MVPredType = ARM_VCM4P10_MVPRED_U;
+            }
+        }
+        else
+        {
+            if(RFrameL == RefFrame)
+            {
+                MVPredType = ARM_VCM4P10_MVPRED_L;
+            }
+        }
+    }
+
+    switch (MVPredType)
+    {
+    case ARM_VCM4P10_MVPRED_MEDIAN:
+        /* Whole upper row (up-left, up, up-right) unusable: take the left MV */
+        if (pRefFrArr [BlockYMin1Fr][BlockXMin1Fr] == ARM_VCM4P10_INVALID_BLOCK && 
+            pRefFrArr [BlockYMin1Fr][BlockXFr] == ARM_VCM4P10_INVALID_BLOCK && 
+            pRefFrArr [BlockYMin1Fr][BlockXPlusOffFr] == ARM_VCM4P10_INVALID_BLOCK)
+        {
+            pMVPred->dx = pMVArr [BlockY][BlockX - 1].dx;
+            pMVPred->dy = pMVArr [BlockY][BlockX - 1].dy;
+        }
+        else
+        {
+            pMVPred->dx = 
+                ARM_VCM4P10_MEDIAN(pMVArr [BlockY][BlockX - 1].dx, 
+                pMVArr [BlockY - 1][BlockX].dx, 
+                pMVArr [BlockY - 1][BlockXPlusOff].dx);
+            pMVPred->dy = 
+                ARM_VCM4P10_MEDIAN(pMVArr [BlockY][BlockX - 1].dy, 
+                pMVArr [BlockY - 1][BlockX].dy, 
+                pMVArr [BlockY - 1][BlockXPlusOff].dy);
+        }
+        break;
+    case ARM_VCM4P10_MVPRED_L:
+        pMVPred->dx = pMVArr [BlockY][BlockX - 1].dx;
+        pMVPred->dy = pMVArr [BlockY][BlockX - 1].dy;
+        break;
+    case ARM_VCM4P10_MVPRED_U:
+        pMVPred->dx = pMVArr [BlockY - 1][BlockX].dx;
+        pMVPred->dy = pMVArr [BlockY - 1][BlockX].dy;
+        break;
+    case ARM_VCM4P10_MVPRED_UR:
+        if (pRefFrArr [BlockYMin1Fr][BlockXPlusOffFr] != ARM_VCM4P10_INVALID_BLOCK)
+        {
+            pMVPred->dx = pMVArr [BlockY - 1][BlockXPlusOff].dx;
+            pMVPred->dy = pMVArr [BlockY - 1][BlockXPlusOff].dy;
+        }
+        else
+        {
+            pMVPred->dx = pMVArr [BlockY - 1][BlockX - 1].dx;
+            pMVPred->dy = pMVArr [BlockY - 1][BlockX - 1].dy;
+        }
+        break;
+    default:
+        break;
+    }
+    
+    return;
+}
+
+/**
+ * Function: armVCM4P10_BlockMotionSearch
+ *
+ * Description:
+ *    Predicts the MV of one block, runs the integer-pel block match and then
+ *    the optional half-pel and quarter-pel refinements.
+ *
+ * Parameters:
+ * [in] pSrcCurrBuf - Pointer to the luma samples of the current block
+ * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf 
+ * [in] pSrcRefY    - Pointer to the co-located luma samples in the reference frame
+ * [in] nSrcRefStep - Step size for the pointer pSrcRefY 
+ * [in] pRefRect    - Pointer to the valid reference rectangle; relative to the image origin.
+ * [in] pCurrPointPos - Position of the current macroblock in the current plane.
+ * [in] pMESpec     - Motion estimation structure (read as OMXVCM4P10MEParams)
+ * [in] nLamda      - For calculating the cost
+ * [out] pBestCost  - Cost left by the last search stage that ran
+ * [out] pBestMV    - MV left by the last search stage that ran
+ * [in] BlockStartX - Block start X index in integer pels
+ * [in] BlockStartY - Block start Y index in integer pels
+ * [in] BlockSizeX  - Width of current block
+ * [in] BlockSizeY  - Height of current block
+ * [in] RefFrame    - Index of the reference frame for prediction
+ * [in] pRefFrArr   - Pointer to reference frame array of the neighbouring blocks
+ * [in] pMVArr      - Pointer to MV array storing neighbouring MVs for MV prediction
+ * [out] pMVPred    - Receives the MV predicted from neighbour MVs
+ * Remarks:
+ *    A failing stage does not stop the stages that follow it.
+ * Return Value:
+ * OMXResult of the last search stage that ran
+ *
+ *
+ */
+static OMXResult armVCM4P10_BlockMotionSearch(
+    const OMX_U8* pSrcCurrBuf, 
+    OMX_S32 SrcCurrStep, 
+    const OMX_U8* pSrcRefY, 
+    OMX_S32 nSrcRefStep, 
+	const OMXRect *pRefRect,
+	const OMXVCM4P2Coordinate *pCurrPointPos,
+    void* pMESpec, 
+
+    OMX_S32 nLamda,
+    OMX_S32* pBestCost, 
+    OMXVCMotionVector *pBestMV,
+    
+    OMX_U32 BlockStartX, 
+    OMX_U32 BlockStartY,
+    OMX_U32 BlockSizeX,
+    OMX_U32 BlockSizeY,
+    OMX_S32 RefFrame,
+    OMX_S32 pRefFrArr [][6], 
+    OMXVCMotionVector pMVArr [][12],
+    OMXVCMotionVector *pMVPred
+   )
+{
+    OMXVCM4P10MEParams  *pParams;
+    OMXVCM4P2Coordinate BlockPos;
+    OMXVCMotionVector   SeedMV, SearchMV;
+    OMX_S32             SearchCost;
+    OMXResult           Status;
+
+    /*
+     * The ME specification is passed as void*; here it is read as the
+     * parameter block that carries the sub-pel search switches.
+     */
+    pParams = (OMXVCM4P10MEParams *) pMESpec;
+
+    /* Position of the block = MB position + block offset inside the MB */
+    BlockPos.x = pCurrPointPos->x + BlockStartX;
+    BlockPos.y = pCurrPointPos->y + BlockStartY;
+
+    /* The integer search is handed the zero vector as its candidate MV */
+    SeedMV.dx = 0;
+    SeedMV.dy = 0;
+
+    /* Predict the MV from the neighbouring blocks; written to pMVPred */
+    armVCM4P10_SetMotionVectorPredictor (
+        BlockStartX,
+        BlockStartY,
+        BlockSizeX,
+        BlockSizeY,
+        RefFrame,
+        pRefFrArr,
+        pMVArr,
+        pMVPred);
+
+    /* Stage 1: integer-pel block match */
+    Status = omxVCM4P10_BlockMatch_Integer (
+        pSrcCurrBuf,
+        SrcCurrStep,
+        pSrcRefY,
+        nSrcRefStep,
+        pRefRect,
+        &BlockPos,
+        BlockSizeX,
+        BlockSizeY,
+        nLamda,
+        pMVPred,
+        &SeedMV,
+        &SearchMV,
+        &SearchCost,
+        pMESpec);
+
+    /* Stage 2: half-pel refinement of SearchMV (input/output), if enabled */
+    if (pParams->halfSearchEnable)
+    {
+        Status = omxVCM4P10_BlockMatch_Half(
+            pSrcCurrBuf,
+            SrcCurrStep,
+            pSrcRefY,
+            nSrcRefStep,
+            BlockSizeX,
+            BlockSizeY,
+            nLamda,
+            pMVPred,
+            &SearchMV,
+            &SearchCost);
+    }
+
+    /* Stage 3: quarter-pel refinement of SearchMV, if enabled */
+    if (pParams->quarterSearchEnable)
+    {
+        Status = omxVCM4P10_BlockMatch_Quarter(
+            pSrcCurrBuf,
+            SrcCurrStep,
+            pSrcRefY,
+            nSrcRefStep,
+            BlockSizeX,
+            BlockSizeY,
+            nLamda,
+            pMVPred,
+            &SearchMV,
+            &SearchCost);
+    }
+
+    /*
+     * Report whatever the last executed stage left behind.
+     * NOTE(review): the status of an earlier stage is overwritten by a later
+     * one, so a failed integer search can go unreported - confirm intent.
+     */
+    *pBestCost = SearchCost;
+    pBestMV->dx = SearchMV.dx;
+    pBestMV->dy = SearchMV.dy;
+
+    return Status;
+}
+
+/**
+ * Function: armVCM4P10_PartitionME
+ *
+ * Description:
+ *    Searches every available reference frame for one partition, block by
+ *    block, and keeps the reference frame with the lowest summed block cost.
+ * Parameters:
+ * [in] pSrcCurrBuf    - Pointer to the start of luma component of current Macroblock 
+ * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf 
+ * [in] pSrcRefBufList - List of co-located reference MB pointers; NULL entries are skipped
+ * [in] SrcRefStep  - Step size for the reference buffers
+ * [in] pRefRect   Pointer to the valid reference rectangle; relative to the image origin.
+ * [in] pCurrPointPos   Position of the current macroblock in the current plane.
+ * [in] pMESpec     - Motion estimation structure
+ * [in] PartWidth   - Width of current partition
+ * [in] PartHeight  - Height of current partition
+ * [in] BlockWidth  - Width of current block
+ * [in] BlockHeight - Height of current block
+ * [in] PartStartX  - Partition start X index in integer pels
+ * [in] PartStartY  - Partition start Y index in integer pels
+ * [in/out] pMVArr  - MV array of the neighbourhood; entries of the partition are rewritten
+ * [in/out] pRefFrArr - Reference index array; entries of the partition are rewritten
+ * [out] pMVPredArr - Predicted MVs of the current MB, one per 4x4 block
+ * [in] Lambda      - For calculating the cost
+ * [out] pCost      - Lowest partition cost found
+ * Return Value:
+ * OMX_Sts_NoErr (results of the block searches are not checked)
+ *
+ */
+static OMXResult armVCM4P10_PartitionME (
+    const OMX_U8* pSrcCurrBuf,   
+    OMX_S32 SrcCurrStep,
+	const OMX_U8 *pSrcRefBufList[ARM_VCM4P10_MAX_FRAMES],
+	OMX_S32 SrcRefStep,
+	const OMXRect *pRefRect,
+	const OMXVCM4P2Coordinate *pCurrPointPos,
+    void* pMESpec, 
+
+    OMX_S32 PartWidth,
+    OMX_S32 PartHeight,
+    OMX_S32 BlockWidth,
+    OMX_S32 BlockHeight,
+    OMX_S32 PartStartX, 
+    OMX_S32 PartStartY,
+
+    OMXVCMotionVector pMVArr [][12],
+    OMX_S32 pRefFrArr [][6],
+    OMXVCMotionVector pMVPredArr [][4],
+
+    OMX_S32 Lambda,
+    OMX_S32 *pCost
+)
+{
+    OMX_U32     x, y, i, j, ref, OffX, OffY, OffSrc, OffRef;
+    OMX_S32     BlockCost, PartitionCost, BestCost;
+    OMX_S32     BestRefFrame=0;
+    OMXVCMotionVector   BestMV [4][4];
+    OMXVCMotionVector   BestMVPred [4][4];
+    OMXVCMotionVector   MVPred;
+    OMXVCMotionVector   DstMV;
+
+    BestCost = ARM_VCM4P10_MAX_COST;
+    /* Try every reference frame that is present in the list */
+    for (ref = 0; ref < ARM_VCM4P10_MAX_FRAMES; ref++)
+    {
+        if (pSrcRefBufList [ref] == NULL)
+        {
+            /* No reference frame, continue */
+            continue;
+        }
+
+        PartitionCost = 0;
+
+        for (y = 0; y < PartHeight; y += BlockHeight)
+        {
+            for (x = 0; x < PartWidth; x += BlockWidth)
+            {
+                OffSrc = SrcCurrStep * (PartStartY + y) + PartStartX + x;
+                OffRef = SrcRefStep * (PartStartY + y) + PartStartX + x;
+                armVCM4P10_BlockMotionSearch (
+                    pSrcCurrBuf + OffSrc, 
+                    SrcCurrStep, 
+                    pSrcRefBufList [ref] + OffRef, 
+                    SrcRefStep, 
+                    pRefRect,
+                    pCurrPointPos,
+                    pMESpec, 
+
+                    Lambda,
+                    &BlockCost, 
+                    &DstMV,
+
+                    x + PartStartX, 
+                    y + PartStartY,
+                    BlockWidth,
+                    BlockHeight,
+                    ref,
+                    pRefFrArr, 
+                    pMVArr,
+                    &MVPred);
+
+                PartitionCost += BlockCost;
+                /* Publish the result so that later blocks can predict from it */
+                OffX = (PartStartX + x) >> 2;
+                OffY = (PartStartY + y) >> 2;
+
+                for (j = 0; j < (BlockHeight >> 2); j++)
+                {
+                    for (i = 0; i < (BlockWidth >> 2); i++)
+                    {
+                        pMVArr [4 + OffY + j][4 + OffX + i].dx = DstMV.dx;
+                        pMVArr [4 + OffY + j][4 + OffX + i].dy = DstMV.dy;
+                        pMVPredArr [OffY + j][OffX + i].dx = MVPred.dx;
+                        pMVPredArr [OffY + j][OffX + i].dy = MVPred.dy;
+                    }
+                }
+                /* Blocks under 8 pels in either direction skip the loop below */
+                pRefFrArr [2 + (OffY >> 1)][2 + (OffX >> 1)] = ref;
+                for (j = 0; j < (BlockHeight >> 3); j++)
+                {
+                    for (i = 0; i < (BlockWidth >> 3); i++)
+                    {
+                        pRefFrArr [2 + (OffY >> 1) + j][2 + (OffX >> 1) + i] = ref;
+                    }
+                }
+
+            }
+        }
+
+        /*
+         * If PartitionCost is not higher for this reference frame, back up its motion vectors
+         */
+        if (PartitionCost <= BestCost)
+        {
+            BestCost = PartitionCost;            
+            BestRefFrame = ref;
+
+            for (y = 0; y < (PartHeight/BlockHeight); y++)
+            {
+                for (x = 0; x < (PartWidth/BlockWidth); x++)
+                {
+                    OffX = (PartStartX + x * BlockWidth) >> 2;
+                    OffY = (PartStartY + y * BlockHeight) >> 2;
+
+                    BestMV[y][x].dx = pMVArr [4 + OffY][4 + OffX].dx;
+                    BestMV[y][x].dy = pMVArr [4 + OffY][4 + OffX].dy;
+                    BestMVPred[y][x].dx = pMVPredArr [OffY][OffX].dx;
+                    BestMVPred[y][x].dy = pMVPredArr [OffY][OffX].dy;
+                }
+            }
+        }
+
+    }
+    /*
+     * Copy back best reference frame, motion vectors and cost.
+     * NOTE(review): BestMV is unset if every list entry was NULL - confirm callers.
+     */
+    for (y = 0; y < (PartHeight/BlockHeight); y++)
+    {
+        for (x = 0; x < (PartWidth/BlockWidth); x++)
+        {
+            OffX = (PartStartX + x * BlockWidth) >> 2;
+            OffY = (PartStartY + y * BlockHeight) >> 2;            
+            for (j = 0; j < (BlockHeight >> 2); j++)
+            {
+                for (i = 0; i < (BlockWidth >> 2); i++)
+                {
+                    pMVArr [4 + OffY + j][4 + OffX + i].dx = BestMV[y][x].dx;
+                    pMVArr [4 + OffY + j][4 + OffX + i].dy = BestMV[y][x].dy;
+                    pMVPredArr [OffY + j][OffX + i].dx = BestMVPred[y][x].dx;
+                    pMVPredArr [OffY + j][OffX + i].dy = BestMVPred[y][x].dy;
+                }
+            }
+            /* Same direct store as in the search: sub-8 blocks skip the loop below */
+            pRefFrArr [2 + (OffY >> 1)][2 + (OffX >> 1)] = BestRefFrame;
+            for (j = 0; j < (BlockHeight >> 3); j++)
+            {
+                for (i = 0; i < (BlockWidth >> 3); i++)
+                {
+                    pRefFrArr [2 + (OffY >> 1) + j][2 + (OffX >> 1) + i] = BestRefFrame;
+                }
+            }
+        }
+    }
+
+    *pCost = BestCost;
+    return OMX_Sts_NoErr;
+
+}
+
+/**
+ * Function: armVCM4P10_Intra16x16Estimation
+ *
+ * Description:
+ * Tries the four Intra 16x16 prediction modes (vertical, horizontal, DC and plane)
+ * and keeps the one with the smallest SAD against the current MB.
+ *
+ * Remarks:
+ * A mode whose prediction call does not return OMX_Sts_NoErr is skipped.
+ * Parameters:
+ * [in] pSrcCurrBuf    - Pointer to the start of luma component of current Macroblock 
+ * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf 
+ * [in] pSrcRecBuf    - Pointer to the start of luma component of co-located reconstructed MB 
+ * [in] SrcRecStep - Step size for the pointer pSrcRecBuf 
+ * [in] pCurrPointPos - Position of the current MB in the current plane; x >> 4 and
+ *                    y >> 4 are used as the MB column and row
+ * [in] pMBInter    - Array of pointers to the adjacent type INTER MBs; entries 0, 1 and 2
+ *                    are tested for the left, upper and upper-left neighbour.
+ * [in] pMBIntra    - Array of pointers to the adjacent type INTRA MBs; indexed like
+ *                    pMBInter.
+ * [in/out] pSrcDstMBCurr - Pointer to information structure for the current MB; its
+ *                    Intra16x16PredMode receives the selected mode
+ * [out] pCost      - Pointer to cost for Intra16x16; left at ARM_VCM4P10_MAX_COST when
+ *                    no mode produced a lower cost
+ * Return Value:
+ * OMX_Sts_NoErr - always
+ *
+ *
+ */
+
+static OMXResult armVCM4P10_Intra16x16Estimation(
+    const OMX_U8* pSrcCurrBuf,   
+    OMX_S32 SrcCurrStep,
+    const OMX_U8* pSrcRecBuf,   
+    OMX_S32 SrcRecStep,
+	const OMXVCM4P2Coordinate *pCurrPointPos,
+    const OMXVCM4P10MBInfoPtr *pMBInter,
+    const OMXVCM4P10MBInfoPtr *pMBIntra,
+    OMXVCM4P10MBInfo *pSrcDstMBCurr,
+    OMX_U32 *pCost)
+{
+    OMXVCM4P10Intra16x16PredMode    Candidates [4] = 
+        {OMX_VC_16X16_VERT, OMX_VC_16X16_HOR, 
+        OMX_VC_16X16_DC, OMX_VC_16X16_PLANE};
+    OMX_U32     MBCol = pCurrPointPos->x >> 4;
+    OMX_U32     MBRow = pCurrPointPos->y >> 4;
+    OMX_U8      PredStore [16*16 + 16];
+    OMX_U8      *pPredMB;
+    OMX_S32     Neighbours = 0;
+    OMX_S32     Sad;
+    OMX_S32     m;
+    OMXResult   Status;
+
+    /* Prediction scratch: PredStore passed through armAlignTo16Bytes */
+    pPredMB = armAlignTo16Bytes(PredStore);
+
+    /* Left and upper-left neighbours are only tested right of column 0 */
+    if (MBCol != 0)
+    {
+        if (pMBIntra [0] != 0 || pMBInter [0] != 0)
+        {
+            Neighbours |= OMX_VC_LEFT;
+        }
+        if ((MBRow != 0) && (pMBIntra [2] != 0 || pMBInter [2] != 0))
+        {
+            Neighbours |= OMX_VC_UPPER_LEFT;
+        }
+    }
+    /* The upper neighbour is only tested below row 0 */
+    if ((MBRow != 0) && (pMBIntra [1] != 0 || pMBInter [1] != 0))
+    {
+        Neighbours |= OMX_VC_UPPER;
+    }
+
+    /* Start from the maximum cost; any cheaper mode replaces it */
+    *pCost = ARM_VCM4P10_MAX_COST;
+    for (m = 0; m < 4; m++)
+    {
+        Status = omxVCM4P10_PredictIntra_16x16(
+                pSrcRecBuf - 1,
+                pSrcRecBuf - SrcRecStep,
+                pSrcRecBuf - SrcRecStep - 1,
+                pPredMB,
+                SrcRecStep,
+                16,
+                Candidates [m],
+                Neighbours);
+        if (Status != OMX_Sts_NoErr)
+        {
+            continue;
+        }
+        armVCCOMM_SAD(
+            pSrcCurrBuf,
+            SrcCurrStep,
+            pPredMB,
+            16,
+            &Sad,
+            16,
+            16);
+        if (Sad < *pCost)
+        {
+            *pCost = Sad;
+            pSrcDstMBCurr->Intra16x16PredMode = Candidates [m];
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armVCM4P10_Intra4x4Estimation
+ *
+ * Description:
+ * Runs the Intra 4x4 mode decision for all sixteen 4x4 luma blocks of the
+ * current MB and adds up the cost reported for each block.
+ *
+ * Parameters:
+ * [in] pSrcCurrBuf    - Pointer to the start of luma component of current Macroblock 
+ * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf 
+ * [in] pSrcRecBuf    - Pointer to the start of luma component of co-located reconstructed MB 
+ * [in] SrcRecStep - Step size for the pointer pSrcRecBuf 
+ * [in] pMBIntra    - Array of pointers to the adjacent type INTRA MBs, indexed as
+ *                    0 left, 1 upper, 2 upper-left, 3 upper-right; a NULL entry
+ *                    leaves the matching modes marked as not available
+ * [in/out] pSrcDstMBCurr - Pointer to information structure for the current MB; handed
+ *                    on to armVCM4P10_Mode4x4Decision
+ * [out] pCost      - Pointer to cost for Intra4x4
+ * Remarks:
+ * A 48x32 window of reconstructed pels that starts 16 rows above and 16
+ * columns left of pSrcRecBuf is copied, so that area must be readable.
+ * Return Value:
+ * OMX_Sts_NoErr - always
+ *
+ */
+
+static OMXResult armVCM4P10_Intra4x4Estimation(
+    const OMX_U8* pSrcCurrBuf,   
+    OMX_S32 SrcCurrStep,
+    const OMX_U8* pSrcRecBuf,   
+    OMX_S32 SrcRecStep,
+    const OMXVCM4P10MBInfoPtr *pMBIntra,
+    OMXVCM4P10MBInfo *pSrcDstMBCurr,
+    OMX_U32 *pCost)
+{
+    OMX_S32     Row, Col, Blk;
+    OMX_S32     BlkCost;
+
+    /*
+     * NeighbourModes holds the 4x4 prediction modes around the current MB.
+     * Entries of the current MB start at index [1][1]; the entries taken
+     * from the neighbouring MBs are laid out as shown below.
+     * A value of ARM_VCM4P10_INVALID_BLOCK means that the block is not
+     * available for prediction.
+     *
+     * c3 b0 b1 b2 b3 d0 d1 d2 d3
+     * a0 xx xx xx xx -  -  -  -
+     * a1 xx xx xx xx -  -  -  -
+     * a2 xx xx xx xx -  -  -  -
+     * a3 xx xx xx xx -  -  -  -
+     *
+     */
+    OMX_S32     NeighbourModes [5][9];
+
+    /*
+     * RecWindow is a local copy of reconstructed pels, 3 MBs wide and
+     * 2 MBs high:
+     *
+     * MB11 MB12 MB13 
+     * MB21 MB22 MB23
+     *
+     * The pointer passed to the mode decision addresses MB22 (row 16,
+     * column 16 of the window).
+     */
+    OMX_U8      RecWindow [(16*3)*(16*2)];
+    OMX_S32     WindowStep = 16 * 3;
+
+    /* init */
+    *pCost = 0;
+
+    /* Every entry starts out as "not available" */
+    for (Row = 0; Row < 5; Row++)
+    {
+        for (Col = 0; Col < 9; Col++)
+        {
+            NeighbourModes [Row][Col] = ARM_VCM4P10_INVALID_BLOCK;
+        }
+    }
+
+    /* a0..a3: entries 3, 7, 11 and 15 of the left MB (pMBIntra[0]) */
+    if (pMBIntra[0] != NULL)
+    {
+        for (Row = 0; Row < 4; Row++)
+        {
+            NeighbourModes [Row + 1][0] = 
+                pMBIntra[0]->pIntra4x4PredMode[Row*4 + 3];
+        }
+    }
+
+    /* b0..b3: entries 12..15 of the upper MB (pMBIntra[1]) */
+    if (pMBIntra[1] != NULL)
+    {
+        for (Col = 0; Col < 4; Col++)
+        {
+            NeighbourModes [0][Col + 1] = 
+                pMBIntra[1]->pIntra4x4PredMode[3*4 + Col];
+        }
+    }
+
+    /* c3: entry 15 of the upper-left MB (pMBIntra[2]) */
+    if (pMBIntra[2] != NULL)
+    {
+        NeighbourModes [0][0] = pMBIntra[2]->pIntra4x4PredMode[15];
+    }
+
+    /* d0..d3: entries 12..15 of the upper-right MB (pMBIntra[3]) */
+    if (pMBIntra[3] != NULL)
+    {
+        for (Col = 0; Col < 4; Col++)
+        {
+            NeighbourModes [0][Col + 5] = 
+                pMBIntra[3]->pIntra4x4PredMode[3*4 + Col];
+        }
+    }
+
+    /*
+     * Fill the window.  Row and Col are relative to pSrcRecBuf, so the copy
+     * starts 16 rows above and 16 columns left of the current MB and covers
+     * 32 rows of 48 pels.
+     */
+    for (Row = -16; Row < 16; Row++)
+    {
+        for (Col = -16; Col < (16 * 2); Col++)
+        {
+            RecWindow [WindowStep * (Row + 16) + (Col + 16)] = 
+                pSrcRecBuf [SrcRecStep * Row + Col];
+        }
+    }
+
+    /*
+     * Visit the sixteen 4x4 blocks, 8x8 block by 8x8 block: Blk >> 2 is
+     * the 8x8 block index, Blk & 3 the 4x4 block index inside that 8x8
+     * block.
+     */
+    for (Blk = 0; Blk < 16; Blk++)
+    {
+        armVCM4P10_Mode4x4Decision (
+            pSrcCurrBuf,
+            SrcCurrStep,
+            pSrcDstMBCurr,
+            Blk >> 2,
+            Blk & 3,
+            RecWindow + 16 * WindowStep + 16,
+            WindowStep,
+            NeighbourModes,
+            &BlkCost);
+
+        *pCost += BlkCost;
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armVCM4P10_InterMEMB
+ *
+ * Description:
+ * Performs MB-level motion estimation for INTER MB type and selects best motion estimation strategy from 
+ * the set of modes supported in baseline profile ISO/IEC 14496-10.
+ *
+ * Remarks:
+ * Sub-8x8 splits are searched only if blockSplitEnable8x8 and blockSplitEnable4x4 are both 1.
+ * Parameters:
+ * [in] pSrcCurrBuf    - Pointer to the start of luma component of current Macroblock 
+ * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf 
+ * [in] pSrcRefBufList - List of pointers to the co-located reference MBs; NULL entries are skipped
+ * [in] SrcRefStep - Step size for the reference buffers
+ * [in] pRefRect   Pointer to the valid reference rectangle; relative to the image origin.
+ * [in] pCurrPointPos   Position of the current macroblock in the current plane.
+ * [in] pMESpec     - Motion estimation structure (read as OMXVCM4P10MEParams)
+ * [in] pMBInter    - Array, of dimension four, containing pointers to information associated with four
+ *                    adjacent type INTER MBs (Left, Top, Top-Left, Top-Right). 
+ * [in/out] pSrcDstMBCurr - Pointer to information structure for the current MB.  mbType, subMBType,
+ *                    pRefL0Idx, pMV0 and pMVPred are written by this function
+ * [in] Lambda      - For calculating the cost
+ * [out] pDstCost      - Pointer to cost for Inter MB
+ * Return Value:
+ * OMX_Sts_NoErr - always
+ * (results of the partition searches are not checked)
+ *
+ */
+
+static OMXResult armVCM4P10_InterMEMB(
+    const OMX_U8 *pSrcCurrBuf, 
+	OMX_S32 SrcCurrStep, 
+	const OMX_U8 *pSrcRefBufList[ARM_VCM4P10_MAX_FRAMES],
+	OMX_S32 SrcRefStep,
+	const OMXRect *pRefRect,
+	const OMXVCM4P2Coordinate *pCurrPointPos,
+	OMX_U32 Lambda,
+	void *pMESpec,
+	const OMXVCM4P10MBInfoPtr *pMBInter, 
+    OMXVCM4P10MBInfoPtr pSrcDstMBCurr,
+	OMX_U32 *pDstCost)
+{
+    OMX_S32     i, j, x, y, mode;
+    OMX_U32     Block8x8, XPerMB, YPerMB, Block2x, Block2y;
+    OMX_S32     PartStartX = 0, PartStartY = 0;
+    OMX_S32     PartWidth = 8, PartHeight = 8, BlockWidth = 4, BlockHeight = 4;
+    const OMX_U32     BlkSz [4][2] = {{4,4}, {4,8}, {8,4}};
+    const OMX_U32     PartSz [4][2] = {{8,8}, {8,16}, {16,8}, {16,16}};
+    const OMXVCM4P10SubMacroblockType     
+                ModeSubMBType4x4 [] = {OMX_VC_SUB_P_4x4, OMX_VC_SUB_P_4x8, 
+                              OMX_VC_SUB_P_8x4, OMX_VC_SUB_P_8x8};
+    const OMXVCM4P10MacroblockType
+                ModeMBType [] = {OMX_VC_P_8x8, OMX_VC_P_8x16, OMX_VC_P_16x8, OMX_VC_P_16x16};
+
+    OMXVCM4P10MEParams  *pMBOptions;
+    /*
+     * RefFrArr and  MVArr will be used for temporary storage of Reference frame index and MVs
+     * It will store RefIndex and MVs of 6 MBs as shown below
+     * 
+     *     |------|------|------|
+     *     |Tp-Lt |Top   |Tp-R  |
+     *     | MB   | MB   | MB   |
+     *     |------|------|------|
+     *     |Left  | Curr |      |
+     *     | MB   | MB   |      |
+     *     |------|------|------|
+     */
+    OMX_S32     RefFrArr [4][6]; 
+    OMXVCMotionVector MVArr [8][12];
+    OMXVCMotionVector MVPredArr [4][4];
+
+    /*
+     * IndexToLoc will translate pMBInter index into spacial arrangement of MBs
+     */
+    OMX_S32     IndexToLoc [] = {2,1,3,0};
+    OMX_U32     part, MaxPart;
+    OMX_S32     Cost, MotionCost8x8 [4], MBCost, BestCost;
+
+    /*
+     * Update neighbouring MV array and Ref frame array which will be used for
+     * prediction of MVs and Ref frames.
+     */
+
+    /* Set cost to a high value */
+    Cost = BestCost = ARM_VCM4P10_MAX_COST;
+    /* Cells with y >= 4 and x >= 4 are zeroed before IndexToLoc[i] is read */
+    for (y = 0; y < 8; y++)
+    {
+        for (x = 0; x < 12; x++)
+        {
+            i = 3 * (y >> 2) + (x >> 2);
+            if ((y < 4 || x < 4) && (pMBInter[IndexToLoc[i]] != NULL))
+            {
+                MVArr [y][x].dx = 
+                    pMBInter[IndexToLoc[i]]->pMV0[y % 4][x % 4].dx;
+                MVArr [y][x].dy = 
+                    pMBInter[IndexToLoc[i]]->pMV0[y % 4][x % 4].dy;
+            }
+            else
+            {
+                MVArr [y][x].dx = 0;
+                MVArr [y][x].dy = 0;
+            }
+        }
+    }    
+    /* Reference indices are kept per 8x8 block: 2x2 entries per MB */
+    for (y = 0; y < 4; y++)
+    {
+        for (x = 0; x < 6; x++)
+        {
+            i = 3 * (y >> 1) + (x >> 1);
+            if ((y < 2 || x < 2) && (pMBInter[IndexToLoc[i]] != NULL))
+            {
+                RefFrArr [y][x] = 
+                    pMBInter[IndexToLoc[i]]->pRefL0Idx [(y % 2) * 2 + (x % 2)];
+            }
+            else
+            {
+                RefFrArr [y][x] = ARM_VCM4P10_INVALID_BLOCK;
+            }
+        }
+    }    
+
+    for (y = 0; y < 4; y++)
+    {
+        for (x = 0; x < 4; x++)
+        {
+            MVPredArr [y][x].dx = 0;
+            MVPredArr [y][x].dy = 0;
+        }
+    }
+    /*
+     * Motion Estimation for 8x8 MB Partition 
+     */
+
+    for (i = 0; i < 4; i++)
+    {
+        MotionCost8x8 [i] = 0;
+    }        
+
+    pMBOptions = (OMXVCM4P10MEParams *) pMESpec;
+
+    if (pMBOptions->blockSplitEnable8x8 == 1 && 
+        pMBOptions->blockSplitEnable4x4 == 1)
+    {
+        pSrcDstMBCurr->mbType = OMX_VC_P_8x8;
+
+        PartWidth = PartSz [0][0];
+        PartHeight = PartSz [0][1];
+
+        /* For each 8x8 partitions */
+        for (Block8x8 = 0; Block8x8 < 4; Block8x8++)
+        {
+            PartStartX = (Block8x8 % 2) << 3;
+            PartStartY = (Block8x8 / 2) << 3;
+
+            Block2x = (Block8x8 & 1) << 1;
+            Block2y = (Block8x8 >> 1) << 1;
+
+            BestCost = ARM_VCM4P10_MAX_COST;
+            for (mode = 0; mode < 3; mode++)
+            {
+                BlockWidth = BlkSz [mode][0];
+                BlockHeight = BlkSz [mode][1];
+
+                armVCM4P10_PartitionME (
+                    pSrcCurrBuf,   
+                    SrcCurrStep,
+                    pSrcRefBufList,   
+                    SrcRefStep,
+                    pRefRect,
+                    pCurrPointPos,
+                    pMESpec,
+
+                    PartWidth,
+                    PartHeight,
+                    BlockWidth,
+                    BlockHeight,
+                    PartStartX,
+                    PartStartY,
+
+                    MVArr,
+                    RefFrArr,
+                    MVPredArr,
+
+                    Lambda,
+                    &Cost);
+
+                if (Cost <= BestCost)
+                {
+                    /* Update cost */
+                    BestCost = Cost;
+
+                    /* Update MBCurr struct */
+                    pSrcDstMBCurr->subMBType [Block8x8] = ModeSubMBType4x4 [mode];
+
+                    pSrcDstMBCurr->pRefL0Idx [Block8x8] = RefFrArr [2 + (PartStartY >> 3)][2 + (PartStartX >> 3)];
+
+                    /* Update pMV0 and pMVPred of MBCurr struct */
+                    for (j = 0; j < 2; j++)
+                    {
+                        for (i = 0; i < 2; i++)
+                        {
+                            pSrcDstMBCurr->pMV0 [Block2y + j][Block2x + i].dx =
+                                MVArr [4 + Block2y + j][4 + Block2x + i].dx;
+                            pSrcDstMBCurr->pMV0 [Block2y + j][Block2x + i].dy =
+                                MVArr [4 + Block2y + j][4 + Block2x + i].dy;
+
+                            pSrcDstMBCurr->pMVPred [Block2y + j][Block2x + i].dx =
+                                MVPredArr [Block2y + j][Block2x + i].dx;
+                            pSrcDstMBCurr->pMVPred [Block2y + j][Block2x + i].dy =
+                                MVPredArr [Block2y + j][Block2x + i].dy;
+                        }
+                    }
+                }
+            }
+            /* NOTE(review): MVArr/RefFrArr now hold the last mode tried, not the best - confirm */
+            /* Update cost */
+            MotionCost8x8 [Block8x8] = BestCost;
+        }
+
+        /* Cost for mbType OMX_VC_P_8x8 */
+        BestCost = 0;
+        for (i = 0; i < 4; i++)
+        {
+            BestCost += MotionCost8x8 [i];
+        }
+    }
+    else
+    {
+        /* Set sub MB type to 8x8 */
+        for (i = 0; i < 4; i++)
+        {
+            pSrcDstMBCurr->subMBType [i] = OMX_VC_SUB_P_8x8;
+        }
+    }
+
+    /*
+     * Motion Estimation for 8x8, 8x16, 16x8 and 16x16 MB Partition
+     * If pMBOptions->blockSplitEnable8x8 is not 1, do only 16x16 ME (mode 3)
+     */
+    for (mode = (pMBOptions->blockSplitEnable8x8 == 1 ? 0 : 3); mode < 4; mode++)
+    {
+        BlockWidth = PartWidth = PartSz [mode][0];
+        BlockHeight = PartHeight = PartSz [mode][1];
+
+        XPerMB = 16 / PartWidth;
+        YPerMB = 16 / PartHeight;
+        MaxPart = XPerMB * YPerMB;
+
+        MBCost = 0;
+
+        /* part size 4, 2, 2 and 1 corresponding to 8x8, 8x16, 16x8 and 16x16 MB */
+        for (part = 0; part < MaxPart; part++)
+        {
+            PartStartX = (part % XPerMB) * PartWidth;
+            PartStartY = (part / XPerMB) * PartHeight;
+
+            armVCM4P10_PartitionME (
+                pSrcCurrBuf,
+                SrcCurrStep,
+                pSrcRefBufList,   
+                SrcRefStep,
+                pRefRect,
+                pCurrPointPos,
+                pMESpec,
+
+                PartWidth,
+                PartHeight,
+                BlockWidth,
+                BlockHeight,
+                PartStartX,
+                PartStartY,
+
+                MVArr,
+                RefFrArr,
+                MVPredArr,
+
+                Lambda,
+                &Cost);
+
+            MBCost += Cost;
+        }
+
+        if (MBCost <= BestCost)
+        {
+            /* Update cost */
+            BestCost = MBCost;
+            /* Update mbType of MBCurr struct */
+            pSrcDstMBCurr->mbType = ModeMBType [mode];
+            /* Update pMV0 and pMVPred of MBCurr struct */
+            for (j = 0; j < 4; j++)
+            {
+                for (i = 0; i < 4; i++)
+                {
+                    pSrcDstMBCurr->pMV0 [j][i].dx = MVArr [4+j][4+i].dx;
+                    pSrcDstMBCurr->pMV0 [j][i].dy = MVArr [4+j][4+i].dy;
+                    pSrcDstMBCurr->pMVPred [j][i].dx = MVPredArr [j][i].dx;
+                    pSrcDstMBCurr->pMVPred [j][i].dy = MVPredArr [j][i].dy;
+                }
+            }
+            /* Update pRefL0Idx.  The MVs stored above belong to blocks of 8x8 or
+             * larger, so sub MB types left by the sub-8x8 search no longer apply */
+            for (j = 0; j < 2; j++)
+            {
+                for (i = 0; i < 2; i++)
+                {
+                    pSrcDstMBCurr->pRefL0Idx [j*2+i] = RefFrArr [2+j][2+i];
+                    pSrcDstMBCurr->subMBType [j*2+i] = OMX_VC_SUB_P_8x8;
+                }
+            }
+        }
+    }
+
+    /* Update Best Cost */
+    *pDstCost = BestCost;
+
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function:  omxVCM4P10_MotionEstimationMB   (6.3.5.3.1)
+ *
+ * Description:
+ * Performs MB-level motion estimation and selects best motion estimation 
+ * strategy from the set of modes supported in baseline profile [ISO14496-10]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcCurrBuf - Pointer to the current position in original picture plane; 
+ *            16-byte alignment required 
+ *   pSrcRefBufList - Pointer to an array with 16 entries.  Each entry points 
+ *            to the top-left corner of the co-located MB in a reference 
+ *            picture.  The array is filled from low-to-high with valid 
+ *            reference frame pointers; the unused high entries should be set 
+ *            to NULL.  Ordering of the reference frames should follow 
+ *            [ISO14496-10] subclause 8.2.4  Decoding Process for Reference 
+ *            Picture Lists.   The entries must be 16-byte aligned. 
+ *   pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the 
+ *            reconstructed picture; must be 16-byte aligned. 
+ *   SrcCurrStep - Width of the original picture plane in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   SrcRefStep - Width of the reference picture plane in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   SrcRecStep - Width of the reconstructed picture plane in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   pRefRect - Pointer to the valid reference rectangle; relative to the 
+ *            image origin. 
+ *   pCurrPointPos - Position of the current macroblock in the current plane. 
+ *   Lambda - Lagrange factor for computing the cost function 
+ *   pMESpec - Pointer to the motion estimation specification structure; must 
+ *            have been allocated and initialized prior to calling this 
+ *            function. 
+ *   pMBInter - Array, of dimension four, containing pointers to information 
+ *            associated with four adjacent type INTER MBs (Left, Top, 
+ *            Top-Left, Top-Right). Any pointer in the array may be set equal 
+ *            to NULL if the corresponding MB doesn't exist or is not of type 
+ *            INTER. pMBInter[0] - Pointer to left MB information pMBInter[1] 
+ *            - Pointer to top MB information pMBInter[2] - Pointer to 
+ *            top-left MB information pMBInter[3] - Pointer to top-right MB 
+ *            information 
+ *   pMBIntra - Array, of dimension four, containing pointers to information 
+ *            associated with four adjacent type INTRA MBs (Left, Top, 
+ *            Top-Left, Top-Right). Any pointer in the array may be set equal 
+ *            to NULL if the corresponding MB doesn't exist or is not of type 
+ *            INTRA. pMBIntra[0] - Pointer to left MB information pMBIntra[1] 
+ *            - Pointer to top MB information pMBIntra[2] - Pointer to 
+ *            top-left MB information pMBIntra[3] - Pointer to top-right MB 
+ *            information 
+ *   pSrcDstMBCurr - Pointer to information structure for the current MB.  
+ *            The following entries should be set prior to calling the 
+ *            function:  sliceID - the number of the slice to which the 
+ *            current MB belongs. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstCost - Pointer to the minimum motion cost for the current MB. 
+ *   pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma 
+ *            4x4 blocks in each MB.  The block SADs are in scan order for 
+ *            each MB.  For implementations that cannot compute the SAD values 
+ *            individually, the maximum possible value (0xffff) is returned 
+ *            for each of the 16 block SAD entries. 
+ *   pSrcDstMBCurr - Pointer to updated information structure for the current 
+ *            MB after MB-level motion estimation has been completed.  The 
+ *            following fields are updated by the ME function.   The following 
+ *            parameter set quantifies the MB-level ME search results: MbType 
+ *            subMBType[4] pMV0[4][4] pMVPred[4][4] pRefL0Idx[4] 
+ *            Intra16x16PredMode pIntra4x4PredMode[4][4] 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -   One or more of the following pointers is NULL: pSrcCurrBuf, 
+ *           pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec, 
+ *           pMBInter, pMBIntra,pSrcDstMBCurr, pDstCost, pSrcRefBufList[0] 
+ *    -    SrcRefStep, SrcRecStep are not multiples of 16 
+ *    -    iBlockWidth or iBlockHeight are values other than 4, 8, or 16. 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+ 
+OMXResult omxVCM4P10_MotionEstimationMB(                    
+    const OMX_U8 *pSrcCurrBuf,                                  
+	OMX_S32 SrcCurrStep, 
+	const OMX_U8 *pSrcRefBufList[ARM_VCM4P10_MAX_FRAMES],
+	OMX_S32 SrcRefStep,
+	const OMX_U8 *pSrcRecBuf, 
+	OMX_S32 SrcRecStep,
+	const OMXRect *pRefRect,
+	const OMXVCM4P2Coordinate *pCurrPointPos,
+	OMX_U32 Lambda,
+	void *pMESpec,
+	const OMXVCM4P10MBInfoPtr *pMBInter, 
+	const OMXVCM4P10MBInfoPtr *pMBIntra,
+    OMXVCM4P10MBInfo *pSrcDstMBCurr,
+	OMX_INT *pDstCost,
+    OMX_U16 *pDstBlockSAD)
+{   /* Tries inter ME (if any reference is given), optional Intra 4x4, then Intra 16x16; the lowest cost ends up in *pDstCost */
+    OMX_U32     Cost, i, IntraFlag = 1;    /* IntraFlag stays 1 only when every reference pointer is NULL */
+    OMXVCM4P10MEParams  *pMEParams; 
+
+    /* check for argument error */
+    armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRefBufList == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRecBuf == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pMBInter == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pMBIntra == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcDstMBCurr == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstCost == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(SrcRefStep <= 0 || SrcRefStep & 15, OMX_Sts_BadArgErr)    /* step must be positive and a multiple of 16 */
+    armRetArgErrIf(SrcRecStep <= 0 || SrcRecStep & 15, OMX_Sts_BadArgErr)
+    armRetArgErrIf(SrcCurrStep <= 0 || SrcCurrStep & 15, OMX_Sts_BadArgErr)
+    
+    armRetArgErrIf(armNot16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr)    
+    armRetArgErrIf(armNot16ByteAligned(pSrcRecBuf), OMX_Sts_BadArgErr)
+
+    for (i = 0; i < ARM_VCM4P10_MAX_FRAMES; i++)
+    {
+        armRetArgErrIf(pSrcRefBufList [i] != NULL &&
+            armNot16ByteAligned(pSrcRefBufList [i]), OMX_Sts_BadArgErr)    /* NULL entries are allowed; only non-NULL ones are alignment-checked */
+            
+        /* Check if current MB needs INTER cost calculations */
+        if (pSrcRefBufList [i] != NULL && IntraFlag == 1)    /* first non-NULL reference enables the inter search */
+        {
+            IntraFlag = 0;
+        }
+    }
+
+    *pDstCost = ARM_VCM4P10_MAX_COST;    /* starting cost that the candidates below are compared against */
+    /*
+     * Inter cost calculations 
+     */
+
+     /* check this MB can be Inter */
+    if (IntraFlag != 1)
+    {
+         armVCM4P10_InterMEMB(
+             pSrcCurrBuf,   
+             SrcCurrStep,
+             pSrcRefBufList,   
+             SrcRefStep,
+             pRefRect,    
+             pCurrPointPos,
+             Lambda,
+             pMESpec,
+             pMBInter,
+             pSrcDstMBCurr,
+             &Cost
+             );
+        
+        *pDstCost = Cost;    /* inter result is taken unconditionally and becomes the cost to beat */
+    }     
+
+    pMEParams = (OMXVCM4P10MEParams *)pMESpec;    /* pMESpec is interpreted as an OMXVCM4P10MEParams from here on */
+    
+    if (pMEParams->intraEnable4x4 == 1)
+    {
+        /*
+         * Intra 4x4 cost calculations
+         */
+        armVCM4P10_Intra4x4Estimation(
+            pSrcCurrBuf,   
+            SrcCurrStep,
+            pSrcRecBuf,   
+            SrcRecStep,
+            pMBIntra,
+            pSrcDstMBCurr,
+            &Cost
+            );
+
+        if (Cost <= *pDstCost)    /* ties go to Intra 4x4; NOTE(review): Cost is OMX_U32 while *pDstCost is OMX_INT - confirm the mixed comparison is intended */
+        {
+            *pDstCost = Cost;
+            pSrcDstMBCurr->mbType = OMX_VC_INTRA_4x4;
+
+        }
+        
+    }
+
+    /*
+     * Cost for Intra 16x16 mode
+     */
+
+    armVCM4P10_Intra16x16Estimation(
+        pSrcCurrBuf,   
+        SrcCurrStep,
+        pSrcRecBuf,   
+        SrcRecStep,
+        pCurrPointPos,
+        pMBInter,
+        pMBIntra,
+        pSrcDstMBCurr,
+        &Cost
+        );
+
+    if (Cost <= *pDstCost)    /* ties go to Intra 16x16, the last candidate evaluated */
+    {
+        *pDstCost = Cost;
+        pSrcDstMBCurr->mbType = OMX_VC_INTRA_16x16;
+    }
+
+    /*
+     * Update pDstBlockSAD to max value
+     */
+	armVCM4P10_CalculateBlockSAD(	pSrcDstMBCurr, 
+        pSrcCurrBuf,                                  
+    	SrcCurrStep, 
+    	pSrcRefBufList,
+    	SrcRefStep,
+    	pSrcRecBuf, 
+    	SrcRecStep,
+    	pRefRect,
+    	pCurrPointPos,
+    	pMBInter, 
+    	pMBIntra,
+    	pDstBlockSAD);    /* NOTE(review): pDstBlockSAD is forwarded without a NULL check in this function; helper's return value is ignored */
+
+
+	return OMX_Sts_NoErr;
+}
+
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c
new file mode 100644
index 0000000..d6ca783
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c
@@ -0,0 +1,284 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_PredictIntraChroma_8x8.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 Chroma 8x8 intra prediction module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description:
+ * Perform DC style intra prediction, upper block has priority
+ *
+ * Parameters:
+ * [in]	pSrcLeft		Pointer to the buffer of 4 left samples:
+ *								p[x, y] (x = -1, y = 0..3)
+ * [in]	pSrcAbove		Pointer to the buffer of 4 above samples:
+ *								p[x,y] (x = 0..3, y = -1)
+ * [in]	leftStep		Step of left coefficient buffer
+ * [in]	dstStep			Step of the destination buffer
+ * [in]	availability	Neighboring 16x16 MB availability flag
+ * [out]	pDst			Pointer to the destination buffer
+ *
+ * Return Value:
+ * None
+ */
+
+static void armVCM4P10_PredictIntraDCUp4x4(
+     const OMX_U8* pSrcLeft,
+     const OMX_U8 *pSrcAbove,
+     OMX_U8* pDst,
+     OMX_INT leftStep,
+     OMX_INT dstStep,
+     OMX_S32 availability        
+)
+{   /* Fills a 4x4 block with one DC value taken from the above row, else the left column, else 128 */
+    int x, y, Sum=0, Count = 0;    /* Count becomes 1 once one neighbour line has been summed */
+
+    if (availability & OMX_VC_UPPER)    /* upper row takes priority */
+    {
+        for (x=0; x<4; x++)
+        {
+            Sum += pSrcAbove[x];
+        }
+        Count++;
+    }
+    else if (availability & OMX_VC_LEFT)    /* left column is used only when the upper flag is not set */
+    {
+        for (y=0; y<4; y++)
+        {
+            Sum += pSrcLeft[y*leftStep];    /* left samples are leftStep bytes apart */
+        }
+        Count++;
+    }
+    if (Count==0)
+    {
+        Sum = 128;    /* neither flag set: constant fill value */
+    }
+    else
+    {
+        Sum = (Sum + 2) >> 2;    /* rounded mean of the 4 summed samples */
+    }
+    for (y=0; y<4; y++)
+    {
+        for (x=0; x<4; x++)
+        {
+            pDst[y*dstStep+x] = (OMX_U8)Sum;    /* every pixel of the 4x4 block gets the same value */
+        }
+    }
+}
+
+/*
+ * Description:
+ * Perform DC style intra prediction, left block has priority
+ *
+ * Parameters:
+ * [in]	pSrcLeft		Pointer to the buffer of 4 left samples:
+ *								p[x, y] (x = -1, y = 0..3)
+ * [in]	pSrcAbove		Pointer to the buffer of 4 above samples:
+ *								p[x,y] (x = 0..3, y = -1)
+ * [in]	leftStep		Step of left coefficient buffer
+ * [in]	dstStep			Step of the destination buffer
+ * [in]	availability	Neighboring 16x16 MB availability flag
+ * [out]	pDst			Pointer to the destination buffer
+ *
+ * Return Value:
+ * None
+ */
+
+static void armVCM4P10_PredictIntraDCLeft4x4(
+     const OMX_U8* pSrcLeft,
+     const OMX_U8 *pSrcAbove,
+     OMX_U8* pDst,
+     OMX_INT leftStep,
+     OMX_INT dstStep,
+     OMX_S32 availability        
+)
+{   /* Fills a 4x4 block with one DC value taken from the left column, else the above row, else 128 */
+    int x, y, Sum=0, Count = 0;    /* Count becomes 1 once one neighbour line has been summed */
+
+    if (availability & OMX_VC_LEFT)    /* left column takes priority */
+    {
+        for (y=0; y<4; y++)
+        {
+            Sum += pSrcLeft[y*leftStep];    /* left samples are leftStep bytes apart */
+        }
+        Count++;
+    }
+    else if (availability & OMX_VC_UPPER)    /* upper row is used only when the left flag is not set */
+    {
+        for (x=0; x<4; x++)
+        {
+            Sum += pSrcAbove[x];
+        }
+        Count++;
+    }
+    if (Count==0)
+    {
+        Sum = 128;    /* neither flag set: constant fill value */
+    }
+    else
+    {
+        Sum = (Sum + 2) >> 2;    /* rounded mean of the 4 summed samples */
+    }
+    for (y=0; y<4; y++)
+    {
+        for (x=0; x<4; x++)
+        {
+            pDst[y*dstStep+x] = (OMX_U8)Sum;    /* every pixel of the 4x4 block gets the same value */
+        }
+    }
+}
+
+/**
+ * Function:  omxVCM4P10_PredictIntraChroma_8x8   (6.3.3.1.3)
+ *
+ * Description:
+ * Performs intra prediction for chroma samples. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y= 
+ *            0..7). 
+ *   pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y 
+ *            = -1); must be aligned on an 8-byte boundary. 
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) 
+ *   leftStep - Step of left pixel buffer; must be a multiple of 8. 
+ *   dstStep - Step of the destination buffer; must be a multiple of 8. 
+ *   predMode - Intra chroma prediction mode, please refer to section 3.4.3. 
+ *   availability - Neighboring chroma block availability flag, please refer 
+ *            to  "Neighboring Macroblock Availability". 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to the destination buffer; must be aligned on an 8-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If any of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pDst is NULL. 
+ *    dstStep < 8 or dstStep is not a multiple of 8. 
+ *    leftStep is not a multiple of 8. 
+ *    predMode is not in the valid range of enumeration 
+ *              OMXVCM4P10IntraChromaPredMode. 
+ *    predMode is OMX_VC_CHROMA_VERT, but availability doesn't set 
+ *              OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available. 
+ *    predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT 
+ *              indicating p[-1,y] (y = 0..7) is not available. 
+ *    predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1](x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not 
+ *              available. 
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL. 
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL. 
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. 
+ *    either pSrcAbove or pDst is not aligned on an 8-byte boundary.  Note: 
+ *              pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if 
+ *              they are not used by intra prediction implied in predMode. 
+ *               Note: OMX_VC_UPPER_RIGHT is not used in intra chroma 
+ *              prediction. 
+ *
+ */
+OMXResult omxVCM4P10_PredictIntraChroma_8x8(
+     const OMX_U8* pSrcLeft,
+     const OMX_U8 *pSrcAbove,
+     const OMX_U8 *pSrcAboveLeft,
+     OMX_U8* pDst,
+     OMX_INT leftStep,
+     OMX_INT dstStep,
+     OMXVCM4P10IntraChromaPredMode predMode,
+     OMX_S32 availability        
+ )
+{   /* Writes an 8x8 chroma prediction into pDst using the DC, horizontal, vertical or plane mode selected by predMode */
+    int x, y, Sum;
+    int H, V, a, b, c;    /* H, V: weighted edge gradients; a, b, c: plane coefficients (PLANE mode only) */
+
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(dstStep < 8,  OMX_Sts_BadArgErr);
+    armRetArgErrIf((dstStep % 8) != 0,  OMX_Sts_BadArgErr);
+    armRetArgErrIf((leftStep % 8) != 0,  OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcAbove), OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_UPPER)      && pSrcAbove     == NULL, OMX_Sts_BadArgErr);    /* a neighbour flagged available must come with a pointer */
+    armRetArgErrIf((availability & OMX_VC_LEFT )      && pSrcLeft      == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_UPPER_LEFT) && pSrcAboveLeft == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_CHROMA_VERT  && !(availability & OMX_VC_UPPER),      OMX_Sts_BadArgErr);    /* each mode requires the neighbours it reads */
+    armRetArgErrIf(predMode==OMX_VC_CHROMA_HOR   && !(availability & OMX_VC_LEFT),       OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_CHROMA_PLANE && !(availability & OMX_VC_UPPER),      OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_CHROMA_PLANE && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_CHROMA_PLANE && !(availability & OMX_VC_LEFT),       OMX_Sts_BadArgErr);
+    armRetArgErrIf((unsigned)predMode > OMX_VC_CHROMA_PLANE,   OMX_Sts_BadArgErr);    /* unsigned cast makes negative enum values fail the range test too */
+
+    switch (predMode)    /* no default case; out-of-range values are presumably rejected by the check above */
+    {
+    case OMX_VC_CHROMA_DC:
+        armVCM4P10_PredictIntraDC4x4(       pSrcLeft,            pSrcAbove,   pDst,             leftStep, dstStep, availability);    /* top-left 4x4 */
+        armVCM4P10_PredictIntraDCUp4x4(     pSrcLeft,            pSrcAbove+4, pDst+4,           leftStep, dstStep, availability);    /* top-right 4x4: above row preferred */
+        armVCM4P10_PredictIntraDCLeft4x4(   pSrcLeft+4*leftStep, pSrcAbove,   pDst+4*dstStep,   leftStep, dstStep, availability);    /* bottom-left 4x4: left column preferred */
+        armVCM4P10_PredictIntraDC4x4(       pSrcLeft+4*leftStep, pSrcAbove+4, pDst+4+4*dstStep, leftStep, dstStep, availability);    /* bottom-right 4x4 */
+        break;
+
+    case OMX_VC_CHROMA_HOR:
+        for (y=0; y<8; y++)
+        {
+            for (x=0; x<8; x++)
+            {
+                pDst[y*dstStep+x] = pSrcLeft[y*leftStep];    /* row y repeats its left neighbour */
+            }
+        }
+        break;
+
+    case OMX_VC_CHROMA_VERT:
+        for (y=0; y<8; y++)
+        {
+            for (x=0; x<8; x++)
+            {
+                pDst[y*dstStep+x] = pSrcAbove[x];    /* column x repeats its above neighbour */
+            }
+        }
+        break;
+
+    case OMX_VC_CHROMA_PLANE:
+        H = 4*(pSrcAbove[7] - pSrcAboveLeft[0]);    /* weight-4 term pairs the last above sample with the corner sample */
+        for (x=2; x>=0; x--)
+        {
+            H += (x+1)*(pSrcAbove[4+x] - pSrcAbove[2-x]);    /* weights 3, 2, 1 on symmetric pairs of the above row */
+        }
+        V = 4*(pSrcLeft[7*leftStep] - pSrcAboveLeft[0]);    /* same construction along the left column */
+        for (y=2; y>=0; y--)
+        {
+            V += (y+1)*(pSrcLeft[(4+y)*leftStep] - pSrcLeft[(2-y)*leftStep]);
+        }
+        a = 16*(pSrcAbove[7] + pSrcLeft[7*leftStep]);    /* offset term from the last above and last left samples */
+        b = (17*H+16)>>5;    /* horizontal slope, rounded */
+        c = (17*V+16)>>5;    /* vertical slope, rounded */
+        for (y=0; y<8; y++)
+        {
+            for (x=0; x<8; x++)
+            {
+                Sum = (a + b*(x-3) + c*(y-3) + 16)>>5;    /* plane evaluated at (x,y), relative to position (3,3) */
+                pDst[y*dstStep+x] = (OMX_U8)armClip(0,255,Sum);    /* armClip is defined elsewhere; presumably clamps Sum to [0,255] */
+            }
+        }
+        break;
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c
new file mode 100644
index 0000000..c90cb4c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c
@@ -0,0 +1,198 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_PredictIntra_16x16.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 16x16 intra prediction module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_PredictIntra_16x16   (6.3.3.1.2)
+ *
+ * Description:
+ * Perform Intra_16x16 prediction for luma samples. If the upper-right block 
+ * is not available, then duplication work should be handled inside the 
+ * function. Users need not define them outside. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y = 
+ *            0..15) 
+ *   pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15, 
+ *            y= -1); must be aligned on a 16-byte boundary. 
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) 
+ *   leftStep - Step of left pixel buffer; must be a multiple of 16. 
+ *   dstStep - Step of the destination buffer; must be a multiple of 16. 
+ *   predMode - Intra_16x16 prediction mode, please refer to section 3.4.1. 
+ *   availability - Neighboring 16x16 MB availability flag. Refer to 
+ *                  section 3.4.4. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst -Pointer to the destination buffer; must be aligned on a 16-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pDst is NULL. 
+ *    dstStep < 16, or dstStep is not a multiple of 16. 
+ *    leftStep is not a multiple of 16. 
+ *    predMode is not in the valid range of enumeration 
+ *              OMXVCM4P10Intra16x16PredMode 
+ *    predMode is OMX_VC_16X16_VERT, but availability doesn't set 
+ *              OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available. 
+ *    predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT 
+ *              indicating p[-1,y] (y = 0..15) is not available. 
+ *    predMode is OMX_VC_16X16_PLANE, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1](x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not 
+ *              available. 
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL. 
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL. 
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. 
+ *    either pSrcAbove or pDst is not aligned on a 16-byte boundary.  
+ *
+ * Note: 
+ *     pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if 
+ *     they are not used by intra prediction implied in predMode. 
+ * Note: 
+ *     OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction. 
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_16x16(
+    const OMX_U8* pSrcLeft, 
+    const OMX_U8 *pSrcAbove, 
+    const OMX_U8 *pSrcAboveLeft, 
+    OMX_U8* pDst, 
+    OMX_INT leftStep, 
+    OMX_INT dstStep, 
+    OMXVCM4P10Intra16x16PredMode predMode, 
+    OMX_S32 availability)
+{   /* Writes a 16x16 luma prediction into pDst using the vertical, horizontal, DC or plane mode selected by predMode */
+    int x,y,Sum,Count;
+    int H,V,a,b,c;    /* H, V: weighted edge gradients; a, b, c: plane coefficients (PLANE mode only) */
+
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(dstStep < 16,  OMX_Sts_BadArgErr);
+    armRetArgErrIf((dstStep % 16) != 0,  OMX_Sts_BadArgErr);
+    armRetArgErrIf((leftStep % 16) != 0,  OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot16ByteAligned(pSrcAbove), OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot16ByteAligned(pDst), OMX_Sts_BadArgErr);        
+    armRetArgErrIf((availability & OMX_VC_UPPER)      && pSrcAbove     == NULL, OMX_Sts_BadArgErr);    /* a neighbour flagged available must come with a pointer */
+    armRetArgErrIf((availability & OMX_VC_LEFT )      && pSrcLeft      == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_UPPER_LEFT) && pSrcAboveLeft == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_16X16_VERT  && !(availability & OMX_VC_UPPER),      OMX_Sts_BadArgErr);    /* each mode requires the neighbours it reads */
+    armRetArgErrIf(predMode==OMX_VC_16X16_HOR   && !(availability & OMX_VC_LEFT),       OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_16X16_PLANE && !(availability & OMX_VC_UPPER),      OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_16X16_PLANE && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_16X16_PLANE && !(availability & OMX_VC_LEFT),       OMX_Sts_BadArgErr);
+    armRetArgErrIf((unsigned)predMode > OMX_VC_16X16_PLANE,  OMX_Sts_BadArgErr);    /* unsigned cast makes negative enum values fail the range test too */
+
+    switch (predMode)    /* no default case; out-of-range values are presumably rejected by the check above */
+    {
+    case OMX_VC_16X16_VERT:
+        for (y=0; y<16; y++)
+        {
+            for (x=0; x<16; x++)
+            {
+                pDst[y*dstStep+x] = pSrcAbove[x];    /* column x repeats its above neighbour */
+            }
+        }
+        break;
+
+    case OMX_VC_16X16_HOR:
+        for (y=0; y<16; y++)
+        {
+            for (x=0; x<16; x++)
+            {
+                pDst[y*dstStep+x] = pSrcLeft[y*leftStep];    /* row y repeats its left neighbour */
+            }
+        }
+        break;
+
+    case OMX_VC_16X16_DC:
+        /* This can always be used even if no blocks available */
+        Sum = 0;
+        Count = 0;    /* number of neighbour lines (left, upper) added into Sum */
+        if (availability & OMX_VC_LEFT)
+        {
+            for (y=0; y<16; y++)
+            {
+                Sum += pSrcLeft[y*leftStep];
+            }
+            Count++;
+        }
+        if (availability & OMX_VC_UPPER)
+        {
+            for (x=0; x<16; x++)
+            {
+                Sum += pSrcAbove[x];
+            }
+            Count++;
+        }
+        if (Count==0)
+        {
+            Sum = 128;    /* neither flag set: constant fill value */
+        }
+        else if (Count==1)
+        {
+            Sum = (Sum + 8) >> 4;    /* rounded mean of 16 samples */
+        }
+        else /* Count = 2 */
+        {
+            Sum = (Sum + 16) >> 5;    /* rounded mean of 32 samples */
+        }
+        for (y=0; y<16; y++)
+        {
+            for (x=0; x<16; x++)
+            {
+                pDst[y*dstStep+x] = (OMX_U8)Sum;    /* every pixel of the block gets the same value */
+            }
+        }
+        break;
+
+    case OMX_VC_16X16_PLANE:
+        H = 8*(pSrcAbove[15] - pSrcAboveLeft[0]);    /* weight-8 term pairs the last above sample with the corner sample */
+        for (x=6; x>=0; x--)
+        {
+            H += (x+1)*(pSrcAbove[8+x] - pSrcAbove[6-x]);    /* weights 7..1 on symmetric pairs of the above row */
+        }
+        V = 8*(pSrcLeft[15*leftStep] - pSrcAboveLeft[0]);    /* same construction along the left column */
+        for (y=6; y>=0; y--)
+        {
+            V += (y+1)*(pSrcLeft[(8+y)*leftStep] - pSrcLeft[(6-y)*leftStep]);
+        }
+        a = 16*(pSrcAbove[15] + pSrcLeft[15*leftStep]);    /* offset term from the last above and last left samples */
+        b = (5*H+32)>>6;    /* horizontal slope, rounded */
+        c = (5*V+32)>>6;    /* vertical slope, rounded */
+        for (y=0; y<16; y++)
+        {
+            for (x=0; x<16; x++)
+            {
+                Sum = (a + b*(x-7) + c*(y-7) + 16)>>5;    /* plane evaluated at (x,y), relative to position (7,7) */
+                pDst[y*dstStep+x] = (OMX_U8)armClip(0,255,Sum);    /* armClip is defined elsewhere; presumably clamps Sum to [0,255] */
+            }
+        }
+        break;
+    }
+
+    return OMX_Sts_NoErr;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c
new file mode 100644
index 0000000..3fa8212
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c
@@ -0,0 +1,338 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_PredictIntra_4x4.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 4x4 intra prediction module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_PredictIntra_4x4   (6.3.3.1.1)
+ *
+ * Description:
+ * Perform Intra_4x4 prediction for luma samples. If the upper-right block is 
+ * not available, then duplication work should be handled inside the function. 
+ * Users need not define them outside. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcLeft -  Pointer to the buffer of 4 left pixels: 
+ *                  p[x, y] (x = -1, y = 0..3) 
+ *   pSrcAbove - Pointer to the buffer of 8 above pixels: 
+ *                  p[x,y] (x = 0..7, y =-1); 
+ *               must be aligned on a 4-byte boundary. 
+ *   pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) 
+ *   leftStep - Step of left pixel buffer; must be a multiple of 4. 
+ *   dstStep - Step of the destination buffer; must be a multiple of 4. 
+ *   predMode - Intra_4x4 prediction mode. 
+ *   availability - Neighboring 4x4 block availability flag, refer to 
+ *             "Neighboring Macroblock Availability" . 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - Pointer to the destination buffer; must be aligned on a 4-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    If the function runs without error, it returns OMX_Sts_NoErr. 
+ *    If one of the following cases occurs, the function returns 
+ *              OMX_Sts_BadArgErr: 
+ *    pDst is NULL. 
+ *    dstStep < 4, or dstStep is not a multiple of 4. 
+ *    leftStep is not a multiple of 4. 
+ *    predMode is not in the valid range of enumeration 
+ *              OMXVCM4P10Intra4x4PredMode. 
+ *    predMode is OMX_VC_4x4_VERT, but availability doesn't set OMX_VC_UPPER 
+ *              indicating p[x,-1] (x = 0..3) is not available. 
+ *    predMode is OMX_VC_4x4_HOR, but availability doesn't set OMX_VC_LEFT 
+ *              indicating p[-1,y] (y = 0..3) is not available. 
+ *    predMode is OMX_VC_4x4_DIAG_DL, but availability doesn't set 
+ *              OMX_VC_UPPER indicating p[x,-1] (x = 0..3) is not available. 
+ *    predMode is OMX_VC_4x4_DIAG_DR, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not 
+ *              available. 
+ *    predMode is OMX_VC_4x4_VR, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not 
+ *              available. 
+ *    predMode is OMX_VC_4x4_HD, but availability doesn't set 
+ *              OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating 
+ *              p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not 
+ *              available. 
+ *    predMode is OMX_VC_4x4_VL, but availability doesn't set OMX_VC_UPPER 
+ *              indicating p[x,-1] (x = 0..3) is not available. 
+ *    predMode is OMX_VC_4x4_HU, but availability doesn't set OMX_VC_LEFT 
+ *              indicating p[-1,y] (y = 0..3) is not available. 
+ *    availability sets OMX_VC_UPPER, but pSrcAbove is NULL. 
+ *    availability sets OMX_VC_LEFT, but pSrcLeft is NULL. 
+ *    availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. 
+ *    either pSrcAbove or pDst is not aligned on a 4-byte boundary.  
+ *
+ * Note: 
+ *     pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if 
+ *     they are not used by intra prediction as implied in predMode. 
+ *
+ */
+
+OMXResult omxVCM4P10_PredictIntra_4x4(
+     const OMX_U8* pSrcLeft,
+     const OMX_U8 *pSrcAbove,
+     const OMX_U8 *pSrcAboveLeft,
+     OMX_U8* pDst,
+     OMX_INT leftStep,
+     OMX_INT dstStep,
+     OMXVCM4P10Intra4x4PredMode predMode,
+     OMX_S32 availability        
+ )
+{
+    int x, y;           /* column / row index inside the 4x4 destination block */
+    OMX_U8 pTmp[10];    /* filtered neighbour samples; each mode reads it with its own linear index in x and y */
+    /* Argument checks (armRetArgErrIf presumably returns the given status when its condition holds) */
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((leftStep % 4) != 0,  OMX_Sts_BadArgErr);
+    armRetArgErrIf((dstStep % 4) != 0,  OMX_Sts_BadArgErr);
+    armRetArgErrIf((dstStep < 4),  OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pSrcAbove), OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pDst), OMX_Sts_BadArgErr);    
+    armRetArgErrIf((availability & OMX_VC_UPPER)      && pSrcAbove     == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_LEFT )      && pSrcLeft      == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_UPPER_LEFT) && pSrcAboveLeft == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_VERT    && !(availability & OMX_VC_UPPER),      OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_HOR     && !(availability & OMX_VC_LEFT),       OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_DIAG_DL && !(availability & OMX_VC_UPPER),      OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_DIAG_DR && !(availability & OMX_VC_UPPER),      OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_DIAG_DR && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_DIAG_DR && !(availability & OMX_VC_LEFT),       OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_VR      && !(availability & OMX_VC_UPPER),      OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_VR      && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_VR      && !(availability & OMX_VC_LEFT),       OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_HD      && !(availability & OMX_VC_UPPER),      OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_HD      && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_HD      && !(availability & OMX_VC_LEFT),       OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_VL      && !(availability & OMX_VC_UPPER),      OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_4X4_HU      && !(availability & OMX_VC_LEFT),       OMX_Sts_BadArgErr);
+    armRetArgErrIf((unsigned)predMode > OMX_VC_4X4_HU,   OMX_Sts_BadArgErr);    /* unsigned cast also rejects negative values */
+    
+    /* Note: This code must not read the pSrc arrays unless the corresponding
+     * block is marked as available. If the block is not available then pSrc
+     * may not be a valid pointer.
+     *
+     * Note: To make the code more readable we refer to the neighbouring pixels
+     * in variables named as below:
+     *
+     *    UL U0 U1 U2 U3 U4 U5 U6 U7
+     *    L0 xx xx xx xx
+     *    L1 xx xx xx xx
+     *    L2 xx xx xx xx
+     *    L3 xx xx xx xx
+     */
+     
+#define UL pSrcAboveLeft[0]
+#define U0 pSrcAbove[0]
+#define U1 pSrcAbove[1]
+#define U2 pSrcAbove[2]
+#define U3 pSrcAbove[3]
+#define U4 pSrcAbove[4]
+#define U5 pSrcAbove[5]
+#define U6 pSrcAbove[6]
+#define U7 pSrcAbove[7]
+#define L0 pSrcLeft[0*leftStep]
+#define L1 pSrcLeft[1*leftStep]
+#define L2 pSrcLeft[2*leftStep]
+#define L3 pSrcLeft[3*leftStep]
+
+    switch (predMode)
+    {
+    case OMX_VC_4X4_VERT:
+        for (y=0; y<4; y++)    /* every row is a copy of U0..U3 */
+        {
+            pDst[y*dstStep+0] = U0;
+            pDst[y*dstStep+1] = U1;
+            pDst[y*dstStep+2] = U2;
+            pDst[y*dstStep+3] = U3;
+        }
+        break;
+
+    case OMX_VC_4X4_HOR:
+        for (x=0; x<4; x++)    /* every column is a copy of L0..L3 */
+        {
+            pDst[0*dstStep+x] = L0;
+            pDst[1*dstStep+x] = L1;
+            pDst[2*dstStep+x] = L2;
+            pDst[3*dstStep+x] = L3;
+        }
+        break;
+    
+    case OMX_VC_4X4_DC:
+        /* This can always be used even if no blocks available */
+        armVCM4P10_PredictIntraDC4x4(pSrcLeft, pSrcAbove, pDst, leftStep, dstStep, availability);
+        break;
+        
+    case OMX_VC_4X4_DIAG_DL:
+        pTmp[0] = (OMX_U8)((U0 + 2*U1 + U2 + 2)>>2);
+        pTmp[1] = (OMX_U8)((U1 + 2*U2 + U3 + 2)>>2);
+        if (availability & OMX_VC_UPPER_RIGHT)    /* U4..U7 are only read when the upper-right block is available */
+        {
+            pTmp[2] = (OMX_U8)((U2 + 2*U3 + U4 + 2)>>2);
+            pTmp[3] = (OMX_U8)((U3 + 2*U4 + U5 + 2)>>2);
+            pTmp[4] = (OMX_U8)((U4 + 2*U5 + U6 + 2)>>2);
+            pTmp[5] = (OMX_U8)((U5 + 2*U6 + U7 + 2)>>2);
+            pTmp[6] = (OMX_U8)((U6 + 3*U7      + 2)>>2);
+        }
+        else    /* upper-right missing: same filters with U3 standing in for U4..U7 */
+        {
+            pTmp[2] = (OMX_U8)((U2 + 3*U3      + 2)>>2);
+            pTmp[3] = U3;
+            pTmp[4] = U3;
+            pTmp[5] = U3;
+            pTmp[6] = U3;
+        }
+        for (y=0; y<4; y++)
+        {
+            for (x=0; x<4; x++)
+            {
+                pDst[y*dstStep+x] = pTmp[x+y];
+            }
+        }
+        break;
+
+    case OMX_VC_4X4_DIAG_DR:        
+        /* pTmp index = 3+(x-y), for x-y = -3, -2, -1, 0, 1, 2, 3 */
+        pTmp[0] = (OMX_U8)((L1 + 2*L2 + L3 + 2)>>2);
+        pTmp[1] = (OMX_U8)((L0 + 2*L1 + L2 + 2)>>2);
+        pTmp[2] = (OMX_U8)((UL + 2*L0 + L1 + 2)>>2);
+        pTmp[3] = (OMX_U8)((U0 + 2*UL + L0 + 2)>>2);
+        pTmp[4] = (OMX_U8)((U1 + 2*U0 + UL + 2)>>2);
+        pTmp[5] = (OMX_U8)((U2 + 2*U1 + U0 + 2)>>2);
+        pTmp[6] = (OMX_U8)((U3 + 2*U2 + U1 + 2)>>2);
+        for (y=0; y<4; y++)
+        {
+            for (x=0; x<4; x++)
+            {
+                pDst[y*dstStep+x] = pTmp[3+x-y];
+            }
+        }
+        break;
+
+    case OMX_VC_4X4_VR:
+        /* zVR=2x-y = -3, -2, -1, 0, 1, 2, 3, 4, 5, 6
+         * x-(y>>1) = -1, -1,  0, 0, 1, 1, 2, 2, 3, 3
+         * y        =  3,  2,  ?, ?, ?, ?, ?, ?, 1, 0
+         */
+        pTmp[0] = (OMX_U8)((L2 + 2*L1 + L0 + 2)>>2);
+        pTmp[1] = (OMX_U8)((L1 + 2*L0 + UL + 2)>>2);
+        pTmp[2] = (OMX_U8)((L0 + 2*UL + U0 + 2)>>2);
+        pTmp[3] = (OMX_U8)((UL + U0 + 1)>>1);
+        pTmp[4] = (OMX_U8)((UL + 2*U0 + U1 + 2)>>2);
+        pTmp[5] = (OMX_U8)((U0 + U1 + 1)>>1);
+        pTmp[6] = (OMX_U8)((U0 + 2*U1 + U2 + 2)>>2);
+        pTmp[7] = (OMX_U8)((U1 + U2 + 1)>>1);
+        pTmp[8] = (OMX_U8)((U1 + 2*U2 + U3 + 2)>>2);
+        pTmp[9] = (OMX_U8)((U2 + U3 + 1)>>1);
+        for (y=0; y<4; y++)
+        {
+            for (x=0; x<4; x++)
+            {
+                pDst[y*dstStep+x] = pTmp[3+2*x-y];    /* index = 3 + zVR */
+            }
+        }
+        break;
+
+    case OMX_VC_4X4_HD:
+        /* zHD=2y-x = -3 -2 -1  0  1  2  3  4  5  6
+         * y-(x>>1) = -1 -1  0  0  1  1  2  2  3  3
+         * x        =  3  2                    1  0
+         */
+        pTmp[0] = (OMX_U8)((U2 + 2*U1 + U0 + 2)>>2);
+        pTmp[1] = (OMX_U8)((U1 + 2*U0 + UL + 2)>>2);
+        pTmp[2] = (OMX_U8)((U0 + 2*UL + L0 + 2)>>2);
+        pTmp[3] = (OMX_U8)((UL + L0 + 1)>>1);
+        pTmp[4] = (OMX_U8)((UL + 2*L0 + L1 + 2)>>2);
+        pTmp[5] = (OMX_U8)((L0 + L1 + 1)>>1);
+        pTmp[6] = (OMX_U8)((L0 + 2*L1 + L2 + 2)>>2);
+        pTmp[7] = (OMX_U8)((L1 + L2 + 1)>>1);
+        pTmp[8] = (OMX_U8)((L1 + 2*L2 + L3 + 2)>>2);
+        pTmp[9] = (OMX_U8)((L2 + L3 + 1)>>1);
+        for (y=0; y<4; y++)
+        {
+            for (x=0; x<4; x++)
+            {
+                pDst[y*dstStep+x] = pTmp[3+2*y-x];    /* index = 3 + zHD */
+            }
+        }
+        break;
+
+    case OMX_VC_4X4_VL:
+        /* Note: x+(y>>1) = (2*x+y)>>1
+         * 2x+y = 0 1 2 3 4 5 6 7 8 9
+         */
+        pTmp[0] = (OMX_U8)((U0 + U1 + 1)>>1);
+        pTmp[1] = (OMX_U8)((U0 + 2*U1 + U2 + 2)>>2);
+        pTmp[2] = (OMX_U8)((U1 + U2 + 1)>>1);
+        pTmp[3] = (OMX_U8)((U1 + 2*U2 + U3 + 2)>>2);
+        pTmp[4] = (OMX_U8)((U2 + U3 + 1)>>1);
+        if (availability & OMX_VC_UPPER_RIGHT)    /* U4..U6 are only read when the upper-right block is available */
+        {
+            pTmp[5] = (OMX_U8)((U2 + 2*U3 + U4 + 2)>>2);
+            pTmp[6] = (OMX_U8)((U3 + U4 + 1)>>1);
+            pTmp[7] = (OMX_U8)((U3 + 2*U4 + U5 + 2)>>2);
+            pTmp[8] = (OMX_U8)((U4 + U5 + 1)>>1);
+            pTmp[9] = (OMX_U8)((U4 + 2*U5 + U6 + 2)>>2);
+        }
+        else    /* upper-right missing: same filters with U3 standing in for U4..U6 */
+        {
+            pTmp[5] = (OMX_U8)((U2 + 3*U3 + 2)>>2);
+            pTmp[6] = U3;
+            pTmp[7] = U3;
+            pTmp[8] = U3;
+            pTmp[9] = U3;
+        }
+        for (y=0; y<4; y++)
+        {
+            for (x=0; x<4; x++)
+            {
+                pDst[y*dstStep+x] = pTmp[2*x+y];
+            }
+        }
+        break;
+
+    case OMX_VC_4X4_HU:
+        /* zHU = x+2*y is used directly as the pTmp index */
+        pTmp[0] = (OMX_U8)((L0 + L1 + 1)>>1);
+        pTmp[1] = (OMX_U8)((L0 + 2*L1 + L2 + 2)>>2);
+        pTmp[2] = (OMX_U8)((L1 + L2 + 1)>>1);
+        pTmp[3] = (OMX_U8)((L1 + 2*L2 + L3 + 2)>>2);
+        pTmp[4] = (OMX_U8)((L2 + L3 + 1)>>1);
+        pTmp[5] = (OMX_U8)((L2 + 3*L3 + 2)>>2);
+        pTmp[6] = L3;
+        pTmp[7] = L3;
+        pTmp[8] = L3;
+        pTmp[9] = L3;
+        for (y=0; y<4; y++)
+        {
+            for (x=0; x<4; x++)
+            {
+                pDst[y*dstStep+x] = pTmp[x+2*y];
+            }
+        }
+        break;
+    }    /* no default case: predMode was range-checked above */
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_16x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_16x.c
new file mode 100644
index 0000000..c8114ee
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_16x.c
@@ -0,0 +1,86 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_SADQuar_16x.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate SAD of pSrc with average of two Ref blocks
+ * of 16x16 or 16x8
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P10_SADQuar_16x   (6.3.5.4.4)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average 
+ * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks.  
+ * Rounding is applied according to the convention (a+b+1)>>1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the original block; must be aligned on a 16-byte 
+ *            boundary. 
+ *   pSrcRef0 - Pointer to reference block 0 
+ *   pSrcRef1 - Pointer to reference block 1 
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 16 
+ *   iRefStep0 - Step of reference block 0 
+ *   iRefStep1 - Step of reference block 1 
+ *   iHeight - Height of the block; must be equal to either 8 or 16 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD -Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    iHeight is not equal to either 8 or 16. 
+ *    -    One or more of the following pointers is NULL: pSrc, pSrcRef0, 
+ *              pSrcRef1, pDstSAD. 
+ *    -    iSrcStep is not a multiple of 16 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_16x(
+	const OMX_U8* 	pSrc,
+    const OMX_U8* 	pSrcRef0,
+	const OMX_U8* 	pSrcRef1,	
+    OMX_U32 	iSrcStep,
+    OMX_U32		iRefStep0,
+    OMX_U32		iRefStep1,
+    OMX_U32*	pDstSAD,
+    OMX_U32     iHeight
+)
+{
+    /* check for argument error */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef0 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef1 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf((iHeight != 16) && (iHeight != 8), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot16ByteAligned(pSrc), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iSrcStep == 0) || (iSrcStep & 15), OMX_Sts_BadArgErr)    /* non-zero multiple of 16 */
+    
+    /* Delegate to the common SADQuar routine; the final argument (16) is presumably the block width */
+    return armVCM4P10_SADQuar
+        (pSrc, pSrcRef0, pSrcRef1, iSrcStep, 
+        iRefStep0, iRefStep1, pDstSAD, iHeight, 16);
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_4x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_4x.c
new file mode 100644
index 0000000..4b330ba
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_4x.c
@@ -0,0 +1,85 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_SADQuar_4x.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate SAD of pSrc with average of two Ref blocks
+ * of 4x8 or 4x4 blocks
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P10_SADQuar_4x   (6.3.5.4.2)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average 
+ * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks.  Rounding 
+ * is applied according to the convention (a+b+1)>>1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the original block; must be aligned on a 4-byte 
+ *            boundary. 
+ *   pSrcRef0 - Pointer to reference block 0 
+ *   pSrcRef1 - Pointer to reference block 1 
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 4. 
+ *   iRefStep0 - Step of reference block 0 
+ *   iRefStep1 - Step of reference block 1 
+ *   iHeight - Height of the block; must be equal to either 4 or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    iHeight is not equal to either 4 or 8. 
+ *    -    One or more of the following pointers is NULL: pSrc, pSrcRef0, 
+ *              pSrcRef1, pDstSAD. 
+ *    -    iSrcStep is not a multiple of 4 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_4x( 
+	  const OMX_U8* 	pSrc,
+      const OMX_U8* 	pSrcRef0,
+	  const OMX_U8* 	pSrcRef1,	
+      OMX_U32 	iSrcStep,
+      OMX_U32	iRefStep0,
+      OMX_U32	iRefStep1,
+      OMX_U32*	pDstSAD,
+      OMX_U32   iHeight
+)
+{
+    /* check for argument error */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcRef0 == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcRef1 == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((iHeight != 8) && (iHeight != 4), OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iSrcStep == 0) || (iSrcStep & 3), OMX_Sts_BadArgErr);    /* non-zero multiple of 4 */
+    /* Delegate to the common SADQuar routine; the final argument (4) is presumably the block width */
+    return armVCM4P10_SADQuar
+        (pSrc, pSrcRef0, pSrcRef1, iSrcStep, 
+        iRefStep0, iRefStep1, pDstSAD, iHeight, 4);
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_8x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_8x.c
new file mode 100644
index 0000000..c9e9c24
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_8x.c
@@ -0,0 +1,87 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_SADQuar_8x.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate SAD of pSrc with average of two Ref blocks
+ * of 8x16 or 8x8 or 8x4
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P10_SADQuar_8x   (6.3.5.4.3)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average 
+ * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks.  
+ * Rounding is applied according to the convention (a+b+1)>>1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - Pointer to the original block; must be aligned on an 8-byte 
+ *            boundary. 
+ *   pSrcRef0 - Pointer to reference block 0 
+ *   pSrcRef1 - Pointer to reference block 1 
+ *   iSrcStep - Step of the original block buffer; must be a multiple of 8. 
+ *   iRefStep0 - Step of reference block 0 
+ *   iRefStep1 - Step of reference block 1 
+ *   iHeight - Height of the block; must be equal either 4, 8, or 16. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    iHeight is not equal to either 4, 8, or 16. 
+ *    -    One or more of the following pointers is NULL: pSrc, pSrcRef0, 
+ *              pSrcRef1, pDstSAD. 
+ *    -    iSrcStep is not a multiple of 8 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_8x( 
+	const OMX_U8* 	pSrc,
+    const OMX_U8* 	pSrcRef0,
+	const OMX_U8* 	pSrcRef1,	
+    OMX_U32 	iSrcStep,
+    OMX_U32		iRefStep0,
+    OMX_U32		iRefStep1,
+    OMX_U32*	pDstSAD,
+    OMX_U32     iHeight
+)
+{
+    /* check for argument error */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef0 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef1 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf((iHeight != 16) && (iHeight != 8) && 
+        (iHeight != 4), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pSrc), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iSrcStep == 0) || (iSrcStep & 7), OMX_Sts_BadArgErr)    /* non-zero multiple of 8 */
+    
+    /* Delegate to the common SADQuar routine; the final argument (8) is presumably the block width */
+    return armVCM4P10_SADQuar
+        (pSrc, pSrcRef0, pSrcRef1, iSrcStep, 
+        iRefStep0, iRefStep1, pDstSAD, iHeight, 8);
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SAD_4x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SAD_4x.c
new file mode 100644
index 0000000..927c454
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SAD_4x.c
@@ -0,0 +1,77 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_SAD_4x.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate SAD for 4x8 and 4x4 blocks
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P10_SAD_4x   (6.3.5.4.1)
+ *
+ * Description:
+ * This function calculates the SAD for 4x8 and 4x4 blocks. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg -Pointer to the original block; must be aligned on a 4-byte 
+ *            boundary. 
+ *   iStepOrg -Step of the original block buffer; must be a multiple of 4. 
+ *   pSrcRef -Pointer to the reference block 
+ *   iStepRef -Step of the reference block buffer 
+ *   iHeight -Height of the block; must be equal to either 4 or 8. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD -Pointer of result SAD 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    One or more of the following pointers is NULL: 
+ *         pSrcOrg, pSrcRef, or pDstSAD 
+ *    -    iHeight is not equal to either 4 or 8. 
+ *    -    iStepOrg is not a multiple of 4 
+ *    -    Any alignment restrictions are violated 
+ *
+ */
+OMXResult omxVCM4P10_SAD_4x(	
+	const OMX_U8* 	pSrcOrg,
+	OMX_U32 	iStepOrg,
+	const OMX_U8* 	pSrcRef,
+	OMX_U32 	iStepRef,
+	OMX_S32*	pDstSAD,
+	OMX_U32		iHeight
+)
+{
+    /* check for argument error */
+    armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf((iHeight != 8) && (iHeight != 4), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot4ByteAligned(pSrcOrg), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iStepOrg == 0) || (iStepOrg & 3), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iStepRef == 0) || (iStepRef & 3), OMX_Sts_BadArgErr)
+    /* NOTE(review): the iStepRef check above is stricter than the header comment, which states no constraint on it */
+    return armVCCOMM_SAD 
+        (pSrcOrg, iStepOrg, pSrcRef, iStepRef, pDstSAD, iHeight, 4);
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SATD_4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SATD_4x4.c
new file mode 100644
index 0000000..a91ae66
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SATD_4x4.c
@@ -0,0 +1,132 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_SATD_4x4.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate SATD for 4x4 blocks
+ * 
+ */
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P10_SATD_4x4   (6.3.5.4.5)
+ *
+ * Description:
+ * This function calculates the sum of absolute transform differences (SATD) 
+ * for a 4x4 block by applying a Hadamard transform to the difference block 
+ * and then calculating the sum of absolute coefficient values. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg - Pointer to the original block; must be aligned on a 4-byte 
+ *            boundary 
+ *   iStepOrg - Step of the original block buffer; must be a multiple of 4 
+ *   pSrcRef - Pointer to the reference block; must be aligned on a 4-byte 
+ *            boundary 
+ *   iStepRef - Step of the reference block buffer; must be a multiple of 4 
+ *
+ * Output Arguments:
+ *   
+ *   pDstSAD - pointer to the resulting SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcOrg, pSrcRef, or pDstSAD 
+ *    -    either pSrcOrg or pSrcRef is not aligned on a 4-byte boundary 
+ *    -    iStepOrg <= 0 or iStepOrg is not a multiple of 4 
+ *    -    iStepRef <= 0 or iStepRef is not a multiple of 4 
+ *
+ */
+OMXResult omxVCM4P10_SATD_4x4( 
+	const OMX_U8*		pSrcOrg,
+	OMX_U32     iStepOrg,                         
+	const OMX_U8*		pSrcRef,
+	OMX_U32		iStepRef,
+	OMX_U32*    pDstSAD
+)
+{
+    OMX_INT     i, j;
+    OMX_S32     SATD = 0;    /* running sum of absolute transformed coefficients */
+    OMX_S32     d [4][4], m1[4][4], m2[4][4];    /* d: pixel differences; m1/m2: butterfly stage buffers */
+
+    /* check for argument error */
+    armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf((iStepOrg == 0) || (iStepOrg & 3), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iStepRef == 0) || (iStepRef & 3), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot4ByteAligned(pSrcOrg), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot4ByteAligned(pSrcRef), OMX_Sts_BadArgErr)
+
+    /* Calculate the difference */
+    for (j = 0; j < 4; j++)
+    {
+        for (i = 0; i < 4; i++)
+        {
+            d [j][i] = pSrcOrg [j * iStepOrg + i] - pSrcRef [j * iStepRef + i];
+        }
+    }
+
+    /* Hadamard Transform for 4x4 block */
+
+    /* Horizontal: two butterfly stages along each row of d (a,b,c,d = d[i][0..3]) */
+    for (i = 0; i < 4; i++)
+    {
+        m1[i][0] = d[i][0] + d[i][2]; /* a+c */
+        m1[i][1] = d[i][1] + d[i][3]; /* b+d */
+        m1[i][2] = d[i][0] - d[i][2]; /* a-c */
+        m1[i][3] = d[i][1] - d[i][3]; /* b-d */
+
+        m2[i][0] = m1[i][0] + m1[i][1]; /* a+b+c+d */
+        m2[i][1] = m1[i][2] + m1[i][3]; /* a+b-c-d */
+        m2[i][2] = m1[i][2] - m1[i][3]; /* a-b-c+d */
+        m2[i][3] = m1[i][0] - m1[i][1]; /* a-b+c-d */
+
+    }
+
+    /* Vertical: the same two stages down each column of m2 (m1 is reused as scratch) */
+    for (i = 0; i < 4; i++)
+    {
+        m1[0][i] = m2[0][i] + m2[2][i];
+        m1[1][i] = m2[1][i] + m2[3][i];
+        m1[2][i] = m2[0][i] - m2[2][i];
+        m1[3][i] = m2[1][i] - m2[3][i];
+
+        m2[0][i] = m1[0][i] + m1[1][i];
+        m2[1][i] = m1[2][i] + m1[3][i];
+        m2[2][i] = m1[2][i] - m1[3][i];
+        m2[3][i] = m1[0][i] - m1[1][i];
+    }
+    
+    /* calculate SAD for Transformed coefficients */
+    for (j = 0; j < 4; j++)
+    {
+        for (i = 0; i < 4; i++)
+        {
+            SATD += armAbs(m2 [j][i]);
+        }
+    }
+        
+    *pDstSAD = (SATD + 1) / 2;    /* halve the coefficient sum, rounding half up */
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c
new file mode 100644
index 0000000..23a5662
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c
@@ -0,0 +1,220 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_SubAndTransformQDQResidual.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * Subtract prediction, then transform, quantize and dequantize a 4x4 residual block
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_SubAndTransformQDQResidual   (6.3.5.8.1)
+ *
+ * Description:
+ * This function subtracts the prediction signal from the original signal to 
+ * produce the difference signal and then performs a 4x4 integer transform and 
+ * quantization. The quantized transformed coefficients are stored as 
+ * pDstQuantCoeff. This function can also output dequantized coefficients or 
+ * unquantized DC coefficients optionally by setting the pointers 
+ * pDstDeQuantCoeff, pDCCoeff. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcOrg - Pointer to original signal. 4-byte alignment required. 
+ *   pSrcPred - Pointer to prediction signal. 4-byte alignment required. 
+ *   iSrcOrgStep - Step of the original signal buffer; must be a multiple of 
+ *            4. 
+ *   iSrcPredStep - Step of the prediction signal buffer; must be a multiple 
+ *            of 4. 
+ *   pNumCoeff -Number of non-zero coefficients after quantization. If this 
+ *            parameter is not required, it is set to NULL. 
+ *   nThreshSAD - Zero-block early detection threshold. If this parameter is 
+ *            not required, it is set to 0. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *   bIntra - Indicates whether this is an INTRA block, either 1-INTRA or 
+ *            0-INTER 
+ *
+ * Output Arguments:
+ *   
+ *   pDstQuantCoeff - Pointer to the quantized transformed coefficients.  
+ *            8-byte alignment required. 
+ *   pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients 
+ *            if this parameter is not equal to NULL.  8-byte alignment 
+ *            required. 
+ *   pDCCoeff - Pointer to the unquantized DC coefficient if this parameter 
+ *            is not equal to NULL. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *            pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff, 
+ *            pDstDeQuantCoeff, pDCCoeff 
+ *    -    pSrcOrg is not aligned on a 4-byte boundary 
+ *    -    pSrcPred is not aligned on a 4-byte boundary 
+ *    -    iSrcOrgStep is not a multiple of 4 
+ *    -    iSrcPredStep is not a multiple of 4 
+ *    -    pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary 
+ *
+ */
+ OMXResult omxVCM4P10_SubAndTransformQDQResidual (
+	 const OMX_U8*		pSrcOrg,
+	 const OMX_U8*		pSrcPred,
+	 OMX_U32		iSrcOrgStep,
+	 OMX_U32		iSrcPredStep,
+	 OMX_S16*	    pDstQuantCoeff,
+	 OMX_S16* 	    pDstDeQuantCoeff,
+	 OMX_S16*	    pDCCoeff,
+	 OMX_S8*		pNumCoeff,
+	 OMX_U32		nThreshSAD,
+	 OMX_U32		iQP,
+	 OMX_U8		    bIntra
+)
+{
+    OMX_INT     i, j;
+    OMX_S8      NumCoeff = 0;
+    OMX_S16     Buf[16], m[16];
+    OMX_U32     QBits, QPper, QPmod, f;
+    OMX_S32     Value, MF, ThreshDC;
+
+    /* check for argument error; NULL is rejected here even for the outputs described as optional */
+    armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr)
+	armRetArgErrIf(pDstDeQuantCoeff == NULL, OMX_Sts_BadArgErr)
+	armRetArgErrIf(pNumCoeff == NULL, OMX_Sts_BadArgErr)
+	armRetArgErrIf(pDCCoeff == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot4ByteAligned(pSrcOrg), OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcPred == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot4ByteAligned(pSrcPred), OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstQuantCoeff == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pDstQuantCoeff), OMX_Sts_BadArgErr)
+    armRetArgErrIf((pDstDeQuantCoeff != NULL) && 
+			armNot8ByteAligned(pDstDeQuantCoeff), OMX_Sts_BadArgErr)
+    armRetArgErrIf((bIntra != 0) && (bIntra != 1), OMX_Sts_BadArgErr)
+    armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iSrcOrgStep == 0, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iSrcPredStep == 0, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iSrcOrgStep & 3, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iSrcPredStep & 3, OMX_Sts_BadArgErr)
+
+    /* 
+     * Derive the quantizer parameters from iQP (also used by the zero-block test below)
+     */
+
+    QPper = iQP / 6;
+    QPmod = iQP % 6;    
+    QBits = 15 + QPper;
+    
+    f = (1 << QBits) / (bIntra ? 3 : 6); /* rounding offset: 2^QBits/3 INTRA, 2^QBits/6 INTER */
+    
+    /* Do Zero-Block Early detection if enabled (nThreshSAD != 0) */
+    if (nThreshSAD)
+    {
+        ThreshDC = ((1 << QBits) - f) / armVCM4P10_MFMatrix[QPmod][0];
+        if (nThreshSAD < ThreshDC)
+        {
+            /* Below the threshold: emit an all-zero block and return early */
+            if (pDCCoeff != NULL)
+            {
+                *pDCCoeff = 0;
+            }
+
+            for (j = 0; j < 4; j++)
+            {
+                for (i = 0; i < 4; i++)
+                {
+                    pDstQuantCoeff [4 * j + i] = 0;
+                    if (pDstDeQuantCoeff != NULL)
+                    {
+                        pDstDeQuantCoeff [4 * j + i] = 0;    
+                    }                    
+                }
+            }
+
+            if (pNumCoeff != NULL)
+            {
+                *pNumCoeff = 0;
+            }
+            return OMX_Sts_NoErr;
+        }
+    }
+
+
+    /* Calculate difference (original - prediction) into Buf */
+    for (j = 0; j < 4; j++)
+    {
+        for (i = 0; i < 4; i++)
+        {
+            Buf [j * 4 + i] = 
+                pSrcOrg [j * iSrcOrgStep + i] - pSrcPred [j * iSrcPredStep + i];
+        }
+    }
+
+    /* Forward residual transform: Buf -> m */
+    armVCM4P10_FwdTransformResidual4x4 (m, Buf);
+
+    if (pDCCoeff != NULL)
+    {
+        /* Copy unquantized DC value into pointer */
+        *pDCCoeff = m[0];
+    }
+
+    /* Quantization: (|coef| * MF + f) >> QBits with the sign restored; levels are kept in Buf too */
+    for (j = 0; j < 4; j++)
+    {
+        for (i = 0; i < 4; i++)
+        {
+            MF = armVCM4P10_MFMatrix[QPmod][armVCM4P10_PosToVCol4x4[j * 4 + i]];
+            Value = armAbs(m[j * 4 + i]) * MF + f;
+            Value >>= QBits;
+            Value = m[j * 4 + i] < 0 ? -Value : Value;
+            Buf[4 * j + i] = pDstQuantCoeff [4 * j + i] = (OMX_S16)Value;
+            if ((pNumCoeff != NULL) && Value)
+            {
+                NumCoeff++;
+            }
+        }
+    }
+
+    /* Output number of non-zero Coeffs */
+    if (pNumCoeff != NULL)
+    {
+        *pNumCoeff = NumCoeff;
+    }
+    
+    /* Dequantized output: rescale the quantized levels, then inverse transform */
+    if (pDstDeQuantCoeff != NULL)
+    {    
+        /* Re-scale: level * 2^QPper * V (V selected by QPmod and coefficient position) */
+        for (j = 0; j < 4; j++)
+        {
+            for (i = 0; i < 4; i++)
+            {
+                m [j * 4 + i]  = Buf [j * 4 + i] * (1 << QPper) *
+                    armVCM4P10_VMatrix[QPmod][armVCM4P10_PosToVCol4x4[j * 4 + i]];
+            }
+        }
+        armVCM4P10_TransformResidual4x4 (pDstDeQuantCoeff, m);        
+    }
+        
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c
new file mode 100644
index 0000000..9ad0e81
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c
@@ -0,0 +1,131 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_TransformDequantChromaDCFromPair.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 inverse quantize and transform module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description:
+ * Dequantize Chroma 2x2 DC block: scales the four coefficients of pDst in place
+ */
+
+static void DequantChromaDC2x2(
+     OMX_S16* pDst,
+     OMX_INT QP        
+)
+{
+    int Shift = (QP/6)-1 ;  /* -1 for QP in [0,5], otherwise >= 0 */
+    int Scale = armVCM4P10_VMatrix[QP%6][0];
+    int i, Value;
+
+    if (Shift >= 0)
+    {
+        for (i=0; i<4; i++)
+        {
+            Value = (pDst[i] * Scale) << Shift;
+            pDst[i] = (OMX_S16)Value;
+        }
+    }
+    else /* Shift < 0: shift the scaled value right by one instead */
+    {
+        for (i=0; i<4; i++)
+        {
+            Value = (pDst[i] * Scale) >> 1;
+            pDst[i] = (OMX_S16)Value;
+        }
+    }
+}
+ 
+
+/*
+ * Description:
+ * Inverse Transform DC 2x2 Coefficients, in place (sums and differences only)
+ */
+
+static void InvTransformDC2x2(OMX_S16* pData)
+{
+    int c00 = pData[0];
+    int c01 = pData[1];
+    int c10 = pData[2];
+    int c11 = pData[3];
+
+    int d00 = c00 + c01;    /* first stage: combine entries 0 and 1 */
+    int d01 = c00 - c01;
+    int d10 = c10 + c11;    /* first stage: combine entries 2 and 3 */
+    int d11 = c10 - c11;
+
+    pData[0] = (OMX_S16)(d00 + d10);    /* c00 + c01 + c10 + c11 */
+    pData[1] = (OMX_S16)(d01 + d11);    /* c00 - c01 + c10 - c11 */
+    pData[2] = (OMX_S16)(d00 - d10);    /* c00 + c01 - c10 - c11 */
+    pData[3] = (OMX_S16)(d01 - d11);    /* c00 - c01 - c10 + c11 */
+}
+
+
+/**
+ * Function:  omxVCM4P10_TransformDequantChromaDCFromPair   (6.3.4.2.2)
+ *
+ * Description:
+ * Reconstruct the 2x2 ChromaDC block from coefficient-position pair buffer, 
+ * perform integer inverse transformation, and dequantization for 2x2 chroma 
+ * DC coefficients, and update the pair buffer pointer to next non-empty 
+ * block. 
+ *
+ * Input Arguments:
+ *   
+ *   ppSrc - Double pointer to residual coefficient-position pair buffer 
+ *            output by CAVLC decoding 
+ *   QP - Quantization parameter QpC 
+ *
+ * Output Arguments:
+ *   
+ *   ppSrc - *ppSrc is updated to the start of next non empty block 
+ *   pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer; 
+ *            must be aligned on a 4-byte boundary. 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppSrc or pDst is NULL. 
+ *    -    pDst is not 4-byte aligned. 
+ *    -    QP is not in the range of [0-51]. 
+ *
+ */
+
+OMXResult omxVCM4P10_TransformDequantChromaDCFromPair(
+     const OMX_U8 **ppSrc,
+     OMX_S16* pDst,
+     OMX_INT QP        
+ )
+{
+    armRetArgErrIf(ppSrc  == NULL,           OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppSrc == NULL,           OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst   == NULL,           OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(QP<0,                     OMX_Sts_BadArgErr);
+    armRetArgErrIf(QP>51,                    OMX_Sts_BadArgErr);
+
+    armVCM4P10_UnpackBlock2x2(ppSrc, pDst);  /* presumably fills pDst and advances *ppSrc - confirm in helper */
+    InvTransformDC2x2(pDst);                 /* inverse transform, in place */
+    DequantChromaDC2x2(pDst, QP);            /* then dequantize, in place */
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c
new file mode 100644
index 0000000..16c8be1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c
@@ -0,0 +1,148 @@
+/* ----------------------------------------------------------------
+ *
+ * 
+ * File Name:  omxVCM4P10_TransformDequantLumaDCFromPair.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * H.264 inverse quantize and transform module
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description:
+ * Dequantize Luma DC block: scales the 16 coefficients of pDst in place
+ */
+
+static void DequantLumaDC4x4(
+     OMX_S16* pDst,
+     OMX_INT QP        
+)
+{
+    int Shift = (QP/6)-2 ;  /* negative for QP in [0,11] */
+    int Scale = armVCM4P10_VMatrix[QP%6][0];
+    int i, Round, Value;
+
+    if (Shift >= 0)
+    {
+        for (i=0; i<16; i++)
+        {
+            Value = (pDst[i] * Scale) << Shift;
+            pDst[i] = (OMX_S16)Value;
+        }
+    }
+    else
+    {
+        Shift = -Shift;
+        Round = 1<<(Shift-1);
+        /* Negative shift: right shift with a rounding offset of half the divisor */
+        for (i=0; i<16; i++)
+        {
+            Value = (pDst[i] * Scale + Round) >> Shift;
+            pDst[i] = (OMX_S16)Value;
+        }
+    }
+}
+
+ 
+
+/*
+ * Description:
+ * Inverse Transform DC 4x4 Coefficients, in place: rows first, then columns
+ */
+static void InvTransformDC4x4(OMX_S16* pData)
+{
+    int i;
+
+    /* Transform rows (each group of four consecutive entries) */
+    for (i=0; i<16; i+=4)
+    {
+        int c0 = pData[i+0];
+        int c1 = pData[i+1];
+        int c2 = pData[i+2];
+        int c3 = pData[i+3];
+        pData[i+0] = (OMX_S16)(c0+c1+c2+c3);
+        pData[i+1] = (OMX_S16)(c0+c1-c2-c3);
+        pData[i+2] = (OMX_S16)(c0-c1-c2+c3);
+        pData[i+3] = (OMX_S16)(c0-c1+c2-c3);
+    }
+
+    /* Transform columns (entries four apart), using the row results */
+    for (i=0; i<4; i++)
+    {
+        int c0 = pData[i+0];
+        int c1 = pData[i+4];
+        int c2 = pData[i+8];
+        int c3 = pData[i+12];
+        pData[i+0] = (OMX_S16)(c0+c1+c2+c3);
+        pData[i+4] = (OMX_S16)(c0+c1-c2-c3);
+        pData[i+8] = (OMX_S16)(c0-c1-c2+c3);
+        pData[i+12] = (OMX_S16)(c0-c1+c2-c3);
+    }
+}
+
+
+/**
+ * Function:  omxVCM4P10_TransformDequantLumaDCFromPair   (6.3.4.2.1)
+ *
+ * Description:
+ * Reconstructs the 4x4 LumaDC block from the coefficient-position pair 
+ * buffer, performs integer inverse transformation and dequantization for 4x4 LumaDC 
+ * coefficients, and updates the pair buffer pointer to the next non-empty 
+ * block. 
+ *
+ * Input Arguments:
+ *   
+ *   ppSrc - Double pointer to residual coefficient-position pair buffer 
+ *            output by CAVLC decoding 
+ *   QP - Quantization parameter QpY 
+ *
+ * Output Arguments:
+ *   
+ *   ppSrc - *ppSrc is updated to the start of next non empty block 
+ *   pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must 
+ *            be aligned on a 8-byte boundary. 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr, if the function runs without error.
+ *    OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: 
+ *    -    ppSrc or pDst is NULL. 
+ *    -    pDst is not 8 byte aligned. 
+ *    -    QP is not in the range of [0-51]. 
+ *
+ */
+
+OMXResult omxVCM4P10_TransformDequantLumaDCFromPair(
+     const OMX_U8 **ppSrc,
+     OMX_S16* pDst,
+     OMX_INT QP        
+ )
+{
+    armRetArgErrIf(ppSrc  == NULL,           OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppSrc == NULL,           OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst   == NULL,           OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(QP<0,                     OMX_Sts_BadArgErr);
+    armRetArgErrIf(QP>51,                    OMX_Sts_BadArgErr);
+
+    armVCM4P10_UnpackBlock4x4(ppSrc, pDst);
+    /* Inverse transform first, then dequantize; both steps work in place on pDst */
+    InvTransformDC4x4(pDst);
+    DequantLumaDC4x4(pDst, QP);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c
new file mode 100644
index 0000000..b5544dd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c
@@ -0,0 +1,97 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_TransformQuant_ChromaDC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate 2x2 hadamard transform of chroma DC  
+ * coefficients and quantization
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P10_TransformQuant_ChromaDC   (6.3.5.6.1)
+ *
+ * Description:
+ * This function performs 2x2 Hadamard transform of chroma DC coefficients 
+ * and then quantizes the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the 2x2 array of chroma DC coefficients.  8-byte 
+ *            alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *   bIntra - Indicate whether this is an INTRA block. 1-INTRA, 0-INTER 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - Pointer to transformed and quantized coefficients.  8-byte 
+ *            alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: 
+ *             pSrcDst 
+ *    -    pSrcDst is not aligned on an 8-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_ChromaDC(
+	OMX_S16* 	pSrcDst,
+	OMX_U32		iQP,
+	OMX_U8		bIntra
+)
+{
+    OMX_INT     i, j;
+    OMX_S32     m[2][2];
+    OMX_S32     Value;
+    OMX_S32     QbitsPlusOne, Two_f, MF00;
+
+    /* Check for argument error (bIntra is not validated; any non-zero value selects INTRA) */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr);
+
+    /* Hadamard Transform for 2x2 block (results kept in 32-bit m) */
+    m[0][0] = pSrcDst[0] + pSrcDst[1] +  pSrcDst[2] + pSrcDst[3];
+    m[0][1] = pSrcDst[0] - pSrcDst[1] +  pSrcDst[2] - pSrcDst[3];
+    m[1][0] = pSrcDst[0] + pSrcDst[1] -  pSrcDst[2] - pSrcDst[3];
+    m[1][1] = pSrcDst[0] - pSrcDst[1] -  pSrcDst[2] + pSrcDst[3];
+
+    /* Quantization parameters */
+    QbitsPlusOne = ARM_M4P10_Q_OFFSET + 1 + (iQP / 6); /*floor (QP/6)*/
+    MF00 = armVCM4P10_MFMatrix [iQP % 6][0];
+
+    Two_f = (1 << QbitsPlusOne) / (bIntra ? 3 : 6); /* 3->INTRA, 6->INTER */
+
+    /* Scaling: (|m| * MF00 + Two_f) >> QbitsPlusOne, sign restored, written back to pSrcDst */
+    for (j = 0; j < 2; j++)
+    {
+        for (i = 0; i < 2; i++)
+        {
+            Value = (armAbs(m[j][i]) * MF00 + Two_f) >> QbitsPlusOne;
+            pSrcDst[j * 2 + i] = (OMX_S16)((m[j][i] < 0) ? -Value : Value);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c
new file mode 100644
index 0000000..2ccf7f0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c
@@ -0,0 +1,119 @@
+/**
+ * 
+ * File Name:  omxVCM4P10_TransformQuant_LumaDC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * This function will calculate 4x4 hadamard transform of luma DC coefficients 
+ * and quantization
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P10_TransformQuant_LumaDC   (6.3.5.6.2)
+ *
+ * Description:
+ * This function performs a 4x4 Hadamard transform of luma DC coefficients 
+ * and then quantizes the coefficients. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - Pointer to the 4x4 array of luma DC coefficients.  16-byte 
+ *            alignment required. 
+ *   iQP - Quantization parameter; must be in the range [0,51]. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - Pointer to transformed and quantized coefficients.  16-byte 
+ *             alignment required. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned if any of the following 
+ *              conditions are true: 
+ *    -    at least one of the following pointers is NULL: pSrcDst 
+ *    -    pSrcDst is not aligned on an 16-byte boundary 
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_LumaDC(
+	OMX_S16* 	pSrcDst,
+	OMX_U32		iQP
+)
+{
+    OMX_INT     i, j;
+    OMX_S32     m1[4][4], m2[4][4];
+    OMX_S32     Value;
+    OMX_U32     QbitsPlusOne, Two_f, MF;
+
+    /* Check for argument error */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot16ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr);
+
+    /* Hadamard Transform for 4x4 block */
+    /* Horizontal Hadamard: one row of pSrcDst per iteration, result in m2 */
+    for (i = 0; i < 4; i++)
+    {
+        j = i * 4;
+        
+        m1[i][0] = pSrcDst[j + 0] + pSrcDst[j + 2]; /* a+c */
+        m1[i][1] = pSrcDst[j + 1] + pSrcDst[j + 3]; /* b+d */
+        m1[i][2] = pSrcDst[j + 0] - pSrcDst[j + 2]; /* a-c */
+        m1[i][3] = pSrcDst[j + 1] - pSrcDst[j + 3]; /* b-d */
+
+        m2[i][0] = m1[i][0] + m1[i][1]; /* a+b+c+d */
+        m2[i][1] = m1[i][2] + m1[i][3]; /* a+b-c-d */
+        m2[i][2] = m1[i][2] - m1[i][3]; /* a-b-c+d */
+        m2[i][3] = m1[i][0] - m1[i][1]; /* a-b+c-d */
+
+    }
+
+    /* Vertical: same butterfly down each column of m2, overwriting m2 */
+    for (i = 0; i < 4; i++)
+    {
+        m1[0][i] = m2[0][i] + m2[2][i];
+        m1[1][i] = m2[1][i] + m2[3][i];
+        m1[2][i] = m2[0][i] - m2[2][i];
+        m1[3][i] = m2[1][i] - m2[3][i];
+
+        m2[0][i] = m1[0][i] + m1[1][i];
+        m2[1][i] = m1[2][i] + m1[3][i];
+        m2[2][i] = m1[2][i] - m1[3][i];
+        m2[3][i] = m1[0][i] - m1[1][i];
+    }
+
+    
+    /* Quantization parameters */
+    QbitsPlusOne = ARM_M4P10_Q_OFFSET + 1 + (iQP / 6); /*floor (QP/6)*/
+    Two_f = (1 << QbitsPlusOne) / 3; /* divisor 3 is the INTRA value (6 would be INTER); always INTRA here */
+    MF = armVCM4P10_MFMatrix [iQP % 6][0];
+
+    /* Scaling: transformed value is halved before quantizing; sign restored on store */
+    for (j = 0; j < 4; j++)
+    {
+        for (i = 0; i < 4; i++)
+        {
+            Value = (armAbs((m2[j][i]/* + 1*/) / 2) * MF + Two_f) >> QbitsPlusOne;
+            pSrcDst[j * 4 + i] = (OMX_S16)((m2[j][i] < 0) ? -Value : Value);
+        }
+    }
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ *                              END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_DCT_Table.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_DCT_Table.h
new file mode 100644
index 0000000..3255b61
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_DCT_Table.h
@@ -0,0 +1,30 @@
+/**
+ * 
+ * File Name:  armVCM4P2_DCT_Table.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ *
+ * File:        armVCM4P2_DCT_Table.h
+ * Description: Declares Tables used for DCT/IDCT module
+ *              in MP4P2 codec.
+ *
+ */
+ 
+#ifndef _OMXDCTTAB_H_
+#define _OMXDCTTAB_H_
+
+extern const OMX_F64 armVCM4P2_preCalcDCTCos[8][8];
+
+#endif /* _OMXDCTTAB_H_ */
+
+
+/* End of file */
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
new file mode 100644
index 0000000..92ecc05
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
@@ -0,0 +1,42 @@
+/**
+ * 
+ * File Name:  armVCM4P2_Huff_Tables_VLC.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ *
+ * File:        armVCM4P2_Huff_Tables_VLC.h
+ * Description: Declares Tables used for Huffman coding and decoding 
+ *              in MP4P2 codec.
+ *
+ */
+ 
+#ifndef _OMXHUFFTAB_H_
+#define _OMXHUFFTAB_H_
+
+extern const OMX_U8 armVCM4P2_IntraL0RunIdx[11];
+extern const ARM_VLC32 armVCM4P2_IntraVlcL0[68];
+extern const OMX_U8 armVCM4P2_IntraL1RunIdx[7];
+extern const ARM_VLC32 armVCM4P2_IntraVlcL1[36];
+extern const OMX_U8 armVCM4P2_IntraL0LMAX[15];
+extern const OMX_U8 armVCM4P2_IntraL1LMAX[21];
+extern const OMX_U8 armVCM4P2_IntraL0RMAX[27];
+extern const OMX_U8 armVCM4P2_IntraL1RMAX[8];
+extern const OMX_U8 armVCM4P2_InterL0RunIdx[12];
+extern const ARM_VLC32 armVCM4P2_InterVlcL0[59];
+extern const OMX_U8 armVCM4P2_InterL1RunIdx[3];
+extern const ARM_VLC32 armVCM4P2_InterVlcL1[45];
+extern const OMX_U8 armVCM4P2_InterL0LMAX[27];
+extern const OMX_U8 armVCM4P2_InterL1LMAX[41];
+extern const OMX_U8 armVCM4P2_InterL0RMAX[12];
+extern const OMX_U8 armVCM4P2_InterL1RMAX[3];
+extern const ARM_VLC32 armVCM4P2_aIntraDCLumaIndex[14];
+extern const ARM_VLC32 armVCM4P2_aIntraDCChromaIndex[14];
+extern const ARM_VLC32 armVCM4P2_aVlcMVD[66];
+
+#endif /* _OMXHUFFTAB_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
new file mode 100644
index 0000000..c75ed89
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
@@ -0,0 +1,25 @@
+/**
+ * 
+ * File Name:  armVCM4P2_ZigZag_Tables.h
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ *
+ * File:        armVCM4P2_ZigZag_Tables.h
+ * Description: Declares Tables used for Zigzag scan in MP4P2 codec.
+ *
+ */
+ 
+#ifndef _OMXZIGZAGTAB_H_
+#define _OMXZIGZAGTAB_H_
+
+extern const OMX_U8 armVCM4P2_aClassicalZigzagScan [64];
+extern const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64];
+extern const OMX_U8 armVCM4P2_aVerticalZigzagScan [64];
+
+#endif /* _OMXZIGZAGTAB_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_ACDCPredict.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_ACDCPredict.c
new file mode 100644
index 0000000..b6a396a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_ACDCPredict.c
@@ -0,0 +1,253 @@
+/**
+ * 
+ * File Name:  armVCM4P2_ACDCPredict.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Description:
+ * Contains module for DC/AC coefficient prediction
+ *
+ */ 
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_ACDCPredict
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+ * to the function call, prediction direction (predDir) should be selected
+ * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDst     pointer to the coefficient buffer which contains
+ *                          the quantized coefficient residuals (PQF) of the
+ *                          current block
+ * [in] pPredBufRow pointer to the coefficient row buffer
+ * [in] pPredBufCol pointer to the coefficient column buffer
+ * [in] curQP       quantization parameter of the current block. curQP
+ *                          may equal to predQP especially when the current
+ *                          block and the predictor block are in the same
+ *                          macroblock.
+ * [in] predQP      quantization parameter of the predictor block
+ * [in] predDir     indicates the prediction direction which takes one
+ *                          of the following values:
+ *                          OMX_VC_HORIZONTAL    predict horizontally
+ *                          OMX_VC_VERTICAL      predict vertically
+ * [in] ACPredFlag  a flag indicating if AC prediction should be
+ *                          performed. It is equal to ac_pred_flag in the bit
+ *                          stream syntax of MPEG-4
+ * [in] videoComp   video component type (luminance, chrominance or
+ *                          alpha) of the current block
+ * [in] flag        This flag selects whether this function is used to
+ *                  calculate PQF (set 1, prediction) or QF (set 0, reconstruction)
+ * [out]    pPreACPredict   pointer to the predicted coefficients buffer.
+ *                          Filled ONLY if it is not NULL
+ * [out]    pSrcDst     pointer to the coefficient buffer which contains
+ *                          the quantized coefficients (QF) of the current
+ *                          block
+ * [out]    pPredBufRow pointer to the updated coefficient row buffer
+ * [out]    pPredBufCol pointer to the updated coefficient column buffer
+ * [out]    pSumErr     pointer to the updated sum of the difference
+ *                      between predicted and unpredicted coefficients
+ *                      If this is NULL, do not update
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_ACDCPredict(
+     OMX_S16 * pSrcDst,
+     OMX_S16 * pPreACPredict,
+     OMX_S16 * pPredBufRow,
+     OMX_S16 * pPredBufCol,
+     OMX_INT curQP,
+     OMX_INT predQP,
+     OMX_INT predDir,
+     OMX_INT ACPredFlag,
+     OMXVCM4P2VideoComponent videoComp,
+     OMX_U8 flag,
+     OMX_INT *pSumErr
+)
+{
+    OMX_INT dcScaler, i;
+    OMX_S16 tempPred;
+
+    /* Argument error checks (pPreACPredict and pSumErr are optional and may be NULL) */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pPredBufRow == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pPredBufCol == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(curQP <= 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf(predQP <= 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf((predDir != 1) && (predDir != 2), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pPredBufRow), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pPredBufCol), OMX_Sts_BadArgErr);
+
+    
+    /* Set DC scaler value to avoid some compilers giving a warning. NOTE(review): it stays 0 for any other videoComp yet is passed as the divisor below */
+    dcScaler=0;
+    
+    /* Calculate the DC scaler value */
+    if (videoComp == OMX_VC_LUMINANCE)
+    {
+        if (curQP >= 1 && curQP <= 4)
+        {
+            dcScaler = 8;
+        }
+        else if (curQP >= 5 && curQP <= 8)
+        {
+            dcScaler = 2 * curQP;
+        }
+        else if (curQP >= 9 && curQP <= 24)
+        {
+            dcScaler = curQP + 8;
+        }
+        else
+        {
+            dcScaler = (2 * curQP) - 16;
+        }
+    }
+    else if (videoComp == OMX_VC_CHROMINANCE)
+    {
+        if (curQP >= 1 && curQP <= 4)
+        {
+            dcScaler = 8;
+        }
+        else if (curQP >= 5 && curQP <= 24)
+        {
+            dcScaler = (curQP + 13)/2;
+        }
+        else
+        {
+            dcScaler = curQP - 6;
+        }
+    }
+
+    if (pPreACPredict != NULL)
+    {
+        pPreACPredict[0] = predDir;
+    }
+
+    if (predDir == OMX_VC_VERTICAL)
+    {
+        /* F[0][0]//dc_scaler */
+        tempPred = armIntDivAwayFromZero(pPredBufRow[0], dcScaler);
+    }
+    else
+    {
+        /* F[0][0]//dc_scaler */
+        tempPred = armIntDivAwayFromZero(pPredBufCol[0], dcScaler);
+    }
+
+    /* Updating the DC value to the row and col buffer; note this writes 8 entries BEFORE pPredBufRow */
+    *(pPredBufRow - 8) = *pPredBufCol;
+
+    if (flag)
+    {
+        /* Cal and store F[0][0] into the col buffer */
+        *pPredBufCol = pSrcDst[0] * dcScaler;
+
+        /* PQF = QF - F[0][0]//dc_scaler */
+        pSrcDst[0] -= tempPred;
+    }
+    else
+    {
+        /* QF = PQF + F[0][0]//dc_scaler */
+        pSrcDst[0] += tempPred;
+        
+        /* Saturate */
+        pSrcDst[0] = armClip (-2048, 2047, pSrcDst[0]);
+
+        /* Cal and store F[0][0] into the col buffer */
+        *pPredBufCol = pSrcDst[0] * dcScaler;
+    }
+
+
+    if (ACPredFlag == 1)
+    {
+        if (predDir == OMX_VC_VERTICAL)
+        {
+            for (i = 1; i < 8; i++)
+            {
+                tempPred = armIntDivAwayFromZero \
+                              (pPredBufRow[i] * predQP, curQP);
+                if (flag)
+                {
+                    /* Updating QF to the row buff */
+                    pPredBufRow[i] = pSrcDst[i];
+                    /*PQFX[v][0] = QFX[v][0] - (QFA[v][0] * QPA) // QPX */
+                    pSrcDst[i] -= tempPred;
+                    /* Sum of absolute values of AC prediction error, this can
+                    be used as a reference to choose whether to use
+                    AC prediction; skipped when pSumErr is NULL */
+                    if (pSumErr != NULL) { *pSumErr += armAbs(pSrcDst[i]); }
+                    /* pPreACPredict[1~7] store the error signal
+                    after AC prediction; skipped when the buffer is NULL */
+                    if (pPreACPredict != NULL) { pPreACPredict[i] = pSrcDst[i]; }
+                }
+                else
+                {
+                    /*QFX[v][0] = PQFX[v][0] + (QFA[v][0] * QPA) // QPX */
+                    pSrcDst[i] += tempPred;
+                    
+                    /* Saturate */
+                    pSrcDst[i] = armClip (-2048, 2047, pSrcDst[i]);
+                    
+                    /* Updating QF to the row buff */
+                    pPredBufRow[i] = pSrcDst[i];
+                }
+            }
+        }
+        else
+        {
+            for (i = 8; i < 64; i += 8)
+            {
+                tempPred = armIntDivAwayFromZero \
+                              (pPredBufCol[i>>3] * predQP, curQP);
+                if (flag)
+                {
+                    /* Updating QF to col buff */
+                    pPredBufCol[i>>3] = pSrcDst[i];
+                    /*PQFX[0][u] = QFX[0][u] - (QFA[0][u] * QPA) // QPX */
+                    pSrcDst[i] -= tempPred;
+                    /* Sum of absolute values of AC prediction error, this can
+                    be used as a reference to choose whether to use AC
+                    prediction; skipped when pSumErr is NULL */
+                    if (pSumErr != NULL) { *pSumErr += armAbs(pSrcDst[i]); }
+                    /* pPreACPredict[1~7] store the error signal
+                    after AC prediction; skipped when the buffer is NULL */
+                    if (pPreACPredict != NULL) { pPreACPredict[i>>3] = pSrcDst[i]; }
+                }
+                else
+                {
+                    /*QFX[0][u] = PQFX[0][u] + (QFA[0][u] * QPA) // QPX */
+                    pSrcDst[i] += tempPred;
+                    
+                    /* Saturate */
+                    pSrcDst[i] = armClip (-2048, 2047, pSrcDst[i]);
+                    
+                    /* Updating QF to col buff */
+                    pPredBufCol[i>>3] = pSrcDst[i];
+                }
+            }
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Half.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Half.c
new file mode 100644
index 0000000..1b69a33
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Half.c
@@ -0,0 +1,187 @@
+/**
+ * 
+ * File Name:  armVCM4P2_BlockMatch_Half.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_BlockMatch_Half
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution.  Returns the estimated 
+ * motion vector and associated minimum SAD.  This function estimates the half-pixel 
+ * motion vector by interpolating the integer resolution motion vector referenced 
+ * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated 
+ * externally.  The input parameters pSrcRefBuf and pSearchPointRefPos should be 
+ * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16.  
+ * The function BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	pSrcRefBuf		pointer to the reference Y plane; points to the reference MB 
+ *                    that corresponds to the location of the current macroblock in 
+ *                    the	current plane.
+ * [in]	refWidth		  width of the reference plane
+ * [in]	pRefRect		  reference plane valid region rectangle
+ * [in]	pSrcCurrBuf		pointer to the current macroblock extracted from original plane 
+ *                    (linear array, 256 entries); must be aligned on an 8-byte boundary. 
+ * [in]	pSearchPointRefPos	position of the starting point for half pixel search (specified 
+ *                          in terms of integer pixel units) in the reference plane.
+ * [in]	rndVal			  rounding control bit for half pixel motion estimation; 
+ *                    0=rounding control disabled; 1=rounding control enabled
+ * [in]	pSrcDstMV		pointer to the initial MV estimate; typically generated during a prior 
+ *                  16X16 integer search and its unit is half pixel.
+ * [in] BlockSize     MacroBlock Size i.e either 16x16 or 8x8.
+ * [out]pSrcDstMV		pointer to estimated MV
+ * [out]pDstSAD			pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Half(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pSearchPointRefPos,
+     OMX_INT rndVal,
+     OMXVCMotionVector *pSrcDstMV,
+     OMX_INT *pDstSAD,
+     OMX_U8 BlockSize
+)
+{
+    OMX_INT     outer, inner, count, index;
+    OMX_S16     halfPelX = 0, halfPelY = 0, x, y;
+    OMX_INT     candSAD, refSAD = 0;
+    OMX_INT     minSAD, fromX, toX, fromY, toY;
+    /* Start of the reference block selected by dx/2, dy/2 of the incoming MV */
+    const OMX_U8      *pTempSrcRefBuf;
+    OMX_U8 tempPel;
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSearchPointRefPos == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcDstMV == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr);
+
+    /* Offset the reference pointer by half of each MV component (rows scaled by refWidth) */
+    pTempSrcRefBuf = pSrcRefBuf + (refWidth * (pSrcDstMV->dy/2)) + (pSrcDstMV->dx/2);
+
+    /* SAD of the un-interpolated reference block against the current block */
+    for (outer = 0, count = 0,index = 0;
+         outer < BlockSize;
+         outer++, index += refWidth - BlockSize) /* advance index to the next reference row */
+    {
+        for (inner = 0; inner < BlockSize; inner++, count++, index++)
+        {
+            refSAD += armAbs (pTempSrcRefBuf[index] - pSrcCurrBuf[count]);
+        }
+    }
+
+    /* This SAD is the starting best value that each interpolated candidate is compared with */
+    minSAD = refSAD;
+    *pDstSAD = refSAD;
+
+    /* Allow one step in each direction unless the block would then leave pRefRect */
+    fromX = 1;
+    toX   = 1;
+    fromY = 1;
+    toY   = 1;
+    if ((pSearchPointRefPos->x - 1) < pRefRect->x)
+    {
+        fromX = 0;
+    }
+
+    if ((pSearchPointRefPos->x + BlockSize + 1) > (pRefRect->x + pRefRect->width))
+    {
+        toX   = 0;
+    }
+
+    if ((pSearchPointRefPos->y - 1) < pRefRect->y)
+    {
+        fromY = 0;
+    }
+
+    if ((pSearchPointRefPos->y + BlockSize + 1) > (pRefRect->y + pRefRect->height))
+    {
+        toY   = 0;
+    }
+
+    /* Vertical step y runs from -fromY to toY */
+    for (y = -fromY; y <= toY; y++)
+    {
+        /* Horizontal step x runs from -fromX to toX */
+        for (x = -fromX; x <= toX; x++)
+        {
+            /* (0,0) needs no interpolation; its SAD is refSAD computed above */
+            if ( x == 0 && y == 0)
+            {
+                continue;
+            }
+            /* Same base pointer as before the loops (recomputed for every candidate) */
+            pTempSrcRefBuf = pSrcRefBuf + (refWidth * (pSrcDstMV->dy/2))
+                             + (pSrcDstMV->dx/2);
+
+            /* Average each pixel with its neighbour(s) towards (x,y), then accumulate the SAD */
+            for (outer = 0, count = 0, candSAD = 0,index = 0;
+                 outer < BlockSize;
+                 outer++, index += refWidth - BlockSize)
+            {
+                for (inner = 0; inner < BlockSize; inner++, count++,index++)
+                {
+                    tempPel = (
+                                pTempSrcRefBuf[index]
+                                + pTempSrcRefBuf[index + x] * armAbs(x) /* counted only when x != 0 */
+                                + pTempSrcRefBuf[index + refWidth * y] * armAbs(y) /* counted only when y != 0 */
+                                + pTempSrcRefBuf[index + refWidth * y + x]
+                                  * armAbs(x*y) /* counted only on a diagonal step */
+                                + armAbs (x) + armAbs (y) - rndVal /* rounding term */
+                              ) / (2 * (armAbs (x) + armAbs (y))); /* divisor is 2 on an axis, 4 on a diagonal */
+                    candSAD += armAbs (tempPel - pSrcCurrBuf[count]);
+                }
+            }
+
+            /* Record the candidate when armVCM4P2_CompareMV returns non-zero for it */
+            if (armVCM4P2_CompareMV (x, y, candSAD, halfPelX, halfPelY, minSAD))
+            {
+                *pDstSAD = candSAD;
+                minSAD   = candSAD;
+                halfPelX = x;
+                halfPelY = y;
+            }
+
+        } /* End of x- axis */
+    } /* End of y-axis */
+
+    pSrcDstMV->dx += halfPelX; /* add the winning step; stays (0,0) if no candidate won */
+    pSrcDstMV->dy += halfPelY;
+
+    return OMX_Sts_NoErr;
+
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c
new file mode 100644
index 0000000..77fe358
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c
@@ -0,0 +1,167 @@
+/**
+ * 
+ * File Name:  armVCM4P2_BlockMatch_Integer.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_BlockMatch_Integer
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated minimum SAD.  
+ * Both the input and output motion vectors are represented using half-pixel units, and 
+ * therefore a shift left or right by 1 bit may be required, respectively, to match the 
+ * input or output MVs with other functions that either generate output MVs or expect 
+ * input MVs represented using integer pixel units. 
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	pSrcRefBuf		pointer to the reference Y plane; points to the reference MB that 
+ *                    corresponds to the location of the current macroblock in the current 
+ *                    plane.
+ * [in]	refWidth		  width of the reference plane
+ * [in]	pRefRect		  pointer to the valid rectangular in reference plane. Relative to image origin. 
+ *                    It's not limited to the image boundary, but depended on the padding. For example, 
+ *                    if you pad 4 pixels outside the image border, then the value for left border 
+ *                    can be -4
+ * [in]	pSrcCurrBuf		pointer to the current macroblock extracted from original plane (linear array, 
+ *                    256 entries); must be aligned on an 8-byte boundary.
+ * [in] pCurrPointPos	position of the current macroblock in the current plane
+ * [in] pSrcPreMV		  pointer to predicted motion vector; NULL indicates no predicted MV
+ * [in] pSrcPreSAD		pointer to SAD associated with the predicted MV (referenced by pSrcPreMV)
+ * [in] searchRange		search range for 16X16 integer block,the units of it is full pixel,the search range 
+ *                    is the same in all directions.It is in inclusive of the boundary and specified in 
+ *                    terms of integer pixel units.
+ * [in] pMESpec			  vendor-specific motion estimation specification structure; must have been allocated 
+ *                    and then initialized using omxVCM4P2_MEInit prior to calling the block matching 
+ *                    function.
+ * [out]	pDstMV			pointer to estimated MV
+ * [out]	pDstSAD			pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error.
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Integer(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pCurrPointPos,
+     const OMXVCMotionVector *pSrcPreMV, /* not read by this implementation */
+     const OMX_INT *pSrcPreSAD,          /* not read by this implementation */
+     void *pMESpec,
+     OMXVCMotionVector *pDstMV,
+     OMX_INT *pDstSAD,
+     OMX_U8 BlockSize
+)
+{
+
+    /* Definitions and Initializations*/
+
+    OMX_INT     outer, inner, count,index;
+    OMX_INT     candSAD;
+    /* 0x10001 = 256*256 + 1, above the largest SAD of a 16x16 block (255*256) */
+    OMX_INT     minSAD = 0x10001, fromX, toX, fromY, toY;
+    /* Top-left pixel of the candidate block inside the reference plane */
+    const OMX_U8      *pTempSrcRefBuf;
+    OMX_S16     x, y;
+    OMX_INT searchRange;
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstMV == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr);
+
+    searchRange = ((OMXVCM4P2MEParams *)pMESpec)->searchRange;
+    /* Start from the full range, then shrink each side that would leave pRefRect */
+    fromX = searchRange;
+    toX   = searchRange;
+    fromY = searchRange;
+    toY   = searchRange;
+
+    if ((pCurrPointPos->x - searchRange) < pRefRect->x)
+    {
+        fromX =  pCurrPointPos->x - pRefRect->x;
+    }
+
+    if ((pCurrPointPos->x + BlockSize + searchRange) > (pRefRect->x + pRefRect->width))
+    {
+        toX   = pRefRect->width - (pCurrPointPos->x - pRefRect->x) - BlockSize;
+    }
+
+    if ((pCurrPointPos->y - searchRange) < pRefRect->y)
+    {
+        fromY = pCurrPointPos->y - pRefRect->y;
+    }
+
+    if ((pCurrPointPos->y + BlockSize + searchRange) > (pRefRect->y + pRefRect->height))
+    {
+        toY   = pRefRect->height - (pCurrPointPos->y - pRefRect->y) - BlockSize; /* rows are limited by height */
+    }
+
+    pDstMV->dx = -fromX;
+    pDstMV->dy = -fromY;
+    /* Vertical displacement y runs from -fromY to toY */
+    for (y = -fromY; y <= toY; y++)
+    {
+
+        /* Horizontal displacement x runs from -fromX to toX */
+        for (x = -fromX; x <= toX; x++)
+        {
+            /* Candidate block displaced by (x, y) from pSrcRefBuf */
+            pTempSrcRefBuf = pSrcRefBuf + (refWidth * y) + x;
+
+            /* SAD of the candidate; index walks the reference row by row */
+            for (outer = 0, count = 0, index = 0, candSAD = 0;
+                 outer < BlockSize;
+                 outer++, index += refWidth - BlockSize)
+            {
+                for (inner = 0; inner < BlockSize; inner++, count++, index++)
+                {
+                    candSAD += armAbs (pTempSrcRefBuf[index] - pSrcCurrBuf[count]);
+                }
+            }
+
+            /* Best MV is stored doubled (x*2, y*2), hence the /2 when comparing */
+            if (armVCM4P2_CompareMV (x, y, candSAD, pDstMV->dx/2, pDstMV->dy/2, minSAD))
+            {
+                *pDstSAD = candSAD;
+                minSAD   = candSAD;
+                pDstMV->dx = x*2;
+                pDstMV->dy = y*2;
+            }
+
+        } /* End of x- axis */
+    } /* End of y-axis */
+
+    return OMX_Sts_NoErr;
+
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c
new file mode 100644
index 0000000..94e8639
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c
@@ -0,0 +1,156 @@
+/**
+ * 
+ * File Name:  armVCM4P2_CheckVLCEscapeMode.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Description:
+ * Contains module for VLC escape mode check 
+ *
+ */ 
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_CheckVLCEscapeMode
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and 
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] run             Run value (count of zeros) to be encoded  
+ * [in] level           Level value (non-zero value) to be encoded
+ * [in] runPlus         Calculated as runPlus = run - (RMAX + 1)  
+ * [in] levelPlus       Calculated as 
+ *                      levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] maxStoreRun     Max store possible (considering last and inter/intra)
+ * [in] maxRunForMultipleEntries 
+ *                      The run value after which level 
+ *                      will be equal to 1: 
+ *                      (considering last and inter/intra status)
+ * [in] pRunIndexTable  Run Index table defined in 
+ *                      armVCM4P2_Huff_Tables_VLC.c
+ *                      (considering last and inter/intra status)
+ *
+ *                      
+ * Return Value:
+ * Returns an Escape mode which can take values from 0 to 4
+ * 0 --> no escape mode, 1 --> escape type 1,
+ * 2 --> escape type 2, 3 --> escape type 3, check section 7.4.1.3
+ * in the MPEG ISO standard; 4 --> mode 0 failed with shortVideoHeader set.
+ *
+ */
+
+OMX_U8 armVCM4P2_CheckVLCEscapeMode(
+     OMX_U32 run,
+     OMX_U32 runPlus,
+     OMX_S16 level,
+     OMX_S16 levelPlus,
+     OMX_U8  maxStoreRun,
+     OMX_U8  maxRunForMultipleEntries,
+     OMX_INT shortVideoHeader,
+     const OMX_U8  *pRunIndexTable
+)
+{
+    OMX_U8 numLevels;
+
+    /*
+     * The first of the following that applies decides the result:
+     *   0 - (run, level) has an entry in the table
+     *   4 - it has none and shortVideoHeader is non-zero
+     *   1 - (run, levelPlus) has an entry
+     *   2 - (runPlus, level) has an entry
+     *   3 - fallback, fixed length code
+     * where "has an entry" is the test spelled out for mode 0 below.
+     */
+
+    /* Only the magnitudes are compared against the table */
+    level = armAbs (level);
+    levelPlus = armAbs (levelPlus);
+
+    /*
+     * Mode 0 check: run must not exceed maxStoreRun and level must not
+     * exceed the number of levels the table holds for that run.
+     */
+    if (run <= maxStoreRun)
+    {
+        numLevels = pRunIndexTable[run + 1]
+                    - pRunIndexTable[run];
+        if (run > maxRunForMultipleEntries)
+        {
+            /* Past this run the count is forced to a single level */
+            numLevels = 1;
+        }
+        if (level <= numLevels)
+        {
+            return 0;
+        }
+    }
+
+    /*
+     * A pair that failed the mode 0 check is not examined any further
+     * when shortVideoHeader is non-zero; 4 is returned for that case.
+     */
+    if (shortVideoHeader)
+    {
+        return 4;
+    }
+
+    /*
+     * Mode 1 check: identical to the mode 0 test, with levelPlus taking
+     * the place of level.
+     */
+    if (run <= maxStoreRun)
+    {
+        numLevels = pRunIndexTable[run + 1]
+                    - pRunIndexTable[run];
+        if (run > maxRunForMultipleEntries)
+        {
+            /* Past this run the count is forced to a single level */
+            numLevels = 1;
+        }
+        if (levelPlus <= numLevels)
+        {
+            return 1;
+        }
+    }
+
+    /*
+     * Mode 2 check: identical to the mode 0 test, with runPlus taking
+     * the place of run.
+     */
+    if (runPlus <= maxStoreRun)
+    {
+        numLevels = pRunIndexTable[runPlus + 1]
+                    - pRunIndexTable[runPlus];
+        if (runPlus > maxRunForMultipleEntries)
+        {
+            /* Past this run the count is forced to a single level */
+            numLevels = 1;
+        }
+        if (level <= numLevels)
+        {
+            return 2;
+        }
+    }
+
+    /*
+     * No table entry matched: select mode 3 --> FLC
+     */
+    return 3;
+}
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CompareMV.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CompareMV.c
new file mode 100644
index 0000000..3b8845e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CompareMV.c
@@ -0,0 +1,71 @@
+/**
+ * 
+ * File Name:  armVCM4P2_CompareMV.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Description:
+ * Contains module for comparing motion vectors and SAD's to decide 
+ * the best MV and SAD
+ *
+ */
+  
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_CompareMV
+ *
+ * Description:
+ * Performs comparison of motion vectors and SADs to decide the 
+ * best MV and SAD
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	    mvX		x coordinate of the candidate motion vector
+ * [in]	    mvY		y coordinate of the candidate motion vector
+ * [in]	    candSAD	Candidate SAD
+ * [in]	    bestMVX	x coordinate of the best motion vector
+ * [in]	    bestMVY	y coordinate of the best motion vector
+ * [in]	    bestSAD	best SAD
+ *
+ * Return Value:
+ * OMX_INT -- 1 to indicate that the current sad is the best 
+ *            0 to indicate that it is NOT the best SAD
+ */
+
+OMX_INT armVCM4P2_CompareMV (
+    OMX_S16 mvX, 
+    OMX_S16 mvY, 
+    OMX_INT candSAD, 
+    OMX_S16 bestMVX, 
+    OMX_S16 bestMVY, 
+    OMX_INT bestSAD
+) 
+{
+    /* Unequal SADs settle the comparison on their own */
+    if (candSAD != bestSAD)
+    {
+        return (candSAD < bestSAD) ? 1 : 0;
+    }
+
+    /*
+     * Equal SADs: the candidate wins only if its motion vector is
+     * strictly shorter, judged by squared length.
+     */
+    return ((mvX * mvX + mvY * mvY)
+            < (bestMVX * bestMVX + bestMVY * bestMVY))
+           ? 1
+           : 0;
+}
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DCT_Table.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DCT_Table.c
new file mode 100644
index 0000000..a6f713e7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DCT_Table.c
@@ -0,0 +1,112 @@
+ /**
+ * 
+ * File Name:  armVCM4P2_DCT_Table.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File:        armVCM4P2_DCT_Table.c
+ * Description: Contains the DCT/IDCT coefficient matrix
+ *
+ */
+
+#ifndef _OMXDCTCOSTAB_C_
+#define _OMXDCTCOSTAB_C_
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+const OMX_F64 armVCM4P2_preCalcDCTCos[8][8] = /* [x][u] = C(u)/2 * cos((2x+1)*u*pi/16), C(0) = 1/sqrt(2), C(u>0) = 1 */
+{
+        {   /* x = 0, columns u = 0..7 */
+                0.353553390593273730, 
+                0.490392640201615220, 
+                0.461939766255643370, 
+                0.415734806151272620, 
+                0.353553390593273790, 
+                0.277785116509801140, 
+                0.191341716182544920, 
+                0.097545161008064152 
+        },
+        {   /* x = 1 */
+                0.353553390593273730, 
+                0.415734806151272620, 
+                0.191341716182544920, 
+                -0.097545161008064096, 
+                -0.353553390593273730, 
+                -0.490392640201615220, 
+                -0.461939766255643420, 
+                -0.277785116509801090
+        },
+        {   /* x = 2 */
+                0.353553390593273730, 
+                0.277785116509801140, 
+                -0.191341716182544860, 
+                -0.490392640201615220, 
+                -0.353553390593273840, 
+                0.097545161008064138, 
+                0.461939766255643260, 
+                0.415734806151272730 
+        },
+        {   /* x = 3 */
+                0.353553390593273730, 
+                0.097545161008064152, 
+                -0.461939766255643370, 
+                -0.277785116509801090, 
+                0.353553390593273680, 
+                0.415734806151272730, 
+                -0.191341716182544920, 
+                -0.490392640201615330
+        },
+        {   /* x = 4 */
+                0.353553390593273730, 
+                -0.097545161008064096, 
+                -0.461939766255643420, 
+                0.277785116509800920, 
+                0.353553390593273840, 
+                -0.415734806151272620, 
+                -0.191341716182545280, 
+                0.490392640201615220 
+        },
+        {   /* x = 5 */
+                0.353553390593273730, 
+                -0.277785116509800980, 
+                -0.191341716182545170, 
+                0.490392640201615220, 
+                -0.353553390593273340, 
+                -0.097545161008064013, 
+                0.461939766255643370, 
+                -0.415734806151272510
+        },
+        {   /* x = 6 */
+                0.353553390593273730, 
+                -0.415734806151272670, 
+                0.191341716182545000, 
+                0.097545161008064388, 
+                -0.353553390593273620, 
+                0.490392640201615330, 
+                -0.461939766255643200, 
+                0.277785116509800760 
+        },
+        {   /* x = 7 */
+                0.353553390593273730, 
+                -0.490392640201615220, 
+                0.461939766255643260, 
+                -0.415734806151272620, 
+                0.353553390593273290, 
+                -0.277785116509800760, 
+                0.191341716182544780, 
+                -0.097545161008064277
+        }
+};
+
+#endif /*_OMXDCTCOSTAB_C_*/
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c
new file mode 100644
index 0000000..a2572e0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c
@@ -0,0 +1,144 @@
+/**
+ * 
+ * File Name:  armVCM4P2_DecodeVLCZigzag_intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Description:
+ * Contains modules for filling of the coefficient buffer
+ *
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+
+/**
+ * Function: armVCM4P2_DecodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream     pointer to the pointer to the current byte in
+ *                              the bitstream buffer
+ * [in] pBitOffset      pointer to the bit position in the byte pointed
+ *                              to by *ppBitStream. *pBitOffset is valid within
+ *                              [0-7].
+ * [in] predDir         AC prediction direction which is used to decide
+ *                              the zigzag scan pattern. It takes one of the
+ *                              following values:
+ *                              OMX_VC_NONE  AC prediction not used;
+ *                                              perform classical zigzag scan;
+ *                              OMX_VC_HORIZONTAL    Horizontal prediction;
+ *                                                      perform alternate-vertical
+ *                                                      zigzag scan;
+ *                              OMX_VC_VERTICAL      Vertical prediction;
+ *                                                      thus perform
+ *                                                      alternate-horizontal
+ *                                                      zigzag scan.
+ * [in] start           start indicates whether the encoding begins with 0th element
+ *                      or 1st.
+ * [out]    ppBitStream     *ppBitStream is updated after the block is
+ *                              decoded, so that it points to the current byte
+ *                              in the bit stream buffer
+ * [out]    pBitOffset      *pBitOffset is updated so that it points to the
+ *                              current bit position in the byte pointed by
+ *                              *ppBitStream
+ * [out]    pDst            pointer to the coefficient buffer of current
+ *                              block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_DecodeVLCZigzag_Intra(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT * pBitOffset,
+     OMX_S16 * pDst,
+     OMX_U8 predDir,
+     OMX_INT shortVideoHeader,
+     OMX_U8  start
+)
+{
+    const OMX_U8  *pScanOrder;
+    OMX_U8  lastFlag = 0;
+    OMXResult status;
+
+    /* Argument error checks (NULL tests precede the dereferences) */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset >7), OMX_Sts_BadArgErr);
+    armRetArgErrIf((predDir > 2), OMX_Sts_BadArgErr);
+
+    /*
+     * Pick the scan table from the prediction direction: horizontal
+     * prediction uses the alternate-vertical scan, vertical prediction
+     * the alternate-horizontal scan, and anything else the classical
+     * zigzag.
+     */
+    if (predDir == OMX_VC_HORIZONTAL)
+    {
+        pScanOrder = armVCM4P2_aVerticalZigzagScan;
+    }
+    else if (predDir == OMX_VC_VERTICAL)
+    {
+        pScanOrder = armVCM4P2_aHorizontalZigzagScan;
+    }
+    else
+    {
+        pScanOrder = armVCM4P2_aClassicalZigzagScan;
+    }
+
+    /* NOTE(review): the meaning of 10, 62, 7, 21 is defined by
+       armVCM4P2_GetVLCBits, which is not visible here */
+    status = armVCM4P2_GetVLCBits (
+                 ppBitStream,
+                 pBitOffset,
+                 pDst,
+                 shortVideoHeader,
+                 start,
+                 &lastFlag,
+                 10,
+                 62,
+                 7,
+                 21,
+                 armVCM4P2_IntraL0RunIdx,
+                 armVCM4P2_IntraVlcL0,
+                 armVCM4P2_IntraL1RunIdx,
+                 armVCM4P2_IntraVlcL1,
+                 armVCM4P2_IntraL0LMAX,
+                 armVCM4P2_IntraL1LMAX,
+                 armVCM4P2_IntraL0RMAX,
+                 armVCM4P2_IntraL1RMAX,
+                 pScanOrder );
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /*
+     * A lastFlag that is still zero once decoding has returned is
+     * reported as a generic error.
+     */
+    return (lastFlag != 0) ? OMX_Sts_NoErr : OMX_Sts_Err;
+}
+
+/* End of file */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c
new file mode 100644
index 0000000..cd6b56d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c
@@ -0,0 +1,145 @@
+/**
+ * 
+ * File Name:  armVCM4P2_EncodeVLCZigzag_intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Description:
+ * Contains modules for zigzag scanning and VLC encoding
+ * for intra block.
+ *
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+
+/**
+ * Function: armVCM4P2_EncodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs zigzag scanning and VLC encoding for one intra block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream     pointer to the pointer to the current byte in
+ *                              the bit stream
+ * [in] pBitOffset      pointer to the bit position in the byte pointed
+ *                              by *ppBitStream. Valid within 0 to 7.
+ * [in] pQDctBlkCoef    pointer to the quantized DCT coefficient
+ * [in] predDir         AC prediction direction, which is used to decide
+ *                              the zigzag scan pattern. This takes one of the
+ *                              following values:
+ *                              OMX_VC_NONE          AC prediction not used.
+ *                                                      Performs classical zigzag
+ *                                                      scan.
+ *                              OMX_VC_HORIZONTAL    Horizontal prediction.
+ *                                                      Performs alternate-vertical
+ *                                                      zigzag scan.
+ *                              OMX_VC_VERTICAL      Vertical prediction.
+ *                                                      Performs alternate-horizontal
+ *                                                      zigzag scan.
+ * [in] pattern         block pattern which is used to decide whether
+ *                              this block is encoded
+ * [in] start           start indicates whether the encoding begins with 0th element
+ *                      or 1st.
+ * [out]    ppBitStream     *ppBitStream is updated after the block is encoded,
+ *                              so that it points to the current byte in the bit
+ *                              stream buffer.
+ * [out]    pBitOffset      *pBitOffset is updated so that it points to the
+ *                              current bit position in the byte pointed by
+ *                              *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_EncodeVLCZigzag_Intra(
+     OMX_U8 **ppBitStream,
+     OMX_INT *pBitOffset,
+     const OMX_S16 *pQDctBlkCoef,
+     OMX_U8 predDir,
+     OMX_U8 pattern,
+     OMX_INT shortVideoHeader,
+     OMX_U8 start
+)
+{
+    const OMX_U8  *pScanOrder;
+    OMXResult status;
+
+    /* Argument error checks (NULL tests precede the dereferences) */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pQDctBlkCoef == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset >7), OMX_Sts_BadArgErr);
+    armRetArgErrIf(start > 1, OMX_Sts_BadArgErr);
+    armRetArgErrIf(predDir > 2, OMX_Sts_BadArgErr);
+
+    /* A zero block pattern means there is nothing to encode */
+    if (!pattern)
+    {
+        return (OMX_Sts_NoErr);
+    }
+
+    /*
+     * Horizontal prediction uses the alternate-vertical scan, vertical
+     * prediction the alternate-horizontal scan, and anything else the
+     * classical zigzag.
+     */
+    if (predDir == OMX_VC_HORIZONTAL)
+    {
+        pScanOrder = armVCM4P2_aVerticalZigzagScan;
+    }
+    else if (predDir == OMX_VC_VERTICAL)
+    {
+        pScanOrder = armVCM4P2_aHorizontalZigzagScan;
+    }
+    else
+    {
+        pScanOrder = armVCM4P2_aClassicalZigzagScan;
+    }
+
+    /* NOTE(review): the meaning of 14, 20, 9, 6 is defined by
+       armVCM4P2_PutVLCBits, which is not visible here */
+    status = armVCM4P2_PutVLCBits (
+                 ppBitStream,
+                 pBitOffset,
+                 pQDctBlkCoef,
+                 shortVideoHeader,
+                 start,
+                 14,
+                 20,
+                 9,
+                 6,
+                 armVCM4P2_IntraL0RunIdx,
+                 armVCM4P2_IntraVlcL0,
+                 armVCM4P2_IntraL1RunIdx,
+                 armVCM4P2_IntraVlcL1,
+                 armVCM4P2_IntraL0LMAX,
+                 armVCM4P2_IntraL1LMAX,
+                 armVCM4P2_IntraL0RMAX,
+                 armVCM4P2_IntraL1RMAX,
+                 pScanOrder
+             );
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    return (OMX_Sts_NoErr);
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLCBuffer.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLCBuffer.c
new file mode 100644
index 0000000..93c9504
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLCBuffer.c
@@ -0,0 +1,153 @@
+/**
+ * 
+ * File Name:  armVCM4P2_FillVLCBuffer.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Description:
+ * Contains module for putting VLC bits
+ *
+ */ 
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+
+/**
+ * Function: armVCM4P2_FillVLCBuffer
+ *
+ * Description:
+ * Performs calculating the VLC bits depending on the escape type and insert 
+ * the same in the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	 ppBitStream		pointer to the pointer to the current byte in
+ *	                        the bit stream
+ * [in]	 pBitOffset         pointer to the bit position in the byte pointed
+ *                          by *ppBitStream. Valid within 0 to 7
+ * [in]  run                Run value (count of zeros) to be encoded  
+ * [in]  level              Level value (non-zero value) to be encoded
+ * [in]  runPlus            Calculated as runPlus = run - (RMAX + 1)  
+ * [in]  levelPlus          Calculated as 
+ *                          levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in]  fMode              Flag indicating the escape modes
+ * [in]  last               status of the last flag
+ * [in]  maxRunForMultipleEntries 
+ *                          The run value after which level will be equal to 1: 
+ *                          (considering last and inter/intra status)
+ * [in]  pRunIndexTable     Run Index table defined in
+ *                          armVCM4P2_Huff_Tables_VLC.h
+ * [in]  pVlcTable          VLC table defined in armVCM4P2_Huff_Tables_VLC.h
+ * [out] ppBitStream		*ppBitStream is updated after the block is encoded
+ *                          so that it points to the current byte in the bit
+ *                          stream buffer.
+ * [out] pBitOffset         *pBitOffset is updated so that it points to the
+ *                          current bit position in the byte pointed by
+ *                          *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLCBuffer (
+              OMX_U8 **ppBitStream,
+              OMX_INT * pBitOffset,
+              OMX_U32 run,
+              OMX_S16 level, 
+			  OMX_U32 runPlus,
+              OMX_S16 levelPlus, 
+              OMX_U8  fMode,
+			  OMX_U8  last,
+              OMX_U8  maxRunForMultipleEntries, 
+              const OMX_U8  *pRunIndexTable,
+              const ARM_VLC32 *pVlcTable
+)
+{
+    OMX_U32 codedRun = run;
+    OMX_S16 codedLevel = level;
+    OMX_U32 negative = 0;
+    OMX_INT vlcIndex;
+
+    /* Every escape mode (1..4) opens with the 7-bit value 3 (0000011) */
+    if ((fMode >= 1) && (fMode <= 4))
+    {
+        armPackBits(ppBitStream, pBitOffset, 3, 7);
+    }
+
+    switch (fMode)
+    {
+        case 1:  /* one bit 0, then the table code of (run, levelPlus) */
+            armPackBits(ppBitStream, pBitOffset, 0, 1);
+            codedLevel = levelPlus;
+            break;
+
+        case 2:  /* two bits 10, then the table code of (runPlus, level) */
+            armPackBits(ppBitStream, pBitOffset, 2, 2);
+            codedRun = runPlus;
+            break;
+
+        case 3:  /* two bits 11, fixed-length fields are written below */
+            armPackBits(ppBitStream, pBitOffset, 3, 2);
+            break;
+
+        case 4:  /* short-header escape: last(1), run(6), level(8) */
+            armPackBits(ppBitStream, pBitOffset, (OMX_U32)last, 1);
+            armPackBits(ppBitStream, pBitOffset, codedRun, 6);
+            /* A level of 0 or -128 is silently left out of the stream */
+            if ((codedLevel != 0) && (codedLevel != -128))
+            {
+                armPackBits(ppBitStream, pBitOffset,
+                            (OMX_U32) codedLevel, 8);
+            }
+            return OMX_Sts_NoErr;
+
+        default: /* fMode 0 (plain table code) or any other value: no prefix */
+            break;
+    }
+
+    if (fMode >= 3)
+    {
+        /* Fixed-length path: last(1), run(6), marker, level(12), marker */
+        armPackBits(ppBitStream, pBitOffset, (OMX_U32)last, 1);
+        armPackBits(ppBitStream, pBitOffset, run, 6);
+        armPackBits(ppBitStream, pBitOffset, 1, 1);
+        armPackBits(ppBitStream, pBitOffset, (OMX_U32) codedLevel, 12);
+        armPackBits(ppBitStream, pBitOffset, 1, 1);
+        return OMX_Sts_NoErr;
+    }
+
+    /* Table path: the code is chosen from |level|; the sign is one extra bit */
+    if (codedLevel < 0)
+    {
+        negative = 1;
+        codedLevel = armAbs(codedLevel);
+    }
+
+    if (codedRun > maxRunForMultipleEntries)
+    {
+        /* Past this run the level is not part of the index computation */
+        vlcIndex = pRunIndexTable [maxRunForMultipleEntries + 1] +
+                   (codedRun - maxRunForMultipleEntries - 1);
+    }
+    else
+    {
+        vlcIndex = pRunIndexTable [codedRun] + (codedLevel - 1);
+    }
+    armPackVLC32 (ppBitStream, pBitOffset, pVlcTable [vlcIndex]);
+    armPackBits(ppBitStream, pBitOffset, negative, 1);
+    return OMX_Sts_NoErr;
+}
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLDBuffer.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLDBuffer.c
new file mode 100644
index 0000000..1712c3a4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLDBuffer.c
@@ -0,0 +1,84 @@
+/**
+ * 
+ * File Name:  armVCM4P2_FillVLDBuffer.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Description:
+ * Contains module for storing decoded (run, level) pairs in the coefficient buffer
+ *
+ */ 
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+/**
+ * Function: armVCM4P2_FillVLDBuffer
+ *
+ * Description:
+ * Performs filling of the coefficient buffer according to the run, level
+ * and sign, also updates the index
+ * 
+ * Parameters:
+ * [in]  storeRun        Stored Run value (count of zeros)   
+ * [in]  storeLevel      Stored Level value (non-zero value)
+ * [in]  sign            Flag indicating the sign of level
+ * [in]  last            status of the last flag
+ * [in]  pIndex          pointer to coefficient index in 8x8 matrix
+ * [out] pIndex          pointer to updated coefficient index in 8x8 
+ *                       matrix
+ * [in]  pZigzagTable    pointer to the zigzag tables
+ * [out] pDst            pointer to the coefficient buffer of current
+ *                       block. Should be 32-bit aligned
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLDBuffer(
+    OMX_U32 storeRun,
+    OMX_S16 * pDst,
+    OMX_S16 storeLevel,
+    OMX_U8  sign,
+    OMX_U8  last,
+    OMX_U8  * pIndex,
+    const OMX_U8 * pZigzagTable
+)
+{
+    OMX_U8 pos = *pIndex;   /* working copy of the scan position */
+
+    /* Write 'storeRun' zero coefficients along the scan order */
+    while (storeRun > 0)
+    {
+        pDst[pZigzagTable[pos]] = 0;
+        pos++;
+        storeRun--;
+    }
+
+    /* Then the level itself, negated when the sign flag is exactly 1 */
+    pDst[pZigzagTable[pos]] = (sign == 1) ? -storeLevel : storeLevel;
+    pos++;
+
+    /* With last == 1 every remaining position below 64 is cleared */
+    if (last == 1)
+    {
+        for (; pos < 64; pos++)
+        {
+            pDst[pZigzagTable[pos]] = 0;
+        }
+    }
+
+    /* Hand the advanced position back to the caller */
+    *pIndex = pos;
+
+    return OMX_Sts_NoErr;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_GetVLCBits.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_GetVLCBits.c
new file mode 100644
index 0000000..953f597
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_GetVLCBits.c
@@ -0,0 +1,278 @@
+/**
+ * 
+ * File Name:  armVCM4P2_GetVLCBits.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Description:
+ * Contains module for VLC get bits from the stream 
+ *
+ */ 
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+
+ 
+/**
+ * Function: armVCM4P2_GetVLCBits
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and 
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte in
+ *								the bit stream
+ * [in]	pBitOffset		pointer to the bit position in the byte pointed
+ *								by *ppBitStream. Valid within 0 to 7
+ * [in] start           start indicates whether the encoding begins with 
+ *                      0th element or 1st.
+ * [in/out] pLast       pointer to last status flag
+ * [in] runBeginSingleLevelEntriesL0      Run base for last == 0: an index k above
+ *                                        the limit below decodes to run base + k
+ * [in] maxIndexForMultipleEntriesL0      Highest index in pVlcTableL0 that is
+ *                                        decoded through pRunIndexTableL0; any
+ *                                        higher index decodes to level 1
+ * [in] maxRunForMultipleEntriesL1        Run base for last == 1: an index k above
+ *                                        the limit below decodes to run base + k
+ * [in] maxIndexForMultipleEntriesL1      Highest index in pVlcTableL1 that is
+ *                                        decoded through pRunIndexTableL1; any
+ *                                        higher index decodes to level 1
+ * [in] pRunIndexTableL0    Run Index table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0         VLC table for last == 0
+ * [in] pRunIndexTableL1    Run Index table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1         VLC table for last == 1
+ * [in] pLMAXTableL0        Level MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1        Level MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0        Run MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1        Run MAX table defined in 
+ *                          armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [out]pDst			    pointer to the coefficient buffer of current
+ *							block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_GetVLCBits (
+              const OMX_U8 **ppBitStream,
+              OMX_INT * pBitOffset,
+			  OMX_S16 * pDst,
+			  OMX_INT shortVideoHeader,
+			  OMX_U8    start,
+			  OMX_U8  * pLast,
+			  OMX_U8    runBeginSingleLevelEntriesL0,
+			  OMX_U8    maxIndexForMultipleEntriesL0,
+			  OMX_U8    maxRunForMultipleEntriesL1,
+			  OMX_U8    maxIndexForMultipleEntriesL1,
+              const OMX_U8  * pRunIndexTableL0,
+              const ARM_VLC32 *pVlcTableL0,
+			  const OMX_U8  * pRunIndexTableL1,
+              const ARM_VLC32 *pVlcTableL1,
+              const OMX_U8  * pLMAXTableL0,
+              const OMX_U8  * pLMAXTableL1,
+              const OMX_U8  * pRMAXTableL0,
+              const OMX_U8  * pRMAXTableL1,
+              const OMX_U8  * pZigzagTable
+)
+{
+    OMX_U32 storeRun;
+    OMX_U8  tabIndex, markerBit;
+    OMX_S16 storeLevel;
+    OMX_U16 unpackRetIndex;
+	OMX_U8  i, fType, escape;
+	OMX_U8  sign = 0;
+	/* Decode (last, run, level) events until the scan position reaches 64. */
+	/* i has no loop increment; only armVCM4P2_FillVLDBuffer advances it.   */
+    for (i = start; i < 64;)
+    {
+		escape = armLookAheadBits(ppBitStream, pBitOffset, 7);
+		if (escape != 3)
+		{
+			fType = 0; /* Not in escape mode */
+		}
+		else
+		{
+			armSkipBits (ppBitStream, pBitOffset, 7);
+			if(shortVideoHeader)
+			{
+			  *pLast = armGetBits(ppBitStream, pBitOffset, 1);
+			  storeRun = armGetBits(ppBitStream, pBitOffset, 6);
+			  storeLevel = armGetBits(ppBitStream, pBitOffset, 8);
+
+			  /* Ref to Table B-18 (c) in MPEG4 Standard- FLC code for  */
+			  /* LEVEL when short_video_header is 1, the storeLevel is  */
+			  /* a signed value and the sign and the unsigned value for */
+			  /* storeLevel need to be extracted and passed to arm      */
+			  /* FillVLDBuffer function                                 */
+
+			  sign = (storeLevel & 0x80);
+			  if(sign==0x80)
+			  {
+			  	storeLevel=(storeLevel^0xff)+1;
+			  	sign=1;
+
+			  }
+
+			  armRetDataErrIf( storeLevel == 0 || sign*storeLevel == 128 , OMX_Sts_Err); /* Invalid FLC */
+			  armRetDataErrIf((i + storeRun) >= 64, OMX_Sts_Err);
+			  armVCM4P2_FillVLDBuffer(
+			    storeRun,
+			    pDst,
+			    storeLevel,
+			    sign,
+			    *pLast,
+			    &i,
+			    pZigzagTable);
+			  /* Keep decoding: with last == 1 the fill above has moved i to 64 */
+			  continue;
+			}
+			if (armGetBits(ppBitStream, pBitOffset, 1))
+			{
+				if (armGetBits(ppBitStream, pBitOffset, 1))
+				{
+					fType = 3;
+				}
+				else
+				{
+					fType = 2;
+				}
+			}
+			else
+			{
+				fType = 1;
+			}
+		}
+
+	    if (fType < 3) /* table-coded event: no escape, or escape type 1 / 2 */
+	    {
+	        unpackRetIndex = armUnPackVLC32(ppBitStream, pBitOffset,
+										pVlcTableL0);
+			if (unpackRetIndex != ARM_NO_CODEBOOK_INDEX)
+		    {
+			    /* Decode run and level from the index */
+			    /* last = 0 */
+			    *pLast = 0;
+			    if (unpackRetIndex > maxIndexForMultipleEntriesL0)
+			    {
+				    storeLevel = 1;
+				    storeRun = (unpackRetIndex - maxIndexForMultipleEntriesL0) 
+							+ runBeginSingleLevelEntriesL0;
+			    }
+			    else
+			    {
+				    tabIndex = 1;
+				    while (pRunIndexTableL0[tabIndex] <= unpackRetIndex)
+				    {
+					    tabIndex++;
+				    }
+				    storeRun = tabIndex - 1;
+				    storeLevel = unpackRetIndex - pRunIndexTableL0[tabIndex - 1] + 1;
+			    }
+			    sign = (OMX_U8) armGetBits(ppBitStream, pBitOffset, 1);
+
+			    if (fType == 1)
+			    {
+				    storeLevel = (armAbs(storeLevel) + pLMAXTableL0[storeRun]);
+			    }
+			    else if (fType == 2)
+			    {
+				    storeRun = storeRun + pRMAXTableL0[storeLevel-1] + 1;
+			    }
+		    }
+		    else /* no match in the last == 0 table: try the last == 1 table */
+		    {
+			    unpackRetIndex = armUnPackVLC32(ppBitStream, pBitOffset, 
+											pVlcTableL1);
+
+			    armRetDataErrIf(unpackRetIndex == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err);
+
+			    /* Decode run and level from the index */
+			    /* last = 1 */
+			    *pLast = 1;
+			    if (unpackRetIndex > maxIndexForMultipleEntriesL1)
+			    {
+				    storeLevel = 1;
+				    storeRun = (unpackRetIndex - maxIndexForMultipleEntriesL1) 
+							+ maxRunForMultipleEntriesL1;
+		        }
+		        else
+			    {
+				    tabIndex = 1;
+				    while (pRunIndexTableL1[tabIndex] <= unpackRetIndex)
+				    {
+					    tabIndex++;
+				    }
+				    storeRun = tabIndex - 1;
+				    storeLevel = unpackRetIndex - pRunIndexTableL1[tabIndex - 1] + 1;
+			    }
+			    sign = (OMX_U8) armGetBits(ppBitStream, pBitOffset, 1);
+
+			    if (fType == 1)
+			    {
+			        storeLevel = (armAbs(storeLevel) + pLMAXTableL1[storeRun]);
+			    }
+			    else if (fType == 2)
+			    {
+				    storeRun = storeRun + pRMAXTableL1[storeLevel-1] + 1;
+			    }
+		    }
+            armRetDataErrIf((i + storeRun) >= 64, OMX_Sts_Err);
+		    armVCM4P2_FillVLDBuffer(
+			    storeRun,
+			    pDst,
+			    storeLevel,
+			    sign,
+			    *pLast,
+			    &i,
+			    pZigzagTable);
+	    }
+	    else /* escape type 3: last(1), run(6), marker, level(12), marker */
+	    {
+		    *pLast = armGetBits(ppBitStream, pBitOffset, 1);
+		    storeRun  = armGetBits(ppBitStream, pBitOffset, 6);
+		    armRetDataErrIf((i + storeRun) >= 64, OMX_Sts_Err);
+		    markerBit = armGetBits(ppBitStream, pBitOffset, 1);
+		    armRetDataErrIf( markerBit == 0, OMX_Sts_Err);
+		    storeLevel  = armGetBits(ppBitStream, pBitOffset, 12);
+		    if (storeLevel & 0x800)
+		    {
+			    storeLevel -= 4096;
+		    }
+		    armRetDataErrIf( storeLevel == 0 || storeLevel == -2048 , OMX_Sts_Err); /* Invalid FLC */
+		    armGetBits(ppBitStream, pBitOffset, 1); /* trailing bit is consumed, value not checked */
+		    armVCM4P2_FillVLDBuffer(
+			    storeRun,
+			    pDst,
+			    storeLevel,
+			    0, /* Sign is not used, preprocessing done */
+			    *pLast,
+			    &i,
+			    pZigzagTable);
+
+	    }
+    } /* End of forloop for i */
+	return OMX_Sts_NoErr;
+}
+
+/* End of File */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
new file mode 100644
index 0000000..cd7e9e4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
@@ -0,0 +1,495 @@
+ /**
+ * 
+ * File Name:  armVCM4P2_Huff_Tables_VLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File:        armVCM4P2_Huff_Tables_VLC.c
+ * Description: Contains all the Huffman tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armCOMM_Bitstream.h"
+
+/* 
+*  Intra, last = 0: entry r is the index in armVCM4P2_IntraVlcL0 of the
+*  first code for run r; the codes for higher levels of that run follow it
+*/
+const OMX_U8 armVCM4P2_IntraL0RunIdx[11] = 
+{ 
+    0, 27, 37, 42, 46, 49, 52, 
+    55, 58, 60, 62
+};
+
+/* Entry defined for all values 
+*  for run = 0 to 14
+*  Note: the last entry is to terminate while decoding
+*/
+const ARM_VLC32 armVCM4P2_IntraVlcL0[68] = 
+{
+        {2,    2},
+        {3,    6},
+        {4,    15},
+        {5,    13},
+        {5,    12},
+        {6,    21},
+        {6,    19},
+        {6,    18},
+        {7,    23},
+        {8,    31},
+        {8,    30},
+        {8,    29},
+        {9,    37},
+        {9,    36},
+        {9,    35},
+        {9,    33},
+        {10,   33},
+        {10,   32},
+        {10,   15},
+        {10,   14},
+        {11,    7},
+        {11,    6},
+        {11,   32},
+        {11,   33},
+        {12,   80},
+        {12,   81},
+        {12,   82},
+        {4,    14},
+        {6,    20},
+        {7,    22},
+        {8,    28},
+        {9,    32},
+        {9,    31},
+        {10,   13},
+        {11,   34},
+        {12,   83},
+        {12,   85},
+        {5,    11},
+        {7,    21},
+        {9,    30},
+        {10,   12},
+        {12,   86},
+        {6,    17},
+        {8,    27},
+        {9,    29},
+        {10,   11},
+        {6,    16},
+        {9,    34},
+        {10,   10},
+        {6,    13},
+        {9,    28},
+        {10,    8},
+        {7,    18},
+        {9,    27},
+        {12,   84},
+        {7,    20},
+        {9,    26},
+        {12,   87},
+        {8,    25},
+        {10,    9},
+        {8,    24},
+        {11,   35},
+        {8,    23},
+        {9,    25},
+        {9,    24},
+        {10,    7},
+        {12,   88},
+        {0,     0}
+};
+
+/* 
+*  Intra, last = 1: entry r is the index in armVCM4P2_IntraVlcL1 of the
+*  first code for run r; the codes for higher levels of that run follow it
+*/
+
+const OMX_U8 armVCM4P2_IntraL1RunIdx[8] = 
+{
+    0,  8, 11, 13, 15, 17, 19, 21
+};
+
+/* Entry defined for all values 
+*  for run = 0 to 20
+*  Note: the last entry is to terminate while decoding
+*/
+const ARM_VLC32 armVCM4P2_IntraVlcL1[36] = 
+{
+        {4,     7},
+        {6,    12},
+        {8,    22},
+        {9,    23},
+        {10,    6},
+        {11,    5},
+        {11,    4},
+        {12,   89},
+        {6,    15},
+        {9,    22},
+        {10,    5},
+        {6,    14},
+        {10,    4},
+        {7,    17},
+        {11,   36},
+        {7,    16},
+        {11,   37},
+        {7,    19},
+        {12,   90},
+        {8,    21},
+        {12,   91},
+        {8,    20},
+        {8,    19},
+        {8,    26},
+        {9,    21},
+        {9,    20},
+        {9,    19},
+        {9,    18},
+        {9,    17},
+        {11,   38},
+        {11,   39},
+        {12,   92},
+        {12,   93},
+        {12,   94},
+        {12,   95},  
+        {0,     0}
+};
+
+/* LMAX (level max) table for Intra, last == 0; indexed by run */
+const OMX_U8 armVCM4P2_IntraL0LMAX[15] = 
+{
+   27, 10,  5,  4,  3,  3,  3,  
+    3,  2,  2,  1,  1,  1,  1,  1
+};
+
+/* LMAX (level max) table for Intra, last == 1; indexed by run */
+const OMX_U8 armVCM4P2_IntraL1LMAX[21] = 
+{
+    8,  3,  2,  2,  2,  2,  2,  1, 
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1
+};
+
+/* RMAX (run max) table for Intra, last == 0
+   Indexed by (|level| - 1)
+*/
+const OMX_U8 armVCM4P2_IntraL0RMAX[27] =
+{
+   14,  9,  7,  3,  2,  1,	1,  
+    1,  1,  1,  0,  0,  0, 	0,  
+    0,  0,  0,  0,  0,  0,  0,  
+    0,  0,  0,  0,  0,  0
+};
+
+/* RMAX (run max) table for Intra, last == 1
+   Indexed by (|level| - 1)
+*/
+const OMX_U8 armVCM4P2_IntraL1RMAX[8] =
+{
+   20,  6,  1,  0,  0,  0,  0,  0
+};
+
+/* 
+*  Inter, last = 0: entry r is the index of the first VLC code for run r
+*  (presumably into armVCM4P2_InterVlcL0 - confirm against the callers)
+*/
+const OMX_U8 armVCM4P2_InterL0RunIdx[12] = 
+{ 
+     0,  12,  18,  22,  25,  28,  
+    31,  34,  36,  38,  40,  42
+};
+
+/* Entry defined for all values 
+*  for run = 0 to 26
+*  Note: the last entry is to terminate while decoding
+*/
+const ARM_VLC32 armVCM4P2_InterVlcL0[59] = 
+{
+        {2,     2},
+        {4,    15},
+        {6,    21},
+        {7,    23},
+        {8,    31},
+        {9,    37},
+        {9,    36},
+        {10,   33},
+        {10,   32},
+        {11,    7},
+        {11,    6},
+        {11,   32},
+        {3,     6},
+        {6,    20},
+        {8,    30},
+        {10,   15},
+        {11,   33},
+        {12,   80},
+        {4,    14},
+        {8,    29},
+        {10,   14},
+        {12,   81},
+        {5,    13},
+        {9,    35},
+        {10,   13},
+        {5,    12},
+        {9,    34},
+        {12,   82},
+        {5,    11},
+        {10,   12},
+        {12,   83},
+        {6,    19},
+        {10,   11},
+        {12,   84},
+        {6,    18},
+        {10,   10},
+        {6,    17},
+        {10,    9},
+        {6,    16},
+        {10,    8},
+        {7,    22},
+        {12,   85},
+        {7,    21},
+        {7,    20},
+        {8,    28},
+        {8,    27},
+        {9,    33},
+        {9,    32},
+        {9,    31},
+        {9,    30},
+        {9,    29},
+        {9,    28},
+        {9,    27},
+        {9,    26},
+        {11,   34},
+        {11,   35},
+        {12,   86},
+        {12,   87},
+        {0,     0}
+};
+ 
+
+/* 
+*  Inter, last = 1: entry r is the index of the first VLC code for run r
+*  (presumably into armVCM4P2_InterVlcL1 - confirm against the callers)
+*/
+
+const OMX_U8 armVCM4P2_InterL1RunIdx[3] = 
+{
+    0, 3, 5
+};
+
+/* Entry defined for all values 
+*  for run = 0 to 40
+*  Note: the last entry is to terminate while decoding
+*/
+const ARM_VLC32 armVCM4P2_InterVlcL1[45] = 
+{
+        {4,     7},
+        {9,    25},
+        {11,    5},
+        {6,    15},
+        {11,    4},
+        {6,    14},
+        {6,    13},
+        {6,    12},
+        {7,    19},
+        {7,    18},
+        {7,    17},
+        {7,    16},
+        {8,    26},
+        {8,    25},
+        {8,    24},
+        {8,    23},
+        {8,    22},
+        {8,    21},
+        {8,    20},
+        {8,    19},
+        {9,    24},
+        {9,    23},
+        {9,    22},
+        {9,    21},
+        {9,    20},
+        {9,    19},
+        {9,    18},
+        {9,    17},
+        {10,    7},
+        {10,    6},
+        {10,    5},
+        {10,    4},
+        {11,   36},
+        {11,   37},
+        {11,   38},
+        {11,   39},
+        {12,   88},
+        {12,   89},
+        {12,   90},
+        {12,   91},
+        {12,   92},
+        {12,   93},
+        {12,   94},
+        {12,   95},
+        { 0,    0}
+};
+
+/* LMAX (level max) table for Inter, last == 0; indexed by run */
+const OMX_U8 armVCM4P2_InterL0LMAX[27] = 
+{
+   12,  6,  4,  3,  3,  3,  3,  2, 
+    2,  2,  2,  1,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1,
+    1,  1,  1,
+};
+
+/* LMAX (level max) table for Inter, last == 1; indexed by run */
+const OMX_U8 armVCM4P2_InterL1LMAX[41] = 
+{
+    3,  2,  1,  1,  1,  1,  1,  1, 
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  
+};
+
+/* RMAX (run max) table for Inter, last == 0
+   Indexed by (|level| - 1)
+*/
+const OMX_U8 armVCM4P2_InterL0RMAX[12] = 
+{
+   26, 10,  6,  2,  1,  1,   
+    0,  0,  0,  0,  0,  0
+};
+
+/* RMAX (run max) table for Inter, last == 1
+   Indexed by (|level| - 1)
+*/
+const OMX_U8 armVCM4P2_InterL1RMAX[3] = 
+{
+   40,  1,  0
+};
+
+/* 
+*  For Intra - Luminance
+*/
+
+const ARM_VLC32 armVCM4P2_aIntraDCLumaIndex[14] = 
+{
+        {3,     3},
+        {2,     3},
+        {2,     2},
+        {3,     2},
+        {3,     1},
+        {4,     1},
+        {5,     1},
+        {6,     1},
+        {7,     1},
+        {8,     1},
+        {9,     1},
+        {10,    1},
+        {11,    1},
+        {0,     0}
+};
+
+/* 
+*  For Intra - Chrominance
+*/
+ 
+const ARM_VLC32 armVCM4P2_aIntraDCChromaIndex[14] = 
+{
+        {2,     3},
+        {2,     2},
+        {2,     1},
+        {3,     1},
+        {4,     1},
+        {5,     1},
+        {6,     1},
+        {7,     1},
+        {8,     1},
+        {9,     1},
+        {10,    1},
+        {11,    1},
+        {12,    1},
+        {0,     0}
+};
+
+/* 
+ *  Motion vector decoding table
+ */
+ 
+const ARM_VLC32 armVCM4P2_aVlcMVD[66] =
+{
+        {13,     5},
+        {13,     7},
+        {12,     5},
+        {12,     7},
+        {12,     9},
+        {12,    11},
+        {12,    13},
+        {12,    15},
+        {11,     9},
+        {11,    11},
+        {11,    13},
+        {11,    15},
+        {11,    17},
+        {11,    19},
+        {11,    21},
+        {11,    23},
+        {11,    25},
+        {11,    27},
+        {11,    29},
+        {11,    31},
+        {11,    33},
+        {11,    35},
+        {10,    19},
+        {10,    21},
+        {10,    23},
+        {8,      7},
+        {8,      9},
+        {8,     11},
+        {7,      7},
+        {5,      3},
+        {4,      3},
+        {3,      3},
+        {1,      1},
+        {3,      2},
+        {4,      2},
+        {5,      2},
+        {7,      6},
+        {8,     10},
+        {8,      8},
+        {8,      6},
+        {10,    22},
+        {10,    20},
+        {10,    18},
+        {11,    34},
+        {11,    32},
+        {11,    30},
+        {11,    28},
+        {11,    26},
+        {11,    24},
+        {11,    22},
+        {11,    20},
+        {11,    18},
+        {11,    16},
+        {11,    14},
+        {11,    12},
+        {11,    10},
+        {11,     8},
+        {12,    14},
+        {12,    12},
+        {12,    10},
+        {12,     8},
+        {12,     6},
+        {12,     4},
+        {13,     6},
+        {13,     4},
+        { 0,     0}
+};
+
+/* End of file */
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_PutVLCBits.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_PutVLCBits.c
new file mode 100644
index 0000000..ca9efec
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_PutVLCBits.c
@@ -0,0 +1,200 @@
+/**
+ * 
+ * File Name:  armVCM4P2_PutVLCBits.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Description:
+ * Contains module for VLC put bits to bitstream 
+ *
+ */ 
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+
+ 
+/**
+ * Function: armVCM4P2_PutVLCBits
+ *
+ * Description:
+ * Checks the type of Escape Mode and put encoded bits for 
+ * quantized DCT coefficients.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	 ppBitStream      pointer to the pointer to the current byte in
+ *						  the bit stream
+ * [in]	 pBitOffset       pointer to the bit position in the byte pointed
+ *                        by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ *                           and escape mode 4 is used when shortVideoHeader==1.
+ * [in]  start            start indicates whether the encoding begins with 
+ *                        0th element or 1st.
+ * [in]  maxStoreRunL0    Max store possible (considering last and inter/intra)
+ *                        for last = 0
+ * [in]  maxStoreRunL1    Max store possible (considering last and inter/intra)
+ *                        for last = 1
+ * [in]  maxRunForMultipleEntriesL0 
+ *                        The run value after which level 
+ *                        will be equal to 1: 
+ *                        (considering last and inter/intra status) for last = 0
+ * [in]  maxRunForMultipleEntriesL1 
+ *                        The run value after which level 
+ *                        will be equal to 1: 
+ *                        (considering last and inter/intra status) for last = 1
+ * [in]  pRunIndexTableL0 Run Index table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in]  pVlcTableL0      VLC table for last == 0
+ * [in]  pRunIndexTableL1 Run Index table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in]  pVlcTableL1      VLC table for last == 1
+ * [in]  pLMAXTableL0     Level MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in]  pLMAXTableL1     Level MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in]  pRMAXTableL0     Run MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in]  pRMAXTableL1     Run MAX table defined in 
+ *                        armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in]  pQDctBlkCoef     pointer to the quantized DCT coefficients to encode
+ * [out] ppBitStream      *ppBitStream is updated after the block is encoded
+ *                        so that it points to the current byte in the bit
+ *                        stream buffer.
+ * [out] pBitOffset       *pBitOffset is updated so that it points to the
+ *                        current bit position in the byte pointed by
+ *                        *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+
+OMXResult armVCM4P2_PutVLCBits (
+              OMX_U8 **ppBitStream,
+              OMX_INT * pBitOffset,
+              const OMX_S16 *pQDctBlkCoef,
+              OMX_INT shortVideoHeader,
+              OMX_U8 start,
+              OMX_U8 maxStoreRunL0,
+              OMX_U8 maxStoreRunL1,
+              OMX_U8  maxRunForMultipleEntriesL0,
+              OMX_U8  maxRunForMultipleEntriesL1,
+              const OMX_U8  * pRunIndexTableL0,
+              const ARM_VLC32 *pVlcTableL0,
+			  const OMX_U8  * pRunIndexTableL1,
+              const ARM_VLC32 *pVlcTableL1,
+              const OMX_U8  * pLMAXTableL0,
+              const OMX_U8  * pLMAXTableL1,
+              const OMX_U8  * pRMAXTableL0,
+              const OMX_U8  * pRMAXTableL1,
+              const OMX_U8  * pZigzagTable
+)
+{
+
+    OMX_U32 storeRun = 0, run, storeRunPlus;
+    OMX_U8  last = 0, first = 1, fMode;
+    OMX_S16 level, storeLevel = 0, storeLevelPlus;
+    OMX_INT i;
+        /* Run-length scan. Each non-zero coefficient is held back one step   */
+        /* (storeRun/storeLevel) so the final one is written with last = 1.   */
+        for (i = start, run=0; i < 64; i++)
+        {
+            level   = pQDctBlkCoef[pZigzagTable[i]];
+
+            /* Counting the run */
+            if (level == 0)
+            {
+                run++;
+            }
+
+            /* Found a non-zero coeff */
+            else
+            {
+                if (first == 0)
+                {
+                    last = 0;
+
+                    /* Check for a valid entry in the VLC table */
+                    storeLevelPlus = armSignCheck(storeLevel) * 
+                      (armAbs(storeLevel) - pLMAXTableL0[storeRun]);
+                    storeRunPlus = storeRun - 
+                                  (pRMAXTableL0[armAbs(storeLevel) - 1] + 1);
+
+                    fMode = armVCM4P2_CheckVLCEscapeMode(
+                                             storeRun,
+                                             storeRunPlus,
+                                             storeLevel,
+                                             storeLevelPlus,
+                                             maxStoreRunL0,
+                                             maxRunForMultipleEntriesL0,
+                                             shortVideoHeader,
+                                             pRunIndexTableL0);
+
+                    armVCM4P2_FillVLCBuffer (
+                                      ppBitStream, 
+                                      pBitOffset,
+                                      storeRun,
+                                      storeLevel, 
+									  storeRunPlus,
+                                      storeLevelPlus, 
+                                      fMode,
+									  last,
+                                      maxRunForMultipleEntriesL0, 
+                                      pRunIndexTableL0,
+                                      pVlcTableL0);
+                }
+                storeLevel = level;
+                storeRun   = run;
+                first = 0;
+                run = 0;
+            }
+
+        } /* end of for loop for 64 elements */
+
+        /* writing the last element */
+        last = 1;
+        /* LMAX is indexed by the run that precedes the stored coefficient  */
+        /* (storeRun), not by 'run', which now counts the trailing zeros.   */
+        storeLevelPlus = armSignCheck(storeLevel) * 
+                        (armAbs(storeLevel) - pLMAXTableL1[storeRun]);
+        storeRunPlus = storeRun - 
+                      (pRMAXTableL1[armAbs(storeLevel) - 1] + 1);
+        fMode = armVCM4P2_CheckVLCEscapeMode(
+                                 storeRun,
+                                 storeRunPlus,
+                                 storeLevel,
+                                 storeLevelPlus,
+                                 maxStoreRunL1,
+                                 maxRunForMultipleEntriesL1,
+                                 shortVideoHeader,
+                                 pRunIndexTableL1);
+
+        armVCM4P2_FillVLCBuffer (
+                          ppBitStream, 
+                          pBitOffset,
+                          storeRun,
+                          storeLevel, 
+						  storeRunPlus,
+                          storeLevelPlus,
+                          fMode,
+						  last,
+                          maxRunForMultipleEntriesL1,
+                          pRunIndexTableL1,
+                          pVlcTableL1);
+	return OMX_Sts_NoErr;
+}
+
+/* End of File */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_SetPredDir.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_SetPredDir.c
new file mode 100644
index 0000000..a9cd008
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_SetPredDir.c
@@ -0,0 +1,89 @@
+/**
+ * 
+ * File Name:  armVCM4P2_SetPredDir.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Description:
+ * Contains module for detecting the prediction direction
+ *
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_SetPredDir
+ *
+ * Description:
+ * Selects the prediction direction and the matching predictor QP
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] blockIndex  block index indicating the component type and
+ *                          position as defined in subclause 6.1.3.8, of ISO/IEC
+ *                          14496-2. Furthermore, indexes 6 to 9 indicate the
+ *                          alpha blocks spatially corresponding to luminance
+ *                          blocks 0 to 3 in the same macroblock.
+ * [in] pCoefBufRow, pCoefBufCol  pointers to the coefficient row and column buffers
+ * [in] pQpBuf      pointer to the quantization parameter buffer
+ * [out]    predQP      quantization parameter of the predictor block
+ * [out]    predDir     indicates the prediction direction which takes one
+ *                          of the following values:
+ *                          OMX_VC_HORIZONTAL    predict horizontally
+ *                          OMX_VC_VERTICAL      predict vertically
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_SetPredDir(
+     OMX_INT blockIndex,
+     OMX_S16 *pCoefBufRow,
+     OMX_S16 *pCoefBufCol,
+     OMX_INT *predDir,
+     OMX_INT *predQP,
+     const OMX_U8 *pQpBuf
+)
+{
+    /* DC terms come from OMX_S16 buffers, so hold them in OMX_INT: the   */
+    /* former OMX_U8 locals wrapped modulo 256 and skewed the gradients.  */
+    OMX_INT blockDCLeft, blockDCTop, blockDCTopLeft;
+
+    if (blockIndex == 3)
+    {
+        blockDCTop = *(pCoefBufCol - 8);    /* block 3: top DC is read from the column buffer */
+    }
+    else
+    {
+        blockDCTop = *pCoefBufRow;
+    }
+    blockDCLeft = *pCoefBufCol;
+    blockDCTopLeft = *(pCoefBufRow - 8);
+
+    /* Left gradient smaller than top gradient => vertical, with pQpBuf[1] */
+    if (armAbs(blockDCLeft - blockDCTopLeft) < armAbs(blockDCTopLeft - blockDCTop))
+    {
+        *predDir = OMX_VC_VERTICAL;
+        *predQP = pQpBuf[1];
+    }
+    else
+    {
+        *predDir = OMX_VC_HORIZONTAL;
+        *predQP = pQpBuf[0];
+    }
+    return OMX_Sts_NoErr;
+}
+
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
new file mode 100644
index 0000000..a247c69
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
@@ -0,0 +1,58 @@
+ /**
+ * 
+ * File Name:  armVCM4P2_Zigzag_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File:        armVCM4P2_Zigzag_Tables.c
+ * Description: Contains the zigzag tables
+ *
+ */
+
+#include "omxtypes.h"
+
+const OMX_U8 armVCM4P2_aClassicalZigzagScan [64] = /* classical zigzag order: each of 0..63 appears exactly once. NOTE(review): presumably scan position -> raster offset in an 8x8 block; confirm at the use sites */
+{
+     0,  1,  8, 16,  9,  2,  3, 10,
+    17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28, 
+    35, 42, 49, 56, 57, 50, 43, 36,
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64] = /* horizontal zigzag order: each of 0..63 appears exactly once. NOTE(review): presumably scan position -> raster offset in an 8x8 block; confirm at the use sites */
+{
+     0,  1,  2,  3,  8,  9, 16, 17,
+    10, 11,  4,  5,  6,  7, 15, 14,
+    13, 12, 19, 18, 24, 25, 32, 33,
+    26, 27, 20, 21, 22, 23, 28, 29,
+    30, 31, 34, 35, 40, 41, 48, 49,
+    42, 43, 36, 37, 38, 39, 44, 45, 
+    46, 47, 50, 51, 56, 57, 58, 59,
+    52, 53, 54, 55, 60, 61, 62, 63
+};
+
+const OMX_U8 armVCM4P2_aVerticalZigzagScan [64] = /* vertical zigzag order: each of 0..63 appears exactly once. NOTE(review): presumably scan position -> raster offset in an 8x8 block; confirm at the use sites */
+{
+     0,  8, 16, 24,  1,  9,  2, 10,
+     17, 25, 32, 40, 48, 56, 57, 49,
+     41, 33, 26, 18,  3, 11,  4, 12,
+     19, 27, 34, 42, 50, 58, 35, 43,
+     51, 59, 20, 28,  5, 13,  6, 14,
+     21, 29, 36, 44, 52, 60, 37, 45, 
+     53, 61, 22, 30,  7, 15, 23, 31,
+     38, 46, 54, 62, 39, 47, 55, 63
+};
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c
new file mode 100644
index 0000000..dcd3ce1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c
@@ -0,0 +1,111 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_BlockMatch_Half_16x16.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * 
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Half_16x16   (6.2.4.2.3)
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution.  Returns the 
+ * estimated motion vector and associated minimum SAD.  This function 
+ * estimates the half-pixel motion vector by interpolating the integer 
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., 
+ * the initial integer MV is generated externally.  The input parameters 
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of 
+ * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function 
+ * BlockMatch_Integer_16x16 may be used for integer motion estimation. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            macroblock that corresponds to the location of the current 
+ *            macroblock in the current plane. 
+ *   refWidth - width of the reference plane 
+ *   pRefRect - reference plane valid region rectangle 
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock 
+ *            buffer extracted from the original plane (linear array, 256 
+ *            entries); must be aligned on a 16-byte boundary.  The number of 
+ *            bytes between lines (step) is 16. 
+ *   pSearchPointRefPos - position of the starting point for half pixel 
+ *            search (specified in terms of integer pixel units) in the 
+ *            reference plane, i.e., the reference position pointed to by the 
+ *            predicted motion vector. 
+ *   rndVal - rounding control parameter: 0 - disabled; 1 - enabled. 
+ *   pSrcDstMV - pointer to the initial MV estimate; typically generated 
+ *            during a prior 16X16 integer search; specified in terms of 
+ *            half-pixel units. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstMV - pointer to estimated MV 
+ *   pDstSAD - pointer to minimum SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following 
+ *              conditions is true: 
+ *    -    at least one of the following pointers is NULL: pSrcRefBuf, 
+ *         pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV.
+ *    -    pSrcCurrBuf is not 16-byte aligned 
+ *
+ */
+
+OMXResult omxVCM4P2_BlockMatch_Half_16x16(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pSearchPointRefPos,
+     OMX_INT rndVal,
+     OMXVCMotionVector *pSrcDstMV,
+     OMX_INT *pDstSAD
+)
+{
+    /* Edge length of the block being matched; the size-generic helper */
+    /* armVCM4P2_BlockMatch_Half receives it as its last argument.     */
+    const OMX_U8 blockEdge = 16;
+    OMXResult status;
+
+    /* Mandatory pointers must be non-NULL */
+    armRetArgErrIf(NULL == pSrcRefBuf, OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pRefRect, OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pSrcCurrBuf, OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pSearchPointRefPos, OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pSrcDstMV, OMX_Sts_BadArgErr);
+
+    /* The current-block buffer must sit on a 16-byte boundary */
+    armRetArgErrIf(!armIs16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);
+
+    /* Everything else is delegated to the helper, whose status code is */
+    /* returned unchanged.                                              */
+    status = armVCM4P2_BlockMatch_Half(
+                 pSrcRefBuf, refWidth, pRefRect,
+                 pSrcCurrBuf, pSearchPointRefPos,
+                 rndVal, pSrcDstMV, pDstSAD,
+                 blockEdge);
+
+    return status;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c
new file mode 100644
index 0000000..6996e6d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c
@@ -0,0 +1,109 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_BlockMatch_Half_8x8.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Half_8x8   (6.2.4.2.4)
+ *
+ * Description:
+ * Performs an 8x8 block match with half-pixel resolution. Returns the 
+ * estimated motion vector and associated minimum SAD.  This function 
+ * estimates the half-pixel motion vector by interpolating the integer 
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., 
+ * the initial integer MV is generated externally.  The input parameters 
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of 
+ * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function 
+ * BlockMatch_Integer_8x8 may be used for integer motion estimation. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            block that corresponds to the location of the current 8x8 block 
+ *            in the current plane. 
+ *   refWidth - width of the reference plane 
+ *   pRefRect - reference plane valid region rectangle 
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock 
+ *            buffer extracted from the original plane (linear array, 128 
+ *            entries); must be aligned on a 8-byte boundary.  The number of 
+ *            bytes between lines (step) is 16. 
+ *   pSearchPointRefPos - position of the starting point for half pixel 
+ *            search (specified in terms of integer pixel units) in the 
+ *            reference plane. 
+ *   rndVal - rounding control parameter: 0 - disabled; 1 - enabled. 
+ *   pSrcDstMV - pointer to the initial MV estimate; typically generated 
+ *            during a prior 8x8 integer search, specified in terms of 
+ *            half-pixel units. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstMV - pointer to estimated MV 
+ *   pDstSAD - pointer to minimum SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following 
+ *              conditions is true: 
+ *    -    at least one of the following pointers is NULL: 
+ *         pSrcRefBuf, pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV
+ *    -    pSrcCurrBuf is not 8-byte aligned 
+ *
+ */
+
+OMXResult omxVCM4P2_BlockMatch_Half_8x8(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pSearchPointRefPos,
+     OMX_INT rndVal,
+     OMXVCMotionVector *pSrcDstMV,
+     OMX_INT *pDstSAD
+)
+{
+    /* Edge length of the block, passed last to armVCM4P2_BlockMatch_Half */
+    const OMX_U8 blockEdge = 8;
+    OMXResult status;
+
+    /* Mandatory pointers must be non-NULL */
+    armRetArgErrIf(NULL == pSrcRefBuf, OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pRefRect, OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pSrcCurrBuf, OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pSearchPointRefPos, OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pSrcDstMV, OMX_Sts_BadArgErr);
+
+    /* The current-block buffer must sit on an 8-byte boundary */
+    armRetArgErrIf(!armIs8ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);
+
+    /* Delegate to the size-generic helper and hand back its status as-is */
+    status = armVCM4P2_BlockMatch_Half(
+                 pSrcRefBuf, refWidth, pRefRect,
+                 pSrcCurrBuf, pSearchPointRefPos,
+                 rndVal, pSrcDstMV, pDstSAD,
+                 blockEdge);
+
+    return status;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c
new file mode 100644
index 0000000..e714ef1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c
@@ -0,0 +1,114 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_BlockMatch_Integer_16x16.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Integer_16x16   (6.2.4.2.1)
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated 
+ * minimum SAD. Both the input and output motion vectors are represented using 
+ * half-pixel units, and therefore a shift left or right by 1 bit may be 
+ * required, respectively, to match the input or output MVs with other 
+ * functions that either generate output MVs or expect input MVs represented 
+ * using integer pixel units. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            MB that corresponds to the location of the current macroblock in 
+ *            the current plane. 
+ *   refWidth - width of the reference plane 
+ *   pRefRect - pointer to the valid reference plane rectangle; coordinates 
+ *            are specified relative to the image origin.  Rectangle 
+ *            boundaries may extend beyond image boundaries if the image has 
+ *            been padded.  For example, if padding extends 4 pixels beyond 
+ *            frame border, then the value for the left border could be set to 
+ *            -4. 
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock 
+ *            buffer extracted from the original plane (linear array, 256 
+ *            entries); must be aligned on a 16-byte boundary.  The number of 
+ *            bytes between lines (step) is 16. 
+ *   pCurrPointPos - position of the current macroblock in the current plane 
+ *   pSrcPreMV - pointer to predicted motion vector; NULL indicates no 
+ *            predicted MV 
+ *   pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced 
+ *            by pSrcPreMV); may be set to NULL if unavailable. 
+ *   pMESpec - vendor-specific motion estimation specification structure; 
+ *            must have been allocated and then initialized using 
+ *            omxVCM4P2_MEInit prior to calling the block matching function. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMV - pointer to estimated MV 
+ *   pDstSAD - pointer to minimum SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following 
+ *              conditions is true: 
+ *    -    at least one of the following pointers is NULL: pSrcRefBuf, 
+ *              pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or 
+ *              pMESpec, or 
+ *    -    pSrcCurrBuf is not 16-byte aligned 
+ *
+ */
+
+OMXResult omxVCM4P2_BlockMatch_Integer_16x16(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pCurrPointPos,
+     const OMXVCMotionVector *pSrcPreMV,
+     const OMX_INT *pSrcPreSAD,
+     void *pMESpec,
+     OMXVCMotionVector *pDstMV,
+     OMX_INT *pDstSAD
+)
+{
+    /* Edge length of the block being searched; the size-generic helper */
+    /* armVCM4P2_BlockMatch_Integer receives it as its last argument.   */
+    const OMX_U8 blockEdge = 16;
+    OMXResult status;
+
+    /* Only the 16-byte alignment of pSrcCurrBuf is verified here; every */
+    /* other argument is forwarded to the helper untouched.              */
+    armRetArgErrIf(!armIs16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);
+
+    /* Run the search in the helper and hand back whatever status it */
+    /* reports.                                                      */
+    status = armVCM4P2_BlockMatch_Integer(
+                 pSrcRefBuf, refWidth,
+                 pRefRect,
+                 pSrcCurrBuf, pCurrPointPos,
+                 pSrcPreMV, pSrcPreSAD,
+                 pMESpec,
+                 pDstMV, pDstSAD,
+                 blockEdge);
+
+    return status;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c
new file mode 100644
index 0000000..607e64c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c
@@ -0,0 +1,110 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_BlockMatch_Integer_8x8.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P2_BlockMatch_Integer_8x8   (6.2.4.2.2)
+ *
+ * Description:
+ * Performs an 8x8 block search; estimates motion vector and associated 
+ * minimum SAD.  Both the input and output motion vectors are represented 
+ * using half-pixel units, and therefore a shift left or right by 1 bit may be 
+ * required, respectively, to match the input or output MVs with other 
+ * functions that either generate output MVs or expect input MVs represented 
+ * using integer pixel units. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            block that corresponds to the location of the current 8x8 block 
+ *            in the current plane. 
+ *   refWidth - width of the reference plane 
+ *   pRefRect - pointer to the valid reference plane rectangle; coordinates 
+ *            are specified relative to the image origin.  Rectangle 
+ *            boundaries may extend beyond image boundaries if the image has 
+ *            been padded. 
+ *   pSrcCurrBuf - pointer to the current block in the current macroblock 
+ *            buffer extracted from the original plane (linear array, 128 
+ *            entries); must be aligned on an 8-byte boundary.  The number of 
+ *            bytes between lines (step) is 16 bytes. 
+ *   pCurrPointPos - position of the current block in the current plane 
+ *   pSrcPreMV - pointer to predicted motion vector; NULL indicates no 
+ *            predicted MV 
+ *   pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced 
+ *            by pSrcPreMV); may be set to NULL if unavailable. 
+ *   pMESpec - vendor-specific motion estimation specification structure; 
+ *            must have been allocated and then initialized using 
+ *            omxVCM4P2_MEInit prior to calling the block matching function. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMV - pointer to estimated MV 
+ *   pDstSAD - pointer to minimum SAD 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one of the following 
+ *              conditions is true: 
+ *    -    at least one of the following pointers is NULL: pSrcRefBuf, 
+ *              pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or 
+ *              pMESpec, or 
+ *    -    pSrcCurrBuf is not 8-byte aligned 
+ *
+ */
+
+OMXResult omxVCM4P2_BlockMatch_Integer_8x8(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pCurrPointPos,
+     const OMXVCMotionVector *pSrcPreMV,
+     const OMX_INT *pSrcPreSAD,
+     void *pMESpec,
+     OMXVCMotionVector *pDstMV,
+     OMX_INT *pDstSAD
+)
+{
+    /* Edge length of the block being searched; the size-generic helper */
+    /* armVCM4P2_BlockMatch_Integer receives it as its last argument.   */
+    const OMX_U8 blockEdge = 8;
+    OMXResult status;
+
+    /* Only the 8-byte alignment of pSrcCurrBuf is verified here; every */
+    /* other argument is forwarded to the helper untouched.             */
+    armRetArgErrIf(!armIs8ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);
+
+    /* Run the search in the helper and hand back its status as-is */
+    status = armVCM4P2_BlockMatch_Integer(
+                 pSrcRefBuf, refWidth, pRefRect,
+                 pSrcCurrBuf,
+                 pCurrPointPos,
+                 pSrcPreMV, pSrcPreSAD,
+                 pMESpec,
+                 pDstMV, pDstSAD,
+                 blockEdge);
+
+    return status;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DCT8x8blk.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DCT8x8blk.c
new file mode 100644
index 0000000..a077ac8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DCT8x8blk.c
@@ -0,0 +1,87 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_DCT8x8blk.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules for 8x8 block DCT
+ * 
+ */
+ 
+#include <math.h>
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armCOMM.h"
+#include "armVCM4P2_DCT_Table.h"
+
+/**
+ * Function:  omxVCM4P2_DCT8x8blk   (6.2.4.4.1)
+ *
+ * Description:
+ * Computes a 2D forward DCT for a single 8x8 block, as defined in 
+ * [ISO14496-2]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the start of the linearly arranged input buffer; must 
+ *            be aligned on a 16-byte boundary.  Input values (pixel 
+ *            intensities) are valid in the range [-255,255]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the start of the linearly arranged output buffer; must 
+ *            be aligned on a 16-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, returned if:
+ *    -    pSrc or pDst is NULL. 
+ *    -    pSrc or pDst is not 16-byte aligned. 
+ *
+ */
+
+OMXResult omxVCM4P2_DCT8x8blk (const OMX_S16 *pSrc, OMX_S16 *pDst)
+{
+    OMX_INT x, y, u, v;   /* x,y index the input sample; u,v index the output coefficient */
+    
+    /* Argument error checks */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    /* Direct (non-separable) evaluation: every output is a sum over all 64 inputs */
+    for (u = 0; u < 8; u++)
+    {
+        for (v = 0; v < 8; v++)
+        {
+            OMX_F64 sum = 0.0;   /* accumulated in floating point, rounded once below */
+            for (x = 0; x < 8; x++)
+            {
+                for (y = 0; y < 8; y++)
+                {
+                    sum += pSrc[(x * 8) + y] *
+                       armVCM4P2_preCalcDCTCos[x][u] *
+                       armVCM4P2_preCalcDCTCos[y][v];   /* NOTE(review): table presumably holds the pre-scaled cosine basis - confirm in armVCM4P2_DCT_Table.h */
+                }
+            }
+            pDst[(u * 8) + v]= armRoundFloatToS16 (sum);            /* output stored row-major at (u, v) */
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
new file mode 100644
index 0000000..51f7bab
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
@@ -0,0 +1,115 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_DecodeBlockCoef_Inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for inter reconstruction
+ * 
+ */
+ 
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function:  omxVCM4P2_DecodeBlockCoef_Inter   (6.2.5.4.2)
+ *
+ * Description:
+ * Decodes the INTER block coefficients. This function performs inverse 
+ * quantization, inverse zigzag positioning, and IDCT (with appropriate 
+ * clipping on each step) on the coefficients. The results (residuals) are 
+ * placed in a contiguous array of 64 elements. For INTER block, the output 
+ * buffer holds the residuals for further reconstruction. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the bit 
+ *            stream buffer. There is no boundary check for the bit stream 
+ *            buffer. 
+ *   pBitOffset - pointer to the bit position in the byte pointed to by 
+ *            *ppBitStream. *pBitOffset is valid within [0-7] 
+ *   QP - quantization parameter 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC 
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded, so 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the decoded residual buffer (a contiguous array of 64 
+ *            elements of OMX_S16 data type); must be aligned on a 16-byte 
+ *            boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *    -    At least one of the following pointers is Null: 
+ *         ppBitStream, *ppBitStream, pBitOffset , pDst 
+ *    -    *pBitOffset exceeds [0,7]
+ *    -    QP <= 0 or QP >= 32. 
+ *    -    pDst is not 16-byte aligned 
+ *    OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter . 
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT * pBitOffset,
+     OMX_S16 * pDst,
+     OMX_INT QP,
+     OMX_INT shortVideoHeader
+)
+{
+    /* Scratch for the 64 coefficients. It is over-allocated (79 entries) so
+       that a 16-byte aligned run of 64 OMX_S16 values always fits inside. */
+    OMX_S16 scratch[79];
+    OMX_S16 *pCoef = armAlignTo16Bytes(scratch);
+    OMXResult status;
+
+    /* Pointer checks; ppBitStream and pBitOffset are tested for NULL
+       before they are dereferenced further down. */
+    armRetArgErrIf(NULL == ppBitStream, OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == *ppBitStream, OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pBitOffset, OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pDst, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    /* QP is accepted in 1..31 and the bit offset in 0..7 */
+    armRetArgErrIf(((QP < 1) || (QP > 31)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((*pBitOffset < 0) || (*pBitOffset > 7)), OMX_Sts_BadArgErr);
+
+    /* Stage 1: VLC decode plus zigzag reordering; fills pCoef and advances
+       the bitstream position through ppBitStream / pBitOffset. */
+    status = omxVCM4P2_DecodeVLCZigzag_Inter(
+                 ppBitStream, pBitOffset, pCoef, shortVideoHeader);
+    armRetDataErrIf((OMX_Sts_NoErr != status), status);
+
+    /* Stage 2: dequantise the coefficients inside pCoef */
+    status = omxVCM4P2_QuantInvInter_I(pCoef, QP);
+    armRetDataErrIf((OMX_Sts_NoErr != status), status);
+
+    /* Stage 3: inverse DCT; reads pCoef and writes the residuals to pDst,
+       the buffer supplied by the caller. */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pDst);
+    armRetDataErrIf((OMX_Sts_NoErr != status), status);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
new file mode 100644
index 0000000..a0b2376
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
@@ -0,0 +1,225 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_DecodeBlockCoef_Intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for intra reconstruction
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P2_DecodeBlockCoef_Intra   (6.2.5.4.1)
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inversely 
+ * zigzag positioning, and IDCT, with appropriate clipping on each step, are 
+ * performed on the coefficients. The results are then placed in the output 
+ * frame/plane on a pixel basis.  Note: This function will be used only when 
+ * at least one non-zero AC coefficient of current block exists in the bit 
+ * stream. The DC only condition will be handled in another function. 
+ *
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the bit 
+ *            stream buffer. There is no boundary check for the bit stream 
+ *            buffer. 
+ *   pBitOffset - pointer to the bit position in the byte pointed to by 
+ *            *ppBitStream. *pBitOffset is valid within [0-7]. 
+ *   step - width of the destination plane 
+ *   pCoefBufRow - pointer to the coefficient row buffer; must be aligned on 
+ *            an 8-byte boundary. 
+ *   pCoefBufCol - pointer to the coefficient column buffer; must be aligned 
+ *            on an 8-byte boundary. 
+ *   curQP - quantization parameter of the macroblock which the current block 
+ *            belongs to 
+ *   pQPBuf - pointer to the quantization parameter buffer 
+ *   blockIndex - block index indicating the component type and position as 
+ *            defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5. 
+ *   intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a 
+ *            mechanism to switch between two VLC for coding of Intra DC 
+ *            coefficients as per [ISO14496-2], Table 6-21. 
+ *   ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if 
+ *            the ac coefficients of the first row or first column are 
+ *            differentially coded for intra coded macroblock. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC 
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded, so 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the block in the destination plane; must be aligned on 
+ *            an 8-byte boundary. 
+ *   pCoefBufRow - pointer to the updated coefficient row buffer. 
+ *   pCoefBufCol - pointer to the updated coefficient column buffer  Note: 
+ *            The coefficient buffers must be updated in accordance with the 
+ *            update procedure defined in section 6.2.2. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol, 
+ *         pQPBuf, pDst. 
+ *    -    *pBitOffset exceeds [0,7] 
+ *    -    curQP exceeds [1,31]
+ *    -    blockIndex exceeds [0,5]
+ *    -    step is not the multiple of 8
+ *    -    a pointer alignment requirement was violated. 
+ *    OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Intra.  
+ *
+ */
+
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT *pBitOffset,
+     OMX_U8 *pDst,
+     OMX_INT step,
+     OMX_S16 *pCoefBufRow,
+     OMX_S16 *pCoefBufCol,
+     OMX_U8 curQP,
+     const OMX_U8 *pQPBuf,
+     OMX_INT blockIndex,
+     OMX_INT intraDCVLC,
+     OMX_INT ACPredFlag,
+	 OMX_INT shortVideoHeader
+ )
+{
+    /*
+     * Decoding pipeline for one INTRA block:
+     *   1. validate the arguments
+     *   2. derive prediction direction, predictor QP and component type
+     *   3. VLC decode and inverse zigzag
+     *   4. AC/DC prediction and reconstruction
+     *   5. inverse quantisation
+     *   6. inverse transform
+     *   7. clip to [0,255] and store into the destination plane
+     */
+
+    /* Two scratch blocks of 64 coefficients each; the 15 spare elements per
+       array leave room for armAlignTo16Bytes() to find an aligned start. */
+    OMX_S16 coefStorage[79], pixelStorage[79];
+    OMX_S16 *pCoef, *pPixels;
+    OMX_INT predDir, predACDir, row, col;
+    OMX_INT predQP;
+    OMXVCM4P2VideoComponent videoComp;
+    OMXResult status;
+    OMX_U8 *pDstRow;
+
+    /* Reject NULL pointers before anything is dereferenced. */
+    armRetArgErrIf((ppBitStream == NULL) || (*ppBitStream == NULL),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((pBitOffset == NULL) || (pDst == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pCoefBufRow == NULL) || (pCoefBufCol == NULL),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(pQPBuf == NULL, OMX_Sts_BadArgErr);
+
+    /* Range and alignment requirements on the remaining arguments:
+         pDst        - 8-byte aligned
+         curQP       - within [1,31]
+         *pBitOffset - within [0,7]
+         blockIndex  - within [0,5]
+         step        - a multiple of 8 */
+    armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((curQP >= 1) && (curQP <= 31)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((*pBitOffset >= 0) && (*pBitOffset <= 7)),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((blockIndex >= 0) && (blockIndex <= 5)),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((step % 8) != 0, OMX_Sts_BadArgErr);
+
+    /* Aligned working pointers into the two scratch arrays. */
+    pCoef   = armAlignTo16Bytes(coefStorage);
+    pPixels = armAlignTo16Bytes(pixelStorage);
+
+    /* Ask the helper for the prediction direction and the QP of the
+       predictor block for this block position. */
+    armVCM4P2_SetPredDir(blockIndex,
+                         pCoefBufRow, pCoefBufCol,
+                         &predDir, &predQP,
+                         pQPBuf);
+
+    /* The predictor QP must obey the same [1,31] limits as curQP. */
+    armRetArgErrIf(!((predQP >= 1) && (predQP <= 31)), OMX_Sts_BadArgErr);
+
+    /* The VLC decoder receives predDir only when AC prediction is enabled;
+       otherwise it is told OMX_VC_NONE. */
+    predACDir = (ACPredFlag == 0) ? OMX_VC_NONE : predDir;
+
+    /* Blocks 0..3 are luminance, blocks 4 and 5 are chrominance. */
+    videoComp = (blockIndex <= 3) ? OMX_VC_LUMINANCE : OMX_VC_CHROMINANCE;
+
+    /* VLC decode and inverse zigzag; intraDCVLC == 1 selects the DCVLC
+       variant, any other value the ACVLC variant. */
+    if (intraDCVLC == 1)
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(
+                     ppBitStream, pBitOffset,
+                     pCoef,
+                     predACDir, shortVideoHeader, videoComp);
+    }
+    else
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraACVLC(
+                     ppBitStream, pBitOffset,
+                     pCoef,
+                     predACDir, shortVideoHeader);
+    }
+    armRetDataErrIf(status != OMX_Sts_NoErr, status);
+
+    /* AC/DC prediction and reconstruction; per the function header the
+       row and column coefficient buffers are updated by this step. */
+    status = omxVCM4P2_PredictReconCoefIntra(
+                 pCoef,
+                 pCoefBufRow, pCoefBufCol,
+                 curQP, predQP,
+                 predDir, ACPredFlag, videoComp);
+    armRetDataErrIf(status != OMX_Sts_NoErr, status);
+
+    /* Inverse quantisation of the scratch coefficients. */
+    status = omxVCM4P2_QuantInvIntra_I(pCoef, curQP, videoComp,
+                                       shortVideoHeader);
+    armRetDataErrIf(status != OMX_Sts_NoErr, status);
+
+    /* Inverse transform from the coefficient block into the pixel block. */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pPixels);
+    armRetDataErrIf(status != OMX_Sts_NoErr, status);
+
+    /* Copy the 8x8 result into the destination plane one row at a time,
+       clipping every sample to [0,255]; consecutive rows of the plane
+       are `step` bytes apart. */
+    pDstRow = pDst;
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            pDstRow[col] = armClip (0, 255, pPixels[(row * 8) + col]);
+        }
+        pDstRow += step;
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c
new file mode 100644
index 0000000..7e159b7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c
@@ -0,0 +1,243 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_DecodePadMV_PVOP.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains module for decoding MV and padding the same
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodePadMV_PVOP   (6.2.5.1.1)
+ *
+ * Description:
+ * Decodes and pads the four motion vectors associated with a non-intra P-VOP 
+ * macroblock.  For macroblocks of type OMX_VC_INTER4V, the output MV is 
+ * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for 
+ * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to 
+ * all four output MV buffer entries. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the bit 
+ *            stream buffer 
+ *   pBitOffset - pointer to the bit position in the byte pointed to by 
+ *            *ppBitStream. *pBitOffset is valid within [0-7]. 
+ *   pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the 
+ *            motion vector buffers of the macroblocks specially at the left, 
+ *            upper, and upper-right side of the current macroblock, 
+ *            respectively; a value of NULL indicates unavailability.  Note: 
+ *            Any neighborhood macroblock outside the current VOP or video 
+ *            packet or outside the current GOB (when short_video_header is 
+ *             1 ) for which gob_header_empty is  0  is treated as 
+ *            transparent, according to [ISO14496-2], subclause 7.6.5. 
+ *   fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream 
+ *            syntax 
+ *   MBType - the type of the current macroblock. If MBType is not equal to 
+ *            OMX_VC_INTER4V, the destination motion vector buffer is still 
+ *            filled with the same decoded vector. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded, so 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream 
+ *   pDstMVCurMB - pointer to the motion vector buffer for the current 
+ *            macroblock; contains four decoded motion vectors 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB 
+ *    -    *pBitOffset exceeds [0,7]
+ *    -    fcodeForward exceeds (0,7]
+ *    -    MBType less than zero
+ *    -    motion vector buffer is not 4-byte aligned. 
+ *    OMX_Sts_Err - status error 
+ *
+ */
+
+OMXResult omxVCM4P2_DecodePadMV_PVOP(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT * pBitOffset,
+     OMXVCMotionVector * pSrcMVLeftMB,
+     OMXVCMotionVector *pSrcMVUpperMB,
+     OMXVCMotionVector * pSrcMVUpperRightMB,
+     OMXVCMotionVector * pDstMVCurMB,
+     OMX_INT fcodeForward,
+     OMXVCM4P2MacroblockType MBType
+ )
+{
+    OMXVCMotionVector diffMV;
+    OMXVCMotionVector dstMVPredME[12];
+    OMX_INT iBlk, i, count = 1;
+    OMX_S32 mvHorResidual = 1, mvVerResidual = 1, mvHorData, mvVerData;
+    OMX_S8 scaleFactor, index;
+    OMX_S16 high, low, range;
+
+    /* Argument error checks */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstMVCurMB == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(((*pBitOffset < 0) || (*pBitOffset > 7)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((fcodeForward < 1) || (fcodeForward > 7)), \
+                    OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pDstMVCurMB), OMX_Sts_BadArgErr);
+
+    /* Intra macroblocks carry no motion data: nothing is read from the
+       stream and all four output vectors are set to zero. */
+    if ((MBType == OMX_VC_INTRA) || (MBType == OMX_VC_INTRA_Q))
+    {
+        for (i = 0; i < 4; i++)
+        {
+            pDstMVCurMB[i].dx = 0;
+            pDstMVCurMB[i].dy = 0;
+        }
+        return OMX_Sts_NoErr;
+    }
+
+    /* 4MV macroblocks code one vector per block; every other type codes a
+       single vector (count keeps its initial value of 1). */
+    if ((MBType == OMX_VC_INTER4V) || (MBType == OMX_VC_INTER4V_Q))
+    {
+        count = 4;
+    }
+
+    /* Scale factor f = 1 << (fcodeForward - 1) and the legal vector range
+       [low, high] derived from it ([ISO14496-2], subclause 7.6.3).
+       fcodeForward was checked to lie in [1,7], so f is at most 64. */
+    scaleFactor = 1 << (fcodeForward -1);
+    high =  ( 32 * scaleFactor) - 1;
+    low =   ( (-32) * scaleFactor);
+    range = ( 64 * scaleFactor);
+
+    /* Huffman decoding and MV reconstruction */
+    for (iBlk = 0; iBlk < count; iBlk++)
+    {
+        /* Huffman decoding to get Horizontal data and residual */
+        index = armUnPackVLC32(ppBitStream, pBitOffset,
+                                            armVCM4P2_aVlcMVD);
+        armRetDataErrIf(index == -1, OMX_Sts_Err);
+
+        mvHorData = index - 32;
+
+        if ((fcodeForward > 1) && (mvHorData != 0))
+        {
+            mvHorResidual = (OMX_S32) armGetBits(ppBitStream,
+                                            pBitOffset, (fcodeForward -1));
+        }
+
+        /* Huffman decoding to get Vertical data and residual */
+        index = armUnPackVLC32(ppBitStream, pBitOffset, armVCM4P2_aVlcMVD);
+        armRetDataErrIf(index == -1, OMX_Sts_Err);
+
+        mvVerData = index - 32;
+
+        if ((fcodeForward > 1) && (mvVerData != 0))
+        {
+            mvVerResidual = (OMX_S32) armGetBits(ppBitStream,
+                                            pBitOffset, (fcodeForward -1));
+        }
+
+        /* Calculating the differential MV.  Per [ISO14496-2], subclause
+           7.6.3: MVD = ((|data| - 1) * f) + residual + 1 with the sign of
+           data, where f is scaleFactor (1 << (fcodeForward - 1)) and not
+           fcodeForward itself; the two differ once fcodeForward > 2. */
+        if ( (scaleFactor == 1) || (mvHorData == 0) )
+        {
+            diffMV.dx = mvHorData;
+        }
+        else
+        {
+            diffMV.dx = ((armAbs(mvHorData) - 1) * scaleFactor)
+                         + mvHorResidual + 1;
+            if (mvHorData < 0)
+            {
+                diffMV.dx = -diffMV.dx;
+            }
+        }
+
+        if ( (scaleFactor == 1) || (mvVerData == 0) )
+        {
+            diffMV.dy = mvVerData;
+        }
+        else
+        {
+            diffMV.dy = ((armAbs(mvVerData) - 1) * scaleFactor)
+                         + mvVerResidual + 1;
+            if (mvVerData < 0)
+            {
+                diffMV.dy = -diffMV.dy;
+            }
+        }
+
+        /* Find the predicted vector (stored into pDstMVCurMB[iBlk]) */
+        omxVCM4P2_FindMVpred (
+            pDstMVCurMB,
+            pSrcMVLeftMB,
+            pSrcMVUpperMB,
+            pSrcMVUpperRightMB,
+            &pDstMVCurMB[iBlk],
+            dstMVPredME,
+            iBlk);
+
+        /* Adding the difference to the predicted MV to reconstruct MV */
+        pDstMVCurMB[iBlk].dx += diffMV.dx;
+        pDstMVCurMB[iBlk].dy += diffMV.dy;
+
+        /* Checking the range and keeping it within the limits */
+        if ( pDstMVCurMB[iBlk].dx < low )
+        {
+            pDstMVCurMB[iBlk].dx += range;
+        }
+        if (pDstMVCurMB[iBlk].dx > high)
+        {
+            pDstMVCurMB[iBlk].dx -= range;
+        }
+
+        if ( pDstMVCurMB[iBlk].dy < low )
+        {
+            pDstMVCurMB[iBlk].dy += range;
+        }
+        if (pDstMVCurMB[iBlk].dy > high)
+        {
+            pDstMVCurMB[iBlk].dy -= range;
+        }
+    }
+
+    /* A single decoded vector is replicated into the other three slots */
+    if ((MBType == OMX_VC_INTER) || (MBType == OMX_VC_INTER_Q))
+    {
+        pDstMVCurMB[1] = pDstMVCurMB[0];
+        pDstMVCurMB[2] = pDstMVCurMB[0];
+        pDstMVCurMB[3] = pDstMVCurMB[0];
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c
new file mode 100644
index 0000000..88a8d04
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c
@@ -0,0 +1,120 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_DecodeVLCZigzag_Inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for zigzag scanning and VLC decoding
+ * for inter block.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeVLCZigzag_Inter   (6.2.5.2.3)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one inter-coded block. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - double pointer to the current byte in the stream buffer 
+ *   pBitOffset - pointer to the next available bit in the current stream 
+ *            byte referenced by *ppBitStream. The parameter *pBitOffset is 
+ *            valid within the range [0-7]. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded such 
+ *            that it points to the current byte in the stream buffer 
+ *   pBitOffset - *pBitOffset is updated after decoding such that it points 
+ *            to the next available bit in the stream byte referenced by 
+ *            *ppBitStream 
+ *   pDst - pointer to the coefficient buffer of current block; must be 
+ *            4-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pDst
+ *    -    pDst is not 4-byte aligned
+ *    -   *pBitOffset exceeds [0,7]
+ *    OMX_Sts_Err - status error, if:
+ *    -    At least one mark bit is equal to zero 
+ *    -    Encountered an illegal stream code that cannot be found in the VLC table 
+ *    -    Encountered an illegal code in the VLC FLC table 
+ *    -    The number of coefficients is greater than 64 
+ *
+ */
+
+OMXResult omxVCM4P2_DecodeVLCZigzag_Inter(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT * pBitOffset,
+     OMX_S16 * pDst,
+     OMX_INT shortVideoHeader
+)
+{
+    OMX_U8  last,start = 0;
+    const OMX_U8  *pZigzagTable = armVCM4P2_aClassicalZigzagScan;
+    OMXResult errorCode;
+
+    /* Argument error checks */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pDst), OMX_Sts_BadArgErr);
+    /* The documented [0,7] limit on *pBitOffset is enforced here as well,
+       matching omxVCM4P2_DecodeVLCZigzag_IntraDCVLC. */
+    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset > 7), OMX_Sts_BadArgErr);
+
+    /* Decode the block with the inter VLC tables and the classical zigzag
+       scan, passing start = 0.  The four numeric arguments are table
+       limits whose meaning is defined by armVCM4P2_GetVLCBits (not visible
+       here); their order must not change. */
+    errorCode = armVCM4P2_GetVLCBits (
+              ppBitStream, pBitOffset, pDst, shortVideoHeader,
+              start, &last,
+              11, 42, 2, 5,
+              armVCM4P2_InterL0RunIdx, armVCM4P2_InterVlcL0,
+              armVCM4P2_InterL1RunIdx, armVCM4P2_InterVlcL1,
+              armVCM4P2_InterL0LMAX, armVCM4P2_InterL1LMAX,
+              armVCM4P2_InterL0RMAX, armVCM4P2_InterL1RMAX,
+              pZigzagTable );
+    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);
+
+    /* GetVLCBits reports through last (presumably the "last coefficient"
+       flag of the final decoded event - confirm against the helper); a
+       value of zero is treated as a stream error. */
+    if (last == 0)
+    {
+        return OMX_Sts_Err;
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c
new file mode 100644
index 0000000..96593d1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c
@@ -0,0 +1,103 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for zigzag scanning and VLC decoding
+ * for intra block.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeVLCZigzag_IntraACVLC   (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients 
+ * for one intra block.  Two versions of the function (DCVLC and ACVLC) are 
+ * provided in order to support the two different methods of processing DC 
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4,  Intra DC 
+ * Coefficient Decoding for the Case of Switched VLC Encoding.  
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the 
+ *            bitstream buffer 
+ *   pBitOffset - pointer to the bit position in the current byte referenced
+ *            by *ppBitStream; *pBitOffset is valid in the range [0-7].
+ *            Bit Position in one byte:  |Most      Least|
+ *                    *pBitOffset        |0 1 2 3 4 5 6 7|
+ *   predDir - AC prediction direction; used to select the zigzag scan 
+ *            pattern; takes one of the following values: OMX_VC_NONE - AC 
+ *            prediction not used; performs classical zigzag scan. 
+ *            OMX_VC_HORIZONTAL - Horizontal prediction; performs 
+ *            alternate-vertical zigzag scan; OMX_VC_VERTICAL - Vertical 
+ *            prediction; performs alternate-horizontal zigzag scan. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *   videoComp - video component type (luminance or chrominance) of the
+ *            current block; parameter of the DCVLC variant only
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded such 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated such that it points to the current 
+ *            bit position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the coefficient buffer of current block; must be 
+ *            4-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if at least one of the following
+ *              pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst;
+ *              or if at least one of the following conditions is true:
+ *              *pBitOffset exceeds [0,7], predDir exceeds [0,2], or pDst is
+ *              not 4-byte aligned.
+ *    OMX_Sts_Err - status error, if: in DecodeVLCZigzag_IntraDCVLC, dc_size
+ *              > 12; at least one mark bit equals zero; an illegal stream
+ *              code cannot be located in the VLC table; a forbidden code is
+ *              found in the VLC FLC table; or more than 64 coefficients.
+ *
+ */
+
+
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT * pBitOffset,
+     OMX_S16 * pDst,
+     OMX_U8 predDir,
+     OMX_INT shortVideoHeader
+)
+{
+    /* Nothing is decoded here ahead of the shared intra routine, so it is
+       handed a start value of 0 (presumably the index of the first
+       coefficient to decode - confirm against the helper). */
+    const OMX_U8 startPos = 0;
+    OMXResult status;
+
+    status = armVCM4P2_DecodeVLCZigzag_Intra(ppBitStream, pBitOffset, pDst,
+                                   predDir, shortVideoHeader, startPos);
+    return status;
+}
+
+/* End of file */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c
new file mode 100644
index 0000000..95e00d7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c
@@ -0,0 +1,170 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for zigzag scanning and VLC decoding
+ * for intra block.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+
+
+/**
+ * Function:  omxVCM4P2_DecodeVLCZigzag_IntraDCVLC   (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients 
+ * for one intra block.  Two versions of the function (DCVLC and ACVLC) are 
+ * provided in order to support the two different methods of processing DC 
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4,  Intra DC 
+ * Coefficient Decoding for the Case of Switched VLC Encoding.  
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the 
+ *            bitstream buffer 
+ *   pBitOffset - pointer to the bit position in the current byte referenced 
+ *            by *ppBitStream.  The parameter *pBitOffset is valid in the 
+ *            range [0-7]. 
+ *            Bit Position in one byte:  |Most      Least| 
+ *                    *pBitOffset        |0 1 2 3 4 5 6 7| 
+ *   predDir - AC prediction direction; used to select the zigzag scan 
+ *            pattern; takes one of the following values: 
+ *            -  OMX_VC_NONE - AC prediction not used; 
+ *                             performs classical zigzag scan. 
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction; 
+ *                             performs alternate-vertical zigzag scan; 
+ *            -  OMX_VC_VERTICAL - Vertical prediction; 
+ *                             performs alternate-horizontal zigzag scan. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *   videoComp - video component type (luminance or chrominance) of the 
+ *            current block 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is decoded such 
+ *            that it points to the current byte in the bit stream buffer 
+ *   pBitOffset - *pBitOffset is updated such that it points to the current 
+ *            bit position in the byte pointed by *ppBitStream 
+ *   pDst - pointer to the coefficient buffer of current block; must be 
+ *            4-byte aligned. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *    -    At least one of the following pointers is NULL: 
+ *         ppBitStream, *ppBitStream, pBitOffset, pDst
+ *    -    *pBitOffset exceeds [0,7]
+ *    -    predDir exceeds [0,2]
+ *    -    pDst is not 4-byte aligned 
+ *    OMX_Sts_Err - if:
+ *    -    In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 
+ *    -    At least one of mark bits equals zero 
+ *    -    Illegal stream encountered; code cannot be located in VLC table 
+ *    -    Forbidden code encountered in the VLC FLC table. 
+ *    -    The number of coefficients is greater than 64 
+ *
+ */
+
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT * pBitOffset,
+     OMX_S16 * pDst,
+     OMX_U8 predDir,
+     OMX_INT shortVideoHeader,
+     OMXVCM4P2VideoComponent videoComp
+)
+{
+    /* Stays 0 (DC coefficient of zero, no DC bits consumed) when videoComp
+       is neither luminance nor chrominance. */
+    OMX_S8  dcSize = 0;
+    OMX_U16 dcBits, fieldLimit;
+    OMX_S16 magnitude;
+    const OMX_U8 start = 1;
+
+    /* Reject NULL pointers before anything is dereferenced, then check:
+       pDst 4-byte aligned, *pBitOffset in [0,7], predDir in [0,2]. */
+    armRetArgErrIf((ppBitStream == NULL) || (*ppBitStream == NULL),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((pBitOffset == NULL) || (pDst == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((*pBitOffset >= 0) && (*pBitOffset <= 7)),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(!(predDir <= 2), OMX_Sts_BadArgErr);
+
+    /* Read the dc_size VLC using the table for the block's component. */
+    switch (videoComp)
+    {
+        case OMX_VC_LUMINANCE:
+            dcSize = armUnPackVLC32(ppBitStream, pBitOffset,
+                                    armVCM4P2_aIntraDCLumaIndex);
+            break;
+        case OMX_VC_CHROMINANCE:
+            dcSize = armUnPackVLC32(ppBitStream, pBitOffset,
+                                    armVCM4P2_aIntraDCChromaIndex);
+            break;
+        default:
+            break;
+    }
+    armRetDataErrIf(dcSize == -1, OMX_Sts_Err);
+    armRetDataErrIf(dcSize > 12, OMX_Sts_Err);
+
+    if (dcSize != 0)
+    {
+        dcBits = (OMX_U16) armGetBits(ppBitStream, pBitOffset, dcSize);
+        if ((dcBits >> (dcSize - 1)) != 0)
+        {
+            /* Leading bit set: the field is the value itself. */
+            pDst[0] = dcBits;
+        }
+        else
+        {
+            /* Leading bit clear: invert the dcSize-bit field to get the
+               magnitude and store it negated. */
+            fieldLimit = (1 << dcSize);
+            magnitude = (OMX_S16) (dcBits ^ (fieldLimit - 1));
+            pDst[0] = -magnitude;
+        }
+
+        /* Sizes above 8 are followed by a marker bit that must be 1. */
+        if (dcSize > 8)
+        {
+            armRetDataErrIf (armGetBits(ppBitStream, pBitOffset, 1) == 0,
+                             OMX_Sts_Err);
+        }
+    }
+    else
+    {
+        pDst[0] = 0;
+    }
+
+    /* pDst[0] is done; the shared intra routine is called with start = 1. */
+    return armVCM4P2_DecodeVLCZigzag_Intra(ppBitStream, pBitOffset, pDst,
+                                           predDir, shortVideoHeader, start);
+}
+
+/* End of file */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeMV.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeMV.c
new file mode 100644
index 0000000..def2b6d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeMV.c
@@ -0,0 +1,212 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_EncodeMV.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains module for predicting MV of MB
+ *
+ */ 
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeMV   (6.2.4.5.4)
+ *
+ * Description:
+ * Predicts a motion vector for the current macroblock, encodes the 
+ * difference, and writes the output to the stream buffer. The input MVs 
+ * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie 
+ * within the ranges associated with the input parameter fcodeForward, as 
+ * described in [ISO14496-2], subclause 7.6.3.  A difference beyond the
+ * codable span is folded back by one MV range before it is encoded.
+ * Provides a superset of the functionality of omxVCM4P2_FindMVpred.
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - double pointer to the current byte in the bitstream buffer 
+ *   pBitOffset - index of the first free (next available) bit in the stream 
+ *            buffer referenced by *ppBitStream, valid in the range 0 to 7. 
+ *   pMVCurMB - pointer to the current macroblock motion vector; a value of 
+ *            NULL indicates unavailability. 
+ *   pSrcMVLeftMB - pointer to the source left macroblock motion vector; a 
+ *            value of NULL indicates unavailability.
+ *   pSrcMVUpperMB - pointer to source upper macroblock motion vector; a 
+ *            value of NULL indicates unavailability. 
+ *   pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a 
+ *            value of NULL indicates unavailability. 
+ *   fcodeForward - an integer with values from 1 to 7; used in encoding 
+ *            motion vectors related to search range, as described in 
+ *            [ISO14496-2], subclause 7.6.3. 
+ *   MBType - macro block type, valid in the range 0 to 5 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - updated pointer to the current byte in the bit stream 
+ *            buffer 
+ *   pBitOffset - updated index of the next available bit position in stream 
+ *            buffer referenced by *ppBitStream 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error (intra MB types write nothing)
+ *    OMX_Sts_BadArgErr - bad arguments 
+ *    -    At least one of the following pointers is NULL: ppBitStream, 
+ *              *ppBitStream, pBitOffset, pMVCurMB 
+ *    -    *pBitOffset < 0, or *pBitOffset >7. 
+ *    -    fcodeForward <= 0, or fcodeForward > 7 (MBType is not checked).
+ *    -    a differential MV is not codable even after range folding.
+ */
+
+OMXResult omxVCM4P2_EncodeMV(
+     OMX_U8 **ppBitStream,
+     OMX_INT *pBitOffset,
+     const OMXVCMotionVector * pMVCurMB,
+     const OMXVCMotionVector * pSrcMVLeftMB,
+     const OMXVCMotionVector * pSrcMVUpperMB,
+     const OMXVCMotionVector * pSrcMVUpperRightMB,
+     OMX_INT fcodeForward,
+     OMXVCM4P2MacroblockType MBType
+)
+{
+    OMXVCMotionVector dstMVPred;
+    OMXVCMotionVector dstMVPredME[12];
+    /* Initialized to remove compilation warning */
+    OMX_INT iBlk, count = 1;
+    OMX_S32 mvHorResidual, mvVerResidual, mvHorData, mvVerData;
+    OMX_S32 diffX, diffY, mvLimit, mvRange;
+    OMX_U8 scaleFactor, index;
+
+    /* Argument error checks */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMVCurMB == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(((*pBitOffset < 0) || (*pBitOffset > 7)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((fcodeForward < 1) || (fcodeForward > 7)), \
+                    OMX_Sts_BadArgErr);
+
+    /* Intra macroblocks carry no motion vector, so nothing is written */
+    if ((MBType == OMX_VC_INTRA) || (MBType == OMX_VC_INTRA_Q))
+    {
+        return OMX_Sts_NoErr;
+    }
+
+    if ((MBType == OMX_VC_INTER4V) || (MBType == OMX_VC_INTER4V_Q))
+    {
+        count = 4;
+    }
+    else if ((MBType == OMX_VC_INTER) || (MBType == OMX_VC_INTER_Q))
+    {
+        count = 1;
+    }
+
+    /* Calculating the scale factor f and, from it, the largest codable
+       difference (32 * f) and the MV range (64 * f) of subclause 7.6.3 */
+    scaleFactor = 1 << (fcodeForward -1);
+    mvLimit = 32 * scaleFactor;
+    mvRange = 64 * scaleFactor;
+
+    for (iBlk = 0; iBlk < count; iBlk++)
+    {
+        /* Find the predicted vector (its arguments were validated above) */
+        omxVCM4P2_FindMVpred (
+            pMVCurMB,
+            pSrcMVLeftMB,
+            pSrcMVUpperMB,
+            pSrcMVUpperRightMB,
+            &dstMVPred,
+            dstMVPredME,
+            iBlk );
+
+        /* Calculating the differential motion vector */
+        diffX = pMVCurMB[iBlk].dx - dstMVPred.dx;
+        diffY = pMVCurMB[iBlk].dy - dstMVPred.dy;
+
+        /* A difference beyond +/-(32 * f) has no entry in the MVD table.
+           Fold it back by one range; the decoder's own wrap-around
+           (subclause 7.6.3) then reconstructs the same vector. */
+        if (diffX < -mvLimit) { diffX += mvRange; }
+        else if (diffX > mvLimit) { diffX -= mvRange; }
+        if (diffY < -mvLimit) { diffY += mvRange; }
+        else if (diffY > mvLimit) { diffY -= mvRange; }
+
+        /* Still out of range: the input MVs violate the fcode limits */
+        armRetArgErrIf((diffX < -mvLimit) || (diffX > mvLimit), OMX_Sts_BadArgErr);
+        armRetArgErrIf((diffY < -mvLimit) || (diffY > mvLimit), OMX_Sts_BadArgErr);
+
+        /* Calculating the mv_data and mv_residual for Horizontal MV */
+        if (diffX == 0)
+        {
+            mvHorResidual = 0;
+            mvHorData = 0;
+        }
+        else
+        {
+            mvHorResidual = ( armAbs(diffX) - 1) % scaleFactor;
+            mvHorData = (armAbs(diffX) - mvHorResidual + (scaleFactor - 1))
+                     / scaleFactor;
+            if (diffX < 0)
+            {
+                mvHorData = -mvHorData;
+            }
+        }
+
+        /* Calculating the mv_data and mv_residual for Vertical MV */
+        if (diffY == 0)
+        {
+            mvVerResidual = 0;
+            mvVerData = 0;
+        }
+        else
+        {
+            mvVerResidual = ( armAbs(diffY) - 1) % scaleFactor;
+            mvVerData = (armAbs(diffY) - mvVerResidual + (scaleFactor - 1))
+                     / scaleFactor;
+            if (diffY < 0)
+            {
+                mvVerData = -mvVerData;
+            }
+        }
+
+        /* Huffman encoding.  The index is actually calculated as
+           index = ((float) (mvHorData/2) + 16) * 2, meaning the MV data is
+           halved, normalized to begin with zero and then doubled to take
+           care of indexing the fractional part included */
+        index = mvHorData + 32;
+        armPackVLC32 (ppBitStream, pBitOffset, armVCM4P2_aVlcMVD[index]);
+        if ((fcodeForward > 1) && (diffX != 0))
+        {
+            armPackBits (ppBitStream, pBitOffset, mvHorResidual, (fcodeForward -1));
+        }
+
+        /* Same index mapping for the vertical component */
+        index = mvVerData + 32;
+        armPackVLC32 (ppBitStream, pBitOffset, armVCM4P2_aVlcMVD[index]);
+        if ((fcodeForward > 1) && (diffY != 0))
+        {
+            armPackBits (ppBitStream, pBitOffset, mvVerResidual, (fcodeForward -1));
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c
new file mode 100644
index 0000000..b6c73ea
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c
@@ -0,0 +1,112 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_EncodeVLCZigzag_Inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for zigzag scanning and VLC encoding
+ * for inter block.
+ *
+ */ 
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeVLCZigzag_Inter   (6.2.4.5.3)
+ *
+ * Description:
+ * Performs classical zigzag scanning and VLC encoding for one inter block. 
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - pointer to the pointer to the current byte in the bit 
+ *            stream 
+ *   pBitOffset - pointer to the bit position in the byte pointed by 
+ *            *ppBitStream. Valid within 0 to 7 
+ *   pQDctBlkCoef - pointer to the quantized DCT coefficient 
+ *   pattern - block pattern which is used to decide whether this block is 
+ *            encoded 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is encoded so that 
+ *            it points to the current byte in the bit stream buffer. 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - Bad arguments 
+ *    -    At least one of the pointers: is NULL: ppBitStream, *ppBitStream, 
+ *              pBitOffset, pQDctBlkCoef 
+ *    -   *pBitOffset < 0, or *pBitOffset >7. 
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_Inter(
+     OMX_U8 **ppBitStream,
+     OMX_INT * pBitOffset,
+     const OMX_S16 *pQDctBlkCoef,
+     OMX_U8 pattern,
+     OMX_INT shortVideoHeader
+)
+{
+    /* Argument error checks: NULL pointers, then the bit offset range */
+    armRetArgErrIf((ppBitStream == NULL) || (*ppBitStream == NULL) ||
+                   (pBitOffset == NULL) || (pQDctBlkCoef == NULL),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset >7), OMX_Sts_BadArgErr);
+
+    /* A zero pattern means there is nothing to encode for this block */
+    if (!pattern)
+    {
+        return OMX_Sts_NoErr;
+    }
+
+    /* The literals 26, 40, 10 and 1 are presumably run limits of the L0/L1
+       tables - TODO confirm against the armVCM4P2_PutVLCBits prototype */
+    armVCM4P2_PutVLCBits (
+          ppBitStream,
+          pBitOffset,
+          pQDctBlkCoef,
+          shortVideoHeader,
+          0,
+          26,
+          40,
+          10,
+          1,
+          armVCM4P2_InterL0RunIdx,
+          armVCM4P2_InterVlcL0,
+          armVCM4P2_InterL1RunIdx,
+          armVCM4P2_InterVlcL1,
+          armVCM4P2_InterL0LMAX,
+          armVCM4P2_InterL1LMAX,
+          armVCM4P2_InterL0RMAX,
+          armVCM4P2_InterL1RMAX,
+          armVCM4P2_aClassicalZigzagScan
+    );
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c
new file mode 100644
index 0000000..d047942
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c
@@ -0,0 +1,97 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for zigzag scanning and VLC encoding
+ * for intra block.
+ *
+ */ 
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+
+
+/**
+ * Function:  omxVCM4P2_EncodeVLCZigzag_IntraACVLC   (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one 
+ * intra block.  Two versions of the function (DCVLC and ACVLC) are provided 
+ * in order to support the two different methods of processing DC 
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4,  Intra DC 
+ * Coefficient Decoding for the Case of Switched VLC Encoding.  
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - double pointer to the current byte in the bitstream 
+ *   pBitOffset - pointer to the bit position in the byte pointed by 
+ *            *ppBitStream. Valid within 0 to 7. 
+ *   pQDctBlkCoef - pointer to the quantized DCT coefficient 
+ *   predDir - AC prediction direction, which is used to decide the zigzag 
+ *            scan pattern; takes one of the following values: 
+ *            -  OMX_VC_NONE - AC prediction not used.  
+ *                             Performs classical zigzag scan. 
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction.  
+ *                             Performs alternate-vertical zigzag scan. 
+ *            -  OMX_VC_VERTICAL - Vertical prediction.  
+ *                             Performs alternate-horizontal zigzag scan. 
+ *   pattern - block pattern which is used to decide whether this block is 
+ *            encoded 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is encoded, so 
+ *            that it points to the current byte in the bit stream buffer. 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream, 
+ *              *ppBitStream, pBitOffset, pQDctBlkCoef. 
+ *    -   *pBitOffset < 0, or *pBitOffset >7. 
+ *    -    PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or 
+ *         OMX_VC_VERTICAL. 
+ *    -    VideoComp is not one component of enum OMXVCM4P2VideoComponent. 
+ *
+ */
+
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC(
+     OMX_U8 **ppBitStream,
+     OMX_INT *pBitOffset,
+     const OMX_S16 *pQDctBlkCoef,
+     OMX_U8 predDir,
+     OMX_U8 pattern,
+     OMX_INT shortVideoHeader
+)
+{
+    /* Thin wrapper: forwards every argument to the shared intra encoder
+       with a start value of 0; no checks or bitstream writes happen here */
+    return armVCM4P2_EncodeVLCZigzag_Intra(
+                ppBitStream,
+                pBitOffset,
+                pQDctBlkCoef,
+                predDir,
+                pattern,
+                shortVideoHeader,
+                0);
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c
new file mode 100644
index 0000000..c57acd2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c
@@ -0,0 +1,160 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for zigzag scanning and VLC encoding
+ * for intra block.
+ *
+ */ 
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeVLCZigzag_IntraDCVLC   (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one 
+ * intra block.  Two versions of the function (DCVLC and ACVLC) are provided 
+ * in order to support the two different methods of processing DC 
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC 
+ * Coefficient Decoding for the Case of Switched VLC Encoding".  
+ *
+ * Input Arguments:
+ *   
+ *   ppBitStream - double pointer to the current byte in the bitstream 
+ *   pBitOffset - pointer to the bit position in the byte pointed by 
+ *            *ppBitStream. Valid within 0 to 7. 
+ *   pQDctBlkCoef - pointer to the quantized DCT coefficient 
+ *   predDir - AC prediction direction, which is used to decide the zigzag 
+ *            scan pattern; takes one of the following values: 
+ *            -  OMX_VC_NONE - AC prediction not used.  
+ *                             Performs classical zigzag scan. 
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction.  
+ *                             Performs alternate-vertical zigzag scan. 
+ *            -  OMX_VC_VERTICAL - Vertical prediction.  
+ *                             Performs alternate-horizontal zigzag scan. 
+ *   pattern - block pattern which is used to decide whether this block is 
+ *            encoded 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; escape modes 0-3 are used if 
+ *            shortVideoHeader==0, and escape mode 4 is used when 
+ *            shortVideoHeader==1. 
+ *   videoComp - video component type (luminance, chrominance) of the current 
+ *            block 
+ *
+ * Output Arguments:
+ *   
+ *   ppBitStream - *ppBitStream is updated after the block is encoded, so 
+ *            that it points to the current byte in the bit stream buffer. 
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit 
+ *            position in the byte pointed by *ppBitStream. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream, 
+ *              *ppBitStream, pBitOffset, pQDctBlkCoef. 
+ *    -   *pBitOffset < 0, or *pBitOffset >7. 
+ *    -    PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or 
+ *         OMX_VC_VERTICAL. 
+ *    -    VideoComp is not one component of enum OMXVCM4P2VideoComponent. 
+ *
+ */
+
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC(
+     OMX_U8 **ppBitStream,
+     OMX_INT *pBitOffset,
+     const OMX_S16 *pQDctBlkCoef,
+     OMX_U8 predDir,
+     OMX_U8 pattern,
+     OMX_INT shortVideoHeader,
+     OMXVCM4P2VideoComponent videoComp
+)
+{
+    OMX_S16 dcCoef, twoPowSize;
+    OMX_U8  dcSize;
+    OMX_U16 dcMagnitude;
+
+    /* Argument error checks */
+    armRetArgErrIf((ppBitStream == NULL) || (*ppBitStream == NULL) ||
+                   (pBitOffset == NULL) || (pQDctBlkCoef == NULL),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset >7), OMX_Sts_BadArgErr);
+    armRetArgErrIf((videoComp != OMX_VC_LUMINANCE) &&
+                   (videoComp != OMX_VC_CHROMINANCE), OMX_Sts_BadArgErr);
+    armRetArgErrIf((predDir != OMX_VC_NONE) &&
+                   (predDir != OMX_VC_HORIZONTAL) &&
+                   (predDir != OMX_VC_VERTICAL), OMX_Sts_BadArgErr);
+
+    if (pattern)
+    {
+        /* Split the DC coefficient into its magnitude and its bit size */
+        dcCoef = pQDctBlkCoef[0];
+        dcMagnitude = armAbs(dcCoef);
+        dcSize = armLogSize (dcMagnitude);
+        /* NOTE(review): recomputed as before; looks redundant if armLogSize
+           takes its argument by value - confirm before removing */
+        dcMagnitude = armAbs(dcCoef);
+
+        /* Insert the size code, using the table of the video component */
+        if (videoComp == OMX_VC_LUMINANCE)
+        {
+            armPackVLC32 (ppBitStream, pBitOffset,
+                          armVCM4P2_aIntraDCLumaIndex[dcSize]);
+        }
+        else if (videoComp == OMX_VC_CHROMINANCE)
+        {
+            armPackVLC32 (ppBitStream, pBitOffset,
+                          armVCM4P2_aIntraDCChromaIndex[dcSize]);
+        }
+
+        /* A non-zero size is followed by dcSize value bits; a negative
+           value is sent as its magnitude with those bits inverted */
+        if (dcSize > 0)
+        {
+            if (dcCoef < 0)
+            {
+                twoPowSize = (1 << dcSize);
+                dcMagnitude =  dcMagnitude ^ (twoPowSize - 1);
+            }
+            armPackBits(ppBitStream, pBitOffset, (OMX_U32)dcMagnitude,
+                        dcSize);
+
+            /* Sizes above 8 are followed by a single marker bit of 1 */
+            if (dcSize > 8)
+            {
+                armPackBits(ppBitStream, pBitOffset, 1, 1);
+            }
+        }
+    }
+    /* Hand over to the shared intra encoder with a start value of 1 */
+    return armVCM4P2_EncodeVLCZigzag_Intra(
+                ppBitStream,
+                pBitOffset,
+                pQDctBlkCoef,
+                predDir,
+                pattern,
+                shortVideoHeader,
+                1);
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_FindMVpred.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_FindMVpred.c
new file mode 100644
index 0000000..a0cff48
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_FindMVpred.c
@@ -0,0 +1,188 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_FindMVpred.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains module for predicting MV of MB
+ *
+ */
+  
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P2_FindMVpred   (6.2.3.1.1)
+ *
+ * Description:
+ * Predicts a motion vector for the current block using the procedure 
+ * specified in [ISO14496-2], subclause 7.6.5.  The resulting predicted MV is 
+ * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then 
+ * the set of three MV candidates used for prediction is also returned, 
+ * otherwise pDstMVPredME is NULL upon return. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcMVCurMB - pointer to the MV buffer associated with the current Y 
+ *            macroblock; a value of NULL indicates unavailability. 
+ *   pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated 
+ *            with the MB located to the left of the current MB; set to NULL 
+ *            if there is no MB to the left. 
+ *   pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated 
+ *            with the MB located above the current MB; set to NULL if there 
+ *            is no MB located above the current MB. 
+ *   pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated 
+ *            with the MB located to the right and above the current MB; set 
+ *            to NULL if there is no MB located to the above-right. 
+ *   iBlk - the index of block in the current macroblock 
+ *   pDstMVPredME - MV candidate return buffer;  if set to NULL then 
+ *            prediction candidate MVs are not returned and pDstMVPredME will 
+ *            be NULL upon function return; if pDstMVPredME is non-NULL then it 
+ *            must point to a buffer containing sufficient space for three 
+ *            return MVs. 
+ *
+ * Output Arguments:
+ *   
+ *   pDstMVPred - pointer to the predicted motion vector 
+ *   pDstMVPredME - if non-NULL upon input then pDstMVPredME  points upon 
+ *            return to a buffer containing the three motion vector candidates 
+ *            used for prediction as specified in [ISO14496-2], subclause 
+ *            7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL 
+ *            upon output. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -    the pointer pDstMVPred is NULL 
+ *    -    the parameter iBlk does not fall into the range 0 <= iBlk<=3 
+ *
+ */
+
+OMXResult omxVCM4P2_FindMVpred(
+     const OMXVCMotionVector* pSrcMVCurMB,
+     const OMXVCMotionVector* pSrcCandMV1,
+     const OMXVCMotionVector* pSrcCandMV2,
+     const OMXVCMotionVector* pSrcCandMV3,
+     OMXVCMotionVector* pDstMVPred,
+     OMXVCMotionVector* pDstMVPredME,
+     OMX_INT iBlk
+ )
+{
+    /* Zero vector standing in for every candidate that is unavailable */
+    OMXVCMotionVector zeroMV;
+    const OMXVCMotionVector *pCand[3];
+    OMX_INT k;
+
+    /* Argument error checks */
+    armRetArgErrIf(iBlk!=0 && pSrcMVCurMB == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstMVPred == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlk < 0) || (iBlk > 3), OMX_Sts_BadArgErr);
+
+    zeroMV.dx = 0;
+    zeroMV.dy = 0;
+    /* Default every candidate to zero; available ones are replaced below */
+    for (k = 0; k < 3; k++)
+    {
+        pCand[k] = &zeroMV;
+    }
+
+    if (iBlk == 0)
+    {
+        /* Block 0: all candidates come from the neighbouring macroblocks */
+        if (pSrcCandMV1 != NULL)
+        {
+            pCand[0] = &pSrcCandMV1[1];
+        }
+        if (pSrcCandMV2 != NULL)
+        {
+            pCand[1] = &pSrcCandMV2[2];
+        }
+        if (pSrcCandMV3 != NULL)
+        {
+            pCand[2] = &pSrcCandMV3[2];
+        }
+
+        /* With two neighbours missing, the third one is used for all three */
+        if ((pSrcCandMV1 == NULL) && (pSrcCandMV2 == NULL))
+        {
+            pCand[0] = pCand[2];
+            pCand[1] = pCand[2];
+        }
+        else if ((pSrcCandMV1 == NULL) && (pSrcCandMV3 == NULL))
+        {
+            pCand[0] = pCand[1];
+            pCand[2] = pCand[1];
+        }
+        else if ((pSrcCandMV2 == NULL) && (pSrcCandMV3 == NULL))
+        {
+            pCand[1] = pCand[0];
+            pCand[2] = pCand[0];
+        }
+    }
+    else if (iBlk == 1)
+    {
+        /* Block 1: first candidate is block 0 of the current macroblock */
+        pCand[0] = &pSrcMVCurMB[0];
+        if (pSrcCandMV2 != NULL)
+        {
+            pCand[1] = &pSrcCandMV2[3];
+        }
+        if (pSrcCandMV3 != NULL)
+        {
+            pCand[2] = &pSrcCandMV3[2];
+        }
+        if ((pSrcCandMV2 == NULL) && (pSrcCandMV3 == NULL))
+        {
+            pCand[1] = pCand[0];
+            pCand[2] = pCand[0];
+        }
+    }
+    else if (iBlk == 2)
+    {
+        /* Block 2: only the first candidate lies outside the macroblock */
+        if (pSrcCandMV1 != NULL)
+        {
+            pCand[0] = &pSrcCandMV1[3];
+        }
+        pCand[1] = &pSrcMVCurMB[0];
+        pCand[2] = &pSrcMVCurMB[1];
+    }
+    else if (iBlk == 3)
+    {
+        /* Block 3: every candidate is a block of the current macroblock */
+        pCand[0] = &pSrcMVCurMB[2];
+        pCand[1] = &pSrcMVCurMB[0];
+        pCand[2] = &pSrcMVCurMB[1];
+    }
+
+    /* Find the median of the 3 candidate MV's */
+    pDstMVPred->dx = armMedianOf3 (pCand[0]->dx, pCand[1]->dx, pCand[2]->dx);
+    pDstMVPred->dy = armMedianOf3 (pCand[0]->dy, pCand[1]->dy, pCand[2]->dy);
+
+    if (pDstMVPredME != NULL)
+    {
+        /* Hand the three candidates back to the caller as well */
+        for (k = 0; k < 3; k++)
+        {
+            pDstMVPredME[k].dx = pCand[k]->dx;
+            pDstMVPredME[k].dy = pCand[k]->dy;
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+
+/* End of file */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c
new file mode 100644
index 0000000..1886d92
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c
@@ -0,0 +1,92 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_IDCT8x8blk.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules for 8x8 block IDCT
+ * 
+ */
+
+
+#include <math.h>
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVCM4P2_DCT_Table.h"
+
+/**
+ * Function:  omxVCM4P2_IDCT8x8blk   (6.2.3.2.1)
+ *
+ * Description:
+ * Computes a 2D inverse DCT for a single 8x8 block, as defined in 
+ * [ISO14496-2]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the start of the linearly arranged IDCT input buffer; 
+ *            must be aligned on a 16-byte boundary.  According to 
+ *            [ISO14496-2], the input coefficient values should lie within the 
+ *            range [-2048, 2047]. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the start of the linearly arranged IDCT output buffer; 
+ *            must be aligned on a 16-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrc or pDst is NULL. 
+ *    -    pSrc or pDst is not 16-byte aligned. 
+ *
+ */
+OMXResult omxVCM4P2_IDCT8x8blk (const OMX_S16 *pSrc, OMX_S16 *pDst)
+{
+    OMX_INT outRow, outCol, freqRow, freqCol, outIdx;
+    OMX_S16 rounded;
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    /* Direct evaluation: each of the 64 outputs is a weighted sum over all
+       64 input coefficients, using the precalculated cosine table */
+    for (outIdx = 0; outIdx < 64; outIdx++)
+    {
+        OMX_F64 acc = 0.0;
+        outRow = outIdx / 8;
+        outCol = outIdx % 8;
+
+        for (freqRow = 0; freqRow < 8; freqRow++)
+        {
+            for (freqCol = 0; freqCol < 8; freqCol++)
+            {
+                acc += pSrc[(freqRow * 8) + freqCol] *
+                    armVCM4P2_preCalcDCTCos[outRow][freqRow] *
+                    armVCM4P2_preCalcDCTCos[outCol][freqCol];
+            }
+        }
+
+        /* Round via floor(value + 0.5), then saturate to [-256, 255] */
+        rounded = (OMX_S16) floor(acc + 0.5);
+        pDst[outIdx] = armClip (-256, 255, rounded);
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MCReconBlock.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MCReconBlock.c
new file mode 100644
index 0000000..7b3faee
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MCReconBlock.c
@@ -0,0 +1,357 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_MCReconBlock.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ * Description:
+ * MPEG4 motion compensation prediction for an 8x8 block using 
+ * interpolation
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_HalfPelVer
+ *
+ * Description:
+ * Performs half pel motion compensation for an 8x8 block using vertical
+ * interpolation described in ISO/IEC 14496-2, subclause 7.6.2: each output
+ * sample combines a reference sample with the sample directly below it.
+ *
+ * Remarks:
+ * Reads 9 lines of 8 samples, i.e. one line below the 8x8 block.
+ *
+ * Parameters:
+ * [in] pSrc        pointer to the block in the reference plane.
+ * [in] srcStep     distance between the start of consecutive lines
+ *                  in the reference plane, in bytes; must be a multiple
+ *                  of 8.
+ * [out] pDst       pointer to the linear 8x8 destination buffer.
+ * [in] rndVal      rounding control parameter: 0 - disabled; 1 - enabled.
+ *
+ */
+static OMXVoid armVCM4P2_HalfPelVer(
+      const OMX_U8 *pSrc,
+      OMX_INT srcStep, 
+      OMX_U8 *pDst,
+      OMX_INT rndVal)
+{
+  OMX_INT row, col;
+
+  /* One pass per output row; pUpper/pLower are the two contributing lines */
+  for (row = 0; row < 8; row++)
+  {
+    const OMX_U8 *pUpper = pSrc + (row * srcStep);
+    const OMX_U8 *pLower = pUpper + srcStep;
+
+    for (col = 0; col < 8; col++)
+    {
+      /* rndVal = 1 removes the +1 rounding bias before halving */
+      pDst[(row * 8) + col] = (pUpper[col] + pLower[col] + 1 - rndVal) >> 1;
+    }
+  }
+}
+
+/**
+ * Function: armVCM4P2_HalfPelHor
+ *
+ * Description:
+ * Performs half pel motion compensation for an 8x8 block using horizontal
+ * interpolation described in ISO/IEC 14496-2, subclause 7.6.2: each output
+ * sample combines a reference sample with its right-hand neighbour.
+ *
+ * Remarks:
+ * Reads 9 samples from each of 8 lines, i.e. one sample to the right of
+ * the 8x8 block on every line.
+ *
+ * Parameters:
+ * [in] pSrc        pointer to the block in the reference plane.
+ * [in] srcStep     distance between the start of consecutive lines
+ *                  in the reference plane, in bytes; must be a multiple
+ *                  of 8.
+ * [out] pDst       pointer to the linear 8x8 destination buffer.
+ * [in] rndVal      rounding control parameter: 0 - disabled; 1 - enabled.
+ *
+ */
+static OMXVoid armVCM4P2_HalfPelHor(
+      const OMX_U8 *pSrc,
+      OMX_INT srcStep, 
+      OMX_U8 *pDst,
+      OMX_INT rndVal)
+{
+  OMX_INT row, col;
+
+  /* One pass per output row; each sample pairs with its right neighbour */
+  for (row = 0; row < 8; row++)
+  {
+    const OMX_U8 *pLine = pSrc + (row * srcStep);
+    OMX_U8 *pOut = pDst + (row * 8);
+
+    for (col = 0; col < 8; col++)
+    {
+      /* rndVal = 1 removes the +1 rounding bias before halving */
+      pOut[col] = (pLine[col] + pLine[col + 1] + 1 - rndVal) >> 1;
+    }
+  }
+}
+
+
+/**
+ * Function: armVCM4P2_HalfPelVerHor
+ *
+ * Description:
+ * Performs half pel motion compensation for an 8x8 block using both
+ * horizontal and vertical interpolation described in ISO/IEC 14496-2,
+ * subclause 7.6.2: each output sample is derived from the sum of a 2x2
+ * group of reference samples.
+ *
+ * Remarks:
+ * Reads 9 samples from each of 9 lines, i.e. one sample to the right of
+ * and one line below the 8x8 block.
+ *
+ * Parameters:
+ * [in] pSrc        pointer to the block in the reference plane.
+ * [in] srcStep     distance between the start of consecutive lines
+ *                  in the reference plane, in bytes; must be a multiple
+ *                  of 8.
+ * [out] pDst       pointer to the linear 8x8 destination buffer; written
+ *                  as 64 consecutive bytes.
+ * [in] rndVal      rounding control parameter: 0 - disabled; 1 - enabled.
+ *                  When enabled, the rounding bias drops from +2 to +1.
+ *
+ */
+static OMXVoid armVCM4P2_HalfPelVerHor(
+      const OMX_U8 *pSrc,
+      OMX_INT srcStep, 
+      OMX_U8 *pDst,
+      OMX_INT rndVal)
+{
+  OMX_INT row, col;
+
+  /* One pass per output row; pUpper/pLower are the two contributing lines */
+  for (row = 0; row < 8; row++)
+  {
+    const OMX_U8 *pUpper = pSrc + (row * srcStep);
+    const OMX_U8 *pLower = pUpper + srcStep;
+    OMX_U8 *pOut = pDst + (row * 8);
+
+    for (col = 0; col < 8; col++)
+    {
+      /* Sum of the 2x2 neighbourhood whose top-left sample is (row, col) */
+      OMX_INT sum = pUpper[col] + pLower[col] +
+                    pUpper[col + 1] + pLower[col + 1];
+
+      /* Add the rounding bias, then divide by four */
+      pOut[col] = (sum + 2 - rndVal) >> 2;
+    }
+  }
+}
+
+/**
+ * Function: armVCM4P2_MCReconBlock_NoRes
+ *
+ * Description:
+ * Copies an 8x8 block of samples from a source plane to a destination
+ * plane without adding residuals. Used both for integer-pel prediction and
+ * for writing out a prediction when no residual buffer is supplied.
+ *
+ * Remarks:
+ * Source and destination may use different line steps; neither step is
+ * validated here.
+ *
+ * Parameters:
+ * [in] pSrc        pointer to the top-left sample of the source block.
+ * [in] srcStep     distance between the start of consecutive lines
+ *                  in the source plane, in bytes.
+ * [out] pDst       pointer to the top-left sample of the destination block.
+ * [in] dstStep     distance between the start of consecutive lines in the
+ *                  destination plane, in bytes.
+ *
+ */
+static OMXVoid armVCM4P2_MCReconBlock_NoRes(
+      const OMX_U8 *pSrc, 
+      OMX_INT srcStep,
+      OMX_U8 *pDst,
+      OMX_INT dstStep)
+{
+    OMX_INT row, col;
+
+    /* Offsets are computed in OMX_INT: 8-bit counters would wrap as soon
+     * as 7 * step + 7 exceeds 255, i.e. for any step above 35 bytes. */
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            pDst[(row * dstStep) + col] = pSrc[(row * srcStep) + col];
+        }
+    }
+}
+
+/**
+ * Function: armVCM4P2_MCReconBlock_Res
+ *
+ * Description:
+ * Reconstructs an INTER block by summing the motion compensation results
+ * and the results of the inverse transformation (prediction residuals).
+ * Output intensities are clipped to the range [0,255].
+ *
+ * Remarks:
+ * Both input buffers are read as linear 8x8 arrays (8 entries per line);
+ * only the destination uses a line step. pSrcResidue is dereferenced
+ * unconditionally, so the caller must not pass NULL here.
+ *
+ * Parameters:
+ * [in] pSrc        pointer to the linear 8x8 buffer holding the motion
+ *                  compensated prediction.
+ * [in] pSrcResidue pointer to the linear 8x8 buffer containing the 16-bit
+ *                  prediction residuals.
+ * [out] pDst       pointer to the destination block.
+ * [in] dstStep     distance between the start of consecutive lines in the
+ *                  destination plane, in bytes.
+ *
+ */
+static OMXVoid armVCM4P2_MCReconBlock_Res(
+      const OMX_U8 *pSrc, 
+      const OMX_S16 *pSrcResidue,
+      OMX_U8 *pDst,
+      OMX_INT dstStep)
+{
+  OMX_INT row, col;
+  OMX_INT recon;
+
+  for (row = 0; row < 8; row++)
+  {
+    const OMX_U8  *pPred = pSrc + (row * 8);
+    const OMX_S16 *pRes  = pSrcResidue + (row * 8);
+    OMX_U8        *pOut  = pDst + (row * dstStep);
+
+    for (col = 0; col < 8; col++)
+    {
+      recon = pPred[col] + pRes[col];
+      pOut[col] = armClip(0, 255, recon);
+    }
+  }
+}
+
+/**
+ * Function:  omxVCM4P2_MCReconBlock   (6.2.5.5.1)
+ *
+ * Description:
+ * Performs motion compensation prediction for an 8x8 block using 
+ * interpolation described in [ISO14496-2], subclause 7.6.2. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the block in the reference plane. 
+ *   srcStep - distance between the start of consecutive lines in the 
+ *            reference plane, in bytes; must be a multiple of 8. 
+ *   dstStep - distance between the start of consecutive lines in the 
+ *            destination plane, in bytes; must be a multiple of 8. 
+ *   pSrcResidue - pointer to a buffer containing the 16-bit prediction 
+ *            residuals; must be 16-byte aligned. If the pointer is NULL, then 
+ *            no prediction is done, only motion compensation, i.e., the block 
+ *            is moved with interpolation. 
+ *   predictType - bilinear interpolation type, as defined in section 
+ *            6.2.1.2. 
+ *   rndVal - rounding control parameter: 0 - disabled; 1 - enabled. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the destination buffer; must be 8-byte aligned.  If 
+ *            prediction residuals are added then output intensities are 
+ *            clipped to the range [0,255]. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; returned under any of the following 
+ *              conditions: 
+ *    -    pDst is not 8-byte aligned. 
+ *    -    pSrcResidue is not 16-byte aligned. 
+ *    -    one or more of the following pointers is NULL: pSrc or pDst. 
+ *    -    either srcStep or dstStep is not a multiple of 8. 
+ *    -    invalid type specified for the parameter predictType. 
+ *    -    the parameter rndVal is not equal either to 0 or 1. 
+ *
+ */
+OMXResult omxVCM4P2_MCReconBlock(
+        const OMX_U8 *pSrc,
+        OMX_INT srcStep,
+        const OMX_S16 *pSrcResidue,
+        OMX_U8 *pDst, 
+        OMX_INT dstStep,
+        OMX_INT predictType,
+        OMX_INT rndVal)
+{
+    /* Linear 8x8 scratch block (8 bytes per line). The prediction is always
+     * built here first and only then moved to the destination plane. */
+    OMX_U8 aPred[64];
+
+    /* Argument error checks. pSrcResidue is deliberately not NULL-checked:
+     * NULL selects the copy-only path further down. */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pSrcResidue), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((dstStep % 8) || (srcStep % 8)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((predictType != OMX_VC_INTEGER_PIXEL) &&
+                    (predictType != OMX_VC_HALF_PIXEL_X) &&
+                    (predictType != OMX_VC_HALF_PIXEL_Y) &&
+                    (predictType != OMX_VC_HALF_PIXEL_XY)
+                   ),OMX_Sts_BadArgErr); 
+    armRetArgErrIf(((rndVal != 0) && (rndVal != 1)),OMX_Sts_BadArgErr);
+
+    /* Stage 1: build the prediction in aPred according to predictType.
+     * There is no fall-through branch; any other value is refused by the
+     * argument checks above. */
+    if (predictType == OMX_VC_INTEGER_PIXEL)
+    {
+        /* No interpolation: plain copy out of the reference plane */
+        armVCM4P2_MCReconBlock_NoRes(pSrc, srcStep, aPred, 8);
+    }
+    else if (predictType == OMX_VC_HALF_PIXEL_X)
+    {
+        /* Horizontal half-pel interpolation */
+        armVCM4P2_HalfPelHor(pSrc, srcStep, aPred, rndVal);
+    }
+    else if (predictType == OMX_VC_HALF_PIXEL_Y)
+    {
+        /* Vertical half-pel interpolation */
+        armVCM4P2_HalfPelVer(pSrc, srcStep, aPred, rndVal);
+    }
+    else if (predictType == OMX_VC_HALF_PIXEL_XY)
+    {
+        /* Half-pel interpolation in both directions */
+        armVCM4P2_HalfPelVerHor(pSrc, srcStep, aPred, rndVal);
+    }
+
+    /* Stage 2: move the prediction from aPred into the destination plane,
+     * which is the only place where dstStep is applied. */
+    if (pSrcResidue != NULL)
+    {
+        /* Residuals present: add them to the prediction; the helper clips
+         * each result to [0,255] */
+        armVCM4P2_MCReconBlock_Res(aPred, pSrcResidue,
+                                   pDst, dstStep);
+    }
+    else
+    {
+        /* Motion compensation only: copy the prediction unchanged */
+        armVCM4P2_MCReconBlock_NoRes(aPred, 8,
+                                     pDst, dstStep);
+    }
+
+    return OMX_Sts_NoErr;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEGetBufSize.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEGetBufSize.c
new file mode 100644
index 0000000..a8e51da
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEGetBufSize.c
@@ -0,0 +1,70 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_MEGetBufSize.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Initialization modules for the vendor specific Motion Estimation structure.
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P2_MEGetBufSize   (6.2.4.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification 
+ * structure for the following motion estimation functions: 
+ * BlockMatch_Integer_8x8, BlockMatch_Integer_16x16, and MotionEstimationMB. 
+ * The size does not depend on MEMode or on the parameter values.
+ *
+ * Input Arguments:
+ *   MEMode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P2MEMode 
+ *   pMEParams - motion estimation parameters 
+ *
+ * Output Arguments:
+ *   pSize - pointer to the number of bytes required for the specification 
+ *            structure 
+ *
+ * Return Value:
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - one or more of the following is true: 
+ *    -    an invalid value was specified for the parameter MEMode 
+ *    -    a negative or zero value was specified for the 
+ *         parameter pMEParams->searchRange 
+ *    -    pMEParams or pSize is NULL 
+ *
+ */
+
+OMXResult omxVCM4P2_MEGetBufSize(
+    OMXVCM4P2MEMode MEMode, 
+    const OMXVCM4P2MEParams *pMEParams, 
+    OMX_U32 *pSize
+   )
+{
+    armRetArgErrIf(!pMEParams, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!pSize, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMEParams->searchRange <= 0, OMX_Sts_BadArgErr);
+    /* MEMode is an OMXVCM4P2MEMode, so validate it against the M4P2 modes */
+    armRetArgErrIf((MEMode != OMX_VC_M4P2_FAST_SEARCH) &&
+                   (MEMode != OMX_VC_M4P2_FULL_SEARCH), OMX_Sts_BadArgErr);
+    
+    *pSize = (OMX_U32) sizeof(ARMVCM4P2_MESpec);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEInit.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEInit.c
new file mode 100644
index 0000000..419e71a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEInit.c
@@ -0,0 +1,84 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_MEInit.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Initialization modules for the vendor specific Motion Estimation structure.
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P2_MEInit   (6.2.4.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the 
+ * following motion estimation functions:  BlockMatch_Integer_8x8, 
+ * BlockMatch_Integer_16x16, and MotionEstimationMB. Memory for the 
+ * specification structure *pMESpec must be allocated prior to calling the 
+ * function, and should be aligned on a 4-byte boundary.  Following 
+ * initialization by this function, the vendor-specific structure *pMESpec 
+ * should contain an implementation-specific representation of all motion 
+ * estimation parameters received via the structure pMEParams, for example  
+ * rndVal, searchRange, etc.  The number of bytes required for the 
+ * specification structure can be determined using the function 
+ * omxVCM4P2_MEGetBufSize. 
+ *
+ * Input Arguments:
+ *   
+ *   MEmode - motion estimation mode; available modes are defined by the 
+ *            enumerated type OMXVCM4P2MEMode 
+ *   pMEParams - motion estimation parameters 
+ *   pMESpec - pointer to the uninitialized ME specification structure 
+ *
+ * Output Arguments:
+ *   
+ *   pMESpec - pointer to the initialized ME specification structure 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - one or more of the following is true: 
+ *    -    an invalid value was specified for the parameter MEmode 
+ *    -    a negative or zero value was specified for the 
+ *         parameter pMEParams->searchRange 
+ *
+ */
+
+OMXResult omxVCM4P2_MEInit(
+    OMXVCM4P2MEMode MEMode, 
+    const OMXVCM4P2MEParams *pMEParams, 
+    void *pMESpec
+   )
+{
+    ARMVCM4P2_MESpec *pSpec;
+    armRetArgErrIf(!pMEParams, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!pMESpec, OMX_Sts_BadArgErr);
+    armRetArgErrIf((MEMode != OMX_VC_M4P2_FAST_SEARCH) && 
+                   (MEMode != OMX_VC_M4P2_FULL_SEARCH), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMEParams->searchRange <= 0, OMX_Sts_BadArgErr);
+    /* All checks passed: record the mode and copy the four ME parameters */
+    pSpec = (ARMVCM4P2_MESpec *) pMESpec;
+    pSpec->MEMode                       = MEMode;
+    pSpec->MEParams.rndVal              = pMEParams->rndVal;
+    pSpec->MEParams.searchRange         = pMEParams->searchRange;
+    pSpec->MEParams.halfPelSearchEnable = pMEParams->halfPelSearchEnable;
+    pSpec->MEParams.searchEnable8x8     = pMEParams->searchEnable8x8;
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c
new file mode 100644
index 0000000..9549050
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c
@@ -0,0 +1,630 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_MotionEstimationMB.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains module for motion search 16x16 macroblock
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_BlockMatch_16x16
+ *
+ * Description:
+ * 16x16 block match wrapper: calls omxVCM4P2_BlockMatch_Integer_16x16 and,
+ * if half pel search is enabled, omxVCM4P2_BlockMatch_Half_16x16 on its result.
+ *
+ * Parameters:
+ * [in] pSrcRefBuf    pointer to the reference Y plane; points to the reference MB
+ *                    that corresponds to the location of the current macroblock
+ *                    in the current plane.
+ * [in] srcRefStep    width of the reference plane
+ * [in] pRefRect      pointer to the valid rectangle in the reference plane, relative
+ *                    to the image origin. It may extend past the image boundary by
+ *                    the amount of padding (e.g. left = -4 with 4 padded pixels).
+ * [in] pSrcCurrBuf   pointer to the current macroblock extracted from the original
+ *                    plane (linear array, 256 entries); 16-byte aligned.
+ * [in] pCurrPointPos position of the current macroblock in the current plane
+ * [in] pSrcPreMV     pointer to predicted motion vector; NULL indicates none
+ * [in] pSrcPreSAD    pointer to SAD associated with pSrcPreMV; may be NULL
+ * [in] pMESpec       ME specification structure initialized by omxVCM4P2_MEInit
+ * [out] pDstMV       pointer to estimated MV
+ * [out] pDstSAD      pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * otherwise the status returned by the failing block match function
+ */
+static OMXResult armVCM4P2_BlockMatch_16x16(
+     const OMX_U8 *pSrcRefBuf,
+     const OMX_INT srcRefStep,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pCurrPointPos,
+     OMXVCMotionVector *pSrcPreMV,
+     OMX_INT *pSrcPreSAD,
+     void *pMESpec,
+     OMXVCMotionVector *pDstMV,
+     OMX_INT *pDstSAD
+)
+{
+    /* NOTE(review): pMESpec is read as an OMXVCM4P2MEParams; this assumes the
+     * spec structure starts with its MEParams member - confirm in armVC.h. */
+    OMXVCM4P2MEParams *pMEParams = (OMXVCM4P2MEParams *)pMESpec;
+    OMXResult status;
+
+    /* Integer-pel search first; its MV and SAD feed the half-pel step */
+    status = omxVCM4P2_BlockMatch_Integer_16x16(
+        pSrcRefBuf,
+        srcRefStep,
+        pRefRect,
+        pSrcCurrBuf,
+        pCurrPointPos,
+        pSrcPreMV,
+        pSrcPreSAD,
+        pMEParams,
+        pDstMV,
+        pDstSAD);
+    if (status != OMX_Sts_NoErr)
+    {
+        /* On failure pDstMV/pDstSAD cannot be trusted: skip refinement */
+        return status;
+    }
+
+    if (pMEParams->halfPelSearchEnable)
+    {
+        status = omxVCM4P2_BlockMatch_Half_16x16(
+            pSrcRefBuf,
+            srcRefStep,
+            pRefRect,
+            pSrcCurrBuf,
+            pCurrPointPos,
+            pMEParams->rndVal,
+            pDstMV,
+            pDstSAD);
+    }
+
+    return status;
+}
+
+/**
+ * Function: armVCM4P2_BlockMatch_8x8
+ *
+ * Description:
+ * 8x8 block match wrapper: calls omxVCM4P2_BlockMatch_Integer_8x8 and,
+ * if half pel search is enabled, omxVCM4P2_BlockMatch_Half_8x8 on its result.
+ *
+ * Parameters:
+ * [in] pSrcRefBuf    pointer to the reference Y plane; points to the reference
+ *                    location that corresponds to the location of the current
+ *                    block in the current plane.
+ * [in] srcRefStep    width of the reference plane
+ * [in] pRefRect      pointer to the valid rectangle in the reference plane, relative
+ *                    to the image origin. It may extend past the image boundary by
+ *                    the amount of padding (e.g. left = -4 with 4 padded pixels).
+ * [in] pSrcCurrBuf   pointer to the current block data, forwarded unchanged to the
+ *                    8x8 block match functions. NOTE(review): confirm size/alignment.
+ * [in] pCurrPointPos position of the current block in the current plane
+ * [in] pSrcPreMV     pointer to predicted motion vector; NULL indicates none
+ * [in] pSrcPreSAD    pointer to SAD associated with pSrcPreMV; may be NULL
+ * [in] pMESpec       ME specification structure initialized by omxVCM4P2_MEInit
+ * [out] pSrcDstMV    pointer to estimated MV; passed to both block match calls
+ * [out] pDstSAD      pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * otherwise the status returned by the failing block match function
+ */
+static OMXResult armVCM4P2_BlockMatch_8x8(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT srcRefStep,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pCurrPointPos,
+     OMXVCMotionVector *pSrcPreMV,
+     OMX_INT *pSrcPreSAD,
+     void *pMESpec,
+     OMXVCMotionVector *pSrcDstMV,
+     OMX_INT *pDstSAD
+)
+{
+    /* NOTE(review): pMESpec is read as an OMXVCM4P2MEParams; this assumes the
+     * spec structure starts with its MEParams member - confirm in armVC.h. */
+    OMXVCM4P2MEParams *pMEParams = (OMXVCM4P2MEParams *)pMESpec;
+    OMXResult status;
+
+    /* Integer-pel search first; its MV and SAD feed the half-pel step */
+    status = omxVCM4P2_BlockMatch_Integer_8x8(
+        pSrcRefBuf,
+        srcRefStep,
+        pRefRect,
+        pSrcCurrBuf,
+        pCurrPointPos,
+        pSrcPreMV,
+        pSrcPreSAD,
+        pMEParams,
+        pSrcDstMV,
+        pDstSAD);
+    if (status != OMX_Sts_NoErr)
+    {
+        /* On failure pSrcDstMV/pDstSAD cannot be trusted: skip refinement */
+        return status;
+    }
+
+    if (pMEParams->halfPelSearchEnable)
+    {
+        status = omxVCM4P2_BlockMatch_Half_8x8(
+            pSrcRefBuf,
+            srcRefStep,
+            pRefRect,
+            pSrcCurrBuf,
+            pCurrPointPos,
+            pMEParams->rndVal,
+            pSrcDstMV,
+            pDstSAD);
+    }
+
+    return status;
+}
+
+
+/**
+ * Function:  omxVCM4P2_MotionEstimationMB   (6.2.4.3.1)
+ *
+ * Description:
+ * Performs motion search for a 16x16 macroblock.  Selects best motion search 
+ * strategy from among inter-1MV, inter-4MV, and intra modes.  Supports 
+ * integer and half pixel resolution. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcCurrBuf - pointer to the top-left corner of the current MB in the 
+ *            original picture plane; must be aligned on a 16-byte boundary.  
+ *            The function does not expect source data outside the region 
+ *            bounded by the MB to be available; for example it is not 
+ *            necessary for the caller to guarantee the availability of 
+ *            pSrcCurrBuf[-SrcCurrStep], i.e., the row of pixels above the MB 
+ *            to be processed. 
+ *   srcCurrStep - width of the original picture plane, in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference 
+ *            plane location corresponding to the location of the current 
+ *            macroblock in the current plane; must be aligned on a 16-byte 
+ *            boundary. 
+ *   srcRefStep - width of the reference picture plane, in terms of full 
+ *            pixels; must be a multiple of 16. 
+ *   pRefRect - reference plane valid region rectangle, specified relative to 
+ *            the image origin 
+ *   pCurrPointPos - position of the current macroblock in the current plane 
+ *   pMESpec - pointer to the vendor-specific motion estimation specification 
+ *            structure; must be allocated and then initialized using 
+ *            omxVCM4P2_MEInit prior to calling this function. 
+ *   pMBInfo - array, of dimension four, containing pointers to information 
+ *            associated with four nearby MBs: 
+ *            -   pMBInfo[0] - pointer to left MB information 
+ *            -   pMBInfo[1] - pointer to top MB information 
+ *            -   pMBInfo[2] - pointer to top-left MB information 
+ *            -   pMBInfo[3] - pointer to top-right MB information 
+ *            Any pointer in the array may be set equal to NULL if the 
+ *            corresponding MB doesn't exist.  For each MB, the following structure 
+ *            members are used:    
+ *            -   mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER, or 
+ *                OMX_VC_INTER4V 
+ *            -   pMV0[2][2] - estimated motion vectors; represented 
+ *                in 1/2 pixel units 
+ *            -   sliceID - number of the slice to which the MB belongs 
+ *   pSrcDstMBCurr - pointer to information structure for the current MB.  
+ *            The following entries should be set prior to calling the 
+ *            function: sliceID - the number of the slice to which the 
+ *            current MB belongs.  The structure elements cbpy and cbpc are 
+ *            ignored. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDstMBCurr - pointer to updated information structure for the current 
+ *            MB after MB-level motion estimation has been completed.  The 
+ *            following structure members are updated by the ME function:   
+ *              -  mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or 
+ *                 OMX_VC_INTER4V. 
+ *              -  pMV0[2][2] - estimated motion vectors; represented in 
+ *                 terms of 1/2 pel units. 
+ *              -  pMVPred[2][2] - predicted motion vectors; represented 
+ *                 in terms of 1/2 pel units. 
+ *            The structure members cbpy and cbpc are not updated by the function. 
+ *   pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs 
+ *            for INTER4V 
+ *   pDstBlockSAD - pointer to an array of SAD values for each of the four 
+ *            8x8 luma blocks in the MB.  The block SADs are in scan order for 
+ *            each MB. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the 
+ *              following conditions is true: 
+ *    -    at least one of the following pointers is NULL: pSrcCurrBuf, 
+ *              pSrcRefBuf, pRefRect, pCurrPointPos, pSrcDstMBCurr, or 
+ *              pDstSAD. 
+ *
+ */
+
+OMXResult omxVCM4P2_MotionEstimationMB (
+    const OMX_U8 *pSrcCurrBuf,
+    OMX_S32 srcCurrStep,
+    const OMX_U8 *pSrcRefBuf,
+    OMX_S32 srcRefStep,
+    const OMXRect*pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    void *pMESpec,
+    const OMXVCM4P2MBInfoPtr *pMBInfo,
+    OMXVCM4P2MBInfo *pSrcDstMBCurr,
+    OMX_U16 *pDstSAD,
+    OMX_U16 *pDstBlockSAD
+)
+{
+ 
+    OMX_INT intraSAD, average, count, index, x, y;
+    OMXVCMotionVector dstMV16x16;
+    OMX_INT           dstSAD16x16;
+    OMX_INT           dstSAD8x8;
+    OMXVCM4P2MEParams  *pMEParams; 
+	OMXVCM4P2Coordinate TempCurrPointPos; 
+    OMXVCM4P2Coordinate *pTempCurrPointPos; 
+    OMX_U8 aTempSrcCurrBuf[271];
+    OMX_U8 *pTempSrcCurrBuf;
+    OMX_U8 *pDst;
+    OMX_U8 aDst[71];
+    OMX_S32 dstStep = 8;
+    OMX_INT predictType;
+	OMX_S32 Sad;
+    const OMX_U8 *pTempSrcRefBuf;
+    OMXVCMotionVector* pSrcCandMV1[4];
+    OMXVCMotionVector* pSrcCandMV2[4];
+    OMXVCMotionVector* pSrcCandMV3[4];
+        
+    /* Argument error checks */
+    armRetArgErrIf(!armIs16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);
+	armRetArgErrIf(!armIs16ByteAligned(pSrcRefBuf), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((srcCurrStep % 16) || (srcRefStep % 16)), OMX_Sts_BadArgErr);
+	armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr);
+	armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr);    
+    armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcDstMBCurr == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr);
+    
+    
+    pTempCurrPointPos = &(TempCurrPointPos);
+    pTempSrcCurrBuf = armAlignTo16Bytes(aTempSrcCurrBuf);
+    pMEParams = (OMXVCM4P2MEParams *)pMESpec;
+    pTempCurrPointPos->x = pCurrPointPos->x;
+    pTempCurrPointPos->y = pCurrPointPos->y;
+    pSrcDstMBCurr->mbType = OMX_VC_INTER;
+    
+    /* Preparing a linear buffer for block match */
+    for (y = 0, index = count = 0; y < 16; y++, index += srcCurrStep - 16)
+    {
+        for(x = 0; x < 16; x++, count++, index++)
+        {
+            pTempSrcCurrBuf[count] = pSrcCurrBuf[index];
+        }
+    }
+    for(y = 0, index = 0; y < 2; y++)
+    {
+        for(x = 0; x < 2; x++,index++)
+        {
+            if((pMBInfo[0] != NULL) && (pMBInfo[0]->mbType != OMX_VC_INTRA))
+            {
+               pSrcCandMV1[index] = &(pMBInfo[0]->pMV0[y][x]); 
+            }
+            else
+            {
+               pSrcCandMV1[index] = NULL;
+            }
+            if((pMBInfo[1] != NULL) && (pMBInfo[1]->mbType != OMX_VC_INTRA))
+            {
+               pSrcCandMV2[index] = &(pMBInfo[1]->pMV0[y][x]);
+            }
+            else
+            {
+               pSrcCandMV2[index] = NULL; 
+            }
+            if((pMBInfo[3] != NULL) && (pMBInfo[3]->mbType != OMX_VC_INTRA))
+            {
+               pSrcCandMV3[index] = &(pMBInfo[3]->pMV0[y][x]);
+            }
+            else
+            {
+               pSrcCandMV3[index] = NULL; 
+            }
+        }
+    }
+	/* Calculating SAD at MV(0,0) */
+	armVCCOMM_SAD(pTempSrcCurrBuf,
+					  16,
+					  pSrcRefBuf,
+					  srcRefStep,
+					  &Sad,
+					  16,
+					  16);
+	*pDstSAD = Sad;
+
+    /* Mode decision for NOT_CODED MB */
+	if(*pDstSAD == 0)
+	{
+        pSrcDstMBCurr->pMV0[0][0].dx = 0;
+        pSrcDstMBCurr->pMV0[0][0].dy = 0;
+        *pDstSAD   = 0;
+		return OMX_Sts_NoErr;
+	}
+
+    omxVCM4P2_FindMVpred(
+                    &(pSrcDstMBCurr->pMV0[0][0]),
+                    pSrcCandMV1[0],
+                    pSrcCandMV2[0],
+                    pSrcCandMV3[0],
+                    &(pSrcDstMBCurr->pMVPred[0][0]),
+                    NULL,
+                    0);
+                    
+    /* Inter 1 MV */
+    armVCM4P2_BlockMatch_16x16(
+        pSrcRefBuf,
+        srcRefStep,
+        pRefRect,
+        pTempSrcCurrBuf,
+        pCurrPointPos,
+        &(pSrcDstMBCurr->pMVPred[0][0]),
+        NULL,
+        pMEParams,
+        &dstMV16x16,
+        &dstSAD16x16);
+    
+    /* Initialize all with 1 MV values */
+    pSrcDstMBCurr->pMV0[0][0].dx = dstMV16x16.dx;
+    pSrcDstMBCurr->pMV0[0][0].dy = dstMV16x16.dy;
+    pSrcDstMBCurr->pMV0[0][1].dx = dstMV16x16.dx;
+    pSrcDstMBCurr->pMV0[0][1].dy = dstMV16x16.dy;
+    pSrcDstMBCurr->pMV0[1][0].dx = dstMV16x16.dx;
+    pSrcDstMBCurr->pMV0[1][0].dy = dstMV16x16.dy;
+    pSrcDstMBCurr->pMV0[1][1].dx = dstMV16x16.dx;
+    pSrcDstMBCurr->pMV0[1][1].dy = dstMV16x16.dy; 
+    
+    *pDstSAD   = dstSAD16x16;       
+    
+    if (pMEParams->searchEnable8x8)
+    {
+        /* Inter 4MV */
+        armVCM4P2_BlockMatch_8x8 (pSrcRefBuf,
+                                      srcRefStep, pRefRect,
+                                      pTempSrcCurrBuf, pTempCurrPointPos,
+                                      &(pSrcDstMBCurr->pMVPred[0][0]), NULL,
+                                      pMEParams, &(pSrcDstMBCurr->pMV0[0][0]),
+                                      &dstSAD8x8
+                                      );
+        pDstBlockSAD[0] = dstSAD8x8;
+        *pDstSAD = dstSAD8x8;
+        pTempCurrPointPos->x += 8;
+        pSrcRefBuf += 8;
+        omxVCM4P2_FindMVpred(
+                    &(pSrcDstMBCurr->pMV0[0][1]),
+                    pSrcCandMV1[1],
+                    pSrcCandMV2[1],
+                    pSrcCandMV3[1],
+                    &(pSrcDstMBCurr->pMVPred[0][1]),
+                    NULL,
+                    1);
+        
+        armVCM4P2_BlockMatch_8x8 (pSrcRefBuf,
+                                      srcRefStep, pRefRect,
+                                      pTempSrcCurrBuf, pTempCurrPointPos,
+                                      &(pSrcDstMBCurr->pMVPred[0][1]), NULL,
+                                      pMEParams, &(pSrcDstMBCurr->pMV0[0][1]),
+                                      &dstSAD8x8
+                                      );
+        pDstBlockSAD[1] = dstSAD8x8;
+        *pDstSAD += dstSAD8x8;
+        pTempCurrPointPos->x -= 8;
+        pTempCurrPointPos->y += 8;
+        pSrcRefBuf += (srcRefStep * 8) - 8;
+        
+        omxVCM4P2_FindMVpred(
+                    &(pSrcDstMBCurr->pMV0[1][0]),
+                    pSrcCandMV1[2],
+                    pSrcCandMV2[2],
+                    pSrcCandMV3[2],
+                    &(pSrcDstMBCurr->pMVPred[1][0]),
+                    NULL,
+                    2);
+        armVCM4P2_BlockMatch_8x8 (pSrcRefBuf,
+                                      srcRefStep, pRefRect,
+                                      pTempSrcCurrBuf, pTempCurrPointPos,
+                                      &(pSrcDstMBCurr->pMVPred[1][0]), NULL,
+                                      pMEParams, &(pSrcDstMBCurr->pMV0[1][0]),
+                                      &dstSAD8x8
+                                      );
+        pDstBlockSAD[2] = dstSAD8x8;
+        *pDstSAD += dstSAD8x8;
+        pTempCurrPointPos->x += 8;
+        pSrcRefBuf += 8;
+        omxVCM4P2_FindMVpred(
+                    &(pSrcDstMBCurr->pMV0[1][1]),
+                    pSrcCandMV1[3],
+                    pSrcCandMV2[3],
+                    pSrcCandMV3[3],
+                    &(pSrcDstMBCurr->pMVPred[1][1]),
+                    NULL,
+                    3);
+        armVCM4P2_BlockMatch_8x8 (pSrcRefBuf,
+                                      srcRefStep, pRefRect,
+                                      pTempSrcCurrBuf, pTempCurrPointPos,
+                                      &(pSrcDstMBCurr->pMVPred[1][1]), NULL,
+                                      pMEParams, &(pSrcDstMBCurr->pMV0[1][1]),
+                                      &dstSAD8x8
+                                      );
+        pDstBlockSAD[3] = dstSAD8x8;
+        *pDstSAD += dstSAD8x8;   
+        
+        
+        /* Checking if 4MV is equal to 1MV */
+        if (
+            (pSrcDstMBCurr->pMV0[0][0].dx != dstMV16x16.dx) ||
+            (pSrcDstMBCurr->pMV0[0][0].dy != dstMV16x16.dy) ||
+            (pSrcDstMBCurr->pMV0[0][1].dx != dstMV16x16.dx) ||
+            (pSrcDstMBCurr->pMV0[0][1].dy != dstMV16x16.dy) ||
+            (pSrcDstMBCurr->pMV0[1][0].dx != dstMV16x16.dx) ||
+            (pSrcDstMBCurr->pMV0[1][0].dy != dstMV16x16.dy) ||
+            (pSrcDstMBCurr->pMV0[1][1].dx != dstMV16x16.dx) ||
+            (pSrcDstMBCurr->pMV0[1][1].dy != dstMV16x16.dy)
+           )
+        {
+            /* select the 4 MV */
+            pSrcDstMBCurr->mbType = OMX_VC_INTER4V;
+        }                                      
+    }
+                                         
+    /* finding the error in intra mode */
+    for (count = 0, average = 0; count < 256 ; count++)
+    {
+        average = average + pTempSrcCurrBuf[count];
+    }
+    average = average/256;
+    
+	intraSAD = 0;
+
+    /* Intra SAD calculation */
+    for (count = 0; count < 256 ; count++)
+    {
+        intraSAD += armAbs ((pTempSrcCurrBuf[count]) - (average));
+    }
+    
+	/* Using the MPEG4 VM formula for intra/inter mode decision 
+	   Var < (SAD - 2*NB) where NB = N^2 is the number of pixels
+	   of the macroblock.*/
+
+    if (intraSAD <= (*pDstSAD - 512))
+    {
+        pSrcDstMBCurr->mbType = OMX_VC_INTRA;
+        pSrcDstMBCurr->pMV0[0][0].dx = 0;
+        pSrcDstMBCurr->pMV0[0][0].dy = 0;
+        *pDstSAD   = intraSAD;
+        pDstBlockSAD[0] = 0xFFFF;
+        pDstBlockSAD[1] = 0xFFFF;
+        pDstBlockSAD[2] = 0xFFFF;
+        pDstBlockSAD[3] = 0xFFFF;
+    }
+
+    if(pSrcDstMBCurr->mbType == OMX_VC_INTER)
+    {
+      pTempSrcRefBuf = pSrcRefBuf + (srcRefStep * dstMV16x16.dy) + dstMV16x16.dx;
+    
+      if((dstMV16x16.dx & 0x1) && (dstMV16x16.dy & 0x1))
+      {
+        predictType = OMX_VC_HALF_PIXEL_XY;
+      }
+      else if(dstMV16x16.dx & 0x1)
+      {
+        predictType = OMX_VC_HALF_PIXEL_X;
+      }
+      else if(dstMV16x16.dy & 0x1)
+      {
+        predictType = OMX_VC_HALF_PIXEL_Y;
+      }
+      else
+      {
+        predictType = OMX_VC_INTEGER_PIXEL;
+      }
+      
+      pDst = armAlignTo8Bytes(&(aDst[0]));
+      /* Calculating Block SAD at MV(dstMV16x16.dx,dstMV16x16.dy) */
+	  /* Block 0 */
+      omxVCM4P2_MCReconBlock(pTempSrcRefBuf,
+	                             srcRefStep,
+                                 NULL,
+                                 pDst, 
+                                 dstStep,
+                                 predictType,
+                                 pMEParams->rndVal);
+    
+      armVCCOMM_SAD(pTempSrcCurrBuf,
+                        16,
+                        pDst,
+                        dstStep,
+                        &Sad,
+                        8,
+                        8);
+      pDstBlockSAD[0] = Sad;
+   
+      /* Block 1 */
+      omxVCM4P2_MCReconBlock(pTempSrcRefBuf + 8,
+                                 srcRefStep,
+                                 NULL,
+                                 pDst, 
+                                 dstStep,
+                                 predictType,
+                                 pMEParams->rndVal);					  
+
+      armVCCOMM_SAD(pTempSrcCurrBuf + 8,
+                        16,
+                        pDst,
+                        dstStep,
+                        &Sad,
+                        8,
+                        8);
+      pDstBlockSAD[1] = Sad;
+	
+      /* Block 2 */
+      omxVCM4P2_MCReconBlock(pTempSrcRefBuf + (srcRefStep*8),
+                                 srcRefStep,
+                                 NULL,
+                                 pDst, 
+                                 dstStep,
+                                 predictType,
+                                 pMEParams->rndVal);
+
+      armVCCOMM_SAD(pTempSrcCurrBuf + (16*8),
+                        16,
+                        pDst,
+                        dstStep,
+                        &Sad,
+                        8,
+                        8);
+      pDstBlockSAD[2] = Sad;
+
+	  /* Block 3 */
+      omxVCM4P2_MCReconBlock(pTempSrcRefBuf + (srcRefStep*8) + 8,
+                                 srcRefStep,
+                                 NULL,
+                                 pDst, 
+                                 dstStep,
+                                 predictType,
+                                 pMEParams->rndVal);
+
+      armVCCOMM_SAD(pTempSrcCurrBuf + (16*8) + 8,
+                        16,
+                        pDst,
+                        dstStep,
+                        &Sad,
+                        8,
+                        8);
+      pDstBlockSAD[3] = Sad;
+    }
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c
new file mode 100644
index 0000000..1613f47
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c
@@ -0,0 +1,121 @@
+ /**
+ * 
+ * File Name:  omxVCM4P2_PredictReconCoefIntra.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File:        omxVCM4P2_PredictReconCoefIntra.c
+ * Description: Contains modules for AC DC prediction
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function:  omxVCM4P2_PredictReconCoefIntra   (6.2.5.4.3)
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block.  Prior 
+ * to the function call, prediction direction (predDir) should be selected as 
+ * specified in [ISO14496-2], subclause 7.4.3.1. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the coefficient buffer which contains the quantized 
+ *            coefficient residuals (PQF) of the current block; must be 
+ *            aligned on a 4-byte boundary.  The output coefficients are 
+ *            saturated to the range [-2048, 2047]. 
+ *   pPredBufRow - pointer to the coefficient row buffer; must be aligned on 
+ *            a 4-byte boundary. 
+ *   pPredBufCol - pointer to the coefficient column buffer; must be aligned 
+ *            on a 4-byte boundary. 
+ *   curQP - quantization parameter of the current block. curQP may equal to 
+ *            predQP especially when the current block and the predictor block 
+ *            are in the same macroblock. 
+ *   predQP - quantization parameter of the predictor block 
+ *   predDir - indicates the prediction direction which takes one of the 
+ *            following values: OMX_VC_HORIZONTAL - predict horizontally 
+ *            OMX_VC_VERTICAL - predict vertically 
+ *   ACPredFlag - a flag indicating if AC prediction should be performed. It 
+ *            is equal to ac_pred_flag in the bit stream syntax of MPEG-4 
+ *   videoComp - video component type (luminance or chrominance) of the 
+ *            current block 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the coefficient buffer which contains the quantized 
+ *            coefficients (QF) of the current block 
+ *   pPredBufRow - pointer to the updated coefficient row buffer 
+ *   pPredBufCol - pointer to the updated coefficient column buffer  Note: 
+ *            Buffer update: Update the AC prediction buffer (both row and 
+ *            column buffer). 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *        -    At least one of the pointers is NULL: 
+ *              pSrcDst, pPredBufRow, or pPredBufCol. 
+ *        -    curQP <= 0, 
+ *        -    predQP <= 0, 
+ *        -    curQP >31, 
+ *        -    predQP > 31, 
+ *        -    predDir is outside the range [1,2]
+ *        -    pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned. 
+ *
+ */
+
+OMXResult omxVCM4P2_PredictReconCoefIntra(
+     OMX_S16 * pSrcDst,
+     OMX_S16 * pPredBufRow,
+     OMX_S16 * pPredBufCol,
+     OMX_INT curQP,
+     OMX_INT predQP,
+     OMX_INT predDir,
+     OMX_INT ACPredFlag,
+     OMXVCM4P2VideoComponent videoComp
+ )
+{
+    /* All three coefficient buffers are mandatory */
+    armRetArgErrIf((pSrcDst == NULL) || (pPredBufRow == NULL) ||
+                   (pPredBufCol == NULL), OMX_Sts_BadArgErr);
+
+    /* Both quantization parameters must lie in [1, 31] */
+    armRetArgErrIf((curQP <= 0) || (predQP <= 0), OMX_Sts_BadArgErr);
+    armRetArgErrIf((curQP > 31) || (predQP > 31), OMX_Sts_BadArgErr);
+
+    /* Only the direction codes 1 and 2 are accepted */
+    armRetArgErrIf(!((predDir == 1) || (predDir == 2)), OMX_Sts_BadArgErr);
+
+    /* Each buffer has to start on a 4-byte boundary */
+    armRetArgErrIf(!(armIs4ByteAligned(pSrcDst) &&
+                     armIs4ByteAligned(pPredBufRow) &&
+                     armIs4ByteAligned(pPredBufCol)), OMX_Sts_BadArgErr);
+
+    /* Delegate to the shared AC/DC predictor; its flag argument is 0 */
+    return armVCM4P2_ACDCPredict(pSrcDst,
+                                 NULL,
+                                 pPredBufRow,
+                                 pPredBufCol,
+                                 curQP,
+                                 predQP,
+                                 predDir,
+                                 ACPredFlag,
+                                 videoComp,
+                                 0,
+                                 NULL);
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInter_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInter_I.c
new file mode 100644
index 0000000..5964f73
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInter_I.c
@@ -0,0 +1,117 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_QuantInter_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules for inter Quantization
+ * 
+ */
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P2_QuantInter_I   (6.2.4.4.3)
+ *
+ * Description:
+ * Performs quantization on an inter coefficient block; supports 
+ * bits_per_pixel == 8. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input inter block coefficients; must be aligned 
+ *            on a 16-byte boundary. 
+ *   QP - quantization parameter (quantizer_scale) 
+ *   shortVideoHeader - binary flag indicating presence of short_video_header; 
+ *            shortVideoHeader==1 selects linear intra DC mode, and 
+ *            shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the output (quantized) interblock coefficients.  
+ *            When shortVideoHeader==1, AC coefficients are saturated on the 
+ *            interval [-127, 127], and DC coefficients are saturated on the 
+ *            interval [1, 254].  When shortVideoHeader==0, AC coefficients 
+ *            are saturated on the interval [-2047, 2047]. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrcDst is NULL. 
+ *    -    QP <= 0 or QP >= 32. 
+ *
+ */
+
+OMXResult omxVCM4P2_QuantInter_I(
+     OMX_S16 * pSrcDst,
+     OMX_U8 QP,
+     OMX_INT shortVideoHeader
+)
+{
+    /* Clip limits: dcLow/dcHigh apply to coefficient 0, acLow/acHigh to
+       coefficients 1..63 */
+    OMX_INT dcLow, dcHigh;
+    OMX_INT acLow, acHigh;
+    OMX_INT idx;
+
+    /* Reject a missing block pointer and any QP outside [1, 31] */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((QP > 0) && (QP < 32)), OMX_Sts_BadArgErr);
+
+    /* shortVideoHeader == 1 selects the narrower clip ranges; any other
+       value selects the symmetric [-2047, 2047] range for DC and AC */
+    if (shortVideoHeader == 1)
+    {
+        dcLow  = 1;
+        dcHigh = 254;
+        acLow  = -127;
+        acHigh = 127;
+    }
+    else
+    {
+        dcLow  = -2047;
+        dcHigh = 2047;
+        acLow  = -2047;
+        acHigh = 2047;
+    }
+
+    /*
+     * Forward quantization, done in place on all 64 coefficients:
+     *     level = armSignCheck(coef) * ((|coef| - QP/2) / (2*QP))
+     * using integer division throughout.
+     */
+    for (idx = 0; idx < 64; idx++)
+    {
+        const OMX_INT sign  = armSignCheck (pSrcDst[idx]);
+        const OMX_INT level = sign *
+            ((armAbs(pSrcDst[idx]) - (QP / 2)) / (2 * QP));
+
+        /* Index 0 uses the DC limits, every other index the AC limits */
+        if (idx == 0)
+        {
+            pSrcDst[idx] = (OMX_S16) armClip (dcLow, dcHigh, level);
+        }
+        else
+        {
+            pSrcDst[idx] = (OMX_S16) armClip (acLow, acHigh, level);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantIntra_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantIntra_I.c
new file mode 100644
index 0000000..a10da68
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantIntra_I.c
@@ -0,0 +1,153 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_QuantIntra_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules for intra Quantization
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+/**
+ * Function:  omxVCM4P2_QuantIntra_I   (6.2.4.4.2)
+ *
+ * Description:
+ * Performs quantization on intra block coefficients. This function supports 
+ * bits_per_pixel == 8. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input intra block coefficients; must be aligned 
+ *            on a 16-byte boundary. 
+ *   QP - quantization parameter (quantizer_scale). 
+ *   blockIndex - block index indicating the component type and position, 
+ *            valid in the range 0 to 5, as defined in [ISO14496-2], subclause 
+ *            6.1.3.8. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC 
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the output (quantized) intra block coefficients.  
+ *            When shortVideoHeader==1, AC coefficients are saturated on the 
+ *            interval [-127, 127], and DC coefficients are saturated on the 
+ *            interval [1, 254].  When shortVideoHeader==0, AC coefficients 
+ *            are saturated on the interval [-2047, 2047]. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrcDst is NULL. 
+ *    -    blockIndex < 0 or blockIndex >= 10 
+ *    -    QP <= 0 or QP >= 32. 
+ *
+ */
+
+OMXResult omxVCM4P2_QuantIntra_I(
+     OMX_S16 * pSrcDst,
+     OMX_U8 QP,
+     OMX_INT blockIndex,
+     OMX_INT shortVideoHeader
+ )
+{
+    /* Divisor applied to the DC coefficient, pSrcDst[0] */
+    OMX_INT dcDivisor = 0;
+    /* Symmetric clip limit for the AC coefficients */
+    OMX_INT acLimit;
+    OMX_INT idx;
+
+    /* Reject NULL, blockIndex outside [0, 9] and QP outside [1, 31] */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((blockIndex >= 0) && (blockIndex < 10)),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((QP > 0) && (QP < 32)), OMX_Sts_BadArgErr);
+
+    /* Pick the AC clip limit and the DC divisor.  Only shortVideoHeader == 1
+       selects the fixed settings; any other value takes the QP-based path. */
+    if (shortVideoHeader == 1)
+    {
+        acLimit   = 127;
+        dcDivisor = 8;
+    }
+    else
+    {
+        acLimit = 2047;
+
+        if ((blockIndex == 4) || (blockIndex == 5))
+        {
+            /* Block indices 4 and 5 use the second scaler table */
+            if (QP <= 4)
+            {
+                dcDivisor = 8;
+            }
+            else if (QP <= 24)
+            {
+                dcDivisor = (QP + 13) / 2;
+            }
+            else
+            {
+                dcDivisor = QP - 6;
+            }
+        }
+        else
+        {
+            /* Every other block index uses the first scaler table */
+            if (QP <= 4)
+            {
+                dcDivisor = 8;
+            }
+            else if (QP <= 8)
+            {
+                dcDivisor = 2 * QP;
+            }
+            else if (QP <= 24)
+            {
+                dcDivisor = QP + 8;
+            }
+            else
+            {
+                dcDivisor = (2 * QP) - 16;
+            }
+        }
+    }
+
+    /* Quantize the DC term by dividing it by the selected divisor.
+       Only the short-header mode additionally clips it, to [1, 254]. */
+    pSrcDst[0] = armIntDivAwayFromZero (pSrcDst[0], dcDivisor);
+    if (shortVideoHeader == 1)
+    {
+        pSrcDst[0] = (OMX_S16) armClip (1, 254, pSrcDst[0]);
+    }
+
+    /*
+     * Quantize the 63 AC terms in place:
+     *     level = armSignCheck(coef) * (|coef| / (2*QP))
+     * and clip each result to [-acLimit, acLimit].
+     */
+    for (idx = 1; idx < 64; idx++)
+    {
+        const OMX_INT sign  = armSignCheck (pSrcDst[idx]);
+        const OMX_INT level = sign * (armAbs(pSrcDst[idx]) / (2 * QP));
+
+        pSrcDst[idx] = (OMX_S16) armClip (-acLimit, acLimit, level);
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c
new file mode 100644
index 0000000..6e0de5c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c
@@ -0,0 +1,96 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_QuantInvInter_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules for inter inverse Quantization
+ * 
+ */ 
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function:  omxVCM4P2_QuantInvInter_I   (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded 
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to 
+ * the range [-2048, 2047]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input (quantized) intra/inter block; must be 
+ *            aligned on a 16-byte boundary. 
+ *   QP - quantization parameter (quantizer_scale) 
+ *   videoComp - video component type of the current block. Takes one of the 
+ *            following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra 
+ *            version only). 
+ *   shortVideoHeader - binary flag indicating presence of short_video_header 
+ *            (intra version only). 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the output (dequantized) intra/inter block 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; one or more of the following is 
+ *              true: 
+ *    -    pSrcDst is NULL 
+ *    -    QP <= 0 or QP >= 32 
+ *    -    videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. 
+ *
+ */
+
+OMXResult omxVCM4P2_QuantInvInter_I(
+     OMX_S16 * pSrcDst,
+     OMX_INT QP
+     )
+{
+    /* value holds each product in OMX_INT until it has been saturated:
+       (2*|level| + 1) * QP can exceed the OMX_S16 range */
+    OMX_INT coeffCount, Sign, value;
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);
+
+    /* Second inverse quantisation method, in place on all 64 coefficients */
+    for (coeffCount = 0; coeffCount < 64; coeffCount++)
+    {
+        /* check sign */
+        Sign =  armSignCheck (pSrcDst[coeffCount]);
+
+        /* magnitude is (2*|level| + 1) * QP, less one when QP is even */
+        value = (2 * armAbs(pSrcDst[coeffCount]) + 1) * QP;
+        if ((QP & 0x1) == 0)
+        {
+            value -= 1;
+        }
+        value *= Sign;
+
+        /* Saturate while the result is still an OMX_INT: narrowing to
+           OMX_S16 first would let large products wrap around and end
+           up clipped to the wrong end of the range */
+        pSrcDst[coeffCount] = (OMX_S16) armClip (-2048, 2047, value);
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c
new file mode 100644
index 0000000..a946d7b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c
@@ -0,0 +1,153 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_QuantInvIntra_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules for intra inverse Quantization
+ * 
+ */ 
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function:  omxVCM4P2_QuantInvIntra_I   (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded 
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to 
+ * the range [-2048, 2047]. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrcDst - pointer to the input (quantized) intra/inter block; must be 
+ *            aligned on a 16-byte boundary. 
+ *   QP - quantization parameter (quantizer_scale) 
+ *   videoComp - video component type of the current block. Takes one of the 
+ *            following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra 
+ *            version only). 
+ *   shortVideoHeader - binary flag indicating presence of short_video_header 
+ *            (intra version only). 
+ *
+ * Output Arguments:
+ *   
+ *   pSrcDst - pointer to the output (dequantized) intra/inter block 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments; one or more of the following is 
+ *              true: 
+ *    -    pSrcDst is NULL 
+ *    -    QP <= 0 or QP >= 32 
+ *    -    videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. 
+ *
+ */
+
+OMXResult omxVCM4P2_QuantInvIntra_I(
+     OMX_S16 * pSrcDst,
+     OMX_INT QP,
+     OMXVCM4P2VideoComponent videoComp,
+     OMX_INT shortVideoHeader
+)
+{
+    /* dcScaler is initialized to remove a compilation error; value holds
+       every product in OMX_INT until it has been saturated */
+    OMX_INT dcScaler = 0, coeffCount, Sign, value;
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((videoComp != OMX_VC_LUMINANCE) && (videoComp != OMX_VC_CHROMINANCE)), OMX_Sts_BadArgErr);
+
+    /* Calculate the DC scaler value */
+
+    /* linear intra DC mode */
+    if(shortVideoHeader)
+    {
+        dcScaler = 8;
+    }
+    /* nonlinear intra DC mode */
+    else
+    {
+        /* scaler depends on the component type and on the QP band */
+        if (videoComp == OMX_VC_LUMINANCE)
+        {
+            if (QP >= 1 && QP <= 4)
+            {
+                dcScaler = 8;
+            }
+            else if (QP >= 5 && QP <= 8)
+            {
+                dcScaler = 2 * QP;
+            }
+            else if (QP >= 9 && QP <= 24)
+            {
+                dcScaler = QP + 8;
+            }
+            else
+            {
+                dcScaler = (2 * QP) - 16;
+            }
+        }
+
+        else if (videoComp == OMX_VC_CHROMINANCE)
+        {
+            if (QP >= 1 && QP <= 4)
+            {
+                dcScaler = 8;
+            }
+            else if (QP >= 5 && QP <= 24)
+            {
+                dcScaler = (QP + 13)/2;
+            }
+            else
+            {
+                dcScaler = QP - 6;
+            }
+        }
+    }
+    /* Dequantize the DC value in OMX_INT so that the product cannot wrap;
+       this applies to both DC modes */
+    value = pSrcDst[0] * dcScaler;
+
+    /* Saturate before narrowing back to OMX_S16 */
+    pSrcDst[0] = (OMX_S16) armClip (-2048, 2047, value);
+
+    /* Second inverse quantisation method for the 63 AC coefficients */
+    for (coeffCount = 1; coeffCount < 64; coeffCount++)
+    {
+        /* check sign */
+        Sign =  armSignCheck (pSrcDst[coeffCount]);
+
+        /* magnitude is (2*|level| + 1) * QP, less one when QP is even */
+        value = (2 * armAbs(pSrcDst[coeffCount]) + 1) * QP;
+        if ((QP & 0x1) == 0)
+        {
+            value -= 1;
+        }
+        value *= Sign;
+
+        /* Saturate while the result is still an OMX_INT: narrowing to
+           OMX_S16 first would let large products wrap around and end
+           up clipped to the wrong end of the range */
+        pSrcDst[coeffCount] = (OMX_S16) armClip (-2048, 2047, value);
+    }
+
+    return OMX_Sts_NoErr;
+
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c
new file mode 100644
index 0000000..6e0c59b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c
@@ -0,0 +1,108 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_TransRecBlockCoef_inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules DCT->quant and reconstructing the inter texture data
+ * 
+ */ 
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function:  omxVCM4P2_TransRecBlockCoef_inter   (6.2.4.4.5)
+ *
+ * Description:
+ * Implements DCT, and quantizes the DCT coefficients of the inter block 
+ * while reconstructing the texture residual. There is no boundary check for 
+ * the bit stream buffer. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the residuals to be encoded; must be aligned on a 
+ *            16-byte boundary. 
+ *   QP - quantization parameter. 
+ *   shortVideoHeader - binary flag indicating presence of short_video_header; 
+ *                      shortVideoHeader==1 selects linear intra DC mode, and 
+ *                      shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the quantized DCT coefficients buffer; must be aligned 
+ *            on a 16-byte boundary. 
+ *   pRec - pointer to the reconstructed texture residuals; must be aligned 
+ *            on a 16-byte boundary. 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    At least one of the following pointers is either NULL or 
+ *         not 16-byte aligned: 
+ *            - pSrc 
+ *            - pDst
+ *            - pRec
+ *    -    QP <= 0 or QP >= 32. 
+ *
+ */
+
+OMXResult omxVCM4P2_TransRecBlockCoef_inter(
+     const OMX_S16 *pSrc,
+     OMX_S16 * pDst,
+     OMX_S16 * pRec,
+     OMX_U8 QP,
+     OMX_INT shortVideoHeader
+)
+{
+    /* Scratch copy of the quantized block: 64 coefficients plus 8 spare
+       elements so that a 16-byte aligned start can be found inside it */
+    OMX_S16 scratch[72];
+    OMX_S16 *pScratch;
+    OMX_INT n;
+
+    /* Every block pointer must be non-NULL ... */
+    armRetArgErrIf((pSrc == NULL) || (pRec == NULL) || (pDst == NULL),
+                   OMX_Sts_BadArgErr);
+
+    /* ... and 16-byte aligned */
+    armRetArgErrIf(!(armIs16ByteAligned(pSrc) &&
+                     armIs16ByteAligned(pRec) &&
+                     armIs16ByteAligned(pDst)), OMX_Sts_BadArgErr);
+
+    /* QP is accepted only in [1, 31] */
+    armRetArgErrIf(!((QP > 0) && (QP < 32)), OMX_Sts_BadArgErr);
+
+    pScratch = armAlignTo16Bytes(scratch);
+
+    /* Forward path: transform pSrc into pDst, then quantize pDst in place */
+    omxVCM4P2_DCT8x8blk (pSrc, pDst);
+    omxVCM4P2_QuantInter_I (pDst, QP, shortVideoHeader);
+
+    /* Work on a copy so that pDst keeps the quantized coefficients */
+    for (n = 0; n != 64; ++n)
+    {
+        pScratch[n] = pDst[n];
+    }
+
+    /* Inverse path: dequantize the copy and transform it into pRec */
+    omxVCM4P2_QuantInvInter_I (pScratch, QP);
+    omxVCM4P2_IDCT8x8blk (pScratch, pRec);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c
new file mode 100644
index 0000000..dd444f9
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c
@@ -0,0 +1,260 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_TransRecBlockCoef_intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description:
+ * Contains modules DCT->quant and reconstructing the intra texture data
+ * 
+ */ 
+ 
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+
+/**
+ * Function:  omxVCM4P2_TransRecBlockCoef_intra   (6.2.4.4.4)
+ *
+ * Description:
+ * Quantizes the DCT coefficients, implements intra block AC/DC coefficient 
+ * prediction, and reconstructs the current intra block texture for prediction 
+ * on the next frame.  Quantized row and column coefficients are returned in 
+ * the updated coefficient buffers. 
+ *
+ * Input Arguments:
+ *   
+ *   pSrc - pointer to the pixels of current intra block; must be aligned on 
+ *            an 8-byte boundary. 
+ *   pPredBufRow - pointer to the coefficient row buffer containing 
+ *            ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16. 
+ *            Coefficients are organized into blocks of eight as described 
+ *            below (Internal Prediction Coefficient Update Procedures).  The 
+ *            DC coefficient is first, and the remaining buffer locations 
+ *            contain the quantized AC coefficients. Each group of eight row 
+ *            buffer elements combined with one element eight elements ahead 
+ *            contains the coefficient predictors of the neighboring block 
+ *            that is spatially above or to the left of the block currently to 
+ *            be decoded. A negative-valued DC coefficient indicates that this 
+ *            neighboring block is not INTRA-coded or out of bounds, and 
+ *            therefore the AC and DC coefficients are invalid.  Pointer must 
+ *            be aligned on an 8-byte boundary. 
+ *   pPredBufCol - pointer to the prediction coefficient column buffer 
+ *            containing 16 elements of type OMX_S16. Coefficients are 
+ *            organized as described in section 6.2.2.5.  Pointer must be 
+ *            aligned on an 8-byte boundary. 
+ *   pSumErr - pointer to a flag indicating whether or not AC prediction is 
+ *            required; AC prediction is enabled if *pSumErr >=0, but the 
+ *            value is not used for coefficient prediction, i.e., the sum of 
+ *            absolute differences starts from 0 for each call to this 
+ *            function.  Otherwise AC prediction is disabled if *pSumErr < 0 . 
+ *   blockIndex - block index indicating the component type and position, as 
+ *            defined in [ISO14496-2], subclause 6.1.3.8. 
+ *   curQp - quantization parameter of the macroblock to which the current 
+ *            block belongs 
+ *   pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0] 
+ *            contains the quantization parameter associated with the 8x8 
+ *            block left of the current block (QPa), and pQpBuf[1] contains 
+ *            the quantization parameter associated with the 8x8 block above 
+ *            the current block (QPc).  In the event that the corresponding 
+ *            block is outside of the VOP bound, the Qp value will not affect 
+ *            the intra prediction process, as described in [ISO14496-2], 
+ *            sub-clause 7.4.3.3,  Adaptive AC Coefficient Prediction.  
+ *   srcStep - width of the source buffer; must be a multiple of 8. 
+ *   dstStep - width of the reconstructed destination buffer; must be a 
+ *            multiple of 16. 
+ *   shortVideoHeader - binary flag indicating presence of 
+ *            short_video_header; shortVideoHeader==1 selects linear intra DC 
+ *            mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ *
+ * Output Arguments:
+ *   
+ *   pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains 
+ *            the predicted DC coefficient; the remaining entries contain the 
+ *            quantized AC coefficients (without prediction).  The pointer 
+ *            pDst must be aligned on a 16-byte boundary. 
+ *   pRec - pointer to the reconstructed texture; must be aligned on an 
+ *            8-byte boundary. 
+ *   pPredBufRow - pointer to the updated coefficient row buffer 
+ *   pPredBufCol - pointer to the updated coefficient column buffer 
+ *   pPreACPredict - if prediction is enabled, the parameter points to the 
+ *            start of the buffer containing the coefficient differences for 
+ *            VLC encoding. The entry pPreACPredict[0] indicates prediction 
+ *            direction for the current block and takes one of the following 
+ *            values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or 
+ *            OMX_VC_VERTICAL.  The entries 
+ *            pPreACPredict[1]-pPreACPredict[7] contain predicted AC 
+ *            coefficients.  If prediction is disabled (*pSumErr<0) then the 
+ *            contents of this buffer are undefined upon return from the 
+ *            function 
+ *   pSumErr - pointer to the value of the accumulated AC coefficient errors, 
+ *            i.e., sum of the absolute differences between predicted and 
+ *            unpredicted AC coefficients 
+ *
+ * Return Value:
+ *    
+ *    OMX_Sts_NoErr - no error 
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: pSrc, pDst, pRec, 
+ *         pPredBufRow, pPredBufCol, pQpBuf, pPreACPredict, pSumErr. 
+ *    -    blockIndex < 0 or blockIndex >= 10; 
+ *    -    curQP <= 0 or curQP >= 32. 
+ *    -    srcStep, or dstStep <= 0 or not a multiple of 8. 
+ *    -    pDst is not 16-byte aligned. 
+ *    -    At least one of the following pointers is not 8-byte aligned: 
+ *         pSrc, pRec.  
+ *
+ *  Note: The coefficient buffers must be updated in accordance with the 
+ *        update procedures defined in section 6.2.2. 
+ *
+ */
+
+/*
+ * Processing order in this function:
+ *   1. copy the 8x8 source block (row stride srcStep) into a contiguous
+ *      scratch buffer,
+ *   2. DCT and intra quantization,
+ *   3. copy the quantized coefficients to pDst and to a second scratch copy,
+ *   4. select the prediction direction and run AC/DC prediction,
+ *   5. inverse quantize + IDCT the scratch copy and store it to pRec.
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_intra(
+     const OMX_U8 *pSrc,
+     OMX_S16 * pDst,
+     OMX_U8 * pRec,
+     OMX_S16 *pPredBufRow,
+     OMX_S16 *pPredBufCol,
+     OMX_S16 * pPreACPredict,
+     OMX_INT *pSumErr,
+     OMX_INT blockIndex,
+     OMX_U8 curQp,
+     const OMX_U8 *pQpBuf,
+     OMX_INT srcStep,
+     OMX_INT dstStep,
+	 OMX_INT shortVideoHeader
+)
+{
+    /* Each scratch buffer is used for 64 coefficients; the arrays are
+     * declared with 79 elements so that the aligned working pointers
+     * obtained below still have 64 elements available */
+    OMX_S16 tempBuf1[79], tempBuf2[79];
+    OMX_S16 tempBuf3[79];
+    OMX_S16 *pTempBuf1, *pTempBuf2,*pTempBuf3;
+    OMXVCM4P2VideoComponent videoComp;
+    OMX_U8  flag;
+    OMX_INT x, y, count, predDir;
+    OMX_INT predQP, ACPredFlag;
+    
+
+    /* Aligning the local buffers */
+    pTempBuf1 = armAlignTo16Bytes(tempBuf1);
+    pTempBuf2 = armAlignTo16Bytes(tempBuf2);
+    pTempBuf3 = armAlignTo16Bytes(tempBuf3);
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pRec == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pRec), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pPredBufRow == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pPredBufCol == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pPreACPredict == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSumErr == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pQpBuf == NULL, OMX_Sts_BadArgErr);
+    /* Both steps must be positive multiples of 8.
+     * NOTE(review): the function header asks for dstStep to be a multiple
+     * of 16, but only a multiple of 8 is enforced here -- confirm */
+    armRetArgErrIf((srcStep <= 0) || (dstStep <= 0) ||
+                (dstStep & 7) || (srcStep & 7)
+                , OMX_Sts_BadArgErr);
+    armRetArgErrIf((blockIndex < 0) || (blockIndex > 9), OMX_Sts_BadArgErr);
+
+    armRetArgErrIf((curQp <= 0) || (curQp >=32), OMX_Sts_BadArgErr);
+
+
+   /* Setting the videoComp: block indices 0..3 are treated as luminance,
+    * all others as chrominance */
+    if (blockIndex <= 3)
+    {
+        videoComp = OMX_VC_LUMINANCE;
+    }
+    else
+    {
+        videoComp = OMX_VC_CHROMINANCE;
+    }
+    /* Converting from 2-d to 1-d buffer: gather the 8x8 source pixels
+     * (row stride srcStep) into 64 contiguous OMX_S16 values */
+    for (y = 0, count = 0; y < 8; y++)
+    {
+        for(x= 0; x < 8; x++, count++)
+        {
+            pTempBuf1[count] = pSrc[(y*srcStep) + x];
+        }
+    }
+
+    /* Forward transform into pTempBuf2, then quantize pTempBuf2 in place */
+    omxVCM4P2_DCT8x8blk  (pTempBuf1, pTempBuf2);
+    omxVCM4P2_QuantIntra_I(
+        pTempBuf2,
+        curQp,
+        blockIndex,
+        shortVideoHeader);
+
+    /* Converting from 1-D to 2-D buffer.
+     * NOTE(review): pDst is written with a row stride of dstStep here, while
+     * pRec at the end of the function is written as 64 contiguous values.
+     * The function header describes pDst as the coefficient buffer and
+     * dstStep as the width of the reconstructed buffer -- confirm that the
+     * stride is applied to the intended pointer */
+    for (y = 0, count = 0; y < 8; y++)
+    {
+        for(x = 0; x < 8; x++, count++)
+        {
+            /* storing tempbuf2 to tempbuf1: keeps a copy of the quantized
+             * coefficients for the reconstruction step below */
+            pTempBuf1[count] = pTempBuf2[count];
+            pDst[(y*dstStep) + x] = pTempBuf2[count];
+        }
+    }
+
+    /* AC and DC prediction: first obtain the prediction direction and the
+     * quantization parameter of the predictor block */
+    armVCM4P2_SetPredDir(
+        blockIndex,
+        pPredBufRow,
+        pPredBufCol,
+        &predDir,
+        &predQP,
+        pQpBuf);
+
+    /* A predictor QP outside 1..31 is reported as OMX_Sts_BadArgErr */
+    armRetDataErrIf(((predQP <= 0) || (predQP >= 32)), OMX_Sts_BadArgErr);
+
+    /* flag is passed unchanged to armVCM4P2_ACDCPredict; its meaning is
+     * defined by that function */
+    flag = 1;
+    /* A negative *pSumErr on entry disables AC prediction */
+    if (*pSumErr < 0)
+    {
+        ACPredFlag = 0;
+    }
+    else
+    {
+        ACPredFlag = 1;
+    }
+
+    armVCM4P2_ACDCPredict(
+        pTempBuf2,
+        pPreACPredict,
+        pPredBufRow,
+        pPredBufCol,
+        curQp,
+        predQP,
+        predDir,
+        ACPredFlag,
+        videoComp,
+        flag,
+        pSumErr);
+
+    /* Reconstructing the texture data from the copy of the quantized
+     * coefficients saved in pTempBuf1 */
+    omxVCM4P2_QuantInvIntra_I(
+        pTempBuf1,
+        curQp,
+        videoComp,
+        shortVideoHeader);
+    omxVCM4P2_IDCT8x8blk (pTempBuf1, pTempBuf3);
+    /* Negative samples are clamped to 0 before the store to OMX_U8.
+     * NOTE(review): there is no upper clamp here; presumably the IDCT output
+     * is already bounded so the value fits in OMX_U8 -- confirm */
+    for(count = 0; count < 64; count++)
+    {
+        pRec[count] = armMax(0,pTempBuf3[count]);
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/src/armVC_Version.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/src/armVC_Version.c
new file mode 100644
index 0000000..5d93681
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/src/armVC_Version.c
@@ -0,0 +1,6 @@
+#include "omxtypes.h"
+#include "armCOMM_Version.h"
+
+/* Version description string, assembled at compile time from the
+ * ARM_VERSION_STRING, OMX_ARM_RELEASE_TAG, OMX_ARM_BUILD_ARCHITECTURE and
+ * OMX_ARM_BUILD_TOOLCHAIN macros. Only compiled in when
+ * ARM_INCLUDE_VERSION_DESCRIPTIONS is defined. */
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS
+const char * const omxVC_VersionDescription = "ARM OpenMAX DL v" ARM_VERSION_STRING "   Rel=" OMX_ARM_RELEASE_TAG "   Arch=" OMX_ARM_BUILD_ARCHITECTURE "   Tools="  OMX_ARM_BUILD_TOOLCHAIN ;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
diff --git a/media/libstagefright/codecs/on2/h264dec/source/DecTestBench.c b/media/libstagefright/codecs/on2/h264dec/source/DecTestBench.c
new file mode 100755
index 0000000..dcf2ef6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/DecTestBench.c
@@ -0,0 +1,761 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "H264SwDecApi.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*------------------------------------------------------------------------------
+    Module defines
+------------------------------------------------------------------------------*/
+
+/* CHECK_MEMORY_USAGE prints and sums the memory allocated in calls to
+ * H264SwDecMalloc() */
+/* #define CHECK_MEMORY_USAGE */
+
+/* _NO_OUT disables output file writing */
+/* #define _NO_OUT */
+
+/* Debug prints */
+#define DEBUG(argv) printf argv
+
+/* CVS tag name for identification */
+const char tagName[256] = "$Name: FIRST_ANDROID_COPYRIGHT $";
+
+/* Helper functions defined later in this file */
+void WriteOutput(char *filename, u8 *data, u32 picSize);
+u32 NextPacket(u8 **pStrm);
+u32 CropPicture(u8 *pOutImage, u8 *pInImage,
+    u32 picWidth, u32 picHeight, CropParams *pCropParams);
+
+/* Global variables for stream handling */
+/* one past the last byte of the input stream buffer; set in main() and read
+ * by NextPacket() */
+u8 *streamStop = NULL;
+/* set to 1 by the -P option (packet-by-packet mode) */
+u32 packetize = 0;
+/* set to 1 by the -U option (NAL unit stream mode) */
+u32 nalUnitStream = 0;
+/* output file; opened on first use by WriteOutput(), closed in main() */
+FILE *foutput = NULL;
+
+#ifdef SOC_DESIGNER
+
+// Initialisation function defined in InitCache.s
+extern void cache_init(void);
+
+/*------------------------------------------------------------------------------
+
+    Function name:  $Sub$$main
+
+    Purpose:
+        This function is called at the end of the C library initialisation and
+        before main. Its purpose is to do any further initialisation before the
+        application start: it calls cache_init() and then hands over to the
+        original main through $Super$$main. Only compiled when SOC_DESIGNER
+        is defined.
+
+    NOTE(review): argc is declared as char here while main() takes int;
+        confirm that this is intended.
+
+------------------------------------------------------------------------------*/
+int $Sub$$main(char argc, char * argv[])
+{
+  cache_init();                    // does some extra setup work setting up caches
+  return $Super$$main(argc, argv); // calls the original function
+}
+#endif
+
+/*------------------------------------------------------------------------------
+
+    Function name:  main
+
+    Purpose:
+        main function of decoder testbench. Provides command line interface
+        with file I/O for H.264 decoder. Prints out the usage information
+        when executed without arguments.
+
+        The whole input file is read into memory and passed to
+        H264SwDecDecode(), either as one buffer or packet by packet (see
+        NextPacket()). Every picture returned by H264SwDecNextPicture() is
+        written with WriteOutput(), optionally cropped with CropPicture().
+
+    Returns:
+        0 on success (also after printing the usage text or the tag name),
+        1 if concealed macroblocks were reported or no picture was decoded,
+        -1 on file, memory or decoder errors.
+
+------------------------------------------------------------------------------*/
+
+int main(int argc, char **argv)
+{
+
+    u32 i, tmp;
+    u32 maxNumPics = 0;
+    u8 *byteStrmStart;
+    u8 *imageData;
+    u8 *tmpImage = NULL;
+    u32 strmLen;
+    u32 picSize = 0;
+    long fileLen;
+    H264SwDecInst decInst;
+    H264SwDecRet ret;
+    H264SwDecInput decInput;
+    H264SwDecOutput decOutput;
+    H264SwDecPicture decPicture;
+    H264SwDecInfo decInfo;
+    H264SwDecApiVersion decVer;
+    u32 picDecodeNumber;
+    u32 picDisplayNumber;
+    u32 numErrors = 0;
+    u32 cropDisplay = 0;
+    u32 disableOutputReordering = 0;
+
+    FILE *finput;
+
+    char outFileName[256] = "";
+
+    /* Print API version number */
+    decVer = H264SwDecGetAPIVersion();
+    DEBUG(("H.264 Decoder API v%d.%d\n", decVer.major, decVer.minor));
+
+    /* Print tag name if '-T' argument present */
+    if ( argc > 1 && strcmp(argv[1], "-T") == 0 )
+    {
+        DEBUG(("%s\n", tagName));
+        return 0;
+    }
+
+    /* Check that enough command line arguments given, if not -> print usage
+     * information out */
+    if (argc < 2)
+    {
+        DEBUG((
+            "Usage: %s [-Nn] [-Ooutfile] [-P] [-U] [-C] [-R] [-T] file.h264\n",
+            argv[0]));
+        DEBUG(("\t-Nn forces decoding to stop after n pictures\n"));
+#if defined(_NO_OUT)
+        DEBUG(("\t-Ooutfile output writing disabled at compile time\n"));
+#else
+        DEBUG(("\t-Ooutfile write output to \"outfile\" (default out_wxxxhyyy.yuv)\n"));
+        DEBUG(("\t-Onone does not write output\n"));
+#endif
+        DEBUG(("\t-P packet-by-packet mode\n"));
+        DEBUG(("\t-U NAL unit stream mode\n"));
+        DEBUG(("\t-C display cropped image (default decoded image)\n"));
+        DEBUG(("\t-R disable DPB output reordering\n"));
+        DEBUG(("\t-T to print tag name and exit\n"));
+        return 0;
+    }
+
+    /* read command line arguments; the last argument is the input file */
+    for (i = 1; i < (u32)(argc-1); i++)
+    {
+        if ( strncmp(argv[i], "-N", 2) == 0 )
+        {
+            maxNumPics = (u32)atoi(argv[i]+2);
+        }
+        else if ( strncmp(argv[i], "-O", 2) == 0 )
+        {
+            /* bounded copy: an over-long file name is truncated instead of
+             * overflowing outFileName */
+            strncpy(outFileName, argv[i]+2, sizeof(outFileName) - 1);
+            outFileName[sizeof(outFileName) - 1] = '\0';
+        }
+        else if ( strcmp(argv[i], "-P") == 0 )
+        {
+            packetize = 1;
+        }
+        else if ( strcmp(argv[i], "-U") == 0 )
+        {
+            nalUnitStream = 1;
+        }
+        else if ( strcmp(argv[i], "-C") == 0 )
+        {
+            cropDisplay = 1;
+        }
+        else if ( strcmp(argv[i], "-R") == 0 )
+        {
+            disableOutputReordering = 1;
+        }
+    }
+
+    /* open input file for reading, file name given by user. If file open
+     * fails -> exit */
+    finput = fopen(argv[argc-1],"rb");
+    if (finput == NULL)
+    {
+        DEBUG(("UNABLE TO OPEN INPUT FILE\n"));
+        return -1;
+    }
+
+    /* check size of the input file -> length of the stream in bytes.
+     * A failing seek/tell or an empty file cannot be decoded -> exit */
+    fileLen = -1L;
+    if (fseek(finput,0L,SEEK_END) == 0)
+        fileLen = ftell(finput);
+    if (fileLen <= 0)
+    {
+        DEBUG(("UNABLE TO READ INPUT FILE\n"));
+        fclose(finput);
+        return -1;
+    }
+    strmLen = (u32)fileLen;
+    rewind(finput);
+
+    /* allocate memory for stream buffer. if unsuccessful -> exit */
+    byteStrmStart = (u8 *)malloc(sizeof(u8)*strmLen);
+    if (byteStrmStart == NULL)
+    {
+        DEBUG(("UNABLE TO ALLOCATE MEMORY\n"));
+        fclose(finput);
+        return -1;
+    }
+
+    /* read input stream from file to buffer and close input file. A short
+     * read would leave part of the buffer uninitialized -> exit */
+    if (fread(byteStrmStart, sizeof(u8), strmLen, finput) != strmLen)
+    {
+        DEBUG(("UNABLE TO READ INPUT FILE\n"));
+        fclose(finput);
+        free(byteStrmStart);
+        return -1;
+    }
+    fclose(finput);
+
+    /* initialize decoder. If unsuccessful -> exit */
+    ret = H264SwDecInit(&decInst, disableOutputReordering);
+    if (ret != H264SWDEC_OK)
+    {
+        DEBUG(("DECODER INITIALIZATION FAILED\n"));
+        free(byteStrmStart);
+        return -1;
+    }
+
+    /* initialize H264SwDecDecode() input structure */
+    streamStop = byteStrmStart + strmLen;
+    decInput.pStream = byteStrmStart;
+    decInput.dataLen = strmLen;
+    decInput.intraConcealmentMethod = 0;
+
+    /* get pointer to next packet and the size of packet
+     * (for packetize or nalUnitStream modes) */
+    if ( (tmp = NextPacket(&decInput.pStream)) != 0 )
+        decInput.dataLen = tmp;
+
+    picDecodeNumber = picDisplayNumber = 1;
+    /* main decoding loop */
+    do
+    {
+        /* Picture ID is the picture number in decoding order */
+        decInput.picId = picDecodeNumber;
+
+        /* call API function to perform decoding */
+        ret = H264SwDecDecode(decInst, &decInput, &decOutput);
+
+        switch(ret)
+        {
+
+            case H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY:
+                /* Stream headers were successfully decoded
+                 * -> stream information is available for query now */
+
+                ret = H264SwDecGetInfo(decInst, &decInfo);
+                if (ret != H264SWDEC_OK)
+                    return -1;
+
+                DEBUG(("Profile %d\n", decInfo.profile));
+
+                DEBUG(("Width %d Height %d\n",
+                    decInfo.picWidth, decInfo.picHeight));
+
+                if (cropDisplay && decInfo.croppingFlag)
+                {
+                    DEBUG(("Cropping params: (%d, %d) %dx%d\n",
+                        decInfo.cropParams.cropLeftOffset,
+                        decInfo.cropParams.cropTopOffset,
+                        decInfo.cropParams.cropOutWidth,
+                        decInfo.cropParams.cropOutHeight));
+
+                    /* Cropped frame size in planar YUV 4:2:0 */
+                    picSize = decInfo.cropParams.cropOutWidth *
+                              decInfo.cropParams.cropOutHeight;
+                    picSize = (3 * picSize)/2;
+                    /* headers may be decoded more than once; release the
+                     * previous crop buffer (NULL on the first pass) */
+                    free(tmpImage);
+                    tmpImage = malloc(picSize);
+                    if (tmpImage == NULL)
+                        return -1;
+                }
+                else
+                {
+                    /* Decoder output frame size in planar YUV 4:2:0 */
+                    picSize = decInfo.picWidth * decInfo.picHeight;
+                    picSize = (3 * picSize)/2;
+                }
+
+                DEBUG(("videoRange %d, matrixCoefficients %d\n",
+                    decInfo.videoRange, decInfo.matrixCoefficients));
+
+                /* update H264SwDecDecode() input structure, number of bytes
+                 * "consumed" is computed as difference between the new stream
+                 * pointer and old stream pointer */
+                decInput.dataLen -=
+                    (u32)(decOutput.pStrmCurrPos - decInput.pStream);
+                decInput.pStream = decOutput.pStrmCurrPos;
+
+                /* If -O option not used, generate default file name */
+                if (outFileName[0] == 0)
+                    sprintf(outFileName, "out_w%dh%d.yuv",
+                            decInfo.picWidth, decInfo.picHeight);
+                break;
+
+            case H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY:
+                /* Picture is ready and more data remains in input buffer
+                 * -> update H264SwDecDecode() input structure, number of bytes
+                 * "consumed" is computed as difference between the new stream
+                 * pointer and old stream pointer */
+                decInput.dataLen -=
+                    (u32)(decOutput.pStrmCurrPos - decInput.pStream);
+                decInput.pStream = decOutput.pStrmCurrPos;
+                /* fall through */
+
+            case H264SWDEC_PIC_RDY:
+
+                /*lint -esym(644,tmpImage,picSize) variable initialized at
+                 * H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY case */
+
+                if (ret == H264SWDEC_PIC_RDY)
+                    decInput.dataLen = NextPacket(&decInput.pStream);
+
+                /* If enough pictures decoded -> force decoding to end
+                 * by setting that no more stream is available */
+                if (maxNumPics && picDecodeNumber == maxNumPics)
+                    decInput.dataLen = 0;
+
+                /* Increment decoding number for every decoded picture */
+                picDecodeNumber++;
+
+                /* use function H264SwDecNextPicture() to obtain next picture
+                 * in display order. Function is called until no more images
+                 * are ready for display */
+                while ( H264SwDecNextPicture(decInst, &decPicture, 0) ==
+                        H264SWDEC_PIC_RDY )
+                {
+                    DEBUG(("PIC %d, type %s", picDisplayNumber,
+                        decPicture.isIdrPicture ? "IDR" : "NON-IDR"));
+                    if (picDisplayNumber != decPicture.picId)
+                        DEBUG((", decoded pic %d", decPicture.picId));
+                    if (decPicture.nbrOfErrMBs)
+                    {
+                        DEBUG((", concealed %d\n", decPicture.nbrOfErrMBs));
+                    }
+                    else
+                        DEBUG(("\n"));
+                    fflush(stdout);
+
+                    numErrors += decPicture.nbrOfErrMBs;
+
+                    /* Increment display number for every displayed picture */
+                    picDisplayNumber++;
+
+                    /*lint -esym(644,decInfo) always initialized if pictures
+                     * available for display */
+
+                    /* Write output picture to file */
+                    imageData = (u8*)decPicture.pOutputPicture;
+                    if (cropDisplay && decInfo.croppingFlag)
+                    {
+                        /* CropPicture() frees tmpImage itself on failure */
+                        tmp = CropPicture(tmpImage, imageData,
+                            decInfo.picWidth, decInfo.picHeight,
+                            &decInfo.cropParams);
+                        if (tmp)
+                            return -1;
+                        WriteOutput(outFileName, tmpImage, picSize);
+                    }
+                    else
+                    {
+                        WriteOutput(outFileName, imageData, picSize);
+                    }
+                }
+
+                break;
+
+            case H264SWDEC_STRM_PROCESSED:
+            case H264SWDEC_STRM_ERR:
+                /* Input stream was decoded but no picture is ready
+                 * -> Get more data */
+                decInput.dataLen = NextPacket(&decInput.pStream);
+                break;
+
+            default:
+                DEBUG(("FATAL ERROR\n"));
+                return -1;
+
+        }
+    /* keep decoding until all data from input stream buffer consumed */
+    } while (decInput.dataLen > 0);
+
+    /* if output in display order is preferred, the decoder shall be forced
+     * to output pictures remaining in decoded picture buffer. Use function
+     * H264SwDecNextPicture() to obtain next picture in display order. Function
+     * is called until no more images are ready for display. Second parameter
+     * for the function is set to '1' to indicate that this is end of the
+     * stream and all pictures shall be output */
+    while (H264SwDecNextPicture(decInst, &decPicture, 1) == H264SWDEC_PIC_RDY)
+    {
+        DEBUG(("PIC %d, type %s", picDisplayNumber,
+            decPicture.isIdrPicture ? "IDR" : "NON-IDR"));
+        if (picDisplayNumber != decPicture.picId)
+            DEBUG((", decoded pic %d", decPicture.picId));
+        if (decPicture.nbrOfErrMBs)
+        {
+            DEBUG((", concealed %d\n", decPicture.nbrOfErrMBs));
+        }
+        else
+            DEBUG(("\n"));
+        fflush(stdout);
+
+        numErrors += decPicture.nbrOfErrMBs;
+
+        /* Increment display number for every displayed picture */
+        picDisplayNumber++;
+
+        /* Write output picture to file */
+        imageData = (u8*)decPicture.pOutputPicture;
+        if (cropDisplay && decInfo.croppingFlag)
+        {
+            /* CropPicture() frees tmpImage itself on failure */
+            tmp = CropPicture(tmpImage, imageData,
+                decInfo.picWidth, decInfo.picHeight,
+                &decInfo.cropParams);
+            if (tmp)
+                return -1;
+            WriteOutput(outFileName, tmpImage, picSize);
+        }
+        else
+        {
+            WriteOutput(outFileName, imageData, picSize);
+        }
+    }
+
+    /* release decoder instance */
+    H264SwDecRelease(decInst);
+
+    if (foutput)
+        fclose(foutput);
+
+    /* free allocated buffers */
+    free(byteStrmStart);
+    free(tmpImage);
+
+    DEBUG(("Output file: %s\n", outFileName));
+
+    DEBUG(("DECODING DONE\n"));
+    /* picDecodeNumber still at its initial value means nothing was decoded */
+    if (numErrors || picDecodeNumber == 1)
+    {
+        DEBUG(("ERRORS FOUND\n"));
+        return 1;
+    }
+
+    return 0;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  WriteOutput
+
+    Purpose:
+        Append the picture pointed to by data to the output file. picSize is
+        the number of bytes written. The output file (global foutput) is
+        opened on the first call using filename; the special name "none"
+        disables opening, and defining _NO_OUT removes the open at compile
+        time. If the file cannot be opened the program exits with code 100.
+
+------------------------------------------------------------------------------*/
+void WriteOutput(char *filename, u8 *data, u32 picSize)
+{
+
+    /* first call: foutput (global file pointer) is not open yet */
+    if (!foutput && strcmp(filename, "none") != 0)
+    {
+#if !defined(_NO_OUT)
+        foutput = fopen(filename, "wb");
+        if (!foutput)
+        {
+            DEBUG(("UNABLE TO OPEN OUTPUT FILE\n"));
+            exit(100);
+        }
+#endif
+    }
+
+    /* nothing to do without an open file or without picture data */
+    if (!foutput || !data)
+        return;
+
+    fwrite(data, 1, picSize, foutput);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: NextPacket
+
+    Purpose:
+        Get the pointer to start of next packet in input stream. Uses
+        global variables 'packetize' and 'nalUnitStream' to determine the
+        decoder input stream mode and 'streamStop' to determine the end
+        of stream. There are three possible stream modes:
+            default - the whole stream at once
+            packetize - a single NAL-unit with start code prefix
+            nalUnitStream - a single NAL-unit without start code prefix
+
+        pStrm stores pointer to the start of previous decoder input and is
+        replaced with pointer to the start of the next decoder input.
+
+        Returns the packet size in bytes. Returns 0 in default mode and
+        when no bytes are left before 'streamStop'.
+
+        The length of the previous packet is remembered in a function-local
+        static variable, so the function tracks one stream position across
+        calls. Exits the program with code 100 if no valid start code prefix
+        is found.
+
+------------------------------------------------------------------------------*/
+u32 NextPacket(u8 **pStrm)
+{
+
+    u32 index;
+    u32 maxIndex;
+    u32 zeroCount;
+    u8 *stream;
+    u8 byte;
+    /* offset from *pStrm to the end of the packet returned by the previous
+     * call (0 on the first call) */
+    static u32 prevIndex=0;
+
+    /* For default stream mode all the stream is in first packet */
+    if (!packetize && !nalUnitStream)
+        return 0;
+
+    index = 0;
+    /* continue from where the previous packet ended */
+    stream = *pStrm + prevIndex;
+    /* number of bytes left in the stream buffer */
+    maxIndex = (u32)(streamStop - stream);
+
+    if (maxIndex == 0)
+        return(0);
+
+    /* leading zeros of first NAL unit: scan forward until a byte with
+     * value 1 is read or the end of the buffer is reached */
+    do
+    {
+        byte = stream[index++];
+    } while (byte != 1 && index < maxIndex);
+
+    /* invalid start code prefix: the scan ran to the end of the buffer, or
+     * the byte with value 1 was one of the first two bytes */
+    if (index == maxIndex || index < 3)
+    {
+        DEBUG(("INVALID BYTE STREAM\n"));
+        exit(100);
+    }
+
+    /* nalUnitStream is without start code prefix: make the packet start
+     * right after the prefix that was just scanned */
+    if (nalUnitStream)
+    {
+        stream += index;
+        maxIndex -= index;
+        index = 0;
+    }
+
+    /* number of consecutive zero bytes seen immediately before the current
+     * position */
+    zeroCount = 0;
+
+    /* Search stream for next start code prefix */
+    /*lint -e(716) while(1) used consciously */
+    while (1)
+    {
+        byte = stream[index++];
+        if (!byte)
+            zeroCount++;
+
+        if ( (byte == 0x01) && (zeroCount >= 2) )
+        {
+            /* Start code prefix has two zeros
+             * Third zero is assumed to be leading zero of next packet
+             * Fourth and more zeros are assumed to be trailing zeros of this
+             * packet */
+            if (zeroCount > 3)
+            {
+                /* step back over three zeros and the 0x01; zeroCount is
+                 * left as the number of trailing zeros of this packet */
+                index -= 4;
+                zeroCount -= 3;
+            }
+            else
+            {
+                /* step back over all counted zeros and the 0x01 */
+                index -= zeroCount+1;
+                zeroCount = 0;
+            }
+            break;
+        }
+        else if (byte)
+            zeroCount = 0;
+
+        /* end of buffer: the last packet runs to the end of the stream */
+        if (index == maxIndex)
+        {
+            break;
+        }
+
+    }
+
+    /* Store pointer to the beginning of the packet */
+    *pStrm = stream;
+    /* remembered before trailing zeros are removed from the returned size,
+     * so the next call starts after them */
+    prevIndex = index;
+
+    /* nalUnitStream is without trailing zeros */
+    if (nalUnitStream)
+        index -= zeroCount;
+
+    return(index);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: CropPicture
+
+    Purpose:
+        Perform cropping for picture. Input picture pInImage with dimensions
+        picWidth x picHeight is cropped with pCropParams and the resulting
+        picture is stored in pOutImage. Both pictures are planar YUV 4:2:0:
+        the luma plane is followed by the cb and cr planes, each with half
+        the luma width and height.
+
+    Returns:
+        0 on success. 1 if an argument is NULL or zero, or if the cropping
+        rectangle does not fit inside the input picture; in that case
+        pOutImage has been freed and must not be used by the caller.
+
+------------------------------------------------------------------------------*/
+u32 CropPicture(u8 *pOutImage, u8 *pInImage,
+    u32 picWidth, u32 picHeight, CropParams *pCropParams)
+{
+
+    u32 row;
+    u32 outWidth, outHeight;
+    u32 chromaStride, chromaOffset;
+    u8 *pOut, *pIn;
+
+    if (pOutImage == NULL || pInImage == NULL || pCropParams == NULL ||
+        !picWidth || !picHeight)
+    {
+        /* just to prevent lint warning, returning non-zero will result in
+         * return without freeing the memory */
+        free(pOutImage);
+        return(1);
+    }
+
+    /* The cropping rectangle must lie inside the picture. Written as
+     * subtractions so that offset + size cannot wrap around */
+    if ( (pCropParams->cropLeftOffset > picWidth) ||
+         (pCropParams->cropOutWidth >
+           picWidth - pCropParams->cropLeftOffset) ||
+         (pCropParams->cropTopOffset > picHeight) ||
+         (pCropParams->cropOutHeight >
+           picHeight - pCropParams->cropTopOffset) )
+    {
+        /* just to prevent lint warning, returning non-zero will result in
+         * return without freeing the memory */
+        free(pOutImage);
+        return(1);
+    }
+
+    outWidth = pCropParams->cropOutWidth;
+    outHeight = pCropParams->cropOutHeight;
+
+    /* Calculate starting pointer for luma */
+    pIn = pInImage + pCropParams->cropTopOffset*picWidth +
+        pCropParams->cropLeftOffset;
+    pOut = pOutImage;
+
+    /* Copy luma pixel values, one output row at a time */
+    for (row = 0; row < outHeight; row++)
+    {
+        memcpy(pOut, pIn, outWidth);
+        pOut += outWidth;
+        pIn += picWidth;
+    }
+
+    /* Chroma planes have half the width and half the height of luma */
+    outWidth >>= 1;
+    outHeight >>= 1;
+    chromaStride = picWidth/2;
+
+    /* Offset of the first cropped sample inside a chroma plane: whole
+     * chroma rows first, so the start stays row-aligned even for an odd
+     * cropTopOffset */
+    chromaOffset = (pCropParams->cropTopOffset/2)*chromaStride +
+        pCropParams->cropLeftOffset/2;
+
+    /* Calculate starting pointer for cb */
+    pIn = pInImage + picWidth*picHeight + chromaOffset;
+
+    /* Copy cb pixel values */
+    for (row = 0; row < outHeight; row++)
+    {
+        memcpy(pOut, pIn, outWidth);
+        pOut += outWidth;
+        pIn += chromaStride;
+    }
+
+    /* Calculate starting pointer for cr */
+    pIn = pInImage + 5*picWidth*picHeight/4 + chromaOffset;
+
+    /* Copy cr pixel values */
+    for (row = 0; row < outHeight; row++)
+    {
+        memcpy(pOut, pIn, outWidth);
+        pOut += outWidth;
+        pIn += chromaStride;
+    }
+
+    return (0);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecTrace
+
+    Purpose:
+        Example implementation of H264SwDecTrace function. Prototype of this
+        function is given in H264SwDecApi.h. Each call appends the message
+        followed by a newline to the file 'dec_api.trc'; the message is
+        silently dropped if the file cannot be opened.
+
+------------------------------------------------------------------------------*/
+void H264SwDecTrace(char *string)
+{
+    /* the trace file is opened in append mode and closed on every call */
+    FILE *traceFile = fopen("dec_api.trc", "at");
+
+    if (traceFile != NULL)
+    {
+        fputs(string, traceFile);
+        fputc('\n', traceFile);
+        fclose(traceFile);
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecMalloc
+
+    Purpose:
+        Example implementation of H264SwDecMalloc function. Prototype of this
+        function is given in H264SwDecApi.h. The request is forwarded to the
+        library function malloc; when CHECK_MEMORY_USAGE is defined the
+        requested size and a running total are printed as well.
+
+------------------------------------------------------------------------------*/
+void* H264SwDecMalloc(u32 size)
+{
+    void *block;
+
+#if defined(CHECK_MEMORY_USAGE)
+    /* Running sum of all requested sizes. It is never decreased, so the
+     * total is not meaningful once the decoder frees and reallocates
+     * buffers */
+    static u32 totalRequested = 0;
+    totalRequested += size;
+    DEBUG(("Allocated %d bytes, total %d\n", size, totalRequested));
+#endif
+
+    block = malloc(size);
+    return block;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecFree
+
+    Purpose:
+        Example implementation of H264SwDecFree function. Prototype of this
+        function is given in H264SwDecApi.h. The pointer is handed to the
+        library function free.
+
+------------------------------------------------------------------------------*/
+void H264SwDecFree(void *ptr)
+{
+    /* free() accepts NULL, so the guard only makes that case explicit */
+    if (ptr != NULL)
+    {
+        free(ptr);
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecMemcpy
+
+    Purpose:
+        Example implementation of H264SwDecMemcpy function. Prototype of this
+        function is given in H264SwDecApi.h. Copies count bytes from src to
+        dest with the library function memcpy.
+
+------------------------------------------------------------------------------*/
+void H264SwDecMemcpy(void *dest, void *src, u32 count)
+{
+    /* the pointer returned by memcpy is not needed */
+    (void)memcpy(dest, src, count);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecMemset
+
+    Purpose:
+        Example implementation of H264SwDecMemset function. Prototype of this
+        function is given in H264SwDecApi.h. Fills count bytes starting at
+        ptr with value using the library function memset.
+
+------------------------------------------------------------------------------*/
+void H264SwDecMemset(void *ptr, i32 value, u32 count)
+{
+    /* the pointer returned by memset is not needed */
+    (void)memset(ptr, value, count);
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/EvaluationTestBench.c b/media/libstagefright/codecs/on2/h264dec/source/EvaluationTestBench.c
new file mode 100755
index 0000000..aadc75f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/EvaluationTestBench.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "H264SwDecApi.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+void WriteOutput(FILE *fid, u8 *data, u32 picSize);
+
+/*------------------------------------------------------------------------------
+
+    Function name:  main
+
+    Purpose:
+        main function. Assuming that executable is named 'decoder' the usage
+        is as follows
+
+            decoder inputFileName
+
+        , where inputFileName shall be name of file containing h264 stream
+        data.
+
+------------------------------------------------------------------------------*/
+int main(int argc, char **argv)
+{
+    u8 *byteStrmStart;
+    u8 *byteStrm;
+    u32 strmLen;
+    u32 picSize = 0; /* stays 0 (nothing written) until headers are decoded */
+    H264SwDecInst decInst;
+    H264SwDecRet ret;
+    H264SwDecInput decInput;
+    H264SwDecOutput decOutput;
+    H264SwDecPicture decPicture;
+    H264SwDecInfo decInfo;
+    u32 picNumber;
+
+    FILE *finput;
+    FILE *foutput;
+
+    /* Check that enough command line arguments given, if not -> print usage
+     * information out */
+    if (argc < 2)
+    {
+        printf( "Usage: %s file.h264\n", argv[0]);
+        return -1;
+    }
+
+    /* open output file for writing, output file named out.yuv. If file open
+     * fails -> exit */
+    foutput = fopen("out.yuv", "wb");
+    if (foutput == NULL)
+    {
+        printf("UNABLE TO OPEN OUTPUT FILE\n");
+        return -1;
+    }
+
+    /* open input file for reading, file name given by user. If file open
+     * fails -> exit */
+    finput = fopen(argv[argc-1], "rb");
+    if (finput == NULL)
+    {
+        printf("UNABLE TO OPEN INPUT FILE\n");
+        return -1;
+    }
+
+    /* check size of the input file -> length of the stream in bytes */
+    fseek(finput, 0L, SEEK_END);
+    strmLen = (u32)ftell(finput);
+    rewind(finput);
+
+    /* allocate memory for stream buffer, exit if unsuccessful */
+    byteStrm = byteStrmStart = (u8 *)H264SwDecMalloc(sizeof(u8)*strmLen);
+    if (byteStrm == NULL)
+    {
+        printf("UNABLE TO ALLOCATE MEMORY\n");
+        return -1;
+    }
+
+    /* read input stream to buffer, keep only the bytes actually read */
+    strmLen = (u32)fread(byteStrm, sizeof(u8), strmLen, finput);
+    fclose(finput);
+
+    /* initialize decoder. If unsuccessful -> exit */
+    ret = H264SwDecInit(&decInst, 0);
+    if (ret != H264SWDEC_OK)
+    {
+        printf("DECODER INITIALIZATION FAILED\n");
+        return -1;
+    }
+
+    /* initialize H264SwDecDecode() input structure */
+    decInput.pStream = byteStrmStart;
+    decInput.dataLen = strmLen;
+    decInput.intraConcealmentMethod = 0;
+    decInput.picId = 0; /* read by H264SwDecDecode(), must not be garbage */
+
+    picNumber = 0;
+
+    /* For performance measurements, read the start time (in seconds) here.
+     * The decoding time should be measured over several frames and after
+     * that average fps (frames/second) can be calculated.
+     *
+     * startTime = GetTime();
+     *
+     * To prevent calculating file I/O latencies as a decoding time,
+     * comment out WriteOutput function call. Also prints to stdout might
+     * consume considerable amount of cycles during measurement */
+
+    /* main decoding loop */
+    do
+    {
+        /* call API function to perform decoding */
+        ret = H264SwDecDecode(decInst, &decInput, &decOutput);
+
+        switch(ret)
+        {
+            case H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY:
+                /* picture dimensions are available for query now */
+                ret = H264SwDecGetInfo(decInst, &decInfo);
+                if (ret != H264SWDEC_OK)
+                    return -1;
+
+                /* picture size in pixels */
+                picSize = decInfo.picWidth * decInfo.picHeight;
+                /* memory needed for YCbCr 4:2:0 picture in bytes */
+                picSize = (3 * picSize)/2;
+                /* memory needed for 16-bit RGB picture in bytes
+                 * picSize = (decInfo.picWidth * decInfo.picHeight) * 2; */
+
+                printf("Width %d Height %d\n",
+                    decInfo.picWidth, decInfo.picHeight);
+
+                /* update H264SwDecDecode() input structure, number of bytes
+                 * "consumed" is computed as difference between the new stream
+                 * pointer and old stream pointer */
+                decInput.dataLen -=
+                    (u32)(decOutput.pStrmCurrPos - decInput.pStream);
+                decInput.pStream = decOutput.pStrmCurrPos;
+                break;
+
+            case H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY:
+            case H264SWDEC_PIC_RDY:
+                /* update H264SwDecDecode() input structure, number of bytes
+                 * "consumed" is computed as difference between the new stream
+                 * pointer and old stream pointer */
+                decInput.dataLen -=
+                    (u32)(decOutput.pStrmCurrPos - decInput.pStream);
+                decInput.pStream = decOutput.pStrmCurrPos;
+
+                /* use function H264SwDecNextPicture() to obtain next picture
+                 * in display order. Function is called until no more images
+                 * are ready for display */
+                while (H264SwDecNextPicture(decInst, &decPicture, 0) ==
+                    H264SWDEC_PIC_RDY) {
+                    picNumber++;
+                    printf("PIC %d, type %s, concealed %d\n", picNumber,
+                        decPicture.isIdrPicture ? "IDR" : "NON-IDR",
+                        decPicture.nbrOfErrMBs);
+                    fflush(stdout);
+
+                    /* Do color conversion if needed to get display image
+                     * in RGB-format
+                     *
+                     * YuvToRgb( decPicture.pOutputPicture, pRgbPicture ); */
+
+                    /* write next display image to output file */
+                    WriteOutput(foutput, (u8*)decPicture.pOutputPicture,
+                        picSize);
+                }
+                break;
+
+            case H264SWDEC_STRM_PROCESSED:
+                /* whole buffer was consumed without completing a picture:
+                 * not an error, stop decoding and flush remaining pictures */
+                decInput.dataLen = 0;
+                break;
+
+            case H264SWDEC_EVALUATION_LIMIT_EXCEEDED:
+                /* evaluation version of the decoder has limited decoding
+                 * capabilities */
+                printf("EVALUATION LIMIT REACHED\n");
+                goto end;
+
+            default:
+                printf("UNRECOVERABLE ERROR\n");
+                return -1;
+        }
+    /* keep decoding until all data from input stream buffer consumed */
+    } while (decInput.dataLen > 0);
+
+end:
+
+    /* if output in display order is preferred, the decoder shall be forced
+     * to output pictures remaining in decoded picture buffer. Use function
+     * H264SwDecNextPicture() to obtain next picture in display order. Function
+     * is called until no more images are ready for display. Second parameter
+     * for the function is set to '1' to indicate that this is end of the
+     * stream and all pictures shall be output */
+    while (H264SwDecNextPicture(decInst, &decPicture, 1) ==
+        H264SWDEC_PIC_RDY) {
+
+        picNumber++;
+
+        printf("PIC %d, type %s, concealed %d\n", picNumber,
+            decPicture.isIdrPicture ? "IDR" : "NON-IDR",
+            decPicture.nbrOfErrMBs);
+        fflush(stdout);
+
+        /* Do color conversion if needed to get display image
+         * in RGB-format
+         *
+         * YuvToRgb( decPicture.pOutputPicture, pRgbPicture ); */
+
+        /* write next display image to output file */
+        WriteOutput(foutput, (u8*)decPicture.pOutputPicture, picSize);
+    }
+
+    /* For performance measurements, read the end time (in seconds) here.
+     *
+     * endTime = GetTime();
+     *
+     * Now the performance can be calculated as frames per second:
+     * fps = picNumber / (endTime - startTime); */
+
+    /* release decoder instance */
+    H264SwDecRelease(decInst);
+
+    /* close output file */
+    fclose(foutput);
+
+    /* buffer came from H264SwDecMalloc(), so release it with its counterpart */
+    H264SwDecFree(byteStrmStart);
+
+    return 0;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  WriteOutput
+
+    Purpose:
+        Write picSize bytes of the picture pointed to by data to the file
+        pointed to by fid. The result of fwrite() is not checked.
+
+------------------------------------------------------------------------------*/
+void WriteOutput(FILE *fid, u8 *data, u32 picSize)
+{
+    fwrite(data, 1, picSize, fid);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecTrace
+
+    Purpose:
+        Example implementation of H264SwDecTrace function. Prototype of this
+        function is given in H264SwDecApi.h. This implementation appends
+        trace messages to file named 'dec_api.trc'.
+
+------------------------------------------------------------------------------*/
+void H264SwDecTrace(char *string)
+{
+    /* append mode: messages written by earlier calls are kept */
+    FILE *traceFile = fopen("dec_api.trc", "at");
+
+    if (traceFile != NULL)
+    {
+        /* one trace message per line */
+        fputs(string, traceFile);
+        fputc('\n', traceFile);
+
+        fclose(traceFile);
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecMalloc
+
+    Purpose:
+        Example implementation of the H264SwDecMalloc function; its prototype
+        is given in H264SwDecApi.h. The request is passed to the library
+        function malloc(), whose result (NULL on failure) is returned as is.
+
+------------------------------------------------------------------------------*/
+void* H264SwDecMalloc(u32 size)
+{
+    return malloc(size);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecFree
+
+    Purpose:
+        Example implementation of the H264SwDecFree function; its prototype
+        is given in H264SwDecApi.h. The pointer is handed unchanged to the
+        library function free(), so a NULL ptr is accepted and ignored.
+
+------------------------------------------------------------------------------*/
+void H264SwDecFree(void *ptr)
+{
+    free(ptr);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecMemcpy
+
+    Purpose:
+        Example implementation of the H264SwDecMemcpy function; its prototype
+        is given in H264SwDecApi.h. Copies count bytes from src to dest with
+        the library function memcpy(), so the two areas must not overlap.
+
+------------------------------------------------------------------------------*/
+void H264SwDecMemcpy(void *dest, void *src, u32 count)
+{
+    memcpy(dest, src, count);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecMemset
+
+    Purpose:
+        Example implementation of the H264SwDecMemset function; its prototype
+        is given in H264SwDecApi.h. Fills count bytes starting at ptr using
+        the library function memset(), which stores value as unsigned char.
+
+------------------------------------------------------------------------------*/
+void H264SwDecMemset(void *ptr, i32 value, u32 count)
+{
+    memset(ptr, value, count);
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/H264SwDecApi.c b/media/libstagefright/codecs/on2/h264dec/source/H264SwDecApi.c
new file mode 100644
index 0000000..2bb4c4d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/H264SwDecApi.c
@@ -0,0 +1,567 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          H264SwDecInit
+          H264SwDecGetInfo
+          H264SwDecRelease
+          H264SwDecDecode
+          H264SwDecGetAPIVersion
+          H264SwDecNextPicture
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+#include <stdlib.h>
+#include <string.h>
+#include "basetype.h"
+#include "h264bsd_container.h"
+#include "H264SwDecApi.h"
+#include "h264bsd_decoder.h"
+#include "h264bsd_util.h"
+/*------------------------------------------------------------------------------
+       Version Information
+------------------------------------------------------------------------------*/
+
+#define H264SWDEC_MAJOR_VERSION 2
+#define H264SWDEC_MINOR_VERSION 3
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+H264DEC_TRACE           Trace H264 Decoder API function calls.
+H264DEC_EVALUATION      Compile evaluation version, restricts number of frames
+                        that can be decoded
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+#ifdef H264DEC_TRACE
+#include <stdio.h>
+#define DEC_API_TRC(str)    H264SwDecTrace(str)
+#else
+#define DEC_API_TRC(str)
+#endif
+
+#ifdef H264DEC_EVALUATION
+#define H264DEC_EVALUATION_LIMIT   500
+#endif
+
+void H264SwDecTrace(char *string) {      /* trace hook: deliberately empty */
+}
+
+void* H264SwDecMalloc(u32 size) {        /* allocation hook: plain malloc() */
+    return malloc(size);
+}
+
+void H264SwDecFree(void *ptr) {          /* release hook: plain free() */
+    free(ptr);
+}
+
+void H264SwDecMemcpy(void *dest, void *src, u32 count) {   /* via memcpy() */
+    memcpy(dest, src, count);
+}
+
+void H264SwDecMemset(void *ptr, i32 value, u32 count) {    /* via memset() */
+    memset(ptr, value, count);
+}
+
+
+/*------------------------------------------------------------------------------
+
+    Function: H264SwDecInit()
+
+        Functional description:
+            Initialize decoder software. Function reserves memory for the
+            decoder instance and calls h264bsdInit to initialize the
+            instance data.
+
+        Inputs:
+            noOutputReordering  flag to indicate decoder that it doesn't have
+                                to try to provide output pictures in display
+                                order, saves memory
+
+        Outputs:
+            decInst             pointer to initialized instance is stored here
+
+        Returns:
+            H264SWDEC_OK        successfully initialized the instance
+            H264SWDEC_INITFAIL  right shift of negative values is not signed
+            H264SWDEC_PARAM_ERR decInst is NULL
+            H264SWDEC_MEMFAIL   memory allocation or h264bsdInit failed
+
+------------------------------------------------------------------------------*/
+
+H264SwDecRet H264SwDecInit(H264SwDecInst *decInst, u32 noOutputReordering)
+{
+    u32 rv = 0;
+
+    decContainer_t *pDecCont;
+
+    DEC_API_TRC("H264SwDecInit#");
+
+    /* refuse to run unless right shift of a negative number keeps the sign */
+    /*lint -save -e* following check causes multiple lint messages */
+    if ( ((-1)>>1) != (-1) )
+    {
+        DEC_API_TRC("H264SwDecInit# ERROR: Right shift is not signed");
+        return(H264SWDEC_INITFAIL);
+    }
+    /*lint -restore */
+
+    if (decInst == NULL)
+    {
+        DEC_API_TRC("H264SwDecInit# ERROR: decInst == NULL");
+        return(H264SWDEC_PARAM_ERR);
+    }
+
+    pDecCont = (decContainer_t *)H264SwDecMalloc(sizeof(decContainer_t));
+
+    if (pDecCont == NULL)
+    {
+        DEC_API_TRC("H264SwDecInit# ERROR: Memory allocation failed");
+        return(H264SWDEC_MEMFAIL);
+    }
+
+#ifdef H264DEC_TRACE
+    sprintf(pDecCont->str, "H264SwDecInit# decInst %p noOutputReordering %d",
+            (void*)decInst, noOutputReordering);
+    DEC_API_TRC(pDecCont->str);
+#endif
+
+    rv = h264bsdInit(&pDecCont->storage, noOutputReordering);
+    if (rv != HANTRO_OK)
+    {
+        H264SwDecRelease(pDecCont);     /* shuts down and frees pDecCont */
+        return(H264SWDEC_MEMFAIL);      /* regardless of the rv value */
+    }
+
+    pDecCont->decStat  = INITIALIZED;
+    pDecCont->picNumber = 0;
+
+#ifdef H264DEC_TRACE
+    sprintf(pDecCont->str, "H264SwDecInit# OK: return %p", (void*)pDecCont);
+    DEC_API_TRC(pDecCont->str);
+#endif
+
+    *decInst = (decContainer_t *)pDecCont;  /* written only on success */
+
+    return(H264SWDEC_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: H264SwDecGetInfo()
+
+        Functional description:
+            This function provides read access to decoder information. This
+            function should not be called before H264SwDecDecode function has
+            indicated that headers are ready.
+
+        Inputs:
+            decInst     decoder instance
+
+        Outputs:
+            pDecInfo    pointer to info struct where data is written
+
+        Returns:
+            H264SWDEC_OK            success
+            H264SWDEC_PARAM_ERR     invalid parameters
+            H264SWDEC_HDRS_NOT_RDY  information not available yet
+
+------------------------------------------------------------------------------*/
+
+H264SwDecRet H264SwDecGetInfo(H264SwDecInst decInst, H264SwDecInfo *pDecInfo)
+{
+    /* Copies picture and stream properties from the instance to *pDecInfo */
+    storage_t *pStorage;
+
+    DEC_API_TRC("H264SwDecGetInfo#");
+
+    if (decInst == NULL || pDecInfo == NULL)
+    {
+        DEC_API_TRC("H264SwDecGetInfo# ERROR: decInst or pDecInfo is NULL");
+        return(H264SWDEC_PARAM_ERR);
+    }
+
+    pStorage = &(((decContainer_t *)decInst)->storage);
+
+    /* nothing can be reported until both parameter set pointers are set */
+    if (pStorage->activeSps == NULL || pStorage->activePps == NULL)
+    {
+        DEC_API_TRC("H264SwDecGetInfo# ERROR: Headers not decoded yet");
+        return(H264SWDEC_HDRS_NOT_RDY);
+    }
+
+#ifdef H264DEC_TRACE
+    sprintf(((decContainer_t*)decInst)->str,
+        "H264SwDecGetInfo# decInst %p  pDecInfo %p", decInst, (void*)pDecInfo);
+    DEC_API_TRC(((decContainer_t*)decInst)->str);
+#endif
+
+    /* h264bsdPicWidth and -Height return dimensions in macroblock units,
+     * << 4 (times 16) converts them to the pixel units of picWidth/-Height */
+    pDecInfo->picWidth        = h264bsdPicWidth(pStorage) << 4;
+    pDecInfo->picHeight       = h264bsdPicHeight(pStorage) << 4;
+    pDecInfo->videoRange      = h264bsdVideoRange(pStorage);
+    pDecInfo->matrixCoefficients = h264bsdMatrixCoefficients(pStorage);
+
+    h264bsdCroppingParams(pStorage,
+        &pDecInfo->croppingFlag,
+        &pDecInfo->cropParams.cropLeftOffset,
+        &pDecInfo->cropParams.cropOutWidth,
+        &pDecInfo->cropParams.cropTopOffset,
+        &pDecInfo->cropParams.cropOutHeight);
+
+    /* sample aspect ratio */
+    h264bsdSampleAspectRatio(pStorage,
+                             &pDecInfo->parWidth,
+                             &pDecInfo->parHeight);
+
+    /* profile */
+    pDecInfo->profile = h264bsdProfile(pStorage);
+
+    DEC_API_TRC("H264SwDecGetInfo# OK");
+
+    return(H264SWDEC_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: H264SwDecRelease()
+
+        Functional description:
+            Release the decoder instance. Function calls h264bsdShutDown to
+            release instance data and frees the memory allocated for the
+            instance.
+
+        Inputs:
+            decInst     Decoder instance
+
+        Outputs:
+            none
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void H264SwDecRelease(H264SwDecInst decInst)
+{
+    /* Shuts down the instance storage and frees the container itself */
+    decContainer_t *pDecCont;
+
+    DEC_API_TRC("H264SwDecRelease#");
+
+    /* a NULL instance is traced and otherwise ignored */
+    if (decInst == NULL)
+    {
+        DEC_API_TRC("H264SwDecRelease# ERROR: decInst == NULL");
+        return;
+    }
+
+    pDecCont = (decContainer_t*)decInst;
+
+#ifdef H264DEC_TRACE
+    sprintf(pDecCont->str, "H264SwDecRelease# decInst %p",decInst);
+    DEC_API_TRC(pDecCont->str);
+#endif
+
+    h264bsdShutdown(&pDecCont->storage);
+
+    H264SwDecFree(pDecCont);    /* decInst must not be used after this */
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: H264SwDecDecode
+
+        Functional description:
+            Decode stream data. Calls h264bsdDecode to do the actual decoding.
+
+        Input:
+            decInst     decoder instance
+            pInput      pointer to input struct
+
+        Outputs:
+            pOutput     pointer to output struct
+
+        Returns:
+            H264SWDEC_NOT_INITIALIZED   decoder instance not initialized yet
+            H264SWDEC_PARAM_ERR         invalid parameters
+            H264SWDEC_MEMFAIL           memory allocation failed while decoding
+            H264SWDEC_STRM_PROCESSED    stream buffer decoded
+            H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY   headers decoded,
+                                                stream buffer not finished
+            H264SWDEC_PIC_RDY                   decoding of a picture finished
+            H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY    decoding of a picture finished,
+                                                stream buffer not finished
+            H264SWDEC_STRM_ERR                  serious error in decoding, no
+                                                valid parameter sets available
+                                                to decode picture data
+            H264SWDEC_EVALUATION_LIMIT_EXCEEDED this can only occur when
+                                                evaluation version is used,
+                                                max number of frames reached
+
+------------------------------------------------------------------------------*/
+
+H264SwDecRet H264SwDecDecode(H264SwDecInst decInst, H264SwDecInput *pInput,
+                  H264SwDecOutput *pOutput)
+{
+    /* returnValue stays STRM_PROCESSED unless a decode event overrides it */
+    decContainer_t *pDecCont;
+    u32 strmLen;
+    u32 numReadBytes;
+    u8 *tmpStream;
+    u32 decResult = 0;
+    H264SwDecRet returnValue = H264SWDEC_STRM_PROCESSED;
+
+    DEC_API_TRC("H264SwDecDecode#");
+
+    /* Check that function input parameters are valid */
+    if (pInput == NULL || pOutput == NULL)
+    {
+        DEC_API_TRC("H264SwDecDecode# ERROR: pInput or pOutput is NULL");
+        return(H264SWDEC_PARAM_ERR);
+    }
+
+    if ((pInput->pStream == NULL) || (pInput->dataLen == 0))
+    {
+        DEC_API_TRC("H264SwDecDecode# ERROR: Invalid input parameters");
+        return(H264SWDEC_PARAM_ERR);
+    }
+
+    pDecCont = (decContainer_t *)decInst;
+
+    /* Check if decoder is in an incorrect mode */
+    if (decInst == NULL || pDecCont->decStat == UNINITIALIZED)
+    {
+        DEC_API_TRC("H264SwDecDecode# ERROR: Decoder not initialized");
+        return(H264SWDEC_NOT_INITIALIZED);
+    }
+
+#ifdef H264DEC_EVALUATION
+    if (pDecCont->picNumber >= H264DEC_EVALUATION_LIMIT)
+        return(H264SWDEC_EVALUATION_LIMIT_EXCEEDED);
+#endif
+
+#ifdef H264DEC_TRACE
+    sprintf(pDecCont->str, "H264SwDecDecode# decInst %p  pInput %p  pOutput %p",
+            decInst, (void*)pInput, (void*)pOutput);
+    DEC_API_TRC(pDecCont->str);
+#endif
+
+    pOutput->pStrmCurrPos   = NULL;
+
+    numReadBytes = 0;
+    strmLen = pInput->dataLen;
+    tmpStream = pInput->pStream;
+    pDecCont->storage.intraConcealmentFlag = pInput->intraConcealmentMethod;
+
+    /* loop until the buffer is consumed or an event forces strmLen to 0 */
+    do
+    {
+        /* Return HDRS_RDY after DPB flush caused by new SPS */
+        if (pDecCont->decStat == NEW_HEADERS)
+        {
+            decResult = H264BSD_HDRS_RDY;
+            pDecCont->decStat = INITIALIZED;
+        }
+        else /* Continue decoding normally */
+        {
+            decResult = h264bsdDecode(&pDecCont->storage, tmpStream, strmLen,
+                pInput->picId, &numReadBytes);
+        }
+        tmpStream += numReadBytes;  /* still 0 on the NEW_HEADERS shortcut */
+        /* clamp to 0 if more bytes were reported read than were available */
+        if ( (i32)(strmLen - numReadBytes) >= 0 )
+            strmLen -= numReadBytes;
+        else
+            strmLen = 0;
+
+        pOutput->pStrmCurrPos = tmpStream;
+
+        switch (decResult)
+        {
+            case H264BSD_HDRS_RDY:
+                if(pDecCont->storage.dpb->flushed &&
+                   pDecCont->storage.dpb->numOut !=
+                   pDecCont->storage.dpb->outIndex)
+                {
+                    /* output first all DPB stored pictures
+                     * DPB flush caused by new SPS */
+                    pDecCont->storage.dpb->flushed = 0;
+                    pDecCont->decStat = NEW_HEADERS;
+                    returnValue = H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY;
+                    strmLen = 0;
+                }
+                else
+                {
+                    returnValue = H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY;
+                    strmLen = 0;
+                }
+                break;
+
+            case H264BSD_PIC_RDY:
+                pDecCont->picNumber++;
+
+                if (strmLen == 0)
+                    returnValue = H264SWDEC_PIC_RDY;
+                else
+                    returnValue = H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY;
+
+                strmLen = 0;    /* leave the loop, caller fetches the picture */
+                break;
+
+            case H264BSD_PARAM_SET_ERROR:
+                /* STRM_ERR only if no valid sets remain at end of buffer */
+                if ( !h264bsdCheckValidParamSets(&pDecCont->storage) &&
+                     strmLen == 0 )
+                {
+                    returnValue = H264SWDEC_STRM_ERR;
+                }
+                break;
+            case H264BSD_MEMALLOC_ERROR:
+                {
+                    returnValue = H264SWDEC_MEMFAIL;
+                    strmLen = 0;
+                }
+                break;
+            default:
+                break;
+        }
+
+    } while (strmLen);
+
+#ifdef H264DEC_TRACE
+    sprintf(pDecCont->str, "H264SwDecDecode# OK: DecResult %d",
+            returnValue);
+    DEC_API_TRC(pDecCont->str);
+#endif
+
+    return(returnValue);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: H264SwDecGetAPIVersion
+
+        Functional description:
+            Return version information of the API
+
+        Inputs:
+            none
+
+        Outputs:
+            none
+
+        Returns:
+            API version
+
+------------------------------------------------------------------------------*/
+
+H264SwDecApiVersion H264SwDecGetAPIVersion()
+{
+    H264SwDecApiVersion ver;
+    /* both numbers come from the version defines at the top of this file */
+    ver.major = H264SWDEC_MAJOR_VERSION;
+    ver.minor = H264SWDEC_MINOR_VERSION;
+
+    return(ver);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: H264SwDecNextPicture
+
+        Functional description:
+            Get next picture in display order if any available.
+
+        Input:
+            decInst     decoder instance.
+            flushBuffer force output of all buffered pictures
+
+        Output:
+            pOutput     pointer to output structure
+
+        Returns:
+            H264SWDEC_OK            no pictures available for display
+            H264SWDEC_PIC_RDY       picture available for display
+            H264SWDEC_PARAM_ERR     invalid parameters
+
+------------------------------------------------------------------------------*/
+
+H264SwDecRet H264SwDecNextPicture(H264SwDecInst decInst,
+    H264SwDecPicture *pOutput, u32 flushBuffer)
+{
+
+    decContainer_t *decCont;
+    u32 errMbCount, idrFlag, pictureId;
+    u32 *picture;
+
+    DEC_API_TRC("H264SwDecNextPicture#");
+
+    /* both the instance and the output structure are mandatory */
+    if (decInst == NULL || pOutput == NULL)
+    {
+        DEC_API_TRC("H264SwDecNextPicture# ERROR: decInst or pOutput is NULL");
+        return(H264SWDEC_PARAM_ERR);
+    }
+
+    decCont = (decContainer_t*)decInst;
+
+#ifdef H264DEC_TRACE
+    sprintf(decCont->str, "H264SwDecNextPicture# decInst %p pOutput %p %s %d",
+            decInst, (void*)pOutput, "flushBuffer", flushBuffer);
+    DEC_API_TRC(decCont->str);
+#endif
+
+    /* a non-zero flushBuffer flushes the buffer before fetching a picture */
+    if (flushBuffer != 0)
+        h264bsdFlushBuffer(&decCont->storage);
+
+    picture = (u32*)h264bsdNextOutputPicture(&decCont->storage, &pictureId,
+                                             &idrFlag, &errMbCount);
+
+    /* nothing to display: pOutput is left untouched */
+    if (picture == NULL)
+    {
+        DEC_API_TRC("H264SwDecNextPicture# OK: return H264SWDEC_OK");
+        return(H264SWDEC_OK);
+    }
+
+    pOutput->pOutputPicture = picture;
+    pOutput->picId          = pictureId;
+    pOutput->isIdrPicture   = idrFlag;
+    pOutput->nbrOfErrMBs    = errMbCount;
+    DEC_API_TRC("H264SwDecNextPicture# OK: return H264SWDEC_PIC_RDY");
+    return(H264SWDEC_PIC_RDY);
+}
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/TestBenchMultipleInstance.c b/media/libstagefright/codecs/on2/h264dec/source/TestBenchMultipleInstance.c
new file mode 100755
index 0000000..42170d3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/TestBenchMultipleInstance.c
@@ -0,0 +1,531 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* CVS tag name for identification */
+const char tagName[256] = "$Name: FIRST_ANDROID_COPYRIGHT $";
+
+#include "H264SwDecApi.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define DEBUG(argv) printf argv
+
+/* _NO_OUT disables output file writing */
+#ifdef __arm
+#define _NO_OUT
+#endif
+
+/*------------------------------------------------------------------------------
+    Prototypes of the helper functions defined further down in this file
+------------------------------------------------------------------------------*/
+void WriteOutput(FILE *fid, u8 *data, u32 picSize);
+/* returns 0 on success and 1 on failure */
+u32 CropPicture(u8 *pOutImage, u8 *pInImage,
+    u32 picWidth, u32 picHeight, CropParams *pCropParams);
+/* writes one picture through WriteOutput, cropped first when requested */
+void CropWriteOutput(FILE *fid, u8 *imageData, u32 cropDisplay,
+        H264SwDecInfo *decInfo);
+
+typedef struct  /* state kept by main() for one decoder instance */
+{
+    H264SwDecInst decInst;  /* handle filled in by H264SwDecInit */
+    H264SwDecInput decInput;  /* current stream position and bytes left */
+    H264SwDecOutput decOutput;  /* pStrmCurrPos reported by H264SwDecDecode */
+    H264SwDecPicture decPicture;  /* picture fetched by H264SwDecNextPicture */
+    H264SwDecInfo decInfo;  /* stream info filled in by H264SwDecGetInfo */
+    FILE *foutput;  /* output file; left NULL when no file is opened */
+    char outFileName[256];  /* name passed to fopen for foutput */
+    u8 *byteStrmStart;  /* malloc'ed buffer holding the whole input file */
+    u32 picNumber;  /* number of pictures output so far */
+} Decoder;
+
+
+/*------------------------------------------------------------------------------
+
+    Function name:  main
+
+    Decodes each stream file named on the command line with its own decoder
+    instance, taking turns between the instances. Returns 1 if any output
+    picture reported concealed macroblocks, otherwise 0.
+
+------------------------------------------------------------------------------*/
+int main(int argc, char **argv)
+{
+    i32 instCount, instRunning;
+    i32 i;
+    u32 maxNumPics;
+    u32 strmLen;
+    H264SwDecRet ret;
+    u32 numErrors = 0;
+    u32 cropDisplay = 0;
+    u32 disableOutputReordering = 0;
+    FILE *finput;
+    Decoder **decoder;
+    char outFileName[256] = "out.yuv";
+
+    if ( argc > 1 && strcmp(argv[1], "-T") == 0 )
+    {
+        fprintf(stderr, "%s\n", tagName);
+        return 0;
+    }
+
+    if (argc < 2)
+    {
+        DEBUG((
+            "Usage: %s [-Nn] [-Ooutfile] [-P] [-U] [-C] [-R] [-T] file1.264 [file2.264] .. [fileN.264]\n",
+            argv[0]));
+        DEBUG(("\t-Nn forces decoding to stop after n pictures\n"));
+#if defined(_NO_OUT)
+        DEBUG(("\t-Ooutfile output writing disabled at compile time\n"));
+#else
+        DEBUG(("\t-Ooutfile write output to \"outfile\" (default out.yuv)\n"));
+        DEBUG(("\t-Onone does not write output\n"));
+#endif
+        DEBUG(("\t-C display cropped image (default decoded image)\n"));
+        DEBUG(("\t-R disable DPB output reordering\n"));
+        DEBUG(("\t-T to print tag name and exit\n"));
+        exit(100);
+    }
+
+    instCount = argc - 1;
+    /* read command line arguments; the last argument is never parsed as an
+     * option, and every option found before it reduces the file count */
+    maxNumPics = 0;
+    for (i = 1; i < (argc-1); i++)
+    {
+        if ( strncmp(argv[i], "-N", 2) == 0 )
+        {
+            maxNumPics = (u32)atoi(argv[i]+2);
+            instCount--;
+        }
+        else if ( strncmp(argv[i], "-O", 2) == 0 )
+        {
+            sprintf(outFileName, "%.255s", argv[i]+2); /* bounded copy */
+            instCount--;
+        }
+        else if ( strcmp(argv[i], "-C") == 0 )
+        {
+            cropDisplay = 1;
+            instCount--;
+        }
+        else if ( strcmp(argv[i], "-R") == 0 )
+        {
+            disableOutputReordering = 1;
+            instCount--;
+        }
+    }
+
+    if (instCount < 1)
+    {
+        DEBUG(("No input files\n"));
+        exit(100);
+    }
+
+    /* allocate memory for multiple decoder instances
+     * one instance for every stream file */
+    decoder = (Decoder **)malloc(sizeof(Decoder*)*(u32)instCount);
+    if (decoder == NULL)
+    {
+        DEBUG(("Unable to allocate memory\n"));
+        exit(100);
+    }
+
+    /* prepare each decoder instance */
+    for (i = 0; i < instCount; i++)
+    {
+        decoder[i] = (Decoder *)calloc(1, sizeof(Decoder));
+        if (decoder[i] == NULL)
+        {
+            DEBUG(("Unable to allocate memory\n"));
+            exit(100);
+        }
+
+        /* open input file */
+        finput = fopen(argv[argc-instCount+i],"rb");
+        if (finput == NULL)
+        {
+            DEBUG(("Unable to open input file <%s>\n", argv[argc-instCount+i]));
+            exit(100);
+        }
+
+        DEBUG(("Reading input file[%d] %s\n", i, argv[argc-instCount+i]));
+        /* read input stream to buffer */
+        fseek(finput,0L,SEEK_END);
+        strmLen = (u32)ftell(finput);
+        rewind(finput);
+        decoder[i]->byteStrmStart = (u8 *)malloc(sizeof(u8)*strmLen);
+        if (decoder[i]->byteStrmStart == NULL)
+        {
+            DEBUG(("Unable to allocate memory\n"));
+            exit(100);
+        }
+        if (fread(decoder[i]->byteStrmStart, sizeof(u8), strmLen, finput) !=
+            strmLen)
+        {
+            DEBUG(("Unable to read input file\n"));
+            exit(100);
+        }
+        fclose(finput);
+
+        /* open output file */
+        if (strcmp(outFileName, "none") != 0)
+        {
+#if defined(_NO_OUT)
+            decoder[i]->foutput = NULL;
+#else
+            /* precision keeps name plus index inside outFileName[256] */
+            sprintf(decoder[i]->outFileName, "%.240s%i", outFileName, i);
+            decoder[i]->foutput = fopen(decoder[i]->outFileName, "wb");
+            if (decoder[i]->foutput == NULL)
+            {
+                DEBUG(("Unable to open output file\n"));
+                exit(100);
+            }
+#endif
+        }
+
+        ret = H264SwDecInit(&(decoder[i]->decInst), disableOutputReordering);
+        if (ret != H264SWDEC_OK)
+        {
+            DEBUG(("Init failed %d\n", ret));
+            exit(100);
+        }
+
+        decoder[i]->decInput.pStream = decoder[i]->byteStrmStart;
+        decoder[i]->decInput.dataLen = strmLen;
+        decoder[i]->decInput.intraConcealmentMethod = 0;
+    }
+
+    /* main decoding loop */
+    do
+    {
+        /* decode once using each instance */
+        for (i = 0; i < instCount; i++)
+        {
+            ret = H264SwDecDecode(decoder[i]->decInst,
+                                &(decoder[i]->decInput),
+                                &(decoder[i]->decOutput));
+
+            switch(ret)
+            {
+                case H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY:
+                    ret = H264SwDecGetInfo(decoder[i]->decInst,
+                            &(decoder[i]->decInfo));
+                    if (ret != H264SWDEC_OK)
+                        exit(1);
+
+                    if (cropDisplay && decoder[i]->decInfo.croppingFlag)
+                    {
+                        DEBUG(("Decoder[%d] Cropping params: (%d, %d) %dx%d\n",
+                            i,
+                            decoder[i]->decInfo.cropParams.cropLeftOffset,
+                            decoder[i]->decInfo.cropParams.cropTopOffset,
+                            decoder[i]->decInfo.cropParams.cropOutWidth,
+                            decoder[i]->decInfo.cropParams.cropOutHeight));
+                    }
+
+                    DEBUG(("Decoder[%d] Width %d Height %d\n", i,
+                        decoder[i]->decInfo.picWidth,
+                        decoder[i]->decInfo.picHeight));
+
+                    DEBUG(("Decoder[%d] videoRange %d, matricCoefficients %d\n",
+                        i, decoder[i]->decInfo.videoRange,
+                        decoder[i]->decInfo.matrixCoefficients));
+                    decoder[i]->decInput.dataLen -=
+                        (u32)(decoder[i]->decOutput.pStrmCurrPos -
+                              decoder[i]->decInput.pStream);
+                    decoder[i]->decInput.pStream =
+                        decoder[i]->decOutput.pStrmCurrPos;
+                    break;
+
+                case H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY:
+                    decoder[i]->decInput.dataLen -=
+                        (u32)(decoder[i]->decOutput.pStrmCurrPos -
+                              decoder[i]->decInput.pStream);
+                    decoder[i]->decInput.pStream =
+                        decoder[i]->decOutput.pStrmCurrPos;
+                    /* fall through */
+                case H264SWDEC_PIC_RDY:
+                    if (ret == H264SWDEC_PIC_RDY)
+                        decoder[i]->decInput.dataLen = 0;
+
+                    ret = H264SwDecGetInfo(decoder[i]->decInst,
+                            &(decoder[i]->decInfo));
+                    if (ret != H264SWDEC_OK)
+                        exit(1);
+
+                    while (H264SwDecNextPicture(decoder[i]->decInst,
+                            &(decoder[i]->decPicture), 0) == H264SWDEC_PIC_RDY)
+                    {
+                        decoder[i]->picNumber++;
+                        numErrors += decoder[i]->decPicture.nbrOfErrMBs;
+
+                        DEBUG(("Decoder[%d] PIC %d, type %s, concealed %d\n",
+                            i, decoder[i]->picNumber,
+                            decoder[i]->decPicture.isIdrPicture
+                                ? "IDR" : "NON-IDR",
+                            decoder[i]->decPicture.nbrOfErrMBs));
+                        fflush(stdout);
+
+                        CropWriteOutput(decoder[i]->foutput,
+                                (u8*)decoder[i]->decPicture.pOutputPicture,
+                                cropDisplay, &(decoder[i]->decInfo));
+                    }
+
+                    /* >=: one call may output several pictures at once */
+                    if (maxNumPics && decoder[i]->picNumber >= maxNumPics)
+                        decoder[i]->decInput.dataLen = 0;
+                    break;
+
+                case H264SWDEC_STRM_PROCESSED:
+                case H264SWDEC_STRM_ERR:
+                case H264SWDEC_PARAM_ERR:
+                    decoder[i]->decInput.dataLen = 0;
+                    break;
+
+                default:
+                    DEBUG(("Decoder[%d] FATAL ERROR\n", i));
+                    exit(10);
+                    break;
+            }
+        }
+
+        /* check if any of the instances is still running (=has more data) */
+        instRunning = instCount;
+        for (i = 0; i < instCount; i++)
+        {
+            if (decoder[i]->decInput.dataLen == 0)
+                instRunning--;
+        }
+    } while (instRunning);
+
+    /* get last frames and close each instance */
+    for (i = 0; i < instCount; i++)
+    {
+        while (H264SwDecNextPicture(decoder[i]->decInst,
+                &(decoder[i]->decPicture), 1) == H264SWDEC_PIC_RDY)
+        {
+            decoder[i]->picNumber++;
+            numErrors += decoder[i]->decPicture.nbrOfErrMBs; /* flushed too */
+
+            DEBUG(("Decoder[%d] PIC %d, type %s, concealed %d\n",
+                i, decoder[i]->picNumber,
+                decoder[i]->decPicture.isIdrPicture
+                    ? "IDR" : "NON-IDR",
+                decoder[i]->decPicture.nbrOfErrMBs));
+            fflush(stdout);
+
+            CropWriteOutput(decoder[i]->foutput,
+                    (u8*)decoder[i]->decPicture.pOutputPicture,
+                    cropDisplay, &(decoder[i]->decInfo));
+        }
+
+        H264SwDecRelease(decoder[i]->decInst);
+        if (decoder[i]->foutput)
+            fclose(decoder[i]->foutput);
+        free(decoder[i]->byteStrmStart);
+        free(decoder[i]);
+    }
+
+    free(decoder);
+    return numErrors ? 1 : 0;
+}
+
+/*------------------------------------------------------------------------------
+
+------------------------------------------------------------------------------*/
+void CropWriteOutput(FILE *foutput, u8 *imageData, u32 cropDisplay,
+        H264SwDecInfo *decInfo)
+{
+    /* Writes one picture to foutput, cropped first when cropDisplay is set
+     * and the stream info carries cropping parameters. */
+    u8 *croppedImage;
+    u32 numBytes;
+
+    if (!cropDisplay || !decInfo->croppingFlag)
+    {
+        /* luma plane plus two quarter-size chroma planes: 3/2 * w * h */
+        numBytes = (3 * (decInfo->picWidth * decInfo->picHeight))/2;
+        WriteOutput(foutput, imageData, numBytes);
+        return;
+    }
+
+    numBytes = (3 * (decInfo->cropParams.cropOutWidth *
+                     decInfo->cropParams.cropOutHeight))/2;
+    croppedImage = malloc(numBytes);
+    if (croppedImage == NULL)
+        exit(1);
+    if (CropPicture(croppedImage, imageData,
+            decInfo->picWidth, decInfo->picHeight,
+            &(decInfo->cropParams)))
+    {
+        exit(1);
+    }
+    WriteOutput(foutput, croppedImage, numBytes);
+    free(croppedImage);
+}
+
+/*------------------------------------------------------------------------------
+
+------------------------------------------------------------------------------*/
+void WriteOutput(FILE *fid, u8 *data, u32 picSize)
+{
+    if (fid == NULL) return; /* no output file: nothing is written */
+    fwrite(data, 1, picSize, fid);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecTrace
+
+------------------------------------------------------------------------------*/
+void H264SwDecTrace(char *string)
+{
+    /* Appends string plus a newline to dec_api.trc. The file is opened and
+     * closed on every call; if it cannot be opened the text is dropped. */
+    FILE *traceFile = fopen("dec_api.trc", "at");
+
+    if (traceFile != NULL)
+    {
+        fwrite(string, 1, strlen(string), traceFile);
+        fwrite("\n", 1,1, traceFile);
+
+        fclose(traceFile);
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecMalloc
+    Forwards the request to the C library malloc() and returns its result.
+------------------------------------------------------------------------------*/
+void* H264SwDecMalloc(u32 size)
+{
+    return malloc(size);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecFree
+    Forwards ptr to the C library free().
+------------------------------------------------------------------------------*/
+void H264SwDecFree(void *ptr)
+{
+    free(ptr);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecMemcpy
+    Forwards to the C library memcpy(); the value memcpy returns is dropped.
+------------------------------------------------------------------------------*/
+void H264SwDecMemcpy(void *dest, void *src, u32 count)
+{
+    memcpy(dest, src, count);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name:  H264SwDecMemset
+    Forwards to the C library memset(); the value memset returns is dropped.
+------------------------------------------------------------------------------*/
+void H264SwDecMemset(void *ptr, i32 value, u32 count)
+{
+    memset(ptr, value, count);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: CopyPlane
+
+    Copies a cols x rows window out of a plane that is srcWidth bytes wide
+    and returns the position following the last byte written to pDst.
+
+------------------------------------------------------------------------------*/
+static u8 *CopyPlane(u8 *pDst, u8 *pSrc, u32 srcWidth, u32 cols, u32 rows)
+{
+    u32 x, y;
+
+    for (y = 0; y < rows; y++)
+    {
+        for (x = 0; x < cols; x++)
+            pDst[x] = pSrc[x];
+        pDst += cols;
+        pSrc += srcWidth;
+    }
+
+    return pDst;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: CropPicture
+
+------------------------------------------------------------------------------*/
+u32 CropPicture(u8 *pOutImage, u8 *pInImage,
+    u32 picWidth, u32 picHeight, CropParams *pCropParams)
+{
+    /* Copies the cropping window of a planar picture (luma plane followed
+     * by two half-width, half-height chroma planes) into pOutImage.
+     * Returns 0 on success. On invalid arguments or a window that does not
+     * fit the picture, frees pOutImage and returns 1. */
+    u32 cropWidth, cropHeight;
+    u8 *pDst;
+
+    if (pOutImage == NULL || pInImage == NULL || pCropParams == NULL ||
+        !picWidth || !picHeight ||
+        ((pCropParams->cropLeftOffset + pCropParams->cropOutWidth) >
+          picWidth ) ||
+        ((pCropParams->cropTopOffset + pCropParams->cropOutHeight) >
+          picHeight ))
+    {
+        /* due to lint warning */
+        free(pOutImage);
+        return(1);
+    }
+
+    cropWidth = pCropParams->cropOutWidth;
+    cropHeight = pCropParams->cropOutHeight;
+
+    /* luma */
+    pDst = CopyPlane(pOutImage,
+        pInImage + pCropParams->cropTopOffset*picWidth +
+            pCropParams->cropLeftOffset,
+        picWidth, cropWidth, cropHeight);
+
+    /* both chroma planes are copied at half the luma window size */
+    cropWidth >>= 1;
+    cropHeight >>= 1;
+
+    /* cb */
+    pDst = CopyPlane(pDst,
+        pInImage + picWidth*picHeight +
+            pCropParams->cropTopOffset*picWidth/4 +
+            pCropParams->cropLeftOffset/2,
+        picWidth/2, cropWidth, cropHeight);
+
+    /* cr */
+    (void)CopyPlane(pDst,
+        pInImage + 5*picWidth*picHeight/4 +
+            pCropParams->cropTopOffset*picWidth/4 +
+            pCropParams->cropLeftOffset/2,
+        picWidth/2, cropWidth, cropHeight);
+
+    return (0);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor.s
new file mode 100755
index 0000000..634a484
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor.s
@@ -0,0 +1,298 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaHor function
+;--
+;-------------------------------------------------------------------------------
+
+
+    IF  :DEF: H264DEC_WINASM
+        ;// We dont use REQUIRE8 and PRESERVE8 for winasm
+    ELSE
+        REQUIRE8
+        PRESERVE8
+    ENDIF
+
+    AREA    |.text|, CODE
+
+
+;// h264bsdInterpolateChromaHor register allocation
+;// names that share a register number are aliases of the same register
+ref     RN 0            ;// r0-r3 are saved on entry at sp+0xc4..0xd0
+ptrA    RN 0
+
+mb      RN 1
+block   RN 1
+
+x0      RN 2
+count   RN 2            ;// packed loop counters, built on top of x0
+
+y0      RN 3
+valX    RN 3
+
+width   RN 4
+
+height  RN 5
+tmp7    RN 5
+
+chrPW   RN 6
+tmp8    RN 6            ;// writing tmp8 also changes chrPW
+
+tmp1    RN 7
+chrPH   RN 7
+
+tmp2    RN 8
+
+tmp3    RN 9
+
+tmp4    RN 10
+
+tmp5    RN 11
+
+tmp6    RN 12
+
+c32     RN 14           ;// r14 is lr; it is pushed on entry
+xFrac   RN 14
+
+;// Function exports and imports
+
+    IMPORT  h264bsdFillBlock
+
+    EXPORT  h264bsdInterpolateChromaHor
+
+;//  Function arguments
+;//
+;//  u8 *ref,                   : 0xc4
+;//  u8 *predPartChroma,        : 0xc8
+;//  i32 x0,                    : 0xcc
+;//  i32 y0,                    : 0xd0
+;//  u32 width,                 : 0xf8
+;//  u32 height,                : 0xfc
+;//  u32 xFrac,                 : 0x100
+;//  u32 chromaPartWidth,       : 0x104
+;//  u32 chromaPartHeight       : 0x108
+
+h264bsdInterpolateChromaHor
+    STMFD   sp!, {r0-r11,lr}        ;// push 13 registers (0x34 bytes)
+    SUB     sp, sp, #0xc4           ;// local area; saved r0 is now at sp+0xc4
+
+    LDR     chrPW, [sp, #0x104]     ;// chromaPartWidth
+    LDR     width, [sp, #0xf8]      ;// width
+    CMP     x0, #0
+    BLT     do_fill
+
+    ADD     tmp6, x0, chrPW         ;// tmp6 = x0+ chromaPartWidth
+    ADD     tmp6, tmp6, #1          ;// tmp6 = x0 + chromaPartWidth + 1
+    CMP     tmp6, width             ;// x0+chromaPartWidth+1 > width
+    BHI     do_fill
+
+    CMP     y0, #0
+    BLT     do_fill
+    LDR     chrPH, [sp, #0x108]     ;// chromaPartHeight
+    LDR     height, [sp, #0xfc]     ;// height
+    ADD     tmp6, y0, chrPH         ;// tmp6 = y0 + chromaPartHeight
+    CMP     tmp6, height
+    BLS     skip_fill               ;// everything in range: read ref directly
+    ;// reached when x0 < 0, y0 < 0 or one of the two sums above is too large
+do_fill
+    LDR     chrPH, [sp, #0x108]     ;// chromaPartHeight
+    LDR     height, [sp, #0xfc]     ;// height
+    ADD     tmp8, chrPW, #1         ;// tmp8 = chromaPartWidth+1 (same register as chrPW)
+    MOV     tmp2, tmp8              ;// tmp2 = chromaPartWidth+1
+    STMIA   sp,{width,height,tmp8,chrPH,tmp2} ;// five words at sp: width, height, chromaPW+1, chromaPH, chromaPW+1
+    ADD     block, sp, #0x1c        ;// block = local buffer at sp+0x1c
+    BL      h264bsdFillBlock        ;// NOTE(review): presumably takes the five words at sp as stack arguments - confirm
+    ;// r0-r3 are reloaded after the call; the second call uses ref + width*height
+    LDR     x0, [sp, #0xcc]
+    LDR     y0, [sp, #0xd0]
+    LDR     ref, [sp, #0xc4]        ;// ref
+    STMIA   sp,{width,height,tmp8,chrPH,tmp2}
+    ADD     block, sp, #0x1c        ;// block
+    MLA     ref, height, width, ref ;// ref += width * height;
+    MLA     block, chrPH, tmp8, block;// block + (chromaPH)*(chromaPW+1)
+    BL      h264bsdFillBlock
+    ;// redirect the saved arguments: x0 = y0 = 0 and ref = local buffer
+    MOV     x0, #0                  ;// x0 = 0
+    MOV     y0, #0                  ;// y0 = 0
+    STR     x0, [sp, #0xcc]
+    STR     y0, [sp, #0xd0]
+    ADD     ref, sp, #0x1c          ;// ref = block
+    STR     ref, [sp, #0xc4]        ;// ref
+    ;// the saved width/height become the buffer size (chromaPW+1, chromaPH)
+    STR     chrPH, [sp, #0xfc]      ;// height
+    STR     tmp8, [sp, #0xf8]       ;// width
+    MOV     width, tmp8
+    SUB     chrPW, chrPW, #1        ;// undo the +1 made through tmp8 (same register)
+
+skip_fill
+    MLA     tmp3, y0, width, x0     ;// tmp3 = y0*width+x0
+    LDR     xFrac, [sp, #0x100]     ;// xFrac
+    ADD     ptrA, ref, tmp3         ;// ptrA = ref + y0*width+x0
+    RSB     valX, xFrac, #8         ;// valX = 8-xFrac
+
+    LDR     mb, [sp, #0xc8]         ;// predPartChroma
+
+
+    ;// pack values to count register
+    ;// [31:28] loop_x (chromaPartWidth-1)
+    ;// [27:24] loop_y (chromaPartHeight-1)
+    ;// [23:20] chromaPartWidth-1
+    ;// [19:16] chromaPartHeight-1
+    ;// [15:00] nothing (count is r2, so these bits still hold x0)
+
+    SUB     tmp2, chrPH, #1             ;// chromaPartHeight-1
+    SUB     tmp1, chrPW, #1             ;// chromaPartWidth-1
+    ADD     count, count, tmp2, LSL #16 ;// chromaPartHeight-1
+    ADD     count, count, tmp2, LSL #24 ;// loop_y
+    ADD     count, count, tmp1, LSL #20 ;// chromaPartWidth-1
+    AND     tmp2, count, #0x00F00000    ;// loop_x
+    PKHBT   valX, valX, xFrac, LSL #16  ;// |xFrac|valX |
+    MOV     valX, valX, LSL #3          ;// multiply by 8 in advance
+    MOV     c32, #32
+
+
+    ;///////////////////////////////////////////////////////////////////////////
+    ;// Cb
+    ;///////////////////////////////////////////////////////////////////////////
+
+    ;// 2x2 pels per iteration
+    ;// bilinear horizontal interpolation
+
+loop1_y
+    ADD     count, count, tmp2, LSL #8  ;// reload loop_x [31:28] from [23:20]
+    LDRB    tmp1, [ptrA, width]         ;// row 2, first sample
+    LDRB    tmp2, [ptrA], #1            ;// row 1, first sample; ptrA++
+
+loop1_x
+    LDRB    tmp3, [ptrA, width]         ;// row 2, next sample
+    LDRB    tmp4, [ptrA], #1            ;// row 1, next sample; ptrA++
+
+    PKHBT   tmp5, tmp1, tmp3, LSL #16   ;// |tmp3|tmp1| neighbours in row 2
+    PKHBT   tmp6, tmp2, tmp4, LSL #16   ;// |tmp4|tmp2| neighbours in row 1
+
+    LDRB    tmp1, [ptrA, width]
+    LDRB    tmp2, [ptrA], #1
+
+    SMLAD   tmp5, tmp5, valX, c32       ;// multiply
+    SMLAD   tmp6, tmp6, valX, c32       ;// multiply
+
+    PKHBT   tmp7, tmp3, tmp1, LSL #16
+    PKHBT   tmp8, tmp4, tmp2, LSL #16
+
+    SMLAD   tmp7, tmp7, valX, c32       ;// multiply
+    SMLAD   tmp8, tmp8, valX, c32       ;// multiply
+
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb,#8]               ;// store row 2 col 1
+
+    MOV     tmp6, tmp6, LSR #6          ;// scale down
+    STRB    tmp6, [mb],#1               ;// store row 1 col 1
+
+    MOV     tmp7, tmp7, LSR #6          ;// scale down
+    STRB    tmp7, [mb,#8]               ;// store row 2 col 2
+
+    MOV     tmp8, tmp8, LSR #6          ;// scale down
+    STRB    tmp8, [mb],#1               ;// store row 1 col 2
+
+    SUBS    count, count, #2<<28        ;// loop_x -= 2 (two columns done)
+    BCS     loop1_x                     ;// repeat until the subtraction borrows
+
+    AND     tmp2, count, #0x00F00000    ;// tmp2 = (chromaPartWidth-1) << 20
+
+    ADDS    mb, mb, #16                 ;// two output rows of 8 bytes (C stays clear unless the address wraps)
+    SBC     mb, mb, tmp2, LSR #20       ;// with C clear: mb -= chromaPartWidth
+    ADD     ptrA, ptrA, width, LSL #1   ;// two input rows down
+    SBC     ptrA, ptrA, tmp2, LSR #20   ;// with C clear: ptrA -= chromaPartWidth
+    SUB     ptrA, ptrA, #1              ;// minus the extra column read per row
+
+    ADDS    count, count, #0xE << 24    ;// steps the loop_y field by two rows
+    BGE     loop1_y
+
+    ;///////////////////////////////////////////////////////////////////////////
+    ;// Cr
+    ;///////////////////////////////////////////////////////////////////////////
+    LDR     height, [sp,#0xfc]          ;// height
+    LDR     ref, [sp, #0xc4]            ;// ref
+    LDR     tmp1, [sp, #0xd0]           ;// y0
+    LDR     tmp2, [sp, #0xcc]           ;// x0
+    LDR     mb, [sp, #0xc8]             ;// predPartChroma
+
+    ADD     tmp1, height, tmp1          ;// y0 + height: skips width*height bytes
+    MLA     tmp3, tmp1, width, tmp2     ;// tmp3 = (y0+height)*width + x0
+    ADD     ptrA, ref, tmp3
+    ADD     mb, mb, #64                 ;// Cr output starts 64 bytes after Cb output
+
+    AND     count, count, #0x00FFFFFF   ;// clear loop_x and loop_y
+    AND     tmp1, count, #0x000F0000    ;// chromaPartHeight-1
+    ADD     count, count, tmp1, LSL #8  ;// reload loop_y [27:24] from [19:16]
+    AND     tmp2, count, #0x00F00000    ;// loop_x
+
+    ;// 2x2 pels per iteration
+    ;// bilinear horizontal interpolation
+loop2_y
+    ADD     count, count, tmp2, LSL #8  ;// reload loop_x [31:28] from [23:20]
+    LDRB    tmp1, [ptrA, width]
+    LDRB    tmp2, [ptrA], #1
+
+loop2_x
+    LDRB    tmp3, [ptrA, width]
+    LDRB    tmp4, [ptrA], #1
+
+    PKHBT   tmp5, tmp1, tmp3, LSL #16
+    PKHBT   tmp6, tmp2, tmp4, LSL #16
+
+    LDRB    tmp1, [ptrA, width]
+    LDRB    tmp2, [ptrA], #1
+
+    SMLAD   tmp5, tmp5, valX, c32       ;// multiply
+    SMLAD   tmp6, tmp6, valX, c32       ;// multiply
+
+    PKHBT   tmp7, tmp3, tmp1, LSL #16
+    PKHBT   tmp8, tmp4, tmp2, LSL #16
+
+    SMLAD   tmp7, tmp7, valX, c32       ;// multiply
+    SMLAD   tmp8, tmp8, valX, c32       ;// multiply
+
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb,#8]               ;// store row 2 col 1
+
+    MOV     tmp6, tmp6, LSR #6          ;// scale down
+    STRB    tmp6, [mb],#1               ;// store row 1 col 1
+
+    MOV     tmp7, tmp7, LSR #6          ;// scale down
+    STRB    tmp7, [mb,#8]               ;// store row 2 col 2
+
+    MOV     tmp8, tmp8, LSR #6          ;// scale down
+    STRB    tmp8, [mb],#1               ;// store row 1 col 2
+
+    SUBS    count, count, #2<<28        ;// loop_x -= 2 (two columns done)
+    BCS     loop2_x
+
+    AND     tmp2, count, #0x00F00000    ;// tmp2 = (chromaPartWidth-1) << 20
+
+    ADDS    mb, mb, #16                 ;// same row-pair advance as in the Cb loop
+    SBC     mb, mb, tmp2, LSR #20
+    ADD     ptrA, ptrA, width, LSL #1
+    SBC     ptrA, ptrA, tmp2, LSR #20
+    SUB     ptrA, ptrA, #1
+
+    ADDS    count, count, #0xE << 24    ;// steps the loop_y field by two rows
+    BGE     loop2_y
+
+    ADD     sp,sp,#0xd4                 ;// drop the local area (0xc4) and saved r0-r3 (0x10)
+    LDMFD   sp!, {r4-r11,pc}            ;// restore r4-r11 and return
+
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s
new file mode 100755
index 0000000..7420ad3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s
@@ -0,0 +1,339 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaHorVer 
+;--            function
+;--
+;-------------------------------------------------------------------------------
+
+
+    IF  :DEF: H264DEC_WINASM
+        ;// We dont use REQUIRE8 and PRESERVE8 for winasm
+    ELSE
+        REQUIRE8
+        PRESERVE8
+    ENDIF
+
+    AREA    |.text|, CODE
+
+
+;// h264bsdInterpolateChromaHorVer register allocation
+;// names that share a register number are aliases of the same register
+ref     RN 0            ;// r0-r3 are saved on entry at sp+0xc4..0xd0
+ptrA    RN 0
+
+mb      RN 1
+block   RN 1
+
+x0      RN 2
+count   RN 2            ;// packed loop counters, built on top of x0
+
+y0      RN 3
+valY    RN 3
+
+width   RN 4
+
+tmp4    RN 5
+height  RN 5
+
+tmp1    RN 6
+
+tmp2    RN 7
+
+tmp3    RN 8
+
+valX    RN 9
+
+tmp5    RN 10
+chrPW   RN 10
+
+tmp6    RN 11
+chrPH   RN 11
+
+xFrac   RN 12
+
+c32     RN 14           ;// r14 is lr; it is pushed on entry
+yFrac   RN 14
+
+;// function exports and imports
+
+    IMPORT  h264bsdFillBlock
+
+    EXPORT  h264bsdInterpolateChromaHorVer
+
+;//  Function arguments
+;//
+;//  u8 *ref,                   : 0xc4
+;//  u8 *predPartChroma,        : 0xc8
+;//  i32 x0,                    : 0xcc
+;//  i32 y0,                    : 0xd0
+;//  u32 width,                 : 0xf8
+;//  u32 height,                : 0xfc
+;//  u32 xFrac,                 : 0x100
+;//  u32 yFrac,                 : 0x104
+;//  u32 chromaPartWidth,       : 0x108
+;//  u32 chromaPartHeight       : 0x10c
+
+h264bsdInterpolateChromaHorVer
+    STMFD   sp!, {r0-r11,lr}
+    SUB     sp, sp, #0xc4
+
+    LDR     chrPW, [sp, #0x108]     ;// chromaPartWidth
+    LDR     xFrac, [sp, #0x100]     ;// xFrac
+    LDR     width, [sp, #0xf8]      ;// width
+    CMP     x0, #0
+    BLT     do_fill
+
+    ADD     tmp1, x0, chrPW         ;// tmp1 = x0+ chromaPartWidth
+    ADD     tmp1, tmp1, #1          ;// tmp1 = x0+ chromaPartWidth+1
+    CMP     tmp1, width             ;// x0+chromaPartWidth+1 > width
+    BHI     do_fill
+
+    CMP     y0, #0
+    BLT     do_fill
+    LDR     chrPH, [sp, #0x10c]     ;// chromaPartHeight
+    LDR     height, [sp, #0xfc]     ;// height
+    ADD     tmp1, y0, chrPH         ;// tmp1 = y0 + chromaPartHeight
+    ADD     tmp1, tmp1, #1          ;// tmp1 = y0 + chromaPartHeight + 1
+    CMP     tmp1, height
+    BLS     skip_fill
+
+do_fill
+    LDR     chrPH, [sp, #0x10c]     ;// chromaPartHeight
+    LDR     height, [sp, #0xfc]     ;// height
+    ADD     tmp3, chrPW, #1         ;// tmp3 = chromaPartWidth+1
+    ADD     tmp1, chrPW, #1         ;// tmp1 = chromaPartWidth+1
+    ADD     tmp2, chrPH, #1         ;// tmp2 = chromaPartHeight+1
+    STMIA   sp,{width,height,tmp1,tmp2,tmp3}
+    ADD     block, sp, #0x1c        ;// block
+    BL      h264bsdFillBlock
+
+    LDR     x0, [sp, #0xcc]
+    LDR     y0, [sp, #0xd0]
+    LDR     ref, [sp, #0xc4]        ;// ref
+    STMIA   sp,{width,height,tmp1,tmp2,tmp3}
+    ADD     block, sp, #0x1c        ;// block
+    MLA     ref, height, width, ref ;// ref += width * height; 
+    MLA     block, tmp2, tmp1, block;// block + (chromaPW+1)*(chromaPH+1)
+    BL      h264bsdFillBlock
+
+    MOV     x0, #0                  ;// x0 = 0
+    MOV     y0, #0                  ;// y0 = 0
+    STR     x0, [sp, #0xcc]
+    STR     y0, [sp, #0xd0]
+    ADD     ref, sp, #0x1c          ;// ref = block
+    STR     ref, [sp, #0xc4]        ;// ref
+
+    STR     tmp2, [sp, #0xfc]       ;// height
+    STR     tmp1, [sp, #0xf8]       ;// width
+    MOV     width, tmp1
+
+skip_fill
+    MLA     tmp3, y0, width, x0     ;// tmp3 = y0*width+x0
+    LDR     yFrac, [sp, #0x104]     ;// yFrac
+    LDR     xFrac, [sp, #0x100]
+    ADD     ptrA, ref, tmp3         ;// ptrA = ref + y0*width+x0
+    RSB     valX, xFrac, #8         ;// valX = 8-xFrac
+    RSB     valY, yFrac, #8         ;// valY = 8-yFrac
+
+    LDR     mb, [sp, #0xc8]         ;// predPartChroma
+
+
+    ;// pack values to count register
+    ;// [31:28] loop_x (chromaPartWidth-1)
+    ;// [27:24] loop_y (chromaPartHeight-1)
+    ;// [23:20] chromaPartWidth-1
+    ;// [19:16] chromaPartHeight-1
+    ;// [15:00] nothing
+
+    SUB     tmp2, chrPH, #1             ;// chromaPartHeight-1
+    SUB     tmp1, chrPW, #1             ;// chromaPartWidth-1
+    ADD     count, count, tmp2, LSL #16 ;// chromaPartHeight-1
+    ADD     count, count, tmp2, LSL #24 ;// loop_y
+    ADD     count, count, tmp1, LSL #20 ;// chromaPartWidth-1
+    AND     tmp2, count, #0x00F00000    ;// loop_x
+    PKHBT   valY, valY, yFrac, LSL #16  ;// |yFrac|valY |
+    MOV     c32, #32
+
+
+    ;///////////////////////////////////////////////////////////////////////////
+    ;// Cb
+    ;///////////////////////////////////////////////////////////////////////////
+
+    ;// 2x2 pels per iteration
+    ;// bilinear vertical and horizontal interpolation
+
+loop1_y
+    LDRB    tmp1, [ptrA]
+    LDRB    tmp3, [ptrA, width]
+    LDRB    tmp5, [ptrA, width, LSL #1]
+
+    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
+    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|
+
+    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
+    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)
+
+    ADD     count, count, tmp2, LSL #8
+loop1_x
+    ;// first
+    LDRB    tmp2, [ptrA, #1]!
+    LDRB    tmp4, [ptrA, width]
+    LDRB    tmp6, [ptrA, width, LSL #1]
+
+    PKHBT   tmp2, tmp2, tmp4, LSL #16   ;// |t4|t2|
+    PKHBT   tmp4, tmp4, tmp6, LSL #16   ;// |t6|t4|
+
+    SMUAD   tmp2, tmp2, valY            ;// t2=(t2*valY + t4*yFrac)
+    MLA     tmp5, tmp1, valX, c32       ;// t5=t1*valX+32
+    MLA     tmp5, tmp2, xFrac, tmp5     ;// t5=t2*xFrac+t5
+
+    SMUAD   tmp4, tmp4, valY            ;// t4=(t4*valY + t6*yFrac)
+    MLA     tmp6, tmp3, valX, c32       ;// t3=t3*valX+32
+    MLA     tmp6, tmp4, xFrac, tmp6     ;// t6=t4*xFrac+t6
+
+    MOV     tmp6, tmp6, LSR #6          ;// scale down
+    STRB    tmp6, [mb, #8]              ;// store pixel
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb], #1              ;// store pixel
+
+    ;// second
+    LDRB    tmp1, [ptrA, #1]!
+    LDRB    tmp3, [ptrA, width]
+    LDRB    tmp5, [ptrA, width, LSL #1]
+
+    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
+    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|
+
+    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
+    MLA     tmp5, tmp1, xFrac, c32      ;// t1=t1*xFrac+32
+    MLA     tmp5, tmp2, valX, tmp5      ;// t5=t2*valX+t5
+
+    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)
+    MLA     tmp6, tmp3, xFrac, c32      ;// t3=t3*xFrac+32
+    MLA     tmp6, tmp4, valX, tmp6      ;// t6=t4*valX+t6
+
+    MOV     tmp6, tmp6, LSR #6          ;// scale down
+    STRB    tmp6, [mb, #8]              ;// store pixel
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb], #1              ;// store pixel
+
+    SUBS    count, count, #2<<28
+    BCS     loop1_x
+
+    AND     tmp2, count, #0x00F00000
+
+    ADDS    mb, mb, #16
+    SBC     mb, mb, tmp2, LSR #20
+    ADD     ptrA, ptrA, width, LSL #1
+    SBC     ptrA, ptrA, tmp2, LSR #20
+
+    ADDS    count, count, #0xE << 24
+    BGE     loop1_y
+
+    ;///////////////////////////////////////////////////////////////////////////
+    ;// Cr
+    ;///////////////////////////////////////////////////////////////////////////
+    LDR     height, [sp,#0xfc]          ;// height
+    LDR     ref, [sp, #0xc4]            ;// ref
+    LDR     tmp1, [sp, #0xd0]           ;// y0
+    LDR     tmp2, [sp, #0xcc]           ;// x0
+    LDR     mb, [sp, #0xc8]             ;// predPartChroma
+
+    ADD     tmp1, height, tmp1
+    MLA     tmp3, tmp1, width, tmp2
+    ADD     ptrA, ref, tmp3
+    ADD     mb, mb, #64
+
+    AND     count, count, #0x00FFFFFF
+    AND     tmp1, count, #0x000F0000
+    ADD     count, count, tmp1, LSL #8
+    AND     tmp2, count, #0x00F00000
+
+    ;// 2x2 pels per iteration
+    ;// bilinear vertical and horizontal interpolation
+loop2_y
+    LDRB    tmp1, [ptrA]
+    LDRB    tmp3, [ptrA, width]
+    LDRB    tmp5, [ptrA, width, LSL #1]
+
+    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
+    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|
+
+    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
+    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)
+
+    ADD     count, count, tmp2, LSL #8
+loop2_x
+    ;// first
+    LDRB    tmp2, [ptrA, #1]!
+    LDRB    tmp4, [ptrA, width]
+    LDRB    tmp6, [ptrA, width, LSL #1]
+
+    PKHBT   tmp2, tmp2, tmp4, LSL #16   ;// |t4|t2|
+    PKHBT   tmp4, tmp4, tmp6, LSL #16   ;// |t6|t4|
+
+    SMUAD   tmp2, tmp2, valY            ;// t2=(t2*valY + t4*yFrac)
+    MLA     tmp5, tmp1, valX, c32       ;// t5=t1*valX+32
+    MLA     tmp5, tmp2, xFrac, tmp5     ;// t5=t2*xFrac+t5
+
+    SMUAD   tmp4, tmp4, valY            ;// t4=(t4*valY + t6*yFrac)
+    MLA     tmp6, tmp3, valX, c32       ;// t3=t3*valX+32
+    MLA     tmp6, tmp4, xFrac, tmp6     ;// t6=t4*xFrac+t6
+
+    MOV     tmp6, tmp6, LSR #6          ;// scale down
+    STRB    tmp6, [mb, #8]              ;// store pixel
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb], #1              ;// store pixel
+
+    ;// second 
+    LDRB    tmp1, [ptrA, #1]!
+    LDRB    tmp3, [ptrA, width]
+    LDRB    tmp5, [ptrA, width, LSL #1]
+
+    PKHBT   tmp1, tmp1, tmp3, LSL #16   ;// |t3|t1|
+    PKHBT   tmp3, tmp3, tmp5, LSL #16   ;// |t5|t3|
+
+    SMUAD   tmp1, tmp1, valY            ;// t1=(t1*valY + t3*yFrac)
+    MLA     tmp5, tmp1, xFrac, c32      ;// t1=t1*xFrac+32
+    MLA     tmp5, tmp2, valX, tmp5      ;// t5=t2*valX+t5
+
+    SMUAD   tmp3, tmp3, valY            ;// t3=(t3*valY + t5*yFrac)
+    MLA     tmp6, tmp3, xFrac, c32      ;// t3=t3*xFrac+32
+    MLA     tmp6, tmp4, valX, tmp6      ;// t6=t4*valX+t6
+
+    MOV     tmp6, tmp6, LSR #6          ;// scale down
+    STRB    tmp6, [mb, #8]              ;// store pixel
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb], #1              ;// store pixel
+
+    SUBS    count, count, #2<<28
+    BCS     loop2_x
+
+    AND     tmp2, count, #0x00F00000
+
+    ADDS    mb, mb, #16
+    SBC     mb, mb, tmp2, LSR #20
+    ADD     ptrA, ptrA, width, LSL #1
+    SBC     ptrA, ptrA, tmp2, LSR #20
+
+    ADDS    count, count, #0xE << 24
+    BGE     loop2_y
+
+    ADD     sp,sp,#0xd4
+    LDMFD   sp!,{r4-r11,pc}
+
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_ver.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_ver.s
new file mode 100755
index 0000000..af9df1b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_ver.s
@@ -0,0 +1,288 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaVer function
+;--
+;-------------------------------------------------------------------------------
+
+
+    IF :DEF: H264DEC_WINASM
+        ;// We dont use REQUIRE8 and PRESERVE8 for winasm
+    ELSE
+        REQUIRE8
+        PRESERVE8
+    ENDIF
+
+    AREA    |.text|, CODE
+
+;// h264bsdInterpolateChromaVer register allocation
+
+ref     RN 0                   ;// r0: reference picture pointer (1st argument)
+ptrA    RN 0                   ;// r0: running source pointer inside the loops
+
+mb      RN 1                   ;// r1: output pointer (predPartChroma)
+block   RN 1                   ;// r1: fill buffer pointer passed to h264bsdFillBlock
+
+x0      RN 2                   ;// r2: x0 argument
+count   RN 2                   ;// r2: packed loop counters (shares r2 with x0)
+
+y0      RN 3                   ;// r3: y0 argument
+valY    RN 3                   ;// r3: packed |yFrac|8-yFrac| weights (replaces y0)
+
+width   RN 4                   ;// r4: source row stride
+
+height  RN 5                   ;// r5: picture height
+tmp7    RN 5                   ;// r5: loop scratch (overwrites height; reloaded from stack for Cr)
+
+chrPW   RN 6                   ;// r6: chromaPartWidth
+tmp8    RN 6                   ;// r6: loop scratch (overwrites chrPW; width-1 is kept in count[23:20])
+
+tmp1    RN 7                   ;// r7: scratch
+
+tmp2    RN 8                   ;// r8: scratch, also holds the loop_x reload value between rows
+
+tmp3    RN 9                   ;// r9: scratch
+
+tmp4    RN 10                  ;// r10: scratch
+
+tmp5    RN 11                  ;// r11: loop scratch (overwrites chrPH; height-1 is kept in count[19:16])
+chrPH   RN 11                  ;// r11: chromaPartHeight
+
+tmp6    RN 12                  ;// r12: scratch
+
+c32     RN 14                  ;// r14 (lr): rounding constant 32 (set after yFrac is packed into valY)
+yFrac   RN 14                  ;// r14 (lr): yFrac argument, loaded from the stack
+
+;// Function exports and imports
+
+    IMPORT  h264bsdFillBlock
+
+    EXPORT  h264bsdInterpolateChromaVer
+
+;//  Function arguments
+;//
+;//  u8 *ref,                   : 0xc4
+;//  u8 *predPartChroma,        : 0xc8
+;//  i32 x0,                    : 0xcc
+;//  i32 y0,                    : 0xd0
+;//  u32 width,                 : 0xf8
+;//  u32 height,                : 0xfc
+;//  u32 yFrac,                 : 0x100
+;//  u32 chromaPartWidth,       : 0x104
+;//  u32 chromaPartHeight       : 0x108
+
+h264bsdInterpolateChromaVer             ;// vertical bilinear chroma interpolation: Cb block, then Cr block
+    STMFD   sp!, {r0-r11,lr}            ;// r0-r3 (ref, predPartChroma, x0, y0) are re-read from the stack later
+    SUB     sp, sp, #0xc4               ;// local scratch area; the fill buffer starts at sp+0x1c
+
+    LDR     chrPW, [sp, #0x104]     ;// chromaPartWidth
+    LDR     width, [sp, #0xf8]      ;// width
+    CMP     x0, #0
+    BLT     do_fill                 ;// x0 < 0
+
+    ADD     tmp1, x0, chrPW         ;// tmp1 = x0 + chromaPartWidth
+    CMP     tmp1, width             ;// x0+chromaPartWidth > width
+    BHI     do_fill                 ;// block crosses the right edge
+
+    CMP     y0, #0
+    BLT     do_fill                 ;// y0 < 0
+    LDR     chrPH, [sp, #0x108]     ;// chromaPartHeight
+    LDR     height, [sp, #0xfc]     ;// height
+    ADD     tmp1, y0, chrPH         ;// tmp1 = y0 + chromaPartHeight
+    ADD     tmp1, tmp1, #1          ;// tmp1 = y0 + chromaPartHeight + 1
+    CMP     tmp1, height
+    BLS     skip_fill               ;// all chromaPartHeight+1 source rows are inside the picture
+
+do_fill
+    LDR     chrPH, [sp, #0x108]     ;// chromaPartHeight
+    LDR     height, [sp, #0xfc]     ;// height
+    ADD     tmp1, chrPH, #1         ;// tmp1 = chromaPartHeight+1
+    MOV     tmp2, chrPW             ;// tmp2 = chromaPartWidth
+    STMIA   sp,{width,height,chrPW,tmp1,tmp2}   ;// stack args: width, height, chrPW, chrPH+1, chrPW
+    ADD     block, sp, #0x1c        ;// block
+    BL      h264bsdFillBlock        ;// first plane; r0, r2, r3 still hold ref, x0, y0
+
+    LDR     x0, [sp, #0xcc]         ;// reload x0 after the call
+    LDR     y0, [sp, #0xd0]         ;// reload y0 after the call
+    LDR     ref, [sp, #0xc4]        ;// ref
+    STMIA   sp,{width,height,chrPW,tmp1,tmp2}   ;// same five stack args again
+    ADD     block, sp, #0x1c        ;// block
+    MLA     ref, height, width, ref ;// ref += width * height (second plane)
+    MLA     block, chrPW, tmp1, block;// block + (chromaPW)*(chromaPH+1)
+    BL      h264bsdFillBlock        ;// second plane, stored right after the first in the buffer
+
+    MOV     x0, #0                  ;// x0 = 0
+    MOV     y0, #0                  ;// y0 = 0
+    STR     x0, [sp, #0xcc]
+    STR     y0, [sp, #0xd0]
+    ADD     ref, sp, #0x1c          ;// ref = block
+    STR     ref, [sp, #0xc4]        ;// ref
+
+    STR     tmp1, [sp, #0xfc]       ;// height = chromaPartHeight+1
+    STR     chrPW, [sp, #0xf8]      ;// width = chromaPartWidth
+    MOV     width, chrPW            ;// from here on the source is the filled buffer
+
+skip_fill
+    MLA     tmp3, y0, width, x0     ;// tmp3 = y0*width+x0
+    LDR     yFrac, [sp, #0x100]     ;// yFrac
+    ADD     ptrA, ref, tmp3         ;// ptrA = ref + y0*width+x0
+    RSB     valY, yFrac, #8         ;// valY = 8-yFrac
+
+    LDR     mb, [sp, #0xc8]         ;// predPartChroma
+
+
+    ;// pack values to count register
+    ;// [31:28] loop_x (chromaPartWidth-1)
+    ;// [27:24] loop_y (chromaPartHeight-1)
+    ;// [23:20] chromaPartWidth-1
+    ;// [19:16] chromaPartHeight-1
+    ;// [15:00] not used by the loops (count aliases x0, whose value is left here; assumes x0 < 0x10000 - TODO confirm)
+
+    SUB     tmp2, chrPH, #1             ;// chromaPartHeight-1
+    SUB     tmp1, chrPW, #1             ;// chromaPartWidth-1
+    ADD     count, count, tmp2, LSL #16 ;// chromaPartHeight-1
+    ADD     count, count, tmp2, LSL #24 ;// loop_y
+    ADD     count, count, tmp1, LSL #20 ;// chromaPartWidth-1
+    AND     tmp2, count, #0x00F00000    ;// (chromaPartWidth-1)<<20, reload value for loop_x
+    PKHBT   valY, valY, yFrac, LSL #16  ;// |yFrac|valY |
+    MOV     valY, valY, LSL #3          ;// multiply by 8 in advance
+    MOV     c32, #32                    ;// rounding term; overwrites yFrac (same register)
+
+
+    ;///////////////////////////////////////////////////////////////////////////
+    ;// Cb
+    ;///////////////////////////////////////////////////////////////////////////
+
+    ;// 2x2 pels per iteration
+    ;// bilinear vertical interpolation
+
+loop1_y
+    ADD     count, count, tmp2, LSL #8  ;// loop_x = chromaPartWidth-1
+loop1_x
+    ;// Process 2x2 block
+    LDRB    tmp2, [ptrA,width]          ;// 2 row, 1 col
+    LDRB    tmp3, [ptrA,width, LSL #1]  ;// 3 row, 1 col
+    LDRB    tmp1, [ptrA],#1             ;// 1 row, 1 col
+
+    LDRB    tmp5, [ptrA,width]          ;// 2 row, 2 col
+    LDRB    tmp6, [ptrA,width, LSL #1]  ;// 3 row, 2 col
+    LDRB    tmp4, [ptrA],#1             ;// 1 row, 2 col
+
+    PKHBT   tmp1, tmp1, tmp2, LSL #16   ;// |B|A|
+    PKHBT   tmp2, tmp2, tmp3, LSL #16   ;// |C|B|
+    PKHBT   tmp4, tmp4, tmp5, LSL #16   ;// |B|A|
+
+    SMLAD   tmp7, tmp2, valY, c32       ;// row 2 col 1: 8*((8-yFrac)*B + yFrac*C) + 32
+    PKHBT   tmp5, tmp5, tmp6, LSL #16   ;// |C|B|
+    SMLAD   tmp2, tmp1, valY, c32       ;// row 1 col 1: 8*((8-yFrac)*A + yFrac*B) + 32
+    SMLAD   tmp8, tmp5, valY, c32       ;// row 2 col 2: 8*((8-yFrac)*B + yFrac*C) + 32
+    SMLAD   tmp5, tmp4, valY, c32       ;// row 1 col 2: 8*((8-yFrac)*A + yFrac*B) + 32
+
+    MOV     tmp7, tmp7, LSR #6          ;// scale down
+    STRB    tmp7, [mb,#8]               ;// store row 2 col 1
+    MOV     tmp2, tmp2, LSR #6          ;// scale down
+    STRB    tmp2, [mb],#1               ;// store row 1 col 1
+
+    MOV     tmp8, tmp8, LSR #6          ;// scale down
+    STRB    tmp8, [mb,#8]               ;// store row 2 col 2
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb],#1               ;// store row 1 col 2
+
+
+    SUBS    count, count, #2<<28        ;// loop_x -= 2 (two columns done)
+    BCS     loop1_x                     ;// no borrow: more columns in this row pair
+
+    AND     tmp2, count, #0x00F00000    ;// (chromaPartWidth-1)<<20
+
+    ADDS    mb, mb, #16                 ;// +16 (two output rows of 8); leaves carry clear
+    SBC     mb, mb, tmp2, LSR #20       ;// -(chromaPartWidth-1)-1
+    ADD     ptrA, ptrA, width, LSL #1   ;// +2*width (flags untouched)
+    SBC     ptrA, ptrA, tmp2, LSR #20   ;// -(chromaPartWidth-1)-1
+
+    ADDS    count, count, #0xE << 24    ;// loop_y -= 2 (adds -2 mod 16 to bits [27:24])
+    BGE     loop1_y                     ;// continue while count stays non-negative
+
+    ;///////////////////////////////////////////////////////////////////////////
+    ;// Cr
+    ;///////////////////////////////////////////////////////////////////////////
+    LDR     height, [sp,#0xfc]          ;// height
+    LDR     ref, [sp, #0xc4]            ;// ref
+    LDR     tmp1, [sp, #0xd0]           ;// y0
+    LDR     tmp2, [sp, #0xcc]           ;// x0
+    LDR     mb, [sp, #0xc8]             ;// predPartChroma
+
+    ADD     tmp1, height, tmp1          ;// height + y0
+    MLA     tmp3, tmp1, width, tmp2     ;// (height+y0)*width + x0
+    ADD     ptrA, ref, tmp3             ;// same block position, one plane (width*height) further
+    ADD     mb, mb, #64                 ;// Cr output starts 64 bytes after the Cb output
+
+    AND     count, count, #0x00FFFFFF   ;// clear loop_x / loop_y
+    AND     tmp1, count, #0x000F0000    ;// (chromaPartHeight-1)<<16
+    ADD     count, count, tmp1, LSL #8  ;// loop_y = chromaPartHeight-1
+    AND     tmp2, count, #0x00F00000    ;// (chromaPartWidth-1)<<20, reload value for loop_x
+
+    ;// 2x2 pels per iteration
+    ;// bilinear vertical interpolation
+loop2_y
+    ADD     count, count, tmp2, LSL #8  ;// loop_x = chromaPartWidth-1
+loop2_x
+    ;// Process 2x2 block
+    LDRB    tmp2, [ptrA,width]          ;// 2 row, 1 col
+    LDRB    tmp3, [ptrA,width, LSL #1]  ;// 3 row, 1 col
+    LDRB    tmp1, [ptrA],#1             ;// 1 row, 1 col
+
+    LDRB    tmp5, [ptrA,width]          ;// 2 row, 2 col
+    LDRB    tmp6, [ptrA,width, LSL #1]  ;// 3 row, 2 col
+    LDRB    tmp4, [ptrA],#1             ;// 1 row, 2 col
+
+    PKHBT   tmp1, tmp1, tmp2, LSL #16   ;// |B|A|
+    PKHBT   tmp2, tmp2, tmp3, LSL #16   ;// |C|B|
+    PKHBT   tmp4, tmp4, tmp5, LSL #16   ;// |B|A|
+
+    SMLAD   tmp7, tmp2, valY, c32       ;// row 2 col 1: 8*((8-yFrac)*B + yFrac*C) + 32
+    PKHBT   tmp5, tmp5, tmp6, LSL #16   ;// |C|B|
+    SMLAD   tmp2, tmp1, valY, c32       ;// row 1 col 1: 8*((8-yFrac)*A + yFrac*B) + 32
+    SMLAD   tmp8, tmp5, valY, c32       ;// row 2 col 2: 8*((8-yFrac)*B + yFrac*C) + 32
+    SMLAD   tmp5, tmp4, valY, c32       ;// row 1 col 2: 8*((8-yFrac)*A + yFrac*B) + 32
+
+    MOV     tmp7, tmp7, LSR #6          ;// scale down
+    STRB    tmp7, [mb,#8]               ;// store row 2 col 1
+    MOV     tmp2, tmp2, LSR #6          ;// scale down
+    STRB    tmp2, [mb],#1               ;// store row 1 col 1
+
+    MOV     tmp8, tmp8, LSR #6          ;// scale down
+    STRB    tmp8, [mb,#8]               ;// store row 2 col 2
+    MOV     tmp5, tmp5, LSR #6          ;// scale down
+    STRB    tmp5, [mb],#1               ;// store row 1 col 2
+
+
+    SUBS    count, count, #2<<28        ;// loop_x -= 2 (two columns done)
+    BCS     loop2_x                     ;// no borrow: more columns in this row pair
+
+    AND     tmp2, count, #0x00F00000    ;// (chromaPartWidth-1)<<20
+
+    ADDS    mb, mb, #16                 ;// +16 (two output rows of 8); leaves carry clear
+    SBC     mb, mb, tmp2, LSR #20       ;// -(chromaPartWidth-1)-1
+    ADD     ptrA, ptrA, width, LSL #1   ;// +2*width (flags untouched)
+    SBC     ptrA, ptrA, tmp2, LSR #20   ;// -(chromaPartWidth-1)-1
+
+    ADDS    count, count, #0xE << 24    ;// loop_y -= 2 (adds -2 mod 16 to bits [27:24])
+    BGE     loop2_y                     ;// continue while count stays non-negative
+
+    ADD     sp,sp,#0xd4                 ;// drop locals (0xc4) plus the saved r0-r3 (0x10)
+    LDMFD   sp!, {r4-r11,pc}            ;// restore callee-saved registers and return
+
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_half.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_half.s
new file mode 100755
index 0000000..93968b6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_half.s
@@ -0,0 +1,251 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateHorHalf function
+;--
+;-------------------------------------------------------------------------------
+
+
+    IF :DEF: H264DEC_WINASM
+        ;// We dont use REQUIRE8 and PRESERVE8 for winasm
+    ELSE
+        REQUIRE8
+        PRESERVE8
+    ENDIF
+
+    AREA    |.text|, CODE
+
+;// h264bsdInterpolateHorHalf register allocation
+
+ref     RN 0                   ;// r0: source pointer (1st argument, later the running read pointer)
+
+mb      RN 1                   ;// r1: output pointer
+buff    RN 1                   ;// r1: fill buffer pointer passed to h264bsdFillBlock
+
+count   RN 2                   ;// r2: packed loop counters and width
+x0      RN 2                   ;// r2: x0 argument
+
+y0      RN 3                   ;// r3: y0 argument
+x_2_0   RN 3                   ;// r3: source pixels |2|0| as halfwords (replaces y0)
+
+width   RN 4                   ;// r4: source row stride
+x_3_1   RN 4                   ;// r4: source pixels |3|1| (overwrites width; width is kept in count[15:0])
+
+height  RN 5                   ;// r5: picture height
+x_6_4   RN 5                   ;// r5: source pixels |6|4| (overwrites height)
+
+partW   RN 6                   ;// r6: partWidth
+x_7_5   RN 6                   ;// r6: source pixels |7|5| (overwrites partW; width-1 is kept in count[23:20])
+
+partH   RN 7                   ;// r7: partHeight
+tmp1    RN 7                   ;// r7: accumulator / scratch (overwrites partH; height-1 is kept in count[19:16])
+
+tmp2    RN 8                   ;// r8: accumulator / scratch
+
+tmp3    RN 9                   ;// r9: accumulator / scratch
+
+tmp4    RN 10                  ;// r10: accumulator / scratch
+
+mult_20_01  RN 11              ;// r11: packed coefficients, top = 20, bottom = 1
+mult_20_m5  RN 12              ;// r12: packed coefficients, top = 20, bottom = -5
+
+plus16  RN 14                  ;// r14 (lr): rounding constant 16
+
+
+;// function exports and imports
+
+    IMPORT  h264bsdFillBlock
+
+    EXPORT  h264bsdInterpolateHorHalf
+
+;// Horizontal filter approach
+;//
+;// Basic idea in horizontal filtering is to adjust coefficients
+;// like below. Calculation is done with 16-bit maths.
+;//
+;// Reg     x_2_0     x_3_1     x_6_4     x_7_5     x_2_0
+;//       [  2  0 ] [  3  1 ] [  6  4 ] [  7  5 ] [ 10  8 ] ...
+;// y_0 =   20  1     20 -5        -5         1
+;// y_1 =   -5        20  1      1 20        -5
+;// y_2 =    1        -5        -5 20      1 20
+;// y_3 =              1        20 -5     -5 20         1
+
+
+h264bsdInterpolateHorHalf               ;// horizontal 6-tap (1,-5,20,20,-5,1) filter, 4 output pixels per store
+    STMFD   sp!, {r0-r11, lr}           ;// x0, y0 and mb are re-read from the saved r1-r3 slots later
+    SUB     sp, sp, #0x1e4              ;// local scratch area; the fill buffer starts at sp+0x28
+
+    CMP     x0, #0
+    BLT     do_fill                 ;// (x0 < 0)
+    LDR     partW, [sp,#0x220]      ;// partWidth
+    ADD     tmp4, x0, partW         ;// (x0+partWidth)
+    ADD     tmp4, tmp4, #5          ;// (x0+partWidth+5)
+    LDR     width, [sp,#0x218]      ;// width
+    CMP     tmp4, width
+    BHI     do_fill                 ;// (x0+partWidth+5)>width
+
+    CMP     y0, #0
+    BLT     do_fill                 ;// (y0 < 0)
+    LDR     partH, [sp,#0x224]      ;// partHeight
+    ADD     tmp2, y0, partH         ;// (y0+partHeight)
+    LDR     height, [sp,#0x21c]     ;// height
+    CMP     tmp2, height
+    BLS     skip_fill               ;// no overfill needed
+
+
+do_fill
+    LDR     partH, [sp,#0x224]      ;// partHeight
+    LDR     height, [sp,#0x21c]     ;// height
+    LDR     partW, [sp,#0x220]      ;// partWidth
+    ADD     tmp4, partW, #5         ;// tmp4 = partW + 5;
+    STMIB   sp, {height, tmp4}      ;// sp+4 = height, sp+8 = partWidth+5
+    STR     partH, [sp,#0xc]        ;// sp+c = partHeight
+    STR     tmp4, [sp,#0x10]        ;// sp+10 = partWidth+5
+    LDR     width, [sp,#0x218]      ;// width
+    STR     width, [sp,#0]          ;// sp+0 = width
+    ADD     buff, sp, #0x28         ;// buff = p1[21*21/4+1]
+    BL      h264bsdFillBlock        ;// r0, r2, r3 still hold ref, x0, y0
+
+    MOV     x0, #0                  ;// the filled buffer is read from its origin
+    STR     x0,[sp,#0x1ec]          ;// x0 = 0
+    STR     x0,[sp,#0x1f0]          ;// y0 = 0
+    ADD     ref,sp,#0x28            ;// ref = p1
+    STR     tmp4, [sp,#0x218]       ;// width = partWidth+5
+
+
+skip_fill
+    LDR     x0 ,[sp,#0x1ec]         ;// x0
+    LDR     y0 ,[sp,#0x1f0]         ;// y0
+    LDR     width, [sp,#0x218]      ;// width
+    MLA     tmp2, width, y0, x0     ;// y0*width+x0
+    ADD     ref, ref, tmp2          ;// ref += y0*width+x0
+    ADD     ref, ref, #8            ;// ref = ref+8 (loop_y reads the row start at ref-8 and ref-4)
+    LDR     mb, [sp, #0x1e8]        ;// mb
+    ;// pack values to count register
+    ;// [31:28] loop_x (partWidth-1)
+    ;// [27:24] loop_y (partHeight-1)
+    ;// [23:20] partWidth-1
+    ;// [19:16] partHeight-1
+    ;// [15:00] width
+    MOV     count, width                ;// [15:00] = width (overwrites x0)
+    SUB     partW, partW, #1;           ;// partWidth-1
+    SUB     partH, partH, #1;           ;// partHeight-1
+    ADD     tmp2, partH, partW, LSL #4  ;// ((partWidth-1)<<4) + (partHeight-1)
+    ADD     count, count, tmp2, LSL #16 ;// placed in bits [23:16]
+
+
+    LDR     mult_20_01, = 0x00140001    ;// halfwords: top = 20, bottom = 1
+    LDR     mult_20_m5, = 0x0014FFFB    ;// halfwords: top = 20, bottom = -5
+    MOV     plus16, #16                 ;// rounding term added before the >>5
+    AND     tmp1, count, #0x000F0000    ;// partHeight-1
+    AND     tmp3, count, #0x00F00000    ;// partWidth-1
+    ADD     count, count, tmp1, LSL #8  ;// loop_y = partHeight-1
+loop_y
+    LDR     x_3_1, [ref, #-8]           ;// source pixels 0..3 of this row
+    ADD     count, count, tmp3, LSL #8  ;// loop_x = partWidth-1
+    LDR     x_7_5, [ref, #-4]           ;// source pixels 4..7
+    UXTB16  x_2_0, x_3_1                ;// |x2|x0|
+    UXTB16  x_3_1, x_3_1, ROR #8        ;// |x3|x1|
+    UXTB16  x_6_4, x_7_5                ;// |x6|x4|
+
+loop_x                                  ;// pixel indices below are for the first pass; each pass shifts them by 8
+    UXTB16  x_7_5, x_7_5, ROR #8        ;// |x7|x5|
+
+    SMLAD   tmp1, x_2_0, mult_20_01, plus16 ;// y_0  = 20*x2 + x0 + 16
+    SMLATB  tmp3, x_2_0, mult_20_01, plus16 ;// y_2  = x2 + 16
+    SMLATB  tmp2, x_2_0, mult_20_m5, plus16 ;// y_1  = -5*x2 + 16
+    SMLATB  tmp4, x_3_1, mult_20_01, plus16 ;// y_3  = x3 + 16
+
+    SMLAD   tmp1, x_3_1, mult_20_m5, tmp1   ;// y_0 += 20*x3 - 5*x1
+    SMLATB  tmp3, x_3_1, mult_20_m5, tmp3   ;// y_2 += -5*x3
+    SMLAD   tmp2, x_3_1, mult_20_01, tmp2   ;// y_1 += 20*x3 + x1
+    LDR     x_3_1, [ref], #4                ;// source pixels 8..11, ref += 4
+    SMLAD   tmp4, x_6_4, mult_20_m5, tmp4   ;// y_3 += 20*x6 - 5*x4
+
+    SMLABB  tmp1, x_6_4, mult_20_m5, tmp1   ;// y_0 += -5*x4
+    SMLADX  tmp3, x_6_4, mult_20_m5, tmp3   ;// y_2 += -5*x6 + 20*x4
+    SMLADX  tmp2, x_6_4, mult_20_01, tmp2   ;// y_1 += x6 + 20*x4
+    SMLADX  tmp4, x_7_5, mult_20_m5, tmp4   ;// y_3 += -5*x7 + 20*x5
+
+    SMLABB  tmp1, x_7_5, mult_20_01, tmp1   ;// y_0 += x5
+    UXTB16  x_2_0, x_3_1                    ;// |x10|x8|
+    SMLABB  tmp2, x_7_5, mult_20_m5, tmp2   ;// y_1 += -5*x5
+    SMLADX  tmp3, x_7_5, mult_20_01, tmp3   ;// y_2 += x7 + 20*x5
+    SMLABB  tmp4, x_2_0, mult_20_01, tmp4   ;// y_3 += x8
+
+    MOV     tmp2, tmp2, ASR #5              ;// y_1 >> 5
+    MOV     tmp1, tmp1, ASR #5              ;// y_0 >> 5
+    PKHBT   tmp2, tmp2, tmp4, LSL #(16-5)   ;// |y_3>>5|y_1>>5|
+    PKHBT   tmp1, tmp1, tmp3, LSL #(16-5)   ;// |y_2>>5|y_0>>5|
+    USAT16  tmp2, #8, tmp2                  ;// clip both halfwords to 0..255
+    USAT16  tmp1, #8, tmp1                  ;// clip both halfwords to 0..255
+
+    SUBS    count, count, #4<<28            ;// loop_x -= 4 (four pixels done)
+    ORR     tmp1, tmp1, tmp2, LSL #8        ;// bytes y_0, y_1, y_2, y_3 (ORR leaves flags alone)
+    STR     tmp1, [mb], #4                  ;// store four output pixels
+    BCC     next_y                          ;// borrow from the SUBS above: row finished
+
+    UXTB16  x_3_1, x_3_1, ROR #8            ;// |x11|x9|
+
+    SMLAD   tmp1, x_6_4, mult_20_01, plus16 ;// y_4  = 20*x6 + x4 + 16
+    SMLATB  tmp3, x_6_4, mult_20_01, plus16 ;// y_6  = x6 + 16
+    SMLATB  tmp2, x_6_4, mult_20_m5, plus16 ;// y_5  = -5*x6 + 16
+    SMLATB  tmp4, x_7_5, mult_20_01, plus16 ;// y_7  = x7 + 16
+
+    SMLAD   tmp1, x_7_5, mult_20_m5, tmp1   ;// y_4 += 20*x7 - 5*x5
+    SMLATB  tmp3, x_7_5, mult_20_m5, tmp3   ;// y_6 += -5*x7
+    SMLAD   tmp2, x_7_5, mult_20_01, tmp2   ;// y_5 += 20*x7 + x5
+    LDR     x_7_5, [ref], #4                ;// source pixels 12..15, ref += 4
+    SMLAD   tmp4, x_2_0, mult_20_m5, tmp4   ;// y_7 += 20*x10 - 5*x8
+
+    SMLABB  tmp1, x_2_0, mult_20_m5, tmp1   ;// y_4 += -5*x8
+    SMLADX  tmp3, x_2_0, mult_20_m5, tmp3   ;// y_6 += -5*x10 + 20*x8
+    SMLADX  tmp2, x_2_0, mult_20_01, tmp2   ;// y_5 += x10 + 20*x8
+    SMLADX  tmp4, x_3_1, mult_20_m5, tmp4   ;// y_7 += -5*x11 + 20*x9
+
+    SMLABB  tmp1, x_3_1, mult_20_01, tmp1   ;// y_4 += x9
+    UXTB16  x_6_4, x_7_5                    ;// |x14|x12|
+    SMLABB  tmp2, x_3_1, mult_20_m5, tmp2   ;// y_5 += -5*x9
+    SMLADX  tmp3, x_3_1, mult_20_01, tmp3   ;// y_6 += x11 + 20*x9
+    SMLABB  tmp4, x_6_4, mult_20_01, tmp4   ;// y_7 += x12
+
+    MOV     tmp2, tmp2, ASR #5              ;// y_5 >> 5
+    MOV     tmp1, tmp1, ASR #5              ;// y_4 >> 5
+    PKHBT   tmp2, tmp2, tmp4, LSL #(16-5)   ;// |y_7>>5|y_5>>5|
+    PKHBT   tmp1, tmp1, tmp3, LSL #(16-5)   ;// |y_6>>5|y_4>>5|
+    USAT16  tmp2, #8, tmp2                  ;// clip both halfwords to 0..255
+    USAT16  tmp1, #8, tmp1                  ;// clip both halfwords to 0..255
+
+    SUBS    count, count, #4<<28            ;// loop_x -= 4 (four pixels done)
+    ORR     tmp1, tmp1, tmp2, LSL #8        ;// bytes y_4, y_5, y_6, y_7 (ORR leaves flags alone)
+    STR     tmp1, [mb], #4                  ;// store four output pixels
+    BCS     loop_x                          ;// no borrow: more pixels in this row
+
+next_y
+    AND     tmp3, count, #0x00F00000    ;// partWidth-1
+    SMLABB  ref, count, mult_20_01, ref ;// +width
+    ADDS    mb, mb, #16                 ;// +16, Carry=0
+    SBC     mb, mb, tmp3, LSR #20       ;// -(partWidth-1)-1
+    SBC     ref, ref, tmp3, LSR #20     ;// -(partWidth-1)-1
+    ADDS    count, count, #(1<<28)-(1<<24)  ;// loop_y -= 1 (adds -1 mod 16 to bits [27:24])
+    BGE     loop_y                      ;// continue while count stays non-negative
+
+    ADD     sp,sp,#0x1f4                ;// drop locals (0x1e4) plus the saved r0-r3 (0x10)
+    LDMFD   sp!, {r4-r11, pc}           ;// restore callee-saved registers and return
+
+    END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_quarter.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_quarter.s
new file mode 100755
index 0000000..de243d4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_quarter.s
@@ -0,0 +1,273 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateHorQuarter function
+;--
+;-------------------------------------------------------------------------------
+
+
+    IF :DEF: H264DEC_WINASM
+        ;// We dont use REQUIRE8 and PRESERVE8 for winasm
+    ELSE
+        REQUIRE8
+        PRESERVE8
+    ENDIF
+
+    AREA    |.text|, CODE
+
+;// h264bsdInterpolateHorQuarter register allocation
+
+ref     RN 0                   ;// r0: source pointer (1st argument, later the running read pointer)
+
+mb      RN 1                   ;// r1: output pointer
+buff    RN 1                   ;// r1: fill buffer pointer passed to h264bsdFillBlock
+
+count   RN 2                   ;// r2: packed loop counters and width
+x0      RN 2                   ;// r2: x0 argument
+
+y0      RN 3                   ;// r3: y0 argument
+x_2_0   RN 3                   ;// r3: source pixels |2|0| as halfwords (replaces y0)
+
+width   RN 4                   ;// r4: source row stride
+x_3_1   RN 4                   ;// r4: source pixels |3|1| (overwrites width; width is kept in count[15:0])
+
+height  RN 5                   ;// r5: picture height
+x_6_4   RN 5                   ;// r5: source pixels |6|4| (overwrites height)
+
+partW   RN 6                   ;// r6: partWidth
+x_7_5   RN 6                   ;// r6: source pixels |7|5| (overwrites partW; width-1 is kept in count[23:20])
+
+partH   RN 7                   ;// r7: partHeight
+tmp1    RN 7                   ;// r7: accumulator / scratch (overwrites partH; height-1 is kept in count[19:16])
+
+tmp2    RN 8                   ;// r8: accumulator / scratch
+
+tmp3    RN 9                   ;// r9: accumulator / scratch
+
+tmp4    RN 10                  ;// r10: accumulator / scratch
+
+mult_20_01  RN 11              ;// r11: packed coefficients, top = 20, bottom = 1
+
+mult_20_m5  RN 12              ;// r12: packed coefficients, top = 20, bottom = -5
+
+plus16  RN 14                  ;// r14 (lr): rounding constant 16
+
+
+;// function exports and imports
+
+    IMPORT  h264bsdFillBlock
+
+    EXPORT  h264bsdInterpolateHorQuarter
+
+
+;// Horizontal filter approach
+;//
+;// Basic idea in horizontal filtering is to adjust coefficients
+;// like below. Calculation is done with 16-bit maths.
+;//
+;// Reg     x_2_0     x_3_1     x_6_4     x_7_5     x_2_0
+;//       [  2  0 ] [  3  1 ] [  6  4 ] [  7  5 ] [ 10  8 ] ...
+;// y_0 =   20  1     20 -5        -5         1
+;// y_1 =   -5        20  1      1 20        -5
+;// y_2 =    1        -5        -5 20      1 20
+;// y_3 =              1        20 -5     -5 20         1
+
+
+h264bsdInterpolateHorQuarter            ;// horizontal 6-tap half-pel filter averaged with an integer pixel
+    STMFD   sp!, {r0-r11, lr}           ;// x0, y0 and mb are re-read from the saved r1-r3 slots later
+    SUB     sp, sp, #0x1e4              ;// local scratch area; the fill buffer starts at sp+0x28
+
+    CMP     x0, #0
+    BLT     do_fill                 ;// (x0 < 0)
+    LDR     partW, [sp,#0x220]      ;// partWidth
+    ADD     tmp4, x0, partW         ;// (x0+partWidth)
+    ADD     tmp4, tmp4, #5          ;// (x0+partWidth+5)
+    LDR     width, [sp,#0x218]      ;// width
+    CMP     tmp4, width
+    BHI     do_fill                 ;// (x0+partWidth+5)>width
+
+    CMP     y0, #0
+    BLT     do_fill                 ;// (y0 < 0)
+    LDR     partH, [sp,#0x224]      ;// partHeight
+    ADD     tmp2, y0, partH         ;// (y0+partHeight)
+    LDR     height, [sp,#0x21c]     ;// height
+    CMP     tmp2, height
+    BLS     skip_fill               ;// no overfill needed
+
+
+do_fill
+    LDR     partH, [sp,#0x224]      ;// partHeight
+    LDR     height, [sp,#0x21c]     ;// height
+    LDR     partW, [sp,#0x220]      ;// partWidth
+    ADD     tmp4, partW, #5         ;// tmp4 = partW + 5;
+    STMIB   sp, {height, tmp4}      ;// sp+4 = height, sp+8 = partWidth+5
+    STR     partH, [sp,#0xc]        ;// sp+c = partHeight
+    STR     tmp4, [sp,#0x10]        ;// sp+10 = partWidth+5
+    LDR     width, [sp,#0x218]      ;// width
+    STR     width, [sp,#0]          ;// sp+0 = width
+    ADD     buff, sp, #0x28         ;// buff = p1[21*21/4+1]
+    BL      h264bsdFillBlock        ;// r0, r2, r3 still hold ref, x0, y0
+
+    MOV     x0, #0                  ;// the filled buffer is read from its origin
+    STR     x0,[sp,#0x1ec]          ;// x0 = 0
+    STR     x0,[sp,#0x1f0]          ;// y0 = 0
+    ADD     ref,sp,#0x28            ;// ref = p1
+    STR     tmp4, [sp,#0x218]       ;// width = partWidth+5
+
+
+skip_fill
+    LDR     x0 ,[sp,#0x1ec]         ;// x0
+    LDR     y0 ,[sp,#0x1f0]         ;// y0
+    LDR     width, [sp,#0x218]      ;// width
+    MLA     tmp2, width, y0, x0     ;// y0*width+x0
+    ADD     ref, ref, tmp2          ;// ref += y0*width+x0
+    ADD     ref, ref, #8            ;// ref = ref+8 (loop_y reads the row start at ref-8 and ref-4)
+    LDR     mb, [sp, #0x1e8]        ;// mb
+
+    ;// pack values to count register
+    ;// [31:28] loop_x (partWidth-1)
+    ;// [27:24] loop_y (partHeight-1)
+    ;// [23:20] partWidth-1
+    ;// [19:16] partHeight-1
+    ;// [15:00] width
+    MOV     count, width                ;// [15:00] = width (overwrites x0)
+    SUB     partW, partW, #1;           ;// partWidth-1
+    SUB     partH, partH, #1;           ;// partHeight-1
+    ADD     tmp2, partH, partW, LSL #4  ;// ((partWidth-1)<<4) + (partHeight-1)
+    ADD     count, count, tmp2, LSL #16 ;// placed in bits [23:16]
+
+
+    LDR     mult_20_01, = 0x00140001    ;// halfwords: top = 20, bottom = 1
+    LDR     mult_20_m5, = 0x0014FFFB    ;// halfwords: top = 20, bottom = -5
+    MOV     plus16, #16                 ;// rounding term added before the >>5
+    AND     tmp1, count, #0x000F0000    ;// partHeight-1
+    AND     tmp3, count, #0x00F00000    ;// partWidth-1
+    ADD     count, count, tmp1, LSL #8  ;// loop_y = partHeight-1
+loop_y
+    LDR     x_3_1, [ref, #-8]           ;// source pixels 0..3 of this row
+    ADD     count, count, tmp3, LSL #8  ;// loop_x = partWidth-1
+    LDR     x_7_5, [ref, #-4]           ;// source pixels 4..7
+    UXTB16  x_2_0, x_3_1                ;// |x2|x0|
+    UXTB16  x_3_1, x_3_1, ROR #8        ;// |x3|x1|
+    UXTB16  x_6_4, x_7_5                ;// |x6|x4|
+
+loop_x                                  ;// pixel indices below are for the first pass; each pass shifts them by 8
+    UXTB16  x_7_5, x_7_5, ROR #8        ;// |x7|x5|
+
+    SMLAD   tmp1, x_2_0, mult_20_01, plus16 ;// y_0  = 20*x2 + x0 + 16
+    SMLATB  tmp3, x_2_0, mult_20_01, plus16 ;// y_2  = x2 + 16
+    SMLATB  tmp2, x_2_0, mult_20_m5, plus16 ;// y_1  = -5*x2 + 16
+    SMLATB  tmp4, x_3_1, mult_20_01, plus16 ;// y_3  = x3 + 16
+
+    SMLAD   tmp1, x_3_1, mult_20_m5, tmp1   ;// y_0 += 20*x3 - 5*x1
+    SMLATB  tmp3, x_3_1, mult_20_m5, tmp3   ;// y_2 += -5*x3
+    SMLAD   tmp2, x_3_1, mult_20_01, tmp2   ;// y_1 += 20*x3 + x1
+    LDR     x_3_1, [ref], #4                ;// source pixels 8..11, ref += 4
+    SMLAD   tmp4, x_6_4, mult_20_m5, tmp4   ;// y_3 += 20*x6 - 5*x4
+
+    SMLABB  tmp1, x_6_4, mult_20_m5, tmp1   ;// y_0 += -5*x4
+    SMLADX  tmp3, x_6_4, mult_20_m5, tmp3   ;// y_2 += -5*x6 + 20*x4
+    SMLADX  tmp2, x_6_4, mult_20_01, tmp2   ;// y_1 += x6 + 20*x4
+    SMLADX  tmp4, x_7_5, mult_20_m5, tmp4   ;// y_3 += -5*x7 + 20*x5
+
+    SMLABB  tmp1, x_7_5, mult_20_01, tmp1   ;// y_0 += x5
+    UXTB16  x_2_0, x_3_1                    ;// |x10|x8|
+    SMLABB  tmp2, x_7_5, mult_20_m5, tmp2   ;// y_1 += -5*x5
+    SMLADX  tmp3, x_7_5, mult_20_01, tmp3   ;// y_2 += x7 + 20*x5
+    SMLABB  tmp4, x_2_0, mult_20_01, tmp4   ;// y_3 += x8
+
+    MOV     tmp2, tmp2, ASR #5              ;// y_1 >> 5
+    MOV     tmp1, tmp1, ASR #5              ;// y_0 >> 5
+    PKHBT   tmp2, tmp2, tmp4, LSL #(16-5)   ;// |y_3>>5|y_1>>5|
+    PKHBT   tmp1, tmp1, tmp3, LSL #(16-5)   ;// |y_2>>5|y_0>>5|
+    LDR     tmp4, [sp, #0x228]              ;// stack argument after partHeight; presumably the horizontal offset - confirm against the C prototype
+    USAT16  tmp2, #8, tmp2                  ;// clip both halfwords to 0..255
+    USAT16  tmp1, #8, tmp1                  ;// clip both halfwords to 0..255
+    SUB     tmp4, tmp4, #10                 ;// ref-10 is pixel x2 here (ref is 12 past the row start)
+
+    SUBS    count, count, #4<<28            ;// loop_x -= 4; flags are consumed by the BCC below
+    LDR     tmp3, [ref, tmp4]               ;// four integer pixels starting at x2 + offset
+    ORR     tmp1, tmp1, tmp2, LSL #8        ;// bytes y_0, y_1, y_2, y_3
+
+;// quarter pel position
+    LDR     tmp2, = 0x80808080              ;// 0x80 in every byte lane
+    MVN     tmp3, tmp3                      ;// per byte: 255 - b
+    UHSUB8  tmp1, tmp1, tmp3                ;// per byte: (a + b - 255) >> 1
+    EOR     tmp1, tmp1, tmp2                ;// per byte: +128 (mod 256) gives (a + b + 1) >> 1
+    STR     tmp1, [mb], #4                  ;// store four output pixels
+
+    BCC     next_y                          ;// borrow from the SUBS above: row finished
+
+    UXTB16  x_3_1, x_3_1, ROR #8            ;// |x11|x9|
+
+    SMLAD   tmp1, x_6_4, mult_20_01, plus16 ;// y_4  = 20*x6 + x4 + 16
+    SMLATB  tmp3, x_6_4, mult_20_01, plus16 ;// y_6  = x6 + 16
+    SMLATB  tmp2, x_6_4, mult_20_m5, plus16 ;// y_5  = -5*x6 + 16
+    SMLATB  tmp4, x_7_5, mult_20_01, plus16 ;// y_7  = x7 + 16
+
+    SMLAD   tmp1, x_7_5, mult_20_m5, tmp1   ;// y_4 += 20*x7 - 5*x5
+    SMLATB  tmp3, x_7_5, mult_20_m5, tmp3   ;// y_6 += -5*x7
+    SMLAD   tmp2, x_7_5, mult_20_01, tmp2   ;// y_5 += 20*x7 + x5
+    LDR     x_7_5, [ref], #4                ;// source pixels 12..15, ref += 4
+    SMLAD   tmp4, x_2_0, mult_20_m5, tmp4   ;// y_7 += 20*x10 - 5*x8
+
+    SMLABB  tmp1, x_2_0, mult_20_m5, tmp1   ;// y_4 += -5*x8
+    SMLADX  tmp3, x_2_0, mult_20_m5, tmp3   ;// y_6 += -5*x10 + 20*x8
+    SMLADX  tmp2, x_2_0, mult_20_01, tmp2   ;// y_5 += x10 + 20*x8
+    SMLADX  tmp4, x_3_1, mult_20_m5, tmp4   ;// y_7 += -5*x11 + 20*x9
+
+    SMLABB  tmp1, x_3_1, mult_20_01, tmp1   ;// y_4 += x9
+    UXTB16  x_6_4, x_7_5                    ;// |x14|x12|
+    SMLABB  tmp2, x_3_1, mult_20_m5, tmp2   ;// y_5 += -5*x9
+    SMLADX  tmp3, x_3_1, mult_20_01, tmp3   ;// y_6 += x11 + 20*x9
+    SMLABB  tmp4, x_6_4, mult_20_01, tmp4   ;// y_7 += x12
+
+    MOV     tmp2, tmp2, ASR #5              ;// y_5 >> 5
+    MOV     tmp1, tmp1, ASR #5              ;// y_4 >> 5
+    PKHBT   tmp2, tmp2, tmp4, LSL #(16-5)   ;// |y_7>>5|y_5>>5|
+    PKHBT   tmp1, tmp1, tmp3, LSL #(16-5)   ;// |y_6>>5|y_4>>5|
+    LDR     tmp4, [sp, #0x228]              ;// same stack argument as above (presumably the horizontal offset)
+    USAT16  tmp2, #8, tmp2                  ;// clip both halfwords to 0..255
+    USAT16  tmp1, #8, tmp1                  ;// clip both halfwords to 0..255
+    SUB     tmp4, tmp4, #10                 ;// ref-10 is pixel x6 here (ref is 16 past the row start)
+
+    SUBS    count, count, #4<<28            ;// loop_x -= 4; flags are consumed by the BCS below
+    LDR     tmp3, [ref, tmp4]               ;// four integer pixels starting at x6 + offset
+    ORR     tmp1, tmp1, tmp2, LSL #8        ;// bytes y_4, y_5, y_6, y_7
+
+;// quarter pel
+    LDR     tmp2, = 0x80808080              ;// 0x80 in every byte lane
+    MVN     tmp3, tmp3                      ;// per byte: 255 - b
+    UHSUB8  tmp1, tmp1, tmp3                ;// per byte: (a + b - 255) >> 1
+    EOR     tmp1, tmp1, tmp2                ;// per byte: +128 (mod 256) gives (a + b + 1) >> 1
+
+    STR     tmp1, [mb], #4                  ;// store four output pixels
+    BCS     loop_x                          ;// no borrow: more pixels in this row
+
+next_y
+    AND     tmp3, count, #0x00F00000    ;// partWidth-1
+    SMLABB  ref, count, mult_20_01, ref ;// +width
+    ADDS    mb, mb, #16                 ;// +16, Carry=0
+    SBC     mb, mb, tmp3, LSR #20       ;// -(partWidth-1)-1
+    SBC     ref, ref, tmp3, LSR #20     ;// -(partWidth-1)-1
+    ADDS    count, count, #(1<<28)-(1<<24)  ;// loop_y -= 1 (adds -1 mod 16 to bits [27:24])
+    BGE     loop_y                      ;// continue while count stays non-negative
+
+    ADD     sp,sp,#0x1f4                ;// drop locals (0x1e4) plus the saved r0-r3 (0x10)
+    LDMFD   sp!, {r4-r11, pc}           ;// restore callee-saved registers and return
+
+    END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_ver_quarter.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_ver_quarter.s
new file mode 100755
index 0000000..1c79b39
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_ver_quarter.s
@@ -0,0 +1,536 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateHorVerQuarter 
+;--            function
+;--
+;-------------------------------------------------------------------------------
+
+
+    IF :DEF: H264DEC_WINASM
+        ;// We dont use REQUIRE8 and PRESERVE8 for winasm
+    ELSE
+        REQUIRE8
+        PRESERVE8
+    ENDIF
+
+    AREA    |.text|, CODE
+
+;// h264bsdInterpolateHorVerQuarter register allocation
+
+ref     RN 0       ;// source pixel pointer (r0 on entry; later the fill buffer or a filter row)
+
+mb      RN 1       ;// output pointer, results are stored through it
+buff    RN 1       ;// same register used as destination argument for h264bsdFillBlock
+
+count   RN 2       ;// bit-packed loop counters, part size and width
+x0      RN 2       ;// x coordinate (r2 on entry, reloaded from the stack)
+
+y0      RN 3       ;// y coordinate (r3 on entry, reloaded from the stack)
+x_2_0   RN 3       ;// horizontal part: bytes 2 and 0 of a source word as halfwords
+res     RN 3       ;// vertical part: packed 4-pixel result
+
+x_3_1   RN 4       ;// horizontal part: loaded word, then its bytes 3 and 1 as halfwords
+tmp1    RN 4       ;// vertical part: source row word
+
+height  RN 5       ;// picture height during the bounds check / fill
+x_6_4   RN 5       ;// horizontal part: bytes 2 and 0 of the following source word
+tmp2    RN 5       ;// vertical part: source row word
+
+partW   RN 6       ;// partWidth during setup
+x_7_5   RN 6       ;// horizontal part: following word, then its bytes 3 and 1 as halfwords
+tmp3    RN 6       ;// vertical part: source row word
+
+partH   RN 7       ;// partHeight during setup
+tmp4    RN 7       ;// scratch: filter accumulator (horizontal), source row word (vertical)
+
+tmp5    RN 8       ;// scratch: filter accumulator (horizontal), source row word (vertical)
+
+tmp6    RN 9       ;// scratch: filter accumulator (horizontal), source row word (vertical)
+
+tmpa    RN 10      ;// scratch; fourth filter accumulator in the horizontal part
+
+mult_20_01  RN 11  ;// horizontal part: halfword pair [20, 1] (0x00140001)
+tmpb        RN 11  ;// vertical part: scratch
+
+mult_20_m5  RN 12  ;// horizontal part: halfword pair [20, -5] (0x0014FFFB)
+width       RN 12  ;// source row stride (same register, reloaded from the stack when needed)
+
+plus16  RN 14      ;// rounding constant: 16 (horizontal part) or 0x00100010 (vertical part)
+
+
+;// function exports and imports
+
+    IMPORT  h264bsdFillBlock
+
+    EXPORT  h264bsdInterpolateHorVerQuarter
+
+;// Horizontal filter approach
+;//
+;// Basic idea in horizontal filtering is to adjust coefficients
+;// like below. Calculation is done with 16-bit maths.
+;//
+;// Reg     x_2_0     x_3_1     x_6_4     x_7_5     x_2_0
+;//       [  2  0 ] [  3  1 ] [  6  4 ] [  7  5 ] [ 10  8 ] ...
+;// y_0 =   20  1     20 -5        -5         1
+;// y_1 =   -5        20  1      1 20        -5
+;// y_2 =    1        -5        -5 20      1 20
+;// y_3 =              1        20 -5     -5 20         1
+
+
+h264bsdInterpolateHorVerQuarter
+    STMFD   sp!, {r0-r11, lr}       ;// r0-r3 (ref, mb, x0, y0) are saved too and re-read from the stack later
+    SUB     sp, sp, #0x1e4          ;// locals; now ref/mb/x0/y0 are at sp+0x1e4..0x1f0, stack args start at sp+0x218
+;// Bounds check: if the block plus 5 extra columns/rows is not inside the picture, go through do_fill
+    CMP     x0, #0
+    BLT     do_fill                 ;// (x0 < 0)
+    LDR     partW, [sp,#0x220]      ;// partWidth
+    LDR     width, [sp,#0x218]      ;// width
+    ADD     tmpa, x0, partW         ;// (x0+partWidth)
+    ADD     tmpa, tmpa, #5          ;// (x0+partW+5)
+    CMP     tmpa, width
+    BHI     do_fill                 ;// (x0+partW+5)>width, unsigned compare
+
+    CMP     y0, #0
+    BLT     do_fill                 ;// (y0 < 0)
+    LDR     partH, [sp,#0x224]      ;// partHeight
+    LDR     height, [sp,#0x21c]     ;// height
+    ADD     tmp5, y0, partH         ;// (y0+partHeight)
+    ADD     tmp5, tmp5, #5          ;// (y0+partH+5)
+    CMP     tmp5, height
+    BLS     skip_fill               ;// (y0+partH+5)<=height: no overfill needed
+
+
+do_fill
+    LDR     partH, [sp,#0x224]      ;// partHeight (reloaded: not every branch to here has loaded it)
+    LDR     partW, [sp,#0x220]      ;// partWidth
+    LDR     height, [sp,#0x21c]     ;// height
+    ADD     tmp5, partH, #5         ;// tmp5 = partH + 5
+    ADD     tmpa, partW, #5         ;// tmpa = partW + 5
+    STMIB   sp, {height, tmpa}      ;// sp+4 = height, sp+8 = partWidth+5
+    LDR     width, [sp,#0x218]      ;// width
+    STR     tmp5, [sp,#0xc]         ;// sp+c = partHeight+5
+    STR     tmpa, [sp,#0x10]        ;// sp+10 = partWidth+5
+    STR     width, [sp,#0]          ;// sp+0 = width
+    ADD     buff, sp, #0x28         ;// buff = p1[21*21/4+1]
+    BL      h264bsdFillBlock        ;// r0-r3 = ref, buff, x0, y0 (r0/r2/r3 untouched since entry); rest at sp+0..sp+0x10
+;// From here on the filled buffer at sp+0x28 is the source: origin (0,0), row stride partWidth+5
+    MOV     x0, #0
+    STR     x0,[sp,#0x1ec]          ;// x0 = 0
+    STR     x0,[sp,#0x1f0]          ;// y0 = 0
+    ADD     ref,sp,#0x28            ;// ref = p1
+    STR     tmpa, [sp,#0x218]       ;// width = partWidth+5 (assumes the callee preserved r10 - TODO confirm)
+
+
+skip_fill
+    LDR     x0 ,[sp,#0x1ec]         ;// x0
+    LDR     y0 ,[sp,#0x1f0]         ;// y0
+    LDR     width, [sp,#0x218]      ;// width
+    LDR     tmp6, [sp,#0x228]       ;// horVerOffset
+    LDR     mb, [sp, #0x1e8]        ;// mb
+    MLA     tmp5, width, y0, x0     ;// y0*width+x0
+    ADD     ref, ref, tmp5          ;// ref += y0*width+x0
+    STR     ref, [sp, #0x1e4]       ;// store "ref" for vertical filtering
+    AND     tmp6, tmp6, #2          ;// calculate ref for horizontal filter
+    MOV     tmpa, #2                ;// base offset of two rows
+    ADD     tmp6, tmpa, tmp6, LSR #1 ;// tmp6 = 2 + bit 1 of horVerOffset, i.e. 2 or 3
+    MLA     ref, tmp6, width, ref   ;// ref += (2 or 3)*width: row read by the horizontal filter
+    ADD     ref, ref, #8            ;// ref = ref+8 (the row loop loads from [ref,#-8] and [ref,#-4])
+
+    ;// pack values to count register
+    ;// [31:28] loop_x (partWidth-1)
+    ;// [27:24] loop_y (partHeight-1)
+    ;// [23:20] partWidth-1
+    ;// [19:16] partHeight-1
+    ;// [15:00] width
+    MOV     count, width            ;// bits [15:0] = width (assumes width fits in 16 bits)
+    SUB     partW, partW, #1;// partWidth-1
+    SUB     partH, partH, #1;// partHeight-1
+    ADD     tmp5, partH, partW, LSL #4  ;// ((partW-1)<<4) + (partH-1); each assumed to fit in 4 bits
+    ADD     count, count, tmp5, LSL #16 ;// place both into bits [23:16]
+
+
+    LDR     mult_20_01, = 0x00140001    ;// constant multipliers
+    LDR     mult_20_m5, = 0x0014FFFB    ;// constant multipliers (overwrites width, which now lives in count[15:0])
+    MOV     plus16, #16                 ;// constant for add
+    AND     tmp4, count, #0x000F0000    ;// partHeight-1
+    AND     tmp6, count, #0x00F00000    ;// partWidth-1
+    ADD     count, count, tmp4, LSL #8  ;// partH-1 to lower part of top byte
+
+;// HORIZONTAL PART
+
+loop_y_hor
+    LDR     x_3_1, [ref, #-8]            ;// first source word of the row (bytes p0..p3, p0 = least significant)
+    ADD     count, count, tmp6, LSL #8   ;// partW-1 to upper part of top byte
+    LDR     x_7_5, [ref, #-4]            ;// second source word (bytes p4..p7)
+    UXTB16  x_2_0, x_3_1                 ;// x_2_0 = |p2|p0|
+    UXTB16  x_3_1, x_3_1, ROR #8         ;// x_3_1 = |p3|p1|
+    UXTB16  x_6_4, x_7_5                 ;// x_6_4 = |p6|p4|
+
+loop_x_hor
+    UXTB16  x_7_5, x_7_5, ROR #8         ;// x_7_5 = |p7|p5|
+;// Four 6-tap sums (1,-5,20,20,-5,1)+16: tmp4 = y0, tmp5 = y1, tmp6 = y2, tmpa = y3
+    SMLAD   tmp4, x_2_0, mult_20_01, plus16  ;// y0  = 16 + 20*p2 + p0
+    SMLATB  tmp6, x_2_0, mult_20_01, plus16  ;// y2  = 16 + p2
+    SMLATB  tmp5, x_2_0, mult_20_m5, plus16  ;// y1  = 16 - 5*p2
+    SMLATB  tmpa, x_3_1, mult_20_01, plus16  ;// y3  = 16 + p3
+
+    SMLAD   tmp4, x_3_1, mult_20_m5, tmp4    ;// y0 += 20*p3 - 5*p1
+    SMLATB  tmp6, x_3_1, mult_20_m5, tmp6    ;// y2 += -5*p3
+    SMLAD   tmp5, x_3_1, mult_20_01, tmp5    ;// y1 += 20*p3 + p1
+    LDR     x_3_1, [ref], #4                 ;// load the next word (p8..p11), advance ref
+    SMLAD   tmpa, x_6_4, mult_20_m5, tmpa    ;// y3 += 20*p6 - 5*p4
+
+    SMLABB  tmp4, x_6_4, mult_20_m5, tmp4    ;// y0 += -5*p4
+    SMLADX  tmp6, x_6_4, mult_20_m5, tmp6    ;// y2 += 20*p4 - 5*p6
+    SMLADX  tmp5, x_6_4, mult_20_01, tmp5    ;// y1 += 20*p4 + p6
+    SMLADX  tmpa, x_7_5, mult_20_m5, tmpa    ;// y3 += 20*p5 - 5*p7
+
+    SMLABB  tmp4, x_7_5, mult_20_01, tmp4    ;// y0 += p5
+    UXTB16  x_2_0, x_3_1                     ;// x_2_0 = |p10|p8|
+    SMLABB  tmp5, x_7_5, mult_20_m5, tmp5    ;// y1 += -5*p5
+    SMLADX  tmp6, x_7_5, mult_20_01, tmp6    ;// y2 += 20*p5 + p7
+    SMLABB  tmpa, x_2_0, mult_20_01, tmpa    ;// y3 += p8
+
+    MOV     tmp5, tmp5, ASR #5               ;// y1 >> 5
+    MOV     tmp4, tmp4, ASR #5               ;// y0 >> 5
+    PKHBT   tmp5, tmp5, tmpa, LSL #(16-5)    ;// tmp5 = |y3>>5|y1>>5|
+    PKHBT   tmp4, tmp4, tmp6, LSL #(16-5)    ;// tmp4 = |y2>>5|y0>>5|
+    USAT16  tmp5, #8, tmp5                   ;// clip both halfwords to 0..255
+    USAT16  tmp4, #8, tmp4                   ;// clip both halfwords to 0..255
+
+    SUBS    count, count, #4<<28             ;// four pixels done; carry clear when the row is finished
+    ORR     tmp4, tmp4, tmp5, LSL #8         ;// tmp4 = |y3|y2|y1|y0|
+    STR     tmp4, [mb], #4                   ;// store four filtered pixels
+    BCC     next_y_hor
+
+    UXTB16  x_3_1, x_3_1, ROR #8             ;// x_3_1 = |p11|p9|
+;// Same filter for the next four pixels, register roles rotated by one source word
+    SMLAD   tmp4, x_6_4, mult_20_01, plus16
+    SMLATB  tmp6, x_6_4, mult_20_01, plus16
+    SMLATB  tmp5, x_6_4, mult_20_m5, plus16
+    SMLATB  tmpa, x_7_5, mult_20_01, plus16
+
+    SMLAD   tmp4, x_7_5, mult_20_m5, tmp4
+    SMLATB  tmp6, x_7_5, mult_20_m5, tmp6
+    SMLAD   tmp5, x_7_5, mult_20_01, tmp5
+    LDR     x_7_5, [ref], #4                 ;// load the next word, advance ref
+    SMLAD   tmpa, x_2_0, mult_20_m5, tmpa
+
+    SMLABB  tmp4, x_2_0, mult_20_m5, tmp4
+    SMLADX  tmp6, x_2_0, mult_20_m5, tmp6
+    SMLADX  tmp5, x_2_0, mult_20_01, tmp5
+    SMLADX  tmpa, x_3_1, mult_20_m5, tmpa
+
+    SMLABB  tmp4, x_3_1, mult_20_01, tmp4
+    UXTB16  x_6_4, x_7_5                     ;// even bytes of the word just loaded
+    SMLABB  tmp5, x_3_1, mult_20_m5, tmp5
+    SMLADX  tmp6, x_3_1, mult_20_01, tmp6
+    SMLABB  tmpa, x_6_4, mult_20_01, tmpa
+
+    MOV     tmp5, tmp5, ASR #5
+    MOV     tmp4, tmp4, ASR #5
+    PKHBT   tmp5, tmp5, tmpa, LSL #(16-5)
+    PKHBT   tmp4, tmp4, tmp6, LSL #(16-5)
+    USAT16  tmp5, #8, tmp5
+    USAT16  tmp4, #8, tmp4
+
+    SUBS    count, count, #4<<28             ;// four more pixels done; carry set while pixels remain
+    ORR     tmp4, tmp4, tmp5, LSL #8         ;// pack the four result bytes
+    STR     tmp4, [mb], #4                   ;// store four filtered pixels
+    BCS     loop_x_hor
+
+next_y_hor
+    AND     tmp6, count, #0x00F00000        ;// partWidth-1
+    SMLABB  ref, count, mult_20_01, ref     ;// +width (count[15:0] * 1)
+    ADDS    mb, mb, #16                     ;// +16, Carry=0
+    SBC     mb, mb, tmp6, LSR #20           ;// -(partWidth-1)-1
+    SBC     ref, ref, tmp6, LSR #20         ;// -(partWidth-1)-1
+    ADDS    count, count, #(1<<28)-(1<<24)  ;// clear the underflowed loop_x nibble, loop_y counter -= 1
+    BGE     loop_y_hor                      ;// count goes negative once the row counter underflows
+
+
+
+;// VERTICAL PART
+;//
+;// Approach to vertical interpolation
+;//
+;// Interpolation is done by using 32-bit loads and stores
+;// and by using 16 bit arithmetic. 4x4 block is processed
+;// in each round.
+;//
+;// |a_11|a_11|a_11|a_11|...|a_1n|a_1n|a_1n|a_1n|
+;// |b_11|b_11|b_11|b_11|...|b_1n|b_1n|b_1n|b_1n|
+;// |c_11|c_11|c_11|c_11|...|c_1n|c_1n|c_1n|c_1n|
+;// |d_11|d_11|d_11|d_11|...|d_1n|d_1n|d_1n|d_1n|
+;//           ..
+;//           ..
+;// |a_m1|a_m1|a_m1|a_m1|...
+;// |b_m1|b_m1|b_m1|b_m1|...
+;// |c_m1|c_m1|c_m1|c_m1|...
+;// |d_m1|d_m1|d_m1|d_m1|...
+
+;// Approach to bilinear interpolation to quarter pel position.
+;// 4 bytes are processed parallel
+;//
+;// Algorithm: (a+b+1)/2. The +1 for rounding upwards is obtained by
+;// inverting the second operand (one's complement instead of 2's)
+;// and subtracting it; EOR is then used to correct the sign.
+;//
+;// MVN     b, b
+;// UHSUB8  a, a, b
+;// EOR     a, a, 0x80808080
+
+
+    LDR     ref, [sp, #0x1e4]           ;// ref (saved before the horizontal pass)
+    LDR     tmpa, [sp, #0x228]          ;// horVerOffset
+    LDR     mb, [sp, #0x1e8]            ;// mb (now holds the horizontal pass results)
+    LDR     width, [sp, #0x218]         ;// width
+    ADD     ref, ref, #2                ;// calculate correct position
+    AND     tmpa, tmpa, #1              ;// bit 0 of horVerOffset
+    ADD     ref, ref, tmpa              ;// ref += 2 + (horVerOffset & 1)
+    LDR     plus16, = 0x00100010        ;// +16 to lower and upper halfwords
+    AND     count, count, #0x00FFFFFF   ;// clear the loop counters in the top byte
+
+    AND     tmpa, count, #0x000F0000    ;// partHeight-1
+    ADD     count, count, tmpa, LSL #8  ;// partHeight-1 to bits [27:24] (loop_y counter)
+
+loop_y
+    ADD     count, count, tmp6, LSL #8  ;// partWidth-1 to bits [31:28]; tmp6 = count & 0x00F00000 on every entry
+
+loop_x
+    LDR     tmp1, [ref], width     ;// |a4|a3|a2|a1|
+    LDR     tmp2, [ref], width     ;// |c4|c3|c2|c1|
+    LDR     tmp3, [ref], width     ;// |g4|g3|g2|g1|
+    LDR     tmp4, [ref], width     ;// |m4|m3|m2|m1|
+    LDR     tmp5, [ref], width     ;// |r4|r3|r2|r1|
+    LDR     tmp6, [ref], width     ;// |t4|t3|t2|t1|
+
+    ;// first four pixels
+    UXTB16  tmpa, tmp3                  ;// |g3|g1|
+    UXTAB16 tmpa, tmpa, tmp4            ;// |g3+m3|g1+m1|
+    UXTB16  tmpb, tmp2                  ;// |c3|c1|
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+
+    UXTAB16 tmpb, tmpb, tmp5            ;// |c3+r3|c1+r1|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpa, tmpa, tmp1            ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp6            ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     res, = 0x00FF00FF
+    UXTB16  tmpa, tmp3, ROR #8          ;// |g4|g2|
+    UXTAB16 tmpa, tmpa, tmp4, ROR #8    ;// |g4+m4|g2+m2|
+    AND     res, res, tmpb, LSR #5      ;// mask and divide by 32
+
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTB16  tmpb, tmp2, ROR #8          ;// |c4|c2|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpb, tmpb, tmp5, ROR #8    ;// |c4+r4|c2+r2|
+    UXTAB16 tmpa, tmpa, tmp1, ROR #8    ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp6, ROR #8    ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     tmp1, [mb]                  ;// four pixels written to mb by the horizontal pass
+    LDR     tmpa, = 0xFF00FF00
+    MVN     tmp1, tmp1                  ;// one's complement, operand for the rounding average
+    AND     tmpa, tmpa, tmpb, LSL #3    ;// mask and divide by 32 (>>5, moved up one byte)
+    ORR     res, res, tmpa              ;// res = four vertically filtered pixels
+
+    LDR     tmpa, = 0x80808080
+    UHSUB8  res, res, tmp1              ;// bilinear interpolation
+    LDR     tmp1, [ref], width          ;// load next row
+    EOR     res, res, tmpa              ;// correct sign
+
+    STR     res, [mb], #16              ;// next row (mb)
+
+
+    ;// tmp2 = |a4|a3|a2|a1|
+    ;// tmp3 = |c4|c3|c2|c1|
+    ;// tmp4 = |g4|g3|g2|g1|
+    ;// tmp5 = |m4|m3|m2|m1|
+    ;// tmp6 = |r4|r3|r2|r1|
+    ;// tmp1 = |t4|t3|t2|t1|
+
+    ;// second four pixels
+    UXTB16  tmpa, tmp4                  ;// |g3|g1|
+    UXTAB16 tmpa, tmpa, tmp5            ;// |g3+m3|g1+m1|
+    UXTB16  tmpb, tmp3                  ;// |c3|c1|
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTAB16 tmpb, tmpb, tmp6            ;// |c3+r3|c1+r1|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpa, tmpa, tmp2            ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp1            ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     res, = 0x00FF00FF
+    UXTB16  tmpa, tmp4, ROR #8          ;// |g4|g2|
+    UXTAB16 tmpa, tmpa, tmp5, ROR #8    ;// |g4+m4|g2+m2|
+    AND     res, res, tmpb, LSR #5      ;// mask and divide by 32
+
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTB16  tmpb, tmp3, ROR #8          ;// |c4|c2|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpb, tmpb, tmp6, ROR #8    ;// |c4+r4|c2+r2|
+    UXTAB16 tmpa, tmpa, tmp2, ROR #8    ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp1, ROR #8    ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     tmp2, [mb]                  ;// horizontal pass result for this row
+    LDR     tmpa, = 0xFF00FF00
+    MVN     tmp2, tmp2                  ;// one's complement, operand for the rounding average
+
+    AND     tmpa, tmpa, tmpb, LSL #3    ;// mask and divide by 32
+    ORR     res, res, tmpa
+    LDR     tmpa, = 0x80808080
+    UHSUB8  res, res, tmp2              ;// bilinear interpolation
+    LDR     tmp2, [ref], width          ;// load next row
+    EOR     res, res, tmpa              ;// correct sign
+    STR     res, [mb], #16              ;// next row
+
+    ;// tmp3 = |a4|a3|a2|a1|
+    ;// tmp4 = |c4|c3|c2|c1|
+    ;// tmp5 = |g4|g3|g2|g1|
+    ;// tmp6 = |m4|m3|m2|m1|
+    ;// tmp1 = |r4|r3|r2|r1|
+    ;// tmp2 = |t4|t3|t2|t1|
+
+    ;// third four pixels
+    UXTB16  tmpa, tmp5                  ;// |g3|g1|
+    UXTAB16 tmpa, tmpa, tmp6            ;// |g3+m3|g1+m1|
+    UXTB16  tmpb, tmp4                  ;// |c3|c1|
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTAB16 tmpb, tmpb, tmp1            ;// |c3+r3|c1+r1|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpa, tmpa, tmp3            ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp2            ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     res, = 0x00FF00FF
+    UXTB16  tmpa, tmp5, ROR #8          ;// |g4|g2|
+    UXTAB16 tmpa, tmpa, tmp6, ROR #8    ;// |g4+m4|g2+m2|
+    AND     res, res, tmpb, LSR #5      ;// mask and divide by 32
+
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTB16  tmpb, tmp4, ROR #8          ;// |c4|c2|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpb, tmpb, tmp1, ROR #8    ;// |c4+r4|c2+r2|
+    UXTAB16 tmpa, tmpa, tmp3, ROR #8    ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp2, ROR #8    ;// 16+20(G+M)+A+T
+
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     tmp3, [mb]                  ;// horizontal pass result for this row
+    LDR     tmpa, = 0xFF00FF00
+    MVN     tmp3, tmp3                  ;// one's complement, operand for the rounding average
+
+    AND     tmpa, tmpa, tmpb, LSL #3    ;// mask and divide by 32
+    ORR     res, res, tmpa
+    LDR     tmpa, = 0x80808080
+    UHSUB8  res, res, tmp3              ;// bilinear interpolation
+    LDR     tmp3, [ref]                 ;// load next row (last one, no post-increment)
+    EOR     res, res, tmpa              ;// correct sign
+    STR     res, [mb], #16              ;// next row
+
+    ;// tmp4 = |a4|a3|a2|a1|
+    ;// tmp5 = |c4|c3|c2|c1|
+    ;// tmp6 = |g4|g3|g2|g1|
+    ;// tmp1 = |m4|m3|m2|m1|
+    ;// tmp2 = |r4|r3|r2|r1|
+    ;// tmp3 = |t4|t3|t2|t1|
+
+    ;// fourth four pixels
+    UXTB16  tmpa, tmp6                  ;// |g3|g1|
+    UXTAB16 tmpa, tmpa, tmp1            ;// |g3+m3|g1+m1|
+    UXTB16  tmpb, tmp5                  ;// |c3|c1|
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTAB16 tmpb, tmpb, tmp2            ;// |c3+r3|c1+r1|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpa, tmpa, tmp4            ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp3            ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     res, = 0x00FF00FF
+    UXTB16  tmpa, tmp6, ROR #8          ;// |g4|g2|
+    UXTAB16 tmpa, tmpa, tmp1, ROR #8    ;// |g4+m4|g2+m2|
+    AND     res, res, tmpb, LSR #5      ;// mask and divide by 32
+
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTB16  tmpb, tmp5, ROR #8          ;// |c4|c2|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpb, tmpb, tmp2, ROR #8    ;// |c4+r4|c2+r2|
+    UXTAB16 tmpa, tmpa, tmp4, ROR #8    ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp3, ROR #8    ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     tmp5, [mb]                  ;// horizontal pass result for this row
+    LDR     tmp4, = 0xFF00FF00          ;// tmp4 (row a) is no longer needed, reuse it for the mask
+    MVN     tmp5, tmp5                  ;// one's complement, operand for the rounding average
+
+    AND     tmpa, tmp4, tmpb, LSL #3    ;// mask and divide by 32
+    ORR     res, res, tmpa
+    LDR     tmpa, = 0x80808080
+    UHSUB8  res, res, tmp5              ;// bilinear interpolation
+
+    ;// decrement loop_x counter
+    SUBS    count, count, #4<<28        ;// decrement x loop counter
+
+    ;// calculate "ref" address for next round
+    SUB     ref, ref, width, LSL #3     ;// ref -= 8*width; (undo the eight post-incremented row loads)
+    ADD     ref, ref, #4                ;// next column (4 pixels)
+
+    EOR     res, res, tmpa              ;// correct sign
+    STR     res, [mb], #-44             ;// store row 3, then back up three rows (48) and step 4 bytes right
+
+    BCS     loop_x                      ;// carry from the SUBS above: set while columns remain
+
+    ADDS    mb, mb, #64                 ;// set Carry=0
+    ADD     ref, ref, width, LSL #2     ;// ref += 4*width
+    AND     tmp6, count, #0x00F00000    ;// partWidth-1
+    SBC     ref, ref, tmp6, LSR #20     ;// -(partWidth-1)-1
+    SBC     mb, mb, tmp6, LSR #20       ;// -(partWidth-1)-1
+
+    ADDS    count, count, #0xC << 24    ;// decrement y loop counter (by 4 rows; also clears the loop_x nibble)
+    BGE     loop_y                      ;// count goes negative once the row counter underflows
+
+    ADD     sp, sp, #0x1f4              ;// drop locals (0x1e4) and the saved r0-r3 (0x10)
+    LDMFD   sp!, {r4-r11, pc}           ;// restore callee-saved registers and return
+
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s
new file mode 100755
index 0000000..a81aed7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s
@@ -0,0 +1,163 @@
+; Copyright (C) 2009 The Android Open Source Project

+;

+; Licensed under the Apache License, Version 2.0 (the "License");

+; you may not use this file except in compliance with the License.

+; You may obtain a copy of the License at

+;

+;      http://www.apache.org/licenses/LICENSE-2.0

+;

+; Unless required by applicable law or agreed to in writing, software

+; distributed under the License is distributed on an "AS IS" BASIS,

+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+; See the License for the specific language governing permissions and

+; limitations under the License.

+

+;-------------------------------------------------------------------------------

+;--

+;-- Abstract : ARMv6 optimized version of the horizontal part of the

+;--            h264bsdInterpolateMid functions

+;--

+;-------------------------------------------------------------------------------

+

+

+    IF :DEF: H264DEC_WINASM

+        ;// We dont use REQUIRE8 and PRESERVE8 for winasm

+    ELSE

+        REQUIRE8

+        PRESERVE8

+    ENDIF

+

+    AREA    |.text|, CODE

+

+

+;// Register allocation

+

+ref     RN 0    ;// pointer to current position in reference image

+mb      RN 1    ;// pointer to current position in interpolated mb

+count   RN 2    ;// bit-packed width and count values

+

+x_2_0   RN 4    ;// bytes 2 and 0 of a source word as halfwords

+x_3_1   RN 5    ;// loaded source word, then its bytes 3 and 1 as halfwords

+x_6_4   RN 6    ;// bytes 2 and 0 of the following source word

+x_7_5   RN 7    ;// following source word, then its bytes 3 and 1 as halfwords

+

+tmp1    RN 8    ;// filter sum for output pixel 0 of a group of four

+tmp2    RN 9    ;// filter sum for output pixel 1

+tmp3    RN 10   ;// filter sum for output pixel 2; also holds the masked partWidth-1 field between rows

+tmp4    RN 11   ;// filter sum for output pixel 3

+

+mult_20_01  RN 12   ;// [20,  1]

+mult_20_m5  RN 14   ;// [20, -5]

+

+

+        EXPORT  h264bsdInterpolateMidHorPart

+

+;// Horizontal filter approach

+;//

+;// Basic idea in horizontal filtering is to adjust coefficients

+;// like below. Calculation is done with 16-bit maths.

+;//

+;// Reg     x_2_0     x_3_1     x_6_4     x_7_5     x_2_0

+;//       [  2  0 ] [  3  1 ] [  6  4 ] [  7  5 ] [ 10  8 ] ...

+;// y_0 =   20  1     20 -5        -5         1

+;// y_1 =   -5        20  1      1 20        -5

+;// y_2 =    1        -5        -5 20      1 20

+;// y_3 =              1        20 -5     -5 20         1

+

+

+h264bsdInterpolateMidHorPart

+    STMFD   sp!, {r4-r11, lr}           ;// r0-r2 (ref, mb, count) are used as passed in

+

+    ;// count (r2) is not packed here; as the code below uses it:

+    ;// [31:28] loop_x counter (reloaded from bits [19:16] for every row)

+    ;// [27:20] loop_y counter, decremented at bit 20 after every row

+    ;// [19:16] partWidth-1

+    ;// [15:00] width (added to ref after every row)

+    ;// NOTE(review): an earlier comment listed partHeight-1 at [19:16]; confirm the packing in the caller

+

+

+    LDR     mult_20_01, = 0x00140001    ;// halfword pair [20, 1]

+    LDR     mult_20_m5, = 0x0014FFFB    ;// halfword pair [20, -5]

+    AND     tmp3, count, #0x000F0000    ;// partWidth-1

+loop_y

+    LDR     x_3_1, [ref, #-8]           ;// first source word of the row (bytes p0..p3, p0 = least significant)

+    ADD     count, count, tmp3, LSL #12 ;// partWidth-1 from bits [19:16] into the loop_x counter [31:28]

+    LDR     x_7_5, [ref, #-4]           ;// second source word (bytes p4..p7)

+    UXTB16  x_2_0, x_3_1                ;// x_2_0 = |p2|p0|

+    UXTB16  x_3_1, x_3_1, ROR #8        ;// x_3_1 = |p3|p1|

+    UXTB16  x_6_4, x_7_5                ;// x_6_4 = |p6|p4|

+

+loop_x

+    UXTB16  x_7_5, x_7_5, ROR #8        ;// x_7_5 = |p7|p5|

+;// Four 6-tap sums (1,-5,20,20,-5,1): tmp1 = y0, tmp2 = y1, tmp3 = y2, tmp4 = y3

+    SMUAD   tmp1, x_2_0, mult_20_01     ;// y0  = 20*p2 + p0

+    SMULTB  tmp2, x_2_0, mult_20_m5     ;// y1  = -5*p2

+    SMULTB  tmp3, x_2_0, mult_20_01     ;// y2  = p2

+    SMULTB  tmp4, x_3_1, mult_20_01     ;// y3  = p3

+

+    SMLAD   tmp1, x_3_1, mult_20_m5, tmp1   ;// y0 += 20*p3 - 5*p1

+    SMLAD   tmp2, x_3_1, mult_20_01, tmp2   ;// y1 += 20*p3 + p1

+    SMLATB  tmp3, x_3_1, mult_20_m5, tmp3   ;// y2 += -5*p3

+    LDR     x_3_1, [ref], #4                ;// load the next word (p8..p11), advance ref

+    SMLAD   tmp4, x_6_4, mult_20_m5, tmp4   ;// y3 += 20*p6 - 5*p4

+

+    SMLABB  tmp1, x_6_4, mult_20_m5, tmp1   ;// y0 += -5*p4

+    SMLADX  tmp2, x_6_4, mult_20_01, tmp2   ;// y1 += 20*p4 + p6

+    SMLADX  tmp3, x_6_4, mult_20_m5, tmp3   ;// y2 += 20*p4 - 5*p6

+    SMLADX  tmp4, x_7_5, mult_20_m5, tmp4   ;// y3 += 20*p5 - 5*p7

+

+    SMLABB  tmp1, x_7_5, mult_20_01, tmp1   ;// y0 += p5

+    SMLABB  tmp2, x_7_5, mult_20_m5, tmp2   ;// y1 += -5*p5

+    UXTB16  x_2_0, x_3_1                    ;// x_2_0 = |p10|p8|

+    SMLADX  tmp3, x_7_5, mult_20_01, tmp3   ;// y2 += 20*p5 + p7

+    SMLABB  tmp4, x_2_0, mult_20_01, tmp4   ;// y3 += p8

+

+    SUBS    count, count, #4<<28        ;// four pixels done; carry clear when the row is finished

+    STR     tmp1, [mb], #4              ;// sums are stored as full 32-bit words,

+    STR     tmp2, [mb], #4              ;// without rounding, shifting or clipping

+    STR     tmp3, [mb], #4

+    STR     tmp4, [mb], #4

+    BCC     next_y

+

+    UXTB16  x_3_1, x_3_1, ROR #8        ;// x_3_1 = |p11|p9|

+;// Same filter for the next four pixels, register roles rotated by one source word

+    SMUAD   tmp1, x_6_4, mult_20_01

+    SMULTB  tmp2, x_6_4, mult_20_m5

+    SMULTB  tmp3, x_6_4, mult_20_01

+    SMULTB  tmp4, x_7_5, mult_20_01

+

+    SMLAD   tmp1, x_7_5, mult_20_m5, tmp1

+    SMLAD   tmp2, x_7_5, mult_20_01, tmp2

+    SMLATB  tmp3, x_7_5, mult_20_m5, tmp3

+    LDR     x_7_5, [ref], #4            ;// load the next word, advance ref

+    SMLAD   tmp4, x_2_0, mult_20_m5, tmp4

+

+    SMLABB  tmp1, x_2_0, mult_20_m5, tmp1

+    SMLADX  tmp2, x_2_0, mult_20_01, tmp2

+    SMLADX  tmp3, x_2_0, mult_20_m5, tmp3

+    SMLADX  tmp4, x_3_1, mult_20_m5, tmp4

+

+    SMLABB  tmp1, x_3_1, mult_20_01, tmp1

+    SMLABB  tmp2, x_3_1, mult_20_m5, tmp2

+    UXTB16  x_6_4, x_7_5                ;// even bytes of the word just loaded

+    SMLADX  tmp3, x_3_1, mult_20_01, tmp3

+    SMLABB  tmp4, x_6_4, mult_20_01, tmp4

+

+    SUBS    count, count, #4<<28        ;// four more pixels done; carry set while pixels remain

+    STR     tmp1, [mb], #4

+    STR     tmp2, [mb], #4

+    STR     tmp3, [mb], #4

+    STR     tmp4, [mb], #4

+    BCS     loop_x

+

+next_y

+    AND     tmp3, count, #0x000F0000    ;// partWidth-1

+    SMLABB  ref, count, mult_20_01, ref   ;// +width (count[15:0] * 1)

+    SBC     ref, ref, tmp3, LSR #16   ;// -(partWidth-1)-1 (carry is clear from the SUBS that ended the row)

+    ADDS    count, count, #(1<<28)-(1<<20) ;// clear the underflowed loop_x nibble, row counter at bit 20 -= 1

+    BGE     loop_y                      ;// count goes negative once the row counter underflows

+

+    LDMFD   sp!, {r4-r11, pc}           ;// restore callee-saved registers and return

+

+    END

+

diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_half.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_half.s
new file mode 100755
index 0000000..244fc6f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_half.s
@@ -0,0 +1,347 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateVerHalf function
+;--
+;-------------------------------------------------------------------------------
+
+
+    IF :DEF: H264DEC_WINASM
+        ;// We dont use REQUIRE8 and PRESERVE8 for winasm
+    ELSE
+        REQUIRE8
+        PRESERVE8
+    ENDIF
+
+    AREA    |.text|, CODE
+
+;// h264bsdInterpolateVerHalf register allocation
+
+ref     RN 0       ;// source pixel pointer (r0 on entry, or the fill buffer after do_fill)
+
+mb      RN 1       ;// output pointer, results are stored through it
+buff    RN 1       ;// same register used as destination argument for h264bsdFillBlock
+
+count   RN 2       ;// packed |partH-1|partW-1| plus the column loop counter in the top byte
+x0      RN 2       ;// x coordinate (r2 on entry, reloaded from the stack)
+
+res     RN 3       ;// packed 4-pixel result
+y0      RN 3       ;// y coordinate (r3 on entry, reloaded from the stack)
+
+tmp1    RN 4       ;// source row word
+
+tmp2    RN 5       ;// source row word
+height  RN 5       ;// picture height during the bounds check / fill
+
+tmp3    RN 6       ;// source row word
+partW   RN 6       ;// partWidth during setup
+
+tmp4    RN 7       ;// source row word
+partH   RN 7       ;// partHeight during setup
+
+tmp5    RN 8       ;// source row word; scratch during setup
+tmp6    RN 9       ;// source row word; scratch during setup
+
+tmpa    RN 10      ;// scratch: 20*(G+M)+A+T side of the filter sum
+tmpb    RN 11      ;// scratch: 5*(C+R) side of the filter sum, then the saturated result
+width   RN 12      ;// source row stride
+
+plus16  RN 14      ;// rounding constant 0x00100010 (+16 in both halfwords)
+
+
+;// function exports and imports
+
+    IMPORT  h264bsdFillBlock
+
+    EXPORT  h264bsdInterpolateVerHalf
+
+;// Approach to vertical interpolation
+;//
+;// Interpolation is done by using 32-bit loads and stores
+;// and by using 16 bit arithmetic. 4x4 block is processed
+;// in each round.
+;//
+;// |a_11|a_11|a_11|a_11|...|a_1n|a_1n|a_1n|a_1n|
+;// |b_11|b_11|b_11|b_11|...|b_1n|b_1n|b_1n|b_1n|
+;// |c_11|c_11|c_11|c_11|...|c_1n|c_1n|c_1n|c_1n|
+;// |d_11|d_11|d_11|d_11|...|d_1n|d_1n|d_1n|d_1n|
+;//           ..
+;//           ..
+;// |a_m1|a_m1|a_m1|a_m1|...
+;// |b_m1|b_m1|b_m1|b_m1|...
+;// |c_m1|c_m1|c_m1|c_m1|...
+;// |d_m1|d_m1|d_m1|d_m1|...
+
+h264bsdInterpolateVerHalf
+    STMFD   sp!, {r0-r11, lr}
+    SUB     sp, sp, #0x1e4
+
+    CMP     x0, #0
+    BLT     do_fill                 ;// (x0 < 0)
+    LDR     partW, [sp,#0x220]      ;// partWidth
+    ADD     tmp5, x0, partW         ;// (x0+partWidth)
+    LDR     width, [sp,#0x218]      ;// width
+    CMP     tmp5, width
+    BHI     do_fill                 ;// (x0+partW)>width
+
+    CMP     y0, #0
+    BLT     do_fill                 ;// (y0 < 0)
+    LDR     partH, [sp,#0x224]      ;// partHeight
+    ADD     tmp6, y0, partH         ;// (y0+partHeight)
+    ADD     tmp6, tmp6, #5          ;// (y0+partH+5)
+    LDR     height, [sp,#0x21c]     ;// height
+    CMP     tmp6, height
+    BLS     skip_fill               ;// no overfill needed
+
+
+do_fill
+    LDR     partH, [sp,#0x224]      ;// partHeight
+    ADD     tmp5, partH, #5         ;// r2 = partH + 5;
+    LDR     height, [sp,#0x21c]     ;// height
+    LDR     partW, [sp,#0x220]      ;// partWidth
+    STMIB   sp, {height, partW}     ;// sp+4 = height, sp+8 = partWidth
+    STR     tmp5, [sp,#0xc]         ;// sp+c partHeight+5
+    STR     partW, [sp,#0x10]       ;// sp+10 = partWidth
+    LDR     width, [sp,#0x218]      ;// width
+    STR     width, [sp,#0]          ;// sp+0 = width
+    ADD     buff, sp, #0x28         ;// buff = p1[21*21/4+1]
+    BL      h264bsdFillBlock
+
+    MOV     x0, #0
+    STR     x0,[sp,#0x1ec]          ;// x0 = 0
+    STR     x0,[sp,#0x1f0]          ;// y0 = 0
+    ADD     ref,sp,#0x28            ;// ref = p1
+    STR     partW, [sp,#0x218]
+
+
+skip_fill
+    LDR     x0 ,[sp,#0x1ec]         ;// x0
+    LDR     y0 ,[sp,#0x1f0]         ;// y0
+    LDR     width, [sp,#0x218]      ;// width
+    MLA     tmp6, width, y0, x0     ;// y0*width+x0
+    ADD     ref, ref, tmp6          ;// ref += y0*width+x0
+    LDR     mb, [sp, #0x1e8]        ;// mb
+
+    ADD     count, partW, partH, LSL #16    ;// |partH|partW|
+    LDR     tmp5, = 0x00010001
+    SSUB16  count, count, tmp5;     ;// |partH-1|partW-1|
+    LDR     plus16, = 0x00100010
+
+    AND     tmp1, count, #0x000000FF ;// partWidth
+
+
+loop_y
+    ADD     count, count, tmp1, LSL #24  ;// partWidth-1 to top byte
+
+loop_x
+    LDR     tmp1, [ref], width     ;// |a4|a3|a2|a1|
+    LDR     tmp2, [ref], width     ;// |c4|c3|c2|c1|
+    LDR     tmp3, [ref], width     ;// |g4|g3|g2|g1|
+    LDR     tmp4, [ref], width     ;// |m4|m3|m2|m1|
+    LDR     tmp5, [ref], width     ;// |r4|r3|r2|r1|
+    LDR     tmp6, [ref], width     ;// |t4|t3|t2|t1|
+
+    ;// first four pixels
+    UXTB16  tmpa, tmp3                  ;// |g3|g1|
+    UXTAB16 tmpa, tmpa, tmp4            ;// |g3+m3|g1+m1|
+    UXTB16  tmpb, tmp2                  ;// |c3|c1|
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+
+    UXTAB16 tmpb, tmpb, tmp5            ;// |c3+r3|c1+r1|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpa, tmpa, tmp1            ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp6            ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     res, = 0x00FF00FF
+    UXTB16  tmpa, tmp3, ROR #8          ;// |g4|g2|
+    UXTAB16 tmpa, tmpa, tmp4, ROR #8    ;// |g4+m4|g2+m2|
+    AND     res, res, tmpb, LSR #5      ;// mask and divide by 32
+
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTB16  tmpb, tmp2, ROR #8          ;// |c4|c2|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpb, tmpb, tmp5, ROR #8    ;// |c4+r4|c2+r2|
+    UXTAB16 tmpa, tmpa, tmp1, ROR #8    ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp6, ROR #8    ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     tmp1, [ref], width
+    LDR     tmpa, = 0xFF00FF00
+
+    AND     tmpa, tmpa, tmpb, LSL #3    ;// mask and divede by 32
+    ORR     res, res, tmpa
+    STR     res, [mb], #16              ;// next row (mb)
+
+    ;// tmp2 = |a4|a3|a2|a1|
+    ;// tmp3 = |c4|c3|c2|c1|
+    ;// tmp4 = |g4|g3|g2|g1|
+    ;// tmp5 = |m4|m3|m2|m1|
+    ;// tmp6 = |r4|r3|r2|r1|
+    ;// tmp1 = |t4|t3|t2|t1|
+
+    ;// second four pixels
+    UXTB16  tmpa, tmp4                  ;// |g3|g1|
+    UXTAB16 tmpa, tmpa, tmp5            ;// |g3+m3|g1+m1|
+    UXTB16  tmpb, tmp3                  ;// |c3|c1|
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTAB16 tmpb, tmpb, tmp6            ;// |c3+r3|c1+r1|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpa, tmpa, tmp2            ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp1            ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     res, = 0x00FF00FF
+    UXTB16  tmpa, tmp4, ROR #8          ;// |g4|g2|
+    UXTAB16 tmpa, tmpa, tmp5, ROR #8    ;// |g4+m4|g2+m2|
+    AND     res, res, tmpb, LSR #5      ;// mask and divide by 32
+
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTB16  tmpb, tmp3, ROR #8          ;// |c4|c2|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpb, tmpb, tmp6, ROR #8    ;// |c4+r4|c2+r2|
+    UXTAB16 tmpa, tmpa, tmp2, ROR #8    ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp1, ROR #8    ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     tmp2, [ref], width
+    LDR     tmpa, = 0xFF00FF00
+
+    AND     tmpa, tmpa, tmpb, LSL #3    ;// mask and divide by 32
+    ORR     res, res, tmpa
+    STR     res, [mb], #16              ;// next row
+
+    ;// tmp3 = |a4|a3|a2|a1|
+    ;// tmp4 = |c4|c3|c2|c1|
+    ;// tmp5 = |g4|g3|g2|g1|
+    ;// tmp6 = |m4|m3|m2|m1|
+    ;// tmp1 = |r4|r3|r2|r1|
+    ;// tmp2 = |t4|t3|t2|t1|
+
+    ;// third four pixels
+    UXTB16  tmpa, tmp5                  ;// |g3|g1|
+    UXTAB16 tmpa, tmpa, tmp6            ;// |g3+m3|g1+m1|
+    UXTB16  tmpb, tmp4                  ;// |c3|c1|
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTAB16 tmpb, tmpb, tmp1            ;// |c3+r3|c1+r1|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpa, tmpa, tmp3            ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp2            ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     res, = 0x00FF00FF
+    UXTB16  tmpa, tmp5, ROR #8          ;// |g4|g2|
+    UXTAB16 tmpa, tmpa, tmp6, ROR #8    ;// |g4+m4|g2+m2|
+    AND     res, res, tmpb, LSR #5      ;// mask and divide by 32
+
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTB16  tmpb, tmp4, ROR #8          ;// |c4|c2|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpb, tmpb, tmp1, ROR #8    ;// |c4+r4|c2+r2|
+    UXTAB16 tmpa, tmpa, tmp3, ROR #8    ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp2, ROR #8    ;// 16+20(G+M)+A+T
+
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     tmp3, [ref]
+    LDR     tmpa, = 0xFF00FF00
+
+    ;// decrement loop_x counter
+    SUBS    count, count, #4<<24        ;// (partWidth-1) -= 4;
+
+    AND     tmpa, tmpa, tmpb, LSL #3    ;// mask and divide by 32
+    ORR     res, res, tmpa
+    STR     res, [mb], #16              ;// next row
+
+    ;// tmp4 = |a4|a3|a2|a1|
+    ;// tmp5 = |c4|c3|c2|c1|
+    ;// tmp6 = |g4|g3|g2|g1|
+    ;// tmp1 = |m4|m3|m2|m1|
+    ;// tmp2 = |r4|r3|r2|r1|
+    ;// tmp3 = |t4|t3|t2|t1|
+
+    ;// fourth four pixels
+    UXTB16  tmpa, tmp6                  ;// |g3|g1|
+    UXTAB16 tmpa, tmpa, tmp1            ;// |g3+m3|g1+m1|
+    UXTB16  tmpb, tmp5                  ;// |c3|c1|
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTAB16 tmpb, tmpb, tmp2            ;// |c3+r3|c1+r1|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpa, tmpa, tmp4            ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp3            ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     res, = 0x00FF00FF
+    UXTB16  tmpa, tmp6, ROR #8          ;// |g4|g2|
+    UXTAB16 tmpa, tmpa, tmp1, ROR #8    ;// |g4+m4|g2+m2|
+    AND     res, res, tmpb, LSR #5      ;// mask and divide by 32
+
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTB16  tmpb, tmp5, ROR #8          ;// |c4|c2|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpb, tmpb, tmp2, ROR #8    ;// |c4+r4|c2+r2|
+    UXTAB16 tmpa, tmpa, tmp4, ROR #8    ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp3, ROR #8    ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     tmp4, = 0xFF00FF00
+
+    ;// calculate "ref" address for next round
+    SUB     ref, ref, width, LSL #3     ;// ref -= 8*width;
+    ADD     ref, ref, #4;               ;// next column (4 pixels)
+    AND     tmpa, tmp4, tmpb, LSL #3    ;// mask and divide by 32
+    ORR     res, res, tmpa
+    STR     res, [mb], #-44
+
+    BCS     loop_x
+
+    ADDS    count, count, #252<<16      ;// (partHeight-1) -= 4;
+    ADD     ref, ref, width, LSL #2     ;// ref += 4*width
+    AND     tmp1, count, #0x000000FF    ;// partWidth-1
+    ADD     tmp2, tmp1, #1              ;// partWidth
+    SUB     ref, ref, tmp2              ;// ref -= partWidth
+    ADD     mb, mb, #64;
+    SUB     mb, mb, tmp2;               ;// mb -= partWidth
+    BGE     loop_y
+
+    ADD     sp,sp,#0x1f4
+    LDMFD   sp!, {r4-r11, pc}
+
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_quarter.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_quarter.s
new file mode 100755
index 0000000..5266c85
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_quarter.s
@@ -0,0 +1,374 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateVerQuarter function
+;--
+;-------------------------------------------------------------------------------
+
+    IF :DEF: H264DEC_WINASM
+        ;// We don't use REQUIRE8 and PRESERVE8 for winasm
+    ELSE
+        REQUIRE8
+        PRESERVE8
+    ENDIF
+
+    AREA    |.text|, CODE
+
+;// h264bsdInterpolateVerQuarter register allocation
+;// Names that share a register are used in different phases (setup vs. loops).
+ref     RN 0            ;// source pointer, advanced row by row
+
+mb      RN 1            ;// destination pointer inside the loops
+buff    RN 1            ;// buffer argument for the h264bsdFillBlock call
+
+count   RN 2            ;// packed loop counters plus verOffset
+x0      RN 2            ;// x coordinate during setup
+
+res     RN 3            ;// four packed result pixels
+y0      RN 3            ;// y coordinate during setup
+
+tmp1    RN 4            ;// tmp1-tmp6: six source rows, rotated each output row
+
+tmp2    RN 5
+height  RN 5            ;// picture height during setup
+
+tmp3    RN 6
+partW   RN 6            ;// partWidth during setup
+
+tmp4    RN 7
+partH   RN 7            ;// partHeight during setup
+
+tmp5    RN 8
+tmp6    RN 9
+
+tmpa    RN 10           ;// pair of 16-bit filter accumulators
+tmpb    RN 11           ;// pair of 16-bit (C+R) sums, later the saturated result
+width   RN 12           ;// row stride of ref in bytes
+
+plus16  RN 14           ;// constant 0x00100010; r14 (lr) is saved on entry
+
+
+;// function exports and imports
+
+    IMPORT  h264bsdFillBlock
+
+    EXPORT  h264bsdInterpolateVerQuarter
+
+;// Approach to vertical interpolation
+;//
+;// Interpolation is done by using 32-bit loads and stores
+;// and by using 16 bit arithmetic. 4x4 block is processed
+;// in each round.
+;//
+;// |a_11|a_11|a_11|a_11|...|a_1n|a_1n|a_1n|a_1n|
+;// |b_11|b_11|b_11|b_11|...|b_1n|b_1n|b_1n|b_1n|
+;// |c_11|c_11|c_11|c_11|...|c_1n|c_1n|c_1n|c_1n|
+;// |d_11|d_11|d_11|d_11|...|d_1n|d_1n|d_1n|d_1n|
+;//           ..
+;//           ..
+;// |a_m1|a_m1|a_m1|a_m1|...
+;// |b_m1|b_m1|b_m1|b_m1|...
+;// |c_m1|c_m1|c_m1|c_m1|...
+;// |d_m1|d_m1|d_m1|d_m1|...
+
+h264bsdInterpolateVerQuarter
+    STMFD   sp!, {r0-r11, lr}       ;// save args r0-r3, r4-r11 and lr (13 words)
+    SUB     sp, sp, #0x1e4          ;// locals; stacked args now start at sp+0x218
+;// Vertical 6-tap filter (1,-5,20,20,-5,1) averaged with row G or M, 4x4 per round.
+    CMP     x0, #0
+    BLT     do_fill                 ;// (x0 < 0)
+    LDR     partW, [sp,#0x220]      ;// partWidth
+    ADD     tmp5, x0, partW         ;// (x0+partWidth)
+    LDR     width, [sp,#0x218]      ;// width
+    CMP     tmp5, width
+    BHI     do_fill                 ;// (x0+partW)>width
+
+    CMP     y0, #0
+    BLT     do_fill                 ;// (y0 < 0)
+    LDR     partH, [sp,#0x224]      ;// partHeight
+    ADD     tmp6, y0, partH         ;// (y0+partHeight)
+    ADD     tmp6, tmp6, #5          ;// (y0+partH+5)
+    LDR     height, [sp,#0x21c]     ;// height
+    CMP     tmp6, height
+    BLS     skip_fill               ;// no overfill needed
+;// Otherwise h264bsdFillBlock fills the local buffer at sp+0x28 and the
+;// filter reads from that buffer with x0 = y0 = 0 and width = partWidth.
+do_fill
+    LDR     partH, [sp,#0x224]      ;// partHeight
+    ADD     tmp5, partH, #5         ;// tmp5 = partHeight + 5
+    LDR     height, [sp,#0x21c]     ;// height
+    LDR     partW, [sp,#0x220]      ;// partWidth
+    STMIB   sp, {height, partW}     ;// sp+4 = height, sp+8 = partWidth
+    STR     tmp5, [sp,#0xc]         ;// sp+c partHeight+5
+    STR     partW, [sp,#0x10]       ;// sp+10 = partWidth
+    LDR     width, [sp,#0x218]      ;// width
+    STR     width, [sp,#0]          ;// sp+0 = width
+    ADD     buff, sp, #0x28         ;// buff = p1[21*21/4+1]
+    BL      h264bsdFillBlock
+    ;// after the call only partW/partH (r6/r7) are reused without reloading
+    MOV     x0, #0
+    STR     x0,[sp,#0x1ec]          ;// x0 = 0
+    STR     x0,[sp,#0x1f0]          ;// y0 = 0
+    ADD     ref,sp,#0x28            ;// ref = p1
+    STR     partW, [sp,#0x218]      ;// stacked width = partWidth (reloaded below)
+
+
+skip_fill
+    LDR     x0 ,[sp,#0x1ec]         ;// x0
+    LDR     y0 ,[sp,#0x1f0]         ;// y0
+    LDR     width, [sp,#0x218]      ;// width
+    MLA     tmp6, width, y0, x0     ;// y0*width+x0
+    ADD     ref, ref, tmp6          ;// ref += y0*width+x0
+    LDR     mb, [sp, #0x1e8]        ;// mb
+
+    ADD     count, partW, partH, LSL #8    ;// |xx|xx|partH|partW|
+    LDR     tmp5, = 0x00010100
+    RSB     count, tmp5, count, LSL #8      ;// |xx|partH-1|partW-1|xx|
+    LDR     tmp2, [sp, #0x228]      ;// verOffset
+    ADD     count, count, tmp2      ;// |xx|partH-1|partW-1|verOffset|
+    LDR     plus16, = 0x00100010
+
+    AND     tmp1, count, #0x0000FF00 ;// partWidth-1 (byte 1 of count)
+
+
+loop_y
+    ADD     count, count, tmp1, LSL #16  ;// partWidth-1 to top byte
+
+loop_x
+    LDR     tmp1, [ref], width     ;// |a4|a3|a2|a1|
+    LDR     tmp2, [ref], width     ;// |c4|c3|c2|c1|
+    LDR     tmp3, [ref], width     ;// |g4|g3|g2|g1|
+    LDR     tmp4, [ref], width     ;// |m4|m3|m2|m1|
+    LDR     tmp5, [ref], width     ;// |r4|r3|r2|r1|
+    LDR     tmp6, [ref], width     ;// |t4|t3|t2|t1|
+
+    ;// first four pixels
+    UXTB16  tmpa, tmp3                  ;// |g3|g1|
+    UXTAB16 tmpa, tmpa, tmp4            ;// |g3+m3|g1+m1|
+    UXTB16  tmpb, tmp2                  ;// |c3|c1|
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+
+    UXTAB16 tmpb, tmpb, tmp5            ;// |c3+r3|c1+r1|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpa, tmpa, tmp1            ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp6            ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     res, = 0x00FF00FF
+    UXTB16  tmpa, tmp3, ROR #8          ;// |g4|g2|
+    UXTAB16 tmpa, tmpa, tmp4, ROR #8    ;// |g4+m4|g2+m2|
+    AND     res, res, tmpb, LSR #5      ;// mask and divide by 32
+
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTB16  tmpb, tmp2, ROR #8          ;// |c4|c2|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpb, tmpb, tmp5, ROR #8    ;// |c4+r4|c2+r2|
+    UXTAB16 tmpa, tmpa, tmp1, ROR #8    ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp6, ROR #8    ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    MOVS    tmp1, count, LSL #31        ;// update flags (Z set when verOffset bit 0 is 0)
+    LDR     tmpa, = 0xFF00FF00
+    MVNEQ   tmp1, tmp3                  ;// select verOffset=0 (~G)
+    MVNNE   tmp1, tmp4                  ;// select verOffset=1 (~M)
+    AND     tmpa, tmpa, tmpb, LSL #3    ;// mask and divide by 32
+    ORR     res, res, tmpa
+    ;// UHSUB8 with the complemented row, then EOR 0x80: rounded average per byte
+    LDR     tmpa, = 0x80808080
+    UHSUB8  res, res, tmp1              ;// bilinear interpolation
+    LDR     tmp1, [ref], width          ;// load next row
+    EOR     res, res, tmpa              ;// correct sign
+
+    STR     res, [mb], #16              ;// next row (mb)
+
+    ;// Z from the MOVS above is reused by every MVNEQ/MVNNE until the SUBS below.
+    ;// tmp2 = |a4|a3|a2|a1|
+    ;// tmp3 = |c4|c3|c2|c1|
+    ;// tmp4 = |g4|g3|g2|g1|
+    ;// tmp5 = |m4|m3|m2|m1|
+    ;// tmp6 = |r4|r3|r2|r1|
+    ;// tmp1 = |t4|t3|t2|t1|
+
+    ;// second four pixels
+    UXTB16  tmpa, tmp4                  ;// |g3|g1|
+    UXTAB16 tmpa, tmpa, tmp5            ;// |g3+m3|g1+m1|
+    UXTB16  tmpb, tmp3                  ;// |c3|c1|
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTAB16 tmpb, tmpb, tmp6            ;// |c3+r3|c1+r1|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpa, tmpa, tmp2            ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp1            ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     res, = 0x00FF00FF
+    UXTB16  tmpa, tmp4, ROR #8          ;// |g4|g2|
+    UXTAB16 tmpa, tmpa, tmp5, ROR #8    ;// |g4+m4|g2+m2|
+    AND     res, res, tmpb, LSR #5      ;// mask and divide by 32
+
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTB16  tmpb, tmp3, ROR #8          ;// |c4|c2|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpb, tmpb, tmp6, ROR #8    ;// |c4+r4|c2+r2|
+    UXTAB16 tmpa, tmpa, tmp2, ROR #8    ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp1, ROR #8    ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     tmpa, = 0xFF00FF00
+    MVNEQ   tmp2, tmp4                  ;// select verOffset=0
+    MVNNE   tmp2, tmp5                  ;// select verOffset=1
+
+    AND     tmpa, tmpa, tmpb, LSL #3    ;// mask and divide by 32
+    ORR     res, res, tmpa
+    LDR     tmpa, = 0x80808080
+    UHSUB8  res, res, tmp2              ;// bilinear interpolation
+    LDR     tmp2, [ref], width          ;// load next row
+    EOR     res, res, tmpa              ;// correct sign
+    STR     res, [mb], #16              ;// next row
+
+    ;// tmp3 = |a4|a3|a2|a1|
+    ;// tmp4 = |c4|c3|c2|c1|
+    ;// tmp5 = |g4|g3|g2|g1|
+    ;// tmp6 = |m4|m3|m2|m1|
+    ;// tmp1 = |r4|r3|r2|r1|
+    ;// tmp2 = |t4|t3|t2|t1|
+
+    ;// third four pixels
+    UXTB16  tmpa, tmp5                  ;// |g3|g1|
+    UXTAB16 tmpa, tmpa, tmp6            ;// |g3+m3|g1+m1|
+    UXTB16  tmpb, tmp4                  ;// |c3|c1|
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTAB16 tmpb, tmpb, tmp1            ;// |c3+r3|c1+r1|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpa, tmpa, tmp3            ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp2            ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     res, = 0x00FF00FF
+    UXTB16  tmpa, tmp5, ROR #8          ;// |g4|g2|
+    UXTAB16 tmpa, tmpa, tmp6, ROR #8    ;// |g4+m4|g2+m2|
+    AND     res, res, tmpb, LSR #5      ;// mask and divide by 32
+
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTB16  tmpb, tmp4, ROR #8          ;// |c4|c2|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpb, tmpb, tmp1, ROR #8    ;// |c4+r4|c2+r2|
+    UXTAB16 tmpa, tmpa, tmp3, ROR #8    ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp2, ROR #8    ;// 16+20(G+M)+A+T
+
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     tmpa, = 0xFF00FF00
+    MVNEQ   tmp3, tmp5                  ;// select verOffset=0
+    MVNNE   tmp3, tmp6                  ;// select verOffset=1
+
+    AND     tmpa, tmpa, tmpb, LSL #3    ;// mask and divide by 32
+    ORR     res, res, tmpa
+    LDR     tmpa, = 0x80808080
+    UHSUB8  res, res, tmp3              ;// bilinear interpolation
+    LDR     tmp3, [ref]                 ;// load next row (ref not advanced: 9th and last row)
+    EOR     res, res, tmpa              ;// correct sign
+    STR     res, [mb], #16              ;// next row
+
+    ;// tmp4 = |a4|a3|a2|a1|
+    ;// tmp5 = |c4|c3|c2|c1|
+    ;// tmp6 = |g4|g3|g2|g1|
+    ;// tmp1 = |m4|m3|m2|m1|
+    ;// tmp2 = |r4|r3|r2|r1|
+    ;// tmp3 = |t4|t3|t2|t1|
+
+    ;// fourth four pixels
+    UXTB16  tmpa, tmp6                  ;// |g3|g1|
+    UXTAB16 tmpa, tmpa, tmp1            ;// |g3+m3|g1+m1|
+    UXTB16  tmpb, tmp5                  ;// |c3|c1|
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTAB16 tmpb, tmpb, tmp2            ;// |c3+r3|c1+r1|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpa, tmpa, tmp4            ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp3            ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     res, = 0x00FF00FF
+    UXTB16  tmpa, tmp6, ROR #8          ;// |g4|g2|
+    UXTAB16 tmpa, tmpa, tmp1, ROR #8    ;// |g4+m4|g2+m2|
+    AND     res, res, tmpb, LSR #5      ;// mask and divide by 32
+
+    ADD     tmpa, tmpa, tmpa, LSL #2    ;// 5(G+M)
+    UXTB16  tmpb, tmp5, ROR #8          ;// |c4|c2|
+    ADD     tmpa, plus16, tmpa, LSL #2  ;// 16+20(G+M)
+    UXTAB16 tmpb, tmpb, tmp2, ROR #8    ;// |c4+r4|c2+r2|
+    UXTAB16 tmpa, tmpa, tmp4, ROR #8    ;// 16+20(G+M)+A
+    UXTAB16 tmpa, tmpa, tmp3, ROR #8    ;// 16+20(G+M)+A+T
+
+    ADD     tmpb, tmpb, tmpb, LSL #2    ;// 5(C+R)
+    SSUB16  tmpa, tmpa, tmpb            ;// 16+20(G+M)+(A+T)-5(C+R)
+
+    USAT16  tmpb, #13, tmpa             ;// saturate
+    LDR     tmp4, = 0xFF00FF00
+    MVNEQ   tmp5, tmp6                  ;// select verOffset=0
+    MVNNE   tmp5, tmp1                  ;// select verOffset=1
+
+    AND     tmpa, tmp4, tmpb, LSL #3    ;// mask and divide by 32
+    ORR     res, res, tmpa
+    LDR     tmpa, = 0x80808080
+    UHSUB8  res, res, tmp5              ;// bilinear interpolation
+
+    ;// decrement loop_x counter
+    SUBS    count, count, #4<<24        ;// (partWidth-1) -= 4;
+
+    ;// calculate "ref" address for next round
+    SUB     ref, ref, width, LSL #3     ;// ref -= 8*width;
+    ADD     ref, ref, #4;               ;// next column (4 pixels)
+
+    EOR     res, res, tmpa              ;// correct sign
+    STR     res, [mb], #-44             ;// back 3 rows (48) and on 4 columns
+
+    BCS     loop_x                      ;// C from the SUBS above: no borrow yet
+    ;// assumes partWidth is a multiple of 4, so the top byte is 0xFF here - TODO confirm
+    ADDS    count, count, #252<<16      ;// (partHeight-1) -= 4;
+    ADD     ref, ref, width, LSL #2     ;// ref += 4*width
+    AND     tmp1, count, #0x0000FF00    ;// partWidth-1
+    MOV     tmp2, #1
+    ADD     tmp2, tmp2, tmp1, LSR #8    ;// partWidth
+    SUB     ref, ref, tmp2              ;// ref -= partWidth
+    ADD     mb, mb, #64;
+    SUB     mb, mb, tmp2;               ;// mb -= partWidth
+    BGE     loop_y                      ;// flags still from the ADDS above
+
+    ADD     sp,sp,#0x1f4                ;// drop locals (0x1e4) and saved r0-r3 (0x10)
+    LDMFD   sp!, {r4-r11, pc}
+
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/win_asm.bat b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/win_asm.bat
new file mode 100644
index 0000000..1b8d88c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/win_asm.bat
@@ -0,0 +1,15 @@
+echo off
+set ASMFLAGS= -checkreglist -CPU ARM1136 -PreDefine "H264DEC_WINASM SETL {TRUE}"
+set ASM="D:\Program Files\Microsoft Visual Studio 8\VC\ce\bin\x86_arm\armasm"
+echo on
+@rem ASMFLAGS predefines H264DEC_WINASM; each source below is assembled with %ASM%.
+%ASM% %ASMFLAGS% h264bsd_interpolate_chroma_ver.s
+%ASM% %ASMFLAGS% h264bsd_interpolate_chroma_hor.s
+%ASM% %ASMFLAGS% h264bsd_interpolate_hor_half.s
+%ASM% %ASMFLAGS% h264bsd_interpolate_hor_quarter.s
+%ASM% %ASMFLAGS% h264bsd_interpolate_hor_ver_quarter.s
+%ASM% %ASMFLAGS% h264bsd_interpolate_ver_half.s
+%ASM% %ASMFLAGS% h264bsd_interpolate_ver_quarter.s
+@rem The two sources below are not assembled by this script (commands disabled).
+rem %ASM% %ASMFLAGS% h264bsd_interpolate_chroma_hor_ver.s
+rem %ASM% %ASMFLAGS% h264bsd_interpolate_mid_hor.s
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdClearMbLayer.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdClearMbLayer.s
new file mode 100644
index 0000000..db11654
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdClearMbLayer.s
@@ -0,0 +1,66 @@
+;
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+
+    REQUIRE8
+    PRESERVE8
+
+    AREA    |.text|, CODE
+
+    EXPORT h264bsdClearMbLayer
+
+; Input / output registers
+pMbLayer    RN  0       ; first store pointer (buffer start on entry)
+size        RN  1       ; remaining byte count
+pTmp        RN  2       ; second store pointer, 16 bytes ahead of pMbLayer
+step        RN  3       ; post-increment applied after every store (32)
+
+; -- NEON registers --
+
+qZero   QN  Q0.U8       ; 16 zero bytes
+
+;/*------------------------------------------------------------------------------
+;
+;    Function: h264bsdClearMbLayer
+;
+;        Functional description:
+;            Zero-fills a buffer, 64 bytes per loop pass, with 16-byte NEON stores.
+;        Inputs:
+;            pMbLayer (r0)  start of the buffer;  size (r1)  number of bytes
+;        Outputs:
+;            buffer bytes set to zero; the loop body always runs at least once
+;        Returns:
+;            no result is set up; r0-r3 are modified
+;------------------------------------------------------------------------------*/
+
+h264bsdClearMbLayer
+
+    VMOV    qZero, #0
+    ADD     pTmp, pMbLayer, #16         ; pTmp covers bytes 16..31 and 48..63
+    MOV     step, #32
+    SUBS    size, size, #64             ; pre-bias so the final pass borrows
+
+loop
+    VST1    qZero, [pMbLayer], step     ; bytes 0..15 of this pass
+    SUBS    size, size, #64             ; C clear (borrow) ends the loop
+    VST1    qZero, [pTmp], step         ; bytes 16..31
+    VST1    qZero, [pMbLayer], step     ; bytes 32..47
+    VST1    qZero, [pTmp], step         ; bytes 48..63
+    BCS     loop                        ; uses the flags of the SUBS above
+
+    BX      lr
+    END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdCountLeadingZeros.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdCountLeadingZeros.s
new file mode 100644
index 0000000..c7bd73e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdCountLeadingZeros.s
@@ -0,0 +1,49 @@
+;
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+
+    REQUIRE8
+    PRESERVE8
+
+    AREA    |.text|, CODE
+
+    EXPORT h264bsdCountLeadingZeros
+
+; Input / output registers
+value    RN  0          ; input word, replaced by the result
+
+; -- NEON registers --
+; (none are used by this function)
+;/*------------------------------------------------------------------------------
+;
+;    Function: h264bsdCountLeadingZeros
+;
+;        Functional description:
+;            Counts the leading zero bits of a 32-bit word with one CLZ instruction.
+;        Inputs:
+;            value (r0)  word to examine; no other register is read
+;        Outputs:
+;            none (no memory is written)
+;        Returns:
+;            r0 = number of leading zero bits (CLZ gives 32 for a zero word)
+;------------------------------------------------------------------------------*/
+
+h264bsdCountLeadingZeros
+
+    CLZ     value, value
+    BX      lr
+    END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFillRow7.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFillRow7.s
new file mode 100644
index 0000000..5bfac92
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFillRow7.s
@@ -0,0 +1,180 @@
+;
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+
+    REQUIRE8
+    PRESERVE8
+
+    AREA    |.text|, CODE
+
+    EXPORT h264bsdFillRow7
+
+; Input / output registers
+; left and tmp2 share r2: tmp2 is first written after the left loop has finished.
+ref     RN 0            ; source pointer
+fill    RN 1            ; destination pointer
+left    RN 2            ; number of left padding bytes
+tmp2    RN 2            ; scratch (word count, copied data, right padding value)
+center  RN 3            ; number of bytes copied from ref
+right   RN 4            ; number of right padding bytes (loaded from the stack)
+tmp1    RN 5            ; left padding value
+
+; -- NEON registers --
+; dTmp3 is declared but not referenced by name in this function.
+qTmp0   QN  Q0.U8
+qTmp1   QN  Q1.U8
+dTmp0   DN  D0.U8
+dTmp1   DN  D1.U8
+dTmp2   DN  D2.U8
+dTmp3   DN  D3.U8
+
+
+;/*------------------------------------------------------------------------------
+;
+;    Function: h264bsdFillRow7
+;
+;        Functional description:
+;            Writes 'left' copies of ref[0], then 'center' bytes from ref, then
+;        Inputs:
+;            'right' copies of the byte just before the final ref position.
+;        Outputs:
+;            In: ref (r0), fill (r1), left (r2), center (r3), right (on the stack).
+;        Returns:
+;            Out: left+center+right bytes stored at fill; no result is set up in r0.
+;------------------------------------------------------------------------------*/
+
+h264bsdFillRow7
+        PUSH     {r4-r6,lr}
+        CMP      left, #0
+        LDR      right, [sp,#0x10]      ; stacked argument, just above the 4 pushed words
+        BEQ      switch_center          ; left == 0 (flags from the CMP above)
+        LDRB     tmp1, [ref,#0]         ; left padding value = first source byte
+
+loop_left
+        SUBS     left, left, #1
+        STRB     tmp1, [fill], #1
+        BNE      loop_left
+
+switch_center
+        ASR      tmp2,center,#2         ; whole 4-byte words in center
+        CMP      tmp2,#9
+        ADDCC    pc,pc,tmp2,LSL #2      ; 0..8 words: jump into the table (pc = this + 8)
+        B        loop_center            ; reached when the ADDCC is not taken (>= 9 words)
+        B        loop_center            ; table entry 0: no whole word
+        B        case_1
+        B        case_2
+        B        case_3
+        B        case_4
+        B        case_5
+        B        case_6
+        B        case_7
+        B        case_8
+;case_8
+;        LDR      tmp2, [ref], #4
+;        SUB      center, center, #4
+;        STR      tmp2, [fill], #4
+;case_7
+;        LDR      tmp2, [ref], #4
+;        SUB      center, center, #4
+;        STR      tmp2, [fill], #4
+;case_6
+;        LDR      tmp2, [ref], #4
+;        SUB      center, center, #4
+;        STR      tmp2, [fill],#4
+;case_5
+;        LDR      tmp2, [ref], #4
+;        SUB      center, center, #4
+;        STR      tmp2, [fill],#4
+;case_4
+;        LDR      tmp2, [ref],#4
+;        SUB      center, center, #4
+;        STR      tmp2, [fill], #4
+;case_3
+;        LDR      tmp2, [ref],#4
+;        SUB      center, center, #4
+;        STR      tmp2, [fill], #4
+;case_2
+;        LDR      tmp2, [ref],#4
+;        SUB      center, center, #4
+;        STR      tmp2, [fill], #4
+;case_1
+;        LDR      tmp2, [ref],#4
+;        SUB      center, center, #4
+;        STR      tmp2, [fill], #4
+; Active cases: case_N copies N words; the word-by-word versions above are disabled.
+case_8
+        VLD1    {qTmp0, qTmp1}, [ref]!          ; 32 bytes
+        SUB     center, center, #32
+        VST1    qTmp0, [fill]!
+        VST1    qTmp1, [fill]!
+        B       loop_center
+case_7
+        VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!     ; 24 bytes + 4 bytes below
+        SUB     center, center, #28
+        LDR     tmp2, [ref], #4
+        VST1    {dTmp0,dTmp1,dTmp2}, [fill]!
+        STR     tmp2, [fill],#4
+        B       loop_center
+case_6
+        VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!     ; 24 bytes
+        SUB     center, center, #24
+        VST1    {dTmp0,dTmp1,dTmp2}, [fill]!
+        B       loop_center
+case_5
+        VLD1    qTmp0, [ref]!                   ; 16 bytes + 4 bytes below
+        SUB     center, center, #20
+        LDR     tmp2, [ref], #4
+        VST1    qTmp0, [fill]!
+        STR     tmp2, [fill],#4
+        B       loop_center
+case_4
+        VLD1    qTmp0, [ref]!                   ; 16 bytes
+        SUB     center, center, #16
+        VST1    qTmp0, [fill]!
+        B       loop_center
+case_3
+        VLD1    dTmp0, [ref]!                   ; 8 bytes + 4 bytes below
+        SUB     center, center, #12
+        LDR     tmp2, [ref], #4
+        VST1    dTmp0, [fill]!
+        STR     tmp2, [fill],#4
+        B       loop_center
+case_2
+        LDR      tmp2, [ref],#4
+        SUB      center, center, #4
+        STR      tmp2, [fill], #4       ; falls through to case_1 for the second word
+case_1
+        LDR      tmp2, [ref],#4
+        SUB      center, center, #4
+        STR      tmp2, [fill], #4
+
+loop_center
+        CMP      center, #0             ; copy whatever is left one byte at a time
+        LDRBNE   tmp2, [ref], #1
+        SUBNE    center, center, #1
+        STRBNE   tmp2, [fill], #1
+        BNE      loop_center
+        CMP      right,#0
+        POPEQ    {r4-r6,pc}             ; no right padding: return
+        LDRB     tmp2, [ref,#-1]        ; right padding value = byte just before ref
+
+loop_right
+        STRB     tmp2, [fill], #1
+        SUBS     right, right, #1
+        BNE      loop_right
+
+        POP      {r4-r6,pc}
+        END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFlushBits.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFlushBits.s
new file mode 100644
index 0000000..21335b8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFlushBits.s
@@ -0,0 +1,82 @@
+;
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+
+    REQUIRE8
+    PRESERVE8
+
+    AREA    |.text|, CODE
+
+    EXPORT h264bsdFlushBits
+
+; Input / output registers
+pStrmData       RN  0   ; pointer to the stream state; words at +0x0..+0x10 are used
+numBits         RN  1   ; number of bits to skip (r1 is reused twice below)
+readBits        RN  2   ; word at +0x10
+strmBuffSize    RN  3   ; word at +0xC
+pStrmBuffStart  RN  1   ; word at +0x0
+pStrmCurrPos    RN  2   ; word at +0x4 (replaces readBits in r2)
+bitPosInWord    RN  1   ; word at +0x8
+
+; -- NEON registers --
+; (none are used by this function)
+
+
+;/*------------------------------------------------------------------------------
+;
+;    Function: h264bsdFlushBits
+;
+;        Functional description:
+;            Adds numBits to the read-bit count and updates the derived fields.
+;        Inputs:
+;            pStrmData (r0)  stream state;  numBits (r1)  bits to skip
+;        Outputs:
+;            +0x10 and +0x8 are always stored; +0x4 is stored only on success
+;        Returns:
+;            r0 = 0, or 0xFFFFFFFF when readBits exceeds 8 * strmBuffSize
+;------------------------------------------------------------------------------*/
+
+h264bsdFlushBits
+;//    PUSH     {r4-r6,lr}
+
+    LDR readBits, [pStrmData, #0x10]
+    LDR strmBuffSize, [pStrmData, #0xC]
+
+    ADD readBits, readBits, numBits         ; new bit count
+    AND bitPosInWord, readBits, #7          ; bit offset within the byte (overwrites numBits)
+
+    STR readBits, [pStrmData, #0x10]
+    STR bitPosInWord, [pStrmData, #0x8]
+
+    LDR pStrmBuffStart, [pStrmData, #0x0]
+
+    CMP readBits, strmBuffSize, LSL #3      ; compare with the buffer size in bits
+
+    BHI end_of_stream                       ; unsigned: readBits > 8 * strmBuffSize
+
+    ADD pStrmCurrPos, pStrmBuffStart, readBits, LSR #3  ; start + whole bytes consumed
+    STR pStrmCurrPos, [pStrmData, #0x4]
+    MOV r0, #0
+    BX  lr
+;//    POP      {r4-r6,pc}
+
+end_of_stream
+    MVN r0, #0                              ; r0 = 0xFFFFFFFF
+    BX  lr
+;//    POP      {r4-r6,pc}
+
+    END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdWriteMacroblock.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdWriteMacroblock.s
new file mode 100644
index 0000000..38a0781
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdWriteMacroblock.s
@@ -0,0 +1,152 @@
+;
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+
+    REQUIRE8
+    PRESERVE8
+
+    AREA    |.text|, CODE
+
+    EXPORT h264bsdWriteMacroblock
+
+; Input / output registers
+image   RN  0   ; in: image descriptor; width and plane pointers are loaded from it
+data    RN  1   ; in: source macroblock data, advanced by every VLD1
+width   RN  2   ; value from [image, #4], then shifted left by 4 to form the luma stride
+luma    RN  3   ; luma destination pointer from [image, #0xC]
+cb      RN  4   ; cb destination pointer from [image, #0x10]
+cr      RN  5   ; cr destination pointer from [image, #0x14]
+cwidth  RN  6   ; chroma stride, half of the luma stride
+
+; -- NEON registers --
+
+qRow0   QN  Q0.U8    ; qRowN = QN viewed as sixteen unsigned bytes
+qRow1   QN  Q1.U8
+qRow2   QN  Q2.U8
+qRow3   QN  Q3.U8
+qRow4   QN  Q4.U8    ; Q4-Q7 are saved and restored by the function (VPUSH/VPOP)
+qRow5   QN  Q5.U8
+qRow6   QN  Q6.U8
+qRow7   QN  Q7.U8
+qRow8   QN  Q8.U8
+qRow9   QN  Q9.U8
+qRow10  QN  Q10.U8
+qRow11  QN  Q11.U8
+qRow12  QN  Q12.U8
+qRow13  QN  Q13.U8
+qRow14  QN  Q14.U8
+qRow15  QN  Q15.U8
+
+dRow0   DN  D0.U8    ; dRowN = DN viewed as eight unsigned bytes; D(2k), D(2k+1) alias Q(k)
+dRow1   DN  D1.U8
+dRow2   DN  D2.U8
+dRow3   DN  D3.U8
+dRow4   DN  D4.U8
+dRow5   DN  D5.U8
+dRow6   DN  D6.U8
+dRow7   DN  D7.U8
+dRow8   DN  D8.U8    ; D8-D15 are the two halves of Q4-Q7
+dRow9   DN  D9.U8
+dRow10  DN  D10.U8
+dRow11  DN  D11.U8
+dRow12  DN  D12.U8
+dRow13  DN  D13.U8
+dRow14  DN  D14.U8
+dRow15  DN  D15.U8
+
+;/*------------------------------------------------------------------------------
+;
+;    Function: h264bsdWriteMacroblock
+;
+;        Functional description:
+;            Write one macroblock into the image. Both luma and chroma
+;            components will be written at the same time.
+;
+;        Inputs:
+;            data    pointer to macroblock data to be written, 256 values for
+;                    luma followed by 64 values for both chroma components
+;
+;        Outputs:
+;            image   pointer to the image where the macroblock will be written
+;
+;        Returns:
+;            none
+;
+;------------------------------------------------------------------------------*/
+
+h264bsdWriteMacroblock
+    PUSH    {r4-r6,lr}              ; r4-r6 are used as cb, cr and cwidth
+    VPUSH   {q4-q7}                 ; q4-q7 are overwritten below, so save them first
+
+    LDR     width, [image, #4]
+    LDR     luma, [image, #0xC]
+    LDR     cb, [image, #0x10]
+    LDR     cr, [image, #0x14]
+
+
+;   Write luma: each VLD1 fetches 32 bytes (two rows); loads are interleaved with row stores
+    VLD1    {qRow0, qRow1}, [data]!
+    LSL     width, width, #4        ; luma stride = 16 * loaded width
+    VLD1    {qRow2, qRow3}, [data]!
+    LSR     cwidth, width, #1       ; chroma stride = 8 * loaded width
+    VST1    {qRow0}, [luma@128], width  ; @128: address is asserted to be 16-byte aligned
+    VLD1    {qRow4, qRow5}, [data]!
+    VST1    {qRow1}, [luma@128], width
+    VLD1    {qRow6, qRow7}, [data]!
+    VST1    {qRow2}, [luma@128], width
+    VLD1    {qRow8, qRow9}, [data]!
+    VST1    {qRow3}, [luma@128], width
+    VLD1    {qRow10, qRow11}, [data]!
+    VST1    {qRow4}, [luma@128], width
+    VLD1    {qRow12, qRow13}, [data]!
+    VST1    {qRow5}, [luma@128], width
+    VLD1    {qRow14, qRow15}, [data]!   ; eighth load: all 256 luma bytes are now in registers
+    VST1    {qRow6}, [luma@128], width
+
+    VLD1    {qRow0, qRow1}, [data]! ;cb rows 0,1,2,3 (Q0, Q1 were stored above and are reused)
+    VST1    {qRow7}, [luma@128], width
+    VLD1    {qRow2, qRow3}, [data]! ;cb rows 4,5,6,7
+    VST1    {qRow8}, [luma@128], width
+    VLD1    {qRow4, qRow5}, [data]! ;cr rows 0,1,2,3
+    VST1    {qRow9}, [luma@128], width
+    VLD1    {qRow6, qRow7}, [data]! ;cr rows 4,5,6,7
+    VST1    {qRow10}, [luma@128], width
+    VST1    {dRow0}, [cb@64], cwidth    ; cb rows live in D0-D7 (Q0-Q3)
+    VST1    {dRow8}, [cr@64], cwidth    ; cr rows live in D8-D15 (Q4-Q7)
+    VST1    {qRow11}, [luma@128], width
+    VST1    {dRow1}, [cb@64], cwidth
+    VST1    {dRow9}, [cr@64], cwidth
+    VST1    {qRow12}, [luma@128], width
+    VST1    {dRow2}, [cb@64], cwidth
+    VST1    {dRow10}, [cr@64], cwidth
+    VST1    {qRow13}, [luma@128], width
+    VST1    {dRow3}, [cb@64], cwidth
+    VST1    {dRow11}, [cr@64], cwidth
+    VST1    {qRow14}, [luma@128], width
+    VST1    {dRow4}, [cb@64], cwidth
+    VST1    {dRow12}, [cr@64], cwidth
+    VST1    {qRow15}, [luma]            ; last luma row: no post-increment, no alignment hint
+    VST1    {dRow5}, [cb@64], cwidth
+    VST1    {dRow13}, [cr@64], cwidth
+    VST1    {dRow6}, [cb@64], cwidth
+    VST1    {dRow14}, [cr@64], cwidth
+    VST1    {dRow7}, [cb@64]            ; last chroma rows: no post-increment
+    VST1    {dRow15}, [cr@64]
+
+    VPOP    {q4-q7}
+    POP     {r4-r6,pc}                  ; loading pc returns to the caller
+    END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S
new file mode 100644
index 0000000..f39f5c4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S
@@ -0,0 +1,41 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@      http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+
+
+
+    .macro REQUIRE8
+    .eabi_attribute 24, 1           @ build attribute 24 (Tag_ABI_align_needed) = 1
+    .endm
+
+    .macro PRESERVE8
+    .eabi_attribute 25, 1           @ build attribute 25 (Tag_ABI_align_preserved) = 1
+    .endm
+
+
+    .macro function name, export=0
+.if \export
+    .global \name                   @ emitted only for a non-zero export argument (default is 0)
+.endif
+    .type   \name, %function        @ mark the symbol as a function in the object file
+    .func   \name                   @ opened here, closed by the endfunction macro
+\name:
+    .endm
+
+    .macro endfunction
+    .endfunc                        @ closes the .func emitted by the function macro
+    .endm
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S
new file mode 100644
index 0000000..c8a940e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S
@@ -0,0 +1,68 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@      http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+    preserve8
+
+    .fpu neon
+    .text
+
+/* Input / output registers */
+#define pMbLayer    r0      /* in: start of the area to clear; also the first store pointer */
+#define size        r1      /* in: byte count, consumed 64 at a time */
+#define pTmp        r2      /* second store pointer, starts at pMbLayer + 16 */
+#define step        r3      /* post-increment of 32 applied to both pointers */
+
+/* -- NEON registers -- */
+
+#define qZero   Q0.U8       /* all-zero 16-byte vector that gets stored */
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdClearMbLayer
+
+        Functional description:
+            Zero-fill memory from pMbLayer in 64-byte chunks (four 16-byte NEON stores).
+        Inputs:
+            r0 = pMbLayer, r1 = size in bytes; at least 64 bytes are always written.
+        Outputs:
+            floor(size / 64) chunks are cleared. NOTE(review): size < 64 wraps and overruns.
+        Returns:
+            no result is placed in r0 (it is left pointing past the cleared area).
+------------------------------------------------------------------------------*/
+
+function h264bsdClearMbLayer, export=1
+
+    VMOV    qZero, #0
+    ADD     pTmp, pMbLayer, #16
+    MOV     step, #32
+    SUBS    size, size, #64             @ pre-decrement; these flags are never tested
+
+loop:
+    VST1    {qZero}, [pMbLayer], step   @ bytes 0-15 of the chunk
+    SUBS    size, size, #64             @ borrow (carry clear) when this is the last chunk
+    VST1    {qZero}, [pTmp], step       @ bytes 16-31
+    VST1    {qZero}, [pMbLayer], step   @ bytes 32-47
+    VST1    {qZero}, [pTmp], step       @ bytes 48-63
+    BCS     loop                        @ tests the SUBS above; VST1 does not alter flags
+
+    BX      lr
+
+endfunction
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S
new file mode 100644
index 0000000..05253d0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S
@@ -0,0 +1,48 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@      http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+#include "asm_common.S"
+
+    preserve8
+    .arm
+    .text
+
+
+/* Input / output registers */
+#define value    r0     /* in: 32-bit word to examine; out: the result */
+
+/* -- NEON registers -- (none used) */
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCountLeadingZeros
+
+        Functional description:
+            Count the leading zero bits of a 32-bit word with the CLZ instruction.
+        Inputs:
+            r0 = value
+        Outputs:
+            none (no memory is written)
+        Returns:
+            r0 = number of zero bits above the most significant set bit (32 when value is 0)
+------------------------------------------------------------------------------*/
+
+function h264bsdCountLeadingZeros, export=1
+
+    CLZ     value, value
+    BX      lr
+
+endfunction
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S
new file mode 100644
index 0000000..6955b9a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S
@@ -0,0 +1,143 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@      http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+    preserve8
+
+    .arm                    /* the pc-relative jump table in h264bsdFillRow7 needs ARM state */
+    .fpu neon
+    .text
+
+/* Input / output registers */
+#define ref     r0      /* in: source row, advanced while the center part is copied */
+#define fill    r1      /* in: destination, advanced by every store */
+#define left    r2      /* in: number of leading bytes filled with ref[0] */
+#define tmp2    r2      /* scratch; shares r2 with left, used once left is consumed */
+#define center  r3      /* in: number of bytes copied from ref */
+#define right   r4      /* fifth argument, loaded from the stack by the function */
+#define tmp1    r5      /* scratch: byte replicated by the left fill */
+
+/* -- NEON registers -- */
+
+#define qTmp0     Q0.U8
+#define qTmp1     Q1.U8
+#define dTmp0     D0.U8
+#define dTmp1     D1.U8
+#define dTmp2     D2.U8
+#define dTmp3     D3.U8
+
+/*
+void h264bsdFillRow7(const u8 * ref, u8 * fill, i32 left, i32 center,
+                     i32 right);
+Writes left copies of ref[0], center bytes copied from ref and right copies of
+the byte before the final read position. Negative counts are not handled.
+*/
+function h264bsdFillRow7, export=1
+        PUSH     {r4-r6,lr}          @ 16 bytes pushed, so the fifth argument sits at sp + 0x10
+        CMP      left, #0
+        LDR      right, [sp,#0x10]   @ LDR keeps the flags produced by the CMP
+        BEQ      switch_center       @ no leading fill requested
+        LDRB     tmp1, [ref,#0]      @ byte replicated by the leading fill
+
+loop_left:
+        SUBS     left, left, #1
+        STRB     tmp1, [fill], #1
+        BNE      loop_left
+
+switch_center:
+        ASR      tmp2,center,#2      @ table index = byte count / 4 (r2 is free by now)
+        CMP      tmp2,#9
+        ADDCC    pc,pc,tmp2,LSL #2   @ index below 9: ARM-state pc is this address + 8 = entry 0
+        B        loop_center         @ index 9 or above (unsigned): byte loop copies everything
+        B        loop_center         @ entry 0: fewer than 4 bytes
+        B        case_1
+        B        case_2
+        B        case_3
+        B        case_4
+        B        case_5
+        B        case_6
+        B        case_7
+        B        case_8
+
+case_8:                              @ each case_N copies 4*N bytes, the byte loop does the rest
+        VLD1    {qTmp0, qTmp1}, [ref]!
+        SUB     center, center, #32
+        VST1    {qTmp0}, [fill]!
+        VST1    {qTmp1}, [fill]!
+        B       loop_center
+case_7:
+        VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!
+        SUB     center, center, #28
+        LDR     tmp2, [ref], #4
+        VST1    {dTmp0,dTmp1,dTmp2}, [fill]!
+        STR     tmp2, [fill],#4
+        B       loop_center
+case_6:
+        VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!
+        SUB     center, center, #24
+        VST1    {dTmp0,dTmp1,dTmp2}, [fill]!
+        B       loop_center
+case_5:
+        VLD1    {qTmp0}, [ref]!
+        SUB     center, center, #20
+        LDR     tmp2, [ref], #4
+        VST1    {qTmp0}, [fill]!
+        STR     tmp2, [fill],#4
+        B       loop_center
+case_4:
+        VLD1    {qTmp0}, [ref]!
+        SUB     center, center, #16
+        VST1    {qTmp0}, [fill]!
+        B       loop_center
+case_3:
+        VLD1    {dTmp0}, [ref]!
+        SUB     center, center, #12
+        LDR     tmp2, [ref], #4
+        VST1    dTmp0, [fill]!
+        STR     tmp2, [fill],#4
+        B       loop_center
+case_2:                              @ copies 4 bytes, then falls through for 4 more
+        LDR      tmp2, [ref],#4
+        SUB      center, center, #4
+        STR      tmp2, [fill], #4
+case_1:
+        LDR      tmp2, [ref],#4
+        SUB      center, center, #4
+        STR      tmp2, [fill], #4
+
+loop_center:
+        CMP      center, #0
+        BEQ      jump                @ nothing remains to copy
+copy_center:
+        LDRB     tmp2, [ref], #1
+        SUBS     center, center, #1  @ sets Z for the BNE below; STRB does not alter flags
+        STRB     tmp2, [fill], #1
+        BNE      copy_center
+jump:
+        CMP      right,#0
+        POPEQ    {r4-r6,pc}          @ no trailing fill: return straight away
+        LDRB     tmp2, [ref,#-1]     @ byte just before the current read position
+
+loop_right:
+        STRB     tmp2, [fill], #1
+        SUBS     right, right, #1
+        BNE      loop_right
+        POP      {r4-r6,pc}
+
+endfunction
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S
new file mode 100644
index 0000000..b3f3191
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S
@@ -0,0 +1,78 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@      http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+    preserve8
+
+    .arm
+    .text
+
+/* Input / output registers (r1 and r2 are reused once their previous value is dead) */
+#define pStrmData       r0  /* in: base address of the stream state block */
+#define numBits         r1  /* in: number of bits to skip */
+#define readBits        r2  /* bit counter kept at [pStrmData, #0x10] */
+#define strmBuffSize    r3  /* size read from [pStrmData, #0xC] */
+#define pStrmBuffStart  r1  /* reuses r1: pointer read from [pStrmData, #0x0] */
+#define pStrmCurrPos    r2  /* reuses r2: pointer written to [pStrmData, #0x4] */
+#define bitPosInWord    r1  /* reuses r1: value written to [pStrmData, #0x8] */
+
+/* -- NEON registers -- (none used by this routine) */
+
+
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFlushBits
+
+        Functional description:
+            Advance the stream read position by numBits and refresh the derived fields.
+        Inputs:
+            r0 = pStrmData (stream state block), r1 = numBits
+        Outputs:
+            [r0,#0x10] and [r0,#0x8] are always rewritten; [r0,#0x4] only on success
+        Returns:
+            r0 = 0 on success, 0xFFFFFFFF when the new bit count exceeds 8 * strmBuffSize
+------------------------------------------------------------------------------*/
+function h264bsdFlushBits, export=1
+
+    LDR readBits, [pStrmData, #0x10]
+    LDR strmBuffSize, [pStrmData, #0xC]
+
+    ADD readBits, readBits, numBits         @ new total of consumed bits
+    AND bitPosInWord, readBits, #7          @ low three bits of the bit count (r1 reused)
+
+    STR readBits, [pStrmData, #0x10]
+    STR bitPosInWord, [pStrmData, #0x8]
+
+    LDR pStrmBuffStart, [pStrmData, #0x0]   @ r1 reused again, its previous value is stored
+
+    CMP readBits, strmBuffSize, LSL #3      @ compare against the size scaled by 8
+
+    BHI end_of_stream                       @ unsigned: the new bit count is past the limit
+
+    ADD pStrmCurrPos, pStrmBuffStart, readBits, LSR #3  @ start + (bit count / 8)
+    STR pStrmCurrPos, [pStrmData, #0x4]
+    MOV r0, #0                              @ success
+    BX  lr
+
+end_of_stream:
+    MVN r0, #0                              @ r0 = 0xFFFFFFFF; offset 0x4 is left unchanged
+    BX  lr
+
+endfunction
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S
new file mode 100644
index 0000000..495d560
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S
@@ -0,0 +1,157 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@      http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+    require8
+    preserve8
+
+    .arm
+    .fpu neon
+    .text
+
+/* Input / output registers */
+#define image   r0      /* in: image descriptor; width and plane pointers are loaded from it */
+#define data    r1      /* in: source macroblock data, advanced by every VLD1 */
+#define width   r2      /* value from [image, #4], then shifted left by 4 (luma stride) */
+#define luma    r3      /* luma destination pointer from [image, #0xC] */
+#define cb      r4      /* cb destination pointer from [image, #0x10] */
+#define cr      r5      /* cr destination pointer from [image, #0x14] */
+#define cwidth  r6      /* chroma stride, half of the luma stride */
+
+/* -- NEON registers -- */
+
+#define qRow0     Q0.U8     /* qRowN = QN viewed as sixteen unsigned bytes */
+#define qRow1     Q1.U8
+#define qRow2     Q2.U8
+#define qRow3     Q3.U8
+#define qRow4     Q4.U8     /* Q4-Q7 are saved and restored by the function (VPUSH/VPOP) */
+#define qRow5     Q5.U8
+#define qRow6     Q6.U8
+#define qRow7     Q7.U8
+#define qRow8     Q8.U8
+#define qRow9     Q9.U8
+#define qRow10    Q10.U8
+#define qRow11    Q11.U8
+#define qRow12    Q12.U8
+#define qRow13    Q13.U8
+#define qRow14    Q14.U8
+#define qRow15    Q15.U8
+
+#define dRow0     D0.U8     /* dRowN = DN viewed as eight unsigned bytes; D(2k), D(2k+1) alias Q(k) */
+#define dRow1     D1.U8
+#define dRow2     D2.U8
+#define dRow3     D3.U8
+#define dRow4     D4.U8
+#define dRow5     D5.U8
+#define dRow6     D6.U8
+#define dRow7     D7.U8
+#define dRow8     D8.U8     /* D8-D15 are the two halves of Q4-Q7 */
+#define dRow9     D9.U8
+#define dRow10    D10.U8
+#define dRow11    D11.U8
+#define dRow12    D12.U8
+#define dRow13    D13.U8
+#define dRow14    D14.U8
+#define dRow15    D15.U8
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdWriteMacroblock
+
+        Functional description:
+            Write one macroblock into the image. Both luma and chroma
+            components will be written at the same time.
+
+        Inputs:
+            data    pointer to macroblock data to be written, 256 values for
+                    luma followed by 64 values for both chroma components
+
+        Outputs:
+            image   pointer to the image where the macroblock will be written
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+function h264bsdWriteMacroblock, export=1
+    PUSH    {r4-r6,lr}              @ r4-r6 are used for the chroma pointers and stride
+    VPUSH   {q4-q7}                 @ q4-q7 are overwritten below, so save them first
+
+    LDR     width, [image, #4]
+    LDR     luma, [image, #0xC]
+    LDR     cb, [image, #0x10]
+    LDR     cr, [image, #0x14]
+
+
+@   Write luma: each VLD1 fetches 32 bytes (two rows); loads are interleaved with row stores
+    VLD1    {qRow0, qRow1}, [data]!
+    LSL     width, width, #4        @ luma stride = 16 * loaded value
+    VLD1    {qRow2, qRow3}, [data]!
+    LSR     cwidth, width, #1       @ chroma stride = 8 * loaded value
+    VST1    {qRow0}, [luma,:128], width     @ :128 asserts a 16-byte aligned address
+    VLD1    {qRow4, qRow5}, [data]!
+    VST1    {qRow1}, [luma,:128], width
+    VLD1    {qRow6, qRow7}, [data]!
+    VST1    {qRow2}, [luma,:128], width
+    VLD1    {qRow8, qRow9}, [data]!
+    VST1    {qRow3}, [luma,:128], width
+    VLD1    {qRow10, qRow11}, [data]!
+    VST1    {qRow4}, [luma,:128], width
+    VLD1    {qRow12, qRow13}, [data]!
+    VST1    {qRow5}, [luma,:128], width
+    VLD1    {qRow14, qRow15}, [data]!       @ eighth load: all 256 luma bytes are in registers
+    VST1    {qRow6}, [luma,:128], width
+
+    VLD1    {qRow0, qRow1}, [data]! @ cb rows 0,1,2,3 (Q0, Q1 were stored above and are reused)
+    VST1    {qRow7}, [luma,:128], width
+    VLD1    {qRow2, qRow3}, [data]! @ cb rows 4,5,6,7
+    VST1    {qRow8}, [luma,:128], width
+    VLD1    {qRow4, qRow5}, [data]! @ cr rows 0,1,2,3
+    VST1    {qRow9}, [luma,:128], width
+    VLD1    {qRow6, qRow7}, [data]! @ cr rows 4,5,6,7
+    VST1    {qRow10}, [luma,:128], width
+    VST1    {dRow0}, [cb,:64], cwidth       @ first chroma plane rows live in D0-D7 (Q0-Q3)
+    VST1    {dRow8}, [cr,:64], cwidth       @ second chroma plane rows live in D8-D15 (Q4-Q7)
+    VST1    {qRow11}, [luma,:128], width
+    VST1    {dRow1}, [cb,:64], cwidth
+    VST1    {dRow9}, [cr,:64], cwidth
+    VST1    {qRow12}, [luma,:128], width
+    VST1    {dRow2}, [cb,:64], cwidth
+    VST1    {dRow10}, [cr,:64], cwidth
+    VST1    {qRow13}, [luma,:128], width
+    VST1    {dRow3}, [cb,:64], cwidth
+    VST1    {dRow11}, [cr,:64], cwidth
+    VST1    {qRow14}, [luma,:128], width
+    VST1    {dRow4}, [cb,:64], cwidth
+    VST1    {dRow12}, [cr,:64], cwidth
+    VST1    {qRow15}, [luma]                @ last row: no post-increment, no alignment hint
+    VST1    {dRow5}, [cb,:64], cwidth
+    VST1    {dRow13}, [cr,:64], cwidth
+    VST1    {dRow6}, [cb,:64], cwidth
+    VST1    {dRow14}, [cr,:64], cwidth
+    VST1    {dRow7}, [cb,:64]               @ last chroma rows: no post-increment
+    VST1    {dRow15}, [cr,:64]
+
+    VPOP    {q4-q7}
+    POP     {r4-r6,pc}
+@   Popping into pc above is the return, so no separate BX is needed.
+
+endfunction
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.c
new file mode 100755
index 0000000..db77f8c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdExtractNalUnit
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_byte_stream.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+#define BYTE_STREAM_ERROR  0xFFFFFFFF
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdExtractNalUnit
+
+        Functional description:
+            Extracts one NAL unit from the byte stream buffer. Removes
+            emulation prevention bytes if present. The original stream buffer
+            is used directly and is therefore modified if emulation prevention
+            bytes are present in the stream.
+
+            Stream buffer is assumed to contain either exactly one NAL unit
+            and nothing else, or one or more NAL units embedded in byte
+            stream format described in the Annex B of the standard. Function
+            detects which one is used based on the first bytes in the buffer.
+
+        Inputs:
+            pByteStream     pointer to byte stream buffer
+            len             length of the stream buffer (in bytes)
+
+        Outputs:
+            pStrmData       stream information is stored here
+            readBytes       number of bytes "consumed" from the stream buffer
+                            (written on every return path, must not be NULL)
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      error in byte stream
+                            (pStrmData is untouched if no start code is found)
+------------------------------------------------------------------------------*/
+
+u32 h264bsdExtractNalUnit(u8 *pByteStream, u32 len, strmData_t *pStrmData,
+    u32 *readBytes)
+{
+
+/* Variables */
+
+    u32 i, tmp;
+    u32 byteCount,initByteCount;    /* bytes scanned so far / offset of the NAL payload */
+    u32 zeroCount;                  /* length of the current run of 0x00 bytes */
+    u8  byte;
+    u32 hasEmulation = HANTRO_FALSE;    /* set when 0x00 0x00 0x03 is seen while scanning */
+    u32 invalidStream = HANTRO_FALSE;   /* set when >= 3 zeros precede a byte other than 0x01 */
+    u8 *readPtr, *writePtr;
+
+/* Code */
+
+    ASSERT(pByteStream);
+    ASSERT(len);
+    ASSERT(len < BYTE_STREAM_ERROR);
+    ASSERT(pStrmData);
+
+    /* byte stream format if the buffer starts with 0x000001 or 0x000000 */
+    if (len > 3 && pByteStream[0] == 0x00 && pByteStream[1] == 0x00 &&
+        (pByteStream[2]&0xFE) == 0x00)
+    {
+        /* search for NAL unit start point, i.e. point after first start code
+         * prefix in the stream */
+        zeroCount = byteCount = 2;  /* the two leading zero bytes were checked above */
+        readPtr = pByteStream + 2;
+        /*lint -e(716) while(1) used consciously */
+        while (1)
+        {
+            byte = *readPtr++;
+            byteCount++;
+
+            if (byteCount == len)   /* checked before the byte is examined */
+            {
+                /* no start code prefix found -> error */
+                *readBytes = len;
+                return(HANTRO_NOK);
+            }
+
+            if (!byte)
+                zeroCount++;
+            else if ((byte == 0x01) && (zeroCount >= 2))
+                break;
+            else
+                zeroCount = 0;
+        }
+
+        initByteCount = byteCount;  /* first payload byte follows the start code */
+
+        /* determine size of the NAL unit. Search for next start code prefix
+         * or end of stream and ignore possible trailing zero bytes */
+        zeroCount = 0;
+        /*lint -e(716) while(1) used consciously */
+        while (1)
+        {
+            byte = *readPtr++;
+            byteCount++;
+            if (!byte)
+                zeroCount++;
+
+            if ( (byte == 0x03) && (zeroCount == 2) )
+            {
+                hasEmulation = HANTRO_TRUE;
+            }
+
+            if ( (byte == 0x01) && (zeroCount >= 2 ) )
+            {
+                /* next start code: size excludes its zero run and the 0x01 */
+                pStrmData->strmBuffSize =
+                    byteCount - initByteCount - zeroCount - 1;
+                /* up to 3 of the zeros are left unconsumed (see readBytes) */
+                zeroCount -= MIN(zeroCount, 3);
+                break;
+            }
+            else if (byte)
+            {
+                if (zeroCount >= 3)
+                    invalidStream = HANTRO_TRUE;
+                zeroCount = 0;
+            }
+
+            if (byteCount == len)
+            {
+                /* end of buffer: trailing zeros are not part of the NAL unit */
+                pStrmData->strmBuffSize = byteCount - initByteCount - zeroCount;
+                break;
+            }
+
+        }
+    }
+    /* separate NAL units as input -> just set stream params */
+    else
+    {
+        initByteCount = 0;
+        zeroCount = 0;
+        pStrmData->strmBuffSize = len;
+        hasEmulation = HANTRO_TRUE; /* nothing was scanned, so always run the removal pass */
+    }
+
+    pStrmData->pStrmBuffStart    = pByteStream + initByteCount;
+    pStrmData->pStrmCurrPos      = pStrmData->pStrmBuffStart;
+    pStrmData->bitPosInWord      = 0;
+    pStrmData->strmBuffReadBits  = 0;
+
+    /* return number of bytes "consumed" */
+    *readBytes = pStrmData->strmBuffSize + initByteCount + zeroCount;
+
+    if (invalidStream)
+    {
+        return(HANTRO_NOK);
+    }
+
+    /* remove emulation prevention bytes before rbsp processing */
+    if (hasEmulation)
+    {
+        tmp = pStrmData->strmBuffSize;
+        readPtr = writePtr = pStrmData->pStrmBuffStart;
+        zeroCount = 0;
+        /* runs tmp times; inside the body i is the number of bytes after *readPtr */
+        for (i = tmp; i--;)
+        {
+            if ((zeroCount == 2) && (*readPtr == 0x03))
+            {
+                /* emulation prevention byte shall be followed by one of the
+                 * following bytes: 0x00, 0x01, 0x02, 0x03. This implies that
+                 * emulation prevention 0x03 byte shall not be the last byte
+                 * of the stream. */
+                if ( (i == 0) || (*(readPtr+1) > 0x03) )
+                    return(HANTRO_NOK);
+
+                /* do not write emulation prevention byte */
+                readPtr++;
+                zeroCount = 0;
+            }
+            else
+            {
+                /* NAL unit shall not contain byte sequences 0x000000,
+                 * 0x000001 or 0x000002 */
+                if ( (zeroCount == 2) && (*readPtr <= 0x02) )
+                    return(HANTRO_NOK);
+
+                if (*readPtr == 0)
+                    zeroCount++;
+                else
+                    zeroCount = 0;
+
+                *writePtr++ = *readPtr++;
+            }
+        }
+
+        /* (readPtr - writePtr) indicates number of "removed" emulation
+         * prevention bytes -> subtract from stream buffer size */
+        pStrmData->strmBuffSize -= (u32)(readPtr - writePtr);
+    }
+
+    return(HANTRO_OK);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.h
new file mode 100755
index 0000000..36aec76
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_BYTE_STREAM_H
+#define H264SWDEC_BYTE_STREAM_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdExtractNalUnit(u8 *pByteStream, u32 len, strmData_t *pStrmData,
+    u32 *readBytes); /* HANTRO_OK or HANTRO_NOK; *readBytes = bytes consumed */
+
+#endif /* #ifndef H264SWDEC_BYTE_STREAM_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.c
new file mode 100755
index 0000000..91d78bd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.c
@@ -0,0 +1,916 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          DecodeCoeffToken
+          DecodeLevelPrefix
+          DecodeTotalZeros
+          DecodeRunBefore
+          DecodeResidualBlockCavlc
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_cavlc.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/* Following descriptions use term "information field" to represent combination
+ * of certain decoded symbol value and the length of the corresponding variable
+ * length code word. For example, total_zeros information field consists of
+ * 4 bits symbol value (bits [4,7]) along with four bits to represent length
+ * of the VLC code word (bits [0,3]) */
+
+/* macro to obtain length of the coeff token information field, bits [0,4]  */
+#define LENGTH_TC(vlc) ((vlc) & 0x1F)
+/* macro to obtain length of the other information fields, bits [0,3] */
+#define LENGTH(vlc) ((vlc) & 0xF)
+/* macro to obtain code word from the information fields, bits [4,7] */
+#define INFO(vlc) (((vlc) >> 4) & 0xF)  /* 4 MSB bits contain information */
+/* macro to obtain trailing ones from the coeff token information word,
+ * bits [5,10]; the argument is parenthesized so expressions expand safely */
+#define TRAILING_ONES(coeffToken) (((coeffToken) >> 5) & 0x3F)
+/* macro to obtain total coeff from the coeff token information word,
+ * bits [11,15] */
+#define TOTAL_COEFF(coeffToken) (((coeffToken) >> 11) & 0x1F)
+
+#define VLC_NOT_FOUND 0xFFFFFFFEU /* error return of DecodeLevelPrefix */
+
+/* VLC tables for coeff_token. Because of long codes (max. 16 bits) some of the
+ * tables have been split into multiple separate tables. Each array/table
+ * element has the following structure:
+ * [5 bits for tot.coeff.] [6 bits for tr.ones] [5 bits for VLC length]
+ * If there is a 0x0000 value, it means that there is no corresponding VLC
+ * codeword for that index. */
+
+/* VLC lengths up to 6 bits, 0 <= nC < 2 */
+static const u16 coeffToken0_0[32] = {
+    0x0000,0x0000,0x0000,0x2066,0x1026,0x0806,0x1865,0x1865,
+    0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,
+    0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,
+    0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,0x0822};
+
+/* VLC lengths up to 10 bits, 0 <= nC < 2 */
+static const u16 coeffToken0_1[48] = {
+    0x0000,0x0000,0x0000,0x0000,0x406a,0x304a,0x282a,0x200a,
+    0x3869,0x3869,0x2849,0x2849,0x2029,0x2029,0x1809,0x1809,
+    0x3068,0x3068,0x3068,0x3068,0x2048,0x2048,0x2048,0x2048,
+    0x1828,0x1828,0x1828,0x1828,0x1008,0x1008,0x1008,0x1008,
+    0x2867,0x2867,0x2867,0x2867,0x2867,0x2867,0x2867,0x2867,
+    0x1847,0x1847,0x1847,0x1847,0x1847,0x1847,0x1847,0x1847};
+
+/* VLC lengths up to 14 bits, 0 <= nC < 2 */
+static const u16 coeffToken0_2[56] = {
+    0x606e,0x584e,0x502e,0x500e,0x586e,0x504e,0x482e,0x480e,
+    0x400d,0x400d,0x484d,0x484d,0x402d,0x402d,0x380d,0x380d,
+    0x506d,0x506d,0x404d,0x404d,0x382d,0x382d,0x300d,0x300d,
+    0x486b,0x486b,0x486b,0x486b,0x486b,0x486b,0x486b,0x486b,
+    0x384b,0x384b,0x384b,0x384b,0x384b,0x384b,0x384b,0x384b,
+    0x302b,0x302b,0x302b,0x302b,0x302b,0x302b,0x302b,0x302b,
+    0x280b,0x280b,0x280b,0x280b,0x280b,0x280b,0x280b,0x280b};
+
+/* VLC lengths up to 16 bits, 0 <= nC < 2 */
+static const u16 coeffToken0_3[32] = {
+    0x0000,0x0000,0x682f,0x682f,0x8010,0x8050,0x8030,0x7810,
+    0x8070,0x7850,0x7830,0x7010,0x7870,0x7050,0x7030,0x6810,
+    0x706f,0x706f,0x684f,0x684f,0x602f,0x602f,0x600f,0x600f,
+    0x686f,0x686f,0x604f,0x604f,0x582f,0x582f,0x580f,0x580f};
+
+/* VLC lengths up to 6 bits, 2 <= nC < 4 */
+static const u16 coeffToken2_0[32] = {
+    0x0000,0x0000,0x0000,0x0000,0x3866,0x2046,0x2026,0x1006,
+    0x3066,0x1846,0x1826,0x0806,0x2865,0x2865,0x1025,0x1025,
+    0x2064,0x2064,0x2064,0x2064,0x1864,0x1864,0x1864,0x1864,
+    0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,0x1043};
+
+/* VLC lengths up to 9 bits, 2 <= nC < 4 */
+static const u16 coeffToken2_1[32] = {
+    0x0000,0x0000,0x0000,0x0000,0x4869,0x3849,0x3829,0x3009,
+    0x2808,0x2808,0x3048,0x3048,0x3028,0x3028,0x2008,0x2008,
+    0x4067,0x4067,0x4067,0x4067,0x2847,0x2847,0x2847,0x2847,
+    0x2827,0x2827,0x2827,0x2827,0x1807,0x1807,0x1807,0x1807};
+
+/* VLC lengths up to 14 bits, 2 <= nC < 4 */
+static const u16 coeffToken2_2[128] = {
+    0x0000,0x0000,0x786d,0x786d,0x806e,0x804e,0x802e,0x800e,
+    0x782e,0x780e,0x784e,0x702e,0x704d,0x704d,0x700d,0x700d,
+    0x706d,0x706d,0x684d,0x684d,0x682d,0x682d,0x680d,0x680d,
+    0x686d,0x686d,0x604d,0x604d,0x602d,0x602d,0x600d,0x600d,
+    0x580c,0x580c,0x580c,0x580c,0x584c,0x584c,0x584c,0x584c,
+    0x582c,0x582c,0x582c,0x582c,0x500c,0x500c,0x500c,0x500c,
+    0x606c,0x606c,0x606c,0x606c,0x504c,0x504c,0x504c,0x504c,
+    0x502c,0x502c,0x502c,0x502c,0x480c,0x480c,0x480c,0x480c,
+    0x586b,0x586b,0x586b,0x586b,0x586b,0x586b,0x586b,0x586b,
+    0x484b,0x484b,0x484b,0x484b,0x484b,0x484b,0x484b,0x484b,
+    0x482b,0x482b,0x482b,0x482b,0x482b,0x482b,0x482b,0x482b,
+    0x400b,0x400b,0x400b,0x400b,0x400b,0x400b,0x400b,0x400b,
+    0x506b,0x506b,0x506b,0x506b,0x506b,0x506b,0x506b,0x506b,
+    0x404b,0x404b,0x404b,0x404b,0x404b,0x404b,0x404b,0x404b,
+    0x402b,0x402b,0x402b,0x402b,0x402b,0x402b,0x402b,0x402b,
+    0x380b,0x380b,0x380b,0x380b,0x380b,0x380b,0x380b,0x380b};
+
+/* VLC lengths up to 6 bits, 4 <= nC < 8 */
+static const u16 coeffToken4_0[64] = {
+    0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+    0x1806,0x3846,0x3826,0x1006,0x4866,0x3046,0x3026,0x0806,
+    0x2825,0x2825,0x2845,0x2845,0x2025,0x2025,0x2045,0x2045,
+    0x1825,0x1825,0x4065,0x4065,0x1845,0x1845,0x1025,0x1025,
+    0x3864,0x3864,0x3864,0x3864,0x3064,0x3064,0x3064,0x3064,
+    0x2864,0x2864,0x2864,0x2864,0x2064,0x2064,0x2064,0x2064,
+    0x1864,0x1864,0x1864,0x1864,0x1044,0x1044,0x1044,0x1044,
+    0x0824,0x0824,0x0824,0x0824,0x0004,0x0004,0x0004,0x0004};
+
+/* VLC lengths up to 10 bits, 4 <= nC < 8 */
+static const u16 coeffToken4_1[128] = {
+    0x0000,0x800a,0x806a,0x804a,0x802a,0x780a,0x786a,0x784a,
+    0x782a,0x700a,0x706a,0x704a,0x702a,0x680a,0x6829,0x6829,
+    0x6009,0x6009,0x6849,0x6849,0x6029,0x6029,0x5809,0x5809,
+    0x6869,0x6869,0x6049,0x6049,0x5829,0x5829,0x5009,0x5009,
+    0x6068,0x6068,0x6068,0x6068,0x5848,0x5848,0x5848,0x5848,
+    0x5028,0x5028,0x5028,0x5028,0x4808,0x4808,0x4808,0x4808,
+    0x5868,0x5868,0x5868,0x5868,0x5048,0x5048,0x5048,0x5048,
+    0x4828,0x4828,0x4828,0x4828,0x4008,0x4008,0x4008,0x4008,
+    0x3807,0x3807,0x3807,0x3807,0x3807,0x3807,0x3807,0x3807,
+    0x3007,0x3007,0x3007,0x3007,0x3007,0x3007,0x3007,0x3007,
+    0x4847,0x4847,0x4847,0x4847,0x4847,0x4847,0x4847,0x4847,
+    0x2807,0x2807,0x2807,0x2807,0x2807,0x2807,0x2807,0x2807,
+    0x5067,0x5067,0x5067,0x5067,0x5067,0x5067,0x5067,0x5067,
+    0x4047,0x4047,0x4047,0x4047,0x4047,0x4047,0x4047,0x4047,
+    0x4027,0x4027,0x4027,0x4027,0x4027,0x4027,0x4027,0x4027,
+    0x2007,0x2007,0x2007,0x2007,0x2007,0x2007,0x2007,0x2007};
+
+/* fixed 6 bit length VLC, 8 <= nC */
+static const u16 coeffToken8[64] = {
+    0x0806,0x0826,0x0000,0x0006,0x1006,0x1026,0x1046,0x0000,
+    0x1806,0x1826,0x1846,0x1866,0x2006,0x2026,0x2046,0x2066,
+    0x2806,0x2826,0x2846,0x2866,0x3006,0x3026,0x3046,0x3066,
+    0x3806,0x3826,0x3846,0x3866,0x4006,0x4026,0x4046,0x4066,
+    0x4806,0x4826,0x4846,0x4866,0x5006,0x5026,0x5046,0x5066,
+    0x5806,0x5826,0x5846,0x5866,0x6006,0x6026,0x6046,0x6066,
+    0x6806,0x6826,0x6846,0x6866,0x7006,0x7026,0x7046,0x7066,
+    0x7806,0x7826,0x7846,0x7866,0x8006,0x8026,0x8046,0x8066};
+
+/* VLC lengths up to 3 bits, nC == -1 */
+static const u16 coeffTokenMinus1_0[8] = {
+    0x0000,0x1043,0x0002,0x0002,0x0821,0x0821,0x0821,0x0821};
+
+/* VLC lengths up to 8 bits, nC == -1 */
+static const u16 coeffTokenMinus1_1[32] = {
+    0x2067,0x2067,0x2048,0x2028,0x1847,0x1847,0x1827,0x1827,
+    0x2006,0x2006,0x2006,0x2006,0x1806,0x1806,0x1806,0x1806,
+    0x1006,0x1006,0x1006,0x1006,0x1866,0x1866,0x1866,0x1866,
+    0x1026,0x1026,0x1026,0x1026,0x0806,0x0806,0x0806,0x0806};
+
+/* VLC tables for total_zeros. One table containing longer code, totalZeros_1,
+ * has been broken into two separate tables. Table elements have the
+ * following structure:
+ * [4 bits for info] [4 bits for VLC length] */
+
+/* VLC lengths up to 5 bits */
+static const u8 totalZeros_1_0[32] = {
+    0x00,0x00,0x65,0x55,0x44,0x44,0x34,0x34,
+    0x23,0x23,0x23,0x23,0x13,0x13,0x13,0x13,
+    0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+    0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01};
+
+/* VLC lengths up to 9 bits */
+static const u8 totalZeros_1_1[32] = {
+    0x00,0xf9,0xe9,0xd9,0xc8,0xc8,0xb8,0xb8,
+    0xa7,0xa7,0xa7,0xa7,0x97,0x97,0x97,0x97,
+    0x86,0x86,0x86,0x86,0x86,0x86,0x86,0x86,
+    0x76,0x76,0x76,0x76,0x76,0x76,0x76,0x76};
+
+static const u8 totalZeros_2[64] = {
+    0xe6,0xd6,0xc6,0xb6,0xa5,0xa5,0x95,0x95,
+    0x84,0x84,0x84,0x84,0x74,0x74,0x74,0x74,
+    0x64,0x64,0x64,0x64,0x54,0x54,0x54,0x54,
+    0x43,0x43,0x43,0x43,0x43,0x43,0x43,0x43,
+    0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,
+    0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
+    0x13,0x13,0x13,0x13,0x13,0x13,0x13,0x13,
+    0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03};
+
+static const u8 totalZeros_3[64] = {
+    0xd6,0xb6,0xc5,0xc5,0xa5,0xa5,0x95,0x95,
+    0x84,0x84,0x84,0x84,0x54,0x54,0x54,0x54,
+    0x44,0x44,0x44,0x44,0x04,0x04,0x04,0x04,
+    0x73,0x73,0x73,0x73,0x73,0x73,0x73,0x73,
+    0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,
+    0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,
+    0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
+    0x13,0x13,0x13,0x13,0x13,0x13,0x13,0x13};
+
+static const u8 totalZeros_4[32] = {
+    0xc5,0xb5,0xa5,0x05,0x94,0x94,0x74,0x74,
+    0x34,0x34,0x24,0x24,0x83,0x83,0x83,0x83,
+    0x63,0x63,0x63,0x63,0x53,0x53,0x53,0x53,
+    0x43,0x43,0x43,0x43,0x13,0x13,0x13,0x13};
+
+static const u8 totalZeros_5[32] = {
+    0xb5,0x95,0xa4,0xa4,0x84,0x84,0x24,0x24,
+    0x14,0x14,0x04,0x04,0x73,0x73,0x73,0x73,
+    0x63,0x63,0x63,0x63,0x53,0x53,0x53,0x53,
+    0x43,0x43,0x43,0x43,0x33,0x33,0x33,0x33};
+
+static const u8 totalZeros_6[64] = {
+    0xa6,0x06,0x15,0x15,0x84,0x84,0x84,0x84,
+    0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+    0x73,0x73,0x73,0x73,0x73,0x73,0x73,0x73,
+    0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,
+    0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,
+    0x43,0x43,0x43,0x43,0x43,0x43,0x43,0x43,
+    0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,
+    0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23};
+
+static const u8 totalZeros_7[64] = {
+    0x96,0x06,0x15,0x15,0x74,0x74,0x74,0x74,
+    0x83,0x83,0x83,0x83,0x83,0x83,0x83,0x83,
+    0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,
+    0x43,0x43,0x43,0x43,0x43,0x43,0x43,0x43,
+    0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,
+    0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
+    0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+    0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52};
+
+static const u8 totalZeros_8[64] = {
+    0x86,0x06,0x25,0x25,0x14,0x14,0x14,0x14,
+    0x73,0x73,0x73,0x73,0x73,0x73,0x73,0x73,
+    0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,
+    0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,
+    0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+    0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+    0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,
+    0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42};
+
+static const u8 totalZeros_9[64] = {
+    0x16,0x06,0x75,0x75,0x24,0x24,0x24,0x24,
+    0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,
+    0x62,0x62,0x62,0x62,0x62,0x62,0x62,0x62,
+    0x62,0x62,0x62,0x62,0x62,0x62,0x62,0x62,
+    0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,
+    0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,
+    0x32,0x32,0x32,0x32,0x32,0x32,0x32,0x32,
+    0x32,0x32,0x32,0x32,0x32,0x32,0x32,0x32};
+
+static const u8 totalZeros_10[32] = {
+    0x15,0x05,0x64,0x64,0x23,0x23,0x23,0x23,
+    0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+    0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,
+    0x32,0x32,0x32,0x32,0x32,0x32,0x32,0x32};
+
+static const u8 totalZeros_11[16] = {
+    0x04,0x14,0x23,0x23,0x33,0x33,0x53,0x53,
+    0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41};
+
+static const u8 totalZeros_12[16] = {
+    0x04,0x14,0x43,0x43,0x22,0x22,0x22,0x22,
+    0x31,0x31,0x31,0x31,0x31,0x31,0x31,0x31};
+
+static const u8 totalZeros_13[8] = {0x03,0x13,0x32,0x32,0x21,0x21,0x21,0x21};
+
+static const u8 totalZeros_14[4] = {0x02,0x12,0x21,0x21};
+
+/* VLC tables for run_before. Table elements have the following structure:
+ * [4 bits for info] [4 bits for VLC length]
+ */
+
+static const u8 runBefore_6[8] = {0x13,0x23,0x43,0x33,0x63,0x53,0x02,0x02};
+
+static const u8 runBefore_5[8] = {0x53,0x43,0x33,0x23,0x12,0x12,0x02,0x02};
+
+static const u8 runBefore_4[8] = {0x43,0x33,0x22,0x22,0x12,0x12,0x02,0x02};
+
+static const u8 runBefore_3[4] = {0x32,0x22,0x12,0x02};
+
+static const u8 runBefore_2[4] = {0x22,0x12,0x01,0x01};
+
+static const u8 runBefore_1[2] = {0x11,0x01};
+
+/* following four macros are used to handle stream buffer "cache" in the CAVLC
+ * decoding function */
+/* NOTE: INIT/SHOW/GET expect a variable named pStrmData in calling scope */
+/* macro to initialize stream buffer cache, fills the buffer (32 bits) */
+#define BUFFER_INIT(value, bits) \
+{ \
+    bits = 32; \
+    value = h264bsdShowBits32(pStrmData); \
+}
+
+/* macro to peek numBits bits into outVal without consuming them. Refills the
+ * buffer if needed; expands to return(HANTRO_NOK) at end of stream */
+#define BUFFER_SHOW(value, bits, outVal, numBits) \
+{ \
+    if (bits < (numBits)) \
+    { \
+        if(h264bsdFlushBits(pStrmData,32-bits) == END_OF_STREAM) \
+            return(HANTRO_NOK); \
+        value = h264bsdShowBits32(pStrmData); \
+        bits = 32; \
+    } \
+    (outVal) = value >> (32 - (numBits)); \
+}
+
+/* macro to flush numBits bits from the buffer */
+#define BUFFER_FLUSH(value, bits, numBits) \
+{ \
+    value <<= (numBits); \
+    bits -= (numBits); \
+}
+
+/* macro to read and flush numBits bits from the buffer into outVal. Refills
+ * the buffer if needed; expands to return(HANTRO_NOK) at end of stream */
+#define BUFFER_GET(value, bits, outVal, numBits) \
+{ \
+    if (bits < (numBits)) \
+    { \
+        if(h264bsdFlushBits(pStrmData,32-bits) == END_OF_STREAM) \
+            return(HANTRO_NOK); \
+        value = h264bsdShowBits32(pStrmData); \
+        bits = 32; \
+    } \
+    (outVal) = value >> (32 - (numBits)); \
+    value <<= (numBits); \
+    bits -= (numBits); \
+}
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 DecodeCoeffToken(u32 bits, u32 nc);
+
+static u32 DecodeLevelPrefix(u32 bits);
+
+static u32 DecodeTotalZeros(u32 bits, u32 totalCoeff, u32 isChromaDC);
+
+static u32 DecodeRunBefore(u32 bits,u32 zerosLeft);
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeCoeffToken
+
+        Functional description:
+          Function to decode coeff_token information field from the stream.
+          The lookup table is selected by nc and by the magnitude of bits.
+        Inputs:
+          u32 bits                  next 16 stream bits
+          u32 nc                    nC, see standard for details
+                                    (0..16, or (u32)(-1) for chroma DC)
+        Outputs:
+          u32  information field (11 bits for value, 5 bits for length)
+               or 0 if the table has no code word for the given bits
+------------------------------------------------------------------------------*/
+
+u32 DecodeCoeffToken(u32 bits, u32 nc)
+{
+
+/* Variables */
+
+    u32 value;
+
+/* Code */
+
+    /* standard defines that nc for decoding of chroma dc coefficients is -1,
+     * represented by u32 here -> -1 maps to 2^32 - 1 */
+    ASSERT(nc <= 16 || nc == (u32)(-1));
+
+    if (nc < 2) /* 0 <= nC < 2 */
+    {
+        if (bits >= 0x8000)
+        {
+            value = 0x0001; /* 1-bit code: no coefficients, length 1 */
+        }
+        else if (bits >= 0x0C00)
+            value = coeffToken0_0[bits >> 10];
+        else if (bits >= 0x0100)
+            value = coeffToken0_1[bits >> 6];
+        else if (bits >= 0x0020)
+            value = coeffToken0_2[(bits>>2)-8]; /* (bits>>2) is 8..63 here */
+        else
+            value = coeffToken0_3[bits];
+    }
+    else if (nc < 4) /* 2 <= nC < 4 */
+    {
+        if (bits >= 0x8000)
+        {
+            value = bits & 0x4000 ? 0x0002 : 0x0822; /* 2-bit codes */
+        }
+        else if (bits >= 0x1000)
+            value = coeffToken2_0[bits >> 10];
+        else if (bits >= 0x0200)
+            value = coeffToken2_1[bits >> 7];
+        else
+            value = coeffToken2_2[bits>>2];
+    }
+    else if (nc < 8) /* 4 <= nC < 8 */
+    {
+        value = coeffToken4_0[bits >> 10];
+        if (!value) /* not a short code, try the table of longer codes */
+            value = coeffToken4_1[bits>>6];
+    }
+    else if (nc <= 16) /* 8 <= nC, fixed length codes */
+    {
+        value = coeffToken8[bits>>10];
+    }
+    else /* nC == -1 */
+    {
+        value = coeffTokenMinus1_0[bits >> 13];
+        if (!value) /* not a short code, try the table of longer codes */
+            value = coeffTokenMinus1_1[bits>>8];
+    }
+
+    return(value);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeLevelPrefix
+
+        Functional description:
+          Function to decode level_prefix information field from the stream
+          (the number of zero bits preceding the first 1 bit of the 16 bits)
+        Inputs:
+          u32 bits      next 16 stream bits
+
+        Outputs:
+          u32  level_prefix information field or VLC_NOT_FOUND
+               (VLC_NOT_FOUND when bits is zero, i.e. no 1 bit was found)
+------------------------------------------------------------------------------*/
+
+u32 DecodeLevelPrefix(u32 bits)
+{
+
+/* Variables */
+
+    u32 numZeros;
+
+/* Code */
+
+    if (bits >= 0x8000) /* most significant of the 16 bits is set */
+        numZeros = 0;
+    else if (bits >= 0x4000)
+        numZeros = 1;
+    else if (bits >= 0x2000)
+        numZeros = 2;
+    else if (bits >= 0x1000)
+        numZeros = 3;
+    else if (bits >= 0x0800)
+        numZeros = 4;
+    else if (bits >= 0x0400)
+        numZeros = 5;
+    else if (bits >= 0x0200)
+        numZeros = 6;
+    else if (bits >= 0x0100)
+        numZeros = 7;
+    else if (bits >= 0x0080)
+        numZeros = 8;
+    else if (bits >= 0x0040)
+        numZeros = 9;
+    else if (bits >= 0x0020)
+        numZeros = 10;
+    else if (bits >= 0x0010)
+        numZeros = 11;
+    else if (bits >= 0x0008)
+        numZeros = 12;
+    else if (bits >= 0x0004)
+        numZeros = 13;
+    else if (bits >= 0x0002)
+        numZeros = 14;
+    else if (bits >= 0x0001)
+        numZeros = 15;
+    else /* more than 15 zeros encountered which is an error */
+        return(VLC_NOT_FOUND);
+
+    return(numZeros);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeTotalZeros
+
+        Functional description:
+          Function to decode total_zeros information field from the stream
+
+        Inputs:
+          u32 bits                  next 9 stream bits
+          u32 totalCoeff            total number of coefficients for the block
+                                    being decoded
+          u32 isChromaDC           flag to indicate chroma DC block
+                                    (totalCoeff is 1..3 then, 1..15 otherwise)
+        Outputs:
+          u32  information field (4 bits value, 4 bits length)
+               or 0 if the table has no code word for the given bits
+------------------------------------------------------------------------------*/
+
+u32 DecodeTotalZeros(u32 bits, u32 totalCoeff, u32 isChromaDC)
+{
+
+/* Variables */
+
+    u32 value = 0x0;
+
+/* Code */
+
+    ASSERT(totalCoeff);
+
+    if (!isChromaDC)
+    {
+        ASSERT(totalCoeff < 16);
+        switch (totalCoeff)
+        {
+            case 1:
+                value = totalZeros_1_0[bits >> 4];
+                if (!value) /* longer code, look up with all 9 bits */
+                    value = totalZeros_1_1[bits];
+                break;
+
+            case 2:
+                value = totalZeros_2[bits >> 3];
+                break;
+
+            case 3:
+                value = totalZeros_3[bits >> 3];
+                break;
+
+            case 4:
+                value = totalZeros_4[bits >> 4];
+                break;
+
+            case 5:
+                value = totalZeros_5[bits >> 4];
+                break;
+
+            case 6:
+                value = totalZeros_6[bits >> 3];
+                break;
+
+            case 7:
+                value = totalZeros_7[bits >> 3];
+                break;
+
+            case 8:
+                value = totalZeros_8[bits >> 3];
+                break;
+
+            case 9:
+                value = totalZeros_9[bits >> 3];
+                break;
+
+            case 10:
+                value = totalZeros_10[bits >> 4];
+                break;
+
+            case 11:
+                value = totalZeros_11[bits >> 5];
+                break;
+
+            case 12:
+                value = totalZeros_12[bits >> 5];
+                break;
+
+            case 13:
+                value = totalZeros_13[bits >> 6];
+                break;
+
+            case 14:
+                value = totalZeros_14[bits >> 7];
+                break;
+
+            default: /* case 15 */
+                value = (bits >> 8) ? 0x11 : 0x01; /* 1-bit code: 1 or 0 zeros */
+                break;
+        }
+    }
+    else
+    {
+        ASSERT(totalCoeff < 4);
+        bits >>= 6; /* keep the 3 most significant of the 9 bits */
+        if (bits > 3)
+            value = 0x01; /* code '1': 0 zeros, length 1 */
+        else
+        {
+            if (totalCoeff == 3)
+                value = 0x11; /* code '0': 1 zero, length 1 */
+            else if (bits > 1)
+            {
+                value = 0x12; /* code '01': 1 zero, length 2 */
+            }
+            else if (totalCoeff == 2)
+                value = 0x22; /* code '00': 2 zeros, length 2 */
+            else if (bits)
+                value = 0x23; /* code '001': 2 zeros, length 3 */
+            else
+                value = 0x33; /* code '000': 3 zeros, length 3 */
+        }
+    }
+
+    return(value);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeRunBefore
+
+        Functional description:
+          Function to decode run_before information field from the stream
+
+        Inputs:
+          u32 bits                  next 11 stream bits
+          u32 zerosLeft             number of zeros left for the current block
+
+        Outputs:
+          u32  information field (4 bits value, 4 bits length)
+               or 0 if no code word matches or the run exceeds zerosLeft
+------------------------------------------------------------------------------*/
+
+u32 DecodeRunBefore(u32 bits, u32 zerosLeft)
+{
+
+/* Variables */
+
+    u32 value = 0x0;
+
+/* Code */
+
+    switch (zerosLeft)
+    {
+        case 1:
+            value = runBefore_1[bits>>10];
+            break;
+
+        case 2:
+            value = runBefore_2[bits>>9];
+            break;
+
+        case 3:
+            value = runBefore_3[bits>>9];
+            break;
+
+        case 4:
+            value = runBefore_4[bits>>8];
+            break;
+
+        case 5:
+            value = runBefore_5[bits>>8];
+            break;
+
+        case 6:
+            value = runBefore_6[bits>>8];
+            break;
+
+        default: /* zerosLeft > 6 (the caller only passes zerosLeft > 0) */
+            if (bits >= 0x100)
+                value = ((7-(bits>>8))<<4)+0x3; /* 3-bit codes, run 0..6 */
+            else if (bits >= 0x80)
+                value = 0x74;
+            else if (bits >= 0x40)
+                value = 0x85;
+            else if (bits >= 0x20)
+                value = 0x96;
+            else if (bits >= 0x10)
+                value = 0xa7;
+            else if (bits >= 0x8)
+                value = 0xb8;
+            else if (bits >= 0x4)
+                value = 0xc9;
+            else if (bits >= 0x2)
+                value = 0xdA;
+            else if (bits)
+                value = 0xeB;
+            if (INFO(value) > zerosLeft) /* run longer than zeros available */
+                value = 0;
+            break;
+    }
+
+    return(value);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdDecodeResidualBlockCavlc
+
+        Functional description:
+          Function to decode one CAVLC coded block. This corresponds to
+          syntax elements residual_block_cavlc() in the standard.
+
+        Inputs:
+          pStrmData             pointer to stream data structure
+          nc                    nC value, -1 for chroma DC blocks
+          maxNumCoeff           maximum number of residual coefficients
+                                (4, 15 or 16)
+        Outputs:
+          coeffLevel            stores decoded coefficient levels
+                                (caller must have zeroed the array)
+        Returns:
+          numCoeffs             on bits [4,11] if successful
+          coeffMap              on bits [16,31] if successful, this is bit map
+                                where each bit indicates if the corresponding
+                                coefficient was zero (0) or non-zero (1)
+          HANTRO_NOK            end of stream or error in stream
+                                (e.g. totalCoeff + total_zeros > maxNumCoeff)
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeResidualBlockCavlc(
+  strmData_t *pStrmData,
+  i32 *coeffLevel,
+  i32 nc,
+  u32 maxNumCoeff)
+{
+
+/* Variables */
+
+    u32 i, tmp, totalCoeff, trailingOnes, suffixLength, levelPrefix;
+    u32 levelSuffix, zerosLeft, bit;
+    i32 level[16];
+    u32 run[16];
+    /* stream "cache" */
+    u32 bufferValue;
+    u32 bufferBits;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(coeffLevel);
+    ASSERT(nc > -2);
+    ASSERT(maxNumCoeff == 4 || maxNumCoeff == 15 || maxNumCoeff == 16);
+    ASSERT(VLC_NOT_FOUND != END_OF_STREAM);
+
+    /* assume that coeffLevel array has been "cleaned" by caller */
+
+    BUFFER_INIT(bufferValue, bufferBits);
+
+    /*lint -e774 disable lint warning on always false comparison */
+    BUFFER_SHOW(bufferValue, bufferBits, bit, 16);
+    /*lint +e774 */
+    tmp = DecodeCoeffToken(bit, (u32)nc);
+    if (!tmp)
+        return(HANTRO_NOK);
+    BUFFER_FLUSH(bufferValue, bufferBits, LENGTH_TC(tmp));
+
+    totalCoeff = TOTAL_COEFF(tmp);
+    if (totalCoeff > maxNumCoeff)
+        return(HANTRO_NOK);
+    trailingOnes = TRAILING_ONES(tmp);
+
+    if (totalCoeff != 0)
+    {
+        i = 0;
+        /* nonzero coefficients: +/- 1 */
+        if (trailingOnes)
+        {
+            BUFFER_GET(bufferValue, bufferBits, bit, trailingOnes);
+            tmp = 1 << (trailingOnes - 1); /* mask of the first sign bit */
+            for (; tmp; i++)
+            {
+                level[i] = bit & tmp ? -1 : 1;
+                tmp >>= 1;
+            }
+        }
+
+        /* other levels */
+        if (totalCoeff > 10 && trailingOnes < 3)
+            suffixLength = 1;
+        else
+            suffixLength = 0;
+
+        for (; i < totalCoeff; i++)
+        {
+            BUFFER_SHOW(bufferValue, bufferBits, bit, 16);
+            levelPrefix = DecodeLevelPrefix(bit);
+            if (levelPrefix == VLC_NOT_FOUND)
+                return(HANTRO_NOK);
+            BUFFER_FLUSH(bufferValue, bufferBits, levelPrefix+1);
+
+            /* tmp holds the number of level_suffix bits to read */
+            if (levelPrefix < 14)
+                tmp = suffixLength;
+            else if (levelPrefix == 14)
+            {
+                tmp = suffixLength ? suffixLength : 4;
+            }
+            else
+            {
+                /* setting suffixLength to 1 here corresponds to adding 15
+                 * to levelCode value if levelPrefix == 15 and
+                 * suffixLength == 0 */
+                if (!suffixLength)
+                    suffixLength = 1;
+                tmp = 12;
+            }
+
+            if (suffixLength)
+                levelPrefix <<= suffixLength;
+
+            if (tmp)
+            {
+                BUFFER_GET(bufferValue, bufferBits, levelSuffix, tmp);
+                levelPrefix += levelSuffix;
+            }
+
+            tmp = levelPrefix; /* tmp is the level code from here on */
+
+            if (i == trailingOnes && trailingOnes < 3)
+                tmp += 2;
+
+            level[i] = (tmp+2)>>1;
+
+            if (suffixLength == 0)
+                suffixLength = 1;
+
+            if ((level[i] > (3 << (suffixLength - 1))) && suffixLength < 6)
+                suffixLength++;
+
+            if (tmp & 0x1) /* odd level code means negative level */
+                level[i] = -level[i];
+        }
+
+        /* zero runs; coefficients + zeros must fit in maxNumCoeff */
+        if (totalCoeff < maxNumCoeff)
+        {
+            BUFFER_SHOW(bufferValue, bufferBits, bit,9);
+            zerosLeft = DecodeTotalZeros(bit, totalCoeff,
+                                        (u32)(maxNumCoeff == 4));
+            if (!zerosLeft || (totalCoeff + INFO(zerosLeft) > maxNumCoeff))
+                return(HANTRO_NOK);
+            BUFFER_FLUSH(bufferValue, bufferBits, LENGTH(zerosLeft));
+            zerosLeft = INFO(zerosLeft);
+        }
+        else
+            zerosLeft = 0;
+
+        for (i = 0; i < totalCoeff - 1; i++)
+        {
+            if (zerosLeft > 0)
+            {
+                BUFFER_SHOW(bufferValue, bufferBits, bit,11);
+                tmp = DecodeRunBefore(bit, zerosLeft);
+                if (!tmp)
+                    return(HANTRO_NOK);
+                BUFFER_FLUSH(bufferValue, bufferBits, LENGTH(tmp));
+                run[i] = INFO(tmp);
+                zerosLeft -= run[i]++; /* run[i] becomes run_before + 1 */
+            }
+            else
+            {
+                run[i] = 1;
+            }
+        }
+
+        /* combining level and run, levelSuffix variable used to hold coeffMap,
+         * i.e. bit map indicating which coefficients had non-zero value. */
+
+        /*lint -esym(771,level,run) level and run are always initialized */
+        tmp = zerosLeft; /* position of the last decoded level */
+        coeffLevel[tmp] = level[totalCoeff-1];
+        levelSuffix = 1 << tmp;
+        for (i = totalCoeff-1; i--;)
+        {
+            tmp += run[i];
+            levelSuffix |= 1 << tmp;
+            coeffLevel[tmp] = level[i];
+        }
+
+    }
+    else
+        levelSuffix = 0;
+
+    /* consume from the stream the bits that were used from the cache */
+    if (h264bsdFlushBits(pStrmData, 32-bufferBits) != HANTRO_OK)
+        return(HANTRO_NOK);
+
+    return((totalCoeff << 4) | (levelSuffix << 16));
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.h
new file mode 100755
index 0000000..80353d39
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_CAVLC_H
+#define H264SWDEC_CAVLC_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeResidualBlockCavlc(
+  strmData_t *pStrmData,
+  i32 *coeffLevel,
+  i32 nc,
+  u32 maxNumCoeff);
+
+#endif /* #ifndef H264SWDEC_CAVLC_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cfg.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cfg.h
new file mode 100755
index 0000000..2baba5a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cfg.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_CFG_H
+#define H264SWDEC_CFG_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+#define MAX_NUM_REF_PICS 16
+#define MAX_NUM_SLICE_GROUPS 8
+#define MAX_NUM_SEQ_PARAM_SETS 32
+#define MAX_NUM_PIC_PARAM_SETS 256
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+#endif /* #ifdef H264SWDEC_CFG_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.c
new file mode 100755
index 0000000..493fb9e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.c
@@ -0,0 +1,626 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdConceal
+          ConcealMb
+          Transform
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_conceal.h"
+#include "h264bsd_util.h"
+#include "h264bsd_reconstruct.h"
+#include "h264bsd_dpb.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/*lint -e702 disable lint warning on right shift of signed quantity */
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 ConcealMb(mbStorage_t *pMb, image_t *currImage, u32 row, u32 col,
+    u32 sliceType, u8 *refData); /* refData may be NULL */
+
+static void Transform(i32 *data);
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdConceal
+
+        Functional description:
+            Perform error concealment for a picture. Two types of concealment
+            are performed based on sliceType:
+                1) copy from previous picture for P-slices.
+                2) concealment from neighbour pixels for I-slices
+
+            I-type concealment is based on ideas presented by Jarno Tulkki.
+            The concealment algorithm determines frequency domain coefficients
+            from the neighbour pixels, applies integer transform (the same
+            transform used in the residual processing) and uses the results as
+            pixel values for concealed macroblocks. Transform produces 4x4
+            array and one pixel value has to be used for 4x4 luma blocks and
+            2x2 chroma blocks.
+
+            Similar concealment is performed for whole picture (the choice
+            of the type is based on last successfully decoded slice header of
+            the picture but it is handled by the calling function). It is
+            acknowledged that this may result in wrong type of concealment
+            when a picture contains both types of slices. However,
+            determination of slice type macroblock-by-macroblock cannot
+            be done due to the fact that it is impossible to know to which
+            slice each corrupted (not successfully decoded) macroblock
+            belongs.
+
+            The error concealment is started by searching for the first properly
+            decoded macroblock and concealing the row containing the macroblock
+            in question. After that all macroblocks above the row in question
+            are concealed. Finally concealment of rows below is performed.
+            The order of concealment for 4x4 picture where macroblock 9 is the
+            first properly decoded one is as follows (properly decoded
+            macroblocks marked with 'x', numbers indicating the order of
+            concealment):
+
+               4  6  8 10
+               3  5  7  9
+               1  x  x  2
+              11 12 13 14
+
+            If all macroblocks of the picture are lost, the concealment is
+            copy of previous picture for P-type and setting the image to
+            constant gray (pixel value 128) for I-type.
+
+            Concealment sets quantization parameter of the concealed
+            macroblocks to value 40 and macroblock type to intra to enable
+            deblocking filter to smooth the edges of the concealed areas.
+
+        Inputs:
+            pStorage        pointer to storage structure
+            currImage       pointer to current image structure
+            sliceType       type of the slice
+
+        Outputs:
+            currImage       concealed macroblocks will be written here
+
+        Returns:
+            HANTRO_OK
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdConceal(storage_t *pStorage, image_t *currImage, u32 sliceType)
+{
+
+/* Variables */
+
+    u32 i, j; /* i: ref pic / macroblock / row index, j: column index */
+    u32 row, col; /* position of the first decoded macroblock */
+    u32 width, height; /* picture size in macroblocks */
+    u8 *refData; /* reference picture data, NULL if not used or not found */
+    mbStorage_t *mb;
+
+/* Code */
+
+    ASSERT(pStorage);
+    ASSERT(currImage);
+
+    DEBUG(("Concealing %s slice\n", IS_I_SLICE(sliceType) ?
+            "intra" : "inter"));
+
+    width = currImage->width;
+    height = currImage->height;
+    refData = NULL;
+    /* use reference picture with smallest available index */
+    if (IS_P_SLICE(sliceType) || (pStorage->intraConcealmentFlag != 0))
+    {
+        i = 0;
+        do
+        {
+            refData = h264bsdGetRefPicData(pStorage->dpb, i);
+            i++;
+            if (i >= 16) /* give up once indices 0..15 have been tried */
+                break;
+        } while (refData == NULL);
+    }
+
+    i = row = col = 0;
+    /* find first properly decoded macroblock -> start point for concealment */
+    while (i < pStorage->picSizeInMbs && !pStorage->mb[i].decoded)
+    {
+        i++;
+        col++;
+        if (col == width)
+        {
+            row++;
+            col = 0;
+        }
+    }
+
+    /* whole picture lost -> copy previous or set grey */
+    if (i == pStorage->picSizeInMbs)
+    {
+        if ( (IS_I_SLICE(sliceType) && (pStorage->intraConcealmentFlag == 0)) ||
+             refData == NULL)
+            H264SwDecMemset(currImage->data, 128, width*height*384);
+        else
+            H264SwDecMemcpy(currImage->data, refData, width*height*384);
+        /* 384 bytes per macroblock: 256 luma + 2*64 chroma (see ConcealMb) */
+        pStorage->numConcealedMbs = pStorage->picSizeInMbs;
+
+        /* no filtering if whole picture concealed */
+        for (i = 0; i < pStorage->picSizeInMbs; i++)
+            pStorage->mb[i].disableDeblockingFilterIdc = 1;
+
+        return(HANTRO_OK);
+    }
+
+    /* start from the row containing the first correct macroblock, conceal the
+     * row in question, all rows above that row and then continue downwards */
+    mb = pStorage->mb + row * width;
+    for (j = col; j--;) /* j runs col-1 .. 0: right neighbour is always done */
+    {
+        ConcealMb(mb+j, currImage, row, j, sliceType, refData);
+        mb[j].decoded = 1;
+        pStorage->numConcealedMbs++;
+    }
+    for (j = col + 1; j < width; j++)
+    {
+        if (!mb[j].decoded)
+        {
+            ConcealMb(mb+j, currImage, row, j, sliceType, refData);
+            mb[j].decoded = 1;
+            pStorage->numConcealedMbs++;
+        }
+    }
+    /* if previous row(s) could not be concealed -> conceal them now */
+    if (row)
+    {
+        for (j = 0; j < width; j++)
+        {
+            i = row - 1; /* column by column, bottom-up from row-1 to 0 */
+            mb = pStorage->mb + i*width + j;
+            do
+            {
+                ConcealMb(mb, currImage, i, j, sliceType, refData);
+                mb->decoded = 1;
+                pStorage->numConcealedMbs++;
+                mb -= width;
+            } while(i--); /* loop ends after row 0 has been processed */
+        }
+    }
+
+    /* process rows below the one containing the first correct macroblock */
+    for (i = row + 1; i < height; i++)
+    {
+        mb = pStorage->mb + i * width;
+
+        for (j = 0; j < width; j++)
+        {
+            if (!mb[j].decoded)
+            {
+                ConcealMb(mb+j, currImage, i, j, sliceType, refData);
+                mb[j].decoded = 1;
+                pStorage->numConcealedMbs++;
+            }
+        }
+    }
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: ConcealMb
+
+        Functional description:
+            Perform error concealment for one macroblock, location of the
+            macroblock in the picture indicated by row and col
+
+------------------------------------------------------------------------------*/
+
+u32 ConcealMb(mbStorage_t *pMb, image_t *currImage, u32 row, u32 col,
+    u32 sliceType, u8 *refData)
+{
+    /* Conceals one macroblock at (row, col); always returns HANTRO_OK. */
+/* Variables */
+
+    u32 i, j, comp;
+    u32 hor, ver; /* number of A/B and of L/R neighbours used */
+    u32 mbNum;
+    u32 width, height; /* picture size in macroblocks */
+    u8 *mbPos;
+    u8 data[384]; /* 256 luma bytes, then 64 bytes per chroma component */
+    u8 *pData;
+    i32 tmp;
+    i32 firstPhase[16]; /* 4x4 array handed to Transform() */
+    i32 *pTmp;
+    /* neighbours above, below, left and right */
+    i32 a[4], b[4], l[4], r[4];
+    u32 A, B, L, R; /* flags: corresponding neighbour is decoded and used */
+#ifdef H264DEC_OMXDL
+    u8 fillBuff[32*21 + 15 + 32];
+    u8 *pFill;
+#endif
+/* Code */
+
+    ASSERT(pMb);
+    ASSERT(!pMb->decoded);
+    ASSERT(currImage);
+    ASSERT(col < currImage->width);
+    ASSERT(row < currImage->height);
+
+#ifdef H264DEC_OMXDL
+    pFill = ALIGN(fillBuff, 16);
+#endif
+    width = currImage->width;
+    height = currImage->height;
+    mbNum = row * width + col;
+
+    h264bsdSetCurrImageMbPointers(currImage, mbNum);
+    /* top-left luma sample of this macroblock; luma row stride is width*16 */
+    mbPos = currImage->data + row * 16 * width * 16 + col * 16;
+    A = B = L = R = HANTRO_FALSE;
+
+    /* set qpY to 40 to enable some filtering in deblocking (stetson value) */
+    pMb->qpY = 40;
+    pMb->disableDeblockingFilterIdc = 0;
+    /* mbType set to intra to perform filtering despite the values of other
+     * boundary strength determination fields */
+    pMb->mbType = I_4x4;
+    pMb->filterOffsetA = 0;
+    pMb->filterOffsetB = 0;
+    pMb->chromaQpIndexOffset = 0;
+
+    if (IS_I_SLICE(sliceType))
+        H264SwDecMemset(data, 0, sizeof(data));
+    else
+    {
+        mv_t mv = {0,0}; /* zero motion vector */
+        image_t refImage;
+        refImage.width = width;
+        refImage.height = height;
+        refImage.data = refData;
+        if (refImage.data)
+        {
+#ifndef H264DEC_OMXDL
+            h264bsdPredictSamples(data, &mv, &refImage, col*16, row*16,
+                0, 0, 16, 16);
+#else
+            h264bsdPredictSamples(data, &mv, &refImage,
+                    ((row*16) + ((col*16)<<16)),
+                    0x00001010, pFill);
+#endif
+            h264bsdWriteMacroblock(currImage, data);
+
+            return(HANTRO_OK);
+        }
+        else
+            H264SwDecMemset(data, 0, sizeof(data));
+    }
+    /* from here on: concealment from neighbouring pixels */
+    H264SwDecMemset(firstPhase, 0, sizeof(firstPhase));
+
+    /* counter for number of neighbours used */
+    j = 0;
+    hor = ver = 0;
+    if (row && (pMb-width)->decoded)
+    {
+        A = HANTRO_TRUE;
+        pData = mbPos - width*16; /* luma row just above the macroblock */
+        a[0] = *pData++; a[0] += *pData++; a[0] += *pData++; a[0] += *pData++;
+        a[1] = *pData++; a[1] += *pData++; a[1] += *pData++; a[1] += *pData++;
+        a[2] = *pData++; a[2] += *pData++; a[2] += *pData++; a[2] += *pData++;
+        a[3] = *pData++; a[3] += *pData++; a[3] += *pData++; a[3] += *pData++;
+        j++;
+        hor++;
+        firstPhase[0] += a[0] + a[1] + a[2] + a[3];
+        firstPhase[1] += a[0] + a[1] - a[2] - a[3];
+    }
+    if ((row != height - 1) && (pMb+width)->decoded)
+    {
+        B = HANTRO_TRUE;
+        pData = mbPos + 16*width*16; /* first luma row of the macroblock below */
+        b[0] = *pData++; b[0] += *pData++; b[0] += *pData++; b[0] += *pData++;
+        b[1] = *pData++; b[1] += *pData++; b[1] += *pData++; b[1] += *pData++;
+        b[2] = *pData++; b[2] += *pData++; b[2] += *pData++; b[2] += *pData++;
+        b[3] = *pData++; b[3] += *pData++; b[3] += *pData++; b[3] += *pData++;
+        j++;
+        hor++;
+        firstPhase[0] += b[0] + b[1] + b[2] + b[3];
+        firstPhase[1] += b[0] + b[1] - b[2] - b[3];
+    }
+    if (col && (pMb-1)->decoded)
+    {
+        L = HANTRO_TRUE;
+        pData = mbPos - 1; /* luma column just left of the macroblock */
+        l[0] = pData[0]; l[0] += pData[16*width];
+        l[0] += pData[32*width]; l[0] += pData[48*width];
+        pData += 64*width;
+        l[1] = pData[0]; l[1] += pData[16*width];
+        l[1] += pData[32*width]; l[1] += pData[48*width];
+        pData += 64*width;
+        l[2] = pData[0]; l[2] += pData[16*width];
+        l[2] += pData[32*width]; l[2] += pData[48*width];
+        pData += 64*width;
+        l[3] = pData[0]; l[3] += pData[16*width];
+        l[3] += pData[32*width]; l[3] += pData[48*width];
+        j++;
+        ver++;
+        firstPhase[0] += l[0] + l[1] + l[2] + l[3];
+        firstPhase[4] += l[0] + l[1] - l[2] - l[3];
+    }
+    if ((col != width - 1) && (pMb+1)->decoded)
+    {
+        R = HANTRO_TRUE;
+        pData = mbPos + 16; /* first luma column of the macroblock to the right */
+        r[0] = pData[0]; r[0] += pData[16*width];
+        r[0] += pData[32*width]; r[0] += pData[48*width];
+        pData += 64*width;
+        r[1] = pData[0]; r[1] += pData[16*width];
+        r[1] += pData[32*width]; r[1] += pData[48*width];
+        pData += 64*width;
+        r[2] = pData[0]; r[2] += pData[16*width];
+        r[2] += pData[32*width]; r[2] += pData[48*width];
+        pData += 64*width;
+        r[3] = pData[0]; r[3] += pData[16*width];
+        r[3] += pData[32*width]; r[3] += pData[48*width];
+        j++;
+        ver++;
+        firstPhase[0] += r[0] + r[1] + r[2] + r[3];
+        firstPhase[4] += r[0] + r[1] - r[2] - r[3];
+    }
+
+    /* at least one properly decoded neighbour available */
+    ASSERT(j);
+
+    /*lint -esym(644,l,r,a,b) variable initialized above */
+    if (!hor && L && R)
+        firstPhase[1] = (l[0]+l[1]+l[2]+l[3]-r[0]-r[1]-r[2]-r[3]) >> 5;
+    else if (hor)
+        firstPhase[1] >>= (3+hor);
+
+    if (!ver && A && B)
+        firstPhase[4] = (a[0]+a[1]+a[2]+a[3]-b[0]-b[1]-b[2]-b[3]) >> 5;
+    else if (ver)
+        firstPhase[4] >>= (3+ver);
+    /* scale the summed neighbour pixels by the number of neighbours used */
+    switch (j)
+    {
+        case 1:
+            firstPhase[0] >>= 4;
+            break;
+
+        case 2:
+            firstPhase[0] >>= 5;
+            break;
+
+        case 3:
+            /* approximate (firstPhase[0]*4/3)>>6 */
+            firstPhase[0] = (21 * firstPhase[0]) >> 10;
+            break;
+
+        default: /* 4 */
+            firstPhase[0] >>= 6;
+            break;
+
+    }
+
+
+    Transform(firstPhase);
+    /* expand the 4x4 result: each value fills one 4x4 block of luma pixels */
+    for (i = 0, pData = data, pTmp = firstPhase; i < 256;)
+    {
+        tmp = pTmp[(i & 0xF)>>2];
+        /*lint -e734 CLIP1 macro results in value that fits into 8 bits */
+        *pData++ = CLIP1(tmp);
+        /*lint +e734 */
+
+        i++;
+        if (!(i & 0x3F))
+            pTmp += 4;
+    }
+
+    /* chroma components */
+    mbPos = currImage->data + width * height * 256 +
+       row * 8 * width * 8 + col * 8;
+    for (comp = 0; comp < 2; comp++)
+    {
+        /* same steps as for luma, on an 8x8 block with row stride width*8 */
+        H264SwDecMemset(firstPhase, 0, sizeof(firstPhase));
+
+        /* counter for number of neighbours used */
+        j = 0;
+        hor = ver = 0;
+        if (A)
+        {
+            pData = mbPos - width*8;
+            a[0] = *pData++; a[0] += *pData++;
+            a[1] = *pData++; a[1] += *pData++;
+            a[2] = *pData++; a[2] += *pData++;
+            a[3] = *pData++; a[3] += *pData++;
+            j++;
+            hor++;
+            firstPhase[0] += a[0] + a[1] + a[2] + a[3];
+            firstPhase[1] += a[0] + a[1] - a[2] - a[3];
+        }
+        if (B)
+        {
+            pData = mbPos + 8*width*8;
+            b[0] = *pData++; b[0] += *pData++;
+            b[1] = *pData++; b[1] += *pData++;
+            b[2] = *pData++; b[2] += *pData++;
+            b[3] = *pData++; b[3] += *pData++;
+            j++;
+            hor++;
+            firstPhase[0] += b[0] + b[1] + b[2] + b[3];
+            firstPhase[1] += b[0] + b[1] - b[2] - b[3];
+        }
+        if (L)
+        {
+            pData = mbPos - 1;
+            l[0] = pData[0]; l[0] += pData[8*width];
+            pData += 16*width;
+            l[1] = pData[0]; l[1] += pData[8*width];
+            pData += 16*width;
+            l[2] = pData[0]; l[2] += pData[8*width];
+            pData += 16*width;
+            l[3] = pData[0]; l[3] += pData[8*width];
+            j++;
+            ver++;
+            firstPhase[0] += l[0] + l[1] + l[2] + l[3];
+            firstPhase[4] += l[0] + l[1] - l[2] - l[3];
+        }
+        if (R)
+        {
+            pData = mbPos + 8;
+            r[0] = pData[0]; r[0] += pData[8*width];
+            pData += 16*width;
+            r[1] = pData[0]; r[1] += pData[8*width];
+            pData += 16*width;
+            r[2] = pData[0]; r[2] += pData[8*width];
+            pData += 16*width;
+            r[3] = pData[0]; r[3] += pData[8*width];
+            j++;
+            ver++;
+            firstPhase[0] += r[0] + r[1] + r[2] + r[3];
+            firstPhase[4] += r[0] + r[1] - r[2] - r[3];
+        }
+        if (!hor && L && R)
+            firstPhase[1] = (l[0]+l[1]+l[2]+l[3]-r[0]-r[1]-r[2]-r[3]) >> 4;
+        else if (hor)
+            firstPhase[1] >>= (2+hor);
+
+        if (!ver && A && B)
+            firstPhase[4] = (a[0]+a[1]+a[2]+a[3]-b[0]-b[1]-b[2]-b[3]) >> 4;
+        else if (ver)
+            firstPhase[4] >>= (2+ver);
+
+        switch (j)
+        {
+            case 1:
+                firstPhase[0] >>= 3;
+                break;
+
+            case 2:
+                firstPhase[0] >>= 4;
+                break;
+
+            case 3:
+                /* approximate (firstPhase[0]*4/3)>>5 */
+                firstPhase[0] = (21 * firstPhase[0]) >> 9;
+                break;
+
+            default: /* 4 */
+                firstPhase[0] >>= 5;
+                break;
+
+        }
+
+        Transform(firstPhase);
+        /* each transformed value fills one 2x2 block of chroma pixels */
+        pData = data + 256 + comp*64;
+        for (i = 0, pTmp = firstPhase; i < 64;)
+        {
+            tmp = pTmp[(i & 0x7)>>1];
+            /*lint -e734 CLIP1 macro results in value that fits into 8 bits */
+            *pData++ = CLIP1(tmp);
+            /*lint +e734 */
+
+            i++;
+            if (!(i & 0xF))
+                pTmp += 4;
+        }
+
+        /* increment pointers for cr */
+        mbPos += width * height * 64;
+    }
+
+    h264bsdWriteMacroblock(currImage, data);
+
+    return(HANTRO_OK);
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+    Function name: Transform
+
+        Functional description:
+            Simplified transform, assuming that only dc component and lowest
+            horizontal and lowest vertical component may be non-zero
+
+------------------------------------------------------------------------------*/
+
+void Transform(i32 *data)
+{
+    /* data: 4x4 array, row-major; only data[0], data[1], data[4] are inputs */
+    u32 col;
+    i32 tmp0, tmp1;
+    /* both non-dc terms zero: every output equals the dc value */
+    if (!data[1] && !data[4])
+    {
+        data[1]  = data[2]  = data[3]  = data[4]  = data[5]  =
+        data[6]  = data[7]  = data[8]  = data[9]  = data[10] =
+        data[11] = data[12] = data[13] = data[14] = data[15] = data[0];
+        return;
+    }
+    /* first horizontal transform for rows 0 and 1 */
+    tmp0 = data[0];
+    tmp1 = data[1];
+    data[0] = tmp0 + tmp1;
+    data[1] = tmp0 + (tmp1>>1);
+    data[2] = tmp0 - (tmp1>>1);
+    data[3] = tmp0 - tmp1;
+    /* row 1 holds only the vertical term, so it is constant across the row */
+    tmp0 = data[4];
+    data[5] = tmp0;
+    data[6] = tmp0;
+    data[7] = tmp0;
+
+    /* then vertical transform */
+    for (col = 4; col--; data++) /* four columns, data advances one column */
+    {
+        tmp0 = data[0];
+        tmp1 = data[4];
+        data[0] = tmp0 + tmp1;
+        data[4] = tmp0 + (tmp1>>1);
+        data[8] = tmp0 - (tmp1>>1);
+        data[12] = tmp0 - tmp1;
+    }
+
+}
+/*lint +e702 */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.h
new file mode 100755
index 0000000..3134670
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_CONCEAL_H
+#define H264SWDEC_CONCEAL_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_storage.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdConceal(storage_t *pStorage, image_t *currImage, u32 sliceType);
+
+#endif /* #ifdef H264SWDEC_CONCEAL_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_container.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_container.h
new file mode 100755
index 0000000..99b74a0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_container.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_CONTAINER_H
+#define H264SWDEC_CONTAINER_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_storage.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/* String length for tracing */
+#define H264DEC_TRACE_STR_LEN 100
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+typedef struct
+{
+    enum {
+        UNINITIALIZED,
+        INITIALIZED,
+        NEW_HEADERS
+    } decStat; /* NOTE(review): presumably the decoder instance state -- confirm */
+
+    u32 picNumber;
+    storage_t storage;
+#ifdef H264DEC_TRACE
+    char str[H264DEC_TRACE_STR_LEN]; /* trace string, only in H264DEC_TRACE builds */
+#endif
+} decContainer_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+#endif /* #ifdef H264SWDEC_CONTAINER_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.c
new file mode 100755
index 0000000..f8c1f76
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.c
@@ -0,0 +1,2417 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdFilterPicture
+          FilterVerLumaEdge
+          FilterHorLumaEdge
+          FilterHorLuma
+          FilterVerChromaEdge
+          FilterHorChromaEdge
+          FilterHorChroma
+          InnerBoundaryStrength
+          EdgeBoundaryStrength
+          GetBoundaryStrengths
+          IsSliceBoundaryOnLeft
+          IsSliceBoundaryOnTop
+          GetMbFilteringFlags
+          GetLumaEdgeThresholds
+          GetChromaEdgeThresholds
+          FilterLuma
+          FilterChroma
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_util.h"
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_deblocking.h"
+#include "h264bsd_dpb.h"
+
+#ifdef H264DEC_OMXDL
+#include "omxtypes.h"
+#include "omxVC.h"
+#include "armVC.h"
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/* Switch off the following Lint messages for this file:
+ * Info 701: Shift left of signed quantity (int)
+ * Info 702: Shift right of signed quantity (int)
+ */
+/*lint -e701 -e702 */
+
+/* array of alpha values, from the standard */
+static const u8 alphas[52] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,5,6,7,8,9,10,
+    12,13,15,17,20,22,25,28,32,36,40,45,50,56,63,71,80,90,101,113,127,144,162,
+    182,203,226,255,255};
+
+/* array of beta values, from the standard */
+static const u8 betas[52] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,3,3,3,3,4,4,
+    4,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18};
+
+
+
+#ifndef H264DEC_OMXDL
+/* array of tc0 values, from the standard, each triplet corresponds to a
+ * column in the table. Indexing goes as tc0[indexA][bS-1] */
+static const u8 tc0[52][3] = {
+    {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
+    {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
+    {0,0,0},{0,0,1},{0,0,1},{0,0,1},{0,0,1},{0,1,1},{0,1,1},{1,1,1},
+    {1,1,1},{1,1,1},{1,1,1},{1,1,2},{1,1,2},{1,1,2},{1,1,2},{1,2,3},
+    {1,2,3},{2,2,3},{2,2,4},{2,3,4},{2,3,4},{3,3,5},{3,4,6},{3,4,6},
+    {4,5,7},{4,5,8},{4,6,9},{5,7,10},{6,8,11},{6,8,13},{7,10,14},{8,11,16},
+    {9,12,18},{10,13,20},{11,15,23},{13,17,25}
+};
+#else
+/* array of tc0 values, from the standard, each 5-element row is indexed
+ * directly by bS (0..4); the bS 0 and 4 entries are zero: tc0[indexA][bS] */
+static const u8 tc0[52][5] = {
+    {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
+    {0, 0, 0, 0, 0}, {0, 0, 0, 1, 0}, {0, 0, 0, 1, 0}, {0, 0, 0, 1, 0},
+    {0, 0, 0, 1, 0}, {0, 0, 1, 1, 0}, {0, 0, 1, 1, 0}, {0, 1, 1, 1, 0},
+    {0, 1, 1, 1, 0}, {0, 1, 1, 1, 0}, {0, 1, 1, 1, 0}, {0, 1, 1, 2, 0},
+    {0, 1, 1, 2, 0}, {0, 1, 1, 2, 0}, {0, 1, 1, 2, 0}, {0, 1, 2, 3, 0},
+    {0, 1, 2, 3, 0}, {0, 2, 2, 3, 0}, {0, 2, 2, 4, 0}, {0, 2, 3, 4, 0},
+    {0, 2, 3, 4, 0}, {0, 3, 3, 5, 0}, {0, 3, 4, 6, 0}, {0, 3, 4, 6, 0},
+    {0, 4, 5, 7, 0}, {0, 4, 5, 8, 0}, {0, 4, 6, 9, 0}, {0, 5, 7, 10, 0},
+    {0, 6, 8, 11, 0}, {0, 6, 8, 13, 0}, {0, 7, 10, 14, 0},
+    {0, 8, 11, 16, 0}, {0, 9, 12, 18, 0}, {0, 10, 13, 20, 0},
+    {0, 11, 15, 23, 0}, {0, 13, 17, 25, 0}
+};
+#endif
+
+
+#ifndef H264DEC_OMXDL
+/* mapping of raster scan block index to 4x4 block index */
+static const u32 mb4x4Index[16] =
+    {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+typedef struct {
+    const u8 *tc0;
+    u32 alpha;
+    u32 beta;
+} edgeThreshold_t;
+
+typedef struct {
+    u32 top;
+    u32 left;
+} bS_t;
+
+enum { TOP = 0, LEFT = 1, INNER = 2 };
+#endif /* H264DEC_OMXDL */
+
+#define FILTER_LEFT_EDGE    0x04
+#define FILTER_TOP_EDGE     0x02
+#define FILTER_INNER_EDGE   0x01
+
+
+/* clipping table defined in intra_prediction.c */
+extern const u8 h264bsdClip[];
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 InnerBoundaryStrength(mbStorage_t *mb1, u32 i1, u32 i2);
+
+#ifndef H264DEC_OMXDL
+static u32 EdgeBoundaryStrength(mbStorage_t *mb1, mbStorage_t *mb2,
+    u32 i1, u32 i2);
+#else
+static u32 InnerBoundaryStrength2(mbStorage_t *mb1, u32 i1, u32 i2);
+static u32 EdgeBoundaryStrengthLeft(mbStorage_t *mb1, mbStorage_t *mb2);
+static u32 EdgeBoundaryStrengthTop(mbStorage_t *mb1, mbStorage_t *mb2);
+#endif
+
+static u32 IsSliceBoundaryOnLeft(mbStorage_t *mb);
+
+static u32 IsSliceBoundaryOnTop(mbStorage_t *mb);
+
+static u32 GetMbFilteringFlags(mbStorage_t *mb);
+
+#ifndef H264DEC_OMXDL
+
+static u32 GetBoundaryStrengths(mbStorage_t *mb, bS_t *bs, u32 flags);
+
+static void FilterLuma(u8 *data, bS_t *bS, edgeThreshold_t *thresholds,
+        u32 imageWidth);
+
+static void FilterChroma(u8 *cb, u8 *cr, bS_t *bS, edgeThreshold_t *thresholds,
+        u32 imageWidth);
+
+static void FilterVerLumaEdge( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+        u32 imageWidth);
+static void FilterHorLumaEdge( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+        i32 imageWidth);
+static void FilterHorLuma( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+        i32 imageWidth);
+
+static void FilterVerChromaEdge( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+  u32 imageWidth);
+static void FilterHorChromaEdge( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+  i32 imageWidth);
+static void FilterHorChroma( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+  i32 imageWidth);
+
+static void GetLumaEdgeThresholds(
+  edgeThreshold_t *thresholds,
+  mbStorage_t *mb,
+  u32 filteringFlags);
+
+static void GetChromaEdgeThresholds(
+  edgeThreshold_t *thresholds,
+  mbStorage_t *mb,
+  u32 filteringFlags,
+  i32 chromaQpIndexOffset);
+
+#else /* H264DEC_OMXDL */
+
+static u32 GetBoundaryStrengths(mbStorage_t *mb, u8 (*bs)[16], u32 flags);
+
+static void GetLumaEdgeThresholds(
+    mbStorage_t *mb,
+    u8 (*alpha)[2],
+    u8 (*beta)[2],
+    u8 (*threshold)[16],
+    u8 (*bs)[16],
+    u32 filteringFlags );
+
+static void GetChromaEdgeThresholds(
+    mbStorage_t *mb,
+    u8 (*alpha)[2],
+    u8 (*beta)[2],
+    u8 (*threshold)[8],
+    u8 (*bs)[16],
+    u32 filteringFlags,
+    i32 chromaQpIndexOffset);
+
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: IsSliceBoundaryOnLeft
+
+        Functional description:
+            Tells whether mb and its left neighbour (mb->mbA) carry different
+            sliceId values. Both pointers must be non-NULL (ASSERTed only).
+            Returns HANTRO_TRUE when the ids differ, else HANTRO_FALSE.
+------------------------------------------------------------------------------*/
+u32 IsSliceBoundaryOnLeft(mbStorage_t *mb)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(mb && mb->mbA);
+
+    if (mb->sliceId != mb->mbA->sliceId)
+        return(HANTRO_TRUE);
+    else
+        return(HANTRO_FALSE);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: IsSliceBoundaryOnTop
+
+        Functional description:
+            Tells whether mb and the macroblock above it (mb->mbB) carry
+            different sliceId values. Both pointers must be non-NULL (ASSERTed
+            only). Returns HANTRO_TRUE when the ids differ, else HANTRO_FALSE.
+------------------------------------------------------------------------------*/
+u32 IsSliceBoundaryOnTop(mbStorage_t *mb)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(mb && mb->mbB);
+
+    if (mb->sliceId != mb->mbB->sliceId)
+        return(HANTRO_TRUE);
+    else
+        return(HANTRO_FALSE);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetMbFilteringFlags
+
+        Functional description:
+          Determine which edges of a macroblock have to be filtered.
+          Returns a bit-wise OR of FILTER_LEFT_EDGE, FILTER_TOP_EDGE and
+          FILTER_INNER_EDGE; 0 when disableDeblockingFilterIdc == 1. The
+          left/top flag also needs the neighbour mbA/mbB to exist and, when
+          the idc is 2, to belong to the same slice as mb.
+------------------------------------------------------------------------------*/
+u32 GetMbFilteringFlags(mbStorage_t *mb)
+{
+
+/* Variables */
+
+    u32 flags = 0;
+
+/* Code */
+
+    ASSERT(mb);
+
+    /* nothing will be filtered if disableDeblockingFilterIdc == 1 */
+    if (mb->disableDeblockingFilterIdc != 1)
+    {
+        flags |= FILTER_INNER_EDGE;
+
+        /* filterLeftMbEdgeFlag, left mb is MB_A */
+        if (mb->mbA &&
+            ((mb->disableDeblockingFilterIdc != 2) ||
+             !IsSliceBoundaryOnLeft(mb)))
+            flags |= FILTER_LEFT_EDGE;
+
+        /* filterTopMbEdgeFlag, top mb is MB_B */
+        if (mb->mbB &&
+            ((mb->disableDeblockingFilterIdc != 2) ||
+             !IsSliceBoundaryOnTop(mb)))
+            flags |= FILTER_TOP_EDGE;
+    }
+
+    return(flags);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: InnerBoundaryStrength
+
+        Functional description:
+            Calculate boundary strength bS between blocks ind1 and ind2 of
+            the same macroblock (inner edge). Returns 2 if either block has
+            totalCoeff != 0, else 1 if hor or ver mv differ by >= 4 or refAddr
+            differs, else 0. Caller checks mb type -> no intra case here.
+------------------------------------------------------------------------------*/
+u32 InnerBoundaryStrength(mbStorage_t *mb1, u32 ind1, u32 ind2)
+{
+    i32 tmp1, tmp2; /* totalCoeff of block ind1 / ind2 */
+    i32 mv1, mv2, mv3, mv4; /* hor (mv1, mv2) and ver (mv3, mv4) components */
+
+    tmp1 = mb1->totalCoeff[ind1];
+    tmp2 = mb1->totalCoeff[ind2];
+    mv1 = mb1->mv[ind1].hor;
+    mv2 = mb1->mv[ind2].hor;
+    mv3 = mb1->mv[ind1].ver;
+    mv4 = mb1->mv[ind2].ver;
+
+    if (tmp1 || tmp2)
+    {
+        return 2;
+    }
+    else if ( (ABS(mv1 - mv2) >= 4) || (ABS(mv3 - mv4) >= 4) ||
+              (mb1->refAddr[ind1 >> 2] != mb1->refAddr[ind2 >> 2]) )
+    { /* refAddr holds one entry per group of four block indices (ind >> 2) */
+        return 1;
+    }
+    else
+        return 0;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: InnerBoundaryStrength2
+
+        Functional description:
+            Same as InnerBoundaryStrength but without the totalCoeff check:
+            returns 1 if hor or ver mv of blocks ind1/ind2 differ by >= 4 or
+            their refAddr entries differ, else 0. NOTE(review): prototype is
+            only under H264DEC_OMXDL; this definition is unconditional.
+------------------------------------------------------------------------------*/
+u32 InnerBoundaryStrength2(mbStorage_t *mb1, u32 ind1, u32 ind2)
+{
+    i32 tmp1, tmp2, tmp3, tmp4; /* hor (tmp1, tmp2) and ver (tmp3, tmp4) mv */
+
+    tmp1 = mb1->mv[ind1].hor;
+    tmp2 = mb1->mv[ind2].hor;
+    tmp3 = mb1->mv[ind1].ver;
+    tmp4 = mb1->mv[ind2].ver;
+
+    if ( (ABS(tmp1 - tmp2) >= 4) || (ABS(tmp3 - tmp4) >= 4) ||
+         (mb1->refAddr[ind1 >> 2] != mb1->refAddr[ind2 >> 2]))
+    {
+        return 1;
+    }
+    else
+        return 0;
+}
+#ifndef H264DEC_OMXDL
+/*------------------------------------------------------------------------------
+
+    Function: EdgeBoundaryStrength
+
+        Functional description:
+            Calculate boundary strength bS across a macroblock edge, between
+            block ind1 of mb1 and block ind2 of mb2. Returns 2 if either block
+            has totalCoeff != 0, else 1 if refAddr differs or hor/ver mv differ
+            by >= 4, else 0. Caller checks mb types -> no intra case here.
+------------------------------------------------------------------------------*/
+u32 EdgeBoundaryStrength(mbStorage_t *mb1, mbStorage_t *mb2,
+    u32 ind1, u32 ind2)
+{
+
+    if (mb1->totalCoeff[ind1] || mb2->totalCoeff[ind2])
+    {
+        return 2;
+    }
+    else if ((mb1->refAddr[ind1 >> 2] != mb2->refAddr[ind2 >> 2]) ||
+             (ABS(mb1->mv[ind1].hor - mb2->mv[ind2].hor) >= 4) ||
+             (ABS(mb1->mv[ind1].ver - mb2->mv[ind2].ver) >= 4))
+    {
+        return 1;
+    }
+    else
+        return 0;
+}
+
+#else /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: EdgeBoundaryStrengthTop
+
+        Functional description:
+            Calculate bS for the four blocks on the top edge of mb1 (blocks
+            0, 1, 4, 5) against blocks 10, 11, 14, 15 of mb2. Each bS (2 for
+            coefficients, 1 for mv/refAddr mismatch, else 0) is packed into
+            one byte of the result (shifts 0, 8, 16, 24). No intra checks.
+------------------------------------------------------------------------------*/
+u32 EdgeBoundaryStrengthTop(mbStorage_t *mb1, mbStorage_t *mb2)
+{
+    u32 topBs = 0;
+    u32 tmp1, tmp2, tmp3, tmp4; /* totalCoeff values of the pairs under test */
+
+    tmp1 = mb1->totalCoeff[0];
+    tmp2 = mb2->totalCoeff[10];
+    tmp3 = mb1->totalCoeff[1];
+    tmp4 = mb2->totalCoeff[11];
+    if (tmp1 || tmp2) /* byte 0: mb1 block 0 vs mb2 block 10 */
+    {
+        topBs = 2<<0;
+    }
+    else if ((ABS(mb1->mv[0].hor - mb2->mv[10].hor) >= 4) ||
+             (ABS(mb1->mv[0].ver - mb2->mv[10].ver) >= 4) ||
+             (mb1->refAddr[0] != mb2->refAddr[10 >> 2]))
+    {
+        topBs = 1<<0;
+    }
+    tmp1 = mb1->totalCoeff[4]; /* loaded early for the third pair */
+    tmp2 = mb2->totalCoeff[14];
+    if (tmp3 || tmp4) /* byte 1: mb1 block 1 vs mb2 block 11 */
+    {
+        topBs += 2<<8;
+    }
+    else if ((ABS(mb1->mv[1].hor - mb2->mv[11].hor) >= 4) ||
+             (ABS(mb1->mv[1].ver - mb2->mv[11].ver) >= 4) ||
+             (mb1->refAddr[0] != mb2->refAddr[11 >> 2]))
+    {
+        topBs += 1<<8;
+    }
+    tmp3 = mb1->totalCoeff[5]; /* loaded early for the fourth pair */
+    tmp4 = mb2->totalCoeff[15];
+    if (tmp1 || tmp2) /* byte 2: mb1 block 4 vs mb2 block 14 */
+    {
+        topBs += 2<<16;
+    }
+    else if ((ABS(mb1->mv[4].hor - mb2->mv[14].hor) >= 4) ||
+             (ABS(mb1->mv[4].ver - mb2->mv[14].ver) >= 4) ||
+             (mb1->refAddr[4 >> 2] != mb2->refAddr[14 >> 2]))
+    {
+        topBs += 1<<16;
+    }
+    if (tmp3 || tmp4) /* byte 3: mb1 block 5 vs mb2 block 15 */
+    {
+        topBs += 2<<24;
+    }
+    else if ((ABS(mb1->mv[5].hor - mb2->mv[15].hor) >= 4) ||
+             (ABS(mb1->mv[5].ver - mb2->mv[15].ver) >= 4) ||
+             (mb1->refAddr[5 >> 2] != mb2->refAddr[15 >> 2]))
+    {
+        topBs += 1<<24;
+    }
+
+    return topBs;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: EdgeBoundaryStrengthLeft
+
+        Functional description:
+            Calculate bS for the four blocks on the left edge of mb1 (blocks
+            0, 2, 8, 10) against blocks 5, 7, 13, 15 of mb2. Each bS (2 for
+            coefficients, 1 for mv/refAddr mismatch, else 0) is packed into
+            one byte of the result (shifts 0, 8, 16, 24). No intra checks.
+------------------------------------------------------------------------------*/
+u32 EdgeBoundaryStrengthLeft(mbStorage_t *mb1, mbStorage_t *mb2)
+{
+    u32 leftBs = 0;
+    u32 tmp1, tmp2, tmp3, tmp4; /* totalCoeff values of the pairs under test */
+
+    tmp1 = mb1->totalCoeff[0];
+    tmp2 = mb2->totalCoeff[5];
+    tmp3 = mb1->totalCoeff[2];
+    tmp4 = mb2->totalCoeff[7];
+
+    if (tmp1 || tmp2) /* byte 0: mb1 block 0 vs mb2 block 5 */
+    {
+        leftBs = 2<<0;
+    }
+    else if ((ABS(mb1->mv[0].hor - mb2->mv[5].hor) >= 4) ||
+             (ABS(mb1->mv[0].ver - mb2->mv[5].ver) >= 4) ||
+             (mb1->refAddr[0] != mb2->refAddr[5 >> 2]))
+    {
+        leftBs = 1<<0;
+    }
+    tmp1 = mb1->totalCoeff[8]; /* loaded early for the third pair */
+    tmp2 = mb2->totalCoeff[13];
+    if (tmp3 || tmp4) /* byte 1: mb1 block 2 vs mb2 block 7 */
+    {
+        leftBs += 2<<8;
+    }
+    else if ((ABS(mb1->mv[2].hor - mb2->mv[7].hor) >= 4) ||
+             (ABS(mb1->mv[2].ver - mb2->mv[7].ver) >= 4) ||
+             (mb1->refAddr[0] != mb2->refAddr[7 >> 2]))
+    {
+        leftBs += 1<<8;
+    }
+    tmp3 = mb1->totalCoeff[10]; /* loaded early for the fourth pair */
+    tmp4 = mb2->totalCoeff[15];
+    if (tmp1 || tmp2) /* byte 2: mb1 block 8 vs mb2 block 13 */
+    {
+        leftBs += 2<<16;
+    }
+    else if ((ABS(mb1->mv[8].hor - mb2->mv[13].hor) >= 4) ||
+             (ABS(mb1->mv[8].ver - mb2->mv[13].ver) >= 4) ||
+             (mb1->refAddr[8 >> 2] != mb2->refAddr[13 >> 2]))
+    {
+        leftBs += 1<<16;
+    }
+    if (tmp3 || tmp4) /* byte 3: mb1 block 10 vs mb2 block 15 */
+    {
+        leftBs += 2<<24;
+    }
+    else if ((ABS(mb1->mv[10].hor - mb2->mv[15].hor) >= 4) ||
+             (ABS(mb1->mv[10].ver - mb2->mv[15].ver) >= 4) ||
+             (mb1->refAddr[10 >> 2] != mb2->refAddr[15 >> 2]))
+    {
+        leftBs += 1<<24;
+    }
+
+    return leftBs;
+}
+#endif /* H264DEC_OMXDL */
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFilterPicture
+
+        Functional description:
+          Perform deblocking filtering for a picture. Filter does not copy
+          the original picture anywhere but filtering is performed directly
+          on the original image. Parameters controlling the filtering process
+          are computed based on information in macroblock structures of the
+          filtered macroblock, macroblock above and macroblock on the left of
+          the filtered one.
+
+        Inputs:
+          image         pointer to image to be filtered
+          mb            pointer to macroblock data structure of the top-left
+                        macroblock of the picture
+
+        Outputs:
+          image         filtered image stored here
+
+        Returns:
+          none
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_OMXDL
+void h264bsdFilterPicture(
+  image_t *image,
+  mbStorage_t *mb)
+{
+
+/* Variables */
+
+    u32 flags;
+    u32 picSizeInMbs, mbRow, mbCol;
+    u32 picWidthInMbs;
+    u8 *data;
+    mbStorage_t *pMb;
+    bS_t bS[16];
+    edgeThreshold_t thresholds[3];
+
+/* Code */
+
+    ASSERT(image);
+    ASSERT(mb);
+    ASSERT(image->data);
+    ASSERT(image->width);
+    ASSERT(image->height);
+
+    picWidthInMbs = image->width; /* width and height are in macroblocks */
+    data = image->data;
+    picSizeInMbs = picWidthInMbs * image->height;
+
+    pMb = mb;
+    /* raster-order walk: pMb advances once per macroblock */
+    for (mbRow = 0, mbCol = 0; mbRow < image->height; pMb++)
+    {
+        flags = GetMbFilteringFlags(pMb);
+
+        if (flags)
+        {
+            /* GetBoundaryStrengths function returns non-zero value if any of
+             * the bS values for the macroblock being processed was non-zero */
+            if (GetBoundaryStrengths(pMb, bS, flags))
+            {
+                /* luma: 16x16 block, row stride picWidthInMbs*16 bytes */
+                GetLumaEdgeThresholds(thresholds, pMb, flags);
+                data = image->data + mbRow * picWidthInMbs * 256 + mbCol * 16;
+
+                FilterLuma((u8*)data, bS, thresholds, picWidthInMbs*16);
+
+                /* chroma: Cb follows luma, Cr follows Cb (64 bytes per mb) */
+                GetChromaEdgeThresholds(thresholds, pMb, flags,
+                    pMb->chromaQpIndexOffset);
+                data = image->data + picSizeInMbs * 256 +
+                    mbRow * picWidthInMbs * 64 + mbCol * 8;
+
+                FilterChroma((u8*)data, data + 64*picSizeInMbs, bS,
+                        thresholds, picWidthInMbs*8);
+
+            }
+        }
+
+        mbCol++;
+        if (mbCol == picWidthInMbs)
+        {
+            mbCol = 0;
+            mbRow++;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterVerLumaEdge
+
+        Functional description:
+            Filter one vertical 4-pixel luma edge: 4 rows, imageWidth apart;
+            data points at q0, p samples are at data[-1..-3]. bS must be 1..4.
+------------------------------------------------------------------------------*/
+void FilterVerLumaEdge(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  u32 imageWidth)
+{
+
+/* Variables */
+
+    i32 delta, tc, tmp;
+    u32 i;
+    u8 p0, q0, p1, q1, p2, q2;
+    u32 tmpFlag;
+    const u8 *clp = h264bsdClip + 512; /* clipping table, offset by 512 */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(bS && bS <= 4);
+    ASSERT(thresholds);
+
+    if (bS < 4)
+    {
+        tc = thresholds->tc0[bS-1]; /* bS is 1..3 here */
+        tmp = tc; /* clip bound for delta; grows by one per side filtered */
+        for (i = 4; i; i--, data += imageWidth)
+        {
+            p1 = data[-2]; p0 = data[-1];
+            q0 = data[0]; q1 = data[1];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                p2 = data[-3];
+                q2 = data[2];
+
+                if ((unsigned)ABS(p2-p0) < thresholds->beta)
+                {
+                    data[-2] = (u8)(p1 + CLIP3(-tc,tc,
+                        (p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1));
+                    tmp++;
+                }
+
+                if ((unsigned)ABS(q2-q0) < thresholds->beta)
+                {
+                    data[1] = (u8)(q1 + CLIP3(-tc,tc,
+                        (q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1));
+                    tmp++;
+                }
+
+                delta = CLIP3(-tmp, tmp, ((((q0 - p0) << 2) +
+                          (p1 - q1) + 4) >> 3));
+
+                p0 = clp[p0 + delta];
+                q0 = clp[q0 - delta];
+                tmp = tc; /* reset the clip bound for the next row */
+                data[-1] = p0;
+                data[ 0] = q0;
+            }
+        }
+    }
+    else /* bS == 4 */
+    {
+        for (i = 4; i; i--, data += imageWidth)
+        {
+            p1 = data[-2]; p0 = data[-1];
+            q0 = data[0]; q1 = data[1];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                tmpFlag =
+                    ((unsigned)ABS(p0-q0) < ((thresholds->alpha >> 2) +2)) ?
+                        HANTRO_TRUE : HANTRO_FALSE;
+
+                p2 = data[-3];
+                q2 = data[2];
+
+                if (tmpFlag && (unsigned)ABS(p2-p0) < thresholds->beta)
+                { /* rewrites p0, p1 and p2; also reads data[-4] */
+                    tmp = p1 + p0 + q0;
+                    data[-1] = (u8)((p2 + 2 * tmp + q1 + 4) >> 3);
+                    data[-2] = (u8)((p2 + tmp + 2) >> 2);
+                    data[-3] = (u8)((2 * data[-4] + 3 * p2 + tmp + 4) >> 3);
+                }
+                else
+                    data[-1] = (2 * p1 + p0 + q1 + 2) >> 2;
+
+                if (tmpFlag && (unsigned)ABS(q2-q0) < thresholds->beta)
+                { /* rewrites q0, q1 and q2; also reads data[3] */
+                    tmp = p0 + q0 + q1;
+                    data[0] = (u8)((p1 + 2 * tmp + q2 + 4) >> 3);
+                    data[1] = (u8)((tmp + q2 + 2) >> 2);
+                    data[2] = (u8)((2 * data[3] + 3 * q2 + tmp + 4) >> 3);
+                }
+                else
+                    data[0] = (u8)((2 * q1 + q0 + p1 + 2) >> 2);
+            }
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterHorLumaEdge
+
+        Functional description:
+            Filter one horizontal 4-pixel luma edge (four columns) for bS
+            1..3 only; data points at q0, rows are imageWidth bytes apart.
+------------------------------------------------------------------------------*/
+void FilterHorLumaEdge(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  i32 imageWidth)
+{
+
+/* Variables */
+
+    i32 delta, tc, tmp;
+    u32 i;
+    u8 p0, q0, p1, q1, p2, q2;
+    const u8 *clp = h264bsdClip + 512; /* clipping table, offset by 512 */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(bS < 4); /* NOTE(review): admits bS == 0, yet tc0[bS-1] needs >= 1 */
+    ASSERT(thresholds);
+
+    tc = thresholds->tc0[bS-1];
+    tmp = tc; /* clip bound for delta; grows by one per side filtered */
+    for (i = 4; i; i--, data++)
+    {
+        p1 = data[-imageWidth*2]; p0 = data[-imageWidth];
+        q0 = data[0]; q1 = data[imageWidth];
+        if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+             ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+             ((unsigned)ABS(q1-q0) < thresholds->beta) )
+        {
+            p2 = data[-imageWidth*3];
+
+            if ((unsigned)ABS(p2-p0) < thresholds->beta)
+            {
+                data[-imageWidth*2] = (u8)(p1 + CLIP3(-tc,tc,
+                    (p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1));
+                tmp++;
+            }
+
+            q2 = data[imageWidth*2];
+
+            if ((unsigned)ABS(q2-q0) < thresholds->beta)
+            {
+                data[imageWidth] = (u8)(q1 + CLIP3(-tc,tc,
+                    (q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1));
+                tmp++;
+            }
+
+            delta = CLIP3(-tmp, tmp, ((((q0 - p0) << 2) +
+                      (p1 - q1) + 4) >> 3));
+
+            p0 = clp[p0 + delta];
+            q0 = clp[q0 - delta];
+            tmp = tc; /* reset the clip bound for the next column */
+            data[-imageWidth] = p0;
+            data[  0] = q0;
+        }
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterHorLuma
+
+        Functional description:
+            Filter all four successive horizontal 4-pixel luma edges (16
+            columns) in one pass. This can be done when bS is equal for all
+            four edges. data points at q0; rows are imageWidth bytes apart.
+------------------------------------------------------------------------------*/
+void FilterHorLuma(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  i32 imageWidth)
+{
+
+/* Variables */
+
+    i32 delta, tc, tmp;
+    u32 i;
+    u8 p0, q0, p1, q1, p2, q2;
+    u32 tmpFlag;
+    const u8 *clp = h264bsdClip + 512; /* clipping table, offset by 512 */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(bS <= 4); /* NOTE(review): admits bS == 0, yet tc0[bS-1] needs >= 1 */
+    ASSERT(thresholds);
+
+    if (bS < 4)
+    {
+        tc = thresholds->tc0[bS-1];
+        tmp = tc; /* clip bound for delta; grows by one per side filtered */
+        for (i = 16; i; i--, data++)
+        {
+            p1 = data[-imageWidth*2]; p0 = data[-imageWidth];
+            q0 = data[0]; q1 = data[imageWidth];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                p2 = data[-imageWidth*3];
+
+                if ((unsigned)ABS(p2-p0) < thresholds->beta)
+                {
+                    data[-imageWidth*2] = (u8)(p1 + CLIP3(-tc,tc,
+                        (p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1));
+                    tmp++;
+                }
+
+                q2 = data[imageWidth*2];
+
+                if ((unsigned)ABS(q2-q0) < thresholds->beta)
+                {
+                    data[imageWidth] = (u8)(q1 + CLIP3(-tc,tc,
+                        (q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1));
+                    tmp++;
+                }
+
+                delta = CLIP3(-tmp, tmp, ((((q0 - p0) << 2) +
+                          (p1 - q1) + 4) >> 3));
+
+                p0 = clp[p0 + delta];
+                q0 = clp[q0 - delta];
+                tmp = tc; /* reset the clip bound for the next column */
+                data[-imageWidth] = p0;
+                data[  0] = q0;
+            }
+        }
+    }
+    else /* bS == 4 */
+    {
+        for (i = 16; i; i--, data++)
+        {
+            p1 = data[-imageWidth*2]; p0 = data[-imageWidth];
+            q0 = data[0]; q1 = data[imageWidth];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                tmpFlag = ((unsigned)ABS(p0-q0) < ((thresholds->alpha >> 2) +2))
+                            ? HANTRO_TRUE : HANTRO_FALSE;
+
+                p2 = data[-imageWidth*3];
+                q2 = data[imageWidth*2];
+
+                if (tmpFlag && (unsigned)ABS(p2-p0) < thresholds->beta)
+                { /* rewrites p0, p1 and p2; also reads the row 4 above */
+                    tmp = p1 + p0 + q0;
+                    data[-imageWidth] = (u8)((p2 + 2 * tmp + q1 + 4) >> 3);
+                    data[-imageWidth*2] = (u8)((p2 + tmp + 2) >> 2);
+                    data[-imageWidth*3] = (u8)((2 * data[-imageWidth*4] +
+                                           3 * p2 + tmp + 4) >> 3);
+                }
+                else
+                    data[-imageWidth] = (u8)((2 * p1 + p0 + q1 + 2) >> 2);
+
+                if (tmpFlag && (unsigned)ABS(q2-q0) < thresholds->beta)
+                { /* rewrites q0, q1 and q2; also reads the row 3 below */
+                    tmp = p0 + q0 + q1;
+                    data[ 0] = (u8)((p1 + 2 * tmp + q2 + 4) >> 3);
+                    data[imageWidth] = (u8)((tmp + q2 + 2) >> 2);
+                    data[imageWidth*2] = (u8)((2 * data[imageWidth*3] +
+                                          3 * q2 + tmp + 4) >> 3);
+                }
+                else
+                    data[0] = (2 * q1 + q0 + p1 + 2) >> 2;
+            }
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterVerChromaEdge
+
+        Functional description:
+            Filter one vertical 2-pixel chroma edge: two rows, width bytes
+            apart, changing only p0 and q0. For bS < 4, tc is tc0[bS-1] + 1.
+------------------------------------------------------------------------------*/
+void FilterVerChromaEdge(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  u32 width)
+{
+
+/* Variables */
+
+    i32 delta, tc;
+    u8 p0, q0, p1, q1;
+    const u8 *clp = h264bsdClip + 512; /* clipping table, offset by 512 */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(bS <= 4); /* NOTE(review): admits bS == 0, yet tc0[bS-1] needs >= 1 */
+    ASSERT(thresholds);
+    /* first row of the edge */
+    p1 = data[-2]; p0 = data[-1];
+    q0 = data[0]; q1 = data[1];
+    if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+         ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+         ((unsigned)ABS(q1-q0) < thresholds->beta) )
+    {
+        if (bS < 4)
+        {
+            tc = thresholds->tc0[bS-1] + 1;
+            delta = CLIP3(-tc, tc, ((((q0 - p0) << 2) +
+                      (p1 - q1) + 4) >> 3));
+            p0 = clp[p0 + delta];
+            q0 = clp[q0 - delta];
+            data[-1] = p0;
+            data[ 0] = q0;
+        }
+        else
+        {
+            data[-1] = (2 * p1 + p0 + q1 + 2) >> 2;
+            data[ 0] = (2 * q1 + q0 + p1 + 2) >> 2;
+        }
+    }
+    data += width; /* second row: same filtering repeated one row down */
+    p1 = data[-2]; p0 = data[-1];
+    q0 = data[0]; q1 = data[1];
+    if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+         ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+         ((unsigned)ABS(q1-q0) < thresholds->beta) )
+    {
+        if (bS < 4)
+        {
+            tc = thresholds->tc0[bS-1] + 1;
+            delta = CLIP3(-tc, tc, ((((q0 - p0) << 2) +
+                      (p1 - q1) + 4) >> 3));
+            p0 = clp[p0 + delta];
+            q0 = clp[q0 - delta];
+            data[-1] = p0;
+            data[ 0] = q0;
+        }
+        else
+        {
+            data[-1] = (2 * p1 + p0 + q1 + 2) >> 2;
+            data[ 0] = (2 * q1 + q0 + p1 + 2) >> 2;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterHorChromaEdge
+
+        Functional description:
+            Filter one horizontal 2-pixel chroma edge (two columns), changing
+            only p0/q0. bS must be 1..3 since tc is taken from tc0[bS-1] + 1.
+------------------------------------------------------------------------------*/
+void FilterHorChromaEdge(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  i32 width)
+{
+
+/* Variables */
+
+    i32 delta, tc;
+    u32 i;
+    u8 p0, q0, p1, q1;
+    const u8 *clp = h264bsdClip + 512; /* clipping table, offset by 512 */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(bS < 4); /* NOTE(review): admits bS == 0, yet tc0[bS-1] needs >= 1 */
+    ASSERT(thresholds);
+
+    tc = thresholds->tc0[bS-1] + 1;
+    for (i = 2; i; i--, data++)
+    {
+        p1 = data[-width*2]; p0 = data[-width];
+        q0 = data[0]; q1 = data[width];
+        if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+             ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+             ((unsigned)ABS(q1-q0) < thresholds->beta) )
+        {
+            delta = CLIP3(-tc, tc, ((((q0 - p0) << 2) +
+                      (p1 - q1) + 4) >> 3));
+            p0 = clp[p0 + delta];
+            q0 = clp[q0 - delta];
+            data[-width] = p0;
+            data[  0] = q0;
+        }
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterHorChroma
+
+        Functional description:
+            Filter all four successive horizontal 2-pixel chroma edges. This
+            can be done if bS is equal for all four edges.
+            bS < 4 applies a tc-clipped delta, bS == 4 a weighted average.
+------------------------------------------------------------------------------*/
+void FilterHorChroma(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  i32 width)
+{
+
+/* Variables */
+
+    i32 delta, tc;
+    u32 i;
+    u8 p0, q0, p1, q1;
+    const u8 *clp = h264bsdClip + 512;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(bS <= 4);
+    ASSERT(thresholds);
+    /* bS is expected to be non-zero: the bS < 4 path reads tc0[bS-1] */
+    if (bS < 4)
+    {
+        tc = thresholds->tc0[bS-1] + 1;
+        for (i = 8; i; i--, data++) /* 8 columns = four 2-pixel edges */
+        {
+            p1 = data[-width*2]; p0 = data[-width];
+            q0 = data[0]; q1 = data[width];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                delta = CLIP3(-tc, tc, ((((q0 - p0) << 2) +
+                          (p1 - q1) + 4) >> 3));
+                p0 = clp[p0 + delta];
+                q0 = clp[q0 - delta];
+                data[-width] = p0;
+                data[  0] = q0;
+            }
+        }
+    }
+    else
+    {
+        for (i = 8; i; i--, data++) /* same 8 columns, bS == 4 filter */
+        {
+            p1 = data[-width*2]; p0 = data[-width];
+            q0 = data[0]; q1 = data[width];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                    data[-width] = (2 * p1 + p0 + q1 + 2) >> 2;
+                    data[  0] = (2 * q1 + q0 + p1 + 2) >> 2;
+            }
+        }
+    }
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+    Function: GetBoundaryStrengths
+
+        Functional description:
+            Function to calculate boundary strengths for all edges of a
+            macroblock. Function returns HANTRO_TRUE if any of the bS values for
+            the macroblock had non-zero value, HANTRO_FALSE otherwise.
+            mbB/mbA are only dereferenced when the matching flag bit is set.
+------------------------------------------------------------------------------*/
+u32 GetBoundaryStrengths(mbStorage_t *mb, bS_t *bS, u32 flags)
+{
+
+/* Variables */
+
+    /* this flag is set HANTRO_TRUE as soon as any boundary strength value is
+     * non-zero */
+    u32 nonZeroBs = HANTRO_FALSE;
+
+/* Code */
+
+    ASSERT(mb);
+    ASSERT(bS);
+    ASSERT(flags);
+
+    /* top edges: 4 if either macroblock is intra, else checked per block */
+    if (flags & FILTER_TOP_EDGE)
+    {
+        if (IS_INTRA_MB(*mb) || IS_INTRA_MB(*mb->mbB))
+        {
+            bS[0].top = bS[1].top = bS[2].top = bS[3].top = 4;
+            nonZeroBs = HANTRO_TRUE;
+        }
+        else
+        {
+            bS[0].top = EdgeBoundaryStrength(mb, mb->mbB, 0, 10);
+            bS[1].top = EdgeBoundaryStrength(mb, mb->mbB, 1, 11);
+            bS[2].top = EdgeBoundaryStrength(mb, mb->mbB, 4, 14);
+            bS[3].top = EdgeBoundaryStrength(mb, mb->mbB, 5, 15);
+            if (bS[0].top || bS[1].top || bS[2].top || bS[3].top)
+                nonZeroBs = HANTRO_TRUE;
+        }
+    }
+    else
+    {
+        bS[0].top = bS[1].top = bS[2].top = bS[3].top = 0;
+    }
+
+    /* left edges: 4 if either macroblock is intra, else checked per block */
+    if (flags & FILTER_LEFT_EDGE)
+    {
+        if (IS_INTRA_MB(*mb) || IS_INTRA_MB(*mb->mbA))
+        {
+            bS[0].left = bS[4].left = bS[8].left = bS[12].left = 4;
+            nonZeroBs = HANTRO_TRUE;
+        }
+        else
+        {
+            bS[0].left = EdgeBoundaryStrength(mb, mb->mbA, 0, 5);
+            bS[4].left = EdgeBoundaryStrength(mb, mb->mbA, 2, 7);
+            bS[8].left = EdgeBoundaryStrength(mb, mb->mbA, 8, 13);
+            bS[12].left = EdgeBoundaryStrength(mb, mb->mbA, 10, 15);
+            if (!nonZeroBs &&
+                (bS[0].left || bS[4].left || bS[8].left || bS[12].left))
+                nonZeroBs = HANTRO_TRUE;
+        }
+    }
+    else
+    {
+        bS[0].left = bS[4].left = bS[8].left = bS[12].left = 0;
+    }
+
+    /* inner edges: 3 throughout an intra macroblock */
+    if (IS_INTRA_MB(*mb))
+    {
+        bS[4].top  = bS[5].top  = bS[6].top  = bS[7].top  =
+        bS[8].top  = bS[9].top  = bS[10].top = bS[11].top =
+        bS[12].top = bS[13].top = bS[14].top = bS[15].top = 3;
+
+        bS[1].left  = bS[2].left  = bS[3].left  =
+        bS[5].left  = bS[6].left  = bS[7].left  =
+        bS[9].left  = bS[10].left = bS[11].left =
+        bS[13].left = bS[14].left = bS[15].left = 3;
+        nonZeroBs = HANTRO_TRUE;
+    }
+    else
+    {
+        /* 16x16 inter mb -> ref addresses or motion vectors cannot differ,
+         * only check if either of the blocks contain coefficients */
+        if (h264bsdNumMbPart(mb->mbType) == 1)
+        {
+            bS[4].top = mb->totalCoeff[2] || mb->totalCoeff[0] ? 2 : 0;
+            bS[5].top = mb->totalCoeff[3] || mb->totalCoeff[1] ? 2 : 0;
+            bS[6].top = mb->totalCoeff[6] || mb->totalCoeff[4] ? 2 : 0;
+            bS[7].top = mb->totalCoeff[7] || mb->totalCoeff[5] ? 2 : 0;
+            bS[8].top = mb->totalCoeff[8] || mb->totalCoeff[2] ? 2 : 0;
+            bS[9].top = mb->totalCoeff[9] || mb->totalCoeff[3] ? 2 : 0;
+            bS[10].top = mb->totalCoeff[12] || mb->totalCoeff[6] ? 2 : 0;
+            bS[11].top = mb->totalCoeff[13] || mb->totalCoeff[7] ? 2 : 0;
+            bS[12].top = mb->totalCoeff[10] || mb->totalCoeff[8] ? 2 : 0;
+            bS[13].top = mb->totalCoeff[11] || mb->totalCoeff[9] ? 2 : 0;
+            bS[14].top = mb->totalCoeff[14] || mb->totalCoeff[12] ? 2 : 0;
+            bS[15].top = mb->totalCoeff[15] || mb->totalCoeff[13] ? 2 : 0;
+
+            bS[1].left = mb->totalCoeff[1] || mb->totalCoeff[0] ? 2 : 0;
+            bS[2].left = mb->totalCoeff[4] || mb->totalCoeff[1] ? 2 : 0;
+            bS[3].left = mb->totalCoeff[5] || mb->totalCoeff[4] ? 2 : 0;
+            bS[5].left = mb->totalCoeff[3] || mb->totalCoeff[2] ? 2 : 0;
+            bS[6].left = mb->totalCoeff[6] || mb->totalCoeff[3] ? 2 : 0;
+            bS[7].left = mb->totalCoeff[7] || mb->totalCoeff[6] ? 2 : 0;
+            bS[9].left = mb->totalCoeff[9] || mb->totalCoeff[8] ? 2 : 0;
+            bS[10].left = mb->totalCoeff[12] || mb->totalCoeff[9] ? 2 : 0;
+            bS[11].left = mb->totalCoeff[13] || mb->totalCoeff[12] ? 2 : 0;
+            bS[13].left = mb->totalCoeff[11] || mb->totalCoeff[10] ? 2 : 0;
+            bS[14].left = mb->totalCoeff[14] || mb->totalCoeff[11] ? 2 : 0;
+            bS[15].left = mb->totalCoeff[15] || mb->totalCoeff[14] ? 2 : 0;
+        }
+        /* 16x8 inter mb -> ref addresses and motion vectors can be different
+         * only for the middle horizontal edge, for the other top edges it is
+         * enough to check whether the blocks contain coefficients or not. The
+         * same applies to all internal left edges. */
+        else if (mb->mbType == P_L0_L0_16x8)
+        {
+            bS[4].top = mb->totalCoeff[2] || mb->totalCoeff[0] ? 2 : 0;
+            bS[5].top = mb->totalCoeff[3] || mb->totalCoeff[1] ? 2 : 0;
+            bS[6].top = mb->totalCoeff[6] || mb->totalCoeff[4] ? 2 : 0;
+            bS[7].top = mb->totalCoeff[7] || mb->totalCoeff[5] ? 2 : 0;
+            bS[12].top = mb->totalCoeff[10] || mb->totalCoeff[8] ? 2 : 0;
+            bS[13].top = mb->totalCoeff[11] || mb->totalCoeff[9] ? 2 : 0;
+            bS[14].top = mb->totalCoeff[14] || mb->totalCoeff[12] ? 2 : 0;
+            bS[15].top = mb->totalCoeff[15] || mb->totalCoeff[13] ? 2 : 0;
+            bS[8].top = InnerBoundaryStrength(mb, 8, 2);
+            bS[9].top = InnerBoundaryStrength(mb, 9, 3);
+            bS[10].top = InnerBoundaryStrength(mb, 12, 6);
+            bS[11].top = InnerBoundaryStrength(mb, 13, 7);
+
+            bS[1].left = mb->totalCoeff[1] || mb->totalCoeff[0] ? 2 : 0;
+            bS[2].left = mb->totalCoeff[4] || mb->totalCoeff[1] ? 2 : 0;
+            bS[3].left = mb->totalCoeff[5] || mb->totalCoeff[4] ? 2 : 0;
+            bS[5].left = mb->totalCoeff[3] || mb->totalCoeff[2] ? 2 : 0;
+            bS[6].left = mb->totalCoeff[6] || mb->totalCoeff[3] ? 2 : 0;
+            bS[7].left = mb->totalCoeff[7] || mb->totalCoeff[6] ? 2 : 0;
+            bS[9].left = mb->totalCoeff[9] || mb->totalCoeff[8] ? 2 : 0;
+            bS[10].left = mb->totalCoeff[12] || mb->totalCoeff[9] ? 2 : 0;
+            bS[11].left = mb->totalCoeff[13] || mb->totalCoeff[12] ? 2 : 0;
+            bS[13].left = mb->totalCoeff[11] || mb->totalCoeff[10] ? 2 : 0;
+            bS[14].left = mb->totalCoeff[14] || mb->totalCoeff[11] ? 2 : 0;
+            bS[15].left = mb->totalCoeff[15] || mb->totalCoeff[14] ? 2 : 0;
+        }
+        /* 8x16 inter mb -> ref addresses and motion vectors can be different
+         * only for the middle vertical edge, for the other left edges it is
+         * enough to check whether the blocks contain coefficients or not. The
+         * same applies to all internal top edges. */
+        else if (mb->mbType == P_L0_L0_8x16)
+        {
+            bS[4].top = mb->totalCoeff[2] || mb->totalCoeff[0] ? 2 : 0;
+            bS[5].top = mb->totalCoeff[3] || mb->totalCoeff[1] ? 2 : 0;
+            bS[6].top = mb->totalCoeff[6] || mb->totalCoeff[4] ? 2 : 0;
+            bS[7].top = mb->totalCoeff[7] || mb->totalCoeff[5] ? 2 : 0;
+            bS[8].top = mb->totalCoeff[8] || mb->totalCoeff[2] ? 2 : 0;
+            bS[9].top = mb->totalCoeff[9] || mb->totalCoeff[3] ? 2 : 0;
+            bS[10].top = mb->totalCoeff[12] || mb->totalCoeff[6] ? 2 : 0;
+            bS[11].top = mb->totalCoeff[13] || mb->totalCoeff[7] ? 2 : 0;
+            bS[12].top = mb->totalCoeff[10] || mb->totalCoeff[8] ? 2 : 0;
+            bS[13].top = mb->totalCoeff[11] || mb->totalCoeff[9] ? 2 : 0;
+            bS[14].top = mb->totalCoeff[14] || mb->totalCoeff[12] ? 2 : 0;
+            bS[15].top = mb->totalCoeff[15] || mb->totalCoeff[13] ? 2 : 0;
+
+            bS[1].left = mb->totalCoeff[1] || mb->totalCoeff[0] ? 2 : 0;
+            bS[3].left = mb->totalCoeff[5] || mb->totalCoeff[4] ? 2 : 0;
+            bS[5].left = mb->totalCoeff[3] || mb->totalCoeff[2] ? 2 : 0;
+            bS[7].left = mb->totalCoeff[7] || mb->totalCoeff[6] ? 2 : 0;
+            bS[9].left = mb->totalCoeff[9] || mb->totalCoeff[8] ? 2 : 0;
+            bS[11].left = mb->totalCoeff[13] || mb->totalCoeff[12] ? 2 : 0;
+            bS[13].left = mb->totalCoeff[11] || mb->totalCoeff[10] ? 2 : 0;
+            bS[15].left = mb->totalCoeff[15] || mb->totalCoeff[14] ? 2 : 0;
+            bS[2].left = InnerBoundaryStrength(mb, 4, 1);
+            bS[6].left = InnerBoundaryStrength(mb, 6, 3);
+            bS[10].left = InnerBoundaryStrength(mb, 12, 9);
+            bS[14].left = InnerBoundaryStrength(mb, 14, 11);
+        }
+        else /* remaining mb types: evaluate every inner edge separately */
+        {
+            bS[4].top =
+                InnerBoundaryStrength(mb, mb4x4Index[4], mb4x4Index[0]);
+            bS[5].top =
+                InnerBoundaryStrength(mb, mb4x4Index[5], mb4x4Index[1]);
+            bS[6].top =
+                InnerBoundaryStrength(mb, mb4x4Index[6], mb4x4Index[2]);
+            bS[7].top =
+                InnerBoundaryStrength(mb, mb4x4Index[7], mb4x4Index[3]);
+            bS[8].top =
+                InnerBoundaryStrength(mb, mb4x4Index[8], mb4x4Index[4]);
+            bS[9].top =
+                InnerBoundaryStrength(mb, mb4x4Index[9], mb4x4Index[5]);
+            bS[10].top =
+                InnerBoundaryStrength(mb, mb4x4Index[10], mb4x4Index[6]);
+            bS[11].top =
+                InnerBoundaryStrength(mb, mb4x4Index[11], mb4x4Index[7]);
+            bS[12].top =
+                InnerBoundaryStrength(mb, mb4x4Index[12], mb4x4Index[8]);
+            bS[13].top =
+                InnerBoundaryStrength(mb, mb4x4Index[13], mb4x4Index[9]);
+            bS[14].top =
+                InnerBoundaryStrength(mb, mb4x4Index[14], mb4x4Index[10]);
+            bS[15].top =
+                InnerBoundaryStrength(mb, mb4x4Index[15], mb4x4Index[11]);
+
+            bS[1].left =
+                InnerBoundaryStrength(mb, mb4x4Index[1], mb4x4Index[0]);
+            bS[2].left =
+                InnerBoundaryStrength(mb, mb4x4Index[2], mb4x4Index[1]);
+            bS[3].left =
+                InnerBoundaryStrength(mb, mb4x4Index[3], mb4x4Index[2]);
+            bS[5].left =
+                InnerBoundaryStrength(mb, mb4x4Index[5], mb4x4Index[4]);
+            bS[6].left =
+                InnerBoundaryStrength(mb, mb4x4Index[6], mb4x4Index[5]);
+            bS[7].left =
+                InnerBoundaryStrength(mb, mb4x4Index[7], mb4x4Index[6]);
+            bS[9].left =
+                InnerBoundaryStrength(mb, mb4x4Index[9], mb4x4Index[8]);
+            bS[10].left =
+                InnerBoundaryStrength(mb, mb4x4Index[10], mb4x4Index[9]);
+            bS[11].left =
+                InnerBoundaryStrength(mb, mb4x4Index[11], mb4x4Index[10]);
+            bS[13].left =
+                InnerBoundaryStrength(mb, mb4x4Index[13], mb4x4Index[12]);
+            bS[14].left =
+                InnerBoundaryStrength(mb, mb4x4Index[14], mb4x4Index[13]);
+            bS[15].left =
+                InnerBoundaryStrength(mb, mb4x4Index[15], mb4x4Index[14]);
+        }
+        if (!nonZeroBs && /* scan only if nothing non-zero was found yet */
+            (bS[4].top || bS[5].top || bS[6].top || bS[7].top ||
+             bS[8].top || bS[9].top || bS[10].top || bS[11].top ||
+             bS[12].top || bS[13].top || bS[14].top || bS[15].top ||
+             bS[1].left || bS[2].left || bS[3].left ||
+             bS[5].left || bS[6].left || bS[7].left ||
+             bS[9].left || bS[10].left || bS[11].left ||
+             bS[13].left || bS[14].left || bS[15].left))
+            nonZeroBs = HANTRO_TRUE;
+    }
+
+    return(nonZeroBs);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetLumaEdgeThresholds
+
+        Functional description:
+            Compute alpha, beta and tc0 thresholds for inner, left and top
+            luma edges of a macroblock.
+            TOP/LEFT entries are written only when the matching flag is set.
+------------------------------------------------------------------------------*/
+void GetLumaEdgeThresholds(
+  edgeThreshold_t *thresholds,
+  mbStorage_t *mb,
+  u32 filteringFlags)
+{
+
+/* Variables */
+
+    u32 indexA, indexB;
+    u32 qpAv, qp, qpTmp;
+
+/* Code */
+
+    ASSERT(thresholds);
+    ASSERT(mb);
+
+    qp = mb->qpY;
+    /* table indices: qp plus the filter offset, clamped to 0..51 */
+    indexA = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetA);
+    indexB = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetB);
+
+    thresholds[INNER].alpha = alphas[indexA];
+    thresholds[INNER].beta = betas[indexB];
+    thresholds[INNER].tc0 = tc0[indexA];
+
+    if (filteringFlags & FILTER_TOP_EDGE)
+    {
+        qpTmp = mb->mbB->qpY;
+        if (qpTmp != qp)
+        {
+            qpAv = (qp + qpTmp + 1) >> 1; /* rounded mean of both QPs */
+
+            indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA);
+            indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB);
+
+            thresholds[TOP].alpha = alphas[indexA];
+            thresholds[TOP].beta = betas[indexB];
+            thresholds[TOP].tc0 = tc0[indexA];
+        }
+        else /* same QP on both sides: reuse the INNER thresholds */
+        {
+            thresholds[TOP].alpha = thresholds[INNER].alpha;
+            thresholds[TOP].beta = thresholds[INNER].beta;
+            thresholds[TOP].tc0 = thresholds[INNER].tc0;
+        }
+    }
+    if (filteringFlags & FILTER_LEFT_EDGE)
+    {
+        qpTmp = mb->mbA->qpY;
+        if (qpTmp != qp)
+        {
+            qpAv = (qp + qpTmp + 1) >> 1; /* rounded mean of both QPs */
+
+            indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA);
+            indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB);
+
+            thresholds[LEFT].alpha = alphas[indexA];
+            thresholds[LEFT].beta = betas[indexB];
+            thresholds[LEFT].tc0 = tc0[indexA];
+        }
+        else /* same QP on both sides: reuse the INNER thresholds */
+        {
+            thresholds[LEFT].alpha = thresholds[INNER].alpha;
+            thresholds[LEFT].beta = thresholds[INNER].beta;
+            thresholds[LEFT].tc0 = thresholds[INNER].tc0;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetChromaEdgeThresholds
+
+        Functional description:
+            Compute alpha, beta and tc0 thresholds for inner, left and top
+            chroma edges of a macroblock.
+            Each luma QP plus chromaQpIndexOffset is looked up in h264bsdQpC.
+------------------------------------------------------------------------------*/
+void GetChromaEdgeThresholds(
+  edgeThreshold_t *thresholds,
+  mbStorage_t *mb,
+  u32 filteringFlags,
+  i32 chromaQpIndexOffset)
+{
+
+/* Variables */
+
+    u32 indexA, indexB;
+    u32 qpAv, qp, qpTmp;
+
+/* Code */
+
+    ASSERT(thresholds);
+    ASSERT(mb);
+
+    qp = mb->qpY;
+    qp = h264bsdQpC[CLIP3(0, 51, (i32)qp + chromaQpIndexOffset)];
+    /* from here on qp holds the value read from h264bsdQpC, not mb->qpY */
+    indexA = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetA);
+    indexB = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetB);
+
+    thresholds[INNER].alpha = alphas[indexA];
+    thresholds[INNER].beta = betas[indexB];
+    thresholds[INNER].tc0 = tc0[indexA];
+
+    if (filteringFlags & FILTER_TOP_EDGE)
+    {
+        qpTmp = mb->mbB->qpY;
+        if (qpTmp != mb->qpY) /* compared as luma QPs */
+        {
+            qpTmp = h264bsdQpC[CLIP3(0, 51, (i32)qpTmp + chromaQpIndexOffset)];
+            qpAv = (qp + qpTmp + 1) >> 1; /* rounded mean of mapped QPs */
+
+            indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA);
+            indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB);
+
+            thresholds[TOP].alpha = alphas[indexA];
+            thresholds[TOP].beta = betas[indexB];
+            thresholds[TOP].tc0 = tc0[indexA];
+        }
+        else /* same luma QP on both sides: reuse the INNER thresholds */
+        {
+            thresholds[TOP].alpha = thresholds[INNER].alpha;
+            thresholds[TOP].beta = thresholds[INNER].beta;
+            thresholds[TOP].tc0 = thresholds[INNER].tc0;
+        }
+    }
+    if (filteringFlags & FILTER_LEFT_EDGE)
+    {
+        qpTmp = mb->mbA->qpY;
+        if (qpTmp != mb->qpY) /* compared as luma QPs */
+        {
+            qpTmp = h264bsdQpC[CLIP3(0, 51, (i32)qpTmp + chromaQpIndexOffset)];
+            qpAv = (qp + qpTmp + 1) >> 1; /* rounded mean of mapped QPs */
+
+            indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA);
+            indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB);
+
+            thresholds[LEFT].alpha = alphas[indexA];
+            thresholds[LEFT].beta = betas[indexB];
+            thresholds[LEFT].tc0 = tc0[indexA];
+        }
+        else /* same luma QP on both sides: reuse the INNER thresholds */
+        {
+            thresholds[LEFT].alpha = thresholds[INNER].alpha;
+            thresholds[LEFT].beta = thresholds[INNER].beta;
+            thresholds[LEFT].tc0 = thresholds[INNER].tc0;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterLuma
+
+        Functional description:
+            Function to filter all luma edges of a macroblock
+            bS holds four entries per row of 4x4 blocks, 16 in total.
+------------------------------------------------------------------------------*/
+void FilterLuma(
+  u8 *data,
+  bS_t *bS,
+  edgeThreshold_t *thresholds,
+  u32 width)
+{
+
+/* Variables */
+
+    u32 vblock;
+    bS_t *tmp;
+    u8 *ptr;
+    u32 offset;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(bS);
+    ASSERT(thresholds);
+
+    ptr = data;
+    tmp = bS;
+
+    offset  = TOP;
+
+    /* loop block rows, perform filtering for all vertical edges of the block
+     * row first, then filter each horizontal edge of the row */
+    for (vblock = 4; vblock--;)
+    {
+        /* only perform filtering if bS is non-zero, first of the four
+         * FilterVerLumaEdge handles the left edge of the macroblock, others
+         * filter inner edges */
+        if (tmp[0].left)
+            FilterVerLumaEdge(ptr, tmp[0].left, thresholds + LEFT, width);
+        if (tmp[1].left)
+            FilterVerLumaEdge(ptr+4, tmp[1].left, thresholds + INNER, width);
+        if (tmp[2].left)
+            FilterVerLumaEdge(ptr+8, tmp[2].left, thresholds + INNER, width);
+        if (tmp[3].left)
+            FilterVerLumaEdge(ptr+12, tmp[3].left, thresholds + INNER, width);
+
+        /* if bS is equal for all horizontal edges of the row -> perform
+         * filtering with FilterHorLuma, otherwise use FilterHorLumaEdge for
+         * each edge separately. offset variable indicates top macroblock edge
+         * on the first loop round, inner edge for the other rounds */
+        if (tmp[0].top == tmp[1].top && tmp[1].top == tmp[2].top &&
+            tmp[2].top == tmp[3].top)
+        {
+            if(tmp[0].top)
+                FilterHorLuma(ptr, tmp[0].top, thresholds + offset, (i32)width);
+        }
+        else
+        {
+            if(tmp[0].top)
+                FilterHorLumaEdge(ptr, tmp[0].top, thresholds+offset,
+                    (i32)width);
+            if(tmp[1].top)
+                FilterHorLumaEdge(ptr+4, tmp[1].top, thresholds+offset,
+                    (i32)width);
+            if(tmp[2].top)
+                FilterHorLumaEdge(ptr+8, tmp[2].top, thresholds+offset,
+                    (i32)width);
+            if(tmp[3].top)
+                FilterHorLumaEdge(ptr+12, tmp[3].top, thresholds+offset,
+                    (i32)width);
+        }
+
+        /* four pixel rows ahead, i.e. next row of 4x4-blocks */
+        ptr += width*4;
+        tmp += 4; /* bS entries of the next block row */
+        offset = INNER;
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterChroma
+
+        Functional description:
+            Function to filter all chroma edges of a macroblock
+            Cb and Cr are filtered with the same bS values and thresholds.
+------------------------------------------------------------------------------*/
+void FilterChroma(
+  u8 *dataCb,
+  u8 *dataCr,
+  bS_t *bS,
+  edgeThreshold_t *thresholds,
+  u32 width)
+{
+
+/* Variables */
+
+    u32 vblock;
+    bS_t *tmp;
+    u32 offset;
+
+/* Code */
+
+    ASSERT(dataCb);
+    ASSERT(dataCr);
+    ASSERT(bS);
+    ASSERT(thresholds);
+
+    tmp = bS;
+    offset = TOP;
+
+    /* loop block rows, perform filtering for all vertical edges of the block
+     * row first, then filter each horizontal edge of the row */
+    for (vblock = 0; vblock < 2; vblock++)
+    {
+        /* only perform filtering if bS is non-zero, first two of the four
+         * FilterVerChromaEdge calls handle the left edge of the macroblock,
+         * others filter the inner edge. Note that as chroma uses bS values
+         * determined for luma edges, each bS is used only for 2 pixels of
+         * a 4-pixel edge */
+        if (tmp[0].left)
+        {
+            FilterVerChromaEdge(dataCb, tmp[0].left, thresholds + LEFT, width);
+            FilterVerChromaEdge(dataCr, tmp[0].left, thresholds + LEFT, width);
+        }
+        if (tmp[4].left)
+        {
+            FilterVerChromaEdge(dataCb+2*width, tmp[4].left, thresholds + LEFT,
+                width);
+            FilterVerChromaEdge(dataCr+2*width, tmp[4].left, thresholds + LEFT,
+                width);
+        }
+        if (tmp[2].left)
+        {
+            FilterVerChromaEdge(dataCb+4, tmp[2].left, thresholds + INNER,
+                width);
+            FilterVerChromaEdge(dataCr+4, tmp[2].left, thresholds + INNER,
+                width);
+        }
+        if (tmp[6].left)
+        {
+            FilterVerChromaEdge(dataCb+2*width+4, tmp[6].left,
+                thresholds + INNER, width);
+            FilterVerChromaEdge(dataCr+2*width+4, tmp[6].left,
+                thresholds + INNER, width);
+        }
+
+        /* if bS is equal for all horizontal edges of the row -> perform
+         * filtering with FilterHorChroma, otherwise use FilterHorChromaEdge
+         * for each edge separately. offset variable indicates top macroblock
+         * edge on the first loop round, inner edge for the second */
+        if (tmp[0].top == tmp[1].top && tmp[1].top == tmp[2].top &&
+            tmp[2].top == tmp[3].top)
+        {
+            if(tmp[0].top)
+            {
+                FilterHorChroma(dataCb, tmp[0].top, thresholds+offset,
+                    (i32)width);
+                FilterHorChroma(dataCr, tmp[0].top, thresholds+offset,
+                    (i32)width);
+            }
+        }
+        else
+        {
+            if (tmp[0].top)
+            {
+                FilterHorChromaEdge(dataCb, tmp[0].top, thresholds+offset,
+                    (i32)width);
+                FilterHorChromaEdge(dataCr, tmp[0].top, thresholds+offset,
+                    (i32)width);
+            }
+            if (tmp[1].top)
+            {
+                FilterHorChromaEdge(dataCb+2, tmp[1].top, thresholds+offset,
+                    (i32)width);
+                FilterHorChromaEdge(dataCr+2, tmp[1].top, thresholds+offset,
+                    (i32)width);
+            }
+            if (tmp[2].top)
+            {
+                FilterHorChromaEdge(dataCb+4, tmp[2].top, thresholds+offset,
+                    (i32)width);
+                FilterHorChromaEdge(dataCr+4, tmp[2].top, thresholds+offset,
+                    (i32)width);
+            }
+            if (tmp[3].top)
+            {
+                FilterHorChromaEdge(dataCb+6, tmp[3].top, thresholds+offset,
+                    (i32)width);
+                FilterHorChromaEdge(dataCr+6, tmp[3].top, thresholds+offset,
+                    (i32)width);
+            }
+        }
+
+        tmp += 8; /* advance by 8 bS entries */
+        dataCb += width*4; /* four chroma pixel rows down */
+        dataCr += width*4;
+        offset = INNER;
+    }
+}
+
+#else /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFilterPicture
+
+        Functional description:
+          Perform deblocking filtering for a picture. Filter does not copy
+          the original picture anywhere but filtering is performed directly
+          on the original image. Parameters controlling the filtering process
+          are computed based on information in macroblock structures of the
+          filtered macroblock, macroblock above and macroblock on the left of
+          the filtered one.
+
+        Inputs:
+          image         pointer to image to be filtered; width and height
+                        are used as macroblock counts
+          mb            pointer to macroblock data structure of the top-left
+                        macroblock of the picture
+        Outputs:
+          image         filtered image stored here
+
+        Returns:
+          none
+
+------------------------------------------------------------------------------*/
+
+/*lint --e{550} Symbol not accessed */
+void h264bsdFilterPicture(
+  image_t *image,
+  mbStorage_t *mb)
+{
+
+/* Variables */
+
+    u32 flags;
+    u32 picSizeInMbs, mbRow, mbCol;
+    u32 picWidthInMbs;
+    u8 *data;
+    mbStorage_t *pMb;
+    u8 bS[2][16];
+    u8 thresholdLuma[2][16];
+    u8 thresholdChroma[2][8];
+    u8 alpha[2][2];
+    u8 beta[2][2];
+    OMXResult res;
+
+/* Code */
+
+    ASSERT(image);
+    ASSERT(mb);
+    ASSERT(image->data);
+    ASSERT(image->width);
+    ASSERT(image->height);
+
+    picWidthInMbs = image->width;
+    data = image->data;
+    picSizeInMbs = picWidthInMbs * image->height;
+
+    pMb = mb;
+
+    for (mbRow = 0, mbCol = 0; mbRow < image->height; pMb++)
+    {
+        flags = GetMbFilteringFlags(pMb);
+
+        if (flags)
+        {
+            /* GetBoundaryStrengths function returns non-zero value if any of
+             * the bS values for the macroblock being processed was non-zero */
+            if (GetBoundaryStrengths(pMb, bS, flags))
+            {
+                /* row [0] feeds VerEdge, row [1] (+2/+8/+16) HorEdge */
+                /* Luma: plane at image->data, stride picWidthInMbs*16 */
+                GetLumaEdgeThresholds(pMb,alpha,beta,thresholdLuma,bS,flags);
+                data = image->data + mbRow * picWidthInMbs * 256 + mbCol * 16;
+                /* NOTE: res is assigned below but never examined */
+                res = omxVCM4P10_FilterDeblockingLuma_VerEdge_I( data,
+                                                (OMX_S32)(picWidthInMbs*16),
+                                                (const OMX_U8*)alpha,
+                                                (const OMX_U8*)beta,
+                                                (const OMX_U8*)thresholdLuma,
+                                                (const OMX_U8*)bS );
+
+                res = omxVCM4P10_FilterDeblockingLuma_HorEdge_I( data,
+                                                (OMX_S32)(picWidthInMbs*16),
+                                                (const OMX_U8*)alpha+2,
+                                                (const OMX_U8*)beta+2,
+                                                (const OMX_U8*)thresholdLuma+16,
+                                                (const OMX_U8*)bS+16 );
+                /* Cb: plane begins picSizeInMbs*256 bytes into image->data */
+                GetChromaEdgeThresholds(pMb, alpha, beta, thresholdChroma,
+                                        bS, flags, pMb->chromaQpIndexOffset);
+                data = image->data + picSizeInMbs * 256 +
+                    mbRow * picWidthInMbs * 64 + mbCol * 8;
+
+                res = omxVCM4P10_FilterDeblockingChroma_VerEdge_I( data,
+                                              (OMX_S32)(picWidthInMbs*8),
+                                              (const OMX_U8*)alpha,
+                                              (const OMX_U8*)beta,
+                                              (const OMX_U8*)thresholdChroma,
+                                              (const OMX_U8*)bS );
+                res = omxVCM4P10_FilterDeblockingChroma_HorEdge_I( data,
+                                              (OMX_S32)(picWidthInMbs*8),
+                                              (const OMX_U8*)alpha+2,
+                                              (const OMX_U8*)beta+2,
+                                              (const OMX_U8*)thresholdChroma+8,
+                                              (const OMX_U8*)bS+16 );
+                /* Cr: same position, picSizeInMbs*64 bytes after Cb */
+                data += (picSizeInMbs * 64);
+                res = omxVCM4P10_FilterDeblockingChroma_VerEdge_I( data,
+                                              (OMX_S32)(picWidthInMbs*8),
+                                              (const OMX_U8*)alpha,
+                                              (const OMX_U8*)beta,
+                                              (const OMX_U8*)thresholdChroma,
+                                              (const OMX_U8*)bS );
+                res = omxVCM4P10_FilterDeblockingChroma_HorEdge_I( data,
+                                              (OMX_S32)(picWidthInMbs*8),
+                                              (const OMX_U8*)alpha+2,
+                                              (const OMX_U8*)beta+2,
+                                              (const OMX_U8*)thresholdChroma+8,
+                                              (const OMX_U8*)bS+16 );
+            }
+        }
+        /* advance in raster order, wrapping to the next macroblock row */
+        mbCol++;
+        if (mbCol == picWidthInMbs)
+        {
+            mbCol = 0;
+            mbRow++;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetBoundaryStrengths
+
+        Functional description:
+            Calculate boundary strengths for all edges of a macroblock into
+            bS: bS[0][] holds the left edge values and bS[1][] the top edge
+            values; entries 0..3 of each row belong to the macroblock edge.
+            Returns HANTRO_TRUE if any bS is non-zero, else HANTRO_FALSE.
+------------------------------------------------------------------------------*/
+u32 GetBoundaryStrengths(mbStorage_t *mb, u8 (*bS)[16], u32 flags)
+{
+
+/* Variables */
+
+    /* this flag is set HANTRO_TRUE as soon as any boundary strength value is
+     * non-zero */
+    u32 nonZeroBs = HANTRO_FALSE;
+    u32 *pTmp; /* word-wide alias of bS or mb->totalCoeff */
+    u32 tmp1, tmp2, isIntraMb;
+
+/* Code */
+
+    ASSERT(mb);
+    ASSERT(bS);
+    ASSERT(flags);
+
+    isIntraMb = IS_INTRA_MB(*mb);
+
+    /* top edges: bS[1][0..3] as one word, bS 4 if either MB is intra */
+    pTmp = (u32*)&bS[1][0];
+    if (flags & FILTER_TOP_EDGE)
+    {
+        if (isIntraMb || IS_INTRA_MB(*mb->mbB))
+        {
+            *pTmp = 0x04040404;
+            nonZeroBs = HANTRO_TRUE;
+        }
+        else
+        {
+            *pTmp = EdgeBoundaryStrengthTop(mb, mb->mbB);
+            if (*pTmp)
+                nonZeroBs = HANTRO_TRUE;
+        }
+    }
+    else
+    {
+        *pTmp = 0; /* FILTER_TOP_EDGE not set */
+    }
+
+    /* left edges: bS[0][0..3], same scheme using neighbour mbA */
+    pTmp = (u32*)&bS[0][0];
+    if (flags & FILTER_LEFT_EDGE)
+    {
+        if (isIntraMb || IS_INTRA_MB(*mb->mbA))
+        {
+            /*bS[0][0] = bS[0][1] = bS[0][2] = bS[0][3] = 4;*/
+            *pTmp = 0x04040404;
+            nonZeroBs = HANTRO_TRUE;
+        }
+        else
+        {
+            *pTmp = EdgeBoundaryStrengthLeft(mb, mb->mbA);
+            if (!nonZeroBs && *pTmp)
+                nonZeroBs = HANTRO_TRUE;
+        }
+    }
+    else
+    {
+        *pTmp = 0; /* FILTER_LEFT_EDGE not set */
+    }
+
+    /* inner edges: bS 3 throughout for intra macroblocks */
+    if (isIntraMb)
+    {
+        pTmp++; /* skip bS[0][0..3], the left edge word written above */
+        *pTmp++ = 0x03030303;
+        *pTmp++ = 0x03030303;
+        *pTmp++ = 0x03030303;
+        pTmp++; /* skip bS[1][0..3], the top edge word written above */
+        *pTmp++ = 0x03030303;
+        *pTmp++ = 0x03030303;
+        *pTmp = 0x03030303;
+
+        nonZeroBs = HANTRO_TRUE;
+    }
+    else
+    {
+        pTmp = (u32*)mb->totalCoeff;
+        /* NOTE(review): masks assume u8 totalCoeff, little-endian - confirm */
+        /* 16x16 inter mb -> ref addresses or motion vectors cannot differ,
+         * only check if either of the blocks contain coefficients */
+        if (h264bsdNumMbPart(mb->mbType) == 1)
+        {
+            tmp1 = *pTmp++;
+            tmp2 = *pTmp++;
+            bS[1][4]  = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [2]  || [0] */
+            bS[1][5]  = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [3]  || [1] */
+            bS[0][4]  = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [1]  || [0] */
+            bS[0][5]  = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [3]  || [2] */
+
+            tmp1 = *pTmp++;
+            bS[1][6]  = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [6]  || [4] */
+            bS[1][7]  = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [7]  || [5] */
+            bS[0][12] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [5]  || [4] */
+            bS[0][13] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [7]  || [6] */
+            tmp2 = *pTmp;
+            bS[1][12] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [10] || [8] */
+            bS[1][13] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [11] || [9] */
+            bS[0][6]  = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [9]  || [8] */
+            bS[0][7]  = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [11] || [10] */
+
+            bS[1][14] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [14] || [12] */
+            bS[1][15] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [15] || [13] */
+            bS[0][14] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [13] || [12] */
+            bS[0][15] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [15] || [14] */
+            /* pairs below straddle two words, so test the entries singly */
+            {
+            u32 tmp3, tmp4;
+
+            tmp1 = mb->totalCoeff[8];
+            tmp2 = mb->totalCoeff[2];
+            tmp3 = mb->totalCoeff[9];
+            tmp4 = mb->totalCoeff[3];
+
+            bS[1][8] = tmp1 || tmp2 ? 2 : 0;
+            tmp1 = mb->totalCoeff[12];
+            tmp2 = mb->totalCoeff[6];
+            bS[1][9] = tmp3 || tmp4 ? 2 : 0;
+            tmp3 = mb->totalCoeff[13];
+            tmp4 = mb->totalCoeff[7];
+            bS[1][10] = tmp1 || tmp2 ? 2 : 0;
+            tmp1 = mb->totalCoeff[4];
+            tmp2 = mb->totalCoeff[1];
+            bS[1][11] = tmp3 || tmp4 ? 2 : 0;
+            tmp3 = mb->totalCoeff[6];
+            tmp4 = mb->totalCoeff[3];
+            bS[0][8] = tmp1 || tmp2 ? 2 : 0;
+            tmp1 = mb->totalCoeff[12];
+            tmp2 = mb->totalCoeff[9];
+            bS[0][9] = tmp3 || tmp4 ? 2 : 0;
+            tmp3 = mb->totalCoeff[14];
+            tmp4 = mb->totalCoeff[11];
+            bS[0][10] = tmp1 || tmp2 ? 2 : 0;
+            bS[0][11] = tmp3 || tmp4 ? 2 : 0;
+            }
+        }
+
+        /* 16x8 inter mb -> ref addresses and motion vectors can be different
+         * only for the middle horizontal edge, for the other top edges it is
+         * enough to check whether the blocks contain coefficients or not. The
+         * same applies to all internal left edges. */
+        else if (mb->mbType == P_L0_L0_16x8)
+        {
+            tmp1 = *pTmp++;
+            tmp2 = *pTmp++;
+            bS[1][4]  = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [2]  || [0] */
+            bS[1][5]  = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [3]  || [1] */
+            bS[0][4]  = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [1]  || [0] */
+            bS[0][5]  = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [3]  || [2] */
+            tmp1 = *pTmp++;
+            bS[1][6]  = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [6]  || [4] */
+            bS[1][7]  = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [7]  || [5] */
+            bS[0][12] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [5]  || [4] */
+            bS[0][13] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [7]  || [6] */
+            tmp2 = *pTmp;
+            bS[1][12] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [10] || [8] */
+            bS[1][13] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [11] || [9] */
+            bS[0][6]  = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [9]  || [8] */
+            bS[0][7]  = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [11] || [10] */
+
+            bS[1][14] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [14] || [12] */
+            bS[1][15] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [15] || [13] */
+            bS[0][14] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [13] || [12] */
+            bS[0][15] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [15] || [14] */
+            /* middle horizontal edge: delegated to InnerBoundaryStrength */
+            bS[1][8] = (u8)InnerBoundaryStrength(mb, 8, 2);
+            bS[1][9] = (u8)InnerBoundaryStrength(mb, 9, 3);
+            bS[1][10] = (u8)InnerBoundaryStrength(mb, 12, 6);
+            bS[1][11] = (u8)InnerBoundaryStrength(mb, 13, 7);
+
+            {
+            u32 tmp3, tmp4;
+
+            tmp1 = mb->totalCoeff[4];
+            tmp2 = mb->totalCoeff[1];
+            tmp3 = mb->totalCoeff[6];
+            tmp4 = mb->totalCoeff[3];
+            bS[0][8] = tmp1 || tmp2 ? 2 : 0;
+            tmp1 = mb->totalCoeff[12];
+            tmp2 = mb->totalCoeff[9];
+            bS[0][9] = tmp3 || tmp4 ? 2 : 0;
+            tmp3 = mb->totalCoeff[14];
+            tmp4 = mb->totalCoeff[11];
+            bS[0][10] = tmp1 || tmp2 ? 2 : 0;
+            bS[0][11] = tmp3 || tmp4 ? 2 : 0;
+            }
+        }
+        /* 8x16 inter mb -> ref addresses and motion vectors can be different
+         * only for the middle vertical edge, for the other left edges it is
+         * enough to check whether the blocks contain coefficients or not. The
+         * same applies to all internal top edges. */
+        else if (mb->mbType == P_L0_L0_8x16)
+        {
+            tmp1 = *pTmp++;
+            tmp2 = *pTmp++;
+            bS[1][4]  = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [2]  || [0] */
+            bS[1][5]  = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [3]  || [1] */
+            bS[0][4]  = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [1]  || [0] */
+            bS[0][5]  = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [3]  || [2] */
+            tmp1 = *pTmp++;
+            bS[1][6]  = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [6]  || [4] */
+            bS[1][7]  = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [7]  || [5] */
+            bS[0][12] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [5]  || [4] */
+            bS[0][13] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [7]  || [6] */
+            tmp2 = *pTmp;
+            bS[1][12] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [10] || [8] */
+            bS[1][13] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [11] || [9] */
+            bS[0][6]  = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [9]  || [8] */
+            bS[0][7]  = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [11] || [10] */
+
+            bS[1][14] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [14] || [12] */
+            bS[1][15] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [15] || [13] */
+            bS[0][14] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [13] || [12] */
+            bS[0][15] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [15] || [14] */
+            /* middle vertical edge: delegated to InnerBoundaryStrength */
+            bS[0][8] = (u8)InnerBoundaryStrength(mb, 4, 1);
+            bS[0][9] = (u8)InnerBoundaryStrength(mb, 6, 3);
+            bS[0][10] = (u8)InnerBoundaryStrength(mb, 12, 9);
+            bS[0][11] = (u8)InnerBoundaryStrength(mb, 14, 11);
+
+            {
+            u32 tmp3, tmp4;
+
+            tmp1 = mb->totalCoeff[8];
+            tmp2 = mb->totalCoeff[2];
+            tmp3 = mb->totalCoeff[9];
+            tmp4 = mb->totalCoeff[3];
+            bS[1][8] = tmp1 || tmp2 ? 2 : 0;
+            tmp1 = mb->totalCoeff[12];
+            tmp2 = mb->totalCoeff[6];
+            bS[1][9] = tmp3 || tmp4 ? 2 : 0;
+            tmp3 = mb->totalCoeff[13];
+            tmp4 = mb->totalCoeff[7];
+            bS[1][10] = tmp1 || tmp2 ? 2 : 0;
+            bS[1][11] = tmp3 || tmp4 ? 2 : 0;
+            }
+        }
+        else /* all other inter macroblock types */
+        {
+            tmp1 = *pTmp++;
+            bS[1][4] = (tmp1 & 0x00FF00FF) ? 2 : (u8)InnerBoundaryStrength2(mb, 2, 0);
+            bS[1][5] = (tmp1 & 0xFF00FF00) ? 2 : (u8)InnerBoundaryStrength2(mb, 3, 1);
+            bS[0][4] = (tmp1 & 0x0000FFFF) ? 2 : (u8)InnerBoundaryStrength2(mb, 1, 0);
+            bS[0][5] = (tmp1 & 0xFFFF0000) ? 2 : (u8)InnerBoundaryStrength2(mb, 3, 2);
+            tmp1 = *pTmp++;
+            bS[1][6]  = (tmp1 & 0x00FF00FF) ? 2 : (u8)InnerBoundaryStrength2(mb, 6, 4);
+            bS[1][7]  = (tmp1 & 0xFF00FF00) ? 2 : (u8)InnerBoundaryStrength2(mb, 7, 5);
+            bS[0][12] = (tmp1 & 0x0000FFFF) ? 2 : (u8)InnerBoundaryStrength2(mb, 5, 4);
+            bS[0][13] = (tmp1 & 0xFFFF0000) ? 2 : (u8)InnerBoundaryStrength2(mb, 7, 6);
+            tmp1 = *pTmp++;
+            bS[1][12] = (tmp1 & 0x00FF00FF) ? 2 : (u8)InnerBoundaryStrength2(mb, 10, 8);
+            bS[1][13] = (tmp1 & 0xFF00FF00) ? 2 : (u8)InnerBoundaryStrength2(mb, 11, 9);
+            bS[0][6]  = (tmp1 & 0x0000FFFF) ? 2 : (u8)InnerBoundaryStrength2(mb, 9, 8);
+            bS[0][7]  = (tmp1 & 0xFFFF0000) ? 2 : (u8)InnerBoundaryStrength2(mb, 11, 10);
+            tmp1 = *pTmp;
+            bS[1][14] = (tmp1 & 0x00FF00FF) ? 2 : (u8)InnerBoundaryStrength2(mb, 14, 12);
+            bS[1][15] = (tmp1 & 0xFF00FF00) ? 2 : (u8)InnerBoundaryStrength2(mb, 15, 13);
+            bS[0][14] = (tmp1 & 0x0000FFFF) ? 2 : (u8)InnerBoundaryStrength2(mb, 13, 12);
+            bS[0][15] = (tmp1 & 0xFFFF0000) ? 2 : (u8)InnerBoundaryStrength2(mb, 15, 14);
+
+            bS[1][8] = (u8)InnerBoundaryStrength(mb, 8, 2);
+            bS[1][9] = (u8)InnerBoundaryStrength(mb, 9, 3);
+            bS[1][10] = (u8)InnerBoundaryStrength(mb, 12, 6);
+            bS[1][11] = (u8)InnerBoundaryStrength(mb, 13, 7);
+
+            bS[0][8] = (u8)InnerBoundaryStrength(mb, 4, 1);
+            bS[0][9] = (u8)InnerBoundaryStrength(mb, 6, 3);
+            bS[0][10] = (u8)InnerBoundaryStrength(mb, 12, 9);
+            bS[0][11] = (u8)InnerBoundaryStrength(mb, 14, 11);
+        }
+        pTmp = (u32*)&bS[0][0]; /* words 1-3 and 5-7 hold the inner edges */
+        if (!nonZeroBs && (pTmp[1] || pTmp[2] || pTmp[3] ||
+                           pTmp[5] || pTmp[6] || pTmp[7]) )
+        {
+            nonZeroBs = HANTRO_TRUE;
+        }
+    }
+
+    return(nonZeroBs);
+
+}
+
+/*------------------------------------------------------------------------------
+    Function: GetLumaEdgeThresholds
+
+        Functional description:
+            Compute alpha, beta and tc0 thresholds for inner, left and top
+            luma edges of a macroblock. Row [0] of each output belongs to the
+            left edges and row [1] to the top edges, matching the rows of bs.
+            Nothing is returned.
+------------------------------------------------------------------------------*/
+void GetLumaEdgeThresholds(
+    mbStorage_t *mb,
+    u8 (*alpha)[2],
+    u8 (*beta)[2],
+    u8 (*threshold)[16],
+    u8 (*bs)[16],
+    u32 filteringFlags )
+{
+
+/* Variables */
+
+    u32 indexA, indexB;
+    u32 qpAv, qp, qpTmp;
+    u32 i;
+
+/* Code */
+
+    ASSERT(threshold);
+    ASSERT(bs);
+    ASSERT(beta);
+    ASSERT(alpha);
+    ASSERT(mb);
+
+    qp = mb->qpY;
+    /* table indices: qp plus the filter offsets, clipped to 0..51 */
+    indexA = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetA);
+    indexB = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetB);
+    /* Internal edge values ([x][1]); the [x][0] macroblock edge entries get
+     * the same defaults, overridden below when the neighbour qp differs */
+    alpha[0][1] = alphas[indexA];
+    alpha[1][1] = alphas[indexA];
+    alpha[1][0] = alphas[indexA];
+    alpha[0][0] = alphas[indexA];
+    beta[0][1] = betas[indexB];
+    beta[1][1] = betas[indexB];
+    beta[1][0] = betas[indexB];
+    beta[0][0] = betas[indexB];
+
+    /* vertical scan order */
+    for (i = 0; i < 2; i++)
+    {
+        u32 t1, t2;
+        /* t1 and t2 alternate: the next bs value is loaded before each store */
+        t1 = bs[i][0];
+        t2 = bs[i][1];
+        threshold[i][0]  = (t1) ? tc0[indexA][t1] : 0;
+        t1 = bs[i][2];
+        threshold[i][1]  = (t2) ? tc0[indexA][t2] : 0;
+        t2 = bs[i][3];
+        threshold[i][2]  = (t1) ? tc0[indexA][t1] : 0;
+        t1 = bs[i][4];
+        threshold[i][3]  = (t2) ? tc0[indexA][t2] : 0;
+        t2 = bs[i][5];
+        threshold[i][4]  = (t1) ? tc0[indexA][t1] : 0;
+        t1 = bs[i][6];
+        threshold[i][5]  = (t2) ? tc0[indexA][t2] : 0;
+        t2 = bs[i][7];
+        threshold[i][6]  = (t1) ? tc0[indexA][t1] : 0;
+        t1 = bs[i][8];
+        threshold[i][7]  = (t2) ? tc0[indexA][t2] : 0;
+        t2 = bs[i][9];
+        threshold[i][8]  = (t1) ? tc0[indexA][t1] : 0;
+        t1 = bs[i][10];
+        threshold[i][9]  = (t2) ? tc0[indexA][t2] : 0;
+        t2 = bs[i][11];
+        threshold[i][10] = (t1) ? tc0[indexA][t1] : 0;
+        t1 = bs[i][12];
+        threshold[i][11] = (t2) ? tc0[indexA][t2] : 0;
+        t2 = bs[i][13];
+        threshold[i][12] = (t1) ? tc0[indexA][t1] : 0;
+        t1 = bs[i][14];
+        threshold[i][13] = (t2) ? tc0[indexA][t2] : 0;
+        t2 = bs[i][15];
+        threshold[i][14] = (t1) ? tc0[indexA][t1] : 0;
+        threshold[i][15] = (t2) ? tc0[indexA][t2] : 0;
+    }
+
+    if (filteringFlags & FILTER_TOP_EDGE)
+    {
+        qpTmp = mb->mbB->qpY;
+        if (qpTmp != qp)
+        {
+            u32 t1, t2, t3, t4;
+            qpAv = (qp + qpTmp + 1) >> 1; /* rounded average */
+
+            indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA);
+            indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB);
+            /* override top edge defaults; bS 0 or 4 gives threshold 0 */
+            alpha[1][0] = alphas[indexA];
+            beta[1][0] = betas[indexB];
+            t1 = bs[1][0];
+            t2 = bs[1][1];
+            t3 = bs[1][2];
+            t4 = bs[1][3];
+            threshold[1][0] = (t1 && (t1 < 4)) ? tc0[indexA][t1] : 0;
+            threshold[1][1] = (t2 && (t2 < 4)) ? tc0[indexA][t2] : 0;
+            threshold[1][2] = (t3 && (t3 < 4)) ? tc0[indexA][t3] : 0;
+            threshold[1][3] = (t4 && (t4 < 4)) ? tc0[indexA][t4] : 0;
+        }
+    }
+    if (filteringFlags & FILTER_LEFT_EDGE)
+    {
+        qpTmp = mb->mbA->qpY;
+        if (qpTmp != qp)
+        {
+            qpAv = (qp + qpTmp + 1) >> 1; /* rounded average */
+
+            indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA);
+            indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB);
+            /* override left edge defaults; bS 0 or 4 gives threshold 0 */
+            alpha[0][0] = alphas[indexA];
+            beta[0][0] = betas[indexB];
+            threshold[0][0] = (bs[0][0] && (bs[0][0] < 4)) ? tc0[indexA][bs[0][0]] : 0;
+            threshold[0][1] = (bs[0][1] && (bs[0][1] < 4)) ? tc0[indexA][bs[0][1]] : 0;
+            threshold[0][2] = (bs[0][2] && (bs[0][2] < 4)) ? tc0[indexA][bs[0][2]] : 0;
+            threshold[0][3] = (bs[0][3] && (bs[0][3] < 4)) ? tc0[indexA][bs[0][3]] : 0;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+    Function: GetChromaEdgeThresholds
+
+        Functional description:
+            Compute alpha, beta and tc0 thresholds for inner, left and top
+            chroma edges of a macroblock. Luma qp values are first mapped
+            through h264bsdQpC using chromaQpIndexOffset. Only bs entries
+            0..3 and 8..11 of each row are used. Nothing is returned.
+------------------------------------------------------------------------------*/
+void GetChromaEdgeThresholds(
+    mbStorage_t *mb,
+    u8 (*alpha)[2],
+    u8 (*beta)[2],
+    u8 (*threshold)[8],
+    u8 (*bs)[16],
+    u32 filteringFlags,
+    i32 chromaQpIndexOffset)
+{
+
+/* Variables */
+
+    u32 indexA, indexB;
+    u32 qpAv, qp, qpTmp;
+    u32 i;
+
+/* Code */
+
+    ASSERT(threshold);
+    ASSERT(bs);
+    ASSERT(beta);
+    ASSERT(alpha);
+    ASSERT(mb);
+    ASSERT(mb); /* NOTE(review): duplicate of the ASSERT above */
+    /* from here on qp is the chroma qp derived from the luma qp */
+    qp = mb->qpY;
+    qp = h264bsdQpC[CLIP3(0, 51, (i32)qp + chromaQpIndexOffset)];
+
+    indexA = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetA);
+    indexB = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetB);
+    /* same defaults for the inner ([x][1]) and macroblock ([x][0]) edges */
+    alpha[0][1] = alphas[indexA];
+    alpha[1][1] = alphas[indexA];
+    alpha[1][0] = alphas[indexA];
+    alpha[0][0] = alphas[indexA];
+    beta[0][1] = betas[indexB];
+    beta[1][1] = betas[indexB];
+    beta[1][0] = betas[indexB];
+    beta[0][0] = betas[indexB];
+    /* threshold[i][0..3] <- bs[i][0..3], threshold[i][4..7] <- bs[i][8..11] */
+    for (i = 0; i < 2; i++)
+    {
+        u32 t1, t2;
+        /* t1 and t2 alternate: the next bs value is loaded before each store */
+        t1 = bs[i][0];
+        t2 = bs[i][1];
+        threshold[i][0]  = (t1) ? tc0[indexA][t1] : 0;
+        t1 = bs[i][2];
+        threshold[i][1]  = (t2) ? tc0[indexA][t2] : 0;
+        t2 = bs[i][3];
+        threshold[i][2]  = (t1) ? tc0[indexA][t1] : 0;
+        t1 = bs[i][8];
+        threshold[i][3]  = (t2) ? tc0[indexA][t2] : 0;
+        t2 = bs[i][9];
+        threshold[i][4]  = (t1) ? tc0[indexA][t1] : 0;
+        t1 = bs[i][10];
+        threshold[i][5]  = (t2) ? tc0[indexA][t2] : 0;
+        t2 = bs[i][11];
+        threshold[i][6]  = (t1) ? tc0[indexA][t1] : 0;
+        threshold[i][7]  = (t2) ? tc0[indexA][t2] : 0;
+    }
+
+    if (filteringFlags & FILTER_TOP_EDGE)
+    {
+        qpTmp = mb->mbB->qpY;
+        if (qpTmp != mb->qpY) /* compares the luma qp values */
+        {
+            u32 t1, t2, t3, t4;
+            qpTmp = h264bsdQpC[CLIP3(0, 51, (i32)qpTmp + chromaQpIndexOffset)];
+            qpAv = (qp + qpTmp + 1) >> 1; /* rounded average of chroma qp */
+
+            indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA);
+            indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB);
+            /* override the top edge defaults */
+            alpha[1][0] = alphas[indexA];
+            beta[1][0] = betas[indexB];
+
+            t1 = bs[1][0];
+            t2 = bs[1][1];
+            t3 = bs[1][2];
+            t4 = bs[1][3];
+            threshold[1][0] = (t1) ? tc0[indexA][t1] : 0;
+            threshold[1][1] = (t2) ? tc0[indexA][t2] : 0;
+            threshold[1][2] = (t3) ? tc0[indexA][t3] : 0;
+            threshold[1][3] = (t4) ? tc0[indexA][t4] : 0;
+        }
+    }
+    if (filteringFlags & FILTER_LEFT_EDGE)
+    {
+        qpTmp = mb->mbA->qpY;
+        if (qpTmp != mb->qpY) /* compares the luma qp values */
+        {
+            /* map the neighbour's qp to chroma, then average (rounded) */
+            qpTmp = h264bsdQpC[CLIP3(0, 51, (i32)qpTmp + chromaQpIndexOffset)];
+            qpAv = (qp + qpTmp + 1) >> 1;
+
+            indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA);
+            indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB);
+            /* override the left edge defaults */
+            alpha[0][0] = alphas[indexA];
+            beta[0][0] = betas[indexB];
+            threshold[0][0] = (bs[0][0]) ? tc0[indexA][bs[0][0]] : 0;
+            threshold[0][1] = (bs[0][1]) ? tc0[indexA][bs[0][1]] : 0;
+            threshold[0][2] = (bs[0][2]) ? tc0[indexA][bs[0][2]] : 0;
+            threshold[0][3] = (bs[0][3]) ? tc0[indexA][bs[0][3]] : 0;
+        }
+    }
+
+}
+
+#endif /* H264DEC_OMXDL */
+
+/*lint +e701 +e702 */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.h
new file mode 100755
index 0000000..2571dda
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_DEBLOCKING_H
+#define H264SWDEC_DEBLOCKING_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_image.h"
+#include "h264bsd_macroblock_layer.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+void h264bsdFilterPicture(
+  image_t *image,   /* presumably the picture to be deblocked - TODO confirm */
+  mbStorage_t *mb); /* presumably its macroblock storage - TODO confirm */
+
+#endif /* #ifndef H264SWDEC_DEBLOCKING_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.c
new file mode 100644
index 0000000..a816871
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.c
@@ -0,0 +1,961 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdInit
+          h264bsdDecode
+          h264bsdShutdown
+          h264bsdCurrentImage
+          h264bsdNextOutputPicture
+          h264bsdPicWidth
+          h264bsdPicHeight
+          h264bsdFlushBuffer
+          h264bsdCheckValidParamSets
+          h264bsdVideoRange
+          h264bsdMatrixCoefficients
+          h264bsdCroppingParams
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_decoder.h"
+#include "h264bsd_nal_unit.h"
+#include "h264bsd_byte_stream.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_pic_param_set.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_slice_data.h"
+#include "h264bsd_neighbour.h"
+#include "h264bsd_util.h"
+#include "h264bsd_dpb.h"
+#include "h264bsd_deblocking.h"
+#include "h264bsd_conceal.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdInit
+
+        Functional description:
+            Initialize the decoder.
+
+        Inputs:
+            noOutputReordering  flag telling the decoder that it does not
+                                have to perform reordering of display images.
+
+        Outputs:
+            pStorage            pointer to initialized storage structure
+
+        Returns:
+            HANTRO_OK           storage initialized, mbLayer allocated
+            HANTRO_NOK          allocation of the mbLayer buffer failed
+------------------------------------------------------------------------------*/
+
+u32 h264bsdInit(storage_t *pStorage, u32 noOutputReordering)
+{
+
+/* Variables */
+    u32 mbLayerBytes;
+/* Code */
+
+    ASSERT(pStorage);
+
+    h264bsdInitStorage(pStorage);
+
+    /* round the mbLayer allocation up to the next multiple of 64 bytes so
+     * that a NEON optimized "memset" can be used to clear the structure */
+    mbLayerBytes = (sizeof(macroblockLayer_t) + 63) & ~0x3F;
+
+    pStorage->mbLayer = (macroblockLayer_t*)H264SwDecMalloc(mbLayerBytes);
+    if (pStorage->mbLayer == NULL)
+        return HANTRO_NOK;
+
+    if (noOutputReordering != 0)
+        pStorage->noReordering = HANTRO_TRUE;
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdDecode
+
+        Functional description:
+            Decode a NAL unit. This function calls other modules to perform
+            tasks like
+                * extract and decode NAL unit from the byte stream
+                * decode parameter sets
+                * decode slice header and slice data
+                * conceal errors in the picture
+                * perform deblocking filtering
+
+            This function contains top level control logic of the decoder.
+
+        Inputs:
+            pStorage        pointer to storage data structure
+            byteStrm        pointer to stream buffer given by application
+            len             length of the buffer in bytes
+            picId           identifier for a picture, assigned by the
+                            application
+
+        Outputs:
+            readBytes       number of bytes read from the stream is stored
+                            here
+
+        Returns:
+            H264BSD_RDY             decoding finished, nothing special
+            H264BSD_PIC_RDY         decoding of a picture finished
+            H264BSD_HDRS_RDY        param sets activated, information like
+                                    picture dimensions etc can be read
+            H264BSD_ERROR           error in decoding
+            H264BSD_PARAM_SET_ERROR serious error in decoding, failed to
+                                    activate param sets
+            H264BSD_MEMALLOC_ERROR  memory allocation failed in activation
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecode(storage_t *pStorage, u8 *byteStrm, u32 len, u32 picId,
+    u32 *readBytes)
+{
+
+/* Variables */
+
+    u32 tmp, ppsId, spsId;
+    i32 picOrderCnt;
+    nalUnit_t nalUnit;
+    seqParamSet_t seqParamSet;
+    picParamSet_t picParamSet;
+    strmData_t strm;
+    u32 accessUnitBoundaryFlag = HANTRO_FALSE;
+    u32 picReady = HANTRO_FALSE;
+
+/* Code */
+
+    ASSERT(pStorage);
+    ASSERT(byteStrm);
+    ASSERT(len);
+    ASSERT(readBytes);
+
+    /* if previous buffer was not finished and same pointer given -> skip NAL
+     * unit extraction */
+    if (pStorage->prevBufNotFinished && byteStrm == pStorage->prevBufPointer)
+    {
+        strm = pStorage->strm[0];
+        strm.pStrmCurrPos = strm.pStrmBuffStart;
+        strm.strmBuffReadBits = strm.bitPosInWord = 0;
+        *readBytes = pStorage->prevBytesConsumed;
+    }
+    else
+    {
+        tmp = h264bsdExtractNalUnit(byteStrm, len, &strm, readBytes);
+        if (tmp != HANTRO_OK)
+        {
+            EPRINT("BYTE_STREAM");
+            return(H264BSD_ERROR);
+        }
+        /* store stream */
+        pStorage->strm[0] = strm;
+        pStorage->prevBytesConsumed = *readBytes;
+        pStorage->prevBufPointer = byteStrm;
+    }
+    pStorage->prevBufNotFinished = HANTRO_FALSE;
+
+    tmp = h264bsdDecodeNalUnit(&strm, &nalUnit);
+    if (tmp != HANTRO_OK)
+    {
+        EPRINT("NAL_UNIT");
+        return(H264BSD_ERROR);
+    }
+
+    /* Discard unspecified, reserved, SPS extension and auxiliary picture slices */
+    if(nalUnit.nalUnitType == 0 || nalUnit.nalUnitType >= 13)
+    {
+        DEBUG(("DISCARDED NAL (UNSPECIFIED, REGISTERED, SPS ext or AUX slice)\n"));
+        return(H264BSD_RDY);
+    }
+
+    tmp = h264bsdCheckAccessUnitBoundary(
+      &strm,
+      &nalUnit,
+      pStorage,
+      &accessUnitBoundaryFlag);
+    if (tmp != HANTRO_OK)
+    {
+        EPRINT("ACCESS UNIT BOUNDARY CHECK");
+        if (tmp == PARAM_SET_ERROR)
+            return(H264BSD_PARAM_SET_ERROR);
+        else
+            return(H264BSD_ERROR);
+    }
+
+    if ( accessUnitBoundaryFlag )
+    {
+        DEBUG(("Access unit boundary\n"));
+        /* conceal if picture started and param sets activated */
+        if (pStorage->picStarted && pStorage->activeSps != NULL)
+        {
+            DEBUG(("CONCEALING..."));
+
+            /* return error if second phase of
+             * initialization is not completed */
+            if (pStorage->pendingActivation)
+            {
+                EPRINT("Pending activation not completed");
+                return (H264BSD_ERROR);
+            }
+
+            if (!pStorage->validSliceInAccessUnit)
+            {
+                pStorage->currImage->data =
+                    h264bsdAllocateDpbImage(pStorage->dpb);
+                h264bsdInitRefPicList(pStorage->dpb);
+                tmp = h264bsdConceal(pStorage, pStorage->currImage, P_SLICE);
+            }
+            else
+                tmp = h264bsdConceal(pStorage, pStorage->currImage,
+                    pStorage->sliceHeader->sliceType);
+
+            picReady = HANTRO_TRUE;
+
+            /* current NAL unit should be decoded on next activation -> set
+             * readBytes to 0 */
+            *readBytes = 0;
+            pStorage->prevBufNotFinished = HANTRO_TRUE;
+            DEBUG(("...DONE\n"));
+        }
+        else
+        {
+            pStorage->validSliceInAccessUnit = HANTRO_FALSE;
+        }
+        pStorage->skipRedundantSlices = HANTRO_FALSE;
+    }
+
+    if (!picReady)
+    {
+        switch (nalUnit.nalUnitType)
+        {
+            case NAL_SEQ_PARAM_SET:
+                DEBUG(("SEQ PARAM SET\n"));
+                tmp = h264bsdDecodeSeqParamSet(&strm, &seqParamSet);
+                if (tmp != HANTRO_OK)
+                {
+                    EPRINT("SEQ_PARAM_SET");
+                    FREE(seqParamSet.offsetForRefFrame);
+                    FREE(seqParamSet.vuiParameters);
+                    return(H264BSD_ERROR);
+                }
+                tmp = h264bsdStoreSeqParamSet(pStorage, &seqParamSet);
+                break;
+
+            case NAL_PIC_PARAM_SET:
+                DEBUG(("PIC PARAM SET\n"));
+                tmp = h264bsdDecodePicParamSet(&strm, &picParamSet);
+                if (tmp != HANTRO_OK)
+                {
+                    EPRINT("PIC_PARAM_SET");
+                    FREE(picParamSet.runLength);
+                    FREE(picParamSet.topLeft);
+                    FREE(picParamSet.bottomRight);
+                    FREE(picParamSet.sliceGroupId);
+                    return(H264BSD_ERROR);
+                }
+                tmp = h264bsdStorePicParamSet(pStorage, &picParamSet);
+                break;
+
+            case NAL_CODED_SLICE_IDR:
+                DEBUG(("IDR "));
+                /* fall through */
+            case NAL_CODED_SLICE:
+                DEBUG(("SLICE HEADER\n"));
+
+                /* picture successfully finished and still decoding same old
+                 * access unit -> no need to decode redundant slices */
+                if (pStorage->skipRedundantSlices)
+                    return(H264BSD_RDY);
+
+                pStorage->picStarted = HANTRO_TRUE;
+
+                if (h264bsdIsStartOfPicture(pStorage))
+                {
+                    pStorage->numConcealedMbs = 0;
+                    pStorage->currentPicId    = picId;
+
+                    tmp = h264bsdCheckPpsId(&strm, &ppsId);
+                    ASSERT(tmp == HANTRO_OK);
+                    /* store old activeSpsId and return headers ready
+                     * indication if activeSps changes */
+                    spsId = pStorage->activeSpsId;
+                    tmp = h264bsdActivateParamSets(pStorage, ppsId,
+                            IS_IDR_NAL_UNIT(&nalUnit) ?
+                            HANTRO_TRUE : HANTRO_FALSE);
+                    if (tmp != HANTRO_OK)
+                    {
+                        EPRINT("Param set activation");
+                        pStorage->activePpsId = MAX_NUM_PIC_PARAM_SETS;
+                        pStorage->activePps = NULL;
+                        pStorage->activeSpsId = MAX_NUM_SEQ_PARAM_SETS;
+                        pStorage->activeSps = NULL;
+                        pStorage->pendingActivation = HANTRO_FALSE;
+
+                        if(tmp == MEMORY_ALLOCATION_ERROR)
+                        {
+                            return H264BSD_MEMALLOC_ERROR;
+                        }
+                        else
+                            return(H264BSD_PARAM_SET_ERROR);
+                    }
+
+                    if (spsId != pStorage->activeSpsId)
+                    {
+                        seqParamSet_t *oldSPS = NULL;
+                        seqParamSet_t *newSPS = pStorage->activeSps;
+                        u32 noOutputOfPriorPicsFlag = 1;
+
+                        if(pStorage->oldSpsId < MAX_NUM_SEQ_PARAM_SETS)
+                        {
+                            oldSPS = pStorage->sps[pStorage->oldSpsId];
+                        }
+
+                        *readBytes = 0;
+                        pStorage->prevBufNotFinished = HANTRO_TRUE;
+
+
+                        if(nalUnit.nalUnitType == NAL_CODED_SLICE_IDR)
+                        {
+                            tmp =
+                            h264bsdCheckPriorPicsFlag(&noOutputOfPriorPicsFlag,
+                                                          &strm, newSPS,
+                                                          pStorage->activePps,
+                                                          nalUnit.nalUnitType);
+                        }
+                        else
+                        {
+                            tmp = HANTRO_NOK;
+                        }
+
+                        if((tmp != HANTRO_OK) ||
+                           (noOutputOfPriorPicsFlag != 0) ||
+                           (pStorage->dpb->noReordering) ||
+                           (oldSPS == NULL) ||
+                           (oldSPS->picWidthInMbs != newSPS->picWidthInMbs) ||
+                           (oldSPS->picHeightInMbs != newSPS->picHeightInMbs) ||
+                           (oldSPS->maxDpbSize != newSPS->maxDpbSize))
+                        {
+                            pStorage->dpb->flushed = 0;
+                        }
+                        else
+                        {
+                            h264bsdFlushDpb(pStorage->dpb);
+                        }
+
+                        pStorage->oldSpsId = pStorage->activeSpsId;
+
+                        return(H264BSD_HDRS_RDY);
+                    }
+                }
+
+                /* return error if second phase of
+                 * initialization is not completed */
+                if (pStorage->pendingActivation)
+                {
+                    EPRINT("Pending activation not completed");
+                    return (H264BSD_ERROR);
+                }
+                tmp = h264bsdDecodeSliceHeader(&strm, pStorage->sliceHeader + 1,
+                    pStorage->activeSps, pStorage->activePps, &nalUnit);
+                if (tmp != HANTRO_OK)
+                {
+                    EPRINT("SLICE_HEADER");
+                    return(H264BSD_ERROR);
+                }
+                if (h264bsdIsStartOfPicture(pStorage))
+                {
+                    if (!IS_IDR_NAL_UNIT(&nalUnit))
+                    {
+                        tmp = h264bsdCheckGapsInFrameNum(pStorage->dpb,
+                            pStorage->sliceHeader[1].frameNum,
+                            nalUnit.nalRefIdc != 0 ?
+                            HANTRO_TRUE : HANTRO_FALSE,
+                            pStorage->activeSps->
+                            gapsInFrameNumValueAllowedFlag);
+                        if (tmp != HANTRO_OK)
+                        {
+                            EPRINT("Gaps in frame num");
+                            return(H264BSD_ERROR);
+                        }
+                    }
+                    pStorage->currImage->data =
+                        h264bsdAllocateDpbImage(pStorage->dpb);
+                }
+
+                /* store slice header to storage if successfully decoded */
+                pStorage->sliceHeader[0] = pStorage->sliceHeader[1];
+                pStorage->validSliceInAccessUnit = HANTRO_TRUE;
+                pStorage->prevNalUnit[0] = nalUnit;
+
+                h264bsdComputeSliceGroupMap(pStorage,
+                    pStorage->sliceHeader->sliceGroupChangeCycle);
+
+                h264bsdInitRefPicList(pStorage->dpb);
+                tmp = h264bsdReorderRefPicList(pStorage->dpb,
+                    &pStorage->sliceHeader->refPicListReordering,
+                    pStorage->sliceHeader->frameNum,
+                    pStorage->sliceHeader->numRefIdxL0Active);
+                if (tmp != HANTRO_OK)
+                {
+                    EPRINT("Reordering");
+                    return(H264BSD_ERROR);
+                }
+
+                DEBUG(("SLICE DATA, FIRST %d\n",
+                        pStorage->sliceHeader->firstMbInSlice));
+                tmp = h264bsdDecodeSliceData(&strm, pStorage,
+                    pStorage->currImage, pStorage->sliceHeader);
+                if (tmp != HANTRO_OK)
+                {
+                    EPRINT("SLICE_DATA");
+                    h264bsdMarkSliceCorrupted(pStorage,
+                        pStorage->sliceHeader->firstMbInSlice);
+                    return(H264BSD_ERROR);
+                }
+
+                if (h264bsdIsEndOfPicture(pStorage))
+                {
+                    picReady = HANTRO_TRUE;
+                    pStorage->skipRedundantSlices = HANTRO_TRUE;
+                }
+                break;
+
+            case NAL_SEI:
+                DEBUG(("SEI MESSAGE, NOT DECODED"));
+                break;
+
+            default:
+                DEBUG(("NOT IMPLEMENTED YET %d\n",nalUnit.nalUnitType));
+        }
+    }
+
+    if (picReady)
+    {
+        h264bsdFilterPicture(pStorage->currImage, pStorage->mb);
+
+        h264bsdResetStorage(pStorage);
+
+        picOrderCnt = h264bsdDecodePicOrderCnt(pStorage->poc,
+            pStorage->activeSps, pStorage->sliceHeader, pStorage->prevNalUnit);
+
+        if (pStorage->validSliceInAccessUnit)
+        {
+            if (pStorage->prevNalUnit->nalRefIdc)
+            {
+                tmp = h264bsdMarkDecRefPic(pStorage->dpb,
+                    &pStorage->sliceHeader->decRefPicMarking,
+                    pStorage->currImage, pStorage->sliceHeader->frameNum,
+                    picOrderCnt,
+                    IS_IDR_NAL_UNIT(pStorage->prevNalUnit) ?
+                    HANTRO_TRUE : HANTRO_FALSE,
+                    pStorage->currentPicId, pStorage->numConcealedMbs);
+            }
+            /* non-reference picture, just store for possible display
+             * reordering */
+            else
+            {
+                tmp = h264bsdMarkDecRefPic(pStorage->dpb, NULL,
+                    pStorage->currImage, pStorage->sliceHeader->frameNum,
+                    picOrderCnt,
+                    IS_IDR_NAL_UNIT(pStorage->prevNalUnit) ?
+                    HANTRO_TRUE : HANTRO_FALSE,
+                    pStorage->currentPicId, pStorage->numConcealedMbs);
+            }
+        }
+
+        pStorage->picStarted = HANTRO_FALSE;
+        pStorage->validSliceInAccessUnit = HANTRO_FALSE;
+
+        return(H264BSD_PIC_RDY);
+    }
+    else
+        return(H264BSD_RDY);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdShutdown
+
+        Functional description:
+            Shutdown a decoder instance. Function frees all the memories
+            allocated for the decoder instance.
+
+        Inputs:
+            pStorage    pointer to storage data structure
+
+        Returns:
+            none
+
+
+------------------------------------------------------------------------------*/
+
+void h264bsdShutdown(storage_t *pStorage)
+{
+
+/* Variables */
+
+    u32 i;
+
+/* Code */
+
+    ASSERT(pStorage);
+    /* release every stored SPS: the buffers it owns first, then the SPS */
+    for (i = 0; i < MAX_NUM_SEQ_PARAM_SETS; i++)
+    {
+        if (pStorage->sps[i])
+        {
+            FREE(pStorage->sps[i]->offsetForRefFrame);
+            FREE(pStorage->sps[i]->vuiParameters);
+            FREE(pStorage->sps[i]);
+        }
+    }
+    /* same order for every stored PPS and the buffers it owns */
+    for (i = 0; i < MAX_NUM_PIC_PARAM_SETS; i++)
+    {
+        if (pStorage->pps[i])
+        {
+            FREE(pStorage->pps[i]->runLength);
+            FREE(pStorage->pps[i]->topLeft);
+            FREE(pStorage->pps[i]->bottomRight);
+            FREE(pStorage->pps[i]->sliceGroupId);
+            FREE(pStorage->pps[i]);
+        }
+    }
+    /* buffers hanging directly off the storage structure */
+    FREE(pStorage->mbLayer);
+    FREE(pStorage->mb);
+    FREE(pStorage->sliceGroupMap);
+    /* the dpb is torn down by its own module; pStorage itself is not freed */
+    h264bsdFreeDpb(pStorage->dpb);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdNextOutputPicture
+
+        Functional description:
+            Get next output picture in display order.
+
+        Inputs:
+            pStorage    pointer to storage data structure
+
+        Outputs:
+            picId       identifier of the picture will be stored here
+            isIdrPic    IDR flag of the picture will be stored here
+            numErrMbs   number of concealed macroblocks in the picture
+                        will be stored here
+
+        Returns:
+            pointer to the picture data
+            NULL if no pictures available for display
+
+------------------------------------------------------------------------------*/
+
+u8* h264bsdNextOutputPicture(storage_t *pStorage, u32 *picId, u32 *isIdrPic,
+    u32 *numErrMbs)
+{
+
+/* Variables */
+
+    dpbOutPicture_t *nextPic;
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    nextPic = h264bsdDpbOutputPicture(pStorage->dpb);
+
+    /* nothing available for display: output arguments are left untouched */
+    if (nextPic == NULL)
+        return(NULL);
+
+    /* hand the picture's metadata back together with its data pointer */
+    *picId = nextPic->picId;
+    *isIdrPic = nextPic->isIdr;
+    *numErrMbs = nextPic->numErrMbs;
+    return (nextPic->data);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdPicWidth
+
+        Functional description:
+            Get width of the picture in macroblocks
+
+        Inputs:
+            pStorage    pointer to storage data structure
+
+        Outputs:
+            none
+
+        Returns:
+            picture width
+            0 if parameter sets not yet activated
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdPicWidth(storage_t *pStorage)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    /* the width is only known once an SPS has been activated */
+    if (!pStorage->activeSps)
+        return(0);
+
+    return(pStorage->activeSps->picWidthInMbs);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdPicHeight
+
+        Functional description:
+            Get height of the picture in macroblocks
+
+        Inputs:
+            pStorage    pointer to storage data structure
+
+        Outputs:
+            none
+
+        Returns:
+            picture height
+            0 if parameter sets not yet activated
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdPicHeight(storage_t *pStorage)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    /* the height is only known once an SPS has been activated */
+    if (!pStorage->activeSps)
+        return(0);
+
+    return(pStorage->activeSps->picHeightInMbs);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFlushBuffer
+
+        Functional description:
+            Flush the decoded picture buffer, see dpb.c for details
+
+        Inputs:
+            pStorage    pointer to storage data structure
+
+------------------------------------------------------------------------------*/
+
+void h264bsdFlushBuffer(storage_t *pStorage)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(pStorage);
+    /* thin wrapper: the flush itself is implemented by the dpb module */
+    h264bsdFlushDpb(pStorage->dpb);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCheckValidParamSets
+
+        Functional description:
+            Check if any valid parameter set combinations (SPS/PPS) exists.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Returns:
+            1       at least one valid SPS/PPS combination found
+            0       no valid param set combinations found
+
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckValidParamSets(storage_t *pStorage)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    /* map the HANTRO status code onto the documented 1 / 0 result */
+    return((h264bsdValidParamSets(pStorage) != HANTRO_OK) ? 0 : 1);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdVideoRange
+
+        Functional description:
+            Get value of video_full_range_flag received in the VUI data.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Returns:
+            1   video_full_range_flag received and value is 1
+            0   otherwise
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdVideoRange(storage_t *pStorage)
+{
+
+/* Variables */
+
+    seqParamSet_t *sps;
+
+/* Code */
+
+    ASSERT(pStorage);
+    sps = pStorage->activeSps;
+
+    /* the flag only counts if VUI data with a video signal type was received */
+    if (!sps || !sps->vuiParametersPresentFlag || !sps->vuiParameters ||
+        !sps->vuiParameters->videoSignalTypePresentFlag)
+        return(0); /* default value of video_full_range_flag is 0 */
+    return(sps->vuiParameters->videoFullRangeFlag ? 1 : 0);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdMatrixCoefficients
+
+        Functional description:
+            Get value of matrix_coefficients received in the VUI data
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Outputs:
+            value of matrix_coefficients if received
+            2   otherwise (this is the default value)
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdMatrixCoefficients(storage_t *pStorage)
+{
+
+/* Variables */
+
+    seqParamSet_t *sps;
+
+/* Code */
+
+    ASSERT(pStorage);
+    sps = pStorage->activeSps;
+    /* the value is only present when the VUI carried a colour description */
+    if (!sps || !sps->vuiParametersPresentFlag || !sps->vuiParameters ||
+        !sps->vuiParameters->videoSignalTypePresentFlag ||
+        !sps->vuiParameters->colourDescriptionPresentFlag)
+        return(2); /* default unspecified */
+    return(sps->vuiParameters->matrixCoefficients);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCroppingParams
+
+        Functional description:
+            Get cropping parameters of the active SPS
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Outputs:
+            croppingFlag    flag indicating if cropping params present is
+                            stored here
+            leftOffset      cropping left offset in pixels is stored here
+            width           width of the image after cropping is stored here
+            topOffset       cropping top offset in pixels is stored here
+            height          height of the image after cropping is stored here
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdCroppingParams(storage_t *pStorage, u32 *croppingFlag,
+    u32 *leftOffset, u32 *width, u32 *topOffset, u32 *height)
+{
+
+/* Variables */
+
+    seqParamSet_t *sps;
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    sps = pStorage->activeSps;
+
+    /* no active SPS, or an SPS without cropping: report everything as zero */
+    if (!sps || !sps->frameCroppingFlag)
+    {
+        *croppingFlag = 0;
+        *leftOffset = *width = *topOffset = *height = 0;
+        return;
+    }
+
+    /* offsets are doubled to get pixels; sizes are 16 pixels per macroblock */
+    *croppingFlag = 1;
+    *leftOffset = 2 * sps->frameCropLeftOffset;
+    *width = 16 * sps->picWidthInMbs -
+             2 * (sps->frameCropLeftOffset + sps->frameCropRightOffset);
+    *topOffset = 2 * sps->frameCropTopOffset;
+    *height = 16 * sps->picHeightInMbs -
+              2 * (sps->frameCropTopOffset + sps->frameCropBottomOffset);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdSampleAspectRatio
+
+        Functional description:
+            Get aspect ratio received in the VUI data
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Outputs:
+            sarWidth    sample aspect ratio width
+            sarHeight   sample aspect ratio height
+
+------------------------------------------------------------------------------*/
+
+void h264bsdSampleAspectRatio(storage_t *pStorage, u32 *sarWidth, u32 *sarHeight)
+{
+
+/* Variables */
+    seqParamSet_t *sps;
+    u32 ratioW = 1, ratioH = 1; /* 1:1 unless the VUI says otherwise */
+/* Code */
+
+    ASSERT(pStorage);
+
+    sps = pStorage->activeSps;
+    if (sps && sps->vuiParametersPresentFlag && sps->vuiParameters &&
+        sps->vuiParameters->aspectRatioPresentFlag)
+    {
+        /* fixed table entries first, then the explicit ratio, else 0:0 */
+        switch (sps->vuiParameters->aspectRatioIdc)
+        {
+            case ASPECT_RATIO_1_1:     ratioW =   1; ratioH =  1; break;
+            case ASPECT_RATIO_12_11:   ratioW =  12; ratioH = 11; break;
+            case ASPECT_RATIO_10_11:   ratioW =  10; ratioH = 11; break;
+            case ASPECT_RATIO_16_11:   ratioW =  16; ratioH = 11; break;
+            case ASPECT_RATIO_40_33:   ratioW =  40; ratioH = 33; break;
+            case ASPECT_RATIO_24_11:   ratioW =  24; ratioH = 11; break;
+            case ASPECT_RATIO_20_11:   ratioW =  20; ratioH = 11; break;
+            case ASPECT_RATIO_32_11:   ratioW =  32; ratioH = 11; break;
+            case ASPECT_RATIO_80_33:   ratioW =  80; ratioH = 33; break;
+            case ASPECT_RATIO_18_11:   ratioW =  18; ratioH = 11; break;
+            case ASPECT_RATIO_15_11:   ratioW =  15; ratioH = 11; break;
+            case ASPECT_RATIO_64_33:   ratioW =  64; ratioH = 33; break;
+            case ASPECT_RATIO_160_99:  ratioW = 160; ratioH = 99; break;
+            case ASPECT_RATIO_EXTENDED_SAR:
+                ratioW = sps->vuiParameters->sarWidth;
+                ratioH = sps->vuiParameters->sarHeight;
+                /* a zero in either term is reported as 0:0 */
+                if ((ratioW == 0) || (ratioH == 0))
+                    ratioW = ratioH = 0;
+                break;
+            /* unspecified and any unlisted idc value both yield 0:0 */
+            case ASPECT_RATIO_UNSPECIFIED:
+            default:
+                ratioW = ratioH = 0;
+                break;
+        }
+    }
+
+    /* set aspect ratio*/
+    *sarWidth = ratioW;
+    *sarHeight = ratioH;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdProfile
+
+        Functional description:
+            Get profile information from active SPS
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Returns:
+            profileIdc of the active SPS, 0 if no SPS is active
+
+------------------------------------------------------------------------------*/
+u32 h264bsdProfile(storage_t *pStorage)
+{
+    ASSERT(pStorage); /* same argument check as the other accessors */
+
+    /* profileIdc of the active SPS; 0 when no SPS has been activated */
+    return(pStorage->activeSps ? pStorage->activeSps->profileIdc : 0);
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.h
new file mode 100644
index 0000000..8336523
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_DECODER_H
+#define H264SWDEC_DECODER_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_storage.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/* enumerated return values of the functions */
+enum {
+    H264BSD_RDY,             /* input handled, no picture completed */
+    H264BSD_PIC_RDY,         /* a picture was completed */
+    H264BSD_HDRS_RDY,        /* a different SPS became active */
+    H264BSD_ERROR,           /* decoding of the input failed */
+    H264BSD_PARAM_SET_ERROR, /* parameter set activation failed */
+    H264BSD_MEMALLOC_ERROR   /* activation failed on memory allocation */
+};
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdInit(storage_t *pStorage, u32 noOutputReordering);
+u32 h264bsdDecode(storage_t *pStorage, u8 *byteStrm, u32 len, u32 picId,
+    u32 *readBytes);
+void h264bsdShutdown(storage_t *pStorage);
+
+u8* h264bsdNextOutputPicture(storage_t *pStorage, u32 *picId, u32 *isIdrPic,
+    u32 *numErrMbs);
+
+u32 h264bsdPicWidth(storage_t *pStorage);
+u32 h264bsdPicHeight(storage_t *pStorage);
+u32 h264bsdVideoRange(storage_t *pStorage);
+u32 h264bsdMatrixCoefficients(storage_t *pStorage);
+void h264bsdCroppingParams(storage_t *pStorage, u32 *croppingFlag,
+    u32 *left, u32 *width, u32 *top, u32 *height);
+void h264bsdSampleAspectRatio(storage_t *pStorage,
+                              u32 *sarWidth, u32 *sarHeight);
+u32 h264bsdCheckValidParamSets(storage_t *pStorage);
+
+void h264bsdFlushBuffer(storage_t *pStorage);
+
+u32 h264bsdProfile(storage_t *pStorage);
+
+#endif /* #ifndef H264SWDEC_DECODER_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.c
new file mode 100755
index 0000000..9517d0a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.c
@@ -0,0 +1,1584 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          ComparePictures
+          h264bsdReorderRefPicList
+          Mmcop1
+          Mmcop2
+          Mmcop3
+          Mmcop4
+          Mmcop5
+          Mmcop6
+          h264bsdMarkDecRefPic
+          h264bsdGetRefPicData
+          h264bsdAllocateDpbImage
+          SlidingWindowRefPicMarking
+          h264bsdInitDpb
+          h264bsdResetDpb
+          h264bsdInitRefPicList
+          FindDpbPic
+          SetPicNums
+          h264bsdCheckGapsInFrameNum
+          FindSmallestPicOrderCnt
+          OutputPicture
+          h264bsdDpbOutputPicture
+          h264bsdFlushDpb
+          h264bsdFreeDpb
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_cfg.h"
+#include "h264bsd_dpb.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_image.h"
+#include "h264bsd_util.h"
+#include "basetype.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/* macros to determine picture status. Note that IS_SHORT_TERM macro returns
+ * true also for non-existing pictures because non-existing pictures are
+ * regarded short term pictures according to H.264 standard */
+#define IS_REFERENCE(a) ((a).status)
+#define IS_EXISTING(a) ((a).status > NON_EXISTING)
+#define IS_SHORT_TERM(a) \
+    ((a).status == NON_EXISTING || (a).status == SHORT_TERM)
+#define IS_LONG_TERM(a) ((a).status == LONG_TERM)
+
+/* macro to set a picture unused for reference (expansion ends with ';') */
+#define SET_UNUSED(a) (a).status = UNUSED;
+
+#define MAX_NUM_REF_IDX_L0_ACTIVE 16
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static i32 ComparePictures(const void *ptr1, const void *ptr2);
+
+static u32 Mmcop1(dpbStorage_t *dpb, u32 currPicNum, u32 differenceOfPicNums);
+
+static u32 Mmcop2(dpbStorage_t *dpb, u32 longTermPicNum);
+
+static u32 Mmcop3(dpbStorage_t *dpb, u32 currPicNum, u32 differenceOfPicNums,
+    u32 longTermFrameIdx);
+
+static u32 Mmcop4(dpbStorage_t *dpb, u32 maxLongTermFrameIdx);
+
+static u32 Mmcop5(dpbStorage_t *dpb);
+
+static u32 Mmcop6(dpbStorage_t *dpb, u32 frameNum, i32 picOrderCnt,
+    u32 longTermFrameIdx);
+
+static u32 SlidingWindowRefPicMarking(dpbStorage_t *dpb);
+
+static i32 FindDpbPic(dpbStorage_t *dpb, i32 picNum, u32 isShortTerm);
+
+static void SetPicNums(dpbStorage_t *dpb, u32 currFrameNum);
+
+static dpbPicture_t* FindSmallestPicOrderCnt(dpbStorage_t *dpb);
+
+static u32 OutputPicture(dpbStorage_t *dpb);
+
+static void ShellSort(dpbPicture_t *pPic, u32 num);
+
+/*------------------------------------------------------------------------------
+
+    Function: ComparePictures
+
+        Functional description:
+            Function to compare dpb pictures, used by the ShellSort() function.
+            Order of the pictures after sorting shall be as follows:
+                1) short term reference pictures starting with the largest
+                   picNum
+                2) long term reference pictures starting with the smallest
+                   longTermPicNum
+                3) pictures unused for reference but needed for display
+                4) other pictures
+
+        Returns:
+            -1      pic 1 is greater than pic 2
+             0      equal from comparison point of view
+             1      pic 2 is greater than pic 1
+
+------------------------------------------------------------------------------*/
+
+static i32 ComparePictures(const void *ptr1, const void *ptr2)
+{
+
+/* Variables */
+
+    dpbPicture_t *picA, *picB;
+
+/* Code */
+
+    ASSERT(ptr1);
+    ASSERT(ptr2);
+
+    picA = (dpbPicture_t*)ptr1;
+    picB = (dpbPicture_t*)ptr2;
+
+    /* neither is a reference picture: the one still needed for display
+     * is the greater one */
+    if (!IS_REFERENCE(*picA) && !IS_REFERENCE(*picB))
+    {
+        if (picA->toBeDisplayed && !picB->toBeDisplayed)
+            return(-1);
+        if (!picA->toBeDisplayed && picB->toBeDisplayed)
+            return(1);
+        return(0);
+    }
+
+    /* exactly one is a reference picture -> that one is greater */
+    if (!IS_REFERENCE(*picB))
+        return(-1);
+    if (!IS_REFERENCE(*picA))
+        return(1);
+
+    /* both are reference pictures from here on; two short term pictures
+     * are ordered by picNum, larger picNum being greater */
+    if (IS_SHORT_TERM(*picA) && IS_SHORT_TERM(*picB))
+    {
+        if (picA->picNum > picB->picNum)
+            return(-1);
+        if (picA->picNum < picB->picNum)
+            return(1);
+        return(0);
+    }
+
+    /* exactly one is short term -> that one is greater than the long
+     * term picture */
+    if (IS_SHORT_TERM(*picA))
+        return(-1);
+    if (IS_SHORT_TERM(*picB))
+        return(1);
+
+    /* both are long term reference pictures: smaller picNum (which
+     * contains the longTermPicNum) is greater */
+    if (picA->picNum > picB->picNum)
+        return(1);
+    if (picA->picNum < picB->picNum)
+        return(-1);
+    return(0);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdReorderRefPicList
+
+        Functional description:
+            Function to perform reference picture list reordering based on
+            reordering commands received in the slice header. See details
+            of the process in the H.264 standard.
+
+        Inputs:
+            dpb             pointer to dpb storage structure
+            order           pointer to reordering commands
+            currFrameNum    current frame number
+            numRefIdxActive number of active reference indices for current
+                            picture
+
+        Outputs:
+            dpb             'list' field of the structure reordered
+
+        Returns:
+            HANTRO_OK      success
+            HANTRO_NOK     if non-existing pictures referred to in the
+                           reordering commands
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdReorderRefPicList(
+  dpbStorage_t *dpb,
+  refPicListReordering_t *order,
+  u32 currFrameNum,
+  u32 numRefIdxActive)
+{
+    /* Rebuilds the front of dpb->list from the commands stored in 'order'. */
+/* Variables */
+
+    u32 i, j, k, picNumPred, refIdx;
+    i32 picNum, picNumNoWrap, index;
+    u32 isShortTerm;
+
+/* Code */
+
+    ASSERT(order);
+    ASSERT(currFrameNum <= dpb->maxFrameNum);
+    ASSERT(numRefIdxActive <= MAX_NUM_REF_IDX_L0_ACTIVE);
+    /* refresh the picNum of short-term pictures relative to currFrameNum so
+     * that the FindDpbPic lookups below compare against current values */
+    SetPicNums(dpb, currFrameNum);
+    /* reordering flag not set -> the list is returned as it is */
+    if (!order->refPicListReorderingFlagL0)
+        return(HANTRO_OK);
+    /* refIdx: next list position to fill, picNumPred: running prediction */
+    refIdx     = 0;
+    picNumPred = currFrameNum;
+    /* a command whose reorderingOfPicNumsIdc is 3 or more ends the loop */
+    i = 0;
+    while (order->command[i].reorderingOfPicNumsIdc < 3)
+    {
+        /* short term: idc 0 subtracts absDiffPicNum, idc 1 adds it (wrapping) */
+        if (order->command[i].reorderingOfPicNumsIdc < 2)
+        {
+            if (order->command[i].reorderingOfPicNumsIdc == 0)
+            {
+                picNumNoWrap =
+                    (i32)picNumPred - (i32)order->command[i].absDiffPicNum;
+                if (picNumNoWrap < 0)
+                    picNumNoWrap += (i32)dpb->maxFrameNum;
+            }
+            else
+            {
+                picNumNoWrap =
+                    (i32)(picNumPred + order->command[i].absDiffPicNum);
+                if (picNumNoWrap >= (i32)dpb->maxFrameNum)
+                    picNumNoWrap -= (i32)dpb->maxFrameNum;
+            }
+            picNumPred = (u32)picNumNoWrap;
+            picNum = picNumNoWrap;
+            if ((u32)picNumNoWrap > currFrameNum)
+                picNum -= (i32)dpb->maxFrameNum; /* as in SetPicNums */
+            isShortTerm = HANTRO_TRUE;
+        }
+        /* long term: longTermPicNum is used directly */
+        else
+        {
+            picNum = (i32)order->command[i].longTermPicNum;
+            isShortTerm = HANTRO_FALSE;
+
+        }
+        /* locate the picture; a missing or non-existing one is an error */
+        index = FindDpbPic(dpb, picNum, isShortTerm);
+        if (index < 0 || !IS_EXISTING(dpb->buffer[index]))
+            return(HANTRO_NOK);
+
+        /* open a slot at refIdx: entries refIdx..numRefIdxActive-1 move up */
+        for (j = numRefIdxActive; j > refIdx; j--)
+            dpb->list[j] = dpb->list[j-1];
+        /* put picture into the list */
+        dpb->list[refIdx++] = &dpb->buffer[index];
+        /* compact the tail, dropping any later entry for the same picture */
+        for (j = k = refIdx; j <= numRefIdxActive; j++)
+            if(dpb->list[j] != &dpb->buffer[index])
+                dpb->list[k++] = dpb->list[j];
+
+        i++;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Mmcop1
+
+        Functional description:
+            Function to mark a short-term reference picture unused for
+            reference, memory_management_control_operation equal to 1
+
+        Returns:
+            HANTRO_OK      success
+            HANTRO_NOK     failure, picture does not exist in the buffer
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop1(dpbStorage_t *dpb, u32 currPicNum, u32 differenceOfPicNums)
+{
+    /* MMCO 1: the short-term picture to drop is addressed by its distance
+     * (differenceOfPicNums) below the current picture number. */
+    i32 targetPicNum;
+    i32 slot;
+
+    ASSERT(currPicNum < dpb->maxFrameNum);
+
+    targetPicNum = (i32)currPicNum - (i32)differenceOfPicNums;
+    slot = FindDpbPic(dpb, targetPicNum, HANTRO_TRUE);
+
+    if (slot >= 0)
+    {
+        SET_UNUSED(dpb->buffer[slot]);
+        dpb->numRefFrames--;
+        /* fullness drops only when the picture is not flagged for display */
+        if (!dpb->buffer[slot].toBeDisplayed)
+            dpb->fullness--;
+        return(HANTRO_OK);
+    }
+
+    /* no short-term picture carries the requested picNum */
+    return(HANTRO_NOK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Mmcop2
+
+        Functional description:
+            Function to mark a long-term reference picture unused for
+            reference, memory_management_control_operation equal to 2
+
+        Returns:
+            HANTRO_OK      success
+            HANTRO_NOK     failure, picture does not exist in the buffer
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop2(dpbStorage_t *dpb, u32 longTermPicNum)
+{
+    /* MMCO 2: look the picture up among the long-term references and, if
+     * present, mark it unused and update the dpb counters. */
+    const i32 slot = FindDpbPic(dpb, (i32)longTermPicNum, HANTRO_FALSE);
+
+    if (slot >= 0)
+    {
+        SET_UNUSED(dpb->buffer[slot]);
+        dpb->numRefFrames--;
+        /* fullness drops only when the picture is not flagged for display */
+        if (!dpb->buffer[slot].toBeDisplayed)
+            dpb->fullness--;
+
+        return(HANTRO_OK);
+    }
+
+    /* no long-term picture carries the requested longTermPicNum */
+    return(HANTRO_NOK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Mmcop3
+
+        Functional description:
+            Function to assign a longTermFrameIdx to a short-term reference
+            frame (i.e. to change it to a long-term reference picture),
+            memory_management_control_operation equal to 3
+
+        Returns:
+            HANTRO_OK      success
+            HANTRO_NOK     failure, short-term picture does not exist in the
+                           buffer or is a non-existing picture, or invalid
+                           longTermFrameIdx given
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop3(dpbStorage_t *dpb, u32 currPicNum, u32 differenceOfPicNums,
+    u32 longTermFrameIdx)
+{
+    /* MMCO 3: re-label one short-term picture as long-term and store
+     * longTermFrameIdx as its picNum. */
+    i32 slot;
+    u32 n;
+
+    ASSERT(dpb);
+    ASSERT(currPicNum < dpb->maxFrameNum);
+
+    /* maxLongTermFrameIdx must be set and must cover the requested index */
+    if (dpb->maxLongTermFrameIdx == NO_LONG_TERM_FRAME_INDICES)
+        return(HANTRO_NOK);
+    if (longTermFrameIdx > dpb->maxLongTermFrameIdx)
+        return(HANTRO_NOK);
+
+    /* the first long-term picture found that already uses this index is
+     * released before the index is assigned again */
+    for (n = 0; n < dpb->maxRefFrames; n++)
+    {
+        if (IS_LONG_TERM(dpb->buffer[n]) &&
+            longTermFrameIdx == (u32)dpb->buffer[n].picNum)
+        {
+            SET_UNUSED(dpb->buffer[n]);
+            dpb->numRefFrames--;
+            if (!dpb->buffer[n].toBeDisplayed)
+                dpb->fullness--;
+            break;
+        }
+    }
+
+    slot = FindDpbPic(dpb,
+        (i32)currPicNum - (i32)differenceOfPicNums, HANTRO_TRUE);
+
+    /* the target has to be found and must not be a non-existing picture */
+    if (slot < 0 || !IS_EXISTING(dpb->buffer[slot]))
+        return(HANTRO_NOK);
+
+    dpb->buffer[slot].status = LONG_TERM;
+    dpb->buffer[slot].picNum = (i32)longTermFrameIdx;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Mmcop4
+
+        Functional description:
+            Function to set maxLongTermFrameIdx,
+            memory_management_control_operation equal to 4
+
+        Returns:
+            HANTRO_OK      success
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop4(dpbStorage_t *dpb, u32 maxLongTermFrameIdx)
+{
+    /* MMCO 4: store the new upper bound for long-term frame indices and
+     * release every long-term picture that no longer fits under it. */
+    u32 n;
+    u32 dropAll;
+
+    dpb->maxLongTermFrameIdx = maxLongTermFrameIdx;
+    /* with NO_LONG_TERM_FRAME_INDICES every long-term picture is released */
+    dropAll = (dpb->maxLongTermFrameIdx == NO_LONG_TERM_FRAME_INDICES);
+
+    for (n = 0; n < dpb->maxRefFrames; n++)
+    {
+        if (IS_LONG_TERM(dpb->buffer[n]) &&
+            (((u32)dpb->buffer[n].picNum > maxLongTermFrameIdx) || dropAll))
+        {
+            SET_UNUSED(dpb->buffer[n]);
+            dpb->numRefFrames--;
+            if (!dpb->buffer[n].toBeDisplayed)
+                dpb->fullness--;
+        }
+    }
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Mmcop5
+
+        Functional description:
+            Function to mark all reference pictures unused for reference and
+            set maxLongTermFrameIdx to NO_LONG_TERM_FRAME_INDICES,
+            memory_management_control_operation equal to 5. Function flushes
+            the buffer and places all pictures that are needed for display into
+            the output buffer.
+
+        Returns:
+            HANTRO_OK      success
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop5(dpbStorage_t *dpb)
+{
+    /* MMCO 5: release every reference picture in the first 16 buffer
+     * positions, output whatever can be output and reset the counters. */
+    u32 n = 0;
+
+    while (n < 16)
+    {
+        if (IS_REFERENCE(dpb->buffer[n]))
+        {
+            SET_UNUSED(dpb->buffer[n]);
+            if (!dpb->buffer[n].toBeDisplayed)
+                dpb->fullness--;
+        }
+        n++;
+    }
+
+    /* keep calling OutputPicture until it stops reporting HANTRO_OK */
+    for (;;)
+    {
+        if (OutputPicture(dpb) != HANTRO_OK)
+            break;
+    }
+
+    dpb->prevRefFrameNum = 0;
+    dpb->maxLongTermFrameIdx = NO_LONG_TERM_FRAME_INDICES;
+    dpb->numRefFrames = 0;
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Mmcop6
+
+        Functional description:
+            Function to assign longTermFrameIdx to the current picture,
+            memory_management_control_operation equal to 6
+
+        Returns:
+            HANTRO_OK      success
+            HANTRO_NOK     invalid longTermFrameIdx or no room for current
+                           picture in the buffer
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop6(dpbStorage_t *dpb, u32 frameNum, i32 picOrderCnt,
+    u32 longTermFrameIdx)
+{
+    /* MMCO 6: register the picture being decoded (dpb->currentOut) as a
+     * long-term reference identified by longTermFrameIdx. */
+    u32 n;
+
+    ASSERT(frameNum < dpb->maxFrameNum);
+
+    /* maxLongTermFrameIdx must be set and must cover the requested index */
+    if (dpb->maxLongTermFrameIdx == NO_LONG_TERM_FRAME_INDICES)
+        return(HANTRO_NOK);
+    if (longTermFrameIdx > dpb->maxLongTermFrameIdx)
+        return(HANTRO_NOK);
+
+    /* release the long-term picture, if any, currently using this index */
+    for (n = 0; n < dpb->maxRefFrames; n++)
+    {
+        if (IS_LONG_TERM(dpb->buffer[n]) &&
+            longTermFrameIdx == (u32)dpb->buffer[n].picNum)
+        {
+            SET_UNUSED(dpb->buffer[n]);
+            dpb->numRefFrames--;
+            if (!dpb->buffer[n].toBeDisplayed)
+                dpb->fullness--;
+            break;
+        }
+    }
+
+    /* if there is no room for one more reference, return an error */
+    if (dpb->numRefFrames >= dpb->maxRefFrames)
+        return(HANTRO_NOK);
+
+    dpb->currentOut->status      = LONG_TERM;
+    dpb->currentOut->frameNum    = frameNum;
+    dpb->currentOut->picOrderCnt = picOrderCnt;
+    dpb->currentOut->picNum      = (i32)longTermFrameIdx;
+
+    /* with noReordering set the picture is not flagged for later display */
+    dpb->currentOut->toBeDisplayed =
+        dpb->noReordering ? HANTRO_FALSE : HANTRO_TRUE;
+
+    /* the picture is counted both as a reference and as dpb content */
+    dpb->fullness++;
+    dpb->numRefFrames++;
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdMarkDecRefPic
+
+        Functional description:
+            Function to perform reference picture marking process. This
+            function should be called both for reference and non-reference
+            pictures.  Non-reference pictures shall have mark pointer set to
+            NULL.
+
+        Inputs:
+            dpb         pointer to the DPB data structure
+            mark        pointer to reference picture marking commands
+            image       pointer to current picture to be placed in the buffer
+            frameNum    frame number of the current picture
+            picOrderCnt picture order count for the current picture
+            isIdr       flag to indicate if the current picture is an
+                        IDR picture
+            currentPicId    identifier for the current picture, from the
+                            application, stored along with the picture
+            numErrMbs       number of concealed macroblocks in the current
+                            picture, stored along with the picture
+
+        Outputs:
+            dpb         'buffer' modified, possible output frames placed into
+                        'outBuf'
+
+        Returns:
+            HANTRO_OK   success
+            HANTRO_NOK  failure
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdMarkDecRefPic(
+  dpbStorage_t *dpb,
+  decRefPicMarking_t *mark,
+  image_t *image,
+  u32 frameNum,
+  i32 picOrderCnt,
+  u32 isIdr,
+  u32 currentPicId,
+  u32 numErrMbs)
+{
+/* Variables */
+    u32 i, status;
+    u32 markedAsLongTerm;
+    u32 toBeDisplayed;
+
+/* Code */
+    ASSERT(dpb);
+    ASSERT(mark || !isIdr);
+    ASSERT(!isIdr || (frameNum == 0 && picOrderCnt == 0));
+    ASSERT(frameNum < dpb->maxFrameNum);
+    /* only the position reserved by h264bsdAllocateDpbImage may be marked */
+    if (image->data != dpb->currentOut->data)
+    {
+        EPRINT("TRYING TO MARK NON-ALLOCATED IMAGE");
+        return(HANTRO_NOK);
+    }
+    dpb->lastContainsMmco5 = HANTRO_FALSE;
+    status = HANTRO_OK;
+    toBeDisplayed = dpb->noReordering ? HANTRO_FALSE : HANTRO_TRUE;
+
+    /* non-reference picture, stored for display reordering purposes */
+    if (mark == NULL)
+    {
+        dpb->currentOut->status = UNUSED;
+        dpb->currentOut->frameNum = frameNum;
+        dpb->currentOut->picNum = (i32)frameNum;
+        dpb->currentOut->picOrderCnt = picOrderCnt;
+        dpb->currentOut->toBeDisplayed = toBeDisplayed;
+        if (!dpb->noReordering)
+            dpb->fullness++;
+    }
+    /* IDR picture */
+    else if (isIdr)
+    {
+        /* h264bsdCheckGapsInFrameNum not called for IDR pictures -> have to
+         * reset numOut and outIndex here */
+        dpb->numOut = dpb->outIndex = 0;
+
+        /* flush the buffer */
+        Mmcop5(dpb);
+        /* if noOutputOfPriorPicsFlag was set -> the pictures preceding the
+         * IDR picture shall not be output -> set output buffer empty */
+        if (mark->noOutputOfPriorPicsFlag || dpb->noReordering)
+        {
+            dpb->numOut = 0;
+            dpb->outIndex = 0;
+        }
+
+        if (mark->longTermReferenceFlag)
+        {
+            dpb->currentOut->status = LONG_TERM;
+            dpb->maxLongTermFrameIdx = 0;
+        }
+        else
+        {
+            dpb->currentOut->status = SHORT_TERM;
+            dpb->maxLongTermFrameIdx = NO_LONG_TERM_FRAME_INDICES;
+        }
+        dpb->currentOut->frameNum  = 0;
+        dpb->currentOut->picNum    = 0;
+        dpb->currentOut->picOrderCnt = 0;
+        dpb->currentOut->toBeDisplayed = toBeDisplayed;
+        dpb->fullness = 1;
+        dpb->numRefFrames = 1;
+    }
+    /* reference picture */
+    else
+    {
+        markedAsLongTerm = HANTRO_FALSE;
+        if (mark->adaptiveRefPicMarkingModeFlag)
+        {
+            i = 0;
+            while (mark->operation[i].memoryManagementControlOperation)
+            {
+                switch (mark->operation[i].memoryManagementControlOperation)
+                {
+                    case 1:
+                        status = Mmcop1(
+                          dpb,
+                          frameNum,
+                          mark->operation[i].differenceOfPicNums);
+                        break;
+
+                    case 2:
+                        status = Mmcop2(dpb, mark->operation[i].longTermPicNum);
+                        break;
+
+                    case 3:
+                        status =  Mmcop3(
+                          dpb,
+                          frameNum,
+                          mark->operation[i].differenceOfPicNums,
+                          mark->operation[i].longTermFrameIdx);
+                        break;
+
+                    case 4:
+                        status = Mmcop4(
+                          dpb,
+                          mark->operation[i].maxLongTermFrameIdx);
+                        break;
+
+                    case 5:
+                        status = Mmcop5(dpb);
+                        dpb->lastContainsMmco5 = HANTRO_TRUE;
+                        frameNum = 0; /* picture is stored with frameNum 0 */
+                        break;
+
+                    case 6:
+                        status = Mmcop6(
+                          dpb,
+                          frameNum,
+                          picOrderCnt,
+                          mark->operation[i].longTermFrameIdx);
+                        if (status == HANTRO_OK)
+                            markedAsLongTerm = HANTRO_TRUE;
+                        break;
+
+                    default: /* invalid memory management control operation */
+                        status = HANTRO_NOK;
+                        break;
+                }
+                if (status != HANTRO_OK)
+                {
+                    break;
+                }
+                i++;
+            }
+        }
+        else
+        {
+            status = SlidingWindowRefPicMarking(dpb);
+        }
+        /* if current picture was not marked as long-term reference by
+         * memory management control operation 6 -> mark current as short
+         * term and insert it into dpb (if there is room) */
+        if (!markedAsLongTerm)
+        {
+            if (dpb->numRefFrames < dpb->maxRefFrames)
+            {
+                dpb->currentOut->frameNum = frameNum;
+                dpb->currentOut->picNum   = (i32)frameNum;
+                dpb->currentOut->picOrderCnt = picOrderCnt;
+                dpb->currentOut->status   = SHORT_TERM;
+                dpb->currentOut->toBeDisplayed = toBeDisplayed;
+                dpb->fullness++;
+                dpb->numRefFrames++;
+            }
+            /* no room */
+            else
+            {
+                status = HANTRO_NOK;
+            }
+        }
+    }
+
+    dpb->currentOut->isIdr = isIdr;
+    dpb->currentOut->picId = currentPicId;
+    dpb->currentOut->numErrMbs = numErrMbs;
+
+    /* dpb was initialized to not to reorder the pictures -> output current
+     * picture immediately */
+    if (dpb->noReordering)
+    {
+        ASSERT(dpb->numOut == 0);
+        ASSERT(dpb->outIndex == 0);
+        dpb->outBuf[dpb->numOut].data  = dpb->currentOut->data;
+        dpb->outBuf[dpb->numOut].isIdr = dpb->currentOut->isIdr;
+        dpb->outBuf[dpb->numOut].picId = dpb->currentOut->picId;
+        dpb->outBuf[dpb->numOut].numErrMbs = dpb->currentOut->numErrMbs;
+        dpb->numOut++;
+    }
+    else
+    {
+        /* output pictures if buffer full */
+        while (dpb->fullness > dpb->dpbSize)
+        {
+            i = OutputPicture(dpb);
+            ASSERT(i == HANTRO_OK);
+            /* nothing could be output although the counters still report
+             * an over-full dpb: report failure instead of looping forever
+             * when ASSERT is compiled out */
+            if (i != HANTRO_OK)
+            {
+                status = HANTRO_NOK;
+                break;
+            }
+        }
+    }
+    /* sort dpb */
+    ShellSort(dpb->buffer, dpb->dpbSize+1);
+
+    return(status);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdGetRefPicData
+
+        Functional description:
+            Function to get reference picture data from the reference picture
+            list
+
+        Returns:
+            pointer to desired reference picture data
+            NULL if invalid index or non-existing picture referred
+
+------------------------------------------------------------------------------*/
+
+u8* h264bsdGetRefPicData(dpbStorage_t *dpb, u32 index)
+{
+    /* Indices above 16, empty list entries and pictures that fail
+     * IS_EXISTING all yield NULL instead of a data pointer. */
+    if (index > 16)
+        return(NULL);
+
+    if (dpb->list[index] == NULL)
+        return(NULL);
+
+    if (!IS_EXISTING(*dpb->list[index]))
+        return(NULL);
+
+    return(dpb->list[index]->data);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdAllocateDpbImage
+
+        Functional description:
+            function to allocate memory for a image. This function does not
+            really allocate any memory but reserves one of the buffer
+            positions for decoding of current picture
+
+        Returns:
+            pointer to memory area for the image
+
+
+------------------------------------------------------------------------------*/
+
+u8* h264bsdAllocateDpbImage(dpbStorage_t *dpb)
+{
+    /* No memory is allocated: buffer position dpbSize is recorded as
+     * currentOut and its data pointer is handed to the caller. */
+
+    /* that position must be free and the dpb must not be over-full */
+    ASSERT( !dpb->buffer[dpb->dpbSize].toBeDisplayed &&
+            !IS_REFERENCE(dpb->buffer[dpb->dpbSize]) );
+    ASSERT(dpb->fullness <=  dpb->dpbSize);
+
+    dpb->currentOut = &dpb->buffer[dpb->dpbSize];
+
+    /* data pointer of the reserved position */
+    return(dpb->currentOut->data);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: SlidingWindowRefPicMarking
+
+        Functional description:
+            Function to perform sliding window reference picture marking process.
+
+        Returns:
+            HANTRO_OK      success
+            HANTRO_NOK     failure, no short-term reference frame found that
+                           could be marked unused
+
+
+------------------------------------------------------------------------------*/
+
+static u32 SlidingWindowRefPicMarking(dpbStorage_t *dpb)
+{
+    /* When all maxRefFrames reference slots are taken, the short-term
+     * picture with the smallest picNum is marked unused so that one slot
+     * becomes available again. */
+
+    i32 oldest = -1;
+    i32 oldestPicNum = 0;
+    u32 n;
+
+    /* a free reference slot exists -> nothing has to slide out */
+    if (dpb->numRefFrames < dpb->maxRefFrames)
+        return(HANTRO_OK);
+
+    /* scan the first numRefFrames positions for the smallest picNum */
+    for (n = 0; n < dpb->numRefFrames; n++)
+    {
+        if (IS_SHORT_TERM(dpb->buffer[n]))
+        {
+            if (oldest == -1 || dpb->buffer[n].picNum < oldestPicNum)
+            {
+                oldest = (i32)n;
+                oldestPicNum = dpb->buffer[n].picNum;
+            }
+        }
+    }
+
+    /* no short-term picture among them -> nothing can be released */
+    if (oldest < 0)
+        return(HANTRO_NOK);
+
+    SET_UNUSED(dpb->buffer[oldest]);
+    dpb->numRefFrames--;
+    /* fullness drops only when the picture is not flagged for display */
+    if (!dpb->buffer[oldest].toBeDisplayed)
+        dpb->fullness--;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInitDpb
+
+        Functional description:
+            Function to initialize DPB. Reserves memories for the buffer,
+            reference picture list and output buffer. dpbSize indicates
+            the maximum DPB size indicated by the levelIdc in the stream.
+            If noReordering flag is FALSE the DPB stores dpbSize pictures
+            for display reordering purposes. On the other hand, if the
+            flag is TRUE the DPB only stores maxRefFrames reference pictures
+            and outputs all the pictures immediately.
+
+        Inputs:
+            picSizeInMbs    picture size in macroblocks
+            dpbSize         size of the DPB (number of pictures)
+            maxRefFrames    max number of reference frames
+            maxFrameNum     max frame number
+            noReordering    flag to indicate that DPB does not have to
+                            prepare to reorder frames for display
+
+        Outputs:
+            dpb             pointer to dpb data storage
+
+        Returns:
+            HANTRO_OK       success
+            MEMORY_ALLOCATION_ERROR if memory allocation failed
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdInitDpb(
+  dpbStorage_t *dpb,
+  u32 picSizeInMbs,
+  u32 dpbSize,
+  u32 maxRefFrames,
+  u32 maxFrameNum,
+  u32 noReordering)
+{
+/* Variables */
+    u32 i;
+
+/* Code */
+    ASSERT(picSizeInMbs);
+    ASSERT(maxRefFrames <= MAX_NUM_REF_PICS);
+    ASSERT(maxRefFrames <= dpbSize);
+    ASSERT(maxFrameNum);
+    ASSERT(dpbSize);
+
+    /* picSizeInMbs*384 + 32+15 below is computed in u32; reject sizes for
+     * which it would wrap around and under-allocate the picture buffers */
+    if (picSizeInMbs > (0xFFFFFFFFU - 32 - 15) / 384)
+        return(MEMORY_ALLOCATION_ERROR);
+
+    dpb->maxLongTermFrameIdx = NO_LONG_TERM_FRAME_INDICES;
+    dpb->maxRefFrames        = MAX(maxRefFrames, 1);
+    if (noReordering)
+        dpb->dpbSize         = dpb->maxRefFrames;
+    else
+        dpb->dpbSize         = dpbSize;
+    dpb->maxFrameNum         = maxFrameNum;
+    dpb->noReordering        = noReordering;
+    dpb->fullness            = 0;
+    dpb->numRefFrames        = 0;
+    dpb->prevRefFrameNum     = 0;
+
+    ALLOCATE(dpb->buffer, MAX_NUM_REF_IDX_L0_ACTIVE + 1, dpbPicture_t);
+    if (dpb->buffer == NULL)
+        return(MEMORY_ALLOCATION_ERROR);
+    H264SwDecMemset(dpb->buffer, 0,
+            (MAX_NUM_REF_IDX_L0_ACTIVE + 1)*sizeof(dpbPicture_t));
+    for (i = 0; i < dpb->dpbSize + 1; i++)
+    {
+        /* Allocate needed amount of memory, which is:
+         * image size + 32 + 15, where 32 comes from the fact that in ARM OpenMax
+         * DL implementation Functions may read beyond the end of an array,
+         * by a maximum of 32 bytes. And +15 comes from the need to align memory
+         * to 16-byte boundary */
+        ALLOCATE(dpb->buffer[i].pAllocatedData, (picSizeInMbs*384 + 32+15), u8);
+        if (dpb->buffer[i].pAllocatedData == NULL)
+            return(MEMORY_ALLOCATION_ERROR);
+        dpb->buffer[i].data = ALIGN(dpb->buffer[i].pAllocatedData, 16);
+    }
+
+    ALLOCATE(dpb->list, MAX_NUM_REF_IDX_L0_ACTIVE + 1, dpbPicture_t*);
+    ALLOCATE(dpb->outBuf, dpb->dpbSize+1, dpbOutPicture_t);
+
+    if (dpb->list == NULL || dpb->outBuf == NULL)
+        return(MEMORY_ALLOCATION_ERROR);
+
+    H264SwDecMemset(dpb->list, 0,
+            ((MAX_NUM_REF_IDX_L0_ACTIVE + 1) * sizeof(dpbPicture_t*)) );
+
+    dpb->numOut = dpb->outIndex = 0;
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdResetDpb
+
+        Functional description:
+            Function to reset DPB. This function should be called when an IDR
+            slice (other than the first) activates new sequence parameter set.
+            Function calls h264bsdFreeDpb to free old allocated memories and
+            h264bsdInitDpb to re-initialize the DPB. Same inputs, outputs and
+            returns as for h264bsdInitDpb.
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdResetDpb(
+  dpbStorage_t *dpb,
+  u32 picSizeInMbs,
+  u32 dpbSize,
+  u32 maxRefFrames,
+  u32 maxFrameNum,
+  u32 noReordering)
+{
+    u32 initStatus;
+    /* same argument requirements as h264bsdInitDpb */
+    ASSERT(picSizeInMbs);
+    ASSERT(maxRefFrames <= MAX_NUM_REF_PICS);
+    ASSERT(maxRefFrames <= dpbSize);
+    ASSERT(maxFrameNum);
+    ASSERT(dpbSize);
+
+    /* free the old storage first, then initialize it again */
+    h264bsdFreeDpb(dpb);
+    initStatus = h264bsdInitDpb(dpb, picSizeInMbs, dpbSize, maxRefFrames,
+                                maxFrameNum, noReordering);
+    return(initStatus);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInitRefPicList
+
+        Functional description:
+            Function to initialize reference picture list. Function just
+            sets pointers in the list according to pictures in the buffer.
+            The buffer is assumed to contain pictures sorted according to
+            what the H.264 standard says about initial reference picture list.
+
+        Inputs:
+            dpb     pointer to dpb data structure
+
+        Outputs:
+            dpb     'list' field initialized
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInitRefPicList(dpbStorage_t *dpb)
+{
+    /* list[n] is pointed at buffer[n] for each of the numRefFrames
+     * leading buffer positions; the rest of the list is left as is. */
+    u32 n = 0;
+
+    while (n < dpb->numRefFrames)
+    {
+        dpb->list[n] = dpb->buffer + n;
+        n++;
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FindDpbPic
+
+        Functional description:
+            Function to find a reference picture from the buffer. The picture
+            to be found is identified by picNum and isShortTerm flag.
+
+        Returns:
+            index of the picture in the buffer
+            -1 if the specified picture was not found in the buffer
+
+------------------------------------------------------------------------------*/
+
+static i32 FindDpbPic(dpbStorage_t *dpb, i32 picNum, u32 isShortTerm)
+{
+    /* Linear search over the first maxRefFrames buffer positions for a
+     * picture of the requested kind (short- or long-term) carrying picNum;
+     * the lowest matching position is reported. */
+    i32 hit = -1;
+    u32 n;
+
+    if (isShortTerm)
+    {
+        for (n = 0; n < dpb->maxRefFrames; n++)
+        {
+            if (IS_SHORT_TERM(dpb->buffer[n]) &&
+              dpb->buffer[n].picNum == picNum)
+            {
+                hit = (i32)n;
+                break;
+            }
+        }
+    }
+    else
+    {
+        /* long-term lookups are only expected with non-negative numbers */
+        ASSERT(picNum >= 0);
+
+        for (n = 0; n < dpb->maxRefFrames; n++)
+        {
+            if (IS_LONG_TERM(dpb->buffer[n]) &&
+              dpb->buffer[n].picNum == picNum)
+            {
+                hit = (i32)n;
+                break;
+            }
+        }
+    }
+
+    /* -1 unless one of the loops above recorded a position */
+    return(hit);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: SetPicNums
+
+        Functional description:
+            Function to set picNum values for short-term pictures in the
+            buffer. Numbering of pictures is based on frame numbers and as
+            frame numbers are modulo maxFrameNum -> frame numbers of older
+            pictures in the buffer may be bigger than the currFrameNum.
+            picNums will be set so that current frame has the largest picNum
+            and all the short-term frames in the buffer will get smaller picNum
+            representing their "distance" from the current frame. This
+            function kind of maps the modulo arithmetic back to normal.
+
+------------------------------------------------------------------------------*/
+
+static void SetPicNums(dpbStorage_t *dpb, u32 currFrameNum)
+{
+    /* Frame numbers above currFrameNum are treated as wrapped: maxFrameNum
+     * is subtracted from them before they are stored as picNum. */
+
+    u32 n;
+    i32 unwrapped;
+
+    ASSERT(dpb);
+    ASSERT(currFrameNum < dpb->maxFrameNum);
+
+    /* only short-term pictures in the first numRefFrames positions change */
+    for (n = 0; n < dpb->numRefFrames; n++)
+    {
+        if (IS_SHORT_TERM(dpb->buffer[n]))
+        {
+            unwrapped = (i32)dpb->buffer[n].frameNum;
+            if (dpb->buffer[n].frameNum > currFrameNum)
+                unwrapped -= (i32)dpb->maxFrameNum;
+
+            dpb->buffer[n].picNum = unwrapped;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCheckGapsInFrameNum
+
+        Functional description:
+            Function to check gaps in frame_num and generate non-existing
+            (short term) reference pictures if necessary. This function should
+            be called only for non-IDR pictures.
+
+        Inputs:
+            dpb         pointer to dpb data structure
+            frameNum    frame number of the current picture
+            isRefPic    flag to indicate if current picture is a reference or
+                        non-reference picture
+            gapsAllowed Flag which indicates active SPS stance on whether
+                        to allow gaps
+
+        Outputs:
+            dpb         'buffer' possibly modified by inserting non-existing
+                        pictures with sliding window marking process
+
+        Returns:
+            HANTRO_OK   success
+            HANTRO_NOK  error in sliding window reference picture marking or
+                        frameNum equal to previous reference frame used for
+                        a reference picture
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckGapsInFrameNum(dpbStorage_t *dpb, u32 frameNum, u32 isRefPic,
+                               u32 gapsAllowed)
+{
+    /* Fills any gap between prevRefFrameNum and frameNum with NON_EXISTING
+     * frames; also returns HANTRO_NOK when a full buffer cannot be emptied */
+/* Variables */
+    u32 unUsedShortTermFrameNum;    /* frame_num of next frame to insert */
+    u8 *tmp;                        /* data of last buffer position on entry */
+
+/* Code */
+
+    ASSERT(dpb);
+    ASSERT(dpb->fullness <= dpb->dpbSize);
+    ASSERT(frameNum < dpb->maxFrameNum);
+    /* empty the output queue; OutputPicture() below appends to it */
+    dpb->numOut = 0;
+    dpb->outIndex = 0;
+
+    if(!gapsAllowed)
+        return(HANTRO_OK);
+    /* gap: frameNum is neither prevRefFrameNum nor its modular successor */
+    if ( (frameNum != dpb->prevRefFrameNum) &&
+         (frameNum != ((dpb->prevRefFrameNum + 1) % dpb->maxFrameNum)))
+    {
+
+        unUsedShortTermFrameNum = (dpb->prevRefFrameNum + 1) % dpb->maxFrameNum;
+
+        /* store data pointer of last buffer position to be used as next
+         * "allocated" data pointer if last buffer position after this process
+         * contains data pointer located in outBuf (buffer placed in the output
+         * shall not be overwritten by the current picture) */
+        tmp = dpb->buffer[dpb->dpbSize].data;
+        do
+        {
+            SetPicNums(dpb, unUsedShortTermFrameNum);
+
+            if (SlidingWindowRefPicMarking(dpb) != HANTRO_OK)
+            {
+                return(HANTRO_NOK);
+            }
+            /* output pictures if buffer full; a failed output frees nothing,
+             * so report an error instead of looping forever */
+            while (dpb->fullness >= dpb->dpbSize)
+            {
+#ifdef _ASSERT_USED
+                ASSERT(!dpb->noReordering);
+                ASSERT(OutputPicture(dpb) == HANTRO_OK);
+#else
+                if (OutputPicture(dpb) != HANTRO_OK) return(HANTRO_NOK);
+#endif
+            }
+
+            /* add to end of list */
+            ASSERT( !dpb->buffer[dpb->dpbSize].toBeDisplayed &&
+                    !IS_REFERENCE(dpb->buffer[dpb->dpbSize]) );
+            dpb->buffer[dpb->dpbSize].status = NON_EXISTING;
+            dpb->buffer[dpb->dpbSize].frameNum = unUsedShortTermFrameNum;
+            dpb->buffer[dpb->dpbSize].picNum   = (i32)unUsedShortTermFrameNum;
+            dpb->buffer[dpb->dpbSize].picOrderCnt = 0;
+            dpb->buffer[dpb->dpbSize].toBeDisplayed = HANTRO_FALSE;
+            dpb->fullness++;
+            dpb->numRefFrames++;
+
+            /* sort the buffer */
+            ShellSort(dpb->buffer, dpb->dpbSize+1);
+
+            unUsedShortTermFrameNum = (unUsedShortTermFrameNum + 1) %
+                dpb->maxFrameNum;
+
+        } while (unUsedShortTermFrameNum != frameNum);
+
+        /* pictures placed in output buffer -> check that 'data' in
+         * buffer position dpbSize is not in the output buffer (this will be
+         * "allocated" by h264bsdAllocateDpbImage). If it is -> exchange data
+         * pointer with the one stored in the beginning */
+        if (dpb->numOut)
+        {
+            u32 i;
+
+            for (i = 0; i < dpb->numOut; i++)
+            {
+                if (dpb->outBuf[i].data == dpb->buffer[dpb->dpbSize].data)
+                {
+                    /* find buffer position containing data pointer stored in
+                     * tmp; i is reused as the outer scan stops right after */
+                    for (i = 0; i < dpb->dpbSize; i++)
+                    {
+                        if (dpb->buffer[i].data == tmp)
+                        {
+                            dpb->buffer[i].data =
+                                dpb->buffer[dpb->dpbSize].data;
+                            dpb->buffer[dpb->dpbSize].data = tmp;
+                            break;
+                        }
+                    }
+                    ASSERT(i < dpb->dpbSize);
+                    break;
+                }
+            }
+        }
+    }
+    /* frameNum for reference pictures shall not be the same as for previous
+     * reference picture, otherwise accesses to pictures in the buffer cannot
+     * be solved unambiguously */
+    else if (isRefPic && frameNum == dpb->prevRefFrameNum)
+    {
+        return(HANTRO_NOK);
+    }
+
+    /* save current frame_num in prevRefFrameNum. For non-reference frame
+     * prevRefFrameNum is set to frame number of last non-existing frame */
+    if (isRefPic)
+        dpb->prevRefFrameNum = frameNum;
+    else if (frameNum != dpb->prevRefFrameNum)
+    {
+        dpb->prevRefFrameNum =
+            (frameNum + dpb->maxFrameNum - 1) % dpb->maxFrameNum;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FindSmallestPicOrderCnt
+
+        Functional description:
+            Function to find picture with smallest picture order count. This
+            will be the next picture in display order.
+
+        Returns:
+            pointer to the picture, NULL if no pictures to be displayed
+
+------------------------------------------------------------------------------*/
+
+dpbPicture_t* FindSmallestPicOrderCnt(dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    u32 idx;
+    i32 lowestPoc = 0x7FFFFFFF;     /* only strictly smaller counts qualify */
+    dpbPicture_t *pCandidate;
+    dpbPicture_t *pBest = NULL;
+
+/* Code */
+
+    ASSERT(dpb);
+
+    /* scan every buffer position, including the one at index dpbSize */
+    for (idx = 0; idx <= dpb->dpbSize; idx++)
+    {
+        pCandidate = &dpb->buffer[idx];
+
+        /* skip pictures not waiting for display or not beating the best */
+        if (!pCandidate->toBeDisplayed || pCandidate->picOrderCnt >= lowestPoc)
+            continue;
+
+        pBest = pCandidate;
+        lowestPoc = pCandidate->picOrderCnt;
+    }
+
+    return(pBest);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: OutputPicture
+
+        Functional description:
+            Function to put next display order picture into the output buffer.
+
+        Returns:
+            HANTRO_OK      success
+            HANTRO_NOK     no pictures to display
+
+------------------------------------------------------------------------------*/
+
+u32 OutputPicture(dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    dpbPicture_t *pNext;
+    dpbOutPicture_t *pSlot;
+
+/* Code */
+
+    ASSERT(dpb);
+
+    /* the buffer is searched only when display re-ordering is in use */
+    pNext = dpb->noReordering ? NULL : FindSmallestPicOrderCnt(dpb);
+
+    /* re-ordering disabled, or no pictures to be displayed */
+    if (!pNext)
+        return(HANTRO_NOK);
+
+    /* copy the display information into the next output buffer entry */
+    pSlot = dpb->outBuf + dpb->numOut;
+    dpb->numOut++;
+    pSlot->data      = pNext->data;
+    pSlot->isIdr     = pNext->isIdr;
+    pSlot->picId     = pNext->picId;
+    pSlot->numErrMbs = pNext->numErrMbs;
+
+    /* fullness drops only when IS_REFERENCE() does not hold for the picture */
+    pNext->toBeDisplayed = HANTRO_FALSE;
+    if (!IS_REFERENCE(*pNext))
+        dpb->fullness--;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdDpbOutputPicture
+
+        Functional description:
+            Function to get next display order picture from the output buffer.
+
+        Return:
+            pointer to output picture structure, NULL if no pictures to
+            display
+
+------------------------------------------------------------------------------*/
+
+dpbOutPicture_t* h264bsdDpbOutputPicture(dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(dpb);
+
+    /* every picture queued in outBuf has already been handed out */
+    if (dpb->outIndex >= dpb->numOut)
+        return(NULL);
+
+    return(&dpb->outBuf[dpb->outIndex++]);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFlushDpb
+
+        Functional description:
+            Function to flush the DPB. Function puts all pictures needed for
+            display into the output buffer. This function shall be called at
+            the end of the stream to obtain pictures buffered for display
+            re-ordering purposes.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdFlushDpb(dpbStorage_t *dpb)
+{
+
+    /* nothing to flush when the picture buffer was never reserved */
+    if (!dpb->buffer)
+        return;
+
+    dpb->flushed = 1;
+
+    /* keep outputting pictures until OutputPicture() reports failure */
+    while (OutputPicture(dpb) == HANTRO_OK)
+        continue;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFreeDpb
+
+        Functional description:
+            Function to free memories reserved for the DPB.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdFreeDpb(dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    u32 idx;
+
+/* Code */
+
+    ASSERT(dpb);
+
+    /* release the picture memories of buffer positions 0..dpbSize first;
+     * nothing is visited when the buffer array was never reserved */
+    for (idx = 0; dpb->buffer && idx < dpb->dpbSize+1; idx++)
+    {
+        FREE(dpb->buffer[idx].pAllocatedData);
+    }
+
+    /* then the arrays themselves */
+    FREE(dpb->buffer);
+    FREE(dpb->list);
+    FREE(dpb->outBuf);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: ShellSort
+
+        Functional description:
+            Sort pictures in the buffer. Function implements Shell's method,
+            i.e. diminishing increment sort. See e.g. "Numerical Recipes in C"
+            for more information.
+
+------------------------------------------------------------------------------*/
+
+static void ShellSort(dpbPicture_t *pPic, u32 num)
+{
+
+    u32 gap;
+    u32 pos, hole;
+    dpbPicture_t held;
+
+    /* diminishing increments 7, 3, 1; each pass is a gapped insertion sort */
+    for (gap = 7; gap != 0; gap >>= 1)
+    {
+        for (pos = gap; pos < num; pos++)
+        {
+            held = pPic[pos];
+
+            /* shift entries that compare greater than held up by one gap */
+            for (hole = pos; hole >= gap; hole -= gap)
+            {
+                if (ComparePictures(&pPic[hole - gap], &held) <= 0)
+                    break;
+                pPic[hole] = pPic[hole - gap];
+            }
+            pPic[hole] = held;
+        }
+    }
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.h
new file mode 100755
index 0000000..0e25084
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_DPB_H
+#define H264SWDEC_DPB_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_image.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/* enumeration to represent status of buffered image */
+typedef enum {
+    UNUSED = 0,
+    NON_EXISTING,   /* frame inserted by h264bsdCheckGapsInFrameNum */
+    SHORT_TERM,     /* presumably a short-term reference picture -- confirm */
+    LONG_TERM       /* presumably a long-term reference picture -- confirm */
+} dpbPictureStatus_e;
+
+/* structure to represent a buffered picture */
+typedef struct {
+    u8 *data;           /* 16-byte aligned pointer of pAllocatedData */
+    u8 *pAllocatedData; /* allocated picture pointer; (size + 15) bytes */
+    i32 picNum;
+    u32 frameNum;
+    i32 picOrderCnt;    /* display order key: smallest is output first */
+    dpbPictureStatus_e status;
+    u32 toBeDisplayed;  /* non-zero while the picture waits for output */
+    u32 picId;          /* copied to dpbOutPicture_t on output */
+    u32 numErrMbs;      /* copied to dpbOutPicture_t on output */
+    u32 isIdr;          /* copied to dpbOutPicture_t on output */
+} dpbPicture_t;
+
+/* structure to represent display image output from the buffer */
+typedef struct {
+    u8 *data;       /* picture data pointer, taken from dpbPicture_t.data */
+    u32 picId;      /* taken from dpbPicture_t.picId */
+    u32 numErrMbs;  /* taken from dpbPicture_t.numErrMbs */
+    u32 isIdr;      /* taken from dpbPicture_t.isIdr */
+} dpbOutPicture_t;
+
+/* structure to represent DPB */
+typedef struct {
+    dpbPicture_t *buffer;       /* picture array, positions 0..dpbSize */
+    dpbPicture_t **list;
+    dpbPicture_t *currentOut;
+    dpbOutPicture_t *outBuf;    /* pictures queued for output */
+    u32 numOut;                 /* number of pictures queued in outBuf */
+    u32 outIndex;               /* next outBuf entry to hand out */
+    u32 maxRefFrames;
+    u32 dpbSize;                /* buffer is considered full at this level */
+    u32 maxFrameNum;            /* frame numbers wrap modulo this value */
+    u32 maxLongTermFrameIdx;
+    u32 numRefFrames;
+    u32 fullness;               /* compared against dpbSize before adding */
+    u32 prevRefFrameNum;        /* updated by h264bsdCheckGapsInFrameNum */
+    u32 lastContainsMmco5;
+    u32 noReordering;           /* non-zero: OutputPicture outputs nothing */
+    u32 flushed;                /* set to 1 by h264bsdFlushDpb */
+} dpbStorage_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdInitDpb(
+  dpbStorage_t *dpb,
+  u32 picSizeInMbs,
+  u32 dpbSize,
+  u32 numRefFrames,
+  u32 maxFrameNum,
+  u32 noReordering);
+
+u32 h264bsdResetDpb(
+  dpbStorage_t *dpb,
+  u32 picSizeInMbs,
+  u32 dpbSize,
+  u32 numRefFrames,
+  u32 maxFrameNum,
+  u32 noReordering);
+
+void h264bsdInitRefPicList(dpbStorage_t *dpb);
+
+u8* h264bsdAllocateDpbImage(dpbStorage_t *dpb);
+
+u8* h264bsdGetRefPicData(dpbStorage_t *dpb, u32 index);
+
+u32 h264bsdReorderRefPicList(
+  dpbStorage_t *dpb,
+  refPicListReordering_t *order,
+  u32 currFrameNum,
+  u32 numRefIdxActive);
+
+u32 h264bsdMarkDecRefPic(
+  dpbStorage_t *dpb,
+  decRefPicMarking_t *mark,
+  image_t *image,
+  u32 frameNum,
+  i32 picOrderCnt,
+  u32 isIdr,
+  u32 picId,
+  u32 numErrMbs);
+
+u32 h264bsdCheckGapsInFrameNum(dpbStorage_t *dpb, u32 frameNum, u32 isRefPic,
+                               u32 gapsAllowed);
+
+dpbOutPicture_t* h264bsdDpbOutputPicture(dpbStorage_t *dpb);
+
+void h264bsdFlushDpb(dpbStorage_t *dpb);
+
+void h264bsdFreeDpb(dpbStorage_t *dpb);
+
+#endif /* #ifndef H264SWDEC_DPB_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.c
new file mode 100755
index 0000000..7b92870
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdWriteMacroblock
+          h264bsdWriteOutputBlocks
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_image.h"
+#include "h264bsd_util.h"
+#include "h264bsd_neighbour.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/* x- and y-coordinates for each block, defined in h264bsd_intra_prediction.c */
+extern const u32 h264bsdBlockX[];
+extern const u32 h264bsdBlockY[];
+
+/* clipping table, defined in h264bsd_intra_prediction.c */
+extern const u8 h264bsdClip[];
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdWriteMacroblock
+
+        Functional description:
+            Write one macroblock into the image. Both luma and chroma
+            components will be written at the same time.
+
+        Inputs:
+            data    pointer to macroblock data to be written, 256 values for
+                    luma followed by 64 values for both chroma components
+
+        Outputs:
+            image   pointer to the image where the macroblock will be written
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_NEON
+void h264bsdWriteMacroblock(image_t *image, u8 *data)
+{
+    /* Copies the macroblock in data to image->luma, image->cb, image->cr */
+/* Variables */
+
+    u32 i;
+    u32 width;          /* row stride of the image in 32-bit words */
+    u32 *lum, *cb, *cr; /* word-wise write positions in the image */
+    u32 *ptr;           /* word-wise read position in data */
+    u32 tmp1, tmp2;
+
+/* Code */
+    /* word copies below need 4-byte aligned source and destinations */
+    ASSERT(image);
+    ASSERT(data);
+    ASSERT(!((u32)data&0x3));
+    /* image->width is in macroblocks, i.e. units of 16 luma pixels */
+    width = image->width;
+
+    /*lint -save -e826 lum, cb and cr used to copy 4 bytes at the time, disable
+     * "area too small" info message */
+    lum = (u32*)image->luma;
+    cb = (u32*)image->cb;
+    cr = (u32*)image->cr;
+    ASSERT(!((u32)lum&0x3));
+    ASSERT(!((u32)cb&0x3));
+    ASSERT(!((u32)cr&0x3));
+
+    ptr = (u32*)data;
+    /* luma: 16 rows of 4 words; one image row is width*4 words long */
+    width *= 4;
+    for (i = 16; i ; i--)
+    {
+        tmp1 = *ptr++;
+        tmp2 = *ptr++;
+        *lum++ = tmp1;
+        *lum++ = tmp2;
+        tmp1 = *ptr++;
+        tmp2 = *ptr++;
+        *lum++ = tmp1;
+        *lum++ = tmp2;
+        lum += width-4; /* 4 words written: move to same column, next row */
+    }
+    /* chroma rows are half as long: 8 rows of 2 words for cb, then for cr */
+    width >>= 1;
+    for (i = 8; i ; i--)
+    {
+        tmp1 = *ptr++;
+        tmp2 = *ptr++;
+        *cb++ = tmp1;
+        *cb++ = tmp2;
+        cb += width-2;  /* 2 words written: move to same column, next row */
+    }
+
+    for (i = 8; i ; i--)
+    {
+        tmp1 = *ptr++;
+        tmp2 = *ptr++;
+        *cr++ = tmp1;
+        *cr++ = tmp2;
+        cr += width-2;  /* 2 words written: move to same column, next row */
+    }
+
+}
+#endif
+#ifndef H264DEC_OMXDL
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdWriteOutputBlocks
+
+        Functional description:
+            Write one macroblock into the image. Prediction for the macroblock
+            and the residual are given separately and will be combined while
+            writing the data to the image
+
+        Inputs:
+            data        pointer to macroblock prediction data, 256 values for
+                        luma followed by 64 values for both chroma components
+            mbNum       number of the macroblock
+            residual    pointer to residual data, 16 16-element arrays for luma
+                        followed by 4 16-element arrays for both chroma
+                        components
+
+        Outputs:
+            image       pointer to the image where the data will be written
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdWriteOutputBlocks(image_t *image, u32 mbNum, u8 *data,
+        i32 residual[][16])
+{
+    /* Writes prediction (data) plus residual of macroblock mbNum to image */
+/* Variables */
+
+    u32 i;
+    u32 picWidth, picSize;  /* in macroblocks at first; picWidth is reused */
+    u8 *lum, *cb, *cr;      /* top-left of this macroblock in each plane */
+    u8 *imageBlock;         /* write position of the current 4x4 block */
+    u8 *tmp;                /* read position in the prediction data */
+    u32 row, col;           /* macroblock coordinates of mbNum */
+    u32 block;
+    u32 x, y;               /* offset of the 4x4 block inside the macroblock */
+    i32 *pRes;              /* 16 residual values of the current block */
+    i32 tmp1, tmp2, tmp3, tmp4;
+    const u8 *clp = h264bsdClip + 512; /* offset: sums below may be negative */
+
+/* Code */
+
+    ASSERT(image);
+    ASSERT(data);
+    ASSERT(mbNum < image->width * image->height);
+    ASSERT(!((u32)data&0x3));
+
+    /* Image size in macroblocks */
+    picWidth = image->width;
+    picSize = picWidth * image->height;
+    row = mbNum / picWidth;
+    col = mbNum % picWidth;
+
+    /* Output macroblock position in output picture */
+    lum = (image->data + row * picWidth * 256 + col * 16);
+    cb = (image->data + picSize * 256 + row * picWidth * 64 + col * 8);
+    cr = (cb + picSize * 64);
+    /* from here on picWidth is the luma row stride in bytes */
+    picWidth *= 16;
+    /* sixteen 4x4 luma blocks */
+    for (block = 0; block < 16; block++)
+    {
+        x = h264bsdBlockX[block];
+        y = h264bsdBlockY[block];
+
+        pRes = residual[block];
+
+        ASSERT(pRes);
+        /* prediction rows are 16 bytes apart, image rows picWidth bytes */
+        tmp = data + y*16 + x;
+        imageBlock = lum + y*picWidth + x;
+
+        ASSERT(!((u32)tmp&0x3));
+        ASSERT(!((u32)imageBlock&0x3));
+
+        if (IS_RESIDUAL_EMPTY(pRes))
+        {
+            /*lint -e826 */
+            i32 *in32 = (i32*)tmp;
+            i32 *out32 = (i32*)imageBlock;
+
+            /* Residual is zero => copy prediction block to output */
+            tmp1 = *in32;  in32 += 4;
+            tmp2 = *in32;  in32 += 4;
+            *out32 = tmp1; out32 += picWidth/4;
+            *out32 = tmp2; out32 += picWidth/4;
+            tmp1 = *in32;  in32 += 4;
+            tmp2 = *in32;
+            *out32 = tmp1; out32 += picWidth/4;
+            *out32 = tmp2;
+        }
+        else
+        {
+
+            RANGE_CHECK_ARRAY(pRes, -512, 511, 16);
+
+            /* Calculate image = prediction + residual
+             * Process four pixels in a loop */
+            for (i = 4; i; i--)
+            {
+                tmp1 = tmp[0];
+                tmp2 = *pRes++;
+                tmp3 = tmp[1];
+                tmp1 = clp[tmp1 + tmp2];
+                tmp4 = *pRes++;
+                imageBlock[0] = (u8)tmp1;
+                tmp3 = clp[tmp3 + tmp4];
+                tmp1 = tmp[2];
+                tmp2 = *pRes++;
+                imageBlock[1] = (u8)tmp3;
+                tmp1 = clp[tmp1 + tmp2];
+                tmp3 = tmp[3];
+                tmp4 = *pRes++;
+                imageBlock[2] = (u8)tmp1;
+                tmp3 = clp[tmp3 + tmp4];
+                tmp += 16;
+                imageBlock[3] = (u8)tmp3;
+                imageBlock += picWidth;
+            }
+        }
+
+    }
+    /* chroma row stride in bytes is half of the luma stride */
+    picWidth /= 2;
+    /* blocks 16..19 go to cb and 20..23 to cr, four 4x4 blocks each */
+    for (block = 16; block <= 23; block++)
+    {
+        x = h264bsdBlockX[block & 0x3];
+        y = h264bsdBlockY[block & 0x3];
+
+        pRes = residual[block];
+
+        ASSERT(pRes);
+        /* cb prediction follows the 256 luma values, cr is 64 bytes later */
+        tmp = data + 256;
+        imageBlock = cb;
+
+        if (block >= 20)
+        {
+            imageBlock = cr;
+            tmp += 64;
+        }
+        /* chroma prediction rows are 8 bytes apart */
+        tmp += y*8 + x;
+        imageBlock += y*picWidth + x;
+
+        ASSERT(!((u32)tmp&0x3));
+        ASSERT(!((u32)imageBlock&0x3));
+
+        if (IS_RESIDUAL_EMPTY(pRes))
+        {
+            /*lint -e826 */
+            i32 *in32 = (i32*)tmp;
+            i32 *out32 = (i32*)imageBlock;
+
+            /* Residual is zero => copy prediction block to output */
+            tmp1 = *in32;  in32 += 2;
+            tmp2 = *in32;  in32 += 2;
+            *out32 = tmp1; out32 += picWidth/4;
+            *out32 = tmp2; out32 += picWidth/4;
+            tmp1 = *in32;  in32 += 2;
+            tmp2 = *in32;
+            *out32 = tmp1; out32 += picWidth/4;
+            *out32 = tmp2;
+        }
+        else
+        {
+
+            RANGE_CHECK_ARRAY(pRes, -512, 511, 16);
+            /* image = prediction + residual, one row of four pixels per pass */
+            for (i = 4; i; i--)
+            {
+                tmp1 = tmp[0];
+                tmp2 = *pRes++;
+                tmp3 = tmp[1];
+                tmp1 = clp[tmp1 + tmp2];
+                tmp4 = *pRes++;
+                imageBlock[0] = (u8)tmp1;
+                tmp3 = clp[tmp3 + tmp4];
+                tmp1 = tmp[2];
+                tmp2 = *pRes++;
+                imageBlock[1] = (u8)tmp3;
+                tmp1 = clp[tmp1 + tmp2];
+                tmp3 = tmp[3];
+                tmp4 = *pRes++;
+                imageBlock[2] = (u8)tmp1;
+                tmp3 = clp[tmp3 + tmp4];
+                tmp += 8;
+                imageBlock[3] = (u8)tmp3;
+                imageBlock += picWidth;
+            }
+        }
+    }
+
+}
+#endif /* H264DEC_OMXDL */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.h
new file mode 100755
index 0000000..ed7c18c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_IMAGE_H
+#define H264SWDEC_IMAGE_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+typedef struct
+{
+    u8 *data;   /* start of the picture: luma plane, then cb, then cr */
+    u32 width;  /* picture width in macroblocks */
+    u32 height; /* picture height in macroblocks */
+    /* current MB's components */
+    u8 *luma;
+    u8 *cb;
+    u8 *cr;
+} image_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+void h264bsdWriteMacroblock(image_t *image, u8 *data);
+
+#ifndef H264DEC_OMXDL
+void h264bsdWriteOutputBlocks(image_t *image, u32 mbNum, u8 *data,
+    i32 residual[][16]);
+#endif
+
+#endif /* #ifndef H264SWDEC_IMAGE_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.c
new file mode 100755
index 0000000..2a81c4a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.c
@@ -0,0 +1,1027 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdInterPrediction
+          MvPrediction16x16
+          MvPrediction16x8
+          MvPrediction8x16
+          MvPrediction8x8
+          MvPrediction
+          MedianFilter
+          GetInterNeighbour
+          GetPredictionMv
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_inter_prediction.h"
+#include "h264bsd_neighbour.h"
+#include "h264bsd_util.h"
+#include "h264bsd_reconstruct.h"
+#include "h264bsd_dpb.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+typedef struct /* neighbour data, normally filled in by GetInterNeighbour */
+{
+    u32 available; /* HANTRO_TRUE if the MB exists and is in the same slice */
+    u32 refIndex;  /* reference index, 0xFFFFFFFF when none was copied */
+    mv_t mv;       /* motion vector, (0,0) when none was copied */
+} interNeighbour_t;
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 MvPrediction16x16(mbStorage_t *pMb, mbPred_t *mbPred,
+    dpbStorage_t *dpb);
+static u32 MvPrediction16x8(mbStorage_t *pMb, mbPred_t *mbPred,
+    dpbStorage_t *dpb);
+static u32 MvPrediction8x16(mbStorage_t *pMb, mbPred_t *mbPred,
+    dpbStorage_t *dpb);
+static u32 MvPrediction8x8(mbStorage_t *pMb, subMbPred_t *subMbPred,
+    dpbStorage_t *dpb);
+static u32 MvPrediction(mbStorage_t *pMb, subMbPred_t *subMbPred,
+    u32 mbPartIdx, u32 subMbPartIdx);
+static i32 MedianFilter(i32 a, i32 b, i32 c);
+
+static void GetInterNeighbour(u32 sliceId, mbStorage_t *nMb,
+    interNeighbour_t *n, u32 index);
+static void GetPredictionMv(mv_t *mv, interNeighbour_t *a, u32 refIndex);
+/* neighbour A of sub-mb partition: [mbPartIdx][subMbPartMode][subMbPartIdx] */
+static const neighbour_t N_A_SUB_PART[4][4][4] = {
+    { { {MB_A,5}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_A,5}, {MB_A,7}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_A,5}, {MB_CURR,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_A,5}, {MB_CURR,0}, {MB_A,7}, {MB_CURR,2} } },
+
+    { { {MB_CURR,1}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,1}, {MB_CURR,3}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,1}, {MB_CURR,4}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,1}, {MB_CURR,4}, {MB_CURR,3}, {MB_CURR,6} } },
+
+    { { {MB_A,13}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_A,13}, {MB_A,15}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_A,13}, {MB_CURR,8}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_A,13}, {MB_CURR,8}, {MB_A,15}, {MB_CURR,10} } },
+
+    { { {MB_CURR,9}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,9}, {MB_CURR,11}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,9}, {MB_CURR,12}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,9}, {MB_CURR,12}, {MB_CURR,11}, {MB_CURR,14} } } };
+/* neighbour B of sub-mb partition: [mbPartIdx][subMbPartMode][subMbPartIdx] */
+static const neighbour_t N_B_SUB_PART[4][4][4] = {
+    { { {MB_B,10}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,10}, {MB_CURR,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,10}, {MB_B,11}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,10}, {MB_B,11}, {MB_CURR,0}, {MB_CURR,1} } },
+
+    { { {MB_B,14}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,14}, {MB_CURR,4}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,14}, {MB_B,15}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,14}, {MB_B,15}, {MB_CURR,4}, {MB_CURR,5} } },
+
+    { { {MB_CURR,2}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,2}, {MB_CURR,8}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,2}, {MB_CURR,3}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,2}, {MB_CURR,3}, {MB_CURR,8}, {MB_CURR,9} } },
+
+    { { {MB_CURR,6}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,6}, {MB_CURR,12}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,6}, {MB_CURR,7}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,6}, {MB_CURR,7}, {MB_CURR,12}, {MB_CURR,13} } } };
+/* neighbour C of sub-mb partition: [mbPartIdx][subMbPartMode][subMbPartIdx] */
+static const neighbour_t N_C_SUB_PART[4][4][4] = {
+    { { {MB_B,14}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,14}, {MB_NA,4}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,11}, {MB_B,14}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,11}, {MB_B,14}, {MB_CURR,1}, {MB_NA,4} } },
+
+    { { {MB_C,10}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_C,10}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,15}, {MB_C,10}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,15}, {MB_C,10}, {MB_CURR,5}, {MB_NA,0} } },
+
+    { { {MB_CURR,6}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,6}, {MB_NA,12}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,3}, {MB_CURR,6}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,3}, {MB_CURR,6}, {MB_CURR,9}, {MB_NA,12} } },
+
+    { { {MB_NA,2}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_NA,2}, {MB_NA,8}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,7}, {MB_NA,2}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,7}, {MB_NA,2}, {MB_CURR,13}, {MB_NA,8} } } };
+/* neighbour D (used when C is unavailable), indexed like the tables above */
+static const neighbour_t N_D_SUB_PART[4][4][4] = {
+    { { {MB_D,15}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_D,15}, {MB_A,5}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_D,15}, {MB_B,10}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_D,15}, {MB_B,10}, {MB_A,5}, {MB_CURR,0} } },
+
+    { { {MB_B,11}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,11}, {MB_CURR,1}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,11}, {MB_B,14}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_B,11}, {MB_B,14}, {MB_CURR,1}, {MB_CURR,4} } },
+
+    { { {MB_A,7}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_A,7}, {MB_A,13}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_A,7}, {MB_CURR,2}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_A,7}, {MB_CURR,2}, {MB_A,13}, {MB_CURR,8} } },
+
+    { { {MB_CURR,3}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,3}, {MB_CURR,9}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,3}, {MB_CURR,6}, {MB_NA,0}, {MB_NA,0} },
+      { {MB_CURR,3}, {MB_CURR,6}, {MB_CURR,9}, {MB_CURR,12} } } };
+
+
+#ifdef H264DEC_OMXDL
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterPrediction
+
+        Functional description:
+          Processes one inter macroblock (H264DEC_OMXDL build). Performs
+          motion vector prediction and reconstructs the prediction macroblock
+          into data. This variant does not write into currImage.
+
+        Inputs:
+          pMb           pointer to macroblock specific information
+          pMbLayer      pointer to current macroblock data from stream
+          dpb           pointer to decoded picture buffer
+          mbNum         current macroblock number
+          currImage     pointer to output image (only width/height are read)
+          data          pointer where predicted macroblock will be stored
+
+        Outputs:
+          pMb           structure is updated with current macroblock
+          currImage     not modified by this variant
+          data          prediction is stored here
+
+        Returns:
+          HANTRO_OK     success
+          HANTRO_NOK    error in motion vector prediction
+
+------------------------------------------------------------------------------*/
+u32 h264bsdInterPrediction(mbStorage_t *pMb, macroblockLayer_t *pMbLayer,
+    dpbStorage_t *dpb, u32 mbNum, image_t *currImage, u8 *data)
+{
+
+/* Variables */
+
+    u32 i;
+    u32 x, y;
+    u32 colAndRow;
+    subMbPartMode_e subPartMode;
+    image_t refImage;
+    u8 fillBuff[32*21 + 15 + 32]; /* +15: slack for the 16-byte alignment */
+    u8 *pFill;
+    u32 tmp; /* packed partition: (x<<24) + (y<<16) + (width<<8) + height */
+/* Code */
+
+    ASSERT(pMb);
+    ASSERT(h264bsdMbPartPredMode(pMb->mbType) == PRED_MODE_INTER);
+    ASSERT(pMbLayer);
+
+    /* 16-byte alignment */
+    pFill = ALIGN(fillBuff, 16);
+
+    /* set row bits 15:0 */
+    colAndRow = mbNum / currImage->width;
+    /*set col to bits 31:16 */
+    colAndRow += (mbNum - colAndRow * currImage->width) << 16;
+    colAndRow <<= 4; /* scale both packed fields by 16 */
+
+    refImage.width = currImage->width;
+    refImage.height = currImage->height;
+
+    switch (pMb->mbType)
+    {
+        case P_Skip:
+        case P_L0_16x16:
+            if (MvPrediction16x16(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK)
+                return(HANTRO_NOK);
+            refImage.data = pMb->refAddr[0];
+            tmp = (0<<24) + (0<<16) + (16<<8) + 16;
+            h264bsdPredictSamples(data, pMb->mv, &refImage,
+                                    colAndRow, tmp, pFill);
+            break;
+
+        case P_L0_L0_16x8:
+            if ( MvPrediction16x8(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK)
+                return(HANTRO_NOK);
+            refImage.data = pMb->refAddr[0];
+            tmp = (0<<24) + (0<<16) + (16<<8) + 8;
+            h264bsdPredictSamples(data, pMb->mv, &refImage,
+                                    colAndRow, tmp, pFill);
+
+            refImage.data = pMb->refAddr[2];
+            tmp = (0<<24) + (8<<16) + (16<<8) + 8;
+            h264bsdPredictSamples(data, pMb->mv+8, &refImage,
+                                    colAndRow, tmp, pFill);
+            break;
+
+        case P_L0_L0_8x16:
+            if ( MvPrediction8x16(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK)
+                return(HANTRO_NOK);
+            refImage.data = pMb->refAddr[0];
+            tmp = (0<<24) + (0<<16) + (8<<8) + 16;
+            h264bsdPredictSamples(data, pMb->mv, &refImage,
+                                    colAndRow, tmp, pFill);
+            refImage.data = pMb->refAddr[1];
+            tmp = (8<<24) + (0<<16) + (8<<8) + 16;
+            h264bsdPredictSamples(data, pMb->mv+4, &refImage,
+                                    colAndRow, tmp, pFill);
+            break;
+
+        default: /* P_8x8 and P_8x8ref0 */
+            if ( MvPrediction8x8(pMb, &pMbLayer->subMbPred, dpb) != HANTRO_OK)
+                return(HANTRO_NOK);
+            for (i = 0; i < 4; i++)
+            {
+                refImage.data = pMb->refAddr[i];
+                subPartMode =
+                    h264bsdSubMbPartMode(pMbLayer->subMbPred.subMbType[i]);
+                x = i & 0x1 ? 8 : 0;
+                y = i < 2 ? 0 : 8;
+                switch (subPartMode)
+                {
+                    case MB_SP_8x8:
+                        tmp = (x<<24) + (y<<16) + (8<<8) + 8;
+                        h264bsdPredictSamples(data, pMb->mv+4*i, &refImage,
+                                                    colAndRow, tmp, pFill);
+                        break;
+
+                    case MB_SP_8x4:
+                        tmp = (x<<24) + (y<<16) + (8<<8) + 4;
+                        h264bsdPredictSamples(data, pMb->mv+4*i, &refImage,
+                                                    colAndRow, tmp, pFill);
+                        tmp = (x<<24) + ((y+4)<<16) + (8<<8) + 4;
+                        h264bsdPredictSamples(data, pMb->mv+4*i+2, &refImage,
+                                                    colAndRow, tmp, pFill);
+                        break;
+
+                    case MB_SP_4x8:
+                        tmp = (x<<24) + (y<<16) + (4<<8) + 8;
+                        h264bsdPredictSamples(data, pMb->mv+4*i, &refImage,
+                                                    colAndRow, tmp, pFill);
+                        tmp = ((x+4)<<24) + (y<<16) + (4<<8) + 8;
+                        h264bsdPredictSamples(data, pMb->mv+4*i+1, &refImage,
+                                                    colAndRow, tmp, pFill);
+                        break;
+
+                    default:
+                        tmp = (x<<24) + (y<<16) + (4<<8) + 4;
+                        h264bsdPredictSamples(data, pMb->mv+4*i, &refImage,
+                                                    colAndRow, tmp, pFill);
+                        tmp = ((x+4)<<24) + (y<<16) + (4<<8) + 4;
+                        h264bsdPredictSamples(data, pMb->mv+4*i+1, &refImage,
+                                                    colAndRow, tmp, pFill);
+                        tmp = (x<<24) + ((y+4)<<16) + (4<<8) + 4;
+                        h264bsdPredictSamples(data, pMb->mv+4*i+2, &refImage,
+                                                    colAndRow, tmp, pFill);
+                        tmp = ((x+4)<<24) + ((y+4)<<16) + (4<<8) + 4;
+                        h264bsdPredictSamples(data, pMb->mv+4*i+3, &refImage,
+                                                    colAndRow, tmp, pFill);
+                        break;
+                }
+            }
+            break;
+    }
+
+    /* Nothing is written to currImage in this variant; the prediction stays
+     * in data. The former "pMb->decoded > 1" early return was redundant
+     * here because both paths returned HANTRO_OK, so it has been dropped.
+     * NOTE(review): presumably the caller writes the output -- confirm. */
+
+    return(HANTRO_OK);
+}
+
+#else /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterPrediction
+
+        Functional description:
+          Processes one inter macroblock. Performs motion vector prediction
+          and reconstructs prediction macroblock. Writes the final macroblock
+          (prediction + residual) into the output image (currImage)
+
+        Inputs:
+          pMb           pointer to macroblock specific information
+          pMbLayer      pointer to current macroblock data from stream
+          dpb           pointer to decoded picture buffer
+          mbNum         current macroblock number
+          currImage     pointer to output image
+          data          pointer where predicted macroblock will be stored
+
+        Outputs:
+          pMb           structure is updated with current macroblock
+          currImage     current macroblock is written into image
+          data          prediction is stored here
+
+        Returns:
+          HANTRO_OK     success
+          HANTRO_NOK    error in motion vector prediction
+
+------------------------------------------------------------------------------*/
+u32 h264bsdInterPrediction(mbStorage_t *pMb, macroblockLayer_t *pMbLayer,
+    dpbStorage_t *dpb, u32 mbNum, image_t *currImage, u8 *data)
+{
+
+/* Variables */
+
+    u32 i;
+    u32 x, y;     /* offset of an 8x8 sub-macroblock inside the macroblock */
+    u32 row, col; /* macroblock position, scaled by 16 below */
+    subMbPartMode_e subPartMode;
+    image_t refImage; /* reference picture view; data is set per partition */
+
+/* Code */
+
+    ASSERT(pMb);
+    ASSERT(h264bsdMbPartPredMode(pMb->mbType) == PRED_MODE_INTER);
+    ASSERT(pMbLayer);
+
+    row = mbNum / currImage->width;
+    col = mbNum - row * currImage->width;
+    row *= 16;
+    col *= 16;
+
+    refImage.width = currImage->width;
+    refImage.height = currImage->height;
+
+    /* each case: predict the motion vectors, then fetch one block per
+     * partition; trailing arguments are x offset, y offset, width, height */
+    switch (pMb->mbType)
+    {
+        case P_Skip:
+        case P_L0_16x16:
+            if (MvPrediction16x16(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK)
+                return(HANTRO_NOK);
+            refImage.data = pMb->refAddr[0];
+            h264bsdPredictSamples(data, pMb->mv, &refImage, col, row, 0, 0,
+                16, 16);
+            break;
+
+        case P_L0_L0_16x8:
+            if ( MvPrediction16x8(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK)
+                return(HANTRO_NOK);
+            refImage.data = pMb->refAddr[0]; /* upper partition */
+            h264bsdPredictSamples(data, pMb->mv, &refImage, col, row, 0, 0,
+                16, 8);
+            refImage.data = pMb->refAddr[2]; /* lower partition */
+            h264bsdPredictSamples(data, pMb->mv+8, &refImage, col, row, 0, 8,
+                16, 8);
+            break;
+
+        case P_L0_L0_8x16:
+            if ( MvPrediction8x16(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK)
+                return(HANTRO_NOK);
+            refImage.data = pMb->refAddr[0]; /* left partition */
+            h264bsdPredictSamples(data, pMb->mv, &refImage, col, row, 0, 0,
+                8, 16);
+            refImage.data = pMb->refAddr[1]; /* right partition */
+            h264bsdPredictSamples(data, pMb->mv+4, &refImage, col, row, 8, 0,
+                8, 16);
+            break;
+
+        default: /* P_8x8 and P_8x8ref0 */
+            if ( MvPrediction8x8(pMb, &pMbLayer->subMbPred, dpb) != HANTRO_OK)
+                return(HANTRO_NOK);
+            for (i = 0; i < 4; i++)
+            {
+                refImage.data = pMb->refAddr[i];
+                subPartMode =
+                    h264bsdSubMbPartMode(pMbLayer->subMbPred.subMbType[i]);
+                x = i & 0x1 ? 8 : 0; /* odd i -> right half */
+                y = i < 2 ? 0 : 8;   /* i >= 2 -> lower half */
+                switch (subPartMode)
+                {
+                    case MB_SP_8x8:
+                        h264bsdPredictSamples(data, pMb->mv+4*i, &refImage,
+                            col, row, x, y, 8, 8);
+                        break;
+
+                    case MB_SP_8x4:
+                        h264bsdPredictSamples(data, pMb->mv+4*i, &refImage,
+                            col, row, x, y, 8, 4);
+                        h264bsdPredictSamples(data, pMb->mv+4*i+2, &refImage,
+                            col, row, x, y+4, 8, 4);
+                        break;
+
+                    case MB_SP_4x8:
+                        h264bsdPredictSamples(data, pMb->mv+4*i, &refImage,
+                            col, row, x, y, 4, 8);
+                        h264bsdPredictSamples(data, pMb->mv+4*i+1, &refImage,
+                            col, row, x+4, y, 4, 8);
+                        break;
+
+                    default: /* remaining mode: four 4x4 blocks */
+                        h264bsdPredictSamples(data, pMb->mv+4*i, &refImage,
+                            col, row, x, y, 4, 4);
+                        h264bsdPredictSamples(data, pMb->mv+4*i+1, &refImage,
+                            col, row, x+4, y, 4, 4);
+                        h264bsdPredictSamples(data, pMb->mv+4*i+2, &refImage,
+                            col, row, x, y+4, 4, 4);
+                        h264bsdPredictSamples(data, pMb->mv+4*i+3, &refImage,
+                            col, row, x+4, y+4, 4, 4);
+                        break;
+                }
+            }
+            break;
+    }
+
+    /* if decoded flag > 1 -> mb has already been successfully decoded and
+     * written to output -> do not write again */
+    if (pMb->decoded > 1)
+        return HANTRO_OK;
+
+    if (pMb->mbType != P_Skip)
+    {
+        /* non-skipped MB: the residual levels are passed along with data */
+        h264bsdWriteOutputBlocks(currImage, mbNum, data,
+            pMbLayer->residual.level);
+    }
+    else
+    {
+        h264bsdWriteMacroblock(currImage, data); /* P_Skip: data only */
+    }
+
+    return(HANTRO_OK);
+}
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: MvPrediction16x16
+
+        Functional description:
+            Motion vector prediction for 16x16 partition mode (also P_Skip).
+            Returns HANTRO_NOK for an out-of-range vector or missing ref pic.
+------------------------------------------------------------------------------*/
+
+u32 MvPrediction16x16(mbStorage_t *pMb, mbPred_t *mbPred, dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    mv_t mv;
+    mv_t mvPred;
+    interNeighbour_t a[3]; /* A, B, C */
+    u32 refIndex;
+    u8 *tmp;
+    u32 zeroMvA, zeroMvB;
+
+/* Code */
+
+    refIndex = mbPred->refIdxL0[0];
+    /* NOTE(review): P_Skip also uses refIdxL0[0]; presumably 0 -- confirm */
+    GetInterNeighbour(pMb->sliceId, pMb->mbA, a, 5);
+    GetInterNeighbour(pMb->sliceId, pMb->mbB, a+1, 10);
+    /* zero-vector test for P_Skip: compare the components directly instead
+     * of reading mv_t through a u32 pointer (strict-aliasing violation) */
+    zeroMvA = (a[0].mv.hor == 0) && (a[0].mv.ver == 0);
+    zeroMvB = (a[1].mv.hor == 0) && (a[1].mv.ver == 0);
+    if (pMb->mbType == P_Skip &&
+        (!a[0].available || !a[1].available ||
+         ( a[0].refIndex == 0 && zeroMvA ) ||
+         ( a[1].refIndex == 0 && zeroMvB )))
+    {
+        mv.hor = mv.ver = 0;
+    }
+    else
+    {
+        mv = mbPred->mvdL0[0];
+        GetInterNeighbour(pMb->sliceId, pMb->mbC, a+2, 10);
+        if (!a[2].available) /* C missing: use D in its place */
+        {
+            GetInterNeighbour(pMb->sliceId, pMb->mbD, a+2, 15);
+        }
+
+        GetPredictionMv(&mvPred, a, refIndex);
+
+        mv.hor += mvPred.hor;
+        mv.ver += mvPred.ver;
+
+        /* horizontal motion vector range [-2048, 2047.75] */
+        if ((u32)(i32)(mv.hor+8192) >= (16384))
+            return(HANTRO_NOK);
+
+        /* vertical motion vector range [-512, 511.75]
+         * (smaller for low levels) */
+        if ((u32)(i32)(mv.ver+2048) >= (4096))
+            return(HANTRO_NOK);
+    }
+
+    tmp = h264bsdGetRefPicData(dpb, refIndex);
+    if (tmp == NULL)
+        return(HANTRO_NOK);
+
+    /* one vector and one reference picture for the whole macroblock */
+    pMb->mv[0] = pMb->mv[1] = pMb->mv[2] = pMb->mv[3] =
+    pMb->mv[4] = pMb->mv[5] = pMb->mv[6] = pMb->mv[7] =
+    pMb->mv[8] = pMb->mv[9] = pMb->mv[10] = pMb->mv[11] =
+    pMb->mv[12] = pMb->mv[13] = pMb->mv[14] = pMb->mv[15] = mv;
+    pMb->refPic[0] = refIndex;
+    pMb->refPic[1] = refIndex;
+    pMb->refPic[2] = refIndex;
+    pMb->refPic[3] = refIndex;
+    pMb->refAddr[0] = tmp;
+    pMb->refAddr[1] = tmp;
+    pMb->refAddr[2] = tmp;
+    pMb->refAddr[3] = tmp;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: MvPrediction16x8
+
+        Functional description:
+            Motion vector prediction for 16x8 partition mode
+            (first partition -> mv[0..7], second partition -> mv[8..15])
+------------------------------------------------------------------------------*/
+
+u32 MvPrediction16x8(mbStorage_t *pMb, mbPred_t *mbPred, dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    mv_t mv;
+    mv_t mvPred;
+    interNeighbour_t a[3]; /* A, B, C */
+    u32 refIndex;
+    u8 *tmp;
+
+/* Code */
+
+    mv = mbPred->mvdL0[0]; /* first partition: predictor is added below */
+    refIndex = mbPred->refIdxL0[0];
+
+    GetInterNeighbour(pMb->sliceId, pMb->mbB, a+1, 10);
+
+    if (a[1].refIndex == refIndex) /* B has the same reference: use its mv */
+        mvPred = a[1].mv;
+    else
+    {
+        GetInterNeighbour(pMb->sliceId, pMb->mbA, a, 5);
+        GetInterNeighbour(pMb->sliceId, pMb->mbC, a+2, 10);
+        if (!a[2].available) /* C missing: use D in its place */
+        {
+            GetInterNeighbour(pMb->sliceId, pMb->mbD, a+2, 15);
+        }
+
+        GetPredictionMv(&mvPred, a, refIndex);
+
+    }
+    mv.hor += mvPred.hor;
+    mv.ver += mvPred.ver;
+
+    /* horizontal motion vector range [-2048, 2047.75] */
+    if ((u32)(i32)(mv.hor+8192) >= (16384))
+        return(HANTRO_NOK);
+
+    /* vertical motion vector range [-512, 511.75] (smaller for low levels) */
+    if ((u32)(i32)(mv.ver+2048) >= (4096))
+        return(HANTRO_NOK);
+
+    tmp = h264bsdGetRefPicData(dpb, refIndex);
+    if (tmp == NULL)
+        return(HANTRO_NOK);
+
+    pMb->mv[0] = pMb->mv[1] = pMb->mv[2] = pMb->mv[3] =
+    pMb->mv[4] = pMb->mv[5] = pMb->mv[6] = pMb->mv[7] = mv;
+    pMb->refPic[0] = refIndex;
+    pMb->refPic[1] = refIndex;
+    pMb->refAddr[0] = tmp;
+    pMb->refAddr[1] = tmp;
+
+    mv = mbPred->mvdL0[1]; /* second partition */
+    refIndex = mbPred->refIdxL0[1];
+
+    GetInterNeighbour(pMb->sliceId, pMb->mbA, a, 13);
+    if (a[0].refIndex == refIndex) /* A has the same reference: use its mv */
+        mvPred = a[0].mv;
+    else
+    {
+        /* B is the first partition of this macroblock, stored just above */
+        a[1].available = HANTRO_TRUE;
+        a[1].refIndex = pMb->refPic[0];
+        a[1].mv = pMb->mv[0];
+
+        /* c is not available */
+        GetInterNeighbour(pMb->sliceId, pMb->mbA, a+2, 7);
+
+        GetPredictionMv(&mvPred, a, refIndex);
+
+    }
+    mv.hor += mvPred.hor;
+    mv.ver += mvPred.ver;
+
+    /* horizontal motion vector range [-2048, 2047.75] */
+    if ((u32)(i32)(mv.hor+8192) >= (16384))
+        return(HANTRO_NOK);
+
+    /* vertical motion vector range [-512, 511.75] (smaller for low levels) */
+    if ((u32)(i32)(mv.ver+2048) >= (4096))
+        return(HANTRO_NOK);
+
+    tmp = h264bsdGetRefPicData(dpb, refIndex);
+    if (tmp == NULL)
+        return(HANTRO_NOK);
+
+    pMb->mv[8] = pMb->mv[9] = pMb->mv[10] = pMb->mv[11] =
+    pMb->mv[12] = pMb->mv[13] = pMb->mv[14] = pMb->mv[15] = mv;
+    pMb->refPic[2] = refIndex;
+    pMb->refPic[3] = refIndex;
+    pMb->refAddr[2] = tmp;
+    pMb->refAddr[3] = tmp;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: MvPrediction8x16
+
+        Functional description:
+            Motion vector prediction for 8x16 partition mode
+            (first -> mv[0..3], mv[8..11]; second -> mv[4..7], mv[12..15])
+------------------------------------------------------------------------------*/
+
+u32 MvPrediction8x16(mbStorage_t *pMb, mbPred_t *mbPred, dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    mv_t mv;
+    mv_t mvPred;
+    interNeighbour_t a[3]; /* A, B, C */
+    u32 refIndex;
+    u8 *tmp;
+
+/* Code */
+
+    mv = mbPred->mvdL0[0]; /* first partition: predictor is added below */
+    refIndex = mbPred->refIdxL0[0];
+
+    GetInterNeighbour(pMb->sliceId, pMb->mbA, a, 5);
+
+    if (a[0].refIndex == refIndex) /* A has the same reference: use its mv */
+        mvPred = a[0].mv;
+    else
+    {
+        GetInterNeighbour(pMb->sliceId, pMb->mbB, a+1, 10);
+        GetInterNeighbour(pMb->sliceId, pMb->mbB, a+2, 14);
+        if (!a[2].available) /* C missing: use D in its place */
+        {
+            GetInterNeighbour(pMb->sliceId, pMb->mbD, a+2, 15);
+        }
+
+        GetPredictionMv(&mvPred, a, refIndex);
+
+    }
+    mv.hor += mvPred.hor;
+    mv.ver += mvPred.ver;
+
+    /* horizontal motion vector range [-2048, 2047.75] */
+    if ((u32)(i32)(mv.hor+8192) >= (16384))
+        return(HANTRO_NOK);
+
+    /* vertical motion vector range [-512, 511.75] (smaller for low levels) */
+    if ((u32)(i32)(mv.ver+2048) >= (4096))
+        return(HANTRO_NOK);
+
+    tmp = h264bsdGetRefPicData(dpb, refIndex);
+    if (tmp == NULL)
+        return(HANTRO_NOK);
+
+    pMb->mv[0] = pMb->mv[1] = pMb->mv[2] = pMb->mv[3] =
+    pMb->mv[8] = pMb->mv[9] = pMb->mv[10] = pMb->mv[11] = mv;
+    pMb->refPic[0] = refIndex;
+    pMb->refPic[2] = refIndex;
+    pMb->refAddr[0] = tmp;
+    pMb->refAddr[2] = tmp;
+
+    mv = mbPred->mvdL0[1]; /* second partition */
+    refIndex = mbPred->refIdxL0[1];
+
+    GetInterNeighbour(pMb->sliceId, pMb->mbC, a+2, 10);
+    if (!a[2].available) /* C missing: use mbB index 11 in its place */
+    {
+        GetInterNeighbour(pMb->sliceId, pMb->mbB, a+2, 11);
+    }
+    if (a[2].refIndex == refIndex) /* C has the same reference: use its mv */
+        mvPred = a[2].mv;
+    else
+    {
+        /* A is the first partition of this macroblock, stored just above */
+        a[0].available = HANTRO_TRUE;
+        a[0].refIndex = pMb->refPic[0];
+        a[0].mv = pMb->mv[0];
+
+        GetInterNeighbour(pMb->sliceId, pMb->mbB, a+1, 14);
+
+        GetPredictionMv(&mvPred, a, refIndex);
+
+    }
+    mv.hor += mvPred.hor;
+    mv.ver += mvPred.ver;
+
+    /* horizontal motion vector range [-2048, 2047.75] */
+    if ((u32)(i32)(mv.hor+8192) >= (16384))
+        return(HANTRO_NOK);
+
+    /* vertical motion vector range [-512, 511.75] (smaller for low levels) */
+    if ((u32)(i32)(mv.ver+2048) >= (4096))
+        return(HANTRO_NOK);
+
+    tmp = h264bsdGetRefPicData(dpb, refIndex);
+    if (tmp == NULL)
+        return(HANTRO_NOK);
+
+    pMb->mv[4] = pMb->mv[5] = pMb->mv[6] = pMb->mv[7] =
+    pMb->mv[12] = pMb->mv[13] = pMb->mv[14] = pMb->mv[15] = mv;
+    pMb->refPic[1] = refIndex;
+    pMb->refPic[3] = refIndex;
+    pMb->refAddr[1] = tmp;
+    pMb->refAddr[3] = tmp;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: MvPrediction8x8
+
+        Functional description:
+            Motion vector prediction for 8x8 partition mode: resolves the
+            reference picture and predicts every sub-partition of each 8x8
+------------------------------------------------------------------------------*/
+
+u32 MvPrediction8x8(mbStorage_t *pMb, subMbPred_t *subMbPred, dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    u32 part, subPart, subPartCount, refIdx;
+    u8 *refData;
+
+/* Code */
+
+    for (part = 0; part < 4; part++)
+    {
+        subPartCount = h264bsdNumSubMbPart(subMbPred->subMbType[part]);
+        refIdx = subMbPred->refIdxL0[part];
+        pMb->refPic[part] = refIdx;
+        refData = h264bsdGetRefPicData(dpb, refIdx);
+        pMb->refAddr[part] = refData;
+        if (refData == NULL)
+            return(HANTRO_NOK);
+        for (subPart = 0; subPart < subPartCount; subPart++)
+            if (MvPrediction(pMb, subMbPred, part, subPart) != HANTRO_OK)
+                return(HANTRO_NOK);
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: MvPrediction
+
+        Functional description:
+            Perform motion vector prediction for sub-partition subMbPartIdx
+            of 8x8 partition mbPartIdx and store the result in pMb->mv
+------------------------------------------------------------------------------*/
+
+u32 MvPrediction(mbStorage_t *pMb, subMbPred_t *subMbPred, u32 mbPartIdx,
+    u32 subMbPartIdx)
+{
+
+/* Variables */
+
+    mv_t mv, mvPred;
+    u32 refIndex;
+    subMbPartMode_e subMbPartMode;
+    const neighbour_t *n; /* entry of the N_x_SUB_PART lookup tables */
+    mbStorage_t *nMb;     /* macroblock holding the neighbour */
+    interNeighbour_t a[3]; /* A, B, C */
+
+/* Code */
+
+    mv = subMbPred->mvdL0[mbPartIdx][subMbPartIdx];
+    subMbPartMode = h264bsdSubMbPartMode(subMbPred->subMbType[mbPartIdx]);
+    refIndex = subMbPred->refIdxL0[mbPartIdx];
+
+    n = N_A_SUB_PART[mbPartIdx][subMbPartMode]+subMbPartIdx;
+    nMb = h264bsdGetNeighbourMb(pMb, n->mb);
+    GetInterNeighbour(pMb->sliceId, nMb, a, n->index);
+
+    n = N_B_SUB_PART[mbPartIdx][subMbPartMode]+subMbPartIdx;
+    nMb = h264bsdGetNeighbourMb(pMb, n->mb);
+    GetInterNeighbour(pMb->sliceId, nMb, a+1, n->index);
+
+    n = N_C_SUB_PART[mbPartIdx][subMbPartMode]+subMbPartIdx;
+    nMb = h264bsdGetNeighbourMb(pMb, n->mb);
+    GetInterNeighbour(pMb->sliceId, nMb, a+2, n->index);
+
+    if (!a[2].available) /* C missing: use D in its place */
+    {
+        n = N_D_SUB_PART[mbPartIdx][subMbPartMode]+subMbPartIdx;
+        nMb = h264bsdGetNeighbourMb(pMb, n->mb);
+        GetInterNeighbour(pMb->sliceId, nMb, a+2, n->index);
+    }
+
+    GetPredictionMv(&mvPred, a, refIndex);
+
+    mv.hor += mvPred.hor;
+    mv.ver += mvPred.ver;
+
+    /* horizontal motion vector range [-2048, 2047.75] */
+    if (((u32)(i32)(mv.hor+8192) >= (16384)))
+        return(HANTRO_NOK);
+
+    /* vertical motion vector range [-512, 511.75] (smaller for low levels) */
+    if (((u32)(i32)(mv.ver+2048) >= (4096)))
+        return(HANTRO_NOK);
+
+    /* each 8x8 partition owns four consecutive entries of pMb->mv */
+    switch (subMbPartMode)
+    {
+        case MB_SP_8x8: /* one vector fills all four entries */
+            pMb->mv[4*mbPartIdx] = mv;
+            pMb->mv[4*mbPartIdx + 1] = mv;
+            pMb->mv[4*mbPartIdx + 2] = mv;
+            pMb->mv[4*mbPartIdx + 3] = mv;
+            break;
+
+        case MB_SP_8x4: /* entries 0,1 or 2,3 */
+            pMb->mv[4*mbPartIdx + 2*subMbPartIdx] = mv;
+            pMb->mv[4*mbPartIdx + 2*subMbPartIdx + 1] = mv;
+            break;
+
+        case MB_SP_4x8: /* entries 0,2 or 1,3 */
+            pMb->mv[4*mbPartIdx + subMbPartIdx] = mv;
+            pMb->mv[4*mbPartIdx + subMbPartIdx + 2] = mv;
+            break;
+
+        case MB_SP_4x4: /* a single entry */
+            pMb->mv[4*mbPartIdx + subMbPartIdx] = mv;
+            break;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: MedianFilter
+
+        Functional description:
+            Returns the median of three values (used for motion vector
+            prediction): c clamped into the range spanned by a and b
+------------------------------------------------------------------------------*/
+
+i32 MedianFilter(i32 a, i32 b, i32 c)
+{
+
+/* Variables */
+
+    i32 lo, hi;
+
+/* Code */
+
+    /* order the first two operands */
+    if (b > a)
+    {
+        lo = a;
+        hi = b;
+    }
+    else
+    {
+        lo = b;
+        hi = a;
+    }
+
+    /* clamp the third operand into [lo, hi]; the result is the median */
+    if (c > hi)
+    {
+        return(hi);
+    }
+    if (c < lo)
+    {
+        return(lo);
+    }
+
+    return(c);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetInterNeighbour
+
+        Functional description:
+            Fill n with the availability, reference index and motion vector
+            of block 'index' in neighbouring macroblock nMb (may be NULL)
+------------------------------------------------------------------------------*/
+
+void GetInterNeighbour(u32 sliceId, mbStorage_t *nMb,
+    interNeighbour_t *n, u32 index)
+{
+
+    /* defaults describe an unavailable neighbour */
+    n->available = HANTRO_FALSE;
+    n->refIndex = 0xFFFFFFFF;
+    n->mv.hor = 0;
+    n->mv.ver = 0;
+
+    /* the neighbour must exist and belong to the same slice */
+    if (!nMb || (nMb->sliceId != sliceId))
+    {
+        return;
+    }
+
+    n->available = HANTRO_TRUE;
+
+    /* MbPartPredMode "inlined" */
+    if (nMb->mbType <= P_8x8ref0)
+    {
+        n->refIndex = nMb->refPic[index>>2];
+        n->mv = nMb->mv[index];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetPredictionMv
+
+        Functional description:
+            Compute motion vector predictor based on neighbours A, B and C
+            (a[0], a[1], a[2]) for the given reference index
+------------------------------------------------------------------------------*/
+
+void GetPredictionMv(mv_t *mv, interNeighbour_t *a, u32 refIndex)
+{
+
+    u32 sameRefA, sameRefB, sameRefC;
+
+    /* A is the only available neighbour -> its vector is the prediction */
+    if (a[0].available && !a[1].available && !a[2].available)
+    {
+        *mv = a[0].mv;
+        return;
+    }
+
+    sameRefA = (a[0].refIndex == refIndex) ? HANTRO_TRUE : HANTRO_FALSE;
+    sameRefB = (a[1].refIndex == refIndex) ? HANTRO_TRUE : HANTRO_FALSE;
+    sameRefC = (a[2].refIndex == refIndex) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    /* a single neighbour sharing the reference index wins outright;
+     * in every other case take the component-wise median of A, B and C */
+    if ((sameRefA + sameRefB + sameRefC) == 1)
+    {
+        *mv = sameRefA ? a[0].mv : (sameRefB ? a[1].mv : a[2].mv);
+        return;
+    }
+
+    mv->hor = (i16)MedianFilter(a[0].mv.hor, a[1].mv.hor, a[2].mv.hor);
+    mv->ver = (i16)MedianFilter(a[0].mv.ver, a[1].mv.ver, a[2].mv.ver);
+
+}
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.h
new file mode 100755
index 0000000..94dee25
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_INTER_PREDICTION_H
+#define H264SWDEC_INTER_PREDICTION_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_image.h"
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_dpb.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdInterPrediction(mbStorage_t *pMb, macroblockLayer_t *pMbLayer,
+    dpbStorage_t *dpb, u32 mbNum, image_t *image, u8 *data);
+
+#endif /* #ifndef H264SWDEC_INTER_PREDICTION_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.c
new file mode 100755
index 0000000..15eabfb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.c
@@ -0,0 +1,1937 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdIntraPrediction
+          h264bsdGetNeighbourPels
+          h264bsdIntra16x16Prediction
+          h264bsdIntra4x4Prediction
+          h264bsdIntraChromaPrediction
+          h264bsdAddResidual
+          Intra16x16VerticalPrediction
+          Intra16x16HorizontalPrediction
+          Intra16x16DcPrediction
+          Intra16x16PlanePrediction
+          IntraChromaDcPrediction
+          IntraChromaHorizontalPrediction
+          IntraChromaVerticalPrediction
+          IntraChromaPlanePrediction
+          Get4x4NeighbourPels
+          Write4x4To16x16
+          Intra4x4VerticalPrediction
+          Intra4x4HorizontalPrediction
+          Intra4x4DcPrediction
+          Intra4x4DiagonalDownLeftPrediction
+          Intra4x4DiagonalDownRightPrediction
+          Intra4x4VerticalRightPrediction
+          Intra4x4HorizontalDownPrediction
+          Intra4x4VerticalLeftPrediction
+          Intra4x4HorizontalUpPrediction
+          DetermineIntra4x4PredMode
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_intra_prediction.h"
+#include "h264bsd_util.h"
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_neighbour.h"
+#include "h264bsd_image.h"
+
+#ifdef H264DEC_OMXDL
+#include "omxtypes.h"
+#include "omxVC.h"
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/* Switch off the following Lint messages for this file:
+ * Info 702: Shift right of signed quantity (int)
+ */
+/*lint -e702 */
+
+
+/* x/y pixel offset of each 4x4 block (by block index) within the 16x16 mb */
+const u32 h264bsdBlockX[16] =
+    { 0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12 };
+const u32 h264bsdBlockY[16] =
+    { 0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12 };
+
+const u8 h264bsdClip[1280] = /* 512 x 0, then 0..255, then 512 x 255 */
+{ /* NOTE(review): looks like a clamp-to-[0,255] lookup - confirm at users */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+    16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
+    32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
+    48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
+    64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
+    80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
+    96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
+    112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+    128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+    144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+    160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+    176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+    192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+    208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+    224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+    240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
+};
+
+#ifndef H264DEC_OMXDL
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+static void Get4x4NeighbourPels(u8 *a, u8 *l, u8 *data, u8 *above, u8 *left,
+    u32 blockNum);
+static void Intra16x16VerticalPrediction(u8 *data, u8 *above);
+static void Intra16x16HorizontalPrediction(u8 *data, u8 *left);
+static void Intra16x16DcPrediction(u8 *data, u8 *above, u8 *left,
+    u32 A, u32 B);
+static void Intra16x16PlanePrediction(u8 *data, u8 *above, u8 *left);
+static void IntraChromaDcPrediction(u8 *data, u8 *above, u8 *left,
+    u32 A, u32 B);
+static void IntraChromaHorizontalPrediction(u8 *data, u8 *left);
+static void IntraChromaVerticalPrediction(u8 *data, u8 *above);
+static void IntraChromaPlanePrediction(u8 *data, u8 *above, u8 *left);
+
+static void Intra4x4VerticalPrediction(u8 *data, u8 *above);
+static void Intra4x4HorizontalPrediction(u8 *data, u8 *left);
+static void Intra4x4DcPrediction(u8 *data, u8 *above, u8 *left, u32 A, u32 B);
+static void Intra4x4DiagonalDownLeftPrediction(u8 *data, u8 *above);
+static void Intra4x4DiagonalDownRightPrediction(u8 *data, u8 *above, u8 *left);
+static void Intra4x4VerticalRightPrediction(u8 *data, u8 *above, u8 *left);
+static void Intra4x4HorizontalDownPrediction(u8 *data, u8 *above, u8 *left);
+static void Intra4x4VerticalLeftPrediction(u8 *data, u8 *above);
+static void Intra4x4HorizontalUpPrediction(u8 *data, u8 *left);
+void h264bsdAddResidual(u8 *data, i32 *residual, u32 blockNum);
+
+static void Write4x4To16x16(u8 *data, u8 *data4x4, u32 blockNum);
+#endif /* H264DEC_OMXDL */
+
+static u32 DetermineIntra4x4PredMode(macroblockLayer_t *pMbLayer,
+    u32 available, neighbour_t *nA, neighbour_t *nB, u32 index,
+    mbStorage_t *nMbA, mbStorage_t *nMbB);
+
+
+#ifdef H264DEC_OMXDL
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntra16x16Prediction
+
+        Functional description:
+          Intra 16x16 prediction of luma pixels through the OpenMAX DL
+          primitive. Neighbouring pixels are addressed relative to 'ptr'
+          with line length 'width', the result goes to array 'data'.
+
+------------------------------------------------------------------------------*/
+u32 h264bsdIntra16x16Prediction(mbStorage_t *pMb, u8 *data, u8 *ptr,
+                                u32 width, u32 constrainedIntraPred)
+{
+
+/* Variables */
+
+    OMXResult status;
+    u32 haveA, haveB, haveD;
+
+/* Code */
+    ASSERT(pMb);
+    ASSERT(data);
+    ASSERT(ptr);
+    ASSERT(h264bsdPredModeIntra16x16(pMb->mbType) < 4);
+
+    /* with constrained intra prediction an inter coded neighbour counts as
+     * unavailable */
+    haveA = h264bsdIsNeighbourAvailable(pMb, pMb->mbA);
+    if (constrainedIntraPred && haveA)
+        if (h264bsdMbPartPredMode(pMb->mbA->mbType) == PRED_MODE_INTER)
+            haveA = HANTRO_FALSE;
+    haveB = h264bsdIsNeighbourAvailable(pMb, pMb->mbB);
+    if (constrainedIntraPred && haveB)
+        if (h264bsdMbPartPredMode(pMb->mbB->mbType) == PRED_MODE_INTER)
+            haveB = HANTRO_FALSE;
+    haveD = h264bsdIsNeighbourAvailable(pMb, pMb->mbD);
+    if (constrainedIntraPred && haveD)
+        if (h264bsdMbPartPredMode(pMb->mbD->mbType) == PRED_MODE_INTER)
+            haveD = HANTRO_FALSE;
+
+    /* prediction is written to 'data' using a line length of 16 */
+    status = omxVCM4P10_PredictIntra_16x16(
+        ptr - 1,            /* left neighbours */
+        ptr - width,        /* above neighbours */
+        ptr - width - 1,    /* above-left neighbour */
+        data,
+        (i32)width,
+        16,
+        (OMXVCM4P10Intra16x16PredMode)h264bsdPredModeIntra16x16(pMb->mbType),
+        (i32)((haveD<<5) + (haveA<<1) + haveB));
+
+    return (status == OMX_Sts_NoErr) ? HANTRO_OK : HANTRO_NOK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntra4x4Prediction
+
+        Functional description:
+          Intra 4x4 luma prediction of the single block 'block' through the
+          OpenMAX DL primitive. The prediction mode is determined here and
+          stored in 'pMb'; the predicted pixels are written to 'data'.
+          Returns HANTRO_NOK if the primitive reports an error.
+
+------------------------------------------------------------------------------*/
+u32 h264bsdIntra4x4Prediction(mbStorage_t *pMb, u8 *data,
+                              macroblockLayer_t *mbLayer,
+                              u8 *ptr, u32 width,
+                              u32 constrainedIntraPred, u32 block)
+{
+
+/* Variables */
+    u32 mode;
+    neighbour_t neighbour, neighbourB;
+    mbStorage_t *nMb, *nMb2;
+    u32 availableA, availableB, availableC, availableD;
+
+    OMXResult omxRes;
+    u32 x, y;
+    u8 *l, *a, *al; /* passed to OMX as left, above, above-left pointers */
+/* Code */
+    ASSERT(pMb);
+    ASSERT(data);
+    ASSERT(mbLayer);
+    ASSERT(ptr);
+    ASSERT(pMb->intra4x4PredMode[block] < 9);
+
+    neighbour = *h264bsdNeighbour4x4BlockA(block);
+    nMb = h264bsdGetNeighbourMb(pMb, neighbour.mb);
+    availableA = h264bsdIsNeighbourAvailable(pMb, nMb);
+    if (availableA && constrainedIntraPred &&
+       ( h264bsdMbPartPredMode(nMb->mbType) == PRED_MODE_INTER) )
+    {
+        availableA = HANTRO_FALSE; /* inter coded neighbour must not be used */
+    }
+
+    neighbourB = *h264bsdNeighbour4x4BlockB(block);
+    nMb2 = h264bsdGetNeighbourMb(pMb, neighbourB.mb);
+    availableB = h264bsdIsNeighbourAvailable(pMb, nMb2);
+    if (availableB && constrainedIntraPred &&
+       ( h264bsdMbPartPredMode(nMb2->mbType) == PRED_MODE_INTER) )
+    {
+        availableB = HANTRO_FALSE;
+    }
+
+    mode = DetermineIntra4x4PredMode(mbLayer,
+        (u32)(availableA && availableB), /* are both A and B usable */
+        &neighbour, &neighbourB, block, nMb, nMb2);
+    pMb->intra4x4PredMode[block] = (u8)mode;
+
+    neighbour = *h264bsdNeighbour4x4BlockC(block);
+    nMb = h264bsdGetNeighbourMb(pMb, neighbour.mb);
+    availableC = h264bsdIsNeighbourAvailable(pMb, nMb);
+    if (availableC && constrainedIntraPred &&
+       ( h264bsdMbPartPredMode(nMb->mbType) == PRED_MODE_INTER) )
+    {
+        availableC = HANTRO_FALSE;
+    }
+
+    neighbour = *h264bsdNeighbour4x4BlockD(block);
+    nMb = h264bsdGetNeighbourMb(pMb, neighbour.mb);
+    availableD = h264bsdIsNeighbourAvailable(pMb, nMb);
+    if (availableD && constrainedIntraPred &&
+       ( h264bsdMbPartPredMode(nMb->mbType) == PRED_MODE_INTER) )
+    {
+        availableD = HANTRO_FALSE;
+    }
+
+    x = h264bsdBlockX[block]; /* position of the block inside the mb */
+    y = h264bsdBlockY[block];
+
+    if (y == 0) /* top block row: the row above is addressed through ptr */
+        a = ptr - width + x;
+    else
+        a = data-16; /* 16 bytes before 'data' */
+
+    if (x == 0) /* left block column: left column is addressed through ptr */
+        l = ptr + y * width -1;
+    else
+    {
+        l = data-1;
+        width = 16; /* left pixels now come from 'data' */
+    }
+
+    if (x == 0) /* above-left: one step above 'l' or one pixel before 'a' */
+        al = l-width;
+    else
+        al = a-1;
+
+    omxRes = omxVCM4P10_PredictIntra_4x4( l,
+                                          a,
+                                          al,
+                                          data,
+                                          (i32)width,
+                                          16,
+                                          (OMXVCM4P10Intra4x4PredMode)mode,
+                                          (i32)(availableB +
+                                          (availableA<<1) +
+                                          (availableD<<5) +
+                                          (availableC<<6)) );
+    if (omxRes != OMX_Sts_NoErr)
+        return HANTRO_NOK;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntraChromaPrediction
+
+        Functional description:
+          Perform intra prediction for the cb and cr pixels through the
+          OpenMAX DL primitive. The resulting pixels are stored in 'data'.
+
+------------------------------------------------------------------------------*/
+u32 h264bsdIntraChromaPrediction(mbStorage_t *pMb, u8 *data, image_t *image,
+                                        u32 predMode, u32 constrainedIntraPred)
+{
+
+/* Variables */
+
+    u32 haveA, haveB, haveD;
+    u32 stride;
+    u32 comp;
+    i32 availability;
+    /* cb and cr planes of the picture */
+    u8 *plane[2];
+    OMXResult status;
+
+/* Code */
+    ASSERT(pMb);
+    ASSERT(data);
+    ASSERT(image);
+    ASSERT(predMode < 4);
+
+    /* with constrained intra prediction an inter coded neighbour counts as
+     * unavailable */
+    haveA = h264bsdIsNeighbourAvailable(pMb, pMb->mbA);
+    if (constrainedIntraPred && haveA)
+        if (h264bsdMbPartPredMode(pMb->mbA->mbType) == PRED_MODE_INTER)
+            haveA = HANTRO_FALSE;
+    haveB = h264bsdIsNeighbourAvailable(pMb, pMb->mbB);
+    if (constrainedIntraPred && haveB)
+        if (h264bsdMbPartPredMode(pMb->mbB->mbType) == PRED_MODE_INTER)
+            haveB = HANTRO_FALSE;
+    haveD = h264bsdIsNeighbourAvailable(pMb, pMb->mbD);
+    if (constrainedIntraPred && haveD)
+        if (h264bsdMbPartPredMode(pMb->mbD->mbType) == PRED_MODE_INTER)
+            haveD = HANTRO_FALSE;
+
+    /* availability flags packed the way the OMX primitive expects them */
+    availability = (i32)((haveD<<5) + (haveA<<1) + haveB);
+
+    /* chroma line length: 8 pixels per macroblock */
+    stride = image->width*8;
+    plane[0] = image->cb;
+    plane[1] = image->cr;
+
+    /* identical prediction for cb and cr; every plane produces a 64 byte
+     * block in 'data' */
+    for (comp = 0; comp < 2; comp++)
+    {
+        u8 *ptr = plane[comp];
+
+        status = omxVCM4P10_PredictIntraChroma_8x8(
+            ptr - 1,            /* left neighbours */
+            ptr - stride,       /* above neighbours */
+            ptr - stride - 1,   /* above-left neighbour */
+            data,
+            (i32)stride,
+            8,
+            (OMXVCM4P10IntraChromaPredMode)predMode,
+            availability);
+        if (status != OMX_Sts_NoErr)
+            return HANTRO_NOK;
+
+        /* advance to the output area of the next plane */
+        data += 64;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+
+#else /* H264DEC_OMXDL */
+
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntraPrediction
+
+        Functional description:
+          Processes one intra macroblock. Performs intra prediction using
+          specified prediction mode. Writes the final macroblock
+          (prediction + residual) into the output image (image)
+
+        Inputs:
+          pMb           pointer to macroblock specific information
+          mbLayer       pointer to current macroblock data from stream
+          image         pointer to output image
+          mbNum         current macroblock number
+          constrainedIntraPred  flag specifying if neighbouring inter
+                                macroblocks are used in intra prediction
+          data          pointer where output macroblock will be stored
+
+        Outputs:
+          pMb           structure is updated with current macroblock
+          image         current macroblock is written into image
+          data          current macroblock is stored here
+
+        Returns:
+          HANTRO_OK     success
+          HANTRO_NOK    error in intra prediction
+
+------------------------------------------------------------------------------*/
+u32 h264bsdIntraPrediction(mbStorage_t *pMb, macroblockLayer_t *mbLayer,
+    image_t *image, u32 mbNum, u32 constrainedIntraPred, u8 *data)
+{
+
+/* Variables */
+
+    /* Samples above the current macroblock, in this order: luma
+     * above-left (1), above (16) and above-right (4), followed for cb and
+     * then for cr by above-left (1) and above (8) */
+    u8 pelAbove[1 + 16 + 4 + 1 + 8 + 1 + 8];
+
+    /* Samples left of the current macroblock: 16 luma, 8 cb and 8 cr */
+    u8 pelLeft[16 + 8 + 8];
+
+    u32 status;
+
+/* Code */
+
+    ASSERT(pMb);
+    ASSERT(image);
+    ASSERT(mbNum < image->width * image->height);
+    ASSERT(h264bsdMbPartPredMode(pMb->mbType) != PRED_MODE_INTER);
+
+    h264bsdGetNeighbourPels(image, pelAbove, pelLeft, mbNum);
+
+    /* luma: sixteen 4x4 predictions or a single 16x16 prediction */
+    if (h264bsdMbPartPredMode(pMb->mbType) != PRED_MODE_INTRA16x16)
+    {
+        status = h264bsdIntra4x4Prediction(pMb, data, mbLayer,
+            pelAbove, pelLeft, constrainedIntraPred);
+    }
+    else
+    {
+        status = h264bsdIntra16x16Prediction(pMb, data,
+            mbLayer->residual.level, pelAbove, pelLeft, constrainedIntraPred);
+    }
+
+    if (status != HANTRO_OK)
+        return(status);
+
+    /* chroma: output goes after the 256 luma bytes, residual blocks come
+     * after the 16 luma blocks and neighbours after the luma neighbours */
+    status = h264bsdIntraChromaPrediction(pMb, data + 256,
+            mbLayer->residual.level+16, pelAbove + 21, pelLeft + 16,
+            mbLayer->mbPred.intraChromaPredMode, constrainedIntraPred);
+    if (status != HANTRO_OK)
+        return(status);
+
+    /* decoded flag > 1 -> mb has already been successfully decoded and
+     * written to output -> do not write again */
+    if (pMb->decoded <= 1)
+        h264bsdWriteMacroblock(image, data);
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdGetNeighbourPels
+
+        Functional description:
+          Get pixel values from neighbouring macroblocks into 'above'
+          and 'left' arrays (luma first, then cb, then cr).
+
+------------------------------------------------------------------------------*/
+
+void h264bsdGetNeighbourPels(image_t *image, u8 *above, u8 *left, u32 mbNum)
+{
+
+/* Variables */
+
+    u32 i;
+    u32 width, picSize;
+    u8 *ptr, *tmp;
+    u32 row, col;
+
+/* Code */
+
+    ASSERT(image);
+    ASSERT(above);
+    ASSERT(left);
+    ASSERT(mbNum < image->width * image->height);
+
+    if (!mbNum) /* row and col are both 0: nothing would be copied */
+        return;
+
+    width = image->width; /* picture width in macroblocks */
+    picSize = width * image->height;
+    row = mbNum / width;
+    col = mbNum - row * width;
+
+    width *= 16; /* from here on: luma line length in pixels */
+    ptr = image->data + row * 16 * width  + col * 16;
+
+    /* note that luma samples above-right to current macroblock do not make
+     * sense when current mb is the right-most mb in a row. Same applies to
+     * sample above-left if col is zero. However, usage of pels in prediction
+     * is controlled by neighbour availability information in actual prediction
+     * process */
+    if (row)
+    {
+        tmp = ptr - (width + 1); /* start at the above-left pixel */
+        for (i = 21; i--;) /* above-left + 16 above + 4 above-right */
+            *above++ = *tmp++;
+    }
+
+    if (col)
+    {
+        ptr--; /* column left of the macroblock */
+        for (i = 16; i--; ptr+=width)
+            *left++ = *ptr;
+    }
+
+    width >>= 1; /* chroma line length in pixels */
+    ptr = image->data + picSize * 256 + row * 8 * width  + col * 8; /* cb */
+
+    if (row)
+    {
+        tmp = ptr - (width + 1);
+        for (i = 9; i--;) /* cb: above-left + 8 above */
+            *above++ = *tmp++;
+        tmp += (picSize * 64) - 9; /* same position picSize*64 further: cr */
+        for (i = 9; i--;) /* cr: above-left + 8 above */
+            *above++ = *tmp++;
+    }
+
+    if (col)
+    {
+        ptr--;
+        for (i = 8; i--; ptr+=width) /* cb */
+            *left++ = *ptr;
+        ptr += (picSize * 64) - 8 * width; /* back 8 lines, then over to cr */
+        for (i = 8; i--; ptr+=width) /* cr */
+            *left++ = *ptr;
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntra16x16Prediction
+
+        Functional description:
+          Intra 16x16 prediction of luma pixels from the neighbour pixels
+          in 'above' and 'left', followed by adding the residual. The
+          resulting luma pixels are stored in macroblock array 'data'.
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIntra16x16Prediction(mbStorage_t *pMb, u8 *data, i32 residual[][16],
+                                u8 *above, u8 *left, u32 constrainedIntraPred)
+{
+
+/* Variables */
+
+    u32 blk, mode, haveA, haveB, haveD;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(residual);
+    ASSERT(above);
+    ASSERT(left);
+    ASSERT(h264bsdPredModeIntra16x16(pMb->mbType) < 4);
+
+    /* constrained intra pred: inter coded neighbours count as unavailable */
+    haveA = h264bsdIsNeighbourAvailable(pMb, pMb->mbA);
+    if (constrainedIntraPred && haveA)
+        if (h264bsdMbPartPredMode(pMb->mbA->mbType) == PRED_MODE_INTER)
+            haveA = HANTRO_FALSE;
+    haveB = h264bsdIsNeighbourAvailable(pMb, pMb->mbB);
+    if (constrainedIntraPred && haveB)
+        if (h264bsdMbPartPredMode(pMb->mbB->mbType) == PRED_MODE_INTER)
+            haveB = HANTRO_FALSE;
+    haveD = h264bsdIsNeighbourAvailable(pMb, pMb->mbD);
+    if (constrainedIntraPred && haveD)
+        if (h264bsdMbPartPredMode(pMb->mbD->mbType) == PRED_MODE_INTER)
+            haveD = HANTRO_FALSE;
+
+    mode = h264bsdPredModeIntra16x16(pMb->mbType);
+    if (mode == 0) /* Intra_16x16_Vertical */
+    {
+        if (!haveB)
+            return(HANTRO_NOK);
+        Intra16x16VerticalPrediction(data, above+1);
+    }
+    else if (mode == 1) /* Intra_16x16_Horizontal */
+    {
+        if (!haveA)
+            return(HANTRO_NOK);
+        Intra16x16HorizontalPrediction(data, left);
+    }
+    else if (mode == 2) /* Intra_16x16_DC */
+    {
+        Intra16x16DcPrediction(data, above+1, left, haveA, haveB);
+    }
+    else /* Intra_16x16_Plane */
+    {
+        if (!(haveA && haveB && haveD))
+            return(HANTRO_NOK);
+        Intra16x16PlanePrediction(data, above+1, left);
+    }
+
+    /* add the residual of each of the sixteen 4x4 blocks */
+    for (blk = 0; blk < 16; blk++)
+        h264bsdAddResidual(data, residual[blk], blk);
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntra4x4Prediction
+
+        Functional description:
+          Perform intra 4x4 prediction for luma pixels and add residual
+          into prediction. The resulting luma pixels are stored in
+          macroblock array 'data'. The intra 4x4 prediction mode for each
+          block is stored in 'pMb' structure.
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIntra4x4Prediction(mbStorage_t *pMb, u8 *data,
+                              macroblockLayer_t *mbLayer, u8 *above,
+                              u8 *left, u32 constrainedIntraPred)
+{
+
+/* Variables */
+
+    u32 block;
+    u32 mode;
+    neighbour_t neighbour, neighbourB;
+    mbStorage_t *nMb, *nMb2;
+    u8 a[1 + 4 + 4], l[1 + 4]; /* neighbour pixels of the current block */
+    u32 data4x4[4]; /* 16 byte prediction buffer, accessed through u8 cast */
+    u32 availableA, availableB, availableC, availableD;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(mbLayer);
+    ASSERT(above);
+    ASSERT(left);
+
+    for (block = 0; block < 16; block++)
+    {
+
+        ASSERT(pMb->intra4x4PredMode[block] < 9);
+
+        neighbour = *h264bsdNeighbour4x4BlockA(block); /* A: left */
+        nMb = h264bsdGetNeighbourMb(pMb, neighbour.mb);
+        availableA = h264bsdIsNeighbourAvailable(pMb, nMb);
+        if (availableA && constrainedIntraPred &&
+           ( h264bsdMbPartPredMode(nMb->mbType) == PRED_MODE_INTER) )
+        {
+            availableA = HANTRO_FALSE; /* inter coded: must not be used */
+        }
+
+        neighbourB = *h264bsdNeighbour4x4BlockB(block); /* B: above */
+        nMb2 = h264bsdGetNeighbourMb(pMb, neighbourB.mb);
+        availableB = h264bsdIsNeighbourAvailable(pMb, nMb2);
+        if (availableB && constrainedIntraPred &&
+           ( h264bsdMbPartPredMode(nMb2->mbType) == PRED_MODE_INTER) )
+        {
+            availableB = HANTRO_FALSE;
+        }
+
+        mode = DetermineIntra4x4PredMode(mbLayer,
+            (u32)(availableA && availableB), /* are both A and B usable */
+            &neighbour, &neighbourB, block, nMb, nMb2);
+        pMb->intra4x4PredMode[block] = (u8)mode;
+
+        neighbour = *h264bsdNeighbour4x4BlockC(block); /* C: above-right */
+        nMb = h264bsdGetNeighbourMb(pMb, neighbour.mb);
+        availableC = h264bsdIsNeighbourAvailable(pMb, nMb);
+        if (availableC && constrainedIntraPred &&
+           ( h264bsdMbPartPredMode(nMb->mbType) == PRED_MODE_INTER) )
+        {
+            availableC = HANTRO_FALSE;
+        }
+
+        neighbour = *h264bsdNeighbour4x4BlockD(block); /* D: above-left */
+        nMb = h264bsdGetNeighbourMb(pMb, neighbour.mb);
+        availableD = h264bsdIsNeighbourAvailable(pMb, nMb);
+        if (availableD && constrainedIntraPred &&
+           ( h264bsdMbPartPredMode(nMb->mbType) == PRED_MODE_INTER) )
+        {
+            availableD = HANTRO_FALSE;
+        }
+
+        Get4x4NeighbourPels(a, l, data, above, left, block); /* fills a, l */
+
+        switch(mode)
+        {
+            case 0: /* Intra_4x4_Vertical */
+                if (!availableB)
+                    return(HANTRO_NOK);
+                Intra4x4VerticalPrediction((u8*)data4x4, a + 1);
+                break;
+            case 1: /* Intra_4x4_Horizontal */
+                if (!availableA)
+                    return(HANTRO_NOK);
+                Intra4x4HorizontalPrediction((u8*)data4x4, l + 1);
+                break;
+            case 2: /* Intra_4x4_DC */
+                Intra4x4DcPrediction((u8*)data4x4, a + 1, l + 1,
+                    availableA, availableB);
+                break;
+            case 3: /* Intra_4x4_Diagonal_Down_Left */
+                if (!availableB)
+                    return(HANTRO_NOK);
+                if (!availableC) /* no above-right: repeat last above pixel */
+                {
+                    a[5] = a[6] = a[7] = a[8] = a[4];
+                }
+                Intra4x4DiagonalDownLeftPrediction((u8*)data4x4, a + 1);
+                break;
+            case 4: /* Intra_4x4_Diagonal_Down_Right */
+                if (!availableA || !availableB || !availableD)
+                    return(HANTRO_NOK);
+                Intra4x4DiagonalDownRightPrediction((u8*)data4x4, a + 1, l + 1);
+                break;
+            case 5: /* Intra_4x4_Vertical_Right */
+                if (!availableA || !availableB || !availableD)
+                    return(HANTRO_NOK);
+                Intra4x4VerticalRightPrediction((u8*)data4x4, a + 1, l + 1);
+                break;
+            case 6: /* Intra_4x4_Horizontal_Down */
+                if (!availableA || !availableB || !availableD)
+                    return(HANTRO_NOK);
+                Intra4x4HorizontalDownPrediction((u8*)data4x4, a + 1, l + 1);
+                break;
+            case 7: /* Intra_4x4_Vertical_Left */
+                if (!availableB)
+                    return(HANTRO_NOK);
+                if (!availableC) /* no above-right: repeat last above pixel */
+                {
+                    a[5] = a[6] = a[7] = a[8] = a[4];
+                }
+                Intra4x4VerticalLeftPrediction((u8*)data4x4, a + 1);
+                break;
+            default: /* case 8 Intra_4x4_Horizontal_Up */
+                if (!availableA)
+                    return(HANTRO_NOK);
+                Intra4x4HorizontalUpPrediction((u8*)data4x4, l + 1);
+                break;
+        }
+
+        Write4x4To16x16(data, (u8*)data4x4, block);
+        h264bsdAddResidual(data, mbLayer->residual.level[block], block);
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntraChromaPrediction
+
+        Functional description:
+          Perform intra prediction for the two chroma components of a
+          macroblock and add residual into prediction. The resulting chroma
+          pixels are stored in 'data'.
+
+        Returns:
+          HANTRO_OK, or HANTRO_NOK if predMode needs an unavailable neighbour
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIntraChromaPrediction(mbStorage_t *pMb, u8 *data, i32 residual[][16],
+                    u8 *above, u8 *left, u32 predMode, u32 constrainedIntraPred)
+{
+
+/* Variables */
+
+    u32 comp, blk, blockNum;
+    u32 availableA, availableB, availableD;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(residual);
+    ASSERT(above);
+    ASSERT(left);
+    ASSERT(predMode < 4);
+
+    /* an inter coded neighbour is unusable when constrainedIntraPred is set */
+    availableA = h264bsdIsNeighbourAvailable(pMb, pMb->mbA);
+    if (availableA && constrainedIntraPred &&
+        h264bsdMbPartPredMode(pMb->mbA->mbType) == PRED_MODE_INTER)
+        availableA = HANTRO_FALSE;
+    availableB = h264bsdIsNeighbourAvailable(pMb, pMb->mbB);
+    if (availableB && constrainedIntraPred &&
+        h264bsdMbPartPredMode(pMb->mbB->mbType) == PRED_MODE_INTER)
+        availableB = HANTRO_FALSE;
+    availableD = h264bsdIsNeighbourAvailable(pMb, pMb->mbD);
+    if (availableD && constrainedIntraPred &&
+        h264bsdMbPartPredMode(pMb->mbD->mbType) == PRED_MODE_INTER)
+        availableD = HANTRO_FALSE;
+
+    for (comp = 0, blockNum = 16; comp < 2; comp++)
+    {
+        if (predMode == 0) /* Intra_Chroma_DC */
+            IntraChromaDcPrediction(data, above+1, left, availableA,
+                availableB);
+        else if (predMode == 1) /* Intra_Chroma_Horizontal */
+        {
+            if (!availableA)
+                return(HANTRO_NOK);
+            IntraChromaHorizontalPrediction(data, left);
+        }
+        else if (predMode == 2) /* Intra_Chroma_Vertical */
+        {
+            if (!availableB)
+                return(HANTRO_NOK);
+            IntraChromaVerticalPrediction(data, above+1);
+        }
+        else /* predMode == 3: Intra_Chroma_Plane */
+        {
+            if (!(availableA && availableB && availableD))
+                return(HANTRO_NOK);
+            IntraChromaPlanePrediction(data, above+1, left);
+        }
+
+        for (blk = 0; blk < 4; blk++)
+            h264bsdAddResidual(data, residual[blk], blockNum++);
+        /* advance pointers to the second chroma component */
+        data += 64;
+        above += 9;
+        left += 8;
+        residual += 4;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdAddResidual
+
+        Functional description:
+          Add residual of a 4x4 block into prediction in macroblock array
+          'data'. Every sum (residual + prediction) is mapped through the
+          h264bsdClip table and stored back in 'data'.
+
+        Inputs:
+          data        prediction of the macroblock, updated in place
+          residual    the 16 residual values of the block, row after row
+          blockNum    block to update: below 16 the rows of 'data' are 16
+                      pixels wide, for 16..23 they are 8 pixels wide
+
+        Outputs:
+          data        residual + prediction of the addressed block
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_OMXDL
+void h264bsdAddResidual(u8 *data, i32 *residual, u32 blockNum)
+{
+
+/* Variables */
+
+    u32 row, col;
+    u32 stride, blk;
+    u8 *pel;
+    /* biased by 512 so that negative sums index the table as well */
+    const u8 *clp = h264bsdClip + 512;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(residual);
+    ASSERT(blockNum < 16 + 4 + 4);
+
+    /* nothing to add: the prediction already is the result */
+    if (IS_RESIDUAL_EMPTY(residual))
+        return;
+
+    /* residual values are expected to lie within -512..511 */
+    RANGE_CHECK_ARRAY(residual, -512, 511, 16);
+
+    if (blockNum < 16)
+    {
+        stride = 16;
+        blk = blockNum;
+    }
+    else
+    {
+        /* the two lowest bits give the position within the 8 pixel rows */
+        stride = 8;
+        blk = blockNum & 0x3;
+    }
+
+    /* top-left pixel of the block */
+    pel = data + h264bsdBlockY[blk]*stride + h264bsdBlockX[blk];
+
+    /* add the residual one row of four pixels at a time */
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 4; col++)
+        {
+            pel[col] = clp[residual[col] + pel[col]];
+        }
+
+        /* next row of the block */
+        residual += 4;
+        pel += stride;
+    }
+
+}
+#endif
+/*------------------------------------------------------------------------------
+
+    Function: Intra16x16VerticalPrediction
+
+        Functional description:
+          Perform intra 16x16 vertical prediction mode: every row of the
+          16x16 block 'data' becomes a copy of the 16 pixels in 'above'.
+
+------------------------------------------------------------------------------*/
+
+void Intra16x16VerticalPrediction(u8 *data, u8 *above)
+{
+
+/* Variables */
+
+    u32 row, col;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+
+    /* data is written row after row, 16 pixels per row */
+    for (row = 0; row < 16; row++, data += 16)
+    {
+        for (col = 0; col < 16; col++)
+            data[col] = above[col];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra16x16HorizontalPrediction
+
+        Functional description:
+          Perform intra 16x16 horizontal prediction mode: all 16 pixels of
+          row n of the 16x16 block 'data' become a copy of left[n].
+
+------------------------------------------------------------------------------*/
+
+void Intra16x16HorizontalPrediction(u8 *data, u8 *left)
+{
+
+/* Variables */
+
+    u32 row, col;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(left);
+
+    /* data is written row after row, 16 pixels per row */
+    for (row = 0; row < 16; row++, data += 16)
+    {
+        for (col = 0; col < 16; col++)
+            data[col] = left[row];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra16x16DcPrediction
+
+        Functional description:
+          Perform intra 16x16 DC prediction mode: all 256 pixels of 'data'
+          are set to the rounded mean of the available neighbouring pixels
+          (availableA: 'left', availableB: 'above'), or to 128 without any.
+
+------------------------------------------------------------------------------*/
+
+void Intra16x16DcPrediction(u8 *data, u8 *above, u8 *left, u32 availableA,
+    u32 availableB)
+{
+
+/* Variables */
+
+    u32 i, dc;
+    u32 sum = 0;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    /* add up the 16 pixels of every side that is available */
+    if (availableB)
+    {
+        for (i = 0; i < 16; i++)
+            sum += above[i];
+    }
+    if (availableA)
+    {
+        for (i = 0; i < 16; i++)
+            sum += left[i];
+    }
+
+    /* rounded mean over 32 or 16 pixels */
+    if (availableA && availableB)
+        dc = (sum + 16) >> 5;
+    else if (availableA || availableB)
+        dc = (sum + 8) >> 4;
+    else
+        dc = 128;
+
+    for (i = 0; i < 256; i++)
+        data[i] = (u8)dc;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra16x16PlanePrediction
+
+        Functional description:
+          Perform intra 16x16 plane prediction mode. 'above' has to point to
+          the pixel above column 0 so that above[-1] is the corner p[-1,-1].
+
+------------------------------------------------------------------------------*/
+
+void Intra16x16PlanePrediction(u8 *data, u8 *above, u8 *left)
+{
+
+/* Variables */
+
+    i32 i, j;
+    i32 a, b, c, tmp;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    a = 16 * (above[15] + left[15]);
+
+    /* i is signed: for i = 7 the index 6 - i has to be -1, the corner pixel */
+    for (i = 0, b = 0; i < 8; i++)
+        b += (i + 1) * (above[8 + i] - above[6 - i]);
+    b = (5 * b + 32) >> 6;
+
+    for (i = 0, c = 0; i < 7; i++)
+        c += (i + 1) * (left[8 + i] - left[6 - i]);
+    c += 8 * (left[15] - above[-1]); /* p[-1,-1] is only reachable via above */
+    c = (5 * c + 32) >> 6;
+
+    for (i = 0; i < 16; i++)
+    {
+        for (j = 0; j < 16; j++)
+        {
+            tmp = (a + b * (j - 7) + c * (i - 7) + 16) >> 5;
+            data[i*16+j] = (u8)CLIP1(tmp);
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: IntraChromaDcPrediction
+
+        Functional description:
+          Perform intra chroma DC prediction mode. The 8x8 block 'data' is
+          handled as four 4x4 quadrants, each filled with its own DC value:
+
+              +----+----+       TL  mean of above[0..3] and left[0..3]
+              | TL | TR |       TR  above[4..7], else left[0..3]
+              +----+----+       BL  left[4..7], else above[0..3]
+              | BL | BR |       BR  mean of above[4..7] and left[4..7]
+              +----+----+
+
+          TL and BR fall back to the one side that is available. When
+          neither side is available all four quadrants are set to 128.
+
+        Inputs:
+          above         the 8 pixels above the block
+          left          the 8 pixels left of the block
+          availableA    non-zero if 'left' may be used
+          availableB    non-zero if 'above' may be used
+
+------------------------------------------------------------------------------*/
+
+void IntraChromaDcPrediction(u8 *data, u8 *above, u8 *left, u32 availableA,
+    u32 availableB)
+{
+
+/* Variables */
+
+    u32 row, col;
+    u32 dcLeft, dcRight;
+    /* pixel sums of both halves of each side, read only if available */
+    u32 aboveLo = 0, aboveHi = 0, leftLo = 0, leftHi = 0;
+    /* DC value of each quadrant */
+    u32 dcTL, dcTR, dcBL, dcBR;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    if (availableB)
+    {
+        aboveLo = above[0] + above[1] + above[2] + above[3];
+        aboveHi = above[4] + above[5] + above[6] + above[7];
+    }
+    if (availableA)
+    {
+        leftLo = left[0] + left[1] + left[2] + left[3];
+        leftHi = left[4] + left[5] + left[6] + left[7];
+    }
+
+    /* both sides: the quadrants on the diagonal average 8 pixels */
+    if (availableA && availableB)
+    {
+        dcTL = (aboveLo + leftLo + 4) >> 3;
+        dcTR = (aboveHi + 2) >> 2;
+        dcBL = (leftHi + 2) >> 2;
+        dcBR = (aboveHi + leftHi + 4) >> 3;
+    }
+    else if (availableB)
+    {
+        /* only the row above: top and bottom quadrants share its means */
+        dcTL = dcBL = (aboveLo + 2) >> 2;
+        dcTR = dcBR = (aboveHi + 2) >> 2;
+    }
+    else if (availableA)
+    {
+        /* only the left column: left and right quadrants share its means */
+        dcTL = dcTR = (leftLo + 2) >> 2;
+        dcBL = dcBR = (leftHi + 2) >> 2;
+    }
+    /* neither A nor B available */
+    else
+    {
+        dcTL = dcTR = dcBL = dcBR = 128;
+    }
+
+    ASSERT(dcTL < 256 && dcTR < 256);
+    ASSERT(dcBL < 256 && dcBR < 256);
+
+    for (row = 0; row < 8; row++, data += 8)
+    {
+        /* rows 0..3 use the top quadrants, rows 4..7 the bottom ones */
+        dcLeft = (row < 4) ? dcTL : dcBL;
+        dcRight = (row < 4) ? dcTR : dcBR;
+        for (col = 0; col < 4; col++)
+        {
+            data[col] = (u8)dcLeft;
+            data[col + 4] = (u8)dcRight;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: IntraChromaHorizontalPrediction
+
+        Functional description:
+          Perform intra chroma horizontal prediction mode: all 8 pixels of
+          row n of the 8x8 block 'data' become a copy of left[n].
+
+        Inputs:
+          left    the 8 pixels of the column left of the block
+
+------------------------------------------------------------------------------*/
+
+void IntraChromaHorizontalPrediction(u8 *data, u8 *left)
+{
+
+/* Variables */
+
+    u32 row, col;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(left);
+
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            *data++ = left[row];
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: IntraChromaVerticalPrediction
+
+        Functional description:
+          Perform intra chroma vertical prediction mode: every row of the
+          8x8 block 'data' becomes a copy of the 8 pixels in 'above'.
+
+        Inputs:
+          above   the 8 pixels of the row above the block
+
+------------------------------------------------------------------------------*/
+
+void IntraChromaVerticalPrediction(u8 *data, u8 *above)
+{
+
+/* Variables */
+
+    u32 row, col;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            *data++ = above[col];
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: IntraChromaPlanePrediction
+
+        Functional description:
+          Perform intra chroma plane prediction mode. The 8x8 block is a
+          plane through the neighbouring pixels: 'b' is its step per column
+          and 'c' its step per row, both scaled by 32.
+
+        Inputs:
+          above   8 pixels above the block, above[-1] is the corner p[-1,-1]
+          left    8 pixels left of the block
+
+        Outputs:
+          data    the 8x8 prediction, mapped through the h264bsdClip table
+
+------------------------------------------------------------------------------*/
+
+void IntraChromaPlanePrediction(u8 *data, u8 *above, u8 *left)
+{
+
+/* Variables */
+
+    i32 k, row, col;
+    i32 a, b, c;
+    i32 rowBase;
+    const u8 *clp = h264bsdClip + 512;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    /* offset of the plane, scaled by 32 like the steps */
+    a = 16 * (above[7] + left[7]);
+
+    /* k is signed: for k = 3 the index 2 - k is -1, the corner pixel */
+    for (k = 0, b = 0; k < 4; k++)
+        b += (k + 1) * (above[4 + k] - above[2 - k]);
+    b = (17 * b + 16) >> 5;
+
+    /* p[-1,-1] has to be accessed through above pointer */
+    for (k = 0, c = 0; k < 3; k++)
+        c += (k + 1) * (left[4 + k] - left[2 - k]);
+    c += 4 * (left[7] - above[-1]);
+    c = (17 * c + 16) >> 5;
+
+    /* scaled plane value of pixel (0,0), rounding term included */
+    rowBase = a - 3 * c - 3 * b + 16;
+    for (row = 0; row < 8; row++, rowBase += c)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            *data++ = clp[(rowBase + col * b) >> 5];
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Get4x4NeighbourPels
+
+        Functional description:
+          Get neighbouring pixels of a 4x4 block into 'a' and 'l'.
+
+          Relative to the pixels of the block (x) the arrays are laid out as
+
+              a[0] a[1] a[2] a[3] a[4] a[5] a[6] a[7] a[8]
+              l[1]  x    x    x    x
+              l[2]  x    x    x    x
+              l[3]  x    x    x    x
+              l[4]  x    x    x    x
+
+          and l[0] receives the same corner pixel as a[0].
+
+          Pixels outside the macroblock are read from 'above' and 'left',
+          pixels inside it from 'data'.
+
+        Inputs:
+          data        16x16 macroblock, 16 pixels per row
+          above       row above the macroblock: above[0] is the corner pixel,
+                      above[1 + n] the pixel above column n
+          left        column left of the macroblock, left[n] belongs to row n
+          blockNum    index of the 4x4 block, 0..15
+
+        Outputs:
+          a           9 pixels above the block, see the layout
+          l           5 pixels left of the block, see the layout
+
+        Note:
+          a[5..8] are copied unconditionally. For a block in the rightmost
+          column that is not in the top row they are read past the end of
+          the row above, i.e. they are not above-right pixels.
+          NOTE(review): callers seem to replace a[5..8] whenever the
+          above-right block is not available - confirm that this covers
+          every such block.
+
+------------------------------------------------------------------------------*/
+
+void Get4x4NeighbourPels(u8 *a, u8 *l, u8 *data, u8 *above, u8 *left,
+    u32 blockNum)
+{
+
+/* Variables */
+
+    u32 x, y;
+    u32 k;
+    u8 *src;
+    u8 corner;
+
+/* Code */
+
+    ASSERT(a);
+    ASSERT(l);
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+    ASSERT(blockNum < 16);
+
+    x = h264bsdBlockX[blockNum];
+    y = h264bsdBlockY[blockNum];
+
+    /* left column: l[1..4] */
+    if (x == 0)
+    {
+        /* block touches the left edge of the macroblock */
+        for (k = 0; k < 4; k++)
+            l[k + 1] = left[y + k];
+    }
+    else
+    {
+        /* column x-1 of data, one pixel from each of the 4 rows */
+        src = data + (y * 16 + x - 1);
+        for (k = 0; k < 4; k++)
+            l[k + 1] = src[k * 16];
+    }
+
+    /* corner and row above: a[0..8] and l[0] */
+    if (y == 0)
+    {
+        /* block touches the top edge of the macroblock */
+        for (k = 0; k < 9; k++)
+            a[k] = above[x + k];
+        l[0] = above[x];
+    }
+    else
+    {
+        /* row y-1 of data, starting above the first column of the block */
+        src = data + ((y - 1) * 16 + x);
+        for (k = 0; k < 8; k++)
+            a[k + 1] = src[k];
+
+        /* the corner is outside of data for blocks in the first column */
+        if (x == 0)
+            corner = left[y - 1];
+        else
+            corner = src[-1];
+        a[0] = corner;
+        l[0] = corner;
+    }
+}
+
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4VerticalPrediction
+
+        Functional description:
+          Perform intra 4x4 vertical prediction mode: each column of the
+          4x4 block 'data' is filled with the pixel above it, above[0..3].
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4VerticalPrediction(u8 *data, u8 *above)
+{
+
+/* Variables */
+
+    u32 col;
+    u8 pel;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+
+    /* data holds the block as 4 rows of 4 pixels */
+    for (col = 0; col < 4; col++)
+    {
+        pel = above[col];
+        data[col] = data[col + 4] = data[col + 8] = data[col + 12] = pel;
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4HorizontalPrediction
+
+        Functional description:
+          Perform intra 4x4 horizontal prediction mode: each row of the
+          4x4 block 'data' is filled with the pixel left of it, left[0..3].
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4HorizontalPrediction(u8 *data, u8 *left)
+{
+
+/* Variables */
+
+    u32 row;
+    u8 pel;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(left);
+
+    for (row = 0; row < 4; row++)
+    {
+        pel = left[row];
+        data[0] = data[1] = data[2] = data[3] = pel;
+        data += 4;
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4DcPrediction
+
+        Functional description:
+          Perform intra 4x4 DC prediction mode: all 16 pixels of 'data' are
+          set to the rounded mean of the available neighbouring pixels.
+
+        Inputs:
+          above, left   the 4 pixels above and the 4 pixels left of the block
+          availableA    non-zero if 'left' may be used, availableB: 'above'
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4DcPrediction(u8 *data, u8 *above, u8 *left, u32 availableA,
+    u32 availableB)
+{
+
+/* Variables */
+
+    u32 i, dc;
+    u32 sum = 0;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    /* add up the 4 pixels of every side that is available */
+    if (availableB)
+    {
+        for (i = 0; i < 4; i++)
+            sum += above[i];
+    }
+    if (availableA)
+    {
+        for (i = 0; i < 4; i++)
+            sum += left[i];
+    }
+
+    /* rounded mean over 8 or 4 pixels, 128 if there is nothing to average */
+    if (availableA && availableB)
+        dc = (sum + 4) >> 3;
+    else if (availableA || availableB)
+        dc = (sum + 2) >> 2;
+    else
+        dc = 128;
+
+    ASSERT(dc < 256);
+    for (i = 0; i < 16; i++)
+        data[i] = (u8)dc;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4DiagonalDownLeftPrediction
+
+        Functional description:
+          Perform intra 4x4 diagonal down-left prediction mode. Pixel (x,y)
+          is a three-tap filter of above[x+y .. x+y+2]; above[0..7] are
+          read, so the 4 pixels above-right of the block are needed too.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4DiagonalDownLeftPrediction(u8 *data, u8 *above)
+{
+
+/* Variables */
+
+    u32 x, y, k, pel;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+
+    for (y = 0; y < 4; y++)
+    {
+        for (x = 0; x < 4; x++)
+        {
+            k = x + y;
+            if (k < 6)
+                pel = (above[k] + 2 * above[k+1] + above[k+2] + 2) >> 2;
+            else /* x = y = 3: filtered from above[6] and above[7] only */
+                pel = (above[6] + 3 * above[7] + 2) >> 2;
+            data[y * 4 + x] = (u8)pel;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4DiagonalDownRightPrediction
+
+        Functional description:
+          Perform intra 4x4 diagonal down-right prediction mode. The corner
+          pixel is read through both above[-1] and left[-1].
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4DiagonalDownRightPrediction(u8 *data, u8 *above, u8 *left)
+{
+
+/* Variables */
+
+    i32 x, y, d;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    for (y = 0; y < 4; y++)
+    {
+        for (x = 0; x < 4; x++, data++)
+        {
+            d = x - y;
+            if (d > 0) /* right of the diagonal: filter the row above */
+                *data = (above[d-2] + 2 * above[d-1] + above[d] + 2) >> 2;
+            else if (d < 0) /* below the diagonal: filter the left column */
+                *data = (left[-d-2] + 2 * left[-d-1] + left[-d] + 2) >> 2;
+            else /* on the diagonal: filter around the corner */
+                *data = (above[0] + 2 * above[-1] + left[0] + 2) >> 2;
+        }
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4VerticalRightPrediction
+
+        Functional description:
+          Perform intra 4x4 vertical right prediction mode. 'data' is a
+          packed 4x4 block (index = 4*y + x); pixels that share a value
+          are assigned together. above[-1] and left[-1] are the corner.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4VerticalRightPrediction(u8 *data, u8 *above, u8 *left)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    /* two-pixel averages of the row above */
+    data[ 0] = data[ 9] = (above[-1] + above[0] + 1) >> 1;
+    data[ 1] = data[10] = (above[0] + above[1] + 1) >> 1;
+    data[ 2] = data[11] = (above[1] + above[2] + 1) >> 1;
+    data[ 3] = (above[2] + above[3] + 1) >> 1;
+    /* three-tap filtered pixels of the row above */
+    data[ 5] = data[14] = (above[-1] + 2 * above[0] + above[1] + 2) >> 2;
+    data[ 6] = data[15] = (above[0] + 2 * above[1] + above[2] + 2) >> 2;
+    data[ 7] = (above[1] + 2 * above[2] + above[3] + 2) >> 2;
+    /* filter around the corner pixel above[-1] */
+    data[ 4] = data[13] = (above[0] + 2 * above[-1] + left[0] + 2) >> 2;
+    /* filter down the left column, corner read as left[-1] */
+    data[ 8] = (left[1] + 2 * left[0] + left[-1] + 2) >> 2;
+    data[12] = (left[2] + 2 * left[1] + left[0] + 2) >> 2;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4HorizontalDownPrediction
+
+        Functional description:
+          Perform intra 4x4 horizontal down prediction mode. 'data' is a
+          packed 4x4 block (index = 4*y + x); pixels that share a value
+          are assigned together. above[-1] and left[-1] are the corner.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4HorizontalDownPrediction(u8 *data, u8 *above, u8 *left)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    /* two-pixel averages down the left column, corner read as left[-1] */
+    data[ 0] = data[ 6] = (left[-1] + left[0] + 1) >> 1;
+    data[ 4] = data[10] = (left[0] + left[1] + 1) >> 1;
+    data[ 8] = data[14] = (left[1] + left[2] + 1) >> 1;
+    data[12] = (left[2] + left[3] + 1) >> 1;
+    /* three-tap filtered pixels of the left column */
+    data[ 5] = data[11] = (left[-1] + 2 * left[0] + left[1] + 2) >> 2;
+    data[ 9] = data[15] = (left[0] + 2 * left[1] + left[2] + 2) >> 2;
+    data[13] = (left[1] + 2 * left[2] + left[3] + 2) >> 2;
+    /* filter around the corner pixel above[-1] */
+    data[ 1] = data[ 7] = (above[0] + 2 * above[-1] + left[0] + 2) >> 2;
+    /* filter along the row above */
+    data[ 2] = (above[1] + 2 * above[0] + above[-1] + 2) >> 2;
+    data[ 3] = (above[2] + 2 * above[1] + above[0] + 2) >> 2;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4VerticalLeftPrediction
+
+        Functional description:
+          Perform intra 4x4 vertical left prediction mode. Even rows are
+          two-pixel averages and odd rows three-tap filters of the row
+          above, shifted one pixel to the right after every second row.
+          above[0..6] are read, so pixels above-right are needed too.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4VerticalLeftPrediction(u8 *data, u8 *above)
+{
+
+/* Variables */
+
+    u32 x, y, k;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+
+    for (y = 0; y < 4; y++)
+    {
+        for (x = 0; x < 4; x++, data++)
+        {
+            k = x + (y >> 1);
+            if (y & 1)
+                *data = (above[k] + 2 * above[k+1] + above[k+2] + 2) >> 2;
+            else
+                *data = (above[k] + above[k+1] + 1) >> 1;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4HorizontalUpPrediction
+
+        Functional description:
+          Perform intra 4x4 horizontal up prediction mode. 'data' is a packed
+          4x4 block (index = 4*y + x); equal pixels are assigned together.
+
+        Inputs:
+          left    the 4 pixels left of the block; nothing else is read,
+                  in particular not the corner pixel left[-1]
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4HorizontalUpPrediction(u8 *data, u8 *left)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(left);
+
+    /* two-pixel averages down the left column */
+    data[ 0] = (left[0] + left[1] + 1) >> 1;
+    data[ 2] = data[ 4] = (left[1] + left[2] + 1) >> 1;
+    data[ 6] = data[ 8] = (left[2] + left[3] + 1) >> 1;
+    /* three-tap filtered pixels of the left column */
+    data[ 1] = (left[0] + 2 * left[1] + left[2] + 2) >> 2;
+    data[ 3] = data[ 5] = (left[1] + 2 * left[2] + left[3] + 2) >> 2;
+    /* left[3] is the last pixel of the column, it gets a weight of three */
+    data[ 7] = data[ 9] = (left[2] + 3 * left[3] + 2) >> 2;
+    /* everything further down repeats left[3] */
+    data[10] = data[11] = data[12] = data[13] = data[14] = data[15] = left[3];
+
+}
+
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: Write4x4To16x16
+
+        Functional description:
+          Write a 4x4 block (data4x4) into correct position
+          in 16x16 macroblock (data). The pixels are copied one by one,
+          so neither buffer needs any particular alignment.
+
+------------------------------------------------------------------------------*/
+
+void Write4x4To16x16(u8 *data, u8 *data4x4, u32 blockNum)
+{
+
+/* Variables */
+
+    u32 x, y;
+    u32 row, col;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(data4x4);
+    ASSERT(blockNum < 16);
+
+    x = h264bsdBlockX[blockNum];
+    y = h264bsdBlockY[blockNum];
+
+    /* top-left pixel of the block inside the macroblock */
+    data += y*16+x;
+
+    /* no u32 aliases of the u8 buffers: word access depends on alignment
+     * and the old alignment check cast the pointer to a 32-bit integer */
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 4; col++)
+            data[col] = data4x4[col];
+        data += 16;
+        data4x4 += 4;
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DetermineIntra4x4PredMode
+
+        Functional description:
+          Returns the intra 4x4 prediction mode of a block based on the
+          neighbouring macroblocks and information parsed from stream.
+
+          The mode is predicted as the smaller one of the modes of the
+          neighbours A and B, a neighbour that is not intra 4x4 coded
+          counting as DC (2). The stream keeps or replaces that mode.
+
+        Inputs:
+          pMbLayer    parsed macroblock layer, holds prevIntra4x4PredModeFlag
+                      and remIntra4x4PredMode of every block
+          available   zero forces the predicted mode to DC
+          nA, nB      neighbouring blocks, their 'index' is the block number
+                      inside the macroblocks nMbA and nMbB
+          index       number of the current 4x4 block
+
+        Returns:
+          the prediction mode of the block
+
+------------------------------------------------------------------------------*/
+
+u32 DetermineIntra4x4PredMode(macroblockLayer_t *pMbLayer,
+    u32 available, neighbour_t *nA, neighbour_t *nB, u32 index,
+    mbStorage_t *nMbA, mbStorage_t *nMbB)
+{
+
+/* Variables */
+
+    u32 modeA = 2, modeB = 2;
+    u32 mode;
+
+/* Code */
+
+    ASSERT(pMbLayer);
+
+    /* dc only prediction unless both neighbours may be looked at */
+    if (available)
+    {
+        if (h264bsdMbPartPredMode(nMbA->mbType) == PRED_MODE_INTRA4x4)
+            modeA = nMbA->intra4x4PredMode[nA->index];
+        if (h264bsdMbPartPredMode(nMbB->mbType) == PRED_MODE_INTRA4x4)
+            modeB = nMbB->intra4x4PredMode[nB->index];
+    }
+
+    /* predicted mode */
+    mode = MIN(modeA, modeB);
+
+    /* flag set: the stream confirms the predicted mode */
+    if (pMbLayer->mbPred.prevIntra4x4PredModeFlag[index])
+        return(mode);
+
+    /* otherwise the stream carries one of the other modes; the predicted
+     * mode is left out of that numbering, so later modes are one higher */
+    if (pMbLayer->mbPred.remIntra4x4PredMode[index] < mode)
+        return(pMbLayer->mbPred.remIntra4x4PredMode[index]);
+
+    return(pMbLayer->mbPred.remIntra4x4PredMode[index] + 1);
+}
+
+
+/*lint +e702 */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.h
new file mode 100755
index 0000000..4652bd5
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_INTRA_PREDICTION_H
+#define H264SWDEC_INTRA_PREDICTION_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_image.h"
+#include "h264bsd_macroblock_layer.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_OMXDL
+u32 h264bsdIntraPrediction(mbStorage_t *pMb, macroblockLayer_t *mbLayer,
+    image_t *image, u32 mbNum, u32 constrainedIntraPred, u8 *data);
+
+u32 h264bsdIntra4x4Prediction(mbStorage_t *pMb, u8 *data,
+                              macroblockLayer_t *mbLayer,
+                              u8 *above, u8 *left, u32 constrainedIntraPred);
+u32 h264bsdIntra16x16Prediction(mbStorage_t *pMb, u8 *data, i32 residual[][16],
+    u8 *above, u8 *left, u32 constrainedIntraPred);
+
+u32 h264bsdIntraChromaPrediction(mbStorage_t *pMb, u8 *data, i32 residual[][16],
+    u8 *above, u8 *left, u32 predMode, u32 constrainedIntraPred);
+
+void h264bsdGetNeighbourPels(image_t *image, u8 *above, u8 *left, u32 mbNum);
+
+#else
+
+u32 h264bsdIntra4x4Prediction(mbStorage_t *pMb, u8 *data,
+                              macroblockLayer_t *mbLayer,
+                              u8 *pImage, u32 width,
+                              u32 constrainedIntraPred, u32 block);
+
+u32 h264bsdIntra16x16Prediction(mbStorage_t *pMb, u8 *data, u8 *pImage,
+                            u32 width, u32 constrainedIntraPred);
+
+u32 h264bsdIntraChromaPrediction(mbStorage_t *pMb, u8 *data, image_t *image,
+                                        u32 predMode, u32 constrainedIntraPred);
+
+#endif
+
+#endif /* #ifndef H264SWDEC_INTRA_PREDICTION_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.c
new file mode 100755
index 0000000..2b3e7f0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.c
@@ -0,0 +1,1446 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdDecodeMacroblockLayer
+          h264bsdMbPartPredMode
+          h264bsdNumMbPart
+          h264bsdNumSubMbPart
+          DecodeMbPred
+          DecodeSubMbPred
+          DecodeResidual
+          DetermineNc
+          CbpIntra16x16
+          h264bsdPredModeIntra16x16
+          h264bsdDecodeMacroblock
+          ProcessResidual
+          h264bsdSubMbPartMode
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_util.h"
+#include "h264bsd_vlc.h"
+#include "h264bsd_cavlc.h"
+#include "h264bsd_nal_unit.h"
+#include "h264bsd_neighbour.h"
+#include "h264bsd_transform.h"
+#include "h264bsd_intra_prediction.h"
+#include "h264bsd_inter_prediction.h"
+
+#ifdef H264DEC_OMXDL
+#include "omxtypes.h"
+#include "omxVC.h"
+#include "armVC.h"
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+#ifdef H264DEC_OMXDL
+static const u32 chromaIndex[8] = { 256, 260, 288, 292, 320, 324, 352, 356 };
+static const u32 lumaIndex[16] = {   0,   4,  64,  68,
+                                     8,  12,  72,  76,
+                                   128, 132, 192, 196,
+                                   136, 140, 200, 204 };
+#endif
+/* mapping of dc coefficients array to luma blocks */
+static const u32 dcCoeffIndex[16] =
+    {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 DecodeMbPred(strmData_t *pStrmData, mbPred_t *pMbPred,
+    mbType_e mbType, u32 numRefIdxActive);
+static u32 DecodeSubMbPred(strmData_t *pStrmData, subMbPred_t *pSubMbPred,
+    mbType_e mbType, u32 numRefIdxActive);
+static u32 DecodeResidual(strmData_t *pStrmData, residual_t *pResidual,
+    mbStorage_t *pMb, mbType_e mbType, u32 codedBlockPattern);
+
+#ifdef H264DEC_OMXDL
+static u32 DetermineNc(mbStorage_t *pMb, u32 blockIndex, u8 *pTotalCoeff);
+#else
+static u32 DetermineNc(mbStorage_t *pMb, u32 blockIndex, i16 *pTotalCoeff);
+#endif
+
+static u32 CbpIntra16x16(mbType_e mbType);
+#ifdef H264DEC_OMXDL
+static u32 ProcessIntra4x4Residual(mbStorage_t *pMb, u8 *data, u32 constrainedIntraPred,
+                    macroblockLayer_t *mbLayer, const u8 **pSrc, image_t *image);
+static u32 ProcessChromaResidual(mbStorage_t *pMb, u8 *data, const u8 **pSrc );
+static u32 ProcessIntra16x16Residual(mbStorage_t *pMb, u8 *data, u32 constrainedIntraPred,
+                    u32 intraChromaPredMode, const u8 **pSrc, image_t *image);
+
+
+#else
+static u32 ProcessResidual(mbStorage_t *pMb, i32 residualLevel[][16], u32 *);
+#endif
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdDecodeMacroblockLayer
+
+        Functional description:
+          Parse macroblock specific information from bit stream.
+
+        Inputs:
+          pStrmData         pointer to stream data structure
+          pMb               pointer to macroblock storage structure
+          sliceType         type of the current slice
+          numRefIdxActive   maximum reference index
+
+        Outputs:
+          pMbLayer          stores the macroblock data parsed from stream
+
+        Returns:
+          HANTRO_OK         success
+          HANTRO_NOK        end of stream or error in stream
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeMacroblockLayer(strmData_t *pStrmData,
+    macroblockLayer_t *pMbLayer, mbStorage_t *pMb, u32 sliceType,
+    u32 numRefIdxActive)
+{
+
+/* Variables */
+
+    u32 tmp, i, value;
+    i32 itmp;
+    mbPartPredMode_e partMode;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pMbLayer);
+
+#ifdef H264DEC_NEON
+    h264bsdClearMbLayer(pMbLayer, ((sizeof(macroblockLayer_t) + 63) & ~0x3F));
+#else
+    H264SwDecMemset(pMbLayer, 0, sizeof(macroblockLayer_t));
+#endif
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    /* limit 'value' itself: adding the offset first could wrap around u32 */
+    if (IS_I_SLICE(sliceType))
+    {
+        if (tmp != HANTRO_OK || value > (31 - 6))
+            return(HANTRO_NOK);
+        pMbLayer->mbType = (mbType_e)(value + 6);
+    }
+    else
+    {
+        if (tmp != HANTRO_OK || value > (31 - 1))
+            return(HANTRO_NOK);
+        pMbLayer->mbType = (mbType_e)(value + 1);
+    }
+
+    if (pMbLayer->mbType == I_PCM)
+    {
+        i32 *level;
+        while( !h264bsdIsByteAligned(pStrmData) )
+        {
+            /* pcm_alignment_zero_bit */
+            tmp = h264bsdGetBits(pStrmData, 1);
+            if (tmp)
+                return(HANTRO_NOK);
+        }
+        /* 384 samples of 8 bits each are stored as such into the levels */
+        level = pMbLayer->residual.level[0];
+        for (i = 0; i < 384; i++)
+        {
+            value = h264bsdGetBits(pStrmData, 8);
+            if (value == END_OF_STREAM)
+                return(HANTRO_NOK);
+            *level++ = (i32)value;
+        }
+    }
+    else
+    {
+        partMode = h264bsdMbPartPredMode(pMbLayer->mbType);
+        if ( (partMode == PRED_MODE_INTER) &&
+             (h264bsdNumMbPart(pMbLayer->mbType) == 4) )
+        {
+            tmp = DecodeSubMbPred(pStrmData, &pMbLayer->subMbPred,
+                pMbLayer->mbType, numRefIdxActive);
+        }
+        else
+        {
+            tmp = DecodeMbPred(pStrmData, &pMbLayer->mbPred,
+                pMbLayer->mbType, numRefIdxActive);
+        }
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        /* cbp is in the stream except for intra 16x16, where mbType gives it */
+        if (partMode != PRED_MODE_INTRA16x16)
+        {
+            tmp = h264bsdDecodeExpGolombMapped(pStrmData, &value,
+                (u32)(partMode == PRED_MODE_INTRA4x4));
+            if (tmp != HANTRO_OK)
+                return(tmp);
+            pMbLayer->codedBlockPattern = value;
+        }
+        else
+        {
+            pMbLayer->codedBlockPattern = CbpIntra16x16(pMbLayer->mbType);
+        }
+
+        if ( pMbLayer->codedBlockPattern ||
+             (partMode == PRED_MODE_INTRA16x16) )
+        {
+            tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+            if (tmp != HANTRO_OK || (itmp < -26) || (itmp > 25) )
+                return(HANTRO_NOK);
+            pMbLayer->mbQpDelta = itmp;
+
+            tmp = DecodeResidual(pStrmData, &pMbLayer->residual, pMb,
+                pMbLayer->mbType, pMbLayer->codedBlockPattern);
+            /* recompute the read bit count from the current stream position */
+            pStrmData->strmBuffReadBits =
+                (u32)(pStrmData->pStrmCurrPos - pStrmData->pStrmBuffStart) * 8 +
+                pStrmData->bitPosInWord;
+
+            if (tmp != HANTRO_OK)
+                return(tmp);
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdMbPartPredMode
+
+        Functional description:
+          Returns the prediction mode of a macroblock type
+
+------------------------------------------------------------------------------*/
+
+mbPartPredMode_e h264bsdMbPartPredMode(mbType_e mbType)
+{
+
+/* Variables */
+
+    mbPartPredMode_e predMode;
+
+/* Code */
+
+    ASSERT(mbType <= 31);
+
+    /* types up to P_8x8ref0 are inter, I_4x4 is the only intra 4x4 type and
+     * everything else is reported as intra 16x16 */
+    predMode = (mbType <= P_8x8ref0) ? PRED_MODE_INTER :
+               (mbType == I_4x4)     ? PRED_MODE_INTRA4x4 :
+                                       PRED_MODE_INTRA16x16;
+    return(predMode);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdNumMbPart
+
+        Functional description:
+          Returns the amount of macroblock partitions in a macroblock type
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdNumMbPart(mbType_e mbType)
+{
+
+/* Variables */
+
+    u32 numParts;
+
+/* Code */
+
+    ASSERT(h264bsdMbPartPredMode(mbType) == PRED_MODE_INTER);
+
+    /* P_L0_16x16 and P_Skip are a single partition, the 16x8 and 8x16 types
+     * have two partitions and every other type is reported as four */
+    if (mbType == P_L0_16x16 || mbType == P_Skip)
+        numParts = 1;
+    else if (mbType == P_L0_L0_16x8 || mbType == P_L0_L0_8x16)
+        numParts = 2;
+    else
+    {
+        /* P_8x8 or P_8x8ref0 */
+        numParts = 4;
+    }
+
+    return(numParts);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdNumSubMbPart
+
+        Functional description:
+          Returns the amount of sub-partitions in a sub-macroblock type
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdNumSubMbPart(subMbType_e subMbType)
+{
+
+/* Variables */
+
+    u32 numParts;
+
+/* Code */
+
+    ASSERT(subMbType <= P_L0_4x4);
+
+    /* 8x8 -> 1, 8x4 and 4x8 -> 2, anything else (4x4) -> 4 sub-partitions */
+    if (subMbType == P_L0_8x8)
+        numParts = 1;
+    else if (subMbType == P_L0_8x4 || subMbType == P_L0_4x8)
+        numParts = 2;
+    else
+    {
+        /* P_L0_4x4 */
+        numParts = 4;
+    }
+
+    return(numParts);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeMbPred
+
+        Functional description:
+          Parse macroblock prediction information from bit stream and store
+          in 'pMbPred'.
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeMbPred(strmData_t *pStrmData, mbPred_t *pMbPred, mbType_e mbType,
+    u32 numRefIdxActive)
+{
+    /* Returns HANTRO_OK, otherwise HANTRO_NOK or the failing decoder's result */
+/* Variables */
+
+    u32 tmp, i, j, value;
+    i32 itmp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pMbPred);
+
+    switch (h264bsdMbPartPredMode(mbType))
+    {
+        case PRED_MODE_INTER: /* reference indices first, then mvds */
+            if (numRefIdxActive > 1) /* parsed only with several references */
+            {
+                for (i = h264bsdNumMbPart(mbType), j = 0; i--;  j++)
+                {
+                    tmp = h264bsdDecodeExpGolombTruncated(pStrmData, &value,
+                        (u32)(numRefIdxActive > 2));
+                    if (tmp != HANTRO_OK || value >= numRefIdxActive)
+                        return(HANTRO_NOK);
+
+                    pMbPred->refIdxL0[j] = value;
+                }
+            }
+            /* one (hor, ver) motion vector difference per partition */
+            for (i = h264bsdNumMbPart(mbType), j = 0; i--;  j++)
+            {
+                tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                pMbPred->mvdL0[j].hor = (i16)itmp;
+
+                tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                pMbPred->mvdL0[j].ver = (i16)itmp;
+            }
+            break;
+
+        case PRED_MODE_INTRA4x4:
+            for (itmp = 0, i = 0; itmp < 2; itmp++) /* 2 rounds of 8 blocks */
+            {
+                value = h264bsdShowBits32(pStrmData); /* flushed below */
+                tmp = 0; /* number of remaining modes read in this round */
+                for (j = 8; j--; i++)
+                {
+                    pMbPred->prevIntra4x4PredModeFlag[i] =
+                        value & 0x80000000 ? HANTRO_TRUE : HANTRO_FALSE;
+                    value <<= 1;
+                    if (!pMbPred->prevIntra4x4PredModeFlag[i])
+                    { /* flag clear: the next 3 bits are the remaining mode */
+                        pMbPred->remIntra4x4PredMode[i] = value>>29;
+                        value <<= 3;
+                        tmp++;
+                    }
+                }
+                if (h264bsdFlushBits(pStrmData, 8 + 3*tmp) == END_OF_STREAM)
+                    return(HANTRO_NOK);
+            }
+            /* fall-through: chroma prediction mode follows for intra 4x4 too */
+
+        case PRED_MODE_INTRA16x16:
+            tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+            if (tmp != HANTRO_OK || value > 3)
+                return(HANTRO_NOK);
+            pMbPred->intraChromaPredMode = value;
+            break;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSubMbPred
+
+        Functional description:
+          Parse sub-macroblock prediction information from bit stream and
+          store in 'pSubMbPred'.
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeSubMbPred(strmData_t *pStrmData, subMbPred_t *pSubMbPred,
+    mbType_e mbType, u32 numRefIdxActive)
+{
+
+/* Variables */
+
+    u32 status, blk, part, numParts, codeNum;
+    i32 mvdComp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSubMbPred);
+    ASSERT(h264bsdMbPartPredMode(mbType) == PRED_MODE_INTER);
+
+    /* type of each of the four sub-macroblocks, valid values are 0..3 */
+    for (blk = 0; blk < 4; blk++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(pStrmData, &codeNum);
+        if (status != HANTRO_OK || codeNum > 3)
+            return(HANTRO_NOK);
+        pSubMbPred->subMbType[blk] = (subMbType_e)codeNum;
+    }
+
+    /* reference indices: skipped for a single reference and for P_8x8ref0 */
+    if ( (numRefIdxActive > 1) && (mbType != P_8x8ref0) )
+    {
+        for (blk = 0; blk < 4; blk++)
+        {
+            status = h264bsdDecodeExpGolombTruncated(pStrmData, &codeNum,
+                (u32)(numRefIdxActive > 2));
+            if (status != HANTRO_OK || codeNum >= numRefIdxActive)
+                return(HANTRO_NOK);
+            pSubMbPred->refIdxL0[blk] = codeNum;
+        }
+    }
+
+    /* horizontal and vertical mvd for every sub-partition */
+    for (blk = 0; blk < 4; blk++)
+    {
+        numParts = h264bsdNumSubMbPart(pSubMbPred->subMbType[blk]);
+        for (part = 0; part < numParts; part++)
+        {
+            status = h264bsdDecodeExpGolombSigned(pStrmData, &mvdComp);
+            if (status != HANTRO_OK)
+                return(status);
+            pSubMbPred->mvdL0[blk][part].hor = (i16)mvdComp;
+            status = h264bsdDecodeExpGolombSigned(pStrmData, &mvdComp);
+            if (status != HANTRO_OK)
+                return(status);
+            pSubMbPred->mvdL0[blk][part].ver = (i16)mvdComp;
+        }
+    }
+
+    return(HANTRO_OK);
+}
+
+#ifdef H264DEC_OMXDL
+/*------------------------------------------------------------------------------
+
+    Function: DecodeResidual
+
+        Functional description:
+          Parse residual information from bit stream and store in 'pResidual'.
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeResidual(strmData_t *pStrmData, residual_t *pResidual,
+    mbStorage_t *pMb, mbType_e mbType, u32 codedBlockPattern)
+{
+    /* Returns HANTRO_OK, or HANTRO_NOK as soon as one decode call fails. */
+/* Variables */
+
+    u32 i, j;
+    u32 blockCoded;
+    u32 blockIndex;
+    u32 is16x16;
+    OMX_INT nc;
+    OMXResult omxRes;
+    OMX_U8 *pPosCoefBuf;
+
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pResidual);
+    /* passed by address to every decode call (presumably advanced by them) */
+    pPosCoefBuf = pResidual->posCoefBuf;
+
+    /* luma DC is at index 24 */
+    if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTRA16x16)
+    {
+        nc = (OMX_INT)DetermineNc(pMb, 0, pResidual->totalCoeff);
+#ifndef H264DEC_NEON
+        omxRes =  omxVCM4P10_DecodeCoeffsToPairCAVLC(
+                (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                (OMX_S32*) (&pStrmData->bitPosInWord),
+                &pResidual->totalCoeff[24],
+                &pPosCoefBuf,
+                nc,
+                16);
+#else
+        omxRes = armVCM4P10_DecodeCoeffsToPair(
+                (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                (OMX_S32*) (&pStrmData->bitPosInWord),
+                &pResidual->totalCoeff[24],
+                &pPosCoefBuf,
+                nc,
+                16);
+#endif
+        if (omxRes != OMX_Sts_NoErr)
+            return(HANTRO_NOK);
+        is16x16 = HANTRO_TRUE;
+    }
+    else
+        is16x16 = HANTRO_FALSE;
+    /* luma: four groups of four blocks, one cbp bit per group */
+    for (i = 4, blockIndex = 0; i--;)
+    {
+        /* luma cbp in bits 0-3 */
+        blockCoded = codedBlockPattern & 0x1;
+        codedBlockPattern >>= 1;
+        if (blockCoded)
+        {
+            for (j = 4; j--; blockIndex++)
+            {
+                nc = (OMX_INT)DetermineNc(pMb,blockIndex,pResidual->totalCoeff);
+                if (is16x16) /* 15, not 16: presumably AC only, DC read above */
+                {
+#ifndef H264DEC_NEON
+                    omxRes =  omxVCM4P10_DecodeCoeffsToPairCAVLC(
+                            (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                            (OMX_S32*) (&pStrmData->bitPosInWord),
+                            &pResidual->totalCoeff[blockIndex],
+                            &pPosCoefBuf,
+                            nc,
+                            15);
+#else
+                    omxRes =  armVCM4P10_DecodeCoeffsToPair(
+                            (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                            (OMX_S32*) (&pStrmData->bitPosInWord),
+                            &pResidual->totalCoeff[blockIndex],
+                            &pPosCoefBuf,
+                            nc,
+                            15);
+#endif
+                }
+                else
+                {
+#ifndef H264DEC_NEON
+                    omxRes =  omxVCM4P10_DecodeCoeffsToPairCAVLC(
+                            (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                            (OMX_S32*) (&pStrmData->bitPosInWord),
+                            &pResidual->totalCoeff[blockIndex],
+                            &pPosCoefBuf,
+                            nc,
+                            16);
+#else
+                    omxRes = armVCM4P10_DecodeCoeffsToPair(
+                            (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                            (OMX_S32*) (&pStrmData->bitPosInWord),
+                            &pResidual->totalCoeff[blockIndex],
+                            &pPosCoefBuf,
+                            nc,
+                            16);
+#endif
+                }
+                if (omxRes != OMX_Sts_NoErr)
+                    return(HANTRO_NOK);
+            }
+        }
+        else
+            blockIndex += 4; /* skip the four blocks of an uncoded group */
+    }
+
+    /* chroma DC blocks are at indices 25 and 26 */
+    blockCoded = codedBlockPattern & 0x3; /* luma bits were shifted out above */
+    if (blockCoded)
+    {
+#ifndef H264DEC_NEON
+        omxRes =  omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC(
+                (const OMX_U8**) (&pStrmData->pStrmCurrPos),
+                (OMX_S32*) (&pStrmData->bitPosInWord),
+                &pResidual->totalCoeff[25],
+                &pPosCoefBuf);
+#else
+        omxRes = armVCM4P10_DecodeCoeffsToPair(
+                (const OMX_U8**) (&pStrmData->pStrmCurrPos),
+                (OMX_S32*) (&pStrmData->bitPosInWord),
+                &pResidual->totalCoeff[25],
+                &pPosCoefBuf,
+                17,
+                4);
+#endif
+        if (omxRes != OMX_Sts_NoErr)
+            return(HANTRO_NOK);
+#ifndef H264DEC_NEON
+        omxRes =  omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC(
+                (const OMX_U8**) (&pStrmData->pStrmCurrPos),
+                (OMX_S32*) (&pStrmData->bitPosInWord),
+                &pResidual->totalCoeff[26],
+                &pPosCoefBuf);
+#else
+        omxRes = armVCM4P10_DecodeCoeffsToPair(
+                (const OMX_U8**) (&pStrmData->pStrmCurrPos),
+                (OMX_S32*) (&pStrmData->bitPosInWord),
+                &pResidual->totalCoeff[26],
+                &pPosCoefBuf,
+                17,
+                4);
+#endif
+        if (omxRes != OMX_Sts_NoErr)
+            return(HANTRO_NOK);
+    }
+
+    /* chroma AC */
+    blockCoded = codedBlockPattern & 0x2; /* bit 1 of the chroma cbp */
+    if (blockCoded)
+    {
+        for (i = 8; i--;blockIndex++) /* blockIndex is 16 when entering */
+        {
+            nc = (OMX_INT)DetermineNc(pMb, blockIndex, pResidual->totalCoeff);
+#ifndef H264DEC_NEON
+            omxRes =  omxVCM4P10_DecodeCoeffsToPairCAVLC(
+                    (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                    (OMX_S32*) (&pStrmData->bitPosInWord),
+                    &pResidual->totalCoeff[blockIndex],
+                    &pPosCoefBuf,
+                    nc,
+                    15);
+#else
+            omxRes =  armVCM4P10_DecodeCoeffsToPair(
+                    (const OMX_U8 **) (&pStrmData->pStrmCurrPos),
+                    (OMX_S32*) (&pStrmData->bitPosInWord),
+                    &pResidual->totalCoeff[blockIndex],
+                    &pPosCoefBuf,
+                    nc,
+                    15);
+#endif
+            if (omxRes != OMX_Sts_NoErr)
+                return(HANTRO_NOK);
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+#else
+/*------------------------------------------------------------------------------
+
+    Function: DecodeResidual
+
+        Functional description:
+          Parse residual information from bit stream and store in 'pResidual'.
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeResidual(strmData_t *pStrmData, residual_t *pResidual,
+    mbStorage_t *pMb, mbType_e mbType, u32 codedBlockPattern)
+{
+    /* Returns HANTRO_OK, or the result of the block decode call that failed. */
+/* Variables */
+
+    u32 i, j, tmp;
+    i32 nc;
+    u32 blockCoded;
+    u32 blockIndex;
+    u32 is16x16;
+    i32 (*level)[16];
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pResidual);
+    /* a decode result packs status (bits 0-3) and totalCoeff (bits 4-11) */
+    level = pResidual->level;
+
+    /* luma DC is at index 24 */
+    if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTRA16x16)
+    {
+        nc = (i32)DetermineNc(pMb, 0, pResidual->totalCoeff);
+        tmp = h264bsdDecodeResidualBlockCavlc(pStrmData, level[24], nc, 16);
+        if ((tmp & 0xF) != HANTRO_OK)
+            return(tmp);
+        pResidual->totalCoeff[24] = (tmp >> 4) & 0xFF;
+        is16x16 = HANTRO_TRUE;
+    }
+    else
+        is16x16 = HANTRO_FALSE;
+    /* luma: four groups of four blocks, one cbp bit per group */
+    for (i = 4, blockIndex = 0; i--;)
+    {
+        /* luma cbp in bits 0-3 */
+        blockCoded = codedBlockPattern & 0x1;
+        codedBlockPattern >>= 1;
+        if (blockCoded)
+        {
+            for (j = 4; j--; blockIndex++)
+            {
+                nc = (i32)DetermineNc(pMb, blockIndex, pResidual->totalCoeff);
+                if (is16x16) /* 15 coefficients written from level[..] + 1 */
+                {
+                    tmp = h264bsdDecodeResidualBlockCavlc(pStrmData,
+                        level[blockIndex] + 1, nc, 15);
+                    pResidual->coeffMap[blockIndex] = tmp >> 15;
+                }
+                else
+                {
+                    tmp = h264bsdDecodeResidualBlockCavlc(pStrmData,
+                        level[blockIndex], nc, 16);
+                    pResidual->coeffMap[blockIndex] = tmp >> 16;
+                }
+                if ((tmp & 0xF) != HANTRO_OK)
+                    return(tmp);
+                pResidual->totalCoeff[blockIndex] = (tmp >> 4) & 0xFF;
+            }
+        }
+        else
+            blockIndex += 4; /* skip the four blocks of an uncoded group */
+    }
+
+    /* chroma DC blocks: counts at indices 25 and 26, levels share level[25] */
+    blockCoded = codedBlockPattern & 0x3; /* luma bits were shifted out above */
+    if (blockCoded)
+    {
+        tmp = h264bsdDecodeResidualBlockCavlc(pStrmData, level[25], -1, 4);
+        if ((tmp & 0xF) != HANTRO_OK)
+            return(tmp);
+        pResidual->totalCoeff[25] = (tmp >> 4) & 0xFF;
+        tmp = h264bsdDecodeResidualBlockCavlc(pStrmData, level[25]+4, -1, 4);
+        if ((tmp & 0xF) != HANTRO_OK)
+            return(tmp);
+        pResidual->totalCoeff[26] = (tmp >> 4) & 0xFF;
+    }
+
+    /* chroma AC */
+    blockCoded = codedBlockPattern & 0x2; /* bit 1 of the chroma cbp */
+    if (blockCoded)
+    {
+        for (i = 8; i--;blockIndex++) /* blockIndex is 16 when entering */
+        {
+            nc = (i32)DetermineNc(pMb, blockIndex, pResidual->totalCoeff);
+            tmp = h264bsdDecodeResidualBlockCavlc(pStrmData,
+                level[blockIndex] + 1, nc, 15);
+            if ((tmp & 0xF) != HANTRO_OK)
+                return(tmp);
+            pResidual->totalCoeff[blockIndex] = (tmp >> 4) & 0xFF;
+            pResidual->coeffMap[blockIndex] = (tmp >> 15);
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+#endif
+
+/*------------------------------------------------------------------------------
+
+    Function: DetermineNc
+
+        Functional description:
+          Returns the nC of a block.
+
+------------------------------------------------------------------------------*/
+#ifdef H264DEC_OMXDL
+u32 DetermineNc(mbStorage_t *pMb, u32 blockIndex, u8 *pTotalCoeff)
+#else
+u32 DetermineNc(mbStorage_t *pMb, u32 blockIndex, i16 *pTotalCoeff)
+#endif
+{
+/*lint -e702 */
+/* Variables */
+
+    const neighbour_t *pNbrA, *pNbrB;
+    u8 idxA, idxB;
+    u32 haveA, haveB;
+    i32 coeffA, coeffB, nc;
+
+/* Code */
+
+    ASSERT(blockIndex < 24);
+
+    pNbrA = h264bsdNeighbour4x4BlockA(blockIndex);
+    pNbrB = h264bsdNeighbour4x4BlockB(blockIndex);
+    idxA = pNbrA->index;
+    idxB = pNbrB->index;
+    haveA = haveB = 0;
+    coeffA = coeffB = 0;
+
+    /* Neighbour A: a block inside the current macroblock is read from
+     * pTotalCoeff because the totalCoeff array of mbStorage has not been
+     * set/updated yet, otherwise the count comes from macroblock A when
+     * that is available */
+    if (pNbrA->mb == MB_CURR)
+    {
+        coeffA = pTotalCoeff[idxA];
+        haveA = 1;
+    }
+    else if (h264bsdIsNeighbourAvailable(pMb, pMb->mbA))
+    {
+        coeffA = pMb->mbA->totalCoeff[idxA];
+        haveA = 1;
+    }
+
+    /* neighbour B is resolved the same way using macroblock B */
+    if (pNbrB->mb == MB_CURR)
+    {
+        coeffB = pTotalCoeff[idxB];
+        haveB = 1;
+    }
+    else if (h264bsdIsNeighbourAvailable(pMb, pMb->mbB))
+    {
+        coeffB = pMb->mbB->totalCoeff[idxB];
+        haveB = 1;
+    }
+
+    /* rounded up average when both counts are known, the known one when
+     * only one is and zero when neither is */
+    if (haveA && haveB)
+        nc = (coeffA + coeffB + 1) >> 1;
+    else if (haveA)
+        nc = coeffA;
+    else
+        nc = coeffB;
+
+    return((u32)nc);
+/*lint +e702 */
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: CbpIntra16x16
+
+        Functional description:
+          Returns the coded block pattern for intra 16x16 macroblock.
+
+------------------------------------------------------------------------------*/
+
+u32 CbpIntra16x16(mbType_e mbType)
+{
+
+/* Variables */
+
+    u32 lumaCbp;
+    u32 chromaCbp;
+
+/* Code */
+
+    ASSERT(mbType >= I_16x16_0_0_0 && mbType <= I_16x16_3_2_1);
+
+    /* luma part (bits 0-3): no bits set for the types before
+     * I_16x16_0_0_1, all four set from there on */
+    lumaCbp = (mbType >= I_16x16_0_0_1) ? 15 : 0;
+
+    /* chroma value is the offset from the first intra 16x16 type divided
+     * by four, with values above 2 wrapped back by three; it is zero for
+     * the I_16x16_0_0_0 type */
+    /* ignore lint warning on arithmetic on enum's */
+    chromaCbp = /*lint -e(656)*/(mbType - I_16x16_0_0_0) >> 2;
+    if (chromaCbp > 2)
+        chromaCbp -= 3;
+
+    /* chroma part is placed above the four luma bits */
+    return(lumaCbp + (chromaCbp << 4));
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdPredModeIntra16x16
+
+        Functional description:
+          Returns the prediction mode for intra 16x16 macroblock.
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdPredModeIntra16x16(mbType_e mbType)
+{
+
+/* Variables */
+
+    u32 typeOffset;
+
+/* Code */
+
+    ASSERT(mbType >= I_16x16_0_0_0 && mbType <= I_16x16_3_2_1);
+
+    /* prediction mode is the two lowest bits of the distance from the first
+     * intra 16x16 type; lint warning on enum arithmetic is suppressed */
+    typeOffset = /*lint -e(656)*/(mbType - I_16x16_0_0_0);
+    typeOffset &= 0x3;
+
+    return(typeOffset);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdDecodeMacroblock
+
+        Functional description:
+          Decode one macroblock and write into output image.
+
+        Inputs:
+          pMb           pointer to macroblock specific information
+          pMbLayer      pointer to current macroblock data from stream
+          currImage     pointer to output image
+          dpb           pointer to decoded picture buffer
+          qpY           pointer to slice QP
+          mbNum         current macroblock number
+          constrainedIntraPredFlag  flag specifying if neighbouring inter
+                                macroblocks are used in intra prediction
+          data          pointer to buffer used for the macroblock samples
+        Outputs:
+          pMb           structure is updated with current macroblock
+          currImage     decoded macroblock is written into output image
+
+        Returns:
+          HANTRO_OK     success
+          HANTRO_NOK    error in macroblock decoding
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeMacroblock(mbStorage_t *pMb, macroblockLayer_t *pMbLayer,
+    image_t *currImage, dpbStorage_t *dpb, i32 *qpY, u32 mbNum,
+    u32 constrainedIntraPredFlag, u8* data)
+{
+
+/* Variables */
+
+    u32 i, tmp;
+    mbType_e mbType;
+#ifdef H264DEC_OMXDL
+    const u8 *pSrc;
+#endif
+/* Code */
+
+    ASSERT(pMb);
+    ASSERT(pMbLayer);
+    ASSERT(currImage);
+    ASSERT(qpY && *qpY < 52);
+    ASSERT(mbNum < currImage->width*currImage->height);
+
+    mbType = pMbLayer->mbType;
+    pMb->mbType = mbType;
+    /* count decode attempts; a value > 1 means the mb was already output */
+    pMb->decoded++;
+
+    h264bsdSetCurrImageMbPointers(currImage, mbNum);
+
+    if (mbType == I_PCM)
+    {
+        u8 *pData = (u8*)data;
+#ifdef H264DEC_OMXDL
+        u8 *tot = pMb->totalCoeff;
+#else
+        i16 *tot = pMb->totalCoeff;
+#endif
+        i32 *lev = pMbLayer->residual.level[0];
+
+        pMb->qpY = 0;
+
+        /* if decoded flag > 1 -> mb has already been successfully decoded and
+         * written to output -> do not write again */
+        if (pMb->decoded > 1)
+        {
+            for (i = 24; i--;)
+                *tot++ = 16;
+            return HANTRO_OK;
+        }
+        /* 24 blocks of 16 samples: copy the PCM levels into data as bytes */
+        for (i = 24; i--;)
+        {
+            *tot++ = 16;
+            for (tmp = 16; tmp--;)
+                *pData++ = (u8)(*lev++);
+        }
+        h264bsdWriteMacroblock(currImage, (u8*)data);
+
+        return(HANTRO_OK);
+    }
+    else
+    {
+#ifdef H264DEC_OMXDL
+        if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTER)
+        {
+            tmp = h264bsdInterPrediction(pMb, pMbLayer, dpb, mbNum,
+                currImage, (u8*)data);
+            if (tmp != HANTRO_OK) return (tmp);
+        }
+#endif
+        if (mbType != P_Skip)
+        {
+            H264SwDecMemcpy(pMb->totalCoeff,
+                            pMbLayer->residual.totalCoeff,
+                            27*sizeof(*pMb->totalCoeff));
+
+            /* update qpY, wrapping once by 52 if it leaves the range 0..51 */
+            if (pMbLayer->mbQpDelta)
+            {
+                *qpY = *qpY + pMbLayer->mbQpDelta;
+                if (*qpY < 0) *qpY += 52;
+                else if (*qpY >= 52) *qpY -= 52;
+            }
+            pMb->qpY = (u32)*qpY;
+
+#ifdef H264DEC_OMXDL
+            pSrc = pMbLayer->residual.posCoefBuf;
+            /* inter: prediction was done above, process the 16 luma blocks */
+            if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTER)
+            {
+                OMXResult res;
+                u8 *p;
+                u8 *totalCoeff = pMb->totalCoeff;
+
+                for (i = 0; i < 16; i++, totalCoeff++)
+                {
+                    p = data + lumaIndex[i];
+                    if (*totalCoeff)
+                    {
+                        res = omxVCM4P10_DequantTransformResidualFromPairAndAdd(
+                                &pSrc, p, 0, p, 16, 16, *qpY, *totalCoeff);
+                        if (res != OMX_Sts_NoErr)
+                            return (HANTRO_NOK);
+                    }
+                }
+
+            }
+            else if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTRA4x4)
+            {
+                tmp = ProcessIntra4x4Residual(pMb,
+                                              data,
+                                              constrainedIntraPredFlag,
+                                              pMbLayer,
+                                              &pSrc,
+                                              currImage);
+                if (tmp != HANTRO_OK)
+                    return (tmp);
+            }
+            else if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTRA16x16)
+            {
+                tmp = ProcessIntra16x16Residual(pMb,
+                                        data,
+                                        constrainedIntraPredFlag,
+                                        pMbLayer->mbPred.intraChromaPredMode,
+                                        &pSrc,
+                                        currImage);
+                if (tmp != HANTRO_OK)
+                    return (tmp);
+            }
+
+            tmp = ProcessChromaResidual(pMb, data, &pSrc);
+
+#else
+            tmp = ProcessResidual(pMb, pMbLayer->residual.level,
+                pMbLayer->residual.coeffMap);
+#endif
+            if (tmp != HANTRO_OK)
+                return (tmp);
+        }
+        else
+        {
+            H264SwDecMemset(pMb->totalCoeff, 0, 27*sizeof(*pMb->totalCoeff));
+            pMb->qpY = (u32)*qpY;   /* P_Skip stores the current QP unchanged */
+        }
+#ifdef H264DEC_OMXDL
+        /* if decoded flag > 1 -> mb has already been successfully decoded and
+         * written to output -> do not write again */
+        if (pMb->decoded > 1)
+            return HANTRO_OK;
+
+        h264bsdWriteMacroblock(currImage, data);
+#else
+        if (h264bsdMbPartPredMode(mbType) != PRED_MODE_INTER)
+        {
+            tmp = h264bsdIntraPrediction(pMb, pMbLayer, currImage, mbNum,
+                constrainedIntraPredFlag, (u8*)data);
+            if (tmp != HANTRO_OK) return (tmp);
+        }
+        else
+        {
+            tmp = h264bsdInterPrediction(pMb, pMbLayer, dpb, mbNum,
+                currImage, (u8*)data);
+            if (tmp != HANTRO_OK) return (tmp);
+        }
+#endif
+    }
+
+    return HANTRO_OK;
+}
+
+
+#ifdef H264DEC_OMXDL
+
+/*------------------------------------------------------------------------------
+
+    Function: ProcessChromaResidual
+
+        Functional description:
+          Process the residual data of chroma with
+          inverse quantization and inverse transform.
+
+------------------------------------------------------------------------------*/
+u32 ProcessChromaResidual(mbStorage_t *pMb, u8 *data, const u8 **pSrc )
+{
+    u32 i;
+    u32 chromaQp;
+    i16 *pDc;
+    i16 dc[4 + 4] = {0,0,0,0,0,0,0,0};  /* 4 DC values for each DC block */
+    u8 *totalCoeff;
+    OMXResult result;
+    u8 *p;
+
+    /* chroma DC processing. First chroma dc block is block with index 25 */
+    chromaQp =
+        h264bsdQpC[CLIP3(0, 51, (i32)pMb->qpY + pMb->chromaQpIndexOffset)];
+
+    if (pMb->totalCoeff[25])
+    {
+        pDc = dc;
+        result = omxVCM4P10_TransformDequantChromaDCFromPair(
+                pSrc,
+                pDc,
+                (i32)chromaQp);
+        if (result != OMX_Sts_NoErr)
+            return (HANTRO_NOK);
+    }
+    if (pMb->totalCoeff[26])
+    {
+        pDc = dc+4;
+        result = omxVCM4P10_TransformDequantChromaDCFromPair(
+                pSrc,
+                pDc,
+                (i32)chromaQp);
+        if (result != OMX_Sts_NoErr)
+            return (HANTRO_NOK);
+    }
+    /* totalCoeff[16..23] belong to the eight chroma blocks */
+    pDc = dc;
+    totalCoeff = pMb->totalCoeff + 16;
+    for (i = 0; i < 8; i++, pDc++, totalCoeff++)
+    {
+        /* process block only if it has coefficients or a non-zero DC value */
+        if (*totalCoeff || *pDc)
+        {
+            p = data + chromaIndex[i];
+            result = omxVCM4P10_DequantTransformResidualFromPairAndAdd(
+                    pSrc,
+                    p,
+                    pDc,
+                    p,
+                    8,
+                    8,
+                    (i32)chromaQp,
+                    *totalCoeff);
+            if (result != OMX_Sts_NoErr)
+                return (HANTRO_NOK);
+        }
+    }
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: ProcessIntra16x16Residual
+
+        Functional description:
+          Process the residual data of luma with
+          inverse quantization and inverse transform.
+
+------------------------------------------------------------------------------*/
+u32 ProcessIntra16x16Residual(mbStorage_t *pMb,
+                              u8 *data,
+                              u32 constrainedIntraPred,
+                              u32 intraChromaPredMode,
+                              const u8** pSrc,
+                              image_t *image)
+{
+    u32 i;
+    i16 *pDc;
+    i16 dc[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+    u8 *totalCoeff;
+    OMXResult result;
+    u8 *p;
+
+    totalCoeff = pMb->totalCoeff;
+    /* totalCoeff[24] is the coefficient count of the luma DC block */
+    if (totalCoeff[24])
+    {
+        pDc = dc;
+        result = omxVCM4P10_TransformDequantLumaDCFromPair(
+                    pSrc,
+                    pDc,
+                    (i32)pMb->qpY);
+        if (result != OMX_Sts_NoErr)
+            return (HANTRO_NOK);
+    }
+    /* Intra 16x16 pred */
+    if (h264bsdIntra16x16Prediction(pMb, data, image->luma,
+                            image->width*16, constrainedIntraPred) != HANTRO_OK)
+        return(HANTRO_NOK);
+    for (i = 0; i < 16; i++, totalCoeff++)
+    {
+        p = data + lumaIndex[i];
+        pDc = &dc[dcCoeffIndex[i]];
+        if (*totalCoeff || *pDc)
+        {
+            result = omxVCM4P10_DequantTransformResidualFromPairAndAdd(
+                    pSrc,
+                    p,
+                    pDc,
+                    p,
+                    16,
+                    16,
+                    (i32)pMb->qpY,
+                    *totalCoeff);
+            if (result != OMX_Sts_NoErr)
+                return (HANTRO_NOK);
+        }
+    }
+    /* chroma prediction is written after the first 256 bytes of data */
+    if (h264bsdIntraChromaPrediction(pMb, data + 256,
+                image,
+                intraChromaPredMode,
+                constrainedIntraPred) != HANTRO_OK)
+        return(HANTRO_NOK);
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: ProcessIntra4x4Residual
+
+        Functional description:
+          Process the residual data of luma with
+          inverse quantization and inverse transform.
+
+------------------------------------------------------------------------------*/
+u32 ProcessIntra4x4Residual(mbStorage_t *pMb,
+                            u8 *data,
+                            u32 constrainedIntraPred,
+                            macroblockLayer_t *mbLayer,
+                            const u8 **pSrc,
+                            image_t *image)
+{
+    u32 i;
+    u8 *totalCoeff;
+    OMXResult result;
+    u8 *p;
+
+    totalCoeff = pMb->totalCoeff;
+    /* each 4x4 luma block is predicted first, then its residual is processed */
+    for (i = 0; i < 16; i++, totalCoeff++)
+    {
+        p = data + lumaIndex[i];
+        if (h264bsdIntra4x4Prediction(pMb, p, mbLayer, image->luma,
+                    image->width*16, constrainedIntraPred, i) != HANTRO_OK)
+            return(HANTRO_NOK);
+
+        if (*totalCoeff)
+        {
+            result = omxVCM4P10_DequantTransformResidualFromPairAndAdd(
+                    pSrc,
+                    p,
+                    NULL,
+                    p,
+                    16,
+                    16,
+                    (i32)pMb->qpY,
+                    *totalCoeff);
+            if (result != OMX_Sts_NoErr)
+                return (HANTRO_NOK);
+        }
+    }
+    /* chroma prediction is written after the first 256 bytes of data */
+    if (h264bsdIntraChromaPrediction(pMb, data + 256,
+                image,
+                mbLayer->mbPred.intraChromaPredMode,
+                constrainedIntraPred) != HANTRO_OK)
+        return(HANTRO_NOK);
+
+    return HANTRO_OK;
+}
+
+#else /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: ProcessResidual
+
+        Functional description:
+          Process the residual data of one macroblock with
+          inverse quantization and inverse transform.
+
+------------------------------------------------------------------------------*/
+
+u32 ProcessResidual(mbStorage_t *pMb, i32 residualLevel[][16], u32 *coeffMap)
+{
+
+/* Variables */
+
+    u32 i;
+    u32 chromaQp;
+    i32 (*blockData)[16];
+    i32 (*blockDc)[16];
+    i16 *totalCoeff;
+    i32 *chromaDc;
+    const u32 *dcCoeffIdx;
+
+/* Code */
+
+    ASSERT(pMb);
+    ASSERT(residualLevel);
+
+    /* set pointer to the luma DC coefficient block (block index 24) */
+    blockDc = residualLevel + 24;
+    /* blocks 0..15 are walked as luma here, blocks 16..23 as chroma below */
+    blockData = residualLevel;
+    totalCoeff = pMb->totalCoeff;
+    if (h264bsdMbPartPredMode(pMb->mbType) == PRED_MODE_INTRA16x16)
+    {
+        if (totalCoeff[24])
+        {
+            h264bsdProcessLumaDc(*blockDc, pMb->qpY);
+        }
+        dcCoeffIdx = dcCoeffIndex;
+
+        for (i = 16; i--; blockData++, totalCoeff++, coeffMap++)
+        {
+            /* set dc coefficient of luma block */
+            (*blockData)[0] = (*blockDc)[*dcCoeffIdx++];
+            if ((*blockData)[0] || *totalCoeff)
+            {
+                if (h264bsdProcessBlock(*blockData, pMb->qpY, 1, *coeffMap) !=
+                    HANTRO_OK)
+                    return(HANTRO_NOK);
+            }
+            else
+                MARK_RESIDUAL_EMPTY(*blockData);
+        }
+    }
+    else
+    {
+        for (i = 16; i--; blockData++, totalCoeff++, coeffMap++)
+        {
+            if (*totalCoeff)
+            {
+                if (h264bsdProcessBlock(*blockData, pMb->qpY, 0, *coeffMap) !=
+                    HANTRO_OK)
+                    return(HANTRO_NOK);
+            }
+            else
+                MARK_RESIDUAL_EMPTY(*blockData);
+        }
+    }
+
+    /* chroma DC processing. First chroma dc block is block with index 25 */
+    chromaQp =
+        h264bsdQpC[CLIP3(0, 51, (i32)pMb->qpY + pMb->chromaQpIndexOffset)];
+    if (pMb->totalCoeff[25] || pMb->totalCoeff[26])
+        h264bsdProcessChromaDc(residualLevel[25], chromaQp);
+    chromaDc = residualLevel[25];
+    for (i = 8; i--; blockData++, totalCoeff++, coeffMap++)
+    {
+        /* set dc coefficient of chroma block */
+        (*blockData)[0] = *chromaDc++;
+        if ((*blockData)[0] || *totalCoeff)
+        {
+            if (h264bsdProcessBlock(*blockData, chromaQp, 1,*coeffMap) !=
+                HANTRO_OK)
+                return(HANTRO_NOK);
+        }
+        else
+            MARK_RESIDUAL_EMPTY(*blockData);
+    }
+
+    return(HANTRO_OK);
+}
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdSubMbPartMode
+
+        Functional description:
+          Returns the macroblock's sub-partition mode.
+
+------------------------------------------------------------------------------*/
+
+subMbPartMode_e h264bsdSubMbPartMode(subMbType_e subMbType)
+{
+
+/* Variables */
+
+
+/* Code */
+
+    ASSERT(subMbType < 4);
+    /* subMbType_e and subMbPartMode_e share the same numbering (0..3) */
+    return((subMbPartMode_e)subMbType);
+
+}
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.h
new file mode 100755
index 0000000..32bc340
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_MACROBLOCK_LAYER_H
+#define H264SWDEC_MACROBLOCK_LAYER_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_image.h"
+#include "h264bsd_dpb.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/* Macro to determine if a mb is an intra mb */
+#define IS_INTRA_MB(a) ((a).mbType > 5)
+
+/* Macro to determine if a mb is an I_PCM mb */
+#define IS_I_PCM_MB(a) ((a).mbType == 31)
+
+typedef enum {
+    P_Skip          = 0,
+    P_L0_16x16      = 1,
+    P_L0_L0_16x8    = 2,
+    P_L0_L0_8x16    = 3,
+    P_8x8           = 4,
+    P_8x8ref0       = 5,    /* IS_INTRA_MB treats values above 5 as intra */
+    I_4x4           = 6,
+    I_16x16_0_0_0   = 7,    /* I_16x16_<a>_<b>_<c>: a = prediction mode, */
+    I_16x16_1_0_0   = 8,    /* b = coded block pattern >> 4, c = 1 when  */
+    I_16x16_2_0_0   = 9,    /* the low four cbp bits are all set         */
+    I_16x16_3_0_0   = 10,
+    I_16x16_0_1_0   = 11,
+    I_16x16_1_1_0   = 12,
+    I_16x16_2_1_0   = 13,
+    I_16x16_3_1_0   = 14,
+    I_16x16_0_2_0   = 15,
+    I_16x16_1_2_0   = 16,
+    I_16x16_2_2_0   = 17,
+    I_16x16_3_2_0   = 18,
+    I_16x16_0_0_1   = 19,
+    I_16x16_1_0_1   = 20,
+    I_16x16_2_0_1   = 21,
+    I_16x16_3_0_1   = 22,
+    I_16x16_0_1_1   = 23,
+    I_16x16_1_1_1   = 24,
+    I_16x16_2_1_1   = 25,
+    I_16x16_3_1_1   = 26,
+    I_16x16_0_2_1   = 27,
+    I_16x16_1_2_1   = 28,
+    I_16x16_2_2_1   = 29,
+    I_16x16_3_2_1   = 30,
+    I_PCM           = 31    /* IS_I_PCM_MB compares against this value */
+} mbType_e;
+
+typedef enum {
+    P_L0_8x8 = 0,   /* values are cast directly to subMbPartMode_e by */
+    P_L0_8x4 = 1,   /* h264bsdSubMbPartMode, keep both enums aligned  */
+    P_L0_4x8 = 2,
+    P_L0_4x4 = 3
+} subMbType_e;
+
+typedef enum {
+    MB_P_16x16 = 0,
+    MB_P_16x8,
+    MB_P_8x16,
+    MB_P_8x8
+} mbPartMode_e;
+
+typedef enum {
+    MB_SP_8x8 = 0,  /* numbering matches subMbType_e (0..3) */
+    MB_SP_8x4,
+    MB_SP_4x8,
+    MB_SP_4x4
+} subMbPartMode_e;
+
+typedef enum {
+    PRED_MODE_INTRA4x4 = 0,
+    PRED_MODE_INTRA16x16  ,
+    PRED_MODE_INTER
+} mbPartPredMode_e;
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+typedef struct
+{
+    /* MvPrediction16x16 assumes that MVs are 16bits */
+    i16 hor;
+    i16 ver;
+} mv_t;
+
+typedef struct
+{
+    u32 prevIntra4x4PredModeFlag[16];
+    u32 remIntra4x4PredMode[16];
+    u32 intraChromaPredMode;
+    u32 refIdxL0[4];
+    mv_t mvdL0[4];
+} mbPred_t;
+
+typedef struct
+{
+    subMbType_e subMbType[4];
+    u32 refIdxL0[4];
+    mv_t mvdL0[4][4];
+} subMbPred_t;
+
+typedef struct
+{
+#ifdef H264DEC_OMXDL
+    u8 posCoefBuf[27*16*3]; /* source buffer handed to the OMX DL calls */
+    u8 totalCoeff[27];  /* 0..15 luma, 16..23 chroma, 24 luma DC, 25..26 chroma DC */
+#else
+    i16 totalCoeff[27]; /* 0..15 luma, 16..23 chroma, 24 luma DC, 25..26 chroma DC */
+#endif
+    i32 level[26][16];  /* 0..23 residual blocks, 24 luma DC, 25 chroma DC */
+    u32 coeffMap[24];   /* one entry per block, passed to h264bsdProcessBlock */
+} residual_t;
+
+typedef struct
+{
+    mbType_e mbType;        /* copied to mbStorage_t in h264bsdDecodeMacroblock */
+    u32 codedBlockPattern;
+    i32 mbQpDelta;          /* added to the running QP when non-zero */
+    mbPred_t mbPred;
+    subMbPred_t subMbPred;
+    residual_t residual;    /* coefficient data used in residual processing */
+} macroblockLayer_t;
+
+typedef struct mbStorage
+{
+    mbType_e mbType;
+    u32 sliceId;
+    u32 disableDeblockingFilterIdc;
+    i32 filterOffsetA;
+    i32 filterOffsetB;
+    u32 qpY;                    /* set by h264bsdDecodeMacroblock, 0 for I_PCM */
+    i32 chromaQpIndexOffset;    /* added to qpY before the h264bsdQpC lookup */
+#ifdef H264DEC_OMXDL
+    u8 totalCoeff[27];          /* 16 for first 24 entries of an I_PCM mb */
+#else
+    i16 totalCoeff[27];         /* 16 for first 24 entries of an I_PCM mb */
+#endif
+    u8 intra4x4PredMode[16];
+    u32 refPic[4];
+    u8* refAddr[4];
+    mv_t mv[16];
+    u32 decoded;                /* decode call count, > 1: already written out */
+    struct mbStorage *mbA;      /* mbA..mbD: presumably neighbours A..D */
+    struct mbStorage *mbB;      /* tested with h264bsdIsNeighbourAvailable */
+    struct mbStorage *mbC;
+    struct mbStorage *mbD;
+} mbStorage_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeMacroblockLayer(strmData_t *pStrmData,
+    macroblockLayer_t *pMbLayer, mbStorage_t *pMb, u32 sliceType,
+    u32 numRefIdxActive);
+
+u32 h264bsdNumMbPart(mbType_e mbType);
+u32 h264bsdNumSubMbPart(subMbType_e subMbType);
+
+subMbPartMode_e h264bsdSubMbPartMode(subMbType_e subMbType);
+
+u32 h264bsdDecodeMacroblock(mbStorage_t *pMb, macroblockLayer_t *pMbLayer,
+    image_t *currImage, dpbStorage_t *dpb, i32 *qpY, u32 mbNum,
+    u32 constrainedIntraPredFlag, u8* data);
+
+u32 h264bsdPredModeIntra16x16(mbType_e mbType);
+
+mbPartPredMode_e h264bsdMbPartPredMode(mbType_e mbType);
+#ifdef H264DEC_NEON
+u32 h264bsdClearMbLayer(macroblockLayer_t *pMbLayer, u32 size);
+#endif
+
+#endif /* #ifndef H264SWDEC_MACROBLOCK_LAYER_H */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.c
new file mode 100755
index 0000000..e44c43a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdDecodeNalUnit
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_nal_unit.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdDecodeNalUnit
+
+        Functional description:
+            Decode NAL unit header information
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+
+        Outputs:
+            pNalUnit        NAL unit header information is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid NAL unit header information
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeNalUnit(strmData_t *pStrmData, nalUnit_t *pNalUnit)
+{
+
+/* Variables */
+
+    u32 tmp;    /* most recently read header field */
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pNalUnit);
+    ASSERT(pStrmData->bitPosInWord == 0);
+
+    /* forbidden_zero_bit (not checked to be zero, errors ignored) */
+    tmp = h264bsdGetBits(pStrmData, 1);
+    /* Assuming that NAL unit starts from byte boundary -> don't have to check
+     * following 7 bits for END_OF_STREAM */
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    /* nal_ref_idc, 2 bits */
+    tmp = h264bsdGetBits(pStrmData, 2);
+    pNalUnit->nalRefIdc = tmp;
+    /* nal_unit_type, 5 bits */
+    tmp = h264bsdGetBits(pStrmData, 5);
+    pNalUnit->nalUnitType = (nalUnitType_e)tmp;
+
+    /* data partitioning NAL units not supported */
+    if ( (tmp == 2) || (tmp == 3) || (tmp == 4) )
+    {
+        return(HANTRO_NOK);
+    }
+
+    /* nal_ref_idc shall not be zero for these nal_unit_types */
+    if ( ( (tmp == NAL_SEQ_PARAM_SET) || (tmp == NAL_PIC_PARAM_SET) ||
+           (tmp == NAL_CODED_SLICE_IDR) ) && (pNalUnit->nalRefIdc == 0) )
+    {
+        return(HANTRO_NOK);
+    }
+    /* nal_ref_idc shall be zero for these nal_unit_types */
+    else if ( ( (tmp == NAL_SEI) || (tmp == NAL_ACCESS_UNIT_DELIMITER) ||
+                (tmp == NAL_END_OF_SEQUENCE) || (tmp == NAL_END_OF_STREAM) ||
+                (tmp == NAL_FILLER_DATA) ) && (pNalUnit->nalRefIdc != 0) )
+    {
+        return(HANTRO_NOK);
+    }
+
+    return(HANTRO_OK);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.h
new file mode 100755
index 0000000..38957bf
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_NAL_UNIT_H
+#define H264SWDEC_NAL_UNIT_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/* macro to determine if NAL unit pointed by pNalUnit contains an IDR slice */
+#define IS_IDR_NAL_UNIT(pNalUnit) \
+    ((pNalUnit)->nalUnitType == NAL_CODED_SLICE_IDR)
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+typedef enum {
+    NAL_CODED_SLICE = 1,    /* 2..4 are rejected by h264bsdDecodeNalUnit */
+    NAL_CODED_SLICE_IDR = 5,
+    NAL_SEI = 6,
+    NAL_SEQ_PARAM_SET = 7,
+    NAL_PIC_PARAM_SET = 8,
+    NAL_ACCESS_UNIT_DELIMITER = 9,
+    NAL_END_OF_SEQUENCE = 10,
+    NAL_END_OF_STREAM = 11,
+    NAL_FILLER_DATA = 12,
+    NAL_MAX_TYPE_VALUE = 31 /* largest value of the 5-bit type field */
+} nalUnitType_e;
+
+typedef struct
+{
+    nalUnitType_e nalUnitType;  /* 5-bit field read by h264bsdDecodeNalUnit */
+    u32 nalRefIdc;              /* 2-bit field read by h264bsdDecodeNalUnit */
+} nalUnit_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeNalUnit(strmData_t *pStrmData, nalUnit_t *pNalUnit);
+
+#endif /* #ifndef H264SWDEC_NAL_UNIT_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.c
new file mode 100755
index 0000000..ce5eeff
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdInitMbNeighbours
+          h264bsdGetNeighbourMb
+          h264bsdNeighbour4x4BlockA
+          h264bsdNeighbour4x4BlockB
+          h264bsdNeighbour4x4BlockC
+          h264bsdNeighbour4x4BlockD
+          h264bsdIsNeighbourAvailable
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_neighbour.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/* The following four tables give, for each 4x4 block of a macroblock, its
+ * neighbouring block in one direction. The first 16 entries are for luma
+ * blocks, the next 4 for Cb and the last 4 for Cr. Each entry tells which
+ * macroblock the neighbour block belongs to and the index of that block
+ * within it. Blocks are indexed as follows:
+ *
+ *          Y             Cb       Cr
+ *      0  1  4  5      16 17    20 21
+ *      2  3  6  7      18 19    22 23
+ *      8  9 12 13
+ *     10 11 14 15
+ */
+
+/* left neighbour (A) of each block, indexed by block index */
+static const neighbour_t N_A_4x4B[24] = {
+    {MB_A,5},    {MB_CURR,0}, {MB_A,7},    {MB_CURR,2},
+    {MB_CURR,1}, {MB_CURR,4}, {MB_CURR,3}, {MB_CURR,6},
+    {MB_A,13},   {MB_CURR,8}, {MB_A,15},   {MB_CURR,10},
+    {MB_CURR,9}, {MB_CURR,12},{MB_CURR,11},{MB_CURR,14},
+    {MB_A,17},   {MB_CURR,16},{MB_A,19},   {MB_CURR,18},
+    {MB_A,21},   {MB_CURR,20},{MB_A,23},   {MB_CURR,22} };
+
+/* above neighbour (B) of each block, indexed by block index */
+static const neighbour_t N_B_4x4B[24] = {
+    {MB_B,10},   {MB_B,11},   {MB_CURR,0}, {MB_CURR,1},
+    {MB_B,14},   {MB_B,15},   {MB_CURR,4}, {MB_CURR,5},
+    {MB_CURR,2}, {MB_CURR,3}, {MB_CURR,8}, {MB_CURR,9},
+    {MB_CURR,6}, {MB_CURR,7}, {MB_CURR,12},{MB_CURR,13},
+    {MB_B,18},   {MB_B,19},   {MB_CURR,16},{MB_CURR,17},
+    {MB_B,22},   {MB_B,23},   {MB_CURR,20},{MB_CURR,21} };
+
+/* above-right neighbour (C) of each block; MB_NA marks blocks without one */
+static const neighbour_t N_C_4x4B[24] = {
+    {MB_B,11},   {MB_B,14},   {MB_CURR,1}, {MB_NA,4},
+    {MB_B,15},   {MB_C,10},   {MB_CURR,5}, {MB_NA,0},
+    {MB_CURR,3}, {MB_CURR,6}, {MB_CURR,9}, {MB_NA,12},
+    {MB_CURR,7}, {MB_NA,2},   {MB_CURR,13},{MB_NA,8},
+    {MB_B,19},   {MB_C,18},   {MB_CURR,17},{MB_NA,16},
+    {MB_B,23},   {MB_C,22},   {MB_CURR,21},{MB_NA,20} };
+
+/* above-left neighbour (D) of each block, indexed by block index */
+static const neighbour_t N_D_4x4B[24] = {
+    {MB_D,15},   {MB_B,10},   {MB_A,5},    {MB_CURR,0},
+    {MB_B,11},   {MB_B,14},   {MB_CURR,1}, {MB_CURR,4},
+    {MB_A,7},    {MB_CURR,2}, {MB_A,13},   {MB_CURR,8},
+    {MB_CURR,3}, {MB_CURR,6}, {MB_CURR,9}, {MB_CURR,12},
+    {MB_D,19},   {MB_B,18},   {MB_A,17},   {MB_CURR,16},
+    {MB_D,23},   {MB_B,22},   {MB_A,21},   {MB_CURR,20} };
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInitMbNeighbours
+
+        Functional description:
+            Set up the neighbour links of every macroblock in a picture.
+            For each macroblock the pointers to the left (A), above (B),
+            above-right (C) and above-left (D) macroblocks are stored in
+            its mbStorage structure. A pointer is set to NULL when the
+            neighbour would fall outside the picture.
+
+        Inputs:
+            picWidth        width of the picture in macroblocks
+            picSizeInMbs    total number of macroblocks in the picture
+
+        Outputs:
+            pMbStorage      array of picSizeInMbs macroblock structures
+                            whose neighbour pointers are written
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInitMbNeighbours(mbStorage_t *pMbStorage, u32 picWidth,
+    u32 picSizeInMbs)
+{
+
+/* Variables */
+
+    u32 mbIdx, mbRow, mbCol;
+    u32 hasLeft, hasAbove, hasRight;
+    mbStorage_t *pMb;
+
+/* Code */
+
+    ASSERT(pMbStorage);
+    ASSERT(picWidth);
+    ASSERT(picWidth <= picSizeInMbs);
+    ASSERT(((picSizeInMbs / picWidth) * picWidth) == picSizeInMbs);
+
+    mbRow = 0;
+    mbCol = 0;
+
+    /* visit the macroblocks in raster order; mbRow and mbCol track the
+     * position of the current macroblock in the picture */
+    for (mbIdx = 0; mbIdx < picSizeInMbs; mbIdx++)
+    {
+        pMb = pMbStorage + mbIdx;
+
+        /* which picture borders the macroblock does not touch */
+        hasLeft  = (mbCol != 0);
+        hasAbove = (mbRow != 0);
+        hasRight = (mbCol < picWidth - 1);
+
+        /* left neighbour */
+        pMb->mbA = hasLeft ? pMb - 1 : NULL;
+        /* above neighbour */
+        pMb->mbB = hasAbove ? pMb - picWidth : NULL;
+        /* above-right neighbour */
+        pMb->mbC = (hasAbove && hasRight) ? pMb - (picWidth - 1) : NULL;
+        /* above-left neighbour */
+        pMb->mbD = (hasAbove && hasLeft) ? pMb - (picWidth + 1) : NULL;
+
+        /* step to the next column, wrapping to the start of the next row */
+        if (++mbCol == picWidth)
+        {
+            mbCol = 0;
+            mbRow++;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdGetNeighbourMb
+
+        Functional description:
+            Map a neighbour selector to the corresponding macroblock.
+
+        Inputs:
+            pMb         pointer to macroblock structure of the macroblock
+                        whose neighbour is wanted
+            neighbour   selects which neighbour to return
+
+        Outputs:
+            none
+
+        Returns:
+            pointer to the selected macroblock (pMb itself for MB_CURR)
+            NULL if the neighbour is not available or the selector is MB_NA
+
+------------------------------------------------------------------------------*/
+
+mbStorage_t* h264bsdGetNeighbourMb(mbStorage_t *pMb, neighbourMb_e neighbour)
+{
+
+/* Variables */
+
+    mbStorage_t *pResult;
+
+/* Code */
+
+    ASSERT((neighbour <= MB_CURR) || (neighbour == MB_NA));
+
+    switch (neighbour)
+    {
+        case MB_A:    pResult = pMb->mbA; break;
+        case MB_B:    pResult = pMb->mbB; break;
+        case MB_C:    pResult = pMb->mbC; break;
+        case MB_D:    pResult = pMb->mbD; break;
+        case MB_CURR: pResult = pMb;      break;
+        default:      pResult = NULL;     break; /* MB_NA or anything else */
+    }
+
+    return(pResult);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdNeighbour4x4BlockA
+
+        Functional description:
+            Look up the left neighbour of a block. The returned pointer
+            refers to an entry of the constant table N_A_4x4B in this file.
+
+        Inputs:
+            blockIndex  index of the block whose neighbour is wanted (0-23)
+
+        Outputs:
+
+        Returns:
+            pointer to neighbour structure
+
+------------------------------------------------------------------------------*/
+
+const neighbour_t* h264bsdNeighbour4x4BlockA(u32 blockIndex)
+{
+
+/* Variables */
+
+/* Code */
+
+    /* one table entry per luma (0-15), Cb (16-19) and Cr (20-23) block */
+    ASSERT(blockIndex < 24);
+
+    return(&N_A_4x4B[blockIndex]);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdNeighbour4x4BlockB
+
+        Functional description:
+            Look up the above neighbour of a block. The returned pointer
+            refers to an entry of the constant table N_B_4x4B in this file.
+
+        Inputs:
+            blockIndex  index of the block whose neighbour is wanted (0-23)
+
+        Outputs:
+
+        Returns:
+            pointer to neighbour structure
+
+------------------------------------------------------------------------------*/
+
+const neighbour_t* h264bsdNeighbour4x4BlockB(u32 blockIndex)
+{
+
+/* Variables */
+
+/* Code */
+
+    /* one table entry per luma (0-15), Cb (16-19) and Cr (20-23) block */
+    ASSERT(blockIndex < 24);
+
+    return(&N_B_4x4B[blockIndex]);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdNeighbour4x4BlockC
+
+        Functional description:
+            Look up the above-right neighbour of a block. The returned pointer
+            refers to an entry of the constant table N_C_4x4B in this file.
+
+        Inputs:
+            blockIndex  index of the block whose neighbour is wanted (0-23)
+
+        Outputs:
+
+        Returns:
+            pointer to neighbour structure
+
+------------------------------------------------------------------------------*/
+
+const neighbour_t* h264bsdNeighbour4x4BlockC(u32 blockIndex)
+{
+
+/* Variables */
+
+/* Code */
+
+    /* one table entry per luma (0-15), Cb (16-19) and Cr (20-23) block */
+    ASSERT(blockIndex < 24);
+
+    return(&N_C_4x4B[blockIndex]);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdNeighbour4x4BlockD
+
+        Functional description:
+            Look up the above-left neighbour of a block. The returned pointer
+            refers to an entry of the constant table N_D_4x4B in this file.
+
+        Inputs:
+            blockIndex  index of the block whose neighbour is wanted (0-23)
+
+        Outputs:
+
+        Returns:
+            pointer to neighbour structure
+
+------------------------------------------------------------------------------*/
+
+const neighbour_t* h264bsdNeighbour4x4BlockD(u32 blockIndex)
+{
+
+/* Variables */
+
+/* Code */
+
+    /* one table entry per luma (0-15), Cb (16-19) and Cr (20-23) block */
+    ASSERT(blockIndex < 24);
+
+    return(&N_D_4x4B[blockIndex]);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIsNeighbourAvailable
+
+        Functional description:
+            Tell whether a neighbour macroblock can be used. The neighbour
+            is available when its pointer is not NULL and its sliceId is
+            equal to the sliceId of the current macroblock.
+
+        Inputs:
+            pMb         pointer to the current macroblock
+            pNeighbour  pointer to the neighbour macroblock, may be NULL
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_TRUE     neighbour is available
+            HANTRO_FALSE    neighbour is not available
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIsNeighbourAvailable(mbStorage_t *pMb, mbStorage_t *pNeighbour)
+{
+
+/* Variables */
+
+/* Code */
+
+    /* NULL is tested first so that a missing neighbour is not dereferenced */
+    if ( (pNeighbour != NULL) && (pNeighbour->sliceId == pMb->sliceId) )
+        return(HANTRO_TRUE);
+
+    return(HANTRO_FALSE);
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.h
new file mode 100755
index 0000000..fce0ad1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_NEIGHBOUR_H
+#define H264SWDEC_NEIGHBOUR_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_macroblock_layer.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+typedef enum {
+    MB_A = 0,       /* macroblock to the left */
+    MB_B,           /* macroblock above */
+    MB_C,           /* macroblock above-right */
+    MB_D,           /* macroblock above-left */
+    MB_CURR,        /* the current macroblock itself */
+    MB_NA = 0xFF    /* no neighbour available */
+} neighbourMb_e;
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+typedef struct
+{
+    neighbourMb_e   mb;     /* macroblock that holds the neighbour block */
+    u8             index;   /* block index within that macroblock */
+} neighbour_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+void h264bsdInitMbNeighbours(mbStorage_t *pMbStorage, u32 picWidth,
+    u32 picSizeInMbs);
+
+mbStorage_t* h264bsdGetNeighbourMb(mbStorage_t *pMb, neighbourMb_e neighbour);
+
+u32 h264bsdIsNeighbourAvailable(mbStorage_t *pMb, mbStorage_t *pNeighbour);
+
+const neighbour_t* h264bsdNeighbour4x4BlockA(u32 blockIndex);
+const neighbour_t* h264bsdNeighbour4x4BlockB(u32 blockIndex);
+const neighbour_t* h264bsdNeighbour4x4BlockC(u32 blockIndex);
+const neighbour_t* h264bsdNeighbour4x4BlockD(u32 blockIndex);
+
+#endif /* #ifndef H264SWDEC_NEIGHBOUR_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.c
new file mode 100755
index 0000000..fb23352
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdDecodePicOrderCnt
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_util.h"
+#include "h264bsd_pic_order_cnt.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdDecodePicOrderCnt
+
+        Functional description:
+            Compute picture order count for a picture. Function implements
+            computation of all POC types (0, 1 and 2), type is obtained from
+            sps. See standard for description of the POC types and how POC is
+            computed for each type.
+
+            Function returns the minimum of top field and bottom field pic
+            order counts.
+
+        Inputs:
+            poc          pointer to previous results
+            sps          pointer to sequence parameter set
+            pSliceHeader pointer to current slice header, frame number and
+                         other params needed for POC computation
+            pNalUnit     pointer to current NAL unit structure, function needs
+                         to know if this is an IDR picture and also if this is
+                         a reference picture
+
+        Outputs:
+            poc         results stored here for computation of next POC
+
+        Returns:
+            picture order count
+
+------------------------------------------------------------------------------*/
+
+i32 h264bsdDecodePicOrderCnt(pocStorage_t *poc, seqParamSet_t *sps,
+    sliceHeader_t *pSliceHeader, nalUnit_t *pNalUnit)
+{
+
+/* Variables */
+
+    u32 i;
+    i32 picOrderCnt;
+    u32 frameNumOffset, absFrameNum, picOrderCntCycleCnt;
+    u32 frameNumInPicOrderCntCycle;
+    i32 expectedDeltaPicOrderCntCycle;
+    u32 containsMmco5; /* HANTRO_TRUE if the slice carries an mmco 5 */
+
+/* Code */
+
+    ASSERT(poc);
+    ASSERT(sps);
+    ASSERT(pSliceHeader);
+    ASSERT(pNalUnit);
+    ASSERT(sps->picOrderCntType <= 2);
+
+#if 0
+    /* JanSa: I don't think this is necessary, don't see any reason to
+     * increment prevFrameNum one by one instead of one big increment.
+     * However, standard specifies that this should be done -> if someone
+     * figures out any case when the outcome would be different for step by
+     * step increment, this part of the code should be enabled */
+
+    /* if there was a gap in frame numbering and picOrderCntType is 1 or 2 ->
+     * "compute" pic order counts for non-existing frames. These are not
+     * actually computed, but process needs to be done to update the
+     * prevFrameNum and prevFrameNumOffset */
+    if ( sps->picOrderCntType > 0 &&
+         pSliceHeader->frameNum != poc->prevFrameNum &&
+         pSliceHeader->frameNum != ((poc->prevFrameNum + 1) % sps->maxFrameNum))
+    {
+
+        /* use variable i for unUsedShortTermFrameNum */
+        i = (poc->prevFrameNum + 1) % sps->maxFrameNum;
+
+        do
+        {
+            if (poc->prevFrameNum > i)
+                frameNumOffset = poc->prevFrameNumOffset + sps->maxFrameNum;
+            else
+                frameNumOffset = poc->prevFrameNumOffset;
+
+            poc->prevFrameNumOffset = frameNumOffset;
+            poc->prevFrameNum = i;
+
+            i = (i + 1) % sps->maxFrameNum;
+
+        } while (i != pSliceHeader->frameNum);
+    }
+#endif
+
+    /* check if current slice includes mmco equal to 5 (list ends at mmco 0) */
+    containsMmco5 = HANTRO_FALSE;
+    if (pSliceHeader->decRefPicMarking.adaptiveRefPicMarkingModeFlag)
+    {
+        i = 0;
+        while (pSliceHeader->decRefPicMarking.operation[i].
+            memoryManagementControlOperation)
+        {
+            if (pSliceHeader->decRefPicMarking.operation[i].
+                memoryManagementControlOperation == 5)
+            {
+                containsMmco5 = HANTRO_TRUE;
+                break;
+            }
+            i++;
+        }
+    }
+    switch (sps->picOrderCntType)
+    {
+
+        case 0:
+            /* set prevPicOrderCnt values for IDR frame */
+            if (IS_IDR_NAL_UNIT(pNalUnit))
+            {
+                poc->prevPicOrderCntMsb = 0;
+                poc->prevPicOrderCntLsb = 0;
+            }
+
+            /* compute picOrderCntMsb (stored in picOrderCnt variable) */
+            if ( (pSliceHeader->picOrderCntLsb < poc->prevPicOrderCntLsb) &&
+                ((poc->prevPicOrderCntLsb - pSliceHeader->picOrderCntLsb) >=
+                 sps->maxPicOrderCntLsb/2) )
+            {
+                picOrderCnt = poc->prevPicOrderCntMsb +
+                    (i32)sps->maxPicOrderCntLsb;
+            }
+            else if ((pSliceHeader->picOrderCntLsb > poc->prevPicOrderCntLsb) &&
+                ((pSliceHeader->picOrderCntLsb - poc->prevPicOrderCntLsb) >
+                 sps->maxPicOrderCntLsb/2) )
+            {
+                picOrderCnt = poc->prevPicOrderCntMsb -
+                    (i32)sps->maxPicOrderCntLsb;
+            }
+            else
+                picOrderCnt = poc->prevPicOrderCntMsb;
+
+            /* standard specifies that prevPicOrderCntMsb is from previous
+             * reference frame -> replace old value only if current frame is
+             * reference frame */
+            if (pNalUnit->nalRefIdc)
+                poc->prevPicOrderCntMsb = picOrderCnt;
+
+            /* compute top field order cnt (stored in picOrderCnt) */
+            picOrderCnt += (i32)pSliceHeader->picOrderCntLsb;
+
+            /* if delta for bottom field is negative -> bottom will be the
+             * minimum pic order count */
+            if (pSliceHeader->deltaPicOrderCntBottom < 0)
+                picOrderCnt += pSliceHeader->deltaPicOrderCntBottom;
+
+            /* standard specifies that prevPicOrderCntLsb is from previous
+             * reference frame -> replace old value only if current frame is
+             * reference frame */
+            if (pNalUnit->nalRefIdc)
+            {
+                /* if current frame contains mmco5 -> modify values to be
+                 * stored */
+                if (containsMmco5)
+                {
+                    poc->prevPicOrderCntMsb = 0;
+                    /* prevPicOrderCntLsb should be the top field picOrderCnt
+                     * if previous frame included mmco5. Top field picOrderCnt
+                     * for frames containing mmco5 is obtained by subtracting
+                     * the picOrderCnt from original top field order count ->
+                     * value is zero if top field was the minimum, i.e. delta
+                     * for bottom was positive, otherwise value is
+                     * -deltaPicOrderCntBottom */
+                    if (pSliceHeader->deltaPicOrderCntBottom < 0)
+                        poc->prevPicOrderCntLsb =
+                            (u32)(-pSliceHeader->deltaPicOrderCntBottom);
+                    else
+                        poc->prevPicOrderCntLsb = 0;
+                    picOrderCnt = 0;
+                }
+                else
+                {
+                    poc->prevPicOrderCntLsb = pSliceHeader->picOrderCntLsb;
+                }
+            }
+
+            break;
+
+        case 1:
+
+            /* step 1 (in the description in the standard) */
+            if (IS_IDR_NAL_UNIT(pNalUnit))
+                frameNumOffset = 0;
+            else if (poc->prevFrameNum > pSliceHeader->frameNum)
+                frameNumOffset = poc->prevFrameNumOffset + sps->maxFrameNum;
+            else
+                frameNumOffset = poc->prevFrameNumOffset;
+
+            /* step 2 */
+            if (sps->numRefFramesInPicOrderCntCycle)
+                absFrameNum = frameNumOffset + pSliceHeader->frameNum;
+            else
+                absFrameNum = 0;
+
+            if (pNalUnit->nalRefIdc == 0 && absFrameNum > 0)
+                absFrameNum -= 1;
+
+            /* step 3 */
+            if (absFrameNum > 0)
+            {
+                picOrderCntCycleCnt =
+                    (absFrameNum - 1)/sps->numRefFramesInPicOrderCntCycle;
+                frameNumInPicOrderCntCycle =
+                    (absFrameNum - 1)%sps->numRefFramesInPicOrderCntCycle;
+            }
+
+            /* step 4 */
+            expectedDeltaPicOrderCntCycle = 0;
+            for (i = 0; i < sps->numRefFramesInPicOrderCntCycle; i++)
+                expectedDeltaPicOrderCntCycle += sps->offsetForRefFrame[i];
+
+            /* step 5 (picOrderCnt used to store expectedPicOrderCnt) */
+            /*lint -esym(644,picOrderCntCycleCnt) always initialized */
+            /*lint -esym(644,frameNumInPicOrderCntCycle) always initialized */
+            if (absFrameNum > 0)
+            {
+                picOrderCnt =
+                    (i32)picOrderCntCycleCnt * expectedDeltaPicOrderCntCycle;
+                for (i = 0; i <= frameNumInPicOrderCntCycle; i++)
+                    picOrderCnt += sps->offsetForRefFrame[i];
+            }
+            else
+                picOrderCnt = 0;
+
+            if (pNalUnit->nalRefIdc == 0)
+                picOrderCnt += sps->offsetForNonRefPic;
+
+            /* step 6 (picOrderCnt is top field order cnt if delta for bottom
+             * is positive, otherwise it is bottom field order cnt) */
+            picOrderCnt += pSliceHeader->deltaPicOrderCnt[0];
+
+            if ( (sps->offsetForTopToBottomField +
+                    pSliceHeader->deltaPicOrderCnt[1]) < 0 )
+            {
+                picOrderCnt += sps->offsetForTopToBottomField +
+                    pSliceHeader->deltaPicOrderCnt[1];
+            }
+
+            /* if current picture contains mmco5 -> set prevFrameNumOffset and
+             * prevFrameNum to 0 for computation of picOrderCnt of next
+             * frame, otherwise store frameNum and frameNumOffset to poc
+             * structure */
+            if (!containsMmco5)
+            {
+                poc->prevFrameNumOffset = frameNumOffset;
+                poc->prevFrameNum = pSliceHeader->frameNum;
+            }
+            else
+            {
+                poc->prevFrameNumOffset = 0;
+                poc->prevFrameNum = 0;
+                picOrderCnt = 0;
+            }
+            break;
+
+        default: /* case 2 */
+            /* derive frameNumOffset */
+            if (IS_IDR_NAL_UNIT(pNalUnit))
+                frameNumOffset = 0;
+            else if (poc->prevFrameNum > pSliceHeader->frameNum)
+                frameNumOffset = poc->prevFrameNumOffset + sps->maxFrameNum;
+            else
+                frameNumOffset = poc->prevFrameNumOffset;
+
+            /* derive picOrderCnt (type 2 has same value for top and bottom
+             * field order cnts) */
+            if (IS_IDR_NAL_UNIT(pNalUnit))
+                picOrderCnt = 0;
+            else if (pNalUnit->nalRefIdc == 0)
+                picOrderCnt =
+                    2 * (i32)(frameNumOffset + pSliceHeader->frameNum) - 1;
+            else
+                picOrderCnt =
+                    2 * (i32)(frameNumOffset + pSliceHeader->frameNum);
+
+            /* if current picture contains mmco5 -> set prevFrameNumOffset and
+             * prevFrameNum to 0 for computation of picOrderCnt of next
+             * frame, otherwise store frameNum and frameNumOffset to poc
+             * structure */
+            if (!containsMmco5)
+            {
+                poc->prevFrameNumOffset = frameNumOffset;
+                poc->prevFrameNum = pSliceHeader->frameNum;
+            }
+            else
+            {
+                poc->prevFrameNumOffset = 0;
+                poc->prevFrameNum = 0;
+                picOrderCnt = 0;
+            }
+            break;
+
+    }
+
+    /*lint -esym(644,picOrderCnt) always initialized */
+    return(picOrderCnt);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.h
new file mode 100755
index 0000000..19741eb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_PIC_ORDER_CNT_H
+#define H264SWDEC_PIC_ORDER_CNT_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_nal_unit.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/* Information carried over from the previous picture for computing the POC
+ * of the next one. The first two fields are used with POC type 0, the last
+ * two with POC types 1 and 2 */
+typedef struct
+{
+    u32 prevPicOrderCntLsb;  /* type 0: POC lsb kept from a reference picture */
+    i32 prevPicOrderCntMsb;  /* type 0: POC msb kept from a reference picture */
+    u32 prevFrameNum;        /* types 1, 2: frame number of previous picture */
+    u32 prevFrameNumOffset;  /* types 1, 2: frame number offset of prev pic */
+} pocStorage_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+i32 h264bsdDecodePicOrderCnt(pocStorage_t *poc, seqParamSet_t *sps,
+    sliceHeader_t *sliceHeader, nalUnit_t *pNalUnit);
+
+#endif /* #ifndef H264SWDEC_PIC_ORDER_CNT_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.c
new file mode 100755
index 0000000..e04dea4b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdDecodePicParamSet
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_pic_param_set.h"
+#include "h264bsd_util.h"
+#include "h264bsd_vlc.h"
+#include "h264bsd_cfg.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/* Number of bits used to read one slice_group_id, indexed by
+ * numSliceGroups - 1. Entries 1..7 equal ceil(log2(numSliceGroups)); entry 0
+ * holds 1, but this file reads the table only when numSliceGroups > 1.
+ * NOTE: if MAX_NUM_SLICE_GROUPS is higher than 8 this table has to be resized
+ * accordingly */
+static const u32 CeilLog2NumSliceGroups[8] = {1, 1, 2, 2, 3, 3, 3, 3};
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdDecodePicParamSet
+
+        Functional description:
+            Decode picture parameter set information from the stream.
+
+            Function allocates memory for
+                - run lengths if slice group map type is 0
+                - top-left and bottom-right arrays if map type is 2
+                - for slice group ids if map type is 6
+
+            Validity of some of the slice group mapping information depends
+            on the image dimensions which are not known here. Therefore the
+            validity has to be checked afterwards, currently in the parameter
+            set activation phase.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+
+        Outputs:
+            pPicParamSet    decoded information is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      failure, invalid information or end of stream
+            MEMORY_ALLOCATION_ERROR for memory allocation failure
+
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodePicParamSet(strmData_t *pStrmData, picParamSet_t *pPicParamSet)
+{
+    /* Decodes one picture parameter set from pStrmData into *pPicParamSet,
+     * which is zeroed first. The "_minus1" counts that size the slice group
+     * data are range checked BEFORE one is added, so a maximal code number
+     * can wrap neither a count to 0 nor the sliceGroupId byte size. Buffers
+     * allocated before a failure stay attached to *pPicParamSet.
+     * NOTE(review): presumably the caller frees them -- confirm. */
+    u32 tmp, i, value;
+    i32 itmp;
+
+    ASSERT(pStrmData);
+    ASSERT(pPicParamSet);
+    H264SwDecMemset(pPicParamSet, 0, sizeof(picParamSet_t));
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pPicParamSet->picParameterSetId);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (pPicParamSet->picParameterSetId >= MAX_NUM_PIC_PARAM_SETS)
+    {
+        EPRINT("pic_parameter_set_id");
+        return(HANTRO_NOK);
+    }
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pPicParamSet->seqParameterSetId);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (pPicParamSet->seqParameterSetId >= MAX_NUM_SEQ_PARAM_SETS)
+    {
+        EPRINT("seq_param_set_id");
+        return(HANTRO_NOK);
+    }
+
+    /* entropy_coding_mode_flag, shall be 0 for baseline profile */
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp)
+    {
+        EPRINT("entropy_coding_mode_flag");
+        return(HANTRO_NOK);
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pPicParamSet->picOrderPresentFlag = (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    /* num_slice_groups_minus1, checked before the increment (no wrap to 0) */
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (value >= MAX_NUM_SLICE_GROUPS)
+    {
+        EPRINT("num_slice_groups_minus1");
+        return(HANTRO_NOK);
+    }
+    pPicParamSet->numSliceGroups = value + 1;
+
+    /* decode slice group mapping information if more than one slice groups */
+    if (pPicParamSet->numSliceGroups > 1)
+    {
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pPicParamSet->sliceGroupMapType);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pPicParamSet->sliceGroupMapType > 6)
+        {
+            EPRINT("slice_group_map_type");
+            return(HANTRO_NOK);
+        }
+        if (pPicParamSet->sliceGroupMapType == 0)
+        {
+            ALLOCATE(pPicParamSet->runLength,
+                pPicParamSet->numSliceGroups, u32);
+            if (pPicParamSet->runLength == NULL)
+                return(MEMORY_ALLOCATION_ERROR);
+            for (i = 0; i < pPicParamSet->numSliceGroups; i++)
+            {
+                tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                pPicParamSet->runLength[i] = value+1;
+                /* param values checked in CheckPps() */
+            }
+        }
+        else if (pPicParamSet->sliceGroupMapType == 2)
+        {
+            ALLOCATE(pPicParamSet->topLeft,
+                pPicParamSet->numSliceGroups - 1, u32);
+            ALLOCATE(pPicParamSet->bottomRight,
+                pPicParamSet->numSliceGroups - 1, u32);
+            if (pPicParamSet->topLeft == NULL ||
+                pPicParamSet->bottomRight == NULL)
+                return(MEMORY_ALLOCATION_ERROR);
+            for (i = 0; i < pPicParamSet->numSliceGroups - 1; i++)
+            {
+                tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                pPicParamSet->topLeft[i] = value;
+                tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                pPicParamSet->bottomRight[i] = value;
+                /* param values checked in CheckPps() */
+            }
+        }
+        else if ( (pPicParamSet->sliceGroupMapType == 3) ||
+                  (pPicParamSet->sliceGroupMapType == 4) ||
+                  (pPicParamSet->sliceGroupMapType == 5) )
+        {
+            tmp = h264bsdGetBits(pStrmData, 1);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            pPicParamSet->sliceGroupChangeDirectionFlag =
+                (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+            tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+            if (tmp != HANTRO_OK)
+                return(tmp);
+            pPicParamSet->sliceGroupChangeRate = value + 1;
+            /* param value checked in CheckPps() */
+        }
+        else if (pPicParamSet->sliceGroupMapType == 6)
+        {
+            /* pic_size_in_map_units_minus1; the bound keeps value + 1 and
+             * (value + 1) * sizeof(u32) representable in 32 bits */
+            tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+            if (tmp != HANTRO_OK)
+                return(tmp);
+            if (value >= 0xFFFFFFFFU / sizeof(u32))
+            {
+                EPRINT("pic_size_in_map_units_minus1");
+                return(HANTRO_NOK);
+            }
+            pPicParamSet->picSizeInMapUnits = value + 1;
+            ALLOCATE(pPicParamSet->sliceGroupId,
+                pPicParamSet->picSizeInMapUnits, u32);
+            if (pPicParamSet->sliceGroupId == NULL)
+                return(MEMORY_ALLOCATION_ERROR);
+            /* determine number of bits needed to represent range
+             * [0, numSliceGroups) */
+            tmp = CeilLog2NumSliceGroups[pPicParamSet->numSliceGroups-1];
+            for (i = 0; i < pPicParamSet->picSizeInMapUnits; i++)
+            {
+                pPicParamSet->sliceGroupId[i] = h264bsdGetBits(pStrmData, tmp);
+                if ( pPicParamSet->sliceGroupId[i] >=
+                     pPicParamSet->numSliceGroups )
+                {
+                    EPRINT("slice_group_id");
+                    return(HANTRO_NOK);
+                }
+            }
+        }
+    }
+
+    /* num_ref_idx_l0_active_minus1 */
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (value > 31)
+    {
+        EPRINT("num_ref_idx_l0_active_minus1");
+        return(HANTRO_NOK);
+    }
+    pPicParamSet->numRefIdxL0Active = value + 1;
+
+    /* num_ref_idx_l1_active_minus1, validated but not stored */
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (value > 31)
+    {
+        EPRINT("num_ref_idx_l1_active_minus1");
+        return(HANTRO_NOK);
+    }
+
+    /* weighted_pred_flag, this shall be 0 for baseline profile */
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp)
+    {
+        EPRINT("weighted_pred_flag");
+        return(HANTRO_NOK);
+    }
+
+    /* weighted_bipred_idc, validated but not stored */
+    tmp = h264bsdGetBits(pStrmData, 2);
+    if (tmp > 2)
+    {
+        EPRINT("weighted_bipred_idc");
+        return(HANTRO_NOK);
+    }
+
+    /* pic_init_qp_minus26 */
+    tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if ((itmp < -26) || (itmp > 25))
+    {
+        EPRINT("pic_init_qp_minus26");
+        return(HANTRO_NOK);
+    }
+    pPicParamSet->picInitQp = (u32)(itmp + 26);
+
+    /* pic_init_qs_minus26, validated but not stored */
+    tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if ((itmp < -26) || (itmp > 25))
+    {
+        EPRINT("pic_init_qs_minus26");
+        return(HANTRO_NOK);
+    }
+
+    tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if ((itmp < -12) || (itmp > 12))
+    {
+        EPRINT("chroma_qp_index_offset");
+        return(HANTRO_NOK);
+    }
+    pPicParamSet->chromaQpIndexOffset = itmp;
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pPicParamSet->deblockingFilterControlPresentFlag =
+        (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pPicParamSet->constrainedIntraPredFlag = (tmp == 1) ?
+                                    HANTRO_TRUE : HANTRO_FALSE;
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pPicParamSet->redundantPicCntPresentFlag = (tmp == 1) ?
+                                    HANTRO_TRUE : HANTRO_FALSE;
+
+    /* ignore possible errors in trailing bits of parameters sets */
+    (void)h264bsdRbspTrailingBits(pStrmData);
+    return(HANTRO_OK);
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.h
new file mode 100755
index 0000000..6328638
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_PIC_PARAM_SET_H
+#define H264SWDEC_PIC_PARAM_SET_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/* PPS information as stored by h264bsdDecodePicParamSet() */
+typedef struct
+{
+    u32 picParameterSetId;
+    u32 seqParameterSetId;
+    u32 picOrderPresentFlag;            /* HANTRO_TRUE or HANTRO_FALSE */
+    u32 numSliceGroups;                 /* num_slice_groups_minus1 + 1 */
+    u32 sliceGroupMapType;              /* 0..6, read if numSliceGroups > 1 */
+    u32 *runLength;                     /* map type 0: numSliceGroups items */
+    u32 *topLeft;                       /* map type 2: numSliceGroups-1 items */
+    u32 *bottomRight;                   /* map type 2: numSliceGroups-1 items */
+    u32 sliceGroupChangeDirectionFlag;  /* map types 3..5 */
+    u32 sliceGroupChangeRate;           /* map types 3..5: decoded value + 1 */
+    u32 picSizeInMapUnits;              /* map type 6: decoded value + 1 */
+    u32 *sliceGroupId;                  /* map type 6: picSizeInMapUnits items */
+    u32 numRefIdxL0Active;              /* num_ref_idx_l0_active_minus1 + 1 */
+    u32 picInitQp;                      /* pic_init_qp_minus26 + 26 */
+    i32 chromaQpIndexOffset;            /* accepted range -12..12 */
+    u32 deblockingFilterControlPresentFlag; /* HANTRO_TRUE or HANTRO_FALSE */
+    u32 constrainedIntraPredFlag;       /* HANTRO_TRUE or HANTRO_FALSE */
+    u32 redundantPicCntPresentFlag;     /* HANTRO_TRUE or HANTRO_FALSE */
+} picParamSet_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+/* Returns HANTRO_OK on success; see the definition for the failure codes */
+u32 h264bsdDecodePicParamSet(strmData_t *pStrmData,
+    picParamSet_t *pPicParamSet);
+
+#endif /* #ifndef H264SWDEC_PIC_PARAM_SET_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c
new file mode 100755
index 0000000..c948776
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c
@@ -0,0 +1,2315 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_reconstruct.h"
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_image.h"
+#include "h264bsd_util.h"
+
+#ifdef H264DEC_OMXDL
+#include "omxtypes.h"
+#include "omxVC.h"
+#include "armVC.h"
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/* Switch off the following Lint messages for this file:
+ * Info 701: Shift left of signed quantity (int)
+ * Info 702: Shift right of signed quantity (int)
+ */
+/*lint -e701 -e702 */
+
+/* Luma fractional-sample positions
+ *
+ *  G a b c H
+ *  d e f g
+ *  h i j k m
+ *  n p q r
+ *  M   s   N
+ *
+ *  G, H, M and N are integer sample positions
+ *  a-s are fractional samples that need to be interpolated.
+ */
+#ifndef H264DEC_OMXDL
+static const u32 lumaFracPos[4][4] = { /* [hor. 1/4 step][ver. 1/4 step] */
+  /* G  d  h  n    a  e  i  p    b  f  j   q     c   g   k   r */
+    {0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}};
+#endif /* H264DEC_OMXDL */
+
+/* clipping table, defined in h264bsd_intra_prediction.c */
+extern const u8 h264bsdClip[];
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+#ifndef H264DEC_OMXDL
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateChromaHor
+
+        Functional description:
+          This function performs chroma interpolation in horizontal direction.
+          Overfilling is done only if needed. Reference image (pRef) is
+          read at correct position and the predicted part is written to
+          macroblock's chrominance (predPartChroma)
+        Inputs:
+          pRef              pointer to reference frame Cb top-left corner
+          x0                integer x-coordinate for prediction
+          y0                integer y-coordinate for prediction
+          width             width of the reference frame chrominance in pixels
+          height            height of the reference frame chrominance in pixels
+          xFrac             horizontal fraction for prediction in 1/8 pixels
+          chromaPartWidth   width of the predicted part in pixels
+          chromaPartHeight  height of the predicted part in pixels
+        Outputs:
+          predPartChroma    pointer where predicted part is written
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_ARM11
+void h264bsdInterpolateChromaHor(
+  u8 *pRef,
+  u8 *predPartChroma,
+  i32 x0,
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 xFrac,
+  u32 chromaPartWidth,
+  u32 chromaPartHeight)
+{
+    /* Horizontal bilinear chroma prediction for two planes: plane 0 starts at
+     * pRef, plane 1 at pRef + width*height. Output goes to two 8x8 blocks
+     * (row stride 8): predPartChroma and predPartChroma + 8*8. */
+    u32 x, y, tmp1, tmp2, tmp3, tmp4, c, val;
+    u8 *ptrA, *cbr;     /* read cursor in the reference, write cursor in out */
+    u32 comp;           /* chroma plane index, 0 or 1 */
+    u8 block[9*8*2];    /* scratch for the overfilled copy of both planes */
+
+/* Code */
+
+    ASSERT(predPartChroma);
+    ASSERT(chromaPartWidth);
+    ASSERT(chromaPartHeight);
+    ASSERT(xFrac < 8);
+    ASSERT(pRef);
+    /* area (plus one extra column) leaves the plane: use an overfilled copy */
+    if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) ||
+        (y0 < 0) || ((u32)y0+chromaPartHeight > height))
+    {
+        h264bsdFillBlock(pRef, block, x0, y0, width, height,
+            chromaPartWidth + 1, chromaPartHeight, chromaPartWidth + 1);
+        pRef += width * height;
+        h264bsdFillBlock(pRef, block + (chromaPartWidth+1)*chromaPartHeight,
+            x0, y0, width, height, chromaPartWidth + 1,
+            chromaPartHeight, chromaPartWidth + 1);
+        /* from here on the scratch copy is the reference, origin (0,0) */
+        pRef = block;
+        x0 = 0;
+        y0 = 0;
+        width = chromaPartWidth+1;
+        height = chromaPartHeight;
+    }
+
+    val = 8 - xFrac;    /* weight of the left sample; xFrac weights the right */
+
+    for (comp = 0; comp <= 1; comp++)
+    {
+
+        ptrA = pRef + (comp * height + (u32)y0) * width + x0;
+        cbr = predPartChroma + comp * 8 * 8;
+
+        /* 2x2 pels per iteration, bilinear horizontal interpolation:
+         * out = ((8 - xFrac) * left + xFrac * right + 4) >> 3 */
+        for (y = (chromaPartHeight >> 1); y; y--)
+        {
+            for (x = (chromaPartWidth >> 1); x; x--)
+            {
+                tmp1 = ptrA[width];     /* lower row, column 0 */
+                tmp2 = *ptrA++;         /* upper row, column 0 */
+                tmp3 = ptrA[width];     /* lower row, column 1 */
+                tmp4 = *ptrA++;         /* upper row, column 1 */
+                c = ((val * tmp1 + xFrac * tmp3) << 3) + 32;
+                c >>= 6;
+                cbr[8] = (u8)c;         /* lower output row */
+                c = ((val * tmp2 + xFrac * tmp4) << 3) + 32;
+                c >>= 6;
+                *cbr++ = (u8)c;         /* upper output row */
+                tmp1 = ptrA[width];     /* lower row, column 2 */
+                tmp2 = *ptrA;           /* upper row, column 2 (not consumed) */
+                c = ((val * tmp3 + xFrac * tmp1) << 3) + 32;
+                c >>= 6;
+                cbr[8] = (u8)c;
+                c = ((val * tmp4 + xFrac * tmp2) << 3) + 32;
+                c >>= 6;
+                *cbr++ = (u8)c;
+            }
+            cbr += 2*8 - chromaPartWidth;       /* two output rows down */
+            ptrA += 2*width - chromaPartWidth;  /* two reference rows down */
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateChromaVer
+
+        Functional description:
+          This function performs chroma interpolation in vertical direction.
+          Overfilling is done only if needed. Reference image (pRef) is
+          read at correct position and the predicted part is written to
+          macroblock's chrominance (predPartChroma)
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateChromaVer(
+  u8 *pRef,
+  u8 *predPartChroma,
+  i32 x0,
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 yFrac,
+  u32 chromaPartWidth,
+  u32 chromaPartHeight)
+{
+    /* Vertical bilinear chroma prediction for two planes: plane 0 starts at
+     * pRef, plane 1 at pRef + width*height. Output goes to two 8x8 blocks
+     * (row stride 8): predPartChroma and predPartChroma + 8*8. */
+    u32 x, y, tmp1, tmp2, tmp3, c, val;
+    u8 *ptrA, *cbr;     /* read cursor in the reference, write cursor in out */
+    u32 comp;           /* chroma plane index, 0 or 1 */
+    u8 block[9*8*2];    /* scratch for the overfilled copy of both planes */
+
+/* Code */
+
+    ASSERT(predPartChroma);
+    ASSERT(chromaPartWidth);
+    ASSERT(chromaPartHeight);
+    ASSERT(yFrac < 8);
+    ASSERT(pRef);
+    /* area (plus one extra row) leaves the plane: use an overfilled copy */
+    if ((x0 < 0) || ((u32)x0+chromaPartWidth > width) ||
+        (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height))
+    {
+        h264bsdFillBlock(pRef, block, x0, y0, width, height, chromaPartWidth,
+            chromaPartHeight + 1, chromaPartWidth);
+        pRef += width * height;
+        h264bsdFillBlock(pRef, block + chromaPartWidth*(chromaPartHeight+1),
+            x0, y0, width, height, chromaPartWidth,
+            chromaPartHeight + 1, chromaPartWidth);
+        /* from here on the scratch copy is the reference, origin (0,0) */
+        pRef = block;
+        x0 = 0;
+        y0 = 0;
+        width = chromaPartWidth;
+        height = chromaPartHeight+1;
+    }
+
+    val = 8 - yFrac;    /* weight of the upper sample; yFrac weights the lower */
+
+    for (comp = 0; comp <= 1; comp++)
+    {
+
+        ptrA = pRef + (comp * height + (u32)y0) * width + x0;
+        cbr = predPartChroma + comp * 8 * 8;
+
+        /* 2x2 pels per iteration, bilinear vertical interpolation:
+         * out = ((8 - yFrac) * upper + yFrac * lower + 4) >> 3 */
+        for (y = (chromaPartHeight >> 1); y; y--)
+        {
+            for (x = (chromaPartWidth >> 1); x; x--)
+            {
+                tmp3 = ptrA[width*2];   /* row 2 */
+                tmp2 = ptrA[width];     /* row 1 */
+                tmp1 = *ptrA++;         /* row 0 */
+                c = ((val * tmp2 + yFrac * tmp3) << 3) + 32;
+                c >>= 6;
+                cbr[8] = (u8)c;         /* lower output row */
+                c = ((val * tmp1 + yFrac * tmp2) << 3) + 32;
+                c >>= 6;
+                *cbr++ = (u8)c;         /* upper output row */
+                tmp3 = ptrA[width*2];   /* same three rows, next column */
+                tmp2 = ptrA[width];
+                tmp1 = *ptrA++;
+                c = ((val * tmp2 + yFrac * tmp3) << 3) + 32;
+                c >>= 6;
+                cbr[8] = (u8)c;
+                c = ((val * tmp1 + yFrac * tmp2) << 3) + 32;
+                c >>= 6;
+                *cbr++ = (u8)c;
+            }
+            cbr += 2*8 - chromaPartWidth;       /* two output rows down */
+            ptrA += 2*width - chromaPartWidth;  /* two reference rows down */
+        }
+    }
+
+}
+#endif
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateChromaHorVer
+
+        Functional description:
+          This function performs chroma interpolation in horizontal and
+          vertical direction. Overfilling is done only if needed. Reference
+          image (ref) is read at correct position and the predicted part
+          is written to macroblock's chrominance (predPartChroma)
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateChromaHorVer(
+  u8 *ref,
+  u8 *predPartChroma,
+  i32 x0,
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 xFrac,
+  u32 yFrac,
+  u32 chromaPartWidth,
+  u32 chromaPartHeight)
+{
+    u8 block[9*9*2];    /* scratch for the overfilled copy of both planes */
+    u32 x, y, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, valX, valY, plus32 = 32;
+    u32 comp;           /* chroma plane index, 0 or 1 */
+    u8 *ptrA, *cbr;     /* read cursor in the reference, write cursor in out */
+    /* Two-dimensional bilinear chroma prediction for two planes (plane 1 at
+     * ref + width*height). Each column is blended vertically first, then two
+     * neighbouring columns are blended horizontally, + 32 and >> 6. */
+
+    ASSERT(predPartChroma);
+    ASSERT(chromaPartWidth);
+    ASSERT(chromaPartHeight);
+    ASSERT(xFrac < 8);
+    ASSERT(yFrac < 8);
+    ASSERT(ref);
+    /* area (plus one extra row/column) leaves the plane: use overfilled copy */
+    if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) ||
+        (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height))
+    {
+        h264bsdFillBlock(ref, block, x0, y0, width, height,
+            chromaPartWidth + 1, chromaPartHeight + 1, chromaPartWidth + 1);
+        ref += width * height;
+        h264bsdFillBlock(ref, block + (chromaPartWidth+1)*(chromaPartHeight+1),
+            x0, y0, width, height, chromaPartWidth + 1,
+            chromaPartHeight + 1, chromaPartWidth + 1);
+        /* from here on the scratch copy is the reference, origin (0,0) */
+        ref = block;
+        x0 = 0;
+        y0 = 0;
+        width = chromaPartWidth+1;
+        height = chromaPartHeight+1;
+    }
+
+    valX = 8 - xFrac;   /* weight of the left column */
+    valY = 8 - yFrac;   /* weight of the upper row */
+
+    for (comp = 0; comp <= 1; comp++)
+    {
+
+        ptrA = ref + (comp * height + (u32)y0) * width + x0;
+        cbr = predPartChroma + comp * 8 * 8;
+
+        /* 2x2 pels per iteration
+         * bilinear vertical and horizontal interpolation */
+        for (y = (chromaPartHeight >> 1); y; y--)
+        {
+            tmp1 = *ptrA;               /* first column of the row pair */
+            tmp3 = ptrA[width];
+            tmp5 = ptrA[width*2];
+            tmp1 *= valY;
+            tmp1 += tmp3 * yFrac;       /* vertical blend for upper output */
+            tmp3 *= valY;
+            tmp3 += tmp5 * yFrac;       /* vertical blend for lower output */
+            for (x = (chromaPartWidth >> 1); x; x--)
+            {
+                tmp2 = *++ptrA;         /* next column to the right */
+                tmp4 = ptrA[width];
+                tmp6 = ptrA[width*2];
+                tmp2 *= valY;
+                tmp2 += tmp4 * yFrac;
+                tmp4 *= valY;
+                tmp4 += tmp6 * yFrac;
+                tmp1 = tmp1 * valX + plus32;
+                tmp3 = tmp3 * valX + plus32;
+                tmp1 += tmp2 * xFrac;
+                tmp1 >>= 6;
+                tmp3 += tmp4 * xFrac;
+                tmp3 >>= 6;
+                cbr[8] = (u8)tmp3;      /* lower output row */
+                *cbr++ = (u8)tmp1;      /* upper output row */
+                /* second output column: tmp2/tmp4 are now the left column */
+                tmp1 = *++ptrA;
+                tmp3 = ptrA[width];
+                tmp5 = ptrA[width*2];
+                tmp1 *= valY;
+                tmp1 += tmp3 * yFrac;
+                tmp3 *= valY;
+                tmp3 += tmp5 * yFrac;
+                tmp2 = tmp2 * valX + plus32;
+                tmp4 = tmp4 * valX + plus32;
+                tmp2 += tmp1 * xFrac;
+                tmp2 >>= 6;
+                tmp4 += tmp3 * xFrac;
+                tmp4 >>= 6;
+                cbr[8] = (u8)tmp4;
+                *cbr++ = (u8)tmp2;
+            }
+            cbr += 2*8 - chromaPartWidth;       /* two output rows down */
+            ptrA += 2*width - chromaPartWidth;  /* two reference rows down */
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: PredictChroma
+
+        Functional description:
+          Top level chroma prediction function that calls the appropriate
+          interpolation function. The output is written to macroblock array.
+
+------------------------------------------------------------------------------*/
+
+static void PredictChroma(
+  u8 *mbPartChroma,
+  u32 xAL,
+  u32 yAL,
+  u32 partWidth,
+  u32 partHeight,
+  mv_t *mv,
+  image_t *refPic)
+{
+    /* Chroma prediction dispatcher: derives the chroma block position and
+     * the 1/8-sample fractions from the motion vector, then either copies
+     * the block or hands it to the matching interpolation routine. Both
+     * chroma planes are produced (second one at mbPartChroma + 8*8). */
+    u32 fracX, fracY;           /* low three bits of each mv component */
+    u32 planeW, planeH;         /* chroma plane size: 8 * refPic dims */
+    u32 cW, cH;                 /* chroma part size, half of the luma part */
+    i32 intX, intY;             /* integer sample position */
+    u8 *chroma;                 /* refPic->data + 256 * width * height */
+
+    ASSERT(mv);
+    ASSERT(refPic);
+    ASSERT(refPic->data);
+    ASSERT(refPic->width);
+    ASSERT(refPic->height);
+
+    chroma = refPic->data + 256 * refPic->width * refPic->height;
+    planeW = 8 * refPic->width;
+    planeH = 8 * refPic->height;
+    cW = partWidth >> 1;
+    cH = partHeight >> 1;
+
+    fracX = mv->hor & 0x7;
+    fracY = mv->ver & 0x7;
+    intX = (xAL >> 1) + (mv->hor >> 3);
+    intY = (yAL >> 1) + (mv->ver >> 3);
+
+    if (!fracX && !fracY)
+    {
+        /* integer position: plain copy of each plane */
+        h264bsdFillBlock(chroma, mbPartChroma, intX, intY, planeW, planeH,
+            cW, cH, 8);
+        chroma += planeW * planeH;
+        h264bsdFillBlock(chroma, mbPartChroma + 8*8, intX, intY, planeW,
+            planeH, cW, cH, 8);
+    }
+    else if (!fracY)
+    {
+        h264bsdInterpolateChromaHor(chroma, mbPartChroma, intX, intY, planeW,
+                planeH, fracX, cW, cH);
+    }
+    else if (!fracX)
+    {
+        h264bsdInterpolateChromaVer(chroma, mbPartChroma, intX, intY, planeW,
+                planeH, fracY, cW, cH);
+    }
+    else
+    {
+        h264bsdInterpolateChromaHorVer(chroma, mbPartChroma, intX, intY,
+                planeW, planeH, fracX, fracY, cW, cH);
+    }
+}
+
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateVerHalf
+
+        Functional description:
+          Function to perform vertical interpolation of pixel position 'h'
+          for a block. Overfilling is done only if needed. Reference
+          image (ref) is read at correct position and the predicted part
+          is written to macroblock array (mb)
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_ARM11
+void h264bsdInterpolateVerHalf(
+  u8 *ref,
+  u8 *mb,
+  i32 x0,
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 partWidth,
+  u32 partHeight)
+{
+    u32 p1[21*21/4+1];  /* scratch for the overfilled copy, used as bytes */
+    u32 i, j;
+    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    u8 *ptrC, *ptrV;    /* two read cursors, ptrV five rows below ptrC */
+    const u8 *clp = h264bsdClip + 512; /* presumably clamps to 0..255: confirm */
+    /* Vertical half-sample luma interpolation: each output is a 6-tap
+     * (1, -5, 20, 20, -5, 1) sum over six vertically adjacent samples, plus
+     * 16, shifted right by 5 and passed through the clp table. */
+    ASSERT(ref);
+    ASSERT(mb);
+    /* area (plus five extra rows) leaves the picture: use overfilled copy */
+    if ((x0 < 0) || ((u32)x0+partWidth > width) ||
+        (y0 < 0) || ((u32)y0+partHeight+5 > height))
+    {
+        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+                partWidth, partHeight+5, partWidth);
+        /* from here on the scratch copy is the reference, origin (0,0) */
+        x0 = 0;
+        y0 = 0;
+        ref = (u8*)p1;
+        width = partWidth;
+    }
+
+    ref += (u32)y0 * width + (u32)x0;
+    /* ptrC starts on row 1 of the area, ptrV on row 6 */
+    ptrC = ref + width;
+    ptrV = ptrC + 5*width;
+
+    /* 4 pixels per iteration, interpolate using 5 vertical samples */
+    for (i = (partHeight >> 2); i; i--)
+    {
+        /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */
+        for (j = partWidth; j; j--)
+        {
+            tmp4 = ptrV[-(i32)width*2];
+            tmp5 = ptrV[-(i32)width];
+            tmp1 = ptrV[width];
+            tmp2 = ptrV[width*2];
+            tmp6 = *ptrV++;
+            /* fourth output row of this column, stored at mb[48] */
+            tmp7 = tmp4 + tmp1;
+            tmp2 -= (tmp7 << 2);
+            tmp2 -= tmp7;
+            tmp2 += 16;
+            tmp7 = tmp5 + tmp6;
+            tmp3 = ptrC[width*2];
+            tmp2 += (tmp7 << 4);
+            tmp2 += (tmp7 << 2);
+            tmp2 += tmp3;
+            tmp2 = clp[tmp2>>5];
+            tmp1 += 16;
+            mb[48] = (u8)tmp2;
+            /* third output row, stored at mb[32] */
+            tmp7 = tmp3 + tmp6;
+            tmp1 -= (tmp7 << 2);
+            tmp1 -= tmp7;
+            tmp7 = tmp4 + tmp5;
+            tmp2 = ptrC[width];
+            tmp1 += (tmp7 << 4);
+            tmp1 += (tmp7 << 2);
+            tmp1 += tmp2;
+            tmp1 = clp[tmp1>>5];
+            tmp6 += 16;
+            mb[32] = (u8)tmp1;
+            /* second output row, stored at mb[16] */
+            tmp7 = tmp2 + tmp5;
+            tmp6 -= (tmp7 << 2);
+            tmp6 -= tmp7;
+            tmp7 = tmp4 + tmp3;
+            tmp1 = *ptrC;
+            tmp6 += (tmp7 << 4);
+            tmp6 += (tmp7 << 2);
+            tmp6 += tmp1;
+            tmp6 = clp[tmp6>>5];
+            tmp5 += 16;
+            mb[16] = (u8)tmp6;
+            /* first output row, stored at mb[0]; then step one column right */
+            tmp1 += tmp4;
+            tmp5 -= (tmp1 << 2);
+            tmp5 -= tmp1;
+            tmp3 += tmp2;
+            tmp6 = ptrC[-(i32)width];
+            tmp5 += (tmp3 << 4);
+            tmp5 += (tmp3 << 2);
+            tmp5 += tmp6;
+            tmp5 = clp[tmp5>>5];
+            *mb++ = (u8)tmp5;
+            ptrC++;
+        }
+        ptrC += 4*width - partWidth;    /* four reference rows down */
+        ptrV += 4*width - partWidth;
+        mb += 4*16 - partWidth;         /* four output rows down, stride 16 */
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateVerQuarter
+
+        Functional description:
+          Function to perform vertical interpolation of pixel position 'd'
+          or 'n' for a block. Overfilling is done only if needed. Reference
+          image (ref) is read at correct position and the predicted part
+          is written to macroblock array (mb)
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateVerQuarter(
+  u8 *ref,          /* source samples; consecutive rows are 'width' apart */
+  u8 *mb,           /* destination; consecutive rows are 16 apart */
+  i32 x0,           /* column of the first source sample that is read */
+  i32 y0,           /* row of the first source sample that is read */
+  u32 width,        /* source row stride and right bound of the fit test */
+  u32 height,       /* bottom bound of the fit test */
+  u32 partWidth,    /* number of output columns */
+  u32 partHeight,   /* number of output rows; written in groups of 4 */
+  u32 verOffset)    /* 0 for pixel d, 1 for pixel n */
+{
+    u32 p1[21*21/4+1];  /* scratch for a local source copy, handed to h264bsdFillBlock as u8* */
+    u32 i, j;
+    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    u8 *ptrC, *ptrV, *ptrInt;
+    const u8 *clp = h264bsdClip + 512;  /* indexed with (sum >> 5), which can be negative */
+    /* NOTE(review): the +512 bias presumably re-centres h264bsdClip - confirm at its definition */
+    /* Code */
+
+    ASSERT(ref);
+    ASSERT(mb);
+    /* Area read is partWidth x (partHeight+5): 5 extra rows feed the 6-tap vertical filter */
+    if ((x0 < 0) || ((u32)x0+partWidth > width) ||
+        (y0 < 0) || ((u32)y0+partHeight+5 > height))
+    {   /* area not fully inside the source: build a copy in p1 (padding presumably done by h264bsdFillBlock - confirm) */
+        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+                partWidth, partHeight+5, partWidth);
+        /* From here on read the local copy: origin (0,0), row stride partWidth */
+        x0 = 0;
+        y0 = 0;
+        ref = (u8*)p1;
+        width = partWidth;
+    }
+    /* Step to the first sample of the area */
+    ref += (u32)y0 * width + (u32)x0;
+    /* ptrC is one row into the area, ptrV five rows below ptrC; together they reach the 9 rows one 4-row group needs */
+    ptrC = ref + width;
+    ptrV = ptrC + 5*width;
+
+    /* Pointer to integer sample position, either M or R; ptrInt[k*width] pairs with row k+1 of each 4-row group */
+    ptrInt = ptrC + (2+verOffset)*width;
+
+    /* 4 pixels per iteration
+     * interpolate using 5 vertical samples and average between
+     * interpolated value and integer sample value */
+    for (i = (partHeight >> 2); i; i--)
+    {
+        /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */
+        for (j = partWidth; j; j--)
+        {   /* one column per pass, four vertically adjacent outputs */
+            tmp4 = ptrV[-(i32)width*2];
+            tmp5 = ptrV[-(i32)width];
+            tmp1 = ptrV[width];
+            tmp2 = ptrV[width*2];
+            tmp6 = *ptrV++;
+            /* Group row 3 (mb[48]): taps ptrC[2*width] and ptrV[-2*width .. 2*width] */
+            tmp7 = tmp4 + tmp1;
+            tmp2 -= (tmp7 << 2);    /* (x<<2) + x = 5*x */
+            tmp2 -= tmp7;
+            tmp2 += 16;
+            tmp7 = tmp5 + tmp6;
+            tmp3 = ptrC[width*2];
+            tmp2 += (tmp7 << 4);    /* (x<<4) + (x<<2) = 20*x */
+            tmp2 += (tmp7 << 2);
+            tmp2 += tmp3;
+            tmp2 = clp[tmp2>>5];
+            tmp7 = ptrInt[width*2];
+            tmp1 += 16;             /* rounding term for the next output, scheduled early */
+            tmp2++;                 /* +1 so the >>1 below rounds the average */
+            mb[48] = (u8)((tmp2 + tmp7) >> 1);
+            /* Group row 2 (mb[32]) */
+            tmp7 = tmp3 + tmp6;
+            tmp1 -= (tmp7 << 2);
+            tmp1 -= tmp7;
+            tmp7 = tmp4 + tmp5;
+            tmp2 = ptrC[width];
+            tmp1 += (tmp7 << 4);
+            tmp1 += (tmp7 << 2);
+            tmp1 += tmp2;
+            tmp1 = clp[tmp1>>5];
+            tmp7 = ptrInt[width];
+            tmp6 += 16;
+            tmp1++;
+            mb[32] = (u8)((tmp1 + tmp7) >> 1);
+            /* Group row 1 (mb[16]) */
+            tmp7 = tmp2 + tmp5;
+            tmp6 -= (tmp7 << 2);
+            tmp6 -= tmp7;
+            tmp7 = tmp4 + tmp3;
+            tmp1 = *ptrC;
+            tmp6 += (tmp7 << 4);
+            tmp6 += (tmp7 << 2);
+            tmp6 += tmp1;
+            tmp6 = clp[tmp6>>5];
+            tmp7 = *ptrInt;
+            tmp5 += 16;
+            tmp6++;
+            mb[16] = (u8)((tmp6 + tmp7) >> 1);
+            /* Group row 0 (mb[0]): taps ptrC[-width] .. ptrV[-width] */
+            tmp1 += tmp4;
+            tmp5 -= (tmp1 << 2);
+            tmp5 -= tmp1;
+            tmp3 += tmp2;
+            tmp6 = ptrC[-(i32)width];
+            tmp5 += (tmp3 << 4);
+            tmp5 += (tmp3 << 2);
+            tmp5 += tmp6;
+            tmp5 = clp[tmp5>>5];
+            tmp7 = ptrInt[-(i32)width];
+            tmp5++;
+            *mb++ = (u8)((tmp5 + tmp7) >> 1);
+            ptrC++;
+            ptrInt++;
+        }
+        ptrC += 4*width - partWidth;    /* inner loop moved partWidth right; net move is 4 rows down */
+        ptrV += 4*width - partWidth;
+        ptrInt += 4*width - partWidth;
+        mb += 4*16 - partWidth;         /* same for the destination, whose stride is 16 */
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateHorHalf
+
+        Functional description:
+          Function to perform horizontal interpolation of pixel position 'b'
+          for a block. Overfilling is done only if needed. Reference
+          image (ref) is read at correct position and the predicted part
+          is written to macroblock array (mb)
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateHorHalf(
+  u8 *ref,          /* source samples; consecutive rows are 'width' apart */
+  u8 *mb,           /* destination; consecutive rows are 16 apart */
+  i32 x0,           /* column of the first source sample that is read */
+  i32 y0,           /* row of the first source sample that is read */
+  u32 width,        /* source row stride and right bound of the fit test */
+  u32 height,       /* bottom bound of the fit test */
+  u32 partWidth,    /* number of output columns; asserted to be a multiple of 4 */
+  u32 partHeight)   /* number of output rows; asserted to be a multiple of 4 */
+{
+    u32 p1[21*21/4+1];  /* scratch for a local source copy, handed to h264bsdFillBlock as u8* */
+    u8 *ptrJ;
+    u32 x, y;
+    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    const u8 *clp = h264bsdClip + 512;  /* indexed with (sum >> 5), which can be negative */
+    /* NOTE(review): the +512 bias presumably re-centres h264bsdClip - confirm at its definition */
+    /* Code */
+
+    ASSERT(ref);
+    ASSERT(mb);
+    ASSERT((partWidth&0x3) == 0);
+    ASSERT((partHeight&0x3) == 0);
+    /* Area read is (partWidth+5) x partHeight: 5 extra columns feed the 6-tap horizontal filter */
+    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
+        (y0 < 0) || ((u32)y0+partHeight > height))
+    {   /* area not fully inside the source: build a copy in p1 (padding presumably done by h264bsdFillBlock - confirm) */
+        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+                partWidth+5, partHeight, partWidth+5);
+        /* From here on read the local copy: origin (0,0), row stride partWidth+5 */
+        x0 = 0;
+        y0 = 0;
+        ref = (u8*)p1;
+        width = partWidth + 5;
+    }
+    /* Step to the first sample of the area */
+    ref += (u32)y0 * width + (u32)x0;
+    /* ptrJ always points at the next sample to load; the five before it are kept in tmp6..tmp2 */
+    ptrJ = ref + 5;
+
+    for (y = partHeight; y; y--)
+    {
+        tmp6 = *(ptrJ - 5);
+        tmp5 = *(ptrJ - 4);
+        tmp4 = *(ptrJ - 3);
+        tmp3 = *(ptrJ - 2);
+        tmp2 = *(ptrJ - 1);
+        /* Each output is (s0 - 5*s1 + 20*s2 + 20*s3 - 5*s4 + s5 + 16) >> 5, clipped through clp */
+        /* calculate 4 pels per iteration */
+        for (x = (partWidth >> 2); x; x--)
+        {
+            /* First pixel */
+            tmp6 += 16;
+            tmp7 = tmp3 + tmp4;
+            tmp6 += (tmp7 << 4);    /* (x<<4) + (x<<2) = 20*x */
+            tmp6 += (tmp7 << 2);
+            tmp7 = tmp2 + tmp5;
+            tmp1 = *ptrJ++;
+            tmp6 -= (tmp7 << 2);    /* (x<<2) + x = 5*x */
+            tmp6 -= tmp7;
+            tmp6 += tmp1;
+            tmp6 = clp[tmp6>>5];
+            /* Second pixel */
+            tmp5 += 16;
+            tmp7 = tmp2 + tmp3;
+            *mb++ = (u8)tmp6;       /* store of the first pixel, interleaved with the second */
+            tmp5 += (tmp7 << 4);
+            tmp5 += (tmp7 << 2);
+            tmp7 = tmp1 + tmp4;
+            tmp6 = *ptrJ++;
+            tmp5 -= (tmp7 << 2);
+            tmp5 -= tmp7;
+            tmp5 += tmp6;
+            tmp5 = clp[tmp5>>5];
+            /* Third pixel */
+            tmp4 += 16;
+            tmp7 = tmp1 + tmp2;
+            *mb++ = (u8)tmp5;       /* store of the second pixel */
+            tmp4 += (tmp7 << 4);
+            tmp4 += (tmp7 << 2);
+            tmp7 = tmp6 + tmp3;
+            tmp5 = *ptrJ++;
+            tmp4 -= (tmp7 << 2);
+            tmp4 -= tmp7;
+            tmp4 += tmp5;
+            tmp4 = clp[tmp4>>5];
+            /* Fourth pixel */
+            tmp3 += 16;
+            tmp7 = tmp6 + tmp1;
+            *mb++ = (u8)tmp4;       /* store of the third pixel */
+            tmp3 += (tmp7 << 4);
+            tmp3 += (tmp7 << 2);
+            tmp7 = tmp5 + tmp2;
+            tmp4 = *ptrJ++;
+            tmp3 -= (tmp7 << 2);
+            tmp3 -= tmp7;
+            tmp3 += tmp4;
+            tmp3 = clp[tmp3>>5];
+            tmp7 = tmp4;            /* rotate: tmp6..tmp2 must again hold the five newest samples, oldest in tmp6 */
+            tmp4 = tmp6;
+            tmp6 = tmp2;
+            tmp2 = tmp7;
+            *mb++ = (u8)tmp3;       /* store of the fourth pixel; must precede the reuse of tmp3 below */
+            tmp3 = tmp5;
+            tmp5 = tmp1;
+        }
+        ptrJ += width - partWidth;  /* next source row, same column */
+        mb += 16 - partWidth;       /* next destination row (stride 16) */
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateHorQuarter
+
+        Functional description:
+          Function to perform horizontal interpolation of pixel position 'a'
+          or 'c' for a block. Overfilling is done only if needed. Reference
+          image (ref) is read at correct position and the predicted part
+          is written to macroblock array (mb)
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateHorQuarter(
+  u8 *ref,          /* source samples; consecutive rows are 'width' apart */
+  u8 *mb,           /* destination; consecutive rows are 16 apart */
+  i32 x0,           /* column of the first source sample that is read */
+  i32 y0,           /* row of the first source sample that is read */
+  u32 width,        /* source row stride and right bound of the fit test */
+  u32 height,       /* bottom bound of the fit test */
+  u32 partWidth,    /* number of output columns; written in groups of 4 */
+  u32 partHeight,   /* number of output rows */
+  u32 horOffset) /* 0 for pixel a, 1 for pixel c */
+{
+    u32 p1[21*21/4+1];  /* scratch for a local source copy, handed to h264bsdFillBlock as u8* */
+    u8 *ptrJ;
+    u32 x, y;
+    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    const u8 *clp = h264bsdClip + 512;  /* indexed with (sum >> 5), which can be negative */
+    /* NOTE(review): the +512 bias presumably re-centres h264bsdClip - confirm at its definition */
+    /* Code */
+
+    ASSERT(ref);
+    ASSERT(mb);
+    /* Area read is (partWidth+5) x partHeight: 5 extra columns feed the 6-tap horizontal filter */
+    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
+        (y0 < 0) || ((u32)y0+partHeight > height))
+    {   /* area not fully inside the source: build a copy in p1 (padding presumably done by h264bsdFillBlock - confirm) */
+        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+                partWidth+5, partHeight, partWidth+5);
+        /* From here on read the local copy: origin (0,0), row stride partWidth+5 */
+        x0 = 0;
+        y0 = 0;
+        ref = (u8*)p1;
+        width = partWidth + 5;
+    }
+    /* Step to the first sample of the area */
+    ref += (u32)y0 * width + (u32)x0;
+    /* ptrJ always points at the next sample to load; the five before it are kept in tmp6..tmp2 */
+    ptrJ = ref + 5;
+
+    for (y = partHeight; y; y--)
+    {
+        tmp6 = *(ptrJ - 5);
+        tmp5 = *(ptrJ - 4);
+        tmp4 = *(ptrJ - 3);
+        tmp3 = *(ptrJ - 2);
+        tmp2 = *(ptrJ - 1);
+        /* Each output averages the clipped 6-tap result with one of the filter's two centre samples */
+        /* calculate 4 pels per iteration */
+        for (x = (partWidth >> 2); x; x--)
+        {
+            /* First pixel */
+            tmp6 += 16;
+            tmp7 = tmp3 + tmp4;
+            tmp6 += (tmp7 << 4);    /* (x<<4) + (x<<2) = 20*x */
+            tmp6 += (tmp7 << 2);
+            tmp7 = tmp2 + tmp5;
+            tmp1 = *ptrJ++;
+            tmp6 -= (tmp7 << 2);    /* (x<<2) + x = 5*x */
+            tmp6 -= tmp7;
+            tmp6 += tmp1;
+            tmp6 = clp[tmp6>>5];
+            tmp5 += 16;             /* rounding term for the second pixel, scheduled early */
+            if (!horOffset)
+                tmp6 += tmp4;       /* left one of the two centre taps */
+            else
+                tmp6 += tmp3;       /* right one of the two centre taps */
+            *mb++ = (u8)((tmp6 + 1) >> 1);
+            /* Second pixel */
+            tmp7 = tmp2 + tmp3;
+            tmp5 += (tmp7 << 4);
+            tmp5 += (tmp7 << 2);
+            tmp7 = tmp1 + tmp4;
+            tmp6 = *ptrJ++;
+            tmp5 -= (tmp7 << 2);
+            tmp5 -= tmp7;
+            tmp5 += tmp6;
+            tmp5 = clp[tmp5>>5];
+            tmp4 += 16;
+            if (!horOffset)
+                tmp5 += tmp3;
+            else
+                tmp5 += tmp2;
+            *mb++ = (u8)((tmp5 + 1) >> 1);
+            /* Third pixel */
+            tmp7 = tmp1 + tmp2;
+            tmp4 += (tmp7 << 4);
+            tmp4 += (tmp7 << 2);
+            tmp7 = tmp6 + tmp3;
+            tmp5 = *ptrJ++;
+            tmp4 -= (tmp7 << 2);
+            tmp4 -= tmp7;
+            tmp4 += tmp5;
+            tmp4 = clp[tmp4>>5];
+            tmp3 += 16;
+            if (!horOffset)
+                tmp4 += tmp2;
+            else
+                tmp4 += tmp1;
+            *mb++ = (u8)((tmp4 + 1) >> 1);
+            /* Fourth pixel */
+            tmp7 = tmp6 + tmp1;
+            tmp3 += (tmp7 << 4);
+            tmp3 += (tmp7 << 2);
+            tmp7 = tmp5 + tmp2;
+            tmp4 = *ptrJ++;
+            tmp3 -= (tmp7 << 2);
+            tmp3 -= tmp7;
+            tmp3 += tmp4;
+            tmp3 = clp[tmp3>>5];
+            if (!horOffset)
+                tmp3 += tmp1;
+            else
+                tmp3 += tmp6;
+            *mb++ = (u8)((tmp3 + 1) >> 1);
+            tmp3 = tmp5;            /* rotate: tmp6..tmp2 must again hold the five newest samples, oldest in tmp6 */
+            tmp5 = tmp1;
+            tmp7 = tmp4;
+            tmp4 = tmp6;
+            tmp6 = tmp2;
+            tmp2 = tmp7;
+        }
+        ptrJ += width - partWidth;  /* next source row, same column */
+        mb += 16 - partWidth;       /* next destination row (stride 16) */
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateHorVerQuarter
+
+        Functional description:
+          Function to perform horizontal and vertical interpolation of pixel
+          position 'e', 'g', 'p' or 'r' for a block. Overfilling is done only
+          if needed. Reference image (ref) is read at correct position and
+          the predicted part is written to macroblock array (mb)
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateHorVerQuarter(
+  u8 *ref,          /* source samples; consecutive rows are 'width' apart */
+  u8 *mb,           /* destination; consecutive rows are 16 apart; written, then read back and rewritten */
+  i32 x0,           /* column of the first source sample that is read */
+  i32 y0,           /* row of the first source sample that is read */
+  u32 width,        /* source row stride and right bound of the fit test */
+  u32 height,       /* bottom bound of the fit test */
+  u32 partWidth,    /* number of output columns; first pass works in groups of 4 */
+  u32 partHeight,   /* number of output rows; second pass works in groups of 4 */
+  u32 horVerOffset) /* 0 for pixel e, 1 for pixel g,
+                       2 for pixel p, 3 for pixel r */
+{
+    u32 p1[21*21/4+1];  /* scratch for a local source copy, handed to h264bsdFillBlock as u8* */
+    u8 *ptrC, *ptrJ, *ptrV;
+    u32 x, y;
+    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    const u8 *clp = h264bsdClip + 512;  /* indexed with (sum >> 5), which can be negative */
+    /* NOTE(review): the +512 bias presumably re-centres h264bsdClip - confirm at its definition */
+    /* Code */
+
+    ASSERT(ref);
+    ASSERT(mb);
+    /* Area read is (partWidth+5) x (partHeight+5): both filter directions need 5 extra samples */
+    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
+        (y0 < 0) || ((u32)y0+partHeight+5 > height))
+    {   /* area not fully inside the source: build a copy in p1 (padding presumably done by h264bsdFillBlock - confirm) */
+        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+                partWidth+5, partHeight+5, partWidth+5);
+        /* From here on read the local copy: origin (0,0), row stride partWidth+5 */
+        x0 = 0;
+        y0 = 0;
+        ref = (u8*)p1;
+        width = partWidth+5;
+    }
+
+    /* Ref points to G + (-2, -2) */
+    ref += (u32)y0 * width + (u32)x0;
+
+    /* ptrJ points to either J or Q, depending on vertical offset */
+    ptrJ = ref + (((horVerOffset & 0x2) >> 1) + 2) * width + 5;    /* bit 1 selects row 2 or 3 for the horizontal pass */
+
+    /* ptrC points to either C or D, depending on horizontal offset */
+    ptrC = ref + width + 2 + (horVerOffset & 0x1);                 /* bit 0 selects column 2 or 3 for the vertical pass */
+
+    for (y = partHeight; y; y--)
+    {
+        tmp6 = *(ptrJ - 5);
+        tmp5 = *(ptrJ - 4);
+        tmp4 = *(ptrJ - 3);
+        tmp3 = *(ptrJ - 2);
+        tmp2 = *(ptrJ - 1);
+        /* First pass: store the clipped horizontal 6-tap result in mb; the second pass averages into it */
+        /* Horizontal interpolation, calculate 4 pels per iteration */
+        for (x = (partWidth >> 2); x; x--)
+        {
+            /* First pixel */
+            tmp6 += 16;
+            tmp7 = tmp3 + tmp4;
+            tmp6 += (tmp7 << 4);    /* (x<<4) + (x<<2) = 20*x */
+            tmp6 += (tmp7 << 2);
+            tmp7 = tmp2 + tmp5;
+            tmp1 = *ptrJ++;
+            tmp6 -= (tmp7 << 2);    /* (x<<2) + x = 5*x */
+            tmp6 -= tmp7;
+            tmp6 += tmp1;
+            tmp6 = clp[tmp6>>5];
+            /* Second pixel */
+            tmp5 += 16;
+            tmp7 = tmp2 + tmp3;
+            *mb++ = (u8)tmp6;       /* store of the first pixel, interleaved with the second */
+            tmp5 += (tmp7 << 4);
+            tmp5 += (tmp7 << 2);
+            tmp7 = tmp1 + tmp4;
+            tmp6 = *ptrJ++;
+            tmp5 -= (tmp7 << 2);
+            tmp5 -= tmp7;
+            tmp5 += tmp6;
+            tmp5 = clp[tmp5>>5];
+            /* Third pixel */
+            tmp4 += 16;
+            tmp7 = tmp1 + tmp2;
+            *mb++ = (u8)tmp5;       /* store of the second pixel */
+            tmp4 += (tmp7 << 4);
+            tmp4 += (tmp7 << 2);
+            tmp7 = tmp6 + tmp3;
+            tmp5 = *ptrJ++;
+            tmp4 -= (tmp7 << 2);
+            tmp4 -= tmp7;
+            tmp4 += tmp5;
+            tmp4 = clp[tmp4>>5];
+            /* Fourth pixel */
+            tmp3 += 16;
+            tmp7 = tmp6 + tmp1;
+            *mb++ = (u8)tmp4;       /* store of the third pixel */
+            tmp3 += (tmp7 << 4);
+            tmp3 += (tmp7 << 2);
+            tmp7 = tmp5 + tmp2;
+            tmp4 = *ptrJ++;
+            tmp3 -= (tmp7 << 2);
+            tmp3 -= tmp7;
+            tmp3 += tmp4;
+            tmp3 = clp[tmp3>>5];
+            tmp7 = tmp4;            /* rotate: tmp6..tmp2 must again hold the five newest samples, oldest in tmp6 */
+            tmp4 = tmp6;
+            tmp6 = tmp2;
+            tmp2 = tmp7;
+            *mb++ = (u8)tmp3;       /* store of the fourth pixel; must precede the reuse of tmp3 below */
+            tmp3 = tmp5;
+            tmp5 = tmp1;
+        }
+        ptrJ += width - partWidth;  /* next source row, same column */
+        mb += 16 - partWidth;       /* next destination row (stride 16) */
+    }
+    /* Rewind mb to the first output row for the second pass */
+    mb -= 16*partHeight;
+    ptrV = ptrC + 5*width;          /* ptrC/ptrV together reach the 9 source rows one 4-row group needs */
+
+    for (y = (partHeight >> 2); y; y--)
+    {
+        /* Vertical interpolation and averaging, 4 pels per iteration */
+        for (x = partWidth; x; x--)
+        {   /* one column per pass, four vertically adjacent outputs */
+            tmp4 = ptrV[-(i32)width*2];
+            tmp5 = ptrV[-(i32)width];
+            tmp1 = ptrV[width];
+            tmp2 = ptrV[width*2];
+            tmp6 = *ptrV++;
+            /* Group row 3 (mb[48]): vertical result averaged with the first-pass value already stored there */
+            tmp7 = tmp4 + tmp1;
+            tmp2 -= (tmp7 << 2);
+            tmp2 -= tmp7;
+            tmp2 += 16;
+            tmp7 = tmp5 + tmp6;
+            tmp3 = ptrC[width*2];
+            tmp2 += (tmp7 << 4);
+            tmp2 += (tmp7 << 2);
+            tmp2 += tmp3;
+            tmp7 = clp[tmp2>>5];
+            tmp2 = mb[48];          /* horizontal result written by the first pass */
+            tmp1 += 16;             /* rounding term for the next output, scheduled early */
+            tmp7++;                 /* +1 so the >>1 below rounds the average */
+            mb[48] = (u8)((tmp2 + tmp7) >> 1);
+            /* Group row 2 (mb[32]) */
+            tmp7 = tmp3 + tmp6;
+            tmp1 -= (tmp7 << 2);
+            tmp1 -= tmp7;
+            tmp7 = tmp4 + tmp5;
+            tmp2 = ptrC[width];
+            tmp1 += (tmp7 << 4);
+            tmp1 += (tmp7 << 2);
+            tmp1 += tmp2;
+            tmp7 = clp[tmp1>>5];
+            tmp1 = mb[32];
+            tmp6 += 16;
+            tmp7++;
+            mb[32] = (u8)((tmp1 + tmp7) >> 1);
+            /* Group row 1 (mb[16]) */
+            tmp1 = *ptrC;
+            tmp7 = tmp2 + tmp5;
+            tmp6 -= (tmp7 << 2);
+            tmp6 -= tmp7;
+            tmp7 = tmp4 + tmp3;
+            tmp6 += (tmp7 << 4);
+            tmp6 += (tmp7 << 2);
+            tmp6 += tmp1;
+            tmp7 = clp[tmp6>>5];
+            tmp6 = mb[16];
+            tmp5 += 16;
+            tmp7++;
+            mb[16] = (u8)((tmp6 + tmp7) >> 1);
+            /* Group row 0 (mb[0]) */
+            tmp6 = ptrC[-(i32)width];
+            tmp1 += tmp4;
+            tmp5 -= (tmp1 << 2);
+            tmp5 -= tmp1;
+            tmp3 += tmp2;
+            tmp5 += (tmp3 << 4);
+            tmp5 += (tmp3 << 2);
+            tmp5 += tmp6;
+            tmp7 = clp[tmp5>>5];
+            tmp5 = *mb;
+            tmp7++;
+            *mb++ = (u8)((tmp5 + tmp7) >> 1);
+            ptrC++;
+
+        }
+        ptrC += 4*width - partWidth;    /* inner loop moved partWidth right; net move is 4 rows down */
+        ptrV += 4*width - partWidth;
+        mb += 4*16 - partWidth;         /* same for the destination, whose stride is 16 */
+    }
+
+}
+#endif
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateMidHalf
+
+        Functional description:
+          Function to perform horizontal and vertical interpolation of pixel
+          position 'j' for a block. Overfilling is done only if needed.
+          Reference image (ref) is read at correct position and the predicted
+          part is written to macroblock array (mb)
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateMidHalf(
+  u8 *ref,          /* source samples; consecutive rows are 'width' apart */
+  u8 *mb,           /* destination; consecutive rows are 16 apart */
+  i32 x0,           /* column of the first source sample that is read */
+  i32 y0,           /* row of the first source sample that is read */
+  u32 width,        /* source row stride and right bound of the fit test */
+  u32 height,       /* bottom bound of the fit test */
+  u32 partWidth,    /* number of output columns; first step works in groups of 4 */
+  u32 partHeight)   /* number of output rows; second step works in groups of 4 */
+{
+    u32 p1[21*21/4+1];  /* scratch for a local source copy, handed to h264bsdFillBlock as u8* */
+    u32 x, y;
+    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    i32 *ptrC, *ptrV, *b1;
+    u8  *ptrJ;
+    i32 table[21*16];   /* unrounded horizontal sums: partHeight+5 rows, packed partWidth entries per row */
+    const u8 *clp = h264bsdClip + 512;  /* indexed with (sum >> 10), which can be negative */
+    /* NOTE(review): the +512 bias presumably re-centres h264bsdClip - confirm at its definition */
+    /* Code */
+
+    ASSERT(ref);
+    ASSERT(mb);
+    /* Area read is (partWidth+5) x (partHeight+5): both filter directions need 5 extra samples */
+    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
+        (y0 < 0) || ((u32)y0+partHeight+5 > height))
+    {   /* area not fully inside the source: build a copy in p1 (padding presumably done by h264bsdFillBlock - confirm) */
+        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+                partWidth+5, partHeight+5, partWidth+5);
+        /* From here on read the local copy: origin (0,0), row stride partWidth+5 */
+        x0 = 0;
+        y0 = 0;
+        ref = (u8*)p1;
+        width = partWidth+5;
+    }
+    /* Step to the first sample of the area */
+    ref += (u32)y0 * width + (u32)x0;
+    /* b1 is the write cursor into table; ptrJ is the next source sample to load */
+    b1 = table;
+    ptrJ = ref + 5;
+
+    /* First step: calculate intermediate values for
+     * horizontal interpolation */
+    for (y = partHeight + 5; y; y--)
+    {
+        tmp6 = *(ptrJ - 5);
+        tmp5 = *(ptrJ - 4);
+        tmp4 = *(ptrJ - 3);
+        tmp3 = *(ptrJ - 2);
+        tmp2 = *(ptrJ - 1);
+        /* Raw 6-tap sums (1,-5,20,20,-5,1): no rounding, shift or clip until the second step */
+        /* 4 pels per iteration */
+        for (x = (partWidth >> 2); x; x--)
+        {
+            /* First pixel */
+            tmp7 = tmp3 + tmp4;
+            tmp6 += (tmp7 << 4);    /* (x<<4) + (x<<2) = 20*x */
+            tmp6 += (tmp7 << 2);
+            tmp7 = tmp2 + tmp5;
+            tmp1 = *ptrJ++;
+            tmp6 -= (tmp7 << 2);    /* (x<<2) + x = 5*x */
+            tmp6 -= tmp7;
+            tmp6 += tmp1;
+            *b1++ = tmp6;
+            /* Second pixel */
+            tmp7 = tmp2 + tmp3;
+            tmp5 += (tmp7 << 4);
+            tmp5 += (tmp7 << 2);
+            tmp7 = tmp1 + tmp4;
+            tmp6 = *ptrJ++;
+            tmp5 -= (tmp7 << 2);
+            tmp5 -= tmp7;
+            tmp5 += tmp6;
+            *b1++ = tmp5;
+            /* Third pixel */
+            tmp7 = tmp1 + tmp2;
+            tmp4 += (tmp7 << 4);
+            tmp4 += (tmp7 << 2);
+            tmp7 = tmp6 + tmp3;
+            tmp5 = *ptrJ++;
+            tmp4 -= (tmp7 << 2);
+            tmp4 -= tmp7;
+            tmp4 += tmp5;
+            *b1++ = tmp4;
+            /* Fourth pixel */
+            tmp7 = tmp6 + tmp1;
+            tmp3 += (tmp7 << 4);
+            tmp3 += (tmp7 << 2);
+            tmp7 = tmp5 + tmp2;
+            tmp4 = *ptrJ++;
+            tmp3 -= (tmp7 << 2);
+            tmp3 -= tmp7;
+            tmp3 += tmp4;
+            *b1++ = tmp3;
+            tmp7 = tmp4;            /* rotate: tmp6..tmp2 must again hold the five newest samples, oldest in tmp6 */
+            tmp4 = tmp6;
+            tmp6 = tmp2;
+            tmp2 = tmp7;
+            tmp3 = tmp5;
+            tmp5 = tmp1;
+        }
+        ptrJ += width - partWidth;  /* next source row; b1 is not adjusted, so table rows are packed */
+    }
+
+    /* Second step: calculate vertical interpolation */
+    ptrC = table + partWidth;       /* one table row in; ptrV is five table rows below ptrC */
+    ptrV = ptrC + 5*partWidth;
+    for (y = (partHeight >> 2); y; y--)
+    {
+        /* 4 pels per iteration */
+        for (x = partWidth; x; x--)
+        {   /* one column per pass, four vertically adjacent outputs */
+            tmp4 = ptrV[-(i32)partWidth*2];
+            tmp5 = ptrV[-(i32)partWidth];
+            tmp1 = ptrV[partWidth];
+            tmp2 = ptrV[partWidth*2];
+            tmp6 = *ptrV++;
+            /* Group row 3 (mb[48]); 512 and >>10 round and scale both filter passes at once */
+            tmp7 = tmp4 + tmp1;
+            tmp2 -= (tmp7 << 2);
+            tmp2 -= tmp7;
+            tmp2 += 512;
+            tmp7 = tmp5 + tmp6;
+            tmp3 = ptrC[partWidth*2];
+            tmp2 += (tmp7 << 4);
+            tmp2 += (tmp7 << 2);
+            tmp2 += tmp3;
+            tmp7 = clp[tmp2>>10];
+            tmp1 += 512;            /* rounding term for the next output, scheduled early */
+            mb[48] = (u8)tmp7;
+            /* Group row 2 (mb[32]) */
+            tmp7 = tmp3 + tmp6;
+            tmp1 -= (tmp7 << 2);
+            tmp1 -= tmp7;
+            tmp7 = tmp4 + tmp5;
+            tmp2 = ptrC[partWidth];
+            tmp1 += (tmp7 << 4);
+            tmp1 += (tmp7 << 2);
+            tmp1 += tmp2;
+            tmp7 = clp[tmp1>>10];
+            tmp6 += 512;
+            mb[32] = (u8)tmp7;
+            /* Group row 1 (mb[16]) */
+            tmp1 = *ptrC;
+            tmp7 = tmp2 + tmp5;
+            tmp6 -= (tmp7 << 2);
+            tmp6 -= tmp7;
+            tmp7 = tmp4 + tmp3;
+            tmp6 += (tmp7 << 4);
+            tmp6 += (tmp7 << 2);
+            tmp6 += tmp1;
+            tmp7 = clp[tmp6>>10];
+            tmp5 += 512;
+            mb[16] = (u8)tmp7;
+            /* Group row 0 (mb[0]) */
+            tmp6 = ptrC[-(i32)partWidth];
+            tmp1 += tmp4;
+            tmp5 -= (tmp1 << 2);
+            tmp5 -= tmp1;
+            tmp3 += tmp2;
+            tmp5 += (tmp3 << 4);
+            tmp5 += (tmp3 << 2);
+            tmp5 += tmp6;
+            tmp7 = clp[tmp5>>10];
+            *mb++ = (u8)tmp7;
+            ptrC++;
+        }
+        mb += 4*16 - partWidth;     /* four destination rows down (stride 16) */
+        ptrC += 3*partWidth;        /* inner loop already moved one table row; net move is 4 rows */
+        ptrV += 3*partWidth;
+    }
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateMidVerQuarter
+
+        Functional description:
+          Function to perform horizontal and vertical interpolation of pixel
+          position 'f' or 'q' for a block. Overfilling is done only if needed.
+          Reference image (ref) is read at correct position and the predicted
+          part is written to macroblock array (mb)
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateMidVerQuarter(
+  u8 *ref,          /* source samples; consecutive rows are 'width' apart */
+  u8 *mb,           /* destination; consecutive rows are 16 apart */
+  i32 x0,           /* column of the first source sample that is read */
+  i32 y0,           /* row of the first source sample that is read */
+  u32 width,        /* source row stride and right bound of the fit test */
+  u32 height,       /* bottom bound of the fit test */
+  u32 partWidth,    /* number of output columns; first step works in groups of 4 */
+  u32 partHeight,   /* number of output rows; second step works in groups of 4 */
+  u32 verOffset)    /* 0 for pixel f, 1 for pixel q */
+{
+    u32 p1[21*21/4+1];  /* scratch for a local source copy, handed to h264bsdFillBlock as u8* */
+    u32 x, y;
+    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    i32 *ptrC, *ptrV, *ptrInt, *b1;
+    u8  *ptrJ;
+    i32 table[21*16];   /* unrounded horizontal sums: partHeight+5 rows, packed partWidth entries per row */
+    const u8 *clp = h264bsdClip + 512;  /* indexed with shifted sums, which can be negative */
+    /* NOTE(review): the +512 bias presumably re-centres h264bsdClip - confirm at its definition */
+    /* Code */
+
+    ASSERT(ref);
+    ASSERT(mb);
+    /* Area read is (partWidth+5) x (partHeight+5): both filter directions need 5 extra samples */
+    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
+        (y0 < 0) || ((u32)y0+partHeight+5 > height))
+    {   /* area not fully inside the source: build a copy in p1 (padding presumably done by h264bsdFillBlock - confirm) */
+        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+                partWidth+5, partHeight+5, partWidth+5);
+        /* From here on read the local copy: origin (0,0), row stride partWidth+5 */
+        x0 = 0;
+        y0 = 0;
+        ref = (u8*)p1;
+        width = partWidth+5;
+    }
+    /* Step to the first sample of the area */
+    ref += (u32)y0 * width + (u32)x0;
+    /* b1 is the write cursor into table; ptrJ is the next source sample to load */
+    b1 = table;
+    ptrJ = ref + 5;
+
+    /* First step: calculate intermediate values for
+     * horizontal interpolation */
+    for (y = partHeight + 5; y; y--)
+    {
+        tmp6 = *(ptrJ - 5);
+        tmp5 = *(ptrJ - 4);
+        tmp4 = *(ptrJ - 3);
+        tmp3 = *(ptrJ - 2);
+        tmp2 = *(ptrJ - 1);     /* tmp6..tmp2 hold the five samples before ptrJ, oldest in tmp6 */
+        for (x = (partWidth >> 2); x; x--)
+        {   /* raw 6-tap sums (1,-5,20,20,-5,1): no rounding, shift or clip yet */
+            /* First pixel */
+            tmp7 = tmp3 + tmp4;
+            tmp6 += (tmp7 << 4);    /* (x<<4) + (x<<2) = 20*x */
+            tmp6 += (tmp7 << 2);
+            tmp7 = tmp2 + tmp5;
+            tmp1 = *ptrJ++;
+            tmp6 -= (tmp7 << 2);    /* (x<<2) + x = 5*x */
+            tmp6 -= tmp7;
+            tmp6 += tmp1;
+            *b1++ = tmp6;
+            /* Second pixel */
+            tmp7 = tmp2 + tmp3;
+            tmp5 += (tmp7 << 4);
+            tmp5 += (tmp7 << 2);
+            tmp7 = tmp1 + tmp4;
+            tmp6 = *ptrJ++;
+            tmp5 -= (tmp7 << 2);
+            tmp5 -= tmp7;
+            tmp5 += tmp6;
+            *b1++ = tmp5;
+            /* Third pixel */
+            tmp7 = tmp1 + tmp2;
+            tmp4 += (tmp7 << 4);
+            tmp4 += (tmp7 << 2);
+            tmp7 = tmp6 + tmp3;
+            tmp5 = *ptrJ++;
+            tmp4 -= (tmp7 << 2);
+            tmp4 -= tmp7;
+            tmp4 += tmp5;
+            *b1++ = tmp4;
+            /* Fourth pixel */
+            tmp7 = tmp6 + tmp1;
+            tmp3 += (tmp7 << 4);
+            tmp3 += (tmp7 << 2);
+            tmp7 = tmp5 + tmp2;
+            tmp4 = *ptrJ++;
+            tmp3 -= (tmp7 << 2);
+            tmp3 -= tmp7;
+            tmp3 += tmp4;
+            *b1++ = tmp3;
+            tmp7 = tmp4;            /* rotate: tmp6..tmp2 must again hold the five newest samples, oldest in tmp6 */
+            tmp4 = tmp6;
+            tmp6 = tmp2;
+            tmp2 = tmp7;
+            tmp3 = tmp5;
+            tmp5 = tmp1;
+        }
+        ptrJ += width - partWidth;  /* next source row; b1 is not adjusted, so table rows are packed */
+    }
+
+    /* Second step: calculate vertical interpolation and average */
+    ptrC = table + partWidth;       /* one table row in; ptrV is five table rows below ptrC */
+    ptrV = ptrC + 5*partWidth;
+    /* ptrInt walks table (not ref), 2+verOffset rows below ptrC: the value averaged in is the horizontal sum there, as clp[(v+16)>>5] */
+    ptrInt = ptrC + (2+verOffset)*partWidth;
+    for (y = (partHeight >> 2); y; y--)
+    {
+        for (x = partWidth; x; x--)
+        {   /* one column per pass, four vertically adjacent outputs */
+            tmp4 = ptrV[-(i32)partWidth*2];
+            tmp5 = ptrV[-(i32)partWidth];
+            tmp1 = ptrV[partWidth];
+            tmp2 = ptrV[partWidth*2];
+            tmp6 = *ptrV++;
+            /* Group row 3 (mb[48]); 512 and >>10 round and scale both filter passes at once */
+            tmp7 = tmp4 + tmp1;
+            tmp2 -= (tmp7 << 2);
+            tmp2 -= tmp7;
+            tmp2 += 512;
+            tmp7 = tmp5 + tmp6;
+            tmp3 = ptrC[partWidth*2];
+            tmp2 += (tmp7 << 4);
+            tmp2 += (tmp7 << 2);
+            tmp7 = ptrInt[partWidth*2];
+            tmp2 += tmp3;
+            tmp2 = clp[tmp2>>10];
+            tmp7 += 16;             /* round, scale and clip the single-pass horizontal sum */
+            tmp7 = clp[tmp7>>5];
+            tmp1 += 512;            /* rounding term for the next output, scheduled early */
+            tmp2++;                 /* +1 so the >>1 below rounds the average */
+            mb[48] = (u8)((tmp7 + tmp2) >> 1);
+            /* Group row 2 (mb[32]) */
+            tmp7 = tmp3 + tmp6;
+            tmp1 -= (tmp7 << 2);
+            tmp1 -= tmp7;
+            tmp7 = tmp4 + tmp5;
+            tmp2 = ptrC[partWidth];
+            tmp1 += (tmp7 << 4);
+            tmp1 += (tmp7 << 2);
+            tmp7 = ptrInt[partWidth];
+            tmp1 += tmp2;
+            tmp1 = clp[tmp1>>10];
+            tmp7 += 16;
+            tmp7 = clp[tmp7>>5];
+            tmp6 += 512;
+            tmp1++;
+            mb[32] = (u8)((tmp7 + tmp1) >> 1);
+            /* Group row 1 (mb[16]) */
+            tmp1 = *ptrC;
+            tmp7 = tmp2 + tmp5;
+            tmp6 -= (tmp7 << 2);
+            tmp6 -= tmp7;
+            tmp7 = tmp4 + tmp3;
+            tmp6 += (tmp7 << 4);
+            tmp6 += (tmp7 << 2);
+            tmp7 = *ptrInt;
+            tmp6 += tmp1;
+            tmp6 = clp[tmp6>>10];
+            tmp7 += 16;
+            tmp7 = clp[tmp7>>5];
+            tmp5 += 512;
+            tmp6++;
+            mb[16] = (u8)((tmp7 + tmp6) >> 1);
+            /* Group row 0 (mb[0]) */
+            tmp6 = ptrC[-(i32)partWidth];
+            tmp1 += tmp4;
+            tmp5 -= (tmp1 << 2);
+            tmp5 -= tmp1;
+            tmp3 += tmp2;
+            tmp5 += (tmp3 << 4);
+            tmp5 += (tmp3 << 2);
+            tmp7 = ptrInt[-(i32)partWidth];
+            tmp5 += tmp6;
+            tmp5 = clp[tmp5>>10];
+            tmp7 += 16;
+            tmp7 = clp[tmp7>>5];
+            tmp5++;
+            *mb++ = (u8)((tmp7 + tmp5) >> 1);
+            ptrC++;
+            ptrInt++;
+        }
+        mb += 4*16 - partWidth;     /* four destination rows down (stride 16) */
+        ptrC += 3*partWidth;        /* inner loop already moved one table row; net move is 4 rows */
+        ptrV += 3*partWidth;
+        ptrInt += 3*partWidth;
+    }
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterpolateMidHorQuarter
+
+        Functional description:
+          Function to perform horizontal and vertical interpolation of pixel
+          position 'i' or 'k' for a block. Overfilling is done only if needed.
+          Reference image (ref) is read at correct position and the predicted
+          part is written to macroblock array (mb)
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateMidHorQuarter(
+  u8 *ref,
+  u8 *mb,
+  i32 x0,
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 partWidth,
+  u32 partHeight,
+  u32 horOffset)    /* 0 for pixel i, 1 for pixel k */
+{
+    u32 p1[21*21/4+1];
+    u32 x, y;
+    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+    i32 *ptrJ, *ptrInt, *h1;
+    u8  *ptrC, *ptrV;
+    i32 table[21*16];
+    i32 tableWidth = (i32)partWidth+5;
+    const u8 *clp = h264bsdClip + 512;
+
+    /* Code */
+
+    ASSERT(ref);
+    ASSERT(mb);
+
+    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
+        (y0 < 0) || ((u32)y0+partHeight+5 > height))
+    {
+        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+                partWidth+5, partHeight+5, partWidth+5);
+
+        x0 = 0;
+        y0 = 0;
+        ref = (u8*)p1;
+        width = partWidth+5;
+    }
+
+    ref += (u32)y0 * width + (u32)x0;
+
+    h1 = table + tableWidth;
+    ptrC = ref + width;
+    ptrV = ptrC + 5*width;
+
+    /* First step: calculate intermediate values for
+     * vertical interpolation */
+    for (y = (partHeight >> 2); y; y--)
+    {
+        for (x = (u32)tableWidth; x; x--)
+        {
+            tmp4 = ptrV[-(i32)width*2];
+            tmp5 = ptrV[-(i32)width];
+            tmp1 = ptrV[width];
+            tmp2 = ptrV[width*2];
+            tmp6 = *ptrV++;
+
+            tmp7 = tmp4 + tmp1;
+            tmp2 -= (tmp7 << 2);
+            tmp2 -= tmp7;
+            tmp7 = tmp5 + tmp6;
+            tmp3 = ptrC[width*2];
+            tmp2 += (tmp7 << 4);
+            tmp2 += (tmp7 << 2);
+            tmp2 += tmp3;
+            h1[tableWidth*2] = tmp2;
+
+            tmp7 = tmp3 + tmp6;
+            tmp1 -= (tmp7 << 2);
+            tmp1 -= tmp7;
+            tmp7 = tmp4 + tmp5;
+            tmp2 = ptrC[width];
+            tmp1 += (tmp7 << 4);
+            tmp1 += (tmp7 << 2);
+            tmp1 += tmp2;
+            h1[tableWidth] = tmp1;
+
+            tmp1 = *ptrC;
+            tmp7 = tmp2 + tmp5;
+            tmp6 -= (tmp7 << 2);
+            tmp6 -= tmp7;
+            tmp7 = tmp4 + tmp3;
+            tmp6 += (tmp7 << 4);
+            tmp6 += (tmp7 << 2);
+            tmp6 += tmp1;
+            *h1 = tmp6;
+
+            tmp6 = ptrC[-(i32)width];
+            tmp1 += tmp4;
+            tmp5 -= (tmp1 << 2);
+            tmp5 -= tmp1;
+            tmp3 += tmp2;
+            tmp5 += (tmp3 << 4);
+            tmp5 += (tmp3 << 2);
+            tmp5 += tmp6;
+            h1[-tableWidth] = tmp5;
+            h1++;
+            ptrC++;
+        }
+        ptrC += 4*width - partWidth - 5;
+        ptrV += 4*width - partWidth - 5;
+        h1 += 3*tableWidth;
+    }
+
+    /* Second step: calculate horizontal interpolation and average */
+    ptrJ = table + 5;
+    /* Pointer to integer sample position, either G or H */
+    ptrInt = table + 2 + horOffset;
+    for (y = partHeight; y; y--)
+    {
+        tmp6 = *(ptrJ - 5);
+        tmp5 = *(ptrJ - 4);
+        tmp4 = *(ptrJ - 3);
+        tmp3 = *(ptrJ - 2);
+        tmp2 = *(ptrJ - 1);
+        for (x = (partWidth>>2); x; x--)
+        {
+            /* First pixel */
+            tmp6 += 512;
+            tmp7 = tmp3 + tmp4;
+            tmp6 += (tmp7 << 4);
+            tmp6 += (tmp7 << 2);
+            tmp7 = tmp2 + tmp5;
+            tmp1 = *ptrJ++;
+            tmp6 -= (tmp7 << 2);
+            tmp6 -= tmp7;
+            tmp7 = *ptrInt++;
+            tmp6 += tmp1;
+            tmp6 = clp[tmp6 >> 10];
+            tmp7 += 16;
+            tmp7 = clp[tmp7 >> 5];
+            tmp5 += 512;
+            tmp6++;
+            *mb++ = (u8)((tmp6 + tmp7) >> 1);
+            /* Second pixel */
+            tmp7 = tmp2 + tmp3;
+            tmp5 += (tmp7 << 4);
+            tmp5 += (tmp7 << 2);
+            tmp7 = tmp1 + tmp4;
+            tmp6 = *ptrJ++;
+            tmp5 -= (tmp7 << 2);
+            tmp5 -= tmp7;
+            tmp7 = *ptrInt++;
+            tmp5 += tmp6;
+            tmp5 = clp[tmp5 >> 10];
+            tmp7 += 16;
+            tmp7 = clp[tmp7 >> 5];
+            tmp4 += 512;
+            tmp5++;
+            *mb++ = (u8)((tmp5 + tmp7) >> 1);
+            /* Third pixel */
+            tmp7 = tmp1 + tmp2;
+            tmp4 += (tmp7 << 4);
+            tmp4 += (tmp7 << 2);
+            tmp7 = tmp6 + tmp3;
+            tmp5 = *ptrJ++;
+            tmp4 -= (tmp7 << 2);
+            tmp4 -= tmp7;
+            tmp7 = *ptrInt++;
+            tmp4 += tmp5;
+            tmp4 = clp[tmp4 >> 10];
+            tmp7 += 16;
+            tmp7 = clp[tmp7 >> 5];
+            tmp3 += 512;
+            tmp4++;
+            *mb++ = (u8)((tmp4 + tmp7) >> 1);
+            /* Fourth pixel */
+            tmp7 = tmp6 + tmp1;
+            tmp3 += (tmp7 << 4);
+            tmp3 += (tmp7 << 2);
+            tmp7 = tmp5 + tmp2;
+            tmp4 = *ptrJ++;
+            tmp3 -= (tmp7 << 2);
+            tmp3 -= tmp7;
+            tmp7 = *ptrInt++;
+            tmp3 += tmp4;
+            tmp3 = clp[tmp3 >> 10];
+            tmp7 += 16;
+            tmp7 = clp[tmp7 >> 5];
+            tmp3++;
+            *mb++ = (u8)((tmp3 + tmp7) >> 1);
+            tmp3 = tmp5;
+            tmp5 = tmp1;
+            tmp7 = tmp4;
+            tmp4 = tmp6;
+            tmp6 = tmp2;
+            tmp2 = tmp7;
+        }
+        ptrJ += 5;
+        ptrInt += 5;
+        mb += 16 - partWidth;
+    }
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdPredictSamples
+
+        Functional description:
+          This function reconstructs a prediction for a macroblock partition.
+          The prediction is either copied or interpolated using the reference
+          frame and the motion vector. Both luminance and chrominance parts are
+          predicted. The prediction is stored in given macroblock array (data).
+        Inputs:
+          data          pointer to macroblock array (384 bytes) for output
+          mv            pointer to motion vector used for prediction
+          refPic        pointer to reference picture structure
+          xA            x-coordinate for current macroblock
+          yA            y-coordinate for current macroblock
+          partX         x-offset for partition in macroblock
+          partY         y-offset for partition in macroblock
+          partWidth     width of partition
+          partHeight    height of partition
+        Outputs:
+          data          macroblock array (16x16+8x8+8x8) where predicted
+                        partition is stored at correct position
+
+------------------------------------------------------------------------------*/
+
+void h264bsdPredictSamples(
+  u8 *data,
+  mv_t *mv,
+  image_t *refPic,
+  u32 xA,
+  u32 yA,
+  u32 partX,
+  u32 partY,
+  u32 partWidth,
+  u32 partHeight)
+
+{
+
+/* Variables */
+
+    u32 xFrac, yFrac, width, height;
+    i32 xInt, yInt;
+    u8 *lumaPartData;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(mv);
+    ASSERT(partWidth);
+    ASSERT(partHeight);
+    ASSERT(refPic);
+    ASSERT(refPic->data);
+    ASSERT(refPic->width);
+    ASSERT(refPic->height);
+
+    /* luma */
+    lumaPartData = data + 16*partY + partX; /* luma rows are 16 bytes apart */
+
+    xFrac = mv->hor & 0x3; /* low two bits: fractional part of the vector */
+    yFrac = mv->ver & 0x3;
+
+    width = 16 * refPic->width;   /* presumably macroblocks -> pels; confirm */
+    height = 16 * refPic->height;
+
+    xInt = (i32)xA + (i32)partX + (mv->hor >> 2); /* integer sample position */
+    yInt = (i32)yA + (i32)partY + (mv->ver >> 2);
+
+    ASSERT(lumaFracPos[xFrac][yFrac] < 16);
+
+    switch (lumaFracPos[xFrac][yFrac]) /* table picks one of 16 positions */
+    {
+        case 0: /* G: integer position, block is fetched without filtering */
+            h264bsdFillBlock(refPic->data, lumaPartData,
+                    xInt,yInt,width,height,partWidth,partHeight,16);
+            break;
+        case 1: /* d: vertical-only cases are called with yInt-2 */
+            h264bsdInterpolateVerQuarter(refPic->data, lumaPartData,
+                    xInt, yInt-2, width, height, partWidth, partHeight, 0);
+            break;
+        case 2: /* h */
+            h264bsdInterpolateVerHalf(refPic->data, lumaPartData,
+                    xInt, yInt-2, width, height, partWidth, partHeight);
+            break;
+        case 3: /* n */
+            h264bsdInterpolateVerQuarter(refPic->data, lumaPartData,
+                    xInt, yInt-2, width, height, partWidth, partHeight, 1);
+            break;
+        case 4: /* a: horizontal-only cases are called with xInt-2 */
+            h264bsdInterpolateHorQuarter(refPic->data, lumaPartData,
+                    xInt-2, yInt, width, height, partWidth, partHeight, 0);
+            break;
+        case 5: /* e: remaining cases are called with xInt-2 and yInt-2 */
+            h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData,
+                    xInt-2, yInt-2, width, height, partWidth, partHeight, 0);
+            break;
+        case 6: /* i */
+            h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData,
+                    xInt-2, yInt-2, width, height, partWidth, partHeight, 0);
+            break;
+        case 7: /* p */
+            h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData,
+                    xInt-2, yInt-2, width, height, partWidth, partHeight, 2);
+            break;
+        case 8: /* b */
+            h264bsdInterpolateHorHalf(refPic->data, lumaPartData,
+                    xInt-2, yInt, width, height, partWidth, partHeight);
+            break;
+        case 9: /* f */
+            h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData,
+                    xInt-2, yInt-2, width, height, partWidth, partHeight, 0);
+            break;
+        case 10: /* j */
+            h264bsdInterpolateMidHalf(refPic->data, lumaPartData,
+                    xInt-2, yInt-2, width, height, partWidth, partHeight);
+            break;
+        case 11: /* q */
+            h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData,
+                    xInt-2, yInt-2, width, height, partWidth, partHeight, 1);
+            break;
+        case 12: /* c */
+            h264bsdInterpolateHorQuarter(refPic->data, lumaPartData,
+                    xInt-2, yInt, width, height, partWidth, partHeight, 1);
+            break;
+        case 13: /* g */
+            h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData,
+                    xInt-2, yInt-2, width, height, partWidth, partHeight, 1);
+            break;
+        case 14: /* k */
+            h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData,
+                    xInt-2, yInt-2, width, height, partWidth, partHeight, 1);
+            break;
+        default: /* case 15, r */
+            h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData,
+                    xInt-2, yInt-2, width, height, partWidth, partHeight, 3);
+            break;
+    }
+
+    /* chroma */
+    PredictChroma(
+      data + 16*16 + (partY>>1)*8 + (partX>>1), /* past 16x16 luma, 8-byte rows */
+      xA + partX,
+      yA + partY,
+      partWidth,
+      partHeight,
+      mv,
+      refPic);
+
+}
+
+#else /* H264DEC_OMXDL */
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdPredictSamples
+
+        Functional description:
+          This function reconstructs a prediction for a macroblock partition.
+          The prediction is either copied or interpolated using the reference
+          frame and the motion vector. Both luminance and chrominance parts are
+          predicted. The prediction is stored in given macroblock array (data).
+        Inputs:
+          data          pointer to macroblock array (384 bytes) for output
+          mv            pointer to motion vector used for prediction
+          refPic        pointer to reference picture structure
+          colAndRow     packed macroblock coordinates: x-coordinate (xA) in
+                        bits 31..16, y-coordinate (yA) in bits 15..0
+          part          packed partition info, one byte each from the top:
+                        partX, partY, partWidth, partHeight
+          pFill         scratch buffer that receives the edge-filled
+                        reference block when it crosses a picture border
+        Outputs:
+          data          macroblock array (16x16+8x8+8x8) where predicted
+                        partition is stored at correct position
+
+------------------------------------------------------------------------------*/
+
+/*lint -e{550} Symbol 'res' not accessed */
+void h264bsdPredictSamples(
+  u8 *data,
+  mv_t *mv,
+  image_t *refPic,
+  u32 colAndRow,
+  u32 part,
+  u8 *pFill)
+
+{
+
+/* Variables */
+
+    u32 xFrac, yFrac;
+    u32 width, height;
+    i32 xInt, yInt, x0, y0;
+    u8 *partData, *ref;
+    OMXSize roi;
+    u32 fillWidth;
+    u32 fillHeight;
+    OMXResult res; /* only read inside ASSERT */
+    u32 xA, yA;
+    u32 partX, partY;
+    u32 partWidth, partHeight;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(mv);
+    ASSERT(refPic);
+    ASSERT(refPic->data);
+    ASSERT(refPic->width);
+    ASSERT(refPic->height);
+
+    xA = (colAndRow & 0xFFFF0000) >> 16; /* upper half-word: x-coordinate */
+    yA = (colAndRow & 0x0000FFFF);       /* lower half-word: y-coordinate */
+
+    partX = (part & 0xFF000000) >> 24;   /* four bytes, most significant first */
+    partY = (part & 0x00FF0000) >> 16;
+    partWidth = (part & 0x0000FF00) >> 8;
+    partHeight = (part & 0x000000FF);
+
+    ASSERT(partWidth);
+    ASSERT(partHeight);
+
+    /* luma */
+    partData = data + 16*partY + partX; /* luma rows are 16 bytes apart */
+
+    xFrac = mv->hor & 0x3; /* low two bits: fractional part of the vector */
+    yFrac = mv->ver & 0x3;
+
+    width = 16 * refPic->width;   /* presumably macroblocks -> pels; confirm */
+    height = 16 * refPic->height;
+
+    xInt = (i32)xA + (i32)partX + (mv->hor >> 2); /* integer sample position */
+    yInt = (i32)yA + (i32)partY + (mv->ver >> 2);
+
+    x0 = (xFrac) ? xInt-2 : xInt; /* fractional vector: start 2 samples earlier */
+    y0 = (yFrac) ? yInt-2 : yInt;
+
+    if (xFrac) /* NOTE(review): 16/32 and partWidth*2 look like padding; confirm */
+    {
+        if (partWidth == 16)
+            fillWidth = 32;
+        else
+            fillWidth = 16;
+    }
+    else
+        fillWidth = (partWidth*2);
+    if (yFrac)
+        fillHeight = partHeight+5; /* 5 extra rows when yFrac is non-zero */
+    else
+        fillHeight = partHeight;
+
+    /* Does the block to be read (margins included) cross a picture edge? */
+    if ((x0 < 0) || ((u32)x0+fillWidth > width) ||
+        (y0 < 0) || ((u32)y0+fillHeight > height))
+    {
+        h264bsdFillBlock(refPic->data, (u8*)pFill, x0, y0, width, height,
+                fillWidth, fillHeight, fillWidth);
+
+        x0 = 0;
+        y0 = 0;
+        ref = pFill;       /* interpolate from the edge-filled copy instead */
+        width = fillWidth; /* row stride of the copy */
+        if (yFrac)
+            ref += 2*width; /* skip the 2 rows added above the block */
+        if (xFrac)
+            ref += 2;       /* skip the 2 columns added left of the block */
+    }
+    else
+    {
+        /*lint --e(737) Loss of sign */
+        ref = refPic->data + yInt*width + xInt; /* read the picture directly */
+    }
+    /* Luma interpolation */
+    roi.width = (i32)partWidth;
+    roi.height = (i32)partHeight;
+
+    res = omxVCM4P10_InterpolateLuma(ref, (i32)width, partData, 16,
+                                        (i32)xFrac, (i32)yFrac, roi);
+    ASSERT(res == 0);
+
+    /* Chroma */
+    width  = 8 * refPic->width;  /* half the luma dimensions computed above */
+    height = 8 * refPic->height;
+
+    x0 = ((xA + partX) >> 1) + (mv->hor >> 3); /* half luma position + mv>>3 */
+    y0 = ((yA + partY) >> 1) + (mv->ver >> 3);
+    xFrac = mv->hor & 0x7; /* low three bits, passed on as the chroma fraction */
+    yFrac = mv->ver & 0x7;
+
+    ref = refPic->data + 256 * refPic->width * refPic->height; /* skip luma */
+
+    roi.width = (i32)(partWidth >> 1);
+    fillWidth = ((partWidth >> 1) + 8) & ~0x7; /* (width+1) rounded up to 8 */
+    roi.height = (i32)(partHeight >> 1);
+    fillHeight = (partHeight >> 1) + 1;        /* one extra row */
+
+    if ((x0 < 0) || ((u32)x0+fillWidth > width) ||
+        (y0 < 0) || ((u32)y0+fillHeight > height))
+    {
+        h264bsdFillBlock(ref, pFill, x0, y0, width, height,
+            fillWidth, fillHeight, fillWidth);
+        ref += width * height; /* second chroma plane follows the first */
+        h264bsdFillBlock(ref, pFill + fillWidth*fillHeight,
+            x0, y0, width, height, fillWidth,
+            fillHeight, fillWidth);
+
+        ref = pFill;
+        x0 = 0;
+        y0 = 0;
+        width = fillWidth;   /* stride and plane size now describe the copy, */
+        height = fillHeight; /* so 'height * width' below steps inside pFill */
+    }
+
+    partData = data + 16*16 + (partY>>1)*8 + (partX>>1); /* past 16x16 luma */
+
+    /* Chroma interpolation */
+    /*lint --e(737) Loss of sign */
+    ref += y0 * width + x0;
+    res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8,
+                            (u32)roi.width, (u32)roi.height, xFrac, yFrac);
+    ASSERT(res == 0);
+    partData += 8 * 8;     /* second 8x8 chroma block in the output array */
+    ref += height * width; /* same position in the second chroma plane */
+    res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8,
+                            (u32)roi.width, (u32)roi.height, xFrac, yFrac);
+    ASSERT(res == 0);
+
+}
+
+#endif /* H264DEC_OMXDL */
+
+
+/*------------------------------------------------------------------------------
+
+    Function: FillRow1
+
+        Functional description:
+          This function gets a row of reference pels in a 'normal' case when no
+          overfilling is necessary.
+
+------------------------------------------------------------------------------*/
+
+static void FillRow1(
+  u8 *ref,
+  u8 *fill,
+  i32 left,    /* not read; present so the signature fits the row-fill pointer */
+  i32 center,  /* number of pels to copy */
+  i32 right)   /* not read */
+{
+
+    ASSERT(ref);
+    ASSERT(fill);
+
+    H264SwDecMemcpy(fill, ref, (u32)center); /* plain copy, no overfilling */
+
+    /*lint -e(715) */
+}
+
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFillRow7
+
+        Functional description:
+          This function gets a row of reference pels when horizontal coordinate
+          is partly negative or partly greater than reference picture width
+          (overfilling some pels on left and/or right edge).
+        Inputs:
+          ref       pointer to reference samples
+          left      amount of pixels to overfill on left-edge
+          center    amount of pixels to copy
+          right     amount of pixels to overfill on right-edge
+        Outputs:
+          fill      pointer where samples are stored
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_NEON
+void h264bsdFillRow7(
+  u8 *ref,
+  u8 *fill,
+  i32 left,
+  i32 center,
+  i32 right)
+{
+    u8 edgePel;
+
+    ASSERT(ref);
+    ASSERT(fill);
+
+    /* Left overfill: repeat the first reference pel 'left' times */
+    if (left)
+    {
+        edgePel = ref[0];
+        do { *fill++ = edgePel; } while (--left);
+    }
+
+    /* Pels that lie inside the picture are copied unchanged */
+    while (center)
+    {
+        *fill++ = *ref++; center--;
+    }
+
+    /* Right overfill: repeat the pel just before 'ref' 'right' times */
+    if (right)
+        for (edgePel = ref[-1]; right; right--) *fill++ = edgePel;
+}
+#endif
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFillBlock
+
+        Functional description:
+          This function gets a block of reference pels. It determines whether
+          overfilling is needed or not and repeatedly calls an appropriate
+          function (by using a function pointer) that fills one row of the block.
+        Inputs:
+          ref               pointer to reference frame
+          x0                x-coordinate for block
+          y0                y-coordinate for block
+          width             width of reference frame
+          height            height of reference frame
+          blockWidth        width of block
+          blockHeight       height of block
+          fillScanLength    length of a line in output array (pixels)
+        Outputs:
+          fill              pointer to array where output block is written
+
+------------------------------------------------------------------------------*/
+
+void h264bsdFillBlock(
+  u8 *ref,
+  u8 *fill,
+  i32 x0,
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 blockWidth,
+  u32 blockHeight,
+  u32 fillScanLength)
+
+{
+
+/* Variables */
+
+    i32 xstop, ystop;                     /* one past the last column / row */
+    void (*fp)(u8*, u8*, i32, i32, i32);  /* row-fill routine chosen below */
+    i32 left, x, right;                   /* pels per row: overfill, copy, overfill */
+    i32 top, y, bottom;                   /* rows: overfill, copy, overfill */
+
+/* Code */
+
+    ASSERT(ref);
+    ASSERT(fill);
+    ASSERT(width);
+    ASSERT(height);
+    ASSERT(fill);
+    ASSERT(blockWidth);
+    ASSERT(blockHeight);
+
+    xstop = x0 + (i32)blockWidth;
+    ystop = y0 + (i32)blockHeight;
+
+    /* Choose correct function whether overfilling on left-edge or right-edge
+     * is needed or not */
+    if (x0 >= 0 && xstop <= (i32)width)
+        fp = FillRow1;
+    else
+        fp = h264bsdFillRow7;
+
+    if (ystop < 0)               /* block entirely above the picture */
+        y0 = -(i32)blockHeight;
+
+    if (xstop < 0)               /* block entirely left of the picture */
+        x0 = -(i32)blockWidth;
+
+    if (y0 > (i32)height)        /* block starts below the picture */
+        y0 = (i32)height;
+
+    if (x0 > (i32)width)         /* block starts right of the picture */
+        x0 = (i32)width;
+
+    xstop = x0 + (i32)blockWidth; /* recompute after clamping */
+    ystop = y0 + (i32)blockHeight;
+
+    if (x0 > 0)
+        ref += x0;
+
+    if (y0 > 0)
+        ref += y0 * (i32)width;
+
+    left = x0 < 0 ? -x0 : 0;
+    right = xstop > (i32)width ? xstop - (i32)width : 0;
+    x = (i32)blockWidth - left - right;
+
+    top = y0 < 0 ? -y0 : 0;
+    bottom = ystop > (i32)height ? ystop - (i32)height : 0;
+    y = (i32)blockHeight - top - bottom;
+
+    /* Top-overfilling */
+    for ( ; top; top-- )
+    {
+        (*fp)(ref, fill, left, x, right); /* ref not advanced: row is repeated */
+        fill += fillScanLength;
+    }
+
+    /* Lines inside reference image */
+    for ( ; y; y-- )
+    {
+        (*fp)(ref, fill, left, x, right);
+        ref += width;
+        fill += fillScanLength;
+    }
+
+    ref -= width; /* step back one row so bottom overfill repeats that row */
+
+    /* Bottom-overfilling */
+    for ( ; bottom; bottom-- )
+    {
+        (*fp)(ref, fill, left, x, right);
+        fill += fillScanLength;
+    }
+}
+
+/*lint +e701 +e702 */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.h
new file mode 100755
index 0000000..5a1a140
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_RECONSTRUCT_H
+#define H264SWDEC_RECONSTRUCT_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_image.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_OMXDL
+void h264bsdPredictSamples(
+  u8 *data,         /* output macroblock array */
+  mv_t *mv,         /* motion vector used for prediction */
+  image_t *refPic,  /* reference picture */
+  u32 xA,           /* x-coordinate of the current macroblock */
+  u32 yA,           /* y-coordinate of the current macroblock */
+  u32 partX,        /* x-offset of the partition in the macroblock */
+  u32 partY,        /* y-offset of the partition in the macroblock */
+  u32 partWidth,
+  u32 partHeight);
+#else
+void h264bsdPredictSamples(
+  u8 *data,
+  mv_t *mv,
+  image_t *refPic,
+  u32 colAndRow,/* packaged data | column    | row                |*/
+  u32 part,     /* packaged data |partX|partY|partWidth|partHeight|*/
+  u8 *pFill);   /* scratch buffer for edge-filled reference blocks */
+#endif
+
+void h264bsdFillBlock(
+  u8 * ref,            /* reference frame */
+  u8 * fill,           /* output array the block is written to */
+  i32 x0,              /* x-coordinate of the block; may lie outside the frame */
+  i32 y0,              /* y-coordinate of the block; may lie outside the frame */
+  u32 width,           /* width of the reference frame */
+  u32 height,          /* height of the reference frame */
+  u32 blockWidth,
+  u32 blockHeight,
+  u32 fillScanLength); /* length of one line in the output array (pixels) */
+
+void h264bsdInterpolateChromaHor(
+  u8 *pRef,
+  u8 *predPartChroma,
+  i32 x0,
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 xFrac,
+  u32 chromaPartWidth,
+  u32 chromaPartHeight);
+
+void h264bsdInterpolateChromaVer(
+  u8 *pRef,
+  u8 *predPartChroma,
+  i32 x0,
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 yFrac,
+  u32 chromaPartWidth,
+  u32 chromaPartHeight);
+
+void h264bsdInterpolateChromaHorVer(
+  u8 *ref,
+  u8 *predPartChroma,
+  i32 x0,
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 xFrac,
+  u32 yFrac,
+  u32 chromaPartWidth,
+  u32 chromaPartHeight);
+
+void h264bsdInterpolateVerHalf( /* luma position 'h' in h264bsdPredictSamples */
+  u8 *ref,
+  u8 *mb,
+  i32 x0,
+  i32 y0,           /* h264bsdPredictSamples passes yInt-2 */
+  u32 width,
+  u32 height,
+  u32 partWidth,
+  u32 partHeight);
+
+void h264bsdInterpolateVerQuarter(
+  u8 *ref,
+  u8 *mb,
+  i32 x0,
+  i32 y0,           /* h264bsdPredictSamples passes yInt-2 */
+  u32 width,
+  u32 height,
+  u32 partWidth,
+  u32 partHeight,
+  u32 verOffset);   /* 0 for luma position 'd', 1 for 'n' */
+
+void h264bsdInterpolateHorHalf( /* luma position 'b' in h264bsdPredictSamples */
+  u8 *ref,
+  u8 *mb,
+  i32 x0,           /* h264bsdPredictSamples passes xInt-2 */
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 partWidth,
+  u32 partHeight);
+
+void h264bsdInterpolateHorQuarter(
+  u8 *ref,
+  u8 *mb,
+  i32 x0,           /* h264bsdPredictSamples passes xInt-2 */
+  i32 y0,
+  u32 width,
+  u32 height,
+  u32 partWidth,
+  u32 partHeight,
+  u32 horOffset);   /* 0 for luma position 'a', 1 for 'c' */
+
+void h264bsdInterpolateHorVerQuarter(
+  u8 *ref,
+  u8 *mb,
+  i32 x0,             /* h264bsdPredictSamples passes xInt-2 */
+  i32 y0,             /* h264bsdPredictSamples passes yInt-2 */
+  u32 width,
+  u32 height,
+  u32 partWidth,
+  u32 partHeight,
+  u32 horVerOffset);  /* 0, 1, 2, 3 for luma positions 'e', 'g', 'p', 'r' */
+
+void h264bsdInterpolateMidHalf( /* luma position 'j' in h264bsdPredictSamples */
+  u8 *ref,
+  u8 *mb,
+  i32 x0,           /* h264bsdPredictSamples passes xInt-2 */
+  i32 y0,           /* h264bsdPredictSamples passes yInt-2 */
+  u32 width,
+  u32 height,
+  u32 partWidth,
+  u32 partHeight);
+
+void h264bsdInterpolateMidVerQuarter(
+  u8 *ref,
+  u8 *mb,
+  i32 x0,           /* h264bsdPredictSamples passes xInt-2 */
+  i32 y0,           /* h264bsdPredictSamples passes yInt-2 */
+  u32 width,
+  u32 height,
+  u32 partWidth,
+  u32 partHeight,
+  u32 verOffset);   /* 0 for luma position 'f', 1 for 'q' */
+
+void h264bsdInterpolateMidHorQuarter(
+  u8 *ref,
+  u8 *mb,
+  i32 x0,           /* h264bsdPredictSamples passes xInt-2 */
+  i32 y0,           /* h264bsdPredictSamples passes yInt-2 */
+  u32 width,
+  u32 height,
+  u32 partWidth,
+  u32 partHeight,
+  u32 horOffset);   /* 0 for luma position 'i', 1 for 'k' */
+
+
+void h264bsdFillRow7(
+  u8 *ref,      /* reference samples */
+  u8 *fill,     /* where the row is stored */
+  i32 left,     /* amount of pixels to overfill on left-edge */
+  i32 center,   /* amount of pixels to copy */
+  i32 right);   /* amount of pixels to overfill on right-edge */
+
+#endif /* #ifndef H264SWDEC_RECONSTRUCT_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.c
new file mode 100755
index 0000000..0756c47
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.c
@@ -0,0 +1,1692 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdDecodeSeiMessage
+          DecodeBufferingPeriod
+          DecodePictureTiming
+          DecodePanScanRectangle
+          DecodeFillerPayload
+          DecodeUserDataRegisteredITuTT35
+          DecodeUserDataUnregistered
+          DecodeRecoveryPoint
+          DecodeDecRefPicMarkingRepetition
+          DecodeSparePic
+          DecodeSceneInfo
+          DecodeSubSeqInfo
+          DecodeSubSeqLayerCharacteristics
+          DecodeSubSeqCharacteristics
+          DecodeFullFrameFreeze
+          DecodeFullFrameSnapshot
+          DecodeProgressiveRefinementSegmentStart
+          DecodeProgressiveRefinementSegmentEnd
+          DecodeMotionConstrainedSliceGroupSet
+          DecodeReservedSeiMessage
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_sei.h"
+#include "basetype.h"
+#include "h264bsd_util.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_vlc.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_slice_header.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+static const u32 numClockTS[9] = {1,1,1,2,2,3,3,2,3}; /* presumably indexed by pic_struct 0..8 -- confirm at use site */
+static const u32 ceilLog2NumSliceGroups[9] = {0,1,1,2,2,3,3,3,3}; /* NOTE(review): [1] is 1, Ceil(Log2(1)) is 0 -- confirm indexing */
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 DecodeBufferingPeriod(
+  strmData_t *pStrmData,
+  seiBufferingPeriod_t *pBufferingPeriod,
+  u32 cpbCnt,
+  u32 initialCpbRemovalDelayLength,
+  u32 nalHrdBpPresentFlag,
+  u32 vclHrdBpPresentFlag);
+
+static u32 DecodePictureTiming(
+  strmData_t *pStrmData,
+  seiPicTiming_t *pPicTiming,
+  u32 cpbRemovalDelayLength,
+  u32 dpbOutputDelayLength,
+  u32 timeOffsetLength,
+  u32 cpbDpbDelaysPresentFlag,
+  u32 picStructPresentFlag);
+
+static u32 DecodePanScanRectangle(
+  strmData_t *pStrmData,
+  seiPanScanRect_t *pPanScanRectangle);
+
+static u32 DecodeFillerPayload(strmData_t *pStrmData, u32 payloadSize);
+
+static u32 DecodeUserDataRegisteredITuTT35(
+  strmData_t *pStrmData,
+  seiUserDataRegisteredItuTT35_t *pUserDataRegisteredItuTT35,
+  u32 payloadSize);
+
+static u32 DecodeUserDataUnregistered(
+  strmData_t *pStrmData,
+  seiUserDataUnregistered_t *pUserDataUnregistered,
+  u32 payloadSize);
+
+static u32 DecodeRecoveryPoint(
+  strmData_t *pStrmData,
+  seiRecoveryPoint_t *pRecoveryPoint);
+
+static u32 DecodeDecRefPicMarkingRepetition(
+  strmData_t *pStrmData,
+  seiDecRefPicMarkingRepetition_t *pDecRefPicMarkingRepetition,
+  u32 numRefFrames);
+
+static u32 DecodeSparePic(
+  strmData_t *pStrmData,
+  seiSparePic_t *pSparePic,
+  u32 picSizeInMapUnits);
+
+static u32 DecodeSceneInfo(
+  strmData_t *pStrmData,
+  seiSceneInfo_t *pSceneInfo);
+
+static u32 DecodeSubSeqInfo(
+  strmData_t *pStrmData,
+  seiSubSeqInfo_t *pSubSeqInfo);
+
+static u32 DecodeSubSeqLayerCharacteristics(
+  strmData_t *pStrmData,
+  seiSubSeqLayerCharacteristics_t *pSubSeqLayerCharacteristics);
+
+static u32 DecodeSubSeqCharacteristics(
+  strmData_t *pStrmData,
+  seiSubSeqCharacteristics_t *pSubSeqCharacteristics);
+
+static u32 DecodeFullFrameFreeze(
+  strmData_t *pStrmData,
+  seiFullFrameFreeze_t *pFullFrameFreeze);
+
+static u32 DecodeFullFrameSnapshot(
+  strmData_t *pStrmData,
+  seiFullFrameSnapshot_t *pFullFrameSnapshot);
+
+static u32 DecodeProgressiveRefinementSegmentStart(
+  strmData_t *pStrmData,
+  seiProgressiveRefinementSegmentStart_t *pProgressiveRefinementSegmentStart);
+
+static u32 DecodeProgressiveRefinementSegmentEnd(
+  strmData_t *pStrmData,
+  seiProgressiveRefinementSegmentEnd_t *pProgressiveRefinementSegmentEnd);
+
+static u32 DecodeMotionConstrainedSliceGroupSet(
+  strmData_t *pStrmData,
+  seiMotionConstrainedSliceGroupSet_t *pMotionConstrainedSliceGroupSet,
+  u32 numSliceGroups);
+
+static u32 DecodeReservedSeiMessage(
+  strmData_t *pStrmData,
+  seiReservedSeiMessage_t *pReservedSeiMessage,
+  u32 payloadSize);
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdDecodeSeiMessage
+
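+        Functional description:
+          Reads SEI payload type and size and decodes the payload by type.
+        Inputs:
+          pStrmData, pSeqParamSet, numSliceGroups
+        Outputs:
+          pSeiMessage (cleared on entry); HANTRO_NOK on end of stream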
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSeiMessage(
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  seiMessage_t *pSeiMessage,
+  u32 numSliceGroups)
+{
+
+/* Variables */
+
+    u32 tmp, payloadType, payloadSize, status;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeiMessage);
+
+
+    H264SwDecMemset(pSeiMessage, 0, sizeof(seiMessage_t));
+
+    do
+    {
+        payloadType = 0;
+        while((tmp = h264bsdGetBits(pStrmData, 8)) == 0xFF)
+        {
+            payloadType += 255;
+                    }
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        payloadType += tmp;
+
+        payloadSize = 0;
+        while((tmp = h264bsdGetBits(pStrmData, 8)) == 0xFF)
+        {
+            payloadSize += 255;
+        }
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        payloadSize += tmp;
+
+        pSeiMessage->payloadType = payloadType;
+
+        switch (payloadType)
+        {
+            case 0:
+                ASSERT(pSeqParamSet);
+                status = DecodeBufferingPeriod(
+                  pStrmData,
+                  &pSeiMessage->bufferingPeriod,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.cpbCnt,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.
+                  initialCpbRemovalDelayLength,
+                  pSeqParamSet->vuiParameters->nalHrdParametersPresentFlag,
+                  pSeqParamSet->vuiParameters->vclHrdParametersPresentFlag);
+                break;
+
+            case 1:
+                ASSERT(pSeqParamSet->vuiParametersPresentFlag);
+                status = DecodePictureTiming(
+                  pStrmData,
+                  &pSeiMessage->picTiming,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.
+                      cpbRemovalDelayLength,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.
+                      dpbOutputDelayLength,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.
+                    timeOffsetLength,
+                  pSeqParamSet->vuiParameters->nalHrdParametersPresentFlag ||
+                  pSeqParamSet->vuiParameters->vclHrdParametersPresentFlag ?
+                  HANTRO_TRUE : HANTRO_FALSE,
+                  pSeqParamSet->vuiParameters->picStructPresentFlag);
+                break;
+
+            case 2:
+                status = DecodePanScanRectangle(
+                  pStrmData,
+                  &pSeiMessage->panScanRect);
+                break;
+
+            case 3:
+                status = DecodeFillerPayload(pStrmData, payloadSize);
+                break;
+
+            case 4:
+                status = DecodeUserDataRegisteredITuTT35(
+                  pStrmData,
+                  &pSeiMessage->userDataRegisteredItuTT35,
+                  payloadSize);
+                break;
+
+            case 5:
+                status = DecodeUserDataUnregistered(
+                  pStrmData,
+                  &pSeiMessage->userDataUnregistered,
+                  payloadSize);
+                break;
+
+            case 6:
+                status = DecodeRecoveryPoint(
+                  pStrmData,
+                  &pSeiMessage->recoveryPoint);
+                break;
+
+            case 7:
+                status = DecodeDecRefPicMarkingRepetition(
+                  pStrmData,
+                  &pSeiMessage->decRefPicMarkingRepetition,
+                  pSeqParamSet->numRefFrames);
+                break;
+
+            case 8:
+                ASSERT(pSeqParamSet);
+                status = DecodeSparePic(
+                  pStrmData,
+                  &pSeiMessage->sparePic,
+                  pSeqParamSet->picWidthInMbs * pSeqParamSet->picHeightInMbs);
+                break;
+
+            case 9:
+                status = DecodeSceneInfo(
+                  pStrmData,
+                  &pSeiMessage->sceneInfo);
+                break;
+
+            case 10:
+                status = DecodeSubSeqInfo(
+                  pStrmData,
+                  &pSeiMessage->subSeqInfo);
+                break;
+
+            case 11:
+                status = DecodeSubSeqLayerCharacteristics(
+                  pStrmData,
+                  &pSeiMessage->subSeqLayerCharacteristics);
+                break;
+
+            case 12:
+                status = DecodeSubSeqCharacteristics(
+                  pStrmData,
+                  &pSeiMessage->subSeqCharacteristics);
+                break;
+
+            case 13:
+                status = DecodeFullFrameFreeze(
+                  pStrmData,
+                  &pSeiMessage->fullFrameFreeze);
+                break;
+
+            case 14: /* This SEI does not contain data, what to do ??? */
+                status = HANTRO_OK;
+                break;
+
+            case 15:
+                status = DecodeFullFrameSnapshot(
+                  pStrmData,
+                  &pSeiMessage->fullFrameSnapshot);
+                break;
+
+            case 16:
+                status = DecodeProgressiveRefinementSegmentStart(
+                  pStrmData,
+                  &pSeiMessage->progressiveRefinementSegmentStart);
+                break;
+
+            case 17:
+                status = DecodeProgressiveRefinementSegmentEnd(
+                  pStrmData,
+                  &pSeiMessage->progressiveRefinementSegmentEnd);
+                break;
+
+            case 18:
+                ASSERT(numSliceGroups);
+                status = DecodeMotionConstrainedSliceGroupSet(
+                  pStrmData,
+                  &pSeiMessage->motionConstrainedSliceGroupSet,
+                  numSliceGroups);
+                break;
+
+            default:
+                status = DecodeReservedSeiMessage(
+                  pStrmData,
+                  &pSeiMessage->reservedSeiMessage,
+                  payloadSize);
+                break;
+        }
+
+        if (status != HANTRO_OK)
+            return(status);
+
+        while (!h264bsdIsByteAligned(pStrmData))
+        {
+            if (h264bsdGetBits(pStrmData, 1) != 1)
+                return(HANTRO_NOK);
+            while (!h264bsdIsByteAligned(pStrmData))
+            {
+                if (h264bsdGetBits(pStrmData, 1) != 0)
+                    return(HANTRO_NOK);
+            }
+        }
+    } while (h264bsdMoreRbspData(pStrmData));
+
+    return(h264bsdRbspTrailingBits(pStrmData));
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeBufferingPeriod
+
+        Functional description:
+          Parse a buffering period SEI payload: the sequence parameter set
+          id, then cpbCnt delay/offset pairs for each HRD type flagged present.
+        Inputs:
+          pStrmData, cpbCnt, initialCpbRemovalDelayLength, nal/vcl HRD flags
+        Outputs:
+          pBufferingPeriod is filled in; returns HANTRO_OK or an error code
+------------------------------------------------------------------------------*/
+
+static u32 DecodeBufferingPeriod(
+  strmData_t *pStrmData,
+  seiBufferingPeriod_t *pBufferingPeriod,
+  u32 cpbCnt,
+  u32 initialCpbRemovalDelayLength,
+  u32 nalHrdBpPresentFlag,
+  u32 vclHrdBpPresentFlag)
+{
+
+/* Variables */
+
+    u32 status, bits, idx;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pBufferingPeriod);
+    ASSERT(cpbCnt);
+    ASSERT(initialCpbRemovalDelayLength);
+
+    /* the parameter set id is stored first and then checked against 31 */
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+      &pBufferingPeriod->seqParameterSetId);
+    if (status != HANTRO_OK)
+        return status;
+    if (pBufferingPeriod->seqParameterSetId > 31)
+        return HANTRO_NOK;
+
+    /* NAL HRD pass: a removal delay of zero is rejected here */
+    if (nalHrdBpPresentFlag)
+    {
+        for (idx = 0; idx < cpbCnt; idx++)
+        {
+            bits = h264bsdGetBits(pStrmData, initialCpbRemovalDelayLength);
+            if (bits == END_OF_STREAM || bits == 0)
+                return HANTRO_NOK;
+            pBufferingPeriod->initialCpbRemovalDelay[idx] = bits;
+
+            bits = h264bsdGetBits(pStrmData, initialCpbRemovalDelayLength);
+            if (bits == END_OF_STREAM)
+                return HANTRO_NOK;
+            pBufferingPeriod->initialCpbRemovalDelayOffset[idx] = bits;
+        }
+    }
+
+    /* VCL HRD pass: stored in the same arrays, so it replaces NAL values */
+    if (vclHrdBpPresentFlag)
+    {
+        for (idx = 0; idx < cpbCnt; idx++)
+        {
+            bits = h264bsdGetBits(pStrmData, initialCpbRemovalDelayLength);
+            if (bits == END_OF_STREAM)
+                return HANTRO_NOK;
+            pBufferingPeriod->initialCpbRemovalDelay[idx] = bits;
+
+            bits = h264bsdGetBits(pStrmData, initialCpbRemovalDelayLength);
+            if (bits == END_OF_STREAM)
+                return HANTRO_NOK;
+            pBufferingPeriod->initialCpbRemovalDelayOffset[idx] = bits;
+        }
+    }
+
+    return HANTRO_OK;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodePictureTiming
+
+        Functional description:
+          Parse a picture timing SEI payload: optional CPB/DPB delays and,
+          when picStructPresentFlag is set, picStruct with clock timestamps.
+        Inputs:
+          pStrmData, bit lengths of the delay/offset fields, presence flags
+        Outputs:
+          pPicTiming is filled in; returns HANTRO_OK or HANTRO_NOK
+------------------------------------------------------------------------------*/
+
+static u32 DecodePictureTiming(
+  strmData_t *pStrmData,
+  seiPicTiming_t *pPicTiming,
+  u32 cpbRemovalDelayLength,
+  u32 dpbOutputDelayLength,
+  u32 timeOffsetLength,
+  u32 cpbDpbDelaysPresentFlag,
+  u32 picStructPresentFlag)
+{
+
+/* Variables */
+
+    u32 tmp, i;
+    i32 itmp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pPicTiming);
+
+    /* the two delays are read only when cpbDpbDelaysPresentFlag is set */
+    if (cpbDpbDelaysPresentFlag)
+    {
+        tmp = h264bsdGetBits(pStrmData, cpbRemovalDelayLength);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pPicTiming->cpbRemovalDelay = tmp;
+
+        tmp = h264bsdGetBits(pStrmData, dpbOutputDelayLength);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pPicTiming->dpbOutputDelay = tmp;
+    }
+    /* picStruct is 4 bits; values above 8 are rejected before indexing */
+    if (picStructPresentFlag)
+    {
+        tmp = h264bsdGetBits(pStrmData, 4);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        if (tmp > 8)
+            return(HANTRO_NOK);
+        pPicTiming->picStruct = tmp;
+        /* numClockTS[picStruct] gives the number of timestamp slots read */
+        for (i = 0; i < numClockTS[pPicTiming->picStruct]; i++)
+        {
+            tmp = h264bsdGetBits(pStrmData, 1);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            pPicTiming->clockTimeStampFlag[i] = tmp == 1 ?
+                                    HANTRO_TRUE : HANTRO_FALSE;
+
+            if (pPicTiming->clockTimeStampFlag[i])
+            {
+                tmp = h264bsdGetBits(pStrmData, 2);
+                if (tmp == END_OF_STREAM)
+                    return(HANTRO_NOK);
+                pPicTiming->ctType[i] = tmp;
+
+                tmp = h264bsdGetBits(pStrmData, 1);
+                if (tmp == END_OF_STREAM)
+                    return(HANTRO_NOK);
+                pPicTiming->nuitFieldBasedFlag[i] = tmp == 1 ?
+                                    HANTRO_TRUE : HANTRO_FALSE;
+
+                tmp = h264bsdGetBits(pStrmData, 5);
+                if (tmp == END_OF_STREAM)
+                    return(HANTRO_NOK);
+                if (tmp > 6)
+                    return(HANTRO_NOK);
+                pPicTiming->countingType[i] = tmp;
+
+                tmp = h264bsdGetBits(pStrmData, 1);
+                if (tmp == END_OF_STREAM)
+                    return(HANTRO_NOK);
+                pPicTiming->fullTimeStampFlag[i] = tmp == 1 ?
+                                    HANTRO_TRUE : HANTRO_FALSE;
+
+                tmp = h264bsdGetBits(pStrmData, 1);
+                if (tmp == END_OF_STREAM)
+                    return(HANTRO_NOK);
+                pPicTiming->discontinuityFlag[i] = tmp == 1 ?
+                                    HANTRO_TRUE : HANTRO_FALSE;
+
+                tmp = h264bsdGetBits(pStrmData, 1);
+                if (tmp == END_OF_STREAM)
+                    return(HANTRO_NOK);
+                pPicTiming->cntDroppedFlag[i] = tmp == 1 ?
+                                    HANTRO_TRUE : HANTRO_FALSE;
+
+                tmp = h264bsdGetBits(pStrmData, 8);
+                if (tmp == END_OF_STREAM)
+                    return(HANTRO_NOK);
+                pPicTiming->nFrames[i] = tmp;
+                /* full stamp: s, m, h read directly; else each is flagged */
+                if (pPicTiming->fullTimeStampFlag[i])
+                {
+                    tmp = h264bsdGetBits(pStrmData, 6);
+                    if (tmp == END_OF_STREAM)
+                        return(HANTRO_NOK);
+                    if (tmp > 59)
+                        return(HANTRO_NOK);
+                    pPicTiming->secondsValue[i] = tmp;
+
+                    tmp = h264bsdGetBits(pStrmData, 6);
+                    if (tmp == END_OF_STREAM)
+                        return(HANTRO_NOK);
+                    if (tmp > 59)
+                        return(HANTRO_NOK);
+                    pPicTiming->minutesValue[i] = tmp;
+
+                    tmp = h264bsdGetBits(pStrmData, 5);
+                    if (tmp == END_OF_STREAM)
+                        return(HANTRO_NOK);
+                    if (tmp > 23)
+                        return(HANTRO_NOK);
+                    pPicTiming->hoursValue[i] = tmp;
+                }
+                else
+                {
+                    tmp = h264bsdGetBits(pStrmData, 1);
+                    if (tmp == END_OF_STREAM)
+                        return(HANTRO_NOK);
+                    pPicTiming->secondsFlag[i] = tmp == 1 ?
+                                    HANTRO_TRUE : HANTRO_FALSE;
+
+                    if (pPicTiming->secondsFlag[i])
+                    {
+                        tmp = h264bsdGetBits(pStrmData, 6);
+                        if (tmp == END_OF_STREAM)
+                            return(HANTRO_NOK);
+                        if (tmp > 59)
+                            return(HANTRO_NOK);
+                        pPicTiming->secondsValue[i] = tmp;
+
+                        tmp = h264bsdGetBits(pStrmData, 1);
+                        if (tmp == END_OF_STREAM)
+                            return(HANTRO_NOK);
+                        pPicTiming->minutesFlag[i] = tmp == 1 ?
+                                    HANTRO_TRUE : HANTRO_FALSE;
+
+                        if (pPicTiming->minutesFlag[i])
+                        {
+                            tmp = h264bsdGetBits(pStrmData, 6);
+                            if (tmp == END_OF_STREAM)
+                                return(HANTRO_NOK);
+                            if (tmp > 59)
+                                return(HANTRO_NOK);
+                            pPicTiming->minutesValue[i] = tmp;
+
+                            tmp = h264bsdGetBits(pStrmData, 1);
+                            if (tmp == END_OF_STREAM)
+                                return(HANTRO_NOK);
+                            pPicTiming->hoursFlag[i] = tmp == 1 ?
+                                    HANTRO_TRUE : HANTRO_FALSE;
+
+                            if (pPicTiming->hoursFlag[i])
+                            {
+                                tmp = h264bsdGetBits(pStrmData, 5);
+                                if (tmp == END_OF_STREAM)
+                                    return(HANTRO_NOK);
+                                if (tmp > 23)
+                                    return(HANTRO_NOK);
+                                pPicTiming->hoursValue[i] = tmp;
+                            }
+                        }
+                    }
+                }
+                if (timeOffsetLength)
+                {
+                    tmp = h264bsdGetBits(pStrmData, timeOffsetLength);
+                    if (tmp == END_OF_STREAM)
+                        return(HANTRO_NOK);
+                    itmp = (i32)tmp;
+                    /* following "converts" timeOffsetLength-bit signed
+                     * integer into i32 */
+                    /*lint -save -e701 -e702 */
+                    itmp <<= (32 - timeOffsetLength);
+                    itmp >>= (32 - timeOffsetLength);
+                    /*lint -restore */
+                    pPicTiming->timeOffset[i] = itmp;
+                                    }
+                else
+                    pPicTiming->timeOffset[i] = 0;
+            }
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodePanScanRectangle
+
+        Functional description:
+          Parse a pan-scan rectangle SEI payload. Unless the cancel flag is
+          set, read one to three rectangles and the repetition period.
+        Inputs:
+          pStrmData           stream to read from
+        Outputs:
+          pPanScanRectangle is filled in; returns HANTRO_OK or an error code
+------------------------------------------------------------------------------*/
+
+static u32 DecodePanScanRectangle(
+  strmData_t *pStrmData,
+  seiPanScanRect_t *pPanScanRectangle)
+{
+
+/* Variables */
+
+    u32 status, bits, rect;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pPanScanRectangle);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+      &pPanScanRectangle->panScanRectId);
+    if (status != HANTRO_OK)
+        return status;
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+        return HANTRO_NOK;
+    pPanScanRectangle->panScanRectCancelFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+    if (pPanScanRectangle->panScanRectCancelFlag)
+        return HANTRO_OK;   /* cancelled: nothing else is read */
+
+    /* the coded value is the count minus one; only 0..2 is accepted */
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+      &pPanScanRectangle->panScanCnt);
+    if (status != HANTRO_OK)
+        return status;
+    if (pPanScanRectangle->panScanCnt > 2)
+        return HANTRO_NOK;
+    pPanScanRectangle->panScanCnt += 1;
+
+    /* four signed offsets per rectangle: left, right, top, bottom */
+    for (rect = 0; rect < pPanScanRectangle->panScanCnt; rect++)
+    {
+        status = h264bsdDecodeExpGolombSigned(pStrmData,
+          &pPanScanRectangle->panScanRectLeftOffset[rect]);
+        if (status != HANTRO_OK)
+            return status;
+
+        status = h264bsdDecodeExpGolombSigned(pStrmData,
+          &pPanScanRectangle->panScanRectRightOffset[rect]);
+        if (status != HANTRO_OK)
+            return status;
+
+        status = h264bsdDecodeExpGolombSigned(pStrmData,
+          &pPanScanRectangle->panScanRectTopOffset[rect]);
+        if (status != HANTRO_OK)
+            return status;
+
+        status = h264bsdDecodeExpGolombSigned(pStrmData,
+          &pPanScanRectangle->panScanRectBottomOffset[rect]);
+        if (status != HANTRO_OK)
+            return status;
+    }
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+      &pPanScanRectangle->panScanRectRepetitionPeriod);
+    if (status != HANTRO_OK)
+        return status;
+    if (pPanScanRectangle->panScanRectRepetitionPeriod > 16384 ||
+      (pPanScanRectangle->panScanCnt > 1 &&
+       pPanScanRectangle->panScanRectRepetitionPeriod > 1))
+        return HANTRO_NOK;
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeFillerPayload
+
+        Functional description:
+          Skip over a filler payload SEI message without storing anything.
+        Inputs:
+          pStrmData, payloadSize (byte count; multiplied by 8 for the flush)
+        Outputs:
+          returns HANTRO_NOK if the flush hits END_OF_STREAM, else HANTRO_OK
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeFillerPayload(strmData_t *pStrmData, u32 payloadSize)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(pStrmData);
+
+    /* an empty payload returns without touching the stream */
+    if (payloadSize == 0)
+        return HANTRO_OK;
+
+    /* the filler bytes are flushed, never copied anywhere */
+    return (h264bsdFlushBits(pStrmData, 8 * payloadSize) == END_OF_STREAM) ?
+        HANTRO_NOK : HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeUserDataRegisteredITuTT35
+
+        Functional description:
+          Parse a user data registered (ITU-T T.35) SEI payload: country code,
+          optional extension byte, then the rest into an allocated byte buffer.
+        Inputs:
+          pStrmData, payloadSize (total payload length in bytes)
+        Outputs:
+          pUserDataRegisteredItuTT35; HANTRO_OK/NOK or MEMORY_ALLOCATION_ERROR
+------------------------------------------------------------------------------*/
+
+static u32 DecodeUserDataRegisteredITuTT35(
+  strmData_t *pStrmData,
+  seiUserDataRegisteredItuTT35_t *pUserDataRegisteredItuTT35,
+  u32 payloadSize)
+{
+
+/* Variables */
+
+    u32 tmp, i, j;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pUserDataRegisteredItuTT35);
+    ASSERT(payloadSize);
+
+    tmp = h264bsdGetBits(pStrmData, 8);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pUserDataRegisteredItuTT35->ituTT35CountryCode = tmp;
+
+    i = 1;  /* payload bytes taken by the country code header so far */
+    if (pUserDataRegisteredItuTT35->ituTT35CountryCode == 0xFF)
+    {
+        tmp = h264bsdGetBits(pStrmData, 8);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pUserDataRegisteredItuTT35->ituTT35CountryCodeExtensionByte = tmp;
+        i = 2;
+    }
+
+    /* At least one data byte must follow the header; otherwise
+     * payloadSize - i is zero or wraps and the copy overruns the buffer. */
+    if (payloadSize <= i)
+        return(HANTRO_NOK);
+
+    /* NOTE(review): no matching FREE() is visible in this function */
+    ALLOCATE(pUserDataRegisteredItuTT35->ituTT35PayloadByte,payloadSize-i,u8);
+    pUserDataRegisteredItuTT35->numPayloadBytes = payloadSize - i;
+    if (pUserDataRegisteredItuTT35->ituTT35PayloadByte == NULL)
+        return(MEMORY_ALLOCATION_ERROR);
+
+    for (j = 0; j < payloadSize - i; j++)
+    {
+        tmp = h264bsdGetBits(pStrmData, 8);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pUserDataRegisteredItuTT35->ituTT35PayloadByte[j] = (u8)tmp;
+    }
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeUserDataUnregistered
+
+        Functional description:
+          Parse a user data unregistered SEI payload: four 32-bit UUID words
+          followed by payloadSize - 16 bytes copied into an allocated buffer.
+        Inputs:
+          pStrmData, payloadSize (total payload length in bytes, >= 16)
+        Outputs:
+          pUserDataUnregistered; HANTRO_OK/NOK or MEMORY_ALLOCATION_ERROR
+------------------------------------------------------------------------------*/
+
+static u32 DecodeUserDataUnregistered(
+  strmData_t *pStrmData,
+  seiUserDataUnregistered_t *pUserDataUnregistered,
+  u32 payloadSize)
+{
+
+/* Variables */
+
+    u32 i, tmp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pUserDataUnregistered);
+
+    /* the 16 UUID bytes must fit, otherwise payloadSize - 16 wraps around */
+    if (payloadSize < 16)
+        return(HANTRO_NOK);
+
+    for (i = 0; i < 4; i++)
+    {
+        pUserDataUnregistered->uuidIsoIec11578[i] = h264bsdShowBits32(pStrmData);
+        if (h264bsdFlushBits(pStrmData,32) == END_OF_STREAM)
+            return(HANTRO_NOK);
+    }
+
+    /* NOTE(review): no matching FREE() is visible in this function */
+    ALLOCATE(pUserDataUnregistered->userDataPayloadByte, payloadSize - 16, u8);
+    if (pUserDataUnregistered->userDataPayloadByte == NULL)
+        return(MEMORY_ALLOCATION_ERROR);
+    pUserDataUnregistered->numPayloadBytes = payloadSize - 16;
+
+    for (i = 0; i < payloadSize - 16; i++)
+    {
+        tmp = h264bsdGetBits(pStrmData, 8);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pUserDataUnregistered->userDataPayloadByte[i] = (u8)tmp;
+    }
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeRecoveryPoint
+
+        Functional description:
+          Parse a recovery point SEI payload (frame count, two flags, 2-bit idc).
+        Inputs:
+          pStrmData           stream to read from
+        Outputs:
+          pRecoveryPoint is filled in; returns HANTRO_OK or an error code
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeRecoveryPoint(
+  strmData_t *pStrmData,
+  seiRecoveryPoint_t *pRecoveryPoint)
+{
+
+/* Variables */
+
+    u32 status, bits;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pRecoveryPoint);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pRecoveryPoint->recoveryFrameCnt);
+    if (status != HANTRO_OK)
+        return status;
+
+    /* two single-bit flags: exact match first, then broken link */
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+        return HANTRO_NOK;
+    pRecoveryPoint->exactMatchFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+        return HANTRO_NOK;
+    pRecoveryPoint->brokenLinkFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    /* 2-bit slice group idc; values above 2 are not accepted */
+    bits = h264bsdGetBits(pStrmData, 2);
+    if (bits == END_OF_STREAM || bits > 2)
+        return HANTRO_NOK;
+    pRecoveryPoint->changingSliceGroupIdc = bits;
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeDecRefPicMarkingRepetition
+
+        Functional description:
+          Parse a decoded reference picture marking repetition SEI payload.
+        Inputs:
+          pStrmData, numRefFrames (passed on to h264bsdDecRefPicMarking)
+        Outputs:
+          pDecRefPicMarkingRepetition; returns HANTRO_OK or an error code
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeDecRefPicMarkingRepetition(
+  strmData_t *pStrmData,
+  seiDecRefPicMarkingRepetition_t *pDecRefPicMarkingRepetition,
+  u32 numRefFrames)
+{
+
+/* Variables */
+
+    u32 status, bits;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pDecRefPicMarkingRepetition);
+
+    /* single-bit IDR flag of the original picture */
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+        return HANTRO_NOK;
+    pDecRefPicMarkingRepetition->originalIdrFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+      &pDecRefPicMarkingRepetition->originalFrameNum);
+    if (status != HANTRO_OK)
+        return status;
+
+    /* Only frame coding is assumed (frame_mbs_only_flag set), so the field
+     * related syntax elements of the H.264 standard are not parsed here.
+     * The marking decoder's status is handed back to the caller unchanged. */
+    return h264bsdDecRefPicMarking(pStrmData,
+      &pDecRefPicMarkingRepetition->decRefPicMarking, NAL_SEI, numRefFrames);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSparePic
+
+        Functional description:
+          Parse a spare picture SEI payload; per spare picture an optional map
+          (unit flags or zero run lengths) is read into an allocated array.
+        Inputs:
+          pStrmData, picSizeInMapUnits (number of entries per allocated map)
+        Outputs:
+          pSparePic; HANTRO_OK/NOK or MEMORY_ALLOCATION_ERROR
+------------------------------------------------------------------------------*/
+
+static u32 DecodeSparePic(
+  strmData_t *pStrmData,
+  seiSparePic_t *pSparePic,
+  u32 picSizeInMapUnits)
+{
+
+/* Variables */
+
+    u32 tmp, i, j, mapUnitCnt;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSparePic);
+
+    /* clear every map pointer first so no return path leaves one unset */
+    for (i = 0; i < MAX_NUM_SPARE_PICS; i++)
+    {
+        pSparePic->spareUnitFlag[i] = NULL;
+        pSparePic->zeroRunLength[i] = NULL;
+    }
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pSparePic->targetFrameNum);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pSparePic->spareFieldFlag = tmp == 1 ? HANTRO_TRUE : HANTRO_FALSE;
+    /* do not accept fields */
+    if (pSparePic->spareFieldFlag)
+        return(HANTRO_NOK);
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &pSparePic->numSparePics);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    /* test before incrementing so a huge coded value cannot wrap to zero */
+    if (pSparePic->numSparePics >= MAX_NUM_SPARE_PICS)
+        return(HANTRO_NOK);
+    pSparePic->numSparePics++;
+
+    for (i = 0; i < pSparePic->numSparePics; i++)
+    {
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pSparePic->deltaSpareFrameNum[i]);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSparePic->spareAreaIdc[i]);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pSparePic->spareAreaIdc[i] > 2)
+            return(HANTRO_NOK);
+
+        if (pSparePic->spareAreaIdc[i] == 1)
+        {
+            /* NOTE(review): no matching FREE() is visible in this function */
+            ALLOCATE(pSparePic->spareUnitFlag[i], picSizeInMapUnits, u32);
+            if (pSparePic->spareUnitFlag[i] == NULL)
+                return(MEMORY_ALLOCATION_ERROR);
+
+            for (j = 0; j < picSizeInMapUnits; j++)
+            {
+                tmp = h264bsdGetBits(pStrmData, 1);
+                if (tmp == END_OF_STREAM)
+                    return(HANTRO_NOK);
+                pSparePic->spareUnitFlag[i][j] = tmp == 1 ?
+                                    HANTRO_TRUE : HANTRO_FALSE;
+            }
+        }
+        else if (pSparePic->spareAreaIdc[i] == 2)
+        {
+            /* NOTE(review): no matching FREE() is visible in this function */
+            ALLOCATE(pSparePic->zeroRunLength[i], picSizeInMapUnits, u32);
+            if (pSparePic->zeroRunLength[i] == NULL)
+                return(MEMORY_ALLOCATION_ERROR);
+
+            for (j = 0, mapUnitCnt = 0; mapUnitCnt < picSizeInMapUnits; j++)
+            {
+                /* more runs than map units means mapUnitCnt wrapped */
+                if (j >= picSizeInMapUnits)
+                    return(HANTRO_NOK);
+                tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+                  &pSparePic->zeroRunLength[i][j]);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                mapUnitCnt += pSparePic->zeroRunLength[i][j] + 1;
+            }
+        }
+    }
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSceneInfo
+
+        Functional description:
+          Parse a scene information SEI payload.
+        Inputs:
+          pStrmData           stream to read from
+        Outputs:
+          pSceneInfo is filled in; returns HANTRO_OK or an error code
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeSceneInfo(
+  strmData_t *pStrmData,
+  seiSceneInfo_t *pSceneInfo)
+{
+
+/* Variables */
+
+    u32 status, bits;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSceneInfo);
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+        return HANTRO_NOK;
+    pSceneInfo->sceneInfoPresentFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    /* nothing more is read when the scene info is flagged absent */
+    if (!pSceneInfo->sceneInfoPresentFlag)
+        return HANTRO_OK;
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData, &pSceneInfo->sceneId);
+    if (status != HANTRO_OK)
+        return status;
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+      &pSceneInfo->sceneTransitionType);
+    if (status != HANTRO_OK)
+        return status;
+    if (pSceneInfo->sceneTransitionType > 6)
+        return HANTRO_NOK;
+
+    /* a second scene id is read only for a non-zero transition type */
+    if (pSceneInfo->sceneTransitionType == 0)
+        return HANTRO_OK;
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+      &pSceneInfo->secondSceneId);
+    if (status != HANTRO_OK)
+        return status;
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSubSeqInfo
+
+        Functional description:
+          Parse a sub-sequence information SEI payload.
+        Inputs:
+          pStrmData           stream to read from
+        Outputs:
+          pSubSeqInfo is filled in; returns HANTRO_OK or an error code
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeSubSeqInfo(
+  strmData_t *pStrmData,
+  seiSubSeqInfo_t *pSubSeqInfo)
+{
+
+/* Variables */
+
+    u32 status, bits;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSubSeqInfo);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pSubSeqInfo->subSeqLayerNum);
+    if (status != HANTRO_OK)
+        return status;
+    if (pSubSeqInfo->subSeqLayerNum > 255)
+        return HANTRO_NOK;
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData, &pSubSeqInfo->subSeqId);
+    if (status != HANTRO_OK)
+        return status;
+    if (pSubSeqInfo->subSeqId > 65535)
+        return HANTRO_NOK;
+
+    /* four one-bit flags follow, stored in the order they are read */
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+        return HANTRO_NOK;
+    pSubSeqInfo->firstRefPicFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+        return HANTRO_NOK;
+    pSubSeqInfo->leadingNonRefPicFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+        return HANTRO_NOK;
+    pSubSeqInfo->lastPicFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+        return HANTRO_NOK;
+    pSubSeqInfo->subSeqFrameNumFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    /* the frame number is read only when its flag is set */
+    if (!pSubSeqInfo->subSeqFrameNumFlag)
+        return HANTRO_OK;
+
+    return h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pSubSeqInfo->subSeqFrameNum);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSubSeqLayerCharacteristics
+
+        Functional description:
+          Parse a sub-sequence layer characteristics SEI payload.
+        Inputs:
+          pStrmData           stream to read from
+        Outputs:
+          pSubSeqLayerCharacteristics; returns HANTRO_OK or an error code
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeSubSeqLayerCharacteristics(
+  strmData_t *pStrmData,
+  seiSubSeqLayerCharacteristics_t *pSubSeqLayerCharacteristics)
+{
+
+/* Variables */
+
+    u32 status, bits, idx;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSubSeqLayerCharacteristics);
+
+    /* the decoded value is incremented by one to give the layer count */
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+      &pSubSeqLayerCharacteristics->numSubSeqLayers);
+    if (status != HANTRO_OK)
+        return status;
+    pSubSeqLayerCharacteristics->numSubSeqLayers += 1;
+    if (pSubSeqLayerCharacteristics->numSubSeqLayers > MAX_NUM_SUB_SEQ_LAYERS)
+        return HANTRO_NOK;
+
+    /* per layer: 1-bit accuracy flag, 16-bit bit rate, 16-bit frame rate */
+    for (idx = 0; idx < pSubSeqLayerCharacteristics->numSubSeqLayers; idx++)
+    {
+        bits = h264bsdGetBits(pStrmData, 1);
+        if (bits == END_OF_STREAM)
+            return HANTRO_NOK;
+        pSubSeqLayerCharacteristics->accurateStatisticsFlag[idx] =
+            (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+        bits = h264bsdGetBits(pStrmData, 16);
+        if (bits == END_OF_STREAM)
+            return HANTRO_NOK;
+        pSubSeqLayerCharacteristics->averageBitRate[idx] = bits;
+
+        bits = h264bsdGetBits(pStrmData, 16);
+        if (bits == END_OF_STREAM)
+            return HANTRO_NOK;
+        pSubSeqLayerCharacteristics->averageFrameRate[idx] = bits;
+    }
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSubSeqCharacteristics
+
+        Functional description:
+          Parse a sub-sequence characteristics SEI payload.
+        Inputs:
+          pStrmData           stream to read from
+        Outputs:
+          pSubSeqCharacteristics; returns HANTRO_OK or an error code
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeSubSeqCharacteristics(
+  strmData_t *pStrmData,
+  seiSubSeqCharacteristics_t *pSubSeqCharacteristics)
+{
+
+/* Variables */
+
+    u32 status, bits, ref;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSubSeqCharacteristics);
+
+    /* layer number is limited to MAX_NUM_SUB_SEQ_LAYERS-1, id to 65535 */
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+      &pSubSeqCharacteristics->subSeqLayerNum);
+    if (status != HANTRO_OK)
+        return status;
+    if (pSubSeqCharacteristics->subSeqLayerNum > MAX_NUM_SUB_SEQ_LAYERS-1)
+        return HANTRO_NOK;
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pSubSeqCharacteristics->subSeqId);
+    if (status != HANTRO_OK)
+        return status;
+    if (pSubSeqCharacteristics->subSeqId > 65535)
+        return HANTRO_NOK;
+
+    /* optional 32-bit duration, present when the duration flag is set */
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+        return HANTRO_NOK;
+    pSubSeqCharacteristics->durationFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+    if (pSubSeqCharacteristics->durationFlag)
+    {
+        pSubSeqCharacteristics->subSeqDuration = h264bsdShowBits32(pStrmData);
+        if (h264bsdFlushBits(pStrmData,32) == END_OF_STREAM)
+            return HANTRO_NOK;
+    }
+
+    /* optional accuracy flag, bit rate and frame rate */
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+        return HANTRO_NOK;
+    pSubSeqCharacteristics->averageRateFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+    if (pSubSeqCharacteristics->averageRateFlag)
+    {
+        bits = h264bsdGetBits(pStrmData, 1);
+        if (bits == END_OF_STREAM)
+            return HANTRO_NOK;
+        pSubSeqCharacteristics->accurateStatisticsFlag =
+            (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+        bits = h264bsdGetBits(pStrmData, 16);
+        if (bits == END_OF_STREAM)
+            return HANTRO_NOK;
+        pSubSeqCharacteristics->averageBitRate = bits;
+
+        bits = h264bsdGetBits(pStrmData, 16);
+        if (bits == END_OF_STREAM)
+            return HANTRO_NOK;
+        pSubSeqCharacteristics->averageFrameRate = bits;
+    }
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+      &pSubSeqCharacteristics->numReferencedSubseqs);
+    if (status != HANTRO_OK)
+        return status;
+    if (pSubSeqCharacteristics->numReferencedSubseqs > MAX_NUM_SUB_SEQ_LAYERS-1)
+        return HANTRO_NOK;
+
+    /* per referenced sub-sequence: layer number, id and a direction bit */
+    for (ref = 0; ref < pSubSeqCharacteristics->numReferencedSubseqs; ref++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pSubSeqCharacteristics->refSubSeqLayerNum[ref]);
+        if (status != HANTRO_OK)
+            return status;
+
+        status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pSubSeqCharacteristics->refSubSeqId[ref]);
+        if (status != HANTRO_OK)
+            return status;
+
+        bits = h264bsdGetBits(pStrmData, 1);
+        if (bits == END_OF_STREAM)
+            return HANTRO_NOK;
+        pSubSeqCharacteristics->refSubSeqDirection[ref] = bits;
+    }
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeFullFrameFreeze
+
+        Functional description:
+          Decode the repetition period of a full-frame freeze SEI payload.
+        Inputs:
+          pStrmData           pointer to stream data structure
+        Outputs:
+          pFullFrameFreeze    decoded value is stored here
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeFullFrameFreeze(
+  strmData_t *pStrmData,
+  seiFullFrameFreeze_t *pFullFrameFreeze)
+{
+
+/* Variables */
+
+    u32 status;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pFullFrameFreeze);
+
+    /* the period is Exp-Golomb coded, values above 16384 are rejected */
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pFullFrameFreeze->fullFrameFreezeRepetitionPeriod);
+    if (status == HANTRO_OK &&
+        pFullFrameFreeze->fullFrameFreezeRepetitionPeriod > 16384)
+        status = HANTRO_NOK;
+
+    /* HANTRO_OK, HANTRO_NOK or the error code of the decode call */
+    return status;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeFullFrameSnapshot
+
+        Functional description:
+          Decode the snapshot id of a full-frame snapshot SEI payload.
+        Inputs:
+          pStrmData             pointer to stream data structure
+        Outputs:
+          pFullFrameSnapshot    decoded value is stored here
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeFullFrameSnapshot(
+  strmData_t *pStrmData,
+  seiFullFrameSnapshot_t *pFullFrameSnapshot)
+{
+
+/* Variables */
+
+    u32 status;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pFullFrameSnapshot);
+
+    /* the only value decoded here is the Exp-Golomb coded snapshot id */
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pFullFrameSnapshot->snapShotId);
+
+    /* success and failure are both reported with the status of the decode
+     * call, unchanged */
+    return status;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeProgressiveRefinementSegmentStart
+
+        Functional description:
+          Decode the id and the step count of a refinement segment start.
+        Inputs:
+          pStrmData    pointer to stream data structure
+        Outputs:
+          pProgressiveRefinementSegmentStart    decoded values are stored here
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeProgressiveRefinementSegmentStart(
+  strmData_t *pStrmData,
+  seiProgressiveRefinementSegmentStart_t *pProgressiveRefinementSegmentStart)
+{
+
+/* Variables */
+
+    u32 status;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pProgressiveRefinementSegmentStart);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pProgressiveRefinementSegmentStart->progressiveRefinementId);
+    if (status == HANTRO_OK)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pProgressiveRefinementSegmentStart->numRefinementSteps);
+        /* the stored step count is the coded value plus one */
+        if (status == HANTRO_OK)
+            pProgressiveRefinementSegmentStart->numRefinementSteps++;
+    }
+
+    /* HANTRO_OK, or the error code of the decode call that failed */
+    return status;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeProgressiveRefinementSegmentEnd
+
+        Functional description:
+          Decode the refinement id of a refinement segment end SEI payload.
+        Inputs:
+          pStrmData    pointer to stream data structure
+        Outputs:
+          pProgressiveRefinementSegmentEnd    decoded value is stored here
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeProgressiveRefinementSegmentEnd(
+  strmData_t *pStrmData,
+  seiProgressiveRefinementSegmentEnd_t *pProgressiveRefinementSegmentEnd)
+{
+
+/* Variables */
+
+    u32 status;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pProgressiveRefinementSegmentEnd);
+
+    /* the only value decoded here is the Exp-Golomb coded refinement id */
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pProgressiveRefinementSegmentEnd->progressiveRefinementId);
+
+    /* success and failure are both reported with the status of the decode
+     * call, unchanged */
+    return status;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeMotionConstrainedSliceGroupSet
+
+        Functional description:
+          Decode a motion-constrained slice group set SEI payload.
+        Inputs:
+          pStrmData         pointer to stream data structure
+          numSliceGroups    number of slice groups, upper limit of the set size
+        Outputs:
+          pMotionConstrainedSliceGroupSet    decoded values are stored here
+------------------------------------------------------------------------------*/
+
+static u32 DecodeMotionConstrainedSliceGroupSet(
+  strmData_t *pStrmData,
+  seiMotionConstrainedSliceGroupSet_t *pMotionConstrainedSliceGroupSet,
+  u32 numSliceGroups)
+{
+
+/* Variables */
+
+    u32 status, bits, idx;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pMotionConstrainedSliceGroupSet);
+    ASSERT(numSliceGroups < MAX_NUM_SLICE_GROUPS);
+
+    /* the stored set size is the coded value plus one */
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pMotionConstrainedSliceGroupSet->numSliceGroupsInSet);
+    if (status != HANTRO_OK)
+        return status;
+    pMotionConstrainedSliceGroupSet->numSliceGroupsInSet++;
+    if (pMotionConstrainedSliceGroupSet->numSliceGroupsInSet > numSliceGroups)
+        return HANTRO_NOK;
+
+    for (idx = 0; idx < pMotionConstrainedSliceGroupSet->numSliceGroupsInSet; idx++)
+    {
+        bits = h264bsdGetBits(pStrmData,
+            ceilLog2NumSliceGroups[numSliceGroups]);
+        if (bits == END_OF_STREAM)
+            return HANTRO_NOK;
+        pMotionConstrainedSliceGroupSet->sliceGroupId[idx] = bits;
+        if (pMotionConstrainedSliceGroupSet->sliceGroupId[idx] >
+            pMotionConstrainedSliceGroupSet->numSliceGroupsInSet - 1)
+            return HANTRO_NOK;
+    }
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+        return HANTRO_NOK;
+    pMotionConstrainedSliceGroupSet->exactSampleValueMatchFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+        return HANTRO_NOK;
+    pMotionConstrainedSliceGroupSet->panScanRectFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pMotionConstrainedSliceGroupSet->panScanRectFlag)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pMotionConstrainedSliceGroupSet->panScanRectId);
+        if (status != HANTRO_OK)
+            return status;
+    }
+
+    return HANTRO_OK;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeReservedSeiMessage
+
+        Functional description:
+          Copy payloadSize bytes from the stream into a buffer allocated here.
+          The buffer is released again here if the stream ends too early.
+        Inputs:
+          pStrmData, payloadSize    stream data, payload length in bytes
+        Outputs:
+          pReservedSeiMessage       buffer and byte count are stored here
+------------------------------------------------------------------------------*/
+
+static u32 DecodeReservedSeiMessage(
+  strmData_t *pStrmData,
+  seiReservedSeiMessage_t *pReservedSeiMessage,
+  u32 payloadSize)
+{
+
+/* Variables */
+
+    u32 i, tmp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pReservedSeiMessage);
+
+    ALLOCATE(pReservedSeiMessage->reservedSeiMessagePayloadByte,payloadSize,u8);
+    if (pReservedSeiMessage->reservedSeiMessagePayloadByte == NULL)
+        return(MEMORY_ALLOCATION_ERROR);
+    pReservedSeiMessage->numPayloadBytes = payloadSize;
+    for (i = 0; i < payloadSize; i++)
+    {
+        tmp = h264bsdGetBits(pStrmData,8);
+        if (tmp == END_OF_STREAM)
+        {
+            /* truncated payload: release the buffer instead of leaking it */
+            FREE(pReservedSeiMessage->reservedSeiMessagePayloadByte);
+            pReservedSeiMessage->reservedSeiMessagePayloadByte = NULL;
+            pReservedSeiMessage->numPayloadBytes = 0;
+            return(HANTRO_NOK);
+        }
+        pReservedSeiMessage->reservedSeiMessagePayloadByte[i] = (u8)tmp;
+    }
+    return(HANTRO_OK);
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.h
new file mode 100755
index 0000000..efe543a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.h
@@ -0,0 +1,252 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_SEI_H
+#define H264SWDEC_SEI_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_vui.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+#define MAX_PAN_SCAN_CNT 32 /* size of the seiPanScanRect_t offset arrays */
+#define MAX_NUM_SPARE_PICS 16 /* size of the seiSparePic_t arrays */
+#define MAX_NUM_CLOCK_TS 3 /* size of the seiPicTiming_t arrays */
+#define MAX_NUM_SUB_SEQ_LAYERS 256 /* size of the sub-sequence arrays */
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+typedef struct
+{
+    u32 seqParameterSetId;
+    u32 initialCpbRemovalDelay[MAX_CPB_CNT]; /* bound not defined in this file */
+    u32 initialCpbRemovalDelayOffset[MAX_CPB_CNT];
+} seiBufferingPeriod_t; /* type of seiMessage_t.bufferingPeriod */
+
+typedef struct
+{
+    u32 cpbRemovalDelay;
+    u32 dpbOutputDelay;
+    u32 picStruct;
+    u32 clockTimeStampFlag[MAX_NUM_CLOCK_TS]; /* 3 entries, as all arrays here */
+    u32 clockTimeStamp[MAX_NUM_CLOCK_TS];
+    u32 ctType[MAX_NUM_CLOCK_TS];
+    u32 nuitFieldBasedFlag[MAX_NUM_CLOCK_TS];
+    u32 countingType[MAX_NUM_CLOCK_TS];
+    u32 fullTimeStampFlag[MAX_NUM_CLOCK_TS];
+    u32 discontinuityFlag[MAX_NUM_CLOCK_TS];
+    u32 cntDroppedFlag[MAX_NUM_CLOCK_TS];
+    u32 nFrames[MAX_NUM_CLOCK_TS];
+    u32 secondsFlag[MAX_NUM_CLOCK_TS];
+    u32 secondsValue[MAX_NUM_CLOCK_TS];
+    u32 minutesFlag[MAX_NUM_CLOCK_TS];
+    u32 minutesValue[MAX_NUM_CLOCK_TS];
+    u32 hoursFlag[MAX_NUM_CLOCK_TS];
+    u32 hoursValue[MAX_NUM_CLOCK_TS];
+    i32 timeOffset[MAX_NUM_CLOCK_TS]; /* signed, unlike the other members */
+} seiPicTiming_t; /* type of seiMessage_t.picTiming */
+
+typedef struct
+{
+    u32 panScanRectId;
+    u32 panScanRectCancelFlag;
+    u32 panScanCnt; /* presumably the entries used below - TODO confirm */
+    i32 panScanRectLeftOffset[MAX_PAN_SCAN_CNT]; /* signed, 32 entries each */
+    i32 panScanRectRightOffset[MAX_PAN_SCAN_CNT];
+    i32 panScanRectTopOffset[MAX_PAN_SCAN_CNT];
+    i32 panScanRectBottomOffset[MAX_PAN_SCAN_CNT];
+    u32 panScanRectRepetitionPeriod;
+} seiPanScanRect_t; /* type of seiMessage_t.panScanRect */
+
+typedef struct
+{
+    u32 ituTT35CountryCode;
+    u32 ituTT35CountryCodeExtensionByte;
+    u8 *ituTT35PayloadByte; /* NOTE(review): owner of the buffer not shown */
+    u32 numPayloadBytes; /* presumably size of the buffer above - confirm */
+} seiUserDataRegisteredItuTT35_t; /* seiMessage_t.userDataRegisteredItuTT35 */
+
+typedef struct
+{
+    u32 uuidIsoIec11578[4];
+    u8 *userDataPayloadByte; /* NOTE(review): owner of the buffer not shown */
+    u32 numPayloadBytes; /* presumably size of the buffer above - confirm */
+} seiUserDataUnregistered_t; /* type of seiMessage_t.userDataUnregistered */
+
+typedef struct
+{
+    u32 recoveryFrameCnt;
+    u32 exactMatchFlag;
+    u32 brokenLinkFlag;
+    u32 changingSliceGroupIdc;
+} seiRecoveryPoint_t; /* type of seiMessage_t.recoveryPoint */
+
+typedef struct
+{
+    u32 originalIdrFlag;
+    u32 originalFrameNum;
+    decRefPicMarking_t decRefPicMarking; /* type is not defined in this file */
+} seiDecRefPicMarkingRepetition_t; /* seiMessage_t.decRefPicMarkingRepetition */
+
+typedef struct
+{
+    u32 targetFrameNum;
+    u32 spareFieldFlag;
+    u32 targetBottomFieldFlag;
+    u32 numSparePics; /* presumably the entries used below - TODO confirm */
+    u32 deltaSpareFrameNum[MAX_NUM_SPARE_PICS]; /* 16 entries, as all arrays */
+    u32 spareBottomFieldFlag[MAX_NUM_SPARE_PICS];
+    u32 spareAreaIdc[MAX_NUM_SPARE_PICS];
+    u32 *spareUnitFlag[MAX_NUM_SPARE_PICS]; /* pointers, allocation not shown */
+    u32 *zeroRunLength[MAX_NUM_SPARE_PICS]; /* pointers, allocation not shown */
+} seiSparePic_t; /* type of seiMessage_t.sparePic */
+
+typedef struct
+{
+    u32 sceneInfoPresentFlag;
+    u32 sceneId;
+    u32 sceneTransitionType;
+    u32 secondSceneId;
+} seiSceneInfo_t; /* type of seiMessage_t.sceneInfo */
+
+typedef struct
+{
+    u32 subSeqLayerNum;
+    u32 subSeqId;
+    u32 firstRefPicFlag;
+    u32 leadingNonRefPicFlag;
+    u32 lastPicFlag;
+    u32 subSeqFrameNumFlag;
+    u32 subSeqFrameNum;
+} seiSubSeqInfo_t; /* type of seiMessage_t.subSeqInfo */
+
+typedef struct
+{
+    u32 numSubSeqLayers; /* presumably the entries used below - TODO confirm */
+    u32 accurateStatisticsFlag[MAX_NUM_SUB_SEQ_LAYERS]; /* 256 entries each */
+    u32 averageBitRate[MAX_NUM_SUB_SEQ_LAYERS];
+    u32 averageFrameRate[MAX_NUM_SUB_SEQ_LAYERS];
+} seiSubSeqLayerCharacteristics_t; /* seiMessage_t.subSeqLayerCharacteristics */
+
+typedef struct
+{
+    u32 subSeqLayerNum;         /* decoder rejects values above 255 */
+    u32 subSeqId;               /* decoder rejects values above 65535 */
+    u32 durationFlag;           /* HANTRO_TRUE/HANTRO_FALSE, from one bit */
+    u32 subSeqDuration;         /* 32 bits, read only if durationFlag is set */
+    u32 averageRateFlag;        /* when set, the next three are decoded */
+    u32 accurateStatisticsFlag; /* HANTRO_TRUE/HANTRO_FALSE, from one bit */
+    u32 averageBitRate;         /* 16-bit field */
+    u32 averageFrameRate;       /* 16-bit field */
+    u32 numReferencedSubseqs;   /* entries decoded into the arrays, max 255 */
+    u32 refSubSeqLayerNum[MAX_NUM_SUB_SEQ_LAYERS];
+    u32 refSubSeqId[MAX_NUM_SUB_SEQ_LAYERS];
+    u32 refSubSeqDirection[MAX_NUM_SUB_SEQ_LAYERS]; /* one bit per entry */
+} seiSubSeqCharacteristics_t; /* filled by DecodeSubSeqCharacteristics */
+
+typedef struct
+{
+    u32 fullFrameFreezeRepetitionPeriod; /* decoder rejects values > 16384 */
+} seiFullFrameFreeze_t; /* filled by DecodeFullFrameFreeze */
+
+typedef struct
+{
+    u32 snapShotId; /* Exp-Golomb coded, stored without a range check */
+} seiFullFrameSnapshot_t; /* filled by DecodeFullFrameSnapshot */
+
+typedef struct
+{
+    u32 progressiveRefinementId; /* Exp-Golomb coded, no range check */
+    u32 numRefinementSteps;      /* coded value plus one */
+} seiProgressiveRefinementSegmentStart_t; /* see the decoder of same name */
+
+typedef struct
+{
+    u32 progressiveRefinementId; /* Exp-Golomb coded, no range check */
+} seiProgressiveRefinementSegmentEnd_t; /* see the decoder of same name */
+
+typedef struct
+{
+    u32 numSliceGroupsInSet; /* coded value plus one, at most numSliceGroups */
+    u32 sliceGroupId[MAX_NUM_SLICE_GROUPS]; /* ids below numSliceGroupsInSet */
+    u32 exactSampleValueMatchFlag; /* HANTRO_TRUE/HANTRO_FALSE, from one bit */
+    u32 panScanRectFlag;           /* HANTRO_TRUE/HANTRO_FALSE, from one bit */
+    u32 panScanRectId;             /* decoded only if panScanRectFlag is set */
+} seiMotionConstrainedSliceGroupSet_t; /* see the decoder of same name */
+
+typedef struct
+{
+    u8 *reservedSeiMessagePayloadByte; /* allocated by DecodeReservedSeiMessage */
+    u32 numPayloadBytes;               /* payload size given to that decoder */
+} seiReservedSeiMessage_t; /* type of seiMessage_t.reservedSeiMessage */
+
+typedef struct
+{
+    u32 payloadType; /* NOTE(review): presumably selects the member - confirm */
+    seiBufferingPeriod_t bufferingPeriod;
+    seiPicTiming_t picTiming;
+    seiPanScanRect_t panScanRect;
+    seiUserDataRegisteredItuTT35_t userDataRegisteredItuTT35;
+    seiUserDataUnregistered_t userDataUnregistered;
+    seiRecoveryPoint_t recoveryPoint;
+    seiDecRefPicMarkingRepetition_t decRefPicMarkingRepetition;
+    seiSparePic_t sparePic;
+    seiSceneInfo_t sceneInfo;
+    seiSubSeqInfo_t subSeqInfo;
+    seiSubSeqLayerCharacteristics_t subSeqLayerCharacteristics;
+    seiSubSeqCharacteristics_t subSeqCharacteristics;
+    seiFullFrameFreeze_t fullFrameFreeze;
+    seiFullFrameSnapshot_t fullFrameSnapshot;
+    seiProgressiveRefinementSegmentStart_t progressiveRefinementSegmentStart;
+    seiProgressiveRefinementSegmentEnd_t progressiveRefinementSegmentEnd;
+    seiMotionConstrainedSliceGroupSet_t motionConstrainedSliceGroupSet;
+    seiReservedSeiMessage_t reservedSeiMessage;
+} seiMessage_t; /* passed to h264bsdDecodeSeiMessage as pSeiMessage */
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSeiMessage(
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  seiMessage_t *pSeiMessage,
+  u32 numSliceGroups);
+
+#endif /* #ifndef H264SWDEC_SEI_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.c
new file mode 100644
index 0000000..751051a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.c
@@ -0,0 +1,577 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdDecodeSeqParamSet
+          GetDpbSize
+          h264bsdCompareSeqParamSets
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_util.h"
+#include "h264bsd_vlc.h"
+#include "h264bsd_vui.h"
+#include "h264bsd_cfg.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/* GetDpbSize result for an unknown levelIdc or a too large picture size */
+enum {INVALID_DPB_SIZE = 0x7FFFFFFF};
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 GetDpbSize(u32 picSizeInMbs, u32 levelIdc);
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdDecodeSeqParamSet
+
+        Functional description:
+            Decode sequence parameter set information from the stream.
+
+            Function allocates memory for offsetForRefFrame array if
+            picture order count type is 1 and numRefFramesInPicOrderCntCycle
+            is greater than zero, and for vuiParameters if VUI is present.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+
+        Outputs:
+            pSeqParamSet    decoded information is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      failure, invalid information or end of stream
+            MEMORY_ALLOCATION_ERROR for memory allocation failure
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSeqParamSet(strmData_t *pStrmData, seqParamSet_t *pSeqParamSet)
+{
+
+/* Variables */
+
+    u32 tmp, i, value;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeqParamSet);
+
+    H264SwDecMemset(pSeqParamSet, 0, sizeof(seqParamSet_t));
+
+    /* profile_idc */
+    tmp = h264bsdGetBits(pStrmData, 8);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    if (tmp != 66)
+    {
+        DEBUG(("NOT BASELINE PROFILE %d\n", tmp));
+    }
+    pSeqParamSet->profileIdc = tmp;
+
+    tmp = h264bsdGetBits(pStrmData, 1); /* constrained_set0_flag */
+    tmp = h264bsdGetBits(pStrmData, 1); /* constrained_set1_flag */
+    tmp = h264bsdGetBits(pStrmData, 1); /* constrained_set2_flag */
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* reserved_zero_5bits, values of these bits shall be ignored */
+    tmp = h264bsdGetBits(pStrmData, 5);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    tmp = h264bsdGetBits(pStrmData, 8); /* level_idc */
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pSeqParamSet->levelIdc = tmp;
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pSeqParamSet->seqParameterSetId);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (pSeqParamSet->seqParameterSetId >= MAX_NUM_SEQ_PARAM_SETS)
+    {
+        EPRINT("seq_param_set_id");
+        return(HANTRO_NOK);
+    }
+
+    /* log2_max_frame_num_minus4 */
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (value > 12)
+    {
+        EPRINT("log2_max_frame_num_minus4");
+        return(HANTRO_NOK);
+    }
+    /* maxFrameNum = 2^(log2_max_frame_num_minus4 + 4) */
+    pSeqParamSet->maxFrameNum = 1 << (value+4);
+
+    /* valid POC types are 0, 1 and 2 */
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (value > 2)
+    {
+        EPRINT("pic_order_cnt_type");
+        return(HANTRO_NOK);
+    }
+    pSeqParamSet->picOrderCntType = value;
+
+    if (pSeqParamSet->picOrderCntType == 0)
+    {
+        /* log2_max_pic_order_cnt_lsb_minus4 */
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (value > 12)
+        {
+            EPRINT("log2_max_pic_order_cnt_lsb_minus4");
+            return(HANTRO_NOK);
+        }
+        /* maxPicOrderCntLsb = 2^(log2_max_pic_order_cnt_lsb_minus4 + 4) */
+        pSeqParamSet->maxPicOrderCntLsb = 1 << (value+4);
+    }
+    else if (pSeqParamSet->picOrderCntType == 1)
+    {
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pSeqParamSet->deltaPicOrderAlwaysZeroFlag = (tmp == 1) ?
+                                        HANTRO_TRUE : HANTRO_FALSE;
+
+        tmp = h264bsdDecodeExpGolombSigned(pStrmData,
+            &pSeqParamSet->offsetForNonRefPic);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+
+        tmp = h264bsdDecodeExpGolombSigned(pStrmData,
+            &pSeqParamSet->offsetForTopToBottomField);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSeqParamSet->numRefFramesInPicOrderCntCycle);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pSeqParamSet->numRefFramesInPicOrderCntCycle > 255)
+        {
+            EPRINT("num_ref_frames_in_pic_order_cnt_cycle");
+            return(HANTRO_NOK);
+        }
+
+        if (pSeqParamSet->numRefFramesInPicOrderCntCycle)
+        {
+            /* NOTE: not freed here, not even on a later decoding failure */
+            ALLOCATE(pSeqParamSet->offsetForRefFrame,
+                     pSeqParamSet->numRefFramesInPicOrderCntCycle, i32);
+            if (pSeqParamSet->offsetForRefFrame == NULL)
+                return(MEMORY_ALLOCATION_ERROR);
+
+            for (i = 0; i < pSeqParamSet->numRefFramesInPicOrderCntCycle; i++)
+            {
+                tmp =  h264bsdDecodeExpGolombSigned(pStrmData,
+                    pSeqParamSet->offsetForRefFrame + i);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+            }
+        }
+        else
+            pSeqParamSet->offsetForRefFrame = NULL;
+    }
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pSeqParamSet->numRefFrames);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (pSeqParamSet->numRefFrames > MAX_NUM_REF_PICS)
+    {
+        EPRINT("num_ref_frames");
+        return(HANTRO_NOK);
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pSeqParamSet->gapsInFrameNumValueAllowedFlag = (tmp == 1) ?
+                                        HANTRO_TRUE : HANTRO_FALSE;
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    pSeqParamSet->picWidthInMbs = value + 1;
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    pSeqParamSet->picHeightInMbs = value + 1;
+
+    /* value + 1 wraps to zero for the largest code number, and the picture
+     * size in macroblocks (width * height, used below) has to fit into u32 */
+    if (!pSeqParamSet->picWidthInMbs || !pSeqParamSet->picHeightInMbs ||
+        pSeqParamSet->picWidthInMbs > 0xFFFFFFFFU/pSeqParamSet->picHeightInMbs)
+    {
+        EPRINT("pic_size_in_mbs");
+        return(HANTRO_NOK);
+    }
+
+    /* frame_mbs_only_flag, shall be 1 for baseline profile */
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    if (!tmp)
+    {
+        EPRINT("frame_mbs_only_flag");
+        return(HANTRO_NOK);
+    }
+
+    /* direct_8x8_inference_flag */
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pSeqParamSet->frameCroppingFlag = (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pSeqParamSet->frameCroppingFlag)
+    {
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSeqParamSet->frameCropLeftOffset);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSeqParamSet->frameCropRightOffset);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSeqParamSet->frameCropTopOffset);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSeqParamSet->frameCropBottomOffset);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+
+        /* check that frame cropping params are valid, parameters shall
+         * specify non-negative area within the original picture */
+        if ( ( (i32)pSeqParamSet->frameCropLeftOffset >
+               ( 8 * (i32)pSeqParamSet->picWidthInMbs -
+                 ((i32)pSeqParamSet->frameCropRightOffset + 1) ) ) ||
+             ( (i32)pSeqParamSet->frameCropTopOffset >
+               ( 8 * (i32)pSeqParamSet->picHeightInMbs -
+                 ((i32)pSeqParamSet->frameCropBottomOffset + 1) ) ) )
+        {
+            EPRINT("frame_cropping");
+            return(HANTRO_NOK);
+        }
+    }
+
+    /* check that image dimensions and levelIdc match */
+    tmp = pSeqParamSet->picWidthInMbs * pSeqParamSet->picHeightInMbs;
+    value = GetDpbSize(tmp, pSeqParamSet->levelIdc);
+    if (value == INVALID_DPB_SIZE || pSeqParamSet->numRefFrames > value)
+    {
+        DEBUG(("WARNING! Invalid DPB size based on SPS Level!\n"));
+        DEBUG(("WARNING! Using num_ref_frames =%d for DPB size!\n",
+                        pSeqParamSet->numRefFrames));
+        value = pSeqParamSet->numRefFrames;
+    }
+    pSeqParamSet->maxDpbSize = value;
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pSeqParamSet->vuiParametersPresentFlag = (tmp == 1) ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+    /* VUI */
+    if (pSeqParamSet->vuiParametersPresentFlag)
+    {
+        ALLOCATE(pSeqParamSet->vuiParameters, 1, vuiParameters_t);
+        if (pSeqParamSet->vuiParameters == NULL)
+            return(MEMORY_ALLOCATION_ERROR);
+        tmp = h264bsdDecodeVuiParameters(pStrmData,
+            pSeqParamSet->vuiParameters);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        /* check numReorderFrames and maxDecFrameBuffering */
+        if (pSeqParamSet->vuiParameters->bitstreamRestrictionFlag)
+        {
+            if (pSeqParamSet->vuiParameters->numReorderFrames >
+                    pSeqParamSet->vuiParameters->maxDecFrameBuffering ||
+                pSeqParamSet->vuiParameters->maxDecFrameBuffering <
+                    pSeqParamSet->numRefFrames ||
+                pSeqParamSet->vuiParameters->maxDecFrameBuffering >
+                    pSeqParamSet->maxDpbSize)
+                return(HANTRO_NOK);
+
+            /* standard says that "the sequence shall not require a DPB with
+             * size of more than max(1, maxDecFrameBuffering) */
+            pSeqParamSet->maxDpbSize =
+                MAX(1, pSeqParamSet->vuiParameters->maxDecFrameBuffering);
+        }
+    }
+
+    /* ignore possible errors in trailing bits of parameter sets */
+    (void)h264bsdRbspTrailingBits(pStrmData);
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetDpbSize
+
+        Functional description:
+            Get size of the DPB in frames. Size is determined based on the
+            picture size and MaxDPB for the specified level. These determine
+            how many pictures may fit into the buffer. However, the size
+            is also limited to a maximum of 16 frames and therefore function
+            returns the minimum of the determined size and 16.
+
+        Inputs:
+            picSizeInMbs    number of macroblocks in the picture
+            levelIdc        indicates the level
+
+        Outputs:
+            none
+
+        Returns:
+            size of the DPB in frames
+            INVALID_DPB_SIZE when invalid levelIdc specified or picSizeInMbs
+            is zero or higher than supported by the level in question
+
+------------------------------------------------------------------------------*/
+
+static u32 GetDpbSize(u32 picSizeInMbs, u32 levelIdc)
+{
+
+/* Variables */
+
+    u32 tmp;
+    u32 maxPicSizeInMbs;
+
+/* Code */
+
+    if (picSizeInMbs == 0) /* would divide by zero below */
+        return(INVALID_DPB_SIZE);
+
+    /* tmp is the DPB size in bytes, 1024 * MaxDPB from table A-1, Annex A */
+    switch (levelIdc)
+    {
+        case 10:
+            tmp = 152064;
+            maxPicSizeInMbs = 99;
+            break;
+
+        case 11:
+            tmp = 345600;
+            maxPicSizeInMbs = 396;
+            break;
+
+        case 12:
+            tmp = 912384;
+            maxPicSizeInMbs = 396;
+            break;
+
+        case 13:
+            tmp = 912384;
+            maxPicSizeInMbs = 396;
+            break;
+
+        case 20:
+            tmp = 912384;
+            maxPicSizeInMbs = 396;
+            break;
+
+        case 21:
+            tmp = 1824768;
+            maxPicSizeInMbs = 792;
+            break;
+
+        case 22:
+            tmp = 3110400;
+            maxPicSizeInMbs = 1620;
+            break;
+
+        case 30:
+            tmp = 3110400;
+            maxPicSizeInMbs = 1620;
+            break;
+
+        case 31:
+            tmp = 6912000;
+            maxPicSizeInMbs = 3600;
+            break;
+
+        case 32:
+            tmp = 7864320;
+            maxPicSizeInMbs = 5120;
+            break;
+
+        case 40:
+            tmp = 12582912;
+            maxPicSizeInMbs = 8192;
+            break;
+
+        case 41:
+            tmp = 12582912;
+            maxPicSizeInMbs = 8192;
+            break;
+
+        case 42:
+            tmp = 34816*384;
+            maxPicSizeInMbs = 8704;
+            break;
+
+        case 50:
+            /* standard says 42301440 here, but corrigendum "corrects" this to
+             * 42393600 */
+            tmp = 42393600;
+            maxPicSizeInMbs = 22080;
+            break;
+
+        case 51:
+            tmp = 70778880;
+            maxPicSizeInMbs = 36864;
+            break;
+
+        default:
+            return(INVALID_DPB_SIZE);
+    }
+
+    /* this is not "correct" return value! However, it results in error in
+     * decoding and this was easiest place to check picture size */
+    if (picSizeInMbs > maxPicSizeInMbs)
+        return(INVALID_DPB_SIZE);
+
+    tmp /= (picSizeInMbs*384);
+
+    return(MIN(tmp, 16));
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdCompareSeqParamSets
+
+        Functional description:
+            Tell whether two sequence parameter sets differ.
+
+        Inputs:
+            pSps1   pointer to a sequence parameter set
+            pSps2   pointer to another sequence parameter set
+
+        Outputs:
+            0       all compared parameters are equal
+            1       at least one compared parameter differs
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCompareSeqParamSets(seqParamSet_t *pSps1, seqParamSet_t *pSps2)
+{
+
+/* Variables */
+
+    u32 idx;
+
+/* Code */
+
+    ASSERT(pSps1);
+    ASSERT(pSps2);
+
+    /* parameters that are always present are compared first, any
+     * difference there already settles the result */
+    if (pSps1->profileIdc        != pSps2->profileIdc ||
+        pSps1->levelIdc          != pSps2->levelIdc ||
+        pSps1->maxFrameNum       != pSps2->maxFrameNum ||
+        pSps1->picOrderCntType   != pSps2->picOrderCntType ||
+        pSps1->numRefFrames      != pSps2->numRefFrames ||
+        pSps1->gapsInFrameNumValueAllowedFlag !=
+            pSps2->gapsInFrameNumValueAllowedFlag ||
+        pSps1->picWidthInMbs     != pSps2->picWidthInMbs ||
+        pSps1->picHeightInMbs    != pSps2->picHeightInMbs ||
+        pSps1->frameCroppingFlag != pSps2->frameCroppingFlag ||
+        pSps1->vuiParametersPresentFlag != pSps2->vuiParametersPresentFlag)
+    {
+        return 1;
+    }
+
+    /* POC type specific parameters, the type itself is known to be equal */
+    if (pSps1->picOrderCntType == 0)
+    {
+        if (pSps1->maxPicOrderCntLsb != pSps2->maxPicOrderCntLsb)
+            return 1;
+    }
+    else if (pSps1->picOrderCntType == 1)
+    {
+        if (pSps1->deltaPicOrderAlwaysZeroFlag !=
+                pSps2->deltaPicOrderAlwaysZeroFlag)
+            return 1;
+        if (pSps1->offsetForNonRefPic != pSps2->offsetForNonRefPic)
+            return 1;
+        if (pSps1->offsetForTopToBottomField !=
+                pSps2->offsetForTopToBottomField)
+            return 1;
+        if (pSps1->numRefFramesInPicOrderCntCycle !=
+                pSps2->numRefFramesInPicOrderCntCycle)
+            return 1;
+
+        /* cycle lengths are equal here, compare the offsets one by one */
+        for (idx = 0; idx < pSps1->numRefFramesInPicOrderCntCycle; idx++)
+        {
+            if (pSps1->offsetForRefFrame[idx] != pSps2->offsetForRefFrame[idx])
+                return 1;
+        }
+    }
+
+    /* cropping offsets matter only when cropping is in use */
+    if (pSps1->frameCroppingFlag &&
+        (pSps1->frameCropLeftOffset   != pSps2->frameCropLeftOffset ||
+         pSps1->frameCropRightOffset  != pSps2->frameCropRightOffset ||
+         pSps1->frameCropTopOffset    != pSps2->frameCropTopOffset ||
+         pSps1->frameCropBottomOffset != pSps2->frameCropBottomOffset))
+    {
+        return 1;
+    }
+
+    return 0;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.h
new file mode 100755
index 0000000..e18df94
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_SEQ_PARAM_SET_H
+#define H264SWDEC_SEQ_PARAM_SET_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_vui.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/* structure to store sequence parameter set information decoded from the
+ * stream */
+typedef struct
+{
+    u32 profileIdc;
+    u32 levelIdc;
+    u32 seqParameterSetId;
+    u32 maxFrameNum;
+    u32 picOrderCntType;    /* selects which POC fields the SPS compare checks */
+    u32 maxPicOrderCntLsb;  /* compared only when picOrderCntType == 0 */
+    u32 deltaPicOrderAlwaysZeroFlag;    /* compared when picOrderCntType == 1 */
+    i32 offsetForNonRefPic;             /* compared when picOrderCntType == 1 */
+    i32 offsetForTopToBottomField;      /* compared when picOrderCntType == 1 */
+    u32 numRefFramesInPicOrderCntCycle; /* entry count for offsetForRefFrame */
+    i32 *offsetForRefFrame; /* read at indices 0..numRefFramesInPicOrderCntCycle-1 */
+    u32 numRefFrames;
+    u32 gapsInFrameNumValueAllowedFlag;
+    u32 picWidthInMbs;      /* picture width, in macroblocks */
+    u32 picHeightInMbs;     /* picture height, in macroblocks */
+    u32 frameCroppingFlag;  /* non-zero -> the four crop offsets are compared */
+    u32 frameCropLeftOffset;
+    u32 frameCropRightOffset;
+    u32 frameCropTopOffset;
+    u32 frameCropBottomOffset;
+    u32 vuiParametersPresentFlag;
+    vuiParameters_t *vuiParameters; /* presumably valid only if flag above set - TODO confirm */
+    u32 maxDpbSize;
+} seqParamSet_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSeqParamSet(strmData_t *pStrmData,
+    seqParamSet_t *pSeqParamSet);
+
+u32 h264bsdCompareSeqParamSets(seqParamSet_t *pSps1, seqParamSet_t *pSps2);
+
+#endif /* #ifdef H264SWDEC_SEQ_PARAM_SET_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.c
new file mode 100755
index 0000000..c288d4b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdDecodeSliceData
+          SetMbParams
+          h264bsdMarkSliceCorrupted
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_slice_data.h"
+#include "h264bsd_util.h"
+#include "h264bsd_vlc.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static void SetMbParams(mbStorage_t *pMb, sliceHeader_t *pSlice, u32 sliceId,
+    i32 chromaQpIndexOffset);
+
+/*------------------------------------------------------------------------------
+
+   5.1  Function name: h264bsdDecodeSliceData
+
+        Functional description:
+            Decode one slice. Function decodes stream data, i.e. macroblocks
+            and possible skip_run fields. h264bsdDecodeMacroblock function is
+            called to handle all other macroblock related processing.
+            The macroblock to slice group map is considered when the next
+            macroblock to process is determined (h264bsdNextMbAddress
+            function).
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            pStorage        pointer to storage structure
+            currImage       pointer to current processed picture, needed for
+                            intra prediction of the macroblocks
+            pSliceHeader    pointer to slice header of the current slice
+
+        Outputs:
+            currImage       processed macroblocks are written to current image
+            pStorage        mbStorage structure of each processed macroblock
+                            is updated here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSliceData(strmData_t *pStrmData, storage_t *pStorage,
+    image_t *currImage, sliceHeader_t *pSliceHeader)
+{
+
+/* Variables (per-slice working state) */
+
+    u8 mbData[384 + 15 + 32]; /* scratch; +15 presumably covers alignment slack */
+    u8 *data;                 /* 16-byte aligned pointer into mbData */
+    u32 tmp;                  /* status code returned by the helper calls */
+    u32 skipRun;              /* skipped macroblocks still to be processed */
+    u32 prevSkipped;          /* TRUE after a non-zero skip run has been read */
+    u32 currMbAddr;           /* address of the macroblock being processed */
+    u32 moreMbs;              /* loop condition, computed inside the loop */
+    u32 mbCount;              /* macroblocks decoded for the first time */
+    i32 qpY;                  /* running QP, passed by address to the decoder */
+    macroblockLayer_t *mbLayer; /* parse buffer taken from pStorage */
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSliceHeader);
+    ASSERT(pStorage);
+    ASSERT(pSliceHeader->firstMbInSlice < pStorage->picSizeInMbs);
+
+    /* ensure 16-byte alignment */
+    data = (u8*)ALIGN(mbData, 16);
+
+    mbLayer = pStorage->mbLayer;
+
+    currMbAddr = pSliceHeader->firstMbInSlice;
+    skipRun = 0;
+    prevSkipped = HANTRO_FALSE;
+
+    /* increment slice index, will be one for decoding of the first slice of
+     * the picture */
+    pStorage->slice->sliceId++;
+
+    /* lastMbAddr stores address of the macroblock that was last successfully
+     * decoded, needed for error handling */
+    pStorage->slice->lastMbAddr = 0;
+
+    mbCount = 0;
+    /* initial quantization parameter for the slice is obtained as the sum of
+     * initial QP for the picture and sliceQpDelta for the current slice */
+    qpY = (i32)pStorage->activePps->picInitQp + pSliceHeader->sliceQpDelta;
+    do /* one iteration per macroblock of the slice */
+    {
+        /* primary picture and already decoded macroblock -> error */
+        if (!pSliceHeader->redundantPicCnt && pStorage->mb[currMbAddr].decoded)
+        {
+            EPRINT("Primary and already decoded");
+            return(HANTRO_NOK);
+        }
+
+        SetMbParams(pStorage->mb + currMbAddr, pSliceHeader,
+            pStorage->slice->sliceId, pStorage->activePps->chromaQpIndexOffset);
+
+        if (!IS_I_SLICE(pSliceHeader->sliceType)) /* skip runs: non-I slices only */
+        {
+            if (!prevSkipped) /* a new skip run is read only after a coded mb */
+            {
+                tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &skipRun);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                /* skip_run shall be less than or equal to number of
+                 * macroblocks left */
+                if (skipRun > (pStorage->picSizeInMbs - currMbAddr))
+                {
+                    EPRINT("skip_run");
+                    return(HANTRO_NOK);
+                }
+                if (skipRun)
+                {
+                    prevSkipped = HANTRO_TRUE;
+                    H264SwDecMemset(&mbLayer->mbPred, 0, sizeof(mbPred_t));
+                    /* mark current macroblock skipped */
+                    mbLayer->mbType = P_Skip;
+                }
+            }
+        }
+
+        if (skipRun) /* still inside a skip run: nothing is parsed for this mb */
+        {
+            DEBUG(("Skipping macroblock %d\n", currMbAddr));
+            skipRun--;
+        }
+        else /* coded macroblock: parse its macroblock layer from the stream */
+        {
+            prevSkipped = HANTRO_FALSE;
+            tmp = h264bsdDecodeMacroblockLayer(pStrmData, mbLayer,
+                pStorage->mb + currMbAddr, pSliceHeader->sliceType,
+                pSliceHeader->numRefIdxL0Active);
+            if (tmp != HANTRO_OK)
+            {
+                EPRINT("macroblock_layer");
+                return(tmp);
+            }
+        }
+
+        tmp = h264bsdDecodeMacroblock(pStorage->mb + currMbAddr, mbLayer,
+            currImage, pStorage->dpb, &qpY, currMbAddr,
+            pStorage->activePps->constrainedIntraPredFlag, data);
+        if (tmp != HANTRO_OK)
+        {
+            EPRINT("MACRO_BLOCK");
+            return(tmp);
+        }
+
+        /* increment macroblock count only for macroblocks that were decoded
+         * for the first time (redundant slices) */
+        if (pStorage->mb[currMbAddr].decoded == 1)
+            mbCount++;
+
+        /* keep on processing as long as there is stream data left or
+         * processing of macroblocks to be skipped based on the last skipRun is
+         * not finished */
+        moreMbs = (h264bsdMoreRbspData(pStrmData) || skipRun) ?
+                                        HANTRO_TRUE : HANTRO_FALSE;
+
+        /* lastMbAddr is only updated for intra slices (all macroblocks of
+         * inter slices will be lost in case of an error) */
+        if (IS_I_SLICE(pSliceHeader->sliceType))
+            pStorage->slice->lastMbAddr = currMbAddr;
+
+        currMbAddr = h264bsdNextMbAddress(pStorage->sliceGroupMap,
+            pStorage->picSizeInMbs, currMbAddr);
+        /* data left in the buffer but no more macroblocks for current slice
+         * group -> error */
+        if (moreMbs && !currMbAddr) /* a zero next address means none left */
+        {
+            EPRINT("Next mb address");
+            return(HANTRO_NOK);
+        }
+
+    } while (moreMbs);
+
+    if ((pStorage->slice->numDecodedMbs + mbCount) > pStorage->picSizeInMbs)
+    {
+        EPRINT("Num decoded mbs");
+        return(HANTRO_NOK);
+    }
+
+    pStorage->slice->numDecodedMbs += mbCount; /* committed only on success */
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+   5.2  Function: SetMbParams
+
+        Functional description:
+            Set macroblock parameters that remain constant for this slice
+
+        Inputs:
+            pSlice      pointer to current slice header
+            sliceId     id of the current slice
+            chromaQpIndexOffset
+
+        Outputs:
+            pMb         pointer to macroblock structure which is updated
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void SetMbParams(mbStorage_t *pMb, sliceHeader_t *pSlice, u32 sliceId,
+    i32 chromaQpIndexOffset)
+{
+
+/* Variables: temporaries for the three slice header fields read below */
+    u32 tmp1;
+    i32 tmp2, tmp3;
+
+/* Code: read the slice header fields first, then store everything in pMb */
+
+    tmp1 = pSlice->disableDeblockingFilterIdc;
+    tmp2 = pSlice->sliceAlphaC0Offset;
+    tmp3 = pSlice->sliceBetaOffset;
+    pMb->sliceId = sliceId;
+    pMb->disableDeblockingFilterIdc = tmp1;
+    pMb->filterOffsetA = tmp2; /* copy of sliceAlphaC0Offset */
+    pMb->filterOffsetB = tmp3; /* copy of sliceBetaOffset */
+    pMb->chromaQpIndexOffset = chromaQpIndexOffset;
+
+}
+
+/*------------------------------------------------------------------------------
+
+   5.3  Function name: h264bsdMarkSliceCorrupted
+
+        Functional description:
+            Mark macroblocks of the slice corrupted. If lastMbAddr in the slice
+            storage is set -> picWidthInMbs (or at least 10) macroblocks back
+            from  the lastMbAddr are marked corrupted. However, if lastMbAddr
+            is not set -> all macroblocks of the slice are marked.
+
+        Inputs:
+            pStorage        pointer to storage structure
+            firstMbInSlice  address of the first macroblock in the slice, this
+                            identifies the slice to be marked corrupted
+
+        Outputs:
+            pStorage        mbStorage for the corrupted macroblocks updated
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdMarkSliceCorrupted(storage_t *pStorage, u32 firstMbInSlice)
+{
+
+/* Variables */
+
+    u32 tmp, i;         /* tmp counts mbs of this slice passed; i walks back */
+    u32 sliceId;        /* id of the slice being marked */
+    u32 currMbAddr;     /* address where marking starts / currently is */
+
+/* Code */
+
+    ASSERT(pStorage);
+    ASSERT(firstMbInSlice < pStorage->picSizeInMbs);
+
+    currMbAddr = firstMbInSlice; /* default: mark from the start of the slice */
+
+    sliceId = pStorage->slice->sliceId;
+
+    /* DecodeSliceData sets lastMbAddr for I slices -> if it was set, go back
+     * MAX(picWidthInMbs, 10) macroblocks and start marking from there */
+    if (pStorage->slice->lastMbAddr) /* a value of 0 is treated as "not set" */
+    {
+        ASSERT(pStorage->mb[pStorage->slice->lastMbAddr].sliceId == sliceId);
+        i = pStorage->slice->lastMbAddr - 1;
+        tmp = 0;
+        while (i > currMbAddr) /* never steps back past firstMbInSlice */
+        {
+            if (pStorage->mb[i].sliceId == sliceId) /* count only this slice */
+            {
+                tmp++;
+                if (tmp >= MAX(pStorage->activeSps->picWidthInMbs, 10))
+                    break;
+            }
+            i--;
+        }
+        currMbAddr = i;
+    }
+
+    do /* walk forward, undoing the decoded mark on this slice's macroblocks */
+    {
+
+        if ( (pStorage->mb[currMbAddr].sliceId == sliceId) &&
+             (pStorage->mb[currMbAddr].decoded) )
+        {
+            pStorage->mb[currMbAddr].decoded--;
+        }
+        else /* first mb of another slice, or one not decoded, ends marking */
+        {
+            break;
+        }
+
+        currMbAddr = h264bsdNextMbAddress(pStorage->sliceGroupMap,
+            pStorage->picSizeInMbs, currMbAddr);
+
+    } while (currMbAddr); /* a zero next address terminates the walk */
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.h
new file mode 100755
index 0000000..f23d49e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_SLICE_DATA_H
+#define H264SWDEC_SLICE_DATA_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_cfg.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_storage.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSliceData(strmData_t *pStrmData, storage_t *pStorage,
+    image_t *currImage, sliceHeader_t *pSliceHeader);
+
+void h264bsdMarkSliceCorrupted(storage_t *pStorage, u32 firstMbInSlice);
+
+#endif /* #ifdef H264SWDEC_SLICE_DATA_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.c
new file mode 100755
index 0000000..7cbb534
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.c
@@ -0,0 +1,589 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          DecodeInterleavedMap
+          DecodeDispersedMap
+          DecodeForegroundLeftOverMap
+          DecodeBoxOutMap
+          DecodeRasterScanMap
+          DecodeWipeMap
+          h264bsdDecodeSliceGroupMap
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_slice_group_map.h"
+#include "h264bsd_cfg.h"
+#include "h264bsd_pic_param_set.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static void DecodeInterleavedMap(
+  u32 *map,
+  u32 numSliceGroups,
+  u32 *runLength,
+  u32 picSize);
+
+static void DecodeDispersedMap(
+  u32 *map,
+  u32 numSliceGroups,
+  u32 picWidth,
+  u32 picHeight);
+
+static void DecodeForegroundLeftOverMap(
+  u32 *map,
+  u32 numSliceGroups,
+  u32 *topLeft,
+  u32 *bottomRight,
+  u32 picWidth,
+  u32 picHeight);
+
+static void DecodeBoxOutMap(
+  u32 *map,
+  u32 sliceGroupChangeDirectionFlag,
+  u32 unitsInSliceGroup0,
+  u32 picWidth,
+  u32 picHeight);
+
+static void DecodeRasterScanMap(
+  u32 *map,
+  u32 sliceGroupChangeDirectionFlag,
+  u32 sizeOfUpperLeftGroup,
+  u32 picSize);
+
+static void DecodeWipeMap(
+  u32 *map,
+  u32 sliceGroupChangeDirectionFlag,
+  u32 sizeOfUpperLeftGroup,
+  u32 picWidth,
+  u32 picHeight);
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeInterleavedMap
+
+        Functional description:
+            Function to decode interleaved slice group map type, i.e. slice
+            group map type 0.
+
+        Inputs:
+            map             pointer to the map
+            numSliceGroups  number of slice groups
+            runLength       run_length[] values for each slice group
+            picSize         picture size in macroblocks
+
+        Outputs:
+            map             slice group map is stored here
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void DecodeInterleavedMap(
+  u32 *map,
+  u32 numSliceGroups,
+  u32 *runLength,
+  u32 picSize)
+{
+
+/* Variables: i = next map index to fill, j = offset in run, group = group id */
+
+    u32 i,j, group;
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(numSliceGroups >= 1 && numSliceGroups <= MAX_NUM_SLICE_GROUPS);
+    ASSERT(runLength);
+
+    i = 0;
+
+    do { /* repeat the cycle over all groups until the whole map is filled */
+        for (group = 0; group < numSliceGroups && i < picSize;
+          i += runLength[group++]) /* advance past the run, then next group */
+        {
+            ASSERT(runLength[group] <= picSize);
+            for (j = 0; j < runLength[group] && i + j < picSize; j++)
+                map[i+j] = group; /* i + j < picSize clips the final run */
+        }
+    } while (i < picSize);
+    /* NOTE(review): i never advances if every runLength entry is zero, so
+     * termination presumably relies on validation elsewhere - confirm */
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeDispersedMap
+
+        Functional description:
+            Function to decode dispersed slice group map type, i.e. slice
+            group map type 1.
+
+        Inputs:
+            map               pointer to the map
+            numSliceGroups    number of slice groups
+            picWidth          picture width in macroblocks
+            picHeight         picture height in macroblocks
+
+        Outputs:
+            map               slice group map is stored here
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void DecodeDispersedMap(
+  u32 *map,
+  u32 numSliceGroups,
+  u32 picWidth,
+  u32 picHeight)
+{
+
+/* Variables: i = map index, picSize = number of map entries */
+
+    u32 i, picSize;
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(numSliceGroups >= 1 && numSliceGroups <= MAX_NUM_SLICE_GROUPS);
+    ASSERT(picWidth);
+    ASSERT(picHeight);
+
+    picSize = picWidth * picHeight;
+
+    for (i = 0; i < picSize; i++) /* group = (col + (row * groups) / 2) % groups */
+        map[i] = ((i % picWidth) + (((i / picWidth) * numSliceGroups) >> 1)) %
+            numSliceGroups;
+
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeForegroundLeftOverMap
+
+        Functional description:
+            Function to decode foreground with left-over slice group map type,
+            i.e. slice group map type 2.
+
+        Inputs:
+            map               pointer to the map
+            numSliceGroups    number of slice groups
+            topLeft           top_left[] values
+            bottomRight       bottom_right[] values
+            picWidth          picture width in macroblocks
+            picHeight         picture height in macroblocks
+
+        Outputs:
+            map               slice group map is stored here
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void DecodeForegroundLeftOverMap(
+  u32 *map,
+  u32 numSliceGroups,
+  u32 *topLeft,
+  u32 *bottomRight,
+  u32 picWidth,
+  u32 picHeight)
+{
+
+/* Variables: rectangle corners are split into (x, y) macroblock coordinates */
+
+    u32 i,y,x,yTopLeft,yBottomRight,xTopLeft,xBottomRight, picSize;
+    u32 group;
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(numSliceGroups >= 1 && numSliceGroups <= MAX_NUM_SLICE_GROUPS);
+    ASSERT(topLeft);
+    ASSERT(bottomRight);
+    ASSERT(picWidth);
+    ASSERT(picHeight);
+
+    picSize = picWidth * picHeight;
+
+    for (i = 0; i < picSize; i++) /* everything starts in the last group */
+        map[i] = numSliceGroups - 1;
+
+    for (group = numSliceGroups - 1; group--; ) /* numSliceGroups-2 down to 0 */
+    {
+        ASSERT( topLeft[group] <= bottomRight[group] &&
+                bottomRight[group] < picSize );
+        yTopLeft = topLeft[group] / picWidth;     /* address -> row */
+        xTopLeft = topLeft[group] % picWidth;     /* address -> column */
+        yBottomRight = bottomRight[group] / picWidth;
+        xBottomRight = bottomRight[group] % picWidth;
+        ASSERT(xTopLeft <= xBottomRight);
+
+        for (y = yTopLeft; y <= yBottomRight; y++) /* both corners inclusive */
+            for (x = xTopLeft; x <= xBottomRight; x++)
+                map[ y * picWidth + x ] = group; /* lower ids overwrite higher */
+    }
+
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeBoxOutMap
+
+        Functional description:
+            Function to decode box-out slice group map type, i.e. slice group
+            map type 3.
+
+        Inputs:
+            map                               pointer to the map
+            sliceGroupChangeDirectionFlag     slice_group_change_direction_flag
+            unitsInSliceGroup0                mbs on slice group 0
+            picWidth                          picture width in macroblocks
+            picHeight                         picture height in macroblocks
+
+        Outputs:
+            map                               slice group map is stored here
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void DecodeBoxOutMap(
+  u32 *map,
+  u32 sliceGroupChangeDirectionFlag,
+  u32 unitsInSliceGroup0,
+  u32 picWidth,
+  u32 picHeight)
+{
+
+/* Variables */
+
+    u32 i, k, picSize;  /* k counts units assigned to group 0 so far */
+    i32 x, y, xDir, yDir, leftBound, topBound, rightBound, bottomBound;
+    u32 mapUnitVacant;  /* TRUE if the visited unit was still in group 1 */
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(picWidth);
+    ASSERT(picHeight);
+
+    picSize = picWidth * picHeight;
+    ASSERT(unitsInSliceGroup0 <= picSize);
+
+    for (i = 0; i < picSize; i++) /* every unit starts in group 1 */
+        map[i] = 1;
+
+    x = (picWidth - (u32)sliceGroupChangeDirectionFlag) >> 1; /* start column */
+    y = (picHeight - (u32)sliceGroupChangeDirectionFlag) >> 1; /* start row */
+
+    leftBound = x; /* the visited box initially is just the start unit */
+    topBound = y;
+
+    rightBound = x;
+    bottomBound = y;
+
+    xDir = (i32)sliceGroupChangeDirectionFlag - 1; /* flag 0 -> -1, flag 1 -> 0 */
+    yDir = (i32)sliceGroupChangeDirectionFlag;     /* flag 0 ->  0, flag 1 -> 1 */
+
+    for (k = 0; k < unitsInSliceGroup0; k += mapUnitVacant ? 1 : 0)
+    {
+        mapUnitVacant = (map[ (u32)y * picWidth + (u32)x ] == 1) ?
+                                        HANTRO_TRUE : HANTRO_FALSE;
+
+        if (mapUnitVacant) /* claim the unit for group 0 */
+            map[ (u32)y * picWidth + (u32)x ] = 0;
+
+        if (xDir == -1 && x == leftBound) /* at left edge: grow box, turn */
+        {
+            leftBound = MAX(leftBound - 1, 0); /* clamped to the picture */
+            x = leftBound;
+            xDir = 0;
+            yDir = 2 * (i32)sliceGroupChangeDirectionFlag - 1;
+        }
+        else if (xDir == 1 && x == rightBound) /* at right edge: grow box, turn */
+        {
+            rightBound = MIN(rightBound + 1, (i32)picWidth - 1);
+            x = rightBound;
+            xDir = 0;
+            yDir = 1 - 2 * (i32)sliceGroupChangeDirectionFlag;
+        }
+        else if (yDir == -1 && y == topBound) /* at top edge: grow box, turn */
+        {
+            topBound = MAX(topBound - 1, 0);
+            y = topBound;
+            xDir = 1 - 2 * (i32)sliceGroupChangeDirectionFlag;
+            yDir = 0;
+        }
+        else if (yDir == 1 && y == bottomBound) /* at bottom edge: grow, turn */
+        {
+            bottomBound = MIN(bottomBound + 1, (i32)picHeight - 1);
+            y = bottomBound;
+            xDir = 2 * (i32)sliceGroupChangeDirectionFlag - 1;
+            yDir = 0;
+        }
+        else /* not at an edge of the box: keep moving in the same direction */
+        {
+            x += xDir;
+            y += yDir;
+        }
+    }
+
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeRasterScanMap
+
+        Functional description:
+            Function to decode raster scan slice group map type, i.e. slice
+            group map type 4.
+
+        Inputs:
+            map                               pointer to the map
+            sliceGroupChangeDirectionFlag     slice_group_change_direction_flag
+            sizeOfUpperLeftGroup              mbs in upperLeftGroup
+            picSize                           picture size in macroblocks
+
+        Outputs:
+            map                               slice group map is stored here
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void DecodeRasterScanMap(
+  u32 *map,
+  u32 sliceGroupChangeDirectionFlag,
+  u32 sizeOfUpperLeftGroup,
+  u32 picSize)
+{
+
+/* Variables: i = map index, visited in ascending order */
+
+    u32 i;
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(picSize);
+    ASSERT(sizeOfUpperLeftGroup <= picSize);
+
+    for (i = 0; i < picSize; i++)
+        if (i < sizeOfUpperLeftGroup) /* first part gets the flag value */
+            map[i] = (u32)sliceGroupChangeDirectionFlag;
+        else /* the rest gets the other of the two group ids */
+            map[i] = 1 - (u32)sliceGroupChangeDirectionFlag;
+
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeWipeMap
+
+        Functional description:
+            Function to decode wipe slice group map type, i.e. slice group map
+            type 5.
+
+        Inputs:
+            sliceGroupChangeDirectionFlag     slice_group_change_direction_flag
+            sizeOfUpperLeftGroup              mbs in upperLeftGroup
+            picWidth                          picture width in macroblocks
+            picHeight                         picture height in macroblocks
+
+        Outputs:
+            map                               slice group map is stored here
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void DecodeWipeMap(
+  u32 *map,
+  u32 sliceGroupChangeDirectionFlag,
+  u32 sizeOfUpperLeftGroup,
+  u32 picWidth,
+  u32 picHeight)
+{
+
+/* Variables: i = row, j = column, k = number of units visited so far */
+
+    u32 i,j,k;
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(picWidth);
+    ASSERT(picHeight);
+    ASSERT(sizeOfUpperLeftGroup <= picWidth * picHeight);
+
+    k = 0;
+    for (j = 0; j < picWidth; j++) /* column by column ... */
+        for (i = 0; i < picHeight; i++) /* ... top to bottom in each column */
+            if (k++ < sizeOfUpperLeftGroup) /* first units get the flag value */
+                map[ i * picWidth + j ] = (u32)sliceGroupChangeDirectionFlag;
+            else
+                map[ i * picWidth + j ] = 1 -
+                    (u32)sliceGroupChangeDirectionFlag;
+
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdDecodeSliceGroupMap
+
+        Functional description:
+            Function to decode macroblock to slice group map. Construction
+            of different slice group map types is handled by separate
+            functions defined above. See standard for details how slice group
+            maps are computed.
+
+        Inputs:
+            pps                     active picture parameter set
+            sliceGroupChangeCycle   slice_group_change_cycle
+            picWidth                picture width in macroblocks
+            picHeight               picture height in macroblocks
+
+        Outputs:
+            map                     slice group map is stored here
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdDecodeSliceGroupMap(
+  u32 *map,
+  picParamSet_t *pps,
+  u32 sliceGroupChangeCycle,
+  u32 picWidth,
+  u32 picHeight)
+{
+
+/* Variables: the two sizes are only computed for map types 3..5 */
+
+    u32 i, picSize, unitsInSliceGroup0 = 0, sizeOfUpperLeftGroup = 0;
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(pps);
+    ASSERT(picWidth);
+    ASSERT(picHeight);
+    ASSERT(pps->sliceGroupMapType < 7);
+
+    picSize = picWidth * picHeight; /* number of entries written to map */
+
+    /* just one slice group -> all macroblocks belong to group 0 */
+    if (pps->numSliceGroups == 1)
+    {
+        H264SwDecMemset(map, 0, picSize * sizeof(u32));
+        return;
+    }
+
+    if (pps->sliceGroupMapType > 2 && pps->sliceGroupMapType < 6) /* types 3..5 */
+    {
+        ASSERT(pps->sliceGroupChangeRate &&
+               pps->sliceGroupChangeRate <= picSize);
+
+        unitsInSliceGroup0 = /* cycle * rate, clamped to the picture size */
+            MIN(sliceGroupChangeCycle * pps->sliceGroupChangeRate, picSize);
+
+        if (pps->sliceGroupMapType == 4 || pps->sliceGroupMapType == 5)
+            sizeOfUpperLeftGroup = pps->sliceGroupChangeDirectionFlag ?
+                (picSize - unitsInSliceGroup0) : unitsInSliceGroup0;
+    }
+
+    switch (pps->sliceGroupMapType) /* dispatch to the per-type builder */
+    {
+        case 0:
+            DecodeInterleavedMap(map, pps->numSliceGroups,
+              pps->runLength, picSize);
+            break;
+
+        case 1:
+            DecodeDispersedMap(map, pps->numSliceGroups, picWidth,
+              picHeight);
+            break;
+
+        case 2:
+            DecodeForegroundLeftOverMap(map, pps->numSliceGroups,
+              pps->topLeft, pps->bottomRight, picWidth, picHeight);
+            break;
+
+        case 3:
+            DecodeBoxOutMap(map, pps->sliceGroupChangeDirectionFlag,
+              unitsInSliceGroup0, picWidth, picHeight);
+            break;
+
+        case 4:
+            DecodeRasterScanMap(map,
+              pps->sliceGroupChangeDirectionFlag, sizeOfUpperLeftGroup,
+              picSize);
+            break;
+
+        case 5:
+            DecodeWipeMap(map, pps->sliceGroupChangeDirectionFlag,
+              sizeOfUpperLeftGroup, picWidth, picHeight);
+            break;
+
+        default: /* type 6 given the ASSERT above: explicit per-unit group ids */
+            ASSERT(pps->sliceGroupId);
+            for (i = 0; i < picSize; i++)
+            {
+                ASSERT(pps->sliceGroupId[i] < pps->numSliceGroups);
+                map[i] = pps->sliceGroupId[i];
+            }
+            break;
+    }
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.h
new file mode 100755
index 0000000..4bcb6f2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_SLICE_GROUP_MAP_H
+#define H264SWDEC_SLICE_GROUP_MAP_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_pic_param_set.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+void h264bsdDecodeSliceGroupMap(
+  u32 *map,                  /* out: slice group map that gets written */
+  picParamSet_t *pps,        /* in: supplies sliceGroupMapType and related data */
+  u32 sliceGroupChangeCycle, /* in: multiplied by pps->sliceGroupChangeRate */
+  u32 picWidth,              /* in: picture width (presumably in MBs; confirm) */
+  u32 picHeight);            /* in: picture height (presumably in MBs; confirm) */
+
+#endif /* #ifndef H264SWDEC_SLICE_GROUP_MAP_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.c
new file mode 100755
index 0000000..a7c6f64
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.c
@@ -0,0 +1,1511 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdDecodeSliceHeader
+          NumSliceGroupChangeCycleBits
+          RefPicListReordering
+          DecRefPicMarking
+          CheckPpsId
+          CheckFrameNum
+          CheckIdrPicId
+          CheckPicOrderCntLsb
+          CheckDeltaPicOrderCntBottom
+          CheckDeltaPicOrderCnt
+          CheckRedundantPicCnt
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_slice_header.h"
+#include "h264bsd_util.h"
+#include "h264bsd_vlc.h"
+#include "h264bsd_nal_unit.h"
+#include "h264bsd_dpb.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 RefPicListReordering(strmData_t *, refPicListReordering_t *,
+    u32, u32);
+
+static u32 NumSliceGroupChangeCycleBits(u32 picSizeInMbs,
+    u32 sliceGroupChangeRate);
+
+static u32 DecRefPicMarking(strmData_t *pStrmData,
+    decRefPicMarking_t *pDecRefPicMarking, nalUnitType_e nalUnitType,
+    u32 numRefFrames);
+
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdDecodeSliceHeader
+
+        Functional description:
+            Decode slice header data from the stream.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            pSeqParamSet    pointer to active sequence parameter set
+            pPicParamSet    pointer to active picture parameter set
+            pNalUnit        pointer to current NAL unit structure
+
+        Outputs:
+            pSliceHeader    decoded data is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data or end of stream
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSliceHeader(strmData_t *pStrmData, sliceHeader_t *pSliceHeader,
+    seqParamSet_t *pSeqParamSet, picParamSet_t *pPicParamSet,
+    nalUnit_t *pNalUnit)
+{
+
+/* Variables */
+
+    u32 tmp, i, value;      /* tmp: call status or raw bits, i: bit count */
+    i32 itmp;               /* signed Exp-Golomb values */
+    u32 picSizeInMbs;       /* picWidthInMbs * picHeightInMbs */
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSliceHeader);
+    ASSERT(pSeqParamSet);
+    ASSERT(pPicParamSet);
+    ASSERT( pNalUnit->nalUnitType == NAL_CODED_SLICE ||
+            pNalUnit->nalUnitType == NAL_CODED_SLICE_IDR );
+
+    /* reset the output first; elements skipped below are not assigned */
+    H264SwDecMemset(pSliceHeader, 0, sizeof(sliceHeader_t));
+
+    picSizeInMbs = pSeqParamSet->picWidthInMbs * pSeqParamSet->picHeightInMbs;
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    pSliceHeader->firstMbInSlice = value;
+    if (value >= picSizeInMbs)
+    {
+        EPRINT("first_mb_in_slice");
+        return(HANTRO_NOK);
+    }
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    pSliceHeader->sliceType = value;
+    /* slice type has to be either I or P slice. P slice is not allowed when
+     * current NAL unit is an IDR NAL unit or num_ref_frames is 0 */
+    if ( !IS_I_SLICE(pSliceHeader->sliceType) &&
+         ( !IS_P_SLICE(pSliceHeader->sliceType) ||
+           IS_IDR_NAL_UNIT(pNalUnit) ||
+           !pSeqParamSet->numRefFrames ) )
+    {
+        EPRINT("slice_type");
+        return(HANTRO_NOK);
+    }
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    pSliceHeader->picParameterSetId = value;
+    if (pSliceHeader->picParameterSetId != pPicParamSet->picParameterSetId)
+    {
+        EPRINT("pic_parameter_set_id");
+        return(HANTRO_NOK);
+    }
+
+    /* log2(maxFrameNum) -> num bits to represent frame_num */
+    i = 0;
+    while (pSeqParamSet->maxFrameNum >> i)
+        i++;
+    i--; /* i = floor(log2(maxFrameNum)) */
+
+    tmp = h264bsdGetBits(pStrmData, i);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    if (IS_IDR_NAL_UNIT(pNalUnit) && tmp != 0) /* IDR requires frame_num 0 */
+    {
+        EPRINT("frame_num");
+        return(HANTRO_NOK);
+    }
+    pSliceHeader->frameNum = tmp;
+
+    if (IS_IDR_NAL_UNIT(pNalUnit))
+    {
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        pSliceHeader->idrPicId = value;
+        if (value > 65535)
+        {
+            EPRINT("idr_pic_id");
+            return(HANTRO_NOK);
+        }
+    }
+
+    if (pSeqParamSet->picOrderCntType == 0)
+    {
+        /* log2(maxPicOrderCntLsb) -> num bits to represent pic_order_cnt_lsb */
+        i = 0;
+        while (pSeqParamSet->maxPicOrderCntLsb >> i)
+            i++;
+        i--; /* i = floor(log2(maxPicOrderCntLsb)) */
+
+        tmp = h264bsdGetBits(pStrmData, i);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pSliceHeader->picOrderCntLsb = tmp;
+
+        if (pPicParamSet->picOrderPresentFlag)
+        {
+            tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+            if (tmp != HANTRO_OK)
+                return(tmp);
+            pSliceHeader->deltaPicOrderCntBottom = itmp;
+        }
+
+        /* check that picOrderCnt for IDR picture will be zero. See
+         * DecodePicOrderCnt function to understand the logic here */
+        if ( IS_IDR_NAL_UNIT(pNalUnit) &&
+             ( (pSliceHeader->picOrderCntLsb >
+                pSeqParamSet->maxPicOrderCntLsb/2) ||
+                MIN((i32)pSliceHeader->picOrderCntLsb,
+                    (i32)pSliceHeader->picOrderCntLsb +
+                    pSliceHeader->deltaPicOrderCntBottom) != 0 ) )
+        {
+            return(HANTRO_NOK);
+        }
+    }
+
+    if ( (pSeqParamSet->picOrderCntType == 1) &&
+         !pSeqParamSet->deltaPicOrderAlwaysZeroFlag )
+    {
+        tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        pSliceHeader->deltaPicOrderCnt[0] = itmp;
+
+        if (pPicParamSet->picOrderPresentFlag)
+        {
+            tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+            if (tmp != HANTRO_OK)
+                return(tmp);
+            pSliceHeader->deltaPicOrderCnt[1] = itmp;
+        }
+
+        /* check that picOrderCnt for IDR picture will be zero. See
+         * DecodePicOrderCnt function to understand the logic here */
+        if ( IS_IDR_NAL_UNIT(pNalUnit) &&
+             MIN(pSliceHeader->deltaPicOrderCnt[0],
+                 pSliceHeader->deltaPicOrderCnt[0] +
+                 pSeqParamSet->offsetForTopToBottomField +
+                 pSliceHeader->deltaPicOrderCnt[1]) != 0)
+        {
+            return(HANTRO_NOK);
+        }
+    }
+
+    if (pPicParamSet->redundantPicCntPresentFlag)
+    {
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        pSliceHeader->redundantPicCnt = value;
+        if (value > 127)
+        {
+            EPRINT("redundant_pic_cnt");
+            return(HANTRO_NOK);
+        }
+    }
+
+    if (IS_P_SLICE(pSliceHeader->sliceType))
+    {
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pSliceHeader->numRefIdxActiveOverrideFlag = tmp;
+
+        if (pSliceHeader->numRefIdxActiveOverrideFlag)
+        {
+            tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+            if (tmp != HANTRO_OK)
+                return(tmp);
+            if (value > 15)
+            {
+                EPRINT("num_ref_idx_l0_active_minus1");
+                return(HANTRO_NOK);
+            }
+            pSliceHeader->numRefIdxL0Active = value + 1;
+        }
+        /* set numRefIdxL0Active from pic param set */
+        else
+        {
+            /* if value (minus1) in picture parameter set exceeds 15 it should
+             * have been overridden here */
+            if (pPicParamSet->numRefIdxL0Active > 16)
+            {
+                EPRINT("num_ref_idx_active_override_flag");
+                return(HANTRO_NOK);
+            }
+            pSliceHeader->numRefIdxL0Active = pPicParamSet->numRefIdxL0Active;
+        }
+    }
+
+    if (IS_P_SLICE(pSliceHeader->sliceType)) /* list reordering, P only */
+    {
+        tmp = RefPicListReordering(pStrmData,
+            &pSliceHeader->refPicListReordering,
+            pSliceHeader->numRefIdxL0Active,
+            pSeqParamSet->maxFrameNum);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+    }
+
+    if (pNalUnit->nalRefIdc != 0)
+    {
+        tmp = DecRefPicMarking(pStrmData, &pSliceHeader->decRefPicMarking,
+            pNalUnit->nalUnitType, pSeqParamSet->numRefFrames);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+    }
+
+    /* decode sliceQpDelta and check that initial QP for the slice will be on
+     * the range [0, 51] */
+    tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    pSliceHeader->sliceQpDelta = itmp;
+    itmp += (i32)pPicParamSet->picInitQp; /* itmp = initial slice QP */
+    if ( (itmp < 0) || (itmp > 51) )
+    {
+        EPRINT("slice_qp_delta");
+        return(HANTRO_NOK);
+    }
+
+    if (pPicParamSet->deblockingFilterControlPresentFlag)
+    {
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        pSliceHeader->disableDeblockingFilterIdc = value;
+        if (pSliceHeader->disableDeblockingFilterIdc > 2)
+        {
+            EPRINT("disable_deblocking_filter_idc");
+            return(HANTRO_NOK);
+        }
+
+        if (pSliceHeader->disableDeblockingFilterIdc != 1)
+        {
+            tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+            if (tmp != HANTRO_OK)
+                return(tmp);
+            if ( (itmp < -6) || (itmp > 6) )
+            {
+               EPRINT("slice_alpha_c0_offset_div2");
+               return(HANTRO_NOK);
+            }
+            pSliceHeader->sliceAlphaC0Offset = itmp * 2; /* div2 undone */
+
+            tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+            if (tmp != HANTRO_OK)
+                return(tmp);
+            if ( (itmp < -6) || (itmp > 6) )
+            {
+               EPRINT("slice_beta_offset_div2");
+               return(HANTRO_NOK);
+            }
+            pSliceHeader->sliceBetaOffset = itmp * 2; /* div2 undone */
+        }
+    }
+
+    if ( (pPicParamSet->numSliceGroups > 1) &&
+         (pPicParamSet->sliceGroupMapType >= 3) &&
+         (pPicParamSet->sliceGroupMapType <= 5) )
+    {
+        /* set tmp to number of bits used to represent slice_group_change_cycle
+         * in the stream */
+        tmp = NumSliceGroupChangeCycleBits(picSizeInMbs,
+            pPicParamSet->sliceGroupChangeRate);
+        value = h264bsdGetBits(pStrmData, tmp);
+        if (value == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pSliceHeader->sliceGroupChangeCycle = value;
+
+        /* corresponds to tmp = Ceil(picSizeInMbs / sliceGroupChangeRate) */
+        tmp = (picSizeInMbs + pPicParamSet->sliceGroupChangeRate - 1) /
+              pPicParamSet->sliceGroupChangeRate;
+        if (pSliceHeader->sliceGroupChangeCycle > tmp)
+        {
+            EPRINT("slice_group_change_cycle");
+            return(HANTRO_NOK);
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: NumSliceGroupChangeCycleBits
+
+        Functional description:
+            Determine number of bits needed to represent
+            slice_group_change_cycle in the stream. The standard states that
+            slice_group_change_cycle is represented by
+                Ceil( Log2( (picSizeInMbs / sliceGroupChangeRate) + 1) )
+
+            bits. Division "/" in the equation is non-truncating division.
+
+        Inputs:
+            picSizeInMbs            picture size in macroblocks
+            sliceGroupChangeRate
+
+        Outputs:
+            none
+
+        Returns:
+            number of bits needed
+
+------------------------------------------------------------------------------*/
+
+u32 NumSliceGroupChangeCycleBits(u32 picSizeInMbs, u32 sliceGroupChangeRate)
+{
+
+/* Variables */
+
+    u32 cycles, bits;
+
+/* Code */
+
+    ASSERT(picSizeInMbs);
+    ASSERT(sliceGroupChangeRate);
+    ASSERT(sliceGroupChangeRate <= picSizeInMbs);
+
+    /* cycles = Ceil(picSizeInMbs / sliceGroupChangeRate) + 1: the integer
+     * quotient plus one, and one more when the division leaves a
+     * remainder */
+    cycles = picSizeInMbs / sliceGroupChangeRate + 1;
+    if (picSizeInMbs % sliceGroupChangeRate)
+        cycles++;
+
+    /* bits = index of the most significant set bit of cycles, which is
+     * Floor(Log2(cycles)) */
+    bits = 0;
+    while (cycles >> (bits + 1))
+        bits++;
+
+    /* round up to Ceil(Log2(cycles)): a set bit below the top one means
+     * cycles is not a power of two and one more bit is needed */
+    if (cycles & ((1U << bits) - 1))
+        bits++;
+
+    return(bits);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: RefPicListReordering
+
+        Functional description:
+            Decode reference picture list reordering syntax elements from
+            the stream. Max number of reordering commands is numRefIdxActive.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            numRefIdxActive number of active reference indices to be used for
+                            current slice
+            maxPicNum       maxFrameNum from the active SPS
+
+        Outputs:
+            pRefPicListReordering   decoded data is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 RefPicListReordering(strmData_t *pStrmData,
+    refPicListReordering_t *pRefPicListReordering, u32 numRefIdxActive,
+    u32 maxPicNum)
+{
+
+/* Variables */
+
+    u32 tmp, value, i;      /* i: number of commands read so far */
+    u32 command;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pRefPicListReordering);
+    ASSERT(numRefIdxActive);
+    ASSERT(maxPicNum);
+
+    /* one-bit flag: reordering commands are read only when it is set */
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    pRefPicListReordering->refPicListReorderingFlagL0 = tmp;
+
+    if (pRefPicListReordering->refPicListReorderingFlagL0)
+    {
+        i = 0;
+
+        do
+        {
+            if (i > numRefIdxActive) /* slots 0..numRefIdxActive allowed */
+            {
+                EPRINT("Too many reordering commands");
+                return(HANTRO_NOK);
+            }
+
+            tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &command);
+            if (tmp != HANTRO_OK)
+                return(tmp);
+            if (command > 3)
+            {
+                EPRINT("reordering_of_pic_nums_idc");
+                return(HANTRO_NOK);
+            }
+
+            pRefPicListReordering->command[i].reorderingOfPicNumsIdc = command;
+
+            if ((command == 0) || (command == 1))
+            {
+                tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                if (value >= maxPicNum)
+                {
+                    EPRINT("abs_diff_pic_num_minus1");
+                    return(HANTRO_NOK);
+                }
+                pRefPicListReordering->command[i].absDiffPicNum = value + 1;
+                            }
+            else if (command == 2)
+            {
+                tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                pRefPicListReordering->command[i].longTermPicNum = value;
+                            }
+            i++;
+        } while (command != 3); /* command 3 ends the list */
+
+        /* there shall be at least one reordering command if
+         * refPicListReorderingFlagL0 was set */
+        if (i == 1) /* only the terminating command 3 was read */
+        {
+            EPRINT("ref_pic_list_reordering");
+            return(HANTRO_NOK);
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecRefPicMarking
+
+        Functional description:
+            Decode decoded reference picture marking syntax elements from
+            the stream.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            nalUnitType     type of the current NAL unit
+            numRefFrames    max number of reference frames from the active SPS
+
+        Outputs:
+            pDecRefPicMarking   decoded data is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 DecRefPicMarking(strmData_t *pStrmData,
+    decRefPicMarking_t *pDecRefPicMarking, nalUnitType_e nalUnitType,
+    u32 numRefFrames)
+{
+
+/* Variables */
+
+    u32 tmp, value;
+    u32 i;                  /* index of the next operation[] slot */
+    u32 operation;
+    /* variables for error checking purposes, store number of memory
+     * management operations of certain type */
+    u32 num4 = 0, num5 = 0, num6 = 0, num1to3 = 0;
+
+/* Code */
+
+    ASSERT( nalUnitType == NAL_CODED_SLICE_IDR ||
+            nalUnitType == NAL_CODED_SLICE ||
+            nalUnitType == NAL_SEI );
+
+    /* IDR: two one-bit flags; otherwise: mode flag plus operation list */
+    if (nalUnitType == NAL_CODED_SLICE_IDR)
+    {
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pDecRefPicMarking->noOutputOfPriorPicsFlag = tmp;
+
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pDecRefPicMarking->longTermReferenceFlag = tmp;
+        if (!numRefFrames && pDecRefPicMarking->longTermReferenceFlag)
+        {
+            EPRINT("long_term_reference_flag");
+            return(HANTRO_NOK);
+        }
+    }
+    else
+    {
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pDecRefPicMarking->adaptiveRefPicMarkingModeFlag = tmp;
+        if (pDecRefPicMarking->adaptiveRefPicMarkingModeFlag)
+        {
+            i = 0;
+            do
+            {
+                /* see explanation of the MAX_NUM_MMC_OPERATIONS in
+                 * slice_header.h */
+                if (i > (2 * numRefFrames + 2))
+                {
+                    EPRINT("Too many management operations");
+                    return(HANTRO_NOK);
+                }
+
+                tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &operation);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                if (operation > 6)
+                {
+                    EPRINT("memory_management_control_operation");
+                    return(HANTRO_NOK);
+                }
+
+                pDecRefPicMarking->operation[i].
+                    memoryManagementControlOperation = operation;
+                if ((operation == 1) || (operation == 3))
+                {
+                    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+                    if (tmp != HANTRO_OK)
+                        return(tmp);
+                    pDecRefPicMarking->operation[i].differenceOfPicNums =
+                        value + 1;
+                }
+                if (operation == 2)
+                {
+                    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+                    if (tmp != HANTRO_OK)
+                        return(tmp);
+                    pDecRefPicMarking->operation[i].longTermPicNum = value;
+                }
+                if ((operation == 3) || (operation == 6))
+                {
+                    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+                    if (tmp != HANTRO_OK)
+                        return(tmp);
+                    pDecRefPicMarking->operation[i].longTermFrameIdx =
+                        value;
+                }
+                if (operation == 4)
+                {
+                    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+                    if (tmp != HANTRO_OK)
+                        return(tmp);
+                    /* value shall be in range [0, numRefFrames] */
+                    if (value > numRefFrames)
+                    {
+                        EPRINT("max_long_term_frame_idx_plus1");
+                        return(HANTRO_NOK);
+                    }
+                    if (value == 0)
+                    {
+                        pDecRefPicMarking->operation[i].
+                            maxLongTermFrameIdx =
+                            NO_LONG_TERM_FRAME_INDICES;
+                    }
+                    else
+                    {
+                        pDecRefPicMarking->operation[i].
+                            maxLongTermFrameIdx = value - 1;
+                    }
+                    num4++;
+                }
+                if (operation == 5)
+                {
+                    num5++;
+                }
+                if (operation && operation <= 3)
+                    num1to3++;
+                if (operation == 6)
+                    num6++;
+
+                i++;
+            } while (operation != 0); /* operation 0 ends the list */
+
+            /* error checking: 4, 5 and 6 at most once each, no 5 with 1..3 */
+            if (num4 > 1 || num5 > 1 || num6 > 1 || (num1to3 && num5))
+                return(HANTRO_NOK);
+
+        }
+    }
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdCheckPpsId
+
+        Functional description:
+            Peek value of pic_parameter_set_id from the slice header. Function
+            does not modify current stream positions but copies the stream
+            data structure to tmp structure which is used while accessing
+            stream data.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+
+        Outputs:
+            picParamSetId   value is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckPpsId(strmData_t *pStrmData, u32 *picParamSetId)
+{
+
+/* Variables */
+
+    u32 status, ppsId, skipped;
+    strmData_t peekStrm;
+
+/* Code */
+
+    ASSERT(pStrmData);
+
+    /* parse from a private copy so that the caller's stream position is
+     * left untouched */
+    peekStrm = *pStrmData;
+
+    /* pic_parameter_set_id is the third Exp-Golomb code read here;
+     * first_mb_in_slice and slice_type come before it and are discarded */
+    for (skipped = 0; skipped < 2; skipped++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(&peekStrm, &ppsId);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    status = h264bsdDecodeExpGolombUnsigned(&peekStrm, &ppsId);
+    if (status != HANTRO_OK)
+        return(status);
+
+    if (ppsId >= MAX_NUM_PIC_PARAM_SETS)
+        return(HANTRO_NOK);
+
+    *picParamSetId = ppsId;
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCheckFrameNum
+
+        Functional description:
+            Peek value of frame_num from the slice header. Function does not
+            modify current stream positions but copies the stream data
+            structure to tmp structure which is used while accessing stream
+            data.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            maxFrameNum
+
+        Outputs:
+            frameNum        value is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckFrameNum(
+  strmData_t *pStrmData,
+  u32 maxFrameNum,
+  u32 *frameNum)
+{
+
+/* Variables */
+
+    u32 status, discarded, field;
+    u32 numBits;
+    u32 bits;
+    strmData_t peekStrm;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(maxFrameNum);
+    ASSERT(frameNum);
+
+    /* parse from a private copy so that the caller's stream position is
+     * left untouched */
+    peekStrm = *pStrmData;
+
+    /* discard the three Exp-Golomb coded fields that precede frame_num:
+     * first_mb_in_slice, slice_type and pic_parameter_set_id */
+    for (field = 0; field < 3; field++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(&peekStrm, &discarded);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* frame_num occupies log2(maxFrameNum) bits: count the significant bits
+     * of maxFrameNum and subtract one */
+    numBits = 0;
+    while (maxFrameNum >> numBits)
+        numBits++;
+    numBits--;
+
+    /* frame_num; END_OF_STREAM from h264bsdGetBits is mapped to
+     * HANTRO_NOK */
+    bits = h264bsdGetBits(&peekStrm, numBits);
+    if (bits == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    *frameNum = bits;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCheckIdrPicId
+
+        Functional description:
+            Peek value of idr_pic_id from the slice header. Function does not
+            modify current stream positions but copies the stream data
+            structure to tmp structure which is used while accessing stream
+            data.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            maxFrameNum     max frame number from active SPS
+            nalUnitType     type of the current NAL unit
+
+        Outputs:
+            idrPicId        value is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckIdrPicId(
+  strmData_t *pStrmData,
+  u32 maxFrameNum,
+  nalUnitType_e nalUnitType,
+  u32 *idrPicId)
+{
+
+/* Variables */
+
+    u32 status, discarded, field;
+    u32 numBits;
+    u32 bits;
+    strmData_t peekStrm;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(maxFrameNum);
+    ASSERT(idrPicId);
+
+    /* idr_pic_id is present only when nalUnitType equals
+     * NAL_CODED_SLICE_IDR, there is nothing to peek otherwise */
+    if (nalUnitType != NAL_CODED_SLICE_IDR)
+        return(HANTRO_NOK);
+
+    /* parse from a private copy so that the caller's stream position is
+     * left untouched */
+    peekStrm = *pStrmData;
+
+    /* discard the three leading Exp-Golomb coded fields:
+     * first_mb_in_slice, slice_type and pic_parameter_set_id */
+    for (field = 0; field < 3; field++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(&peekStrm, &discarded);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* frame_num occupies log2(maxFrameNum) bits: count the significant bits
+     * of maxFrameNum and subtract one */
+    numBits = 0;
+    while (maxFrameNum >> numBits)
+        numBits++;
+    numBits--;
+
+    /* discard frame_num; END_OF_STREAM from h264bsdGetBits is mapped to
+     * HANTRO_NOK */
+    bits = h264bsdGetBits(&peekStrm, numBits);
+    if (bits == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* idr_pic_id is decoded straight into the caller's variable, no
+     * intermediate copy is kept */
+    status = h264bsdDecodeExpGolombUnsigned(&peekStrm, idrPicId);
+    if (status != HANTRO_OK)
+        return(status);
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCheckPicOrderCntLsb
+
+        Functional description:
+            Peek value of pic_order_cnt_lsb from the slice header. Function
+            does not modify current stream positions but copies the stream
+            data structure to tmp structure which is used while accessing
+            stream data.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            pSeqParamSet    pointer to active SPS
+            nalUnitType     type of the current NAL unit
+
+        Outputs:
+            picOrderCntLsb  value is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckPicOrderCntLsb(
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  nalUnitType_e nalUnitType,
+  u32 *picOrderCntLsb)
+{
+
+/* Variables */
+
+    u32 status, discarded, field;
+    u32 numBits;
+    u32 bits;
+    strmData_t peekStrm;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeqParamSet);
+    ASSERT(picOrderCntLsb);
+
+    /* only to be called when picOrderCntType of the SPS equals 0 */
+    ASSERT(pSeqParamSet->picOrderCntType == 0);
+    ASSERT(pSeqParamSet->maxFrameNum);
+    ASSERT(pSeqParamSet->maxPicOrderCntLsb);
+
+    /* parse from a private copy so that the caller's stream position is
+     * left untouched */
+    peekStrm = *pStrmData;
+
+    /* discard the three leading Exp-Golomb coded fields:
+     * first_mb_in_slice, slice_type and pic_parameter_set_id */
+    for (field = 0; field < 3; field++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(&peekStrm, &discarded);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* frame_num occupies log2(maxFrameNum) bits: count the significant bits
+     * of maxFrameNum and subtract one */
+    numBits = 0;
+    while (pSeqParamSet->maxFrameNum >> numBits)
+        numBits++;
+    numBits--;
+
+    /* discard frame_num */
+    bits = h264bsdGetBits(&peekStrm, numBits);
+    if (bits == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* for NAL_CODED_SLICE_IDR one more Exp-Golomb code, idr_pic_id, is
+     * discarded at this point */
+    if (nalUnitType == NAL_CODED_SLICE_IDR)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(&peekStrm, &discarded);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* pic_order_cnt_lsb occupies log2(maxPicOrderCntLsb) bits */
+    numBits = 0;
+    while (pSeqParamSet->maxPicOrderCntLsb >> numBits)
+        numBits++;
+    numBits--;
+
+    /* pic_order_cnt_lsb */
+    bits = h264bsdGetBits(&peekStrm, numBits);
+    if (bits == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    *picOrderCntLsb = bits;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCheckDeltaPicOrderCntBottom
+
+        Functional description:
+            Peek value of delta_pic_order_cnt_bottom from the slice header.
+            Function does not modify current stream positions but copies the
+            stream data structure to tmp structure which is used while
+            accessing stream data.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            pSeqParamSet    pointer to active SPS
+            nalUnitType     type of the current NAL unit
+
+        Outputs:
+            deltaPicOrderCntBottom  value is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckDeltaPicOrderCntBottom(
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  nalUnitType_e nalUnitType,
+  i32 *deltaPicOrderCntBottom)
+{
+
+/* Variables */
+
+    u32 status, skipped, numBits, field;
+    strmData_t strmCopy;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeqParamSet);
+    ASSERT(deltaPicOrderCntBottom);
+
+    /* picOrderCntType must be equal to 0 and picOrderPresentFlag must be
+     * TRUE; only the former is asserted here since the flag is not passed
+     * in. Both maxima are needed to derive the fixed field widths */
+    ASSERT(pSeqParamSet->picOrderCntType == 0);
+    ASSERT(pSeqParamSet->maxFrameNum);
+    ASSERT(pSeqParamSet->maxPicOrderCntLsb);
+
+    /* peek only: all reads go through a private copy so that the caller's
+     * stream position params stay untouched */
+    strmCopy = *pStrmData;
+
+    /* first_mb_in_slice, slice_type and pic_parameter_set_id: three
+     * consecutive unsigned Exp-Golomb fields, values not needed here */
+    for (field = 0; field < 3; field++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(&strmCopy, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* log2(maxFrameNum) -> num bits to represent frame_num: count the
+     * shifts needed to clear the value, then step back by one */
+    for (numBits = 0; (pSeqParamSet->maxFrameNum >> numBits) != 0; numBits++)
+    {
+    }
+    numBits--;
+
+    /* skip frame_num, only running out of data matters */
+    if (h264bsdGetBits(&strmCopy, numBits) == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* idr_pic_id is skipped for IDR slices only */
+    if (nalUnitType == NAL_CODED_SLICE_IDR)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(&strmCopy, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* log2(maxPicOrderCntLsb) -> num bits to represent pic_order_cnt_lsb,
+     * computed the same way as for frame_num */
+    for (numBits = 0;
+         (pSeqParamSet->maxPicOrderCntLsb >> numBits) != 0;
+         numBits++)
+    {
+    }
+    numBits--;
+
+    /* skip pic_order_cnt_lsb */
+    if (h264bsdGetBits(&strmCopy, numBits) == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* delta_pic_order_cnt_bottom: signed Exp-Golomb, decoded straight into
+     * the caller's output; a decoding failure is passed on as returned by
+     * the decoder */
+    status = h264bsdDecodeExpGolombSigned(&strmCopy, deltaPicOrderCntBottom);
+    if (status != HANTRO_OK)
+        return(status);
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCheckDeltaPicOrderCnt
+
+        Functional description:
+            Peek values delta_pic_order_cnt[0] and delta_pic_order_cnt[1]
+            from the slice header. Function does not modify current stream
+            positions but copies the stream data structure to tmp structure
+            which is used while accessing stream data.
+
+        Inputs:
+            pStrmData               pointer to stream data structure
+            pSeqParamSet            pointer to active SPS
+            nalUnitType             type of the current NAL unit
+            picOrderPresentFlag     flag indicating if delta_pic_order_cnt[1]
+                                    is present in the stream
+
+        Outputs:
+            deltaPicOrderCnt        values are stored here
+
+        Returns:
+            HANTRO_OK               success
+            HANTRO_NOK              invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckDeltaPicOrderCnt(
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  nalUnitType_e nalUnitType,
+  u32 picOrderPresentFlag,
+  i32 *deltaPicOrderCnt)
+{
+
+/* Variables */
+
+    u32 status, skipped, numBits, field;
+    strmData_t strmCopy;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeqParamSet);
+    ASSERT(deltaPicOrderCnt);
+
+    /* picOrderCntType must be equal to 1 and deltaPicOrderAlwaysZeroFlag
+     * must be FALSE; maxFrameNum is needed for the width of frame_num */
+    ASSERT(pSeqParamSet->picOrderCntType == 1);
+    ASSERT(!pSeqParamSet->deltaPicOrderAlwaysZeroFlag);
+    ASSERT(pSeqParamSet->maxFrameNum);
+
+    /* peek only: all reads go through a private copy so that the caller's
+     * stream position params stay untouched */
+    strmCopy = *pStrmData;
+
+    /* first_mb_in_slice, slice_type and pic_parameter_set_id: three
+     * consecutive unsigned Exp-Golomb fields, values not needed here */
+    for (field = 0; field < 3; field++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(&strmCopy, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* log2(maxFrameNum) -> num bits to represent frame_num: count the
+     * shifts needed to clear the value, then step back by one */
+    for (numBits = 0; (pSeqParamSet->maxFrameNum >> numBits) != 0; numBits++)
+    {
+    }
+    numBits--;
+
+    /* skip frame_num: the bits themselves are discarded, only running out
+     * of data matters */
+    if (h264bsdGetBits(&strmCopy, numBits) == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* idr_pic_id precedes the delta values in IDR slices only, skip it
+     * when necessary */
+    if (nalUnitType == NAL_CODED_SLICE_IDR)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(&strmCopy, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* delta_pic_order_cnt[0]: signed Exp-Golomb, decoded straight into the
+     * first element of the caller's array; a decoding failure is passed
+     * on as returned by the decoder */
+    status = h264bsdDecodeExpGolombSigned(&strmCopy, &deltaPicOrderCnt[0]);
+    if (status != HANTRO_OK)
+        return(status);
+
+    /* delta_pic_order_cnt[1] is decoded only when the caller flags it as
+     * present, otherwise the second element is not written */
+    if (picOrderPresentFlag)
+    {
+        status = h264bsdDecodeExpGolombSigned(&strmCopy, &deltaPicOrderCnt[1]);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCheckRedundantPicCnt
+
+        Functional description:
+            Peek value of redundant_pic_cnt from the slice header. Function
+            does not modify current stream positions but copies the stream
+            data structure to tmp structure which is used while accessing
+            stream data.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            pSeqParamSet    pointer to active SPS
+            pPicParamSet    pointer to active PPS
+            nalUnitType     type of the current NAL unit
+
+        Outputs:
+            redundantPicCnt value is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckRedundantPicCnt(
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  picParamSet_t *pPicParamSet,
+  nalUnitType_e nalUnitType,
+  u32 *redundantPicCnt)
+{
+
+/* Variables */
+
+    u32 status, skipped, numBits, field, numDeltas;
+    i32 skippedSigned;
+    strmData_t strmCopy;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeqParamSet);
+    ASSERT(pPicParamSet);
+    ASSERT(redundantPicCnt);
+
+    /* redundant_pic_cnt_flag must be TRUE. maxFrameNum is always needed
+     * for the frame_num width, whereas maxPicOrderCntLsb is needed only
+     * when picOrderCntType is 0 */
+    ASSERT(pPicParamSet->redundantPicCntPresentFlag);
+    ASSERT(pSeqParamSet->maxFrameNum);
+    ASSERT(pSeqParamSet->picOrderCntType > 0 ||
+           pSeqParamSet->maxPicOrderCntLsb);
+
+    /* peek only: all reads go through a private copy so that the caller's
+     * stream position params stay untouched */
+    strmCopy = *pStrmData;
+
+    /* first_mb_in_slice, slice_type and pic_parameter_set_id: three
+     * consecutive unsigned Exp-Golomb fields, values not needed here */
+    for (field = 0; field < 3; field++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(&strmCopy, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* log2(maxFrameNum) -> num bits to represent frame_num: count the
+     * shifts needed to clear the value, then step back by one */
+    for (numBits = 0; (pSeqParamSet->maxFrameNum >> numBits) != 0; numBits++)
+    {
+    }
+    numBits--;
+
+    /* skip frame_num: the bits themselves are discarded, only running out
+     * of data matters */
+    if (h264bsdGetBits(&strmCopy, numBits) == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* idr_pic_id is skipped for IDR slices only */
+    if (nalUnitType == NAL_CODED_SLICE_IDR)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(&strmCopy, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* picture order count fields: what is skipped depends on
+     * picOrderCntType. The two cases exclude each other, and for any
+     * other combination nothing is skipped at this point */
+    if (pSeqParamSet->picOrderCntType == 0)
+    {
+        /* log2(maxPicOrderCntLsb) -> num bits to represent
+         * pic_order_cnt_lsb */
+        for (numBits = 0;
+             (pSeqParamSet->maxPicOrderCntLsb >> numBits) != 0;
+             numBits++)
+        {
+        }
+        numBits--;
+
+        /* skip pic_order_cnt_lsb */
+        if (h264bsdGetBits(&strmCopy, numBits) == END_OF_STREAM)
+            return(HANTRO_NOK);
+
+        /* skip delta_pic_order_cnt_bottom if present */
+        if (pPicParamSet->picOrderPresentFlag)
+        {
+            status = h264bsdDecodeExpGolombSigned(&strmCopy, &skippedSigned);
+            if (status != HANTRO_OK)
+                return(status);
+        }
+    }
+    else if (pSeqParamSet->picOrderCntType == 1 &&
+             !pSeqParamSet->deltaPicOrderAlwaysZeroFlag)
+    {
+        /* skip delta_pic_order_cnt[0] and, if present,
+         * delta_pic_order_cnt[1] */
+        numDeltas = pPicParamSet->picOrderPresentFlag ? 2 : 1;
+        for (field = 0; field < numDeltas; field++)
+        {
+            status = h264bsdDecodeExpGolombSigned(&strmCopy, &skippedSigned);
+            if (status != HANTRO_OK)
+                return(status);
+        }
+    }
+
+    /* redundant_pic_cnt: decoded straight into the caller's output, a
+     * decoding failure is passed on as returned by the decoder */
+    status = h264bsdDecodeExpGolombUnsigned(&strmCopy, redundantPicCnt);
+    if (status != HANTRO_OK)
+        return(status);
+
+    return(HANTRO_OK);
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCheckPriorPicsFlag
+
+        Functional description:
+            Peek value of no_output_of_prior_pics_flag from the slice header.
+            Function does not modify current stream positions but copies
+            the stream data structure to tmp structure which is used while
+            accessing stream data.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            pSeqParamSet    pointer to active SPS
+            pPicParamSet    pointer to active PPS
+            nalUnitType     type of the current NAL unit
+
+        Outputs:
+            noOutputOfPriorPicsFlag value is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data
+
+------------------------------------------------------------------------------*/
+/*lint -e715 disable lint info nalUnitType not referenced */
+u32 h264bsdCheckPriorPicsFlag(u32 * noOutputOfPriorPicsFlag,
+                              const strmData_t * pStrmData,
+                              const seqParamSet_t * pSeqParamSet,
+                              const picParamSet_t * pPicParamSet,
+                              nalUnitType_e nalUnitType)
+{
+/* Variables */
+
+    u32 status, skipped, numBits, field, numDeltas;
+    i32 skippedSigned;
+    strmData_t strmCopy;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeqParamSet);
+    ASSERT(pPicParamSet);
+    ASSERT(noOutputOfPriorPicsFlag);
+
+    /* must be IDR slice: idr_pic_id is skipped unconditionally below */
+    ASSERT(nalUnitType == NAL_CODED_SLICE_IDR);
+
+    /* peek only: all reads go through a private copy so that the caller's
+     * stream position params stay untouched */
+    strmCopy = *pStrmData;
+
+    /* first_mb_in_slice, slice_type and pic_parameter_set_id: three
+     * consecutive unsigned Exp-Golomb fields, values not needed here */
+    for (field = 0; field < 3; field++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(&strmCopy, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* log2(maxFrameNum) -> num bits to represent frame_num: count the
+     * shifts needed to clear the value, then step back by one */
+    for (numBits = 0; (pSeqParamSet->maxFrameNum >> numBits) != 0; numBits++)
+    {
+    }
+    numBits--;
+
+    /* skip frame_num: the bits themselves are discarded, only running out
+     * of data matters */
+    if (h264bsdGetBits(&strmCopy, numBits) == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* skip idr_pic_id */
+    status = h264bsdDecodeExpGolombUnsigned(&strmCopy, &skipped);
+    if (status != HANTRO_OK)
+        return(status);
+
+    /* picture order count fields: what is skipped depends on
+     * picOrderCntType. The two cases exclude each other, and for any
+     * other combination nothing is skipped at this point */
+    if (pSeqParamSet->picOrderCntType == 0)
+    {
+        /* log2(maxPicOrderCntLsb) -> num bits to represent
+         * pic_order_cnt_lsb */
+        for (numBits = 0;
+             (pSeqParamSet->maxPicOrderCntLsb >> numBits) != 0;
+             numBits++)
+        {
+        }
+        numBits--;
+
+        /* skip pic_order_cnt_lsb */
+        if (h264bsdGetBits(&strmCopy, numBits) == END_OF_STREAM)
+            return(HANTRO_NOK);
+
+        /* skip delta_pic_order_cnt_bottom if present */
+        if (pPicParamSet->picOrderPresentFlag)
+        {
+            status = h264bsdDecodeExpGolombSigned(&strmCopy, &skippedSigned);
+            if (status != HANTRO_OK)
+                return(status);
+        }
+    }
+    else if (pSeqParamSet->picOrderCntType == 1 &&
+             !pSeqParamSet->deltaPicOrderAlwaysZeroFlag)
+    {
+        /* skip delta_pic_order_cnt[0] and, if present,
+         * delta_pic_order_cnt[1] */
+        numDeltas = pPicParamSet->picOrderPresentFlag ? 2 : 1;
+        for (field = 0; field < numDeltas; field++)
+        {
+            status = h264bsdDecodeExpGolombSigned(&strmCopy, &skippedSigned);
+            if (status != HANTRO_OK)
+                return(status);
+        }
+    }
+
+    /* skip redundant_pic_cnt if present */
+    if (pPicParamSet->redundantPicCntPresentFlag)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(&strmCopy, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* no_output_of_prior_pics_flag, one bit. The result of the read is
+     * written to the output before it is checked, so on failure the
+     * output holds END_OF_STREAM */
+    *noOutputOfPriorPicsFlag = h264bsdGetBits(&strmCopy, 1);
+    if (*noOutputOfPriorPicsFlag == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    return(HANTRO_OK);
+
+}
+/*lint +e715 */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.h
new file mode 100755
index 0000000..198898a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_SLICE_HEADER_H
+#define H264SWDEC_SLICE_HEADER_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_cfg.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_pic_param_set.h"
+#include "h264bsd_nal_unit.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+enum { /* slice type base values tested by IS_P_SLICE / IS_I_SLICE below */
+    P_SLICE = 0,
+    I_SLICE = 2
+};
+
+enum {NO_LONG_TERM_FRAME_INDICES = 0xFFFF}; /* NOTE(review): use not visible here */
+
+/* true when sliceType is P_SLICE or P_SLICE + 5, i.e. 0 or 5 (inter slice) */
+#define IS_P_SLICE(sliceType) (((sliceType) == P_SLICE) || \
+    ((sliceType) == P_SLICE + 5))
+
+/* true when sliceType is I_SLICE or I_SLICE + 5, i.e. 2 or 7 (intra slice) */
+#define IS_I_SLICE(sliceType) (((sliceType) == I_SLICE) || \
+    ((sliceType) == I_SLICE + 5))
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/* structure to store data of one reference picture list reordering operation */
+typedef struct /* NOTE(review): members presumably hold the values read for */
+{              /* one operation by the slice header decoder -- confirm there */
+    u32 reorderingOfPicNumsIdc;
+    u32 absDiffPicNum;
+    u32 longTermPicNum;
+} refPicListReorderingOperation_t;
+
+/* structure to store reference picture list reordering operations */
+typedef struct
+{
+    u32 refPicListReorderingFlagL0;
+    refPicListReorderingOperation_t command[MAX_NUM_REF_PICS+1]; /* fixed capacity */
+} refPicListReordering_t;
+
+/* structure to store data of one DPB memory management control operation */
+typedef struct /* NOTE(review): which members are meaningful presumably */
+{              /* depends on the operation code -- confirm in the decoder */
+    u32 memoryManagementControlOperation;
+    u32 differenceOfPicNums;
+    u32 longTermPicNum;
+    u32 longTermFrameIdx;
+    u32 maxLongTermFrameIdx;
+} memoryManagementOperation_t;
+
+/* worst case number of operations in one slice header: each of the
+ * MAX_NUM_REF_PICS short term pictures in the buffer is first marked as
+ * long term and then as unused for reference (2*MAX_NUM_REF_PICS), max
+ * long-term frame index is set and current picture is marked as long term
+ * reference picture (+2), and the last position is reserved for the end
+ * memory_management_control_operation command (+1) */
+#define MAX_NUM_MMC_OPERATIONS (2*MAX_NUM_REF_PICS+2+1)
+
+/* structure to store decoded reference picture marking data */
+typedef struct
+{
+    u32 noOutputOfPriorPicsFlag;
+    u32 longTermReferenceFlag;
+    u32 adaptiveRefPicMarkingModeFlag;
+    memoryManagementOperation_t operation[MAX_NUM_MMC_OPERATIONS]; /* worst-case size */
+} decRefPicMarking_t;
+
+/* structure to store slice header data decoded from the stream */
+typedef struct /* NOTE(review): members appear to mirror the slice header */
+{              /* syntax elements; confirm in h264bsdDecodeSliceHeader */
+    u32 firstMbInSlice;
+    u32 sliceType;
+    u32 picParameterSetId;
+    u32 frameNum;
+    u32 idrPicId;
+    u32 picOrderCntLsb;
+    i32 deltaPicOrderCntBottom;
+    i32 deltaPicOrderCnt[2]; /* two entries, [0] and [1] */
+    u32 redundantPicCnt;
+    u32 numRefIdxActiveOverrideFlag;
+    u32 numRefIdxL0Active;
+    i32 sliceQpDelta;
+    u32 disableDeblockingFilterIdc;
+    i32 sliceAlphaC0Offset;
+    i32 sliceBetaOffset;
+    u32 sliceGroupChangeCycle;
+    refPicListReordering_t refPicListReordering; /* see refPicListReordering_t */
+    decRefPicMarking_t decRefPicMarking; /* see decRefPicMarking_t */
+} sliceHeader_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSliceHeader(strmData_t *pStrmData, /* NOTE(review): presumably fills *pSliceHeader -- confirm */
+  sliceHeader_t *pSliceHeader,
+  seqParamSet_t *pSeqParamSet,
+  picParamSet_t *pPicParamSet,
+  nalUnit_t *pNalUnit);
+
+u32 h264bsdCheckPpsId(strmData_t *pStrmData, u32 *ppsId); /* NOTE(review): presumably peeks pic_parameter_set_id -- confirm */
+
+u32 h264bsdCheckFrameNum( /* NOTE(review): presumably peeks frame_num -- confirm */
+  strmData_t *pStrmData,
+  u32 maxFrameNum,
+  u32 *frameNum);
+
+u32 h264bsdCheckIdrPicId( /* NOTE(review): presumably peeks idr_pic_id -- confirm */
+  strmData_t *pStrmData,
+  u32 maxFrameNum,
+  nalUnitType_e nalUnitType,
+  u32 *idrPicId);
+
+u32 h264bsdCheckPicOrderCntLsb( /* peeks pic_order_cnt_lsb on a copy of the stream data */
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  nalUnitType_e nalUnitType,
+  u32 *picOrderCntLsb);
+
+u32 h264bsdCheckDeltaPicOrderCntBottom( /* peeks delta_pic_order_cnt_bottom on a copy */
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  nalUnitType_e nalUnitType,
+  i32 *deltaPicOrderCntBottom);
+
+u32 h264bsdCheckDeltaPicOrderCnt( /* peeks delta_pic_order_cnt[0] and, if flagged, [1] */
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  nalUnitType_e nalUnitType,
+  u32 picOrderPresentFlag,
+  i32 *deltaPicOrderCnt);
+
+u32 h264bsdCheckRedundantPicCnt( /* peeks redundant_pic_cnt on a copy of the stream data */
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  picParamSet_t *pPicParamSet,
+  nalUnitType_e nalUnitType,
+  u32 *redundantPicCnt);
+
+u32 h264bsdCheckPriorPicsFlag(u32 * noOutputOfPriorPicsFlag, /* output comes first here */
+                              const strmData_t * pStrmData,
+                              const seqParamSet_t * pSeqParamSet,
+                              const picParamSet_t * pPicParamSet,
+                              nalUnitType_e nalUnitType);
+
+#endif /* #ifndef H264SWDEC_SLICE_HEADER_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.c
new file mode 100755
index 0000000..3234754
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.c
@@ -0,0 +1,888 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdInitStorage
+          h264bsdStoreSeqParamSet
+          h264bsdStorePicParamSet
+          h264bsdActivateParamSets
+          h264bsdResetStorage
+          h264bsdIsStartOfPicture
+          h264bsdIsEndOfPicture
+          h264bsdComputeSliceGroupMap
+          h264bsdCheckAccessUnitBoundary
+          CheckPps
+          h264bsdValidParamSets
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_storage.h"
+#include "h264bsd_util.h"
+#include "h264bsd_neighbour.h"
+#include "h264bsd_slice_group_map.h"
+#include "h264bsd_dpb.h"
+#include "h264bsd_nal_unit.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_seq_param_set.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 CheckPps(picParamSet_t *pps, seqParamSet_t *sps);
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdInitStorage
+
+        Functional description:
+            Initialize storage structure. Sets contents of the storage to '0'
+            except for the active parameter set ids, which are initialized
+            to invalid values.
+
+        Inputs:
+
+        Outputs:
+            pStorage    initialized data stored here
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInitStorage(storage_t *pStorage)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    /* zero everything, then set the few members that must not start at 0:
+     * the active ids get the out-of-range "nothing active yet" values */
+    H264SwDecMemset(pStorage, 0, sizeof(*pStorage));
+    pStorage->aub[0].firstCallFlag = HANTRO_TRUE;
+    pStorage->activePpsId = MAX_NUM_PIC_PARAM_SETS;
+    pStorage->activeSpsId = MAX_NUM_SEQ_PARAM_SETS;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdStoreSeqParamSet
+
+        Functional description:
+            Store sequence parameter set into the storage. If active SPS is
+            overwritten -> check if contents changes and if it does, set
+            parameters to force reactivation of parameter sets
+
+        Inputs:
+            pStorage        pointer to storage structure
+            pSeqParamSet    pointer to param set to be stored
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_OK                success
+            MEMORY_ALLOCATION_ERROR  failure in memory allocation
+
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdStoreSeqParamSet(storage_t *pStorage, seqParamSet_t *pSeqParamSet)
+{
+
+/* Variables */
+
+    u32 id;
+
+/* Code */
+
+    ASSERT(pStorage);
+    ASSERT(pSeqParamSet);
+    ASSERT(pSeqParamSet->seqParameterSetId < MAX_NUM_SEQ_PARAM_SETS);
+
+    id = pSeqParamSet->seqParameterSetId;
+
+    /* an sps with this id has been stored before */
+    if (pStorage->sps[id] != NULL)
+    {
+        /* sequence parameter set with id equal to id of active sps */
+        if (id == pStorage->activeSpsId)
+        {
+            /* contents unchanged -> keep what is stored, free memories
+             * allocated for the just decoded sps and continue */
+            if (h264bsdCompareSeqParamSets(pSeqParamSet,
+                                           pStorage->activeSps) == 0)
+            {
+                FREE(pSeqParamSet->offsetForRefFrame);
+                FREE(pSeqParamSet->vuiParameters);
+                return(HANTRO_OK);
+            }
+
+            /* contents changed -> overwrite below and re-activate when
+             * next IDR picture decoded: ids of active param sets set to
+             * invalid values to force re-activation */
+            pStorage->activeSpsId = MAX_NUM_SEQ_PARAM_SETS + 1;
+            pStorage->activePpsId = MAX_NUM_PIC_PARAM_SETS + 1;
+            pStorage->activeSps = NULL;
+            pStorage->activePps = NULL;
+        }
+
+        /* slot gets overwritten, whether it was active or not -> free
+         * memories allocated for the old param set */
+        FREE(pStorage->sps[id]->offsetForRefFrame);
+        FREE(pStorage->sps[id]->vuiParameters);
+    }
+    else
+    {
+        /* seq parameter set with id not used before -> allocate memory */
+        ALLOCATE(pStorage->sps[id], 1, seqParamSet_t);
+        if (pStorage->sps[id] == NULL)
+            return(MEMORY_ALLOCATION_ERROR);
+    }
+
+    /* struct assignment: the stored set takes over the offsetForRefFrame
+     * and vuiParameters pointers held by pSeqParamSet */
+    *pStorage->sps[id] = *pSeqParamSet;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdStorePicParamSet
+
+        Functional description:
+            Store picture parameter set into the storage. If active PPS is
+            overwritten -> check if active SPS changes and if it does -> set
+            parameters to force reactivation of parameter sets
+
+        Inputs:
+            pStorage        pointer to storage structure
+            pPicParamSet    pointer to param set to be stored
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_OK                success
+            MEMORY_ALLOCATION_ERROR  failure in memory allocation
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdStorePicParamSet(storage_t *pStorage, picParamSet_t *pPicParamSet)
+{
+
+/* Variables */
+
+    u32 id;
+
+/* Code */
+
+    ASSERT(pStorage);
+    ASSERT(pPicParamSet);
+    ASSERT(pPicParamSet->picParameterSetId < MAX_NUM_PIC_PARAM_SETS);
+    ASSERT(pPicParamSet->seqParameterSetId < MAX_NUM_SEQ_PARAM_SETS);
+
+    id = pPicParamSet->picParameterSetId;
+
+    /* a pps with this id has been stored before */
+    if (pStorage->pps[id] != NULL)
+    {
+        /* picture parameter set with id equal to id of active pps: check
+         * whether the seq param set it refers to differs from the active
+         * one and force re-activation of param sets if it does. Setting
+         * activePpsId to an invalid value accomplishes this; the active
+         * sps id and the active pointers are left as they are */
+        if (id == pStorage->activePpsId &&
+            pPicParamSet->seqParameterSetId != pStorage->activeSpsId)
+        {
+            pStorage->activePpsId = MAX_NUM_PIC_PARAM_SETS + 1;
+        }
+
+        /* slot gets overwritten, whether it was active or not -> free
+         * memories allocated for the old param set (same four members
+         * in both cases) */
+        FREE(pStorage->pps[id]->runLength);
+        FREE(pStorage->pps[id]->topLeft);
+        FREE(pStorage->pps[id]->bottomRight);
+        FREE(pStorage->pps[id]->sliceGroupId);
+    }
+    else
+    {
+        /* pic parameter set with id not used before -> allocate memory */
+        ALLOCATE(pStorage->pps[id], 1, picParamSet_t);
+        if (pStorage->pps[id] == NULL)
+            return(MEMORY_ALLOCATION_ERROR);
+    }
+
+    /* struct assignment: the stored set takes over the runLength,
+     * topLeft, bottomRight and sliceGroupId pointers held by
+     * pPicParamSet */
+    *pStorage->pps[id] = *pPicParamSet;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdActivateParamSets
+
+        Functional description:
+            Activate certain SPS/PPS combination. This function shall be
+            called in the beginning of each picture. Picture parameter set
+            can be changed as wanted, but sequence parameter set may only be
+            changed when the starting picture is an IDR picture.
+
+            When new SPS is activated the function allocates memory for
+            macroblock storages and slice group map and (re-)initializes the
+            decoded picture buffer. If this is not the first activation the old
+            allocations are freed and FreeDpb called before new allocations.
+
+        Inputs:
+            pStorage        pointer to storage data structure
+            ppsId           identifies the PPS to be activated, SPS id obtained
+                            from the PPS
+            isIdr           flag to indicate if the picture is an IDR picture
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      non-existing or invalid param set combination,
+                            trying to change SPS with non-IDR picture
+            MEMORY_ALLOCATION_ERROR     failure in memory allocation
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdActivateParamSets(storage_t *pStorage, u32 ppsId, u32 isIdr)
+{
+
+/* Variables */
+
+    u32 tmp;    /* status returned by CheckPps / h264bsdResetDpb */
+    u32 flag;   /* HANTRO_TRUE when DPB output reordering is disabled */
+
+/* Code */
+
+    ASSERT(pStorage);
+    ASSERT(ppsId < MAX_NUM_PIC_PARAM_SETS);
+
+    /* check that pps and corresponding sps exist */
+    if ( (pStorage->pps[ppsId] == NULL) ||
+         (pStorage->sps[pStorage->pps[ppsId]->seqParameterSetId] == NULL) )
+    {
+        return(HANTRO_NOK);
+    }
+
+    /* check that pps parameters do not violate picture size constraints */
+    tmp = CheckPps(pStorage->pps[ppsId],
+                   pStorage->sps[pStorage->pps[ppsId]->seqParameterSetId]);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    /* first activation part1: activePpsId == MAX_NUM_PIC_PARAM_SETS is
+     * treated as "nothing active yet"; only ids, pointers and sizes are set */
+    if (pStorage->activePpsId == MAX_NUM_PIC_PARAM_SETS)
+    {
+        pStorage->activePpsId = ppsId;
+        pStorage->activePps = pStorage->pps[ppsId];
+        pStorage->activeSpsId = pStorage->activePps->seqParameterSetId;
+        pStorage->activeSps = pStorage->sps[pStorage->activeSpsId];
+        pStorage->picSizeInMbs =
+            pStorage->activeSps->picWidthInMbs *
+            pStorage->activeSps->picHeightInMbs;
+        /* NOTE(review): u32 product - confirm SPS limits rule out wrap */
+        pStorage->currImage->width = pStorage->activeSps->picWidthInMbs;
+        pStorage->currImage->height = pStorage->activeSps->picHeightInMbs;
+
+        pStorage->pendingActivation = HANTRO_TRUE;
+    }
+    /* activation part2: a later call, made while pendingActivation is set */
+    else if (pStorage->pendingActivation)
+    {
+        pStorage->pendingActivation = HANTRO_FALSE;
+        /* release the old buffers, then allocate for picSizeInMbs */
+        FREE(pStorage->mb);
+        FREE(pStorage->sliceGroupMap);
+
+        ALLOCATE(pStorage->mb, pStorage->picSizeInMbs, mbStorage_t);
+        ALLOCATE(pStorage->sliceGroupMap, pStorage->picSizeInMbs, u32);
+        if (pStorage->mb == NULL || pStorage->sliceGroupMap == NULL)
+            return(MEMORY_ALLOCATION_ERROR);
+
+        H264SwDecMemset(pStorage->mb, 0,
+            pStorage->picSizeInMbs * sizeof(mbStorage_t));
+
+        h264bsdInitMbNeighbours(pStorage->mb,
+            pStorage->activeSps->picWidthInMbs,
+            pStorage->picSizeInMbs);
+
+        /* dpb output reordering disabled if
+         * 1) application set noReordering flag
+         * 2) POC type equal to 2
+         * 3) num_reorder_frames in vui equal to 0 */
+        if ( pStorage->noReordering ||
+             pStorage->activeSps->picOrderCntType == 2 ||
+             (pStorage->activeSps->vuiParametersPresentFlag &&
+              pStorage->activeSps->vuiParameters->bitstreamRestrictionFlag &&
+              !pStorage->activeSps->vuiParameters->numReorderFrames) )
+            flag = HANTRO_TRUE;
+        else
+            flag = HANTRO_FALSE;
+
+        tmp = h264bsdResetDpb(pStorage->dpb,
+            pStorage->activeSps->picWidthInMbs *
+            pStorage->activeSps->picHeightInMbs,
+            pStorage->activeSps->maxDpbSize,
+            pStorage->activeSps->numRefFrames,
+            pStorage->activeSps->maxFrameNum,
+            flag);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+    }
+    else if (ppsId != pStorage->activePpsId) /* switching to another PPS */
+    {
+        /* the SPS may only change at an IDR picture */
+        if (pStorage->pps[ppsId]->seqParameterSetId != pStorage->activeSpsId)
+        {
+            DEBUG(("SEQ PARAM SET CHANGING...\n"));
+            if (isIdr)
+            {
+                pStorage->activePpsId = ppsId;
+                pStorage->activePps = pStorage->pps[ppsId];
+                pStorage->activeSpsId = pStorage->activePps->seqParameterSetId;
+                pStorage->activeSps = pStorage->sps[pStorage->activeSpsId];
+                pStorage->picSizeInMbs =
+                    pStorage->activeSps->picWidthInMbs *
+                    pStorage->activeSps->picHeightInMbs;
+
+                pStorage->currImage->width = pStorage->activeSps->picWidthInMbs;
+                pStorage->currImage->height =
+                    pStorage->activeSps->picHeightInMbs;
+                /* buffers for the new SPS are allocated by part2 later */
+                pStorage->pendingActivation = HANTRO_TRUE;
+            }
+            else
+            {
+                DEBUG(("TRYING TO CHANGE SPS IN NON-IDR SLICE\n"));
+                return(HANTRO_NOK);
+            }
+        }
+        else /* same SPS: only the active PPS changes */
+        {
+            pStorage->activePpsId = ppsId;
+            pStorage->activePps = pStorage->pps[ppsId];
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdResetStorage
+
+        Functional description:
+            Reset contents of the storage. This should be called before
+            processing of new image is started.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Outputs:
+            none
+
+        Returns:
+            none
+
+
+------------------------------------------------------------------------------*/
+
+void h264bsdResetStorage(storage_t *pStorage)
+{
+
+/* Variables */
+
+    u32 mbIdx;
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    pStorage->slice->sliceId = 0;
+    pStorage->slice->numDecodedMbs = 0;
+    /* clear the per-macroblock bookkeeping of all picSizeInMbs entries */
+    for (mbIdx = pStorage->picSizeInMbs; mbIdx-- > 0; )
+    {
+        pStorage->mb[mbIdx].decoded = 0;
+        pStorage->mb[mbIdx].sliceId = 0;
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIsStartOfPicture
+
+        Functional description:
+            Determine if the decoder is in the start of a picture. This
+            information is needed to decide if h264bsdActivateParamSets and
+            h264bsdCheckGapsInFrameNum functions should be called. Function
+            considers that new picture is starting if no slice headers
+            have been successfully decoded for the current access unit.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_TRUE        new picture is starting
+            HANTRO_FALSE       not starting
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIsStartOfPicture(storage_t *pStorage)
+{
+
+/* Variables */
+
+
+/* Code */
+
+    /* a picture is considered to be starting for as long as the current
+     * access unit has not produced a valid slice */
+    return((pStorage->validSliceInAccessUnit == HANTRO_FALSE) ?
+        HANTRO_TRUE : HANTRO_FALSE);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIsEndOfPicture
+
+        Functional description:
+            Determine if the decoder is in the end of a picture. This
+            information is needed to determine when deblocking filtering
+            and reference picture marking processes should be performed.
+
+            If the decoder is processing primary slices the return value
+            is determined by checking the value of numDecodedMbs in the
+            storage. On the other hand, if the decoder is processing
+            redundant slices the numDecodedMbs may not contain valid
+            information and each macroblock has to be checked separately.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_TRUE        end of picture
+            HANTRO_FALSE       not end of picture
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIsEndOfPicture(storage_t *pStorage)
+{
+
+/* Variables */
+
+    u32 mbIdx;
+
+/* Code */
+
+    /* primary picture: the running macroblock count decides */
+    if (pStorage->sliceHeader[0].redundantPicCnt == 0)
+    {
+        return((pStorage->slice->numDecodedMbs == pStorage->picSizeInMbs) ?
+            HANTRO_TRUE : HANTRO_FALSE);
+    }
+
+    /* redundant picture: the picture is complete only when every
+     * macroblock carries its decoded mark */
+    for (mbIdx = 0; mbIdx < pStorage->picSizeInMbs; mbIdx++)
+    {
+        if (!pStorage->mb[mbIdx].decoded)
+            return(HANTRO_FALSE);
+    }
+
+    return(HANTRO_TRUE);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdComputeSliceGroupMap
+
+        Functional description:
+            Compute slice group map. Just call h264bsdDecodeSliceGroupMap with
+            appropriate parameters.
+
+        Inputs:
+            pStorage                pointer to storage structure
+            sliceGroupChangeCycle
+
+        Outputs:
+            none
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdComputeSliceGroupMap(storage_t *pStorage, u32 sliceGroupChangeCycle)
+{
+
+/* Variables */
+
+    seqParamSet_t *pSps = pStorage->activeSps;
+
+/* Code */
+
+    /* picture dimensions are taken from the active SPS */
+    h264bsdDecodeSliceGroupMap(pStorage->sliceGroupMap, pStorage->activePps,
+        sliceGroupChangeCycle, pSps->picWidthInMbs, pSps->picHeightInMbs);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCheckAccessUnitBoundary
+
+        Functional description:
+            Check if next NAL unit starts a new access unit. Following
+            conditions specify start of a new access unit:
+
+                -NAL unit types 6-11, 13-18 (e.g. SPS, PPS)
+
+           following conditions checked only for slice NAL units, values
+           compared to ones obtained from previous slice:
+
+                -NAL unit type differs (slice / IDR slice)
+                -frame_num differs
+                -nal_ref_idc differs and one of the values is 0
+                -POC information differs
+                -both are IDR slices and idr_pic_id differs
+
+        Inputs:
+            strm        pointer to stream data structure
+            nuNext      pointer to NAL unit structure
+            storage     pointer to storage structure
+
+        Outputs:
+            accessUnitBoundaryFlag  the result is stored here, TRUE for
+                                    access unit boundary, FALSE otherwise
+
+        Returns:
+            HANTRO_OK           success
+            HANTRO_NOK          failure, invalid stream data
+            PARAM_SET_ERROR     invalid param set usage
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckAccessUnitBoundary(
+  strmData_t *strm,
+  nalUnit_t *nuNext,
+  storage_t *storage,
+  u32 *accessUnitBoundaryFlag)
+{
+
+/* Variables */
+
+    u32 tmp, ppsId, frameNum, idrPicId, picOrderCntLsb;
+    i32 deltaPicOrderCntBottom, deltaPicOrderCnt[2];
+    seqParamSet_t *sps;
+    picParamSet_t *pps;
+
+/* Code */
+
+    ASSERT(strm);
+    ASSERT(nuNext);
+    ASSERT(storage);
+    ASSERT(storage->sps);
+    ASSERT(storage->pps);
+
+    /* initialize default output to FALSE */
+    *accessUnitBoundaryFlag = HANTRO_FALSE;
+    /* NAL unit types 6-11 and 13-18 always start a new access unit */
+    if ( ( (nuNext->nalUnitType > 5) && (nuNext->nalUnitType < 12) ) ||
+         ( (nuNext->nalUnitType > 12) && (nuNext->nalUnitType <= 18) ) )
+    {
+        *accessUnitBoundaryFlag = HANTRO_TRUE;
+        return(HANTRO_OK);
+    }
+    else if ( nuNext->nalUnitType != NAL_CODED_SLICE &&
+              nuNext->nalUnitType != NAL_CODED_SLICE_IDR )
+    {
+        return(HANTRO_OK); /* any other non-slice NAL unit: no boundary */
+    }
+
+    /* very first slice: always a boundary; the checks below still run */
+    if (storage->aub->firstCallFlag)
+    {
+        *accessUnitBoundaryFlag = HANTRO_TRUE;
+        storage->aub->firstCallFlag = HANTRO_FALSE;
+    }
+
+    /* get picture parameter set id */
+    tmp = h264bsdCheckPpsId(strm, &ppsId);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    /* look up the referenced pps and sps; both must be stored, and the sps
+     * may differ from an already active one only for an IDR slice */
+    pps = storage->pps[ppsId];
+    if ( pps == NULL || storage->sps[pps->seqParameterSetId] == NULL  ||
+         (storage->activeSpsId != MAX_NUM_SEQ_PARAM_SETS &&
+          pps->seqParameterSetId != storage->activeSpsId &&
+          nuNext->nalUnitType != NAL_CODED_SLICE_IDR) )
+        return(PARAM_SET_ERROR);
+    sps = storage->sps[pps->seqParameterSetId];
+    /* nal_ref_idc differs and one of the two values is 0 */
+    if (storage->aub->nuPrev->nalRefIdc != nuNext->nalRefIdc &&
+      (storage->aub->nuPrev->nalRefIdc == 0 || nuNext->nalRefIdc == 0))
+        *accessUnitBoundaryFlag = HANTRO_TRUE;
+    /* exactly one of previous/next is an IDR slice */
+    if ((storage->aub->nuPrev->nalUnitType == NAL_CODED_SLICE_IDR &&
+          nuNext->nalUnitType != NAL_CODED_SLICE_IDR) ||
+      (storage->aub->nuPrev->nalUnitType != NAL_CODED_SLICE_IDR &&
+       nuNext->nalUnitType == NAL_CODED_SLICE_IDR))
+        *accessUnitBoundaryFlag = HANTRO_TRUE;
+    /* frame_num differs from the previous slice */
+    tmp = h264bsdCheckFrameNum(strm, sps->maxFrameNum, &frameNum);
+    if (tmp != HANTRO_OK)
+        return(HANTRO_NOK);
+
+    if (storage->aub->prevFrameNum != frameNum)
+    {
+        storage->aub->prevFrameNum = frameNum;
+        *accessUnitBoundaryFlag = HANTRO_TRUE;
+    }
+    /* both are IDR slices and idr_pic_id differs */
+    if (nuNext->nalUnitType == NAL_CODED_SLICE_IDR)
+    {
+        tmp = h264bsdCheckIdrPicId(strm, sps->maxFrameNum, nuNext->nalUnitType,
+          &idrPicId);
+        if (tmp != HANTRO_OK)
+            return(HANTRO_NOK);
+
+        if (storage->aub->nuPrev->nalUnitType == NAL_CODED_SLICE_IDR &&
+          storage->aub->prevIdrPicId != idrPicId)
+            *accessUnitBoundaryFlag = HANTRO_TRUE;
+
+        storage->aub->prevIdrPicId = idrPicId;
+    }
+    /* POC information differs; which fields are compared depends on type */
+    if (sps->picOrderCntType == 0)
+    {
+        tmp = h264bsdCheckPicOrderCntLsb(strm, sps, nuNext->nalUnitType,
+          &picOrderCntLsb);
+        if (tmp != HANTRO_OK)
+            return(HANTRO_NOK);
+
+        if (storage->aub->prevPicOrderCntLsb != picOrderCntLsb)
+        {
+            storage->aub->prevPicOrderCntLsb = picOrderCntLsb;
+            *accessUnitBoundaryFlag = HANTRO_TRUE;
+        }
+
+        if (pps->picOrderPresentFlag)
+        {
+            tmp = h264bsdCheckDeltaPicOrderCntBottom(strm, sps,
+                nuNext->nalUnitType, &deltaPicOrderCntBottom);
+            if (tmp != HANTRO_OK)
+                return(tmp);
+
+            if (storage->aub->prevDeltaPicOrderCntBottom !=
+                deltaPicOrderCntBottom)
+            {
+                storage->aub->prevDeltaPicOrderCntBottom =
+                    deltaPicOrderCntBottom;
+                *accessUnitBoundaryFlag = HANTRO_TRUE;
+            }
+        }
+    }
+    else if (sps->picOrderCntType == 1 && !sps->deltaPicOrderAlwaysZeroFlag)
+    {
+        tmp = h264bsdCheckDeltaPicOrderCnt(strm, sps, nuNext->nalUnitType,
+          pps->picOrderPresentFlag, deltaPicOrderCnt);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        /* element [1] is compared only when picOrderPresentFlag is set */
+        if (storage->aub->prevDeltaPicOrderCnt[0] != deltaPicOrderCnt[0])
+        {
+            storage->aub->prevDeltaPicOrderCnt[0] = deltaPicOrderCnt[0];
+            *accessUnitBoundaryFlag = HANTRO_TRUE;
+        }
+
+        if (pps->picOrderPresentFlag)
+            if (storage->aub->prevDeltaPicOrderCnt[1] != deltaPicOrderCnt[1])
+            {
+                storage->aub->prevDeltaPicOrderCnt[1] = deltaPicOrderCnt[1];
+                *accessUnitBoundaryFlag = HANTRO_TRUE;
+            }
+    }
+    /* reached only on success: keep this NAL header for the next call */
+    *storage->aub->nuPrev = *nuNext;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: CheckPps
+
+        Functional description:
+            Check picture parameter set. Contents of the picture parameter
+            set information that depends on the image dimensions is checked
+            against the dimensions in the sps.
+
+        Inputs:
+            pps     pointer to picture parameter set
+            sps     pointer to sequence parameter set
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_OK      everything ok
+            HANTRO_NOK     invalid data in picture parameter set
+
+------------------------------------------------------------------------------*/
+u32 CheckPps(picParamSet_t *pps, seqParamSet_t *sps)
+{
+
+    u32 grp;
+    u32 totalMbs;
+
+    totalMbs = sps->picWidthInMbs * sps->picHeightInMbs;
+    /* slice group parameters are checked only with more than one group */
+    if (pps->numSliceGroups <= 1)
+        return(HANTRO_OK);
+
+    switch (pps->sliceGroupMapType)
+    {
+        case 0: /* no run length may exceed the picture size */
+            ASSERT(pps->runLength);
+            for (grp = 0; grp < pps->numSliceGroups; grp++)
+                if (pps->runLength[grp] > totalMbs)
+                    return(HANTRO_NOK);
+            break;
+        case 2: /* corner addresses ordered and inside the picture */
+            ASSERT(pps->topLeft);
+            ASSERT(pps->bottomRight);
+            for (grp = 0; grp < pps->numSliceGroups - 1; grp++)
+            {
+                if (pps->bottomRight[grp] < pps->topLeft[grp] ||
+                    totalMbs <= pps->bottomRight[grp])
+                    return(HANTRO_NOK);
+                /* top-left column must not exceed bottom-right column */
+                if ( (pps->bottomRight[grp] % sps->picWidthInMbs) <
+                     (pps->topLeft[grp] % sps->picWidthInMbs) )
+                    return(HANTRO_NOK);
+            }
+            break;
+        case 3: case 4: case 5: /* change rate bounded by picture size */
+            if (pps->sliceGroupChangeRate > totalMbs)
+                return(HANTRO_NOK);
+            break;
+        case 6: /* map may not be smaller than the picture */
+            if (pps->picSizeInMapUnits < totalMbs)
+                return(HANTRO_NOK);
+            break;
+        default: /* other map types: nothing is checked here */
+            break;
+    }
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdValidParamSets
+
+        Functional description:
+            Check if any valid SPS/PPS combination exists in the storage.
+            Function tries each PPS in the buffer and checks if corresponding
+            SPS exists and calls CheckPps to determine if the PPS conforms
+            to image dimensions of the SPS.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Returns:
+            HANTRO_OK   there is at least one valid combination
+            HANTRO_NOK  no valid combinations found
+
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdValidParamSets(storage_t *pStorage)
+{
+
+/* Variables */
+
+    u32 ppsIdx;
+    picParamSet_t *pPps;
+    seqParamSet_t *pSps;
+
+/* Code */
+
+    ASSERT(pStorage);
+    /* the first stored PPS with a stored SPS that passes CheckPps decides */
+    for (ppsIdx = 0; ppsIdx < MAX_NUM_PIC_PARAM_SETS; ppsIdx++)
+    {
+        pPps = pStorage->pps[ppsIdx];
+        if (pPps == NULL)
+            continue;   /* empty PPS slot */
+        pSps = pStorage->sps[pPps->seqParameterSetId];
+        if (pSps != NULL && CheckPps(pPps, pSps) == HANTRO_OK)
+            return(HANTRO_OK);
+    }
+
+    return(HANTRO_NOK);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.h
new file mode 100755
index 0000000..ba3b2da
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_STORAGE_H
+#define H264SWDEC_STORAGE_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_cfg.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_pic_param_set.h"
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_nal_unit.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_dpb.h"
+#include "h264bsd_pic_order_cnt.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+typedef struct
+{
+    u32 sliceId;        /* reset to 0 by h264bsdResetStorage */
+    u32 numDecodedMbs;  /* == picSizeInMbs means primary picture complete */
+    u32 lastMbAddr;     /* NOTE(review): usage not visible here - confirm */
+} sliceStorage_t;
+
+/* state kept between calls of h264bsdCheckAccessUnitBoundary */
+typedef struct
+{
+    nalUnit_t nuPrev[1];            /* NAL header of last checked slice */
+    u32 prevFrameNum;               /* updated when frame_num differs */
+    u32 prevIdrPicId;               /* stored for every IDR slice */
+    u32 prevPicOrderCntLsb;         /* POC type 0, updated on change */
+    i32 prevDeltaPicOrderCntBottom; /* POC type 0, updated on change */
+    i32 prevDeltaPicOrderCnt[2];    /* POC type 1, updated on change */
+    u32 firstCallFlag;              /* cleared by the first slice checked */
+} aubCheck_t;
+
+/* storage data structure, holds all data of a decoder instance */
+typedef struct
+{
+    /* active parameter set ids and pointers */
+    u32 oldSpsId;       /* NOTE(review): usage not visible here - confirm */
+    u32 activePpsId;    /* MAX_NUM_PIC_PARAM_SETS until first activation */
+    u32 activeSpsId;    /* MAX_NUM_SEQ_PARAM_SETS is treated as "none" */
+    picParamSet_t *activePps;
+    seqParamSet_t *activeSps;
+    seqParamSet_t *sps[MAX_NUM_SEQ_PARAM_SETS];
+    picParamSet_t *pps[MAX_NUM_PIC_PARAM_SETS];
+
+    /* current slice group map, recomputed for each slice */
+    u32 *sliceGroupMap;
+
+    u32 picSizeInMbs;   /* picWidthInMbs * picHeightInMbs of active SPS */
+
+    /* this flag is set after all macroblocks of a picture successfully
+     * decoded -> redundant slices not decoded */
+    u32 skipRedundantSlices;
+    u32 picStarted;
+
+    /* flag to indicate if current access unit contains any valid slices */
+    u32 validSliceInAccessUnit;
+
+    /* store information needed for handling of slice decoding */
+    sliceStorage_t slice[1];
+
+    /* number of concealed macroblocks in the current image */
+    u32 numConcealedMbs;
+
+    /* picId given by application */
+    u32 currentPicId;
+
+    /* macroblock specific storages, size determined by image dimensions */
+    mbStorage_t *mb;
+
+    /* flag to store noOutputReordering flag set by the application */
+    u32 noReordering;
+
+    /* DPB */
+    dpbStorage_t dpb[1];
+
+    /* structure to store picture order count related information */
+    pocStorage_t poc[1];
+
+    /* access unit boundary checking related data */
+    aubCheck_t aub[1];
+
+    /* current processed image */
+    image_t currImage[1];
+
+    /* last valid NAL unit header is stored here */
+    nalUnit_t prevNalUnit[1];
+
+    /* slice header, second structure used as a temporary storage while
+     * decoding slice header, first one stores last successfully decoded
+     * slice header */
+    sliceHeader_t sliceHeader[2];
+
+    /* fields to store old stream buffer pointers, needed when only part of
+     * a stream buffer is processed by h264bsdDecode function */
+    u32 prevBufNotFinished;
+    u8 *prevBufPointer;
+    u32 prevBytesConsumed;
+    strmData_t strm[1];
+
+    /* macroblock layer structure, there is no need to store this but it
+     * would have increased the stack size excessively and needed to be
+     * allocated from heap -> easiest to put it here */
+    macroblockLayer_t *mbLayer;
+
+    u32 pendingActivation; /* Activate parameter sets after returning
+                              HEADERS_RDY to the user */
+    u32 intraConcealmentFlag; /* 0 gray picture for corrupted intra
+                                 1 previous frame used if available */
+} storage_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+void h264bsdInitStorage(storage_t *pStorage);
+void h264bsdResetStorage(storage_t *pStorage);
+u32 h264bsdIsStartOfPicture(storage_t *pStorage);
+u32 h264bsdIsEndOfPicture(storage_t *pStorage);
+u32 h264bsdStoreSeqParamSet(storage_t *pStorage, seqParamSet_t *pSeqParamSet);
+u32 h264bsdStorePicParamSet(storage_t *pStorage, picParamSet_t *pPicParamSet);
+u32 h264bsdActivateParamSets(storage_t *pStorage, u32 ppsId, u32 isIdr);
+void h264bsdComputeSliceGroupMap(storage_t *pStorage,
+    u32 sliceGroupChangeCycle);
+/* writes HANTRO_TRUE or HANTRO_FALSE to *accessUnitBoundaryFlag */
+u32 h264bsdCheckAccessUnitBoundary(
+  strmData_t *strm,
+  nalUnit_t *nuNext,
+  storage_t *storage,
+  u32 *accessUnitBoundaryFlag);
+/* HANTRO_OK when at least one stored PPS/SPS pair passes CheckPps */
+u32 h264bsdValidParamSets(storage_t *pStorage);
+
+#endif /* #ifndef H264SWDEC_STORAGE_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.c
new file mode 100755
index 0000000..20d1083
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.c
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdGetBits
+          h264bsdShowBits32
+          h264bsdFlushBits
+          h264bsdIsByteAligned
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_util.h"
+#include "h264bsd_stream.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdGetBits
+
+        Functional description:
+            Read and remove bits from the stream buffer.
+
+        Input:
+            pStrmData   pointer to stream data structure
+            numBits     number of bits to read
+
+        Output:
+            none
+
+        Returns:
+            bits read from stream
+            END_OF_STREAM if not enough bits left
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdGetBits(strmData_t *pStrmData, u32 numBits)
+{
+
+    u32 out;
+
+    ASSERT(pStrmData);
+    ASSERT(numBits < 32);
+
+    /* Peek 32 bits and keep the top numBits. A zero-width read returns 0:
+     * shifting a 32-bit value by 32 would be undefined behaviour in C. */
+    out = h264bsdShowBits32(pStrmData);
+    out = numBits ? (out >> (32 - numBits)) : 0;
+
+    /* the value is valid only if the bits could really be consumed */
+    if (h264bsdFlushBits(pStrmData, numBits) != HANTRO_OK)
+        return(END_OF_STREAM);
+
+    return(out);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdShowBits32
+
+        Functional description:
+            Read 32 bits from the stream buffer. Buffer is left as it is, i.e.
+            no bits are removed. First bit read from the stream is the MSB of
+            the return value. If there are not enough bits in the buffer ->
+            bits beyond the end of the stream are set to '0' in the return
+            value.
+
+        Input:
+            pStrmData   pointer to stream data structure
+
+        Output:
+            none
+
+        Returns:
+            bits read from stream
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdShowBits32(strmData_t *pStrmData)
+{
+
+    i32 bits, shift;    /* bits left in buffer / left-shift of next byte */
+    u32 out;            /* assembled result, first stream bit in the MSB */
+    u8 *pStrm;          /* read cursor; the stream state is not modified */
+
+    ASSERT(pStrmData);
+    ASSERT(pStrmData->pStrmCurrPos);
+    ASSERT(pStrmData->bitPosInWord < 8);
+    ASSERT(pStrmData->bitPosInWord ==
+           (pStrmData->strmBuffReadBits & 0x7));
+
+    pStrm = pStrmData->pStrmCurrPos;
+
+    /* number of bits left in the buffer */
+    bits = (i32)pStrmData->strmBuffSize*8 - (i32)pStrmData->strmBuffReadBits;
+
+    /* at least 32-bits in the buffer */
+    if (bits >= 32)
+    {
+        u32 bitPosInWord = pStrmData->bitPosInWord;
+        out = ((u32)pStrm[0] << 24) | ((u32)pStrm[1] << 16) |
+              ((u32)pStrm[2] <<  8) | ((u32)pStrm[3]);
+        /* not byte aligned: drop consumed bits, refill from a fifth byte */
+        if (bitPosInWord)
+        {
+            u32 byte = (u32)pStrm[4];
+            u32 tmp = (8-bitPosInWord);
+            out <<= bitPosInWord;
+            out |= byte>>tmp;
+        }
+        return (out);
+    }
+    /* at least one bit in the buffer */
+    else if (bits > 0)
+    {
+        shift = (i32)(24 + pStrmData->bitPosInWord); /* unread bits to MSB */
+        out = (u32)(*pStrm++) << shift;
+        bits -= (i32)(8 - pStrmData->bitPosInWord);
+        while (bits > 0) /* append the remaining bytes, 8 bits lower each */
+        {
+            shift -= 8;
+            out |= (u32)(*pStrm++) << shift;
+            bits -= 8;
+        }
+        return (out);
+    }
+    else /* no unread bits left */
+        return (0);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFlushBits
+
+        Functional description:
+            Remove bits from the stream buffer
+
+        Input:
+            pStrmData       pointer to stream data structure
+            numBits         number of bits to remove
+
+        Output:
+            none
+
+        Returns:
+            HANTRO_OK       success
+            END_OF_STREAM   not enough bits left
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_NEON
+u32 h264bsdFlushBits(strmData_t *pStrmData, u32 numBits)
+{
+
+    ASSERT(pStrmData);
+    ASSERT(pStrmData->pStrmBuffStart);
+    ASSERT(pStrmData->pStrmCurrPos);
+    ASSERT(pStrmData->bitPosInWord < 8);
+    ASSERT(pStrmData->bitPosInWord == (pStrmData->strmBuffReadBits & 0x7));
+
+    /* the bit counters advance even if the request overruns the buffer */
+    pStrmData->strmBuffReadBits += numBits;
+    pStrmData->bitPosInWord = pStrmData->strmBuffReadBits & 0x7;
+    /* overrun: the byte pointer is left untouched */
+    if (pStrmData->strmBuffReadBits > (8*pStrmData->strmBuffSize))
+        return(END_OF_STREAM);
+
+    pStrmData->pStrmCurrPos =
+        pStrmData->pStrmBuffStart + (pStrmData->strmBuffReadBits >> 3);
+    return(HANTRO_OK);
+
+}
+#endif
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIsByteAligned
+
+        Functional description:
+            Check if current stream position is byte aligned.
+
+        Inputs:
+            pStrmData   pointer to stream data structure
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_TRUE     stream is byte aligned
+            HANTRO_FALSE    stream is not byte aligned
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIsByteAligned(strmData_t *pStrmData)
+{
+
+/* Variables */
+
+/* Code */
+
+    /* aligned exactly when the bit position inside the byte is zero */
+    return((pStrmData->bitPosInWord == 0) ?
+        HANTRO_TRUE :
+        HANTRO_FALSE);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.h
new file mode 100755
index 0000000..4404b66
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_STREAM_H
+#define H264SWDEC_STREAM_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+typedef struct              /* bit-level read cursor over a byte buffer */
+{
+    u8  *pStrmBuffStart;    /* first byte of the stream buffer */
+    u8  *pStrmCurrPos;      /* byte holding the next bit to be read */
+    u32  bitPosInWord;      /* bit offset in that byte: readBits & 0x7 */
+    u32  strmBuffSize;      /* size of stream buffer (bytes) */
+    u32  strmBuffReadBits;  /* total bits consumed from the buffer so far */
+} strmData_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdGetBits(strmData_t *pStrmData, u32 numBits);
+
+u32 h264bsdShowBits32(strmData_t *pStrmData);
+
+u32 h264bsdFlushBits(strmData_t *pStrmData, u32 numBits);
+
+u32 h264bsdIsByteAligned(strmData_t *);
+
+#endif /* #ifndef H264SWDEC_STREAM_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.c
new file mode 100755
index 0000000..4eb6dd0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdProcessBlock
+          h264bsdProcessLumaDc
+          h264bsdProcessChromaDc
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_transform.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/* Switch off the following Lint messages for this file:
+ * Info 701: Shift left of signed quantity (int)
+ * Info 702: Shift right of signed quantity (int)
+ */
+/*lint -e701 -e702 */
+
+/* level scale factors: row is qp % 6, column is picked per coeff position */
+static const i32 levelScale[6][3] = {
+    {10,13,16}, {11,14,18}, {13,16,20}, {14,18,23}, {16,20,25}, {18,23,29}};
+
+/* qp % 6 as a function of qp, tabulated for qp in [0, 51] */
+static const u8 qpMod6[52] = {0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,
+    0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3};
+
+/* qp / 6 as a function of qp, tabulated for qp in [0, 51] */
+static const u8 qpDiv6[52] = {0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,
+    4,4,4,4,4,4,5,5,5,5,5,5,6,6,6,6,6,6,7,7,7,7,7,7,8,8,8,8};
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdProcessBlock
+
+        Functional description:
+            Function performs inverse zig-zag scan, inverse scaling and
+            inverse transform for a luma or a chroma residual block
+
+        Inputs:
+            data            pointer to data to be processed
+            qp              quantization parameter
+            skip            skip processing of data[0], set to non-zero value
+                            if dc coeff handled separately
+            coeffMap        16 lsb's indicate which coeffs are non-zero,
+                            bit 0 (lsb) for coeff 0, bit 1 for coeff 1 etc.
+
+        Outputs:
+            data            processed data
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      processed data not in valid range [-512, 511]
+
+------------------------------------------------------------------------------*/
+u32 h264bsdProcessBlock(i32 *data, u32 qp, u32 skip, u32 coeffMap)
+{
+
+/* Variables */
+
+    i32 tmp0, tmp1, tmp2, tmp3;
+    i32 d1, d2, d3;
+    u32 row,col;
+    u32 qpDiv;
+    i32 *ptr;
+
+/* Code */
+    /* qp indexes 52-entry tables; tmp1..tmp3 hold the three scale factors */
+    qpDiv = qpDiv6[qp];
+    tmp1 = levelScale[qpMod6[qp]][0] << qpDiv;
+    tmp2 = levelScale[qpMod6[qp]][1] << qpDiv;
+    tmp3 = levelScale[qpMod6[qp]][2] << qpDiv;
+    /* the DC coeff is scaled here unless the caller asked to skip it */
+    if (!skip)
+        data[0] = (data[0] * tmp1);
+
+    /* at least one of the rows 1, 2 or 3 contain non-zero coeffs, mask takes
+     * the scanning order into account */
+    if (coeffMap & 0xFF9C)
+    {
+        /* inverse zig-zag scan fused with scaling: move and multiply at once */
+        d1 = data[1];
+        d2 = data[14];
+        d3 = data[15];
+        data[1] = (d1 * tmp2);
+        data[14] = (d2 * tmp2);
+        data[15] = (d3 * tmp3);
+
+        d1 = data[2];
+        d2 = data[5];
+        d3 = data[4];
+        data[4] = (d1 * tmp2);
+        data[2]  = (d2 * tmp1);
+        data[5] = (d3 * tmp3);
+
+        d1 = data[8];
+        d2 = data[3];
+        d3 = data[6];
+        tmp0 = (d1 * tmp2);
+        data[8] = (d2 * tmp1);
+        data[3]  = (d3 * tmp2);
+        d1 = data[7];
+        d2 = data[12];
+        d3 = data[9];
+        data[6]  = (d1 * tmp2);
+        data[7]  = (d2 * tmp3);
+        data[12] = (d3 * tmp2);
+        data[9]  = tmp0;
+
+        d1 = data[10];
+        d2 = data[11];
+        d3 = data[13];
+        data[13] = (d1 * tmp3);
+        data[10] = (d2 * tmp1);
+        data[11] = (d3 * tmp2);
+
+        /* horizontal transform, one row of four coeffs per iteration */
+        for (row = 4, ptr = data; row--; ptr += 4)
+        {
+            tmp0 = ptr[0] + ptr[2];
+            tmp1 = ptr[0] - ptr[2];
+            tmp2 = (ptr[1] >> 1) - ptr[3];
+            tmp3 = ptr[1] + (ptr[3] >> 1);
+            ptr[0] = tmp0 + tmp3;
+            ptr[1] = tmp1 + tmp2;
+            ptr[2] = tmp1 - tmp2;
+            ptr[3] = tmp0 - tmp3;
+        }
+
+        /*lint +e661 +e662*/
+        /* then vertical transform; data++ steps to the next column */
+        for (col = 4; col--; data++)
+        {
+            tmp0 = data[0] + data[8];
+            tmp1 = data[0] - data[8];
+            tmp2 = (data[4] >> 1) - data[12];
+            tmp3 = data[4] + (data[12] >> 1);
+            data[0 ] = (tmp0 + tmp3 + 32)>>6;
+            data[4 ] = (tmp1 + tmp2 + 32)>>6;
+            data[8 ] = (tmp1 - tmp2 + 32)>>6;
+            data[12] = (tmp0 - tmp3 + 32)>>6;
+            /* each value must be in [-512,511]; return early otherwise */
+            if (((u32)(data[0] + 512) > 1023) ||
+                ((u32)(data[4] + 512) > 1023) ||
+                ((u32)(data[8] + 512) > 1023) ||
+                ((u32)(data[12] + 512) > 1023) )
+                return(HANTRO_NOK);
+        }
+    }
+    else /* rows 1, 2 and 3 are zero */
+    {
+        /* only dc-coeff is non-zero, i.e. coeffs at original positions
+         * 1, 5 and 6 are zero */
+        if ((coeffMap & 0x62) == 0)
+        {
+            tmp0 = (data[0] + 32) >> 6;
+            /* one unsigned compare tests both bounds of [-512,511] */
+            if ((u32)(tmp0 + 512) > 1023)
+                return(HANTRO_NOK);
+            data[0] = data[1]  = data[2]  = data[3]  = data[4]  = data[5]  =
+                      data[6]  = data[7]  = data[8]  = data[9]  = data[10] =
+                      data[11] = data[12] = data[13] = data[14] = data[15] =
+                      tmp0;
+        }
+        else /* at least one of the coeffs 1, 5 or 6 is non-zero */
+        {   /* only row 0 is scaled and transformed, then copied to rows 1..3 */
+            data[1] = (data[1] * tmp2);
+            data[2] = (data[5] * tmp1);
+            data[3] = (data[6] * tmp2);
+            tmp0 = data[0] + data[2];
+            tmp1 = data[0] - data[2];
+            tmp2 = (data[1] >> 1) - data[3];
+            tmp3 = data[1] + (data[3] >> 1);
+            data[0] = (tmp0 + tmp3 + 32)>>6;
+            data[1] = (tmp1 + tmp2 + 32)>>6;
+            data[2] = (tmp1 - tmp2 + 32)>>6;
+            data[3] = (tmp0 - tmp3 + 32)>>6;
+            data[4] = data[8] = data[12] = data[0];
+            data[5] = data[9] = data[13] = data[1];
+            data[6] = data[10] = data[14] = data[2];
+            data[7] = data[11] = data[15] = data[3];
+            /* check that each value is in the range [-512,511] */
+            if (((u32)(data[0] + 512) > 1023) ||
+                ((u32)(data[1] + 512) > 1023) ||
+                ((u32)(data[2] + 512) > 1023) ||
+                ((u32)(data[3] + 512) > 1023) )
+                return(HANTRO_NOK);
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdProcessLumaDc
+
+        Functional description:
+            Function performs inverse zig-zag scan, inverse transform and
+            inverse scaling for a luma DC coefficients block
+
+        Inputs:
+            data            pointer to data to be processed
+            qp              quantization parameter
+
+        Outputs:
+            data            processed data
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+void h264bsdProcessLumaDc(i32 *data, u32 qp)
+{
+
+/* Variables */
+
+    i32 tmp0, tmp1, tmp2, tmp3;
+    u32 row,col;
+    u32 qpMod, qpDiv;
+    i32 levScale;
+    i32 *ptr;
+
+/* Code */
+    /* qp indexes the 52-entry lookup tables, so it must be in [0, 51] */
+    qpMod = qpMod6[qp];
+    qpDiv = qpDiv6[qp];
+
+    /* zig-zag scan as three in-place rotations; 0, 1, 14 and 15 stay put */
+    tmp0 = data[2];
+    data[2]  = data[5];
+    data[5] = data[4];
+    data[4] = tmp0;
+
+    tmp0 = data[8];
+    data[8] = data[3];
+    data[3]  = data[6];
+    data[6]  = data[7];
+    data[7]  = data[12];
+    data[12] = data[9];
+    data[9]  = tmp0;
+
+    tmp0 = data[10];
+    data[10] = data[11];
+    data[11] = data[13];
+    data[13] = tmp0;
+
+    /* horizontal transform, one row of four coeffs per iteration */
+    for (row = 4, ptr = data; row--; ptr += 4)
+    {
+        tmp0 = ptr[0] + ptr[2];
+        tmp1 = ptr[0] - ptr[2];
+        tmp2 = ptr[1] - ptr[3];
+        tmp3 = ptr[1] + ptr[3];
+        ptr[0] = tmp0 + tmp3;
+        ptr[1] = tmp1 + tmp2;
+        ptr[2] = tmp1 - tmp2;
+        ptr[3] = tmp0 - tmp3;
+    }
+
+    /*lint +e661 +e662*/
+    /* then vertical transform and inverse scaling */
+    levScale = levelScale[ qpMod ][0];
+    if (qp >= 12)
+    {   /* qpDiv >= 2 here: the scale factor absorbs a left shift by qpDiv-2 */
+        levScale <<= (qpDiv-2);
+        for (col = 4; col--; data++)
+        {
+            tmp0 = data[0] + data[8 ];
+            tmp1 = data[0] - data[8 ];
+            tmp2 = data[4] - data[12];
+            tmp3 = data[4] + data[12];
+            data[0 ] = ((tmp0 + tmp3)*levScale);
+            data[4 ] = ((tmp1 + tmp2)*levScale);
+            data[8 ] = ((tmp1 - tmp2)*levScale);
+            data[12] = ((tmp0 - tmp3)*levScale);
+        }
+    }
+    else
+    {   /* qpDiv is 0 or 1: scale, add rounding term, shift right by 2-qpDiv */
+        i32 tmp;
+        tmp = ((1 - qpDiv) == 0) ? 1 : 2;
+        for (col = 4; col--; data++)
+        {
+            tmp0 = data[0] + data[8 ];
+            tmp1 = data[0] - data[8 ];
+            tmp2 = data[4] - data[12];
+            tmp3 = data[4] + data[12];
+            data[0 ] = ((tmp0 + tmp3)*levScale+tmp) >> (2-qpDiv);
+            data[4 ] = ((tmp1 + tmp2)*levScale+tmp) >> (2-qpDiv);
+            data[8 ] = ((tmp1 - tmp2)*levScale+tmp) >> (2-qpDiv);
+            data[12] = ((tmp0 - tmp3)*levScale+tmp) >> (2-qpDiv);
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdProcessChromaDc
+
+        Functional description:
+            Function performs inverse transform and inverse scaling for a
+            chroma DC coefficients block
+
+        Inputs:
+            data            pointer to data to be processed
+            qp              quantization parameter
+
+        Outputs:
+            data            processed data
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+void h264bsdProcessChromaDc(i32 *data, u32 qp)
+{
+
+/* Variables */
+
+    i32 sum02, dif02, dif13, sum13;
+    i32 scale;
+    u32 shift;
+    u32 blk;
+
+/* Code */
+
+    scale = levelScale[ qpMod6[qp] ][0];
+
+    /* for qp below 6 the scaled value is halved with a right shift by one,
+     * otherwise the scale factor itself is shifted left by (qp / 6) - 1
+     * and no right shift is applied */
+    if (qp < 6)
+    {
+        shift = 1;
+    }
+    else
+    {
+        scale <<= (qpDiv6[qp] - 1);
+        shift = 0;
+    }
+
+    /* data holds two consecutive groups of four DC coeffs (data[0..3] and
+     * data[4..7]); each group gets the same 2x2 butterfly followed by
+     * the scaling computed above */
+    for (blk = 0; blk < 2; blk++, data += 4)
+    {
+        sum02 = data[0] + data[2];
+        dif02 = data[0] - data[2];
+        dif13 = data[1] - data[3];
+        sum13 = data[1] + data[3];
+        data[0] = ((sum02 + sum13) * scale) >> shift;
+        data[1] = ((sum02 - sum13) * scale) >> shift;
+        data[2] = ((dif02 + dif13) * scale) >> shift;
+        data[3] = ((dif02 - dif13) * scale) >> shift;
+    }
+
+}
+
+/*lint +e701 +e702 */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.h
new file mode 100755
index 0000000..4f41a23
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_TRANSFORM_H
+#define H264SWDEC_TRANSFORM_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdProcessBlock(i32 *data, u32 qp, u32 skip, u32 coeffMap);
+void h264bsdProcessLumaDc(i32 *data, u32 qp);
+void h264bsdProcessChromaDc(i32 *data, u32 qp);
+
+#endif /* #ifndef H264SWDEC_TRANSFORM_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.c
new file mode 100755
index 0000000..53b2fd8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.c
@@ -0,0 +1,286 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdCountLeadingZeros
+          h264bsdRbspTrailingBits
+          h264bsdMoreRbspData
+          h264bsdNextMbAddress
+          h264bsdSetCurrImageMbPointers
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/* look-up table for expected values of stuffing bits */
+static const u32 stuffingTable[8] = {0x1,0x2,0x4,0x8,0x10,0x20,0x40,0x80};
+
+/* look-up table for chroma quantization parameter as a function of luma QP */
+const u32 h264bsdQpC[52] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,
+    20,21,22,23,24,25,26,27,28,29,29,30,31,32,32,33,34,34,35,35,36,36,37,37,37,
+    38,38,38,39,39,39,39};
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+   5.1  Function: h264bsdCountLeadingZeros
+
+        Functional description:
+            Count leading zeros in a code word. Code word is assumed to be
+            right-aligned, last bit of the code word in the lsb of the value.
+
+        Inputs:
+            value   code word
+            length  number of bits in the code word
+
+        Outputs:
+            none
+
+        Returns:
+            number of leading zeros in the code word
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_NEON
+u32 h264bsdCountLeadingZeros(u32 value, u32 length)
+{
+
+/* Variables */
+
+    u32 zeros = 0;
+    u32 mask;
+
+/* Code */
+
+    ASSERT(length <= 32);
+    /* unsigned mask: 1 << 31 on int and a shift by (0 - 1) are undefined */
+    mask = length ? ((u32)1 << (length - 1)) : 0;
+    while (mask && !(value & mask))
+    {
+        zeros++;
+        mask >>= 1;
+    }
+    return(zeros);
+}
+#endif
+/*------------------------------------------------------------------------------
+
+   5.2  Function: h264bsdRbspTrailingBits
+
+        Functional description:
+            Check Raw Byte Stream Payload (RBSP) trailing bits, i.e. stuffing.
+            Rest of the current byte (whole byte if already byte aligned)
+            in the stream buffer shall contain a '1' bit followed by zero or
+            more '0' bits.
+
+        Inputs:
+            pStrmData   pointer to stream data structure
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_OK      RBSP trailing bits found
+            HANTRO_NOK     otherwise
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdRbspTrailingBits(strmData_t *pStrmData)
+{
+
+/* Variables */
+
+    u32 tailLen;
+    u32 tail;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pStrmData->bitPosInWord < 8);
+
+    /* consume everything up to the next byte boundary (a full byte when
+     * the stream is already aligned) */
+    tailLen = 8 - pStrmData->bitPosInWord;
+    tail = h264bsdGetBits(pStrmData, tailLen);
+
+    /* valid stuffing is a single '1' followed only by '0' bits; running
+     * out of stream data counts as a failure as well */
+    if ((tail != END_OF_STREAM) && (tail == stuffingTable[tailLen - 1]))
+        return(HANTRO_OK);
+
+    return(HANTRO_NOK);
+}
+
+/*------------------------------------------------------------------------------
+
+   5.3  Function: h264bsdMoreRbspData
+
+        Functional description:
+            Check if there is more data in the current RBSP. The standard
+            defines this function so that there is more data if
+                -more than 8 bits left or
+                -last bits are not RBSP trailing bits
+
+        Inputs:
+            pStrmData   pointer to stream data structure
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_TRUE    there is more data
+            HANTRO_FALSE   no more data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdMoreRbspData(strmData_t *pStrmData)
+{
+
+/* Variables */
+
+    u32 bitsLeft;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pStrmData->strmBuffReadBits <= 8 * pStrmData->strmBuffSize);
+
+    bitsLeft = pStrmData->strmBuffSize * 8 - pStrmData->strmBuffReadBits;
+
+    if (bitsLeft == 0)
+        return(HANTRO_FALSE);
+    if (bitsLeft > 8)
+        return(HANTRO_TRUE);
+
+    /* 1..8 bits left: no more data only if they are exactly '1' then zeros */
+    if ((h264bsdShowBits32(pStrmData)>>(32-bitsLeft)) == (1 << (bitsLeft-1)))
+        return(HANTRO_FALSE);
+    return(HANTRO_TRUE);
+}
+
+/*------------------------------------------------------------------------------
+
+   5.4  Function: h264bsdNextMbAddress
+
+        Functional description:
+            Get address of the next macroblock in the current slice group.
+
+        Inputs:
+            pSliceGroupMap      slice group for each macroblock
+            picSizeInMbs        size of the picture
+            currMbAddr          where to start
+
+        Outputs:
+            none
+
+        Returns:
+            address of the next macroblock
+            0   if none of the following macroblocks belong to same slice
+                group as currMbAddr
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdNextMbAddress(u32 *pSliceGroupMap, u32 picSizeInMbs, u32 currMbAddr)
+{
+
+/* Variables */
+
+    u32 i, sliceGroup;
+
+/* Code */
+
+    ASSERT(pSliceGroupMap);
+    ASSERT(picSizeInMbs);
+    ASSERT(currMbAddr < picSizeInMbs);
+
+    sliceGroup = pSliceGroupMap[currMbAddr];
+
+    /* scan forward for the next macroblock of the same slice group; the
+     * index is tested before the map is read so that the element one past
+     * the end of the map (pSliceGroupMap[picSizeInMbs]) is never touched */
+    for (i = currMbAddr + 1; i < picSizeInMbs; i++)
+    {
+        if (pSliceGroupMap[i] == sliceGroup)
+            return(i);
+    }
+
+    /* none of the following macroblocks belongs to the same slice group
+     * as currMbAddr */
+    return(0);
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+   5.5  Function: h264bsdSetCurrImageMbPointers
+
+        Functional description:
+            Set luma and chroma pointers in image_t for current MB
+
+        Inputs:
+            image       Current image
+            mbNum       number of current MB
+
+        Outputs:
+            none
+
+        Returns:
+            none
+------------------------------------------------------------------------------*/
+void h264bsdSetCurrImageMbPointers(image_t *image, u32 mbNum)
+{
+    /* mbNum is split into a row and a column using image->width, so the
+     * picture dimensions are presumably in macroblock units */
+    const u32 mbWidth = image->width;
+    const u32 mbHeight = image->height;
+    const u32 mbRow = mbNum / mbWidth;
+    const u32 mbCol = mbNum % mbWidth;
+    const u32 rowBase = mbRow * mbWidth;    /* MBs in the rows above */
+    const u32 picMbs = mbWidth * mbHeight;
+
+    /* luma offset: 256 per MB in the rows above plus 16 per MB to the
+     * left in the current row */
+    image->luma = (u8*)(image->data + mbCol * 16 + rowBase * 256);
+
+    /* cb starts picMbs * 256 into data, cr another picMbs * 64 after cb */
+    image->cb = (u8*)(image->data + picMbs * 256 + rowBase * 64 + mbCol * 8);
+    image->cr = (u8*)(image->cb + picMbs * 64);
+}
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.h
new file mode 100755
index 0000000..cb3adda
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_UTIL_H
+#define H264SWDEC_UTIL_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#ifdef _ASSERT_USED
+#include <assert.h>
+#endif
+
+#include "H264SwDecApi.h"
+
+#if defined(_RANGE_CHECK) || defined(_DEBUG_PRINT) || defined(_ERROR_PRINT)
+#include <stdio.h>
+#endif
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_image.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+#define HANTRO_OK   0
+#define HANTRO_NOK  1
+
+#define HANTRO_TRUE     (1)
+#define HANTRO_FALSE    (0)
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#define MEMORY_ALLOCATION_ERROR 0xFFFF
+#define PARAM_SET_ERROR 0xFFF0
+
+/* value to be returned by GetBits if stream buffer is empty */
+#define END_OF_STREAM 0xFFFFFFFFU
+
+#define EMPTY_RESIDUAL_INDICATOR 0xFFFFFF
+
+/* macro to mark a residual block empty, i.e. contain zero coefficients */
+#define MARK_RESIDUAL_EMPTY(residual) ((residual)[0] = EMPTY_RESIDUAL_INDICATOR)
+/* macro to check if residual block is empty */
+#define IS_RESIDUAL_EMPTY(residual) ((residual)[0] == EMPTY_RESIDUAL_INDICATOR)
+
+/* macro for assertion, used only if compiler flag _ASSERT_USED is defined */
+#ifdef _ASSERT_USED
+#define ASSERT(expr) assert(expr)
+#else
+#define ASSERT(expr)
+#endif
+
+/* macro for range checking a value, used only if compiler flag _RANGE_CHECK
+ * is defined */
+#ifdef _RANGE_CHECK
+#define RANGE_CHECK(value, minBound, maxBound) \
+{ \
+    if ((value) < (minBound) || (value) > (maxBound)) \
+        fprintf(stderr, "Warning: Value exceeds given limit(s)!\n"); \
+}
+#else
+#define RANGE_CHECK(value, minBound, maxBound)
+#endif
+
+/* macro for range checking an array, used only if compiler flag _RANGE_CHECK
+ * is defined */
+#ifdef _RANGE_CHECK
+#define RANGE_CHECK_ARRAY(array, minBound, maxBound, length) \
+{ \
+    i32 i; \
+    for (i = 0; i < (length); i++) \
+        if ((array)[i] < (minBound) || (array)[i] > (maxBound)) \
+            fprintf(stderr,"Warning: Value [%d] exceeds given limit(s)!\n",i); \
+}
+#else
+#define RANGE_CHECK_ARRAY(array, minBound, maxBound, length)
+#endif
+
+/* macro for debug printing, used only if compiler flag _DEBUG_PRINT is
+ * defined */
+#ifdef _DEBUG_PRINT
+#define DEBUG(args) printf args
+#else
+#define DEBUG(args)
+#endif
+
+/* macro for error printing, used only if compiler flag _ERROR_PRINT is
+ * defined */
+#ifdef _ERROR_PRINT
+#define EPRINT(msg) fprintf(stderr,"ERROR: %s\n",msg)
+#else
+#define EPRINT(msg)
+#endif
+
+/* macro to get smaller of two values */
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+/* macro to get greater of two values */
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
+/* macro to get absolute value */
+#define ABS(a) (((a) < 0) ? -(a) : (a))
+
+/* macro to clip a value z, so that x <= z <= y */
+#define CLIP3(x,y,z) (((z) < (x)) ? (x) : (((z) > (y)) ? (y) : (z)))
+
+/* macro to clip a value z, so that 0 <= z <= 255 */
+#define CLIP1(z) (((z) < 0) ? 0 : (((z) > 255) ? 255 : (z)))
+
+/* macro to allocate memory */
+#define ALLOCATE(ptr, count, type) \
+{ \
+    (ptr) = H264SwDecMalloc((count) * sizeof(type)); \
+}
+
+/* macro to free allocated memory */
+#define FREE(ptr) \
+{ \
+    H264SwDecFree((ptr)); (ptr) = NULL; \
+}
+/* macro to round ptr up to a multiple of bytePos (a power of two) */
+#define ALIGN(ptr, bytePos) ((ptr) + ((((bytePos) - \
+        (unsigned long)(ptr)) & ((bytePos) - 1)) / sizeof(*(ptr))))
+
+extern const u32 h264bsdQpC[52];
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_NEON
+u32 h264bsdCountLeadingZeros(u32 value, u32 length);
+#else
+u32 h264bsdCountLeadingZeros(u32 value);
+#endif
+u32 h264bsdRbspTrailingBits(strmData_t *strmData);
+
+u32 h264bsdMoreRbspData(strmData_t *strmData);
+
+u32 h264bsdNextMbAddress(u32 *pSliceGroupMap, u32 picSizeInMbs, u32 currMbAddr);
+
+void h264bsdSetCurrImageMbPointers(image_t *image, u32 mbNum);
+
+#endif /* #ifndef H264SWDEC_UTIL_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.c
new file mode 100755
index 0000000..060f35e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdDecodeExpGolombUnsigned
+          h264bsdDecodeExpGolombSigned
+          h264bsdDecodeExpGolombMapped
+          h264bsdDecodeExpGolombTruncated
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_vlc.h"
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+/* Sentinel code num. Paired with the status returned by
+ * h264bsdDecodeExpGolombUnsigned it stands for 2^32-1 (HANTRO_OK) or for
+ * 2^32 (HANTRO_NOK), so the whole range [0, 2^32] can be reported */
+#define BIG_CODE_NUM 0xFFFFFFFFU
+
+/* Mapping tables from code num (index 0..47) to coded_block_pattern, used
+ * by h264bsdDecodeExpGolombMapped; this one serves intra macroblocks */
+static const u8 codedBlockPatternIntra4x4[48] = {
+    47,31,15,0,23,27,29,30,7,11,13,14,39,43,45,46,16,3,5,10,12,19,21,26,28,35,
+    37,42,44,1,2,4,8,17,18,20,24,6,9,22,25,32,33,34,36,40,38,41};
+/* same mapping for inter macroblocks */
+static const u8 codedBlockPatternInter[48] = {
+    0,16,1,2,4,8,32,3,5,10,12,15,47,7,11,13,14,6,9,31,35,37,42,44,33,34,36,40,
+    39,43,45,46,17,18,20,24,19,21,26,28,23,27,29,30,22,25,38,41};
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+   5.1  Function: h264bsdDecodeExpGolombUnsigned
+
+        Functional description:
+            Decode unsigned Exp-Golomb code. This is the same as codeNum used
+            in other Exp-Golomb code mappings. Code num (i.e. the decoded
+            symbol) is determined as
+
+                codeNum = 2^leadingZeros - 1 + GetBits(leadingZeros)
+
+            Normal decoded symbols are in the range [0, 2^32 - 2]. Symbol
+            2^32-1 is indicated by BIG_CODE_NUM with return value HANTRO_OK
+            while symbol 2^32  is indicated by BIG_CODE_NUM with return value
+            HANTRO_NOK.  These two symbols are special cases with code length
+            of 65, i.e.  32 '0' bits, a '1' bit, and either 0 or 1 represented
+            by 32 bits.
+
+            Symbol 2^32 is out of unsigned 32-bit range but is needed for
+            DecodeExpGolombSigned to express value -2^31.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+
+        Outputs:
+            codeNum         decoded code word is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      failure, no valid code word found, note exception
+                            with BIG_CODE_NUM
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeExpGolombUnsigned(strmData_t *pStrmData, u32 *codeNum)
+{
+
+/* Variables */
+
+    u32 bits, numZeros;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(codeNum);
+
+    bits = h264bsdShowBits32(pStrmData);
+
+    /* first bit is 1 -> code length 1, code num 0 */
+    if (bits >= 0x80000000)
+    {
+        h264bsdFlushBits(pStrmData, 1);
+        *codeNum = 0;
+        return(HANTRO_OK);
+    }
+    /* second bit is 1 -> code length 3, code num 1 + one suffix bit */
+    else if (bits >= 0x40000000)
+    {
+        if (h264bsdFlushBits(pStrmData, 3) == END_OF_STREAM)
+            return(HANTRO_NOK);
+        *codeNum = 1 + ((bits >> 29) & 0x1);
+        return(HANTRO_OK);
+    }
+    /* third bit is 1 -> code length 5, code num 3 + two suffix bits */
+    else if (bits >= 0x20000000)
+    {
+        if (h264bsdFlushBits(pStrmData, 5) == END_OF_STREAM)
+            return(HANTRO_NOK);
+        *codeNum = 3 + ((bits >> 27) & 0x3);
+        return(HANTRO_OK);
+    }
+    /* fourth bit is 1 -> code length 7, code num 7 + three suffix bits */
+    else if (bits >= 0x10000000)
+    {
+        if (h264bsdFlushBits(pStrmData, 7) == END_OF_STREAM)
+            return(HANTRO_NOK);
+        *codeNum = 7 + ((bits >> 25) & 0x7);
+        return(HANTRO_OK);
+    }
+    /* other code lengths: at least four leading zeros */
+    else
+    {
+#ifndef H264DEC_NEON
+        numZeros = 4 + h264bsdCountLeadingZeros(bits, 28);
+#else
+        numZeros = h264bsdCountLeadingZeros(bits);
+#endif
+        /* all 32 bits are zero */
+        if (numZeros == 32)
+        {
+            *codeNum = 0;
+            h264bsdFlushBits(pStrmData,32);
+            bits = h264bsdGetBits(pStrmData, 1);
+            /* check 33rd bit, must be 1 */
+            if (bits == 1)
+            {
+                /* cannot use h264bsdGetBits, limited to 31 bits */
+                bits = h264bsdShowBits32(pStrmData);
+                if (h264bsdFlushBits(pStrmData, 32) == END_OF_STREAM)
+                    return(HANTRO_NOK);
+                /* code num 2^32 - 1, needed for unsigned mapping */
+                if (bits == 0)
+                {
+                    *codeNum = BIG_CODE_NUM;
+                    return(HANTRO_OK);
+                }
+                /* code num 2^32, needed for signed mapping
+                 * (results in -2^31) */
+                else if (bits == 1)
+                {
+                    *codeNum = BIG_CODE_NUM;
+                    return(HANTRO_NOK);
+                }
+            }
+            /* more than 32 zeros, end of stream, or a suffix above 1: error */
+            return(HANTRO_NOK);
+        }
+        else
+            h264bsdFlushBits(pStrmData,numZeros+1);
+        /* the numZeros suffix bits follow the terminating 1 bit */
+        bits = h264bsdGetBits(pStrmData, numZeros);
+        if (bits == END_OF_STREAM)
+            return(HANTRO_NOK);
+        /* 1U keeps the shift unsigned: numZeros can be 31, 1 << 31 is UB */
+        *codeNum = (1U << numZeros) - 1 + bits;
+
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+   5.2  Function: h264bsdDecodeExpGolombSigned
+
+        Functional description:
+            Decode signed Exp-Golomb code. Code num is determined by
+            h264bsdDecodeExpGolombUnsigned and then mapped to signed
+            representation as
+
+                symbol = (-1)^(codeNum+1) * (codeNum+1)/2
+
+            Signed symbols shall be in the range [-2^31, 2^31 - 1]. Symbol
+            -2^31 is obtained when codeNum is 2^32, which cannot be expressed
+            by unsigned 32-bit value. This is signaled as a special case from
+            the h264bsdDecodeExpGolombUnsigned by setting codeNum to
+            BIG_CODE_NUM and returning HANTRO_NOK status.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+
+        Outputs:
+            value           decoded code word is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      failure, no valid code word found
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeExpGolombSigned(strmData_t *pStrmData, i32 *value)
+{
+
+/* Variables */
+    /* codeNum starts at 0 so that a failed decode which leaves it unwritten
+     * is not mistaken for BIG_CODE_NUM in the check below */
+    u32 status, codeNum = 0;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(value);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData, &codeNum);
+    /* codeNum is inspected before status: BIG_CODE_NUM reverses its meaning */
+    if (codeNum == BIG_CODE_NUM)
+    {
+        /* BIG_CODE_NUM and HANTRO_OK status means codeNum 2^32-1 which would
+         * result in signed integer valued 2^31 (i.e. out of 32-bit signed
+         * integer range) */
+        if (status == HANTRO_OK)
+            return(HANTRO_NOK);
+        /* BIG_CODE_NUM and HANTRO_NOK status means codeNum 2^32 which results
+         * in signed integer valued -2^31 */
+        else
+        {
+            *value = (i32)(2147483648U);
+            return (HANTRO_OK);
+        }
+    }
+    else if (status == HANTRO_OK)
+    {
+        /* (-1)^(codeNum+1) results in positive sign if codeNum is odd,
+         * negative when it is even. The magnitude is (codeNum+1)>>1 in
+         * both cases; for even codeNum this equals codeNum>>1 and the
+         * result is negated */
+        /*lint -e702 */
+        *value = (codeNum & 0x1) ? (i32)((codeNum + 1) >> 1) :
+                                  -(i32)((codeNum + 1) >> 1);
+        /*lint +e702 */
+        return(HANTRO_OK);
+    }
+
+    return(HANTRO_NOK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+   5.3  Function: h264bsdDecodeExpGolombMapped
+
+        Functional description:
+            Decode mapped Exp-Golomb code. Code num is determined by
+            h264bsdDecodeExpGolombUnsigned and then mapped to codedBlockPattern
+            either for intra or inter macroblock. The mapping is implemented by
+            look-up tables defined in the beginning of the file.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            isIntra         flag to indicate if intra or inter mapping is to
+                            be used
+
+        Outputs:
+            value           decoded code word is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      failure, no valid code word found
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeExpGolombMapped(strmData_t *pStrmData, u32 *value,
+    u32 isIntra)
+{
+
+/* Variables */
+
+    /* cbpIndex is the raw Exp-Golomb code num read from the stream */
+    u32 cbpIndex;
+    const u8 *cbpTable;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(value);
+
+    /* any failure reported by the unsigned decoder becomes HANTRO_NOK */
+    if (h264bsdDecodeExpGolombUnsigned(pStrmData, &cbpIndex) != HANTRO_OK)
+        return (HANTRO_NOK);
+
+    /* both mapping tables hold 48 entries, so only [0,47] can be mapped */
+    if (cbpIndex > 47)
+        return (HANTRO_NOK);
+
+    /* non-zero isIntra selects the intra 4x4 table, zero the inter table */
+    cbpTable = isIntra ? codedBlockPatternIntra4x4 : codedBlockPatternInter;
+    *value = cbpTable[cbpIndex];
+
+    return (HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+   5.4  Function: h264bsdDecodeExpGolombTruncated
+
+        Functional description:
+            Decode truncated Exp-Golomb code. greaterThanOne flag indicates
+            the range of the symbol to be decoded as follows:
+                FALSE   ->  [0,1]
+                TRUE    ->  [0,2^32-1]
+
+            If flag is false the decoding is performed by reading one bit
+            from the stream with h264bsdGetBits and mapping this to decoded
+            symbol as
+                symbol = bit ? 0 : 1
+
+            Otherwise, i.e. when flag is TRUE, code num is determined by
+            h264bsdDecodeExpGolombUnsigned and this is used as the decoded
+            symbol.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            greaterThanOne  flag to indicate if range is wider than [0,1]
+
+        Outputs:
+            value           decoded code word is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      failure, no valid code word found
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeExpGolombTruncated(
+  strmData_t *pStrmData,
+  u32 *value,
+  u32 greaterThanOne)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(value);
+
+    /* wide range: the symbol is a plain unsigned Exp-Golomb code and the
+     * status of that decoder is passed through unchanged */
+    if (greaterThanOne)
+        return (h264bsdDecodeExpGolombUnsigned(pStrmData, value));
+
+    /* range [0,1]: a single bit is read; *value also receives
+     * END_OF_STREAM when the read fails */
+    *value = h264bsdGetBits(pStrmData, 1);
+    if (*value == END_OF_STREAM)
+        return (HANTRO_NOK);
+    /* symbol is the inverse of the bit read (1 -> 0, 0 -> 1) */
+    *value = *value ^ 0x1;
+    return (HANTRO_OK);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.h
new file mode 100755
index 0000000..4c16773
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_VLC_H
+#define H264SWDEC_VLC_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_transform.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeExpGolombUnsigned(strmData_t *pStrmData, u32 *value);
+
+u32 h264bsdDecodeExpGolombSigned(strmData_t *pStrmData, i32 *value);
+
+u32 h264bsdDecodeExpGolombMapped(strmData_t *pStrmData, u32 *value,
+    u32 isIntra);
+
+u32 h264bsdDecodeExpGolombTruncated(strmData_t *pStrmData, u32 *value,
+    u32 greaterThanOne);
+
+#endif /* #ifdef H264SWDEC_VLC_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.c
new file mode 100755
index 0000000..4a9335a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.c
@@ -0,0 +1,490 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+     1. Include headers
+     2. External compiler flags
+     3. Module defines
+     4. Local function prototypes
+     5. Functions
+          h264bsdDecodeVuiParameters
+          DecodeHrdParameters
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_vui.h"
+#include "basetype.h"
+#include "h264bsd_vlc.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+    2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+    3. Module defines
+------------------------------------------------------------------------------*/
+
+#define MAX_DPB_SIZE 16     /* default numReorderFrames/maxDecFrameBuffering */
+#define MAX_BR       240000 /* for level 5.1; scales default bitRateValue[0] */
+#define MAX_CPB      240000 /* for level 5.1; scales default cpbSizeValue[0] */
+
+/*------------------------------------------------------------------------------
+    4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 DecodeHrdParameters(
+  strmData_t *pStrmData,
+  hrdParameters_t *pHrdParameters);
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdDecodeVuiParameters
+
+        Functional description:
+            Decode VUI parameters from the stream. See standard for details.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+
+        Outputs:
+            pVuiParameters  decoded information is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data or end of stream
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeVuiParameters(strmData_t *pStrmData,
+    vuiParameters_t *pVuiParameters)
+{
+
+/* Variables */
+
+    u32 tmp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pVuiParameters);
+    /* start from all zeros: fields neither read nor defaulted below stay 0 */
+    H264SwDecMemset(pVuiParameters, 0, sizeof(vuiParameters_t));
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->aspectRatioPresentFlag = (tmp == 1) ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->aspectRatioPresentFlag)
+    {
+        tmp = h264bsdGetBits(pStrmData, 8);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->aspectRatioIdc = tmp;
+
+        if (pVuiParameters->aspectRatioIdc == ASPECT_RATIO_EXTENDED_SAR)
+        {
+            tmp = h264bsdGetBits(pStrmData, 16);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            pVuiParameters->sarWidth = tmp;
+
+            tmp = h264bsdGetBits(pStrmData, 16);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            pVuiParameters->sarHeight = tmp;
+        }
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->overscanInfoPresentFlag = (tmp == 1) ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->overscanInfoPresentFlag)
+    {
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->overscanAppropriateFlag = (tmp == 1) ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->videoSignalTypePresentFlag = (tmp == 1) ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->videoSignalTypePresentFlag)
+    {
+        tmp = h264bsdGetBits(pStrmData, 3);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->videoFormat = tmp;
+
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->videoFullRangeFlag = (tmp == 1) ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->colourDescriptionPresentFlag =
+            (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+        if (pVuiParameters->colourDescriptionPresentFlag)
+        {
+            tmp = h264bsdGetBits(pStrmData, 8);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            pVuiParameters->colourPrimaries = tmp;
+
+            tmp = h264bsdGetBits(pStrmData, 8);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            pVuiParameters->transferCharacteristics = tmp;
+
+            tmp = h264bsdGetBits(pStrmData, 8);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            pVuiParameters->matrixCoefficients = tmp;
+        }
+        else
+        {
+            pVuiParameters->colourPrimaries         = 2;
+            pVuiParameters->transferCharacteristics = 2;
+            pVuiParameters->matrixCoefficients      = 2;
+        }
+    }
+    else
+    {
+        pVuiParameters->videoFormat             = 5;
+        pVuiParameters->colourPrimaries         = 2;
+        pVuiParameters->transferCharacteristics = 2;
+        pVuiParameters->matrixCoefficients      = 2;
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->chromaLocInfoPresentFlag =
+        (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->chromaLocInfoPresentFlag)
+    {
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->chromaSampleLocTypeTopField);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pVuiParameters->chromaSampleLocTypeTopField > 5)
+            return(HANTRO_NOK);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->chromaSampleLocTypeBottomField);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pVuiParameters->chromaSampleLocTypeBottomField > 5)
+            return(HANTRO_NOK);
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->timingInfoPresentFlag =
+        (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+    /* timing info: two 32-bit values that must be non-zero, then a flag */
+    if (pVuiParameters->timingInfoPresentFlag)
+    {
+        tmp = h264bsdShowBits32(pStrmData);
+        if (h264bsdFlushBits(pStrmData, 32) == END_OF_STREAM)
+            return(HANTRO_NOK);
+        if (tmp == 0)
+            return(HANTRO_NOK);
+        pVuiParameters->numUnitsInTick = tmp;
+        /* timeScale: same 32-bit read, zero is rejected as well */
+        tmp = h264bsdShowBits32(pStrmData);
+        if (h264bsdFlushBits(pStrmData, 32) == END_OF_STREAM)
+            return(HANTRO_NOK);
+        if (tmp == 0)
+            return(HANTRO_NOK);
+        pVuiParameters->timeScale = tmp;
+
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->fixedFrameRateFlag =
+            (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->nalHrdParametersPresentFlag =
+        (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->nalHrdParametersPresentFlag)
+    {
+        tmp = DecodeHrdParameters(pStrmData, &pVuiParameters->nalHrdParameters);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+    }
+    else
+    {
+        pVuiParameters->nalHrdParameters.cpbCnt          = 1;
+        /* MaxBR and MaxCPB should be the values corresponding to the levelIdc
+         * in the SPS containing these VUI parameters. However, these values
+         * are not used anywhere and maximum for any level will be used here */
+        pVuiParameters->nalHrdParameters.bitRateValue[0] = 1200 * MAX_BR + 1;
+        pVuiParameters->nalHrdParameters.cpbSizeValue[0] = 1200 * MAX_CPB + 1;
+        pVuiParameters->nalHrdParameters.initialCpbRemovalDelayLength = 24;
+        pVuiParameters->nalHrdParameters.cpbRemovalDelayLength        = 24;
+        pVuiParameters->nalHrdParameters.dpbOutputDelayLength         = 24;
+        pVuiParameters->nalHrdParameters.timeOffsetLength             = 24;
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->vclHrdParametersPresentFlag =
+        (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->vclHrdParametersPresentFlag)
+    {
+        tmp = DecodeHrdParameters(pStrmData, &pVuiParameters->vclHrdParameters);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+    }
+    else
+    {
+        pVuiParameters->vclHrdParameters.cpbCnt          = 1;
+        /* MaxBR and MaxCPB should be the values corresponding to the levelIdc
+         * in the SPS containing these VUI parameters. However, these values
+         * are not used anywhere and maximum for any level will be used here */
+        pVuiParameters->vclHrdParameters.bitRateValue[0] = 1000 * MAX_BR + 1;
+        pVuiParameters->vclHrdParameters.cpbSizeValue[0] = 1000 * MAX_CPB + 1;
+        pVuiParameters->vclHrdParameters.initialCpbRemovalDelayLength = 24;
+        pVuiParameters->vclHrdParameters.cpbRemovalDelayLength        = 24;
+        pVuiParameters->vclHrdParameters.dpbOutputDelayLength         = 24;
+        pVuiParameters->vclHrdParameters.timeOffsetLength             = 24;
+    }
+    /* lowDelayHrdFlag is read only when either HRD set was present */
+    if (pVuiParameters->nalHrdParametersPresentFlag ||
+      pVuiParameters->vclHrdParametersPresentFlag)
+    {
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->lowDelayHrdFlag =
+            (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->picStructPresentFlag =
+        (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pVuiParameters->bitstreamRestrictionFlag =
+        (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pVuiParameters->bitstreamRestrictionFlag)
+    {
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pVuiParameters->motionVectorsOverPicBoundariesFlag =
+            (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->maxBytesPerPicDenom);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pVuiParameters->maxBytesPerPicDenom > 16)
+            return(HANTRO_NOK);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->maxBitsPerMbDenom);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pVuiParameters->maxBitsPerMbDenom > 16)
+            return(HANTRO_NOK);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->log2MaxMvLengthHorizontal);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pVuiParameters->log2MaxMvLengthHorizontal > 16)
+            return(HANTRO_NOK);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->log2MaxMvLengthVertical);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pVuiParameters->log2MaxMvLengthVertical > 16)
+            return(HANTRO_NOK);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->numReorderFrames);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pVuiParameters->maxDecFrameBuffering);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+    }
+    else
+    {
+        pVuiParameters->motionVectorsOverPicBoundariesFlag = HANTRO_TRUE;
+        pVuiParameters->maxBytesPerPicDenom       = 2;
+        pVuiParameters->maxBitsPerMbDenom         = 1;
+        pVuiParameters->log2MaxMvLengthHorizontal = 16;
+        pVuiParameters->log2MaxMvLengthVertical   = 16;
+        pVuiParameters->numReorderFrames          = MAX_DPB_SIZE;
+        pVuiParameters->maxDecFrameBuffering      = MAX_DPB_SIZE;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeHrdParameters
+
+        Functional description:
+            Decode HRD parameters from the stream. See standard for details.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+
+        Outputs:
+            pHrdParameters  decoded information is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid stream data
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeHrdParameters(
+  strmData_t *pStrmData,
+  hrdParameters_t *pHrdParameters)
+{
+
+/* Variables */
+
+    u32 tmp, i;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pHrdParameters);
+
+    /* the stream carries cpb_cnt_minus1; cpbCnt stores that value + 1 */
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &pHrdParameters->cpbCnt);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    /* validate cpb_cnt_minus1 before the +1: 0xFFFFFFFF would wrap to 0 */
+    if (pHrdParameters->cpbCnt >= MAX_CPB_CNT)
+        return(HANTRO_NOK);
+    pHrdParameters->cpbCnt++;
+
+    tmp = h264bsdGetBits(pStrmData, 4);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pHrdParameters->bitRateScale = tmp;
+
+    tmp = h264bsdGetBits(pStrmData, 4);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pHrdParameters->cpbSizeScale = tmp;
+
+    for (i = 0; i < pHrdParameters->cpbCnt; i++)
+    {
+        /* bit_rate_value_minus1 in the range [0, 2^32 - 2] */
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pHrdParameters->bitRateValue[i]);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pHrdParameters->bitRateValue[i] > 4294967294U)
+            return(HANTRO_NOK);
+        pHrdParameters->bitRateValue[i]++;
+        /* this may result in overflow, but this value is not used for
+         * anything */
+        pHrdParameters->bitRateValue[i] *=
+            1 << (6 + pHrdParameters->bitRateScale);
+
+        /* cpb_size_value_minus1 in the range [0, 2^32 - 2] */
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+          &pHrdParameters->cpbSizeValue[i]);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pHrdParameters->cpbSizeValue[i] > 4294967294U)
+            return(HANTRO_NOK);
+        pHrdParameters->cpbSizeValue[i]++;
+        /* this may result in overflow, but this value is not used for
+         * anything */
+        pHrdParameters->cpbSizeValue[i] *=
+            1 << (4 + pHrdParameters->cpbSizeScale);
+
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pHrdParameters->cbrFlag[i] = (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+    }
+    /* three 5-bit lengths stored as value + 1; the fourth is stored as read */
+    tmp = h264bsdGetBits(pStrmData, 5);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pHrdParameters->initialCpbRemovalDelayLength = tmp + 1;
+
+    tmp = h264bsdGetBits(pStrmData, 5);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pHrdParameters->cpbRemovalDelayLength = tmp + 1;
+
+    tmp = h264bsdGetBits(pStrmData, 5);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pHrdParameters->dpbOutputDelayLength = tmp + 1;
+
+    tmp = h264bsdGetBits(pStrmData, 5);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pHrdParameters->timeOffsetLength = tmp;
+
+    return(HANTRO_OK);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.h
new file mode 100755
index 0000000..05d52a4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+    Table of contents
+
+    1. Include headers
+    2. Module defines
+    3. Data types
+    4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_VUI_H
+#define H264SWDEC_VUI_H
+
+/*------------------------------------------------------------------------------
+    1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+
+/*------------------------------------------------------------------------------
+    2. Module defines
+------------------------------------------------------------------------------*/
+
+#define MAX_CPB_CNT 32
+
+/*------------------------------------------------------------------------------
+    3. Data types
+------------------------------------------------------------------------------*/
+
+/* enumerated sample aspect ratios, ASPECT_RATIO_M_N means M:N */
+enum
+{
+    ASPECT_RATIO_UNSPECIFIED = 0,
+    ASPECT_RATIO_1_1,
+    ASPECT_RATIO_12_11,
+    ASPECT_RATIO_10_11,
+    ASPECT_RATIO_16_11,
+    ASPECT_RATIO_40_33,
+    ASPECT_RATIO_24_11,
+    ASPECT_RATIO_20_11,
+    ASPECT_RATIO_32_11,
+    ASPECT_RATIO_80_33,
+    ASPECT_RATIO_18_11,
+    ASPECT_RATIO_15_11,
+    ASPECT_RATIO_64_33,
+    ASPECT_RATIO_160_99,
+    ASPECT_RATIO_EXTENDED_SAR = 255
+};
+
+/* structure to store Hypothetical Reference Decoder (HRD) parameters */
+typedef struct
+{
+    u32 cpbCnt; /* presumably the number of valid per-CPB entries below -- TODO confirm */
+    u32 bitRateScale; /* exponent term: bitRateValue[] is scaled by 1 << (6 + bitRateScale) */
+    u32 cpbSizeScale; /* exponent term: cpbSizeValue[] is scaled by 1 << (4 + cpbSizeScale) */
+    u32 bitRateValue[MAX_CPB_CNT]; /* (parsed value + 1) << (6 + bitRateScale); may wrap in u32 */
+    u32 cpbSizeValue[MAX_CPB_CNT]; /* (cpb_size_value_minus1 + 1) << (4 + cpbSizeScale); may wrap in u32 */
+    u32 cbrFlag[MAX_CPB_CNT]; /* HANTRO_TRUE or HANTRO_FALSE, from a single stream bit */
+    u32 initialCpbRemovalDelayLength; /* 5-bit stream field + 1 */
+    u32 cpbRemovalDelayLength; /* 5-bit stream field + 1 */
+    u32 dpbOutputDelayLength; /* 5-bit stream field + 1 */
+    u32 timeOffsetLength; /* 5-bit stream field, stored as read */
+} hrdParameters_t;
+
+/* storage for VUI parameters */
+typedef struct
+{
+    u32 aspectRatioPresentFlag;
+    u32 aspectRatioIdc;
+    u32 sarWidth;
+    u32 sarHeight;
+    u32 overscanInfoPresentFlag;
+    u32 overscanAppropriateFlag;
+    u32 videoSignalTypePresentFlag;
+    u32 videoFormat;
+    u32 videoFullRangeFlag;
+    u32 colourDescriptionPresentFlag;
+    u32 colourPrimaries;
+    u32 transferCharacteristics;
+    u32 matrixCoefficients;
+    u32 chromaLocInfoPresentFlag;
+    u32 chromaSampleLocTypeTopField;
+    u32 chromaSampleLocTypeBottomField;
+    u32 timingInfoPresentFlag;
+    u32 numUnitsInTick;
+    u32 timeScale;
+    u32 fixedFrameRateFlag;
+    u32 nalHrdParametersPresentFlag;
+    hrdParameters_t nalHrdParameters;
+    u32 vclHrdParametersPresentFlag;
+    hrdParameters_t vclHrdParameters;
+    u32 lowDelayHrdFlag;
+    u32 picStructPresentFlag;
+    u32 bitstreamRestrictionFlag;
+    u32 motionVectorsOverPicBoundariesFlag;
+    u32 maxBytesPerPicDenom;
+    u32 maxBitsPerMbDenom;
+    u32 log2MaxMvLengthHorizontal;
+    u32 log2MaxMvLengthVertical;
+    u32 numReorderFrames;
+    u32 maxDecFrameBuffering;
+} vuiParameters_t;
+
+/*------------------------------------------------------------------------------
+    4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeVuiParameters(strmData_t *pStrmData,
+    vuiParameters_t *pVuiParameters);
+
+#endif /* #ifndef H264SWDEC_VUI_H */
+
diff --git a/media/libstagefright/colorconversion/ColorConverter.cpp b/media/libstagefright/colorconversion/ColorConverter.cpp
index 4b72a53..b28d947 100644
--- a/media/libstagefright/colorconversion/ColorConverter.cpp
+++ b/media/libstagefright/colorconversion/ColorConverter.cpp
@@ -20,8 +20,6 @@
 
 namespace android {
 
-static const int OMX_QCOM_COLOR_FormatYVU420SemiPlanar = 0x7FA30C00;
-
 ColorConverter::ColorConverter(
         OMX_COLOR_FORMATTYPE from, OMX_COLOR_FORMATTYPE to)
     : mSrcFormat(from),
diff --git a/media/libstagefright/omx/SoftOMXPlugin.cpp b/media/libstagefright/omx/SoftOMXPlugin.cpp
index 04ca39e..02b1c8e 100644
--- a/media/libstagefright/omx/SoftOMXPlugin.cpp
+++ b/media/libstagefright/omx/SoftOMXPlugin.cpp
@@ -37,6 +37,7 @@
     { "OMX.google.aac.decoder", "aacdec", "audio_decoder.aac" },
     { "OMX.google.amrnb.decoder", "amrdec", "audio_decoder.amrnb" },
     { "OMX.google.amrwb.decoder", "amrdec", "audio_decoder.amrwb" },
+    { "OMX.google.h264.decoder", "h264dec", "video_decoder.avc" },
     { "OMX.google.avc.decoder", "avcdec", "video_decoder.avc" },
     { "OMX.google.g711.alaw.decoder", "g711dec", "audio_decoder.g711alaw" },
     { "OMX.google.g711.mlaw.decoder", "g711dec", "audio_decoder.g711mlaw" },
diff --git a/packages/BackupRestoreConfirmation/AndroidManifest.xml b/packages/BackupRestoreConfirmation/AndroidManifest.xml
index ed9a519..19848f6 100644
--- a/packages/BackupRestoreConfirmation/AndroidManifest.xml
+++ b/packages/BackupRestoreConfirmation/AndroidManifest.xml
@@ -21,7 +21,7 @@
     <uses-permission android:name="android.permission.BACKUP" />
 
     <application android:allowClearUserData="false"
-                 android:killAfterRestore="false"
+                 android:allowBackup="false"
                  android:permission="android.permission.CONFIRM_FULL_BACKUP" >
 
         <activity android:name=".BackupRestoreConfirmation" 
diff --git a/packages/SettingsProvider/res/values/defaults.xml b/packages/SettingsProvider/res/values/defaults.xml
index bf06f947..2e2768f 100644
--- a/packages/SettingsProvider/res/values/defaults.xml
+++ b/packages/SettingsProvider/res/values/defaults.xml
@@ -125,4 +125,7 @@
     <!-- Default for Settings.Secure.LONG_PRESS_TIMEOUT_MILLIS -->
     <integer name="def_long_press_timeout_millis">500</integer>
 
+    <!-- Default for Settings.System.POINTER_SPEED -->
+    <integer name="def_pointer_speed">0</integer>
+
 </resources>
diff --git a/packages/SettingsProvider/src/com/android/providers/settings/DatabaseHelper.java b/packages/SettingsProvider/src/com/android/providers/settings/DatabaseHelper.java
index b99c8b0..0f5f095 100644
--- a/packages/SettingsProvider/src/com/android/providers/settings/DatabaseHelper.java
+++ b/packages/SettingsProvider/src/com/android/providers/settings/DatabaseHelper.java
@@ -1214,6 +1214,9 @@
             loadBooleanSetting(stmt, Settings.System.NOTIFICATIONS_USE_RING_VOLUME,
                     R.bool.def_notifications_use_ring_volume);
 
+            loadIntegerSetting(stmt, Settings.System.POINTER_SPEED,
+                    R.integer.def_pointer_speed);
+
         } finally {
             if (stmt != null) stmt.close();
         }
diff --git a/services/input/EventHub.cpp b/services/input/EventHub.cpp
index af30887..29add52 100644
--- a/services/input/EventHub.cpp
+++ b/services/input/EventHub.cpp
@@ -33,6 +33,7 @@
 
 #include <hardware_legacy/power.h>
 
+#include <cutils/atomic.h>
 #include <cutils/properties.h>
 #include <utils/Log.h>
 #include <utils/Timers.h>
@@ -128,6 +129,7 @@
         mError(NO_INIT), mBuiltInKeyboardId(-1), mNextDeviceId(1),
         mOpeningDevices(0), mClosingDevices(0),
         mOpened(false), mNeedToSendFinishedDeviceScan(false),
+        mNeedToReopenDevices(0), mNeedToScanDevices(false),
         mInputFdIndex(1) {
     acquire_wake_lock(PARTIAL_WAKE_LOCK, WAKE_LOCK_ID);
 
@@ -393,12 +395,10 @@
     return NAME_NOT_FOUND;
 }
 
-void EventHub::addExcludedDevice(const char* deviceName)
-{
+void EventHub::setExcludedDevices(const Vector<String8>& devices) {
     AutoMutex _l(mLock);
 
-    String8 name(deviceName);
-    mExcludedDevices.push_back(name);
+    mExcludedDevices = devices;
 }
 
 bool EventHub::hasLed(int32_t deviceId, int32_t led) const {
@@ -466,9 +466,11 @@
     LOG_ASSERT(bufferSize >= 1);
 
     if (!mOpened) {
+        android_atomic_acquire_store(0, &mNeedToReopenDevices);
+
         mError = openPlatformInput() ? NO_ERROR : UNKNOWN_ERROR;
         mOpened = true;
-        mNeedToSendFinishedDeviceScan = true;
+        mNeedToScanDevices = true;
     }
 
     struct input_event readBuffer[bufferSize];
@@ -478,6 +480,20 @@
     for (;;) {
         nsecs_t now = systemTime(SYSTEM_TIME_MONOTONIC);
 
+        // Reopen input devices if needed.
+        if (android_atomic_acquire_load(&mNeedToReopenDevices)) {
+            android_atomic_acquire_store(0, &mNeedToReopenDevices);
+
+            LOGI("Reopening all input devices due to a configuration change.");
+
+            AutoMutex _l(mLock);
+            while (mDevices.size() > 1) {
+                closeDeviceAtIndexLocked(mDevices.size() - 1);
+            }
+            mNeedToScanDevices = true;
+            break; // return to the caller before we actually rescan
+        }
+
         // Report any devices that had last been added/removed.
         while (mClosingDevices) {
             Device* device = mClosingDevices;
@@ -495,6 +511,12 @@
             }
         }
 
+        if (mNeedToScanDevices) {
+            mNeedToScanDevices = false;
+            scanDevices();
+            mNeedToSendFinishedDeviceScan = true;
+        }
+
         while (mOpeningDevices != NULL) {
             Device* device = mOpeningDevices;
             LOGV("Reporting device opened: id=%d, name=%s\n",
@@ -696,13 +718,14 @@
     pollfd.revents = 0;
     mFds.push(pollfd);
     mDevices.push(NULL);
+    return true;
+}
 
-    res = scanDir(DEVICE_PATH);
+void EventHub::scanDevices() {
+    int res = scanDir(DEVICE_PATH);
     if(res < 0) {
         LOGE("scan dir failed for %s\n", DEVICE_PATH);
     }
-
-    return true;
 }
 
 // ----------------------------------------------------------------------------
@@ -755,12 +778,10 @@
     }
 
     // Check to see if the device is on our excluded list
-    List<String8>::iterator iter = mExcludedDevices.begin();
-    List<String8>::iterator end = mExcludedDevices.end();
-    for ( ; iter != end; iter++) {
-        const char* test = *iter;
-        if (identifier.name == test) {
-            LOGI("ignoring event id %s driver %s\n", devicePath, test);
+    for (size_t i = 0; i < mExcludedDevices.size(); i++) {
+        const String8& item = mExcludedDevices.itemAt(i);
+        if (identifier.name == item) {
+            LOGI("ignoring event id %s driver %s\n", devicePath, item.string());
             close(fd);
             return -1;
         }
@@ -1223,6 +1244,10 @@
     return 0;
 }
 
+void EventHub::reopenDevices() {
+    android_atomic_release_store(1, &mNeedToReopenDevices); // only raises a flag; the event-reading loop clears it, closes devices and schedules a rescan
+}
+
 void EventHub::dump(String8& dump) {
     dump.append("Event Hub State:\n");
 
diff --git a/services/input/EventHub.h b/services/input/EventHub.h
index ca33619..558959b 100644
--- a/services/input/EventHub.h
+++ b/services/input/EventHub.h
@@ -193,9 +193,9 @@
     virtual status_t mapAxis(int32_t deviceId, int scancode,
             AxisInfo* outAxisInfo) const = 0;
 
-    // exclude a particular device from opening
-    // this can be used to ignore input devices for sensors
-    virtual void addExcludedDevice(const char* deviceName) = 0;
+    // Sets devices that are excluded from opening.
+    // This can be used to ignore input devices for sensors.
+    virtual void setExcludedDevices(const Vector<String8>& devices) = 0;
 
     /*
      * Wait for events to become available and returns them.
@@ -230,6 +230,8 @@
     virtual void getVirtualKeyDefinitions(int32_t deviceId,
             Vector<VirtualKeyDefinition>& outVirtualKeys) const = 0;
 
+    virtual void reopenDevices() = 0;
+
     virtual void dump(String8& dump) = 0;
 };
 
@@ -259,7 +261,7 @@
     virtual status_t mapAxis(int32_t deviceId, int scancode,
             AxisInfo* outAxisInfo) const;
 
-    virtual void addExcludedDevice(const char* deviceName);
+    virtual void setExcludedDevices(const Vector<String8>& devices);
 
     virtual int32_t getScanCodeState(int32_t deviceId, int32_t scanCode) const;
     virtual int32_t getKeyCodeState(int32_t deviceId, int32_t keyCode) const;
@@ -276,6 +278,8 @@
     virtual void getVirtualKeyDefinitions(int32_t deviceId,
             Vector<VirtualKeyDefinition>& outVirtualKeys) const;
 
+    virtual void reopenDevices();
+
     virtual void dump(String8& dump);
 
 protected:
@@ -288,6 +292,7 @@
     int closeDevice(const char *devicePath);
     int closeDeviceAtIndexLocked(int index);
     int scanDir(const char *dirname);
+    void scanDevices();
     int readNotify(int nfd);
 
     status_t mError;
@@ -351,7 +356,9 @@
 
     bool mOpened;
     bool mNeedToSendFinishedDeviceScan;
-    List<String8> mExcludedDevices;
+    volatile int32_t mNeedToReopenDevices; // must be modified atomically
+    bool mNeedToScanDevices;
+    Vector<String8> mExcludedDevices;
 
     // device ids that report particular switches.
     int32_t mSwitches[SW_MAX + 1];
diff --git a/services/input/InputReader.cpp b/services/input/InputReader.cpp
index cb64da6..c42e3ab 100644
--- a/services/input/InputReader.cpp
+++ b/services/input/InputReader.cpp
@@ -38,6 +38,7 @@
 
 #include "InputReader.h"
 
+#include <cutils/atomic.h>
 #include <cutils/log.h>
 #include <ui/Keyboard.h>
 #include <ui/VirtualKeyMap.h>
@@ -260,10 +261,9 @@
         const sp<InputReaderPolicyInterface>& policy,
         const sp<InputDispatcherInterface>& dispatcher) :
         mEventHub(eventHub), mPolicy(policy), mDispatcher(dispatcher),
-        mGlobalMetaState(0), mDisableVirtualKeysTimeout(LLONG_MIN), mNextTimeout(LLONG_MAX) {
-    mPolicy->getReaderConfiguration(&mConfig);
-
-    configureExcludedDevices();
+        mGlobalMetaState(0), mDisableVirtualKeysTimeout(LLONG_MIN), mNextTimeout(LLONG_MAX),
+        mRefreshConfiguration(0) {
+    configure(true /*firstTime*/);
     updateGlobalMetaState();
     updateInputConfiguration();
 }
@@ -275,6 +275,11 @@
 }
 
 void InputReader::loopOnce() {
+    if (android_atomic_acquire_load(&mRefreshConfiguration)) {
+        android_atomic_release_store(0, &mRefreshConfiguration);
+        configure(false /*firstTime*/);
+    }
+
     int32_t timeoutMillis = -1;
     if (mNextTimeout != LLONG_MAX) {
         nsecs_t now = systemTime(SYSTEM_TIME_MONOTONIC);
@@ -495,9 +500,12 @@
     mDispatcher->notifyConfigurationChanged(when);
 }
 
-void InputReader::configureExcludedDevices() {
-    for (size_t i = 0; i < mConfig.excludedDeviceNames.size(); i++) {
-        mEventHub->addExcludedDevice(mConfig.excludedDeviceNames[i]);
+void InputReader::configure(bool firstTime) {
+    mPolicy->getReaderConfiguration(&mConfig);
+    mEventHub->setExcludedDevices(mConfig.excludedDeviceNames);
+
+    if (!firstTime) {
+        mEventHub->reopenDevices();
     }
 }
 
@@ -718,6 +726,10 @@
     } // release device registy reader lock
 }
 
+void InputReader::refreshConfiguration() {
+    android_atomic_release_store(1, &mRefreshConfiguration); // only raises a flag; loopOnce() clears it and calls configure(false)
+}
+
 void InputReader::dump(String8& dump) {
     mEventHub->dump(dump);
     dump.append("\n");
@@ -748,6 +760,20 @@
     dump.appendFormat(INDENT2 "VirtualKeyQuietTime: %0.1fms\n",
             mConfig.virtualKeyQuietTime * 0.000001f);
 
+    dump.appendFormat(INDENT2 "PointerVelocityControlParameters: "
+            "scale=%0.3f, lowThreshold=%0.3f, highThreshold=%0.3f, acceleration=%0.3f\n",
+            mConfig.pointerVelocityControlParameters.scale,
+            mConfig.pointerVelocityControlParameters.lowThreshold,
+            mConfig.pointerVelocityControlParameters.highThreshold,
+            mConfig.pointerVelocityControlParameters.acceleration);
+
+    dump.appendFormat(INDENT2 "WheelVelocityControlParameters: "
+            "scale=%0.3f, lowThreshold=%0.3f, highThreshold=%0.3f, acceleration=%0.3f\n",
+            mConfig.wheelVelocityControlParameters.scale,
+            mConfig.wheelVelocityControlParameters.lowThreshold,
+            mConfig.wheelVelocityControlParameters.highThreshold,
+            mConfig.wheelVelocityControlParameters.acceleration);
+
     dump.appendFormat(INDENT2 "PointerGesture:\n");
     dump.appendFormat(INDENT3 "QuietInterval: %0.1fms\n",
             mConfig.pointerGestureQuietInterval * 0.000001f);
@@ -1430,6 +1456,10 @@
 
     mHaveVWheel = getEventHub()->hasRelativeAxis(getDeviceId(), REL_WHEEL);
     mHaveHWheel = getEventHub()->hasRelativeAxis(getDeviceId(), REL_HWHEEL);
+
+    mPointerVelocityControl.setParameters(getConfig()->pointerVelocityControlParameters);
+    mWheelXVelocityControl.setParameters(getConfig()->wheelVelocityControlParameters);
+    mWheelYVelocityControl.setParameters(getConfig()->wheelVelocityControlParameters);
 }
 
 void CursorInputMapper::configureParameters() {
@@ -1491,6 +1521,11 @@
             }
         } // release lock
 
+        // Reset velocity.
+        mPointerVelocityControl.reset();
+        mWheelXVelocityControl.reset();
+        mWheelYVelocityControl.reset();
+
         // Synthesize button up event on reset.
         nsecs_t when = systemTime(SYSTEM_TIME_MONOTONIC);
         mAccumulator.clear();
@@ -1654,11 +1689,16 @@
         } else {
             vscroll = 0;
         }
+        mWheelYVelocityControl.move(when, NULL, &vscroll);
+
         if (mHaveHWheel && (fields & Accumulator::FIELD_REL_HWHEEL)) {
             hscroll = mAccumulator.relHWheel;
         } else {
             hscroll = 0;
         }
+        mWheelXVelocityControl.move(when, &hscroll, NULL);
+
+        mPointerVelocityControl.move(when, &deltaX, &deltaY);
 
         if (mPointerController != NULL) {
             if (deltaX != 0 || deltaY != 0 || vscroll != 0 || hscroll != 0
@@ -1900,6 +1940,7 @@
     mLocked.orientedRanges.haveDistance = false;
 
     mPointerGesture.reset();
+    mPointerGesture.pointerVelocityControl.setParameters(mConfig->pointerVelocityControlParameters);
 }
 
 void TouchInputMapper::configure() {
@@ -1946,9 +1987,11 @@
     mParameters.useAveragingTouchFilter = mConfig->filterTouchEvents;
     mParameters.useJumpyTouchFilter = mConfig->filterJumpyTouchEvents;
 
-    // TODO: select the default gesture mode based on whether the device supports
-    // distinct multitouch
-    mParameters.gestureMode = Parameters::GESTURE_MODE_SPOTS;
+    // Use the pointer presentation mode for devices that do not support distinct
+    // multitouch.  The spot-based presentation relies on being able to accurately
+    // locate two or more fingers on the touch pad.
+    mParameters.gestureMode = getEventHub()->hasInputProperty(getDeviceId(), INPUT_PROP_SEMI_MT)
+            ? Parameters::GESTURE_MODE_POINTER : Parameters::GESTURE_MODE_SPOTS;
 
     String8 gestureModeString;
     if (getDevice()->getConfiguration().tryGetProperty(String8("touch.gestureMode"),
@@ -2369,11 +2412,10 @@
                     mLocked.associatedDisplayHeight);
 
             // Scale movements such that one whole swipe of the touch pad covers a
-            // given area relative to the diagonal size of the display.
+            // given area relative to the diagonal size of the display when no acceleration
+            // is applied.
             // Assume that the touch pad has a square aspect ratio such that movements in
             // X and Y of the same number of raw units cover the same physical distance.
-            const float scaleFactor = 0.8f;
-
             mLocked.pointerGestureXMovementScale = mConfig->pointerGestureMovementSpeedRatio
                     * displayDiagonal / rawDiagonal;
             mLocked.pointerGestureYMovementScale = mLocked.pointerGestureXMovementScale;
@@ -3463,6 +3505,9 @@
     if (!sendEvents) {
         return;
     }
+    if (finishPreviousGesture) {
+        cancelPreviousGesture = false;
+    }
 
     // Switch pointer presentation.
     mPointerController->setPresentation(
@@ -3666,6 +3711,8 @@
                 mPointerGesture.currentGestureMode = PointerGesture::NEUTRAL;
                 mPointerGesture.currentGestureIdBits.clear();
 
+                mPointerGesture.pointerVelocityControl.reset();
+
                 if (mParameters.gestureMode == Parameters::GESTURE_MODE_SPOTS) {
                     mPointerGesture.spotGesture = PointerControllerInterface::SPOT_GESTURE_NEUTRAL;
                     mPointerGesture.spotIdBits.clear();
@@ -3760,6 +3807,8 @@
         mPointerGesture.currentGestureMode = PointerGesture::QUIET;
         mPointerGesture.currentGestureIdBits.clear();
 
+        mPointerGesture.pointerVelocityControl.reset();
+
         if (mParameters.gestureMode == Parameters::GESTURE_MODE_SPOTS) {
             mPointerGesture.spotGesture = PointerControllerInterface::SPOT_GESTURE_NEUTRAL;
             mPointerGesture.spotIdBits.clear();
@@ -3791,46 +3840,48 @@
 
         // Switch pointers if needed.
         // Find the fastest pointer and follow it.
-        if (activeTouchId >= 0) {
-            if (mCurrentTouch.pointerCount > 1) {
-                int32_t bestId = -1;
-                float bestSpeed = mConfig->pointerGestureDragMinSwitchSpeed;
-                for (uint32_t i = 0; i < mCurrentTouch.pointerCount; i++) {
-                    uint32_t id = mCurrentTouch.pointers[i].id;
-                    float vx, vy;
-                    if (mPointerGesture.velocityTracker.getVelocity(id, &vx, &vy)) {
-                        float speed = hypotf(vx, vy);
-                        if (speed > bestSpeed) {
-                            bestId = id;
-                            bestSpeed = speed;
-                        }
+        if (activeTouchId >= 0 && mCurrentTouch.pointerCount > 1) {
+            int32_t bestId = -1;
+            float bestSpeed = mConfig->pointerGestureDragMinSwitchSpeed;
+            for (uint32_t i = 0; i < mCurrentTouch.pointerCount; i++) {
+                uint32_t id = mCurrentTouch.pointers[i].id;
+                float vx, vy;
+                if (mPointerGesture.velocityTracker.getVelocity(id, &vx, &vy)) {
+                    float speed = hypotf(vx, vy);
+                    if (speed > bestSpeed) {
+                        bestId = id;
+                        bestSpeed = speed;
                     }
                 }
-                if (bestId >= 0 && bestId != activeTouchId) {
-                    mPointerGesture.activeTouchId = activeTouchId = bestId;
-                    activeTouchChanged = true;
+            }
+            if (bestId >= 0 && bestId != activeTouchId) {
+                mPointerGesture.activeTouchId = activeTouchId = bestId;
+                activeTouchChanged = true;
 #if DEBUG_GESTURES
-                    LOGD("Gestures: BUTTON_CLICK_OR_DRAG switched pointers, "
-                            "bestId=%d, bestSpeed=%0.3f", bestId, bestSpeed);
+                LOGD("Gestures: BUTTON_CLICK_OR_DRAG switched pointers, "
+                        "bestId=%d, bestSpeed=%0.3f", bestId, bestSpeed);
 #endif
-                }
             }
+        }
 
-            if (mLastTouch.idBits.hasBit(activeTouchId)) {
-                const PointerData& currentPointer =
-                        mCurrentTouch.pointers[mCurrentTouch.idToIndex[activeTouchId]];
-                const PointerData& lastPointer =
-                        mLastTouch.pointers[mLastTouch.idToIndex[activeTouchId]];
-                float deltaX = (currentPointer.x - lastPointer.x)
-                        * mLocked.pointerGestureXMovementScale;
-                float deltaY = (currentPointer.y - lastPointer.y)
-                        * mLocked.pointerGestureYMovementScale;
+        if (activeTouchId >= 0 && mLastTouch.idBits.hasBit(activeTouchId)) {
+            const PointerData& currentPointer =
+                    mCurrentTouch.pointers[mCurrentTouch.idToIndex[activeTouchId]];
+            const PointerData& lastPointer =
+                    mLastTouch.pointers[mLastTouch.idToIndex[activeTouchId]];
+            float deltaX = (currentPointer.x - lastPointer.x)
+                    * mLocked.pointerGestureXMovementScale;
+            float deltaY = (currentPointer.y - lastPointer.y)
+                    * mLocked.pointerGestureYMovementScale;
 
-                // Move the pointer using a relative motion.
-                // When using spots, the click will occur at the position of the anchor
-                // spot and all other spots will move there.
-                mPointerController->move(deltaX, deltaY);
-            }
+            mPointerGesture.pointerVelocityControl.move(when, &deltaX, &deltaY);
+
+            // Move the pointer using a relative motion.
+            // When using spots, the click will occur at the position of the anchor
+            // spot and all other spots will move there.
+            mPointerController->move(deltaX, deltaY);
+        } else {
+            mPointerGesture.pointerVelocityControl.reset();
         }
 
         float x, y;
@@ -3939,6 +3990,8 @@
             }
         }
 
+        mPointerGesture.pointerVelocityControl.reset();
+
         if (!tapped) {
 #if DEBUG_GESTURES
             LOGD("Gestures: NEUTRAL");
@@ -3995,9 +4048,13 @@
             float deltaY = (currentPointer.y - lastPointer.y)
                     * mLocked.pointerGestureYMovementScale;
 
+            mPointerGesture.pointerVelocityControl.move(when, &deltaX, &deltaY);
+
             // Move the pointer using a relative motion.
             // When using spots, the hover or drag will occur at the position of the anchor spot.
             mPointerController->move(deltaX, deltaY);
+        } else {
+            mPointerGesture.pointerVelocityControl.reset();
         }
 
         bool down;
@@ -4063,16 +4120,32 @@
         // a decision to transition into SWIPE or FREEFORM mode accordingly.
         LOG_ASSERT(activeTouchId >= 0);
 
-        bool needReference = false;
         bool settled = when >= mPointerGesture.firstTouchTime
                 + mConfig->pointerGestureMultitouchSettleInterval;
         if (mPointerGesture.lastGestureMode != PointerGesture::PRESS
                 && mPointerGesture.lastGestureMode != PointerGesture::SWIPE
                 && mPointerGesture.lastGestureMode != PointerGesture::FREEFORM) {
             *outFinishPreviousGesture = true;
+        } else if (!settled && mCurrentTouch.pointerCount > mLastTouch.pointerCount) {
+            // Additional pointers have gone down but not yet settled.
+            // Reset the gesture.
+#if DEBUG_GESTURES
+            LOGD("Gestures: Resetting gesture since additional pointers went down for MULTITOUCH, "
+                    "settle time remaining %0.3fms",
+                    (mPointerGesture.firstTouchTime + MULTITOUCH_SETTLE_INTERVAL - when)
+                            * 0.000001f);
+#endif
+            *outCancelPreviousGesture = true;
+        } else {
+            // Continue previous gesture.
+            mPointerGesture.currentGestureMode = mPointerGesture.lastGestureMode;
+        }
+
+        if (*outFinishPreviousGesture || *outCancelPreviousGesture) {
             mPointerGesture.currentGestureMode = PointerGesture::PRESS;
             mPointerGesture.activeGestureId = 0;
             mPointerGesture.referenceIdBits.clear();
+            mPointerGesture.pointerVelocityControl.reset();
 
             if (settled && mParameters.gestureMode == Parameters::GESTURE_MODE_SPOTS
                     && mLastTouch.idBits.hasBit(mPointerGesture.activeTouchId)) {
@@ -4093,37 +4166,18 @@
                 mPointerGesture.referenceGestureX = c.getAxisValue(AMOTION_EVENT_AXIS_X);
                 mPointerGesture.referenceGestureY = c.getAxisValue(AMOTION_EVENT_AXIS_Y);
             } else {
+                // Use the centroid and pointer location as the reference points for the gesture.
 #if DEBUG_GESTURES
                 LOGD("Gestures: Using centroid as reference for MULTITOUCH, "
                         "settle time remaining %0.3fms",
                         (mPointerGesture.firstTouchTime + MULTITOUCH_SETTLE_INTERVAL - when)
                                 * 0.000001f);
 #endif
-                needReference = true;
+                mCurrentTouch.getCentroid(&mPointerGesture.referenceTouchX,
+                        &mPointerGesture.referenceTouchY);
+                mPointerController->getPosition(&mPointerGesture.referenceGestureX,
+                        &mPointerGesture.referenceGestureY);
             }
-        } else if (!settled && mCurrentTouch.pointerCount > mLastTouch.pointerCount) {
-            // Additional pointers have gone down but not yet settled.
-            // Reset the gesture.
-#if DEBUG_GESTURES
-            LOGD("Gestures: Resetting gesture since additional pointers went down for MULTITOUCH, "
-                    "settle time remaining %0.3fms",
-                    (mPointerGesture.firstTouchTime + MULTITOUCH_SETTLE_INTERVAL - when)
-                            * 0.000001f);
-#endif
-            *outCancelPreviousGesture = true;
-            mPointerGesture.currentGestureMode = PointerGesture::PRESS;
-            mPointerGesture.activeGestureId = 0;
-        } else {
-            // Continue previous gesture.
-            mPointerGesture.currentGestureMode = mPointerGesture.lastGestureMode;
-        }
-
-        if (needReference) {
-            // Use the centroid and pointer location as the reference points for the gesture.
-            mCurrentTouch.getCentroid(&mPointerGesture.referenceTouchX,
-                    &mPointerGesture.referenceTouchY);
-            mPointerController->getPosition(&mPointerGesture.referenceGestureX,
-                    &mPointerGesture.referenceGestureY);
         }
 
         if (mPointerGesture.currentGestureMode == PointerGesture::PRESS) {
@@ -4253,10 +4307,14 @@
 
                 mPointerGesture.referenceTouchX += commonDeltaX;
                 mPointerGesture.referenceTouchY += commonDeltaY;
-                mPointerGesture.referenceGestureX +=
-                        commonDeltaX * mLocked.pointerGestureXMovementScale;
-                mPointerGesture.referenceGestureY +=
-                        commonDeltaY * mLocked.pointerGestureYMovementScale;
+
+                commonDeltaX *= mLocked.pointerGestureXMovementScale;
+                commonDeltaY *= mLocked.pointerGestureYMovementScale;
+                mPointerGesture.pointerVelocityControl.move(when, &commonDeltaX, &commonDeltaY);
+
+                mPointerGesture.referenceGestureX += commonDeltaX;
+                mPointerGesture.referenceGestureY += commonDeltaY;
+
                 clampPositionUsingPointerBounds(mPointerController,
                         &mPointerGesture.referenceGestureX,
                         &mPointerGesture.referenceGestureY);
diff --git a/services/input/InputReader.h b/services/input/InputReader.h
index a7c6629..1d4ad87 100644
--- a/services/input/InputReader.h
+++ b/services/input/InputReader.h
@@ -62,6 +62,12 @@
     // Devices with these names will be ignored.
     Vector<String8> excludedDeviceNames;
 
+    // Velocity control parameters for mouse pointer movements.
+    VelocityControlParameters pointerVelocityControlParameters;
+
+    // Velocity control parameters for mouse wheel movements.
+    VelocityControlParameters wheelVelocityControlParameters;
+
     // Quiet time between certain pointer gesture transitions.
     // Time to allow for all fingers or buttons to settle into a stable state before
     // starting a new gesture.
@@ -128,6 +134,8 @@
             filterTouchEvents(false),
             filterJumpyTouchEvents(false),
             virtualKeyQuietTime(0),
+            pointerVelocityControlParameters(1.0f, 80.0f, 400.0f, 4.0f),
+            wheelVelocityControlParameters(1.0f, 15.0f, 50.0f, 4.0f),
             pointerGestureQuietInterval(100 * 1000000LL), // 100 ms
             pointerGestureDragMinSwitchSpeed(50), // 50 pixels per second
             pointerGestureTapInterval(150 * 1000000LL), // 150 ms
@@ -137,7 +145,7 @@
             pointerGestureMultitouchMinSpeed(150.0f), // 150 pixels per second
             pointerGestureSwipeTransitionAngleCosine(0.5f), // cosine of 45degrees
             pointerGestureSwipeMaxWidthRatio(0.333f),
-            pointerGestureMovementSpeedRatio(0.8f),
+            pointerGestureMovementSpeedRatio(0.3f),
             pointerGestureZoomSpeedRatio(0.3f) { }
 };
 
@@ -226,6 +234,9 @@
     /* Determine whether physical keys exist for the given framework-domain key codes. */
     virtual bool hasKeys(int32_t deviceId, uint32_t sourceMask,
             size_t numCodes, const int32_t* keyCodes, uint8_t* outFlags) = 0;
+
+    /* Reopens and reconfigures all input devices. */
+    virtual void refreshConfiguration() = 0;
 };
 
 
@@ -290,6 +301,8 @@
     virtual bool hasKeys(int32_t deviceId, uint32_t sourceMask,
             size_t numCodes, const int32_t* keyCodes, uint8_t* outFlags);
 
+    virtual void refreshConfiguration();
+
 protected:
     // These methods are protected virtual so they can be overridden and instrumented
     // by test cases.
@@ -331,18 +344,17 @@
     void timeoutExpired(nsecs_t when);
 
     void handleConfigurationChanged(nsecs_t when);
-    void configureExcludedDevices();
 
     // state management for all devices
     Mutex mStateLock;
 
-    int32_t mGlobalMetaState;
+    int32_t mGlobalMetaState; // guarded by mStateLock
     virtual void updateGlobalMetaState();
     virtual int32_t getGlobalMetaState();
 
     virtual void fadePointer();
 
-    InputConfiguration mInputConfiguration;
+    InputConfiguration mInputConfiguration; // guarded by mStateLock
     void updateInputConfiguration();
 
     nsecs_t mDisableVirtualKeysTimeout; // only accessed by reader thread
@@ -350,9 +362,12 @@
     virtual bool shouldDropVirtualKey(nsecs_t now,
             InputDevice* device, int32_t keyCode, int32_t scanCode);
 
-    nsecs_t mNextTimeout; // only accessed by reader thread
+    nsecs_t mNextTimeout; // only accessed by reader thread, not guarded
     virtual void requestTimeoutAtTime(nsecs_t when);
 
+    volatile int32_t mRefreshConfiguration; // atomic
+    void configure(bool firstTime);
+
     // state queries
     typedef int32_t (InputDevice::*GetStateFunc)(uint32_t sourceMask, int32_t code);
     int32_t getState(int32_t deviceId, uint32_t sourceMask, int32_t code,
@@ -630,6 +645,12 @@
     float mVWheelScale;
     float mHWheelScale;
 
+    // Velocity controls for mouse pointer and wheel movements.
+    // The controls for X and Y wheel movements are separate to keep them decoupled.
+    VelocityControl mPointerVelocityControl;
+    VelocityControl mWheelXVelocityControl;
+    VelocityControl mWheelYVelocityControl;
+
     sp<PointerControllerInterface> mPointerController;
 
     struct LockedState {
@@ -1160,6 +1181,9 @@
         // A velocity tracker for determining whether to switch active pointers during drags.
         VelocityTracker velocityTracker;
 
+        // Velocity control for pointer movements.
+        VelocityControl pointerVelocityControl;
+
         void reset() {
             firstTouchTime = LLONG_MIN;
             activeTouchId = -1;
@@ -1174,6 +1198,7 @@
             velocityTracker.clear();
             resetTap();
             resetQuietTime();
+            pointerVelocityControl.reset();
         }
 
         void resetTap() {
diff --git a/services/input/tests/InputReader_test.cpp b/services/input/tests/InputReader_test.cpp
index 3a1beb6..00b4222 100644
--- a/services/input/tests/InputReader_test.cpp
+++ b/services/input/tests/InputReader_test.cpp
@@ -629,8 +629,8 @@
         return NAME_NOT_FOUND;
     }
 
-    virtual void addExcludedDevice(const char* deviceName) {
-        mExcludedDevices.add(String8(deviceName));
+    virtual void setExcludedDevices(const Vector<String8>& devices) {
+        mExcludedDevices = devices;
     }
 
     virtual size_t getEvents(int timeoutMillis, RawEvent* buffer, size_t bufferSize) {
@@ -728,6 +728,9 @@
 
     virtual void dump(String8& dump) {
     }
+
+    virtual void reopenDevices() {
+    }
 };
 
 
diff --git a/services/java/com/android/server/BackupManagerService.java b/services/java/com/android/server/BackupManagerService.java
index cd58b9b9..7c6d3c1 100644
--- a/services/java/com/android/server/BackupManagerService.java
+++ b/services/java/com/android/server/BackupManagerService.java
@@ -39,6 +39,7 @@
 import android.content.ServiceConnection;
 import android.content.pm.ApplicationInfo;
 import android.content.pm.IPackageDataObserver;
+import android.content.pm.IPackageDeleteObserver;
 import android.content.pm.IPackageInstallObserver;
 import android.content.pm.IPackageManager;
 import android.content.pm.PackageInfo;
@@ -1709,6 +1710,16 @@
                 }
             }
 
+            // Cull any packages that have indicated that backups are not permitted.
+            for (int i = 0; i < packagesToBackup.size(); ) {
+                PackageInfo info = packagesToBackup.get(i);
+                if ((info.applicationInfo.flags & ApplicationInfo.FLAG_ALLOW_BACKUP) == 0) {
+                    packagesToBackup.remove(i);
+                } else {
+                    i++;
+                }
+            }
+
             // Now back up the app data via the agent mechanism
             PackageInfo pkg = null;
             try {
@@ -1937,7 +1948,6 @@
             // Which packages we've already wiped data on.  We prepopulate this
             // with a whitelist of packages known to be unclearable.
             mClearedPackages.add("android");
-            mClearedPackages.add("com.android.backupconfirm");
             mClearedPackages.add("com.android.providers.settings");
         }
 
@@ -2314,6 +2324,7 @@
 
         class RestoreInstallObserver extends IPackageInstallObserver.Stub {
             final AtomicBoolean mDone = new AtomicBoolean();
+            String mPackageName;
             int mResult;
 
             public void reset() {
@@ -2341,12 +2352,45 @@
                     throws RemoteException {
                 synchronized (mDone) {
                     mResult = returnCode;
+                    mPackageName = packageName;
                     mDone.set(true);
                     mDone.notifyAll();
                 }
             }
         }
+
+        class RestoreDeleteObserver extends IPackageDeleteObserver.Stub {
+            final AtomicBoolean mDone = new AtomicBoolean();
+            int mResult;
+
+            public void reset() {
+                synchronized (mDone) {
+                    mDone.set(false);
+                }
+            }
+
+            public void waitForCompletion() {
+                synchronized (mDone) {
+                    while (mDone.get() == false) {
+                        try {
+                            mDone.wait();
+                        } catch (InterruptedException e) { }
+                    }
+                }
+            }
+
+            @Override
+            public void packageDeleted(String packageName, int returnCode) throws RemoteException {
+                synchronized (mDone) {
+                    mResult = returnCode;
+                    mDone.set(true);
+                    mDone.notifyAll();
+                }
+            }
+        }
+
         final RestoreInstallObserver mInstallObserver = new RestoreInstallObserver();
+        final RestoreDeleteObserver mDeleteObserver = new RestoreDeleteObserver();
 
         boolean installApk(FileMetadata info, String installerPackage, InputStream instream) {
             boolean okay = true;
@@ -2385,6 +2429,49 @@
                     if (mPackagePolicies.get(info.packageName) != RestorePolicy.ACCEPT) {
                         okay = false;
                     }
+                } else {
+                    // Okay, the install succeeded.  Make sure it was the right app.
+                    boolean uninstall = false;
+                    if (!mInstallObserver.mPackageName.equals(info.packageName)) {
+                        Slog.w(TAG, "Restore stream claimed to include apk for "
+                                + info.packageName + " but apk was really "
+                                + mInstallObserver.mPackageName);
+                        // delete the package we just put in place; it might be fraudulent
+                        okay = false;
+                        uninstall = true;
+                    } else {
+                        try {
+                            PackageInfo pkg = mPackageManager.getPackageInfo(info.packageName,
+                                    PackageManager.GET_SIGNATURES);
+                            if ((pkg.applicationInfo.flags & ApplicationInfo.FLAG_ALLOW_BACKUP) == 0) {
+                                Slog.w(TAG, "Restore stream contains apk of package "
+                                        + info.packageName + " but it disallows backup/restore");
+                                okay = false;
+                            } else {
+                                // So far so good -- do the signatures match the manifest?
+                                Signature[] sigs = mManifestSignatures.get(info.packageName);
+                                if (!signaturesMatch(sigs, pkg)) {
+                                    Slog.w(TAG, "Installed app " + info.packageName
+                                            + " signatures do not match restore manifest");
+                                    okay = false;
+                                    uninstall = true;
+                                }
+                            }
+                        } catch (NameNotFoundException e) {
+                            Slog.w(TAG, "Install of package " + info.packageName
+                                    + " succeeded but now not found");
+                            okay = false;
+                        }
+                    }
+
+                    // If the installed apk was flagged for removal (wrong package or
+                    // mismatched signatures), delete the package that we just installed.
+                    if (uninstall) {
+                        mDeleteObserver.reset();
+                        mPackageManager.deletePackage(mInstallObserver.mPackageName,
+                                mDeleteObserver, 0);
+                        mDeleteObserver.waitForCompletion();
+                    }
                 }
             } catch (IOException e) {
                 Slog.e(TAG, "Unable to transcribe restored apk for install");
@@ -2441,38 +2528,48 @@
                         boolean hasApk = str[0].equals("1");
                         offset = extractLine(buffer, offset, str);
                         int numSigs = Integer.parseInt(str[0]);
-                        Signature[] sigs = null;
                         if (numSigs > 0) {
-                            sigs = new Signature[numSigs];
+                            Signature[] sigs = new Signature[numSigs];
                             for (int i = 0; i < numSigs; i++) {
                                 offset = extractLine(buffer, offset, str);
                                 sigs[i] = new Signature(str[0]);
                             }
+                            mManifestSignatures.put(info.packageName, sigs);
 
                             // Okay, got the manifest info we need...
                             try {
-                                // Verify signatures against any installed version; if they
-                                // don't match, then we fall though and ignore the data.  The
-                                // signatureMatch() method explicitly ignores the signature
-                                // check for packages installed on the system partition, because
-                                // such packages are signed with the platform cert instead of
-                                // the app developer's cert, so they're different on every
-                                // device.
                                 PackageInfo pkgInfo = mPackageManager.getPackageInfo(
                                         info.packageName, PackageManager.GET_SIGNATURES);
-                                if (signaturesMatch(sigs, pkgInfo)) {
-                                    if (pkgInfo.versionCode >= version) {
-                                        Slog.i(TAG, "Sig + version match; taking data");
-                                        policy = RestorePolicy.ACCEPT;
+                                // Fall through to IGNORE if the app explicitly disallows backup
+                                final int flags = pkgInfo.applicationInfo.flags;
+                                if ((flags & ApplicationInfo.FLAG_ALLOW_BACKUP) != 0) {
+                                    // Verify signatures against any installed version; if they
+                                    // don't match, then we fall through and ignore the data.  The
+                                    // signaturesMatch() method explicitly ignores the signature
+                                    // check for packages installed on the system partition, because
+                                    // such packages are signed with the platform cert instead of
+                                    // the app developer's cert, so they're different on every
+                                    // device.
+                                    if (signaturesMatch(sigs, pkgInfo)) {
+                                        if (pkgInfo.versionCode >= version) {
+                                            Slog.i(TAG, "Sig + version match; taking data");
+                                            policy = RestorePolicy.ACCEPT;
+                                        } else {
+                                            // The data is from a newer version of the app than
+                                            // is presently installed.  That means we can only
+                                            // use it if the matching apk is also supplied.
+                                            Slog.d(TAG, "Data version " + version
+                                                    + " is newer than installed version "
+                                                    + pkgInfo.versionCode + " - requiring apk");
+                                            policy = RestorePolicy.ACCEPT_IF_APK;
+                                        }
                                     } else {
-                                        // The data is from a newer version of the app than
-                                        // is presently installed.  That means we can only
-                                        // use it if the matching apk is also supplied.
-                                        Slog.d(TAG, "Data version " + version
-                                                + " is newer than installed version "
-                                                + pkgInfo.versionCode + " - requiring apk");
-                                        policy = RestorePolicy.ACCEPT_IF_APK;
+                                        Slog.w(TAG, "Restore manifest signatures do not match "
+                                                + "installed application for " + info.packageName);
                                     }
+                                } else {
+                                    if (DEBUG) Slog.i(TAG, "Restore manifest from "
+                                            + info.packageName + " but allowBackup=false");
                                 }
                             } catch (NameNotFoundException e) {
                                 // Okay, the target app isn't installed.  We can process
diff --git a/services/java/com/android/server/accessibility/AccessibilityManagerService.java b/services/java/com/android/server/accessibility/AccessibilityManagerService.java
index 7801aec..fba293c 100644
--- a/services/java/com/android/server/accessibility/AccessibilityManagerService.java
+++ b/services/java/com/android/server/accessibility/AccessibilityManagerService.java
@@ -259,7 +259,6 @@
                             unbindAllServicesLocked();
                         }
                         updateClientsLocked();
-                        updateInputFilterLocked();
                     }
                 }
             });
@@ -319,6 +318,7 @@
 
     public List<AccessibilityServiceInfo> getEnabledAccessibilityServiceList(int feedbackType) {
         List<AccessibilityServiceInfo> result = mEnabledServicesForFeedbackTempList;
+        result.clear();
         List<Service> services = mServices;
         synchronized (mLock) {
             while (feedbackType != 0) {
@@ -327,7 +327,7 @@
                 final int serviceCount = services.size();
                 for (int i = 0; i < serviceCount; i++) {
                     Service service = services.get(i);
-                    if (service.mFeedbackType == feedbackType) {
+                    if ((service.mFeedbackType & feedbackTypeBit) != 0) {
                         result.add(service.mAccessibilityServiceInfo);
                     }
                 }
@@ -368,10 +368,7 @@
                         service.setAccessibilityServiceInfo(oldInfo);
                     } else {
                         service.setAccessibilityServiceInfo(info);
-                        tryAddServiceLocked(service);
                     }
-
-                    updateInputFilterLocked();
                 }
                 return;
             default:
@@ -772,6 +769,10 @@
             }
             mNotificationTimeout = info.notificationTimeout;
             mIsDefault = (info.flags & AccessibilityServiceInfo.DEFAULT) != 0;
+
+            synchronized (mLock) {
+                tryAddServiceLocked(this);
+            }
         }
 
         /**
@@ -794,7 +795,9 @@
          */
         public boolean unbind() {
             if (mService != null) {
-                tryRemoveServiceLocked(this);
+                synchronized (mLock) {
+                    tryRemoveServiceLocked(this);
+                }
                 mContext.unbindService(this);
                 mService = null;
                 return true;
@@ -809,7 +812,7 @@
          * @return True if the service is configured, false otherwise.
          */
         public boolean isConfigured() {
-            return (mEventTypes != 0 && mFeedbackType != 0);
+            return (mEventTypes != 0 && mFeedbackType != 0 && mService != null);
         }
 
         public void setServiceInfo(AccessibilityServiceInfo info) {
diff --git a/services/java/com/android/server/wm/InputManager.java b/services/java/com/android/server/wm/InputManager.java
index d13cc63..3095c37 100644
--- a/services/java/com/android/server/wm/InputManager.java
+++ b/services/java/com/android/server/wm/InputManager.java
@@ -23,10 +23,14 @@
 import android.content.Context;
 import android.content.pm.PackageManager;
 import android.content.res.Configuration;
+import android.database.ContentObserver;
 import android.os.Environment;
+import android.os.Handler;
 import android.os.Looper;
 import android.os.MessageQueue;
 import android.os.SystemProperties;
+import android.provider.Settings;
+import android.provider.Settings.SettingNotFoundException;
 import android.util.Slog;
 import android.util.Xml;
 import android.view.InputChannel;
@@ -57,7 +61,7 @@
     private final Callbacks mCallbacks;
     private final Context mContext;
     private final WindowManagerService mWindowManagerService;
-    
+
     private static native void nativeInit(Context context,
             Callbacks callbacks, MessageQueue messageQueue);
     private static native void nativeStart();
@@ -88,6 +92,7 @@
     private static native int[] nativeGetInputDeviceIds();
     private static native boolean nativeTransferTouchFocus(InputChannel fromChannel,
             InputChannel toChannel);
+    private static native void nativeSetPointerSpeed(int speed);
     private static native String nativeDump();
     
     // Input event injection constants defined in InputDispatcher.h.
@@ -131,10 +136,13 @@
         Slog.i(TAG, "Initializing input manager");
         nativeInit(mContext, mCallbacks, looper.getQueue());
     }
-    
+
     public void start() {
         Slog.i(TAG, "Starting input manager");
         nativeStart();
+
+        registerPointerSpeedSettingObserver();
+        updatePointerSpeedFromSettings();
     }
     
     public void setDisplaySize(int displayId, int width, int height) {
@@ -403,6 +411,42 @@
         return nativeTransferTouchFocus(fromChannel, toChannel);
     }
 
+    /**
+     * Set the pointer speed.
+     * @param speed The pointer speed as a value between -7 (slowest) and 7 (fastest)
+     * where 0 is the default speed.
+     */
+    public void setPointerSpeed(int speed) {
+        speed = Math.min(Math.max(speed, -7), 7);
+        nativeSetPointerSpeed(speed);
+    }
+
+    public void updatePointerSpeedFromSettings() {
+        int speed = getPointerSpeedSetting(0);
+        setPointerSpeed(speed);
+    }
+
+    private void registerPointerSpeedSettingObserver() {
+        mContext.getContentResolver().registerContentObserver(
+                Settings.System.getUriFor(Settings.System.POINTER_SPEED), true,
+                new ContentObserver(mWindowManagerService.mH) {
+                    @Override
+                    public void onChange(boolean selfChange) {
+                        updatePointerSpeedFromSettings();
+                    }
+                });
+    }
+
+    private int getPointerSpeedSetting(int defaultValue) {
+        int speed = defaultValue;
+        try {
+            speed = Settings.System.getInt(mContext.getContentResolver(),
+                    Settings.System.POINTER_SPEED);
+        } catch (SettingNotFoundException snfe) {
+        }
+        return speed;
+    }
+
     public void dump(PrintWriter pw) {
         String dumpStr = nativeDump();
         if (dumpStr != null) {
diff --git a/services/java/com/android/server/wm/WindowManagerService.java b/services/java/com/android/server/wm/WindowManagerService.java
index 080ba35..d95d4c5 100644
--- a/services/java/com/android/server/wm/WindowManagerService.java
+++ b/services/java/com/android/server/wm/WindowManagerService.java
@@ -5951,6 +5951,19 @@
         }
     }
 
+    /**
+     * Temporarily set the pointer speed.  Does not save the new setting.
+     * Used by the settings application.
+     */
+    public void setPointerSpeed(int speed) {
+        if (!checkCallingPermission(android.Manifest.permission.SET_POINTER_SPEED,
+                "setPointerSpeed()")) {
+            throw new SecurityException("Requires SET_POINTER_SPEED permission");
+        }
+
+        mInputManager.setPointerSpeed(speed);
+    }
+
     private WindowState getFocusedWindow() {
         synchronized (mWindowMap) {
             return getFocusedWindowLocked();
diff --git a/services/jni/com_android_server_InputManager.cpp b/services/jni/com_android_server_InputManager.cpp
index 0ee3217..881882f1 100644
--- a/services/jni/com_android_server_InputManager.cpp
+++ b/services/jni/com_android_server_InputManager.cpp
@@ -53,6 +53,11 @@
 
 namespace android {
 
+// The exponent used to calculate the pointer speed scaling factor.
+// The scaling factor is calculated as 2 ^ (speed * exponent),
+// where the speed ranges from -7 to +7 and is supplied by the user.
+static const float POINTER_SPEED_EXPONENT = 1.0f / 3;
+
 static struct {
     jmethodID notifyConfigurationChanged;
     jmethodID notifyLidSwitchChanged;
@@ -176,6 +181,7 @@
     void setFocusedApplication(JNIEnv* env, jobject applicationObj);
     void setInputDispatchMode(bool enabled, bool frozen);
     void setSystemUiVisibility(int32_t visibility);
+    void setPointerSpeed(int32_t speed);
 
     /* --- InputReaderPolicyInterface implementation --- */
 
@@ -225,6 +231,9 @@
         // System UI visibility.
         int32_t systemUiVisibility;
 
+        // Pointer speed.
+        int32_t pointerSpeed;
+
         // Sprite controller singleton, created on first use.
         sp<SpriteController> spriteController;
 
@@ -264,6 +273,7 @@
         mLocked.displayOrientation = ROTATION_0;
 
         mLocked.systemUiVisibility = ASYSTEM_UI_VISIBILITY_STATUS_BAR_VISIBLE;
+        mLocked.pointerSpeed = 0;
     }
 
     sp<EventHub> eventHub = new EventHub();
@@ -427,6 +437,13 @@
     if (!checkAndClearExceptionFromCallback(env, "getTouchSlop")) {
         outConfig->pointerGestureTapSlop = touchSlop;
     }
+
+    { // acquire lock
+        AutoMutex _l(mLock);
+
+        outConfig->pointerVelocityControlParameters.scale = exp2f(mLocked.pointerSpeed
+                * POINTER_SPEED_EXPONENT);
+    } // release lock
 }
 
 sp<PointerControllerInterface> NativeInputManager::obtainPointerController(int32_t deviceId) {
@@ -632,6 +649,17 @@
             : PointerController::INACTIVITY_TIMEOUT_NORMAL);
 }
 
+void NativeInputManager::setPointerSpeed(int32_t speed) {
+    AutoMutex _l(mLock);
+
+    if (mLocked.pointerSpeed != speed) {
+        LOGI("Setting pointer speed to %d.", speed);
+        mLocked.pointerSpeed = speed;
+
+        mInputManager->getReader()->refreshConfiguration();
+    }
+}
+
 bool NativeInputManager::isScreenOn() {
     return android_server_PowerManagerService_isScreenOn();
 }
@@ -1221,6 +1249,15 @@
             transferTouchFocus(fromChannel, toChannel);
 }
 
+static void android_server_InputManager_nativeSetPointerSpeed(JNIEnv* env,
+        jclass clazz, jint speed) {
+    if (checkInputManagerUnitialized(env)) {
+        return;
+    }
+
+    gNativeInputManager->setPointerSpeed(speed);
+}
+
 static jstring android_server_InputManager_nativeDump(JNIEnv* env, jclass clazz) {
     if (checkInputManagerUnitialized(env)) {
         return NULL;
@@ -1277,6 +1314,8 @@
             (void*) android_server_InputManager_nativeGetInputConfiguration },
     { "nativeTransferTouchFocus", "(Landroid/view/InputChannel;Landroid/view/InputChannel;)Z",
             (void*) android_server_InputManager_nativeTransferTouchFocus },
+    { "nativeSetPointerSpeed", "(I)V",
+            (void*) android_server_InputManager_nativeSetPointerSpeed },
     { "nativeDump", "()Ljava/lang/String;",
             (void*) android_server_InputManager_nativeDump },
 };
diff --git a/telephony/java/android/telephony/ServiceState.java b/telephony/java/android/telephony/ServiceState.java
index ba05837..fce7cdc 100644
--- a/telephony/java/android/telephony/ServiceState.java
+++ b/telephony/java/android/telephony/ServiceState.java
@@ -387,62 +387,77 @@
                 && mIsEmergencyOnly == s.mIsEmergencyOnly);
     }
 
+    /**
+     * Convert radio technology to String
+     *
+     * @param rt the radio technology code to convert
+     * @return String representation of the RAT
+     *
+     * @hide
+     */
+    public static String radioTechnologyToString(int rt) {
+        String rtString;
+
+        switch(rt) {
+            case 0:
+                rtString = "Unknown";
+                break;
+            case 1:
+                rtString = "GPRS";
+                break;
+            case 2:
+                rtString = "EDGE";
+                break;
+            case 3:
+                rtString = "UMTS";
+                break;
+            case 4:
+                rtString = "CDMA-IS95A";
+                break;
+            case 5:
+                rtString = "CDMA-IS95B";
+                break;
+            case 6:
+                rtString = "1xRTT";
+                break;
+            case 7:
+                rtString = "EvDo-rev.0";
+                break;
+            case 8:
+                rtString = "EvDo-rev.A";
+                break;
+            case 9:
+                rtString = "HSDPA";
+                break;
+            case 10:
+                rtString = "HSUPA";
+                break;
+            case 11:
+                rtString = "HSPA";
+                break;
+            case 12:
+                rtString = "EvDo-rev.B";
+                break;
+            case 13:
+                rtString = "eHRPD";
+                break;
+            case 14:
+                rtString = "LTE";
+                break;
+            case 15:
+                rtString = "HSPAP";
+                break;
+            default:
+                rtString = "Unexpected";
+                Log.w(LOG_TAG, "Unexpected radioTechnology=" + rt);
+                break;
+        }
+        return rtString + ":" + rt;
+    }
+
     @Override
     public String toString() {
-        String radioTechnology = new String("Error in radioTechnology");
-        switch(this.mRadioTechnology) {
-        case 0:
-            radioTechnology = "Unknown";
-            break;
-        case 1:
-            radioTechnology = "GPRS";
-            break;
-        case 2:
-            radioTechnology = "EDGE";
-            break;
-        case 3:
-            radioTechnology = "UMTS";
-            break;
-        case 4:
-            radioTechnology = "IS95A";
-            break;
-        case 5:
-            radioTechnology = "IS95B";
-            break;
-        case 6:
-            radioTechnology = "1xRTT";
-            break;
-        case 7:
-            radioTechnology = "EvDo rev. 0";
-            break;
-        case 8:
-            radioTechnology = "EvDo rev. A";
-            break;
-        case 9:
-            radioTechnology = "HSDPA";
-            break;
-        case 10:
-            radioTechnology = "HSUPA";
-            break;
-        case 11:
-            radioTechnology = "HSPA";
-            break;
-        case 12:
-            radioTechnology = "EvDo rev. B";
-            break;
-        case 13:
-            radioTechnology = "eHRPD";
-            break;
-        case 14:
-            radioTechnology = "LTE";
-            break;
-        case 15:
-            radioTechnology = "HSPAP";
-            break;
-        default:
-            Log.w(LOG_TAG, "mRadioTechnology variable out of range.");
-        break;
-        }
+        String radioTechnology = radioTechnologyToString(mRadioTechnology);
 
         return (mState + " " + (mRoaming ? "roaming" : "home")
                 + " " + mOperatorAlphaLong
@@ -551,7 +566,7 @@
      *
      * @hide
      */
-    public void setCdmaEriText(String longName) {
+    public void setOperatorAlphaLong(String longName) {
         mOperatorAlphaLong = longName;
     }
 
diff --git a/telephony/java/com/android/internal/telephony/DataConnectionTracker.java b/telephony/java/com/android/internal/telephony/DataConnectionTracker.java
index 5d6aaa6..2c26f62 100644
--- a/telephony/java/com/android/internal/telephony/DataConnectionTracker.java
+++ b/telephony/java/com/android/internal/telephony/DataConnectionTracker.java
@@ -509,7 +509,6 @@
     protected abstract void onVoiceCallEnded();
     protected abstract void onCleanUpConnection(boolean tearDown, int apnId, String reason);
     protected abstract void onCleanUpAllConnections(String cause);
-    protected abstract boolean isDataPossible();
     protected abstract boolean isDataPossible(String apnType);
 
     @Override
@@ -752,7 +751,7 @@
     protected void notifyDataAvailability(String reason) {
         // note that we either just turned all off because we lost availability
         // or all were off and could now go on, so only have off apns to worry about
-        notifyOffApnsOfAvailability(reason, isDataPossible());
+        notifyOffApnsOfAvailability(reason, isDataPossible(Phone.APN_TYPE_DEFAULT));
     }
 
     public boolean isApnTypeEnabled(String apnType) {
@@ -968,11 +967,7 @@
         sendMessage(msg);
     }
 
-    public boolean isAnyActiveDataConnections() {
-        // TODO: Remember if there are any connected or
-        // loop asking each DC/APN?
-        return true;
-    }
+    public abstract boolean isAnyActiveDataConnections();
 
     protected void onSetDataEnabled(boolean enable) {
         boolean prevEnabled = getAnyDataEnabled();
diff --git a/telephony/java/com/android/internal/telephony/ITelephony.aidl b/telephony/java/com/android/internal/telephony/ITelephony.aidl
index da233cc..19441cd 100644
--- a/telephony/java/com/android/internal/telephony/ITelephony.aidl
+++ b/telephony/java/com/android/internal/telephony/ITelephony.aidl
@@ -145,6 +145,15 @@
     boolean supplyPin(String pin);
 
     /**
+     * Supply the PUK to unlock the SIM and set the SIM PIN to a new PIN.
+     * Blocks until a result is determined.
+     * @param puk The PUK to check.
+     * @param pin The new PIN to be set in the SIM.
+     * @return whether the operation was a success.
+     */
+    boolean supplyPuk(String puk, String pin);
+
+    /**
      * Handles PIN MMI commands (PIN/PIN2/PUK/PUK2), which are initiated
      * without SEND (so <code>dial</code> is not appropriate).
      *
@@ -254,7 +263,7 @@
       * Returns the network type
       */
     int getNetworkType();
-    
+
     /**
      * Return true if an ICC card is present
      */
diff --git a/telephony/java/com/android/internal/telephony/IccConstants.java b/telephony/java/com/android/internal/telephony/IccConstants.java
index b40f945..cafc79b 100644
--- a/telephony/java/com/android/internal/telephony/IccConstants.java
+++ b/telephony/java/com/android/internal/telephony/IccConstants.java
@@ -58,6 +58,12 @@
     static final int EF_CST = 0x6f32;
     static final int EF_RUIM_SPN =0x6F41;
 
+    // ETSI TS.102.221
+    static final int EF_PL = 0x2F05;
+    // 3GPP2 C.S0065
+    static final int EF_CSIM_LI = 0x6F3A;
+    static final int EF_CSIM_SPN =0x6F41;
+
     //ISIM access
     static final int EF_IMPU = 0x6f04;
     static final int EF_IMPI = 0x6f02;
diff --git a/telephony/java/com/android/internal/telephony/IccFileHandler.java b/telephony/java/com/android/internal/telephony/IccFileHandler.java
index 92ddd2c..93b9b79 100644
--- a/telephony/java/com/android/internal/telephony/IccFileHandler.java
+++ b/telephony/java/com/android/internal/telephony/IccFileHandler.java
@@ -529,6 +529,7 @@
             return MF_SIM + DF_TELECOM;
 
         case EF_ICCID:
+        case EF_PL:
             return MF_SIM;
         case EF_IMG:
             return MF_SIM + DF_TELECOM + DF_GRAPHICS;
diff --git a/telephony/java/com/android/internal/telephony/PhoneBase.java b/telephony/java/com/android/internal/telephony/PhoneBase.java
index b77e134..f70d680 100644
--- a/telephony/java/com/android/internal/telephony/PhoneBase.java
+++ b/telephony/java/com/android/internal/telephony/PhoneBase.java
@@ -1023,7 +1023,7 @@
     }
 
     public boolean isDataConnectivityPossible() {
-        return ((mDataConnectionTracker != null) && (mDataConnectionTracker.isDataPossible()));
+        return isDataConnectivityPossible(Phone.APN_TYPE_DEFAULT);
     }
 
     public boolean isDataConnectivityPossible(String apnType) {
diff --git a/telephony/java/com/android/internal/telephony/PhoneProxy.java b/telephony/java/com/android/internal/telephony/PhoneProxy.java
index 68f1c5f..c2212db 100644
--- a/telephony/java/com/android/internal/telephony/PhoneProxy.java
+++ b/telephony/java/com/android/internal/telephony/PhoneProxy.java
@@ -654,7 +654,7 @@
     }
 
     public boolean isDataConnectivityPossible() {
-        return mActivePhone.isDataConnectivityPossible();
+        return mActivePhone.isDataConnectivityPossible(Phone.APN_TYPE_DEFAULT);
     }
 
     public boolean isDataConnectivityPossible(String apnType) {
diff --git a/telephony/java/com/android/internal/telephony/ServiceStateTracker.java b/telephony/java/com/android/internal/telephony/ServiceStateTracker.java
index 695805c..01b807d 100644
--- a/telephony/java/com/android/internal/telephony/ServiceStateTracker.java
+++ b/telephony/java/com/android/internal/telephony/ServiceStateTracker.java
@@ -54,6 +54,12 @@
     protected boolean mDesiredPowerState;
 
     /**
+     *  Values correspond to ServiceState.RADIO_TECHNOLOGY_ definitions.
+     */
+    protected int mRadioTechnology = 0;
+    protected int mNewRadioTechnology = 0;
+
+    /**
      * By default, strength polling is enabled.  However, if we're
      * getting unsolicited signal strength updates from the radio, set
      * value to true and don't bother polling any more.
diff --git a/telephony/java/com/android/internal/telephony/cdma/CDMALTEPhone.java b/telephony/java/com/android/internal/telephony/cdma/CDMALTEPhone.java
index 5733164c..fe2fcb2 100644
--- a/telephony/java/com/android/internal/telephony/cdma/CDMALTEPhone.java
+++ b/telephony/java/com/android/internal/telephony/cdma/CDMALTEPhone.java
@@ -58,8 +58,9 @@
     @Override
     protected void initSstIcc() {
         mSST = new CdmaLteServiceStateTracker(this);
-        mIccRecords = new SIMRecords(this);
+        mIccRecords = new CdmaLteUiccRecords(this);
         mIccCard = new SimCard(this, LOG_TAG, DBG);
+        mIccFileHandler = new CdmaLteUiccFileHandler(this);
     }
 
     @Override
diff --git a/telephony/java/com/android/internal/telephony/cdma/CDMAPhone.java b/telephony/java/com/android/internal/telephony/cdma/CDMAPhone.java
index c85f7d8..a283062 100755
--- a/telephony/java/com/android/internal/telephony/cdma/CDMAPhone.java
+++ b/telephony/java/com/android/internal/telephony/cdma/CDMAPhone.java
@@ -156,13 +156,13 @@
         mSST = new CdmaServiceStateTracker(this);
         mIccRecords = new RuimRecords(this);
         mIccCard = new RuimCard(this, LOG_TAG, DBG);
+        mIccFileHandler = new RuimFileHandler(this);
     }
 
     protected void init(Context context, PhoneNotifier notifier) {
         mCM.setPhoneType(Phone.PHONE_TYPE_CDMA);
         mCT = new CdmaCallTracker(this);
         mSMS = new CdmaSMSDispatcher(this);
-        mIccFileHandler = new RuimFileHandler(this);
         mDataConnectionTracker = new CdmaDataConnectionTracker (this);
         mRuimPhoneBookInterfaceManager = new RuimPhoneBookInterfaceManager(this);
         mRuimSmsInterfaceManager = new RuimSmsInterfaceManager(this, mSMS);
diff --git a/telephony/java/com/android/internal/telephony/cdma/CdmaDataConnectionTracker.java b/telephony/java/com/android/internal/telephony/cdma/CdmaDataConnectionTracker.java
index 8ce221a..800615c 100644
--- a/telephony/java/com/android/internal/telephony/cdma/CdmaDataConnectionTracker.java
+++ b/telephony/java/com/android/internal/telephony/cdma/CdmaDataConnectionTracker.java
@@ -214,35 +214,15 @@
         return allowed;
     }
 
-    /**
-     * The only circumstances under which we report that data connectivity is not
-     * possible are
-     * <ul>
-     * <li>Data is disallowed (roaming, power state, voice call, etc).</li>
-     * <li>The current data state is {@code DISCONNECTED} for a reason other than
-     * having explicitly disabled connectivity. In other words, data is not available
-     * because the phone is out of coverage or some like reason.</li>
-     * </ul>
-     * @return {@code true} if data connectivity is possible, {@code false} otherwise.
-     */
     @Override
-    protected boolean isDataPossible() {
-        boolean dataAllowed = isDataAllowed();
-        boolean anyDataEnabled = getAnyDataEnabled();
-        boolean possible = (dataAllowed
-                && !(anyDataEnabled && (mState == State.FAILED || mState == State.IDLE)));
-        if (!possible && DBG) {
-            log("isDataPossible() " + possible + ", dataAllowed=" + dataAllowed +
-                    " anyDataEnabled=" + anyDataEnabled + " dataState=" + mState);
+    protected boolean isDataPossible(String apnType) {
+        boolean possible = isDataAllowed() && !(getAnyDataEnabled() &&
+                (mState == State.FAILED || mState == State.IDLE));
+        if (!possible && DBG && isDataAllowed()) {
+            log("Data not possible.  No coverage: dataState = " + mState);
         }
         return possible;
     }
- 
-    @Override
-    protected boolean isDataPossible(String apnType) {
-        return isDataPossible();
-    }
-
 
     private boolean trySetupData(String reason) {
         if (DBG) log("***trySetupData due to " + (reason == null ? "(unspecified)" : reason));
diff --git a/telephony/java/com/android/internal/telephony/cdma/CdmaLteServiceStateTracker.java b/telephony/java/com/android/internal/telephony/cdma/CdmaLteServiceStateTracker.java
index e593bd0..7bc7ca2 100644
--- a/telephony/java/com/android/internal/telephony/cdma/CdmaLteServiceStateTracker.java
+++ b/telephony/java/com/android/internal/telephony/cdma/CdmaLteServiceStateTracker.java
@@ -32,6 +32,7 @@
 import android.os.RegistrantList;
 import android.os.AsyncResult;
 import android.os.Message;
+import android.os.SystemProperties;
 
 import android.util.Log;
 import android.util.EventLog;
@@ -72,6 +73,7 @@
             handlePollStateResult(msg.what, ar);
             break;
         case EVENT_SIM_READY:
+            if (DBG) log("handleMessage EVENT_SIM_READY");
             isSubscriptionFromRuim = false;
             cm.getCDMASubscription( obtainMessage(EVENT_POLL_STATE_CDMA_SUBSCRIPTION));
             pollState();
@@ -186,39 +188,6 @@
         }
     }
 
-    protected static String networkTypeToString(int type) {
-        String ret = "unknown";
-
-        switch (type) {
-            case ServiceState.RADIO_TECHNOLOGY_IS95A:
-            case ServiceState.RADIO_TECHNOLOGY_IS95B:
-                ret = "CDMA";
-                break;
-            case ServiceState.RADIO_TECHNOLOGY_1xRTT:
-                ret = "CDMA - 1xRTT";
-                break;
-            case ServiceState.RADIO_TECHNOLOGY_EVDO_0:
-                ret = "CDMA - EvDo rev. 0";
-                break;
-            case ServiceState.RADIO_TECHNOLOGY_EVDO_A:
-                ret = "CDMA - EvDo rev. A";
-                break;
-            case ServiceState.RADIO_TECHNOLOGY_EVDO_B:
-                ret = "CDMA - EvDo rev. B";
-                break;
-            case ServiceState.RADIO_TECHNOLOGY_LTE:
-                ret = "LTE";
-                break;
-            case ServiceState.RADIO_TECHNOLOGY_EHRPD:
-                ret = "CDMA - eHRPD";
-                break;
-            default:
-                sloge("networkTypeToString: Wrong network, can not return a string.");
-                break;
-        }
-        return ret;
-    }
-
     @Override
     protected void pollStateDone() {
         // determine data NetworkType from both LET and CDMA SS
@@ -282,13 +251,20 @@
              (newNetworkType <= ServiceState.RADIO_TECHNOLOGY_EVDO_A));
 
         if (DBG) {
-            log("pollStateDone: hasRegistered = "
-                + hasRegistered + " hasCdmaDataConnectionAttached = "
-                + hasCdmaDataConnectionAttached + " hasCdmaDataConnectionChanged = "
-                + hasCdmaDataConnectionChanged + " hasNetworkTypeChanged = "
-                + hasNetworkTypeChanged + " has4gHandoff = " + has4gHandoff
-                + " hasMultiApnSupport = " + hasMultiApnSupport + " hasLostMultiApnSupport = "
-                + hasLostMultiApnSupport);
+            log("pollStateDone:"
+                + " hasRegistered=" + hasRegistered
+                + " hasDeegistered=" + hasDeregistered
+                + " hasCdmaDataConnectionAttached=" + hasCdmaDataConnectionAttached
+                + " hasCdmaDataConnectionDetached=" + hasCdmaDataConnectionDetached
+                + " hasCdmaDataConnectionChanged=" + hasCdmaDataConnectionChanged
+                + " hasNetworkTypeChanged = " + hasNetworkTypeChanged
+                + " hasChanged=" + hasChanged
+                + " hasRoamingOn=" + hasRoamingOn
+                + " hasRoamingOff=" + hasRoamingOff
+                + " hasLocationChanged=" + hasLocationChanged
+                + " has4gHandoff = " + has4gHandoff
+                + " hasMultiApnSupport=" + hasMultiApnSupport
+                + " hasLostMultiApnSupport=" + hasLostMultiApnSupport);
         }
         // Add an event log when connection state changes
         if (ss.getState() != newSS.getState()
@@ -316,7 +292,7 @@
                 && (phone.mDataConnectionTracker instanceof GsmDataConnectionTracker)) {
             if (DBG)log("GsmDataConnectionTracker disposed");
             phone.mDataConnectionTracker.dispose();
-            phone.mDataConnectionTracker = new CdmaDataConnectionTracker((CDMAPhone)phone);
+            phone.mDataConnectionTracker = new CdmaDataConnectionTracker(phone);
         }
 
         CdmaCellLocation tcl = cellLoc;
@@ -330,7 +306,7 @@
 
         if (hasNetworkTypeChanged) {
             phone.setSystemProperty(TelephonyProperties.PROPERTY_DATA_NETWORK_TYPE,
-                    networkTypeToString(networkType));
+                    ServiceState.radioTechnologyToString(networkType));
         }
 
         if (hasRegistered) {
@@ -351,7 +327,14 @@
                     eriText = phone.getContext()
                             .getText(com.android.internal.R.string.roamingTextSearching).toString();
                 }
-                ss.setCdmaEriText(eriText);
+                ss.setOperatorAlphaLong(eriText);
+            }
+            if (cm.getSimState().isSIMReady()) {
+                // SIM is found on the device. Read the operator name from the card.
+                ss.setOperatorAlphaLong(phone.mIccRecords.getServiceProviderName());
+
+                // If SIM card is present, Eri will not be used. Turn it off
+                ss.setCdmaEriIconIndex(EriInfo.ROAMING_INDICATOR_OFF);
             }
 
             String operatorNumeric;
diff --git a/telephony/java/com/android/internal/telephony/cdma/CdmaLteUiccFileHandler.java b/telephony/java/com/android/internal/telephony/cdma/CdmaLteUiccFileHandler.java
new file mode 100644
index 0000000..2aede29
--- /dev/null
+++ b/telephony/java/com/android/internal/telephony/cdma/CdmaLteUiccFileHandler.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.internal.telephony.cdma;
+
+import android.util.Log;
+import com.android.internal.telephony.IccConstants;
+import com.android.internal.telephony.IccFileHandler;
+
+/**
+ * File handler for the UICC of a CDMA/LTE phone. Resolves the directory path
+ * of an elementary file and supplies the tagged log helpers.
+ *
+ * {@hide}
+ */
+public final class CdmaLteUiccFileHandler extends IccFileHandler {
+    static final String LOG_TAG = "CDMA";
+
+    /** Prefix put in front of every message logged by this class. */
+    private static final String LOG_PREFIX = "[CdmaLteUiccFileHandler] ";
+
+    CdmaLteUiccFileHandler(CDMALTEPhone phone) {
+        super(phone);
+    }
+
+    /**
+     * Returns the path of the directory that holds the given elementary file.
+     *
+     * @param efid elementary file id
+     * @return {@code MF_SIM + DF_CDMA} for EF_CSIM_SPN and EF_CSIM_LI,
+     *         {@code MF_SIM + DF_GSM} for EF_AD, and the result of
+     *         {@code getCommonIccEFPath(efid)} for every other id
+     */
+    protected String getEFPath(int efid) {
+        if (efid == EF_CSIM_SPN || efid == EF_CSIM_LI) {
+            return MF_SIM + DF_CDMA;
+        }
+        if (efid == EF_AD) {
+            return MF_SIM + DF_GSM;
+        }
+        return getCommonIccEFPath(efid);
+    }
+
+    /** Logs {@code msg} at debug level with this class's prefix. */
+    protected void logd(String msg) {
+        Log.d(LOG_TAG, LOG_PREFIX + msg);
+    }
+
+    /** Logs {@code msg} at error level with this class's prefix. */
+    protected void loge(String msg) {
+        Log.e(LOG_TAG, LOG_PREFIX + msg);
+    }
+
+}
diff --git a/telephony/java/com/android/internal/telephony/cdma/CdmaLteUiccRecords.java b/telephony/java/com/android/internal/telephony/cdma/CdmaLteUiccRecords.java
new file mode 100755
index 0000000..78879d6
--- /dev/null
+++ b/telephony/java/com/android/internal/telephony/cdma/CdmaLteUiccRecords.java
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.android.internal.telephony.cdma;
+
+import static com.android.internal.telephony.TelephonyProperties.PROPERTY_ICC_OPERATOR_ALPHA;
+import com.android.internal.telephony.GsmAlphabet;
+import com.android.internal.telephony.IccFileHandler;
+import com.android.internal.telephony.IccUtils;
+import com.android.internal.telephony.PhoneBase;
+import com.android.internal.telephony.cdma.sms.UserData;
+import com.android.internal.telephony.gsm.SIMRecords;
+import android.os.AsyncResult;
+import android.os.Message;
+import android.os.SystemProperties;
+import android.util.Log;
+
+
+/**
+ * ICC records for a CDMA/LTE phone. Handles the responses for EF_PL, the
+ * CSIM language indication (EF_LI) and the CSIM service provider name
+ * (EF_SPN) itself and defers every other message to {@link SIMRecords}.
+ *
+ * {@hide}
+ */
+public final class CdmaLteUiccRecords extends SIMRecords {
+    // From CSIM application
+    private byte[] mEFpl = null;
+    private byte[] mEFli = null;
+    boolean csimSpnDisplayCondition = false;
+
+    private static final int EVENT_GET_PL_DONE = CSIM_EVENT_BASE;
+    private static final int EVENT_GET_CSIM_LI_DONE = CSIM_EVENT_BASE + 1;
+    private static final int EVENT_GET_CSIM_SPN_DONE = CSIM_EVENT_BASE + 2;
+
+    // CSIM EF_SPN layout as decoded by onGetCSimSpnDone(): three header bytes
+    // (display condition, encoding, language) followed by up to 32 name bytes.
+    private static final int CSIM_SPN_HEADER_LENGTH = 3;
+    private static final int CSIM_SPN_NAME_MAX_LENGTH = 32;
+
+    public CdmaLteUiccRecords(PhoneBase p) {
+        super(p);
+    }
+
+    /**
+     * Processes the responses to the reads issued by {@link #fetchSimRecords()}
+     * for EF_PL, CSIM EF_LI and CSIM EF_SPN; any other message is handed to the
+     * superclass. Each of these three responses counts as one loaded record,
+     * whether or not it could be read or parsed.
+     */
+    @Override
+    public void handleMessage(Message msg) {
+        AsyncResult ar;
+        boolean isCsimRecordLoadResponse = false;
+
+        try {
+            switch (msg.what) {
+            case EVENT_GET_PL_DONE:
+                // Refer to ETSI TS.102.221
+                if (DBG) log("EF_GET_EF_PL_DONE");
+                isCsimRecordLoadResponse = true;
+
+                ar = (AsyncResult) msg.obj;
+                if (ar.exception != null) {
+                    Log.e(LOG_TAG, "ar.exception = " + ar.exception);
+                    break;
+                }
+
+                mEFpl = (byte[]) ar.result;
+                if (DBG) log("EF_PL=" + IccUtils.bytesToHexString(mEFpl));
+                break;
+
+            case EVENT_GET_CSIM_LI_DONE:
+                // Refer to C.S0065 5.2.26
+                if (DBG) log("EVENT_GET_CSIM_LI_DONE");
+                isCsimRecordLoadResponse = true;
+
+                ar = (AsyncResult) msg.obj;
+                if (ar.exception != null) {
+                    Log.e(LOG_TAG, "ar.exception = " + ar.exception);
+                    break;
+                }
+
+                mEFli = (byte[]) ar.result;
+                convertCsimLiToIso639(mEFli);
+                if (DBG) log("EF_LI=" + IccUtils.bytesToHexString(mEFli));
+                break;
+
+            case EVENT_GET_CSIM_SPN_DONE:
+                // Refer to C.S0065 5.2.32
+                if (DBG) log("EVENT_GET_CSIM_SPN_DONE");
+                isCsimRecordLoadResponse = true;
+
+                ar = (AsyncResult) msg.obj;
+                if (ar.exception != null) {
+                    Log.e(LOG_TAG, "ar.exception=" + ar.exception);
+                    break;
+                }
+                onGetCSimSpnDone(ar);
+                break;
+
+            default:
+                super.handleMessage(msg);
+            }
+        } catch (RuntimeException exc) {
+            Log.w(LOG_TAG, "Exception parsing SIM record", exc);
+        } finally {
+            // Count the record even when the read failed or parsing threw.
+            if (isCsimRecordLoadResponse) {
+                onRecordLoaded();
+            }
+        }
+    }
+
+    /**
+     * Rewrites CSIM EF_LI data in place as two-letter ISO 639 language codes.
+     * The data is treated as byte pairs whose second byte is the CSIM language
+     * code; unknown codes become two spaces. A trailing unpaired byte is left
+     * untouched instead of being read past the end of the array.
+     */
+    private static void convertCsimLiToIso639(byte[] li) {
+        for (int i = 0; i + 1 < li.length; i += 2) {
+            switch (li[i + 1]) {
+            case 0x01: li[i] = 'e'; li[i + 1] = 'n'; break;
+            case 0x02: li[i] = 'f'; li[i + 1] = 'r'; break;
+            case 0x03: li[i] = 'e'; li[i + 1] = 's'; break;
+            case 0x04: li[i] = 'j'; li[i + 1] = 'a'; break;
+            case 0x05: li[i] = 'k'; li[i + 1] = 'o'; break;
+            case 0x06: li[i] = 'z'; li[i + 1] = 'h'; break;
+            case 0x07: li[i] = 'h'; li[i + 1] = 'e'; break;
+            default: li[i] = ' '; li[i + 1] = ' ';
+            }
+        }
+    }
+
+    /**
+     * Accounts for one finished record load, successful or not, and calls
+     * {@code onAllRecordsLoaded()} once the last requested record is in.
+     */
+    @Override
+    protected void onRecordLoaded() {
+        // One record finished loading, successfully or not; either way the
+        // outstanding count has to go down.
+        recordsToLoad -= 1;
+
+        if (recordsToLoad == 0 && recordsRequested == true) {
+            onAllRecordsLoaded();
+        } else if (recordsToLoad < 0) {
+            Log.e(LOG_TAG, "SIMRecords: recordsToLoad <0, programmer error suspected");
+            recordsToLoad = 0;
+        }
+    }
+
+    /**
+     * Requests the IMSI, EF_ICCID, EF_AD, EF_PL, CSIM EF_LI and CSIM EF_SPN,
+     * incrementing {@code recordsToLoad} once per request.
+     */
+    @Override
+    protected void fetchSimRecords() {
+        IccFileHandler iccFh = phone.getIccFileHandler();
+        recordsRequested = true;
+
+        phone.mCM.getIMSI(obtainMessage(EVENT_GET_IMSI_DONE));
+        recordsToLoad++;
+
+        iccFh.loadEFTransparent(EF_ICCID, obtainMessage(EVENT_GET_ICCID_DONE));
+        recordsToLoad++;
+
+        iccFh.loadEFTransparent(EF_AD, obtainMessage(EVENT_GET_AD_DONE));
+        recordsToLoad++;
+
+        iccFh.loadEFTransparent(EF_PL, obtainMessage(EVENT_GET_PL_DONE));
+        recordsToLoad++;
+
+        iccFh.loadEFTransparent(EF_CSIM_LI, obtainMessage(EVENT_GET_CSIM_LI_DONE));
+        recordsToLoad++;
+
+        iccFh.loadEFTransparent(EF_CSIM_SPN, obtainMessage(EVENT_GET_CSIM_SPN_DONE));
+        recordsToLoad++;
+    }
+
+    /**
+     * Decodes the CSIM EF_SPN record carried by {@code ar} into {@code spn} and
+     * {@code csimSpnDisplayCondition} and publishes the name through
+     * {@code PROPERTY_ICC_OPERATOR_ALPHA}. An empty name sets {@code spn} to
+     * "" without touching the property; a record too short to hold the three
+     * header bytes is logged and ignored.
+     */
+    private void onGetCSimSpnDone(AsyncResult ar) {
+        byte[] data = (byte[]) ar.result;
+        if (DBG) log("CSIM_SPN=" + IccUtils.bytesToHexString(data));
+
+        if (data == null || data.length < CSIM_SPN_HEADER_LENGTH) {
+            log("CSIM_SPN record too short, ignored");
+            return;
+        }
+
+        // C.S0065 for EF_SPN decoding
+        csimSpnDisplayCondition = (0x02 & data[0]) != 0;
+        int encoding = data[1];
+
+        // Copy only the name bytes that are really there. Using data.length as
+        // the count would run past the end of any record shorter than
+        // header + 32 bytes, because the copy starts after the header.
+        int nameLength = Math.min(data.length - CSIM_SPN_HEADER_LENGTH,
+                CSIM_SPN_NAME_MAX_LENGTH);
+        byte[] spnData = new byte[CSIM_SPN_NAME_MAX_LENGTH];
+        System.arraycopy(data, CSIM_SPN_HEADER_LENGTH, spnData, 0, nameLength);
+
+        // The name ends at the first 0xFF byte or at the end of the record.
+        int numBytes;
+        for (numBytes = 0; numBytes < nameLength; numBytes++) {
+            if ((spnData[numBytes] & 0xFF) == 0xFF) break;
+        }
+
+        if (numBytes == 0) {
+            spn = "";
+            return;
+        }
+        try {
+            switch (encoding) {
+            case UserData.ENCODING_OCTET:
+            case UserData.ENCODING_LATIN:
+                spn = new String(spnData, 0, numBytes, "ISO-8859-1");
+                break;
+            case UserData.ENCODING_IA5:
+            case UserData.ENCODING_GSM_7BIT_ALPHABET:
+            case UserData.ENCODING_7BIT_ASCII:
+                spn = GsmAlphabet.gsm7BitPackedToString(spnData, 0, (numBytes*8)/7);
+                break;
+            case UserData.ENCODING_UNICODE_16:
+                spn =  new String(spnData, 0, numBytes, "utf-16");
+                break;
+            default:
+                log("SPN encoding not supported");
+            }
+        } catch(Exception e) {
+            log("spn decode error: " + e);
+        }
+        if (DBG) log("spn=" + spn);
+        if (DBG) log("spnCondition=" + csimSpnDisplayCondition);
+        phone.setSystemProperty(PROPERTY_ICC_OPERATOR_ALPHA, spn);
+    }
+
+    /** Returns the stored EF_PL contents, or null if none have been stored. */
+    public byte[] getPreferredLanguage() {
+        return mEFpl;
+    }
+
+    /**
+     * Returns the stored CSIM EF_LI contents, already converted to ISO 639
+     * codes, or null if none have been stored.
+     */
+    public byte[] getLanguageIndication() {
+        return mEFli;
+    }
+}
diff --git a/telephony/java/com/android/internal/telephony/cdma/CdmaServiceStateTracker.java b/telephony/java/com/android/internal/telephony/cdma/CdmaServiceStateTracker.java
index 56b335e..ead6bca 100755
--- a/telephony/java/com/android/internal/telephony/cdma/CdmaServiceStateTracker.java
+++ b/telephony/java/com/android/internal/telephony/cdma/CdmaServiceStateTracker.java
@@ -253,6 +253,7 @@
 
         switch (msg.what) {
         case EVENT_RADIO_AVAILABLE:
+            if (DBG) log("handleMessage: EVENT_RADIO_AVAILABLE");
             break;
 
         case EVENT_RUIM_READY:
@@ -266,7 +267,7 @@
             }
 
             cm.getCDMASubscription(obtainMessage(EVENT_POLL_STATE_CDMA_SUBSCRIPTION));
-            if (DBG) log("Receive EVENT_RUIM_READY and Send Request getCDMASubscription.");
+            if (DBG) log("handleMessage: EVENT_RUIM_READY, Send Request getCDMASubscription.");
 
             // Restore the previous network selection.
             pollState();
@@ -280,6 +281,7 @@
             // For Non-RUIM phones, the subscription information is stored in
             // Non Volatile. Here when Non-Volatile is ready, we can poll the CDMA
             // subscription info.
+            if (DBG) log("handleMessage: EVENT_NV_READY, Send Request getCDMASubscription.");
             cm.getCDMASubscription( obtainMessage(EVENT_POLL_STATE_CDMA_SUBSCRIPTION));
             pollState();
             // Signal strength polling stops when radio is off.
@@ -871,36 +873,6 @@
         }
     }
 
-    protected static String networkTypeToString(int type) {
-        String ret = "unknown";
-
-        switch (type) {
-        case ServiceState.RADIO_TECHNOLOGY_IS95A:
-        case ServiceState.RADIO_TECHNOLOGY_IS95B:
-            ret = "CDMA";
-            break;
-        case ServiceState.RADIO_TECHNOLOGY_1xRTT:
-            ret = "CDMA - 1xRTT";
-            break;
-        case ServiceState.RADIO_TECHNOLOGY_EVDO_0:
-            ret = "CDMA - EvDo rev. 0";
-            break;
-        case ServiceState.RADIO_TECHNOLOGY_EVDO_A:
-            ret = "CDMA - EvDo rev. A";
-            break;
-        case ServiceState.RADIO_TECHNOLOGY_EVDO_B:
-            ret = "CDMA - EvDo rev. B";
-            break;
-        default:
-            if (DBG) {
-                slog("Wrong network. Can not return a string.");
-            }
-        break;
-        }
-
-        return ret;
-    }
-
     protected void fixTimeZone(String isoCountryCode) {
         TimeZone zone = null;
         // If the offset is (0, false) and the time zone property
@@ -998,7 +970,7 @@
 
         if (hasNetworkTypeChanged) {
             phone.setSystemProperty(TelephonyProperties.PROPERTY_DATA_NETWORK_TYPE,
-                    networkTypeToString(networkType));
+                    ServiceState.radioTechnologyToString(networkType));
         }
 
         if (hasRegistered) {
@@ -1017,7 +989,7 @@
                     eriText = phone.getContext().getText(
                             com.android.internal.R.string.roamingTextSearching).toString();
                 }
-                ss.setCdmaEriText(eriText);
+                ss.setOperatorAlphaLong(eriText);
             }
 
             String operatorNumeric;
diff --git a/telephony/java/com/android/internal/telephony/gsm/GsmDataConnectionTracker.java b/telephony/java/com/android/internal/telephony/gsm/GsmDataConnectionTracker.java
index f6485a4..db2b490 100644
--- a/telephony/java/com/android/internal/telephony/gsm/GsmDataConnectionTracker.java
+++ b/telephony/java/com/android/internal/telephony/gsm/GsmDataConnectionTracker.java
@@ -205,28 +205,6 @@
         return (apnContext.getDataConnection() != null);
     }
 
-    /**
-     * The only circumstances under which we report that data connectivity is not
-     * possible are
-     * <ul>
-     * <li>Data is disallowed (roaming, power state, voice call, etc).</li>
-     * <li>The current data state is {@code DISCONNECTED} for a reason other than
-     * having explicitly disabled connectivity. In other words, data is not available
-     * because the phone is out of coverage or some like reason.</li>
-     * </ul>
-     * @return {@code true} if data connectivity is possible, {@code false} otherwise.
-     * TODO - do per-apn notifications of availability using dependencyMet values.
-     */
-    @Override
-    protected boolean isDataPossible() {
-        boolean possible = (isDataAllowed()
-                && !(getAnyDataEnabled() && (getOverallState() == State.FAILED)));
-        if (!possible && DBG && isDataAllowed()) {
-            if (DBG) log("Data not possible.  No coverage: dataState = " + getOverallState());
-        }
-        return possible;
-    }
-
     @Override
     protected boolean isDataPossible(String apnType) {
         ApnContext apnContext = mApnContexts.get(apnType);
@@ -608,33 +586,6 @@
         return allowed;
     }
 
-    /**
-     * Release the apnContext
-     *
-     * @param apnContext
-     * @param tearDown
-     * @return none
-     */
-    private void releaseApnContext(ApnContext apnContext, boolean tearDown) {
-        if (apnContext == null) {
-            if (DBG) loge("releaseApnContext: apnContext null should not happen, ignore");
-            return;
-        }
-        DataConnection dc = apnContext.getDataConnection();
-        if (dc == null) {
-            if (DBG) loge("releaseApnContext: apnContext dc == null should not happen, ignore");
-            return;
-        }
-        if (tearDown) {
-            if (DBG) log("releaseApnContext: tearing down");
-            Message msg = obtainMessage(EVENT_DISCONNECT_DONE, apnContext);
-            apnContext.getDataConnection().tearDown(apnContext.getReason(), msg);
-        }
-        apnContext.setDataConnection(null);
-        apnContext.setDataConnectionAc(null);
-        return;
-    }
-
     private void setupDataOnReadyApns(String reason) {
         // Only check for default APN state
         for (ApnContext apnContext : mApnContexts.values()) {
@@ -803,17 +754,17 @@
         }
 
         DataConnectionAc dcac = apnContext.getDataConnectionAc();
-        if (dcac != null) {
-            if (tearDown) {
-                apnContext.setState(State.DISCONNECTING);
-                releaseApnContext(apnContext, tearDown);
-            } else {
-                dcac.resetSync();
-                apnContext.setState(State.IDLE);
-                mPhone.notifyDataConnection(apnContext.getReason(), apnContext.getApnType());
-                apnContext.setDataConnection(null);
-                apnContext.setDataConnectionAc(null);
-            }
+        if (tearDown && (dcac != null)) {
+            if (DBG) log("cleanUpConnection: tearing down");
+            Message msg = obtainMessage(EVENT_DISCONNECT_DONE, apnContext);
+            apnContext.getDataConnection().tearDown(apnContext.getReason(), msg);
+            apnContext.setState(State.DISCONNECTING);
+        } else {
+            if (dcac != null) dcac.resetSync();
+            apnContext.setState(State.IDLE);
+            mPhone.notifyDataConnection(apnContext.getReason(), apnContext.getApnType());
+            apnContext.setDataConnection(null);
+            apnContext.setDataConnectionAc(null);
         }
     }
 
@@ -1704,7 +1655,8 @@
                     apnContext.setState(State.FAILED);
                     mPhone.notifyDataConnection(Phone.REASON_APN_FAILED, apnContext.getApnType());
 
-                    releaseApnContext(apnContext, false);
+                    apnContext.setDataConnection(null);
+                    apnContext.setDataConnectionAc(null);
                     if (DBG) {
                         log("onDataSetupComplete: permanent error apn=%s" + apnString );
                     }
@@ -1739,6 +1691,8 @@
 
         apnContext.setState(State.IDLE);
         apnContext.setApnSetting(null);
+        apnContext.setDataConnection(null);
+        apnContext.setDataConnectionAc(null);
 
         mPhone.notifyDataConnection(apnContext.getReason(), apnContext.getApnType());
 
diff --git a/telephony/java/com/android/internal/telephony/gsm/GsmServiceStateTracker.java b/telephony/java/com/android/internal/telephony/gsm/GsmServiceStateTracker.java
index 4352831..93f4b4e 100644
--- a/telephony/java/com/android/internal/telephony/gsm/GsmServiceStateTracker.java
+++ b/telephony/java/com/android/internal/telephony/gsm/GsmServiceStateTracker.java
@@ -84,12 +84,6 @@
     private int mNewReasonDataDenied = -1;
 
     /**
-     *  Values correspond to ServiceState.RADIO_TECHNOLOGY_ definitions.
-     */
-    private int networkType = 0;
-    private int newNetworkType = 0;
-
-    /**
      * GSM roaming status solely based on TS 27.007 7.2 CREG. Only used by
      * handlePollStateResult to store CREG roaming result.
      */
@@ -628,7 +622,7 @@
                     }
                     newGPRSState = regCodeToServiceState(regState);
                     mDataRoaming = regCodeIsRoaming(regState);
-                    newNetworkType = type;
+                    mNewRadioTechnology = type;
                     newSS.setRadioTechnology(type);
                 break;
 
@@ -748,37 +742,6 @@
         }
     }
 
-    private static String networkTypeToString(int type) {
-        //Network Type from GPRS_REGISTRATION_STATE
-        String ret = "unknown";
-
-        switch (type) {
-            case ServiceState.RADIO_TECHNOLOGY_GPRS:
-                ret = "GPRS";
-                break;
-            case ServiceState.RADIO_TECHNOLOGY_EDGE:
-                ret = "EDGE";
-                break;
-            case ServiceState.RADIO_TECHNOLOGY_UMTS:
-                ret = "UMTS";
-                break;
-            case ServiceState.RADIO_TECHNOLOGY_HSDPA:
-                ret = "HSDPA";
-                break;
-            case ServiceState.RADIO_TECHNOLOGY_HSUPA:
-                ret = "HSUPA";
-                break;
-            case ServiceState.RADIO_TECHNOLOGY_HSPA:
-                ret = "HSPA";
-                break;
-            default:
-                sloge("Wrong network type: " + Integer.toString(type));
-                break;
-        }
-
-        return ret;
-    }
-
     private void pollStateDone() {
         if (DBG) {
             log("Poll ServiceState done: " +
@@ -788,8 +751,8 @@
                 " mNewMaxDataCalls=" + mNewMaxDataCalls +
                 " oldReasonDataDenied=" + mReasonDataDenied +
                 " mNewReasonDataDenied=" + mNewReasonDataDenied +
-                " oldType=" + networkTypeToString(networkType) +
-                " newType=" + networkTypeToString(newNetworkType));
+                " oldType=" + ServiceState.radioTechnologyToString(mRadioTechnology) +
+                " newType=" + ServiceState.radioTechnologyToString(mNewRadioTechnology));
         }
 
         boolean hasRegistered =
@@ -808,7 +771,7 @@
                 gprsState == ServiceState.STATE_IN_SERVICE
                 && newGPRSState != ServiceState.STATE_IN_SERVICE;
 
-        boolean hasNetworkTypeChanged = networkType != newNetworkType;
+        boolean hasRadioTechnologyChanged = mRadioTechnology != mNewRadioTechnology;
 
         boolean hasChanged = !newSS.equals(ss);
 
@@ -838,30 +801,32 @@
         // Add an event log when network type switched
         // TODO: we may add filtering to reduce the event logged,
         // i.e. check preferred network setting, only switch to 2G, etc
-        if (hasNetworkTypeChanged) {
+        if (hasRadioTechnologyChanged) {
             int cid = -1;
             GsmCellLocation loc = ((GsmCellLocation)phone.getCellLocation());
             if (loc != null) cid = loc.getCid();
-            EventLog.writeEvent(EventLogTags.GSM_RAT_SWITCHED, cid, networkType, newNetworkType);
+            EventLog.writeEvent(EventLogTags.GSM_RAT_SWITCHED, cid, mRadioTechnology,
+                    mNewRadioTechnology);
             if (DBG) {
-                log("RAT switched " + networkTypeToString(networkType) + " -> "
-                    + networkTypeToString(newNetworkType) + " at cell " + cid);
+                log("RAT switched " + ServiceState.radioTechnologyToString(mRadioTechnology) +
+                        " -> " + ServiceState.radioTechnologyToString(mNewRadioTechnology) +
+                        " at cell " + cid);
             }
         }
 
         gprsState = newGPRSState;
         mReasonDataDenied = mNewReasonDataDenied;
         mMaxDataCalls = mNewMaxDataCalls;
-        networkType = newNetworkType;
+        mRadioTechnology = mNewRadioTechnology;
         // this new state has been applied - forget it until we get a new new state
-        newNetworkType = 0;
+        mNewRadioTechnology = 0;
 
 
         newSS.setStateOutOfService(); // clean slate for next time
 
-        if (hasNetworkTypeChanged) {
+        if (hasRadioTechnologyChanged) {
             phone.setSystemProperty(TelephonyProperties.PROPERTY_DATA_NETWORK_TYPE,
-                    networkTypeToString(networkType));
+                    ServiceState.radioTechnologyToString(mRadioTechnology));
         }
 
         if (hasRegistered) {
@@ -949,7 +914,7 @@
             mDetachedRegistrants.notifyRegistrants();
         }
 
-        if (hasNetworkTypeChanged) {
+        if (hasRadioTechnologyChanged) {
             phone.notifyDataConnection(Phone.REASON_NW_TYPE_CHANGED, Phone.APN_TYPE_ALL);
         }
 
@@ -1285,7 +1250,7 @@
      * that could support voice and data simultaneously.
      */
     public boolean isConcurrentVoiceAndDataAllowed() {
-        return (networkType >= ServiceState.RADIO_TECHNOLOGY_UMTS);
+        return (mRadioTechnology >= ServiceState.RADIO_TECHNOLOGY_UMTS);
     }
 
     /**
diff --git a/telephony/java/com/android/internal/telephony/gsm/SIMRecords.java b/telephony/java/com/android/internal/telephony/gsm/SIMRecords.java
index 4cd9440..b0bad56 100755
--- a/telephony/java/com/android/internal/telephony/gsm/SIMRecords.java
+++ b/telephony/java/com/android/internal/telephony/gsm/SIMRecords.java
@@ -45,12 +45,12 @@
 /**
  * {@hide}
  */
-public final class SIMRecords extends IccRecords {
-    static final String LOG_TAG = "GSM";
+public class SIMRecords extends IccRecords {
+    protected static final String LOG_TAG = "GSM";
 
     private static final boolean CRASH_RIL = false;
 
-    private static final boolean DBG = true;
+    protected static final boolean DBG = true;
 
     // ***** Instance Variables
 
@@ -120,13 +120,13 @@
 
     private static final int EVENT_SIM_READY = 1;
     private static final int EVENT_RADIO_OFF_OR_NOT_AVAILABLE = 2;
-    private static final int EVENT_GET_IMSI_DONE = 3;
-    private static final int EVENT_GET_ICCID_DONE = 4;
+    protected static final int EVENT_GET_IMSI_DONE = 3;
+    protected static final int EVENT_GET_ICCID_DONE = 4;
     private static final int EVENT_GET_MBI_DONE = 5;
     private static final int EVENT_GET_MBDN_DONE = 6;
     private static final int EVENT_GET_MWIS_DONE = 7;
     private static final int EVENT_GET_VOICE_MAIL_INDICATOR_CPHS_DONE = 8;
-    private static final int EVENT_GET_AD_DONE = 9; // Admin data on SIM
+    protected static final int EVENT_GET_AD_DONE = 9; // Admin data on SIM
     private static final int EVENT_GET_MSISDN_DONE = 10;
     private static final int EVENT_GET_CPHS_MAILBOX_DONE = 11;
     private static final int EVENT_GET_SPN_DONE = 12;
@@ -147,6 +147,8 @@
     private static final int EVENT_GET_CFIS_DONE = 32;
     private static final int EVENT_GET_CSP_CPHS_DONE = 33;
 
+    protected static final int CSIM_EVENT_BASE = 100;
+
     // Lookup table for carriers known to produce SIMs which incorrectly indicate MNC length.
 
     private static final String[] MCCMNC_CODES_HAVING_3DIGITS_MNC = {
@@ -1285,7 +1287,7 @@
         fetchSimRecords();
     }
 
-    private void fetchSimRecords() {
+    protected void fetchSimRecords() {
         recordsRequested = true;
         IccFileHandler iccFh = phone.getIccFileHandler();
 
diff --git a/tests/StatusBar/src/com/android/statusbartest/StatusBarTest.java b/tests/StatusBar/src/com/android/statusbartest/StatusBarTest.java
index 0129114..b212533 100644
--- a/tests/StatusBar/src/com/android/statusbartest/StatusBarTest.java
+++ b/tests/StatusBar/src/com/android/statusbartest/StatusBarTest.java
@@ -145,13 +145,10 @@
         },
         new Test("Priority notification") {
             public void run() {
-                Notification not = new Notification(StatusBarTest.this,
+                Notification not = new Notification(
                                 R.drawable.stat_sys_phone,
                                 "Incoming call from: Imperious Leader",
-                                System.currentTimeMillis()-(1000*60*60*24),
-                                "Imperious Leader",
-                                "(888) 555-5038",
-                                null
+                                System.currentTimeMillis()-(1000*60*60*24)
                                 );
                 not.flags |= Notification.FLAG_HIGH_PRIORITY;
                 Intent fullScreenIntent = new Intent(StatusBarTest.this, TestAlertActivity.class);
@@ -248,12 +245,10 @@
                 mHandler.postDelayed(new Runnable() {
                         public void run() {
                             mNotificationManager.notify(1,
-                                    new Notification(StatusBarTest.this,
+                                    new Notification(
                                             R.drawable.ic_statusbar_missedcall,
                                             "tick tick tick",
-                                            System.currentTimeMillis()-(1000*60*60*24),
-                                            "(453) 123-2328",
-                                            "", null
+                                            System.currentTimeMillis()-(1000*60*60*24)
                                             ));
                         }
                     }, 3000);