imagedecoder: standalone JPEG decode lib

BZ: 135392

JPEG decoding is now used not only by skia but also by
other modules such as the USB camera. A standalone lib
wrapping VAAPI JPEG decoding is implemented for
their use.

Change-Id: I7fe07faacc810bd237f367b6149ca1cd35c9a773
Signed-off-by: Cheng Yao <yao.cheng@intel.com>
Reviewed-on: http://android.intel.com:8080/130405
Reviewed-by: Shi, PingX <pingx.shi@intel.com>
Tested-by: Shi, PingX <pingx.shi@intel.com>
Reviewed-by: cactus <cactus@intel.com>
Tested-by: cactus <cactus@intel.com>
diff --git a/imagedecoder/Android.mk b/imagedecoder/Android.mk
index 6795a06..a7a59b3 100644
--- a/imagedecoder/Android.mk
+++ b/imagedecoder/Android.mk
@@ -1,44 +1,105 @@
-#ifeq ($(strip $(USE_INTEL_JPEGDEC)),true)
 
 LOCAL_PATH := $(call my-dir)
 include $(CLEAR_VARS)
 
 LOCAL_SRC_FILES += \
-    JPEGDecoder.c \
-    JPEGParser.c \
-    ImageDecoderTrace.c
+    JPEGDecoder.cpp \
+    JPEGBlitter.cpp \
+    JPEGParser.cpp \
+    ImageDecoderTrace.cpp
 
 LOCAL_C_INCLUDES += \
     $(LOCAL_PATH) \
-    $(TOP)/external/jpeg \
     $(TARGET_OUT_HEADERS)/libva
 
-LOCAL_COPY_HEADERS_TO  := libjpeg_hw
+LOCAL_COPY_HEADERS_TO  := libjpegdec
 
 LOCAL_COPY_HEADERS := \
     JPEGDecoder.h \
-    JPEGParser.h \
+    JPEGCommon.h \
     ImageDecoderTrace.h
 
 LOCAL_SHARED_LIBRARIES += \
     libcutils \
+    libutils \
     libva-android     \
     libva             \
-    libva-tpi
+    libva-tpi		  \
+    libhardware
+
+LOCAL_LDLIBS += -lpthread
+LOCAL_CFLAGS += -Wno-multichar
+
+ifeq ($(TARGET_BOARD_PLATFORM),baytrail)
+LOCAL_SRC_FILES += JPEGBlitter_gen.cpp
+LOCAL_SRC_FILES += JPEGDecoder_gen.cpp
+else
+LOCAL_SRC_FILES += JPEGBlitter_img.cpp
+LOCAL_SRC_FILES += JPEGDecoder_img.cpp
+endif
+
+LOCAL_MODULE:= libjpegdec
+LOCAL_MODULE_TAGS := optional
+
+include $(BUILD_SHARED_LIBRARY)
+
+ifeq ($(TARGET_BOARD_PLATFORM),baytrail)
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES += \
+    test/testdecode.cpp
+
+LOCAL_C_INCLUDES += \
+    $(LOCAL_PATH) \
+    $(TARGET_OUT_HEADERS)/libva
+
+LOCAL_SHARED_LIBRARIES += \
+    libcutils \
+    libutils \
+    libva-android     \
+    libva             \
+    libva-tpi         \
+    libjpegdec        \
+    libhardware
+
+LOCAL_LDLIBS += -lpthread
+LOCAL_CFLAGS += -Wno-multichar
+
+LOCAL_MODULE:= testjpegdec
+LOCAL_MODULE_TAGS := optional
+
+include $(BUILD_EXECUTABLE)
+endif
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES += \
+    JPEGDecoder_libjpeg_wrapper.cpp
+
+LOCAL_C_INCLUDES += \
+    $(LOCAL_PATH) \
+    $(TOP)/external/jpeg \
+    $(TARGET_OUT_HEADERS)/libva \
+    $(TARGET_OUT_HEADERS)/libjpegdec
+
+LOCAL_COPY_HEADERS_TO  := libjpeg_hw
+
+LOCAL_COPY_HEADERS := \
+    JPEGDecoder_libjpeg_wrapper.h
+
+LOCAL_SHARED_LIBRARIES += \
+    libcutils \
+    libutils \
+    liblog  \
+    libjpegdec \
+    libhardware
 
 LOCAL_LDLIBS += -lpthread
 LOCAL_CFLAGS += -Wno-multichar
 LOCAL_CFLAGS += -DUSE_INTEL_JPEGDEC
 
-ifeq ($(JPEGDEC_USES_GEN),true)
-LOCAL_C_INCLUDES += $(TARGET_OUT_HEADERS)
-LOCAL_CFLAGS += -DJPEGDEC_USES_GEN
-endif
-
 LOCAL_MODULE:= libjpeg_hw
 LOCAL_MODULE_TAGS := optional
 
 include $(BUILD_SHARED_LIBRARY)
 
-#endif
-
diff --git a/imagedecoder/ImageDecoderTrace.c b/imagedecoder/ImageDecoderTrace.cpp
similarity index 100%
rename from imagedecoder/ImageDecoderTrace.c
rename to imagedecoder/ImageDecoderTrace.cpp
diff --git a/imagedecoder/ImageDecoderTrace.h b/imagedecoder/ImageDecoderTrace.h
index 1f67415..466b606 100644
--- a/imagedecoder/ImageDecoderTrace.h
+++ b/imagedecoder/ImageDecoderTrace.h
@@ -50,10 +50,17 @@
 #else
 // for Android OS
 
-//#define LOG_NDEBUG 0
-
+#ifdef LOG_TAG
+#undef LOG_TAG
+#endif
 #define LOG_TAG "ImageDecoder"
 
+#ifdef LOG_NDEBUG
+#undef LOG_NDEBUG
+#endif
+#define LOG_NDEBUG 0
+
+
 #include <utils/Log.h>
 #define ETRACE(...) ALOGE(__VA_ARGS__)
 #define WTRACE(...) ALOGW(__VA_ARGS__)
diff --git a/imagedecoder/JPEGBlitter.cpp b/imagedecoder/JPEGBlitter.cpp
new file mode 100644
index 0000000..cb1e917
--- /dev/null
+++ b/imagedecoder/JPEGBlitter.cpp
@@ -0,0 +1,88 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2013 Intel Corporation.  All rights reserved.
+* Copyright (c) Imagination Technologies Limited, UK
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+* Authors:
+*    Yao Cheng <yao.cheng@intel.com>
+*
+*/
+//#define LOG_NDEBUG 0
+
+#include <va/va.h>
+#include <va/va_tpi.h>
+#include "JPEGBlitter.h"
+#include "JPEGDecoder.h"
+
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+#include <assert.h>
+//#define LOG_TAG "JPEGBlitter"
+
+JpegBlitter::JpegBlitter() // starts unbound: no decoder, and both VA ids set to VA_INVALID_ID
+    :mDecoder(NULL),
+    mConfigId(VA_INVALID_ID),
+    mContextId(VA_INVALID_ID)
+{
+    // nothing else to do; the VA config/context are created in setDecoder()
+}
+
+JpegBlitter::~JpegBlitter() // releases the VA context/config if a decoder is still bound
+{
+    if (mDecoder) {
+        destroyContext();
+    }
+}
+
+void JpegBlitter::destroyContext() // destroys the VA context and config, then unbinds the decoder
+{
+    if (mDecoder == NULL) // checked without holding mLock: nothing bound, nothing to release
+        return;
+
+    Mutex::Autolock autoLock(mLock);
+    if (mDecoder) { // re-checked now that mLock is held
+        vaDestroyContext(mDecoder->mDisplay, mContextId); // return status is not checked
+        mContextId = VA_INVALID_ID;
+        vaDestroyConfig(mDecoder->mDisplay, mConfigId); // return status is not checked
+        mConfigId = VA_INVALID_ID;
+        mDecoder = NULL;
+    }
+}
+
+void JpegBlitter::setDecoder(JpegDecoder &decoder) // binds to decoder and creates a VideoProc config + context on its VA display
+{
+    destroyContext(); // drop any previous binding first; it takes mLock itself, so it must run before the lock below
+    Mutex::Autolock autoLock(mLock);
+    mDecoder = &decoder; // only the address is stored -- NOTE(review): decoder presumably must outlive this binding; confirm with callers
+    VAConfigAttrib  vpp_attrib;
+    VAStatus st;
+    vpp_attrib.type  = VAConfigAttribRTFormat;
+    vpp_attrib.value = VA_RT_FORMAT_YUV420;
+    st = vaCreateConfig(mDecoder->mDisplay, VAProfileNone,
+                                VAEntrypointVideoProc,
+                                &vpp_attrib,
+                                1, &mConfigId);
+    assert(st == VA_STATUS_SUCCESS); // NDEBUG is #undef'd before <assert.h> in this file, so this check is always active
+    st = vaCreateContext(mDecoder->mDisplay, mConfigId, 1920, 1080, 0, NULL, 0, &mContextId); // picture size is hard-coded to 1920x1080, no render targets passed
+    assert(st == VA_STATUS_SUCCESS);
+}
+
diff --git a/imagedecoder/JPEGBlitter.h b/imagedecoder/JPEGBlitter.h
new file mode 100644
index 0000000..9514b25
--- /dev/null
+++ b/imagedecoder/JPEGBlitter.h
@@ -0,0 +1,53 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2013 Intel Corporation.  All rights reserved.
+* Copyright (c) Imagination Technologies Limited, UK
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+* Authors:
+*    Yao Cheng <yao.cheng@intel.com>
+*
+*/
+
+#ifndef JPEG_BLITTER_H
+#define JPEG_BLITTER_H
+
+#include "../videovpp/VideoVPPBase.h"
+#include "JPEGCommon.h"
+#include <utils/threads.h>
+
+class JpegDecoder;
+
+class JpegBlitter // copies/converts one RenderTarget into another using a VA context created on a JpegDecoder's display
+{
+public:
+    JpegBlitter();
+    virtual ~JpegBlitter();
+    virtual void setDecoder(JpegDecoder &decoder); // (re)binds to a decoder and creates the VA config/context
+    virtual JpegDecodeStatus blit(RenderTarget &src, RenderTarget &dst); // defined in JPEGBlitter_gen.cpp or JPEGBlitter_img.cpp, selected by the build
+private:
+    mutable Mutex mLock; // taken by setDecoder() and destroyContext()
+    virtual void destroyContext(); // releases the VA context/config and clears mDecoder
+    JpegDecoder *mDecoder; // NULL while no decoder is bound
+    VAConfigID mConfigId; // VA_INVALID_ID while unbound
+    VAContextID mContextId; // VA_INVALID_ID while unbound
+};
+
+#endif
diff --git a/imagedecoder/JPEGBlitter_gen.cpp b/imagedecoder/JPEGBlitter_gen.cpp
new file mode 100644
index 0000000..b1167d3
--- /dev/null
+++ b/imagedecoder/JPEGBlitter_gen.cpp
@@ -0,0 +1,441 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2013 Intel Corporation.  All rights reserved.
+* Copyright (c) Imagination Technologies Limited, UK
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+* Authors:
+*    Yao Cheng <yao.cheng@intel.com>
+*
+*/
+//#define LOG_NDEBUG 0
+
+#include "JPEGBlitter.h"
+#include "JPEGCommon_Gen.h"
+#include "JPEGDecoder.h"
+
+#include <va/va.h>
+#include <va/va_tpi.h>
+#include "ImageDecoderTrace.h"
+
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
+#include <assert.h>
+
+#define JD_CHECK(err, label) \
+        if (err) { \
+            ETRACE("%s::%d: failed: %d", __PRETTY_FUNCTION__, __LINE__, err); \
+            goto label; \
+        }
+
+#define JD_CHECK_RET(err, label, retcode) \
+        if (err) { \
+            status = retcode; \
+            ETRACE("%s::%d: failed: %d", __PRETTY_FUNCTION__, __LINE__, err); \
+            goto label; \
+        }
+
+const VAProcColorStandardType fourcc2ColorStandard(uint32_t fourcc) // color standard used for the VPP pipeline parameters in hwBlit
+{
+    switch(fourcc) {
+    case VA_FOURCC_NV12: // every YUV layout listed here is reported as BT.601
+    case VA_FOURCC_YUY2:
+    case VA_FOURCC_422H:
+    case VA_FOURCC_422V:
+    case VA_FOURCC_411P:
+    case VA_FOURCC_411R:
+    case VA_FOURCC_IMC3:
+    case VA_FOURCC_444P:
+    case VA_FOURCC_YV12:
+        return VAProcColorStandardBT601;
+    default: // anything else, including the RGB fourccs
+        return VAProcColorStandardNone;
+    }
+}
+
+void write_to_file(const char *file, const VAImage *pImg, const uint8_t *pSrc) // debug helper: dumps the mapped image at pSrc (layout described by pImg) to a raw file, without row padding
+{
+    FILE *fp = fopen(file, "wb");
+    if (!fp) { // cannot open the dump file: silently give up
+        return;
+    }
+    const uint8_t *pY, *pU, *pV, *pYUYV, *pRGBA, *pUV;
+    float h_samp_factor, v_samp_factor;
+    int row, col;
+    char fourccstr[5];
+    VTRACE("Dumping %s buffer to %s", fourcc2str(fourccstr, pImg->format.fourcc), file);
+    switch (pImg->format.fourcc) {
+    case VA_FOURCC_IMC3: // 3-plane formats only select chroma scale factors here; the planes are written after the switch
+        h_samp_factor = 1; // NOTE(review): write_to_YUY2 uses 0.5 horizontally for IMC3 -- confirm full-width chroma rows are intended here
+        v_samp_factor = 0.5;
+        break;
+    case VA_FOURCC_422H:
+        h_samp_factor = 0.5;
+        v_samp_factor = 1;
+        break;
+    case VA_FOURCC_444P:
+        h_samp_factor = 1;
+        v_samp_factor = 1;
+        break;
+    case VA_FOURCC_YUY2: // packed, 2 bytes per pixel, single plane
+    {
+        pYUYV = pSrc + pImg->offsets[0];
+        VTRACE("YUY2 output width %u stride %u", pImg->width, pImg->pitches[0]);
+        for (row = 0; row < pImg->height; ++row) {
+            fwrite(pYUYV, 2, pImg->width, fp);
+            pYUYV += pImg->pitches[0];
+        }
+    }
+    fclose(fp);
+    return;
+    case VA_FOURCC_NV12: // Y plane followed by a half-height interleaved UV plane
+    {
+        pY = pSrc + pImg->offsets[0];
+        pUV = pSrc + pImg->offsets[1];
+        VTRACE("NV12 output width %u stride %u, %u", pImg->width, pImg->pitches[0], pImg->pitches[1]);
+        for (row = 0; row < pImg->height; ++row) {
+            fwrite(pY, 1, pImg->width, fp);
+            pY += pImg->pitches[0];
+        }
+        for (row = 0; row < pImg->height/2; ++row) {
+            fwrite(pUV, 1, pImg->width, fp);
+            pUV += pImg->pitches[1];
+        }
+    }
+    fclose(fp);
+    return;
+    case VA_FOURCC_RGBA: // packed, 4 bytes per pixel, single plane
+    case VA_FOURCC_BGRA:
+    case VA_FOURCC_ARGB:
+    case VA_FOURCC('A', 'B', 'G', 'R'):
+    {
+        pRGBA = pSrc + pImg->offsets[0];
+        VTRACE("RGBA output width %u stride %u", pImg->width, pImg->pitches[0]);
+        for (row = 0; row < pImg->height; ++row) {
+            fwrite(pRGBA, 4, pImg->width, fp);
+            pRGBA += pImg->pitches[0];
+        }
+    }
+    fclose(fp);
+    return;
+    default:
+        // non-supported: log it, then close the (empty) file so the handle is not leaked
+        {
+            ETRACE("%s: Not-supported input YUV format", fourcc2str(fourccstr, pImg->format.fourcc));
+            fclose(fp);
+        }
+        return;
+    }
+    pY = pSrc + pImg->offsets[0]; // only the 3-plane cases (IMC3, 422H, 444P) reach this point
+    pU = pSrc + pImg->offsets[1];
+    pV = pSrc + pImg->offsets[2];
+    // Y
+    for (row = 0; row < pImg->height; ++row) {
+        fwrite(pY, 1, pImg->width, fp);
+        pY += pImg->pitches[0];
+    }
+    // U
+    for (row = 0; row < pImg->height * v_samp_factor; ++row) {
+        fwrite(pU, 1, pImg->width * h_samp_factor, fp);
+        pU += pImg->pitches[1];
+    }
+    // V
+    for (row = 0; row < pImg->height * v_samp_factor; ++row) {
+        fwrite(pV, 1, pImg->width * h_samp_factor, fp);
+        pV += pImg->pitches[2];
+    }
+    fclose(fp);
+}
+
+static void write_to_YUY2(uint8_t *pDst, // software conversion of a mapped 3-plane YUV image (IMC3, 422H or 444P) into a packed 2-bytes-per-pixel buffer
+                          uint32_t dst_w,
+                          uint32_t dst_h,
+                          uint32_t dst_stride,
+                          const VAImage *pImg,
+                          const uint8_t *pSrc)
+{
+    const uint8_t *pY, *pU, *pV;
+    float h_samp_factor, v_samp_factor;
+    int row, col;
+    char fourccstr[5];
+    uint32_t copy_w = (dst_w < pImg->width)? dst_w: pImg->width; // copy only the area both images have in common
+    uint32_t copy_h = (dst_h < pImg->height)? dst_h: pImg->height;
+    switch (pImg->format.fourcc) {
+    case VA_FOURCC_IMC3: // chroma subsampled in both directions
+        h_samp_factor = 0.5;
+        v_samp_factor = 0.5;
+        break;
+    case VA_FOURCC_422H: // chroma subsampled horizontally only
+        h_samp_factor = 0.5;
+        v_samp_factor = 1;
+        break;
+    case VA_FOURCC_444P: // full-resolution chroma
+        h_samp_factor = 1;
+        v_samp_factor = 1;
+        break;
+    default:
+        // non-supported
+        ETRACE("%s to YUY2: Not-supported input YUV format", fourcc2str(fourccstr, pImg->format.fourcc));
+        return;
+    }
+    pY = pSrc + pImg->offsets[0];
+    pU = pSrc + pImg->offsets[1];
+    pV = pSrc + pImg->offsets[2];
+    for (row = 0; row < copy_h; ++row) {
+        uint32_t actual_row = row * v_samp_factor; // chroma line for THIS luma row, selected before the row is written (was lagging one row behind)
+        pU = pSrc + pImg->offsets[1] + actual_row * pImg->pitches[1];
+        pV = pSrc + pImg->offsets[2] + actual_row * pImg->pitches[2];
+        for (col = 0; col < copy_w; ++col) {
+            // Y
+            *(pDst + 2 * col) = *(pY + col);
+            uint32_t actual_col = h_samp_factor * col;
+            if (col % 2 == 1) { // NOTE(review): odd pixels get U and even pixels get V; standard YUY2 order is Y0 U Y1 V -- confirm plane order
+                // U
+                *(pDst + 2 * col + 1) = *(pU + actual_col);
+            }
+            else {
+                // V
+                *(pDst + 2 * col + 1) = *(pV + actual_col);
+            }
+        }
+        pDst += dst_stride;
+        pY += pImg->pitches[0];
+    }
+}
+
+static void dumpSurface(const char* filename, VADisplay display, VASurfaceID surface) // debug helper: derives an image from surface, maps it and dumps it with write_to_file
+{
+    VAStatus st;
+    VAImage img;
+    uint8_t *buf;
+    st = vaDeriveImage(display, surface, &img);
+    if (st) { // any non-zero VAStatus is treated as failure
+        ETRACE("vaDeriveImage failed with %d", st);
+        return;
+    }
+    uint32_t in_fourcc = img.format.fourcc;
+    VTRACE("Start dumping %s surface to %s", fourcc2str(NULL, in_fourcc), filename);
+    st = vaMapBuffer(display, img.buf, (void **)&buf);
+    if (st) {
+        ETRACE("vaMapBuffer failed with %d", st);
+        vaDestroyImage(display, img.image_id); // the derived image still has to be released on this path
+        return;
+    }
+    VTRACE("start write_to_file");
+    write_to_file(filename, &img, buf);
+    vaUnmapBuffer(display, img.buf); // release in reverse order of acquisition
+    vaDestroyImage(display, img.image_id);
+}
+
+static void dumpGallocBuffer(const char* filename, // placeholder: no parameter is used yet
+                                buffer_handle_t handle,
+                                int width,
+                                int height,
+                                uint32_t fourcc)
+{
+    // NOT IMPLEMENTED: the function currently does nothing
+}
+
+
+static JpegDecodeStatus swBlit(VADisplay display, VAContextID context, // CPU fallback: maps both surfaces and converts with write_to_YUY2; context and both rects are not used
+                 VASurfaceID in_surf, VARectangle *in_rect, uint32_t in_fourcc,
+                 VASurfaceID out_surf, VARectangle *out_rect, uint32_t out_fourcc)
+{
+    assert(out_fourcc == VA_FOURCC_YUY2); // only YUY2 output is supported; asserts are always active in this file
+    assert((in_fourcc == VA_FOURCC_IMC3) || (in_fourcc == VA_FOURCC_422H) || (in_fourcc == VA_FOURCC_444P));
+    VAStatus st;
+    char str[10]; // holds two 5-byte fourcc strings: str[0..4] and str[5..9]
+    JpegDecodeStatus status; // assigned by JD_CHECK_RET before every jump to cleanup
+    VAImage in_img, out_img;
+    in_img.image_id = VA_INVALID_ID; // sentinels let the cleanup path tell what was actually acquired
+    in_img.buf = VA_INVALID_ID;
+    out_img.image_id = VA_INVALID_ID;
+    out_img.buf = VA_INVALID_ID;
+    uint8_t *in_buf, *out_buf;
+    in_buf = out_buf = NULL;
+    st = vaDeriveImage(display, in_surf, &in_img);
+    JD_CHECK_RET(st, cleanup, JD_BLIT_FAILURE);
+    st = vaDeriveImage(display, out_surf, &out_img);
+    JD_CHECK_RET(st, cleanup, JD_BLIT_FAILURE);
+    st = vaMapBuffer(display, in_img.buf, (void **)&in_buf);
+    JD_CHECK_RET(st, cleanup, JD_BLIT_FAILURE);
+    st = vaMapBuffer(display, out_img.buf, (void **)&out_buf);
+    JD_CHECK_RET(st, cleanup, JD_BLIT_FAILURE);
+    VTRACE("%s in: %s, %ux%u, size %u, offset=%u,%u,%u, pitch=%u,%u,%u", __FUNCTION__,
+        fourcc2str(NULL, in_fourcc),
+        in_img.width,
+        in_img.height,
+        in_img.data_size,
+        in_img.offsets[0], in_img.offsets[1], in_img.offsets[2],
+        in_img.pitches[0], in_img.pitches[1], in_img.pitches[2]);
+    VTRACE("%s out: %s, %ux%u, size %u, offset=%u,%u,%u, pitch=%u,%u,%u", __FUNCTION__,
+        fourcc2str(NULL, out_fourcc),
+        out_img.width,
+        out_img.height,
+        out_img.data_size,
+        out_img.offsets[0], out_img.offsets[1], out_img.offsets[2],
+        out_img.pitches[0], out_img.pitches[1], out_img.pitches[2]);
+    write_to_YUY2(out_buf, out_img.width, out_img.height, out_img.pitches[0], &in_img, in_buf); // NOTE(review): out_img.offsets[0] is not added to out_buf -- presumably 0; confirm
+    vaUnmapBuffer(display, in_img.buf);
+    vaUnmapBuffer(display, out_img.buf);
+    vaDestroyImage(display, in_img.image_id);
+    vaDestroyImage(display, out_img.image_id);
+    VTRACE("%s Finished SW CSC %s=>%s", __FUNCTION__, fourcc2str(str, in_fourcc), fourcc2str(str + 5, out_fourcc));
+    return JD_SUCCESS;
+
+cleanup: // error path: undo only what was acquired before the failure
+    ETRACE("%s failed to do swBlit %s=>%s", __FUNCTION__, fourcc2str(str, in_fourcc), fourcc2str(str + 5, out_fourcc));
+    if (in_buf != NULL) vaUnmapBuffer(display, in_img.buf);
+    if (out_buf != NULL) vaUnmapBuffer(display, out_img.buf);
+    if (in_img.image_id != VA_INVALID_ID) vaDestroyImage(display, in_img.image_id);
+    if (out_img.image_id != VA_INVALID_ID) vaDestroyImage(display, out_img.image_id);
+    return status;
+}
+
+static JpegDecodeStatus hwBlit(VADisplay display, VAContextID context, // hardware path: submits one VPP pipeline buffer that renders in_surf/in_rect into out_surf/out_rect
+                 VASurfaceID in_surf, VARectangle *in_rect, uint32_t in_fourcc,
+                 VASurfaceID out_surf, VARectangle *out_rect, uint32_t out_fourcc)
+{
+    VABufferID vpp_pipeline_buf = VA_INVALID_ID; // VA_INVALID_ID means "nothing to destroy" on the cleanup path
+    VAProcPipelineParameterBuffer vpp_param;
+    VAStatus vpp_status;
+    JpegDecodeStatus status = JD_SUCCESS;
+    char str[10]; // holds two 5-byte fourcc strings: str[0..4] and str[5..9]
+    nsecs_t t1, t2;
+
+    memset(&vpp_param, 0, sizeof(VAProcPipelineParameterBuffer));
+#if PRE_TOUCH_SURFACE
+    //zeroSurfaces(display, &out_surf, 1);
+#endif
+    t1 = systemTime();
+    vpp_param.surface                 = in_surf;
+    vpp_param.output_region           = out_rect;
+    vpp_param.surface_region          = in_rect;
+    vpp_param.surface_color_standard  = fourcc2ColorStandard(in_fourcc);
+    vpp_param.output_background_color = 0;
+    vpp_param.output_color_standard   = fourcc2ColorStandard(out_fourcc);
+    vpp_param.filter_flags            = VA_FRAME_PICTURE;
+    vpp_param.filters                 = NULL;
+    vpp_param.num_filters             = 0;
+    vpp_param.forward_references      = 0;
+    vpp_param.num_forward_references  = 0;
+    vpp_param.backward_references     = 0;
+    vpp_param.num_backward_references = 0;
+    vpp_param.blend_state             = NULL;
+    vpp_param.rotation_state          = VA_ROTATION_NONE;
+    vpp_status = vaCreateBuffer(display,
+                                context,
+                                VAProcPipelineParameterBufferType,
+                                sizeof(VAProcPipelineParameterBuffer),
+                                1,
+                                &vpp_param,
+                                &vpp_pipeline_buf);
+    JD_CHECK_RET(vpp_status, cleanup, JD_RESOURCE_FAILURE);
+
+    vpp_status = vaBeginPicture(display,
+                                context,
+                                out_surf);
+    JD_CHECK_RET(vpp_status, cleanup, JD_BLIT_FAILURE);
+
+    //Render the picture
+    vpp_status = vaRenderPicture(display,
+                                 context,
+                                 &vpp_pipeline_buf,
+                                 1);
+    JD_CHECK_RET(vpp_status, cleanup, JD_BLIT_FAILURE);
+
+    vpp_status = vaEndPicture(display, context);
+    JD_CHECK_RET(vpp_status, cleanup, JD_BLIT_FAILURE);
+
+    vpp_status = vaDestroyBuffer(display, vpp_pipeline_buf); // capture the result so the check below tests this call, not vaEndPicture
+    vpp_pipeline_buf = VA_INVALID_ID; // keep the cleanup path from destroying the same buffer a second time
+    JD_CHECK_RET(vpp_status, cleanup, JD_BLIT_FAILURE);
+    t2 = systemTime();
+    VTRACE("Finished HW CSC %s(%d,%d,%u,%u)=>%s(%d,%d,%u,%u) for %f ms",
+        fourcc2str(str, in_fourcc),
+        in_rect->x, in_rect->y, in_rect->width, in_rect->height,
+        fourcc2str(str + 5, out_fourcc),
+        out_rect->x, out_rect->y, out_rect->width, out_rect->height,
+        ns2us(t2 - t1)/1000.0);
+
+    return JD_SUCCESS;
+cleanup: // error path: status was set by JD_CHECK_RET; release the parameter buffer if it still exists
+    if (vpp_pipeline_buf != VA_INVALID_ID)
+        vaDestroyBuffer(display, vpp_pipeline_buf);
+    return status;
+}
+
+static JpegDecodeStatus vaBlit(VADisplay display, VAContextID context, // dispatcher: hwBlit when both formats are in the lists below, swBlit otherwise
+                 VASurfaceID in_surf, VARectangle *in_rect, uint32_t in_fourcc,
+                 VASurfaceID out_surf, VARectangle *out_rect, uint32_t out_fourcc)
+{
+    if (((in_fourcc == VA_FOURCC_422H) || // input formats routed to the hardware path
+        (in_fourcc == VA_FOURCC_NV12) ||
+        (in_fourcc == VA_FOURCC_YUY2) ||
+        (in_fourcc == VA_FOURCC_YV12) ||
+        (in_fourcc == VA_FOURCC_RGBA))
+        &&
+        ((out_fourcc == VA_FOURCC_422H) || // output formats routed to the hardware path
+        (out_fourcc == VA_FOURCC_NV12) ||
+        (out_fourcc == VA_FOURCC_YV12) ||
+        (out_fourcc == VA_FOURCC_YUY2) ||
+        (out_fourcc == VA_FOURCC_RGBA))) {
+        return hwBlit(display, context, in_surf, in_rect, in_fourcc,
+               out_surf, out_rect, out_fourcc);
+    }
+    else { // swBlit itself asserts IMC3/422H/444P input and YUY2 output
+        return swBlit(display, context, in_surf, in_rect, in_fourcc,
+               out_surf, out_rect, out_fourcc);
+    }
+}
+
+JpegDecodeStatus JpegBlitter::blit(RenderTarget &src, RenderTarget &dst) // resolves both targets to VA surfaces and blits src.rect into dst.rect
+{
+    if (mDecoder == NULL) // setDecoder() has not been called (or the context was destroyed)
+        return JD_UNINITIALIZED;
+    JpegDecodeStatus st;
+    uint32_t src_fourcc, dst_fourcc;
+    char tmp[10]; // holds two 5-byte fourcc strings: tmp[0..4] and tmp[5..9]
+    src_fourcc = pixelFormat2Fourcc(src.pixel_format);
+    dst_fourcc = pixelFormat2Fourcc(dst.pixel_format);
+    VASurfaceID src_surf = mDecoder->getSurfaceID(src);
+    if (src_surf == VA_INVALID_ID) { // the source must already be known to the decoder
+        ETRACE("%s invalid src %s target", __FUNCTION__, fourcc2str(NULL, src_fourcc));
+        return JD_INVALID_RENDER_TARGET;
+    }
+    VASurfaceID dst_surf = mDecoder->getSurfaceID(dst);
+    if (dst_surf == VA_INVALID_ID) { // unknown destination: create a surface for it on the fly
+        WTRACE("%s foreign dst target for JpegDecoder, create surface for it, not guaranteed to free it!!!", __FUNCTION__);
+        st = mDecoder->createSurfaceFromRenderTarget(dst, &dst_surf);
+        if (st != JD_SUCCESS || dst_surf == VA_INVALID_ID) {
+            ETRACE("%s failed to create surface for dst target", __FUNCTION__);
+            return JD_RESOURCE_FAILURE;
+        }
+    } // the surface created above is not released anywhere in this function
+
+    VTRACE("%s blitting from %s to %s", __FUNCTION__, fourcc2str(tmp, src_fourcc), fourcc2str(tmp + 5, dst_fourcc));
+    st = vaBlit(mDecoder->mDisplay, mContextId, src_surf, &src.rect, src_fourcc,
+                dst_surf, &dst.rect, dst_fourcc);
+
+    return st;
+}
+
diff --git a/imagedecoder/JPEGBlitter_img.cpp b/imagedecoder/JPEGBlitter_img.cpp
new file mode 100644
index 0000000..d56ba98
--- /dev/null
+++ b/imagedecoder/JPEGBlitter_img.cpp
@@ -0,0 +1,35 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2013 Intel Corporation.  All rights reserved.
+* Copyright (c) Imagination Technologies Limited, UK
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+* Authors:
+*    Yao Cheng <yao.cheng@intel.com>
+*
+*/
+
+#include "JPEGBlitter.h"
+
+JpegDecodeStatus JpegBlitter::blit(RenderTarget &src, RenderTarget &dst) // stub used when the build selects JPEGBlitter_img.cpp: blitting always fails
+{
+    return JD_OUTPUT_FORMAT_UNSUPPORTED;
+}
+
diff --git a/imagedecoder/JPEGCommon.h b/imagedecoder/JPEGCommon.h
new file mode 100644
index 0000000..6df6fcd
--- /dev/null
+++ b/imagedecoder/JPEGCommon.h
@@ -0,0 +1,186 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2013 Intel Corporation.  All rights reserved.
+* Copyright (c) Imagination Technologies Limited, UK
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+* Authors:
+*    Yao Cheng <yao.cheng@intel.com>
+*
+*/
+
+#ifndef JPEGCOMMON_H
+#define JPEGCOMMON_H
+
+#include <va/va.h>
+#include <va/va_dec_jpeg.h>
+#include <sys/types.h>
+#include <string.h>
+
+#define JPEG_MAX_COMPONENTS 4
+#define JPEG_MAX_QUANT_TABLES 4
+
+
+#define RENDERTARGET_INTERNAL_BUFFER (RenderTarget::ANDROID_GRALLOC + 1)
+
+struct JpegInfo
+{
+    // in
+    uint8_t *buf;
+    size_t bufsize;
+    // out
+    uint32_t image_width;
+    uint32_t image_height;
+    uint32_t image_color_fourcc;
+    int      image_pixel_format;
+    VAPictureParameterBufferJPEGBaseline picture_param_buf;
+    VASliceParameterBufferJPEGBaseline slice_param_buf[JPEG_MAX_COMPONENTS];
+    VAIQMatrixBufferJPEGBaseline qmatrix_buf;
+    VAHuffmanTableBufferJPEGBaseline hufman_table_buf;
+    uint32_t dht_byte_offset[4];
+    uint32_t dqt_byte_offset[4];
+    uint32_t huffman_tables_num;
+    uint32_t quant_tables_num;
+    uint32_t soi_offset;
+    uint32_t eoi_offset;
+    uint32_t scan_ctrl_count;
+};
+
+enum JpegDecodeStatus
+{
+    JD_SUCCESS,
+    JD_UNINITIALIZED,
+    JD_ALREADY_INITIALIZED,
+    JD_RENDER_TARGET_TYPE_UNSUPPORTED,
+    JD_INPUT_FORMAT_UNSUPPORTED,
+    JD_OUTPUT_FORMAT_UNSUPPORTED,
+    JD_INVALID_RENDER_TARGET,
+    JD_RENDER_TARGET_NOT_INITIALIZED,
+    JD_CODEC_UNSUPPORTED,
+    JD_INITIALIZATION_ERROR,
+    JD_RESOURCE_FAILURE,
+    JD_DECODE_FAILURE,
+    JD_BLIT_FAILURE,
+    JD_ERROR_BITSTREAM,
+    JD_RENDER_TARGET_BUSY,
+};
+
+
+inline char * fourcc2str(char * str, uint32_t fourcc) // writes the four fourcc bytes (lowest byte first) plus a NUL into str and returns it; str needs room for 5 chars
+{
+    static char tmp[5]; // fallback buffer for str == NULL: shared by all such calls, so the next one overwrites the result
+    if (str == NULL) {
+        str = tmp;
+        memset(str, 0, sizeof tmp); // was sizeof str, i.e. the pointer size, which can exceed the 5-byte buffer
+    }
+    str[0] = fourcc & 0xff;
+    str[1] = (fourcc >> 8 )& 0xff;
+    str[2] = (fourcc >> 16) & 0xff;
+    str[3] = (fourcc >> 24)& 0xff;
+    str[4] = '\0';
+    return str;
+}
+
+inline int fourcc2VaFormat(uint32_t fourcc) // maps a VA fourcc to its VA_RT_FORMAT_* value, or -1 when the fourcc is not listed
+{
+    switch(fourcc) {
+    case VA_FOURCC_422H: // 4:2:2 layouts
+    case VA_FOURCC_422V:
+    case VA_FOURCC_YUY2:
+        return VA_RT_FORMAT_YUV422;
+    case VA_FOURCC_IMC3: // 4:2:0 layouts
+    case VA_FOURCC_YV12:
+    case VA_FOURCC_NV12:
+        return VA_RT_FORMAT_YUV420;
+    case VA_FOURCC_444P:
+        return VA_RT_FORMAT_YUV444;
+    case VA_FOURCC_411P:
+        return VA_RT_FORMAT_YUV411;
+    case VA_FOURCC_BGRA: // 32-bit RGB layouts
+    case VA_FOURCC_ARGB:
+    case VA_FOURCC_RGBA:
+        return VA_RT_FORMAT_RGB32;
+    default: // callers must check for -1
+        return -1;
+    }
+}
+
+inline uint32_t sampFactor2Fourcc(int h1, int h2, int h3, int v1, int v2, int v3) // picks a VA fourcc from the three components' horizontal (h1..h3) and vertical (v1..v3) sampling factors
+{
+    if (h1 == 2 && h2 == 1 && h3 == 1 &&
+            v1 == 2 && v2 == 1 && v3 == 1) {
+        return VA_FOURCC_IMC3;
+    }
+    else if (h1 == 2 && h2 == 1 && h3 == 1 &&
+            v1 == 1 && v2 == 1 && v3 == 1) {
+        return VA_FOURCC_422H;
+    }
+    else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+            v1 == 1 && v2 == 1 && v3 == 1) {
+        return VA_FOURCC_444P;
+    }
+    else if (h1 == 4 && h2 == 1 && h3 == 1 &&
+            v1 == 1 && v2 == 1 && v3 == 1) {
+        return VA_FOURCC_411P;
+    }
+    else if (h1 == 1 && h2 == 1 && h3 == 1 &&
+            v1 == 2 && v2 == 1 && v3 == 1) {
+        return VA_FOURCC_422V;
+    }
+    else if (h1 == 2 && h2 == 1 && h3 == 1 && // 2:1:1 horizontally with all vertical factors equal to 2 is also reported as 422H
+            v1 == 2 && v2 == 2 && v3 == 2) {
+        return VA_FOURCC_422H;
+    }
+    else if (h1 == 2 && h2 == 2 && h3 == 2 && // was "h2 == 2 && h2 == 2": h1 was never tested, so any h1 matched this branch
+            v1 == 2 && v2 == 1 && v3 == 1) {
+        return VA_FOURCC_422V;
+    }
+    else // every other combination is reported as 400P
+    {
+        return VA_FOURCC('4','0','0','P');
+    }
+}
+
+inline int fourcc2LumaBitsPerPixel(uint32_t fourcc) // NOTE(review): despite the name, the values returned (1, 2, 4) look like bytes per pixel -- confirm with callers
+{
+    switch(fourcc) {
+    case VA_FOURCC_422H: // planar and semi-planar YUV layouts
+    case VA_FOURCC_422V:
+    case VA_FOURCC_IMC3:
+    case VA_FOURCC_YV12:
+    case VA_FOURCC_NV12:
+    case VA_FOURCC_444P:
+    case VA_FOURCC_411P:
+        return 1;
+    case VA_FOURCC_YUY2: // packed YUV
+        return 2;
+    case VA_FOURCC_BGRA: // 32-bit RGB layouts
+    case VA_FOURCC_ARGB:
+    case VA_FOURCC_RGBA:
+        return 4;
+    default: // unknown fourccs fall back to 1
+        return 1;
+    }
+}
+
+extern int fourcc2PixelFormat(uint32_t fourcc);
+extern uint32_t pixelFormat2Fourcc(int pixel_format);
+
+#endif
diff --git a/imagedecoder/JPEGCommon_Gen.h b/imagedecoder/JPEGCommon_Gen.h
new file mode 100644
index 0000000..2cc90ae
--- /dev/null
+++ b/imagedecoder/JPEGCommon_Gen.h
@@ -0,0 +1,41 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2013 Intel Corporation.  All rights reserved.
+* Copyright (c) Imagination Technologies Limited, UK
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+* Authors:
+*    Yao Cheng <yao.cheng@intel.com>
+*
+*/
+
+#ifndef JPEGCOMMON_GEN_H
+#define JPEGCOMMON_GEN_H
+
+#include <ufo/graphics.h>
+#include <ufo/gralloc.h>
+#include "JPEGCommon.h"
+// Temporary workaround: pixel-format names used by this library are aliased or given literal values here (presumably absent from the headers included above -- TODO confirm and remove).
+#define HAL_PIXEL_FORMAT_YCbCr_422_H_INTEL HAL_PIXEL_FORMAT_YCrCb_422_H_INTEL // 422H (YU16); NOTE(review): maps the YCbCr name onto the YCrCb constant -- confirm the chroma order is intended
+#define HAL_PIXEL_FORMAT_IMC3 0x103 // IMC3, hard-coded value 0x103
+#define HAL_PIXEL_FORMAT_444P 0x104 // 444P, hard-coded value 0x104
+
+#endif
+
diff --git a/imagedecoder/JPEGCommon_Img.h b/imagedecoder/JPEGCommon_Img.h
new file mode 100644
index 0000000..3473d20
--- /dev/null
+++ b/imagedecoder/JPEGCommon_Img.h
@@ -0,0 +1,34 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2013 Intel Corporation.  All rights reserved.
+* Copyright (c) Imagination Technologies Limited, UK
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+* Authors:
+*    Yao Cheng <yao.cheng@intel.com>
+*
+*/
+
+#ifndef JPEGCOMMON_IMG_H
+#define JPEGCOMMON_IMG_H
+
+#include "JPEGCommon.h"
+
+#endif
diff --git a/imagedecoder/JPEGDecoder.c b/imagedecoder/JPEGDecoder.c
deleted file mode 100644
index 9dfe0a1..0000000
--- a/imagedecoder/JPEGDecoder.c
+++ /dev/null
@@ -1,1167 +0,0 @@
-/* INTEL CONFIDENTIAL
-* Copyright (c) 2012 Intel Corporation.  All rights reserved.
-* Copyright (c) Imagination Technologies Limited, UK
-*
-* The source code contained or described herein and all documents
-* related to the source code ("Material") are owned by Intel
-* Corporation or its suppliers or licensors.  Title to the
-* Material remains with Intel Corporation or its suppliers and
-* licensors.  The Material contains trade secrets and proprietary
-* and confidential information of Intel or its suppliers and
-* licensors. The Material is protected by worldwide copyright and
-* trade secret laws and treaty provisions.  No part of the Material
-* may be used, copied, reproduced, modified, published, uploaded,
-* posted, transmitted, distributed, or disclosed in any way without
-* Intel's prior express written permission.
-*
-* No license under any patent, copyright, trade secret or other
-* intellectual property right is granted to or conferred upon you
-* by disclosure or delivery of the Materials, either expressly, by
-* implication, inducement, estoppel or otherwise. Any license
-* under such intellectual property rights must be express and
-* approved by Intel in writing.
-*
-* Authors:
-*    Nana Guo <nana.n.guo@intel.com>
-*    Yao Cheng <yao.cheng@intel.com>
-*
-*/
-
-#include "va/va_tpi.h"
-#include "va/va_vpp.h"
-#include "va/va_drmcommon.h"
-#include "JPEGDecoder.h"
-#include "ImageDecoderTrace.h"
-#include "JPEGParser.h"
-#include <string.h>
-#include "jerror.h"
-
-#define JPEG_MAX_SETS_HUFFMAN_TABLES 2
-
-#define TABLE_CLASS_DC  0
-#define TABLE_CLASS_AC  1
-#define TABLE_CLASS_NUM 2
-
-// for config
-#define HW_DECODE_MIN_WIDTH  100 // for JPEG smaller than this, use SW decode
-#define HW_DECODE_MIN_HEIGHT 100 // for JPEG smaller than this, use SW decode
-
-// for debug
-#define DECODE_DUMP_FILE    "" // no dump by default
-#define YUY2_DUMP_FILE      "" // no dump by default
-#define RGBA_DUMP_FILE      "" // no dump by default
-
-#define JD_CHECK(err, label) \
-        if (err) { \
-            ETRACE("%s::%d: failed: %d", __PRETTY_FUNCTION__, __LINE__, err); \
-            goto label; \
-        }
-
-#define JD_CHECK_RET(err, label, retcode) \
-        if (err) { \
-            status = retcode; \
-            ETRACE("%s::%d: failed: %d", __PRETTY_FUNCTION__, __LINE__, err); \
-            goto label; \
-        }
-
-const char * fourcc2str(uint32_t fourcc)
-{
-    static char str[5];
-    memset(str, 0, sizeof str);
-    str[0] = fourcc & 0xff;
-    str[1] = (fourcc >> 8 )& 0xff;
-    str[2] = (fourcc >> 16) & 0xff;
-    str[3] = (fourcc >> 24)& 0xff;
-    str[4] = '\0';
-    return str;
-}
-
-// VPG supports only YUY2->RGBA, YUY2->NV12_TILED now
-// needs to convert IMC3/YV16/444P to YUY2 before HW CSC
-static void write_to_YUY2(uint8_t *pDst,
-                          uint32_t dst_stride,
-                          VAImage *pImg,
-                          uint8_t *pSrc)
-{
-    uint8_t *pY, *pU, *pV;
-    float h_samp_factor, v_samp_factor;
-    int row, col;
-    switch (pImg->format.fourcc) {
-    case VA_FOURCC_IMC3:
-        h_samp_factor = 0.5;
-        v_samp_factor = 0.5;
-        break;
-    case VA_FOURCC_422H:
-        h_samp_factor = 0.5;
-        v_samp_factor = 1;
-        break;
-    case VA_FOURCC_444P:
-        h_samp_factor = 1;
-        v_samp_factor = 1;
-        break;
-    default:
-        // non-supported
-        ETRACE("%s to YUY2: Not-supported input YUV format", fourcc2str(pImg->format.fourcc));
-        return;
-    }
-    pY = pSrc + pImg->offsets[0];
-    pU = pSrc + pImg->offsets[1];
-    pV = pSrc + pImg->offsets[2];
-    for (row = 0; row < pImg->height; ++row) {
-        for (col = 0; col < pImg->width; ++col) {
-            // Y
-            *(pDst + 2 * col) = *(pY + col);
-            uint32_t actual_col = h_samp_factor * col;
-            if (col % 2 == 1) {
-                // U
-                *(pDst + 2 * col + 1) = *(pU + actual_col);
-            }
-            else {
-                // V
-                *(pDst + 2 * col + 1) = *(pV + actual_col);
-            }
-        }
-        pDst += dst_stride;
-        pY += pImg->pitches[0];
-        uint32_t actual_row = row * v_samp_factor;
-        pU = pSrc + pImg->offsets[1] + actual_row * pImg->pitches[1];
-        pV = pSrc + pImg->offsets[2] + actual_row * pImg->pitches[2];
-    }
-}
-
-static void write_to_file(char *file, VAImage *pImg, uint8_t *pSrc)
-{
-    FILE *fp = fopen(file, "wb");
-    if (!fp) {
-        return;
-    }
-    uint8_t *pY, *pU, *pV;
-    float h_samp_factor, v_samp_factor;
-    int row, col;
-    ITRACE("Dumping decoded YUV to %s", file);
-    switch (pImg->format.fourcc) {
-    case VA_FOURCC_IMC3:
-        h_samp_factor = 0.5;
-        v_samp_factor = 0.5;
-        break;
-    case VA_FOURCC_422H:
-        h_samp_factor = 0.5;
-        v_samp_factor = 1;
-        break;
-    case VA_FOURCC_444P:
-        h_samp_factor = 1;
-        v_samp_factor = 1;
-        break;
-    default:
-        // non-supported
-        ETRACE("%s to YUY2: Not-supported input YUV format", fourcc2str(pImg->format.fourcc));
-        return;
-    }
-    pY = pSrc + pImg->offsets[0];
-    pU = pSrc + pImg->offsets[1];
-    pV = pSrc + pImg->offsets[2];
-    // Y
-    for (row = 0; row < pImg->height; ++row) {
-        fwrite(pY, 1, pImg->width, fp);
-        pY += pImg->pitches[0];
-    }
-    // U
-    for (row = 0; row < pImg->height * v_samp_factor; ++row) {
-        fwrite(pU, 1, pImg->width * h_samp_factor, fp);
-        pU += pImg->pitches[1];
-    }
-    // V
-    for (row = 0; row < pImg->height * v_samp_factor; ++row) {
-        fwrite(pV, 1, pImg->width * h_samp_factor, fp);
-        pV += pImg->pitches[2];
-    }
-    fclose(fp);
-}
-
-/*
- * Initialize VA API related stuff
- *
- * We will check the return value of  jva_initialize
- * to determine which path will be use (SW or HW)
- *
- */
-Decode_Status jdva_initialize (jd_libva_struct * jd_libva_ptr) {
-  /*
-   * Please note that we won't check the input parameters to follow the
-   * convention of libjpeg duo to we need these parameters to do error handling,
-   * and if these parameters are invalid, means the whole stack is crashed, so check
-   * them here and return false is meaningless, same situation for all internal methods
-   * related to VA API
-  */
-    uint32_t va_major_version = 0;
-    uint32_t va_minor_version = 0;
-    VAStatus va_status = VA_STATUS_SUCCESS;
-    Decode_Status status = DECODE_SUCCESS;
-    uint32_t index;
-
-    if (jd_libva_ptr->initialized)
-        return DECODE_NOT_STARTED;
-
-    jd_libva_ptr->android_display = (Display*)malloc(sizeof(Display));
-    if (jd_libva_ptr->android_display == NULL) {
-        return DECODE_MEMORY_FAIL;
-    }
-    jd_libva_ptr->va_display = vaGetDisplay (jd_libva_ptr->android_display);
-
-    if (jd_libva_ptr->va_display == NULL) {
-        ETRACE("vaGetDisplay failed.");
-        free (jd_libva_ptr->android_display);
-        return DECODE_DRIVER_FAIL;
-    }
-    va_status = vaInitialize(jd_libva_ptr->va_display, &va_major_version, &va_minor_version);
-    if (va_status != VA_STATUS_SUCCESS) {
-        ETRACE("vaInitialize failed. va_status = 0x%x", va_status);
-        status = DECODE_DRIVER_FAIL;
-        goto cleanup;
-    }
-
-    /*if ((VA_RT_FORMAT_YUV444 & attrib.value) == 0) {
-        WTRACE("Format not surportted\n");
-        status = DECODE_FAIL;
-        goto cleanup;
-    }*/
-
-    jd_libva_ptr->initialized = TRUE;
-    status = DECODE_SUCCESS;
-
-cleanup:
-#if 0
-    /*free profiles and entrypoints*/
-    if (va_profiles)
-        free(va_profiles);
-
-    if (va_entrypoints)
-        free (va_entrypoints);
-#endif
-    if (status) {
-        jd_libva_ptr->initialized = TRUE; // make sure we can call into jva_deinitialize()
-        jdva_deinitialize (jd_libva_ptr);
-        return status;
-    }
-
-  return status;
-}
-
-void jdva_deinitialize (jd_libva_struct * jd_libva_ptr) {
-    if (!(jd_libva_ptr->initialized)) {
-        return;
-    }
-
-    if (jd_libva_ptr->JPEGParser) {
-        free(jd_libva_ptr->JPEGParser);
-        jd_libva_ptr->JPEGParser = NULL;
-    }
-
-    if (jd_libva_ptr->va_display) {
-        vaTerminate(jd_libva_ptr->va_display);
-        jd_libva_ptr->va_display = NULL;
-    }
-
-    if (jd_libva_ptr->android_display) {
-        free(jd_libva_ptr->android_display);
-        jd_libva_ptr->android_display = NULL;
-    }
-
-    jd_libva_ptr->initialized = FALSE;
-    ITRACE("jdva_deinitialize finished");
-    return;
-}
-
-static Decode_Status doColorConversion(jd_libva_struct *jd_libva_ptr, VASurfaceID surface, char ** buf, uint32_t rows)
-{
-#ifdef JPEGDEC_USES_GEN
-    VAImage decoded_img;
-    uint8_t *decoded_buf = NULL;
-    VAImage yuy2_img;
-    uint8_t *yuy2_buf = NULL;
-    VAImage rgba_img;
-    uint8_t *rgba_buf = NULL;
-    int row, col;
-    VAStatus vpp_status;
-    uint8_t *pSrc, *pDst;
-    VADisplay display = NULL;
-    VAContextID context = VA_INVALID_ID;
-    VAConfigID config = VA_INVALID_ID;
-    VAConfigAttrib  vpp_attrib;
-    VAProcPipelineParameterBuffer vpp_param;
-    VABufferID vpp_pipeline_buf = VA_INVALID_ID;
-    int major_version, minor_version;
-    VAProcPipelineCaps vpp_pipeline_cap ;
-    VARectangle src_rect, dst_rect;
-    int err;
-    Display vppdpy;
-    FILE *fp;
-    VASurfaceAttrib in_fourcc, out_fourcc;
-    VASurfaceID in_surf, out_surf;
-    Decode_Status status = DECODE_SUCCESS;
-    VASurfaceAttribExternalBuffers vaSurfaceExternBufIn, vaSurfaceExternBufOut;
-    decoded_img.image_id = VA_INVALID_ID;
-    yuy2_img.image_id = VA_INVALID_ID;
-    rgba_img.image_id = VA_INVALID_ID;
-    display = jd_libva_ptr->va_display;
-
-    vpp_status = vaDeriveImage(display, surface, &decoded_img);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-
-    vpp_status = vaMapBuffer(display, decoded_img.buf, (void **)&decoded_buf);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-
-    write_to_file(DECODE_DUMP_FILE, &decoded_img, decoded_buf);
-
-    ITRACE("Start HW CSC: color %s=>RGBA8888", fourcc2str(jd_libva_ptr->fourcc));
-
-    vpp_attrib.type  = VAConfigAttribRTFormat;
-    vpp_attrib.value = VA_RT_FORMAT_YUV420;
-    vpp_status = vaCreateConfig(display,
-                                VAProfileNone,
-                                VAEntrypointVideoProc,
-                                &vpp_attrib,
-                                1,
-                                &config);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-
-    vpp_status = vaCreateContext(display,
-                                 config,
-                                 jd_libva_ptr->image_width,
-                                 jd_libva_ptr->image_height,
-                                 0,
-                                 NULL,
-                                 0,
-                                 &context);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-
-    in_surf = out_surf = VA_INVALID_ID;
-    in_fourcc.type = VASurfaceAttribPixelFormat;
-    in_fourcc.flags = VA_SURFACE_ATTRIB_SETTABLE;
-    in_fourcc.value.type = VAGenericValueTypeInteger;
-    in_fourcc.value.value.i = VA_FOURCC_YUY2;
-    vpp_status = vaCreateSurfaces(display,
-                                    VA_RT_FORMAT_YUV422,
-                                    jd_libva_ptr->image_width,
-                                    jd_libva_ptr->image_height,
-                                    &in_surf,
-                                    1,
-                                    &in_fourcc,
-                                    1);
-    vpp_status = vaDeriveImage(display, in_surf, &yuy2_img);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-
-    vpp_status = vaMapBuffer(display, yuy2_img.buf, (void **)&yuy2_buf);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-
-    write_to_YUY2(yuy2_buf, yuy2_img.pitches[0], &decoded_img, decoded_buf);
-    fp = fopen(YUY2_DUMP_FILE, "wb");
-    if (fp) {
-        ITRACE("DUMP YUY2 to " YUY2_DUMP_FILE);
-        unsigned char *pYUV = yuy2_buf;
-        uint32_t loop;
-		for(loop=0;loop<jd_libva_ptr->image_height;loop++)
-		{
-            fwrite(pYUV, 2, jd_libva_ptr->image_width, fp);
-            pYUV += yuy2_img.pitches[0];
-		}
-		fclose(fp);
-    }
-    vaUnmapBuffer(display, yuy2_img.buf);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-    yuy2_buf = NULL;
-    vaDestroyImage(display, yuy2_img.image_id);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-    yuy2_img.image_id = VA_INVALID_ID;
-    vaUnmapBuffer(display, decoded_img.buf);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-    decoded_buf = NULL;
-    vaDestroyImage(display, decoded_img.image_id);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-    decoded_img.image_id = VA_INVALID_ID;
-
-    out_fourcc.type = VASurfaceAttribPixelFormat;
-    out_fourcc.flags = VA_SURFACE_ATTRIB_SETTABLE;
-    out_fourcc.value.type = VAGenericValueTypeInteger;
-    out_fourcc.value.value.i = VA_FOURCC_RGBA;
-    vpp_status = vaCreateSurfaces(display,
-                                    VA_RT_FORMAT_RGB32,
-                                    jd_libva_ptr->image_width,
-                                    jd_libva_ptr->image_height,
-                                    &out_surf,
-                                    1,
-                                    &out_fourcc,
-                                    1);
-
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-
-    ITRACE("vaCreateSurfaces got surface %u=>%u", in_surf, out_surf);
-    //query caps for pipeline
-    vpp_status = vaQueryVideoProcPipelineCaps(display,
-                                              context,
-                                              NULL,
-                                              0,
-                                              &vpp_pipeline_cap);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-
-    src_rect.x = dst_rect.x           = 0;
-    src_rect.y = dst_rect.y           = 0;
-    src_rect.width  = dst_rect.width  = jd_libva_ptr->image_width;
-    src_rect.height = dst_rect.height = jd_libva_ptr->image_height;
-    ITRACE("from (%d, %d, %u, %u) to (%d, %d, %u, %u)",
-        src_rect.x, src_rect.y, src_rect.width, src_rect.height,
-        dst_rect.x, dst_rect.y, dst_rect.width, dst_rect.height);
-    vpp_param.surface                 = in_surf;
-    vpp_param.output_region           = &dst_rect;
-    vpp_param.surface_region          = &src_rect;
-    vpp_param.surface_color_standard  = VAProcColorStandardBT601;   //csc
-    vpp_param.output_background_color = 0x8000;                     //colorfill
-    vpp_param.output_color_standard   = VAProcColorStandardNone;
-    vpp_param.filter_flags            = VA_FRAME_PICTURE;
-    vpp_param.filters                 = NULL;
-    vpp_param.num_filters             = 0;
-    vpp_param.forward_references      = 0;
-    vpp_param.num_forward_references  = 0;
-    vpp_param.backward_references     = 0;
-    vpp_param.num_backward_references = 0;
-    vpp_param.blend_state             = NULL;
-    vpp_param.rotation_state          = VA_ROTATION_NONE;
-    vpp_status = vaCreateBuffer(display,
-                                context,
-                                VAProcPipelineParameterBufferType,
-                                sizeof(VAProcPipelineParameterBuffer),
-                                1,
-                                &vpp_param,
-                                &vpp_pipeline_buf);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-
-    vpp_status = vaBeginPicture(display,
-                                context,
-                                out_surf);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-
-    //Render the picture
-    vpp_status = vaRenderPicture(display,
-                                 context,
-                                 &vpp_pipeline_buf,
-                                 1);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-
-    vpp_status = vaEndPicture(display, context);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-
-    vpp_status = vaSyncSurface(display, out_surf);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-    ITRACE("Finished HW CSC YUY2=>RGBA8888");
-
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-    ITRACE("Copy RGBA8888 buffer (%ux%u) to skia buffer (%ux%u)",
-           jd_libva_ptr->image_width,
-           jd_libva_ptr->image_height,
-           buf[1] - buf[0],
-           rows);
-
-    vpp_status = vaDeriveImage(display, out_surf, &rgba_img);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-    vpp_status = vaMapBuffer(display, rgba_img.buf, (void **)&rgba_buf);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-    fp = fopen(RGBA_DUMP_FILE, "wb");
-    if (fp)
-        ITRACE("DUMP RGBA to " RGBA_DUMP_FILE);
-    unsigned char *prgba = rgba_buf;
-    uint32_t loop;
-	for(loop=0;loop<jd_libva_ptr->image_height && loop < rows;loop++)
-	{
-        memcpy(buf[loop], prgba, 4 * jd_libva_ptr->image_width);
-        if (fp)
-            fwrite(prgba, 4, jd_libva_ptr->image_width, fp);
-        prgba += rgba_img.pitches[0];
-	}
-    if (fp)
-		fclose(fp);
-    vaUnmapBuffer(display, rgba_img.buf);
-    JD_CHECK_RET(vpp_status, cleanup, DECODE_DRIVER_FAIL);
-    rgba_buf = NULL;
-    vaDestroyImage(display, rgba_img.image_id);
-    rgba_img.image_id = VA_INVALID_ID;
-
-cleanup:
-    if (vpp_pipeline_buf != VA_INVALID_ID)
-        vaDestroyBuffer(display, vpp_pipeline_buf);
-    if (in_surf != VA_INVALID_ID)
-        vaDestroySurfaces(display, &in_surf, 1);
-    if (out_surf != VA_INVALID_ID)
-        vaDestroySurfaces(display, &out_surf, 1);
-    if (rgba_buf)
-        vaUnmapBuffer(display, rgba_img.buf);
-    if (rgba_img.image_id != VA_INVALID_ID)
-        vaDestroyImage(display, rgba_img.image_id);
-    if (yuy2_buf)
-        vaUnmapBuffer(display, yuy2_img.buf);
-    if (yuy2_img.image_id != VA_INVALID_ID)
-        vaDestroyImage(display, yuy2_img.image_id);
-    if (decoded_buf)
-        vaUnmapBuffer(display, decoded_img.buf);
-    if (decoded_img.image_id != VA_INVALID_ID)
-        vaDestroyImage(display, decoded_img.image_id);
-    if (context != VA_INVALID_ID)
-        vaDestroyContext(display, context);
-    if (config != VA_INVALID_ID)
-        vaDestroyConfig(display, config);
-    return status;
-#else
-    return DECODE_SUCCESS;
-#endif
-}
-
-static unsigned int getSurfaceFormat(jd_libva_struct * jd_libva_ptr, VASurfaceAttrib * fourcc) {
-    int h1, h2, h3, v1, v2, v3;
-    h1 = jd_libva_ptr->picture_param_buf.components[0].h_sampling_factor;
-    h2 = jd_libva_ptr->picture_param_buf.components[1].h_sampling_factor;
-    h3 = jd_libva_ptr->picture_param_buf.components[2].h_sampling_factor;
-    v1 = jd_libva_ptr->picture_param_buf.components[0].v_sampling_factor;
-    v2 = jd_libva_ptr->picture_param_buf.components[1].v_sampling_factor;
-    v3 = jd_libva_ptr->picture_param_buf.components[2].v_sampling_factor;
-
-    fourcc->type = VASurfaceAttribPixelFormat;
-    fourcc->flags = VA_SURFACE_ATTRIB_SETTABLE;
-    fourcc->value.type = VAGenericValueTypeInteger;
-
-    if (h1 == 2 && h2 == 1 && h3 == 1 &&
-            v1 == 2 && v2 == 1 && v3 == 1) {
-        fourcc->value.value.i = VA_FOURCC_IMC3;
-        return VA_RT_FORMAT_YUV420;
-    }
-    else if (h1 == 2 && h2 == 1 && h3 == 1 &&
-            v1 == 1 && v2 == 1 && v3 == 1) {
-        fourcc->value.value.i = VA_FOURCC_422H;
-        return VA_RT_FORMAT_YUV422;
-    }
-    else if (h1 == 1 && h2 == 1 && h3 == 1 &&
-            v1 == 1 && v2 == 1 && v3 == 1) {
-        fourcc->value.value.i = VA_FOURCC_444P;
-        return VA_RT_FORMAT_YUV444;
-    }
-    else if (h1 == 4 && h2 == 1 && h3 == 1 &&
-            v1 == 1 && v2 == 1 && v3 == 1) {
-        fourcc->value.value.i = VA_FOURCC_411P;
-        ITRACE("SurfaceFormat: 411P");
-        return VA_RT_FORMAT_YUV411;
-    }
-    else if (h1 == 1 && h2 == 1 && h3 == 1 &&
-            v1 == 2 && v2 == 1 && v3 == 1) {
-        fourcc->value.value.i = VA_FOURCC_422V;
-        return VA_RT_FORMAT_YUV422;
-    }
-    else if (h1 == 2 && h2 == 1 && h3 == 1 &&
-            v1 == 2 && v2 == 2 && v3 == 2) {
-        fourcc->value.value.i = VA_FOURCC_422H;
-        return VA_RT_FORMAT_YUV422;
-    }
-    else if (h2 == 2 && h2 == 2 && h3 == 2 &&
-            v1 == 2 && v2 == 1 && v3 == 1) {
-        fourcc->value.value.i = VA_FOURCC_422V;
-        return VA_RT_FORMAT_YUV422;
-    }
-    else
-    {
-        fourcc->value.value.i = VA_FOURCC('4','0','0','P');
-        return VA_RT_FORMAT_YUV400;
-    }
-
-}
-
-Decode_Status jdva_create_resource (jd_libva_struct * jd_libva_ptr) {
-    VAStatus va_status = VA_STATUS_SUCCESS;
-    Decode_Status status = DECODE_SUCCESS;
-    jd_libva_ptr->image_width = jd_libva_ptr->picture_param_buf.picture_width;
-    jd_libva_ptr->image_height = jd_libva_ptr->picture_param_buf.picture_height;
-    jd_libva_ptr->surface_count = 1;
-    jd_libva_ptr->va_surfaces = (VASurfaceID *) malloc(sizeof(VASurfaceID)*jd_libva_ptr->surface_count);
-    if (jd_libva_ptr->va_surfaces == NULL) {
-        return DECODE_MEMORY_FAIL;
-    }
-
-
-    jd_libva_ptr->resource_allocated = TRUE;
-    return status;
-cleanup:
-    jd_libva_ptr->resource_allocated = FALSE;
-
-    if (jd_libva_ptr->va_surfaces) {
-        free (jd_libva_ptr->va_surfaces);
-        jd_libva_ptr->va_surfaces = NULL;
-    }
-    jdva_deinitialize (jd_libva_ptr);
-
-    return DECODE_DRIVER_FAIL;
-}
-
-Decode_Status jdva_release_resource (jd_libva_struct * jd_libva_ptr) {
-    Decode_Status status = DECODE_SUCCESS;
-    VAStatus va_status = VA_STATUS_SUCCESS;
-
-    if (!(jd_libva_ptr->resource_allocated)) {
-        return status;
-    }
-
-    if (!(jd_libva_ptr->va_display)) {
-        return status; //most likely the resource are already released and HW jpeg is deinitialize, return directly
-    }
-
-  /*
-   * It is safe to destroy Surface/Config/Context severl times
-   * and it is also safe even their value is NULL
-   */
-
-cleanup:
-    jd_libva_ptr->va_config = VA_INVALID_ID;
-
-    jd_libva_ptr->resource_allocated = FALSE;
-
-    return va_status;
-}
-
-Decode_Status jdva_decode (j_decompress_ptr cinfo, jd_libva_struct * jd_libva_ptr) {
-    Decode_Status status = DECODE_SUCCESS;
-    VAStatus va_status = VA_STATUS_SUCCESS;
-    VABufferID desc_buf[5];
-    uint32_t bitstream_buffer_size = 0;
-    uint32_t scan_idx = 0;
-    uint32_t buf_idx = 0;
-    char **buf = jd_libva_ptr->output_image;
-    uint32_t lines = jd_libva_ptr->output_lines;
-    uint32_t chopping = VA_SLICE_DATA_FLAG_ALL;
-    uint32_t bytes_remaining;
-    VAConfigAttrib attrib;
-    attrib.type = VAConfigAttribRTFormat;
-    va_status = vaGetConfigAttributes(jd_libva_ptr->va_display, VAProfileJPEGBaseline, VAEntrypointVLD, &attrib, 1);
-    if (va_status != VA_STATUS_SUCCESS) {
-        ETRACE("vaGetConfigAttributes failed. va_status = 0x%x", va_status);
-        status = DECODE_DRIVER_FAIL;
-        goto cleanup;
-    }
-    va_status = vaCreateConfig(jd_libva_ptr->va_display, VAProfileJPEGBaseline, VAEntrypointVLD, &attrib, 1, &(jd_libva_ptr->va_config));
-    if (va_status != VA_STATUS_SUCCESS) {
-        ETRACE("vaCreateConfig failed. va_status = 0x%x", va_status);
-        status = DECODE_DRIVER_FAIL;
-        goto cleanup;
-    }
-    VASurfaceAttrib fourcc;
-    unsigned int surface_format = getSurfaceFormat(jd_libva_ptr, &fourcc);
-    jd_libva_ptr->fourcc = fourcc.value.value.i;
-#ifdef JPEGDEC_USES_GEN
-    va_status = vaCreateSurfaces(jd_libva_ptr->va_display, surface_format,
-                                    jd_libva_ptr->image_width,
-                                    jd_libva_ptr->image_height,
-                                    jd_libva_ptr->va_surfaces,
-                                    jd_libva_ptr->surface_count, &fourcc, 1);
-#else
-    va_status = vaCreateSurfaces(jd_libva_ptr->va_display, VA_RT_FORMAT_YUV444,
-                                    jd_libva_ptr->image_width,
-                                    jd_libva_ptr->image_height,
-                                    jd_libva_ptr->va_surfaces,
-                                    jd_libva_ptr->surface_count, NULL, 0);
-#endif
-    JD_CHECK(va_status, cleanup);
-    va_status = vaCreateContext(jd_libva_ptr->va_display, jd_libva_ptr->va_config,
-                                   jd_libva_ptr->image_width,
-                                   jd_libva_ptr->image_height,
-                                   0,  //VA_PROGRESSIVE
-                                   jd_libva_ptr->va_surfaces,
-                                   jd_libva_ptr->surface_count, &(jd_libva_ptr->va_context));
-    if (va_status != VA_STATUS_SUCCESS) {
-        ETRACE("vaCreateContext failed. va_status = 0x%x", va_status);
-        return DECODE_DRIVER_FAIL;
-    }
-
-    if (jd_libva_ptr->eoi_offset)
-        bytes_remaining = jd_libva_ptr->eoi_offset - jd_libva_ptr->soi_offset;
-    else
-        bytes_remaining = jd_libva_ptr->file_size - jd_libva_ptr->soi_offset;
-    uint32_t src_offset = jd_libva_ptr->soi_offset;
-    uint32_t cpy_row;
-    bitstream_buffer_size = cinfo->src->bytes_in_buffer;//1024*1024*5;
-
-    va_status = vaBeginPicture(jd_libva_ptr->va_display, jd_libva_ptr->va_context, jd_libva_ptr->va_surfaces[0]);
-    if (va_status != VA_STATUS_SUCCESS) {
-        ETRACE("vaBeginPicture failed. va_status = 0x%x", va_status);
-        status = DECODE_DRIVER_FAIL;
-        return status;
-    }
-    va_status = vaCreateBuffer(jd_libva_ptr->va_display, jd_libva_ptr->va_context, VAPictureParameterBufferType, sizeof(VAPictureParameterBufferJPEGBaseline), 1, &jd_libva_ptr->picture_param_buf, &desc_buf[buf_idx]);
-    if (va_status != VA_STATUS_SUCCESS) {
-        ETRACE("vaCreateBuffer VAPictureParameterBufferType failed. va_status = 0x%x", va_status);
-        status = DECODE_DRIVER_FAIL;
-        return status;
-    }
-    buf_idx++;
-    va_status = vaCreateBuffer(jd_libva_ptr->va_display, jd_libva_ptr->va_context, VAIQMatrixBufferType, sizeof(VAIQMatrixBufferJPEGBaseline), 1, &jd_libva_ptr->qmatrix_buf, &desc_buf[buf_idx]);
-
-    if (va_status != VA_STATUS_SUCCESS) {
-        ETRACE("vaCreateBuffer VAIQMatrixBufferType failed. va_status = 0x%x", va_status);
-        status = DECODE_DRIVER_FAIL;
-        return status;
-    }
-    buf_idx++;
-    va_status = vaCreateBuffer(jd_libva_ptr->va_display, jd_libva_ptr->va_context, VAHuffmanTableBufferType, sizeof(VAHuffmanTableBufferJPEGBaseline), 1, &jd_libva_ptr->hufman_table_buf, &desc_buf[buf_idx]);
-    if (va_status != VA_STATUS_SUCCESS) {
-        ETRACE("vaCreateBuffer VAHuffmanTableBufferType failed. va_status = 0x%x", va_status);
-        status = DECODE_DRIVER_FAIL;
-        return status;
-    }
-    buf_idx++;
-    do {
-        /* Get Bitstream Buffer */
-        uint32_t bytes = ( bytes_remaining < bitstream_buffer_size ) ? bytes_remaining : bitstream_buffer_size;
-        bytes_remaining -= bytes;
-        /* Get Slice Control Buffer */
-        VASliceParameterBufferJPEGBaseline dest_scan_ctrl[JPEG_MAX_COMPONENTS];
-        uint32_t src_idx = 0;
-        uint32_t dest_idx = 0;
-        memset(dest_scan_ctrl, 0, sizeof(dest_scan_ctrl));
-        for (src_idx = scan_idx; src_idx < jd_libva_ptr->scan_ctrl_count ; src_idx++) {
-            if (jd_libva_ptr->slice_param_buf[ src_idx ].slice_data_offset) {
-                /* new scan, reset state machine */
-                chopping = VA_SLICE_DATA_FLAG_ALL;
-                fprintf(stderr,"Scan:%i FileOffset:%x Bytes:%x \n", src_idx,
-                    jd_libva_ptr->slice_param_buf[ src_idx ].slice_data_offset,
-                    jd_libva_ptr->slice_param_buf[ src_idx ].slice_data_size );
-                /* does the slice end in the buffer */
-                if (jd_libva_ptr->slice_param_buf[ src_idx ].slice_data_offset + jd_libva_ptr->slice_param_buf[ src_idx ].slice_data_size > bytes + src_offset) {
-                    chopping = VA_SLICE_DATA_FLAG_BEGIN;
-                }
-            } else {
-                if (jd_libva_ptr->slice_param_buf[ src_idx ].slice_data_size > bytes) {
-                    chopping = VA_SLICE_DATA_FLAG_MIDDLE;
-                } else {
-                    if ((chopping == VA_SLICE_DATA_FLAG_BEGIN) || (chopping == VA_SLICE_DATA_FLAG_MIDDLE)) {
-                        chopping = VA_SLICE_DATA_FLAG_END;
-                    }
-                }
-            }
-            dest_scan_ctrl[dest_idx].slice_data_flag = chopping;
-            dest_scan_ctrl[dest_idx].slice_data_offset = ((chopping == VA_SLICE_DATA_FLAG_ALL) ||      (chopping == VA_SLICE_DATA_FLAG_BEGIN) )?
-jd_libva_ptr->slice_param_buf[ src_idx ].slice_data_offset : 0;
-
-            const int32_t bytes_in_seg = bytes - dest_scan_ctrl[dest_idx].slice_data_offset;
-            const uint32_t scan_data = (bytes_in_seg < jd_libva_ptr->slice_param_buf[src_idx].slice_data_size) ? bytes_in_seg : jd_libva_ptr->slice_param_buf[src_idx].slice_data_size ;
-            jd_libva_ptr->slice_param_buf[src_idx].slice_data_offset = 0;
-            jd_libva_ptr->slice_param_buf[src_idx].slice_data_size -= scan_data;
-            dest_scan_ctrl[dest_idx].slice_data_size = scan_data;
-            dest_scan_ctrl[dest_idx].num_components = jd_libva_ptr->slice_param_buf[src_idx].num_components;
-            dest_scan_ctrl[dest_idx].restart_interval = jd_libva_ptr->slice_param_buf[src_idx].restart_interval;
-            memcpy(&dest_scan_ctrl[dest_idx].components, & jd_libva_ptr->slice_param_buf[ src_idx ].components,
-                sizeof(jd_libva_ptr->slice_param_buf[ src_idx ].components) );
-            dest_idx++;
-            if ((chopping == VA_SLICE_DATA_FLAG_ALL) || (chopping == VA_SLICE_DATA_FLAG_END)) { /* all good good */
-            } else {
-                break;
-            }
-        }
-        scan_idx = src_idx;
-        /* Get Slice Control Buffer */
-        va_status = vaCreateBuffer(jd_libva_ptr->va_display, jd_libva_ptr->va_context, VASliceParameterBufferType, sizeof(VASliceParameterBufferJPEGBaseline) * dest_idx, 1, dest_scan_ctrl, &desc_buf[buf_idx]);
-        if (va_status != VA_STATUS_SUCCESS) {
-            ETRACE("vaCreateBuffer VASliceParameterBufferType failed. va_status = 0x%x", va_status);
-            status = DECODE_DRIVER_FAIL;
-            return status;
-        }
-        buf_idx++;
-        va_status = vaCreateBuffer(jd_libva_ptr->va_display, jd_libva_ptr->va_context, VASliceDataBufferType, bytes, 1, &jd_libva_ptr->bitstream_buf[ src_offset ], &desc_buf[buf_idx]);
-        buf_idx++;
-        if (va_status != VA_STATUS_SUCCESS) {
-            status = DECODE_DRIVER_FAIL;
-            return status;
-        }
-        va_status = vaRenderPicture( jd_libva_ptr->va_display, jd_libva_ptr->va_context, desc_buf, buf_idx);
-        if (va_status != VA_STATUS_SUCCESS) {
-            ETRACE("vaRenderPicture failed. va_status = 0x%x", va_status);
-            status = DECODE_DRIVER_FAIL;
-            return status;
-        }
-        buf_idx = 0;
-
-        src_offset += bytes;
-    } while (bytes_remaining);
-
-    va_status = vaEndPicture(jd_libva_ptr->va_display, jd_libva_ptr->va_context);
-    if (va_status != VA_STATUS_SUCCESS) {
-        ETRACE("vaRenderPicture failed. va_status = 0x%x", va_status);
-        status = DECODE_DRIVER_FAIL;
-        return status;
-    }
-
-    va_status = vaSyncSurface(jd_libva_ptr->va_display, jd_libva_ptr->va_surfaces[0]);
-    if (va_status != VA_STATUS_SUCCESS) {
-        WTRACE("vaSyncSurface failed. va_status = 0x%x", va_status);
-    }
-
-    va_status = vaDestroyContext(jd_libva_ptr->va_display, jd_libva_ptr->va_context);
-    if (va_status != VA_STATUS_SUCCESS) {
-      ETRACE("vaDestroyContext failed. va_status = 0x%x", va_status);
-      return DECODE_DRIVER_FAIL;
-    }
-    jd_libva_ptr->va_context = VA_INVALID_ID;
-
-
-
-    va_status = vaDestroyConfig(jd_libva_ptr->va_display, jd_libva_ptr->va_config);
-    if (va_status != VA_STATUS_SUCCESS) {
-        ETRACE("vaDestroyConfig failed. va_status = 0x%x", va_status);
-        return DECODE_DRIVER_FAIL;
-    }
-    status = doColorConversion(jd_libva_ptr,
-                               jd_libva_ptr->va_surfaces[0],
-                               buf, lines);
-    va_status = vaDestroySurfaces(jd_libva_ptr->va_display, jd_libva_ptr->va_surfaces, jd_libva_ptr->surface_count);
-    ITRACE("Successfully decoded picture");
-
-    if (jd_libva_ptr->va_surfaces) {
-        free (jd_libva_ptr->va_surfaces);
-        jd_libva_ptr->va_surfaces = NULL;
-    }
-
-
-    return status;
-cleanup:
-    return DECODE_DRIVER_FAIL;
-}
-
-Decode_Status parseBitstream(j_decompress_ptr cinfo, jd_libva_struct * jd_libva_ptr) {
-    uint32_t component_order = 0 ;
-    uint32_t dqt_ind = 0;
-    uint32_t dht_ind = 0;
-    uint32_t scan_ind = 0;
-    boolean frame_marker_found = FALSE;
-    int i;
-
-    uint8_t marker = jd_libva_ptr->JPEGParser->getNextMarker(jd_libva_ptr->JPEGParser);
-
-    while (marker != CODE_EOI &&( !jd_libva_ptr->JPEGParser->endOfBuffer(jd_libva_ptr->JPEGParser))) {
-        switch (marker) {
-            case CODE_SOI: {
-                 jd_libva_ptr->soi_offset = jd_libva_ptr->JPEGParser->getByteOffset(jd_libva_ptr->JPEGParser) - 2;
-                break;
-            }
-            // If the marker is an APP marker skip over the data
-            case CODE_APP0:
-            case CODE_APP1:
-            case CODE_APP2:
-            case CODE_APP3:
-            case CODE_APP4:
-            case CODE_APP5:
-            case CODE_APP6:
-            case CODE_APP7:
-            case CODE_APP8:
-            case CODE_APP9:
-            case CODE_APP10:
-            case CODE_APP11:
-            case CODE_APP12:
-            case CODE_APP13:
-            case CODE_APP14:
-            case CODE_APP15: {
-
-                uint32_t bytes_to_burn = jd_libva_ptr->JPEGParser->readBytes(jd_libva_ptr->JPEGParser, 2) - 2;
-                jd_libva_ptr->JPEGParser->burnBytes(jd_libva_ptr->JPEGParser, bytes_to_burn);
-                    break;
-            }
-            // Store offset to DQT data to avoid parsing bitstream in user mode
-            case CODE_DQT: {
-                if (dqt_ind < 4) {
-                    jd_libva_ptr->dqt_byte_offset[dqt_ind] = jd_libva_ptr->JPEGParser->getByteOffset(jd_libva_ptr->JPEGParser) - jd_libva_ptr->soi_offset;
-                    dqt_ind++;
-                    uint32_t bytes_to_burn = jd_libva_ptr->JPEGParser->readBytes( jd_libva_ptr->JPEGParser, 2 ) - 2;
-                    jd_libva_ptr->JPEGParser->burnBytes( jd_libva_ptr->JPEGParser, bytes_to_burn );
-                } else {
-                    ETRACE("ERROR: Decoder does not support more than 4 Quant Tables\n");
-                    return DECODE_PARSER_FAIL;
-                }
-                break;
-            }
-            // Throw exception for all SOF marker other than SOF0
-            case CODE_SOF1:
-            case CODE_SOF2:
-            case CODE_SOF3:
-            case CODE_SOF5:
-            case CODE_SOF6:
-            case CODE_SOF7:
-            case CODE_SOF8:
-            case CODE_SOF9:
-            case CODE_SOF10:
-            case CODE_SOF11:
-            case CODE_SOF13:
-            case CODE_SOF14:
-            case CODE_SOF15: {
-                ETRACE("ERROR: unsupport SOF\n");
-                break;
-            }
-            // Parse component information in SOF marker
-            case CODE_SOF_BASELINE: {
-                frame_marker_found = TRUE;
-
-                jd_libva_ptr->JPEGParser->burnBytes(jd_libva_ptr->JPEGParser, 2); // Throw away frame header length
-                uint8_t sample_precision = jd_libva_ptr->JPEGParser->readNextByte(jd_libva_ptr->JPEGParser);
-                if (sample_precision != 8) {
-                    ETRACE("sample_precision is not supported\n");
-                    return DECODE_PARSER_FAIL;
-                }
-                // Extract pic width and height
-                jd_libva_ptr->picture_param_buf.picture_height = jd_libva_ptr->JPEGParser->readBytes(jd_libva_ptr->JPEGParser, 2);
-                jd_libva_ptr->picture_param_buf.picture_width = jd_libva_ptr->JPEGParser->readBytes(jd_libva_ptr->JPEGParser, 2);
-                jd_libva_ptr->picture_param_buf.num_components = jd_libva_ptr->JPEGParser->readNextByte(jd_libva_ptr->JPEGParser);
-
-                if (jd_libva_ptr->picture_param_buf.num_components > JPEG_MAX_COMPONENTS) {
-                    ETRACE("ERROR: reached max components\n");
-                    return DECODE_PARSER_FAIL;
-                }
-                if (jd_libva_ptr->picture_param_buf.picture_height < HW_DECODE_MIN_HEIGHT
-                    || jd_libva_ptr->picture_param_buf.picture_width < HW_DECODE_MIN_WIDTH) {
-                    ITRACE("PERFORMANCE: %ux%u JPEG will decode faster with SW\n",
-                        jd_libva_ptr->picture_param_buf.picture_width,
-                        jd_libva_ptr->picture_param_buf.picture_height);
-                    return DECODE_PARSER_FAIL;
-                }
-                uint8_t comp_ind = 0;
-                for (comp_ind = 0; comp_ind < jd_libva_ptr->picture_param_buf.num_components; comp_ind++) {
-                    jd_libva_ptr->picture_param_buf.components[comp_ind].component_id = jd_libva_ptr->JPEGParser->readNextByte(jd_libva_ptr->JPEGParser);
-
-                    uint8_t hv_sampling = jd_libva_ptr->JPEGParser->readNextByte(jd_libva_ptr->JPEGParser);
-                    jd_libva_ptr->picture_param_buf.components[comp_ind].h_sampling_factor = hv_sampling >> 4;
-                    jd_libva_ptr->picture_param_buf.components[comp_ind].v_sampling_factor = hv_sampling & 0xf;
-                    jd_libva_ptr->picture_param_buf.components[comp_ind].quantiser_table_selector = jd_libva_ptr->JPEGParser->readNextByte(jd_libva_ptr->JPEGParser);
-                }
-
-
-                break;
-            }
-            // Store offset to DHT data to avoid parsing bitstream in user mode
-            case CODE_DHT: {
-                if (dht_ind < 4) {
-                    jd_libva_ptr->dht_byte_offset[dht_ind] = jd_libva_ptr->JPEGParser->getByteOffset(jd_libva_ptr->JPEGParser) - jd_libva_ptr->soi_offset;
-                    dht_ind++;
-                    uint32_t bytes_to_burn = jd_libva_ptr->JPEGParser->readBytes(jd_libva_ptr->JPEGParser, 2) - 2;
-                    jd_libva_ptr->JPEGParser->burnBytes(jd_libva_ptr->JPEGParser,  bytes_to_burn );
-                } else {
-                    ETRACE("ERROR: Decoder does not support more than 4 Huff Tables\n");
-                    return DECODE_PARSER_FAIL;
-                }
-                break;
-            }
-            // Parse component information in SOS marker
-            case CODE_SOS: {
-                jd_libva_ptr->JPEGParser->burnBytes(jd_libva_ptr->JPEGParser, 2);
-                uint32_t component_in_scan = jd_libva_ptr->JPEGParser->readNextByte(jd_libva_ptr->JPEGParser);
-                uint8_t comp_ind = 0;
-
-                for (comp_ind = 0; comp_ind < component_in_scan; comp_ind++) {
-                    uint8_t comp_id = jd_libva_ptr->JPEGParser->readNextByte(jd_libva_ptr->JPEGParser);
-                    uint8_t comp_data_ind;
-                    for (comp_data_ind = 0; comp_data_ind < jd_libva_ptr->picture_param_buf.num_components; comp_data_ind++) {
-                        if (comp_id == jd_libva_ptr->picture_param_buf.components[comp_data_ind].component_id) {
-                            jd_libva_ptr->slice_param_buf[scan_ind].components[comp_ind].component_selector = comp_data_ind + 1;
-                            break;
-                        }
-                    }
-                    uint8_t huffman_tables = jd_libva_ptr->JPEGParser->readNextByte(jd_libva_ptr->JPEGParser);
-                    jd_libva_ptr->slice_param_buf[scan_ind].components[comp_ind].dc_table_selector = huffman_tables >> 4;
-                    jd_libva_ptr->slice_param_buf[scan_ind].components[comp_ind].ac_table_selector = huffman_tables & 0xf;
-                }
-                uint32_t curr_byte = jd_libva_ptr->JPEGParser->readNextByte(jd_libva_ptr->JPEGParser); // Ss
-                if (curr_byte != 0) {
-                    ETRACE("ERROR: curr_byte 0x%08x != 0\n", curr_byte);
-                    return DECODE_PARSER_FAIL;
-                }
-                curr_byte = jd_libva_ptr->JPEGParser->readNextByte(jd_libva_ptr->JPEGParser);  // Se
-                if (curr_byte != 0x3f) {
-                    ETRACE("ERROR: curr_byte 0x%08x != 0x3f\n", curr_byte);
-                    return DECODE_PARSER_FAIL;
-                }
-                curr_byte = jd_libva_ptr->JPEGParser->readNextByte(jd_libva_ptr->JPEGParser);  // Ah, Al
-                if (curr_byte != 0) {
-                    ETRACE("ERROR: curr_byte 0x%08x != 0\n", curr_byte);
-                    return DECODE_PARSER_FAIL;
-                }
-                // Set slice control variables needed
-                jd_libva_ptr->slice_param_buf[scan_ind].slice_data_offset = jd_libva_ptr->JPEGParser->getByteOffset(jd_libva_ptr->JPEGParser) - jd_libva_ptr->soi_offset;
-                jd_libva_ptr->slice_param_buf[scan_ind].num_components = component_in_scan;
-                if (scan_ind) {
-                    /* If there is more than one scan, the slice for all but the final scan should only run up to the beginning of the next scan */
-                    jd_libva_ptr->slice_param_buf[scan_ind - 1].slice_data_size =
-                        (jd_libva_ptr->slice_param_buf[scan_ind].slice_data_offset - jd_libva_ptr->slice_param_buf[scan_ind - 1].slice_data_offset );;
-                    }
-                    scan_ind++;
-                    jd_libva_ptr->scan_ctrl_count++;   // gsDXVA2Globals.uiScanCtrlCount
-                    break;
-                }
-            case CODE_DRI: {
-                uint32_t size =  jd_libva_ptr->JPEGParser->readBytes(jd_libva_ptr->JPEGParser, 2);
-                jd_libva_ptr->slice_param_buf[scan_ind].restart_interval =  jd_libva_ptr->JPEGParser->readBytes(jd_libva_ptr->JPEGParser, 2);
-                jd_libva_ptr->JPEGParser->burnBytes(jd_libva_ptr->JPEGParser, (size - 4));
-                break;
-            }
-            default:
-                break;
-        }
-
-        marker = jd_libva_ptr->JPEGParser->getNextMarker(jd_libva_ptr->JPEGParser);
-        // If the EOI code is found, store the byte offset before the parsing finishes
-        if( marker == CODE_EOI ) {
-            jd_libva_ptr->eoi_offset = jd_libva_ptr->JPEGParser->getByteOffset(jd_libva_ptr->JPEGParser);
-        }
-
-    }
-
-    jd_libva_ptr->quant_tables_num = dqt_ind;
-    jd_libva_ptr->huffman_tables_num = dht_ind;
-
-    /* The slice for the last scan should run up to the end of the picture */
-    if (jd_libva_ptr->eoi_offset) {
-        jd_libva_ptr->slice_param_buf[scan_ind - 1].slice_data_size = (jd_libva_ptr->eoi_offset - jd_libva_ptr->slice_param_buf[scan_ind - 1].slice_data_offset);
-    }
-    else {
-        jd_libva_ptr->slice_param_buf[scan_ind - 1].slice_data_size = (jd_libva_ptr->file_size - jd_libva_ptr->slice_param_buf[scan_ind - 1].slice_data_offset);
-    }
-    // throw AppException if SOF0 isn't found
-    if (!frame_marker_found) {
-        ETRACE("EEORR: Reached end of bitstream while trying to parse headers\n");
-        return DECODE_PARSER_FAIL;
-    }
-
-    Decode_Status status = parseTableData(cinfo, jd_libva_ptr);
-    if (status != DECODE_SUCCESS) {
-        ETRACE("ERROR: Parsing table data returns %d", status);
-    }
-    cinfo->original_image_width = jd_libva_ptr->picture_param_buf.picture_width;  /* nominal image width (from SOF marker) */
-    cinfo->image_width = jd_libva_ptr->picture_param_buf.picture_width;   /* nominal image width (from SOF marker) */
-    cinfo->image_height = jd_libva_ptr->picture_param_buf.picture_height;  /* nominal image height */
-    cinfo->num_components = jd_libva_ptr->picture_param_buf.num_components;       /* # of color components in JPEG image */
-    cinfo->jpeg_color_space = JCS_YCbCr; /* colorspace of JPEG image */
-    cinfo->out_color_space = JCS_RGB; /* colorspace for output */
-    cinfo->src->bytes_in_buffer = jd_libva_ptr->file_size;
-
-    ITRACE("Successfully parsed table");
-    return status;
-
-}
-
-Decode_Status parseTableData(j_decompress_ptr cinfo, jd_libva_struct * jd_libva_ptr) {
-    CJPEGParse* parser = (CJPEGParse*)malloc(sizeof(CJPEGParse));
-    if (parser == NULL) {
-        ETRACE("%s ERROR: Parsing table data returns %d", __FUNCTION__, DECODE_MEMORY_FAIL);
-        return DECODE_MEMORY_FAIL;
-    }
-
-    parserInitialize(parser, jd_libva_ptr->bitstream_buf, jd_libva_ptr->file_size);
-
-    // Parse Quant tables
-    memset(&jd_libva_ptr->qmatrix_buf, 0, sizeof(jd_libva_ptr->qmatrix_buf));
-    uint32_t dqt_ind = 0;
-    for (dqt_ind = 0; dqt_ind < jd_libva_ptr->quant_tables_num; dqt_ind++) {
-        if (parser->setByteOffset(parser, jd_libva_ptr->dqt_byte_offset[dqt_ind])) {
-            // uint32_t uiTableBytes = parser->readBytes( 2 ) - 2;
-            uint32_t table_bytes = parser->readBytes( parser, 2 ) - 2;
-            do {
-                uint32_t table_info = parser->readNextByte(parser);
-                table_bytes--;
-                uint32_t table_length = table_bytes > 64 ? 64 : table_bytes;
-                uint32_t table_precision = table_info >> 4;
-                if (table_precision != 0) {
-                    ETRACE("%s ERROR: Parsing table data returns %d", __FUNCTION__, DECODE_PARSER_FAIL);
-                    return DECODE_PARSER_FAIL;
-                }
-                uint32_t table_id = table_info & 0xf;
-
-                jd_libva_ptr->qmatrix_buf.load_quantiser_table[table_id] = 1;
-
-                if (table_id < JPEG_MAX_QUANT_TABLES) {
-                    // Pull Quant table data from bitstream
-                    uint32_t byte_ind;
-                    for (byte_ind = 0; byte_ind < table_length; byte_ind++) {
-                        jd_libva_ptr->qmatrix_buf.quantiser_table[table_id][byte_ind] = parser->readNextByte(parser);
-                    }
-                } else {
-                    ETRACE("%s DQT table ID is not supported", __FUNCTION__);
-                    parser->burnBytes(parser, table_length);
-                }
-                table_bytes -= table_length;
-            } while (table_bytes);
-        }
-    }
-
-    // Parse Huffman tables
-    memset(&jd_libva_ptr->hufman_table_buf, 0, sizeof(jd_libva_ptr->hufman_table_buf));
-    uint32_t dht_ind = 0;
-    for (dht_ind = 0; dht_ind < jd_libva_ptr->huffman_tables_num; dht_ind++) {
-        if (parser->setByteOffset(parser, jd_libva_ptr->dht_byte_offset[dht_ind])) {
-            uint32_t table_bytes = parser->readBytes( parser, 2 ) - 2;
-            do {
-                uint32_t table_info = parser->readNextByte(parser);
-                table_bytes--;
-                uint32_t table_class = table_info >> 4; // Identifies whether the table is for AC or DC
-                uint32_t table_id = table_info & 0xf;
-                jd_libva_ptr->hufman_table_buf.load_huffman_table[table_id] = 1;
-
-                if ((table_class < TABLE_CLASS_NUM) && (table_id < JPEG_MAX_SETS_HUFFMAN_TABLES)) {
-                    if (table_class == 0) {
-                        uint8_t* bits = parser->getCurrentIndex(parser);
-                        // Find out the number of entries in the table
-                        uint32_t table_entries = 0;
-                        uint32_t bit_ind;
-                        for (bit_ind = 0; bit_ind < 16; bit_ind++) {
-                            jd_libva_ptr->hufman_table_buf.huffman_table[table_id].num_dc_codes[bit_ind] = bits[bit_ind];
-                            table_entries += jd_libva_ptr->hufman_table_buf.huffman_table[table_id].num_dc_codes[bit_ind];
-                        }
-
-                        // Create table of code values
-                        parser->burnBytes(parser, 16);
-                        table_bytes -= 16;
-                        uint32_t tbl_ind;
-                        for (tbl_ind = 0; tbl_ind < table_entries; tbl_ind++) {
-                            jd_libva_ptr->hufman_table_buf.huffman_table[table_id].dc_values[tbl_ind] = parser->readNextByte(parser);
-                            table_bytes--;
-                        }
-
-                    } else { // for AC class
-                        uint8_t* bits = parser->getCurrentIndex(parser);
-                        // Find out the number of entries in the table
-                        uint32_t table_entries = 0;
-                        uint32_t bit_ind = 0;
-                        for (bit_ind = 0; bit_ind < 16; bit_ind++) {
-                            jd_libva_ptr->hufman_table_buf.huffman_table[table_id].num_ac_codes[bit_ind] = bits[bit_ind];
-                            table_entries += jd_libva_ptr->hufman_table_buf.huffman_table[table_id].num_ac_codes[bit_ind];
-                        }
-
-                        // Create table of code values
-                        parser->burnBytes(parser, 16);
-                        table_bytes -= 16;
-                        uint32_t tbl_ind = 0;
-                        for (tbl_ind = 0; tbl_ind < table_entries; tbl_ind++) {
-                            jd_libva_ptr->hufman_table_buf.huffman_table[table_id].ac_values[tbl_ind] = parser->readNextByte(parser);
-                            table_bytes--;
-                        }
-                    }//end of else
-                } else {
-                    // Find out the number of entries in the table
-                    ETRACE("%s DHT table ID is not supported", __FUNCTION__);
-                    uint32_t table_entries = 0;
-                    uint32_t bit_ind = 0;
-                    for(bit_ind = 0; bit_ind < 16; bit_ind++) {
-                        table_entries += parser->readNextByte(parser);
-                        table_bytes--;
-                    }
-                    parser->burnBytes(parser, table_entries);
-                    table_bytes -= table_entries;
-		}
-
-            } while (table_bytes);
-        }
-    }
-
-    if (parser) {
-        free(parser);
-        parser = NULL;
-    }
-    return DECODE_SUCCESS;
-}
-
diff --git a/imagedecoder/JPEGDecoder.cpp b/imagedecoder/JPEGDecoder.cpp
new file mode 100644
index 0000000..5e05464
--- /dev/null
+++ b/imagedecoder/JPEGDecoder.cpp
@@ -0,0 +1,849 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
+* Copyright (c) Imagination Technologies Limited, UK
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+* Authors:
+*    Nana Guo <nana.n.guo@intel.com>
+*    Yao Cheng <yao.cheng@intel.com>
+*
+*/
+//#define LOG_NDEBUG 0
+
+#include <va/va.h>
+#include <va/va_tpi.h>
+#include "JPEGDecoder.h"
+#include "JPEGParser.h"
+#include "JPEGBlitter.h"
+#include "ImageDecoderTrace.h"
+
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+#include <assert.h>
+
+//#define LOG_TAG "ImageDecoder"
+
+#define JPEG_MAX_SETS_HUFFMAN_TABLES 2
+
+#define TABLE_CLASS_DC  0
+#define TABLE_CLASS_AC  1
+#define TABLE_CLASS_NUM 2
+
+// for config
+#define HW_DECODE_MIN_WIDTH  100 // for JPEG smaller than this, use SW decode
+#define HW_DECODE_MIN_HEIGHT 100 // for JPEG smaller than this, use SW decode
+
+typedef uint32_t Display;
+
+#define JD_CHECK(err, label) /* log and `goto label` when err is non-zero; err is evaluated twice on failure */ \
+        do { if (err) { \
+            ETRACE("%s::%d: failed: %d", __PRETTY_FUNCTION__, __LINE__, (err)); \
+            goto label; \
+        } } while (0)
+
+#define JD_CHECK_RET(err, label, retcode) /* as JD_CHECK, but first stores retcode in the caller's local `status` */ \
+        do { if (err) { \
+            status = (retcode); \
+            ETRACE("%s::%d: failed: %d", __PRETTY_FUNCTION__, __LINE__, (err)); \
+            goto label; \
+        } } while (0)
+
+// Allocates the parser and blitter, then opens and initializes the VA display.
+JpegDecoder::JpegDecoder()
+    :mInitialized(false),
+    mDisplay(0),
+    mConfigId(VA_INVALID_ID),
+    mContextId(VA_INVALID_ID),
+    mParser(new CJPEGParse),
+    mBlitter(new JpegBlitter)
+{
+    Display dpy;  // NOTE(review): uninitialized and stack-local; confirm libva does not keep &dpy
+    int va_major_version, va_minor_version;
+    mDisplay = vaGetDisplay(&dpy);
+    VAStatus st = vaInitialize(mDisplay, &va_major_version, &va_minor_version);
+    if (st != VA_STATUS_SUCCESS) { ETRACE("vaInitialize failed. va_status = 0x%x", st); }
+}
+JpegDecoder::~JpegDecoder()
+{
+    if (mInitialized) { // still initialized at destruction: warn and force teardown via deinit()
+        WTRACE("Freeing JpegDecoder: not destroyed yet. Force destroy resource");
+        deinit();
+    }
+    delete mBlitter; // allocated in the constructor
+    vaTerminate(mDisplay); // pairs with vaInitialize() in the constructor
+    delete mParser; // allocated in the constructor
+}
+
+// Maps the VA surface backing `target` for CPU access. On success *data is filled in by
+// vaMapBuffer and offsets[0..2]/pitches[0..2] receive the derived image's plane layout;
+// the returned handle then has valid == true and is released with unmapData().
+JpegDecoder::MapHandle JpegDecoder::mapData(RenderTarget &target, void ** data, uint32_t * offsets, uint32_t * pitches)
+{
+    JpegDecoder::MapHandle handle;
+    handle.img = NULL;
+    handle.valid = false;
+    VASurfaceID surf_id = getSurfaceID(target);
+    if (surf_id == VA_INVALID_ID) {
+        ETRACE("%s: get Surface ID fail", __FUNCTION__);
+        return handle;
+    }
+    handle.img = new VAImage();
+    if (handle.img == NULL) {
+        ETRACE("%s: create VAImage fail", __FUNCTION__);
+        return handle;
+    }
+    VAStatus st = vaDeriveImage(mDisplay, surf_id, handle.img);
+    if (st != VA_STATUS_SUCCESS) {
+        delete handle.img;
+        handle.img = NULL;
+        ETRACE("%s: vaDeriveImage fail %d", __FUNCTION__, st);
+        return handle;
+    }
+    st = vaMapBuffer(mDisplay, handle.img->buf, data);
+    if (st != VA_STATUS_SUCCESS) {
+        vaDestroyImage(mDisplay, handle.img->image_id);
+        delete handle.img;
+        handle.img = NULL;
+        ETRACE("%s: vaMapBuffer fail %d", __FUNCTION__, st);
+        return handle;
+    }
+    handle.valid = true;
+    for (int plane = 0; plane < 3; ++plane) {
+        offsets[plane] = handle.img->offsets[plane];
+        pitches[plane] = handle.img->pitches[plane];
+    }
+    return handle;
+}
+
+// Undoes mapData(): unmaps the image buffer, destroys the derived image, frees the VAImage.
+void JpegDecoder::unmapData(RenderTarget &target, JpegDecoder::MapHandle maphandle)
+{
+    // Nothing to release for a handle that was never successfully mapped.
+    if (!maphandle.valid || maphandle.img == NULL)
+        return;
+    vaUnmapBuffer(mDisplay, maphandle.img->buf);
+    vaDestroyImage(mDisplay, maphandle.img->image_id);
+    delete maphandle.img;
+}
+
+// Creates VA surfaces for `targets`, then the JPEG baseline config and a w x h decode context.
+// Returns JD_ALREADY_INITIALIZED if already set up, JD_RESOURCE_FAILURE if a surface cannot be
+// created, JD_INITIALIZATION_ERROR if a libva config/context call fails, else JD_SUCCESS.
+JpegDecodeStatus JpegDecoder::init(int w, int h, RenderTarget **targets, int num)
+{
+    if (mInitialized)
+        return JD_ALREADY_INITIALIZED;
+    Mutex::Autolock autoLock(mLock);
+    mBlitter->setDecoder(*this);
+    if (mInitialized)   // another thread finished init while we waited for the lock
+        return JD_SUCCESS;
+    mGrallocSurfaceMap.clear();
+    mDrmSurfaceMap.clear();
+    mNormalSurfaceMap.clear();
+    // NOTE(review): surfaces created here are not released on the error returns below - confirm.
+    for (int i = 0; i < num; ++i) {
+        VASurfaceID surfid = VA_INVALID_ID;
+        JpegDecodeStatus surf_st = createSurfaceFromRenderTarget(*targets[i], &surfid);
+        if (surf_st != JD_SUCCESS || surfid == VA_INVALID_ID) {
+            ETRACE("%s failed to create surface from RenderTarget handle 0x%x",
+                __FUNCTION__, targets[i]->handle);
+            return JD_RESOURCE_FAILURE;
+        }
+    }
+    VAConfigAttrib attrib;
+    attrib.type = VAConfigAttribRTFormat;
+    VAStatus st = vaGetConfigAttributes(mDisplay, VAProfileJPEGBaseline, VAEntrypointVLD, &attrib, 1);
+    if (st != VA_STATUS_SUCCESS) {
+        ETRACE("vaGetConfigAttributes failed. va_status = 0x%x", st);
+        return JD_INITIALIZATION_ERROR;
+    }
+    st = vaCreateConfig(mDisplay, VAProfileJPEGBaseline, VAEntrypointVLD, &attrib, 1, &mConfigId);
+    if (st != VA_STATUS_SUCCESS) {
+        ETRACE("vaCreateConfig failed. va_status = 0x%x", st);
+        return JD_INITIALIZATION_ERROR;
+    }
+    mContextId = VA_INVALID_ID;
+    const size_t gmsize = mGrallocSurfaceMap.size();
+    const size_t dmsize = mDrmSurfaceMap.size();
+    const size_t total = gmsize + dmsize + mNormalSurfaceMap.size();
+    VASurfaceID *surfaces = new VASurfaceID[total];
+    for (size_t i = 0; i < total; ++i) {
+        if (i < gmsize)
+            surfaces[i] = mGrallocSurfaceMap.valueAt(i);
+        else if (i < gmsize + dmsize)
+            surfaces[i] = mDrmSurfaceMap.valueAt(i - gmsize);
+        else
+            surfaces[i] = mNormalSurfaceMap.valueAt(i - gmsize - dmsize);
+    }
+    st = vaCreateContext(mDisplay, mConfigId, w, h, 0, surfaces, total, &mContextId);
+    delete[] surfaces;
+    if (st != VA_STATUS_SUCCESS) {
+        ETRACE("vaCreateContext failed. va_status = 0x%x", st);
+        // Do not leak the config created above when the context cannot be created.
+        vaDestroyConfig(mDisplay, mConfigId);
+        mConfigId = VA_INVALID_ID;
+        return JD_INITIALIZATION_ERROR;
+    }
+    VTRACE("vaconfig = %u, vacontext = %u", mConfigId, mContextId);
+    mInitialized = true;
+    return JD_SUCCESS;
+}
+
+JpegDecodeStatus JpegDecoder::blit(RenderTarget &src, RenderTarget &dst)
+{
+    return mBlitter->blit(src, dst); // delegate to the blitter; its status is returned as-is
+}
+
+JpegDecodeStatus JpegDecoder::parse(JpegInfo &jpginfo)
+{
+    uint32_t component_order = 0 ;
+    uint32_t dqt_ind = 0;
+    uint32_t dht_ind = 0;
+    uint32_t scan_ind = 0;
+    bool frame_marker_found = false;
+    int i;
+
+    parserInitialize(mParser, jpginfo.buf, jpginfo.bufsize);
+
+    uint8_t marker = mParser->getNextMarker(mParser);
+
+    while (marker != CODE_EOI &&( !mParser->endOfBuffer(mParser))) {
+        switch (marker) {
+            case CODE_SOI: {
+                 jpginfo.soi_offset = mParser->getByteOffset(mParser) - 2;
+                break;
+            }
+            // If the marker is an APP marker skip over the data
+            case CODE_APP0:
+            case CODE_APP1:
+            case CODE_APP2:
+            case CODE_APP3:
+            case CODE_APP4:
+            case CODE_APP5:
+            case CODE_APP6:
+            case CODE_APP7:
+            case CODE_APP8:
+            case CODE_APP9:
+            case CODE_APP10:
+            case CODE_APP11:
+            case CODE_APP12:
+            case CODE_APP13:
+            case CODE_APP14:
+            case CODE_APP15: {
+
+                uint32_t bytes_to_burn = mParser->readBytes(mParser, 2) - 2;
+                mParser->burnBytes(mParser, bytes_to_burn);
+                    break;
+            }
+            // Store offset to DQT data to avoid parsing bitstream in user mode
+            case CODE_DQT: {
+                if (dqt_ind < 4) {
+                    jpginfo.dqt_byte_offset[dqt_ind] = mParser->getByteOffset(mParser) - jpginfo.soi_offset;
+                    dqt_ind++;
+                    uint32_t bytes_to_burn = mParser->readBytes(mParser, 2 ) - 2;
+                    mParser->burnBytes( mParser, bytes_to_burn );
+                } else {
+                    ETRACE("ERROR: Decoder does not support more than 4 Quant Tables\n");
+                    return JD_ERROR_BITSTREAM;
+                }
+                break;
+            }
+            // Throw exception for all SOF marker other than SOF0
+            case CODE_SOF1:
+            case CODE_SOF2:
+            case CODE_SOF3:
+            case CODE_SOF5:
+            case CODE_SOF6:
+            case CODE_SOF7:
+            case CODE_SOF8:
+            case CODE_SOF9:
+            case CODE_SOF10:
+            case CODE_SOF11:
+            case CODE_SOF13:
+            case CODE_SOF14:
+            case CODE_SOF15: {
+                ETRACE("ERROR: unsupport SOF\n");
+                break;
+            }
+            // Parse component information in SOF marker
+            case CODE_SOF_BASELINE: {
+                frame_marker_found = true;
+
+                mParser->burnBytes(mParser, 2); // Throw away frame header length
+                uint8_t sample_precision = mParser->readNextByte(mParser);
+                if (sample_precision != 8) {
+                    ETRACE("sample_precision is not supported\n");
+                    return JD_ERROR_BITSTREAM;
+                }
+                // Extract pic width and height
+                jpginfo.picture_param_buf.picture_height = mParser->readBytes(mParser, 2);
+                jpginfo.picture_param_buf.picture_width = mParser->readBytes(mParser, 2);
+                jpginfo.picture_param_buf.num_components = mParser->readNextByte(mParser);
+
+                if (jpginfo.picture_param_buf.num_components > JPEG_MAX_COMPONENTS) {
+                    ETRACE("ERROR: reached max components\n");
+                    return JD_ERROR_BITSTREAM;
+                }
+                if (jpginfo.picture_param_buf.picture_height < HW_DECODE_MIN_HEIGHT
+                    || jpginfo.picture_param_buf.picture_width < HW_DECODE_MIN_WIDTH) {
+                    VTRACE("PERFORMANCE: %ux%u JPEG will decode faster with SW\n",
+                        jpginfo.picture_param_buf.picture_width,
+                        jpginfo.picture_param_buf.picture_height);
+                    return JD_ERROR_BITSTREAM;
+                }
+                uint8_t comp_ind = 0;
+                for (comp_ind = 0; comp_ind < jpginfo.picture_param_buf.num_components; comp_ind++) {
+                    jpginfo.picture_param_buf.components[comp_ind].component_id = mParser->readNextByte(mParser);
+
+                    uint8_t hv_sampling = mParser->readNextByte(mParser);
+                    jpginfo.picture_param_buf.components[comp_ind].h_sampling_factor = hv_sampling >> 4;
+                    jpginfo.picture_param_buf.components[comp_ind].v_sampling_factor = hv_sampling & 0xf;
+                    jpginfo.picture_param_buf.components[comp_ind].quantiser_table_selector = mParser->readNextByte(mParser);
+                }
+
+
+                break;
+            }
+            // Store offset to DHT data to avoid parsing bitstream in user mode
+            case CODE_DHT: {
+                if (dht_ind < 4) {
+                    jpginfo.dht_byte_offset[dht_ind] = mParser->getByteOffset(mParser) - jpginfo.soi_offset;
+                    dht_ind++;
+                    uint32_t bytes_to_burn = mParser->readBytes(mParser, 2) - 2;
+                    mParser->burnBytes(mParser, bytes_to_burn );
+                } else {
+                    ETRACE("ERROR: Decoder does not support more than 4 Huff Tables\n");
+                    return JD_ERROR_BITSTREAM;
+                }
+                break;
+            }
+            // Parse component information in SOS marker
+            case CODE_SOS: {
+                mParser->burnBytes(mParser, 2);
+                uint32_t component_in_scan = mParser->readNextByte(mParser);
+                uint8_t comp_ind = 0;
+
+                for (comp_ind = 0; comp_ind < component_in_scan; comp_ind++) {
+                    uint8_t comp_id = mParser->readNextByte(mParser);
+                    uint8_t comp_data_ind;
+                    for (comp_data_ind = 0; comp_data_ind < jpginfo.picture_param_buf.num_components; comp_data_ind++) {
+                        if (comp_id == jpginfo.picture_param_buf.components[comp_data_ind].component_id) {
+                            jpginfo.slice_param_buf[scan_ind].components[comp_ind].component_selector = comp_data_ind + 1;
+                            break;
+                        }
+                    }
+                    uint8_t huffman_tables = mParser->readNextByte(mParser);
+                    jpginfo.slice_param_buf[scan_ind].components[comp_ind].dc_table_selector = huffman_tables >> 4;
+                    jpginfo.slice_param_buf[scan_ind].components[comp_ind].ac_table_selector = huffman_tables & 0xf;
+                }
+                uint32_t curr_byte = mParser->readNextByte(mParser); // Ss
+                if (curr_byte != 0) {
+                    ETRACE("ERROR: curr_byte 0x%08x != 0\n", curr_byte);
+                    return JD_ERROR_BITSTREAM;
+                }
+                curr_byte = mParser->readNextByte(mParser);  // Se
+                if (curr_byte != 0x3f) {
+                    ETRACE("ERROR: curr_byte 0x%08x != 0x3f\n", curr_byte);
+                    return JD_ERROR_BITSTREAM;
+                }
+                curr_byte = mParser->readNextByte(mParser);  // Ah, Al
+                if (curr_byte != 0) {
+                    ETRACE("ERROR: curr_byte 0x%08x != 0\n", curr_byte);
+                    return JD_ERROR_BITSTREAM;
+                }
+                // Set slice control variables needed
+                jpginfo.slice_param_buf[scan_ind].slice_data_offset = mParser->getByteOffset(mParser) - jpginfo.soi_offset;
+                jpginfo.slice_param_buf[scan_ind].num_components = component_in_scan;
+                if (scan_ind) {
+                    /* If there is more than one scan, the slice for all but the final scan should only run up to the beginning of the next scan */
+                    jpginfo.slice_param_buf[scan_ind - 1].slice_data_size =
+                        (jpginfo.slice_param_buf[scan_ind].slice_data_offset - jpginfo.slice_param_buf[scan_ind - 1].slice_data_offset );;
+                    }
+                    scan_ind++;
+                    jpginfo.scan_ctrl_count++;   // gsDXVA2Globals.uiScanCtrlCount
+                    break;
+                }
+            case CODE_DRI: {
+                uint32_t size =  mParser->readBytes(mParser, 2);
+                jpginfo.slice_param_buf[scan_ind].restart_interval =  mParser->readBytes(mParser, 2);
+                mParser->burnBytes(mParser, (size - 4));
+                break;
+            }
+            default:
+                break;
+        }
+
+        marker = mParser->getNextMarker(mParser);
+        // If the EOI code is found, store the byte offset before the parsing finishes
+        if( marker == CODE_EOI ) {
+            jpginfo.eoi_offset = mParser->getByteOffset(mParser);
+        }
+
+    }
+
+    jpginfo.quant_tables_num = dqt_ind;
+    jpginfo.huffman_tables_num = dht_ind;
+
+    /* The slice for the last scan should run up to the end of the picture */
+    if (jpginfo.eoi_offset) {
+        jpginfo.slice_param_buf[scan_ind - 1].slice_data_size = (jpginfo.eoi_offset - jpginfo.slice_param_buf[scan_ind - 1].slice_data_offset);
+    }
+    else {
+        jpginfo.slice_param_buf[scan_ind - 1].slice_data_size = (jpginfo.bufsize - jpginfo.slice_param_buf[scan_ind - 1].slice_data_offset);
+    }
+    // throw AppException if SOF0 isn't found
+    if (!frame_marker_found) {
+        ETRACE("EEORR: Reached end of bitstream while trying to parse headers\n");
+        return JD_ERROR_BITSTREAM;
+    }
+
+    JpegDecodeStatus status = parseTableData(jpginfo);
+    if (status != JD_SUCCESS) {
+        ETRACE("ERROR: Parsing table data returns %d", status);
+        return JD_ERROR_BITSTREAM;
+    }
+
+    jpginfo.image_width = jpginfo.picture_param_buf.picture_width;
+    jpginfo.image_height = jpginfo.picture_param_buf.picture_height;
+    jpginfo.image_color_fourcc = sampFactor2Fourcc(jpginfo.picture_param_buf.components[0].h_sampling_factor,
+        jpginfo.picture_param_buf.components[1].h_sampling_factor,
+        jpginfo.picture_param_buf.components[2].h_sampling_factor,
+        jpginfo.picture_param_buf.components[0].v_sampling_factor,
+        jpginfo.picture_param_buf.components[1].v_sampling_factor,
+        jpginfo.picture_param_buf.components[2].v_sampling_factor);
+    jpginfo.image_pixel_format = fourcc2PixelFormat(jpginfo.image_color_fourcc);
+
+    VTRACE("%s jpg %ux%u, fourcc=%s, pixelformat=0x%x",
+        __FUNCTION__, jpginfo.image_width, jpginfo.image_height, fourcc2str(NULL, jpginfo.image_color_fourcc),
+        jpginfo.image_pixel_format);
+
+    if (!jpegColorFormatSupported(jpginfo))
+        return JD_INPUT_FORMAT_UNSUPPORTED;
+    return JD_SUCCESS;
+}
+
+// Creates a VA surface for the given render target and remembers it so that
+// getSurfaceID() can find it again later.
+//
+// The creator is chosen from target.type: createSurfaceInternal() for
+// RENDERTARGET_INTERNAL_BUFFER, createSurfaceDrm() for KERNEL_DRM and
+// createSurfaceGralloc() for ANDROID_GRALLOC. On success the new id is stored
+// in *surfid and added to the surface map of that target type, keyed by
+// target.handle.
+//
+// Returns JD_SUCCESS, the creator's status if surface creation failed, or
+// JD_RENDER_TARGET_TYPE_UNSUPPORTED for any other target type.
+JpegDecodeStatus JpegDecoder::createSurfaceFromRenderTarget(RenderTarget &target, VASurfaceID *surfid)
+{
+    JpegDecodeStatus st;
+
+    if (target.type == RENDERTARGET_INTERNAL_BUFFER) {
+        st = createSurfaceInternal(target.width, target.height, target.pixel_format,
+            target.handle, surfid);
+        if (st != JD_SUCCESS)
+            return st;
+        mNormalSurfaceMap.add(target.handle, *surfid);
+        VTRACE("%s added surface %u (internal buffer id %d) to SurfaceList",
+            __PRETTY_FUNCTION__, *surfid, target.handle);
+        return JD_SUCCESS;
+    }
+
+    switch (target.type) {
+    case RenderTarget::KERNEL_DRM:
+        st = createSurfaceDrm(target.width, target.height, target.pixel_format,
+            (unsigned long)target.handle, target.stride, surfid);
+        if (st != JD_SUCCESS)
+            return st;
+        mDrmSurfaceMap.add((unsigned long)target.handle, *surfid);
+        VTRACE("%s added surface %u (Drm handle %d) to DrmSurfaceMap",
+            __PRETTY_FUNCTION__, *surfid, target.handle);
+        break;
+    case RenderTarget::ANDROID_GRALLOC:
+        st = createSurfaceGralloc(target.width, target.height, target.pixel_format,
+            (buffer_handle_t)target.handle, target.stride, surfid);
+        if (st != JD_SUCCESS)
+            return st;
+        mGrallocSurfaceMap.add((buffer_handle_t)target.handle, *surfid);
+        // This branch fills mGrallocSurfaceMap, so the trace must name that map
+        // rather than DrmSurfaceMap.
+        VTRACE("%s added surface %u (Gralloc handle %d) to GrallocSurfaceMap",
+            __PRETTY_FUNCTION__, *surfid, target.handle);
+        break;
+    default:
+        return JD_RENDER_TARGET_TYPE_UNSUPPORTED;
+    }
+    return JD_SUCCESS;
+}
+
+// Allocates one VA surface of the requested size whose pixel-format attribute is
+// the fourcc derived from pixel_format. The id is written to *surf_id.
+// 'handle' is not used by this implementation.
+// Returns JD_RESOURCE_FAILURE if vaCreateSurfaces fails, JD_SUCCESS otherwise.
+JpegDecodeStatus JpegDecoder::createSurfaceInternal(int width, int height, int pixel_format, int handle, VASurfaceID *surf_id)
+{
+    const uint32_t fourcc = pixelFormat2Fourcc(pixel_format);
+    const uint32_t rt_format = fourcc2VaFormat(fourcc);
+
+    // Single settable attribute carrying the desired fourcc.
+    VASurfaceAttrib fmt_attrib;
+    fmt_attrib.type = VASurfaceAttribPixelFormat;
+    fmt_attrib.flags = VA_SURFACE_ATTRIB_SETTABLE;
+    fmt_attrib.value.type = VAGenericValueTypeInteger;
+    fmt_attrib.value.value.i = fourcc;
+
+    VTRACE("enter %s, pixel_format 0x%x, fourcc %s", __FUNCTION__, pixel_format, fourcc2str(NULL, fourcc));
+    const VAStatus result = vaCreateSurfaces(mDisplay, rt_format, width, height,
+                                             surf_id, 1, &fmt_attrib, 1);
+    if (result == VA_STATUS_SUCCESS)
+        return JD_SUCCESS;
+
+    ETRACE("%s: createSurface (format %u, fourcc %s) returns %d", __PRETTY_FUNCTION__, rt_format, fourcc2str(NULL, fourcc), result);
+    return JD_RESOURCE_FAILURE;
+}
+
+// Looks up the VA surface previously registered for 'target'.
+// The map searched depends on target.type; the key is target.handle, cast to
+// the key type of that map. Returns VA_INVALID_ID when no entry exists.
+// An unrecognised target type trips assert(false) and, if asserts are compiled
+// out, also yields VA_INVALID_ID.
+VASurfaceID JpegDecoder::getSurfaceID(RenderTarget &target) const
+{
+    int pos;
+
+    if (target.type == RENDERTARGET_INTERNAL_BUFFER) {
+        pos = mNormalSurfaceMap.indexOfKey(target.handle);
+        return (pos >= 0) ? mNormalSurfaceMap.valueAt(pos) : VA_INVALID_ID;
+    }
+
+    switch (target.type) {
+    case RenderTarget::KERNEL_DRM:
+        pos = mDrmSurfaceMap.indexOfKey((unsigned long)target.handle);
+        return (pos >= 0) ? mDrmSurfaceMap.valueAt(pos) : VA_INVALID_ID;
+    case RenderTarget::ANDROID_GRALLOC:
+        pos = mGrallocSurfaceMap.indexOfKey((buffer_handle_t)target.handle);
+        return (pos >= 0) ? mGrallocSurfaceMap.valueAt(pos) : VA_INVALID_ID;
+    default:
+        assert(false);
+        break;
+    }
+
+    return VA_INVALID_ID;
+}
+
+// Blocks until target's surface is idle; reports unknown targets and failed syncs.
+JpegDecodeStatus JpegDecoder::sync(RenderTarget &target)
+{
+    VASurfaceID surf_id = getSurfaceID(target);
+    if (surf_id == VA_INVALID_ID)
+        return JD_INVALID_RENDER_TARGET;
+    return (vaSyncSurface(mDisplay, surf_id) == VA_STATUS_SUCCESS) ? JD_SUCCESS : JD_DECODE_FAILURE;
+}
+// True while target's surface is not VASurfaceReady; false if unmapped or query fails.
+bool JpegDecoder::busy(RenderTarget &target) const
+{
+    VASurfaceID id = getSurfaceID(target);
+    if (id == VA_INVALID_ID)
+        return false;
+    VASurfaceStatus state;
+    if (vaQuerySurfaceStatus(mDisplay, id, &state) != VA_STATUS_SUCCESS)
+        return false;
+    return !(state == VASurfaceReady);
+}
+
+
+// Destroys every VA buffer recorded in bufs and leaves the list empty.
+static void destroyVaBuffers(VADisplay display, Vector<VABufferID> &bufs)
+{
+    while (bufs.size() > 0) {
+        vaDestroyBuffer(display, bufs.top());
+        bufs.pop();
+    }
+}
+
+// Submits the JPEG described by jpginfo for decoding into target's surface; it
+// does not wait for completion (see sync()/busy()). While scans are submitted
+// their slice_data_offset/slice_data_size in jpginfo.slice_param_buf are consumed.
+// Returns JD_SUCCESS, JD_RENDER_TARGET_NOT_INITIALIZED (no surface), JD_RENDER_TARGET_BUSY,
+// JD_RESOURCE_FAILURE (vaCreateBuffer) or JD_DECODE_FAILURE (other VA calls).
+JpegDecodeStatus JpegDecoder::decode(JpegInfo &jpginfo, RenderTarget &target)
+{
+    VAStatus va_status = VA_STATUS_SUCCESS;
+    VASurfaceStatus surf_status;
+    VABufferID desc_buf[5];
+    uint32_t scan_idx = 0;
+    uint32_t buf_idx = 0;
+    uint32_t chopping = VA_SLICE_DATA_FLAG_ALL;
+    uint32_t bytes_remaining;
+    VASurfaceID surf_id = getSurfaceID(target);
+    if (surf_id == VA_INVALID_ID)
+        return JD_RENDER_TARGET_NOT_INITIALIZED;
+    va_status = vaQuerySurfaceStatus(mDisplay, surf_id, &surf_status);
+    if (va_status != VA_STATUS_SUCCESS) { // surf_status is not valid in this case
+        ETRACE("vaQuerySurfaceStatus failed. va_status = 0x%x", va_status);
+        return JD_DECODE_FAILURE;
+    }
+    if (surf_status != VASurfaceReady)
+        return JD_RENDER_TARGET_BUSY;
+    if (jpginfo.eoi_offset)
+        bytes_remaining = jpginfo.eoi_offset - jpginfo.soi_offset;
+    else
+        bytes_remaining = jpginfo.bufsize - jpginfo.soi_offset;
+    uint32_t src_offset = jpginfo.soi_offset;
+    const uint32_t bitstream_buffer_size = jpginfo.bufsize;
+    Vector<VABufferID> buf_list; // tracks every created buffer so all exit paths can release them
+    va_status = vaBeginPicture(mDisplay, mContextId, surf_id);
+    if (va_status != VA_STATUS_SUCCESS) {
+        ETRACE("vaBeginPicture failed. va_status = 0x%x", va_status);
+        return JD_DECODE_FAILURE;
+    }
+    va_status = vaCreateBuffer(mDisplay, mContextId, VAPictureParameterBufferType, sizeof(VAPictureParameterBufferJPEGBaseline), 1, &jpginfo.picture_param_buf, &desc_buf[buf_idx]);
+    if (va_status != VA_STATUS_SUCCESS) {
+        ETRACE("vaCreateBuffer VAPictureParameterBufferType failed. va_status = 0x%x", va_status);
+        return JD_RESOURCE_FAILURE;
+    }
+    buf_list.add(desc_buf[buf_idx++]);
+    va_status = vaCreateBuffer(mDisplay, mContextId, VAIQMatrixBufferType, sizeof(VAIQMatrixBufferJPEGBaseline), 1, &jpginfo.qmatrix_buf, &desc_buf[buf_idx]);
+    if (va_status != VA_STATUS_SUCCESS) {
+        ETRACE("vaCreateBuffer VAIQMatrixBufferType failed. va_status = 0x%x", va_status);
+        destroyVaBuffers(mDisplay, buf_list);
+        return JD_RESOURCE_FAILURE;
+    }
+    buf_list.add(desc_buf[buf_idx++]);
+    va_status = vaCreateBuffer(mDisplay, mContextId, VAHuffmanTableBufferType, sizeof(VAHuffmanTableBufferJPEGBaseline), 1, &jpginfo.hufman_table_buf, &desc_buf[buf_idx]);
+    if (va_status != VA_STATUS_SUCCESS) {
+        ETRACE("vaCreateBuffer VAHuffmanTableBufferType failed. va_status = 0x%x", va_status);
+        destroyVaBuffers(mDisplay, buf_list);
+        return JD_RESOURCE_FAILURE;
+    }
+    buf_list.add(desc_buf[buf_idx++]);
+
+    do {
+        /* Get Bitstream Buffer */
+        uint32_t bytes = ( bytes_remaining < bitstream_buffer_size ) ? bytes_remaining : bitstream_buffer_size;
+        bytes_remaining -= bytes;
+        /* Get Slice Control Buffer */
+        VASliceParameterBufferJPEGBaseline dest_scan_ctrl[JPEG_MAX_COMPONENTS];
+        uint32_t src_idx = 0;
+        uint32_t dest_idx = 0;
+        memset(dest_scan_ctrl, 0, sizeof(dest_scan_ctrl));
+        for (src_idx = scan_idx; src_idx < jpginfo.scan_ctrl_count ; src_idx++) {
+            if (jpginfo.slice_param_buf[ src_idx ].slice_data_offset) {
+                /* new scan, reset state machine */
+                chopping = VA_SLICE_DATA_FLAG_ALL;
+                VTRACE("Scan:%i FileOffset:%x Bytes:%x \n", src_idx,
+                    jpginfo.slice_param_buf[ src_idx ].slice_data_offset,
+                    jpginfo.slice_param_buf[ src_idx ].slice_data_size );
+                /* does the slice end in the buffer */
+                if (jpginfo.slice_param_buf[ src_idx ].slice_data_offset + jpginfo.slice_param_buf[ src_idx ].slice_data_size > bytes + src_offset)
+                    chopping = VA_SLICE_DATA_FLAG_BEGIN;
+            } else if (jpginfo.slice_param_buf[ src_idx ].slice_data_size > bytes) {
+                chopping = VA_SLICE_DATA_FLAG_MIDDLE;
+            } else if ((chopping == VA_SLICE_DATA_FLAG_BEGIN) || (chopping == VA_SLICE_DATA_FLAG_MIDDLE)) {
+                chopping = VA_SLICE_DATA_FLAG_END;
+            }
+            dest_scan_ctrl[dest_idx].slice_data_flag = chopping;
+            dest_scan_ctrl[dest_idx].slice_data_offset = ((chopping == VA_SLICE_DATA_FLAG_ALL) || (chopping == VA_SLICE_DATA_FLAG_BEGIN))
+                ? jpginfo.slice_param_buf[ src_idx ].slice_data_offset : 0;
+            const int32_t bytes_in_seg = bytes - dest_scan_ctrl[dest_idx].slice_data_offset;
+            const uint32_t scan_data = (bytes_in_seg < jpginfo.slice_param_buf[src_idx].slice_data_size) ? bytes_in_seg : jpginfo.slice_param_buf[src_idx].slice_data_size ;
+            jpginfo.slice_param_buf[src_idx].slice_data_offset = 0;
+            jpginfo.slice_param_buf[src_idx].slice_data_size -= scan_data;
+            dest_scan_ctrl[dest_idx].slice_data_size = scan_data;
+            dest_scan_ctrl[dest_idx].num_components = jpginfo.slice_param_buf[src_idx].num_components;
+            dest_scan_ctrl[dest_idx].restart_interval = jpginfo.slice_param_buf[src_idx].restart_interval;
+            memcpy(&dest_scan_ctrl[dest_idx].components, & jpginfo.slice_param_buf[ src_idx ].components, sizeof(jpginfo.slice_param_buf[ src_idx ].components) );
+            dest_idx++;
+            /* a scan that continues in the next chunk ends this chunk's scan list */
+            if ((chopping != VA_SLICE_DATA_FLAG_ALL) && (chopping != VA_SLICE_DATA_FLAG_END))
+                break;
+        }
+        scan_idx = src_idx;
+        va_status = vaCreateBuffer(mDisplay, mContextId, VASliceParameterBufferType, sizeof(VASliceParameterBufferJPEGBaseline) * dest_idx, 1, dest_scan_ctrl, &desc_buf[buf_idx]);
+        if (va_status != VA_STATUS_SUCCESS) {
+            ETRACE("vaCreateBuffer VASliceParameterBufferType failed. va_status = 0x%x", va_status);
+            destroyVaBuffers(mDisplay, buf_list);
+            return JD_RESOURCE_FAILURE;
+        }
+        buf_list.add(desc_buf[buf_idx++]);
+        va_status = vaCreateBuffer(mDisplay, mContextId, VASliceDataBufferType, bytes, 1, &jpginfo.buf[ src_offset ], &desc_buf[buf_idx]);
+        if (va_status != VA_STATUS_SUCCESS) {
+            ETRACE("vaCreateBuffer VASliceDataBufferType (%u bytes) failed. va_status = 0x%x", bytes, va_status);
+            destroyVaBuffers(mDisplay, buf_list);
+            return JD_RESOURCE_FAILURE;
+        }
+        buf_list.add(desc_buf[buf_idx++]); // track the id only once creation has succeeded
+        va_status = vaRenderPicture( mDisplay, mContextId, desc_buf, buf_idx);
+        if (va_status != VA_STATUS_SUCCESS) {
+            ETRACE("vaRenderPicture failed. va_status = 0x%x", va_status);
+            destroyVaBuffers(mDisplay, buf_list);
+            return JD_DECODE_FAILURE;
+        }
+        buf_idx = 0;
+        src_offset += bytes;
+    } while (bytes_remaining);
+    va_status = vaEndPicture(mDisplay, mContextId);
+    destroyVaBuffers(mDisplay, buf_list);
+    if (va_status != VA_STATUS_SUCCESS) {
+        ETRACE("vaEndPicture failed. va_status = 0x%x", va_status);
+        return JD_DECODE_FAILURE;
+    }
+    return JD_SUCCESS;
+}
+// Destroys the VA context, config and all mapped surfaces, then clears the maps.
+// No-op unless mInitialized is set; the flag is re-checked once mLock is held.
+void JpegDecoder::deinit()
+{
+    if (!mInitialized)
+        return;
+    Mutex::Autolock autoLock(mLock);
+    if (!mInitialized)
+        return;
+    vaDestroyContext(mDisplay, mContextId);
+    vaDestroyConfig(mDisplay, mConfigId);
+    mInitialized = false;
+    VASurfaceID doomed;
+    for (size_t g = 0, count = mGrallocSurfaceMap.size(); g < count; ++g) {
+        doomed = mGrallocSurfaceMap.valueAt(g);
+        vaDestroySurfaces(mDisplay, &doomed, 1);
+    }
+    for (size_t d = 0, count = mDrmSurfaceMap.size(); d < count; ++d) {
+        doomed = mDrmSurfaceMap.valueAt(d);
+        vaDestroySurfaces(mDisplay, &doomed, 1);
+    }
+    for (size_t n = 0, count = mNormalSurfaceMap.size(); n < count; ++n) {
+        doomed = mNormalSurfaceMap.valueAt(n);
+        vaDestroySurfaces(mDisplay, &doomed, 1);
+    }
+    mGrallocSurfaceMap.clear();
+    mDrmSurfaceMap.clear();
+    mNormalSurfaceMap.clear();
+}
+
+// Fills jpginfo.qmatrix_buf and jpginfo.hufman_table_buf from the DQT and DHT
+// segments located at jpginfo.dqt_byte_offset[] / jpginfo.dht_byte_offset[].
+// Table ids, code counts and lengths come from the bitstream, so each is checked
+// against the destination arrays before it is used. Returns JD_ERROR_BITSTREAM for
+// non-8-bit quantiser tables or inconsistent segment sizes, else JD_SUCCESS.
+JpegDecodeStatus JpegDecoder::parseTableData(JpegInfo &jpginfo) {
+    parserInitialize(mParser, jpginfo.buf, jpginfo.bufsize);
+    // Parse Quant tables
+    memset(&jpginfo.qmatrix_buf, 0, sizeof(jpginfo.qmatrix_buf));
+    for (uint32_t dqt_ind = 0; dqt_ind < jpginfo.quant_tables_num; dqt_ind++) {
+        if (!mParser->setByteOffset(mParser, jpginfo.dqt_byte_offset[dqt_ind]))
+            continue;
+        const uint32_t segment_bytes = mParser->readBytes(mParser, 2);
+        if (segment_bytes < 2) { // the length field counts itself; less would wrap below
+            ETRACE("%s ERROR: DQT segment length %u is invalid", __FUNCTION__, segment_bytes);
+            return JD_ERROR_BITSTREAM;
+        }
+        uint32_t table_bytes = segment_bytes - 2;
+        while (table_bytes) {
+            uint32_t table_info = mParser->readNextByte(mParser);
+            table_bytes--;
+            uint32_t table_length = table_bytes > 64 ? 64 : table_bytes;
+            uint32_t table_precision = table_info >> 4;
+            if (table_precision != 0) {
+                ETRACE("%s ERROR: Parsing table data returns %d", __FUNCTION__, JD_ERROR_BITSTREAM);
+                return JD_ERROR_BITSTREAM;
+            }
+            uint32_t table_id = table_info & 0xf;
+            if (table_id < JPEG_MAX_QUANT_TABLES) {
+                // Only flag a table as loaded once its id is known to be in range.
+                jpginfo.qmatrix_buf.load_quantiser_table[table_id] = 1;
+                // Pull Quant table data from bitstream
+                for (uint32_t byte_ind = 0; byte_ind < table_length; byte_ind++) {
+                    jpginfo.qmatrix_buf.quantiser_table[table_id][byte_ind] = mParser->readNextByte(mParser);
+                }
+            } else {
+                ETRACE("%s DQT table ID is not supported", __FUNCTION__);
+                mParser->burnBytes(mParser, table_length);
+            }
+            table_bytes -= table_length;
+        }
+    }
+
+    // Parse Huffman tables
+    memset(&jpginfo.hufman_table_buf, 0, sizeof(jpginfo.hufman_table_buf));
+    for (uint32_t dht_ind = 0; dht_ind < jpginfo.huffman_tables_num; dht_ind++) {
+        if (!mParser->setByteOffset(mParser, jpginfo.dht_byte_offset[dht_ind]))
+            continue;
+        const uint32_t segment_bytes = mParser->readBytes(mParser, 2);
+        if (segment_bytes < 2) {
+            ETRACE("%s ERROR: DHT segment length %u is invalid", __FUNCTION__, segment_bytes);
+            return JD_ERROR_BITSTREAM;
+        }
+        uint32_t table_bytes = segment_bytes - 2;
+        while (table_bytes) {
+            uint32_t table_info = mParser->readNextByte(mParser);
+            table_bytes--;
+            uint32_t table_class = table_info >> 4; // Identifies whether the table is for AC or DC
+            uint32_t table_id = table_info & 0xf;
+            if (table_bytes < 16) {
+                ETRACE("%s ERROR: DHT segment too short for code counts", __FUNCTION__);
+                return JD_ERROR_BITSTREAM;
+            }
+            // Read the 16 per-length code counts; their sum is the number of values.
+            uint8_t code_counts[16];
+            uint32_t table_entries = 0;
+            for (uint32_t bit_ind = 0; bit_ind < 16; bit_ind++) {
+                code_counts[bit_ind] = mParser->readNextByte(mParser);
+                table_entries += code_counts[bit_ind];
+            }
+            table_bytes -= 16;
+            if (table_entries > table_bytes) {
+                ETRACE("%s ERROR: DHT declares %u values but only %u bytes remain", __FUNCTION__, table_entries, table_bytes);
+                return JD_ERROR_BITSTREAM;
+            }
+            if ((table_class < TABLE_CLASS_NUM) && (table_id < JPEG_MAX_SETS_HUFFMAN_TABLES)) {
+                const uint32_t capacity = (table_class == 0)
+                    ? sizeof(jpginfo.hufman_table_buf.huffman_table[table_id].dc_values) / sizeof(jpginfo.hufman_table_buf.huffman_table[table_id].dc_values[0])
+                    : sizeof(jpginfo.hufman_table_buf.huffman_table[table_id].ac_values) / sizeof(jpginfo.hufman_table_buf.huffman_table[table_id].ac_values[0]);
+                if (table_entries > capacity) {
+                    ETRACE("%s ERROR: DHT declares %u values, table holds %u", __FUNCTION__, table_entries, capacity);
+                    return JD_ERROR_BITSTREAM;
+                }
+                jpginfo.hufman_table_buf.load_huffman_table[table_id] = 1;
+                for (uint32_t bit_ind = 0; bit_ind < 16; bit_ind++) {
+                    if (table_class == 0)
+                        jpginfo.hufman_table_buf.huffman_table[table_id].num_dc_codes[bit_ind] = code_counts[bit_ind];
+                    else
+                        jpginfo.hufman_table_buf.huffman_table[table_id].num_ac_codes[bit_ind] = code_counts[bit_ind];
+                }
+                // Create table of code values
+                for (uint32_t tbl_ind = 0; tbl_ind < table_entries; tbl_ind++) {
+                    const uint8_t value = mParser->readNextByte(mParser);
+                    if (table_class == 0)
+                        jpginfo.hufman_table_buf.huffman_table[table_id].dc_values[tbl_ind] = value;
+                    else
+                        jpginfo.hufman_table_buf.huffman_table[table_id].ac_values[tbl_ind] = value;
+                }
+            } else {
+                ETRACE("%s DHT table ID is not supported", __FUNCTION__);
+                mParser->burnBytes(mParser, table_entries);
+            }
+            table_bytes -= table_entries;
+        }
+    }
+
+    return JD_SUCCESS;
+}
+
diff --git a/imagedecoder/JPEGDecoder.h b/imagedecoder/JPEGDecoder.h
index a77db7a..f46611f 100644
--- a/imagedecoder/JPEGDecoder.h
+++ b/imagedecoder/JPEGDecoder.h
@@ -1,5 +1,5 @@
 /* INTEL CONFIDENTIAL
-* Copyright (c) 2012 Intel Corporation.  All rights reserved.
+* Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
 * Copyright (c) Imagination Technologies Limited, UK
 *
 * The source code contained or described herein and all documents
@@ -23,91 +23,67 @@
 *
 * Authors:
 *    Nana Guo <nana.n.guo@intel.com>
+*    Yao Cheng <yao.cheng@intel.com>
 *
 */
 
-#ifndef JDLIBVA_H
-#define JDLIBVA_H
 
-#include "JPEGParser.h"
-#include <pthread.h>
-#include <va/va.h>
-//#include <va/va_android.h>
-#include "va/va_dec_jpeg.h"
-#include <stdio.h>
-#define HAVE_BOOLEAN
-#include "jpeglib.h"
-#include <hardware/gralloc.h>
+#ifndef JPEGDEC_H
+#define JPEGDEC_H
 
-#define Display unsigned int
-#define BOOL int
+#include "../videovpp/VideoVPPBase.h"
+#include <utils/KeyedVector.h>
+#include <utils/threads.h>
+#include "JPEGCommon.h"
+using namespace android;
 
-#define JPEG_MAX_COMPONENTS 4
-#define JPEG_MAX_QUANT_TABLES 4
+struct CJPEGParse;
+class JpegBlitter;
 
-typedef struct {
-    Display * android_display;
-    uint32_t surface_count;
-    VADisplay va_display;
-    VAContextID va_context;
-    VASurfaceID* va_surfaces;
-    VAConfigID va_config;
+// JPEG decoder built on VA surfaces and buffers. Non thread-safe
+class JpegDecoder
+{
+friend class JpegBlitter;
+public:
+    struct MapHandle // returned by mapData() and handed back to unmapData()
+    {
+    friend class JpegDecoder;
+    public:
+        bool valid;
+    private:
+        VAImage *img; // private: reachable only through the JpegDecoder friendship
+    };
+    JpegDecoder();
+    virtual ~JpegDecoder();
+    virtual JpegDecodeStatus init(int width, int height, RenderTarget **targets, int num);
+    virtual void deinit(); // destroys the VA context, config and every mapped surface
+    virtual JpegDecodeStatus parse(JpegInfo &jpginfo); // walks the JPEG markers and fills jpginfo
+    virtual JpegDecodeStatus decode(JpegInfo &jpginfo, RenderTarget &target); // submits the decode; does not wait for it
+    virtual JpegDecodeStatus sync(RenderTarget &target); // calls vaSyncSurface on target's surface
+    virtual bool busy(RenderTarget &target) const; // true while target's surface is not VASurfaceReady
+    virtual JpegDecodeStatus blit(RenderTarget &src, RenderTarget &dst);
+    virtual MapHandle mapData(RenderTarget &target, void ** data, uint32_t * offsets, uint32_t * pitches);
+    virtual void unmapData(RenderTarget &target, MapHandle maphandle);
+private:
+    bool mInitialized; // checked by deinit(), which clears it while holding mLock
+    mutable Mutex mLock; // taken by deinit()
+    VADisplay mDisplay;
+    VAConfigID mConfigId;
+    VAContextID mContextId;
+    CJPEGParse *mParser; // bitstream parser used by parse() and parseTableData()
+    JpegBlitter *mBlitter;
+    KeyedVector<buffer_handle_t, VASurfaceID> mGrallocSurfaceMap; // surfaces of ANDROID_GRALLOC targets, keyed by handle
+    KeyedVector<unsigned long, VASurfaceID> mDrmSurfaceMap; // surfaces of KERNEL_DRM targets, keyed by handle
+    KeyedVector<int, VASurfaceID> mNormalSurfaceMap; // surfaces of internal-buffer targets, keyed by handle
+    virtual VASurfaceID getSurfaceID(RenderTarget &target) const; // VA_INVALID_ID when target has no surface
+    virtual JpegDecodeStatus parseTableData(JpegInfo &jpginfo); // fills qmatrix_buf / hufman_table_buf from DQT / DHT data
+    virtual bool jpegColorFormatSupported(JpegInfo &jpginfo) const;
+    virtual JpegDecodeStatus createSurfaceFromRenderTarget(RenderTarget &target, VASurfaceID *surf_id); // creates and registers a surface for target
+    virtual JpegDecodeStatus createSurfaceInternal(int width, int height, int pixel_format, int handle, VASurfaceID *surf_id);
+    virtual JpegDecodeStatus createSurfaceDrm(int width, int height, int pixel_format, unsigned long boname, int stride, VASurfaceID *surf_id);
+    virtual JpegDecodeStatus createSurfaceGralloc(int width, int height, int pixel_format, buffer_handle_t handle, int stride, VASurfaceID *surf_id);
+};
 
-    VAPictureParameterBufferJPEGBaseline picture_param_buf;
-    VASliceParameterBufferJPEGBaseline slice_param_buf[JPEG_MAX_COMPONENTS];
-    VAIQMatrixBufferJPEGBaseline qmatrix_buf;
-    VAHuffmanTableBufferJPEGBaseline hufman_table_buf;
-
-    uint32_t dht_byte_offset[4];
-    uint32_t dqt_byte_offset[4];
-    uint32_t huffman_tables_num;
-    uint32_t quant_tables_num;
-    uint32_t soi_offset;
-    uint32_t eoi_offset;
-
-    uint8_t* bitstream_buf;
-    uint32_t image_width;
-    uint32_t image_height;
-    uint32_t scan_ctrl_count;
-
-    uint8_t * image_buf;
-    VAImage surface_image;
-    boolean hw_state_ready;
-    boolean hw_caps_ready;
-    boolean hw_path;
-    boolean initialized;
-    boolean resource_allocated;
-
-    uint32_t file_size;
-    uint32_t rotation;
-    CJPEGParse* JPEGParser;
-
-    char ** output_image;
-    uint32_t output_lines;
-    uint32_t fourcc;
-} jd_libva_struct;
-
-typedef enum {
-    DECODE_NOT_STARTED = -6,
-    DECODE_INVALID_DATA = -5,
-    DECODE_DRIVER_FAIL = -4,
-    DECODE_PARSER_FAIL = -3,
-    DECODE_MEMORY_FAIL = -2,
-    DECODE_FAIL = -1,
-    DECODE_SUCCESS = 0,
-
-} IMAGE_DECODE_STATUS;
-
-typedef int32_t Decode_Status;
-
-extern jd_libva_struct jd_libva;
-
-Decode_Status jdva_initialize (jd_libva_struct * jd_libva_ptr);
-void jdva_deinitialize (jd_libva_struct * jd_libva_ptr);
-Decode_Status jdva_decode (j_decompress_ptr cinfo, jd_libva_struct * jd_libva_ptr);
-Decode_Status jdva_create_resource (jd_libva_struct * jd_libva_ptr);
-Decode_Status jdva_release_resource (jd_libva_struct * jd_libva_ptr);
-Decode_Status parseBitstream(j_decompress_ptr cinfo, jd_libva_struct * jd_libva_ptr);
-Decode_Status parseTableData(j_decompress_ptr cinfo, jd_libva_struct * jd_libva_ptr);
 
 #endif
+
diff --git a/imagedecoder/JPEGDecoder_gen.cpp b/imagedecoder/JPEGDecoder_gen.cpp
new file mode 100644
index 0000000..9b5f242
--- /dev/null
+++ b/imagedecoder/JPEGDecoder_gen.cpp
@@ -0,0 +1,210 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2013 Intel Corporation.  All rights reserved.
+* Copyright (c) Imagination Technologies Limited, UK
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+* Authors:
+*    Yao Cheng <yao.cheng@intel.com>
+*
+*/
+//#define LOG_NDEBUG 0
+
+#include "va/va.h"
+#include "va/va_vpp.h"
+#include "va/va_drmcommon.h"
+#include "JPEGDecoder.h"
+#include "ImageDecoderTrace.h"
+#include <string.h>
+#include <sys/types.h>
+#include <time.h>
+#include "JPEGCommon_Gen.h"
+
+/*
+ * Translate a VA fourcc into the Android HAL pixel format used for it.
+ * Returns -1 when the fourcc has no HAL counterpart here.
+ */
+int fourcc2PixelFormat(uint32_t fourcc)
+{
+    static const struct { uint32_t va; int hal; } kMap[] = {
+        { VA_FOURCC_YV12, HAL_PIXEL_FORMAT_YV12 },
+        { VA_FOURCC_422H, HAL_PIXEL_FORMAT_YCbCr_422_H_INTEL },
+        { VA_FOURCC_YUY2, HAL_PIXEL_FORMAT_YCbCr_422_I },
+        { VA_FOURCC_NV12, HAL_PIXEL_FORMAT_NV12_TILED_INTEL },
+        { VA_FOURCC_RGBA, HAL_PIXEL_FORMAT_RGBA_8888 },
+        { VA_FOURCC_IMC3, HAL_PIXEL_FORMAT_IMC3 },
+        { VA_FOURCC_444P, HAL_PIXEL_FORMAT_444P },
+    };
+    const unsigned int count = sizeof(kMap) / sizeof(kMap[0]);
+    for (unsigned int idx = 0; idx < count; ++idx) {
+        if (kMap[idx].va == fourcc)
+            return kMap[idx].hal;
+    }
+    // VA_FOURCC_422V, VA_FOURCC_411P and all other values are unmapped.
+    return -1;
+}
+/*
+ * Translate an Android HAL pixel format into the matching VA fourcc.
+ * Returns 0 when the format is not one of the mapped ones.
+ */
+uint32_t pixelFormat2Fourcc(int pixel_format)
+{
+    static const struct { int hal; uint32_t va; } kMap[] = {
+        { HAL_PIXEL_FORMAT_YV12, VA_FOURCC_YV12 },
+        { HAL_PIXEL_FORMAT_YCbCr_422_H_INTEL, VA_FOURCC_422H },
+        { HAL_PIXEL_FORMAT_YCbCr_422_I, VA_FOURCC_YUY2 },
+        { HAL_PIXEL_FORMAT_NV12_TILED_INTEL, VA_FOURCC_NV12 },
+        { HAL_PIXEL_FORMAT_RGBA_8888, VA_FOURCC_RGBA },
+        { HAL_PIXEL_FORMAT_444P, VA_FOURCC_444P },
+        { HAL_PIXEL_FORMAT_IMC3, VA_FOURCC_IMC3 },
+    };
+    for (unsigned int idx = 0; idx < sizeof(kMap) / sizeof(kMap[0]); ++idx) {
+        if (kMap[idx].hal == pixel_format)
+            return kMap[idx].va;
+    }
+    return 0;
+}
+
+//#define LOG_TAG "ImageDecoder"
+
+#define JD_CHECK(err, label) \
+        if (err) { \
+            ETRACE("%s::%d: failed: %d", __PRETTY_FUNCTION__, __LINE__, err); \
+            goto label; \
+        }
+
+#define JD_CHECK_RET(err, label, retcode) \
+        if (err) { \
+            status = retcode; \
+            ETRACE("%s::%d: failed: %d", __PRETTY_FUNCTION__, __LINE__, err); \
+            goto label; \
+        }
+
+bool JpegDecoder::jpegColorFormatSupported(JpegInfo &jpginfo) const
+{
+    // Accepted JPEG layouts are IMC3, 422H and 444P; anything else is rejected.
+    const uint32_t fmt = jpginfo.image_color_fourcc;
+    return fmt == VA_FOURCC_IMC3 || fmt == VA_FOURCC_422H || fmt == VA_FOURCC_444P;
+}
+
+/*
+ * Create a VA surface that wraps an existing DRM buffer object, identified by
+ * boname. Returns JD_SUCCESS, or JD_RESOURCE_FAILURE if vaCreateSurfaces() fails.
+ */
+JpegDecodeStatus JpegDecoder::createSurfaceDrm(int width, int height, int pixel_format, unsigned long boname, int stride, VASurfaceID *surf_id)
+{
+    const uint32_t fourcc = pixelFormat2Fourcc(pixel_format);
+    VASurfaceAttribExternalBuffers  vaSurfaceExternBuf;
+    VASurfaceAttrib                 attrib_list;
+    // Clear both descriptors first: only some of their fields are assigned
+    // below, and the rest would otherwise reach the driver as stack garbage.
+    memset(&vaSurfaceExternBuf, 0, sizeof(vaSurfaceExternBuf));
+    memset(&attrib_list, 0, sizeof(attrib_list));
+    vaSurfaceExternBuf.pixel_format = fourcc;
+    VTRACE("%s extBuf.pixel_format is %s", __FUNCTION__, fourcc2str(NULL, fourcc));
+    vaSurfaceExternBuf.width        = width;
+    vaSurfaceExternBuf.height       = height;
+    vaSurfaceExternBuf.pitches[0]   = stride;
+    vaSurfaceExternBuf.buffers      = &boname;
+    vaSurfaceExternBuf.num_buffers  = 1;
+    vaSurfaceExternBuf.flags        = VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM;
+    attrib_list.type          = VASurfaceAttribExternalBufferDescriptor;
+    attrib_list.flags         = VA_SURFACE_ATTRIB_SETTABLE;
+    attrib_list.value.type    = VAGenericValueTypePointer;
+    attrib_list.value.value.p = (void *)&vaSurfaceExternBuf;
+    VAStatus st = vaCreateSurfaces(mDisplay, fourcc2VaFormat(fourcc), width, height,
+            surf_id, 1, &attrib_list, 1);
+    VTRACE("%s createSurface DRM for vaformat %u, fourcc %s", __FUNCTION__, fourcc2VaFormat(fourcc), fourcc2str(NULL, fourcc));
+    if (st != VA_STATUS_SUCCESS) {
+        ETRACE("%s: vaCreateSurfaces returns %d", __PRETTY_FUNCTION__, st);
+        return JD_RESOURCE_FAILURE;
+    }
+    return JD_SUCCESS;
+}
+
+/*
+ * Create a VA surface for a target described by an Android buffer handle.
+ * 422H/YUY2/RGBA targets wrap the caller's gralloc buffer (through its buffer
+ * object name and createSurfaceDrm()); other formats get a libva-allocated one.
+ * Returns JD_SUCCESS or JD_RESOURCE_FAILURE.
+ */
+JpegDecodeStatus JpegDecoder::createSurfaceGralloc(int width, int height, int pixel_format, buffer_handle_t handle, int stride, VASurfaceID *surf_id)
+{
+    unsigned long boname;
+    hw_module_t const* module = NULL;
+    alloc_device_t *allocdev = NULL;
+    struct gralloc_module_t *gralloc_module = NULL;
+    JpegDecodeStatus st = JD_RESOURCE_FAILURE;   // what every "goto cleanup" returns
+    int err;
+
+    uint32_t fourcc = pixelFormat2Fourcc(pixel_format);
+    VTRACE("enter %s, pixel_format 0x%x, fourcc %s", __FUNCTION__, pixel_format, fourcc2str(NULL, fourcc));
+    // These tests have to be AND-ed: joined with || the condition is true for
+    // every fourcc, which made the gralloc path below unreachable.
+    if ((fourcc != VA_FOURCC_422H) &&
+        (fourcc != VA_FOURCC_YUY2) &&
+        (fourcc != VA_FOURCC_RGBA)){
+        VASurfaceAttrib attrib;
+        attrib.type = VASurfaceAttribPixelFormat;
+        attrib.flags = VA_SURFACE_ATTRIB_SETTABLE;
+        attrib.value.type = VAGenericValueTypeInteger;
+        attrib.value.value.i = fourcc;
+        VAStatus va_status = vaCreateSurfaces(mDisplay, fourcc2VaFormat(fourcc),
+                                    width, height, surf_id, 1, &attrib, 1);
+        VTRACE("%s createSurface for %s", __FUNCTION__, fourcc2str(NULL, fourcc));
+        if (va_status != VA_STATUS_SUCCESS)
+            return JD_RESOURCE_FAILURE;
+        return JD_SUCCESS;
+    }
+
+    err = hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &module);
+    if (err) {
+        ETRACE("%s failed to get gralloc module", __PRETTY_FUNCTION__);
+    }
+    JD_CHECK(err, cleanup);
+    gralloc_module = (struct gralloc_module_t *)module;
+    err = gralloc_open(module, &allocdev);
+    if (err) {
+        ETRACE("%s failed to open alloc device", __PRETTY_FUNCTION__);
+    }
+    JD_CHECK(err, cleanup);
+    err = gralloc_module->perform(gralloc_module,
+        INTEL_UFO_GRALLOC_MODULE_PERFORM_GET_BO_NAME,
+        handle,
+        &boname);
+    if (err) {
+        ETRACE("%s failed to get boname via gralloc->perform", __PRETTY_FUNCTION__);
+    }
+    JD_CHECK(err, cleanup);
+    VTRACE("YAO %s fourcc %s luma_stride is %d", __FUNCTION__,
+        fourcc2str(NULL, fourcc), stride);
+
+    gralloc_close(allocdev);
+    return createSurfaceDrm(width, height, pixel_format, boname, stride, surf_id);
+cleanup:
+    if (allocdev)
+        gralloc_close(allocdev);
+    return st;
+}
+
+
+
+
+
diff --git a/imagedecoder/JPEGDecoder_img.cpp b/imagedecoder/JPEGDecoder_img.cpp
new file mode 100644
index 0000000..d90559d
--- /dev/null
+++ b/imagedecoder/JPEGDecoder_img.cpp
@@ -0,0 +1,99 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
+* Copyright (c) Imagination Technologies Limited, UK
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+* Authors:
+*    Nana Guo <nana.n.guo@intel.com>
+*    Yao Cheng <yao.cheng@intel.com>
+*
+*/
+
+#include "JPEGCommon_Img.h"
+#include "JPEGDecoder.h"
+
+// Map a VA fourcc onto the Android HAL pixel format that represents it.
+// Formats outside the three listed here yield -1.
+int fourcc2PixelFormat(uint32_t fourcc)
+{
+    if (fourcc == VA_FOURCC_YV12)
+        return HAL_PIXEL_FORMAT_YV12;
+    if (fourcc == VA_FOURCC_YUY2)
+        return HAL_PIXEL_FORMAT_YCbCr_422_I;
+    if (fourcc == VA_FOURCC_RGBA)
+        return HAL_PIXEL_FORMAT_RGBA_8888;
+
+    return -1;
+}
+// Map an Android HAL pixel format onto the VA fourcc that represents it.
+// Formats outside the three listed here yield 0.
+uint32_t pixelFormat2Fourcc(int pixel_format)
+{
+    if (pixel_format == HAL_PIXEL_FORMAT_YV12)
+        return VA_FOURCC_YV12;
+    if (pixel_format == HAL_PIXEL_FORMAT_YCbCr_422_I)
+        return VA_FOURCC_YUY2;
+    if (pixel_format == HAL_PIXEL_FORMAT_RGBA_8888)
+        return VA_FOURCC_RGBA;
+
+    return 0;
+}
+
+
+bool JpegDecoder::jpegColorFormatSupported(JpegInfo &jpginfo) const
+{
+    // Accepted JPEG layouts are IMC3, 422H and 444P; anything else is rejected.
+    const uint32_t fmt = jpginfo.image_color_fourcc;
+    return fmt == VA_FOURCC_IMC3 || fmt == VA_FOURCC_422H || fmt == VA_FOURCC_444P;
+}
+
+JpegDecodeStatus JpegDecoder::createSurfaceDrm(int width, int height, int pixel_format, unsigned long boname, int stride, VASurfaceID *surf_id)
+{
+    return JD_RENDER_TARGET_TYPE_UNSUPPORTED; // stub: every argument is ignored and this fixed status is returned
+}
+
+/*
+ * Create a VA surface on top of an Android gralloc buffer through the TPI
+ * surface-attribute call. Returns JD_SUCCESS or JD_RESOURCE_FAILURE.
+ */
+JpegDecodeStatus JpegDecoder::createSurfaceGralloc(int width, int height, int pixel_format, buffer_handle_t handle, int stride, VASurfaceID *surf_id)
+{
+    // Start from an all-zero descriptor: only the fields assigned below are
+    // meaningful here, and the others must not be passed on uninitialised.
+    VASurfaceAttributeTPI attrib_tpi = {};
+    attrib_tpi.count = 1;
+    attrib_tpi.luma_stride = stride;
+    attrib_tpi.pixel_format = pixel_format;
+    attrib_tpi.width = width;
+    attrib_tpi.height = height;
+    attrib_tpi.type = VAExternalMemoryAndroidGrallocBuffer;
+    attrib_tpi.buffers = (uint32_t*)&handle;
+
+    const uint32_t va_format = VA_RT_FORMAT_YUV444;
+    VAStatus st = vaCreateSurfacesWithAttribute(mDisplay,
+        width, height, va_format,
+        1, surf_id, &attrib_tpi);
+    if (st != VA_STATUS_SUCCESS)
+        return JD_RESOURCE_FAILURE;
+    return JD_SUCCESS;
+}
+
+
diff --git a/imagedecoder/JPEGDecoder_libjpeg_wrapper.cpp b/imagedecoder/JPEGDecoder_libjpeg_wrapper.cpp
new file mode 100644
index 0000000..edfaac6
--- /dev/null
+++ b/imagedecoder/JPEGDecoder_libjpeg_wrapper.cpp
@@ -0,0 +1,499 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
+* Copyright (c) Imagination Technologies Limited, UK
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+* Authors:
+*    Nana Guo <nana.n.guo@intel.com>
+*    Yao Cheng <yao.cheng@intel.com>
+*
+*/
+
+/*
+ * Initialize VA API related stuff
+ *
+ * We will check the return value of jdva_initialize
+ * to determine which path will be used (SW or HW)
+ *
+ */
+//#define LOG_NDEBUG 0
+#define LOG_TAG "ImageDecoder"
+
+#include <utils/Log.h>
+#include "JPEGDecoder_libjpeg_wrapper.h"
+#include <hardware/gralloc.h>
+#include <utils/threads.h>
+#include "JPEGDecoder.h"
+#include <va/va.h>
+#include "va/va_dec_jpeg.h"
+
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
+#include <assert.h>
+
+static Mutex jdlock; // taken by jdva_initialize/deinitialize/create_resource/release_resource
+
+struct jdva_private // per-context state; jdva_initialize() stores its address in jd_libva_struct::priv
+{
+    JpegInfo jpg_info;        // parse result, filled by decoder.parse() in jdva_parse_bitstream()
+    JpegDecoder decoder;      // performs parse/decode/blit/map for this context
+    RenderTarget dec_buffer;  // destination of decoder.decode()
+    RenderTarget yuy2_buffer; // intermediate blit target, used when the JPEG is not 422H
+    RenderTarget rgba_buffer; // final blit target; mapped and copied out in jdva_decode()
+};
+
+static int internal_buffer_handle = 0; // next handle value for non-gralloc targets, see create_render_target()
+
+#define JD_CHECK(err, label) \
+        if (err) { \
+            ALOGE("%s::%d: failed: %d", __PRETTY_FUNCTION__, __LINE__, err); \
+            goto label; \
+        }
+
+#define JD_CHECK_RET(err, label, retcode) \
+        if (err) { \
+            status = retcode; \
+            ALOGE("%s::%d: failed: %d", __PRETTY_FUNCTION__, __LINE__, err); \
+            goto label; \
+        }
+
+/*
+ * Attach a freshly constructed jdva_private (parser info, decoder and three
+ * zeroed render-target descriptors) to *jd_libva_ptr and flag it initialized.
+ *
+ * Following the libjpeg convention used throughout this wrapper, the argument
+ * is not validated; callers must pass a valid jd_libva_struct.
+ *
+ * Returns DECODE_SUCCESS, or DECODE_NOT_STARTED when the struct is already
+ * flagged as initialized on entry.
+ */
+Decode_Status jdva_initialize (jd_libva_struct * jd_libva_ptr)
+{
+    // Unlocked early exit for a context that is already set up.
+    if (jd_libva_ptr->initialized) {
+        ALOGW("%s HW decode already initialized", __FUNCTION__);
+        return DECODE_NOT_STARTED;
+    }
+
+    Mutex::Autolock autoLock(jdlock);
+
+    // Test the flag again now that the lock is held; if it was set in the
+    // meantime there is nothing left to do.
+    if (jd_libva_ptr->initialized)
+        return DECODE_SUCCESS;
+
+    jdva_private *ctx = new jdva_private;
+    memset(&ctx->jpg_info, 0, sizeof(JpegInfo));
+    memset(&ctx->dec_buffer, 0, sizeof(RenderTarget));
+    memset(&ctx->yuy2_buffer, 0, sizeof(RenderTarget));
+    memset(&ctx->rgba_buffer, 0, sizeof(RenderTarget));
+
+    jd_libva_ptr->initialized = TRUE;
+    // priv is declared as a uint32_t in jd_libva_struct, hence the cast.
+    // NOTE(review): this narrows the pointer; confirm the library is only
+    // built for 32-bit targets.
+    jd_libva_ptr->priv = (uint32_t)ctx;
+
+    // Nothing above reports a failure, so success is the only outcome from
+    // here on.
+    return DECODE_SUCCESS;
+}
+// Destroy the private context created by jdva_initialize() and clear the
+// initialized flag; does nothing when the struct is not initialized.
+void jdva_deinitialize (jd_libva_struct * jd_libva_ptr)
+{
+    if (!(jd_libva_ptr->initialized))
+        return;
+
+    Mutex::Autolock autoLock(jdlock);
+    if (jd_libva_ptr->initialized) {
+        delete (jdva_private*)jd_libva_ptr->priv;
+        // Clear the stale pointer so later "if (!priv)" guards catch use after deinit.
+        jd_libva_ptr->priv = 0;
+        jd_libva_ptr->initialized = FALSE;
+    }
+    ALOGV("jdva_deinitialize finished");
+}
+
+// Describe a width x height target of the given HAL format in *target: 422H, YUY2
+// and RGBA get a gralloc allocation, anything else is tagged as an internal
+// buffer with a running handle number. Returns target, or NULL on failure.
+RenderTarget * create_render_target(RenderTarget* target, int width, int height, int pixel_format)
+{
+    hw_module_t const* module = NULL;
+    alloc_device_t *allocdev = NULL;
+    buffer_handle_t handle;
+    uint32_t fourcc;
+    int stride, bpp, err;
+    if (target == NULL) {
+        ALOGE("%s called with a NULL RenderTarget", __FUNCTION__);
+        return NULL;
+    }
+    fourcc = pixelFormat2Fourcc(pixel_format);
+    bpp = fourcc2LumaBitsPerPixel(fourcc);
+    ALOGV("%s created %s target %p", __FUNCTION__, fourcc2str(NULL, fourcc), target);
+    if ((fourcc == VA_FOURCC_422H) ||
+        (fourcc == VA_FOURCC_YUY2) ||
+        (fourcc == VA_FOURCC_RGBA)){
+        err = hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &module);
+        if (err || !module) {
+            ALOGE("%s failed to get gralloc module", __FUNCTION__);
+            return NULL;
+        }
+        err = gralloc_open(module, &allocdev);
+        if (err || !allocdev) {
+            ALOGE("%s failed to open alloc device", __FUNCTION__);
+            return NULL;
+        }
+        err = allocdev->alloc(allocdev, width, height, pixel_format,
+                GRALLOC_USAGE_HW_RENDER, &handle, &stride);
+        // The device is needed only for alloc(); close it on success too (it used to leak).
+        gralloc_close(allocdev);
+        if (err) {
+            ALOGE("%s failed to allocate surface", __FUNCTION__);
+            return NULL;
+        }
+        target->type = RenderTarget::ANDROID_GRALLOC;
+        target->handle = (int)handle;
+        target->stride = stride * bpp;
+    }
+    else {
+        *((int*)(&target->type)) = RENDERTARGET_INTERNAL_BUFFER;
+        target->handle = internal_buffer_handle++;
+    }
+    target->width = width;
+    target->height = height;
+    target->pixel_format = pixel_format;
+    target->rect.x = target->rect.y = 0;
+    target->rect.width = target->width;
+    target->rect.height = target->height;
+    return target;
+}
+
+// Release the gralloc allocation behind *target, if it has one. The RenderTarget
+// object itself is not freed here. Each failing step logs its own message.
+void free_render_target(RenderTarget *target)
+{
+    if (target == NULL)
+        return;
+    uint32_t fourcc = pixelFormat2Fourcc(target->pixel_format);
+    if (target->type == RenderTarget::ANDROID_GRALLOC) {
+        buffer_handle_t handle = (buffer_handle_t)target->handle;
+        hw_module_t const* module = NULL;
+        alloc_device_t *allocdev = NULL;
+        int err = hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &module);
+        if (err || !module) {
+            ALOGE("%s failed to get gralloc module", __FUNCTION__);
+            return;
+        }
+        err = gralloc_open(module, &allocdev);
+        if (err || !allocdev) {
+            ALOGE("%s failed to open alloc device", __FUNCTION__);
+            return;
+        }
+        allocdev->free(allocdev, handle);
+        gralloc_close(allocdev);
+    }
+    ALOGV("%s deleting %s target %p", __FUNCTION__, fourcc2str(NULL, fourcc), target);
+}
+
+// Debug helper: dump the packed YUYV rows of a YUY2 target into `filename`.
+void dump_yuy2_target(RenderTarget *target, JpegDecoder *decoder, const char *filename)
+{
+    assert(pixelFormat2Fourcc(target->pixel_format) == VA_FOURCC_YUY2);
+    uint8_t *mapped;
+    uint32_t plane_offsets[3];
+    uint32_t plane_pitches[3];
+    JpegDecoder::MapHandle mapping = decoder->mapData(*target, (void**) &mapped, plane_offsets, plane_pitches);
+    assert (mapping.valid);
+    FILE* out = fopen(filename, "wb");
+    if (!out) {
+        ALOGW("%s failed to create %s", __FUNCTION__, filename);
+    }
+    else {
+        // Two bytes per pixel; rows are written one at a time so that only
+        // width * 2 bytes of each pitch-sized row end up in the file.
+        for (int row = 0; row < target->height; ++row)
+            fwrite(mapped + plane_offsets[0] + row * plane_pitches[0], 1, target->width * 2, out);
+        fclose(out);
+    }
+    decoder->unmapData(*target, mapping);
+}
+
+// Debug helper: dump a planar YUV target to `filename` (Y plane, then U, then V).
+// hfactor/vfactor scale the chroma row length and chroma row count relative to luma.
+void dump_dec_target(RenderTarget *target, JpegDecoder *decoder, const char *filename)
+{
+    uint32_t fourcc = pixelFormat2Fourcc(target->pixel_format);
+    assert((fourcc == VA_FOURCC_IMC3) || (fourcc == VA_FOURCC_411P) ||
+        (fourcc == VA_FOURCC('4','0','0','P')) ||
+        (fourcc == VA_FOURCC_422H) || (fourcc == VA_FOURCC_422V) ||
+        (fourcc == VA_FOURCC_444P));
+    uint8_t *data;
+    uint32_t offsets[3];
+    uint32_t pitches[3];
+    JpegDecoder::MapHandle maphandle = decoder->mapData(*target, (void**) &data, offsets, pitches);
+    assert (maphandle.valid);
+    FILE* fpdump = fopen(filename, "wb");
+    if(fpdump) {
+        float hfactor, vfactor;
+        switch (fourcc) {
+            case VA_FOURCC_IMC3:
+                hfactor = 1;
+                vfactor = 0.5;
+                break;
+            case VA_FOURCC_422H:
+                hfactor = 0.5;
+                vfactor = 1;
+                break;
+            case VA_FOURCC('4','0','0','P'):
+                hfactor = vfactor = 0;
+                break;
+            case VA_FOURCC_411P:
+                hfactor = 0.25;
+                vfactor = 1;
+                break;
+            case VA_FOURCC_422V:
+                // Vertically subsampled chroma: full-width rows, half as many
+                // of them. This case used to repeat the 422H factors.
+                hfactor = 1;
+                vfactor = 0.5;
+                break;
+            case VA_FOURCC_444P:
+            default:
+                hfactor = vfactor = 1;
+                break;
+        }
+        // Y
+        for (int i = 0; i < target->height; ++i) {
+            fwrite(data + offsets[0] + i * pitches[0], 1, target->width, fpdump);
+        }
+        // U
+        for (int i = 0; i < target->height * vfactor; ++i) {
+            fwrite(data + offsets[1] + i * pitches[1], 1, target->width * hfactor, fpdump);
+        }
+        // V
+        for (int i = 0; i < target->height * vfactor; ++i) {
+            fwrite(data + offsets[2] + i * pitches[2], 1, target->width * hfactor, fpdump);
+        }
+        fclose(fpdump);
+    }
+    else {
+        ALOGW("%s failed to create %s", __FUNCTION__, filename);
+    }
+    decoder->unmapData(*target, maphandle);
+}
+
+
+/*
+ * Run the hardware decode for the bitstream parsed earlier and copy the
+ * resulting picture into jd_libva_ptr->output_image.
+ *
+ * Pipeline: decoder.decode() into dec_buffer, an extra blit through
+ * yuy2_buffer unless the JPEG is already 422H, a blit into rgba_buffer, then a
+ * per-row copy of the mapped RGBA surface into the caller's row pointers with
+ * the first and third byte of every pixel swapped.
+ *
+ * cinfo is not used here. output_lines rows are copied; rows whose pointer in
+ * output_image is NULL are logged and skipped.
+ *
+ * Returns DECODE_SUCCESS, or DECODE_DRIVER_FAIL when the context is missing or
+ * any decode, blit or map step fails.
+ */
+Decode_Status jdva_decode (j_decompress_ptr cinfo, jd_libva_struct * jd_libva_ptr)
+{
+    char **outbuf = jd_libva_ptr->output_image;
+    const uint32_t lines = jd_libva_ptr->output_lines;
+    jdva_private * priv = (jdva_private*)jd_libva_ptr->priv;
+    if (!priv)
+        return DECODE_DRIVER_FAIL;
+
+    // Hardware decode into dec_buffer.
+    JpegInfo& jpginfo = priv->jpg_info;
+    JpegDecodeStatus st = priv->decoder.decode(jpginfo, priv->dec_buffer);
+    if (st != JD_SUCCESS) {
+        ALOGE("%s: error decoding %s image", __FUNCTION__, fourcc2str(NULL, jpginfo.image_color_fourcc));
+        return DECODE_DRIVER_FAIL;
+    }
+    ALOGI("%s successfully decoded JPEG with VAAPI", __FUNCTION__);
+
+    // Everything except 422H is converted to YUY2 first; the RGBA blit below
+    // then reads from whichever buffer holds the latest stage.
+    RenderTarget *src_target = &priv->dec_buffer;
+    if (jpginfo.image_color_fourcc != VA_FOURCC_422H) {
+        st = priv->decoder.blit(*src_target, priv->yuy2_buffer);
+        if (st != JD_SUCCESS) {
+            ALOGE("%s: error blitting to YUY2 buffer", __FUNCTION__);
+            return DECODE_DRIVER_FAIL;
+        }
+        src_target = &priv->yuy2_buffer;
+    }
+
+    st = priv->decoder.blit(*src_target, priv->rgba_buffer);
+    if (st != JD_SUCCESS) {
+        ALOGE("%s: error blitting to RGBA buffer", __FUNCTION__);
+        return DECODE_DRIVER_FAIL;
+    }
+
+    // Map the RGBA surface so the CPU can read the pixels.
+    uint8_t *data = NULL;
+    uint32_t offsets[3];
+    uint32_t pitches[3];
+    JpegDecoder::MapHandle maphandle =
+        priv->decoder.mapData(priv->rgba_buffer, (void**) &data, offsets, pitches);
+    // A failed mapping used to go unnoticed and the copy loop below would then
+    // read through an unusable pointer.
+    if (!maphandle.valid) {
+        ALOGE("%s: error mapping RGBA buffer", __FUNCTION__);
+        return DECODE_DRIVER_FAIL;
+    }
+
+    // NOTE(review): nothing here bounds `lines` by rgba_buffer.height; confirm
+    // callers never request more rows than the picture has.
+    for (uint32_t i = 0; i < lines; ++i) {
+        if (outbuf[i] == NULL) {
+            ALOGE("%s outbuf line %u is NULL", __FUNCTION__, i);
+            continue;
+        }
+        const uint8_t *src = data + offsets[0] + i * pitches[0];
+        char *dst = outbuf[i];
+        for (int j = 0; j < priv->rgba_buffer.width; ++j) {
+            // BGRA -> RGBA
+            dst[4 * j]     = (char)src[4 * j + 2];   // R
+            dst[4 * j + 1] = (char)src[4 * j + 1];   // G
+            dst[4 * j + 2] = (char)src[4 * j];       // B
+            dst[4 * j + 3] = (char)src[4 * j + 3];   // A
+        }
+    }
+
+    ALOGI("%s successfully blitted RGBA from JPEG %s data", __FUNCTION__, fourcc2str(NULL, priv->jpg_info.image_color_fourcc));
+    priv->decoder.unmapData(priv->rgba_buffer, maphandle);
+    return DECODE_SUCCESS;
+}
+
+/*
+ * Allocate the decode, YUY2 and RGBA render targets for the parsed picture and
+ * initialise the decoder with them.
+ *
+ * Returns DECODE_SUCCESS; DECODE_MEMORY_FAIL when a target cannot be created;
+ * DECODE_DRIVER_FAIL when the context is missing or decoder.init() fails.
+ */
+Decode_Status jdva_create_resource (jd_libva_struct * jd_libva_ptr)
+{
+    RenderTarget *dec_target, *yuy2_target, *rgba_target;
+    JpegDecodeStatus st;
+    Mutex::Autolock autoLock(jdlock);
+    jdva_private *priv = (jdva_private*)jd_libva_ptr->priv;
+    if (!priv)
+        return DECODE_DRIVER_FAIL;
+    jd_libva_ptr->image_width = priv->jpg_info.picture_param_buf.picture_width;
+    jd_libva_ptr->image_height = priv->jpg_info.picture_param_buf.picture_height;
+    dec_target = create_render_target(&priv->dec_buffer, jd_libva_ptr->image_width,jd_libva_ptr->image_height,fourcc2PixelFormat(priv->jpg_info.image_color_fourcc));
+    if (dec_target == NULL) {
+        ALOGE("%s failed to create decode render target", __FUNCTION__);
+        return DECODE_MEMORY_FAIL;
+    }
+    rgba_target = create_render_target(&priv->rgba_buffer, jd_libva_ptr->image_width,jd_libva_ptr->image_height, HAL_PIXEL_FORMAT_RGBA_8888);
+    if (rgba_target == NULL) {
+        // The two csc messages were swapped; each now names the buffer that failed.
+        ALOGE("%s failed to create RGBA csc buffer", __FUNCTION__);
+        free_render_target(dec_target);
+        return DECODE_MEMORY_FAIL;
+    }
+    yuy2_target = create_render_target(&priv->yuy2_buffer, jd_libva_ptr->image_width,jd_libva_ptr->image_height, HAL_PIXEL_FORMAT_YCbCr_422_I);
+    if (yuy2_target == NULL) {
+        ALOGE("%s failed to create YUY2 csc buffer", __FUNCTION__);
+        free_render_target(dec_target);
+        free_render_target(rgba_target);
+        return DECODE_MEMORY_FAIL;
+    }
+    RenderTarget *targetlist[3] = { dec_target, yuy2_target, rgba_target };
+    st = priv->decoder.init(jd_libva_ptr->image_width, jd_libva_ptr->image_height, targetlist, 3);
+    if (st != JD_SUCCESS) {
+        free_render_target(dec_target);
+        free_render_target(rgba_target);
+        free_render_target(yuy2_target);
+        ALOGE("%s failed to initialize resources for decoder: %d", __FUNCTION__, st);
+        return DECODE_DRIVER_FAIL;
+    }
+    jd_libva_ptr->resource_allocated = TRUE;
+    ALOGV("%s successfully set up HW decode resource", __FUNCTION__);
+    return DECODE_SUCCESS;
+}
+/*
+ * Undo jdva_create_resource(): deinitialise the decoder, release the three
+ * render targets and clear resource_allocated.
+ * The RenderTarget structs themselves live inside the private context.
+ *
+ * Always returns DECODE_SUCCESS (0), including when nothing was allocated.
+ */
+Decode_Status jdva_release_resource (jd_libva_struct * jd_libva_ptr)
+{
+    // Unlocked early exit: nothing was allocated, so there is nothing to undo.
+    if (!(jd_libva_ptr->resource_allocated)) {
+        ALOGW("%s decoder resource not yet allocated", __FUNCTION__);
+        return DECODE_SUCCESS;
+    }
+
+    Mutex::Autolock autoLock(jdlock);
+    ALOGV("%s deiniting priv 0x%x", __FUNCTION__, jd_libva_ptr->priv);
+
+    jdva_private *ctx = (jdva_private*)jd_libva_ptr->priv;
+    if (ctx != NULL) {
+        ctx->decoder.deinit();
+        // Release order: decode, YUY2, then RGBA target.
+        RenderTarget *owned[] = { &ctx->dec_buffer, &ctx->yuy2_buffer, &ctx->rgba_buffer };
+        for (unsigned int idx = 0; idx < sizeof(owned) / sizeof(owned[0]); ++idx)
+            free_render_target(owned[idx]);
+    }
+
+    // The flag is cleared even when no private context was attached.
+    jd_libva_ptr->resource_allocated = FALSE;
+    return DECODE_SUCCESS;
+}
+// Parse the JPEG in jd_libva_ptr->bitstream_buf and publish the picture geometry to jd_libva_ptr
+// and cinfo. Returns DECODE_SUCCESS, DECODE_DRIVER_FAIL (no context) or DECODE_PARSER_FAIL.
+Decode_Status jdva_parse_bitstream(j_decompress_ptr cinfo, jd_libva_struct * jd_libva_ptr)
+{
+    jdva_private * const ctx = (jdva_private*)jd_libva_ptr->priv;
+    if (ctx == NULL)
+        return DECODE_DRIVER_FAIL;
+    ctx->jpg_info.buf = jd_libva_ptr->bitstream_buf;
+    ctx->jpg_info.bufsize = jd_libva_ptr->file_size;
+    const JpegDecodeStatus parse_st = ctx->decoder.parse(ctx->jpg_info);
+    if (parse_st != JD_SUCCESS) {
+        ALOGE("%s parser for HW decode failed: %d", __FUNCTION__, parse_st);
+        return DECODE_PARSER_FAIL;
+    }
+    jd_libva_ptr->image_width = ctx->jpg_info.image_width;
+    jd_libva_ptr->image_height = ctx->jpg_info.image_height;
+    cinfo->original_image_width = ctx->jpg_info.picture_param_buf.picture_width;  /* nominal width from the SOF marker */
+    cinfo->image_width = ctx->jpg_info.picture_param_buf.picture_width;           /* nominal width from the SOF marker */
+    cinfo->image_height = ctx->jpg_info.picture_param_buf.picture_height;         /* nominal height */
+    cinfo->num_components = ctx->jpg_info.picture_param_buf.num_components;       /* colour components in the JPEG */
+    cinfo->jpeg_color_space = JCS_YCbCr; /* colorspace of the JPEG image */
+    cinfo->out_color_space = JCS_RGB;    /* colorspace of the output */
+    cinfo->src->bytes_in_buffer = jd_libva_ptr->file_size;
+    return DECODE_SUCCESS;
+}
+
diff --git a/imagedecoder/JPEGDecoder_libjpeg_wrapper.h b/imagedecoder/JPEGDecoder_libjpeg_wrapper.h
new file mode 100644
index 0000000..c9d060b
--- /dev/null
+++ b/imagedecoder/JPEGDecoder_libjpeg_wrapper.h
@@ -0,0 +1,92 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2012, 2013 Intel Corporation.  All rights reserved.
+* Copyright (c) Imagination Technologies Limited, UK
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+* Authors:
+*    Nana Guo <nana.n.guo@intel.com>
+*    Yao Cheng <yao.cheng@intel.com>
+*
+*/
+
+#ifndef JDLIBVA_H
+#define JDLIBVA_H
+
+#include <pthread.h>
+#include <stdio.h>
+#include "jpeglib.h"
+
+#define Display unsigned int
+#define BOOL int
+
+#define JPEG_MAX_COMPONENTS 4
+#define JPEG_MAX_QUANT_TABLES 4
+
+typedef struct { // state passed to every jdva_* function
+    uint8_t* bitstream_buf;   // JPEG data handed to the parser by jdva_parse_bitstream()
+    uint32_t image_width;     // written by jdva_parse_bitstream() and jdva_create_resource()
+    uint32_t image_height;    // written alongside image_width
+
+    boolean hw_state_ready;
+    boolean hw_caps_ready;
+    boolean hw_path;
+    boolean initialized;         // set by jdva_initialize(), cleared by jdva_deinitialize()
+    boolean resource_allocated;  // set by jdva_create_resource(), cleared by jdva_release_resource()
+
+    uint32_t file_size;  // byte length of bitstream_buf as passed to the parser
+    uint32_t rotation;
+
+    char ** output_image;   // row pointers that jdva_decode() fills with RGBA pixels
+    uint32_t output_lines;  // number of rows jdva_decode() copies
+
+    uint32_t priv;  // jdva_private pointer stored as an integer by jdva_initialize()
+} jd_libva_struct;
+
+typedef enum { // status codes returned (as Decode_Status) by the jdva_* functions
+    DECODE_NOT_STARTED = -6,   // jdva_initialize(): struct was already initialized
+    DECODE_INVALID_DATA = -5,
+    DECODE_DRIVER_FAIL = -4,   // missing private context, or a decoder init/decode/blit step failed
+    DECODE_PARSER_FAIL = -3,   // decoder.parse() rejected the bitstream
+    DECODE_MEMORY_FAIL = -2,   // a render target could not be created
+    DECODE_FAIL = -1,
+    DECODE_SUCCESS = 0,
+
+} IMAGE_DECODE_STATUS;
+
+/*********************** for libjpeg ****************************/
+typedef int32_t Decode_Status;
+extern jd_libva_struct jd_libva;
+#ifdef __cplusplus
+extern "C" {
+#endif
+Decode_Status jdva_initialize (jd_libva_struct * jd_libva_ptr); // allocates the private decoder context
+void jdva_deinitialize (jd_libva_struct * jd_libva_ptr); // destroys the private decoder context
+Decode_Status jdva_decode (j_decompress_ptr cinfo, jd_libva_struct * jd_libva_ptr); // decodes and copies RGBA rows into output_image
+Decode_Status jdva_create_resource (jd_libva_struct * jd_libva_ptr); // creates the render targets and initialises the decoder
+Decode_Status jdva_release_resource (jd_libva_struct * jd_libva_ptr); // deinitialises the decoder and frees the render targets
+Decode_Status jdva_parse_bitstream(j_decompress_ptr cinfo, jd_libva_struct * jd_libva_ptr); // parses bitstream_buf and fills in the image geometry
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
+
diff --git a/imagedecoder/JPEGParser.c b/imagedecoder/JPEGParser.cpp
similarity index 84%
rename from imagedecoder/JPEGParser.c
rename to imagedecoder/JPEGParser.cpp
index 4ad13b8..1d6ab26 100644
--- a/imagedecoder/JPEGParser.c
+++ b/imagedecoder/JPEGParser.cpp
@@ -33,6 +33,8 @@
 #include <stdio.h>
 #include <unistd.h>
 
+bool endOfBuffer(CJPEGParse* parser);
+
 uint8_t readNextByte(CJPEGParse* parser) {
     uint8_t byte = 0;
 
@@ -42,7 +44,7 @@
     }
 
     if (parser->parse_index == parser->buff_size) {
-        parser->end_of_buff = TRUE;
+        parser->end_of_buff = true;
     }
 
     return byte;
@@ -63,7 +65,7 @@
 
     if (parser->parse_index >= parser->buff_size) {
         parser->parse_index = parser->buff_size - 1;
-        parser->end_of_buff = TRUE;
+        parser->end_of_buff = true;
     }
 }
 
@@ -73,22 +75,22 @@
             break;
         }
     }
-	/* check the next byte to make sure we don't miss the real marker*/
-	uint8_t tempNextByte = readNextByte(parser);
-	if (tempNextByte == 0xff)
-	    return readNextByte(parser);
-	else
-		return tempNextByte;
+    /* check the next byte to make sure we don't miss the real marker*/
+    uint8_t tempNextByte = readNextByte(parser);
+    if (tempNextByte == 0xff)
+        return readNextByte(parser);
+    else
+        return tempNextByte;
 }
 
-boolean setByteOffset(CJPEGParse* parser, uint32_t byte_offset)
+bool setByteOffset(CJPEGParse* parser, uint32_t byte_offset)
 {
-    boolean offset_found = FALSE;
+    bool offset_found = false;
 
     if (byte_offset < parser->buff_size) {
         parser->parse_index = byte_offset;
-        offset_found = TRUE;
-//      end_of_buff = FALSE;
+        offset_found = true;
+//      end_of_buff = false;
     }
 
     return offset_found;
@@ -98,7 +100,7 @@
     return parser->parse_index;
 }
 
-boolean endOfBuffer(CJPEGParse* parser) {
+bool endOfBuffer(CJPEGParse* parser) {
     return parser->end_of_buff;
 }
 
@@ -110,7 +112,7 @@
     parser->parse_index = 0;
     parser->buff_size = buff_size;
     parser->stream_buff = stream_buff;
-    parser->end_of_buff = FALSE;
+    parser->end_of_buff = false;
     parser->readNextByte = readNextByte;
     parser->readBytes = readBytes;
     parser->burnBytes = burnBytes;
diff --git a/imagedecoder/JPEGParser.h b/imagedecoder/JPEGParser.h
index 9e8ebd1..be6ac4d 100644
--- a/imagedecoder/JPEGParser.h
+++ b/imagedecoder/JPEGParser.h
@@ -31,17 +31,6 @@
 
 #include <stdint.h>
 
-#ifndef boolean
-#define boolean int
-#endif
-
-#ifndef TRUE
-#define TRUE 1
-#endif
-#ifndef FALSE
-#define FALSE 0
-#endif
-
 // Marker Codes
 #define CODE_SOF_BASELINE 0xC0
 #define CODE_SOF1         0xC1
@@ -87,20 +76,20 @@
 #define CODE_APP13        0xED
 #define CODE_APP14        0xEE
 #define CODE_APP15        0xEF
-typedef  struct _CJPEGParse CJPEGParse;
-struct _CJPEGParse {
+
+struct CJPEGParse {
     uint8_t* stream_buff;
     uint32_t parse_index;
     uint32_t buff_size;
-    boolean end_of_buff;
+    bool end_of_buff;
     uint8_t (*readNextByte)(CJPEGParse* parser);
     uint32_t (*readBytes)( CJPEGParse* parser, uint32_t bytes_to_read );
     void (*burnBytes)( CJPEGParse* parser, uint32_t bytes_to_burn );
     uint8_t (*getNextMarker)(CJPEGParse* parser);
     uint32_t (*getByteOffset)(CJPEGParse* parser);
-    boolean (*endOfBuffer)(CJPEGParse* parser);
+    bool (*endOfBuffer)(CJPEGParse* parser);
     uint8_t* (*getCurrentIndex)(CJPEGParse* parser);
-    boolean (*setByteOffset)( CJPEGParse* parser, uint32_t byte_offset );
+    bool (*setByteOffset)( CJPEGParse* parser, uint32_t byte_offset );
 };
 
 void parserInitialize(CJPEGParse* parser,  uint8_t* stream_buff, uint32_t buff_size);
diff --git a/imagedecoder/test/testdecode.cpp b/imagedecoder/test/testdecode.cpp
new file mode 100644
index 0000000..6823b85
--- /dev/null
+++ b/imagedecoder/test/testdecode.cpp
@@ -0,0 +1,428 @@
+#include "JPEGDecoder.h"
+#include "JPEGBlitter.h"
+#include "JPEGCommon_Gen.h"
+#include <utils/threads.h>
+#include <utils/Timers.h>
+#include <stdio.h>
+#undef NDEBUG
+#include <assert.h>
+#include <hardware/gralloc.h>
+
+#define JPGFILE "/sdcard/1280x720xYUV422H.jpg"
+
+// Allocates a width x height gralloc buffer of pixel_format and fills `target` to describe it; returns `target`.
+// Aborts through assert() on any gralloc failure. Release the buffer with deinit_render_target().
+RenderTarget& init_render_target(RenderTarget &target, int width, int height, int pixel_format)
+{
+    hw_module_t const* module = NULL;
+    alloc_device_t *allocdev = NULL;
+    buffer_handle_t handle;
+    uint32_t fourcc;
+    int stride, bpp, err;
+    fourcc = pixelFormat2Fourcc(pixel_format);
+    bpp = fourcc2LumaBitsPerPixel(fourcc);
+    err = hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &module);
+    if (err || !module) {
+        printf("%s failed to get gralloc module\n", __PRETTY_FUNCTION__);
+        assert(false);
+    }
+    err = gralloc_open(module, &allocdev);
+    if (err || !allocdev) {
+        printf("%s failed to open alloc device\n", __PRETTY_FUNCTION__);
+        assert(false);
+    }
+    err = allocdev->alloc(allocdev,
+            width,
+            height,
+            pixel_format,
+            GRALLOC_USAGE_HW_RENDER,
+            &handle,
+            &stride);
+    gralloc_close(allocdev); // the device is only needed for this one call; it used to leak on the success path
+    if (err) {
+        printf("%s failed to allocate surface %d, %dx%d, pixelformat %x\n", __PRETTY_FUNCTION__, err,
+            width, height, pixel_format);
+        assert(false);
+    }
+    target.type = RenderTarget::ANDROID_GRALLOC;
+    target.handle = (int)handle; // NOTE(review): pointer squeezed into an int; only valid where pointers are 32-bit
+    target.width = width;
+    target.height = height;
+    target.pixel_format = pixel_format;
+    target.rect.x = target.rect.y = 0;
+    target.rect.width = target.width;
+    target.rect.height = target.height;
+    target.stride = stride * bpp; // NOTE(review): unit of bpp (bits vs bytes per luma pixel) is not visible here -- confirm
+    return target;
+}
+
+// Frees the gralloc buffer referenced by target.handle through a freshly opened alloc device.
+// Best effort: on a gralloc failure it logs and returns without freeing anything.
+void deinit_render_target(RenderTarget &target)
+{
+    buffer_handle_t handle = (buffer_handle_t)target.handle;
+    hw_module_t const* module = NULL;
+    alloc_device_t *allocdev = NULL;
+    int err = hw_get_module(GRALLOC_HARDWARE_MODULE_ID, &module);
+    if (err || !module) {
+        printf("%s failed to get gralloc module\n", __PRETTY_FUNCTION__);
+        return;
+    }
+    err = gralloc_open(module, &allocdev);
+    if (err || !allocdev) {
+        printf("%s failed to open alloc device\n", __PRETTY_FUNCTION__);
+        return;
+    }
+    allocdev->free(allocdev, handle);
+    gralloc_close(allocdev);
+}
+
+// Functional test: decodes JPGFILE once, blits it to NV12/YUY2/RGBA/YV12 targets and dumps each result under /sdcard.
+void decode_blit_functionality_test()
+{
+    JpegDecodeStatus st;
+    JpegInfo jpginfo;
+    memset(&jpginfo, 0, sizeof(JpegInfo)); // start parse() from the same zeroed state read_new_frame() provides
+    JpegDecoder decoder;
+    JpegBlitter blitter;
+    blitter.setDecoder(decoder);
+    RenderTarget targets[5];
+    RenderTarget *dec_target, *blit_nv12_target, *blit_rgba_target, *blit_yuy2_target, *blit_yv12_target;
+    FILE* fp = fopen(JPGFILE, "rb");
+    assert(fp);
+    fseek(fp, 0, SEEK_END);
+    long fsize = ftell(fp);
+    assert(fsize > 0); // ftell() returns -1 on failure, and an empty file cannot be decoded
+    jpginfo.bufsize = fsize;
+    fseek(fp, 0, SEEK_SET);
+    jpginfo.buf = new uint8_t[jpginfo.bufsize];
+    size_t nread = fread(jpginfo.buf, 1, jpginfo.bufsize, fp);
+    fclose(fp);
+    assert(nread == jpginfo.bufsize); // a short read would hand the parser a truncated bitstream
+
+    printf("finished loading src file: size %u\n", jpginfo.bufsize);
+    st = decoder.parse(jpginfo);
+    assert(st == JD_SUCCESS);
+
+    init_render_target(targets[0], jpginfo.image_width, jpginfo.image_height, jpginfo.image_pixel_format);
+    init_render_target(targets[1], jpginfo.image_width, jpginfo.image_height, HAL_PIXEL_FORMAT_NV12_TILED_INTEL);
+    init_render_target(targets[2], jpginfo.image_width, jpginfo.image_height, HAL_PIXEL_FORMAT_RGBA_8888);
+    init_render_target(targets[3], jpginfo.image_width, jpginfo.image_height, HAL_PIXEL_FORMAT_YCbCr_422_I);
+    init_render_target(targets[4], jpginfo.image_width, jpginfo.image_height, HAL_PIXEL_FORMAT_YV12);
+    dec_target = &targets[0];
+    blit_nv12_target = &targets[1];
+    blit_rgba_target = &targets[2];
+    blit_yuy2_target = &targets[3];
+    blit_yv12_target = &targets[4];
+    dec_target->rect.x = blit_nv12_target->rect.x = blit_yuy2_target->rect.x = blit_rgba_target->rect.x = blit_yv12_target->rect.x = 0;
+    dec_target->rect.y = blit_nv12_target->rect.y = blit_yuy2_target->rect.y = blit_rgba_target->rect.y = blit_yv12_target->rect.y = 0;
+    dec_target->rect.width = blit_nv12_target->rect.width = blit_yuy2_target->rect.width = blit_rgba_target->rect.width = blit_yv12_target->rect.width = jpginfo.image_width;
+    dec_target->rect.height = blit_nv12_target->rect.height = blit_yuy2_target->rect.height = blit_rgba_target->rect.height = blit_yv12_target->rect.height = jpginfo.image_height;
+    RenderTarget* targetlist[5] = {dec_target, blit_nv12_target, blit_rgba_target, blit_yuy2_target, blit_yv12_target };
+    //st = decoder.init(jpginfo.image_width, jpginfo.image_height, targetlist, 5);
+    st = decoder.init(jpginfo.image_width, jpginfo.image_height, &dec_target, 1);
+    assert(st == JD_SUCCESS);
+
+    //jpginfo.render_target = dec_target;
+    st = decoder.decode(jpginfo, *dec_target);
+    printf("decode returns %d\n", st);
+    assert(st == JD_SUCCESS);
+
+    uint8_t *data;
+    uint32_t offsets[3];
+    uint32_t pitches[3];
+    JpegDecoder::MapHandle maphandle = decoder.mapData(*dec_target, (void**) &data, offsets, pitches);
+    assert (maphandle.valid);
+    FILE* fpdump = fopen("/sdcard/dec_dump.yuv", "wb");
+    assert(fpdump);
+    // Y
+    for (int i = 0; i < dec_target->height; ++i) {
+        fwrite(data + offsets[0] + i * pitches[0], 1, dec_target->width, fpdump);
+    }
+    // U
+    for (int i = 0; i < dec_target->height; ++i) {
+        fwrite(data + offsets[1] + i * pitches[1], 1, dec_target->width/2, fpdump);
+    }
+    // V
+    for (int i = 0; i < dec_target->height; ++i) {
+        fwrite(data + offsets[2] + i * pitches[2], 1, dec_target->width/2, fpdump);
+    }
+    fclose(fpdump);
+    printf("Dumped decoded YUV to /sdcard/dec_dump.yuv\n");
+    decoder.unmapData(*dec_target, maphandle);
+
+    st = decoder.blit(*dec_target, *blit_nv12_target);
+    assert(st == JD_SUCCESS);
+
+    maphandle = decoder.mapData(*blit_nv12_target, (void**) &data, offsets, pitches);
+    assert (maphandle.valid);
+    fpdump = fopen("/sdcard/nv12_dump.yuv", "wb");
+    assert(fpdump);
+    // Y
+    for (int i = 0; i < blit_nv12_target->height; ++i) {
+        fwrite(data + offsets[0] + i * pitches[0], 1, blit_nv12_target->width, fpdump);
+    }
+    // UV
+    for (int i = 0; i < blit_nv12_target->height/2; ++i) {
+        fwrite(data + offsets[1] + i * pitches[1], 1, blit_nv12_target->width, fpdump);
+    }
+    fclose(fpdump);
+    printf("Dumped converted NV12 to /sdcard/nv12_dump.yuv\n");
+    decoder.unmapData(*blit_nv12_target, maphandle);
+
+    st = decoder.blit(*dec_target, *blit_yuy2_target);
+    assert(st == JD_SUCCESS);
+    maphandle = decoder.mapData(*blit_yuy2_target, (void**) &data, offsets, pitches);
+    assert (maphandle.valid);
+    fpdump = fopen("/sdcard/yuy2_dump.yuv", "wb");
+    assert(fpdump);
+    // YUYV
+    for (int i = 0; i < blit_yuy2_target->height; ++i) {
+        fwrite(data + offsets[0] + i * pitches[0], 2, blit_yuy2_target->width, fpdump);
+    }
+    fclose(fpdump);
+    printf("Dumped converted YUY2 to /sdcard/yuy2_dump.yuv\n");
+    decoder.unmapData(*blit_yuy2_target, maphandle);
+
+    st = decoder.blit(*dec_target, *blit_rgba_target);
+    assert(st == JD_SUCCESS);
+    maphandle = decoder.mapData(*blit_rgba_target, (void**) &data, offsets, pitches);
+    assert (maphandle.valid);
+    fpdump = fopen("/sdcard/rgba_dump.yuv", "wb");
+    assert(fpdump);
+    // RGBA
+    for (int i = 0; i < blit_rgba_target->height; ++i) {
+        fwrite(data + offsets[0] + i * pitches[0], 4, blit_rgba_target->width, fpdump);
+    }
+    fclose(fpdump);
+    printf("Dumped converted RGBA to /sdcard/rgba_dump.yuv\n");
+    decoder.unmapData(*blit_rgba_target, maphandle);
+
+    st = decoder.blit(*dec_target, *blit_yv12_target);
+    assert(st == JD_SUCCESS);
+    maphandle = decoder.mapData(*blit_yv12_target, (void**) &data, offsets, pitches);
+    assert (maphandle.valid);
+    fpdump = fopen("/sdcard/yv12_dump.yuv", "wb");
+    assert(fpdump);
+    // YV12
+    for (int i = 0; i < blit_yv12_target->height; ++i) {
+        fwrite(data + offsets[0] + i * pitches[0], 1, blit_yv12_target->width, fpdump);
+    }
+    for (int i = 0; i < blit_yv12_target->height/2; ++i) {
+        fwrite(data + offsets[1] + i * pitches[1], 1, blit_yv12_target->width/2, fpdump);
+    }
+    for (int i = 0; i < blit_yv12_target->height/2; ++i) {
+        fwrite(data + offsets[2] + i * pitches[2], 1, blit_yv12_target->width/2, fpdump);
+    }
+    fclose(fpdump);
+    printf("Dumped converted YV12 to /sdcard/yv12_dump.yuv\n");
+    decoder.unmapData(*blit_yv12_target, maphandle);
+
+    decoder.deinit();
+
+    deinit_render_target(*dec_target);
+    deinit_render_target(*blit_nv12_target);
+    deinit_render_target(*blit_yuy2_target);
+    deinit_render_target(*blit_rgba_target);
+    deinit_render_target(*blit_yv12_target);
+    delete[] jpginfo.buf;
+
+}
+
+enum target_state // per-surface hand-off state shared between the decode and blit loops
+{
+    TARGET_FREE,   // initial state and state after a blit; the decode loop waits for it before reusing a surface
+    TARGET_DECODE, // set by the decode loop just before decoder->decode() is called
+    TARGET_BLIT,   // set once decode() has returned; the blit side waits for it
+};
+
+struct thread_param // argument block passed as the void* to both thread procs
+{
+    JpegDecoder *decoder;        // decoder used by both the decode and the blit loop
+    RenderTarget *targets;       // decode destinations, target_count entries
+    RenderTarget *nv12_targets;  // NV12 blit destinations, indexed like targets
+    RenderTarget *yuy2_targets;  // YUY2 blit destinations (only read in the VPP_DECODE_BATCH path)
+    RenderTarget *imc3_targets;  // stored by the caller but never read by either thread proc
+    size_t target_count;         // number of entries in each array above and in states
+    target_state *states;        // one target_state per surface; accessed without any locking
+};
+
+static Mutex state_lock; // NOTE(review): never locked anywhere in this file; the states[] hand-off runs unsynchronized
+
+void read_new_frame(JpegInfo &jpginfo)
+{ // Zeroes jpginfo, then loads all of JPGFILE into a newly new[]-allocated jpginfo.buf of jpginfo.bufsize bytes.
+    memset(&jpginfo, 0, sizeof(JpegInfo)); // wipes every field, including any buf pointer from a previous call (it is not freed here)
+    FILE* fp = fopen(JPGFILE, "rb");
+    assert(fp);
+    fseek(fp, 0, SEEK_END);
+    jpginfo.bufsize = ftell(fp); // NOTE(review): an ftell() failure (-1) is not detected
+    fseek(fp, 0, SEEK_SET);
+    jpginfo.buf = new uint8_t[jpginfo.bufsize]; // ownership passes to the caller; nothing in this function deletes it
+    fread(jpginfo.buf, 1, jpginfo.bufsize, fp); // NOTE(review): return value ignored, so a short read goes unnoticed
+    fclose(fp);
+}
+
+static bool exit_thread = false; // stop flag polled by both thread procs; NOTE(review): plain bool written and read across threads without synchronization
+
+#define VPP_DECODE_BATCH // when defined, the decode thread performs the blits itself and no separate blit thread is created
+
+// Decode worker: until exit_thread is set, reloads JPGFILE and decodes it into each target in turn (blitting the previous one under VPP_DECODE_BATCH).
+void* decode_frame_threadproc(void* data)
+{
+    thread_param *param = (thread_param*) data;
+    JpegInfo *jpginfos = new JpegInfo[param->target_count](); // value-initialized so every buf starts out NULL
+    int surface_id = 0;
+    int blit_surface_id = (surface_id + param->target_count - 1) % param->target_count;
+    while(!exit_thread) {
+        printf("%s blit %d and decode %d\n", __FUNCTION__, blit_surface_id, surface_id);
+        RenderTarget& cur_target = param->targets[surface_id];
+#ifdef VPP_DECODE_BATCH
+        RenderTarget& blit_target = param->targets[blit_surface_id];
+        RenderTarget& blit_nv12_target = param->nv12_targets[blit_surface_id];
+        RenderTarget& blit_yuy2_target = param->yuy2_targets[blit_surface_id];
+        if (param->states[blit_surface_id] == TARGET_BLIT) {
+            printf("%s blit with surface %d\n", __FUNCTION__, blit_surface_id);
+            nsecs_t t1 = systemTime();
+            if (param->decoder->busy(blit_target)) {
+                param->decoder->sync(blit_target);
+                nsecs_t t2 = systemTime();
+                printf("%s wait surface %d decode took %f ms\n", __FUNCTION__, blit_surface_id, ns2us(t2 - t1)/1000.0);
+            }
+            t1 = systemTime();
+            param->decoder->blit(blit_target, blit_nv12_target);
+            nsecs_t t2 = systemTime();
+            param->decoder->blit(blit_target, blit_yuy2_target);
+            nsecs_t t3 = systemTime();
+            printf("%s blit %d NV12 took %f ms, YUY2 took %f ms\n", __FUNCTION__,
+                blit_surface_id, ns2us(t2 - t1)/1000.0, ns2us(t3 - t2)/1000.0);
+            param->states[blit_surface_id] = TARGET_FREE; // surface is reusable only once both blits are done
+        }
+#endif
+        if (param->states[surface_id] != TARGET_FREE) {
+            printf("%s wait surface %d blit finish\n", __FUNCTION__, surface_id);
+            nsecs_t t1 = systemTime();
+            while (param->states[surface_id] != TARGET_FREE) {
+                usleep(1000);
+            }
+            nsecs_t t2 = systemTime();
+            printf("%s wait surface %d for decode/blit finish took %f ms\n", __FUNCTION__, surface_id, ns2us(t2 - t1)/1000.0);
+        }
+        JpegInfo &jpginfo = jpginfos[surface_id];
+        delete[] jpginfo.buf; // free this slot's previous bitstream; read_new_frame() zeroes the only pointer to it
+        read_new_frame(jpginfo);
+        nsecs_t t3 = systemTime();
+        param->decoder->parse(jpginfo);
+        nsecs_t t4 = systemTime();
+        printf("%s parse surface %d took %f ms\n", __FUNCTION__, surface_id, ns2us(t4 - t3)/1000.0);
+        param->states[surface_id] = TARGET_DECODE;
+        param->decoder->decode(jpginfo, cur_target);
+        nsecs_t t5 = systemTime();
+        printf("%s decode surface %d took %f ms\n", __FUNCTION__, surface_id, ns2us(t5 - t4)/1000.0);
+        param->states[surface_id] = TARGET_BLIT;
+        surface_id  = (surface_id + 1) % param->target_count;
+        blit_surface_id  = (blit_surface_id + 1) % param->target_count;
+    }
+    for (size_t i = 0; i < param->target_count; ++i) delete[] jpginfos[i].buf; // release the last bitstream held by each slot
+    delete[] jpginfos;
+    return NULL;
+}
+
+// Blit worker: walks the surfaces round-robin until exit_thread is set. For each one it waits for
+// TARGET_BLIT, blits the decoded target into the matching NV12 target, then marks it TARGET_FREE.
+void* blit_frame_threadproc(void* data)
+{
+    thread_param *ctx = static_cast<thread_param*>(data);
+    for (int idx = 0; !exit_thread; idx = (idx + 1) % ctx->target_count) {
+        printf("%s blit %d->%d\n", __FUNCTION__, idx, idx);
+        RenderTarget& src = ctx->targets[idx];
+        RenderTarget& dst = ctx->nv12_targets[idx];
+        if (ctx->states[idx] != TARGET_BLIT) {
+            printf("%s wait surface %d decoding finish\n", __FUNCTION__, idx);
+            nsecs_t wait_begin = systemTime();
+            while (ctx->states[idx] != TARGET_BLIT) {
+                usleep(100);
+            }
+            nsecs_t wait_end = systemTime();
+            printf("%s wait surface %d for decode finish took %f ms\n", __FUNCTION__, idx, ns2us(wait_end - wait_begin)/1000.0);
+        }
+        nsecs_t blit_begin = systemTime();
+        ctx->decoder->blit(src, dst);
+        nsecs_t blit_end = systemTime();
+        printf("%s blit surface %d took %f ms\n", __FUNCTION__, idx, ns2us(blit_end - blit_begin)/1000.0);
+        ctx->states[idx] = TARGET_FREE;
+    }
+    return NULL;
+}
+
+// Stress test: allocates 12 decode/NV12/YUY2 targets, runs the worker thread(s) for one minute, then tears everything down.
+void parallel_decode_blit_test()
+{
+    RenderTarget **all_targets = new RenderTarget*[12];
+    RenderTarget dec_targets[12];
+    RenderTarget nv12_targets[12];
+    RenderTarget yuy2_targets[12];
+    RenderTarget imc3_targets[12];
+    target_state states[12];
+    for (int i = 0; i < 12; ++i) {
+        init_render_target(dec_targets[i], 1280, 720, fourcc2PixelFormat(VA_FOURCC_422H)); // 422H
+        init_render_target(nv12_targets[i], 1280, 720, fourcc2PixelFormat(VA_FOURCC_NV12)); // NV12 for video encode
+        init_render_target(yuy2_targets[i], 1280, 720, fourcc2PixelFormat(VA_FOURCC_YUY2)); // YUY2 for overlay
+        //init_render_target(imc3_targets[i], 1280, 720, HAL_PIXEL_FORMAT_IMC3); // IMC3 for libjpeg encode
+        all_targets[i] = &dec_targets[i];
+        //all_targets[i + 12] = &nv12_targets[i];
+        //all_targets[i + 24] = &yuy2_targets[i];
+        //all_targets[i + 36] = &imc3_targets[i];
+        states[i] = TARGET_FREE;
+    }
+
+    exit_thread = false;
+
+    pthread_attr_t dec_attr, blit_attr;
+    pthread_attr_init(&dec_attr);
+    pthread_attr_init(&blit_attr);
+    pthread_attr_setdetachstate(&dec_attr, PTHREAD_CREATE_JOINABLE);
+    pthread_attr_setdetachstate(&blit_attr, PTHREAD_CREATE_JOINABLE);
+    pthread_t dec_thread, blit_thread;
+    thread_param param;
+    param.nv12_targets = nv12_targets;
+    param.yuy2_targets = yuy2_targets;
+    param.imc3_targets = imc3_targets;
+    param.targets = dec_targets;
+    param.target_count = 12;
+    param.decoder = new JpegDecoder();
+    //param.decoder->init(1280, 720, all_targets, 36);
+    param.decoder->init(1280, 720, all_targets, 12);
+    param.states = states;
+    pthread_create(&dec_thread, &dec_attr, decode_frame_threadproc, (void*)&param);
+#ifndef VPP_DECODE_BATCH
+    pthread_create(&blit_thread, &blit_attr, blit_frame_threadproc, (void*)&param);
+#endif
+    pthread_attr_destroy(&blit_attr);
+    pthread_attr_destroy(&dec_attr);
+
+    // test for 1 minute
+    usleep(60 * 1000 * 1000);
+    exit_thread = true;
+    void *dummy;
+    pthread_join(dec_thread, &dummy);
+#ifndef VPP_DECODE_BATCH
+    pthread_join(blit_thread, &dummy);
+#endif
+    param.decoder->deinit(); // workers have been joined; tear the decoder down before its targets are freed
+    delete param.decoder;    // was leaked before
+
+    for (int i = 0; i < 12; ++i) {
+        deinit_render_target(dec_targets[i]);
+        deinit_render_target(nv12_targets[i]);
+        deinit_render_target(yuy2_targets[i]);
+        //deinit_render_target(imc3_targets[i]);
+    }
+    delete[] all_targets;
+}
+
+int main(int argc, char ** argv) // argc/argv are not used
+{
+    //decode_blit_functionality_test();
+    parallel_decode_blit_test(); // sleeps for one minute while the worker thread(s) run, then joins them
+    return 0;
+}