[libmix parser] Multi-thread parsing implementation.

BZ: 183804

1. Refactor the H.264 parser to resolve the logical and data-structure dependencies that
   block multithreading, and to improve PnP.
2. Implement two multithreaded parsing schemes: bundle input and sequential input.
   The bundle scheme is the default (a standalone sketch of the bundle work distribution
   is included below).

Change-Id: I851419e079c9e0dd461337fd2be6627c88e0a294
Signed-off-by: Yuanjun Huang <yuanjun.huang@intel.com>
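
For reference, the bundle scheme hands each worker thread every Nth remaining slice item
(thread i parses items start_item+i, start_item+i+N, ... where N is the active thread count),
while the sequential scheme pipelines one item per call and returns results with a delay of
active_thread_count-1 packets. The following standalone sketch is not part of the patch and
does not use the libmix API; it only illustrates the bundle-style strided work distribution
with plain pthreads, and parse_item() is a hypothetical stand-in for
ops->parse_syntax_threading().

    /* bundle_sketch.c -- standalone illustration, not libmix code.
     * Build: gcc -pthread bundle_sketch.c -o bundle_sketch
     */
    #include <pthread.h>
    #include <stdio.h>

    #define NUM_THREADS 4
    #define NUM_ITEMS   13                  /* e.g. slice items in one frame buffer */

    typedef struct {
        int index;                          /* worker index, like PerThreadContext::index */
        int start_item;                     /* first item for this worker                 */
        int last_item;                      /* last item in the bundle                    */
        int stride;                         /* number of active threads                   */
    } worker_arg_t;

    /* Hypothetical stand-in for ops->parse_syntax_threading(). */
    static void parse_item(int item, int slice_index, int worker)
    {
        printf("worker %d: item %2d -> slice %2d\n", worker, item, slice_index);
    }

    static void* worker(void* arg)
    {
        worker_arg_t* w = arg;
        int item = w->start_item;
        int slice_index = w->index;         /* slice numbering starts at the worker index */

        /* Same strided loop as parser_worker_thread_bundle(). */
        while (item <= w->last_item) {
            parse_item(item, slice_index, w->index);
            item        += w->stride;
            slice_index += w->stride;
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t    threads[NUM_THREADS];
        worker_arg_t args[NUM_THREADS];
        int i;

        for (i = 0; i < NUM_THREADS; i++) {
            args[i].index      = i;
            args[i].start_item = i;         /* first slice item is 0 in this sketch */
            args[i].last_item  = NUM_ITEMS - 1;
            args[i].stride     = NUM_THREADS;
            pthread_create(&threads[i], NULL, worker, &args[i]);
        }
        for (i = 0; i < NUM_THREADS; i++)
            pthread_join(threads[i], NULL);
        return 0;
    }

Thread-safe collection of results in the real code goes through ops->post_parse_threading();
the sketch omits output handling.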
diff --git a/mixvbp/vbp_manager/Android.mk b/mixvbp/vbp_manager/Android.mk
index a45fe7e..d861197 100755
--- a/mixvbp/vbp_manager/Android.mk
+++ b/mixvbp/vbp_manager/Android.mk
@@ -2,19 +2,23 @@
 
 include $(CLEAR_VARS)
 
-#MIXVBP_LOG_ENABLE := true
+ifeq (true,$(strip $(PRODUCT_PACKAGE_DEBUG)))
+MIXVBP_LOG_ENABLE := true
+endif
 
-LOCAL_SRC_FILES :=			\
-	vbp_h264_parser.c		\
-	vbp_vc1_parser.c		\
-	vbp_loader.c			\
-	vbp_mp42_parser.c		\
-	vbp_utils.c			\
-	viddec_parse_sc.c		\
-	viddec_pm_parser_ops.c		\
-	viddec_pm_utils_bstream.c       \
+LOCAL_SRC_FILES :=                  \
+    vbp_h264_parser.c               \
+    vbp_vc1_parser.c                \
+    vbp_loader.c                    \
+    vbp_mp42_parser.c               \
+    vbp_utils.c                     \
+    viddec_parse_sc.c               \
+    viddec_pm_parser_ops.c          \
+    viddec_pm_utils_bstream.c       \
+    vbp_thread.c
 
 LOCAL_CFLAGS := -DVBP -DHOST_ONLY
+LOCAL_CFLAGS += -DUSE_MULTI_THREADING
 
 LOCAL_C_INCLUDES +=			\
 	$(LOCAL_PATH)/include		\
diff --git a/mixvbp/vbp_manager/include/viddec_parser_ops.h b/mixvbp/vbp_manager/include/viddec_parser_ops.h
index 77054b5..533b231 100755
--- a/mixvbp/vbp_manager/include/viddec_parser_ops.h
+++ b/mixvbp/vbp_manager/include/viddec_parser_ops.h
@@ -30,6 +30,10 @@
 typedef    uint32_t (*fn_update_data)(void *parent, void *data, uint32_t size);
 #endif
 
+typedef    uint32_t (*fn_is_payload_start)(void *parent);
+typedef    uint32_t (*fn_parse_syntax_threading) (void *parent, void *ctxt, uint32_t thread_index);
+typedef    uint32_t (*fn_post_parse_threading) (void *parent, void *ctxt, uint32_t slice_index);
+typedef    uint32_t (*fn_query_thread_parsing_cap) (void);
 
 typedef struct
 {
@@ -45,6 +49,10 @@
 #if (defined USE_AVC_SHORT_FORMAT || defined USE_SLICE_HEADER_PARSING)
     fn_update_data update_data;
 #endif
+    fn_is_payload_start is_payload_start;
+    fn_parse_syntax_threading parse_syntax_threading;
+    fn_post_parse_threading post_parse_threading;
+    fn_query_thread_parsing_cap query_thread_parsing_cap;
 } viddec_parser_ops_t;
 
 
diff --git a/mixvbp/vbp_manager/vbp_h264_parser.c b/mixvbp/vbp_manager/vbp_h264_parser.c
index dd93ea7..9c75519 100755
--- a/mixvbp/vbp_manager/vbp_h264_parser.c
+++ b/mixvbp/vbp_manager/vbp_h264_parser.c
@@ -26,6 +26,7 @@
 #include <dlfcn.h>
 
 #include "h264.h"
+#include "h264parse.h"
 #include "vbp_loader.h"
 #include "vbp_utils.h"
 #include "vbp_h264_parser.h"
@@ -185,6 +186,33 @@
         return VBP_LOAD;
     }
 
+    pcontext->parser_ops->is_payload_start = dlsym(pcontext->fd_parser, "viddec_h264_payload_start");
+    if (NULL == pcontext->parser_ops->is_payload_start)
+    {
+        ETRACE ("Failed to set entry point." );
+    }
+
+    pcontext->parser_ops->parse_syntax_threading = dlsym(pcontext->fd_parser, "viddec_h264_threading_parse");
+    if (NULL == pcontext->parser_ops->parse_syntax_threading)
+    {
+        ETRACE ("Failed to set entry point." );
+        return VBP_LOAD;
+    }
+
+    pcontext->parser_ops->post_parse_threading = dlsym(pcontext->fd_parser, "viddec_h264_post_parse");
+    if (NULL == pcontext->parser_ops->post_parse_threading)
+    {
+        ETRACE ("Failed to set entry point." );
+        return VBP_LOAD;
+    }
+
+    pcontext->parser_ops->query_thread_parsing_cap = dlsym(pcontext->fd_parser, "viddec_h264_query_thread_parsing_cap");
+    if (NULL == pcontext->parser_ops->query_thread_parsing_cap)
+    {
+        ETRACE ("Failed to set entry point." );
+        return VBP_LOAD;
+    }
+
     /* entry point not needed */
     pcontext->parser_ops->is_frame_start = NULL;
     return VBP_OK;
@@ -1022,6 +1050,7 @@
     {
         /* partial frame */
         query_data->num_pictures = 1;
+        WTRACE("partial frame found.");
     }
 
     if (query_data->num_pictures > MAX_NUM_PICTURES)
@@ -1041,7 +1070,7 @@
     pic_parms = pic_data->pic_parms;
 
     // relax this condition to support partial frame parsing
-
+    // TODO: does partial frame parsing need to be supported?
     //if (parser->info.SliceHeader.first_mb_in_slice == 0)
     {
         /**
@@ -1131,6 +1160,8 @@
         pic_parms->pic_fields.bits.constrained_intra_pred_flag = parser->info.active_PPS.constrained_intra_pred_flag;
 
         pic_parms->frame_num = parser->info.SliceHeader.frame_num;
+
+
     }
 
 
@@ -1161,7 +1192,6 @@
         pic_parms->num_ref_idx_l1_default_active_minus1 = parser->info.active_PPS.num_ref_idx_l1_active - 1;
     }
 #endif
-
     return VBP_OK;
 }
 
@@ -1682,11 +1712,11 @@
         break;
 
     case h264_NAL_UNIT_TYPE_SPS:
-        ITRACE("SPS header is parsed.");
+        VTRACE("SPS header is parsed.");
         break;
 
     case h264_NAL_UNIT_TYPE_PPS:
-        ITRACE("PPS header is parsed.");
+        VTRACE("PPS header is parsed.");
         break;
 
     case h264_NAL_UNIT_TYPE_Acc_unit_delimiter:
@@ -1694,11 +1724,11 @@
         break;
 
     case h264_NAL_UNIT_TYPE_EOSeq:
-        ITRACE("EOSeq is parsed.");
+        VTRACE("EOSeq is parsed.");
         break;
 
     case h264_NAL_UNIT_TYPE_EOstream:
-        ITRACE("EOStream is parsed");
+        VTRACE("EOStream is parsed");
         break;
 
     default:
diff --git a/mixvbp/vbp_manager/vbp_mp42_parser.c b/mixvbp/vbp_manager/vbp_mp42_parser.c
index 9b4c63f..b954b38 100755
--- a/mixvbp/vbp_manager/vbp_mp42_parser.c
+++ b/mixvbp/vbp_manager/vbp_mp42_parser.c
@@ -124,6 +124,11 @@
     /* entry point not needed */
     pcontext->parser_ops->flush = NULL;
 
+    pcontext->parser_ops->is_payload_start = NULL;
+    pcontext->parser_ops->parse_syntax_threading = NULL;
+    pcontext->parser_ops->post_parse_threading = NULL;
+    pcontext->parser_ops->query_thread_parsing_cap = NULL;
+
     return VBP_OK;
 }
 
diff --git a/mixvbp/vbp_manager/vbp_thread.c b/mixvbp/vbp_manager/vbp_thread.c
new file mode 100644
index 0000000..3a2aa09
--- /dev/null
+++ b/mixvbp/vbp_manager/vbp_thread.c
@@ -0,0 +1,634 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2013 Intel Corporation.  All rights reserved.
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+*/
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include "vbp_thread.h"
+#include "vbp_loader.h"
+
+/* assume a quad-core CPU with hyper-threading */
+#define MAX_AUTO_THREADS 8
+
+#define THREADING_SCHEME_BUNDLE
+
+typedef long long int nsecs_t;
+
+// Returns wall-clock time in microseconds (despite the nsecs_t name).
+static nsecs_t systemTime()
+{
+    struct timeval t;
+    gettimeofday(&t, NULL);
+    return 1000000 * t.tv_sec + t.tv_usec;
+}
+
+
+typedef struct PerThreadContext {
+    pthread_t thread;
+
+    int32_t index;                  // thread index referenced by thread itself when needed.
+    int32_t thread_init;
+    struct ThreadContext* parent;
+
+    pthread_cond_t input_cond;      // Used to wait for a new packet from the main thread.
+    pthread_cond_t progress_cond;   // Used by child threads to wait for progress to change.
+    pthread_cond_t output_cond;     // Used by the main thread to wait for frames to finish.
+
+    pthread_mutex_t mutex;          // Mutex used to protect the contents of the PerThreadContext.
+    pthread_mutex_t progress_mutex; // Mutex used to protect frame progress values and progress_cond.
+
+    vbp_context* vbpctx;
+    viddec_pm_cxt_t* pmctx;         // Working parser context
+    viddec_pm_cxt_t* input_pmctx;   // Input parser context
+    void* codec_data;               // Points to the codec-specific data that holds the output;
+                                    // all threads share one instance.
+    uint32_t start_item;            // Index of the first item this thread parses (bundle scheme).
+
+    enum {
+        STATE_INPUT_WAIT,
+        STATE_WORKING,
+        STATE_EXIT
+    } state;
+
+} PerThreadContext;
+
+typedef struct ThreadContext {
+    PerThreadContext* threads[MAX_AUTO_THREADS]; // The contexts for each thread.
+    PerThreadContext* prev_thread;               // The last thread an input packet was submitted to (not currently used).
+    int delaying;                       // Set for the first N packets, where N is the number of threads.
+                                        // While it is set, vbp_thread_parse_syntax won't return any results
+
+    uint32_t next_finished;             // Index of the next thread to collect output from.
+    uint32_t next_parsing;              // Index of the next thread to submit an input packet to.
+
+    uint32_t active_thread_count;       // Number of threads that need to be warmed up.
+
+    sem_t finish_sem;                   // Semaphore used to synchronize worker threads with the main thread on completion.
+    uint32_t start_item_to_parse;
+    uint32_t last_item_to_parse;
+
+} ThreadContext;
+
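+// Pipeline model for the sequential scheme: the main thread feeds one packet per call to
+// threads[next_parsing] (round-robin) and collects the result from threads[next_finished].
+// Output therefore lags input by (active_thread_count - 1) packets; while 'delaying' is set
+// the pipeline is still filling and vbp_thread_parse_syntax() reports no output.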
+
+int32_t get_cpu_count()
+{
+    int32_t cpu_num = 1;    // default if neither sysconf name is available
+#if defined(_SC_NPROC_ONLN)
+    cpu_num = sysconf(_SC_NPROC_ONLN);
+#elif defined(_SC_NPROCESSORS_ONLN)
+    cpu_num = sysconf(_SC_NPROCESSORS_ONLN);
+#endif
+    return cpu_num;
+}
+
+
+void set_thread_affinity_mask(cpu_set_t mask)
+{
+    int err, syscallres;
+    pid_t pid = gettid();
+    syscallres = syscall(__NR_sched_setaffinity, pid, sizeof(mask), &mask);
+    if (syscallres)
+    {
+        ETRACE("Error in the syscall setaffinity.");
+    }
+}
+
+
+static void vbp_update_parser_for_item(viddec_pm_cxt_t *cxt,
+                                      viddec_pm_cxt_t *src_cxt,
+                                      uint32 item)
+{
+
+    /* set up bitstream buffer */
+    cxt->getbits.list = src_cxt->getbits.list;
+
+    /* setup buffer pointer */
+    cxt->getbits.bstrm_buf.buf = src_cxt->getbits.bstrm_buf.buf;
+
+
+    /* setup bitstream parser */
+    cxt->getbits.bstrm_buf.buf_index = src_cxt->list.data[item].stpos;
+    cxt->getbits.bstrm_buf.buf_st = src_cxt->list.data[item].stpos;
+    cxt->getbits.bstrm_buf.buf_end = src_cxt->list.data[item].edpos;
+
+    /* It is possible to end up with buf_offset not equal zero. */
+    cxt->getbits.bstrm_buf.buf_bitoff = 0;
+    cxt->getbits.au_pos = 0;
+    cxt->getbits.list_off = 0;
+    cxt->getbits.phase = 0;
+    cxt->getbits.emulation_byte_counter = 0;
+
+    cxt->list.start_offset = src_cxt->list.data[item].stpos;
+    cxt->list.end_offset = src_cxt->list.data[item].edpos;
+    cxt->list.total_bytes = src_cxt->list.data[item].edpos - src_cxt->list.data[item].stpos;
+
+}
+
+
+
+static void* parser_worker_thread(void* arg)
+{
+    PerThreadContext* p = arg;
+    ThreadContext* t_cxt = p->parent;
+    vbp_context* vbpctx = p->vbpctx;
+    viddec_pm_cxt_t* pm_cxt = p->pmctx;
+    viddec_parser_ops_t* ops = vbpctx->parser_ops;
+
+
+// Pinning each parsing thread to a dedicated CPU core is probably not desirable:
+// keeping all cores fully occupied can even hurt performance.
+// Current experimental solution: only the main thread gets an affinity mask.
+#if 0
+    cpu_set_t mask;
+    CPU_ZERO(&mask);
+    CPU_SET(p->index, &mask); // cpu affinity is set to same num as thread index
+    set_thread_affinity_mask(mask);
+#endif
+
+    pthread_mutex_lock(&p->mutex);
+
+    nsecs_t t0;
+    while (1) {
+        while (p->state == STATE_INPUT_WAIT) {
+            pthread_cond_wait(&p->input_cond, &p->mutex);
+        }
+
+        if (p->state == STATE_WORKING) {
+            //now we get input data, call actual parse.
+            //t0 = systemTime();
+            sleep(0);
+            ops->parse_syntax_threading((void *)p->pmctx, p->codec_data, p->index);
+
+            pthread_mutex_lock(&p->progress_mutex);
+            p->state = STATE_INPUT_WAIT;
+
+            pthread_cond_broadcast(&p->progress_cond);
+            pthread_cond_signal(&p->output_cond);
+            pthread_mutex_unlock(&p->progress_mutex);
+        } else if (p->state == STATE_EXIT) {
+            break;
+        }
+    }
+    pthread_mutex_unlock(&p->mutex);
+    pthread_exit(NULL);
+    return NULL;
+}
+
+static void* parser_worker_thread_bundle(void* arg)
+{
+    PerThreadContext* p = arg;
+    ThreadContext* t_cxt = p->parent;
+    vbp_context* vbpctx = p->vbpctx;
+    viddec_parser_ops_t* ops = vbpctx->parser_ops;
+
+// Note: unlike the sequential worker above, the bundle worker currently does pin each
+// parsing thread to the CPU core matching its index (enabled via the #if 1 below),
+// even though keeping all cores busy can hurt performance.
+#if 1
+    cpu_set_t mask;
+    CPU_ZERO(&mask);
+    CPU_SET(p->index, &mask); // cpu affinity is set to same num as thread index
+    set_thread_affinity_mask(mask);
+#endif
+
+    pthread_mutex_lock(&p->mutex);
+
+    nsecs_t t0;
+    while (1) {
+        while (p->state == STATE_INPUT_WAIT) {
+            pthread_cond_wait(&p->input_cond, &p->mutex);
+        }
+
+        if (p->state == STATE_WORKING) {
+            uint32_t working_item = p->start_item;  // start point
+            uint32_t slice_index = 0 + p->index;     // start point
+
+            while (working_item <= t_cxt->last_item_to_parse) {
+                vbp_update_parser_for_item(p->pmctx, p->input_pmctx, working_item);
+                ops->parse_syntax_threading((void *)p->pmctx, p->codec_data, slice_index);
+
+                working_item += t_cxt->active_thread_count;
+                slice_index += t_cxt->active_thread_count;
+            }
+
+            pthread_mutex_lock(&p->progress_mutex);
+            p->state = STATE_INPUT_WAIT;
+
+            pthread_cond_broadcast(&p->progress_cond);
+            pthread_mutex_unlock(&p->progress_mutex);
+        } else if (p->state == STATE_EXIT) {
+            break;
+        }
+    }
+    pthread_mutex_unlock(&p->mutex);
+    pthread_exit(NULL);
+    return NULL;
+}
+
+
+uint32_t update_context_from_input(viddec_pm_cxt_t* dest,
+                                   viddec_pm_cxt_t* source)
+{
+    if ((dest == NULL) || (source == NULL) || (dest == source)) {
+        ETRACE("%s error", __func__);
+        return 1;
+    }
+    /* set up bitstream buffer */
+    dest->getbits.list = source->getbits.list;
+
+    /* buffer pointer */
+    dest->getbits.bstrm_buf.buf = source->getbits.bstrm_buf.buf;
+
+    /* bitstream parser */
+    dest->getbits.bstrm_buf.buf_index = source->getbits.bstrm_buf.buf_index;
+    dest->getbits.bstrm_buf.buf_st = source->getbits.bstrm_buf.buf_st;
+    dest->getbits.bstrm_buf.buf_end = source->getbits.bstrm_buf.buf_end;
+
+    /* It is possible to end up with buf_offset not equal zero. */
+    dest->getbits.bstrm_buf.buf_bitoff = 0;
+    dest->getbits.au_pos = 0;
+    dest->getbits.list_off = 0;
+    dest->getbits.phase = 0;
+    dest->getbits.emulation_byte_counter = 0;
+
+    dest->list.start_offset = source->list.start_offset;
+    dest->list.end_offset = source->list.end_offset;
+    dest->list.total_bytes = source->list.total_bytes;
+    return 0;
+}
+
+uint32_t update_context_to_output(viddec_pm_cxt_t* dest,
+                                   viddec_pm_cxt_t* source)
+{
+    if ((dest == NULL) || (source == NULL) || (dest == source)) {
+        ETRACE("%s error", __func__);
+        return 1;
+    }
+
+    /* bitstream parser */
+    dest->getbits.bstrm_buf.buf_index = source->getbits.bstrm_buf.buf_index;
+    dest->getbits.bstrm_buf.buf_st = source->getbits.bstrm_buf.buf_st;
+    dest->getbits.bstrm_buf.buf_end = source->getbits.bstrm_buf.buf_end;
+
+    /* It is possible to end up with buf_offset not equal zero. */
+    dest->getbits.bstrm_buf.buf_bitoff = source->getbits.bstrm_buf.buf_bitoff;
+    dest->getbits.au_pos = source->getbits.au_pos;
+    dest->getbits.list_off = source->getbits.list_off;
+    dest->getbits.phase = source->getbits.phase;
+    dest->getbits.emulation_byte_counter = source->getbits.emulation_byte_counter;
+    dest->getbits.is_emul_reqd = source->getbits.is_emul_reqd;
+
+    dest->list.start_offset = source->list.start_offset;
+    dest->list.end_offset = source->list.end_offset;
+    dest->list.total_bytes = source->list.total_bytes;
+
+    return 0;
+}
+
+
+
+uint32_t feed_thread_input(PerThreadContext* p, void* parent)
+{
+    ThreadContext* t_context = p->parent;
+    viddec_pm_cxt_t* pm_cxt = (viddec_pm_cxt_t*) parent;
+
+    //nsecs_t t0 = systemTime();
+    if (pm_cxt->getbits.bstrm_buf.buf == NULL) {   // drain mode: no new input to submit
+        return 1;
+    }
+
+    pthread_mutex_lock(&p->mutex);
+
+    if (p->state == STATE_WORKING) {
+        pthread_mutex_lock(&p->progress_mutex);
+        while (p->state == STATE_WORKING) {
+            pthread_cond_wait(&p->progress_cond, &p->progress_mutex);
+        }
+        pthread_mutex_unlock(&p->progress_mutex);
+    }
+
+    /* Now update the input to the working thread*/
+    update_context_from_input(p->pmctx, pm_cxt);
+    p->codec_data = (void*)&(pm_cxt->codec_data[0]);
+
+    p->state = STATE_WORKING;
+    t_context->next_parsing++;
+
+    //t0 = systemTime();
+    pthread_cond_signal(&p->input_cond);
+    pthread_mutex_unlock(&p->mutex);
+
+    return 0;
+}
+
+void vbp_thread_init(vbp_context* pcontext)
+{
+    int i;
+    ThreadContext* t_context = NULL;
+    int32_t thread_count = pcontext->thread_count;
+    int32_t err = 0;
+
+#ifdef THREADING_SCHEME_BUNDLE
+    ITRACE("%s, threading_parse_scheme set to SCHEME_BUNDLE", __func__);
+    pcontext->threading_parse_scheme = SCHEME_BUNDLE;
+#else
+    ITRACE("%s, threading_parse_scheme set to SCHEME_SEQUENTIAL", __func__);
+    pcontext->threading_parse_scheme = SCHEME_SEQUENTIAL;
+#endif
+
+    if (thread_count == 0) {
+        int32_t cpu_num = get_cpu_count();
+        if (cpu_num > 1) {
+            // Both schemes currently use the same policy: one worker thread per core,
+            // leaving one core for the main thread.
+            thread_count = pcontext->thread_count = cpu_num - 1;
+        }
+        else {
+            thread_count = pcontext->thread_count = 1;
+        }
+    }
+
+    pcontext->thread_opaque = t_context =
+              (ThreadContext*)malloc(sizeof(ThreadContext));
+    if (t_context != NULL) {
+        t_context->active_thread_count = thread_count; //default active count
+
+        t_context->delaying = 1;
+        t_context->next_parsing = t_context->next_finished = 0;
+
+        ITRACE("%s, creating %d parsing thread.", __func__, thread_count);
+        for (i = 0; i < thread_count; i++) {
+            t_context->threads[i] = (PerThreadContext*)malloc(sizeof(PerThreadContext));
+            assert(t_context->threads[i] != NULL);
+            PerThreadContext* p = t_context->threads[i];
+
+            if (p != NULL) {
+                p->index = i;
+                p->parent = t_context;
+                p->vbpctx = pcontext;
+                p->pmctx = vbp_malloc(viddec_pm_cxt_t, 1);
+                viddec_pm_utils_bstream_init(&(p->pmctx->getbits), NULL, 0);
+
+                pthread_mutex_init(&p->mutex, NULL);
+                pthread_mutex_init(&p->progress_mutex, NULL);
+                pthread_cond_init(&p->input_cond, NULL);
+                pthread_cond_init(&p->progress_cond, NULL);
+                pthread_cond_init(&p->output_cond, NULL);
+
+                p->state = STATE_INPUT_WAIT;
+
+                if(pcontext->threading_parse_scheme == SCHEME_SEQUENTIAL) {
+                    err = pthread_create(&p->thread, NULL, parser_worker_thread, p);
+                } else {
+                    err = pthread_create(&p->thread, NULL, parser_worker_thread_bundle, p);
+                }
+
+                p->thread_init = !err;
+            }
+        } 
+    }
+#if 1
+    ITRACE("%s, set_thread_affinity_mask", __func__);
+    cpu_set_t mask;
+    CPU_ZERO(&mask);
+    CPU_SET(3, &mask); // CPUs 0..thread_count-1 are intended for the worker threads;
+                       // the main thread is pinned to the last core (hard-coded to core 3 here).
+    set_thread_affinity_mask(mask);
+#endif
+}
+
+
+void vbp_thread_free(vbp_context* pcontext)
+{
+    ITRACE("%s", __func__);
+    ThreadContext* t_context = pcontext->thread_opaque;
+    int i;
+    int thread_count = pcontext->thread_count;
+
+    for (i = 0; i < thread_count; i++) {
+        PerThreadContext *p = t_context->threads[i];
+
+        pthread_mutex_lock(&p->mutex);
+        p->state = STATE_EXIT;
+        pthread_cond_signal(&p->input_cond);
+        pthread_mutex_unlock(&p->mutex);
+
+        if (p->thread_init) {
+            pthread_join(p->thread, NULL);
+        }
+        p->thread_init = 0;
+    }
+
+    for (i = 0; i < thread_count; i++) {
+        PerThreadContext *p = t_context->threads[i];
+
+        pthread_mutex_destroy(&p->mutex);
+        pthread_mutex_destroy(&p->progress_mutex);
+        pthread_cond_destroy(&p->input_cond);
+        pthread_cond_destroy(&p->progress_cond);
+        pthread_cond_destroy(&p->output_cond);
+
+        if (p->pmctx != NULL) {
+            free(p->pmctx);
+        }
+
+        free(p);
+        p = NULL;
+    }
+
+    free(t_context);
+}
+
+/*
+ * Entry function of multi-thread parsing
+ *
+ * parent - A viddec_pm_cxt_t type parser management context,
+ *          which contains input stream.
+ * ctxt   - Codec specific parser context (codec_data[] in viddec_pm_cxt_t),
+ *          used for storing parsed output
+ * return - 0 indicates no output is available yet (the threads are still warming up)
+ *          1 indicates parsed output is available
+ *
+ * see viddec_parser_ops.h
+ *     uint32_t (*fn_parse_syntax) (void *parent, void *ctxt);
+ */
+uint32_t vbp_thread_parse_syntax(void* parent,
+                                 void* ctxt,
+                                 vbp_context* pcontext)
+{
+    ThreadContext* t_context = pcontext->thread_opaque;
+    uint32_t finished = t_context->next_finished;
+
+    if ((parent == NULL) || (ctxt == NULL)) {
+        return 0;
+    }
+
+    PerThreadContext* p;
+
+    nsecs_t t0,t1;
+    //t0 = t1 = systemTime();
+
+    /* Submit an input packet to the next parser thread*/
+    p = t_context->threads[t_context->next_parsing];
+    feed_thread_input(p, parent);
+
+    //p->state = STATE_WORKING;
+    //t_context->next_parsing++;
+
+    //t0 = systemTime();
+    //pthread_cond_signal(&p->input_cond);
+
+    //t0 = systemTime();
+
+    if ((t_context->delaying == 1) &&
+        (t_context->next_parsing > (t_context->active_thread_count - 1))) {
+        t_context->delaying = 0;
+    }
+
+    /* If we are still in the early stage of warming up the threads, indicate that there is no output. */
+    if (t_context->delaying == 1) {
+        return 0;
+    }
+
+    /* return available parsed frame from the oldest thread
+     * notice that we start getting output from thread[0] after just submitting input
+     * to thread[active_count-1]
+     * */
+    p = t_context->threads[finished++];
+
+    if (p->state != STATE_INPUT_WAIT) {
+        pthread_mutex_lock(&p->progress_mutex);
+        while (p->state != STATE_INPUT_WAIT) {
+            pthread_cond_wait(&p->output_cond, &p->progress_mutex);
+        }
+        pthread_mutex_unlock(&p->progress_mutex);
+    }
+
+
+    if (finished > (t_context->active_thread_count - 1)) {
+        finished = 0;
+    }
+
+    if (t_context->next_parsing >= t_context->active_thread_count) {
+        t_context->next_parsing = 0;
+    }
+
+    t_context->next_finished = finished;
+
+    update_context_to_output((viddec_pm_cxt_t*) parent, p->pmctx);
+
+    return 1;
+}
+
+
+/*
+ * Entry function of multi-thread parsing (bundle scheme)
+ *
+ * parent - A viddec_pm_cxt_t type parser management context,
+ *          which contains input stream.
+ * ctxt   - Codec specific parser context (codec_data[] in viddec_pm_cxt_t),
+ *          used for storing parsed output
+ * start_item - index of the first slice item; this and all remaining items
+ *              are dispatched to the worker threads
+ *
+ */
+uint32_t vbp_thread_parse_syntax_bundle(void* parent,
+                                   void* ctxt,
+                                   vbp_context* pcontext,
+                                   uint32_t start_item)
+{
+    ThreadContext* t_context = pcontext->thread_opaque;
+    if ((parent == NULL) || (ctxt == NULL)) {
+        return 0;
+    }
+
+    PerThreadContext* p = NULL;
+    viddec_pm_cxt_t* pm_cxt = (viddec_pm_cxt_t*) parent;
+    t_context->start_item_to_parse = start_item;
+    t_context->last_item_to_parse = pm_cxt->list.num_items - 1;
+
+    sem_init(&(t_context->finish_sem),0,0);
+
+    uint32_t i;
+    for (i = 0; i < t_context->active_thread_count; i++) {
+        p = t_context->threads[i];
+        p->start_item = start_item + i;
+
+        if (p->state == STATE_WORKING) {
+            pthread_mutex_lock(&p->progress_mutex);
+            while (p->state == STATE_WORKING) {
+                pthread_cond_wait(&p->progress_cond, &p->progress_mutex);
+            }
+            pthread_mutex_unlock(&p->progress_mutex);
+        }
+
+        pthread_mutex_lock(&p->mutex);  // take p->mutex before updating per-thread state; it is unlocked below
+        p->codec_data = (void*)&(pm_cxt->codec_data[0]);
+        p->input_pmctx = pm_cxt;
+
+        p->state = STATE_WORKING;
+
+        pthread_cond_signal(&p->input_cond);
+        pthread_mutex_unlock(&p->mutex);
+
+    }
+    return 1;
+}
+
+
+/*
+ * Set the number of active threads, since not all threads need to be warmed up
+ * when a frame has fewer slices than the number of threads created.
+ *
+ * active_count  - threads num to be activated.
+ */
+uint32_t vbp_thread_set_active(vbp_context* pcontext,
+                              uint32_t active_count)
+{
+    ThreadContext* t_context = pcontext->thread_opaque;
+
+    if (t_context != NULL) {
+        if (active_count < pcontext->thread_count) {
+            t_context->active_thread_count = active_count;
+        } else { //reset to the default
+            t_context->active_thread_count = pcontext->thread_count;
+        }
+
+        //reset to the default
+        t_context->delaying = 1;
+        t_context->next_parsing = t_context->next_finished = 0;
+    }
+    return 0;
+}
+
+uint32_t vbp_thread_get_active(vbp_context* pcontext)
+{
+    ThreadContext* t_context = pcontext->thread_opaque;
+
+    if (t_context != NULL) {
+        return t_context->active_thread_count;
+    }
+    return 0;
+}
+
+
diff --git a/mixvbp/vbp_manager/vbp_thread.h b/mixvbp/vbp_manager/vbp_thread.h
new file mode 100644
index 0000000..e182ac1
--- /dev/null
+++ b/mixvbp/vbp_manager/vbp_thread.h
@@ -0,0 +1,51 @@
+/* INTEL CONFIDENTIAL
+* Copyright (c) 2013 Intel Corporation.  All rights reserved.
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel
+* Corporation or its suppliers or licensors.  Title to the
+* Material remains with Intel Corporation or its suppliers and
+* licensors.  The Material contains trade secrets and proprietary
+* and confidential information of Intel or its suppliers and
+* licensors. The Material is protected by worldwide copyright and
+* trade secret laws and treaty provisions.  No part of the Material
+* may be used, copied, reproduced, modified, published, uploaded,
+* posted, transmitted, distributed, or disclosed in any way without
+* Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you
+* by disclosure or delivery of the Materials, either expressly, by
+* implication, inducement, estoppel or otherwise. Any license
+* under such intellectual property rights must be express and
+* approved by Intel in writing.
+*
+*/
+
+#ifndef VBP_THREAD_H
+#define VBP_THREAD_H
+
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <semaphore.h>
+
+#include <sys/syscall.h>
+#include "vbp_utils.h"
+#include "include/viddec_pm.h"
+#include <sched.h>
+
+
+void vbp_thread_init(vbp_context *pcontext);
+
+void vbp_thread_free(vbp_context *pcontext);
+
+uint32_t vbp_thread_parse_syntax(void* parent,
+                                 void* ctxt,
+                                 vbp_context* pcontext);
+
+uint32_t vbp_thread_set_active(vbp_context* pcontext,
+                               uint32_t active_count);
+
+uint32_t vbp_thread_get_active(vbp_context* pcontext);
+
+#endif
diff --git a/mixvbp/vbp_manager/vbp_utils.c b/mixvbp/vbp_manager/vbp_utils.c
index 1647269..f0cb94b 100755
--- a/mixvbp/vbp_manager/vbp_utils.c
+++ b/mixvbp/vbp_manager/vbp_utils.c
@@ -39,6 +39,21 @@
 #include "vbp_h264secure_parser.h"
 #endif
 
+#ifdef USE_MULTI_THREADING
+#include "vbp_thread.h"
+#endif
+
+#define LEAST_SLICES_MULTI_THREADING 10
+
+typedef long long int nsecs_t;
+
+static nsecs_t systemTime()
+{
+    struct timeval t;
+    gettimeofday(&t, NULL);
+    return 1000000 * t.tv_sec + t.tv_usec;
+}
+
 
 /* buffer counter */
 uint32 buffer_counter = 0;
@@ -303,12 +318,36 @@
 }
 
 
+static void vbp_setup_parser_for_item(viddec_pm_cxt_t *cxt, uint32 item)
+{
+    /* setup bitstream parser */
+    cxt->getbits.bstrm_buf.buf_index = cxt->list.data[item].stpos;
+    cxt->getbits.bstrm_buf.buf_st = cxt->list.data[item].stpos;
+    cxt->getbits.bstrm_buf.buf_end = cxt->list.data[item].edpos;
+
+    /* It is possible to end up with buf_offset not equal zero. */
+    cxt->getbits.bstrm_buf.buf_bitoff = 0;
+
+    cxt->getbits.au_pos = 0;
+    cxt->getbits.list_off = 0;
+    cxt->getbits.phase = 0;
+    cxt->getbits.emulation_byte_counter = 0;
+
+    cxt->list.start_offset = cxt->list.data[item].stpos;
+    cxt->list.end_offset = cxt->list.data[item].edpos;
+    cxt->list.total_bytes = cxt->list.data[item].edpos - cxt->list.data[item].stpos;
+
+}
+
 
 /**
  *
  * parse the elementary sample buffer or codec configuration data
  *
  */
+//static uint32 frame_num = 0;
+//static nsecs_t total_time_of_multislice = 0;
+//static uint32 frame_multislice_num = 0;
 static uint32 vbp_utils_parse_es_buffer(vbp_context *pcontext, uint8 init_data_flag)
 {
     viddec_pm_cxt_t *cxt = pcontext->parser_cxt;
@@ -339,6 +378,7 @@
     }
     */
 
+    uint32_t multi_parse_done = 0;
 
     /* populate the list.*/
     if (init_data_flag)
@@ -364,48 +404,176 @@
 
     // TODO: check if cxt->getbits.is_emul_reqd is set properly
 
-    for (i = 0; i < cxt->list.num_items; i++)
-    {
-        /* setup bitstream parser */
-        cxt->getbits.bstrm_buf.buf_index = cxt->list.data[i].stpos;
-        cxt->getbits.bstrm_buf.buf_st = cxt->list.data[i].stpos;
-        cxt->getbits.bstrm_buf.buf_end = cxt->list.data[i].edpos;
+    //frame_num ++;
 
-        /* It is possible to end up with buf_offset not equal zero. */
-        cxt->getbits.bstrm_buf.buf_bitoff = 0;
+    nsecs_t t0, t1, t2, tt0, tt1, tt2;
+    t0 = t1 = t2 = tt0 = tt1 = tt2 = 0;
+    //t0 = systemTime();
 
-        cxt->getbits.au_pos = 0;
-        cxt->getbits.list_off = 0;
-        cxt->getbits.phase = 0;
-        cxt->getbits.emulation_byte_counter = 0;
+    if (0 == pcontext->is_multithread_parsing_enabled) {
+        for (i = 0; i < cxt->list.num_items; i++) {
+            vbp_setup_parser_for_item(cxt, i);
+            /* invoke parse entry point to parse the buffer */
+            //t1 = systemTime();
+            error = ops->parse_syntax((void *)cxt, (void *)&(cxt->codec_data[0]));
+            //t2 = systemTime();
+            //tt1 += t2 - t1;
 
-        cxt->list.start_offset = cxt->list.data[i].stpos;
-        cxt->list.end_offset = cxt->list.data[i].edpos;
-        cxt->list.total_bytes = cxt->list.data[i].edpos - cxt->list.data[i].stpos;
+            /* can't return error for now. Needs further investigation. */
+            if (0 != error) {
+                WTRACE("failed to parse the syntax: %d!", error);
+            }
 
-        /* invoke parse entry point to parse the buffer */
-        error = ops->parse_syntax((void *)cxt, (void *)&(cxt->codec_data[0]));
+            /* process parsing result */
+            //t2 = systemTime();
+            error = pcontext->func_process_parsing_result(pcontext, i);
+            //tt2 += systemTime() - t2;
 
-        /* can't return error for now. Neet further investigation */
-        if (0 != error)
-        {
-            VTRACE("failed to parse the syntax: %d!", error);
-        }
-
-        /* process parsing result */
-        error = pcontext->func_process_parsing_result(pcontext, i);
-
-        if (VBP_MULTI == error) {
-            ITRACE("Multiple frames are found in one bufffer.");
-            return VBP_OK;
-        }
-        else if (0 != error)
-        {
-            ETRACE("Failed to process parsing result.");
-            return error;
+            if (VBP_MULTI == error) {
+                ITRACE("Multiple frames are found in one bufffer.");
+                return VBP_OK;
+            }
+            else if (0 != error) {
+                ETRACE("Failed to process parsing result.");
+                return error;
+            }
         }
     }
+    // Multi-threading option is enabled
+    else if (1 == pcontext->is_multithread_parsing_enabled) {
 
+        int got_output = 0;
+        int is_payload_start = 0;
+        int single_parse_count = 0;
+        int use_thread_parsing = 0;
+
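+        // Strategy: items that are not slice payloads (e.g. parameter sets) are parsed on
+        // the main thread, as are the slices of frames whose remaining item count is below
+        // LEAST_SLICES_MULTI_THREADING; everything else is handed to the worker threads.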
+        for (i = 0; i < cxt->list.num_items; i++) {
+
+            vbp_setup_parser_for_item(cxt, i);
+
+            // we assume no configuration data follows the slice data in a frame's buffer
+            is_payload_start = ops->is_payload_start((void *)cxt);
+
+            if (is_payload_start == 0) {
+                //t1 = systemTime();
+                error = ops->parse_syntax((void *)cxt, (void *)&(cxt->codec_data[0]));
+                //tt1 += systemTime() - t1;
+
+                //t2 = systemTime();
+                error = pcontext->func_process_parsing_result(pcontext, i);
+                single_parse_count ++;
+                //tt2 += systemTime() - t2;
+            } else if (((cxt->list.num_items - single_parse_count) < LEAST_SLICES_MULTI_THREADING)) {
+                //t1 = systemTime();
+                error = ops->parse_syntax((void *)cxt, (void *)&(cxt->codec_data[0]));
+                //tt1 += systemTime() - t1;
+
+                //t2 = systemTime();
+                error = pcontext->func_process_parsing_result(pcontext, i);
+                //tt2 += systemTime() - t2;
+            } else {
+                use_thread_parsing = 1;
+                break;
+            }
+
+            if (VBP_MULTI == error) {
+                ITRACE("Multiple frames are found in one bufffer.");
+                return VBP_OK;
+            }
+            else if (0 != error) {
+                ETRACE("Failed to process parsing result.");
+                return error;
+            }
+        }
+
+        if (use_thread_parsing) {
+            vbp_thread_set_active(pcontext, cxt->list.num_items - single_parse_count);
+            uint32_t thread_count = vbp_thread_get_active(pcontext);
+
+            //t1 = systemTime();
+            if (pcontext->threading_parse_scheme == SCHEME_BUNDLE) {
+            // Multithread parsing, bundle-input scheme:
+            // this call dispatches all remaining slice headers to the worker threads in one shot.
+                vbp_thread_parse_syntax_bundle((void *)cxt,
+                                               (void *)&(cxt->codec_data[0]),
+                                               pcontext,
+                                               i); //first slice's item num
+
+                uint32_t j;
+                for (j = i; j < cxt->list.num_items; j++) {
+                    error = ops->post_parse_threading((void *)cxt,
+                                                      (void *)&(cxt->codec_data[0]),
+                                                      j-single_parse_count); // slice index
+                    error = pcontext->func_process_parsing_result(pcontext, j); // item num
+                }
+                //tt1 += systemTime() - t1;
+
+            } else if (pcontext->threading_parse_scheme == SCHEME_SEQUENTIAL) {
+            // Multithread parsing, sequential-input scheme:
+            // this call feeds the worker threads one slice header at a time.
+                uint32_t j;
+                for (j = i; j < cxt->list.num_items; j++) {
+                    vbp_setup_parser_for_item(cxt, j);
+
+                    //t1 = systemTime();
+                    got_output = vbp_thread_parse_syntax((void *)cxt,
+                                                         (void *)&(cxt->codec_data[0]),
+                                                         pcontext);
+                    //tt1 += systemTime() - t1;
+
+                    if (got_output == 1) {
+                        //t2 = systemTime();
+                        error = ops->post_parse_threading((void *)cxt,
+                                                          (void *)&(cxt->codec_data[0]),
+                                                          //slice count with thread delay
+                                                          (j-(thread_count-1)-single_parse_count) % thread_count);
+
+                        error = pcontext->func_process_parsing_result(pcontext,
+                                                                      // item count with thread delay
+                                                                      j-(thread_count-1));
+
+                        multi_parse_done ++;
+                        //tt2 += systemTime() - t2;
+                    }
+                }
+
+                int items_to_drain = thread_count - 1;   // outputs still pending in the pipeline
+                cxt->getbits.bstrm_buf.buf = NULL;       // no new input; just drain the remaining outputs
+                for (i = cxt->list.num_items - items_to_drain; i < cxt->list.num_items; i++) {
+                    //t1 = systemTime();
+                    got_output = vbp_thread_parse_syntax((void *)cxt,
+                                                         (void *)&(cxt->codec_data[0]),
+                                                         pcontext);
+                                                         //&got_output);
+                    //tt1 += systemTime() - t1;
+
+                    if (got_output == 1) {
+                        //t2 = systemTime();
+                        error = ops->post_parse_threading((void *)cxt,
+                                                          (void *)&(cxt->codec_data[0]),
+                                                          (i-single_parse_count) % thread_count);
+
+                        error = pcontext->func_process_parsing_result(pcontext, i);
+                        multi_parse_done ++;
+                        //tt2 += systemTime() - t2;
+                    }
+                }
+            }
+        }
+    }
+#if 0
+    tt0 = systemTime() - t0;
+    if (cxt->list.num_items > 8) {
+        total_time_of_multislice += tt0;
+        frame_multislice_num ++;
+        ETRACE("### ================== TIME CALCULATION =======================");
+        ETRACE("### ------------item num: %d", cxt->list.num_items);
+        ETRACE("### ------------The frame[%d] cost time: %lld us", frame_num-1, tt0);
+        ETRACE("### ------------Accumulated multi-slice frames: %d", frame_multislice_num);
+        ETRACE("### ------------Accumulated average time that multislice frame cost: %lld us", total_time_of_multislice/frame_multislice_num);
+        ETRACE("### ================== TIME CALCULATION END ===================");
+    }
+#endif
     return VBP_OK;
 }
 
@@ -463,6 +631,21 @@
     *ppcontext = pcontext;
     error = VBP_OK;
 
+
+    /* default is not enabled */
+    pcontext->is_multithread_parsing_enabled = 0;
+
+#if (!defined USE_AVC_SHORT_FORMAT && !defined USE_SLICE_HEADER_PARSING)
+#ifdef USE_MULTI_THREADING
+    if (pcontext->parser_ops->query_thread_parsing_cap != NULL) {
+        if (pcontext->parser_ops->query_thread_parsing_cap() == 1) {
+            pcontext->is_multithread_parsing_enabled = 1;
+            ITRACE("Multi-thead parsing is enabled.");
+            vbp_thread_init(pcontext);
+        }
+    }
+#endif
+#endif
 cleanup:
 
     if (VBP_OK != error)
@@ -483,6 +666,12 @@
  */
 uint32 vbp_utils_destroy_context(vbp_context *pcontext)
 {
+#ifdef USE_MULTI_THREADING
+    if (1 == pcontext->is_multithread_parsing_enabled) {
+        vbp_thread_free(pcontext);
+    }
+#endif
+
     /* entry point, not need to validate input parameters. */
     vbp_utils_free_parser_memory(pcontext);
     vbp_utils_uninitialize_context(pcontext);
diff --git a/mixvbp/vbp_manager/vbp_utils.h b/mixvbp/vbp_manager/vbp_utils.h
index 633159c..7cf9321 100755
--- a/mixvbp/vbp_manager/vbp_utils.h
+++ b/mixvbp/vbp_manager/vbp_utils.h
@@ -31,6 +31,9 @@
 #include "viddec_pm.h"
 #include "vbp_trace.h"
 #include <stdlib.h>
+#include "vbp_loader.h"
+
+#include <sys/time.h>
 
 #define MAGIC_NUMBER 0x0DEADBEEF
 #define MAX_WORKLOAD_ITEMS 1000
@@ -68,6 +71,13 @@
 typedef uint32 (*function_update_data)(vbp_context* cxt, void *newdata, uint32 size);
 #endif
 
+typedef enum
+{
+    SCHEME_BUNDLE = 0,
+    SCHEME_SEQUENTIAL,
+} threading_parse_scheme_t;
+
+
 struct vbp_context_t
 {
     /* magic number */
@@ -94,6 +104,13 @@
     /* parser type specific data*/
     void *parser_private;
 
+    /* multithreading */
+    uint32 thread_count;
+    void *thread_opaque;
+    uint32 is_multithread_parsing_enabled;
+
+    threading_parse_scheme_t  threading_parse_scheme;
+
     function_init_parser_entries func_init_parser_entries;
     function_allocate_query_data func_allocate_query_data;
     function_free_query_data func_free_query_data;
diff --git a/mixvbp/vbp_manager/vbp_vc1_parser.c b/mixvbp/vbp_manager/vbp_vc1_parser.c
index 12e28e9..65b6f76 100755
--- a/mixvbp/vbp_manager/vbp_vc1_parser.c
+++ b/mixvbp/vbp_manager/vbp_vc1_parser.c
@@ -111,6 +111,10 @@
 
     /* entry point not needed */
     pcontext->parser_ops->flush = NULL;
+    pcontext->parser_ops->is_payload_start = NULL;
+    pcontext->parser_ops->parse_syntax_threading = NULL;
+    pcontext->parser_ops->post_parse_threading = NULL;
+    pcontext->parser_ops->query_thread_parsing_cap = NULL;
 
     return VBP_OK;
 }
diff --git a/mixvbp/vbp_manager/vbp_vp8_parser.c b/mixvbp/vbp_manager/vbp_vp8_parser.c
index 73d9281..9ac097d 100755
--- a/mixvbp/vbp_manager/vbp_vp8_parser.c
+++ b/mixvbp/vbp_manager/vbp_vp8_parser.c
@@ -67,6 +67,10 @@
     pcontext->parser_ops->is_frame_start = NULL;
 
     pcontext->parser_ops->flush = NULL;
+    pcontext->parser_ops->is_payload_start = NULL;
+    pcontext->parser_ops->parse_syntax_threading = NULL;
+    pcontext->parser_ops->post_parse_threading = NULL;
+    pcontext->parser_ops->query_thread_parsing_cap = NULL;
 
     return VBP_OK;
 }
diff --git a/mixvbp/vbp_plugin/h264/h264parse.c b/mixvbp/vbp_plugin/h264/h264parse.c
index cbb04fe..330c5e6 100755
--- a/mixvbp/vbp_plugin/h264/h264parse.c
+++ b/mixvbp/vbp_plugin/h264/h264parse.c
@@ -65,7 +65,7 @@
 /* ------------------------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------------------------ */
-
+// kept for the H.264 secure parser
 h264_Status h264_active_par_set(h264_Info*pInfo,h264_Slice_Header_t* SliceHeader)
 {
     //h264_Slice_Header_t* SliceHeader = &pInfo->SliceHeader;
@@ -114,14 +114,43 @@
         }
     }
 
-    pInfo->img.PicWidthInMbs    = (pInfo->active_SPS.sps_disp.pic_width_in_mbs_minus1 + 1);
-    pInfo->img.FrameHeightInMbs = pInfo->active_SPS.sps_disp.frame_mbs_only_flag?				\
-                                  (pInfo->active_SPS.sps_disp.pic_height_in_map_units_minus1 + 1) :	\
-                                  ((pInfo->active_SPS.sps_disp.pic_height_in_map_units_minus1 + 1) << 1);
+    pInfo->img.PicWidthInMbs = (pInfo->active_SPS.sps_disp.pic_width_in_mbs_minus1 + 1);
+    pInfo->img.FrameHeightInMbs = pInfo->active_SPS.sps_disp.frame_mbs_only_flag ?
+                 (pInfo->active_SPS.sps_disp.pic_height_in_map_units_minus1 + 1) :
+                 ((pInfo->active_SPS.sps_disp.pic_height_in_map_units_minus1 + 1) << 1);
 
     return H264_STATUS_OK;
 };   //// End of h264_active_par_set
 
+
+
+h264_Status h264_set_active_par_set(h264_Info*pInfo,h264_Slice_Header_t* SliceHeader)
+{
+    uint32_t pps_addr = pInfo->PPS_PADDR_GL +
+                        SliceHeader->pic_parameter_id * sizeof(pic_param_set);
+    SliceHeader->active_PPS = (pic_param_set*)pps_addr;
+    pic_param_set* active_PPS = SliceHeader->active_PPS;
+
+    if (active_PPS->seq_parameter_set_id >= MAX_NUM_SPS)
+    {
+        return H264_PPS_INVALID_PIC_ID;    /// Invalid PPS detected
+    }
+
+    uint32_t sps_addr = pInfo->SPS_PADDR_GL + \
+                        active_PPS->seq_parameter_set_id * sizeof(seq_param_set_all);
+    SliceHeader->active_SPS = (seq_param_set_used*)sps_addr;
+    seq_param_set_used* active_SPS = SliceHeader->active_SPS;
+
+    if (active_SPS->seq_parameter_set_id >= MAX_NUM_SPS)
+    {
+        return H264_PPS_INVALID_PIC_ID;    //// Invalid SPS detected
+    }
+
+    return H264_STATUS_OK;
+};   // End of h264_set_active_par_set
+
+
+
 /* ------------------------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------------------------ */
@@ -173,38 +202,183 @@
             SliceHeader->sh_error |= 4;
         }
 
-    } else 	{
+    } else      {
         SliceHeader->sh_error |= 1;
     }
 
+    return retStatus;
+}
 
-    //if(SliceHeader->sh_error) {
-    //pInfo->wl_err_flag |= VIDDEC_FW_WORKLOAD_ERR_NOTDECODABLE;
-    //}
+h264_Status h264_Parse_Slice_Layer_Without_Partitioning_RBSP_opt(void *parent, h264_Info* pInfo, h264_Slice_Header_t *SliceHeader)
+{
+    h264_Status retStatus = H264_STATUS_ERROR;
 
+    ////////////////////////////////////////////////////
+    //// Parse slice header info
+    //// Part1: not depend on the active PPS/SPS
+    //// Part2/3: depend on the active parset
+    //////////////////////////////////////////////////
 
+    SliceHeader->sh_error = 0;
 
-    //////////////////////////////////
-    //// Parse slice data (MB loop)
-    //////////////////////////////////
-    //retStatus = h264_Parse_Slice_Data(pInfo);
+    if (h264_Parse_Slice_Header_1(parent, pInfo, SliceHeader) == H264_STATUS_OK)
     {
-        //uint32_t data = 0;
-        //if( viddec_pm_peek_bits(parent, &data, 32) == -1)
-        //retStatus = H264_STATUS_ERROR;
+        retStatus = h264_set_active_par_set(pInfo, SliceHeader);
     }
-    //h264_Parse_rbsp_trailing_bits(pInfo);
+
+    if (retStatus == H264_STATUS_OK)
+    {
+        switch (SliceHeader->active_SPS->profile_idc)
+        {
+        case h264_ProfileBaseline:
+        case h264_ProfileMain:
+        case h264_ProfileExtended:
+            SliceHeader->active_PPS->transform_8x8_mode_flag = 0;
+            SliceHeader->active_PPS->pic_scaling_matrix_present_flag = 0;
+            SliceHeader->active_PPS->second_chroma_qp_index_offset =
+                SliceHeader->active_PPS->chroma_qp_index_offset;
+
+        default:
+            break;
+        }
+
+        if (h264_Parse_Slice_Header_2_opt(parent, pInfo, SliceHeader) != H264_STATUS_OK)
+        {
+            SliceHeader->sh_error |= 2;
+        }
+        else if (h264_Parse_Slice_Header_3_opt(parent, pInfo, SliceHeader) != H264_STATUS_OK)
+        {
+            SliceHeader->sh_error |= 4;
+        }
+    } else {
+        SliceHeader->sh_error |= 1;
+    }
 
     return retStatus;
 }
 
 
+h264_Status h264_Post_Parsing_Slice_Header(void *parent, h264_Info* pInfo, h264_Slice_Header_t *next_SliceHeader)
+{
+
+    h264_Status retStatus = H264_STATUS_OK;
+
+    memcpy(&pInfo->active_PPS, next_SliceHeader->active_PPS, sizeof(pic_param_set));
+    memcpy(&pInfo->active_SPS, next_SliceHeader->active_SPS, sizeof(seq_param_set_used));
+
+    if ((1 == pInfo->primary_pic_type_plus_one) && (pInfo->got_start)) {
+        pInfo->img.recovery_point_found |= 4;
+    }
+    pInfo->primary_pic_type_plus_one = 0;
+
+    pInfo->img.PicWidthInMbs    = (pInfo->active_SPS.sps_disp.pic_width_in_mbs_minus1 + 1);
+    pInfo->img.FrameHeightInMbs = pInfo->active_SPS.sps_disp.frame_mbs_only_flag?                       \
+                                  (pInfo->active_SPS.sps_disp.pic_height_in_map_units_minus1 + 1) :     \
+                                  ((pInfo->active_SPS.sps_disp.pic_height_in_map_units_minus1 + 1) << 1);
+
+    pInfo->sei_information.recovery_point = 0;
+
+    pInfo->img.current_slice_num++;
+
+
+    ////////////////////////////////////////////////////////////////////////////
+    // Processing if new picture coming
+    //  1) if it's the second field
+    //	2) if it's a new frame
+    ////////////////////////////////////////////////////////////////////////////
+    //AssignQuantParam(pInfo);
+    if (h264_is_new_picture_start(pInfo, *next_SliceHeader, pInfo->SliceHeader))
+    {
+        //
+        ///----------------- New Picture.boundary detected--------------------
+        //
+        pInfo->img.g_new_pic++;
+
+        //
+        // Complete previous picture
+        h264_dpb_store_previous_picture_in_dpb(pInfo, 0, 0); //curr old
+        //h264_hdr_post_poc(0, 0, use_old);
+        //
+        // Update slice structures:
+        h264_update_old_slice(pInfo, *next_SliceHeader);  //cur->old; next->cur;
+        //
+        // 1) if resolution change: reset dpb
+        // 2) else: init frame store
+        h264_update_img_info(pInfo); //img, dpb
+        //
+        ///----------------- New frame.boundary detected--------------------
+        //
+        pInfo->img.second_field = h264_is_second_field(pInfo);
+        if (pInfo->img.second_field == 0)
+        {
+            pInfo->img.g_new_frame = 1;
+            h264_dpb_update_queue_dangling_field(pInfo);
+            //
+            /// DPB management
+            ///	1) check the gaps
+            ///	2) assign fs for non-exist frames
+            ///	3) fill the gaps
+            ///	4) store frame into DPB if ...
+            //
+            //if(pInfo->SliceHeader.redundant_pic_cnt)
+            {
+                h264_dpb_gaps_in_frame_num_mem_management(pInfo);
+            }
+        }
+        //
+        /// Decoding POC
+        h264_hdr_decoding_poc (pInfo, 0, 0);
+        //
+        /// Init Frame Store for next frame
+        h264_dpb_init_frame_store (pInfo);
+        pInfo->img.current_slice_num = 1;
+        if (pInfo->SliceHeader.first_mb_in_slice != 0)
+        {
+            ////Come here means we have slice lost at the beginning, since no FMO support
+            pInfo->SliceHeader.sh_error |= (pInfo->SliceHeader.structure << 17);
+        }
+        /// Emit out the New Frame
+        if (pInfo->img.g_new_frame)
+        {
+            h264_parse_emit_start_new_frame(parent, pInfo);
+        }
+
+        h264_parse_emit_current_pic(parent, pInfo);
+    }
+    else ///////////////////////////////////////////////////// If Not a picture start
+    {
+        //
+        /// Update slice structures: cur->old; next->cur;
+        h264_update_old_slice(pInfo, *next_SliceHeader);
+        //
+        /// 1) if resolution change: reset dpb
+        /// 2) else: update img info
+        h264_update_img_info(pInfo);
+    }
+
+
+    //////////////////////////////////////////////////////////////
+    // DPB reference list init and reordering
+    //////////////////////////////////////////////////////////////
+
+    //////////////////////////////////////////////// Update frame Type--- IDR/I/P/B for frame or field
+    h264_update_frame_type(pInfo);
+
+#ifndef USE_AVC_SHORT_FORMAT
+    h264_dpb_update_ref_lists(pInfo);
+#endif
+    /// Emit out the current "good" slice
+    h264_parse_emit_current_slice(parent, pInfo);
+
+    return retStatus;
+}
+
 
 /* ------------------------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------------------------ */
 
-h264_Status h264_Parse_NAL_Unit(void *parent, h264_Info* pInfo, uint8_t *nal_ref_idc)
+h264_Status h264_Parse_NAL_Unit(void *parent, uint8_t* nal_unit_type, uint8_t *nal_ref_idc)
 {
     h264_Status ret = H264_STATUS_ERROR;
 
@@ -212,7 +386,7 @@
     if (viddec_pm_get_bits(parent, &code, 8) != -1)
     {
         *nal_ref_idc = (uint8_t)((code >> 5) & 0x3);
-        pInfo->nal_unit_type = (uint8_t)((code >> 0) & 0x1f);
+        *nal_unit_type = (uint8_t)((code >> 0) & 0x1f);
         ret = H264_STATUS_OK;
     }
 
@@ -430,43 +604,58 @@
 /* ------------------------------------------------------------------------------------------ */
 /* ------------------------------------------------------------------------------------------ */
 
-int32_t h264_is_new_picture_start(h264_Info * pInfo, h264_Slice_Header_t cur_slice, h264_Slice_Header_t old_slice)
+int32_t h264_is_new_picture_start(h264_Info * pInfo,
+                                  h264_Slice_Header_t cur_slice,
+                                  h264_Slice_Header_t old_slice)
 {
     int result = 0;
 
-    if (pInfo->number_of_first_au_info_nal_before_first_slice)
-    {
+    if (pInfo->number_of_first_au_info_nal_before_first_slice) {
         pInfo->number_of_first_au_info_nal_before_first_slice = 0;
         return 1;
     }
 
-    result |= (old_slice.pic_parameter_id != cur_slice.pic_parameter_id);
-    result |= (old_slice.frame_num != cur_slice.frame_num);
-    result |= (old_slice.field_pic_flag != cur_slice.field_pic_flag);
-    if (cur_slice.field_pic_flag && old_slice.field_pic_flag)
-    {
-        result |= (old_slice.bottom_field_flag != cur_slice.bottom_field_flag);
+    if (old_slice.pic_parameter_id != cur_slice.pic_parameter_id) {
+        return 1;
+    }
+    if (old_slice.frame_num != cur_slice.frame_num) {
+        return 1;
+    }
+    if (old_slice.field_pic_flag != cur_slice.field_pic_flag) {
+        return 1;
+    }
+    if (cur_slice.field_pic_flag && old_slice.field_pic_flag) {
+        if (old_slice.bottom_field_flag != cur_slice.bottom_field_flag) {
+            return 1;
+        }
     }
 
-    result |= (old_slice.nal_ref_idc != cur_slice.nal_ref_idc) && \
-              ((old_slice.nal_ref_idc == 0) || (cur_slice.nal_ref_idc == 0));
-    result |= ( old_slice.idr_flag != cur_slice.idr_flag);
-
-    if (cur_slice.idr_flag && old_slice.idr_flag)
-    {
-        result |= (old_slice.idr_pic_id != cur_slice.idr_pic_id);
+    if ((old_slice.nal_ref_idc != cur_slice.nal_ref_idc) && \
+              ((old_slice.nal_ref_idc == 0) || (cur_slice.nal_ref_idc == 0))) {
+        return 1;
+    }
+    if (old_slice.idr_flag != cur_slice.idr_flag) {
+        return 1;
     }
 
-    if (pInfo->active_SPS.pic_order_cnt_type == 0)
-    {
-        result |=  (old_slice.pic_order_cnt_lsb          != cur_slice.pic_order_cnt_lsb);
-        result |=  (old_slice.delta_pic_order_cnt_bottom != cur_slice.delta_pic_order_cnt_bottom);
+    if (cur_slice.idr_flag && old_slice.idr_flag) {
+        if (old_slice.idr_pic_id != cur_slice.idr_pic_id) {
+            return 1;
+        }
     }
 
-    if (pInfo->active_SPS.pic_order_cnt_type == 1)
-    {
-        result |= (old_slice.delta_pic_order_cnt[0] != cur_slice.delta_pic_order_cnt[0]);
-        result |= (old_slice.delta_pic_order_cnt[1] != cur_slice.delta_pic_order_cnt[1]);
+    if (pInfo->active_SPS.pic_order_cnt_type == 0) {
+        if ((old_slice.pic_order_cnt_lsb != cur_slice.pic_order_cnt_lsb) || \
+           (old_slice.delta_pic_order_cnt_bottom != cur_slice.delta_pic_order_cnt_bottom)) {
+            return 1;
+        }
+    }
+
+    if (pInfo->active_SPS.pic_order_cnt_type == 1) {
+        if ((old_slice.delta_pic_order_cnt[0] != cur_slice.delta_pic_order_cnt[0]) || \
+            (old_slice.delta_pic_order_cnt[1] != cur_slice.delta_pic_order_cnt[1])) {
+            return 1;
+        }
     }
 
     return result;
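
A note on the refactor above: the original version OR-ed every comparison into result, so any single mismatch already forced a nonzero return; returning 1 at the first mismatch (and falling through to "return result", still 0, when nothing differs) is behaviour-preserving while skipping the remaining comparisons. A minimal caller sketch follows; it is purely illustrative and not part of this patch (the wrapper name, the include path, and the flush comment are assumptions):

#include "h264.h"   /* h264_Info, h264_Slice_Header_t; plugin include path assumed */

extern int32_t h264_is_new_picture_start(h264_Info *pInfo,
                                         h264_Slice_Header_t cur_slice,
                                         h264_Slice_Header_t old_slice);

/* Illustrative only: detect an access-unit boundary between the slice header
 * just parsed (cur) and the last committed one (old), then remember cur. */
static void check_frame_boundary(h264_Info *pInfo,
                                 h264_Slice_Header_t *old,
                                 const h264_Slice_Header_t *cur)
{
    if (h264_is_new_picture_start(pInfo, *cur, *old)) {
        /* a new picture starts here: the caller would flush/emit the
         * previous picture before handing this slice to the decoder */
    }
    *old = *cur;   /* keep this header for the next comparison */
}
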
diff --git a/mixvbp/vbp_plugin/h264/h264parse_pps.c b/mixvbp/vbp_plugin/h264/h264parse_pps.c
index 2c4cc52..b4098ec 100755
--- a/mixvbp/vbp_plugin/h264/h264parse_pps.c
+++ b/mixvbp/vbp_plugin/h264/h264parse_pps.c
@@ -22,6 +22,8 @@
         }
         PictureParameterSet->pic_parameter_set_id = (uint8_t)code;
 
+        VTRACE("parsing PPS: id = %d", PictureParameterSet->pic_parameter_set_id);
+
         code = h264_GetVLCElement(parent, pInfo, false);
         if (code > MAX_NUM_SPS - 1)
         {
@@ -30,6 +32,8 @@
         }
         PictureParameterSet->seq_parameter_set_id = (uint8_t)code;
 
+        VTRACE("parsing PPS: refering SPS id = %d", PictureParameterSet->seq_parameter_set_id);
+
         ///// entropy_coding_mode_flag
         viddec_pm_get_bits(parent, &code, 1);
         PictureParameterSet->entropy_coding_mode_flag = (uint8_t)code;
diff --git a/mixvbp/vbp_plugin/h264/h264parse_sh.c b/mixvbp/vbp_plugin/h264/h264parse_sh.c
index 625e146..33ccbdd 100755
--- a/mixvbp/vbp_plugin/h264/h264parse_sh.c
+++ b/mixvbp/vbp_plugin/h264/h264parse_sh.c
@@ -95,13 +95,17 @@
         }
 
         ////// Check valid or not of first_mb_in_slice
+        int32_t PicWidthInMbs    = (pInfo->active_SPS.sps_disp.pic_width_in_mbs_minus1 + 1);
+        int32_t FrameHeightInMbs = pInfo->active_SPS.sps_disp.frame_mbs_only_flag ?
+                                  (pInfo->active_SPS.sps_disp.pic_height_in_map_units_minus1 + 1) :
+                                  ((pInfo->active_SPS.sps_disp.pic_height_in_map_units_minus1 + 1) << 1);
         if (SliceHeader->structure == FRAME)
         {
-            max_mb_num = pInfo->img.FrameHeightInMbs * pInfo->img.PicWidthInMbs;
+            max_mb_num = FrameHeightInMbs * PicWidthInMbs;
         }
         else
         {
-            max_mb_num = pInfo->img.FrameHeightInMbs * pInfo->img.PicWidthInMbs / 2;
+            max_mb_num = FrameHeightInMbs * PicWidthInMbs / 2;
         }
 
         ///if(pInfo->img.MbaffFrameFlag)
@@ -111,7 +115,10 @@
         }
 
         if (SliceHeader->first_mb_in_slice >= max_mb_num)
+        {
+            WTRACE("first mb in slice exceed max mb num.");
             break;
+        }
 
         if (pInfo->nal_unit_type == h264_NAL_UNIT_TYPE_IDR)
         {
@@ -165,6 +172,121 @@
     return ret;
 }
 
+h264_Status h264_Parse_Slice_Header_2_opt(void *parent, h264_Info* pInfo, h264_Slice_Header_t *SliceHeader)
+{
+    h264_Status ret = H264_SliceHeader_ERROR;
+
+    uint32_t code;
+    int32_t max_mb_num=0;
+
+    do {
+        //////////////////////////////////// Slice header part 2//////////////////
+
+        /// Frame_num
+        viddec_pm_get_bits(parent, &code, SliceHeader->active_SPS->log2_max_frame_num_minus4 + 4);
+        SliceHeader->frame_num = (int32_t)code;
+
+        /// Picture structure
+        SliceHeader->structure = FRAME;
+        SliceHeader->field_pic_flag = 0;
+        SliceHeader->bottom_field_flag = 0;
+
+        if (!(SliceHeader->active_SPS->sps_disp.frame_mbs_only_flag))
+        {
+            /// field_pic_flag
+            viddec_pm_get_bits(parent, &code, 1);
+            SliceHeader->field_pic_flag = (uint8_t)code;
+
+            if (SliceHeader->field_pic_flag)
+            {
+                viddec_pm_get_bits(parent, &code, 1);
+                SliceHeader->bottom_field_flag = (uint8_t)code;
+
+                SliceHeader->structure = SliceHeader->bottom_field_flag ? BOTTOM_FIELD: TOP_FIELD;
+            }
+        }
+
+        ////// Check valid or not of first_mb_in_slice
+        int32_t PicWidthInMbs    = (SliceHeader->active_SPS->sps_disp.pic_width_in_mbs_minus1 + 1);
+        int32_t FrameHeightInMbs = SliceHeader->active_SPS->sps_disp.frame_mbs_only_flag ?
+                                  (SliceHeader->active_SPS->sps_disp.pic_height_in_map_units_minus1 + 1) :
+                                  ((SliceHeader->active_SPS->sps_disp.pic_height_in_map_units_minus1 + 1) << 1);
+        if (SliceHeader->structure == FRAME)
+        {
+            max_mb_num = FrameHeightInMbs * PicWidthInMbs;
+        }
+        else
+        {
+            max_mb_num = FrameHeightInMbs * PicWidthInMbs / 2;
+        }
+
+        ///if(pInfo->img.MbaffFrameFlag)
+        if (SliceHeader->active_SPS->sps_disp.mb_adaptive_frame_field_flag & (!(SliceHeader->field_pic_flag)))
+        {
+            SliceHeader->first_mb_in_slice <<= 1;
+        }
+
+        if (SliceHeader->first_mb_in_slice >= max_mb_num)
+        {
+            WTRACE("first mb in slice exceed max mb num.");
+            break;
+        }
+
+        if (pInfo->nal_unit_type == h264_NAL_UNIT_TYPE_IDR)
+        {
+            SliceHeader->idr_pic_id = h264_GetVLCElement(parent, pInfo, false);
+        }
+
+        if (SliceHeader->active_SPS->pic_order_cnt_type == 0)
+        {
+            viddec_pm_get_bits(parent, &code , SliceHeader->active_SPS->log2_max_pic_order_cnt_lsb_minus4 + 4);
+            SliceHeader->pic_order_cnt_lsb = (uint32_t)code;
+
+            if ((SliceHeader->active_PPS->pic_order_present_flag) && !(SliceHeader->field_pic_flag))
+            {
+                SliceHeader->delta_pic_order_cnt_bottom = h264_GetVLCElement(parent, pInfo, true);
+            }
+            else
+            {
+                SliceHeader->delta_pic_order_cnt_bottom = 0;
+            }
+        }
+
+        if ((SliceHeader->active_SPS->pic_order_cnt_type == 1) &&
+            !(SliceHeader->active_SPS->delta_pic_order_always_zero_flag))
+        {
+            SliceHeader->delta_pic_order_cnt[0] = h264_GetVLCElement(parent, pInfo, true);
+            if ((SliceHeader->active_PPS->pic_order_present_flag) && !(SliceHeader->field_pic_flag))
+            {
+                SliceHeader->delta_pic_order_cnt[1] = h264_GetVLCElement(parent, pInfo, true);
+            }
+        }
+
+        if (SliceHeader->active_PPS->redundant_pic_cnt_present_flag)
+        {
+            SliceHeader->redundant_pic_cnt = h264_GetVLCElement(parent, pInfo, false);
+            if (SliceHeader->redundant_pic_cnt > 127)
+                break;
+        }
+        else
+        {
+            SliceHeader->redundant_pic_cnt = 0;
+        }
+
+        ret = H264_STATUS_OK;
+    } while (0);
+
+    //////////// FMO is not supported currently, so the following code is commented out
+    //if((pInfo->active_PPS.num_slice_groups_minus1 > 0) && (pInfo->active_PPS.slice_group_map_type >= 3) && (pInfo->active_PPS.slice_group_map_type <= 5) )
+    //{
+    //	SliceHeader->slice_group_change_cycle = 0;				//one of the variables is not known in the high profile
+    //}
+
+    return ret;
+}
+
+
+
 /*-----------------------------------------------------------------------------------------*/
 // slice header 3
 // (direct_spatial_mv_pred_flag, num_ref_idx, pic_list_reorder, PWT,  ref_pic_remark, alpha, beta, etc)
@@ -244,14 +366,13 @@
         ////
         //// Parse Pred_weight_table but not store it becasue it will be reparsed in HW
         ////
-        if (((pInfo->active_PPS.weighted_pred_flag) && ((SliceHeader->slice_type == h264_PtypeP) || (SliceHeader->slice_type == h264_PtypeSP))) || ((pInfo->active_PPS.weighted_bipred_idc == 1) && (SliceHeader->slice_type == h264_PtypeB)))
+        if (((pInfo->active_PPS.weighted_pred_flag)
+               && ((SliceHeader->slice_type == h264_PtypeP) || (SliceHeader->slice_type == h264_PtypeSP)))
+            || ((pInfo->active_PPS.weighted_bipred_idc == 1) && (SliceHeader->slice_type == h264_PtypeB)))
         {
 
             viddec_pm_get_au_pos(parent, &bits_offset, &byte_offset, &is_emul);
 
-            pInfo->h264_pwt_enabled = 1;
-            pInfo->h264_pwt_start_byte_offset = byte_offset;
-            pInfo->h264_pwt_start_bit_offset  = bits_offset;
 
             if (h264_Parse_Pred_Weight_Table(parent, pInfo, SliceHeader) != H264_STATUS_OK)
             {
@@ -259,18 +380,6 @@
             }
 
             viddec_pm_get_au_pos(parent, &bits_offset, &byte_offset, &is_emul);
-
-            if (0 == bits_offset)
-            {
-                pInfo->h264_pwt_end_byte_offset = byte_offset-1;
-                pInfo->h264_pwt_end_bit_offset  = 8;
-            }
-            else
-            {
-                pInfo->h264_pwt_end_byte_offset = byte_offset;
-                pInfo->h264_pwt_end_bit_offset  = bits_offset;
-            }
-
         }
 
 
@@ -363,6 +472,195 @@
 }
 
 
+h264_Status h264_Parse_Slice_Header_3_opt(void *parent, h264_Info* pInfo, h264_Slice_Header_t *SliceHeader)
+{
+    h264_Status ret = H264_SliceHeader_ERROR;
+
+    //h264_Slice_Header_t* SliceHeader = &pInfo->SliceHeader;
+    int32_t  slice_alpha_c0_offset, slice_beta_offset;
+    uint32_t code;
+    uint32_t bits_offset =0, byte_offset =0;
+    uint8_t  is_emul =0;
+
+    do {
+        /// direct_spatial_mv_pred_flag
+        if (SliceHeader->slice_type == h264_PtypeB)
+        {
+            viddec_pm_get_bits(parent, &code , 1);
+            SliceHeader->direct_spatial_mv_pred_flag = (uint8_t)code;
+        }
+        else
+        {
+            SliceHeader->direct_spatial_mv_pred_flag = 0;
+        }
+
+        //
+        // Reset ref_idx and override it if present
+        //
+        SliceHeader->num_ref_idx_l0_active = SliceHeader->active_PPS->num_ref_idx_l0_active;
+        SliceHeader->num_ref_idx_l1_active = SliceHeader->active_PPS->num_ref_idx_l1_active;
+
+        if ((SliceHeader->slice_type == h264_PtypeP) ||
+            (SliceHeader->slice_type == h264_PtypeSP) ||
+            (SliceHeader->slice_type == h264_PtypeB))
+        {
+            viddec_pm_get_bits(parent, &code, 1);
+            SliceHeader->num_ref_idx_active_override_flag  = (uint8_t)code;
+
+            if (SliceHeader->num_ref_idx_active_override_flag)
+            {
+                SliceHeader->num_ref_idx_l0_active = h264_GetVLCElement(parent, pInfo, false) + 1;
+                if (SliceHeader->slice_type == h264_PtypeB)
+                {
+                    SliceHeader->num_ref_idx_l1_active = h264_GetVLCElement(parent, pInfo, false) + 1;
+                }
+            }
+        }
+
+        if (SliceHeader->slice_type != h264_PtypeB)
+        {
+            SliceHeader->num_ref_idx_l1_active = 0;
+        }
+
+        if ((SliceHeader->num_ref_idx_l0_active > MAX_NUM_REF_FRAMES) ||
+            (SliceHeader->num_ref_idx_l1_active > MAX_NUM_REF_FRAMES))
+        {
+            WTRACE("ref index greater than expected during slice header parsing.");
+            break;
+        }
+
+#ifdef USE_AVC_SHORT_FORMAT
+        bool keepParsing = false;
+        keepParsing = h264_is_new_picture_start(pInfo, *SliceHeader, pInfo->SliceHeader) &&
+                      (SliceHeader->nal_ref_idc != 0);
+        if (!keepParsing)
+        {
+            ITRACE("short format parsing: no need to go on!");
+            ret = H264_STATUS_OK;
+            break;
+        }
+#endif
+        if (h264_Parse_Ref_Pic_List_Reordering(parent, pInfo, SliceHeader) != H264_STATUS_OK)
+        {
+            WTRACE("ref list reordering failed during slice header parsing.");
+            break;
+        }
+
+
+        ////
+        //// Parse Pred_weight_table but do not store it because it will be reparsed in HW
+        ////
+        if (((SliceHeader->active_PPS->weighted_pred_flag)
+               && ((SliceHeader->slice_type == h264_PtypeP) || (SliceHeader->slice_type == h264_PtypeSP)))
+            || ((SliceHeader->active_PPS->weighted_bipred_idc == 1) && (SliceHeader->slice_type == h264_PtypeB)))
+        {
+
+            //viddec_pm_get_au_pos(parent, &bits_offset, &byte_offset, &is_emul);
+
+            if (h264_Parse_Pred_Weight_Table_opt(parent, pInfo, SliceHeader) != H264_STATUS_OK)
+            {
+                break;
+            }
+
+            viddec_pm_get_au_pos(parent, &bits_offset, &byte_offset, &is_emul);
+
+        }
+
+
+
+        ////
+        //// Parse Ref_pic marking if there
+        ////
+        if (SliceHeader->nal_ref_idc != 0)
+        {
+            if (h264_Parse_Dec_Ref_Pic_Marking(parent, pInfo, SliceHeader) != H264_STATUS_OK)
+            {
+                WTRACE("ref pic marking failed during slice header parsing.");
+                break;
+            }
+        }
+
+        if ((SliceHeader->active_PPS->entropy_coding_mode_flag) &&
+            (SliceHeader->slice_type != h264_PtypeI) &&
+            (SliceHeader->slice_type != h264_PtypeSI))
+        {
+            SliceHeader->cabac_init_idc = h264_GetVLCElement(parent, pInfo, false);
+        }
+        else
+        {
+            SliceHeader->cabac_init_idc = 0;
+        }
+
+        if (SliceHeader->cabac_init_idc > 2)
+        {
+            break;
+        }
+
+        SliceHeader->slice_qp_delta = h264_GetVLCElement(parent, pInfo, true);
+        if ((SliceHeader->slice_qp_delta > (25 - SliceHeader->active_PPS->pic_init_qp_minus26)) ||
+            (SliceHeader->slice_qp_delta < -(26 + SliceHeader->active_PPS->pic_init_qp_minus26)))
+        {
+            WTRACE("slice_qp_delta value is invalid.");
+            break;
+        }
+
+        if ((SliceHeader->slice_type == h264_PtypeSP) || (SliceHeader->slice_type == h264_PtypeSI))
+        {
+            if (SliceHeader->slice_type == h264_PtypeSP)
+            {
+                viddec_pm_get_bits(parent, &code, 1);
+                SliceHeader->sp_for_switch_flag  = (uint8_t)code;
+
+            }
+            SliceHeader->slice_qs_delta = h264_GetVLCElement(parent, pInfo, true);
+
+            if ((SliceHeader->slice_qs_delta > (25 - SliceHeader->active_PPS->pic_init_qs_minus26)) ||
+                (SliceHeader->slice_qs_delta < -(26 + SliceHeader->active_PPS->pic_init_qs_minus26)) )
+            {
+                WTRACE("slice_qp_delta value is invalid.");
+                break;
+            }
+        }
+        if (SliceHeader->active_PPS->deblocking_filter_control_present_flag)
+        {
+            SliceHeader->disable_deblocking_filter_idc = h264_GetVLCElement(parent, pInfo, false);
+            if (SliceHeader->disable_deblocking_filter_idc != 1)
+            {
+                SliceHeader->slice_alpha_c0_offset_div2 = h264_GetVLCElement(parent, pInfo, true);
+                slice_alpha_c0_offset = SliceHeader->slice_alpha_c0_offset_div2 << 1;
+                if (slice_alpha_c0_offset < -12 || slice_alpha_c0_offset > 12)
+                {
+                    break;
+                }
+
+                SliceHeader->slice_beta_offset_div2 = h264_GetVLCElement(parent, pInfo, true);
+                slice_beta_offset = SliceHeader->slice_beta_offset_div2 << 1;
+                if (slice_beta_offset < -12 || slice_beta_offset > 12)
+                {
+                    break;
+                }
+            }
+            else
+            {
+                SliceHeader->slice_alpha_c0_offset_div2 = 0;
+                SliceHeader->slice_beta_offset_div2 = 0;
+            }
+        }
+
+        ret = H264_STATUS_OK;
+    } while (0);
+
+    //////////// FMO is not supported currently, so the following code is commented out
+    //if((pInfo->active_PPS.num_slice_groups_minus1 > 0) && (pInfo->active_PPS.slice_group_map_type >= 3) && (pInfo->active_PPS.slice_group_map_type <= 5) )
+    //{
+    //	SliceHeader->slice_group_change_cycle = 0;				//one of the variables is not known in the high profile
+    //}
+
+    return ret;
+}
+
+
+
 /*--------------------------------------------------------------------------------------------------*/
 //
 // The syntax elements reordering_of_pic_nums_idc, abs_diff_pic_num_minus1, and long_term_pic_num
@@ -377,7 +675,7 @@
 //
 /*--------------------------------------------------------------------------------------------------*/
 
-h264_Status h264_Parse_Ref_Pic_List_Reordering(void *parent, h264_Info* pInfo,h264_Slice_Header_t *SliceHeader)
+h264_Status h264_Parse_Ref_Pic_List_Reordering(void *parent, h264_Info* pInfo, h264_Slice_Header_t *SliceHeader)
 {
     //h264_Slice_Header_t* SliceHeader = &pInfo->SliceHeader;
     int32_t reorder= -1;
@@ -400,14 +698,18 @@
                     return H264_SliceHeader_ERROR;
                 }
 
-                SliceHeader->sh_refpic_l0.reordering_of_pic_nums_idc[reorder] = h264_GetVLCElement(parent, pInfo, false);
-                if ((SliceHeader->sh_refpic_l0.reordering_of_pic_nums_idc[reorder] == 0) || (SliceHeader->sh_refpic_l0.reordering_of_pic_nums_idc[reorder] == 1))
+                SliceHeader->sh_refpic_l0.reordering_of_pic_nums_idc[reorder] =
+                    h264_GetVLCElement(parent, pInfo, false);
+                if ((SliceHeader->sh_refpic_l0.reordering_of_pic_nums_idc[reorder] == 0) ||
+                    (SliceHeader->sh_refpic_l0.reordering_of_pic_nums_idc[reorder] == 1))
                 {
-                    SliceHeader->sh_refpic_l0.list_reordering_num[reorder].abs_diff_pic_num_minus1 = h264_GetVLCElement(parent, pInfo, false);
+                    SliceHeader->sh_refpic_l0.list_reordering_num[reorder].abs_diff_pic_num_minus1 =
+                        h264_GetVLCElement(parent, pInfo, false);
                 }
                 else if (SliceHeader->sh_refpic_l0.reordering_of_pic_nums_idc[reorder] == 2)
                 {
-                    SliceHeader->sh_refpic_l0.list_reordering_num[reorder].long_term_pic_num = h264_GetVLCElement(parent, pInfo, false);
+                    SliceHeader->sh_refpic_l0.list_reordering_num[reorder].long_term_pic_num =
+                        h264_GetVLCElement(parent, pInfo, false);
                 }
 
             } while (SliceHeader->sh_refpic_l0.reordering_of_pic_nums_idc[reorder] != 3);
@@ -430,13 +732,16 @@
                     return H264_SliceHeader_ERROR;
                 }
                 SliceHeader->sh_refpic_l1.reordering_of_pic_nums_idc[reorder] = h264_GetVLCElement(parent, pInfo, false);
-                if ((SliceHeader->sh_refpic_l1.reordering_of_pic_nums_idc[reorder] == 0) || (SliceHeader->sh_refpic_l1.reordering_of_pic_nums_idc[reorder] == 1))
+                if ((SliceHeader->sh_refpic_l1.reordering_of_pic_nums_idc[reorder] == 0) ||
+                    (SliceHeader->sh_refpic_l1.reordering_of_pic_nums_idc[reorder] == 1))
                 {
-                    SliceHeader->sh_refpic_l1.list_reordering_num[reorder].abs_diff_pic_num_minus1 = h264_GetVLCElement(parent, pInfo, false);
+                    SliceHeader->sh_refpic_l1.list_reordering_num[reorder].abs_diff_pic_num_minus1 =
+                        h264_GetVLCElement(parent, pInfo, false);
                 }
                 else if (SliceHeader->sh_refpic_l1.reordering_of_pic_nums_idc[reorder] == 2)
                 {
-                    SliceHeader->sh_refpic_l1.list_reordering_num[reorder].long_term_pic_num = h264_GetVLCElement(parent, pInfo, false);
+                    SliceHeader->sh_refpic_l1.list_reordering_num[reorder].long_term_pic_num =
+                        h264_GetVLCElement(parent, pInfo, false);
                 }
             } while (SliceHeader->sh_refpic_l1.reordering_of_pic_nums_idc[reorder] != 3);
         }
@@ -514,7 +819,8 @@
             }
             else
             {
-                SliceHeader->sh_predwttbl.luma_weight_l1[i] = (1 << SliceHeader->sh_predwttbl.luma_log2_weight_denom);
+                SliceHeader->sh_predwttbl.luma_weight_l1[i] =
+                    (1 << SliceHeader->sh_predwttbl.luma_log2_weight_denom);
                 SliceHeader->sh_predwttbl.luma_offset_l1[i] = 0;
             }
 
@@ -535,7 +841,8 @@
                 {
                     for (j = 0; j < 2; j++)
                     {
-                        SliceHeader->sh_predwttbl.chroma_weight_l1[i][j] = (1 << SliceHeader->sh_predwttbl.chroma_log2_weight_denom);
+                        SliceHeader->sh_predwttbl.chroma_weight_l1[i][j] =
+                            (1 << SliceHeader->sh_predwttbl.chroma_log2_weight_denom);
                         SliceHeader->sh_predwttbl.chroma_offset_l1[i][j] = 0;
                     }
                 }
@@ -548,6 +855,113 @@
 } ///// End of h264_Parse_Pred_Weight_Table
 
 
+h264_Status h264_Parse_Pred_Weight_Table_opt(void *parent, h264_Info* pInfo,h264_Slice_Header_t *SliceHeader)
+{
+    uint32_t i = 0, j = 0;
+    uint32_t flag;
+
+    SliceHeader->sh_predwttbl.luma_log2_weight_denom = h264_GetVLCElement(parent, pInfo, false);
+
+    if (SliceHeader->active_SPS->sps_disp.chroma_format_idc != 0)
+    {
+        SliceHeader->sh_predwttbl.chroma_log2_weight_denom = h264_GetVLCElement(parent,pInfo, false);
+    }
+
+    for (i = 0; i < SliceHeader->num_ref_idx_l0_active; i++)
+    {
+        viddec_pm_get_bits(parent, (uint32_t *)&flag, 1);
+        SliceHeader->sh_predwttbl.luma_weight_l0_flag = flag;
+
+        if (SliceHeader->sh_predwttbl.luma_weight_l0_flag)
+        {
+            SliceHeader->sh_predwttbl.luma_weight_l0[i] = h264_GetVLCElement(parent, pInfo, true);
+            SliceHeader->sh_predwttbl.luma_offset_l0[i] = h264_GetVLCElement(parent, pInfo, true);
+        }
+        else
+        {
+            SliceHeader->sh_predwttbl.luma_weight_l0[i] = (1 << SliceHeader->sh_predwttbl.luma_log2_weight_denom);
+            SliceHeader->sh_predwttbl.luma_offset_l0[i] = 0;
+        }
+
+        if (SliceHeader->active_SPS->sps_disp.chroma_format_idc != 0)
+        {
+            viddec_pm_get_bits(parent, (uint32_t *)&flag, 1);
+            SliceHeader->sh_predwttbl.chroma_weight_l0_flag = flag;
+
+            if (SliceHeader->sh_predwttbl.chroma_weight_l0_flag)
+            {
+                for (j = 0; j < 2; j++)
+                {
+                    SliceHeader->sh_predwttbl.chroma_weight_l0[i][j] = h264_GetVLCElement(parent, pInfo, true);
+                    SliceHeader->sh_predwttbl.chroma_offset_l0[i][j] = h264_GetVLCElement(parent, pInfo, true);
+                }
+            }
+            else
+            {
+                for (j = 0; j < 2; j++)
+                {
+                    SliceHeader->sh_predwttbl.chroma_weight_l0[i][j] =
+                        (1 << SliceHeader->sh_predwttbl.chroma_log2_weight_denom);
+                    SliceHeader->sh_predwttbl.chroma_offset_l0[i][j] = 0;
+                }
+            }
+        }
+
+    }
+
+    if (SliceHeader->slice_type == h264_PtypeB)
+    {
+        for (i = 0; i < SliceHeader->num_ref_idx_l1_active; i++)
+        {
+            viddec_pm_get_bits(parent, (uint32_t *)&flag, 1);
+            SliceHeader->sh_predwttbl.luma_weight_l1_flag = flag;
+
+            if (SliceHeader->sh_predwttbl.luma_weight_l1_flag)
+            {
+                SliceHeader->sh_predwttbl.luma_weight_l1[i] = h264_GetVLCElement(parent, pInfo, true);
+                SliceHeader->sh_predwttbl.luma_offset_l1[i] = h264_GetVLCElement(parent, pInfo, true);
+            }
+            else
+            {
+                SliceHeader->sh_predwttbl.luma_weight_l1[i] =
+                    (1 << SliceHeader->sh_predwttbl.luma_log2_weight_denom);
+                SliceHeader->sh_predwttbl.luma_offset_l1[i] = 0;
+            }
+
+            if (SliceHeader->active_SPS->sps_disp.chroma_format_idc != 0)
+            {
+                viddec_pm_get_bits(parent, (uint32_t *)&flag, 1);
+                SliceHeader->sh_predwttbl.chroma_weight_l1_flag = flag;
+
+                if (SliceHeader->sh_predwttbl.chroma_weight_l1_flag)
+                {
+                    for (j = 0; j < 2; j++)
+                    {
+                        SliceHeader->sh_predwttbl.chroma_weight_l1[i][j] =
+                            h264_GetVLCElement(parent, pInfo, true);
+                        SliceHeader->sh_predwttbl.chroma_offset_l1[i][j] =
+                            h264_GetVLCElement(parent, pInfo, true);
+                    }
+                }
+                else
+                {
+                    for (j = 0; j < 2; j++)
+                    {
+                        SliceHeader->sh_predwttbl.chroma_weight_l1[i][j] =
+                            (1 << SliceHeader->sh_predwttbl.chroma_log2_weight_denom);
+                        SliceHeader->sh_predwttbl.chroma_offset_l1[i][j] = 0;
+                    }
+                }
+            }
+
+        }
+    }
+
+    return H264_STATUS_OK;
+}
+
+
+
 /*--------------------------------------------------------------------------------------------------*/
 // The syntax elements specify marking of the reference pictures.
 //			1)IDR:		no_output_of_prior_pics_flag,
@@ -600,25 +1014,32 @@
             {
                 if (i < NUM_MMCO_OPERATIONS)
                 {
-                    SliceHeader->sh_dec_refpic.memory_management_control_operation[i] = h264_GetVLCElement(parent, pInfo, false);
-                    if ((SliceHeader->sh_dec_refpic.memory_management_control_operation[i] == 1) || (SliceHeader->sh_dec_refpic.memory_management_control_operation[i] == 3))
+                    SliceHeader->sh_dec_refpic.memory_management_control_operation[i] =
+                        h264_GetVLCElement(parent, pInfo, false);
+                    if ((SliceHeader->sh_dec_refpic.memory_management_control_operation[i] == 1) ||
+                        (SliceHeader->sh_dec_refpic.memory_management_control_operation[i] == 3))
                     {
-                        SliceHeader->sh_dec_refpic.difference_of_pic_num_minus1[i] = h264_GetVLCElement(parent, pInfo, false);
+                        SliceHeader->sh_dec_refpic.difference_of_pic_num_minus1[i] =
+                            h264_GetVLCElement(parent, pInfo, false);
                     }
 
                     if (SliceHeader->sh_dec_refpic.memory_management_control_operation[i] == 2)
                     {
-                        SliceHeader->sh_dec_refpic.long_term_pic_num[i] = h264_GetVLCElement(parent, pInfo, false);
+                        SliceHeader->sh_dec_refpic.long_term_pic_num[i] =
+                            h264_GetVLCElement(parent, pInfo, false);
                     }
 
-                    if ((SliceHeader->sh_dec_refpic.memory_management_control_operation[i] == 3) || (SliceHeader->sh_dec_refpic.memory_management_control_operation[i] == 6))
+                    if ((SliceHeader->sh_dec_refpic.memory_management_control_operation[i] == 3) ||
+                        (SliceHeader->sh_dec_refpic.memory_management_control_operation[i] == 6))
                     {
-                        SliceHeader->sh_dec_refpic.long_term_frame_idx[i] = h264_GetVLCElement(parent, pInfo, false);
+                        SliceHeader->sh_dec_refpic.long_term_frame_idx[i] =
+                            h264_GetVLCElement(parent, pInfo, false);
                     }
 
                     if (SliceHeader->sh_dec_refpic.memory_management_control_operation[i] == 4)
                     {
-                        SliceHeader->sh_dec_refpic.max_long_term_frame_idx_plus1[i] = h264_GetVLCElement(parent, pInfo, false);
+                        SliceHeader->sh_dec_refpic.max_long_term_frame_idx_plus1[i] =
+                            h264_GetVLCElement(parent, pInfo, false);
                     }
 
                     if (SliceHeader->sh_dec_refpic.memory_management_control_operation[i] == 5)
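
What distinguishes the *_opt functions above from their originals is that every SPS/PPS-dependent syntax check goes through SliceHeader->active_SPS and SliceHeader->active_PPS instead of the shared pInfo->active_SPS/active_PPS; that is the data-structure decoupling that lets a worker thread parse a slice header in isolation. Where those pointers get filled in is not visible in this hunk; the fragment below only illustrates the intended binding, and the helper name is an assumption, not part of this patch:

#include "h264.h"   /* h264_Info, h264_Slice_Header_t; plugin include path assumed */

/* Illustrative only: give the per-slice header its own view of the active
 * parameter sets so the *_opt parsers never have to dereference the shared
 * pInfo->active_SPS / pInfo->active_PPS at parse time.  A real multithreaded
 * setup may instead point at entries in the SPS/PPS tables selected by the
 * slice's pic_parameter_set_id. */
static void bind_active_parameter_sets(h264_Info *pInfo, h264_Slice_Header_t *sh)
{
    sh->active_SPS = &pInfo->active_SPS;   /* seq_param_set_used */
    sh->active_PPS = &pInfo->active_PPS;   /* pic_param_set      */
}
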
diff --git a/mixvbp/vbp_plugin/h264/include/h264.h b/mixvbp/vbp_plugin/h264/include/h264.h
index 7015c37..28699f8 100755
--- a/mixvbp/vbp_plugin/h264/include/h264.h
+++ b/mixvbp/vbp_plugin/h264/include/h264.h
@@ -681,56 +681,8 @@
         int8_t chroma_offset_l1[32][2];
     } h264_pred_weight_table;
 
-    typedef struct _h264_Slice_Header
-    {
-        int32_t 		first_mb_in_slice;								//UE
-        int32_t		frame_num;											//UV
-        int32_t		pic_order_cnt_lsb;								//UV
-        int32_t		delta_pic_order_cnt_bottom;					//SE
-        int32_t		delta_pic_order_cnt[2];								//SE
-        int32_t		redundant_pic_cnt;									//UE
+#define MAX_USER_DATA_SIZE 1024
 
-        uint32_t		num_ref_idx_l0_active;								//UE
-        uint32_t		num_ref_idx_l1_active;								//UE
-
-        int32_t		slice_qp_delta;										//SE
-        int32_t		slice_qs_delta;										//SE
-        int32_t		slice_alpha_c0_offset_div2;						//SE
-        int32_t		slice_beta_offset_div2;								//SE
-        int32_t		slice_group_change_cycle;							//UV
-
-        h264_pred_weight_table  sh_predwttbl;
-
-        ///// Flags or IDs
-        //h264_ptype_t	slice_type;											//UE
-        uint8_t			slice_type;
-        uint8_t 			nal_ref_idc;
-        uint8_t			structure;
-        uint8_t 			pic_parameter_id;									//UE
-
-        uint8_t			field_pic_flag;
-        uint8_t			bottom_field_flag;
-        uint8_t			idr_flag;											//UE
-        uint8_t			idr_pic_id;											//UE
-
-        uint8_t 			sh_error;
-        uint8_t			cabac_init_idc;										//UE
-        uint8_t			sp_for_switch_flag;
-        uint8_t			disable_deblocking_filter_idc;						//UE
-
-        uint8_t			direct_spatial_mv_pred_flag;
-        uint8_t			num_ref_idx_active_override_flag;
-        int16_t			current_slice_nr;
-
-        //// For Ref list reordering
-        h264_Dec_Ref_Pic_Marking_t sh_dec_refpic;
-        h264_Ref_Pic_List_Reordering_t sh_refpic_l0;
-        h264_Ref_Pic_List_Reordering_t sh_refpic_l1;
-
-    } h264_Slice_Header_t;
-
-
-#define   MAX_USER_DATA_SIZE              1024
     typedef struct _h264_user_data_t
     {
         h264_sei_payloadtype    user_data_type;
@@ -828,6 +780,71 @@
 
     } seq_param_set_all, *seq_param_set_all_ptr;
 
+    typedef struct _h264_Slice_Header
+    {
+        int32_t 	first_mb_in_slice;						//UE
+        int32_t		frame_num;							//UV
+        int32_t		pic_order_cnt_lsb;						//UV
+        int32_t		delta_pic_order_cnt_bottom;					//SE
+        int32_t		delta_pic_order_cnt[2];						//SE
+        int32_t		redundant_pic_cnt;						//UE
+
+        uint32_t	num_ref_idx_l0_active;						//UE
+        uint32_t	num_ref_idx_l1_active;						//UE
+
+        int32_t		slice_qp_delta;							//SE
+        int32_t		slice_qs_delta;							//SE
+        int32_t		slice_alpha_c0_offset_div2;					//SE
+        int32_t		slice_beta_offset_div2;						//SE
+        int32_t		slice_group_change_cycle;					//UV
+
+        h264_pred_weight_table  sh_predwttbl;
+
+        ///// Flags or IDs
+        //h264_ptype_t	slice_type;							//UE
+        uint8_t			slice_type;
+        uint8_t 		nal_ref_idc;
+        uint8_t			structure;
+        uint8_t 		pic_parameter_id;					//UE
+
+        uint8_t			field_pic_flag;
+        uint8_t			bottom_field_flag;
+        uint8_t			idr_flag;						//UE
+        uint8_t			idr_pic_id;						//UE
+
+        uint8_t 		sh_error;
+        uint8_t			cabac_init_idc;						//UE
+        uint8_t			sp_for_switch_flag;
+        uint8_t			disable_deblocking_filter_idc;				//UE
+
+        uint8_t			direct_spatial_mv_pred_flag;
+        uint8_t			num_ref_idx_active_override_flag;
+        int16_t			current_slice_nr;
+
+        //// For Ref list reordering
+        h264_Dec_Ref_Pic_Marking_t sh_dec_refpic;
+        h264_Ref_Pic_List_Reordering_t sh_refpic_l0;
+        h264_Ref_Pic_List_Reordering_t sh_refpic_l1;
+
+        seq_param_set_used*     active_SPS;
+        pic_param_set*          active_PPS;
+        uint32_t                parse_done;         // flag to indicate slice header parsing is done
+
+        // temp fields for multithread parsing to store bitstream state
+        uint32_t                bstrm_buf_buf_index;
+        uint32_t                bstrm_buf_buf_st;
+        uint32_t                bstrm_buf_buf_end;
+        uint32_t                bstrm_buf_buf_bitoff;
+        uint32_t                au_pos;
+        uint32_t                list_off;
+        uint32_t                phase;
+        uint32_t                emulation_byte_counter;
+        uint32_t                is_emul_reqd;
+        int32_t                 list_start_offset;
+        int32_t                 list_end_offset;
+        int32_t                 list_total_bytes;
+
+    } h264_Slice_Header_t;
 
 ///// Image control parameter////////////
     typedef struct _h264_img_par
@@ -932,12 +949,13 @@
         //// Structures
         //// need to gurantee active_SPS and active_PPS start from 4-bytes alignment address
         seq_param_set_used	active_SPS;
-        pic_param_set			active_PPS;
+        pic_param_set		active_PPS;
 
 
         h264_Slice_Header_t  SliceHeader;
         OldSliceParams       old_slice;
         sei_info             sei_information;
+        h264_Slice_Header_t*  working_sh[150]; // working slice headers for multithreaded parsing
 
         h264_img_par      img;
 
diff --git a/mixvbp/vbp_plugin/h264/include/h264parse.h b/mixvbp/vbp_plugin/h264/include/h264parse.h
index 2b1c7a6..6adee42 100755
--- a/mixvbp/vbp_plugin/h264/include/h264parse.h
+++ b/mixvbp/vbp_plugin/h264/include/h264parse.h
@@ -89,7 +89,7 @@
 ////////////////////////////////////////////////////////////////////
 
 //NAL
-    extern h264_Status h264_Parse_NAL_Unit(void *parent, h264_Info* pInfo, uint8_t *nal_ref_idc);
+    extern h264_Status h264_Parse_NAL_Unit(void *parent, uint8_t *nal_unit_type, uint8_t *nal_ref_idc);
 
 ////// Slice header
     extern h264_Status h264_Parse_Slice_Layer_Without_Partitioning_RBSP(void *parent, h264_Info* pInfo, h264_Slice_Header_t *SliceHeader);
@@ -97,6 +97,12 @@
     extern h264_Status h264_Parse_Slice_Header_2(void *parent, h264_Info* pInfo, h264_Slice_Header_t *SliceHeader);
     extern h264_Status h264_Parse_Slice_Header_3(void *parent, h264_Info* pInfo, h264_Slice_Header_t *SliceHeader);
 
+// Optimized variants for multi-threaded parsing.
+    extern h264_Status h264_Parse_Slice_Layer_Without_Partitioning_RBSP_opt(void *parent, h264_Info* pInfo, h264_Slice_Header_t *SliceHeader);
+    extern h264_Status h264_Parse_Slice_Header_2_opt(void *parent, h264_Info* pInfo, h264_Slice_Header_t *SliceHeader);
+    extern h264_Status h264_Parse_Slice_Header_3_opt(void *parent, h264_Info* pInfo, h264_Slice_Header_t *SliceHeader);
+
+    extern h264_Status h264_Post_Parsing_Slice_Header(void *parent, h264_Info* pInfo, h264_Slice_Header_t *next_SliceHeader);
 
 ////// SPS
     extern h264_Status h264_Parse_SeqParameterSet(void *parent, h264_Info * pInfo,seq_param_set_used_ptr SPS, vui_seq_parameters_t_not_used_ptr pVUI_Seq_Not_Used, int32_t* pOffset_ref_frame);
@@ -113,6 +119,8 @@
     extern h264_Status h264_Parse_Pred_Weight_Table(void *parent,h264_Info* pInfo,h264_Slice_Header_t *SliceHeader);
     extern h264_Status h264_Parse_Dec_Ref_Pic_Marking(void *parent,h264_Info* pInfo,h264_Slice_Header_t *SliceHeader);
 
+    extern h264_Status h264_Parse_Pred_Weight_Table_opt(void *parent,h264_Info* pInfo,h264_Slice_Header_t *SliceHeader);
+
 
 
 ///// Mem functions
diff --git a/mixvbp/vbp_plugin/h264/mix_vbp_h264_stubs.c b/mixvbp/vbp_plugin/h264/mix_vbp_h264_stubs.c
index 62e6ab3..eb85022 100755
--- a/mixvbp/vbp_plugin/h264/mix_vbp_h264_stubs.c
+++ b/mixvbp/vbp_plugin/h264/mix_vbp_h264_stubs.c
@@ -204,10 +204,12 @@
 void h264_parse_emit_start_new_frame( void *parent, h264_Info *pInfo )
 {
 
-    uint32_t                   i=0,nitems=0;
+    uint32_t i=0,nitems=0;
 
     ///////////////////////// Frame attributes//////////////////////////
 
+// Workload-related code removed
+#if 0
     //Push data into current workload if first frame or frame_boundary already detected by non slice nal
     if ( (pInfo->Is_first_frame_in_stream)||(pInfo->is_frame_boundary_detected_by_non_slice_nal))
     {
@@ -227,6 +229,7 @@
 
         pInfo->is_current_workload_done=1;
     }
+#endif
 
     ///////////////////// SPS/////////////////////
     // h264_parse_emit_sps(parent, pInfo);
diff --git a/mixvbp/vbp_plugin/h264/secvideo/baytrail/viddec_h264secure_parse.c b/mixvbp/vbp_plugin/h264/secvideo/baytrail/viddec_h264secure_parse.c
index ef03351..d8e0835 100755
--- a/mixvbp/vbp_plugin/h264/secvideo/baytrail/viddec_h264secure_parse.c
+++ b/mixvbp/vbp_plugin/h264/secvideo/baytrail/viddec_h264secure_parse.c
@@ -40,14 +40,15 @@
 
 
     uint8_t nal_ref_idc = 0;
+    uint8_t nal_unit_type = 0;
 
     ///// Parse NAL Unit header
     pInfo->img.g_new_frame = 0;
     pInfo->push_to_cur = 1;
     pInfo->is_current_workload_done =0;
-    pInfo->nal_unit_type = 0;
 
-    h264_Parse_NAL_Unit(parent, pInfo, &nal_ref_idc);
+    h264_Parse_NAL_Unit(parent, &nal_unit_type, &nal_ref_idc);
+    pInfo->nal_unit_type = nal_unit_type;
 
     ///// Check frame bounday for non-vcl elimitter
     h264_check_previous_frame_end(pInfo);
diff --git a/mixvbp/vbp_plugin/h264/secvideo/merrifield/viddec_h264secure_parse.c b/mixvbp/vbp_plugin/h264/secvideo/merrifield/viddec_h264secure_parse.c
index 06efe5f..acfde74 100755
--- a/mixvbp/vbp_plugin/h264/secvideo/merrifield/viddec_h264secure_parse.c
+++ b/mixvbp/vbp_plugin/h264/secvideo/merrifield/viddec_h264secure_parse.c
@@ -115,7 +115,10 @@
     pInfo->is_current_workload_done =0;
     pInfo->nal_unit_type = 0;
 
-    h264_Parse_NAL_Unit(parent, pInfo, &nal_ref_idc);
+    uint8_t nal_unit_type = 0;
+
+    h264_Parse_NAL_Unit(parent, &nal_unit_type, &nal_ref_idc);
+    pInfo->nal_unit_type = nal_unit_type;
 
     ///// Check frame bounday for non-vcl elimitter
     h264_check_previous_frame_end(pInfo);
diff --git a/mixvbp/vbp_plugin/h264/viddec_h264_parse.c b/mixvbp/vbp_plugin/h264/viddec_h264_parse.c
index b3639c4..976efb5 100755
--- a/mixvbp/vbp_plugin/h264/viddec_h264_parse.c
+++ b/mixvbp/vbp_plugin/h264/viddec_h264_parse.c
@@ -7,6 +7,13 @@
 
 #include "h264parse_dpb.h"
 #include <vbp_trace.h>
+#include <assert.h>
+#include <unistd.h>  // for sleep() in viddec_h264_post_parse
+
+uint32_t viddec_threading_backup_ctx_info(void *parent, h264_Slice_Header_t *next_SliceHeader);
+uint32_t viddec_threading_restore_ctx_info(void *parent, h264_Slice_Header_t *next_SliceHeader);
+
+#define MAX_SLICE_HEADER 150
 
 /* Init function which can be called to intialized local context on open and flush and preserve*/
 void viddec_h264_init(void *ctxt, uint32_t *persist_mem, uint32_t preserve)
@@ -24,6 +30,26 @@
     /* picture level info which will always be initialized */
     h264_init_Info_under_sps_pps_level(pInfo);
 
+    uint32_t i;
+    for(i = 0; i < MAX_SLICE_HEADER; i++) {
+        pInfo->working_sh[i] = (h264_Slice_Header_t*)malloc(sizeof(h264_Slice_Header_t));
+        assert(pInfo->working_sh[i] != NULL);
+
+        pInfo->working_sh[i]->parse_done = 0;
+        pInfo->working_sh[i]->bstrm_buf_buf_index = 0;
+        pInfo->working_sh[i]->bstrm_buf_buf_st = 0;
+        pInfo->working_sh[i]->bstrm_buf_buf_end = 0;
+        pInfo->working_sh[i]->bstrm_buf_buf_bitoff = 0;
+        pInfo->working_sh[i]->au_pos = 0;
+        pInfo->working_sh[i]->list_off = 0;
+        pInfo->working_sh[i]->phase = 0;
+        pInfo->working_sh[i]->emulation_byte_counter = 0;
+        pInfo->working_sh[i]->is_emul_reqd = 0;
+        pInfo->working_sh[i]->list_start_offset = 0;
+        pInfo->working_sh[i]->list_end_offset = 0;
+        pInfo->working_sh[i]->list_total_bytes = 0;
+        pInfo->working_sh[i]->slice_group_change_cycle = 0;
+    }
     return;
 }
 
@@ -40,6 +66,7 @@
     h264_Status status = H264_STATUS_ERROR;
 
     uint8_t nal_ref_idc = 0;
+    uint8_t nal_unit_type = 0;
 
     ///// Parse NAL Unit header
     pInfo->img.g_new_frame = 0;
@@ -47,9 +74,10 @@
     pInfo->is_current_workload_done =0;
     pInfo->nal_unit_type = 0;
 
-    h264_Parse_NAL_Unit(parent, pInfo, &nal_ref_idc);
+    h264_Parse_NAL_Unit(parent, &nal_unit_type, &nal_ref_idc);
+    pInfo->nal_unit_type = nal_unit_type;
     VTRACE("Start parsing NAL unit, type = %d", pInfo->nal_unit_type);
 
     ///// Check frame bounday for non-vcl elimitter
     h264_check_previous_frame_end(pInfo);
 
@@ -417,6 +445,64 @@
     return status;
 }
 
+
+uint32_t viddec_h264_threading_parse(void *parent, void *ctxt, uint32_t slice_index)
+{
+    struct h264_viddec_parser* parser = ctxt;
+
+    h264_Info * pInfo = &(parser->info);
+
+    h264_Status status = H264_STATUS_ERROR;
+
+    uint8_t nal_ref_idc = 0;
+    uint8_t nal_unit_type = 0;
+
+    h264_Parse_NAL_Unit(parent, &nal_unit_type, &nal_ref_idc);
+
+    pInfo->nal_unit_type = nal_unit_type;
+
+
+    //////// Parse valid NAL unit
+    if (nal_unit_type == h264_NAL_UNIT_TYPE_SLICE) {
+        h264_Slice_Header_t* next_SliceHeader = pInfo->working_sh[slice_index];
+        memset(next_SliceHeader, 0, sizeof(h264_Slice_Header_t));
+
+        next_SliceHeader->nal_ref_idc = nal_ref_idc;
+
+
+        ////////////////////////////////////////////////////////////////////////////
+        // Step 2: Parsing slice header
+        ////////////////////////////////////////////////////////////////////////////
+        /// IDR flag
+        next_SliceHeader->idr_flag = (pInfo->nal_unit_type == h264_NAL_UNIT_TYPE_IDR);
+
+
+        /// Pass slice header
+        status = h264_Parse_Slice_Layer_Without_Partitioning_RBSP_opt(parent, pInfo, next_SliceHeader);
+
+        viddec_threading_backup_ctx_info(parent, next_SliceHeader);
+
+        if (next_SliceHeader->sh_error & 3)
+        {
+            ETRACE("Slice Header parsing error.");
+            status = H264_STATUS_ERROR;
+            return status;
+        }
+
+        //h264_Post_Parsing_Slice_Header(parent, pInfo, &next_SliceHeader);
+        next_SliceHeader->parse_done  = 1;
+
+    } else {
+        ETRACE("Wrong NALU. Multi thread is supposed to just parse slice nalu type.");
+        status = H264_STATUS_ERROR;
+        return status;
+    }
+
+    return status;
+}
+
+
+
 void viddec_h264_get_context_size(viddec_parser_memory_sizes_t *size)
 {
     /* Should return size of my structure */
@@ -451,7 +537,104 @@
     p_dpb->fs_dec_idc = MPD_DPB_FS_NULL_IDC;
     p_dpb->fs_non_exist_idc = MPD_DPB_FS_NULL_IDC;
 
+    for(i = 0; i < MAX_SLICE_HEADER; i++) {
+        free(pInfo->working_sh[i]);
+        pInfo->working_sh[i] = NULL;
+    }
     return;
 }
 
+uint32_t viddec_h264_payload_start(void *parent)
+{
+
+    uint32_t code;
+    uint8_t nal_unit_type = 0;
+    if ( viddec_pm_peek_bits(parent, &code, 8) != -1)
+    {
+        nal_unit_type = (uint8_t)((code >> 0) & 0x1f);
+    }
+    // check whether slice data starts
+    if (nal_unit_type == h264_NAL_UNIT_TYPE_SLICE)
+    {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+uint32_t viddec_h264_post_parse(void *parent, void *ctxt, uint32_t slice_index)
+{
+    struct h264_viddec_parser* parser = ctxt;
+    h264_Info * pInfo = &(parser->info);
+    h264_Status status = H264_STATUS_ERROR;
+
+    h264_Slice_Header_t* next_SliceHeader = pInfo->working_sh[slice_index];
+
+    while (next_SliceHeader->parse_done != 1) {
+        sleep(0);
+        //WTRACE("slice header[%d] parse not finish, block to wait.", slice_index);
+    }
+
+    viddec_threading_restore_ctx_info(parent, next_SliceHeader);
+    status = h264_Post_Parsing_Slice_Header(parent, pInfo, next_SliceHeader);
+
+    next_SliceHeader->parse_done = 0;
+
+    return status;
+}
+
+
+uint32_t viddec_h264_query_thread_parsing_cap(void)
+{
+    // the current H.264 implementation supports multi-threaded parsing
+    return 1;
+}
+
+uint32_t viddec_threading_backup_ctx_info(void *parent, h264_Slice_Header_t *next_SliceHeader)
+{
+    h264_Status retStatus = H264_STATUS_OK;
+
+    viddec_pm_cxt_t* pm_cxt = (viddec_pm_cxt_t*) parent;
+
+    next_SliceHeader->bstrm_buf_buf_index = pm_cxt->getbits.bstrm_buf.buf_index;
+    next_SliceHeader->bstrm_buf_buf_st = pm_cxt->getbits.bstrm_buf.buf_st;
+    next_SliceHeader->bstrm_buf_buf_end = pm_cxt->getbits.bstrm_buf.buf_end;
+    next_SliceHeader->bstrm_buf_buf_bitoff = pm_cxt->getbits.bstrm_buf.buf_bitoff;
+
+    next_SliceHeader->au_pos = pm_cxt->getbits.au_pos;
+    next_SliceHeader->list_off = pm_cxt->getbits.list_off;
+    next_SliceHeader->phase = pm_cxt->getbits.phase;
+    next_SliceHeader->emulation_byte_counter = pm_cxt->getbits.emulation_byte_counter;
+    next_SliceHeader->is_emul_reqd = pm_cxt->getbits.is_emul_reqd;
+
+    next_SliceHeader->list_start_offset = pm_cxt->list.start_offset;
+    next_SliceHeader->list_end_offset = pm_cxt->list.end_offset;
+    next_SliceHeader->list_total_bytes = pm_cxt->list.total_bytes;
+
+    return retStatus;
+}
+
+uint32_t viddec_threading_restore_ctx_info(void *parent, h264_Slice_Header_t *next_SliceHeader)
+{
+    h264_Status retStatus = H264_STATUS_OK;
+
+    viddec_pm_cxt_t* pm_cxt = (viddec_pm_cxt_t*) parent;
+
+    pm_cxt->getbits.bstrm_buf.buf_index = next_SliceHeader->bstrm_buf_buf_index;
+    pm_cxt->getbits.bstrm_buf.buf_st = next_SliceHeader->bstrm_buf_buf_st;
+    pm_cxt->getbits.bstrm_buf.buf_end = next_SliceHeader->bstrm_buf_buf_end;
+    pm_cxt->getbits.bstrm_buf.buf_bitoff = next_SliceHeader->bstrm_buf_buf_bitoff;
+
+    pm_cxt->getbits.au_pos = next_SliceHeader->au_pos;
+    pm_cxt->getbits.list_off = next_SliceHeader->list_off;
+    pm_cxt->getbits.phase = next_SliceHeader->phase;
+    pm_cxt->getbits.emulation_byte_counter = next_SliceHeader->emulation_byte_counter;
+    pm_cxt->getbits.is_emul_reqd = next_SliceHeader->is_emul_reqd;
+
+    pm_cxt->list.start_offset = next_SliceHeader->list_start_offset;
+    pm_cxt->list.end_offset = next_SliceHeader->list_end_offset;
+    pm_cxt->list.total_bytes = next_SliceHeader->list_total_bytes;
+
+    return retStatus;
+}
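
For context on how these entry points fit together: viddec_h264_threading_parse() parses one slice NAL unit into working_sh[slice_index] and sets parse_done, viddec_h264_post_parse() spins (sleep(0)) until that flag is set, restores the saved bitstream position, and finishes the order-dependent bookkeeping, while viddec_h264_payload_start() and viddec_h264_query_thread_parsing_cap() let the manager decide whether and where to fan out. The real scheduling lives in vbp_thread.c, which this excerpt does not show; the sketch below is only a plausible shape of the bundle scheme, and everything except the two viddec_h264_* prototypes is an assumption. As a design note, the parse_done/sleep(0) handshake is a simple spin-yield; a condition variable or semaphore could avoid busy-waiting if post-parse ever runs well ahead of the workers.

#include <stdint.h>

/* Prototypes of the entry points added in viddec_h264_parse.c above. */
extern uint32_t viddec_h264_threading_parse(void *parent, void *ctxt, uint32_t slice_index);
extern uint32_t viddec_h264_post_parse(void *parent, void *ctxt, uint32_t slice_index);

/* Hypothetical bundle-scheme driver: pm[] holds one parser-manager context per
 * slice NAL unit in the bundle, ctxt is the h264_viddec_parser context. */
static void parse_slice_bundle(void *pm[], void *ctxt, uint32_t n_slices)
{
    uint32_t i;

    /* Fan out: in the real code each of these calls is submitted to a worker
     * thread, with results collected in working_sh[i]. */
    for (i = 0; i < n_slices; i++) {
        viddec_h264_threading_parse(pm[i], ctxt, i);
    }

    /* Retire strictly in bitstream order: post-parse waits on parse_done,
     * restores the saved getbits/list state, and performs the order-dependent
     * slice-header bookkeeping on the shared parser info. */
    for (i = 0; i < n_slices; i++) {
        viddec_h264_post_parse(pm[i], ctxt, i);
    }
}
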