mix_vbp: Porting patches from R2 to R3

    BZ: 18443

    ---------------
    patches history
    ---------------

    commit b649c693a0ee8cde9377f64f3874cce9063e0ae4
    Author: Andy Qiu <junhai.qiu@intel.com>
    Date:   Mon Nov 14 07:54:33 2011 -0800

        Extended video middleware to support dynamic resolution change.

        BZ 14251

        vbp_parser only reports a new sequence when there is a change in the SPS
        or PPS id. For some content, the SPS and PPS stay the same but the actual
        resolution changes.  Change vbp parser to compare the current resolution
        with the previous resolution to detect if there is a new sequence.

        Change-Id: Iafae7f0e0610df98edc77101b9a9df3341beed47
        Signed-off-by: Andy Qiu <junhai.qiu@intel.com>
        Reviewed-on: http://android.intel.com:8080/24045
        Reviewed-by: Chen, Weian <weian.chen@intel.com>
        Reviewed-by: Fang, Yanlong <yanlong.fang@intel.com>
        Reviewed-by: Ding, Haitao <haitao.ding@intel.com>
        Tested-by: Ding, Haitao <haitao.ding@intel.com>
        Reviewed-by: buildbot <buildbot@intel.com>
        Tested-by: buildbot <buildbot@intel.com>

    commit 74c19eb381782be1e04684efb4856c77b4fa12b2
    Author: Richard Tang <richard.tang@intel.com>
    Date:   Thu Nov 3 17:58:56 2011 +0800

        mix_vbp: viddec_parse_sc optimization

        BZ: 5661

        The viddec_parse_sc function scans a buffer for the start code (SC) 0x00 0x00 0x01.

        This patch changes the 32-bit algorithm to a 128-bit algorithm.
        It also changes the algorithm to handle little-endian data directly.

        Change-Id: I3787b9c094159e8c8e671737ebb180746ff0b0bd
        Signed-off-by: Richard Tang <richard.tang@intel.com>
        Reviewed-on: http://android.intel.com:8080/21889
        Reviewed-by: Qiu, Junhai <junhai.qiu@intel.com>
        Reviewed-by: Chen, Weian <weian.chen@intel.com>
        Tested-by: Ding, Haitao <haitao.ding@intel.com>
        Reviewed-by: buildbot <buildbot@intel.com>
        Tested-by: buildbot <buildbot@intel.com>

Change-Id: I0cfa84bd28ca8e472f8de97018ea8ff3bea8710e
Signed-off-by: Weian Chen <weian.chen@intel.com>
Reviewed-on: http://android.intel.com:8080/30000
Reviewed-by: Tang, Richard <richard.tang@intel.com>
Reviewed-by: Qiu, Junhai <junhai.qiu@intel.com>
Reviewed-by: Ding, Haitao <haitao.ding@intel.com>
Tested-by: Ding, Haitao <haitao.ding@intel.com>
Reviewed-by: buildbot <buildbot@intel.com>
Tested-by: buildbot <buildbot@intel.com>
diff --git a/mix_vbp/viddec_fw/fw/parser/vbp_h264_parser.c b/mix_vbp/viddec_fw/fw/parser/vbp_h264_parser.c
index eae56a9..9ed4285 100644
--- a/mix_vbp/viddec_fw/fw/parser/vbp_h264_parser.c
+++ b/mix_vbp/viddec_fw/fw/parser/vbp_h264_parser.c
@@ -49,12 +49,6 @@
     /* indicate if stream is length prefixed */
     int length_prefix_verified;
 
-    /* active sequence parameter set id */
-    uint8 seq_parameter_set_id;
-
-    /* active picture parameter set id */
-    uint8 pic_parameter_set_id;
-
     H264_BS_PATTERN bitstream_pattern;
 };
 
@@ -273,11 +267,6 @@
 
     parser_private->bitstream_pattern = H264_BS_SC_PREFIXED;
 
-    /* range from 0 to 31 inclusive */
-    parser_private->seq_parameter_set_id = 0xff;
-
-    /* range from 0 to  255 inclusive */
-    parser_private->pic_parameter_set_id = 0xff;
     return VBP_OK;
 
 cleanup:
@@ -832,8 +821,16 @@
 
 static void vbp_set_codec_data_h264(
     struct h264_viddec_parser *parser,
-    vbp_codec_data_h264* codec_data)
+     vbp_data_h264 *query_data)
 {
+    vbp_codec_data_h264* codec_data = query_data->codec_data;
+
+    /* The following variables are used to detect if there is new SPS or PPS */
+    uint8 seq_parameter_set_id = codec_data->seq_parameter_set_id;
+    uint8 pic_parameter_set_id = codec_data->pic_parameter_set_id;
+    int frame_width = codec_data->frame_width;
+    int frame_height = codec_data->frame_height;
+
     /* parameter id */
     codec_data->seq_parameter_set_id = parser->info.active_SPS.seq_parameter_set_id;
     codec_data->pic_parameter_set_id = parser->info.active_PPS.pic_parameter_set_id;
@@ -878,7 +875,7 @@
     codec_data->crop_top = 0;
     codec_data->crop_bottom = 0;
     if(parser->info.active_SPS.sps_disp.frame_cropping_flag) {
-        int CropUnitX = 0,	CropUnitY = 0, SubWidthC = 0, SubHeightC = 0;
+        int CropUnitX = 0, CropUnitY = 0, SubWidthC = 0, SubHeightC = 0;
         int ChromaArrayType = 0;
         if(parser->info.active_SPS.sps_disp.separate_colour_plane_flag == 0) {
             if(parser->info.active_SPS.sps_disp.chroma_format_idc == 1) {
@@ -903,7 +900,7 @@
         }
 
         codec_data->crop_left = CropUnitX * parser->info.active_SPS.sps_disp.frame_crop_rect_left_offset;
-        codec_data->crop_right = CropUnitX * parser->info.active_SPS.sps_disp.frame_crop_rect_right_offset; //	+ 1;
+        codec_data->crop_right = CropUnitX * parser->info.active_SPS.sps_disp.frame_crop_rect_right_offset; // + 1;
         codec_data->crop_top = CropUnitY * parser->info.active_SPS.sps_disp.frame_crop_rect_top_offset;
         codec_data->crop_bottom = CropUnitY * parser->info.active_SPS.sps_disp.frame_crop_rect_bottom_offset; // + 1;
     }
@@ -974,6 +971,17 @@
     codec_data->log2_max_pic_order_cnt_lsb_minus4 = parser->info.active_SPS.log2_max_pic_order_cnt_lsb_minus4;
     codec_data->pic_order_cnt_type = parser->info.active_SPS.pic_order_cnt_type;
 
+
+    /* update sps and pps status */
+    query_data->new_sps = (seq_parameter_set_id != parser->info.active_PPS.seq_parameter_set_id) ? 1 : 0;
+    query_data->new_pps = (pic_parameter_set_id != parser->info.active_PPS.pic_parameter_set_id) ? 1 : 0;
+    query_data->has_sps = parser->info.active_SPS.seq_parameter_set_id != 0xff;
+    query_data->has_pps = parser->info.active_PPS.seq_parameter_set_id != 0xff;
+    if ( frame_width != codec_data->frame_width || frame_height != codec_data->frame_height)
+    {
+        query_data->new_sps = 1;
+        query_data->new_pps = 1;
+    }
 }
 
 
@@ -1698,7 +1706,7 @@
     query_data = (vbp_data_h264 *)pcontext->query_data;
     private = (struct vbp_h264_parser_private_t *)pcontext->parser_private;
 
-    vbp_set_codec_data_h264(parser, query_data->codec_data);
+    vbp_set_codec_data_h264(parser, query_data);
 
     /* buffer number */
     query_data->buf_number = buffer_counter;
@@ -1721,20 +1729,6 @@
         vbp_add_pic_data_h264(pcontext, 0);
     }
 
-    query_data->new_pps = 0;
-    query_data->new_sps = 0;
-    if (private->seq_parameter_set_id != 0xff)
-    {
-        query_data->new_pps = (private->pic_parameter_set_id != parser->info.active_PPS.pic_parameter_set_id) ? 1 : 0;
-        query_data->new_sps = (private->seq_parameter_set_id != parser->info.active_PPS.seq_parameter_set_id) ? 1 : 0;
-    }
-
-    private->pic_parameter_set_id = parser->info.active_PPS.pic_parameter_set_id;
-    private->seq_parameter_set_id = parser->info.active_PPS.seq_parameter_set_id;
-
-    query_data->has_sps = parser->info.active_SPS.seq_parameter_set_id != 0xff;
-    query_data->has_pps = parser->info.active_PPS.seq_parameter_set_id != 0xff;
-
     return VBP_OK;
 }
 
diff --git a/mix_vbp/viddec_fw/fw/parser/viddec_parse_sc.c b/mix_vbp/viddec_fw/fw/parser/viddec_parse_sc.c
index 6f5aae0..510349c 100644
--- a/mix_vbp/viddec_fw/fw/parser/viddec_parse_sc.c
+++ b/mix_vbp/viddec_fw/fw/parser/viddec_parse_sc.c
@@ -1,6 +1,100 @@
 #include "viddec_pm_parse.h"
 #include "viddec_fw_debug.h"
 
+#ifndef MFDBIGENDIAN
+uint32_t viddec_parse_sc(void *in, void *pcxt, void *sc_state)
+{
+    uint8_t *ptr;
+    uint32_t data_left=0, phase = 0, ret = 0;
+    uint32_t single_byte_table[3][2] = {{1, 0}, {2, 0}, {2, 3}};
+    viddec_sc_parse_cubby_cxt_t *cxt;
+    /* Scans cxt->buf (cxt->size bytes) for the start code prefix 0x00 0x00 0x01, resuming from cxt->phase.
+       phase is a value in [0-4] tracking how much of the pattern has been seen: each 0x00 byte increments
+       it by 1 (up to 2) and any other byte resets it to 0, except that 0x01 while phase is 2 moves it to 3,
+       meaning the pattern was found. It is incremented to 4 once there is a byte after this pattern. */
+    cxt = ( viddec_sc_parse_cubby_cxt_t *)in;
+    data_left = cxt->size;
+    ptr = cxt->buf;
+    phase = cxt->phase;
+    cxt->sc_end_pos = -1;
+    pcxt=pcxt; // self-assignment; pcxt is not otherwise used here (presumably silences an unused-parameter warning)
+
+    /* keep parsing while there is more data and the start code has not been found */
+    while ((data_left > 0) && (phase < 3))
+    {
+        /* Check if we are 16-byte aligned & phase=0 & at least 16 bytes are left;
+           if that's the case we can check 16 bytes at a time instead of byte by byte */
+
+        if (((((uint32_t)ptr) & 0xF) == 0) && (phase == 0) && (data_left > 0xF))
+        {
+            // 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00      -- check 16 bytes at one time
+            // 00 ?? 00 ?? 00 ?? 00 ?? 00 ?? 00 ?? 00 ?? 00 ??      -- if no 00 at byte position: 15,13,11,09,07,05,03,01
+            // it is impossible to have 0x010000 within these 16 bytes,
+            // so we can skip 16 bytes at once (increase ptr, decrease data_left and keep phase = 0)
+            __asm__( // NOTE(review): ptr is moved through 32-bit ecx via movl -- presumably x86-32 only; confirm
+            //Data input
+            "movl %1, %%ecx\n\t"                   //ptr-->ecx
+            "movl %0, %%eax\n\t"                   //data_left-->eax
+
+            //Main compare loop
+            "MATCH_8_ZERO:\n\t"                    // NOTE(review): plain (non-local) asm label; may clash if this asm is ever emitted twice -- confirm
+            "pxor %%xmm0,%%xmm0\n\t"               //0 --> xmm0
+            "pcmpeqb (%%ecx),%%xmm0\n\t"           //uint128_data[ptr] eq xmm0 --> xmm0 , For each byte do calculation,  (byte == 0x00)?0xFF:0x00
+            "pmovmskb %%xmm0, %%edx\n\t"           //xmm0(128)-->edx(32), edx[0]=xmm0[7], edx[1]=xmm0[15], ... , edx[15]=xmm0[127], edx[31-16]=0x0000
+            "test $0xAAAA, %%edx\n\t"              //edx& 1010 1010 1010 1010b
+            "jnz DATA_RET\n\t"                     //Non-zero means that at least one odd-position byte is 0x00.
+
+            "PREPARE_NEXT_MATCH:\n\t"
+            "add $0x10, %%ecx\n\t"                 //16 + ecx --> ecx
+            "sub $0x10, %%eax\n\t"                 //eax-16 --> eax
+            "cmp $0x10, %%eax\n\t"                 //eax >= 16?
+            "jge MATCH_8_ZERO\n\t"                 //search next 16 bytes (NOTE(review): jge is a signed compare on an unsigned count)
+
+            "DATA_RET:\n\t"
+            "movl %%ecx, %1\n\t"                   //ecx --> ptr
+            "movl %%eax, %0\n\t"                   //eax --> data_left
+            : "+m"(data_left), "+m"(ptr)           //data_left --> eax, ptr -> ecx
+            :
+            :"eax", "ecx", "edx", "xmm0"
+            );
+
+            if (data_left <= 0) // data_left is unsigned, so this is effectively data_left == 0
+            {
+                 break;
+            }
+        }
+
+        // check byte by byte; next phase as a function of the current phase and *ptr:
+        //  (*ptr)    0       1      >=2
+        // phase=0    1       0      0
+        // phase=1    2       0      0
+        // phase=2    2       3      0
+        if (*ptr >= 2)
+        {
+            phase = 0;
+        }
+        else
+        {
+            phase = single_byte_table[phase][*ptr];
+        }
+        ptr ++;
+        data_left --;
+    }
+    if ((data_left > 0) && (phase == 3))
+    {
+        viddec_sc_prefix_state_t *state = (viddec_sc_prefix_state_t *)sc_state;
+        cxt->sc_end_pos = cxt->size - data_left;
+        state->next_sc = cxt->buf[cxt->sc_end_pos];
+        state->second_scprfx_length = 3;
+        phase++;
+        ret = 1;
+    }
+    cxt->phase = phase;
+    /* Return 1 (SC found) only if phase reached 4, otherwise return 0 */
+    return ret;
+}
+
+#else
 #define FIRST_STARTCODE_BYTE        0x00
 #define SECOND_STARTCODE_BYTE       0x00
 #define THIRD_STARTCODE_BYTE        0x01
@@ -45,9 +139,6 @@
                 char mask1 = 0, mask2=0;
 
                 data = *((uint32_t *)ptr);
-#ifndef MFDBIGENDIAN
-                data = SWAP_WORD(data);
-#endif
                 mask1 = (FIRST_STARTCODE_BYTE != (data & SC_BYTE_MASK0));
                 mask2 = (FIRST_STARTCODE_BYTE != (data & SC_BYTE_MASK1));
                 /* If second byte and fourth byte are not zero's then we cannot have a start code here as we need
@@ -125,3 +216,4 @@
     /* Return SC found only if phase is 4, else always success */
     return ret;
 }
+#endif