Added memory barrier calls am: 27405a217f am: dfad59fe8b
am: 093c3d91fa

Change-Id: Icc2983ed9764489dccaf402a637ad948706ab431
diff --git a/common/arm/ihevc_platform_macros.h b/common/arm/ihevc_platform_macros.h
index 3d79d07..d6c4b48 100644
--- a/common/arm/ihevc_platform_macros.h
+++ b/common/arm/ihevc_platform_macros.h
@@ -218,7 +218,7 @@
 
 
 
-#define NOP(nop_cnt)    {UWORD32 nop_i; for (nop_i = 0; nop_i < nop_cnt; nop_i++);}
+#define NOP(nop_cnt)    {UWORD32 nop_i; for (nop_i = (nop_cnt) ; nop_i > 0 ; nop_i--) asm("nop");}
 
 
 
diff --git a/common/arm/ihevc_sao_edge_offset_class2_chroma.s b/common/arm/ihevc_sao_edge_offset_class2_chroma.s
index b74a8f6..6a301cb 100644
--- a/common/arm/ihevc_sao_edge_offset_class2_chroma.s
+++ b/common/arm/ihevc_sao_edge_offset_class2_chroma.s
@@ -829,6 +829,10 @@
 
     SUBS        r6,r6,#16                   @Decrement the wd loop count by 16
     BLE         RE_ASSINING_LOOP            @Jump to re-assigning loop
+    LDR         r7,[sp,#0x114]              @Loads wd
+    LDR         r0,[sp,#0x02]               @Loads *pu1_src; NOTE(review): offset 0x02 is not word-aligned, confirm the stack slot
+    SUB         r7,r7,r6
+    ADD         r0,r0,r7
     BGT         WD_16_HT_4_LOOP
 
 
diff --git a/common/arm/ihevc_sao_edge_offset_class3.s b/common/arm/ihevc_sao_edge_offset_class3.s
index de09d6c..f3482dc 100644
--- a/common/arm/ihevc_sao_edge_offset_class3.s
+++ b/common/arm/ihevc_sao_edge_offset_class3.s
@@ -691,6 +691,10 @@
 
     SUBS        r6,r6,#16                   @Decrement the wd loop count by 16
     BLE         RE_ASSINING_LOOP            @Jump to re-assigning loop
+    LDR         r7,[sp,#0xD0]               @Loads wd
+    LDR         r0,[sp,#0x90]               @Loads *pu1_src
+    SUB         r7,r7,r6
+    ADD         r0,r0,r7
     BGT         WD_16_HT_4_LOOP             @If not equal jump to width_loop
 
 
diff --git a/common/arm/ihevc_sao_edge_offset_class3_chroma.s b/common/arm/ihevc_sao_edge_offset_class3_chroma.s
index 6561a8a..ee34c56 100644
--- a/common/arm/ihevc_sao_edge_offset_class3_chroma.s
+++ b/common/arm/ihevc_sao_edge_offset_class3_chroma.s
@@ -851,6 +851,10 @@
 
     SUBS        r6,r6,#16                   @Decrement the wd loop count by 16
     BLE         RE_ASSINING_LOOP            @Jump to re-assigning loop
+    LDR         r7,[sp,#0x114]              @Loads wd
+    LDR         r0,[sp,#0x02]               @Loads *pu1_src; NOTE(review): offset 0x02 is not word-aligned, confirm the stack slot
+    SUB         r7,r7,r6
+    ADD         r0,r0,r7
     BGT         WD_16_HT_4_LOOP             @If not equal jump to width_loop
 
 WIDTH_RESIDUE:
diff --git a/common/arm64/ihevc_sao_edge_offset_class2.s b/common/arm64/ihevc_sao_edge_offset_class2.s
index 59eeadd..5494619 100644
--- a/common/arm64/ihevc_sao_edge_offset_class2.s
+++ b/common/arm64/ihevc_sao_edge_offset_class2.s
@@ -146,6 +146,9 @@
     mov         x20,#255
     cmp         x9,x20
     csel        x9, x20, x9, ge             //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+    mov         x20,#0
+    cmp         x9,x20
+    csel        x9, x20, x9, LT             //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
 
 PU1_AVAIL_7_LOOP:
     LDRB        w14,[x5,#7]                 //pu1_avail[7]
@@ -190,6 +193,9 @@
     mov         x20,#255
     cmp         x10,x20
     csel        x10, x20, x10, ge           //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+    mov         x20,#0
+    cmp         x10,x20
+    csel        x10, x20, x10, LT           //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
 
 PU1_AVAIL:
     MOV         x12,x8                      //Move ht
diff --git a/common/arm64/ihevc_sao_edge_offset_class2_chroma.s b/common/arm64/ihevc_sao_edge_offset_class2_chroma.s
index b430709..0a8a748 100644
--- a/common/arm64/ihevc_sao_edge_offset_class2_chroma.s
+++ b/common/arm64/ihevc_sao_edge_offset_class2_chroma.s
@@ -165,6 +165,9 @@
     mov         x20,#255
     cmp         x9,x20
     csel        x9, x20, x9, ge             //u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+    mov         x20,#0
+    cmp         x9,x20
+    csel        x9, x20, x9, LT             //u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
 
 PU1_AVAIL_4_LOOP_V:
 
@@ -201,6 +204,9 @@
     mov         x20,#255
     cmp         x10,x20
     csel        x10, x20, x10, ge           //u1_pos_0_0_tmp_v = CLIP3(pu1_src[0] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
+    mov         x20,#0
+    cmp         x10,x20
+    csel        x10, x20, x10, LT           //u1_pos_0_0_tmp_v = CLIP3(pu1_src[0] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
 
 PU1_AVAIL_7_LOOP_U:
     STRB        w10,[sp,#7]
@@ -249,6 +255,9 @@
     mov         x20,#255
     cmp         x10,x20
     csel        x10, x20, x10, ge           //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+    mov         x20,#0
+    cmp         x10,x20
+    csel        x10, x20, x10, LT           //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
 
 PU1_AVAIL_7_LOOP_V:
     ADD         x12,x12,#1
@@ -286,6 +295,9 @@
     mov         x20,#255
     cmp         x9,x20
     csel        x9, x20, x9, ge             //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+    mov         x20,#0
+    cmp         x9,x20
+    csel        x9, x20, x9, LT             //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
 
 PU1_AVAIL_3_LOOP:
     STRB        w10,[sp,#8]
@@ -924,6 +936,10 @@
 
     SUBS        x6,x6,#16                   //Decrement the wd loop count by 16
     BLE         RE_ASSINING_LOOP            //Jump to re-assigning loop
+    mov         w7, w24                     //Loads wd
+    mov         x0, x27                     //Loads *pu1_src
+    SUB         x7,x7,x6
+    ADD         x0,x0,x7
     BGT         WD_16_HT_4_LOOP
 
 
diff --git a/common/arm64/ihevc_sao_edge_offset_class3.s b/common/arm64/ihevc_sao_edge_offset_class3.s
index 9d4f26a..924861b 100644
--- a/common/arm64/ihevc_sao_edge_offset_class3.s
+++ b/common/arm64/ihevc_sao_edge_offset_class3.s
@@ -151,6 +151,9 @@
     mov         x20,#255
     cmp         x9,x20
     csel        x9, x20, x9, ge             //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+    mov         x20,#0
+    cmp         x9,x20
+    csel        x9, x20, x9, LT             //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
 
 PU1_AVAIL_6_LOOP:
     LDRB        w10,[x5,#6]                 //pu1_avail[6]
@@ -198,6 +201,9 @@
     mov         x20,#255
     cmp         x10,x20
     csel        x10, x20, x10, ge           //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+    mov         x20,#0
+    cmp         x10,x20
+    csel        x10, x20, x10, LT           //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
 
 PU1_AVAIL_3_LOOP:
     MOV         x21,x2
@@ -713,6 +719,10 @@
 
     SUBS        x6,x6,#16                   //Decrement the wd loop count by 16
     BLE         RE_ASSINING_LOOP            //Jump to re-assigning loop
+    MOV         x7,x16                      //Loads wd
+    MOV         x0,x15                      //Loads *pu1_src
+    SUB         x7,x7,x6
+    ADD         x0,x0,x7
     BGT         WD_16_HT_4_LOOP             //If not equal jump to width_loop
 
 
diff --git a/common/arm64/ihevc_sao_edge_offset_class3_chroma.s b/common/arm64/ihevc_sao_edge_offset_class3_chroma.s
index 8e93110..631ce58 100644
--- a/common/arm64/ihevc_sao_edge_offset_class3_chroma.s
+++ b/common/arm64/ihevc_sao_edge_offset_class3_chroma.s
@@ -160,6 +160,9 @@
     mov         x20,#255
     cmp         x9,x20
     csel        x9, x20, x9, ge             //u1_pos_0_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+    mov         x20,#0
+    cmp         x9,x20
+    csel        x9, x20, x9, LT             //u1_pos_0_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
 
 PU1_AVAIL_5_LOOP_V:
 
@@ -194,6 +197,9 @@
     mov         x20,#255
     cmp         x10,x20
     csel        x10, x20, x10, ge           //u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
+    mov         x20,#0
+    cmp         x10,x20
+    csel        x10, x20, x10, LT           //u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
 
 PU1_AVAIL_6_LOOP_U:
     STRB        w9,[sp,#6]
@@ -240,6 +246,9 @@
     mov         x20,#255
     cmp         x10,x20
     csel        x10, x20, x10, ge           //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+    mov         x20,#0
+    cmp         x10,x20
+    csel        x10, x20, x10, LT           //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
 
 PU1_AVAIL_6_LOOP_V:
     ADD         x12,x12,#1                  //pu1_src[(ht - 1) * src_strd + 1]
@@ -276,6 +285,9 @@
     mov         x20,#255
     cmp         x9,x20
     csel        x9, x20, x9, ge             //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
+    mov         x20,#0
+    cmp         x9,x20
+    csel        x9, x20, x9, LT             //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
 
 PU1_AVAIL_3_LOOP:
     STRB        w10,[sp,#8]
@@ -933,6 +945,10 @@
 
     SUBS        x6,x6,#16                   //Decrement the wd loop count by 16
     BLE         RE_ASSINING_LOOP            //Jump to re-assigning loop
+    mov         w7, w24                     //Loads wd
+    mov         x0, x28                     //Loads *pu1_src
+    SUB         x7,x7,x6
+    ADD         x0,x0,x7
     BGT         WD_16_HT_4_LOOP             //If not equal jump to width_loop
 
 WIDTH_RESIDUE:
diff --git a/common/ihevc_defs.h b/common/ihevc_defs.h
index a2b7eda..5789627 100644
--- a/common/ihevc_defs.h
+++ b/common/ihevc_defs.h
@@ -219,6 +219,45 @@
     USER_DATA_MAX
 } USER_DATA_SEI_TYPE_T;
 
+/**
+ ****************************************************************************
+ * SEI payload type
+ ****************************************************************************
+ */
+enum
+{
+    SEI_BUFFERING_PERIOD                     = 0,
+    SEI_PICTURE_TIMING                       = 1,
+    SEI_PAN_SCAN_RECT                        = 2,
+    SEI_FILLER_PAYLOAD                       = 3,
+    SEI_USER_DATA_REGISTERED_ITU_T_T35       = 4,
+    SEI_USER_DATA_UNREGISTERED               = 5,
+    SEI_RECOVERY_POINT                       = 6,
+    SEI_SCENE_INFO                           = 9,
+    SEI_FULL_FRAME_SNAPSHOT                  = 15,
+    SEI_PROGRESSIVE_REFINEMENT_SEGMENT_START = 16,
+    SEI_PROGRESSIVE_REFINEMENT_SEGMENT_END   = 17,
+    SEI_FILM_GRAIN_CHARACTERISTICS           = 19,
+    SEI_POST_FILTER_HINT                     = 22,
+    SEI_TONE_MAPPING_INFO                    = 23,
+    SEI_FRAME_PACKING                        = 45,
+    SEI_DISPLAY_ORIENTATION                  = 47,
+    SEI_SOP_DESCRIPTION                      = 128,
+    SEI_ACTIVE_PARAMETER_SETS                = 129,
+    SEI_DECODING_UNIT_INFO                   = 130,
+    SEI_TEMPORAL_LEVEL0_INDEX                = 131,
+    SEI_DECODED_PICTURE_HASH                 = 132,
+    SEI_SCALABLE_NESTING                     = 133,
+    SEI_REGION_REFRESH_INFO                  = 134,
+    SEI_NO_DISPLAY                           = 135,
+    SEI_TIME_CODE                            = 136,
+    SEI_MASTERING_DISPLAY_COLOUR_VOLUME      = 137,
+    SEI_SEGM_RECT_FRAME_PACKING              = 138,
+    SEI_TEMP_MOTION_CONSTRAINED_TILE_SETS    = 139,
+    SEI_CHROMA_SAMPLING_FILTER_HINT          = 140,
+    SEI_KNEE_FUNCTION_INFO                   = 141
+};
+
 
 #define BIT_DEPTH           8
 #define BIT_DEPTH_LUMA      BIT_DEPTH
diff --git a/common/ihevc_structs.h b/common/ihevc_structs.h
index 93d2ad4..0205582 100644
--- a/common/ihevc_structs.h
+++ b/common/ihevc_structs.h
@@ -2406,7 +2406,7 @@
     /**
      *  lt_ref_pic_poc_lsb_sps[]
      */
-    WORD8 ai1_lt_ref_pic_poc_lsb_sps[MAX_LTREF_PICS_SPS];
+    UWORD16 au2_lt_ref_pic_poc_lsb_sps[MAX_LTREF_PICS_SPS];
 
     /**
      *  used_by_curr_pic_lt_sps_flag[]
diff --git a/common/mips/ihevc_platform_macros.h b/common/mips/ihevc_platform_macros.h
index f820cf2..6c0d49c 100644
--- a/common/mips/ihevc_platform_macros.h
+++ b/common/mips/ihevc_platform_macros.h
@@ -85,7 +85,7 @@
 
 #define POPCNT_U32(x)       __builtin_popcount(x)
 
-#define NOP(nop_cnt)    {UWORD32 nop_i; for (nop_i = 0; nop_i < nop_cnt; nop_i++);}
+#define NOP(nop_cnt)    {UWORD32 nop_i; for (nop_i = (nop_cnt) ; nop_i > 0 ; nop_i--) asm("nop");}
 
 #define INLINE
 
diff --git a/common/x86/ihevc_platform_macros.h b/common/x86/ihevc_platform_macros.h
index 66ef542..7b10473 100644
--- a/common/x86/ihevc_platform_macros.h
+++ b/common/x86/ihevc_platform_macros.h
@@ -66,7 +66,7 @@
                             ((UWORD32)x >> 24);
 
 
-#define NOP(nop_cnt)    {UWORD32 nop_i; for (nop_i = 0; nop_i < nop_cnt; nop_i++);}
+#define NOP(nop_cnt)    {UWORD32 nop_i; for (nop_i = (nop_cnt) ; nop_i > 0 ; nop_i--) asm("nop");}
 
 #define POPCNT_U32(x)       __builtin_popcount(x)
 
@@ -149,7 +149,7 @@
         r = 1;                                          \
     }                                                   \
 }
-#define GCC_ENABLE 1
+#define GCC_ENABLE 0
 
 #if GCC_ENABLE
 #define _mm256_loadu2_m128i(X,Y) _mm256_insertf128_si256(_mm256_castsi128_si256(_mm_loadu_si128((Y))), _mm_loadu_si128((X)),1);
diff --git a/decoder.x86.mk b/decoder.x86.mk
index 3f86923..768cd06 100644
--- a/decoder.x86.mk
+++ b/decoder.x86.mk
@@ -39,7 +39,3 @@
 LOCAL_SRC_FILES_x86 += $(libhevcd_srcs_c_x86) $(libhevcd_srcs_asm_x86)
 LOCAL_C_INCLUDES_x86 += $(libhevcd_inc_dir_x86)
 LOCAL_CFLAGS_x86 += $(libhevcd_cflags_x86)
-
-# Bug: 25132373
-LOCAL_CLANG := false
-
diff --git a/decoder/ihevcd_api.c b/decoder/ihevcd_api.c
index c661083..c349fcb 100644
--- a/decoder/ihevcd_api.c
+++ b/decoder/ihevcd_api.c
@@ -1247,18 +1247,21 @@
     size = MAX_VPS_CNT * sizeof(vps_t);
     pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
     RETURN_IF((NULL == pv_buf), IV_FAIL);
+    memset(pv_buf, 0, size);
     ps_codec->ps_vps_base = pv_buf;
     ps_codec->s_parse.ps_vps_base = ps_codec->ps_vps_base;
 
     size = MAX_SPS_CNT * sizeof(sps_t);
     pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
     RETURN_IF((NULL == pv_buf), IV_FAIL);
+    memset(pv_buf, 0, size);
     ps_codec->ps_sps_base = pv_buf;
     ps_codec->s_parse.ps_sps_base = ps_codec->ps_sps_base;
 
     size = MAX_PPS_CNT * sizeof(pps_t);
     pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
     RETURN_IF((NULL == pv_buf), IV_FAIL);
+    memset(pv_buf, 0, size);
     ps_codec->ps_pps_base = pv_buf;
     ps_codec->s_parse.ps_pps_base = ps_codec->ps_pps_base;
 
@@ -1311,6 +1314,7 @@
     size =  3 * 16 * sizeof(UWORD8);
     pu1_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
     RETURN_IF((NULL == pu1_buf), IV_FAIL);
+    memset(pu1_buf, 0, size);
     ps_codec->s_parse.pu1_luma_intra_pred_mode_left = pu1_buf;
     ps_codec->s_parse.pu1_luma_intra_pred_mode_top  = pu1_buf + 16;
 
@@ -1917,6 +1921,7 @@
     size = ihevcd_get_tu_data_size(wd * ht);
     pv_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
     RETURN_IF((NULL == pv_buf), IV_FAIL);
+    memset(pv_buf, 0, size);
     ps_codec->pv_tu_data = pv_buf;
 
     {
diff --git a/decoder/ihevcd_bitstream.c b/decoder/ihevcd_bitstream.c
index be9addb..3b8d24f 100644
--- a/decoder/ihevcd_bitstream.c
+++ b/decoder/ihevcd_bitstream.c
@@ -414,8 +414,16 @@
                                  3) + ps_bitstrm->u4_bit_ofst;
 
     u4_size_in_bits = (UWORD32)(ps_bitstrm->pu1_buf_max -
-                    ps_bitstrm->pu1_buf_base);
-    return (u4_size_in_bits - u4_bits_consumed);
+                    ps_bitstrm->pu1_buf_base) - 8;
+    u4_size_in_bits <<= 3;
+    if(u4_size_in_bits > u4_bits_consumed)
+    {
+        return (u4_size_in_bits - u4_bits_consumed);
+    }
+    else
+    {
+        return 0;
+    }
 }
 
 /**
diff --git a/decoder/ihevcd_decode.c b/decoder/ihevcd_decode.c
index 954f8f3..53ab2bd 100644
--- a/decoder/ihevcd_decode.c
+++ b/decoder/ihevcd_decode.c
@@ -203,6 +203,18 @@
 
     ps_dec_op->u4_output_present = 0;
     ps_dec_op->u4_progressive_frame_flag = 1;
+    if(ps_codec->i4_sps_done)
+    {
+        sps_t *ps_sps = (ps_codec->s_parse.ps_sps_base + ps_codec->i4_sps_id);
+        profile_tier_lvl_info_t *ps_ptl;
+        ps_ptl = &ps_sps->s_ptl;
+        if((0 == ps_ptl->s_ptl_gen.i1_general_progressive_source_flag) &&
+           (1 == ps_ptl->s_ptl_gen.i1_general_interlaced_source_flag))
+        {
+            ps_dec_op->u4_progressive_frame_flag = 0;
+        }
+    }
+
     ps_dec_op->u4_is_ref_flag = 1;
     ps_dec_op->e_output_format = ps_codec->e_chroma_fmt;
     ps_dec_op->u4_is_ref_flag = 1;
@@ -224,7 +236,30 @@
     if(ps_codec->ps_disp_buf)
     {
         pic_buf_t *ps_disp_buf = ps_codec->ps_disp_buf;
+        sei_params_t *ps_sei = &ps_disp_buf->s_sei_params;
 
+        if(ps_sei->i1_sei_parameters_present_flag &&
+           ps_sei->i1_pic_timing_params_present_flag)
+        {
+            UWORD32 u4_pic_struct;
+            u4_pic_struct = ps_sei->s_pic_timing_sei_params.u4_pic_struct;
+            switch(u4_pic_struct)
+            {
+                case 1:
+                    ps_dec_op->e4_fld_type = IV_TOP_FLD;
+                    ps_dec_op->u4_progressive_frame_flag = 0;
+                    break;
+                case 2:
+                    ps_dec_op->e4_fld_type = IV_BOT_FLD;
+                    ps_dec_op->u4_progressive_frame_flag = 0;
+                    break;
+                case 0:
+                default:
+                    ps_dec_op->e4_fld_type = IV_FLD_TYPE_DEFAULT;
+                    ps_dec_op->u4_progressive_frame_flag = 1;
+                    break;
+            }
+        }
         ps_dec_op->u4_output_present = 1;
         ps_dec_op->u4_ts = ps_disp_buf->u4_ts;
         if((ps_codec->i4_flush_mode == 0) && (ps_codec->s_parse.i4_end_of_frame == 0))
@@ -635,7 +670,7 @@
         }
 
         if((IVD_RES_CHANGED == ret) ||
-           (IHEVCD_UNSUPPORTED_DIMENSIONS == ret))
+           (IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED == ret))
         {
             break;
         }
diff --git a/decoder/ihevcd_iquant_itrans_recon_ctb.c b/decoder/ihevcd_iquant_itrans_recon_ctb.c
index a558644..a721db0 100644
--- a/decoder/ihevcd_iquant_itrans_recon_ctb.c
+++ b/decoder/ihevcd_iquant_itrans_recon_ctb.c
@@ -567,9 +567,11 @@
     /* Intra 32x32 Y                                                         */
     /* Inter 32x32 Y                                                         */
     /*************************************************************************/
-    WORD32 scaling_mat_offset[] =
+    /* Only first 20 entries are used. Array is extended to avoid out of bound
+       reads. Skip CUs (64x64) read this table, but don't really use the value */
+    static const WORD32 scaling_mat_offset[] =
       { 0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992,
-        1248, 1504, 1760, 2016, 3040 };
+        1248, 1504, 1760, 2016, 3040, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
 
     PROFILE_DISABLE_IQ_IT_RECON_INTRA_PRED();
 
@@ -936,7 +938,10 @@
                 /***************************************************************/
                 if(intra_flag) /* Intra */
                 {
-                    UWORD8 au1_ref_sub_out[(MAX_TU_SIZE * 2 * 2) + 4];
+                    /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
+                       au1_ref_sub_out size is kept as multiple of 8,
+                       so that SIMD functions can load 64 bits */
+                    UWORD8 au1_ref_sub_out[(MAX_TU_SIZE * 2 * 2) + 8];
                     UWORD8 *pu1_top_left, *pu1_top, *pu1_left;
                     WORD32 luma_pred_func_idx, chroma_pred_func_idx;
 
diff --git a/decoder/ihevcd_nal.c b/decoder/ihevcd_nal.c
index bee399f..1bfcb53 100644
--- a/decoder/ihevcd_nal.c
+++ b/decoder/ihevcd_nal.c
@@ -450,6 +450,16 @@
             DEBUG_PRINT_NAL_INFO(ps_codec, s_nal.i1_nal_unit_type);
             break;
 
+        case NAL_PREFIX_SEI:
+        case NAL_SUFFIX_SEI:
+            if(IVD_DECODE_HEADER == ps_codec->i4_header_mode)
+            {
+                return IHEVCD_SLICE_IN_HEADER_MODE;
+            }
+
+            ret = ihevcd_parse_sei(ps_codec, &s_nal);
+            break;
+
         case NAL_EOS        :
             ps_codec->i4_cra_as_first_pic = 1;
             break;
diff --git a/decoder/ihevcd_parse_headers.c b/decoder/ihevcd_parse_headers.c
index 04d22d8..ea82a80 100644
--- a/decoder/ihevcd_parse_headers.c
+++ b/decoder/ihevcd_parse_headers.c
@@ -421,7 +421,6 @@
             if(0 == ref_idc)
             {
                 BITS_PARSE("use_delta_flag", value, ps_bitstrm, 1);
-                ps_stref_picset->ai1_used[i] = value;
                 ref_idc = value << 1;
             }
             if((ref_idc == 1) || (ref_idc == 2))
@@ -666,22 +665,97 @@
                                           WORD32 sps_max_sub_layers_minus1)
 {
     WORD32 ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
+    UWORD16 u2_sar_width = 0;
+    UWORD16 u2_sar_height = 0;
 
     BITS_PARSE("aspect_ratio_info_present_flag", ps_vui->u1_aspect_ratio_info_present_flag, ps_bitstrm, 1);
 
     ps_vui->u1_aspect_ratio_idc = SAR_UNUSED;
-    ps_vui->u2_sar_width = 0;
-    ps_vui->u2_sar_height = 0;
+    u2_sar_width = 0;
+    u2_sar_height = 0;
     if(ps_vui->u1_aspect_ratio_info_present_flag)
     {
         BITS_PARSE("aspect_ratio_idc", ps_vui->u1_aspect_ratio_idc, ps_bitstrm, 8);
-        if(ps_vui->u1_aspect_ratio_idc  ==  EXTENDED_SAR)
+        switch(ps_vui->u1_aspect_ratio_idc)
         {
-            BITS_PARSE("sar_width", ps_vui->u2_sar_width, ps_bitstrm, 16);
-            BITS_PARSE("sar_height", ps_vui->u2_sar_height, ps_bitstrm, 16);
+            case SAR_1_1:
+                u2_sar_width = 1;
+                u2_sar_height = 1;
+                break;
+            case SAR_12_11:
+                u2_sar_width = 12;
+                u2_sar_height = 11;
+                break;
+            case SAR_10_11:
+                u2_sar_width = 10;
+                u2_sar_height = 11;
+                break;
+            case SAR_16_11:
+                u2_sar_width = 16;
+                u2_sar_height = 11;
+                break;
+            case SAR_40_33:
+                u2_sar_width = 40;
+                u2_sar_height = 33;
+                break;
+            case SAR_24_11:
+                u2_sar_width = 24;
+                u2_sar_height = 11;
+                break;
+            case SAR_20_11:
+                u2_sar_width = 20;
+                u2_sar_height = 11;
+                break;
+            case SAR_32_11:
+                u2_sar_width = 32;
+                u2_sar_height = 11;
+                break;
+            case SAR_80_33:
+                u2_sar_width = 80;
+                u2_sar_height = 33;
+                break;
+            case SAR_18_11:
+                u2_sar_width = 18;
+                u2_sar_height = 11;
+                break;
+            case SAR_15_11:
+                u2_sar_width = 15;
+                u2_sar_height = 11;
+                break;
+            case SAR_64_33:
+                u2_sar_width = 64;
+                u2_sar_height = 33;
+                break;
+            case SAR_160_99:
+                u2_sar_width = 160;
+                u2_sar_height = 99;
+                break;
+            case SAR_4_3:
+                u2_sar_width = 4;
+                u2_sar_height = 3;
+                break;
+            case SAR_3_2:
+                u2_sar_width = 3;
+                u2_sar_height = 2;
+                break;
+            case SAR_2_1:
+                u2_sar_width = 2;
+                u2_sar_height = 1;
+                break;
+            case EXTENDED_SAR:
+                BITS_PARSE("sar_width", u2_sar_width, ps_bitstrm, 16);
+                BITS_PARSE("sar_height", u2_sar_height, ps_bitstrm, 16);
+                break;
+            default:
+                u2_sar_width = 0;
+                u2_sar_height = 0;
+                break;
         }
     }
 
+    ps_vui->u2_sar_width    = u2_sar_width;
+    ps_vui->u2_sar_height   = u2_sar_height;
+
     BITS_PARSE("overscan_info_present_flag", ps_vui->u1_overscan_info_present_flag, ps_bitstrm, 1);
     ps_vui->u1_overscan_appropriate_flag = 0;
     if(ps_vui->u1_overscan_info_present_flag)
@@ -825,13 +899,13 @@
     ps_ptl->i1_general_progressive_source_flag = value;
 
     BITS_PARSE("general_interlaced_source_flag", value, ps_bitstrm, 1);
-    ps_ptl->i1_general_progressive_source_flag = value;
+    ps_ptl->i1_general_interlaced_source_flag = value;
 
     BITS_PARSE("general_non_packed_constraint_flag", value, ps_bitstrm, 1);
-    ps_ptl->i1_general_progressive_source_flag = value;
+    ps_ptl->i1_general_non_packed_constraint_flag = value;
 
     BITS_PARSE("general_frame_only_constraint_flag", value, ps_bitstrm, 1);
-    ps_ptl->i1_general_progressive_source_flag = value;
+    ps_ptl->i1_frame_only_constraint_flag = value;
 
     BITS_PARSE("XXX_reserved_zero_44bits[0..15]", value, ps_bitstrm, 16);
 
@@ -884,6 +958,10 @@
     if(profile_present)
     {
         ret = ihevcd_parse_profile_tier_level_layer(ps_bitstrm, &ps_ptl->s_ptl_gen);
+        if((IHEVCD_ERROR_T)IHEVCD_SUCCESS != ret)
+        {
+            return ret;
+        }
     }
 
     BITS_PARSE("general_level_idc", value, ps_bitstrm, 8);
@@ -1319,6 +1397,27 @@
         UEV_PARSE("max_latency_increase", value, ps_bitstrm);
         ps_sps->ai1_sps_max_latency_increase[i] = value;
     }
+
+    /* Check if sps_max_dec_pic_buffering or sps_max_num_reorder_pics
+       has changed */
+    if(0 != ps_codec->i4_first_pic_done)
+    {
+        sps_t *ps_sps_old = ps_codec->s_parse.ps_sps;
+        if(ps_sps_old->ai1_sps_max_dec_pic_buffering[ps_sps_old->i1_sps_max_sub_layers - 1] !=
+                    ps_sps->ai1_sps_max_dec_pic_buffering[ps_sps->i1_sps_max_sub_layers - 1])
+        {
+            ps_codec->i4_reset_flag = 1;
+            return (IHEVCD_ERROR_T)IVD_RES_CHANGED;
+        }
+
+        if(ps_sps_old->ai1_sps_max_num_reorder_pics[ps_sps_old->i1_sps_max_sub_layers - 1] !=
+                    ps_sps->ai1_sps_max_num_reorder_pics[ps_sps->i1_sps_max_sub_layers - 1])
+        {
+            ps_codec->i4_reset_flag = 1;
+            return (IHEVCD_ERROR_T)IVD_RES_CHANGED;
+        }
+    }
+
     UEV_PARSE("log2_min_coding_block_size_minus3", value, ps_bitstrm);
     ps_sps->i1_log2_min_coding_block_size = value + 3;
 
@@ -1419,7 +1518,7 @@
         for(i = 0; i < ps_sps->i1_num_long_term_ref_pics_sps; i++)
         {
             BITS_PARSE("lt_ref_pic_poc_lsb_sps[ i ]", value, ps_bitstrm, ps_sps->i1_log2_max_pic_order_cnt_lsb);
-            ps_sps->ai1_lt_ref_pic_poc_lsb_sps[i] = value;
+            ps_sps->au2_lt_ref_pic_poc_lsb_sps[i] = value;
 
             BITS_PARSE("used_by_curr_pic_lt_sps_flag[ i ]", value, ps_bitstrm, 1);
             ps_sps->ai1_used_by_curr_pic_lt_sps_flag[i] = value;
@@ -1480,6 +1579,13 @@
         return (IHEVCD_ERROR_T)IVD_RES_CHANGED;
     }
 
+    if((ps_sps->i2_pic_width_in_luma_samples > MAX_WD) ||
+                    ((ps_sps->i2_pic_width_in_luma_samples * ps_sps->i2_pic_height_in_luma_samples) >
+                    (MAX_WD * MAX_HT)))
+    {
+        return (IHEVCD_ERROR_T)IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
+    }
+
     /* Update display width and display height */
     {
         WORD32 disp_wd, disp_ht;
@@ -1935,7 +2041,600 @@
 }
 
 
+IHEVCD_ERROR_T ihevcd_parse_buffering_period_sei(codec_t *ps_codec,
+                                                 sps_t *ps_sps)
+{
+    /* Parses a buffering period SEI payload into s_parse.s_sei_params.
+     * All field widths are taken from the VUI HRD parameters of ps_sps. */
+    parse_ctxt_t *ps_parse = &ps_codec->s_parse;
+    bitstrm_t *ps_bitstrm = &ps_parse->s_bitstrm;
+    vui_t *ps_vui;
+    buf_period_sei_params_t *ps_buf_period_sei_params;
+    hrd_params_t *ps_vui_hdr;
+    UWORD32 value, i, u4_cpb_cnt;
 
+    ps_vui = &ps_sps->s_vui_parameters;
+    ps_vui_hdr = &ps_vui->s_vui_hrd_parameters;
+
+    ps_buf_period_sei_params = &ps_parse->s_sei_params.s_buf_period_sei_params;
+
+    ps_parse->s_sei_params.i1_buf_period_params_present_flag = 1;
+
+    UEV_PARSE("bp_seq_parameter_set_id", value, ps_bitstrm);
+    ps_buf_period_sei_params->u1_bp_seq_parameter_set_id = value;
+    ps_buf_period_sei_params->u1_rap_cpb_params_present_flag = 0; /* absent => 0, never a stale value */
+    if(!ps_vui_hdr->u1_sub_pic_cpb_params_present_flag)
+    {
+        BITS_PARSE("irap_cpb_params_present_flag", value, ps_bitstrm, 1);
+        ps_buf_period_sei_params->u1_rap_cpb_params_present_flag = value;
+    }
+
+    if(ps_buf_period_sei_params->u1_rap_cpb_params_present_flag)
+    {
+        BITS_PARSE("cpb_delay_offset",
+                   value,
+                   ps_bitstrm,
+                   (ps_vui_hdr->u1_au_cpb_removal_delay_length_minus1
+                                   + 1));
+        ps_buf_period_sei_params->u4_cpb_delay_offset = value;
+
+        BITS_PARSE("dpb_delay_offset",
+                   value,
+                   ps_bitstrm,
+                   (ps_vui_hdr->u1_dpb_output_delay_length_minus1
+                                   + 1));
+        ps_buf_period_sei_params->u4_dpb_delay_offset = value;
+    }
+    else
+    {
+        ps_buf_period_sei_params->u4_cpb_delay_offset = 0;
+        ps_buf_period_sei_params->u4_dpb_delay_offset = 0;
+    }
+
+    BITS_PARSE("concatenation_flag", value, ps_bitstrm, 1);
+    ps_buf_period_sei_params->u1_concatenation_flag = value;
+
+    BITS_PARSE("au_cpb_removal_delay_delta_minus1",
+               value,
+               ps_bitstrm,
+               (ps_vui_hdr->u1_au_cpb_removal_delay_length_minus1
+                               + 1));
+    ps_buf_period_sei_params->u4_au_cpb_removal_delay_delta_minus1 = value;
+
+    if(ps_vui_hdr->u1_nal_hrd_parameters_present_flag)
+    {
+        u4_cpb_cnt = ps_vui_hdr->au1_cpb_cnt_minus1[0];
+
+        for(i = 0; i <= u4_cpb_cnt; i++)
+        {
+            BITS_PARSE("nal_initial_cpb_removal_delay[i]",
+                       value,
+                       ps_bitstrm,
+                       (ps_vui_hdr->u1_initial_cpb_removal_delay_length_minus1
+                                       + 1));
+            ps_buf_period_sei_params->au4_nal_initial_cpb_removal_delay[i] =
+                            value;
+
+            BITS_PARSE("nal_initial_cpb_removal_delay_offset",
+                       value,
+                       ps_bitstrm,
+                       (ps_vui_hdr->u1_initial_cpb_removal_delay_length_minus1
+                                       + 1));
+            ps_buf_period_sei_params->au4_nal_initial_cpb_removal_delay_offset[i] =
+                            value;
+
+            if(ps_vui_hdr->u1_sub_pic_cpb_params_present_flag
+                            || ps_buf_period_sei_params->u1_rap_cpb_params_present_flag)
+            {
+                BITS_PARSE("nal_initial_alt_cpb_removal_delay[i]",
+                           value,
+                           ps_bitstrm,
+                           (ps_vui_hdr->u1_initial_cpb_removal_delay_length_minus1
+                                           + 1));
+                ps_buf_period_sei_params->au4_nal_initial_alt_cpb_removal_delay[i] =
+                                value;
+
+                BITS_PARSE("nal_initial_alt_cpb_removal_delay_offset",
+                           value,
+                           ps_bitstrm,
+                           (ps_vui_hdr->u1_initial_cpb_removal_delay_length_minus1
+                                           + 1));
+                ps_buf_period_sei_params->au4_nal_initial_alt_cpb_removal_delay_offset[i] =
+                                value;
+            }
+        }
+    }
+
+    if(ps_vui_hdr->u1_vcl_hrd_parameters_present_flag)
+    {
+        u4_cpb_cnt = ps_vui_hdr->au1_cpb_cnt_minus1[0];
+
+        for(i = 0; i <= u4_cpb_cnt; i++)
+        {
+            BITS_PARSE("vcl_initial_cpb_removal_delay[i]",
+                       value,
+                       ps_bitstrm,
+                       (ps_vui_hdr->u1_initial_cpb_removal_delay_length_minus1
+                                       + 1));
+            ps_buf_period_sei_params->au4_vcl_initial_cpb_removal_delay[i] =
+                            value;
+
+            BITS_PARSE("vcl_initial_cpb_removal_delay_offset",
+                       value,
+                       ps_bitstrm,
+                       (ps_vui_hdr->u1_initial_cpb_removal_delay_length_minus1
+                                       + 1));
+            ps_buf_period_sei_params->au4_vcl_initial_cpb_removal_delay_offset[i] =
+                            value;
+
+            if(ps_vui_hdr->u1_sub_pic_cpb_params_present_flag
+                            || ps_buf_period_sei_params->u1_rap_cpb_params_present_flag)
+            {
+                BITS_PARSE("vcl_initial_alt_cpb_removal_delay[i]",
+                           value,
+                           ps_bitstrm,
+                           (ps_vui_hdr->u1_initial_cpb_removal_delay_length_minus1
+                                           + 1));
+                ps_buf_period_sei_params->au4_vcl_initial_alt_cpb_removal_delay[i] =
+                                value;
+
+                BITS_PARSE("vcl_initial_alt_cpb_removal_delay_offset",
+                           value,
+                           ps_bitstrm,
+                           (ps_vui_hdr->u1_initial_cpb_removal_delay_length_minus1
+                                           + 1));
+                ps_buf_period_sei_params->au4_vcl_initial_alt_cpb_removal_delay_offset[i] =
+                                value;
+            }
+        }
+    }
+
+    return (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
+}
+
+IHEVCD_ERROR_T ihevcd_parse_pic_timing_sei(codec_t *ps_codec, sps_t *ps_sps)
+{
+    parse_ctxt_t *ps_parse = &ps_codec->s_parse;
+    bitstrm_t *ps_bitstrm = &ps_parse->s_bitstrm;
+    UWORD32 value;
+    vui_t *ps_vui;
+    UWORD32 i;
+    hrd_params_t *ps_vui_hdr;
+    UWORD32 u4_cpb_dpb_delays_present_flag = 0;
+    pic_timing_sei_params_t *ps_pic_timing;
+    /* Parses a picture timing SEI payload into s_sei_params.s_pic_timing_sei_params; always returns IHEVCD_SUCCESS */
+    ps_pic_timing = &ps_parse->s_sei_params.s_pic_timing_sei_params;
+    ps_vui = &ps_sps->s_vui_parameters;
+    ps_vui_hdr = &ps_vui->s_vui_hrd_parameters;
+    ps_parse->s_sei_params.i1_pic_timing_params_present_flag = 1;
+    if(ps_vui->u1_frame_field_info_present_flag)
+    {
+        BITS_PARSE("pic_struct", value, ps_bitstrm, 4);
+        ps_pic_timing->u4_pic_struct = value;
+
+        BITS_PARSE("source_scan_type", value, ps_bitstrm, 2);
+        ps_pic_timing->u4_source_scan_type = value;
+
+        BITS_PARSE("duplicate_flag", value, ps_bitstrm, 1);
+        ps_pic_timing->u1_duplicate_flag = value;
+    }
+    /* CPB/DPB delays are coded only when NAL or VCL HRD parameters are present in the VUI */
+    if(ps_vui_hdr->u1_nal_hrd_parameters_present_flag
+                    || ps_vui_hdr->u1_vcl_hrd_parameters_present_flag)
+    {
+        u4_cpb_dpb_delays_present_flag = 1;
+    }
+    else
+    {
+        u4_cpb_dpb_delays_present_flag = 0;
+    }
+
+    if(u4_cpb_dpb_delays_present_flag)
+    {
+        BITS_PARSE("au_cpb_removal_delay_minus1", value, ps_bitstrm,
+                   (ps_vui_hdr->u1_au_cpb_removal_delay_length_minus1 + 1));
+        ps_pic_timing->u4_au_cpb_removal_delay_minus1 = value;
+
+        BITS_PARSE("pic_dpb_output_delay", value, ps_bitstrm,
+                   (ps_vui_hdr->u1_dpb_output_delay_length_minus1 + 1));
+        ps_pic_timing->u4_pic_dpb_output_delay = value;
+
+        if(ps_vui_hdr->u1_sub_pic_cpb_params_present_flag)
+        {
+            BITS_PARSE("pic_dpb_output_du_delay", value, ps_bitstrm,
+                       (ps_vui_hdr->u1_dpb_output_delay_du_length_minus1 + 1));
+            ps_pic_timing->u4_pic_dpb_output_du_delay = value;
+        }
+
+        if(ps_vui_hdr->u1_sub_pic_cpb_params_present_flag
+                        && ps_vui_hdr->u1_sub_pic_cpb_params_in_pic_timing_sei_flag)
+        {
+            UEV_PARSE("num_decoding_units_minus1", value, ps_bitstrm);
+            ps_pic_timing->u4_num_decoding_units_minus1 = value;
+
+            BITS_PARSE("du_common_cpb_removal_delay_flag", value, ps_bitstrm, 1);
+            ps_pic_timing->u1_du_common_cpb_removal_delay_flag = value;
+
+            if(ps_pic_timing->u1_du_common_cpb_removal_delay_flag)
+            {
+                BITS_PARSE("du_common_cpb_removal_delay_increment_minus1",
+                           value,
+                           ps_bitstrm,
+                           (ps_vui_hdr->u1_du_cpb_removal_delay_increment_length_minus1
+                                           + 1));
+                ps_pic_timing->u4_du_common_cpb_removal_delay_increment_minus1 =
+                                value;
+            }
+            /* NOTE(review): u4_num_decoding_units_minus1 is not range-checked before indexing the arrays below - confirm their size */
+            for(i = 0; i <= ps_pic_timing->u4_num_decoding_units_minus1; i++)
+            {
+                UEV_PARSE("num_nalus_in_du_minus1", value, ps_bitstrm);
+                ps_pic_timing->au4_num_nalus_in_du_minus1[i] = value;
+
+                if((!ps_pic_timing->u1_du_common_cpb_removal_delay_flag)
+                                && (i < ps_pic_timing->u4_num_decoding_units_minus1))
+                {
+                    BITS_PARSE("du_common_cpb_removal_delay_increment_minus1",
+                               value,
+                               ps_bitstrm,
+                               (ps_vui_hdr->u1_du_cpb_removal_delay_increment_length_minus1
+                                               + 1));
+                    ps_pic_timing->au4_du_cpb_removal_delay_increment_minus1[i] =
+                                    value;
+                }
+            }
+        }
+    }
+
+    return (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
+}
+
+IHEVCD_ERROR_T ihevcd_parse_time_code_sei(codec_t *ps_codec)
+{
+    parse_ctxt_t *ps_parse = &ps_codec->s_parse;
+    bitstrm_t *ps_bitstrm = &ps_parse->s_bitstrm;
+    UWORD32 value;
+    time_code_t *ps_time_code;
+    WORD32 i;
+    /* Parses a time code SEI payload into s_sei_params.s_time_code and marks it present; always returns IHEVCD_SUCCESS */
+    ps_parse->s_sei_params.i1_time_code_present_flag = 1;
+    ps_time_code = &ps_parse->s_sei_params.s_time_code;
+
+    BITS_PARSE("num_clock_ts", value, ps_bitstrm, 2); /* 2 bits: at most 3 clock timestamps follow */
+    ps_time_code->u1_num_clock_ts = value;
+
+    for(i = 0; i < ps_time_code->u1_num_clock_ts; i++)
+    {
+        BITS_PARSE("clock_timestamp_flag[i]", value, ps_bitstrm, 1);
+        ps_time_code->au1_clock_timestamp_flag[i] = value;
+
+        if(ps_time_code->au1_clock_timestamp_flag[i])
+        {
+            BITS_PARSE("units_field_based_flag[i]", value, ps_bitstrm, 1);
+            ps_time_code->au1_units_field_based_flag[i] = value;
+
+            BITS_PARSE("counting_type[i]", value, ps_bitstrm, 5);
+            ps_time_code->au1_counting_type[i] = value;
+
+            BITS_PARSE("full_timestamp_flag[i]", value, ps_bitstrm, 1);
+            ps_time_code->au1_full_timestamp_flag[i] = value;
+
+            BITS_PARSE("discontinuity_flag[i]", value, ps_bitstrm, 1);
+            ps_time_code->au1_discontinuity_flag[i] = value;
+
+            BITS_PARSE("cnt_dropped_flag[i]", value, ps_bitstrm, 1);
+            ps_time_code->au1_cnt_dropped_flag[i] = value;
+
+            BITS_PARSE("n_frames[i]", value, ps_bitstrm, 9);
+            ps_time_code->au2_n_frames[i] = value;
+
+            if(ps_time_code->au1_full_timestamp_flag[i])
+            {
+                BITS_PARSE("seconds_value[i]", value, ps_bitstrm, 6);
+                ps_time_code->au1_seconds_value[i] = value;
+
+                BITS_PARSE("minutes_value[i]", value, ps_bitstrm, 6);
+                ps_time_code->au1_minutes_value[i] = value;
+
+                BITS_PARSE("hours_value[i]", value, ps_bitstrm, 5);
+                ps_time_code->au1_hours_value[i] = value;
+            }
+            else /* partial timestamp: each unit is gated by its own flag, nested seconds -> minutes -> hours */
+            {
+                BITS_PARSE("seconds_flag[i]", value, ps_bitstrm, 1);
+                ps_time_code->au1_seconds_flag[i] = value;
+
+                if(ps_time_code->au1_seconds_flag[i])
+                {
+                    BITS_PARSE("seconds_value[i]", value, ps_bitstrm, 6);
+                    ps_time_code->au1_seconds_value[i] = value;
+
+                    BITS_PARSE("minutes_flag[i]", value, ps_bitstrm, 1);
+                    ps_time_code->au1_minutes_flag[i] = value;
+
+                    if(ps_time_code->au1_minutes_flag[i])
+                    {
+                        BITS_PARSE("minutes_value[i]", value, ps_bitstrm, 6);
+                        ps_time_code->au1_minutes_value[i] = value;
+
+                        BITS_PARSE("hours_flag[i]", value, ps_bitstrm, 1);
+                        ps_time_code->au1_hours_flag[i] = value;
+
+                        if(ps_time_code->au1_hours_flag[i])
+                        {
+                            BITS_PARSE("hours_value[i]", value, ps_bitstrm, 5);
+                            ps_time_code->au1_hours_value[i] = value;
+                        }
+                    }
+                }
+            }
+
+            BITS_PARSE("time_offset_length[i]", value, ps_bitstrm, 5);
+            ps_time_code->au1_time_offset_length[i] = value;
+            /* NOTE(review): up to 31 bits are read below; the "au1_" prefix suggests an 8-bit store - confirm no truncation */
+            if(ps_time_code->au1_time_offset_length[i] > 0)
+            {
+                BITS_PARSE("time_offset_value[i]", value, ps_bitstrm,
+                           ps_time_code->au1_time_offset_length[i]);
+                ps_time_code->au1_time_offset_value[i] = value;
+            }
+            else
+            {
+                ps_time_code->au1_time_offset_value[i] = 0;
+            }
+        }
+    }
+
+    return (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
+}
+
+IHEVCD_ERROR_T ihevcd_parse_mastering_disp_params_sei(codec_t *ps_codec)
+{
+    /* Mastering display colour volume SEI: three (x, y) primaries, the
+     * white point and the max/min luminance, all fixed-width fields. */
+    sei_params_t *ps_sei = &ps_codec->s_parse.s_sei_params;
+    bitstrm_t *ps_bits = &ps_codec->s_parse.s_bitstrm;
+    mastering_dis_col_vol_sei_params_t *ps_mdcv = &ps_sei->s_mastering_dis_col_vol_sei_params;
+    UWORD32 u4_bits;
+    WORD32 c = 0;
+
+    ps_sei->i4_sei_mastering_disp_colour_vol_params_present_flags = 1;
+
+    /* Primaries are interleaved in the stream: x[c] is followed by y[c] */
+    while(c < 3)
+    {
+        BITS_PARSE("display_primaries_x[c]", u4_bits, ps_bits, 16);
+        ps_mdcv->au2_display_primaries_x[c] = u4_bits;
+
+        BITS_PARSE("display_primaries_y[c]", u4_bits, ps_bits, 16);
+        ps_mdcv->au2_display_primaries_y[c] = u4_bits;
+        c++;
+    }
+
+    BITS_PARSE("white_point_x", u4_bits, ps_bits, 16);
+    ps_mdcv->u2_white_point_x = u4_bits;
+    BITS_PARSE("white_point_y", u4_bits, ps_bits, 16);
+    ps_mdcv->u2_white_point_y = u4_bits;
+
+    BITS_PARSE("max_display_mastering_luminance", u4_bits, ps_bits, 32);
+    ps_mdcv->u4_max_display_mastering_luminance = u4_bits;
+    BITS_PARSE("min_display_mastering_luminance", u4_bits, ps_bits, 32);
+    ps_mdcv->u4_min_display_mastering_luminance = u4_bits;
+
+    return (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
+}
+
+IHEVCD_ERROR_T ihevcd_parse_user_data_registered_itu_t_t35(codec_t *ps_codec,
+                                                           UWORD32 u4_payload_size)
+{
+    /* Stores one user_data_registered_itu_t_t35 payload in the next free slot */
+    parse_ctxt_t *ps_parse = &ps_codec->s_parse;
+    bitstrm_t *ps_bitstrm = &ps_parse->s_bitstrm;
+    sei_params_t *ps_sei = &ps_parse->s_sei_params;
+    user_data_registered_itu_t_t35_t *ps_user_data_registered_itu_t_t35;
+    UWORD32 value;
+    UWORD32 i;
+    UWORD32 j = 0;
+
+    /* Empty payload or no free slot (assumes the slot store is an embedded
+     * array): consume the bytes without storing, rather than overrun it. */
+    if((0 == u4_payload_size) || ((UWORD32)ps_sei->i4_sei_user_data_cnt >=
+                    (sizeof(ps_sei->as_user_data_registered_itu_t_t35) /
+                     sizeof(ps_sei->as_user_data_registered_itu_t_t35[0]))))
+    {
+        for(i = 0; i < u4_payload_size; i++)
+        {
+            ihevcd_bits_flush(ps_bitstrm, 8);
+        }
+        return (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
+    }
+
+    ps_sei->i1_user_data_registered_present_flag = 1;
+    ps_user_data_registered_itu_t_t35 =
+                    &ps_sei->as_user_data_registered_itu_t_t35[ps_sei->i4_sei_user_data_cnt++];
+    ps_user_data_registered_itu_t_t35->i4_payload_size = u4_payload_size;
+    u4_payload_size = MIN(u4_payload_size, MAX_USERDATA_PAYLOAD);
+    ps_user_data_registered_itu_t_t35->i4_valid_payload_size = u4_payload_size;
+
+    BITS_PARSE("itu_t_t35_country_code", value, ps_bitstrm, 8);
+    ps_user_data_registered_itu_t_t35->u1_itu_t_t35_country_code = value;
+    i = 1;
+
+    /* 0xFF announces an extension byte; read it only if the payload has one */
+    if((0xFF == value) && (i < u4_payload_size))
+    {
+        BITS_PARSE("itu_t_t35_country_code_extension_byte", value, ps_bitstrm, 8);
+        ps_user_data_registered_itu_t_t35->u1_itu_t_t35_country_code_extension_byte = value;
+        i = 2;
+    }
+
+    /* Pre-tested loop: a header-only payload yields no payload bytes */
+    for(; i < u4_payload_size; i++)
+    {
+        BITS_PARSE("itu_t_t35_payload_byte", value, ps_bitstrm, 8);
+        ps_user_data_registered_itu_t_t35->u1_itu_t_t35_payload_byte[j++] = value;
+    }
+    return (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
+}
+
+void ihevcd_parse_sei_payload(codec_t *ps_codec,
+                              UWORD32 u4_payload_type,
+                              UWORD32 u4_payload_size,
+                              WORD8 i1_nal_type)
+{
+    /* Dispatches one SEI payload to its parser (or skips it), then consumes the trailing payload bits */
+    parse_ctxt_t *ps_parse = &ps_codec->s_parse;
+    bitstrm_t *ps_bitstrm = &ps_parse->s_bitstrm;
+    WORD32 payload_bits_remaining = 0;
+    sps_t *ps_sps;
+    UWORD32 i;
+    /* Pick the first SPS marked valid; it is handed to the buffering period / pic timing parsers */
+    for(i = 0; i < MAX_SPS_CNT; i++)
+    {
+        ps_sps = ps_codec->ps_sps_base + i;
+        if(ps_sps->i1_sps_valid)
+        {
+            break;
+        }
+    }
+    if(MAX_SPS_CNT == i) /* search exhausted: no valid SPS (ps_sps itself is never NULL here) */
+    {
+        return;
+    }
+
+    if(NAL_PREFIX_SEI == i1_nal_type)
+    {
+        switch(u4_payload_type)
+        {
+            case SEI_BUFFERING_PERIOD:
+                ps_parse->s_sei_params.i1_sei_parameters_present_flag = 1;
+                ihevcd_parse_buffering_period_sei(ps_codec, ps_sps);
+                break;
+
+            case SEI_PICTURE_TIMING:
+                ps_parse->s_sei_params.i1_sei_parameters_present_flag = 1;
+                ihevcd_parse_pic_timing_sei(ps_codec, ps_sps);
+                break;
+
+            case SEI_TIME_CODE:
+                ps_parse->s_sei_params.i1_sei_parameters_present_flag = 1;
+                ihevcd_parse_time_code_sei(ps_codec);
+                break;
+
+            case SEI_MASTERING_DISPLAY_COLOUR_VOLUME:
+                ps_parse->s_sei_params.i4_sei_mastering_disp_colour_vol_params_present_flags = 1;
+                ihevcd_parse_mastering_disp_params_sei(ps_codec);
+                break;
+
+            case SEI_USER_DATA_REGISTERED_ITU_T_T35:
+                ps_parse->s_sei_params.i1_sei_parameters_present_flag = 1;
+                ihevcd_parse_user_data_registered_itu_t_t35(ps_codec,
+                                                            u4_payload_size);
+                break;
+
+            default: /* unhandled payload type: skip it byte by byte */
+                for(i = 0; i < u4_payload_size; i++)
+                {
+                    ihevcd_bits_flush(ps_bitstrm, 8);
+                }
+                break;
+        }
+    }
+    else /* NAL_SUFFIX_SEI */
+    {
+        switch(u4_payload_type)
+        {
+            case SEI_USER_DATA_REGISTERED_ITU_T_T35:
+                ps_parse->s_sei_params.i1_sei_parameters_present_flag = 1;
+                ihevcd_parse_user_data_registered_itu_t_t35(ps_codec,
+                                                            u4_payload_size);
+                break;
+
+            default: /* unhandled payload type: skip it byte by byte */
+                for(i = 0; i < u4_payload_size; i++)
+                {
+                    ihevcd_bits_flush(ps_bitstrm, 8);
+                }
+                break;
+        }
+    }
+
+    /**
+     * By definition the underlying bitstream terminates in a byte-aligned manner.
+     * 1. Extract all bar the last MIN(bitsremaining,nine) bits as reserved_payload_extension_data
+     * 2. Examine the final 8 bits to determine the payload_bit_equal_to_one marker
+     * 3. Extract the remaining reserved_payload_extension_data bits.
+     *
+     * If there are fewer than 9 bits available, extract them.
+     */
+
+    payload_bits_remaining = ihevcd_bits_num_bits_remaining(ps_bitstrm);
+    if(payload_bits_remaining) /* more_data_in_payload() */
+    {
+        WORD32 final_bits;
+        WORD32 final_payload_bits = 0;
+        WORD32 mask = 0xFF;
+        UWORD32 u4_dummy;
+        UWORD32 u4_reserved_payload_extension_data;
+        UNUSED(u4_dummy);
+        UNUSED(u4_reserved_payload_extension_data);
+
+        while(payload_bits_remaining > 9)
+        {
+            BITS_PARSE("reserved_payload_extension_data",
+                       u4_reserved_payload_extension_data, ps_bitstrm, 1);
+            payload_bits_remaining--;
+        }
+
+        final_bits = ihevcd_bits_nxt(ps_bitstrm, payload_bits_remaining);
+
+        while(final_bits & (mask >> final_payload_bits))
+        {
+            final_payload_bits++;
+            continue;
+        }
+
+        while(payload_bits_remaining > (9 - final_payload_bits))
+        {
+            BITS_PARSE("reserved_payload_extension_data",
+                       u4_reserved_payload_extension_data, ps_bitstrm, 1);
+            payload_bits_remaining--;
+        }
+
+        BITS_PARSE("payload_bit_equal_to_one", u4_dummy, ps_bitstrm, 1);
+        payload_bits_remaining--;
+        while(payload_bits_remaining)
+        {
+            BITS_PARSE("payload_bit_equal_to_zero", u4_dummy, ps_bitstrm, 1);
+            payload_bits_remaining--;
+        }
+    }
+
+    return;
+}
+
+IHEVCD_ERROR_T ihevcd_read_rbsp_trailing_bits(codec_t *ps_codec,
+                                              UWORD32 u4_bits_left)
+{
+    /* Reads rbsp_stop_one_bit, then the alignment bits that follow it */
+    bitstrm_t *ps_bitstrm = &ps_codec->s_parse.s_bitstrm;
+    UWORD32 value;
+    WORD32 cnt = 0;
+    BITS_PARSE("rbsp_stop_one_bit", value, ps_bitstrm, 1);
+    if(value != 1)
+    {
+        return (IHEVCD_ERROR_T)IHEVCD_FAIL;
+    }
+    /* u4_bits_left still counts the stop bit; testing "> 1" cannot wrap when it is 0 */
+    while(u4_bits_left > 1)
+    {
+        BITS_PARSE("rbsp_alignment_zero_bit", value, ps_bitstrm, 1);
+        u4_bits_left--;
+        cnt++;
+    }
+    ASSERT(cnt < 8);
+    return (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
+}
 /**
 *******************************************************************************
 *
@@ -1955,10 +2654,56 @@
 *
 *******************************************************************************
 */
-IHEVCD_ERROR_T ihevcd_parse_sei(codec_t *ps_codec)
+IHEVCD_ERROR_T ihevcd_parse_sei(codec_t *ps_codec, nal_header_t *ps_nal)
 {
     IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
-    UNUSED(ps_codec);
+    parse_ctxt_t *ps_parse = &ps_codec->s_parse;
+    bitstrm_t *ps_bitstrm = &ps_parse->s_bitstrm;
+    UWORD32 u4_payload_type, u4_payload_size;
+    UWORD32 value;
+    UWORD32 u4_bits_left;
+
+    u4_bits_left = ihevcd_bits_num_bits_remaining(ps_bitstrm);
+
+    while(u4_bits_left > 8)
+    {
+        /* payloadType/payloadSize are per message: restart both sums here */
+        u4_payload_type = 0;
+        u4_payload_size = 0;
+
+        while(ihevcd_bits_nxt(ps_bitstrm, 8) == 0xFF)
+        {
+            ihevcd_bits_flush(ps_bitstrm, 8); /* equal to 0xFF */
+            u4_payload_type += 255;
+        }
+
+        BITS_PARSE("last_payload_type_byte", value, ps_bitstrm, 8);
+        u4_payload_type += value;
+
+        while(ihevcd_bits_nxt(ps_bitstrm, 8) == 0xFF)
+        {
+            ihevcd_bits_flush(ps_bitstrm, 8); /* equal to 0xFF */
+            u4_payload_size += 255;
+        }
+
+        BITS_PARSE("last_payload_size_byte", value, ps_bitstrm, 8);
+        u4_payload_size += value;
+
+        u4_bits_left = ihevcd_bits_num_bits_remaining(ps_bitstrm);
+        u4_payload_size = MIN(u4_payload_size, u4_bits_left / 8); /* clamp to the bytes still available */
+        ihevcd_parse_sei_payload(ps_codec, u4_payload_type, u4_payload_size,
+                                 ps_nal->i1_nal_unit_type);
+
+        /* Bits still unread once this message has been handled */
+        u4_bits_left = ihevcd_bits_num_bits_remaining(ps_bitstrm);
+    }
+
+    // read rbsp_trailing_bits
+    if(u4_bits_left)
+    {
+        ihevcd_read_rbsp_trailing_bits(ps_codec, u4_bits_left);
+    }
+
     return ret;
 }
 
diff --git a/decoder/ihevcd_parse_headers.h b/decoder/ihevcd_parse_headers.h
index 2139f64..6e9870f 100644
--- a/decoder/ihevcd_parse_headers.h
+++ b/decoder/ihevcd_parse_headers.h
@@ -42,6 +42,13 @@
 IHEVCD_ERROR_T ihevcd_parse_vps(codec_t *ps_codec);
 IHEVCD_ERROR_T ihevcd_parse_sps(codec_t *ps_codec);
 IHEVCD_ERROR_T ihevcd_parse_pps(codec_t *ps_codec);
+IHEVCD_ERROR_T ihevcd_parse_sei(codec_t *ps_codec, nal_header_t *ps_nal);
+IHEVCD_ERROR_T ihevcd_parse_pic_timing_sei(codec_t *ps_codec, sps_t *ps_sps);
+IHEVCD_ERROR_T ihevcd_parse_buffering_period_sei(codec_t *ps_codec, sps_t *ps_sps);
+IHEVCD_ERROR_T ihevcd_parse_time_code_sei(codec_t *ps_codec);
+IHEVCD_ERROR_T ihevcd_parse_user_data_registered_itu_t_t35(codec_t *ps_codec, UWORD32 u4_payload_size);
+IHEVCD_ERROR_T ihevcd_parse_active_parameter_sets_sei(codec_t *ps_codec, sps_t *ps_sps);
+IHEVCD_ERROR_T ihevcd_read_rbsp_trailing_bits(codec_t *ps_codec, UWORD32 u4_bits_left);
 IHEVCD_ERROR_T ihevcd_parse_slice_header(codec_t *ps_codec,
                                          nal_header_t *ps_nal);
 
diff --git a/decoder/ihevcd_parse_slice_header.c b/decoder/ihevcd_parse_slice_header.c
index e9c3073..b0ed14d 100644
--- a/decoder/ihevcd_parse_slice_header.c
+++ b/decoder/ihevcd_parse_slice_header.c
@@ -468,9 +468,16 @@
                     if(i < ps_slice_hdr->i1_num_long_term_sps)
                     {
                         /* Use CLZ to compute Ceil( Log2( num_long_term_ref_pics_sps ) ) */
-                        WORD32 num_bits = 32 - CLZ(ps_sps->i1_num_long_term_ref_pics_sps);
-                        BITS_PARSE("lt_idx_sps[ i ]", value, ps_bitstrm, num_bits);
-                        ps_slice_hdr->ai4_poc_lsb_lt[i] = ps_sps->ai1_lt_ref_pic_poc_lsb_sps[value];
+                        if (ps_sps->i1_num_long_term_ref_pics_sps > 1)
+                        {
+                            WORD32 num_bits = 32 - CLZ(ps_sps->i1_num_long_term_ref_pics_sps - 1);
+                            BITS_PARSE("lt_idx_sps[ i ]", value, ps_bitstrm, num_bits);
+                        }
+                        else
+                        {
+                            value = 0;
+                        }
+                        ps_slice_hdr->ai4_poc_lsb_lt[i] = ps_sps->au2_lt_ref_pic_poc_lsb_sps[value];
                         ps_slice_hdr->ai1_used_by_curr_pic_lt_flag[i] = ps_sps->ai1_used_by_curr_pic_lt_sps_flag[value];
 
                     }
diff --git a/decoder/ihevcd_sao.c b/decoder/ihevcd_sao.c
index 2702317..dc852c6 100644
--- a/decoder/ihevcd_sao.c
+++ b/decoder/ihevcd_sao.c
@@ -568,10 +568,11 @@
     UWORD8  *pu1_sao_src_top_left_luma_bot_left;
     UWORD8 *au1_sao_src_top_left_chroma_bot_left;
     UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
-
-    WORD8 ai1_offset_y[5];
-    WORD8 ai1_offset_cb[5];
-    WORD8 ai1_offset_cr[5];
+    /* Only 5 values are used, but arrays are large
+     enough so that SIMD functions can read 64 bits at a time */
+    WORD8 ai1_offset_y[8];
+    WORD8 ai1_offset_cb[8];
+    WORD8 ai1_offset_cr[8];
     WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
 
     PROFILE_DISABLE_SAO();
diff --git a/decoder/ihevcd_structs.h b/decoder/ihevcd_structs.h
index ce0653e..5285e70 100644
--- a/decoder/ihevcd_structs.h
+++ b/decoder/ihevcd_structs.h
@@ -1161,7 +1161,10 @@
      */
     WORD32 i4_next_tu_ctb_cnt;
 
-
+    /**
+     * SEI parameters
+     */
+    sei_params_t s_sei_params;
 }parse_ctxt_t;
 
 /**
diff --git a/decoder/ihevcd_utils.c b/decoder/ihevcd_utils.c
index c6c21f5..7066102 100644
--- a/decoder/ihevcd_utils.c
+++ b/decoder/ihevcd_utils.c
@@ -518,6 +518,47 @@
             ps_pic_buf->pu1_chroma = pu1_buf + ps_codec->i4_strd * (PAD_TOP / 2) + PAD_LEFT;
             pu1_buf += chroma_samples;
 
+            /* Pad boundary pixels (one pixel on all sides) */
+            /* This ensures SAO does not read uninitialized pixels */
+            /* Note these are not used in actual processing */
+            {
+                UWORD8 *pu1_buf;
+                WORD32 strd, wd, ht;
+                WORD32 i;
+                strd = ps_codec->i4_strd;
+                wd = ps_codec->i4_wd;
+                ht = ps_codec->i4_ht;
+
+                pu1_buf = ps_pic_buf->pu1_luma;
+                for(i = 0; i < ht; i++)
+                {
+                    pu1_buf[-1] = 0;
+                    pu1_buf[wd] = 0;
+                    pu1_buf += strd;
+                }
+                pu1_buf = ps_pic_buf->pu1_luma;
+                memset(pu1_buf - strd - 1, 0, wd + 2);
+
+                pu1_buf += strd * ht;
+                memset(pu1_buf - 1, 0, wd + 2);
+
+                pu1_buf = ps_pic_buf->pu1_chroma;
+                ht >>= 1;
+                for(i = 0; i < ht; i++)
+                {
+                    pu1_buf[-1] = 0;
+                    pu1_buf[-2] = 0;
+                    pu1_buf[wd] = 0;
+                    pu1_buf[wd + 1] = 0;
+                    pu1_buf += strd;
+                }
+                pu1_buf = ps_pic_buf->pu1_chroma;
+                memset(pu1_buf - strd - 2, 0, wd + 4);
+
+                pu1_buf += strd * ht;
+                memset(pu1_buf - 2, 0, wd + 4);
+            }
+
             buf_ret = ihevc_buf_mgr_add((buf_mgr_t *)ps_codec->pv_pic_buf_mgr, ps_pic_buf, i);
 
 
@@ -782,6 +823,27 @@
         pu1_buf = ps_cur_pic->pu1_chroma;
 
         pu1_cur_pic_chroma = pu1_buf;
+
+        ps_cur_pic->s_sei_params.i1_sei_parameters_present_flag = 0;
+        if(ps_codec->s_parse.s_sei_params.i1_sei_parameters_present_flag)
+        {
+            sei_params_t *ps_sei = &ps_codec->s_parse.s_sei_params;
+            ps_cur_pic->s_sei_params = ps_codec->s_parse.s_sei_params;
+
+            /* Once sei_params is copied to pic_buf,
+             * mark sei_params in s_parse as not present,
+             * this ensures that future frames do not use this data again.
+             */
+            ps_sei->i1_sei_parameters_present_flag = 0;
+            ps_sei->i1_user_data_registered_present_flag = 0;
+            ps_sei->i1_aud_present_flag = 0;
+            ps_sei->i1_time_code_present_flag = 0;
+            ps_sei->i1_buf_period_params_present_flag = 0;
+            ps_sei->i1_pic_timing_params_present_flag = 0;
+            ps_sei->i1_recovery_point_params_present_flag = 0;
+            ps_sei->i1_active_parameter_set = 0;
+            ps_sei->i4_sei_mastering_disp_colour_vol_params_present_flags = 0;
+        }
     }
 
     if(0 == ps_codec->u4_pic_cnt)