Reduced memory requirements.

Buffer allocation is now based on the minimum level required for
the input resolution rather than on the input max level (u4_max_level).

The maximum number of context sets (MAX_CTXT_SETS) can now be set
to 1 to reduce memory usage; this change sets it to 1.

Added a macro ENC_MIN_PU_SIZE for the minimum size of an
inter prediction unit supported by the encoder.

Changed the upper limit on the number of MBs for NMB processing
from the fixed MAX_NMB to the picture width in MBs.

Change-Id: I5a9255e93935d90c13262681aafc772aedf8ae81
diff --git a/encoder/ih264e_api.c b/encoder/ih264e_api.c
index 232a110..a8de1ec 100644
--- a/encoder/ih264e_api.c
+++ b/encoder/ih264e_api.c
@@ -2502,7 +2502,7 @@
     {
         WORD32 max_mb_rows = ps_cfg->i4_ht_mbs;
 
-        WORD32 num_jobs = max_mb_rows * 2;
+        WORD32 num_jobs = max_mb_rows * MAX_CTXT_SETS;
         WORD32 clz;
 
         /* Use next power of two number of entries*/
@@ -2674,8 +2674,6 @@
     /* error status */
     IV_STATUS_T status = IV_SUCCESS;
 
-    /* profile / level info */
-    level = ps_ip->s_ive_ip.u4_max_level;
     num_reorder_frames = ps_ip->s_ive_ip.u4_max_reorder_cnt;
     num_ref_frames = ps_ip->s_ive_ip.u4_max_ref_cnt;
 
@@ -2692,6 +2690,9 @@
     max_mb_cols = max_wd_luma / MB_SIZE;
     max_mb_cnt = max_mb_rows * max_mb_cols;
 
+    /* profile / level info */
+    level = ih264e_get_min_level(max_ht_luma, max_wd_luma);
+
     /* validate params */
     if ((level < MIN_LEVEL) || (level > MAX_LEVEL))
     {
@@ -3062,7 +3063,7 @@
     {
         /* One process job per row of MBs */
         /* Allocate for two pictures, so that wrap around can be handled easily */
-        WORD32 num_jobs = max_mb_rows * 2;
+        WORD32 num_jobs = max_mb_rows * MAX_CTXT_SETS;
 
         WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t));
 
@@ -3077,7 +3078,7 @@
     {
         /* One process job per row of MBs */
         /* Allocate for two pictures, so that wrap around can be handled easily */
-        WORD32 num_jobs = max_mb_rows * 2;
+        WORD32 num_jobs = max_mb_rows * MAX_CTXT_SETS;
 
         WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t));
 
@@ -3464,9 +3465,9 @@
      ************************************************************************/
     ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_INFO_NMB];
     {
-        ps_mem_rec->u4_mem_size = MAX_PROCESS_CTXT * MAX_NMB
-                        * (sizeof(mb_info_nmb_t)
-                                        + MB_SIZE * MB_SIZE * sizeof(UWORD8));
+        ps_mem_rec->u4_mem_size = MAX_PROCESS_CTXT * max_mb_cols *
+                                 (sizeof(mb_info_nmb_t) + MB_SIZE * MB_SIZE 
+                                  * sizeof(UWORD8));
     }
     DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_INFO_NMB, ps_mem_rec->u4_mem_size);
 
@@ -3641,7 +3642,7 @@
 
         for (i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if (i < MAX_PROCESS_CTXT / 2)
+            if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
             {
                 /* base ptr */
                 UWORD8 *pu1_buf = ps_mem_rec->pv_base;
@@ -3756,7 +3757,7 @@
 
         for (i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if (i < MAX_PROCESS_CTXT / 2)
+            if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
             {
                 ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf;
                 ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data =
@@ -3794,7 +3795,7 @@
 
         for (i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if (i < MAX_PROCESS_CTXT / 2)
+            if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
             {
                 ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf;
                 ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data =
@@ -3874,7 +3875,7 @@
 
         for (i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if (i < MAX_PROCESS_CTXT / 2)
+            if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
             {
                 ps_codec->as_process[i].ps_slice_hdr_base = ps_mem_rec->pv_base;
             }
@@ -3896,7 +3897,7 @@
 
         for (i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if (i < MAX_PROCESS_CTXT / 2)
+            if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
             {
                 ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf;
             }
@@ -3921,7 +3922,7 @@
 
         for (i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if (i < MAX_PROCESS_CTXT / 2)
+            if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
             {
                 ps_codec->as_process[i].pu1_slice_idx = pu1_buf_ping;
             }
@@ -3981,7 +3982,7 @@
 
         for (i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if (i < MAX_PROCESS_CTXT / 2)
+            if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
             {
                 ps_codec->as_process[i].pu1_proc_map = pu1_buf + max_mb_cols;
             }
@@ -4012,7 +4013,7 @@
 
         for (i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if (i < MAX_PROCESS_CTXT / 2)
+            if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
             {
                 ps_codec->as_process[i].pu1_deblk_map = pu1_buf + max_mb_cols;
 
@@ -4042,7 +4043,7 @@
 
         for (i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if (i < MAX_PROCESS_CTXT / 2)
+            if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
             {
                 ps_codec->as_process[i].pu1_me_map = pu1_buf + max_mb_cols;
             }
@@ -4238,7 +4239,7 @@
 
         for (i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if (i < MAX_PROCESS_CTXT / 2)
+            if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
             {
                 ps_codec->as_process[i].ps_top_row_mb_syntax_ele_base =
                                 (mb_info_t *) pu1_buf;
@@ -4289,7 +4290,7 @@
 
         for (i = 0; i < MAX_PROCESS_CTXT; i++)
         {
-            if (i < MAX_PROCESS_CTXT / 2)
+            if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
             {
                 pu1_buf_ping = (UWORD8 *) ps_mem_rec->pv_base;
 
@@ -4370,9 +4371,9 @@
         UWORD8 *pu1_buf = ps_mem_rec->pv_base;
 
         /* size of nmb ctxt */
-        WORD32 size = MAX_NMB * sizeof(mb_info_nmb_t);
+        WORD32 size = max_mb_cols * sizeof(mb_info_nmb_t);
 
-        UWORD32 nmb_cntr, subpel_buf_size;
+        WORD32 nmb_cntr, subpel_buf_size;
 
         /* init nmb info structure pointer in all proc ctxts */
         for (i = 0; i < MAX_PROCESS_CTXT; i++)
@@ -4390,7 +4391,7 @@
             mb_info_nmb_t* ps_mb_info_nmb =
                             &ps_codec->as_process[i].ps_nmb_info[0];
 
-            for (nmb_cntr = 0; nmb_cntr < MAX_NMB; nmb_cntr++)
+            for (nmb_cntr = 0; nmb_cntr < max_mb_cols; nmb_cntr++)
             {
                 ps_mb_info_nmb[nmb_cntr].pu1_best_sub_pel_buf = pu1_buf;
 
diff --git a/encoder/ih264e_defs.h b/encoder/ih264e_defs.h
index c7e2a87..aee270e 100644
--- a/encoder/ih264e_defs.h
+++ b/encoder/ih264e_defs.h
@@ -140,6 +140,11 @@
 #define MAX_REF_CNT  32
 
 /*****************************************************************************/
+/* Minimum size of inter prediction unit supported by encoder                */
+/*****************************************************************************/
+#define ENC_MIN_PU_SIZE     16
+
+/*****************************************************************************/
 /* Num cores releated defs                                                   */
 /*****************************************************************************/
 /**
@@ -156,7 +161,7 @@
  * Maximum process context sets
  * Used to stagger encoding of MAX_CTXT_SETS in parallel
  */
-#define MAX_CTXT_SETS   2
+#define MAX_CTXT_SETS   1
 /**
  * Maximum number of contexts
  * Kept as twice the number of threads, to make it easier to initialize the contexts
@@ -529,8 +534,6 @@
 #define MIN_RAW_BUFS_RGBA8888_COMP   1
 #define MIN_RAW_BUFS_420SP_COMP      2
 
-#define MAX_NMB 120
-
 /** Maximum number of active config paramter sets */
 #define MAX_ACTIVE_CONFIG_PARAMS 32
 
diff --git a/encoder/ih264e_encode.c b/encoder/ih264e_encode.c
index f131eb2..7651352 100644
--- a/encoder/ih264e_encode.c
+++ b/encoder/ih264e_encode.c
@@ -236,7 +236,7 @@
     ps_codec->i4_encode_api_call_cnt += 1;
 
     /* codec context selector */
-    ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+    ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
 
     /* reset status flags */
     ps_codec->ai4_pic_cnt[ctxt_sel] = -1;
diff --git a/encoder/ih264e_encode_header.c b/encoder/ih264e_encode_header.c
index 2a895b1..6c96e63 100644
--- a/encoder/ih264e_encode_header.c
+++ b/encoder/ih264e_encode_header.c
@@ -90,6 +90,7 @@
 #include "ih264e_encode_header.h"
 #include "ih264_common_tables.h"
 #include "ih264_macros.h"
+#include "ih264e_utils.h"
 
 
 /*****************************************************************************/
@@ -686,17 +687,8 @@
     }
 
     /* level */
-    ps_sps->u1_level_idc = ps_cfg->u4_max_level;
-//    i4_err_code = ih264e_get_level(ps_cfg, &level_idc);
-//    if (i4_err_code == IH264E_SUCCESS)
-//    {
-//        ps_sps->u1_level_idc = level_idc;
-//
-//    }
-//    else
-//    {
-//        return i4_err_code;
-//    }
+    ps_sps->u1_level_idc = MAX(ps_cfg->u4_max_level,
+                               (UWORD32)ih264e_get_min_level(ps_cfg->u4_max_wd, ps_cfg->u4_max_ht));
 
     /* constrained flags */
     /*
diff --git a/encoder/ih264e_process.c b/encoder/ih264e_process.c
index db960dc..ff6846a 100644
--- a/encoder/ih264e_process.c
+++ b/encoder/ih264e_process.c
@@ -138,7 +138,7 @@
 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
 {
     /* choose between ping-pong process buffer set */
-    WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+    WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
 
     /* entropy ctxt */
     entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
@@ -308,7 +308,7 @@
     UWORD8  *pu1_entropy_map_curr;
 
     /* proc base idx */
-    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
+    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
 
     /* temp var */
     WORD32 i4_wd_mbs, i4_ht_mbs;
@@ -1037,7 +1037,7 @@
         s_job.i2_mb_y = ps_proc->i4_mb_y;
 
         /* proc base idx */
-        s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt & 1) ? (MAX_PROCESS_CTXT / 2): 0 ;
+        s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
 
         /* queue the job */
         error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
@@ -1182,8 +1182,8 @@
     i4_mb_y = ps_proc->i4_mb_y;
 
     /* Number of mbs processed in one loop of process function */
-    ps_proc->i4_nmb_ntrpy = (ps_proc->i4_wd_mbs > MAX_NMB) ? MAX_NMB : ps_proc->i4_wd_mbs;
-    ps_proc->u4_nmb_me = (ps_proc->i4_wd_mbs > MAX_NMB)? MAX_NMB : ps_proc->i4_wd_mbs;
+    ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
+    ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
 
     /* init buffer pointers */
     convert_uv_only = 1;
@@ -1371,10 +1371,12 @@
     /*********************************************************************/
 
     /* init mv buffer ptr */
-    ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
+    ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
+                     ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
 
     /* Init co-located mv buffer */
-    ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
+    ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
+                        ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
 
     if (i4_mb_y == 0)
     {
@@ -1382,7 +1384,8 @@
     }
     else
     {
-        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
+        ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
+                                    ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
     }
 
     ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
@@ -1911,7 +1914,7 @@
     WORD32 luma_idx, chroma_idx, is_intra;
 
     /* temp variables */
-    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
+    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
 
     /* list of modes for evaluation */
     if (ps_proc->i4_slice_type == ISLICE)
@@ -2435,7 +2438,7 @@
             int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
 
             /* codec context selector */
-            WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+            WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
 
             volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
 
diff --git a/encoder/ih264e_utils.c b/encoder/ih264e_utils.c
index 6feb036..b8514a8 100644
--- a/encoder/ih264e_utils.c
+++ b/encoder/ih264e_utils.c
@@ -331,7 +331,7 @@
 
     /* Mark the skip flag   */
     i4_skip = 0;
-    ctxt_sel = ps_codec->i4_encode_api_call_cnt & 0x01;
+    ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
     ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = i4_skip;
 
     /* Get a buffer to encode */
@@ -375,13 +375,15 @@
 *
 *******************************************************************************
 */
-WORD32 ih264e_get_min_level(WORD32 pic_size)
+WORD32 ih264e_get_min_level(WORD32 wd, WORD32 ht)
 {
     WORD32 lvl_idx = MAX_LEVEL, i;
-
+    WORD32 pic_size = wd * ht;
+    WORD32 max = MAX(wd, ht);
     for (i = 0; i < MAX_LEVEL; i++)
     {
-        if (pic_size <= gai4_ih264_max_luma_pic_size[i])
+        if ((pic_size <= gai4_ih264_max_luma_pic_size[i]) &&
+            (max <= gai4_ih264_max_wd_ht[i]))
         {
             lvl_idx = i;
             break;
@@ -645,7 +647,7 @@
     WORD32 mv_bank_size = 0;
 
     /* number of sub mb partitions possible */
-    WORD32 num_pu = num_luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE);
+    WORD32 num_pu = num_luma_samples / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE);
 
     /* number of mbs */
     WORD32 num_mb = num_luma_samples / (MB_SIZE * MB_SIZE);
@@ -655,10 +657,10 @@
     mv_bank_size += num_mb * sizeof(WORD32);
 
     /* Size for pu_map */
-    mv_bank_size += num_pu;
+    mv_bank_size += ALIGN4(num_pu);
 
     /* Size for storing enc_pu_t for each PU */
-    mv_bank_size += num_pu * sizeof(enc_pu_t);
+    mv_bank_size += ALIGN4(num_pu * sizeof(enc_pu_t));
 
     return mv_bank_size;
 }
@@ -789,7 +791,7 @@
 
     /* num of luma samples */
     WORD32 num_luma_samples = ALIGN16(ps_codec->s_cfg.u4_wd)
-                    * ALIGN16(ps_codec->s_cfg.u4_ht);
+                            * ALIGN16(ps_codec->s_cfg.u4_ht);
 
     /* number of mb's & frame partitions */
     WORD32 num_pu, num_mb;
@@ -815,7 +817,7 @@
     /* compute MV bank size per picture */
     pic_mv_bank_size = ih264e_get_pic_mv_bank_size(num_luma_samples);
 
-    num_pu = num_luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE);
+    num_pu = num_luma_samples / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE);
     num_mb = num_luma_samples / (MB_SIZE * MB_SIZE);
     i = 0;
     ps_mv_buf = ps_codec->pv_mv_bank_buf_base;
@@ -834,11 +836,13 @@
         }
 
         ps_mv_buf->pu4_mb_pu_cnt = (UWORD32 *) pu1_buf;
+        pu1_buf += num_mb * sizeof(WORD32);
 
-        ps_mv_buf->pu1_pic_pu_map = (pu1_buf + num_mb * sizeof(WORD32));
+        ps_mv_buf->pu1_pic_pu_map = pu1_buf;
+        pu1_buf += ALIGN4(num_pu);
 
-        ps_mv_buf->ps_pic_pu = (enc_pu_t *) (pu1_buf + num_mb * sizeof(WORD32)
-                        + num_pu);
+        ps_mv_buf->ps_pic_pu = (enc_pu_t *) (pu1_buf);
+        pu1_buf += ALIGN4(num_pu * sizeof(enc_pu_t));
 
         ret = ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_mv_buf_mgr,
                                 ps_mv_buf, i);
@@ -850,7 +854,6 @@
             return error_status;
         }
 
-        pu1_buf += pic_mv_bank_size;
         ps_mv_buf++;
         i++;
     }
@@ -1321,7 +1324,7 @@
     UWORD32 u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
 
     /* indices to access curr/prev frame info */
-    WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
+    WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
 
     /* curr pic type */
     PIC_TYPE_T *pic_type = &ps_codec->pic_type;
diff --git a/encoder/ih264e_utils.h b/encoder/ih264e_utils.h
index 912711f..27e37e8 100644
--- a/encoder/ih264e_utils.h
+++ b/encoder/ih264e_utils.h
@@ -91,8 +91,11 @@
 *  Gets the minimum level index and then gets corresponding level.
 *  Also used to ignore invalid levels like 2.3, 3.3 etc
 *
-* @param[in] level
-*  Level of the stream
+* @param[in] wd
+*  Width
+*
+* @param[in] ht
+*  Height
 *
 * @returns  Level index for a given level
 *
@@ -100,7 +103,7 @@
 *
 *******************************************************************************
 */
-WORD32 ih264e_get_min_level(WORD32 pic_size);
+WORD32 ih264e_get_min_level(WORD32 wd, WORD32 ht);
 
 /**
 *******************************************************************************
diff --git a/test/encoder/recon.c b/test/encoder/recon.c
index 7b347f3..d177a62 100644
--- a/test/encoder/recon.c
+++ b/test/encoder/recon.c
@@ -161,7 +161,7 @@
 
     /* All the pointers and dimensions are initialized here
      * to support change in resolution from the application */
-    luma_size = ALIGN16(ps_app_ctxt->u4_max_wd) * ALIGN16(ps_app_ctxt->u4_max_ht);
+    luma_size = ps_app_ctxt->u4_max_wd * ps_app_ctxt->u4_max_ht;
     chroma_size = (luma_size) / 4;
 
     ps_raw_buf->apv_bufs[0] = pu1_buf;