Merged the lazy strategies (lazy, hclazy2, btlazy2) into one generic block compressor
diff --git a/lib/zstdhc.c b/lib/zstdhc.c
index fb10b4b..efe0c2b 100644
--- a/lib/zstdhc.c
+++ b/lib/zstdhc.c
@@ -385,7 +385,261 @@
 }
 
 
-size_t ZSTD_HC_compressBlock_btLazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+/* ***********************
+*  Hash Chain
+*************************/
+
+/* Insert every position from zc->nextToUpdate up to ip (excluded) into the hash table and chains, then return the hash-table entry for ip */
+static U32 ZSTD_HC_insertAndFindFirstIndex  (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls)
+{
+    U32* const hashTable  = zc->hashTable;
+    const U32 hashLog = zc->params.hashLog;
+    U32* const chainTable = zc->chainTable;   /* not named below; presumably referenced by the NEXT_IN_CHAIN macro (defined elsewhere) - confirm */
+    const U32 chainMask = (1 << zc->params.chainLog) - 1;
+    const BYTE* const base = zc->base;
+    const U32 target = (U32)(ip - base);   /* index of ip relative to base */
+    U32 idx = zc->nextToUpdate;
+
+    while(idx < target)
+    {
+        size_t h = ZSTD_HC_hashPtr(base+idx, hashLog, mls);
+        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];   /* chain idx to the previous entry of its hash bucket */
+        hashTable[h] = idx;   /* idx becomes the bucket's newest entry */
+        idx++;
+    }
+
+    zc->nextToUpdate = target;
+    return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)];   /* ip itself is not inserted by this call */
+}
+
+
+FORCE_INLINE /* Hash-chain search : returns the longest match length found for ip (0 if none) and writes its offset to *offsetPtr. Inlining is important to hardwire a hot branch (template emulation) */
+size_t ZSTD_HC_insertAndFindBestMatch (
+                        ZSTD_HC_CCtx* zc,   /* Index table will be updated */
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+    U32* const chainTable = zc->chainTable;   /* not named below; presumably referenced by the NEXT_IN_CHAIN macro (defined elsewhere) - confirm */
+    const U32 chainSize = (1 << zc->params.chainLog);
+    const U32 chainMask = chainSize-1;
+    const BYTE* const base = zc->base;
+    const BYTE* const dictBase = zc->dictBase;
+    const U32 dictLimit = zc->dictLimit;
+    const U32 maxDistance = (1 << zc->params.windowLog);
+    const U32 lowLimit = (zc->lowLimit + maxDistance > (U32)(ip-base)) ? zc->lowLimit : (U32)(ip - base) - (maxDistance - 1);   /* zc->lowLimit, raised when needed so candidates stay within maxDistance of ip */
+    U32 matchIndex;
+    const BYTE* match;
+    int nbAttempts=maxNbAttempts;   /* remaining number of candidates to examine */
+    size_t ml=0;   /* best match length so far; 0 means no match found */
+
+    /* HC4 match finder */
+    matchIndex = ZSTD_HC_insertAndFindFirstIndex (zc, ip, matchLengthSearch);
+
+    while ((matchIndex>lowLimit) && (nbAttempts))
+    {
+        nbAttempts--;
+        if (matchIndex >= dictLimit)   /* candidate is addressed through base */
+        {
+            match = base + matchIndex;
+            if ( (match[ml] == ip[ml])   /* a match longer than ml must also agree at position ml */
+              && (MEM_read32(match) == MEM_read32(ip)) )   /* ensures minimum match of 4 */
+            {
+                const size_t mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
+                if (mlt > ml)
+                //if (((int)(4*mlt) - (int)ZSTD_highbit((U32)(ip-match)+1)) > ((int)(4*ml) - (int)ZSTD_highbit((U32)((*offsetPtr)+1))))
+                {
+                    ml = mlt; *offsetPtr = ip-match;
+                    if (ip+ml >= iLimit) break;   /* match reaches iLimit : it cannot get any longer */
+                }
+            }
+        }
+        else   /* candidate below dictLimit : addressed through dictBase */
+        {
+            match = dictBase + matchIndex;
+            if (MEM_read32(match) == MEM_read32(ip))
+            {
+                size_t mlt;
+                const BYTE* vLimit = ip + (dictLimit - matchIndex);   /* first count is capped at the distance from matchIndex to dictLimit */
+                if (vLimit > iLimit) vLimit = iLimit;
+                mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
+                if ((ip+mlt == vLimit) && (vLimit < iLimit))   /* first count stopped exactly at the cap */
+                    mlt += ZSTD_count(ip+mlt, base+dictLimit, iLimit);   /* keep counting against the data starting at base+dictLimit */
+                if (mlt > ml) { ml = mlt; *offsetPtr = (ip-base) - matchIndex; }   /* offset is computed from indices, since match points into dictBase */
+            }
+        }
+
+        if (base + matchIndex <= ip - chainSize) break;   /* candidate is at least chainSize behind ip : stop following the chain (presumably its slot may have been reused - confirm) */
+        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
+    }
+
+    return ml;
+}
+
+
+FORCE_INLINE size_t ZSTD_HC_insertAndFindBestMatch_selectMLS (
+                        ZSTD_HC_CCtx* zc,   /* Index table will be updated */
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 maxNbAttempts, const U32 matchLengthSearch)
+{
+    switch(matchLengthSearch)   /* forwards to ZSTD_HC_insertAndFindBestMatch with the search length as a literal constant */
+    {
+    default :   /* any value other than 5 or 6 is handled as 4 */
+    case 4 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
+    case 5 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+    case 6 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
+    }
+}
+
+
+#if 1
+
+FORCE_INLINE   /* shared body of the lazy, hclazy2 and btlazy2 block compressors, which call it with constant searchMethod and deep */
+size_t ZSTD_HC_compressBlock_lazy_generic(ZSTD_HC_CCtx* ctx,
+                                     void* dst, size_t maxDstSize, const void* src, size_t srcSize,
+                                     const U32 searchMethod, const U32 deep)   /* searchMethod : 0 = hc, 1 = bt ; deep : non-zero enables a second lookahead position */
+{
+    seqStore_t* seqStorePtr = &(ctx->seqStore);
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;   /* start of the literals not yet stored */
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;   /* the match loop stops 8 bytes before the end of the block */
+
+    size_t offset_2=REPCODE_STARTVALUE, offset_1=REPCODE_STARTVALUE;
+    const U32 maxSearches = 1 << ctx->params.searchLog;
+    const U32 mls = ctx->params.searchLength;
+
+    typedef size_t (*searchMax_f)(ZSTD_HC_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
+                        size_t* offsetPtr,
+                        U32 maxNbAttempts, U32 matchLengthSearch);
+    searchMax_f searchMax = searchMethod ? ZSTD_HC_BtFindBestMatch_selectMLS : ZSTD_HC_insertAndFindBestMatch_selectMLS;
+
+    /* init */
+    ZSTD_resetSeqStore(seqStorePtr);
+    if (((ip-ctx->base) - ctx->dictLimit) < REPCODE_STARTVALUE) ip += REPCODE_STARTVALUE;   /* presumably keeps the first repcode reads (ip - offset) inside valid data - confirm */
+
+    /* Match Loop */
+    while (ip <= ilimit)
+    {
+        size_t matchLength;
+        size_t offset=999999;   /* placeholder offset, used until a search reports one */
+        const BYTE* start;
+
+        /* try to find a first match */
+        if (MEM_read32(ip) == MEM_read32(ip - offset_2))
+        {
+            /* repcode : the 4 bytes at ip repeat those at distance offset_2, we take it */
+            size_t offtmp = offset_2;
+            size_t litLength = ip - anchor;
+            matchLength = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_2, iend);   /* length beyond the first MINMATCH bytes */
+            offset_2 = offset_1;   /* swap offset_1 and offset_2 (with offtmp) */
+            offset_1 = offtmp;
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, 0, matchLength);   /* offset argument 0 is used for the repcode */
+            ip += matchLength+MINMATCH;
+            anchor = ip;
+            continue;
+        }
+
+        offset_2 = offset_1;
+        matchLength = searchMax(ctx, ip, iend, &offset, maxSearches, mls);
+        if (!matchLength) { ip++; continue; }   /* no match at ip : move one byte forward */
+
+        /* let's try to find a better solution */
+        start = ip;
+
+        while (ip<ilimit)
+        {
+            ip ++;
+            if (MEM_read32(ip) == MEM_read32(ip - offset_1))   /* repcode candidate at the next position */
+            {
+                size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
+                int gain2 = (int)(ml2 * 3);   /* repcode candidate : no offset term */
+                int gain1 = (int)(matchLength*3 - ZSTD_highbit((U32)offset+1) + 1);   /* current match : weighted length minus ZSTD_highbit(offset+1), plus a bias of 1 */
+                if (gain2 > gain1)
+                    matchLength = ml2, offset = 0, start = ip;   /* offset 0 marks the repcode */
+            }
+            {
+                size_t offset2=999999;
+                size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+                int gain2 = (int)(ml2*(3+deep) - ZSTD_highbit((U32)offset2+1));   /* raw approx */
+                int gain1 = (int)(matchLength*(3+deep) - ZSTD_highbit((U32)offset+1) + (3+deep));   /* current match gets a bias of (3+deep) */
+                if (gain2 > gain1)
+                {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+                }
+            }
+
+            /* let's find an even better one */
+            if (deep && (ip<ilimit))   /* second lookahead position, only when deep is set */
+            {
+                ip ++;
+                if (MEM_read32(ip) == MEM_read32(ip - offset_1))
+                {
+                    size_t ml2 = ZSTD_count(ip+MINMATCH, ip+MINMATCH-offset_1, iend) + MINMATCH;
+                    int gain2 = (int)(ml2 * 4);
+                    int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 1);
+                    if (gain2 > gain1)
+                        matchLength = ml2, offset = 0, start = ip;   /* offset 0 marks the repcode */
+                }
+                {
+                    size_t offset2=999999;
+                    size_t ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+                    int gain2 = (int)(ml2*4 - ZSTD_highbit((U32)offset2+1));   /* raw approx */
+                    int gain1 = (int)(matchLength*4 - ZSTD_highbit((U32)offset+1) + 7);   /* current match gets a bias of 7 at this position */
+                    if (gain2 > gain1)
+                    {
+                        matchLength = ml2, offset = offset2, start = ip;
+                        continue;   /* restart the lookahead from the new candidate */
+                    }
+                }
+            }
+            break;  /* nothing found : store previous solution */
+        }
+
+        /* store sequence */
+        {
+            size_t litLength = start - anchor;
+            if (offset) offset_1 = offset;   /* offset 0 means a repcode was selected : offset_1 is left unchanged */
+            ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, matchLength-MINMATCH);   /* matchLength includes MINMATCH here, so it is subtracted */
+            ip = start + matchLength;
+            anchor = ip;
+        }
+
+    }
+
+    /* Last Literals : copy everything after the last stored sequence */
+    {
+        size_t lastLLSize = iend - anchor;
+        memcpy(seqStorePtr->lit, anchor, lastLLSize);
+        seqStorePtr->lit += lastLLSize;
+    }
+
+    /* Final compression stage */
+    return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
+                                  seqStorePtr, srcSize);
+}
+
+size_t ZSTD_HC_compressBlock_btlazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 1, 1);   /* searchMethod = 1 (bt), deep = 1 */
+}
+
+size_t ZSTD_HC_compressBlock_hclazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 1);   /* searchMethod = 0 (hc), deep = 1 */
+}
+
+size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    return ZSTD_HC_compressBlock_lazy_generic(ctx, dst, maxDstSize, src, srcSize, 0, 0);   /* searchMethod = 0 (hc), deep = 0 */
+}
+
+#else
+
+size_t ZSTD_HC_compressBlock_btlazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
@@ -505,116 +759,7 @@
 }
 
 
-
-/* ***********************
-*  Hash Chain
-*************************/
-
-/* Update chains up to ip (excluded) */
-static U32 ZSTD_HC_insertAndFindFirstIndex  (ZSTD_HC_CCtx* zc, const BYTE* ip, U32 mls)
-{
-    U32* const hashTable  = zc->hashTable;
-    const U32 hashLog = zc->params.hashLog;
-    U32* const chainTable = zc->chainTable;
-    const U32 chainMask = (1 << zc->params.chainLog) - 1;
-    const BYTE* const base = zc->base;
-    const U32 target = (U32)(ip - base);
-    U32 idx = zc->nextToUpdate;
-
-    while(idx < target)
-    {
-        size_t h = ZSTD_HC_hashPtr(base+idx, hashLog, mls);
-        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
-        hashTable[h] = idx;
-        idx++;
-    }
-
-    zc->nextToUpdate = target;
-    return hashTable[ZSTD_HC_hashPtr(ip, hashLog, mls)];
-}
-
-
-FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
-size_t ZSTD_HC_insertAndFindBestMatch (
-                        ZSTD_HC_CCtx* zc,   /* Index table will be updated */
-                        const BYTE* const ip, const BYTE* const iLimit,
-                        size_t* offsetPtr,
-                        const U32 maxNbAttempts, const U32 matchLengthSearch)
-{
-    U32* const chainTable = zc->chainTable;
-    const U32 chainSize = (1 << zc->params.chainLog);
-    const U32 chainMask = chainSize-1;
-    const BYTE* const base = zc->base;
-    const BYTE* const dictBase = zc->dictBase;
-    const U32 dictLimit = zc->dictLimit;
-    const U32 maxDistance = (1 << zc->params.windowLog);
-    const U32 lowLimit = (zc->lowLimit + maxDistance > (U32)(ip-base)) ? zc->lowLimit : (U32)(ip - base) - (maxDistance - 1);
-    U32 matchIndex;
-    const BYTE* match;
-    int nbAttempts=maxNbAttempts;
-    size_t ml=0;
-
-    /* HC4 match finder */
-    matchIndex = ZSTD_HC_insertAndFindFirstIndex (zc, ip, matchLengthSearch);
-
-    while ((matchIndex>lowLimit) && (nbAttempts))
-    {
-        nbAttempts--;
-        if (matchIndex >= dictLimit)
-        {
-            match = base + matchIndex;
-            if ( (match[ml] == ip[ml])
-              && (MEM_read32(match) == MEM_read32(ip)) )   /* ensures minimum match of 4 */
-            {
-                const size_t mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH;
-                if (mlt > ml)
-                //if (((int)(4*mlt) - (int)ZSTD_highbit((U32)(ip-match)+1)) > ((int)(4*ml) - (int)ZSTD_highbit((U32)((*offsetPtr)+1))))
-                {
-                    ml = mlt; *offsetPtr = ip-match;
-                    if (ip+ml >= iLimit) break;
-                }
-            }
-        }
-        else
-        {
-            match = dictBase + matchIndex;
-            if (MEM_read32(match) == MEM_read32(ip))
-            {
-                size_t mlt;
-                const BYTE* vLimit = ip + (dictLimit - matchIndex);
-                if (vLimit > iLimit) vLimit = iLimit;
-                mlt = ZSTD_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH;
-                if ((ip+mlt == vLimit) && (vLimit < iLimit))
-                    mlt += ZSTD_count(ip+mlt, base+dictLimit, iLimit);
-                if (mlt > ml) { ml = mlt; *offsetPtr = (ip-base) - matchIndex; }
-            }
-        }
-
-        if (base + matchIndex <= ip - chainSize) break;
-        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
-    }
-
-    return ml;
-}
-
-
-FORCE_INLINE size_t ZSTD_HC_insertAndFindBestMatch_selectMLS (
-                        ZSTD_HC_CCtx* zc,   /* Index table will be updated */
-                        const BYTE* ip, const BYTE* const iLimit,
-                        size_t* offsetPtr,
-                        const U32 maxNbAttempts, const U32 matchLengthSearch)
-{
-    switch(matchLengthSearch)
-    {
-    default :
-    case 4 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
-    case 5 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
-    case 6 : return ZSTD_HC_insertAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
-    }
-}
-
-
-size_t ZSTD_HC_compressBlock_lazydeep(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+size_t ZSTD_HC_compressBlock_hclazy2(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
     const BYTE* const istart = (const BYTE*)src;
@@ -733,7 +878,6 @@
                                   seqStorePtr, srcSize);
 }
 
-
 size_t ZSTD_HC_compressBlock_lazy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
@@ -833,6 +977,11 @@
 }
 
 
+#endif
+
+
+
+
 size_t ZSTD_HC_compressBlock_greedy(ZSTD_HC_CCtx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
@@ -923,10 +1072,10 @@
         return ZSTD_HC_compressBlock_greedy;
     case ZSTD_HC_lazy:
         return ZSTD_HC_compressBlock_lazy;
-    case ZSTD_HC_lazydeep:
-        return ZSTD_HC_compressBlock_lazydeep;
+    case ZSTD_HC_hclazy2:
+        return ZSTD_HC_compressBlock_hclazy2;
     case ZSTD_HC_btlazy2:
-        return ZSTD_HC_compressBlock_btLazy2;
+        return ZSTD_HC_compressBlock_btlazy2;
     }
 }
 
diff --git a/lib/zstdhc_static.h b/lib/zstdhc_static.h
index 1525b35..b49aef2 100644
--- a/lib/zstdhc_static.h
+++ b/lib/zstdhc_static.h
@@ -45,7 +45,9 @@
 /* *************************************
 *  Types
 ***************************************/
-typedef enum { ZSTD_HC_greedy, ZSTD_HC_lazy, ZSTD_HC_lazydeep, ZSTD_HC_btlazy2 } ZSTD_HC_strategy;
+/** Block compression strategies, listed from faster to stronger */
+typedef enum { ZSTD_HC_greedy, ZSTD_HC_lazy, ZSTD_HC_hclazy2, ZSTD_HC_btlazy2 } ZSTD_HC_strategy;
+
 typedef struct
 {
     U32 windowLog;     /* largest match distance : impact decompression buffer size */
@@ -53,7 +55,7 @@
     U32 hashLog;       /* dispatch table : larger == more memory, faster*/
     U32 searchLog;     /* nb of searches : larger == more compression, slower*/
     U32 searchLength;  /* size of matches : larger == faster decompression */
-    ZSTD_HC_strategy strategy;   /* faster to stronger : greedy, lazy, lazydeep, btlazy2 */
+    ZSTD_HC_strategy strategy;
 } ZSTD_HC_parameters;
 
 /* parameters boundaries */
@@ -110,11 +112,11 @@
     { 21, 19, 20,  4,  5, ZSTD_HC_lazy     },  /* level  8 */
     { 21, 19, 20,  5,  5, ZSTD_HC_lazy     },  /* level  9 */
     { 21, 20, 20,  5,  5, ZSTD_HC_lazy     },  /* level 10 */
-    { 21, 20, 20,  5,  5, ZSTD_HC_lazydeep },  /* level 11 */
-    { 22, 20, 22,  5,  5, ZSTD_HC_lazydeep },  /* level 12 */
-    { 22, 20, 22,  6,  5, ZSTD_HC_lazydeep },  /* level 13 */
-    { 22, 21, 22,  6,  5, ZSTD_HC_lazydeep },  /* level 14 */
-    { 22, 21, 22,  6,  5, ZSTD_HC_lazydeep },  /* level 15 */
+    { 21, 20, 20,  5,  5, ZSTD_HC_hclazy2  },  /* level 11 */
+    { 22, 20, 22,  5,  5, ZSTD_HC_hclazy2  },  /* level 12 */
+    { 22, 20, 22,  6,  5, ZSTD_HC_hclazy2  },  /* level 13 */
+    { 22, 21, 22,  6,  5, ZSTD_HC_hclazy2  },  /* level 14 */
+    { 22, 21, 22,  6,  5, ZSTD_HC_hclazy2  },  /* level 15 */
     { 22, 21, 22,  4,  5, ZSTD_HC_btlazy2  },  /* level 16 */
     { 23, 23, 23,  4,  5, ZSTD_HC_btlazy2  },  /* level 17 */
     { 23, 23, 23,  5,  5, ZSTD_HC_btlazy2  },  /* level 18 */
diff --git a/programs/paramgrill.c b/programs/paramgrill.c
index 40033a6..b133a94 100644
--- a/programs/paramgrill.c
+++ b/programs/paramgrill.c
@@ -429,7 +429,7 @@
 }
 
 
-const char* g_stratName[] = { "ZSTD_HC_greedy  ", "ZSTD_HC_lazy    ", "ZSTD_HC_lazydeep", "ZSTD_HC_btlazy2 " };
+const char* g_stratName[] = { "ZSTD_HC_greedy ", "ZSTD_HC_lazy   ", "ZSTD_HC_hclazy2", "ZSTD_HC_btlazy2" };   /* same order as the ZSTD_HC_strategy enum, names space-padded to equal width; presumably indexed by strategy value - confirm */
 
 static void BMK_printWinner(FILE* f, U32 cLevel, BMK_result_t result, ZSTD_HC_parameters params, size_t srcSize)
 {