From 2ea0373b57f1a67c4578811b71cf9a52aeb0ee8f Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Fri, 19 Jul 2024 23:25:30 -0700
Subject: [PATCH] level 2 compatibility with LZ4F dictionary compression

LZ4F_createCDict() doesn't know the compression level with which the dictionary will be used.
Consequently, its HC state is prepared for the "standard" HC mode (i.e. levels 3+).
Unfortunately, the layout of the search tables for this mode is incompatible with level 2 (lz4mid).

Fixed by making level 2 able to query lz4hc tables from an external dictionary state.
Also, disable the "copy tables" command when the active state is incompatible with the dictionary state.
---
 lib/lz4frame.c |  12 ++--
 lib/lz4hc.c    | 152 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 121 insertions(+), 43 deletions(-)

diff --git a/lib/lz4frame.c b/lib/lz4frame.c
index aae5b35f1..f89c05579 100644
--- a/lib/lz4frame.c
+++ b/lib/lz4frame.c
@@ -126,8 +126,9 @@ static void* LZ4F_malloc(size_t s, LZ4F_CustomMem cmem)
 
 static void LZ4F_free(void* p, LZ4F_CustomMem cmem)
 {
-    /* custom malloc defined : use it */
+    if (p == NULL) return;
     if (cmem.customFree != NULL) {
+        /* custom allocation defined : use it */
         cmem.customFree(cmem.opaqueState, p);
         return;
     }
@@ -547,18 +548,19 @@ LZ4F_createCDict_advanced(LZ4F_CustomMem cmem, const void* dictBuffer, size_t di
         dictSize = 64 KB;
     }
     cdict->dictContent = LZ4F_malloc(dictSize, cmem);
+    /* note: using @cmem to allocate => can't use default create */
     cdict->fastCtx = (LZ4_stream_t*)LZ4F_malloc(sizeof(LZ4_stream_t), cmem);
-    if (cdict->fastCtx)
-        LZ4_initStream(cdict->fastCtx, sizeof(LZ4_stream_t));
     cdict->HCCtx = (LZ4_streamHC_t*)LZ4F_malloc(sizeof(LZ4_streamHC_t), cmem);
-    if (cdict->HCCtx)
-        LZ4_initStreamHC(cdict->HCCtx, sizeof(LZ4_streamHC_t));
     if (!cdict->dictContent || !cdict->fastCtx || !cdict->HCCtx) {
         LZ4F_freeCDict(cdict);
         return NULL;
     }
     memcpy(cdict->dictContent, dictStart, dictSize);
+    LZ4_initStream(cdict->fastCtx, sizeof(LZ4_stream_t));
     LZ4_loadDictSlow(cdict->fastCtx, (const char*)cdict->dictContent, (int)dictSize);
+    LZ4_initStreamHC(cdict->HCCtx, sizeof(LZ4_streamHC_t));
+    /* note: we don't know at this point which compression level is going to be used
+     * as a consequence, HCCtx is created for the more common HC mode */
     LZ4_setCompressionLevel(cdict->HCCtx, LZ4HC_CLEVEL_DEFAULT);
     LZ4_loadDictHC(cdict->HCCtx, (const char*)cdict->dictContent, (int)dictSize);
     return cdict;
diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index f709123e5..588919bae 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -80,9 +80,40 @@ typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive;
 /*===   Macros   ===*/
 #define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
 #define MAX(a,b)   ( (a) > (b) ? (a) : (b) )
-#define DELTANEXTU16(table, pos) table[(U16)(pos)]   /* faster */
-/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */
-#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor
+
+
+/*===   Levels definition   ===*/
+typedef enum { lz4mid, lz4hc, lz4opt } lz4hc_strat_e;   /* strategy family; lz4mid tables are not layout-compatible with the other two */
+typedef struct {
+    lz4hc_strat_e strat;   /* strategy used at this level */
+    int nbSearches;        /* NOTE(review): presumably the budget of match candidates per position - confirm at use sites */
+    U32 targetLength;      /* NOTE(review): presumably a "good enough" match length - confirm at use sites */
+} cParams_t;
+static const cParams_t k_clTable[LZ4HC_CLEVEL_MAX+1] = {   /* indexed by compression level, read through LZ4HC_getCLevelParams() */
+    { lz4mid,    2, 16 },  /* 0, unused */
+    { lz4mid,    2, 16 },  /* 1, unused */
+    { lz4mid,    2, 16 },  /* 2 */
+    { lz4hc,     4, 16 },  /* 3 */
+    { lz4hc,     8, 16 },  /* 4 */
+    { lz4hc,    16, 16 },  /* 5 */
+    { lz4hc,    32, 16 },  /* 6 */
+    { lz4hc,    64, 16 },  /* 7 */
+    { lz4hc,   128, 16 },  /* 8 */
+    { lz4hc,   256, 16 },  /* 9 */
+    { lz4opt,   96, 64 },  /*10==LZ4HC_CLEVEL_OPT_MIN*/
+    { lz4opt,  512,128 },  /*11 */
+    { lz4opt,16384,LZ4_OPT_NUM },  /* 12==LZ4HC_CLEVEL_MAX */
+};
+
+static cParams_t LZ4HC_getCLevelParams(int cLevel)
+{
+    /* Levels below 1 select LZ4HC_CLEVEL_DEFAULT; the result is then capped at LZ4HC_CLEVEL_MAX.
+     * note : this clevel convention is a bit different from lz4frame,
+     * possibly something worth revisiting for consistency */
+    int const requested = (cLevel < 1) ? LZ4HC_CLEVEL_DEFAULT : cLevel;
+    int const effective = MIN(LZ4HC_CLEVEL_MAX, requested);
+    return k_clTable[effective];
+}
 
 
 /*===   Hashing   ===*/
@@ -193,6 +224,11 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
     return back;
 }
 
+/*===   Chain table updates   ===*/
+#define DELTANEXTU16(table, pos) (table)[(U16)(pos)]   /* faster; index is truncated to 16 bits, result usable as lvalue */
+/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */
+#define UPDATABLE(ip, op, anchor) &(ip), &(op), &(anchor)
+
 
 /**************************************
 *  Init
@@ -324,6 +360,63 @@ typedef struct {
     int back;  /* negative value */
 } LZ4HC_match_t;
 
+/* LZ4HC_searchExtDict() : look for a match to @ip within the HC tables (hash + chain) of @dictCtx.
+ * Tries at most @nbAttempts candidates, none farther than LZ4_DISTANCE_MAX; keeps only a match longer than @currentBestML.
+ * @return : best length (else @currentBestML), its offset (0 if none) and backward extension (<= 0). File-local helper. */
+static LZ4HC_match_t LZ4HC_searchExtDict(const BYTE* ip, U32 ipIndex,
+        const BYTE* const iLowLimit, const BYTE* const iHighLimit,
+        const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex,
+        int currentBestML, int nbAttempts)
+{
+    size_t const lDictEndIndex = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit;
+    U32 lDictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];   /* candidate, in dictCtx's own index space */
+    U32 matchIndex = lDictMatchIndex + gDictEndIndex - (U32)lDictEndIndex;   /* same candidate, in the caller's index space */
+    int offset = 0, sBack = 0;
+    assert(lDictEndIndex <= 1 GB);
+    if (lDictMatchIndex>0)
+        DEBUGLOG(7, "lDictEndIndex = %zu, lDictMatchIndex = %u", lDictEndIndex, lDictMatchIndex);
+    while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
+        const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + lDictMatchIndex;
+        if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+            int mlt, back;
+            const BYTE* vLimit = ip + (lDictEndIndex - lDictMatchIndex);   /* match cannot extend past the dictionary's end */
+            if (vLimit > iHighLimit) vLimit = iHighLimit;
+            mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH;
+            back = (ip > iLowLimit) ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->prefixStart) : 0;
+            mlt -= back;   /* back is <= 0 : backward bytes add to the match length */
+            if (mlt > currentBestML) {
+                currentBestML = mlt;
+                offset = (int)(ipIndex - matchIndex);
+                sBack = back;
+                DEBUGLOG(7, "found match of length %i within extDictCtx", currentBestML);
+        }   }
+        {   U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, lDictMatchIndex);   /* step to the next candidate in the chain */
+            lDictMatchIndex -= nextOffset;
+            matchIndex -= nextOffset;
+    }   }
+
+    {   LZ4HC_match_t md;
+        md.len = currentBestML;
+        md.off = offset;
+        md.back = sBack;
+        return md;
+    }
+}
+
+/* Prototype of a dictionary search routine, as returned by select_searchDict_function() */
+typedef LZ4HC_match_t (*LZ4MID_searchIntoDict_f)(const BYTE* ip, U32 ipIndex, const BYTE* const iHighLimit,
+        const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex);
+/* Dictionary search going through LZ4HC_searchExtDict() : at most 2 candidates are tried */
+static LZ4HC_match_t LZ4MID_searchHCDict(const BYTE* ip, U32 ipIndex,
+        const BYTE* const iHighLimit,
+        const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex)
+{
+    int const bestSoFar = MINMATCH-1;   /* any match of at least MINMATCH bytes is an improvement */
+    int const maxAttempts = 2;
+    return LZ4HC_searchExtDict(ip, ipIndex, ip /* iLowLimit==ip : no backward extension */, iHighLimit,
+                            dictCtx, gDictEndIndex, bestSoFar, maxAttempts);
+}
+
 static LZ4HC_match_t LZ4MID_searchExtDict(const BYTE* ip, U32 ipIndex,
         const BYTE* const iHighLimit,
         const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex)
@@ -418,6 +511,14 @@ LZ4MID_fillHTable (LZ4HC_CCtx_internal* cctx, const void* dict, size_t size)
     cctx->nextToUpdate = target;
 }
 
+static LZ4MID_searchIntoDict_f select_searchDict_function(const LZ4HC_CCtx_internal* dictCtx)
+{   /* NULL when there is no dictCtx; otherwise the search routine matching the strategy recorded in dictCtx */
+    if (dictCtx == NULL) return NULL;
+    {   lz4hc_strat_e const dictStrat = LZ4HC_getCLevelParams(dictCtx->compressionLevel).strat;
+        return (dictStrat == lz4mid) ? LZ4MID_searchExtDict : LZ4MID_searchHCDict;
+    }
+}
+
 static int LZ4MID_compress (
     LZ4HC_CCtx_internal* const ctx,
     const char* const src,
@@ -445,6 +546,7 @@ static int LZ4MID_compress (
     const BYTE* const dictStart = ctx->dictStart;
     const U32 dictIdx = ctx->lowLimit;
     const U32 gDictEndIndex = ctx->lowLimit;
+    const LZ4MID_searchIntoDict_f searchIntoDict = select_searchDict_function(ctx->dictCtx);
     unsigned matchLength;
     unsigned matchDistance;
 
@@ -550,7 +652,7 @@ static int LZ4MID_compress (
         if ( (dict == usingDictCtxHc)
           && (ipIndex - gDictEndIndex < LZ4_DISTANCE_MAX - 8) ) {
             /* search a match in dictionary */
-            LZ4HC_match_t dMatch = LZ4MID_searchExtDict(ip, ipIndex,
+            LZ4HC_match_t dMatch = searchIntoDict(ip, ipIndex,
                     matchlimit,
                     ctx->dictCtx, gDictEndIndex);
             if (dMatch.len >= MINMATCH) {
@@ -1269,39 +1371,6 @@ static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx,
     const dictCtx_directive dict,
     const HCfavor_e favorDecSpeed);
 
-
-typedef enum { lz4mid, lz4hc, lz4opt } lz4hc_strat_e;
-typedef struct {
-    lz4hc_strat_e strat;
-    int nbSearches;
-    U32 targetLength;
-} cParams_t;
-static const cParams_t k_clTable[LZ4HC_CLEVEL_MAX+1] = {
-    { lz4mid,    2, 16 },  /* 0, unused */
-    { lz4mid,    2, 16 },  /* 1, unused */
-    { lz4mid,    2, 16 },  /* 2 */
-    { lz4hc,     4, 16 },  /* 3 */
-    { lz4hc,     8, 16 },  /* 4 */
-    { lz4hc,    16, 16 },  /* 5 */
-    { lz4hc,    32, 16 },  /* 6 */
-    { lz4hc,    64, 16 },  /* 7 */
-    { lz4hc,   128, 16 },  /* 8 */
-    { lz4hc,   256, 16 },  /* 9 */
-    { lz4opt,   96, 64 },  /*10==LZ4HC_CLEVEL_OPT_MIN*/
-    { lz4opt,  512,128 },  /*11 */
-    { lz4opt,16384,LZ4_OPT_NUM },  /* 12==LZ4HC_CLEVEL_MAX */
-};
-
-static cParams_t LZ4HC_getCLevelParams(int cLevel)
-{
-    /* note : clevel convention is a bit different from lz4frame,
-     * possibly something worth revisiting for consistency */
-    if (cLevel < 1)
-        cLevel = LZ4HC_CLEVEL_DEFAULT;
-    cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
-    return k_clTable[cLevel];
-}
-
 LZ4_FORCE_INLINE int
 LZ4HC_compress_generic_internal (
             LZ4HC_CCtx_internal* const ctx,
@@ -1363,6 +1432,13 @@ LZ4HC_compress_generic_noDictCtx (
     return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx);
 }
 
+static int isStateCompatible(const LZ4HC_CCtx_internal* ctx1, const LZ4HC_CCtx_internal* ctx2)
+{   /* @return 1 when both states use the lz4mid strategy, or neither does; 0 otherwise */
+    lz4hc_strat_e const strat1 = LZ4HC_getCLevelParams(ctx1->compressionLevel).strat;
+    lz4hc_strat_e const strat2 = LZ4HC_getCLevelParams(ctx2->compressionLevel).strat;
+    return (strat1 == lz4mid) == (strat2 == lz4mid);
+}
+
 static int
 LZ4HC_compress_generic_dictCtx (
         LZ4HC_CCtx_internal* const ctx,
@@ -1379,7 +1455,7 @@ LZ4HC_compress_generic_dictCtx (
     if (position >= 64 KB) {
         ctx->dictCtx = NULL;
         return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit);
-    } else if (position == 0 && *srcSizePtr > 4 KB) {
+    } else if (position == 0 && *srcSizePtr > 4 KB && isStateCompatible(ctx, ctx->dictCtx)) {
         LZ4_memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal));
         LZ4HC_setExternalDict(ctx, (const BYTE *)src);
         ctx->compressionLevel = (short)cLevel;