upgrade bundled zstd to 1.4.4

2020-03-08 21:01:54 +01:00 · 2020-03-08 21:01:54 +01:00 · 99f72beaa5
parent 108c590c02
commit 99f72beaa5
54 changed files with 5753 additions and 3257 deletions
--- a/setup_compress.py
+++ b/setup_compress.py
@ -26,6 +26,8 @@ zstd_sources = [
    'lib/compress/hist.c',
    'lib/compress/huf_compress.c',
    'lib/compress/zstd_compress.c',
+    'lib/compress/zstd_compress_literals.c',
+    'lib/compress/zstd_compress_sequences.c',
    'lib/compress/zstd_double_fast.c',
    'lib/compress/zstd_fast.c',
    'lib/compress/zstd_lazy.c',
--- a/src/borg/algorithms/zstd/lib/common/bitstream.h
+++ b/src/borg/algorithms/zstd/lib/common/bitstream.h
@ -57,6 +57,8 @@ extern "C" {
 =========================================*/
 #if defined(__BMI__) && defined(__GNUC__)
 #  include <immintrin.h>   /* support for bextr (experimental) */
+#elif defined(__ICCARM__)
+#  include <intrinsics.h>
 #endif

 #define STREAM_ACCUMULATOR_MIN_32  25
@ -162,7 +164,9 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
        _BitScanReverse ( &r, val );
        return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-        return 31 - __builtin_clz (val);
+        return __builtin_clz (val) ^ 31;
+#   elif defined(__ICCARM__)    /* IAR Intrinsic */
+        return 31 - __CLZ(val);
 #   else   /* Software version */
        static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
                                                 11, 14, 16, 18, 22, 25,  3, 30,
@ -240,9 +244,9 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
 {
    size_t const nbBytes = bitC->bitPos >> 3;
    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
    bitC->ptr += nbBytes;
-    assert(bitC->ptr <= bitC->endPtr);
    bitC->bitPos &= 7;
    bitC->bitContainer >>= nbBytes*8;
 }
@ -256,6 +260,7 @@ MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
 {
    size_t const nbBytes = bitC->bitPos >> 3;
    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
    bitC->ptr += nbBytes;
    if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
--- a/src/borg/algorithms/zstd/lib/common/compiler.h
+++ b/src/borg/algorithms/zstd/lib/common/compiler.h
@ -23,7 +23,7 @@
 #  define INLINE_KEYWORD
 #endif

-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__ICCARM__)
 #  define FORCE_INLINE_ATTR __attribute__((always_inline))
 #elif defined(_MSC_VER)
 #  define FORCE_INLINE_ATTR __forceinline
@ -40,7 +40,7 @@

 /**
 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
- * parameters. They must be inlined for the compiler to elimininate the constant
+ * parameters. They must be inlined for the compiler to eliminate the constant
 * branches.
 */
 #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
@ -61,11 +61,18 @@
 #  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
 #endif

+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#if defined(__GNUC__)
+#  define UNUSED_ATTR __attribute__((unused))
+#else
+#  define UNUSED_ATTR
+#endif
+
 /* force no inlining */
 #ifdef _MSC_VER
 #  define FORCE_NOINLINE static __declspec(noinline)
 #else
-#  ifdef __GNUC__
+#  if defined(__GNUC__) || defined(__ICCARM__)
 #    define FORCE_NOINLINE static __attribute__((__noinline__))
 #  else
 #    define FORCE_NOINLINE static
@ -76,7 +83,7 @@
 #ifndef __has_attribute
  #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
 #endif
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__ICCARM__)
 #  define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
 #else
 #  define TARGET_ATTRIBUTE(target)
@ -127,6 +134,18 @@
    }                                     \
 }

+/* vectorization
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
+#if !defined(__clang__) && defined(__GNUC__)
+#  if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
+#    define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+#  else
+#    define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
+#  endif
+#else
+#  define DONT_VECTORIZE
+#endif
+
 /* disable warnings */
 #ifdef _MSC_VER    /* Visual Studio */
 #  include <intrin.h>                    /* For Visual 2005 */
--- a/src/borg/algorithms/zstd/lib/common/fse.h
+++ b/src/borg/algorithms/zstd/lib/common/fse.h
@ -308,7 +308,7 @@ If there is an error, the function will return an error code, which can be teste
 *******************************************/
 /* FSE buffer bounds */
 #define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */

 /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
@ -358,7 +358,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size
 typedef enum {
   FSE_repeat_none,  /**< Cannot use the previous table */
   FSE_repeat_check, /**< Can use the previous table but it must be checked */
-   FSE_repeat_valid  /**< Can use the previous table and it is asumed to be valid */
+   FSE_repeat_valid  /**< Can use the previous table and it is assumed to be valid */
 } FSE_repeat;

 /* *****************************************
--- a/src/borg/algorithms/zstd/lib/common/fse_decompress.c
+++ b/src/borg/algorithms/zstd/lib/common/fse_decompress.c
@ -52,7 +52,9 @@
 #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */

 /* check and forward error code */
+#ifndef CHECK_F
 #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
+#endif


 /* **************************************************************
--- a/src/borg/algorithms/zstd/lib/common/mem.h
+++ b/src/borg/algorithms/zstd/lib/common/mem.h
@ -47,6 +47,79 @@ extern "C" {
 #define MEM_STATIC_ASSERT(c)   { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }

+/* detects whether we are being compiled under msan */
+#if defined (__has_feature)
+#  if __has_feature(memory_sanitizer)
+#    define MEMORY_SANITIZER 1
+#  endif
+#endif
+
+#if defined (MEMORY_SANITIZER)
+/* Not all platforms that support msan provide sanitizers/msan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+
+#include <stdint.h> /* intptr_t */
+
+/* Make memory region fully initialized (without changing its contents). */
+void __msan_unpoison(const volatile void *a, size_t size);
+
+/* Make memory region fully uninitialized (without changing its contents).
+   This is a legacy interface that does not update origin information. Use
+   __msan_allocated_memory() instead. */
+void __msan_poison(const volatile void *a, size_t size);
+
+/* Returns the offset of the first (at least partially) poisoned byte in the
+   memory range, or -1 if the whole range is good. */
+intptr_t __msan_test_shadow(const volatile void *x, size_t size);
+#endif
+
+/* detects whether we are being compiled under asan */
+#if defined (__has_feature)
+#  if __has_feature(address_sanitizer)
+#    define ADDRESS_SANITIZER 1
+#  endif
+#elif defined(__SANITIZE_ADDRESS__)
+#  define ADDRESS_SANITIZER 1
+#endif
+
+#if defined (ADDRESS_SANITIZER)
+/* Not all platforms that support asan provide sanitizers/asan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as unaddressable.
+ *
+ * This memory must be previously allocated by your program. Instrumented
+ * code is forbidden from accessing addresses in this region until it is
+ * unpoisoned. This function is not guaranteed to poison the entire region -
+ * it could poison only a subregion of <c>[addr, addr+size)</c> due to ASan
+ * alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can poison or
+ * unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_poison_memory_region(void const volatile *addr, size_t size);
+
+/**
+ * Marks a memory region (<c>[addr, addr+size)</c>) as addressable.
+ *
+ * This memory must be previously allocated by your program. Accessing
+ * addresses in this region is allowed until this region is poisoned again.
+ * This function could unpoison a super-region of <c>[addr, addr+size)</c> due
+ * to ASan alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can
+ * poison or unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+#endif
+

 /*-**************************************************************
 *  Basic Types
@ -102,7 +175,7 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size
 #ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
 #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define MEM_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) || defined(__GNUC__)
+#  elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
 #    define MEM_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
--- a/src/borg/algorithms/zstd/lib/common/pool.c
+++ b/src/borg/algorithms/zstd/lib/common/pool.c
@ -127,9 +127,13 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
    ctx->queueTail = 0;
    ctx->numThreadsBusy = 0;
    ctx->queueEmpty = 1;
-    (void)ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
-    (void)ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
-    (void)ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+    {
+        int error = 0;
+        error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL);
+        error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL);
+        error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL);
+        if (error) { POOL_free(ctx); return NULL; }
+    }
    ctx->shutdown = 0;
    /* Allocate space for the thread handles */
    ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
--- a/src/borg/algorithms/zstd/lib/common/threading.c
+++ b/src/borg/algorithms/zstd/lib/common/threading.c
@ -14,8 +14,10 @@
 * This file will hold wrapper for systems, which do not support pthreads
 */

-/* create fake symbol to avoid empty trnaslation unit warning */
-int g_ZSTD_threading_useles_symbol;
+#include "threading.h"
+
+/* create fake symbol to avoid empty translation unit warning */
+int g_ZSTD_threading_useless_symbol;

 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)

@ -28,7 +30,6 @@ int g_ZSTD_threading_useles_symbol;
 /* ===  Dependencies  === */
 #include <process.h>
 #include <errno.h>
-#include "threading.h"


 /* ===  Implementation  === */
@ -73,3 +74,47 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
 }

 #endif   /* ZSTD_MULTITHREAD */
+
+#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32)
+
+#include <stdlib.h>
+
+int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
+{
+    *mutex = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t));
+    if (!*mutex)
+        return 1;
+    return pthread_mutex_init(*mutex, attr);
+}
+
+int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
+{
+    if (!*mutex)
+        return 0;
+    {
+        int const ret = pthread_mutex_destroy(*mutex);
+        free(*mutex);
+        return ret;
+    }
+}
+
+int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
+{
+    *cond = (pthread_cond_t*)malloc(sizeof(pthread_cond_t));
+    if (!*cond)
+        return 1;
+    return pthread_cond_init(*cond, attr);
+}
+
+int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
+{
+    if (!*cond)
+        return 0;
+    {
+        int const ret = pthread_cond_destroy(*cond);
+        free(*cond);
+        return ret;
+    }
+}
+
+#endif
--- a/src/borg/algorithms/zstd/lib/common/threading.h
+++ b/src/borg/algorithms/zstd/lib/common/threading.h
@ -13,6 +13,8 @@
 #ifndef THREADING_H_938743
 #define THREADING_H_938743

+#include "debug.h"
+
 #if defined (__cplusplus)
 extern "C" {
 #endif
@ -75,10 +77,12 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
 */


-#elif defined(ZSTD_MULTITHREAD)   /* posix assumed ; need a better detection method */
+#elif defined(ZSTD_MULTITHREAD)    /* posix assumed ; need a better detection method */
 /* ===   POSIX Systems   === */
 #  include <pthread.h>

+#if DEBUGLEVEL < 1
+
 #define ZSTD_pthread_mutex_t            pthread_mutex_t
 #define ZSTD_pthread_mutex_init(a, b)   pthread_mutex_init((a), (b))
 #define ZSTD_pthread_mutex_destroy(a)   pthread_mutex_destroy((a))
@ -96,6 +100,33 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
 #define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
 #define ZSTD_pthread_join(a, b)         pthread_join((a),(b))

+#else /* DEBUGLEVEL >= 1 */
+
+/* Debug implementation of threading.
+ * In this implementation we use pointers for mutexes and condition variables.
+ * This way, if we forget to init/destroy them the program will crash or ASAN
+ * will report leaks.
+ */
+
+#define ZSTD_pthread_mutex_t            pthread_mutex_t*
+int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr);
+int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex);
+#define ZSTD_pthread_mutex_lock(a)      pthread_mutex_lock(*(a))
+#define ZSTD_pthread_mutex_unlock(a)    pthread_mutex_unlock(*(a))
+
+#define ZSTD_pthread_cond_t             pthread_cond_t*
+int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr);
+int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond);
+#define ZSTD_pthread_cond_wait(a, b)    pthread_cond_wait(*(a), *(b))
+#define ZSTD_pthread_cond_signal(a)     pthread_cond_signal(*(a))
+#define ZSTD_pthread_cond_broadcast(a)  pthread_cond_broadcast(*(a))
+
+#define ZSTD_pthread_t                  pthread_t
+#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
+#define ZSTD_pthread_join(a, b)         pthread_join((a),(b))
+
+#endif
+
 #else  /* ZSTD_MULTITHREAD not defined */
 /* No multithreading support */

--- a/src/borg/algorithms/zstd/lib/common/xxhash.c
+++ b/src/borg/algorithms/zstd/lib/common/xxhash.c
@ -53,7 +53,8 @@
 #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define XXH_FORCE_MEMORY_ACCESS 2
 #  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
-  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
+  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
+  defined(__ICCARM__)
 #    define XXH_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
@ -66,10 +67,10 @@
 /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */

 /*!XXH_FORCE_NATIVE_FORMAT :
- * By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
+ * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
 * Results are therefore identical for little-endian and big-endian CPU.
 * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
- * Should endian-independance be of no importance for your application, you may set the #define below to 1,
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1,
 * to improve speed for Big-endian CPU.
 * This option has no impact on Little_Endian CPU.
 */
@ -120,7 +121,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp
 #  define INLINE_KEYWORD
 #endif

-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__ICCARM__)
 #  define FORCE_INLINE_ATTR __attribute__((always_inline))
 #elif defined(_MSC_VER)
 #  define FORCE_INLINE_ATTR __forceinline
@ -206,7 +207,12 @@ static U64 XXH_read64(const void* memPtr)
 #  define XXH_rotl32(x,r) _rotl(x,r)
 #  define XXH_rotl64(x,r) _rotl64(x,r)
 #else
+#if defined(__ICCARM__)
+#  include <intrinsics.h>
+#  define XXH_rotl32(x,r) __ROR(x,(32 - r))
+#else
 #  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+#endif
 #  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
 #endif

--- a/src/borg/algorithms/zstd/lib/common/zstd_internal.h
+++ b/src/borg/algorithms/zstd/lib/common/zstd_internal.h
@ -34,7 +34,6 @@
 #endif
 #include "xxhash.h"                /* XXH_reset, update, digest */

-
 #if defined (__cplusplus)
 extern "C" {
 #endif
@ -53,8 +52,50 @@ extern "C" {
 #undef MAX
 #define MIN(a,b) ((a)<(b) ? (a) : (b))
 #define MAX(a,b) ((a)>(b) ? (a) : (b))
-#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; }  /* check and Forward error code */
-#define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); }  /* check and send Error code */
+
+/**
+ * Return the specified error if the condition evaluates to true.
+ *
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
+ */
+#define RETURN_ERROR_IF(cond, err, ...) \
+  if (cond) { \
+    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  }
+
+/**
+ * Unconditionally return the specified error.
+ *
+ * In debug modes, prints additional information.
+ */
+#define RETURN_ERROR(err, ...) \
+  do { \
+    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  } while(0);
+
+/**
+ * If the provided expression evaluates to an error code, returns that error code.
+ *
+ * In debug modes, prints additional information.
+ */
+#define FORWARD_IF_ERROR(err, ...) \
+  do { \
+    size_t const err_code = (err); \
+    if (ERR_isError(err_code)) { \
+      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
+      RAWLOG(3, ": " __VA_ARGS__); \
+      RAWLOG(3, "\n"); \
+      return err_code; \
+    } \
+  } while(0);


 /*-*************************************
@ -151,29 +192,59 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
 *  Shared functions to include for inlining
 *********************************************/
 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
+#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
+
+#define WILDCOPY_OVERLENGTH 32
+#define WILDCOPY_VECLEN 16
+
+typedef enum {
+    ZSTD_no_overlap,
+    ZSTD_overlap_src_before_dst
+    /*  ZSTD_overlap_dst_before_src, */
+} ZSTD_overlap_e;

 /*! ZSTD_wildcopy() :
- *  custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
-#define WILDCOPY_OVERLENGTH 8
-MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+ *  Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
+ *           The src buffer must be before the dst buffer.
+ */
+MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
 {
+    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + length;
-    do
-        COPY8(op, ip)
-    while (op < oend);
-}

-MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* should be faster for decoding, but strangely, not verified on all platform */
-{
-    const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)dst;
-    BYTE* const oend = (BYTE*)dstEnd;
-    do
-        COPY8(op, ip)
-    while (op < oend);
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
+
+    if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+        /* Handle short offset copies. */
+        do {
+            COPY8(op, ip)
+        } while (op < oend);
+    } else {
+        assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+        /* Separate out the first two COPY16() calls because the copy length is
+         * almost certain to be short, so the branches have different
+         * probabilities.
+         * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
+         * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
+         */
+        COPY16(op, ip);
+        COPY16(op, ip);
+        if (op >= oend) return;
+        do {
+            COPY16(op, ip);
+            COPY16(op, ip);
+        }
+        while (op < oend);
+    }
 }


@ -200,6 +271,17 @@ typedef struct {
    U32   longLengthPos;
 } seqStore_t;

+/**
+ * Contains the compressed frame size and an upper-bound for the decompressed frame size.
+ * Note: before using `compressedSize`, check for errors using ZSTD_isError().
+ *       similarly, before using `decompressedBound`, check for errors using:
+ *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
+ */
+typedef struct {
+    size_t compressedSize;
+    unsigned long long decompressedBound;
+} ZSTD_frameSizeInfo;   /* decompress & legacy */
+
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */

@ -218,7 +300,9 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus
        _BitScanReverse(&r, val);
        return (unsigned)r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* GCC Intrinsic */
-        return 31 - __builtin_clz(val);
+        return __builtin_clz (val) ^ 31;
+#   elif defined(__ICCARM__)    /* IAR Intrinsic */
+        return 31 - __CLZ(val);
 #   else   /* Software version */
        static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
        U32 v = val;
--- a/src/borg/algorithms/zstd/lib/compress/fse_compress.c
+++ b/src/borg/algorithms/zstd/lib/compress/fse_compress.c
@ -129,9 +129,9 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
    {   U32 position = 0;
        U32 symbol;
        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
-            int nbOccurences;
+            int nbOccurrences;
            int const freq = normalizedCounter[symbol];
-            for (nbOccurences=0; nbOccurences<freq; nbOccurences++) {
+            for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
                tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
                position = (position + step) & tableMask;
                while (position > highThreshold)
--- a/src/borg/algorithms/zstd/lib/compress/zstd_compress.c
+++ b/src/borg/algorithms/zstd/lib/compress/zstd_compress.c
--- a/src/borg/algorithms/zstd/lib/compress/zstd_compress_internal.h
+++ b/src/borg/algorithms/zstd/lib/compress/zstd_compress_internal.h
@ -19,6 +19,7 @@
 *  Dependencies
 ***************************************/
 #include "zstd_internal.h"
+#include "zstd_cwksp.h"
 #ifdef ZSTD_MULTITHREAD
 #  include "zstdmt_compress.h"
 #endif
@ -33,13 +34,13 @@ extern "C" {
 ***************************************/
 #define kSearchStrength      8
 #define HASH_READ_SIZE       8
-#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index 1 now means "unsorted".
+#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : candidate will just be sorted again.
-                                       Additionnally, candidate position 1 will be lost.
+                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
-                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled after table re-use with a different strategy
-                                       Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */


 /*-*************************************
@ -54,6 +55,14 @@ typedef struct ZSTD_prefixDict_s {
    ZSTD_dictContentType_e dictContentType;
 } ZSTD_prefixDict;

+typedef struct {
+    void* dictBuffer;
+    void const* dict;
+    size_t dictSize;
+    ZSTD_dictContentType_e dictContentType;
+    ZSTD_CDict* cdict;
+} ZSTD_localDict;
+
 typedef struct {
    U32 CTable[HUF_CTABLE_SIZE_U32(255)];
    HUF_repeat repeatMode;
@ -107,6 +116,7 @@ typedef struct {
    U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
+    ZSTD_literalCompressionMode_e literalCompressionMode;
 } optState_t;

 typedef struct {
@ -119,21 +129,26 @@ typedef struct {
    BYTE const* base;       /* All regular indexes relative to this position */
    BYTE const* dictBase;   /* extDict indexes relative to this position */
    U32 dictLimit;          /* below that point, need extDict */
-    U32 lowLimit;           /* below that point, no more data */
+    U32 lowLimit;           /* below that point, no more valid data */
 } ZSTD_window_t;

 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
 struct ZSTD_matchState_t {
    ZSTD_window_t window;   /* State for window round buffer management */
-    U32 loadedDictEnd;      /* index of end of dictionary */
+    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
+                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
+                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
+                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
+                             * When dict referential is copied into active context (i.e. not attached),
+                             * loadedDictEnd == dictSize, since referential starts from zero.
+                             */
    U32 nextToUpdate;       /* index from which to continue table update */
-    U32 nextToUpdate3;      /* index from which to continue table update */
-    U32 hashLog3;           /* dispatch table : larger == faster, more memory */
+    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
    U32* hashTable;
    U32* hashTable3;
    U32* chainTable;
    optState_t opt;         /* optimal parser state */
-    const ZSTD_matchState_t * dictMatchState;
+    const ZSTD_matchState_t* dictMatchState;
    ZSTD_compressionParameters cParams;
 };

@ -178,6 +193,13 @@ typedef struct {
  size_t capacity; /* The capacity starting from `seq` pointer */
 } rawSeqStore_t;

+typedef struct {
+    int collectSequences;
+    ZSTD_Sequence* seqStart;
+    size_t seqIndex;
+    size_t maxSequences;
+} SeqCollector;
+
 struct ZSTD_CCtx_params_s {
    ZSTD_format_e format;
    ZSTD_compressionParameters cParams;
@ -186,8 +208,15 @@ struct ZSTD_CCtx_params_s {
    int compressionLevel;
    int forceWindow;           /* force back-references to respect limit of
                                * 1<<wLog, even for dictionary */
+    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
+                                * No target when targetCBlockSize == 0.
+                                * There is no guarantee on compressed block size */
+    int srcSizeHint;           /* User's best guess of source size.
+                                * Hint is not valid when srcSizeHint == 0.
+                                * There is no guarantee that hint is close to actual source size */

    ZSTD_dictAttachPref_e attachDictPref;
+    ZSTD_literalCompressionMode_e literalCompressionMode;

    /* Multithreading: used to pass parameters to mtctx */
    int nbWorkers;
@ -210,9 +239,7 @@ struct ZSTD_CCtx_s {
    ZSTD_CCtx_params appliedParams;
    U32   dictID;

-    int workSpaceOversizedDuration;
-    void* workSpace;
-    size_t workSpaceSize;
+    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
    size_t blockSize;
    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
    unsigned long long consumedSrcSize;
@ -220,6 +247,8 @@ struct ZSTD_CCtx_s {
    XXH64_state_t xxhState;
    ZSTD_customMem customMem;
    size_t staticSize;
+    SeqCollector seqCollector;
+    int isFirstBlock;

    seqStore_t seqStore;      /* sequences storage ptrs */
    ldmState_t ldmState;      /* long distance matching state */
@ -243,7 +272,7 @@ struct ZSTD_CCtx_s {
    U32    frameEnded;

    /* Dictionary */
-    ZSTD_CDict* cdictLocal;
+    ZSTD_localDict localDict;
    const ZSTD_CDict* cdict;
    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */

@ -295,26 +324,81 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
 }

-/*! ZSTD_storeSeq() :
- *  Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
- *  `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
- *  `mlBase` : matchLength - MINMATCH
-*/
-MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
+/* ZSTD_cParam_withinBounds:
+ * @return 1 if value is within cParam bounds,
+ * 0 otherwise */
+MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
 {
+    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
+    if (ZSTD_isError(bounds.error)) return 0;
+    if (value < bounds.lowerBound) return 0;
+    if (value > bounds.upperBound) return 0;
+    return 1;
+}
+
+/* ZSTD_minGain() :
+ * minimum compression required
+ * to generate a compress block or a compressed literals section.
+ * note : use same formula for both situations */
+MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
+{
+    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
+    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
+    return (srcSize >> minlog) + 2;
+}
+
+/*! ZSTD_safecopyLiterals() :
+ *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
+ *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
+ *  large copies.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+    assert(iend > ilimit_w);
+    if (ip <= ilimit_w) {
+        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
+        op += ilimit_w - ip;
+        ip = ilimit_w;
+    }
+    while (ip < iend) *op++ = *ip++;
+}
+
+/*! ZSTD_storeSeq() :
+ *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
+ *  `mlBase` : matchLength - MINMATCH
+ *  Allowed to overread literals up to litLimit.
+*/
+HINT_INLINE UNUSED_ATTR
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
+{
+    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
+    BYTE const* const litEnd = literals + litLength;
 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
    static const BYTE* g_start = NULL;
    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
-               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
+               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
    }
 #endif
    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
    /* copy Literals */
    assert(seqStorePtr->maxNbLit <= 128 KB);
    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
-    ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
+    assert(literals + litLength <= litLimit);
+    if (litEnd <= litLimit_w) {
+        /* Common case we can use wildcopy.
+	 * First copy 16 bytes, because literals are likely short.
+	 */
+        assert(WILDCOPY_OVERLENGTH >= 16);
+        ZSTD_copy16(seqStorePtr->lit, literals);
+        if (litLength > 16) {
+            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+        }
+    } else {
+        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
+    }
    seqStorePtr->lit += litLength;

    /* literal Length */
@ -326,7 +410,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v
    seqStorePtr->sequences[0].litLength = (U16)litLength;

    /* match offset */
-    seqStorePtr->sequences[0].offset = offsetCode + 1;
+    seqStorePtr->sequences[0].offset = offCode + 1;

    /* match Length */
    if (mlBase>0xFFFF) {
@ -554,6 +638,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64
 /*-*************************************
 *  Round buffer management
 ***************************************/
+#if (ZSTD_WINDOWLOG_MAX_64 > 31)
+# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
+#endif
 /* Max current allowed */
 #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
 /* Maximum chunk size before overflow correction needs to be called again */
@ -665,31 +752,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
 * Updates lowLimit so that:
 *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
 *
- * This allows a simple check that index >= lowLimit to see if index is valid.
- * This must be called before a block compression call, with srcEnd as the block
- * source end.
+ * It ensures index is valid as long as index >= lowLimit.
+ * This must be called before a block compression call.
 *
- * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
- * This is because dictionaries are allowed to be referenced as long as the last
- * byte of the dictionary is in the window, but once they are out of range,
- * they cannot be referenced. If loadedDictEndPtr is NULL, we use
- * loadedDictEnd == 0.
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
+ * As the name implies, loadedDictEnd represents the index at end of dictionary.
+ * The value lies within context's referential, it can be directly compared to blockEndIdx.
 *
- * In normal dict mode, the dict is between lowLimit and dictLimit. In
- * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
- * is below them. forceWindow and dictMatchState are therefore incompatible.
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
+ * This is because dictionaries are allowed to be referenced fully
+ * as long as the last byte of the dictionary is in the window.
+ * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
+ *
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
+ * and the dictionary is below them.
+ * forceWindow and dictMatchState are therefore incompatible.
 */
 MEM_STATIC void
 ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
-                           void const* srcEnd,
-                           U32 maxDist,
-                           U32* loadedDictEndPtr,
+                     const void* blockEnd,
+                           U32   maxDist,
+                           U32*  loadedDictEndPtr,
                     const ZSTD_matchState_t** dictMatchStatePtr)
 {
-    U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
-    U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
-    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
-                (unsigned)blockEndIdx, (unsigned)maxDist);
+    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+
+    /* - When there is no dictionary : loadedDictEnd == 0.
+         In which case, the test (blockEndIdx > maxDist) is merely to avoid
+         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
+       - When there is a standard dictionary :
+         Index referential is copied from the dictionary,
+         which means it starts from 0.
+         In which case, loadedDictEnd == dictSize,
+         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
+         since `blockEndIdx` also starts from zero.
+       - When there is an attached dictionary :
+         loadedDictEnd is expressed within the referential of the context,
+         so it can be directly compared against blockEndIdx.
+    */
    if (blockEndIdx > maxDist + loadedDictEnd) {
        U32 const newLowLimit = blockEndIdx - maxDist;
        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
@ -698,13 +803,47 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
            window->dictLimit = window->lowLimit;
        }
-        if (loadedDictEndPtr)
-            *loadedDictEndPtr = 0;
-        if (dictMatchStatePtr)
-            *dictMatchStatePtr = NULL;
+        /* On reaching window size, dictionaries are invalidated */
+        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
+        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
    }
 }

+/* Similar to ZSTD_window_enforceMaxDist(),
+ * but only invalidates dictionary
+ * when input progresses beyond window size.
+ * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
+ *              loadedDictEnd uses same referential as window->base
+ *              maxDist is the window size */
+MEM_STATIC void
+ZSTD_checkDictValidity(const ZSTD_window_t* window,
+                       const void* blockEnd,
+                             U32   maxDist,
+                             U32*  loadedDictEndPtr,
+                       const ZSTD_matchState_t** dictMatchStatePtr)
+{
+    assert(loadedDictEndPtr != NULL);
+    assert(dictMatchStatePtr != NULL);
+    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+        U32 const loadedDictEnd = *loadedDictEndPtr;
+        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+        assert(blockEndIdx >= loadedDictEnd);
+
+        if (blockEndIdx > loadedDictEnd + maxDist) {
+            /* On reaching window size, dictionaries are invalidated.
+             * For simplification, if window size is reached anywhere within next block,
+             * the dictionary is invalidated for the full block.
+             */
+            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
+            *loadedDictEndPtr = 0;
+            *dictMatchStatePtr = NULL;
+        } else {
+            if (*loadedDictEndPtr != 0) {
+                DEBUGLOG(6, "dictionary considered valid for current block");
+    }   }   }
+}
+
 /**
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.
@ -744,6 +883,17 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
    return contiguous;
 }

+MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog)
+{
+    U32    const maxDistance = 1U << windowLog;
+    U32    const lowestValid = ms->window.lowLimit;
+    U32    const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+    U32    const isDictionary = (ms->loadedDictEnd != 0);
+    U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
+    return matchLowest;
+}
+
+

 /* debug functions */
 #if (DEBUGLEVEL>=2)
@ -802,17 +952,10 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
 size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                     const ZSTD_CDict* cdict,
-                     ZSTD_CCtx_params  params, unsigned long long pledgedSrcSize);
+                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);

 void ZSTD_resetSeqStore(seqStore_t* ssPtr);

-/*! ZSTD_compressStream_generic() :
- *  Private use only. To be called from zstdmt_compress.c in single-thread mode. */
-size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
-                                   ZSTD_outBuffer* output,
-                                   ZSTD_inBuffer* input,
-                                   ZSTD_EndDirective const flushMode);
-
 /*! ZSTD_getCParamsFromCDict() :
 *  as the name implies */
 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
@ -824,7 +967,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
-                                    ZSTD_CCtx_params params,
+                                    const ZSTD_CCtx_params* params,
                                    unsigned long long pledgedSrcSize);

 /* ZSTD_compress_advanced_internal() :
@ -833,13 +976,13 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const void* dict,size_t dictSize,
-                                 ZSTD_CCtx_params params);
+                                 const ZSTD_CCtx_params* params);


 /* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
- *           or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize)
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
 size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);

--- a/src/borg/algorithms/zstd/lib/compress/zstd_compress_literals.c
+++ b/src/borg/algorithms/zstd/lib/compress/zstd_compress_literals.c
@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_literals.h"
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE* const)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall);
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    memcpy(ostart + flSize, src, srcSize);
+    return srcSize + flSize;
+}
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE* const)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ostart[flSize] = *(const BYTE*)src;
+    return flSize+1;
+}
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                        const int bmi2)
+{
+    size_t const minGain = ZSTD_minGain(srcSize, strategy);
+    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
+    BYTE*  const ostart = (BYTE*)dst;
+    U32 singleStream = srcSize < 256;
+    symbolEncodingType_e hType = set_compressed;
+    size_t cLitSize;
+
+    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
+                disableLiteralCompression);
+
+    /* Prepare nextEntropy assuming reusing the existing table */
+    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+    if (disableLiteralCompression)
+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+
+    /* small ? don't even attempt compression (speed opt) */
+#   define COMPRESS_LITERALS_SIZE_MIN 63
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    }
+
+    RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
+    {   HUF_repeat repeat = prevHuf->repeatMode;
+        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
+        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
+        cLitSize = singleStream ?
+            HUF_compress1X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                255, 11, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+            HUF_compress4X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                255, 11, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+        if (repeat != HUF_repeat_none) {
+            /* reused the existing table */
+            hType = set_repeat;
+        }
+    }
+
+    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
+        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    }
+    if (cLitSize==1) {
+        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
+    }
+
+    if (hType == set_compressed) {
+        /* using a newly constructed table */
+        nextHuf->repeatMode = HUF_repeat_check;
+    }
+
+    /* Build header */
+    switch(lhSize)
+    {
+    case 3: /* 2 - 2 - 10 - 10 */
+        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
+            MEM_writeLE24(ostart, lhc);
+            break;
+        }
+    case 4: /* 2 - 2 - 14 - 14 */
+        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
+            MEM_writeLE32(ostart, lhc);
+            break;
+        }
+    case 5: /* 2 - 2 - 18 - 18 */
+        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
+            MEM_writeLE32(ostart, lhc);
+            ostart[4] = (BYTE)(cLitSize >> 10);
+            break;
+        }
+    default:  /* not possible : lhSize is {3,4,5} */
+        assert(0);
+    }
+    return lhSize+cLitSize;
+}
--- a/src/borg/algorithms/zstd/lib/compress/zstd_compress_literals.h
+++ b/src/borg/algorithms/zstd/lib/compress/zstd_compress_literals.h
@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_LITERALS_H
+#define ZSTD_COMPRESS_LITERALS_H
+
+#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
+
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                        const int bmi2);
+
+#endif /* ZSTD_COMPRESS_LITERALS_H */
--- a/src/borg/algorithms/zstd/lib/compress/zstd_compress_sequences.c
+++ b/src/borg/algorithms/zstd/lib/compress/zstd_compress_sequences.c
@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_sequences.h"
+
+/**
+ * -log2(x / 256) lookup table for x in [0, 256).
+ * If x == 0: Return 0
+ * Else: Return floor(-log2(x / 256) * 256)
+ */
+static unsigned const kInverseProbabilityLog256[256] = {
+    0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
+    1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,
+    874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
+    724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
+    618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
+    535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
+    468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
+    411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
+    362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
+    318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
+    279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
+    244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
+    212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
+    182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
+    155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
+    130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
+    106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,
+    83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
+    62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
+    42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
+    23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,
+    5,    4,    2,    1,
+};
+
+static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
+  void const* ptr = ctable;
+  U16 const* u16ptr = (U16 const*)ptr;
+  U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
+  return maxSymbolValue;
+}
+
+/**
+ * Returns the cost in bytes of encoding the normalized count header.
+ * Returns an error if any of the helper functions return an error.
+ */
+static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
+                              size_t const nbSeq, unsigned const FSELog)
+{
+    BYTE wksp[FSE_NCOUNTBOUND];
+    S16 norm[MaxSeq + 1];
+    const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+    FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
+    return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
+}
+
+/**
+ * Returns the cost in bits of encoding the distribution described by count
+ * using the entropy bound.
+ */
+static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
+{
+    unsigned cost = 0;
+    unsigned s;
+    for (s = 0; s <= max; ++s) {
+        unsigned norm = (unsigned)((256 * count[s]) / total);
+        if (count[s] != 0 && norm == 0)
+            norm = 1;
+        assert(count[s] < total);
+        cost += count[s] * kInverseProbabilityLog256[norm];
+    }
+    return cost >> 8;
+}
+
+/**
+ * Returns the cost in bits of encoding the distribution in count using ctable.
+ * Returns an error if ctable cannot represent all the symbols in count.
+ */
+static size_t ZSTD_fseBitCost(
+    FSE_CTable const* ctable,
+    unsigned const* count,
+    unsigned const max)
+{
+    unsigned const kAccuracyLog = 8;
+    size_t cost = 0;
+    unsigned s;
+    FSE_CState_t cstate;
+    FSE_initCState(&cstate, ctable);
+    RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC,
+                    "Repeat FSE_CTable has maxSymbolValue %u < %u",
+                    ZSTD_getFSEMaxSymbolValue(ctable), max);
+    for (s = 0; s <= max; ++s) {
+        unsigned const tableLog = cstate.stateLog;
+        unsigned const badCost = (tableLog + 1) << kAccuracyLog;
+        unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
+        if (count[s] == 0)
+            continue;
+        RETURN_ERROR_IF(bitCost >= badCost, GENERIC,
+                        "Repeat FSE_CTable has Prob[%u] == 0", s);
+        cost += count[s] * bitCost;
+    }
+    return cost >> kAccuracyLog;
+}
+
+/**
+ * Returns the cost in bits of encoding the distribution in count using the
+ * table described by norm. The max symbol support by norm is assumed >= max.
+ * norm must be valid for every symbol with non-zero probability in count.
+ */
+static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
+                                    unsigned const* count, unsigned const max)
+{
+    unsigned const shift = 8 - accuracyLog;
+    size_t cost = 0;
+    unsigned s;
+    assert(accuracyLog <= 8);
+    for (s = 0; s <= max; ++s) {
+        unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
+        unsigned const norm256 = normAcc << shift;
+        assert(norm256 > 0);
+        assert(norm256 < 256);
+        cost += count[s] * kInverseProbabilityLog256[norm256];
+    }
+    return cost >> 8;
+}
+
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+        FSE_CTable const* prevCTable,
+        short const* defaultNorm, U32 defaultNormLog,
+        ZSTD_defaultPolicy_e const isDefaultAllowed,
+        ZSTD_strategy const strategy)
+{
+    ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
+    if (mostFrequent == nbSeq) {
+        *repeatMode = FSE_repeat_none;
+        if (isDefaultAllowed && nbSeq <= 2) {
+            /* Prefer set_basic over set_rle when there are 2 or less symbols,
+             * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
+             * If basic encoding isn't possible, always choose RLE.
+             */
+            DEBUGLOG(5, "Selected set_basic");
+            return set_basic;
+        }
+        DEBUGLOG(5, "Selected set_rle");
+        return set_rle;
+    }
+    if (strategy < ZSTD_lazy) {
+        if (isDefaultAllowed) {
+            size_t const staticFse_nbSeq_max = 1000;
+            size_t const mult = 10 - strategy;
+            size_t const baseLog = 3;
+            size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;  /* 28-36 for offset, 56-72 for lengths */
+            assert(defaultNormLog >= 5 && defaultNormLog <= 6);  /* xx_DEFAULTNORMLOG */
+            assert(mult <= 9 && mult >= 7);
+            if ( (*repeatMode == FSE_repeat_valid)
+              && (nbSeq < staticFse_nbSeq_max) ) {
+                DEBUGLOG(5, "Selected set_repeat");
+                return set_repeat;
+            }
+            if ( (nbSeq < dynamicFse_nbSeq_min)
+              || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
+                DEBUGLOG(5, "Selected set_basic");
+                /* The format allows default tables to be repeated, but it isn't useful.
+                 * When using simple heuristics to select encoding type, we don't want
+                 * to confuse these tables with dictionaries. When running more careful
+                 * analysis, we don't need to waste time checking both repeating tables
+                 * and default tables.
+                 */
+                *repeatMode = FSE_repeat_none;
+                return set_basic;
+            }
+        }
+    } else {
+        size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
+        size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
+        size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
+        size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
+
+        if (isDefaultAllowed) {
+            assert(!ZSTD_isError(basicCost));
+            assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
+        }
+        assert(!ZSTD_isError(NCountCost));
+        assert(compressedCost < ERROR(maxCode));
+        DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
+                    (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
+        if (basicCost <= repeatCost && basicCost <= compressedCost) {
+            DEBUGLOG(5, "Selected set_basic");
+            assert(isDefaultAllowed);
+            *repeatMode = FSE_repeat_none;
+            return set_basic;
+        }
+        if (repeatCost <= compressedCost) {
+            DEBUGLOG(5, "Selected set_repeat");
+            assert(!ZSTD_isError(repeatCost));
+            return set_repeat;
+        }
+        assert(compressedCost < basicCost && compressedCost < repeatCost);
+    }
+    DEBUGLOG(5, "Selected set_compressed");
+    *repeatMode = FSE_repeat_check;
+    return set_compressed;
+}
+
+size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+                unsigned* count, U32 max,
+                const BYTE* codeTable, size_t nbSeq,
+                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                const FSE_CTable* prevCTable, size_t prevCTableSize,
+                void* entropyWorkspace, size_t entropyWorkspaceSize)
+{
+    BYTE* op = (BYTE*)dst;
+    const BYTE* const oend = op + dstCapacity;
+    DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
+
+    switch (type) {
+    case set_rle:
+        FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max));
+        RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall);
+        *op = codeTable[0];
+        return 1;
+    case set_repeat:
+        memcpy(nextCTable, prevCTable, prevCTableSize);
+        return 0;
+    case set_basic:
+        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize));  /* note : could be pre-calculated */
+        return 0;
+    case set_compressed: {
+        S16 norm[MaxSeq + 1];
+        size_t nbSeq_1 = nbSeq;
+        const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+        if (count[codeTable[nbSeq-1]] > 1) {
+            count[codeTable[nbSeq-1]]--;
+            nbSeq_1--;
+        }
+        assert(nbSeq_1 > 1);
+        FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
+        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
+            FORWARD_IF_ERROR(NCountSize);
+            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize));
+            return NCountSize;
+        }
+    }
+    default: assert(0); RETURN_ERROR(GENERIC);
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_encodeSequences_body(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    BIT_CStream_t blockStream;
+    FSE_CState_t  stateMatchLength;
+    FSE_CState_t  stateOffsetBits;
+    FSE_CState_t  stateLitLength;
+
+    RETURN_ERROR_IF(
+        ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
+        dstSize_tooSmall, "not enough space remaining");
+    DEBUGLOG(6, "available space for bitstream : %i  (dstCapacity=%u)",
+                (int)(blockStream.endPtr - blockStream.startPtr),
+                (unsigned)dstCapacity);
+
+    /* first symbols */
+    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    if (longOffsets) {
+        U32 const ofBits = ofCodeTable[nbSeq-1];
+        int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+        if (extraBits) {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_flushBits(&blockStream);
+        }
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                    ofBits - extraBits);
+    } else {
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+    }
+    BIT_flushBits(&blockStream);
+
+    {   size_t n;
+        for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
+            BYTE const llCode = llCodeTable[n];
+            BYTE const ofCode = ofCodeTable[n];
+            BYTE const mlCode = mlCodeTable[n];
+            U32  const llBits = LL_bits[llCode];
+            U32  const ofBits = ofCode;
+            U32  const mlBits = ML_bits[mlCode];
+            DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
+                        (unsigned)sequences[n].litLength,
+                        (unsigned)sequences[n].matchLength + MINMATCH,
+                        (unsigned)sequences[n].offset);
+                                                                            /* 32b*/  /* 64b*/
+                                                                            /* (7)*/  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
+            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
+            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+                BIT_flushBits(&blockStream);                                /* (7)*/
+            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
+            if (longOffsets) {
+                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+                if (extraBits) {
+                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_flushBits(&blockStream);                            /* (7)*/
+                }
+                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                            ofBits - extraBits);                            /* 31 */
+            } else {
+                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+            }
+            BIT_flushBits(&blockStream);                                    /* (7)*/
+            DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
+    }   }
+
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
+    FSE_flushCState(&blockStream, &stateMatchLength);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
+    FSE_flushCState(&blockStream, &stateOffsetBits);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
+    FSE_flushCState(&blockStream, &stateLitLength);
+
+    {   size_t const streamSize = BIT_closeCStream(&blockStream);
+        RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
+        return streamSize;
+    }
+}
+
+static size_t
+ZSTD_encodeSequences_default(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    return ZSTD_encodeSequences_body(dst, dstCapacity,
+                                    CTable_MatchLength, mlCodeTable,
+                                    CTable_OffsetBits, ofCodeTable,
+                                    CTable_LitLength, llCodeTable,
+                                    sequences, nbSeq, longOffsets);
+}
+
+
+#if DYNAMIC_BMI2
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_encodeSequences_bmi2(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    return ZSTD_encodeSequences_body(dst, dstCapacity,
+                                    CTable_MatchLength, mlCodeTable,
+                                    CTable_OffsetBits, ofCodeTable,
+                                    CTable_LitLength, llCodeTable,
+                                    sequences, nbSeq, longOffsets);
+}
+
+#endif
+
+size_t ZSTD_encodeSequences(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
+{
+    DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
+                                         CTable_MatchLength, mlCodeTable,
+                                         CTable_OffsetBits, ofCodeTable,
+                                         CTable_LitLength, llCodeTable,
+                                         sequences, nbSeq, longOffsets);
+    }
+#endif
+    (void)bmi2;
+    return ZSTD_encodeSequences_default(dst, dstCapacity,
+                                        CTable_MatchLength, mlCodeTable,
+                                        CTable_OffsetBits, ofCodeTable,
+                                        CTable_LitLength, llCodeTable,
+                                        sequences, nbSeq, longOffsets);
+}
--- a/src/borg/algorithms/zstd/lib/compress/zstd_compress_sequences.h
+++ b/src/borg/algorithms/zstd/lib/compress/zstd_compress_sequences.h
@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_SEQUENCES_H
+#define ZSTD_COMPRESS_SEQUENCES_H
+
+#include "fse.h" /* FSE_repeat, FSE_CTable */
+#include "zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
+
+typedef enum {
+    ZSTD_defaultDisallowed = 0,
+    ZSTD_defaultAllowed = 1
+} ZSTD_defaultPolicy_e;
+
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+        FSE_CTable const* prevCTable,
+        short const* defaultNorm, U32 defaultNormLog,
+        ZSTD_defaultPolicy_e const isDefaultAllowed,
+        ZSTD_strategy const strategy);
+
+size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+                unsigned* count, U32 max,
+                const BYTE* codeTable, size_t nbSeq,
+                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                const FSE_CTable* prevCTable, size_t prevCTableSize,
+                void* entropyWorkspace, size_t entropyWorkspaceSize);
+
+size_t ZSTD_encodeSequences(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
+
+#endif /* ZSTD_COMPRESS_SEQUENCES_H */
--- a/src/borg/algorithms/zstd/lib/compress/zstd_cwksp.h
+++ b/src/borg/algorithms/zstd/lib/compress/zstd_cwksp.h
@ -0,0 +1,535 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CWKSP_H
+#define ZSTD_CWKSP_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "zstd_internal.h"
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-*************************************
+*  Constants
+***************************************/
+
+/* define "workspace is too large" as this number of times larger than needed */
+#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
+
+/* when workspace is continuously too large
+ * during at least this number of times,
+ * context's memory usage is considered wasteful,
+ * because it's sized to handle a worst case scenario which rarely happens.
+ * In which case, resize it down to free some memory */
+#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
+
+/* Since the workspace is effectively its own little malloc implementation /
+ * arena, when we run under ASAN, we should similarly insert redzones between
+ * each internal element of the workspace, so ASAN will catch overruns that
+ * reach outside an object but that stay inside the workspace.
+ *
+ * This defines the size of that redzone.
+ */
+#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
+#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
+#endif
+
+/*-*************************************
+*  Structures
+***************************************/
+typedef enum {
+    ZSTD_cwksp_alloc_objects,
+    ZSTD_cwksp_alloc_buffers,
+    ZSTD_cwksp_alloc_aligned
+} ZSTD_cwksp_alloc_phase_e;
+
+/**
+ * Zstd fits all its internal datastructures into a single continuous buffer,
+ * so that it only needs to perform a single OS allocation (or so that a buffer
+ * can be provided to it and it can perform no allocations at all). This buffer
+ * is called the workspace.
+ *
+ * Several optimizations complicate that process of allocating memory ranges
+ * from this workspace for each internal datastructure:
+ *
+ * - These different internal datastructures have different setup requirements:
+ *
+ *   - The static objects need to be cleared once and can then be trivially
+ *     reused for each compression.
+ *
+ *   - Various buffers don't need to be initialized at all--they are always
+ *     written into before they're read.
+ *
+ *   - The matchstate tables have a unique requirement that they don't need
+ *     their memory to be totally cleared, but they do need the memory to have
+ *     some bound, i.e., a guarantee that all values in the memory they've been
+ *     allocated is less than some maximum value (which is the starting value
+ *     for the indices that they will then use for compression). When this
+ *     guarantee is provided to them, they can use the memory without any setup
+ *     work. When it can't, they have to clear the area.
+ *
+ * - These buffers also have different alignment requirements.
+ *
+ * - We would like to reuse the objects in the workspace for multiple
+ *   compressions without having to perform any expensive reallocation or
+ *   reinitialization work.
+ *
+ * - We would like to be able to efficiently reuse the workspace across
+ *   multiple compressions **even when the compression parameters change** and
+ *   we need to resize some of the objects (where possible).
+ *
+ * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
+ * abstraction was created. It works as follows:
+ *
+ * Workspace Layout:
+ *
+ * [                        ... workspace ...                         ]
+ * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
+ *
+ * The various objects that live in the workspace are divided into the
+ * following categories, and are allocated separately:
+ *
+ * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
+ *   so that literally everything fits in a single buffer. Note: if present,
+ *   this must be the first object in the workspace, since ZSTD_free{CCtx,
+ *   CDict}() rely on a pointer comparison to see whether one or two frees are
+ *   required.
+ *
+ * - Fixed size objects: these are fixed-size, fixed-count objects that are
+ *   nonetheless "dynamically" allocated in the workspace so that we can
+ *   control how they're initialized separately from the broader ZSTD_CCtx.
+ *   Examples:
+ *   - Entropy Workspace
+ *   - 2 x ZSTD_compressedBlockState_t
+ *   - CDict dictionary contents
+ *
+ * - Tables: these are any of several different datastructures (hash tables,
+ *   chain tables, binary trees) that all respect a common format: they are
+ *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
+ *   Their sizes depend on the cparams.
+ *
+ * - Aligned: these buffers are used for various purposes that require 4 byte
+ *   alignment, but don't require any initialization before they're used.
+ *
+ * - Buffers: these buffers are used for various purposes that don't require
+ *   any alignment or initialization before they're used. This means they can
+ *   be moved around at no cost for a new compression.
+ *
+ * Allocating Memory:
+ *
+ * The various types of objects must be allocated in order, so they can be
+ * correctly packed into the workspace buffer. That order is:
+ *
+ * 1. Objects
+ * 2. Buffers
+ * 3. Aligned
+ * 4. Tables
+ *
+ * Attempts to reserve objects of different types out of order will fail.
+ */
+typedef struct {
+    void* workspace;
+    void* workspaceEnd;
+
+    void* objectEnd;
+    void* tableEnd;
+    void* tableValidEnd;
+    void* allocStart;
+
+    int allocFailed;
+    int workspaceOversizedDuration;
+    ZSTD_cwksp_alloc_phase_e phase;
+} ZSTD_cwksp;
+
+/*-*************************************
+*  Functions
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
+
+MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
+    (void)ws;
+    assert(ws->workspace <= ws->objectEnd);
+    assert(ws->objectEnd <= ws->tableEnd);
+    assert(ws->objectEnd <= ws->tableValidEnd);
+    assert(ws->tableEnd <= ws->allocStart);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    assert(ws->allocStart <= ws->workspaceEnd);
+}
+
+/**
+ * Align must be a power of 2.
+ */
+MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
+    size_t const mask = align - 1;
+    assert((align & mask) == 0);
+    return (size + mask) & ~mask;
+}
+
+/**
+ * Use this to determine how much space in the workspace we will consume to
+ * allocate this object. (Normally it should be exactly the size of the object,
+ * but under special conditions, like ASAN, where we pad each object, it might
+ * be larger.)
+ *
+ * Since tables aren't currently redzoned, you don't need to call through this
+ * to figure out how much space you need for the matchState tables. Everything
+ * else is though.
+ */
+MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+#else
+    return size;
+#endif
+}
+
+MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
+        ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
+    assert(phase >= ws->phase);
+    if (phase > ws->phase) {
+        if (ws->phase < ZSTD_cwksp_alloc_buffers &&
+                phase >= ZSTD_cwksp_alloc_buffers) {
+            ws->tableValidEnd = ws->objectEnd;
+        }
+        if (ws->phase < ZSTD_cwksp_alloc_aligned &&
+                phase >= ZSTD_cwksp_alloc_aligned) {
+            /* If unaligned allocations down from a too-large top have left us
+             * unaligned, we need to realign our alloc ptr. Technically, this
+             * can consume space that is unaccounted for in the neededSpace
+             * calculation. However, I believe this can only happen when the
+             * workspace is too large, and specifically when it is too large
+             * by a larger margin than the space that will be consumed. */
+            /* TODO: cleaner, compiler warning friendly way to do this??? */
+            ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
+            if (ws->allocStart < ws->tableValidEnd) {
+                ws->tableValidEnd = ws->allocStart;
+            }
+        }
+        ws->phase = phase;
+    }
+}
+
+/**
+ * Returns whether this object/buffer/etc was allocated in this workspace.
+ */
+MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
+    return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
+}
+
+/**
+ * Internal function. Do not use directly.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_internal(
+        ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
+    void* alloc;
+    void* bottom = ws->tableEnd;
+    ZSTD_cwksp_internal_advance_phase(ws, phase);
+    alloc = (BYTE *)ws->allocStart - bytes;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    /* over-reserve space */
+    alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+#endif
+
+    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(alloc >= bottom);
+    if (alloc < bottom) {
+        DEBUGLOG(4, "cwksp: alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    if (alloc < ws->tableValidEnd) {
+        ws->tableValidEnd = alloc;
+    }
+    ws->allocStart = alloc;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
+     * either size. */
+    alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+    __asan_unpoison_memory_region(alloc, bytes);
+#endif
+
+    return alloc;
+}
+
+/**
+ * Reserves and returns unaligned memory.
+ */
+MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
+    return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
+}
+
+/**
+ * Reserves and returns memory sized on and aligned on sizeof(unsigned).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
+    assert((bytes & (sizeof(U32)-1)) == 0);
+    return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
+}
+
+/**
+ * Aligned on sizeof(unsigned). These buffers have the special property that
+ * their values remain constrained, allowing us to re-use them without
+ * memset()-ing them.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
+    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
+    void* alloc = ws->tableEnd;
+    void* end = (BYTE *)alloc + bytes;
+    void* top = ws->allocStart;
+
+    DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    assert((bytes & (sizeof(U32)-1)) == 0);
+    ZSTD_cwksp_internal_advance_phase(ws, phase);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(end <= top);
+    if (end > top) {
+        DEBUGLOG(4, "cwksp: table alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    ws->tableEnd = end;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    __asan_unpoison_memory_region(alloc, bytes);
+#endif
+
+    return alloc;
+}
+
+/**
+ * Aligned on sizeof(void*).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
+    size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
+    void* alloc = ws->objectEnd;
+    void* end = (BYTE*)alloc + roundedBytes;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    /* over-reserve space */
+    end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+#endif
+
+    DEBUGLOG(5,
+        "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
+        alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
+    assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
+    assert((bytes & (sizeof(void*)-1)) == 0);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    /* we must be in the first phase, no advance is possible */
+    if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
+        DEBUGLOG(4, "cwksp: object alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    ws->objectEnd = end;
+    ws->tableEnd = end;
+    ws->tableValidEnd = end;
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on
+     * either size. */
+    alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE;
+    __asan_unpoison_memory_region(alloc, bytes);
+#endif
+
+    return alloc;
+}
+
+MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
+
+#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+    /* To validate that the table re-use logic is sound, and that we don't
+     * access table space that we haven't cleaned, we re-"poison" the table
+     * space every time we mark it dirty. */
+    {
+        size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
+        assert(__msan_test_shadow(ws->objectEnd, size) == -1);
+        __msan_poison(ws->objectEnd, size);
+    }
+#endif
+
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    ws->tableValidEnd = ws->objectEnd;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    if (ws->tableValidEnd < ws->tableEnd) {
+        ws->tableValidEnd = ws->tableEnd;
+    }
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Zero the part of the allocated tables not already marked clean.
+ */
+MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    if (ws->tableValidEnd < ws->tableEnd) {
+        memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
+    }
+    ZSTD_cwksp_mark_tables_clean(ws);
+}
+
+/**
+ * Invalidates table allocations.
+ * All other allocations remain valid.
+ */
+MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: clearing tables!");
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    {
+        size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd;
+        __asan_poison_memory_region(ws->objectEnd, size);
+    }
+#endif
+
+    ws->tableEnd = ws->objectEnd;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * Invalidates all buffer, aligned, and table allocations.
+ * Object allocations remain valid.
+ */
+MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: clearing!");
+
+#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
+    /* To validate that the context re-use logic is sound, and that we don't
+     * access stuff that this compression hasn't initialized, we re-"poison"
+     * the workspace (or at least the non-static, non-table parts of it)
+     * every time we start a new compression. */
+    {
+        size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd;
+        __msan_poison(ws->tableValidEnd, size);
+    }
+#endif
+
+#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE)
+    {
+        size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd;
+        __asan_poison_memory_region(ws->objectEnd, size);
+    }
+#endif
+
+    ws->tableEnd = ws->objectEnd;
+    ws->allocStart = ws->workspaceEnd;
+    ws->allocFailed = 0;
+    if (ws->phase > ZSTD_cwksp_alloc_buffers) {
+        ws->phase = ZSTD_cwksp_alloc_buffers;
+    }
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/**
+ * The provided workspace takes ownership of the buffer [start, start+size).
+ * Any existing values in the workspace are ignored (the previously managed
+ * buffer, if present, must be separately freed).
+ */
+MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
+    DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
+    assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
+    ws->workspace = start;
+    ws->workspaceEnd = (BYTE*)start + size;
+    ws->objectEnd = ws->workspace;
+    ws->tableValidEnd = ws->objectEnd;
+    ws->phase = ZSTD_cwksp_alloc_objects;
+    ZSTD_cwksp_clear(ws);
+    ws->workspaceOversizedDuration = 0;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
+    void* workspace = ZSTD_malloc(size, customMem);
+    DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
+    RETURN_ERROR_IF(workspace == NULL, memory_allocation);
+    ZSTD_cwksp_init(ws, workspace, size);
+    return 0;
+}
+
+MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
+    void *ptr = ws->workspace;
+    DEBUGLOG(4, "cwksp: freeing workspace");
+    memset(ws, 0, sizeof(ZSTD_cwksp));
+    ZSTD_free(ptr, customMem);
+}
+
+/**
+ * Moves the management of a workspace from one cwksp to another. The src cwksp
+ * is left in an invalid state (src must be re-init()'ed before its used again).
+ */
+MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
+    *dst = *src;
+    memset(src, 0, sizeof(ZSTD_cwksp));
+}
+
+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
+}
+
+MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
+    return ws->allocFailed;
+}
+
+/*-*************************************
+*  Functions Checking Free Space
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
+}
+
+MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace;
+}
+
+MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    return ZSTD_cwksp_check_available(
+        ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR);
+}
+
+MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)
+        && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
+}
+
+MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
+        ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) {
+        ws->workspaceOversizedDuration++;
+    } else {
+        ws->workspaceOversizedDuration = 0;
+    }
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_CWKSP_H */
--- a/src/borg/algorithms/zstd/lib/compress/zstd_double_fast.c
+++ b/src/borg/algorithms/zstd/lib/compress/zstd_double_fast.c
@ -43,8 +43,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
            /* Only load extra positions for ZSTD_dtlm_full */
            if (dtlm == ZSTD_dtlm_fast)
                break;
-        }
-    }
+    }   }
 }


@ -63,7 +62,11 @@ size_t ZSTD_compressBlock_doubleFast_generic(
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
-    const U32 prefixLowestIndex = ms->window.dictLimit;
+    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32 lowestValid = ms->window.dictLimit;
+    const U32 maxDistance = 1U << cParams->windowLog;
+    /* presumes that, if there is a dictionary, it must be using Attach mode */
+    const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
    const BYTE* const prefixLowest = base + prefixLowestIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - HASH_READ_SIZE;
@ -95,8 +98,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
                                     dictCParams->chainLog : hBitsS;
    const U32 dictAndPrefixLength  = (U32)(ip - prefixLowest + dictEnd - dictStart);

+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
+
    assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);

+    /* if a dictionary is attached, it must be within window range */
+    if (dictMode == ZSTD_dictMatchState) {
+        assert(lowestValid + maxDistance >= endIndex);
+    }
+
    /* init */
    ip += (dictAndPrefixLength == 0);
    if (dictMode == ZSTD_noDict) {
@ -138,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
            ip++;
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
            goto _match_stored;
        }

@ -147,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
            ip++;
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
            goto _match_stored;
        }

@ -170,8 +180,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
                offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
                goto _match_found;
-            }
-        }
+        }   }

        if (matchIndexS > prefixLowestIndex) {
            /* check prefix short match */
@ -186,16 +195,14 @@ size_t ZSTD_compressBlock_doubleFast_generic(

            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
                goto _search_next_long;
-            }
-        }
+        }   }

        ip += ((ip-anchor) >> kSearchStrength) + 1;
        continue;

 _search_next_long:

-        {
-            size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
            size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
            U32 const matchIndexL3 = hashLong[hl3];
            const BYTE* matchL3 = base + matchIndexL3;
@ -221,9 +228,7 @@ _search_next_long:
                    offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
                    goto _match_found;
-                }
-            }
-        }
+        }   }   }

        /* if no long +1 match, explore the short match we found */
        if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
@ -242,7 +247,7 @@ _match_found:
        offset_2 = offset_1;
        offset_1 = offset;

-        ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

 _match_stored:
        /* match found */
@ -250,11 +255,14 @@ _match_stored:
        anchor = ip;

        if (ip <= ilimit) {
-            /* Fill Table */
-            hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
-                hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;  /* here because current+2 could be > iend-8 */
-            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
-                hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = current+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }

            /* check immediate repcode */
            if (dictMode == ZSTD_dictMatchState) {
@ -270,7 +278,7 @@ _match_stored:
                        const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
                        size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
                        U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                        ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                        ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                        hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                        hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                        ip += repLength2;
@ -278,8 +286,7 @@ _match_stored:
                        continue;
                    }
                    break;
-                }
-            }
+            }   }

            if (dictMode == ZSTD_noDict) {
                while ( (ip <= ilimit)
@ -290,18 +297,19 @@ _match_stored:
                    U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
                    ip += rLength;
                    anchor = ip;
                    continue;   /* faster when present ... (?) */
-    }   }   }   }
+        }   }   }
+    }   /* while (ip < ilimit) */

    /* save reps for next block */
    rep[0] = offset_1 ? offset_1 : offsetSaved;
    rep[1] = offset_2 ? offset_2 : offsetSaved;

    /* Return the last literals size */
-    return iend - anchor;
+    return (size_t)(iend - anchor);
 }


@ -360,10 +368,13 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
    const BYTE* anchor = istart;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
-    const U32   prefixStartIndex = ms->window.dictLimit;
    const BYTE* const base = ms->window.base;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
+    const U32   dictStartIndex = lowLimit;
+    const U32   dictLimit = ms->window.dictLimit;
+    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
-    const U32   dictStartIndex = ms->window.lowLimit;
    const BYTE* const dictBase = ms->window.dictBase;
    const BYTE* const dictStart = dictBase + dictStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
@ -371,6 +382,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(

    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);

+    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
+    if (prefixStartIndex == dictStartIndex)
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
+
    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
@ -396,7 +411,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
        } else {
            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@ -407,7 +422,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                offset_2 = offset_1;
                offset_1 = offset;
-                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@ -432,23 +447,27 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                }
                offset_2 = offset_1;
                offset_1 = offset;
-                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);

            } else {
                ip += ((ip-anchor) >> kSearchStrength) + 1;
                continue;
        }   }

-        /* found a match : store it */
+        /* move to next sequence start */
        ip += mLength;
        anchor = ip;

        if (ip <= ilimit) {
-            /* Fill Table */
-            hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
-            hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
-            hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
-            hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = current+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }
+
            /* check immediate repcode */
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
@ -460,7 +479,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                    ip += repLength2;
@ -475,7 +494,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
    rep[1] = offset_2;

    /* Return the last literals size */
-    return iend - anchor;
+    return (size_t)(iend - anchor);
 }


--- a/src/borg/algorithms/zstd/lib/compress/zstd_fast.c
+++ b/src/borg/algorithms/zstd/lib/compress/zstd_fast.c
@ -8,12 +8,13 @@
 * You may select, at your option, one of the above-listed licenses.
 */

-#include "zstd_compress_internal.h"
+#include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
 #include "zstd_fast.h"


 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
-                        void const* end, ZSTD_dictTableLoadMethod_e dtlm)
+                        const void* const end,
+                        ZSTD_dictTableLoadMethod_e dtlm)
 {
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
@ -41,11 +42,160 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
    }   }   }   }
 }

-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_fast_generic(
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_fast_generic(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize,
-        U32 const mls, ZSTD_dictMode_e const dictMode)
+        U32 const mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* support stepSize of 0 */
+    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
+    const BYTE* ip0 = istart;
+    const BYTE* ip1;
+    const BYTE* anchor = istart;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   maxDistance = 1U << cParams->windowLog;
+    const U32   validStartIndex = ms->window.dictLimit;
+    const U32   prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
+    ip0 += (ip0 == prefixStart);
+    ip1 = ip0 + 1;
+    {   U32 const maxRep = (U32)(ip0 - prefixStart);
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+    }
+
+    /* Main Search Loop */
+    while (ip1 < ilimit) {   /* < instead of <=, because check at ip0+2 */
+        size_t mLength;
+        BYTE const* ip2 = ip0 + 2;
+        size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
+        U32 const val0 = MEM_read32(ip0);
+        size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
+        U32 const val1 = MEM_read32(ip1);
+        U32 const current0 = (U32)(ip0-base);
+        U32 const current1 = (U32)(ip1-base);
+        U32 const matchIndex0 = hashTable[h0];
+        U32 const matchIndex1 = hashTable[h1];
+        BYTE const* repMatch = ip2-offset_1;
+        const BYTE* match0 = base + matchIndex0;
+        const BYTE* match1 = base + matchIndex1;
+        U32 offcode;
+        hashTable[h0] = current0;   /* update hash table */
+        hashTable[h1] = current1;   /* update hash table */
+
+        assert(ip0 + 1 == ip1);
+
+        if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
+            mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
+            ip0 = ip2 - mLength;
+            match0 = repMatch - mLength;
+            offcode = 0;
+            goto _match;
+        }
+        if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
+            /* found a regular match */
+            goto _offset;
+        }
+        if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
+            /* found a regular match after one literal */
+            ip0 = ip1;
+            match0 = match1;
+            goto _offset;
+        }
+        {   size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
+            assert(step >= 2);
+            ip0 += step;
+            ip1 += step;
+            continue;
+        }
+_offset: /* Requires: ip0, match0 */
+        /* Compute the offset code */
+        offset_2 = offset_1;
+        offset_1 = (U32)(ip0-match0);
+        offcode = offset_1 + ZSTD_REP_MOVE;
+        mLength = 0;
+        /* Count the backwards match length */
+        while (((ip0>anchor) & (match0>prefixStart))
+             && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
+
+_match: /* Requires: ip0, match0, offcode */
+        /* Count the forward length */
+        mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
+        ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
+        /* match found */
+        ip0 += mLength;
+        anchor = ip0;
+        ip1 = ip0 + 1;
+
+        if (ip0 <= ilimit) {
+            /* Fill Table */
+            assert(base+current0+2 > istart);  /* check base overflow */
+            hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
+            hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
+
+            while ( ((ip0 <= ilimit) & (offset_2>0))  /* offset_2==0 means offset_2 is invalidated */
+                 && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
+                /* store sequence */
+                size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
+                { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
+                hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
+                ip0 += rLength;
+                ip1 = ip0 + 1;
+                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
+                anchor = ip0;
+                continue;   /* faster when present (confirmed on gcc-8) ... (?) */
+            }
+        }
+    }
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_fast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    U32 const mls = ms->cParams.minMatch;
+    assert(ms->dictMatchState == NULL);
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_fast_dictMatchState_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize, U32 const mls)
 {
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32* const hashTable = ms->hashTable;
@ -64,46 +214,34 @@ size_t ZSTD_compressBlock_fast_generic(
    U32 offsetSaved = 0;

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
-    const ZSTD_compressionParameters* const dictCParams =
-                                     dictMode == ZSTD_dictMatchState ?
-                                     &dms->cParams : NULL;
-    const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ?
-                                     dms->hashTable : NULL;
-    const U32 dictStartIndex       = dictMode == ZSTD_dictMatchState ?
-                                     dms->window.dictLimit : 0;
-    const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
-                                     dms->window.base : NULL;
-    const BYTE* const dictStart    = dictMode == ZSTD_dictMatchState ?
-                                     dictBase + dictStartIndex : NULL;
-    const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
-                                     dms->window.nextSrc : NULL;
-    const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
-                                     prefixStartIndex - (U32)(dictEnd - dictBase) :
-                                     0;
+    const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
+    const U32* const dictHashTable = dms->hashTable;
+    const U32 dictStartIndex       = dms->window.dictLimit;
+    const BYTE* const dictBase     = dms->window.base;
+    const BYTE* const dictStart    = dictBase + dictStartIndex;
+    const BYTE* const dictEnd      = dms->window.nextSrc;
+    const U32 dictIndexDelta       = prefixStartIndex - (U32)(dictEnd - dictBase);
    const U32 dictAndPrefixLength  = (U32)(ip - prefixStart + dictEnd - dictStart);
-    const U32 dictHLog             = dictMode == ZSTD_dictMatchState ?
-                                     dictCParams->hashLog : hlog;
+    const U32 dictHLog             = dictCParams->hashLog;

-    assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
+    /* if a dictionary is still attached, it necessarily means that
+     * it is within window size. So we just check it. */
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
+    assert(endIndex - prefixStartIndex <= maxDistance);
+    (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */

-    /* otherwise, we would get index underflow when translating a dict index
-     * into a local index */
-    assert(dictMode != ZSTD_dictMatchState
-        || prefixStartIndex >= (U32)(dictEnd - dictBase));
+    /* ensure there will be no no underflow
+     * when translating a dict index into a local index */
+    assert(prefixStartIndex >= (U32)(dictEnd - dictBase));

    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
    ip += (dictAndPrefixLength == 0);
-    if (dictMode == ZSTD_noDict) {
-        U32 const maxRep = (U32)(ip - prefixStart);
-        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
-        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
-    }
-    if (dictMode == ZSTD_dictMatchState) {
-        /* dictMatchState repCode checks don't currently handle repCode == 0
-         * disabling. */
-        assert(offset_1 <= dictAndPrefixLength);
-        assert(offset_2 <= dictAndPrefixLength);
-    }
+    /* dictMatchState repCode checks don't currently handle repCode == 0
+     * disabling. */
+    assert(offset_1 <= dictAndPrefixLength);
+    assert(offset_2 <= dictAndPrefixLength);

    /* Main Search Loop */
    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
@ -113,50 +251,37 @@ size_t ZSTD_compressBlock_fast_generic(
        U32 const matchIndex = hashTable[h];
        const BYTE* match = base + matchIndex;
        const U32 repIndex = current + 1 - offset_1;
-        const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
-                            && repIndex < prefixStartIndex) ?
+        const BYTE* repMatch = (repIndex < prefixStartIndex) ?
                               dictBase + (repIndex - dictIndexDelta) :
                               base + repIndex;
        hashTable[h] = current;   /* update hash table */

-        if ( (dictMode == ZSTD_dictMatchState)
-          && ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
+        if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
-        } else if ( dictMode == ZSTD_noDict
-                 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
-            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
-            ip++;
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
        } else if ( (matchIndex <= prefixStartIndex) ) {
-            if (dictMode == ZSTD_dictMatchState) {
-                size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
-                U32 const dictMatchIndex = dictHashTable[dictHash];
-                const BYTE* dictMatch = dictBase + dictMatchIndex;
-                if (dictMatchIndex <= dictStartIndex ||
-                    MEM_read32(dictMatch) != MEM_read32(ip)) {
-                    assert(stepSize >= 1);
-                    ip += ((ip-anchor) >> kSearchStrength) + stepSize;
-                    continue;
-                } else {
-                    /* found a dict match */
-                    U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
-                    mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
-                    while (((ip>anchor) & (dictMatch>dictStart))
-                         && (ip[-1] == dictMatch[-1])) {
-                        ip--; dictMatch--; mLength++;
-                    } /* catch up */
-                    offset_2 = offset_1;
-                    offset_1 = offset;
-                    ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
-                }
-            } else {
+            size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
+            U32 const dictMatchIndex = dictHashTable[dictHash];
+            const BYTE* dictMatch = dictBase + dictMatchIndex;
+            if (dictMatchIndex <= dictStartIndex ||
+                MEM_read32(dictMatch) != MEM_read32(ip)) {
                assert(stepSize >= 1);
                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                continue;
+            } else {
+                /* found a dict match */
+                U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
+                mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
+                while (((ip>anchor) & (dictMatch>dictStart))
+                     && (ip[-1] == dictMatch[-1])) {
+                    ip--; dictMatch--; mLength++;
+                } /* catch up */
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
            }
        } else if (MEM_read32(match) != MEM_read32(ip)) {
            /* it's not a match, and we're not going to check the dictionary */
@ -171,7 +296,7 @@ size_t ZSTD_compressBlock_fast_generic(
                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
            offset_2 = offset_1;
            offset_1 = offset;
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
        }

        /* match found */
@ -185,90 +310,53 @@ size_t ZSTD_compressBlock_fast_generic(
            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);

            /* check immediate repcode */
-            if (dictMode == ZSTD_dictMatchState) {
-                while (ip <= ilimit) {
-                    U32 const current2 = (U32)(ip-base);
-                    U32 const repIndex2 = current2 - offset_2;
-                    const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
-                            dictBase - dictIndexDelta + repIndex2 :
-                            base + repIndex2;
-                    if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
-                       && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-                        const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
-                        size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
-                        U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                        ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
-                        hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
-                        ip += repLength2;
-                        anchor = ip;
-                        continue;
-                    }
-                    break;
-                }
-            }
-
-            if (dictMode == ZSTD_noDict) {
-                while ( (ip <= ilimit)
-                     && ( (offset_2>0)
-                        & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
-                    /* store sequence */
-                    size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
-                    U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
-                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
-                    ip += rLength;
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
+                        dictBase - dictIndexDelta + repIndex2 :
+                        base + repIndex2;
+                if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
+                    ip += repLength2;
                    anchor = ip;
-                    continue;   /* faster when present ... (?) */
-    }   }   }   }
+                    continue;
+                }
+                break;
+            }
+        }
+    }

    /* save reps for next block */
    rep[0] = offset_1 ? offset_1 : offsetSaved;
    rep[1] = offset_2 ? offset_2 : offsetSaved;

    /* Return the last literals size */
-    return iend - anchor;
-}
-
-
-size_t ZSTD_compressBlock_fast(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize)
-{
-    ZSTD_compressionParameters const* cParams = &ms->cParams;
-    U32 const mls = cParams->minMatch;
-    assert(ms->dictMatchState == NULL);
-    switch(mls)
-    {
-    default: /* includes case 3 */
-    case 4 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
-    case 5 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
-    case 6 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
-    case 7 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
-    }
+    return (size_t)(iend - anchor);
 }

 size_t ZSTD_compressBlock_fast_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    ZSTD_compressionParameters const* cParams = &ms->cParams;
-    U32 const mls = cParams->minMatch;
+    U32 const mls = ms->cParams.minMatch;
    assert(ms->dictMatchState != NULL);
    switch(mls)
    {
    default: /* includes case 3 */
    case 4 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
    case 5 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
    case 6 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
    case 7 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
    }
 }

@ -287,15 +375,24 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* anchor = istart;
-    const U32   dictStartIndex = ms->window.lowLimit;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
+    const U32   dictStartIndex = lowLimit;
    const BYTE* const dictStart = dictBase + dictStartIndex;
-    const U32   prefixStartIndex = ms->window.dictLimit;
+    const U32   dictLimit = ms->window.dictLimit;
+    const U32   prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
    const BYTE* const prefixStart = base + prefixStartIndex;
    const BYTE* const dictEnd = dictBase + prefixStartIndex;
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 8;
    U32 offset_1=rep[0], offset_2=rep[1];

+    DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic");
+
+    /* switch to "regular" variant if extDict is invalidated due to maxDistance */
+    if (prefixStartIndex == dictStartIndex)
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
+
    /* Search Loop */
    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
@ -306,16 +403,17 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
        const U32    repIndex = current + 1 - offset_1;
        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
        const BYTE* const repMatch = repBase + repIndex;
-        size_t mLength;
        hashTable[h] = current;   /* update hash table */
        assert(offset_1 <= current +1);   /* check repIndex */

        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
-            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
-            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
            ip++;
-            ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
+            ip += rLength;
+            anchor = ip;
        } else {
            if ( (matchIndex < dictStartIndex) ||
                 (MEM_read32(match) != MEM_read32(ip)) ) {
@ -323,21 +421,17 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
                continue;
            }
-            {   const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
-                const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
-                U32 offset;
-                mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+            {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+                const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
+                U32 const offset = current - matchIndex;
+                size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
-                offset = current - matchIndex;
-                offset_2 = offset_1;
-                offset_1 = offset;
-                ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                offset_2 = offset_1; offset_1 = offset;  /* update offset history */
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ip += mLength;
+                anchor = ip;
        }   }

-        /* found a match : store it */
-        ip += mLength;
-        anchor = ip;
-
        if (ip <= ilimit) {
            /* Fill Table */
            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
@ -346,13 +440,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
            while (ip <= ilimit) {
                U32 const current2 = (U32)(ip-base);
                U32 const repIndex2 = current2 - offset_2;
-                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+                const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
-                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+                    { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                    ip += repLength2;
                    anchor = ip;
@ -366,7 +460,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
    rep[1] = offset_2;

    /* Return the last literals size */
-    return iend - anchor;
+    return (size_t)(iend - anchor);
 }


@ -374,8 +468,7 @@ size_t ZSTD_compressBlock_fast_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    ZSTD_compressionParameters const* cParams = &ms->cParams;
-    U32 const mls = cParams->minMatch;
+    U32 const mls = ms->cParams.minMatch;
    switch(mls)
    {
    default: /* includes case 3 */
--- a/src/borg/algorithms/zstd/lib/compress/zstd_lazy.c
+++ b/src/borg/algorithms/zstd/lib/compress/zstd_lazy.c
@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
    U32* largerPtr  = smallerPtr + 1;
    U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
    U32 dummy32;   /* to be nullified at the end */
-    U32 const windowLow = ms->window.lowLimit;
+    U32 const windowValid = ms->window.lowLimit;
+    U32 const maxDistance = 1U << cParams->windowLog;
+    U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid;
+

    DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
                current, dictLimit, windowLow);
@ -239,7 +242,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,

    const BYTE* const base = ms->window.base;
    U32    const current = (U32)(ip-base);
-    U32    const windowLow = ms->window.lowLimit;
+    U32    const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);

    U32*   const bt = ms->chainTable;
    U32    const btLog  = cParams->chainLog - 1;
@ -490,8 +493,12 @@ size_t ZSTD_HcFindBestMatch_generic (
    const U32 dictLimit = ms->window.dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
-    const U32 lowLimit = ms->window.lowLimit;
    const U32 current = (U32)(ip-base);
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 lowestValid = ms->window.lowLimit;
+    const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid;
+    const U32 isDictionary = (ms->loadedDictEnd != 0);
+    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
    const U32 minChain = current > chainSize ? current - chainSize : 0;
    U32 nbAttempts = 1U << cParams->searchLog;
    size_t ml=4-1;
@ -612,12 +619,14 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
 /* *******************************
 *  Common parser - lazy strategy
 *********************************/
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_lazy_generic(
+typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_lazy_generic(
                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
                        U32 rep[ZSTD_REP_NUM],
                        const void* src, size_t srcSize,
-                        const U32 searchMethod, const U32 depth,
+                        const searchMethod_e searchMethod, const U32 depth,
                        ZSTD_dictMode_e const dictMode)
 {
    const BYTE* const istart = (const BYTE*)src;
@ -633,8 +642,10 @@ size_t ZSTD_compressBlock_lazy_generic(
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
    searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
-        (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
-        (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS);
+        (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
+                                         : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
+        (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS
+                                         : ZSTD_HcFindBestMatch_selectMLS);
    U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;

    const ZSTD_matchState_t* const dms = ms->dictMatchState;
@ -653,7 +664,6 @@ size_t ZSTD_compressBlock_lazy_generic(

    /* init */
    ip += (dictAndPrefixLength == 0);
-    ms->nextToUpdate3 = ms->nextToUpdate;
    if (dictMode == ZSTD_noDict) {
        U32 const maxRep = (U32)(ip - prefixLowest);
        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
@ -800,7 +810,7 @@ size_t ZSTD_compressBlock_lazy_generic(
        /* store sequence */
 _storeSequence:
        {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

@ -818,7 +828,7 @@ _storeSequence:
                    const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
                    matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
                    offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                    ip += matchLength;
                    anchor = ip;
                    continue;
@ -833,7 +843,7 @@ _storeSequence:
                /* store sequence */
                matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
-                ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
@ -844,7 +854,7 @@ _storeSequence:
    rep[1] = offset_2 ? offset_2 : savedOffset;

    /* Return the last literals size */
-    return iend - anchor;
+    return (size_t)(iend - anchor);
 }


@ -852,56 +862,56 @@ size_t ZSTD_compressBlock_btlazy2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
 }

 size_t ZSTD_compressBlock_lazy2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
 }

 size_t ZSTD_compressBlock_lazy(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
 }

 size_t ZSTD_compressBlock_greedy(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
 }

 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
 }

 size_t ZSTD_compressBlock_lazy2_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
 }

 size_t ZSTD_compressBlock_lazy_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
 }

 size_t ZSTD_compressBlock_greedy_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
 }


@ -910,7 +920,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
                        U32 rep[ZSTD_REP_NUM],
                        const void* src, size_t srcSize,
-                        const U32 searchMethod, const U32 depth)
+                        const searchMethod_e searchMethod, const U32 depth)
 {
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
@ -928,12 +938,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
    typedef size_t (*searchMax_f)(
                        ZSTD_matchState_t* ms,
                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
-    searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
+    searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;

    U32 offset_1 = rep[0], offset_2 = rep[1];

    /* init */
-    ms->nextToUpdate3 = ms->nextToUpdate;
    ip += (ip == prefixStart);

    /* Match Loop */
@ -1042,7 +1051,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
        /* store sequence */
 _storeSequence:
        {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
            anchor = ip = start + matchLength;
        }

@ -1057,7 +1066,7 @@ _storeSequence:
                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
-                ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
                ip += matchLength;
                anchor = ip;
                continue;   /* faster when present ... (?) */
@ -1070,7 +1079,7 @@ _storeSequence:
    rep[1] = offset_2;

    /* Return the last literals size */
-    return iend - anchor;
+    return (size_t)(iend - anchor);
 }


@ -1078,7 +1087,7 @@ size_t ZSTD_compressBlock_greedy_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0);
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
 }

 size_t ZSTD_compressBlock_lazy_extDict(
@ -1086,7 +1095,7 @@ size_t ZSTD_compressBlock_lazy_extDict(
        void const* src, size_t srcSize)

 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1);
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
 }

 size_t ZSTD_compressBlock_lazy2_extDict(
@ -1094,7 +1103,7 @@ size_t ZSTD_compressBlock_lazy2_extDict(
        void const* src, size_t srcSize)

 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2);
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
 }

 size_t ZSTD_compressBlock_btlazy2_extDict(
@ -1102,5 +1111,5 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
        void const* src, size_t srcSize)

 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2);
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
 }
--- a/src/borg/algorithms/zstd/lib/compress/zstd_lazy.h
+++ b/src/borg/algorithms/zstd/lib/compress/zstd_lazy.h
@ -19,7 +19,7 @@ extern "C" {

 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);

-void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */

 size_t ZSTD_compressBlock_btlazy2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
--- a/src/borg/algorithms/zstd/lib/compress/zstd_ldm.c
+++ b/src/borg/algorithms/zstd/lib/compress/zstd_ldm.c
@ -49,9 +49,9 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
 {
    size_t const ldmHSize = ((size_t)1) << params.hashLog;
    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
-    size_t const ldmBucketSize =
-        ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
-    size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
    return params.enableLdm ? totalSize : 0;
 }

@ -429,7 +429,7 @@ size_t ZSTD_ldm_generateSequences(
     */
    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
    /* The input could be very large (in zstdmt), so it must be broken up into
-     * chunks to enforce the maximmum distance and handle overflow correction.
+     * chunks to enforce the maximum distance and handle overflow correction.
     */
    assert(sequences->pos <= sequences->size);
    assert(sequences->size <= sequences->capacity);
@ -447,7 +447,7 @@ size_t ZSTD_ldm_generateSequences(
        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
            U32 const ldmHSize = 1U << params->hashLog;
            U32 const correction = ZSTD_window_correctOverflow(
-                &ldmState->window, /* cycleLog */ 0, maxDist, src);
+                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
        }
        /* 2. We enforce the maximum offset allowed.
@ -583,7 +583,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
                rep[i] = rep[i-1];
            rep[0] = sequence.offset;
            /* Store the sequence */
-            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
                          sequence.offset + ZSTD_REP_MOVE,
                          sequence.matchLength - MINMATCH);
            ip += sequence.matchLength;
--- a/src/borg/algorithms/zstd/lib/compress/zstd_opt.c
+++ b/src/borg/algorithms/zstd/lib/compress/zstd_opt.c
@ -64,9 +64,15 @@ MEM_STATIC double ZSTD_fCost(U32 price)
 }
 #endif

+static int ZSTD_compressedLiterals(optState_t const* const optPtr)
+{
+    return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
+}
+
 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
 {
-    optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
@ -99,6 +105,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
            const BYTE* const src, size_t const srcSize,
                  int const optLevel)
 {
+    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
    optPtr->priceType = zop_dynamic;

@ -113,9 +120,10 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
            /* huffman table presumed generated by dictionary */
            optPtr->priceType = zop_dynamic;

-            assert(optPtr->litFreq != NULL);
-            optPtr->litSum = 0;
-            {   unsigned lit;
+            if (compressedLiterals) {
+                unsigned lit;
+                assert(optPtr->litFreq != NULL);
+                optPtr->litSum = 0;
                for (lit=0; lit<=MaxLit; lit++) {
                    U32 const scaleLog = 11;   /* scale to 2K */
                    U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
@ -163,10 +171,11 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
        } else {  /* not a dictionary */

            assert(optPtr->litFreq != NULL);
-            {   unsigned lit = MaxLit;
+            if (compressedLiterals) {
+                unsigned lit = MaxLit;
                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
+                optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
            }
-            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);

            {   unsigned ll;
                for (ll=0; ll<=MaxLL; ll++)
@ -190,7 +199,8 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,

    } else {   /* new block : re-use previous statistics, scaled down */

-        optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+        if (compressedLiterals)
+            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
        optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
        optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
        optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
@ -207,6 +217,10 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                int optLevel)
 {
    if (litLength == 0) return 0;
+
+    if (!ZSTD_compressedLiterals(optPtr))
+        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bytes per literal. */
+
    if (optPtr->priceType == zop_predef)
        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */

@ -241,13 +255,13 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
 * to provide a cost which is directly comparable to a match ending at same position */
 static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
 {
-    if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
+    if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel);

    /* dynamic statistics */
    {   U32 const llCode = ZSTD_LLcode(litLength);
-        int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
-                               + WEIGHT(optPtr->litLengthFreq[0], optLevel)   /* note: log2litLengthSum cancel out */
-                               - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
+        int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER)
+                               + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel)   /* note: log2litLengthSum cancel out */
+                               - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
 #if 1
        return contribution;
 #else
@ -264,7 +278,7 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe
                                     const optState_t* const optPtr,
                                     int optLevel)
 {
-    int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
+    int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
                           + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
    return contribution;
 }
@ -310,7 +324,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
                             U32 offsetCode, U32 matchLength)
 {
    /* literals */
-    {   U32 u;
+    if (ZSTD_compressedLiterals(optPtr)) {
+        U32 u;
        for (u=0; u < litLength; u++)
            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
@ -357,13 +372,15 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)

 /* Update hashTable3 up to ip (excluded)
   Assumption : always within prefix (i.e. not within extDict) */
-static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip)
+static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
+                                              U32* nextToUpdate3,
+                                              const BYTE* const ip)
 {
    U32* const hashTable3 = ms->hashTable3;
    U32 const hashLog3 = ms->hashLog3;
    const BYTE* const base = ms->window.base;
-    U32 idx = ms->nextToUpdate3;
-    U32 const target = ms->nextToUpdate3 = (U32)(ip - base);
+    U32 idx = *nextToUpdate3;
+    U32 const target = (U32)(ip - base);
    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
    assert(hashLog3 > 0);

@ -372,6 +389,7 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE*
        idx++;
    }

+    *nextToUpdate3 = target;
    return hashTable3[hash3];
 }

@ -488,9 +506,11 @@ static U32 ZSTD_insertBt1(
    }   }

    *smallerPtr = *largerPtr = 0;
-    if (bestLength > 384) return MIN(192, (U32)(bestLength - 384));   /* speed optimization */
-    assert(matchEndIdx > current + 8);
-    return matchEndIdx - (current + 8);
+    {   U32 positions = 0;
+        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
+        assert(matchEndIdx > current + 8);
+        return MAX(positions, matchEndIdx - (current + 8));
+    }
 }

 FORCE_INLINE_TEMPLATE
@ -505,8 +525,13 @@ void ZSTD_updateTree_internal(
    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                idx, target, dictMode);

-    while(idx < target)
-        idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+    while(idx < target) {
+        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+        assert(idx < (U32)(idx + forward));
+        idx += forward;
+    }
+    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
+    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
    ms->nextToUpdate = target;
 }

@ -516,11 +541,12 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {

 FORCE_INLINE_TEMPLATE
 U32 ZSTD_insertBtAndGetAllMatches (
+                    ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
                    ZSTD_matchState_t* ms,
+                    U32* nextToUpdate3,
                    const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
-                    U32 rep[ZSTD_REP_NUM],
+                    const U32 rep[ZSTD_REP_NUM],
                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
-                    ZSTD_match_t* matches,
                    const U32 lengthToBeat,
                    U32 const mls /* template */)
 {
@ -541,8 +567,8 @@ U32 ZSTD_insertBtAndGetAllMatches (
    U32 const dictLimit = ms->window.dictLimit;
    const BYTE* const dictEnd = dictBase + dictLimit;
    const BYTE* const prefixStart = base + dictLimit;
-    U32 const btLow = btMask >= current ? 0 : current - btMask;
-    U32 const windowLow = ms->window.lowLimit;
+    U32 const btLow = (btMask >= current) ? 0 : current - btMask;
+    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog);
    U32 const matchLow = windowLow ? windowLow : 1;
    U32* smallerPtr = bt + 2*(current&btMask);
    U32* largerPtr  = bt + 2*(current&btMask) + 1;
@ -612,7 +638,7 @@ U32 ZSTD_insertBtAndGetAllMatches (

    /* HC3 match finder */
    if ((mls == 3) /*static*/ && (bestLength < mls)) {
-        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
+        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
        if ((matchIndex3 >= matchLow)
          & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
            size_t mlen;
@ -638,9 +664,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                     (ip+mlen == iLimit) ) {  /* best possible length */
                    ms->nextToUpdate = current+1;  /* skip insertion */
                    return 1;
-                }
-            }
-        }
+        }   }   }
        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
    }

@ -648,19 +672,21 @@ U32 ZSTD_insertBtAndGetAllMatches (

    while (nbCompares-- && (matchIndex >= matchLow)) {
        U32* const nextPtr = bt + 2*(matchIndex & btMask);
-        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        const BYTE* match;
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
        assert(current > matchIndex);

        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
            match = base + matchIndex;
+            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
        } else {
            match = dictBase + matchIndex;
+            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
            if (matchIndex+matchLength >= dictLimit)
-                match = base + matchIndex;   /* prepare for match[matchLength] */
+                match = base + matchIndex;   /* prepare for match[matchLength] read */
        }

        if (matchLength > bestLength) {
@ -745,10 +771,13 @@ U32 ZSTD_insertBtAndGetAllMatches (


 FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
+                        ZSTD_match_t* matches,   /* store result (match found, increasing size) in this table */
                        ZSTD_matchState_t* ms,
+                        U32* nextToUpdate3,
                        const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
-                        U32 rep[ZSTD_REP_NUM], U32 const ll0,
-                        ZSTD_match_t* matches, U32 const lengthToBeat)
+                        const U32 rep[ZSTD_REP_NUM],
+                        U32 const ll0,
+                        U32 const lengthToBeat)
 {
    const ZSTD_compressionParameters* const cParams = &ms->cParams;
    U32 const matchLengthSearch = cParams->minMatch;
@ -757,12 +786,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
    ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
    switch(matchLengthSearch)
    {
-    case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3);
+    case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
    default :
-    case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4);
-    case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5);
+    case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
+    case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
    case 7 :
-    case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6);
+    case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
    }
 }

@ -838,6 +867,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,

    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
+    U32 nextToUpdate3 = ms->nextToUpdate;

    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
    ZSTD_match_t* const matches = optStatePtr->matchTable;
@ -847,7 +877,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
    assert(optLevel <= 2);
-    ms->nextToUpdate3 = ms->nextToUpdate;
    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
    ip += (ip==prefixStart);

@ -858,7 +887,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
        /* find first match */
        {   U32 const litlen = (U32)(ip - anchor);
            U32 const ll0 = !litlen;
-            U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch);
+            U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
            if (!nbMatches) { ip++; continue; }

            /* initialize opt[0] */
@ -870,7 +899,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
            /* large match -> immediate encoding */
            {   U32 const maxML = matches[nbMatches-1].len;
                U32 const maxOffset = matches[nbMatches-1].off;
-                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new serie",
+                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
                            nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));

                if (maxML > sufficient_len) {
@ -955,7 +984,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
                U32 const previousPrice = opt[cur].price;
                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
-                U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch);
+                U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
                U32 matchNb;
                if (!nbMatches) {
                    DEBUGLOG(7, "rPos:%u : no match found", cur);
@ -1069,7 +1098,7 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */

                    assert(anchor + llen <= iend);
                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
-                    ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
+                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
                    anchor += advance;
                    ip = anchor;
            }   }
@ -1079,7 +1108,7 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
    }   /* while (ip < ilimit) */

    /* Return the last literals size */
-    return iend - anchor;
+    return (size_t)(iend - anchor);
 }


@ -1108,7 +1137,8 @@ static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
 /* used in 2-pass strategy */
 MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
 {
-    optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
@ -1117,7 +1147,7 @@ MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
 /* ZSTD_initStats_ultra():
 * make a first compression pass, just to seed stats with more accurate starting values.
 * only works on first block, with no dictionary and no ldm.
- * this function cannot error, hence its constract must be respected.
+ * this function cannot error, hence its contract must be respected.
 */
 static void
 ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
@ -1142,7 +1172,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
    ms->window.dictLimit += (U32)srcSize;
    ms->window.lowLimit = ms->window.dictLimit;
    ms->nextToUpdate = ms->window.dictLimit;
-    ms->nextToUpdate3 = ms->window.dictLimit;

    /* re-inforce weight of collected statistics */
    ZSTD_upscaleStats(&ms->opt);
--- a/src/borg/algorithms/zstd/lib/compress/zstdmt_compress.c
+++ b/src/borg/algorithms/zstd/lib/compress/zstdmt_compress.c
@ -22,6 +22,7 @@
 /* ======   Dependencies   ====== */
 #include <string.h>      /* memcpy, memset */
 #include <limits.h>      /* INT_MAX, UINT_MAX */
+#include "mem.h"         /* MEM_STATIC */
 #include "pool.h"        /* threadpool */
 #include "threading.h"   /* mutex */
 #include "zstd_compress_internal.h"  /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
@ -456,7 +457,7 @@ typedef struct {
     * Must be acquired after the main mutex when acquiring both.
     */
    ZSTD_pthread_mutex_t ldmWindowMutex;
-    ZSTD_pthread_cond_t ldmWindowCond;  /* Signaled when ldmWindow is udpated */
+    ZSTD_pthread_cond_t ldmWindowCond;  /* Signaled when ldmWindow is updated */
    ZSTD_window_t ldmWindow;  /* A thread-safe copy of ldmState.window */
 } serialState_t;

@ -647,7 +648,7 @@ static void ZSTDMT_compressionJob(void* jobDescription)
    buffer_t dstBuff = job->dstBuff;
    size_t lastCBlockSize = 0;

-    /* ressources */
+    /* resources */
    if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation));
    if (dstBuff.start == NULL) {   /* streaming job : doesn't provide a dstBuffer */
        dstBuff = ZSTDMT_getBuffer(job->bufPool);
@ -667,19 +668,19 @@ static void ZSTDMT_compressionJob(void* jobDescription)

    /* init */
    if (job->cdict) {
-        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
+        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize);
        assert(job->firstJob);  /* only allowed for first job */
        if (ZSTD_isError(initError)) JOB_ERROR(initError);
    } else {  /* srcStart points at reloaded section */
        U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
-        {   size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
+        {   size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob);
            if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
        }
        {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
                                        job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
                                        ZSTD_dtlm_fast,
                                        NULL, /*cdict*/
-                                        jobParams, pledgedSrcSize);
+                                        &jobParams, pledgedSrcSize);
            if (ZSTD_isError(initError)) JOB_ERROR(initError);
    }   }

@ -864,14 +865,10 @@ static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
 * Internal use only */
 size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
 {
-    if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
-    params->nbWorkers = nbWorkers;
-    params->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT;
-    params->jobSize = 0;
-    return nbWorkers;
+    return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers);
 }

-ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
+MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem)
 {
    ZSTDMT_CCtx* mtctx;
    U32 nbJobs = nbWorkers + 2;
@ -906,6 +903,17 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
    return mtctx;
 }

+ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
+{
+#ifdef ZSTD_MULTITHREAD
+    return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem);
+#else
+    (void)nbWorkers;
+    (void)cMem;
+    return NULL;
+#endif
+}
+
 ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers)
 {
    return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem);
@ -919,12 +927,18 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
    unsigned jobID;
    DEBUGLOG(3, "ZSTDMT_releaseAllJobResources");
    for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) {
+        /* Copy the mutex/cond out */
+        ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex;
+        ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond;
+
        DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
        ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
-        mtctx->jobs[jobID].dstBuff = g_nullBuffer;
-        mtctx->jobs[jobID].cSize = 0;
+
+        /* Clear the job description, but keep the mutex/cond */
+        memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID]));
+        mtctx->jobs[jobID].job_mutex = mutex;
+        mtctx->jobs[jobID].job_cond = cond;
    }
-    memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
    mtctx->inBuff.buffer = g_nullBuffer;
    mtctx->inBuff.filled = 0;
    mtctx->allJobsCompleted = 1;
@ -986,26 +1000,13 @@ ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params,
    {
    case ZSTDMT_p_jobSize :
        DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value);
-        if ( value != 0  /* default */
-          && value < ZSTDMT_JOBSIZE_MIN)
-            value = ZSTDMT_JOBSIZE_MIN;
-        assert(value >= 0);
-        if (value > ZSTDMT_JOBSIZE_MAX) value = ZSTDMT_JOBSIZE_MAX;
-        params->jobSize = value;
-        return value;
-
+        return ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, value);
    case ZSTDMT_p_overlapLog :
        DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value);
-        if (value < ZSTD_OVERLAPLOG_MIN) value = ZSTD_OVERLAPLOG_MIN;
-        if (value > ZSTD_OVERLAPLOG_MAX) value = ZSTD_OVERLAPLOG_MAX;
-        params->overlapLog = value;
-        return value;
-
+        return ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, value);
    case ZSTDMT_p_rsyncable :
-        value = (value != 0);
-        params->rsyncable = value;
-        return value;
-
+        DEBUGLOG(4, "ZSTD_p_rsyncable : %i", value);
+        return ZSTD_CCtxParams_setParameter(params, ZSTD_c_rsyncable, value);
    default :
        return ERROR(parameter_unsupported);
    }
@ -1021,32 +1022,29 @@ size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
 {
    switch (parameter) {
    case ZSTDMT_p_jobSize:
-        assert(mtctx->params.jobSize <= INT_MAX);
-        *value = (int)(mtctx->params.jobSize);
-        break;
+        return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_jobSize, value);
    case ZSTDMT_p_overlapLog:
-        *value = mtctx->params.overlapLog;
-        break;
+        return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_overlapLog, value);
    case ZSTDMT_p_rsyncable:
-        *value = mtctx->params.rsyncable;
-        break;
+        return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_rsyncable, value);
    default:
        return ERROR(parameter_unsupported);
    }
-    return 0;
 }

 /* Sets parameters relevant to the compression job,
 * initializing others to default values. */
-static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
+static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params)
 {
-    ZSTD_CCtx_params jobParams;
-    memset(&jobParams, 0, sizeof(jobParams));
-
-    jobParams.cParams = params.cParams;
-    jobParams.fParams = params.fParams;
-    jobParams.compressionLevel = params.compressionLevel;
-
+    ZSTD_CCtx_params jobParams = *params;
+    /* Clear parameters related to multithreading */
+    jobParams.forceWindow = 0;
+    jobParams.nbWorkers = 0;
+    jobParams.jobSize = 0;
+    jobParams.overlapLog = 0;
+    jobParams.rsyncable = 0;
+    memset(&jobParams.ldmParams, 0, sizeof(ldmParams_t));
+    memset(&jobParams.customMem, 0, sizeof(ZSTD_customMem));
    return jobParams;
 }

@ -1056,7 +1054,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
 static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
 {
    if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
-    CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
+    FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
    mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
    if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
    mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
@ -1137,9 +1135,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
            size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
            size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
            assert(flushed <= produced);
+            assert(jobPtr->consumed <= jobPtr->src.size);
            toFlush = produced - flushed;
-            if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
-                /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
+            /* if toFlush==0, nothing is available to flush.
+             * However, jobID is expected to still be active:
+             * if jobID was already completed and fully flushed,
+             * ZSTDMT_flushProduced() should have already moved onto next job.
+             * Therefore, some input has not yet been consumed. */
+            if (toFlush==0) {
                assert(jobPtr->consumed < jobPtr->src.size);
            }
        }
@ -1154,14 +1157,18 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
 /* =====   Multi-threaded compression   ===== */
 /* ------------------------------------------ */

-static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
+static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params)
 {
-    if (params.ldmParams.enableLdm)
+    unsigned jobLog;
+    if (params->ldmParams.enableLdm) {
        /* In Long Range Mode, the windowLog is typically oversized.
         * In which case, it's preferable to determine the jobSize
         * based on chainLog instead. */
-        return MAX(21, params.cParams.chainLog + 4);
-    return MAX(20, params.cParams.windowLog + 2);
+        jobLog = MAX(21, params->cParams.chainLog + 4);
+    } else {
+        jobLog = MAX(20, params->cParams.windowLog + 2);
+    }
+    return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX);
 }

 static int ZSTDMT_overlapLog_default(ZSTD_strategy strat)
@ -1192,27 +1199,27 @@ static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat)
    return ovlog;
 }

-static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params)
+static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params)
 {
-    int const overlapRLog = 9 - ZSTDMT_overlapLog(params.overlapLog, params.cParams.strategy);
-    int ovLog = (overlapRLog >= 8) ? 0 : (params.cParams.windowLog - overlapRLog);
+    int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy);
+    int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog);
    assert(0 <= overlapRLog && overlapRLog <= 8);
-    if (params.ldmParams.enableLdm) {
+    if (params->ldmParams.enableLdm) {
        /* In Long Range Mode, the windowLog is typically oversized.
         * In which case, it's preferable to determine the jobSize
         * based on chainLog instead.
         * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */
-        ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
+        ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2)
                - overlapRLog;
    }
-    assert(0 <= ovLog && ovLog <= 30);
-    DEBUGLOG(4, "overlapLog : %i", params.overlapLog);
+    assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX);
+    DEBUGLOG(4, "overlapLog : %i", params->overlapLog);
    DEBUGLOG(4, "overlap size : %i", 1 << ovLog);
    return (ovLog==0) ? 0 : (size_t)1 << ovLog;
 }

 static unsigned
-ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers)
+ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nbWorkers)
 {
    assert(nbWorkers>0);
    {   size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
@ -1235,9 +1242,9 @@ static size_t ZSTDMT_compress_advanced_internal(
          const ZSTD_CDict* cdict,
                ZSTD_CCtx_params params)
 {
-    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
-    size_t const overlapSize = ZSTDMT_computeOverlapSize(params);
-    unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
+    ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(&params);
+    size_t const overlapSize = ZSTDMT_computeOverlapSize(&params);
+    unsigned const nbJobs = ZSTDMT_computeNbJobs(&params, srcSize, params.nbWorkers);
    size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
    size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize;   /* avoid too small last block */
    const char* const srcStart = (const char*)src;
@ -1255,7 +1262,7 @@ static size_t ZSTDMT_compress_advanced_internal(
        ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
        DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
        if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
-        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
+        return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, &jobParams);
    }

    assert(avgJobSize >= 256 KB);  /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
@ -1263,7 +1270,7 @@ static size_t ZSTDMT_compress_advanced_internal(
    if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
        return ERROR(memory_allocation);

-    CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbJobs) );  /* only expands if necessary */
+    FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) );  /* only expands if necessary */

    {   unsigned u;
        for (u=0; u<nbJobs; u++) {
@ -1396,19 +1403,19 @@ size_t ZSTDMT_initCStream_internal(

    /* init */
    if (params.nbWorkers != mtctx->params.nbWorkers)
-        CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
+        FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) );

    if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
-    if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
+    if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;

    mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN);  /* do not trigger multi-threading when srcSize is too small */
    if (mtctx->singleBlockingThread) {
-        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
+        ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(&params);
        DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
        assert(singleThreadParams.nbWorkers == 0);
        return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
                                         dict, dictSize, cdict,
-                                         singleThreadParams, pledgedSrcSize);
+                                         &singleThreadParams, pledgedSrcSize);
    }

    DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
@ -1434,12 +1441,14 @@ size_t ZSTDMT_initCStream_internal(
        mtctx->cdict = cdict;
    }

-    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(params);
+    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
    DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
    mtctx->targetSectionSize = params.jobSize;
    if (mtctx->targetSectionSize == 0) {
-        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
+        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
    }
+    assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
+
    if (params.rsyncable) {
        /* Aim for the targetsectionSize as the average job size. */
        U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20);
@ -1547,7 +1556,7 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
 /* ZSTDMT_writeLastEmptyBlock()
 * Write a single empty block with an end-of-frame to finish a frame.
 * Job must be created from streaming variant.
- * This function is always successfull if expected conditions are fulfilled.
+ * This function is always successful if expected conditions are fulfilled.
 */
 static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
 {
@ -1987,7 +1996,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
    assert(input->pos  <= input->size);

    if (mtctx->singleBlockingThread) {  /* delegate to single-thread (synchronous) */
-        return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
+        return ZSTD_compressStream2(mtctx->cctxPool->cctx[0], output, input, endOp);
    }

    if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) {
@ -2051,7 +2060,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
      || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) {   /* must finish the frame with a zero-size block */
        size_t const jobSize = mtctx->inBuff.filled;
        assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
-        CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
+        FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
    }

    /* check for potential compressed data ready to be flushed */
@ -2065,7 +2074,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,

 size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
 {
-    CHECK_F( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
+    FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );

    /* recommended next input size : fill current input buffer */
    return mtctx->targetSectionSize - mtctx->inBuff.filled;   /* note : could be zero when input buffer is fully filled and no more availability to create new job */
@ -2082,7 +2091,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou
      || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) {  /* need a last 0-size block to end frame */
           DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
                        (U32)srcSize, (U32)endFrame);
-        CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
+        FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
    }

    /* check if there is any data available to flush */
--- a/src/borg/algorithms/zstd/lib/compress/zstdmt_compress.h
+++ b/src/borg/algorithms/zstd/lib/compress/zstdmt_compress.h
@ -17,10 +17,25 @@


 /* Note : This is an internal API.
- *        Some methods are still exposed (ZSTDLIB_API),
+ *        These APIs used to be exposed with ZSTDLIB_API,
 *        because it used to be the only way to invoke MT compression.
- *        Now, it's recommended to use ZSTD_compress_generic() instead.
- *        These methods will stop being exposed in a future version */
+ *        Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2()
+ *        instead.
+ *
+ *        If you depend on these APIs and can't switch, then define
+ *        ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
+ *        However, we may completely remove these functions in a future
+ *        release, so please switch soon.
+ *
+ *        This API requires ZSTD_MULTITHREAD to be defined during compilation,
+ *        otherwise ZSTDMT_createCCtx*() will fail.
+ */
+
+#ifdef ZSTD_LEGACY_MULTITHREADED_API
+#  define ZSTDMT_API ZSTDLIB_API
+#else
+#  define ZSTDMT_API
+#endif

 /* ===   Dependencies   === */
 #include <stddef.h>                /* size_t */
@ -35,22 +50,25 @@
 #ifndef ZSTDMT_JOBSIZE_MIN
 #  define ZSTDMT_JOBSIZE_MIN (1 MB)
 #endif
+#define ZSTDMT_JOBLOG_MAX   (MEM_32bits() ? 29 : 30)
 #define ZSTDMT_JOBSIZE_MAX  (MEM_32bits() ? (512 MB) : (1024 MB))


 /* ===   Memory management   === */
 typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
-ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
-ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
+/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
+ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
+/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
+ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
                                                    ZSTD_customMem cMem);
-ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
+ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);

-ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
+ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);


 /* ===   Simple one-pass compression function   === */

-ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
+ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
                                       void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                       int compressionLevel);
@ -59,31 +77,31 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,

 /* ===   Streaming functions   === */

-ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
-ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize);  /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
+ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
+ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize);  /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */

-ZSTDLIB_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
-ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
+ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);

-ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
-ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);     /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);     /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */


 /* ===   Advanced functions and parameters  === */

-ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
-                                           void* dst, size_t dstCapacity,
-                                     const void* src, size_t srcSize,
-                                     const ZSTD_CDict* cdict,
-                                           ZSTD_parameters params,
-                                           int overlapLog);
+ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
+                                          void* dst, size_t dstCapacity,
+                                    const void* src, size_t srcSize,
+                                    const ZSTD_CDict* cdict,
+                                          ZSTD_parameters params,
+                                          int overlapLog);

-ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
+ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
                                        const void* dict, size_t dictSize,   /* dict can be released after init, a local copy is preserved within zcs */
                                        ZSTD_parameters params,
                                        unsigned long long pledgedSrcSize);  /* pledgedSrcSize is optional and can be zero == unknown */

-ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
+ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
                                        const ZSTD_CDict* cdict,
                                        ZSTD_frameParameters fparams,
                                        unsigned long long pledgedSrcSize);  /* note : zero means empty */
@ -92,7 +110,7 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
 * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
 typedef enum {
    ZSTDMT_p_jobSize,     /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
-    ZSTDMT_p_overlapLog,  /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
+    ZSTDMT_p_overlapLog,  /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
    ZSTDMT_p_rsyncable    /* Enables rsyncable mode. */
 } ZSTDMT_parameter;

@ -101,12 +119,12 @@ typedef enum {
 * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
 * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
-ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
+ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);

 /* ZSTDMT_getMTCtxParameter() :
 * Query the ZSTDMT_CCtx for a parameter value.
 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
-ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
+ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);


 /*! ZSTDMT_compressStream_generic() :
@ -116,7 +134,7 @@ ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter
 *           0 if fully flushed
 *           or an error code
 *  note : needs to be init using any ZSTD_initCStream*() variant */
-ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
+ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
                                                ZSTD_outBuffer* output,
                                                ZSTD_inBuffer* input,
                                                ZSTD_EndDirective endOp);
--- a/src/borg/algorithms/zstd/lib/decompress/huf_decompress.c
+++ b/src/borg/algorithms/zstd/lib/decompress/huf_decompress.c
@ -61,7 +61,9 @@
 *  Error Management
 ****************************************************************/
 #define HUF_isError ERR_isError
+#ifndef CHECK_F
 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
+#endif


 /* **************************************************************
--- a/src/borg/algorithms/zstd/lib/decompress/zstd_ddict.c
+++ b/src/borg/algorithms/zstd/lib/decompress/zstd_ddict.c
@ -105,9 +105,9 @@ ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
    ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);

    /* load entropy tables */
-    CHECK_E( ZSTD_loadDEntropy(&ddict->entropy,
-                                ddict->dictContent, ddict->dictSize),
-             dictionary_corrupted );
+    RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
+            &ddict->entropy, ddict->dictContent, ddict->dictSize)),
+        dictionary_corrupted);
    ddict->entropyPresent = 1;
    return 0;
 }
@ -133,7 +133,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */

    /* parse dictionary content */
-    CHECK_F( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );
+    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );

    return 0;
 }
--- a/src/borg/algorithms/zstd/lib/decompress/zstd_decompress.c
+++ b/src/borg/algorithms/zstd/lib/decompress/zstd_decompress.c
@ -88,10 +88,7 @@ size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }

 static size_t ZSTD_startingInputLength(ZSTD_format_e format)
 {
-    size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
-                    ZSTD_FRAMEHEADERSIZE_PREFIX - ZSTD_FRAMEIDSIZE :
-                    ZSTD_FRAMEHEADERSIZE_PREFIX;
-    ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
+    size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
    /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
    assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
    return startingInputLength;
@ -106,6 +103,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
    dctx->ddictLocal  = NULL;
    dctx->dictEnd     = NULL;
    dctx->ddictIsCold = 0;
+    dctx->dictUses = ZSTD_dont_use;
    dctx->inBuff      = NULL;
    dctx->inBuffSize  = 0;
    dctx->outBuffSize = 0;
@ -147,13 +145,20 @@ ZSTD_DCtx* ZSTD_createDCtx(void)
    return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
 }

+static void ZSTD_clearDict(ZSTD_DCtx* dctx)
+{
+    ZSTD_freeDDict(dctx->ddictLocal);
+    dctx->ddictLocal = NULL;
+    dctx->ddict = NULL;
+    dctx->dictUses = ZSTD_dont_use;
+}
+
 size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
 {
    if (dctx==NULL) return 0;   /* support free on NULL */
-    if (dctx->staticSize) return ERROR(memory_allocation);   /* not compatible with static DCtx */
+    RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
    {   ZSTD_customMem const cMem = dctx->customMem;
-        ZSTD_freeDDict(dctx->ddictLocal);
-        dctx->ddictLocal = NULL;
+        ZSTD_clearDict(dctx);
        ZSTD_free(dctx->inBuff, cMem);
        dctx->inBuff = NULL;
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
@ -203,7 +208,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
 static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
 {
    size_t const minInputSize = ZSTD_startingInputLength(format);
-    if (srcSize < minInputSize) return ERROR(srcSize_wrong);
+    RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong);

    {   BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
        U32 const dictID= fhd & 3;
@ -238,7 +243,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s

    memset(zfhPtr, 0, sizeof(*zfhPtr));   /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
    if (srcSize < minInputSize) return minInputSize;
-    if (src==NULL) return ERROR(GENERIC);   /* invalid parameter */
+    RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");

    if ( (format != ZSTD_f_zstd1_magicless)
      && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
@ -251,7 +256,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
            zfhPtr->frameType = ZSTD_skippableFrame;
            return 0;
        }
-        return ERROR(prefix_unknown);
+        RETURN_ERROR(prefix_unknown);
    }

    /* ensure there is enough `srcSize` to fully read/decode frame header */
@ -269,14 +274,13 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s
        U64 windowSize = 0;
        U32 dictID = 0;
        U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
-        if ((fhdByte & 0x08) != 0)
-            return ERROR(frameParameter_unsupported); /* reserved bits, must be zero */
+        RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported,
+                        "reserved bits, must be zero");

        if (!singleSegment) {
            BYTE const wlByte = ip[pos++];
            U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
-            if (windowLog > ZSTD_WINDOWLOG_MAX)
-                return ERROR(frameParameter_windowTooLarge);
+            RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge);
            windowSize = (1ULL << windowLog);
            windowSize += (windowSize >> 3) * (wlByte&7);
        }
@ -348,14 +352,16 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
    size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
    U32 sizeU32;

-    if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
-        return ERROR(srcSize_wrong);
+    RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong);

    sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
-    if ((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32)
-        return ERROR(frameParameter_unsupported);
-
-    return skippableHeaderSize + sizeU32;
+    RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
+                    frameParameter_unsupported);
+    {
+        size_t const skippableSize = skippableHeaderSize + sizeU32;
+        RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong);
+        return skippableSize;
+    }
 }

 /** ZSTD_findDecompressedSize() :
@ -367,16 +373,15 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
 {
    unsigned long long totalDstSize = 0;

-    while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+    while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
        U32 const magicNumber = MEM_readLE32(src);

        if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
            size_t const skippableSize = readSkippableFrameSize(src, srcSize);
-            if (ZSTD_isError(skippableSize))
-                return skippableSize;
-            if (srcSize < skippableSize) {
+            if (ZSTD_isError(skippableSize)) {
                return ZSTD_CONTENTSIZE_ERROR;
            }
+            assert(skippableSize <= srcSize);

            src = (const BYTE *)src + skippableSize;
            srcSize -= skippableSize;
@ -428,13 +433,91 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
 {
    size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
    if (ZSTD_isError(result)) return result;    /* invalid header */
-    if (result>0) return ERROR(srcSize_wrong);  /* headerSize too small */
-    if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID))
-        return ERROR(dictionary_wrong);
+    RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    /* Skip the dictID check in fuzzing mode, because it makes the search
+     * harder.
+     */
+    RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
+                    dictionary_wrong);
+#endif
    if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0);
    return 0;
 }

+static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
+{
+    ZSTD_frameSizeInfo frameSizeInfo;
+    frameSizeInfo.compressedSize = ret;
+    frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
+    return frameSizeInfo;
+}
+
+static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
+{
+    ZSTD_frameSizeInfo frameSizeInfo;
+    memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
+
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+    if (ZSTD_isLegacy(src, srcSize))
+        return ZSTD_findFrameSizeInfoLegacy(src, srcSize);
+#endif
+
+    if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
+        && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+        frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
+        assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
+               frameSizeInfo.compressedSize <= srcSize);
+        return frameSizeInfo;
+    } else {
+        const BYTE* ip = (const BYTE*)src;
+        const BYTE* const ipstart = ip;
+        size_t remainingSize = srcSize;
+        size_t nbBlocks = 0;
+        ZSTD_frameHeader zfh;
+
+        /* Extract Frame Header */
+        {   size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
+            if (ZSTD_isError(ret))
+                return ZSTD_errorFrameSizeInfo(ret);
+            if (ret > 0)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+        }
+
+        ip += zfh.headerSize;
+        remainingSize -= zfh.headerSize;
+
+        /* Iterate over each block */
+        while (1) {
+            blockProperties_t blockProperties;
+            size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+            if (ZSTD_isError(cBlockSize))
+                return ZSTD_errorFrameSizeInfo(cBlockSize);
+
+            if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+
+            ip += ZSTD_blockHeaderSize + cBlockSize;
+            remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
+            nbBlocks++;
+
+            if (blockProperties.lastBlock) break;
+        }
+
+        /* Final frame content checksum */
+        if (zfh.checksumFlag) {
+            if (remainingSize < 4)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+            ip += 4;
+        }
+
+        frameSizeInfo.compressedSize = ip - ipstart;
+        frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
+                                        ? zfh.frameContentSize
+                                        : nbBlocks * zfh.blockSizeMax;
+        return frameSizeInfo;
+    }
+}

 /** ZSTD_findFrameCompressedSize() :
 *  compatible with legacy mode
@ -443,52 +526,33 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
 *  @return : the compressed size of the frame starting at `src` */
 size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
 {
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
-    if (ZSTD_isLegacy(src, srcSize))
-        return ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
-#endif
-    if ( (srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
-      && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START ) {
-        return readSkippableFrameSize(src, srcSize);
-    } else {
-        const BYTE* ip = (const BYTE*)src;
-        const BYTE* const ipstart = ip;
-        size_t remainingSize = srcSize;
-        ZSTD_frameHeader zfh;
-
-        /* Extract Frame Header */
-        {   size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
-            if (ZSTD_isError(ret)) return ret;
-            if (ret > 0) return ERROR(srcSize_wrong);
-        }
-
-        ip += zfh.headerSize;
-        remainingSize -= zfh.headerSize;
-
-        /* Loop on each block */
-        while (1) {
-            blockProperties_t blockProperties;
-            size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
-            if (ZSTD_isError(cBlockSize)) return cBlockSize;
-
-            if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
-                return ERROR(srcSize_wrong);
-
-            ip += ZSTD_blockHeaderSize + cBlockSize;
-            remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
-
-            if (blockProperties.lastBlock) break;
-        }
-
-        if (zfh.checksumFlag) {   /* Final frame content checksum */
-            if (remainingSize < 4) return ERROR(srcSize_wrong);
-            ip += 4;
-        }
-
-        return ip - ipstart;
-    }
+    ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+    return frameSizeInfo.compressedSize;
 }

+/** ZSTD_decompressBound() :
+ *  compatible with legacy mode
+ *  `src` must point to the start of a ZSTD frame or a skippeable frame
+ *  `srcSize` must be at least as large as the frame contained
+ *  @return : the maximum decompressed size of the compressed source
+ */
+unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
+{
+    unsigned long long bound = 0;
+    /* Iterate over each frame */
+    while (srcSize > 0) {
+        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+        size_t const compressedSize = frameSizeInfo.compressedSize;
+        unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
+        if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
+            return ZSTD_CONTENTSIZE_ERROR;
+        assert(srcSize >= compressedSize);
+        src = (const BYTE*)src + compressedSize;
+        srcSize -= compressedSize;
+        bound += decompressedBound;
+    }
+    return bound;
+}


 /*-*************************************************************
@ -507,9 +571,10 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
 }

 /** ZSTD_insertBlock() :
-    insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+ *  insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
 size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
 {
+    DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
    ZSTD_checkContinuity(dctx, blockStart);
    dctx->previousDstEnd = (const char*)blockStart + blockSize;
    return blockSize;
@ -522,9 +587,9 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
    DEBUGLOG(5, "ZSTD_copyRawBlock");
    if (dst == NULL) {
        if (srcSize == 0) return 0;
-        return ERROR(dstBuffer_null);
+        RETURN_ERROR(dstBuffer_null);
    }
-    if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
+    RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall);
    memcpy(dst, src, srcSize);
    return srcSize;
 }
@ -535,9 +600,9 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
 {
    if (dst == NULL) {
        if (regenSize == 0) return 0;
-        return ERROR(dstBuffer_null);
+        RETURN_ERROR(dstBuffer_null);
    }
-    if (regenSize > dstCapacity) return ERROR(dstSize_tooSmall);
+    RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall);
    memset(dst, b, regenSize);
    return regenSize;
 }
@ -560,15 +625,17 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
    DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);

    /* check */
-    if (remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize)
-        return ERROR(srcSize_wrong);
+    RETURN_ERROR_IF(
+        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
+        srcSize_wrong);

    /* Frame Header */
-    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX);
+    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
+                ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
-        if (remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize)
-            return ERROR(srcSize_wrong);
-        CHECK_F( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) );
+        RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
+                        srcSize_wrong);
+        FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) );
        ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
    }

@ -581,7 +648,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,

        ip += ZSTD_blockHeaderSize;
        remainingSrcSize -= ZSTD_blockHeaderSize;
-        if (cBlockSize > remainingSrcSize) return ERROR(srcSize_wrong);
+        RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong);

        switch(blockProperties.blockType)
        {
@ -596,7 +663,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
            break;
        case bt_reserved :
        default:
-            return ERROR(corruption_detected);
+            RETURN_ERROR(corruption_detected);
        }

        if (ZSTD_isError(decodedSize)) return decodedSize;
@ -609,15 +676,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
    }

    if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
-        if ((U64)(op-ostart) != dctx->fParams.frameContentSize) {
-            return ERROR(corruption_detected);
-    }   }
+        RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
+                        corruption_detected);
+    }
    if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
        U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
        U32 checkRead;
-        if (remainingSrcSize<4) return ERROR(checksum_wrong);
+        RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong);
        checkRead = MEM_readLE32(ip);
-        if (checkRead != checkCalc) return ERROR(checksum_wrong);
+        RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong);
        ip += 4;
        remainingSrcSize -= 4;
    }
@ -645,15 +712,15 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
        dictSize = ZSTD_DDict_dictSize(ddict);
    }

-    while (srcSize >= ZSTD_FRAMEHEADERSIZE_PREFIX) {
+    while (srcSize >= ZSTD_startingInputLength(dctx->format)) {

 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
        if (ZSTD_isLegacy(src, srcSize)) {
            size_t decodedSize;
            size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
            if (ZSTD_isError(frameSize)) return frameSize;
-            /* legacy support is not compatible with static dctx */
-            if (dctx->staticSize) return ERROR(memory_allocation);
+            RETURN_ERROR_IF(dctx->staticSize, memory_allocation,
+                "legacy support is not compatible with static dctx");

            decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
            if (ZSTD_isError(decodedSize)) return decodedSize;
@ -674,9 +741,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
                        (unsigned)magicNumber, ZSTD_MAGICNUMBER);
            if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
                size_t const skippableSize = readSkippableFrameSize(src, srcSize);
-                if (ZSTD_isError(skippableSize))
-                    return skippableSize;
-                if (srcSize < skippableSize) return ERROR(srcSize_wrong);
+                FORWARD_IF_ERROR(skippableSize);
+                assert(skippableSize <= srcSize);

                src = (const BYTE *)src + skippableSize;
                srcSize -= skippableSize;
@ -685,29 +751,29 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,

        if (ddict) {
            /* we were called from ZSTD_decompress_usingDDict */
-            CHECK_F(ZSTD_decompressBegin_usingDDict(dctx, ddict));
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict));
        } else {
            /* this will initialize correctly with no dict if dict == NULL, so
             * use this in all cases but ddict */
-            CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
        }
        ZSTD_checkContinuity(dctx, dst);

        {   const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
                                                    &src, &srcSize);
-            if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
-              && (moreThan1Frame==1) ) {
-                /* at least one frame successfully completed,
-                 * but following bytes are garbage :
-                 * it's more likely to be a srcSize error,
-                 * specifying more bytes than compressed size of frame(s).
-                 * This error message replaces ERROR(prefix_unknown),
-                 * which would be confusing, as the first header is actually correct.
-                 * Note that one could be unlucky, it might be a corruption error instead,
-                 * happening right at the place where we expect zstd magic bytes.
-                 * But this is _much_ less likely than a srcSize field error. */
-                return ERROR(srcSize_wrong);
-            }
+            RETURN_ERROR_IF(
+                (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
+             && (moreThan1Frame==1),
+                srcSize_wrong,
+                "at least one frame successfully completed, but following "
+                "bytes are garbage: it's more likely to be a srcSize error, "
+                "specifying more bytes than compressed size of frame(s). This "
+                "error message replaces ERROR(prefix_unknown), which would be "
+                "confusing, as the first header is actually correct. Note that "
+                "one could be unlucky, it might be a corruption error instead, "
+                "happening right at the place where we expect zstd magic "
+                "bytes. But this is _much_ less likely than a srcSize field "
+                "error.");
            if (ZSTD_isError(res)) return res;
            assert(res <= dstCapacity);
            dst = (BYTE*)dst + res;
@ -716,7 +782,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
        moreThan1Frame = 1;
    }  /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */

-    if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */
+    RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");

    return (BYTE*)dst - (BYTE*)dststart;
 }
@ -730,9 +796,26 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
 }


+static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)
+{
+    switch (dctx->dictUses) {
+    default:
+        assert(0 /* Impossible */);
+        /* fall-through */
+    case ZSTD_dont_use:
+        ZSTD_clearDict(dctx);
+        return NULL;
+    case ZSTD_use_indefinitely:
+        return dctx->ddict;
+    case ZSTD_use_once:
+        dctx->dictUses = ZSTD_dont_use;
+        return dctx->ddict;
+    }
+}
+
 size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
+    return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx));
 }


@ -741,7 +824,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
 #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
    size_t regenSize;
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
-    if (dctx==NULL) return ERROR(memory_allocation);
+    RETURN_ERROR_IF(dctx==NULL, memory_allocation);
    regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
    ZSTD_freeDCtx(dctx);
    return regenSize;
@ -791,8 +874,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
 {
    DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
    /* Sanity check */
-    if (srcSize != dctx->expected)
-        return ERROR(srcSize_wrong);  /* not allowed */
+    RETURN_ERROR_IF(srcSize != dctx->expected, srcSize_wrong, "not allowed");
    if (dstCapacity) ZSTD_checkContinuity(dctx, dst);

    switch (dctx->stage)
@ -817,7 +899,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
    case ZSTDds_decodeFrameHeader:
        assert(src != NULL);
        memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
-        CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
+        FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
        dctx->expected = ZSTD_blockHeaderSize;
        dctx->stage = ZSTDds_decodeBlockHeader;
        return 0;
@ -826,6 +908,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
        {   blockProperties_t bp;
            size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
            if (ZSTD_isError(cBlockSize)) return cBlockSize;
+            RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
            dctx->expected = cBlockSize;
            dctx->bType = bp.blockType;
            dctx->rleSize = bp.origSize;
@ -867,19 +950,20 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
                break;
            case bt_reserved :   /* should never happen */
            default:
-                return ERROR(corruption_detected);
+                RETURN_ERROR(corruption_detected);
            }
            if (ZSTD_isError(rSize)) return rSize;
+            RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
            DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
            dctx->decodedSize += rSize;
            if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);

            if (dctx->stage == ZSTDds_decompressLastBlock) {   /* end of frame */
                DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
-                if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
-                    if (dctx->decodedSize != dctx->fParams.frameContentSize) {
-                        return ERROR(corruption_detected);
-                }   }
+                RETURN_ERROR_IF(
+                    dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                 && dctx->decodedSize != dctx->fParams.frameContentSize,
+                    corruption_detected);
                if (dctx->fParams.checksumFlag) {  /* another round for frame checksum */
                    dctx->expected = 4;
                    dctx->stage = ZSTDds_checkChecksum;
@ -900,7 +984,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
        {   U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
            U32 const check32 = MEM_readLE32(src);
            DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
-            if (check32 != h32) return ERROR(checksum_wrong);
+            RETURN_ERROR_IF(check32 != h32, checksum_wrong);
            dctx->expected = 0;
            dctx->stage = ZSTDds_getFrameHeaderSize;
            return 0;
@ -921,7 +1005,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c

    default:
        assert(0);   /* impossible */
-        return ERROR(GENERIC);   /* some compiler require default to do something */
+        RETURN_ERROR(GENERIC);   /* some compiler require default to do something */
    }
 }

@ -945,7 +1029,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;

-    if (dictSize <= 8) return ERROR(dictionary_corrupted);
+    RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted);
    assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY);   /* dict must be valid */
    dictPtr += 8;   /* skip header = magic + dictID */

@ -964,16 +1048,16 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
                                                dictPtr, dictEnd - dictPtr,
                                                workspace, workspaceSize);
 #endif
-        if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
+        RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted);
        dictPtr += hSize;
    }

    {   short offcodeNCount[MaxOff+1];
        unsigned offcodeMaxValue = MaxOff, offcodeLog;
        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
-        if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
-        if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted);
-        if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
+        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted);
+        RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted);
+        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted);
        ZSTD_buildFSETable( entropy->OFTable,
                            offcodeNCount, offcodeMaxValue,
                            OF_base, OF_bits,
@ -984,9 +1068,9 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
    {   short matchlengthNCount[MaxML+1];
        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
-        if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
-        if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted);
-        if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
+        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted);
+        RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted);
+        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted);
        ZSTD_buildFSETable( entropy->MLTable,
                            matchlengthNCount, matchlengthMaxValue,
                            ML_base, ML_bits,
@ -997,9 +1081,9 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
    {   short litlengthNCount[MaxLL+1];
        unsigned litlengthMaxValue = MaxLL, litlengthLog;
        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
-        if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
-        if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted);
-        if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
+        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted);
+        RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted);
+        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted);
        ZSTD_buildFSETable( entropy->LLTable,
                            litlengthNCount, litlengthMaxValue,
                            LL_base, LL_bits,
@ -1007,12 +1091,13 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
        dictPtr += litlengthHeaderSize;
    }

-    if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
+    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted);
    {   int i;
        size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
        for (i=0; i<3; i++) {
            U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
-            if (rep==0 || rep >= dictContentSize) return ERROR(dictionary_corrupted);
+            RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
+                            dictionary_corrupted);
            entropy->rep[i] = rep;
    }   }

@ -1030,7 +1115,7 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict

    /* load entropy tables */
    {   size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
-        if (ZSTD_isError(eSize)) return ERROR(dictionary_corrupted);
+        RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted);
        dict = (const char*)dict + eSize;
        dictSize -= eSize;
    }
@ -1064,9 +1149,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)

 size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
 {
-    CHECK_F( ZSTD_decompressBegin(dctx) );
+    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) );
    if (dict && dictSize)
-        CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted);
+        RETURN_ERROR_IF(
+            ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
+            dictionary_corrupted);
    return 0;
 }

@ -1085,7 +1172,7 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
        DEBUGLOG(4, "DDict is %s",
                    dctx->ddictIsCold ? "~cold~" : "hot!");
    }
-    CHECK_F( ZSTD_decompressBegin(dctx) );
+    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) );
    if (ddict) {   /* NULL ddict is equivalent to no dictionary */
        ZSTD_copyDDictParameters(dctx, ddict);
    }
@ -1104,7 +1191,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
 }

 /*! ZSTD_getDictID_fromFrame() :
- *  Provides the dictID required to decompresse frame stored within `src`.
+ *  Provides the dictID required to decompress frame stored within `src`.
 *  If @return == 0, the dictID could not be decoded.
 *  This could for one of the following reasons :
 *  - The frame does not require a dictionary (most common case).
@ -1176,15 +1263,14 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
                                         ZSTD_dictLoadMethod_e dictLoadMethod,
                                         ZSTD_dictContentType_e dictContentType)
 {
-    if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
-    ZSTD_freeDDict(dctx->ddictLocal);
-    if (dict && dictSize >= 8) {
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
+    ZSTD_clearDict(dctx);
+    if (dict && dictSize != 0) {
        dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
-        if (dctx->ddictLocal == NULL) return ERROR(memory_allocation);
-    } else {
-        dctx->ddictLocal = NULL;
+        RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation);
+        dctx->ddict = dctx->ddictLocal;
+        dctx->dictUses = ZSTD_use_indefinitely;
    }
-    dctx->ddict = dctx->ddictLocal;
    return 0;
 }

@ -1200,7 +1286,9 @@ size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSi

 size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
 {
-    return ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType);
+    FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType));
+    dctx->dictUses = ZSTD_use_once;
+    return 0;
 }

 size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
@ -1210,22 +1298,21 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz


 /* ZSTD_initDStream_usingDict() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
 * this function cannot fail */
 size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
 {
    DEBUGLOG(4, "ZSTD_initDStream_usingDict");
-    zds->streamStage = zdss_init;
-    zds->noForwardProgress = 0;
-    CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) );
+    FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
+    return ZSTD_startingInputLength(zds->format);
 }

 /* note : this variant can't fail */
 size_t ZSTD_initDStream(ZSTD_DStream* zds)
 {
    DEBUGLOG(4, "ZSTD_initDStream");
-    return ZSTD_initDStream_usingDict(zds, NULL, 0);
+    return ZSTD_initDStream_usingDDict(zds, NULL);
 }

 /* ZSTD_initDStream_usingDDict() :
@ -1233,29 +1320,29 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds)
 * this function cannot fail */
 size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
 {
-    size_t const initResult = ZSTD_initDStream(dctx);
-    dctx->ddict = ddict;
-    return initResult;
+    FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) );
+    FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) );
+    return ZSTD_startingInputLength(dctx->format);
 }

 /* ZSTD_resetDStream() :
- * return : expected size, aka ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * return : expected size, aka ZSTD_startingInputLength().
 * this function cannot fail */
 size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
 {
-    DEBUGLOG(4, "ZSTD_resetDStream");
-    dctx->streamStage = zdss_loadHeader;
-    dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0;
-    dctx->legacyVersion = 0;
-    dctx->hostageByte = 0;
-    return ZSTD_FRAMEHEADERSIZE_PREFIX;
+    FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only));
+    return ZSTD_startingInputLength(dctx->format);
 }


 size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
 {
-    if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
-    dctx->ddict = ddict;
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
+    ZSTD_clearDict(dctx);
+    if (ddict) {
+        dctx->ddict = ddict;
+        dctx->dictUses = ZSTD_use_indefinitely;
+    }
    return 0;
 }

@ -1267,9 +1354,9 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
    ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
    size_t const min = (size_t)1 << bounds.lowerBound;
    size_t const max = (size_t)1 << bounds.upperBound;
-    if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
-    if (maxWindowSize < min) return ERROR(parameter_outOfBound);
-    if (maxWindowSize > max) return ERROR(parameter_outOfBound);
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
+    RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound);
+    RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound);
    dctx->maxWindowSize = maxWindowSize;
    return 0;
 }
@ -1311,15 +1398,15 @@ static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
 }

 #define CHECK_DBOUNDS(p,v) {                \
-    if (!ZSTD_dParam_withinBounds(p, v))    \
-        return ERROR(parameter_outOfBound); \
+    RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound); \
 }

 size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
 {
-    if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
    switch(dParam) {
        case ZSTD_d_windowLogMax:
+            if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
            CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
            dctx->maxWindowSize = ((size_t)1) << value;
            return 0;
@ -1329,19 +1416,20 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
            return 0;
        default:;
    }
-    return ERROR(parameter_unsupported);
+    RETURN_ERROR(parameter_unsupported);
 }

 size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
 {
    if ( (reset == ZSTD_reset_session_only)
      || (reset == ZSTD_reset_session_and_parameters) ) {
-        (void)ZSTD_initDStream(dctx);
+        dctx->streamStage = zdss_init;
+        dctx->noForwardProgress = 0;
    }
    if ( (reset == ZSTD_reset_parameters)
      || (reset == ZSTD_reset_session_and_parameters) ) {
-        if (dctx->streamStage != zdss_init)
-            return ERROR(stage_wrong);
+        RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong);
+        ZSTD_clearDict(dctx);
        dctx->format = ZSTD_f_zstd1;
        dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
    }
@ -1360,7 +1448,8 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
    unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
    unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
    size_t const minRBSize = (size_t) neededSize;
-    if ((unsigned long long)minRBSize != neededSize) return ERROR(frameParameter_windowTooLarge);
+    RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
+                    frameParameter_windowTooLarge);
    return minRBSize;
 }

@ -1378,9 +1467,9 @@ size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
    ZSTD_frameHeader zfh;
    size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
    if (ZSTD_isError(err)) return err;
-    if (err>0) return ERROR(srcSize_wrong);
-    if (zfh.windowSize > windowSizeMax)
-        return ERROR(frameParameter_windowTooLarge);
+    RETURN_ERROR_IF(err>0, srcSize_wrong);
+    RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
+                    frameParameter_windowTooLarge);
    return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
 }

@ -1406,16 +1495,16 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
    U32 someMoreWork = 1;

    DEBUGLOG(5, "ZSTD_decompressStream");
-    if (input->pos > input->size) {  /* forbidden */
-        DEBUGLOG(5, "in: pos: %u   vs size: %u",
-                    (U32)input->pos, (U32)input->size);
-        return ERROR(srcSize_wrong);
-    }
-    if (output->pos > output->size) {  /* forbidden */
-        DEBUGLOG(5, "out: pos: %u   vs size: %u",
-                    (U32)output->pos, (U32)output->size);
-        return ERROR(dstSize_tooSmall);
-    }
+    RETURN_ERROR_IF(
+        input->pos > input->size,
+        srcSize_wrong,
+        "forbidden. in: pos: %u   vs size: %u",
+        (U32)input->pos, (U32)input->size);
+    RETURN_ERROR_IF(
+        output->pos > output->size,
+        dstSize_tooSmall,
+        "forbidden. out: pos: %u   vs size: %u",
+        (U32)output->pos, (U32)output->size);
    DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));

    while (someMoreWork) {
@ -1423,15 +1512,18 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
        {
        case zdss_init :
            DEBUGLOG(5, "stage zdss_init => transparent reset ");
-            ZSTD_resetDStream(zds);   /* transparent reset on starting decoding a new frame */
+            zds->streamStage = zdss_loadHeader;
+            zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
+            zds->legacyVersion = 0;
+            zds->hostageByte = 0;
            /* fall-through */

        case zdss_loadHeader :
            DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
            if (zds->legacyVersion) {
-                /* legacy support is incompatible with static dctx */
-                if (zds->staticSize) return ERROR(memory_allocation);
+                RETURN_ERROR_IF(zds->staticSize, memory_allocation,
+                    "legacy support is incompatible with static dctx");
                {   size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
                    if (hint==0) zds->streamStage = zdss_init;
                    return hint;
@ -1443,12 +1535,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
                    U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
                    if (legacyVersion) {
-                        const void* const dict = zds->ddict ? ZSTD_DDict_dictContent(zds->ddict) : NULL;
-                        size_t const dictSize = zds->ddict ? ZSTD_DDict_dictSize(zds->ddict) : 0;
+                        ZSTD_DDict const* const ddict = ZSTD_getDDict(zds);
+                        const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL;
+                        size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0;
                        DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
-                        /* legacy support is incompatible with static dctx */
-                        if (zds->staticSize) return ERROR(memory_allocation);
-                        CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext,
+                        RETURN_ERROR_IF(zds->staticSize, memory_allocation,
+                            "legacy support is incompatible with static dctx");
+                        FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext,
                                    zds->previousLegacyVersion, legacyVersion,
                                    dict, dictSize));
                        zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
@ -1469,7 +1562,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                            zds->lhSize += remainingInput;
                        }
                        input->pos = input->size;
-                        return (MAX(ZSTD_FRAMEHEADERSIZE_MIN, hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
+                        return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
                    }
                    assert(ip != NULL);
                    memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
@ -1482,7 +1575,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart);
                if (cSize <= (size_t)(iend-istart)) {
                    /* shortcut : using single-pass mode */
-                    size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, zds->ddict);
+                    size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds));
                    if (ZSTD_isError(decompressedSize)) return decompressedSize;
                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
                    ip = istart + cSize;
@ -1495,13 +1588,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB

            /* Consume header (see ZSTDds_decodeFrameHeader) */
            DEBUGLOG(4, "Consume header");
-            CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict));
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)));

            if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
                zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
                zds->stage = ZSTDds_skipFrame;
            } else {
-                CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
+                FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
                zds->expected = ZSTD_blockHeaderSize;
                zds->stage = ZSTDds_decodeBlockHeader;
            }
@ -1511,7 +1604,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                        (U32)(zds->fParams.windowSize >>10),
                        (U32)(zds->maxWindowSize >> 10) );
            zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
-            if (zds->fParams.windowSize > zds->maxWindowSize) return ERROR(frameParameter_windowTooLarge);
+            RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
+                            frameParameter_windowTooLarge);

            /* Adapt buffer sizes to frame header instructions */
            {   size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
@ -1525,14 +1619,15 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                    if (zds->staticSize) {  /* static DCtx */
                        DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
                        assert(zds->staticSize >= sizeof(ZSTD_DCtx));  /* controlled at init */
-                        if (bufferSize > zds->staticSize - sizeof(ZSTD_DCtx))
-                            return ERROR(memory_allocation);
+                        RETURN_ERROR_IF(
+                            bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
+                            memory_allocation);
                    } else {
                        ZSTD_free(zds->inBuff, zds->customMem);
                        zds->inBuffSize = 0;
                        zds->outBuffSize = 0;
                        zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem);
-                        if (zds->inBuff == NULL) return ERROR(memory_allocation);
+                        RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation);
                    }
                    zds->inBuffSize = neededInBuffSize;
                    zds->outBuff = zds->inBuff + zds->inBuffSize;
@ -1574,7 +1669,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
                if (isSkipFrame) {
                    loadedSize = MIN(toLoad, (size_t)(iend-ip));
                } else {
-                    if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected);   /* should never happen */
+                    RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
+                                    corruption_detected,
+                                    "should never happen");
                    loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip);
                }
                ip += loadedSize;
@ -1615,7 +1712,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB

        default:
            assert(0);    /* impossible */
-            return ERROR(GENERIC);   /* some compiler require default to do something */
+            RETURN_ERROR(GENERIC);   /* some compiler require default to do something */
    }   }

    /* result */
@ -1624,8 +1721,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
    if ((ip==istart) && (op==ostart)) {  /* no forward progress */
        zds->noForwardProgress ++;
        if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
-            if (op==oend) return ERROR(dstSize_tooSmall);
-            if (ip==iend) return ERROR(srcSize_wrong);
+            RETURN_ERROR_IF(op==oend, dstSize_tooSmall);
+            RETURN_ERROR_IF(ip==iend, srcSize_wrong);
            assert(0);
        }
    } else {
--- a/src/borg/algorithms/zstd/lib/decompress/zstd_decompress_block.c
+++ b/src/borg/algorithms/zstd/lib/decompress/zstd_decompress_block.c
@ -56,14 +56,15 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
                          blockProperties_t* bpPtr)
 {
-    if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong);
+
    {   U32 const cBlockHeader = MEM_readLE24(src);
        U32 const cSize = cBlockHeader >> 3;
        bpPtr->lastBlock = cBlockHeader & 1;
        bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
        bpPtr->origSize = cSize;   /* only useful for RLE */
        if (bpPtr->blockType == bt_rle) return 1;
-        if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected);
        return cSize;
    }
 }
@ -78,7 +79,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
 {
-    if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected);
+    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
+    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected);

    {   const BYTE* const istart = (const BYTE*) src;
        symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
@ -86,11 +88,12 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
        switch(litEncType)
        {
        case set_repeat:
-            if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
+            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
+            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted);
            /* fall-through */

        case set_compressed:
-            if (srcSize < 5) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
+            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
            {   size_t lhSize, litSize, litCSize;
                U32 singleStream=0;
                U32 const lhlCode = (istart[0] >> 2) & 3;
@ -115,11 +118,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                    /* 2 - 2 - 18 - 18 */
                    lhSize = 5;
                    litSize  = (lhc >> 4) & 0x3FFFF;
-                    litCSize = (lhc >> 22) + (istart[4] << 10);
+                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
                    break;
                }
-                if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
-                if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
+                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected);

                /* prefetch huffman table if cold */
                if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
@ -157,7 +160,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                    }
                }

-                if (HUF_isError(hufSuccess)) return ERROR(corruption_detected);
+                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected);

                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
@ -187,7 +190,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                }

                if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
-                    if (litSize+lhSize > srcSize) return ERROR(corruption_detected);
+                    RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected);
                    memcpy(dctx->litBuffer, istart+lhSize, litSize);
                    dctx->litPtr = dctx->litBuffer;
                    dctx->litSize = litSize;
@ -216,17 +219,17 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                case 3:
                    lhSize = 3;
                    litSize = MEM_readLE24(istart) >> 4;
-                    if (srcSize<4) return ERROR(corruption_detected);   /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
+                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
                    break;
                }
-                if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected);
                memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                return lhSize+1;
            }
        default:
-            return ERROR(corruption_detected);   /* impossible */
+            RETURN_ERROR(corruption_detected, "impossible");
        }
    }
 }
@ -390,7 +393,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
                    symbolNext[s] = 1;
                } else {
                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
-                    symbolNext[s] = normalizedCounter[s];
+                    assert(normalizedCounter[s]>=0);
+                    symbolNext[s] = (U16)normalizedCounter[s];
        }   }   }
        memcpy(dt, &DTableH, sizeof(DTableH));
    }
@ -436,8 +440,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
    switch(type)
    {
    case set_rle :
-        if (!srcSize) return ERROR(srcSize_wrong);
-        if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(!srcSize, srcSize_wrong);
+        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected);
        {   U32 const symbol = *(const BYTE*)src;
            U32 const baseline = baseValue[symbol];
            U32 const nbBits = nbAdditionalBits[symbol];
@ -449,7 +453,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
        *DTablePtr = defaultTable;
        return 0;
    case set_repeat:
-        if (!flagRepeatTable) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected);
        /* prefetch FSE table if used */
        if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
            const void* const pStart = *DTablePtr;
@ -461,15 +465,15 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
        {   unsigned tableLog;
            S16 norm[MaxSeq+1];
            size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
-            if (FSE_isError(headerSize)) return ERROR(corruption_detected);
-            if (tableLog > maxLog) return ERROR(corruption_detected);
+            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected);
+            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected);
            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
            *DTablePtr = DTableSpace;
            return headerSize;
        }
-    default :   /* impossible */
+    default :
        assert(0);
-        return ERROR(GENERIC);
+        RETURN_ERROR(GENERIC, "impossible");
    }
 }

@ -483,28 +487,28 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");

    /* check */
-    if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
+    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong);

    /* SeqHead */
    nbSeq = *ip++;
    if (!nbSeq) {
        *nbSeqPtr=0;
-        if (srcSize != 1) return ERROR(srcSize_wrong);
+        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong);
        return 1;
    }
    if (nbSeq > 0x7F) {
        if (nbSeq == 0xFF) {
-            if (ip+2 > iend) return ERROR(srcSize_wrong);
+            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong);
            nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
        } else {
-            if (ip >= iend) return ERROR(srcSize_wrong);
+            RETURN_ERROR_IF(ip >= iend, srcSize_wrong);
            nbSeq = ((nbSeq-0x80)<<8) + *ip++;
        }
    }
    *nbSeqPtr = nbSeq;

    /* FSE table descriptors */
-    if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
+    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */
    {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@ -517,7 +521,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                      LL_base, LL_bits,
                                                      LL_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq);
-            if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
+            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected);
            ip += llhSize;
        }

@ -527,7 +531,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                      OF_base, OF_bits,
                                                      OF_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq);
-            if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
+            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected);
            ip += ofhSize;
        }

@ -537,7 +541,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                      ML_base, ML_bits,
                                                      ML_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq);
-            if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
+            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected);
            ip += mlhSize;
        }
    }
@ -569,38 +573,118 @@ typedef struct {
    size_t pos;
 } seqState_t;

+/*! ZSTD_overlapCopy8() :
+ *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ *  If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ *  Precondition: *ip <= *op
+ *  Postcondition: *op - *op >= 8
+ */
+static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+    assert(*ip <= *op);
+    if (offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+        int const sub2 = dec64table[offset];
+        (*op)[0] = (*ip)[0];
+        (*op)[1] = (*ip)[1];
+        (*op)[2] = (*ip)[2];
+        (*op)[3] = (*ip)[3];
+        *ip += dec32table[offset];
+        ZSTD_copy4(*op+4, *ip);
+        *ip -= sub2;
+    } else {
+        ZSTD_copy8(*op, *ip);
+    }
+    *ip += 8;
+    *op += 8;
+    assert(*op - *ip >= 8);
+}

-/* ZSTD_execSequenceLast7():
- * exceptional case : decompress a match starting within last 7 bytes of output buffer.
- * requires more careful checks, to ensure there is no overflow.
- * performance does not matter though.
- * note : this case is supposed to be never generated "naturally" by reference encoder,
- *        since in most cases it needs at least 8 bytes to look for a match.
- *        but it's allowed by the specification. */
+/*! ZSTD_safecopy() :
+ *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ *  This function is only called in the uncommon case where the sequence is near the end of the block. It
+ *  should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ *           The src buffer must be before the dst buffer.
+ */
+static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+    ptrdiff_t const diff = op - ip;
+    BYTE* const oend = op + length;
+
+    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
+           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
+
+    if (length < 8) {
+        /* Handle short lengths. */
+        while (op < oend) *op++ = *ip++;
+        return;
+    }
+    if (ovtype == ZSTD_overlap_src_before_dst) {
+        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
+        assert(length >= 8);
+        ZSTD_overlapCopy8(&op, &ip, diff);
+        assert(op - ip >= 8);
+        assert(op <= oend);
+    }
+
+    if (oend <= oend_w) {
+        /* No risk of overwrite. */
+        ZSTD_wildcopy(op, ip, length, ovtype);
+        return;
+    }
+    if (op <= oend_w) {
+        /* Wildcopy until we get close to the end. */
+        assert(oend > oend_w);
+        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+        ip += oend_w - op;
+        op = oend_w;
+    }
+    /* Handle the leftovers. */
+    while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ */
 FORCE_NOINLINE
-size_t ZSTD_execSequenceLast7(BYTE* op,
-                              BYTE* const oend, seq_t sequence,
-                              const BYTE** litPtr, const BYTE* const litLimit,
-                              const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
+size_t ZSTD_execSequenceEnd(BYTE* op,
+                            BYTE* const oend, seq_t sequence,
+                            const BYTE** litPtr, const BYTE* const litLimit,
+                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
 {
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;

-    /* check */
-    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall);   /* last match must fit within dstBuffer */
-    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* try to read beyond literal buffer */
+    /* bounds checks */
+    assert(oLitEnd < oMatchEnd);
+    RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer");
+    RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer");

    /* copy literals */
-    while (op < oLitEnd) *op++ = *(*litPtr)++;
+    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+    op = oLitEnd;
+    *litPtr = iLitEnd;

    /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - base)) {
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix */
-        if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected);
-        match = dictEnd - (base-match);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
+        match = dictEnd - (prefixStart-match);
        if (match + sequence.matchLength <= dictEnd) {
            memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
@ -610,13 +694,12 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
            memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
-            match = base;
+            match = prefixStart;
    }   }
-    while (op < oMatchEnd) *op++ = *match++;
+    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
    return sequenceLength;
 }

-
 HINT_INLINE
 size_t ZSTD_execSequence(BYTE* op,
                         BYTE* const oend, seq_t sequence,
@ -630,23 +713,32 @@ size_t ZSTD_execSequence(BYTE* op,
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

-    /* check */
-    if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
-    if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+    /* Errors and uncommon cases handled here. */
+    assert(oLitEnd < oMatchEnd);
+    if (iLitEnd > litLimit || oMatchEnd > oend_w)
+        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);

-    /* copy Literals */
-    ZSTD_copy8(op, *litPtr);
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+    /* Copy Literals:
+     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+     * We likely don't need the full 32-byte wildcopy.
+     */
+    assert(WILDCOPY_OVERLENGTH >= 16);
+    ZSTD_copy16(op, (*litPtr));
+    if (sequence.litLength > 16) {
+        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+    }
    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

-    /* copy Match */
+    /* Copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix -> go into extDict */
-        if (sequence.offset > (size_t)(oLitEnd - virtualStart))
-            return ERROR(corruption_detected);
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected);
        match = dictEnd + (match - prefixStart);
        if (match + sequence.matchLength <= dictEnd) {
            memmove(oLitEnd, match, sequence.matchLength);
@ -658,121 +750,33 @@ size_t ZSTD_execSequence(BYTE* op,
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
-            if (op > oend_w || sequence.matchLength < MINMATCH) {
-              U32 i;
-              for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
-              return sequenceLength;
-            }
    }   }
-    /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
+    /* Match within prefix of 1 or more bytes */
+    assert(op <= oMatchEnd);
+    assert(oMatchEnd <= oend_w);
+    assert(match >= prefixStart);
+    assert(sequence.matchLength >= 1);

-    /* match within prefix */
-    if (sequence.offset < 8) {
-        /* close range match, overlap */
-        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
-        int const sub2 = dec64table[sequence.offset];
-        op[0] = match[0];
-        op[1] = match[1];
-        op[2] = match[2];
-        op[3] = match[3];
-        match += dec32table[sequence.offset];
-        ZSTD_copy4(op+4, match);
-        match -= sub2;
-    } else {
-        ZSTD_copy8(op, match);
+    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+     * without overlap checking.
+     */
+    if (sequence.offset >= WILDCOPY_VECLEN) {
+        /* We bet on a full wildcopy for matches, since we expect matches to be
+         * longer than literals (in general). In silesia, ~10% of matches are longer
+         * than 16 bytes.
+         */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+        return sequenceLength;
    }
-    op += 8; match += 8;
+    assert(sequence.offset < WILDCOPY_VECLEN);

-    if (oMatchEnd > oend-(16-MINMATCH)) {
-        if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op);
-            match += oend_w - op;
-            op = oend_w;
-        }
-        while (op < oMatchEnd) *op++ = *match++;
-    } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
-    }
-    return sequenceLength;
-}
+    /* Copy 8 bytes and spread the offset to be >= 8. */
+    ZSTD_overlapCopy8(&op, &match, sequence.offset);

-
-HINT_INLINE
-size_t ZSTD_execSequenceLong(BYTE* op,
-                             BYTE* const oend, seq_t sequence,
-                             const BYTE** litPtr, const BYTE* const litLimit,
-                             const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
-{
-    BYTE* const oLitEnd = op + sequence.litLength;
-    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
-    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
-    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
-    const BYTE* match = sequence.match;
-
-    /* check */
-    if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-    if (iLitEnd > litLimit) return ERROR(corruption_detected);   /* over-read beyond lit buffer */
-    if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
-
-    /* copy Literals */
-    ZSTD_copy8(op, *litPtr);  /* note : op <= oLitEnd <= oend_w == oend - 8 */
-    if (sequence.litLength > 8)
-        ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8);   /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-    op = oLitEnd;
-    *litPtr = iLitEnd;   /* update for next sequence */
-
-    /* copy Match */
-    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
-        /* offset beyond prefix */
-        if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
-        if (match + sequence.matchLength <= dictEnd) {
-            memmove(oLitEnd, match, sequence.matchLength);
-            return sequenceLength;
-        }
-        /* span extDict & currentPrefixSegment */
-        {   size_t const length1 = dictEnd - match;
-            memmove(oLitEnd, match, length1);
-            op = oLitEnd + length1;
-            sequence.matchLength -= length1;
-            match = prefixStart;
-            if (op > oend_w || sequence.matchLength < MINMATCH) {
-              U32 i;
-              for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
-              return sequenceLength;
-            }
-    }   }
-    assert(op <= oend_w);
-    assert(sequence.matchLength >= MINMATCH);
-
-    /* match within prefix */
-    if (sequence.offset < 8) {
-        /* close range match, overlap */
-        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
-        int const sub2 = dec64table[sequence.offset];
-        op[0] = match[0];
-        op[1] = match[1];
-        op[2] = match[2];
-        op[3] = match[3];
-        match += dec32table[sequence.offset];
-        ZSTD_copy4(op+4, match);
-        match -= sub2;
-    } else {
-        ZSTD_copy8(op, match);
-    }
-    op += 8; match += 8;
-
-    if (oMatchEnd > oend-(16-MINMATCH)) {
-        if (op < oend_w) {
-            ZSTD_wildcopy(op, match, oend_w - op);
-            match += oend_w - op;
-            op = oend_w;
-        }
-        while (op < oMatchEnd) *op++ = *match++;
-    } else {
-        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+    /* If the match length is > 8 bytes, then continue with the wildcopy. */
+    if (sequence.matchLength > 8) {
+        assert(op < oMatchEnd);
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
    }
    return sequenceLength;
 }
@ -801,7 +805,7 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
 * bits before reloading. This value is the maximum number of bytes we read
- * after reloading when we are decoding long offets.
+ * after reloading when we are decoding long offsets.
 */
 #define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
@ -889,6 +893,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
 }

 FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
 ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize, int nbSeq,
@ -911,11 +916,18 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
        seqState_t seqState;
        dctx->fseEntropy = 1;
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-        CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected);
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);

+        ZSTD_STATIC_ASSERT(
+                BIT_DStream_unfinished < BIT_DStream_completed &&
+                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+                BIT_DStream_completed < BIT_DStream_overflow);
+
        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
            nbSeq--;
            {   seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
@ -927,14 +939,15 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,

        /* check if reached exact end */
        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
-        if (nbSeq) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(nbSeq, corruption_detected);
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected);
        /* save reps for next block */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
    }

    /* last literal segment */
    {   size_t const lastLLSize = litEnd - litPtr;
-        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
        memcpy(op, litPtr, lastLLSize);
        op += lastLLSize;
    }
@ -1066,7 +1079,9 @@ ZSTD_decompressSequencesLong_body(
        seqState.pos = (size_t)(op-prefixStart);
        seqState.dictEnd = dictEnd;
        assert(iend >= ip);
-        CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected);
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
@ -1076,23 +1091,23 @@ ZSTD_decompressSequencesLong_body(
            sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
            PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
        }
-        if (seqNb<seqAdvance) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected);

        /* decode and decompress */
        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
            seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
-            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
            sequences[seqNb & STORED_SEQS_MASK] = sequence;
            op += oneSeqSize;
        }
-        if (seqNb<nbSeq) return ERROR(corruption_detected);
+        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected);

        /* finish queue */
        seqNb -= seqAdvance;
        for ( ; seqNb<nbSeq ; seqNb++) {
-            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
            op += oneSeqSize;
        }
@ -1103,7 +1118,7 @@ ZSTD_decompressSequencesLong_body(

    /* last literal segment */
    {   size_t const lastLLSize = litEnd - litPtr;
-        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall);
        memcpy(op, litPtr, lastLLSize);
        op += lastLLSize;
    }
@ -1127,6 +1142,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,

 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
 static TARGET_ATTRIBUTE("bmi2") size_t
+DONT_VECTORIZE
 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
@ -1176,7 +1192,7 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
 /* ZSTD_decompressSequencesLong() :
 * decompression function triggered when a minimum share of offsets is considered "long",
 * aka out of cache.
- * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes mearning "farther than memory cache distance".
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
 * This function will try to mitigate main memory latency through the use of prefetching */
 static size_t
 ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
@ -1240,7 +1256,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);

-    if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
+    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong);

    /* Decode literals section */
    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
--- a/src/borg/algorithms/zstd/lib/decompress/zstd_decompress_internal.h
+++ b/src/borg/algorithms/zstd/lib/decompress/zstd_decompress_internal.h
@ -89,6 +89,12 @@ typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
 typedef enum { zdss_init=0, zdss_loadHeader,
               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;

+typedef enum {
+    ZSTD_use_indefinitely = -1,  /* Use the dictionary indefinitely */
+    ZSTD_dont_use = 0,           /* Do not use the dictionary (if one exists free it) */
+    ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
+} ZSTD_dictUses_e;
+
 struct ZSTD_DCtx_s
 {
    const ZSTD_seqSymbol* LLTptr;
@ -123,6 +129,7 @@ struct ZSTD_DCtx_s
    const ZSTD_DDict* ddict;     /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
    U32 dictID;
    int ddictIsCold;             /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
+    ZSTD_dictUses_e dictUses;

    /* streaming */
    ZSTD_dStreamStage streamStage;
--- a/src/borg/algorithms/zstd/lib/deprecated/zbuff.h
+++ b/src/borg/algorithms/zstd/lib/deprecated/zbuff.h
@ -36,16 +36,17 @@ extern "C" {
 *****************************************************************/
 /* Deprecation warnings */
 /* Should these warnings be a problem,
-   it is generally possible to disable them,
-   typically with -Wno-deprecated-declarations for gcc
-   or _CRT_SECURE_NO_WARNINGS in Visual.
-   Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS */
+ * it is generally possible to disable them,
+ * typically with -Wno-deprecated-declarations for gcc
+ * or _CRT_SECURE_NO_WARNINGS in Visual.
+ * Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS
+ */
 #ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
 #  define ZBUFF_DEPRECATED(message) ZSTDLIB_API  /* disable deprecation warnings */
 #else
 #  if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
 #    define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
-#  elif (defined(__GNUC__) && (__GNUC__ >= 5)) || defined(__clang__)
+#  elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
 #    define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
 #  elif defined(__GNUC__) && (__GNUC__ >= 3)
 #    define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
--- a/src/borg/algorithms/zstd/lib/dictBuilder/cover.c
+++ b/src/borg/algorithms/zstd/lib/dictBuilder/cover.c
@ -391,7 +391,7 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group,
 *
 *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
 *
- * Once the dmer d is in the dictionay we set F(d) = 0.
+ * Once the dmer d is in the dictionary we set F(d) = 0.
 */
 static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
                                           COVER_map_t *activeDmers, U32 begin,
@ -435,7 +435,7 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
      activeSegment.begin += 1;
      *delDmerOcc -= 1;
-      /* If this is the last occurence of the dmer, subtract its score */
+      /* If this is the last occurrence of the dmer, subtract its score */
      if (*delDmerOcc == 0) {
        COVER_map_remove(activeDmers, delDmer);
        activeSegment.score -= freqs[delDmer];
@ -526,10 +526,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
 * Prepare a context for dictionary building.
 * The context is only dependent on the parameter `d` and can used multiple
 * times.
- * Returns 1 on success or zero on error.
+ * Returns 0 on success or error code on error.
 * The context must be destroyed with `COVER_ctx_destroy()`.
 */
-static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
+static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
                          const size_t *samplesSizes, unsigned nbSamples,
                          unsigned d, double splitPoint) {
  const BYTE *const samples = (const BYTE *)samplesBuffer;
@ -544,17 +544,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
      totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
    DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
                 (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
-    return 0;
+    return ERROR(srcSize_wrong);
  }
  /* Check if there are at least 5 training samples */
  if (nbTrainSamples < 5) {
    DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
-    return 0;
+    return ERROR(srcSize_wrong);
  }
  /* Check if there's testing sample */
  if (nbTestSamples < 1) {
    DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
-    return 0;
+    return ERROR(srcSize_wrong);
  }
  /* Zero the context */
  memset(ctx, 0, sizeof(*ctx));
@ -577,7 +577,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
    COVER_ctx_destroy(ctx);
-    return 0;
+    return ERROR(memory_allocation);
  }
  ctx->freqs = NULL;
  ctx->d = d;
@ -624,7 +624,40 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
                (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
  ctx->freqs = ctx->suffix;
  ctx->suffix = NULL;
-  return 1;
+  return 0;
+}
+
+void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
+{
+  const double ratio = (double)nbDmers / maxDictSize;
+  if (ratio >= 10) {
+      return;
+  }
+  LOCALDISPLAYLEVEL(displayLevel, 1,
+                    "WARNING: The maximum dictionary size %u is too large "
+                    "compared to the source size %u! "
+                    "size(source)/size(dictionary) = %f, but it should be >= "
+                    "10! This may lead to a subpar dictionary! We recommend "
+                    "training on sources at least 10x, and preferably 100x "
+                    "the size of the dictionary! \n", (U32)maxDictSize,
+                    (U32)nbDmers, ratio);
+}
+
+COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize,
+                                       U32 nbDmers, U32 k, U32 passes)
+{
+  const U32 minEpochSize = k * 10;
+  COVER_epoch_info_t epochs;
+  epochs.num = MAX(1, maxDictSize / k / passes);
+  epochs.size = nbDmers / epochs.num;
+  if (epochs.size >= minEpochSize) {
+      assert(epochs.size * epochs.num <= nbDmers);
+      return epochs;
+  }
+  epochs.size = MIN(minEpochSize, nbDmers);
+  epochs.num = nbDmers / epochs.size;
+  assert(epochs.size * epochs.num <= nbDmers);
+  return epochs;
 }

 /**
@ -636,28 +669,34 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
                                    ZDICT_cover_params_t parameters) {
  BYTE *const dict = (BYTE *)dictBuffer;
  size_t tail = dictBufferCapacity;
-  /* Divide the data up into epochs of equal size.
-   * We will select at least one segment from each epoch.
-   */
-  const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
-  const unsigned epochSize = (U32)(ctx->suffixSize / epochs);
+  /* Divide the data into epochs. We will select one segment from each epoch. */
+  const COVER_epoch_info_t epochs = COVER_computeEpochs(
+      (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4);
+  const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3));
+  size_t zeroScoreRun = 0;
  size_t epoch;
  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
-                epochs, epochSize);
+                (U32)epochs.num, (U32)epochs.size);
  /* Loop through the epochs until there are no more segments or the dictionary
   * is full.
   */
-  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
-    const U32 epochBegin = (U32)(epoch * epochSize);
-    const U32 epochEnd = epochBegin + epochSize;
+  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
+    const U32 epochBegin = (U32)(epoch * epochs.size);
+    const U32 epochEnd = epochBegin + epochs.size;
    size_t segmentSize;
    /* Select a segment */
    COVER_segment_t segment = COVER_selectSegment(
        ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
-    /* If the segment covers no dmers, then we are out of content */
+    /* If the segment covers no dmers, then we are out of content.
+     * There may be new content in other epochs, for continue for some time.
+     */
    if (segment.score == 0) {
-      break;
+      if (++zeroScoreRun >= maxZeroScoreRun) {
+          break;
+      }
+      continue;
    }
+    zeroScoreRun = 0;
    /* Trim the segment if necessary and if it is too small then we are done */
    segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
    if (segmentSize < parameters.d) {
@ -690,11 +729,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
  /* Checks */
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
    DISPLAYLEVEL(1, "Cover parameters incorrect\n");
-    return ERROR(GENERIC);
+    return ERROR(parameter_outOfBound);
  }
  if (nbSamples == 0) {
    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
-    return ERROR(GENERIC);
+    return ERROR(srcSize_wrong);
  }
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@ -702,14 +741,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
    return ERROR(dstSize_tooSmall);
  }
  /* Initialize context and activeDmers */
-  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
-                      parameters.d, parameters.splitPoint)) {
-    return ERROR(GENERIC);
+  {
+    size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+                      parameters.d, parameters.splitPoint);
+    if (ZSTD_isError(initVal)) {
+      return initVal;
+    }
  }
+  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
    COVER_ctx_destroy(&ctx);
-    return ERROR(GENERIC);
+    return ERROR(memory_allocation);
  }

  DISPLAYLEVEL(2, "Building dictionary\n");
@ -770,7 +813,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
        cctx, dst, dstCapacity, samples + offsets[i],
        samplesSizes[i], cdict);
    if (ZSTD_isError(size)) {
-      totalCompressedSize = ERROR(GENERIC);
+      totalCompressedSize = size;
      goto _compressCleanup;
    }
    totalCompressedSize += size;
@ -846,9 +889,11 @@ void COVER_best_start(COVER_best_t *best) {
 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
 * If this dictionary is the best so far save it and its parameters.
 */
-void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
-                              ZDICT_cover_params_t parameters, void *dict,
-                              size_t dictSize) {
+void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
+                              COVER_dictSelection_t selection) {
+  void* dict = selection.dictContent;
+  size_t compressedSize = selection.totalCompressedSize;
+  size_t dictSize = selection.dictSize;
  if (!best) {
    return;
  }
@ -874,10 +919,12 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
        }
      }
      /* Save the dictionary, parameters, and size */
-      memcpy(best->dict, dict, dictSize);
-      best->dictSize = dictSize;
-      best->parameters = parameters;
-      best->compressedSize = compressedSize;
+      if (dict) {
+        memcpy(best->dict, dict, dictSize);
+        best->dictSize = dictSize;
+        best->parameters = parameters;
+        best->compressedSize = compressedSize;
+      }
    }
    if (liveJobs == 0) {
      ZSTD_pthread_cond_broadcast(&best->cond);
@ -886,6 +933,111 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
  }
 }

+COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
+    COVER_dictSelection_t selection = { NULL, 0, error };
+    return selection;
+}
+
+unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
+  return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
+}
+
+void COVER_dictSelectionFree(COVER_dictSelection_t selection){
+  free(selection.dictContent);
+}
+
+COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+        size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
+        size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
+
+  size_t largestDict = 0;
+  size_t largestCompressed = 0;
+  BYTE* customDictContentEnd = customDictContent + dictContentSize;
+
+  BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
+  BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
+  double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
+
+  if (!largestDictbuffer || !candidateDictBuffer) {
+    free(largestDictbuffer);
+    free(candidateDictBuffer);
+    return COVER_dictSelectionError(dictContentSize);
+  }
+
+  /* Initial dictionary size and compressed size */
+  memcpy(largestDictbuffer, customDictContent, dictContentSize);
+  dictContentSize = ZDICT_finalizeDictionary(
+    largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
+    samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
+
+  if (ZDICT_isError(dictContentSize)) {
+    free(largestDictbuffer);
+    free(candidateDictBuffer);
+    return COVER_dictSelectionError(dictContentSize);
+  }
+
+  totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
+                                                       samplesBuffer, offsets,
+                                                       nbCheckSamples, nbSamples,
+                                                       largestDictbuffer, dictContentSize);
+
+  if (ZSTD_isError(totalCompressedSize)) {
+    free(largestDictbuffer);
+    free(candidateDictBuffer);
+    return COVER_dictSelectionError(totalCompressedSize);
+  }
+
+  if (params.shrinkDict == 0) {
+    COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
+    free(candidateDictBuffer);
+    return selection;
+  }
+
+  largestDict = dictContentSize;
+  largestCompressed = totalCompressedSize;
+  dictContentSize = ZDICT_DICTSIZE_MIN;
+
+  /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
+  while (dictContentSize < largestDict) {
+    memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
+    dictContentSize = ZDICT_finalizeDictionary(
+      candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
+      samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
+
+    if (ZDICT_isError(dictContentSize)) {
+      free(largestDictbuffer);
+      free(candidateDictBuffer);
+      return COVER_dictSelectionError(dictContentSize);
+
+    }
+
+    totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
+                                                         samplesBuffer, offsets,
+                                                         nbCheckSamples, nbSamples,
+                                                         candidateDictBuffer, dictContentSize);
+
+    if (ZSTD_isError(totalCompressedSize)) {
+      free(largestDictbuffer);
+      free(candidateDictBuffer);
+      return COVER_dictSelectionError(totalCompressedSize);
+    }
+
+    if (totalCompressedSize <= largestCompressed * regressionTolerance) {
+      COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
+      free(largestDictbuffer);
+      return selection;
+    }
+    dictContentSize *= 2;
+  }
+  dictContentSize = largestDict;
+  totalCompressedSize = largestCompressed;
+  {
+    COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
+    free(candidateDictBuffer);
+    return selection;
+  }
+}
+
 /**
 * Parameters for COVER_tryParameters().
 */
@ -911,6 +1063,7 @@ static void COVER_tryParameters(void *opaque) {
  /* Allocate space for hash table, dict, and freqs */
  COVER_map_t activeDmers;
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
@ -926,29 +1079,21 @@ static void COVER_tryParameters(void *opaque) {
  {
    const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
                                              dictBufferCapacity, parameters);
-    dictBufferCapacity = ZDICT_finalizeDictionary(
-        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
-        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
-        parameters.zParams);
-    if (ZDICT_isError(dictBufferCapacity)) {
-      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
+    selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
+        totalCompressedSize);
+
+    if (COVER_dictSelectionIsError(selection)) {
+      DISPLAYLEVEL(1, "Failed to select dictionary\n");
      goto _cleanup;
    }
  }
-  /* Check total compressed size */
-  totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
-                                                       ctx->samples, ctx->offsets,
-                                                       ctx->nbTrainSamples, ctx->nbSamples,
-                                                       dict, dictBufferCapacity);
-
 _cleanup:
-  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
-                    dictBufferCapacity);
+  free(dict);
+  COVER_best_finish(data->best, parameters, selection);
  free(data);
  COVER_map_destroy(&activeDmers);
-  if (dict) {
-    free(dict);
-  }
+  COVER_dictSelectionFree(selection);
  if (freqs) {
    free(freqs);
  }
@ -970,6 +1115,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
  const unsigned kIterations =
      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
+  const unsigned shrinkDict = 0;
  /* Local variables */
  const int displayLevel = parameters->zParams.notificationLevel;
  unsigned iteration = 1;
@ -977,19 +1123,20 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
  unsigned k;
  COVER_best_t best;
  POOL_ctx *pool = NULL;
+  int warned = 0;

  /* Checks */
  if (splitPoint <= 0 || splitPoint > 1) {
    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
-    return ERROR(GENERIC);
+    return ERROR(parameter_outOfBound);
  }
  if (kMinK < kMaxD || kMaxK < kMinK) {
    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
-    return ERROR(GENERIC);
+    return ERROR(parameter_outOfBound);
  }
  if (nbSamples == 0) {
    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
-    return ERROR(GENERIC);
+    return ERROR(srcSize_wrong);
  }
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@ -1013,11 +1160,18 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
    /* Initialize the context for this value of d */
    COVER_ctx_t ctx;
    LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
-    if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
-      LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
-      COVER_best_destroy(&best);
-      POOL_free(pool);
-      return ERROR(GENERIC);
+    {
+      const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
+      if (ZSTD_isError(initVal)) {
+        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+        COVER_best_destroy(&best);
+        POOL_free(pool);
+        return initVal;
+      }
+    }
+    if (!warned) {
+      COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
+      warned = 1;
    }
    /* Loop through k reusing the same context */
    for (k = kMinK; k <= kMaxK; k += kStepSize) {
@ -1030,7 +1184,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
        COVER_best_destroy(&best);
        COVER_ctx_destroy(&ctx);
        POOL_free(pool);
-        return ERROR(GENERIC);
+        return ERROR(memory_allocation);
      }
      data->ctx = &ctx;
      data->best = &best;
@ -1040,6 +1194,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
      data->parameters.d = d;
      data->parameters.splitPoint = splitPoint;
      data->parameters.steps = kSteps;
+      data->parameters.shrinkDict = shrinkDict;
      data->parameters.zParams.notificationLevel = g_displayLevel;
      /* Check the parameters */
      if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
--- a/src/borg/algorithms/zstd/lib/dictBuilder/cover.h
+++ b/src/borg/algorithms/zstd/lib/dictBuilder/cover.h
@ -38,6 +38,44 @@ typedef struct {
  U32 score;
 } COVER_segment_t;

+/**
+ *Number of epochs and size of each epoch.
+ */
+typedef struct {
+  U32 num;
+  U32 size;
+} COVER_epoch_info_t;
+
+/**
+ * Struct used for the dictionary selection function.
+ */
+typedef struct COVER_dictSelection {
+  BYTE* dictContent;
+  size_t dictSize;
+  size_t totalCompressedSize;
+} COVER_dictSelection_t;
+
+/**
+ * Computes the number of epochs and the size of each epoch.
+ * We will make sure that each epoch gets at least 10 * k bytes.
+ *
+ * The COVER algorithms divide the data up into epochs of equal size and
+ * select one segment from each epoch.
+ *
+ * @param maxDictSize The maximum allowed dictionary size.
+ * @param nbDmers     The number of dmers we are training on.
+ * @param k           The parameter k (segment size).
+ * @param passes      The target number of passes over the dmer corpus.
+ *                    More passes means a better dictionary.
+ */
+COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
+                                       U32 k, U32 passes);
+
+/**
+ * Warns the user when their corpus is too small.
+ */
+void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel);
+
 /**
 *  Checks total compressed size of a dictionary
 */
@ -78,6 +116,32 @@ void COVER_best_start(COVER_best_t *best);
 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
 * If this dictionary is the best so far save it and its parameters.
 */
-void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
-                       ZDICT_cover_params_t parameters, void *dict,
-                       size_t dictSize);
+void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
+                       COVER_dictSelection_t selection);
+/**
+ * Error function for COVER_selectDict function. Checks if the return
+ * value is an error.
+ */
+unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
+
+ /**
+  * Error function for COVER_selectDict function. Returns a struct where
+  * return.totalCompressedSize is a ZSTD error.
+  */
+COVER_dictSelection_t COVER_dictSelectionError(size_t error);
+
+/**
+ * Always call after selectDict is called to free up used memory from
+ * newly created dictionary.
+ */
+void COVER_dictSelectionFree(COVER_dictSelection_t selection);
+
+/**
+ * Called to finalize the dictionary and select one based on whether or not
+ * the shrink-dict flag was enabled. If enabled the dictionary used is the
+ * smallest dictionary within a specified regression of the compressed size
+ * from the largest dictionary.
+ */
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
+                       size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
+                       size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
--- a/src/borg/algorithms/zstd/lib/dictBuilder/fastcover.c
+++ b/src/borg/algorithms/zstd/lib/dictBuilder/fastcover.c
@ -132,7 +132,7 @@ typedef struct {
 *
 *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
 *
- * Once the dmer with hash value d is in the dictionay we set F(d) = 0.
+ * Once the dmer with hash value d is in the dictionary we set F(d) = 0.
 */
 static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
                                              U32 *freqs, U32 begin, U32 end,
@ -161,7 +161,7 @@ static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
    /* Get hash value of current dmer */
    const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);

-    /* Add frequency of this index to score if this is the first occurence of index in active segment */
+    /* Add frequency of this index to score if this is the first occurrence of index in active segment */
    if (segmentFreqs[idx] == 0) {
      activeSegment.score += freqs[idx];
    }
@ -287,10 +287,10 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
 * Prepare a context for dictionary building.
 * The context is only dependent on the parameter `d` and can used multiple
 * times.
- * Returns 1 on success or zero on error.
+ * Returns 0 on success or error code on error.
 * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
 */
-static int
+static size_t
 FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
                   const void* samplesBuffer,
                   const size_t* samplesSizes, unsigned nbSamples,
@ -310,19 +310,19 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
        totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
        DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
                    (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
-        return 0;
+        return ERROR(srcSize_wrong);
    }

    /* Check if there are at least 5 training samples */
    if (nbTrainSamples < 5) {
        DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
-        return 0;
+        return ERROR(srcSize_wrong);
    }

    /* Check if there's testing sample */
    if (nbTestSamples < 1) {
        DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
-        return 0;
+        return ERROR(srcSize_wrong);
    }

    /* Zero the context */
@ -347,7 +347,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
    if (ctx->offsets == NULL) {
        DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
        FASTCOVER_ctx_destroy(ctx);
-        return 0;
+        return ERROR(memory_allocation);
    }

    /* Fill offsets from the samplesSizes */
@ -364,13 +364,13 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
    if (ctx->freqs == NULL) {
        DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
        FASTCOVER_ctx_destroy(ctx);
-        return 0;
+        return ERROR(memory_allocation);
    }

    DISPLAYLEVEL(2, "Computing frequencies\n");
    FASTCOVER_computeFrequency(ctx->freqs, ctx);

-    return 1;
+    return 0;
 }


@ -386,29 +386,35 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
 {
  BYTE *const dict = (BYTE *)dictBuffer;
  size_t tail = dictBufferCapacity;
-  /* Divide the data up into epochs of equal size.
-   * We will select at least one segment from each epoch.
-   */
-  const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
-  const unsigned epochSize = (U32)(ctx->nbDmers / epochs);
+  /* Divide the data into epochs. We will select one segment from each epoch. */
+  const COVER_epoch_info_t epochs = COVER_computeEpochs(
+      (U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1);
+  const size_t maxZeroScoreRun = 10;
+  size_t zeroScoreRun = 0;
  size_t epoch;
  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
-                epochs, epochSize);
+                (U32)epochs.num, (U32)epochs.size);
  /* Loop through the epochs until there are no more segments or the dictionary
   * is full.
   */
-  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
-    const U32 epochBegin = (U32)(epoch * epochSize);
-    const U32 epochEnd = epochBegin + epochSize;
+  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) {
+    const U32 epochBegin = (U32)(epoch * epochs.size);
+    const U32 epochEnd = epochBegin + epochs.size;
    size_t segmentSize;
    /* Select a segment */
    COVER_segment_t segment = FASTCOVER_selectSegment(
        ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs);

-    /* If the segment covers no dmers, then we are out of content */
+    /* If the segment covers no dmers, then we are out of content.
+     * There may be new content in other epochs, for continue for some time.
+     */
    if (segment.score == 0) {
-      break;
+      if (++zeroScoreRun >= maxZeroScoreRun) {
+          break;
+      }
+      continue;
    }
+    zeroScoreRun = 0;

    /* Trim the segment if necessary and if it is too small then we are done */
    segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
@ -429,7 +435,6 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
  return tail;
 }

-
 /**
 * Parameters for FASTCOVER_tryParameters().
 */
@ -458,6 +463,7 @@ static void FASTCOVER_tryParameters(void *opaque)
  U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
  /* Allocate space for hash table, dict, and freqs */
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
  U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
  if (!segmentFreqs || !dict || !freqs) {
    DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
@ -467,27 +473,24 @@ static void FASTCOVER_tryParameters(void *opaque)
  memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
  /* Build the dictionary */
  { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
-                                                  parameters, segmentFreqs);
+                                                    parameters, segmentFreqs);
+
    const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
-    dictBufferCapacity = ZDICT_finalizeDictionary(
-        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
-        ctx->samples, ctx->samplesSizes, nbFinalizeSamples, parameters.zParams);
-    if (ZDICT_isError(dictBufferCapacity)) {
-      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
+    selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
+         ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
+         totalCompressedSize);
+
+    if (COVER_dictSelectionIsError(selection)) {
+      DISPLAYLEVEL(1, "Failed to select dictionary\n");
      goto _cleanup;
    }
  }
-  /* Check total compressed size */
-  totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
-                                                       ctx->samples, ctx->offsets,
-                                                       ctx->nbTrainSamples, ctx->nbSamples,
-                                                       dict, dictBufferCapacity);
 _cleanup:
-  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
-                    dictBufferCapacity);
+  free(dict);
+  COVER_best_finish(data->best, parameters, selection);
  free(data);
  free(segmentFreqs);
-  free(dict);
+  COVER_dictSelectionFree(selection);
  free(freqs);
 }

@ -502,6 +505,7 @@ FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
    coverParams->nbThreads = fastCoverParams.nbThreads;
    coverParams->splitPoint = fastCoverParams.splitPoint;
    coverParams->zParams = fastCoverParams.zParams;
+    coverParams->shrinkDict = fastCoverParams.shrinkDict;
 }


@ -518,6 +522,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
    fastCoverParams->f = f;
    fastCoverParams->accel = accel;
    fastCoverParams->zParams = coverParams.zParams;
+    fastCoverParams->shrinkDict = coverParams.shrinkDict;
 }


@ -544,11 +549,11 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
    if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
                                   parameters.accel)) {
      DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
-      return ERROR(GENERIC);
+      return ERROR(parameter_outOfBound);
    }
    if (nbSamples == 0) {
      DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
-      return ERROR(GENERIC);
+      return ERROR(srcSize_wrong);
    }
    if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
      DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@ -558,12 +563,16 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
    /* Assign corresponding FASTCOVER_accel_t to accelParams*/
    accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
    /* Initialize context */
-    if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+    {
+      size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
                            coverParams.d, parameters.splitPoint, parameters.f,
-                            accelParams)) {
-      DISPLAYLEVEL(1, "Failed to initialize context\n");
-      return ERROR(GENERIC);
+                            accelParams);
+      if (ZSTD_isError(initVal)) {
+        DISPLAYLEVEL(1, "Failed to initialize context\n");
+        return initVal;
+      }
    }
+    COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
    /* Build the dictionary */
    DISPLAYLEVEL(2, "Building dictionary\n");
    {
@ -609,6 +618,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
        (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
    const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
    const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
+    const unsigned shrinkDict = 0;
    /* Local variables */
    const int displayLevel = parameters->zParams.notificationLevel;
    unsigned iteration = 1;
@ -616,22 +626,23 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
    unsigned k;
    COVER_best_t best;
    POOL_ctx *pool = NULL;
+    int warned = 0;
    /* Checks */
    if (splitPoint <= 0 || splitPoint > 1) {
      LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
-      return ERROR(GENERIC);
+      return ERROR(parameter_outOfBound);
    }
    if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
      LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
-      return ERROR(GENERIC);
+      return ERROR(parameter_outOfBound);
    }
    if (kMinK < kMaxD || kMaxK < kMinK) {
      LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
-      return ERROR(GENERIC);
+      return ERROR(parameter_outOfBound);
    }
    if (nbSamples == 0) {
      LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
-      return ERROR(GENERIC);
+      return ERROR(srcSize_wrong);
    }
    if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
      LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
@ -658,11 +669,18 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
      /* Initialize the context for this value of d */
      FASTCOVER_ctx_t ctx;
      LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
-      if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) {
-        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
-        COVER_best_destroy(&best);
-        POOL_free(pool);
-        return ERROR(GENERIC);
+      {
+        size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
+        if (ZSTD_isError(initVal)) {
+          LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+          COVER_best_destroy(&best);
+          POOL_free(pool);
+          return initVal;
+        }
+      }
+      if (!warned) {
+        COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
+        warned = 1;
      }
      /* Loop through k reusing the same context */
      for (k = kMinK; k <= kMaxK; k += kStepSize) {
@ -675,7 +693,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
          COVER_best_destroy(&best);
          FASTCOVER_ctx_destroy(&ctx);
          POOL_free(pool);
-          return ERROR(GENERIC);
+          return ERROR(memory_allocation);
        }
        data->ctx = &ctx;
        data->best = &best;
@ -685,6 +703,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
        data->parameters.d = d;
        data->parameters.splitPoint = splitPoint;
        data->parameters.steps = kSteps;
+        data->parameters.shrinkDict = shrinkDict;
        data->parameters.zParams.notificationLevel = g_displayLevel;
        /* Check the parameters */
        if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
--- a/src/borg/algorithms/zstd/lib/dictBuilder/zdict.c
+++ b/src/borg/algorithms/zstd/lib/dictBuilder/zdict.c
@ -571,7 +571,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
    unsigned const prime1 = 2654435761U;
    unsigned const prime2 = 2246822519U;
    unsigned acc = prime1;
-    size_t p=0;;
+    size_t p=0;
    for (p=0; p<length; p++) {
        acc *= prime2;
        ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
@ -741,7 +741,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
    /* analyze, build stats, starting with literals */
    {   size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
        if (HUF_isError(maxNbBits)) {
-            eSize = ERROR(GENERIC);
+            eSize = maxNbBits;
            DISPLAYLEVEL(1, " HUF_buildCTable error \n");
            goto _cleanup;
        }
@ -764,7 +764,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
    total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
    errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
    if (FSE_isError(errorCode)) {
-        eSize = ERROR(GENERIC);
+        eSize = errorCode;
        DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
        goto _cleanup;
    }
@ -773,7 +773,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
    total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
    errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
    if (FSE_isError(errorCode)) {
-        eSize = ERROR(GENERIC);
+        eSize = errorCode;
        DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
        goto _cleanup;
    }
@ -782,7 +782,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
    total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
    errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
    if (FSE_isError(errorCode)) {
-        eSize = ERROR(GENERIC);
+        eSize = errorCode;
        DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
        goto _cleanup;
    }
@ -791,7 +791,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
    /* write result to buffer */
    {   size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
        if (HUF_isError(hhSize)) {
-            eSize = ERROR(GENERIC);
+            eSize = hhSize;
            DISPLAYLEVEL(1, "HUF_writeCTable error \n");
            goto _cleanup;
        }
@ -802,7 +802,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,

    {   size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
        if (FSE_isError(ohSize)) {
-            eSize = ERROR(GENERIC);
+            eSize = ohSize;
            DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
            goto _cleanup;
        }
@ -813,7 +813,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,

    {   size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
        if (FSE_isError(mhSize)) {
-            eSize = ERROR(GENERIC);
+            eSize = mhSize;
            DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
            goto _cleanup;
        }
@ -824,7 +824,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,

    {   size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
        if (FSE_isError(lhSize)) {
-            eSize = ERROR(GENERIC);
+            eSize = lhSize;
            DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
            goto _cleanup;
        }
@ -834,7 +834,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
    }

    if (maxDstSize<12) {
-        eSize = ERROR(GENERIC);
+        eSize = ERROR(dstSize_tooSmall);
        DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
        goto _cleanup;
    }
--- a/src/borg/algorithms/zstd/lib/dictBuilder/zdict.h
+++ b/src/borg/algorithms/zstd/lib/dictBuilder/zdict.h
@ -46,7 +46,12 @@ extern "C" {
 *  The resulting dictionary will be saved into `dictBuffer`.
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
 *          or an error code, which can be tested with ZDICT_isError().
- *  Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
+ *  Note:  Dictionary training will fail if there are not enough samples to construct a
+ *         dictionary, or if most of the samples are too small (< 8 bytes being the lower limit).
+ *         If dictionary training fails, you should use zstd without a dictionary, as the dictionary
+ *         would've been ineffective anyways. If you believe your samples would benefit from a dictionary
+ *         please open an issue with details, and we can look into it.
+ *  Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB.
 *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
 *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
 *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
@ -89,6 +94,8 @@ typedef struct {
    unsigned steps;              /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
    unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
    double splitPoint;           /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
+    unsigned shrinkDict;         /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking  */
+    unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
    ZDICT_params_t zParams;
 } ZDICT_cover_params_t;

@ -100,6 +107,9 @@ typedef struct {
    unsigned nbThreads;          /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
    double splitPoint;           /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
    unsigned accel;              /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
+    unsigned shrinkDict;         /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking  */
+    unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */
+
    ZDICT_params_t zParams;
 } ZDICT_fastCover_params_t;

@ -110,6 +120,7 @@ typedef struct {
 *  The resulting dictionary will be saved into `dictBuffer`.
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
 *          or an error code, which can be tested with ZDICT_isError().
+ *          See ZDICT_trainFromBuffer() for details on failure modes.
 *  Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
 *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
 *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
@ -133,8 +144,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
 * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
 *
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *           or an error code, which can be tested with ZDICT_isError().
- *           On success `*parameters` contains the parameters selected.
+ *          or an error code, which can be tested with ZDICT_isError().
+ *          On success `*parameters` contains the parameters selected.
+ *          See ZDICT_trainFromBuffer() for details on failure modes.
 * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
 */
 ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
@ -151,7 +163,8 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
 *  The resulting dictionary will be saved into `dictBuffer`.
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
 *          or an error code, which can be tested with ZDICT_isError().
- *  Note: ZDICT_trainFromBuffer_fastCover() requires about 1 bytes of memory for each input byte and additionally another 6 * 2^f bytes of memory .
+ *          See ZDICT_trainFromBuffer() for details on failure modes.
+ *  Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
 *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
 *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
 *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
@ -175,9 +188,10 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
 * If accel is zero, default value of 1 is used.
 *
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *           or an error code, which can be tested with ZDICT_isError().
- *           On success `*parameters` contains the parameters selected.
- * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 1 byte of memory for each input byte and additionally another 6 * 2^f bytes of memory for each thread.
+ *          or an error code, which can be tested with ZDICT_isError().
+ *          On success `*parameters` contains the parameters selected.
+ *          See ZDICT_trainFromBuffer() for details on failure modes.
+ * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
 */
 ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
                    size_t dictBufferCapacity, const void* samplesBuffer,
@ -195,7 +209,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
 * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
 *
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
- *           or an error code, which can be tested by ZDICT_isError().
+ *          or an error code, which can be tested by ZDICT_isError().
 * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
 * Note 2: dictBuffer and dictContent can overlap
 */
@ -219,6 +233,7 @@ typedef struct {
 * `parameters` is optional and can be provided with values set to 0 to mean "default".
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
 *          or an error code, which can be tested with ZDICT_isError().
+ *          See ZDICT_trainFromBuffer() for details on failure modes.
 *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
 *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
 *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_legacy.h
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_legacy.h
@ -20,7 +20,7 @@ extern "C" {
 ***************************************/
 #include "mem.h"            /* MEM_STATIC */
 #include "error_private.h"  /* ERROR */
-#include "zstd.h"           /* ZSTD_inBuffer, ZSTD_outBuffer */
+#include "zstd_internal.h"  /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTD_frameSizeInfo */

 #if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0)
 #  undef ZSTD_LEGACY_SUPPORT
@ -178,43 +178,77 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
    }
 }

-MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src,
-                                             size_t compressedSize)
+MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize)
 {
-    U32 const version = ZSTD_isLegacy(src, compressedSize);
+    ZSTD_frameSizeInfo frameSizeInfo;
+    U32 const version = ZSTD_isLegacy(src, srcSize);
    switch(version)
    {
 #if (ZSTD_LEGACY_SUPPORT <= 1)
        case 1 :
-            return ZSTDv01_findFrameCompressedSize(src, compressedSize);
+            ZSTDv01_findFrameSizeInfoLegacy(src, srcSize,
+                &frameSizeInfo.compressedSize,
+                &frameSizeInfo.decompressedBound);
+            break;
 #endif
 #if (ZSTD_LEGACY_SUPPORT <= 2)
        case 2 :
-            return ZSTDv02_findFrameCompressedSize(src, compressedSize);
+            ZSTDv02_findFrameSizeInfoLegacy(src, srcSize,
+                &frameSizeInfo.compressedSize,
+                &frameSizeInfo.decompressedBound);
+            break;
 #endif
 #if (ZSTD_LEGACY_SUPPORT <= 3)
        case 3 :
-            return ZSTDv03_findFrameCompressedSize(src, compressedSize);
+            ZSTDv03_findFrameSizeInfoLegacy(src, srcSize,
+                &frameSizeInfo.compressedSize,
+                &frameSizeInfo.decompressedBound);
+            break;
 #endif
 #if (ZSTD_LEGACY_SUPPORT <= 4)
        case 4 :
-            return ZSTDv04_findFrameCompressedSize(src, compressedSize);
+            ZSTDv04_findFrameSizeInfoLegacy(src, srcSize,
+                &frameSizeInfo.compressedSize,
+                &frameSizeInfo.decompressedBound);
+            break;
 #endif
 #if (ZSTD_LEGACY_SUPPORT <= 5)
        case 5 :
-            return ZSTDv05_findFrameCompressedSize(src, compressedSize);
+            ZSTDv05_findFrameSizeInfoLegacy(src, srcSize,
+                &frameSizeInfo.compressedSize,
+                &frameSizeInfo.decompressedBound);
+            break;
 #endif
 #if (ZSTD_LEGACY_SUPPORT <= 6)
        case 6 :
-            return ZSTDv06_findFrameCompressedSize(src, compressedSize);
+            ZSTDv06_findFrameSizeInfoLegacy(src, srcSize,
+                &frameSizeInfo.compressedSize,
+                &frameSizeInfo.decompressedBound);
+            break;
 #endif
 #if (ZSTD_LEGACY_SUPPORT <= 7)
        case 7 :
-            return ZSTDv07_findFrameCompressedSize(src, compressedSize);
+            ZSTDv07_findFrameSizeInfoLegacy(src, srcSize,
+                &frameSizeInfo.compressedSize,
+                &frameSizeInfo.decompressedBound);
+            break;
 #endif
        default :
-            return ERROR(prefix_unknown);
+            frameSizeInfo.compressedSize = ERROR(prefix_unknown);
+            frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
+            break;
    }
+    if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) {
+        frameSizeInfo.compressedSize = ERROR(srcSize_wrong);
+        frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
+    }
+    return frameSizeInfo;
+}
+
+MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize)
+{
+    ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize);
+    return frameSizeInfo.compressedSize;
 }

 MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v01.c
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v01.c
@ -346,7 +346,7 @@ FORCE_INLINE unsigned FSE_highbit32 (U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
@ -1073,99 +1073,102 @@ static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
    const void* cSrc, size_t cSrcSize,
    const U16* DTable)
 {
-    BYTE* const ostart = (BYTE*) dst;
-    BYTE* op = ostart;
-    BYTE* const omax = op + maxDstSize;
-    BYTE* const olimit = omax-15;
-
-    const void* ptr = DTable;
-    const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
-    const U32 dtLog = DTable[0];
-    size_t errorCode;
-    U32 reloadStatus;
-
-    /* Init */
-
-    const U16* jumpTable = (const U16*)cSrc;
-    const size_t length1 = FSE_readLE16(jumpTable);
-    const size_t length2 = FSE_readLE16(jumpTable+1);
-    const size_t length3 = FSE_readLE16(jumpTable+2);
-    const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
-    const char* const start1 = (const char*)(cSrc) + 6;
-    const char* const start2 = start1 + length1;
-    const char* const start3 = start2 + length2;
-    const char* const start4 = start3 + length3;
-    FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
-
-    if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
-
-    errorCode = FSE_initDStream(&bitD1, start1, length1);
-    if (FSE_isError(errorCode)) return errorCode;
-    errorCode = FSE_initDStream(&bitD2, start2, length2);
-    if (FSE_isError(errorCode)) return errorCode;
-    errorCode = FSE_initDStream(&bitD3, start3, length3);
-    if (FSE_isError(errorCode)) return errorCode;
-    errorCode = FSE_initDStream(&bitD4, start4, length4);
-    if (FSE_isError(errorCode)) return errorCode;
-
-    reloadStatus=FSE_reloadDStream(&bitD2);
-
-    /* 16 symbols per loop */
-    for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
-        op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
+    if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
    {
-#define HUF_DECODE_SYMBOL_0(n, Dstream) \
-        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* op = ostart;
+        BYTE* const omax = op + maxDstSize;
+        BYTE* const olimit = omax-15;

-#define HUF_DECODE_SYMBOL_1(n, Dstream) \
-        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
-        if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
+        const void* ptr = DTable;
+        const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+        U32 reloadStatus;

-#define HUF_DECODE_SYMBOL_2(n, Dstream) \
-        op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
-        if (FSE_32bits()) FSE_reloadDStream(&Dstream)
+        /* Init */

-        HUF_DECODE_SYMBOL_1( 0, bitD1);
-        HUF_DECODE_SYMBOL_1( 1, bitD2);
-        HUF_DECODE_SYMBOL_1( 2, bitD3);
-        HUF_DECODE_SYMBOL_1( 3, bitD4);
-        HUF_DECODE_SYMBOL_2( 4, bitD1);
-        HUF_DECODE_SYMBOL_2( 5, bitD2);
-        HUF_DECODE_SYMBOL_2( 6, bitD3);
-        HUF_DECODE_SYMBOL_2( 7, bitD4);
-        HUF_DECODE_SYMBOL_1( 8, bitD1);
-        HUF_DECODE_SYMBOL_1( 9, bitD2);
-        HUF_DECODE_SYMBOL_1(10, bitD3);
-        HUF_DECODE_SYMBOL_1(11, bitD4);
-        HUF_DECODE_SYMBOL_0(12, bitD1);
-        HUF_DECODE_SYMBOL_0(13, bitD2);
-        HUF_DECODE_SYMBOL_0(14, bitD3);
-        HUF_DECODE_SYMBOL_0(15, bitD4);
-    }
+        const U16* jumpTable = (const U16*)cSrc;
+        const size_t length1 = FSE_readLE16(jumpTable);
+        const size_t length2 = FSE_readLE16(jumpTable+1);
+        const size_t length3 = FSE_readLE16(jumpTable+2);
+        const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   // check coherency !!
+        const char* const start1 = (const char*)(cSrc) + 6;
+        const char* const start2 = start1 + length1;
+        const char* const start3 = start2 + length2;
+        const char* const start4 = start3 + length3;
+        FSE_DStream_t bitD1, bitD2, bitD3, bitD4;

-    if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
-        return (size_t)-FSE_ERROR_corruptionDetected;
+        if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;

-    /* tail */
-    {
-        // bitTail = bitD1;   // *much* slower : -20% !??!
-        FSE_DStream_t bitTail;
-        bitTail.ptr = bitD1.ptr;
-        bitTail.bitsConsumed = bitD1.bitsConsumed;
-        bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
-        bitTail.start = start1;
-        for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
+        errorCode = FSE_initDStream(&bitD1, start1, length1);
+        if (FSE_isError(errorCode)) return errorCode;
+        errorCode = FSE_initDStream(&bitD2, start2, length2);
+        if (FSE_isError(errorCode)) return errorCode;
+        errorCode = FSE_initDStream(&bitD3, start3, length3);
+        if (FSE_isError(errorCode)) return errorCode;
+        errorCode = FSE_initDStream(&bitD4, start4, length4);
+        if (FSE_isError(errorCode)) return errorCode;
+
+        reloadStatus=FSE_reloadDStream(&bitD2);
+
+        /* 16 symbols per loop */
+        for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);  /* D2-3-4 are supposed to be synchronized and finish together */
+            op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
        {
-            HUF_DECODE_SYMBOL_0(0, bitTail);
+    #define HUF_DECODE_SYMBOL_0(n, Dstream) \
+            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
+
+    #define HUF_DECODE_SYMBOL_1(n, Dstream) \
+            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+            if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
+
+    #define HUF_DECODE_SYMBOL_2(n, Dstream) \
+            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+            if (FSE_32bits()) FSE_reloadDStream(&Dstream)
+
+            HUF_DECODE_SYMBOL_1( 0, bitD1);
+            HUF_DECODE_SYMBOL_1( 1, bitD2);
+            HUF_DECODE_SYMBOL_1( 2, bitD3);
+            HUF_DECODE_SYMBOL_1( 3, bitD4);
+            HUF_DECODE_SYMBOL_2( 4, bitD1);
+            HUF_DECODE_SYMBOL_2( 5, bitD2);
+            HUF_DECODE_SYMBOL_2( 6, bitD3);
+            HUF_DECODE_SYMBOL_2( 7, bitD4);
+            HUF_DECODE_SYMBOL_1( 8, bitD1);
+            HUF_DECODE_SYMBOL_1( 9, bitD2);
+            HUF_DECODE_SYMBOL_1(10, bitD3);
+            HUF_DECODE_SYMBOL_1(11, bitD4);
+            HUF_DECODE_SYMBOL_0(12, bitD1);
+            HUF_DECODE_SYMBOL_0(13, bitD2);
+            HUF_DECODE_SYMBOL_0(14, bitD3);
+            HUF_DECODE_SYMBOL_0(15, bitD4);
        }

-        if (FSE_endOfDStream(&bitTail))
-            return op-ostart;
+        if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
+            return (size_t)-FSE_ERROR_corruptionDetected;
+
+        /* tail */
+        {
+            // bitTail = bitD1;   // *much* slower : -20% !??!
+            FSE_DStream_t bitTail;
+            bitTail.ptr = bitD1.ptr;
+            bitTail.bitsConsumed = bitD1.bitsConsumed;
+            bitTail.bitContainer = bitD1.bitContainer;   // required in case of FSE_DStream_endOfBuffer
+            bitTail.start = start1;
+            for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
+            {
+                HUF_DECODE_SYMBOL_0(0, bitTail);
+            }
+
+            if (FSE_endOfDStream(&bitTail))
+                return op-ostart;
+        }
+
+        if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+        return (size_t)-FSE_ERROR_corruptionDetected;
    }
-
-    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
-
-    return (size_t)-FSE_ERROR_corruptionDetected;
 }


@ -1336,6 +1339,8 @@ static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header
 #define LITERAL_NOENTROPY 63
 #define COMMAND_NOENTROPY 7   /* to remove */

+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+
 static const size_t ZSTD_blockHeaderSize = 3;
 static const size_t ZSTD_frameHeaderSize = 4;

@ -1353,8 +1358,6 @@ static unsigned ZSTD_isLittleEndian(void)

 static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }

-static U32    ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
-
 static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }

 static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
@ -1379,16 +1382,9 @@ static U16 ZSTD_readLE16(const void* memPtr)
    }
 }

-
-static U32 ZSTD_readLE32(const void* memPtr)
+static U32 ZSTD_readLE24(const void* memPtr)
 {
-    if (ZSTD_isLittleEndian())
-        return ZSTD_read32(memPtr);
-    else
-    {
-        const BYTE* p = (const BYTE*)memPtr;
-        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
-    }
+    return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
 }

 static U32 ZSTD_readBE32(const void* memPtr)
@ -1702,13 +1698,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
    seqState->prevOffset = seq->offset;
    if (litLength == MaxLL)
    {
-        U32 add = dumps<de ? *dumps++ : 0;
+        const U32 add = dumps<de ? *dumps++ : 0;
        if (add < 255) litLength += add;
        else
        {
            if (dumps<=(de-3))
            {
-                litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+                litLength = ZSTD_readLE24(dumps);
                dumps += 3;
            }
        }
@ -1730,13 +1726,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
    if (matchLength == MaxML)
    {
-        U32 add = dumps<de ? *dumps++ : 0;
+        const U32 add = dumps<de ? *dumps++ : 0;
        if (add < 255) matchLength += add;
        else
        {
            if (dumps<=(de-3))
            {
-                matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+                matchLength = ZSTD_readLE24(dumps);
                dumps += 3;
            }
        }
@ -1757,7 +1753,7 @@ static size_t ZSTD_execSequence(BYTE* op,
                                BYTE* const base, BYTE* const oend)
 {
    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
-    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
    const BYTE* const ostart = op;
    const size_t litLength = sequence.litLength;
    BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
@ -1999,36 +1995,59 @@ size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t
    return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
 }

-size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t srcSize)
+/* ZSTD_errorFrameSizeInfoLegacy() :
+   assumes `cSize` and `dBound` are _not_ NULL */
+static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
+{
+    *cSize = ret;
+    *dBound = ZSTD_CONTENTSIZE_ERROR;
+}
+
+void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
 {
    const BYTE* ip = (const BYTE*)src;
    size_t remainingSize = srcSize;
+    size_t nbBlocks = 0;
    U32 magicNumber;
    blockProperties_t blockProperties;

    /* Frame Header */
-    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
+        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+        return;
+    }
    magicNumber = ZSTD_readBE32(src);
-    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    if (magicNumber != ZSTD_magicNumber) {
+        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
+        return;
+    }
    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;

    /* Loop on each block */
    while (1)
    {
        size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties);
-        if (ZSTDv01_isError(blockSize)) return blockSize;
+        if (ZSTDv01_isError(blockSize)) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize);
+            return;
+        }

        ip += ZSTD_blockHeaderSize;
        remainingSize -= ZSTD_blockHeaderSize;
-        if (blockSize > remainingSize) return ERROR(srcSize_wrong);
+        if (blockSize > remainingSize) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+            return;
+        }

        if (blockSize == 0) break;   /* bt_end */

        ip += blockSize;
        remainingSize -= blockSize;
+        nbBlocks++;
    }

-    return ip - (const BYTE*)src;
+    *cSize = ip - (const BYTE*)src;
+    *dBound = nbBlocks * BLOCKSIZE;
 }

 /*******************************
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v01.h
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v01.h
@ -35,13 +35,18 @@ ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
 size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
                     const void* src, size_t compressedSize);

-/**
-ZSTDv01_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.1.x format
-    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
-    return : the number of bytes that would be read to decompress this frame
-             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
-*/
-size_t ZSTDv01_findFrameCompressedSize(const void* src, size_t compressedSize);
+ /**
+ ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format
+     srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+     cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                 or an error code if it fails (which can be tested using ZSTDv01_isError())
+     dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                 or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+     note : assumes `cSize` and `dBound` are _not_ NULL.
+ */
+void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                     size_t* cSize, unsigned long long* dBound);

 /**
 ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v02.c
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v02.c
@ -217,6 +217,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
    }
 }

+MEM_STATIC U32 MEM_readLE24(const void* memPtr)
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
 {
    if (MEM_isLittleEndian())
@ -348,7 +353,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
@ -2728,6 +2733,8 @@ static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_
 #define LITERAL_NOENTROPY 63
 #define COMMAND_NOENTROPY 7   /* to remove */

+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+
 static const size_t ZSTD_blockHeaderSize = 3;
 static const size_t ZSTD_frameHeaderSize = 4;

@ -2882,6 +2889,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
            if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
            {
+                if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
                if (litSize > srcSize-3) return ERROR(corruption_detected);
                memcpy(dctx->litBuffer, istart, litSize);
                dctx->litPtr = dctx->litBuffer;
@ -3041,11 +3049,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
    seqState->prevOffset = seq->offset;
    if (litLength == MaxLL)
    {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
        if (add < 255) litLength += add;
-        else
+        else if (dumps + 3 <= de)
        {
-            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            litLength = MEM_readLE24(dumps);
            dumps += 3;
        }
        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
@ -3071,11 +3079,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
    if (matchLength == MaxML)
    {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
        if (add < 255) matchLength += add;
-        else
+        else if (dumps + 3 <= de)
        {
-            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            matchLength = MEM_readLE24(dumps);
            dumps += 3;
        }
        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
@ -3096,7 +3104,7 @@ static size_t ZSTD_execSequence(BYTE* op,
                                BYTE* const base, BYTE* const oend)
 {
    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
-    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
    const BYTE* const ostart = op;
    BYTE* const oLitEnd = op + sequence.litLength;
    BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength;   /* risk : address space overflow (32-bits) */
@ -3312,37 +3320,59 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz
    return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
 }

-static size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
+/* ZSTD_errorFrameSizeInfoLegacy() :
+   assumes `cSize` and `dBound` are _not_ NULL */
+static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
 {
+    *cSize = ret;
+    *dBound = ZSTD_CONTENTSIZE_ERROR;
+}

+void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
+{
    const BYTE* ip = (const BYTE*)src;
    size_t remainingSize = srcSize;
+    size_t nbBlocks = 0;
    U32 magicNumber;
    blockProperties_t blockProperties;

    /* Frame Header */
-    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
+        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+        return;
+    }
    magicNumber = MEM_readLE32(src);
-    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    if (magicNumber != ZSTD_magicNumber) {
+        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
+        return;
+    }
    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;

    /* Loop on each block */
    while (1)
    {
        size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
-        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+        if (ZSTD_isError(cBlockSize)) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
+            return;
+        }

        ip += ZSTD_blockHeaderSize;
        remainingSize -= ZSTD_blockHeaderSize;
-        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+        if (cBlockSize > remainingSize) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+            return;
+        }

        if (cBlockSize == 0) break;   /* bt_end */

        ip += cBlockSize;
        remainingSize -= cBlockSize;
+        nbBlocks++;
    }

-    return ip - (const BYTE*)src;
+    *cSize = ip - (const BYTE*)src;
+    *dBound = nbBlocks * BLOCKSIZE;
 }

 /*******************************
@ -3458,11 +3488,6 @@ size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
    return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
 }

-size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
-{
-    return ZSTD_findFrameCompressedSize(src, compressedSize);
-}
-
 ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
 {
    return (ZSTDv02_Dctx*)ZSTD_createDCtx();
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v02.h
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v02.h
@ -35,13 +35,18 @@ ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format
 size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
                     const void* src, size_t compressedSize);

-/**
-ZSTDv02_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.2.x format
-    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
-    return : the number of bytes that would be read to decompress this frame
-             or an errorCode if it fails (which can be tested using ZSTDv02_isError())
-*/
-size_t ZSTDv02_findFrameCompressedSize(const void* src, size_t compressedSize);
+ /**
+ ZSTDv02_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.2.x format
+     srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+     cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                 or an error code if it fails (which can be tested using ZSTDv01_isError())
+     dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                 or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+    note : assumes `cSize` and `dBound` are _not_ NULL.
+ */
+void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                     size_t* cSize, unsigned long long* dBound);

 /**
 ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v03.c
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v03.c
@ -219,6 +219,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
    }
 }

+MEM_STATIC U32 MEM_readLE24(const void* memPtr)
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
 {
    if (MEM_isLittleEndian())
@ -351,7 +356,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
@ -2369,6 +2374,8 @@ static size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_
 #define LITERAL_NOENTROPY 63
 #define COMMAND_NOENTROPY 7   /* to remove */

+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+
 static const size_t ZSTD_blockHeaderSize = 3;
 static const size_t ZSTD_frameHeaderSize = 4;

@ -2523,6 +2530,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
            if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
            {
+                if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
                if (litSize > srcSize-3) return ERROR(corruption_detected);
                memcpy(dctx->litBuffer, istart, litSize);
                dctx->litPtr = dctx->litBuffer;
@ -2682,11 +2690,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
    seqState->prevOffset = seq->offset;
    if (litLength == MaxLL)
    {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
        if (add < 255) litLength += add;
-        else
+        else if (dumps + 3 <= de)
        {
-            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            litLength = MEM_readLE24(dumps);
            dumps += 3;
        }
        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
@ -2712,11 +2720,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
    if (matchLength == MaxML)
    {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
        if (add < 255) matchLength += add;
-        else
+        else if (dumps + 3 <= de)
        {
-            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
+            matchLength = MEM_readLE24(dumps);
            dumps += 3;
        }
        if (dumps >= de) dumps = de-1;   /* late correction, to avoid read overflow (data is now corrupted anyway) */
@ -2737,7 +2745,7 @@ static size_t ZSTD_execSequence(BYTE* op,
                                BYTE* const base, BYTE* const oend)
 {
    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
-    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
    const BYTE* const ostart = op;
    BYTE* const oLitEnd = op + sequence.litLength;
    BYTE* const oMatchEnd = op + sequence.litLength + sequence.matchLength;   /* risk : address space overflow (32-bits) */
@ -2953,36 +2961,59 @@ static size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, siz
    return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
 }

-static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
+/* ZSTD_errorFrameSizeInfoLegacy() :
+   assumes `cSize` and `dBound` are _not_ NULL */
+MEM_STATIC void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
+{
+    *cSize = ret;
+    *dBound = ZSTD_CONTENTSIZE_ERROR;
+}
+
+void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
 {
    const BYTE* ip = (const BYTE*)src;
    size_t remainingSize = srcSize;
+    size_t nbBlocks = 0;
    U32 magicNumber;
    blockProperties_t blockProperties;

    /* Frame Header */
-    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
+    if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) {
+        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+        return;
+    }
    magicNumber = MEM_readLE32(src);
-    if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+    if (magicNumber != ZSTD_magicNumber) {
+        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
+        return;
+    }
    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;

    /* Loop on each block */
    while (1)
    {
        size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
-        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+        if (ZSTD_isError(cBlockSize)) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
+            return;
+        }

        ip += ZSTD_blockHeaderSize;
        remainingSize -= ZSTD_blockHeaderSize;
-        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+        if (cBlockSize > remainingSize) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+            return;
+        }

        if (cBlockSize == 0) break;   /* bt_end */

        ip += cBlockSize;
        remainingSize -= cBlockSize;
+        nbBlocks++;
    }

-    return ip - (const BYTE*)src;
+    *cSize = ip - (const BYTE*)src;
+    *dBound = nbBlocks * BLOCKSIZE;
 }


@ -3099,11 +3130,6 @@ size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
    return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
 }

-size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t srcSize)
-{
-    return ZSTD_findFrameCompressedSize(src, srcSize);
-}
-
 ZSTDv03_Dctx* ZSTDv03_createDCtx(void)
 {
    return (ZSTDv03_Dctx*)ZSTD_createDCtx();
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v03.h
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v03.h
@ -35,13 +35,18 @@ ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format
 size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize,
                     const void* src, size_t compressedSize);

-/**
-ZSTDv03_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.3.x format
-    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
-    return : the number of bytes that would be read to decompress this frame
-             or an errorCode if it fails (which can be tested using ZSTDv03_isError())
-*/
-size_t ZSTDv03_findFrameCompressedSize(const void* src, size_t compressedSize);
+ /**
+ ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format
+     srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+     cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                 or an error code if it fails (which can be tested using ZSTDv01_isError())
+     dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                 or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+    note : assumes `cSize` and `dBound` are _not_ NULL.
+ */
+ void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                      size_t* cSize, unsigned long long* dBound);

    /**
 ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v04.c
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v04.c
@ -189,6 +189,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
    }
 }

+MEM_STATIC U32 MEM_readLE24(const void* memPtr)
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
 {
    if (MEM_isLittleEndian())
@ -373,6 +378,8 @@ static const size_t ZSTD_frameHeaderSize_min = 5;
 #define MIN_SEQUENCES_SIZE (2 /*seqNb*/ + 2 /*dumps*/ + 3 /*seqTables*/ + 1 /*bitStream*/)
 #define MIN_CBLOCK_SIZE (3 /*litCSize*/ + MIN_SEQUENCES_SIZE)

+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+
 typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;


@ -620,7 +627,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
@ -2648,6 +2655,7 @@ static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
            const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2;   /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
            if (litSize > srcSize-11)   /* risk of reading too far with wildcopy */
            {
+                if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
                if (litSize > srcSize-3) return ERROR(corruption_detected);
                memcpy(dctx->litBuffer, istart, litSize);
                dctx->litPtr = dctx->litBuffer;
@ -2806,13 +2814,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
    litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
    prevOffset = litLength ? seq->offset : seqState->prevOffset;
    if (litLength == MaxLL) {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
        if (add < 255) litLength += add;
-        else {
-            litLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
+        else if (dumps + 3 <= de) {
+            litLength = MEM_readLE24(dumps);
            dumps += 3;
        }
-        if (dumps > de) { litLength = MaxLL+255; }  /* late correction, to avoid using uninitialized memory */
        if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
    }

@ -2835,13 +2842,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
    /* MatchLength */
    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
    if (matchLength == MaxML) {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
        if (add < 255) matchLength += add;
-        else {
-            matchLength = dumps[0] + (dumps[1]<<8) + (dumps[2]<<16);
+        else if (dumps + 3 <= de){
+            matchLength = MEM_readLE24(dumps);
            dumps += 3;
        }
-        if (dumps > de) { matchLength = MaxML+255; }  /* late correction, to avoid using uninitialized memory */
        if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
    }
    matchLength += MINMATCH;
@ -2860,7 +2866,7 @@ static size_t ZSTD_execSequence(BYTE* op,
                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
 {
    static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
+    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
    BYTE* const oLitEnd = op + sequence.litLength;
    const size_t sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
@ -3029,9 +3035,12 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
 {
    /* blockType == blockCompressed */
    const BYTE* ip = (const BYTE*)src;
+    size_t litCSize;
+
+    if (srcSize > BLOCKSIZE) return ERROR(corruption_detected);

    /* Decode literals sub-block */
-    size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+    litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
    if (ZSTD_isError(litCSize)) return litCSize;
    ip += litCSize;
    srcSize -= litCSize;
@ -3119,34 +3128,57 @@ static size_t ZSTD_decompress_usingDict(ZSTD_DCtx* ctx,
    return op-ostart;
 }

-static size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
+/* ZSTD_errorFrameSizeInfoLegacy() :
+   assumes `cSize` and `dBound` are _not_ NULL */
+static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
+{
+    *cSize = ret;
+    *dBound = ZSTD_CONTENTSIZE_ERROR;
+}
+
+void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
 {
    const BYTE* ip = (const BYTE*)src;
    size_t remainingSize = srcSize;
+    size_t nbBlocks = 0;
    blockProperties_t blockProperties;

    /* Frame Header */
-    if (srcSize < ZSTD_frameHeaderSize_min) return ERROR(srcSize_wrong);
-    if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) return ERROR(prefix_unknown);
+    if (srcSize < ZSTD_frameHeaderSize_min) {
+        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+        return;
+    }
+    if (MEM_readLE32(src) != ZSTD_MAGICNUMBER) {
+        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
+        return;
+    }
    ip += ZSTD_frameHeaderSize_min; remainingSize -= ZSTD_frameHeaderSize_min;

    /* Loop on each block */
    while (1)
    {
        size_t cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
-        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+        if (ZSTD_isError(cBlockSize)) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
+            return;
+        }

        ip += ZSTD_blockHeaderSize;
        remainingSize -= ZSTD_blockHeaderSize;
-        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+        if (cBlockSize > remainingSize) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+            return;
+        }

        if (cBlockSize == 0) break;   /* bt_end */

        ip += cBlockSize;
        remainingSize -= cBlockSize;
+        nbBlocks++;
    }

-    return ip - (const BYTE*)src;
+    *cSize = ip - (const BYTE*)src;
+    *dBound = nbBlocks * BLOCKSIZE;
 }

 /* ******************************
@ -3578,11 +3610,6 @@ size_t ZSTDv04_decompress(void* dst, size_t maxDstSize, const void* src, size_t
 #endif
 }

-size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t srcSize)
-{
-    return ZSTD_findFrameCompressedSize(src, srcSize);
-}
-
 size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx) { return ZSTD_resetDCtx(dctx); }

 size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx)
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v04.h
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v04.h
@ -35,13 +35,18 @@ ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format
 size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize,
                     const void* src, size_t compressedSize);

-/**
-ZSTDv04_getFrameSrcSize() : get the source length of a ZSTD frame compliant with v0.4.x format
-    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
-    return : the number of bytes that would be read to decompress this frame
-             or an errorCode if it fails (which can be tested using ZSTDv04_isError())
-*/
-size_t ZSTDv04_findFrameCompressedSize(const void* src, size_t compressedSize);
+ /**
+ ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format
+     srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+     cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                 or an error code if it fails (which can be tested using ZSTDv01_isError())
+     dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                 or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+    note : assumes `cSize` and `dBound` are _not_ NULL.
+ */
+ void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                      size_t* cSize, unsigned long long* dBound);

 /**
 ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v05.c
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v05.c
@ -491,6 +491,8 @@ static const size_t ZSTDv05_frameHeaderSize_min = 5;

 #define WILDCOPY_OVERLENGTH 8

+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+
 typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;


@ -754,7 +756,7 @@ MEM_STATIC unsigned BITv05_highbit32 (U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
@ -1996,91 +1998,92 @@ size_t HUFv05_decompress4X2_usingDTable(
    const void* cSrc, size_t cSrcSize,
    const U16* DTable)
 {
-    const BYTE* const istart = (const BYTE*) cSrc;
-    BYTE* const ostart = (BYTE*) dst;
-    BYTE* const oend = ostart + dstSize;
-    const void* const dtPtr = DTable;
-    const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
-    const U32 dtLog = DTable[0];
-    size_t errorCode;
-
-    /* Init */
-    BITv05_DStream_t bitD1;
-    BITv05_DStream_t bitD2;
-    BITv05_DStream_t bitD3;
-    BITv05_DStream_t bitD4;
-    const size_t length1 = MEM_readLE16(istart);
-    const size_t length2 = MEM_readLE16(istart+2);
-    const size_t length3 = MEM_readLE16(istart+4);
-    size_t length4;
-    const BYTE* const istart1 = istart + 6;  /* jumpTable */
-    const BYTE* const istart2 = istart1 + length1;
-    const BYTE* const istart3 = istart2 + length2;
-    const BYTE* const istart4 = istart3 + length3;
-    const size_t segmentSize = (dstSize+3) / 4;
-    BYTE* const opStart2 = ostart + segmentSize;
-    BYTE* const opStart3 = opStart2 + segmentSize;
-    BYTE* const opStart4 = opStart3 + segmentSize;
-    BYTE* op1 = ostart;
-    BYTE* op2 = opStart2;
-    BYTE* op3 = opStart3;
-    BYTE* op4 = opStart4;
-    U32 endSignal;
-
    /* Check */
    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable;
+        const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;

-    length4 = cSrcSize - (length1 + length2 + length3 + 6);
-    if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
-    errorCode = BITv05_initDStream(&bitD1, istart1, length1);
-    if (HUFv05_isError(errorCode)) return errorCode;
-    errorCode = BITv05_initDStream(&bitD2, istart2, length2);
-    if (HUFv05_isError(errorCode)) return errorCode;
-    errorCode = BITv05_initDStream(&bitD3, istart3, length3);
-    if (HUFv05_isError(errorCode)) return errorCode;
-    errorCode = BITv05_initDStream(&bitD4, istart4, length4);
-    if (HUFv05_isError(errorCode)) return errorCode;
+        /* Init */
+        BITv05_DStream_t bitD1;
+        BITv05_DStream_t bitD2;
+        BITv05_DStream_t bitD3;
+        BITv05_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;

-    /* 16-32 symbols per loop (4-8 symbols per stream) */
-    endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
-    for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
-        HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
-        HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
-        HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
-        HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
-        HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
-        HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
-        HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
-        HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
-        HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
-        HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
-        HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
-        HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
-        HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
-        HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
-        HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
-        HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BITv05_initDStream(&bitD1, istart1, length1);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD2, istart2, length2);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD3, istart3, length3);
+        if (HUFv05_isError(errorCode)) return errorCode;
+        errorCode = BITv05_initDStream(&bitD4, istart4, length4);
+        if (HUFv05_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
        endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+        for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) {
+            HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUFv05_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
    }
-
-    /* check corruption */
-    if (op1 > opStart2) return ERROR(corruption_detected);
-    if (op2 > opStart3) return ERROR(corruption_detected);
-    if (op3 > opStart4) return ERROR(corruption_detected);
-    /* note : op4 supposed already verified within main loop */
-
-    /* finish bitStreams one by one */
-    HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
-    HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
-    HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
-    HUFv05_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
-
-    /* check */
-    endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4);
-    if (!endSignal) return ERROR(corruption_detected);
-
-    /* decoded size */
-    return dstSize;
 }


@ -3148,14 +3151,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
    litLength = FSEv05_peakSymbol(&(seqState->stateLL));
    prevOffset = litLength ? seq->offset : seqState->prevOffset;
    if (litLength == MaxLL) {
-        U32 add = *dumps++;
+        const U32 add = *dumps++;
        if (add < 255) litLength += add;
-        else {
-            litLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no risk : dumps is always followed by seq tables > 1 byte */
-            if (litLength&1) litLength>>=1, dumps += 3;
-            else litLength = (U16)(litLength)>>1, dumps += 2;
+        else if (dumps + 2 <= de) {
+            litLength = MEM_readLE16(dumps);
+            dumps += 2;
+            if ((litLength & 1) && dumps < de) {
+                litLength += *dumps << 16;
+                dumps += 1;
+            }
+            litLength>>=1;
        }
-        if (dumps > de) { litLength = MaxLL+255; }  /* late correction, to avoid using uninitialized memory */
        if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
    }

@ -3182,14 +3188,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState)
    /* MatchLength */
    matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
    if (matchLength == MaxML) {
-        U32 add = *dumps++;
+        const U32 add = dumps<de ? *dumps++ : 0;
        if (add < 255) matchLength += add;
-        else {
-            matchLength = MEM_readLE32(dumps) & 0xFFFFFF;  /* no pb : dumps is always followed by seq tables > 1 byte */
-            if (matchLength&1) matchLength>>=1, dumps += 3;
-            else matchLength = (U16)(matchLength)>>1, dumps += 2;
+        else if (dumps + 2 <= de) {
+            matchLength = MEM_readLE16(dumps);
+            dumps += 2;
+            if ((matchLength & 1) && dumps < de) {
+                matchLength += *dumps << 16;
+                dumps += 1;
+            }
+            matchLength >>= 1;
        }
-        if (dumps > de) { matchLength = MaxML+255; }  /* late correction, to avoid using uninitialized memory */
        if (dumps >= de) { dumps = de-1; }  /* late correction, to avoid read overflow (data is now corrupted anyway) */
    }
    matchLength += MINMATCH;
@ -3217,7 +3226,7 @@ static size_t ZSTDv05_execSequence(BYTE* op,
                                const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
 {
    static const int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
+    static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
    BYTE* const oLitEnd = op + sequence.litLength;
    const size_t sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
@ -3508,34 +3517,57 @@ size_t ZSTDv05_decompress(void* dst, size_t maxDstSize, const void* src, size_t
 #endif
 }

-size_t ZSTDv05_findFrameCompressedSize(const void *src, size_t srcSize)
+/* ZSTD_errorFrameSizeInfoLegacy() :
+   assumes `cSize` and `dBound` are _not_ NULL */
+static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
+{
+    *cSize = ret;
+    *dBound = ZSTD_CONTENTSIZE_ERROR;
+}
+
+void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
 {
    const BYTE* ip = (const BYTE*)src;
    size_t remainingSize = srcSize;
+    size_t nbBlocks = 0;
    blockProperties_t blockProperties;

    /* Frame Header */
-    if (srcSize < ZSTDv05_frameHeaderSize_min) return ERROR(srcSize_wrong);
-    if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) return ERROR(prefix_unknown);
+    if (srcSize < ZSTDv05_frameHeaderSize_min) {
+        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+        return;
+    }
+    if (MEM_readLE32(src) != ZSTDv05_MAGICNUMBER) {
+        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
+        return;
+    }
    ip += ZSTDv05_frameHeaderSize_min; remainingSize -= ZSTDv05_frameHeaderSize_min;

    /* Loop on each block */
    while (1)
    {
        size_t cBlockSize = ZSTDv05_getcBlockSize(ip, remainingSize, &blockProperties);
-        if (ZSTDv05_isError(cBlockSize)) return cBlockSize;
+        if (ZSTDv05_isError(cBlockSize)) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
+            return;
+        }

        ip += ZSTDv05_blockHeaderSize;
        remainingSize -= ZSTDv05_blockHeaderSize;
-        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+        if (cBlockSize > remainingSize) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+            return;
+        }

        if (cBlockSize == 0) break;   /* bt_end */

        ip += cBlockSize;
        remainingSize -= cBlockSize;
+        nbBlocks++;
    }

-    return ip - (const BYTE*)src;
+    *cSize = ip - (const BYTE*)src;
+    *dBound = nbBlocks * BLOCKSIZE;
 }

 /* ******************************
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v05.h
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v05.h
@ -33,13 +33,18 @@ extern "C" {
 size_t ZSTDv05_decompress( void* dst, size_t dstCapacity,
                     const void* src, size_t compressedSize);

-/**
-ZSTDv05_getFrameSrcSize() : get the source length of a ZSTD frame
-    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
-    return : the number of bytes that would be read to decompress this frame
-             or an errorCode if it fails (which can be tested using ZSTDv05_isError())
-*/
-size_t ZSTDv05_findFrameCompressedSize(const void* src, size_t compressedSize);
+ /**
+ ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format
+     srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+     cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                 or an error code if it fails (which can be tested using ZSTDv01_isError())
+     dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                 or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+    note : assumes `cSize` and `dBound` are _not_ NULL.
+ */
+void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                     size_t* cSize, unsigned long long* dBound);

 /* *************************************
 *  Helper functions
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v06.c
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v06.c
@ -506,6 +506,8 @@ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
 #define FSEv06_ENCODING_STATIC  2
 #define FSEv06_ENCODING_DYNAMIC 3

+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+
 static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                      1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
                                     13,14,15,16 };
@ -858,7 +860,7 @@ MEM_STATIC unsigned BITv06_highbit32 ( U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
@ -3240,14 +3242,12 @@ static size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr,
    }

    /* FSE table descriptors */
+    if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
    {   U32 const LLtype  = *ip >> 6;
        U32 const Offtype = (*ip >> 4) & 3;
        U32 const MLtype  = (*ip >> 2) & 3;
        ip++;

-        /* check */
-        if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
-
        /* Build DTables */
        {   size_t const bhSize = ZSTDv06_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
            if (ZSTDv06_isError(bhSize)) return ERROR(corruption_detected);
@ -3406,7 +3406,7 @@ static size_t ZSTDv06_execSequence(BYTE* op,
    if (sequence.offset < 8) {
        /* close range match, overlap */
        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
        int const sub2 = dec64table[sequence.offset];
        op[0] = match[0];
        op[1] = match[1];
@ -3654,36 +3654,62 @@ size_t ZSTDv06_decompress(void* dst, size_t dstCapacity, const void* src, size_t
 #endif
 }

-size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t srcSize)
+/* ZSTD_errorFrameSizeInfoLegacy() :
+   assumes `cSize` and `dBound` are _not_ NULL */
+static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
+{
+    *cSize = ret;
+    *dBound = ZSTD_CONTENTSIZE_ERROR;
+}
+
+void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
 {
    const BYTE* ip = (const BYTE*)src;
    size_t remainingSize = srcSize;
+    size_t nbBlocks = 0;
    blockProperties_t blockProperties = { bt_compressed, 0 };

    /* Frame Header */
-    {   size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min);
-        if (ZSTDv06_isError(frameHeaderSize)) return frameHeaderSize;
-        if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) return ERROR(prefix_unknown);
-        if (srcSize < frameHeaderSize+ZSTDv06_blockHeaderSize) return ERROR(srcSize_wrong);
+    {   size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, srcSize);
+        if (ZSTDv06_isError(frameHeaderSize)) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
+            return;
+        }
+        if (MEM_readLE32(src) != ZSTDv06_MAGICNUMBER) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
+            return;
+        }
+        if (srcSize < frameHeaderSize+ZSTDv06_blockHeaderSize) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+            return;
+        }
        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
    }

    /* Loop on each block */
    while (1) {
        size_t const cBlockSize = ZSTDv06_getcBlockSize(ip, remainingSize, &blockProperties);
-        if (ZSTDv06_isError(cBlockSize)) return cBlockSize;
+        if (ZSTDv06_isError(cBlockSize)) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
+            return;
+        }

        ip += ZSTDv06_blockHeaderSize;
        remainingSize -= ZSTDv06_blockHeaderSize;
-        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+        if (cBlockSize > remainingSize) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+            return;
+        }

        if (cBlockSize == 0) break;   /* bt_end */

        ip += cBlockSize;
        remainingSize -= cBlockSize;
+        nbBlocks++;
    }

-    return ip - (const BYTE*)src;
+    *cSize = ip - (const BYTE*)src;
+    *dBound = nbBlocks * ZSTDv06_BLOCKSIZE_MAX;
 }

 /*_******************************
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v06.h
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v06.h
@ -43,12 +43,17 @@ ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity,
                                    const void* src, size_t compressedSize);

 /**
-ZSTDv06_getFrameSrcSize() : get the source length of a ZSTD frame
-    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
-    return : the number of bytes that would be read to decompress this frame
-             or an errorCode if it fails (which can be tested using ZSTDv06_isError())
+ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format
+    srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                or an error code if it fails (which can be tested using ZSTDv01_isError())
+    dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+    note : assumes `cSize` and `dBound` are _not_ NULL.
 */
-size_t ZSTDv06_findFrameCompressedSize(const void* src, size_t compressedSize);
+void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                     size_t* cSize, unsigned long long* dBound);

 /* *************************************
 *  Helper functions
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v07.c
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v07.c
@ -530,7 +530,7 @@ MEM_STATIC unsigned BITv07_highbit32 (U32 val)
    _BitScanReverse ( &r, val );
    return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
-    return 31 - __builtin_clz (val);
+    return __builtin_clz (val) ^ 31;
 #   else   /* Software version */
    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
@ -2740,6 +2740,8 @@ typedef enum { lbt_huffman, lbt_repeat, lbt_raw, lbt_rle } litBlockType_t;
 #define FSEv07_ENCODING_STATIC  2
 #define FSEv07_ENCODING_DYNAMIC 3

+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+
 static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                      1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
                                     13,14,15,16 };
@ -3468,14 +3470,12 @@ static size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr,
    }

    /* FSE table descriptors */
+    if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */
    {   U32 const LLtype  = *ip >> 6;
        U32 const OFtype = (*ip >> 4) & 3;
        U32 const MLtype  = (*ip >> 2) & 3;
        ip++;

-        /* check */
-        if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
-
        /* Build DTables */
        {   size_t const llhSize = ZSTDv07_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable);
            if (ZSTDv07_isError(llhSize)) return ERROR(corruption_detected);
@ -3631,7 +3631,7 @@ size_t ZSTDv07_execSequence(BYTE* op,
    if (sequence.offset < 8) {
        /* close range match, overlap */
        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
-        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* substracted */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
        int const sub2 = dec64table[sequence.offset];
        op[0] = match[0];
        op[1] = match[1];
@ -3895,19 +3895,40 @@ size_t ZSTDv07_decompress(void* dst, size_t dstCapacity, const void* src, size_t
 #endif
 }

-size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize)
+/* ZSTD_errorFrameSizeInfoLegacy() :
+   assumes `cSize` and `dBound` are _not_ NULL */
+static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret)
+{
+    *cSize = ret;
+    *dBound = ZSTD_CONTENTSIZE_ERROR;
+}
+
+void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound)
 {
    const BYTE* ip = (const BYTE*)src;
    size_t remainingSize = srcSize;
+    size_t nbBlocks = 0;

    /* check */
-    if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
+    if (srcSize < ZSTDv07_frameHeaderSize_min+ZSTDv07_blockHeaderSize) {
+        ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+        return;
+    }

    /* Frame Header */
-    {   size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min);
-        if (ZSTDv07_isError(frameHeaderSize)) return frameHeaderSize;
-        if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) return ERROR(prefix_unknown);
-        if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) return ERROR(srcSize_wrong);
+    {   size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, srcSize);
+        if (ZSTDv07_isError(frameHeaderSize)) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize);
+            return;
+        }
+        if (MEM_readLE32(src) != ZSTDv07_MAGICNUMBER) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown));
+            return;
+        }
+        if (srcSize < frameHeaderSize+ZSTDv07_blockHeaderSize) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+            return;
+        }
        ip += frameHeaderSize; remainingSize -= frameHeaderSize;
    }

@ -3915,20 +3936,28 @@ size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t srcSize)
    while (1) {
        blockProperties_t blockProperties;
        size_t const cBlockSize = ZSTDv07_getcBlockSize(ip, remainingSize, &blockProperties);
-        if (ZSTDv07_isError(cBlockSize)) return cBlockSize;
+        if (ZSTDv07_isError(cBlockSize)) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, cBlockSize);
+            return;
+        }

        ip += ZSTDv07_blockHeaderSize;
        remainingSize -= ZSTDv07_blockHeaderSize;

        if (blockProperties.blockType == bt_end) break;

-        if (cBlockSize > remainingSize) return ERROR(srcSize_wrong);
+        if (cBlockSize > remainingSize) {
+            ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong));
+            return;
+        }

        ip += cBlockSize;
        remainingSize -= cBlockSize;
+        nbBlocks++;
    }

-    return ip - (const BYTE*)src;
+    *cSize = ip - (const BYTE*)src;
+    *dBound = nbBlocks * ZSTDv07_BLOCKSIZE_ABSOLUTEMAX;
 }

 /*_******************************
--- a/src/borg/algorithms/zstd/lib/legacy/zstd_v07.h
+++ b/src/borg/algorithms/zstd/lib/legacy/zstd_v07.h
@ -50,12 +50,17 @@ ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity,
                                    const void* src, size_t compressedSize);

 /**
-ZSTDv07_getFrameSrcSize() : get the source length of a ZSTD frame
-    compressedSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
-    return : the number of bytes that would be read to decompress this frame
-             or an errorCode if it fails (which can be tested using ZSTDv07_isError())
+ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format
+    srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+    cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                or an error code if it fails (which can be tested using ZSTDv01_isError())
+    dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+    note : assumes `cSize` and `dBound` are _not_ NULL.
 */
-size_t ZSTDv07_findFrameCompressedSize(const void* src, size_t compressedSize);
+void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                     size_t* cSize, unsigned long long* dBound);

 /*======  Helper functions  ======*/
 ZSTDLIBv07_API unsigned    ZSTDv07_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
--- a/src/borg/algorithms/zstd/lib/zstd.h
+++ b/src/borg/algorithms/zstd/lib/zstd.h