Update LodePNG to the latest version (20191107 -> 20201017)
diff --git a/src/zopflipng/lodepng/lodepng.cpp b/src/zopflipng/lodepng/lodepng.cpp
index 50baa8d..51492b8 100644
--- a/src/zopflipng/lodepng/lodepng.cpp
+++ b/src/zopflipng/lodepng/lodepng.cpp
@@ -1,7 +1,7 @@
 /*
-LodePNG version 20191107
+LodePNG version 20201017
 
-Copyright (c) 2005-2019 Lode Vandevenne
+Copyright (c) 2005-2020 Lode Vandevenne
 
 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
@@ -44,7 +44,7 @@
 #pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/
 #endif /*_MSC_VER */
 
-const char* LODEPNG_VERSION_STRING = "20191107";
+const char* LODEPNG_VERSION_STRING = "20201017";
 
 /*
 This source file is built up in the following large parts. The code sections
@@ -78,6 +78,7 @@
   return malloc(size);
 }
 
+/* NOTE: when realloc returns NULL, it leaves the original memory untouched */
 static void* lodepng_realloc(void* ptr, size_t new_size) {
 #ifdef LODEPNG_MAX_ALLOC
   if(new_size > LODEPNG_MAX_ALLOC) return 0;
@@ -104,14 +105,15 @@
 #endif
 
 /* restrict is not available in C90, but use it when supported by the compiler */
-#if (defined(__GNUC__) && defined(__GNUC_MINOR__) && (__GNUC__ >= 3) && (__GNUC_MINOR__ >= 1)) ||\
-    (defined(_MSC_VER) && (_MSC_VER >= 1400)) || (defined(__WATCOMC__) && (__WATCOMC__ >= 1250))
+#if (defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) ||\
+    (defined(_MSC_VER) && (_MSC_VER >= 1400)) || \
+    (defined(__WATCOMC__) && (__WATCOMC__ >= 1250) && !defined(__cplusplus))
 #define LODEPNG_RESTRICT __restrict
 #else
 #define LODEPNG_RESTRICT /* not available */
 #endif
 
-/* Replacements for C library functions memcpy and strlen, to support those platforms
+/* Replacements for C library functions such as memcpy and strlen, to support platforms
 where a full C library is not available. The compiler can recognize them and compile
 to something as fast. */
 
@@ -121,11 +123,17 @@
   for(i = 0; i < size; i++) ((char*)dst)[i] = ((const char*)src)[i];
 }
 
+static void lodepng_memset(void* LODEPNG_RESTRICT dst,
+                           int value, size_t num) {
+  size_t i;
+  for(i = 0; i < num; i++) ((char*)dst)[i] = (char)value;
+}
+
 /* does not check memory out of bounds, do not use on untrusted data */
 static size_t lodepng_strlen(const char* a) {
   const char* orig = a;
   /* avoid warning about unused function in case of disabled COMPILE... macros */
-  (void)lodepng_strlen;
+  (void)(&lodepng_strlen);
   while(*a) a++;
   return (size_t)(a - orig);
 }
@@ -134,6 +142,14 @@
 #define LODEPNG_MIN(a, b) (((a) < (b)) ? (a) : (b))
 #define LODEPNG_ABS(x) ((x) < 0 ? -(x) : (x))
 
+#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_DECODER)
+/* Safely check if adding two integers will overflow (no undefined
+behavior, compiler removing the code, etc...) and output result. */
+static int lodepng_addofl(size_t a, size_t b, size_t* result) {
+  *result = a + b; /* Unsigned addition is well defined and safe in C90 */
+  return *result < a;
+}
+#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_DECODER)*/
 
 #ifdef LODEPNG_COMPILE_DECODER
 /* Safely check if multiplying two integers will overflow (no undefined
@@ -143,13 +159,6 @@
   return (a != 0 && *result / a != b);
 }
 
-/* Safely check if adding two integers will overflow (no undefined
-behavior, compiler removing the code, etc...) and output result. */
-static int lodepng_addofl(size_t a, size_t b, size_t* result) {
-  *result = a + b; /* Unsigned addition is well defined and safe in C90 */
-  return *result < a;
-}
-
 #ifdef LODEPNG_COMPILE_ZLIB
 /* Safely check if a + b > c, even if overflow could happen. */
 static int lodepng_gtofl(size_t a, size_t b, size_t c) {
@@ -166,7 +175,7 @@
 out of a loop (to go to the cleanup phase of a function). This macro does that.
 It makes the error handling code shorter and more readable.
 
-Example: if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83);
+Example: if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83);
 */
 #define CERROR_BREAK(errorvar, code){\
   errorvar = code;\
@@ -219,9 +228,10 @@
 }
 
 /*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned uivector_reserve(uivector* p, size_t allocsize) {
+static unsigned uivector_resize(uivector* p, size_t size) {
+  size_t allocsize = size * sizeof(unsigned);
   if(allocsize > p->allocsize) {
-    size_t newsize = (allocsize > p->allocsize * 2u) ? allocsize : ((allocsize * 3u) >> 1u);
+    size_t newsize = allocsize + (p->allocsize >> 1u);
     void* data = lodepng_realloc(p->data, newsize);
     if(data) {
       p->allocsize = newsize;
@@ -229,24 +239,10 @@
     }
     else return 0; /*error: not enough memory*/
   }
-  return 1;
-}
-
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned uivector_resize(uivector* p, size_t size) {
-  if(!uivector_reserve(p, size * sizeof(unsigned))) return 0;
   p->size = size;
   return 1; /*success*/
 }
 
-/*resize and give all new elements the value*/
-static unsigned uivector_resizev(uivector* p, size_t size, unsigned value) {
-  size_t oldsize = p->size, i;
-  if(!uivector_resize(p, size)) return 0;
-  for(i = oldsize; i < size; ++i) p->data[i] = value;
-  return 1;
-}
-
 static void uivector_init(uivector* p) {
   p->data = NULL;
   p->size = p->allocsize = 0;
@@ -271,9 +267,9 @@
 } ucvector;
 
 /*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned ucvector_reserve(ucvector* p, size_t allocsize) {
-  if(allocsize > p->allocsize) {
-    size_t newsize = (allocsize > p->allocsize * 2u) ? allocsize : ((allocsize * 3u) >> 1u);
+static unsigned ucvector_resize(ucvector* p, size_t size) {
+  if(size > p->allocsize) {
+    size_t newsize = size + (p->allocsize >> 1u);
     void* data = lodepng_realloc(p->data, newsize);
     if(data) {
       p->allocsize = newsize;
@@ -281,49 +277,17 @@
     }
     else return 0; /*error: not enough memory*/
   }
-  return 1;
-}
-
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned ucvector_resize(ucvector* p, size_t size) {
-  if(!ucvector_reserve(p, size * sizeof(unsigned char))) return 0;
   p->size = size;
   return 1; /*success*/
 }
 
-#ifdef LODEPNG_COMPILE_PNG
-
-static void ucvector_cleanup(void* p) {
-  ((ucvector*)p)->size = ((ucvector*)p)->allocsize = 0;
-  lodepng_free(((ucvector*)p)->data);
-  ((ucvector*)p)->data = NULL;
+static ucvector ucvector_init(unsigned char* buffer, size_t size) {
+  ucvector v;
+  v.data = buffer;
+  v.allocsize = v.size = size;
+  return v;
 }
 
-static void ucvector_init(ucvector* p) {
-  p->data = NULL;
-  p->size = p->allocsize = 0;
-}
-#endif /*LODEPNG_COMPILE_PNG*/
-
-#ifdef LODEPNG_COMPILE_ZLIB
-/*you can both convert from vector to buffer&size and vice versa. If you use
-init_buffer to take over a buffer and size, it is not needed to use cleanup*/
-static void ucvector_init_buffer(ucvector* p, unsigned char* buffer, size_t size) {
-  p->data = buffer;
-  p->allocsize = p->size = size;
-}
-#endif /*LODEPNG_COMPILE_ZLIB*/
-
-#if (defined(LODEPNG_COMPILE_PNG) && defined(LODEPNG_COMPILE_ANCILLARY_CHUNKS)) || defined(LODEPNG_COMPILE_ENCODER)
-/*returns 1 if success, 0 if failure ==> nothing done*/
-static unsigned ucvector_push_back(ucvector* p, unsigned char c) {
-  if(!ucvector_resize(p, p->size + 1)) return 0;
-  p->data[p->size - 1] = c;
-  return 1;
-}
-#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
-
-
 /* ////////////////////////////////////////////////////////////////////////// */
 
 #ifdef LODEPNG_COMPILE_PNG
@@ -335,19 +299,20 @@
   *out = NULL;
 }
 
-/* dynamically allocates a new string with a copy of the null terminated input text */
-static char* alloc_string(const char* in) {
-  size_t insize = lodepng_strlen(in);
+/*also appends null termination character*/
+static char* alloc_string_sized(const char* in, size_t insize) {
   char* out = (char*)lodepng_malloc(insize + 1);
   if(out) {
-    size_t i;
-    for(i = 0; i != insize; ++i) {
-      out[i] = in[i];
-    }
-    out[i] = 0;
+    lodepng_memcpy(out, in, insize);
+    out[insize] = 0;
   }
   return out;
 }
+
+/* dynamically allocates a new string with a copy of the null terminated input text */
+static char* alloc_string(const char* in) {
+  return alloc_string_sized(in, lodepng_strlen(in));
+}
 #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
 #endif /*LODEPNG_COMPILE_PNG*/
 
@@ -406,13 +371,13 @@
   readsize = fread(out, 1, size, file);
   fclose(file);
 
-  if (readsize != size) return 78;
+  if(readsize != size) return 78;
   return 0;
 }
 
 unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename) {
   long size = lodepng_filesize(filename);
-  if (size < 0) return 78;
+  if(size < 0) return 78;
   *outsize = (size_t)size;
 
   *out = (unsigned char*)lodepng_malloc((size_t)size);
@@ -444,18 +409,21 @@
 
 typedef struct {
   ucvector* data;
-  size_t bp;
+  unsigned char bp; /*ok to overflow, indicates bit pos inside byte*/
 } LodePNGBitWriter;
 
-void LodePNGBitWriter_init(LodePNGBitWriter* writer, ucvector* data) {
+static void LodePNGBitWriter_init(LodePNGBitWriter* writer, ucvector* data) {
   writer->data = data;
   writer->bp = 0;
 }
 
 /*TODO: this ignores potential out of memory errors*/
-#define WRITEBIT(/*size_t**/ writer, /*unsigned char*/ bit){\
+#define WRITEBIT(writer, bit){\
   /* append new byte */\
-  if(((writer->bp) & 7u) == 0) ucvector_push_back(writer->data, (unsigned char)0);\
+  if(((writer->bp) & 7u) == 0) {\
+    if(!ucvector_resize(writer->data, writer->data->size + 1)) return;\
+    writer->data->data[writer->data->size - 1] = 0;\
+  }\
   (writer->data->data[writer->data->size - 1]) |= (bit << ((writer->bp) & 7u));\
   ++writer->bp;\
 }
@@ -465,7 +433,7 @@
   if(nbits == 1) { /* compiler should statically compile this case if nbits == 1 */
     WRITEBIT(writer, value);
   } else {
-    /* TODO: increase output size nly once here rather than in each WRITEBIT */
+    /* TODO: increase output size only once here rather than in each WRITEBIT */
     size_t i;
     for(i = 0; i != nbits; ++i) {
       WRITEBIT(writer, (unsigned char)((value >> i) & 1));
@@ -527,14 +495,14 @@
   size_t start = reader->bp >> 3u;
   size_t size = reader->size;
   if(start + 1u < size) {
-    reader->buffer = (unsigned)(reader->data[start + 0]) | (unsigned)(reader->data[start + 1] << 8u);
+    reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u);
     reader->buffer >>= (reader->bp & 7u);
     return 1;
   } else {
     reader->buffer = 0;
     if(start + 0u < size) reader->buffer |= reader->data[start + 0];
     reader->buffer >>= (reader->bp & 7u);
-    return reader->bp + nbits < reader->bitsize;
+    return reader->bp + nbits <= reader->bitsize;
   }
 }
 
@@ -543,16 +511,16 @@
   size_t start = reader->bp >> 3u;
   size_t size = reader->size;
   if(start + 2u < size) {
-    reader->buffer = (unsigned)(reader->data[start + 0]) | (unsigned)(reader->data[start + 1] << 8u) |
-                     (unsigned)(reader->data[start + 2] << 16u);
+    reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) |
+                     ((unsigned)reader->data[start + 2] << 16u);
     reader->buffer >>= (reader->bp & 7u);
     return 1;
   } else {
     reader->buffer = 0;
     if(start + 0u < size) reader->buffer |= reader->data[start + 0];
-    if(start + 1u < size) reader->buffer |= (unsigned)(reader->data[start + 1] << 8u);
+    if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u);
     reader->buffer >>= (reader->bp & 7u);
-    return reader->bp + nbits < reader->bitsize;
+    return reader->bp + nbits <= reader->bitsize;
   }
 }
 
@@ -561,17 +529,17 @@
   size_t start = reader->bp >> 3u;
   size_t size = reader->size;
   if(start + 3u < size) {
-    reader->buffer = (unsigned)(reader->data[start + 0]) | (unsigned)(reader->data[start + 1] << 8u) |
-                     (unsigned)(reader->data[start + 2] << 16u) | (unsigned)(reader->data[start + 3] << 24u);
+    reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) |
+                     ((unsigned)reader->data[start + 2] << 16u) | ((unsigned)reader->data[start + 3] << 24u);
     reader->buffer >>= (reader->bp & 7u);
     return 1;
   } else {
     reader->buffer = 0;
     if(start + 0u < size) reader->buffer |= reader->data[start + 0];
-    if(start + 1u < size) reader->buffer |= (unsigned)(reader->data[start + 1] << 8u);
-    if(start + 2u < size) reader->buffer |= (unsigned)(reader->data[start + 2] << 16u);
+    if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u);
+    if(start + 2u < size) reader->buffer |= ((unsigned)reader->data[start + 2] << 16u);
     reader->buffer >>= (reader->bp & 7u);
-    return reader->bp + nbits < reader->bitsize;
+    return reader->bp + nbits <= reader->bitsize;
   }
 }
 
@@ -580,24 +548,25 @@
   size_t start = reader->bp >> 3u;
   size_t size = reader->size;
   if(start + 4u < size) {
-    reader->buffer = (unsigned)(reader->data[start + 0]) | (unsigned)(reader->data[start + 1] << 8u) |
-                     (unsigned)(reader->data[start + 2] << 16u) | (unsigned)(reader->data[start + 3] << 24u);
+    reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) |
+                     ((unsigned)reader->data[start + 2] << 16u) | ((unsigned)reader->data[start + 3] << 24u);
     reader->buffer >>= (reader->bp & 7u);
-    reader->buffer |= ((unsigned)(reader->data[start + 4] << 24u) << (7u - (reader->bp & 7u)));
+    reader->buffer |= (((unsigned)reader->data[start + 4] << 24u) << (8u - (reader->bp & 7u)));
     return 1;
   } else {
     reader->buffer = 0;
     if(start + 0u < size) reader->buffer |= reader->data[start + 0];
-    if(start + 1u < size) reader->buffer |= (unsigned)(reader->data[start + 1] << 8u);
-    if(start + 2u < size) reader->buffer |= (unsigned)(reader->data[start + 2] << 16u);
-    if(start + 3u < size) reader->buffer |= (unsigned)(reader->data[start + 3] << 24u);
+    if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u);
+    if(start + 2u < size) reader->buffer |= ((unsigned)reader->data[start + 2] << 16u);
+    if(start + 3u < size) reader->buffer |= ((unsigned)reader->data[start + 3] << 24u);
     reader->buffer >>= (reader->bp & 7u);
-    return reader->bp + nbits < reader->bitsize;
+    return reader->bp + nbits <= reader->bitsize;
   }
 }
 
-/* Get bits without advancing the bit pointer. Must have enough bits available with ensureBits */
+/* Get bits without advancing the bit pointer. Must have enough bits available with ensureBits. Max nbits is 31. */
 static unsigned peekBits(LodePNGBitReader* reader, size_t nbits) {
+  /* The shift allows nbits to be only up to 31. */
   return reader->buffer & ((1u << nbits) - 1u);
 }
 
@@ -613,6 +582,26 @@
   advanceBits(reader, nbits);
   return result;
 }
+
+/* Public for testing only. steps and result must have numsteps values. */
+unsigned lode_png_test_bitreader(const unsigned char* data, size_t size,
+                                 size_t numsteps, const size_t* steps, unsigned* result) {
+  size_t i;
+  LodePNGBitReader reader;
+  unsigned error = LodePNGBitReader_init(&reader, data, size);
+  if(error) return 0;
+  for(i = 0; i < numsteps; i++) {
+    size_t step = steps[i];
+    unsigned ok;
+    if(step > 25) ok = ensureBits32(&reader, step);
+    else if(step > 17) ok = ensureBits25(&reader, step);
+    else if(step > 9) ok = ensureBits17(&reader, step);
+    else ok = ensureBits9(&reader, step);
+    if(!ok) return 0;
+    result[i] = readBits(&reader, step);
+  }
+  return 1;
+}
 #endif /*LODEPNG_COMPILE_DECODER*/
 
 static unsigned reverseBits(unsigned bits, unsigned num) {
@@ -655,8 +644,8 @@
   = {0, 0, 0, 0, 1, 1, 2,  2,  3,  3,  4,  4,  5,  5,   6,   6,   7,   7,   8,
        8,    9,    9,   10,   10,   11,   11,   12,    12,    13,    13};
 
-/*the order in which "code length alphabet code lengths" are stored, out of this
-the huffman tree of the dynamic huffman tree lengths is generated*/
+/*the order in which "code length alphabet code lengths" are stored as specified by deflate, out of this the huffman
+tree of the dynamic huffman tree lengths is generated*/
 static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES]
   = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
 
@@ -706,7 +695,7 @@
   if(!maxlens) return 83; /*alloc fail*/
 
   /* compute maxlens: max total bit length of symbols sharing prefix in the first table*/
-  for(i = 0; i < headsize; ++i) maxlens[i] = 0;
+  lodepng_memset(maxlens, 0, headsize * sizeof(*maxlens));
   for(i = 0; i < tree->numcodes; i++) {
     unsigned symbol = tree->codes[i];
     unsigned l = tree->lengths[i];
@@ -835,7 +824,7 @@
     for(bits = 0; bits != tree->numcodes; ++bits) ++blcount[tree->lengths[bits]];
     /*step 2: generate the nextcode values*/
     for(bits = 1; bits <= tree->maxbitlen; ++bits) {
-      nextcode[bits] = (nextcode[bits - 1] + blcount[bits - 1]) << 1;
+      nextcode[bits] = (nextcode[bits - 1] + blcount[bits - 1]) << 1u;
     }
     /*step 3: generate all the codes*/
     for(n = 0; n != tree->numcodes; ++n) {
@@ -996,7 +985,7 @@
     }
   }
 
-  for(i = 0; i != numcodes; ++i) lengths[i] = 0;
+  lodepng_memset(lengths, 0, numcodes * sizeof(*lengths));
 
   /*ensure at least two present symbols. There should be at least one symbol
   according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To
@@ -1056,28 +1045,17 @@
 /*Create the Huffman tree given the symbol frequencies*/
 static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies,
                                                 size_t mincodes, size_t numcodes, unsigned maxbitlen) {
-  size_t i;
   unsigned error = 0;
   while(!frequencies[numcodes - 1] && numcodes > mincodes) --numcodes; /*trim zeroes*/
+  tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned));
+  if(!tree->lengths) return 83; /*alloc fail*/
   tree->maxbitlen = maxbitlen;
   tree->numcodes = (unsigned)numcodes; /*number of symbols*/
-  tree->lengths = (unsigned*)lodepng_realloc(tree->lengths, numcodes * sizeof(unsigned));
-  if(!tree->lengths) return 83; /*alloc fail*/
-  /*initialize all lengths to 0*/
-  for(i = 0; i < numcodes; i++) tree->lengths[i] = 0;
 
   error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen);
   if(!error) error = HuffmanTree_makeFromLengths2(tree);
   return error;
 }
-
-static unsigned HuffmanTree_getCode(const HuffmanTree* tree, unsigned index) {
-  return tree->codes[index];
-}
-
-static unsigned HuffmanTree_getLength(const HuffmanTree* tree, unsigned index) {
-  return tree->lengths[index];
-}
 #endif /*LODEPNG_COMPILE_ENCODER*/
 
 /*get the literal and length code tree of a deflated block with fixed tree, as per the deflate specification*/
@@ -1140,11 +1118,12 @@
 /* / Inflator (Decompressor)                                                / */
 /* ////////////////////////////////////////////////////////////////////////// */
 
-/*get the tree of a deflated block with fixed tree, as specified in the deflate specification*/
-static void getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d) {
-  /*TODO: check for out of memory errors*/
-  generateFixedLitLenTree(tree_ll);
-  generateFixedDistanceTree(tree_d);
+/*get the tree of a deflated block with fixed tree, as specified in the deflate specification
+Returns error code.*/
+static unsigned getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d) {
+  unsigned error = generateFixedLitLenTree(tree_ll);
+  if(error) return error;
+  return generateFixedDistanceTree(tree_d);
 }
 
 /*get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree*/
@@ -1195,8 +1174,8 @@
     bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
     bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
     if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/);
-    for(i = 0; i != NUM_DEFLATE_CODE_SYMBOLS; ++i) bitlen_ll[i] = 0;
-    for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen_d[i] = 0;
+    lodepng_memset(bitlen_ll, 0, NUM_DEFLATE_CODE_SYMBOLS * sizeof(*bitlen_ll));
+    lodepng_memset(bitlen_d, 0, NUM_DISTANCE_SYMBOLS * sizeof(*bitlen_d));
 
     /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/
     i = 0;
@@ -1281,8 +1260,8 @@
 }
 
 /*inflate a block with dynamic of fixed Huffman tree. btype must be 1 or 2.*/
-static unsigned inflateHuffmanBlock(ucvector* out, size_t* pos, LodePNGBitReader* reader,
-                                    unsigned btype) {
+static unsigned inflateHuffmanBlock(ucvector* out, LodePNGBitReader* reader,
+                                    unsigned btype, size_t max_output_size) {
   unsigned error = 0;
   HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/
   HuffmanTree tree_d; /*the huffman tree for distance codes*/
@@ -1290,7 +1269,7 @@
   HuffmanTree_init(&tree_ll);
   HuffmanTree_init(&tree_d);
 
-  if(btype == 1) getTreeInflateFixed(&tree_ll, &tree_d);
+  if(btype == 1) error = getTreeInflateFixed(&tree_ll, &tree_d);
   else /*if(btype == 2)*/ error = getTreeInflateDynamic(&tree_ll, &tree_d, reader);
 
   while(!error) /*decode all symbols until end reached, breaks at end code*/ {
@@ -1299,10 +1278,8 @@
     ensureBits25(reader, 20); /* up to 15 for the huffman symbol, up to 5 for the length extra bits */
     code_ll = huffmanDecodeSymbol(reader, &tree_ll);
     if(code_ll <= 255) /*literal symbol*/ {
-      /*ucvector_push_back would do the same, but for some reason the two lines below run 10% faster*/
-      if(!ucvector_resize(out, (*pos) + 1)) ERROR_BREAK(83 /*alloc fail*/);
-      out->data[*pos] = (unsigned char)code_ll;
-      ++(*pos);
+      if(!ucvector_resize(out, out->size + 1)) ERROR_BREAK(83 /*alloc fail*/);
+      out->data[out->size - 1] = (unsigned char)code_ll;
     } else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/ {
       unsigned code_d, distance;
       unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/
@@ -1338,21 +1315,20 @@
       }
 
       /*part 5: fill in all the out[n] values based on the length and dist*/
-      start = (*pos);
+      start = out->size;
       if(distance > start) ERROR_BREAK(52); /*too long backward distance*/
       backward = start - distance;
 
-      if(!ucvector_resize(out, (*pos) + length)) ERROR_BREAK(83 /*alloc fail*/);
-      if (distance < length) {
+      if(!ucvector_resize(out, out->size + length)) ERROR_BREAK(83 /*alloc fail*/);
+      if(distance < length) {
         size_t forward;
-        lodepng_memcpy(out->data + *pos, out->data + backward, distance);
-        *pos += distance;
+        lodepng_memcpy(out->data + start, out->data + backward, distance);
+        start += distance;
         for(forward = distance; forward < length; ++forward) {
-          out->data[(*pos)++] = out->data[backward++];
+          out->data[start++] = out->data[backward++];
         }
       } else {
-        lodepng_memcpy(out->data + *pos, out->data + backward, length);
-        *pos += length;
+        lodepng_memcpy(out->data + start, out->data + backward, length);
       }
     } else if(code_ll == 256) {
       break; /*end code, break the loop*/
@@ -1366,6 +1342,9 @@
       /* TODO: revise error codes 10,11,50: the above comment is no longer valid */
       ERROR_BREAK(51); /*error, bit pointer jumps past memory*/
     }
+    if(max_output_size && out->size > max_output_size) {
+      ERROR_BREAK(109); /*error, larger than max size*/
+    }
   }
 
   HuffmanTree_cleanup(&tree_ll);
@@ -1374,8 +1353,8 @@
   return error;
 }
 
-static unsigned inflateNoCompression(ucvector* out, size_t* pos,
-                                     LodePNGBitReader* reader, const LodePNGDecompressSettings* settings) {
+static unsigned inflateNoCompression(ucvector* out, LodePNGBitReader* reader,
+                                     const LodePNGDecompressSettings* settings) {
   size_t bytepos;
   size_t size = reader->size;
   unsigned LEN, NLEN, error = 0;
@@ -1385,21 +1364,20 @@
 
   /*read LEN (2 bytes) and NLEN (2 bytes)*/
   if(bytepos + 4 >= size) return 52; /*error, bit pointer will jump past memory*/
-  LEN = (unsigned)reader->data[bytepos] + (unsigned)(reader->data[bytepos + 1] << 8u); bytepos += 2;
-  NLEN = (unsigned)reader->data[bytepos] + (unsigned)(reader->data[bytepos + 1] << 8u); bytepos += 2;
+  LEN = (unsigned)reader->data[bytepos] + ((unsigned)reader->data[bytepos + 1] << 8u); bytepos += 2;
+  NLEN = (unsigned)reader->data[bytepos] + ((unsigned)reader->data[bytepos + 1] << 8u); bytepos += 2;
 
   /*check if 16-bit NLEN is really the one's complement of LEN*/
   if(!settings->ignore_nlen && LEN + NLEN != 65535) {
     return 21; /*error: NLEN is not one's complement of LEN*/
   }
 
-  if(!ucvector_resize(out, (*pos) + LEN)) return 83; /*alloc fail*/
+  if(!ucvector_resize(out, out->size + LEN)) return 83; /*alloc fail*/
 
   /*read the literal data: LEN bytes are now stored in the out buffer*/
   if(bytepos + LEN > size) return 23; /*error: reading outside of in buffer*/
 
-  lodepng_memcpy(out->data + *pos, reader->data + bytepos, LEN);
-  *pos += LEN;
+  lodepng_memcpy(out->data + out->size - LEN, reader->data + bytepos, LEN);
   bytepos += LEN;
 
   reader->bp = bytepos << 3u;
@@ -1411,7 +1389,6 @@
                                  const unsigned char* in, size_t insize,
                                  const LodePNGDecompressSettings* settings) {
   unsigned BFINAL = 0;
-  size_t pos = 0; /*byte position in the out buffer*/
   LodePNGBitReader reader;
   unsigned error = LodePNGBitReader_init(&reader, in, insize);
 
@@ -1424,10 +1401,10 @@
     BTYPE = readBits(&reader, 2);
 
     if(BTYPE == 3) return 20; /*error: invalid BTYPE*/
-    else if(BTYPE == 0) error = inflateNoCompression(out, &pos, &reader, settings); /*no compression*/
-    else error = inflateHuffmanBlock(out, &pos, &reader, BTYPE); /*compression, BTYPE 01 or 10*/
-
-    if(error) return error;
+    else if(BTYPE == 0) error = inflateNoCompression(out, &reader, settings); /*no compression*/
+    else error = inflateHuffmanBlock(out, &reader, BTYPE, settings->max_output_size); /*compression, BTYPE 01 or 10*/
+    if(!error && settings->max_output_size && out->size > settings->max_output_size) error = 109;
+    if(error) break;
   }
 
   return error;
@@ -1436,22 +1413,27 @@
 unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
                          const unsigned char* in, size_t insize,
                          const LodePNGDecompressSettings* settings) {
-  unsigned error;
-  ucvector v;
-  ucvector_init_buffer(&v, *out, *outsize);
-  error = lodepng_inflatev(&v, in, insize, settings);
+  ucvector v = ucvector_init(*out, *outsize);
+  unsigned error = lodepng_inflatev(&v, in, insize, settings);
   *out = v.data;
   *outsize = v.size;
   return error;
 }
 
-static unsigned inflate(unsigned char** out, size_t* outsize,
-                        const unsigned char* in, size_t insize,
+static unsigned inflatev(ucvector* out, const unsigned char* in, size_t insize,
                         const LodePNGDecompressSettings* settings) {
   if(settings->custom_inflate) {
-    return settings->custom_inflate(out, outsize, in, insize, settings);
+    unsigned error = settings->custom_inflate(&out->data, &out->size, in, insize, settings);
+    out->allocsize = out->size;
+    if(error) {
+      /*the custom inflate is allowed to have its own error codes, however, we translate it to code 110*/
+      error = 110;
+      /*if there's a max output size, and the custom zlib returned error, then indicate that error instead*/
+      if(settings->max_output_size && out->size > settings->max_output_size) error = 109;
+    }
+    return error;
   } else {
-    return lodepng_inflate(out, outsize, in, insize, settings);
+    return lodepng_inflatev(out, in, insize, settings);
   }
 }
 
@@ -1474,7 +1456,7 @@
 
   while(left <= right) {
     size_t mid = (left + right) >> 1;
-    if (array[mid] >= value) right = mid - 1;
+    if(array[mid] >= value) right = mid - 1;
     else left = mid + 1;
   }
   if(left >= array_size || array[left] > value) left--;
@@ -1493,10 +1475,15 @@
   unsigned dist_code = (unsigned)searchCodeIndex(DISTANCEBASE, 30, distance);
   unsigned extra_distance = (unsigned)(distance - DISTANCEBASE[dist_code]);
 
-  uivector_push_back(values, length_code + FIRST_LENGTH_CODE_INDEX);
-  uivector_push_back(values, extra_length);
-  uivector_push_back(values, dist_code);
-  uivector_push_back(values, extra_distance);
+  size_t pos = values->size;
+  /*TODO: return error when this fails (out of memory)*/
+  unsigned ok = uivector_resize(values, values->size + 4);
+  if(ok) {
+    values->data[pos + 0] = length_code + FIRST_LENGTH_CODE_INDEX;
+    values->data[pos + 1] = extra_length;
+    values->data[pos + 2] = dist_code;
+    values->data[pos + 3] = extra_distance;
+  }
 }
 
 /*3 bytes of data get encoded into two bytes. The hash cannot use more than 3
@@ -1561,14 +1548,14 @@
     by zeroes due to the filters, a better hash does not have a significant
     effect on speed in traversing the chain, and causes more time spend on
     calculating the hash.*/
-    result ^= (unsigned)(data[pos + 0] << 0u);
-    result ^= (unsigned)(data[pos + 1] << 4u);
-    result ^= (unsigned)(data[pos + 2] << 8u);
+    result ^= ((unsigned)data[pos + 0] << 0u);
+    result ^= ((unsigned)data[pos + 1] << 4u);
+    result ^= ((unsigned)data[pos + 2] << 8u);
   } else {
     size_t amount, i;
     if(pos >= size) return 0;
     amount = size - pos;
-    for(i = 0; i != amount; ++i) result ^= (unsigned)(data[pos + i] << (i * 8u));
+    for(i = 0; i != amount; ++i) result ^= ((unsigned)data[pos + i] << (i * 8u));
   }
   return result & HASH_BIT_MASK;
 }
@@ -1758,31 +1745,30 @@
   /*non compressed deflate block data: 1 bit BFINAL,2 bits BTYPE,(5 bits): it jumps to start of next byte,
   2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA*/
 
-  size_t i, j, numdeflateblocks = (datasize + 65534u) / 65535u;
+  size_t i, numdeflateblocks = (datasize + 65534u) / 65535u;
   unsigned datapos = 0;
   for(i = 0; i != numdeflateblocks; ++i) {
     unsigned BFINAL, BTYPE, LEN, NLEN;
     unsigned char firstbyte;
+    size_t pos = out->size;
 
     BFINAL = (i == numdeflateblocks - 1);
     BTYPE = 0;
 
-    firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1) << 1) + ((BTYPE & 2) << 1));
-    ucvector_push_back(out, firstbyte);
-
     LEN = 65535;
     if(datasize - datapos < 65535u) LEN = (unsigned)datasize - datapos;
     NLEN = 65535 - LEN;
 
-    ucvector_push_back(out, (unsigned char)(LEN & 255));
-    ucvector_push_back(out, (unsigned char)(LEN >> 8u));
-    ucvector_push_back(out, (unsigned char)(NLEN & 255));
-    ucvector_push_back(out, (unsigned char)(NLEN >> 8u));
+    if(!ucvector_resize(out, out->size + LEN + 5)) return 83; /*alloc fail*/
 
-    /*Decompressed data*/
-    for(j = 0; j < 65535 && datapos < datasize; ++j) {
-      ucvector_push_back(out, data[datapos++]);
-    }
+    firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1u) << 1u) + ((BTYPE & 2u) << 1u));
+    out->data[pos + 0] = firstbyte;
+    out->data[pos + 1] = (unsigned char)(LEN & 255);
+    out->data[pos + 2] = (unsigned char)(LEN >> 8u);
+    out->data[pos + 3] = (unsigned char)(NLEN & 255);
+    out->data[pos + 4] = (unsigned char)(NLEN >> 8u);
+    lodepng_memcpy(out->data + pos + 5, data + datapos, LEN);
+    datapos += LEN;
   }
 
   return 0;
@@ -1798,7 +1784,7 @@
   size_t i = 0;
   for(i = 0; i != lz77_encoded->size; ++i) {
     unsigned val = lz77_encoded->data[i];
-    writeBitsReversed(writer, HuffmanTree_getCode(tree_ll, val), HuffmanTree_getLength(tree_ll, val));
+    writeBitsReversed(writer, tree_ll->codes[val], tree_ll->lengths[val]);
     if(val > 256) /*for a length code, 3 more things have to be added*/ {
       unsigned length_index = val - FIRST_LENGTH_CODE_INDEX;
       unsigned n_length_extra_bits = LENGTHEXTRA[length_index];
@@ -1811,8 +1797,7 @@
       unsigned distance_extra_bits = lz77_encoded->data[++i];
 
       writeBits(writer, length_extra_bits, n_length_extra_bits);
-      writeBitsReversed(writer, HuffmanTree_getCode(tree_d, distance_code),
-                       HuffmanTree_getLength(tree_d, distance_code));
+      writeBitsReversed(writer, tree_d->codes[distance_code], tree_d->lengths[distance_code]);
       writeBits(writer, distance_extra_bits, n_distance_extra_bits);
     }
   }
@@ -1840,42 +1825,45 @@
   HuffmanTree tree_ll; /*tree for lit,len values*/
   HuffmanTree tree_d; /*tree for distance codes*/
   HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/
-  uivector frequencies_ll; /*frequency of lit,len codes*/
-  uivector frequencies_d; /*frequency of dist codes*/
-  uivector frequencies_cl; /*frequency of code length codes*/
-  uivector bitlen_lld; /*lit,len,dist code lengths (int bits), literally (without repeat codes).*/
-  uivector bitlen_lld_e; /*bitlen_lld encoded with repeat codes (this is a rudimentary run length compression)*/
-  /*bitlen_cl is the code length code lengths ("clcl"). The bit lengths of codes to represent tree_cl
-  (these are written as is in the file, it would be crazy to compress these using yet another huffman
-  tree that needs to be represented by yet another set of code lengths)*/
-  uivector bitlen_cl;
+  unsigned* frequencies_ll = 0; /*frequency of lit,len codes*/
+  unsigned* frequencies_d = 0; /*frequency of dist codes*/
+  unsigned* frequencies_cl = 0; /*frequency of code length codes*/
+  unsigned* bitlen_lld = 0; /*lit,len,dist code lengths (int bits), literally (without repeat codes).*/
+  unsigned* bitlen_lld_e = 0; /*bitlen_lld encoded with repeat codes (this is a rudimentary run length compression)*/
   size_t datasize = dataend - datapos;
 
   /*
-  Due to the huffman compression of huffman tree representations ("two levels"), there are some analogies:
+  If we could call "bitlen_cl" the code length code lengths ("clcl"), that is the bit lengths of codes to represent
+  tree_cl in CLCL_ORDER, then due to the huffman compression of huffman tree representations ("two levels"), there are
+  some analogies:
   bitlen_lld is to tree_cl what data is to tree_ll and tree_d.
   bitlen_lld_e is to bitlen_lld what lz77_encoded is to data.
   bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded.
   */
 
   unsigned BFINAL = final;
-  size_t numcodes_ll, numcodes_d, i;
+  size_t i;
+  size_t numcodes_ll, numcodes_d, numcodes_lld, numcodes_lld_e, numcodes_cl;
   unsigned HLIT, HDIST, HCLEN;
 
   uivector_init(&lz77_encoded);
   HuffmanTree_init(&tree_ll);
   HuffmanTree_init(&tree_d);
   HuffmanTree_init(&tree_cl);
-  uivector_init(&frequencies_ll);
-  uivector_init(&frequencies_d);
-  uivector_init(&frequencies_cl);
-  uivector_init(&bitlen_lld);
-  uivector_init(&bitlen_lld_e);
-  uivector_init(&bitlen_cl);
+  /* could fit on stack, but >1KB is on the larger side so allocate instead */
+  frequencies_ll = (unsigned*)lodepng_malloc(286 * sizeof(*frequencies_ll));
+  frequencies_d = (unsigned*)lodepng_malloc(30 * sizeof(*frequencies_d));
+  frequencies_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(*frequencies_cl));
+
+  if(!frequencies_ll || !frequencies_d || !frequencies_cl) error = 83; /*alloc fail*/
 
   /*This while loop never loops due to a break at the end, it is here to
   allow breaking out of it to the cleanup phase on error conditions.*/
   while(!error) {
+    lodepng_memset(frequencies_ll, 0, 286 * sizeof(*frequencies_ll));
+    lodepng_memset(frequencies_d, 0, 30 * sizeof(*frequencies_d));
+    lodepng_memset(frequencies_cl, 0, NUM_CODE_LENGTH_CODES * sizeof(*frequencies_cl));
+
     if(settings->use_lz77) {
       error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
                          settings->minmatch, settings->nicematch, settings->lazymatching);
@@ -1885,94 +1873,92 @@
       for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/
     }
 
-    if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83 /*alloc fail*/);
-    if(!uivector_resizev(&frequencies_d, 30, 0)) ERROR_BREAK(83 /*alloc fail*/);
-
     /*Count the frequencies of lit, len and dist codes*/
     for(i = 0; i != lz77_encoded.size; ++i) {
       unsigned symbol = lz77_encoded.data[i];
-      ++frequencies_ll.data[symbol];
+      ++frequencies_ll[symbol];
       if(symbol > 256) {
         unsigned dist = lz77_encoded.data[i + 2];
-        ++frequencies_d.data[dist];
+        ++frequencies_d[dist];
         i += 3;
       }
     }
-    frequencies_ll.data[256] = 1; /*there will be exactly 1 end code, at the end of the block*/
+    frequencies_ll[256] = 1; /*there will be exactly 1 end code, at the end of the block*/
 
     /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/
-    error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll.data, 257, frequencies_ll.size, 15);
+    error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll, 257, 286, 15);
     if(error) break;
     /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/
-    error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d.data, 2, frequencies_d.size, 15);
+    error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d, 2, 30, 15);
     if(error) break;
 
-    numcodes_ll = tree_ll.numcodes; if(numcodes_ll > 286) numcodes_ll = 286;
-    numcodes_d = tree_d.numcodes; if(numcodes_d > 30) numcodes_d = 30;
+    numcodes_ll = LODEPNG_MIN(tree_ll.numcodes, 286);
+    numcodes_d = LODEPNG_MIN(tree_d.numcodes, 30);
     /*store the code lengths of both generated trees in bitlen_lld*/
-    for(i = 0; i != numcodes_ll; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_ll, (unsigned)i));
-    for(i = 0; i != numcodes_d; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_d, (unsigned)i));
+    numcodes_lld = numcodes_ll + numcodes_d;
+    bitlen_lld = (unsigned*)lodepng_malloc(numcodes_lld * sizeof(*bitlen_lld));
+    /*bitlen_lld_e never needs more entries than bitlen_lld*/
+    bitlen_lld_e = (unsigned*)lodepng_malloc(numcodes_lld * sizeof(*bitlen_lld_e));
+    if(!bitlen_lld || !bitlen_lld_e) ERROR_BREAK(83); /*alloc fail*/
+    numcodes_lld_e = 0;
+
+    for(i = 0; i != numcodes_ll; ++i) bitlen_lld[i] = tree_ll.lengths[i];
+    for(i = 0; i != numcodes_d; ++i) bitlen_lld[numcodes_ll + i] = tree_d.lengths[i];
 
     /*run-length compress bitlen_ldd into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times),
     17 (3-10 zeroes), 18 (11-138 zeroes)*/
-    for(i = 0; i != (unsigned)bitlen_lld.size; ++i) {
+    for(i = 0; i != numcodes_lld; ++i) {
       unsigned j = 0; /*amount of repetitions*/
-      while(i + j + 1 < (unsigned)bitlen_lld.size && bitlen_lld.data[i + j + 1] == bitlen_lld.data[i]) ++j;
+      while(i + j + 1 < numcodes_lld && bitlen_lld[i + j + 1] == bitlen_lld[i]) ++j;
 
-      if(bitlen_lld.data[i] == 0 && j >= 2) /*repeat code for zeroes*/ {
+      if(bitlen_lld[i] == 0 && j >= 2) /*repeat code for zeroes*/ {
         ++j; /*include the first zero*/
         if(j <= 10) /*repeat code 17 supports max 10 zeroes*/ {
-          uivector_push_back(&bitlen_lld_e, 17);
-          uivector_push_back(&bitlen_lld_e, j - 3);
+          bitlen_lld_e[numcodes_lld_e++] = 17;
+          bitlen_lld_e[numcodes_lld_e++] = j - 3;
         } else /*repeat code 18 supports max 138 zeroes*/ {
           if(j > 138) j = 138;
-          uivector_push_back(&bitlen_lld_e, 18);
-          uivector_push_back(&bitlen_lld_e, j - 11);
+          bitlen_lld_e[numcodes_lld_e++] = 18;
+          bitlen_lld_e[numcodes_lld_e++] = j - 11;
         }
         i += (j - 1);
       } else if(j >= 3) /*repeat code for value other than zero*/ {
         size_t k;
         unsigned num = j / 6u, rest = j % 6u;
-        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]);
+        bitlen_lld_e[numcodes_lld_e++] = bitlen_lld[i];
         for(k = 0; k < num; ++k) {
-          uivector_push_back(&bitlen_lld_e, 16);
-          uivector_push_back(&bitlen_lld_e, 6 - 3);
+          bitlen_lld_e[numcodes_lld_e++] = 16;
+          bitlen_lld_e[numcodes_lld_e++] = 6 - 3;
         }
         if(rest >= 3) {
-          uivector_push_back(&bitlen_lld_e, 16);
-          uivector_push_back(&bitlen_lld_e, rest - 3);
+          bitlen_lld_e[numcodes_lld_e++] = 16;
+          bitlen_lld_e[numcodes_lld_e++] = rest - 3;
         }
         else j -= rest;
         i += j;
       } else /*too short to benefit from repeat code*/ {
-        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]);
+        bitlen_lld_e[numcodes_lld_e++] = bitlen_lld[i];
       }
     }
 
     /*generate tree_cl, the huffmantree of huffmantrees*/
-
-    if(!uivector_resizev(&frequencies_cl, NUM_CODE_LENGTH_CODES, 0)) ERROR_BREAK(83 /*alloc fail*/);
-    for(i = 0; i != bitlen_lld_e.size; ++i) {
-      ++frequencies_cl.data[bitlen_lld_e.data[i]];
+    for(i = 0; i != numcodes_lld_e; ++i) {
+      ++frequencies_cl[bitlen_lld_e[i]];
       /*after a repeat code come the bits that specify the number of repetitions,
       those don't need to be in the frequencies_cl calculation*/
-      if(bitlen_lld_e.data[i] >= 16) ++i;
+      if(bitlen_lld_e[i] >= 16) ++i;
     }
 
-    error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl.data,
-                                            frequencies_cl.size, frequencies_cl.size, 7);
+    error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl,
+                                            NUM_CODE_LENGTH_CODES, NUM_CODE_LENGTH_CODES, 7);
     if(error) break;
 
-    if(!uivector_resize(&bitlen_cl, tree_cl.numcodes)) ERROR_BREAK(83 /*alloc fail*/);
-    for(i = 0; i != tree_cl.numcodes; ++i) {
-      /*lengths of code length tree is in the order as specified by deflate*/
-      bitlen_cl.data[i] = HuffmanTree_getLength(&tree_cl, CLCL_ORDER[i]);
+    /*compute amount of code-length-code-lengths to output*/
+    numcodes_cl = NUM_CODE_LENGTH_CODES;
+    /*trim zeros at the end (using CLCL_ORDER), but minimum size must be 4 (see HCLEN below)*/
+    while(numcodes_cl > 4u && tree_cl.lengths[CLCL_ORDER[numcodes_cl - 1u]] == 0) {
+      numcodes_cl--;
     }
-    while(bitlen_cl.data[bitlen_cl.size - 1] == 0 && bitlen_cl.size > 4) {
-      /*remove zeros at the end, but minimum size must be 4*/
-      if(!uivector_resize(&bitlen_cl, bitlen_cl.size - 1)) ERROR_BREAK(83 /*alloc fail*/);
-    }
-    if(error) break;
 
     /*
     Write everything into the output
@@ -1994,35 +1980,34 @@
     writeBits(writer, 1, 1); /*second bit of BTYPE "dynamic"*/
 
     /*write the HLIT, HDIST and HCLEN values*/
+    /*all three sizes take trimmed ending zeroes into account, done either by HuffmanTree_makeFromFrequencies
+    or in the loop for numcodes_cl above, which saves space. */
     HLIT = (unsigned)(numcodes_ll - 257);
     HDIST = (unsigned)(numcodes_d - 1);
-    HCLEN = (unsigned)bitlen_cl.size - 4;
-    /*trim zeroes for HCLEN. HLIT and HDIST were already trimmed at tree creation*/
-    while(!bitlen_cl.data[HCLEN + 4 - 1] && HCLEN > 0) --HCLEN;
+    HCLEN = (unsigned)(numcodes_cl - 4);
     writeBits(writer, HLIT, 5);
     writeBits(writer, HDIST, 5);
     writeBits(writer, HCLEN, 4);
 
-    /*write the code lengths of the code length alphabet*/
-    for(i = 0; i != HCLEN + 4; ++i) writeBits(writer, bitlen_cl.data[i], 3);
+    /*write the code lengths of the code length alphabet ("bitlen_cl")*/
+    for(i = 0; i != numcodes_cl; ++i) writeBits(writer, tree_cl.lengths[CLCL_ORDER[i]], 3);
 
     /*write the lengths of the lit/len AND the dist alphabet*/
-    for(i = 0; i != bitlen_lld_e.size; ++i) {
-      writeBitsReversed(writer, HuffmanTree_getCode(&tree_cl, bitlen_lld_e.data[i]),
-                        HuffmanTree_getLength(&tree_cl, bitlen_lld_e.data[i]));
+    for(i = 0; i != numcodes_lld_e; ++i) {
+      writeBitsReversed(writer, tree_cl.codes[bitlen_lld_e[i]], tree_cl.lengths[bitlen_lld_e[i]]);
       /*extra bits of repeat codes*/
-      if(bitlen_lld_e.data[i] == 16) writeBits(writer, bitlen_lld_e.data[++i], 2);
-      else if(bitlen_lld_e.data[i] == 17) writeBits(writer, bitlen_lld_e.data[++i], 3);
-      else if(bitlen_lld_e.data[i] == 18) writeBits(writer, bitlen_lld_e.data[++i], 7);
+      if(bitlen_lld_e[i] == 16) writeBits(writer, bitlen_lld_e[++i], 2);
+      else if(bitlen_lld_e[i] == 17) writeBits(writer, bitlen_lld_e[++i], 3);
+      else if(bitlen_lld_e[i] == 18) writeBits(writer, bitlen_lld_e[++i], 7);
     }
 
     /*write the compressed data symbols*/
     writeLZ77data(writer, &lz77_encoded, &tree_ll, &tree_d);
     /*error: the length of the end code 256 must be larger than 0*/
-    if(HuffmanTree_getLength(&tree_ll, 256) == 0) ERROR_BREAK(64);
+    if(tree_ll.lengths[256] == 0) ERROR_BREAK(64);
 
     /*write the end code*/
-    writeBitsReversed(writer, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256));
+    writeBitsReversed(writer, tree_ll.codes[256], tree_ll.lengths[256]);
 
     break; /*end of error-while*/
   }
@@ -2032,12 +2017,11 @@
   HuffmanTree_cleanup(&tree_ll);
   HuffmanTree_cleanup(&tree_d);
   HuffmanTree_cleanup(&tree_cl);
-  uivector_cleanup(&frequencies_ll);
-  uivector_cleanup(&frequencies_d);
-  uivector_cleanup(&frequencies_cl);
-  uivector_cleanup(&bitlen_lld_e);
-  uivector_cleanup(&bitlen_lld);
-  uivector_cleanup(&bitlen_cl);
+  lodepng_free(frequencies_ll);
+  lodepng_free(frequencies_d);
+  lodepng_free(frequencies_cl);
+  lodepng_free(bitlen_lld);
+  lodepng_free(bitlen_lld_e);
 
   return error;
 }
@@ -2056,27 +2040,29 @@
   HuffmanTree_init(&tree_ll);
   HuffmanTree_init(&tree_d);
 
-  generateFixedLitLenTree(&tree_ll);
-  generateFixedDistanceTree(&tree_d);
+  error = generateFixedLitLenTree(&tree_ll);
+  if(!error) error = generateFixedDistanceTree(&tree_d);
 
-  writeBits(writer, BFINAL, 1);
-  writeBits(writer, 1, 1); /*first bit of BTYPE*/
-  writeBits(writer, 0, 1); /*second bit of BTYPE*/
+  if(!error) {
+    writeBits(writer, BFINAL, 1);
+    writeBits(writer, 1, 1); /*first bit of BTYPE*/
+    writeBits(writer, 0, 1); /*second bit of BTYPE*/
 
-  if(settings->use_lz77) /*LZ77 encoded*/ {
-    uivector lz77_encoded;
-    uivector_init(&lz77_encoded);
-    error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
-                       settings->minmatch, settings->nicematch, settings->lazymatching);
-    if(!error) writeLZ77data(writer, &lz77_encoded, &tree_ll, &tree_d);
-    uivector_cleanup(&lz77_encoded);
-  } else /*no LZ77, but still will be Huffman compressed*/ {
-    for(i = datapos; i < dataend; ++i) {
-      writeBitsReversed(writer, HuffmanTree_getCode(&tree_ll, data[i]), HuffmanTree_getLength(&tree_ll, data[i]));
+    if(settings->use_lz77) /*LZ77 encoded*/ {
+      uivector lz77_encoded;
+      uivector_init(&lz77_encoded);
+      error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
+                         settings->minmatch, settings->nicematch, settings->lazymatching);
+      if(!error) writeLZ77data(writer, &lz77_encoded, &tree_ll, &tree_d);
+      uivector_cleanup(&lz77_encoded);
+    } else /*no LZ77, but still will be Huffman compressed*/ {
+      for(i = datapos; i < dataend; ++i) {
+        writeBitsReversed(writer, tree_ll.codes[data[i]], tree_ll.lengths[data[i]]);
+      }
     }
+    /*add END code*/
+    if(!error) writeBitsReversed(writer,tree_ll.codes[256], tree_ll.lengths[256]);
   }
-  /*add END code*/
-  if(!error) writeBitsReversed(writer, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256));
 
   /*cleanup*/
   HuffmanTree_cleanup(&tree_ll);
@@ -2108,16 +2094,17 @@
   if(numdeflateblocks == 0) numdeflateblocks = 1;
 
   error = hash_init(&hash, settings->windowsize);
-  if(error) return error;
 
-  for(i = 0; i != numdeflateblocks && !error; ++i) {
-    unsigned final = (i == numdeflateblocks - 1);
-    size_t start = i * blocksize;
-    size_t end = start + blocksize;
-    if(end > insize) end = insize;
+  if(!error) {
+    for(i = 0; i != numdeflateblocks && !error; ++i) {
+      unsigned final = (i == numdeflateblocks - 1);
+      size_t start = i * blocksize;
+      size_t end = start + blocksize;
+      if(end > insize) end = insize;
 
-    if(settings->btype == 1) error = deflateFixed(&writer, &hash, in, start, end, settings, final);
-    else if(settings->btype == 2) error = deflateDynamic(&writer, &hash, in, start, end, settings, final);
+      if(settings->btype == 1) error = deflateFixed(&writer, &hash, in, start, end, settings, final);
+      else if(settings->btype == 2) error = deflateDynamic(&writer, &hash, in, start, end, settings, final);
+    }
   }
 
   hash_cleanup(&hash);
@@ -2128,10 +2115,8 @@
 unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
                          const unsigned char* in, size_t insize,
                          const LodePNGCompressSettings* settings) {
-  unsigned error;
-  ucvector v;
-  ucvector_init_buffer(&v, *out, *outsize);
-  error = lodepng_deflatev(&v, in, insize, settings);
+  ucvector v = ucvector_init(*out, *outsize);
+  unsigned error = lodepng_deflatev(&v, in, insize, settings);
   *out = v.data;
   *outsize = v.size;
   return error;
@@ -2141,7 +2126,9 @@
                         const unsigned char* in, size_t insize,
                         const LodePNGCompressSettings* settings) {
   if(settings->custom_deflate) {
-    return settings->custom_deflate(out, outsize, in, insize, settings);
+    unsigned error = settings->custom_deflate(out, outsize, in, insize, settings);
+    /*the custom deflate is allowed to have its own error codes, however, we translate it to code 111*/
+    return error ? 111 : 0;
   } else {
     return lodepng_deflate(out, outsize, in, insize, settings);
   }
@@ -2184,8 +2171,9 @@
 
 #ifdef LODEPNG_COMPILE_DECODER
 
-unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                                 size_t insize, const LodePNGDecompressSettings* settings) {
+static unsigned lodepng_zlib_decompressv(ucvector* out,
+                                         const unsigned char* in, size_t insize,
+                                         const LodePNGDecompressSettings* settings) {
   unsigned error = 0;
   unsigned CM, CINFO, FDICT;
 
@@ -2212,25 +2200,52 @@
     return 26;
   }
 
-  error = inflate(out, outsize, in + 2, insize - 2, settings);
+  error = inflatev(out, in + 2, insize - 2, settings);
   if(error) return error;
 
   if(!settings->ignore_adler32) {
     unsigned ADLER32 = lodepng_read32bitInt(&in[insize - 4]);
-    unsigned checksum = adler32(*out, (unsigned)(*outsize));
+    unsigned checksum = adler32(out->data, (unsigned)(out->size));
     if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/
   }
 
   return 0; /*no error*/
 }
 
-static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                                size_t insize, const LodePNGDecompressSettings* settings) {
+
+unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                 size_t insize, const LodePNGDecompressSettings* settings) {
+  ucvector v = ucvector_init(*out, *outsize);
+  unsigned error = lodepng_zlib_decompressv(&v, in, insize, settings);
+  *out = v.data;
+  *outsize = v.size;
+  return error;
+}
+
+/*expected_size is expected output size, to avoid intermediate allocations. Set to 0 if not known. */
+static unsigned zlib_decompress(unsigned char** out, size_t* outsize, size_t expected_size,
+                                const unsigned char* in, size_t insize, const LodePNGDecompressSettings* settings) {
+  unsigned error;
   if(settings->custom_zlib) {
-    return settings->custom_zlib(out, outsize, in, insize, settings);
+    error = settings->custom_zlib(out, outsize, in, insize, settings);
+    if(error) {
+      /*the custom zlib is allowed to have its own error codes, however, we translate it to code 110*/
+      error = 110;
+      /*if there's a max output size, and the custom zlib returned error, then indicate that error instead*/
+      if(settings->max_output_size && *outsize > settings->max_output_size) error = 109;
+    }
   } else {
-    return lodepng_zlib_decompress(out, outsize, in, insize, settings);
+    ucvector v = ucvector_init(*out, *outsize);
+    if(expected_size) {
+      /*reserve the memory to avoid intermediate reallocations*/
+      ucvector_resize(&v, *outsize + expected_size);
+      v.size = *outsize;
+    }
+    error = lodepng_zlib_decompressv(&v, in, insize, settings);
+    *out = v.data;
+    *outsize = v.size;
   }
+  return error;
 }
 
 #endif /*LODEPNG_COMPILE_DECODER*/
@@ -2251,7 +2266,7 @@
   if(!error) {
     *outsize = deflatesize + 6;
     *out = (unsigned char*)lodepng_malloc(*outsize);
-    if(!out) error = 83; /*alloc fail*/
+    if(!*out) error = 83; /*alloc fail*/
   }
 
   if(!error) {
@@ -2278,7 +2293,9 @@
 static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
                               size_t insize, const LodePNGCompressSettings* settings) {
   if(settings->custom_zlib) {
-    return settings->custom_zlib(out, outsize, in, insize, settings);
+    unsigned error = settings->custom_zlib(out, outsize, in, insize, settings);
+    /*the custom zlib is allowed to have its own error codes, however, we translate it to code 111*/
+    return error ? 111 : 0;
   } else {
     return lodepng_zlib_compress(out, outsize, in, insize, settings);
   }
@@ -2289,9 +2306,10 @@
 #else /*no LODEPNG_COMPILE_ZLIB*/
 
 #ifdef LODEPNG_COMPILE_DECODER
-static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
-                                size_t insize, const LodePNGDecompressSettings* settings) {
+static unsigned zlib_decompress(unsigned char** out, size_t* outsize, size_t expected_size,
+                                const unsigned char* in, size_t insize, const LodePNGDecompressSettings* settings) {
   if(!settings->custom_zlib) return 87; /*no custom zlib function provided */
+  (void)expected_size;
   return settings->custom_zlib(out, outsize, in, insize, settings);
 }
 #endif /*LODEPNG_COMPILE_DECODER*/
@@ -2336,13 +2354,14 @@
 void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings) {
   settings->ignore_adler32 = 0;
   settings->ignore_nlen = 0;
+  settings->max_output_size = 0;
 
   settings->custom_zlib = 0;
   settings->custom_inflate = 0;
   settings->custom_context = 0;
 }
 
-const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0, 0};
+const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0, 0, 0};
 
 #endif /*LODEPNG_COMPILE_DECODER*/
 
@@ -2427,7 +2446,7 @@
   unsigned result = 0;
   size_t i;
   for(i = 0 ; i < nbits; ++i) {
-    result <<= 1;
+    result <<= 1u;
     result |= (unsigned)readBitFromReversedStream(bitpointer, bitstream);
   }
   return result;
@@ -2435,8 +2454,8 @@
 
 static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, unsigned char bit) {
   /*the current bit in bitstream may be 0 or 1 for this to work*/
-  if(bit == 0) bitstream[(*bitpointer) >> 3] &=  (unsigned char)(~(1 << (7 - ((*bitpointer) & 0x7))));
-  else         bitstream[(*bitpointer) >> 3] |=  (1 << (7 - ((*bitpointer) & 0x7)));
+  if(bit == 0) bitstream[(*bitpointer) >> 3u] &=  (unsigned char)(~(1u << (7u - ((*bitpointer) & 7u))));
+  else         bitstream[(*bitpointer) >> 3u] |=  (1u << (7u - ((*bitpointer) & 7u)));
   ++(*bitpointer);
 }
 
@@ -2494,55 +2513,66 @@
   lodepng_set32bitInt(chunk + 8 + length, CRC);
 }
 
-unsigned char* lodepng_chunk_next(unsigned char* chunk) {
+unsigned char* lodepng_chunk_next(unsigned char* chunk, unsigned char* end) {
+  if(chunk >= end || end - chunk < 12) return end; /*too small to contain a chunk*/
   if(chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47
     && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) {
     /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */
     return chunk + 8;
   } else {
-    unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12;
-    return chunk + total_chunk_length;
+    size_t total_chunk_length;
+    unsigned char* result;
+    if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return end;
+    result = chunk + total_chunk_length;
+    if(result < chunk) return end; /*pointer overflow*/
+    return result;
   }
 }
 
-const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk) {
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk, const unsigned char* end) {
+  if(chunk >= end || end - chunk < 12) return end; /*too small to contain a chunk*/
   if(chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47
     && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) {
     /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */
     return chunk + 8;
   } else {
-    unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12;
-    return chunk + total_chunk_length;
+    size_t total_chunk_length;
+    const unsigned char* result;
+    if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return end;
+    result = chunk + total_chunk_length;
+    if(result < chunk) return end; /*pointer overflow*/
+    return result;
   }
 }
 
-unsigned char* lodepng_chunk_find(unsigned char* chunk, const unsigned char* end, const char type[5]) {
+unsigned char* lodepng_chunk_find(unsigned char* chunk, unsigned char* end, const char type[5]) {
   for(;;) {
-    if(chunk + 12 >= end) return 0;
+    if(chunk >= end || end - chunk < 12) return 0; /* past file end: chunk + 12 > end */
     if(lodepng_chunk_type_equals(chunk, type)) return chunk;
-    chunk = lodepng_chunk_next(chunk);
+    chunk = lodepng_chunk_next(chunk, end);
   }
 }
 
 const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]) {
   for(;;) {
-    if(chunk + 12 >= end) return 0;
+    if(chunk >= end || end - chunk < 12) return 0; /* past file end: chunk + 12 > end */
     if(lodepng_chunk_type_equals(chunk, type)) return chunk;
-    chunk = lodepng_chunk_next_const(chunk);
+    chunk = lodepng_chunk_next_const(chunk, end);
   }
 }
 
-unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk) {
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outsize, const unsigned char* chunk) {
   unsigned i;
-  unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12;
+  size_t total_chunk_length, new_length;
   unsigned char *chunk_start, *new_buffer;
-  size_t new_length = (*outlength) + total_chunk_length;
-  if(new_length < total_chunk_length || new_length < (*outlength)) return 77; /*integer overflow happened*/
+
+  if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return 77;
+  if(lodepng_addofl(*outsize, total_chunk_length, &new_length)) return 77;
 
   new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
   if(!new_buffer) return 83; /*alloc fail*/
   (*out) = new_buffer;
-  (*outlength) = new_length;
+  (*outsize) = new_length;
   chunk_start = &(*out)[new_length - total_chunk_length];
 
   for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i];
@@ -2550,29 +2580,36 @@
   return 0;
 }
 
-unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
-                              const char* type, const unsigned char* data) {
-  unsigned i;
-  unsigned char *chunk, *new_buffer;
-  size_t new_length = (*outlength) + length + 12;
-  if(new_length < length + 12 || new_length < (*outlength)) return 77; /*integer overflow happened*/
-  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
-  if(!new_buffer) return 83; /*alloc fail*/
-  (*out) = new_buffer;
-  (*outlength) = new_length;
-  chunk = &(*out)[(*outlength) - length - 12];
+/*Sets length and name and allocates the space for data and crc but does not
+set data or crc yet. Returns the start of the chunk in chunk. The start of
+the data is at chunk + 8. To finalize chunk, add the data, then use
+lodepng_chunk_generate_crc */
+static unsigned lodepng_chunk_init(unsigned char** chunk,
+                                   ucvector* out,
+                                   unsigned length, const char* type) {
+  size_t new_length = out->size;
+  if(lodepng_addofl(new_length, length, &new_length)) return 77;
+  if(lodepng_addofl(new_length, 12, &new_length)) return 77;
+  if(!ucvector_resize(out, new_length)) return 83; /*alloc fail*/
+  *chunk = out->data + new_length - length - 12u;
 
   /*1: length*/
-  lodepng_set32bitInt(chunk, (unsigned)length);
+  lodepng_set32bitInt(*chunk, length);
 
   /*2: chunk name (4 letters)*/
-  chunk[4] = (unsigned char)type[0];
-  chunk[5] = (unsigned char)type[1];
-  chunk[6] = (unsigned char)type[2];
-  chunk[7] = (unsigned char)type[3];
+  lodepng_memcpy(*chunk + 4, type, 4);
+
+  return 0;
+}
+
+/* like lodepng_chunk_create but with custom allocsize */
+static unsigned lodepng_chunk_createv(ucvector* out,
+                                      unsigned length, const char* type, const unsigned char* data) {
+  unsigned char* chunk;
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, length, type));
 
   /*3: the data*/
-  for(i = 0; i != length; ++i) chunk[8 + i] = data[i];
+  lodepng_memcpy(chunk + 8, data, length);
 
   /*4: CRC (of the chunkname characters and the data)*/
   lodepng_chunk_generate_crc(chunk);
@@ -2580,6 +2617,15 @@
   return 0;
 }
 
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outsize,
+                              unsigned length, const char* type, const unsigned char* data) {
+  ucvector v = ucvector_init(*out, *outsize);
+  unsigned error = lodepng_chunk_createv(&v, length, type, data);
+  *out = v.data;
+  *outsize = v.size;
+  return error;
+}
+
 /* ////////////////////////////////////////////////////////////////////////// */
 /* / Color types, channels, bits                                            / */
 /* ////////////////////////////////////////////////////////////////////////// */
@@ -2593,6 +2639,7 @@
     case LCT_PALETTE:    if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8            )) return 37; break;
     case LCT_GREY_ALPHA: if(!(                                 bd == 8 || bd == 16)) return 37; break;
     case LCT_RGBA:       if(!(                                 bd == 8 || bd == 16)) return 37; break;
+    case LCT_MAX_OCTET_VALUE: return 31; /* invalid color type */
     default: return 31; /* invalid color type */
   }
   return 0; /*allowed color type / bits combination*/
@@ -2605,6 +2652,7 @@
     case LCT_PALETTE: return 1;
     case LCT_GREY_ALPHA: return 2;
     case LCT_RGBA: return 4;
+    case LCT_MAX_OCTET_VALUE: return 0; /* invalid color type */
     default: return 0; /*invalid color type*/
   }
 }
@@ -2625,10 +2673,12 @@
   info->palettesize = 0;
 }
 
-void lodepng_color_mode_alloc_palette(LodePNGColorMode* info) {
+/*allocates palette memory if needed, and initializes all colors to black*/
+static void lodepng_color_mode_alloc_palette(LodePNGColorMode* info) {
   size_t i;
-  /*room for 256 colors with 4 bytes each. Using realloc to avoid leak if it is being overwritten*/
-  info->palette = (unsigned char*)lodepng_realloc(info->palette, 1024);
+  /*if the palette is already allocated, it will have size 1024 so no reallocation needed in that case*/
+  /*the palette must have room for up to 256 colors with 4 bytes each.*/
+  if(!info->palette) info->palette = (unsigned char*)lodepng_malloc(1024);
   if(!info->palette) return; /*alloc fail*/
   for(i = 0; i != 256; ++i) {
     /*Initialize all unused colors with black, the value used for invalid palette indices.
@@ -2646,13 +2696,12 @@
 }
 
 unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source) {
-  size_t i;
   lodepng_color_mode_cleanup(dest);
-  *dest = *source;
+  lodepng_memcpy(dest, source, sizeof(LodePNGColorMode));
   if(source->palette) {
     dest->palette = (unsigned char*)lodepng_malloc(1024);
     if(!dest->palette && source->palettesize) return 83; /*alloc fail*/
-    for(i = 0; i != source->palettesize * 4; ++i) dest->palette[i] = source->palette[i];
+    lodepng_memcpy(dest->palette, source->palette, source->palettesize * 4);
   }
   return 0;
 }
@@ -2752,18 +2801,18 @@
 
 
 #ifdef LODEPNG_COMPILE_PNG
-#ifdef LODEPNG_COMPILE_DECODER
 
 /*in an idat chunk, each scanline is a multiple of 8 bits, unlike the lodepng output buffer,
 and in addition has one extra byte per line: the filter byte. So this gives a larger
-result than lodepng_get_raw_size. */
-static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, const LodePNGColorMode* color) {
-  size_t bpp = lodepng_get_bpp(color);
-  /* + 1 for the filter byte, and possibly plus padding bits per line */
+result than lodepng_get_raw_size. Set h to 1 to get the size of 1 row including filter byte. */
+static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, unsigned bpp) {
+  /* + 1 for the filter byte, and possibly plus padding bits per line. */
+  /* Ignoring casts, the expression is equal to (w * bpp + 7) / 8 + 1, but avoids overflow of w * bpp */
   size_t line = ((size_t)(w / 8u) * bpp) + 1u + ((w & 7u) * bpp + 7u) / 8u;
   return (size_t)h * line;
 }
 
+#ifdef LODEPNG_COMPILE_DECODER
 /*Safely checks whether size_t overflow can be caused due to amount of pixels.
 This check is overcautious rather than precise. If this check indicates no overflow,
 you can safely compute in a size_t (but not an unsigned):
@@ -2844,8 +2893,8 @@
 
 static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source) {
   size_t i = 0;
-  dest->text_keys = 0;
-  dest->text_strings = 0;
+  dest->text_keys = NULL;
+  dest->text_strings = NULL;
   dest->text_num = 0;
   for(i = 0; i != source->text_num; ++i) {
     CERROR_TRY_RETURN(lodepng_add_text(dest, source->text_keys[i], source->text_strings[i]));
@@ -2853,27 +2902,29 @@
   return 0;
 }
 
-void lodepng_clear_text(LodePNGInfo* info) {
-  LodePNGText_cleanup(info);
+static unsigned lodepng_add_text_sized(LodePNGInfo* info, const char* key, const char* str, size_t size) {
+  char** new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1)));
+  char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1)));
+
+  if(new_keys) info->text_keys = new_keys;
+  if(new_strings) info->text_strings = new_strings;
+
+  if(!new_keys || !new_strings) return 83; /*alloc fail*/
+
+  ++info->text_num;
+  info->text_keys[info->text_num - 1] = alloc_string(key);
+  info->text_strings[info->text_num - 1] = alloc_string_sized(str, size);
+  if(!info->text_keys[info->text_num - 1] || !info->text_strings[info->text_num - 1]) return 83; /*alloc fail*/
+
+  return 0;
 }
 
 unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str) {
-  char** new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1)));
-  char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1)));
-  if(!new_keys || !new_strings) {
-    lodepng_free(new_keys);
-    lodepng_free(new_strings);
-    return 83; /*alloc fail*/
-  }
+  return lodepng_add_text_sized(info, key, str, lodepng_strlen(str));
+}
 
-  ++info->text_num;
-  info->text_keys = new_keys;
-  info->text_strings = new_strings;
-
-  info->text_keys[info->text_num - 1] = alloc_string(key);
-  info->text_strings[info->text_num - 1] = alloc_string(str);
-
-  return 0;
+void lodepng_clear_text(LodePNGInfo* info) {
+  LodePNGText_cleanup(info);
 }
 
 /******************************************************************************/
@@ -2902,10 +2953,10 @@
 
 static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source) {
   size_t i = 0;
-  dest->itext_keys = 0;
-  dest->itext_langtags = 0;
-  dest->itext_transkeys = 0;
-  dest->itext_strings = 0;
+  dest->itext_keys = NULL;
+  dest->itext_langtags = NULL;
+  dest->itext_transkeys = NULL;
+  dest->itext_strings = NULL;
   dest->itext_num = 0;
   for(i = 0; i != source->itext_num; ++i) {
     CERROR_TRY_RETURN(lodepng_add_itext(dest, source->itext_keys[i], source->itext_langtags[i],
@@ -2918,34 +2969,35 @@
   LodePNGIText_cleanup(info);
 }
 
-unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
-                           const char* transkey, const char* str) {
+static unsigned lodepng_add_itext_sized(LodePNGInfo* info, const char* key, const char* langtag,
+                                        const char* transkey, const char* str, size_t size) {
   char** new_keys = (char**)(lodepng_realloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1)));
   char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1)));
   char** new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1)));
   char** new_strings = (char**)(lodepng_realloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1)));
-  if(!new_keys || !new_langtags || !new_transkeys || !new_strings) {
-    lodepng_free(new_keys);
-    lodepng_free(new_langtags);
-    lodepng_free(new_transkeys);
-    lodepng_free(new_strings);
-    return 83; /*alloc fail*/
-  }
+
+  if(new_keys) info->itext_keys = new_keys;
+  if(new_langtags) info->itext_langtags = new_langtags;
+  if(new_transkeys) info->itext_transkeys = new_transkeys;
+  if(new_strings) info->itext_strings = new_strings;
+
+  if(!new_keys || !new_langtags || !new_transkeys || !new_strings) return 83; /*alloc fail*/
 
   ++info->itext_num;
-  info->itext_keys = new_keys;
-  info->itext_langtags = new_langtags;
-  info->itext_transkeys = new_transkeys;
-  info->itext_strings = new_strings;
 
   info->itext_keys[info->itext_num - 1] = alloc_string(key);
   info->itext_langtags[info->itext_num - 1] = alloc_string(langtag);
   info->itext_transkeys[info->itext_num - 1] = alloc_string(transkey);
-  info->itext_strings[info->itext_num - 1] = alloc_string(str);
+  info->itext_strings[info->itext_num - 1] = alloc_string_sized(str, size);
 
   return 0;
 }
 
+unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag,
+                           const char* transkey, const char* str) {
+  return lodepng_add_itext_sized(info, key, langtag, transkey, str, lodepng_strlen(str));
+}
+
 /* same as set but does not delete */
 static unsigned lodepng_assign_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size) {
   if(profile_size == 0) return 100; /*invalid ICC profile size*/
@@ -3017,7 +3069,7 @@
 
 unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source) {
   lodepng_info_cleanup(dest);
-  *dest = *source;
+  lodepng_memcpy(dest, source, sizeof(LodePNGInfo));
   lodepng_color_mode_init(&dest->color);
   CERROR_TRY_RETURN(lodepng_color_mode_copy(&dest->color, &source->color));
 
@@ -3061,8 +3113,7 @@
 };
 
 static void color_tree_init(ColorTree* tree) {
-  int i;
-  for(i = 0; i != 16; ++i) tree->children[i] = 0;
+  lodepng_memset(tree->children, 0, 16 * sizeof(*tree->children));
   tree->index = -1;
 }
 
@@ -3094,19 +3145,22 @@
 #endif /*LODEPNG_COMPILE_ENCODER*/
 
 /*color is not allowed to already exist.
-Index should be >= 0 (it's signed to be compatible with using -1 for "doesn't exist")*/
-static void color_tree_add(ColorTree* tree,
-                           unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index) {
+Index should be >= 0 (it is stored as a signed int so that -1 can mean "doesn't exist")
+Returns error code, or 0 if ok*/
+static unsigned color_tree_add(ColorTree* tree,
+                               unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index) {
   int bit;
   for(bit = 0; bit < 8; ++bit) {
     int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1);
     if(!tree->children[i]) {
       tree->children[i] = (ColorTree*)lodepng_malloc(sizeof(ColorTree));
+      if(!tree->children[i]) return 83; /*alloc fail*/
       color_tree_init(tree->children[i]);
     }
     tree = tree->children[i];
   }
   tree->index = (int)index;
+  return 0;
 }
 
 /*put a pixel, given its RGBA color, into image of any color type*/
@@ -3119,7 +3173,7 @@
     else if(mode->bitdepth == 16) out[i * 2 + 0] = out[i * 2 + 1] = gray;
     else {
       /*take the most significant bits of gray*/
-      gray = (gray >> (8 - mode->bitdepth)) & ((1 << mode->bitdepth) - 1);
+      gray = ((unsigned)gray >> (8u - mode->bitdepth)) & ((1u << mode->bitdepth) - 1u);
       addColorBits(out, i, mode->bitdepth, gray);
     }
   } else if(mode->colortype == LCT_RGB) {
@@ -3481,7 +3535,7 @@
 
   if(lodepng_color_mode_equal(mode_out, mode_in)) {
     size_t numbytes = lodepng_get_raw_size(w, h, mode_in);
-    for(i = 0; i != numbytes; ++i) out[i] = in[i];
+    lodepng_memcpy(out, in, numbytes);
     return 0;
   }
 
@@ -3498,9 +3552,9 @@
       /*if the input was also palette with same bitdepth, then the color types are also
       equal, so copy literally. This to preserve the exact indices that were in the PNG
       even in case there are duplicate colors in the palette.*/
-      if (mode_in->colortype == LCT_PALETTE && mode_in->bitdepth == mode_out->bitdepth) {
+      if(mode_in->colortype == LCT_PALETTE && mode_in->bitdepth == mode_out->bitdepth) {
         size_t numbytes = lodepng_get_raw_size(w, h, mode_in);
-        for(i = 0; i != numbytes; ++i) out[i] = in[i];
+        lodepng_memcpy(out, in, numbytes);
         return 0;
       }
     }
@@ -3508,26 +3562,29 @@
     color_tree_init(&tree);
     for(i = 0; i != palsize; ++i) {
       const unsigned char* p = &palette[i * 4];
-      color_tree_add(&tree, p[0], p[1], p[2], p[3], (unsigned)i);
+      error = color_tree_add(&tree, p[0], p[1], p[2], p[3], (unsigned)i);
+      if(error) break;
     }
   }
 
-  if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16) {
-    for(i = 0; i != numpixels; ++i) {
-      unsigned short r = 0, g = 0, b = 0, a = 0;
-      getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
-      rgba16ToPixel(out, i, mode_out, r, g, b, a);
-    }
-  } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA) {
-    getPixelColorsRGBA8(out, numpixels, in, mode_in);
-  } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB) {
-    getPixelColorsRGB8(out, numpixels, in, mode_in);
-  } else {
-    unsigned char r = 0, g = 0, b = 0, a = 0;
-    for(i = 0; i != numpixels; ++i) {
-      getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
-      error = rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a);
-      if (error) break;
+  if(!error) {
+    if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16) {
+      for(i = 0; i != numpixels; ++i) {
+        unsigned short r = 0, g = 0, b = 0, a = 0;
+        getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
+        rgba16ToPixel(out, i, mode_out, r, g, b, a);
+      }
+    } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA) {
+      getPixelColorsRGBA8(out, numpixels, in, mode_in);
+    } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB) {
+      getPixelColorsRGB8(out, numpixels, in, mode_in);
+    } else {
+      unsigned char r = 0, g = 0, b = 0, a = 0;
+      for(i = 0; i != numpixels; ++i) {
+        getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in);
+        error = rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a);
+        if(error) break;
+      }
     }
   }
 
@@ -3632,12 +3689,13 @@
 }
 
 /*stats must already have been inited. */
-void lodepng_compute_color_stats(LodePNGColorStats* stats,
-                                 const unsigned char* in, unsigned w, unsigned h,
-                                 const LodePNGColorMode* mode_in) {
+unsigned lodepng_compute_color_stats(LodePNGColorStats* stats,
+                                     const unsigned char* in, unsigned w, unsigned h,
+                                     const LodePNGColorMode* mode_in) {
   size_t i;
   ColorTree tree;
   size_t numpixels = (size_t)w * (size_t)h;
+  unsigned error = 0;
 
   /* mark things as done already if it would be impossible to have a more expensive case */
   unsigned colored_done = lodepng_is_greyscale_type(mode_in) ? 1 : 0;
@@ -3667,13 +3725,14 @@
   if(!numcolors_done) {
     for(i = 0; i < stats->numcolors; i++) {
       const unsigned char* color = &stats->palette[i * 4];
-      color_tree_add(&tree, color[0], color[1], color[2], color[3], i);
+      error = color_tree_add(&tree, color[0], color[1], color[2], color[3], i);
+      if(error) goto cleanup;
     }
   }
 
   /*Check if the 16-bit input is truly 16-bit*/
   if(mode_in->bitdepth == 16 && !sixteen) {
-    unsigned short r, g, b, a;
+    unsigned short r = 0, g = 0, b = 0, a = 0;
     for(i = 0; i != numpixels; ++i) {
       getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in);
       if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) ||
@@ -3771,7 +3830,8 @@
 
       if(!numcolors_done) {
         if(!color_tree_has(&tree, r, g, b, a)) {
-          color_tree_add(&tree, r, g, b, a, stats->numcolors);
+          error = color_tree_add(&tree, r, g, b, a, stats->numcolors);
+          if(error) goto cleanup;
           if(stats->numcolors < 256) {
             unsigned char* p = stats->palette;
             unsigned n = stats->numcolors;
@@ -3807,15 +3867,18 @@
     stats->key_b += (stats->key_b << 8);
   }
 
+cleanup:
   color_tree_cleanup(&tree);
+  return error;
 }
 
 #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
 /*Adds a single color to the color stats. The stats must already have been inited. The color must be given as 16-bit
 (with 2 bytes repeating for 8-bit and 65535 for opaque alpha channel). This function is expensive, do not call it for
 all pixels of an image but only for a few additional values. */
-static void lodepng_color_stats_add(LodePNGColorStats* stats,
-                                    unsigned r, unsigned g, unsigned b, unsigned a) {
+static unsigned lodepng_color_stats_add(LodePNGColorStats* stats,
+                                        unsigned r, unsigned g, unsigned b, unsigned a) {
+  unsigned error = 0;
   unsigned char image[8];
   LodePNGColorMode mode;
   lodepng_color_mode_init(&mode);
@@ -3823,14 +3886,22 @@
   image[4] = b >> 8; image[5] = b; image[6] = a >> 8; image[7] = a;
   mode.bitdepth = 16;
   mode.colortype = LCT_RGBA;
-  lodepng_compute_color_stats(stats, image, 1, 1, &mode);
+  error = lodepng_compute_color_stats(stats, image, 1, 1, &mode);
   lodepng_color_mode_cleanup(&mode);
+  return error;
 }
 #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
 
-unsigned auto_choose_color(LodePNGColorMode* mode_out,
-                           const LodePNGColorMode* mode_in,
-                           const LodePNGColorStats* stats) {
+/*Computes a minimal PNG color model that can contain all colors as indicated by the stats.
+The stats should be computed with lodepng_compute_color_stats.
+mode_in is raw color profile of the image the stats were computed on, to copy palette order from when relevant.
+Minimal PNG color model means the color type and bit depth that give the smallest number of bits in the output image,
+e.g. gray if only grayscale pixels, palette if less than 256 colors, color key if only single transparent color, ...
+This is used if auto_convert is enabled (it is by default).
+*/
+static unsigned auto_choose_color(LodePNGColorMode* mode_out,
+                                  const LodePNGColorMode* mode_in,
+                                  const LodePNGColorStats* stats) {
   unsigned error = 0;
   unsigned palettebits;
   size_t i, n;
@@ -4119,7 +4190,7 @@
         }
       }
       break;
-    default: return 36; /*error: nonexistent filter type given*/
+    default: return 36; /*error: invalid filter type given*/
   }
   return 0;
 }
@@ -4138,7 +4209,8 @@
 
   /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
   size_t bytewidth = (bpp + 7u) / 8u;
-  size_t linebytes = (w * bpp + 7u) / 8u;
+  /*the width of a scanline in bytes, not including the filter type*/
+  size_t linebytes = lodepng_get_raw_size_idat(w, 1, bpp) - 1u;
 
   for(y = 0; y < h; ++y) {
     size_t outindex = linebytes * y;
@@ -4178,7 +4250,8 @@
       for(y = 0; y < passh[i]; ++y)
       for(x = 0; x < passw[i]; ++x) {
         size_t pixelinstart = passstart[i] + (y * passw[i] + x) * bytewidth;
-        size_t pixeloutstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth;
+        size_t pixeloutstart = ((ADAM7_IY[i] + (size_t)y * ADAM7_DY[i]) * (size_t)w
+                             + ADAM7_IX[i] + (size_t)x * ADAM7_DX[i]) * bytewidth;
         for(b = 0; b < bytewidth; ++b) {
           out[pixeloutstart + b] = in[pixelinstart + b];
         }
@@ -4193,7 +4266,7 @@
       for(y = 0; y < passh[i]; ++y)
       for(x = 0; x < passw[i]; ++x) {
         ibp = (8 * passstart[i]) + (y * ilinebits + x * bpp);
-        obp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp;
+        obp = (ADAM7_IY[i] + (size_t)y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + (size_t)x * ADAM7_DX[i]) * bpp;
         for(b = 0; b < bpp; ++b) {
           unsigned char bit = readBitFromReversedStream(&ibp, in);
           setBitOfReversedStream(&obp, out, bit);
@@ -4358,7 +4431,6 @@
 static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) {
   unsigned error = 0;
   char *key = 0, *str = 0;
-  unsigned i;
 
   while(!error) /*not really a while loop, only used to break on error*/ {
     unsigned length, string2_begin;
@@ -4372,8 +4444,8 @@
     key = (char*)lodepng_malloc(length + 1);
     if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
 
+    lodepng_memcpy(key, data, length);
     key[length] = 0;
-    for(i = 0; i != length; ++i) key[i] = (char)data[i];
 
     string2_begin = length + 1; /*skip keyword null terminator*/
 
@@ -4381,8 +4453,8 @@
     str = (char*)lodepng_malloc(length + 1);
     if(!str) CERROR_BREAK(error, 83); /*alloc fail*/
 
+    lodepng_memcpy(str, data + string2_begin, length);
     str[length] = 0;
-    for(i = 0; i != length; ++i) str[i] = (char)data[string2_begin + i];
 
     error = lodepng_add_text(info, key, str);
 
@@ -4396,16 +4468,17 @@
 }
 
 /*compressed text chunk (zTXt)*/
-static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecoderSettings* decoder,
                                const unsigned char* data, size_t chunkLength) {
   unsigned error = 0;
-  unsigned i;
+
+  /*copy the object to change parameters in it*/
+  LodePNGDecompressSettings zlibsettings = decoder->zlibsettings;
 
   unsigned length, string2_begin;
   char *key = 0;
-  ucvector decoded;
-
-  ucvector_init(&decoded);
+  unsigned char* str = 0;
+  size_t size = 0;
 
   while(!error) /*not really a while loop, only used to break on error*/ {
     for(length = 0; length < chunkLength && data[length] != 0; ++length) ;
@@ -4415,8 +4488,8 @@
     key = (char*)lodepng_malloc(length + 1);
     if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
 
+    lodepng_memcpy(key, data, length);
     key[length] = 0;
-    for(i = 0; i != length; ++i) key[i] = (char)data[i];
 
     if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/
 
@@ -4424,34 +4497,34 @@
     if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/
 
     length = (unsigned)chunkLength - string2_begin;
+    zlibsettings.max_output_size = decoder->max_text_size;
     /*will fail if zlib error, e.g. if length is too small*/
-    error = zlib_decompress(&decoded.data, &decoded.size,
-                            &data[string2_begin],
-                            length, zlibsettings);
+    error = zlib_decompress(&str, &size, 0, &data[string2_begin],
+                            length, &zlibsettings);
+    /*error: decompressed text larger than decoder->max_text_size*/
+    if(error && size > zlibsettings.max_output_size) error = 112;
     if(error) break;
-    ucvector_push_back(&decoded, 0);
-
-    error = lodepng_add_text(info, key, (char*)decoded.data);
-
+    error = lodepng_add_text_sized(info, key, (char*)str, size);
     break;
   }
 
   lodepng_free(key);
-  ucvector_cleanup(&decoded);
+  lodepng_free(str);
 
   return error;
 }
 
 /*international text chunk (iTXt)*/
-static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecoderSettings* decoder,
                                const unsigned char* data, size_t chunkLength) {
   unsigned error = 0;
   unsigned i;
 
+  /*copy the object to change parameters in it*/
+  LodePNGDecompressSettings zlibsettings = decoder->zlibsettings;
+
   unsigned length, begin, compressed;
   char *key = 0, *langtag = 0, *transkey = 0;
-  ucvector decoded;
-  ucvector_init(&decoded); /* TODO: only use in case of compressed text */
 
   while(!error) /*not really a while loop, only used to break on error*/ {
     /*Quick check if the chunk length isn't too small. Even without check
@@ -4466,8 +4539,8 @@
     key = (char*)lodepng_malloc(length + 1);
     if(!key) CERROR_BREAK(error, 83); /*alloc fail*/
 
+    lodepng_memcpy(key, data, length);
     key[length] = 0;
-    for(i = 0; i != length; ++i) key[i] = (char)data[i];
 
     /*read the compression method*/
     compressed = data[length + 1];
@@ -4484,8 +4557,8 @@
     langtag = (char*)lodepng_malloc(length + 1);
     if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/
 
+    lodepng_memcpy(langtag, data + begin, length);
     langtag[length] = 0;
-    for(i = 0; i != length; ++i) langtag[i] = (char)data[begin + i];
 
     /*read the transkey*/
     begin += length + 1;
@@ -4495,8 +4568,8 @@
     transkey = (char*)lodepng_malloc(length + 1);
     if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/
 
+    lodepng_memcpy(transkey, data + begin, length);
     transkey[length] = 0;
-    for(i = 0; i != length; ++i) transkey[i] = (char)data[begin + i];
 
     /*read the actual text*/
     begin += length + 1;
@@ -4504,29 +4577,26 @@
     length = (unsigned)chunkLength < begin ? 0 : (unsigned)chunkLength - begin;
 
     if(compressed) {
+      unsigned char* str = 0;
+      size_t size = 0;
+      zlibsettings.max_output_size = decoder->max_text_size;
       /*will fail if zlib error, e.g. if length is too small*/
-      error = zlib_decompress(&decoded.data, &decoded.size,
-                              &data[begin],
-                              length, zlibsettings);
-      if(error) break;
-      if(decoded.allocsize < decoded.size) decoded.allocsize = decoded.size;
-      ucvector_push_back(&decoded, 0);
+      error = zlib_decompress(&str, &size, 0, &data[begin],
+                              length, &zlibsettings);
+      /*error: decompressed text larger than decoder->max_text_size*/
+      if(error && size > zlibsettings.max_output_size) error = 112;
+      if(!error) error = lodepng_add_itext_sized(info, key, langtag, transkey, (char*)str, size);
+      lodepng_free(str);
     } else {
-      if(!ucvector_resize(&decoded, length + 1)) CERROR_BREAK(error, 83 /*alloc fail*/);
-
-      decoded.data[length] = 0;
-      for(i = 0; i != length; ++i) decoded.data[i] = data[begin + i];
+      error = lodepng_add_itext_sized(info, key, langtag, transkey, (char*)(data + begin), length);
     }
 
-    error = lodepng_add_itext(info, key, langtag, transkey, (char*)decoded.data);
-
     break;
   }
 
   lodepng_free(key);
   lodepng_free(langtag);
   lodepng_free(transkey);
-  ucvector_cleanup(&decoded);
 
   return error;
 }
@@ -4590,13 +4660,15 @@
   return 0; /* OK */
 }
 
-static unsigned readChunk_iCCP(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings,
+static unsigned readChunk_iCCP(LodePNGInfo* info, const LodePNGDecoderSettings* decoder,
                                const unsigned char* data, size_t chunkLength) {
   unsigned error = 0;
   unsigned i;
+  size_t size = 0;
+  /*copy the object to change parameters in it*/
+  LodePNGDecompressSettings zlibsettings = decoder->zlibsettings;
 
   unsigned length, string2_begin;
-  ucvector decoded;
 
   info->iccp_defined = 1;
   if(info->iccp_name) lodepng_clear_icc(info);
@@ -4617,24 +4689,14 @@
   if(string2_begin > chunkLength) return 75; /*no null termination, corrupt?*/
 
   length = (unsigned)chunkLength - string2_begin;
-  ucvector_init(&decoded);
-  error = zlib_decompress(&decoded.data, &decoded.size,
+  zlibsettings.max_output_size = decoder->max_icc_size;
+  error = zlib_decompress(&info->iccp_profile, &size, 0,
                           &data[string2_begin],
-                          length, zlibsettings);
-  if(!error) {
-    if(decoded.size) {
-      info->iccp_profile_size = decoded.size;
-      info->iccp_profile = (unsigned char*)lodepng_malloc(decoded.size);
-      if(info->iccp_profile) {
-        lodepng_memcpy(info->iccp_profile, decoded.data, decoded.size);
-      } else {
-        error = 83; /* alloc fail */
-      }
-    } else {
-      error = 100; /*invalid ICC profile size*/
-    }
-  }
-  ucvector_cleanup(&decoded);
+                          length, &zlibsettings);
+  /*error: ICC profile larger than decoder->max_icc_size*/
+  if(error && size > zlibsettings.max_output_size) error = 113;
+  info->iccp_profile_size = size;
+  if(!error && !info->iccp_profile_size) error = 100; /*invalid ICC profile size*/
   return error;
 }
 #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
@@ -4647,7 +4709,7 @@
   unsigned unhandled = 0;
   unsigned error = 0;
 
-  if (pos + 4 > insize) return 30;
+  if(pos + 4 > insize) return 30;
   chunkLength = lodepng_chunk_length(chunk);
   if(chunkLength > 2147483647) return 63;
   data = lodepng_chunk_data_const(chunk);
@@ -4663,9 +4725,9 @@
   } else if(lodepng_chunk_type_equals(chunk, "tEXt")) {
     error = readChunk_tEXt(&state->info_png, data, chunkLength);
   } else if(lodepng_chunk_type_equals(chunk, "zTXt")) {
-    error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+    error = readChunk_zTXt(&state->info_png, &state->decoder, data, chunkLength);
   } else if(lodepng_chunk_type_equals(chunk, "iTXt")) {
-    error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+    error = readChunk_iTXt(&state->info_png, &state->decoder, data, chunkLength);
   } else if(lodepng_chunk_type_equals(chunk, "tIME")) {
     error = readChunk_tIME(&state->info_png, data, chunkLength);
   } else if(lodepng_chunk_type_equals(chunk, "pHYs")) {
@@ -4677,7 +4739,7 @@
   } else if(lodepng_chunk_type_equals(chunk, "sRGB")) {
     error = readChunk_sRGB(&state->info_png, data, chunkLength);
   } else if(lodepng_chunk_type_equals(chunk, "iCCP")) {
-    error = readChunk_iCCP(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+    error = readChunk_iCCP(&state->info_png, &state->decoder, data, chunkLength);
 #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
   } else {
     /* unhandled chunk is ok (is not an error) */
@@ -4697,8 +4759,8 @@
                           const unsigned char* in, size_t insize) {
   unsigned char IEND = 0;
   const unsigned char* chunk;
-  size_t i;
-  ucvector idat; /*the data from idat chunks*/
+  unsigned char* idat; /*the data from idat chunks, zlib compressed*/
+  size_t idatsize = 0;
   unsigned char* scanlines = 0;
   size_t scanlines_size = 0, expected_size = 0;
   size_t outsize = 0;
@@ -4721,7 +4783,10 @@
     CERROR_RETURN(state->error, 92); /*overflow possible due to amount of pixels*/
   }
 
-  ucvector_init(&idat);
+  /*the input file size is a safe upper bound for the total size of all IDAT chunks*/
+  idat = (unsigned char*)lodepng_malloc(insize);
+  if(!idat) CERROR_RETURN(state->error, 83); /*alloc fail*/
+
   chunk = &in[33]; /*first byte of the first chunk after the header*/
 
   /*loop through the chunks, ignoring unknown chunks and stopping at IEND chunk.
@@ -4754,11 +4819,11 @@
 
     /*IDAT chunk, containing compressed image data*/
     if(lodepng_chunk_type_equals(chunk, "IDAT")) {
-      size_t oldsize = idat.size;
       size_t newsize;
-      if(lodepng_addofl(oldsize, chunkLength, &newsize)) CERROR_BREAK(state->error, 95);
-      if(!ucvector_resize(&idat, newsize)) CERROR_BREAK(state->error, 83 /*alloc fail*/);
-      for(i = 0; i != chunkLength; ++i) idat.data[oldsize + i] = data[i];
+      if(lodepng_addofl(idatsize, chunkLength, &newsize)) CERROR_BREAK(state->error, 95);
+      if(newsize > insize) CERROR_BREAK(state->error, 95);
+      lodepng_memcpy(idat + idatsize, data, chunkLength);
+      idatsize += chunkLength;
 #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
       critical_pos = 3;
 #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
@@ -4792,13 +4857,13 @@
     } else if(lodepng_chunk_type_equals(chunk, "zTXt")) {
       /*compressed text chunk (zTXt)*/
       if(state->decoder.read_text_chunks) {
-        state->error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+        state->error = readChunk_zTXt(&state->info_png, &state->decoder, data, chunkLength);
         if(state->error) break;
       }
     } else if(lodepng_chunk_type_equals(chunk, "iTXt")) {
       /*international text chunk (iTXt)*/
       if(state->decoder.read_text_chunks) {
-        state->error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+        state->error = readChunk_iTXt(&state->info_png, &state->decoder, data, chunkLength);
         if(state->error) break;
       }
     } else if(lodepng_chunk_type_equals(chunk, "tIME")) {
@@ -4817,7 +4882,7 @@
       state->error = readChunk_sRGB(&state->info_png, data, chunkLength);
       if(state->error) break;
     } else if(lodepng_chunk_type_equals(chunk, "iCCP")) {
-      state->error = readChunk_iCCP(&state->info_png, &state->decoder.zlibsettings, data, chunkLength);
+      state->error = readChunk_iCCP(&state->info_png, &state->decoder, data, chunkLength);
       if(state->error) break;
 #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
     } else /*it's not an implemented chunk type, so ignore it: skip over the data*/ {
@@ -4840,45 +4905,36 @@
       if(lodepng_chunk_check_crc(chunk)) CERROR_BREAK(state->error, 57); /*invalid CRC*/
     }
 
-    if(!IEND) chunk = lodepng_chunk_next_const(chunk);
+    if(!IEND) chunk = lodepng_chunk_next_const(chunk, in + insize);
   }
 
-  if (state->info_png.color.colortype == LCT_PALETTE
-      && !state->info_png.color.palette) {
+  if(!state->error && state->info_png.color.colortype == LCT_PALETTE && !state->info_png.color.palette) {
     state->error = 106; /* error: PNG file must have PLTE chunk if color type is palette */
   }
 
-  /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation.
-  If the decompressed size does not match the prediction, the image must be corrupt.*/
-  if(state->info_png.interlace_method == 0) {
-    expected_size = lodepng_get_raw_size_idat(*w, *h, &state->info_png.color);
-  } else {
-    /*Adam-7 interlaced: expected size is the sum of the 7 sub-images sizes*/
-    const LodePNGColorMode* color = &state->info_png.color;
-    expected_size = 0;
-    expected_size += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, color);
-    if(*w > 4) expected_size += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, color);
-    expected_size += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, color);
-    if(*w > 2) expected_size += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, color);
-    expected_size += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, color);
-    if(*w > 1) expected_size += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, color);
-    expected_size += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, color);
-  }
   if(!state->error) {
-    /* This allocated data will be realloced by zlib_decompress, initially at
-    smaller size again. But the fact that it's already allocated at full size
-    here speeds the multiple reallocs up. TODO: make zlib_decompress support
-    receiving already allocated buffer with expected size instead. */
-    scanlines = (unsigned char*)lodepng_malloc(expected_size);
-    if(!scanlines) state->error = 83; /*alloc fail*/
-    scanlines_size = 0;
+    /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation.
+    If the decompressed size does not match the prediction, the image must be corrupt.*/
+    if(state->info_png.interlace_method == 0) {
+      size_t bpp = lodepng_get_bpp(&state->info_png.color);
+      expected_size = lodepng_get_raw_size_idat(*w, *h, bpp);
+    } else {
+      size_t bpp = lodepng_get_bpp(&state->info_png.color);
+      /*Adam-7 interlaced: expected size is the sum of the 7 sub-images sizes*/
+      expected_size = 0;
+      expected_size += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, bpp);
+      if(*w > 4) expected_size += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, bpp);
+      expected_size += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, bpp);
+      if(*w > 2) expected_size += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, bpp);
+      expected_size += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, bpp);
+      if(*w > 1) expected_size += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, bpp);
+      expected_size += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, bpp);
+    }
+
+    state->error = zlib_decompress(&scanlines, &scanlines_size, expected_size, idat, idatsize, &state->decoder.zlibsettings);
   }
-  if(!state->error) {
-    state->error = zlib_decompress(&scanlines, &scanlines_size, idat.data,
-                                   idat.size, &state->decoder.zlibsettings);
-    if(!state->error && scanlines_size != expected_size) state->error = 91; /*decompressed size doesn't match prediction*/
-  }
-  ucvector_cleanup(&idat);
+  if(!state->error && scanlines_size != expected_size) state->error = 91; /*decompressed size doesn't match prediction*/
+  lodepng_free(idat);
 
   if(!state->error) {
     outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color);
@@ -4886,7 +4942,7 @@
     if(!*out) state->error = 83; /*alloc fail*/
   }
   if(!state->error) {
-    for(i = 0; i < outsize; i++) (*out)[i] = 0;
+    lodepng_memset(*out, 0, outsize);
     state->error = postProcessScanlines(*out, scanlines, *w, *h, &state->info_png);
   }
   lodepng_free(scanlines);
@@ -4936,6 +4992,11 @@
   lodepng_state_init(&state);
   state.info_raw.colortype = colortype;
   state.info_raw.bitdepth = bitdepth;
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+  /*disable reading things that this function doesn't output*/
+  state.decoder.read_text_chunks = 0;
+  state.decoder.remember_unknown_chunks = 0;
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
   error = lodepng_decode(out, w, h, &state, in, insize);
   lodepng_state_cleanup(&state);
   return error;
@@ -4978,6 +5039,8 @@
 #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
   settings->read_text_chunks = 1;
   settings->remember_unknown_chunks = 0;
+  settings->max_text_size = 16777216;
+  settings->max_icc_size = 16777216; /* 16MB is much more than enough for any reasonable ICC profile */
 #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
   settings->ignore_crc = 0;
   settings->ignore_critical = 0;
@@ -5023,28 +5086,21 @@
 /* / PNG Encoder                                                            / */
 /* ////////////////////////////////////////////////////////////////////////// */
 
-/*chunkName must be string of 4 characters*/
-static unsigned addChunk(ucvector* out, const char* chunkName, const unsigned char* data, size_t length) {
-  CERROR_TRY_RETURN(lodepng_chunk_create(&out->data, &out->size, (unsigned)length, chunkName, data));
-  out->allocsize = out->size; /*fix the allocsize again*/
-  return 0;
-}
 
-static void writeSignature(ucvector* out) {
+static unsigned writeSignature(ucvector* out) {
+  size_t pos = out->size;
+  const unsigned char signature[] = {137, 80, 78, 71, 13, 10, 26, 10};
   /*8 bytes PNG signature, aka the magic bytes*/
-  ucvector_push_back(out, 137);
-  ucvector_push_back(out, 80);
-  ucvector_push_back(out, 78);
-  ucvector_push_back(out, 71);
-  ucvector_push_back(out, 13);
-  ucvector_push_back(out, 10);
-  ucvector_push_back(out, 26);
-  ucvector_push_back(out, 10);
+  if(!ucvector_resize(out, out->size + 8)) return 83; /*alloc fail*/
+  lodepng_memcpy(out->data + pos, signature, 8);
+  return 0;
 }
 
 static unsigned addChunk_IHDR(ucvector* out, unsigned w, unsigned h,
                               LodePNGColorType colortype, unsigned bitdepth, unsigned interlace_method) {
-  unsigned char data[13];
+  unsigned char *chunk, *data;
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 13, "IHDR"));
+  data = chunk + 8;
 
   lodepng_set32bitInt(data + 0, w); /*width*/
   lodepng_set32bitInt(data + 4, h); /*height*/
@@ -5054,244 +5110,267 @@
   data[11] = 0; /*filter method*/
   data[12] = interlace_method; /*interlace method*/
 
-  return addChunk(out, "IHDR", data, sizeof(data));
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
 }
 
+/* writes the RGB values of every palette entry; alpha is added separately, and only if needed, by addChunk_tRNS */
 static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info) {
-  unsigned error = 0;
-  size_t i;
-  ucvector PLTE;
-  ucvector_init(&PLTE);
-  for(i = 0; i != info->palettesize * 4; ++i) {
-    /*add all channels except alpha channel*/
-    if(i % 4 != 3) ucvector_push_back(&PLTE, info->palette[i]);
-  }
-  error = addChunk(out, "PLTE", PLTE.data, PLTE.size);
-  ucvector_cleanup(&PLTE);
+  unsigned char* chunk;
+  size_t i, j = 8;
 
-  return error;
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, info->palettesize * 3, "PLTE"));
+
+  for(i = 0; i != info->palettesize; ++i) {
+    /*add all channels except alpha channel*/
+    chunk[j++] = info->palette[i * 4 + 0];
+    chunk[j++] = info->palette[i * 4 + 1];
+    chunk[j++] = info->palette[i * 4 + 2];
+  }
+
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
 }
 
 static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info) {
-  unsigned error = 0;
-  size_t i;
-  ucvector tRNS;
-  ucvector_init(&tRNS);
+  unsigned char* chunk = 0;
+
   if(info->colortype == LCT_PALETTE) {
-    size_t amount = info->palettesize;
+    size_t i, amount = info->palettesize;
     /*the tail of palette values that all have 255 as alpha, does not have to be encoded*/
     for(i = info->palettesize; i != 0; --i) {
-      if(info->palette[4 * (i - 1) + 3] == 255) --amount;
-      else break;
+      if(info->palette[4 * (i - 1) + 3] != 255) break;
+      --amount;
     }
-    /*add only alpha channel*/
-    for(i = 0; i != amount; ++i) ucvector_push_back(&tRNS, info->palette[4 * i + 3]);
+    if(amount) {
+      CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, amount, "tRNS"));
+      /*add the alpha channel values from the palette*/
+      for(i = 0; i != amount; ++i) chunk[8 + i] = info->palette[4 * i + 3];
+    }
   } else if(info->colortype == LCT_GREY) {
     if(info->key_defined) {
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8));
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255));
+      CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 2, "tRNS"));
+      chunk[8] = (unsigned char)(info->key_r >> 8);
+      chunk[9] = (unsigned char)(info->key_r & 255);
     }
   } else if(info->colortype == LCT_RGB) {
     if(info->key_defined) {
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8));
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255));
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_g >> 8));
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_g & 255));
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_b >> 8));
-      ucvector_push_back(&tRNS, (unsigned char)(info->key_b & 255));
+      CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 6, "tRNS"));
+      chunk[8] = (unsigned char)(info->key_r >> 8);
+      chunk[9] = (unsigned char)(info->key_r & 255);
+      chunk[10] = (unsigned char)(info->key_g >> 8);
+      chunk[11] = (unsigned char)(info->key_g & 255);
+      chunk[12] = (unsigned char)(info->key_b >> 8);
+      chunk[13] = (unsigned char)(info->key_b & 255);
     }
   }
 
-  error = addChunk(out, "tRNS", tRNS.data, tRNS.size);
-  ucvector_cleanup(&tRNS);
-
-  return error;
+  if(chunk) lodepng_chunk_generate_crc(chunk);
+  return 0;
 }
 
 static unsigned addChunk_IDAT(ucvector* out, const unsigned char* data, size_t datasize,
                               LodePNGCompressSettings* zlibsettings) {
-  ucvector zlibdata;
   unsigned error = 0;
+  unsigned char* zlib = 0;
+  size_t zlibsize = 0;
 
-  /*compress with the Zlib compressor*/
-  ucvector_init(&zlibdata);
-  error = zlib_compress(&zlibdata.data, &zlibdata.size, data, datasize, zlibsettings);
-  if(!error) error = addChunk(out, "IDAT", zlibdata.data, zlibdata.size);
-  ucvector_cleanup(&zlibdata);
-
+  error = zlib_compress(&zlib, &zlibsize, data, datasize, zlibsettings);
+  if(!error) {
+    error = lodepng_chunk_createv(out, zlibsize, "IDAT", zlib);
+  }
+  lodepng_free(zlib);
   return error;
 }
 
 static unsigned addChunk_IEND(ucvector* out) {
-  unsigned error = 0;
-  error = addChunk(out, "IEND", 0, 0);
-  return error;
+  return lodepng_chunk_createv(out, 0, "IEND", 0);
 }
 
 #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
 
 static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* textstring) {
-  unsigned error = 0;
-  size_t i;
-  ucvector text;
-  ucvector_init(&text);
-  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)keyword[i]);
-  if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/
-  ucvector_push_back(&text, 0); /*0 termination char*/
-  for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)textstring[i]);
-  error = addChunk(out, "tEXt", text.data, text.size);
-  ucvector_cleanup(&text);
-
-  return error;
+  unsigned char* chunk = 0;
+  size_t keysize = lodepng_strlen(keyword), textsize = lodepng_strlen(textstring);
+  size_t size = keysize + 1 + textsize;
+  if(keysize < 1 || keysize > 79) return 89; /*error: invalid keyword size*/
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, size, "tEXt"));
+  lodepng_memcpy(chunk + 8, keyword, keysize);
+  chunk[8 + keysize] = 0; /*null termination char*/
+  lodepng_memcpy(chunk + 9 + keysize, textstring, textsize);
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
 }
 
 static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* textstring,
                               LodePNGCompressSettings* zlibsettings) {
   unsigned error = 0;
-  ucvector data, compressed;
-  size_t i, textsize = lodepng_strlen(textstring);
+  unsigned char* chunk = 0;
+  unsigned char* compressed = 0;
+  size_t compressedsize = 0;
+  size_t textsize = lodepng_strlen(textstring);
+  size_t keysize = lodepng_strlen(keyword);
+  if(keysize < 1 || keysize > 79) return 89; /*error: invalid keyword size*/
 
-  ucvector_init(&data);
-  ucvector_init(&compressed);
-  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
-  if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/
-  ucvector_push_back(&data, 0); /*0 termination char*/
-  ucvector_push_back(&data, 0); /*compression method: 0*/
-
-  error = zlib_compress(&compressed.data, &compressed.size,
+  error = zlib_compress(&compressed, &compressedsize,
                         (const unsigned char*)textstring, textsize, zlibsettings);
   if(!error) {
-    for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]);
-    error = addChunk(out, "zTXt", data.data, data.size);
+    size_t size = keysize + 2 + compressedsize;
+    error = lodepng_chunk_init(&chunk, out, size, "zTXt");
+  }
+  if(!error) {
+    lodepng_memcpy(chunk + 8, keyword, keysize);
+    chunk[8 + keysize] = 0; /*null termination char*/
+    chunk[9 + keysize] = 0; /*compression method: 0*/
+    lodepng_memcpy(chunk + 10 + keysize, compressed, compressedsize);
+    lodepng_chunk_generate_crc(chunk);
   }
 
-  ucvector_cleanup(&compressed);
-  ucvector_cleanup(&data);
+  lodepng_free(compressed);
   return error;
 }
 
-static unsigned addChunk_iTXt(ucvector* out, unsigned compressed, const char* keyword, const char* langtag,
+static unsigned addChunk_iTXt(ucvector* out, unsigned compress, const char* keyword, const char* langtag,
                               const char* transkey, const char* textstring, LodePNGCompressSettings* zlibsettings) {
   unsigned error = 0;
-  ucvector data;
-  size_t i, textsize = lodepng_strlen(textstring);
+  unsigned char* chunk = 0;
+  unsigned char* compressed = 0;
+  size_t compressedsize = 0;
+  size_t textsize = lodepng_strlen(textstring);
+  size_t keysize = lodepng_strlen(keyword), langsize = lodepng_strlen(langtag), transsize = lodepng_strlen(transkey);
 
-  ucvector_init(&data);
+  if(keysize < 1 || keysize > 79) return 89; /*error: invalid keyword size*/
 
-  for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]);
-  if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/
-  ucvector_push_back(&data, 0); /*null termination char*/
-  ucvector_push_back(&data, compressed ? 1 : 0); /*compression flag*/
-  ucvector_push_back(&data, 0); /*compression method*/
-  for(i = 0; langtag[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)langtag[i]);
-  ucvector_push_back(&data, 0); /*null termination char*/
-  for(i = 0; transkey[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)transkey[i]);
-  ucvector_push_back(&data, 0); /*null termination char*/
-
-  if(compressed) {
-    ucvector compressed_data;
-    ucvector_init(&compressed_data);
-    error = zlib_compress(&compressed_data.data, &compressed_data.size,
+  if(compress) {
+    error = zlib_compress(&compressed, &compressedsize,
                           (const unsigned char*)textstring, textsize, zlibsettings);
-    if(!error) {
-      for(i = 0; i != compressed_data.size; ++i) ucvector_push_back(&data, compressed_data.data[i]);
+  }
+  if(!error) {
+    size_t size = keysize + 3 + langsize + 1 + transsize + 1 + (compress ? compressedsize : textsize);
+    error = lodepng_chunk_init(&chunk, out, size, "iTXt");
+  }
+  if(!error) {
+    size_t pos = 8;
+    lodepng_memcpy(chunk + pos, keyword, keysize);
+    pos += keysize;
+    chunk[pos++] = 0; /*null termination char*/
+    chunk[pos++] = (compress ? 1 : 0); /*compression flag*/
+    chunk[pos++] = 0; /*compression method: 0*/
+    lodepng_memcpy(chunk + pos, langtag, langsize);
+    pos += langsize;
+    chunk[pos++] = 0; /*null termination char*/
+    lodepng_memcpy(chunk + pos, transkey, transsize);
+    pos += transsize;
+    chunk[pos++] = 0; /*null termination char*/
+    if(compress) {
+      lodepng_memcpy(chunk + pos, compressed, compressedsize);
+    } else {
+      lodepng_memcpy(chunk + pos, textstring, textsize);
     }
-    ucvector_cleanup(&compressed_data);
-  } else /*not compressed*/ {
-    for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)textstring[i]);
+    lodepng_chunk_generate_crc(chunk);
   }
 
-  if(!error) error = addChunk(out, "iTXt", data.data, data.size);
-  ucvector_cleanup(&data);
+  lodepng_free(compressed);
   return error;
 }
 
 static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info) {
-  unsigned char data[6];
-  size_t size = 0;
+  unsigned char* chunk = 0;
   if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA) {
-    data[0] = (unsigned char)(info->background_r >> 8);
-    data[1] = (unsigned char)(info->background_r & 255);
-    size = 2;
+    CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 2, "bKGD"));
+    chunk[8] = (unsigned char)(info->background_r >> 8);
+    chunk[9] = (unsigned char)(info->background_r & 255);
   } else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA) {
-    data[0] = (unsigned char)(info->background_r >> 8);
-    data[1] = (unsigned char)(info->background_r & 255);
-    data[2] = (unsigned char)(info->background_g >> 8);
-    data[3] = (unsigned char)(info->background_g & 255);
-    data[4] = (unsigned char)(info->background_b >> 8);
-    data[5] = (unsigned char)(info->background_b & 255);
-    size = 6;
+    CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 6, "bKGD"));
+    chunk[8] = (unsigned char)(info->background_r >> 8);
+    chunk[9] = (unsigned char)(info->background_r & 255);
+    chunk[10] = (unsigned char)(info->background_g >> 8);
+    chunk[11] = (unsigned char)(info->background_g & 255);
+    chunk[12] = (unsigned char)(info->background_b >> 8);
+    chunk[13] = (unsigned char)(info->background_b & 255);
   } else if(info->color.colortype == LCT_PALETTE) {
-    data[0] =(unsigned char)(info->background_r & 255); /*palette index*/
-    size = 1;
+    CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 1, "bKGD"));
+    chunk[8] = (unsigned char)(info->background_r & 255); /*palette index*/
   }
-  return addChunk(out, "bKGD", data, size);
+  if(chunk) lodepng_chunk_generate_crc(chunk);
+  return 0;
 }
 
 static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time) {
-  unsigned char data[7];
-  data[0] = (unsigned char)(time->year >> 8);
-  data[1] = (unsigned char)(time->year & 255);
-  data[2] = (unsigned char)time->month;
-  data[3] = (unsigned char)time->day;
-  data[4] = (unsigned char)time->hour;
-  data[5] = (unsigned char)time->minute;
-  data[6] = (unsigned char)time->second;
-  return addChunk(out, "tIME", data, sizeof(data));
+  unsigned char* chunk;
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 7, "tIME"));
+  chunk[8] = (unsigned char)(time->year >> 8);
+  chunk[9] = (unsigned char)(time->year & 255);
+  chunk[10] = (unsigned char)time->month;
+  chunk[11] = (unsigned char)time->day;
+  chunk[12] = (unsigned char)time->hour;
+  chunk[13] = (unsigned char)time->minute;
+  chunk[14] = (unsigned char)time->second;
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
 }
 
 static unsigned addChunk_pHYs(ucvector* out, const LodePNGInfo* info) {
-  unsigned char data[9];
-  lodepng_set32bitInt(data + 0, info->phys_x);
-  lodepng_set32bitInt(data + 4, info->phys_y);  data[8] = info->phys_unit;
-  return addChunk(out, "pHYs", data, sizeof(data));
+  unsigned char* chunk;
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 9, "pHYs"));
+  lodepng_set32bitInt(chunk + 8, info->phys_x);
+  lodepng_set32bitInt(chunk + 12, info->phys_y);
+  chunk[16] = info->phys_unit;
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
 }
 
 static unsigned addChunk_gAMA(ucvector* out, const LodePNGInfo* info) {
-  unsigned char data[4];
-  lodepng_set32bitInt(data, info->gama_gamma);
-  return addChunk(out, "gAMA", data, sizeof(data));
+  unsigned char* chunk;
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 4, "gAMA"));
+  lodepng_set32bitInt(chunk + 8, info->gama_gamma);
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
 }
 
 static unsigned addChunk_cHRM(ucvector* out, const LodePNGInfo* info) {
-  unsigned char data[32];
-  lodepng_set32bitInt(data + 0, info->chrm_white_x);
-  lodepng_set32bitInt(data + 4, info->chrm_white_y);
-  lodepng_set32bitInt(data + 8, info->chrm_red_x);
-  lodepng_set32bitInt(data + 12, info->chrm_red_y);
-  lodepng_set32bitInt(data + 16, info->chrm_green_x);
-  lodepng_set32bitInt(data + 20, info->chrm_green_y);
-  lodepng_set32bitInt(data + 24, info->chrm_blue_x);
-  lodepng_set32bitInt(data + 28, info->chrm_blue_y);
-  return addChunk(out, "cHRM", data, sizeof(data));
+  unsigned char* chunk;
+  CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 32, "cHRM"));
+  lodepng_set32bitInt(chunk + 8, info->chrm_white_x);
+  lodepng_set32bitInt(chunk + 12, info->chrm_white_y);
+  lodepng_set32bitInt(chunk + 16, info->chrm_red_x);
+  lodepng_set32bitInt(chunk + 20, info->chrm_red_y);
+  lodepng_set32bitInt(chunk + 24, info->chrm_green_x);
+  lodepng_set32bitInt(chunk + 28, info->chrm_green_y);
+  lodepng_set32bitInt(chunk + 32, info->chrm_blue_x);
+  lodepng_set32bitInt(chunk + 36, info->chrm_blue_y);
+  lodepng_chunk_generate_crc(chunk);
+  return 0;
 }
 
 static unsigned addChunk_sRGB(ucvector* out, const LodePNGInfo* info) {
   unsigned char data = info->srgb_intent;
-  return addChunk(out, "sRGB", &data, 1);
+  return lodepng_chunk_createv(out, 1, "sRGB", &data);
 }
 
 static unsigned addChunk_iCCP(ucvector* out, const LodePNGInfo* info, LodePNGCompressSettings* zlibsettings) {
   unsigned error = 0;
-  ucvector data, compressed;
-  size_t i;
+  unsigned char* chunk = 0;
+  unsigned char* compressed = 0;
+  size_t compressedsize = 0;
+  size_t keysize = lodepng_strlen(info->iccp_name);
 
-  ucvector_init(&data);
-  ucvector_init(&compressed);
-  for(i = 0; info->iccp_name[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)info->iccp_name[i]);
-  if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/
-  ucvector_push_back(&data, 0); /*0 termination char*/
-  ucvector_push_back(&data, 0); /*compression method: 0*/
-
-  error = zlib_compress(&compressed.data, &compressed.size,
+  if(keysize < 1 || keysize > 79) return 89; /*error: invalid keyword size*/
+  error = zlib_compress(&compressed, &compressedsize,
                         info->iccp_profile, info->iccp_profile_size, zlibsettings);
   if(!error) {
-    for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]);
-    error = addChunk(out, "iCCP", data.data, data.size);
+    size_t size = keysize + 2 + compressedsize;
+    error = lodepng_chunk_init(&chunk, out, size, "iCCP");
+  }
+  if(!error) {
+    lodepng_memcpy(chunk + 8, info->iccp_name, keysize);
+    chunk[8 + keysize] = 0; /*null termination char*/
+    chunk[9 + keysize] = 0; /*compression method: 0*/
+    lodepng_memcpy(chunk + 10 + keysize, compressed, compressedsize);
+    lodepng_chunk_generate_crc(chunk);
   }
 
-  ucvector_cleanup(&compressed);
-  ucvector_cleanup(&data);
+  lodepng_free(compressed);
   return error;
 }
 
@@ -5337,17 +5416,18 @@
         for(i = bytewidth; i < length; ++i) out[i] = (scanline[i] - scanline[i - bytewidth]);
       }
       break;
-    default: return; /*nonexistent filter type given*/
+    default: return; /*invalid filter type given*/
   }
 }
 
-/* integer binary logarithm */
+/* integer binary logarithm, max return value is 31 */
 static size_t ilog2(size_t i) {
   size_t result = 0;
-  while(i >= 65536) { result += 16; i >>= 16; }
-  while(i >= 256) { result += 8; i >>= 8; }
-  while(i >= 16) { result += 4; i >>= 4; }
-  while(i >= 2) { result += 1; i >>= 1; }
+  if(i >= 65536) { result += 16; i >>= 16; }
+  if(i >= 256) { result += 8; i >>= 8; }
+  if(i >= 16) { result += 4; i >>= 4; }
+  if(i >= 4) { result += 2; i >>= 2; }
+  if(i >= 2) { result += 1; /*i >>= 1;*/ }
   return result;
 }
 
@@ -5362,16 +5442,17 @@
 }
 
 static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h,
-                       const LodePNGColorMode* info, const LodePNGEncoderSettings* settings) {
+                       const LodePNGColorMode* color, const LodePNGEncoderSettings* settings) {
   /*
   For PNG filter method 0
   out must be a buffer with as size: h + (w * h * bpp + 7u) / 8u, because there are
   the scanlines with 1 extra byte per scanline
   */
 
-  unsigned bpp = lodepng_get_bpp(info);
+  unsigned bpp = lodepng_get_bpp(color);
   /*the width of a scanline in bytes, not including the filter type*/
-  size_t linebytes = (w * bpp + 7u) / 8u;
+  size_t linebytes = lodepng_get_raw_size_idat(w, 1, bpp) - 1u;
+
   /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/
   size_t bytewidth = (bpp + 7u) / 8u;
   const unsigned char* prevline = 0;
@@ -5393,7 +5474,7 @@
   heuristic is used.
   */
   if(settings->filter_palette_zero &&
-     (info->colortype == LCT_PALETTE || info->bitdepth < 8)) strategy = LFS_ZERO;
+     (color->colortype == LCT_PALETTE || color->bitdepth < 8)) strategy = LFS_ZERO;
 
   if(bpp == 0) return 31; /*error: invalid color type*/
 
@@ -5414,7 +5495,7 @@
 
     for(type = 0; type != 5; ++type) {
       attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
-      if(!attempt[type]) return 83; /*alloc fail*/
+      if(!attempt[type]) error = 83; /*alloc fail*/
     }
 
     if(!error) {
@@ -5461,32 +5542,34 @@
 
     for(type = 0; type != 5; ++type) {
       attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
-      if(!attempt[type]) return 83; /*alloc fail*/
+      if(!attempt[type]) error = 83; /*alloc fail*/
     }
 
-    for(y = 0; y != h; ++y) {
-      /*try the 5 filter types*/
-      for(type = 0; type != 5; ++type) {
-        size_t sum = 0;
-        filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
-        for(x = 0; x != 256; ++x) count[x] = 0;
-        for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]];
-        ++count[type]; /*the filter type itself is part of the scanline*/
-        for(x = 0; x != 256; ++x) {
-          sum += ilog2i(count[x]);
+    if(!error) {
+      for(y = 0; y != h; ++y) {
+        /*try the 5 filter types*/
+        for(type = 0; type != 5; ++type) {
+          size_t sum = 0;
+          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+          lodepng_memset(count, 0, 256 * sizeof(*count));
+          for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]];
+          ++count[type]; /*the filter type itself is part of the scanline*/
+          for(x = 0; x != 256; ++x) {
+            sum += ilog2i(count[x]);
+          }
+          /*check if this is the largest sum so far (or if type == 0 it's the first case so always store the values)*/
+          if(type == 0 || sum > bestSum) {
+            bestType = type;
+            bestSum = sum;
+          }
         }
-        /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/
-        if(type == 0 || sum > bestSum) {
-          bestType = type;
-          bestSum = sum;
-        }
+
+        prevline = &in[y * linebytes];
+
+        /*now fill the out values*/
+        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
       }
-
-      prevline = &in[y * linebytes];
-
-      /*now fill the out values*/
-      out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
-      for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
     }
 
     for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
@@ -5508,7 +5591,8 @@
     size_t smallest = 0;
     unsigned type = 0, bestType = 0;
     unsigned char* dummy;
-    LodePNGCompressSettings zlibsettings = settings->zlibsettings;
+    LodePNGCompressSettings zlibsettings;
+    lodepng_memcpy(&zlibsettings, &settings->zlibsettings, sizeof(LodePNGCompressSettings));
     /*use fixed tree on the attempts so that the tree is not adapted to the filtertype on purpose,
     to simulate the true case where the tree is the same for the whole image. Sometimes it gives
     better result with dynamic tree anyway. Using the fixed tree sometimes gives worse, but in rare
@@ -5520,27 +5604,29 @@
     zlibsettings.custom_deflate = 0;
     for(type = 0; type != 5; ++type) {
       attempt[type] = (unsigned char*)lodepng_malloc(linebytes);
-      if(!attempt[type]) return 83; /*alloc fail*/
+      if(!attempt[type]) error = 83; /*alloc fail*/
     }
-    for(y = 0; y != h; ++y) /*try the 5 filter types*/ {
-      for(type = 0; type != 5; ++type) {
-        unsigned testsize = (unsigned)linebytes;
-        /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/
+    if(!error) {
+      for(y = 0; y != h; ++y) /*try the 5 filter types*/ {
+        for(type = 0; type != 5; ++type) {
+          unsigned testsize = (unsigned)linebytes;
+          /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/
 
-        filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
-        size[type] = 0;
-        dummy = 0;
-        zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings);
-        lodepng_free(dummy);
-        /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/
-        if(type == 0 || size[type] < smallest) {
-          bestType = type;
-          smallest = size[type];
+          filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type);
+          size[type] = 0;
+          dummy = 0;
+          zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings);
+          lodepng_free(dummy);
+          /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/
+          if(type == 0 || size[type] < smallest) {
+            bestType = type;
+            smallest = size[type];
+          }
         }
+        prevline = &in[y * linebytes];
+        out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
+        for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
       }
-      prevline = &in[y * linebytes];
-      out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/
-      for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x];
     }
     for(type = 0; type != 5; ++type) lodepng_free(attempt[type]);
   }
@@ -5693,36 +5779,13 @@
   return error;
 }
 
-/*
-palette must have 4 * palettesize bytes allocated, and given in format RGBARGBARGBARGBA...
-returns 0 if the palette is opaque,
-returns 1 if the palette has a single color with alpha 0 ==> color key
-returns 2 if the palette is semi-translucent.
-*/
-static unsigned getPaletteTranslucency(const unsigned char* palette, size_t palettesize) {
-  size_t i;
-  unsigned key = 0;
-  unsigned r = 0, g = 0, b = 0; /*the value of the color with alpha 0, so long as color keying is possible*/
-  for(i = 0; i != palettesize; ++i) {
-    if(!key && palette[4 * i + 3] == 0) {
-      r = palette[4 * i + 0]; g = palette[4 * i + 1]; b = palette[4 * i + 2];
-      key = 1;
-      i = (size_t)(-1); /*restart from beginning, to detect earlier opaque colors with key's value*/
-    }
-    else if(palette[4 * i + 3] != 255) return 2;
-    /*when key, no opaque RGB may have key's RGB*/
-    else if(key && r == palette[i * 4 + 0] && g == palette[i * 4 + 1] && b == palette[i * 4 + 2]) return 2;
-  }
-  return key;
-}
-
 #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
 static unsigned addUnknownChunks(ucvector* out, unsigned char* data, size_t datasize) {
   unsigned char* inchunk = data;
   while((size_t)(inchunk - data) < datasize) {
     CERROR_TRY_RETURN(lodepng_chunk_append(&out->data, &out->size, inchunk));
     out->allocsize = out->size; /*fix the allocsize again*/
-    inchunk = lodepng_chunk_next(inchunk);
+    inchunk = lodepng_chunk_next(inchunk, data + datasize);
   }
   return 0;
 }
@@ -5753,11 +5816,10 @@
                         LodePNGState* state) {
   unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/
   size_t datasize = 0;
-  ucvector outv;
+  ucvector outv = ucvector_init(NULL, 0);
   LodePNGInfo info;
   const LodePNGInfo* info_png = &state->info_png;
 
-  ucvector_init(&outv);
   lodepng_info_init(&info);
 
   /*provide some proper output values if error will happen*/
@@ -5772,17 +5834,17 @@
     goto cleanup;
   }
   if(state->encoder.zlibsettings.btype > 2) {
-    state->error = 61; /*error: nonexistent btype*/
+    state->error = 61; /*error: invalid btype*/
     goto cleanup;
   }
   if(info_png->interlace_method > 1) {
-    state->error = 71; /*error: nonexistent interlace mode*/
+    state->error = 71; /*error: invalid interlace mode*/
     goto cleanup;
   }
   state->error = checkColorValidity(info_png->color.colortype, info_png->color.bitdepth);
-  if(state->error) goto cleanup; /*error: nonexistent color type given*/
+  if(state->error) goto cleanup; /*error: invalid color type given*/
   state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth);
-  if(state->error) goto cleanup; /*error: nonexistent color type given*/
+  if(state->error) goto cleanup; /*error: invalid color type given*/
 
   /* color convert and compute scanline filter types */
   lodepng_info_copy(&info, &state->info_png);
@@ -5802,14 +5864,16 @@
       stats.allow_greyscale = 0;
     }
 #endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */
-    lodepng_compute_color_stats(&stats, image, w, h, &state->info_raw);
+    state->error = lodepng_compute_color_stats(&stats, image, w, h, &state->info_raw);
+    if(state->error) goto cleanup;
 #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
     if(info_png->background_defined) {
       /*the background chunk's color must be taken into account as well*/
       unsigned r = 0, g = 0, b = 0;
       LodePNGColorMode mode16 = lodepng_color_mode_make(LCT_RGB, 16);
       lodepng_convert_rgb(&r, &g, &b, info_png->background_r, info_png->background_g, info_png->background_b, &mode16, &info_png->color);
-      lodepng_color_stats_add(&stats, r, g, b, 65535);
+      state->error = lodepng_color_stats_add(&stats, r, g, b, 65535);
+      if(state->error) goto cleanup;
     }
 #endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */
     state->error = auto_choose_color(&info.color, &state->info_raw, &stats);
@@ -5851,20 +5915,26 @@
     if(!state->error) {
       state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h);
     }
-    if(!state->error) preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder);
+    if(!state->error) {
+      state->error = preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder);
+    }
     lodepng_free(converted);
     if(state->error) goto cleanup;
+  } else {
+    state->error = preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder);
+    if(state->error) goto cleanup;
   }
-  else preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder);
 
   /* output all PNG chunks */ {
 #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
     size_t i;
 #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
     /*write signature and chunks*/
-    writeSignature(&outv);
+    state->error = writeSignature(&outv);
+    if(state->error) goto cleanup;
     /*IHDR*/
-    addChunk_IHDR(&outv, w, h, info.color.colortype, info.color.bitdepth, info.interlace_method);
+    state->error = addChunk_IHDR(&outv, w, h, info.color.colortype, info.color.bitdepth, info.interlace_method);
+    if(state->error) goto cleanup;
 #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
     /*unknown chunks between IHDR and PLTE*/
     if(info.unknown_chunks_data[0]) {
@@ -5872,25 +5942,36 @@
       if(state->error) goto cleanup;
     }
     /*color profile chunks must come before PLTE */
-    if(info.iccp_defined) addChunk_iCCP(&outv, &info, &state->encoder.zlibsettings);
-    if(info.srgb_defined) addChunk_sRGB(&outv, &info);
-    if(info.gama_defined) addChunk_gAMA(&outv, &info);
-    if(info.chrm_defined) addChunk_cHRM(&outv, &info);
+    if(info.iccp_defined) {
+      state->error = addChunk_iCCP(&outv, &info, &state->encoder.zlibsettings);
+      if(state->error) goto cleanup;
+    }
+    if(info.srgb_defined) {
+      state->error = addChunk_sRGB(&outv, &info);
+      if(state->error) goto cleanup;
+    }
+    if(info.gama_defined) {
+      state->error = addChunk_gAMA(&outv, &info);
+      if(state->error) goto cleanup;
+    }
+    if(info.chrm_defined) {
+      state->error = addChunk_cHRM(&outv, &info);
+      if(state->error) goto cleanup;
+    }
 #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
     /*PLTE*/
     if(info.color.colortype == LCT_PALETTE) {
-      addChunk_PLTE(&outv, &info.color);
+      state->error = addChunk_PLTE(&outv, &info.color);
+      if(state->error) goto cleanup;
     }
     if(state->encoder.force_palette && (info.color.colortype == LCT_RGB || info.color.colortype == LCT_RGBA)) {
-      addChunk_PLTE(&outv, &info.color);
+      /*force_palette means: write suggested palette for truecolor in PLTE chunk*/
+      state->error = addChunk_PLTE(&outv, &info.color);
+      if(state->error) goto cleanup;
     }
-    /*tRNS*/
-    if(info.color.colortype == LCT_PALETTE && getPaletteTranslucency(info.color.palette, info.color.palettesize) != 0) {
-      addChunk_tRNS(&outv, &info.color);
-    }
-    if((info.color.colortype == LCT_GREY || info.color.colortype == LCT_RGB) && info.color.key_defined) {
-      addChunk_tRNS(&outv, &info.color);
-    }
+    /*tRNS (this will only add the chunk when necessary) */
+    state->error = addChunk_tRNS(&outv, &info.color);
+    if(state->error) goto cleanup;
 #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
     /*bKGD (must come between PLTE and the IDAt chunks*/
     if(info.background_defined) {
@@ -5898,7 +5979,10 @@
       if(state->error) goto cleanup;
     }
     /*pHYs (must come before the IDAT chunks)*/
-    if(info.phys_defined) addChunk_pHYs(&outv, &info);
+    if(info.phys_defined) {
+      state->error = addChunk_pHYs(&outv, &info);
+      if(state->error) goto cleanup;
+    }
 
     /*unknown chunks between PLTE and IDAT*/
     if(info.unknown_chunks_data[1]) {
@@ -5911,7 +5995,10 @@
     if(state->error) goto cleanup;
 #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
     /*tIME*/
-    if(info.time_defined) addChunk_tIME(&outv, &info.time);
+    if(info.time_defined) {
+      state->error = addChunk_tIME(&outv, &info.time);
+      if(state->error) goto cleanup;
+    }
     /*tEXt and/or zTXt*/
     for(i = 0; i != info.text_num; ++i) {
       if(lodepng_strlen(info.text_keys[i]) > 79) {
@@ -5923,9 +6010,11 @@
         goto cleanup;
       }
       if(state->encoder.text_compression) {
-        addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings);
+        state->error = addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings);
+        if(state->error) goto cleanup;
       } else {
-        addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]);
+        state->error = addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]);
+        if(state->error) goto cleanup;
       }
     }
     /*LodePNG version id in text chunk*/
@@ -5941,7 +6030,8 @@
         }
       }
       if(already_added_id_text == 0) {
-        addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/
+        state->error = addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/
+        if(state->error) goto cleanup;
       }
     }
     /*iTXt*/
@@ -5954,9 +6044,11 @@
         state->error = 67; /*text chunk too small*/
         goto cleanup;
       }
-      addChunk_iTXt(&outv, state->encoder.text_compression,
-                    info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i],
-                    &state->encoder.zlibsettings);
+      state->error = addChunk_iTXt(
+          &outv, state->encoder.text_compression,
+          info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i],
+          &state->encoder.zlibsettings);
+      if(state->error) goto cleanup;
     }
 
     /*unknown chunks between IDAT and IEND*/
@@ -5965,7 +6057,8 @@
       if(state->error) goto cleanup;
     }
 #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
-    addChunk_IEND(&outv);
+    state->error = addChunk_IEND(&outv);
+    if(state->error) goto cleanup;
   }
 
 cleanup:
@@ -6053,7 +6146,7 @@
     case 14: return "problem while processing dynamic deflate block";
     case 15: return "problem while processing dynamic deflate block";
     /*this error could happen if there are only 0 or 1 symbols present in the huffman code:*/
-    case 16: return "nonexistent code while processing dynamic deflate block";
+    case 16: return "invalid code while processing dynamic deflate block";
     case 17: return "end of out buffer memory reached while inflating";
     case 18: return "invalid distance code while inflating";
     case 19: return "end of out buffer memory reached while inflating";
@@ -6116,8 +6209,8 @@
     case 67: return "the length of a text chunk keyword given to the encoder is smaller than the minimum of 1 byte";
     case 68: return "tried to encode a PLTE chunk with a palette that has less than 1 or more than 256 colors";
     case 69: return "unknown chunk type with 'critical' flag encountered by the decoder";
-    case 71: return "nonexistent interlace mode given to encoder (must be 0 or 1)";
-    case 72: return "while decoding, nonexistent compression method encountering in zTXt or iTXt chunk (it must be 0)";
+    case 71: return "invalid interlace mode given to encoder (must be 0 or 1)";
+    case 72: return "while decoding, invalid compression method encountering in zTXt or iTXt chunk (it must be 0)";
     case 73: return "invalid tIME chunk size";
     case 74: return "invalid pHYs chunk size";
     /*length could be wrong, or data chopped off*/
@@ -6155,6 +6248,16 @@
     case 106: return "PNG file must have PLTE chunk if color type is palette";
     case 107: return "color convert from palette mode requested without setting the palette data in it";
     case 108: return "tried to add more than 256 values to a palette";
+    /*this limit can be configured in LodePNGDecompressSettings*/
+    case 109: return "tried to decompress zlib or deflate data larger than desired max_output_size";
+    case 110: return "custom zlib or inflate decompression failed";
+    case 111: return "custom zlib or deflate compression failed";
+    /*max text size limit can be configured in LodePNGDecoderSettings. This error prevents
+    unreasonable memory consumption when decoding due to impossibly large text sizes.*/
+    case 112: return "compressed text unreasonably large";
+    /*max ICC size limit can be configured in LodePNGDecoderSettings. This error prevents
+    unreasonable memory consumption when decoding due to impossibly large ICC profile*/
+    case 113: return "ICC profile unreasonably large";
   }
   return "unknown error code";
 }
@@ -6189,7 +6292,7 @@
                     const LodePNGDecompressSettings& settings) {
   unsigned char* buffer = 0;
   size_t buffersize = 0;
-  unsigned error = zlib_decompress(&buffer, &buffersize, in, insize, &settings);
+  unsigned error = zlib_decompress(&buffer, &buffersize, 0, in, insize, &settings);
   if(buffer) {
     out.insert(out.end(), &buffer[0], &buffer[buffersize]);
     lodepng_free(buffer);
@@ -6248,7 +6351,7 @@
 
 unsigned decode(std::vector<unsigned char>& out, unsigned& w, unsigned& h, const unsigned char* in,
                 size_t insize, LodePNGColorType colortype, unsigned bitdepth) {
-  unsigned char* buffer;
+  unsigned char* buffer = 0;
   unsigned error = lodepng_decode_memory(&buffer, &w, &h, in, insize, colortype, bitdepth);
   if(buffer && !error) {
     State state;
@@ -6256,8 +6359,8 @@
     state.info_raw.bitdepth = bitdepth;
     size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw);
     out.insert(out.end(), &buffer[0], &buffer[buffersize]);
-    lodepng_free(buffer);
   }
+  lodepng_free(buffer);
   return error;
 }
 
diff --git a/src/zopflipng/lodepng/lodepng.h b/src/zopflipng/lodepng/lodepng.h
index c5363a4..6801cb7 100644
--- a/src/zopflipng/lodepng/lodepng.h
+++ b/src/zopflipng/lodepng/lodepng.h
@@ -1,7 +1,7 @@
 /*
-LodePNG version 20191107
+LodePNG version 20201017
 
-Copyright (c) 2005-2019 Lode Vandevenne
+Copyright (c) 2005-2020 Lode Vandevenne
 
 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
@@ -270,12 +270,21 @@
   unsigned ignore_adler32; /*if 1, continue and don't give an error message if the Adler32 checksum is corrupted*/
   unsigned ignore_nlen; /*ignore complement of len checksum in uncompressed blocks*/
 
-  /*use custom zlib decoder instead of built in one (default: null)*/
+  /*Maximum decompressed size, beyond this the decoder may (and is encouraged to) stop decoding,
+  return an error, output a data size > max_output_size and all the data up to that point. This is
+  not a hard limit nor a guarantee, but can prevent excessive memory usage. This setting is
+  ignored by the PNG decoder, but is used by the deflate/zlib decoder and can be used by custom ones.
+  Set to 0 to impose no limit (the default).*/
+  size_t max_output_size;
+
+  /*use custom zlib decoder instead of built in one (default: null).
+  Should return 0 if success, any non-0 if error (numeric value not exposed).*/
   unsigned (*custom_zlib)(unsigned char**, size_t*,
                           const unsigned char*, size_t,
                           const LodePNGDecompressSettings*);
   /*use custom deflate decoder instead of built in one (default: null)
-  if custom_zlib is not null, custom_inflate is ignored (the zlib format uses deflate)*/
+  if custom_zlib is not null, custom_inflate is ignored (the zlib format uses deflate).
+  Should return 0 if success, any non-0 if error (numeric value not exposed).*/
   unsigned (*custom_inflate)(unsigned char**, size_t*,
                              const unsigned char*, size_t,
                              const LodePNGDecompressSettings*);
@@ -346,8 +355,8 @@
 
   The palette is only supported for color type 3.
   */
-  unsigned char* palette; /*palette in RGBARGBA... order. When allocated, must be either 0, or have size 1024*/
-  size_t palettesize; /*palette size in number of colors (amount of bytes is 4 * palettesize)*/
+  unsigned char* palette; /*palette in RGBARGBA... order. Must be either 0, or when allocated must have 1024 bytes*/
+  size_t palettesize; /*palette size in number of colors (amount of used bytes is 4 * palettesize)*/
 
   /*
   transparent color key (tRNS)
@@ -454,30 +463,36 @@
   unsigned background_b;       /*blue component of suggested background color*/
 
   /*
-  non-international text chunks (tEXt and zTXt)
+  Non-international text chunks (tEXt and zTXt)
 
   The char** arrays each contain num strings. The actual messages are in
   text_strings, while text_keys are keywords that give a short description what
   the actual text represents, e.g. Title, Author, Description, or anything else.
 
-  All the string fields below including keys, names and language tags are null terminated.
+  All the string fields below including strings, keys, names and language tags are null terminated.
   The PNG specification uses null characters for the keys, names and tags, and forbids null
   characters to appear in the main text which is why we can use null termination everywhere here.
 
-  A keyword is minimum 1 character and maximum 79 characters long. It's
-  discouraged to use a single line length longer than 79 characters for texts.
+  A keyword is minimum 1 character and maximum 79 characters long (plus the
+  additional null terminator). It's discouraged to use a single line length
+  longer than 79 characters for texts.
 
   Don't allocate these text buffers yourself. Use the init/cleanup functions
   correctly and use lodepng_add_text and lodepng_clear_text.
+
+  Standard text chunk keywords and strings are encoded using Latin-1.
   */
   size_t text_num; /*the amount of texts in these char** buffers (there may be more texts in itext)*/
   char** text_keys; /*the keyword of a text chunk (e.g. "Comment")*/
   char** text_strings; /*the actual text*/
 
   /*
-  international text chunks (iTXt)
+  International text chunks (iTXt)
   Similar to the non-international text chunks, but with additional strings
-  "langtags" and "transkeys".
+  "langtags" and "transkeys", and the following text encodings are used:
+  keys: Latin-1, langtags: ASCII, transkeys and strings: UTF-8.
+  keys must be 1-79 characters (plus the additional null terminator), the other
+  strings are any length.
   */
   size_t itext_num; /*the amount of international texts in this PNG*/
   char** itext_keys; /*the English keyword of the text chunk (e.g. "Comment")*/
@@ -639,8 +654,19 @@
 
 #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
   unsigned read_text_chunks; /*if false but remember_unknown_chunks is true, they're stored in the unknown chunks*/
+
   /*store all bytes from unknown chunks in the LodePNGInfo (off by default, useful for a png editor)*/
   unsigned remember_unknown_chunks;
+
+  /* maximum size for decompressed text chunks. If a text chunk's text is larger than this, an error is returned,
+  unless reading text chunks is disabled or this limit is set higher or disabled. Set to 0 to allow any size.
+  By default it is a value that prevents unreasonably large strings from hogging memory. */
+  size_t max_text_size;
+
+  /* maximum size for compressed ICC chunks. If the ICC profile is larger than this, an error will be returned. Set to
+  0 to allow any size. By default this is a value that prevents ICC profiles that would be much larger than any
+  legitimate profile could be from hogging memory. */
+  size_t max_icc_size;
 #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
 } LodePNGDecoderSettings;
 
@@ -693,20 +719,11 @@
 
 void lodepng_color_stats_init(LodePNGColorStats* stats);
 
-/*Get a LodePNGColorStats of the image. The stats must already have been inited.*/
-void lodepng_compute_color_stats(LodePNGColorStats* stats,
-                                 const unsigned char* image, unsigned w, unsigned h,
-                                 const LodePNGColorMode* mode_in);
-/*Computes a minimal PNG color model that can contain all colors as indicated by the stats and it settings.
-The stats should be computed with lodepng_compute_color_stats.
-mode_in is raw color profile of the image the stats were computed on, to copy palette order from when relevant.
-Minimal PNG color model means the color type and bit depth that gives smallest amount of bits in the output image,
-e.g. gray if only grayscale pixels, palette if less than 256 colors, color key if only single transparent color, ...
-LodePNG uses this function internally if auto_convert is enabled (it is by default).
-*/
-unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out,
-                                   const LodePNGColorMode* mode_in,
-                                   const LodePNGColorMode* stats);
+/*Get a LodePNGColorStats of the image. The stats must already have been inited.
+Returns error code (e.g. alloc fail) or 0 if ok.*/
+unsigned lodepng_compute_color_stats(LodePNGColorStats* stats,
+                                     const unsigned char* image, unsigned w, unsigned h,
+                                     const LodePNGColorMode* mode_in);
 
 /*Settings for the encoder.*/
 typedef struct LodePNGEncoderSettings {
@@ -755,10 +772,6 @@
   LodePNGColorMode info_raw; /*specifies the format in which you would like to get the raw pixel buffer*/
   LodePNGInfo info_png; /*info of the PNG image obtained after decoding*/
   unsigned error;
-#ifdef LODEPNG_COMPILE_CPP
-  /* For the lodepng::State subclass. */
-  virtual ~LodePNGState(){}
-#endif
 } LodePNGState;
 
 /*init, cleanup and copy functions to use with this struct*/
@@ -870,32 +883,32 @@
 or the 8th byte of a PNG file which always has the first chunk), or alternatively may
 point to the first byte of the PNG file (which is not a chunk but the magic header, the
 function will then skip over it and return the first real chunk).
-Expects at least 8 readable bytes of memory in the input pointer.
-Will output pointer to the start of the next chunk or the end of the file if there
-is no more chunk after this. Start this process at the 8th byte of the PNG file.
+Will output pointer to the start of the next chunk, or at or beyond end of the file if there
+is no more chunk after this or possibly if the chunk is corrupt.
+Start this process at the 8th byte of the PNG file.
 In a non-corrupt PNG file, the last chunk should have name "IEND".
 */
-unsigned char* lodepng_chunk_next(unsigned char* chunk);
-const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk);
+unsigned char* lodepng_chunk_next(unsigned char* chunk, unsigned char* end);
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk, const unsigned char* end);
 
 /*Finds the first chunk with the given type in the range [chunk, end), or returns NULL if not found.*/
-unsigned char* lodepng_chunk_find(unsigned char* chunk, const unsigned char* end, const char type[5]);
+unsigned char* lodepng_chunk_find(unsigned char* chunk, unsigned char* end, const char type[5]);
 const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]);
 
 /*
 Appends chunk to the data in out. The given chunk should already have its chunk header.
-The out variable and outlength are updated to reflect the new reallocated buffer.
+The out variable and outsize are updated to reflect the new reallocated buffer.
 Returns error code (0 if it went ok)
 */
-unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk);
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outsize, const unsigned char* chunk);
 
 /*
 Appends new chunk to out. The chunk to append is given by giving its length, type
 and data separately. The type is a 4-letter string.
-The out variable and outlength are updated to reflect the new reallocated buffer.
+The out variable and outsize are updated to reflect the new reallocated buffer.
 Returne error code (0 if it went ok)
 */
-unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outsize, unsigned length,
                               const char* type, const unsigned char* data);
 
 
@@ -985,7 +998,7 @@
   public:
     State();
     State(const State& other);
-    virtual ~State();
+    ~State();
     State& operator=(const State& other);
 };
 
@@ -1067,8 +1080,7 @@
 [ ] let the C++ wrapper catch exceptions coming from the standard library and return LodePNG error codes
 [ ] allow user to provide custom color conversion functions, e.g. for premultiplied alpha, padding bits or not, ...
 [ ] allow user to give data (void*) to custom allocator
-[ ] provide alternatives for C library functions not present on some platforms (memcpy, ...)
-[ ] rename "grey" to "gray" everywhere since "color" also uses US spelling (keep "grey" copies for backwards compatibility)
+[X] provide alternatives for C library functions not present on some platforms (memcpy, ...)
 */
 
 #endif /*LODEPNG_H inclusion guard*/
@@ -1519,6 +1531,11 @@
 
 Check the implementation of lodepng_error_text to see the meaning of each code.
 
+It is not recommended to use the numerical values to programmatically make
+different decisions based on error types as the numbers are not guaranteed to
+stay backwards compatible. They are for human consumption only. Programmatically
+only 0 or non-0 matter.
+
 
 8. chunks and PNG editing
 -------------------------
@@ -1584,12 +1601,12 @@
 functions do no boundary checking of the allocated data whatsoever, so make sure there is enough
 data available in the buffer to be able to go to the next chunk.
 
-unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk):
-unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outsize, const unsigned char* chunk):
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outsize, unsigned length,
                               const char* type, const unsigned char* data):
 
 These functions are used to create new chunks that are appended to the data in *out that has
-length *outlength. The append function appends an existing chunk to the new data. The create
+length *outsize. The append function appends an existing chunk to the new data. The create
 function creates a new chunk with the given parameters and appends it. Type is the 4-letter
 name of the chunk.
 
@@ -1789,14 +1806,19 @@
 Not all changes are listed here, the commit history in github lists more:
 https://github.com/lvandeve/lodepng
 
+*) 17 okt 2020: prevent decoding too large text/icc chunks by default.
+*) 06 mar 2020: simplified some of the dynamic memory allocations.
+*) 12 jan 2020: (!) added 'end' argument to lodepng_chunk_next to allow correct
+   overflow checks.
 *) 14 aug 2019: around 25% faster decoding thanks to huffman lookup tables.
-*) 15 jun 2019 (!): auto_choose_color API changed (for bugfix: don't use palette
-   if gray ICC profile) and non-ICC LodePNGColorProfile renamed to LodePNGColorStats.
+*) 15 jun 2019: (!) auto_choose_color API changed (for bugfix: don't use palette
+   if gray ICC profile) and non-ICC LodePNGColorProfile renamed to
+   LodePNGColorStats.
 *) 30 dec 2018: code style changes only: removed newlines before opening braces.
 *) 10 sep 2018: added way to inspect metadata chunks without full decoding.
-*) 19 aug 2018 (!): fixed color mode bKGD is encoded with and made it use
+*) 19 aug 2018: (!) fixed color mode bKGD is encoded with and made it use
    palette index in case of palette.
-*) 10 aug 2018 (!): added support for gAMA, cHRM, sRGB and iCCP chunks. This
+*) 10 aug 2018: (!) added support for gAMA, cHRM, sRGB and iCCP chunks. This
    change is backwards compatible unless you relied on unknown_chunks for those.
 *) 11 jun 2018: less restrictive check for pixel size integer overflow
 *) 14 jan 2018: allow optionally ignoring a few more recoverable errors
@@ -1816,25 +1838,25 @@
 *) 22 dec 2013: Power of two windowsize required for optimization.
 *) 15 apr 2013: Fixed bug with LAC_ALPHA and color key.
 *) 25 mar 2013: Added an optional feature to ignore some PNG errors (fix_png).
-*) 11 mar 2013 (!): Bugfix with custom free. Changed from "my" to "lodepng_"
+*) 11 mar 2013: (!) Bugfix with custom free. Changed from "my" to "lodepng_"
     prefix for the custom allocators and made it possible with a new #define to
     use custom ones in your project without needing to change lodepng's code.
 *) 28 jan 2013: Bugfix with color key.
 *) 27 okt 2012: Tweaks in text chunk keyword length error handling.
-*) 8 okt 2012 (!): Added new filter strategy (entropy) and new auto color mode.
+*) 8 okt 2012: (!) Added new filter strategy (entropy) and new auto color mode.
     (no palette). Better deflate tree encoding. New compression tweak settings.
     Faster color conversions while decoding. Some internal cleanups.
 *) 23 sep 2012: Reduced warnings in Visual Studio a little bit.
-*) 1 sep 2012 (!): Removed #define's for giving custom (de)compression functions
+*) 1 sep 2012: (!) Removed #define's for giving custom (de)compression functions
     and made it work with function pointers instead.
 *) 23 jun 2012: Added more filter strategies. Made it easier to use custom alloc
     and free functions and toggle #defines from compiler flags. Small fixes.
-*) 6 may 2012 (!): Made plugging in custom zlib/deflate functions more flexible.
-*) 22 apr 2012 (!): Made interface more consistent, renaming a lot. Removed
+*) 6 may 2012: (!) Made plugging in custom zlib/deflate functions more flexible.
+*) 22 apr 2012: (!) Made interface more consistent, renaming a lot. Removed
     redundant C++ codec classes. Reduced amount of structs. Everything changed,
     but it is cleaner now imho and functionality remains the same. Also fixed
     several bugs and shrunk the implementation code. Made new samples.
-*) 6 nov 2011 (!): By default, the encoder now automatically chooses the best
+*) 6 nov 2011: (!) By default, the encoder now automatically chooses the best
     PNG color model and bit depth, based on the amount and type of colors of the
     raw image. For this, autoLeaveOutAlphaChannel replaced by auto_choose_color.
 *) 9 okt 2011: simpler hash chain implementation for the encoder.
@@ -1843,7 +1865,7 @@
     A bug with the PNG filtertype heuristic was fixed, so that it chooses much
     better ones (it's quite significant). A setting to do an experimental, slow,
     brute force search for PNG filter types is added.
-*) 17 aug 2011 (!): changed some C zlib related function names.
+*) 17 aug 2011: (!) changed some C zlib related function names.
 *) 16 aug 2011: made the code less wide (max 120 characters per line).
 *) 17 apr 2011: code cleanup. Bugfixes. Convert low to 16-bit per sample colors.
 *) 21 feb 2011: fixed compiling for C90. Fixed compiling with sections disabled.
@@ -1951,5 +1973,5 @@
 Account: lode dot vandevenne.
 
 
-Copyright (c) 2005-2019 Lode Vandevenne
+Copyright (c) 2005-2020 Lode Vandevenne
 */
diff --git a/src/zopflipng/lodepng/lodepng_util.cpp b/src/zopflipng/lodepng/lodepng_util.cpp
index d42fcf0..574138a 100644
--- a/src/zopflipng/lodepng/lodepng_util.cpp
+++ b/src/zopflipng/lodepng/lodepng_util.cpp
@@ -1,7 +1,7 @@
 /*
 LodePNG Utils
 
-Copyright (c) 2005-2019 Lode Vandevenne
+Copyright (c) 2005-2020 Lode Vandevenne
 
 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages
@@ -39,11 +39,11 @@
 unsigned getChunkInfo(std::vector<std::string>& names, std::vector<size_t>& sizes,
                       const std::vector<unsigned char>& png) {
   // Listing chunks is based on the original file, not the decoded png info.
-  const unsigned char *chunk, *begin, *end, *next;
+  const unsigned char *chunk, *end;
   end = &png.back() + 1;
-  begin = chunk = &png.front() + 8;
+  chunk = &png.front() + 8;
 
-  while(chunk + 8 < end && chunk >= begin) {
+  while(chunk < end && end - chunk >= 8) {
     char type[5];
     lodepng_chunk_type(type, chunk);
     if(std::string(type).size() != 4) return 1;
@@ -51,11 +51,7 @@
     unsigned length = lodepng_chunk_length(chunk);
     names.push_back(type);
     sizes.push_back(length);
-    if(chunk + length + 12 > end) return 1;
-
-    next = lodepng_chunk_next_const(chunk);
-    if (next <= chunk) return 1; // integer overflow
-    chunk = next;
+    chunk = lodepng_chunk_next_const(chunk, end);
   }
   return 0;
 }
@@ -63,20 +59,19 @@
 unsigned getChunks(std::vector<std::string> names[3],
                    std::vector<std::vector<unsigned char> > chunks[3],
                    const std::vector<unsigned char>& png) {
-  const unsigned char *chunk, *next, *begin, *end;
+  const unsigned char *chunk, *next, *end;
   end = &png.back() + 1;
-  begin = chunk = &png.front() + 8;
+  chunk = &png.front() + 8;
 
   int location = 0;
 
-  while(chunk + 8 < end && chunk >= begin) {
+  while(chunk < end && end - chunk >= 8) {
     char type[5];
     lodepng_chunk_type(type, chunk);
     std::string name(type);
     if(name.size() != 4) return 1;
 
-    next = lodepng_chunk_next_const(chunk);
-    if (next <= chunk) return 1; // integer overflow
+    next = lodepng_chunk_next_const(chunk, end);
 
     if(name == "IHDR") {
       location = 0;
@@ -87,7 +82,7 @@
     } else if(name == "IEND") {
       break; // anything after IEND is not part of the PNG or the 3 groups here.
     } else {
-      if(next > end) return 1; // invalid chunk, content too far
+      if(next >= end) return 1; // invalid chunk, content too far
       names[location].push_back(name);
       chunks[location].push_back(std::vector<unsigned char>(chunk, next));
     }
@@ -100,7 +95,7 @@
 
 unsigned insertChunks(std::vector<unsigned char>& png,
                       const std::vector<std::vector<unsigned char> > chunks[3]) {
-  const unsigned char *chunk, *next, *begin, *end;
+  const unsigned char *chunk, *begin, *end;
   end = &png.back() + 1;
   begin = chunk = &png.front() + 8;
 
@@ -108,15 +103,12 @@
   long l1 = 0; //location 1: PLTE-l1-IDAT (or IHDR-l0-l1-IDAT)
   long l2 = 0; //location 2: IDAT-l2-IEND
 
-  while(chunk + 8 < end && chunk >= begin) {
+  while(chunk < end && end - chunk >= 8) {
     char type[5];
     lodepng_chunk_type(type, chunk);
     std::string name(type);
     if(name.size() != 4) return 1;
 
-    next = lodepng_chunk_next_const(chunk);
-    if (next <= chunk) return 1; // integer overflow
-
     if(name == "PLTE") {
       if(l0 == 0) l0 = chunk - begin + 8;
     } else if(name == "IDAT") {
@@ -126,7 +118,7 @@
       if(l2 == 0) l2 = chunk - begin + 8;
     }
 
-    chunk = next;
+    chunk = lodepng_chunk_next_const(chunk, end);
   }
 
   std::vector<unsigned char> result;
@@ -153,13 +145,13 @@
   if(error) return 1;
 
   //Read literal data from all IDAT chunks
-  const unsigned char *chunk, *begin, *end, *next;
+  const unsigned char *chunk, *begin, *end;
   end = &png.back() + 1;
   begin = chunk = &png.front() + 8;
 
   std::vector<unsigned char> zdata;
 
-  while(chunk + 8 < end && chunk >= begin) {
+  while(chunk < end && end - chunk >= 8) {
     char type[5];
     lodepng_chunk_type(type, chunk);
     if(std::string(type).size() != 4) break; //Probably not a PNG file
@@ -177,9 +169,7 @@
       }
     }
 
-    next = lodepng_chunk_next_const(chunk);
-    if (next <= chunk) break; // integer overflow
-    chunk = next;
+    chunk = lodepng_chunk_next_const(chunk, end);
   }
 
   //Decompress all IDAT data (if the while loop ended early, this might fail)
@@ -208,8 +198,7 @@
     for(size_t j = 0; j < 7; j++) {
       unsigned w2 = (w - ADAM7_IX[j] + ADAM7_DX[j] - 1) / ADAM7_DX[j];
       unsigned h2 = (h - ADAM7_IY[j] + ADAM7_DY[j] - 1) / ADAM7_DY[j];
-      if(ADAM7_IX[j] >= w) w2 = 0;
-      if(ADAM7_IY[j] >= h) h2 = 0;
+      if(ADAM7_IX[j] >= w || ADAM7_IY[j] >= h) continue;
       size_t linebytes = 1 + lodepng_get_raw_size(w2, 1, &state.info_png.color);
       for(size_t i = 0; i < h2; i++) {
         filterTypes[j].push_back(data[pos]);
@@ -229,17 +218,24 @@
   if(passes.size() == 1) {
     filterTypes.swap(passes[0]);
   } else {
+    // Simplify interlaced filter types to get a single filter value per scanline:
+    // alternate the filters of passes 6 and 7 in one vector, since those
+    // correspond most closely to what the filters would be for a non-interlaced
+    // image. If the image is only 1 pixel wide, pass 6 doesn't exist, so the
+    // alternative values in column0 are used. The shift values convert the
+    // scanline y position to the row index within the interlaced sub-image.
+    // NOTE: the values 0-6 match Adam7's passes 1-7.
+    const unsigned column0[8] = {0, 6, 4, 6, 2, 6, 4, 6};
+    const unsigned column1[8] = {5, 6, 5, 6, 5, 6, 5, 6};
+    const unsigned shift0[8] = {3, 1, 2, 1, 3, 1, 2, 1};
+    const unsigned shift1[8] = {1, 1, 1, 1, 1, 1, 1, 1};
     lodepng::State state;
     unsigned w, h;
     lodepng_inspect(&w, &h, &state, &png[0], png.size());
-    /*
-    Interlaced. Simplify it: put pass 6 and 7 alternating in the one vector so
-    that one filter per scanline of the uninterlaced image is given, with that
-    filter corresponding the closest to what it would be for non-interlaced
-    image.
-    */
+    const unsigned* column = w > 1 ? column1 : column0;
+    const unsigned* shift = w > 1 ? shift1 : shift0;
     for(size_t i = 0; i < h; i++) {
-      filterTypes.push_back(i % 2 == 0 ? passes[5][i / 2] : passes[6][i / 2]);
+      filterTypes.push_back(passes[column[i & 7u]][i >> shift[i & 7u]]);
     }
   }
   return 0; /* OK */
@@ -286,14 +282,16 @@
 static const float lodepng_flt_inf = 1.0f / lodepng_flt_zero_; /* infinity */
 static const float lodepng_flt_nan = 0.0f / lodepng_flt_zero_; /* not a number */
 
-/* powf polyfill, 5-6 digits accurate, 33% slower than powf, assumes IEEE
+
+/* powf polyfill, 5-6 digits accurate, 33-80% slower than powf, assumes IEEE
 32-bit float, but other than that multiplatform and no math lib needed
 (note: powf also isn't in ISO C90, and pow is slower). */
 static float lodepng_powf(float x, float y) {
-  float j, t0, t1;
+  float j, t0, t1, l;
   int i = 0;
   /* handle all the special floating point rules */
-  if(x == 1 || y == 0) return 1;
+  if(x == 1 || y == 0) return 1; /*these cases return 1 even if the other value is NaN, as specified*/
+  if(y == 1) return x;
   if(!(x > 0 && x <= lodepng_flt_max && y == y && y <= lodepng_flt_max && y >= -lodepng_flt_max)) {
     if(y == 1) return x; /* preserves negative-0 */
     if(x != x || y != y) return x + y; /* nan */
@@ -319,24 +317,26 @@
     if(y < -lodepng_flt_max || y > lodepng_flt_max) return ((x < 1) != (y > 0)) ? (y < 0 ? -y : y) : 0;
   }
 
+  l = x;
   j = 0;
-  while(x < (1.0f / 65536)) { j -= 16; x *= 65536.0f; }
-  while(x > 65536) { j += 16; x *= (1.0f / 65536); }
-  while(x < 1) { j--; x *= 2.0f; }
-  while(x > 2) { j++; x *= 0.5f; }
+  while(l < (1.0f / 65536)) { j -= 16; l *= 65536.0f; }
+  while(l > 65536) { j += 16; l *= (1.0f / 65536); }
+  while(l < 1) { j--; l *= 2.0f; }
+  while(l > 2) { j++; l *= 0.5f; }
   /* polynomial to approximate log2(x) with x in range 1..2 */
-  t0 = -0.393118410458557f + x * (-0.0883639468229365f + x * (0.466142650227994f + x * 0.0153397331014276f));
-  t1 = 0.0907447971403586f + x * (0.388892024755479f + x * 0.137228280305862f);
-  x = t0 / t1 + j;
+  t0 = -0.393118410458557f + l * (-0.0883639468229365f + l * (0.466142650227994f + l * 0.0153397331014276f));
+  t1 = 0.0907447971403586f + l * (0.388892024755479f + l * 0.137228280305862f);
+  l = t0 / t1 + j;
 
-  x *= y; /* using the formula exp2(y * log2(x)) */
+  l *= y; /* using the formula exp2(y * log2(x)) */
 
-  if(!(x > -128.0f && x < 128.0f)) return x > 0 ? lodepng_flt_inf : 0; /* prevent int overflow */
-  i = (int)x;
-  x -= i;
+  /* prevent int shift overflow; returning 0 or inf is ok since exp2 will be taken, 127 is the max float exponent */
+  if(l <= -128.0f || l >= 128.0f) return ((x > 1) == (y > 0)) ? lodepng_flt_inf : 0;
+  i = (int)l;
+  l -= i;
   /* polynomial to approximate exp2(x) with x in range -1..1 */
-  t0 = 1.0f + x * (0.41777833582744256f + x * (0.0728482595347711f + x * 0.005635023478609625f));
-  t1 = 1.0f + x * (-0.27537016151408167f + x * 0.023501446055084033f);
+  t0 = 1.0f + l * (0.41777833582744256f + l * (0.0728482595347711f + l * 0.005635023478609625f));
+  t1 = 1.0f + l * (-0.27537016151408167f + l * 0.023501446055084033f);
   while(i <= -31) { t0 *= (1.0f / 2147483648.0f); i += 31; }
   while(i >= 31) { t0 *= 2147483648.0f; i -= 31; }
   return (i < 0) ? (t0 / (t1 * (1 << -i))) : ((t0 * (1 << i)) / t1);
@@ -803,24 +803,24 @@
                                     float wx1, float wy1, float wz1) {
   int i;
   static const float bradford[9] = {
-    0.8951, 0.2664, -0.1614,
-    -0.7502, 1.7135, 0.0367,
-    0.0389, -0.0685, 1.0296
+    0.8951f, 0.2664f, -0.1614f,
+    -0.7502f, 1.7135f, 0.0367f,
+    0.0389f, -0.0685f, 1.0296f
   };
   static const float bradfordinv[9] = {
-    0.9869929, -0.1470543, 0.1599627,
-    0.4323053, 0.5183603, 0.0492912,
-   -0.0085287, 0.0400428, 0.9684867
+    0.9869929f, -0.1470543f, 0.1599627f,
+    0.4323053f, 0.5183603f, 0.0492912f,
+   -0.0085287f, 0.0400428f, 0.9684867f
   };
   static const float vonkries[9] = {
-    0.40024, 0.70760, -0.08081,
-    -0.22630, 1.16532, 0.04570,
-    0.00000, 0.00000, 0.91822,
+    0.40024f, 0.70760f, -0.08081f,
+    -0.22630f, 1.16532f, 0.04570f,
+    0.00000f, 0.00000f, 0.91822f,
   };
   static const float vonkriesinv[9] = {
-    1.8599364, -1.1293816, 0.2198974,
-    0.3611914, 0.6388125, -0.0000064,
-   0.0000000, 0.0000000, 1.0890636
+    1.8599364f, -1.1293816f, 0.2198974f,
+    0.3611914f, 0.6388125f, -0.0000064f,
+   0.0000000f, 0.0000000f, 1.0890636f
   };
   if(type == 0) {
     for(i = 0; i < 9; i++) m[i] = 0;
diff --git a/src/zopflipng/lodepng/lodepng_util.h b/src/zopflipng/lodepng/lodepng_util.h
index 514b51c..97fd804 100644
--- a/src/zopflipng/lodepng/lodepng_util.h
+++ b/src/zopflipng/lodepng/lodepng_util.h
@@ -1,7 +1,7 @@
 /*
 LodePNG Utils
 
-Copyright (c) 2005-2019 Lode Vandevenne
+Copyright (c) 2005-2020 Lode Vandevenne
 
 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any damages