diff --git a/CHANGELOG.md b/CHANGELOG.md index f8e908c157170..1557cdbd1482a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ * Modified to build external libraries as separately build targets: [#787](https://github.com/dartsim/dart/pull/787) * Modified to export CMake target files separately per target: [#786](https://github.com/dartsim/dart/pull/786) +* Misc + + * Updated lodepng up to version 20160501 : [#791](https://github.com/dartsim/dart/pull/791) + ### DART 6.1.0 (2016-10-07) * Collision detection diff --git a/dart/external/lodepng/lodepng.cpp b/dart/external/lodepng/lodepng.cpp index 3d0ce61a8ded4..8c78758ffd8c6 100644 --- a/dart/external/lodepng/lodepng.cpp +++ b/dart/external/lodepng/lodepng.cpp @@ -1,7 +1,7 @@ /* -LodePNG version 20120901 +LodePNG version 20160501 -Copyright (c) 2005-2012 Lode Vandevenne +Copyright (c) 2005-2016 Lode Vandevenne This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -30,14 +30,16 @@ Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for #include "lodepng.h" +#include #include #include -#ifdef LODEPNG_COMPILE_CPP -#include -#endif /*LODEPNG_COMPILE_CPP*/ +#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/ +#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/ +#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/ +#endif /*_MSC_VER */ -#define VERSION_STRING "20120901" +const char* LODEPNG_VERSION_STRING = "20160501"; /* This source file is built up in the following large parts. The code sections @@ -48,24 +50,35 @@ with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way -The C++ wrapper around all of the above */ -/*The malloc, realloc and free functions defined here with "my" in front of the -name, so that you can easily change them to others related to your platform in -this one location if needed. Everything else in the code calls these.*/ +/*The malloc, realloc and free functions defined here with "lodepng_" in front +of the name, so that you can easily change them to others related to your +platform if needed. Everything else in the code calls these. Pass +-DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out +#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and +define them in your own project's source files without needing to change +lodepng source code. Don't forget to remove "static" if you copypaste them +from here.*/ -static void* mymalloc(size_t size) +#ifdef LODEPNG_COMPILE_ALLOCATORS +static void* lodepng_malloc(size_t size) { return malloc(size); } -static void* myrealloc(void* ptr, size_t new_size) +static void* lodepng_realloc(void* ptr, size_t new_size) { return realloc(ptr, new_size); } -static void myfree(void* ptr) +static void lodepng_free(void* ptr) { free(ptr); } +#else /*LODEPNG_COMPILE_ALLOCATORS*/ +void* lodepng_malloc(size_t size); +void* lodepng_realloc(void* ptr, size_t new_size); +void lodepng_free(void* ptr); +#endif /*LODEPNG_COMPILE_ALLOCATORS*/ /* ////////////////////////////////////////////////////////////////////////// */ /* ////////////////////////////////////////////////////////////////////////// */ @@ -103,6 +116,13 @@ Example: if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83); if(error) return error;\ } +/*Set error var to the error code, and return from the void function.*/ +#define CERROR_RETURN(errorvar, code)\ +{\ + errorvar = code;\ + return;\ +} + /* About uivector, ucvector and string: -All of them wrap dynamic arrays or text strings in a similar way. @@ -124,41 +144,47 @@ typedef struct uivector static void uivector_cleanup(void* p) { ((uivector*)p)->size = ((uivector*)p)->allocsize = 0; - myfree(((uivector*)p)->data); - ((uivector*)p)->data = nullptr; + lodepng_free(((uivector*)p)->data); + ((uivector*)p)->data = NULL; } /*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned uivector_resize(uivector* p, size_t size) +static unsigned uivector_reserve(uivector* p, size_t allocsize) { - if(size * sizeof(unsigned) > p->allocsize) + if(allocsize > p->allocsize) { - size_t newsize = size * sizeof(unsigned) * 2; - void* data = myrealloc(p->data, newsize); + size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2); + void* data = lodepng_realloc(p->data, newsize); if(data) { p->allocsize = newsize; p->data = (unsigned*)data; - p->size = size; } - else return 0; + else return 0; /*error: not enough memory*/ } - else p->size = size; return 1; } +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned uivector_resize(uivector* p, size_t size) +{ + if(!uivector_reserve(p, size * sizeof(unsigned))) return 0; + p->size = size; + return 1; /*success*/ +} + /*resize and give all new elements the value*/ static unsigned uivector_resizev(uivector* p, size_t size, unsigned value) { size_t oldsize = p->size, i; if(!uivector_resize(p, size)) return 0; - for (i = oldsize; i < size; i++) p->data[i] = value; + for(i = oldsize; i < size; ++i) p->data[i] = value; return 1; } static void uivector_init(uivector* p) { - p->data = nullptr; + p->data = NULL; p->size = p->allocsize = 0; } @@ -170,24 +196,6 @@ static unsigned uivector_push_back(uivector* p, unsigned c) p->data[p->size - 1] = c; return 1; } - -/*copy q to p, returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned uivector_copy(uivector* p, const uivector* q) -{ - size_t i; - if(!uivector_resize(p, q->size)) return 0; - for (i = 0; i < q->size; i++) p->data[i] = q->data[i]; - return 1; -} - -static void uivector_swap(uivector* p, uivector* q) -{ - size_t tmp; - unsigned* tmpp; - tmp = p->size; p->size = q->size; q->size = tmp; - tmp = p->allocsize; p->allocsize = q->allocsize; q->allocsize = tmp; - tmpp = p->data; p->data = q->data; q->data = tmpp; -} #endif /*LODEPNG_COMPILE_ENCODER*/ #endif /*LODEPNG_COMPILE_ZLIB*/ @@ -202,49 +210,44 @@ typedef struct ucvector } ucvector; /*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned ucvector_resize(ucvector* p, size_t size) +static unsigned ucvector_reserve(ucvector* p, size_t allocsize) { - if(size * sizeof(unsigned char) > p->allocsize) + if(allocsize > p->allocsize) { - size_t newsize = size * sizeof(unsigned char) * 2; - void* data = myrealloc(p->data, newsize); + size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2); + void* data = lodepng_realloc(p->data, newsize); if(data) { p->allocsize = newsize; p->data = (unsigned char*)data; - p->size = size; } else return 0; /*error: not enough memory*/ } - else p->size = size; return 1; } +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned ucvector_resize(ucvector* p, size_t size) +{ + if(!ucvector_reserve(p, size * sizeof(unsigned char))) return 0; + p->size = size; + return 1; /*success*/ +} + #ifdef LODEPNG_COMPILE_PNG static void ucvector_cleanup(void* p) { ((ucvector*)p)->size = ((ucvector*)p)->allocsize = 0; - myfree(((ucvector*)p)->data); - ((ucvector*)p)->data = nullptr; + lodepng_free(((ucvector*)p)->data); + ((ucvector*)p)->data = NULL; } static void ucvector_init(ucvector* p) { - p->data = nullptr; + p->data = NULL; p->size = p->allocsize = 0; } - -#ifdef LODEPNG_COMPILE_DECODER -/*resize and give all new elements the value*/ -static unsigned ucvector_resizev(ucvector* p, size_t size, unsigned char value) -{ - size_t oldsize = p->size, i; - if(!ucvector_resize(p, size)) return 0; - for (i = oldsize; i < size; i++) p->data[i] = value; - return 1; -} -#endif /*LODEPNG_COMPILE_DECODER*/ #endif /*LODEPNG_COMPILE_PNG*/ #ifdef LODEPNG_COMPILE_ZLIB @@ -275,7 +278,7 @@ static unsigned ucvector_push_back(ucvector* p, unsigned char c) /*returns 1 if success, 0 if failure ==> nothing done*/ static unsigned string_resize(char** out, size_t size) { - char* data = (char*)myrealloc(*out, size + 1); + char* data = (char*)lodepng_realloc(*out, size + 1); if(data) { data[size] = 0; /*null termination char*/ @@ -287,23 +290,23 @@ static unsigned string_resize(char** out, size_t size) /*init a {char*, size_t} pair for use as string*/ static void string_init(char** out) { - *out = nullptr; + *out = NULL; string_resize(out, 0); } /*free the above pair again*/ static void string_cleanup(char** out) { - myfree(*out); - *out = nullptr; + lodepng_free(*out); + *out = NULL; } static void string_set(char** out, const char* in) { - size_t insize = strlen(in), i = 0; + size_t insize = strlen(in), i; if(string_resize(out, insize)) { - for (i = 0; i < insize; i++) + for(i = 0; i != insize; ++i) { (*out)[i] = in[i]; } @@ -316,7 +319,7 @@ static void string_set(char** out, const char* in) unsigned lodepng_read32bitInt(const unsigned char* buffer) { - return (buffer[0] << 24) | (buffer[1] << 16) | (buffer[2] << 8) | buffer[3]; + return (unsigned)((buffer[0] << 24) | (buffer[1] << 16) | (buffer[2] << 8) | buffer[3]); } #if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER) @@ -344,33 +347,55 @@ static void lodepng_add32bitInt(ucvector* buffer, unsigned value) #ifdef LODEPNG_COMPILE_DISK -unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename) +/* returns negative value on error. This should be pure C compatible, so no fstat. */ +static long lodepng_filesize(const char* filename) { FILE* file; long size; - - /*provide some proper output values if error will happen*/ - *out = 0; - *outsize = 0; - file = fopen(filename, "rb"); - if(!file) return 78; + if(!file) return -1; + + if(fseek(file, 0, SEEK_END) != 0) + { + fclose(file); + return -1; + } - /*get filesize:*/ - fseek(file , 0 , SEEK_END); size = ftell(file); - rewind(file); + /* It may give LONG_MAX as directory size, this is invalid for us. */ + if(size == LONG_MAX) size = -1; - /*read contents of the file into the vector*/ - *outsize = 0; - *out = (unsigned char*)mymalloc((size_t)size); - if(size && (*out)) (*outsize) = fread(*out, 1, (size_t)size, file); + fclose(file); + return size; +} +/* load file into buffer that already has the correct allocated size. Returns error code.*/ +static unsigned lodepng_buffer_file(unsigned char* out, size_t size, const char* filename) +{ + FILE* file; + size_t readsize; + file = fopen(filename, "rb"); + if(!file) return 78; + + readsize = fread(out, 1, size, file); fclose(file); - if(!(*out) && size) return 83; /*the above malloc failed*/ + + if (readsize != size) return 78; return 0; } +unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename) +{ + long size = lodepng_filesize(filename); + if (size < 0) return 78; + *outsize = (size_t)size; + + *out = (unsigned char*)lodepng_malloc((size_t)size); + if(!(*out) && size > 0) return 83; /*the above malloc failed*/ + + return lodepng_buffer_file(*out, (size_t)size, filename); +} + /*write given buffer to the file, overwriting the file, it doesn't append to it.*/ unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename) { @@ -393,25 +418,25 @@ unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const #ifdef LODEPNG_COMPILE_ZLIB #ifdef LODEPNG_COMPILE_ENCODER /*TODO: this ignores potential out of memory errors*/ -static void addBitToStream(size_t* bitpointer, ucvector* bitstream, unsigned char bit) -{ - /*add a new byte at the end*/ - if((*bitpointer) % 8 == 0) ucvector_push_back(bitstream, (unsigned char)0); - /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/ - (bitstream->data[bitstream->size - 1]) |= (bit << ((*bitpointer) & 0x7)); - (*bitpointer)++; +#define addBitToStream(/*size_t**/ bitpointer, /*ucvector**/ bitstream, /*unsigned char*/ bit)\ +{\ + /*add a new byte at the end*/\ + if(((*bitpointer) & 7) == 0) ucvector_push_back(bitstream, (unsigned char)0);\ + /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/\ + (bitstream->data[bitstream->size - 1]) |= (bit << ((*bitpointer) & 0x7));\ + ++(*bitpointer);\ } static void addBitsToStream(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits) { size_t i; - for (i = 0; i < nbits; i++) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> i) & 1)); + for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> i) & 1)); } static void addBitsToStreamReversed(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits) { size_t i; - for (i = 0; i < nbits; i++) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> (nbits - 1 - i)) & 1)); + for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> (nbits - 1 - i)) & 1)); } #endif /*LODEPNG_COMPILE_ENCODER*/ @@ -422,17 +447,17 @@ static void addBitsToStreamReversed(size_t* bitpointer, ucvector* bitstream, uns static unsigned char readBitFromStream(size_t* bitpointer, const unsigned char* bitstream) { unsigned char result = (unsigned char)(READBIT(*bitpointer, bitstream)); - (*bitpointer)++; + ++(*bitpointer); return result; } static unsigned readBitsFromStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) { unsigned result = 0, i; - for (i = 0; i < nbits; i++) + for(i = 0; i != nbits; ++i) { result += ((unsigned)READBIT(*bitpointer, bitstream)) << i; - (*bitpointer)++; + ++(*bitpointer); } return result; } @@ -491,11 +516,11 @@ typedef struct HuffmanTree } HuffmanTree; /*function used for debug purposes to draw the tree in ascii art with C++*/ -/*#include +/* static void HuffmanTree_draw(HuffmanTree* tree) { std::cout << "tree. length: " << tree->numcodes << " maxbitlen: " << tree->maxbitlen << std::endl; - for (size_t i = 0; i < tree->tree1d.size; i++) + for(size_t i = 0; i != tree->tree1d.size; ++i) { if(tree->lengths.data[i]) std::cout << i << " " << tree->tree1d.data[i] << " " << tree->lengths.data[i] << std::endl; @@ -512,9 +537,9 @@ static void HuffmanTree_init(HuffmanTree* tree) static void HuffmanTree_cleanup(HuffmanTree* tree) { - myfree(tree->tree2d); - myfree(tree->tree1d); - myfree(tree->lengths); + lodepng_free(tree->tree2d); + lodepng_free(tree->tree1d); + lodepng_free(tree->lengths); } /*the tree representation used by the decoder. return value is error*/ @@ -524,7 +549,7 @@ static unsigned HuffmanTree_make2DTree(HuffmanTree* tree) unsigned treepos = 0; /*position in the tree (1 of the numcodes columns)*/ unsigned n, i; - tree->tree2d = (unsigned*)mymalloc(tree->numcodes * 2 * sizeof(unsigned)); + tree->tree2d = (unsigned*)lodepng_malloc(tree->numcodes * 2 * sizeof(unsigned)); if(!tree->tree2d) return 83; /*alloc fail*/ /* @@ -532,22 +557,23 @@ static unsigned HuffmanTree_make2DTree(HuffmanTree* tree) uninited, a value >= numcodes is an address to another bit, a value < numcodes is a code. The 2 rows are the 2 possible bit values (0 or 1), there are as many columns as codes - 1. - A good huffmann tree has N * 2 - 1 nodes, of which N - 1 are internal nodes. + A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes. Here, the internal nodes are stored (what their 0 and 1 option point to). There is only memory for such good tree currently, if there are more nodes (due to too long length codes), error 55 will happen */ - for (n = 0; n < tree->numcodes * 2; n++) + for(n = 0; n < tree->numcodes * 2; ++n) { tree->tree2d[n] = 32767; /*32767 here means the tree2d isn't filled there yet*/ } - for (n = 0; n < tree->numcodes; n++) /*the codes*/ + for(n = 0; n < tree->numcodes; ++n) /*the codes*/ { - for (i = 0; i < tree->lengths[n]; i++) /*the bits for this code*/ + for(i = 0; i != tree->lengths[n]; ++i) /*the bits for this code*/ { unsigned char bit = (unsigned char)((tree->tree1d[n] >> (tree->lengths[n] - i - 1)) & 1); - if(treepos > tree->numcodes - 2) return 55; /*oversubscribed, see comment in lodepng_error_text*/ + /*oversubscribed, see comment in lodepng_error_text*/ + if(treepos > 2147483647 || treepos + 2 > tree->numcodes) return 55; if(tree->tree2d[2 * treepos + bit] == 32767) /*not yet filled in*/ { if(i + 1 == tree->lengths[n]) /*last bit*/ @@ -559,7 +585,7 @@ static unsigned HuffmanTree_make2DTree(HuffmanTree* tree) { /*put address of the next step in here, first that address has to be found of course (it's just nodefilled + 1)...*/ - nodefilled++; + ++nodefilled; /*addresses encoded with numcodes added to it*/ tree->tree2d[2 * treepos + bit] = nodefilled + tree->numcodes; treepos = nodefilled; @@ -569,7 +595,7 @@ static unsigned HuffmanTree_make2DTree(HuffmanTree* tree) } } - for (n = 0; n < tree->numcodes * 2; n++) + for(n = 0; n < tree->numcodes * 2; ++n) { if(tree->tree2d[n] == 32767) tree->tree2d[n] = 0; /*remove possible remaining 32767's*/ } @@ -586,12 +612,13 @@ static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree) { uivector blcount; uivector nextcode; - unsigned bits, n, error = 0; + unsigned error = 0; + unsigned bits, n; uivector_init(&blcount); uivector_init(&nextcode); - tree->tree1d = (unsigned*)mymalloc(tree->numcodes * sizeof(unsigned)); + tree->tree1d = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned)); if(!tree->tree1d) error = 83; /*alloc fail*/ if(!uivector_resizev(&blcount, tree->maxbitlen + 1, 0) @@ -601,14 +628,14 @@ static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree) if(!error) { /*step 1: count number of instances of each code length*/ - for (bits = 0; bits < tree->numcodes; bits++) blcount.data[tree->lengths[bits]]++; + for(bits = 0; bits != tree->numcodes; ++bits) ++blcount.data[tree->lengths[bits]]; /*step 2: generate the nextcode values*/ - for (bits = 1; bits <= tree->maxbitlen; bits++) + for(bits = 1; bits <= tree->maxbitlen; ++bits) { nextcode.data[bits] = (nextcode.data[bits - 1] + blcount.data[bits - 1]) << 1; } /*step 3: generate all the codes*/ - for (n = 0; n < tree->numcodes; n++) + for(n = 0; n != tree->numcodes; ++n) { if(tree->lengths[n] != 0) tree->tree1d[n] = nextcode.data[tree->lengths[n]]++; } @@ -630,9 +657,9 @@ static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* b size_t numcodes, unsigned maxbitlen) { unsigned i; - tree->lengths = (unsigned*)mymalloc(numcodes * sizeof(unsigned)); + tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned)); if(!tree->lengths) return 83; /*alloc fail*/ - for (i = 0; i < numcodes; i++) tree->lengths[i] = bitlen[i]; + for(i = 0; i != numcodes; ++i) tree->lengths[i] = bitlen[i]; tree->numcodes = (unsigned)numcodes; /*number of symbols*/ tree->maxbitlen = maxbitlen; return HuffmanTree_makeFromLengths2(tree); @@ -640,209 +667,223 @@ static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* b #ifdef LODEPNG_COMPILE_ENCODER -/* -A coin, this is the terminology used for the package-merge algorithm and the -coin collector's problem. This is used to generate the huffman tree. -A coin can be multiple coins (when they're merged) -*/ -typedef struct Coin -{ - uivector symbols; - float weight; /*the sum of all weights in this coin*/ -} Coin; - -static void coin_init(Coin* c) -{ - uivector_init(&c->symbols); -} +/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding", +Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/ -/*argument c is void* so that this dtor can be given as function pointer to the vector resize function*/ -static void coin_cleanup(void* c) +/*chain node for boundary package merge*/ +typedef struct BPMNode { - uivector_cleanup(&((Coin*)c)->symbols); -} + int weight; /*the sum of all weights in this chain*/ + unsigned index; /*index of this leaf node (called "count" in the paper)*/ + struct BPMNode* tail; /*the next nodes in this chain (null if last)*/ + int in_use; +} BPMNode; -static void coin_copy(Coin* c1, const Coin* c2) +/*lists of chains*/ +typedef struct BPMLists { - c1->weight = c2->weight; - uivector_copy(&c1->symbols, &c2->symbols); -} + /*memory pool*/ + unsigned memsize; + BPMNode* memory; + unsigned numfree; + unsigned nextfree; + BPMNode** freelist; + /*two heads of lookahead chains per list*/ + unsigned listsize; + BPMNode** chains0; + BPMNode** chains1; +} BPMLists; -static void add_coins(Coin* c1, const Coin* c2) +/*creates a new chain node with the given parameters, from the memory in the lists */ +static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail) { - size_t i; - for (i = 0; i < c2->symbols.size; i++) uivector_push_back(&c1->symbols, c2->symbols.data[i]); - c1->weight += c2->weight; -} + unsigned i; + BPMNode* result; -static void init_coins(Coin* coins, size_t num) -{ - size_t i; - for (i = 0; i < num; i++) coin_init(&coins[i]); -} + /*memory full, so garbage collect*/ + if(lists->nextfree >= lists->numfree) + { + /*mark only those that are in use*/ + for(i = 0; i != lists->memsize; ++i) lists->memory[i].in_use = 0; + for(i = 0; i != lists->listsize; ++i) + { + BPMNode* node; + for(node = lists->chains0[i]; node != 0; node = node->tail) node->in_use = 1; + for(node = lists->chains1[i]; node != 0; node = node->tail) node->in_use = 1; + } + /*collect those that are free*/ + lists->numfree = 0; + for(i = 0; i != lists->memsize; ++i) + { + if(!lists->memory[i].in_use) lists->freelist[lists->numfree++] = &lists->memory[i]; + } + lists->nextfree = 0; + } -static void cleanup_coins(Coin* coins, size_t num) -{ - size_t i; - for (i = 0; i < num; i++) coin_cleanup(&coins[i]); + result = lists->freelist[lists->nextfree++]; + result->weight = weight; + result->index = index; + result->tail = tail; + return result; } -/* -This uses a simple combsort to sort the data. This function is not critical for -overall encoding speed and the data amount isn't that large. -*/ -static void sort_coins(Coin* data, size_t amount) +/*sort the leaves with stable mergesort*/ +static void bpmnode_sort(BPMNode* leaves, size_t num) { - size_t gap = amount; - unsigned char swapped = 0; - while((gap > 1) || swapped) + BPMNode* mem = (BPMNode*)lodepng_malloc(sizeof(*leaves) * num); + size_t width, counter = 0; + for(width = 1; width < num; width *= 2) { - size_t i; - gap = (gap * 10) / 13; /*shrink factor 1.3*/ - if(gap == 9 || gap == 10) gap = 11; /*combsort11*/ - if(gap < 1) gap = 1; - swapped = 0; - for (i = 0; i < amount - gap; i++) - { - size_t j = i + gap; - if(data[j].weight < data[i].weight) + BPMNode* a = (counter & 1) ? mem : leaves; + BPMNode* b = (counter & 1) ? leaves : mem; + size_t p; + for(p = 0; p < num; p += 2 * width) + { + size_t q = (p + width > num) ? num : (p + width); + size_t r = (p + 2 * width > num) ? num : (p + 2 * width); + size_t i = p, j = q, k; + for(k = p; k < r; k++) { - float temp = data[j].weight; data[j].weight = data[i].weight; data[i].weight = temp; - uivector_swap(&data[i].symbols, &data[j].symbols); - swapped = 1; + if(i < q && (j >= r || a[i].weight <= a[j].weight)) b[k] = a[i++]; + else b[k] = a[j++]; } } + counter++; } + if(counter & 1) memcpy(leaves, mem, sizeof(*leaves) * num); + lodepng_free(mem); } -static unsigned append_symbol_coins(Coin* coins, const unsigned* frequencies, unsigned numcodes, size_t sum) +/*Boundary Package Merge step, numpresent is the amount of leaves, and c is the current chain.*/ +static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num) { - unsigned i; - unsigned j = 0; /*index of present symbols*/ - for (i = 0; i < numcodes; i++) + unsigned lastindex = lists->chains1[c]->index; + + if(c == 0) + { + if(lastindex >= numpresent) return; + lists->chains0[c] = lists->chains1[c]; + lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0); + } + else { - if(frequencies[i] != 0) /*only include symbols that are present*/ + /*sum of the weights of the head nodes of the previous lookahead chains.*/ + int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight; + lists->chains0[c] = lists->chains1[c]; + if(lastindex < numpresent && sum > leaves[lastindex].weight) { - coins[j].weight = frequencies[i] / (float)sum; - uivector_push_back(&coins[j].symbols, i); - j++; + lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail); + return; + } + lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]); + /*in the end we are only interested in the chain of the last list, so no + need to recurse if we're at the last one (this gives measurable speedup)*/ + if(num + 1 < (int)(2 * numpresent - 2)) + { + boundaryPM(lists, leaves, numpresent, c - 1, num); + boundaryPM(lists, leaves, numpresent, c - 1, num); } } - return 0; } unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies, size_t numcodes, unsigned maxbitlen) { - unsigned i, j; - size_t sum = 0, numpresent = 0; unsigned error = 0; - Coin* coins; /*the coins of the currently calculated row*/ - Coin* prev_row; /*the previous row of coins*/ - unsigned numcoins; - unsigned coinmem; + unsigned i; + size_t numpresent = 0; /*number of symbols with non-zero frequency*/ + BPMNode* leaves; /*the symbols, only those with > 0 frequency*/ if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/ + if((1u << maxbitlen) < numcodes) return 80; /*error: represent all symbols*/ - for (i = 0; i < numcodes; i++) + leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves)); + if(!leaves) return 83; /*alloc fail*/ + + for(i = 0; i != numcodes; ++i) { if(frequencies[i] > 0) { - numpresent++; - sum += frequencies[i]; + leaves[numpresent].weight = (int)frequencies[i]; + leaves[numpresent].index = i; + ++numpresent; } } - for (i = 0; i < numcodes; i++) lengths[i] = 0; + for(i = 0; i != numcodes; ++i) lengths[i] = 0; - /*there are no symbols at all, in that case add one symbol of value 0 to the tree (see RFC 1951 section 3.2.7) */ + /*ensure at least two present symbols. There should be at least one symbol + according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To + make these work as well ensure there are at least two symbols. The + Package-Merge code below also doesn't work correctly if there's only one + symbol, it'd give it the theoritical 0 bits but in practice zlib wants 1 bit*/ if(numpresent == 0) { - lengths[0] = 1; + lengths[0] = lengths[1] = 1; /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/ } - /*the package merge algorithm gives wrong results if there's only one symbol - (theoretically 0 bits would then suffice, but we need a proper symbol for zlib)*/ else if(numpresent == 1) { - for (i = 0; i < numcodes; i++) if(frequencies[i]) lengths[i] = 1; + lengths[leaves[0].index] = 1; + lengths[leaves[0].index == 0 ? 1 : 0] = 1; } else { - /*Package-Merge algorithm represented by coin collector's problem - For every symbol, maxbitlen coins will be created*/ + BPMLists lists; + BPMNode* node; + + bpmnode_sort(leaves, numpresent); - coinmem = numpresent * 2; /*max amount of coins needed with the current algo*/ - coins = (Coin*)mymalloc(sizeof(Coin) * coinmem); - prev_row = (Coin*)mymalloc(sizeof(Coin) * coinmem); - if(!coins || !prev_row) return 83; /*alloc fail*/ - init_coins(coins, coinmem); - init_coins(prev_row, coinmem); + lists.listsize = maxbitlen; + lists.memsize = 2 * maxbitlen * (maxbitlen + 1); + lists.nextfree = 0; + lists.numfree = lists.memsize; + lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory)); + lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*)); + lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*)); + lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*)); + if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/ - /*first row, lowest denominator*/ - error = append_symbol_coins(coins, frequencies, numcodes, sum); - numcoins = numpresent; - sort_coins(coins, numcoins); if(!error) { - unsigned numprev = 0; - for (j = 1; j <= maxbitlen && !error; j++) /*each of the remaining rows*/ - { - unsigned tempnum; - Coin* tempcoins; - /*swap prev_row and coins, and their amounts*/ - tempcoins = prev_row; prev_row = coins; coins = tempcoins; - tempnum = numprev; numprev = numcoins; numcoins = tempnum; - - cleanup_coins(coins, numcoins); - init_coins(coins, numcoins); + for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i]; - numcoins = 0; + bpmnode_create(&lists, leaves[0].weight, 1, 0); + bpmnode_create(&lists, leaves[1].weight, 2, 0); - /*fill in the merged coins of the previous row*/ - for (i = 0; i + 1 < numprev; i += 2) - { - /*merge prev_row[i] and prev_row[i + 1] into new coin*/ - Coin* coin = &coins[numcoins++]; - coin_copy(coin, &prev_row[i]); - add_coins(coin, &prev_row[i + 1]); - } - /*fill in all the original symbols again*/ - if(j < maxbitlen) - { - error = append_symbol_coins(coins + numcoins, frequencies, numcodes, sum); - numcoins += numpresent; - } - sort_coins(coins, numcoins); + for(i = 0; i != lists.listsize; ++i) + { + lists.chains0[i] = &lists.memory[0]; + lists.chains1[i] = &lists.memory[1]; } - } - if(!error) - { - /*calculate the lenghts of each symbol, as the amount of times a coin of each symbol is used*/ - for (i = 0; i < numpresent - 1; i++) + /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/ + for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i); + + for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail) { - Coin* coin = &coins[i]; - for (j = 0; j < coin->symbols.size; j++) lengths[coin->symbols.data[j]]++; + for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index]; } } - cleanup_coins(coins, coinmem); - myfree(coins); - cleanup_coins(prev_row, coinmem); - myfree(prev_row); + lodepng_free(lists.memory); + lodepng_free(lists.freelist); + lodepng_free(lists.chains0); + lodepng_free(lists.chains1); } + lodepng_free(leaves); return error; } /*Create the Huffman tree given the symbol frequencies*/ static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies, - size_t numcodes, unsigned maxbitlen) + size_t mincodes, size_t numcodes, unsigned maxbitlen) { unsigned error = 0; + while(!frequencies[numcodes - 1] && numcodes > mincodes) --numcodes; /*trim zeroes*/ tree->maxbitlen = maxbitlen; tree->numcodes = (unsigned)numcodes; /*number of symbols*/ - tree->lengths = (unsigned*)myrealloc(tree->lengths, numcodes * sizeof(unsigned)); + tree->lengths = (unsigned*)lodepng_realloc(tree->lengths, numcodes * sizeof(unsigned)); if(!tree->lengths) return 83; /*alloc fail*/ /*initialize all lengths to 0*/ memset(tree->lengths, 0, numcodes * sizeof(unsigned)); @@ -867,18 +908,18 @@ static unsigned HuffmanTree_getLength(const HuffmanTree* tree, unsigned index) static unsigned generateFixedLitLenTree(HuffmanTree* tree) { unsigned i, error = 0; - unsigned* bitlen = (unsigned*)mymalloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); + unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); if(!bitlen) return 83; /*alloc fail*/ /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/ - for (i = 0; i <= 143; i++) bitlen[i] = 8; - for (i = 144; i <= 255; i++) bitlen[i] = 9; - for (i = 256; i <= 279; i++) bitlen[i] = 7; - for (i = 280; i <= 287; i++) bitlen[i] = 8; + for(i = 0; i <= 143; ++i) bitlen[i] = 8; + for(i = 144; i <= 255; ++i) bitlen[i] = 9; + for(i = 256; i <= 279; ++i) bitlen[i] = 7; + for(i = 280; i <= 287; ++i) bitlen[i] = 8; error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DEFLATE_CODE_SYMBOLS, 15); - myfree(bitlen); + lodepng_free(bitlen); return error; } @@ -886,14 +927,14 @@ static unsigned generateFixedLitLenTree(HuffmanTree* tree) static unsigned generateFixedDistanceTree(HuffmanTree* tree) { unsigned i, error = 0; - unsigned* bitlen = (unsigned*)mymalloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); + unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); if(!bitlen) return 83; /*alloc fail*/ /*there are 32 distance codes, but 30-31 are unused*/ - for (i = 0; i < NUM_DISTANCE_SYMBOLS; i++) bitlen[i] = 5; + for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen[i] = 5; error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DISTANCE_SYMBOLS, 15); - myfree(bitlen); + lodepng_free(bitlen); return error; } @@ -907,7 +948,7 @@ static unsigned huffmanDecodeSymbol(const unsigned char* in, size_t* bp, const HuffmanTree* codetree, size_t inbitlength) { unsigned treepos = 0, ct; - for (;;) + for(;;) { if(*bp >= inbitlength) return (unsigned)(-1); /*error: end of input memory reached without endcode*/ /* @@ -915,7 +956,7 @@ static unsigned huffmanDecodeSymbol(const unsigned char* in, size_t* bp, the expression below because this is the biggest bottleneck while decoding */ ct = codetree->tree2d[(treepos << 1) + READBIT(*bp, in)]; - (*bp)++; + ++(*bp); if(ct < codetree->numcodes) return ct; /*the symbol is decoded, return it*/ else treepos = ct - codetree->numcodes; /*symbol not yet decoded, instead move tree position*/ @@ -954,7 +995,7 @@ static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d, unsigned* bitlen_cl = 0; HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/ - if((*bp) >> 3 >= inlength - 2) return 49; /*error: the bit pointer is or will go past the memory*/ + if((*bp) + 14 > (inlength << 3)) return 49; /*error: the bit pointer is or will go past the memory*/ /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/ HLIT = readBitsFromStream(bp, in, 5) + 257; @@ -963,16 +1004,18 @@ static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d, /*number of code length codes. Unlike the spec, the value 4 is added to it here already*/ HCLEN = readBitsFromStream(bp, in, 4) + 4; + if((*bp) + HCLEN * 3 > (inlength << 3)) return 50; /*error: the bit pointer is or will go past the memory*/ + HuffmanTree_init(&tree_cl); while(!error) { /*read the code length codes out of 3 * (amount of code length codes) bits*/ - bitlen_cl = (unsigned*)mymalloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned)); + bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned)); if(!bitlen_cl) ERROR_BREAK(83 /*alloc fail*/); - for (i = 0; i < NUM_CODE_LENGTH_CODES; i++) + for(i = 0; i != NUM_CODE_LENGTH_CODES; ++i) { if(i < HCLEN) bitlen_cl[CLCL_ORDER[i]] = readBitsFromStream(bp, in, 3); else bitlen_cl[CLCL_ORDER[i]] = 0; /*if not, it must stay 0*/ @@ -982,11 +1025,11 @@ static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d, if(error) break; /*now we can use this tree to read the lengths for the tree that this function will return*/ - bitlen_ll = (unsigned*)mymalloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); - bitlen_d = (unsigned*)mymalloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); + bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); + bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/); - for (i = 0; i < NUM_DEFLATE_CODE_SYMBOLS; i++) bitlen_ll[i] = 0; - for (i = 0; i < NUM_DISTANCE_SYMBOLS; i++) bitlen_d[i] = 0; + for(i = 0; i != NUM_DEFLATE_CODE_SYMBOLS; ++i) bitlen_ll[i] = 0; + for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen_d[i] = 0; /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/ i = 0; @@ -997,61 +1040,59 @@ static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d, { if(i < HLIT) bitlen_ll[i] = code; else bitlen_d[i - HLIT] = code; - i++; + ++i; } else if(code == 16) /*repeat previous*/ { unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/ unsigned value; /*set value to the previous code*/ - if(*bp >= inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ - if (i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/ + if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/ + if((*bp + 2) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ replength += readBitsFromStream(bp, in, 2); if(i < HLIT + 1) value = bitlen_ll[i - 1]; else value = bitlen_d[i - HLIT - 1]; /*repeat this value in the next lengths*/ - for (n = 0; n < replength; n++) + for(n = 0; n < replength; ++n) { if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/ if(i < HLIT) bitlen_ll[i] = value; else bitlen_d[i - HLIT] = value; - i++; + ++i; } } else if(code == 17) /*repeat "0" 3-10 times*/ { unsigned replength = 3; /*read in the bits that indicate repeat length*/ - if(*bp >= inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ - + if((*bp + 3) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ replength += readBitsFromStream(bp, in, 3); /*repeat this value in the next lengths*/ - for (n = 0; n < replength; n++) + for(n = 0; n < replength; ++n) { if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/ if(i < HLIT) bitlen_ll[i] = 0; else bitlen_d[i - HLIT] = 0; - i++; + ++i; } } else if(code == 18) /*repeat "0" 11-138 times*/ { unsigned replength = 11; /*read in the bits that indicate repeat length*/ - if(*bp >= inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ - + if((*bp + 7) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ replength += readBitsFromStream(bp, in, 7); /*repeat this value in the next lengths*/ - for (n = 0; n < replength; n++) + for(n = 0; n < replength; ++n) { if(i >= HLIT + HDIST) ERROR_BREAK(15); /*error: i is larger than the amount of codes*/ if(i < HLIT) bitlen_ll[i] = 0; else bitlen_d[i - HLIT] = 0; - i++; + ++i; } } else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ @@ -1078,9 +1119,9 @@ static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d, break; /*end of error-while*/ } - myfree(bitlen_cl); - myfree(bitlen_ll); - myfree(bitlen_d); + lodepng_free(bitlen_cl); + lodepng_free(bitlen_ll); + lodepng_free(bitlen_d); HuffmanTree_cleanup(&tree_cl); return error; @@ -1099,10 +1140,7 @@ static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size HuffmanTree_init(&tree_d); if(btype == 1) getTreeInflateFixed(&tree_ll, &tree_d); - else if(btype == 2) - { - error = getTreeInflateDynamic(&tree_ll, &tree_d, in, bp, inlength); - } + else if(btype == 2) error = getTreeInflateDynamic(&tree_ll, &tree_d, in, bp, inlength); while(!error) /*decode all symbols until end reached, breaks at end code*/ { @@ -1110,13 +1148,10 @@ static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size unsigned code_ll = huffmanDecodeSymbol(in, bp, &tree_ll, inbitlength); if(code_ll <= 255) /*literal symbol*/ { - if((*pos) >= out->size) - { - /*reserve more room at once*/ - if(!ucvector_resize(out, ((*pos) + 1) * 2)) ERROR_BREAK(83 /*alloc fail*/); - } - out->data[(*pos)] = (unsigned char)(code_ll); - (*pos)++; + /*ucvector_push_back would do the same, but for some reason the two lines below run 10% faster*/ + if(!ucvector_resize(out, (*pos) + 1)) ERROR_BREAK(83 /*alloc fail*/); + out->data[*pos] = (unsigned char)code_ll; + ++(*pos); } else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/ { @@ -1129,7 +1164,7 @@ static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size /*part 2: get extra bits and add the value of that to length*/ numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX]; - if(*bp >= inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/ + if((*bp + numextrabits_l) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/ length += readBitsFromStream(bp, in, numextrabits_l); /*part 3: get distance code*/ @@ -1149,26 +1184,23 @@ static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size /*part 4: get extra bits from distance*/ numextrabits_d = DISTANCEEXTRA[code_d]; - if(*bp >= inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/ - + if((*bp + numextrabits_d) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/ distance += readBitsFromStream(bp, in, numextrabits_d); /*part 5: fill in all the out[n] values based on the length and dist*/ start = (*pos); if(distance > start) ERROR_BREAK(52); /*too long backward distance*/ backward = start - distance; - if((*pos) + length >= out->size) - { - /*reserve more room at once*/ - if(!ucvector_resize(out, ((*pos) + length) * 2)) ERROR_BREAK(83 /*alloc fail*/); - } - for (forward = 0; forward < length; forward++) - { - out->data[(*pos)] = out->data[backward]; - (*pos)++; - backward++; - if(backward >= start) backward = start - distance; + if(!ucvector_resize(out, (*pos) + length)) ERROR_BREAK(83 /*alloc fail*/); + if (distance < length) { + for(forward = 0; forward < length; ++forward) + { + out->data[(*pos)++] = out->data[backward++]; + } + } else { + memcpy(out->data + *pos, out->data + backward, length); + *pos += length; } } else if(code_ll == 256) @@ -1179,7 +1211,7 @@ static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size { /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol (10=no endcode, 11=wrong jump outside of tree)*/ - error = (*bp) > inlength * 8 ? 10 : 11; + error = ((*bp) > inlength * 8) ? 10 : 11; break; } } @@ -1192,28 +1224,26 @@ static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size static unsigned inflateNoCompression(ucvector* out, const unsigned char* in, size_t* bp, size_t* pos, size_t inlength) { - /*go to first boundary of byte*/ size_t p; unsigned LEN, NLEN, n, error = 0; - while(((*bp) & 0x7) != 0) (*bp)++; + + /*go to first boundary of byte*/ + while(((*bp) & 0x7) != 0) ++(*bp); p = (*bp) / 8; /*byte position*/ /*read LEN (2 bytes) and NLEN (2 bytes)*/ - if(p >= inlength - 4) return 52; /*error, bit pointer will jump past memory*/ - LEN = in[p] + 256 * in[p + 1]; p += 2; - NLEN = in[p] + 256 * in[p + 1]; p += 2; + if(p + 4 >= inlength) return 52; /*error, bit pointer will jump past memory*/ + LEN = in[p] + 256u * in[p + 1]; p += 2; + NLEN = in[p] + 256u * in[p + 1]; p += 2; /*check if 16-bit NLEN is really the one's complement of LEN*/ if(LEN + NLEN != 65535) return 21; /*error: NLEN is not one's complement of LEN*/ - if((*pos) + LEN >= out->size) - { - if(!ucvector_resize(out, (*pos) + LEN)) return 83; /*alloc fail*/ - } + if(!ucvector_resize(out, (*pos) + LEN)) return 83; /*alloc fail*/ /*read the literal data: LEN bytes are now stored in the out buffer*/ if(p + LEN > inlength) return 23; /*error: reading outside of in buffer*/ - for (n = 0; n < LEN; n++) out->data[(*pos)++] = in[p++]; + for(n = 0; n < LEN; ++n) out->data[(*pos)++] = in[p++]; (*bp) = p * 8; @@ -1228,7 +1258,6 @@ static unsigned lodepng_inflatev(ucvector* out, size_t bp = 0; unsigned BFINAL = 0; size_t pos = 0; /*byte position in the out buffer*/ - unsigned error = 0; (void)settings; @@ -1238,8 +1267,8 @@ static unsigned lodepng_inflatev(ucvector* out, unsigned BTYPE; if(bp + 2 >= insize * 8) return 52; /*error, bit pointer will jump past memory*/ BFINAL = readBitFromStream(&bp, in); - BTYPE = 1 * readBitFromStream(&bp, in); - BTYPE += 2 * readBitFromStream(&bp, in); + BTYPE = 1u * readBitFromStream(&bp, in); + BTYPE += 2u * readBitFromStream(&bp, in); if(BTYPE == 3) return 20; /*error: invalid BTYPE*/ else if(BTYPE == 0) error = inflateNoCompression(out, in, &bp, &pos, insize); /*no compression*/ @@ -1248,9 +1277,6 @@ static unsigned lodepng_inflatev(ucvector* out, if(error) return error; } - /*Only now we know the true size of out, resize it to that*/ - if(!ucvector_resize(out, pos)) error = 83; /*alloc fail*/ - return error; } @@ -1301,21 +1327,17 @@ static void addHuffmanSymbol(size_t* bp, ucvector* compressed, unsigned code, un given array must be sorted (if no value is smaller, it returns the size of the given array)*/ static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value) { - /*linear search implementation*/ - /*for (size_t i = 1; i < array_size; i++) if(array[i] > value) return i - 1; - return array_size - 1;*/ - - /*binary search implementation (not that much faster) (precondition: array_size > 0)*/ - size_t left = 1; + /*binary search (only small gain over linear). TODO: use CPU log2 instruction for getting symbols instead*/ + size_t left = 1; size_t right = array_size - 1; - while(left <= right) - { - size_t mid = (left + right) / 2; - if(array[mid] <= value) left = mid + 1; /*the value to find is more to the right*/ - else if(array[mid - 1] > value) right = mid - 1; /*the value to find is more to the left*/ - else return mid - 1; + + while(left <= right) { + size_t mid = (left + right) >> 1; + if (array[mid] >= value) right = mid - 1; + else left = mid + 1; } - return array_size - 1; + if(left >= array_size || array[left] > value) left--; + return left; } static void addLengthDistance(uivector* values, size_t length, size_t distance) @@ -1337,67 +1359,84 @@ static void addLengthDistance(uivector* values, size_t length, size_t distance) uivector_push_back(values, extra_distance); } +/*3 bytes of data get encoded into two bytes. The hash cannot use more than 3 +bytes as input because 3 is the minimum match length for deflate*/ static const unsigned HASH_NUM_VALUES = 65536; -static const unsigned HASH_NUM_CHARACTERS = 3; -static const unsigned HASH_SHIFT = 2; -/* -The HASH_NUM_CHARACTERS value is used to make encoding faster by using longer -sequences to generate a hash value from the stream bytes. Setting it to 3 -gives exactly the same compression as the brute force method, since deflate's -run length encoding starts with lengths of 3. Setting it to higher values, -like 6, can make the encoding faster (not always though!), but will cause the -encoding to miss any length between 3 and this value, so that the compression -may be worse (but this can vary too depending on the image, sometimes it is -even a bit better instead). -The HASH_NUM_VALUES is the amount of unique possible hash values that -combinations of bytes can give, the higher it is the more memory is needed, but -if it's too low the advantage of hashing is gone. -*/ +static const unsigned HASH_BIT_MASK = 65535; /*HASH_NUM_VALUES - 1, but C90 does not like that as initializer*/ typedef struct Hash { - int* head; /*hash value to head circular pos*/ - int* val; /*circular pos to hash value*/ + int* head; /*hash value to head circular pos - can be outdated if went around window*/ /*circular pos to prev circular pos*/ unsigned short* chain; - unsigned short* zeros; + int* val; /*circular pos to hash value*/ + + /*TODO: do this not only for zeros but for any repeated byte. However for PNG + it's always going to be the zeros that dominate, so not important for PNG*/ + int* headz; /*similar to head, but for chainz*/ + unsigned short* chainz; /*those with same amount of zeros*/ + unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/ } Hash; static unsigned hash_init(Hash* hash, unsigned windowsize) { unsigned i; - hash->head = (int*)mymalloc(sizeof(int) * HASH_NUM_VALUES); - hash->val = (int*)mymalloc(sizeof(int) * windowsize); - hash->chain = (unsigned short*)mymalloc(sizeof(unsigned short) * windowsize); - hash->zeros = (unsigned short*)mymalloc(sizeof(unsigned short) * windowsize); + hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES); + hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize); + hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); + + hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); + hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1)); + hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); - if(!hash->head || !hash->val || !hash->chain || !hash->zeros) return 83; /*alloc fail*/ + if(!hash->head || !hash->chain || !hash->val || !hash->headz|| !hash->chainz || !hash->zeros) + { + return 83; /*alloc fail*/ + } /*initialize hash table*/ - for (i = 0; i < HASH_NUM_VALUES; i++) hash->head[i] = -1; - for (i = 0; i < windowsize; i++) hash->val[i] = -1; - for (i = 0; i < windowsize; i++) hash->chain[i] = i; /*same value as index indicates uninitialized*/ + for(i = 0; i != HASH_NUM_VALUES; ++i) hash->head[i] = -1; + for(i = 0; i != windowsize; ++i) hash->val[i] = -1; + for(i = 0; i != windowsize; ++i) hash->chain[i] = i; /*same value as index indicates uninitialized*/ + + for(i = 0; i <= MAX_SUPPORTED_DEFLATE_LENGTH; ++i) hash->headz[i] = -1; + for(i = 0; i != windowsize; ++i) hash->chainz[i] = i; /*same value as index indicates uninitialized*/ return 0; } static void hash_cleanup(Hash* hash) { - myfree(hash->head); - myfree(hash->val); - myfree(hash->chain); - myfree(hash->zeros); + lodepng_free(hash->head); + lodepng_free(hash->val); + lodepng_free(hash->chain); + + lodepng_free(hash->zeros); + lodepng_free(hash->headz); + lodepng_free(hash->chainz); } + + static unsigned getHash(const unsigned char* data, size_t size, size_t pos) { unsigned result = 0; - size_t amount, i; - if(pos >= size) return 0; - amount = HASH_NUM_CHARACTERS; - if(pos + amount >= size) amount = size - pos; - for (i = 0; i < amount; i++) result ^= (data[pos + i] << (i * HASH_SHIFT)); - return result % HASH_NUM_VALUES; + if(pos + 2 < size) + { + /*A simple shift and xor hash is used. Since the data of PNGs is dominated + by zeroes due to the filters, a better hash does not have a significant + effect on speed in traversing the chain, and causes more time spend on + calculating the hash.*/ + result ^= (unsigned)(data[pos + 0] << 0u); + result ^= (unsigned)(data[pos + 1] << 4u); + result ^= (unsigned)(data[pos + 2] << 8u); + } else { + size_t amount, i; + if(pos >= size) return 0; + amount = size - pos; + for(i = 0; i != amount; ++i) result ^= (unsigned)(data[pos + i] << (i * 8u)); + } + return result & HASH_BIT_MASK; } static unsigned countZeros(const unsigned char* data, size_t size, size_t pos) @@ -1406,26 +1445,22 @@ static unsigned countZeros(const unsigned char* data, size_t size, size_t pos) const unsigned char* end = start + MAX_SUPPORTED_DEFLATE_LENGTH; if(end > data + size) end = data + size; data = start; - while (data != end && *data == 0) data++; - return data - start; + while(data != end && *data == 0) ++data; + /*subtracting two addresses returned as 32-bit number (max value is MAX_SUPPORTED_DEFLATE_LENGTH)*/ + return (unsigned)(data - start); } -static void updateHashChain(Hash* hash, - size_t pos, int hashval, unsigned windowsize) +/*wpos = pos & (windowsize - 1)*/ +static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros) { - unsigned wpos = pos % windowsize; - hash->val[wpos] = hashval; - if(hash->head[hashval] != -1) - { - hash->chain[wpos] = hash->head[hashval]; - } + hash->val[wpos] = (int)hashval; + if(hash->head[hashval] != -1) hash->chain[wpos] = hash->head[hashval]; hash->head[hashval] = wpos; -} -/*Enable to use lazy instead of greedy matching. It looks one byte further -to see if that one gives a longer distance. This gives slightly better compression, at the cost -of a speed loss.*/ -#define LAZY_MATCHING + hash->zeros[wpos] = numzeros; + if(hash->headz[numzeros] != -1) hash->chainz[wpos] = hash->headz[numzeros]; + hash->headz[numzeros] = wpos; +} /* LZ77-encode the data. Return value is error code. The input are raw bytes, the output @@ -1437,106 +1472,124 @@ the "dictionary". A brute force search through all possible distances would be s this hash technique is one out of several ways to speed this up. */ static unsigned encodeLZ77(uivector* out, Hash* hash, - const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize) + const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize, + unsigned minmatch, unsigned nicematch, unsigned lazymatching) { - unsigned short numzeros = 0; - int usezeros = windowsize >= 8192; /*for small window size, the 'max chain length' optimization does a better job*/ - unsigned pos, i, error = 0; + size_t pos; + unsigned i, error = 0; + /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/ + unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8; + unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64; - if(!error) + unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/ + unsigned numzeros = 0; + + unsigned offset; /*the offset represents the distance in LZ77 terminology*/ + unsigned length; + unsigned lazy = 0; + unsigned lazylength = 0, lazyoffset = 0; + unsigned hashval; + unsigned current_offset, current_length; + unsigned prev_offset; + const unsigned char *lastptr, *foreptr, *backptr; + unsigned hashpos; + + if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/ + if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/ + + if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH; + + for(pos = inpos; pos < insize; ++pos) { - unsigned offset; /*the offset represents the distance in LZ77 terminology*/ - unsigned length; -#ifdef LAZY_MATCHING - unsigned lazy = 0; - unsigned lazylength = 0, lazyoffset = 0; -#endif /*LAZY_MATCHING*/ - unsigned hashval; - unsigned current_offset, current_length; - const unsigned char *lastptr, *foreptr, *backptr; - unsigned short hashpos, prevpos; + size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/ + unsigned chainlength = 0; + + hashval = getHash(in, insize, pos); - for (pos = inpos; pos < insize; pos++) + if(usezeros && hashval == 0) + { + if(numzeros == 0) numzeros = countZeros(in, insize, pos); + else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros; + } + else { - size_t wpos = pos % windowsize; /*position for in 'circular' hash buffers*/ + numzeros = 0; + } - hashval = getHash(in, insize, pos); - updateHashChain(hash, pos, hashval, windowsize); + updateHashChain(hash, wpos, hashval, numzeros); - if(usezeros && hashval == 0) - { - numzeros = countZeros(in, insize, pos); - hash->zeros[wpos] = numzeros; - } + /*the length and offset found for the current position*/ + length = 0; + offset = 0; - /*the length and offset found for the current position*/ - length = 0; - offset = 0; + hashpos = hash->chain[wpos]; - prevpos = hash->head[hashval]; - hashpos = hash->chain[prevpos]; + lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH]; - lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH]; + /*search for the longest string*/ + prev_offset = 0; + for(;;) + { + if(chainlength++ >= maxchainlength) break; + current_offset = hashpos <= wpos ? wpos - hashpos : wpos - hashpos + windowsize; - /*search for the longest string*/ - if(hash->val[wpos] == (int)hashval) + if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/ + prev_offset = current_offset; + if(current_offset > 0) { - /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/ - unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8; - for (;;) - { - /*stop when went completely around the circular buffer*/ - if(prevpos < wpos && hashpos > prevpos && hashpos <= wpos) break; - if(prevpos > wpos && (hashpos <= wpos || hashpos > prevpos)) break; - if(maxchainlength-- == 0) break; - - current_offset = hashpos <= wpos ? wpos - hashpos : wpos - hashpos + windowsize; - if(current_offset > 0) - { - /*test the next characters*/ - foreptr = &in[pos]; - backptr = &in[pos - current_offset]; + /*test the next characters*/ + foreptr = &in[pos]; + backptr = &in[pos - current_offset]; - /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/ - if(usezeros && hashval == 0 && hash->val[hashpos] == 0 /*hashval[hashpos] may be out of date*/) - { - unsigned short skip = hash->zeros[hashpos]; - if(skip > numzeros) skip = numzeros; - backptr += skip; - foreptr += skip; - } + /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/ + if(numzeros >= 3) + { + unsigned skip = hash->zeros[hashpos]; + if(skip > numzeros) skip = numzeros; + backptr += skip; + foreptr += skip; + } - /* multiple checks at once per array bounds check */ - while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/ - { - ++backptr; - ++foreptr; - } - current_length = (unsigned)(foreptr - &in[pos]); + while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/ + { + ++backptr; + ++foreptr; + } + current_length = (unsigned)(foreptr - &in[pos]); - if(current_length > length) - { - length = current_length; /*the longest length*/ - offset = current_offset; /*the offset that is related to this longest length*/ - /*jump out once a length of max length is found (speed gain)*/ - if(current_length == MAX_SUPPORTED_DEFLATE_LENGTH) break; - } - } + if(current_length > length) + { + length = current_length; /*the longest length*/ + offset = current_offset; /*the offset that is related to this longest length*/ + /*jump out once a length of max length is found (speed gain). This also jumps + out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/ + if(current_length >= nicematch) break; + } + } - if(hashpos == hash->chain[hashpos]) break; + if(hashpos == hash->chain[hashpos]) break; - prevpos = hashpos; - hashpos = hash->chain[hashpos]; - } + if(numzeros >= 3 && length > numzeros) + { + hashpos = hash->chainz[hashpos]; + if(hash->zeros[hashpos] != numzeros) break; } + else + { + hashpos = hash->chain[hashpos]; + /*outdated hash value, happens if particular value was not encountered in whole last window*/ + if(hash->val[hashpos] != (int)hashval) break; + } + } -#ifdef LAZY_MATCHING - if(!lazy && length >= 3 && length < MAX_SUPPORTED_DEFLATE_LENGTH) + if(lazymatching) + { + if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH) { lazy = 1; lazylength = length; lazyoffset = offset; - continue; + continue; /*try the next byte*/ } if(lazy) { @@ -1552,47 +1605,45 @@ static unsigned encodeLZ77(uivector* out, Hash* hash, length = lazylength; offset = lazyoffset; hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/ - pos--; + hash->headz[numzeros] = -1; /*idem*/ + --pos; } } -#endif /*LAZY_MATCHING*/ - - if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/); - + } + if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/); - /**encode it as length/distance pair or literal value**/ - if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/ - { - if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/); - } - else + /*encode it as length/distance pair or literal value*/ + if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/ + { + if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/); + } + else if(length < minmatch || (length == 3 && offset > 4096)) + { + /*compensate for the fact that longer offsets have more extra bits, a + length of only 3 may be not worth it then*/ + if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/); + } + else + { + addLengthDistance(out, length, offset); + for(i = 1; i < length; ++i) { - if(length == 3 && offset > 2048) + ++pos; + wpos = pos & (windowsize - 1); + hashval = getHash(in, insize, pos); + if(usezeros && hashval == 0) { - /*compensate for the fact that longer offsets have more extra bits, a - length of only 3 may be not worth it then*/ - if(!uivector_push_back(out, in[pos + 0])) ERROR_BREAK(83 /*alloc fail*/); - if(!uivector_push_back(out, in[pos + 1])) ERROR_BREAK(83 /*alloc fail*/); - if(!uivector_push_back(out, in[pos + 2])) ERROR_BREAK(83 /*alloc fail*/); + if(numzeros == 0) numzeros = countZeros(in, insize, pos); + else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros; } else { - addLengthDistance(out, length, offset); - } - for (i = 1; i < length; i++) - { - pos++; - hashval = getHash(in, insize, pos); - updateHashChain(hash, pos, hashval, windowsize); - if(usezeros && hashval == 0) - { - hash->zeros[pos % windowsize] = countZeros(in, insize, pos); - } + numzeros = 0; } + updateHashChain(hash, wpos, hashval, numzeros); } - - } /*end of the loop through each character of input*/ - } /*end of "if(!error)"*/ + } + } /*end of the loop through each character of input*/ return error; } @@ -1606,7 +1657,7 @@ static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, s size_t i, j, numdeflateblocks = (datasize + 65534) / 65535; unsigned datapos = 0; - for (i = 0; i < numdeflateblocks; i++) + for(i = 0; i != numdeflateblocks; ++i) { unsigned BFINAL, BTYPE, LEN, NLEN; unsigned char firstbyte; @@ -1621,13 +1672,13 @@ static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, s if(datasize - datapos < 65535) LEN = (unsigned)datasize - datapos; NLEN = 65535 - LEN; - ucvector_push_back(out, (unsigned char)(LEN % 256)); - ucvector_push_back(out, (unsigned char)(LEN / 256)); - ucvector_push_back(out, (unsigned char)(NLEN % 256)); - ucvector_push_back(out, (unsigned char)(NLEN / 256)); + ucvector_push_back(out, (unsigned char)(LEN & 255)); + ucvector_push_back(out, (unsigned char)(LEN >> 8)); + ucvector_push_back(out, (unsigned char)(NLEN & 255)); + ucvector_push_back(out, (unsigned char)(NLEN >> 8)); /*Decompressed data*/ - for (j = 0; j < 65535 && datapos < datasize; j++) + for(j = 0; j < 65535 && datapos < datasize; ++j) { ucvector_push_back(out, data[datapos++]); } @@ -1645,7 +1696,7 @@ static void writeLZ77data(size_t* bp, ucvector* out, const uivector* lz77_encode const HuffmanTree* tree_ll, const HuffmanTree* tree_d) { size_t i = 0; - for (i = 0; i < lz77_encoded->size; i++) + for(i = 0; i != lz77_encoded->size; ++i) { unsigned val = lz77_encoded->data[i]; addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_ll, val), HuffmanTree_getLength(tree_ll, val)); @@ -1672,7 +1723,7 @@ static void writeLZ77data(size_t* bp, ucvector* out, const uivector* lz77_encode /*Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees*/ static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash, const unsigned char* data, size_t datapos, size_t dataend, - const LodePNGCompressSettings* settings, int final) + const LodePNGCompressSettings* settings, unsigned final) { unsigned error = 0; @@ -1725,60 +1776,62 @@ static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash, uivector_init(&bitlen_lld_e); uivector_init(&bitlen_cl); - /*This while loop is never loops due to a break at the end, it is here to + /*This while loop never loops due to a break at the end, it is here to allow breaking out of it to the cleanup phase on error conditions.*/ while(!error) { if(settings->use_lz77) { - error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize); /*LZ77 encoded*/ + error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize, + settings->minmatch, settings->nicematch, settings->lazymatching); if(error) break; } else { if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/); - for (i = datapos; i < dataend; i++) lz77_encoded.data[i] = data[i]; /*no LZ77, but still will be Huffman compressed*/ + for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/ } if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83 /*alloc fail*/); if(!uivector_resizev(&frequencies_d, 30, 0)) ERROR_BREAK(83 /*alloc fail*/); /*Count the frequencies of lit, len and dist codes*/ - for (i = 0; i < lz77_encoded.size; i++) + for(i = 0; i != lz77_encoded.size; ++i) { unsigned symbol = lz77_encoded.data[i]; - frequencies_ll.data[symbol]++; + ++frequencies_ll.data[symbol]; if(symbol > 256) { unsigned dist = lz77_encoded.data[i + 2]; - frequencies_d.data[dist]++; + ++frequencies_d.data[dist]; i += 3; } } frequencies_ll.data[256] = 1; /*there will be exactly 1 end code, at the end of the block*/ /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/ - error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll.data, frequencies_ll.size, 15); + error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll.data, 257, frequencies_ll.size, 15); if(error) break; - error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d.data, frequencies_d.size, 15); + /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/ + error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d.data, 2, frequencies_d.size, 15); if(error) break; numcodes_ll = tree_ll.numcodes; if(numcodes_ll > 286) numcodes_ll = 286; numcodes_d = tree_d.numcodes; if(numcodes_d > 30) numcodes_d = 30; /*store the code lengths of both generated trees in bitlen_lld*/ - for (i = 0; i < numcodes_ll; i++) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_ll, (unsigned)i)); - for (i = 0; i < numcodes_d; i++) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_d, (unsigned)i)); + for(i = 0; i != numcodes_ll; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_ll, (unsigned)i)); + for(i = 0; i != numcodes_d; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_d, (unsigned)i)); /*run-length compress bitlen_ldd into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times), 17 (3-10 zeroes), 18 (11-138 zeroes)*/ - for (i = 0; i < (unsigned)bitlen_lld.size; i++) + for(i = 0; i != (unsigned)bitlen_lld.size; ++i) { unsigned j = 0; /*amount of repititions*/ - while(i + j + 1 < (unsigned)bitlen_lld.size && bitlen_lld.data[i + j + 1] == bitlen_lld.data[i]) j++; + while(i + j + 1 < (unsigned)bitlen_lld.size && bitlen_lld.data[i + j + 1] == bitlen_lld.data[i]) ++j; if(bitlen_lld.data[i] == 0 && j >= 2) /*repeat code for zeroes*/ { - j++; /*include the first zero*/ + ++j; /*include the first zero*/ if(j <= 10) /*repeat code 17 supports max 10 zeroes*/ { uivector_push_back(&bitlen_lld_e, 17); @@ -1797,7 +1850,7 @@ static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash, size_t k; unsigned num = j / 6, rest = j % 6; uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]); - for (k = 0; k < num; k++) + for(k = 0; k < num; ++k) { uivector_push_back(&bitlen_lld_e, 16); uivector_push_back(&bitlen_lld_e, 6 - 3); @@ -1819,19 +1872,20 @@ static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash, /*generate tree_cl, the huffmantree of huffmantrees*/ if(!uivector_resizev(&frequencies_cl, NUM_CODE_LENGTH_CODES, 0)) ERROR_BREAK(83 /*alloc fail*/); - for (i = 0; i < bitlen_lld_e.size; i++) + for(i = 0; i != bitlen_lld_e.size; ++i) { - frequencies_cl.data[bitlen_lld_e.data[i]]++; + ++frequencies_cl.data[bitlen_lld_e.data[i]]; /*after a repeat code come the bits that specify the number of repetitions, those don't need to be in the frequencies_cl calculation*/ - if(bitlen_lld_e.data[i] >= 16) i++; + if(bitlen_lld_e.data[i] >= 16) ++i; } - error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl.data, frequencies_cl.size, 7); + error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl.data, + frequencies_cl.size, frequencies_cl.size, 7); if(error) break; - if(!uivector_resize(&bitlen_cl, NUM_CODE_LENGTH_CODES)) ERROR_BREAK(83 /*alloc fail*/); - for (i = 0; i < NUM_CODE_LENGTH_CODES; i++) + if(!uivector_resize(&bitlen_cl, tree_cl.numcodes)) ERROR_BREAK(83 /*alloc fail*/); + for(i = 0; i != tree_cl.numcodes; ++i) { /*lenghts of code length tree is in the order as specified by deflate*/ bitlen_cl.data[i] = HuffmanTree_getLength(&tree_cl, CLCL_ORDER[i]); @@ -1866,15 +1920,17 @@ static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash, HLIT = (unsigned)(numcodes_ll - 257); HDIST = (unsigned)(numcodes_d - 1); HCLEN = (unsigned)bitlen_cl.size - 4; + /*trim zeroes for HCLEN. HLIT and HDIST were already trimmed at tree creation*/ + while(!bitlen_cl.data[HCLEN + 4 - 1] && HCLEN > 0) --HCLEN; addBitsToStream(bp, out, HLIT, 5); addBitsToStream(bp, out, HDIST, 5); addBitsToStream(bp, out, HCLEN, 4); /*write the code lenghts of the code length alphabet*/ - for (i = 0; i < HCLEN + 4; i++) addBitsToStream(bp, out, bitlen_cl.data[i], 3); + for(i = 0; i != HCLEN + 4; ++i) addBitsToStream(bp, out, bitlen_cl.data[i], 3); /*write the lenghts of the lit/len AND the dist alphabet*/ - for (i = 0; i < bitlen_lld_e.size; i++) + for(i = 0; i != bitlen_lld_e.size; ++i) { addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_cl, bitlen_lld_e.data[i]), HuffmanTree_getLength(&tree_cl, bitlen_lld_e.data[i])); @@ -1913,7 +1969,7 @@ static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash, static unsigned deflateFixed(ucvector* out, size_t* bp, Hash* hash, const unsigned char* data, size_t datapos, size_t dataend, - const LodePNGCompressSettings* settings, int final) + const LodePNGCompressSettings* settings, unsigned final) { HuffmanTree tree_ll; /*tree for literal values and length codes*/ HuffmanTree tree_d; /*tree for distance codes*/ @@ -1936,13 +1992,14 @@ static unsigned deflateFixed(ucvector* out, size_t* bp, Hash* hash, { uivector lz77_encoded; uivector_init(&lz77_encoded); - error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize); + error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize, + settings->minmatch, settings->nicematch, settings->lazymatching); if(!error) writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d); uivector_cleanup(&lz77_encoded); } else /*no LZ77, but still will be Huffman compressed*/ { - for (i = datapos; i < dataend; i++) + for(i = datapos; i < dataend; ++i) { addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, data[i]), HuffmanTree_getLength(&tree_ll, data[i])); } @@ -1966,14 +2023,14 @@ static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t Hash hash; if(settings->btype > 2) return 61; - - if(settings->btype == 0) return deflateNoCompression(out, in, insize); - - if(settings->btype == 1) blocksize = insize; + else if(settings->btype == 0) return deflateNoCompression(out, in, insize); + else if(settings->btype == 1) blocksize = insize; else /*if(settings->btype == 2)*/ { + /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/ blocksize = insize / 8 + 8; - if(blocksize < 65535) blocksize = 65535; + if(blocksize < 65536) blocksize = 65536; + if(blocksize > 262144) blocksize = 262144; } numdeflateblocks = (insize + blocksize - 1) / blocksize; @@ -1982,9 +2039,9 @@ static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t error = hash_init(&hash, settings->windowsize); if(error) return error; - for (i = 0; i < numdeflateblocks && !error; i++) + for(i = 0; i != numdeflateblocks && !error; ++i) { - int final = i == numdeflateblocks - 1; + unsigned final = (i == numdeflateblocks - 1); size_t start = i * blocksize; size_t end = start + blocksize; if(end > insize) end = insize; @@ -2043,9 +2100,9 @@ static unsigned update_adler32(unsigned adler, const unsigned char* data, unsign len -= amount; while(amount > 0) { - s1 = (s1 + *data++); - s2 = (s2 + s1); - amount--; + s1 += (*data++); + s2 += s1; + --amount; } s1 %= 65521; s2 %= 65521; @@ -2131,7 +2188,7 @@ static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsi unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, size_t insize, const LodePNGCompressSettings* settings) { - /*initially, *out must be nullptr and outsize 0, if you just give some random *out + /*initially, *out must be NULL and outsize 0, if you just give some random *out that's pointing to a non allocated buffer, this'll crash*/ ucvector outv; size_t i; @@ -2139,7 +2196,6 @@ unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsig unsigned char* deflatedata = 0; size_t deflatesize = 0; - unsigned ADLER32; /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/ unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.*/ unsigned FLEVEL = 0; @@ -2151,16 +2207,16 @@ unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsig /*ucvector-controlled version of the output buffer, for dynamic array*/ ucvector_init_buffer(&outv, *out, *outsize); - ucvector_push_back(&outv, (unsigned char)(CMFFLG / 256)); - ucvector_push_back(&outv, (unsigned char)(CMFFLG % 256)); + ucvector_push_back(&outv, (unsigned char)(CMFFLG >> 8)); + ucvector_push_back(&outv, (unsigned char)(CMFFLG & 255)); error = deflate(&deflatedata, &deflatesize, in, insize, settings); if(!error) { - ADLER32 = adler32(in, (unsigned)insize); - for (i = 0; i < deflatesize; i++) ucvector_push_back(&outv, deflatedata[i]); - free(deflatedata); + unsigned ADLER32 = adler32(in, (unsigned)insize); + for(i = 0; i != deflatesize; ++i) ucvector_push_back(&outv, deflatedata[i]); + lodepng_free(deflatedata); lodepng_add32bitInt(&outv, ADLER32); } @@ -2192,7 +2248,7 @@ static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsign static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in, size_t insize, const LodePNGDecompressSettings* settings) { - if (!settings->custom_zlib) return 87; /*no custom zlib function provided */ + if(!settings->custom_zlib) return 87; /*no custom zlib function provided */ return settings->custom_zlib(out, outsize, in, insize, settings); } #endif /*LODEPNG_COMPILE_DECODER*/ @@ -2200,7 +2256,7 @@ static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsi static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, size_t insize, const LodePNGCompressSettings* settings) { - if (!settings->custom_zlib) return 87; /*no custom zlib function provided */ + if(!settings->custom_zlib) return 87; /*no custom zlib function provided */ return settings->custom_zlib(out, outsize, in, insize, settings); } #endif /*LODEPNG_COMPILE_ENCODER*/ @@ -2220,13 +2276,16 @@ void lodepng_compress_settings_init(LodePNGCompressSettings* settings) settings->btype = 2; settings->use_lz77 = 1; settings->windowsize = DEFAULT_WINDOWSIZE; + settings->minmatch = 3; + settings->nicematch = 128; + settings->lazymatching = 1; settings->custom_zlib = 0; settings->custom_deflate = 0; settings->custom_context = 0; } -const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 0, 0, 0}; +const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0}; #endif /*LODEPNG_COMPILE_ENCODER*/ @@ -2258,47 +2317,58 @@ const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, /* / CRC32 / */ /* ////////////////////////////////////////////////////////////////////////// */ -static unsigned Crc32_crc_table_computed = 0; -static unsigned Crc32_crc_table[256]; -/*Make the table for a fast CRC.*/ -static void Crc32_make_crc_table(void) -{ - unsigned c, k, n; - for (n = 0; n < 256; n++) - { - c = n; - for (k = 0; k < 8; k++) - { - if(c & 1) c = 0xedb88320L ^ (c >> 1); - else c = c >> 1; - } - Crc32_crc_table[n] = c; - } - Crc32_crc_table_computed = 1; -} +#ifndef LODEPNG_NO_COMPILE_CRC +/* CRC polynomial: 0xedb88320 */ +static unsigned lodepng_crc32_table[256] = { + 0u, 1996959894u, 3993919788u, 2567524794u, 124634137u, 1886057615u, 3915621685u, 2657392035u, + 249268274u, 2044508324u, 3772115230u, 2547177864u, 162941995u, 2125561021u, 3887607047u, 2428444049u, + 498536548u, 1789927666u, 4089016648u, 2227061214u, 450548861u, 1843258603u, 4107580753u, 2211677639u, + 325883990u, 1684777152u, 4251122042u, 2321926636u, 335633487u, 1661365465u, 4195302755u, 2366115317u, + 997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u, + 901097722u, 1119000684u, 3686517206u, 2898065728u, 853044451u, 1172266101u, 3705015759u, 2882616665u, + 651767980u, 1373503546u, 3369554304u, 3218104598u, 565507253u, 1454621731u, 3485111705u, 3099436303u, + 671266974u, 1594198024u, 3322730930u, 2970347812u, 795835527u, 1483230225u, 3244367275u, 3060149565u, + 1994146192u, 31158534u, 2563907772u, 4023717930u, 1907459465u, 112637215u, 2680153253u, 3904427059u, + 2013776290u, 251722036u, 2517215374u, 3775830040u, 2137656763u, 141376813u, 2439277719u, 3865271297u, + 1802195444u, 476864866u, 2238001368u, 4066508878u, 1812370925u, 453092731u, 2181625025u, 4111451223u, + 1706088902u, 314042704u, 2344532202u, 4240017532u, 1658658271u, 366619977u, 2362670323u, 4224994405u, + 1303535960u, 984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u, + 1131014506u, 879679996u, 2909243462u, 3663771856u, 1141124467u, 855842277u, 2852801631u, 3708648649u, + 1342533948u, 654459306u, 3188396048u, 3373015174u, 1466479909u, 544179635u, 3110523913u, 3462522015u, + 1591671054u, 702138776u, 2966460450u, 3352799412u, 1504918807u, 783551873u, 3082640443u, 3233442989u, + 3988292384u, 2596254646u, 62317068u, 1957810842u, 3939845945u, 2647816111u, 81470997u, 1943803523u, + 3814918930u, 2489596804u, 225274430u, 2053790376u, 3826175755u, 2466906013u, 167816743u, 2097651377u, + 4027552580u, 2265490386u, 503444072u, 1762050814u, 4150417245u, 2154129355u, 426522225u, 1852507879u, + 4275313526u, 2312317920u, 282753626u, 1742555852u, 4189708143u, 2394877945u, 397917763u, 1622183637u, + 3604390888u, 2714866558u, 953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u, + 3624741850u, 2936675148u, 906185462u, 1090812512u, 3747672003u, 2825379669u, 829329135u, 1181335161u, + 3412177804u, 3160834842u, 628085408u, 1382605366u, 3423369109u, 3138078467u, 570562233u, 1426400815u, + 3317316542u, 2998733608u, 733239954u, 1555261956u, 3268935591u, 3050360625u, 752459403u, 1541320221u, + 2607071920u, 3965973030u, 1969922972u, 40735498u, 2617837225u, 3943577151u, 1913087877u, 83908371u, + 2512341634u, 3803740692u, 2075208622u, 213261112u, 2463272603u, 3855990285u, 2094854071u, 198958881u, + 2262029012u, 4057260610u, 1759359992u, 534414190u, 2176718541u, 4139329115u, 1873836001u, 414664567u, + 2282248934u, 4279200368u, 1711684554u, 285281116u, 2405801727u, 4167216745u, 1634467795u, 376229701u, + 2685067896u, 3608007406u, 1308918612u, 956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u, + 2932959818u, 3654703836u, 1088359270u, 936918000u, 2847714899u, 3736837829u, 1202900863u, 817233897u, + 3183342108u, 3401237130u, 1404277552u, 615818150u, 3134207493u, 3453421203u, 1423857449u, 601450431u, + 3009837614u, 3294710456u, 1567103746u, 711928724u, 3020668471u, 3272380065u, 1510334235u, 755167117u +}; -/*Update a running CRC with the bytes buf[0..len-1]--the CRC should be -initialized to all 1's, and the transmitted value is the 1's complement of the -final running CRC (see the crc() routine below).*/ -static unsigned Crc32_update_crc(const unsigned char* buf, unsigned crc, size_t len) +/*Return the CRC of the bytes buf[0..len-1].*/ +unsigned lodepng_crc32(const unsigned char* data, size_t length) { - unsigned c = crc; - size_t n; - - if(!Crc32_crc_table_computed) Crc32_make_crc_table(); - for (n = 0; n < len; n++) + unsigned r = 0xffffffffu; + size_t i; + for(i = 0; i < length; ++i) { - c = Crc32_crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8); + r = lodepng_crc32_table[(r ^ data[i]) & 0xff] ^ (r >> 8); } - return c; -} - -/*Return the CRC of the bytes buf[0..len-1].*/ -unsigned lodepng_crc32(const unsigned char* buf, size_t len) -{ - return Crc32_update_crc(buf, 0xffffffffL, len) ^ 0xffffffffL; + return r ^ 0xffffffffu; } +#else /* !LODEPNG_NO_COMPILE_CRC */ +unsigned lodepng_crc32(const unsigned char* data, size_t length); +#endif /* !LODEPNG_NO_COMPILE_CRC */ /* ////////////////////////////////////////////////////////////////////////// */ /* / Reading and writing single bits and bytes from/to stream for LodePNG / */ @@ -2307,7 +2377,7 @@ unsigned lodepng_crc32(const unsigned char* buf, size_t len) static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream) { unsigned char result = (unsigned char)((bitstream[(*bitpointer) >> 3] >> (7 - ((*bitpointer) & 0x7))) & 1); - (*bitpointer)++; + ++(*bitpointer); return result; } @@ -2315,9 +2385,10 @@ static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned ch { unsigned result = 0; size_t i; - for (i = nbits - 1; i < nbits; i--) + for(i = 0 ; i < nbits; ++i) { - result += (unsigned)readBitFromReversedStream(bitpointer, bitstream) << i; + result <<= 1; + result |= (unsigned)readBitFromReversedStream(bitpointer, bitstream); } return result; } @@ -2331,7 +2402,7 @@ static void setBitOfReversedStream0(size_t* bitpointer, unsigned char* bitstream /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/ bitstream[(*bitpointer) >> 3] |= (bit << (7 - ((*bitpointer) & 0x7))); } - (*bitpointer)++; + ++(*bitpointer); } #endif /*LODEPNG_COMPILE_DECODER*/ @@ -2340,7 +2411,7 @@ static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, /*the current bit in bitstream may be 0 or 1 for this to work*/ if(bit == 0) bitstream[(*bitpointer) >> 3] &= (unsigned char)(~(1 << (7 - ((*bitpointer) & 0x7)))); else bitstream[(*bitpointer) >> 3] |= (1 << (7 - ((*bitpointer) & 0x7))); - (*bitpointer)++; + ++(*bitpointer); } /* ////////////////////////////////////////////////////////////////////////// */ @@ -2355,7 +2426,7 @@ unsigned lodepng_chunk_length(const unsigned char* chunk) void lodepng_chunk_type(char type[5], const unsigned char* chunk) { unsigned i; - for (i = 0; i < 4; i++) type[i] = chunk[4 + i]; + for(i = 0; i != 4; ++i) type[i] = (char)chunk[4 + i]; type[4] = 0; /*null termination char*/ } @@ -2427,13 +2498,13 @@ unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsi size_t new_length = (*outlength) + total_chunk_length; if(new_length < total_chunk_length || new_length < (*outlength)) return 77; /*integer overflow happened*/ - new_buffer = (unsigned char*)myrealloc(*out, new_length); + new_buffer = (unsigned char*)lodepng_realloc(*out, new_length); if(!new_buffer) return 83; /*alloc fail*/ (*out) = new_buffer; (*outlength) = new_length; chunk_start = &(*out)[new_length - total_chunk_length]; - for (i = 0; i < total_chunk_length; i++) chunk_start[i] = chunk[i]; + for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i]; return 0; } @@ -2445,7 +2516,7 @@ unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned l unsigned char *chunk, *new_buffer; size_t new_length = (*outlength) + length + 12; if(new_length < length + 12 || new_length < (*outlength)) return 77; /*integer overflow happened*/ - new_buffer = (unsigned char*)myrealloc(*out, new_length); + new_buffer = (unsigned char*)lodepng_realloc(*out, new_length); if(!new_buffer) return 83; /*alloc fail*/ (*out) = new_buffer; (*outlength) = new_length; @@ -2455,13 +2526,13 @@ unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned l lodepng_set32bitInt(chunk, (unsigned)length); /*2: chunk name (4 letters)*/ - chunk[4] = type[0]; - chunk[5] = type[1]; - chunk[6] = type[2]; - chunk[7] = type[3]; + chunk[4] = (unsigned char)type[0]; + chunk[5] = (unsigned char)type[1]; + chunk[6] = (unsigned char)type[2]; + chunk[7] = (unsigned char)type[3]; /*3: the data*/ - for (i = 0; i < length; i++) chunk[8 + i] = data[i]; + for(i = 0; i != length; ++i) chunk[8 + i] = data[i]; /*4: CRC (of the chunkname characters and the data)*/ lodepng_chunk_generate_crc(chunk); @@ -2531,9 +2602,9 @@ unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* *dest = *source; if(source->palette) { - dest->palette = (unsigned char*)mymalloc(source->palettesize * 4); + dest->palette = (unsigned char*)lodepng_malloc(1024); if(!dest->palette && source->palettesize) return 83; /*alloc fail*/ - for (i = 0; i < source->palettesize * 4; i++) dest->palette[i] = source->palette[i]; + for(i = 0; i != source->palettesize * 4; ++i) dest->palette[i] = source->palette[i]; } return 0; } @@ -2550,17 +2621,23 @@ static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColo if(a->key_g != b->key_g) return 0; if(a->key_b != b->key_b) return 0; } - if(a->palettesize != b->palettesize) return 0; - for (i = 0; i < a->palettesize * 4; i++) - { - if(a->palette[i] != b->palette[i]) return 0; + /*if one of the palette sizes is 0, then we consider it to be the same as the + other: it means that e.g. the palette was not given by the user and should be + considered the same as the palette inside the PNG.*/ + if(1/*a->palettesize != 0 && b->palettesize != 0*/) { + if(a->palettesize != b->palettesize) return 0; + for(i = 0; i != a->palettesize * 4; ++i) + { + if(a->palette[i] != b->palette[i]) return 0; + } } return 1; } void lodepng_palette_clear(LodePNGColorMode* info) { - if(info->palette) myfree(info->palette); + if(info->palette) lodepng_free(info->palette); + info->palette = 0; info->palettesize = 0; } @@ -2570,11 +2647,10 @@ unsigned lodepng_palette_add(LodePNGColorMode* info, unsigned char* data; /*the same resize technique as C++ std::vectors is used, and here it's made so that for a palette with the max of 256 colors, it'll have the exact alloc size*/ - if(!(info->palettesize & (info->palettesize - 1))) /*if palettesize is 0 or a power of two*/ + if(!info->palette) /*allocate palette if empty*/ { - /*allocated data must be at least 4* palettesize (for 4 color bytes)*/ - size_t alloc_size = info->palettesize == 0 ? 4 : info->palettesize * 4 * 2; - data = (unsigned char*)myrealloc(info->palette, alloc_size); + /*room for 256 colors with 4 bytes each*/ + data = (unsigned char*)lodepng_realloc(info->palette, 1024); if(!data) return 83; /*alloc fail*/ else info->palette = data; } @@ -2582,7 +2658,7 @@ unsigned lodepng_palette_add(LodePNGColorMode* info, info->palette[4 * info->palettesize + 1] = g; info->palette[4 * info->palettesize + 2] = b; info->palette[4 * info->palettesize + 3] = a; - info->palettesize++; + ++info->palettesize; return 0; } @@ -2615,7 +2691,7 @@ unsigned lodepng_is_palette_type(const LodePNGColorMode* info) unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info) { size_t i; - for (i = 0; i < info->palettesize; i++) + for(i = 0; i != info->palettesize; ++i) { if(info->palette[i * 4 + 3] < 255) return 1; } @@ -2631,27 +2707,47 @@ unsigned lodepng_can_have_alpha(const LodePNGColorMode* info) size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color) { - return (w * h * lodepng_get_bpp(color) + 7) / 8; + /*will not overflow for any color type if roughly w * h < 268435455*/ + size_t bpp = lodepng_get_bpp(color); + size_t n = w * h; + return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8; } size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) { - return (w * h * lodepng_get_bpp_lct(colortype, bitdepth) + 7) / 8; + /*will not overflow for any color type if roughly w * h < 268435455*/ + size_t bpp = lodepng_get_bpp_lct(colortype, bitdepth); + size_t n = w * h; + return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8; +} + + +#ifdef LODEPNG_COMPILE_PNG +#ifdef LODEPNG_COMPILE_DECODER +/*in an idat chunk, each scanline is a multiple of 8 bits, unlike the lodepng output buffer*/ +static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, const LodePNGColorMode* color) +{ + /*will not overflow for any color type if roughly w * h < 268435455*/ + size_t bpp = lodepng_get_bpp(color); + size_t line = ((w / 8) * bpp) + ((w & 7) * bpp + 7) / 8; + return h * line; } +#endif /*LODEPNG_COMPILE_DECODER*/ +#endif /*LODEPNG_COMPILE_PNG*/ #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS static void LodePNGUnknownChunks_init(LodePNGInfo* info) { unsigned i; - for (i = 0; i < 3; i++) info->unknown_chunks_data[i] = 0; - for (i = 0; i < 3; i++) info->unknown_chunks_size[i] = 0; + for(i = 0; i != 3; ++i) info->unknown_chunks_data[i] = 0; + for(i = 0; i != 3; ++i) info->unknown_chunks_size[i] = 0; } static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info) { unsigned i; - for (i = 0; i < 3; i++) myfree(info->unknown_chunks_data[i]); + for(i = 0; i != 3; ++i) lodepng_free(info->unknown_chunks_data[i]); } static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src) @@ -2660,13 +2756,13 @@ static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* LodePNGUnknownChunks_cleanup(dest); - for (i = 0; i < 3; i++) + for(i = 0; i != 3; ++i) { size_t j; dest->unknown_chunks_size[i] = src->unknown_chunks_size[i]; - dest->unknown_chunks_data[i] = (unsigned char*)mymalloc(src->unknown_chunks_size[i]); + dest->unknown_chunks_data[i] = (unsigned char*)lodepng_malloc(src->unknown_chunks_size[i]); if(!dest->unknown_chunks_data[i] && dest->unknown_chunks_size[i]) return 83; /*alloc fail*/ - for (j = 0; j < src->unknown_chunks_size[i]; j++) + for(j = 0; j < src->unknown_chunks_size[i]; ++j) { dest->unknown_chunks_data[i][j] = src->unknown_chunks_data[i][j]; } @@ -2680,20 +2776,20 @@ static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* static void LodePNGText_init(LodePNGInfo* info) { info->text_num = 0; - info->text_keys = nullptr; - info->text_strings = nullptr; + info->text_keys = NULL; + info->text_strings = NULL; } static void LodePNGText_cleanup(LodePNGInfo* info) { size_t i; - for (i = 0; i < info->text_num; i++) + for(i = 0; i != info->text_num; ++i) { string_cleanup(&info->text_keys[i]); string_cleanup(&info->text_strings[i]); } - myfree(info->text_keys); - myfree(info->text_strings); + lodepng_free(info->text_keys); + lodepng_free(info->text_strings); } static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source) @@ -2702,7 +2798,7 @@ static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source) dest->text_keys = 0; dest->text_strings = 0; dest->text_num = 0; - for (i = 0; i < source->text_num; i++) + for(i = 0; i != source->text_num; ++i) { CERROR_TRY_RETURN(lodepng_add_text(dest, source->text_keys[i], source->text_strings[i])); } @@ -2716,16 +2812,16 @@ void lodepng_clear_text(LodePNGInfo* info) unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str) { - char** new_keys = (char**)(myrealloc(info->text_keys, sizeof(char*) * (info->text_num + 1))); - char** new_strings = (char**)(myrealloc(info->text_strings, sizeof(char*) * (info->text_num + 1))); + char** new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1))); + char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1))); if(!new_keys || !new_strings) { - myfree(new_keys); - myfree(new_strings); + lodepng_free(new_keys); + lodepng_free(new_strings); return 83; /*alloc fail*/ } - info->text_num++; + ++info->text_num; info->text_keys = new_keys; info->text_strings = new_strings; @@ -2743,26 +2839,26 @@ unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str) static void LodePNGIText_init(LodePNGInfo* info) { info->itext_num = 0; - info->itext_keys = nullptr; - info->itext_langtags = nullptr; - info->itext_transkeys = nullptr; - info->itext_strings = nullptr; + info->itext_keys = NULL; + info->itext_langtags = NULL; + info->itext_transkeys = NULL; + info->itext_strings = NULL; } static void LodePNGIText_cleanup(LodePNGInfo* info) { size_t i; - for (i = 0; i < info->itext_num; i++) + for(i = 0; i != info->itext_num; ++i) { string_cleanup(&info->itext_keys[i]); string_cleanup(&info->itext_langtags[i]); string_cleanup(&info->itext_transkeys[i]); string_cleanup(&info->itext_strings[i]); } - myfree(info->itext_keys); - myfree(info->itext_langtags); - myfree(info->itext_transkeys); - myfree(info->itext_strings); + lodepng_free(info->itext_keys); + lodepng_free(info->itext_langtags); + lodepng_free(info->itext_transkeys); + lodepng_free(info->itext_strings); } static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source) @@ -2773,7 +2869,7 @@ static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source) dest->itext_transkeys = 0; dest->itext_strings = 0; dest->itext_num = 0; - for (i = 0; i < source->itext_num; i++) + for(i = 0; i != source->itext_num; ++i) { CERROR_TRY_RETURN(lodepng_add_itext(dest, source->itext_keys[i], source->itext_langtags[i], source->itext_transkeys[i], source->itext_strings[i])); @@ -2789,20 +2885,20 @@ void lodepng_clear_itext(LodePNGInfo* info) unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag, const char* transkey, const char* str) { - char** new_keys = (char**)(myrealloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1))); - char** new_langtags = (char**)(myrealloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1))); - char** new_transkeys = (char**)(myrealloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1))); - char** new_strings = (char**)(myrealloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1))); + char** new_keys = (char**)(lodepng_realloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1))); + char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1))); + char** new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1))); + char** new_strings = (char**)(lodepng_realloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1))); if(!new_keys || !new_langtags || !new_transkeys || !new_strings) { - myfree(new_keys); - myfree(new_langtags); - myfree(new_transkeys); - myfree(new_strings); + lodepng_free(new_keys); + lodepng_free(new_langtags); + lodepng_free(new_transkeys); + lodepng_free(new_strings); return 83; /*alloc fail*/ } - info->itext_num++; + ++info->itext_num; info->itext_keys = new_keys; info->itext_langtags = new_langtags; info->itext_transkeys = new_transkeys; @@ -2881,13 +2977,14 @@ void lodepng_info_swap(LodePNGInfo* a, LodePNGInfo* b) /* ////////////////////////////////////////////////////////////////////////// */ -/*index: bitgroup index, bits: bitgroup size(1, 2 or 4, in: bitgroup value, out: octet array to add bits to*/ +/*index: bitgroup index, bits: bitgroup size(1, 2 or 4), in: bitgroup value, out: octet array to add bits to*/ static void addColorBits(unsigned char* out, size_t index, unsigned bits, unsigned in) { + unsigned m = bits == 1 ? 7 : bits == 2 ? 3 : 1; /*8 / bits - 1*/ /*p = the partial index in the byte, e.g. with 4 palettebits it is 0 for first half or 1 for second half*/ - unsigned p = index % (8 / bits); - in &= (1 << bits) - 1; /*filter out any other bits of the input value*/ - in = in << (bits * (8 / bits - p - 1)); + unsigned p = index & m; + in &= (1u << bits) - 1u; /*filter out any other bits of the input value*/ + in = in << (bits * (m - p)); if(p == 0) out[index * bits / 8] = in; else out[index * bits / 8] |= in; } @@ -2897,55 +2994,46 @@ typedef struct ColorTree ColorTree; /* One node of a color tree This is the data structure used to count the number of unique colors and to get a palette -index for a color. It's a bit like an octree for RGB bits, except that a node has 256 -children and there is one level of the tree per color channel. This may use more memory -than an octree, but this is only used to count up to 256 so it's good enough. +index for a color. It's like an octree, but because the alpha channel is used too, each +node has 16 instead of 8 children. */ struct ColorTree { - ColorTree* next; /*null, or array of 256 pointers to ColorTree of next level*/ - int index; /*the payload. Only has a meaningful value if this is in the 4th level*/ + ColorTree* children[16]; /*up to 16 pointers to ColorTree of next level*/ + int index; /*the payload. Only has a meaningful value if this is in the last level*/ }; -static void color_tree_make_next(ColorTree* tree); - -/*next: whether to create the next array or not*/ -static void color_tree_init(ColorTree* tree, int next) -{ - if(next) color_tree_make_next(tree); - else tree->next = 0; - tree->index = -1; -} - -static void color_tree_make_next(ColorTree* tree) +static void color_tree_init(ColorTree* tree) { int i; - tree->next = (ColorTree*)mymalloc(256 * sizeof(ColorTree)); - for (i = 0; i < 256; i++) color_tree_init(&tree->next[i], 0); + for(i = 0; i != 16; ++i) tree->children[i] = 0; + tree->index = -1; } static void color_tree_cleanup(ColorTree* tree) { - if(tree->next) + int i; + for(i = 0; i != 16; ++i) { - int i; - for (i = 0; i < 256; i++) color_tree_cleanup(&tree->next[i]); - myfree(tree->next); + if(tree->children[i]) + { + color_tree_cleanup(tree->children[i]); + lodepng_free(tree->children[i]); + } } } /*returns -1 if color not present, its index otherwise*/ static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) { - tree = &tree->next[r]; - if(tree->next == 0) return -1; - tree = &tree->next[g]; - if(tree->next == 0) return -1; - tree = &tree->next[b]; - if(tree->next == 0) return -1; - tree = &tree->next[a]; - - return tree->index; + int bit = 0; + for(bit = 0; bit < 8; ++bit) + { + int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1); + if(!tree->children[i]) return -1; + else tree = tree->children[i]; + } + return tree ? tree->index : -1; } #ifdef LODEPNG_COMPILE_ENCODER @@ -2955,18 +3043,23 @@ static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, uns } #endif /*LODEPNG_COMPILE_ENCODER*/ -/*color is not allowed to already exist. Index should be >= 0 (it's signed to be compatible with using -1 for "doesn't exist")*/ -static void color_tree_add(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a, int index) +/*color is not allowed to already exist. +Index should be >= 0 (it's signed to be compatible with using -1 for "doesn't exist")*/ +static void color_tree_add(ColorTree* tree, + unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index) { - tree = &tree->next[r]; - if(tree->next == 0) color_tree_make_next(tree); - tree = &tree->next[g]; - if(tree->next == 0) color_tree_make_next(tree); - tree = &tree->next[b]; - if(tree->next == 0) color_tree_make_next(tree); - tree = &tree->next[a]; - - tree->index = index; + int bit; + for(bit = 0; bit < 8; ++bit) + { + int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1); + if(!tree->children[i]) + { + tree->children[i] = (ColorTree*)lodepng_malloc(sizeof(ColorTree)); + color_tree_init(tree->children[i]); + } + tree = tree->children[i]; + } + tree->index = (int)index; } /*put a pixel, given its RGBA color, into image of any color type*/ @@ -3006,7 +3099,7 @@ static unsigned rgba8ToPixel(unsigned char* out, size_t i, int index = color_tree_get(tree, r, g, b, a); if(index < 0) return 82; /*color not in palette*/ if(mode->bitdepth == 8) out[i] = index; - else addColorBits(out, i, mode->bitdepth, index); + else addColorBits(out, i, mode->bitdepth, (unsigned)index); } else if(mode->colortype == LCT_GREY_ALPHA) { @@ -3044,11 +3137,10 @@ static unsigned rgba8ToPixel(unsigned char* out, size_t i, } /*put a pixel, given its RGBA16 color, into image of any color 16-bitdepth type*/ -static unsigned rgba16ToPixel(unsigned char* out, size_t i, - const LodePNGColorMode* mode, - unsigned short r, unsigned short g, unsigned short b, unsigned short a) +static void rgba16ToPixel(unsigned char* out, size_t i, + const LodePNGColorMode* mode, + unsigned short r, unsigned short g, unsigned short b, unsigned short a) { - if(mode->bitdepth != 16) return 85; /*must be 16 for this function*/ if(mode->colortype == LCT_GREY) { unsigned short grey = r; /*((unsigned)r + g + b) / 3*/; @@ -3083,13 +3175,13 @@ static unsigned rgba16ToPixel(unsigned char* out, size_t i, out[i * 8 + 6] = (a >> 8) & 255; out[i * 8 + 7] = a & 255; } - - return 0; /*no error*/ } /*Get RGBA8 color of pixel with index i (y * width + x) from the raw image with given color type.*/ -static unsigned getPixelColorRGBA8(unsigned char* r, unsigned char* g, unsigned char* b, unsigned char* a, - const unsigned char* in, size_t i, const LodePNGColorMode* mode) +static void getPixelColorRGBA8(unsigned char* r, unsigned char* g, + unsigned char* b, unsigned char* a, + const unsigned char* in, size_t i, + const LodePNGColorMode* mode) { if(mode->colortype == LCT_GREY) { @@ -3143,11 +3235,21 @@ static unsigned getPixelColorRGBA8(unsigned char* r, unsigned char* g, unsigned size_t j = i * mode->bitdepth; index = readBitsFromReversedStream(&j, in, mode->bitdepth); } - if(index >= mode->palettesize) return 47; /*index out of palette*/ - *r = mode->palette[index * 4 + 0]; - *g = mode->palette[index * 4 + 1]; - *b = mode->palette[index * 4 + 2]; - *a = mode->palette[index * 4 + 3]; + + if(index >= mode->palettesize) + { + /*This is an error according to the PNG spec, but common PNG decoders make it black instead. + Done here too, slightly faster due to no error handling needed.*/ + *r = *g = *b = 0; + *a = 255; + } + else + { + *r = mode->palette[index * 4 + 0]; + *g = mode->palette[index * 4 + 1]; + *b = mode->palette[index * 4 + 2]; + *a = mode->palette[index * 4 + 3]; + } } else if(mode->colortype == LCT_GREY_ALPHA) { @@ -3166,24 +3268,163 @@ static unsigned getPixelColorRGBA8(unsigned char* r, unsigned char* g, unsigned { if(mode->bitdepth == 8) { - *r = in[i * 4 + 0]; *g = in[i * 4 + 1]; *b = in[i * 4 + 2]; *a = in[i * 4 + 3]; + *r = in[i * 4 + 0]; + *g = in[i * 4 + 1]; + *b = in[i * 4 + 2]; + *a = in[i * 4 + 3]; } else { - *r = in[i * 8 + 0]; *g = in[i * 8 + 2]; *b = in[i * 8 + 4]; *a = in[i * 8 + 6]; + *r = in[i * 8 + 0]; + *g = in[i * 8 + 2]; + *b = in[i * 8 + 4]; + *a = in[i * 8 + 6]; } } +} - return 0; /*no error*/ +/*Similar to getPixelColorRGBA8, but with all the for loops inside of the color +mode test cases, optimized to convert the colors much faster, when converting +to RGBA or RGB with 8 bit per cannel. buffer must be RGBA or RGB output with +enough memory, if has_alpha is true the output is RGBA. mode has the color mode +of the input buffer.*/ +static void getPixelColorsRGBA8(unsigned char* buffer, size_t numpixels, + unsigned has_alpha, const unsigned char* in, + const LodePNGColorMode* mode) +{ + unsigned num_channels = has_alpha ? 4 : 3; + size_t i; + if(mode->colortype == LCT_GREY) + { + if(mode->bitdepth == 8) + { + for(i = 0; i != numpixels; ++i, buffer += num_channels) + { + buffer[0] = buffer[1] = buffer[2] = in[i]; + if(has_alpha) buffer[3] = mode->key_defined && in[i] == mode->key_r ? 0 : 255; + } + } + else if(mode->bitdepth == 16) + { + for(i = 0; i != numpixels; ++i, buffer += num_channels) + { + buffer[0] = buffer[1] = buffer[2] = in[i * 2]; + if(has_alpha) buffer[3] = mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r ? 0 : 255; + } + } + else + { + unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ + size_t j = 0; + for(i = 0; i != numpixels; ++i, buffer += num_channels) + { + unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); + buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest; + if(has_alpha) buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255; + } + } + } + else if(mode->colortype == LCT_RGB) + { + if(mode->bitdepth == 8) + { + for(i = 0; i != numpixels; ++i, buffer += num_channels) + { + buffer[0] = in[i * 3 + 0]; + buffer[1] = in[i * 3 + 1]; + buffer[2] = in[i * 3 + 2]; + if(has_alpha) buffer[3] = mode->key_defined && buffer[0] == mode->key_r + && buffer[1]== mode->key_g && buffer[2] == mode->key_b ? 0 : 255; + } + } + else + { + for(i = 0; i != numpixels; ++i, buffer += num_channels) + { + buffer[0] = in[i * 6 + 0]; + buffer[1] = in[i * 6 + 2]; + buffer[2] = in[i * 6 + 4]; + if(has_alpha) buffer[3] = mode->key_defined + && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r + && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g + && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 0 : 255; + } + } + } + else if(mode->colortype == LCT_PALETTE) + { + unsigned index; + size_t j = 0; + for(i = 0; i != numpixels; ++i, buffer += num_channels) + { + if(mode->bitdepth == 8) index = in[i]; + else index = readBitsFromReversedStream(&j, in, mode->bitdepth); + + if(index >= mode->palettesize) + { + /*This is an error according to the PNG spec, but most PNG decoders make it black instead. + Done here too, slightly faster due to no error handling needed.*/ + buffer[0] = buffer[1] = buffer[2] = 0; + if(has_alpha) buffer[3] = 255; + } + else + { + buffer[0] = mode->palette[index * 4 + 0]; + buffer[1] = mode->palette[index * 4 + 1]; + buffer[2] = mode->palette[index * 4 + 2]; + if(has_alpha) buffer[3] = mode->palette[index * 4 + 3]; + } + } + } + else if(mode->colortype == LCT_GREY_ALPHA) + { + if(mode->bitdepth == 8) + { + for(i = 0; i != numpixels; ++i, buffer += num_channels) + { + buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0]; + if(has_alpha) buffer[3] = in[i * 2 + 1]; + } + } + else + { + for(i = 0; i != numpixels; ++i, buffer += num_channels) + { + buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0]; + if(has_alpha) buffer[3] = in[i * 4 + 2]; + } + } + } + else if(mode->colortype == LCT_RGBA) + { + if(mode->bitdepth == 8) + { + for(i = 0; i != numpixels; ++i, buffer += num_channels) + { + buffer[0] = in[i * 4 + 0]; + buffer[1] = in[i * 4 + 1]; + buffer[2] = in[i * 4 + 2]; + if(has_alpha) buffer[3] = in[i * 4 + 3]; + } + } + else + { + for(i = 0; i != numpixels; ++i, buffer += num_channels) + { + buffer[0] = in[i * 8 + 0]; + buffer[1] = in[i * 8 + 2]; + buffer[2] = in[i * 8 + 4]; + if(has_alpha) buffer[3] = in[i * 8 + 6]; + } + } + } } /*Get RGBA16 color of pixel with index i (y * width + x) from the raw image with given color type, but the given color type must be 16-bit itself.*/ -static unsigned getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a, - const unsigned char* in, size_t i, const LodePNGColorMode* mode) +static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a, + const unsigned char* in, size_t i, const LodePNGColorMode* mode) { - if(mode->bitdepth != 16) return 85; /*error: this function only supports 16-bit input*/ - if(mode->colortype == LCT_GREY) { *r = *g = *b = 256 * in[i * 2 + 0] + in[i * 2 + 1]; @@ -3192,85 +3433,90 @@ static unsigned getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsign } else if(mode->colortype == LCT_RGB) { - *r = 256 * in[i * 6 + 0] + in[i * 6 + 1]; - *g = 256 * in[i * 6 + 2] + in[i * 6 + 3]; - *b = 256 * in[i * 6 + 4] + in[i * 6 + 5]; - if(mode->key_defined && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r - && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g - && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; + *r = 256u * in[i * 6 + 0] + in[i * 6 + 1]; + *g = 256u * in[i * 6 + 2] + in[i * 6 + 3]; + *b = 256u * in[i * 6 + 4] + in[i * 6 + 5]; + if(mode->key_defined + && 256u * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r + && 256u * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g + && 256u * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; else *a = 65535; } else if(mode->colortype == LCT_GREY_ALPHA) { - *r = *g = *b = 256 * in[i * 4 + 0] + in[i * 4 + 1]; - *a = 256 * in[i * 4 + 2] + in[i * 4 + 3]; + *r = *g = *b = 256u * in[i * 4 + 0] + in[i * 4 + 1]; + *a = 256u * in[i * 4 + 2] + in[i * 4 + 3]; } else if(mode->colortype == LCT_RGBA) { - *r = 256 * in[i * 8 + 0] + in[i * 8 + 1]; - *g = 256 * in[i * 8 + 2] + in[i * 8 + 3]; - *b = 256 * in[i * 8 + 4] + in[i * 8 + 5]; - *a = 256 * in[i * 8 + 6] + in[i * 8 + 7]; + *r = 256u * in[i * 8 + 0] + in[i * 8 + 1]; + *g = 256u * in[i * 8 + 2] + in[i * 8 + 3]; + *b = 256u * in[i * 8 + 4] + in[i * 8 + 5]; + *a = 256u * in[i * 8 + 6] + in[i * 8 + 7]; } - else return 85; /*error: this function only supports 16-bit input*/ - - return 0; /*no error*/ } -/* -converts from any color type to 24-bit or 32-bit (later maybe more supported). return value = LodePNG error code -the out buffer must have (w * h * bpp + 7) / 8 bytes, where bpp is the bits per pixel of the output color type -(lodepng_get_bpp) for < 8 bpp images, there may _not_ be padding bits at the end of scanlines. -*/ unsigned lodepng_convert(unsigned char* out, const unsigned char* in, - LodePNGColorMode* mode_out, LodePNGColorMode* mode_in, + const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, unsigned w, unsigned h) { - unsigned error = 0; size_t i; ColorTree tree; + size_t numpixels = w * h; if(lodepng_color_mode_equal(mode_out, mode_in)) { size_t numbytes = lodepng_get_raw_size(w, h, mode_in); - for (i = 0; i < numbytes; i++) out[i] = in[i]; - return error; + for(i = 0; i != numbytes; ++i) out[i] = in[i]; + return 0; } if(mode_out->colortype == LCT_PALETTE) { - size_t palsize = 1 << mode_out->bitdepth; - if(mode_out->palettesize < palsize) palsize = mode_out->palettesize; - color_tree_init(&tree, 1); - for (i = 0; i < palsize; i++) + size_t palettesize = mode_out->palettesize; + const unsigned char* palette = mode_out->palette; + size_t palsize = 1u << mode_out->bitdepth; + /*if the user specified output palette but did not give the values, assume + they want the values of the input color type (assuming that one is palette). + Note that we never create a new palette ourselves.*/ + if(palettesize == 0) + { + palettesize = mode_in->palettesize; + palette = mode_in->palette; + } + if(palettesize < palsize) palsize = palettesize; + color_tree_init(&tree); + for(i = 0; i != palsize; ++i) { - unsigned char* p = &mode_out->palette[i * 4]; + const unsigned char* p = &palette[i * 4]; color_tree_add(&tree, p[0], p[1], p[2], p[3], i); } } if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16) { - size_t numpixels = w * h; - for (i = 0; i < numpixels; i++) + for(i = 0; i != numpixels; ++i) { unsigned short r = 0, g = 0, b = 0, a = 0; - error = getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in); - if(error) return error; - error = rgba16ToPixel(out, i, mode_out, r, g, b, a); - if(error) return error; + getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in); + rgba16ToPixel(out, i, mode_out, r, g, b, a); } } + else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA) + { + getPixelColorsRGBA8(out, numpixels, 1, in, mode_in); + } + else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB) + { + getPixelColorsRGBA8(out, numpixels, 0, in, mode_in); + } else { - size_t numpixels = w * h; - for (i = 0; i < numpixels; i++) + unsigned char r = 0, g = 0, b = 0, a = 0; + for(i = 0; i != numpixels; ++i) { - unsigned char r = 0, g = 0, b = 0, a = 0; - error = getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in); - if(error) return error; - error = rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a); - if(error) return error; + getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in); + CERROR_TRY_RETURN(rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a)); } } @@ -3279,93 +3525,36 @@ unsigned lodepng_convert(unsigned char* out, const unsigned char* in, color_tree_cleanup(&tree); } - return error; + return 0; /*no error*/ } #ifdef LODEPNG_COMPILE_ENCODER -typedef struct ColorProfile -{ - unsigned char sixteenbit; /*needs more than 8 bits per channel*/ - unsigned char sixteenbit_done; - - - unsigned char colored; /*not greyscale*/ - unsigned char colored_done; - - unsigned char key; /*a color key is required, or more*/ - unsigned short key_r; /*these values are always in 16-bit bitdepth in the profile*/ - unsigned short key_g; - unsigned short key_b; - unsigned char alpha; /*alpha channel, or alpha palette, required*/ - unsigned char alpha_done; - - unsigned numcolors; - ColorTree tree; /*for listing the counted colors, up to 256*/ - unsigned char* palette; /*size 1024. Remember up to the first 256 RGBA colors*/ - unsigned maxnumcolors; /*if more than that amount counted*/ - unsigned char numcolors_done; - - unsigned greybits; /*amount of bits required for greyscale (1, 2, 4, 8). Does not take 16 bit into account.*/ - unsigned char greybits_done; - -} ColorProfile; - -static void color_profile_init(ColorProfile* profile, LodePNGColorMode* mode) +void lodepng_color_profile_init(LodePNGColorProfile* profile) { - profile->sixteenbit = 0; - profile->sixteenbit_done = mode->bitdepth == 16 ? 0 : 1; - profile->colored = 0; - profile->colored_done = lodepng_is_greyscale_type(mode) ? 1 : 0; - profile->key = 0; profile->alpha = 0; - profile->alpha_done = lodepng_can_have_alpha(mode) ? 0 : 1; - + profile->key_r = profile->key_g = profile->key_b = 0; profile->numcolors = 0; - color_tree_init(&profile->tree, 1); - profile->palette = (unsigned char*)mymalloc(1024); - profile->maxnumcolors = 257; - if(lodepng_get_bpp(mode) <= 8) - { - int bpp = lodepng_get_bpp(mode); - profile->maxnumcolors = bpp == 1 ? 2 : (bpp == 2 ? 4 : (bpp == 4 ? 16 : 256)); - } - profile->numcolors_done = 0; - - profile->greybits = 1; - profile->greybits_done = lodepng_get_bpp(mode) == 1 ? 1 : 0; -} - -static void color_profile_cleanup(ColorProfile* profile) -{ - color_tree_cleanup(&profile->tree); - myfree(profile->palette); + profile->bits = 1; } /*function used for debug purposes with C++*/ -/*void printColorProfile(ColorProfile* p) -{ - std::cout << "sixteenbit: " << (int)p->sixteenbit << std::endl; - std::cout << "sixteenbit_done: " << (int)p->sixteenbit_done << std::endl; - std::cout << "colored: " << (int)p->colored << std::endl; - std::cout << "colored_done: " << (int)p->colored_done << std::endl; - std::cout << "key: " << (int)p->key << std::endl; - std::cout << "key_r: " << (int)p->key_r << std::endl; - std::cout << "key_g: " << (int)p->key_g << std::endl; - std::cout << "key_b: " << (int)p->key_b << std::endl; - std::cout << "alpha: " << (int)p->alpha << std::endl; - std::cout << "alpha_done: " << (int)p->alpha_done << std::endl; - std::cout << "numcolors: " << (int)p->numcolors << std::endl; - std::cout << "maxnumcolors: " << (int)p->maxnumcolors << std::endl; - std::cout << "numcolors_done: " << (int)p->numcolors_done << std::endl; - std::cout << "greybits: " << (int)p->greybits << std::endl; - std::cout << "greybits_done: " << (int)p->greybits_done << std::endl; +/*void printColorProfile(LodePNGColorProfile* p) +{ + std::cout << "colored: " << (int)p->colored << ", "; + std::cout << "key: " << (int)p->key << ", "; + std::cout << "key_r: " << (int)p->key_r << ", "; + std::cout << "key_g: " << (int)p->key_g << ", "; + std::cout << "key_b: " << (int)p->key_b << ", "; + std::cout << "alpha: " << (int)p->alpha << ", "; + std::cout << "numcolors: " << (int)p->numcolors << ", "; + std::cout << "bits: " << (int)p->bits << std::endl; }*/ /*Returns how many bits needed to represent given value (max 8 bit)*/ -unsigned getValueRequiredBits(unsigned char value) +static unsigned getValueRequiredBits(unsigned char value) { if(value == 0 || value == 255) return 1; /*The scaling of 2-bit and 4-bit values uses multiples of 85 and 17*/ @@ -3375,312 +3564,258 @@ unsigned getValueRequiredBits(unsigned char value) /*profile must already have been inited with mode. It's ok to set some parameters of profile to done already.*/ -static unsigned get_color_profile(ColorProfile* profile, - const unsigned char* in, size_t numpixels, - LodePNGColorMode* mode) +unsigned lodepng_get_color_profile(LodePNGColorProfile* profile, + const unsigned char* in, unsigned w, unsigned h, + const LodePNGColorMode* mode) { unsigned error = 0; size_t i; + ColorTree tree; + size_t numpixels = w * h; + + unsigned colored_done = lodepng_is_greyscale_type(mode) ? 1 : 0; + unsigned alpha_done = lodepng_can_have_alpha(mode) ? 0 : 1; + unsigned numcolors_done = 0; + unsigned bpp = lodepng_get_bpp(mode); + unsigned bits_done = bpp == 1 ? 1 : 0; + unsigned maxnumcolors = 257; + unsigned sixteen = 0; + if(bpp <= 8) maxnumcolors = bpp == 1 ? 2 : (bpp == 2 ? 4 : (bpp == 4 ? 16 : 256)); + + color_tree_init(&tree); + /*Check if the 16-bit input is truly 16-bit*/ if(mode->bitdepth == 16) { - for (i = 0; i < numpixels; i++) + unsigned short r, g, b, a; + for(i = 0; i != numpixels; ++i) { - unsigned short r, g, b, a; - error = getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode); - if(error) break; - - /*a color is considered good for 8-bit if the first byte and the second byte are equal, - (so if it's divisible through 257), NOT necessarily if the second byte is 0*/ - if(!profile->sixteenbit_done - && (((r & 255) != ((r >> 8) & 255)) - || ((g & 255) != ((g >> 8) & 255)) - || ((b & 255) != ((b >> 8) & 255)))) + getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode); + if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) || + (b & 255) != ((b >> 8) & 255) || (a & 255) != ((a >> 8) & 255)) /*first and second byte differ*/ { - profile->sixteenbit = 1; - profile->sixteenbit_done = 1; - profile->greybits_done = 1; /*greybits is not applicable anymore at 16-bit*/ - profile->numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/ + sixteen = 1; + break; } + } + } + + if(sixteen) + { + unsigned short r = 0, g = 0, b = 0, a = 0; + profile->bits = 16; + bits_done = numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/ + + for(i = 0; i != numpixels; ++i) + { + getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode); - if(!profile->colored_done && (r != g || r != b)) + if(!colored_done && (r != g || r != b)) { profile->colored = 1; - profile->colored_done = 1; - profile->greybits_done = 1; /*greybits is not applicable anymore*/ + colored_done = 1; } - if(!profile->alpha_done && a != 255) + if(!alpha_done) { - if(a == 0 && !(profile->key && (r != profile->key_r || g != profile->key_g || b != profile->key_b))) + unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b); + if(a != 65535 && (a != 0 || (profile->key && !matchkey))) { - if(!profile->key) - { - profile->key = 1; - profile->key_r = r; - profile->key_g = g; - profile->key_b = b; - } + profile->alpha = 1; + alpha_done = 1; + if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ } - else + else if(a == 0 && !profile->alpha && !profile->key) + { + profile->key = 1; + profile->key_r = r; + profile->key_g = g; + profile->key_b = b; + } + else if(a == 65535 && profile->key && matchkey) { + /* Color key cannot be used if an opaque pixel also has that RGB color. */ profile->alpha = 1; - profile->alpha_done = 1; - profile->greybits_done = 1; /*greybits is not applicable anymore*/ + alpha_done = 1; } } + if(alpha_done && numcolors_done && colored_done && bits_done) break; + } - if(!profile->greybits_done) - { - /*assuming 8-bit r, this test does not care about 16-bit*/ - unsigned bits = getValueRequiredBits(r); - if(bits > profile->greybits) profile->greybits = bits; - if(profile->greybits >= 8) profile->greybits_done = 1; - } - - if(!profile->numcolors_done) + if(profile->key && !profile->alpha) + { + for(i = 0; i != numpixels; ++i) { - /*assuming 8-bit rgba, this test does not care about 16-bit*/ - if(!color_tree_has(&profile->tree, r, g, b, a)) + getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode); + if(a != 0 && r == profile->key_r && g == profile->key_g && b == profile->key_b) { - color_tree_add(&profile->tree, r, g, b, a, profile->numcolors); - if(profile->numcolors < 256) - { - unsigned char* p = profile->palette; - unsigned i = profile->numcolors; - p[i * 4 + 0] = r; - p[i * 4 + 1] = g; - p[i * 4 + 2] = b; - p[i * 4 + 3] = a; - } - profile->numcolors++; - if(profile->numcolors >= profile->maxnumcolors) profile->numcolors_done = 1; + /* Color key cannot be used if an opaque pixel also has that RGB color. */ + profile->alpha = 1; + alpha_done = 1; } } - - if(profile->alpha_done && profile->numcolors_done - && profile->colored_done && profile->sixteenbit_done && profile->greybits_done) - { - break; - } - }; + } } - else + else /* < 16-bit */ { - for (i = 0; i < numpixels; i++) + unsigned char r = 0, g = 0, b = 0, a = 0; + for(i = 0; i != numpixels; ++i) { - unsigned char r = 0; - unsigned char g = 0; - unsigned char b = 0; - unsigned char a = 0; - error = getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode); - if(error) break; + getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode); + + if(!bits_done && profile->bits < 8) + { + /*only r is checked, < 8 bits is only relevant for greyscale*/ + unsigned bits = getValueRequiredBits(r); + if(bits > profile->bits) profile->bits = bits; + } + bits_done = (profile->bits >= bpp); - if(!profile->colored_done && (r != g || r != b)) + if(!colored_done && (r != g || r != b)) { profile->colored = 1; - profile->colored_done = 1; - profile->greybits_done = 1; /*greybits is not applicable anymore*/ + colored_done = 1; + if(profile->bits < 8) profile->bits = 8; /*PNG has no colored modes with less than 8-bit per channel*/ } - if(!profile->alpha_done && a != 255) + if(!alpha_done) { - if(a == 0 && !(profile->key && (r != profile->key_r || g != profile->key_g || b != profile->key_b))) + unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b); + if(a != 255 && (a != 0 || (profile->key && !matchkey))) { - if(!profile->key) - { - profile->key = 1; - profile->key_r = r; - profile->key_g = g; - profile->key_b = b; - } + profile->alpha = 1; + alpha_done = 1; + if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ } - else + else if(a == 0 && !profile->alpha && !profile->key) { + profile->key = 1; + profile->key_r = r; + profile->key_g = g; + profile->key_b = b; + } + else if(a == 255 && profile->key && matchkey) + { + /* Color key cannot be used if an opaque pixel also has that RGB color. */ profile->alpha = 1; - profile->alpha_done = 1; - profile->greybits_done = 1; /*greybits is not applicable anymore*/ + alpha_done = 1; + if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ } } - if(!profile->greybits_done) - { - unsigned bits = getValueRequiredBits(r); - if(bits > profile->greybits) profile->greybits = bits; - if(profile->greybits >= 8) profile->greybits_done = 1; - } - - if(!profile->numcolors_done) + if(!numcolors_done) { - if(!color_tree_has(&profile->tree, r, g, b, a)) + if(!color_tree_has(&tree, r, g, b, a)) { - - color_tree_add(&profile->tree, r, g, b, a, profile->numcolors); + color_tree_add(&tree, r, g, b, a, profile->numcolors); if(profile->numcolors < 256) { unsigned char* p = profile->palette; - unsigned i = profile->numcolors; - p[i * 4 + 0] = r; - p[i * 4 + 1] = g; - p[i * 4 + 2] = b; - p[i * 4 + 3] = a; + unsigned n = profile->numcolors; + p[n * 4 + 0] = r; + p[n * 4 + 1] = g; + p[n * 4 + 2] = b; + p[n * 4 + 3] = a; } - profile->numcolors++; - if(profile->numcolors >= profile->maxnumcolors) profile->numcolors_done = 1; + ++profile->numcolors; + numcolors_done = profile->numcolors >= maxnumcolors; } } - if(profile->alpha_done && profile->numcolors_done && profile->colored_done && profile->greybits_done) + if(alpha_done && numcolors_done && colored_done && bits_done) break; + } + + if(profile->key && !profile->alpha) + { + for(i = 0; i != numpixels; ++i) { - break; + getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode); + if(a != 0 && r == profile->key_r && g == profile->key_g && b == profile->key_b) + { + /* Color key cannot be used if an opaque pixel also has that RGB color. */ + profile->alpha = 1; + alpha_done = 1; + } } - }; - } + } - /*make the profile's key always 16-bit for consistency*/ - if(mode->bitdepth < 16) - { - /*repeat each byte twice*/ - profile->key_r *= 257; - profile->key_g *= 257; - profile->key_b *= 257; + /*make the profile's key always 16-bit for consistency - repeat each byte twice*/ + profile->key_r += (profile->key_r << 8); + profile->key_g += (profile->key_g << 8); + profile->key_b += (profile->key_b << 8); } + color_tree_cleanup(&tree); return error; } -/*updates values of mode with a potentially smaller color model. mode_out should +/*Automatically chooses color type that gives smallest amount of bits in the +output image, e.g. grey if there are only greyscale pixels, palette if there +are less than 256 colors, ... +Updates values of mode with a potentially smaller color model. mode_out should contain the user chosen color model, but will be overwritten with the new chosen one.*/ -static unsigned doAutoChooseColor(LodePNGColorMode* mode_out, - const unsigned char* image, unsigned w, unsigned h, LodePNGColorMode* mode_in, - LodePNGAutoConvert auto_convert) +unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, + const unsigned char* image, unsigned w, unsigned h, + const LodePNGColorMode* mode_in) { - ColorProfile profile; + LodePNGColorProfile prof; unsigned error = 0; + unsigned i, n, palettebits, grey_ok, palette_ok; - if(auto_convert == LAC_ALPHA) - { - if(mode_out->colortype != LCT_RGBA && mode_out->colortype != LCT_GREY_ALPHA) return 0; - } + lodepng_color_profile_init(&prof); + error = lodepng_get_color_profile(&prof, image, w, h, mode_in); + if(error) return error; + mode_out->key_defined = 0; - color_profile_init(&profile, mode_in); - if(auto_convert == LAC_ALPHA) + if(prof.key && w * h <= 16) { - profile.colored_done = 1; - profile.greybits_done = 1; - profile.numcolors_done = 1; - profile.sixteenbit_done = 1; + prof.alpha = 1; /*too few pixels to justify tRNS chunk overhead*/ + if(prof.bits < 8) prof.bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ } - error = get_color_profile(&profile, image, w * h, mode_in); + grey_ok = !prof.colored && !prof.alpha; /*grey without alpha, with potentially low bits*/ + n = prof.numcolors; + palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8)); + palette_ok = n <= 256 && (n * 2 < w * h) && prof.bits <= 8; + if(w * h < n * 2) palette_ok = 0; /*don't add palette overhead if image has only a few pixels*/ + if(grey_ok && prof.bits <= palettebits) palette_ok = 0; /*grey is less overhead*/ - if(!error && auto_convert == LAC_ALPHA) + if(palette_ok) { - if(!profile.alpha) + unsigned char* p = prof.palette; + lodepng_palette_clear(mode_out); /*remove potential earlier palette*/ + for(i = 0; i != prof.numcolors; ++i) { - mode_out->colortype = (mode_out->colortype == LCT_RGBA ? LCT_RGB : LCT_GREY); + error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]); + if(error) break; } - } - else if(!error && auto_convert != LAC_ALPHA) - { - mode_out->key_defined = 0; - if(profile.sixteenbit) - { - mode_out->bitdepth = 16; - if(profile.alpha) - { - mode_out->colortype = profile.colored ? LCT_RGBA : LCT_GREY_ALPHA; - } - else - { - mode_out->colortype = profile.colored ? LCT_RGB : LCT_GREY; - if(profile.key) - { - mode_out->key_defined = 1; - mode_out->key_r = profile.key_r; - mode_out->key_g = profile.key_g; - mode_out->key_b = profile.key_b; - } - } - } - else /*less than 16 bits per channel*/ - { - /*don't add palette overhead if image hasn't got a lot of pixels*/ - unsigned n = profile.numcolors; - int palette_ok = n <= 256 && (n * 2 < w * h); - unsigned palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8)); - int grey_ok = !profile.colored && !profile.alpha; /*grey without alpha, with potentially low bits*/ - if(palette_ok || grey_ok) - { - if(!palette_ok || (grey_ok && profile.greybits <= palettebits)) - { - mode_out->colortype = LCT_GREY; - mode_out->bitdepth = profile.greybits; - if(profile.key) - { - unsigned keyval = profile.key_r; - keyval &= (profile.greybits - 1); /*same subgroup of bits repeated, so taking right bits is fine*/ - mode_out->key_defined = 1; - mode_out->key_r = keyval; - mode_out->key_g = keyval; - mode_out->key_b = keyval; - } - } - else - { - /*fill in the palette*/ - unsigned i; - unsigned char* p = profile.palette; - for (i = 0; i < profile.numcolors; i++) - { - error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]); - if(error) break; - } + mode_out->colortype = LCT_PALETTE; + mode_out->bitdepth = palettebits; - mode_out->colortype = LCT_PALETTE; - mode_out->bitdepth = palettebits; - } - } - else /*8-bit per channel*/ - { - mode_out->bitdepth = 8; - if(profile.alpha) - { - mode_out->colortype = profile.colored ? LCT_RGBA : LCT_GREY_ALPHA; - } - else - { - mode_out->colortype = profile.colored ? LCT_RGB : LCT_GREY /*LCT_GREY normally won't occur, already done earlier*/; - if(profile.key) - { - mode_out->key_defined = 1; - mode_out->key_r = profile.key_r % 256; - mode_out->key_g = profile.key_g % 256; - mode_out->key_b = profile.key_b % 256; - } - } - } + if(mode_in->colortype == LCT_PALETTE && mode_in->palettesize >= mode_out->palettesize + && mode_in->bitdepth == mode_out->bitdepth) + { + /*If input should have same palette colors, keep original to preserve its order and prevent conversion*/ + lodepng_color_mode_cleanup(mode_out); + lodepng_color_mode_copy(mode_out, mode_in); } } - - color_profile_cleanup(&profile); - - if(mode_out->colortype == LCT_PALETTE && mode_in->palettesize == mode_out->palettesize) + else /*8-bit or 16-bit per channel*/ { - /*In this case keep the palette order of the input, so that the user can choose an optimal one*/ - size_t i; - for (i = 0; i < mode_in->palettesize * 4; i++) + mode_out->bitdepth = prof.bits; + mode_out->colortype = prof.alpha ? (prof.colored ? LCT_RGBA : LCT_GREY_ALPHA) + : (prof.colored ? LCT_RGB : LCT_GREY); + + if(prof.key && !prof.alpha) { - mode_out->palette[i] = mode_in->palette[i]; + unsigned mask = (1u << mode_out->bitdepth) - 1u; /*profile always uses 16-bit, mask converts it*/ + mode_out->key_r = prof.key_r & mask; + mode_out->key_g = prof.key_g & mask; + mode_out->key_b = prof.key_b & mask; + mode_out->key_defined = 1; } } - if(auto_convert == LAC_AUTO_NO_NIBBLES && mode_out->bitdepth < 8) - { - /*palette can keep its small amount of colors, as long as no indices use it*/ - mode_out->bitdepth = 8; - } - return error; } @@ -3693,9 +3828,9 @@ are only needed to make the paeth calculation correct. */ static unsigned char paethPredictor(short a, short b, short c) { - short pa = std::abs(b - c); - short pb = std::abs(a - c); - short pc = std::abs(a + b - c - c); + short pa = abs(b - c); + short pb = abs(a - c); + short pc = abs(a + b - c - c); if(pc < pa && pc < pb) return (unsigned char)c; else if(pb < pa) return (unsigned char)b; @@ -3709,6 +3844,21 @@ static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y start values*/ static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x delta values*/ static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y delta values*/ +/* +Outputs various dimensions and positions in the image related to the Adam7 reduced images. +passw: output containing the width of the 7 passes +passh: output containing the height of the 7 passes +filter_passstart: output containing the index of the start and end of each + reduced image with filter bytes +padded_passstart output containing the index of the start and end of each + reduced image when without filter bytes but with padded scanlines +passstart: output containing the index of the start and end of each reduced + image without padding between scanlines, but still padding between the images +w, h: width and height of non-interlaced image +bpp: bits per pixel +"padded" is only relevant if bpp is less than 8 and a scanline or image does not + end at a full byte +*/ static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t filter_passstart[8], size_t padded_passstart[8], size_t passstart[8], unsigned w, unsigned h, unsigned bpp) { @@ -3716,7 +3866,7 @@ static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t fil unsigned i; /*calculate width and height in pixels of each pass*/ - for (i = 0; i < 7; i++) + for(i = 0; i != 7; ++i) { passw[i] = (w + ADAM7_DX[i] - ADAM7_IX[i] - 1) / ADAM7_DX[i]; passh[i] = (h + ADAM7_DY[i] - ADAM7_IY[i] - 1) / ADAM7_DY[i]; @@ -3725,7 +3875,7 @@ static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t fil } filter_passstart[0] = padded_passstart[0] = passstart[0] = 0; - for (i = 0; i < 7; i++) + for(i = 0; i != 7; ++i) { /*if passw[i] is 0, it's 0 bytes, not 1 (no filtertype-byte)*/ filter_passstart[i + 1] = filter_passstart[i] @@ -3752,7 +3902,7 @@ unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state, { CERROR_RETURN_ERROR(state->error, 48); /*error: the given data is empty*/ } - if(insize < 29) + if(insize < 33) { CERROR_RETURN_ERROR(state->error, 27); /*error: the data length is smaller than the length of a PNG header*/ } @@ -3766,7 +3916,11 @@ unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state, { CERROR_RETURN_ERROR(state->error, 28); /*error: the first 8 bytes are not the correct PNG signature*/ } - if(in[12] != 'I' || in[13] != 'H' || in[14] != 'D' || in[15] != 'R') + if(lodepng_chunk_length(in + 8) != 13) + { + CERROR_RETURN_ERROR(state->error, 94); /*error: header size must be 13 bytes*/ + } + if(!lodepng_chunk_type_equals(in + 8, "IHDR")) { CERROR_RETURN_ERROR(state->error, 29); /*error: it doesn't start with a IHDR chunk!*/ } @@ -3780,6 +3934,11 @@ unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state, info->filter_method = in[27]; info->interlace_method = in[28]; + if(*w == 0 || *h == 0) + { + CERROR_RETURN_ERROR(state->error, 93); + } + if(!state->decoder.ignore_crc) { unsigned CRC = lodepng_read32bitInt(&in[29]); @@ -3817,53 +3976,53 @@ static unsigned unfilterScanline(unsigned char* recon, const unsigned char* scan switch(filterType) { case 0: - for (i = 0; i < length; i++) recon[i] = scanline[i]; + for(i = 0; i != length; ++i) recon[i] = scanline[i]; break; case 1: - for (i = 0; i < bytewidth; i++) recon[i] = scanline[i]; - for (i = bytewidth; i < length; i++) recon[i] = scanline[i] + recon[i - bytewidth]; + for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + recon[i - bytewidth]; break; case 2: if(precon) { - for (i = 0; i < length; i++) recon[i] = scanline[i] + precon[i]; + for(i = 0; i != length; ++i) recon[i] = scanline[i] + precon[i]; } else { - for (i = 0; i < length; i++) recon[i] = scanline[i]; + for(i = 0; i != length; ++i) recon[i] = scanline[i]; } break; case 3: if(precon) { - for (i = 0; i < bytewidth; i++) recon[i] = scanline[i] + precon[i] / 2; - for (i = bytewidth; i < length; i++) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) / 2); + for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i] + (precon[i] >> 1); + for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) >> 1); } else { - for (i = 0; i < bytewidth; i++) recon[i] = scanline[i]; - for (i = bytewidth; i < length; i++) recon[i] = scanline[i] + recon[i - bytewidth] / 2; + for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + (recon[i - bytewidth] >> 1); } break; case 4: if(precon) { - for (i = 0; i < bytewidth; i++) + for(i = 0; i != bytewidth; ++i) { recon[i] = (scanline[i] + precon[i]); /*paethPredictor(0, precon[i], 0) is always precon[i]*/ } - for (i = bytewidth; i < length; i++) + for(i = bytewidth; i < length; ++i) { recon[i] = (scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth])); } } else { - for (i = 0; i < bytewidth; i++) + for(i = 0; i != bytewidth; ++i) { recon[i] = scanline[i]; } - for (i = bytewidth; i < length; i++) + for(i = bytewidth; i < length; ++i) { /*paethPredictor(recon[i - bytewidth], 0, 0) is always recon[i - bytewidth]*/ recon[i] = (scanline[i] + recon[i - bytewidth]); @@ -3892,7 +4051,7 @@ static unsigned unfilter(unsigned char* out, const unsigned char* in, unsigned w size_t bytewidth = (bpp + 7) / 8; size_t linebytes = (w * bpp + 7) / 8; - for (y = 0; y < h; y++) + for(y = 0; y < h; ++y) { size_t outindex = linebytes * y; size_t inindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ @@ -3906,12 +4065,19 @@ static unsigned unfilter(unsigned char* out, const unsigned char* in, unsigned w return 0; } +/* +in: Adam7 interlaced image, with no padding bits between scanlines, but between + reduced images so that each reduced image starts at a byte. +out: the same pixels, but re-ordered so that they're now a non-interlaced image with size w*h +bpp: bits per pixel +out has the following size in bits: w * h * bpp. +in is possibly bigger due to padding bits between reduced images. +out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation +(because that's likely a little bit faster) +NOTE: comments about padding bits are only relevant if bpp < 8 +*/ static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) { - /*Note: this function works on image buffers WITHOUT padding bits at end of scanlines - with non-multiple-of-8 bit amounts, only between reduced images is padding - out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation - (because that's likely a little bit faster)*/ unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8]; unsigned i; @@ -3920,16 +4086,16 @@ static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsig if(bpp >= 8) { - for (i = 0; i < 7; i++) + for(i = 0; i != 7; ++i) { unsigned x, y, b; size_t bytewidth = bpp / 8; - for (y = 0; y < passh[i]; y++) - for (x = 0; x < passw[i]; x++) + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { size_t pixelinstart = passstart[i] + (y * passw[i] + x) * bytewidth; size_t pixeloutstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth; - for (b = 0; b < bytewidth; b++) + for(b = 0; b < bytewidth; ++b) { out[pixeloutstart + b] = in[pixelinstart + b]; } @@ -3938,18 +4104,18 @@ static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsig } else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ { - for (i = 0; i < 7; i++) + for(i = 0; i != 7; ++i) { unsigned x, y, b; unsigned ilinebits = bpp * passw[i]; unsigned olinebits = bpp * w; size_t obp, ibp; /*bit pointers (for out and in buffer)*/ - for (y = 0; y < passh[i]; y++) - for (x = 0; x < passw[i]; x++) + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { ibp = (8 * passstart[i]) + (y * ilinebits + x * bpp); obp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp; - for (b = 0; b < bpp; b++) + for(b = 0; b < bpp; ++b) { unsigned char bit = readBitFromReversedStream(&ibp, in); /*note that this function assumes the out buffer is completely 0, use setBitOfReversedStream otherwise*/ @@ -3975,70 +4141,376 @@ static void removePaddingBits(unsigned char* out, const unsigned char* in, unsigned y; size_t diff = ilinebits - olinebits; size_t ibp = 0, obp = 0; /*input and output bit pointers*/ - for (y = 0; y < h; y++) + for(y = 0; y < h; ++y) { size_t x; - for (x = 0; x < olinebits; x++) + for(x = 0; x < olinebits; ++x) + { + unsigned char bit = readBitFromReversedStream(&ibp, in); + setBitOfReversedStream(&obp, out, bit); + } + ibp += diff; + } +} + +/*out must be buffer big enough to contain full image, and in must contain the full decompressed data from +the IDAT chunks (with filter index bytes and possible padding bits) +return value is error*/ +static unsigned postProcessScanlines(unsigned char* out, unsigned char* in, + unsigned w, unsigned h, const LodePNGInfo* info_png) +{ + /* + This function converts the filtered-padded-interlaced data into pure 2D image buffer with the PNG's colortype. + Steps: + *) if no Adam7: 1) unfilter 2) remove padding bits (= posible extra bits per scanline if bpp < 8) + *) if adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace + NOTE: the in buffer will be overwritten with intermediate data! + */ + unsigned bpp = lodepng_get_bpp(&info_png->color); + if(bpp == 0) return 31; /*error: invalid colortype*/ + + if(info_png->interlace_method == 0) + { + if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8) + { + CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp)); + removePaddingBits(out, in, w * bpp, ((w * bpp + 7) / 8) * 8, h); + } + /*we can immediately filter into the out buffer, no other steps needed*/ + else CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp)); + } + else /*interlace_method is 1 (Adam7)*/ + { + unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned i; + + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + + for(i = 0; i != 7; ++i) + { + CERROR_TRY_RETURN(unfilter(&in[padded_passstart[i]], &in[filter_passstart[i]], passw[i], passh[i], bpp)); + /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline, + move bytes instead of bits or move not at all*/ + if(bpp < 8) + { + /*remove padding bits in scanlines; after this there still may be padding + bits between the different reduced images: each reduced image still starts nicely at a byte*/ + removePaddingBits(&in[passstart[i]], &in[padded_passstart[i]], passw[i] * bpp, + ((passw[i] * bpp + 7) / 8) * 8, passh[i]); + } + } + + Adam7_deinterlace(out, in, w, h, bpp); + } + + return 0; +} + +static unsigned readChunk_PLTE(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) +{ + unsigned pos = 0, i; + if(color->palette) lodepng_free(color->palette); + color->palettesize = chunkLength / 3; + color->palette = (unsigned char*)lodepng_malloc(4 * color->palettesize); + if(!color->palette && color->palettesize) + { + color->palettesize = 0; + return 83; /*alloc fail*/ + } + if(color->palettesize > 256) return 38; /*error: palette too big*/ + + for(i = 0; i != color->palettesize; ++i) + { + color->palette[4 * i + 0] = data[pos++]; /*R*/ + color->palette[4 * i + 1] = data[pos++]; /*G*/ + color->palette[4 * i + 2] = data[pos++]; /*B*/ + color->palette[4 * i + 3] = 255; /*alpha*/ + } + + return 0; /* OK */ +} + +static unsigned readChunk_tRNS(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) +{ + unsigned i; + if(color->colortype == LCT_PALETTE) + { + /*error: more alpha values given than there are palette entries*/ + if(chunkLength > color->palettesize) return 38; + + for(i = 0; i != chunkLength; ++i) color->palette[4 * i + 3] = data[i]; + } + else if(color->colortype == LCT_GREY) + { + /*error: this chunk must be 2 bytes for greyscale image*/ + if(chunkLength != 2) return 30; + + color->key_defined = 1; + color->key_r = color->key_g = color->key_b = 256u * data[0] + data[1]; + } + else if(color->colortype == LCT_RGB) + { + /*error: this chunk must be 6 bytes for RGB image*/ + if(chunkLength != 6) return 41; + + color->key_defined = 1; + color->key_r = 256u * data[0] + data[1]; + color->key_g = 256u * data[2] + data[3]; + color->key_b = 256u * data[4] + data[5]; + } + else return 42; /*error: tRNS chunk not allowed for other color models*/ + + return 0; /* OK */ +} + + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +/*background color chunk (bKGD)*/ +static unsigned readChunk_bKGD(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) +{ + if(info->color.colortype == LCT_PALETTE) + { + /*error: this chunk must be 1 byte for indexed color image*/ + if(chunkLength != 1) return 43; + + info->background_defined = 1; + info->background_r = info->background_g = info->background_b = data[0]; + } + else if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA) + { + /*error: this chunk must be 2 bytes for greyscale image*/ + if(chunkLength != 2) return 44; + + info->background_defined = 1; + info->background_r = info->background_g = info->background_b = 256u * data[0] + data[1]; + } + else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA) + { + /*error: this chunk must be 6 bytes for greyscale image*/ + if(chunkLength != 6) return 45; + + info->background_defined = 1; + info->background_r = 256u * data[0] + data[1]; + info->background_g = 256u * data[2] + data[3]; + info->background_b = 256u * data[4] + data[5]; + } + + return 0; /* OK */ +} + +/*text chunk (tEXt)*/ +static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) +{ + unsigned error = 0; + char *key = 0, *str = 0; + unsigned i; + + while(!error) /*not really a while loop, only used to break on error*/ + { + unsigned length, string2_begin; + + length = 0; + while(length < chunkLength && data[length] != 0) ++length; + /*even though it's not allowed by the standard, no error is thrown if + there's no null termination char, if the text is empty*/ + if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ + + key = (char*)lodepng_malloc(length + 1); + if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ + + key[length] = 0; + for(i = 0; i != length; ++i) key[i] = (char)data[i]; + + string2_begin = length + 1; /*skip keyword null terminator*/ + + length = chunkLength < string2_begin ? 0 : chunkLength - string2_begin; + str = (char*)lodepng_malloc(length + 1); + if(!str) CERROR_BREAK(error, 83); /*alloc fail*/ + + str[length] = 0; + for(i = 0; i != length; ++i) str[i] = (char)data[string2_begin + i]; + + error = lodepng_add_text(info, key, str); + + break; + } + + lodepng_free(key); + lodepng_free(str); + + return error; +} + +/*compressed text chunk (zTXt)*/ +static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings, + const unsigned char* data, size_t chunkLength) +{ + unsigned error = 0; + unsigned i; + + unsigned length, string2_begin; + char *key = 0; + ucvector decoded; + + ucvector_init(&decoded); + + while(!error) /*not really a while loop, only used to break on error*/ + { + for(length = 0; length < chunkLength && data[length] != 0; ++length) ; + if(length + 2 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/ + if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ + + key = (char*)lodepng_malloc(length + 1); + if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ + + key[length] = 0; + for(i = 0; i != length; ++i) key[i] = (char)data[i]; + + if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/ + + string2_begin = length + 2; + if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/ + + length = chunkLength - string2_begin; + /*will fail if zlib error, e.g. if length is too small*/ + error = zlib_decompress(&decoded.data, &decoded.size, + (unsigned char*)(&data[string2_begin]), + length, zlibsettings); + if(error) break; + ucvector_push_back(&decoded, 0); + + error = lodepng_add_text(info, key, (char*)decoded.data); + + break; + } + + lodepng_free(key); + ucvector_cleanup(&decoded); + + return error; +} + +/*international text chunk (iTXt)*/ +static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings, + const unsigned char* data, size_t chunkLength) +{ + unsigned error = 0; + unsigned i; + + unsigned length, begin, compressed; + char *key = 0, *langtag = 0, *transkey = 0; + ucvector decoded; + ucvector_init(&decoded); + + while(!error) /*not really a while loop, only used to break on error*/ + { + /*Quick check if the chunk length isn't too small. Even without check + it'd still fail with other error checks below if it's too short. This just gives a different error code.*/ + if(chunkLength < 5) CERROR_BREAK(error, 30); /*iTXt chunk too short*/ + + /*read the key*/ + for(length = 0; length < chunkLength && data[length] != 0; ++length) ; + if(length + 3 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination char, corrupt?*/ + if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ + + key = (char*)lodepng_malloc(length + 1); + if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ + + key[length] = 0; + for(i = 0; i != length; ++i) key[i] = (char)data[i]; + + /*read the compression method*/ + compressed = data[length + 1]; + if(data[length + 2] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/ + + /*even though it's not allowed by the standard, no error is thrown if + there's no null termination char, if the text is empty for the next 3 texts*/ + + /*read the langtag*/ + begin = length + 3; + length = 0; + for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length; + + langtag = (char*)lodepng_malloc(length + 1); + if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/ + + langtag[length] = 0; + for(i = 0; i != length; ++i) langtag[i] = (char)data[begin + i]; + + /*read the transkey*/ + begin += length + 1; + length = 0; + for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length; + + transkey = (char*)lodepng_malloc(length + 1); + if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/ + + transkey[length] = 0; + for(i = 0; i != length; ++i) transkey[i] = (char)data[begin + i]; + + /*read the actual text*/ + begin += length + 1; + + length = chunkLength < begin ? 0 : chunkLength - begin; + + if(compressed) { - unsigned char bit = readBitFromReversedStream(&ibp, in); - setBitOfReversedStream(&obp, out, bit); + /*will fail if zlib error, e.g. if length is too small*/ + error = zlib_decompress(&decoded.data, &decoded.size, + (unsigned char*)(&data[begin]), + length, zlibsettings); + if(error) break; + if(decoded.allocsize < decoded.size) decoded.allocsize = decoded.size; + ucvector_push_back(&decoded, 0); } - ibp += diff; + else + { + if(!ucvector_resize(&decoded, length + 1)) CERROR_BREAK(error, 83 /*alloc fail*/); + + decoded.data[length] = 0; + for(i = 0; i != length; ++i) decoded.data[i] = data[begin + i]; + } + + error = lodepng_add_itext(info, key, langtag, transkey, (char*)decoded.data); + + break; } + + lodepng_free(key); + lodepng_free(langtag); + lodepng_free(transkey); + ucvector_cleanup(&decoded); + + return error; } -/*out must be buffer big enough to contain full image, and in must contain the full decompressed data from -the IDAT chunks (with filter index bytes and possible padding bits) -return value is error*/ -static unsigned postProcessScanlines(unsigned char* out, unsigned char* in, - unsigned w, unsigned h, const LodePNGInfo* info_png) +static unsigned readChunk_tIME(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { - /* - This function converts the filtered-padded-interlaced data into pure 2D image buffer with the PNG's colortype. - Steps: - *) if no Adam7: 1) unfilter 2) remove padding bits (= posible extra bits per scanline if bpp < 8) - *) if adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace - NOTE: the in buffer will be overwritten with intermediate data! - */ - unsigned bpp = lodepng_get_bpp(&info_png->color); - if(bpp == 0) return 31; /*error: invalid colortype*/ + if(chunkLength != 7) return 73; /*invalid tIME chunk size*/ - if(info_png->interlace_method == 0) - { - if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8) - { - CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp)); - removePaddingBits(out, in, w * bpp, ((w * bpp + 7) / 8) * 8, h); - } - /*we can immediatly filter into the out buffer, no other steps needed*/ - else CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp)); - } - else /*interlace_method is 1 (Adam7)*/ - { - unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8]; - unsigned i; + info->time_defined = 1; + info->time.year = 256u * data[0] + data[1]; + info->time.month = data[2]; + info->time.day = data[3]; + info->time.hour = data[4]; + info->time.minute = data[5]; + info->time.second = data[6]; - Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + return 0; /* OK */ +} - for (i = 0; i < 7; i++) - { - CERROR_TRY_RETURN(unfilter(&in[padded_passstart[i]], &in[filter_passstart[i]], passw[i], passh[i], bpp)); - /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline, - move bytes instead of bits or move not at all*/ - if(bpp < 8) - { - /*remove padding bits in scanlines; after this there still may be padding - bits between the different reduced images: each reduced image still starts nicely at a byte*/ - removePaddingBits(&in[passstart[i]], &in[padded_passstart[i]], passw[i] * bpp, - ((passw[i] * bpp + 7) / 8) * 8, passh[i]); - } - } +static unsigned readChunk_pHYs(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) +{ + if(chunkLength != 9) return 74; /*invalid pHYs chunk size*/ - Adam7_deinterlace(out, in, w, h, bpp); - } + info->phys_defined = 1; + info->phys_x = 16777216u * data[0] + 65536u * data[1] + 256u * data[2] + data[3]; + info->phys_y = 16777216u * data[4] + 65536u * data[5] + 256u * data[6] + data[7]; + info->phys_unit = data[8]; - return 0; + return 0; /* OK */ } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ /*read a PNG, the result will be in the same color type as the PNG (hence "generic")*/ static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, @@ -4049,6 +4521,10 @@ static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* chunk; size_t i; ucvector idat; /*the data from idat chunks*/ + ucvector scanlines; + size_t predict; + size_t numpixels; + size_t outsize = 0; /*for unknown chunk order*/ unsigned unknown = 0; @@ -4062,6 +4538,14 @@ static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, state->error = lodepng_inspect(w, h, state, in, insize); /*reads header and resets other parameters in state->info_png*/ if(state->error) return; + numpixels = *w * *h; + + /*multiplication overflow*/ + if(*h != 0 && numpixels / *h != *w) CERROR_RETURN(state->error, 92); + /*multiplication overflow possible further below. Allows up to 2^31-1 pixel + bytes with 16-bit RGBA, the rest is room for filter bytes.*/ + if(numpixels > 268435455) CERROR_RETURN(state->error, 92); + ucvector_init(&idat); chunk = &in[33]; /*first byte of the first chunk after the header*/ @@ -4092,7 +4576,7 @@ static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, { size_t oldsize = idat.size; if(!ucvector_resize(&idat, oldsize + chunkLength)) CERROR_BREAK(state->error, 83 /*alloc fail*/); - for (i = 0; i < chunkLength; i++) idat.data[oldsize + i] = data[i]; + for(i = 0; i != chunkLength; ++i) idat.data[oldsize + i] = data[i]; #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS critical_pos = 3; #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ @@ -4105,24 +4589,8 @@ static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, /*palette chunk (PLTE)*/ else if(lodepng_chunk_type_equals(chunk, "PLTE")) { - unsigned pos = 0; - if(state->info_png.color.palette) myfree(state->info_png.color.palette); - state->info_png.color.palettesize = chunkLength / 3; - state->info_png.color.palette = (unsigned char*)mymalloc(4 * state->info_png.color.palettesize); - if(!state->info_png.color.palette && state->info_png.color.palettesize) - { - state->info_png.color.palettesize = 0; - CERROR_BREAK(state->error, 83); /*alloc fail*/ - } - if(state->info_png.color.palettesize > 256) CERROR_BREAK(state->error, 38); /*error: palette too big*/ - - for (i = 0; i < state->info_png.color.palettesize; i++) - { - state->info_png.color.palette[4 * i + 0] = data[pos++]; /*R*/ - state->info_png.color.palette[4 * i + 1] = data[pos++]; /*G*/ - state->info_png.color.palette[4 * i + 2] = data[pos++]; /*B*/ - state->info_png.color.palette[4 * i + 3] = 255; /*alpha*/ - } + state->error = readChunk_PLTE(&state->info_png.color, data, chunkLength); + if(state->error) break; #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS critical_pos = 2; #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ @@ -4130,106 +4598,23 @@ static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, /*palette transparency chunk (tRNS)*/ else if(lodepng_chunk_type_equals(chunk, "tRNS")) { - if(state->info_png.color.colortype == LCT_PALETTE) - { - /*error: more alpha values given than there are palette entries*/ - if(chunkLength > state->info_png.color.palettesize) CERROR_BREAK(state->error, 38); - - for (i = 0; i < chunkLength; i++) state->info_png.color.palette[4 * i + 3] = data[i]; - } - else if(state->info_png.color.colortype == LCT_GREY) - { - /*error: this chunk must be 2 bytes for greyscale image*/ - if(chunkLength != 2) CERROR_BREAK(state->error, 30); - - state->info_png.color.key_defined = 1; - state->info_png.color.key_r = state->info_png.color.key_g - = state->info_png.color.key_b = 256 * data[0] + data[1]; - } - else if(state->info_png.color.colortype == LCT_RGB) - { - /*error: this chunk must be 6 bytes for RGB image*/ - if(chunkLength != 6) CERROR_BREAK(state->error, 41); - - state->info_png.color.key_defined = 1; - state->info_png.color.key_r = 256 * data[0] + data[1]; - state->info_png.color.key_g = 256 * data[2] + data[3]; - state->info_png.color.key_b = 256 * data[4] + data[5]; - } - else CERROR_BREAK(state->error, 42); /*error: tRNS chunk not allowed for other color models*/ + state->error = readChunk_tRNS(&state->info_png.color, data, chunkLength); + if(state->error) break; } #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS /*background color chunk (bKGD)*/ else if(lodepng_chunk_type_equals(chunk, "bKGD")) { - if(state->info_png.color.colortype == LCT_PALETTE) - { - /*error: this chunk must be 1 byte for indexed color image*/ - if(chunkLength != 1) CERROR_BREAK(state->error, 43); - - state->info_png.background_defined = 1; - state->info_png.background_r = state->info_png.background_g = state->info_png.background_b = data[0]; - } - else if(state->info_png.color.colortype == LCT_GREY || state->info_png.color.colortype == LCT_GREY_ALPHA) - { - /*error: this chunk must be 2 bytes for greyscale image*/ - if(chunkLength != 2) CERROR_BREAK(state->error, 44); - - state->info_png.background_defined = 1; - state->info_png.background_r = state->info_png.background_g = state->info_png.background_b - = 256 * data[0] + data[1]; - } - else if(state->info_png.color.colortype == LCT_RGB || state->info_png.color.colortype == LCT_RGBA) - { - /*error: this chunk must be 6 bytes for greyscale image*/ - if(chunkLength != 6) CERROR_BREAK(state->error, 45); - - state->info_png.background_defined = 1; - state->info_png.background_r = 256 * data[0] + data[1]; - state->info_png.background_g = 256 * data[2] + data[3]; - state->info_png.background_b = 256 * data[4] + data[5]; - } + state->error = readChunk_bKGD(&state->info_png, data, chunkLength); + if(state->error) break; } /*text chunk (tEXt)*/ else if(lodepng_chunk_type_equals(chunk, "tEXt")) { if(state->decoder.read_text_chunks) { - char *key = 0, *str = 0; - - while(!state->error) /*not really a while loop, only used to break on error*/ - { - unsigned length, string2_begin; - - length = 0; - while(length < chunkLength && data[length] != 0) length++; - /*error, end reached, no null terminator?*/ - if(length + 1 >= chunkLength) CERROR_BREAK(state->error, 75); - - key = (char*)mymalloc(length + 1); - if(!key) CERROR_BREAK(state->error, 83); /*alloc fail*/ - - key[length] = 0; - for (i = 0; i < length; i++) key[i] = data[i]; - - string2_begin = length + 1; - /*error, end reached, no null terminator?*/ - if(string2_begin > chunkLength) CERROR_BREAK(state->error, 75); - - length = chunkLength - string2_begin; - str = (char*)mymalloc(length + 1); - if(!str) CERROR_BREAK(state->error, 83); /*alloc fail*/ - - str[length] = 0; - for (i = 0; i < length; i++) str[i] = data[string2_begin + i]; - - state->error = lodepng_add_text(&state->info_png, key, str); - - break; - } - - myfree(key); - myfree(str); + state->error = readChunk_tEXt(&state->info_png, data, chunkLength); + if(state->error) break; } } /*compressed text chunk (zTXt)*/ @@ -4237,42 +4622,7 @@ static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, { if(state->decoder.read_text_chunks) { - unsigned length, string2_begin; - char *key = 0; - ucvector decoded; - - ucvector_init(&decoded); - - while(!state->error) /*not really a while loop, only used to break on error*/ - { - for (length = 0; length < chunkLength && data[length] != 0; length++) ; - if(length + 2 >= chunkLength) CERROR_BREAK(state->error, 75); /*no null termination, corrupt?*/ - - key = (char*)mymalloc(length + 1); - if(!key) CERROR_BREAK(state->error, 83); /*alloc fail*/ - - key[length] = 0; - for (i = 0; i < length; i++) key[i] = data[i]; - - if(data[length + 1] != 0) CERROR_BREAK(state->error, 72); /*the 0 byte indicating compression must be 0*/ - - string2_begin = length + 2; - if(string2_begin > chunkLength) CERROR_BREAK(state->error, 75); /*no null termination, corrupt?*/ - - length = chunkLength - string2_begin; - state->error = zlib_decompress(&decoded.data, &decoded.size, - (unsigned char*)(&data[string2_begin]), - length, &state->decoder.zlibsettings); - if(state->error) break; - ucvector_push_back(&decoded, 0); - - state->error = lodepng_add_text(&state->info_png, key, (char*)decoded.data); - - break; - } - - myfree(key); - ucvector_cleanup(&decoded); + state->error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength); if(state->error) break; } } @@ -4281,110 +4631,19 @@ static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, { if(state->decoder.read_text_chunks) { - unsigned length, begin, compressed; - char *key = 0, *langtag = 0, *transkey = 0; - ucvector decoded; - ucvector_init(&decoded); - - while(!state->error) /*not really a while loop, only used to break on error*/ - { - /*Quick check if the chunk length isn't too small. Even without check - it'd still fail with other error checks below if it's too short. This just gives a different error code.*/ - if(chunkLength < 5) CERROR_BREAK(state->error, 30); /*iTXt chunk too short*/ - - /*read the key*/ - for (length = 0; length < chunkLength && data[length] != 0; length++) ; - if(length + 2 >= chunkLength) CERROR_BREAK(state->error, 75); /*no null termination char found*/ - - key = (char*)mymalloc(length + 1); - if(!key) CERROR_BREAK(state->error, 83); /*alloc fail*/ - - key[length] = 0; - for (i = 0; i < length; i++) key[i] = data[i]; - - /*read the compression method*/ - compressed = data[length + 1]; - if(data[length + 2] != 0) CERROR_BREAK(state->error, 72); /*the 0 byte indicating compression must be 0*/ - - /*read the langtag*/ - begin = length + 3; - length = 0; - for (i = begin; i < chunkLength && data[i] != 0; i++) length++; - if(begin + length + 1 >= chunkLength) CERROR_BREAK(state->error, 75); /*no null termination char found*/ - - langtag = (char*)mymalloc(length + 1); - if(!langtag) CERROR_BREAK(state->error, 83); /*alloc fail*/ - - langtag[length] = 0; - for (i = 0; i < length; i++) langtag[i] = data[begin + i]; - - /*read the transkey*/ - begin += length + 1; - length = 0; - for (i = begin; i < chunkLength && data[i] != 0; i++) length++; - if(begin + length + 1 >= chunkLength) CERROR_BREAK(state->error, 75); /*no null termination, corrupt?*/ - - transkey = (char*)mymalloc(length + 1); - if(!transkey) CERROR_BREAK(state->error, 83); /*alloc fail*/ - - transkey[length] = 0; - for (i = 0; i < length; i++) transkey[i] = data[begin + i]; - - /*read the actual text*/ - begin += length + 1; - if(begin > chunkLength) CERROR_BREAK(state->error, 75); /*no null termination, corrupt?*/ - - length = chunkLength - begin; - - if(compressed) - { - state->error = zlib_decompress(&decoded.data, &decoded.size, - (unsigned char*)(&data[begin]), - length, &state->decoder.zlibsettings); - if(state->error) break; - if(decoded.allocsize < decoded.size) decoded.allocsize = decoded.size; - ucvector_push_back(&decoded, 0); - } - else - { - if(!ucvector_resize(&decoded, length + 1)) CERROR_BREAK(state->error, 83 /*alloc fail*/); - - decoded.data[length] = 0; - for (i = 0; i < length; i++) decoded.data[i] = data[begin + i]; - } - - state->error = lodepng_add_itext(&state->info_png, key, langtag, transkey, (char*)decoded.data); - - break; - } - - myfree(key); - myfree(langtag); - myfree(transkey); - ucvector_cleanup(&decoded); + state->error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength); if(state->error) break; } } else if(lodepng_chunk_type_equals(chunk, "tIME")) { - if(chunkLength != 7) CERROR_BREAK(state->error, 73); /*invalid tIME chunk size*/ - - state->info_png.time_defined = 1; - state->info_png.time.year = 256 * data[0] + data[+ 1]; - state->info_png.time.month = data[2]; - state->info_png.time.day = data[3]; - state->info_png.time.hour = data[4]; - state->info_png.time.minute = data[5]; - state->info_png.time.second = data[6]; + state->error = readChunk_tIME(&state->info_png, data, chunkLength); + if(state->error) break; } else if(lodepng_chunk_type_equals(chunk, "pHYs")) { - if(chunkLength != 9) CERROR_BREAK(state->error, 74); /*invalid pHYs chunk size*/ - - state->info_png.phys_defined = 1; - state->info_png.phys_x = 16777216 * data[0] + 65536 * data[1] + 256 * data[2] + data[3]; - state->info_png.phys_y = 16777216 * data[4] + 65536 * data[5] + 256 * data[6] + data[7]; - state->info_png.phys_unit = data[8]; + state->error = readChunk_pHYs(&state->info_png, data, chunkLength); + if(state->error) break; } #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ else /*it's not an implemented chunk type, so ignore it: skip over the data*/ @@ -4411,36 +4670,48 @@ static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, if(!IEND) chunk = lodepng_chunk_next_const(chunk); } + ucvector_init(&scanlines); + /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation. + If the decompressed size does not match the prediction, the image must be corrupt.*/ + if(state->info_png.interlace_method == 0) + { + /*The extra *h is added because this are the filter bytes every scanline starts with*/ + predict = lodepng_get_raw_size_idat(*w, *h, &state->info_png.color) + *h; + } + else + { + /*Adam-7 interlaced: predicted size is the sum of the 7 sub-images sizes*/ + const LodePNGColorMode* color = &state->info_png.color; + predict = 0; + predict += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3); + if(*w > 4) predict += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3); + predict += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, color) + ((*h + 3) >> 3); + if(*w > 2) predict += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, color) + ((*h + 3) >> 2); + predict += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, color) + ((*h + 1) >> 2); + if(*w > 1) predict += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, color) + ((*h + 1) >> 1); + predict += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, color) + ((*h + 0) >> 1); + } + if(!state->error && !ucvector_reserve(&scanlines, predict)) state->error = 83; /*alloc fail*/ if(!state->error) { - ucvector scanlines; - ucvector_init(&scanlines); - - /*maximum final image length is already reserved in the vector's length - this is not really necessary*/ - if(!ucvector_resize(&scanlines, lodepng_get_raw_size(*w, *h, &state->info_png.color) + *h)) - { - state->error = 83; /*alloc fail*/ - } - if(!state->error) - { - /*decompress with the Zlib decompressor*/ - state->error = zlib_decompress(&scanlines.data, &scanlines.size, idat.data, - idat.size, &state->decoder.zlibsettings); - } - - if(!state->error) - { - ucvector outv; - ucvector_init(&outv); - if(!ucvector_resizev(&outv, - lodepng_get_raw_size(*w, *h, &state->info_png.color), 0)) state->error = 83; /*alloc fail*/ - if(!state->error) state->error = postProcessScanlines(outv.data, scanlines.data, *w, *h, &state->info_png); - *out = outv.data; - } - ucvector_cleanup(&scanlines); + state->error = zlib_decompress(&scanlines.data, &scanlines.size, idat.data, + idat.size, &state->decoder.zlibsettings); + if(!state->error && scanlines.size != predict) state->error = 91; /*decompressed size doesn't match prediction*/ } - ucvector_cleanup(&idat); + + if(!state->error) + { + outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color); + *out = (unsigned char*)lodepng_malloc(outsize); + if(!*out) state->error = 83; /*alloc fail*/ + } + if(!state->error) + { + for(i = 0; i < outsize; i++) (*out)[i] = 0; + state->error = postProcessScanlines(*out, scanlines.data, *w, *h, &state->info_png); + } + ucvector_cleanup(&scanlines); } unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h, @@ -4476,13 +4747,14 @@ unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h, } outsize = lodepng_get_raw_size(*w, *h, &state->info_raw); - *out = (unsigned char*)mymalloc(outsize); + *out = (unsigned char*)lodepng_malloc(outsize); if(!(*out)) { state->error = 83; /*alloc fail*/ } - else state->error = lodepng_convert(*out, data, &state->info_raw, &state->info_png.color, *w, *h); - myfree(data); + else state->error = lodepng_convert(*out, data, &state->info_raw, + &state->info_png.color, *w, *h); + lodepng_free(data); } return state->error; } @@ -4514,12 +4786,12 @@ unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, const u unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename, LodePNGColorType colortype, unsigned bitdepth) { - unsigned char* buffer; + unsigned char* buffer = 0; size_t buffersize; unsigned error; error = lodepng_load_file(&buffer, &buffersize, filename); if(!error) error = lodepng_decode_memory(out, w, h, buffer, buffersize, colortype, bitdepth); - myfree(buffer); + lodepng_free(buffer); return error; } @@ -4634,7 +4906,7 @@ static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info) size_t i; ucvector PLTE; ucvector_init(&PLTE); - for (i = 0; i < info->palettesize * 4; i++) + for(i = 0; i != info->palettesize * 4; ++i) { /*add all channels except alpha channel*/ if(i % 4 != 3) ucvector_push_back(&PLTE, info->palette[i]); @@ -4655,32 +4927,32 @@ static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info) { size_t amount = info->palettesize; /*the tail of palette values that all have 255 as alpha, does not have to be encoded*/ - for (i = info->palettesize; i > 0; i--) + for(i = info->palettesize; i != 0; --i) { - if(info->palette[4 * (i - 1) + 3] == 255) amount--; + if(info->palette[4 * (i - 1) + 3] == 255) --amount; else break; } /*add only alpha channel*/ - for (i = 0; i < amount; i++) ucvector_push_back(&tRNS, info->palette[4 * i + 3]); + for(i = 0; i != amount; ++i) ucvector_push_back(&tRNS, info->palette[4 * i + 3]); } else if(info->colortype == LCT_GREY) { if(info->key_defined) { - ucvector_push_back(&tRNS, (unsigned char)(info->key_r / 256)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_r % 256)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255)); } } else if(info->colortype == LCT_RGB) { if(info->key_defined) { - ucvector_push_back(&tRNS, (unsigned char)(info->key_r / 256)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_r % 256)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_g / 256)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_g % 256)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_b / 256)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_b % 256)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_g >> 8)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_g & 255)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_b >> 8)); + ucvector_push_back(&tRNS, (unsigned char)(info->key_b & 255)); } } @@ -4720,9 +4992,10 @@ static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* te size_t i; ucvector text; ucvector_init(&text); - for (i = 0; keyword[i] != 0; i++) ucvector_push_back(&text, (unsigned char)keyword[i]); - ucvector_push_back(&text, 0); - for (i = 0; textstring[i] != 0; i++) ucvector_push_back(&text, (unsigned char)textstring[i]); + for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)keyword[i]); + if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/ + ucvector_push_back(&text, 0); /*0 termination char*/ + for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)textstring[i]); error = addChunk(out, "tEXt", text.data, text.size); ucvector_cleanup(&text); @@ -4738,7 +5011,8 @@ static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* te ucvector_init(&data); ucvector_init(&compressed); - for (i = 0; keyword[i] != 0; i++) ucvector_push_back(&data, (unsigned char)keyword[i]); + for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]); + if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/ ucvector_push_back(&data, 0); /*0 termination char*/ ucvector_push_back(&data, 0); /*compression method: 0*/ @@ -4746,7 +5020,7 @@ static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* te (unsigned char*)textstring, textsize, zlibsettings); if(!error) { - for (i = 0; i < compressed.size; i++) ucvector_push_back(&data, compressed.data[i]); + for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]); error = addChunk(out, "zTXt", data.data, data.size); } @@ -4764,13 +5038,14 @@ static unsigned addChunk_iTXt(ucvector* out, unsigned compressed, const char* ke ucvector_init(&data); - for (i = 0; keyword[i] != 0; i++) ucvector_push_back(&data, (unsigned char)keyword[i]); + for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]); + if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/ ucvector_push_back(&data, 0); /*null termination char*/ ucvector_push_back(&data, compressed ? 1 : 0); /*compression flag*/ ucvector_push_back(&data, 0); /*compression method*/ - for (i = 0; langtag[i] != 0; i++) ucvector_push_back(&data, (unsigned char)langtag[i]); + for(i = 0; langtag[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)langtag[i]); ucvector_push_back(&data, 0); /*null termination char*/ - for (i = 0; transkey[i] != 0; i++) ucvector_push_back(&data, (unsigned char)transkey[i]); + for(i = 0; transkey[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)transkey[i]); ucvector_push_back(&data, 0); /*null termination char*/ if(compressed) @@ -4781,13 +5056,13 @@ static unsigned addChunk_iTXt(ucvector* out, unsigned compressed, const char* ke (unsigned char*)textstring, textsize, zlibsettings); if(!error) { - for (i = 0; i < compressed_data.size; i++) ucvector_push_back(&data, compressed_data.data[i]); + for(i = 0; i != compressed_data.size; ++i) ucvector_push_back(&data, compressed_data.data[i]); } ucvector_cleanup(&compressed_data); } else /*not compressed*/ { - for (i = 0; textstring[i] != 0; i++) ucvector_push_back(&data, (unsigned char)textstring[i]); + for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)textstring[i]); } if(!error) error = addChunk(out, "iTXt", data.data, data.size); @@ -4802,21 +5077,21 @@ static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info) ucvector_init(&bKGD); if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA) { - ucvector_push_back(&bKGD, (unsigned char)(info->background_r / 256)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_r % 256)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_r >> 8)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); } else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA) { - ucvector_push_back(&bKGD, (unsigned char)(info->background_r / 256)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_r % 256)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_g / 256)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_g % 256)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_b / 256)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_b % 256)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_r >> 8)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_g >> 8)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_g & 255)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_b >> 8)); + ucvector_push_back(&bKGD, (unsigned char)(info->background_b & 255)); } else if(info->color.colortype == LCT_PALETTE) { - ucvector_push_back(&bKGD, (unsigned char)(info->background_r % 256)); /*palette index*/ + ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); /*palette index*/ } error = addChunk(out, "bKGD", bKGD.data, bKGD.size); @@ -4828,17 +5103,17 @@ static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info) static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time) { unsigned error = 0; - unsigned char* data = (unsigned char*)mymalloc(7); + unsigned char* data = (unsigned char*)lodepng_malloc(7); if(!data) return 83; /*alloc fail*/ - data[0] = (unsigned char)(time->year / 256); - data[1] = (unsigned char)(time->year % 256); - data[2] = time->month; - data[3] = time->day; - data[4] = time->hour; - data[5] = time->minute; - data[6] = time->second; + data[0] = (unsigned char)(time->year >> 8); + data[1] = (unsigned char)(time->year & 255); + data[2] = (unsigned char)time->month; + data[3] = (unsigned char)time->day; + data[4] = (unsigned char)time->hour; + data[5] = (unsigned char)time->minute; + data[6] = (unsigned char)time->second; error = addChunk(out, "tIME", data, 7); - myfree(data); + lodepng_free(data); return error; } @@ -4867,63 +5142,64 @@ static void filterScanline(unsigned char* out, const unsigned char* scanline, co switch(filterType) { case 0: /*None*/ - for (i = 0; i < length; i++) out[i] = scanline[i]; + for(i = 0; i != length; ++i) out[i] = scanline[i]; break; case 1: /*Sub*/ - if(prevline) - { - for (i = 0; i < bytewidth; i++) out[i] = scanline[i]; - for (i = bytewidth; i < length; i++) out[i] = scanline[i] - scanline[i - bytewidth]; - } - else - { - for (i = 0; i < bytewidth; i++) out[i] = scanline[i]; - for (i = bytewidth; i < length; i++) out[i] = scanline[i] - scanline[i - bytewidth]; - } + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - scanline[i - bytewidth]; break; case 2: /*Up*/ if(prevline) { - for (i = 0; i < length; i++) out[i] = scanline[i] - prevline[i]; + for(i = 0; i != length; ++i) out[i] = scanline[i] - prevline[i]; } else { - for (i = 0; i < length; i++) out[i] = scanline[i]; + for(i = 0; i != length; ++i) out[i] = scanline[i]; } break; case 3: /*Average*/ if(prevline) { - for (i = 0; i < bytewidth; i++) out[i] = scanline[i] - prevline[i] / 2; - for (i = bytewidth; i < length; i++) out[i] = scanline[i] - ((scanline[i - bytewidth] + prevline[i]) / 2); + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i] - (prevline[i] >> 1); + for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - ((scanline[i - bytewidth] + prevline[i]) >> 1); } else { - for (i = 0; i < bytewidth; i++) out[i] = scanline[i]; - for (i = bytewidth; i < length; i++) out[i] = scanline[i] - scanline[i - bytewidth] / 2; + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - (scanline[i - bytewidth] >> 1); } break; case 4: /*Paeth*/ if(prevline) { /*paethPredictor(0, prevline[i], 0) is always prevline[i]*/ - for (i = 0; i < bytewidth; i++) out[i] = (scanline[i] - prevline[i]); - for (i = bytewidth; i < length; i++) + for(i = 0; i != bytewidth; ++i) out[i] = (scanline[i] - prevline[i]); + for(i = bytewidth; i < length; ++i) { out[i] = (scanline[i] - paethPredictor(scanline[i - bytewidth], prevline[i], prevline[i - bytewidth])); } } else { - for (i = 0; i < bytewidth; i++) out[i] = scanline[i]; + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; /*paethPredictor(scanline[i - bytewidth], 0, 0) is always scanline[i - bytewidth]*/ - for (i = bytewidth; i < length; i++) out[i] = (scanline[i] - scanline[i - bytewidth]); + for(i = bytewidth; i < length; ++i) out[i] = (scanline[i] - scanline[i - bytewidth]); } break; default: return; /*unexisting filter type given*/ } } +/* log2 approximation. A slight bit faster than std::log. */ +static float flog2(float f) +{ + float result = 0; + while(f > 32) { result += 4; f /= 16; } + while(f > 2) { ++result; f /= 2; } + return result + 1.442695f * (f * f * f / 3 - 3 * f * f / 2 + 3 * f - 1.83333f); +} + static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, const LodePNGColorMode* info, const LodePNGEncoderSettings* settings) { @@ -4941,6 +5217,8 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, const unsigned char* prevline = 0; unsigned x, y; unsigned error = 0; + LodePNGFilterStrategy strategy = settings->filter_strategy; + /* There is a heuristic called the minimum sum of absolute differences heuristic, suggested by the PNG standard: * If the image type is Palette, or the bit depth is smaller than 8, then do not filter the image (i.e. @@ -4948,51 +5226,66 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, * (The other case) If the image type is Grayscale or RGB (with or without Alpha), and the bit depth is not smaller than 8, then use adaptive filtering heuristic as follows: independently for each row, apply all five filters and select the filter that produces the smallest sum of absolute values per row. - This heuristic is used if filter strategy is LFS_HEURISTIC. Parts of it are used for filter strategies - LFS_MINSUM and LFS_ZERO. + This heuristic is used if filter strategy is LFS_MINSUM and filter_palette_zero is true. + + If filter_palette_zero is true and filter_strategy is not LFS_MINSUM, the above heuristic is followed, + but for "the other case", whatever strategy filter_strategy is set to instead of the minimum sum + heuristic is used. */ - int heuristic_zero = info->colortype == LCT_PALETTE || info->bitdepth < 8; + if(settings->filter_palette_zero && + (info->colortype == LCT_PALETTE || info->bitdepth < 8)) strategy = LFS_ZERO; if(bpp == 0) return 31; /*error: invalid color type*/ - if((!heuristic_zero && settings->filter_strategy == LFS_HEURISTIC)|| - settings->filter_strategy == LFS_MINSUM) + if(strategy == LFS_ZERO) + { + for(y = 0; y != h; ++y) + { + size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ + size_t inindex = linebytes * y; + out[outindex] = 0; /*filter type byte*/ + filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, 0); + prevline = &in[inindex]; + } + } + else if(strategy == LFS_MINSUM) { /*adaptive filtering*/ size_t sum[5]; - ucvector attempt[5]; /*five filtering attempts, one for each filter type*/ + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ size_t smallest = 0; - unsigned type, bestType = 0; + unsigned char type, bestType = 0; - for (type = 0; type < 5; type++) ucvector_init(&attempt[type]); - - for (type = 0; type < 5; type++) + for(type = 0; type != 5; ++type) { - if(!ucvector_resize(&attempt[type], linebytes)) ERROR_BREAK(83 /*alloc fail*/); + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) return 83; /*alloc fail*/ } if(!error) { - for (y = 0; y < h; y++) + for(y = 0; y != h; ++y) { /*try the 5 filter types*/ - for (type = 0; type < 5; type++) + for(type = 0; type != 5; ++type) { - filterScanline(attempt[type].data, &in[y * linebytes], prevline, linebytes, bytewidth, type); + filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); /*calculate the sum of the result*/ sum[type] = 0; - /*note that not all pixels are checked to speed this up while still having probably the best choice*/ - for (x = 0; x < attempt[type].size; x+=3) + if(type == 0) + { + for(x = 0; x != linebytes; ++x) sum[type] += (unsigned char)(attempt[type][x]); + } + else { - /*For differences, each byte should be treated as signed, values above 127 are negative - (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there. - This means filtertype 0 is almost never chosen, but that is justified.*/ - if(type == 0) sum[type] += (unsigned char)(attempt[type].data[x]); - else + for(x = 0; x != linebytes; ++x) { - signed char s = (signed char)(attempt[type].data[x]); - sum[type] += s < 0 ? -s : s; + /*For differences, each byte should be treated as signed, values above 127 are negative + (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there. + This means filtertype 0 is almost never chosen, but that is justified.*/ + unsigned char s = attempt[type][x]; + sum[type] += s < 128 ? s : (255U - s); } } @@ -5008,44 +5301,77 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, /*now fill the out values*/ out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for (x = 0; x < linebytes; x++) out[y * (linebytes + 1) + 1 + x] = attempt[bestType].data[x]; + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; } } - for (type = 0; type < 5; type++) ucvector_cleanup(&attempt[type]); + for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); } - else if((heuristic_zero && settings->filter_strategy == LFS_HEURISTIC)|| - settings->filter_strategy == LFS_ZERO) + else if(strategy == LFS_ENTROPY) { - for (y = 0; y < h; y++) + float sum[5]; + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ + float smallest = 0; + unsigned type, bestType = 0; + unsigned count[256]; + + for(type = 0; type != 5; ++type) { - size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ - size_t inindex = linebytes * y; - const unsigned TYPE = 0; - out[outindex] = TYPE; /*filter type byte*/ - filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, TYPE); - prevline = &in[inindex]; + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) return 83; /*alloc fail*/ + } + + for(y = 0; y != h; ++y) + { + /*try the 5 filter types*/ + for(type = 0; type != 5; ++type) + { + filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + for(x = 0; x != 256; ++x) count[x] = 0; + for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]]; + ++count[type]; /*the filter type itself is part of the scanline*/ + sum[type] = 0; + for(x = 0; x != 256; ++x) + { + float p = count[x] / (float)(linebytes + 1); + sum[type] += count[x] == 0 ? 0 : flog2(1 / p) * p; + } + /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ + if(type == 0 || sum[type] < smallest) + { + bestType = type; + smallest = sum[type]; + } + } + + prevline = &in[y * linebytes]; + + /*now fill the out values*/ + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; } + + for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); } - else if(settings->filter_strategy == LFS_PREDEFINED) + else if(strategy == LFS_PREDEFINED) { - for (y = 0; y < h; y++) + for(y = 0; y != h; ++y) { size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ size_t inindex = linebytes * y; - unsigned type = settings->predefined_filters[y]; + unsigned char type = settings->predefined_filters[y]; out[outindex] = type; /*filter type byte*/ filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type); prevline = &in[inindex]; } } - else /*LFS_BRUTE_FORCE*/ + else if(strategy == LFS_BRUTE_FORCE) { /*brute force filter chooser. deflate the scanline after every filter attempt to see which one deflates best. This is very slow and gives only slightly smaller, sometimes even larger, result*/ size_t size[5]; - ucvector attempt[5]; /*five filtering attempts, one for each filter type*/ + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ size_t smallest = 0; unsigned type = 0, bestType = 0; unsigned char* dummy; @@ -5059,23 +5385,23 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, images only, so disable it*/ zlibsettings.custom_zlib = 0; zlibsettings.custom_deflate = 0; - for (type = 0; type < 5; type++) + for(type = 0; type != 5; ++type) { - ucvector_init(&attempt[type]); - ucvector_resize(&attempt[type], linebytes); /*todo: give error if resize failed*/ + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) return 83; /*alloc fail*/ } - for (y = 0; y < h; y++) /*try the 5 filter types*/ + for(y = 0; y != h; ++y) /*try the 5 filter types*/ { - for (type = 0; type < 5; type++) + for(type = 0; type != 5; ++type) { - unsigned testsize = attempt[type].size; + unsigned testsize = linebytes; /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/ - filterScanline(attempt[type].data, &in[y * linebytes], prevline, linebytes, bytewidth, type); + filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); size[type] = 0; dummy = 0; - zlib_compress(&dummy, &size[type], attempt[type].data, testsize, &zlibsettings); - myfree(dummy); + zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings); + lodepng_free(dummy); /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/ if(type == 0 || size[type] < smallest) { @@ -5085,10 +5411,11 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, } prevline = &in[y * linebytes]; out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for (x = 0; x < linebytes; x++) out[y * (linebytes + 1) + 1 + x] = attempt[bestType].data[x]; + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; } - for (type = 0; type < 5; type++) ucvector_cleanup(&attempt[type]); + for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); } + else return 88; /* unknown filter strategy */ return error; } @@ -5101,24 +5428,33 @@ static void addPaddingBits(unsigned char* out, const unsigned char* in, unsigned y; size_t diff = olinebits - ilinebits; size_t obp = 0, ibp = 0; /*bit pointers*/ - for (y = 0; y < h; y++) + for(y = 0; y != h; ++y) { size_t x; - for (x = 0; x < ilinebits; x++) + for(x = 0; x < ilinebits; ++x) { unsigned char bit = readBitFromReversedStream(&ibp, in); setBitOfReversedStream(&obp, out, bit); } /*obp += diff; --> no, fill in some value in the padding bits too, to avoid "Use of uninitialised value of size ###" warning from valgrind*/ - for (x = 0; x < diff; x++) setBitOfReversedStream(&obp, out, 0); + for(x = 0; x != diff; ++x) setBitOfReversedStream(&obp, out, 0); } } +/* +in: non-interlaced image with size w*h +out: the same pixels, but re-ordered according to PNG's Adam7 interlacing, with + no padding bits between scanlines, but between reduced images so that each + reduced image starts at a byte. +bpp: bits per pixel +there are no padding bits, not between scanlines, not between reduced images +in has the following size in bits: w * h * bpp. +out is possibly bigger due to padding bits between reduced images +NOTE: comments about padding bits are only relevant if bpp < 8 +*/ static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) { - /*Note: this function works on image buffers WITHOUT padding bits at end of scanlines with non-multiple-of-8 - bit amounts, only between reduced images is padding*/ unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8]; unsigned i; @@ -5127,16 +5463,16 @@ static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigne if(bpp >= 8) { - for (i = 0; i < 7; i++) + for(i = 0; i != 7; ++i) { unsigned x, y, b; size_t bytewidth = bpp / 8; - for (y = 0; y < passh[i]; y++) - for (x = 0; x < passw[i]; x++) + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { size_t pixelinstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth; size_t pixeloutstart = passstart[i] + (y * passw[i] + x) * bytewidth; - for (b = 0; b < bytewidth; b++) + for(b = 0; b < bytewidth; ++b) { out[pixeloutstart + b] = in[pixelinstart + b]; } @@ -5145,18 +5481,18 @@ static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigne } else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ { - for (i = 0; i < 7; i++) + for(i = 0; i != 7; ++i) { unsigned x, y, b; unsigned ilinebits = bpp * passw[i]; unsigned olinebits = bpp * w; size_t obp, ibp; /*bit pointers (for out and in buffer)*/ - for (y = 0; y < passh[i]; y++) - for (x = 0; x < passw[i]; x++) + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { ibp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp; obp = (8 * passstart[i]) + (y * ilinebits + x * bpp); - for (b = 0; b < bpp; b++) + for(b = 0; b < bpp; ++b) { unsigned char bit = readBitFromReversedStream(&ibp, in); setBitOfReversedStream(&obp, out, bit); @@ -5183,7 +5519,7 @@ static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const if(info_png->interlace_method == 0) { *outsize = h + (h * ((w * bpp + 7) / 8)); /*image size plus an extra byte per scanline + possible padding bits*/ - *out = (unsigned char*)mymalloc(*outsize); + *out = (unsigned char*)lodepng_malloc(*outsize); if(!(*out) && (*outsize)) error = 83; /*alloc fail*/ if(!error) @@ -5191,70 +5527,65 @@ static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const /*non multiple of 8 bits per scanline, padding bits needed per scanline*/ if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8) { - ucvector padded; - ucvector_init(&padded); - if(!ucvector_resize(&padded, h * ((w * bpp + 7) / 8))) error = 83; /*alloc fail*/ + unsigned char* padded = (unsigned char*)lodepng_malloc(h * ((w * bpp + 7) / 8)); + if(!padded) error = 83; /*alloc fail*/ if(!error) { - addPaddingBits(padded.data, in, ((w * bpp + 7) / 8) * 8, w * bpp, h); - error = filter(*out, padded.data, w, h, &info_png->color, settings); + addPaddingBits(padded, in, ((w * bpp + 7) / 8) * 8, w * bpp, h); + error = filter(*out, padded, w, h, &info_png->color, settings); } - ucvector_cleanup(&padded); + lodepng_free(padded); } else { - /*we can immediatly filter into the out buffer, no other steps needed*/ + /*we can immediately filter into the out buffer, no other steps needed*/ error = filter(*out, in, w, h, &info_png->color, settings); } } } else /*interlace_method is 1 (Adam7)*/ { - unsigned char* adam7 = (unsigned char*)mymalloc((h * w * bpp + 7) / 8); - if(!adam7 && ((h * w * bpp + 7) / 8)) error = 83; /*alloc fail*/ + unsigned passw[7], passh[7]; + size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned char* adam7; - while(!error) /*not a real while loop, used to break out to cleanup to avoid a goto*/ - { - unsigned passw[7], passh[7]; - size_t filter_passstart[8], padded_passstart[8], passstart[8]; - unsigned i; + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); - Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + *outsize = filter_passstart[7]; /*image size plus an extra byte per scanline + possible padding bits*/ + *out = (unsigned char*)lodepng_malloc(*outsize); + if(!(*out)) error = 83; /*alloc fail*/ - *outsize = filter_passstart[7]; /*image size plus an extra byte per scanline + possible padding bits*/ - *out = (unsigned char*)mymalloc(*outsize); - if(!(*out) && (*outsize)) ERROR_BREAK(83 /*alloc fail*/); + adam7 = (unsigned char*)lodepng_malloc(passstart[7]); + if(!adam7 && passstart[7]) error = 83; /*alloc fail*/ - Adam7_interlace(adam7, in, w, h, bpp); + if(!error) + { + unsigned i; - for (i = 0; i < 7; i++) + Adam7_interlace(adam7, in, w, h, bpp); + for(i = 0; i != 7; ++i) { if(bpp < 8) { - ucvector padded; - ucvector_init(&padded); - if(!ucvector_resize(&padded, h * ((w * bpp + 7) / 8))) error = 83; /*alloc fail*/ - if(!error) - { - addPaddingBits(&padded.data[padded_passstart[i]], &adam7[passstart[i]], - ((passw[i] * bpp + 7) / 8) * 8, passw[i] * bpp, passh[i]); - error = filter(&(*out)[filter_passstart[i]], &padded.data[padded_passstart[i]], - passw[i], passh[i], &info_png->color, settings); - } - - ucvector_cleanup(&padded); + unsigned char* padded = (unsigned char*)lodepng_malloc(padded_passstart[i + 1] - padded_passstart[i]); + if(!padded) ERROR_BREAK(83); /*alloc fail*/ + addPaddingBits(padded, &adam7[passstart[i]], + ((passw[i] * bpp + 7) / 8) * 8, passw[i] * bpp, passh[i]); + error = filter(&(*out)[filter_passstart[i]], padded, + passw[i], passh[i], &info_png->color, settings); + lodepng_free(padded); } else { error = filter(&(*out)[filter_passstart[i]], &adam7[padded_passstart[i]], passw[i], passh[i], &info_png->color, settings); } - } - break; + if(error) break; + } } - myfree(adam7); + lodepng_free(adam7); } return error; @@ -5268,9 +5599,10 @@ returns 2 if the palette is semi-translucent. */ static unsigned getPaletteTranslucency(const unsigned char* palette, size_t palettesize) { - size_t i, key = 0; - unsigned r, g, b; /*the value of the color with alpha 0, so long as color keying is possible*/ - for (i = 0; i < palettesize; i++) + size_t i; + unsigned key = 0; + unsigned r = 0, g = 0, b = 0; /*the value of the color with alpha 0, so long as color keying is possible*/ + for(i = 0; i != palettesize; ++i) { if(!key && palette[4 * i + 3] == 0) { @@ -5323,17 +5655,12 @@ unsigned lodepng_encode(unsigned char** out, size_t* outsize, return state->error; } - if(state->encoder.auto_convert != LAC_NO) + if(state->encoder.auto_convert) { - state->error = doAutoChooseColor(&info.color, image, w, h, &state->info_raw, - state->encoder.auto_convert); + state->error = lodepng_auto_choose_color(&info.color, image, w, h, &state->info_raw); } if(state->error) return state->error; - if(state->encoder.zlibsettings.windowsize > 32768) - { - CERROR_RETURN_ERROR(state->error, 60); /*error: windowsize larger than allowed*/ - } if(state->encoder.zlibsettings.btype > 2) { CERROR_RETURN_ERROR(state->error, 61); /*error: unexisting btype*/ @@ -5342,24 +5669,25 @@ unsigned lodepng_encode(unsigned char** out, size_t* outsize, { CERROR_RETURN_ERROR(state->error, 71); /*error: unexisting interlace mode*/ } - /*error: unexisting color type given*/ - if((state->error = checkColorValidity(info.color.colortype, info.color.bitdepth))) return state->error; - /*error: unexisting color type given*/ - if((state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth))) return state->error; + + state->error = checkColorValidity(info.color.colortype, info.color.bitdepth); + if(state->error) return state->error; /*error: unexisting color type given*/ + state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth); + if(state->error) return state->error; /*error: unexisting color type given*/ if(!lodepng_color_mode_equal(&state->info_raw, &info.color)) { unsigned char* converted; - size_t size = (w * h * lodepng_get_bpp(&info.color) + 7) / 8; + size_t size = (w * h * (size_t)lodepng_get_bpp(&info.color) + 7) / 8; - converted = (unsigned char*)mymalloc(size); + converted = (unsigned char*)lodepng_malloc(size); if(!converted && size) state->error = 83; /*alloc fail*/ if(!state->error) { state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h); } if(!state->error) preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder); - myfree(converted); + lodepng_free(converted); } else preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder); @@ -5419,7 +5747,7 @@ unsigned lodepng_encode(unsigned char** out, size_t* outsize, /*tIME*/ if(info.time_defined) addChunk_tIME(&outv, &info.time); /*tEXt and/or zTXt*/ - for (i = 0; i < info.text_num; i++) + for(i = 0; i != info.text_num; ++i) { if(strlen(info.text_keys[i]) > 79) { @@ -5432,15 +5760,19 @@ unsigned lodepng_encode(unsigned char** out, size_t* outsize, break; } if(state->encoder.text_compression) + { addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings); + } else + { addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]); + } } /*LodePNG version id in text chunk*/ if(state->encoder.add_id) { unsigned alread_added_id_text = 0; - for (i = 0; i < info.text_num; i++) + for(i = 0; i != info.text_num; ++i) { if(!strcmp(info.text_keys[i], "LodePNG")) { @@ -5449,10 +5781,12 @@ unsigned lodepng_encode(unsigned char** out, size_t* outsize, } } if(alread_added_id_text == 0) - addChunk_tEXt(&outv, "LodePNG", VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/ + { + addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/ + } } /*iTXt*/ - for (i = 0; i < info.itext_num; i++) + for(i = 0; i != info.itext_num; ++i) { if(strlen(info.itext_keys[i]) > 79) { @@ -5476,14 +5810,13 @@ unsigned lodepng_encode(unsigned char** out, size_t* outsize, if(state->error) break; } #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - /*IEND*/ addChunk_IEND(&outv); break; /*this isn't really a while loop; no error happened so break out now!*/ } lodepng_info_cleanup(&info); - myfree(data); + lodepng_free(data); /*instead of cleaning the vector up, give it to the output*/ *out = outv.data; *outsize = outv.size; @@ -5525,7 +5858,7 @@ unsigned lodepng_encode_file(const char* filename, const unsigned char* image, u size_t buffersize; unsigned error = lodepng_encode_memory(&buffer, &buffersize, image, w, h, colortype, bitdepth); if(!error) error = lodepng_save_file(buffer, buffersize, filename); - myfree(buffer); + lodepng_free(buffer); return error; } @@ -5543,8 +5876,9 @@ unsigned lodepng_encode24_file(const char* filename, const unsigned char* image, void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings) { lodepng_compress_settings_init(&settings->zlibsettings); - settings->filter_strategy = LFS_HEURISTIC; - settings->auto_convert = LAC_AUTO; + settings->filter_palette_zero = 1; + settings->filter_strategy = LFS_MINSUM; + settings->auto_convert = 1; settings->force_palette = 0; settings->predefined_filters = 0; #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS @@ -5578,13 +5912,11 @@ const char* lodepng_error_text(unsigned code) case 19: return "end of out buffer memory reached while inflating"; case 20: return "invalid deflate block BTYPE encountered while decoding"; case 21: return "NLEN is not ones complement of LEN in a deflate block"; - /*end of out buffer memory reached while inflating: This can happen if the inflated deflate data is longer than the amount of bytes required to fill up all the pixels of the image, given the color depth and image dimensions. Something that doesn't happen in a normal, well encoded, PNG image.*/ case 22: return "end of out buffer memory reached while inflating"; - case 23: return "end of in buffer memory reached while inflating"; case 24: return "invalid FCHECK in zlib header"; case 25: return "invalid compression method in zlib header"; @@ -5609,24 +5941,17 @@ const char* lodepng_error_text(unsigned code) case 43: return "bKGD chunk has wrong size for palette image"; case 44: return "bKGD chunk has wrong size for greyscale image"; case 45: return "bKGD chunk has wrong size for RGB image"; - /*Is the palette too small?*/ - case 46: return "a value in indexed image is larger than the palette size (bitdepth = 8)"; - /*Is the palette too small?*/ - case 47: return "a value in indexed image is larger than the palette size (bitdepth < 8)"; - /*the input data is empty, maybe a PNG file doesn't exist or is in the wrong path*/ - case 48: return "empty input or file doesn't exist"; + case 48: return "empty input buffer given to decoder. Maybe caused by non-existing file?"; case 49: return "jumped past memory while generating dynamic huffman tree"; case 50: return "jumped past memory while generating dynamic huffman tree"; case 51: return "jumped past memory while inflating huffman block"; case 52: return "jumped past memory while inflating"; case 53: return "size of zlib data too small"; case 54: return "repeat symbol in tree while there was no value symbol yet"; - /*jumped past tree while generating huffman tree, this could be when the tree will have more leaves than symbols after generating it out of the given lenghts. They call this an oversubscribed dynamic bit lengths tree in zlib.*/ case 55: return "jumped past tree while generating huffman tree"; - case 56: return "given output image colortype or bitdepth not supported for color conversion"; case 57: return "invalid CRC encountered (checking CRC can be disabled)"; case 58: return "invalid ADLER32 encountered (checking ADLER32 can be disabled)"; @@ -5657,9 +5982,16 @@ const char* lodepng_error_text(unsigned code) case 82: return "color conversion to palette requested while a color isn't in palette"; case 83: return "memory allocation failed"; case 84: return "given image too small to contain all pixels to be encoded"; - case 85: return "internal color conversion bug"; case 86: return "impossible offset in lz77 encoding (internal bug)"; case 87: return "must provide custom zlib function pointer if LODEPNG_COMPILE_ZLIB is not defined"; + case 88: return "invalid filter strategy given for LodePNGEncoderSettings.filter_strategy"; + case 89: return "text chunk keyword too short or long: must have size 1-79"; + /*the windowsize in the LodePNGCompressSettings. Requiring POT(==> & instead of %) makes encoding 12% faster.*/ + case 90: return "windowsize must be a power of two"; + case 91: return "invalid decompressed idat size"; + case 92: return "too many pixels, not supported"; + case 93: return "zero width or height is invalid"; + case 94: return "header chunk must have a size of 13 bytes"; } return "unknown error code"; } @@ -5671,33 +6003,25 @@ const char* lodepng_error_text(unsigned code) /* ////////////////////////////////////////////////////////////////////////// */ /* ////////////////////////////////////////////////////////////////////////// */ - #ifdef LODEPNG_COMPILE_CPP namespace lodepng { #ifdef LODEPNG_COMPILE_DISK -void load_file(std::vector& buffer, const std::string& filename) +unsigned load_file(std::vector& buffer, const std::string& filename) { - std::ifstream file(filename.c_str(), std::ios::in|std::ios::binary|std::ios::ate); - - /*get filesize*/ - std::streamsize size = 0; - if(file.seekg(0, std::ios::end).good()) size = file.tellg(); - if(file.seekg(0, std::ios::beg).good()) size -= file.tellg(); - - /*read contents of the file into the vector*/ - buffer.resize(size_t(size)); - if(size > 0) file.read((char*)(&buffer[0]), size); + long size = lodepng_filesize(filename.c_str()); + if(size < 0) return 78; + buffer.resize((size_t)size); + return size == 0 ? 0 : lodepng_buffer_file(&buffer[0], (size_t)size, filename.c_str()); } /*write given buffer to the file, overwriting the file, it doesn't append to it.*/ -void save_file(const std::vector& buffer, const std::string& filename) +unsigned save_file(const std::vector& buffer, const std::string& filename) { - std::ofstream file(filename.c_str(), std::ios::out|std::ios::binary); - file.write(buffer.empty() ? 0 : (char*)&buffer[0], std::streamsize(buffer.size())); + return lodepng_save_file(buffer.empty() ? 0 : &buffer[0], buffer.size(), filename.c_str()); } -#endif //LODEPNG_COMPILE_DISK +#endif /* LODEPNG_COMPILE_DISK */ #ifdef LODEPNG_COMPILE_ZLIB #ifdef LODEPNG_COMPILE_DECODER @@ -5710,7 +6034,7 @@ unsigned decompress(std::vector& out, const unsigned char* in, si if(buffer) { out.insert(out.end(), &buffer[0], &buffer[buffersize]); - myfree(buffer); + lodepng_free(buffer); } return error; } @@ -5720,7 +6044,7 @@ unsigned decompress(std::vector& out, const std::vector& out, const unsigned char* in, size_t insize, @@ -5732,7 +6056,7 @@ unsigned compress(std::vector& out, const unsigned char* in, size if(buffer) { out.insert(out.end(), &buffer[0], &buffer[buffersize]); - myfree(buffer); + lodepng_free(buffer); } return error; } @@ -5742,8 +6066,8 @@ unsigned compress(std::vector& out, const std::vector& out, unsigned& w, unsigned& h, const state.info_raw.bitdepth = bitdepth; size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw); out.insert(out.end(), &buffer[0], &buffer[buffersize]); - myfree(buffer); + lodepng_free(buffer); } return error; } @@ -5799,14 +6123,14 @@ unsigned decode(std::vector& out, unsigned& w, unsigned& h, State& state, const unsigned char* in, size_t insize) { - unsigned char* buffer; + unsigned char* buffer = NULL; unsigned error = lodepng_decode(&buffer, &w, &h, &state, in, insize); if(buffer && !error) { size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw); out.insert(out.end(), &buffer[0], &buffer[buffersize]); - myfree(buffer); } + lodepng_free(buffer); return error; } @@ -5822,11 +6146,12 @@ unsigned decode(std::vector& out, unsigned& w, unsigned& h, const LodePNGColorType colortype, unsigned bitdepth) { std::vector buffer; - load_file(buffer, filename); + unsigned error = load_file(buffer, filename); + if(error) return error; return decode(out, w, h, buffer, colortype, bitdepth); } -#endif //LODEPNG_COMPILE_DECODER -#endif //LODEPNG_COMPILE_DISK +#endif /* LODEPNG_COMPILE_DECODER */ +#endif /* LODEPNG_COMPILE_DISK */ #ifdef LODEPNG_COMPILE_ENCODER unsigned encode(std::vector& out, const unsigned char* in, unsigned w, unsigned h, @@ -5838,7 +6163,7 @@ unsigned encode(std::vector& out, const unsigned char* in, unsign if(buffer) { out.insert(out.end(), &buffer[0], &buffer[buffersize]); - myfree(buffer); + lodepng_free(buffer); } return error; } @@ -5861,7 +6186,7 @@ unsigned encode(std::vector& out, if(buffer) { out.insert(out.end(), &buffer[0], &buffer[buffersize]); - myfree(buffer); + lodepng_free(buffer); } return error; } @@ -5881,7 +6206,7 @@ unsigned encode(const std::string& filename, { std::vector buffer; unsigned error = encode(buffer, in, w, h, colortype, bitdepth); - if(!error) save_file(buffer, filename); + if(!error) error = save_file(buffer, filename); return error; } @@ -5892,8 +6217,8 @@ unsigned encode(const std::string& filename, if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84; return encode(filename, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth); } -#endif //LODEPNG_COMPILE_DISK -#endif //LODEPNG_COMPILE_ENCODER -#endif //LODEPNG_COMPILE_PNG -} //namespace lodepng +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_ENCODER */ +#endif /* LODEPNG_COMPILE_PNG */ +} /* namespace lodepng */ #endif /*LODEPNG_COMPILE_CPP*/ diff --git a/dart/external/lodepng/lodepng.h b/dart/external/lodepng/lodepng.h index d03d7cce929cc..fcf9f714bc94e 100644 --- a/dart/external/lodepng/lodepng.h +++ b/dart/external/lodepng/lodepng.h @@ -1,7 +1,7 @@ /* -LodePNG version 20120901 +LodePNG version 20160501 -Copyright (c) 2005-2012 Lode Vandevenne +Copyright (c) 2005-2016 Lode Vandevenne This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -28,10 +28,7 @@ freely, subject to the following restrictions: #include /*for size_t*/ -#ifdef __cplusplus -#include -#include -#endif /*__cplusplus*/ +extern const char* LODEPNG_VERSION_STRING; /* The following #defines are used to create code sections. They can be disabled @@ -39,6 +36,8 @@ to disable code sections, which can give faster compile time and smaller binary. The "NO_COMPILE" defines are designed to be used to pass as defines to the compiler command to disable them without modifying this header, e.g. -DLODEPNG_NO_COMPILE_ZLIB for gcc. +In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to +allow implementing a custom lodepng_crc32. */ /*deflate & zlib. If disabled, you must specify alternative zlib functions in the custom_zlib field of the compress and decompress settings*/ @@ -69,6 +68,12 @@ the custom_zlib field of the compress and decompress settings*/ #ifndef LODEPNG_NO_COMPILE_ERROR_TEXT #define LODEPNG_COMPILE_ERROR_TEXT #endif +/*Compile the default allocators (C's free, malloc and realloc). If you disable this, +you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your +source files with custom allocators.*/ +#ifndef LODEPNG_NO_COMPILE_ALLOCATORS +#define LODEPNG_COMPILE_ALLOCATORS +#endif /*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/ #ifdef __cplusplus #ifndef LODEPNG_NO_COMPILE_CPP @@ -76,6 +81,11 @@ the custom_zlib field of the compress and decompress settings*/ #endif #endif +#ifdef LODEPNG_COMPILE_CPP +#include +#include +#endif /*LODEPNG_COMPILE_CPP*/ + #ifdef LODEPNG_COMPILE_PNG /*The PNG color types (also used for raw).*/ typedef enum LodePNGColorType @@ -94,6 +104,7 @@ out: Output parameter. Pointer to buffer that will contain the raw pixel data. After decoding, its size is w * h * (bytes per pixel) bytes larger than initially. Bytes per pixel depends on colortype and bitdepth. Must be freed after usage with free(*out). + Note: for 16-bit per channel colors, uses big endian format like PNG does. w: Output parameter. Pointer to width of pixel data. h: Output parameter. Pointer to height of pixel data. in: Memory buffer with the PNG file. @@ -139,7 +150,8 @@ unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, Converts raw pixel data into a PNG image in memory. The colortype and bitdepth of the output PNG image cannot be chosen, they are automatically determined by the colortype, bitdepth and content of the input pixel data. -out: Output parameter. Pointer to buffer that will contain the raw pixel data. + Note: for 16-bit per channel colors, needs big endian format like PNG does. +out: Output parameter. Pointer to buffer that will contain the PNG image data. Must be freed after usage with free(*out). outsize: Output parameter. Pointer to the size in bytes of the out buffer. image: The raw pixel data to encode. The size of this buffer should be @@ -187,7 +199,8 @@ unsigned lodepng_encode24_file(const char* filename, namespace lodepng { #ifdef LODEPNG_COMPILE_DECODER -/*Same as lodepng_decode_memory, but decodes to an std::vector.*/ +/*Same as lodepng_decode_memory, but decodes to an std::vector. The colortype +is the format to output the pixels to. Default is RGBA 8-bit per channel.*/ unsigned decode(std::vector& out, unsigned& w, unsigned& h, const unsigned char* in, size_t insize, LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); @@ -202,11 +215,12 @@ Same as the other decode functions, but instead takes a filename as input. unsigned decode(std::vector& out, unsigned& w, unsigned& h, const std::string& filename, LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -#endif //LODEPNG_COMPILE_DISK -#endif //LODEPNG_COMPILE_DECODER +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_DECODER */ #ifdef LODEPNG_COMPILE_ENCODER -/*Same as lodepng_encode_memory, but encodes to an std::vector.*/ +/*Same as lodepng_encode_memory, but encodes to an std::vector. colortype +is that of the raw input data. The output PNG color type will be auto chosen.*/ unsigned encode(std::vector& out, const unsigned char* in, unsigned w, unsigned h, LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); @@ -225,9 +239,9 @@ unsigned encode(const std::string& filename, unsigned encode(const std::string& filename, const std::vector& in, unsigned w, unsigned h, LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -#endif //LODEPNG_COMPILE_DISK -#endif //LODEPNG_COMPILE_ENCODER -} //namespace lodepng +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_ENCODER */ +} /* namespace lodepng */ #endif /*LODEPNG_COMPILE_CPP*/ #endif /*LODEPNG_COMPILE_PNG*/ @@ -254,7 +268,7 @@ struct LodePNGDecompressSettings const unsigned char*, size_t, const LodePNGDecompressSettings*); - void* custom_context; /*optional custom settings for custom functions*/ + const void* custom_context; /*optional custom settings for custom functions*/ }; extern const LodePNGDecompressSettings lodepng_default_decompress_settings; @@ -272,7 +286,10 @@ struct LodePNGCompressSettings /*deflate = compress*/ /*LZ77 related settings*/ unsigned btype; /*the block type for LZ (0, 1, 2 or 3, see zlib standard). Should be 2 for proper compression.*/ unsigned use_lz77; /*whether or not to use LZ77. Should be 1 for proper compression.*/ - unsigned windowsize; /*the maximum is 32768, higher gives more compression but is slower. Typical value: 2048.*/ + unsigned windowsize; /*must be a power of two <= 32768. higher compresses more but is slower. Default value: 2048.*/ + unsigned minmatch; /*mininum lz77 length. 3 is normally best, 6 can be better for some PNGs. Default: 0*/ + unsigned nicematch; /*stop searching if >= this length found. Set to 258 for best compression. Default: 128*/ + unsigned lazymatching; /*use lazy matching: better compression but a bit slower. Default: true*/ /*use custom zlib encoder instead of built in one (default: null)*/ unsigned (*custom_zlib)(unsigned char**, size_t*, @@ -285,7 +302,7 @@ struct LodePNGCompressSettings /*deflate = compress*/ const unsigned char*, size_t, const LodePNGCompressSettings*); - void* custom_context; /*optional custom settings for custom functions*/ + const void* custom_context; /*optional custom settings for custom functions*/ }; extern const LodePNGCompressSettings lodepng_default_compress_settings; @@ -317,7 +334,7 @@ typedef struct LodePNGColorMode The palette is only supported for color type 3. */ - unsigned char* palette; /*palette in RGBARGBA... order*/ + unsigned char* palette; /*palette in RGBARGBA... order. When allocated, must be either 0, or have size 1024*/ size_t palettesize; /*palette size in number of colors (amount of bytes is 4 * palettesize)*/ /* @@ -485,13 +502,15 @@ LodePNGColorMode structs to describe the input and output color type. See the reference manual at the end of this header file to see which color conversions are supported. return value = LodePNG error code (0 if all went ok, an error if the conversion isn't supported) The out buffer must have size (w * h * bpp + 7) / 8, where bpp is the bits per pixel -of the output color type (lodepng_get_bpp) +of the output color type (lodepng_get_bpp). +For < 8 bpp images, there should not be padding bits at the end of scanlines. +For 16-bit per channel colors, uses big endian format like PNG does. +Return value is LodePNG error code */ unsigned lodepng_convert(unsigned char* out, const unsigned char* in, - LodePNGColorMode* mode_out, LodePNGColorMode* mode_in, + const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, unsigned w, unsigned h); - #ifdef LODEPNG_COMPILE_DECODER /* Settings for the decoder. This contains settings for the PNG and the Zlib @@ -502,6 +521,7 @@ typedef struct LodePNGDecoderSettings LodePNGDecompressSettings zlibsettings; /*in here is the setting to ignore Adler32 checksums*/ unsigned ignore_crc; /*ignore CRC checksums*/ + unsigned color_convert; /*whether to convert the PNG to the color type you want. Default: yes*/ #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS @@ -518,46 +538,69 @@ void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings); /*automatically use color type with less bits per pixel if losslessly possible. Default: AUTO*/ typedef enum LodePNGFilterStrategy { - LFS_HEURISTIC, /*official PNG heuristic*/ - LFS_ZERO, /*every filter at zero*/ - LFS_MINSUM, /*like the official PNG heuristic, but use minimal sum always, including palette and low bitdepth images*/ + /*every filter at zero*/ + LFS_ZERO, + /*Use filter that gives minimum sum, as described in the official PNG filter heuristic.*/ + LFS_MINSUM, + /*Use the filter type that gives smallest Shannon entropy for this scanline. Depending + on the image, this is better or worse than minsum.*/ + LFS_ENTROPY, /* Brute-force-search PNG filters by compressing each filter for each scanline. - This gives better compression, at the cost of being super slow. Experimental! - If you enable this, also set zlibsettings.windowsize to 32768 and choose an - optimal color mode for the PNG image for best compression. Default: 0 (false). + Experimental, very slow, and only rarely gives better compression than MINSUM. */ LFS_BRUTE_FORCE, - LFS_PREDEFINED /*use predefined_filters buffer: you specify the filter type for each scanline*/ + /*use predefined_filters buffer: you specify the filter type for each scanline*/ + LFS_PREDEFINED } LodePNGFilterStrategy; -/*automatically use color type with less bits per pixel if losslessly possible. Default: LAC_AUTO*/ -typedef enum LodePNGAutoConvert +/*Gives characteristics about the colors of the image, which helps decide which color model to use for encoding. +Used internally by default if "auto_convert" is enabled. Public because it's useful for custom algorithms.*/ +typedef struct LodePNGColorProfile { - LAC_NO, /*use color type user requested*/ - LAC_ALPHA, /*use color type user requested, but if only opaque pixels and RGBA or grey+alpha, use RGB or grey*/ - LAC_AUTO, /*use PNG color type that can losslessly represent the uncompressed image the smallest possible*/ - /* - like AUTO, but do not choose 1, 2 or 4 bit per pixel types. - sometimes a PNG image compresses worse if less than 8 bits per pixels. - */ - LAC_AUTO_NO_NIBBLES -} LodePNGAutoConvert; - + unsigned colored; /*not greyscale*/ + unsigned key; /*if true, image is not opaque. Only if true and alpha is false, color key is possible.*/ + unsigned short key_r; /*these values are always in 16-bit bitdepth in the profile*/ + unsigned short key_g; + unsigned short key_b; + unsigned alpha; /*alpha channel or alpha palette required*/ + unsigned numcolors; /*amount of colors, up to 257. Not valid if bits == 16.*/ + unsigned char palette[1024]; /*Remembers up to the first 256 RGBA colors, in no particular order*/ + unsigned bits; /*bits per channel (not for palette). 1,2 or 4 for greyscale only. 16 if 16-bit per channel required.*/ +} LodePNGColorProfile; + +void lodepng_color_profile_init(LodePNGColorProfile* profile); + +/*Get a LodePNGColorProfile of the image.*/ +unsigned lodepng_get_color_profile(LodePNGColorProfile* profile, + const unsigned char* image, unsigned w, unsigned h, + const LodePNGColorMode* mode_in); +/*The function LodePNG uses internally to decide the PNG color with auto_convert. +Chooses an optimal color model, e.g. grey if only grey pixels, palette if < 256 colors, ...*/ +unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, + const unsigned char* image, unsigned w, unsigned h, + const LodePNGColorMode* mode_in); /*Settings for the encoder.*/ typedef struct LodePNGEncoderSettings { LodePNGCompressSettings zlibsettings; /*settings for the zlib encoder, such as window size, ...*/ - LodePNGAutoConvert auto_convert; /*how to automatically choose output PNG color type, if at all*/ + unsigned auto_convert; /*automatically choose output PNG color type. Default: true*/ + /*If true, follows the official PNG heuristic: if the PNG uses a palette or lower than + 8 bit depth, set all filters to zero. Otherwise use the filter_strategy. Note that to + completely follow the official PNG heuristic, filter_palette_zero must be true and + filter_strategy must be LFS_MINSUM*/ + unsigned filter_palette_zero; + /*Which filter strategy to use when not using zeroes due to filter_palette_zero. + Set filter_palette_zero to 0 to ensure always using your chosen strategy. Default: LFS_MINSUM*/ LodePNGFilterStrategy filter_strategy; - /*used if filter_strategy is LFS_PREDEFINED. In that case, this must point to a buffer with - the same length as the amount of scanlines in the image, and each value must <= 5. You - have to cleanup this buffer, LodePNG will never free it.*/ - unsigned char* predefined_filters; + the same length as the amount of scanlines in the image, and each value must <= 5. You + have to cleanup this buffer, LodePNG will never free it. Don't forget that filter_palette_zero + must be set to 0 to ensure this is also used on palette or low bitdepth images.*/ + const unsigned char* predefined_filters; /*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette). If colortype is 3, PLTE is _always_ created.*/ @@ -588,7 +631,7 @@ typedef struct LodePNGState LodePNGInfo info_png; /*info of the PNG image obtained after decoding*/ unsigned error; #ifdef LODEPNG_COMPILE_CPP - //For the lodepng::State subclass. + /* For the lodepng::State subclass. */ virtual ~LodePNGState(){} #endif } LodePNGState; @@ -638,7 +681,11 @@ Third byte: must be uppercase Fourth byte: uppercase = unsafe to copy, lowercase = safe to copy */ -/*get the length of the data of the chunk. Total chunk length has 12 bytes more.*/ +/* +Gets the length of the data of the chunk. Total chunk length has 12 bytes more. +There must be at least 4 bytes to read from. If the result value is too large, +it may be corrupt data. +*/ unsigned lodepng_chunk_length(const unsigned char* chunk); /*puts the 4-byte type in null terminated string*/ @@ -708,7 +755,7 @@ unsigned lodepng_inflate(unsigned char** out, size_t* outsize, /* Decompresses Zlib data. Reallocates the out buffer and appends the data. The data must be according to the zlib specification. -Either, *out must be nullptr and *outsize must be 0, or, *out must be a valid +Either, *out must be NULL and *outsize must be 0, or, *out must be a valid buffer and *outsize its size in bytes. out must be freed by user after usage. */ unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, @@ -721,7 +768,7 @@ unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, Compresses data with Zlib. Reallocates the out buffer and appends the data. Zlib adds a small header and trailer around the deflate data. The data is output in the format of the zlib specification. -Either, *out must be nullptr and *outsize must be 0, or, *out must be a valid +Either, *out must be NULL and *outsize must be 0, or, *out must be a valid buffer and *outsize its size in bytes. out must be freed by user after usage. */ unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, @@ -766,7 +813,7 @@ unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const #endif /*LODEPNG_COMPILE_DISK*/ #ifdef LODEPNG_COMPILE_CPP -//The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. +/* The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. */ namespace lodepng { #ifdef LODEPNG_COMPILE_PNG @@ -780,7 +827,7 @@ class State : public LodePNGState }; #ifdef LODEPNG_COMPILE_DECODER -//Same as other lodepng::decode, but using a State for more settings and information. +/* Same as other lodepng::decode, but using a State for more settings and information. */ unsigned decode(std::vector& out, unsigned& w, unsigned& h, State& state, const unsigned char* in, size_t insize); @@ -790,7 +837,7 @@ unsigned decode(std::vector& out, unsigned& w, unsigned& h, #endif /*LODEPNG_COMPILE_DECODER*/ #ifdef LODEPNG_COMPILE_ENCODER -//Same as other lodepng::encode, but using a State for more settings and information. +/* Same as other lodepng::encode, but using a State for more settings and information. */ unsigned encode(std::vector& out, const unsigned char* in, unsigned w, unsigned h, State& state); @@ -799,50 +846,49 @@ unsigned encode(std::vector& out, State& state); #endif /*LODEPNG_COMPILE_ENCODER*/ - #ifdef LODEPNG_COMPILE_DISK /* -Load a file from disk into an std::vector. If the vector is empty, then either -the file doesn't exist or is an empty file. +Load a file from disk into an std::vector. +return value: error code (0 means ok) */ -void load_file(std::vector& buffer, const std::string& filename); +unsigned load_file(std::vector& buffer, const std::string& filename); /* Save the binary data in an std::vector to a file on disk. The file is overwritten without warning. */ -void save_file(const std::vector& buffer, const std::string& filename); -#endif //LODEPNG_COMPILE_DISK -#endif //LODEPNG_COMPILE_PNG +unsigned save_file(const std::vector& buffer, const std::string& filename); +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_PNG */ #ifdef LODEPNG_COMPILE_ZLIB #ifdef LODEPNG_COMPILE_DECODER -//Zlib-decompress an unsigned char buffer +/* Zlib-decompress an unsigned char buffer */ unsigned decompress(std::vector& out, const unsigned char* in, size_t insize, const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings); -//Zlib-decompress an std::vector +/* Zlib-decompress an std::vector */ unsigned decompress(std::vector& out, const std::vector& in, const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings); -#endif //LODEPNG_COMPILE_DECODER +#endif /* LODEPNG_COMPILE_DECODER */ #ifdef LODEPNG_COMPILE_ENCODER -//Zlib-compress an unsigned char buffer +/* Zlib-compress an unsigned char buffer */ unsigned compress(std::vector& out, const unsigned char* in, size_t insize, const LodePNGCompressSettings& settings = lodepng_default_compress_settings); -//Zlib-compress an std::vector +/* Zlib-compress an std::vector */ unsigned compress(std::vector& out, const std::vector& in, const LodePNGCompressSettings& settings = lodepng_default_compress_settings); -#endif //LODEPNG_COMPILE_ENCODER -#endif //LODEPNG_COMPILE_ZLIB -} //namespace lodepng +#endif /* LODEPNG_COMPILE_ENCODER */ +#endif /* LODEPNG_COMPILE_ZLIB */ +} /* namespace lodepng */ #endif /*LODEPNG_COMPILE_CPP*/ /* TODO: [.] test if there are no memory leaks or security exploits - done a lot but needs to be checked often -[.] check compatibility with vareous compilers - done but needs to be redone for every newer version +[.] check compatibility with various compilers - done but needs to be redone for every newer version [X] converting color to 16-bit per channel types [ ] read all public PNG chunk types (but never let the color profile and gamma ones touch RGB values) [ ] make sure encoder generates no chunks with size > (2^31)-1 @@ -850,8 +896,10 @@ unsigned compress(std::vector& out, const std::vector