diff --git a/src/main/native/compression/IntelDeflater.cc b/src/main/native/compression/IntelDeflater.cc index f293eb26..e110d99c 100644 --- a/src/main/native/compression/IntelDeflater.cc +++ b/src/main/native/compression/IntelDeflater.cc @@ -84,7 +84,7 @@ JNIEXPORT void JNICALL Java_com_intel_gkl_compression_IntelDeflater_resetNative jint level = env->GetIntField(obj, FID_level); - if(level == 1) { + if(level == 1 || level ==2) { isal_zstream* lz_stream = (isal_zstream*)env->GetLongField(obj, FID_lz_stream); if (lz_stream == 0) { @@ -95,19 +95,27 @@ JNIEXPORT void JNICALL Java_com_intel_gkl_compression_IntelDeflater_resetNative } env->SetLongField(obj, FID_lz_stream, (jlong)lz_stream); + isal_deflate_stateless_init(lz_stream); - lz_stream->hufftables = 0x0; + + lz_stream->level = level; + lz_stream->level_buf = (uint8_t*)malloc(ISAL_DEF_LVL2_DEFAULT); + lz_stream->level_buf_size = ISAL_DEF_LVL2_DEFAULT; + } else { - isal_hufftables *temp_huffman_pointer = lz_stream->hufftables; - DBG("lz_stream = 0x%lx", (long)temp_huffman_pointer); isal_deflate_stateless_init(lz_stream); - lz_stream->hufftables = temp_huffman_pointer; } + uint8_t *level_buf = NULL; + lz_stream->level = level; + jint level_size =ISAL_DEF_LVL2_DEFAULT; + level_buf = (uint8_t*)malloc(level_size); + lz_stream->level_buf = level_buf; + lz_stream->level_buf_size = ISAL_DEF_LVL2_DEFAULT; lz_stream->end_of_stream = 0; // DBG("lz_stream = 0x%lx", (long)lz_stream); @@ -190,7 +198,7 @@ JNIEXPORT jint JNICALL Java_com_intel_gkl_compression_IntelDeflater_deflateNativ jint level = env->GetIntField(obj, FID_level); - if(level == 1) { + if(level == 1 || level ==2 ) { isal_zstream* lz_stream = (isal_zstream*)env->GetLongField(obj, FID_lz_stream); @@ -211,27 +219,6 @@ JNIEXPORT jint JNICALL Java_com_intel_gkl_compression_IntelDeflater_deflateNativ struct timeval tv1, tv2; gettimeofday(&tv1, NULL); #endif - // compress and update lz_stream state - // Generate the dynamic huff tables using the first buffer of the 
stream - if(lz_stream->hufftables == 0x0) - { - struct isal_huff_histogram histogram; - struct isal_hufftables *hufftables_custom; - - hufftables_custom = (isal_hufftables*) malloc(sizeof(isal_hufftables)); - - int sixtyfourK = 64*1024; - int usable_buffer= (inputBufferLength < sixtyfourK) ? inputBufferLength : sixtyfourK; - DBG("lz_stream = 0x%lx", (long)hufftables_custom); - - memset(&histogram, 0, sizeof(histogram)); - isal_update_histogram((unsigned char*)next_in,usable_buffer, &histogram); - isal_create_hufftables(hufftables_custom, &histogram); - isal_deflate_set_hufftables(lz_stream, - hufftables_custom, IGZIP_HUFFTABLE_CUSTOM); - lz_stream->hufftables = hufftables_custom; - DBG("lz_stream = 0x%lx", (long)hufftables_custom); - } // compress and update lz_stream state isal_deflate_stateless(lz_stream); @@ -313,15 +300,17 @@ JNIEXPORT void JNICALL Java_com_intel_gkl_compression_IntelDeflater_endNative(JNIEnv *env, jobject obj) { jint level = env->GetIntField(obj, FID_level); + if (level == 1 || level == 2 ) { + isal_zstream* lz_stream = (isal_zstream*)env->GetLongField(obj, FID_lz_stream); + free(lz_stream->level_buf); + free(lz_stream); - if (level != 1) { + } + else { z_stream* lz_stream = (z_stream*)env->GetLongField(obj, FID_lz_stream); deflateEnd(lz_stream); free(lz_stream); } - else { - isal_zstream* lz_stream = (isal_zstream*)env->GetLongField(obj, FID_lz_stream); - free(lz_stream->hufftables); - free(lz_stream); - } + + } diff --git a/src/main/native/compression/isa-l-master/.travis.yml b/src/main/native/compression/isa-l-master/.travis.yml index 9da8e0d5..95ed46f2 100644 --- a/src/main/native/compression/isa-l-master/.travis.yml +++ b/src/main/native/compression/isa-l-master/.travis.yml @@ -1,11 +1,83 @@ +language: c sudo: required -dist: trusty +matrix: + include: + ### OS X + - os: osx + env: C_COMPILER=clang + + ### linux gcc + - dist: trusty + env: C_COMPILER=gcc + + ### linux clang + - dist: trusty + env: C_COMPILER=clang + + ### linux newer 
clang + - dist: trusty + addons: + apt: + sources: + - ubuntu-toolchain-r-test + - llvm-toolchain-trusty-4.0 + packages: + - clang-4.0 + env: C_COMPILER=clang-4.0 + + ### linux older gcc + - dist: trusty + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - g++-4.7 + env: C_COMPILER=gcc-4.7 + + ### linux newer gcc + - dist: trusty + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - g++-6 + env: C_COMPILER=gcc-6 + + ### linux with new nasm + # Removed until travis issue fixed pulling from nasm from debian + #- dist: trusty + # addons: + # apt: + # sources: + # - debian-sid + # packages: + # - nasm + # env: C_COMPILER=gcc AS_ASSEMBL=nasm + + ### linux extended tests + - dist: trusty + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - binutils-mingw-w64-x86-64 + - gcc-mingw-w64-x86-64 + - wine + env: TEST_TYPE=ext + +before_install: + - if [ -n "${C_COMPILER}" ]; then export CC="${C_COMPILER}"; fi + - if [ -n "${AS_ASSEMBL}" ]; then export AS="${AS_ASSEMBL}"; fi + before_script: - - sudo apt-get -q update - - sudo apt-get install -y yasm nasm - - ./autogen.sh -script: ./configure && make && make check -language: c -compiler: - - clang - - gcc + - if [ $TRAVIS_OS_NAME = linux ]; then sudo apt-get -q update; fi + - if [ $TRAVIS_OS_NAME = linux ]; then sudo apt-get install yasm indent; fi + - if [ $TRAVIS_OS_NAME = osx ]; then brew install yasm; fi + +script: + - if [ -n "${CC}" ]; then $CC --version; fi + - if [ -n "${AS}" ]; then $AS --version; fi + - ./tools/test_autorun.sh "${TEST_TYPE}" diff --git a/src/main/native/compression/isa-l-master/CONTRIBUTING.md b/src/main/native/compression/isa-l-master/CONTRIBUTING.md index 593a4676..a42ae814 100644 --- a/src/main/native/compression/isa-l-master/CONTRIBUTING.md +++ b/src/main/native/compression/isa-l-master/CONTRIBUTING.md @@ -30,6 +30,10 @@ the included indent script to format C code. ./tools/iindent your_files.c +And use check format script before submitting. 
+ + ./tools/check_format.sh + [mailing list]:https://lists.01.org/mailman/listinfo/isal [license]:LICENSE [signed-off-by language]:https://01.org/community/signed-process diff --git a/src/main/native/compression/isa-l-master/Makefile.am b/src/main/native/compression/isa-l-master/Makefile.am index cc824815..ce705e21 100644 --- a/src/main/native/compression/isa-l-master/Makefile.am +++ b/src/main/native/compression/isa-l-master/Makefile.am @@ -31,10 +31,11 @@ include erasure_code/Makefile.am include raid/Makefile.am include crc/Makefile.am include igzip/Makefile.am +include tests/fuzz/Makefile.am # LIB version info not necessarily the same as package version LIBISAL_CURRENT=2 -LIBISAL_REVISION=19 +LIBISAL_REVISION=21 LIBISAL_AGE=0 lib_LTLIBRARIES = libisal.la diff --git a/src/main/native/compression/isa-l-master/Makefile.nmake b/src/main/native/compression/isa-l-master/Makefile.nmake index 77b55929..14c034a2 100644 --- a/src/main/native/compression/isa-l-master/Makefile.nmake +++ b/src/main/native/compression/isa-l-master/Makefile.nmake @@ -90,6 +90,7 @@ objs = \ bin\xor_gen_sse.obj \ bin\crc16_t10dif_01.obj \ bin\crc16_t10dif_by4.obj \ + bin\crc16_t10dif_copy_by4.obj \ bin\crc32_gzip.obj \ bin\crc32_ieee_01.obj \ bin\crc32_ieee_by4.obj \ @@ -120,9 +121,7 @@ objs = \ bin\encode_df.obj \ bin\encode_df_04.obj \ bin\proc_heap.obj \ - bin\igzip_icf_body_01.obj \ - bin\igzip_icf_body_02.obj \ - bin\igzip_icf_body_04.obj \ + bin\igzip_icf_body_h1_gr_bt.obj \ bin\igzip_icf_finish.obj \ bin\igzip_icf_base.obj \ bin\igzip_inflate.obj \ @@ -135,7 +134,10 @@ objs = \ bin\crc32_gzip_refl_by8.obj \ bin\adler32_sse.obj \ bin\adler32_avx2_4.obj \ - bin\igzip_deflate_hash.obj + bin\igzip_deflate_hash.obj \ + bin\igzip_gen_icf_map_lh1_06.obj \ + bin\igzip_set_long_icf_fg_06.obj \ + bin\igzip_icf_body.obj INCLUDES = -I./ -Ierasure_code/ -Iraid/ -Icrc/ -Iigzip/ -Iinclude/ LINKFLAGS = /nologo @@ -201,6 +203,7 @@ checks = \ xor_check_test.exe \ pq_check_test.exe \ crc16_t10dif_test.exe \ 
+ crc16_t10dif_copy_test.exe \ crc32_ieee_test.exe \ crc32_iscsi_test.exe \ crc64_funcs_test.exe \ diff --git a/src/main/native/compression/isa-l-master/Makefile.unx b/src/main/native/compression/isa-l-master/Makefile.unx index bfc0dae3..b8c7a03a 100644 --- a/src/main/native/compression/isa-l-master/Makefile.unx +++ b/src/main/native/compression/isa-l-master/Makefile.unx @@ -33,9 +33,13 @@ default: lib include $(foreach unit,$(units), $(unit)/Makefile.am) +ifneq (,$(findstring igzip,$(units))) + include tests/fuzz/Makefile.am +endif + # Override individual lib names to make one inclusive library. lib_name := bin/isa-l.a include make.inc -VPATH = . $(units) include +VPATH = . $(units) include tests/fuzz diff --git a/src/main/native/compression/isa-l-master/README.md b/src/main/native/compression/isa-l-master/README.md index 48127384..23cdb9c6 100644 --- a/src/main/native/compression/isa-l-master/README.md +++ b/src/main/native/compression/isa-l-master/README.md @@ -18,7 +18,8 @@ applications. ISA-L includes: Also see: * [ISA-L for updates](https://github.com/01org/isa-l). * For crypto functions see [isa-l_crypto on github](https://github.com/01org/isa-l_crypto). -* The [github wiki](https://github.com/01org/isa-l/wiki). +* The [github wiki](https://github.com/01org/isa-l/wiki) including a list of + [distros/ports](https://github.com/01org/isa-l/wiki/Ports--Repos) offering binary packages. * ISA-L [mailing list](https://lists.01.org/mailman/listinfo/isal). * [Contributing](CONTRIBUTING.md). @@ -27,10 +28,11 @@ Building ISA-L ### Prerequisites -* yasm version 1.2.0 or later or nasm v2.11.01 or later. -* gcc, clang, icc or VC compiler. -* GNU 'make' or 'nmake' (Windows). -* Building with autotools requires autoconf/automake packages. +* Assembler: nasm v2.11.01 or later (nasm v2.13 or better suggested for building in AVX512 support) + or yasm version 1.2.0 or later. +* Compiler: gcc, clang, icc or VC compiler. +* Make: GNU 'make' or 'nmake' (Windows). 
+* Optional: Building with autotools requires autoconf/automake packages. ### Autotools To build and install the library with autotools it is usually sufficient to run: @@ -57,3 +59,4 @@ Other targets include: * `make perfs` : create included performance tests * `make ex` : build examples * `make other` : build other utilities such as compression file tests +* `make doc` : build API manual diff --git a/src/main/native/compression/isa-l-master/Release_notes.txt b/src/main/native/compression/isa-l-master/Release_notes.txt index 4d5fbf09..f142e249 100644 --- a/src/main/native/compression/isa-l-master/Release_notes.txt +++ b/src/main/native/compression/isa-l-master/Release_notes.txt @@ -1,4 +1,4 @@ -v2.19 Intel Intelligent Storage Acceleration Library Release Notes +v2.21 Intel Intelligent Storage Acceleration Library Release Notes ================================================================== RELEASE NOTE CONTENTS @@ -15,6 +15,17 @@ RELEASE NOTE CONTENTS 2. FIXED ISSUES --------------- +v2.20 + +* Inflate total_out behavior corrected for in-progress decompression. + Previously total_out represented the total bytes decompressed into the output + buffer or temp internal buffer. This is changed to be only the bytes put into + the output buffer. + +* Fixed issue with isal_create_hufftables_subset. Affects semi-dynamic + compression use case when explicitly creating hufftables from histogram. The + _hufftables_subset function could fail to generate length symbols for any + lengths that were never seen. v2.19 @@ -55,6 +66,30 @@ v2.10 3. CHANGE LOG & FEATURES ADDED ------------------------------ +v2.21 + +* Igzip improvements + - New compression levels added. ISA-L fast deflate now has more levels to + balance speed vs. target compression level. Level 0, 1 are as in previous + generations. New levels 2 & 3 target higher compression roughly comparable + to zlib levels 2-3. Level 3 is currently only optimized for processors with + AVX512 instructions. 
+ +* New T10dif & copy function - crc16_t10dif_copy() + - CRC and copy was added to emulate T10dif operations such as DIF insert and + strip. This function stitches together CRC and memcpy operations + eliminating an extra data read. + +* CRC32 iscsi performance improvements + - Fixes issue under some distributions where warm cache performance was + reduced. + +v2.20 + +* Igzip improvements + - Optimized deflate_hash in compression functions. + Improves performance of using preset dictionary. + - Removed alignment restrictions on input structure. v2.19 diff --git a/src/main/native/compression/isa-l-master/configure.ac b/src/main/native/compression/isa-l-master/configure.ac index 01714837..bce97818 100644 --- a/src/main/native/compression/isa-l-master/configure.ac +++ b/src/main/native/compression/isa-l-master/configure.ac @@ -3,7 +3,7 @@ AC_PREREQ(2.69) AC_INIT([libisal], - [2.19.0], + [2.21.0], [sg.support.isal@intel.com], [isa-l], [http://01.org/storage-acceleration-library]) @@ -85,8 +85,8 @@ else with_modern_nasm=yes AC_MSG_RESULT([yes]) AC_MSG_CHECKING([for optional nasm AVX512 support]) - AC_LANG_CONFTEST([AC_LANG_SOURCE([[vpshufb zmm0, zmm1, zmm2;]])]) - sed -i -e '/vpshufb/!d' conftest.c + AC_LANG_CONFTEST([AC_LANG_SOURCE([[vinserti32x8 zmm0, ymm1, 1;]])]) + sed -i -e '/vinsert/!d' conftest.c if nasm -f elf64 conftest.c 2> /dev/null; then nasm_knows_avx512=yes AC_MSG_RESULT([yes]) @@ -109,7 +109,7 @@ if test x"$AS" = x""; then elif test x"$with_modern_nasm" = x"yes"; then AS=nasm else - AC_MSG_ERROR([No modern yasm or nasm found as required. Yasm should be 1.2.0 or later, and nasm should be v2.11.01 or later.]) + AC_MSG_ERROR([No modern yasm or nasm found as required. 
Yasm should be 1.2.0 or later, and nasm should be v2.11.01 or later (v2.13 for AVX512).]) fi fi echo "Using assembler $AS" diff --git a/src/main/native/compression/isa-l-master/crc/Makefile.am b/src/main/native/compression/isa-l-master/crc/Makefile.am index 20f2592f..c7746077 100644 --- a/src/main/native/compression/isa-l-master/crc/Makefile.am +++ b/src/main/native/compression/isa-l-master/crc/Makefile.am @@ -37,6 +37,7 @@ lsrc_x86_32 += crc/crc_base_aliases.c lsrc_x86_64 += \ crc/crc16_t10dif_01.asm \ crc/crc16_t10dif_by4.asm \ + crc/crc16_t10dif_copy_by4.asm \ crc/crc32_ieee_01.asm \ crc/crc32_ieee_by4.asm \ crc/crc32_iscsi_01.asm \ @@ -57,9 +58,12 @@ extern_hdrs += include/crc.h include/crc64.h other_src += include/reg_sizes.asm include/types.h include/test.h check_tests += crc/crc16_t10dif_test crc/crc32_ieee_test crc/crc32_iscsi_test \ + crc/crc16_t10dif_copy_test \ crc/crc64_funcs_test crc/crc32_gzip_refl_test -perf_tests += crc/crc16_t10dif_perf crc/crc32_ieee_perf crc/crc32_iscsi_perf \ +perf_tests += crc/crc16_t10dif_perf crc/crc16_t10dif_copy_perf \ + crc/crc16_t10dif_op_perf \ + crc/crc32_ieee_perf crc/crc32_iscsi_perf \ crc/crc64_funcs_perf crc/crc32_gzip_refl_perf examples += crc/crc_simple_test crc/crc64_example diff --git a/src/main/native/compression/isa-l-master/crc/crc16_t10dif_copy_by4.asm b/src/main/native/compression/isa-l-master/crc/crc16_t10dif_copy_by4.asm new file mode 100644 index 00000000..e1a18bc6 --- /dev/null +++ b/src/main/native/compression/isa-l-master/crc/crc16_t10dif_copy_by4.asm @@ -0,0 +1,598 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2017 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Function API: +; UINT16 crc16_t10dif_copy_by4( +; UINT16 init_crc, //initial CRC value, 16 bits +; unsigned char *dst, //buffer pointer destination for copy +; const unsigned char *src, //buffer pointer to calculate CRC on +; UINT64 len //buffer length in bytes (64-bit data) +; ); +; +; Authors: +; Erdinc Ozturk +; Vinodh Gopal +; James Guilford +; +; Reference paper titled "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" +; URL: http://download.intel.com/design/intarch/papers/323102.pdf +; + +%include "reg_sizes.asm" + +%define fetch_dist 1024 + +[bits 64] +default rel + +section .text +%ifidn __OUTPUT_FORMAT__, win64 + %xdefine arg1 rcx + %xdefine arg2 rdx + %xdefine arg3 r8 + %xdefine arg4 r9 + %xdefine tmp1 r10 + %xdefine arg1_low32 ecx +%else + %xdefine arg1 rdi + %xdefine arg2 rsi + %xdefine arg3 rdx + %xdefine arg4 rcx + %xdefine tmp1 r10 + %xdefine arg1_low32 edi +%endif + +align 16 +global crc16_t10dif_copy_by4:function +crc16_t10dif_copy_by4: + + ; adjust the 16-bit initial_crc value, scale it to 32 bits + shl arg1_low32, 16 + + ; After this point, code flow is exactly same as a 32-bit CRC. + ; The only difference is before returning eax, we will shift + ; it right 16 bits, to scale back to 16 bits. + + sub rsp,16*4+8 + + ; push the xmm registers into the stack to maintain + movdqa [rsp+16*2],xmm6 + movdqa [rsp+16*3],xmm7 + + ; check if smaller than 128B + cmp arg4, 128 + + ; for sizes less than 128, we can't fold 64B at a time... + jl _less_than_128 + + + ; load the initial crc value + movd xmm6, arg1_low32 ; initial crc + + ; crc value does not need to be byte-reflected, but it needs to + ; be moved to the high part of the register. + ; because data will be byte-reflected and will align with + ; initial crc at correct place. 
+ pslldq xmm6, 12 + + movdqa xmm7, [SHUF_MASK] + ; receive the initial 64B data, xor the initial crc value + movdqu xmm0, [arg3] + movdqu xmm1, [arg3+16] + movdqu xmm2, [arg3+32] + movdqu xmm3, [arg3+48] + + ; copy initial data + movdqu [arg2], xmm0 + movdqu [arg2+16], xmm1 + movdqu [arg2+32], xmm2 + movdqu [arg2+48], xmm3 + + pshufb xmm0, xmm7 + ; XOR the initial_crc value + pxor xmm0, xmm6 + pshufb xmm1, xmm7 + pshufb xmm2, xmm7 + pshufb xmm3, xmm7 + + movdqa xmm6, [rk3] ;xmm6 has rk3 and rk4 + ;imm value of pclmulqdq instruction + ;will determine which constant to use + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; we subtract 128 instead of 64 to save one instruction from the loop + sub arg4, 128 + + ; at this section of the code, there is 64*x+y (0<=y<64) bytes of + ; buffer. The _fold_64_B_loop + ; loop will fold 64B at a time until we have 64+y Bytes of buffer + + + ; fold 64B at a time. This section of the code folds 4 xmm + ; registers in parallel +_fold_64_B_loop: + + ; update the buffer pointer + add arg3, 64 ; buf += 64; + add arg2, 64 + + prefetchnta [arg3+fetch_dist+0] + movdqu xmm4, xmm0 + movdqu xmm5, xmm1 + + pclmulqdq xmm0, xmm6 , 0x11 + pclmulqdq xmm1, xmm6 , 0x11 + + pclmulqdq xmm4, xmm6, 0x0 + pclmulqdq xmm5, xmm6, 0x0 + + pxor xmm0, xmm4 + pxor xmm1, xmm5 + + prefetchnta [arg3+fetch_dist+32] + movdqu xmm4, xmm2 + movdqu xmm5, xmm3 + + pclmulqdq xmm2, xmm6, 0x11 + pclmulqdq xmm3, xmm6, 0x11 + + pclmulqdq xmm4, xmm6, 0x0 + pclmulqdq xmm5, xmm6, 0x0 + + pxor xmm2, xmm4 + pxor xmm3, xmm5 + + movdqu xmm4, [arg3] + movdqu xmm5, [arg3+16] + movdqu [arg2], xmm4 + movdqu [arg2+16], xmm5 + pshufb xmm4, xmm7 + pshufb xmm5, xmm7 + pxor xmm0, xmm4 + pxor xmm1, xmm5 + + movdqu xmm4, [arg3+32] + movdqu xmm5, [arg3+48] + movdqu [arg2+32], xmm4 + movdqu [arg2+48], xmm5 + pshufb xmm4, xmm7 + pshufb xmm5, xmm7 + + pxor xmm2, xmm4 + pxor xmm3, xmm5 + + sub arg4, 64 + + ; check if there is another 64B in the buffer to be able to fold + jge 
_fold_64_B_loop + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + + add arg3, 64 + add arg2, 64 + ; at this point, the buffer pointer is pointing at the last y Bytes of the buffer + ; the 64B of folded data is in 4 of the xmm registers: xmm0, xmm1, xmm2, xmm3 + + + ; fold the 4 xmm registers to 1 xmm register with different constants + + movdqa xmm6, [rk1] ;xmm6 has rk1 and rk2 + ;imm value of pclmulqdq instruction will + ;determine which constant to use + + movdqa xmm4, xmm0 + pclmulqdq xmm0, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm1, xmm4 + pxor xmm1, xmm0 + + movdqa xmm4, xmm1 + pclmulqdq xmm1, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm2, xmm4 + pxor xmm2, xmm1 + + movdqa xmm4, xmm2 + pclmulqdq xmm2, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + + + ; instead of 64, we add 48 to the loop counter to save 1 instruction from the loop + ; instead of a cmp instruction, we use the negative flag with the jl instruction + add arg4, 64-16 + jl _final_reduction_for_128 + + ; now we have 16+y bytes left to reduce. 16 Bytes + ; is in register xmm3 and the rest is in memory + ; we can fold 16 bytes at a time if y>=16 + ; continue folding 16B at a time + +_16B_reduction_loop: + movdqa xmm4, xmm3 + pclmulqdq xmm3, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm3, xmm4 + movdqu xmm0, [arg3] + movdqu [arg2], xmm0 + pshufb xmm0, xmm7 + pxor xmm3, xmm0 + add arg3, 16 + add arg2, 16 + sub arg4, 16 + ; instead of a cmp instruction, we utilize the flags with the jge instruction + ; equivalent of: cmp arg4, 16-16 + ; check if there is any more 16B in the buffer to be able to fold + jge _16B_reduction_loop + + ;now we have 16+z bytes left to reduce, where 0<= z < 16. + ;first, we reduce the data in the xmm3 register + + +_final_reduction_for_128: + ; check if any more data to fold. If not, compute the CRC of the final 128 bits + add arg4, 16 + je _128_done + + ; here we are getting data that is less than 16 bytes. 
+ ; since we know that there was data before the pointer, + ; we can offset the input pointer before the actual point, + ; to receive exactly 16 bytes. + ; after that the registers need to be adjusted. +_get_last_two_xmms: + movdqa xmm2, xmm3 + + movdqu xmm1, [arg3 - 16 + arg4] + movdqu [arg2 - 16 + arg4], xmm1 + pshufb xmm1, xmm7 + + ; get rid of the extra data that was loaded before + ; load the shift constant + lea rax, [pshufb_shf_table + 16] + sub rax, arg4 + movdqu xmm0, [rax] + + ; shift xmm2 to the left by arg4 bytes + pshufb xmm2, xmm0 + + ; shift xmm3 to the right by 16-arg4 bytes + pxor xmm0, [mask1] + pshufb xmm3, xmm0 + pblendvb xmm1, xmm2 ;xmm0 is implicit + + ; fold 16 Bytes + movdqa xmm2, xmm1 + movdqa xmm4, xmm3 + pclmulqdq xmm3, xmm6, 0x11 + pclmulqdq xmm4, xmm6, 0x0 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + +_128_done: + ; compute crc of a 128-bit value + movdqa xmm6, [rk5] ; rk5 and rk6 in xmm6 + movdqa xmm0, xmm3 + + ;64b fold + pclmulqdq xmm3, xmm6, 0x1 + pslldq xmm0, 8 + pxor xmm3, xmm0 + + ;32b fold + movdqa xmm0, xmm3 + + pand xmm0, [mask2] + + psrldq xmm3, 12 + pclmulqdq xmm3, xmm6, 0x10 + pxor xmm3, xmm0 + + ;barrett reduction +_barrett: + movdqa xmm6, [rk7] ; rk7 and rk8 in xmm6 + movdqa xmm0, xmm3 + pclmulqdq xmm3, xmm6, 0x01 + pslldq xmm3, 4 + pclmulqdq xmm3, xmm6, 0x11 + + pslldq xmm3, 4 + pxor xmm3, xmm0 + pextrd eax, xmm3,1 + +_cleanup: + ; scale the result back to 16 bits + shr eax, 16 + movdqa xmm6, [rsp+16*2] + movdqa xmm7, [rsp+16*3] + add rsp,16*4+8 + ret + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +align 16 +_less_than_128: + + ; check if there is enough buffer to be able to fold 16B at a time + cmp arg4, 32 + jl _less_than_32 + movdqa xmm7, [SHUF_MASK] + + ; if there is, load 
the constants + movdqa xmm6, [rk1] ; rk1 and rk2 in xmm6 + + movd xmm0, arg1_low32 ; get the initial crc value + pslldq xmm0, 12 ; align it to its correct place + movdqu xmm3, [arg3] ; load the plaintext + movdqu [arg2], xmm3 ; store copy + pshufb xmm3, xmm7 ; byte-reflect the plaintext + pxor xmm3, xmm0 + + + ; update the buffer pointer + add arg3, 16 + add arg2, 16 + + ; update the counter. subtract 32 instead of 16 to save one instruction from the loop + sub arg4, 32 + + jmp _16B_reduction_loop + + +align 16 +_less_than_32: + ; mov initial crc to the return value. this is necessary for zero-length buffers. + mov eax, arg1_low32 + test arg4, arg4 + je _cleanup + + movdqa xmm7, [SHUF_MASK] + + movd xmm0, arg1_low32 ; get the initial crc value + pslldq xmm0, 12 ; align it to its correct place + + cmp arg4, 16 + je _exact_16_left + jl _less_than_16_left + + movdqu xmm3, [arg3] ; load the plaintext + movdqu [arg2], xmm3 ; store the copy + pshufb xmm3, xmm7 ; byte-reflect the plaintext + pxor xmm3, xmm0 ; xor the initial crc value + add arg3, 16 + add arg2, 16 + sub arg4, 16 + movdqa xmm6, [rk1] ; rk1 and rk2 in xmm6 + jmp _get_last_two_xmms + + +align 16 +_less_than_16_left: + ; use stack space to load data less than 16 bytes, zero-out the 16B in memory first. 
+ + pxor xmm1, xmm1 + mov r11, rsp + movdqa [r11], xmm1 + + cmp arg4, 4 + jl _only_less_than_4 + + ; backup the counter value + mov tmp1, arg4 + cmp arg4, 8 + jl _less_than_8_left + + ; load 8 Bytes + mov rax, [arg3] + mov [arg2], rax + mov [r11], rax + add r11, 8 + sub arg4, 8 + add arg3, 8 + add arg2, 8 +_less_than_8_left: + + cmp arg4, 4 + jl _less_than_4_left + + ; load 4 Bytes + mov eax, [arg3] + mov [arg2], eax + mov [r11], eax + add r11, 4 + sub arg4, 4 + add arg3, 4 + add arg2, 4 +_less_than_4_left: + + cmp arg4, 2 + jl _less_than_2_left + + ; load 2 Bytes + mov ax, [arg3] + mov [arg2], ax + mov [r11], ax + add r11, 2 + sub arg4, 2 + add arg3, 2 + add arg2, 2 +_less_than_2_left: + cmp arg4, 1 + jl _zero_left + + ; load 1 Byte + mov al, [arg3] + mov [arg2], al + mov [r11], al +_zero_left: + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + ; shl tmp1, 4 + lea rax, [pshufb_shf_table + 16] + sub rax, tmp1 + movdqu xmm0, [rax] + pxor xmm0, [mask1] + + pshufb xmm3, xmm0 + jmp _128_done + +align 16 +_exact_16_left: + movdqu xmm3, [arg3] + movdqu [arg2], xmm3 + pshufb xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + jmp _128_done + +_only_less_than_4: + cmp arg4, 3 + jl _only_less_than_3 + + ; load 3 Bytes + mov al, [arg3] + mov [arg2], al + mov [r11], al + + mov al, [arg3+1] + mov [arg2+1], al + mov [r11+1], al + + mov al, [arg3+2] + mov [arg2+2], al + mov [r11+2], al + + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + psrldq xmm3, 5 + + jmp _barrett +_only_less_than_3: + cmp arg4, 2 + jl _only_less_than_2 + + ; load 2 Bytes + mov al, [arg3] + mov [arg2], al + mov [r11], al + + mov al, [arg3+1] + mov [arg2+1], al + mov [r11+1], al + + movdqa xmm3, [rsp] + pshufb xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + psrldq xmm3, 6 + + jmp _barrett +_only_less_than_2: + + ; load 1 Byte + mov al, [arg3] + mov [arg2],al + mov [r11], al + + movdqa xmm3, [rsp] + pshufb 
xmm3, xmm7 + pxor xmm3, xmm0 ; xor the initial crc value + + psrldq xmm3, 7 + + jmp _barrett + +section .data + +; precomputed constants +; these constants are precomputed from the poly: 0x8bb70000 (0x8bb7 scaled to 32 bits) +align 16 +; Q = 0x18BB70000 +; rk1 = 2^(32*3) mod Q << 32 +; rk2 = 2^(32*5) mod Q << 32 +; rk3 = 2^(32*15) mod Q << 32 +; rk4 = 2^(32*17) mod Q << 32 +; rk5 = 2^(32*3) mod Q << 32 +; rk6 = 2^(32*2) mod Q << 32 +; rk7 = floor(2^64/Q) +; rk8 = Q +rk1: +DQ 0x2d56000000000000 +rk2: +DQ 0x06df000000000000 +rk3: +DQ 0x044c000000000000 +rk4: +DQ 0xe658000000000000 +rk5: +DQ 0x2d56000000000000 +rk6: +DQ 0x1368000000000000 +rk7: +DQ 0x00000001f65a57f8 +rk8: +DQ 0x000000018bb70000 +mask1: +dq 0x8080808080808080, 0x8080808080808080 +mask2: +dq 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF + +SHUF_MASK: +dq 0x08090A0B0C0D0E0F, 0x0001020304050607 + +pshufb_shf_table: +; use these values for shift constants for the pshufb instruction +; different alignments result in values as shown: +; dq 0x8887868584838281, 0x008f8e8d8c8b8a89 ; shl 15 (16-1) / shr1 +; dq 0x8988878685848382, 0x01008f8e8d8c8b8a ; shl 14 (16-2) / shr2 +; dq 0x8a89888786858483, 0x0201008f8e8d8c8b ; shl 13 (16-3) / shr3 +; dq 0x8b8a898887868584, 0x030201008f8e8d8c ; shl 12 (16-4) / shr4 +; dq 0x8c8b8a8988878685, 0x04030201008f8e8d ; shl 11 (16-5) / shr5 +; dq 0x8d8c8b8a89888786, 0x0504030201008f8e ; shl 10 (16-6) / shr6 +; dq 0x8e8d8c8b8a898887, 0x060504030201008f ; shl 9 (16-7) / shr7 +; dq 0x8f8e8d8c8b8a8988, 0x0706050403020100 ; shl 8 (16-8) / shr8 +; dq 0x008f8e8d8c8b8a89, 0x0807060504030201 ; shl 7 (16-9) / shr9 +; dq 0x01008f8e8d8c8b8a, 0x0908070605040302 ; shl 6 (16-10) / shr10 +; dq 0x0201008f8e8d8c8b, 0x0a09080706050403 ; shl 5 (16-11) / shr11 +; dq 0x030201008f8e8d8c, 0x0b0a090807060504 ; shl 4 (16-12) / shr12 +; dq 0x04030201008f8e8d, 0x0c0b0a0908070605 ; shl 3 (16-13) / shr13 +; dq 0x0504030201008f8e, 0x0d0c0b0a09080706 ; shl 2 (16-14) / shr14 +; dq 0x060504030201008f, 0x0e0d0c0b0a090807 
; shl 1 (16-15) / shr15 +dq 0x8786858483828100, 0x8f8e8d8c8b8a8988 +dq 0x0706050403020100, 0x000e0d0c0b0a0908 + +;;; func core, ver, snum +slversion crc16_t10dif_copy_by4, 05, 02, 0000 diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_sse_perf.c b/src/main/native/compression/isa-l-master/crc/crc16_t10dif_copy_perf.c similarity index 69% rename from src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_sse_perf.c rename to src/main/native/compression/isa-l-master/crc/crc16_t10dif_copy_perf.c index bbc598b3..01c8038f 100644 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_sse_perf.c +++ b/src/main/native/compression/isa-l-master/crc/crc16_t10dif_copy_perf.c @@ -1,5 +1,5 @@ /********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. + Copyright(c) 2011-2017 Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -29,8 +29,10 @@ #include #include -#include // for memset -#include "erasure_code.h" +#include +#include +#include +#include "crc.h" #include "test.h" //#define CACHED_TEST @@ -40,58 +42,51 @@ # define TEST_LOOPS 4000000 # define TEST_TYPE_STR "_warm" #else -# ifndef TEST_CUSTOM // Uncached test. Pull from large mem base. 
-# define TEST_SOURCES 10 # define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ -# define TEST_LEN GT_L3_CACHE / 2 -# define TEST_LOOPS 1000 +# define TEST_LEN (2 * GT_L3_CACHE) +# define TEST_LOOPS 100 # define TEST_TYPE_STR "_cold" -# else -# define TEST_TYPE_STR "_cus" -# ifndef TEST_LOOPS -# define TEST_LOOPS 1000 -# endif -# endif #endif -#define TEST_MEM (2 * TEST_LEN) +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif -typedef unsigned char u8; +#define TEST_MEM TEST_LEN int main(int argc, char *argv[]) { int i; - u8 *buff1, *buff2, gf_const_tbl[64], a = 2; + void *src, *dst; + uint16_t crc; struct perf start, stop; - printf("gf_vect_mul_sse_perf:\n"); - - gf_vect_mul_init(a, gf_const_tbl); + printf("crc16_t10dif_copy_perf:\n"); - // Allocate large mem region - buff1 = (u8 *) malloc(TEST_LEN); - buff2 = (u8 *) malloc(TEST_LEN); - if (NULL == buff1 || NULL == buff2) { - printf("Failed to allocate %dB\n", TEST_LEN); - return 1; + if (posix_memalign(&src, 1024, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; + } + if (posix_memalign(&dst, 1024, TEST_LEN)) { + printf("alloc error: Fail"); + return -1; } - - memset(buff1, 0, TEST_LEN); - memset(buff2, 0, TEST_LEN); printf("Start timed tests\n"); fflush(0); - gf_vect_mul_sse(TEST_LEN, gf_const_tbl, buff1, buff2); + memset(src, 0, TEST_LEN); + crc = crc16_t10dif_copy(TEST_SEED, dst, src, TEST_LEN); + perf_start(&start); for (i = 0; i < TEST_LOOPS; i++) { - gf_vect_mul_init(a, gf_const_tbl); // in a re-build would only calc once - gf_vect_mul_sse(TEST_LEN, gf_const_tbl, buff1, buff2); + crc = crc16_t10dif_copy(TEST_SEED, dst, src, TEST_LEN); } perf_stop(&stop); - printf("gf_vect_mul_sse" TEST_TYPE_STR ": "); + printf("crc16_t10dif_copy" TEST_TYPE_STR ": "); perf_print(stop, start, (long long)TEST_LEN * i); + printf("finish 0x%x\n", crc); return 0; } diff --git a/src/main/native/compression/isa-l-master/crc/crc16_t10dif_copy_test.c 
b/src/main/native/compression/isa-l-master/crc/crc16_t10dif_copy_test.c new file mode 100644 index 00000000..b6133966 --- /dev/null +++ b/src/main/native/compression/isa-l-master/crc/crc16_t10dif_copy_test.c @@ -0,0 +1,155 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include +#include +#include +#include +#include +#include "crc.h" + +#ifndef RANDOMS +# define RANDOMS 20 +#endif +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define MAX_BUF 2345 +#define TEST_SIZE 217 +#define TEST_LEN (8 * 1024) + +typedef uint16_t u16; +typedef uint8_t u8; + +void rand_buffer(unsigned char *buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +int memtst(unsigned char *buf, unsigned char c, int len) +{ + int i; + for (i = 0; i < len; i++) + if (*buf++ != c) + return 1; + + return 0; +} + +int crc_copy_check(const char *description, u8 * dst, u8 * src, u8 dst_fill_val, int len, + int tot) +{ + u16 seed; + int rem; + + assert(tot >= len); + seed = rand(); + rem = tot - len; + memset(dst, dst_fill_val, tot); + u16 crc_dut = crc16_t10dif_copy(seed, dst, src, len); + u16 crc_ref = crc16_t10dif(seed, src, len); + if (crc_dut != crc_ref) { + printf("%s, crc gen fail: 0x%4x 0x%4x len=%d\n", description, crc_dut, + crc_ref, len); + return 1; + } else if (memcmp(dst, src, len)) { + printf("%s, copy fail: len=%d\n", description, len); + return 1; + } else if (memtst(&dst[len], dst_fill_val, rem)) { + printf("%s, writeover fail: len=%d\n", description, len); + return 1; + } + return 0; +} + +int main(int argc, char *argv[]) +{ + int r = 0; + int i; + int len, tot; + u8 *src_raw, *dst_raw; + u8 *src, *dst; + + printf("Test crc16_t10dif_copy_test:\n"); + src_raw = (u8 *) malloc(TEST_LEN); + dst_raw = (u8 *) malloc(TEST_LEN); + if (NULL == src_raw || NULL == dst_raw) { + printf("alloc error: Fail"); + return -1; + } + src = src_raw; + dst = dst_raw; + + srand(TEST_SEED); + + // Test of all zeros + memset(src, 0, TEST_LEN); + r |= crc_copy_check("zero tst", dst, src, 0x5e, MAX_BUF, TEST_LEN); + + // Another simple test pattern + memset(src, 0xff, TEST_LEN); + r |= crc_copy_check("simp tst", dst, src, 0x5e, MAX_BUF, TEST_LEN); + + 
// Do a few short len random data tests + rand_buffer(src, TEST_LEN); + rand_buffer(dst, TEST_LEN); + for (i = 0; i < MAX_BUF; i++) { + r |= crc_copy_check("short len", dst, src, rand(), i, MAX_BUF); + } + printf("."); + + // Do a few longer tests, random data + for (i = TEST_LEN; i >= (TEST_LEN - TEST_SIZE); i--) { + r |= crc_copy_check("long len", dst, src, rand(), i, TEST_LEN); + } + printf("."); + + // Do random size, random data + for (i = 0; i < RANDOMS; i++) { + len = rand() % TEST_LEN; + r |= crc_copy_check("rand len", dst, src, rand(), len, TEST_LEN); + } + printf("."); + + // Run tests at end of buffer + for (i = 0; i < RANDOMS; i++) { + len = rand() % TEST_LEN; + src = &src_raw[TEST_LEN - len - 1]; + dst = &dst_raw[TEST_LEN - len - 1]; + tot = len; + r |= crc_copy_check("end of buffer", dst, src, rand(), len, tot); + } + printf("."); + + printf("Test done: %s\n", r ? "Fail" : "Pass"); + return r; +} diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_avx_perf.c b/src/main/native/compression/isa-l-master/crc/crc16_t10dif_op_perf.c similarity index 57% rename from src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_avx_perf.c rename to src/main/native/compression/isa-l-master/crc/crc16_t10dif_op_perf.c index e5a818d8..0b461994 100644 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_avx_perf.c +++ b/src/main/native/compression/isa-l-master/crc/crc16_t10dif_op_perf.c @@ -1,5 +1,5 @@ /********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. + Copyright(c) 2011-2017 Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -29,71 +29,86 @@ #include #include -#include // for memset -#include "erasure_code.h" +#include +#include +#include "crc.h" #include "test.h" +#define BLKSIZE (512) + //#define CACHED_TEST #ifdef CACHED_TEST // Cached test, loop many times over small dataset -# define TEST_LEN 8*1024 -# define TEST_LOOPS 4000000 +# define NBLOCKS 100 +# define TEST_LOOPS 1000000 # define TEST_TYPE_STR "_warm" #else -# ifndef TEST_CUSTOM // Uncached test. Pull from large mem base. -# define TEST_SOURCES 10 # define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ -# define TEST_LEN GT_L3_CACHE / 2 -# define TEST_LOOPS 1000 +# define TEST_LEN (2 * GT_L3_CACHE) +# define NBLOCKS (TEST_LEN / BLKSIZE) +# define TEST_LOOPS 100 # define TEST_TYPE_STR "_cold" -# else -# define TEST_TYPE_STR "_cus" -# ifndef TEST_LOOPS -# define TEST_LOOPS 1000 -# endif -# endif #endif -#define TEST_MEM (2 * TEST_LEN) +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +struct blk { + uint8_t data[BLKSIZE]; +}; -typedef unsigned char u8; +struct blk_ext { + uint8_t data[BLKSIZE]; + uint32_t tag; + uint16_t meta; + uint16_t crc; +}; int main(int argc, char *argv[]) { - int i; - u8 *buff1, *buff2, gf_const_tbl[64], a = 2; + int i, j; + uint16_t crc; + struct blk *blks, *blkp; + struct blk_ext *blks_ext, *blkp_ext; struct perf start, stop; - printf("gf_vect_mul_avx_perf:\n"); + printf("crc16_t10dif_streaming_insert_perf:\n"); - gf_vect_mul_init(a, gf_const_tbl); - - // Allocate large mem region - buff1 = (u8 *) malloc(TEST_LEN); - buff2 = (u8 *) malloc(TEST_LEN); - if (NULL == buff1 || NULL == buff2) { - printf("Failed to allocate %dB\n", TEST_LEN); - return 1; + if (posix_memalign((void *)&blks, 1024, NBLOCKS * sizeof(*blks))) { + printf("alloc error: Fail"); + return -1; + } + if (posix_memalign((void *)&blks_ext, 1024, NBLOCKS * sizeof(*blks_ext))) { + 
printf("alloc error: Fail"); + return -1; } - memset(buff1, 0, TEST_LEN); - memset(buff2, 0, TEST_LEN); - - gf_vect_mul_avx(TEST_LEN, gf_const_tbl, buff1, buff2); + printf(" size blk: %ld, blk_ext: %ld, blk data: %ld, stream: %ld\n", + sizeof(*blks), sizeof(*blks_ext), sizeof(blks->data), + NBLOCKS * sizeof(blks->data)); + memset(blks, 0xe5, NBLOCKS * sizeof(*blks)); + memset(blks_ext, 0xe5, NBLOCKS * sizeof(*blks_ext)); printf("Start timed tests\n"); fflush(0); - gf_vect_mul_avx(TEST_LEN, gf_const_tbl, buff1, buff2); + // Copy and insert test perf_start(&start); - for (i = 0; i < TEST_LOOPS; i++) { - gf_vect_mul_init(a, gf_const_tbl); - gf_vect_mul_avx(TEST_LEN, gf_const_tbl, buff1, buff2); + for (j = 0; j < TEST_LOOPS; j++) { + for (i = 0, blkp = blks, blkp_ext = blks_ext; i < NBLOCKS; i++) { + crc = crc16_t10dif_copy(TEST_SEED, blkp_ext->data, blkp->data, + sizeof(blks->data)); + blkp_ext->crc = crc; + blkp++; + blkp_ext++; + } } perf_stop(&stop); - printf("gf_vect_mul_avx" TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * i); + printf("crc16_t10pi_op_copy_insert" TEST_TYPE_STR ": "); + perf_print(stop, start, (long long)sizeof(blks->data) * NBLOCKS * TEST_LOOPS); + printf("finish 0x%x\n", crc); return 0; } diff --git a/src/main/native/compression/isa-l-master/crc/crc16_t10dif_perf.c b/src/main/native/compression/isa-l-master/crc/crc16_t10dif_perf.c index 34f1ddbd..9fddb49e 100644 --- a/src/main/native/compression/isa-l-master/crc/crc16_t10dif_perf.c +++ b/src/main/native/compression/isa-l-master/crc/crc16_t10dif_perf.c @@ -39,7 +39,7 @@ #ifdef CACHED_TEST // Cached test, loop many times over small dataset # define TEST_LEN 8*1024 -# define TEST_LOOPS 400000 +# define TEST_LOOPS 4000000 # define TEST_TYPE_STR "_warm" #else // Uncached test. Pull from large mem base. 
diff --git a/src/main/native/compression/isa-l-master/crc/crc32_iscsi_00.asm b/src/main/native/compression/isa-l-master/crc/crc32_iscsi_00.asm index 2833a8d0..d0e85081 100644 --- a/src/main/native/compression/isa-l-master/crc/crc32_iscsi_00.asm +++ b/src/main/native/compression/isa-l-master/crc/crc32_iscsi_00.asm @@ -55,6 +55,9 @@ default rel xor rbx, rbx ;; rbx = crc1 = 0; xor r10, r10 ;; r10 = crc2 = 0; + cmp len, %%bSize*3*2 + jbe %%non_prefetch + %assign i 0 %rep %%bSize/8 - 1 %if i < %%bSize*3/4 @@ -65,6 +68,18 @@ default rel crc32 r10, qword [bufptmp+i + 2*%%bSize] ;; update crc2 %assign i (i+8) %endrep + jmp %%next %+ %1 + +%%non_prefetch: + %assign i 0 + %rep %%bSize/8 - 1 + crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0 + crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1 + crc32 r10, qword [bufptmp+i + 2*%%bSize] ;; update crc2 + %assign i (i+8) + %endrep + +%%next %+ %1: crc32 rax, qword [bufptmp+i + 0*%%bSize] ;; update crc0 crc32 rbx, qword [bufptmp+i + 1*%%bSize] ;; update crc1 ; SKIP ;crc32 r10, [bufptmp+i + 2*%%bSize] ;; update crc2 @@ -652,5 +667,5 @@ DD 0x2f2aa980,0xf24c623b,0x900b4807,0x4d6d83bc DD 0x54851c7f,0x89e3d7c4,0xeba4fdf8,0x36c23643 ;;; func core, ver, snum -slversion crc32_iscsi_00, 00, 03, 0014 +slversion crc32_iscsi_00, 00, 04, 0014 diff --git a/src/main/native/compression/isa-l-master/crc/crc32_iscsi_01.asm b/src/main/native/compression/isa-l-master/crc/crc32_iscsi_01.asm index 5b730f63..3493adf5 100644 --- a/src/main/native/compression/isa-l-master/crc/crc32_iscsi_01.asm +++ b/src/main/native/compression/isa-l-master/crc/crc32_iscsi_01.asm @@ -190,9 +190,13 @@ full_block: ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; crc_array: + cmp len, 128*24*2 + jbe non_prefetch + %assign i 128 %rep 128-1 -CONCAT(crc_,i,:) + +CONCAT(_crc_,i,:) crc32 crc_init, qword [block_0 - i*8] crc32 crc1, qword [block_1 - i*8] crc32 crc2, qword [block_2 - i*8] @@ -200,9 +204,23 @@ CONCAT(crc_,i,:) %if i > 128*8 / 32 ; 
prefetch next 3KB data prefetchnta [block_2 + 128*32 - i*32] %endif + +%assign i (i-1) +%endrep + jmp next_ + +non_prefetch: +%assign i 128 +%rep 128-1 + +CONCAT(crc_,i,:) + crc32 crc_init, qword [block_0 - i*8] + crc32 crc1, qword [block_1 - i*8] + crc32 crc2, qword [block_2 - i*8] %assign i (i-1) %endrep +next_: CONCAT(crc_,i,:) crc32 crc_init, qword [block_0 - i*8] crc32 crc1, qword [block_1 - i*8] @@ -568,5 +586,5 @@ K_table: dq 0x1a0f717c4, 0x0170076fa ;;; func core, ver, snum -slversion crc32_iscsi_01, 01, 03, 0015 +slversion crc32_iscsi_01, 01, 04, 0015 diff --git a/src/main/native/compression/isa-l-master/crc/crc64_multibinary.asm b/src/main/native/compression/isa-l-master/crc/crc64_multibinary.asm index a20c8a79..81ae2ec5 100644 --- a/src/main/native/compression/isa-l-master/crc/crc64_multibinary.asm +++ b/src/main/native/compression/isa-l-master/crc/crc64_multibinary.asm @@ -35,12 +35,6 @@ default rel [bits 64] -%ifidn __OUTPUT_FORMAT__, elf64 -%define WRT_OPT wrt ..plt -%else -%define WRT_OPT -%endif - %include "reg_sizes.asm" extern crc64_ecma_refl_by8 diff --git a/src/main/native/compression/isa-l-master/crc/crc_base.c b/src/main/native/compression/isa-l-master/crc/crc_base.c index 9f89b0cf..60c5f447 100644 --- a/src/main/native/compression/isa-l-master/crc/crc_base.c +++ b/src/main/native/compression/isa-l-master/crc/crc_base.c @@ -135,6 +135,26 @@ uint16_t crc16_t10dif_base(uint16_t seed, uint8_t * buf, uint64_t len) return rem; } +// crc16_t10dif baseline function +// Slow crc16 from the definition. Can be sped up with a lookup table. +uint16_t crc16_t10dif_copy_base(uint16_t seed, uint8_t * dst, uint8_t * src, uint64_t len) +{ + size_t rem = seed; + unsigned int i, j; + + uint16_t poly = 0x8bb7; // t10dif standard + + for (i = 0; i < len; i++) { + rem = rem ^ (src[i] << 8); + dst[i] = src[i]; + for (j = 0; j < MAX_ITER; j++) { + rem = rem << 1; + rem = (rem & 0x10000) ? 
rem ^ poly : rem; + } + } + return rem; +} + // crc32_ieee baseline function // Slow crc32 from the definition. Can be sped up with a lookup table. uint32_t crc32_ieee_base(uint32_t seed, uint8_t * buf, uint64_t len) diff --git a/src/main/native/compression/isa-l-master/crc/crc_base_aliases.c b/src/main/native/compression/isa-l-master/crc/crc_base_aliases.c index 855b18fb..0ffc62f9 100644 --- a/src/main/native/compression/isa-l-master/crc/crc_base_aliases.c +++ b/src/main/native/compression/isa-l-master/crc/crc_base_aliases.c @@ -41,6 +41,11 @@ uint16_t crc16_t10dif(uint16_t seed, const unsigned char *buf, uint64_t len) return crc16_t10dif_base(seed, (uint8_t *) buf, len); } +uint16_t crc16_t10dif_copy(uint16_t seed, uint8_t * dst, uint8_t * src, uint64_t len) +{ + return crc16_t10dif_copy_base(seed, dst, src, len); +} + uint32_t crc32_ieee(uint32_t seed, const unsigned char *buf, uint64_t len) { return crc32_ieee_base(seed, (uint8_t *) buf, len); diff --git a/src/main/native/compression/isa-l-master/crc/crc_multibinary.asm b/src/main/native/compression/isa-l-master/crc/crc_multibinary.asm index 2bbab0ea..64075be1 100644 --- a/src/main/native/compression/isa-l-master/crc/crc_multibinary.asm +++ b/src/main/native/compression/isa-l-master/crc/crc_multibinary.asm @@ -30,12 +30,6 @@ default rel [bits 64] -%ifidn __OUTPUT_FORMAT__, elf64 -%define WRT_OPT wrt ..plt -%else -%define WRT_OPT -%endif - %include "reg_sizes.asm" extern crc32_iscsi_00 @@ -53,6 +47,9 @@ extern crc16_t10dif_base extern crc32_gzip_refl_by8 extern crc32_gzip_refl_base +extern crc16_t10dif_copy_by4 +extern crc16_t10dif_copy_base + %include "multibinary.asm" section .data @@ -182,6 +179,9 @@ use_t10dif_base: mbin_interface crc32_gzip_refl mbin_dispatch_init_clmul crc32_gzip_refl, crc32_gzip_refl_base, crc32_gzip_refl_by8 +mbin_interface crc16_t10dif_copy +mbin_dispatch_init_clmul crc16_t10dif_copy, crc16_t10dif_copy_base, crc16_t10dif_copy_by4 + ;;; func core, ver, snum slversion crc16_t10dif, 00, 
03, 011a slversion crc32_ieee, 00, 03, 011b diff --git a/src/main/native/compression/isa-l-master/erasure_code/Makefile.am b/src/main/native/compression/isa-l-master/erasure_code/Makefile.am index 4edd4c50..28f54d41 100644 --- a/src/main/native/compression/isa-l-master/erasure_code/Makefile.am +++ b/src/main/native/compression/isa-l-master/erasure_code/Makefile.am @@ -102,28 +102,16 @@ lsrc_x86_32 += \ unit_tests32 += erasure_code/erasure_code_base_test \ erasure_code/erasure_code_test \ - erasure_code/erasure_code_sse_test \ erasure_code/gf_vect_mul_test \ erasure_code/gf_vect_mul_base_test \ erasure_code/gf_vect_dot_prod_base_test \ - erasure_code/gf_vect_dot_prod_test \ - erasure_code/gf_vect_dot_prod_avx_test \ - erasure_code/gf_vect_dot_prod_sse_test \ - erasure_code/gf_2vect_dot_prod_sse_test \ - erasure_code/gf_3vect_dot_prod_sse_test \ - erasure_code/gf_4vect_dot_prod_sse_test + erasure_code/gf_vect_dot_prod_test perf_tests32 += erasure_code/gf_vect_mul_perf \ erasure_code/gf_vect_dot_prod_perf \ erasure_code/erasure_code_perf \ erasure_code/erasure_code_base_perf \ - erasure_code/erasure_code_sse_perf \ - erasure_code/gf_vect_dot_prod_1tbl \ - erasure_code/gf_vect_dot_prod_avx_perf\ - erasure_code/gf_vect_dot_prod_sse_perf\ - erasure_code/gf_2vect_dot_prod_sse_perf \ - erasure_code/gf_3vect_dot_prod_sse_perf \ - erasure_code/gf_4vect_dot_prod_sse_perf + erasure_code/gf_vect_dot_prod_1tbl src_include += -I $(srcdir)/erasure_code extern_hdrs += include/erasure_code.h \ @@ -138,41 +126,28 @@ check_tests += erasure_code/gf_vect_mul_test \ erasure_code/gf_inverse_test \ erasure_code/erasure_code_update_test -unit_tests += erasure_code/gf_vect_mul_sse_test \ - erasure_code/gf_vect_mul_avx_test \ +unit_tests += \ erasure_code/gf_vect_mul_base_test \ - erasure_code/gf_vect_dot_prod_sse_test \ - erasure_code/gf_vect_dot_prod_avx_test \ - erasure_code/gf_2vect_dot_prod_sse_test \ - erasure_code/gf_3vect_dot_prod_sse_test \ - erasure_code/gf_4vect_dot_prod_sse_test \ 
- erasure_code/gf_5vect_dot_prod_sse_test \ - erasure_code/gf_6vect_dot_prod_sse_test \ erasure_code/gf_vect_dot_prod_base_test \ erasure_code/gf_vect_dot_prod_test \ erasure_code/gf_vect_mad_test \ - erasure_code/erasure_code_base_test \ - erasure_code/erasure_code_sse_test + erasure_code/erasure_code_base_test perf_tests += erasure_code/gf_vect_mul_perf \ - erasure_code/gf_vect_mul_sse_perf \ - erasure_code/gf_vect_mul_avx_perf \ - erasure_code/gf_vect_dot_prod_sse_perf \ - erasure_code/gf_vect_dot_prod_avx_perf \ - erasure_code/gf_2vect_dot_prod_sse_perf \ - erasure_code/gf_3vect_dot_prod_sse_perf \ - erasure_code/gf_4vect_dot_prod_sse_perf \ - erasure_code/gf_5vect_dot_prod_sse_perf \ - erasure_code/gf_6vect_dot_prod_sse_perf \ erasure_code/gf_vect_dot_prod_perf \ erasure_code/gf_vect_dot_prod_1tbl \ erasure_code/gf_vect_mad_perf \ erasure_code/erasure_code_perf \ erasure_code/erasure_code_base_perf \ - erasure_code/erasure_code_sse_perf \ erasure_code/erasure_code_update_perf -other_tests += erasure_code/gen_rs_matrix_limits +other_tests += \ + erasure_code/gen_rs_matrix_limits \ + erasure_code/gf_2vect_dot_prod_sse_test \ + erasure_code/gf_3vect_dot_prod_sse_test \ + erasure_code/gf_4vect_dot_prod_sse_test \ + erasure_code/gf_5vect_dot_prod_sse_test \ + erasure_code/gf_6vect_dot_prod_sse_test other_src += include/test.h \ include/types.h diff --git a/src/main/native/compression/isa-l-master/erasure_code/ec_multibinary.asm b/src/main/native/compression/isa-l-master/erasure_code/ec_multibinary.asm index 4bb4273b..e862850b 100644 --- a/src/main/native/compression/isa-l-master/erasure_code/ec_multibinary.asm +++ b/src/main/native/compression/isa-l-master/erasure_code/ec_multibinary.asm @@ -27,12 +27,6 @@ ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -%ifidn __OUTPUT_FORMAT__, elf64 - %define WRT_OPT wrt ..plt -%else - %define WRT_OPT -%endif - %include "reg_sizes.asm" %include "multibinary.asm" diff --git a/src/main/native/compression/isa-l-master/erasure_code/erasure_code_sse_perf.c b/src/main/native/compression/isa-l-master/erasure_code/erasure_code_sse_perf.c deleted file mode 100644 index 459fa7cf..00000000 --- a/src/main/native/compression/isa-l-master/erasure_code/erasure_code_sse_perf.c +++ /dev/null @@ -1,168 +0,0 @@ -/********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**********************************************************************/ - -#include -#include -#include // for memset, memcmp -#include "erasure_code.h" -#include "test.h" - -//#define CACHED_TEST -#ifdef CACHED_TEST -// Cached test, loop many times over small dataset -# define TEST_SOURCES 32 -# define TEST_LEN(m) ((128*1024 / m) & ~(64-1)) -# define TEST_LOOPS(m) (10000*m) -# define TEST_TYPE_STR "_warm" -#else -# ifndef TEST_CUSTOM -// Uncached test. Pull from large mem base. 
-# define TEST_SOURCES 32 -# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ -# define TEST_LEN(m) ((GT_L3_CACHE / m) & ~(64-1)) -# define TEST_LOOPS(m) (50*m) -# define TEST_TYPE_STR "_cold" -# else -# define TEST_TYPE_STR "_cus" -# ifndef TEST_LOOPS -# define TEST_LOOPS(m) 1000 -# endif -# endif -#endif - -#define MMAX TEST_SOURCES -#define KMAX TEST_SOURCES - -typedef unsigned char u8; - -int main(int argc, char *argv[]) -{ - int i, j, rtest, m, k, nerrs, r; - void *buf; - u8 *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; - u8 a[MMAX * KMAX], b[MMAX * KMAX], c[MMAX * KMAX], d[MMAX * KMAX]; - u8 g_tbls[KMAX * TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; - u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES]; - struct perf start, stop; - - // Pick test parameters - m = 14; - k = 10; - nerrs = 4; - const u8 err_list[] = { 2, 4, 5, 7 }; - - printf("erasure_code_sse_perf: %dx%d %d\n", m, TEST_LEN(m), nerrs); - - if (m > MMAX || k > KMAX || nerrs > (m - k)) { - printf(" Input test parameter error\n"); - return -1; - } - - memcpy(src_err_list, err_list, nerrs); - memset(src_in_err, 0, TEST_SOURCES); - for (i = 0; i < nerrs; i++) - src_in_err[src_err_list[i]] = 1; - - // Allocate the arrays - for (i = 0; i < m; i++) { - if (posix_memalign(&buf, 64, TEST_LEN(m))) { - printf("alloc error: Fail\n"); - return -1; - } - buffs[i] = buf; - } - - for (i = 0; i < (m - k); i++) { - if (posix_memalign(&buf, 64, TEST_LEN(m))) { - printf("alloc error: Fail\n"); - return -1; - } - temp_buffs[i] = buf; - } - - // Make random data - for (i = 0; i < k; i++) - for (j = 0; j < TEST_LEN(m); j++) - buffs[i][j] = rand(); - - gf_gen_rs_matrix(a, m, k); - ec_init_tables(k, m - k, &a[k * k], g_tbls); - ec_encode_data_sse(TEST_LEN(m), k, m - k, g_tbls, buffs, &buffs[k]); - - // Start encode test - perf_start(&start); - for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) { - // Make parity vects - ec_init_tables(k, m - k, &a[k * k], g_tbls); - ec_encode_data_sse(TEST_LEN(m), k, 
m - k, g_tbls, buffs, &buffs[k]); - } - perf_stop(&stop); - printf("erasure_code_sse_encode" TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)(TEST_LEN(m)) * (m) * rtest); - - // Start decode test - perf_start(&start); - for (rtest = 0; rtest < TEST_LOOPS(m); rtest++) { - // Construct b by removing error rows - for (i = 0, r = 0; i < k; i++, r++) { - while (src_in_err[r]) - r++; - recov[i] = buffs[r]; - for (j = 0; j < k; j++) - b[k * i + j] = a[k * r + j]; - } - - if (gf_invert_matrix(b, d, k) < 0) { - printf("BAD MATRIX\n"); - return -1; - } - - for (i = 0; i < nerrs; i++) - for (j = 0; j < k; j++) - c[k * i + j] = d[k * src_err_list[i] + j]; - - // Recover data - ec_init_tables(k, nerrs, c, g_tbls); - ec_encode_data_sse(TEST_LEN(m), k, nerrs, g_tbls, recov, temp_buffs); - } - perf_stop(&stop); - - for (i = 0; i < nerrs; i++) { - if (0 != memcmp(temp_buffs[i], buffs[src_err_list[i]], TEST_LEN(m))) { - printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); - return -1; - } - } - - printf("erasure_code_sse_decode" TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)(TEST_LEN(m)) * (k + nerrs) * rtest); - - printf("done all: Pass\n"); - return 0; -} diff --git a/src/main/native/compression/isa-l-master/erasure_code/erasure_code_sse_test.c b/src/main/native/compression/isa-l-master/erasure_code/erasure_code_sse_test.c deleted file mode 100644 index c0cd0b9f..00000000 --- a/src/main/native/compression/isa-l-master/erasure_code/erasure_code_sse_test.c +++ /dev/null @@ -1,764 +0,0 @@ -/********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-**********************************************************************/ - -#include -#include -#include // for memset, memcmp -#include "erasure_code.h" -#include "types.h" - -#define TEST_LEN 8192 -#define TEST_SIZE (TEST_LEN/2) - -#ifndef TEST_SOURCES -# define TEST_SOURCES 127 -#endif -#ifndef RANDOMS -# define RANDOMS 200 -#endif - -#define MMAX TEST_SOURCES -#define KMAX TEST_SOURCES - -#define EFENCE_TEST_MIN_SIZE 16 - -#ifdef EC_ALIGNED_ADDR -// Define power of 2 range to check ptr, len alignment -# define PTR_ALIGN_CHK_B 0 -# define LEN_ALIGN_CHK_B 0 // 0 for aligned only -#else -// Define power of 2 range to check ptr, len alignment -# define PTR_ALIGN_CHK_B 32 -# define LEN_ALIGN_CHK_B 32 // 0 for aligned only -#endif - -#ifndef TEST_SEED -#define TEST_SEED 11 -#endif - -typedef unsigned char u8; - -void dump(unsigned char *buf, int len) -{ - int i; - for (i = 0; i < len;) { - printf(" %2x", 0xff & buf[i++]); - if (i % 32 == 0) - printf("\n"); - } - printf("\n"); -} - -void dump_matrix(unsigned char **s, int k, int m) -{ - int i, j; - for (i = 0; i < k; i++) { - for (j = 0; j < m; j++) { - printf(" %2x", s[i][j]); - } - printf("\n"); - } - printf("\n"); -} - -void dump_u8xu8(unsigned char *s, int k, int m) -{ - int i, j; - for (i = 0; i < k; i++) { - for (j = 0; j < m; j++) { - printf(" %2x", 0xff & s[j + (i * m)]); - } - printf("\n"); - } - printf("\n"); -} - -// Generate Random errors -static void gen_err_list(unsigned char *src_err_list, - unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m) -{ - int i, err; - int nerrs = 0, nsrcerrs = 0; - - for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) { - err = 1 & rand(); - src_in_err[i] = err; - if (err) { - src_err_list[nerrs++] = i; - if (i < k) { - nsrcerrs++; - } - } - } - if (nerrs == 0) { // should have at least one error - while ((err = (rand() % KMAX)) >= m) ; - src_err_list[nerrs++] = err; - src_in_err[err] = 1; - if (err < k) - nsrcerrs = 1; - } - *pnerrs = nerrs; - 
*pnsrcerrs = nsrcerrs; - return; -} - -#define NO_INVERT_MATRIX -2 -// Generate decode matrix from encode matrix -static int gf_gen_decode_matrix(unsigned char *encode_matrix, - unsigned char *decode_matrix, - unsigned char *invert_matrix, - unsigned int *decode_index, - unsigned char *src_err_list, - unsigned char *src_in_err, - int nerrs, int nsrcerrs, int k, int m) -{ - int i, j, p; - int r; - unsigned char *backup, *b, s; - int incr = 0; - - b = malloc(MMAX * KMAX); - backup = malloc(MMAX * KMAX); - - if (b == NULL || backup == NULL) { - printf("Test failure! Error with malloc\n"); - free(b); - free(backup); - return -1; - } - // Construct matrix b by removing error rows - for (i = 0, r = 0; i < k; i++, r++) { - while (src_in_err[r]) - r++; - for (j = 0; j < k; j++) { - b[k * i + j] = encode_matrix[k * r + j]; - backup[k * i + j] = encode_matrix[k * r + j]; - } - decode_index[i] = r; - } - incr = 0; - while (gf_invert_matrix(b, invert_matrix, k) < 0) { - if (nerrs == (m - k)) { - free(b); - free(backup); - printf("BAD MATRIX\n"); - return NO_INVERT_MATRIX; - } - incr++; - memcpy(b, backup, MMAX * KMAX); - for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) { - if (src_err_list[i] == (decode_index[k - 1] + incr)) { - // skip the erased parity line - incr++; - continue; - } - } - if (decode_index[k - 1] + incr >= m) { - free(b); - free(backup); - printf("BAD MATRIX\n"); - return NO_INVERT_MATRIX; - } - decode_index[k - 1] += incr; - for (j = 0; j < k; j++) - b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j]; - - }; - - for (i = 0; i < nsrcerrs; i++) { - for (j = 0; j < k; j++) { - decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j]; - } - } - /* src_err_list from encode_matrix * invert of b for parity decoding */ - for (p = nsrcerrs; p < nerrs; p++) { - for (i = 0; i < k; i++) { - s = 0; - for (j = 0; j < k; j++) - s ^= gf_mul(invert_matrix[j * k + i], - encode_matrix[k * src_err_list[p] + j]); - - decode_matrix[k * p + i] = s; - } - } - 
free(b); - free(backup); - return 0; -} - -int main(int argc, char *argv[]) -{ - int re = 0; - int i, j, p, rtest, m, k; - int nerrs, nsrcerrs; - void *buf; - unsigned int decode_index[MMAX]; - unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES]; - unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls; - unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES]; - unsigned char *recov[TEST_SOURCES]; - - int rows, align, size; - unsigned char *efence_buffs[TEST_SOURCES]; - unsigned int offset; - u8 *ubuffs[TEST_SOURCES]; - u8 *temp_ubuffs[TEST_SOURCES]; - - printf("erasure_code_sse_test: %dx%d ", TEST_SOURCES, TEST_LEN); - srand(TEST_SEED); - - // Allocate the arrays - for (i = 0; i < TEST_SOURCES; i++) { - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - buffs[i] = buf; - } - - for (i = 0; i < TEST_SOURCES; i++) { - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - temp_buffs[i] = buf; - } - - // Test erasure code by encode and recovery - - encode_matrix = malloc(MMAX * KMAX); - decode_matrix = malloc(MMAX * KMAX); - invert_matrix = malloc(MMAX * KMAX); - g_tbls = malloc(KMAX * TEST_SOURCES * 32); - if (encode_matrix == NULL || decode_matrix == NULL - || invert_matrix == NULL || g_tbls == NULL) { - printf("Test failure! Error with malloc\n"); - return -1; - } - // Pick a first test - m = 9; - k = 5; - if (m > MMAX || k > KMAX) - return -1; - - // Make random data - for (i = 0; i < k; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - // Generate encode matrix encode_matrix - // The matrix generated by gf_gen_rs_matrix - // is not always invertable. 
- gf_gen_rs_matrix(encode_matrix, m, k); - - // Generate g_tbls from encode matrix encode_matrix - ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); - - // Perform matrix dot_prod for EC encoding - // using g_tbls from encode matrix encode_matrix - ec_encode_data_sse(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); - - // Choose random buffers to be in erasure - memset(src_in_err, 0, TEST_SOURCES); - gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); - - // Generate decode matrix - re = gf_gen_decode_matrix(encode_matrix, decode_matrix, - invert_matrix, decode_index, src_err_list, src_in_err, - nerrs, nsrcerrs, k, m); - if (re != 0) { - printf("Fail to gf_gen_decode_matrix\n"); - return -1; - } - // Pack recovery array as list of valid sources - // Its order must be the same as the order - // to generate matrix b in gf_gen_decode_matrix - for (i = 0; i < k; i++) { - recov[i] = buffs[decode_index[i]]; - } - - // Recover data - ec_init_tables(k, nerrs, decode_matrix, g_tbls); - ec_encode_data_sse(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); - for (i = 0; i < nerrs; i++) { - - if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { - printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); - printf(" - erase list = "); - for (j = 0; j < nerrs; j++) - printf(" %d", src_err_list[j]); - printf(" - Index = "); - for (p = 0; p < k; p++) - printf(" %d", decode_index[p]); - printf("\nencode_matrix:\n"); - dump_u8xu8((u8 *) encode_matrix, m, k); - printf("inv b:\n"); - dump_u8xu8((u8 *) invert_matrix, k, k); - printf("\ndecode_matrix:\n"); - dump_u8xu8((u8 *) decode_matrix, m, k); - printf("recov %d:", src_err_list[i]); - dump(temp_buffs[k + i], 25); - printf("orig :"); - dump(buffs[src_err_list[i]], 25); - return -1; - } - } - - // Pick a first test - m = 9; - k = 5; - if (m > MMAX || k > KMAX) - return -1; - - // Make random data - for (i = 0; i < k; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - // The matrix 
generated by gf_gen_cauchy1_matrix - // is always invertable. - gf_gen_cauchy1_matrix(encode_matrix, m, k); - - // Generate g_tbls from encode matrix encode_matrix - ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); - - // Perform matrix dot_prod for EC encoding - // using g_tbls from encode matrix encode_matrix - ec_encode_data_sse(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); - - // Choose random buffers to be in erasure - memset(src_in_err, 0, TEST_SOURCES); - gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); - - // Generate decode matrix - re = gf_gen_decode_matrix(encode_matrix, decode_matrix, - invert_matrix, decode_index, src_err_list, src_in_err, - nerrs, nsrcerrs, k, m); - if (re != 0) { - printf("Fail to gf_gen_decode_matrix\n"); - return -1; - } - // Pack recovery array as list of valid sources - // Its order must be the same as the order - // to generate matrix b in gf_gen_decode_matrix - for (i = 0; i < k; i++) { - recov[i] = buffs[decode_index[i]]; - } - - // Recover data - ec_init_tables(k, nerrs, decode_matrix, g_tbls); - ec_encode_data_sse(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); - for (i = 0; i < nerrs; i++) { - - if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { - printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); - printf(" - erase list = "); - for (j = 0; j < nerrs; j++) - printf(" %d", src_err_list[j]); - printf(" - Index = "); - for (p = 0; p < k; p++) - printf(" %d", decode_index[p]); - printf("\nencode_matrix:\n"); - dump_u8xu8((u8 *) encode_matrix, m, k); - printf("inv b:\n"); - dump_u8xu8((u8 *) invert_matrix, k, k); - printf("\ndecode_matrix:\n"); - dump_u8xu8((u8 *) decode_matrix, m, k); - printf("recov %d:", src_err_list[i]); - dump(temp_buffs[k + i], 25); - printf("orig :"); - dump(buffs[src_err_list[i]], 25); - return -1; - } - } - - // Do more random tests - for (rtest = 0; rtest < RANDOMS; rtest++) { - while ((m = (rand() % MMAX)) < 2) ; - while ((k = (rand() % KMAX)) 
>= m || k < 1) ; - - if (m > MMAX || k > KMAX) - continue; - - // Make random data - for (i = 0; i < k; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - // The matrix generated by gf_gen_cauchy1_matrix - // is always invertable. - gf_gen_cauchy1_matrix(encode_matrix, m, k); - - // Make parity vects - // Generate g_tbls from encode matrix a - ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); - // Perform matrix dot_prod for EC encoding - // using g_tbls from encode matrix a - ec_encode_data_sse(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]); - - // Random errors - memset(src_in_err, 0, TEST_SOURCES); - gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); - - // Generate decode matrix - re = gf_gen_decode_matrix(encode_matrix, decode_matrix, - invert_matrix, decode_index, src_err_list, - src_in_err, nerrs, nsrcerrs, k, m); - if (re != 0) { - printf("Fail to gf_gen_decode_matrix\n"); - return -1; - } - // Pack recovery array as list of valid sources - // Its order must be the same as the order - // to generate matrix b in gf_gen_decode_matrix - for (i = 0; i < k; i++) { - recov[i] = buffs[decode_index[i]]; - } - - // Recover data - ec_init_tables(k, nerrs, decode_matrix, g_tbls); - ec_encode_data_sse(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]); - - for (i = 0; i < nerrs; i++) { - - if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) { - printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); - printf(" - erase list = "); - for (j = 0; j < nerrs; j++) - printf(" %d", src_err_list[j]); - printf(" - Index = "); - for (p = 0; p < k; p++) - printf(" %d", decode_index[p]); - printf("\nencode_matrix:\n"); - dump_u8xu8((u8 *) encode_matrix, m, k); - printf("inv b:\n"); - dump_u8xu8((u8 *) invert_matrix, k, k); - printf("\ndecode_matrix:\n"); - dump_u8xu8((u8 *) decode_matrix, m, k); - printf("orig data:\n"); - dump_matrix(buffs, m, 25); - printf("orig :"); - dump(buffs[src_err_list[i]], 25); - printf("recov %d:", 
src_err_list[i]); - dump(temp_buffs[k + i], 25); - return -1; - } - } - putchar('.'); - } - - // Run tests at end of buffer for Electric Fence - k = 16; - align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; - if (k > KMAX) - return -1; - - for (rows = 1; rows <= 16; rows++) { - m = k + rows; - if (m > MMAX) - return -1; - - // Make random data - for (i = 0; i < k; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - for (size = EFENCE_TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { - for (i = 0; i < m; i++) { // Line up TEST_SIZE from end - efence_buffs[i] = buffs[i] + TEST_LEN - size; - } - - // The matrix generated by gf_gen_cauchy1_matrix - // is always invertable. - gf_gen_cauchy1_matrix(encode_matrix, m, k); - - // Make parity vects - // Generate g_tbls from encode matrix a - ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); - // Perform matrix dot_prod for EC encoding - // using g_tbls from encode matrix a - ec_encode_data_sse(size, k, m - k, g_tbls, efence_buffs, - &efence_buffs[k]); - - // Random errors - memset(src_in_err, 0, TEST_SOURCES); - gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); - - // Generate decode matrix - re = gf_gen_decode_matrix(encode_matrix, decode_matrix, - invert_matrix, decode_index, src_err_list, - src_in_err, nerrs, nsrcerrs, k, m); - if (re != 0) { - printf("Fail to gf_gen_decode_matrix\n"); - return -1; - } - // Pack recovery array as list of valid sources - // Its order must be the same as the order - // to generate matrix b in gf_gen_decode_matrix - for (i = 0; i < k; i++) { - recov[i] = efence_buffs[decode_index[i]]; - } - - // Recover data - ec_init_tables(k, nerrs, decode_matrix, g_tbls); - ec_encode_data_sse(size, k, nerrs, g_tbls, recov, &temp_buffs[k]); - - for (i = 0; i < nerrs; i++) { - - if (0 != - memcmp(temp_buffs[k + i], efence_buffs[src_err_list[i]], - size)) { - printf("Efence: Fail error recovery (%d, %d, %d)\n", m, - k, nerrs); - - printf("size = %d\n", size); - - printf("Test erase 
list = "); - for (j = 0; j < nerrs; j++) - printf(" %d", src_err_list[j]); - printf(" - Index = "); - for (p = 0; p < k; p++) - printf(" %d", decode_index[p]); - printf("\nencode_matrix:\n"); - dump_u8xu8((u8 *) encode_matrix, m, k); - printf("inv b:\n"); - dump_u8xu8((u8 *) invert_matrix, k, k); - printf("\ndecode_matrix:\n"); - dump_u8xu8((u8 *) decode_matrix, m, k); - - printf("recov %d:", src_err_list[i]); - dump(temp_buffs[k + i], align); - printf("orig :"); - dump(efence_buffs[src_err_list[i]], align); - return -1; - } - } - } - - } - - // Test rand ptr alignment if available - - for (rtest = 0; rtest < RANDOMS; rtest++) { - while ((m = (rand() % MMAX)) < 2) ; - while ((k = (rand() % KMAX)) >= m || k < 1) ; - - if (m > MMAX || k > KMAX) - continue; - - size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~15; - - offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; - // Add random offsets - for (i = 0; i < m; i++) { - memset(buffs[i], 0, TEST_LEN); // zero pad to check write-over - memset(temp_buffs[i], 0, TEST_LEN); // zero pad to check write-over - ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); - temp_ubuffs[i] = temp_buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); - } - - for (i = 0; i < k; i++) - for (j = 0; j < size; j++) - ubuffs[i][j] = rand(); - - // The matrix generated by gf_gen_cauchy1_matrix - // is always invertable. 
- gf_gen_cauchy1_matrix(encode_matrix, m, k); - - // Make parity vects - // Generate g_tbls from encode matrix a - ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); - // Perform matrix dot_prod for EC encoding - // using g_tbls from encode matrix a - ec_encode_data_sse(size, k, m - k, g_tbls, ubuffs, &ubuffs[k]); - - // Random errors - memset(src_in_err, 0, TEST_SOURCES); - gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); - - // Generate decode matrix - re = gf_gen_decode_matrix(encode_matrix, decode_matrix, - invert_matrix, decode_index, src_err_list, - src_in_err, nerrs, nsrcerrs, k, m); - if (re != 0) { - printf("Fail to gf_gen_decode_matrix\n"); - return -1; - } - // Pack recovery array as list of valid sources - // Its order must be the same as the order - // to generate matrix b in gf_gen_decode_matrix - for (i = 0; i < k; i++) { - recov[i] = ubuffs[decode_index[i]]; - } - - // Recover data - ec_init_tables(k, nerrs, decode_matrix, g_tbls); - ec_encode_data_sse(size, k, nerrs, g_tbls, recov, &temp_ubuffs[k]); - - for (i = 0; i < nerrs; i++) { - - if (0 != memcmp(temp_ubuffs[k + i], ubuffs[src_err_list[i]], size)) { - printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); - printf(" - erase list = "); - for (j = 0; j < nerrs; j++) - printf(" %d", src_err_list[j]); - printf(" - Index = "); - for (p = 0; p < k; p++) - printf(" %d", decode_index[p]); - printf("\nencode_matrix:\n"); - dump_u8xu8((unsigned char *)encode_matrix, m, k); - printf("inv b:\n"); - dump_u8xu8((unsigned char *)invert_matrix, k, k); - printf("\ndecode_matrix:\n"); - dump_u8xu8((unsigned char *)decode_matrix, m, k); - printf("orig data:\n"); - dump_matrix(ubuffs, m, 25); - printf("orig :"); - dump(ubuffs[src_err_list[i]], 25); - printf("recov %d:", src_err_list[i]); - dump(temp_ubuffs[k + i], 25); - return -1; - } - } - - // Confirm that padding around dests is unchanged - memset(temp_buffs[0], 0, PTR_ALIGN_CHK_B); // Make reference zero buff - - for (i = 0; i < 
m; i++) { - - offset = ubuffs[i] - buffs[i]; - - if (memcmp(buffs[i], temp_buffs[0], offset)) { - printf("Fail rand ualign encode pad start\n"); - return -1; - } - if (memcmp - (buffs[i] + offset + size, temp_buffs[0], - PTR_ALIGN_CHK_B - offset)) { - printf("Fail rand ualign encode pad end\n"); - return -1; - } - } - - for (i = 0; i < nerrs; i++) { - - offset = temp_ubuffs[k + i] - temp_buffs[k + i]; - if (memcmp(temp_buffs[k + i], temp_buffs[0], offset)) { - printf("Fail rand ualign decode pad start\n"); - return -1; - } - if (memcmp - (temp_buffs[k + i] + offset + size, temp_buffs[0], - PTR_ALIGN_CHK_B - offset)) { - printf("Fail rand ualign decode pad end\n"); - return -1; - } - } - - putchar('.'); - } - - // Test size alignment - - align = (LEN_ALIGN_CHK_B != 0) ? 13 : 16; - - for (size = TEST_LEN; size > 0; size -= align) { - while ((m = (rand() % MMAX)) < 2) ; - while ((k = (rand() % KMAX)) >= m || k < 1) ; - - if (m > MMAX || k > KMAX) - continue; - - for (i = 0; i < k; i++) - for (j = 0; j < size; j++) - buffs[i][j] = rand(); - - // The matrix generated by gf_gen_cauchy1_matrix - // is always invertable. 
- gf_gen_cauchy1_matrix(encode_matrix, m, k); - - // Make parity vects - // Generate g_tbls from encode matrix a - ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls); - // Perform matrix dot_prod for EC encoding - // using g_tbls from encode matrix a - ec_encode_data_sse(size, k, m - k, g_tbls, buffs, &buffs[k]); - - // Random errors - memset(src_in_err, 0, TEST_SOURCES); - gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m); - // Generate decode matrix - re = gf_gen_decode_matrix(encode_matrix, decode_matrix, - invert_matrix, decode_index, src_err_list, - src_in_err, nerrs, nsrcerrs, k, m); - if (re != 0) { - printf("Fail to gf_gen_decode_matrix\n"); - return -1; - } - // Pack recovery array as list of valid sources - // Its order must be the same as the order - // to generate matrix b in gf_gen_decode_matrix - for (i = 0; i < k; i++) { - recov[i] = buffs[decode_index[i]]; - } - - // Recover data - ec_init_tables(k, nerrs, decode_matrix, g_tbls); - ec_encode_data_sse(size, k, nerrs, g_tbls, recov, &temp_buffs[k]); - - for (i = 0; i < nerrs; i++) { - - if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], size)) { - printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); - printf(" - erase list = "); - for (j = 0; j < nerrs; j++) - printf(" %d", src_err_list[j]); - printf(" - Index = "); - for (p = 0; p < k; p++) - printf(" %d", decode_index[p]); - printf("\nencode_matrix:\n"); - dump_u8xu8((unsigned char *)encode_matrix, m, k); - printf("inv b:\n"); - dump_u8xu8((unsigned char *)invert_matrix, k, k); - printf("\ndecode_matrix:\n"); - dump_u8xu8((unsigned char *)decode_matrix, m, k); - printf("orig data:\n"); - dump_matrix(buffs, m, 25); - printf("orig :"); - dump(buffs[src_err_list[i]], 25); - printf("recov %d:", src_err_list[i]); - dump(temp_buffs[k + i], 25); - return -1; - } - } - } - - printf("done EC tests: Pass\n"); - return 0; -} diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_2vect_dot_prod_sse_perf.c 
b/src/main/native/compression/isa-l-master/erasure_code/gf_2vect_dot_prod_sse_perf.c deleted file mode 100644 index 8345796d..00000000 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_2vect_dot_prod_sse_perf.c +++ /dev/null @@ -1,216 +0,0 @@ -/********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-**********************************************************************/ - -#include -#include -#include // for memset, memcmp -#include "erasure_code.h" -#include "test.h" - -#ifndef FUNCTION_UNDER_TEST -# define FUNCTION_UNDER_TEST gf_2vect_dot_prod_sse -#endif - -#define str(s) #s -#define xstr(s) str(s) - -//#define CACHED_TEST -#ifdef CACHED_TEST -// Cached test, loop many times over small dataset -# define TEST_SOURCES 10 -# define TEST_LEN 8*1024 -# define TEST_LOOPS 40000 -# define TEST_TYPE_STR "_warm" -#else -# ifndef TEST_CUSTOM -// Uncached test. Pull from large mem base. -# define TEST_SOURCES 10 -# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ -# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1)) -# define TEST_LOOPS 100 -# define TEST_TYPE_STR "_cold" -# else -# define TEST_TYPE_STR "_cus" -# ifndef TEST_LOOPS -# define TEST_LOOPS 1000 -# endif -# endif -#endif - -typedef unsigned char u8; - -void dump(unsigned char *buf, int len) -{ - int i; - for (i = 0; i < len;) { - printf(" %2x", 0xff & buf[i++]); - if (i % 32 == 0) - printf("\n"); - } - printf("\n"); -} - -void dump_matrix(unsigned char **s, int k, int m) -{ - int i, j; - for (i = 0; i < k; i++) { - for (j = 0; j < m; j++) { - printf(" %2x", s[i][j]); - } - printf("\n"); - } - printf("\n"); -} - -int main(int argc, char *argv[]) -{ - int i, j; - void *buf; - u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g_tbls[2 * TEST_SOURCES * 32]; - u8 *dest1, *dest2, *dest_ref1, *dest_ref2, *dest_ptrs[2]; - u8 *buffs[TEST_SOURCES]; - struct perf start, stop; - - printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN); - - // Allocate the arrays - for (i = 0; i < TEST_SOURCES; i++) { - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - buffs[i] = buf; - } - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest1 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: 
Fail"); - return -1; - } - dest2 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref1 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref2 = buf; - - dest_ptrs[0] = dest1; - dest_ptrs[1] = dest2; - - // Performance test - for (i = 0; i < TEST_SOURCES; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - memset(dest1, 0, TEST_LEN); - memset(dest2, 0, TEST_LEN); - memset(dest_ref1, 0, TEST_LEN); - memset(dest_ref2, 0, TEST_LEN); - - for (i = 0; i < TEST_SOURCES; i++) { - g1[i] = rand(); - g2[i] = rand(); - } - - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - } - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, - dest_ref2); - -#ifdef DO_REF_PERF - perf_start(&start); - for (i = 0; i < TEST_LOOPS / 100; i++) { - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - } - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], - buffs, dest_ref2); - } - perf_stop(&stop); - printf("gf_2vect_dot_prod_base" TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 2) * i); -#endif - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); - - perf_start(&start); - for (i = 0; i < TEST_LOOPS; i++) { - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - } - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); - } - perf_stop(&stop); - printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR 
": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 2) * i); - - if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref1, 25); - printf("dprod_dut:"); - dump(dest1, 25); - return -1; - } - if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref2, 25); - printf("dprod_dut:"); - dump(dest2, 25); - return -1; - } - - printf("pass perf check\n"); - return 0; - -} diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_3vect_dot_prod_sse_perf.c b/src/main/native/compression/isa-l-master/erasure_code/gf_3vect_dot_prod_sse_perf.c deleted file mode 100644 index a82f8297..00000000 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_3vect_dot_prod_sse_perf.c +++ /dev/null @@ -1,246 +0,0 @@ -/********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**********************************************************************/ - -#include -#include -#include // for memset, memcmp -#include "erasure_code.h" -#include "test.h" - -#ifndef FUNCTION_UNDER_TEST -# define FUNCTION_UNDER_TEST gf_3vect_dot_prod_sse -#endif - -#define str(s) #s -#define xstr(s) str(s) - -//#define CACHED_TEST -#ifdef CACHED_TEST -// Cached test, loop many times over small dataset -# define TEST_SOURCES 10 -# define TEST_LEN 8*1024 -# define TEST_LOOPS 40000 -# define TEST_TYPE_STR "_warm" -#else -# ifndef TEST_CUSTOM -// Uncached test. Pull from large mem base. 
-# define TEST_SOURCES 10 -# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ -# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1)) -# define TEST_LOOPS 100 -# define TEST_TYPE_STR "_cold" -# else -# define TEST_TYPE_STR "_cus" -# ifndef TEST_LOOPS -# define TEST_LOOPS 1000 -# endif -# endif -#endif - -typedef unsigned char u8; - -void dump(unsigned char *buf, int len) -{ - int i; - for (i = 0; i < len;) { - printf(" %2x", 0xff & buf[i++]); - if (i % 32 == 0) - printf("\n"); - } - printf("\n"); -} - -void dump_matrix(unsigned char **s, int k, int m) -{ - int i, j; - for (i = 0; i < k; i++) { - for (j = 0; j < m; j++) { - printf(" %2x", s[i][j]); - } - printf("\n"); - } - printf("\n"); -} - -int main(int argc, char *argv[]) -{ - int i, j; - void *buf; - u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; - u8 g_tbls[3 * TEST_SOURCES * 32], *dest_ptrs[3], *buffs[TEST_SOURCES]; - u8 *dest1, *dest2, *dest3, *dest_ref1, *dest_ref2, *dest_ref3; - struct perf start, stop; - - printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN); - - // Allocate the arrays - for (i = 0; i < TEST_SOURCES; i++) { - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - buffs[i] = buf; - } - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest1 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest2 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest3 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref1 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref2 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref3 = buf; - - dest_ptrs[0] = dest1; - dest_ptrs[1] = dest2; - dest_ptrs[2] = 
dest3; - - // Performance test - for (i = 0; i < TEST_SOURCES; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - memset(dest1, 0, TEST_LEN); - memset(dest2, 0, TEST_LEN); - memset(dest_ref1, 0, TEST_LEN); - memset(dest_ref2, 0, TEST_LEN); - - for (i = 0; i < TEST_SOURCES; i++) { - g1[i] = rand(); - g2[i] = rand(); - g3[i] = rand(); - } - - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]); - } - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, - dest_ref2); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, - dest_ref3); - -#ifdef DO_REF_PERF - perf_start(&start); - for (i = 0; i < TEST_LOOPS / 100; i++) { - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]); - } - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], - buffs, dest_ref2); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], - buffs, dest_ref3); - } - perf_stop(&stop); - printf("gf_3vect_dot_prod_base" TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 3) * i); -#endif - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); - - perf_start(&start); - for (i = 0; i < TEST_LOOPS; i++) { - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]); - } - - FUNCTION_UNDER_TEST(TEST_LEN, 
TEST_SOURCES, g_tbls, buffs, dest_ptrs); - } - perf_stop(&stop); - printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 3) * i); - - if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref1, 25); - printf("dprod_dut:"); - dump(dest1, 25); - return -1; - } - if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref2, 25); - printf("dprod_dut:"); - dump(dest2, 25); - return -1; - } - if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref3, 25); - printf("dprod_dut:"); - dump(dest3, 25); - return -1; - } - - printf("pass perf check\n"); - return 0; - -} diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_4vect_dot_prod_sse_perf.c b/src/main/native/compression/isa-l-master/erasure_code/gf_4vect_dot_prod_sse_perf.c deleted file mode 100644 index 1ea0fcaa..00000000 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_4vect_dot_prod_sse_perf.c +++ /dev/null @@ -1,281 +0,0 @@ -/********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**********************************************************************/ - -#include -#include -#include // for memset, memcmp -#include "erasure_code.h" -#include "test.h" - -#ifndef FUNCTION_UNDER_TEST -# define FUNCTION_UNDER_TEST gf_4vect_dot_prod_sse -#endif - -#define str(s) #s -#define xstr(s) str(s) - -//#define CACHED_TEST -#ifdef CACHED_TEST -// Cached test, loop many times over small dataset -# define TEST_SOURCES 10 -# define TEST_LEN 8*1024 -# define TEST_LOOPS 40000 -# define TEST_TYPE_STR "_warm" -#else -# ifndef TEST_CUSTOM -// Uncached test. Pull from large mem base. 
-# define TEST_SOURCES 10 -# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ -# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1)) -# define TEST_LOOPS 100 -# define TEST_TYPE_STR "_cold" -# else -# define TEST_TYPE_STR "_cus" -# ifndef TEST_LOOPS -# define TEST_LOOPS 1000 -# endif -# endif -#endif - -typedef unsigned char u8; - -void dump(unsigned char *buf, int len) -{ - int i; - for (i = 0; i < len;) { - printf(" %2x", 0xff & buf[i++]); - if (i % 32 == 0) - printf("\n"); - } - printf("\n"); -} - -void dump_matrix(unsigned char **s, int k, int m) -{ - int i, j; - for (i = 0; i < k; i++) { - for (j = 0; j < m; j++) { - printf(" %2x", s[i][j]); - } - printf("\n"); - } - printf("\n"); -} - -int main(int argc, char *argv[]) -{ - int i, j; - void *buf; - u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; - u8 g4[TEST_SOURCES], g_tbls[4 * TEST_SOURCES * 32], *buffs[TEST_SOURCES]; - u8 *dest1, *dest2, *dest3, *dest4, *dest_ref1, *dest_ref2, *dest_ref3; - u8 *dest_ref4, *dest_ptrs[4]; - struct perf start, stop; - - printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN); - - // Allocate the arrays - for (i = 0; i < TEST_SOURCES; i++) { - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - buffs[i] = buf; - } - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest1 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest2 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest3 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest4 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref1 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref2 = buf; - - if (posix_memalign(&buf, 
64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref3 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref4 = buf; - - dest_ptrs[0] = dest1; - dest_ptrs[1] = dest2; - dest_ptrs[2] = dest3; - dest_ptrs[3] = dest4; - - // Performance test - for (i = 0; i < TEST_SOURCES; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - memset(dest1, 0, TEST_LEN); - memset(dest2, 0, TEST_LEN); - memset(dest3, 0, TEST_LEN); - memset(dest4, 0, TEST_LEN); - memset(dest_ref1, 0, TEST_LEN); - memset(dest_ref2, 0, TEST_LEN); - memset(dest_ref3, 0, TEST_LEN); - memset(dest_ref4, 0, TEST_LEN); - - for (i = 0; i < TEST_SOURCES; i++) { - g1[i] = rand(); - g2[i] = rand(); - g3[i] = rand(); - g4[i] = rand(); - } - - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]); - } - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, - dest_ref2); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, - dest_ref3); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, - dest_ref4); - -#ifdef DO_REF_PERF - perf_start(&start); - for (i = 0; i < TEST_LOOPS / 100; i++) { - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]); - } - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], - buffs, 
dest_ref2); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], - buffs, dest_ref3); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], - buffs, dest_ref4); - } - perf_stop(&stop); - printf("gf_4vect_dot_prod_base" TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 4) * i); -#endif - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); - - perf_start(&start); - for (i = 0; i < TEST_LOOPS; i++) { - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]); - } - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); - } - perf_stop(&stop); - printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 4) * i); - - if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref1, 25); - printf("dprod_dut:"); - dump(dest1, 25); - return -1; - } - if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref2, 25); - printf("dprod_dut:"); - dump(dest2, 25); - return -1; - } - if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref3, 25); - printf("dprod_dut:"); - dump(dest3, 25); - return -1; - } - if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref4, 25); - 
printf("dprod_dut:"); - dump(dest4, 25); - return -1; - } - - printf("pass perf check\n"); - return 0; - -} diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_5vect_dot_prod_sse_perf.c b/src/main/native/compression/isa-l-master/erasure_code/gf_5vect_dot_prod_sse_perf.c deleted file mode 100644 index 7f53d9fa..00000000 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_5vect_dot_prod_sse_perf.c +++ /dev/null @@ -1,319 +0,0 @@ -/********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**********************************************************************/ - -#include -#include -#include // for memset, memcmp -#include "erasure_code.h" -#include "test.h" - -#ifndef FUNCTION_UNDER_TEST -# define FUNCTION_UNDER_TEST gf_5vect_dot_prod_sse -#endif - -#define str(s) #s -#define xstr(s) str(s) - -//#define CACHED_TEST -#ifdef CACHED_TEST -// Cached test, loop many times over small dataset -# define TEST_SOURCES 10 -# define TEST_LEN 8*1024 -# define TEST_LOOPS 40000 -# define TEST_TYPE_STR "_warm" -#else -# ifndef TEST_CUSTOM -// Uncached test. Pull from large mem base. 
-# define TEST_SOURCES 10 -# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ -# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1)) -# define TEST_LOOPS 100 -# define TEST_TYPE_STR "_cold" -# else -# define TEST_TYPE_STR "_cus" -# ifndef TEST_LOOPS -# define TEST_LOOPS 1000 -# endif -# endif -#endif - -typedef unsigned char u8; - -void dump(unsigned char *buf, int len) -{ - int i; - for (i = 0; i < len;) { - printf(" %2x", 0xff & buf[i++]); - if (i % 32 == 0) - printf("\n"); - } - printf("\n"); -} - -void dump_matrix(unsigned char **s, int k, int m) -{ - int i, j; - for (i = 0; i < k; i++) { - for (j = 0; j < m; j++) { - printf(" %2x", s[i][j]); - } - printf("\n"); - } - printf("\n"); -} - -int main(int argc, char *argv[]) -{ - int i, j; - void *buf; - u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; - u8 g4[TEST_SOURCES], g5[TEST_SOURCES], *g_tbls, *buffs[TEST_SOURCES]; - u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest_ref1, *dest_ref2; - u8 *dest_ref3, *dest_ref4, *dest_ref5, *dest_ptrs[5]; - struct perf start, stop; - - printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN); - - // Allocate the arrays - for (i = 0; i < TEST_SOURCES; i++) { - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - buffs[i] = buf; - } - - if (posix_memalign(&buf, 16, 6 * TEST_SOURCES * 32)) { - printf("alloc error: Fail"); - return -1; - } - g_tbls = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest1 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest2 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest3 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest4 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest5 = buf; - - if 
(posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref1 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref2 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref3 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref4 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref5 = buf; - - dest_ptrs[0] = dest1; - dest_ptrs[1] = dest2; - dest_ptrs[2] = dest3; - dest_ptrs[3] = dest4; - dest_ptrs[4] = dest5; - - // Performance test - for (i = 0; i < TEST_SOURCES; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - memset(dest1, 0, TEST_LEN); - memset(dest2, 0, TEST_LEN); - memset(dest3, 0, TEST_LEN); - memset(dest4, 0, TEST_LEN); - memset(dest5, 0, TEST_LEN); - memset(dest_ref1, 0, TEST_LEN); - memset(dest_ref2, 0, TEST_LEN); - memset(dest_ref3, 0, TEST_LEN); - memset(dest_ref4, 0, TEST_LEN); - memset(dest_ref5, 0, TEST_LEN); - - for (i = 0; i < TEST_SOURCES; i++) { - g1[i] = rand(); - g2[i] = rand(); - g3[i] = rand(); - g4[i] = rand(); - g5[i] = rand(); - } - - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]); - } - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, - dest_ref2); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, - dest_ref3); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, - dest_ref4); - 
gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs, - dest_ref5); - -#ifdef DO_REF_PERF - perf_start(&start); - for (i = 0; i < TEST_LOOPS / 20; i++) { - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]); - } - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], - buffs, dest_ref2); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], - buffs, dest_ref3); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], - buffs, dest_ref4); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], - buffs, dest_ref5); - } - perf_stop(&stop); - printf("gf_5vect_dot_prod_base" TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 5) * i); -#endif - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); - - perf_start(&start); - for (i = 0; i < TEST_LOOPS; i++) { - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]); - } - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); - } - perf_stop(&stop); - printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 5) * i); - - if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " 
test1\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref1, 25); - printf("dprod_dut:"); - dump(dest1, 25); - return -1; - } - if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref2, 25); - printf("dprod_dut:"); - dump(dest2, 25); - return -1; - } - if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref3, 25); - printf("dprod_dut:"); - dump(dest3, 25); - return -1; - } - if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test4\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref4, 25); - printf("dprod_dut:"); - dump(dest4, 25); - return -1; - } - if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test5\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref5, 25); - printf("dprod_dut:"); - dump(dest5, 25); - return -1; - } - - printf("pass perf check\n"); - return 0; - -} diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_6vect_dot_prod_sse_perf.c b/src/main/native/compression/isa-l-master/erasure_code/gf_6vect_dot_prod_sse_perf.c deleted file mode 100644 index eafd2cba..00000000 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_6vect_dot_prod_sse_perf.c +++ /dev/null @@ -1,352 +0,0 @@ -/********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. 
- - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-**********************************************************************/ - -#include -#include -#include // for memset, memcmp -#include "erasure_code.h" -#include "test.h" - -#ifndef FUNCTION_UNDER_TEST -# define FUNCTION_UNDER_TEST gf_6vect_dot_prod_sse -#endif - -#define str(s) #s -#define xstr(s) str(s) - -//#define CACHED_TEST -#ifdef CACHED_TEST -// Cached test, loop many times over small dataset -# define TEST_SOURCES 10 -# define TEST_LEN 8*1024 -# define TEST_LOOPS 40000 -# define TEST_TYPE_STR "_warm" -#else -# ifndef TEST_CUSTOM -// Uncached test. Pull from large mem base. -# define TEST_SOURCES 10 -# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ -# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1)) -# define TEST_LOOPS 100 -# define TEST_TYPE_STR "_cold" -# else -# define TEST_TYPE_STR "_cus" -# ifndef TEST_LOOPS -# define TEST_LOOPS 1000 -# endif -# endif -#endif - -typedef unsigned char u8; - -void dump(unsigned char *buf, int len) -{ - int i; - for (i = 0; i < len;) { - printf(" %2x", 0xff & buf[i++]); - if (i % 32 == 0) - printf("\n"); - } - printf("\n"); -} - -void dump_matrix(unsigned char **s, int k, int m) -{ - int i, j; - for (i = 0; i < k; i++) { - for (j = 0; j < m; j++) { - printf(" %2x", s[i][j]); - } - printf("\n"); - } - printf("\n"); -} - -int main(int argc, char *argv[]) -{ - int i, j; - void *buf; - u8 g1[TEST_SOURCES], g2[TEST_SOURCES], g3[TEST_SOURCES]; - u8 g4[TEST_SOURCES], g5[TEST_SOURCES], g6[TEST_SOURCES], *g_tbls; - u8 *dest1, *dest2, *dest3, *dest4, *dest5, *dest6, *dest_ref1; - u8 *dest_ref2, *dest_ref3, *dest_ref4, *dest_ref5, *dest_ref6; - u8 *dest_ptrs[6], *buffs[TEST_SOURCES]; - struct perf start, stop; - - printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN); - - // Allocate the arrays - for (i = 0; i < TEST_SOURCES; i++) { - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - buffs[i] = buf; - } - - if (posix_memalign(&buf, 16, 6 * 
TEST_SOURCES * 32)) { - printf("alloc error: Fail"); - return -1; - } - g_tbls = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest1 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest2 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest3 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest4 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest5 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest6 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref1 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref2 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref3 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref4 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref5 = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref6 = buf; - - dest_ptrs[0] = dest1; - dest_ptrs[1] = dest2; - dest_ptrs[2] = dest3; - dest_ptrs[3] = dest4; - dest_ptrs[4] = dest5; - dest_ptrs[5] = dest6; - - // Performance test - for (i = 0; i < TEST_SOURCES; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - memset(dest1, 0, TEST_LEN); - memset(dest2, 0, TEST_LEN); - memset(dest3, 0, TEST_LEN); - memset(dest4, 0, TEST_LEN); - memset(dest5, 0, TEST_LEN); - memset(dest6, 0, TEST_LEN); - memset(dest_ref1, 0, TEST_LEN); - memset(dest_ref2, 0, TEST_LEN); - memset(dest_ref3, 0, TEST_LEN); - memset(dest_ref4, 0, TEST_LEN); 
- memset(dest_ref5, 0, TEST_LEN); - memset(dest_ref6, 0, TEST_LEN); - - for (i = 0; i < TEST_SOURCES; i++) { - g1[i] = rand(); - g2[i] = rand(); - g3[i] = rand(); - g4[i] = rand(); - g5[i] = rand(); - g6[i] = rand(); - } - - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g6[j], &g_tbls[(160 * TEST_SOURCES) + (j * 32)]); - } - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], buffs, - dest_ref2); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], buffs, - dest_ref3); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], buffs, - dest_ref4); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], buffs, - dest_ref5); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], buffs, - dest_ref6); - -#ifdef DO_REF_PERF - perf_start(&start); - for (i = 0; i < TEST_LOOPS / 20; i++) { - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g6[j], &g_tbls[(160 * TEST_SOURCES) + (j * 32)]); - } - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref1); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[32 * TEST_SOURCES], - buffs, dest_ref2); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[64 * TEST_SOURCES], - buffs, 
dest_ref3); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[96 * TEST_SOURCES], - buffs, dest_ref4); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[128 * TEST_SOURCES], - buffs, dest_ref5); - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[160 * TEST_SOURCES], - buffs, dest_ref6); - } - perf_stop(&stop); - printf("gf_6vect_dot_prod_base" TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 6) * i); -#endif - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); - - perf_start(&start); - for (i = 0; i < TEST_LOOPS; i++) { - for (j = 0; j < TEST_SOURCES; j++) { - gf_vect_mul_init(g1[j], &g_tbls[j * 32]); - gf_vect_mul_init(g2[j], &g_tbls[(32 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g3[j], &g_tbls[(64 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g4[j], &g_tbls[(96 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g5[j], &g_tbls[(128 * TEST_SOURCES) + (j * 32)]); - gf_vect_mul_init(g6[j], &g_tbls[(160 * TEST_SOURCES) + (j * 32)]); - } - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest_ptrs); - } - perf_stop(&stop); - printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 6) * i); - - if (0 != memcmp(dest_ref1, dest1, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test1\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref1, 25); - printf("dprod_dut:"); - dump(dest1, 25); - return -1; - } - if (0 != memcmp(dest_ref2, dest2, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test2\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref2, 25); - printf("dprod_dut:"); - dump(dest2, 25); - return -1; - } - if (0 != memcmp(dest_ref3, dest3, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test3\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref3, 25); - printf("dprod_dut:"); - 
dump(dest3, 25); - return -1; - } - if (0 != memcmp(dest_ref4, dest4, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test4\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref4, 25); - printf("dprod_dut:"); - dump(dest4, 25); - return -1; - } - if (0 != memcmp(dest_ref5, dest5, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test5\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref5, 25); - printf("dprod_dut:"); - dump(dest5, 25); - return -1; - } - if (0 != memcmp(dest_ref6, dest6, TEST_LEN)) { - printf("Fail perf " xstr(FUNCTION_UNDER_TEST) " test6\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref6, 25); - printf("dprod_dut:"); - dump(dest6, 25); - return -1; - } - - printf("pass perf check\n"); - return 0; - -} diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_dot_prod_avx_perf.c b/src/main/native/compression/isa-l-master/erasure_code/gf_vect_dot_prod_avx_perf.c deleted file mode 100644 index be7dbb71..00000000 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_dot_prod_avx_perf.c +++ /dev/null @@ -1,184 +0,0 @@ -/********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. 
- * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**********************************************************************/ - -#include -#include -#include // for memset, memcmp -#include "erasure_code.h" -#include "test.h" - -#ifndef FUNCTION_UNDER_TEST -# define FUNCTION_UNDER_TEST gf_vect_dot_prod_avx -#endif - -#define str(s) #s -#define xstr(s) str(s) - -//#define CACHED_TEST -#ifdef CACHED_TEST -// Cached test, loop many times over small dataset -# define TEST_SOURCES 10 -# define TEST_LEN 8*1024 -# define TEST_LOOPS 40000 -# define TEST_TYPE_STR "_warm" -#else -# ifndef TEST_CUSTOM -// Uncached test. Pull from large mem base. 
-# define TEST_SOURCES 10 -# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ -# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1)) -# define TEST_LOOPS 100 -# define TEST_TYPE_STR "_cold" -# else -# define TEST_TYPE_STR "_cus" -# ifndef TEST_LOOPS -# define TEST_LOOPS 1000 -# endif -# endif -#endif - -typedef unsigned char u8; - -void dump(unsigned char *buf, int len) -{ - int i; - for (i = 0; i < len;) { - printf(" %2x", 0xff & buf[i++]); - if (i % 32 == 0) - printf("\n"); - } - printf("\n"); -} - -void dump_matrix(unsigned char **s, int k, int m) -{ - int i, j; - for (i = 0; i < k; i++) { - for (j = 0; j < m; j++) { - printf(" %2x", s[i][j]); - } - printf("\n"); - } - printf("\n"); -} - -int main(int argc, char *argv[]) -{ - int i, j; - void *buf; - u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref; - u8 *temp_buff, *buffs[TEST_SOURCES]; - struct perf start, stop; - - printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN); - - // Allocate the arrays - for (i = 0; i < TEST_SOURCES; i++) { - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - buffs[i] = buf; - } - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - temp_buff = buf; - - // Performance test - for (i = 0; i < TEST_SOURCES; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - memset(dest, 0, TEST_LEN); - memset(temp_buff, 0, TEST_LEN); - memset(dest_ref, 0, TEST_LEN); - memset(g, 0, TEST_SOURCES); - - for (i = 0; i < TEST_SOURCES; i++) - g[i] = rand(); - - for (j = 0; j < TEST_SOURCES; j++) - gf_vect_mul_init(g[j], &g_tbls[j * 32]); - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); - -#ifdef DO_REF_PERF - 
perf_start(&start); - for (i = 0; i < TEST_LOOPS; i++) { - for (j = 0; j < TEST_SOURCES; j++) - gf_vect_mul_init(g[j], &g_tbls[j * 32]); - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); - } - perf_stop(&stop); - printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i); -#endif - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); - - perf_start(&start); - for (i = 0; i < TEST_LOOPS; i++) { - for (j = 0; j < TEST_SOURCES; j++) - gf_vect_mul_init(g[j], &g_tbls[j * 32]); - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); - } - perf_stop(&stop); - printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i); - - if (0 != memcmp(dest_ref, dest, TEST_LEN)) { - printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref, 25); - printf("dprod:"); - dump(dest, 25); - return -1; - } - - printf("pass perf check\n"); - return 0; -} diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_dot_prod_avx_test.c b/src/main/native/compression/isa-l-master/erasure_code/gf_vect_dot_prod_avx_test.c deleted file mode 100644 index bcf461e2..00000000 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_dot_prod_avx_test.c +++ /dev/null @@ -1,525 +0,0 @@ -/********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-**********************************************************************/ - -#include -#include -#include // for memset, memcmp -#include "erasure_code.h" -#include "types.h" - -#ifndef FUNCTION_UNDER_TEST -# define FUNCTION_UNDER_TEST gf_vect_dot_prod_avx -#endif -#ifndef TEST_MIN_SIZE -# define TEST_MIN_SIZE 16 -#endif - -#define str(s) #s -#define xstr(s) str(s) - -#define TEST_LEN 8192 -#define TEST_SIZE (TEST_LEN/2) - -#ifndef TEST_SOURCES -# define TEST_SOURCES 16 -#endif -#ifndef RANDOMS -# define RANDOMS 20 -#endif - -#define MMAX TEST_SOURCES -#define KMAX TEST_SOURCES - -#ifdef EC_ALIGNED_ADDR -// Define power of 2 range to check ptr, len alignment -# define PTR_ALIGN_CHK_B 0 -# define LEN_ALIGN_CHK_B 0 // 0 for aligned only -#else -// Define power of 2 range to check ptr, len alignment -# define PTR_ALIGN_CHK_B 32 -# define LEN_ALIGN_CHK_B 32 // 0 for aligned only -#endif - -typedef unsigned char u8; - -void dump(unsigned char *buf, int len) -{ - int i; - for (i = 0; i < len;) { - printf(" %2x", 0xff & buf[i++]); - if (i % 32 == 0) - printf("\n"); - } - printf("\n"); -} - -void dump_matrix(unsigned char **s, int k, int m) -{ - int i, j; - for (i = 0; i < k; i++) { - for (j = 0; j < m; j++) { - printf(" %2x", s[i][j]); - } - printf("\n"); - } - printf("\n"); -} - -void dump_u8xu8(unsigned char *s, int k, int m) -{ - int i, j; - for (i = 0; i < k; i++) { - for (j = 0; j < m; j++) { - printf(" %2x", 0xff & s[j + (i * m)]); - } - printf("\n"); - } - printf("\n"); -} - -int main(int argc, char *argv[]) -{ - int i, j, rtest, srcs, m, k, nerrs, r, err; - void *buf; - u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; - u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES]; - u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX]; - u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES]; - - int align, size; - unsigned char *efence_buffs[TEST_SOURCES]; - unsigned int offset; - u8 *ubuffs[TEST_SOURCES]; - u8 *udest_ptr; - - 
printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); - - // Allocate the arrays - for (i = 0; i < TEST_SOURCES; i++) { - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - buffs[i] = buf; - } - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - temp_buff = buf; - - // Test of all zeros - for (i = 0; i < TEST_SOURCES; i++) - memset(buffs[i], 0, TEST_LEN); - - memset(dest, 0, TEST_LEN); - memset(temp_buff, 0, TEST_LEN); - memset(dest_ref, 0, TEST_LEN); - memset(g, 0, TEST_SOURCES); - - for (i = 0; i < TEST_SOURCES; i++) - gf_vect_mul_init(g[i], &g_tbls[i * 32]); - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); - - if (0 != memcmp(dest_ref, dest, TEST_LEN)) { - printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref, 25); - printf("dprod:"); - dump(dest, 25); - return -1; - } else - putchar('.'); - - // Rand data test - for (rtest = 0; rtest < RANDOMS; rtest++) { - for (i = 0; i < TEST_SOURCES; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - for (i = 0; i < TEST_SOURCES; i++) - g[i] = rand(); - - for (i = 0; i < TEST_SOURCES; i++) - gf_vect_mul_init(g[i], &g_tbls[i * 32]); - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); - - if (0 != memcmp(dest_ref, dest, TEST_LEN)) { - printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref, 25); - printf("dprod:"); - dump(dest, 25); - return -1; - } 
- - putchar('.'); - } - - // Rand data test with varied parameters - for (rtest = 0; rtest < RANDOMS; rtest++) { - for (srcs = TEST_SOURCES; srcs > 0; srcs--) { - for (i = 0; i < srcs; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - for (i = 0; i < srcs; i++) - g[i] = rand(); - - for (i = 0; i < srcs; i++) - gf_vect_mul_init(g[i], &g_tbls[i * 32]); - - gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref); - FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest); - - if (0 != memcmp(dest_ref, dest, TEST_LEN)) { - printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n"); - dump_matrix(buffs, 5, srcs); - printf("dprod_base:"); - dump(dest_ref, 5); - printf("dprod:"); - dump(dest, 5); - return -1; - } - - putchar('.'); - } - } - - // Test erasure code using gf_vect_dot_prod - - // Pick a first test - m = 9; - k = 5; - if (m > MMAX || k > KMAX) - return -1; - - gf_gen_rs_matrix(a, m, k); - - // Make random data - for (i = 0; i < k; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - // Make parity vects - for (i = k; i < m; i++) { - for (j = 0; j < k; j++) - gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); -#ifndef USEREF - FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]); -#else - gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]); -#endif - } - - // Random buffers in erasure - memset(src_in_err, 0, TEST_SOURCES); - for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { - err = 1 & rand(); - src_in_err[i] = err; - if (err) - src_err_list[nerrs++] = i; - } - - // construct b by removing error rows - for (i = 0, r = 0; i < k; i++, r++) { - while (src_in_err[r]) { - r++; - continue; - } - for (j = 0; j < k; j++) - b[k * i + j] = a[k * r + j]; - } - - if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) - printf("BAD MATRIX\n"); - - for (i = 0, r = 0; i < k; i++, r++) { - while (src_in_err[r]) { - r++; - continue; - } - recov[i] = buffs[r]; - } - - // Recover data - for (i = 0; i < nerrs; i++) { - for (j = 
0; j < k; j++) - gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]); -#ifndef USEREF - FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff); -#else - gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff); -#endif - - if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { - printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); - printf("recov %d:", src_err_list[i]); - dump(temp_buff, 25); - printf("orig :"); - dump(buffs[src_err_list[i]], 25); - return -1; - } - } - - // Do more random tests - - for (rtest = 0; rtest < RANDOMS; rtest++) { - while ((m = (rand() % MMAX)) < 2) ; - while ((k = (rand() % KMAX)) >= m || k < 1) ; - - if (m > MMAX || k > KMAX) - continue; - - gf_gen_rs_matrix(a, m, k); - - // Make random data - for (i = 0; i < k; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - // Make parity vects - for (i = k; i < m; i++) { - for (j = 0; j < k; j++) - gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); -#ifndef USEREF - FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]); -#else - gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]); -#endif - } - - // Random errors - memset(src_in_err, 0, TEST_SOURCES); - for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { - err = 1 & rand(); - src_in_err[i] = err; - if (err) - src_err_list[nerrs++] = i; - } - if (nerrs == 0) { // should have at least one error - while ((err = (rand() % KMAX)) >= k) ; - src_err_list[nerrs++] = err; - src_in_err[err] = 1; - } - // construct b by removing error rows - for (i = 0, r = 0; i < k; i++, r++) { - while (src_in_err[r]) { - r++; - continue; - } - for (j = 0; j < k; j++) - b[k * i + j] = a[k * r + j]; - } - - if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) - printf("BAD MATRIX\n"); - - for (i = 0, r = 0; i < k; i++, r++) { - while (src_in_err[r]) { - r++; - continue; - } - recov[i] = buffs[r]; - } - - // Recover data - for (i = 0; i < nerrs; i++) { - for (j = 0; j < k; j++) - gf_vect_mul_init(d[k * 
src_err_list[i] + j], &g_tbls[j * 32]); -#ifndef USEREF - FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff); -#else - gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff); -#endif - if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { - printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); - printf(" - erase list = "); - for (i = 0; i < nerrs; i++) - printf(" %d", src_err_list[i]); - printf("\na:\n"); - dump_u8xu8((u8 *) a, m, k); - printf("inv b:\n"); - dump_u8xu8((u8 *) d, k, k); - printf("orig data:\n"); - dump_matrix(buffs, m, 25); - printf("orig :"); - dump(buffs[src_err_list[i]], 25); - printf("recov %d:", src_err_list[i]); - dump(temp_buff, 25); - return -1; - } - } - putchar('.'); - } - - // Run tests at end of buffer for Electric Fence - align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; - for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { - for (i = 0; i < TEST_SOURCES; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end - efence_buffs[i] = buffs[i] + TEST_LEN - size; - - for (i = 0; i < TEST_SOURCES; i++) - g[i] = rand(); - - for (i = 0; i < TEST_SOURCES; i++) - gf_vect_mul_init(g[i], &g_tbls[i * 32]); - - gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref); - FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest); - - if (0 != memcmp(dest_ref, dest, size)) { - printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n"); - dump_matrix(efence_buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref, align); - printf("dprod:"); - dump(dest, align); - return -1; - } - - putchar('.'); - } - - // Test rand ptr alignment if available - - for (rtest = 0; rtest < RANDOMS; rtest++) { - size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); - srcs = rand() % TEST_SOURCES; - if (srcs == 0) - continue; - - offset = (PTR_ALIGN_CHK_B != 0) ? 
1 : PTR_ALIGN_CHK_B; - // Add random offsets - for (i = 0; i < srcs; i++) - ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); - - udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset)); - - memset(dest, 0, TEST_LEN); // zero pad to check write-over - - for (i = 0; i < srcs; i++) - for (j = 0; j < size; j++) - ubuffs[i][j] = rand(); - - for (i = 0; i < srcs; i++) - g[i] = rand(); - - for (i = 0; i < srcs; i++) - gf_vect_mul_init(g[i], &g_tbls[i * 32]); - - gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref); - - FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr); - - if (memcmp(dest_ref, udest_ptr, size)) { - printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n", - srcs); - dump_matrix(ubuffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref, 25); - printf("dprod:"); - dump(udest_ptr, 25); - return -1; - } - // Confirm that padding around dests is unchanged - memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff - offset = udest_ptr - dest; - - if (memcmp(dest, dest_ref, offset)) { - printf("Fail rand ualign pad start\n"); - return -1; - } - if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) { - printf("Fail rand ualign pad end\n"); - return -1; - } - - putchar('.'); - } - - // Test all size alignment - align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; - - for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { - srcs = TEST_SOURCES; - - for (i = 0; i < srcs; i++) - for (j = 0; j < size; j++) - buffs[i][j] = rand(); - - for (i = 0; i < srcs; i++) - g[i] = rand(); - - for (i = 0; i < srcs; i++) - gf_vect_mul_init(g[i], &g_tbls[i * 32]); - - gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref); - - FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest); - - if (memcmp(dest_ref, dest, size)) { - printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n", - size); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref, 25); - printf("dprod:"); - dump(dest, 25); - return -1; - } - } - - printf("done all: Pass\n"); - return 0; -} diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_dot_prod_sse_perf.c b/src/main/native/compression/isa-l-master/erasure_code/gf_vect_dot_prod_sse_perf.c deleted file mode 100644 index 52716295..00000000 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_dot_prod_sse_perf.c +++ /dev/null @@ -1,184 +0,0 @@ -/********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**********************************************************************/ - -#include -#include -#include // for memset, memcmp -#include "erasure_code.h" -#include "test.h" - -#ifndef FUNCTION_UNDER_TEST -# define FUNCTION_UNDER_TEST gf_vect_dot_prod_sse -#endif - -#define str(s) #s -#define xstr(s) str(s) - -//#define CACHED_TEST -#ifdef CACHED_TEST -// Cached test, loop many times over small dataset -# define TEST_SOURCES 10 -# define TEST_LEN 8*1024 -# define TEST_LOOPS 40000 -# define TEST_TYPE_STR "_warm" -#else -# ifndef TEST_CUSTOM -// Uncached test. Pull from large mem base. 
-# define TEST_SOURCES 10 -# define GT_L3_CACHE 32*1024*1024 /* some number > last level cache */ -# define TEST_LEN ((GT_L3_CACHE / TEST_SOURCES) & ~(64-1)) -# define TEST_LOOPS 100 -# define TEST_TYPE_STR "_cold" -# else -# define TEST_TYPE_STR "_cus" -# ifndef TEST_LOOPS -# define TEST_LOOPS 1000 -# endif -# endif -#endif - -typedef unsigned char u8; - -void dump(unsigned char *buf, int len) -{ - int i; - for (i = 0; i < len;) { - printf(" %2x", 0xff & buf[i++]); - if (i % 32 == 0) - printf("\n"); - } - printf("\n"); -} - -void dump_matrix(unsigned char **s, int k, int m) -{ - int i, j; - for (i = 0; i < k; i++) { - for (j = 0; j < m; j++) { - printf(" %2x", s[i][j]); - } - printf("\n"); - } - printf("\n"); -} - -int main(int argc, char *argv[]) -{ - int i, j; - void *buf; - u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], *dest, *dest_ref; - u8 *temp_buff, *buffs[TEST_SOURCES]; - struct perf start, stop; - - printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d\n", TEST_SOURCES, TEST_LEN); - - // Allocate the arrays - for (i = 0; i < TEST_SOURCES; i++) { - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - buffs[i] = buf; - } - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - temp_buff = buf; - - // Performance test - for (i = 0; i < TEST_SOURCES; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - memset(dest, 0, TEST_LEN); - memset(temp_buff, 0, TEST_LEN); - memset(dest_ref, 0, TEST_LEN); - memset(g, 0, TEST_SOURCES); - - for (i = 0; i < TEST_SOURCES; i++) - g[i] = rand(); - - for (j = 0; j < TEST_SOURCES; j++) - gf_vect_mul_init(g[j], &g_tbls[j * 32]); - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); - -#ifdef DO_REF_PERF - 
perf_start(&start); - for (i = 0; i < TEST_LOOPS; i++) { - for (j = 0; j < TEST_SOURCES; j++) - gf_vect_mul_init(g[j], &g_tbls[j * 32]); - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); - } - perf_stop(&stop); - printf("gf_vect_dot_prod_base" TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i); -#endif - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); - - perf_start(&start); - for (i = 0; i < TEST_LOOPS; i++) { - for (j = 0; j < TEST_SOURCES; j++) - gf_vect_mul_init(g[j], &g_tbls[j * 32]); - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); - } - perf_stop(&stop); - printf(xstr(FUNCTION_UNDER_TEST) TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)TEST_LEN * (TEST_SOURCES + 1) * i); - - if (0 != memcmp(dest_ref, dest, TEST_LEN)) { - printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " test\n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref, 25); - printf("dprod:"); - dump(dest, 25); - return -1; - } - - printf("pass perf check\n"); - return 0; -} diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_dot_prod_sse_test.c b/src/main/native/compression/isa-l-master/erasure_code/gf_vect_dot_prod_sse_test.c deleted file mode 100644 index 4e8c3a6d..00000000 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_dot_prod_sse_test.c +++ /dev/null @@ -1,528 +0,0 @@ -/********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-**********************************************************************/ - -#include -#include -#include // for memset, memcmp -#include "erasure_code.h" -#include "types.h" - -#ifndef FUNCTION_UNDER_TEST -# define FUNCTION_UNDER_TEST gf_vect_dot_prod_sse -#endif -#ifndef TEST_MIN_SIZE -# define TEST_MIN_SIZE 16 -#endif - -#define str(s) #s -#define xstr(s) str(s) - -#define TEST_LEN 8192 -#define TEST_SIZE (TEST_LEN/2) - -#ifndef TEST_SOURCES -# define TEST_SOURCES 16 -#endif -#ifndef RANDOMS -# define RANDOMS 20 -#endif - -#define MMAX TEST_SOURCES -#define KMAX TEST_SOURCES - -#ifdef EC_ALIGNED_ADDR -// Define power of 2 range to check ptr, len alignment -# define PTR_ALIGN_CHK_B 0 -# define LEN_ALIGN_CHK_B 0 // 0 for aligned only -#else -// Define power of 2 range to check ptr, len alignment -# define PTR_ALIGN_CHK_B 32 -# define LEN_ALIGN_CHK_B 32 // 0 for aligned only -#endif - -extern void FUNCTION_UNDER_TEST(int len, int vlen, unsigned char *gftbls, - unsigned char **src, unsigned char *dest); - -typedef unsigned char u8; - -void dump(unsigned char *buf, int len) -{ - int i; - for (i = 0; i < len;) { - printf(" %2x", 0xff & buf[i++]); - if (i % 32 == 0) - printf("\n"); - } - printf("\n"); -} - -void dump_matrix(unsigned char **s, int k, int m) -{ - int i, j; - for (i = 0; i < k; i++) { - for (j = 0; j < m; j++) { - printf(" %2x", s[i][j]); - } - printf("\n"); - } - printf("\n"); -} - -void dump_u8xu8(unsigned char *s, int k, int m) -{ - int i, j; - for (i = 0; i < k; i++) { - for (j = 0; j < m; j++) { - printf(" %2x", 0xff & s[j + (i * m)]); - } - printf("\n"); - } - printf("\n"); -} - -int main(int argc, char *argv[]) -{ - int i, j, rtest, srcs, m, k, nerrs, r, err; - void *buf; - u8 g[TEST_SOURCES], g_tbls[TEST_SOURCES * 32], src_in_err[TEST_SOURCES]; - u8 *dest, *dest_ref, *temp_buff, *buffs[TEST_SOURCES]; - u8 a[MMAX * KMAX], b[MMAX * KMAX], d[MMAX * KMAX]; - u8 src_err_list[TEST_SOURCES], *recov[TEST_SOURCES]; - - int align, size; - unsigned char 
*efence_buffs[TEST_SOURCES]; - unsigned int offset; - u8 *ubuffs[TEST_SOURCES]; - u8 *udest_ptr; - - printf(xstr(FUNCTION_UNDER_TEST) ": %dx%d ", TEST_SOURCES, TEST_LEN); - - // Allocate the arrays - for (i = 0; i < TEST_SOURCES; i++) { - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - buffs[i] = buf; - } - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - dest_ref = buf; - - if (posix_memalign(&buf, 64, TEST_LEN)) { - printf("alloc error: Fail"); - return -1; - } - temp_buff = buf; - - // Test of all zeros - for (i = 0; i < TEST_SOURCES; i++) - memset(buffs[i], 0, TEST_LEN); - - memset(dest, 0, TEST_LEN); - memset(temp_buff, 0, TEST_LEN); - memset(dest_ref, 0, TEST_LEN); - memset(g, 0, TEST_SOURCES); - - for (i = 0; i < TEST_SOURCES; i++) - gf_vect_mul_init(g[i], &g_tbls[i * 32]); - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); - - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); - - if (0 != memcmp(dest_ref, dest, TEST_LEN)) { - printf("Fail zero " xstr(FUNCTION_UNDER_TEST) " \n"); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref, 25); - printf("dprod:"); - dump(dest, 25); - return -1; - } else - putchar('.'); - - // Rand data test - for (rtest = 0; rtest < RANDOMS; rtest++) { - for (i = 0; i < TEST_SOURCES; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - for (i = 0; i < TEST_SOURCES; i++) - g[i] = rand(); - - for (i = 0; i < TEST_SOURCES; i++) - gf_vect_mul_init(g[i], &g_tbls[i * 32]); - - gf_vect_dot_prod_base(TEST_LEN, TEST_SOURCES, &g_tbls[0], buffs, dest_ref); - FUNCTION_UNDER_TEST(TEST_LEN, TEST_SOURCES, g_tbls, buffs, dest); - - if (0 != memcmp(dest_ref, dest, TEST_LEN)) { - printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " 1\n"); - dump_matrix(buffs, 5, TEST_SOURCES); 
- printf("dprod_base:"); - dump(dest_ref, 25); - printf("dprod:"); - dump(dest, 25); - return -1; - } - - putchar('.'); - } - - // Rand data test with varied parameters - for (rtest = 0; rtest < RANDOMS; rtest++) { - for (srcs = TEST_SOURCES; srcs > 0; srcs--) { - for (i = 0; i < srcs; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - for (i = 0; i < srcs; i++) - g[i] = rand(); - - for (i = 0; i < srcs; i++) - gf_vect_mul_init(g[i], &g_tbls[i * 32]); - - gf_vect_dot_prod_base(TEST_LEN, srcs, &g_tbls[0], buffs, dest_ref); - FUNCTION_UNDER_TEST(TEST_LEN, srcs, g_tbls, buffs, dest); - - if (0 != memcmp(dest_ref, dest, TEST_LEN)) { - printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 2\n"); - dump_matrix(buffs, 5, srcs); - printf("dprod_base:"); - dump(dest_ref, 5); - printf("dprod:"); - dump(dest, 5); - return -1; - } - - putchar('.'); - } - } - - // Test erasure code using gf_vect_dot_prod - - // Pick a first test - m = 9; - k = 5; - if (m > MMAX || k > KMAX) - return -1; - - gf_gen_rs_matrix(a, m, k); - - // Make random data - for (i = 0; i < k; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - // Make parity vects - for (i = k; i < m; i++) { - for (j = 0; j < k; j++) - gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); -#ifndef USEREF - FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]); -#else - gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]); -#endif - } - - // Random buffers in erasure - memset(src_in_err, 0, TEST_SOURCES); - for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { - err = 1 & rand(); - src_in_err[i] = err; - if (err) - src_err_list[nerrs++] = i; - } - - // construct b by removing error rows - for (i = 0, r = 0; i < k; i++, r++) { - while (src_in_err[r]) { - r++; - continue; - } - for (j = 0; j < k; j++) - b[k * i + j] = a[k * r + j]; - } - - if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) - printf("BAD MATRIX\n"); - - for (i = 0, r = 0; i < k; i++, r++) { - while (src_in_err[r]) { - r++; - 
continue; - } - recov[i] = buffs[r]; - } - - // Recover data - for (i = 0; i < nerrs; i++) { - for (j = 0; j < k; j++) - gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]); -#ifndef USEREF - FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff); -#else - gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff); -#endif - - if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { - printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs); - printf("recov %d:", src_err_list[i]); - dump(temp_buff, 25); - printf("orig :"); - dump(buffs[src_err_list[i]], 25); - return -1; - } - } - - // Do more random tests - - for (rtest = 0; rtest < RANDOMS; rtest++) { - while ((m = (rand() % MMAX)) < 2) ; - while ((k = (rand() % KMAX)) >= m || k < 1) ; - - if (m > MMAX || k > KMAX) - continue; - - gf_gen_rs_matrix(a, m, k); - - // Make random data - for (i = 0; i < k; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - // Make parity vects - for (i = k; i < m; i++) { - for (j = 0; j < k; j++) - gf_vect_mul_init(a[k * i + j], &g_tbls[j * 32]); -#ifndef USEREF - FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, buffs, buffs[i]); -#else - gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], buffs, buffs[i]); -#endif - } - - // Random errors - memset(src_in_err, 0, TEST_SOURCES); - for (i = 0, nerrs = 0; i < k && nerrs < m - k; i++) { - err = 1 & rand(); - src_in_err[i] = err; - if (err) - src_err_list[nerrs++] = i; - } - if (nerrs == 0) { // should have at least one error - while ((err = (rand() % KMAX)) >= k) ; - src_err_list[nerrs++] = err; - src_in_err[err] = 1; - } - // construct b by removing error rows - for (i = 0, r = 0; i < k; i++, r++) { - while (src_in_err[r]) { - r++; - continue; - } - for (j = 0; j < k; j++) - b[k * i + j] = a[k * r + j]; - } - - if (gf_invert_matrix((u8 *) b, (u8 *) d, k) < 0) - printf("BAD MATRIX\n"); - - for (i = 0, r = 0; i < k; i++, r++) { - while (src_in_err[r]) { - r++; - continue; - } - recov[i] = buffs[r]; - } - - // 
Recover data - for (i = 0; i < nerrs; i++) { - for (j = 0; j < k; j++) - gf_vect_mul_init(d[k * src_err_list[i] + j], &g_tbls[j * 32]); -#ifndef USEREF - FUNCTION_UNDER_TEST(TEST_LEN, k, g_tbls, recov, temp_buff); -#else - gf_vect_dot_prod_base(TEST_LEN, k, &g_tbls[0], recov, temp_buff); -#endif - if (0 != memcmp(temp_buff, buffs[src_err_list[i]], TEST_LEN)) { - printf("Fail error recovery (%d, %d, %d) - ", m, k, nerrs); - printf(" - erase list = "); - for (i = 0; i < nerrs; i++) - printf(" %d", src_err_list[i]); - printf("\na:\n"); - dump_u8xu8((u8 *) a, m, k); - printf("inv b:\n"); - dump_u8xu8((u8 *) d, k, k); - printf("orig data:\n"); - dump_matrix(buffs, m, 25); - printf("orig :"); - dump(buffs[src_err_list[i]], 25); - printf("recov %d:", src_err_list[i]); - dump(temp_buff, 25); - return -1; - } - } - putchar('.'); - } - - // Run tests at end of buffer for Electric Fence - align = (LEN_ALIGN_CHK_B != 0) ? 1 : 16; - for (size = TEST_MIN_SIZE; size <= TEST_SIZE; size += align) { - for (i = 0; i < TEST_SOURCES; i++) - for (j = 0; j < TEST_LEN; j++) - buffs[i][j] = rand(); - - for (i = 0; i < TEST_SOURCES; i++) // Line up TEST_SIZE from end - efence_buffs[i] = buffs[i] + TEST_LEN - size; - - for (i = 0; i < TEST_SOURCES; i++) - g[i] = rand(); - - for (i = 0; i < TEST_SOURCES; i++) - gf_vect_mul_init(g[i], &g_tbls[i * 32]); - - gf_vect_dot_prod_base(size, TEST_SOURCES, &g_tbls[0], efence_buffs, dest_ref); - FUNCTION_UNDER_TEST(size, TEST_SOURCES, g_tbls, efence_buffs, dest); - - if (0 != memcmp(dest_ref, dest, size)) { - printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " test 3\n"); - dump_matrix(efence_buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref, align); - printf("dprod:"); - dump(dest, align); - return -1; - } - - putchar('.'); - } - - // Test rand ptr alignment if available - - for (rtest = 0; rtest < RANDOMS; rtest++) { - size = (TEST_LEN - PTR_ALIGN_CHK_B) & ~(TEST_MIN_SIZE - 1); - srcs = rand() % TEST_SOURCES; - if (srcs == 0) - continue; - 
- offset = (PTR_ALIGN_CHK_B != 0) ? 1 : PTR_ALIGN_CHK_B; - // Add random offsets - for (i = 0; i < srcs; i++) - ubuffs[i] = buffs[i] + (rand() & (PTR_ALIGN_CHK_B - offset)); - - udest_ptr = dest + (rand() & (PTR_ALIGN_CHK_B - offset)); - - memset(dest, 0, TEST_LEN); // zero pad to check write-over - - for (i = 0; i < srcs; i++) - for (j = 0; j < size; j++) - ubuffs[i][j] = rand(); - - for (i = 0; i < srcs; i++) - g[i] = rand(); - - for (i = 0; i < srcs; i++) - gf_vect_mul_init(g[i], &g_tbls[i * 32]); - - gf_vect_dot_prod_base(size, srcs, &g_tbls[0], ubuffs, dest_ref); - - FUNCTION_UNDER_TEST(size, srcs, g_tbls, ubuffs, udest_ptr); - - if (memcmp(dest_ref, udest_ptr, size)) { - printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign srcs=%d\n", - srcs); - dump_matrix(ubuffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref, 25); - printf("dprod:"); - dump(udest_ptr, 25); - return -1; - } - // Confirm that padding around dests is unchanged - memset(dest_ref, 0, PTR_ALIGN_CHK_B); // Make reference zero buff - offset = udest_ptr - dest; - - if (memcmp(dest, dest_ref, offset)) { - printf("Fail rand ualign pad start\n"); - return -1; - } - if (memcmp(dest + offset + size, dest_ref, PTR_ALIGN_CHK_B - offset)) { - printf("Fail rand ualign pad end\n"); - return -1; - } - - putchar('.'); - } - - // Test all size alignment - align = (LEN_ALIGN_CHK_B != 0) ? 
1 : 16; - - for (size = TEST_LEN; size >= TEST_MIN_SIZE; size -= align) { - srcs = TEST_SOURCES; - - for (i = 0; i < srcs; i++) - for (j = 0; j < size; j++) - buffs[i][j] = rand(); - - for (i = 0; i < srcs; i++) - g[i] = rand(); - - for (i = 0; i < srcs; i++) - gf_vect_mul_init(g[i], &g_tbls[i * 32]); - - gf_vect_dot_prod_base(size, srcs, &g_tbls[0], buffs, dest_ref); - - FUNCTION_UNDER_TEST(size, srcs, g_tbls, buffs, dest); - - if (memcmp(dest_ref, dest, size)) { - printf("Fail rand " xstr(FUNCTION_UNDER_TEST) " ualign len=%d\n", - size); - dump_matrix(buffs, 5, TEST_SOURCES); - printf("dprod_base:"); - dump(dest_ref, 25); - printf("dprod:"); - dump(dest, 25); - return -1; - } - } - - printf("done all: Pass\n"); - return 0; -} diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_avx_test.c b/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_avx_test.c deleted file mode 100644 index 5c742301..00000000 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_avx_test.c +++ /dev/null @@ -1,143 +0,0 @@ -/********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**********************************************************************/ - -#include -#include -#include // for memset -#include "erasure_code.h" - -#define TEST_SIZE 8192 -#define TEST_MEM TEST_SIZE -#define TEST_LOOPS 100000 -#define TEST_TYPE_STR "" - -typedef unsigned char u8; - -int main(int argc, char *argv[]) -{ - int i; - u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2; - int align, size; - unsigned char *efence_buff1; - unsigned char *efence_buff2; - unsigned char *efence_buff3; - - printf("gf_vect_mul_avx:\n"); - - gf_vect_mul_init(a, gf_const_tbl); - - buff1 = (u8 *) malloc(TEST_SIZE); - buff2 = (u8 *) malloc(TEST_SIZE); - buff3 = (u8 *) malloc(TEST_SIZE); - - if (NULL == buff1 || NULL == buff2 || NULL == buff3) { - printf("buffer alloc error\n"); - return -1; - } - // Fill with rand data - for (i = 0; i < TEST_SIZE; i++) - buff1[i] = rand(); - - gf_vect_mul_avx(TEST_SIZE, gf_const_tbl, buff1, buff2); - - for (i = 0; i < TEST_SIZE; i++) - if (gf_mul(a, buff1[i]) != buff2[i]) { - printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i], - gf_mul(2, buff1[i])); - return 1; - } - - gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3); - - // Check reference function - for (i 
= 0; i < TEST_SIZE; i++) - if (buff2[i] != buff3[i]) { - printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", - i, a, buff1[i], buff2[i], gf_mul(a, buff1[i])); - return 1; - } - - for (i = 0; i < TEST_SIZE; i++) - buff1[i] = rand(); - - // Check each possible constant - printf("Random tests "); - for (a = 0; a != 255; a++) { - gf_vect_mul_init(a, gf_const_tbl); - gf_vect_mul_avx(TEST_SIZE, gf_const_tbl, buff1, buff2); - - for (i = 0; i < TEST_SIZE; i++) - if (gf_mul(a, buff1[i]) != buff2[i]) { - printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", - i, a, buff1[i], buff2[i], gf_mul(2, buff1[i])); - return 1; - } - putchar('.'); - } - - // Run tests at end of buffer for Electric Fence - align = 32; - a = 2; - - gf_vect_mul_init(a, gf_const_tbl); - for (size = 0; size < TEST_SIZE; size += align) { - // Line up TEST_SIZE from end - efence_buff1 = buff1 + size; - efence_buff2 = buff2 + size; - efence_buff3 = buff3 + size; - - gf_vect_mul_avx(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2); - - for (i = 0; i < TEST_SIZE - size; i++) - if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) { - printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", - i, efence_buff1[i], efence_buff2[i], gf_mul(2, - efence_buff1 - [i])); - return 1; - } - - gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3); - - // Check reference function - for (i = 0; i < TEST_SIZE - size; i++) - if (efence_buff2[i] != efence_buff3[i]) { - printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", - i, a, efence_buff2[i], efence_buff3[i], gf_mul(2, - efence_buff1 - [i])); - return 1; - } - - putchar('.'); - } - - printf(" done: Pass\n"); - return 0; -} diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_sse_test.c b/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_sse_test.c deleted file mode 100644 index c1e6e743..00000000 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_sse_test.c +++ /dev/null @@ -1,160 +0,0 @@ 
-/********************************************************************** - Copyright(c) 2011-2015 Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-**********************************************************************/ - -#include -#include -#include "erasure_code.h" - -#define TEST_SIZE (128*1024) - -typedef unsigned char u8; - -int main(int argc, char *argv[]) -{ - int i; - u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2; - int tsize; - int align, size; - unsigned char *efence_buff1; - unsigned char *efence_buff2; - unsigned char *efence_buff3; - - printf("gf_vect_mul_sse_test: "); - - gf_vect_mul_init(a, gf_const_tbl); - - buff1 = (u8 *) malloc(TEST_SIZE); - buff2 = (u8 *) malloc(TEST_SIZE); - buff3 = (u8 *) malloc(TEST_SIZE); - - if (NULL == buff1 || NULL == buff2 || NULL == buff3) { - printf("buffer alloc error\n"); - return -1; - } - // Fill with rand data - for (i = 0; i < TEST_SIZE; i++) - buff1[i] = rand(); - - gf_vect_mul_sse(TEST_SIZE, gf_const_tbl, buff1, buff2); - - for (i = 0; i < TEST_SIZE; i++) { - if (gf_mul(a, buff1[i]) != buff2[i]) { - printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, - buff1[i], buff2[i], gf_mul(2, buff1[i])); - return -1; - } - } - - gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3); - - // Check reference function - for (i = 0; i < TEST_SIZE; i++) { - if (buff2[i] != buff3[i]) { - printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", - i, a, buff1[i], buff2[i], gf_mul(a, buff1[i])); - return -1; - } - } - - for (i = 0; i < TEST_SIZE; i++) - buff1[i] = rand(); - - // Check each possible constant - for (a = 0; a != 255; a++) { - gf_vect_mul_init(a, gf_const_tbl); - gf_vect_mul_sse(TEST_SIZE, gf_const_tbl, buff1, buff2); - - for (i = 0; i < TEST_SIZE; i++) - if (gf_mul(a, buff1[i]) != buff2[i]) { - printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", - i, a, buff1[i], buff2[i], gf_mul(2, buff1[i])); - return -1; - } - putchar('.'); - } - - // Check buffer len - for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) { - a = rand(); - gf_vect_mul_init(a, gf_const_tbl); - gf_vect_mul_sse(tsize, gf_const_tbl, buff1, buff2); - - for (i = 0; i < tsize; i++) - if (gf_mul(a, buff1[i]) != 
buff2[i]) { - printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", - i, a, buff1[i], buff2[i], gf_mul(2, buff1[i])); - return -1; - } - if (0 == tsize % (32 * 8)) { - putchar('.'); - fflush(0); - } - } - - // Run tests at end of buffer for Electric Fence - align = 32; - a = 2; - - gf_vect_mul_init(a, gf_const_tbl); - for (size = 0; size < TEST_SIZE; size += align) { - // Line up TEST_SIZE from end - efence_buff1 = buff1 + size; - efence_buff2 = buff2 + size; - efence_buff3 = buff3 + size; - - gf_vect_mul_sse(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff2); - - for (i = 0; i < TEST_SIZE - size; i++) - if (gf_mul(a, efence_buff1[i]) != efence_buff2[i]) { - printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", - i, efence_buff1[i], efence_buff2[i], gf_mul(2, - efence_buff1 - [i])); - return 1; - } - - gf_vect_mul_base(TEST_SIZE - size, gf_const_tbl, efence_buff1, efence_buff3); - - // Check reference function - for (i = 0; i < TEST_SIZE - size; i++) - if (efence_buff2[i] != efence_buff3[i]) { - printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", - i, a, efence_buff2[i], efence_buff3[i], gf_mul(2, - efence_buff1 - [i])); - return 1; - } - - putchar('.'); - } - - printf(" done: Pass\n"); - fflush(0); - return 0; -} diff --git a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_test.c b/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_test.c index df25600e..b1a40662 100644 --- a/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_test.c +++ b/src/main/native/compression/isa-l-master/erasure_code/gf_vect_mul_test.c @@ -29,13 +29,9 @@ #include #include -#include // for memset #include "erasure_code.h" -#define TEST_SIZE 8192 -#define TEST_MEM TEST_SIZE -#define TEST_LOOPS 100000 -#define TEST_TYPE_STR "" +#define TEST_SIZE (128*1024) typedef unsigned char u8; @@ -43,12 +39,13 @@ int main(int argc, char *argv[]) { int i; u8 *buff1, *buff2, *buff3, gf_const_tbl[64], a = 2; + int tsize; int align, size; unsigned char *efence_buff1; 
unsigned char *efence_buff2; unsigned char *efence_buff3; - printf("gf_vect_mul_test:\n"); + printf("gf_vect_mul_test: "); gf_vect_mul_init(a, gf_const_tbl); @@ -66,42 +63,60 @@ int main(int argc, char *argv[]) gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2); - for (i = 0; i < TEST_SIZE; i++) + for (i = 0; i < TEST_SIZE; i++) { if (gf_mul(a, buff1[i]) != buff2[i]) { - printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, buff1[i], buff2[i], - gf_mul(2, buff1[i])); - return 1; + printf("fail at %d, 0x%x x 2 = 0x%x (0x%x)\n", i, + buff1[i], buff2[i], gf_mul(2, buff1[i])); + return -1; } + } gf_vect_mul_base(TEST_SIZE, gf_const_tbl, buff1, buff3); // Check reference function - for (i = 0; i < TEST_SIZE; i++) + for (i = 0; i < TEST_SIZE; i++) { if (buff2[i] != buff3[i]) { printf("fail at %d, 0x%x x 0x%d = 0x%x (0x%x)\n", i, a, buff1[i], buff2[i], gf_mul(a, buff1[i])); - return 1; + return -1; } + } for (i = 0; i < TEST_SIZE; i++) buff1[i] = rand(); // Check each possible constant - printf("Random tests "); for (a = 0; a != 255; a++) { gf_vect_mul_init(a, gf_const_tbl); gf_vect_mul(TEST_SIZE, gf_const_tbl, buff1, buff2); - for (i = 0; i < TEST_SIZE; i++) { + for (i = 0; i < TEST_SIZE; i++) if (gf_mul(a, buff1[i]) != buff2[i]) { printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", i, a, buff1[i], buff2[i], gf_mul(2, buff1[i])); - return 1; + return -1; } - } putchar('.'); } + // Check buffer len + for (tsize = TEST_SIZE; tsize > 0; tsize -= 32) { + a = rand(); + gf_vect_mul_init(a, gf_const_tbl); + gf_vect_mul(tsize, gf_const_tbl, buff1, buff2); + + for (i = 0; i < tsize; i++) + if (gf_mul(a, buff1[i]) != buff2[i]) { + printf("fail at %d, 0x%x x %d = 0x%x (0x%x)\n", + i, a, buff1[i], buff2[i], gf_mul(2, buff1[i])); + return -1; + } + if (0 == tsize % (32 * 8)) { + putchar('.'); + fflush(0); + } + } + // Run tests at end of buffer for Electric Fence align = 32; a = 2; @@ -138,5 +153,6 @@ int main(int argc, char *argv[]) } printf(" done: Pass\n"); + fflush(0); return 0; } diff 
--git a/src/main/native/compression/isa-l-master/igzip/Makefile.am b/src/main/native/compression/isa-l-master/igzip/Makefile.am index 85d0c05b..fc6bb5ac 100644 --- a/src/main/native/compression/isa-l-master/igzip/Makefile.am +++ b/src/main/native/compression/isa-l-master/igzip/Makefile.am @@ -33,19 +33,17 @@ lsrc += igzip/igzip.c \ igzip/igzip_icf_base.c \ igzip/crc32_gzip_base.c \ igzip/flatten_ll.c \ - igzip/encode_df.c + igzip/encode_df.c \ + igzip/igzip_icf_body.c lsrc_base_aliases += igzip/igzip_base_aliases.c igzip/proc_heap_base.c lsrc_x86_32 += igzip/igzip_base_aliases.c igzip/proc_heap_base.c -lsrc_x86_64 += \ - igzip/igzip_body_01.asm \ +lsrc_x86_64 += igzip/igzip_body_01.asm \ igzip/igzip_body_02.asm \ igzip/igzip_body_04.asm \ igzip/igzip_finish.asm \ - igzip/igzip_icf_body_01.asm \ - igzip/igzip_icf_body_02.asm \ - igzip/igzip_icf_body_04.asm \ + igzip/igzip_icf_body_h1_gr_bt.asm \ igzip/igzip_icf_finish.asm \ igzip/rfc1951_lookup.asm \ igzip/crc32_gzip.asm igzip/detect_repeated_char.asm \ @@ -60,7 +58,9 @@ lsrc_x86_64 += \ igzip/encode_df_04.asm \ igzip/encode_df_06.asm \ igzip/proc_heap.asm \ - igzip/igzip_deflate_hash.asm + igzip/igzip_deflate_hash.asm \ + igzip/igzip_gen_icf_map_lh1_06.asm \ + igzip/igzip_set_long_icf_fg_06.asm src_include += -I $(srcdir)/igzip extern_hdrs += include/igzip_lib.h @@ -70,16 +70,16 @@ pkginclude_HEADERS += include/types.h check_tests += igzip/igzip_rand_test unit_tests += igzip/checksum32_funcs_test -perf_tests += igzip/igzip_perf igzip/igzip_sync_flush_perf +perf_tests += igzip/igzip_perf other_tests += igzip/igzip_file_perf igzip/igzip_sync_flush_file_perf igzip/igzip_stateless_file_perf igzip/igzip_hist_perf other_tests += igzip/igzip_semi_dyn_file_perf +other_tests += igzip/igzip_build_hash_table_perf other_src += igzip/bitbuf2.asm \ igzip/data_struct2.asm \ igzip/inflate_data_structs.asm \ igzip/igzip_body.asm \ - igzip/igzip_icf_body.asm \ igzip/igzip_finish.asm \ igzip/lz0a_const.asm \ igzip/options.asm \ @@ 
-113,7 +113,6 @@ lsrc += igzip/huff_codes.c # Include tools and tests using the reference inflate other_tests += igzip/igzip_inflate_perf other_tests += igzip/igzip_inflate_test -other_tests += igzip/igzip_fuzz_inflate lsrc += igzip/igzip_inflate.c other_src += igzip/checksum_test_ref.h @@ -124,6 +123,3 @@ igzip_inflate_test: LDLIBS += -lz igzip_igzip_inflate_test_LDADD = libisal.la igzip_igzip_inflate_test_LDFLAGS = -lz igzip_igzip_hist_perf_LDADD = libisal.la -igzip_fuzz_inflate: LDLIBS += -lz -igzip_igzip_fuzz_inflate_LDADD = libisal.la -igzip_igzip_fuzz_inflate_LDFLAGS = -lz diff --git a/src/main/native/compression/isa-l-master/igzip/bitbuf2.h b/src/main/native/compression/isa-l-master/igzip/bitbuf2.h index b7889247..a0a0aeba 100644 --- a/src/main/native/compression/isa-l-master/igzip/bitbuf2.h +++ b/src/main/native/compression/isa-l-master/igzip/bitbuf2.h @@ -95,6 +95,19 @@ static inline void flush_bits(struct BitBuf2 *me) } +/* Can write up to 8 bytes to output buffer */ +static inline void flush(struct BitBuf2 *me) +{ + uint32_t bytes; + if (me->m_bit_count) { + _mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits); + bytes = (me->m_bit_count + 7) / 8; + me->m_out_buf += bytes; + } + me->m_bits = 0; + me->m_bit_count = 0; +} + static inline void check_space(struct BitBuf2 *me, uint32_t num_bits) { /* Checks if bitbuf has num_bits extra space and flushes the bytes in @@ -116,17 +129,11 @@ static inline void write_bits(struct BitBuf2 *me, uint64_t code, uint32_t count) flush_bits(me); } -/* Can write up to 8 bytes to output buffer */ -static inline void flush(struct BitBuf2 *me) -{ - uint32_t bytes; - if (me->m_bit_count) { - _mm_stream_si64x((int64_t *) me->m_out_buf, me->m_bits); - bytes = (me->m_bit_count + 7) / 8; - me->m_out_buf += bytes; - } - me->m_bits = 0; - me->m_bit_count = 0; +static inline void write_bits_flush(struct BitBuf2 *me, uint64_t code, uint32_t count) +{ /* Assumes there is space to fit code into m_bits. 
*/ + me->m_bits |= code << me->m_bit_count; + me->m_bit_count += count; + flush(me); } #endif //BITBUF2_H diff --git a/src/main/native/compression/isa-l-master/igzip/data_struct2.asm b/src/main/native/compression/isa-l-master/igzip/data_struct2.asm index edac60da..38896cad 100644 --- a/src/main/native/compression/isa-l-master/igzip/data_struct2.asm +++ b/src/main/native/compression/isa-l-master/igzip/data_struct2.asm @@ -96,21 +96,58 @@ FIELD _lit_len_table, 513 * HUFF_CODE_SIZE, HUFF_CODE_SIZE ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +START_FIELDS ;; hash8k_buf + +;; name size align +FIELD _hash8k_table, 2 * IGZIP_HASH8K_HASH_SIZE, 2 + +%assign _hash_buf1_size _FIELD_OFFSET +%assign _hash_buf1_align _STRUCT_ALIGN + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +START_FIELDS ;; hash_map_buf + +;; name size align +FIELD _hash_table, 2 * IGZIP_HASH_MAP_HASH_SIZE, 2 +FIELD _matches_next, 8, 8 +FIELD _matches_end, 8, 8 +FIELD _matches, 4*4*1024, 4 +FIELD _overflow, 4*LA, 4 + +%assign _hash_map_buf_size _FIELD_OFFSET +%assign _hash_map_buf_align _STRUCT_ALIGN + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + %define DEF_MAX_HDR_SIZE 328 -START_FIELDS ;; level_2_buf +START_FIELDS ;; level_buf ;; name size align FIELD _encode_tables, _hufftables_icf_size, _hufftables_icf_align -FIELD _deflate_hdr_buf_used, 8, 8 -FIELD _deflate_hdr_buf, DEF_MAX_HDR_SIZE, 1 -FIELD _block_start_index, 4, 4 -FIELD _block_in_length, 4, 4 +FIELD _hist, _isal_mod_hist_size, _isal_mod_hist_align +FIELD _deflate_hdr_count, 4, 4 +FIELD 
_deflate_hdr_extra_bits,4, 4 +FIELD _deflate_hdr, DEF_MAX_HDR_SIZE, 1 FIELD _icf_buf_next, 8, 8 FIELD _icf_buf_avail_out, 8, 8 -FIELD _icf_buf_start, 0, 0 +FIELD _icf_buf_start, 8, 8 +FIELD _lvl_extra, _hash_map_buf_size, _hash_map_buf_align -%assign _level_2_buf_size _FIELD_OFFSET -%assign _level_2_buf_align _STRUCT_ALIGN +%assign _level_buf_base_size _FIELD_OFFSET +%assign _level_buf_base_align _STRUCT_ALIGN + +_hash8k_hash_table equ _lvl_extra + _hash8k_table +_hash_map_hash_table equ _lvl_extra + _hash_table +_hash_map_matches_next equ _lvl_extra + _matches_next +_hash_map_matches_end equ _lvl_extra + _matches_end +_hash_map_matches equ _lvl_extra + _matches +_hist_lit_len equ _hist+_ll_hist +_hist_dist equ _hist+_d_hist ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -119,24 +156,25 @@ FIELD _icf_buf_start, 0, 0 START_FIELDS ;; isal_zstate ;; name size align -FIELD _b_bytes_valid, 4, 4 -FIELD _b_bytes_processed, 4, 4 -FIELD _file_start, 8, 8 -FIELD _crc, 4, 4 +FIELD _total_in_start,4, 4 +FIELD _block_next, 4, 4 +FIELD _block_end, 4, 4 FIELD _bitbuf, _BitBuf2_size, _BitBuf2_align +FIELD _crc, 4, 4 FIELD _state, 4, 4 +FIELD _has_wrap_hdr, 1, 1 +FIELD _has_eob_hdr, 1, 1 +FIELD _has_eob, 1, 1 +FIELD _has_hist, 1, 1 +FIELD _has_level_buf_init, 2, 2 FIELD _count, 4, 4 FIELD _tmp_out_buff, 16, 1 FIELD _tmp_out_start, 4, 4 FIELD _tmp_out_end, 4, 4 -FIELD _has_wrap_hdr, 4, 4 -FIELD _has_eob, 4, 4 -FIELD _has_eob_hdr, 4, 4 -FIELD _has_hist, 4, 4 -FIELD _hist, _isal_mod_hist_size, _isal_mod_hist_align -FIELD _buffer, BSIZE, 32 -FIELD _head, IGZIP_HASH_SIZE*2, 16 - +FIELD _b_bytes_valid, 4, 4 +FIELD _b_bytes_processed, 4, 4 +FIELD _buffer, BSIZE, 1 +FIELD _head, IGZIP_LVL0_HASH_SIZE*2, 2 %assign _isal_zstate_size _FIELD_OFFSET %assign _isal_zstate_align _STRUCT_ALIGN @@ -146,8 +184,6 @@ _bitbuf_m_out_buf equ _bitbuf+_m_out_buf _bitbuf_m_out_end equ _bitbuf+_m_out_end 
_bitbuf_m_out_start equ _bitbuf+_m_out_start -_hist_lit_len equ _hist+_ll_hist -_hist_dist equ _hist+_d_hist ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -165,17 +201,19 @@ FIELD _hufftables, 8, 8 FIELD _level, 4, 4 FIELD _level_buf_size, 4, 4 FIELD _level_buf, 8, 8 -FIELD _end_of_stream, 4, 4 -FIELD _flush, 4, 4 +FIELD _end_of_stream, 2, 2 +FIELD _flush, 2, 2 FIELD _gzip_flag, 4, 4 FIELD _internal_state, _isal_zstate_size, _isal_zstate_align %assign _isal_zstream_size _FIELD_OFFSET %assign _isal_zstream_align _STRUCT_ALIGN +_internal_state_total_in_start equ _internal_state+_total_in_start +_internal_state_block_next equ _internal_state+_block_next +_internal_state_block_end equ _internal_state+_block_end _internal_state_b_bytes_valid equ _internal_state+_b_bytes_valid _internal_state_b_bytes_processed equ _internal_state+_b_bytes_processed -_internal_state_file_start equ _internal_state+_file_start _internal_state_crc equ _internal_state+_crc _internal_state_bitbuf equ _internal_state+_bitbuf _internal_state_state equ _internal_state+_state @@ -187,6 +225,7 @@ _internal_state_has_wrap_hdr equ _internal_state+_has_wrap_hdr _internal_state_has_eob equ _internal_state+_has_eob _internal_state_has_eob_hdr equ _internal_state+_has_eob_hdr _internal_state_has_hist equ _internal_state+_has_hist +_internal_state_has_level_buf_init equ _internal_state+_has_level_buf_init _internal_state_buffer equ _internal_state+_buffer _internal_state_head equ _internal_state+_head _internal_state_bitbuf_m_bits equ _internal_state+_bitbuf_m_bits @@ -194,8 +233,6 @@ _internal_state_bitbuf_m_bit_count equ _internal_state+_bitbuf_m_bit_count _internal_state_bitbuf_m_out_buf equ _internal_state+_bitbuf_m_out_buf _internal_state_bitbuf_m_out_end equ _internal_state+_bitbuf_m_out_end _internal_state_bitbuf_m_out_start 
equ _internal_state+_bitbuf_m_out_start -_internal_state_hist_lit_len equ _internal_state+_hist_lit_len -_internal_state_hist_dist equ _internal_state+_hist_dist ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/src/main/native/compression/isa-l-master/igzip/encode_df.h b/src/main/native/compression/isa-l-master/igzip/encode_df.h index 40b785e3..168e02c0 100644 --- a/src/main/native/compression/isa-l-master/igzip/encode_df.h +++ b/src/main/native/compression/isa-l-master/igzip/encode_df.h @@ -6,14 +6,21 @@ /* Deflate Intermediate Compression Format */ #define LIT_LEN_BIT_COUNT 10 +#define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1) #define DIST_LIT_BIT_COUNT 9 +#define DIST_LIT_MASK ((1 << DIST_LIT_BIT_COUNT) - 1) #define ICF_DIST_OFFSET LIT_LEN_BIT_COUNT #define NULL_DIST_SYM 30 +#define LEN_START 257 +#define LEN_OFFSET (LEN_START - 3) +#define LIT_START (NULL_DIST_SYM + 1) +#define ICF_CODE_LEN 32 + struct deflate_icf { uint32_t lit_len:LIT_LEN_BIT_COUNT; uint32_t lit_dist:DIST_LIT_BIT_COUNT; - uint32_t dist_extra:32 - DIST_LIT_BIT_COUNT - ICF_DIST_OFFSET; + uint32_t dist_extra:ICF_CODE_LEN - DIST_LIT_BIT_COUNT - ICF_DIST_OFFSET; }; struct deflate_icf *encode_deflate_icf(struct deflate_icf *next_in, struct deflate_icf *end_in, diff --git a/src/main/native/compression/isa-l-master/igzip/huff_codes.c b/src/main/native/compression/isa-l-master/igzip/huff_codes.c index 3f57e086..d39a7693 100644 --- a/src/main/native/compression/isa-l-master/igzip/huff_codes.c +++ b/src/main/native/compression/isa-l-master/igzip/huff_codes.c @@ -58,551 +58,551 @@ const uint32_t dist_code_extra_bits[] = { struct hufftables_icf static_hufftables = { .lit_len_table = { - {.code_and_extra = 0x00c,.length2 = 0x8}, - {.code_and_extra = 0x08c,.length2 = 0x8}, - {.code_and_extra = 0x04c,.length2 = 0x8}, - {.code_and_extra = 0x0cc,.length2 = 0x8}, - {.code_and_extra = 0x02c,.length2 = 
0x8}, - {.code_and_extra = 0x0ac,.length2 = 0x8}, - {.code_and_extra = 0x06c,.length2 = 0x8}, - {.code_and_extra = 0x0ec,.length2 = 0x8}, - {.code_and_extra = 0x01c,.length2 = 0x8}, - {.code_and_extra = 0x09c,.length2 = 0x8}, - {.code_and_extra = 0x05c,.length2 = 0x8}, - {.code_and_extra = 0x0dc,.length2 = 0x8}, - {.code_and_extra = 0x03c,.length2 = 0x8}, - {.code_and_extra = 0x0bc,.length2 = 0x8}, - {.code_and_extra = 0x07c,.length2 = 0x8}, - {.code_and_extra = 0x0fc,.length2 = 0x8}, - {.code_and_extra = 0x002,.length2 = 0x8}, - {.code_and_extra = 0x082,.length2 = 0x8}, - {.code_and_extra = 0x042,.length2 = 0x8}, - {.code_and_extra = 0x0c2,.length2 = 0x8}, - {.code_and_extra = 0x022,.length2 = 0x8}, - {.code_and_extra = 0x0a2,.length2 = 0x8}, - {.code_and_extra = 0x062,.length2 = 0x8}, - {.code_and_extra = 0x0e2,.length2 = 0x8}, - {.code_and_extra = 0x012,.length2 = 0x8}, - {.code_and_extra = 0x092,.length2 = 0x8}, - {.code_and_extra = 0x052,.length2 = 0x8}, - {.code_and_extra = 0x0d2,.length2 = 0x8}, - {.code_and_extra = 0x032,.length2 = 0x8}, - {.code_and_extra = 0x0b2,.length2 = 0x8}, - {.code_and_extra = 0x072,.length2 = 0x8}, - {.code_and_extra = 0x0f2,.length2 = 0x8}, - {.code_and_extra = 0x00a,.length2 = 0x8}, - {.code_and_extra = 0x08a,.length2 = 0x8}, - {.code_and_extra = 0x04a,.length2 = 0x8}, - {.code_and_extra = 0x0ca,.length2 = 0x8}, - {.code_and_extra = 0x02a,.length2 = 0x8}, - {.code_and_extra = 0x0aa,.length2 = 0x8}, - {.code_and_extra = 0x06a,.length2 = 0x8}, - {.code_and_extra = 0x0ea,.length2 = 0x8}, - {.code_and_extra = 0x01a,.length2 = 0x8}, - {.code_and_extra = 0x09a,.length2 = 0x8}, - {.code_and_extra = 0x05a,.length2 = 0x8}, - {.code_and_extra = 0x0da,.length2 = 0x8}, - {.code_and_extra = 0x03a,.length2 = 0x8}, - {.code_and_extra = 0x0ba,.length2 = 0x8}, - {.code_and_extra = 0x07a,.length2 = 0x8}, - {.code_and_extra = 0x0fa,.length2 = 0x8}, - {.code_and_extra = 0x006,.length2 = 0x8}, - {.code_and_extra = 0x086,.length2 = 0x8}, - 
{.code_and_extra = 0x046,.length2 = 0x8}, - {.code_and_extra = 0x0c6,.length2 = 0x8}, - {.code_and_extra = 0x026,.length2 = 0x8}, - {.code_and_extra = 0x0a6,.length2 = 0x8}, - {.code_and_extra = 0x066,.length2 = 0x8}, - {.code_and_extra = 0x0e6,.length2 = 0x8}, - {.code_and_extra = 0x016,.length2 = 0x8}, - {.code_and_extra = 0x096,.length2 = 0x8}, - {.code_and_extra = 0x056,.length2 = 0x8}, - {.code_and_extra = 0x0d6,.length2 = 0x8}, - {.code_and_extra = 0x036,.length2 = 0x8}, - {.code_and_extra = 0x0b6,.length2 = 0x8}, - {.code_and_extra = 0x076,.length2 = 0x8}, - {.code_and_extra = 0x0f6,.length2 = 0x8}, - {.code_and_extra = 0x00e,.length2 = 0x8}, - {.code_and_extra = 0x08e,.length2 = 0x8}, - {.code_and_extra = 0x04e,.length2 = 0x8}, - {.code_and_extra = 0x0ce,.length2 = 0x8}, - {.code_and_extra = 0x02e,.length2 = 0x8}, - {.code_and_extra = 0x0ae,.length2 = 0x8}, - {.code_and_extra = 0x06e,.length2 = 0x8}, - {.code_and_extra = 0x0ee,.length2 = 0x8}, - {.code_and_extra = 0x01e,.length2 = 0x8}, - {.code_and_extra = 0x09e,.length2 = 0x8}, - {.code_and_extra = 0x05e,.length2 = 0x8}, - {.code_and_extra = 0x0de,.length2 = 0x8}, - {.code_and_extra = 0x03e,.length2 = 0x8}, - {.code_and_extra = 0x0be,.length2 = 0x8}, - {.code_and_extra = 0x07e,.length2 = 0x8}, - {.code_and_extra = 0x0fe,.length2 = 0x8}, - {.code_and_extra = 0x001,.length2 = 0x8}, - {.code_and_extra = 0x081,.length2 = 0x8}, - {.code_and_extra = 0x041,.length2 = 0x8}, - {.code_and_extra = 0x0c1,.length2 = 0x8}, - {.code_and_extra = 0x021,.length2 = 0x8}, - {.code_and_extra = 0x0a1,.length2 = 0x8}, - {.code_and_extra = 0x061,.length2 = 0x8}, - {.code_and_extra = 0x0e1,.length2 = 0x8}, - {.code_and_extra = 0x011,.length2 = 0x8}, - {.code_and_extra = 0x091,.length2 = 0x8}, - {.code_and_extra = 0x051,.length2 = 0x8}, - {.code_and_extra = 0x0d1,.length2 = 0x8}, - {.code_and_extra = 0x031,.length2 = 0x8}, - {.code_and_extra = 0x0b1,.length2 = 0x8}, - {.code_and_extra = 0x071,.length2 = 0x8}, - {.code_and_extra = 
0x0f1,.length2 = 0x8}, - {.code_and_extra = 0x009,.length2 = 0x8}, - {.code_and_extra = 0x089,.length2 = 0x8}, - {.code_and_extra = 0x049,.length2 = 0x8}, - {.code_and_extra = 0x0c9,.length2 = 0x8}, - {.code_and_extra = 0x029,.length2 = 0x8}, - {.code_and_extra = 0x0a9,.length2 = 0x8}, - {.code_and_extra = 0x069,.length2 = 0x8}, - {.code_and_extra = 0x0e9,.length2 = 0x8}, - {.code_and_extra = 0x019,.length2 = 0x8}, - {.code_and_extra = 0x099,.length2 = 0x8}, - {.code_and_extra = 0x059,.length2 = 0x8}, - {.code_and_extra = 0x0d9,.length2 = 0x8}, - {.code_and_extra = 0x039,.length2 = 0x8}, - {.code_and_extra = 0x0b9,.length2 = 0x8}, - {.code_and_extra = 0x079,.length2 = 0x8}, - {.code_and_extra = 0x0f9,.length2 = 0x8}, - {.code_and_extra = 0x005,.length2 = 0x8}, - {.code_and_extra = 0x085,.length2 = 0x8}, - {.code_and_extra = 0x045,.length2 = 0x8}, - {.code_and_extra = 0x0c5,.length2 = 0x8}, - {.code_and_extra = 0x025,.length2 = 0x8}, - {.code_and_extra = 0x0a5,.length2 = 0x8}, - {.code_and_extra = 0x065,.length2 = 0x8}, - {.code_and_extra = 0x0e5,.length2 = 0x8}, - {.code_and_extra = 0x015,.length2 = 0x8}, - {.code_and_extra = 0x095,.length2 = 0x8}, - {.code_and_extra = 0x055,.length2 = 0x8}, - {.code_and_extra = 0x0d5,.length2 = 0x8}, - {.code_and_extra = 0x035,.length2 = 0x8}, - {.code_and_extra = 0x0b5,.length2 = 0x8}, - {.code_and_extra = 0x075,.length2 = 0x8}, - {.code_and_extra = 0x0f5,.length2 = 0x8}, - {.code_and_extra = 0x00d,.length2 = 0x8}, - {.code_and_extra = 0x08d,.length2 = 0x8}, - {.code_and_extra = 0x04d,.length2 = 0x8}, - {.code_and_extra = 0x0cd,.length2 = 0x8}, - {.code_and_extra = 0x02d,.length2 = 0x8}, - {.code_and_extra = 0x0ad,.length2 = 0x8}, - {.code_and_extra = 0x06d,.length2 = 0x8}, - {.code_and_extra = 0x0ed,.length2 = 0x8}, - {.code_and_extra = 0x01d,.length2 = 0x8}, - {.code_and_extra = 0x09d,.length2 = 0x8}, - {.code_and_extra = 0x05d,.length2 = 0x8}, - {.code_and_extra = 0x0dd,.length2 = 0x8}, - {.code_and_extra = 0x03d,.length2 = 
0x8}, - {.code_and_extra = 0x0bd,.length2 = 0x8}, - {.code_and_extra = 0x07d,.length2 = 0x8}, - {.code_and_extra = 0x0fd,.length2 = 0x8}, - {.code_and_extra = 0x013,.length2 = 0x9}, - {.code_and_extra = 0x113,.length2 = 0x9}, - {.code_and_extra = 0x093,.length2 = 0x9}, - {.code_and_extra = 0x193,.length2 = 0x9}, - {.code_and_extra = 0x053,.length2 = 0x9}, - {.code_and_extra = 0x153,.length2 = 0x9}, - {.code_and_extra = 0x0d3,.length2 = 0x9}, - {.code_and_extra = 0x1d3,.length2 = 0x9}, - {.code_and_extra = 0x033,.length2 = 0x9}, - {.code_and_extra = 0x133,.length2 = 0x9}, - {.code_and_extra = 0x0b3,.length2 = 0x9}, - {.code_and_extra = 0x1b3,.length2 = 0x9}, - {.code_and_extra = 0x073,.length2 = 0x9}, - {.code_and_extra = 0x173,.length2 = 0x9}, - {.code_and_extra = 0x0f3,.length2 = 0x9}, - {.code_and_extra = 0x1f3,.length2 = 0x9}, - {.code_and_extra = 0x00b,.length2 = 0x9}, - {.code_and_extra = 0x10b,.length2 = 0x9}, - {.code_and_extra = 0x08b,.length2 = 0x9}, - {.code_and_extra = 0x18b,.length2 = 0x9}, - {.code_and_extra = 0x04b,.length2 = 0x9}, - {.code_and_extra = 0x14b,.length2 = 0x9}, - {.code_and_extra = 0x0cb,.length2 = 0x9}, - {.code_and_extra = 0x1cb,.length2 = 0x9}, - {.code_and_extra = 0x02b,.length2 = 0x9}, - {.code_and_extra = 0x12b,.length2 = 0x9}, - {.code_and_extra = 0x0ab,.length2 = 0x9}, - {.code_and_extra = 0x1ab,.length2 = 0x9}, - {.code_and_extra = 0x06b,.length2 = 0x9}, - {.code_and_extra = 0x16b,.length2 = 0x9}, - {.code_and_extra = 0x0eb,.length2 = 0x9}, - {.code_and_extra = 0x1eb,.length2 = 0x9}, - {.code_and_extra = 0x01b,.length2 = 0x9}, - {.code_and_extra = 0x11b,.length2 = 0x9}, - {.code_and_extra = 0x09b,.length2 = 0x9}, - {.code_and_extra = 0x19b,.length2 = 0x9}, - {.code_and_extra = 0x05b,.length2 = 0x9}, - {.code_and_extra = 0x15b,.length2 = 0x9}, - {.code_and_extra = 0x0db,.length2 = 0x9}, - {.code_and_extra = 0x1db,.length2 = 0x9}, - {.code_and_extra = 0x03b,.length2 = 0x9}, - {.code_and_extra = 0x13b,.length2 = 0x9}, - 
{.code_and_extra = 0x0bb,.length2 = 0x9}, - {.code_and_extra = 0x1bb,.length2 = 0x9}, - {.code_and_extra = 0x07b,.length2 = 0x9}, - {.code_and_extra = 0x17b,.length2 = 0x9}, - {.code_and_extra = 0x0fb,.length2 = 0x9}, - {.code_and_extra = 0x1fb,.length2 = 0x9}, - {.code_and_extra = 0x007,.length2 = 0x9}, - {.code_and_extra = 0x107,.length2 = 0x9}, - {.code_and_extra = 0x087,.length2 = 0x9}, - {.code_and_extra = 0x187,.length2 = 0x9}, - {.code_and_extra = 0x047,.length2 = 0x9}, - {.code_and_extra = 0x147,.length2 = 0x9}, - {.code_and_extra = 0x0c7,.length2 = 0x9}, - {.code_and_extra = 0x1c7,.length2 = 0x9}, - {.code_and_extra = 0x027,.length2 = 0x9}, - {.code_and_extra = 0x127,.length2 = 0x9}, - {.code_and_extra = 0x0a7,.length2 = 0x9}, - {.code_and_extra = 0x1a7,.length2 = 0x9}, - {.code_and_extra = 0x067,.length2 = 0x9}, - {.code_and_extra = 0x167,.length2 = 0x9}, - {.code_and_extra = 0x0e7,.length2 = 0x9}, - {.code_and_extra = 0x1e7,.length2 = 0x9}, - {.code_and_extra = 0x017,.length2 = 0x9}, - {.code_and_extra = 0x117,.length2 = 0x9}, - {.code_and_extra = 0x097,.length2 = 0x9}, - {.code_and_extra = 0x197,.length2 = 0x9}, - {.code_and_extra = 0x057,.length2 = 0x9}, - {.code_and_extra = 0x157,.length2 = 0x9}, - {.code_and_extra = 0x0d7,.length2 = 0x9}, - {.code_and_extra = 0x1d7,.length2 = 0x9}, - {.code_and_extra = 0x037,.length2 = 0x9}, - {.code_and_extra = 0x137,.length2 = 0x9}, - {.code_and_extra = 0x0b7,.length2 = 0x9}, - {.code_and_extra = 0x1b7,.length2 = 0x9}, - {.code_and_extra = 0x077,.length2 = 0x9}, - {.code_and_extra = 0x177,.length2 = 0x9}, - {.code_and_extra = 0x0f7,.length2 = 0x9}, - {.code_and_extra = 0x1f7,.length2 = 0x9}, - {.code_and_extra = 0x00f,.length2 = 0x9}, - {.code_and_extra = 0x10f,.length2 = 0x9}, - {.code_and_extra = 0x08f,.length2 = 0x9}, - {.code_and_extra = 0x18f,.length2 = 0x9}, - {.code_and_extra = 0x04f,.length2 = 0x9}, - {.code_and_extra = 0x14f,.length2 = 0x9}, - {.code_and_extra = 0x0cf,.length2 = 0x9}, - {.code_and_extra = 
0x1cf,.length2 = 0x9}, - {.code_and_extra = 0x02f,.length2 = 0x9}, - {.code_and_extra = 0x12f,.length2 = 0x9}, - {.code_and_extra = 0x0af,.length2 = 0x9}, - {.code_and_extra = 0x1af,.length2 = 0x9}, - {.code_and_extra = 0x06f,.length2 = 0x9}, - {.code_and_extra = 0x16f,.length2 = 0x9}, - {.code_and_extra = 0x0ef,.length2 = 0x9}, - {.code_and_extra = 0x1ef,.length2 = 0x9}, - {.code_and_extra = 0x01f,.length2 = 0x9}, - {.code_and_extra = 0x11f,.length2 = 0x9}, - {.code_and_extra = 0x09f,.length2 = 0x9}, - {.code_and_extra = 0x19f,.length2 = 0x9}, - {.code_and_extra = 0x05f,.length2 = 0x9}, - {.code_and_extra = 0x15f,.length2 = 0x9}, - {.code_and_extra = 0x0df,.length2 = 0x9}, - {.code_and_extra = 0x1df,.length2 = 0x9}, - {.code_and_extra = 0x03f,.length2 = 0x9}, - {.code_and_extra = 0x13f,.length2 = 0x9}, - {.code_and_extra = 0x0bf,.length2 = 0x9}, - {.code_and_extra = 0x1bf,.length2 = 0x9}, - {.code_and_extra = 0x07f,.length2 = 0x9}, - {.code_and_extra = 0x17f,.length2 = 0x9}, - {.code_and_extra = 0x0ff,.length2 = 0x9}, - {.code_and_extra = 0x1ff,.length2 = 0x9}, - {.code_and_extra = 0x000,.length2 = 0x7}, - {.code_and_extra = 0x040,.length2 = 0x7}, - {.code_and_extra = 0x020,.length2 = 0x7}, - {.code_and_extra = 0x060,.length2 = 0x7}, - {.code_and_extra = 0x010,.length2 = 0x7}, - {.code_and_extra = 0x050,.length2 = 0x7}, - {.code_and_extra = 0x030,.length2 = 0x7}, - {.code_and_extra = 0x070,.length2 = 0x7}, - {.code_and_extra = 0x008,.length2 = 0x7}, - {.code_and_extra = 0x048,.length2 = 0x7}, - {.code_and_extra = 0x028,.length2 = 0x7}, - {.code_and_extra = 0x068,.length2 = 0x7}, - {.code_and_extra = 0x018,.length2 = 0x7}, - {.code_and_extra = 0x058,.length2 = 0x7}, - {.code_and_extra = 0x038,.length2 = 0x7}, - {.code_and_extra = 0x078,.length2 = 0x7}, - {.code_and_extra = 0x004,.length2 = 0x7}, - {.code_and_extra = 0x044,.length2 = 0x7}, - {.code_and_extra = 0x024,.length2 = 0x7}, - {.code_and_extra = 0x064,.length2 = 0x7}, - {.code_and_extra = 0x014,.length2 = 
0x7}, - {.code_and_extra = 0x054,.length2 = 0x7}, - {.code_and_extra = 0x034,.length2 = 0x7}, - {.code_and_extra = 0x074,.length2 = 0x7}, - {.code_and_extra = 0x003,.length2 = 0x8}, - {.code_and_extra = 0x083,.length2 = 0x8}, - {.code_and_extra = 0x043,.length2 = 0x8}, - {.code_and_extra = 0x0c3,.length2 = 0x8}, - {.code_and_extra = 0x023,.length2 = 0x8}, - {.code_and_extra = 0x0a3,.length2 = 0x8}, - {.code_and_extra = 0x063,.length2 = 0x8}, - {.code_and_extra = 0x0e3,.length2 = 0x8}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - 
{.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 
0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 
0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - 
{.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 
0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}, - {.code_and_extra = 0x000,.length2 = 0x0}}, + {{{.code_and_extra = 0x00c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x08c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x04c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0cc,.length2 = 0x8}}}, + {{{.code_and_extra = 0x02c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ac,.length2 = 0x8}}}, + {{{.code_and_extra = 0x06c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ec,.length2 = 0x8}}}, + {{{.code_and_extra = 0x01c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x09c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x05c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0dc,.length2 = 0x8}}}, + {{{.code_and_extra = 0x03c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0bc,.length2 = 0x8}}}, + {{{.code_and_extra = 0x07c,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0fc,.length2 = 0x8}}}, + {{{.code_and_extra = 0x002,.length2 = 0x8}}}, + {{{.code_and_extra = 0x082,.length2 = 0x8}}}, + {{{.code_and_extra = 0x042,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0c2,.length2 = 0x8}}}, + {{{.code_and_extra = 0x022,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0a2,.length2 = 0x8}}}, + {{{.code_and_extra = 0x062,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0e2,.length2 = 0x8}}}, + {{{.code_and_extra = 0x012,.length2 = 0x8}}}, + {{{.code_and_extra = 0x092,.length2 = 0x8}}}, + {{{.code_and_extra = 0x052,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0d2,.length2 = 0x8}}}, + {{{.code_and_extra = 0x032,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0b2,.length2 = 0x8}}}, + {{{.code_and_extra = 0x072,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0f2,.length2 = 0x8}}}, + {{{.code_and_extra = 0x00a,.length2 = 
0x8}}}, + {{{.code_and_extra = 0x08a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x04a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ca,.length2 = 0x8}}}, + {{{.code_and_extra = 0x02a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0aa,.length2 = 0x8}}}, + {{{.code_and_extra = 0x06a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ea,.length2 = 0x8}}}, + {{{.code_and_extra = 0x01a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x09a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x05a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0da,.length2 = 0x8}}}, + {{{.code_and_extra = 0x03a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ba,.length2 = 0x8}}}, + {{{.code_and_extra = 0x07a,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0fa,.length2 = 0x8}}}, + {{{.code_and_extra = 0x006,.length2 = 0x8}}}, + {{{.code_and_extra = 0x086,.length2 = 0x8}}}, + {{{.code_and_extra = 0x046,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0c6,.length2 = 0x8}}}, + {{{.code_and_extra = 0x026,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0a6,.length2 = 0x8}}}, + {{{.code_and_extra = 0x066,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0e6,.length2 = 0x8}}}, + {{{.code_and_extra = 0x016,.length2 = 0x8}}}, + {{{.code_and_extra = 0x096,.length2 = 0x8}}}, + {{{.code_and_extra = 0x056,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0d6,.length2 = 0x8}}}, + {{{.code_and_extra = 0x036,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0b6,.length2 = 0x8}}}, + {{{.code_and_extra = 0x076,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0f6,.length2 = 0x8}}}, + {{{.code_and_extra = 0x00e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x08e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x04e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ce,.length2 = 0x8}}}, + {{{.code_and_extra = 0x02e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ae,.length2 = 0x8}}}, + {{{.code_and_extra = 0x06e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ee,.length2 = 0x8}}}, + {{{.code_and_extra = 0x01e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x09e,.length2 = 0x8}}}, + {{{.code_and_extra = 
0x05e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0de,.length2 = 0x8}}}, + {{{.code_and_extra = 0x03e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0be,.length2 = 0x8}}}, + {{{.code_and_extra = 0x07e,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0fe,.length2 = 0x8}}}, + {{{.code_and_extra = 0x001,.length2 = 0x8}}}, + {{{.code_and_extra = 0x081,.length2 = 0x8}}}, + {{{.code_and_extra = 0x041,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0c1,.length2 = 0x8}}}, + {{{.code_and_extra = 0x021,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0a1,.length2 = 0x8}}}, + {{{.code_and_extra = 0x061,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0e1,.length2 = 0x8}}}, + {{{.code_and_extra = 0x011,.length2 = 0x8}}}, + {{{.code_and_extra = 0x091,.length2 = 0x8}}}, + {{{.code_and_extra = 0x051,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0d1,.length2 = 0x8}}}, + {{{.code_and_extra = 0x031,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0b1,.length2 = 0x8}}}, + {{{.code_and_extra = 0x071,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0f1,.length2 = 0x8}}}, + {{{.code_and_extra = 0x009,.length2 = 0x8}}}, + {{{.code_and_extra = 0x089,.length2 = 0x8}}}, + {{{.code_and_extra = 0x049,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0c9,.length2 = 0x8}}}, + {{{.code_and_extra = 0x029,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0a9,.length2 = 0x8}}}, + {{{.code_and_extra = 0x069,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0e9,.length2 = 0x8}}}, + {{{.code_and_extra = 0x019,.length2 = 0x8}}}, + {{{.code_and_extra = 0x099,.length2 = 0x8}}}, + {{{.code_and_extra = 0x059,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0d9,.length2 = 0x8}}}, + {{{.code_and_extra = 0x039,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0b9,.length2 = 0x8}}}, + {{{.code_and_extra = 0x079,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0f9,.length2 = 0x8}}}, + {{{.code_and_extra = 0x005,.length2 = 0x8}}}, + {{{.code_and_extra = 0x085,.length2 = 0x8}}}, + {{{.code_and_extra = 0x045,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0c5,.length2 = 0x8}}}, + 
{{{.code_and_extra = 0x025,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0a5,.length2 = 0x8}}}, + {{{.code_and_extra = 0x065,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0e5,.length2 = 0x8}}}, + {{{.code_and_extra = 0x015,.length2 = 0x8}}}, + {{{.code_and_extra = 0x095,.length2 = 0x8}}}, + {{{.code_and_extra = 0x055,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0d5,.length2 = 0x8}}}, + {{{.code_and_extra = 0x035,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0b5,.length2 = 0x8}}}, + {{{.code_and_extra = 0x075,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0f5,.length2 = 0x8}}}, + {{{.code_and_extra = 0x00d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x08d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x04d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0cd,.length2 = 0x8}}}, + {{{.code_and_extra = 0x02d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ad,.length2 = 0x8}}}, + {{{.code_and_extra = 0x06d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0ed,.length2 = 0x8}}}, + {{{.code_and_extra = 0x01d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x09d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x05d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0dd,.length2 = 0x8}}}, + {{{.code_and_extra = 0x03d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0bd,.length2 = 0x8}}}, + {{{.code_and_extra = 0x07d,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0fd,.length2 = 0x8}}}, + {{{.code_and_extra = 0x013,.length2 = 0x9}}}, + {{{.code_and_extra = 0x113,.length2 = 0x9}}}, + {{{.code_and_extra = 0x093,.length2 = 0x9}}}, + {{{.code_and_extra = 0x193,.length2 = 0x9}}}, + {{{.code_and_extra = 0x053,.length2 = 0x9}}}, + {{{.code_and_extra = 0x153,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0d3,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1d3,.length2 = 0x9}}}, + {{{.code_and_extra = 0x033,.length2 = 0x9}}}, + {{{.code_and_extra = 0x133,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0b3,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1b3,.length2 = 0x9}}}, + {{{.code_and_extra = 0x073,.length2 = 0x9}}}, + {{{.code_and_extra = 
0x173,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0f3,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1f3,.length2 = 0x9}}}, + {{{.code_and_extra = 0x00b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x10b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x08b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x18b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x04b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x14b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0cb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1cb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x02b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x12b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0ab,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1ab,.length2 = 0x9}}}, + {{{.code_and_extra = 0x06b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x16b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0eb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1eb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x01b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x11b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x09b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x19b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x05b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x15b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0db,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1db,.length2 = 0x9}}}, + {{{.code_and_extra = 0x03b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x13b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0bb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1bb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x07b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x17b,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0fb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1fb,.length2 = 0x9}}}, + {{{.code_and_extra = 0x007,.length2 = 0x9}}}, + {{{.code_and_extra = 0x107,.length2 = 0x9}}}, + {{{.code_and_extra = 0x087,.length2 = 0x9}}}, + {{{.code_and_extra = 0x187,.length2 = 0x9}}}, + {{{.code_and_extra = 0x047,.length2 = 0x9}}}, + {{{.code_and_extra = 0x147,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0c7,.length2 = 0x9}}}, + 
{{{.code_and_extra = 0x1c7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x027,.length2 = 0x9}}}, + {{{.code_and_extra = 0x127,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0a7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1a7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x067,.length2 = 0x9}}}, + {{{.code_and_extra = 0x167,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0e7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1e7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x017,.length2 = 0x9}}}, + {{{.code_and_extra = 0x117,.length2 = 0x9}}}, + {{{.code_and_extra = 0x097,.length2 = 0x9}}}, + {{{.code_and_extra = 0x197,.length2 = 0x9}}}, + {{{.code_and_extra = 0x057,.length2 = 0x9}}}, + {{{.code_and_extra = 0x157,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0d7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1d7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x037,.length2 = 0x9}}}, + {{{.code_and_extra = 0x137,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0b7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1b7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x077,.length2 = 0x9}}}, + {{{.code_and_extra = 0x177,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0f7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1f7,.length2 = 0x9}}}, + {{{.code_and_extra = 0x00f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x10f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x08f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x18f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x04f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x14f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0cf,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1cf,.length2 = 0x9}}}, + {{{.code_and_extra = 0x02f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x12f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0af,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1af,.length2 = 0x9}}}, + {{{.code_and_extra = 0x06f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x16f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0ef,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1ef,.length2 = 0x9}}}, + {{{.code_and_extra = 
0x01f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x11f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x09f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x19f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x05f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x15f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0df,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1df,.length2 = 0x9}}}, + {{{.code_and_extra = 0x03f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x13f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0bf,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1bf,.length2 = 0x9}}}, + {{{.code_and_extra = 0x07f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x17f,.length2 = 0x9}}}, + {{{.code_and_extra = 0x0ff,.length2 = 0x9}}}, + {{{.code_and_extra = 0x1ff,.length2 = 0x9}}}, + {{{.code_and_extra = 0x000,.length2 = 0x7}}}, + {{{.code_and_extra = 0x040,.length2 = 0x7}}}, + {{{.code_and_extra = 0x020,.length2 = 0x7}}}, + {{{.code_and_extra = 0x060,.length2 = 0x7}}}, + {{{.code_and_extra = 0x010,.length2 = 0x7}}}, + {{{.code_and_extra = 0x050,.length2 = 0x7}}}, + {{{.code_and_extra = 0x030,.length2 = 0x7}}}, + {{{.code_and_extra = 0x070,.length2 = 0x7}}}, + {{{.code_and_extra = 0x008,.length2 = 0x7}}}, + {{{.code_and_extra = 0x048,.length2 = 0x7}}}, + {{{.code_and_extra = 0x028,.length2 = 0x7}}}, + {{{.code_and_extra = 0x068,.length2 = 0x7}}}, + {{{.code_and_extra = 0x018,.length2 = 0x7}}}, + {{{.code_and_extra = 0x058,.length2 = 0x7}}}, + {{{.code_and_extra = 0x038,.length2 = 0x7}}}, + {{{.code_and_extra = 0x078,.length2 = 0x7}}}, + {{{.code_and_extra = 0x004,.length2 = 0x7}}}, + {{{.code_and_extra = 0x044,.length2 = 0x7}}}, + {{{.code_and_extra = 0x024,.length2 = 0x7}}}, + {{{.code_and_extra = 0x064,.length2 = 0x7}}}, + {{{.code_and_extra = 0x014,.length2 = 0x7}}}, + {{{.code_and_extra = 0x054,.length2 = 0x7}}}, + {{{.code_and_extra = 0x034,.length2 = 0x7}}}, + {{{.code_and_extra = 0x074,.length2 = 0x7}}}, + {{{.code_and_extra = 0x003,.length2 = 0x8}}}, + {{{.code_and_extra = 0x083,.length2 = 0x8}}}, + 
{{{.code_and_extra = 0x043,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0c3,.length2 = 0x8}}}, + {{{.code_and_extra = 0x023,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0a3,.length2 = 0x8}}}, + {{{.code_and_extra = 0x063,.length2 = 0x8}}}, + {{{.code_and_extra = 0x0e3,.length2 = 0x8}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 
0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + 
{{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 
0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + 
{{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 
0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}}, .dist_table = { - {.code_and_extra = 0x000,.length2 = 0x5}, - {.code_and_extra = 0x010,.length2 = 0x5}, - {.code_and_extra = 0x008,.length2 = 0x5}, - {.code_and_extra = 0x018,.length2 = 0x5}, - {.code_and_extra = 0x10004,.length2 = 0x5}, - {.code_and_extra = 0x10014,.length2 = 0x5}, - {.code_and_extra = 0x2000c,.length2 = 0x5}, - {.code_and_extra = 0x2001c,.length2 = 0x5}, - {.code_and_extra = 0x30002,.length2 = 0x5}, - {.code_and_extra = 0x30012,.length2 = 0x5}, - {.code_and_extra = 0x4000a,.length2 = 0x5}, - {.code_and_extra = 0x4001a,.length2 = 0x5}, - {.code_and_extra = 0x50006,.length2 = 0x5}, - {.code_and_extra = 0x50016,.length2 = 0x5}, - {.code_and_extra = 0x6000e,.length2 = 0x5}, - {.code_and_extra = 0x6001e,.length2 = 0x5}, - {.code_and_extra = 0x70001,.length2 = 0x5}, - {.code_and_extra = 0x70011,.length2 = 0x5}, - {.code_and_extra = 
0x80009,.length2 = 0x5}, - {.code_and_extra = 0x80019,.length2 = 0x5}, - {.code_and_extra = 0x90005,.length2 = 0x5}, - {.code_and_extra = 0x90015,.length2 = 0x5}, - {.code_and_extra = 0xa000d,.length2 = 0x5}, - {.code_and_extra = 0xa001d,.length2 = 0x5}, - {.code_and_extra = 0xb0003,.length2 = 0x5}, - {.code_and_extra = 0xb0013,.length2 = 0x5}, - {.code_and_extra = 0xc000b,.length2 = 0x5}, - {.code_and_extra = 0xc001b,.length2 = 0x5}, - {.code_and_extra = 0xd0007,.length2 = 0x5}, - {.code_and_extra = 0xd0017,.length2 = 0x5}, - {.code_and_extra = 0x000,.length2 = 0x0}} + {{{.code_and_extra = 0x000,.length2 = 0x5}}}, + {{{.code_and_extra = 0x010,.length2 = 0x5}}}, + {{{.code_and_extra = 0x008,.length2 = 0x5}}}, + {{{.code_and_extra = 0x018,.length2 = 0x5}}}, + {{{.code_and_extra = 0x10004,.length2 = 0x5}}}, + {{{.code_and_extra = 0x10014,.length2 = 0x5}}}, + {{{.code_and_extra = 0x2000c,.length2 = 0x5}}}, + {{{.code_and_extra = 0x2001c,.length2 = 0x5}}}, + {{{.code_and_extra = 0x30002,.length2 = 0x5}}}, + {{{.code_and_extra = 0x30012,.length2 = 0x5}}}, + {{{.code_and_extra = 0x4000a,.length2 = 0x5}}}, + {{{.code_and_extra = 0x4001a,.length2 = 0x5}}}, + {{{.code_and_extra = 0x50006,.length2 = 0x5}}}, + {{{.code_and_extra = 0x50016,.length2 = 0x5}}}, + {{{.code_and_extra = 0x6000e,.length2 = 0x5}}}, + {{{.code_and_extra = 0x6001e,.length2 = 0x5}}}, + {{{.code_and_extra = 0x70001,.length2 = 0x5}}}, + {{{.code_and_extra = 0x70011,.length2 = 0x5}}}, + {{{.code_and_extra = 0x80009,.length2 = 0x5}}}, + {{{.code_and_extra = 0x80019,.length2 = 0x5}}}, + {{{.code_and_extra = 0x90005,.length2 = 0x5}}}, + {{{.code_and_extra = 0x90015,.length2 = 0x5}}}, + {{{.code_and_extra = 0xa000d,.length2 = 0x5}}}, + {{{.code_and_extra = 0xa001d,.length2 = 0x5}}}, + {{{.code_and_extra = 0xb0003,.length2 = 0x5}}}, + {{{.code_and_extra = 0xb0013,.length2 = 0x5}}}, + {{{.code_and_extra = 0xc000b,.length2 = 0x5}}}, + {{{.code_and_extra = 0xc001b,.length2 = 0x5}}}, + {{{.code_and_extra = 
0xd0007,.length2 = 0x5}}}, + {{{.code_and_extra = 0xd0017,.length2 = 0x5}}}, + {{{.code_and_extra = 0x000,.length2 = 0x0}}}} }; struct slver { @@ -684,7 +684,7 @@ void isal_update_histogram_base(uint8_t * start_stream, int length, memset(last_seen, 0, sizeof(histogram->hash_table)); /* Initialize last_seen to be 0. */ for (current = start_stream; current < end_stream - 3; current++) { literal = *(uint32_t *) current; - hash = compute_hash(literal) & HASH_MASK; + hash = compute_hash(literal) & LVL0_HASH_MASK; seen = last_seen[hash]; last_seen[hash] = (current - start_stream) & 0xFFFF; dist = (current - start_stream - seen) & 0xFFFF; @@ -704,7 +704,7 @@ void isal_update_histogram_base(uint8_t * start_stream, int length, next_hash++; for (; next_hash < end; next_hash++) { literal = *(uint32_t *) next_hash; - hash = compute_hash(literal) & HASH_MASK; + hash = compute_hash(literal) & LVL0_HASH_MASK; last_seen[hash] = (next_hash - start_stream) & 0xFFFF; } @@ -718,7 +718,7 @@ void isal_update_histogram_base(uint8_t * start_stream, int length, lit_len_histogram[literal & 0xFF] += 1; } literal = literal >> 8; - hash = compute_hash(literal) & HASH_MASK; + hash = compute_hash(literal) & LVL0_HASH_MASK; seen = last_seen[hash]; last_seen[hash] = (current - start_stream) & 0xFFFF; dist = (current - start_stream - seen) & 0xFFFF; @@ -870,6 +870,44 @@ static inline uint32_t init_heap64(struct heap_tree *heap_space, uint64_t * hist return heap_size; } +static inline uint32_t init_heap64_semi_complete(struct heap_tree *heap_space, + uint64_t * histogram, uint64_t hist_size, + uint64_t complete_start) +{ + uint32_t heap_size, i; + + memset(heap_space, 0, sizeof(struct heap_tree)); + + heap_size = 0; + for (i = 0; i < complete_start; i++) { + if (histogram[i] != 0) + heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i; + } + + for (; i < hist_size; i++) + heap_space->heap[++heap_size] = ((histogram[i]) << FREQ_SHIFT) | i; + + // make sure heap has at least two elements 
in it + if (heap_size < 2) { + if (heap_size == 0) { + heap_space->heap[1] = 1ULL << FREQ_SHIFT; + heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1; + heap_size = 2; + } else { + // heap size == 1 + if (histogram[0] == 0) + heap_space->heap[2] = 1ULL << FREQ_SHIFT; + else + heap_space->heap[2] = (1ULL << FREQ_SHIFT) | 1; + heap_size = 2; + } + } + + build_heap(heap_space->heap, heap_size); + + return heap_size; +} + static inline uint32_t init_heap64_complete(struct heap_tree *heap_space, uint64_t * histogram, uint64_t hist_size) { @@ -1444,7 +1482,9 @@ int isal_create_hufftables_subset(struct isal_hufftables *hufftables, memset(hufftables, 0, sizeof(struct isal_hufftables)); - heap_size = init_heap64(&heap_space, lit_len_histogram, LIT_LEN); + heap_size = + init_heap64_semi_complete(&heap_space, lit_len_histogram, LIT_LEN, + ISAL_DEF_LIT_SYMBOLS); gen_huff_code_lens(&heap_space, heap_size, code_len_count, (struct huff_code *)lit_huff_table, LIT_LEN, MAX_DEFLATE_CODE_LEN); max_lit_len_sym = set_huff_codes(lit_huff_table, LIT_LEN, code_len_count); diff --git a/src/main/native/compression/isa-l-master/igzip/huff_codes.h b/src/main/native/compression/isa-l-master/igzip/huff_codes.h index fe833d35..d773c6c3 100644 --- a/src/main/native/compression/isa-l-master/igzip/huff_codes.h +++ b/src/main/native/compression/isa-l-master/igzip/huff_codes.h @@ -76,7 +76,14 @@ #define INVALID_DIST_HUFFCODE 1 #define INVALID_HUFFCODE 1 -#define HASH_MASK (IGZIP_HASH_SIZE - 1) +#define HASH8K_HASH_MASK (IGZIP_HASH8K_HASH_SIZE - 1) +#define HASH_HIST_HASH_MASK (IGZIP_HASH_HIST_SIZE - 1) +#define HASH_MAP_HASH_MASK (IGZIP_HASH_MAP_HASH_SIZE - 1) + +#define LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1) +#define LVL1_HASH_MASK (IGZIP_LVL1_HASH_SIZE - 1) +#define LVL2_HASH_MASK (IGZIP_LVL2_HASH_SIZE - 1) +#define LVL3_HASH_MASK (IGZIP_LVL3_HASH_SIZE - 1) #define SHORTEST_MATCH 4 #define LENGTH_BITS 5 @@ -95,15 +102,16 @@ */ struct huff_code { union { + struct { + uint32_t code_and_extra:24; + 
uint32_t length2:8; + }; + struct { uint16_t code; uint8_t extra_bit_count; uint8_t length; }; - struct { - uint32_t code_and_extra:24; - uint32_t length2:8; - }; }; }; diff --git a/src/main/native/compression/isa-l-master/igzip/huffman.h b/src/main/native/compression/isa-l-master/igzip/huffman.h index 0553b815..313b6e18 100644 --- a/src/main/native/compression/isa-l-master/igzip/huffman.h +++ b/src/main/native/compression/isa-l-master/igzip/huffman.h @@ -55,9 +55,15 @@ static inline uint32_t tzcnt(uint64_t val) { uint32_t cnt; -#ifdef __x86_64__ +#ifdef __BMI__ + cnt = __tzcnt_u64(val); + cnt = cnt / 8; +#elifdef __x86_64__ - cnt = __builtin_ctzll(val) / 8;//__tzcnt_u64(val); + cnt = __bsfq(val); + if(val == 0) + cnt = 64; + cnt = cnt / 8; #else for(cnt = 8; val > 0; val <<= 8) @@ -178,12 +184,41 @@ static inline uint32_t compute_hash(uint32_t data) return _mm_crc32_u32(0, data); #else + uint64_t hash; /* Use multiplication to create a hash, 0xBDD06057 is a prime number */ - return ((uint64_t)data * 0xB2D06057) >> 16; + hash = data; + hash *= 0xB2D06057; + hash >>= 16; + hash *= 0xB2D06057; + hash >>= 16; + + return hash; #endif /* __SSE4_2__ */ } +#define PROD1 0xFFFFE84B +#define PROD2 0xFFFF97B1 +static inline uint32_t compute_hash_mad(uint32_t data) +{ + int16_t data_low; + int16_t data_high; + + data_low = data; ; + data_high = data >> 16; + data = PROD1 * data_low + PROD2 * data_high; + + data_low = data; + data_high = data >> 16; + data = PROD1 * data_low + PROD2 * data_high; + + return data; +} + +static inline uint32_t compute_long_hash(uint64_t data) { + + return compute_hash(data >> 32)^compute_hash(data); +} /** * @brief Returns how long str1 and str2 have the same symbols. 
diff --git a/src/main/native/compression/isa-l-master/igzip/igzip.c b/src/main/native/compression/isa-l-master/igzip/igzip.c index 01fcb0b7..eb2f059c 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip.c +++ b/src/main/native/compression/isa-l-master/igzip/igzip.c @@ -41,8 +41,6 @@ #define NON_EMPTY_BLOCK_SIZE 6 #define MAX_SYNC_FLUSH_SIZE NON_EMPTY_BLOCK_SIZE + MAX_WRITE_BITS_SIZE -#define MAX_TOKENS (16 * 1024) - #include "huffman.h" #include "bitbuf2.h" #include "igzip_lib.h" @@ -66,7 +64,10 @@ # define to_be32(x) _byteswap_ulong(x) #endif -extern void isal_deflate_hash_lvl0(struct isal_zstream *stream, uint8_t * dict, int dict_len); +extern void isal_deflate_hash_lvl0(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t); +extern void isal_deflate_hash_lvl1(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t); +extern void isal_deflate_hash_lvl2(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t); +extern void isal_deflate_hash_lvl3(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t); extern const uint8_t gzip_hdr[]; extern const uint32_t gzip_hdr_bytes; extern const uint32_t gzip_trl_bytes; @@ -76,7 +77,7 @@ extern const uint32_t zlib_trl_bytes; extern const struct isal_hufftables hufftables_default; extern const struct isal_hufftables hufftables_static; -static uint32_t write_stored_block(struct isal_zstream *stream, uint32_t block_size); +static uint32_t write_stored_block(struct isal_zstream *stream); static int write_stream_header_stateless(struct isal_zstream *stream); static void write_stream_header(struct isal_zstream *stream); @@ -93,16 +94,17 @@ void isal_deflate_body(struct isal_zstream *stream); void isal_deflate_finish(struct isal_zstream *stream); void isal_deflate_icf_body(struct isal_zstream *stream); -void isal_deflate_icf_finish(struct isal_zstream *stream); +void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream); +void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream); +void isal_deflate_icf_finish_lvl3(struct 
isal_zstream *stream); /*****************************************************************/ /* Forward declarations */ static inline void reset_match_history(struct isal_zstream *stream); -void write_header(struct isal_zstream *stream, uint8_t * deflate_hdr, - uint32_t deflate_hdr_count, uint32_t extra_bits_count, uint32_t next_state, - uint32_t toggle_end_of_stream); -void write_deflate_header(struct isal_zstream *stream); -void write_trailer(struct isal_zstream *stream); +static void write_header(struct isal_zstream *stream, uint8_t * deflate_hdr, + uint32_t deflate_hdr_count, uint32_t extra_bits_count, + uint32_t next_state, uint32_t toggle_end_of_stream); +static void write_trailer(struct isal_zstream *stream); struct slver { uint16_t snum; @@ -218,7 +220,7 @@ static void flush_write_buffer(struct isal_zstream *stream) static void flush_icf_block(struct isal_zstream *stream) { struct isal_zstate *state = &stream->internal_state; - struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; struct BitBuf2 *write_buf = &state->bitbuf; struct deflate_icf *icf_buf_encoded_next; @@ -244,33 +246,135 @@ static void flush_icf_block(struct isal_zstream *stream) } } +static int check_level_req(struct isal_zstream *stream) +{ + if (stream->level == 0) + return 0; + + if (stream->level_buf == NULL) + return ISAL_INVALID_LEVEL_BUF; + + switch (stream->level) { + case 3: + if (stream->level_buf_size < ISAL_DEF_LVL3_MIN) + return ISAL_INVALID_LEVEL; + break; + + case 2: + if (stream->level_buf_size < ISAL_DEF_LVL2_MIN) + return ISAL_INVALID_LEVEL; + break; + case 1: + if (stream->level_buf_size < ISAL_DEF_LVL1_MIN) + return ISAL_INVALID_LEVEL; + break; + default: + return ISAL_INVALID_LEVEL; + } + + return 0; +} + +static int init_hash8k_buf(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf 
*)stream->level_buf; + state->has_level_buf_init = 1; + return sizeof(struct level_buf) - MAX_LVL_BUF_SIZE + sizeof(level_buf->hash8k); +} + +static int init_hash_hist_buf(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + state->has_level_buf_init = 1; + return sizeof(struct level_buf) - MAX_LVL_BUF_SIZE + sizeof(level_buf->hash_hist); +} + +static int init_hash_map_buf(struct isal_zstream *stream) +{ + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + if (!state->has_level_buf_init) { + level_buf->hash_map.matches_next = level_buf->hash_map.matches; + level_buf->hash_map.matches_end = level_buf->hash_map.matches; + } + state->has_level_buf_init = 1; + + return sizeof(struct level_buf) - MAX_LVL_BUF_SIZE + sizeof(level_buf->hash_map); + +} + +/* returns the size of the level specific buffer */ +static int init_lvlX_buf(struct isal_zstream *stream) +{ + switch (stream->level) { + case 3: + return init_hash_map_buf(stream); + case 2: + return init_hash_hist_buf(stream); + default: + return init_hash8k_buf(stream); + } + +} + static void init_new_icf_block(struct isal_zstream *stream) { struct isal_zstate *state = &stream->internal_state; - struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf; - - if (stream->level_buf_size >= - sizeof(struct level_2_buf) + 100 * sizeof(struct deflate_icf)) { - level_buf->block_start_index = stream->total_in; - level_buf->icf_buf_next = level_buf->icf_buf_start; - level_buf->icf_buf_avail_out = - stream->level_buf_size - sizeof(struct level_2_buf) - - sizeof(struct deflate_icf); - memset(&state->hist, 0, sizeof(struct isal_mod_hist)); - state->state = ZSTATE_BODY; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + int level_struct_size; + + level_struct_size = init_lvlX_buf(stream); + + state->block_next = 
state->block_end; + level_buf->icf_buf_start = + (struct deflate_icf *)(stream->level_buf + level_struct_size); + + level_buf->icf_buf_next = level_buf->icf_buf_start; + level_buf->icf_buf_avail_out = + stream->level_buf_size - level_struct_size - sizeof(struct deflate_icf); + + memset(&level_buf->hist, 0, sizeof(struct isal_mod_hist)); + state->state = ZSTATE_BODY; +} + +static int are_buffers_empty_hashX(struct isal_zstream *stream) +{ + return !stream->avail_in; +} + +static int are_buffers_empty_hash_map(struct isal_zstream *stream) +{ + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + + return (!stream->avail_in + && level_buf->hash_map.matches_next >= level_buf->hash_map.matches_end); +} + +static int are_buffers_empty(struct isal_zstream *stream) +{ + + switch (stream->level) { + case 3: + return are_buffers_empty_hash_map(stream); + case 2: + return are_buffers_empty_hashX(stream); + default: + return are_buffers_empty_hashX(stream); } } -static void create_icf_block_hdr(struct isal_zstream *stream, uint8_t * start_in) +static void create_icf_block_hdr(struct isal_zstream *stream) { struct isal_zstate *state = &stream->internal_state; - struct level_2_buf *level_buf = (struct level_2_buf *)stream->level_buf; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; struct BitBuf2 *write_buf = &state->bitbuf; struct BitBuf2 write_buf_tmp; uint32_t out_size = stream->avail_out; uint8_t *end_out = stream->next_out + out_size; uint64_t bit_count; - uint64_t block_in_size = stream->total_in - level_buf->block_start_index; + uint64_t block_in_size = state->block_end - state->block_next; uint64_t block_size; int buffer_header = 0; @@ -281,14 +385,13 @@ static void create_icf_block_hdr(struct isal_zstream *stream, uint8_t * start_in block_size = block_size ? 
block_size : TYPE0_BLK_HDR_LEN; /* Write EOB in icf_buf */ - state->hist.ll_hist[256] = 1; + level_buf->hist.ll_hist[256] = 1; level_buf->icf_buf_next->lit_len = 0x100; level_buf->icf_buf_next->lit_dist = NULL_DIST_SYM; level_buf->icf_buf_next->dist_extra = 0; level_buf->icf_buf_next++; - level_buf->block_in_length = block_in_size; - state->has_eob_hdr = (stream->end_of_stream && !stream->avail_in) ? 1 : 0; + state->has_eob_hdr = (stream->end_of_stream && are_buffers_empty(stream)) ? 1 : 0; if (end_out - stream->next_out >= ISAL_DEF_MAX_HDR_SIZE) { /* Assumes ISAL_DEF_MAX_HDR_SIZE is large enough to contain a @@ -305,13 +408,13 @@ static void create_icf_block_hdr(struct isal_zstream *stream, uint8_t * start_in } bit_count = create_hufftables_icf(write_buf, &level_buf->encode_tables, - &state->hist, state->has_eob_hdr); + &level_buf->hist, state->has_eob_hdr); - if (bit_count / 8 >= block_size && stream->next_in - block_in_size >= start_in) { + if (bit_count / 8 >= block_size && state->block_next >= state->total_in_start && + block_size <= + stream->avail_out + sizeof(state->buffer) - (stream->total_in - state->block_end) - + ISAL_LOOK_AHEAD) { /* Reset stream for writing out a type0 block */ - stream->next_in -= block_in_size; - stream->avail_in += block_in_size; - stream->total_in -= block_in_size; state->has_eob_hdr = 0; memcpy(write_buf, &write_buf_tmp, sizeof(struct BitBuf2)); state->state = ZSTATE_TYPE0_HDR; @@ -368,11 +471,25 @@ static void isal_deflate_pass(struct isal_zstream *stream) write_trailer(stream); } +static void isal_deflate_icf_finish(struct isal_zstream *stream) +{ + switch (stream->level) { + case 3: + isal_deflate_icf_finish_lvl3(stream); + break; + case 2: + isal_deflate_icf_finish_lvl2(stream); + break; + default: + isal_deflate_icf_finish_lvl1(stream); + } +} + static void isal_deflate_icf_pass(struct isal_zstream *stream) { uint8_t *start_in = stream->next_in; struct isal_zstate *state = &stream->internal_state; - struct level_2_buf 
*level_buf = (struct level_2_buf *)stream->level_buf; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; do { if (state->state == ZSTATE_NEW_HDR) @@ -385,7 +502,7 @@ static void isal_deflate_icf_pass(struct isal_zstream *stream) isal_deflate_icf_finish(stream); if (state->state == ZSTATE_CREATE_HDR) - create_icf_block_hdr(stream, start_in); + create_icf_block_hdr(stream); if (state->state == ZSTATE_HDR) /* Note that the header may be prepended by the @@ -402,8 +519,7 @@ static void isal_deflate_icf_pass(struct isal_zstream *stream) if (state->state == ZSTATE_TYPE0_HDR || state->state == ZSTATE_TYPE0_BODY) { if (stream->gzip_flag == IGZIP_GZIP || stream->gzip_flag == IGZIP_ZLIB) write_stream_header(stream); - level_buf->block_in_length = - write_stored_block(stream, level_buf->block_in_length); + write_stored_block(stream); } } @@ -574,6 +690,7 @@ static void write_constant_compressed_stateless(struct isal_zstream *stream, stream->next_in += repeated_length; stream->avail_in -= repeated_length; stream->total_in += repeated_length; + state->block_end += repeated_length; bytes = buffer_used(&state->bitbuf); stream->next_out = buffer_ptr(&state->bitbuf); @@ -631,25 +748,16 @@ static int isal_deflate_int_stateless(struct isal_zstream *stream) reset_match_history(stream); } - state->file_start = stream->next_in - stream->total_in; isal_deflate_pass(stream); - } else if (stream->level == 1) { - if (stream->level_buf == NULL || stream->level_buf_size < ISAL_DEF_LVL1_MIN) { - /* Default to internal buffer if invalid size is supplied */ - stream->level_buf = state->buffer; - stream->level_buf_size = sizeof(state->buffer); - } - + } else if (stream->level <= ISAL_DEF_MAX_LEVEL) { if (state->state == ZSTATE_NEW_HDR || state->state == ZSTATE_HDR) reset_match_history(stream); state->count = 0; - state->file_start = stream->next_in - stream->total_in; isal_deflate_icf_pass(stream); - } else - return ISAL_INVALID_LEVEL; + } if (state->state == ZSTATE_END || 
(state->state == ZSTATE_NEW_HDR && stream->flush == FULL_FLUSH)) @@ -658,11 +766,13 @@ static int isal_deflate_int_stateless(struct isal_zstream *stream) return STATELESS_OVERFLOW; } -static void write_type0_header(struct isal_zstream *stream, uint32_t block_in_size) +static void write_type0_header(struct isal_zstream *stream) { + struct isal_zstate *state = &stream->internal_state; uint64_t stored_blk_hdr; uint32_t copy_size; - uint32_t memcpy_len; + uint32_t memcpy_len, avail_in; + uint32_t block_in_size = state->block_end - state->block_next; struct BitBuf2 *bitbuf = &stream->internal_state.bitbuf; if (block_in_size > TYPE0_MAX_BLK_LEN) { @@ -675,7 +785,8 @@ static void write_type0_header(struct isal_zstream *stream, uint32_t block_in_si copy_size = block_in_size; /* Handle BFINAL bit */ - if (stream->end_of_stream && stream->avail_in == block_in_size) + avail_in = stream->total_in + stream->avail_in - state->block_next; + if (stream->end_of_stream && avail_in == block_in_size) stream->internal_state.has_eob_hdr = 1; } @@ -686,8 +797,7 @@ static void write_type0_header(struct isal_zstream *stream, uint32_t block_in_si memcpy(stream->next_out, &stored_blk_hdr, memcpy_len); } else if (stream->avail_out >= 8) { set_buf(bitbuf, stream->next_out, stream->avail_out); - write_bits(bitbuf, stream->internal_state.has_eob_hdr, 3); - flush(bitbuf); + write_bits_flush(bitbuf, stream->internal_state.has_eob_hdr, 3); stream->next_out = buffer_ptr(bitbuf); stream->total_out += buffer_used(bitbuf); stream->avail_out -= buffer_used(bitbuf); @@ -706,48 +816,47 @@ static void write_type0_header(struct isal_zstream *stream, uint32_t block_in_si stream->internal_state.count = copy_size; } -static uint32_t write_stored_block(struct isal_zstream *stream, uint32_t block_in_size) +static uint32_t write_stored_block(struct isal_zstream *stream) { - uint32_t copy_size; + uint32_t copy_size, avail_in; + uint8_t *next_in; struct isal_zstate *state = &stream->internal_state; do { if 
(state->state == ZSTATE_TYPE0_HDR) { - write_type0_header(stream, block_in_size); + write_type0_header(stream); if (state->state == ZSTATE_TYPE0_HDR) break; } - assert(state->count <= block_in_size); - block_in_size -= state->count; + assert(state->count <= state->block_end - state->block_next); copy_size = state->count; - if (copy_size > stream->avail_out || copy_size > stream->avail_in) { + next_in = stream->next_in - stream->total_in + state->block_next; + avail_in = stream->total_in + stream->avail_in - state->block_next; + if (copy_size > stream->avail_out || copy_size > avail_in) { state->count = copy_size; - copy_size = (stream->avail_out <= stream->avail_in) ? - stream->avail_out : stream->avail_in; + copy_size = (stream->avail_out <= avail_in) ? + stream->avail_out : avail_in; - memcpy(stream->next_out, stream->next_in, copy_size); + memcpy(stream->next_out, next_in, copy_size); state->count -= copy_size; } else { - memcpy(stream->next_out, stream->next_in, copy_size); + memcpy(stream->next_out, next_in, copy_size); state->count = 0; state->state = ZSTATE_TYPE0_HDR; } - stream->next_in += copy_size; - stream->avail_in -= copy_size; - stream->total_in += copy_size; + state->block_next += copy_size; stream->next_out += copy_size; stream->avail_out -= copy_size; stream->total_out += copy_size; - block_in_size += state->count; - if (block_in_size == 0) { + if (state->block_next == state->block_end) { state->state = state->has_eob_hdr ? 
ZSTATE_TRL : ZSTATE_NEW_HDR; if (stream->flush == FULL_FLUSH && state->state == ZSTATE_NEW_HDR - && stream->avail_in == 0) { + && are_buffers_empty(stream)) { /* Clear match history so there are no cross * block length distance pairs */ reset_match_history(stream); @@ -755,22 +864,43 @@ static uint32_t write_stored_block(struct isal_zstream *stream, uint32_t block_i } } while (state->state == ZSTATE_TYPE0_HDR); - return block_in_size; + return state->block_end - state->block_next; } static inline void reset_match_history(struct isal_zstream *stream) { struct isal_zstate *state = &stream->internal_state; - uint16_t *head = stream->internal_state.head; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint16_t *hash_table; + uint32_t hash_table_size; int i = 0; + switch (stream->level) { + case 3: + hash_table = level_buf->lvl3.hash_table; + hash_table_size = sizeof(level_buf->lvl3.hash_table); + break; + + case 2: + hash_table = level_buf->lvl2.hash_table; + hash_table_size = sizeof(level_buf->lvl2.hash_table); + break; + case 1: + hash_table = level_buf->lvl1.hash_table; + hash_table_size = sizeof(level_buf->lvl1.hash_table); + break; + default: + hash_table = state->head; + hash_table_size = sizeof(state->head); + } + state->has_hist = IGZIP_NO_HIST; if ((stream->total_in & 0xFFFF) == 0) - memset(stream->internal_state.head, 0, sizeof(stream->internal_state.head)); + memset(hash_table, 0, hash_table_size); else { - for (i = 0; i < sizeof(state->head) / 2; i++) { - head[i] = (uint16_t) (stream->total_in); + for (i = 0; i < hash_table_size / 2; i++) { + hash_table[i] = (uint16_t) (stream->total_in); } } } @@ -789,12 +919,16 @@ void isal_deflate_init(struct isal_zstream *stream) stream->flush = NO_FLUSH; stream->gzip_flag = 0; + state->block_next = 0; + state->block_end = 0; state->b_bytes_valid = 0; state->b_bytes_processed = 0; + state->total_in_start = 0; state->has_wrap_hdr = 0; state->has_eob = 0; state->has_eob_hdr = 0; state->has_hist = 
IGZIP_NO_HIST; + state->has_level_buf_init = 0; state->state = ZSTATE_NEW_HDR; state->count = 0; @@ -815,10 +949,14 @@ void isal_deflate_reset(struct isal_zstream *stream) stream->total_in = 0; stream->total_out = 0; + state->block_next = 0; + state->block_end = 0; state->b_bytes_valid = 0; state->b_bytes_processed = 0; + state->total_in_start = 0; state->has_wrap_hdr = 0; state->has_eob = 0; + state->has_level_buf_init = 0; state->has_eob_hdr = 0; state->has_hist = IGZIP_NO_HIST; state->state = ZSTATE_NEW_HDR; @@ -876,7 +1014,32 @@ void isal_deflate_stateless_init(struct isal_zstream *stream) void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len) { - isal_deflate_hash_lvl0(stream, dict, dict_len); + /* Reset history to prevent out of bounds matches this works because + * dictionary must set at least 1 element in the history */ + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + switch (stream->level) { + case 3: + memset(level_buf->lvl3.hash_table, -1, sizeof(level_buf->lvl3.hash_table)); + isal_deflate_hash_lvl3(level_buf->lvl3.hash_table, LVL3_HASH_MASK, + stream->total_in, dict, dict_len); + break; + + case 2: + memset(level_buf->lvl2.hash_table, -1, sizeof(level_buf->lvl2.hash_table)); + isal_deflate_hash_lvl2(level_buf->lvl2.hash_table, LVL2_HASH_MASK, + stream->total_in, dict, dict_len); + break; + case 1: + memset(level_buf->lvl1.hash_table, -1, sizeof(level_buf->lvl1.hash_table)); + isal_deflate_hash_lvl1(level_buf->lvl1.hash_table, LVL1_HASH_MASK, + stream->total_in, dict, dict_len); + break; + default: + memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head)); + isal_deflate_hash_lvl0(stream->internal_state.head, LVL0_HASH_MASK, + stream->total_in, dict, dict_len); + } + stream->internal_state.has_hist = IGZIP_HIST; } @@ -899,10 +1062,6 @@ int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t * dict, uint32_t state->b_bytes_processed = dict_len; state->b_bytes_valid = 
dict_len; - /* Reset history to prevent out of bounds matches this works because - * dictionary must set at least 1 element in the history */ - memset(stream->internal_state.head, -1, sizeof(stream->internal_state.head)); - state->has_hist = IGZIP_DICT_HIST; return COMP_OK; @@ -921,13 +1080,18 @@ int isal_deflate_stateless(struct isal_zstream *stream) const uint32_t gzip_flag = stream->gzip_flag; const uint32_t has_wrap_hdr = state->has_wrap_hdr; + int level_check; uint32_t stored_len; /* Final block has already been written */ - stream->internal_state.has_eob_hdr = 0; - init(&stream->internal_state.bitbuf); - stream->internal_state.state = ZSTATE_NEW_HDR; - stream->internal_state.crc = 0; + state->total_in_start = stream->total_in; + state->block_next = stream->total_in; + state->block_end = stream->total_in; + state->has_eob_hdr = 0; + init(&state->bitbuf); + state->state = ZSTATE_NEW_HDR; + state->crc = 0; + state->has_level_buf_init = 0; if (stream->flush == NO_FLUSH) stream->end_of_stream = 1; @@ -935,8 +1099,15 @@ int isal_deflate_stateless(struct isal_zstream *stream) if (stream->flush != NO_FLUSH && stream->flush != FULL_FLUSH) return INVALID_FLUSH; - if (stream->level != 0 && stream->level != 1) - return ISAL_INVALID_LEVEL; + level_check = check_level_req(stream); + if (level_check) { + if (stream->level == 1 && stream->level_buf == NULL) { + /* Default to internal buffer if invalid size is supplied */ + stream->level_buf = state->buffer; + stream->level_buf_size = sizeof(state->buffer) + sizeof(state->head); + } else + return level_check; + } if (avail_in == 0) stored_len = TYPE0_BLK_HDR_LEN; @@ -971,8 +1142,6 @@ int isal_deflate_stateless(struct isal_zstream *stream) return COMP_OK; else { if (stream->flush == FULL_FLUSH) { - stream->internal_state.file_start = - (uint8_t *) & stream->internal_state.buffer; reset_match_history(stream); } stream->internal_state.has_eob_hdr = 0; @@ -981,9 +1150,12 @@ int isal_deflate_stateless(struct isal_zstream *stream) 
if (avail_out < stored_len) return STATELESS_OVERFLOW; - stream->next_in = next_in; - stream->avail_in = avail_in; - stream->total_in = total_in; + stream->next_in = next_in + avail_in; + stream->avail_in = 0; + stream->total_in = total_in + avail_in; + + state->block_next = 0; + state->block_end = avail_in; stream->next_out = next_out; stream->avail_out = avail_out; @@ -998,7 +1170,8 @@ int isal_deflate_stateless(struct isal_zstream *stream) write_stream_header_stateless(stream); stream->internal_state.state = ZSTATE_TYPE0_HDR; - write_stored_block(stream, stream->avail_in); + + write_stored_block(stream); if (stream->gzip_flag) { stream->internal_state.crc = 0; @@ -1010,29 +1183,56 @@ int isal_deflate_stateless(struct isal_zstream *stream) } +static inline uint32_t hist_add(struct isal_zstream *stream, uint32_t history_size, + uint32_t add_size) +{ + struct isal_zstate *state = &stream->internal_state; + + /* Calculate required match History */ + history_size += add_size; + if (history_size > IGZIP_HIST_SIZE) + history_size = IGZIP_HIST_SIZE; + + /* Calculate required block history */ + if (state->state == ZSTATE_TYPE0_HDR + || state->state == ZSTATE_TYPE0_BODY + || state->state == ZSTATE_TMP_TYPE0_HDR || state->state == ZSTATE_TMP_TYPE0_BODY) { + if (stream->total_in - state->block_next > history_size) + history_size = (stream->total_in - state->block_next); + } + + return history_size; +} + int isal_deflate(struct isal_zstream *stream) { struct isal_zstate *state = &stream->internal_state; int ret = COMP_OK; uint8_t *next_in; - uint32_t avail_in, avail_in_start; + uint32_t avail_in, avail_in_start, total_start, hist_size, future_size; uint32_t flush_type = stream->flush; uint32_t end_of_stream = stream->end_of_stream; - int size = 0; + uint32_t size = 0; uint8_t *copy_down_src = NULL; - uint64_t copy_down_size = 0; - int32_t processed = -(state->b_bytes_valid - state->b_bytes_processed); + uint64_t copy_down_size = 0, copy_start_offset; if (stream->flush >= 3) 
return INVALID_FLUSH; + ret = check_level_req(stream); + if (ret) + return ret; + next_in = stream->next_in; avail_in = stream->avail_in; + total_start = stream->total_in; stream->total_in -= state->b_bytes_valid - state->b_bytes_processed; - if (state->has_hist == IGZIP_NO_HIST) + hist_size = hist_add(stream, state->b_bytes_processed, 0); + if (state->has_hist == IGZIP_NO_HIST) { reset_match_history(stream); - else if (state->has_hist == IGZIP_DICT_HIST) + hist_size = 0; + } else if (state->has_hist == IGZIP_DICT_HIST) isal_deflate_hash(stream, state->buffer, state->b_bytes_processed); do { @@ -1050,63 +1250,60 @@ int isal_deflate(struct isal_zstream *stream) stream->next_in = &state->buffer[state->b_bytes_processed]; stream->avail_in = state->b_bytes_valid - state->b_bytes_processed; - state->file_start = stream->next_in - stream->total_in; - processed += stream->avail_in; if (stream->avail_in > IGZIP_HIST_SIZE + || stream->total_in - state->block_next > IGZIP_HIST_SIZE || stream->end_of_stream || stream->flush != NO_FLUSH) { avail_in_start = stream->avail_in; + state->total_in_start = stream->total_in - state->b_bytes_processed; isal_deflate_int(stream); state->b_bytes_processed += avail_in_start - stream->avail_in; + hist_size = + hist_add(stream, hist_size, avail_in_start - stream->avail_in); + + if (state->b_bytes_processed > hist_size) { + copy_start_offset = state->b_bytes_processed - hist_size; - if (state->b_bytes_processed > IGZIP_HIST_SIZE) { - copy_down_src = - &state->buffer[state->b_bytes_processed - IGZIP_HIST_SIZE]; - copy_down_size = - state->b_bytes_valid - state->b_bytes_processed + - IGZIP_HIST_SIZE; + copy_down_src = &state->buffer[copy_start_offset]; + copy_down_size = state->b_bytes_valid - copy_start_offset; memmove(state->buffer, copy_down_src, copy_down_size); state->b_bytes_valid -= copy_down_src - state->buffer; state->b_bytes_processed -= copy_down_src - state->buffer; } - } stream->flush = flush_type; stream->end_of_stream = 
end_of_stream; - processed -= stream->avail_in; - } while (processed < IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD && avail_in > 0 + } while (stream->total_in < total_start + hist_size && avail_in > 0 && stream->avail_out > 0); - if (processed >= IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD) { - stream->next_in = next_in - stream->avail_in; - stream->avail_in = avail_in + stream->avail_in; - - state->file_start = stream->next_in - stream->total_in; - - if (stream->avail_in > 0 && stream->avail_out > 0) - isal_deflate_int(stream); - - size = stream->avail_in; - if (stream->avail_in > IGZIP_HIST_SIZE) - size = 0; - - memmove(state->buffer, stream->next_in - IGZIP_HIST_SIZE, - size + IGZIP_HIST_SIZE); - state->b_bytes_processed = IGZIP_HIST_SIZE; - state->b_bytes_valid = size + IGZIP_HIST_SIZE; - - stream->next_in += size; - stream->avail_in -= size; - stream->total_in += size; - - } else { - stream->total_in += state->b_bytes_valid - state->b_bytes_processed; - stream->next_in = next_in; - stream->avail_in = avail_in; - state->file_start = stream->next_in - stream->total_in; + stream->total_in += state->b_bytes_valid - state->b_bytes_processed; + stream->next_in = next_in; + stream->avail_in = avail_in; + if (stream->avail_in > 0 && stream->avail_out > 0) { + /* Due to exiting conditions for the while loop, we know that + * stream->total_in < total_start + hist_size */ + stream->next_in -= state->b_bytes_valid - state->b_bytes_processed; + stream->avail_in += state->b_bytes_valid - state->b_bytes_processed; + stream->total_in -= state->b_bytes_valid - state->b_bytes_processed; + + avail_in_start = stream->avail_in; + state->total_in_start = total_start; + isal_deflate_int(stream); + + hist_size = hist_add(stream, hist_size, avail_in_start - stream->avail_in); + future_size = stream->avail_in; + if (future_size > ISAL_LOOK_AHEAD) + future_size = ISAL_LOOK_AHEAD; + + memmove(state->buffer, stream->next_in - hist_size, hist_size + future_size); + state->b_bytes_processed = hist_size; + 
state->b_bytes_valid = hist_size + future_size; + stream->next_in += future_size; + stream->total_in += future_size; + stream->avail_in -= future_size; } return ret; @@ -1288,9 +1485,9 @@ static int write_deflate_header_unaligned_stateless(struct isal_zstream *stream) } /* Toggle end of stream only works when deflate header is aligned */ -void write_header(struct isal_zstream *stream, uint8_t * deflate_hdr, - uint32_t deflate_hdr_count, uint32_t extra_bits_count, - uint32_t next_state, uint32_t toggle_end_of_stream) +static void write_header(struct isal_zstream *stream, uint8_t * deflate_hdr, + uint32_t deflate_hdr_count, uint32_t extra_bits_count, + uint32_t next_state, uint32_t toggle_end_of_stream) { struct isal_zstate *state = &stream->internal_state; uint32_t hdr_extra_bits = deflate_hdr[deflate_hdr_count]; @@ -1354,7 +1551,7 @@ void write_header(struct isal_zstream *stream, uint8_t * deflate_hdr, } -void write_trailer(struct isal_zstream *stream) +static void write_trailer(struct isal_zstream *stream) { struct isal_zstate *state = &stream->internal_state; unsigned int bytes = 0; diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_base.c b/src/main/native/compression/isa-l-master/igzip/igzip_base.c index d5720eb5..52d4dbc0 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_base.c +++ b/src/main/native/compression/isa-l-master/igzip/igzip_base.c @@ -12,6 +12,9 @@ static inline void update_state(struct isal_zstream *stream, uint8_t * start_in, struct isal_zstate *state = &stream->internal_state; uint32_t bytes_written; + if (next_in - start_in > 0) + state->has_hist = IGZIP_HIST; + stream->next_in = next_in; stream->total_in += next_in - start_in; stream->avail_in = end_in - next_in; @@ -32,6 +35,7 @@ void isal_deflate_body_base(struct isal_zstream *stream) uint64_t code, code_len, code2, code_len2; struct isal_zstate *state = &stream->internal_state; uint16_t *last_seen = state->head; + uint8_t *file_start = stream->next_in - 
stream->total_in; if (stream->avail_in == 0) { if (stream->end_of_stream || stream->flush != NO_FLUSH) @@ -53,9 +57,9 @@ void isal_deflate_body_base(struct isal_zstream *stream) } literal = *(uint32_t *) next_in; - hash = compute_hash(literal) & HASH_MASK; - dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF; - last_seen[hash] = (uint64_t) (next_in - state->file_start); + hash = compute_hash(literal) & LVL0_HASH_MASK; + dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) (next_in - file_start); /* The -1 are to handle the case when dist = 0 */ if (dist - 1 < IGZIP_HIST_SIZE - 1) { @@ -74,9 +78,8 @@ void isal_deflate_body_base(struct isal_zstream *stream) for (; next_hash < end; next_hash++) { literal = *(uint32_t *) next_hash; - hash = compute_hash(literal) & HASH_MASK; - last_seen[hash] = - (uint64_t) (next_hash - state->file_start); + hash = compute_hash(literal) & LVL0_HASH_MASK; + last_seen[hash] = (uint64_t) (next_hash - file_start); } get_len_code(stream->hufftables, match_length, &code, @@ -118,6 +121,7 @@ void isal_deflate_finish_base(struct isal_zstream *stream) uint64_t code, code_len, code2, code_len2; struct isal_zstate *state = &stream->internal_state; uint16_t *last_seen = state->head; + uint8_t *file_start = stream->next_in - stream->total_in; set_buf(&state->bitbuf, stream->next_out, stream->avail_out); @@ -133,9 +137,9 @@ void isal_deflate_finish_base(struct isal_zstream *stream) } literal = *(uint32_t *) next_in; - hash = compute_hash(literal) & HASH_MASK; - dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF; - last_seen[hash] = (uint64_t) (next_in - state->file_start); + hash = compute_hash(literal) & LVL0_HASH_MASK; + dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) (next_in - file_start); if (dist - 1 < IGZIP_HIST_SIZE - 1) { /* The -1 are to handle the case when dist = 0 */ match_length = @@ -152,9 +156,9 @@ void 
isal_deflate_finish_base(struct isal_zstream *stream) for (; next_hash < end - 3; next_hash++) { literal = *(uint32_t *) next_hash; - hash = compute_hash(literal) & HASH_MASK; + hash = compute_hash(literal) & LVL0_HASH_MASK; last_seen[hash] = - (uint64_t) (next_hash - state->file_start); + (uint64_t) (next_hash - file_start); } get_len_code(stream->hufftables, match_length, &code, @@ -209,21 +213,20 @@ void isal_deflate_finish_base(struct isal_zstream *stream) return; } -void isal_deflate_hash_lvl0_base(struct isal_zstream *stream, uint8_t * dict, - uint32_t dict_len) +void isal_deflate_hash_base(uint16_t * hash_table, uint32_t hash_mask, + uint32_t current_index, uint8_t * dict, uint32_t dict_len) { uint8_t *next_in = dict; uint8_t *end_in = dict + dict_len - SHORTEST_MATCH; uint32_t literal; uint32_t hash; - uint16_t lookup_val = stream->total_in - dict_len; - uint16_t *last_seen = stream->internal_state.head; + uint16_t index = current_index - dict_len; while (next_in <= end_in) { literal = *(uint32_t *) next_in; - hash = compute_hash(literal) & HASH_MASK; - last_seen[hash] = lookup_val; - lookup_val++; + hash = compute_hash(literal) & hash_mask; + hash_table[hash] = index; + index++; next_in++; } } diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_base_aliases.c b/src/main/native/compression/isa-l-master/igzip/igzip_base_aliases.c index ad2a4060..daf918a7 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_base_aliases.c +++ b/src/main/native/compression/isa-l-master/igzip/igzip_base_aliases.c @@ -30,11 +30,16 @@ #include #include "igzip_lib.h" #include "encode_df.h" +#include "igzip_level_buf_structs.h" void isal_deflate_body_base(struct isal_zstream *stream); void isal_deflate_finish_base(struct isal_zstream *stream); void isal_deflate_icf_body_base(struct isal_zstream *stream); -void isal_deflate_icf_finish_base(struct isal_zstream *stream); +void isal_deflate_icf_body_hash8k_base(struct isal_zstream *stream); +void 
isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream); +void isal_deflate_icf_finish_hash8k_base(struct isal_zstream *stream); +void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream); +void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream); void isal_update_histogram_base(uint8_t * start_stream, int length, struct isal_huff_histogram *histogram); struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in, @@ -43,8 +48,13 @@ struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in, uint32_t crc32_gzip_base(uint32_t init_crc, const unsigned char *buf, uint64_t len); uint32_t adler32_base(uint32_t init, const unsigned char *buf, uint64_t len); int decode_huffman_code_block_stateless_base(struct inflate_state *s); -void isal_deflate_hash_lvl0_base(struct isal_zstream *stream, uint8_t * dict, - uint32_t dict_len); + +extern void isal_deflate_hash_base(uint16_t *, uint32_t, uint32_t, uint8_t *, uint32_t); + +void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in, + struct deflate_icf *match_lookup, struct level_buf *level_buf); +void gen_icf_map_h1_base(struct isal_zstream *stream, + struct deflate_icf *matches_icf_lookup, uint64_t input_size); void isal_deflate_body(struct isal_zstream *stream) { @@ -61,9 +71,29 @@ void isal_deflate_icf_body(struct isal_zstream *stream) isal_deflate_icf_body_base(stream); } -void isal_deflate_icf_finish(struct isal_zstream *stream) +void isal_deflate_icf_body_lvl1(struct isal_zstream *stream) +{ + isal_deflate_icf_body_hash8k_base(stream); +} + +void isal_deflate_icf_body_lvl2(struct isal_zstream *stream) +{ + isal_deflate_icf_body_hash_hist_base(stream); +} + +void isal_deflate_icf_finish_lvl1(struct isal_zstream *stream) { - isal_deflate_icf_finish_base(stream); + isal_deflate_icf_finish_hash8k_base(stream); +} + +void isal_deflate_icf_finish_lvl2(struct isal_zstream *stream) +{ + isal_deflate_icf_finish_hash_hist_base(stream); +} + +void 
isal_deflate_icf_finish_lvl3(struct isal_zstream *stream) +{ + isal_deflate_icf_finish_hash_map_base(stream); } void isal_update_histogram(uint8_t * start_stream, int length, @@ -94,7 +124,38 @@ int decode_huffman_code_block_stateless(struct inflate_state *s) return decode_huffman_code_block_stateless_base(s); } -void isal_deflate_hash_lvl0(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len) +void isal_deflate_hash_lvl0(uint16_t * hash_table, uint32_t hash_mask, + uint32_t current_index, uint8_t * dict, uint32_t dict_len) +{ + isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len); +} + +void isal_deflate_hash_lvl1(uint16_t * hash_table, uint32_t hash_mask, + uint32_t current_index, uint8_t * dict, uint32_t dict_len) +{ + isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len); +} + +void isal_deflate_hash_lvl2(uint16_t * hash_table, uint32_t hash_mask, + uint32_t current_index, uint8_t * dict, uint32_t dict_len) +{ + isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len); +} + +void isal_deflate_hash_lvl3(uint16_t * hash_table, uint32_t hash_mask, + uint32_t current_index, uint8_t * dict, uint32_t dict_len) +{ + isal_deflate_hash_base(hash_table, hash_mask, current_index, dict, dict_len); +} + +void set_long_icf_fg(uint8_t * next_in, uint8_t * end_in, + struct deflate_icf *match_lookup, struct level_buf *level_buf) +{ + set_long_icf_fg_base(next_in, end_in, match_lookup, level_buf); +} + +void gen_icf_map_lh1(struct isal_zstream *stream, + struct deflate_icf *matches_icf_lookup, uint64_t input_size) { - return isal_deflate_hash_lvl0_base(stream, dict, dict_len); + gen_icf_map_h1_base(stream, matches_icf_lookup, input_size); } diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_body.asm b/src/main/native/compression/isa-l-master/igzip/igzip_body.asm index 3a84d023..9045afe6 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_body.asm +++ 
b/src/main/native/compression/isa-l-master/igzip/igzip_body.asm @@ -137,9 +137,9 @@ isal_deflate_body_ %+ ARCH %+ : ;; Set stream's next state mov rdx, ZSTATE_FLUSH_READ_BUFFER mov rax, ZSTATE_BODY - cmp dword [rcx + _end_of_stream], 0 + cmp word [rcx + _end_of_stream], 0 cmovne rax, rdx - cmp dword [rcx + _flush], _NO_FLUSH + cmp word [rcx + _flush], _NO_FLUSH cmovne rax, rdx mov dword [rcx + _internal_state_state], eax ret @@ -164,7 +164,7 @@ skip1: mov [rsp + gpr_save_mem_offset + 7*8], r15 mov stream, rcx - mov dword [stream + _internal_state_has_eob], 0 + mov byte [stream + _internal_state_has_eob], 0 MOVDQU xmask, [mask] @@ -209,10 +209,10 @@ MARK __body_compute_hash_ %+ ARCH shr tmp3, 8 compute_hash hash2, tmp3 - and hash, HASH_MASK - and hash2, HASH_MASK + and hash, LVL0_HASH_MASK + and hash2, LVL0_HASH_MASK - cmp dword [stream + _internal_state_has_hist], IGZIP_NO_HIST + cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST je write_first_byte jmp loop2 @@ -321,7 +321,7 @@ len_dist_lit_huffman: MOVQ tmp5, xdata shr tmp5, 24 compute_hash tmp4, tmp5 - and tmp4, HASH_MASK + and tmp4, LVL0_HASH_MASK SHLX code4, code4, code_len3 or code4, code3 @@ -359,15 +359,15 @@ loop3: jae loop3_done mov tmp6, [file_start + tmp3] compute_hash tmp4, tmp6 - and tmp4 %+ d, HASH_MASK + and tmp4 %+ d, LVL0_HASH_MASK ; state->head[hash] = k; mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w jmp loop3 loop3_done: %endif - ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; - and hash %+ d, HASH_MASK - and hash2 %+ d, HASH_MASK + ; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK; + and hash %+ d, LVL0_HASH_MASK + and hash2 %+ d, LVL0_HASH_MASK ; continue cmp f_i, f_end_i @@ -429,15 +429,15 @@ loop4: jae loop4_done mov tmp6, [file_start + tmp3] compute_hash tmp4, tmp6 - and tmp4, HASH_MASK + and tmp4, LVL0_HASH_MASK mov [stream + _internal_state_head + 2 * tmp4], tmp3 %+ w jmp loop4 loop4_done: %endif - ; hash = compute_hash(state->file_start + 
f_i) & HASH_MASK; - and hash %+ d, HASH_MASK - and hash2 %+ d, HASH_MASK + ; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK; + and hash %+ d, LVL0_HASH_MASK + and hash2 %+ d, LVL0_HASH_MASK ; continue cmp f_i, f_end_i @@ -464,9 +464,9 @@ write_lit_bits: input_end: mov tmp1, ZSTATE_FLUSH_READ_BUFFER mov tmp5, ZSTATE_BODY - cmp dword [stream + _end_of_stream], 0 + cmp word [stream + _end_of_stream], 0 cmovne tmp5, tmp1 - cmp dword [stream + _flush], _NO_FLUSH + cmp word [stream + _flush], _NO_FLUSH cmovne tmp5, tmp1 mov dword [stream + _internal_state_state], tmp5 %+ d @@ -545,7 +545,7 @@ write_first_byte: cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] ja output_end - mov dword [stream + _internal_state_has_hist], IGZIP_HIST + mov byte [stream + _internal_state_has_hist], IGZIP_HIST mov [stream + _internal_state_head + 2 * hash], f_i %+ w @@ -563,5 +563,5 @@ write_first_byte: section .data align 16 -mask: dd HASH_MASK, HASH_MASK, HASH_MASK, HASH_MASK +mask: dd LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK, LVL0_HASH_MASK const_D: dq D diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_build_hash_table_perf.c b/src/main/native/compression/isa-l-master/igzip/igzip_build_hash_table_perf.c new file mode 100644 index 00000000..003f5764 --- /dev/null +++ b/src/main/native/compression/isa-l-master/igzip/igzip_build_hash_table_perf.c @@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include +#include "igzip_lib.h" +#include "test.h" + +#define DICT_LEN 32*1024 +#define ITERATIONS 100000 + +extern void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, int dict_len); + +void create_rand_data(uint8_t * data, uint32_t size) +{ + int i; + for (i = 0; i < size; i++) { + data[i] = rand() % 256; + } +} + +int main(int argc, char *argv[]) +{ + int i, iterations = ITERATIONS; + struct isal_zstream stream; + uint8_t dict[DICT_LEN]; + uint32_t dict_len = DICT_LEN; + + stream.level = 0; + create_rand_data(dict, dict_len); + 
+ struct perf start, stop; + perf_start(&start); + + for (i = 0; i < iterations; i++) { + isal_deflate_hash(&stream, dict, dict_len); + } + + perf_stop(&stop); + + printf("igzip_build_hash_table_perf:\n"); + printf(" in_size=%u iter=%d ", dict_len, i); + perf_print(stop, start, (long long)dict_len * i); +} diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_decode_block_stateless.asm b/src/main/native/compression/isa-l-master/igzip/igzip_decode_block_stateless.asm index 7f92c63f..ac760686 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_decode_block_stateless.asm +++ b/src/main/native/compression/isa-l-master/igzip/igzip_decode_block_stateless.asm @@ -12,8 +12,24 @@ default rel %define ISAL_DECODE_LONG_BITS 12 %define ISAL_DECODE_SHORT_BITS 10 -%define MAX_LONG_CODE_LARGE (288 + (1 << (15 - ISAL_DECODE_LONG_BITS))) -%define MAX_LONG_CODE_SMALL (32 + (1 << (15 - ISAL_DECODE_SHORT_BITS))) +;; See inflate_huff_code structure declaration in igzip_lib.h calculation explanation +%define L_REM (15 - ISAL_DECODE_LONG_BITS) +%define S_REM (15 - ISAL_DECODE_SHORT_BITS) + +%define L_DUP ((1 << L_REM) - (L_REM + 1)) +%define S_DUP ((1 << S_REM) - (S_REM + 1)) + +%define L_UNUSED ((1 << L_REM) - (1 << ((L_REM)/2)) - (1 << ((L_REM + 1)/2)) + 1) +%define S_UNUSED ((1 << S_REM) - (1 << ((S_REM)/2)) - (1 << ((S_REM + 1)/2)) + 1) + +%define L_SIZE (286 + L_DUP + L_UNUSED) +%define S_SIZE (30 + S_DUP + S_UNUSED) + +%define HUFF_CODE_LARGE_LONG_ALIGNED (L_SIZE + (-L_SIZE & 0xf)) +%define HUFF_CODE_SMALL_LONG_ALIGNED (S_SIZE + (-S_SIZE & 0xf)) + +%define MAX_LONG_CODE_LARGE (L_SIZE + (-L_SIZE & 0xf)) +%define MAX_LONG_CODE_SMALL (S_SIZE + (-S_SIZE & 0xf)) %define COPY_SIZE 16 %define COPY_LEN_MAX 258 diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_deflate_hash.asm b/src/main/native/compression/isa-l-master/igzip/igzip_deflate_hash.asm index 162014e5..312abd0e 100644 --- 
a/src/main/native/compression/isa-l-master/igzip/igzip_deflate_hash.asm +++ b/src/main/native/compression/isa-l-master/igzip/igzip_deflate_hash.asm @@ -4,41 +4,75 @@ %include "huffman.asm" %include "reg_sizes.asm" -%define DICT_SLOP 4 +%define DICT_SLOP 8 +%define DICT_END_SLOP 4 %ifidn __OUTPUT_FORMAT__, win64 %define arg1 rcx %define arg2 rdx %define arg3 r8 +%define arg4 r9 +%define arg5 rdi +%define swap1 rsi +%define stack_size 3 * 8 +%define PS 8 +%define arg(x) [rsp + stack_size + PS*x] %else %define arg1 rdi %define arg2 rsi %define arg3 rdx +%define arg4 rcx +%define arg5 r8 +%define swap1 r9 %endif -%define stream arg1 +%define hash_table arg1 -%define dict_offset arg2 +%define hash_mask arg2 -%define dict_len arg3 -%define f_i arg3 +%define f_i_end arg3 -%define data r9 +%define dict_offset arg4 -%define hash r10 +%define dict_len arg5 +%define f_i arg5 -%define f_i_end r11 +%define f_i_tmp rax -global isal_deflate_hash_lvl0_01 -isal_deflate_hash_lvl0_01: -%ifnidn (arg1, stream) - mov stream, arg1 +%define hash swap1 + +%define hash2 r10 + +%define hash3 r11 + +%define hash4 r12 + + +%macro FUNC_SAVE 0 +%ifidn __OUTPUT_FORMAT__, win64 + push rsi + push rdi + push r12 + mov arg5 %+ d, arg(5) +%else + push r12 %endif -%ifnidn (arg2, dict_next) - mov dict_offset, arg2 +%endm + +%macro FUNC_RESTORE 0 +%ifidn __OUTPUT_FORMAT__, win64 + pop r12 + pop rdi + pop rsi +%else + pop r12 %endif +%endm + +global isal_deflate_hash_crc_01 +isal_deflate_hash_crc_01: + FUNC_SAVE - mov f_i_end %+ d, dword [stream + _total_in] neg f_i add f_i, f_i_end @@ -46,16 +80,57 @@ isal_deflate_hash_lvl0_01: sub f_i_end, DICT_SLOP cmp f_i, f_i_end - jg end + jg end_main main_loop: - mov data %+ d, [f_i + dict_offset] - compute_hash hash, data - and hash, HASH_MASK - mov [stream + _internal_state_head + 2 * hash], f_i %+ w + lea f_i_tmp, [f_i + 2] + + xor hash, hash + crc32 hash %+ d, dword [f_i + dict_offset] + + xor hash2, hash2 + crc32 hash2 %+ d, dword [f_i + dict_offset + 1] + + 
xor hash3, hash3 + crc32 hash3 %+ d, dword [f_i_tmp + dict_offset] + xor hash4, hash4 + crc32 hash4 %+ d, dword [f_i_tmp + dict_offset + 1] + + and hash, hash_mask + and hash2, hash_mask + and hash3, hash_mask + and hash4, hash_mask + + mov [hash_table + 2 * hash], f_i %+ w add f_i, 1 + + mov [hash_table + 2 * hash2], f_i %+ w + add f_i, 3 + + mov [hash_table + 2 * hash3], f_i_tmp %+ w + add f_i_tmp, 1 + + mov [hash_table + 2 * hash4], f_i_tmp %+ w + cmp f_i, f_i_end jle main_loop + +end_main: + add f_i_end, DICT_SLOP - DICT_END_SLOP + cmp f_i, f_i_end + jg end + +end_loop: + xor hash, hash + crc32 hash %+ d, dword [f_i + dict_offset] + + and hash, hash_mask + mov [hash_table + 2 * hash], f_i %+ w + + add f_i, 1 + cmp f_i, f_i_end + jle end_loop end: + FUNC_RESTORE ret diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_file_perf.c b/src/main/native/compression/isa-l-master/igzip/igzip_file_perf.c index 08a05405..8e8de446 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_file_perf.c +++ b/src/main/native/compression/isa-l-master/igzip/igzip_file_perf.c @@ -42,6 +42,59 @@ # define RUN_MEM_SIZE 500000000 #endif +int level_size_buf[10] = { +#ifdef ISAL_DEF_LVL0_DEFAULT + ISAL_DEF_LVL0_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL1_DEFAULT + ISAL_DEF_LVL1_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL2_DEFAULT + ISAL_DEF_LVL2_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL3_DEFAULT + ISAL_DEF_LVL3_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL4_DEFAULT + ISAL_DEF_LVL4_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL5_DEFAULT + ISAL_DEF_LVL5_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL6_DEFAULT + ISAL_DEF_LVL6_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL7_DEFAULT + ISAL_DEF_LVL7_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL8_DEFAULT + ISAL_DEF_LVL8_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL9_DEFAULT + ISAL_DEF_LVL9_DEFAULT, +#else + 0, +#endif +}; + struct isal_zstream stream; int usage(void) @@ -69,7 
+122,17 @@ int main(int argc, char *argv[]) int level = 0, level_size = 0, avail_in; char *in_file_name = NULL, *out_file_name = NULL, *dict_file_name = NULL; - while ((c = getopt(argc, argv, "h01i:b:o:d:")) != -1) { + while ((c = getopt(argc, argv, "h0123456789i:b:o:d:")) != -1) { + if (c >= '0' && c <= '9') { + if (c > '0' + ISAL_DEF_MAX_LEVEL) + usage(); + else { + level = c - '0'; + level_size = level_size_buf[level]; + } + continue; + } + switch (c) { case 'o': out_file_name = optarg; @@ -85,12 +148,6 @@ int main(int argc, char *argv[]) case 'b': inbuf_size = atoi(optarg); break; - case '1': - level = 1; - level_size = ISAL_DEF_LVL1_LARGE; - break; - case '0': - break; case 'h': default: usage(); @@ -137,9 +194,7 @@ int main(int argc, char *argv[]) outbuf_size = 2 * infile_size + BUF_SIZE; - dictfile_size = 0; - if (dict_file_name != NULL) - dictfile_size = get_filesize(dict); + dictfile_size = (dict_file_name != NULL) ? get_filesize(dict) : 0; if (iterations == 0) { iterations = infile_size ? RUN_MEM_SIZE / infile_size : MIN_TEST_LOOPS; @@ -177,6 +232,7 @@ int main(int argc, char *argv[]) inbuf_size = inbuf_size ? 
inbuf_size : infile_size; printf("igzip_file_perf: %s %d iterations\n", in_file_name, iterations); + /* Read complete input file into buffer */ stream.avail_in = (uint32_t) fread(inbuf, 1, infile_size, in); if (stream.avail_in != infile_size) { @@ -184,7 +240,8 @@ int main(int argc, char *argv[]) exit(0); } - if (dictfile_size != (uint32_t) fread(dictbuf, 1, dictfile_size, dict)) { + /* Read complete dictionary into buffer */ + if ((dictfile_size != 0) && (dictfile_size != fread(dictbuf, 1, dictfile_size, dict))) { fprintf(stderr, "Couldn't fit all of dictionary file into buffer\n"); exit(0); } diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_finish.asm b/src/main/native/compression/isa-l-master/igzip/igzip_finish.asm index dbbe0d9b..584b75b7 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_finish.asm +++ b/src/main/native/compression/isa-l-master/igzip/igzip_finish.asm @@ -126,16 +126,16 @@ skip_SLOP: mov curr_data %+ d, [file_start + f_i] - cmp dword [stream + _internal_state_has_hist], IGZIP_NO_HIST + cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST jne skip_write_first_byte cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] ja end_loop_2 compute_hash hash, curr_data - and hash %+ d, HASH_MASK + and hash %+ d, LVL0_HASH_MASK mov [stream + _internal_state_head + 2 * hash], f_i %+ w - mov dword [stream + _internal_state_has_hist], IGZIP_HIST + mov byte [stream + _internal_state_has_hist], IGZIP_HIST jmp encode_literal skip_write_first_byte: @@ -145,10 +145,10 @@ loop2: cmp m_out_buf, [stream + _internal_state_bitbuf_m_out_end] ja end_loop_2 - ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; + ; hash = compute_hash(state->file_start + f_i) & LVL0_HASH_MASK; mov curr_data %+ d, [file_start + f_i] compute_hash hash, curr_data - and hash %+ d, HASH_MASK + and hash %+ d, LVL0_HASH_MASK ; f_index = state->head[hash]; movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash] @@ -211,19 +211,19 @@ 
loop2: ; only update hash twice - ; hash = compute_hash(state->file_start + k) & HASH_MASK; + ; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK; mov tmp6 %+ d, dword [file_start + tmp3] compute_hash hash, tmp6 - and hash %+ d, HASH_MASK + and hash %+ d, LVL0_HASH_MASK ; state->head[hash] = k; mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w add tmp3, 1 - ; hash = compute_hash(state->file_start + k) & HASH_MASK; + ; hash = compute_hash(state->file_start + k) & LVL0_HASH_MASK; mov tmp6 %+ d, dword [file_start + tmp3] compute_hash hash, tmp6 - and hash %+ d, HASH_MASK + and hash %+ d, LVL0_HASH_MASK ; state->head[hash] = k; mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w @@ -276,8 +276,8 @@ write_eob: write_bits m_bits, m_bit_count, code2, code_len2, m_out_buf, tmp1 - mov dword [stream + _internal_state_has_eob], 1 - cmp dword [stream + _end_of_stream], 1 + mov byte [stream + _internal_state_has_eob], 1 + cmp word [stream + _end_of_stream], 1 jne sync_flush ; state->state = ZSTATE_TRL; mov dword [stream + _internal_state_state], ZSTATE_TRL diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_fuzz_inflate.c b/src/main/native/compression/isa-l-master/igzip/igzip_fuzz_inflate.c deleted file mode 100644 index af9c72e4..00000000 --- a/src/main/native/compression/isa-l-master/igzip/igzip_fuzz_inflate.c +++ /dev/null @@ -1,96 +0,0 @@ -#define _FILE_OFFSET_BITS 64 -#include -#include -#include -#include "huff_codes.h" -#include "igzip_lib.h" -#include "test.h" - -#define OUT_BUFFER_SIZE 64*1024 - -int main(int argc, char *argv[]) -{ - FILE *in = NULL; - unsigned char *in_buf = NULL, *isal_out_buf = NULL, *zlib_out_buf = NULL; - uint64_t in_file_size; - int out_buf_size, zret, iret; - struct inflate_state *state = NULL; - z_stream zstate; - char z_msg_invalid_code_set[] = "invalid code lengths set"; - char z_msg_invalid_dist_set[] = "invalid distances set"; - char z_msg_invalid_lit_len_set[] = "invalid literal/lengths set"; - - if 
(argc != 2) { - fprintf(stderr, "Usage: isal_inflate_file_perf infile\n" - "\t - Runs multiple iterations of igzip on a file to " - "get more accurate time results.\n"); - exit(1); - } - in = fopen(argv[1], "rb"); - if (!in) { - fprintf(stderr, "Can't open %s for reading\n", argv[1]); - exit(1); - } - - /* Allocate space for entire input file and output - * (assuming some possible expansion on output size) - */ - in_file_size = get_filesize(in); - - out_buf_size = OUT_BUFFER_SIZE; - - state = malloc(sizeof(struct inflate_state)); - in_buf = malloc(in_file_size); - isal_out_buf = malloc(OUT_BUFFER_SIZE); - zlib_out_buf = malloc(OUT_BUFFER_SIZE); - - if (state == NULL || in_buf == NULL || isal_out_buf == NULL || zlib_out_buf == NULL) { - fprintf(stderr, "Failed to malloc input and outputs buffers\n"); - exit(1); - } - - fread(in_buf, 1, in_file_size, in); - - /* Inflate data with isal_inflate */ - memset(state, 0xff, sizeof(struct inflate_state)); - - isal_inflate_init(state); - state->next_in = in_buf; - state->avail_in = in_file_size; - state->next_out = isal_out_buf; - state->avail_out = out_buf_size; - - iret = isal_inflate_stateless(state); - - /* Inflate data with zlib */ - zstate.zalloc = Z_NULL; - zstate.zfree = Z_NULL; - zstate.opaque = Z_NULL; - zstate.avail_in = in_file_size; - zstate.next_in = in_buf; - zstate.avail_out = out_buf_size; - zstate.next_out = zlib_out_buf; - inflateInit2(&zstate, -15); - - zret = inflate(&zstate, Z_FINISH); - - if (zret == Z_STREAM_END) { - /* If zlib finished, assert isal finished with the same answer */ - assert(state->block_state == ISAL_BLOCK_FINISH); - assert(zstate.total_out == state->total_out); - assert(memcmp(isal_out_buf, zlib_out_buf, state->total_out) == 0); - } else if (zret < 0) { - if (zret != Z_BUF_ERROR) - /* If zlib errors, assert isal errors, excluding a few - * cases where zlib is overzealous */ - assert(iret < 0 || strcmp(zstate.msg, z_msg_invalid_code_set) == 0 - || strcmp(zstate.msg, 
z_msg_invalid_dist_set) == 0 - || strcmp(zstate.msg, z_msg_invalid_lit_len_set) == 0); - } else - /* If zlib did not finish or error, assert isal did not finish - * or that isal found an invalid header since isal notices the - * error faster than zlib */ - assert(iret > 0 || iret == ISAL_INVALID_BLOCK); - - return 0; -} diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_gen_icf_map_lh1_06.asm b/src/main/native/compression/isa-l-master/igzip/igzip_gen_icf_map_lh1_06.asm new file mode 100644 index 00000000..7810f2f0 --- /dev/null +++ b/src/main/native/compression/isa-l-master/igzip/igzip_gen_icf_map_lh1_06.asm @@ -0,0 +1,503 @@ +%include "reg_sizes.asm" +%include "lz0a_const.asm" +%include "data_struct2.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 +%ifidn __OUTPUT_FORMAT__, win64 +%define arg1 rcx +%define arg2 rdx +%define arg3 r8 +%define hash rsi +%define next_in rdi +%else +%define arg1 rdi +%define arg2 rsi +%define arg3 rdx +%define hash r8 +%define next_in rcx +%endif + +%define stream arg1 +%define level_buf arg1 +%define matches_next arg2 +%define f_i_end arg3 + +%define f_i rax +%define file_start rbp +%define next_byte r9 +%define encode_size r10 +%define prev_len r11 +%define prev_dist r12 + +%define hash_table level_buf + _hash_map_hash_table + +%define datas zmm0 +%define datas_lookup zmm1 +%define zhashes zmm2 +%define zdists zmm3 +%define zdists_lookup zmm4 +%define zscatter zmm5 +%define zdists2 zmm6 +%define zlens1 zmm7 +%define zlens2 zmm8 +%define zlookup zmm9 +%define zlookup2 zmm10 +%define match_lookups zmm11 +%define zindex zmm12 +%define zdist_extra zmm13 +%define zdists_tmp zmm14 +%define znull_dist_syms zmm15 +%define zcode zmm16 +%define zthirty zmm17 +%define zdist_mask zmm18 +%define zshortest_matches zmm19 +%define zrot_left zmm20 +%define zdatas_perm zmm21 +%define zdatas_perm2 zmm22 +%define zdatas_perm3 zmm23 +%define zdatas_shuf zmm24 +%define zhash_prod zmm25 +%define zhash_mask zmm26 +%define zincrement zmm27 +%define 
zqword_shuf zmm28 +%define zones zmm29 +%define ztwofiftyfour zmm30 +%define zbswap zmm31 + +%ifidn __OUTPUT_FORMAT__, win64 +%define stack_size 10*16 + 4 * 8 + 8 +%define func(x) proc_frame x + +%macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + vmovdqu [rsp + 8*16], xmm14 + vmovdqa [rsp + 9*16], xmm15 + save_reg rsi, 10*16 + 0*8 + save_reg rdi, 10*16 + 1*8 + save_reg rbp, 10*16 + 2*8 + save_reg r12, 10*16 + 3*8 + end_prolog +%endm + +%macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + vmovdqa xmm14, [rsp + 8*16] + vmovdqa xmm15, [rsp + 9*16] + ; Reload (load, not store) the GPRs that FUNC_SAVE spilled with save_reg: rsi, rdi, rbp, r12 + mov rsi, [rsp + 10*16 + 0*8] + mov rdi, [rsp + 10*16 + 1*8] + mov rbp, [rsp + 10*16 + 2*8] + mov r12, [rsp + 10*16 + 3*8] + add rsp, stack_size +%endm +%else +%define func(x) x: +%macro FUNC_SAVE 0 + push rbp + push r12 +%endm + +%macro FUNC_RESTORE 0 + pop r12 + pop rbp +%endm +%endif + +%define VECT_SIZE 16 +%define HASH_BYTES 2 + +global gen_icf_map_lh1_06 +func(gen_icf_map_lh1_06) + FUNC_SAVE + + mov file_start, [stream + _next_in] + mov f_i %+ d, dword [stream + _total_in] + + sub file_start, f_i + add f_i_end, f_i + cmp f_i, f_i_end + jge end_main + +;; Prep for main loop + mov level_buf, [stream + _level_buf] + sub f_i_end, LA + vmovdqu64 zdatas_perm, [datas_perm] + vmovdqu64 zdatas_shuf, [datas_shuf] + vmovdqu64 zhash_prod, [hash_prod] + vmovdqu64 zhash_mask, [hash_mask] + vmovdqu64 zincrement, [increment] + vmovdqu64 zqword_shuf, [qword_shuf] + vmovdqu64 zdatas_perm2, [datas_perm2] + vmovdqu64 zdatas_perm3, [datas_perm3] + vmovdqu64 zones, [ones] + vmovdqu64 zbswap, [bswap_shuf] +
vmovdqu64 zthirty, [thirty] + vmovdqu64 zrot_left, [drot_left] + vmovdqu64 zdist_mask, [dist_mask] + vmovdqu64 zshortest_matches, [shortest_matches] + vmovdqu64 ztwofiftyfour, [twofiftyfour] + vmovdqu64 znull_dist_syms, [null_dist_syms] + kxorq k0, k0, k0 + kmovq k1, [k_mask_1] + kmovq k2, [k_mask_2] + + xor prev_len, prev_len + xor prev_dist, prev_dist + +;; Process first byte + vmovd zhashes %+ x, dword [f_i + file_start] + vpmaddwd zhashes, zhashes, zhash_prod + vpmaddwd zhashes, zhashes, zhash_prod + vpandd zhashes, zhashes, zhash_mask + vmovd hash %+ d, zhashes %+ x + mov word [hash_table + HASH_BYTES * hash], f_i %+ w + + add f_i, 1 + cmp f_i, f_i_end + jg end_main + +;;hash + vmovdqu64 datas %+ y, [f_i + file_start] + vpermq zhashes, zdatas_perm, datas + vpshufb zhashes, zhashes, zdatas_shuf + vpmaddwd zhashes, zhashes, zhash_prod + vpmaddwd zhashes, zhashes, zhash_prod + vpandd zhashes, zhashes, zhash_mask + + vpermq zlookup, zdatas_perm2, datas + vpshufb zlookup, zlookup, zqword_shuf + vpermq zlookup2, zdatas_perm3, datas + vpshufb zlookup2, zlookup2, zqword_shuf + +;;gather/scatter hashes + knotq k6, k0 + vpgatherdd zdists_lookup {k6}, [hash_table + HASH_BYTES * zhashes] + + vpbroadcastd zindex, f_i %+ d + vpaddd zindex, zindex, zincrement + vpblendmw zscatter {k1}, zindex, zdists_lookup + + knotq k6, k0 + vpscatterdd [hash_table + HASH_BYTES * zhashes] {k6}, zscatter + +;; Compute hash for next loop + vmovdqu64 datas %+ y, [f_i + file_start + VECT_SIZE] + vpermq zhashes, zdatas_perm, datas + vpshufb zhashes, zhashes, zdatas_shuf + vpmaddwd zhashes, zhashes, zhash_prod + vpmaddwd zhashes, zhashes, zhash_prod + vpandd zhashes, zhashes, zhash_mask + + vmovdqu64 datas_lookup %+ y, [f_i + file_start + 2 * VECT_SIZE] + + sub f_i_end, VECT_SIZE + cmp f_i, f_i_end + jg loop1_end + +loop1: + lea next_in, [f_i + file_start] + +;; Calculate look back dists + vpaddd zdists, zdists_lookup, zones + vpsubd zdists, zindex, zdists + vpandd zdists, zdists, zdist_mask + 
vpaddd zdists, zdists, zones + vpsubd zdists, zincrement, zdists + +;;gather/scatter hashes + add f_i, VECT_SIZE + + kxnorq k6, k6, k6 + kxnorq k7, k7, k7 + vpgatherdd zdists_lookup {k6}, [hash_table + HASH_BYTES * zhashes] + + vpbroadcastd zindex, f_i %+ d + vpaddd zindex, zindex, zincrement + vpblendmw zscatter {k1}, zindex, zdists_lookup + + vpscatterdd [hash_table + HASH_BYTES * zhashes] {k7}, zscatter + +;; Compute hash for next loop + vpermq zhashes, zdatas_perm, datas_lookup + vpshufb zhashes, zhashes, zdatas_shuf + vpmaddwd zhashes, zhashes, zhash_prod + vpmaddwd zhashes, zhashes, zhash_prod + vpandd zhashes, zhashes, zhash_mask + +;;lookup old codes + vextracti32x8 zdists2 %+ y, zdists, 1 + kxnorq k6, k6, k6 + kxnorq k7, k7, k7 + vpgatherdq zlens1 {k6}, [next_in + zdists %+ y] + vpgatherdq zlens2 {k7}, [next_in + zdists2 %+ y] + +;; Calculate dist_icf_code + vpaddd zdists, zdists, zones + vpsubd zdists, zincrement, zdists + vpcmpgtd k5, zdists, zones + vplzcntd zdist_extra, zdists + vpsubd zdist_extra {k5}{z}, zthirty, zdist_extra + vpsllvd zcode, zones, zdist_extra + vpsubd zcode, zcode, zones + vpandd zcode {k5}{z}, zdists, zcode + vpsrlvd zdists, zdists, zdist_extra + vpslld zdist_extra, zdist_extra, 1 + vpaddd zdists, zdists, zdist_extra + vpslld zcode, zcode, EXTRA_BITS_OFFSET - DIST_OFFSET + vpaddd zdists, zdists, zcode + +;; Setup zdists for combining with zlens + vpslld zdists, zdists, DIST_OFFSET + +;; xor current data with lookback dist + vpxorq zlens1, zlens1, zlookup + vpxorq zlens2, zlens2, zlookup2 + +;; Setup registers for next loop + vpermq zlookup, zdatas_perm2, datas + vpshufb zlookup, zlookup, zqword_shuf + vpermq zlookup2, zdatas_perm3, datas + vpshufb zlookup2, zlookup2, zqword_shuf + +;; Compute match length + vpshufb zlens1, zlens1, zbswap + vpshufb zlens2, zlens2, zbswap + vplzcntq zlens1, zlens1 + vplzcntq zlens2, zlens2 + vpmovqd zlens1 %+ y, zlens1 + vpmovqd zlens2 %+ y, zlens2 + vinserti32x8 zlens1, zlens2 %+ y, 1 + vpsrld 
zlens1, zlens1, 3 + +;; Preload for next loops + vmovdqu64 datas, datas_lookup + vmovdqu64 datas_lookup %+ y, [f_i + file_start + 2 * VECT_SIZE] + +;; Zero out matches which should not be taken + kshiftrw k3, k1, 15 + vpermd zlens2, zrot_left, zlens1 + vpermd zdists, zrot_left, zdists + + vmovd zdists_tmp %+ x, prev_len %+ d + vmovd prev_len %+ d, zlens2 %+ x + vmovdqu32 zlens2 {k3}, zdists_tmp + + vmovd zdists_tmp %+ x, prev_dist %+ d + vmovd prev_dist %+ d, zdists %+ x + vmovdqu32 zdists {k3}, zdists_tmp + + vpcmpgtd k3, zlens2, zshortest_matches + vpcmpgtd k4, zlens1, zlens2 + + knotq k3, k3 + korq k3, k3, k4 + knotq k4, k3 + vmovdqu32 zlens1 {k4}{z}, zlens2 + +;; Update zdists to match zlens1 + vpaddd zdists, zdists, zlens1 + vpaddd zdists, zdists, ztwofiftyfour + vpmovzxbd zdists {k3}, [f_i + file_start - VECT_SIZE - 1] + vpaddd zdists {k3}, zdists, znull_dist_syms + +;;Store zdists + vmovdqu64 [matches_next], zdists + add matches_next, ICF_CODE_BYTES * VECT_SIZE + + cmp f_i, f_i_end + jle loop1 + +loop1_end: + lea next_in, [f_i + file_start] + +;; Calculate look back dists + vpaddd zdists, zdists_lookup, zones + vpsubd zdists, zindex, zdists + vpandd zdists, zdists, zdist_mask + vpaddd zdists, zdists, zones + vpsubd zdists, zincrement, zdists + +;;lookup old codes + vextracti32x8 zdists2 %+ y, zdists, 1 + kxnorq k6, k6, k6 + kxnorq k7, k7, k7 + vpgatherdq zlens1 {k6}, [next_in + zdists %+ y] + vpgatherdq zlens2 {k7}, [next_in + zdists2 %+ y] + +;; Calculate dist_icf_code + vpaddd zdists, zdists, zones + vpsubd zdists, zincrement, zdists + vpcmpgtd k5, zdists, zones + vplzcntd zdist_extra, zdists + vpsubd zdist_extra {k5}{z}, zthirty, zdist_extra + vpsllvd zcode, zones, zdist_extra + vpsubd zcode, zcode, zones + vpandd zcode {k5}{z}, zdists, zcode + vpsrlvd zdists, zdists, zdist_extra + vpslld zdist_extra, zdist_extra, 1 + vpaddd zdists, zdists, zdist_extra + vpslld zcode, zcode, EXTRA_BITS_OFFSET - DIST_OFFSET + vpaddd zdists, zdists, zcode + +;; Setup zdists 
for combining with zlens + vpslld zdists, zdists, DIST_OFFSET + +;; xor current data with lookback dist + vpxorq zlens1, zlens1, zlookup + vpxorq zlens2, zlens2, zlookup2 + +;; Compute match length + vpshufb zlens1, zlens1, zbswap + vpshufb zlens2, zlens2, zbswap + vplzcntq zlens1, zlens1 + vplzcntq zlens2, zlens2 + vpmovqd zlens1 %+ y, zlens1 + vpmovqd zlens2 %+ y, zlens2 + vinserti32x8 zlens1, zlens2 %+ y, 1 + vpsrld zlens1, zlens1, 3 + +;; Zero out matches which should not be taken + kshiftrw k3, k1, 15 + vpermd zlens2, zrot_left, zlens1 + vpermd zdists, zrot_left, zdists + + vmovd zdists_tmp %+ x, prev_len %+ d + vmovd prev_len %+ d, zlens2 %+ x + vmovdqu32 zlens2 {k3}, zdists_tmp + + vmovd zdists_tmp %+ x, prev_dist %+ d + vmovd prev_dist %+ d, zdists %+ x + vmovdqu32 zdists {k3}, zdists_tmp + + vpcmpgtd k3, zlens2, zshortest_matches + vpcmpgtd k4, zlens1, zlens2 + + knotq k3, k3 + korq k3, k3, k4 + knotq k4, k3 + vmovdqu32 zlens1 {k4}{z}, zlens2 + +;; Update zdists to match zlens1 + vpaddd zdists, zdists, zlens1 + vpaddd zdists, zdists, ztwofiftyfour + vpmovzxbd zdists {k3}, [f_i + file_start - 1] + vpaddd zdists {k3}, zdists, znull_dist_syms + +;;Store zdists + vmovdqu64 [matches_next], zdists + +end_main: + FUNC_RESTORE + ret + +section .data +align 64 +datas_perm: + dq 0x0, 0x1, 0x0, 0x1, 0x1, 0x2, 0x1, 0x2 +datas_perm2: + dq 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1 +datas_perm3: + dq 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2 +drot_left: + dd 0xf, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6 + dd 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe +datas_shuf: + db 0x0, 0x1, 0x2, 0x3 + db 0x1, 0x2, 0x3, 0x4 + db 0x2, 0x3, 0x4, 0x5 + db 0x3, 0x4, 0x5, 0x6 + db 0x4, 0x5, 0x6, 0x7 + db 0x5, 0x6, 0x7, 0x8 + db 0x6, 0x7, 0x8, 0x9 + db 0x7, 0x8, 0x9, 0xa + db 0x0, 0x1, 0x2, 0x3 + db 0x1, 0x2, 0x3, 0x4 + db 0x2, 0x3, 0x4, 0x5 + db 0x3, 0x4, 0x5, 0x6 + db 0x4, 0x5, 0x6, 0x7 + db 0x5, 0x6, 0x7, 0x8 + db 0x6, 0x7, 0x8, 0x9 + db 0x7, 0x8, 0x9, 0xa +bswap_shuf: + db 0x07, 0x06, 0x05, 0x04, 0x03, 
0x02, 0x01, 0x00 + db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 + db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 + db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 + db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 + db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 + db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 + db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 + +qword_shuf: + db 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + db 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8 + db 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9 + db 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa + db 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb + db 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc + db 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd + db 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe + db 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf + +%define PROD1 0xE84B +%define PROD2 0x97B1 + +hash_prod: + dw PROD1, PROD2, PROD1, PROD2, PROD1, PROD2, PROD1, PROD2 + dw PROD1, PROD2, PROD1, PROD2, PROD1, PROD2, PROD1, PROD2 + dw PROD1, PROD2, PROD1, PROD2, PROD1, PROD2, PROD1, PROD2 + dw PROD1, PROD2, PROD1, PROD2, PROD1, PROD2, PROD1, PROD2 +null_dist_syms: + dd LIT, LIT, LIT, LIT, LIT, LIT, LIT, LIT + dd LIT, LIT, LIT, LIT, LIT, LIT, LIT, LIT +increment: + dd 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + dd 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf +ones: + dd 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 + dd 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1 +thirty: + dd 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e + dd 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e +twofiftyfour: + dd 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe + dd 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe +dist_mask: + dd D-1, D-1, D-1, D-1, D-1, D-1, D-1, D-1 + dd D-1, D-1, D-1, D-1, D-1, D-1, D-1, D-1 +hash_mask: + dd HASH_MAP_HASH_MASK, HASH_MAP_HASH_MASK, HASH_MAP_HASH_MASK, HASH_MAP_HASH_MASK + dd HASH_MAP_HASH_MASK, HASH_MAP_HASH_MASK, HASH_MAP_HASH_MASK, HASH_MAP_HASH_MASK + dd HASH_MAP_HASH_MASK, HASH_MAP_HASH_MASK, HASH_MAP_HASH_MASK, HASH_MAP_HASH_MASK + dd 
HASH_MAP_HASH_MASK, HASH_MAP_HASH_MASK, HASH_MAP_HASH_MASK, HASH_MAP_HASH_MASK +lit_len_mask: + dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK + dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK + dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK + dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK +shortest_matches: + dd MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH + dd MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH + dd MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH + dd MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH, MIN_DEF_MATCH + +k_mask_1: + dq 0xaaaaaaaaaaaaaaaa +k_mask_2: + dq 0x7fff +%endif diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_icf_base.c b/src/main/native/compression/isa-l-master/igzip/igzip_icf_base.c index be090f85..3c3954d7 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_icf_base.c +++ b/src/main/native/compression/isa-l-master/igzip/igzip_icf_base.c @@ -18,15 +18,116 @@ static inline void update_state(struct isal_zstream *stream, uint8_t * start_in, struct deflate_icf *start_out, struct deflate_icf *next_out, struct deflate_icf *end_out) { + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + + if (next_in - start_in > 0) + stream->internal_state.has_hist = IGZIP_HIST; + stream->next_in = next_in; stream->total_in += next_in - start_in; + stream->internal_state.block_end = stream->total_in; stream->avail_in = end_in - next_in; - ((struct level_2_buf *)stream->level_buf)->icf_buf_next = next_out; - ((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out = end_out - next_out; + level_buf->icf_buf_next = next_out; + level_buf->icf_buf_avail_out = end_out - next_out; +} + +void isal_deflate_icf_body_hash8k_base(struct isal_zstream *stream) +{ + uint32_t literal, hash; + uint8_t *start_in, *next_in, *end_in, *end, *next_hash; + struct deflate_icf *start_out, *next_out, *end_out; + uint16_t match_length; + uint32_t 
dist; + uint32_t code, code2, extra_bits; + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint16_t *last_seen = level_buf->hash8k.hash_table; + uint8_t *file_start = stream->next_in - stream->total_in; + + if (stream->avail_in == 0) { + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_FLUSH_READ_BUFFER; + return; + } + + start_in = stream->next_in; + end_in = start_in + stream->avail_in; + next_in = start_in; + + start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next; + end_out = + start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out / + sizeof(struct deflate_icf); + next_out = start_out; + + while (next_in + ISAL_LOOK_AHEAD < end_in) { + + if (next_out >= end_out) { + state->state = ZSTATE_CREATE_HDR; + update_state(stream, start_in, next_in, end_in, start_out, next_out, + end_out); + return; + } + + literal = *(uint32_t *) next_in; + hash = compute_hash(literal) & HASH8K_HASH_MASK; + dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) (next_in - file_start); + + /* The -1 are to handle the case when dist = 0 */ + if (dist - 1 < IGZIP_HIST_SIZE - 1) { + assert(dist != 0); + + match_length = compare258(next_in - dist, next_in, 258); + + if (match_length >= SHORTEST_MATCH) { + next_hash = next_in; +#ifdef ISAL_LIMIT_HASH_UPDATE + end = next_hash + 3; +#else + end = next_hash + match_length; +#endif + next_hash++; + + for (; next_hash < end; next_hash++) { + literal = *(uint32_t *) next_hash; + hash = compute_hash(literal) & HASH8K_HASH_MASK; + last_seen[hash] = (uint64_t) (next_hash - file_start); + } + + get_len_icf_code(match_length, &code); + get_dist_icf_code(dist, &code2, &extra_bits); + + level_buf->hist.ll_hist[code]++; + level_buf->hist.d_hist[code2]++; + + write_deflate_icf(next_out, code, code2, extra_bits); + next_out++; + next_in += match_length; + + continue; + } + } + + 
get_lit_icf_code(literal & 0xFF, &code); + level_buf->hist.ll_hist[code]++; + write_deflate_icf(next_out, code, NULL_DIST_SYM, 0); + next_out++; + next_in++; + } + + update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out); + + assert(stream->avail_in <= ISAL_LOOK_AHEAD); + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_FLUSH_READ_BUFFER; + + return; + } -void isal_deflate_icf_body_base(struct isal_zstream *stream) +void isal_deflate_icf_body_hash_hist_base(struct isal_zstream *stream) { uint32_t literal, hash; uint8_t *start_in, *next_in, *end_in, *end, *next_hash; @@ -35,7 +136,9 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream) uint32_t dist; uint32_t code, code2, extra_bits; struct isal_zstate *state = &stream->internal_state; - uint16_t *last_seen = state->head; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint16_t *last_seen = level_buf->hash_hist.hash_table; + uint8_t *file_start = stream->next_in - stream->total_in; if (stream->avail_in == 0) { if (stream->end_of_stream || stream->flush != NO_FLUSH) @@ -47,9 +150,9 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream) end_in = start_in + stream->avail_in; next_in = start_in; - start_out = ((struct level_2_buf *)stream->level_buf)->icf_buf_next; + start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next; end_out = - start_out + ((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out / + start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out / sizeof(struct deflate_icf); next_out = start_out; @@ -63,9 +166,9 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream) } literal = *(uint32_t *) next_in; - hash = compute_hash(literal) & HASH_MASK; - dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF; - last_seen[hash] = (uint64_t) (next_in - state->file_start); + hash = compute_hash(literal) & HASH_HIST_HASH_MASK; + dist = (next_in - file_start - 
last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) (next_in - file_start); /* The -1 are to handle the case when dist = 0 */ if (dist - 1 < IGZIP_HIST_SIZE - 1) { @@ -84,16 +187,15 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream) for (; next_hash < end; next_hash++) { literal = *(uint32_t *) next_hash; - hash = compute_hash(literal) & HASH_MASK; - last_seen[hash] = - (uint64_t) (next_hash - state->file_start); + hash = compute_hash(literal) & HASH_HIST_HASH_MASK; + last_seen[hash] = (uint64_t) (next_hash - file_start); } get_len_icf_code(match_length, &code); get_dist_icf_code(dist, &code2, &extra_bits); - state->hist.ll_hist[code]++; - state->hist.d_hist[code2]++; + level_buf->hist.ll_hist[code]++; + level_buf->hist.d_hist[code2]++; write_deflate_icf(next_out, code, code2, extra_bits); next_out++; @@ -104,7 +206,7 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream) } get_lit_icf_code(literal & 0xFF, &code); - state->hist.ll_hist[code]++; + level_buf->hist.ll_hist[code]++; write_deflate_icf(next_out, code, NULL_DIST_SYM, 0); next_out++; next_in++; @@ -120,7 +222,7 @@ void isal_deflate_icf_body_base(struct isal_zstream *stream) } -void isal_deflate_icf_finish_base(struct isal_zstream *stream) +void isal_deflate_icf_finish_hash8k_base(struct isal_zstream *stream) { uint32_t literal = 0, hash; uint8_t *start_in, *next_in, *end_in, *end, *next_hash; @@ -129,17 +231,25 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream) uint32_t dist; uint32_t code, code2, extra_bits; struct isal_zstate *state = &stream->internal_state; - uint16_t *last_seen = state->head; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint16_t *last_seen = level_buf->hash8k.hash_table; + uint8_t *file_start = stream->next_in - stream->total_in; start_in = stream->next_in; end_in = start_in + stream->avail_in; next_in = start_in; - start_out = ((struct level_2_buf *)stream->level_buf)->icf_buf_next; - end_out = start_out + 
((struct level_2_buf *)stream->level_buf)->icf_buf_avail_out / + start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next; + end_out = start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out / sizeof(struct deflate_icf); next_out = start_out; + if (stream->avail_in == 0) { + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_CREATE_HDR; + return; + } + while (next_in + 3 < end_in) { if (next_out >= end_out) { state->state = ZSTATE_CREATE_HDR; @@ -149,9 +259,9 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream) } literal = *(uint32_t *) next_in; - hash = compute_hash(literal) & HASH_MASK; - dist = (next_in - state->file_start - last_seen[hash]) & 0xFFFF; - last_seen[hash] = (uint64_t) (next_in - state->file_start); + hash = compute_hash(literal) & HASH8K_HASH_MASK; + dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) (next_in - file_start); if (dist - 1 < IGZIP_HIST_SIZE - 1) { /* The -1 are to handle the case when dist = 0 */ match_length = compare258(next_in - dist, next_in, end_in - next_in); @@ -167,16 +277,15 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream) for (; next_hash < end - 3; next_hash++) { literal = *(uint32_t *) next_hash; - hash = compute_hash(literal) & HASH_MASK; - last_seen[hash] = - (uint64_t) (next_hash - state->file_start); + hash = compute_hash(literal) & HASH8K_HASH_MASK; + last_seen[hash] = (uint64_t) (next_hash - file_start); } get_len_icf_code(match_length, &code); get_dist_icf_code(dist, &code2, &extra_bits); - state->hist.ll_hist[code]++; - state->hist.d_hist[code2]++; + level_buf->hist.ll_hist[code]++; + level_buf->hist.d_hist[code2]++; write_deflate_icf(next_out, code, code2, extra_bits); @@ -188,7 +297,7 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream) } get_lit_icf_code(literal & 0xFF, &code); - state->hist.ll_hist[code]++; + level_buf->hist.ll_hist[code]++; write_deflate_icf(next_out, code, 
NULL_DIST_SYM, 0); next_out++; next_in++; @@ -205,7 +314,7 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream) literal = *next_in; get_lit_icf_code(literal & 0xFF, &code); - state->hist.ll_hist[code]++; + level_buf->hist.ll_hist[code]++; write_deflate_icf(next_out, code, NULL_DIST_SYM, 0); next_out++; next_in++; @@ -221,3 +330,238 @@ void isal_deflate_icf_finish_base(struct isal_zstream *stream) return; } + +void isal_deflate_icf_finish_hash_hist_base(struct isal_zstream *stream) +{ + uint32_t literal = 0, hash; + uint8_t *start_in, *next_in, *end_in, *end, *next_hash; + struct deflate_icf *start_out, *next_out, *end_out; + uint16_t match_length; + uint32_t dist; + uint32_t code, code2, extra_bits; + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint16_t *last_seen = level_buf->hash_hist.hash_table; + uint8_t *file_start = stream->next_in - stream->total_in; + + start_in = stream->next_in; + end_in = start_in + stream->avail_in; + next_in = start_in; + + start_out = ((struct level_buf *)stream->level_buf)->icf_buf_next; + end_out = start_out + ((struct level_buf *)stream->level_buf)->icf_buf_avail_out / + sizeof(struct deflate_icf); + next_out = start_out; + + if (stream->avail_in == 0) { + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_CREATE_HDR; + return; + } + + while (next_in + 3 < end_in) { + if (next_out >= end_out) { + state->state = ZSTATE_CREATE_HDR; + update_state(stream, start_in, next_in, end_in, start_out, next_out, + end_out); + return; + } + + literal = *(uint32_t *) next_in; + hash = compute_hash(literal) & HASH_HIST_HASH_MASK; + dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) (next_in - file_start); + + if (dist - 1 < IGZIP_HIST_SIZE - 1) { /* The -1 are to handle the case when dist = 0 */ + match_length = compare258(next_in - dist, next_in, end_in - next_in); + + if (match_length 
>= SHORTEST_MATCH) { + next_hash = next_in; +#ifdef ISAL_LIMIT_HASH_UPDATE + end = next_hash + 3; +#else + end = next_hash + match_length; +#endif + next_hash++; + + for (; next_hash < end - 3; next_hash++) { + literal = *(uint32_t *) next_hash; + hash = compute_hash(literal) & HASH_HIST_HASH_MASK; + last_seen[hash] = (uint64_t) (next_hash - file_start); + } + + get_len_icf_code(match_length, &code); + get_dist_icf_code(dist, &code2, &extra_bits); + + level_buf->hist.ll_hist[code]++; + level_buf->hist.d_hist[code2]++; + + write_deflate_icf(next_out, code, code2, extra_bits); + + next_out++; + next_in += match_length; + + continue; + } + } + + get_lit_icf_code(literal & 0xFF, &code); + level_buf->hist.ll_hist[code]++; + write_deflate_icf(next_out, code, NULL_DIST_SYM, 0); + next_out++; + next_in++; + + } + + while (next_in < end_in) { + if (next_out >= end_out) { + state->state = ZSTATE_CREATE_HDR; + update_state(stream, start_in, next_in, end_in, start_out, next_out, + end_out); + return; + } + + literal = *next_in; + get_lit_icf_code(literal & 0xFF, &code); + level_buf->hist.ll_hist[code]++; + write_deflate_icf(next_out, code, NULL_DIST_SYM, 0); + next_out++; + next_in++; + + } + + if (next_in == end_in) { + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_CREATE_HDR; + } + + update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out); + + return; +} + +void isal_deflate_icf_finish_hash_map_base(struct isal_zstream *stream) +{ + uint32_t literal = 0, hash; + uint8_t *start_in, *next_in, *end_in, *end, *next_hash; + struct deflate_icf *start_out, *next_out, *end_out; + uint16_t match_length; + uint32_t dist; + uint32_t code, code2, extra_bits; + struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint16_t *last_seen = level_buf->hash_map.hash_table; + uint8_t *file_start = stream->next_in - stream->total_in; + + start_in = stream->next_in; + 
end_in = start_in + stream->avail_in; + next_in = start_in; + + start_out = level_buf->icf_buf_next; + end_out = start_out + level_buf->icf_buf_avail_out / sizeof(struct deflate_icf); + next_out = start_out; + + if (stream->avail_in == 0) { + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_CREATE_HDR; + return; + } + + while (next_in + 3 < end_in) { + if (next_out >= end_out) { + state->state = ZSTATE_CREATE_HDR; + update_state(stream, start_in, next_in, end_in, start_out, next_out, + end_out); + return; + } + + literal = *(uint32_t *) next_in; + hash = compute_hash_mad(literal) & HASH_MAP_HASH_MASK; + dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; + last_seen[hash] = (uint64_t) (next_in - file_start); + + if (dist - 1 < IGZIP_HIST_SIZE - 1) { /* The -1 are to handle the case when dist = 0 */ + match_length = compare258(next_in - dist, next_in, end_in - next_in); + + if (match_length >= SHORTEST_MATCH) { + next_hash = next_in; +#ifdef ISAL_LIMIT_HASH_UPDATE + end = next_hash + 3; +#else + end = next_hash + match_length; +#endif + next_hash++; + + for (; next_hash < end - 3; next_hash++) { + literal = *(uint32_t *) next_hash; + hash = compute_hash_mad(literal) & HASH_MAP_HASH_MASK; + last_seen[hash] = (uint64_t) (next_hash - file_start); + } + + get_len_icf_code(match_length, &code); + get_dist_icf_code(dist, &code2, &extra_bits); + + level_buf->hist.ll_hist[code]++; + level_buf->hist.d_hist[code2]++; + + write_deflate_icf(next_out, code, code2, extra_bits); + + next_out++; + next_in += match_length; + + continue; + } + } + + get_lit_icf_code(literal & 0xFF, &code); + level_buf->hist.ll_hist[code]++; + write_deflate_icf(next_out, code, NULL_DIST_SYM, 0); + next_out++; + next_in++; + + } + + while (next_in < end_in) { + if (next_out >= end_out) { + state->state = ZSTATE_CREATE_HDR; + update_state(stream, start_in, next_in, end_in, start_out, next_out, + end_out); + return; + } + + literal = *next_in; + 
get_lit_icf_code(literal & 0xFF, &code); + level_buf->hist.ll_hist[code]++; + write_deflate_icf(next_out, code, NULL_DIST_SYM, 0); + next_out++; + next_in++; + + } + + if (next_in == end_in) { + if (stream->end_of_stream || stream->flush != NO_FLUSH) + state->state = ZSTATE_CREATE_HDR; + } + + update_state(stream, start_in, next_in, end_in, start_out, next_out, end_out); + + return; +} + +void isal_deflate_hash_mad_base(uint16_t * hash_table, uint32_t hash_mask, + uint32_t current_index, uint8_t * dict, uint32_t dict_len) +{ + uint8_t *next_in = dict; + uint8_t *end_in = dict + dict_len - SHORTEST_MATCH; + uint32_t literal; + uint32_t hash; + uint16_t index = current_index - dict_len; + + while (next_in <= end_in) { + literal = *(uint32_t *) next_in; + hash = compute_hash_mad(literal) & hash_mask; + hash_table[hash] = index; + index++; + next_in++; + } +} diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_icf_body.c b/src/main/native/compression/isa-l-master/igzip/igzip_icf_body.c new file mode 100644 index 00000000..3e6f5a73 --- /dev/null +++ b/src/main/native/compression/isa-l-master/igzip/igzip_icf_body.c @@ -0,0 +1,349 @@ +#include "igzip_lib.h" +#include "huffman.h" +#include "encode_df.h" +#include "igzip_level_buf_structs.h" + +extern void gen_icf_map_lh1(struct isal_zstream *, struct deflate_icf *, uint32_t); +extern void set_long_icf_fg(uint8_t *, uint8_t *, struct deflate_icf *, struct level_buf *); +extern void isal_deflate_icf_body_lvl1(struct isal_zstream *); +extern void isal_deflate_icf_body_lvl2(struct isal_zstream *); +/* +************************************************************* + * Helper functions + ************************************************************ +*/ +static inline void write_deflate_icf(struct deflate_icf *icf, uint32_t lit_len, + uint32_t lit_dist, uint32_t extra_bits) +{ + /* icf->lit_len = lit_len; */ + /* icf->lit_dist = lit_dist; */ + /* icf->dist_extra = extra_bits; */ + + *(uint32_t *) icf = lit_len | 
(lit_dist << LIT_LEN_BIT_COUNT) + | (extra_bits << (LIT_LEN_BIT_COUNT + DIST_LIT_BIT_COUNT)); +} + +void hash_section(struct isal_zstream *stream, uint8_t * next_in, uint8_t * end_in, + uint16_t * last_seen) +{ + uint32_t index, hash_input, hash; + uint8_t *file_start = stream->next_in - stream->total_in; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint16_t *hash_table = level_buf->hash_map.hash_table; + + /* Compute Hashes */ + for (index = 0; index < end_in - next_in - ISAL_LOOK_AHEAD; index++) { + hash_input = *(uint32_t *) (next_in + index); + hash = compute_hash(hash_input) & HASH_MAP_HASH_MASK; + last_seen[index] = hash_table[hash]; + hash_table[hash] = (uint64_t) (next_in + index - file_start); + } + + return; +} + +void set_long_icf_fg_base(uint8_t * next_in, uint8_t * end_in, + struct deflate_icf *match_lookup, struct level_buf *level_buf) +{ + uint32_t dist_code, dist_extra, dist, len; + uint32_t match_len; + uint32_t dist_start[] = { + 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d, + 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1, + 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01, + 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000 + }; + + while (next_in < end_in - ISAL_LOOK_AHEAD) { + dist_code = match_lookup->lit_dist; + dist_extra = match_lookup->dist_extra; + dist = dist_start[dist_code] + dist_extra; + len = match_lookup->lit_len; + if (len >= 8 + LEN_OFFSET) { + match_len = + compare258(next_in - dist + 8, next_in + 8, 250) + LEN_OFFSET + 8; + + while (match_len > match_lookup->lit_len + && match_len >= LEN_OFFSET + SHORTEST_MATCH) { + write_deflate_icf(match_lookup, match_len, dist_code, + dist_extra); + match_lookup++; + next_in++; + match_len--; + } + } + + match_lookup++; + next_in++; + } +} + +/* +************************************************************* + * Methods for generating one pass match lookup table + 
************************************************************ +*/ +void gen_icf_map_h1_base(struct isal_zstream *stream, + struct deflate_icf *matches_icf_lookup, uint64_t input_size) +{ + + uint32_t dist, len, extra_bits; + uint8_t *next_in = stream->next_in, *end_in = stream->next_in + input_size; + uint8_t *file_start = stream->next_in - stream->total_in; + uint32_t hash; + uint64_t next_bytes, match_bytes; + uint64_t match; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint16_t *hash_table = level_buf->hash_map.hash_table; + + if (input_size < ISAL_LOOK_AHEAD) + return; + + matches_icf_lookup->lit_len = *next_in; + matches_icf_lookup->lit_dist = 0x1e; + matches_icf_lookup->dist_extra = 0; + + hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK; + hash_table[hash] = (uint64_t) (next_in - file_start); + + next_in++; + matches_icf_lookup++; + + while (next_in < end_in - ISAL_LOOK_AHEAD) { + hash = compute_hash(*(uint32_t *) next_in) & HASH_MAP_HASH_MASK; + dist = (next_in - file_start - hash_table[hash]); + dist = ((dist - 1) & (IGZIP_HIST_SIZE - 1)) + 1; + hash_table[hash] = (uint64_t) (next_in - file_start); + + match_bytes = *(uint64_t *) (next_in - dist); + next_bytes = *(uint64_t *) next_in; + match = next_bytes ^ match_bytes; + + len = tzcnt(match); + + if (len >= SHORTEST_MATCH) { + len += LEN_OFFSET; + get_dist_icf_code(dist, &dist, &extra_bits); + write_deflate_icf(matches_icf_lookup, len, dist, extra_bits); + } else { + write_deflate_icf(matches_icf_lookup, *next_in, 0x1e, 0); + } + + next_in++; + matches_icf_lookup++; + } +} + +/* +************************************************************* + * One pass methods for parsing provided match lookup table + ************************************************************ +*/ +struct deflate_icf *compress_icf_map_g(struct isal_zstream *stream, + struct deflate_icf *matches_next, + struct deflate_icf *matches_end) +{ + uint32_t lit_len, lit_len2, dist; + uint64_t code; + 
struct isal_zstate *state = &stream->internal_state; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + struct deflate_icf *matches_start = matches_next; + struct deflate_icf *icf_buf_end = + level_buf->icf_buf_next + + level_buf->icf_buf_avail_out / sizeof(struct deflate_icf); + + while (matches_next < matches_end - 1 && level_buf->icf_buf_next < icf_buf_end - 1) { + code = *(uint64_t *) matches_next; + lit_len = code & LIT_LEN_MASK; + lit_len2 = (code >> ICF_CODE_LEN) & LIT_LEN_MASK; + level_buf->hist.ll_hist[lit_len]++; + + if (lit_len >= LEN_START) { + *(uint32_t *) level_buf->icf_buf_next = code; + level_buf->icf_buf_next++; + + dist = (code >> ICF_DIST_OFFSET) & DIST_LIT_MASK; + level_buf->hist.d_hist[dist]++; + lit_len -= LEN_OFFSET; + matches_next += lit_len; + + } else if (lit_len2 >= LEN_START) { + *(uint64_t *) level_buf->icf_buf_next = code; + level_buf->icf_buf_next += 2; + + level_buf->hist.ll_hist[lit_len2]++; + + dist = (code >> (ICF_CODE_LEN + ICF_DIST_OFFSET)) & DIST_LIT_MASK; + level_buf->hist.d_hist[dist]++; + lit_len2 -= LEN_OFFSET - 1; + matches_next += lit_len2; + + } else { + code = ((lit_len2 + LIT_START) << ICF_DIST_OFFSET) | lit_len; + *(uint32_t *) level_buf->icf_buf_next = code; + level_buf->icf_buf_next++; + + level_buf->hist.ll_hist[lit_len2]++; + + matches_next += 2; + } + } + + while (matches_next < matches_end && level_buf->icf_buf_next < icf_buf_end) { + code = *(uint32_t *) matches_next; + lit_len = code & LIT_LEN_MASK; + *(uint32_t *) level_buf->icf_buf_next = code; + level_buf->icf_buf_next++; + + level_buf->hist.ll_hist[lit_len]++; + if (lit_len >= LEN_START) { + dist = (code >> 10) & 0x1ff; + level_buf->hist.d_hist[dist]++; + lit_len -= LEN_OFFSET; + matches_next += lit_len; + } else { + matches_next++; + } + } + + level_buf->icf_buf_avail_out = + (icf_buf_end - level_buf->icf_buf_next) * sizeof(struct deflate_icf); + + state->block_end += matches_next - matches_start; + if (matches_next > matches_end && 
matches_start < matches_end) { + stream->next_in += matches_next - matches_end; + stream->avail_in -= matches_next - matches_end; + stream->total_in += matches_next - matches_end; + } + + return matches_next; + +} + +/* +************************************************************* + * Compression functions combining different methods + ************************************************************ +*/ +static inline void icf_body_next_state(struct isal_zstream *stream) +{ + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + struct isal_zstate *state = &stream->internal_state; + + if (level_buf->icf_buf_avail_out <= 0) + state->state = ZSTATE_CREATE_HDR; + + else if (stream->avail_in <= ISAL_LOOK_AHEAD + && (stream->end_of_stream || stream->flush != NO_FLUSH)) + state->state = ZSTATE_FLUSH_READ_BUFFER; +} + +void icf_body_hash1_fillgreedy_lazy(struct isal_zstream *stream) +{ + struct deflate_icf *matches_icf, *matches_next_icf, *matches_end_icf; + struct deflate_icf *matches_icf_lookup; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint32_t input_size; + + matches_icf = level_buf->hash_map.matches; + matches_icf_lookup = matches_icf; + matches_next_icf = level_buf->hash_map.matches_next; + matches_end_icf = level_buf->hash_map.matches_end; + + matches_next_icf = compress_icf_map_g(stream, matches_next_icf, matches_end_icf); + + while (matches_next_icf >= matches_end_icf) { + input_size = MATCH_BUF_SIZE; + input_size = (input_size > stream->avail_in) ? 
stream->avail_in : input_size; + + if (input_size <= ISAL_LOOK_AHEAD) + break; + + gen_icf_map_h1_base(stream, matches_icf_lookup, input_size); + + set_long_icf_fg(stream->next_in, stream->next_in + input_size, + matches_icf_lookup, level_buf); + + stream->next_in += input_size - ISAL_LOOK_AHEAD; + stream->avail_in -= input_size - ISAL_LOOK_AHEAD; + stream->total_in += input_size - ISAL_LOOK_AHEAD; + + matches_end_icf = matches_icf + input_size - ISAL_LOOK_AHEAD; + matches_next_icf = compress_icf_map_g(stream, matches_icf, matches_end_icf); + } + + level_buf->hash_map.matches_next = matches_next_icf; + level_buf->hash_map.matches_end = matches_end_icf; + + icf_body_next_state(stream); +} + +void icf_body_lazyhash1_fillgreedy_greedy(struct isal_zstream *stream) +{ + struct deflate_icf *matches_icf, *matches_next_icf, *matches_end_icf; + struct deflate_icf *matches_icf_lookup; + struct level_buf *level_buf = (struct level_buf *)stream->level_buf; + uint32_t input_size; + + matches_icf = level_buf->hash_map.matches; + matches_icf_lookup = matches_icf; + matches_next_icf = level_buf->hash_map.matches_next; + matches_end_icf = level_buf->hash_map.matches_end; + + matches_next_icf = compress_icf_map_g(stream, matches_next_icf, matches_end_icf); + + while (matches_next_icf >= matches_end_icf) { + input_size = MATCH_BUF_SIZE; + input_size = (input_size > stream->avail_in) ? 
stream->avail_in : input_size; + + if (input_size <= ISAL_LOOK_AHEAD) + break; + + gen_icf_map_lh1(stream, matches_icf_lookup, input_size); + + set_long_icf_fg(stream->next_in, stream->next_in + input_size, + matches_icf_lookup, level_buf); + + stream->next_in += input_size - ISAL_LOOK_AHEAD; + stream->avail_in -= input_size - ISAL_LOOK_AHEAD; + stream->total_in += input_size - ISAL_LOOK_AHEAD; + + matches_end_icf = matches_icf + input_size - ISAL_LOOK_AHEAD; + matches_next_icf = compress_icf_map_g(stream, matches_icf, matches_end_icf); + } + + level_buf->hash_map.matches_next = matches_next_icf; + level_buf->hash_map.matches_end = matches_end_icf; + + icf_body_next_state(stream); +} + +void isal_deflate_icf_body_base(struct isal_zstream *stream) +{ + switch (stream->level) { + case 3: + icf_body_hash1_fillgreedy_lazy(stream); + break; + case 2: + isal_deflate_icf_body_lvl2(stream); + break; + case 1: + default: + isal_deflate_icf_body_lvl1(stream); + } +} + +void isal_deflate_icf_body_06(struct isal_zstream *stream) +{ + switch (stream->level) { + case 3: + icf_body_lazyhash1_fillgreedy_greedy(stream); + break; + case 2: + isal_deflate_icf_body_lvl2(stream); + break; + case 1: + default: + isal_deflate_icf_body_lvl1(stream); + } +} diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_icf_body_01.asm b/src/main/native/compression/isa-l-master/igzip/igzip_icf_body_01.asm deleted file mode 100644 index 477919a7..00000000 --- a/src/main/native/compression/isa-l-master/igzip/igzip_icf_body_01.asm +++ /dev/null @@ -1,7 +0,0 @@ -%define ARCH 01 - -%ifndef COMPARE_TYPE -%define COMPARE_TYPE 2 -%endif - -%include "igzip_icf_body.asm" diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_icf_body_02.asm b/src/main/native/compression/isa-l-master/igzip/igzip_icf_body_02.asm deleted file mode 100644 index 5f8ab951..00000000 --- a/src/main/native/compression/isa-l-master/igzip/igzip_icf_body_02.asm +++ /dev/null @@ -1,7 +0,0 @@ -%define ARCH 02 - 
-%ifndef COMPARE_TYPE -%define COMPARE_TYPE 2 -%endif - -%include "igzip_icf_body.asm" diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_icf_body_04.asm b/src/main/native/compression/isa-l-master/igzip/igzip_icf_body_04.asm deleted file mode 100644 index dda99d92..00000000 --- a/src/main/native/compression/isa-l-master/igzip/igzip_icf_body_04.asm +++ /dev/null @@ -1,8 +0,0 @@ -%define ARCH 04 -%define USE_HSWNI - -%ifndef COMPARE_TYPE -%define COMPARE_TYPE 3 -%endif - -%include "igzip_icf_body.asm" diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_icf_body.asm b/src/main/native/compression/isa-l-master/igzip/igzip_icf_body_h1_gr_bt.asm similarity index 72% rename from src/main/native/compression/isa-l-master/igzip/igzip_icf_body.asm rename to src/main/native/compression/isa-l-master/igzip/igzip_icf_body_h1_gr_bt.asm index abfc8143..903e1b37 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_icf_body.asm +++ b/src/main/native/compression/isa-l-master/igzip/igzip_icf_body_h1_gr_bt.asm @@ -52,7 +52,7 @@ global %1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; %define file_start rdi %define file_length r15 -%define stream r14 +%define level_buf r14 %define f_i r10 %define m_out_buf r11 @@ -68,7 +68,6 @@ global %1 %define dist_code r12 %define tmp1 rsi - %define lit_code rsi %define curr_data2 r8 @@ -79,6 +78,7 @@ global %1 %define len_code rdx %define hash3 rdx +%define stream r13 %define tmp3 r13 %define hash rbp @@ -93,6 +93,9 @@ global %1 %define ytmp0 ymm0 ; tmp %define ytmp1 ymm1 ; tmp +%define hash_table level_buf + _hash8k_hash_table +%define lit_len_hist level_buf + _hist_lit_len +%define dist_hist level_buf + _hist_dist ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -101,35 +104,57 @@ global %1 m_out_end equ 0 ; local variable (8 bytes) m_out_start equ 8 f_end_i_mem_offset equ 16 
-gpr_save_mem_offset equ 24 ; gpr save area (8*8 bytes) -xmm_save_mem_offset equ 24 + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned) -stack_size equ 3*8 + 8*8 + 4*16 +stream_offset equ 24 +gpr_save_mem_offset equ 32 ; gpr save area (8*8 bytes) +xmm_save_mem_offset equ gpr_save_mem_offset + 8*8 ; xmm save area (4*16 bytes) (16 byte aligned) +stack_size equ 5*8 + 8*8 + 4*16 ;;; 8 because stack address is odd multiple of 8 after a function call and ;;; we want it aligned to 16 bytes -; void isal_deflate_icf_body ( isal_zstream *stream ) +;; Defines to generate functions for different architecture +%xdefine ARCH 01 +%xdefine ARCH1 02 +%xdefine ARCH2 04 + +%ifndef COMPARE_TYPE +%xdefine COMPARE_TYPE_NOT_DEF +%xdefine COMPARE_TYPE 1 +%xdefine COMPARE_TYPE1 2 +%xdefine COMPARE_TYPE2 3 +%endif + +%rep 3 +;; Defines to generate functions for different levels +%xdefine HASH_MASK HASH8K_HASH_MASK +%xdefine HASH_MASK1 HASH_HIST_HASH_MASK +%xdefine METHOD hash8k +%xdefine METHOD1 hash_hist + +%rep 2 +; void isal_deflate_icf_body ( isal_zstream *stream ) +; we make 6 different versions of this function ; arg 1: rcx: addr of stream -global isal_deflate_icf_body_ %+ ARCH -isal_deflate_icf_body_ %+ ARCH %+ : +global isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH +isal_deflate_icf_body_ %+ METHOD %+ _ %+ ARCH %+ : %ifidn __OUTPUT_FORMAT__, elf64 mov rcx, rdi %endif ;; do nothing if (avail_in == 0) cmp dword [rcx + _avail_in], 0 - jne skip1 + jne .skip1 ;; Set stream's next state mov rdx, ZSTATE_FLUSH_READ_BUFFER mov rax, ZSTATE_CREATE_HDR - cmp dword [rcx + _end_of_stream], 0 + cmp word [rcx + _end_of_stream], 0 cmovne rax, rdx - cmp dword [rcx + _flush], _NO_FLUSH + cmp word [rcx + _flush], _NO_FLUSH cmovne rax, rdx mov dword [rcx + _internal_state_state], eax ret -skip1: +.skip1: %ifdef ALIGN_STACK push rbp @@ -150,14 +175,16 @@ skip1: mov [rsp + gpr_save_mem_offset + 7*8], r15 mov stream, rcx - mov dword [stream + _internal_state_has_eob], 0 + mov [rsp + stream_offset], stream 
+ + mov byte [stream + _internal_state_has_eob], 0 ; state->bitbuf.set_buf(stream->next_out, stream->avail_out); - mov tmp1, [stream + _level_buf] - mov m_out_buf, [tmp1 + _icf_buf_next] + mov level_buf, [stream + _level_buf] + mov m_out_buf, [level_buf + _icf_buf_next] mov [rsp + m_out_start], m_out_buf - mov tmp1, [tmp1 + _icf_buf_avail_out] + mov tmp1, [level_buf + _icf_buf_avail_out] add tmp1, m_out_buf sub tmp1, SLOP @@ -176,33 +203,32 @@ skip1: ; if (file_length <= 0) continue; cmp file_length, f_i - jle input_end + jle .input_end ; for (f_i = f_start_i; f_i < file_length; f_i++) { -MARK __body_compute_hash_ %+ ARCH MOVDQU xdata, [file_start + f_i] mov curr_data, [file_start + f_i] - mov tmp3, curr_data - mov tmp4, curr_data + mov tmp1, curr_data + mov tmp2, curr_data compute_hash hash, curr_data - shr tmp3, 8 - compute_hash hash2, tmp3 + shr tmp1, 8 + compute_hash hash2, tmp1 and hash, HASH_MASK and hash2, HASH_MASK - cmp dword [stream + _internal_state_has_hist], IGZIP_NO_HIST - je write_first_byte + cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST + je .write_first_byte - jmp loop2 + jmp .loop2 align 16 -loop2: +.loop2: ; if (state->bitbuf.is_full()) { cmp m_out_buf, [rsp + m_out_end] - ja output_end + ja .output_end xor dist, dist xor dist2, dist2 @@ -212,8 +238,8 @@ loop2: mov dist %+ w, f_i %+ w dec dist - sub dist %+ w, word [stream + _internal_state_head + 2 * hash] - mov [stream + _internal_state_head + 2 * hash], f_i %+ w + sub dist %+ w, word [hash_table + 2 * hash] + mov [hash_table + 2 * hash], f_i %+ w inc f_i @@ -224,8 +250,8 @@ loop2: mov dist2 %+ w, f_i %+ w dec dist2 - sub dist2 %+ w, word [stream + _internal_state_head + 2 * hash2] - mov [stream + _internal_state_head + 2 * hash2], f_i %+ w + sub dist2 %+ w, word [hash_table + 2 * hash2] + mov [hash_table + 2 * hash2], f_i %+ w ; if ((dist-1) < (D-1)) { and dist %+ d, (D-1) @@ -238,38 +264,36 @@ loop2: and dist2 %+ d, (D-1) neg dist2 -MARK __body_compare_ %+ ARCH ;; Check for 
long len/dist match (>7) with first literal MOVQ len, xdata mov curr_data, len PSRLDQ xdata, 1 xor len, [tmp1 + dist - 1] - jz compare_loop + jz .compare_loop ;; Check for len/dist match (>7) with second literal MOVQ len2, xdata xor len2, [tmp1 + dist2] - jz compare_loop2 + jz .compare_loop2 movzx lit_code, curr_data %+ b shr curr_data, 8 ;; Check for len/dist match for first literal test len %+ d, 0xFFFFFFFF - jz len_dist_huffman_pre + jz .len_dist_huffman_pre - inc word [stream + _internal_state_hist_lit_len + HIST_ELEM_SIZE*lit_code] + inc dword [lit_len_hist + HIST_ELEM_SIZE*lit_code] movzx lit_code2, curr_data %+ b ;; Check for len/dist match for second literal test len2 %+ d, 0xFFFFFFFF - jnz write_lit_bits + jnz .write_lit_bits -MARK __body_len_dist_lit_huffman_ %+ ARCH -len_dist_lit_huffman_pre: +.len_dist_lit_huffman_pre: bsf len2, len2 shr len2, 3 -len_dist_lit_huffman: +.len_dist_lit_huffman: or lit_code, LIT movnti dword [m_out_buf], lit_code %+ d @@ -294,29 +318,29 @@ len_dist_lit_huffman: sub file_start, f_i add f_i, len2 - mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w + mov [hash_table + 2 * hash], tmp3 %+ w compute_hash hash, curr_data add tmp3,1 - mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w + mov [hash_table + 2 * hash2], tmp3 %+ w compute_hash hash2, tmp1 add tmp3, 1 - mov [stream + _internal_state_head + 2 * hash3], tmp3 %+ w + mov [hash_table + 2 * hash3], tmp3 %+ w add dist_code2, 254 add dist_code2, len2 - inc word [stream + _internal_state_hist_lit_len + HIST_ELEM_SIZE*(len2 + 254)] + inc dword [lit_len_hist + HIST_ELEM_SIZE*(len2 + 254)] movnti dword [m_out_buf + 4], dist_code2 %+ d add m_out_buf, 8 shr dist_code2, DIST_OFFSET and dist_code2, 0x1F - inc word [stream + _internal_state_hist_dist + HIST_ELEM_SIZE*dist_code2] + inc dword [dist_hist + HIST_ELEM_SIZE*dist_code2] ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; and hash %+ d, HASH_MASK @@ -324,16 +348,15 @@ len_dist_lit_huffman: ; continue cmp 
f_i, file_length - jl loop2 - jmp input_end + jl .loop2 + jmp .input_end ;; encode as dist/len -MARK __body_len_dist_huffman_ %+ ARCH -len_dist_huffman_pre: +.len_dist_huffman_pre: bsf len, len shr len, 3 -len_dist_huffman: +.len_dist_huffman: dec f_i ;; Setup for updateing hash lea tmp3, [f_i + 2] ; tmp3 <= k @@ -353,23 +376,23 @@ len_dist_huffman: lea len_code, [len + 254] or dist_code, len_code - mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w + mov [hash_table + 2 * hash], tmp3 %+ w add tmp3,1 - mov [stream + _internal_state_head + 2 * hash2], tmp3 %+ w + mov [hash_table + 2 * hash2], tmp3 %+ w compute_hash hash, curr_data shr curr_data2, 8 compute_hash hash2, curr_data2 - inc word [stream + _internal_state_hist_lit_len + HIST_ELEM_SIZE*len_code] + inc dword [lit_len_hist + HIST_ELEM_SIZE*len_code] movnti dword [m_out_buf], dist_code %+ d add m_out_buf, 4 shr dist_code, DIST_OFFSET and dist_code, 0x1F - inc word [stream + _internal_state_hist_dist + HIST_ELEM_SIZE*dist_code] + inc dword [dist_hist + HIST_ELEM_SIZE*dist_code] ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; and hash %+ d, HASH_MASK @@ -377,16 +400,15 @@ len_dist_huffman: ; continue cmp f_i, file_length - jl loop2 - jmp input_end + jl .loop2 + jmp .input_end -MARK __body_write_lit_bits_ %+ ARCH -write_lit_bits: +.write_lit_bits: MOVDQU xdata, [file_start + f_i + 1] add f_i, 1 MOVQ curr_data, xdata - inc word [stream + _internal_state_hist_lit_len + HIST_ELEM_SIZE*lit_code2] + inc dword [lit_len_hist + HIST_ELEM_SIZE*lit_code2] shl lit_code2, DIST_OFFSET lea lit_code, [lit_code + lit_code2 + (31 << DIST_OFFSET)] @@ -396,36 +418,38 @@ write_lit_bits: ; continue cmp f_i, file_length - jl loop2 + jl .loop2 -input_end: +.input_end: + mov stream, [rsp + stream_offset] mov tmp1, ZSTATE_FLUSH_READ_BUFFER mov tmp2, ZSTATE_BODY - cmp dword [stream + _end_of_stream], 0 + cmp word [stream + _end_of_stream], 0 cmovne tmp2, tmp1 - cmp dword [stream + _flush], _NO_FLUSH + cmp word [stream + 
_flush], _NO_FLUSH cmovne tmp2, tmp1 mov dword [stream + _internal_state_state], tmp2 %+ d - jmp end + jmp .end -output_end: +.output_end: + mov stream, [rsp + stream_offset] mov dword [stream + _internal_state_state], ZSTATE_CREATE_HDR -end: +.end: ;; update input buffer add file_length, LA mov [stream + _total_in], f_i %+ d + mov [stream + _internal_state_block_end], f_i %+ d add file_start, f_i mov [stream + _next_in], file_start sub file_length, f_i mov [stream + _avail_in], file_length %+ d ;; update output buffer - mov tmp1, [stream + _level_buf] - mov [tmp1 + _icf_buf_next], m_out_buf + mov [level_buf + _icf_buf_next], m_out_buf sub m_out_buf, [rsp + m_out_start] - sub [tmp1 + _icf_buf_avail_out], m_out_buf %+ d + sub [level_buf + _icf_buf_avail_out], m_out_buf %+ d mov rbx, [rsp + gpr_save_mem_offset + 0*8] mov rsi, [rsp + gpr_save_mem_offset + 1*8] @@ -444,8 +468,7 @@ end: %endif ret -MARK __body_compare_loops_ %+ ARCH -compare_loop: +.compare_loop: lea tmp2, [tmp1 + dist - 1] %if (COMPARE_TYPE == 1) compare250 tmp1, tmp2, len, tmp3 @@ -457,9 +480,9 @@ compare_loop: %error Unknown Compare type COMPARE_TYPE % error %endif - jmp len_dist_huffman + jmp .len_dist_huffman -compare_loop2: +.compare_loop2: lea tmp2, [tmp1 + dist2] add tmp1, 1 %if (COMPARE_TYPE == 1) @@ -474,24 +497,23 @@ compare_loop2: %endif movzx lit_code, curr_data %+ b shr curr_data, 8 - inc word [stream + _internal_state_hist_lit_len + HIST_ELEM_SIZE*lit_code] - jmp len_dist_lit_huffman + inc dword [lit_len_hist + HIST_ELEM_SIZE*lit_code] + jmp .len_dist_lit_huffman -MARK __write_first_byte_ %+ ARCH -write_first_byte: +.write_first_byte: cmp m_out_buf, [rsp + m_out_end] - ja output_end + ja .output_end - mov dword [stream + _internal_state_has_hist], IGZIP_HIST + mov byte [stream + _internal_state_has_hist], IGZIP_HIST - mov [stream + _internal_state_head + 2 * hash], f_i %+ w + mov [hash_table + 2 * hash], f_i %+ w mov hash, hash2 - shr tmp4, 16 - compute_hash hash2, tmp4 + shr tmp2, 16 + 
compute_hash hash2, tmp2 and curr_data, 0xff - inc word [stream + _internal_state_hist_lit_len + HIST_ELEM_SIZE*curr_data] + inc dword [lit_len_hist + HIST_ELEM_SIZE*curr_data] or curr_data, LIT movnti dword [m_out_buf], curr_data %+ d @@ -504,10 +526,28 @@ write_first_byte: and hash2 %+ d, HASH_MASK cmp f_i, file_length - jl loop2 - jmp input_end + jl .loop2 + jmp .input_end + + +;; Shift defines over in order to iterate over all versions +%undef HASH_MASK +%xdefine HASH_MASK HASH_MASK1 -section .data - align 16 -mask: dd HASH_MASK, HASH_MASK, HASH_MASK, HASH_MASK -const_D: dq D +%undef METHOD +%xdefine METHOD METHOD1 +%endrep + +;; Shift defines over in order to iterate over all versions +%undef ARCH +%xdefine ARCH ARCH1 +%undef ARCH1 +%xdefine ARCH1 ARCH2 + +%ifdef COMPARE_TYPE_NOT_DEF +%undef COMPARE_TYPE +%xdefine COMPARE_TYPE COMPARE_TYPE1 +%undef COMPARE_TYPE1 +%xdefine COMPARE_TYPE1 COMPARE_TYPE2 +%endif +%endrep diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_icf_finish.asm b/src/main/native/compression/isa-l-master/igzip/igzip_icf_finish.asm index 15be09ce..dbfbf32f 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_icf_finish.asm +++ b/src/main/native/compression/isa-l-master/igzip/igzip_icf_finish.asm @@ -63,6 +63,8 @@ %define m_out_buf r8 +%define level_buf r9 + %define dist r10 %define code2 r12 @@ -74,6 +76,10 @@ %define hufftables r15 +%define hash_table level_buf + _hash8k_hash_table +%define lit_len_hist level_buf + _hist_lit_len +%define dist_hist level_buf + _hist_dist + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -81,10 +87,17 @@ f_end_i_mem_offset equ 0 ; local variable (8 bytes) m_out_end equ 8 m_out_start equ 16 stack_size equ 32 + +%xdefine HASH_MASK HASH8K_HASH_MASK +%xdefine HASH_MASK1 HASH_HIST_HASH_MASK +%xdefine METHOD hash8k 
+%xdefine METHOD1 hash_hist + +%rep 2 ; void isal_deflate_icf_finish ( isal_zstream *stream ) ; arg 1: rcx: addr of stream -global isal_deflate_icf_finish_01 -isal_deflate_icf_finish_01: +global isal_deflate_icf_finish_ %+ METHOD %+ _01 +isal_deflate_icf_finish_ %+ METHOD %+ _01: PUSH_ALL rbx, rsi, rdi, rbp, r12, r13, r14, r15 sub rsp, stack_size @@ -95,10 +108,10 @@ isal_deflate_icf_finish_01: %endif ; state->bitbuf.set_buf(stream->next_out, stream->avail_out); - mov tmp1, [stream + _level_buf] - mov m_out_buf, [tmp1 + _icf_buf_next] + mov level_buf, [stream + _level_buf] + mov m_out_buf, [level_buf + _icf_buf_next] mov [rsp + m_out_start], m_out_buf - mov tmp1, [tmp1 + _icf_buf_avail_out] + mov tmp1, [level_buf + _icf_buf_avail_out] add tmp1, m_out_buf sub tmp1, 4 @@ -118,28 +131,28 @@ isal_deflate_icf_finish_01: mov [rsp + f_end_i_mem_offset], f_end_i ; for (f_i = f_start_i; f_i < f_end_i; f_i++) { cmp f_i, f_end_i - jge end_loop_2 + jge .end_loop_2 mov curr_data %+ d, [file_start + f_i] - cmp dword [stream + _internal_state_has_hist], IGZIP_NO_HIST - jne skip_write_first_byte + cmp byte [stream + _internal_state_has_hist], IGZIP_NO_HIST + jne .skip_write_first_byte cmp m_out_buf, [rsp + m_out_end] - ja end_loop_2 + ja .end_loop_2 compute_hash hash, curr_data and hash %+ d, HASH_MASK - mov [stream + _internal_state_head + 2 * hash], f_i %+ w - mov dword [stream + _internal_state_has_hist], IGZIP_HIST - jmp encode_literal + mov [hash_table + 2 * hash], f_i %+ w + mov byte [stream + _internal_state_has_hist], IGZIP_HIST + jmp .encode_literal -skip_write_first_byte: +.skip_write_first_byte: -loop2: +.loop2: ; if (state->bitbuf.is_full()) { cmp m_out_buf, [rsp + m_out_end] - ja end_loop_2 + ja .end_loop_2 ; hash = compute_hash(state->file_start + f_i) & HASH_MASK; mov curr_data %+ d, [file_start + f_i] @@ -147,10 +160,10 @@ loop2: and hash %+ d, HASH_MASK ; f_index = state->head[hash]; - movzx f_index %+ d, word [stream + _internal_state_head + 2 * hash] + movzx 
f_index %+ d, word [hash_table + 2 * hash] ; state->head[hash] = (uint16_t) f_i; - mov [stream + _internal_state_head + 2 * hash], f_i %+ w + mov [hash_table + 2 * hash], f_i %+ w ; dist = f_i - f_index; // mod 64k mov dist %+ d, f_i %+ d @@ -161,7 +174,7 @@ loop2: mov tmp1 %+ d, dist %+ d sub tmp1 %+ d, 1 cmp tmp1 %+ d, (D-1) - jae encode_literal + jae .encode_literal ; len = f_end_i - f_i; mov tmp4, [rsp + f_end_i_mem_offset] @@ -181,7 +194,7 @@ loop2: ; if (len >= SHORTEST_MATCH) { cmp len, SHORTEST_MATCH - jb encode_literal + jb .encode_literal ;; encode as dist/len @@ -193,13 +206,13 @@ loop2: lea code, [len + 254] or code2, code - inc word [stream + _internal_state_hist_lit_len + HIST_ELEM_SIZE*code] + inc dword [lit_len_hist + HIST_ELEM_SIZE*code] ; for (k = f_i+1, f_i += len-1; k <= f_i; k++) { lea tmp3, [f_i + 1] ; tmp3 <= k add f_i, len cmp f_i, [rsp + f_end_i_mem_offset] - jae skip_hash_update + jae .skip_hash_update ; only update hash twice @@ -208,7 +221,7 @@ loop2: compute_hash hash, tmp6 and hash %+ d, HASH_MASK ; state->head[hash] = k; - mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w + mov [hash_table + 2 * hash], tmp3 %+ w add tmp3, 1 @@ -217,83 +230,92 @@ loop2: compute_hash hash, tmp6 and hash %+ d, HASH_MASK ; state->head[hash] = k; - mov [stream + _internal_state_head + 2 * hash], tmp3 %+ w + mov [hash_table + 2 * hash], tmp3 %+ w -skip_hash_update: +.skip_hash_update: write_dword code2, m_out_buf shr code2, DIST_OFFSET and code2, 0x1F - inc word [stream + _internal_state_hist_dist + HIST_ELEM_SIZE*code2] + inc dword [dist_hist + HIST_ELEM_SIZE*code2] ; continue cmp f_i, [rsp + f_end_i_mem_offset] - jl loop2 - jmp end_loop_2 + jl .loop2 + jmp .end_loop_2 -encode_literal: +.encode_literal: ; get_lit_code(state->file_start[f_i], &code2, &code_len2); movzx tmp5, byte [file_start + f_i] - inc word [stream + _internal_state_hist_lit_len + HIST_ELEM_SIZE*tmp5] + inc dword [lit_len_hist + HIST_ELEM_SIZE*tmp5] or tmp5, LIT write_dword tmp5, 
m_out_buf ; continue add f_i, 1 cmp f_i, [rsp + f_end_i_mem_offset] - jl loop2 + jl .loop2 -end_loop_2: +.end_loop_2: mov f_end_i, [rsp + f_end_i_mem_offset] add f_end_i, LAST_BYTES_COUNT mov [rsp + f_end_i_mem_offset], f_end_i ; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) { cmp f_i, f_end_i - jge input_end + jge .input_end xor tmp5, tmp5 -final_bytes: +.final_bytes: cmp m_out_buf, [rsp + m_out_end] - ja out_end + ja .out_end movzx tmp5, byte [file_start + f_i] - inc word [stream + _internal_state_hist_lit_len + HIST_ELEM_SIZE*tmp5] + inc dword [lit_len_hist + HIST_ELEM_SIZE*tmp5] or tmp5, LIT write_dword tmp5, m_out_buf inc f_i cmp f_i, [rsp + f_end_i_mem_offset] - jl final_bytes + jl .final_bytes -input_end: - cmp dword [stream + _end_of_stream], 0 - jne out_end - cmp dword [stream + _flush], _NO_FLUSH - jne out_end - jmp end +.input_end: + cmp word [stream + _end_of_stream], 0 + jne .out_end + cmp word [stream + _flush], _NO_FLUSH + jne .out_end + jmp .end -out_end: +.out_end: mov dword [stream + _internal_state_state], ZSTATE_CREATE_HDR -end: +.end: ;; Update input buffer mov f_end_i, [rsp + f_end_i_mem_offset] mov [stream + _total_in], f_i %+ d + mov [stream + _internal_state_block_end], f_i %+ d + add file_start, f_i mov [stream + _next_in], file_start sub f_end_i, f_i mov [stream + _avail_in], f_end_i %+ d ;; Update output buffer - mov tmp1, [stream + _level_buf] - mov [tmp1 + _icf_buf_next], m_out_buf + mov [level_buf + _icf_buf_next], m_out_buf ; len = state->bitbuf.buffer_used(); sub m_out_buf, [rsp + m_out_start] ; stream->avail_out -= len; - sub [tmp1 + _icf_buf_avail_out], m_out_buf + sub [level_buf + _icf_buf_avail_out], m_out_buf add rsp, stack_size POP_ALL ret +;; Shift defines over in order to iterate over all versions +%undef HASH_MASK +%xdefine HASH_MASK HASH_MASK1 + +%undef METHOD +%xdefine METHOD METHOD1 +%endrep + section .data align 4 c258: dq 258 diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_inflate.c 
b/src/main/native/compression/isa-l-master/igzip/igzip_inflate.c index 6b3f1f06..d8b9e13b 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_inflate.c +++ b/src/main/native/compression/isa-l-master/igzip/igzip_inflate.c @@ -832,7 +832,7 @@ static int inline setup_dynamic_header(struct inflate_state *state) /* Reads in the header pointed to by in_stream and sets up state to reflect that * header information*/ -int read_header(struct inflate_state *state) +static int read_header(struct inflate_state *state) { uint8_t bytes; uint32_t btype; @@ -895,7 +895,7 @@ int read_header(struct inflate_state *state) /* Reads in the header pointed to by in_stream and sets up state to reflect that * header information*/ -int read_header_stateful(struct inflate_state *state) +static int read_header_stateful(struct inflate_state *state) { uint64_t read_in_start = state->read_in; int32_t read_in_length_start = state->read_in_length; @@ -1213,6 +1213,7 @@ int isal_inflate(struct inflate_state *state) int ret = 0; if (state->block_state != ISAL_BLOCK_FINISH) { + state->total_out += state->tmp_out_valid - state->tmp_out_processed; /* If space in tmp_out buffer, decompress into the tmp_out_buffer */ if (state->tmp_out_valid < 2 * ISAL_DEF_HIST_SIZE) { /* Setup to start decoding into temp buffer */ @@ -1346,8 +1347,11 @@ int isal_inflate(struct inflate_state *state) } if (ret == ISAL_INVALID_LOOKBACK || ret == ISAL_INVALID_BLOCK - || ret == ISAL_INVALID_SYMBOL) + || ret == ISAL_INVALID_SYMBOL) { + state->total_out -= + state->tmp_out_valid - state->tmp_out_processed; return ret; + } } else if (state->tmp_out_valid == state->tmp_out_processed) { state->block_state = ISAL_BLOCK_FINISH; @@ -1355,6 +1359,8 @@ int isal_inflate(struct inflate_state *state) || state->crc_flag == ISAL_ZLIB_NO_HDR) finalize_adler32(state); } + + state->total_out -= state->tmp_out_valid - state->tmp_out_processed; } return ISAL_DECOMP_OK; diff --git 
a/src/main/native/compression/isa-l-master/igzip/igzip_inflate_multibinary.asm b/src/main/native/compression/isa-l-master/igzip/igzip_inflate_multibinary.asm index 4d608025..ef2ce683 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_inflate_multibinary.asm +++ b/src/main/native/compression/isa-l-master/igzip/igzip_inflate_multibinary.asm @@ -30,12 +30,6 @@ default rel [bits 64] -%ifidn __OUTPUT_FORMAT__, elf64 -%define WRT_OPT wrt ..plt -%else -%define WRT_OPT -%endif - %include "reg_sizes.asm" extern decode_huffman_code_block_stateless_base diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_level_buf_structs.h b/src/main/native/compression/isa-l-master/igzip/igzip_level_buf_structs.h index 5fda9cc9..5c195e3f 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_level_buf_structs.h +++ b/src/main/native/compression/isa-l-master/igzip/igzip_level_buf_structs.h @@ -1,18 +1,48 @@ #ifndef IGZIP_LEVEL_BUF_STRUCTS_H #define IGZIP_LEVEL_BUF_STRUCTS_H +#include "igzip_lib.h" #include "huff_codes.h" #include "encode_df.h" -struct level_2_buf { +#define MATCH_BUF_SIZE (4 * 1024) + +struct hash8k_buf { + uint16_t hash_table[IGZIP_HASH8K_HASH_SIZE]; +}; + +struct hash_hist_buf { + uint16_t hash_table[IGZIP_HASH_HIST_SIZE]; +}; + +struct hash_map_buf { + uint16_t hash_table[IGZIP_HASH_MAP_HASH_SIZE]; + struct deflate_icf *matches_next; + struct deflate_icf *matches_end; + struct deflate_icf matches[MATCH_BUF_SIZE]; + struct deflate_icf overflow[ISAL_LOOK_AHEAD]; +}; + +#define MAX_LVL_BUF_SIZE sizeof(struct hash_map_buf) + +struct level_buf { struct hufftables_icf encode_tables; + struct isal_mod_hist hist; uint32_t deflate_hdr_count; uint32_t deflate_hdr_extra_bits; uint8_t deflate_hdr[ISAL_DEF_MAX_HDR_SIZE]; - uint32_t block_start_index; - uint32_t block_in_length; struct deflate_icf *icf_buf_next; uint64_t icf_buf_avail_out; - struct deflate_icf icf_buf_start[]; + struct deflate_icf *icf_buf_start; + union { + struct hash8k_buf 
hash8k; + struct hash_hist_buf hash_hist; + struct hash_map_buf hash_map; + + struct hash8k_buf lvl1; + struct hash_hist_buf lvl2; + struct hash_map_buf lvl3; + }; }; + #endif diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_multibinary.asm b/src/main/native/compression/isa-l-master/igzip/igzip_multibinary.asm index 52df6d34..a006f415 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_multibinary.asm +++ b/src/main/native/compression/isa-l-master/igzip/igzip_multibinary.asm @@ -30,12 +30,6 @@ default rel [bits 64] -%ifidn __OUTPUT_FORMAT__, elf64 -%define WRT_OPT wrt ..plt -%else -%define WRT_OPT -%endif - %include "reg_sizes.asm" extern isal_deflate_body_base @@ -45,23 +39,37 @@ extern isal_deflate_body_04 extern isal_deflate_finish_base extern isal_deflate_finish_01 +extern isal_deflate_icf_body_hash8k_base +extern isal_deflate_icf_body_hash8k_01 +extern isal_deflate_icf_body_hash8k_02 +extern isal_deflate_icf_body_hash8k_04 +extern isal_deflate_icf_finish_hash8k_base +extern isal_deflate_icf_finish_hash8k_01 -extern isal_deflate_icf_body_base -extern isal_deflate_icf_body_01 -extern isal_deflate_icf_body_02 -extern isal_deflate_icf_body_04 -extern isal_deflate_icf_finish_base -extern isal_deflate_icf_finish_01 +extern isal_deflate_icf_body_hash_hist_base +extern isal_deflate_icf_body_hash_hist_01 +extern isal_deflate_icf_body_hash_hist_02 +extern isal_deflate_icf_body_hash_hist_04 +extern isal_deflate_icf_finish_hash_hist_base +extern isal_deflate_icf_finish_hash_hist_01 + +extern isal_deflate_icf_finish_hash_map_base extern isal_update_histogram_base extern isal_update_histogram_01 extern isal_update_histogram_04 +extern gen_icf_map_h1_base + extern encode_deflate_icf_base extern encode_deflate_icf_04 +extern set_long_icf_fg_base + %ifdef HAVE_AS_KNOWS_AVX512 extern encode_deflate_icf_06 +extern set_long_icf_fg_06 +extern gen_icf_map_lh1_06 %endif extern crc32_gzip_base @@ -71,8 +79,13 @@ extern adler32_base extern adler32_avx2_4 
extern adler32_sse -extern isal_deflate_hash_lvl0_base -extern isal_deflate_hash_lvl0_01 +extern isal_deflate_hash_base +extern isal_deflate_hash_crc_01 + +extern isal_deflate_hash_mad_base + +extern isal_deflate_icf_body_base +extern isal_deflate_icf_body_06 section .text @@ -83,10 +96,20 @@ mbin_dispatch_init5 isal_deflate_body, isal_deflate_body_base, isal_deflate_body mbin_interface isal_deflate_finish mbin_dispatch_init5 isal_deflate_finish, isal_deflate_finish_base, isal_deflate_finish_01, isal_deflate_finish_01, isal_deflate_finish_01 -mbin_interface isal_deflate_icf_body -mbin_dispatch_init5 isal_deflate_icf_body, isal_deflate_icf_body_base, isal_deflate_icf_body_01, isal_deflate_icf_body_02, isal_deflate_icf_body_04 -mbin_interface isal_deflate_icf_finish -mbin_dispatch_init5 isal_deflate_icf_finish, isal_deflate_icf_finish_base, isal_deflate_icf_finish_01, isal_deflate_icf_finish_01, isal_deflate_icf_finish_01 +mbin_interface isal_deflate_icf_body_lvl1 +mbin_dispatch_init5 isal_deflate_icf_body_lvl1, isal_deflate_icf_body_hash8k_base, isal_deflate_icf_body_hash8k_01, isal_deflate_icf_body_hash8k_02, isal_deflate_icf_body_hash8k_04 + +mbin_interface isal_deflate_icf_body_lvl2 +mbin_dispatch_init5 isal_deflate_icf_body_lvl2, isal_deflate_icf_body_hash_hist_base, isal_deflate_icf_body_hash_hist_01, isal_deflate_icf_body_hash_hist_02, isal_deflate_icf_body_hash_hist_04 + +mbin_interface isal_deflate_icf_finish_lvl1 +mbin_dispatch_init5 isal_deflate_icf_finish_lvl1, isal_deflate_icf_finish_hash8k_base, isal_deflate_icf_finish_hash8k_01, isal_deflate_icf_finish_hash8k_01, isal_deflate_icf_finish_hash8k_01 + +mbin_interface isal_deflate_icf_finish_lvl2 +mbin_dispatch_init5 isal_deflate_icf_finish_lvl2, isal_deflate_icf_finish_hash_hist_base, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01, isal_deflate_icf_finish_hash_hist_01 + +mbin_interface isal_deflate_icf_finish_lvl3 +mbin_dispatch_init5 isal_deflate_icf_finish_lvl3, 
isal_deflate_icf_finish_hash_map_base, isal_deflate_icf_finish_hash_map_base, isal_deflate_icf_finish_hash_map_base, isal_deflate_icf_finish_hash_map_base mbin_interface isal_update_histogram mbin_dispatch_init5 isal_update_histogram, isal_update_histogram_base, isal_update_histogram_01, isal_update_histogram_01, isal_update_histogram_04 @@ -94,16 +117,45 @@ mbin_dispatch_init5 isal_update_histogram, isal_update_histogram_base, isal_upda %ifdef HAVE_AS_KNOWS_AVX512 mbin_interface encode_deflate_icf mbin_dispatch_init6 encode_deflate_icf, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_04, encode_deflate_icf_06 + +mbin_interface set_long_icf_fg +mbin_dispatch_init6 set_long_icf_fg, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_06 + +mbin_interface gen_icf_map_lh1 +mbin_dispatch_init6 gen_icf_map_lh1, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_lh1_06 %else mbin_interface encode_deflate_icf mbin_dispatch_init5 encode_deflate_icf, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_base, encode_deflate_icf_04 + +mbin_interface set_long_icf_fg +mbin_dispatch_init5 set_long_icf_fg, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_base, set_long_icf_fg_base + +mbin_interface gen_icf_map_lh1 +mbin_dispatch_init5 gen_icf_map_lh1, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_h1_base, gen_icf_map_h1_base %endif mbin_interface crc32_gzip -mbin_dispatch_init5 crc32_gzip, crc32_gzip_base, crc32_gzip_base, crc32_gzip_01, crc32_gzip_01 +mbin_dispatch_init_clmul crc32_gzip, crc32_gzip_base, crc32_gzip_01 mbin_interface isal_adler32 mbin_dispatch_init5 isal_adler32, adler32_base, adler32_sse, adler32_sse, adler32_avx2_4 mbin_interface isal_deflate_hash_lvl0 -mbin_dispatch_init5 isal_deflate_hash_lvl0, isal_deflate_hash_lvl0_base, isal_deflate_hash_lvl0_01, isal_deflate_hash_lvl0_01, 
isal_deflate_hash_lvl0_01 +mbin_dispatch_init5 isal_deflate_hash_lvl0, isal_deflate_hash_base, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01 + +mbin_interface isal_deflate_hash_lvl1 +mbin_dispatch_init5 isal_deflate_hash_lvl1, isal_deflate_hash_base, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01 + +mbin_interface isal_deflate_hash_lvl2 +mbin_dispatch_init5 isal_deflate_hash_lvl2, isal_deflate_hash_base, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01, isal_deflate_hash_crc_01 + +mbin_interface isal_deflate_hash_lvl3 +mbin_dispatch_init5 isal_deflate_hash_lvl3, isal_deflate_hash_base, isal_deflate_hash_base, isal_deflate_hash_base, isal_deflate_hash_mad_base + +%ifdef HAVE_AS_KNOWS_AVX512 +mbin_interface isal_deflate_icf_body +mbin_dispatch_init6 isal_deflate_icf_body, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_06 +%else +mbin_interface isal_deflate_icf_body +mbin_dispatch_init5 isal_deflate_icf_body, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_base, isal_deflate_icf_body_base +%endif diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_rand_test.c b/src/main/native/compression/isa-l-master/igzip/igzip_rand_test.c index e93f792b..5a537e9f 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_rand_test.c +++ b/src/main/native/compression/isa-l-master/igzip/igzip_rand_test.c @@ -52,6 +52,8 @@ #define PAGE_SIZE 4*1024 +#define MAX_FILE_SIZE 0x7fff8fff + #define str1 "Short test string" #define str2 "one two three four five six seven eight nine ten eleven twelve " \ "thirteen fourteen fifteen sixteen" @@ -83,6 +85,8 @@ enum IGZIP_TEST_ERROR_CODES { INFLATE_LEFTOVER_INPUT, INFLATE_INCORRECT_OUTPUT_SIZE, INFLATE_INVALID_LOOK_BACK_DISTANCE, + INFLATE_INPUT_STREAM_INTEGRITY_ERROR, + INFLATE_OUTPUT_STREAM_INTEGRITY_ERROR, INVALID_GZIP_HEADER, 
INCORRECT_GZIP_TRAILER, INVALID_ZLIB_HEADER, @@ -240,6 +244,29 @@ int get_rand_data_length(void) return rand() & max_mask; } +int get_rand_level(void) +{ + return ISAL_DEF_MIN_LEVEL + rand() % (ISAL_DEF_MAX_LEVEL - ISAL_DEF_MIN_LEVEL + 1); + +} + +int get_rand_level_buf_size(int level) +{ + int size; + switch (level) { + case 3: + size = rand() % IBUF_SIZE + ISAL_DEF_LVL3_MIN; + break; + case 2: + size = rand() % IBUF_SIZE + ISAL_DEF_LVL2_MIN; + break; + case 1: + default: + size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN; + } + return size; +} + void print_error(int error_code) { switch (error_code) { @@ -296,6 +323,12 @@ void print_error(int error_code) case INFLATE_INVALID_LOOK_BACK_DISTANCE: printf("error: invalid look back distance found while decompressing\n"); break; + case INFLATE_INPUT_STREAM_INTEGRITY_ERROR: + printf("error: inconsistent input buffer\n"); + break; + case INFLATE_OUTPUT_STREAM_INTEGRITY_ERROR: + printf("error: inconsistent output buffer\n"); + break; case INVALID_GZIP_HEADER: printf("error: incorrect gzip header found when inflating data\n"); break; @@ -457,6 +490,68 @@ int inflate_stateless_pass(uint8_t * compress_buf, uint64_t compress_len, return ret; } +/* Check if that the state of the data stream is consistent */ +int inflate_state_valid_check(struct inflate_state *state, uint8_t * in_buf, uint32_t in_size, + uint8_t * out_buf, uint32_t out_size, uint32_t in_processed, + uint32_t out_processed, uint32_t data_size) +{ + uint32_t in_buffer_size, total_out, out_buffer_size; + + in_buffer_size = (in_size == 0) ? 0 : state->next_in - in_buf + state->avail_in; + + /* Check for a consistent amount of data processed */ + if (in_buffer_size != in_size) + return INFLATE_INPUT_STREAM_INTEGRITY_ERROR; + + total_out = + (out_size == 0) ? out_processed : out_processed + (state->next_out - out_buf); + out_buffer_size = (out_size == 0) ? 
0 : state->next_out - out_buf + state->avail_out; + + /* Check for a consistent amount of data decompressed */ + if (total_out != state->total_out || out_buffer_size != out_size) + return INFLATE_OUTPUT_STREAM_INTEGRITY_ERROR; + + return 0; +} + +/* Performs decompression with checks to discover and verify the state of the + * stream + * state: inflate data structure which has been initialized to use + * in_buf and out_buf as the buffers + * compress_len: size of all input compressed data + * data_size: size of all available output buffers + * in_buf: next buffer of data to be inflated + * in_size: size of in_buf + * out_buf: next output buffer where data is stored + * out_size: size of out_buf + * in_processed: the amount of input data which has been loaded into buffers + * to be inflated, this includes the data in in_buf + * out_processed: the amount of output data which has been decompressed and stored, + * this does not include the data in the current out_buf +*/ +int isal_inflate_with_checks(struct inflate_state *state, uint32_t compress_len, + uint32_t data_size, uint8_t * in_buf, uint32_t in_size, + uint32_t in_processed, uint8_t * out_buf, uint32_t out_size, + uint32_t out_processed) +{ + int ret, stream_check = 0; + + ret = isal_inflate(state); + + /* Verify the stream is in a valid state when no errors occurred */ + if (ret >= 0) { + stream_check = + inflate_state_valid_check(state, in_buf, in_size, out_buf, out_size, + in_processed, out_processed, data_size); + } + + if (stream_check != 0) + return stream_check; + + return ret; + +} + int inflate_multi_pass(uint8_t * compress_buf, uint64_t compress_len, uint8_t * uncompress_buf, uint32_t * uncompress_len, uint32_t gzip_flag, uint8_t * dict, uint32_t dict_len) @@ -562,7 +657,9 @@ int inflate_multi_pass(uint8_t * compress_buf, uint64_t compress_len, } } - ret = isal_inflate(state); + ret = isal_inflate_with_checks(state, compress_len, *uncompress_len, comp_tmp, + comp_tmp_size, comp_processed, uncomp_tmp, +
uncomp_tmp_size, uncomp_processed); if (state->block_state == ISAL_BLOCK_FINISH || ret != 0) { memcpy(uncompress_buf + uncomp_processed, uncomp_tmp, uncomp_tmp_size); @@ -620,7 +717,7 @@ int inflate_multi_pass(uint8_t * compress_buf, uint64_t compress_len, } /* Inflate the compressed data and check that the decompressed data agrees with the input data */ -int inflate_check(uint8_t * z_buf, int z_size, uint8_t * in_buf, int in_size, +int inflate_check(uint8_t * z_buf, uint32_t z_size, uint8_t * in_buf, uint32_t in_size, uint32_t gzip_flag, uint8_t * dict, uint32_t dict_len) { /* Test inflate with reference inflate */ @@ -706,7 +803,12 @@ int inflate_check(uint8_t * z_buf, int z_size, uint8_t * in_buf, int in_size, case INCORRECT_ZLIB_TRAILER: gzip_trl_result = INCORRECT_ZLIB_TRAILER; break; - + case INFLATE_INPUT_STREAM_INTEGRITY_ERROR: + return INFLATE_INPUT_STREAM_INTEGRITY_ERROR; + break; + case INFLATE_OUTPUT_STREAM_INTEGRITY_ERROR: + return INFLATE_OUTPUT_STREAM_INTEGRITY_ERROR; + break; default: return INFLATE_GENERAL_ERROR; break; @@ -849,6 +951,7 @@ int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed uint8_t *level_buf = NULL; struct isal_hufftables *huff_tmp; uint32_t reset_test_flag = 0; + uint8_t tmp_symbol; #ifdef VERBOSE printf("Starting Compress Multi Pass\n"); @@ -885,7 +988,7 @@ int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed stream.level = level; if (level >= 1) { - level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN; + level_buf_size = get_rand_level_buf_size(stream.level); level_buf = malloc(level_buf_size); create_rand_repeat_data(level_buf, level_buf_size); stream.level_buf = level_buf; @@ -931,6 +1034,20 @@ int compress_multi_pass(uint8_t * data, uint32_t data_size, uint8_t * compressed stream.next_in = in_buf; } } + } else { + /* Randomly modify data after next in */ + if (rand() % 4 == 0) { + + tmp_symbol = rand(); +#ifdef VERBOSE + printf + ("Modifying data at index 0x%x from 
0x%x to 0x%x before recalling isal_deflate\n", + in_processed - stream.avail_in, + data[in_processed - stream.avail_in], tmp_symbol); +#endif + *stream.next_in = tmp_symbol; + data[in_processed - stream.avail_in] = tmp_symbol; + } } /* Setup out buffer for next round of compression */ @@ -1055,7 +1172,7 @@ int compress_single_pass(uint8_t * data, uint32_t data_size, uint8_t * compresse stream.level = level; if (level >= 1) { - level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN; + level_buf_size = get_rand_level_buf_size(stream.level); level_buf = malloc(level_buf_size); create_rand_repeat_data(level_buf, level_buf_size); stream.level_buf = level_buf; @@ -1126,21 +1243,28 @@ int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_ stream.gzip_flag = gzip_flag; stream.level = level; - if (level >= 1) { + if (level == 1) { + /* This is to test case where level buf uses already existing + * internal buffers */ level_buf_size = rand() % IBUF_SIZE; - /* printf("level_buf_size = %d\n", level_buf_size); */ + if (level_buf_size >= ISAL_DEF_LVL1_MIN) { level_buf = malloc(level_buf_size); create_rand_repeat_data(level_buf, level_buf_size); stream.level_buf = level_buf; stream.level_buf_size = level_buf_size; } + } else if (level > 1) { + level_buf_size = get_rand_level_buf_size(level); + level_buf = malloc(level_buf_size); + create_rand_repeat_data(level_buf, level_buf_size); + stream.level_buf = level_buf; + stream.level_buf_size = level_buf_size; } - if (reset_test_flag) { + if (reset_test_flag) isal_deflate_reset(&stream); - /* printf("post reset level_buf_size = %d\n", level_buf_size); */ - } + ret = isal_deflate_stateless(&stream); if (level_buf != NULL) @@ -1161,8 +1285,11 @@ int compress_stateless(uint8_t * data, uint32_t data_size, uint8_t * compressed_ return COMPRESS_OUT_BUFFER_OVERFLOW; else if (ret == INVALID_FLUSH) return INVALID_FLUSH_ERROR; - else + else { + printf("Return due to ret = %d with level = %d or %d\n", ret, level, + 
stream.level); return COMPRESS_GENERAL_ERROR; + } } if (!stream.end_of_stream) { @@ -1211,6 +1338,7 @@ int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t * stream.level = 0; stream.level_buf = NULL; stream.level_buf_size = 0; + stream.gzip_flag = 0; } stream.flush = FULL_FLUSH; @@ -1219,14 +1347,23 @@ int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t * stream.next_out = compressed_buf; stream.level = level; - if (level >= 1) { + if (level == 1) { + /* This is to test case where level_buf uses already existing + * internal buffers */ level_buf_size = rand() % IBUF_SIZE; + if (level_buf_size >= ISAL_DEF_LVL1_MIN) { level_buf = malloc(level_buf_size); create_rand_repeat_data(level_buf, level_buf_size); stream.level_buf = level_buf; stream.level_buf_size = level_buf_size; } + } else if (level > 1) { + level_buf_size = get_rand_level_buf_size(level); + level_buf = malloc(level_buf_size); + create_rand_repeat_data(level_buf, level_buf_size); + stream.level_buf = level_buf; + stream.level_buf_size = level_buf_size; } if (reset_test_flag) @@ -1269,6 +1406,7 @@ int compress_stateless_full_flush(uint8_t * data, uint32_t data_size, uint8_t * set_random_hufftable(&stream); ret = isal_deflate_stateless(&stream); + assert(stream.internal_state.bitbuf.m_bit_count == 0); assert(compressed_buf == stream.next_out - stream.total_out); @@ -1356,7 +1494,7 @@ int compress_full_flush(uint8_t * data, uint32_t data_size, uint8_t * compressed stream.level = level; if (level >= 1) { - level_buf_size = rand() % IBUF_SIZE + ISAL_DEF_LVL1_MIN; + level_buf_size = get_rand_level_buf_size(stream.level); if (level_buf_size >= ISAL_DEF_LVL1_MIN) { level_buf = malloc(level_buf_size); create_rand_repeat_data(level_buf, level_buf_size); @@ -1534,7 +1672,7 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_ uint8_t *in_buf = NULL; gzip_flag = rand() % 5; - level = rand() % 2; + level = get_rand_level(); if (in_size != 
0) { in_buf = malloc(in_size); @@ -1592,7 +1730,9 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_ #ifdef VERBOSE if (ret) { - printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag); + printf + ("Compressed array at level %d with gzip flag %d and flush type %d: ", + level, gzip_flag, flush_type); print_uint8_t(z_buf, z_size); printf("\n"); printf("Data: "); @@ -1639,7 +1779,9 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_ ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag, NULL, 0); #ifdef VERBOSE if (ret) { - printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag); + printf + ("Compressed array at level %d with gzip flag %d and flush type %d: ", + level, gzip_flag, flush_type); print_uint8_t(z_buf, z_size); printf("\n"); printf("Data: "); @@ -1669,23 +1811,31 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_ level); if (overflow != COMPRESS_OUT_BUFFER_OVERFLOW) { -#ifdef VERBOSE - printf("overflow error = %d\n", overflow); - print_error(overflow); - if (overflow == 0) { - overflow = + if (overflow == 0) + ret = inflate_check(z_buf, z_size, in_buf, in_size, gzip_flag, NULL, 0); - printf("inflate ret = %d\n", overflow); + + if (overflow != 0 || ret != 0) { +#ifdef VERBOSE + printf("overflow error = %d\n", overflow); print_error(overflow); - } - printf("Compressed array at level %d with gzip flag %d: ", level, - gzip_flag); - print_uint8_t(z_buf, z_size); - printf("\n"); - printf("Data: "); - print_uint8_t(in_buf, in_size); + printf("inflate ret = %d\n", ret); + print_error(ret); + + printf + ("Compressed array at level %d with gzip flag %d and flush type %d: ", + level, gzip_flag, flush_type); + + print_uint8_t(z_buf, z_size); + printf("\n"); + printf("Data: "); + print_uint8_t(in_buf, in_size); #endif + printf("Failed on compress single pass overflow\n"); + print_error(ret); + ret = OVERFLOW_TEST_ERROR;
+ } } } @@ -1724,8 +1874,9 @@ int test_compress_stateless(uint8_t * in_data, uint32_t in_size, uint32_t flush_ print_error(ret); #ifdef VERBOSE if (ret) { - printf("Compressed array at level %d with gzip flag %d: ", level, - gzip_flag); + printf + ("Compressed array at level %d with gzip flag %d and flush type %d: ", + level, gzip_flag, FULL_FLUSH); print_uint8_t(z_buf, z_size); printf("\n"); printf("Data: "); @@ -1761,7 +1912,7 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type) } gzip_flag = rand() % 5; - level = rand() % 2; + level = get_rand_level(); z_size = z_size_max; @@ -1799,7 +1950,9 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type) if (ret) { #ifdef VERBOSE - printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag); + printf + ("Compressed array at level %d with gzip flag %d and flush type %d: ", + level, gzip_flag, flush_type); print_uint8_t(z_buf, z_size); printf("\n"); if (dict != NULL) { @@ -1845,7 +1998,9 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type) if (ret) { #ifdef VERBOSE - printf("Compressed array at level %d with gzip flag %d: ", level, gzip_flag); + printf + ("Compressed array at level %d with gzip flag %d and flush type %d: ", + level, gzip_flag, flush_type); print_uint8_t(z_buf, z_size); printf("\n"); if (dict != NULL) { @@ -1875,7 +2030,7 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type) z_compressed_size = in_size + 1; z_size = rand() % z_compressed_size; - create_rand_repeat_data(z_buf, z_size_max); + create_rand_repeat_data(z_buf, z_size); overflow = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type, gzip_flag, level, dict, dict_len); @@ -1894,16 +2049,17 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type) printf("overflow error = %d\n", overflow); print_error(overflow); printf("inflate ret = %d\n", ret); - print_error(overflow); + print_error(ret); - 
printf("Compressed array at level %d with gzip flag %d: ", level, - gzip_flag); + printf + ("Compressed array at level %d with gzip flag %d and flush type %d: ", + level, gzip_flag, flush_type); print_uint8_t(z_buf, z_size); printf("\n"); printf("Data: "); print_uint8_t(in_buf, in_size); #endif - printf("Failed on compress multi pass overflow\n"); + printf("Failed on compress single pass overflow\n"); print_error(ret); ret = OVERFLOW_TEST_ERROR; } @@ -1912,7 +2068,7 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type) fin_ret |= ret; if (flush_type == NO_FLUSH) { - create_rand_repeat_data(z_buf, z_size_max); + create_rand_repeat_data(z_buf, z_size); overflow = compress_multi_pass(in_buf, in_size, z_buf, &z_size, flush_type, @@ -1932,10 +2088,10 @@ int test_compress(uint8_t * in_buf, uint32_t in_size, uint32_t flush_type) printf("overflow error = %d\n", overflow); print_error(overflow); printf("inflate ret = %d\n", ret); - print_error(overflow); - - printf("Compressed array at level %d with gzip flag %d: ", - level, gzip_flag); + print_error(ret); + printf + ("Compressed array at level %d with gzip flag %d and flush type %d: ", + level, gzip_flag, flush_type); print_uint8_t(z_buf, z_size); printf("\n"); printf("Data: "); @@ -1962,7 +2118,7 @@ int test_flush(uint8_t * in_buf, uint32_t in_size) uint8_t *z_buf = NULL; gzip_flag = rand() % 5; - level = rand() % 2; + level = get_rand_level(); z_size = 2 * in_size + 2 * hdr_bytes + 8; if (gzip_flag == IGZIP_GZIP) @@ -1982,7 +2138,7 @@ int test_flush(uint8_t * in_buf, uint32_t in_size) create_rand_repeat_data(z_buf, z_size); while (flush_type < 3) - flush_type = rand(); + flush_type = rand() & 0xFFFF; /* Test invalid flush */ ret = compress_single_pass(in_buf, in_size, z_buf, &z_size, flush_type, @@ -2032,7 +2188,7 @@ int test_full_flush(uint8_t * in_buf, uint32_t in_size) uint8_t *z_buf = NULL; gzip_flag = rand() % 5; - level = rand() % 2; + level = get_rand_level(); z_size = 2 * in_size + 
MAX_LOOPS * (hdr_bytes + 5); if (gzip_flag == IGZIP_GZIP) @@ -2059,7 +2215,9 @@ int test_full_flush(uint8_t * in_buf, uint32_t in_size) if (ret) { #ifdef VERBOSE - printf("Compressed array: "); + printf + ("Compressed array at level %d with gzip flag %d and flush type %d: ", + level, gzip_flag, FULL_FLUSH); print_uint8_t(z_buf, z_size); printf("\n"); printf("Data: "); @@ -2128,6 +2286,9 @@ int test_compress_file(char *file_name) return FILE_READ_FAILED; in_size = get_filesize(in_file); + if (in_size > MAX_FILE_SIZE) + in_size = MAX_FILE_SIZE; + if (in_size != 0) { in_buf = malloc(in_size); if (in_buf == NULL) diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_set_long_icf_fg_06.asm b/src/main/native/compression/isa-l-master/igzip/igzip_set_long_icf_fg_06.asm new file mode 100644 index 00000000..966981cc --- /dev/null +++ b/src/main/native/compression/isa-l-master/igzip/igzip_set_long_icf_fg_06.asm @@ -0,0 +1,319 @@ +%include "reg_sizes.asm" +%include "lz0a_const.asm" +%include "data_struct2.asm" +%include "igzip_compare_types.asm" +%define NEQ 4 + +%ifdef HAVE_AS_KNOWS_AVX512 +%ifidn __OUTPUT_FORMAT__, win64 +%define arg1 rcx +%define arg2 rdx +%define arg3 r8 +%define dist_code rsi +%define len rdi +%else +%define arg1 rdi +%define arg2 rsi +%define arg3 rdx +%define dist_code rcx +%define len r8 +%endif + +%define next_in arg1 +%define end_in arg2 +%define match_lookup arg3 +%define match_in rax +%define dist r9 +%define match_offset r10 +%define tmp1 r11 + +%define zmatch_lookup zmm0 +%define zmatch_lookup2 zmm1 +%define zlens zmm2 +%define zdist_codes zmm3 +%define zdist_extras zmm4 +%define zdists zmm5 +%define zdists2 zmm6 +%define zlens1 zmm7 +%define zlens2 zmm8 +%define zlookup zmm9 +%define zlookup2 zmm10 +%define datas zmm11 +%define ztmp1 zmm12 +%define ztmp2 zmm13 +%define zvect_size zmm17 +%define ztwofiftyfour zmm18 +%define ztwofiftysix zmm19 +%define ztwosixtytwo zmm20 +%define znlen_mask zmm21 +%define zbswap zmm22 +%define 
zqword_shuf zmm23 +%define zdatas_perm3 zmm24 +%define zdatas_perm2 zmm25 +%define zincrement zmm26 +%define zdists_mask zmm27 +%define zdists_start zmm28 +%define zlong_lens2 zmm29 +%define zlong_lens zmm30 +%define zlens_mask zmm31 + +%ifidn __OUTPUT_FORMAT__, win64 +%define stack_size 8*16 + 2 * 8 + 8 +%define func(x) proc_frame x +%macro FUNC_SAVE 0 + alloc_stack stack_size + vmovdqa [rsp + 0*16], xmm6 + vmovdqa [rsp + 1*16], xmm7 + vmovdqa [rsp + 2*16], xmm8 + vmovdqa [rsp + 3*16], xmm9 + vmovdqa [rsp + 4*16], xmm10 + vmovdqa [rsp + 5*16], xmm11 + vmovdqa [rsp + 6*16], xmm12 + vmovdqa [rsp + 7*16], xmm13 + save_reg rsi, 8*16 + 0*8 + save_reg rdi, 8*16 + 1*8 + end_prolog +%endm + +%macro FUNC_RESTORE 0 + vmovdqa xmm6, [rsp + 0*16] + vmovdqa xmm7, [rsp + 1*16] + vmovdqa xmm8, [rsp + 2*16] + vmovdqa xmm9, [rsp + 3*16] + vmovdqa xmm10, [rsp + 4*16] + vmovdqa xmm11, [rsp + 5*16] + vmovdqa xmm12, [rsp + 6*16] + vmovdqa xmm13, [rsp + 7*16] + + mov rsi, [rsp + 8*16 + 0*8] + mov rdi, [rsp + 8*16 + 1*8] + add rsp, stack_size +%endm +%else +%define func(x) x: +%macro FUNC_SAVE 0 +%endm + +%macro FUNC_RESTORE 0 +%endm +%endif +%define VECT_SIZE 16 + +global set_long_icf_fg_06 +func(set_long_icf_fg_06) + FUNC_SAVE + + sub end_in, LA + 15 + vmovdqu32 zlong_lens, [long_len] + vmovdqu32 zlong_lens2, [long_len2] + vmovdqu32 zlens_mask, [len_mask] + vmovdqu16 zdists_start, [dist_start] + vmovdqu32 zdists_mask, [dists_mask] + vmovdqu32 zincrement, [increment] + vmovdqu64 zdatas_perm2, [datas_perm2] + vmovdqu64 zdatas_perm3, [datas_perm3] + vmovdqu64 zqword_shuf, [qword_shuf] + vmovdqu64 zbswap, [bswap_shuf] + vmovdqu64 znlen_mask, [nlen_mask] + vmovdqu64 zvect_size, [vect_size] + vmovdqu64 ztwofiftyfour, [twofiftyfour] + vmovdqu64 ztwofiftysix, [twofiftysix] + vmovdqu64 ztwosixtytwo, [twosixtytwo] + vmovdqu32 zmatch_lookup, [match_lookup] + +fill_loop: ; Tahiti is a magical place + vmovdqu32 zmatch_lookup2, zmatch_lookup + vmovdqu32 zmatch_lookup, [match_lookup + ICF_CODE_BYTES
* VECT_SIZE] + + cmp next_in, end_in + jae end_fill + vpandd zlens, zmatch_lookup2, zlens_mask + vpcmpgtd k3, zlens, zlong_lens + +;; Speculatively increment + add next_in, VECT_SIZE + add match_lookup, ICF_CODE_BYTES * VECT_SIZE + + ktestw k3, k3 + jz fill_loop + + vpsrld zdist_codes, zmatch_lookup2, DIST_OFFSET + vpmovdw zdists %+ y, zdist_codes ; Relies on perm working mod 32 + vpermw zdists, zdists, zdists_start + vpmovzxwd zdists, zdists %+ y + + vpsrld zdist_extras, zmatch_lookup2, EXTRA_BITS_OFFSET + vpsubd zdist_extras, zincrement, zdist_extras + + vpsubd zdists, zdist_extras, zdists + vextracti32x8 zdists2 %+ y, zdists, 1 + kmovb k6, k3 + kshiftrw k7, k3, 8 + vpgatherdq zlens1 {k6}, [next_in + zdists %+ y - 8] + vpgatherdq zlens2 {k7}, [next_in + zdists2 %+ y - 8] + + vmovdqu8 datas %+ y, [next_in - 8] + vpermq zlookup, zdatas_perm2, datas + vpshufb zlookup, zlookup, zqword_shuf + vpermq zlookup2, zdatas_perm3, datas + vpshufb zlookup2, zlookup2, zqword_shuf + + vpxorq zlens1, zlens1, zlookup + vpxorq zlens2, zlens2, zlookup2 + + vpshufb zlens1, zlens1, zbswap + vpshufb zlens2, zlens2, zbswap + vplzcntq zlens1, zlens1 + vplzcntq zlens2, zlens2 + vpmovqd zlens1 %+ y, zlens1 + vpmovqd zlens2 %+ y, zlens2 + vinserti32x8 zlens1, zlens2 %+ y, 1 + vpsrld zlens1 {k3}{z}, zlens1, 3 + + vpandd zmatch_lookup2 {k3}{z}, zmatch_lookup2, znlen_mask + vpaddd zmatch_lookup2 {k3}{z}, zmatch_lookup2, ztwosixtytwo + vpaddd zmatch_lookup2 {k3}{z}, zmatch_lookup2, zlens1 + + vmovdqu32 [match_lookup - ICF_CODE_BYTES * VECT_SIZE] {k3}, zmatch_lookup2 + + vpcmpgtd k3, zlens1, zlong_lens2 + ktestw k3, k3 + jz fill_loop + + vpsubd zdists, zincrement, zdists + + vpcompressd zdists2 {k3}, zdists + vpcompressd zmatch_lookup2 {k3}, zmatch_lookup2 + kmovq match_offset, k3 + tzcnt match_offset, match_offset + + vmovd dist %+ d, zdists2 %+ x + lea next_in, [next_in + match_offset - VECT_SIZE] + lea match_lookup, [match_lookup + ICF_CODE_BYTES * (match_offset - VECT_SIZE)] + mov match_in, 
next_in + sub match_in, dist + + mov len, 2 +%rep 3 + vmovdqu8 ztmp1, [next_in + len] + vmovdqu8 ztmp2, [match_in + len] + vpcmpb k3, ztmp1, [match_in + len], NEQ + ktestq k3, k3 + jnz miscompare + + add len, 64 +%endrep + + vmovdqu8 ztmp1, [next_in + len] + vmovdqu8 ztmp2, [match_in + len] + vpcmpb k3, ztmp1, ztmp2, 4 + +miscompare: + kmovq tmp1, k3 + tzcnt tmp1, tmp1 + add len, tmp1 + add next_in, len + lea match_lookup, [match_lookup + ICF_CODE_BYTES * len] + vmovdqu32 zmatch_lookup, [match_lookup] + + vpbroadcastd zmatch_lookup2, zmatch_lookup2 %+ x + vpandd zmatch_lookup2, zmatch_lookup2, znlen_mask + + vpbroadcastd zlens1, len %+ d + vpsubd zlens1, zlens1, zincrement + vpaddd zlens1, zlens1, ztwofiftyfour + neg len + +update_match_lookup: + vpandd zlens2, zlens_mask, [match_lookup + ICF_CODE_BYTES * len] + vpcmpgtd k3, zlens1, zlens2 + vpcmpgtd k4, zlens1, ztwofiftysix + kandw k3, k3, k4 + + vpaddd zlens2 {k3}{z}, zlens1, zmatch_lookup2 + + vmovdqu32 [match_lookup + ICF_CODE_BYTES * len] {k3}, zlens2 + + knotw k3, k3 + ktestw k3, k3 + jnz fill_loop + + add len, VECT_SIZE + vpsubd zlens1, zlens1, zvect_size + + jmp update_match_lookup +end_fill: + + FUNC_RESTORE + ret + +section .data +align 64 +dist_start: + dw 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d + dw 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1 + dw 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01 + dw 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000 +len_mask: + dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK + dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK + dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK + dd LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK, LIT_LEN_MASK +dists_mask: + dd LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK + dd LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK + dd LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK, LIT_DIST_MASK + dd LIT_DIST_MASK, LIT_DIST_MASK, 
LIT_DIST_MASK, LIT_DIST_MASK +long_len: + dd 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105 + dd 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105, 0x105 +long_len2: + dd 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7 + dd 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7 + +increment: + dd 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + dd 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf +datas_perm2: + dq 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1 +datas_perm3: + dq 0x1, 0x2, 0x1, 0x2, 0x1, 0x2, 0x1, 0x2 +bswap_shuf: + db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 + db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 + db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 + db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 + db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 + db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 + db 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 + db 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 +qword_shuf: + db 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + db 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8 + db 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9 + db 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa + db 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb + db 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc + db 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd + db 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe + db 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf +vect_size: + dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE + dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE + dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE + dd VECT_SIZE, VECT_SIZE, VECT_SIZE, VECT_SIZE +twofiftyfour: + dd 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe + dd 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe +twofiftysix: + dd 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100 + dd 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100 +twosixtytwo: + dd 0x106, 0x106, 0x106, 0x106, 0x106, 0x106, 0x106, 0x106 + dd 0x106, 0x106, 0x106, 0x106, 0x106, 0x106, 0x106, 0x106 +nlen_mask: + dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00 + dd 0xfffffc00, 0xfffffc00, 
0xfffffc00, 0xfffffc00 + dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00 + dd 0xfffffc00, 0xfffffc00, 0xfffffc00, 0xfffffc00 +%endif diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_stateless_file_perf.c b/src/main/native/compression/isa-l-master/igzip/igzip_stateless_file_perf.c index a4c1195d..23182060 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_stateless_file_perf.c +++ b/src/main/native/compression/isa-l-master/igzip/igzip_stateless_file_perf.c @@ -42,6 +42,59 @@ # define RUN_MEM_SIZE 500000000 #endif +int level_size_buf[10] = { +#ifdef ISAL_DEF_LVL0_DEFAULT + ISAL_DEF_LVL0_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL1_DEFAULT + ISAL_DEF_LVL1_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL2_DEFAULT + ISAL_DEF_LVL2_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL3_DEFAULT + ISAL_DEF_LVL3_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL4_DEFAULT + ISAL_DEF_LVL4_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL5_DEFAULT + ISAL_DEF_LVL5_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL6_DEFAULT + ISAL_DEF_LVL6_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL7_DEFAULT + ISAL_DEF_LVL7_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL8_DEFAULT + ISAL_DEF_LVL8_DEFAULT, +#else + 0, +#endif +#ifdef ISAL_DEF_LVL9_DEFAULT + ISAL_DEF_LVL9_DEFAULT, +#else + 0, +#endif +}; + struct isal_zstream stream; int usage(void) @@ -49,7 +102,7 @@ int usage(void) fprintf(stderr, "Usage: igzip_stateless_file_perf [options] \n" " -h help\n" - " -X use compression level X with 0 <= X <= 1\n" + " -X use compression level X with 0 <= X <= 2\n" " -i number of iterations (at least 1)\n" " -o output file for compresed data\n"); exit(0); @@ -66,7 +119,17 @@ int main(int argc, char *argv[]) int level = 0, level_size = 0; char *in_file_name = NULL, *out_file_name = NULL; - while ((c = getopt(argc, argv, "h01i:o:")) != -1) { + while ((c = getopt(argc, argv, "h0123456789i:o:")) != -1) { + if (c >= '0' && c <= '9') { + if (c > '0' + 
ISAL_DEF_MAX_LEVEL) + usage(); + else { + level = c - '0'; + level_size = level_size_buf[level]; + } + continue; + } + switch (c) { case 'o': out_file_name = optarg; @@ -76,12 +139,6 @@ int main(int argc, char *argv[]) if (iterations < 1) usage(); break; - case '1': - level = 1; - level_size = ISAL_DEF_LVL1_LARGE; - break; - case '0': - break; case 'h': default: usage(); diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_sync_flush_perf.c b/src/main/native/compression/isa-l-master/igzip/igzip_sync_flush_perf.c deleted file mode 100644 index 46d89d67..00000000 --- a/src/main/native/compression/isa-l-master/igzip/igzip_sync_flush_perf.c +++ /dev/null @@ -1,96 +0,0 @@ -/********************************************************************** - Copyright(c) 2011-2016 Intel Corporation All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**********************************************************************/ - -#include -#include -#include -#include -#include "igzip_lib.h" -#include "test.h" - -#define TEST_LEN (1024*1024) -#define IBUF_SIZE (1024*1024) -#define OBUF_SIZE (1024*1024) - -#define TEST_LOOPS 400 -#define TEST_TYPE_STR "_warm" - -void create_data(unsigned char *data, int size) -{ - char c = 'a'; - while (size--) - *data++ = c = c < 'z' ? c + 1 : 'a'; -} - -int main(int argc, char *argv[]) -{ - int i = 1; - struct isal_zstream stream; - unsigned char inbuf[IBUF_SIZE], zbuf[OBUF_SIZE]; - struct perf start, stop; - - create_data(inbuf, TEST_LEN); - printf("Window Size: %d K\n", IGZIP_HIST_SIZE / 1024); - printf("igzip_sync_flush_perf: \n"); - fflush(0); - - perf_start(&start); - - for (i = 0; i < TEST_LOOPS; i++) { - isal_deflate_init(&stream); - - stream.avail_in = TEST_LEN; - if (i == (TEST_LOOPS - 1)) - stream.end_of_stream = 1; - else - stream.end_of_stream = 0; - stream.next_in = inbuf; - stream.flush = SYNC_FLUSH; - - do { - stream.avail_out = OBUF_SIZE; - stream.next_out = zbuf; - isal_deflate(&stream); - } while (stream.avail_out == 0); - - } - - perf_stop(&stop); - - printf("igzip_sync_flush_perf" TEST_TYPE_STR ": "); - perf_print(stop, start, (long long)(TEST_LEN) * (i)); - - if (!stream.end_of_stream) { - printf("error: compression test could not fit into allocated buffers\n"); - return -1; - } - printf("End of igzip_sync_flush_perf\n\n"); - fflush(0); - 
return 0; -} diff --git a/src/main/native/compression/isa-l-master/igzip/igzip_update_histogram.asm b/src/main/native/compression/isa-l-master/igzip/igzip_update_histogram.asm index 4d91f7fb..0b71be0e 100644 --- a/src/main/native/compression/isa-l-master/igzip/igzip_update_histogram.asm +++ b/src/main/native/compression/isa-l-master/igzip/igzip_update_histogram.asm @@ -247,7 +247,7 @@ isal_update_histogram_ %+ ARCH %+ : ;; Init hash_table PXOR vtmp0, vtmp0, vtmp0 - mov rcx, (IGZIP_HASH_SIZE - V_LENGTH) + mov rcx, (IGZIP_LVL0_HASH_SIZE - V_LENGTH) init_hash_table: MOVDQU [histogram + _hash_offset + 2 * rcx], vtmp0 MOVDQU [histogram + _hash_offset + 2 * (rcx + V_LENGTH / 2)], vtmp0 @@ -262,7 +262,7 @@ init_hash_table: ;; Load first literal into histogram mov curr_data, [file_start + f_i] compute_hash hash, curr_data - and hash %+ d, HASH_MASK + and hash %+ d, LVL0_HASH_MASK mov [histogram + _hash_offset + 2 * hash], f_i %+ w and curr_data, 0xff inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * curr_data] @@ -276,8 +276,8 @@ init_hash_table: shr curr_data2, 8 compute_hash hash2, curr_data2 - and hash2 %+ d, HASH_MASK - and hash, HASH_MASK + and hash2 %+ d, LVL0_HASH_MASK + and hash, LVL0_HASH_MASK loop2: xor dist, dist xor dist2, dist2 @@ -324,8 +324,8 @@ loop2: xor len, [tmp1 + dist - 1] jz compare_loop - and hash %+ d, HASH_MASK - and hash2 %+ d, HASH_MASK + and hash %+ d, LVL0_HASH_MASK + and hash2 %+ d, LVL0_HASH_MASK MOVQ len2, xdata xor len2, [tmp1 + dist2] @@ -370,7 +370,7 @@ len_dist_lit_huffman: mov tmp1, curr_data compute_hash hash, curr_data - and hash3, HASH_MASK + and hash3, LVL0_HASH_MASK mov [histogram + _hash_offset + 2 * hash3], tmp3 %+ w dist_to_dist_code2 dist_code2, dist2 @@ -383,8 +383,8 @@ len_dist_lit_huffman: inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code] inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code2] - and hash2 %+ d, HASH_MASK - and hash, HASH_MASK + and hash2 %+ d, LVL0_HASH_MASK + and hash, 
LVL0_HASH_MASK cmp f_i, file_length jl loop2 @@ -418,8 +418,8 @@ len_dist_huffman: inc qword [histogram + _lit_len_offset + HIST_ELEM_SIZE * len_code] inc qword [histogram + _dist_offset + HIST_ELEM_SIZE * dist_code] - and hash2 %+ d, HASH_MASK - and hash, HASH_MASK + and hash2 %+ d, LVL0_HASH_MASK + and hash, LVL0_HASH_MASK cmp f_i, file_length jl loop2 @@ -442,7 +442,7 @@ end_loop_2: loop2_finish: mov curr_data %+ d, dword [file_start + f_i] compute_hash hash, curr_data - and hash %+ d, HASH_MASK + and hash %+ d, LVL0_HASH_MASK ;; Calculate possible distance for length/dist pair. xor dist, dist @@ -513,8 +513,8 @@ exit_ret: ret compare_loop: - and hash %+ d, HASH_MASK - and hash2 %+ d, HASH_MASK + and hash %+ d, LVL0_HASH_MASK + and hash2 %+ d, LVL0_HASH_MASK lea tmp2, [tmp1 + dist - 1] %if (COMPARE_TYPE == 1) compare250 tmp1, tmp2, len, tmp3 diff --git a/src/main/native/compression/isa-l-master/igzip/lz0a_const.asm b/src/main/native/compression/isa-l-master/igzip/lz0a_const.asm index a37abc0b..deb6d232 100644 --- a/src/main/native/compression/isa-l-master/igzip/lz0a_const.asm +++ b/src/main/native/compression/isa-l-master/igzip/lz0a_const.asm @@ -38,19 +38,28 @@ %define LAST_BYTES_COUNT 3 ;; Bytes to prevent reading out of array bounds %define LA_STATELESS 258 ;; No round up since no data is copied to a buffer -%ifndef IGZIP_HASH_SIZE -%assign IGZIP_HASH_SIZE (8 * K) -%endif +%assign IGZIP_LVL0_HASH_SIZE (8 * K) +%assign IGZIP_HASH8K_HASH_SIZE (8 * K) +%assign IGZIP_HASH_HIST_HASH_SIZE IGZIP_HIST_SIZE +%assign IGZIP_HASH_MAP_HASH_SIZE IGZIP_HIST_SIZE -%assign HASH_MASK (IGZIP_HASH_SIZE - 1) +%xdefine LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1) +%xdefine HASH8K_HASH_MASK (IGZIP_HASH8K_HASH_SIZE - 1) +%xdefine HASH_HIST_HASH_MASK (IGZIP_HASH_HIST_HASH_SIZE - 1) +%xdefine HASH_MAP_HASH_MASK (IGZIP_HASH_MAP_HASH_SIZE - 1) +%assign MIN_DEF_MATCH 3 ; Minimum length of a match in deflate %assign SHORTEST_MATCH 4 %assign SLOP 8 +%define ICF_CODE_BYTES 4 %define 
LIT_LEN_BIT_COUNT 10 %define DIST_LIT_BIT_COUNT 9 +%define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1) +%define LIT_DIST_MASK ((1 << DIST_LIT_BIT_COUNT) - 1) + %define DIST_OFFSET LIT_LEN_BIT_COUNT %define EXTRA_BITS_OFFSET (DIST_OFFSET + DIST_LIT_BIT_COUNT) %define LIT (0x1E << DIST_OFFSET) diff --git a/src/main/native/compression/isa-l-master/include/crc.h b/src/main/native/compression/isa-l-master/include/crc.h index fe0cd722..07149608 100644 --- a/src/main/native/compression/isa-l-master/include/crc.h +++ b/src/main/native/compression/isa-l-master/include/crc.h @@ -61,6 +61,21 @@ uint16_t crc16_t10dif( ); +/** + * @brief Generate CRC and copy T10 standard, runs appropriate version. + * + * Stitched CRC + copy function. + * + * @returns 16 bit CRC + */ +uint16_t crc16_t10dif_copy( + uint16_t init_crc, //!< initial CRC value, 16 bits + uint8_t *dst, //!< buffer destination for copy + uint8_t *src, //!< buffer source to crc + copy + uint64_t len //!< buffer length in bytes (64-bit data) + ); + + /** * @brief Generate CRC from the IEEE standard, runs appropriate version. * @@ -155,6 +170,18 @@ uint16_t crc16_t10dif_base( ); +/** + * @brief Generate CRC and copy T10 standard, runs baseline version. + * @returns 16 bit CRC + */ +uint16_t crc16_t10dif_copy_base( + uint16_t init_crc, //!< initial CRC value, 16 bits + uint8_t *dst, //!< buffer destination for copy + uint8_t *src, //!< buffer source to crc + copy + uint64_t len //!< buffer length in bytes (64-bit data) + ); + + /** * @brief Generate CRC from the IEEE standard, runs baseline version * @returns 32 bit CRC diff --git a/src/main/native/compression/isa-l-master/include/erasure_code.h b/src/main/native/compression/isa-l-master/include/erasure_code.h index f66d8380..e807c91d 100644 --- a/src/main/native/compression/isa-l-master/include/erasure_code.h +++ b/src/main/native/compression/isa-l-master/include/erasure_code.h @@ -890,13 +890,13 @@ unsigned char gf_inv(unsigned char a); * for certain pairs m and k. 
If m and k satisfy one of the following * inequalities, no adjustment is required: * - * k <= 3 - * k = 4, m <= 25 - * k = 5, m <= 10 - * k <= 21, m-k = 4 - * m - k <= 3. + * - k <= 3 + * - k = 4, m <= 25 + * - k = 5, m <= 10 + * - k <= 21, m-k = 4 + * - m - k <= 3. * - * @param a [mxk] array to hold coefficients + * @param a [m x k] array to hold coefficients * @param m number of rows in matrix corresponding to srcs + parity. * @param k number of columns in matrix corresponding to srcs. * @returns none @@ -911,7 +911,7 @@ void gf_gen_rs_matrix(unsigned char *a, int m, int k); * is identity matrix I and lower portion is constructed as 1/(i + j) | i != j, * i:{0,k-1} j:{k,m-1}. Any sub-matrix of a Cauchy matrix should be invertable. * - * @param a [mxk] array to hold coefficients + * @param a [m x k] array to hold coefficients * @param m number of rows in matrix corresponding to srcs + parity. * @param k number of columns in matrix corresponding to srcs. * @returns none diff --git a/src/main/native/compression/isa-l-master/include/igzip_lib.h b/src/main/native/compression/isa-l-master/include/igzip_lib.h index a51a4da3..eb9cc17f 100644 --- a/src/main/native/compression/isa-l-master/include/igzip_lib.h +++ b/src/main/native/compression/isa-l-master/include/igzip_lib.h @@ -115,9 +115,14 @@ extern "C" { #define ISAL_LIMIT_HASH_UPDATE -#ifndef IGZIP_HASH_SIZE -#define IGZIP_HASH_SIZE (8 * IGZIP_K) -#endif +#define IGZIP_HASH8K_HASH_SIZE (8 * IGZIP_K) +#define IGZIP_HASH_HIST_SIZE IGZIP_HIST_SIZE +#define IGZIP_HASH_MAP_HASH_SIZE IGZIP_HIST_SIZE + +#define IGZIP_LVL0_HASH_SIZE (8 * IGZIP_K) +#define IGZIP_LVL1_HASH_SIZE IGZIP_HASH8K_HASH_SIZE +#define IGZIP_LVL2_HASH_SIZE IGZIP_HASH_HIST_SIZE +#define IGZIP_LVL3_HASH_SIZE IGZIP_HASH_MAP_HASH_SIZE #ifdef LONGER_HUFFTABLE enum {IGZIP_DIST_TABLE_SIZE = 8*1024}; @@ -155,8 +160,10 @@ enum {IGZIP_LIT_TABLE_SIZE = ISAL_DEF_LIT_SYMBOLS}; #define INVALID_PARAM -8 #define STATELESS_OVERFLOW -1 #define ISAL_INVALID_OPERATION -9 
-#define ISAL_INVALID_LEVEL -4 /* Invalid Compression level set */ #define ISAL_INVALID_STATE -3 +#define ISAL_INVALID_LEVEL -4 /* Invalid Compression level set */ +#define ISAL_INVALID_LEVEL_BUF -5 /* Invalid buffer specified for the compression level */ + /** * @enum isal_zstate_state * @brief Compression State please note ZSTATE_TRL only applies for GZIP compression @@ -185,6 +192,8 @@ enum isal_zstate_state { ZSTATE_TMP_BODY, //!< Temporary Body state ZSTATE_TMP_FLUSH_READ_BUFFER, //!< Flush buffer ZSTATE_TMP_FLUSH_ICF_BUFFER, + ZSTATE_TMP_TYPE0_HDR, //! Temporary Type0 block header to be written + ZSTATE_TMP_TYPE0_BODY, //!< Temporary Type0 block body to be written ZSTATE_TMP_SYNC_FLUSH, //!< Write sync flush block ZSTATE_TMP_FLUSH_WRITE_BUFFER, //!< Flush bitbuf ZSTATE_TMP_TRL, //!< Temporary Trailer state @@ -233,7 +242,7 @@ enum isal_block_state { struct isal_huff_histogram { uint64_t lit_len_histogram[ISAL_DEF_LIT_LEN_SYMBOLS]; //!< Histogram of Literal/Len symbols seen uint64_t dist_histogram[ISAL_DEF_DIST_SYMBOLS]; //!< Histogram of Distance Symbols seen - uint16_t hash_table[IGZIP_HASH_SIZE]; //!< Tmp space used as a hash table + uint16_t hash_table[IGZIP_LVL0_HASH_SIZE]; //!< Tmp space used as a hash table }; struct isal_mod_hist { @@ -242,12 +251,17 @@ struct isal_mod_hist { }; #define ISAL_DEF_MIN_LEVEL 0 -#define ISAL_DEF_MAX_LEVEL 1 +#define ISAL_DEF_MAX_LEVEL 3 /* Defines used set level data sizes */ +/* has to be at least sizeof(struct level_buf) + sizeof(struct lvlX_buf */ #define ISAL_DEF_LVL0_REQ 0 -#define ISAL_DEF_LVL1_REQ 4 * IGZIP_K /* has to be at least sizeof(struct level_2_buf) */ +#define ISAL_DEF_LVL1_REQ (4 * IGZIP_K + 2 * IGZIP_LVL1_HASH_SIZE) #define ISAL_DEF_LVL1_TOKEN_SIZE 4 +#define ISAL_DEF_LVL2_REQ (4 * IGZIP_K + 2 * IGZIP_LVL2_HASH_SIZE) +#define ISAL_DEF_LVL2_TOKEN_SIZE 4 +#define ISAL_DEF_LVL3_REQ 4 * IGZIP_K + 4 * 4 * IGZIP_K + 2 * IGZIP_LVL3_HASH_SIZE +#define ISAL_DEF_LVL3_TOKEN_SIZE 4 /* Data sizes for level specific 
data options */ #define ISAL_DEF_LVL0_MIN ISAL_DEF_LVL0_REQ @@ -264,6 +278,20 @@ struct isal_mod_hist { #define ISAL_DEF_LVL1_EXTRA_LARGE (ISAL_DEF_LVL1_REQ + ISAL_DEF_LVL1_TOKEN_SIZE * 128 * IGZIP_K) #define ISAL_DEF_LVL1_DEFAULT ISAL_DEF_LVL1_LARGE +#define ISAL_DEF_LVL2_MIN (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 1 * IGZIP_K) +#define ISAL_DEF_LVL2_SMALL (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 16 * IGZIP_K) +#define ISAL_DEF_LVL2_MEDIUM (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 32 * IGZIP_K) +#define ISAL_DEF_LVL2_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 64 * IGZIP_K) +#define ISAL_DEF_LVL2_EXTRA_LARGE (ISAL_DEF_LVL2_REQ + ISAL_DEF_LVL2_TOKEN_SIZE * 128 * IGZIP_K) +#define ISAL_DEF_LVL2_DEFAULT ISAL_DEF_LVL2_LARGE + +#define ISAL_DEF_LVL3_MIN (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 1 * IGZIP_K) +#define ISAL_DEF_LVL3_SMALL (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 16 * IGZIP_K) +#define ISAL_DEF_LVL3_MEDIUM (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 32 * IGZIP_K) +#define ISAL_DEF_LVL3_LARGE (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 64 * IGZIP_K) +#define ISAL_DEF_LVL3_EXTRA_LARGE (ISAL_DEF_LVL3_REQ + ISAL_DEF_LVL3_TOKEN_SIZE * 128 * IGZIP_K) +#define ISAL_DEF_LVL3_DEFAULT ISAL_DEF_LVL3_LARGE + #define IGZIP_NO_HIST 0 #define IGZIP_HIST 1 #define IGZIP_DICT_HIST 2 @@ -284,26 +312,27 @@ struct BitBuf2 { /** @brief Holds the internal state information for input and output compression streams*/ struct isal_zstate { - uint32_t b_bytes_valid; //!< number of bytes of valid data in buffer - uint32_t b_bytes_processed; //!< keeps track of the number of bytes processed in isal_zstate.buffer - uint8_t *file_start; //!< pointer to where file would logically start - uint32_t crc; //!< Current crc + uint32_t total_in_start; //!< Start of total_in (inlcuding buffered data) on function call + uint32_t block_next; //!< Start of current deflate block in the input + uint32_t block_end; //!< End of current deflate block 
in the input struct BitBuf2 bitbuf; //!< Bit Buffer + uint32_t crc; //!< Current crc enum isal_zstate_state state; //!< Current state in processing the data stream + uint8_t has_wrap_hdr; //!< keeps track of wrapper header + uint8_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set) + uint8_t has_eob; //!< keeps track of eob on the last deflate block + uint8_t has_hist; //!< flag to track if there is match history + uint16_t has_level_buf_init; //!< flag to track if user supplied memory has been initialized. uint32_t count; //!< used for partial header/trailer writes uint8_t tmp_out_buff[16]; //!< temporary array uint32_t tmp_out_start; //!< temporary variable uint32_t tmp_out_end; //!< temporary variable - uint32_t has_wrap_hdr; //!< keeps track of wrapper header - uint32_t has_eob; //!< keeps track of eob on the last deflate block - uint32_t has_eob_hdr; //!< keeps track of eob hdr (with BFINAL set) - uint32_t has_hist; //!< flag to track if there is match history - - struct isal_mod_hist hist; - - DECLARE_ALIGNED(uint8_t buffer[2 * IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD], 32); //!< Internal buffer - DECLARE_ALIGNED(uint16_t head[IGZIP_HASH_SIZE], 16); //!< Hash array + uint32_t b_bytes_valid; //!< number of valid bytes in buffer + uint32_t b_bytes_processed; //!< number of bytes processed in buffer + uint8_t buffer[2 * IGZIP_HIST_SIZE + ISAL_LOOK_AHEAD]; //!< Internal buffer + /* Stream should be setup such that the head is cache aligned*/ + uint16_t head[IGZIP_LVL0_HASH_SIZE]; //!< Hash array }; /** @brief Holds the huffman tree used to huffman encode the input stream **/ @@ -335,8 +364,8 @@ struct isal_zstream { uint32_t level; //!< Compression level to use uint32_t level_buf_size; //!< Size of level_buf uint8_t * level_buf; //!< User allocated buffer required for different compression levels - uint32_t end_of_stream; //!< non-zero if this is the last input buffer - uint32_t flush; //!< Flush type can be NO_FLUSH, SYNC_FLUSH or FULL_FLUSH + uint16_t 
end_of_stream; //!< non-zero if this is the last input buffer + uint16_t flush; //!< Flush type can be NO_FLUSH, SYNC_FLUSH or FULL_FLUSH uint32_t gzip_flag; //!< Indicate if gzip compression is to be performed struct isal_zstate internal_state; //!< Internal state for this stream @@ -379,28 +408,51 @@ struct isal_zstream { * Since small_code_lookup is a lookup on DECODE_LOOKUP_SIZE bits, it must have * size 2^DECODE_LOOKUP_SIZE. * - * Since deflate Huffman are stored such that the code size and the code value - * form an increasing function, At most 2^(15 - DECODE_LOOKUP_SIZE) - 1 elements - * of long_code_lookup duplicate an existing symbol. Since there are at most 285 - * - DECODE_LOOKUP_SIZE possible symbols contained in long_code lookup. Rounding - * this to the nearest 16 byte boundary yields the size of long_code_lookup of - * 288 + 2^(15 - DECODE_LOOKUP_SIZE). + * To determine the amoutn of memory required for long_code_lookup, note that + * any element of long_code_lookup corresponds to a code, a duplicate of an + * existing code, or a invalid code. Since deflate Huffman are stored such that + * the code size and the code value form an increasing function, the number of + * duplicates is maximized when all the duplicates are contained in a single + * array, thus there are at most 2^(15 - DECODE_LOOKUP_SIZE) - + * (DECODE_LOOKUP_SIZE + 1) duplicate elements. Similarly the number of invalid + * elements is maximized at 2^(15 - DECODE_LOOKUP_SIZE) - 2^(floor((15 - + * DECODE_LOOKUP_SIZE)/2) - 2^(ceil((15 - DECODE_LOOKUP_SIZE)/2) + 1. Thus the + * amount of memory requried is: NUM_CODES + 2^(16 - DECODE_LOOKUP_SIZE) - + * (DECODE_LOOKUP_SIZE + 1) - 2^(floor((15 - DECODE_LOOKUP_SIZE)/2) - + * 2^(ceil((15 - DECODE_LOOKUP_SIZE)/2) + 1. 
The values used below are those + * valuse rounded up to the nearest 16 byte boundary * * Note that DECODE_LOOKUP_SIZE can be any length even though the offset in * small_lookup_code is 9 bits long because the increasing relationship between * code length and code value forces the maximum offset to be less than 288. */ +/* In the following defines, L stands for LARGE and S for SMALL */ +#define ISAL_L_REM (15 - ISAL_DECODE_LONG_BITS) +#define ISAL_S_REM (15 - ISAL_DECODE_SHORT_BITS) + +#define ISAL_L_DUP ((1 << ISAL_L_REM) - (ISAL_L_REM + 1)) +#define ISAL_S_DUP ((1 << ISAL_S_REM) - (ISAL_S_REM + 1)) + +#define ISAL_L_UNUSED ((1 << ISAL_L_REM) - (1 << ((ISAL_L_REM)/2)) - (1 << ((ISAL_L_REM + 1)/2)) + 1) +#define ISAL_S_UNUSED ((1 << ISAL_S_REM) - (1 << ((ISAL_S_REM)/2)) - (1 << ((ISAL_S_REM + 1)/2)) + 1) + +#define ISAL_L_SIZE (ISAL_DEF_LIT_LEN_SYMBOLS + ISAL_L_DUP + ISAL_L_UNUSED) +#define ISAL_S_SIZE (ISAL_DEF_DIST_SYMBOLS + ISAL_S_DUP + ISAL_S_UNUSED) + +#define ISAL_HUFF_CODE_LARGE_LONG_ALIGNED (ISAL_L_SIZE + (-ISAL_L_SIZE & 0xf)) +#define ISAL_HUFF_CODE_SMALL_LONG_ALIGNED (ISAL_S_SIZE + (-ISAL_S_SIZE & 0xf)) + /* Large lookup table for decoding huffman codes */ struct inflate_huff_code_large { uint16_t short_code_lookup[1 << (ISAL_DECODE_LONG_BITS)]; - uint16_t long_code_lookup[288 + (1 << (15 - ISAL_DECODE_LONG_BITS))]; + uint16_t long_code_lookup[ISAL_HUFF_CODE_LARGE_LONG_ALIGNED]; }; /* Small lookup table for decoding huffman codes */ struct inflate_huff_code_small { uint16_t short_code_lookup[1 << (ISAL_DECODE_SHORT_BITS)]; - uint16_t long_code_lookup[32 + (1 << (15 - ISAL_DECODE_SHORT_BITS))]; + uint16_t long_code_lookup[ISAL_HUFF_CODE_SMALL_LONG_ALIGNED]; }; /** @brief Holds decompression state information*/ @@ -589,7 +641,8 @@ int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t *dict, uint32_t d * @param stream Structure holding state information on the compression streams. 
* @return COMP_OK (if everything is ok), * INVALID_FLUSH (if an invalid FLUSH is selected), - * ISAL_INVALID_LEVEL (if an invalid compression level is selected). + * ISAL_INVALID_LEVEL (if an invalid compression level is selected), + * ISAL_INVALID_LEVEL_BUF (if the level buffer is not large enough). */ int isal_deflate(struct isal_zstream *stream); @@ -618,6 +671,7 @@ int isal_deflate(struct isal_zstream *stream); * @return COMP_OK (if everything is ok), * INVALID_FLUSH (if an invalid FLUSH is selected), * ISAL_INVALID_LEVEL (if an invalid compression level is selected), + * ISAL_INVALID_LEVEL_BUF (if the level buffer is not large enough), * STATELESS_OVERFLOW (if output buffer will not fit output). */ int isal_deflate_stateless(struct isal_zstream *stream); diff --git a/src/main/native/compression/isa-l-master/include/reg_sizes.asm b/src/main/native/compression/isa-l-master/include/reg_sizes.asm index 48de53cf..872ebb50 100644 --- a/src/main/native/compression/isa-l-master/include/reg_sizes.asm +++ b/src/main/native/compression/isa-l-master/include/reg_sizes.asm @@ -198,6 +198,15 @@ section .text section .note.GNU-stack noalloc noexec nowrite progbits section .text %endif + +%ifdef REL_TEXT + %define WRT_OPT +%elifidn __OUTPUT_FORMAT__, elf64 + %define WRT_OPT wrt ..plt +%else + %define WRT_OPT +%endif + %ifidn __OUTPUT_FORMAT__, macho64 %define elf64 macho64 %endif diff --git a/src/main/native/compression/isa-l-master/isa-l.def b/src/main/native/compression/isa-l-master/isa-l.def index de1e797c..20a8725f 100644 --- a/src/main/native/compression/isa-l-master/isa-l.def +++ b/src/main/native/compression/isa-l-master/isa-l.def @@ -104,3 +104,4 @@ isal_deflate_set_dict @100 isal_deflate_reset @101 isal_inflate_set_dict @102 isal_inflate_reset @103 +crc16_t10dif_copy @104 diff --git a/src/main/native/compression/isa-l-master/make.inc b/src/main/native/compression/isa-l-master/make.inc index ffb4f616..41e1f412 100644 --- 
a/src/main/native/compression/isa-l-master/make.inc +++ b/src/main/native/compression/isa-l-master/make.inc @@ -39,7 +39,7 @@ # trace - get simulator trace # clean - remove object files -version ?= 2.19.0 +version ?= 2.21.0 host_cpu ?= $(shell uname -m | sed -e 's/amd/x86_/') arch ?= $(shell uname | grep -v -e Linux -e BSD ) @@ -88,7 +88,7 @@ endif ASFLAGS_Darwin = -f macho64 --prefix=_ ARFLAGS_Darwin = -r $@ -ifeq ($(arch),Darwin) +ifeq ($(shell uname),Darwin) LDFLAGS_so = STRIP_gcc = endif @@ -126,18 +126,27 @@ all_tests = $(notdir $(sort $(perf_tests) $(check_tests) $(unit_tests) $(example all_unit_tests = $(notdir $(sort $(check_tests) $(unit_tests))) all_perf_tests = $(notdir $(sort $(perf_tests))) all_check_tests = $(notdir $(sort $(check_tests))) +all_llvm_fuzz_tests = $(notdir $(sort $(llvm_fuzz_tests))) $(all_unit_tests): % : %.c $(lib_name) $(all_perf_tests): % : %.c $(lib_name) $(sort $(notdir $(examples))): % : %.c $(lib_name) $(sort $(notdir $(other_tests))): % : %.c $(lib_name) +$(all_llvm_fuzz_tests): LDLIBS += -lFuzzer +$(all_llvm_fuzz_tests): CFLAGS += -fsanitize-coverage=trace-pc-guard -fsanitize=address +$(all_llvm_fuzz_tests): CXXFLAGS += -fsanitize-coverage=trace-pc-guard -fsanitize=address +$(all_llvm_fuzz_tests): % : %.o $(lib_name) + $(CXX) $(CXXFLAGS) $^ $(LDLIBS) -o $@ + + sim test trace: $(addsuffix .run,$(all_unit_tests)) perf: $(addsuffix .run,$(all_perf_tests)) check: $(addsuffix .run,$(all_check_tests)) ex: $(notdir $(examples)) all: lib $(all_tests) other: $(notdir $(other_tests)) +llvm_fuzz_tests: $(all_llvm_fuzz_tests) tests: $(all_unit_tests) perfs: $(all_perf_tests) checks: $(all_check_tests) @@ -273,7 +282,7 @@ perf_report: clean: @echo Cleaning up - @$(RM) -r $(O) *.o *.a $(all_tests) $(lib_name) $(so_lib_name) + @$(RM) -r $(O) *.o *.a $(all_tests) $(lib_name) $(so_lib_name) $(all_llvm_fuzz_tests) doc: isa-l.h diff --git a/src/main/native/compression/isa-l-master/raid/raid_multibinary.asm 
b/src/main/native/compression/isa-l-master/raid/raid_multibinary.asm index 72ef5d40..25d4aad9 100644 --- a/src/main/native/compression/isa-l-master/raid/raid_multibinary.asm +++ b/src/main/native/compression/isa-l-master/raid/raid_multibinary.asm @@ -27,12 +27,6 @@ ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -%ifidn __OUTPUT_FORMAT__, elf64 -%define WRT_OPT wrt ..plt -%else -%define WRT_OPT -%endif - %include "reg_sizes.asm" %include "multibinary.asm" diff --git a/src/main/native/compression/isa-l-master/raid/raid_multibinary_i32.asm b/src/main/native/compression/isa-l-master/raid/raid_multibinary_i32.asm index 6da4c9dc..eee7fd5a 100644 --- a/src/main/native/compression/isa-l-master/raid/raid_multibinary_i32.asm +++ b/src/main/native/compression/isa-l-master/raid/raid_multibinary_i32.asm @@ -27,12 +27,6 @@ ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -%ifidn __OUTPUT_FORMAT__, elf64 -%define WRT_OPT wrt ..plt -%else -%define WRT_OPT -%endif - %include "reg_sizes.asm" %include "multibinary.asm" diff --git a/src/main/native/compression/isa-l-master/tests/fuzz/Makefile.am b/src/main/native/compression/isa-l-master/tests/fuzz/Makefile.am new file mode 100644 index 00000000..ccbb8c3e --- /dev/null +++ b/src/main/native/compression/isa-l-master/tests/fuzz/Makefile.am @@ -0,0 +1,52 @@ +######################################################################## +# Copyright(c) 2011-2017 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +src_include += -I $(srcdir)/tests/fuzz + +# AFL fuzz tests +other_tests += tests/fuzz/igzip_fuzz_inflate +igzip_fuzz_inflate: igzip_checked_inflate_fuzz_test.o +igzip_fuzz_inflate: LDLIBS += -lz +tests_fuzz_igzip_fuzz_inflate_LDADD = tests/fuzz/igzip_checked_inflate_fuzz_test.lo libisal.la +tests_fuzz_igzip_fuzz_inflate_LDFLAGS = -lz + +other_tests += tests/fuzz/igzip_dump_inflate_corpus +tests_fuzz_igzip_dump_inflate_corpus_LDADD = libisal.la + +# LLVM fuzz tests +llvm_fuzz_tests = tests/fuzz/igzip_simple_inflate_fuzz_test +other_src += tests/fuzz/igzip_simple_inflate_fuzz_test.c + +llvm_fuzz_tests += tests/fuzz/igzip_checked_inflate_fuzz_test +other_src += tests/fuzz/igzip_checked_inflate_fuzz_test.c + +llvm_fuzz_tests += tests/fuzz/igzip_simple_round_trip_fuzz_test +other_src += tests/fuzz/igzip_simple_round_trip_fuzz_test.c + +igzip_checked_inflate_fuzz_test: LDLIBS += -lz diff --git a/src/main/native/compression/isa-l-master/tests/fuzz/Makefile.unx b/src/main/native/compression/isa-l-master/tests/fuzz/Makefile.unx new file mode 100644 index 00000000..afa20a36 --- /dev/null +++ b/src/main/native/compression/isa-l-master/tests/fuzz/Makefile.unx @@ -0,0 +1,12 @@ + +default: llvm_fuzz_tests + +include ../../igzip/Makefile.am +include Makefile.am +include ../../make.inc + +CC = clang +CXX = clang++ +CXXFLAGS += $(DEFINES) + +VPATH = . 
../../igzip ../../include diff --git a/src/main/native/compression/isa-l-master/tests/fuzz/igzip_checked_inflate_fuzz_test.c b/src/main/native/compression/isa-l-master/tests/fuzz/igzip_checked_inflate_fuzz_test.c new file mode 100644 index 00000000..376c0d3f --- /dev/null +++ b/src/main/native/compression/isa-l-master/tests/fuzz/igzip_checked_inflate_fuzz_test.c @@ -0,0 +1,69 @@ +#include +#include +#include +#include +#include +#include +#include "igzip_lib.h" + +int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) +{ + struct inflate_state state; + z_stream zstate; + size_t out_buf_size = 2 * size; + int zret, iret; + char z_msg_invalid_code_set[] = "invalid code lengths set"; + char z_msg_invalid_dist_set[] = "invalid distances set"; + char z_msg_invalid_lit_len_set[] = "invalid literal/lengths set"; + + uint8_t *isal_out_buf = (uint8_t *) malloc(size * 2); + uint8_t *zlib_out_buf = (uint8_t *) malloc(size * 2); + + assert(NULL != isal_out_buf && NULL != zlib_out_buf); + + /* Inflate data with isal_inflate */ + memset(&state, 0xff, sizeof(struct inflate_state)); + + isal_inflate_init(&state); + state.next_in = (uint8_t *) data; + state.avail_in = size; + state.next_out = isal_out_buf; + state.avail_out = out_buf_size; + + iret = isal_inflate_stateless(&state); + + /* Inflate data with zlib */ + zstate.zalloc = Z_NULL; + zstate.zfree = Z_NULL; + zstate.opaque = Z_NULL; + zstate.avail_in = size; + zstate.next_in = (Bytef *) data; + zstate.avail_out = out_buf_size; + zstate.next_out = zlib_out_buf; + inflateInit2(&zstate, -15); + + zret = inflate(&zstate, Z_FINISH); + + if (zret == Z_STREAM_END) { + /* If zlib finished, assert isal finished with the same answer */ + assert(state.block_state == ISAL_BLOCK_FINISH); + assert(zstate.total_out == state.total_out); + assert(memcmp(isal_out_buf, zlib_out_buf, state.total_out) == 0); + } else if (zret < 0) { + if (zret != Z_BUF_ERROR) + /* If zlib errors, assert isal errors, excluding a few + * cases where zlib is 
overzealous */ + assert(iret < 0 || strcmp(zstate.msg, z_msg_invalid_code_set) == 0 + || strcmp(zstate.msg, z_msg_invalid_dist_set) == 0 + || strcmp(zstate.msg, z_msg_invalid_lit_len_set) == 0); + } else + /* If zlib did not finish or error, assert isal did not finish + * or that isal found an invalid header since isal notices the + * error faster than zlib */ + assert(iret > 0 || iret == ISAL_INVALID_BLOCK); + + inflateEnd(&zstate); + free(isal_out_buf); + free(zlib_out_buf); + return 0; +} diff --git a/src/main/native/compression/isa-l-master/tests/fuzz/igzip_dump_inflate_corpus.c b/src/main/native/compression/isa-l-master/tests/fuzz/igzip_dump_inflate_corpus.c new file mode 100644 index 00000000..490655eb --- /dev/null +++ b/src/main/native/compression/isa-l-master/tests/fuzz/igzip_dump_inflate_corpus.c @@ -0,0 +1,37 @@ +#include +#include +#include +#include "inflate_std_vects.h" + +#define FNAME_MAX 180 + +int main(int argc, char *argv[]) +{ + uint8_t *buf; + int i, len, err; + FILE *fout = NULL; + char fname[FNAME_MAX]; + char dname[FNAME_MAX]; + + if (argc != 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + exit(1); + } + strncpy(dname, argv[1], FNAME_MAX); + + for (i = 0; i < sizeof(std_vect_array) / sizeof(struct vect_result); i++) { + buf = std_vect_array[i].vector; + len = std_vect_array[i].vector_length; + err = std_vect_array[i].expected_error; + + snprintf(fname, FNAME_MAX, "%s/inflate_corp_n%03d_e%d", dname, i, err); + printf(" writing %s\n", fname); + fout = fopen(fname, "w+"); + if (!fout) { + fprintf(stderr, "Can't open %s for writing\n", fname); + exit(1); + } + fwrite(buf, len, 1, fout); + fclose(fout); + } +} diff --git a/src/main/native/compression/isa-l-master/tests/fuzz/igzip_fuzz_inflate.c b/src/main/native/compression/isa-l-master/tests/fuzz/igzip_fuzz_inflate.c new file mode 100644 index 00000000..85050b27 --- /dev/null +++ b/src/main/native/compression/isa-l-master/tests/fuzz/igzip_fuzz_inflate.c @@ -0,0 +1,37 @@ +#define 
_FILE_OFFSET_BITS 64 +#include +#include +#include +#include "huff_codes.h" +#include "igzip_lib.h" +#include "test.h" + +extern int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size); + +int main(int argc, char *argv[]) +{ + FILE *in = NULL; + unsigned char *in_buf = NULL; + uint64_t in_file_size; + + if (argc != 2) { + fprintf(stderr, "Usage: isal_fuzz_inflate \n"); + exit(1); + } + in = fopen(argv[1], "rb"); + if (!in) { + fprintf(stderr, "Can't open %s for reading\n", argv[1]); + exit(1); + } + in_file_size = get_filesize(in); + in_buf = malloc(in_file_size); + + if (in_buf == NULL) { + fprintf(stderr, "Failed to malloc input and outputs buffers\n"); + exit(1); + } + + fread(in_buf, 1, in_file_size, in); + + return LLVMFuzzerTestOneInput(in_buf, in_file_size); +} diff --git a/src/main/native/compression/isa-l-master/tests/fuzz/igzip_simple_inflate_fuzz_test.c b/src/main/native/compression/isa-l-master/tests/fuzz/igzip_simple_inflate_fuzz_test.c new file mode 100644 index 00000000..b5f22e84 --- /dev/null +++ b/src/main/native/compression/isa-l-master/tests/fuzz/igzip_simple_inflate_fuzz_test.c @@ -0,0 +1,22 @@ +#include +#include +#include +#include "igzip_lib.h" + +int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) +{ + struct inflate_state state; + uint8_t *isal_out_buf = (uint8_t *) (malloc(size * 2)); + size_t out_buf_size = 2 * size; + + isal_inflate_init(&state); + state.next_in = (uint8_t *) data; + state.avail_in = size; + state.next_out = isal_out_buf; + state.avail_out = out_buf_size; + + isal_inflate_stateless(&state); + + free(isal_out_buf); + return 0; +} diff --git a/src/main/native/compression/isa-l-master/tests/fuzz/igzip_simple_round_trip_fuzz_test.c b/src/main/native/compression/isa-l-master/tests/fuzz/igzip_simple_round_trip_fuzz_test.c new file mode 100644 index 00000000..1190e0ca --- /dev/null +++ b/src/main/native/compression/isa-l-master/tests/fuzz/igzip_simple_round_trip_fuzz_test.c @@ -0,0 +1,129 @@ +#include +#include 
+#include +#include +#include +#include +#include "igzip_lib.h" + +#define LEVEL_BITS 2 +#define HEADER_BITS 3 +#define LVL_BUF_BITS 3 + +#define LEVEL_BIT_MASK ((1< (y)) ? y : x ) + +const int header_size[] = { + 0, //IGZIP_DEFLATE + 10, //IGZIP_GZIP + 0, //IGZIP_GZIP_NO_HDR + 2, //IGZIP_ZLIB + 0, //IGZIP_ZLIB_NO_HDR +}; + +const int trailer_size[] = { + 0, //IGZIP_DEFLATE + 8, //IGZIP_GZIP + 8, //IGZIP_GZIP_NO_HDR + 4, //IGZIP_ZLIB + 4, //IGZIP_ZLIB_NO_HDR +}; + +int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) +{ + struct inflate_state istate; + struct isal_zstream cstate; + uint8_t *in_data = (uint8_t *) data; + int ret = 1; + + // Parameter default + int level = 1; + int lev_buf_size = ISAL_DEF_LVL1_DEFAULT; + int wrapper_type = 0; + size_t cmp_buf_size = size + ISAL_DEF_MAX_HDR_SIZE; + + // Parameters are set by one byte of data input + if (size > 1) { + uint8_t in_param = in_data[--size]; + level = MIN(in_param & LEVEL_BIT_MASK, ISAL_DEF_MAX_LEVEL); + in_param >>= LEVEL_BITS; + + wrapper_type = (in_param & HEADER_BIT_MASK) % (IGZIP_ZLIB_NO_HDR + 1); + in_param >>= HEADER_BITS; + + switch (level) { + case 0: + lev_buf_size = ISAL_DEF_LVL0_MIN + (in_param) * + (ISAL_DEF_LVL0_EXTRA_LARGE / LEVEL_BIT_MASK); + break; + case 1: + lev_buf_size = ISAL_DEF_LVL1_MIN + (in_param) * + (ISAL_DEF_LVL1_EXTRA_LARGE / LEVEL_BIT_MASK); + break; +#ifdef ISAL_DEF_LVL2_MIN + case 2: + lev_buf_size = ISAL_DEF_LVL2_MIN + (in_param) * + (ISAL_DEF_LVL2_EXTRA_LARGE / LEVEL_BIT_MASK); + break; +#endif +#ifdef ISAL_DEF_LVL3_MIN + case 3: + lev_buf_size = ISAL_DEF_LVL3_MIN + (in_param) * + (ISAL_DEF_LVL3_EXTRA_LARGE / LEVEL_BIT_MASK); + break; +#endif + } + if (0 == level) + cmp_buf_size = 2 * size + ISAL_DEF_MAX_HDR_SIZE; + else + cmp_buf_size = size + 8 + (TYPE0_HDR_SIZE * (size / TYPE0_MAX_SIZE)); + + cmp_buf_size += header_size[wrapper_type] + trailer_size[wrapper_type]; + } + + uint8_t *isal_cmp_buf = (uint8_t *) malloc(cmp_buf_size); + uint8_t *isal_out_buf = (uint8_t 
*) malloc(size); + uint8_t *isal_lev_buf = (uint8_t *) malloc(lev_buf_size); + assert(NULL != isal_cmp_buf || NULL != isal_out_buf || NULL != isal_lev_buf); + + isal_deflate_init(&cstate); + cstate.end_of_stream = 1; + cstate.flush = NO_FLUSH; + cstate.next_in = in_data; + cstate.avail_in = size; + cstate.next_out = isal_cmp_buf; + cstate.avail_out = cmp_buf_size; + cstate.level = level; + cstate.level_buf = isal_lev_buf; + cstate.level_buf_size = lev_buf_size; + cstate.gzip_flag = wrapper_type; + ret = isal_deflate_stateless(&cstate); + + isal_inflate_init(&istate); + istate.next_in = isal_cmp_buf + header_size[wrapper_type]; + istate.avail_in = cstate.total_out - header_size[wrapper_type];; + istate.next_out = isal_out_buf; + istate.avail_out = size; + istate.crc_flag = wrapper_type; + ret |= isal_inflate_stateless(&istate); + ret |= memcmp(isal_out_buf, in_data, size); + + // Check trailer + uint32_t crc = 0; + int trailer_idx = cstate.total_out - trailer_size[wrapper_type]; + + if (wrapper_type == IGZIP_GZIP || wrapper_type == IGZIP_GZIP_NO_HDR) + crc = *(uint32_t *) & isal_cmp_buf[trailer_idx]; + else if (wrapper_type == IGZIP_ZLIB || wrapper_type == IGZIP_ZLIB_NO_HDR) + crc = bswap_32(*(uint32_t *) & isal_cmp_buf[trailer_idx]); + + assert(istate.crc == crc); + free(isal_cmp_buf); + free(isal_out_buf); + free(isal_lev_buf); + return ret; +} diff --git a/src/main/native/compression/isa-l-master/tools/check_format.sh b/src/main/native/compression/isa-l-master/tools/check_format.sh new file mode 100755 index 00000000..de841be4 --- /dev/null +++ b/src/main/native/compression/isa-l-master/tools/check_format.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +set -e +rc=0 +verbose=0 +indent_args='-npro -kr -i8 -ts8 -sob -l95 -ss -ncs -cp1 -lps' + +while [ -n "$*" ]; do + case "$1" in + -v ) + verbose=1 + shift + ;; + -h ) + echo check_format.sh [-h -v] + exit 0 + ;; + esac +done + +echo "Checking format of files in the git index at $PWD" +if ! 
git rev-parse --is-inside-work-tree >& /dev/null; then + echo "Not in a git repo: Fail" + exit 1 +fi + +if hash indent && indent --version | grep -q GNU; then + echo "Checking C files for coding style..." + for f in `git ls-files '*.c'`; do + [ "$verbose" -gt 0 ] 2> /dev/null && echo "checking $f" + if ! indent $indent_args -st $f | diff -q $f - >& /dev/null; then + echo " File found with formatting issues: $f" + [ "$verbose" -gt 0 ] 2> /dev/null && indent $indent_args -st $f | diff -u $f - + rc=1 + fi + done + [ "$rc" -gt 0 ] && echo " Run ./tools/iindent on files" +else + echo "You do not have indent installed so your code style is not being checked!" +fi + +if hash grep; then + echo "Checking for dos and whitespace violations..." + for f in `git ls-files '*.c' '*.h' '*.asm' '*.inc' '*.am' '*.txt' '*.md' `; do + [ "$verbose" -gt 0 ] 2> /dev/null && echo "checking $f" + if grep -q '[[:space:]]$' $f ; then + echo " File found with trailing whitespace: $f" + rc=1 + fi + if grep -q $'\r' $f ; then + echo " File found with dos formatting: $f" + rc=1 + fi + done +fi + +[ "$rc" -gt 0 ] && echo Format Fail || echo Format Pass + +exit $rc diff --git a/src/main/native/compression/isa-l-master/tools/remove_trailing_whitespace b/src/main/native/compression/isa-l-master/tools/remove_trailing_whitespace new file mode 100755 index 00000000..bb82b9fa --- /dev/null +++ b/src/main/native/compression/isa-l-master/tools/remove_trailing_whitespace @@ -0,0 +1,2 @@ +#!/bin/sh +sed -i -i.bak 's/[[:blank:]]*$//' "$@" diff --git a/src/main/native/compression/isa-l-master/tools/test_autorun.sh b/src/main/native/compression/isa-l-master/tools/test_autorun.sh new file mode 100755 index 00000000..958c1b66 --- /dev/null +++ b/src/main/native/compression/isa-l-master/tools/test_autorun.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +set -e #exit on fail + +# Override defaults if exist +READLINK=readlink +command -V greadlink >/dev/null 2>&1 && READLINK=greadlink + + +out="$PWD" +src=$($READLINK -f 
$(dirname $0))/.. +cd "$src" + +[ -z "$1" ] && ./tools/test_checks.sh + +while [ -n "$1" ]; do + case "$1" in + check ) + ./tools/test_checks.sh + shift ;; + ext ) + ./tools/test_extended.sh + shift ;; + format ) + shift ;; + all ) + ./tools/test_checks.sh + ./tools/test_extended.sh + shift ;; + * ) + echo $0 undefined option: $1 + shift ;; + esac +done + +./tools/check_format.sh + diff --git a/src/main/native/compression/isa-l-master/tools/test_checks.sh b/src/main/native/compression/isa-l-master/tools/test_checks.sh new file mode 100755 index 00000000..7bcb5c47 --- /dev/null +++ b/src/main/native/compression/isa-l-master/tools/test_checks.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash + +set -xe #exit on fail + +# Defaults +cpus=1 +S=$RANDOM +MAKE=make +READLINK=readlink + +# Override defaults if exist +command -V gmake >/dev/null 2>&1 && MAKE=gmake +command -V greadlink >/dev/null 2>&1 && READLINK=greadlink + +out="$PWD" +src=$($READLINK -f $(dirname $0))/.. +cd "$src" +tmp_install_dir=$out/tmp_install + +# Run on mult cpus +if command -V lscpu >/dev/null 2>&1; then + cpus=`lscpu -p | tail -1 | cut -d, -f 2` + cpus=$(($cpus + 1)) +elif command -V sysctl; then + if sysctl -n hw.ncpu >/dev/null 2>&1; then + cpus=$(sysctl -n hw.ncpu) + cpus=$(($cpus + 1)) + fi +fi +echo "Using $cpus cpu threads" + +# Pick a random test seed +if [ -z "$S" ]; then + S=`tr -cd 0-9 /dev/null || S="123" +fi +echo "Running with TEST_SEED=$S" + +# Fix Darwin issues +if uname | grep -q 'Darwin' 2>&1; then + export SED=`which sed` +fi + +# Tests +time ./autogen.sh +time ./configure --prefix=$tmp_install_dir $opt_config_target +time $MAKE -j $cpus +time $MAKE check -j $cpus D="-D TEST_SEED=$S" +time $MAKE install + +# Check for gnu executable stack set +if command -V readelf >/dev/null 2>&1; then + if readelf -W -l $tmp_install_dir/lib/libisal.so | grep 'GNU_STACK' | grep -q 'RWE'; then + echo Stack NX check $tmp_install_dir/lib/libisal.so Fail + exit 1 + else + echo Stack NX check 
$tmp_install_dir/lib/libisal.so Pass + fi +else + echo Stack NX check not supported +fi + +$MAKE clean + + + +echo $0: Pass diff --git a/src/main/native/compression/isa-l-master/tools/test_extended.sh b/src/main/native/compression/isa-l-master/tools/test_extended.sh new file mode 100755 index 00000000..4a1dfb6c --- /dev/null +++ b/src/main/native/compression/isa-l-master/tools/test_extended.sh @@ -0,0 +1,161 @@ +#!/usr/bin/env bash + +# Extended tests: Run a few more options other than make check + +set -xe #exit on fail + +# Defaults +cpus=1 +S=$RANDOM +MAKE=make +READLINK=readlink +test_level=check +build_opt='' +msg='' + +# Override defaults if exist +command -V gmake >/dev/null 2>&1 && MAKE=gmake +command -V greadlink >/dev/null 2>&1 && READLINK=greadlink +[ -n "$CC" ] && build_opt+="CC=$CC " +[ -n "$AS" ] && build_opt+="AS=$AS " + +out="$PWD" +src=$($READLINK -f $(dirname $0))/.. +cd "$src" + +# Run on mult cpus +if command -V lscpu >/dev/null 2>&1; then + cpus=`lscpu -p | tail -1 | cut -d, -f 2` + cpus=$(($cpus + 1)) +elif command -V sysctl; then + if sysctl -n hw.ncpu >/dev/null 2>&1; then + cpus=$(sysctl -n hw.ncpu) + cpus=$(($cpus + 1)) + fi +fi +echo "Using $cpus cpu threads" + +if [ -z "$S" ]; then + S=`tr -cd 0-9 /dev/null || S="123" +fi +msg+="Running with TEST_SEED=$S".$'\n' + +# Fix Darwin issues +if uname | grep -q 'Darwin' 2>&1; then + export SED=`which sed` +fi + +# Check for test libs to add +if command -V ldconfig >/dev/null 2>&1; then + if ldconfig -p | grep -q libz.so; then + test_level=test + msg+=$'With extra tests\n' + fi + if ldconfig -p | grep -q libefence.so; then + build_opt+="LDFLAGS+='-lefence' " + msg+=$'With efence\n' + fi +fi + +# Std makefile build test +$MAKE -f Makefile.unx clean +time $MAKE -f Makefile.unx -j $cpus $build_opt +msg+=$'Std makefile build: Pass\n' + +# Check for gnu executable stack set +if command -V readelf >/dev/null 2>&1; then + if readelf -W -l bin/libisal.so | grep 'GNU_STACK' | grep -q 'RWE'; then + echo 
$0: Stack NX check bin/libisal.so: Fail + exit 1 + else + msg+=$'Stack NX check bin/lib/libisal.so: Pass\n' + fi +else + msg+=$'Stack NX check not supported: Skip\n' +fi + +# Std makefile build perf tests +time $MAKE -f Makefile.unx -j $cpus perfs +msg+=$'Std makefile build perf: Pass\n' + +# Std makefile run tests +time $MAKE -f Makefile.unx -j $cpus $build_opt D="TEST_SEED=$S" $test_level +msg+=$'Std makefile tests: Pass\n' + +# Std makefile build other +time $MAKE -f Makefile.unx -j $cpus $build_opt D="TEST_SEED=$S" other +msg+=$'Other tests build: Pass\n' + +# Try to pick a random src file +if command -V shuf >/dev/null 2>&1; then + in_file=$(find $src -type f -size +0 -name \*.c -o -name \*.asm -print 2>/dev/null | shuf | head -1 ); +else + in_file=configure.ac +fi + +echo Other tests using $in_file +./igzip_file_perf $in_file +./igzip_stateless_file_perf $in_file +./igzip_hist_perf $in_file +./igzip_semi_dyn_file_perf $in_file +./igzip_inflate_perf $in_file +./igzip_fuzz_inflate $in_file +msg+=$'Other tests run: Pass\n' + +if command -V shuf >/dev/null 2>&1; then + in_files=$(find $src -type f -size +0 -print 2>/dev/null | shuf | head -10 ); + ./igzip_rand_test $in_files + ./igzip_inflate_test $in_files + msg+=$'Compression file tests: Pass\n' +else + msg+=$'Compression file test: Skip\n' +fi + +time $MAKE -f Makefile.unx -j $cpus $build_opt ex +msg+=$'Examples build: Pass\n' + +./crc_simple_test +./crc64_example +./xor_example +./igzip_example ${in_file} ${in_file}.cmp +rm -rf ${in_file}.cmp +msg+=$'Examples run: Pass\n' + +# Test custom hufftables +./generate_custom_hufftables $in_file +$MAKE -f Makefile.unx -j $cpus checks +./igzip_rand_test $in_file +rm -rf hufftables_c.c +msg+=$'Custom hufftable build: Pass\n' + +$MAKE -f Makefile.unx clean + +# noarch build +time $MAKE -f Makefile.unx -j $cpus arch=noarch $build_opt +time $MAKE -f Makefile.unx -j $cpus arch=noarch $build_opt D="TEST_SEED=$S" check +$MAKE -f Makefile.unx arch=noarch clean +msg+=$'Noarch 
build: Pass\n' + +# Try mingw build +if command -V x86_64-w64-mingw32-gcc >/dev/null 2>&1; then + time $MAKE -f Makefile.unx -j $cpus arch=mingw + msg+=$'Mingw build: Pass\n' + + if command -V wine >/dev/null 2>&1; then + time $MAKE -f Makefile.unx -j $cpus arch=mingw D="TEST_SEED=$S" check + msg+=$'Mingw check tests: Pass\n' + else + msg+=$'No wine, mingw check: Skip\n' + fi + $MAKE -f Makefile.unx arch=mingw clean +else + msg+=$'No mingw build: Skip\n' +fi + +set +x +echo +echo "Summary test $0:" +echo "Build opt: $build_opt" +echo "$msg" +echo "$0: Final: Pass" diff --git a/src/main/native/compression/isa-l-master/tools/test_fuzz.sh b/src/main/native/compression/isa-l-master/tools/test_fuzz.sh new file mode 100755 index 00000000..08f3e54c --- /dev/null +++ b/src/main/native/compression/isa-l-master/tools/test_fuzz.sh @@ -0,0 +1,144 @@ +#!/usr/bin/env bash + +usage () +{ +cat << EOF +usage: $0 options +options: + -h Help + -l, --llvm Use llvm fuzz tests and run n times 0=just build, -1=skip (default $use_llvm). + -a, --afl Use AFL fuzz tests and run n times 0=just build, -1=skip (default $use_afl). + -t, --time Run each group of max time [s,h,m,d] - n seconds, hours, minutes or days. + -e Run a specific llvm test or [test, rand, all]. + -f Use this file as initial raw input. Can be repeated. + -d <0,1> Use dump of internal inflate test corpus (default $use_internal_corp). + -i Fuzz input dir (default $fuzzin_dir). + -o Fuzz output dir (default $fuzzout_dir). 
+EOF +exit 0 +} + +# Defaults +use_afl=-1 +use_llvm=1 +samp_files= +use_internal_corp=1 +fuzzin_dir=fuzzin +fuzzout_dir=fuzzout +llvm_opts=" -print_final_stats=1" +afl_timeout_cmd="" +run_secs=0 +llvm_tests=("igzip_simple_inflate_fuzz_test") +llvm_all_tests=("igzip_simple_inflate_fuzz_test" "igzip_checked_inflate_fuzz_test" "igzip_simple_round_trip_fuzz_test") + +# Options +while [ "$1" != "${1##-}" ]; do + case $1 in + -h | --help) + usage + ;; + -t | --time) + run_secs=$(echo $2 | sed -e 's/d$/*24h/' -e 's/h$/*60m/' -e 's/m$/*60/' -e 's/s$//'| bc) + llvm_opts+=" -max_total_time=$run_secs" + afl_timeout_cmd="timeout --preserve-status $run_secs" + echo Run each for $run_secs seconds + shift 2 + ;; + -a | --afl) + use_afl=$2 + shift 2 + ;; + -l | --llvm) + use_llvm=$2 + shift 2 + ;; + -f) + samp_files+="$2 " + use_internal_corp=0 + shift 2 + ;; + -d) + use_internal_corp=$2 + shift 2 + ;; + -e) + case $2 in + all) + llvm_tests=${llvm_all_tests[@]} + ;; + rand) + llvm_tests=${llvm_all_tests[$RANDOM % ${#llvm_all_tests[@]} ]} + ;; + *) + llvm_tests[0]="$2" + ;; + esac + shift 2 + ;; + -i) + fuzzin_dir=$2 + shift 2 + ;; + -o) + fuzzout_dir=$2 + shift 2 + ;; + esac +done + +set -xe #exit on fail +mkdir -p $fuzzout_dir $fuzzin_dir + +# Optionally build afl fuzz tests +if [ $use_afl -ge 0 ]; then + echo Build afl fuzz tests + if ! command -V afl-gcc > /dev/null; then + echo $0 option --afl requires package afl installed + exit 0 + fi + make -f Makefile.unx clean + make -f Makefile.unx units=igzip CC=afl-gcc other +fi + +# Optionally build llvm fuzz tests +if [ $use_llvm -ge 0 ]; then + echo Build llvm fuzz tests + if ! 
( command -V clang++ > /dev/null && + echo int LLVMFuzzerTestOneInput\(\)\{return 0\;\} | clang++ -x c - -lFuzzer -lpthread -o /dev/null); then + echo $0 option --llvm requires clang++ and libFuzzer + exit 0 + fi + rm -rf bin + make -f Makefile.unx units=igzip llvm_fuzz_tests igzip_dump_inflate_corpus CC=clang CXX=clang++ +fi + +# Optionally fill fuzz input with internal tests corpus +[ $use_internal_corp -gt 0 ] && ./igzip_dump_inflate_corpus $fuzzin_dir + +# Optionally compress input samples as input into fuzz dir +for f in $samp_files; do + echo Using sample file $f + f_base=`basename $f` + ./igzip_stateless_file_perf $f -o $fuzzin_dir/samp_${f_base}_cmp +done + +# Optionally run tests alternately one after the other +while [ $use_llvm -gt 0 -o $use_afl -gt 0 ]; do + if [ $use_afl -gt 0 ]; then + echo afl run $use_afl + let use_afl-- + $afl_timeout_cmd afl-fuzz -T "Run inflate $run_secs s" -i $fuzzin_dir -o $fuzzout_dir -M fuzzer1 -- ./igzip_fuzz_inflate @@ + afl-whatsup $fuzzout_dir + fi + + if [ $use_llvm -gt 0 ]; then + echo llvm run $use_llvm + let use_llvm-- + for test in $llvm_tests; do + echo "Run llvm test $test" + ./$test $fuzzin_dir $llvm_opts + done + fi +done + +make -f Makefile.unx clean