diff --git a/.gitignore b/.gitignore index b1c7422fe..e324531f9 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,9 @@ *.gcno *.gcov +/crc32_test +/crc32_test64 +/crc32_testsh /example /example64 /examplesh diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f1b69f4a..0464ba3b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,8 @@ project(zlib C) set(VERSION "1.3") +option(POWER "Enable building power implementation") + set(INSTALL_BIN_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables") set(INSTALL_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries") set(INSTALL_INC_DIR "${CMAKE_INSTALL_PREFIX}/include" CACHE PATH "Installation directory for headers") @@ -126,6 +128,80 @@ if(NOT MINGW) ) endif() +if(CMAKE_COMPILER_IS_GNUCC) + + # test to see if we can use a GNU indirect function to detect and load optimized code at runtime + CHECK_C_SOURCE_COMPILES(" + static int test_ifunc_native(void) + { + return 1; + } + static int (*(check_ifunc_native(void)))(void) + { + return test_ifunc_native; + } + int test_ifunc(void) __attribute__ ((ifunc (\"check_ifunc_native\"))); + int main(void) + { + return 0; + } + " HAS_C_ATTR_IFUNC) + + if(HAS_C_ATTR_IFUNC) + add_definitions(-DHAVE_IFUNC) + set(ZLIB_PRIVATE_HDRS ${ZLIB_PRIVATE_HDRS} contrib/gcc/zifunc.h) + endif() + + if(POWER) + # Test to see if we can use the optimizations for Power + CHECK_C_SOURCE_COMPILES(" + #ifndef _ARCH_PPC + #error \"Target is not Power\" + #endif + #ifndef __BUILTIN_CPU_SUPPORTS__ + #error \"Target doesn't support __builtin_cpu_supports()\" + #endif + int main() { return 0; } + " HAS_POWER_SUPPORT) + + if(HAS_POWER_SUPPORT AND HAS_C_ATTR_IFUNC) + add_definitions(-DZ_POWER_OPT) + + # Save the caller's check flags so the -mcpu probes below cannot leak into later checks. + set(ZLIB_SAVED_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") + set(CMAKE_REQUIRED_FLAGS -mcpu=power8) + CHECK_C_SOURCE_COMPILES("int main(void){return 0;}" POWER8) + + if(POWER8) + add_definitions(-DZ_POWER8) + set(ZLIB_POWER8 + contrib/power/crc32_z_power8.c) + + set_source_files_properties( 
+ ${ZLIB_POWER8} + PROPERTIES COMPILE_FLAGS -mcpu=power8) + endif() + + set(CMAKE_REQUIRED_FLAGS -mcpu=power9) + CHECK_C_SOURCE_COMPILES("int main(void){return 0;}" POWER9) + # Both -mcpu probes are done: put the caller's check flags back. + set(CMAKE_REQUIRED_FLAGS "${ZLIB_SAVED_REQUIRED_FLAGS}") + + if(POWER9) + add_definitions(-DZ_POWER9) + set(ZLIB_POWER9 ) + + set_source_files_properties( + ${ZLIB_POWER9} + PROPERTIES COMPILE_FLAGS -mcpu=power9) + endif() + + set(ZLIB_PRIVATE_HDRS ${ZLIB_PRIVATE_HDRS} contrib/power/power.h) + set(ZLIB_SRCS ${ZLIB_SRCS} ${ZLIB_POWER8} ${ZLIB_POWER9}) + endif() + endif() +endif() + # parse the full version number from zlib.h and include in ZLIB_FULL_VERSION file(READ ${CMAKE_CURRENT_SOURCE_DIR}/zlib.h _zlib_h_contents) string(REGEX REPLACE ".*#define[ \t]+ZLIB_VERSION[ \t]+\"([-0-9A-Za-z.]+)\".*" @@ -198,6 +274,10 @@ add_executable(example test/example.c) target_link_libraries(example zlib) add_test(example example) +add_executable(crc32_test test/crc32_test.c) +target_link_libraries(crc32_test zlib) +add_test(crc32_test crc32_test) + add_executable(minigzip test/minigzip.c) target_link_libraries(minigzip zlib) diff --git a/Makefile.in b/Makefile.in index 34d3cd722..1710f6328 100644 --- a/Makefile.in +++ b/Makefile.in @@ -25,6 +25,7 @@ LDFLAGS= TEST_LDFLAGS=$(LDFLAGS) -L. 
libz.a LDSHARED=$(CC) CPP=$(CC) -E +VGFMAFLAG= STATICLIB=libz.a SHAREDLIB=libz.so @@ -71,11 +72,11 @@ PIC_OBJS = $(PIC_OBJC) $(PIC_OBJA) all: static shared -static: example$(EXE) minigzip$(EXE) +static: crc32_test$(EXE) example$(EXE) minigzip$(EXE) -shared: examplesh$(EXE) minigzipsh$(EXE) +shared: crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) -all64: example64$(EXE) minigzip64$(EXE) +all64: crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE) check: test @@ -83,7 +84,7 @@ test: all teststatic testshared teststatic: static @TMPST=tmpst_$$; \ - if echo hello world | ${QEMU_RUN} ./minigzip | ${QEMU_RUN} ./minigzip -d && ${QEMU_RUN} ./example $$TMPST ; then \ + if echo hello world | ${QEMU_RUN} ./minigzip | ${QEMU_RUN} ./minigzip -d && ${QEMU_RUN} ./example $$TMPST && ${QEMU_RUN} ./crc32_test; then \ echo ' *** zlib test OK ***'; \ else \ echo ' *** zlib test FAILED ***'; false; \ @@ -96,7 +97,7 @@ testshared: shared DYLD_LIBRARY_PATH=`pwd`:$(DYLD_LIBRARY_PATH) ; export DYLD_LIBRARY_PATH; \ SHLIB_PATH=`pwd`:$(SHLIB_PATH) ; export SHLIB_PATH; \ TMPSH=tmpsh_$$; \ - if echo hello world | ${QEMU_RUN} ./minigzipsh | ${QEMU_RUN} ./minigzipsh -d && ${QEMU_RUN} ./examplesh $$TMPSH; then \ + if echo hello world | ${QEMU_RUN} ./minigzipsh | ${QEMU_RUN} ./minigzipsh -d && ${QEMU_RUN} ./examplesh $$TMPSH && ${QEMU_RUN} ./crc32_testsh; then \ echo ' *** zlib shared test OK ***'; \ else \ echo ' *** zlib shared test FAILED ***'; false; \ @@ -105,7 +106,7 @@ testshared: shared test64: all64 @TMP64=tmp64_$$; \ - if echo hello world | ${QEMU_RUN} ./minigzip64 | ${QEMU_RUN} ./minigzip64 -d && ${QEMU_RUN} ./example64 $$TMP64; then \ + if echo hello world | ${QEMU_RUN} ./minigzip64 | ${QEMU_RUN} ./minigzip64 -d && ${QEMU_RUN} ./example64 $$TMP64 && ${QEMU_RUN} ./crc32_test64; then \ echo ' *** zlib 64-bit test OK ***'; \ else \ echo ' *** zlib 64-bit test FAILED ***'; false; \ @@ -139,12 +140,26 @@ match.lo: match.S mv _match.o match.lo rm -f _match.s +dfltcc.o: 
$(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/dfltcc.c + +dfltcc.lo: $(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h + -@mkdir objs 2>/dev/null || test -d objs + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/dfltcc.o $(SRCDIR)contrib/s390/dfltcc.c + -@mv objs/dfltcc.o $@ + +crc32_test.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h + $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/crc32_test.c + example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/example.c minigzip.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/minigzip.c +crc32_test64.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h + $(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/crc32_test.c + example64.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h $(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/example.c @@ -158,6 +173,12 @@ adler32.o: $(SRCDIR)adler32.c crc32.o: $(SRCDIR)crc32.c $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c +crc32_z_power8.o: $(SRCDIR)contrib/power/crc32_z_power8.c + $(CC) $(CFLAGS) -mcpu=power8 $(ZINC) -c -o $@ $(SRCDIR)contrib/power/crc32_z_power8.c + +crc32-vx.o: $(SRCDIR)contrib/s390/crc32-vx.c + $(CC) $(CFLAGS) $(VGFMAFLAG) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/crc32-vx.c + deflate.o: $(SRCDIR)deflate.c $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c @@ -208,6 +229,16 @@ crc32.lo: $(SRCDIR)crc32.c $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c -@mv objs/crc32.o $@ +crc32-vx.lo: $(SRCDIR)contrib/s390/crc32-vx.c + -@mkdir objs 2>/dev/null || test -d objs + $(CC) $(SFLAGS) $(VGFMAFLAG) $(ZINC) -DPIC -c -o objs/crc32-vx.o $(SRCDIR)contrib/s390/crc32-vx.c + -@mv objs/crc32-vx.o $@ + +crc32_z_power8.lo: $(SRCDIR)contrib/power/crc32_z_power8.c + -@mkdir objs 2>/dev/null || test -d objs + $(CC) $(SFLAGS) -mcpu=power8 $(ZINC) 
-DPIC -c -o objs/crc32_z_power8.o $(SRCDIR)contrib/power/crc32_z_power8.c + -@mv objs/crc32_z_power8.o $@ + deflate.lo: $(SRCDIR)deflate.c -@mkdir objs 2>/dev/null || test -d objs $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c @@ -281,18 +312,27 @@ placebo $(SHAREDLIBV): $(PIC_OBJS) libz.a ln -s $@ $(SHAREDLIBM) -@rmdir objs +crc32_test$(EXE): crc32_test.o $(STATICLIB) + $(CC) $(CFLAGS) -o $@ crc32_test.o $(TEST_LDFLAGS) + example$(EXE): example.o $(STATICLIB) $(CC) $(CFLAGS) -o $@ example.o $(TEST_LDFLAGS) minigzip$(EXE): minigzip.o $(STATICLIB) $(CC) $(CFLAGS) -o $@ minigzip.o $(TEST_LDFLAGS) +crc32_testsh$(EXE): crc32_test.o $(SHAREDLIBV) + $(CC) $(CFLAGS) -o $@ crc32_test.o $(LDFLAGS) -L. $(SHAREDLIBV) + examplesh$(EXE): example.o $(SHAREDLIBV) $(CC) $(CFLAGS) -o $@ example.o $(LDFLAGS) -L. $(SHAREDLIBV) minigzipsh$(EXE): minigzip.o $(SHAREDLIBV) $(CC) $(CFLAGS) -o $@ minigzip.o $(LDFLAGS) -L. $(SHAREDLIBV) +crc32_test64$(EXE): crc32_test64.o $(STATICLIB) + $(CC) $(CFLAGS) -o $@ crc32_test64.o $(TEST_LDFLAGS) + example64$(EXE): example64.o $(STATICLIB) $(CC) $(CFLAGS) -o $@ example64.o $(TEST_LDFLAGS) @@ -368,8 +408,8 @@ minizip-clean: mostlyclean: clean clean: minizip-clean rm -f *.o *.lo *~ \ - example$(EXE) minigzip$(EXE) examplesh$(EXE) minigzipsh$(EXE) \ - example64$(EXE) minigzip64$(EXE) \ + crc32_test$(EXE) example$(EXE) minigzip$(EXE) crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) \ + crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE) \ infcover \ libz.* foo.gz so_locations \ _match.s maketree contrib/infback9/*.o @@ -391,7 +431,7 @@ tags: adler32.o zutil.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h gzclose.o gzlib.o gzread.o gzwrite.o: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h -compress.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h +compress.o crc32_test.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h crc32.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h deflate.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h 
$(SRCDIR)zlib.h zconf.h infback.o inflate.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h @@ -401,7 +441,7 @@ trees.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)tr adler32.lo zutil.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h gzclose.lo gzlib.lo gzread.lo gzwrite.lo: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h -compress.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h +compress.lo crc32_test.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h crc32.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h deflate.lo: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h infback.lo inflate.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h diff --git a/compress.c b/compress.c index f43bacf7a..08a066095 100644 --- a/compress.c +++ b/compress.c @@ -5,9 +5,15 @@ /* @(#) $Id$ */ -#define ZLIB_INTERNAL +#include "zutil.h" #include "zlib.h" +#ifdef DFLTCC +# include "contrib/s390/dfltcc.h" +#else +#define DEFLATE_BOUND_COMPLEN(source_len) 0 +#endif + /* =========================================================================== Compresses the source buffer into the destination buffer. The level parameter has the same meaning as in deflateInit. sourceLen is the byte @@ -70,6 +76,12 @@ int ZEXPORT compress(Bytef *dest, uLongf *destLen, const Bytef *source, this function needs to be updated. */ uLong ZEXPORT compressBound(uLong sourceLen) { + uLong complen = DEFLATE_BOUND_COMPLEN(sourceLen); + + if (complen > 0) + /* Architecture-specific code provided an upper bound. 
*/ + return complen + ZLIB_WRAPLEN; + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + (sourceLen >> 25) + 13; } diff --git a/configure b/configure index cc867c944..b99a3484d 100755 --- a/configure +++ b/configure @@ -117,6 +117,7 @@ case "$1" in echo ' configure [--const] [--zprefix] [--prefix=PREFIX] [--eprefix=EXPREFIX]' | tee -a configure.log echo ' [--static] [--64] [--libdir=LIBDIR] [--sharedlibdir=LIBDIR]' | tee -a configure.log echo ' [--includedir=INCLUDEDIR] [--archs="-arch i386 -arch x86_64"]' | tee -a configure.log + echo ' [--dfltcc] [--dfltcc-level-mask=MASK]' | tee -a configure.log exit 0 ;; -p*=* | --prefix=*) prefix=`echo $1 | sed 's/.*=//'`; shift ;; -e*=* | --eprefix=*) exec_prefix=`echo $1 | sed 's/.*=//'`; shift ;; @@ -143,6 +144,16 @@ case "$1" in --sanitize) address=1; shift ;; --address) address=1; shift ;; --memory) memory=1; shift ;; + --dfltcc) + CFLAGS="$CFLAGS -DDFLTCC" + OBJC="$OBJC dfltcc.o" + PIC_OBJC="$PIC_OBJC dfltcc.lo" + shift + ;; + --dfltcc-level-mask=*) + CFLAGS="$CFLAGS -DDFLTCC_LEVEL_MASK=`echo $1 | sed 's/.*=//'`" + shift + ;; *) echo "unknown option: $1" | tee -a configure.log echo "$0 --help for help" | tee -a configure.log @@ -834,6 +845,114 @@ EOF fi fi +# Check whether sys/sdt.h is available +cat > $test.c << EOF +#include <sys/sdt.h> +int main() { return 0; } +EOF +if try $CC -c $CFLAGS $test.c; then + echo "Checking for sys/sdt.h ... Yes." | tee -a configure.log + CFLAGS="$CFLAGS -DHAVE_SYS_SDT_H" + SFLAGS="$SFLAGS -DHAVE_SYS_SDT_H" +else + echo "Checking for sys/sdt.h ... No." | tee -a configure.log +fi + +# test to see if we can use a gnu indirection function to detect and load optimized code at runtime +echo >> configure.log +cat > $test.c <> configure.log +cat > $test.c < $test.c + + if tryboth $CC -c $CFLAGS -mcpu=power8 $test.c; then + POWER8="-DZ_POWER8" + PIC_OBJC="${PIC_OBJC} crc32_z_power8.lo" + OBJC="${OBJC} crc32_z_power8.o" + echo "Checking for -mcpu=power8 support... Yes." 
| tee -a configure.log + else + echo "Checking for -mcpu=power8 support... No." | tee -a configure.log + fi + + if tryboth $CC -c $CFLAGS -mcpu=power9 $test.c; then + POWER9="-DZ_POWER9" + PIC_OBJC="${PIC_OBJC}" + OBJC="${OBJC}" + echo "Checking for -mcpu=power9 support... Yes." | tee -a configure.log + else + echo "Checking for -mcpu=power9 support... No." | tee -a configure.log + fi + + SFLAGS="${SFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT" + CFLAGS="${CFLAGS} ${POWER8} ${POWER9} -DZ_POWER_OPT" + echo "Checking for Power optimizations support... Yes." | tee -a configure.log +else + echo "Checking for Power optimizations support... No." | tee -a configure.log +fi + +# check if we are compiling for s390 and binutils support vector extensions +VGFMAFLAG=-march=z13 +cat > $test.c <> configure.log echo ALL = $ALL >> configure.log @@ -865,6 +984,7 @@ echo mandir = $mandir >> configure.log echo prefix = $prefix >> configure.log echo sharedlibdir = $sharedlibdir >> configure.log echo uname = $uname >> configure.log +echo VGFMAFLAG = $VGFMAFLAG >> configure.log # update Makefile with the configure results sed < ${SRCDIR}Makefile.in " @@ -874,6 +994,7 @@ sed < ${SRCDIR}Makefile.in " /^LDFLAGS *=/s#=.*#=$LDFLAGS# /^LDSHARED *=/s#=.*#=$LDSHARED# /^CPP *=/s#=.*#=$CPP# +/^VGFMAFLAG *=/s#=.*#=$VGFMAFLAG# /^STATICLIB *=/s#=.*#=$STATICLIB# /^SHAREDLIB *=/s#=.*#=$SHAREDLIB# /^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV# diff --git a/contrib/README.contrib b/contrib/README.contrib index 5e5f95054..a36d4047c 100644 --- a/contrib/README.contrib +++ b/contrib/README.contrib @@ -11,6 +11,10 @@ ada/ by Dmitriy Anisimkov blast/ by Mark Adler Decompressor for output of PKWare Data Compression Library (DCL) +gcc/ by Matheus Castanho + and Rogerio Alves + Optimization helpers using GCC-specific extensions + delphi/ by Cosmin Truta Support for Delphi and C++ Builder @@ -42,10 +46,19 @@ minizip/ by Gilles Vollant pascal/ by Bob Dellaca et al. 
Support for Pascal +power/ by Daniel Black + Matheus Castanho + and Rogerio Alves + Optimized functions for Power processors + puff/ by Mark Adler Small, low memory usage inflate. Also serves to provide an unambiguous description of the deflate format. +s390/ by Ilya Leoshkevich + Hardware-accelerated deflate on IBM Z with DEFLATE CONVERSION CALL + instruction. + testzlib/ by Gilles Vollant Example of the use of zlib diff --git a/contrib/gcc/zifunc.h b/contrib/gcc/zifunc.h new file mode 100644 index 000000000..b62379ed8 --- /dev/null +++ b/contrib/gcc/zifunc.h @@ -0,0 +1,79 @@ +/* Copyright (C) 2019 Matheus Castanho , IBM + * 2019 Rogerio Alves , IBM + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifndef Z_IFUNC_H_ +#define Z_IFUNC_H_ + +/* Helpers for arch optimizations */ + +#if defined(__clang__) +#if __has_feature(coverage_sanitizer) +#define Z_IFUNC_NO_SANCOV __attribute__((no_sanitize("coverage"))) +#else /* __has_feature(coverage_sanitizer) */ +#define Z_IFUNC_NO_SANCOV +#endif /* __has_feature(coverage_sanitizer) */ +#else /* __clang__ */ +#define Z_IFUNC_NO_SANCOV +#endif /* __clang__ */ + +#ifdef __s390__ +#define Z_IFUNC_PARAMS unsigned long hwcap +#define Z_IFUNC_ATTRS Z_IFUNC_NO_SANCOV +#else /* __s390__ */ +#define Z_IFUNC_PARAMS void +#define Z_IFUNC_ATTRS +#endif /* __s390__ */ + +#define Z_IFUNC(fname) \ + __typeof__(fname) fname __attribute__ ((ifunc (#fname "_resolver"))); \ + Z_IFUNC_ATTRS \ + local __typeof__(fname) *fname##_resolver(Z_IFUNC_PARAMS) +/* This is a helper macro to declare a resolver for an indirect function + * (ifunc). Let's say you have function + * + * int foo (int a); + * + * for which you want to provide different implementations, for example: + * + * int foo_clever (int a) { + * ... clever things ... + * } + * + * int foo_smart (int a) { + * ... smart things ... 
+ * } + * + * You will have to declare foo() as an indirect function and also provide a + * resolver for it, to choose between foo_clever() and foo_smart() based on + * some criteria you define (e.g. processor features). + * + * Since most likely foo() has a default implementation somewhere in zlib, you + * may have to rename it so the 'foo' symbol can be used by the ifunc without + * conflicts. + * + * #define foo foo_default + * int foo (int a) { + * ... + * } + * #undef foo + * + * Now you just have to provide a resolver function to choose which function + * should be used (decided at runtime on the first call to foo()): + * + * Z_IFUNC(foo) { + * if (... some condition ...) + * return foo_clever; + * + * if (... other condition ...) + * return foo_smart; + * + * return foo_default; + * } + * + * All calls to foo() throughout the code can remain untouched, all the magic + * will be done by the linker using the resolver function. + */ + +#endif /* Z_IFUNC_H_ */ diff --git a/contrib/power/clang_workaround.h b/contrib/power/clang_workaround.h new file mode 100644 index 000000000..915f7e528 --- /dev/null +++ b/contrib/power/clang_workaround.h @@ -0,0 +1,87 @@ +#ifndef CLANG_WORKAROUNDS_H +#define CLANG_WORKAROUNDS_H + +/* + * These stubs fix clang incompatibilities with GCC builtins. + */ + +#ifndef __builtin_crypto_vpmsumw +#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb +#endif +#ifndef __builtin_crypto_vpmsumd +#define __builtin_crypto_vpmsumd __builtin_crypto_vpmsumb +#endif + +static inline +__vector unsigned long long __attribute__((overloadable)) +vec_ld(int __a, const __vector unsigned long long* __b) +{ + return (__vector unsigned long long)__builtin_altivec_lvx(__a, __b); +} + +/* + * GCC __builtin_pack_vector_int128 returns a vector __int128_t but Clang + * does not recognize this type. On GCC this builtin is translated to a + * xxpermdi instruction that only moves the registers __a, __b instead generates + * a load. 
+ * + * Clang has vec_xxpermdi intrinsics. It was implemented in 4.0.0. + */ +static inline +__vector unsigned long long __builtin_pack_vector (unsigned long __a, + unsigned long __b) +{ + #if defined(__BIG_ENDIAN__) + __vector unsigned long long __v = {__a, __b}; + #else + __vector unsigned long long __v = {__b, __a}; + #endif + return __v; +} + +/* + * Clang 7 changed the behavior of vec_xxpermdi in order to provide the same + * behavior of GCC. That means code adapted to Clang >= 7 does not work on + * Clang <= 6. So, fallback to __builtin_unpack_vector() on Clang <= 6. + */ +#if !defined vec_xxpermdi || __clang_major__ <= 6 + +static inline +unsigned long __builtin_unpack_vector (__vector unsigned long long __v, + int __o) +{ + return __v[__o]; +} + +#if defined(__BIG_ENDIAN__) +#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0) +#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1) +#else +#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1) +#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0) +#endif + +#else + +static inline +unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v) +{ + #if defined(__BIG_ENDIAN__) + return vec_xxpermdi(__v, __v, 0x0)[0]; + #else + return vec_xxpermdi(__v, __v, 0x3)[0]; + #endif +} + +static inline +unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v) +{ + #if defined(__BIG_ENDIAN__) + return vec_xxpermdi(__v, __v, 0x3)[0]; + #else + return vec_xxpermdi(__v, __v, 0x0)[0]; + #endif +} +#endif /* vec_xxpermdi */ + +#endif diff --git a/contrib/power/crc32_constants.h b/contrib/power/crc32_constants.h new file mode 100644 index 000000000..3d011507d --- /dev/null +++ b/contrib/power/crc32_constants.h @@ -0,0 +1,1206 @@ +/* +* +* THIS FILE IS GENERATED WITH +./crc32_constants -c -r -x 0x04C11DB7 + +* This is from https://github.com/antonblanchard/crc32-vpmsum/ +* DO NOT MODIFY IT MANUALLY! 
+* +*/ + +#define CRC 0x4c11db7 +#define CRC_XOR +#define REFLECT +#define MAX_SIZE 32768 + +#ifndef __ASSEMBLER__ +#ifdef CRC_TABLE +static const unsigned int crc_table[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 
0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,}; + +#endif /* CRC_TABLE */ +#ifdef POWER8_INTRINSICS + +/* Constants */ + +/* Reduce 262144 kbits to 1024 bits */ +static const __vector unsigned long long vcrc_const[255] + __attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x0000000099ea94a8, 0x00000001651797d2 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x00000000945a8420, 0x0000000021e0d56c }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 0x0000000030762706, 0x000000000f95ecaa }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 
0x00000001a52fc582, 0x00000001ebd224ac }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x00000001a4a7167a, 0x000000000ccb97ca }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x000000000c18249a, 0x00000001006ec8a8 }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x00000000a924ae7c, 0x000000014f58f196 }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x00000001e12ccc12, 0x00000001a7192ca6 }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x00000000a0b9d4ac, 0x000000019a64bab2 }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x0000000095e8ddfe, 0x0000000014f4ed2e }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x00000000233fddc4, 0x000000011092b6a2 }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x00000001b4529b62, 0x00000000c8a1629c }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + { 0x00000001a7fa0e64, 0x000000017bf32e8e }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 0x00000001b5334592, 0x00000001f8cc6582 }, + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + { 0x000000011f8ee1b4, 0x000000008631ddf0 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000006252e632, 0x000000007e5a76d0 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x00000000ab973e84, 0x000000002b09b31c }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x000000007734f5ec, 0x00000001b2df1f84 }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x000000007c547798, 0x00000001d6f56afc }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x000000007ec40210, 0x00000001b9b5e70c }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x00000001ab1695a8, 0x0000000034b626d2 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x0000000090494bba, 0x000000014c53479a }, + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x00000001123fb816, 0x00000001a6d179a4 }, 
+ /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x00000001e188c74c, 0x000000015abd16b4 }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x00000001c2d3451c, 0x00000000018f9852 }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x00000000f55cf1ca, 0x000000001fb3084a }, + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x00000001a0531540, 0x00000000c53dfb04 }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x0000000132cd7ebc, 0x00000000e10c9ad6 }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x0000000073ab7f36, 0x0000000025aa994a }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x0000000041aed1c2, 0x00000000fa3a74c4 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000136c53800, 0x0000000033eb3f40 }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x0000000126835a30, 0x000000017193f296 }, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x000000006241b502, 0x0000000043f6c86a }, + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x00000000d5196ad4, 0x000000016b513ec6 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x000000009cfa769a, 0x00000000c8f25b4e }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x00000000920e5df4, 0x00000001a45048ec }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x0000000169dc310e, 0x000000000c441004 }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x0000000009fc331c, 0x000000000e17cad6 }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x000000010d94a81e, 0x00000001253ae964 }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x0000000027a20ab2, 0x00000001d7c88ebc }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x0000000114f87504, 0x00000001e7ca913a }, + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + { 0x000000004b076d96, 0x0000000033ed078a }, + /* x^218112 mod p(x)` << 1, x^218176 mod 
p(x)` << 1 */ + { 0x00000000da4d1e74, 0x00000000e1839c78 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x000000001b81f672, 0x00000001322b267e }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x000000009367c988, 0x00000000638231b6 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x00000001717214ca, 0x00000001ee7f16f4 }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x000000009f47d820, 0x0000000117d9924a }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x000000010d9a47d2, 0x00000000e1a9e0c4 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000000a696c58c, 0x00000001403731dc }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x000000002aa28ec6, 0x00000001a5ea9682 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x00000001fe18fd9a, 0x0000000101c5c578 }, + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + { 0x000000019d4fc1ae, 0x00000000dddf6494 }, + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + { 0x00000001ba0e3dea, 0x00000000f1c3db28 }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x0000000074b59a5e, 0x000000013112fb9c }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x00000000f2b5ea98, 0x00000000b680b906 }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x0000000187132676, 0x000000001a282932 }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + { 0x000000010a8c6ad4, 0x0000000089406e7e }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x00000001e21dfe70, 0x00000001def6be8c }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x00000001da0050e4, 0x0000000075258728 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x00000000772172ae, 0x000000019536090a }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + { 0x00000000e47724aa, 0x00000000f2455bfc }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x000000003cd63ac4, 
0x000000018c40baf4 }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x00000001bf47d352, 0x000000004cd390d4 }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x000000018dc1d708, 0x00000001e4ece95a }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x000000002d4620a4, 0x000000001a3ee918 }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x0000000058fd1740, 0x000000007c652fb8 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x00000000dadd9bfc, 0x000000011c67842c }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x00000001ea2140be, 0x00000000254f759c }, + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + { 0x000000009de128ba, 0x000000007ece94ca }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x000000013ac3aa8e, 0x0000000038f258c2 }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x0000000099980562, 0x00000001cdf17b00 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x00000001c1579c86, 0x000000011f882c16 }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x0000000068dbbf94, 0x0000000100093fc8 }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x000000004509fb04, 0x00000001cd684f16 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x00000001202f6398, 0x000000004bc6a70a }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x000000013aea243e, 0x000000004fc7e8e4 }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x00000001b4052ae6, 0x0000000130103f1c }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x00000001cd2a0ae8, 0x0000000111b0024c }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x00000001fe4aa8b4, 0x000000010b3079da }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x00000001d1559a42, 0x000000010192bcc2 }, + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x00000001f3e05ecc, 0x0000000074838d50 }, + /* x^178176 mod 
p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x0000000104ddd2cc, 0x000000001b20f520 }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x000000015393153c, 0x0000000050c3590a }, + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + { 0x0000000057e942c6, 0x00000000b41cac8e }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x000000012c633850, 0x000000000c72cc78 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x00000000ebcaae4c, 0x0000000030cdb032 }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x000000013ee532a6, 0x000000013e09fc32 }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x00000001bf0cbc7e, 0x000000001ed624d2 }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x00000000d50b7a5a, 0x00000000781aee1a }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x0000000002fca6e8, 0x00000001c4d8348c }, + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x000000007af40044, 0x0000000057a40336 }, + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x0000000016178744, 0x0000000085544940 }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x000000014c177458, 0x000000019cd21e80 }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x000000011b6ddf04, 0x000000013eb95bc0 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x00000001f3e29ccc, 0x00000001dfc9fdfc }, + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x0000000135ae7562, 0x00000000cd028bc2 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x0000000190ef812c, 0x0000000090db8c44 }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x0000000067a2c786, 0x000000010010a4ce }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x0000000048b9496c, 0x00000001c8f4c72c }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 0x000000015a422de6, 0x000000001c26170c }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 
0x00000001ef0e3640, 0x00000000e3fccf68 }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x00000001006d2d26, 0x00000000d513ed24 }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x00000001170d56d6, 0x00000000141beada }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x00000000a5fb613c, 0x000000011071aea0 }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x0000000040bbf7fc, 0x000000012e19080a }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x000000016ac3a5b2, 0x0000000100ecf826 }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x00000000abf16230, 0x0000000069b09412 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x00000001ebe23fac, 0x0000000122297bac }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x000000008b6a0894, 0x00000000e9e4b068 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 0x00000001288ea478, 0x000000004b38651a }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 0x000000016619c442, 0x00000001468360e2 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x0000000086230038, 0x00000000121c2408 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x000000017746a756, 0x00000000da7e7d08 }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x0000000191b8f8f8, 0x00000001058d7652 }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x000000008e167708, 0x000000014a098a90 }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x0000000148b22d54, 0x0000000020dbe72e }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x0000000044ba2c3c, 0x000000011e7323e8 }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x00000000b54d2b52, 0x00000000d5d4bf94 }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x0000000005a4fd8a, 0x0000000199d8746c }, + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x0000000139f9fc46, 0x00000000ce9ca8a0 }, 
+ /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x000000015a1fa824, 0x00000000136edece }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x000000000a61ae4c, 0x000000019b92a068 }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x0000000145e9113e, 0x0000000071d62206 }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x000000006a348448, 0x00000000dfc50158 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x000000004d80a08c, 0x00000001517626bc }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + { 0x000000014b6837a0, 0x0000000148d1e4fa }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x000000016896a7fc, 0x0000000094d8266e }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x000000014f187140, 0x00000000606c5e34 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x000000019581b9da, 0x000000019766beaa }, + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + { 0x00000001091bc984, 0x00000001d80c506c }, + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x000000001067223c, 0x000000001e73837c }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x00000001ab16ea02, 0x0000000064d587de }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x000000013c4598a8, 0x00000000f4a507b0 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x00000000b3735430, 0x0000000040e342fc }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x00000001bb3fc0c0, 0x00000001d5ad9c3a }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x00000001570ae19c, 0x0000000094a691a4 }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x00000001ea910712, 0x00000001271ecdfa }, + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + { 0x0000000167127128, 0x000000009e54475a }, + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + { 0x0000000019e790a2, 0x00000000c9c099ee }, + /* x^118784 mod p(x)` << 1, x^118848 mod 
p(x)` << 1 */ + { 0x000000003788f710, 0x000000009a2f736c }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x00000001682a160e, 0x00000000bb9f4996 }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x000000007f0ebd2e, 0x00000001db688050 }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x000000002b032080, 0x00000000e9b10af4 }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x00000000cfd1664a, 0x000000012d4545e4 }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x00000000aa1181c2, 0x000000000361139c }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x00000000ddd08002, 0x00000001a5a1a3a8 }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x00000000e8dd0446, 0x000000006844e0b0 }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x00000001bbd94a00, 0x00000000c3762f28 }, + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + { 0x00000000ab6cd180, 0x00000001d26287a2 }, + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + { 0x0000000031803ce2, 0x00000001f6f0bba8 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x0000000024f40b0c, 0x000000002ffabd62 }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x00000001ba1d9834, 0x00000000fb4516b8 }, + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + { 0x0000000104de61aa, 0x000000018cfa961c }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x0000000113e40d46, 0x000000019e588d52 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x00000001415598a0, 0x00000001180f0bbc }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000000bf6c8c90, 0x00000000e1d9177a }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x00000001788b0504, 0x0000000105abc27c }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x0000000038385d02, 0x00000000972e4a58 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x00000001b6c83844, 
0x0000000183499a5e }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x0000000051061a8a, 0x00000001c96a8cca }, + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x000000017351388a, 0x00000001a1a5b60c }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x0000000132928f92, 0x00000000e4b6ac9c }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x00000000e6b4f48a, 0x00000001807e7f5a }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x0000000039d15e90, 0x000000017a7e3bc8 }, + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x00000000312d6074, 0x00000000d73975da }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x000000017bbb2cc4, 0x000000017375d038 }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x000000016ded3e18, 0x00000000193680bc }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x00000000f1638b16, 0x00000000999b06f6 }, + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x00000001d38b9ecc, 0x00000001f685d2b8 }, + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x000000018b8d09dc, 0x00000001f4ecbed2 }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x00000000e7bc27d2, 0x00000000ba16f1a0 }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x00000000275e1e96, 0x0000000115aceac4 }, + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x00000000e2e3031e, 0x00000001aeff6292 }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x00000001041c84d8, 0x000000009640124c }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x00000000706ce672, 0x0000000114f41f02 }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x000000015d5070da, 0x000000009c5f3586 }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x0000000038f9493a, 0x00000001878275fa }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 0x00000000a3348a76, 0x00000000ddc42ce8 }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 
0x00000001ad0aab92, 0x0000000181d2c73a }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x000000019e85f712, 0x0000000141c9320a }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x000000005a871e76, 0x000000015235719a }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x000000017249c662, 0x00000000be27d804 }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x000000003a084712, 0x000000006242d45a }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x00000000ed438478, 0x000000009a53638e }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x00000000abac34cc, 0x00000001001ecfb6 }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x000000005f35ef3e, 0x000000016d7c2d64 }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x0000000047d6608c, 0x00000001d0ce46c0 }, + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + { 0x000000002d01470e, 0x0000000124c907b4 }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x0000000158bbc7b0, 0x0000000018a555ca }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x00000000c0a23e8e, 0x000000006b0980bc }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x00000001ebd85c88, 0x000000008bbba964 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x000000019ee20bb2, 0x00000001070a5a1e }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x00000001acabf2d6, 0x000000002204322a }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x00000001b7963d56, 0x00000000a27524d0 }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x000000017bffa1fe, 0x0000000020b1e4ba }, + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + { 0x000000001f15333e, 0x0000000032cc27fc }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x000000018593129e, 0x0000000044dd22b8 }, + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + { 0x000000019cb32602, 0x00000000dffc9e0a }, + /* x^58368 mod p(x)` << 1, x^58432 
mod p(x)` << 1 */ + { 0x0000000142b05cc8, 0x00000001b7a0ed14 }, + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + { 0x00000001be49e7a4, 0x00000000c7842488 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x0000000108f69d6c, 0x00000001c02a4fee }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x000000006c0971f0, 0x000000003c273778 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x000000005b16467a, 0x00000001d63f8894 }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x00000001551a628e, 0x000000006be557d6 }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x000000019e42ea92, 0x000000006a7806ea }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x000000012fa83ff2, 0x000000016155aa0c }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x000000011ca9cde0, 0x00000000908650ac }, + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + { 0x00000000c8e5cd74, 0x00000000aa5a8084 }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 0x0000000096c27f0c, 0x0000000191bb500a }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x000000002baed926, 0x0000000064e9bed0 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x000000017c8de8d2, 0x000000009444f302 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x00000000d43d6068, 0x000000019db07d3c }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x00000000cb2c4b26, 0x00000001359e3e6e }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x0000000145b8da26, 0x00000001e4f10dd2 }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x000000018fff4b08, 0x0000000124f5735e }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x0000000150b58ed0, 0x0000000124760a4c }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x00000001549f39bc, 0x000000000f1fc186 }, + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000ef4d2f42, 0x00000000150e4cc4 }, + /* x^37888 mod 
p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x00000001b1468572, 0x000000002a6204e8 }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x000000013d7403b2, 0x00000000beb1d432 }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x00000001a4681842, 0x0000000135f3f1f0 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x0000000167714492, 0x0000000074fe2232 }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + { 0x00000001e599099a, 0x000000001ac6e2ba }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x00000000fe128194, 0x0000000013fca91e }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000077e8b990, 0x0000000183f4931e }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x00000001a267f63a, 0x00000000b6d9b4e4 }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000001945c245a, 0x00000000b5188656 }, + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + { 0x0000000149002e76, 0x0000000027a81a84 }, + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + { 0x00000001bb8310a4, 0x0000000125699258 }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x000000019ec60bcc, 0x00000001b23de796 }, + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + { 0x000000012d8590ae, 0x00000000fe4365dc }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x0000000065b00684, 0x00000000c68f497a }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x000000015e5aeadc, 0x00000000fbf521ee }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x00000000b77ff2b0, 0x000000015eac3378 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + { 0x0000000188da2ff6, 0x0000000134914b90 }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x0000000063da929a, 0x0000000016335cfe }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x00000001389caa80, 0x000000010372d10c }, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000013db599d2, 0x000000015097b908 
}, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x0000000122505a86, 0x00000001227a7572 }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x000000016bd72746, 0x000000009a8f75c0 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x00000001c3faf1d4, 0x00000000682c77a2 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x00000001111c826c, 0x00000000231f091c }, + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + { 0x00000000153e9fb2, 0x000000007d4439f2 }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x000000002b1f7b60, 0x000000017e221efc }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x00000000b1dba570, 0x0000000167457c38 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000001f6397b76, 0x00000000bdf081c4 }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x0000000156335214, 0x000000016286d6b0 }, + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x00000001d70e3986, 0x00000000c84f001c }, + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x000000003701a774, 0x0000000064efe7c0 }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x00000000ac81ef72, 0x000000000ac2d904 }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x0000000133212464, 0x00000000fd226d14 }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x00000000e4e45610, 0x000000011cfd42e0 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x000000000c1bd370, 0x000000016e5a5678 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x00000001a7b9e7a6, 0x00000001d888fe22 }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x000000007d657a10, 0x00000001af77fcd4 } +#else /* __LITTLE_ENDIAN__ */ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x00000001651797d2, 0x0000000099ea94a8 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x0000000021e0d56c, 0x00000000945a8420 }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 
0x000000000f95ecaa, 0x0000000030762706 }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 0x00000001ebd224ac, 0x00000001a52fc582 }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x000000000ccb97ca, 0x00000001a4a7167a }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x00000001006ec8a8, 0x000000000c18249a }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x000000014f58f196, 0x00000000a924ae7c }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x00000001a7192ca6, 0x00000001e12ccc12 }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x000000019a64bab2, 0x00000000a0b9d4ac }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x0000000014f4ed2e, 0x0000000095e8ddfe }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x000000011092b6a2, 0x00000000233fddc4 }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x00000000c8a1629c, 0x00000001b4529b62 }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + { 0x000000017bf32e8e, 0x00000001a7fa0e64 }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 0x00000001f8cc6582, 0x00000001b5334592 }, + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + { 0x000000008631ddf0, 0x000000011f8ee1b4 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000007e5a76d0, 0x000000006252e632 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x000000002b09b31c, 0x00000000ab973e84 }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x00000001b2df1f84, 0x000000007734f5ec }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x00000001d6f56afc, 0x000000007c547798 }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x00000001b9b5e70c, 0x000000007ec40210 }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x0000000034b626d2, 0x00000001ab1695a8 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x000000014c53479a, 0x0000000090494bba }, 
+ /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x00000001a6d179a4, 0x00000001123fb816 }, + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x000000015abd16b4, 0x00000001e188c74c }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x00000000018f9852, 0x00000001c2d3451c }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x000000001fb3084a, 0x00000000f55cf1ca }, + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x00000000c53dfb04, 0x00000001a0531540 }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x00000000e10c9ad6, 0x0000000132cd7ebc }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x0000000025aa994a, 0x0000000073ab7f36 }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x00000000fa3a74c4, 0x0000000041aed1c2 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000033eb3f40, 0x0000000136c53800 }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x000000017193f296, 0x0000000126835a30 }, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x0000000043f6c86a, 0x000000006241b502 }, + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x000000016b513ec6, 0x00000000d5196ad4 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x00000000c8f25b4e, 0x000000009cfa769a }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x00000001a45048ec, 0x00000000920e5df4 }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x000000000c441004, 0x0000000169dc310e }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x000000000e17cad6, 0x0000000009fc331c }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x00000001253ae964, 0x000000010d94a81e }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x00000001d7c88ebc, 0x0000000027a20ab2 }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x00000001e7ca913a, 0x0000000114f87504 }, + /* x^219136 mod p(x)` << 1, x^219200 mod 
p(x)` << 1 */ + { 0x0000000033ed078a, 0x000000004b076d96 }, + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + { 0x00000000e1839c78, 0x00000000da4d1e74 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x00000001322b267e, 0x000000001b81f672 }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x00000000638231b6, 0x000000009367c988 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x00000001ee7f16f4, 0x00000001717214ca }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x0000000117d9924a, 0x000000009f47d820 }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x00000000e1a9e0c4, 0x000000010d9a47d2 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000001403731dc, 0x00000000a696c58c }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x00000001a5ea9682, 0x000000002aa28ec6 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x0000000101c5c578, 0x00000001fe18fd9a }, + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + { 0x00000000dddf6494, 0x000000019d4fc1ae }, + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + { 0x00000000f1c3db28, 0x00000001ba0e3dea }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x000000013112fb9c, 0x0000000074b59a5e }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x00000000b680b906, 0x00000000f2b5ea98 }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x000000001a282932, 0x0000000187132676 }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + { 0x0000000089406e7e, 0x000000010a8c6ad4 }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x00000001def6be8c, 0x00000001e21dfe70 }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x0000000075258728, 0x00000001da0050e4 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x000000019536090a, 0x00000000772172ae }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + { 0x00000000f2455bfc, 
0x00000000e47724aa }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x000000018c40baf4, 0x000000003cd63ac4 }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x000000004cd390d4, 0x00000001bf47d352 }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x00000001e4ece95a, 0x000000018dc1d708 }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x000000001a3ee918, 0x000000002d4620a4 }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x000000007c652fb8, 0x0000000058fd1740 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x000000011c67842c, 0x00000000dadd9bfc }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x00000000254f759c, 0x00000001ea2140be }, + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + { 0x000000007ece94ca, 0x000000009de128ba }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x0000000038f258c2, 0x000000013ac3aa8e }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x00000001cdf17b00, 0x0000000099980562 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x000000011f882c16, 0x00000001c1579c86 }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x0000000100093fc8, 0x0000000068dbbf94 }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x00000001cd684f16, 0x000000004509fb04 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x000000004bc6a70a, 0x00000001202f6398 }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x000000004fc7e8e4, 0x000000013aea243e }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x0000000130103f1c, 0x00000001b4052ae6 }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x0000000111b0024c, 0x00000001cd2a0ae8 }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x000000010b3079da, 0x00000001fe4aa8b4 }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x000000010192bcc2, 0x00000001d1559a42 }, + /* x^179200 mod 
p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x0000000074838d50, 0x00000001f3e05ecc }, + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x000000001b20f520, 0x0000000104ddd2cc }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x0000000050c3590a, 0x000000015393153c }, + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + { 0x00000000b41cac8e, 0x0000000057e942c6 }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x000000000c72cc78, 0x000000012c633850 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x0000000030cdb032, 0x00000000ebcaae4c }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x000000013e09fc32, 0x000000013ee532a6 }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x000000001ed624d2, 0x00000001bf0cbc7e }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x00000000781aee1a, 0x00000000d50b7a5a }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x00000001c4d8348c, 0x0000000002fca6e8 }, + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x0000000057a40336, 0x000000007af40044 }, + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x0000000085544940, 0x0000000016178744 }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x000000019cd21e80, 0x000000014c177458 }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x000000013eb95bc0, 0x000000011b6ddf04 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x00000001dfc9fdfc, 0x00000001f3e29ccc }, + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x00000000cd028bc2, 0x0000000135ae7562 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x0000000090db8c44, 0x0000000190ef812c }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x000000010010a4ce, 0x0000000067a2c786 }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x00000001c8f4c72c, 0x0000000048b9496c }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 
0x000000001c26170c, 0x000000015a422de6 }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 0x00000000e3fccf68, 0x00000001ef0e3640 }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x00000000d513ed24, 0x00000001006d2d26 }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x00000000141beada, 0x00000001170d56d6 }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x000000011071aea0, 0x00000000a5fb613c }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x000000012e19080a, 0x0000000040bbf7fc }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x0000000100ecf826, 0x000000016ac3a5b2 }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x0000000069b09412, 0x00000000abf16230 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x0000000122297bac, 0x00000001ebe23fac }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x00000000e9e4b068, 0x000000008b6a0894 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 0x000000004b38651a, 0x00000001288ea478 }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 0x00000001468360e2, 0x000000016619c442 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x00000000121c2408, 0x0000000086230038 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x00000000da7e7d08, 0x000000017746a756 }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x00000001058d7652, 0x0000000191b8f8f8 }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x000000014a098a90, 0x000000008e167708 }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x0000000020dbe72e, 0x0000000148b22d54 }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x000000011e7323e8, 0x0000000044ba2c3c }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x00000000d5d4bf94, 0x00000000b54d2b52 }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x0000000199d8746c, 0x0000000005a4fd8a }, 
+ /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x00000000ce9ca8a0, 0x0000000139f9fc46 }, + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x00000000136edece, 0x000000015a1fa824 }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x000000019b92a068, 0x000000000a61ae4c }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x0000000071d62206, 0x0000000145e9113e }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x00000000dfc50158, 0x000000006a348448 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x00000001517626bc, 0x000000004d80a08c }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + { 0x0000000148d1e4fa, 0x000000014b6837a0 }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x0000000094d8266e, 0x000000016896a7fc }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x00000000606c5e34, 0x000000014f187140 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x000000019766beaa, 0x000000019581b9da }, + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + { 0x00000001d80c506c, 0x00000001091bc984 }, + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x000000001e73837c, 0x000000001067223c }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x0000000064d587de, 0x00000001ab16ea02 }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x00000000f4a507b0, 0x000000013c4598a8 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x0000000040e342fc, 0x00000000b3735430 }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x00000001d5ad9c3a, 0x00000001bb3fc0c0 }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x0000000094a691a4, 0x00000001570ae19c }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x00000001271ecdfa, 0x00000001ea910712 }, + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + { 0x000000009e54475a, 0x0000000167127128 }, + /* x^119808 mod p(x)` << 1, x^119872 mod 
p(x)` << 1 */ + { 0x00000000c9c099ee, 0x0000000019e790a2 }, + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + { 0x000000009a2f736c, 0x000000003788f710 }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x00000000bb9f4996, 0x00000001682a160e }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x00000001db688050, 0x000000007f0ebd2e }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x00000000e9b10af4, 0x000000002b032080 }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x000000012d4545e4, 0x00000000cfd1664a }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x000000000361139c, 0x00000000aa1181c2 }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x00000001a5a1a3a8, 0x00000000ddd08002 }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x000000006844e0b0, 0x00000000e8dd0446 }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x00000000c3762f28, 0x00000001bbd94a00 }, + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + { 0x00000001d26287a2, 0x00000000ab6cd180 }, + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + { 0x00000001f6f0bba8, 0x0000000031803ce2 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x000000002ffabd62, 0x0000000024f40b0c }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x00000000fb4516b8, 0x00000001ba1d9834 }, + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + { 0x000000018cfa961c, 0x0000000104de61aa }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x000000019e588d52, 0x0000000113e40d46 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x00000001180f0bbc, 0x00000001415598a0 }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000000e1d9177a, 0x00000000bf6c8c90 }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x0000000105abc27c, 0x00000001788b0504 }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x00000000972e4a58, 
0x0000000038385d02 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x0000000183499a5e, 0x00000001b6c83844 }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x00000001c96a8cca, 0x0000000051061a8a }, + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x00000001a1a5b60c, 0x000000017351388a }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x00000000e4b6ac9c, 0x0000000132928f92 }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x00000001807e7f5a, 0x00000000e6b4f48a }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x000000017a7e3bc8, 0x0000000039d15e90 }, + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x00000000d73975da, 0x00000000312d6074 }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x000000017375d038, 0x000000017bbb2cc4 }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x00000000193680bc, 0x000000016ded3e18 }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x00000000999b06f6, 0x00000000f1638b16 }, + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x00000001f685d2b8, 0x00000001d38b9ecc }, + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x00000001f4ecbed2, 0x000000018b8d09dc }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x00000000ba16f1a0, 0x00000000e7bc27d2 }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x0000000115aceac4, 0x00000000275e1e96 }, + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x00000001aeff6292, 0x00000000e2e3031e }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x000000009640124c, 0x00000001041c84d8 }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x0000000114f41f02, 0x00000000706ce672 }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x000000009c5f3586, 0x000000015d5070da }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x00000001878275fa, 0x0000000038f9493a }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 
0x00000000ddc42ce8, 0x00000000a3348a76 }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 0x0000000181d2c73a, 0x00000001ad0aab92 }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x0000000141c9320a, 0x000000019e85f712 }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x000000015235719a, 0x000000005a871e76 }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x00000000be27d804, 0x000000017249c662 }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x000000006242d45a, 0x000000003a084712 }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x000000009a53638e, 0x00000000ed438478 }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x00000001001ecfb6, 0x00000000abac34cc }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x000000016d7c2d64, 0x000000005f35ef3e }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x00000001d0ce46c0, 0x0000000047d6608c }, + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + { 0x0000000124c907b4, 0x000000002d01470e }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x0000000018a555ca, 0x0000000158bbc7b0 }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x000000006b0980bc, 0x00000000c0a23e8e }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x000000008bbba964, 0x00000001ebd85c88 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x00000001070a5a1e, 0x000000019ee20bb2 }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x000000002204322a, 0x00000001acabf2d6 }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x00000000a27524d0, 0x00000001b7963d56 }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x0000000020b1e4ba, 0x000000017bffa1fe }, + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + { 0x0000000032cc27fc, 0x000000001f15333e }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x0000000044dd22b8, 0x000000018593129e }, + /* x^59392 mod p(x)` << 1, x^59456 
mod p(x)` << 1 */ + { 0x00000000dffc9e0a, 0x000000019cb32602 }, + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + { 0x00000001b7a0ed14, 0x0000000142b05cc8 }, + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + { 0x00000000c7842488, 0x00000001be49e7a4 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x00000001c02a4fee, 0x0000000108f69d6c }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x000000003c273778, 0x000000006c0971f0 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x00000001d63f8894, 0x000000005b16467a }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x000000006be557d6, 0x00000001551a628e }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x000000006a7806ea, 0x000000019e42ea92 }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x000000016155aa0c, 0x000000012fa83ff2 }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x00000000908650ac, 0x000000011ca9cde0 }, + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + { 0x00000000aa5a8084, 0x00000000c8e5cd74 }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 0x0000000191bb500a, 0x0000000096c27f0c }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x0000000064e9bed0, 0x000000002baed926 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x000000009444f302, 0x000000017c8de8d2 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x000000019db07d3c, 0x00000000d43d6068 }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x00000001359e3e6e, 0x00000000cb2c4b26 }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x00000001e4f10dd2, 0x0000000145b8da26 }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x0000000124f5735e, 0x000000018fff4b08 }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x0000000124760a4c, 0x0000000150b58ed0 }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x000000000f1fc186, 0x00000001549f39bc }, + /* x^38912 mod 
p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000150e4cc4, 0x00000000ef4d2f42 }, + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x000000002a6204e8, 0x00000001b1468572 }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x00000000beb1d432, 0x000000013d7403b2 }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x0000000135f3f1f0, 0x00000001a4681842 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x0000000074fe2232, 0x0000000167714492 }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + { 0x000000001ac6e2ba, 0x00000001e599099a }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x0000000013fca91e, 0x00000000fe128194 }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000183f4931e, 0x0000000077e8b990 }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x00000000b6d9b4e4, 0x00000001a267f63a }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000000b5188656, 0x00000001945c245a }, + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + { 0x0000000027a81a84, 0x0000000149002e76 }, + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + { 0x0000000125699258, 0x00000001bb8310a4 }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x00000001b23de796, 0x000000019ec60bcc }, + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + { 0x00000000fe4365dc, 0x000000012d8590ae }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x00000000c68f497a, 0x0000000065b00684 }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x00000000fbf521ee, 0x000000015e5aeadc }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x000000015eac3378, 0x00000000b77ff2b0 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + { 0x0000000134914b90, 0x0000000188da2ff6 }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x0000000016335cfe, 0x0000000063da929a }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x000000010372d10c, 0x00000001389caa80 
}, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000015097b908, 0x000000013db599d2 }, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x00000001227a7572, 0x0000000122505a86 }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x000000009a8f75c0, 0x000000016bd72746 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x00000000682c77a2, 0x00000001c3faf1d4 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x00000000231f091c, 0x00000001111c826c }, + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + { 0x000000007d4439f2, 0x00000000153e9fb2 }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x000000017e221efc, 0x000000002b1f7b60 }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x0000000167457c38, 0x00000000b1dba570 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000000bdf081c4, 0x00000001f6397b76 }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x000000016286d6b0, 0x0000000156335214 }, + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x00000000c84f001c, 0x00000001d70e3986 }, + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x0000000064efe7c0, 0x000000003701a774 }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x000000000ac2d904, 0x00000000ac81ef72 }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x00000000fd226d14, 0x0000000133212464 }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x000000011cfd42e0, 0x00000000e4e45610 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x000000016e5a5678, 0x000000000c1bd370 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x00000001d888fe22, 0x00000001a7b9e7a6 }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x00000001af77fcd4, 0x000000007d657a10 } +#endif /* __LITTLE_ENDIAN__ */ + }; + +/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ + +static const __vector unsigned long long 
vcrc_short_const[16] + __attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0x99168a18ec447f11, 0xed837b2613e8221e }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0xe23e954e8fd2cd3c, 0xc8acdd8147b9ce5a }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0x92f8befe6b1d2b53, 0xd9ad6d87d4277e25 }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0xf38a3556291ea462, 0xc10ec5e033fbca3b }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0x974ac56262b6ca4b, 0xc0b55b0e82e02e2f }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x855712b3784d2a56, 0x71aa1df0e172334d }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ + { 0xa5abe9f80eaee722, 0xfee3053e3969324d }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 0x1fa0943ddb54814c, 0xf44779b93eb2bd08 }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0xa53ff440d7bbfe6a, 0xf5449b3f00cc3374 }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0xebe7e3566325605c, 0x6f8346e1d777606e }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0xc65a272ce5b592b8, 0xe3ab4f2ac0b95347 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0x5705a9ca4721589f, 0xaa2215ea329ecc11 }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0xe3720acb88d14467, 0x1ed8f66ed95efd26 }, + /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ + { 0xba1aca0315141c31, 0x78ed02d5a700e96a }, + /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ + { 0xad2a31b3ed627dae, 0xba8ccbe832b39da3 }, + /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ + { 
0x6655004fa06a2517, 0xedb88320b1e6b092 } +#else /* __LITTLE_ENDIAN__ */ + /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0xed837b2613e8221e, 0x99168a18ec447f11 }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0xc8acdd8147b9ce5a, 0xe23e954e8fd2cd3c }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0xd9ad6d87d4277e25, 0x92f8befe6b1d2b53 }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0xc10ec5e033fbca3b, 0xf38a3556291ea462 }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0xc0b55b0e82e02e2f, 0x974ac56262b6ca4b }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x71aa1df0e172334d, 0x855712b3784d2a56 }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ + { 0xfee3053e3969324d, 0xa5abe9f80eaee722 }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 0xf44779b93eb2bd08, 0x1fa0943ddb54814c }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0xf5449b3f00cc3374, 0xa53ff440d7bbfe6a }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0x6f8346e1d777606e, 0xebe7e3566325605c }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0xe3ab4f2ac0b95347, 0xc65a272ce5b592b8 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0xaa2215ea329ecc11, 0x5705a9ca4721589f }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0x1ed8f66ed95efd26, 0xe3720acb88d14467 }, + /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ + { 0x78ed02d5a700e96a, 0xba1aca0315141c31 }, + /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ + { 0xba8ccbe832b39da3, 0xad2a31b3ed627dae }, + /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ + { 0xedb88320b1e6b092, 
0x6655004fa06a2517 } +#endif /* __LITTLE_ENDIAN__ */ + }; + +/* Barrett constants: the two vectors read by the Barrett reduction step in crc32_z_power8.c, which loads them with vec_ld at byte offsets 0 and 16. */ +/* Entry 0 is the 33 bit reflected Barrett constant m = (4^32)/n; the non-REFLECT path multiplies by it for the "ma" product. Entry 1 is what that path multiplies by for the "qn" product, i.e. it plays the role of n. */ + +static const __vector unsigned long long v_Barrett_const[2] + __attribute__((aligned (16))) = { + /* Entry 0: x^64 div p(x). Each constant occupies one 64-bit half of its vector and the other half is zero; the two halves trade places between the endian layouts below. */ +#ifdef __LITTLE_ENDIAN__ + { 0x00000001f7011641, 0x0000000000000000 }, + { 0x00000001db710641, 0x0000000000000000 } +#else /* __LITTLE_ENDIAN__ */ + { 0x0000000000000000, 0x00000001f7011641 }, + { 0x0000000000000000, 0x00000001db710641 } +#endif /* __LITTLE_ENDIAN__ */ + }; +#endif /* POWER8_INTRINSICS */ + +#endif /* __ASSEMBLER__ */ diff --git a/contrib/power/crc32_z_power8.c b/contrib/power/crc32_z_power8.c new file mode 100644 index 000000000..7858cfe0e --- /dev/null +++ b/contrib/power/crc32_z_power8.c @@ -0,0 +1,679 @@ +/* + * Calculate the checksum of data that is 16 byte aligned and a multiple of + * 16 bytes. + * + * The first step is to reduce it to 1024 bits. We do this in 8 parallel + * chunks in order to mask the latency of the vpmsum instructions. If we + * have more than 32 kB of data to checksum we repeat this step multiple + * times, passing in the previous 1024 bits. + * + * The next step is to reduce the 1024 bits to 64 bits. This step adds + * 32 bits of 0s to the end - this matches what a CRC does. We just + * calculate constants that land the data in this 32 bits. + * + * We then use fixed point Barrett reduction to compute a mod n over GF(2) + * for n = CRC using POWER8 instructions. We use x = 32. + * + * http://en.wikipedia.org/wiki/Barrett_reduction + * + * This code uses gcc vector builtins instead using assembly directly. 
+ * + * Copyright (C) 2017 Rogerio Alves , IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of either: + * + * a) the GNU General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) + * any later version, or + * b) the Apache License, Version 2.0 + */ + +#include +#include "../../zutil.h" +#include "power.h" + +#define POWER8_INTRINSICS +#define CRC_TABLE + +#ifdef CRC32_CONSTANTS_HEADER +#include CRC32_CONSTANTS_HEADER +#else +#include "crc32_constants.h" +#endif + +#define VMX_ALIGN 16 +#define VMX_ALIGN_MASK (VMX_ALIGN-1) + +#ifdef REFLECT +static unsigned int crc32_align(unsigned int crc, const unsigned char *p, + unsigned long len) +{ + while (len--) + crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8); + return crc; +} +#else +static unsigned int crc32_align(unsigned int crc, const unsigned char *p, + unsigned long len) +{ + while (len--) + crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8); + return crc; +} +#endif + +static unsigned int __attribute__ ((aligned (32))) +__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len); + +unsigned long ZLIB_INTERNAL _crc32_z_power8(uLong _crc, const Bytef *_p, + z_size_t _len) +{ + unsigned int prealign; + unsigned int tail; + + /* Map zlib API to crc32_vpmsum API */ + unsigned int crc = (unsigned int) (0xffffffff & _crc); + const unsigned char *p = _p; + unsigned long len = (unsigned long) _len; + + if (p == (const unsigned char *) 0x0) return 0; +#ifdef CRC_XOR + crc ^= 0xffffffff; +#endif + + if (len < VMX_ALIGN + VMX_ALIGN_MASK) { + crc = crc32_align(crc, p, len); + goto out; + } + + if ((unsigned long)p & VMX_ALIGN_MASK) { + prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); + crc = crc32_align(crc, p, prealign); + len -= prealign; + p += prealign; + } + + crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); + + tail = len & VMX_ALIGN_MASK; + if (tail) { + p += len & 
~VMX_ALIGN_MASK; + crc = crc32_align(crc, p, tail); + } + +out: +#ifdef CRC_XOR + crc ^= 0xffffffff; +#endif + + /* Convert to zlib API */ + return (unsigned long) crc; +} + +#if defined (__clang__) +#include "clang_workaround.h" +#else +#define __builtin_pack_vector(a, b) __builtin_pack_vector_int128 ((a), (b)) +#define __builtin_unpack_vector_0(a) __builtin_unpack_vector_int128 ((vector __int128_t)(a), 0) +#define __builtin_unpack_vector_1(a) __builtin_unpack_vector_int128 ((vector __int128_t)(a), 1) +#endif + +/* When we have a load-store in a single-dispatch group and address overlap + * such that foward is not allowed (load-hit-store) the group must be flushed. + * A group ending NOP prevents the flush. + */ +#define GROUP_ENDING_NOP asm("ori 2,2,0" ::: "memory") + +#if defined(__BIG_ENDIAN__) && defined (REFLECT) +#define BYTESWAP_DATA +#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT) +#define BYTESWAP_DATA +#endif + +#ifdef BYTESWAP_DATA +#define VEC_PERM(vr, va, vb, vc) vr = vec_perm(va, vb,\ + (__vector unsigned char) vc) +#if defined(__LITTLE_ENDIAN__) +/* Byte reverse permute constant LE. 
*/ +static const __vector unsigned long long vperm_const + __attribute__ ((aligned(16))) = { 0x08090A0B0C0D0E0FUL, + 0x0001020304050607UL }; +#else +static const __vector unsigned long long vperm_const + __attribute__ ((aligned(16))) = { 0x0F0E0D0C0B0A0908UL, + 0X0706050403020100UL }; +#endif +#else +#define VEC_PERM(vr, va, vb, vc) +#endif + +static unsigned int __attribute__ ((aligned (32))) +__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { + + const __vector unsigned long long vzero = {0,0}; + const __vector unsigned long long vones = {0xffffffffffffffffUL, + 0xffffffffffffffffUL}; + +#ifdef REFLECT + const __vector unsigned long long vmask_32bit = + (__vector unsigned long long)vec_sld((__vector unsigned char)vzero, + (__vector unsigned char)vones, 4); +#endif + + const __vector unsigned long long vmask_64bit = + (__vector unsigned long long)vec_sld((__vector unsigned char)vzero, + (__vector unsigned char)vones, 8); + + __vector unsigned long long vcrc; + + __vector unsigned long long vconst1, vconst2; + + /* vdata0-vdata7 will contain our data (p). */ + __vector unsigned long long vdata0, vdata1, vdata2, vdata3, vdata4, + vdata5, vdata6, vdata7; + + /* v0-v7 will contain our checksums */ + __vector unsigned long long v0 = {0,0}; + __vector unsigned long long v1 = {0,0}; + __vector unsigned long long v2 = {0,0}; + __vector unsigned long long v3 = {0,0}; + __vector unsigned long long v4 = {0,0}; + __vector unsigned long long v5 = {0,0}; + __vector unsigned long long v6 = {0,0}; + __vector unsigned long long v7 = {0,0}; + + + /* Vector auxiliary variables. */ + __vector unsigned long long va0, va1, va2, va3, va4, va5, va6, va7; + + unsigned int result = 0; + unsigned int offset; /* Constant table offset. */ + + unsigned long i; /* Counter. */ + unsigned long chunks; + + unsigned long block_size; + int next_block = 0; + + /* Align by 128 bits. The last 128 bit block will be processed at end. 
*/ + unsigned long length = len & 0xFFFFFFFFFFFFFF80UL; + +#ifdef REFLECT + vcrc = (__vector unsigned long long)__builtin_pack_vector(0UL, crc); +#else + vcrc = (__vector unsigned long long)__builtin_pack_vector(crc, 0UL); + + /* Shift into top 32 bits */ + vcrc = (__vector unsigned long long)vec_sld((__vector unsigned char)vcrc, + (__vector unsigned char)vzero, 4); +#endif + + /* Short version. */ + if (len < 256) { + /* Calculate where in the constant table we need to start. */ + offset = 256 - len; + + vconst1 = vec_ld(offset, vcrc_short_const); + vdata0 = vec_ld(0, (__vector unsigned long long*) p); + VEC_PERM(vdata0, vdata0, vconst1, vperm_const); + + /* xor initial value*/ + vdata0 = vec_xor(vdata0, vcrc); + + vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw + ((__vector unsigned int)vdata0, (__vector unsigned int)vconst1); + v0 = vec_xor(v0, vdata0); + + for (i = 16; i < len; i += 16) { + vconst1 = vec_ld(offset + i, vcrc_short_const); + vdata0 = vec_ld(i, (__vector unsigned long long*) p); + VEC_PERM(vdata0, vdata0, vconst1, vperm_const); + vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw + ((__vector unsigned int)vdata0, (__vector unsigned int)vconst1); + v0 = vec_xor(v0, vdata0); + } + } else { + + /* Load initial values. 
*/ + vdata0 = vec_ld(0, (__vector unsigned long long*) p); + vdata1 = vec_ld(16, (__vector unsigned long long*) p); + + VEC_PERM(vdata0, vdata0, vdata0, vperm_const); + VEC_PERM(vdata1, vdata1, vdata1, vperm_const); + + vdata2 = vec_ld(32, (__vector unsigned long long*) p); + vdata3 = vec_ld(48, (__vector unsigned long long*) p); + + VEC_PERM(vdata2, vdata2, vdata2, vperm_const); + VEC_PERM(vdata3, vdata3, vdata3, vperm_const); + + vdata4 = vec_ld(64, (__vector unsigned long long*) p); + vdata5 = vec_ld(80, (__vector unsigned long long*) p); + + VEC_PERM(vdata4, vdata4, vdata4, vperm_const); + VEC_PERM(vdata5, vdata5, vdata5, vperm_const); + + vdata6 = vec_ld(96, (__vector unsigned long long*) p); + vdata7 = vec_ld(112, (__vector unsigned long long*) p); + + VEC_PERM(vdata6, vdata6, vdata6, vperm_const); + VEC_PERM(vdata7, vdata7, vdata7, vperm_const); + + /* xor in initial value */ + vdata0 = vec_xor(vdata0, vcrc); + + p = (char *)p + 128; + + do { + /* Checksum in blocks of MAX_SIZE. */ + block_size = length; + if (block_size > MAX_SIZE) { + block_size = MAX_SIZE; + } + + length = length - block_size; + + /* + * Work out the offset into the constants table to start at. 
Each + * constant is 16 bytes, and it is used against 128 bytes of input + * data - 128 / 16 = 8 + */ + offset = (MAX_SIZE/8) - (block_size/8); + /* We reduce our final 128 bytes in a separate step */ + chunks = (block_size/128)-1; + + vconst1 = vec_ld(offset, vcrc_const); + + va0 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata0, + (__vector unsigned long long)vconst1); + va1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata1, + (__vector unsigned long long)vconst1); + va2 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata2, + (__vector unsigned long long)vconst1); + va3 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata3, + (__vector unsigned long long)vconst1); + va4 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata4, + (__vector unsigned long long)vconst1); + va5 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata5, + (__vector unsigned long long)vconst1); + va6 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata6, + (__vector unsigned long long)vconst1); + va7 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata7, + (__vector unsigned long long)vconst1); + + if (chunks > 1) { + offset += 16; + vconst2 = vec_ld(offset, vcrc_const); + GROUP_ENDING_NOP; + + vdata0 = vec_ld(0, (__vector unsigned long long*) p); + VEC_PERM(vdata0, vdata0, vdata0, vperm_const); + + vdata1 = vec_ld(16, (__vector unsigned long long*) p); + VEC_PERM(vdata1, vdata1, vdata1, vperm_const); + + vdata2 = vec_ld(32, (__vector unsigned long long*) p); + VEC_PERM(vdata2, vdata2, vdata2, vperm_const); + + vdata3 = vec_ld(48, (__vector unsigned long long*) p); + VEC_PERM(vdata3, vdata3, vdata3, vperm_const); + + vdata4 = vec_ld(64, (__vector unsigned long long*) p); + VEC_PERM(vdata4, vdata4, vdata4, vperm_const); + + vdata5 = vec_ld(80, (__vector unsigned long long*) p); + VEC_PERM(vdata5, vdata5, vdata5, vperm_const); + + vdata6 = vec_ld(96, (__vector unsigned long long*) p); + VEC_PERM(vdata6, 
vdata6, vdata6, vperm_const); + + vdata7 = vec_ld(112, (__vector unsigned long long*) p); + VEC_PERM(vdata7, vdata7, vdata7, vperm_const); + + p = (char *)p + 128; + + /* + * main loop. We modulo schedule it such that it takes three + * iterations to complete - first iteration load, second + * iteration vpmsum, third iteration xor. + */ + for (i = 0; i < chunks-2; i++) { + vconst1 = vec_ld(offset, vcrc_const); + offset += 16; + GROUP_ENDING_NOP; + + v0 = vec_xor(v0, va0); + va0 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata0, (__vector unsigned long long)vconst2); + vdata0 = vec_ld(0, (__vector unsigned long long*) p); + VEC_PERM(vdata0, vdata0, vdata0, vperm_const); + GROUP_ENDING_NOP; + + v1 = vec_xor(v1, va1); + va1 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata1, (__vector unsigned long long)vconst2); + vdata1 = vec_ld(16, (__vector unsigned long long*) p); + VEC_PERM(vdata1, vdata1, vdata1, vperm_const); + GROUP_ENDING_NOP; + + v2 = vec_xor(v2, va2); + va2 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata2, (__vector unsigned long long)vconst2); + vdata2 = vec_ld(32, (__vector unsigned long long*) p); + VEC_PERM(vdata2, vdata2, vdata2, vperm_const); + GROUP_ENDING_NOP; + + v3 = vec_xor(v3, va3); + va3 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata3, (__vector unsigned long long)vconst2); + vdata3 = vec_ld(48, (__vector unsigned long long*) p); + VEC_PERM(vdata3, vdata3, vdata3, vperm_const); + + vconst2 = vec_ld(offset, vcrc_const); + GROUP_ENDING_NOP; + + v4 = vec_xor(v4, va4); + va4 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata4, (__vector unsigned long long)vconst1); + vdata4 = vec_ld(64, (__vector unsigned long long*) p); + VEC_PERM(vdata4, vdata4, vdata4, vperm_const); + GROUP_ENDING_NOP; + + v5 = vec_xor(v5, va5); + va5 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata5, (__vector unsigned long long)vconst1); + vdata5 = vec_ld(80, (__vector unsigned 
long long*) p); + VEC_PERM(vdata5, vdata5, vdata5, vperm_const); + GROUP_ENDING_NOP; + + v6 = vec_xor(v6, va6); + va6 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata6, (__vector unsigned long long)vconst1); + vdata6 = vec_ld(96, (__vector unsigned long long*) p); + VEC_PERM(vdata6, vdata6, vdata6, vperm_const); + GROUP_ENDING_NOP; + + v7 = vec_xor(v7, va7); + va7 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata7, (__vector unsigned long long)vconst1); + vdata7 = vec_ld(112, (__vector unsigned long long*) p); + VEC_PERM(vdata7, vdata7, vdata7, vperm_const); + + p = (char *)p + 128; + } + + /* First cool down*/ + vconst1 = vec_ld(offset, vcrc_const); + offset += 16; + + v0 = vec_xor(v0, va0); + va0 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata0, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v1 = vec_xor(v1, va1); + va1 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata1, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v2 = vec_xor(v2, va2); + va2 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata2, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v3 = vec_xor(v3, va3); + va3 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata3, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v4 = vec_xor(v4, va4); + va4 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata4, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v5 = vec_xor(v5, va5); + va5 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata5, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v6 = vec_xor(v6, va6); + va6 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata6, (__vector unsigned long long)vconst1); + GROUP_ENDING_NOP; + + v7 = vec_xor(v7, va7); + va7 = __builtin_crypto_vpmsumd ((__vector unsigned long + long)vdata7, (__vector unsigned long long)vconst1); + }/* else */ + + /* Second cool down. 
*/ + v0 = vec_xor(v0, va0); + v1 = vec_xor(v1, va1); + v2 = vec_xor(v2, va2); + v3 = vec_xor(v3, va3); + v4 = vec_xor(v4, va4); + v5 = vec_xor(v5, va5); + v6 = vec_xor(v6, va6); + v7 = vec_xor(v7, va7); + +#ifdef REFLECT + /* + * vpmsumd produces a 96 bit result in the least significant bits + * of the register. Since we are bit reflected we have to shift it + * left 32 bits so it occupies the least significant bits in the + * bit reflected domain. + */ + v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, + (__vector unsigned char)vzero, 4); + v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v1, + (__vector unsigned char)vzero, 4); + v2 = (__vector unsigned long long)vec_sld((__vector unsigned char)v2, + (__vector unsigned char)vzero, 4); + v3 = (__vector unsigned long long)vec_sld((__vector unsigned char)v3, + (__vector unsigned char)vzero, 4); + v4 = (__vector unsigned long long)vec_sld((__vector unsigned char)v4, + (__vector unsigned char)vzero, 4); + v5 = (__vector unsigned long long)vec_sld((__vector unsigned char)v5, + (__vector unsigned char)vzero, 4); + v6 = (__vector unsigned long long)vec_sld((__vector unsigned char)v6, + (__vector unsigned char)vzero, 4); + v7 = (__vector unsigned long long)vec_sld((__vector unsigned char)v7, + (__vector unsigned char)vzero, 4); +#endif + + /* xor with the last 1024 bits. 
*/ + va0 = vec_ld(0, (__vector unsigned long long*) p); + VEC_PERM(va0, va0, va0, vperm_const); + + va1 = vec_ld(16, (__vector unsigned long long*) p); + VEC_PERM(va1, va1, va1, vperm_const); + + va2 = vec_ld(32, (__vector unsigned long long*) p); + VEC_PERM(va2, va2, va2, vperm_const); + + va3 = vec_ld(48, (__vector unsigned long long*) p); + VEC_PERM(va3, va3, va3, vperm_const); + + va4 = vec_ld(64, (__vector unsigned long long*) p); + VEC_PERM(va4, va4, va4, vperm_const); + + va5 = vec_ld(80, (__vector unsigned long long*) p); + VEC_PERM(va5, va5, va5, vperm_const); + + va6 = vec_ld(96, (__vector unsigned long long*) p); + VEC_PERM(va6, va6, va6, vperm_const); + + va7 = vec_ld(112, (__vector unsigned long long*) p); + VEC_PERM(va7, va7, va7, vperm_const); + + p = (char *)p + 128; + + vdata0 = vec_xor(v0, va0); + vdata1 = vec_xor(v1, va1); + vdata2 = vec_xor(v2, va2); + vdata3 = vec_xor(v3, va3); + vdata4 = vec_xor(v4, va4); + vdata5 = vec_xor(v5, va5); + vdata6 = vec_xor(v6, va6); + vdata7 = vec_xor(v7, va7); + + /* Check if we have more blocks to process */ + next_block = 0; + if (length != 0) { + next_block = 1; + + /* zero v0-v7 */ + v0 = vec_xor(v0, v0); + v1 = vec_xor(v1, v1); + v2 = vec_xor(v2, v2); + v3 = vec_xor(v3, v3); + v4 = vec_xor(v4, v4); + v5 = vec_xor(v5, v5); + v6 = vec_xor(v6, v6); + v7 = vec_xor(v7, v7); + } + length = length + 128; + + } while (next_block); + + /* Calculate how many bytes we have left. */ + length = (len & 127); + + /* Calculate where in (short) constant table we need to start. 
*/ + offset = 128 - length; + + v0 = vec_ld(offset, vcrc_short_const); + v1 = vec_ld(offset + 16, vcrc_short_const); + v2 = vec_ld(offset + 32, vcrc_short_const); + v3 = vec_ld(offset + 48, vcrc_short_const); + v4 = vec_ld(offset + 64, vcrc_short_const); + v5 = vec_ld(offset + 80, vcrc_short_const); + v6 = vec_ld(offset + 96, vcrc_short_const); + v7 = vec_ld(offset + 112, vcrc_short_const); + + offset += 128; + + v0 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata0,(__vector unsigned int)v0); + v1 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata1,(__vector unsigned int)v1); + v2 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata2,(__vector unsigned int)v2); + v3 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata3,(__vector unsigned int)v3); + v4 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata4,(__vector unsigned int)v4); + v5 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata5,(__vector unsigned int)v5); + v6 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata6,(__vector unsigned int)v6); + v7 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata7,(__vector unsigned int)v7); + + /* Now reduce the tail (0-112 bytes). */ + for (i = 0; i < length; i+=16) { + vdata0 = vec_ld(i,(__vector unsigned long long*)p); + VEC_PERM(vdata0, vdata0, vdata0, vperm_const); + va0 = vec_ld(offset + i,vcrc_short_const); + va0 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( + (__vector unsigned int)vdata0,(__vector unsigned int)va0); + v0 = vec_xor(v0, va0); + } + + /* xor all parallel chunks together. 
*/ + v0 = vec_xor(v0, v1); + v2 = vec_xor(v2, v3); + v4 = vec_xor(v4, v5); + v6 = vec_xor(v6, v7); + + v0 = vec_xor(v0, v2); + v4 = vec_xor(v4, v6); + + v0 = vec_xor(v0, v4); + } + + /* Barrett Reduction */ + vconst1 = vec_ld(0, v_Barrett_const); + vconst2 = vec_ld(16, v_Barrett_const); + + v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, + (__vector unsigned char)v0, 8); + v0 = vec_xor(v1,v0); + +#ifdef REFLECT + /* shift left one bit */ + __vector unsigned char vsht_splat = vec_splat_u8 (1); + v0 = (__vector unsigned long long)vec_sll ((__vector unsigned char)v0, + vsht_splat); +#endif + + v0 = vec_and(v0, vmask_64bit); + +#ifndef REFLECT + + /* + * Now for the actual algorithm. The idea is to calculate q, + * the multiple of our polynomial that we need to subtract. By + * doing the computation 2x bits higher (ie 64 bits) and shifting the + * result back down 2x bits, we round down to the nearest multiple. + */ + + /* ma */ + v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v0, + (__vector unsigned long long)vconst1); + /* q = floor(ma/(2^64)) */ + v1 = (__vector unsigned long long)vec_sld ((__vector unsigned char)vzero, + (__vector unsigned char)v1, 8); + /* qn */ + v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1, + (__vector unsigned long long)vconst2); + /* a - qn, subtraction is xor in GF(2) */ + v0 = vec_xor (v0, v1); + /* + * Get the result into r3. We need to shift it left 8 bytes: + * V0 [ 0 1 2 X ] + * V0 [ 0 X 2 3 ] + */ + result = __builtin_unpack_vector_1 (v0); +#else + + /* + * The reflected version of Barrett reduction. Instead of bit + * reflecting our data (which is expensive to do), we bit reflect our + * constants and our algorithm, which means the intermediate data in + * our vector registers goes from 0-63 instead of 63-0. We can reflect + * the algorithm because we don't carry in mod 2 arithmetic. 
+ */ + + /* bottom 32 bits of a */ + v1 = vec_and(v0, vmask_32bit); + + /* ma */ + v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1, + (__vector unsigned long long)vconst1); + + /* bottom 32bits of ma */ + v1 = vec_and(v1, vmask_32bit); + /* qn */ + v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1, + (__vector unsigned long long)vconst2); + /* a - qn, subtraction is xor in GF(2) */ + v0 = vec_xor (v0, v1); + + /* + * Since we are bit reflected, the result (ie the low 32 bits) is in + * the high 32 bits. We just need to shift it left 4 bytes + * V0 [ 0 1 X 3 ] + * V0 [ 0 X 2 3 ] + */ + + /* shift result into top 64 bits of */ + v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, + (__vector unsigned char)vzero, 4); + + result = __builtin_unpack_vector_0 (v0); +#endif + + return result; +} diff --git a/contrib/power/crc32_z_resolver.c b/contrib/power/crc32_z_resolver.c new file mode 100644 index 000000000..f4e9aa491 --- /dev/null +++ b/contrib/power/crc32_z_resolver.c @@ -0,0 +1,15 @@ +/* Copyright (C) 2019 Matheus Castanho , IBM + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "../gcc/zifunc.h" +#include "power.h" + +Z_IFUNC(crc32_z) { +#ifdef Z_POWER8 + if (__builtin_cpu_supports("arch_2_07")) + return _crc32_z_power8; +#endif + + return crc32_z_default; +} diff --git a/contrib/power/power.h b/contrib/power/power.h new file mode 100644 index 000000000..79123aa90 --- /dev/null +++ b/contrib/power/power.h @@ -0,0 +1,8 @@ +/* Copyright (C) 2019 Matheus Castanho , IBM + * 2019 Rogerio Alves , IBM + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "../../zconf.h" + +unsigned long _crc32_z_power8(unsigned long, const Bytef *, z_size_t); diff --git a/contrib/s390/README.txt b/contrib/s390/README.txt new file mode 100644 index 000000000..48be008bd --- /dev/null +++ b/contrib/s390/README.txt @@ -0,0 +1,17 @@ +IBM Z mainframes starting from 
version z15 provide DFLTCC instruction, +which implements deflate algorithm in hardware with estimated +compression and decompression performance orders of magnitude faster +than the current zlib and ratio comparable with that of level 1. + +This directory adds DFLTCC support. In order to enable it, the following +build commands should be used: + + $ ./configure --dfltcc + $ make + +When built like this, zlib would compress in hardware on level 1, and in +software on all other levels. Decompression will always happen in +hardware. In order to enable DFLTCC compression for levels 1-6 (i.e. to +make it used by default) one could either configure with +--dfltcc-level-mask=0x7e or set the environment variable +DFLTCC_LEVEL_MASK to 0x7e at run time. diff --git a/contrib/s390/crc32-vx.c b/contrib/s390/crc32-vx.c new file mode 100644 index 000000000..fa5387c11 --- /dev/null +++ b/contrib/s390/crc32-vx.c @@ -0,0 +1,195 @@ +/* + * Hardware-accelerated CRC-32 variants for Linux on z Systems + * + * Use the z/Architecture Vector Extension Facility to accelerate the + * computing of bitreflected CRC-32 checksums. + * + * This CRC-32 implementation algorithm is bitreflected and processes + * the least-significant bit first (Little-Endian). + * + * This code was originally written by Hendrik Brueckner + * for use in the Linux kernel and has been + * relicensed under the zlib license. + */ + +#include "../../zutil.h" + +#include +#include + +typedef unsigned char uv16qi __attribute__((vector_size(16))); +typedef unsigned int uv4si __attribute__((vector_size(16))); +typedef unsigned long long uv2di __attribute__((vector_size(16))); + +uint32_t crc32_le_vgfm_16(uint32_t crc, const unsigned char *buf, size_t len) { + /* + * The CRC-32 constant block contains reduction constants to fold and + * process particular chunks of the input data stream in parallel. 
+ * + * For the CRC-32 variants, the constants are precomputed according to + * these definitions: + * + * R1 = [(x**(4*128+32) mod P'(x) << 32)]' << 1 + * R2 = [(x**(4*128-32) mod P'(x) << 32)]' << 1 + * R3 = [(x**(128+32) mod P'(x) << 32)]' << 1 + * R4 = [(x**(128-32) mod P'(x) << 32)]' << 1 + * R5 = [(x**64 mod P'(x) << 32)]' << 1 + * R6 = [(x**32 mod P'(x) << 32)]' << 1 + * + * The bitreflected Barrett reduction constant, u', is defined as + * the bit reversal of floor(x**64 / P(x)). + * + * where P(x) is the polynomial in the normal domain and the P'(x) is the + * polynomial in the reversed (bitreflected) domain. + * + * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials: + * + * P(x) = 0x04C11DB7 + * P'(x) = 0xEDB88320 + */ + const uv16qi perm_le2be = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; /* byte-reversal (LE<->BE) mask */ + const uv2di r2r1 = {0x1C6E41596, 0x154442BD4}; /* R2, R1 */ + const uv2di r4r3 = {0x0CCAA009E, 0x1751997D0}; /* R4, R3 */ + const uv2di r5 = {0, 0x163CD6124}; /* R5 */ + const uv2di ru_poly = {0, 0x1F7011641}; /* u' */ + const uv2di crc_poly = {0, 0x1DB710641}; /* P'(x) << 1 */ + + /* + * Load the initial CRC value. + * + * The CRC value is loaded into the rightmost word of the + * vector register and is later XORed with the LSB portion + * of the loaded input data. 
+ */ + uv2di v0 = {0, 0}; + v0 = (uv2di)vec_insert(crc, (uv4si)v0, 3); + + /* Load a 64-byte data chunk and XOR with CRC */ + uv2di v1 = vec_perm(((uv2di *)buf)[0], ((uv2di *)buf)[0], perm_le2be); + uv2di v2 = vec_perm(((uv2di *)buf)[1], ((uv2di *)buf)[1], perm_le2be); + uv2di v3 = vec_perm(((uv2di *)buf)[2], ((uv2di *)buf)[2], perm_le2be); + uv2di v4 = vec_perm(((uv2di *)buf)[3], ((uv2di *)buf)[3], perm_le2be); + + v1 ^= v0; + buf += 64; + len -= 64; + + while (len >= 64) { + /* Load the next 64-byte data chunk */ + uv16qi part1 = vec_perm(((uv16qi *)buf)[0], ((uv16qi *)buf)[0], perm_le2be); + uv16qi part2 = vec_perm(((uv16qi *)buf)[1], ((uv16qi *)buf)[1], perm_le2be); + uv16qi part3 = vec_perm(((uv16qi *)buf)[2], ((uv16qi *)buf)[2], perm_le2be); + uv16qi part4 = vec_perm(((uv16qi *)buf)[3], ((uv16qi *)buf)[3], perm_le2be); + + /* + * Perform a GF(2) multiplication of the doublewords in V1 with + * the R1 and R2 reduction constants in V0. The intermediate result + * is then folded (accumulated) with the next data chunk in PART1 and + * stored in V1. Repeat this step for the register contents + * in V2, V3, and V4 respectively. + */ + v1 = (uv2di)vec_gfmsum_accum_128(r2r1, v1, part1); + v2 = (uv2di)vec_gfmsum_accum_128(r2r1, v2, part2); + v3 = (uv2di)vec_gfmsum_accum_128(r2r1, v3, part3); + v4 = (uv2di)vec_gfmsum_accum_128(r2r1, v4, part4); + + buf += 64; + len -= 64; + } + + /* + * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3 + * and R4 and accumulating the next 128-bit chunk until a single 128-bit + * value remains. 
+ */ + v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); + v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v3); + v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v4); + + while (len >= 16) { + /* Load next data chunk */ + v2 = vec_perm(*(uv2di *)buf, *(uv2di *)buf, perm_le2be); + + /* Fold next data chunk */ + v1 = (uv2di)vec_gfmsum_accum_128(r4r3, v1, (uv16qi)v2); + + buf += 16; + len -= 16; + } + + /* + * Set up a vector register for byte shifts. The shift value must + * be loaded in bits 1-4 in byte element 7 of a vector register. + * Shift by 8 bytes: 0x40 + * Shift by 4 bytes: 0x20 + */ + uv16qi v9 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + v9 = vec_insert((unsigned char)0x40, v9, 7); + + /* + * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes + * to move R4 into the rightmost doubleword and set the leftmost + * doubleword to 0x1. + */ + v0 = vec_srb(r4r3, (uv2di)v9); + v0[0] = 1; + + /* + * Compute GF(2) product of V1 and V0. The rightmost doubleword + * of V1 is multiplied with R4. The leftmost doubleword of V1 is + * multiplied by 0x1 and is then XORed with rightmost product. + * Implicitly, the intermediate leftmost product becomes padded + */ + v1 = (uv2di)vec_gfmsum_128(v0, v1); + + /* + * Now do the final 32-bit fold by multiplying the rightmost word + * in V1 with R5 and XOR the result with the remaining bits in V1. + * + * To achieve this by a single VGFMAG, right shift V1 by a word + * and store the result in V2 which is then accumulated. Use the + * vector unpack instruction to load the rightmost half of the + * doubleword into the rightmost doubleword element of V1; the other + * half is loaded in the leftmost doubleword. + * The vector register with CONST_R5 contains the R5 constant in the + * rightmost doubleword and the leftmost doubleword is zero to ignore + * the leftmost product of V1. 
+ */ + v9 = vec_insert((unsigned char)0x20, v9, 7); + v2 = vec_srb(v1, (uv2di)v9); + v1 = vec_unpackl((uv4si)v1); /* Split rightmost doubleword */ + v1 = (uv2di)vec_gfmsum_accum_128(r5, v1, (uv16qi)v2); + + /* + * Apply a Barrett reduction to compute the final 32-bit CRC value. + * + * The input values to the Barrett reduction are the degree-63 polynomial + * in V1 (R(x)), degree-32 generator polynomial, and the reduction + * constant u. The Barrett reduction result is the CRC value of R(x) mod + * P(x). + * + * The Barrett reduction algorithm is defined as: + * + * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u + * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) + * 3. C(x) = R(x) XOR T2(x) mod x^32 + * + * Note: The leftmost doubleword of the vector register containing + * ru_poly is zero and, thus, the intermediate GF(2) product + * is zero and does not contribute to the final result. + */ + + /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */ + v2 = vec_unpackl((uv4si)v1); + v2 = (uv2di)vec_gfmsum_128(ru_poly, v2); + + /* + * Compute the GF(2) product of the CRC polynomial with T1(x) in + * V2 and XOR the intermediate result, T2(x), with the value in V1. + * The final result is stored in word element 2 of V2. 
+ */ + v2 = vec_unpackl((uv4si)v2); + v2 = (uv2di)vec_gfmsum_accum_128(crc_poly, v2, (uv16qi)v1); + + return ((uv4si)v2)[2]; +} diff --git a/contrib/s390/crc32_z_resolver.c b/contrib/s390/crc32_z_resolver.c new file mode 100644 index 000000000..9749cab40 --- /dev/null +++ b/contrib/s390/crc32_z_resolver.c @@ -0,0 +1,41 @@ +#include +#include "../gcc/zifunc.h" + +#define VX_MIN_LEN 64 +#define VX_ALIGNMENT 16L +#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) + +unsigned int crc32_le_vgfm_16(unsigned int crc, const unsigned char FAR *buf, z_size_t len); + +local unsigned long s390_crc32_vx(unsigned long crc, const unsigned char FAR *buf, z_size_t len) +{ + uintptr_t prealign, aligned, remaining; + + if (buf == Z_NULL) return 0UL; + + if (len < VX_MIN_LEN + VX_ALIGN_MASK) + return crc32_z_default(crc, buf, len); + + if ((uintptr_t)buf & VX_ALIGN_MASK) { + prealign = VX_ALIGNMENT - ((uintptr_t)buf & VX_ALIGN_MASK); + len -= prealign; + crc = crc32_z_default(crc, buf, prealign); + buf += prealign; + } + aligned = len & ~VX_ALIGN_MASK; + remaining = len & VX_ALIGN_MASK; + + crc = crc32_le_vgfm_16(crc ^ 0xffffffff, buf, (size_t)aligned) ^ 0xffffffff; + + if (remaining) + crc = crc32_z_default(crc, buf + aligned, remaining); + + return crc; +} + +Z_IFUNC(crc32_z) +{ + if (hwcap & HWCAP_S390_VX) + return s390_crc32_vx; + return crc32_z_default; +} diff --git a/contrib/s390/dfltcc.c b/contrib/s390/dfltcc.c new file mode 100644 index 000000000..f2b222dc5 --- /dev/null +++ b/contrib/s390/dfltcc.c @@ -0,0 +1,1004 @@ +/* dfltcc.c - SystemZ DEFLATE CONVERSION CALL support. 
*/ + +/* + Use the following commands to build zlib with DFLTCC support: + + $ ./configure --dfltcc + $ make +*/ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include "../../zutil.h" +#include "../../deflate.h" +#include "../../inftrees.h" +#include "../../inflate.h" +#include "dfltcc.h" +#include "dfltcc_deflate.h" +#ifdef HAVE_SYS_SDT_H +#include +#endif + +/* + C wrapper for the DEFLATE CONVERSION CALL instruction. + */ +typedef enum { + DFLTCC_CC_OK = 0, + DFLTCC_CC_OP1_TOO_SHORT = 1, + DFLTCC_CC_OP2_TOO_SHORT = 2, + DFLTCC_CC_OP2_CORRUPT = 2, + DFLTCC_CC_AGAIN = 3, +} dfltcc_cc; + +#define DFLTCC_QAF 0 +#define DFLTCC_GDHT 1 +#define DFLTCC_CMPR 2 +#define DFLTCC_XPND 4 +#define HBT_CIRCULAR (1 << 7) +#define HB_BITS 15 +#define HB_SIZE (1 << HB_BITS) +#define DFLTCC_FACILITY 151 + +local inline dfltcc_cc dfltcc(int fn, void *param, + Bytef **op1, size_t *len1, + z_const Bytef **op2, size_t *len2, + void *hist) +{ + Bytef *t2 = op1 ? *op1 : NULL; + size_t t3 = len1 ? *len1 : 0; + z_const Bytef *t4 = op2 ? *op2 : NULL; + size_t t5 = len2 ? 
*len2 : 0; + register int r0 __asm__("r0") = fn; + register void *r1 __asm__("r1") = param; + register Bytef *r2 __asm__("r2") = t2; + register size_t r3 __asm__("r3") = t3; + register z_const Bytef *r4 __asm__("r4") = t4; + register size_t r5 __asm__("r5") = t5; + int cc; + + __asm__ volatile( +#ifdef HAVE_SYS_SDT_H + STAP_PROBE_ASM(zlib, dfltcc_entry, + STAP_PROBE_ASM_TEMPLATE(5)) +#endif + ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" +#ifdef HAVE_SYS_SDT_H + STAP_PROBE_ASM(zlib, dfltcc_exit, + STAP_PROBE_ASM_TEMPLATE(5)) +#endif + "ipm %[cc]\n" + : [r2] "+r" (r2) + , [r3] "+r" (r3) + , [r4] "+r" (r4) + , [r5] "+r" (r5) + , [cc] "=r" (cc) + : [r0] "r" (r0) + , [r1] "r" (r1) + , [hist] "r" (hist) +#ifdef HAVE_SYS_SDT_H + , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist) +#endif + : "cc", "memory"); + t2 = r2; t3 = r3; t4 = r4; t5 = r5; + + if (op1) + *op1 = t2; + if (len1) + *len1 = t3; + if (op2) + *op2 = t4; + if (len2) + *len2 = t5; + return (cc >> 28) & 3; +} + +/* + Parameter Block for Query Available Functions. + */ +#define static_assert(c, msg) \ + __attribute__((unused)) \ + static char static_assert_failed_ ## msg[c ? 1 : -1] + +struct dfltcc_qaf_param { + char fns[16]; + char reserved1[8]; + char fmts[2]; + char reserved2[6]; +}; + +static_assert(sizeof(struct dfltcc_qaf_param) == 32, + sizeof_struct_dfltcc_qaf_param_is_32); + +local inline int is_bit_set(const char *bits, int n) +{ + return bits[n / 8] & (1 << (7 - (n % 8))); +} + +local inline void clear_bit(char *bits, int n) +{ + bits[n / 8] &= ~(1 << (7 - (n % 8))); +} + +#define DFLTCC_FMT0 0 + +/* + Parameter Block for Generate Dynamic-Huffman Table, Compress and Expand. 
+ */ +#define CVT_CRC32 0 +#define CVT_ADLER32 1 +#define HTT_FIXED 0 +#define HTT_DYNAMIC 1 + +struct dfltcc_param_v0 { + uint16_t pbvn; /* Parameter-Block-Version Number */ + uint8_t mvn; /* Model-Version Number */ + uint8_t ribm; /* Reserved for IBM use */ + unsigned reserved32 : 31; + unsigned cf : 1; /* Continuation Flag */ + uint8_t reserved64[8]; + unsigned nt : 1; /* New Task */ + unsigned reserved129 : 1; + unsigned cvt : 1; /* Check Value Type */ + unsigned reserved131 : 1; + unsigned htt : 1; /* Huffman-Table Type */ + unsigned bcf : 1; /* Block-Continuation Flag */ + unsigned bcc : 1; /* Block Closing Control */ + unsigned bhf : 1; /* Block Header Final */ + unsigned reserved136 : 1; + unsigned reserved137 : 1; + unsigned dhtgc : 1; /* DHT Generation Control */ + unsigned reserved139 : 5; + unsigned reserved144 : 5; + unsigned sbb : 3; /* Sub-Byte Boundary */ + uint8_t oesc; /* Operation-Ending-Supplemental Code */ + unsigned reserved160 : 12; + unsigned ifs : 4; /* Incomplete-Function Status */ + uint16_t ifl; /* Incomplete-Function Length */ + uint8_t reserved192[8]; + uint8_t reserved256[8]; + uint8_t reserved320[4]; + uint16_t hl; /* History Length */ + unsigned reserved368 : 1; + uint16_t ho : 15; /* History Offset */ + uint32_t cv; /* Check Value */ + unsigned eobs : 15; /* End-of-block Symbol */ + unsigned reserved431: 1; + uint8_t eobl : 4; /* End-of-block Length */ + unsigned reserved436 : 12; + unsigned reserved448 : 4; + uint16_t cdhtl : 12; /* Compressed-Dynamic-Huffman Table + Length */ + uint8_t reserved464[6]; + uint8_t cdht[288]; + uint8_t reserved[32]; + uint8_t csb[1152]; +}; + +static_assert(sizeof(struct dfltcc_param_v0) == 1536, + sizeof_struct_dfltcc_param_v0_is_1536); + +local z_const char *oesc_msg(char *buf, int oesc) +{ + if (oesc == 0x00) + return NULL; /* Successful completion */ + else { + sprintf(buf, "Operation-Ending-Supplemental Code is 0x%.2X", oesc); + return buf; + } +} + +/* + Extension of inflate_state and 
deflate_state. Must be doubleword-aligned.
+*/
+struct dfltcc_state {
+    struct dfltcc_param_v0 param;      /* Parameter block. */
+    struct dfltcc_qaf_param af;        /* Available functions. */
+    uLong level_mask;                  /* Levels on which to use DFLTCC */
+    uLong block_size;                  /* New block each X bytes */
+    uLong block_threshold;             /* New block after total_in > X */
+    uLong dht_threshold;               /* New block only if avail_in >= X */
+    char msg[64];                      /* Buffer for strm->msg */
+};
+
+#define ALIGN_UP(p, size) \
+        (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
+
+#define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)( \
+        (char *)(state) + ALIGN_UP(sizeof(*state), 8)))
+
+/*
+   Compress.
+ */
+local inline int dfltcc_can_deflate_with_params(z_streamp strm,
+                                                int level,
+                                                uInt window_bits,
+                                                int strategy)
+{
+    deflate_state *state = (deflate_state *)strm->state;
+    struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
+
+    /* Unsupported compression settings */
+    if ((dfltcc_state->level_mask & (1 << level)) == 0)
+        return 0;
+    if (window_bits != HB_BITS)
+        return 0;
+    if (strategy != Z_FIXED && strategy != Z_DEFAULT_STRATEGY)
+        return 0;
+
+    /* Unsupported hardware */
+    if (!is_bit_set(dfltcc_state->af.fns, DFLTCC_GDHT) ||
+            !is_bit_set(dfltcc_state->af.fns, DFLTCC_CMPR) ||
+            !is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0))
+        return 0;
+
+    return 1;
+}
+
+int ZLIB_INTERNAL dfltcc_can_deflate(z_streamp strm)
+{
+    deflate_state *state = (deflate_state *)strm->state;
+
+    return dfltcc_can_deflate_with_params(strm,
+                                          state->level,
+                                          state->w_bits,
+                                          state->strategy);
+}
+
+local void dfltcc_gdht(z_streamp strm)
+{
+    deflate_state *state = (deflate_state *)strm->state;
+    struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
+    size_t avail_in = strm->avail_in; /* copy: dfltcc() writes lengths back */
+
+    dfltcc(DFLTCC_GDHT, /* Generate Dynamic-Huffman Table; no output, no history */
+           param, NULL, NULL,
+           &strm->next_in, &avail_in, NULL);
+}
+
+local dfltcc_cc dfltcc_cmpr(z_streamp strm)
+{
+    deflate_state *state = (deflate_state *)strm->state;
+    struct 
dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; + size_t avail_in = strm->avail_in; + size_t avail_out = strm->avail_out; + dfltcc_cc cc; + + cc = dfltcc(DFLTCC_CMPR | HBT_CIRCULAR, + param, &strm->next_out, &avail_out, + &strm->next_in, &avail_in, state->window); + strm->total_in += (strm->avail_in - avail_in); + strm->total_out += (strm->avail_out - avail_out); + strm->avail_in = avail_in; + strm->avail_out = avail_out; + return cc; +} + +local void send_eobs(z_streamp strm, + z_const struct dfltcc_param_v0 *param) +{ + deflate_state *state = (deflate_state *)strm->state; + + _tr_send_bits( + state, + bi_reverse(param->eobs >> (15 - param->eobl), param->eobl), + param->eobl); + flush_pending(strm); + if (state->pending != 0) { + /* The remaining data is located in pending_out[0:pending]. If someone + * calls put_byte() - this might happen in deflate() - the byte will be + * placed into pending_buf[pending], which is incorrect. Move the + * remaining data to the beginning of pending_buf so that put_byte() is + * usable again. + */ + memmove(state->pending_buf, state->pending_out, state->pending); + state->pending_out = state->pending_buf; + } +#ifdef ZLIB_DEBUG + state->compressed_len += param->eobl; +#endif +} + +int ZLIB_INTERNAL dfltcc_deflate(z_streamp strm, int flush, + block_state *result) +{ + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + uInt masked_avail_in; + dfltcc_cc cc; + int need_empty_block; + int soft_bcc; + int no_flush; + + if (!dfltcc_can_deflate(strm)) { + /* Clear history. */ + if (flush == Z_FULL_FLUSH) + param->hl = 0; + return 0; + } + +again: + masked_avail_in = 0; + soft_bcc = 0; + no_flush = flush == Z_NO_FLUSH; + + /* No input data. 
Return, except when Continuation Flag is set, which means + * that DFLTCC has buffered some output in the parameter block and needs to + * be called again in order to flush it. + */ + if (strm->avail_in == 0 && !param->cf) { + /* A block is still open, and the hardware does not support closing + * blocks without adding data. Thus, close it manually. + */ + if (!no_flush && param->bcf) { + send_eobs(strm, param); + param->bcf = 0; + } + /* Let one of deflate_* functions write a trailing empty block. */ + if (flush == Z_FINISH) + return 0; + /* Clear history. */ + if (flush == Z_FULL_FLUSH) + param->hl = 0; + /* Trigger block post-processing if necessary. */ + *result = no_flush ? need_more : block_done; + return 1; + } + + /* There is an open non-BFINAL block, we are not going to close it just + * yet, we have compressed more than DFLTCC_BLOCK_SIZE bytes and we see + * more than DFLTCC_DHT_MIN_SAMPLE_SIZE bytes. Open a new block with a new + * DHT in order to adapt to a possibly changed input data distribution. + */ + if (param->bcf && no_flush && + strm->total_in > dfltcc_state->block_threshold && + strm->avail_in >= dfltcc_state->dht_threshold) { + if (param->cf) { + /* We need to flush the DFLTCC buffer before writing the + * End-of-block Symbol. Mask the input data and proceed as usual. + */ + masked_avail_in += strm->avail_in; + strm->avail_in = 0; + no_flush = 0; + } else { + /* DFLTCC buffer is empty, so we can manually write the + * End-of-block Symbol right away. + */ + send_eobs(strm, param); + param->bcf = 0; + dfltcc_state->block_threshold = + strm->total_in + dfltcc_state->block_size; + } + } + + /* No space for compressed data. If we proceed, dfltcc_cmpr() will return + * DFLTCC_CC_OP1_TOO_SHORT without buffering header bits, but we will still + * set BCF=1, which is wrong. Avoid complications and return early. + */ + if (strm->avail_out == 0) { + *result = need_more; + return 1; + } + + /* The caller gave us too much data. 
Pass only one block worth of + * uncompressed data to DFLTCC and mask the rest, so that on the next + * iteration we start a new block. + */ + if (no_flush && strm->avail_in > dfltcc_state->block_size) { + masked_avail_in += (strm->avail_in - dfltcc_state->block_size); + strm->avail_in = dfltcc_state->block_size; + } + + /* When we have an open non-BFINAL deflate block and caller indicates that + * the stream is ending, we need to close an open deflate block and open a + * BFINAL one. + */ + need_empty_block = flush == Z_FINISH && param->bcf && !param->bhf; + + /* Translate stream to parameter block */ + param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32; + if (!no_flush) + /* We need to close a block. Always do this in software - when there is + * no input data, the hardware will not honor BCC. */ + soft_bcc = 1; + if (flush == Z_FINISH && !param->bcf) + /* We are about to open a BFINAL block, set Block Header Final bit + * until the stream ends. + */ + param->bhf = 1; + /* DFLTCC-CMPR will write to next_out, so make sure that buffers with + * higher precedence are empty. + */ + Assert(state->pending == 0, "There must be no pending bytes"); + Assert(state->bi_valid < 8, "There must be less than 8 pending bits"); + param->sbb = (unsigned int)state->bi_valid; + if (param->sbb > 0) + *strm->next_out = (Bytef)state->bi_buf; + /* Honor history and check value */ + param->nt = 0; + if (state->wrap == 1) + param->cv = strm->adler; + else if (state->wrap == 2) + param->cv = ZSWAP32(strm->adler); + + /* When opening a block, choose a Huffman-Table Type */ + if (!param->bcf) { + if (state->strategy == Z_FIXED || + (strm->total_in == 0 && dfltcc_state->block_threshold > 0)) + param->htt = HTT_FIXED; + else { + param->htt = HTT_DYNAMIC; + dfltcc_gdht(strm); + } + } + + /* Deflate */ + do { + cc = dfltcc_cmpr(strm); + if (strm->avail_in < 4096 && masked_avail_in > 0) + /* We are about to call DFLTCC with a small input buffer, which is + * inefficient. 
Since there is masked data, there will be at least + * one more DFLTCC call, so skip the current one and make the next + * one handle more data. + */ + break; + } while (cc == DFLTCC_CC_AGAIN); + + /* Translate parameter block to stream */ + strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); + state->bi_valid = param->sbb; + if (state->bi_valid == 0) + state->bi_buf = 0; /* Avoid accessing next_out */ + else + state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1); + if (state->wrap == 1) + strm->adler = param->cv; + else if (state->wrap == 2) + strm->adler = ZSWAP32(param->cv); + + /* Unmask the input data */ + strm->avail_in += masked_avail_in; + masked_avail_in = 0; + + /* If we encounter an error, it means there is a bug in DFLTCC call */ + Assert(cc != DFLTCC_CC_OP2_CORRUPT || param->oesc == 0, "BUG"); + + /* Update Block-Continuation Flag. It will be used to check whether to call + * GDHT the next time. + */ + if (cc == DFLTCC_CC_OK) { + if (soft_bcc) { + send_eobs(strm, param); + param->bcf = 0; + dfltcc_state->block_threshold = + strm->total_in + dfltcc_state->block_size; + } else + param->bcf = 1; + if (flush == Z_FINISH) { + if (need_empty_block) + /* Make the current deflate() call also close the stream */ + return 0; + else { + bi_windup(state); + *result = finish_done; + } + } else { + if (flush == Z_FULL_FLUSH) + param->hl = 0; /* Clear history */ + *result = flush == Z_NO_FLUSH ? need_more : block_done; + } + } else { + param->bcf = 1; + *result = need_more; + } + if (strm->avail_in != 0 && strm->avail_out != 0) + goto again; /* deflate() must use all input or all output */ + return 1; +} + +/* + Expand. 
+ */ +int ZLIB_INTERNAL dfltcc_can_inflate(z_streamp strm) +{ + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + + /* Unsupported hardware */ + return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && + is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0); +} + +local dfltcc_cc dfltcc_xpnd(z_streamp strm) +{ + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; + size_t avail_in = strm->avail_in; + size_t avail_out = strm->avail_out; + dfltcc_cc cc; + + cc = dfltcc(DFLTCC_XPND | HBT_CIRCULAR, + param, &strm->next_out, &avail_out, + &strm->next_in, &avail_in, state->window); + strm->avail_in = avail_in; + strm->avail_out = avail_out; + return cc; +} + +dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(z_streamp strm, int flush, + int *ret) +{ + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + dfltcc_cc cc; + + if (flush == Z_BLOCK || flush == Z_TREES) { + /* DFLTCC does not support stopping on block boundaries */ + if (dfltcc_inflate_disable(strm)) { + *ret = Z_STREAM_ERROR; + return DFLTCC_INFLATE_BREAK; + } else + return DFLTCC_INFLATE_SOFTWARE; + } + + if (state->last) { + if (state->bits != 0) { + strm->next_in++; + strm->avail_in--; + state->bits = 0; + } + state->mode = CHECK; + return DFLTCC_INFLATE_CONTINUE; + } + + if (strm->avail_in == 0 && !param->cf) + return DFLTCC_INFLATE_BREAK; + + if (inflate_ensure_window(state)) { + state->mode = MEM; + return DFLTCC_INFLATE_CONTINUE; + } + + /* Translate stream to parameter block */ + param->cvt = ((state->wrap & 4) && state->flags) ? CVT_CRC32 : CVT_ADLER32; + param->sbb = state->bits; + if (param->hl) + param->nt = 0; /* Honor history for the first block */ + if (state->wrap & 4) + param->cv = state->flags ? 
ZSWAP32(state->check) : state->check; + + /* Inflate */ + do { + cc = dfltcc_xpnd(strm); + } while (cc == DFLTCC_CC_AGAIN); + + /* Translate parameter block to stream */ + strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); + state->last = cc == DFLTCC_CC_OK; + state->bits = param->sbb; + if (state->wrap & 4) + strm->adler = state->check = state->flags ? + ZSWAP32(param->cv) : param->cv; + if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { + /* Report an error if stream is corrupted */ + state->mode = BAD; + return DFLTCC_INFLATE_CONTINUE; + } + state->mode = TYPEDO; + /* Break if operands are exhausted, otherwise continue looping */ + return (cc == DFLTCC_CC_OP1_TOO_SHORT || cc == DFLTCC_CC_OP2_TOO_SHORT) ? + DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE; +} + +int ZLIB_INTERNAL dfltcc_was_inflate_used(z_streamp strm) +{ + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param; + + return !param->nt; +} + +/* + Rotates a circular buffer. + The implementation is based on https://cplusplus.com/reference/algorithm/rotate/ + */ +local void rotate(Bytef *start, Bytef *pivot, Bytef *end) +{ + Bytef *p = pivot; + Bytef tmp; + + while (p != start) { + tmp = *start; + *start = *p; + *p = tmp; + + start++; + p++; + + if (p == end) + p = pivot; + else if (start == pivot) + pivot = p; + } +} + +#define MIN(x, y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + _x < _y ? _x : _y; \ +}) + +#define MAX(x, y) ({ \ + typeof(x) _x = (x); \ + typeof(y) _y = (y); \ + _x > _y ? _x : _y; \ +}) + +int ZLIB_INTERNAL dfltcc_inflate_disable(z_streamp strm) +{ + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + + if (!dfltcc_can_inflate(strm)) + return 0; + if (dfltcc_was_inflate_used(strm)) + /* DFLTCC has already decompressed some data. 
Since there is not + * enough information to resume decompression in software, the call + * must fail. + */ + return 1; + /* DFLTCC was not used yet - decompress in software */ + memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); + /* Convert the window from the hardware to the software format */ + rotate(state->window, state->window + param->ho, state->window + HB_SIZE); + state->whave = state->wnext = MIN(param->hl, state->wsize); + return 0; +} + +local int env_dfltcc_disabled; +local int env_source_date_epoch; +local unsigned long env_level_mask; +local unsigned long env_block_size; +local unsigned long env_block_threshold; +local unsigned long env_dht_threshold; +local unsigned long env_ribm; +local uint64_t cpu_facilities[(DFLTCC_FACILITY / 64) + 1]; +local struct dfltcc_qaf_param cpu_af __attribute__((aligned(8))); + +local inline int is_dfltcc_enabled(void) +{ + if (env_dfltcc_disabled) + /* User has explicitly disabled DFLTCC. */ + return 0; + + return is_bit_set((const char *)cpu_facilities, DFLTCC_FACILITY); +} + +local unsigned long xstrtoul(const char *s, unsigned long _default) +{ + char *endptr; + unsigned long result; + + if (!(s && *s)) + return _default; + errno = 0; + result = strtoul(s, &endptr, 0); + return (errno || *endptr) ? 
_default : result; +} + +__attribute__((constructor)) local void init_globals(void) +{ + const char *env; + register char r0 __asm__("r0"); + + env = secure_getenv("DFLTCC"); + env_dfltcc_disabled = env && !strcmp(env, "0"); + + env = secure_getenv("SOURCE_DATE_EPOCH"); + env_source_date_epoch = !!env; + +#ifndef DFLTCC_LEVEL_MASK +#define DFLTCC_LEVEL_MASK 0x2 +#endif + env_level_mask = xstrtoul(secure_getenv("DFLTCC_LEVEL_MASK"), + DFLTCC_LEVEL_MASK); + +#ifndef DFLTCC_BLOCK_SIZE +#define DFLTCC_BLOCK_SIZE 1048576 +#endif + env_block_size = xstrtoul(secure_getenv("DFLTCC_BLOCK_SIZE"), + DFLTCC_BLOCK_SIZE); + +#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE +#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096 +#endif + env_block_threshold = xstrtoul(secure_getenv("DFLTCC_FIRST_FHT_BLOCK_SIZE"), + DFLTCC_FIRST_FHT_BLOCK_SIZE); + +#ifndef DFLTCC_DHT_MIN_SAMPLE_SIZE +#define DFLTCC_DHT_MIN_SAMPLE_SIZE 4096 +#endif + env_dht_threshold = xstrtoul(secure_getenv("DFLTCC_DHT_MIN_SAMPLE_SIZE"), + DFLTCC_DHT_MIN_SAMPLE_SIZE); + +#ifndef DFLTCC_RIBM +#define DFLTCC_RIBM 0 +#endif + env_ribm = xstrtoul(secure_getenv("DFLTCC_RIBM"), DFLTCC_RIBM); + + memset(cpu_facilities, 0, sizeof(cpu_facilities)); + r0 = sizeof(cpu_facilities) / sizeof(cpu_facilities[0]) - 1; + /* STFLE is supported since z9-109 and only in z/Architecture mode. When + * compiling with -m31, gcc defaults to ESA mode, however, since the kernel + * is 64-bit, it's always z/Architecture mode at runtime. + */ + __asm__ volatile( +#ifndef __clang__ + ".machinemode push\n" + ".machinemode zarch\n" +#endif + "stfle %[facilities]\n" +#ifndef __clang__ + ".machinemode pop\n" +#endif + : [facilities] "=Q" (cpu_facilities) + , [r0] "+r" (r0) + : + : "cc"); + + /* Initialize available functions */ + if (is_dfltcc_enabled()) + dfltcc(DFLTCC_QAF, &cpu_af, NULL, NULL, NULL, NULL, NULL); + else + memset(&cpu_af, 0, sizeof(cpu_af)); +} + +/* + Memory management. + + DFLTCC requires parameter blocks and window to be aligned. 
zlib allows + users to specify their own allocation functions, so using e.g. + `posix_memalign' is not an option. Thus, we overallocate and take the + aligned portion of the buffer. +*/ +void ZLIB_INTERNAL dfltcc_reset(z_streamp strm, uInt size) +{ + struct dfltcc_state *dfltcc_state = + (struct dfltcc_state *)((char *)strm->state + ALIGN_UP(size, 8)); + + memcpy(&dfltcc_state->af, &cpu_af, sizeof(dfltcc_state->af)); + + if (env_source_date_epoch) + /* User needs reproducible results, but the output of DFLTCC_CMPR + * depends on buffers' page offsets. + */ + clear_bit(dfltcc_state->af.fns, DFLTCC_CMPR); + + /* Initialize parameter block */ + memset(&dfltcc_state->param, 0, sizeof(dfltcc_state->param)); + dfltcc_state->param.nt = 1; + + /* Initialize tuning parameters */ + dfltcc_state->level_mask = env_level_mask; + dfltcc_state->block_size = env_block_size; + dfltcc_state->block_threshold = env_block_threshold; + dfltcc_state->dht_threshold = env_dht_threshold; + dfltcc_state->param.ribm = env_ribm; +} + +voidpf ZLIB_INTERNAL dfltcc_alloc_state(z_streamp strm, uInt items, uInt size) +{ + return ZALLOC(strm, + ALIGN_UP(items * size, 8) + sizeof(struct dfltcc_state), + sizeof(unsigned char)); +} + +void ZLIB_INTERNAL dfltcc_copy_state(voidpf dst, const voidpf src, uInt size) +{ + zmemcpy(dst, src, ALIGN_UP(size, 8) + sizeof(struct dfltcc_state)); +} + +static const int PAGE_ALIGN = 0x1000; + +voidpf ZLIB_INTERNAL dfltcc_alloc_window(z_streamp strm, uInt items, uInt size) +{ + voidpf p, w; + + /* To simplify freeing, we store the pointer to the allocated buffer right + * before the window. Note that DFLTCC always uses HB_SIZE bytes. 
+ */ + p = ZALLOC(strm, sizeof(voidpf) + MAX(items * size, HB_SIZE) + PAGE_ALIGN, + sizeof(unsigned char)); + if (p == NULL) + return NULL; + w = ALIGN_UP((char *)p + sizeof(voidpf), PAGE_ALIGN); + *(voidpf *)((char *)w - sizeof(voidpf)) = p; + return w; +} + +void ZLIB_INTERNAL dfltcc_copy_window(void *dest, const void *src, size_t n) +{ + memcpy(dest, src, MAX(n, HB_SIZE)); +} + +void ZLIB_INTERNAL dfltcc_free_window(z_streamp strm, voidpf w) +{ + if (w) + ZFREE(strm, *(voidpf *)((unsigned char *)w - sizeof(voidpf))); +} + +/* + Switching between hardware and software compression. + + DFLTCC does not support all zlib settings, e.g. generation of non-compressed + blocks or alternative window sizes. When such settings are applied on the + fly with deflateParams, we need to convert between hardware and software + window formats. +*/ +int ZLIB_INTERNAL dfltcc_deflate_params(z_streamp strm, int level, + int strategy, int *flush) +{ + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + int could_deflate = dfltcc_can_deflate(strm); + int can_deflate = dfltcc_can_deflate_with_params(strm, + level, + state->w_bits, + strategy); + + if (can_deflate == could_deflate) + /* We continue to work in the same mode - no changes needed */ + return Z_OK; + + if (strm->total_in == 0 && param->nt == 1 && param->hl == 0) + /* DFLTCC was not used yet - no changes needed */ + return Z_OK; + + /* For now, do not convert between window formats - simply get rid of the + * old data instead. 
+ */ + *flush = Z_FULL_FLUSH; + return Z_OK; +} + +int ZLIB_INTERNAL dfltcc_deflate_done(z_streamp strm, int flush) +{ + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + + /* When deflate(Z_FULL_FLUSH) is called with small avail_out, it might + * close the block without resetting the compression state. Detect this + * situation and return that deflation is not done. + */ + if (flush == Z_FULL_FLUSH && strm->avail_out == 0) + return 0; + + /* Return that deflation is not done if DFLTCC is used and either it + * buffered some data (Continuation Flag is set), or has not written EOBS + * yet (Block-Continuation Flag is set). + */ + return !dfltcc_can_deflate(strm) || (!param->cf && !param->bcf); +} + +/* + Preloading history. +*/ +local void append_history(struct dfltcc_param_v0 *param, + Bytef *history, + const Bytef *buf, + uInt count) +{ + size_t offset; + size_t n; + + /* Do not use more than 32K */ + if (count > HB_SIZE) { + buf += count - HB_SIZE; + count = HB_SIZE; + } + offset = (param->ho + param->hl) % HB_SIZE; + if (offset + count <= HB_SIZE) + /* Circular history buffer does not wrap - copy one chunk */ + zmemcpy(history + offset, buf, count); + else { + /* Circular history buffer wraps - copy two chunks */ + n = HB_SIZE - offset; + zmemcpy(history + offset, buf, n); + zmemcpy(history, buf + n, count - n); + } + n = param->hl + count; + if (n <= HB_SIZE) + /* All history fits into buffer - no need to discard anything */ + param->hl = n; + else { + /* History does not fit into buffer - discard extra bytes */ + param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE; + param->hl = HB_SIZE; + } +} + +local void get_history(struct dfltcc_param_v0 *param, + const Bytef *history, + Bytef *buf) +{ + if (param->ho + param->hl <= HB_SIZE) + /* Circular history buffer does not wrap - copy one chunk */ + memcpy(buf, history + param->ho, param->hl); + 
else { + /* Circular history buffer wraps - copy two chunks */ + memcpy(buf, history + param->ho, HB_SIZE - param->ho); + memcpy(buf + HB_SIZE - param->ho, history, param->ho + param->hl - HB_SIZE); + } +} + +int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(z_streamp strm, + const Bytef *dictionary, + uInt dict_length) +{ + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + + append_history(param, state->window, dictionary, dict_length); + state->strstart = 1; /* Add FDICT to zlib header */ + state->block_start = state->strstart; /* Make deflate_stored happy */ + return Z_OK; +} + +int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(z_streamp strm, + Bytef *dictionary, + uInt *dict_length) +{ + deflate_state *state = (deflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + + if (dictionary) + get_history(param, state->window, dictionary); + if (dict_length) + *dict_length = param->hl; + return Z_OK; +} + +int ZLIB_INTERNAL dfltcc_inflate_set_dictionary(z_streamp strm, + const Bytef *dictionary, + uInt dict_length) +{ + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + + if (inflate_ensure_window(state)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + + append_history(param, state->window, dictionary, dict_length); + state->havedict = 1; + return Z_OK; +} + +int ZLIB_INTERNAL dfltcc_inflate_get_dictionary(z_streamp strm, + Bytef *dictionary, + uInt *dict_length) +{ + struct inflate_state *state = (struct inflate_state *)strm->state; + struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 *param = &dfltcc_state->param; + + if (dictionary && state->window) + get_history(param, 
state->window, dictionary); + if (dict_length) + *dict_length = param->hl; + return Z_OK; +} diff --git a/contrib/s390/dfltcc.h b/contrib/s390/dfltcc.h new file mode 100644 index 000000000..c8491c4d9 --- /dev/null +++ b/contrib/s390/dfltcc.h @@ -0,0 +1,97 @@ +#ifndef DFLTCC_H +#define DFLTCC_H + +#include "../../zlib.h" +#include "../../zutil.h" + +voidpf ZLIB_INTERNAL dfltcc_alloc_state(z_streamp strm, uInt items, uInt size); +void ZLIB_INTERNAL dfltcc_copy_state(voidpf dst, const voidpf src, uInt size); +void ZLIB_INTERNAL dfltcc_reset(z_streamp strm, uInt size); +voidpf ZLIB_INTERNAL dfltcc_alloc_window(z_streamp strm, uInt items, + uInt size); +void ZLIB_INTERNAL dfltcc_copy_window(void *dest, const void *src, size_t n); +void ZLIB_INTERNAL dfltcc_free_window(z_streamp strm, voidpf w); +#define DFLTCC_BLOCK_HEADER_BITS 3 +#define DFLTCC_HLITS_COUNT_BITS 5 +#define DFLTCC_HDISTS_COUNT_BITS 5 +#define DFLTCC_HCLENS_COUNT_BITS 4 +#define DFLTCC_MAX_HCLENS 19 +#define DFLTCC_HCLEN_BITS 3 +#define DFLTCC_MAX_HLITS 286 +#define DFLTCC_MAX_HDISTS 30 +#define DFLTCC_MAX_HLIT_HDIST_BITS 7 +#define DFLTCC_MAX_SYMBOL_BITS 16 +#define DFLTCC_MAX_EOBS_BITS 15 +#define DFLTCC_MAX_PADDING_BITS 7 +#define DEFLATE_BOUND_COMPLEN(source_len) \ + ((DFLTCC_BLOCK_HEADER_BITS + \ + DFLTCC_HLITS_COUNT_BITS + \ + DFLTCC_HDISTS_COUNT_BITS + \ + DFLTCC_HCLENS_COUNT_BITS + \ + DFLTCC_MAX_HCLENS * DFLTCC_HCLEN_BITS + \ + (DFLTCC_MAX_HLITS + DFLTCC_MAX_HDISTS) * DFLTCC_MAX_HLIT_HDIST_BITS + \ + (source_len) * DFLTCC_MAX_SYMBOL_BITS + \ + DFLTCC_MAX_EOBS_BITS + \ + DFLTCC_MAX_PADDING_BITS) >> 3) +int ZLIB_INTERNAL dfltcc_can_inflate(z_streamp strm); +typedef enum { + DFLTCC_INFLATE_CONTINUE, + DFLTCC_INFLATE_BREAK, + DFLTCC_INFLATE_SOFTWARE, +} dfltcc_inflate_action; +dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(z_streamp strm, + int flush, int *ret); +int ZLIB_INTERNAL dfltcc_was_inflate_used(z_streamp strm); +int ZLIB_INTERNAL dfltcc_inflate_disable(z_streamp strm); +int 
ZLIB_INTERNAL dfltcc_inflate_set_dictionary(z_streamp strm, + const Bytef *dictionary, + uInt dict_length); +int ZLIB_INTERNAL dfltcc_inflate_get_dictionary(z_streamp strm, + Bytef *dictionary, + uInt* dict_length); + +#define ZALLOC_STATE dfltcc_alloc_state +#define ZFREE_STATE ZFREE +#define ZCOPY_STATE dfltcc_copy_state +#define ZALLOC_WINDOW dfltcc_alloc_window +#define ZCOPY_WINDOW dfltcc_copy_window +#define ZFREE_WINDOW dfltcc_free_window +#define TRY_FREE_WINDOW dfltcc_free_window +#define INFLATE_RESET_KEEP_HOOK(strm) \ + dfltcc_reset((strm), sizeof(struct inflate_state)) +#define INFLATE_PRIME_HOOK(strm, bits, value) \ + do { if (dfltcc_inflate_disable((strm))) return Z_STREAM_ERROR; } while (0) +#define INFLATE_TYPEDO_HOOK(strm, flush) \ + if (dfltcc_can_inflate((strm))) { \ + dfltcc_inflate_action action; \ +\ + RESTORE(); \ + action = dfltcc_inflate((strm), (flush), &ret); \ + LOAD(); \ + if (action == DFLTCC_INFLATE_CONTINUE) \ + break; \ + else if (action == DFLTCC_INFLATE_BREAK) \ + goto inf_leave; \ + } +#define INFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_inflate((strm))) +#define INFLATE_NEED_UPDATEWINDOW(strm) (!dfltcc_can_inflate((strm))) +#define INFLATE_MARK_HOOK(strm) \ + do { \ + if (dfltcc_was_inflate_used((strm))) return -(1L << 16); \ + } while (0) +#define INFLATE_SYNC_POINT_HOOK(strm) \ + do { \ + if (dfltcc_was_inflate_used((strm))) return Z_STREAM_ERROR; \ + } while (0) +#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (dfltcc_can_inflate(strm)) \ + return dfltcc_inflate_set_dictionary(strm, dict, dict_len); \ + } while (0) +#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (dfltcc_can_inflate(strm)) \ + return dfltcc_inflate_get_dictionary(strm, dict, dict_len); \ + } while (0) + +#endif diff --git a/contrib/s390/dfltcc_deflate.h b/contrib/s390/dfltcc_deflate.h new file mode 100644 index 000000000..2699d15e9 --- /dev/null +++ b/contrib/s390/dfltcc_deflate.h @@ -0,0 +1,53 @@ +#ifndef 
DFLTCC_DEFLATE_H +#define DFLTCC_DEFLATE_H + +#include "dfltcc.h" + +int ZLIB_INTERNAL dfltcc_can_deflate(z_streamp strm); +int ZLIB_INTERNAL dfltcc_deflate(z_streamp strm, + int flush, + block_state *result); +int ZLIB_INTERNAL dfltcc_deflate_params(z_streamp strm, int level, + int strategy, int *flush); +int ZLIB_INTERNAL dfltcc_deflate_done(z_streamp strm, int flush); +int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(z_streamp strm, + const Bytef *dictionary, + uInt dict_length); +int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(z_streamp strm, + Bytef *dictionary, + uInt* dict_length); + +#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (dfltcc_can_deflate((strm))) \ + return dfltcc_deflate_set_dictionary((strm), (dict), (dict_len)); \ + } while (0) +#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ + do { \ + if (dfltcc_can_deflate((strm))) \ + return dfltcc_deflate_get_dictionary((strm), (dict), (dict_len)); \ + } while (0) +#define DEFLATE_RESET_KEEP_HOOK(strm) \ + dfltcc_reset((strm), sizeof(deflate_state)) +#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) \ + do { \ + int err; \ +\ + err = dfltcc_deflate_params((strm), \ + (level), \ + (strategy), \ + (hook_flush)); \ + if (err == Z_STREAM_ERROR) \ + return err; \ + } while (0) +#define DEFLATE_DONE dfltcc_deflate_done +#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \ + do { \ + if (deflateStateCheck((strm)) || dfltcc_can_deflate((strm))) \ + (complen) = DEFLATE_BOUND_COMPLEN(source_len); \ + } while (0) +#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (dfltcc_can_deflate((strm))) +#define DEFLATE_HOOK dfltcc_deflate +#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm))) + +#endif diff --git a/crc32.c b/crc32.c index 6c38f5c04..afff3d777 100644 --- a/crc32.c +++ b/crc32.c @@ -691,6 +691,13 @@ local z_word_t crc_word_big(z_word_t data) { #endif /* ========================================================================= */ +#if 
defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) +/* Rename function so resolver can use its symbol. The default version will be + * returned by the resolver if the host has no support for an optimized version. + */ +#define crc32_z crc32_z_default +#endif /* defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) */ + unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, z_size_t len) { /* Return initial CRC, if requested. */ @@ -1009,6 +1016,16 @@ unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, return crc ^ 0xffffffff; } +#if defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) +#undef crc32_z +#ifdef Z_POWER_OPT +#include "contrib/power/crc32_z_resolver.c" +#endif /* Z_POWER_OPT */ +#ifdef HAVE_S390X_VX +#include "contrib/s390/crc32_z_resolver.c" +#endif /* HAVE_S390X_VX */ +#endif /* defined(Z_POWER_OPT) || defined(HAVE_S390X_VX) */ + #endif /* ========================================================================= */ diff --git a/deflate.c b/deflate.c index bd0117519..9f5bc8b56 100644 --- a/deflate.c +++ b/deflate.c @@ -60,12 +60,24 @@ const char deflate_copyright[] = copyright string in the executable of your product. 
*/ -typedef enum { - need_more, /* block not completed, need more input or more output */ - block_done, /* block flush performed */ - finish_started, /* finish started, need only more output at next deflate */ - finish_done /* finish done, accept no more input or output */ -} block_state; +#ifdef DFLTCC +#include "contrib/s390/dfltcc_deflate.h" +#else +#define ZALLOC_STATE ZALLOC +#define ZFREE_STATE ZFREE +#define ZCOPY_STATE zmemcpy +#define ZALLOC_WINDOW ZALLOC +#define TRY_FREE_WINDOW TRY_FREE +#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) +#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) +#define DEFLATE_RESET_KEEP_HOOK(strm) do {} while (0) +#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) do {} while (0) +#define DEFLATE_DONE(strm, flush) 1 +#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, sourceLen) do {} while (0) +#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) 0 +#define DEFLATE_HOOK(strm, flush, bstate) 0 +#define DEFLATE_NEED_CHECKSUM(strm) 1 +#endif typedef block_state (*compress_func)(deflate_state *s, int flush); /* Compression function. Returns the block state after the call. 
*/ @@ -224,7 +236,8 @@ local unsigned read_buf(z_streamp strm, Bytef *buf, unsigned size) { strm->avail_in -= len; zmemcpy(buf, strm->next_in, len); - if (strm->state->wrap == 1) { + if (!DEFLATE_NEED_CHECKSUM(strm)) {} + else if (strm->state->wrap == 1) { strm->adler = adler32(strm->adler, buf, len); } #ifdef GZIP @@ -429,7 +442,7 @@ int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, return Z_STREAM_ERROR; } if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ - s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + s = (deflate_state *) ZALLOC_STATE(strm, 1, sizeof(deflate_state)); if (s == Z_NULL) return Z_MEM_ERROR; strm->state = (struct internal_state FAR *)s; s->strm = strm; @@ -446,7 +459,7 @@ int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, s->hash_mask = s->hash_size - 1; s->hash_shift = ((s->hash_bits + MIN_MATCH-1) / MIN_MATCH); - s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); + s->window = (Bytef *) ZALLOC_WINDOW(strm, s->w_size, 2*sizeof(Byte)); s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); @@ -559,6 +572,7 @@ int ZEXPORT deflateSetDictionary(z_streamp strm, const Bytef *dictionary, /* when using zlib wrappers, compute Adler-32 for provided dictionary */ if (wrap == 1) strm->adler = adler32(strm->adler, dictionary, dictLength); + DEFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength); s->wrap = 0; /* avoid computing Adler-32 in read_buf */ /* if dictionary would fill window, just replace the history */ @@ -614,6 +628,7 @@ int ZEXPORT deflateGetDictionary(z_streamp strm, Bytef *dictionary, if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + DEFLATE_GET_DICTIONARY_HOOK(strm, dictionary, dictLength); s = strm->state; len = s->strstart + s->lookahead; if (len > s->w_size) @@ -658,6 +673,8 @@ int ZEXPORT deflateResetKeep(z_streamp strm) { _tr_init(s); + DEFLATE_RESET_KEEP_HOOK(strm); + return Z_OK; } @@ 
-740,6 +757,7 @@ int ZEXPORT deflatePrime(z_streamp strm, int bits, int value) { int ZEXPORT deflateParams(z_streamp strm, int level, int strategy) { deflate_state *s; compress_func func; + int hook_flush = Z_NO_FLUSH; if (deflateStateCheck(strm)) return Z_STREAM_ERROR; s = strm->state; @@ -752,15 +770,18 @@ int ZEXPORT deflateParams(z_streamp strm, int level, int strategy) { if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { return Z_STREAM_ERROR; } + DEFLATE_PARAMS_HOOK(strm, level, strategy, &hook_flush); func = configuration_table[s->level].func; - if ((strategy != s->strategy || func != configuration_table[level].func) && - s->last_flush != -2) { + if (((strategy != s->strategy || func != configuration_table[level].func) && + s->last_flush != -2) || hook_flush != Z_NO_FLUSH) { /* Flush the last buffer: */ - int err = deflate(strm, Z_BLOCK); + int flush = RANK(hook_flush) > RANK(Z_BLOCK) ? hook_flush : Z_BLOCK; + int err = deflate(strm, flush); if (err == Z_STREAM_ERROR) return err; - if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead) + if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead || + !DEFLATE_DONE(strm, flush)) return Z_BUF_ERROR; } if (s->level != level) { @@ -828,11 +849,13 @@ uLong ZEXPORT deflateBound(z_streamp strm, uLong sourceLen) { ~13% overhead plus a small constant */ fixedlen = sourceLen + (sourceLen >> 3) + (sourceLen >> 8) + (sourceLen >> 9) + 4; + DEFLATE_BOUND_ADJUST_COMPLEN(strm, fixedlen, sourceLen); /* upper bound for stored blocks with length 127 (memLevel == 1) -- ~4% overhead plus a small constant */ storelen = sourceLen + (sourceLen >> 5) + (sourceLen >> 7) + (sourceLen >> 11) + 7; + DEFLATE_BOUND_ADJUST_COMPLEN(strm, storelen, sourceLen); /* if can't get parameters, return larger bound plus a zlib wrapper */ if (deflateStateCheck(strm)) @@ -874,7 +897,8 @@ uLong ZEXPORT deflateBound(z_streamp strm, uLong sourceLen) { } /* if not default parameters, return one of the conservative 
bounds */ - if (s->w_bits != 15 || s->hash_bits != 8 + 7) + if (DEFLATE_NEED_CONSERVATIVE_BOUND(strm) || + s->w_bits != 15 || s->hash_bits != 8 + 7) return (s->w_bits <= s->hash_bits && s->level ? fixedlen : storelen) + wraplen; @@ -900,7 +924,7 @@ local void putShortMSB(deflate_state *s, uInt b) { * applications may wish to modify it to avoid allocating a large * strm->next_out buffer and copying into it. (See also read_buf()). */ -local void flush_pending(z_streamp strm) { +void ZLIB_INTERNAL flush_pending(z_streamp strm) { unsigned len; deflate_state *s = strm->state; @@ -1167,7 +1191,8 @@ int ZEXPORT deflate(z_streamp strm, int flush) { (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { block_state bstate; - bstate = s->level == 0 ? deflate_stored(s, flush) : + bstate = DEFLATE_HOOK(strm, flush, &bstate) ? bstate : + s->level == 0 ? deflate_stored(s, flush) : s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : s->strategy == Z_RLE ? deflate_rle(s, flush) : (*(configuration_table[s->level].func))(s, flush); @@ -1214,7 +1239,6 @@ int ZEXPORT deflate(z_streamp strm, int flush) { } if (flush != Z_FINISH) return Z_OK; - if (s->wrap <= 0) return Z_STREAM_END; /* Write the trailer */ #ifdef GZIP @@ -1230,7 +1254,7 @@ int ZEXPORT deflate(z_streamp strm, int flush) { } else #endif - { + if (s->wrap == 1) { putShortMSB(s, (uInt)(strm->adler >> 16)); putShortMSB(s, (uInt)(strm->adler & 0xffff)); } @@ -1239,7 +1263,11 @@ int ZEXPORT deflate(z_streamp strm, int flush) { * to flush the rest. */ if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ - return s->pending != 0 ? 
Z_OK : Z_STREAM_END; + if (s->pending == 0) { + Assert(s->bi_valid == 0, "bi_buf not flushed"); + return Z_STREAM_END; + } + return Z_OK; } /* ========================================================================= */ @@ -1254,9 +1282,9 @@ int ZEXPORT deflateEnd(z_streamp strm) { TRY_FREE(strm, strm->state->pending_buf); TRY_FREE(strm, strm->state->head); TRY_FREE(strm, strm->state->prev); - TRY_FREE(strm, strm->state->window); + TRY_FREE_WINDOW(strm, strm->state->window); - ZFREE(strm, strm->state); + ZFREE_STATE(strm, strm->state); strm->state = Z_NULL; return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; @@ -1285,13 +1313,13 @@ int ZEXPORT deflateCopy(z_streamp dest, z_streamp source) { zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); - ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); + ds = (deflate_state *) ZALLOC_STATE(dest, 1, sizeof(deflate_state)); if (ds == Z_NULL) return Z_MEM_ERROR; dest->state = (struct internal_state FAR *) ds; - zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); + ZCOPY_STATE((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); ds->strm = dest; - ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->window = (Bytef *) ZALLOC_WINDOW(dest, ds->w_size, 2*sizeof(Byte)); ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4); diff --git a/deflate.h b/deflate.h index 869679142..d49e698c8 100644 --- a/deflate.h +++ b/deflate.h @@ -299,6 +299,7 @@ void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s); void ZLIB_INTERNAL _tr_align(deflate_state *s); void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, ulg stored_len, int last); +void ZLIB_INTERNAL _tr_send_bits(deflate_state *s, int value, int length); #define d_code(dist) \ ((dist) < 256 ? 
_dist_code[dist] : _dist_code[256+((dist)>>7)]) @@ -343,4 +344,15 @@ void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, flush = _tr_tally(s, distance, length) #endif +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +unsigned ZLIB_INTERNAL bi_reverse(unsigned code, int len); +void ZLIB_INTERNAL bi_windup(deflate_state *s); +void ZLIB_INTERNAL flush_pending(z_streamp strm); + #endif /* DEFLATE_H */ diff --git a/gzguts.h b/gzguts.h index f9375047e..5adfd1d52 100644 --- a/gzguts.h +++ b/gzguts.h @@ -152,7 +152,11 @@ /* default i/o buffer size -- double this for output when reading (this and twice this must be able to fit in an unsigned type) */ +#ifdef DFLTCC +#define GZBUFSIZE 131072 +#else #define GZBUFSIZE 8192 +#endif /* gzip modes, also provide a little integrity check on the passed structure */ #define GZ_NONE 0 diff --git a/inflate.c b/inflate.c index b0757a9b2..c0f808faa 100644 --- a/inflate.c +++ b/inflate.c @@ -85,6 +85,27 @@ #include "inflate.h" #include "inffast.h" +/* architecture-specific bits */ +#ifdef DFLTCC +#include "contrib/s390/dfltcc.h" +#else +#define ZALLOC_STATE ZALLOC +#define ZFREE_STATE ZFREE +#define ZCOPY_STATE zmemcpy +#define ZALLOC_WINDOW ZALLOC +#define ZCOPY_WINDOW zmemcpy +#define ZFREE_WINDOW ZFREE +#define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0) +#define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0) +#define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0) +#define INFLATE_NEED_CHECKSUM(strm) 1 +#define INFLATE_NEED_UPDATEWINDOW(strm) 1 +#define INFLATE_MARK_HOOK(strm) do {} while (0) +#define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0) +#define INFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) +#define INFLATE_GET_DICTIONARY_HOOK(strm, dict, 
dict_len) do {} while (0) +#endif + #ifdef MAKEFIXED # ifndef BUILDFIXED # define BUILDFIXED @@ -123,6 +144,7 @@ int ZEXPORT inflateResetKeep(z_streamp strm) { state->lencode = state->distcode = state->next = state->codes; state->sane = 1; state->back = -1; + INFLATE_RESET_KEEP_HOOK(strm); Tracev((stderr, "inflate: reset\n")); return Z_OK; } @@ -165,7 +187,7 @@ int ZEXPORT inflateReset2(z_streamp strm, int windowBits) { if (windowBits && (windowBits < 8 || windowBits > 15)) return Z_STREAM_ERROR; if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { - ZFREE(strm, state->window); + ZFREE_WINDOW(strm, state->window); state->window = Z_NULL; } @@ -200,7 +222,7 @@ int ZEXPORT inflateInit2_(z_streamp strm, int windowBits, strm->zfree = zcfree; #endif state = (struct inflate_state FAR *) - ZALLOC(strm, 1, sizeof(struct inflate_state)); + ZALLOC_STATE(strm, 1, sizeof(struct inflate_state)); if (state == Z_NULL) return Z_MEM_ERROR; Tracev((stderr, "inflate: allocated\n")); strm->state = (struct internal_state FAR *)state; @@ -209,7 +231,7 @@ int ZEXPORT inflateInit2_(z_streamp strm, int windowBits, state->mode = HEAD; /* to pass state test in inflateReset2() */ ret = inflateReset2(strm, windowBits); if (ret != Z_OK) { - ZFREE(strm, state); + ZFREE_STATE(strm, state); strm->state = Z_NULL; } return ret; @@ -226,6 +248,7 @@ int ZEXPORT inflatePrime(z_streamp strm, int bits, int value) { if (inflateStateCheck(strm)) return Z_STREAM_ERROR; if (bits == 0) return Z_OK; + INFLATE_PRIME_HOOK(strm, bits, value); state = (struct inflate_state FAR *)strm->state; if (bits < 0) { state->hold = 0; @@ -351,6 +374,27 @@ void makefixed(void) } #endif /* MAKEFIXED */ +int ZLIB_INTERNAL inflate_ensure_window(state) + struct inflate_state *state; +{ + /* if it hasn't been done already, allocate space for the window */ + if (state->window == Z_NULL) { + state->window = (unsigned char FAR *) + ZALLOC_WINDOW(state->strm, 1U << state->wbits, + sizeof(unsigned char)); + if 
(state->window == Z_NULL) return 1; + } + + /* if window not in use yet, initialize */ + if (state->wsize == 0) { + state->wsize = 1U << state->wbits; + state->wnext = 0; + state->whave = 0; + } + + return 0; +} + /* Update the window with the last wsize (normally 32K) bytes written before returning. If window does not exist yet, create it. This is only called @@ -371,20 +415,7 @@ local int updatewindow(z_streamp strm, const Bytef *end, unsigned copy) { state = (struct inflate_state FAR *)strm->state; - /* if it hasn't been done already, allocate space for the window */ - if (state->window == Z_NULL) { - state->window = (unsigned char FAR *) - ZALLOC(strm, 1U << state->wbits, - sizeof(unsigned char)); - if (state->window == Z_NULL) return 1; - } - - /* if window not in use yet, initialize */ - if (state->wsize == 0) { - state->wsize = 1U << state->wbits; - state->wnext = 0; - state->whave = 0; - } + if (inflate_ensure_window(state)) return 1; /* copy state->wsize or less output bytes into the circular window */ if (copy >= state->wsize) { @@ -825,6 +856,7 @@ int ZEXPORT inflate(z_streamp strm, int flush) { if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; /* fallthrough */ case TYPEDO: + INFLATE_TYPEDO_HOOK(strm, flush); if (state->last) { BYTEBITS(); state->mode = CHECK; @@ -1186,7 +1218,7 @@ int ZEXPORT inflate(z_streamp strm, int flush) { out -= left; strm->total_out += out; state->total += out; - if ((state->wrap & 4) && out) + if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out) strm->adler = state->check = UPDATE_CHECK(state->check, put - out, out); out = left; @@ -1241,8 +1273,9 @@ int ZEXPORT inflate(z_streamp strm, int flush) { */ inf_leave: RESTORE(); - if (state->wsize || (out != strm->avail_out && state->mode < BAD && - (state->mode < CHECK || flush != Z_FINISH))) + if (INFLATE_NEED_UPDATEWINDOW(strm) && + (state->wsize || (out != strm->avail_out && state->mode < BAD && + (state->mode < CHECK || flush != Z_FINISH)))) if 
(updatewindow(strm, strm->next_out, out - strm->avail_out)) { state->mode = MEM; return Z_MEM_ERROR; @@ -1252,7 +1285,7 @@ int ZEXPORT inflate(z_streamp strm, int flush) { strm->total_in += in; strm->total_out += out; state->total += out; - if ((state->wrap & 4) && out) + if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out) strm->adler = state->check = UPDATE_CHECK(state->check, strm->next_out - out, out); strm->data_type = (int)state->bits + (state->last ? 64 : 0) + @@ -1268,8 +1301,8 @@ int ZEXPORT inflateEnd(z_streamp strm) { if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; - if (state->window != Z_NULL) ZFREE(strm, state->window); - ZFREE(strm, strm->state); + if (state->window != Z_NULL) ZFREE_WINDOW(strm, state->window); + ZFREE_STATE(strm, strm->state); strm->state = Z_NULL; Tracev((stderr, "inflate: end\n")); return Z_OK; @@ -1283,6 +1316,8 @@ int ZEXPORT inflateGetDictionary(z_streamp strm, Bytef *dictionary, if (inflateStateCheck(strm)) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; + INFLATE_GET_DICTIONARY_HOOK(strm, dictionary, dictLength); + /* copy dictionary */ if (state->whave && dictionary != Z_NULL) { zmemcpy(dictionary, state->window + state->wnext, @@ -1315,6 +1350,8 @@ int ZEXPORT inflateSetDictionary(z_streamp strm, const Bytef *dictionary, return Z_DATA_ERROR; } + INFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength); + /* copy dictionary to window using updatewindow(), which will amend the existing dictionary if appropriate */ ret = updatewindow(strm, dictionary + dictLength, dictLength); @@ -1432,6 +1469,7 @@ int ZEXPORT inflateSyncPoint(z_streamp strm) { struct inflate_state FAR *state; if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + INFLATE_SYNC_POINT_HOOK(strm); state = (struct inflate_state FAR *)strm->state; return state->mode == STORED && state->bits == 0; } @@ -1440,7 +1478,6 @@ int ZEXPORT inflateCopy(z_streamp dest, z_streamp source) { 
struct inflate_state FAR *state; struct inflate_state FAR *copy; unsigned char FAR *window; - unsigned wsize; /* check input */ if (inflateStateCheck(source) || dest == Z_NULL) @@ -1449,21 +1486,22 @@ int ZEXPORT inflateCopy(z_streamp dest, z_streamp source) { /* allocate space */ copy = (struct inflate_state FAR *) - ZALLOC(source, 1, sizeof(struct inflate_state)); + ZALLOC_STATE(source, 1, sizeof(struct inflate_state)); if (copy == Z_NULL) return Z_MEM_ERROR; window = Z_NULL; if (state->window != Z_NULL) { window = (unsigned char FAR *) - ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); + ZALLOC_WINDOW(source, 1U << state->wbits, + sizeof(unsigned char)); if (window == Z_NULL) { - ZFREE(source, copy); + ZFREE_STATE(source, copy); return Z_MEM_ERROR; } } /* copy state */ zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); - zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); + ZCOPY_STATE((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); copy->strm = dest; if (state->lencode >= state->codes && state->lencode <= state->codes + ENOUGH - 1) { @@ -1472,8 +1510,7 @@ int ZEXPORT inflateCopy(z_streamp dest, z_streamp source) { } copy->next = copy->codes + (state->next - state->codes); if (window != Z_NULL) { - wsize = 1U << state->wbits; - zmemcpy(window, state->window, wsize); + ZCOPY_WINDOW(window, state->window, 1U << state->wbits); } copy->window = window; dest->state = (struct internal_state FAR *)copy; @@ -1512,6 +1549,7 @@ long ZEXPORT inflateMark(z_streamp strm) { if (inflateStateCheck(strm)) return -(1L << 16); + INFLATE_MARK_HOOK(strm); state = (struct inflate_state FAR *)strm->state; return (long)(((unsigned long)((long)state->back)) << 16) + (state->mode == COPY ? 
state->length :
diff --git a/inflate.h b/inflate.h
index f127b6b1f..3d504e3c4 100644
--- a/inflate.h
+++ b/inflate.h
@@ -124,3 +124,5 @@ struct inflate_state {
     int back;                   /* bits back of last unprocessed length/lit */
     unsigned was;               /* initial length of match */
 };
+
+int ZLIB_INTERNAL inflate_ensure_window(struct inflate_state *state);
diff --git a/test/crc32_test.c b/test/crc32_test.c
new file mode 100644
index 000000000..3155553e6
--- /dev/null
+++ b/test/crc32_test.c
@@ -0,0 +1,204 @@
+/* crc32_test.c -- unit test for crc32 in the zlib compression library
+ * Copyright (C) 1995-2006, 2010, 2011, 2016, 2019 Rogerio Alves
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zlib.h"
+#include <stdio.h>
+
+#ifdef STDC
+#  include <string.h>
+#  include <stdlib.h>
+#endif
+
+void test_crc32 OF((uLong crc, Byte* buf, z_size_t len, uLong chk, int line));
+int main OF((void));
+
+/* One crc32() test vector. */
+typedef struct {
+    int line;       /* source line of this vector (__LINE__), printed on failure */
+    uLong crc;      /* starting crc value handed to crc32() */
+    char* buf;      /* input bytes; some vectors use a null pointer (0x0) */
+    int len;        /* byte count handed to crc32() */
+    uLong expect;   /* value crc32() is required to return */
+} crc32_test;
+
+/* Run crc32(crc, buf, len) and compare the result with chk.  On a mismatch,
+ * print the vector's source line plus both values to stderr and exit(1).
+ */
+void test_crc32(uLong crc, Byte *buf, z_size_t len, uLong chk, int line)
+{
+    uLong res = crc32(crc, buf, len);
+    if (res != chk) {
+        fprintf(stderr, "FAIL [%d]: crc32 returned 0x%08X expected 0x%08X\n",
+                line, (unsigned int)res, (unsigned int)chk);
+        exit(1);
+    }
+}
+
+static const crc32_test tests[] = {
+  {__LINE__, 0x0, 0x0, 0, 0x0},
+  {__LINE__, 0xffffffff, 0x0, 0, 0x0},
+  {__LINE__, 0x0, 0x0, 255, 0x0}, /* BZ 174799.
*/ + {__LINE__, 0x0, 0x0, 256, 0x0}, + {__LINE__, 0x0, 0x0, 257, 0x0}, + {__LINE__, 0x0, 0x0, 32767, 0x0}, + {__LINE__, 0x0, 0x0, 32768, 0x0}, + {__LINE__, 0x0, 0x0, 32769, 0x0}, + {__LINE__, 0x0, "", 0, 0x0}, + {__LINE__, 0xffffffff, "", 0, 0xffffffff}, + {__LINE__, 0x0, "abacus", 6, 0xc3d7115b}, + {__LINE__, 0x0, "backlog", 7, 0x269205}, + {__LINE__, 0x0, "campfire", 8, 0x22a515f8}, + {__LINE__, 0x0, "delta", 5, 0x9643fed9}, + {__LINE__, 0x0, "executable", 10, 0xd68eda01}, + {__LINE__, 0x0, "file", 4, 0x8c9f3610}, + {__LINE__, 0x0, "greatest", 8, 0xc1abd6cd}, + {__LINE__, 0x0, "hello", 5, 0x3610a686}, + {__LINE__, 0x0, "inverter", 8, 0xc9e962c9}, + {__LINE__, 0x0, "jigsaw", 6, 0xce4e3f69}, + {__LINE__, 0x0, "karate", 6, 0x890be0e2}, + {__LINE__, 0x0, "landscape", 9, 0xc4e0330b}, + {__LINE__, 0x0, "machine", 7, 0x1505df84}, + {__LINE__, 0x0, "nanometer", 9, 0xd4e19f39}, + {__LINE__, 0x0, "oblivion", 8, 0xdae9de77}, + {__LINE__, 0x0, "panama", 6, 0x66b8979c}, + {__LINE__, 0x0, "quest", 5, 0x4317f817}, + {__LINE__, 0x0, "resource", 8, 0xbc91f416}, + {__LINE__, 0x0, "secret", 6, 0x5ca2e8e5}, + {__LINE__, 0x0, "test", 4, 0xd87f7e0c}, + {__LINE__, 0x0, "ultimate", 8, 0x3fc79b0b}, + {__LINE__, 0x0, "vector", 6, 0x1b6e485b}, + {__LINE__, 0x0, "walrus", 6, 0xbe769b97}, + {__LINE__, 0x0, "xeno", 4, 0xe7a06444}, + {__LINE__, 0x0, "yelling", 7, 0xfe3944e5}, + {__LINE__, 0x0, "zlib", 4, 0x73887d3a}, + {__LINE__, 0x0, "4BJD7PocN1VqX0jXVpWB", 20, 0xd487a5a1}, + {__LINE__, 0x0, "F1rPWI7XvDs6nAIRx41l", 20, 0x61a0132e}, + {__LINE__, 0x0, "ldhKlsVkPFOveXgkGtC2", 20, 0xdf02f76}, + {__LINE__, 0x0, "5KKnGOOrs8BvJ35iKTOS", 20, 0x579b2b0a}, + {__LINE__, 0x0, "0l1tw7GOcem06Ddu7yn4", 20, 0xf7d16e2d}, + {__LINE__, 0x0, "MCr47CjPIn9R1IvE1Tm5", 20, 0x731788f5}, + {__LINE__, 0x0, "UcixbzPKTIv0SvILHVdO", 20, 0x7112bb11}, + {__LINE__, 0x0, "dGnAyAhRQDsWw0ESou24", 20, 0xf32a0dac}, + {__LINE__, 0x0, "di0nvmY9UYMYDh0r45XT", 20, 0x625437bb}, + {__LINE__, 0x0, "2XKDwHfAhFsV0RhbqtvH", 20, 
0x896930f9}, + {__LINE__, 0x0, "ZhrANFIiIvRnqClIVyeD", 20, 0x8579a37}, + {__LINE__, 0x0, "v7Q9ehzioTOVeDIZioT1", 20, 0x632aa8e0}, + {__LINE__, 0x0, "Yod5hEeKcYqyhfXbhxj2", 20, 0xc829af29}, + {__LINE__, 0x0, "GehSWY2ay4uUKhehXYb0", 20, 0x1b08b7e8}, + {__LINE__, 0x0, "kwytJmq6UqpflV8Y8GoE", 20, 0x4e33b192}, + {__LINE__, 0x0, "70684206568419061514", 20, 0x59a179f0}, + {__LINE__, 0x0, "42015093765128581010", 20, 0xcd1013d7}, + {__LINE__, 0x0, "88214814356148806939", 20, 0xab927546}, + {__LINE__, 0x0, "43472694284527343838", 20, 0x11f3b20c}, + {__LINE__, 0x0, "49769333513942933689", 20, 0xd562d4ca}, + {__LINE__, 0x0, "54979784887993251199", 20, 0x233395f7}, + {__LINE__, 0x0, "58360544869206793220", 20, 0x2d167fd5}, + {__LINE__, 0x0, "27347953487840714234", 20, 0x8b5108ba}, + {__LINE__, 0x0, "07650690295365319082", 20, 0xc46b3cd8}, + {__LINE__, 0x0, "42655507906821911703", 20, 0xc10b2662}, + {__LINE__, 0x0, "29977409200786225655", 20, 0xc9a0f9d2}, + {__LINE__, 0x0, "85181542907229116674", 20, 0x9341357b}, + {__LINE__, 0x0, "87963594337989416799", 20, 0xf0424937}, + {__LINE__, 0x0, "21395988329504168551", 20, 0xd7c4c31f}, + {__LINE__, 0x0, "51991013580943379423", 20, 0xf11edcc4}, + {__LINE__, 0x0, "*]+@!);({_$;}[_},?{?;(_?,=-][@", 30, 0x40795df4}, + {__LINE__, 0x0, "_@:_).&(#.[:[{[:)$++-($_;@[)}+", 30, 0xdd61a631}, + {__LINE__, 0x0, "&[!,[$_==}+.]@!;*(+},[;:)$;)-@", 30, 0xca907a99}, + {__LINE__, 0x0, "]{.[.+?+[[=;[?}_#&;[=)__$$:+=_", 30, 0xf652deac}, + {__LINE__, 0x0, "-%.)=/[@].:.(:,()$;=%@-$?]{%+%", 30, 0xaf39a5a9}, + {__LINE__, 0x0, "+]#$(@&.=:,*];/.!]%/{:){:@(;)$", 30, 0x6bebb4cf}, + {__LINE__, 0x0, ")-._.:?[&:.=+}(*$/=!.${;(=$@!}", 30, 0x76430bac}, + {__LINE__, 0x0, ":(_*&%/[[}+,?#$&*+#[([*-/#;%(]", 30, 0x6c80c388}, + {__LINE__, 0x0, "{[#-;:$/{)(+[}#]/{&!%(@)%:@-$:", 30, 0xd54d977d}, + {__LINE__, 0x0, "_{$*,}(&,@.)):=!/%(&(,,-?$}}}!", 30, 0xe3966ad5}, + {__LINE__, 0x0, 
"e$98KNzqaV)Y:2X?]77].{gKRD4G5{mHZk,Z)SpU%L3FSgv!Wb8MLAFdi{+fp)c,@8m6v)yXg@]HBDFk?.4&}g5_udE*JHCiH=aL", 100, 0xe7c71db9}, + {__LINE__, 0x0, "r*Fd}ef+5RJQ;+W=4jTR9)R*p!B;]Ed7tkrLi;88U7g@3v!5pk2X6D)vt,.@N8c]@yyEcKi[vwUu@.Ppm@C6%Mv*3Nw}Y,58_aH)", 100, 0xeaa52777}, + {__LINE__, 0x0, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 100, 0xcd472048}, + {__LINE__, 0x7a30360d, "abacus", 6, 0xf8655a84}, + {__LINE__, 0x6fd767ee, "backlog", 7, 0x1ed834b1}, + {__LINE__, 0xefeb7589, "campfire", 8, 0x686cfca}, + {__LINE__, 0x61cf7e6b, "delta", 5, 0x1554e4b1}, + {__LINE__, 0xdc712e2, "executable", 10, 0x761b4254}, + {__LINE__, 0xad23c7fd, "file", 4, 0x7abdd09b}, + {__LINE__, 0x85cb2317, "greatest", 8, 0x4ba91c6b}, + {__LINE__, 0x9eed31b0, "inverter", 8, 0xd5e78ba5}, + {__LINE__, 0xb94f34ca, "jigsaw", 6, 0x23649109}, + {__LINE__, 0xab058a2, "karate", 6, 0xc5591f41}, + {__LINE__, 0x5bff2b7a, "landscape", 9, 0xf10eb644}, + {__LINE__, 0x605c9a5f, "machine", 7, 0xbaa0a636}, + {__LINE__, 0x51bdeea5, "nanometer", 9, 0x6af89afb}, + {__LINE__, 0x85c21c79, "oblivion", 8, 0xecae222b}, + {__LINE__, 0x97216f56, "panama", 6, 0x47dffac4}, + {__LINE__, 0x18444af2, "quest", 5, 0x70c2fe36}, + {__LINE__, 0xbe6ce359, "resource", 8, 0x1471d925}, + {__LINE__, 0x843071f1, "secret", 6, 0x50c9a0db}, + {__LINE__, 0xf2480c60, "ultimate", 8, 0xf973daf8}, + {__LINE__, 0x2d2feb3d, "vector", 6, 0x344ac03d}, + {__LINE__, 0x7490310a, "walrus", 6, 0x6d1408ef}, + {__LINE__, 0x97d247d4, "xeno", 4, 0xe62670b5}, + {__LINE__, 0x93cf7599, "yelling", 7, 0x1b36da38}, + {__LINE__, 0x73c84278, "zlib", 4, 0x6432d127}, + {__LINE__, 0x228a87d1, "4BJD7PocN1VqX0jXVpWB", 20, 0x997107d0}, + {__LINE__, 0xa7a048d0, "F1rPWI7XvDs6nAIRx41l", 20, 0xdc567274}, + {__LINE__, 0x1f0ded40, "ldhKlsVkPFOveXgkGtC2", 20, 0xdcc63870}, + {__LINE__, 0xa804a62f, "5KKnGOOrs8BvJ35iKTOS", 20, 0x6926cffd}, + {__LINE__, 0x508fae6a, "0l1tw7GOcem06Ddu7yn4", 20, 0xb52b38bc}, + {__LINE__, 0xe5adaf4f, 
"MCr47CjPIn9R1IvE1Tm5", 20, 0xf83b8178}, + {__LINE__, 0x67136a40, "UcixbzPKTIv0SvILHVdO", 20, 0xc5213070}, + {__LINE__, 0xb00c4a10, "dGnAyAhRQDsWw0ESou24", 20, 0xbc7648b0}, + {__LINE__, 0x2e0c84b5, "di0nvmY9UYMYDh0r45XT", 20, 0xd8123a72}, + {__LINE__, 0x81238d44, "2XKDwHfAhFsV0RhbqtvH", 20, 0xd5ac5620}, + {__LINE__, 0xf853aa92, "ZhrANFIiIvRnqClIVyeD", 20, 0xceae099d}, + {__LINE__, 0x5a692325, "v7Q9ehzioTOVeDIZioT1", 20, 0xb07d2b24}, + {__LINE__, 0x3275b9f, "Yod5hEeKcYqyhfXbhxj2", 20, 0x24ce91df}, + {__LINE__, 0x38371feb, "GehSWY2ay4uUKhehXYb0", 20, 0x707b3b30}, + {__LINE__, 0xafc8bf62, "kwytJmq6UqpflV8Y8GoE", 20, 0x16abc6a9}, + {__LINE__, 0x9b07db73, "70684206568419061514", 20, 0xae1fb7b7}, + {__LINE__, 0xe75b214, "42015093765128581010", 20, 0xd4eecd2d}, + {__LINE__, 0x72d0fe6f, "88214814356148806939", 20, 0x4660ec7}, + {__LINE__, 0xf857a4b1, "43472694284527343838", 20, 0xfd8afdf7}, + {__LINE__, 0x54b8e14, "49769333513942933689", 20, 0xc6d1b5f2}, + {__LINE__, 0xd6aa5616, "54979784887993251199", 20, 0x32476461}, + {__LINE__, 0x11e63098, "58360544869206793220", 20, 0xd917cf1a}, + {__LINE__, 0xbe92385, "27347953487840714234", 20, 0x4ad14a12}, + {__LINE__, 0x49511de0, "07650690295365319082", 20, 0xe37b5c6c}, + {__LINE__, 0x3db13bc1, "42655507906821911703", 20, 0x7cc497f1}, + {__LINE__, 0xbb899bea, "29977409200786225655", 20, 0x99781bb2}, + {__LINE__, 0xf6cd9436, "85181542907229116674", 20, 0x132256a1}, + {__LINE__, 0x9109e6c3, "87963594337989416799", 20, 0xbfdb2c83}, + {__LINE__, 0x75770fc, "21395988329504168551", 20, 0x8d9d1e81}, + {__LINE__, 0x69b1d19b, "51991013580943379423", 20, 0x7b6d4404}, + {__LINE__, 0xc6132975, "*]+@!);({_$;}[_},?{?;(_?,=-][@", 30, 0x8619f010}, + {__LINE__, 0xd58cb00c, "_@:_).&(#.[:[{[:)$++-($_;@[)}+", 30, 0x15746ac3}, + {__LINE__, 0xb63b8caa, "&[!,[$_==}+.]@!;*(+},[;:)$;)-@", 30, 0xaccf812f}, + {__LINE__, 0x8a45a2b8, "]{.[.+?+[[=;[?}_#&;[=)__$$:+=_", 30, 0x78af45de}, + {__LINE__, 0xcbe95b78, "-%.)=/[@].:.(:,()$;=%@-$?]{%+%", 30, 0x25b06b59}, 
+ {__LINE__, 0x4ef8a54b, "+]#$(@&.=:,*];/.!]%/{:){:@(;)$", 30, 0x4ba0d08f}, + {__LINE__, 0x76ad267a, ")-._.:?[&:.=+}(*$/=!.${;(=$@!}", 30, 0xe26b6aac}, + {__LINE__, 0x569e613c, ":(_*&%/[[}+,?#$&*+#[([*-/#;%(]", 30, 0x7e2b0a66}, + {__LINE__, 0x36aa61da, "{[#-;:$/{)(+[}#]/{&!%(@)%:@-$:", 30, 0xb3430dc7}, + {__LINE__, 0xf67222df, "_{$*,}(&,@.)):=!/%(&(,,-?$}}}!", 30, 0x626c17a}, + {__LINE__, 0x74b34fd3, "e$98KNzqaV)Y:2X?]77].{gKRD4G5{mHZk,Z)SpU%L3FSgv!Wb8MLAFdi{+fp)c,@8m6v)yXg@]HBDFk?.4&}g5_udE*JHCiH=aL", 100, 0xccf98060}, + {__LINE__, 0x351fd770, "r*Fd}ef+5RJQ;+W=4jTR9)R*p!B;]Ed7tkrLi;88U7g@3v!5pk2X6D)vt,.@N8c]@yyEcKi[vwUu@.Ppm@C6%Mv*3Nw}Y,58_aH)", 100, 0xd8b95312}, + {__LINE__, 0xc45aef77, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 100, 0xbb1c9912}, + {__LINE__, 0xc45aef77, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" + "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" + "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" + "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" + "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" + "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 600, 0x888AFA5B} +}; + +static const int test_size = sizeof(tests) / sizeof(tests[0]); + +int main(void) +{ + int i; + for (i = 0; i < test_size; i++) { + test_crc32(tests[i].crc, (Byte*) tests[i].buf, tests[i].len, + tests[i].expect, tests[i].line); + } + return 0; +} diff --git a/test/infcover.c b/test/infcover.c index 8912c403d..d1e8f5ebd 100644 --- a/test/infcover.c +++ b/test/infcover.c @@ -462,8 +462,7 @@ local unsigned pull(void *desc, unsigned char **buf) local int push(void *desc, unsigned char *buf, unsigned 
len) { - (void)buf; - (void)len; + buf += len; return desc != Z_NULL; /* force error if desc not null */ } diff --git a/test/minigzip.c b/test/minigzip.c index 8a21ddfb5..a9d6cbc34 100644 --- a/test/minigzip.c +++ b/test/minigzip.c @@ -132,7 +132,11 @@ static void pwinerror (s) #endif #define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1) +#ifdef DFLTCC +#define BUFLEN 262144 +#else #define BUFLEN 16384 +#endif #define MAX_NAME_LEN 1024 #ifdef MAXSEG_64K diff --git a/trees.c b/trees.c index 8dbdc40ba..c2786d6cd 100644 --- a/trees.c +++ b/trees.c @@ -151,7 +151,7 @@ local TCONST static_tree_desc static_bl_desc = * method would use a table) * IN assertion: 1 <= len <= 15 */ -local unsigned bi_reverse(unsigned code, int len) { +unsigned ZLIB_INTERNAL bi_reverse(unsigned code, int len) { register unsigned res = 0; do { res |= code & 1; @@ -178,7 +178,7 @@ local void bi_flush(deflate_state *s) { /* =========================================================================== * Flush the bit buffer and align the output on a byte boundary */ -local void bi_windup(deflate_state *s) { +void ZLIB_INTERNAL bi_windup(deflate_state *s) { if (s->bi_valid > 8) { put_short(s, s->bi_buf); } else if (s->bi_valid > 0) { @@ -285,6 +285,10 @@ local void send_bits(deflate_state *s, int value, int length) { } #endif /* ZLIB_DEBUG */ +void ZLIB_INTERNAL _tr_send_bits(deflate_state *s, int value, int length) +{ + send_bits(s, value, length); +} /* the arguments must not have side effects */ diff --git a/zutil.h b/zutil.h index 902a304cc..573d954f0 100644 --- a/zutil.h +++ b/zutil.h @@ -87,6 +87,8 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ #define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ +#define ZLIB_WRAPLEN 6 /* zlib format overhead */ + /* target dependencies */ #if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32))