diff --git a/thirdparty/snappy/NEWS b/thirdparty/snappy/NEWS index db70b448..98048dbd 100644 --- a/thirdparty/snappy/NEWS +++ b/thirdparty/snappy/NEWS @@ -1,3 +1,11 @@ +Snappy v1.1.8, January 15th 2020: + + * Small performance improvements. + + * Removed snappy::string alias for std::string. + + * Improved CMake configuration. + Snappy v1.1.7, August 24th 2017: * Improved CMake build support for 64-bit Linux distributions. diff --git a/thirdparty/snappy/README.md b/thirdparty/snappy/README.md index b9db833d..cef40174 100644 --- a/thirdparty/snappy/README.md +++ b/thirdparty/snappy/README.md @@ -51,7 +51,7 @@ In particular: - Snappy uses 64-bit operations in several places to process more data at once than would otherwise be possible. - - Snappy assumes unaligned 32- and 64-bit loads and stores are cheap. + - Snappy assumes unaligned 32 and 64-bit loads and stores are cheap. On some platforms, these must be emulated with single-byte loads and stores, which is much slower. - Snappy assumes little-endian throughout, and needs to byte-swap data in @@ -65,32 +65,37 @@ are of course most welcome; see "Contact", below. Building ======== -CMake is supported and autotools will soon be deprecated. -You need CMake 3.4 or above to build: - - mkdir build - cd build && cmake ../ && make +You need the CMake version specified in [CMakeLists.txt](./CMakeLists.txt) +or later to build: +```bash +mkdir build +cd build && cmake ../ && make +``` Usage ===== Note that Snappy, both the implementation and the main interface, is written in C++. However, several third-party bindings to other languages -are available; see the home page at http://google.github.io/snappy/ -for more information. Also, if you want to use Snappy from C code, you can -use the included C bindings in snappy-c.h. +are available; see the [home page](docs/README.md) for more information. +Also, if you want to use Snappy from C code, you can use the included C +bindings in snappy-c.h. To use Snappy from your own C++ program, include the file "snappy.h" from your calling file, and link against the compiled library. There are many ways to call Snappy, but the simplest possible is - snappy::Compress(input.data(), input.size(), &output); +```c++ +snappy::Compress(input.data(), input.size(), &output); +``` and similarly - snappy::Uncompress(input.data(), input.size(), &output); +```c++ +snappy::Uncompress(input.data(), input.size(), &output); +``` where "input" and "output" are both instances of std::string. @@ -112,12 +117,12 @@ tests to verify you have not broken anything. Note that if you have the Google Test library installed, unit test behavior (especially failures) will be significantly more user-friendly. You can find Google Test at - http://github.com/google/googletest + https://github.com/google/googletest You probably also want the gflags library for handling of command-line flags; you can find it at - http://gflags.github.io/gflags/ + https://gflags.github.io/gflags/ In addition to the unit tests, snappy contains microbenchmarks used to tune compression and decompression performance. These are automatically run @@ -140,10 +145,4 @@ Contact ======= Snappy is distributed through GitHub. For the latest version, a bug tracker, -and other information, see - - http://google.github.io/snappy/ - -or the repository at - - https://github.com/google/snappy +and other information, see https://github.com/google/snappy. 
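The usage snippets in the README hunk above go through the std::string overloads. A minimal, self-contained round trip built only from the public calls mentioned there (Compress, Uncompress, MaxCompressedLength, IsValidCompressedBuffer) might look like the sketch below; the sample input and sizes are illustrative only.

```c++
// Minimal round-trip sketch of the std::string-based API described above.
// Assumes snappy has been built and linked as shown in the Building section.
#include <cassert>
#include <iostream>
#include <string>

#include "snappy.h"

int main() {
  const std::string input(10000, 'x');  // Highly compressible sample data.

  std::string compressed;
  snappy::Compress(input.data(), input.size(), &compressed);
  assert(compressed.size() <= snappy::MaxCompressedLength(input.size()));

  // A cheap validity check is available without fully decompressing.
  assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));

  std::string output;
  if (!snappy::Uncompress(compressed.data(), compressed.size(), &output)) {
    std::cerr << "corrupted input\n";
    return 1;
  }
  assert(output == input);
  std::cout << input.size() << " -> " << compressed.size() << " bytes\n";
  return 0;
}
```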
diff --git a/thirdparty/snappy/config/config.h b/thirdparty/snappy/config/config.h index ebe2a246..d7f5a92b 100644 --- a/thirdparty/snappy/config/config.h +++ b/thirdparty/snappy/config/config.h @@ -37,13 +37,6 @@ /* Define to 1 if you have the `z' library (-lz). */ /* #undef HAVE_LIBZ */ -/* Define to 1 if you have the header file. */ -#define HAVE_STDDEF_H 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ /* Define to 1 if you have the header file. */ /* #undef HAVE_SYS_ENDIAN_H */ @@ -69,6 +62,12 @@ # define HAVE_WINDOWS_H 1 #endif +/* Define to 1 if you target processors with SSSE3+ and have . */ +/* #undef SNAPPY_HAVE_SSSE3 */ + +/* Define to 1 if you target processors with BMI2+ and have . */ +/* #undef SNAPPY_HAVE_BMI2 */ + /* Define to 1 if your processor stores words with the most significant byte first (like Motorola and SPARC, unlike Intel and VAX). */ #if defined(__GLIBC__) diff --git a/thirdparty/snappy/snappy-internal.h b/thirdparty/snappy/snappy-internal.h index 4b53d596..1e1c307f 100644 --- a/thirdparty/snappy/snappy-internal.h +++ b/thirdparty/snappy/snappy-internal.h @@ -36,19 +36,26 @@ namespace snappy { namespace internal { +// Working memory performs a single allocation to hold all scratch space +// required for compression. class WorkingMemory { public: - WorkingMemory() : large_table_(NULL) { } - ~WorkingMemory() { delete[] large_table_; } + explicit WorkingMemory(size_t input_size); + ~WorkingMemory(); // Allocates and clears a hash table using memory in "*this", // stores the number of buckets in "*table_size" and returns a pointer to // the base of the hash table. - uint16* GetHashTable(size_t input_size, int* table_size); + uint16* GetHashTable(size_t fragment_size, int* table_size) const; + char* GetScratchInput() const { return input_; } + char* GetScratchOutput() const { return output_; } private: - uint16 small_table_[1<<10]; // 2KB - uint16* large_table_; // Allocated only when needed + char* mem_; // the allocated memory, never nullptr + size_t size_; // the size of the allocated memory, never 0 + uint16* table_; // the pointer to the hashtable + char* input_; // the pointer to the input scratch buffer + char* output_; // the pointer to the output scratch buffer // No copying WorkingMemory(const WorkingMemory&); diff --git a/thirdparty/snappy/snappy-stubs-internal.cc b/thirdparty/snappy/snappy-stubs-internal.cc index 6ed33437..66ed2e90 100644 --- a/thirdparty/snappy/snappy-stubs-internal.cc +++ b/thirdparty/snappy/snappy-stubs-internal.cc @@ -33,7 +33,7 @@ namespace snappy { -void Varint::Append32(string* s, uint32 value) { +void Varint::Append32(std::string* s, uint32 value) { char buf[Varint::kMax32]; const char* p = Varint::Encode32(buf, value); s->append(buf, p - buf); diff --git a/thirdparty/snappy/snappy-stubs-internal.h b/thirdparty/snappy/snappy-stubs-internal.h index cb605f82..4854689d 100644 --- a/thirdparty/snappy/snappy-stubs-internal.h +++ b/thirdparty/snappy/snappy-stubs-internal.h @@ -53,6 +53,18 @@ #include #endif // defined(_MSC_VER) +#ifndef __has_feature +#define __has_feature(x) 0 +#endif + +#if __has_feature(memory_sanitizer) +#include +#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \ + __msan_unpoison((address), (size)) +#else +#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */ +#endif // __has_feature(memory_sanitizer) + #include "snappy-stubs-public.h" #if defined(__x86_64__) @@ -187,7 +199,7 @@ struct Unaligned32Struct { 
((reinterpret_cast< ::snappy::base::internal::Unaligned32Struct *>(_p))->value = \ (_val)) -// TODO(user): NEON supports unaligned 64-bit loads and stores. +// TODO: NEON supports unaligned 64-bit loads and stores. // See if that would be more efficient on platforms supporting it, // at least for copies. @@ -353,6 +365,9 @@ class LittleEndian { // Some bit-manipulation functions. class Bits { public: + // Return floor(log2(n)) for positive integer n. + static int Log2FloorNonZero(uint32 n); + // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. static int Log2Floor(uint32 n); @@ -373,50 +388,72 @@ class Bits { #ifdef HAVE_BUILTIN_CTZ +inline int Bits::Log2FloorNonZero(uint32 n) { + assert(n != 0); + // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof + // represents subtraction in base 2 and observes that there's no carry. + // + // GCC and Clang represent __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x). + // Using "31 ^" here instead of "31 -" allows the optimizer to strip the + // function body down to _bit_scan_reverse(x). + return 31 ^ __builtin_clz(n); +} + inline int Bits::Log2Floor(uint32 n) { - return n == 0 ? -1 : 31 ^ __builtin_clz(n); + return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); } inline int Bits::FindLSBSetNonZero(uint32 n) { + assert(n != 0); return __builtin_ctz(n); } #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) inline int Bits::FindLSBSetNonZero64(uint64 n) { + assert(n != 0); return __builtin_ctzll(n); } #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) #elif defined(_MSC_VER) +inline int Bits::Log2FloorNonZero(uint32 n) { + assert(n != 0); + unsigned long where; + _BitScanReverse(&where, n); + return static_cast(where); +} + inline int Bits::Log2Floor(uint32 n) { unsigned long where; - if (_BitScanReverse(&where, n)) { - return where; - } else { - return -1; - } + if (_BitScanReverse(&where, n)) + return static_cast(where); + return -1; } inline int Bits::FindLSBSetNonZero(uint32 n) { + assert(n != 0); unsigned long where; - if (_BitScanForward(&where, n)) return static_cast(where); + if (_BitScanForward(&where, n)) + return static_cast(where); return 32; } #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) inline int Bits::FindLSBSetNonZero64(uint64 n) { + assert(n != 0); unsigned long where; - if (_BitScanForward64(&where, n)) return static_cast(where); + if (_BitScanForward64(&where, n)) + return static_cast(where); return 64; } #endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) #else // Portable versions. -inline int Bits::Log2Floor(uint32 n) { - if (n == 0) - return -1; +inline int Bits::Log2FloorNonZero(uint32 n) { + assert(n != 0); + int log = 0; uint32 value = n; for (int i = 4; i >= 0; --i) { @@ -431,7 +468,13 @@ inline int Bits::Log2Floor(uint32 n) { return log; } +inline int Bits::Log2Floor(uint32 n) { + return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); +} + inline int Bits::FindLSBSetNonZero(uint32 n) { + assert(n != 0); + int rc = 31; for (int i = 4, shift = 1 << 4; i >= 0; --i) { const uint32 x = n << shift; @@ -447,6 +490,8 @@ inline int Bits::FindLSBSetNonZero(uint32 n) { #if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) // FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). 
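The comment introduced above argues that `31 ^ __builtin_clz(n)` equals `31 - __builtin_clz(n)` because `__builtin_clz(n)` never exceeds 31, which lets the optimizer reduce Log2FloorNonZero to a single bit-scan. A short standalone check of that identity against a loop-based floor(log2) reference, assuming a GCC/Clang toolchain for `__builtin_clz`, is sketched below; it is not part of the library.

```c++
// Sketch: verify that (31 ^ __builtin_clz(n)) matches a portable
// floor(log2(n)) for a few values. Requires GCC or Clang for __builtin_clz.
#include <cassert>
#include <cstdint>

static int PortableLog2FloorNonZero(uint32_t n) {
  int log = -1;
  while (n != 0) {
    n >>= 1;
    ++log;
  }
  return log;
}

int main() {
  for (uint32_t n : {1u, 2u, 3u, 4u, 1023u, 1024u, 0x80000000u, 0xffffffffu}) {
    // XOR and subtraction agree because __builtin_clz(n) lies in [0, 31].
    assert((31 ^ __builtin_clz(n)) == 31 - __builtin_clz(n));
    assert((31 ^ __builtin_clz(n)) == PortableLog2FloorNonZero(n));
  }
  return 0;
}
```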
inline int Bits::FindLSBSetNonZero64(uint64 n) { + assert(n != 0); + const uint32 bottombits = static_cast(n); if (bottombits == 0) { // Bottom bits are zero, so scan in top bits @@ -479,7 +524,7 @@ class Varint { static char* Encode32(char* ptr, uint32 v); // EFFECTS Appends the varint representation of "value" to "*s". - static void Append32(string* s, uint32 value); + static void Append32(std::string* s, uint32 value); }; inline const char* Varint::Parse32WithLimit(const char* p, @@ -536,7 +581,7 @@ inline char* Varint::Encode32(char* sptr, uint32 v) { // replace this function with one that resizes the string without // filling the new space with zeros (if applicable) -- // it will be non-portable but faster. -inline void STLStringResizeUninitialized(string* s, size_t new_size) { +inline void STLStringResizeUninitialized(std::string* s, size_t new_size) { s->resize(new_size); } @@ -552,7 +597,7 @@ inline void STLStringResizeUninitialized(string* s, size_t new_size) { // (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530) // proposes this as the method. It will officially be part of the standard // for C++0x. This should already work on all current implementations. -inline char* string_as_array(string* str) { +inline char* string_as_array(std::string* str) { return str->empty() ? NULL : &*str->begin(); } diff --git a/thirdparty/snappy/snappy-stubs-public.h.in b/thirdparty/snappy/snappy-stubs-public.h.in index 96989ac3..416ab997 100644 --- a/thirdparty/snappy/snappy-stubs-public.h.in +++ b/thirdparty/snappy/snappy-stubs-public.h.in @@ -1,5 +1,4 @@ // Copyright 2011 Google Inc. All Rights Reserved. -// Author: sesse@google.com (Steinar H. Gunderson) // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are @@ -36,64 +35,39 @@ #ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ #define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ -#if @ac_cv_have_stdint_h@ -#include -#endif - -#if @ac_cv_have_stddef_h@ -#include -#endif +#include +#include +#include -#if @ac_cv_have_sys_uio_h@ +#if ${HAVE_SYS_UIO_H_01} // HAVE_SYS_UIO_H #include -#endif +#endif // HAVE_SYS_UIO_H -#define SNAPPY_MAJOR @SNAPPY_MAJOR@ -#define SNAPPY_MINOR @SNAPPY_MINOR@ -#define SNAPPY_PATCHLEVEL @SNAPPY_PATCHLEVEL@ +#define SNAPPY_MAJOR ${PROJECT_VERSION_MAJOR} +#define SNAPPY_MINOR ${PROJECT_VERSION_MINOR} +#define SNAPPY_PATCHLEVEL ${PROJECT_VERSION_PATCH} #define SNAPPY_VERSION \ ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) -#include - namespace snappy { -#if @ac_cv_have_stdint_h@ -typedef int8_t int8; -typedef uint8_t uint8; -typedef int16_t int16; -typedef uint16_t uint16; -typedef int32_t int32; -typedef uint32_t uint32; -typedef int64_t int64; -typedef uint64_t uint64; -#else -typedef signed char int8; -typedef unsigned char uint8; -typedef short int16; -typedef unsigned short uint16; -typedef int int32; -typedef unsigned int uint32; -typedef long long int64; -typedef unsigned long long uint64; -#endif - -typedef std::string string; - -#ifndef DISALLOW_COPY_AND_ASSIGN -#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName&); \ - void operator=(const TypeName&) -#endif +using int8 = std::int8_t; +using uint8 = std::uint8_t; +using int16 = std::int16_t; +using uint16 = std::uint16_t; +using int32 = std::int32_t; +using uint32 = std::uint32_t; +using int64 = std::int64_t; +using uint64 = std::uint64_t; -#if !@ac_cv_have_sys_uio_h@ +#if !${HAVE_SYS_UIO_H_01} // 
!HAVE_SYS_UIO_H // Windows does not have an iovec type, yet the concept is universally useful. // It is simple to define it ourselves, so we put it inside our own namespace. struct iovec { - void* iov_base; - size_t iov_len; + void* iov_base; + size_t iov_len; }; -#endif +#endif // !HAVE_SYS_UIO_H } // namespace snappy diff --git a/thirdparty/snappy/snappy-test.cc b/thirdparty/snappy/snappy-test.cc index 01d5541f..83be2d36 100644 --- a/thirdparty/snappy/snappy-test.cc +++ b/thirdparty/snappy/snappy-test.cc @@ -48,12 +48,12 @@ DEFINE_bool(run_microbenchmarks, true, namespace snappy { -string ReadTestDataFile(const string& base, size_t size_limit) { - string contents; +std::string ReadTestDataFile(const std::string& base, size_t size_limit) { + std::string contents; const char* srcdir = getenv("srcdir"); // This is set by Automake. - string prefix; + std::string prefix; if (srcdir) { - prefix = string(srcdir) + "/"; + prefix = std::string(srcdir) + "/"; } file::GetContents(prefix + "testdata/" + base, &contents, file::Defaults() ).CheckSuccess(); @@ -63,11 +63,11 @@ string ReadTestDataFile(const string& base, size_t size_limit) { return contents; } -string ReadTestDataFile(const string& base) { +std::string ReadTestDataFile(const std::string& base) { return ReadTestDataFile(base, 0); } -string StringPrintf(const char* format, ...) { +std::string StrFormat(const char* format, ...) { char buf[4096]; va_list ap; va_start(ap, format); @@ -79,7 +79,7 @@ string StringPrintf(const char* format, ...) { bool benchmark_running = false; int64 benchmark_real_time_us = 0; int64 benchmark_cpu_time_us = 0; -string *benchmark_label = NULL; +std::string* benchmark_label = nullptr; int64 benchmark_bytes_processed = 0; void ResetBenchmarkTiming() { @@ -163,11 +163,11 @@ void StopBenchmarkTiming() { benchmark_running = false; } -void SetBenchmarkLabel(const string& str) { +void SetBenchmarkLabel(const std::string& str) { if (benchmark_label) { delete benchmark_label; } - benchmark_label = new string(str); + benchmark_label = new std::string(str); } void SetBenchmarkBytesProcessed(int64 bytes) { @@ -217,8 +217,8 @@ void Benchmark::Run() { benchmark_runs[run].cpu_time_us = benchmark_cpu_time_us; } - string heading = StringPrintf("%s/%d", name_.c_str(), test_case_num); - string human_readable_speed; + std::string heading = StrFormat("%s/%d", name_.c_str(), test_case_num); + std::string human_readable_speed; std::nth_element(benchmark_runs, benchmark_runs + kMedianPos, @@ -232,15 +232,16 @@ void Benchmark::Run() { int64 bytes_per_second = benchmark_bytes_processed * 1000000 / cpu_time_us; if (bytes_per_second < 1024) { - human_readable_speed = StringPrintf("%dB/s", bytes_per_second); + human_readable_speed = + StrFormat("%dB/s", static_cast(bytes_per_second)); } else if (bytes_per_second < 1024 * 1024) { - human_readable_speed = StringPrintf( + human_readable_speed = StrFormat( "%.1fkB/s", bytes_per_second / 1024.0f); } else if (bytes_per_second < 1024 * 1024 * 1024) { - human_readable_speed = StringPrintf( + human_readable_speed = StrFormat( "%.1fMB/s", bytes_per_second / (1024.0f * 1024.0f)); } else { - human_readable_speed = StringPrintf( + human_readable_speed = StrFormat( "%.1fGB/s", bytes_per_second / (1024.0f * 1024.0f * 1024.0f)); } } diff --git a/thirdparty/snappy/snappy-test.h b/thirdparty/snappy/snappy-test.h index 078f3211..c8b7d388 100644 --- a/thirdparty/snappy/snappy-test.h +++ b/thirdparty/snappy/snappy-test.h @@ -55,8 +55,6 @@ #include #endif -#include - #ifdef HAVE_GTEST #include @@ -169,7 +167,7 
@@ namespace file { namespace snappy { #define FLAGS_test_random_seed 301 -typedef string TypeParam; +using TypeParam = std::string; void Test_CorruptedTest_VerifyCorrupted(); void Test_Snappy_SimpleTests(); @@ -183,63 +181,13 @@ void Test_Snappy_ReadPastEndOfBuffer(); void Test_Snappy_FindMatchLength(); void Test_Snappy_FindMatchLengthRandom(); -string ReadTestDataFile(const string& base, size_t size_limit); +std::string ReadTestDataFile(const std::string& base, size_t size_limit); -string ReadTestDataFile(const string& base); +std::string ReadTestDataFile(const std::string& base); // A sprintf() variant that returns a std::string. // Not safe for general use due to truncation issues. -string StringPrintf(const char* format, ...); - -// A simple, non-cryptographically-secure random generator. -class ACMRandom { - public: - explicit ACMRandom(uint32 seed) : seed_(seed) {} - - int32 Next(); - - int32 Uniform(int32 n) { - return Next() % n; - } - uint8 Rand8() { - return static_cast((Next() >> 1) & 0x000000ff); - } - bool OneIn(int X) { return Uniform(X) == 0; } - - // Skewed: pick "base" uniformly from range [0,max_log] and then - // return "base" random bits. The effect is to pick a number in the - // range [0,2^max_log-1] with bias towards smaller numbers. - int32 Skewed(int max_log); - - private: - static const uint32 M = 2147483647L; // 2^31-1 - uint32 seed_; -}; - -inline int32 ACMRandom::Next() { - static const uint64 A = 16807; // bits 14, 8, 7, 5, 2, 1, 0 - // We are computing - // seed_ = (seed_ * A) % M, where M = 2^31-1 - // - // seed_ must not be zero or M, or else all subsequent computed values - // will be zero or M respectively. For all other values, seed_ will end - // up cycling through every number in [1,M-1] - uint64 product = seed_ * A; - - // Compute (product % M) using the fact that ((x << 31) % M) == x. - seed_ = (product >> 31) + (product & M); - // The first reduction may overflow by 1 bit, so we may need to repeat. - // mod == M is not possible; using > allows the faster sign-bit-based test. - if (seed_ > M) { - seed_ -= M; - } - return seed_; -} - -inline int32 ACMRandom::Skewed(int max_log) { - const int32 base = (Next() - 1) % (max_log+1); - return (Next() - 1) & ((1u << base)-1); -} +std::string StrFormat(const char* format, ...); // A wall-time clock. This stub is not super-accurate, nor resistant to the // system time changing. 
@@ -293,8 +241,8 @@ typedef void (*BenchmarkFunction)(int, int); class Benchmark { public: - Benchmark(const string& name, BenchmarkFunction function) : - name_(name), function_(function) {} + Benchmark(const std::string& name, BenchmarkFunction function) + : name_(name), function_(function) {} Benchmark* DenseRange(int start, int stop) { start_ = start; @@ -305,7 +253,7 @@ class Benchmark { void Run(); private: - const string name_; + const std::string name_; const BenchmarkFunction function_; int start_, stop_; }; @@ -317,11 +265,13 @@ extern Benchmark* Benchmark_BM_UFlat; extern Benchmark* Benchmark_BM_UIOVec; extern Benchmark* Benchmark_BM_UValidate; extern Benchmark* Benchmark_BM_ZFlat; +extern Benchmark* Benchmark_BM_ZFlatAll; +extern Benchmark* Benchmark_BM_ZFlatIncreasingTableSize; void ResetBenchmarkTiming(); void StartBenchmarkTiming(); void StopBenchmarkTiming(); -void SetBenchmarkLabel(const string& str); +void SetBenchmarkLabel(const std::string& str); void SetBenchmarkBytesProcessed(int64 bytes); #ifdef HAVE_LIBZ @@ -468,6 +418,8 @@ static inline void RunSpecifiedBenchmarks() { snappy::Benchmark_BM_UIOVec->Run(); snappy::Benchmark_BM_UValidate->Run(); snappy::Benchmark_BM_ZFlat->Run(); + snappy::Benchmark_BM_ZFlatAll->Run(); + snappy::Benchmark_BM_ZFlatIncreasingTableSize->Run(); fprintf(stderr, "\n"); } diff --git a/thirdparty/snappy/snappy.cc b/thirdparty/snappy/snappy.cc index fd519e5a..ce1eef46 100644 --- a/thirdparty/snappy/snappy.cc +++ b/thirdparty/snappy/snappy.cc @@ -30,25 +30,50 @@ #include "snappy-internal.h" #include "snappy-sinksource.h" -#ifndef SNAPPY_HAVE_SSE2 -#if defined(__SSE2__) || defined(_M_X64) || \ - (defined(_M_IX86_FP) && _M_IX86_FP >= 2) -#define SNAPPY_HAVE_SSE2 1 +#if !defined(SNAPPY_HAVE_SSSE3) +// __SSSE3__ is defined by GCC and Clang. Visual Studio doesn't target SIMD +// support between SSE2 and AVX (so SSSE3 instructions require AVX support), and +// defines __AVX__ when AVX support is available. +#if defined(__SSSE3__) || defined(__AVX__) +#define SNAPPY_HAVE_SSSE3 1 #else -#define SNAPPY_HAVE_SSE2 0 +#define SNAPPY_HAVE_SSSE3 0 #endif +#endif // !defined(SNAPPY_HAVE_SSSE3) + +#if !defined(SNAPPY_HAVE_BMI2) +// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2 +// specifically, but it does define __AVX2__ when AVX2 support is available. +// Fortunately, AVX2 was introduced in Haswell, just like BMI2. +// +// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So, +// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which +// case issuing BMI2 instructions results in a compiler error. +#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__)) +#define SNAPPY_HAVE_BMI2 1 +#else +#define SNAPPY_HAVE_BMI2 0 +#endif +#endif // !defined(SNAPPY_HAVE_BMI2) + +#if SNAPPY_HAVE_SSSE3 +// Please do not replace with . or with headers that assume more +// advanced SSE versions without checking with all the OWNERS. +#include #endif -#if SNAPPY_HAVE_SSE2 -#include +#if SNAPPY_HAVE_BMI2 +// Please do not replace with . or with headers that assume more +// advanced SSE versions without checking with all the OWNERS. +#include #endif + #include #include #include #include - namespace snappy { using internal::COPY_1_BYTE_OFFSET; @@ -103,16 +128,12 @@ void UnalignedCopy64(const void* src, void* dst) { } void UnalignedCopy128(const void* src, void* dst) { - // TODO(alkis): Remove this when we upgrade to a recent compiler that emits - // SSE2 moves for memcpy(dst, src, 16). 
-#if SNAPPY_HAVE_SSE2 - __m128i x = _mm_loadu_si128(static_cast(src)); - _mm_storeu_si128(static_cast<__m128i*>(dst), x); -#else + // memcpy gets vectorized when the appropriate compiler options are used. + // For example, x86 compilers targeting SSE2+ will optimize to an SSE2 load + // and store. char tmp[16]; memcpy(tmp, src, 16); memcpy(dst, tmp, 16); -#endif } // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used @@ -127,12 +148,35 @@ void UnalignedCopy128(const void* src, void* dst) { // Note that this does not match the semantics of either memcpy() or memmove(). inline char* IncrementalCopySlow(const char* src, char* op, char* const op_limit) { + // TODO: Remove pragma when LLVM is aware this + // function is only called in cold regions and when cold regions don't get + // vectorized or unrolled. +#ifdef __clang__ +#pragma clang loop unroll(disable) +#endif while (op < op_limit) { *op++ = *src++; } return op_limit; } +#if SNAPPY_HAVE_SSSE3 + +// This is a table of shuffle control masks that can be used as the source +// operand for PSHUFB to permute the contents of the destination XMM register +// into a repeating byte pattern. +alignas(16) const char pshufb_fill_patterns[7][16] = { + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}, + {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0}, + {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, + {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0}, + {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3}, + {0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1}, +}; + +#endif // SNAPPY_HAVE_SSSE3 + // Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than // IncrementalCopySlow. buf_limit is the address past the end of the writable // region of the buffer. @@ -144,9 +188,10 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, // pat = op - src // len = limit - op assert(src < op); + assert(op <= op_limit); assert(op_limit <= buf_limit); // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that - // to optimize this function but we have to also handle these cases in case + // to optimize this function but we have to also handle other cases in case // the input does not satisfy these conditions. size_t pattern_size = op - src; @@ -176,16 +221,45 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, // Handle the uncommon case where pattern is less than 8 bytes. if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) { - // Expand pattern to at least 8 bytes. The worse case scenario in terms of - // buffer usage is when the pattern is size 3. ^ is the original position - // of op. x are irrelevant bytes copied by the last UnalignedCopy64. +#if SNAPPY_HAVE_SSSE3 + // Load the first eight bytes into an 128-bit XMM register, then use PSHUFB + // to permute the register's contents in-place into a repeating sequence of + // the first "pattern_size" bytes. + // For example, suppose: + // src == "abc" + // op == op + 3 + // After _mm_shuffle_epi8(), "pattern" will have five copies of "abc" + // followed by one byte of slop: abcabcabcabcabca. // - // abc - // abcabcxxxxx - // abcabcabcabcxxxxx - // ^ - // The last x is 14 bytes after ^. - if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 14)) { + // The non-SSE fallback implementation suffers from store-forwarding stalls + // because its loads and stores partly overlap. By expanding the pattern + // in-place, we avoid the penalty. 
+ if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 16)) { + const __m128i shuffle_mask = _mm_load_si128( + reinterpret_cast(pshufb_fill_patterns) + + pattern_size - 1); + const __m128i pattern = _mm_shuffle_epi8( + _mm_loadl_epi64(reinterpret_cast(src)), shuffle_mask); + // Uninitialized bytes are masked out by the shuffle mask. + // TODO: remove annotation and macro defs once MSan is fixed. + SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(&pattern, sizeof(pattern)); + pattern_size *= 16 / pattern_size; + char* op_end = std::min(op_limit, buf_limit - 15); + while (op < op_end) { + _mm_storeu_si128(reinterpret_cast<__m128i*>(op), pattern); + op += pattern_size; + } + if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; + } + return IncrementalCopySlow(src, op, op_limit); +#else // !SNAPPY_HAVE_SSSE3 + // If plenty of buffer space remains, expand the pattern to at least 8 + // bytes. The way the following loop is written, we need 8 bytes of buffer + // space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10 + // bytes if pattern_size is 2. Precisely encoding that is probably not + // worthwhile; instead, invoke the slow path if we cannot write 11 bytes + // (because 11 are required in the worst case). + if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 11)) { while (pattern_size < 8) { UnalignedCopy64(src, op); op += pattern_size; @@ -195,6 +269,7 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, } else { return IncrementalCopySlow(src, op, op_limit); } +#endif // SNAPPY_HAVE_SSSE3 } assert(pattern_size >= 8); @@ -202,13 +277,48 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe // because expanding the pattern to at least 8 bytes guarantees that // op - src >= 8. - while (op <= buf_limit - 16) { + // + // Typically, the op_limit is the gating factor so try to simplify the loop + // based on that. + if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 16)) { + // There is at least one, and at most four 16-byte blocks. Writing four + // conditionals instead of a loop allows FDO to layout the code with respect + // to the actual probabilities of each length. + // TODO: Replace with loop with trip count hint. + UnalignedCopy64(src, op); + UnalignedCopy64(src + 8, op + 8); + + if (op + 16 < op_limit) { + UnalignedCopy64(src + 16, op + 16); + UnalignedCopy64(src + 24, op + 24); + } + if (op + 32 < op_limit) { + UnalignedCopy64(src + 32, op + 32); + UnalignedCopy64(src + 40, op + 40); + } + if (op + 48 < op_limit) { + UnalignedCopy64(src + 48, op + 48); + UnalignedCopy64(src + 56, op + 56); + } + return op_limit; + } + + // Fall back to doing as much as we can with the available slop in the + // buffer. This code path is relatively cold however so we save code size by + // avoiding unrolling and vectorizing. + // + // TODO: Remove pragma when when cold regions don't get vectorized + // or unrolled. +#ifdef __clang__ +#pragma clang loop unroll(disable) +#endif + for (char *op_end = buf_limit - 16; op < op_end; op += 16, src += 16) { UnalignedCopy64(src, op); UnalignedCopy64(src + 8, op + 8); - src += 16; - op += 16; - if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; } + if (op >= op_limit) + return op_limit; + // We only take this branch if we didn't have enough slop and we can do a // single 8 byte copy. 
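Both the PSHUFB path and the scalar fallback above implement the same contract: with `op - src == pattern_size`, copying forward toward `op_limit` repeats the leading pattern, which is how back-references shorter than the copy length get expanded. A byte-at-a-time sketch of that semantic, with an illustrative three-byte pattern, follows; the fast paths in the hunk merely produce the same bytes in wider chunks.

```c++
// Sketch of the overlapping-copy semantics IncrementalCopy implements: when
// op - src == pattern_size, a byte-at-a-time copy repeats the pattern, e.g.
// "abc" expands to "abcabcabc...". The optimized paths (PSHUFB broadcast,
// unrolled 16-byte copies) produce identical output.
#include <cassert>
#include <cstring>
#include <string>

static char* NaiveIncrementalCopy(const char* src, char* op, char* op_limit) {
  while (op < op_limit) *op++ = *src++;  // src trails op by pattern_size bytes
  return op_limit;
}

int main() {
  char buf[32];
  std::memcpy(buf, "abc", 3);            // seed the 3-byte pattern
  NaiveIncrementalCopy(buf, buf + 3, buf + 12);
  assert(std::string(buf, 12) == "abcabcabcabc");
  return 0;
}
```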
if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) { @@ -221,10 +331,10 @@ inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, } // namespace +template static inline char* EmitLiteral(char* op, const char* literal, - int len, - bool allow_fast_path) { + int len) { // The vast majority of copies are below 16 bytes, for which a // call to memcpy is overkill. This fast path can sometimes // copy up to 15 bytes too much, but that is okay in the @@ -249,25 +359,23 @@ static inline char* EmitLiteral(char* op, // Fits in tag byte *op++ = LITERAL | (n << 2); } else { - // Encode in upcoming bytes - char* base = op; - int count = 0; - op++; - while (n > 0) { - *op++ = n & 0xff; - n >>= 8; - count++; - } + int count = (Bits::Log2Floor(n) >> 3) + 1; assert(count >= 1); assert(count <= 4); - *base = LITERAL | ((59+count) << 2); + *op++ = LITERAL | ((59 + count) << 2); + // Encode in upcoming bytes. + // Write 4 bytes, though we may care about only 1 of them. The output buffer + // is guaranteed to have at least 3 more spaces left as 'len >= 61' holds + // here and there is a memcpy of size 'len' below. + LittleEndian::Store32(op, n); + op += count; } memcpy(op, literal, len); return op + len; } -static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len, - bool len_less_than_12) { +template +static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) { assert(len <= 64); assert(len >= 4); assert(offset < 65536); @@ -288,29 +396,33 @@ static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len, return op; } -static inline char* EmitCopy(char* op, size_t offset, size_t len, - bool len_less_than_12) { +template +static inline char* EmitCopy(char* op, size_t offset, size_t len) { assert(len_less_than_12 == (len < 12)); if (len_less_than_12) { - return EmitCopyAtMost64(op, offset, len, true); + return EmitCopyAtMost64(op, offset, len); } else { // A special case for len <= 64 might help, but so far measurements suggest // it's in the noise. // Emit 64 byte copies but make sure to keep at least four bytes reserved. while (SNAPPY_PREDICT_FALSE(len >= 68)) { - op = EmitCopyAtMost64(op, offset, 64, false); + op = EmitCopyAtMost64(op, offset, 64); len -= 64; } // One or two copies will now finish the job. if (len > 64) { - op = EmitCopyAtMost64(op, offset, 60, false); + op = EmitCopyAtMost64(op, offset, 60); len -= 60; } // Emit remainder. - op = EmitCopyAtMost64(op, offset, len, len < 12); + if (len < 12) { + op = EmitCopyAtMost64(op, offset, len); + } else { + op = EmitCopyAtMost64(op, offset, len); + } return op; } } @@ -326,31 +438,45 @@ bool GetUncompressedLength(const char* start, size_t n, size_t* result) { } } -namespace internal { -uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) { - // Use smaller hash table when input.size() is smaller, since we - // fill the table, incurring O(hash table size) overhead for - // compression, and if the input is short, we won't need that - // many hash table entries anyway. 
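The doubling loop being removed here picks the smallest power-of-two table size that covers the input, clamped between 256 entries and kMaxHashTableSize. The CalculateTableSize() helper introduced in the next hunk computes the same value in closed form via `2 << Log2Floor(n - 1)`. Below is a small sketch comparing the two; the clamp bounds are hard-coded from snappy.h for illustration.

```c++
// Sketch: the removed doubling loop and the closed-form 2 << Log2Floor(n - 1)
// used by the new CalculateTableSize() select the same size: the smallest
// power of two >= n, clamped to [1 << 8, 1 << 14].
#include <cassert>
#include <cstdint>

constexpr uint32_t kMinSize = 1 << 8;   // kMinHashTableSize in snappy.h
constexpr uint32_t kMaxSize = 1 << 14;  // kMaxHashTableSize in snappy.h

static uint32_t TableSizeByDoubling(uint32_t n) {
  uint32_t htsize = kMinSize;
  while (htsize < kMaxSize && htsize < n) htsize <<= 1;
  return htsize;
}

static uint32_t TableSizeClosedForm(uint32_t n) {
  if (n > kMaxSize) return kMaxSize;
  if (n < kMinSize) return kMinSize;
  return 2u << (31 ^ __builtin_clz(n - 1));  // 2 << Log2Floor(n - 1)
}

int main() {
  for (uint32_t n = 1; n <= (1u << 16); ++n) {
    assert(TableSizeByDoubling(n) == TableSizeClosedForm(n));
  }
  return 0;
}
```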
- assert(kMaxHashTableSize >= 256); - size_t htsize = 256; - while (htsize < kMaxHashTableSize && htsize < input_size) { - htsize <<= 1; +namespace { +uint32 CalculateTableSize(uint32 input_size) { + static_assert( + kMaxHashTableSize >= kMinHashTableSize, + "kMaxHashTableSize should be greater or equal to kMinHashTableSize."); + if (input_size > kMaxHashTableSize) { + return kMaxHashTableSize; } - - uint16* table; - if (htsize <= ARRAYSIZE(small_table_)) { - table = small_table_; - } else { - if (large_table_ == NULL) { - large_table_ = new uint16[kMaxHashTableSize]; - } - table = large_table_; + if (input_size < kMinHashTableSize) { + return kMinHashTableSize; } + // This is equivalent to Log2Ceiling(input_size), assuming input_size > 1. + // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)). + return 2u << Bits::Log2Floor(input_size - 1); +} +} // namespace +namespace internal { +WorkingMemory::WorkingMemory(size_t input_size) { + const size_t max_fragment_size = std::min(input_size, kBlockSize); + const size_t table_size = CalculateTableSize(max_fragment_size); + size_ = table_size * sizeof(*table_) + max_fragment_size + + MaxCompressedLength(max_fragment_size); + mem_ = std::allocator().allocate(size_); + table_ = reinterpret_cast(mem_); + input_ = mem_ + table_size * sizeof(*table_); + output_ = input_ + max_fragment_size; +} + +WorkingMemory::~WorkingMemory() { + std::allocator().deallocate(mem_, size_); +} + +uint16* WorkingMemory::GetHashTable(size_t fragment_size, + int* table_size) const { + const size_t htsize = CalculateTableSize(fragment_size); + memset(table_, 0, htsize * sizeof(*table_)); *table_size = htsize; - memset(table, 0, htsize * sizeof(*table)); - return table; + return table_; } } // end namespace internal @@ -417,7 +543,7 @@ char* CompressFragment(const char* input, // "ip" is the input pointer, and "op" is the output pointer. const char* ip = input; assert(input_size <= kBlockSize); - assert((table_size & (table_size - 1)) == 0); // table must be power of two + assert((table_size & (table_size - 1)) == 0); // table must be power of two const int shift = 32 - Bits::Log2Floor(table_size); assert(static_cast(kuint32max >> shift) == table_size - 1); const char* ip_end = input + input_size; @@ -484,7 +610,7 @@ char* CompressFragment(const char* input, // than 4 bytes match. But, prior to the match, input // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." assert(next_emit + 16 <= ip_end); - op = EmitLiteral(op, next_emit, ip - next_emit, true); + op = EmitLiteral(op, next_emit, ip - next_emit); // Step 3: Call EmitCopy, and then see if another EmitCopy could // be our next move. 
Repeat until we find no match for the @@ -507,7 +633,11 @@ char* CompressFragment(const char* input, ip += matched; size_t offset = base - candidate; assert(0 == memcmp(base, candidate, matched)); - op = EmitCopy(op, offset, matched, p.second); + if (p.second) { + op = EmitCopy(op, offset, matched); + } else { + op = EmitCopy(op, offset, matched); + } next_emit = ip; if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) { goto emit_remainder; @@ -532,7 +662,8 @@ char* CompressFragment(const char* input, emit_remainder: // Emit the remaining bytes as a literal if (next_emit < ip_end) { - op = EmitLiteral(op, next_emit, ip_end - next_emit, false); + op = EmitLiteral(op, next_emit, + ip_end - next_emit); } return op; @@ -583,14 +714,28 @@ static inline void Report(const char *algorithm, size_t compressed_size, // bool TryFastAppend(const char* ip, size_t available, size_t length); // }; -namespace internal { - -// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits -static const uint32 wordmask[] = { - 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu -}; +static inline uint32 ExtractLowBytes(uint32 v, int n) { + assert(n >= 0); + assert(n <= 4); +#if SNAPPY_HAVE_BMI2 + return _bzhi_u32(v, 8 * n); +#else + // This needs to be wider than uint32 otherwise `mask << 32` will be + // undefined. + uint64 mask = 0xffffffff; + return v & ~(mask << (8 * n)); +#endif +} -} // end namespace internal +static inline bool LeftShiftOverflows(uint8 value, uint32 shift) { + assert(shift < 32); + static const uint8 masks[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe}; + return (value & masks[shift]) != 0; +} // Helper class for decompression class SnappyDecompressor { @@ -629,7 +774,7 @@ class SnappyDecompressor { } // Read the uncompressed length stored at the start of the compressed data. - // On succcess, stores the length in *result and returns true. + // On success, stores the length in *result and returns true. // On failure, returns false. bool ReadUncompressedLength(uint32* result) { assert(ip_ == NULL); // Must not have read anything yet @@ -644,7 +789,7 @@ class SnappyDecompressor { const unsigned char c = *(reinterpret_cast(ip)); reader_->Skip(1); uint32 val = c & 0x7f; - if (((val << shift) >> shift) != val) return false; + if (LeftShiftOverflows(static_cast(val), shift)) return false; *result |= val << shift; if (c < 128) { break; @@ -657,22 +802,27 @@ class SnappyDecompressor { // Process the next item found in the input. // Returns true if successful, false on error or end of input. template +#if defined(__GNUC__) && defined(__x86_64__) + __attribute__((aligned(32))) +#endif void DecompressAllTags(Writer* writer) { - const char* ip = ip_; - // For position-independent executables, accessing global arrays can be - // slow. Move wordmask array onto the stack to mitigate this. - uint32 wordmask[sizeof(internal::wordmask)/sizeof(uint32)]; - // Do not use memcpy to copy internal::wordmask to - // wordmask. LLVM converts stack arrays to global arrays if it detects - // const stack arrays and this hurts the performance of position - // independent code. This change is temporary and can be reverted when - // https://reviews.llvm.org/D30759 is approved. 
- wordmask[0] = internal::wordmask[0]; - wordmask[1] = internal::wordmask[1]; - wordmask[2] = internal::wordmask[2]; - wordmask[3] = internal::wordmask[3]; - wordmask[4] = internal::wordmask[4]; + // In x86, pad the function body to start 16 bytes later. This function has + // a couple of hotspots that are highly sensitive to alignment: we have + // observed regressions by more than 20% in some metrics just by moving the + // exact same code to a different position in the benchmark binary. + // + // Putting this code on a 32-byte-aligned boundary + 16 bytes makes us hit + // the "lucky" case consistently. Unfortunately, this is a very brittle + // workaround, and future differences in code generation may reintroduce + // this regression. If you experience a big, difficult to explain, benchmark + // performance regression here, first try removing this hack. +#if defined(__GNUC__) && defined(__x86_64__) + // Two 8-byte "NOP DWORD ptr [EAX + EAX*1 + 00000000H]" instructions. + asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00"); + asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00"); +#endif + const char* ip = ip_; // We could have put this refill fragment only at the beginning of the loop. // However, duplicating it at the end of each branch gives the compiler more // scope to optimize the expression based on the local @@ -685,13 +835,6 @@ class SnappyDecompressor { } MAYBE_REFILL(); - // Add loop alignment directive. Without this directive, we observed - // significant performance degradation on several intel architectures - // in snappy benchmark built with LLVM. The degradation was caused by - // increased branch miss prediction. -#if defined(__clang__) && defined(__x86_64__) - asm volatile (".p2align 5"); -#endif for ( ;; ) { const unsigned char c = *(reinterpret_cast(ip++)); @@ -712,7 +855,7 @@ class SnappyDecompressor { if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) { assert(literal_length < 61); ip += literal_length; - // NOTE(user): There is no MAYBE_REFILL() here, as TryFastAppend() + // NOTE: There is no MAYBE_REFILL() here, as TryFastAppend() // will not return true unless there's already at least five spare // bytes in addition to the literal. continue; @@ -721,7 +864,8 @@ class SnappyDecompressor { // Long literal. const size_t literal_length_length = literal_length - 60; literal_length = - (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1; + ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) + + 1; ip += literal_length_length; } @@ -744,7 +888,8 @@ class SnappyDecompressor { MAYBE_REFILL(); } else { const size_t entry = char_table[c]; - const size_t trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11]; + const size_t trailer = + ExtractLowBytes(LittleEndian::Load32(ip), entry >> 11); const size_t length = entry & 0xff; ip += entry >> 11; @@ -860,9 +1005,7 @@ size_t Compress(Source* reader, Sink* writer) { writer->Append(ulength, p-ulength); written += (p - ulength); - internal::WorkingMemory wmem; - char* scratch = NULL; - char* scratch_output = NULL; + internal::WorkingMemory wmem(N); while (N > 0) { // Get next block to compress (without copying if possible) @@ -878,13 +1021,7 @@ size_t Compress(Source* reader, Sink* writer) { pending_advance = num_to_read; fragment_size = num_to_read; } else { - // Read into scratch buffer - if (scratch == NULL) { - // If this is the last iteration, we want to allocate N bytes - // of space, otherwise the max possible kBlockSize space. 
- // num_to_read contains exactly the correct value - scratch = new char[num_to_read]; - } + char* scratch = wmem.GetScratchInput(); memcpy(scratch, fragment, bytes_read); reader->Skip(bytes_read); @@ -910,16 +1047,13 @@ size_t Compress(Source* reader, Sink* writer) { // Need a scratch buffer for the output, in case the byte sink doesn't // have room for us directly. - if (scratch_output == NULL) { - scratch_output = new char[max_output]; - } else { - // Since we encode kBlockSize regions followed by a region - // which is <= kBlockSize in length, a previously allocated - // scratch_output[] region is big enough for this iteration. - } - char* dest = writer->GetAppendBuffer(max_output, scratch_output); - char* end = internal::CompressFragment(fragment, fragment_size, - dest, table, table_size); + + // Since we encode kBlockSize regions followed by a region + // which is <= kBlockSize in length, a previously allocated + // scratch_output[] region is big enough for this iteration. + char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput()); + char* end = internal::CompressFragment(fragment, fragment_size, dest, table, + table_size); writer->Append(dest, end - dest); written += (end - dest); @@ -929,9 +1063,6 @@ size_t Compress(Source* reader, Sink* writer) { Report("snappy_compress", written, uncompressed_size); - delete[] scratch; - delete[] scratch_output; - return written; } @@ -944,14 +1075,22 @@ size_t Compress(Source* reader, Sink* writer) { // Writer template argument to SnappyDecompressor::DecompressAllTags(). class SnappyIOVecWriter { private: + // output_iov_end_ is set to iov + count and used to determine when + // the end of the iovs is reached. + const struct iovec* output_iov_end_; + +#if !defined(NDEBUG) const struct iovec* output_iov_; - const size_t output_iov_count_; +#endif // !defined(NDEBUG) + + // Current iov that is being written into. + const struct iovec* curr_iov_; - // We are currently writing into output_iov_[curr_iov_index_]. - size_t curr_iov_index_; + // Pointer to current iov's write location. + char* curr_iov_output_; - // Bytes written to output_iov_[curr_iov_index_] so far. - size_t curr_iov_written_; + // Remaining bytes to write into curr_iov_output. + size_t curr_iov_remaining_; // Total bytes decompressed into output_iov_ so far. size_t total_written_; @@ -959,22 +1098,24 @@ class SnappyIOVecWriter { // Maximum number of bytes that will be decompressed into output_iov_. size_t output_limit_; - inline char* GetIOVecPointer(size_t index, size_t offset) { - return reinterpret_cast(output_iov_[index].iov_base) + - offset; + static inline char* GetIOVecPointer(const struct iovec* iov, size_t offset) { + return reinterpret_cast(iov->iov_base) + offset; } public: // Does not take ownership of iov. iov must be valid during the // entire lifetime of the SnappyIOVecWriter. inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count) - : output_iov_(iov), - output_iov_count_(iov_count), - curr_iov_index_(0), - curr_iov_written_(0), + : output_iov_end_(iov + iov_count), +#if !defined(NDEBUG) + output_iov_(iov), +#endif // !defined(NDEBUG) + curr_iov_(iov), + curr_iov_output_(iov_count ? reinterpret_cast(iov->iov_base) + : nullptr), + curr_iov_remaining_(iov_count ? 
iov->iov_len : 0), total_written_(0), - output_limit_(-1) { - } + output_limit_(-1) {} inline void SetExpectedLength(size_t len) { output_limit_ = len; @@ -989,23 +1130,25 @@ class SnappyIOVecWriter { return false; } + return AppendNoCheck(ip, len); + } + + inline bool AppendNoCheck(const char* ip, size_t len) { while (len > 0) { - assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len); - if (curr_iov_written_ >= output_iov_[curr_iov_index_].iov_len) { + if (curr_iov_remaining_ == 0) { // This iovec is full. Go to the next one. - if (curr_iov_index_ + 1 >= output_iov_count_) { + if (curr_iov_ + 1 >= output_iov_end_) { return false; } - curr_iov_written_ = 0; - ++curr_iov_index_; + ++curr_iov_; + curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); + curr_iov_remaining_ = curr_iov_->iov_len; } - const size_t to_write = std::min( - len, output_iov_[curr_iov_index_].iov_len - curr_iov_written_); - memcpy(GetIOVecPointer(curr_iov_index_, curr_iov_written_), - ip, - to_write); - curr_iov_written_ += to_write; + const size_t to_write = std::min(len, curr_iov_remaining_); + memcpy(curr_iov_output_, ip, to_write); + curr_iov_output_ += to_write; + curr_iov_remaining_ -= to_write; total_written_ += to_write; ip += to_write; len -= to_write; @@ -1017,11 +1160,11 @@ class SnappyIOVecWriter { inline bool TryFastAppend(const char* ip, size_t available, size_t len) { const size_t space_left = output_limit_ - total_written_; if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 && - output_iov_[curr_iov_index_].iov_len - curr_iov_written_ >= 16) { + curr_iov_remaining_ >= 16) { // Fast path, used for the majority (about 95%) of invocations. - char* ptr = GetIOVecPointer(curr_iov_index_, curr_iov_written_); - UnalignedCopy128(ip, ptr); - curr_iov_written_ += len; + UnalignedCopy128(ip, curr_iov_output_); + curr_iov_output_ += len; + curr_iov_remaining_ -= len; total_written_ += len; return true; } @@ -1030,7 +1173,9 @@ class SnappyIOVecWriter { } inline bool AppendFromSelf(size_t offset, size_t len) { - if (offset > total_written_ || offset == 0) { + // See SnappyArrayWriter::AppendFromSelf for an explanation of + // the "offset - 1u" trick. + if (offset - 1u >= total_written_) { return false; } const size_t space_left = output_limit_ - total_written_; @@ -1039,8 +1184,8 @@ class SnappyIOVecWriter { } // Locate the iovec from which we need to start the copy. - size_t from_iov_index = curr_iov_index_; - size_t from_iov_offset = curr_iov_written_; + const iovec* from_iov = curr_iov_; + size_t from_iov_offset = curr_iov_->iov_len - curr_iov_remaining_; while (offset > 0) { if (from_iov_offset >= offset) { from_iov_offset -= offset; @@ -1048,47 +1193,47 @@ class SnappyIOVecWriter { } offset -= from_iov_offset; - assert(from_iov_index > 0); - --from_iov_index; - from_iov_offset = output_iov_[from_iov_index].iov_len; + --from_iov; +#if !defined(NDEBUG) + assert(from_iov >= output_iov_); +#endif // !defined(NDEBUG) + from_iov_offset = from_iov->iov_len; } // Copy bytes starting from the iovec pointed to by from_iov_index to // the current iovec. 
while (len > 0) { - assert(from_iov_index <= curr_iov_index_); - if (from_iov_index != curr_iov_index_) { - const size_t to_copy = std::min( - output_iov_[from_iov_index].iov_len - from_iov_offset, - len); - Append(GetIOVecPointer(from_iov_index, from_iov_offset), to_copy); + assert(from_iov <= curr_iov_); + if (from_iov != curr_iov_) { + const size_t to_copy = + std::min(from_iov->iov_len - from_iov_offset, len); + AppendNoCheck(GetIOVecPointer(from_iov, from_iov_offset), to_copy); len -= to_copy; if (len > 0) { - ++from_iov_index; + ++from_iov; from_iov_offset = 0; } } else { - assert(curr_iov_written_ <= output_iov_[curr_iov_index_].iov_len); - size_t to_copy = std::min(output_iov_[curr_iov_index_].iov_len - - curr_iov_written_, - len); + size_t to_copy = curr_iov_remaining_; if (to_copy == 0) { // This iovec is full. Go to the next one. - if (curr_iov_index_ + 1 >= output_iov_count_) { + if (curr_iov_ + 1 >= output_iov_end_) { return false; } - ++curr_iov_index_; - curr_iov_written_ = 0; + ++curr_iov_; + curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); + curr_iov_remaining_ = curr_iov_->iov_len; continue; } if (to_copy > len) { to_copy = len; } - IncrementalCopySlow( - GetIOVecPointer(from_iov_index, from_iov_offset), - GetIOVecPointer(curr_iov_index_, curr_iov_written_), - GetIOVecPointer(curr_iov_index_, curr_iov_written_) + to_copy); - curr_iov_written_ += to_copy; + + IncrementalCopy(GetIOVecPointer(from_iov, from_iov_offset), + curr_iov_output_, curr_iov_output_ + to_copy, + curr_iov_output_ + curr_iov_remaining_); + curr_iov_output_ += to_copy; + curr_iov_remaining_ -= to_copy; from_iov_offset += to_copy; total_written_ += to_copy; len -= to_copy; @@ -1197,7 +1342,7 @@ bool RawUncompress(Source* compressed, char* uncompressed) { return InternalUncompress(compressed, &output); } -bool Uncompress(const char* compressed, size_t n, string* uncompressed) { +bool Uncompress(const char* compressed, size_t n, std::string* uncompressed) { size_t ulength; if (!GetUncompressedLength(compressed, n, &ulength)) { return false; @@ -1265,7 +1410,8 @@ void RawCompress(const char* input, *compressed_length = (writer.CurrentDestination() - compressed); } -size_t Compress(const char* input, size_t input_length, string* compressed) { +size_t Compress(const char* input, size_t input_length, + std::string* compressed) { // Pre-grow the buffer to the max length of the compressed output STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length)); @@ -1512,4 +1658,4 @@ bool Uncompress(Source* compressed, Sink* uncompressed) { } } -} // end namespace snappy +} // namespace snappy diff --git a/thirdparty/snappy/snappy.h b/thirdparty/snappy/snappy.h index 4568db89..e9805bfb 100644 --- a/thirdparty/snappy/snappy.h +++ b/thirdparty/snappy/snappy.h @@ -39,7 +39,7 @@ #ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__ #define THIRD_PARTY_SNAPPY_SNAPPY_H__ -#include +#include #include #include "snappy-stubs-public.h" @@ -69,11 +69,12 @@ namespace snappy { // Higher-level string based routines (should be sufficient for most users) // ------------------------------------------------------------------------ - // Sets "*output" to the compressed version of "input[0,input_length-1]". - // Original contents of *output are lost. + // Sets "*compressed" to the compressed version of "input[0,input_length-1]". + // Original contents of *compressed are lost. // - // REQUIRES: "input[]" is not an alias of "*output". 
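Besides the std::string overloads updated in this hunk, snappy.h also exposes raw variants that write into caller-allocated buffers (RawCompress, RawUncompress). A sketch of that round trip, sizing the buffers with MaxCompressedLength() and GetUncompressedLength(), is shown below; the input is illustrative only.

```c++
// Sketch of the raw (caller-allocated) API that sits next to the std::string
// routines touched here. The caller sizes the output buffers up front instead
// of relying on a string that grows on demand.
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

#include "snappy.h"

int main() {
  const std::string input(4096, 'z');

  std::vector<char> compressed(snappy::MaxCompressedLength(input.size()));
  size_t compressed_length = 0;
  snappy::RawCompress(input.data(), input.size(), compressed.data(),
                      &compressed_length);

  size_t uncompressed_length = 0;
  assert(snappy::GetUncompressedLength(compressed.data(), compressed_length,
                                       &uncompressed_length));
  std::vector<char> uncompressed(uncompressed_length);
  assert(snappy::RawUncompress(compressed.data(), compressed_length,
                               uncompressed.data()));
  assert(std::string(uncompressed.data(), uncompressed.size()) == input);
  return 0;
}
```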
- size_t Compress(const char* input, size_t input_length, string* output); + // REQUIRES: "input[]" is not an alias of "*compressed". + size_t Compress(const char* input, size_t input_length, + std::string* compressed); // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed". // Original contents of "*uncompressed" are lost. @@ -82,7 +83,7 @@ namespace snappy { // // returns false if the message is corrupted and could not be decompressed bool Uncompress(const char* compressed, size_t compressed_length, - string* uncompressed); + std::string* uncompressed); // Decompresses "compressed" to "*uncompressed". // @@ -193,11 +194,14 @@ namespace snappy { // Note that there might be older data around that is compressed with larger // block sizes, so the decompression code should not rely on the // non-existence of long backreferences. - static const int kBlockLog = 16; - static const size_t kBlockSize = 1 << kBlockLog; + static constexpr int kBlockLog = 16; + static constexpr size_t kBlockSize = 1 << kBlockLog; - static const int kMaxHashTableBits = 14; - static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits; + static constexpr int kMinHashTableBits = 8; + static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits; + + static constexpr int kMaxHashTableBits = 14; + static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits; } // end namespace snappy #endif // THIRD_PARTY_SNAPPY_SNAPPY_H__ diff --git a/thirdparty/snappy/snappy_compress_fuzzer.cc b/thirdparty/snappy/snappy_compress_fuzzer.cc new file mode 100644 index 00000000..1d0119e1 --- /dev/null +++ b/thirdparty/snappy/snappy_compress_fuzzer.cc @@ -0,0 +1,59 @@ +// Copyright 2019 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// libFuzzer harness for fuzzing snappy compression code. + +#include +#include +#include +#include + +#include "snappy.h" + +// Entry point for LibFuzzer. 
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + std::string input(reinterpret_cast(data), size); + + std::string compressed; + size_t compressed_size = + snappy::Compress(input.data(), input.size(), &compressed); + + (void)compressed_size; // Variable only used in debug builds. + assert(compressed_size == compressed.size()); + assert(compressed.size() <= snappy::MaxCompressedLength(input.size())); + assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); + + std::string uncompressed_after_compress; + bool uncompress_succeeded = snappy::Uncompress( + compressed.data(), compressed.size(), &uncompressed_after_compress); + + (void)uncompress_succeeded; // Variable only used in debug builds. + assert(uncompress_succeeded); + assert(input == uncompressed_after_compress); + return 0; +} diff --git a/thirdparty/snappy/snappy_uncompress_fuzzer.cc b/thirdparty/snappy/snappy_uncompress_fuzzer.cc new file mode 100644 index 00000000..8071c00e --- /dev/null +++ b/thirdparty/snappy/snappy_uncompress_fuzzer.cc @@ -0,0 +1,57 @@ +// Copyright 2019 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// libFuzzer harness for fuzzing snappy's decompression code. + +#include +#include +#include +#include + +#include "snappy.h" + +// Entry point for LibFuzzer. +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + std::string input(reinterpret_cast(data), size); + + // Avoid self-crafted decompression bombs. + size_t uncompressed_size; + constexpr size_t kMaxUncompressedSize = 1 << 20; + bool get_uncompressed_length_succeeded = snappy::GetUncompressedLength( + input.data(), input.size(), &uncompressed_size); + if (!get_uncompressed_length_succeeded || + (uncompressed_size > kMaxUncompressedSize)) { + return 0; + } + + std::string uncompressed; + // The return value of snappy::Uncompress() is ignored because decompression + // will fail on invalid inputs. 
+ snappy::Uncompress(input.data(), input.size(), &uncompressed); + return 0; +} diff --git a/thirdparty/snappy/snappy_unittest.cc b/thirdparty/snappy/snappy_unittest.cc index fcb3261d..37159c32 100644 --- a/thirdparty/snappy/snappy_unittest.cc +++ b/thirdparty/snappy/snappy_unittest.cc @@ -29,8 +29,8 @@ #include #include - #include +#include #include #include #include @@ -73,7 +73,7 @@ namespace snappy { // be able to read previously allocated memory while doing heap allocations. class DataEndingAtUnreadablePage { public: - explicit DataEndingAtUnreadablePage(const string& s) { + explicit DataEndingAtUnreadablePage(const std::string& s) { const size_t page_size = sysconf(_SC_PAGESIZE); const size_t size = s.size(); // Round up space for string to a multiple of page_size. @@ -112,7 +112,7 @@ class DataEndingAtUnreadablePage { #else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF) // Fallback for systems without mmap. -typedef string DataEndingAtUnreadablePage; +using DataEndingAtUnreadablePage = std::string; #endif @@ -154,7 +154,7 @@ static size_t MinimumRequiredOutputSpace(size_t input_size, // "compressed" must be preinitialized to at least MinCompressbufSize(comp) // number of bytes, and may contain junk bytes at the end after return. static bool Compress(const char* input, size_t input_size, CompressorType comp, - string* compressed, bool compressed_is_preallocated) { + std::string* compressed, bool compressed_is_preallocated) { if (!compressed_is_preallocated) { compressed->resize(MinimumRequiredOutputSpace(input_size, comp)); } @@ -215,8 +215,8 @@ static bool Compress(const char* input, size_t input_size, CompressorType comp, return true; } -static bool Uncompress(const string& compressed, CompressorType comp, - int size, string* output) { +static bool Uncompress(const std::string& compressed, CompressorType comp, + int size, std::string* output) { switch (comp) { #ifdef ZLIB_VERSION case ZLIB: { @@ -279,8 +279,8 @@ static void Measure(const char* data, int num_blocks = (length + block_size - 1) / block_size; std::vector input(num_blocks); std::vector input_length(num_blocks); - std::vector compressed(num_blocks); - std::vector output(num_blocks); + std::vector compressed(num_blocks); + std::vector output(num_blocks); for (int b = 0; b < num_blocks; b++) { int input_start = b * block_size; int input_limit = std::min((b+1)*block_size, length); @@ -344,11 +344,10 @@ static void Measure(const char* data, float comp_rate = (length / ctime[med]) * repeats / 1048576.0; float uncomp_rate = (length / utime[med]) * repeats / 1048576.0; - string x = names[comp]; + std::string x = names[comp]; x += ":"; - string urate = (uncomp_rate >= 0) - ? StringPrintf("%.1f", uncomp_rate) - : string("?"); + std::string urate = (uncomp_rate >= 0) ? 
StrFormat("%.1f", uncomp_rate) + : std::string("?"); printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% " "comp %5.1f MB/s uncomp %5s MB/s\n", x.c_str(), @@ -359,8 +358,8 @@ static void Measure(const char* data, urate.c_str()); } -static int VerifyString(const string& input) { - string compressed; +static int VerifyString(const std::string& input) { + std::string compressed; DataEndingAtUnreadablePage i(input); const size_t written = snappy::Compress(i.data(), i.size(), &compressed); CHECK_EQ(written, compressed.size()); @@ -368,15 +367,15 @@ static int VerifyString(const string& input) { snappy::MaxCompressedLength(input.size())); CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); - string uncompressed; + std::string uncompressed; DataEndingAtUnreadablePage c(compressed); CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed)); CHECK_EQ(uncompressed, input); return uncompressed.size(); } -static void VerifyStringSink(const string& input) { - string compressed; +static void VerifyStringSink(const std::string& input) { + std::string compressed; DataEndingAtUnreadablePage i(input); const size_t written = snappy::Compress(i.data(), i.size(), &compressed); CHECK_EQ(written, compressed.size()); @@ -384,7 +383,7 @@ static void VerifyStringSink(const string& input) { snappy::MaxCompressedLength(input.size())); CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); - string uncompressed; + std::string uncompressed; uncompressed.resize(input.size()); snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed)); DataEndingAtUnreadablePage c(compressed); @@ -393,8 +392,8 @@ static void VerifyStringSink(const string& input) { CHECK_EQ(uncompressed, input); } -static void VerifyIOVec(const string& input) { - string compressed; +static void VerifyIOVec(const std::string& input) { + std::string compressed; DataEndingAtUnreadablePage i(input); const size_t written = snappy::Compress(i.data(), i.size(), &compressed); CHECK_EQ(written, compressed.size()); @@ -405,23 +404,28 @@ static void VerifyIOVec(const string& input) { // Try uncompressing into an iovec containing a random number of entries // ranging from 1 to 10. char* buf = new char[input.size()]; - ACMRandom rnd(input.size()); - size_t num = rnd.Next() % 10 + 1; + std::minstd_rand0 rng(input.size()); + std::uniform_int_distribution uniform_1_to_10(1, 10); + size_t num = uniform_1_to_10(rng); if (input.size() < num) { num = input.size(); } struct iovec* iov = new iovec[num]; int used_so_far = 0; + std::bernoulli_distribution one_in_five(1.0 / 5); for (size_t i = 0; i < num; ++i) { + assert(used_so_far < input.size()); iov[i].iov_base = buf + used_so_far; if (i == num - 1) { iov[i].iov_len = input.size() - used_so_far; } else { // Randomly choose to insert a 0 byte entry. - if (rnd.OneIn(5)) { + if (one_in_five(rng)) { iov[i].iov_len = 0; } else { - iov[i].iov_len = rnd.Uniform(input.size()); + std::uniform_int_distribution uniform_not_used_so_far( + 0, input.size() - used_so_far - 1); + iov[i].iov_len = uniform_not_used_so_far(rng); } } used_so_far += iov[i].iov_len; @@ -435,22 +439,22 @@ static void VerifyIOVec(const string& input) { // Test that data compressed by a compressor that does not // obey block sizes is uncompressed properly. -static void VerifyNonBlockedCompression(const string& input) { +static void VerifyNonBlockedCompression(const std::string& input) { if (input.length() > snappy::kBlockSize) { // We cannot test larger blocks than the maximum block size, obviously. 
return; } - string prefix; + std::string prefix; Varint::Append32(&prefix, input.size()); // Setup compression table - snappy::internal::WorkingMemory wmem; + snappy::internal::WorkingMemory wmem(input.size()); int table_size; uint16* table = wmem.GetHashTable(input.size(), &table_size); // Compress entire input in one shot - string compressed; + std::string compressed; compressed += prefix; compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size())); char* dest = string_as_array(&compressed) + prefix.size(); @@ -458,13 +462,13 @@ static void VerifyNonBlockedCompression(const string& input) { dest, table, table_size); compressed.resize(end - compressed.data()); - // Uncompress into string - string uncomp_str; + // Uncompress into std::string + std::string uncomp_str; CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str)); CHECK_EQ(uncomp_str, input); // Uncompress using source/sink - string uncomp_str2; + std::string uncomp_str2; uncomp_str2.resize(input.size()); snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2)); snappy::ByteArraySource source(compressed.data(), compressed.size()); @@ -476,28 +480,28 @@ static void VerifyNonBlockedCompression(const string& input) { static const int kNumBlocks = 10; struct iovec vec[kNumBlocks]; const int block_size = 1 + input.size() / kNumBlocks; - string iovec_data(block_size * kNumBlocks, 'x'); + std::string iovec_data(block_size * kNumBlocks, 'x'); for (int i = 0; i < kNumBlocks; i++) { vec[i].iov_base = string_as_array(&iovec_data) + i * block_size; vec[i].iov_len = block_size; } CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), vec, kNumBlocks)); - CHECK_EQ(string(iovec_data.data(), input.size()), input); + CHECK_EQ(std::string(iovec_data.data(), input.size()), input); } } // Expand the input so that it is at least K times as big as block size -static string Expand(const string& input) { +static std::string Expand(const std::string& input) { static const int K = 3; - string data = input; + std::string data = input; while (data.size() < K * snappy::kBlockSize) { data += input; } return data; } -static int Verify(const string& input) { +static int Verify(const std::string& input) { VLOG(1) << "Verifying input of size " << input.size(); // Compress using string based routines @@ -509,7 +513,7 @@ static int Verify(const string& input) { VerifyNonBlockedCompression(input); VerifyIOVec(input); if (!input.empty()) { - const string expanded = Expand(input); + const std::string expanded = Expand(input); VerifyNonBlockedCompression(expanded); VerifyIOVec(input); } @@ -517,21 +521,20 @@ static int Verify(const string& input) { return result; } - -static bool IsValidCompressedBuffer(const string& c) { +static bool IsValidCompressedBuffer(const std::string& c) { return snappy::IsValidCompressedBuffer(c.data(), c.size()); } -static bool Uncompress(const string& c, string* u) { +static bool Uncompress(const std::string& c, std::string* u) { return snappy::Uncompress(c.data(), c.size(), u); } // This test checks to ensure that snappy doesn't coredump if it gets // corrupted data. TEST(CorruptedTest, VerifyCorrupted) { - string source = "making sure we don't crash with corrupted input"; + std::string source = "making sure we don't crash with corrupted input"; VLOG(1) << source; - string dest; - string uncmp; + std::string dest; + std::string uncmp; snappy::Compress(source.data(), source.size(), &dest); // Mess around with the data. 
It's hard to simulate all possible @@ -578,9 +581,9 @@ TEST(CorruptedTest, VerifyCorrupted) { // try reading stuff in from a bad file. for (int i = 1; i <= 3; ++i) { - string data = ReadTestDataFile(StringPrintf("baddata%d.snappy", i).c_str(), - 0); - string uncmp; + std::string data = + ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0); + std::string uncmp; // check that we don't return a crazy length size_t ulen; CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen) @@ -598,7 +601,7 @@ TEST(CorruptedTest, VerifyCorrupted) { // These mirror the compression code in snappy.cc, but are copied // here so that we can bypass some limitations in the how snappy.cc // invokes these routines. -static void AppendLiteral(string* dst, const string& literal) { +static void AppendLiteral(std::string* dst, const std::string& literal) { if (literal.empty()) return; int n = literal.size() - 1; if (n < 60) { @@ -613,12 +616,12 @@ static void AppendLiteral(string* dst, const string& literal) { n >>= 8; } dst->push_back(0 | ((59+count) << 2)); - *dst += string(number, count); + *dst += std::string(number, count); } *dst += literal; } -static void AppendCopy(string* dst, int offset, int length) { +static void AppendCopy(std::string* dst, int offset, int length) { while (length > 0) { // Figure out how much to copy in one shot int to_copy; @@ -655,51 +658,67 @@ TEST(Snappy, SimpleTests) { Verify("ab"); Verify("abc"); - Verify("aaaaaaa" + string(16, 'b') + string("aaaaa") + "abc"); - Verify("aaaaaaa" + string(256, 'b') + string("aaaaa") + "abc"); - Verify("aaaaaaa" + string(2047, 'b') + string("aaaaa") + "abc"); - Verify("aaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc"); - Verify("abcaaaaaaa" + string(65536, 'b') + string("aaaaa") + "abc"); + Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc"); + Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc"); + Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc"); + Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc"); + Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc"); } // Verify max blowup (lots of four-byte copies) TEST(Snappy, MaxBlowup) { - string input; - for (int i = 0; i < 20000; i++) { - ACMRandom rnd(i); - uint32 bytes = static_cast(rnd.Next()); - input.append(reinterpret_cast(&bytes), sizeof(bytes)); - } - for (int i = 19999; i >= 0; i--) { - ACMRandom rnd(i); - uint32 bytes = static_cast(rnd.Next()); - input.append(reinterpret_cast(&bytes), sizeof(bytes)); + std::mt19937 rng; + std::uniform_int_distribution uniform_byte(0, 255); + std::string input; + for (int i = 0; i < 80000; ++i) + input.push_back(static_cast(uniform_byte(rng))); + + for (int i = 0; i < 80000; i += 4) { + std::string four_bytes(input.end() - i - 4, input.end() - i); + input.append(four_bytes); } Verify(input); } TEST(Snappy, RandomData) { - ACMRandom rnd(FLAGS_test_random_seed); - - const int num_ops = 20000; + std::minstd_rand0 rng(FLAGS_test_random_seed); + std::uniform_int_distribution uniform_0_to_3(0, 3); + std::uniform_int_distribution uniform_0_to_8(0, 8); + std::uniform_int_distribution uniform_byte(0, 255); + std::uniform_int_distribution uniform_4k(0, 4095); + std::uniform_int_distribution uniform_64k(0, 65535); + std::bernoulli_distribution one_in_ten(1.0 / 10); + + constexpr int num_ops = 20000; for (int i = 0; i < num_ops; i++) { if ((i % 1000) == 0) { VLOG(0) << "Random op " << i << " of " << num_ops; } - string x; - size_t len = 
rnd.Uniform(4096); + std::string x; + size_t len = uniform_4k(rng); if (i < 100) { - len = 65536 + rnd.Uniform(65536); + len = 65536 + uniform_64k(rng); } while (x.size() < len) { int run_len = 1; - if (rnd.OneIn(10)) { - run_len = rnd.Skewed(8); + if (one_in_ten(rng)) { + int skewed_bits = uniform_0_to_8(rng); + // int is guaranteed to hold at least 16 bits, this uses at most 8 bits. + std::uniform_int_distribution skewed_low(0, + (1 << skewed_bits) - 1); + run_len = skewed_low(rng); + } + char c = static_cast(uniform_byte(rng)); + if (i >= 100) { + int skewed_bits = uniform_0_to_3(rng); + // int is guaranteed to hold at least 16 bits, this uses at most 3 bits. + std::uniform_int_distribution skewed_low(0, + (1 << skewed_bits) - 1); + c = static_cast(skewed_low(rng)); } - char c = (i < 100) ? rnd.Uniform(256) : rnd.Skewed(3); while (run_len-- > 0 && x.size() < len) { - x += c; + x.push_back(c); } } @@ -713,19 +732,19 @@ TEST(Snappy, FourByteOffset) { // copy manually. // The two fragments that make up the input string. - string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz"; - string fragment2 = "some other string"; + std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz"; + std::string fragment2 = "some other string"; // How many times each fragment is emitted. const int n1 = 2; const int n2 = 100000 / fragment2.size(); const int length = n1 * fragment1.size() + n2 * fragment2.size(); - string compressed; + std::string compressed; Varint::Append32(&compressed, length); AppendLiteral(&compressed, fragment1); - string src = fragment1; + std::string src = fragment1; for (int i = 0; i < n2; i++) { AppendLiteral(&compressed, fragment2); src += fragment2; @@ -734,7 +753,7 @@ TEST(Snappy, FourByteOffset) { src += fragment1; CHECK_EQ(length, src.size()); - string uncompressed; + std::string uncompressed; CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed)); @@ -756,7 +775,7 @@ TEST(Snappy, IOVecEdgeCases) { iov[i].iov_len = kLengths[i]; } - string compressed; + std::string compressed; Varint::Append32(&compressed, 22); // A literal whose output crosses three blocks. 
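The test hunks above replace the old ACMRandom helper with standard `<random>` facilities: `rnd.Uniform(n)` becomes a `std::uniform_int_distribution` over [0, n), `rnd.OneIn(n)` becomes a `std::bernoulli_distribution` with probability 1/n, and `rnd.Skewed(b)` becomes two draws (a uniform bit width, then a uniform value that fits in that many bits). The sketch below restates that mapping as free functions purely for illustration; the helper names and the standalone packaging are mine, not from the snappy sources, which inline the distributions at each call site.

```c++
#include <cstdio>
#include <random>

// rnd.Uniform(n): uniform integer in [0, n).
int Uniform(std::minstd_rand0& rng, int n) {
  return std::uniform_int_distribution<int>(0, n - 1)(rng);
}

// rnd.OneIn(n): true with probability 1/n.
bool OneIn(std::minstd_rand0& rng, int n) {
  return std::bernoulli_distribution(1.0 / n)(rng);
}

// rnd.Skewed(max_bits): pick a bit width in [0, max_bits], then a value that
// fits in that many bits, so small results dominate.
int Skewed(std::minstd_rand0& rng, int max_bits) {
  int bits = std::uniform_int_distribution<int>(0, max_bits)(rng);
  return std::uniform_int_distribution<int>(0, (1 << bits) - 1)(rng);
}

int main() {
  std::minstd_rand0 rng(301);  // any fixed seed; the tests use FLAGS_test_random_seed
  // Mirrors the run-length logic in TEST(Snappy, RandomData).
  int run_len = OneIn(rng, 10) ? Skewed(rng, 8) : 1;
  std::printf("run_len = %d\n", run_len);
  return 0;
}
```

The skewed draw is what keeps most generated run lengths and byte values small, so the random test data stays compressible, which is the property the old ACMRandom-based tests relied on.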
@@ -817,7 +836,7 @@ TEST(Snappy, IOVecLiteralOverflow) { iov[i].iov_len = kLengths[i]; } - string compressed; + std::string compressed; Varint::Append32(&compressed, 8); AppendLiteral(&compressed, "12345678"); @@ -839,7 +858,7 @@ TEST(Snappy, IOVecCopyOverflow) { iov[i].iov_len = kLengths[i]; } - string compressed; + std::string compressed; Varint::Append32(&compressed, 8); AppendLiteral(&compressed, "123"); @@ -853,7 +872,7 @@ TEST(Snappy, IOVecCopyOverflow) { } } -static bool CheckUncompressedLength(const string& compressed, +static bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) { const bool result1 = snappy::GetUncompressedLength(compressed.data(), compressed.size(), @@ -867,7 +886,7 @@ static bool CheckUncompressedLength(const string& compressed, } TEST(SnappyCorruption, TruncatedVarint) { - string compressed, uncompressed; + std::string compressed, uncompressed; size_t ulength; compressed.push_back('\xf0'); CHECK(!CheckUncompressedLength(compressed, &ulength)); @@ -877,7 +896,7 @@ TEST(SnappyCorruption, TruncatedVarint) { } TEST(SnappyCorruption, UnterminatedVarint) { - string compressed, uncompressed; + std::string compressed, uncompressed; size_t ulength; compressed.push_back('\x80'); compressed.push_back('\x80'); @@ -892,7 +911,7 @@ TEST(SnappyCorruption, UnterminatedVarint) { } TEST(SnappyCorruption, OverflowingVarint) { - string compressed, uncompressed; + std::string compressed, uncompressed; size_t ulength; compressed.push_back('\xfb'); compressed.push_back('\xff'); @@ -909,14 +928,14 @@ TEST(Snappy, ReadPastEndOfBuffer) { // Check that we do not read past end of input // Make a compressed string that ends with a single-byte literal - string compressed; + std::string compressed; Varint::Append32(&compressed, 1); AppendLiteral(&compressed, "x"); - string uncompressed; + std::string uncompressed; DataEndingAtUnreadablePage c(compressed); CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed)); - CHECK_EQ(uncompressed, string("x")); + CHECK_EQ(uncompressed, std::string("x")); } // Check for an infinite loop caused by a copy with offset==0 @@ -1037,17 +1056,20 @@ TEST(Snappy, FindMatchLength) { } TEST(Snappy, FindMatchLengthRandom) { - const int kNumTrials = 10000; - const int kTypicalLength = 10; - ACMRandom rnd(FLAGS_test_random_seed); + constexpr int kNumTrials = 10000; + constexpr int kTypicalLength = 10; + std::minstd_rand0 rng(FLAGS_test_random_seed); + std::uniform_int_distribution uniform_byte(0, 255); + std::bernoulli_distribution one_in_two(1.0 / 2); + std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength); for (int i = 0; i < kNumTrials; i++) { - string s, t; - char a = rnd.Rand8(); - char b = rnd.Rand8(); - while (!rnd.OneIn(kTypicalLength)) { - s.push_back(rnd.OneIn(2) ? a : b); - t.push_back(rnd.OneIn(2) ? a : b); + std::string s, t; + char a = static_cast(uniform_byte(rng)); + char b = static_cast(uniform_byte(rng)); + while (!one_in_typical_length(rng)) { + s.push_back(one_in_two(rng) ? a : b); + t.push_back(one_in_two(rng) ? 
a : b); } DataEndingAtUnreadablePage u(s); DataEndingAtUnreadablePage v(t); @@ -1157,33 +1179,33 @@ TEST(Snappy, VerifyCharTable) { } static void CompressFile(const char* fname) { - string fullinput; + std::string fullinput; CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); - string compressed; + std::string compressed; Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false); - CHECK_OK(file::SetContents(string(fname).append(".comp"), compressed, + CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed, file::Defaults())); } static void UncompressFile(const char* fname) { - string fullinput; + std::string fullinput; CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); size_t uncompLength; CHECK(CheckUncompressedLength(fullinput, &uncompLength)); - string uncompressed; + std::string uncompressed; uncompressed.resize(uncompLength); CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed)); - CHECK_OK(file::SetContents(string(fname).append(".uncomp"), uncompressed, + CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed, file::Defaults())); } static void MeasureFile(const char* fname) { - string fullinput; + std::string fullinput; CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults())); printf("%-40s :\n", fname); @@ -1236,10 +1258,10 @@ static void BM_UFlat(int iters, int arg) { // Pick file to process based on "arg" CHECK_GE(arg, 0); CHECK_LT(arg, ARRAYSIZE(files)); - string contents = ReadTestDataFile(files[arg].filename, - files[arg].size_limit); + std::string contents = + ReadTestDataFile(files[arg].filename, files[arg].size_limit); - string zcontents; + std::string zcontents; snappy::Compress(contents.data(), contents.size(), &zcontents); char* dst = new char[contents.size()]; @@ -1262,10 +1284,10 @@ static void BM_UValidate(int iters, int arg) { // Pick file to process based on "arg" CHECK_GE(arg, 0); CHECK_LT(arg, ARRAYSIZE(files)); - string contents = ReadTestDataFile(files[arg].filename, - files[arg].size_limit); + std::string contents = + ReadTestDataFile(files[arg].filename, files[arg].size_limit); - string zcontents; + std::string zcontents; snappy::Compress(contents.data(), contents.size(), &zcontents); SetBenchmarkBytesProcessed(static_cast(iters) * @@ -1285,10 +1307,10 @@ static void BM_UIOVec(int iters, int arg) { // Pick file to process based on "arg" CHECK_GE(arg, 0); CHECK_LT(arg, ARRAYSIZE(files)); - string contents = ReadTestDataFile(files[arg].filename, - files[arg].size_limit); + std::string contents = + ReadTestDataFile(files[arg].filename, files[arg].size_limit); - string zcontents; + std::string zcontents; snappy::Compress(contents.data(), contents.size(), &zcontents); // Uncompress into an iovec containing ten entries. 
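The benchmark hunks in this area (BM_UFlat, BM_UValidate, BM_UIOVec, and the sink and flat variants below) time snappy's raw, caller-allocated-buffer entry points rather than the std::string convenience overloads. For orientation, here is a minimal round trip through that raw API; it is a simplified sketch, not code from the patch, and it folds the separate benchmark loops into one straight-line program.

```c++
#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

#include "snappy.h"

int main() {
  const std::string contents(1 << 16, 'x');  // stand-in for a test-data file

  // Compress into a caller-owned buffer sized with MaxCompressedLength().
  std::vector<char> zcontents(snappy::MaxCompressedLength(contents.size()));
  size_t zsize = 0;
  snappy::RawCompress(contents.data(), contents.size(), zcontents.data(),
                      &zsize);

  // Recover the original length from the compressed stream, then decompress
  // into a preallocated buffer, roughly what the BM_UFlat loop times.
  size_t ulength = 0;
  if (!snappy::GetUncompressedLength(zcontents.data(), zsize, &ulength)) {
    return 1;
  }
  std::vector<char> dst(ulength);
  if (!snappy::RawUncompress(zcontents.data(), zsize, dst.data())) {
    return 1;
  }

  std::printf("%zu -> %zu -> %zu bytes\n", contents.size(), zsize, ulength);
  return 0;
}
```

Sizing the buffers up front with MaxCompressedLength() and GetUncompressedLength() keeps all allocation outside the timed loops, which is why the benchmarks above allocate `dst` before StartBenchmarkTiming().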
@@ -1331,10 +1353,10 @@ static void BM_UFlatSink(int iters, int arg) { // Pick file to process based on "arg" CHECK_GE(arg, 0); CHECK_LT(arg, ARRAYSIZE(files)); - string contents = ReadTestDataFile(files[arg].filename, - files[arg].size_limit); + std::string contents = + ReadTestDataFile(files[arg].filename, files[arg].size_limit); - string zcontents; + std::string zcontents; snappy::Compress(contents.data(), contents.size(), &zcontents); char* dst = new char[contents.size()]; @@ -1349,7 +1371,7 @@ static void BM_UFlatSink(int iters, int arg) { } StopBenchmarkTiming(); - string s(dst, contents.size()); + std::string s(dst, contents.size()); CHECK_EQ(contents, s); delete[] dst; @@ -1363,8 +1385,8 @@ static void BM_ZFlat(int iters, int arg) { // Pick file to process based on "arg" CHECK_GE(arg, 0); CHECK_LT(arg, ARRAYSIZE(files)); - string contents = ReadTestDataFile(files[arg].filename, - files[arg].size_limit); + std::string contents = + ReadTestDataFile(files[arg].filename, files[arg].size_limit); char* dst = new char[snappy::MaxCompressedLength(contents.size())]; @@ -1379,14 +1401,88 @@ static void BM_ZFlat(int iters, int arg) { StopBenchmarkTiming(); const double compression_ratio = static_cast(zsize) / std::max(1, contents.size()); - SetBenchmarkLabel(StringPrintf("%s (%.2f %%)", - files[arg].label, 100.0 * compression_ratio)); - VLOG(0) << StringPrintf("compression for %s: %zd -> %zd bytes", - files[arg].label, contents.size(), zsize); + SetBenchmarkLabel(StrFormat("%s (%.2f %%)", files[arg].label, + 100.0 * compression_ratio)); + VLOG(0) << StrFormat("compression for %s: %zd -> %zd bytes", + files[arg].label, static_cast(contents.size()), + static_cast(zsize)); delete[] dst; } BENCHMARK(BM_ZFlat)->DenseRange(0, ARRAYSIZE(files) - 1); +static void BM_ZFlatAll(int iters, int arg) { + StopBenchmarkTiming(); + + CHECK_EQ(arg, 0); + const int num_files = ARRAYSIZE(files); + + std::vector contents(num_files); + std::vector dst(num_files); + + int64 total_contents_size = 0; + for (int i = 0; i < num_files; ++i) { + contents[i] = ReadTestDataFile(files[i].filename, files[i].size_limit); + dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())]; + total_contents_size += contents[i].size(); + } + + SetBenchmarkBytesProcessed(static_cast(iters) * total_contents_size); + StartBenchmarkTiming(); + + size_t zsize = 0; + while (iters-- > 0) { + for (int i = 0; i < num_files; ++i) { + snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i], + &zsize); + } + } + StopBenchmarkTiming(); + + for (int i = 0; i < num_files; ++i) { + delete[] dst[i]; + } + SetBenchmarkLabel(StrFormat("%d files", num_files)); +} +BENCHMARK(BM_ZFlatAll)->DenseRange(0, 0); + +static void BM_ZFlatIncreasingTableSize(int iters, int arg) { + StopBenchmarkTiming(); + + CHECK_EQ(arg, 0); + CHECK_GT(ARRAYSIZE(files), 0); + const std::string base_content = + ReadTestDataFile(files[0].filename, files[0].size_limit); + + std::vector contents; + std::vector dst; + int64 total_contents_size = 0; + for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits; + ++table_bits) { + std::string content = base_content; + content.resize(1 << table_bits); + dst.push_back(new char[snappy::MaxCompressedLength(content.size())]); + total_contents_size += content.size(); + contents.push_back(std::move(content)); + } + + size_t zsize = 0; + SetBenchmarkBytesProcessed(static_cast(iters) * total_contents_size); + StartBenchmarkTiming(); + while (iters-- > 0) { + for (int i = 0; i < contents.size(); ++i) { + 
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i], + &zsize); + } + } + StopBenchmarkTiming(); + + for (int i = 0; i < dst.size(); ++i) { + delete[] dst[i]; + } + SetBenchmarkLabel(StrFormat("%zd tables", contents.size())); +} +BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(0, 0); + } // namespace snappy int main(int argc, char** argv) {