diff --git a/.travis.yml b/.travis.yml index 3ac1dfbed..1615cf255 100644 --- a/.travis.yml +++ b/.travis.yml @@ -72,9 +72,14 @@ matrix: # clang 3.7 on osx - os: osx - osx_image: xcode7.1 + osx_image: xcode7.3 env: C_COMPILER=clang + # # clang 4.2 on osx + # - os: osx + # osx_image: xcode8.2 + # env: C_COMPILER=clang + before_install: # for osx: 0. update brew; 1. install cmake if missing; 2. (gcc) unlink pre-installed gcc; 3. (gcc) install desired version of gcc @@ -93,4 +98,5 @@ script: - cmake .. - make -j - make check + - egrep -r ":F:|:E:" . || true - cd ../test/integration && python test_twemcache.py diff --git a/deps/ccommon/.travis.yml b/deps/ccommon/.travis.yml index 47c18dabf..e8657f7b2 100644 --- a/deps/ccommon/.travis.yml +++ b/deps/ccommon/.travis.yml @@ -75,6 +75,11 @@ matrix: osx_image: xcode7.1 env: C_COMPILER=clang + # # clang 4.2 on osx + # - os: osx + # osx_image: xcode8.2 + # env: C_COMPILER=clang + before_install: # for osx: 0. update brew; 1. install cmake if missing; 2. (gcc) unlink pre-installed gcc; 3. (gcc) install desired version of gcc @@ -93,3 +98,4 @@ script: - cmake .. - make -j - make check + - egrep -r ":F:|:E:" . || true diff --git a/deps/ccommon/CMakeLists.txt b/deps/ccommon/CMakeLists.txt index f378dec25..1dfce90f9 100644 --- a/deps/ccommon/CMakeLists.txt +++ b/deps/ccommon/CMakeLists.txt @@ -37,7 +37,7 @@ endif() # version info set(${PROJECT_NAME}_VERSION_MAJOR 1) -set(${PROJECT_NAME}_VERSION_MINOR 1) +set(${PROJECT_NAME}_VERSION_MINOR 2) set(${PROJECT_NAME}_VERSION_PATCH 0) set(${PROJECT_NAME}_VERSION ${${PROJECT_NAME}_VERSION_MAJOR}.${${PROJECT_NAME}_VERSION_MINOR}.${${PROJECT_NAME}_VERSION_PATCH} diff --git a/deps/ccommon/include/buffer/cc_buf.h b/deps/ccommon/include/buffer/cc_buf.h index f0d7924db..4fe111dd3 100644 --- a/deps/ccommon/include/buffer/cc_buf.h +++ b/deps/ccommon/include/buffer/cc_buf.h @@ -169,6 +169,10 @@ buf_read(char *dst, struct buf *src, uint32_t count) static inline uint32_t buf_write(struct buf *dst, char *src, uint32_t count) { + if (count == 0) { + return 0; + } + ASSERT(dst != NULL && src != NULL); uint32_t len = MIN(buf_wsize(dst), count); diff --git a/deps/ccommon/include/cc_lookup3.h b/deps/ccommon/include/cc_lookup3.h deleted file mode 100644 index 204d74f97..000000000 --- a/deps/ccommon/include/cc_lookup3.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * ccommon - a cache common library. - * Copyright (C) 2013 Twitter, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -/* -Excerpt and modified from lookup3.c (http://burtleburtle.net/bob/c/lookup3.c), -originally by Bob Jenkins, May 2006, Public Domain. 
-*/ - -#include - -#include /* defines uint32_t etc */ -#include - -uint32_t hashlittle( const void *key, size_t length, uint32_t initval); - -#ifdef __cplusplus -} -#endif diff --git a/deps/ccommon/include/cc_print.h b/deps/ccommon/include/cc_print.h index f26da912a..55d87edf7 100644 --- a/deps/ccommon/include/cc_print.h +++ b/deps/ccommon/include/cc_print.h @@ -44,7 +44,10 @@ extern "C" { /* behavior undefined if there isn't enough space in buf */ size_t cc_print_uint64_unsafe(char *buf, uint64_t n); +size_t cc_print_int64_unsafe(char *buf, int64_t n); + size_t cc_print_uint64(char *buf, size_t size, uint64_t n); +size_t cc_print_int64(char *buf, size_t size, int64_t n); size_t _scnprintf(char *buf, size_t size, const char *fmt, ...); size_t _vscnprintf(char *buf, size_t size, const char *fmt, va_list args); diff --git a/deps/ccommon/include/cc_util.h b/deps/ccommon/include/cc_util.h index e0967a852..d9c13955b 100644 --- a/deps/ccommon/include/cc_util.h +++ b/deps/ccommon/include/cc_util.h @@ -57,6 +57,8 @@ extern "C" { * # define UINT16_MAX (65535) * # define UINT32_MAX (4294967295U) * # define UINT64_MAX (__UINT64_C(18446744073709551615)) + * + * # define INT64_MIN -9223372036854775808LL */ #define CC_UINT8_MAXLEN (3 + 1) #define CC_UINT16_MAXLEN (5 + 1) @@ -64,6 +66,8 @@ extern "C" { #define CC_UINT64_MAXLEN (20 + 1) #define CC_UINTMAX_MAXLEN CC_UINT64_MAXLEN +#define CC_INT64_MAXLEN (1 + 19 + 1) + /* alignment */ /* Make data 'd' or pointer 'p', n-byte aligned, where n is a power of 2 */ #define CC_ALIGNMENT sizeof(unsigned long) /* platform word */ diff --git a/deps/ccommon/include/cc_hash.h b/deps/ccommon/include/hash/cc_lookup3.h similarity index 90% rename from deps/ccommon/include/cc_hash.h rename to deps/ccommon/include/hash/cc_lookup3.h index ec35edc1c..af6f8318c 100644 --- a/deps/ccommon/include/cc_hash.h +++ b/deps/ccommon/include/hash/cc_lookup3.h @@ -26,7 +26,7 @@ extern "C" { #include #include -uint32_t hash(const void *key, size_t length, const uint32_t initval); +uint32_t hash_lookup3(const void *key, size_t length, const uint32_t initval); #ifdef __cplusplus } diff --git a/deps/ccommon/include/hash/cc_murmur3.h b/deps/ccommon/include/hash/cc_murmur3.h new file mode 100644 index 000000000..8993e198a --- /dev/null +++ b/deps/ccommon/include/hash/cc_murmur3.h @@ -0,0 +1,39 @@ +/* + * ccommon - a cache common library. + * Copyright (C) 2013 Twitter, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * The cc_murmur3.[ch] are adapated from the canonical implementation of + * MurmurHash3 by Austin Appleby, released as part of SMHasher: + * https://github.com/aappleby/smhasher + * + * Changes include renaming fuctions, removing MSVC-related code, adding "static" + * keyword to local-scope functions according to C language spec (original code is + * in C++), to better fit them into the scope and style of ccommon + * + * The actual implementation is untouched. 
+ */ + +#pragma once + +#include + + +void hash_murmur3_32 ( const void * key, int len, uint32_t seed, void * out ); + +void hash_murmur3_128_x86 ( const void * key, int len, uint32_t seed, void * out ); + +void hash_murmur3_128_x64 ( const void * key, int len, uint32_t seed, void * out ); diff --git a/deps/ccommon/src/cc_print.c b/deps/ccommon/src/cc_print.c index 65243fa41..8a16430f5 100644 --- a/deps/ccommon/src/cc_print.c +++ b/deps/ccommon/src/cc_print.c @@ -22,6 +22,9 @@ * implementation as a reference (folly/Conv.h) */ +/* use our own macro instead of llabs() to make sure it works with INT64_MIN */ +#define abs_int64(_x) ((_x) >= 0 ? (_x) : -(_x)) + static inline void _print_uint64(char *buf, size_t d, uint64_t n) { @@ -46,6 +49,22 @@ cc_print_uint64_unsafe(char *buf, uint64_t n) return d; } +size_t +cc_print_int64_unsafe(char *buf, int64_t n) +{ + size_t d; + uint64_t ab = abs_int64(n); + + if (n < 0) { + *buf++ = '-'; + } + + d = digits(ab); + _print_uint64(buf, d, ab); + + return d + (n < 0); +} + size_t cc_print_uint64(char *buf, size_t size, uint64_t n) { @@ -61,6 +80,26 @@ cc_print_uint64(char *buf, size_t size, uint64_t n) return d; } +size_t +cc_print_int64(char *buf, size_t size, int64_t n) +{ + size_t d; + uint64_t ab = abs_int64(n); + + d = digits(ab); + if (size < d + (n < 0)) { + return 0; + } + + if (n < 0) { + *buf++ = '-'; + } + + _print_uint64(buf, d, n); + + return d + (n < 0); +} + size_t _vscnprintf(char *buf, size_t size, const char *fmt, va_list args) { diff --git a/deps/ccommon/src/hash/CMakeLists.txt b/deps/ccommon/src/hash/CMakeLists.txt index 20fdf65ee..60f178c00 100644 --- a/deps/ccommon/src/hash/CMakeLists.txt +++ b/deps/ccommon/src/hash/CMakeLists.txt @@ -1,5 +1,5 @@ set(SOURCE ${SOURCE} - hash/cc_hash.c hash/cc_lookup3.c + hash/cc_murmur3.c PARENT_SCOPE) diff --git a/deps/ccommon/src/hash/cc_hash.c b/deps/ccommon/src/hash/cc_hash.c deleted file mode 100644 index 05a6db7f1..000000000 --- a/deps/ccommon/src/hash/cc_hash.c +++ /dev/null @@ -1,446 +0,0 @@ -/* - * ccommon - a cache common library. - * Copyright (C) 2013 Twitter, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Hash table - * - * The hash function used here is by Bob Jenkins, 1996: - * - * "By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. - * You may use this code any way you wish, private, educational, - * or commercial. It's free." - * - */ - -/* - * Since the hash function does bit manipulation, it needs to know - * whether it's big or little-endian. HAVE_LITTLE_ENDIAN and HAVE_BIG_ENDIAN - * are set in the configure script. 
- */ -#include - -#if defined CC_BIG_ENDIAN && CC_BIG_ENDIAN == 1 -# define HASH_LITTLE_ENDIAN 0 -# define HASH_BIG_ENDIAN 1 -#elif defined CC_LITTLE_ENDIAN && CC_LITTLE_ENDIAN == 1 -# define HASH_LITTLE_ENDIAN 1 -# define HASH_BIG_ENDIAN 0 -#else -# define HASH_LITTLE_ENDIAN 0 -# define HASH_BIG_ENDIAN 0 -#endif - -#define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k)))) - -/* -------------------------------------------------------------------------------- -mix -- mix 3 32-bit values reversibly. - -This is reversible, so any information in (a,b,c) before mix() is -still in (a,b,c) after mix(). - -If four pairs of (a,b,c) inputs are run through mix(), or through -mix() in reverse, there are at least 32 bits of the output that -are sometimes the same for one pair and different for another pair. -This was tested for: -* pairs that differed by one bit, by two bits, in any combination - of top bits of (a,b,c), or in any combination of bottom bits of - (a,b,c). -* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed - the output delta to a Gray code (a^(a>>1)) so a string of 1's (as - is commonly produced by subtraction) look like a single 1-bit - difference. -* the base values were pseudorandom, all zero but one bit set, or - all zero plus a counter that starts at zero. - -Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that -satisfy this are - 4 6 8 16 19 4 - 9 15 3 18 27 15 - 14 9 3 7 17 3 -Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing -for "differ" defined as + with a one-bit base and a two-bit delta. I -used http://burtleburtle.net/bob/hash/avalanche.html to choose -the operations, constants, and arrangements of the variables. - -This does not achieve avalanche. There are input bits of (a,b,c) -that fail to affect some output bits of (a,b,c), especially of a. The -most thoroughly mixed value is c, but it doesn't really even achieve -avalanche in c. - -This allows some parallelism. Read-after-writes are good at doubling -the number of bits affected, so the goal of mixing pulls in the opposite -direction as the goal of parallelism. I did what I could. Rotates -seem to cost as much as shifts on every machine I could lay my hands -on, and rotates are much kinder to the top and bottom bits, so I used -rotates. -------------------------------------------------------------------------------- -*/ -#define mix(a,b,c) \ -{ \ - a -= c; a ^= rot(c, 4); c += b; \ - b -= a; b ^= rot(a, 6); a += c; \ - c -= b; c ^= rot(b, 8); b += a; \ - a -= c; a ^= rot(c,16); c += b; \ - b -= a; b ^= rot(a,19); a += c; \ - c -= b; c ^= rot(b, 4); b += a; \ -} - -/* -------------------------------------------------------------------------------- -final -- final mixing of 3 32-bit values (a,b,c) into c - -Pairs of (a,b,c) values differing in only a few bits will usually -produce values of c that look totally different. This was tested for -* pairs that differed by one bit, by two bits, in any combination - of top bits of (a,b,c), or in any combination of bottom bits of - (a,b,c). -* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed - the output delta to a Gray code (a^(a>>1)) so a string of 1's (as - is commonly produced by subtraction) look like a single 1-bit - difference. -* the base values were pseudorandom, all zero but one bit set, or - all zero plus a counter that starts at zero. 
- -These constants passed: - 14 11 25 16 4 14 24 - 12 14 25 16 4 14 24 -and these came close: - 4 8 15 26 3 22 24 - 10 8 15 26 3 22 24 - 11 8 15 26 3 22 24 -------------------------------------------------------------------------------- -*/ -#define final(a,b,c) \ -{ \ - c ^= b; c -= rot(b,14); \ - a ^= c; a -= rot(c,11); \ - b ^= a; b -= rot(a,25); \ - c ^= b; c -= rot(b,16); \ - a ^= c; a -= rot(c,4); \ - b ^= a; b -= rot(a,14); \ - c ^= b; c -= rot(b,24); \ -} - -#if HASH_LITTLE_ENDIAN == 1 -uint32_t hash( - const void *key, /* the key to hash */ - size_t length, /* length of the key */ - const uint32_t initval) /* initval */ -{ - uint32_t a,b,c; /* internal state */ - union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ - - /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; - - u.ptr = key; - if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { - const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ -#ifdef VALGRIND - const uint8_t *k8; -#endif /* ifdef VALGRIND */ - - /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ - while (length > 12) - { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a,b,c); - length -= 12; - k += 3; - } - - /*----------------------------- handle the last (probably partial) block */ - /* - * "k[2]&0xffffff" actually reads beyond the end of the string, but - * then masks off the part it's not allowed to read. Because the - * string is aligned, the masked-off tail is in the same word as the - * rest of the string. Every machine with memory protection I've seen - * does it on word boundaries, so is OK with this. But VALGRIND will - * still catch it and complain. The masking trick does make the hash - * noticably faster for short strings (like English words). 
- */ -#ifndef VALGRIND - - switch(length) - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; - case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; - case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=k[1]&0xffffff; a+=k[0]; break; - case 6 : b+=k[1]&0xffff; a+=k[0]; break; - case 5 : b+=k[1]&0xff; a+=k[0]; break; - case 4 : a+=k[0]; break; - case 3 : a+=k[0]&0xffffff; break; - case 2 : a+=k[0]&0xffff; break; - case 1 : a+=k[0]&0xff; break; - case 0 : return c; /* zero length strings require no mixing */ - } - -#else /* make valgrind happy */ - - k8 = (const uint8_t *)k; - switch(length) - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ - case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ - case 9 : c+=k8[8]; /* fall through */ - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ - case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ - case 5 : b+=k8[4]; /* fall through */ - case 4 : a+=k[0]; break; - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ - case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ - case 1 : a+=k8[0]; break; - case 0 : return c; /* zero length strings require no mixing */ - } - -#endif /* !valgrind */ - - } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { - const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ - const uint8_t *k8; - - /*--------------- all but last block: aligned reads and different mixing */ - while (length > 12) - { - a += k[0] + (((uint32_t)k[1])<<16); - b += k[2] + (((uint32_t)k[3])<<16); - c += k[4] + (((uint32_t)k[5])<<16); - mix(a,b,c); - length -= 12; - k += 6; - } - - /*----------------------------- handle the last (probably partial) block */ - k8 = (const uint8_t *)k; - switch(length) - { - case 12: c+=k[4]+(((uint32_t)k[5])<<16); - b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 11: c+=((uint32_t)k8[10])<<16; /* @fallthrough */ - case 10: c+=k[4]; /* @fallthrough@ */ - b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 9 : c+=k8[8]; /* @fallthrough */ - case 8 : b+=k[2]+(((uint32_t)k[3])<<16); - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 7 : b+=((uint32_t)k8[6])<<16; /* @fallthrough */ - case 6 : b+=k[2]; - a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 5 : b+=k8[4]; /* @fallthrough */ - case 4 : a+=k[0]+(((uint32_t)k[1])<<16); - break; - case 3 : a+=((uint32_t)k8[2])<<16; /* @fallthrough */ - case 2 : a+=k[0]; - break; - case 1 : a+=k8[0]; - break; - case 0 : return c; /* zero length strings require no mixing */ - } - - } else { /* need to read the key one byte at a time */ - const uint8_t *k = (const uint8_t *)key; - - /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ - while (length > 12) - { - a += k[0]; - a += ((uint32_t)k[1])<<8; - a += ((uint32_t)k[2])<<16; - a += ((uint32_t)k[3])<<24; - b += k[4]; - b += ((uint32_t)k[5])<<8; - b += ((uint32_t)k[6])<<16; - b += ((uint32_t)k[7])<<24; - c += k[8]; - c += ((uint32_t)k[9])<<8; - c += ((uint32_t)k[10])<<16; - c += ((uint32_t)k[11])<<24; - mix(a,b,c); - length -= 12; - k += 12; - } - - /*-------------------------------- last block: affect all 32 bits of (c) */ - switch(length) /* all the case statements fall through */ - { - case 12: c+=((uint32_t)k[11])<<24; - case 11: c+=((uint32_t)k[10])<<16; - case 10: c+=((uint32_t)k[9])<<8; - case 9 : c+=k[8]; - case 8 : 
b+=((uint32_t)k[7])<<24; - case 7 : b+=((uint32_t)k[6])<<16; - case 6 : b+=((uint32_t)k[5])<<8; - case 5 : b+=k[4]; - case 4 : a+=((uint32_t)k[3])<<24; - case 3 : a+=((uint32_t)k[2])<<16; - case 2 : a+=((uint32_t)k[1])<<8; - case 1 : a+=k[0]; - break; - case 0 : return c; /* zero length strings require no mixing */ - } - } - - final(a,b,c); - return c; /* zero length strings require no mixing */ -} - -#elif HASH_BIG_ENDIAN == 1 -/* - * hashbig(): - * This is the same as hashword() on big-endian machines. It is different - * from hashlittle() on all machines. hashbig() takes advantage of - * big-endian byte ordering. - */ -uint32_t hash( const void *key, size_t length, const uint32_t initval) -{ - uint32_t a,b,c; - union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ - - /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; - - u.ptr = key; - if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { - const uint32_t *k = key; /* read 32-bit chunks */ -#ifdef VALGRIND - const uint8_t *k8; -#endif /* ifdef VALGRIND */ - - /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ - while (length > 12) - { - a += k[0]; - b += k[1]; - c += k[2]; - mix(a,b,c); - length -= 12; - k += 3; - } - - /*----------------------------- handle the last (probably partial) block */ - /* - * "k[2]<<8" actually reads beyond the end of the string, but - * then shifts out the part it's not allowed to read. Because the - * string is aligned, the illegal read is in the same word as the - * rest of the string. Every machine with memory protection I've seen - * does it on word boundaries, so is OK with this. But VALGRIND will - * still catch it and complain. The masking trick does make the hash - * noticably faster for short strings (like English words). 
- */ -#ifndef VALGRIND - - switch(length) - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; - case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; - case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; - case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; - case 5 : b+=k[1]&0xff000000; a+=k[0]; break; - case 4 : a+=k[0]; break; - case 3 : a+=k[0]&0xffffff00; break; - case 2 : a+=k[0]&0xffff0000; break; - case 1 : a+=k[0]&0xff000000; break; - case 0 : return c; /* zero length strings require no mixing */ - } - -#else /* make valgrind happy */ - - k8 = (const uint8_t *)k; - switch(length) /* all the case statements fall through */ - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ - case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ - case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ - case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ - case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ - case 4 : a+=k[0]; break; - case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ - case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ - case 1 : a+=((uint32_t)k8[0])<<24; break; - case 0 : return c; - } - -#endif /* !VALGRIND */ - - } else { /* need to read the key one byte at a time */ - const uint8_t *k = key; - - /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ - while (length > 12) - { - a += ((uint32_t)k[0])<<24; - a += ((uint32_t)k[1])<<16; - a += ((uint32_t)k[2])<<8; - a += ((uint32_t)k[3]); - b += ((uint32_t)k[4])<<24; - b += ((uint32_t)k[5])<<16; - b += ((uint32_t)k[6])<<8; - b += ((uint32_t)k[7]); - c += ((uint32_t)k[8])<<24; - c += ((uint32_t)k[9])<<16; - c += ((uint32_t)k[10])<<8; - c += ((uint32_t)k[11]); - mix(a,b,c); - length -= 12; - k += 12; - } - - /*-------------------------------- last block: affect all 32 bits of (c) */ - switch(length) /* all the case statements fall through */ - { - case 12: c+=k[11]; - case 11: c+=((uint32_t)k[10])<<8; - case 10: c+=((uint32_t)k[9])<<16; - case 9 : c+=((uint32_t)k[8])<<24; - case 8 : b+=k[7]; - case 7 : b+=((uint32_t)k[6])<<8; - case 6 : b+=((uint32_t)k[5])<<16; - case 5 : b+=((uint32_t)k[4])<<24; - case 4 : a+=k[3]; - case 3 : a+=((uint32_t)k[2])<<8; - case 2 : a+=((uint32_t)k[1])<<16; - case 1 : a+=((uint32_t)k[0])<<24; - break; - case 0 : return c; - } - } - - final(a,b,c); - return c; -} -#else /* HASH_XXX_ENDIAN == 1 */ -#error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN -#endif /* HASH_XXX_ENDIAN == 1 */ diff --git a/deps/ccommon/src/hash/cc_lookup3.c b/deps/ccommon/src/hash/cc_lookup3.c index 581bc22d1..cef9b1186 100644 --- a/deps/ccommon/src/hash/cc_lookup3.c +++ b/deps/ccommon/src/hash/cc_lookup3.c @@ -1,36 +1,50 @@ /* -Excerpt and modified from lookup3.c (http://burtleburtle.net/bob/c/lookup3.c), -originally by Bob Jenkins, May 2006, Public Domain. -*/ - -#include - -#include + * ccommon - a cache common library. + * Copyright (C) 2013 Twitter, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ -#include /* defines uint32_t etc */ -#include /* attempt to define endianness */ +/* + * Hash table + * + * The hash function used here is by Bob Jenkins, 1996: + * + * "By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. + * You may use this code any way you wish, private, educational, + * or commercial. It's free." + * + */ /* - * My best guess at if you are big-endian or little-endian. This may - * need adjustment. + * Since the hash function does bit manipulation, it needs to know + * whether it's big or little-endian. HAVE_LITTLE_ENDIAN and HAVE_BIG_ENDIAN + * are set in the configure script. */ -#if (defined(CC_LITTLE_ENDIAN)) || \ - (defined(i386) || defined(__i386__) || defined(__i486__) || \ - defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL)) -# define HASH_LITTLE_ENDIAN 1 -# define HASH_BIG_ENDIAN 0 -#elif (defined(CC_BIG_ENDIAN)) || \ - (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) +#include + +#if defined CC_BIG_ENDIAN && CC_BIG_ENDIAN == 1 # define HASH_LITTLE_ENDIAN 0 -# define HASH_BIG_ENDIAN 1 +# define HASH_BIG_ENDIAN 1 +#elif defined CC_LITTLE_ENDIAN && CC_LITTLE_ENDIAN == 1 +# define HASH_LITTLE_ENDIAN 1 +# define HASH_BIG_ENDIAN 0 #else # define HASH_LITTLE_ENDIAN 0 -# define HASH_BIG_ENDIAN 0 +# define HASH_BIG_ENDIAN 0 #endif -#define hashsize(n) ((uint32_t)1<<(n)) -#define hashmask(n) (hashsize(n)-1) -#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) +#define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k)))) /* ------------------------------------------------------------------------------- @@ -122,35 +136,11 @@ and these came close: c ^= b; c -= rot(b,24); \ } -/* -------------------------------------------------------------------------------- -hashlittle() -- hash a variable-length key into a 32-bit value - k : the key (the unaligned variable-length array of bytes) - length : the length of the key, counting by bytes - initval : can be any 4-byte value -Returns a 32-bit value. Every bit of the key affects every bit of -the return value. Two keys differing by one or two bits will have -totally different hash values. - -The best hash table sizes are powers of 2. There is no need to do -mod a prime (mod is sooo slow!). If you need less than 32 bits, -use a bitmask. For example, if you need only 10 bits, do - h = (h & hashmask(10)); -In which case, the hash table should have hashsize(10) elements. - -If you are hashing n strings (uint8_t **)k, do it like this: - for (i=0, h=0; i 12) @@ -173,6 +166,7 @@ hashlittle( const void *key, size_t length, uint32_t initval) k += 3; } + /*----------------------------- handle the last (probably partial) block */ /* * "k[2]&0xffffff" actually reads beyond the end of the string, but * then masks off the part it's not allowed to read. Because the @@ -182,6 +176,8 @@ hashlittle( const void *key, size_t length, uint32_t initval) * still catch it and complain. The masking trick does make the hash * noticably faster for short strings (like English words). 
*/ +#ifndef VALGRIND + switch(length) { case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; @@ -196,9 +192,31 @@ hashlittle( const void *key, size_t length, uint32_t initval) case 3 : a+=k[0]&0xffffff; break; case 2 : a+=k[0]&0xffff; break; case 1 : a+=k[0]&0xff; break; - case 0 : return c; /* zero length strings require no mixing */ + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return c; /* zero length strings require no mixing */ } +#endif /* !valgrind */ + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ const uint8_t *k8; @@ -222,28 +240,28 @@ hashlittle( const void *key, size_t length, uint32_t initval) b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; - case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ - case 10: c+=k[4]; + case 11: c+=((uint32_t)k8[10])<<16; /* @fallthrough */ + case 10: c+=k[4]; /* @fallthrough@ */ b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; - case 9 : c+=k8[8]; /* fall through */ + case 9 : c+=k8[8]; /* @fallthrough */ case 8 : b+=k[2]+(((uint32_t)k[3])<<16); a+=k[0]+(((uint32_t)k[1])<<16); break; - case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 7 : b+=((uint32_t)k8[6])<<16; /* @fallthrough */ case 6 : b+=k[2]; a+=k[0]+(((uint32_t)k[1])<<16); break; - case 5 : b+=k8[4]; /* fall through */ + case 5 : b+=k8[4]; /* @fallthrough */ case 4 : a+=k[0]+(((uint32_t)k[1])<<16); break; - case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 3 : a+=((uint32_t)k8[2])<<16; /* @fallthrough */ case 2 : a+=k[0]; break; case 1 : a+=k8[0]; break; - case 0 : return c; /* zero length requires no mixing */ + case 0 : return c; /* zero length strings require no mixing */ } } else { /* need to read the key one byte at a time */ @@ -285,6 +303,137 @@ hashlittle( const void *key, size_t length, uint32_t initval) case 2 : a+=((uint32_t)k[1])<<8; case 1 : a+=k[0]; break; + case 0 : return c; /* zero length strings require no mixing */ + } + } + + final(a,b,c); + return c; /* zero length strings require no mixing */ +} + +#elif HASH_BIG_ENDIAN == 1 +/* + * hashbig(): + * This is the same as hashword() on big-endian machines. It is different + * from hashlittle() on all machines. hashbig() takes advantage of + * big-endian byte ordering. 
+ */ +uint32_t hash_lookup3( const void *key, size_t length, const uint32_t initval) +{ + uint32_t a,b,c; + union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + + u.ptr = key; + if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = key; /* read 32-bit chunks */ +#ifdef VALGRIND + const uint8_t *k8; +#endif /* ifdef VALGRIND */ + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]<<8" actually reads beyond the end of the string, but + * then shifts out the part it's not allowed to read. Because the + * string is aligned, the illegal read is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). + */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; + case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; + case 5 : b+=k[1]&0xff000000; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff00; break; + case 2 : a+=k[0]&0xffff0000; break; + case 1 : a+=k[0]&0xff000000; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ + case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ + case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ + case 1 : a+=((uint32_t)k8[0])<<24; break; + case 0 : return c; + } + +#endif /* !VALGRIND */ + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += ((uint32_t)k[0])<<24; + a += ((uint32_t)k[1])<<16; + a += ((uint32_t)k[2])<<8; + a += ((uint32_t)k[3]); + b += ((uint32_t)k[4])<<24; + b += ((uint32_t)k[5])<<16; + b += ((uint32_t)k[6])<<8; + b += ((uint32_t)k[7]); + c += ((uint32_t)k[8])<<24; + c += ((uint32_t)k[9])<<16; + c += ((uint32_t)k[10])<<8; + c += ((uint32_t)k[11]); + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[11]; + case 11: c+=((uint32_t)k[10])<<8; + case 10: c+=((uint32_t)k[9])<<16; + case 9 : c+=((uint32_t)k[8])<<24; + case 8 : b+=k[7]; + case 7 : b+=((uint32_t)k[6])<<8; + case 6 : 
b+=((uint32_t)k[5])<<16; + case 5 : b+=((uint32_t)k[4])<<24; + case 4 : a+=k[3]; + case 3 : a+=((uint32_t)k[2])<<8; + case 2 : a+=((uint32_t)k[1])<<16; + case 1 : a+=((uint32_t)k[0])<<24; + break; case 0 : return c; } } @@ -292,3 +441,6 @@ hashlittle( const void *key, size_t length, uint32_t initval) final(a,b,c); return c; } +#else /* HASH_XXX_ENDIAN == 1 */ +#error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN +#endif /* HASH_XXX_ENDIAN == 1 */ diff --git a/deps/ccommon/src/hash/cc_murmur3.c b/deps/ccommon/src/hash/cc_murmur3.c new file mode 100644 index 000000000..7152523d5 --- /dev/null +++ b/deps/ccommon/src/hash/cc_murmur3.c @@ -0,0 +1,328 @@ +/* + * ccommon - a cache common library. + * Copyright (C) 2013 Twitter, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + +#include "hash/cc_murmur3.h" + +#define FORCE_INLINE inline __attribute__((always_inline)) + +static inline uint32_t rotl32 ( uint32_t x, int8_t r ) +{ + return (x << r) | (x >> (32 - r)); +} + +static inline uint64_t rotl64 ( uint64_t x, int8_t r ) +{ + return (x << r) | (x >> (64 - r)); +} + +#define ROTL32(x,y) rotl32(x,y) +#define ROTL64(x,y) rotl64(x,y) + +#define BIG_CONSTANT(x) (x##LLU) + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +static FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i ) +{ + return p[i]; +} + +static FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) +{ + return p[i]; +} + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +static FORCE_INLINE uint32_t fmix32 ( uint32_t h ) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +//---------- + +static FORCE_INLINE uint64_t fmix64 ( uint64_t k ) +{ + k ^= k >> 33; + k *= BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; +} + +//----------------------------------------------------------------------------- + +void hash_murmur3_32 ( const void * key, int len, + uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + //---------- + // body + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); + + for(int i = -nblocks; i; i++) + { + uint32_t k1 = getblock32(blocks,i); + + k1 *= c1; + k1 = ROTL32(k1,15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1,13); + h1 = h1*5+0xe6546b64; + } + + 
//---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*4); + + uint32_t k1 = 0; + + switch(len & 3) + { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + + h1 = fmix32(h1); + + *(uint32_t*)out = h1; +} + +//----------------------------------------------------------------------------- + +void hash_murmur3_128_x86 ( const void * key, const int len, + uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint32_t h1 = seed; + uint32_t h2 = seed; + uint32_t h3 = seed; + uint32_t h4 = seed; + + const uint32_t c1 = 0x239b961b; + const uint32_t c2 = 0xab0e9789; + const uint32_t c3 = 0x38b34ae5; + const uint32_t c4 = 0xa1e38b93; + + //---------- + // body + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); + + for(int i = -nblocks; i; i++) + { + uint32_t k1 = getblock32(blocks,i*4+0); + uint32_t k2 = getblock32(blocks,i*4+1); + uint32_t k3 = getblock32(blocks,i*4+2); + uint32_t k4 = getblock32(blocks,i*4+3); + + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + + h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; + + k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; + + h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; + + k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; + + h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; + + k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; + + h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*16); + + uint32_t k1 = 0; + uint32_t k2 = 0; + uint32_t k3 = 0; + uint32_t k4 = 0; + + switch(len & 15) + { + case 15: k4 ^= tail[14] << 16; + case 14: k4 ^= tail[13] << 8; + case 13: k4 ^= tail[12] << 0; + k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; + + case 12: k3 ^= tail[11] << 24; + case 11: k3 ^= tail[10] << 16; + case 10: k3 ^= tail[ 9] << 8; + case 9: k3 ^= tail[ 8] << 0; + k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; + + case 8: k2 ^= tail[ 7] << 24; + case 7: k2 ^= tail[ 6] << 16; + case 6: k2 ^= tail[ 5] << 8; + case 5: k2 ^= tail[ 4] << 0; + k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; + + case 4: k1 ^= tail[ 3] << 24; + case 3: k1 ^= tail[ 2] << 16; + case 2: k1 ^= tail[ 1] << 8; + case 1: k1 ^= tail[ 0] << 0; + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + h1 = fmix32(h1); + h2 = fmix32(h2); + h3 = fmix32(h3); + h4 = fmix32(h4); + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + ((uint32_t*)out)[0] = h1; + ((uint32_t*)out)[1] = h2; + ((uint32_t*)out)[2] = h3; + ((uint32_t*)out)[3] = h4; +} + +//----------------------------------------------------------------------------- + +void hash_murmur3_128_x64 ( const void * key, const int len, + const uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint64_t h1 = seed; + uint64_t h2 = seed; + + const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); + const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); + + //---------- + // body + + const uint64_t * blocks = (const uint64_t *)(data); + + for(int i = 0; i < nblocks; i++) + { + uint64_t k1 = getblock64(blocks,i*2+0); + uint64_t k2 = getblock64(blocks,i*2+1); + + k1 *= c1; 
k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; + + h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; + + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; + + h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch(len & 15) + { + case 15: k2 ^= ((uint64_t)tail[14]) << 48; + case 14: k2 ^= ((uint64_t)tail[13]) << 40; + case 13: k2 ^= ((uint64_t)tail[12]) << 32; + case 12: k2 ^= ((uint64_t)tail[11]) << 24; + case 11: k2 ^= ((uint64_t)tail[10]) << 16; + case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; + case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; + + case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; + case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; + case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; + case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; + case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; + case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; + case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; + case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; + k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + ((uint64_t*)out)[0] = h1; + ((uint64_t*)out)[1] = h2; +} + +//----------------------------------------------------------------------------- + + diff --git a/deps/ccommon/test/time/wheel/check_wheel.c b/deps/ccommon/test/time/wheel/check_wheel.c index c2631d1eb..4bd4df0ee 100644 --- a/deps/ccommon/test/time/wheel/check_wheel.c +++ b/deps/ccommon/test/time/wheel/check_wheel.c @@ -116,7 +116,7 @@ END_TEST START_TEST(test_timing_wheel_recur) { -#define TICK_NS 10000000 +#define TICK_NS 50000000 #define NSLOT 3 #define NTICK 2 @@ -136,11 +136,13 @@ START_TEST(test_timing_wheel_recur) timing_wheel_insert(tw, &delay, true, _incr_cb, &i); - nanosleep(&ts, NULL); + /* tick unchanged */ timing_wheel_execute(tw); ck_assert_int_eq(tw->nprocess, 0); ck_assert_int_eq(tw->nevent, 1); + /* next 2 tick */ + nanosleep(&ts, NULL); nanosleep(&ts, NULL); timing_wheel_execute(tw); ck_assert_int_eq(tw->nevent, 1); @@ -152,6 +154,7 @@ START_TEST(test_timing_wheel_recur) ck_assert_int_eq(tw->nprocess, 2); ck_assert_int_eq(i, 2); + /* flush events */ timing_wheel_stop(tw); timing_wheel_flush(tw); ck_assert_int_eq(tw->nevent, 0); diff --git a/src/hotkey/kc_map.c b/src/hotkey/kc_map.c index b4b4d6356..ea7c16c9f 100644 --- a/src/hotkey/kc_map.c +++ b/src/hotkey/kc_map.c @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include @@ -180,7 +180,7 @@ kc_map_teardown(void) static inline struct kcme_slh * _get_bucket(const struct bstring *key) { - return &(table[hash(key->data, key->len, 0) % table_size]); + return &(table[hash_lookup3(key->data, key->len, 0) % table_size]); } uint32_t diff --git a/src/protocol/data/CMakeLists.txt b/src/protocol/data/CMakeLists.txt index 3506ac53d..dd4eb3c78 100644 --- a/src/protocol/data/CMakeLists.txt +++ b/src/protocol/data/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(memcache) add_subdirectory(ping) +add_subdirectory(redis) diff --git a/src/protocol/data/memcache/compose.c b/src/protocol/data/memcache/compose.c index 8a56cdd50..c7a7bc1fc 100644 --- a/src/protocol/data/memcache/compose.c +++ b/src/protocol/data/memcache/compose.c @@ -70,7 +70,7 @@ _write_uint64(struct buf **buf, uint64_t val) struct buf *b; - /* NOTE(yao): here we are being conservative on how many bytes wee need + /* NOTE(yao): here 
we are being conservative on how many bytes we need * to print a (64-bit) integer. The actual number might be smaller. * But since it is 21 bytes at most (including \0' while buffers usually * are KBs in size, it is unlikely to cause many extra expansions. diff --git a/src/protocol/data/redis/CMakeLists.txt b/src/protocol/data/redis/CMakeLists.txt new file mode 100644 index 000000000..52280aee7 --- /dev/null +++ b/src/protocol/data/redis/CMakeLists.txt @@ -0,0 +1,8 @@ +set(SOURCE + compose.c + parse.c + request.c + response.c + token.c) + +add_library(protocol_redis ${SOURCE}) diff --git a/src/protocol/data/redis/cmd_hash.h b/src/protocol/data/redis/cmd_hash.h new file mode 100644 index 000000000..a5c762c44 --- /dev/null +++ b/src/protocol/data/redis/cmd_hash.h @@ -0,0 +1,28 @@ +#pragma once + +/* + * Note: negative # of arguments means variable number of arguments: + * e.g. `-2' means at least two arguments. This notation is inherited from + * the original Redis server implementation. + */ + +/* type string # of args */ +#define REQ_HASH(ACTION) \ + ACTION( REQ_HDEL, "hdel", -3 )\ + ACTION( REQ_HDELALL, "hdelall", 2 )\ + ACTION( REQ_HEXISTS, "hexists", 3 )\ + ACTION( REQ_HGET, "hget", 3 )\ + ACTION( REQ_HGETALL, "hgetall", 2 )\ + ACTION( REQ_HINCRBY, "hincrby", 4 )\ + ACTION( REQ_HINCRBYFLOAT, "hincrbyfloat", 4 )\ + ACTION( REQ_HKEYS, "hkeys", 2 )\ + ACTION( REQ_HLEN, "hlen", 2 )\ + ACTION( REQ_HMGET, "hmget", -3 )\ + ACTION( REQ_HMSET, "hmset", -4 )\ + ACTION( REQ_HSET, "hset", 4 )\ + ACTION( REQ_HSETNX, "hsetnx", 4 )\ + ACTION( REQ_HSTRLEN, "hstrlen", 3 )\ + ACTION( REQ_HVALS, "hvals", 2 )\ + ACTION( REQ_HSCAN, "hscan", -3 ) + +/* "hlen KEY" == "*2\r\n$4\r\nhlen\r\n$3\r\nKEY\r\n" */ diff --git a/src/protocol/data/redis/cmd_misc.h b/src/protocol/data/redis/cmd_misc.h new file mode 100644 index 000000000..44b94265d --- /dev/null +++ b/src/protocol/data/redis/cmd_misc.h @@ -0,0 +1,6 @@ +#pragma once + +/* type string # of args */ +#define REQ_MISC(ACTION) \ + ACTION( REQ_PING, "ping", -1 )\ + ACTION( REQ_QUIT, "quit", 1 ) diff --git a/src/protocol/data/redis/cmd_zset.h b/src/protocol/data/redis/cmd_zset.h new file mode 100644 index 000000000..772069bc6 --- /dev/null +++ b/src/protocol/data/redis/cmd_zset.h @@ -0,0 +1,25 @@ +#pragma once + +/* type string # of args */ +#define REQ_ZSET(ACTION) \ + ACTION( REQ_ZADD, "zadd", -4 )\ + ACTION( REQ_ZINCRBY, "zincrby", 4 )\ + ACTION( REQ_ZREM, "zrem", -3 )\ + ACTION( REQ_ZREMRANGEBYSCORE, "zremrangebyscore", 4 )\ + ACTION( REQ_ZREMRANGEBYRANK, "zremrangebyrank", 4 )\ + ACTION( REQ_ZREMRANGEBYLEX, "zremrangebylex", 4 )\ + ACTION( REQ_ZUNIONSTORE, "zunionstore", -4 )\ + ACTION( REQ_ZINTERSTORE, "zinterstore", -4 )\ + ACTION( REQ_ZRANGE, "zrange", -4 )\ + ACTION( REQ_ZRANGEBYSCORE, "zrangebyscore", -4 )\ + ACTION( REQ_ZREVRANGEBYSCORE, "zrevrangebyscore", -4 )\ + ACTION( REQ_ZRANGEBYLEX, "zrangebylex", -4 )\ + ACTION( REQ_ZREVRANGEBYLEX, "zrevrangebylex", -4 )\ + ACTION( REQ_ZCOUNT, "zcount", 4 )\ + ACTION( REQ_ZLEXCOUNT, "zlexcount", 4 )\ + ACTION( REQ_ZREVRANGE, "zrevrange", -4 )\ + ACTION( REQ_ZCARD, "zcard", 2 )\ + ACTION( REQ_ZSCORE, "zscore", 3 )\ + ACTION( REQ_ZRANK, "zrank", 3 )\ + ACTION( REQ_ZREVRANK, "zrevrank", 3 )\ + ACTION( REQ_ZSCAN, "zscan", -3 ) diff --git a/src/protocol/data/redis/compose.c b/src/protocol/data/redis/compose.c new file mode 100644 index 000000000..9b829cde9 --- /dev/null +++ b/src/protocol/data/redis/compose.c @@ -0,0 +1,92 @@ +#include "compose.h" + +#include "request.h" +#include "response.h" +#include "token.h" + 
+#include +#include + +#define COMPOSE_MODULE_NAME "protocol::redis::compose" + +static bool compose_init = false; +static compose_req_metrics_st *compose_req_metrics = NULL; +static compose_rsp_metrics_st *compose_rsp_metrics = NULL; + +void +compose_setup(compose_req_metrics_st *req, compose_rsp_metrics_st *rsp) +{ + log_info("set up the %s module", COMPOSE_MODULE_NAME); + + if (compose_init) { + log_warn("%s has already been setup, overwrite", COMPOSE_MODULE_NAME); + } + + compose_req_metrics = req; + compose_rsp_metrics = rsp; + + compose_init = true; +} + +void +compose_teardown(void) +{ + log_info("tear down the %s module", COMPOSE_MODULE_NAME); + + if (!compose_init) { + log_warn("%s has never been setup", COMPOSE_MODULE_NAME); + } + compose_req_metrics = NULL; + compose_rsp_metrics = NULL; + compose_init = false; +} + +int +compose_req(struct buf **buf, struct request *req) +{ + int n; + + n = compose_array_header(buf, req->token->nelem); + if (n < 0) { + return n; + } + + for (int i = 0; i < req->token->nelem; i++) { + int ret; + + ret = compose_element(buf, array_get(req->token, i)); + if (ret < 0) { + return ret; + } else { + n += ret; + } + } + + return n; +} + +int +compose_rsp(struct buf **buf, struct response *rsp) +{ + int n = 0; + + if (rsp->type == ELEM_ARRAY) { + n = compose_array_header(buf, rsp->token->nelem); + if (n < 0) { + return n; + } + } + + for (int i = 0; i < rsp->token->nelem; i++) { + int ret; + + ret = compose_element(buf, array_get(rsp->token, i)); + if (ret < 0) { + return ret; + } else { + n += ret; + } + } + + return n; +} diff --git a/src/protocol/data/redis/compose.h b/src/protocol/data/redis/compose.h new file mode 100644 index 000000000..0b15f8c42 --- /dev/null +++ b/src/protocol/data/redis/compose.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include +#include + +#include + +/* name Type description */ +#define COMPOSE_REQ_METRIC(ACTION) \ + ACTION( request_compose, METRIC_COUNTER, "# requests composed" )\ + ACTION( request_compose_ex, METRIC_COUNTER, "# composing error" ) + +/* name Type description */ +#define COMPOSE_RSP_METRIC(ACTION) \ + ACTION( response_compose, METRIC_COUNTER, "# responses composed" )\ + ACTION( response_compose_ex, METRIC_COUNTER, "# rsp composing error") + +typedef struct { + COMPOSE_REQ_METRIC(METRIC_DECLARE) +} compose_req_metrics_st; + +typedef struct { + COMPOSE_RSP_METRIC(METRIC_DECLARE) +} compose_rsp_metrics_st; + +typedef enum compose_rstatus { + COMPOSE_OK = 0, + COMPOSE_EUNFIN = -1, + COMPOSE_ENOMEM = -2, + COMPOSE_EINVALID = -3, + COMPOSE_EOTHER = -4, +} compose_rstatus_t; + +struct request; +struct response; + +void compose_setup(compose_req_metrics_st *req, compose_rsp_metrics_st *rsp); +void compose_teardown(void); + +/* if the return value is negative, it can be interpreted as compose_rstatus */ +int compose_req(struct buf **buf, struct request *req); + +int compose_rsp(struct buf **buf, struct response *rsp); diff --git a/src/protocol/data/redis/parse.c b/src/protocol/data/redis/parse.c new file mode 100644 index 000000000..855971955 --- /dev/null +++ b/src/protocol/data/redis/parse.c @@ -0,0 +1,180 @@ +#include "parse.h" + +#include "request.h" +#include "response.h" +#include "token.h" + +#include +#include +#include +#include + +#include + +#define PARSE_MODULE_NAME "protocol::redis::parse" + +static bool parse_init = false; +static parse_req_metrics_st *parse_req_metrics = NULL; +static parse_rsp_metrics_st *parse_rsp_metrics = NULL; + +void +parse_setup(parse_req_metrics_st *req, parse_rsp_metrics_st 
*rsp) +{ + log_info("set up the %s module", PARSE_MODULE_NAME); + + if (parse_init) { + log_warn("%s has already been setup, overwrite", PARSE_MODULE_NAME); + } + + parse_req_metrics = req; + parse_rsp_metrics = rsp; + parse_init = true; +} + +void +parse_teardown(void) +{ + log_info("tear down the %s module", PARSE_MODULE_NAME); + + if (!parse_init) { + log_warn("%s has never been setup", PARSE_MODULE_NAME); + } + + parse_req_metrics = NULL; + parse_rsp_metrics = NULL; + parse_init = false; +} + +static parse_rstatus_t +_parse_cmd(struct request *req) +{ + cmd_type_e type; + struct command cmd; + struct element *el; + int narg; + + ASSERT(req != NULL); + + /* check verb */ + type = REQ_UNKNOWN; + el = array_first(req->token); + + ASSERT (el->type == ELEM_BULK); + while (++type < REQ_SENTINEL && + bstring_compare(&command_table[type].bstr, &el->bstr) != 0) {} + if (type == REQ_SENTINEL) { + log_warn("unrecognized command detected: %.*s", el->bstr.len, + el->bstr.data); + return PARSE_EINVALID; + } + req->type = type; + + /* check narg */ + cmd = command_table[type]; + narg = req->token->nelem; + if ((cmd.narg >= 0 && cmd.narg != narg) || narg + cmd.narg < 0) { + log_warn("wrong number of arguments for '%.*s': %d expected, %d given", + cmd.bstr.len, cmd.bstr.data, cmd.narg, narg); + return PARSE_EINVALID; + } + + return PARSE_OK; +} + + +parse_rstatus_t +parse_req(struct request *req, struct buf *buf) +{ + parse_rstatus_t status = PARSE_OK; + char *old_rpos = buf->rpos; + int64_t nelem; + struct element *el; + + log_verb("parsing buf %p into req %p", buf, req); + + /* get number of elements in the array */ + if (!token_is_array(buf)) { + log_debug("parse req failed: not an array"); + return PARSE_EINVALID; + } + status = token_array_nelem(&nelem, buf); + if (status != PARSE_OK) { + buf->rpos = old_rpos; + return status; + } + if (nelem < 1 || nelem > req->token->nalloc) { + log_debug("parse req: invalid array size, %d not in [1, %"PRIu32"]", + nelem, req->token->nalloc); + return PARSE_EINVALID; + } + + /* parse elements */ + while (nelem > 0) { + el = array_push(req->token); + status = parse_element(el, buf); + if (status != PARSE_OK) { + log_verb("parse element returned status %d", status); + request_reset(req); + buf->rpos = old_rpos; + return status; + } + nelem--; + } + + status = _parse_cmd(req); + if (status != PARSE_OK) { + buf->rpos = old_rpos; + return status; + } + + return PARSE_OK; +} + +parse_rstatus_t +parse_rsp(struct response *rsp, struct buf *buf) +{ + parse_rstatus_t status = PARSE_OK; + char *old_rpos = buf->rpos; + int64_t nelem = 1; + struct element *el; + + ASSERT(rsp->type == ELEM_UNKNOWN); + + log_verb("parsing buf %p into rsp %p", buf, rsp); + + if (token_is_array(buf)) { + status = token_array_nelem(&nelem, buf); + if (status != PARSE_OK) { + buf->rpos = old_rpos; + return status; + } + rsp->type = ELEM_ARRAY; + if (nelem > rsp->token->nalloc) { + log_debug("parse rsp: invalid # of elements, %d > %"PRIu32, nelem, + rsp->token->nalloc); + return PARSE_EOVERSIZE; + } + if (nelem < 0) { + rsp->nil = true; + return PARSE_OK; + } + } + + /* parse elements */ + while (nelem > 0) { + el = array_push(rsp->token); + status = parse_element(el, buf); + if (status != PARSE_OK) { + log_verb("parse element returned status %d", status); + response_reset(rsp); + buf->rpos = old_rpos; + return status; + } + if (rsp->type == ELEM_UNKNOWN) { + rsp->type = el->type; + } + nelem--; + } + + return PARSE_OK; +} diff --git a/src/protocol/data/redis/parse.h 
b/src/protocol/data/redis/parse.h new file mode 100644 index 000000000..ce33167d3 --- /dev/null +++ b/src/protocol/data/redis/parse.h @@ -0,0 +1,56 @@ +#pragma once + +#include "request.h" +#include "response.h" + +#include +#include +#include + +#include + +/* Note(yao): the prefix cmd_ is mostly to be compatible with Twemcache metric + * names. + * On the other hand, the choice of putting request in front of parse instead of + * the other way around in `request_parse' is to allow users to easily query all + * metrics related to requests , similar for responses. + */ +/* name type description */ +#define PARSE_REQ_METRIC(ACTION) \ + ACTION( request_parse, METRIC_COUNTER, "# requests parsed" )\ + ACTION( request_parse_ex, METRIC_COUNTER, "# parsing error" ) + +/* name type description */ +#define PARSE_RSP_METRIC(ACTION) \ + ACTION( response_parse, METRIC_COUNTER, "# responses parsed" )\ + ACTION( response_parse_ex, METRIC_COUNTER, "# rsp parsing error" )\ + +typedef struct { + PARSE_REQ_METRIC(METRIC_DECLARE) +} parse_req_metrics_st; + +typedef struct { + PARSE_RSP_METRIC(METRIC_DECLARE) +} parse_rsp_metrics_st; + +typedef enum parse_rstatus { + PARSE_OK = 0, + PARSE_EUNFIN = -1, + PARSE_EEMPTY = -2, + PARSE_EOVERSIZE = -3, + PARSE_EINVALID = -4, + PARSE_EOTHER = -5, +} parse_rstatus_t; + +void parse_setup(parse_req_metrics_st *req, parse_rsp_metrics_st *rsp); +void parse_teardown(void); + +static inline bool +key_valid(struct bstring *key) +{ + return (key->len > 0 && key->len <= KEY_MAXLEN); +} + +parse_rstatus_t parse_req(struct request *req, struct buf *buf); + +parse_rstatus_t parse_rsp(struct response *rsp, struct buf *buf); diff --git a/src/protocol/data/redis/process.h b/src/protocol/data/redis/process.h new file mode 100644 index 000000000..81370a75c --- /dev/null +++ b/src/protocol/data/redis/process.h @@ -0,0 +1,17 @@ +#pragma once + +struct request; +struct response; + +/** + * Responses can be chained, using the same field that supports pooling. It is + * the responsibility of the caller to provide enough response structs if more + * than one response is necessary- e.g. get/gets commands with batching, or + * the stats command. + * + * Since response pool is not thread-safe, it is very important not trying to + * use the same response pool from more than one thread, including the helper + * thread(s). When the need arises for that, we will need to support resource + * pool(s) that are either thread-local or identifiable instead of static ones. 
+ */ +void process_request(struct response *rsp, struct request *req); diff --git a/src/protocol/data/redis/request.c b/src/protocol/data/redis/request.c new file mode 100644 index 000000000..2a38e5c46 --- /dev/null +++ b/src/protocol/data/redis/request.c @@ -0,0 +1,214 @@ +#include "request.h" + +#include "token.h" + +#include +#include +#include + +#define REQUEST_MODULE_NAME "protocol::redis::request" + +static bool request_init = false; +static request_metrics_st *request_metrics = NULL; + +struct command command_table[REQ_SENTINEL]; +#define CMD_INIT(_type, _str, _narg) \ + { .type = _type, .bstr = { sizeof(_str) - 1, (_str) }, .narg = _narg }, +struct command command_table[REQ_SENTINEL] = { + { .type = REQ_UNKNOWN, .bstr = { 0, NULL }, .narg = 0 }, + REQ_HASH(CMD_INIT) + REQ_ZSET(CMD_INIT) + REQ_MISC(CMD_INIT) +}; +#undef CMD_INIT + +static size_t ntoken = REQ_NTOKEN; +FREEPOOL(req_pool, reqq, request); +static struct req_pool reqp; +static bool reqp_init = false; + +void +request_reset(struct request *req) +{ + ASSERT(req != NULL); + + STAILQ_NEXT(req, next) = NULL; + req->free = false; + + req->noreply = 0; + req->serror = 0; + req->cerror = 0; + + req->type = REQ_UNKNOWN; + req->token->nelem = 0; +} + +struct request * +request_create(void) +{ + rstatus_i status; + struct request *req = cc_alloc(sizeof(struct request)); + + if (req == NULL) { + return NULL; + } + + status = array_create(&req->token, ntoken, sizeof(struct element)); + if (status != CC_OK) { + cc_free(req); + return NULL; + } + request_reset(req); + + INCR(request_metrics, request_create); + INCR(request_metrics, request_curr); + + return req; +} + +static struct request * +_request_create(void) +{ + struct request *req = request_create(); + + if (req != NULL) { + INCR(request_metrics, request_free); + } + + return req; +} + +void +request_destroy(struct request **request) +{ + struct request *req = *request; + ASSERT(req != NULL); + + INCR(request_metrics, request_destroy); + DECR(request_metrics, request_curr); + array_destroy(&req->token); + cc_free(req); + *request = NULL; +} + +static void +_request_destroy(struct request **request) +{ + request_destroy(request); + DECR(request_metrics, request_free); +} + +static void +request_pool_destroy(void) +{ + struct request *req, *treq; + + if (!reqp_init) { + log_warn("request pool was never created, ignore"); + } + + log_info("destroying request pool: free %"PRIu32, reqp.nfree); + + FREEPOOL_DESTROY(req, treq, &reqp, next, _request_destroy); + reqp_init = false; +} + +static void +request_pool_create(uint32_t max) +{ + struct request *req; + + if (reqp_init) { + log_warn("request pool has already been created, re-creating"); + + request_pool_destroy(); + } + + log_info("creating request pool: max %"PRIu32, max); + + FREEPOOL_CREATE(&reqp, max); + reqp_init = true; + + FREEPOOL_PREALLOC(req, &reqp, max, next, _request_create); + if (reqp.nfree < max) { + log_crit("cannot preallocate request pool, OOM. 
abort"); + exit(EXIT_FAILURE); + } +} + +struct request * +request_borrow(void) +{ + struct request *req; + + FREEPOOL_BORROW(req, &reqp, next, _request_create); + if (req == NULL) { + log_debug("borrow req failed: OOM %d"); + + return NULL; + } + request_reset(req); + + DECR(request_metrics, request_free); + INCR(request_metrics, request_borrow); + log_vverb("borrowing req %p", req); + + return req; +} + +void +request_return(struct request **request) +{ + struct request *req = *request; + + if (req == NULL) { + return; + } + + INCR(request_metrics, request_free); + INCR(request_metrics, request_return); + log_vverb("return req %p", req); + + req->free = true; + FREEPOOL_RETURN(req, &reqp, next); + + *request = NULL; +} + +void +request_setup(request_options_st *options, request_metrics_st *metrics) +{ + uint32_t max = REQ_POOLSIZE; + + log_info("set up the %s module", REQUEST_MODULE_NAME); + + if (request_init) { + log_warn("%s has already been setup, overwrite", REQUEST_MODULE_NAME); + } + + request_metrics = metrics; + + if (options != NULL) { + ntoken = option_uint(&options->request_ntoken); + max = option_uint(&options->request_poolsize); + } + request_pool_create(max); + + request_init = true; +} + +void +request_teardown(void) +{ + log_info("tear down the %s module", REQUEST_MODULE_NAME); + + if (!request_init) { + log_warn("%s has never been setup", REQUEST_MODULE_NAME); + } + + ntoken = REQ_NTOKEN; + request_pool_destroy(); + request_metrics = NULL; + + request_init = false; +} diff --git a/src/protocol/data/redis/request.h b/src/protocol/data/redis/request.h new file mode 100644 index 000000000..d5fb5f938 --- /dev/null +++ b/src/protocol/data/redis/request.h @@ -0,0 +1,79 @@ +#pragma once + +#include "cmd_hash.h" +#include "cmd_misc.h" +#include "cmd_zset.h" + +#include +#include +#include +#include +#include + +#include + +#define REQ_NTOKEN 127 /* # tokens in a command */ +#define KEY_MAXLEN 255 +#define REQ_POOLSIZE 0 + +/* name type default description */ +#define REQUEST_OPTION(ACTION) \ + ACTION( request_ntoken, OPTION_TYPE_UINT, REQ_NTOKEN, "# tokens in request")\ + ACTION( request_poolsize, OPTION_TYPE_UINT, REQ_POOLSIZE, "request pool size") + +typedef struct { + REQUEST_OPTION(OPTION_DECLARE) +} request_options_st; + +/* name type description */ +#define REQUEST_METRIC(ACTION) \ + ACTION( request_curr, METRIC_GAUGE, "# req created" )\ + ACTION( request_free, METRIC_GAUGE, "# free req in pool" )\ + ACTION( request_borrow, METRIC_COUNTER, "# reqs borrowed" )\ + ACTION( request_return, METRIC_COUNTER, "# reqs returned" )\ + ACTION( request_create, METRIC_COUNTER, "# reqs created" )\ + ACTION( request_destroy, METRIC_COUNTER, "# reqs destroyed" ) + +typedef struct { + REQUEST_METRIC(METRIC_DECLARE) +} request_metrics_st; + +#define GET_TYPE(_type, _str, narg) _type, +typedef enum cmd_type { + REQ_UNKNOWN, + REQ_HASH(GET_TYPE) + REQ_ZSET(GET_TYPE) + REQ_MISC(GET_TYPE) + REQ_SENTINEL +} cmd_type_e; +#undef GET_TYPE + +struct command { + cmd_type_e type; + struct bstring bstr; + int32_t narg; +}; + +extern struct command command_table[REQ_SENTINEL]; + +struct request { + STAILQ_ENTRY(request) next; /* allow request pooling/chaining */ + bool free; + + bool noreply; /* skip response */ + bool serror; /* server error */ + bool cerror; /* client error */ + + cmd_type_e type; + struct array *token; /* array elements are tokens */ +}; + +void request_setup(request_options_st *options, request_metrics_st *metrics); +void request_teardown(void); + +struct request 
*request_create(void); +void request_destroy(struct request **req); +void request_reset(struct request *req); + +struct request *request_borrow(void); +void request_return(struct request **req); diff --git a/src/protocol/data/redis/response.c b/src/protocol/data/redis/response.c new file mode 100644 index 000000000..0d1ee49ce --- /dev/null +++ b/src/protocol/data/redis/response.c @@ -0,0 +1,206 @@ +#include "response.h" + +#include "token.h" + +#include +#include +#include + +#define RESPONSE_MODULE_NAME "protocol::redis::response" + +static bool response_init = false; +static response_metrics_st *response_metrics = NULL; + +static size_t ntoken = RSP_NTOKEN; +FREEPOOL(rsp_pool, rspq, response); +static struct rsp_pool rspp; +static bool rspp_init = false; + +void +response_reset(struct response *rsp) +{ + ASSERT(rsp != NULL); + + STAILQ_NEXT(rsp, next) = NULL; + rsp->free = false; + + rsp->type = ELEM_UNKNOWN; + rsp->nil = false; + rsp->token->nelem = 0; +} + +struct response * +response_create(void) +{ + rstatus_i status; + struct response *rsp = cc_alloc(sizeof(struct response)); + + if (rsp == NULL) { + return NULL; + } + + status = array_create(&rsp->token, ntoken, sizeof(struct element)); + if (status != CC_OK) { + cc_free(rsp); + return NULL; + } + response_reset(rsp); + + INCR(response_metrics, response_create); + INCR(response_metrics, response_curr); + + return rsp; +} + +static struct response * +_response_create(void) +{ + struct response *rsp = response_create(); + + if (rsp != NULL) { + INCR(response_metrics, response_free); + } + + return rsp; +} + +void +response_destroy(struct response **response) +{ + struct response *rsp = *response; + ASSERT(rsp != NULL); + + INCR(response_metrics, response_destroy); + DECR(response_metrics, response_curr); + array_destroy(&rsp->token); + cc_free(rsp); + *response = NULL; +} + +static void +_response_destroy(struct response **response) +{ + response_destroy(response); + DECR(response_metrics, response_free); +} + +static void +response_pool_destroy(void) +{ + struct response *rsp, *trsp; + + if (rspp_init) { + log_info("destroying response pool: free %"PRIu32, rspp.nfree); + + FREEPOOL_DESTROY(rsp, trsp, &rspp, next, _response_destroy); + rspp_init = false; + } else { + log_warn("response pool was never created, ignore"); + } +} + +static void +response_pool_create(uint32_t max) +{ + struct response *rsp; + + if (rspp_init) { + log_warn("response pool has already been created, re-creating"); + + response_pool_destroy(); + } + + log_info("creating response pool: max %"PRIu32, max); + + FREEPOOL_CREATE(&rspp, max); + rspp_init = true; + + FREEPOOL_PREALLOC(rsp, &rspp, max, next, _response_create); + if (rspp.nfree < max) { + log_crit("cannot preallocate response pool, OOM. 
abort"); + exit(EXIT_FAILURE); + } +} + +struct response * +response_borrow(void) +{ + struct response *rsp; + + FREEPOOL_BORROW(rsp, &rspp, next, _response_create); + if (rsp == NULL) { + log_debug("borrow rsp failed: OOM %d"); + + return NULL; + } + response_reset(rsp); + + DECR(response_metrics, response_free); + INCR(response_metrics, response_borrow); + log_vverb("borrowing rsp %p", rsp); + + return rsp; +} + +/* + * Return a single response object + */ +void +response_return(struct response **response) +{ + ASSERT(response != NULL); + + struct response *rsp = *response; + + if (rsp == NULL) { + return; + } + + INCR(response_metrics, response_free); + INCR(response_metrics, response_return); + log_vverb("return rsp %p", rsp); + + rsp->free = true; + FREEPOOL_RETURN(rsp, &rspp, next); + + *response = NULL; +} + +void +response_setup(response_options_st *options, response_metrics_st *metrics) +{ + uint32_t max = RSP_POOLSIZE; + + log_info("set up the %s module", RESPONSE_MODULE_NAME); + + if (response_init) { + log_warn("%s has already been setup, overwrite", RESPONSE_MODULE_NAME); + } + + response_metrics = metrics; + + if (options != NULL) { + ntoken = option_uint(&options->response_ntoken); + max = option_uint(&options->response_poolsize); + } + + response_pool_create(max); + + response_init = true; +} + +void +response_teardown(void) +{ + log_info("tear down the %s module", RESPONSE_MODULE_NAME); + + if (!response_init) { + log_warn("%s has never been setup", RESPONSE_MODULE_NAME); + } + + ntoken = RSP_NTOKEN; + response_pool_destroy(); + response_metrics = NULL; + + response_init = false; +} diff --git a/src/protocol/data/redis/response.h b/src/protocol/data/redis/response.h new file mode 100644 index 000000000..51672e4f5 --- /dev/null +++ b/src/protocol/data/redis/response.h @@ -0,0 +1,66 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#define RSP_NTOKEN 255 /* # tokens in a command */ +#define RSP_POOLSIZE 0 + +/* name type default description */ +#define RESPONSE_OPTION(ACTION) \ + ACTION( response_ntoken, OPTION_TYPE_UINT, RSP_NTOKEN, "# tokens in response" )\ + ACTION( response_poolsize, OPTION_TYPE_UINT, RSP_POOLSIZE, "response pool size" ) + +typedef struct { + RESPONSE_OPTION(OPTION_DECLARE) +} response_options_st; + +/* name type description */ +#define RESPONSE_METRIC(ACTION) \ + ACTION( response_curr, METRIC_GAUGE, "# rsp created" )\ + ACTION( response_free, METRIC_GAUGE, "# free rsp in pool" )\ + ACTION( response_borrow, METRIC_COUNTER, "# rsps borrowed" )\ + ACTION( response_return, METRIC_COUNTER, "# rsps returned" )\ + ACTION( response_create, METRIC_COUNTER, "# rsps created" )\ + ACTION( response_destroy, METRIC_COUNTER, "# rsps destroyed" ) + +typedef struct { + RESPONSE_METRIC(METRIC_DECLARE) +} response_metrics_st; + +/** + * Note: there are some semi special values here: + * - a dummy entry RSP_UNKNOWN so we can use it as the initial type value; + * - a RSP_NUMERIC type that doesn't have a corresponding message body. + */ +#define RSP_STR_OK "+OK\r\n" + +/* + * NOTE(yao): we store fields as location in rbuf, this assumes the data will + * not be overwritten prematurely. + * Whether this is a reasonable design decision eventually remains to be seen. 
+ */ + +struct response { + STAILQ_ENTRY(response) next; /* allow response pooling/chaining */ + bool free; + + int type; + bool nil; + struct array *token; /* array elements are tokens */ +}; + +void response_setup(response_options_st *options, response_metrics_st *metrics); +void response_teardown(void); + +struct response *response_create(void); +void response_destroy(struct response **rsp); +void response_reset(struct response *rsp); + +struct response *response_borrow(void); +void response_return(struct response **rsp); diff --git a/src/protocol/data/redis/token.c b/src/protocol/data/redis/token.c new file mode 100644 index 000000000..e6c5ca25e --- /dev/null +++ b/src/protocol/data/redis/token.c @@ -0,0 +1,340 @@ +#include "token.h" + +#include "request.h" +#include "response.h" + +#include +#include +#include + +#include + +#define STR_MAXLEN 255 /* max length for simple string or error */ +#define BULK_MAXLEN (512 * MiB) +#define ARRAY_MAXLEN (64 * MiB) + +#define NIL_STR "$-1\r\n" + + +static inline compose_rstatus_t +_check_buf_size(struct buf **buf, uint32_t n) +{ + while (n > buf_wsize(*buf)) { + if (dbuf_double(buf) != CC_OK) { + log_debug("failed to write %u bytes to buf %p: insufficient " + "buffer space", n, *buf); + + return COMPOSE_ENOMEM; + } + } + + return COMPOSE_OK; +} + + +static parse_rstatus_t +_read_str(struct bstring *str, struct buf *buf) +{ + /* + * Note: buf->rpos is updated in this function, the caller is responsible + * for resetting the pointer if necessary. + */ + + str->len = 0; + str->data = buf->rpos; + /* + * Note: according to @antirez, simple strings are not supposed to be empty. + * However, there's no particular harm allowing a null simple string, so + * we allow it in this function + */ + for (; buf->rpos < buf->wpos; buf->rpos++) { + if (line_end(buf)) { + buf->rpos += CRLF_LEN; + log_vverb("simple string detected at %p, length %"PRIu32, str->len); + + return PARSE_OK; + } + if (++str->len > STR_MAXLEN) { + log_warn("simple string max length (%d) exceeded", STR_MAXLEN); + + return PARSE_EOVERSIZE; + } + } + + return PARSE_EUNFIN; +} + + +static parse_rstatus_t +_read_int(int64_t *num, struct buf *buf, int64_t min, int64_t max) +{ + /* + * Note: buf->rpos is updated in this function, the caller is responsible + * for resetting the pointer if necessary. 
+ */ + size_t len = 0; + int64_t sign = 1; + + if (*buf->rpos == '-') { + sign = -1; + buf->rpos++; + } + + *num = 0; + for (; buf_rsize(buf) > 0; buf->rpos++) { + if (isdigit(*buf->rpos)) { + if (*num < min / 10 || *num > max / 10) { + /* TODO(yao): catch the few numbers that will still overflow */ + log_warn("ill formatted token: integer out of bounds"); + + return PARSE_EOVERSIZE; + } + + len++; + *num = *num * 10ULL + sign * (*buf->rpos - '0'); + } else { + if (len == 0 || *buf->rpos != CR) { + log_warn("invalid character encountered: %c", *buf->rpos); + + return PARSE_EINVALID; + } + if (line_end(buf)) { + buf->rpos += CRLF_LEN; + log_vverb("parsed integer, value %"PRIi64, *num); + + return PARSE_OK; + } else { + return PARSE_EUNFIN; + } + } + } + + return PARSE_EUNFIN; +} + +static parse_rstatus_t +_read_bulk(struct bstring *str, struct buf *buf) +{ + parse_rstatus_t status; + int64_t len; + + bstring_init(str); + status = _read_int(&len, buf, -1, BULK_MAXLEN); + if (status != PARSE_OK) { + return status; + } + if (len < 0) { + log_vverb("null bulk string detected at %p", buf->rpos); + + return PARSE_EEMPTY; + } + + if (buf_rsize(buf) >= len + CRLF_LEN) { + /* have enough bytes for the whole payload plus CRLF */ + str->len = len; + str->data = buf->rpos; + buf->rpos += str->len; + + if (line_end(buf)) { + buf->rpos += CRLF_LEN; + log_vverb("bulk string detected at %p, length %"PRIu32, buf->rpos, + len); + + return PARSE_OK; + } else { + if (*buf->rpos == CR) { + return PARSE_EUNFIN; + } + + log_warn("invalid character encountered, expecting CRLF: %c%c", + *buf->rpos, *(buf->rpos + 1)); + + return PARSE_EINVALID; + } + } + + return PARSE_EUNFIN; +} + +static inline int +_write_int(struct buf *buf, int64_t val) +{ + size_t n = 0; + + n = cc_print_int64_unsafe(buf->wpos, val); + buf->wpos += n; + + buf_write(buf, CRLF, CRLF_LEN); + + return (n + CRLF_LEN); +} + +static inline int +_write_bstr(struct buf *buf, struct bstring *bstr) +{ + buf_write(buf, bstr->data, bstr->len); + buf_write(buf, CRLF, CRLF_LEN); + + return (bstr->len + CRLF_LEN); +} + + +bool +token_is_array(struct buf *buf) +{ + ASSERT(buf != NULL); + + return *(buf->rpos) == '*'; +} + +parse_rstatus_t +token_array_nelem(int64_t *nelem, struct buf *buf) +{ + ASSERT(nelem != NULL && buf != NULL); + ASSERT(token_is_array(buf)); + + buf->rpos++; + return _read_int(nelem, buf, -1, ARRAY_MAXLEN); +} + + +/* this function does not handle array, which is a composite type */ +parse_rstatus_t +parse_element(struct element *el, struct buf *buf) +{ + char *p; + parse_rstatus_t status; + + ASSERT(buf_rsize(buf) > 0); + + log_verb("detecting the next element %p in buf %p", el, buf); + + p = buf->rpos++; + switch (*p) { + case '+': + /* simple string */ + el->type = ELEM_STR; + status = _read_str(&el->bstr, buf); + break; + + case '-': + /* error */ + el->type = ELEM_ERR; + status = _read_str(&el->bstr, buf); + break; + + case ':': + /* integer */ + el->type = ELEM_INT; + status = _read_int(&el->num, buf, INT64_MIN, INT64_MAX); + break; + + case '$': + /* bulk string */ + el->type = ELEM_BULK; + status = _read_bulk(&el->bstr, buf); + if (status == PARSE_EEMPTY) { + status = PARSE_OK; + el->type = ELEM_NIL; + } + break; + + default: + return PARSE_EINVALID; + } + + if (status != PARSE_OK) { /* rewind */ + buf->rpos = p; + } + + return status; +} + + +int +compose_array_header(struct buf **buf, int nelem) +{ + struct buf *b; + size_t n = 1 + CRLF_LEN + CC_INT64_MAXLEN; + + if (_check_buf_size(buf, n) != COMPOSE_OK) { + return 
COMPOSE_ENOMEM; + } + + b = *buf; + *b->wpos++ = '*'; + return (1 + _write_int(b, nelem)); +} + +/* this function does not handle array, which is a composite type */ +int +compose_element(struct buf **buf, struct element *el) +{ + size_t n = 1 + CRLF_LEN; + struct buf *b; + + ASSERT(el->type > 0); + + /* estimate size (overestimate space needed for integers (int, bulk)) */ + switch (el->type) { + case ELEM_STR: + case ELEM_ERR: + n += el->bstr.len; + break; + + case ELEM_INT: + n += CC_INT64_MAXLEN; + break; + + case ELEM_BULK: + n += el->bstr.len + CC_INT64_MAXLEN + CRLF_LEN; + break; + + case ELEM_NIL: + n += 2; /* "-1" */ + break; + + default: + return COMPOSE_EINVALID; + } + + if (_check_buf_size(buf, n) != COMPOSE_OK) { + return COMPOSE_ENOMEM; + } + + b = *buf; + log_verb("write element %p in buf %p", el, b); + + switch (el->type) { + case ELEM_STR: + n = buf_write(b, "+", 1); + n += _write_bstr(b, &el->bstr); + break; + + case ELEM_ERR: + n = buf_write(b, "-", 1); + n += _write_bstr(b, &el->bstr); + break; + + case ELEM_INT: + n = buf_write(b, ":", 1); + n += _write_int(b, el->num); + break; + + case ELEM_BULK: + n = buf_write(b, "$", 1); + n += _write_int(b, el->bstr.len); + n += _write_bstr(b, &el->bstr); + break; + + case ELEM_NIL: + n = sizeof(NIL_STR) - 1; + buf_write(b, NIL_STR, n); + break; + + default: + NOT_REACHED(); + } + + return n; +} diff --git a/src/protocol/data/redis/token.h b/src/protocol/data/redis/token.h new file mode 100644 index 000000000..adf779175 --- /dev/null +++ b/src/protocol/data/redis/token.h @@ -0,0 +1,96 @@ +#pragma once + +/* + * this file handles the serialization / desrialization formats used by Redis: + * - RESP (REdis Serialization Protocol) + * - Simple format (not implemented yet) + * - Cap'n'proto (not implemented yet) + */ + +/** + * functions that deal with tokens in RESP (REdis Serialization Protocol). + * RESP is text-based protocol that uses special characters and prefixed-length + * to achieve high-performance parsing. + * + * RESP has the following guidelines for requests/responses: + * - Clients send commands to a Redis server as a RESP Array of Bulk Strings. + * - The server replies with one of the RESP types according to the command + * implementation. + * + * Different types have different leading character + * - For Simple Strings the first byte of the reply is "+" + * - For Errors the first byte of the reply is "-" + * - For Integers the first byte of the reply is ":" + * - For Bulk Strings the first byte of the reply is "$" + * - For Arrays the first byte of the reply is "*" + * + * Note: + * - In RESP, tokens of each type are always terminated with "\r\n" (CRLF). + * - There are multiple ways of representing Null values: + * + Null Bulk String: "$-1\r\n" + * + Null Array: "*-1\r\n" + */ + +/** + * It makes sense to always parse Simple Strings, Errors, and Integers in + * full. However, for Bulk Strings and Arrays, it is possible that they + * will be big enough that we cannot always expect the full content to be + * received at once, and hence it makes sense to allow partial parsing. + * + * For Bulk Strings, there are always two tokens, 1) the length; and 2) the + * string content. Since the content can be quite large, we should remember + * how many bytes have been received and how many more to expect. + * + * Array is a composite type, where individual elements can be any of the other + * type, and different types can mix in a single array. 
So to parse an array, + * we need to handle both a subset of all elements and incompleteness of the + * last element. + */ + +#include "parse.h" +#include "compose.h" + +#include +#include +#include + +/* array is not a basic element type */ +typedef enum element_type { + ELEM_UNKNOWN = 0, + ELEM_STR = 1, + ELEM_ERR = 2, + ELEM_INT = 3, + ELEM_BULK = 4, + ELEM_ARRAY = 5, + ELEM_NIL = 6, +} element_type_e; + +struct element { + element_type_e type; + union { + struct bstring bstr; + int64_t num; + }; +}; + +static inline bool +is_crlf(struct buf *buf) +{ + ASSERT(buf_rsize(buf) >= CRLF_LEN); + + return (*buf->rpos == CR && *(buf->rpos + 1) == LF); +} + + +static inline bool +line_end(struct buf *buf) +{ + return (buf_rsize(buf) >= CRLF_LEN && is_crlf(buf)); +} + +bool token_is_array(struct buf *buf); +parse_rstatus_t token_array_nelem(int64_t *nelem, struct buf *buf); +parse_rstatus_t parse_element(struct element *el, struct buf *buf); + +int compose_array_header(struct buf **buf, int nelem); +int compose_element(struct buf **buf, struct element *el); diff --git a/src/protocol/data/redis_include.h b/src/protocol/data/redis_include.h new file mode 100644 index 000000000..dea202d2e --- /dev/null +++ b/src/protocol/data/redis_include.h @@ -0,0 +1,6 @@ +#include "redis/compose.h" +#include "redis/token.h" +#include "redis/parse.h" +#include "redis/process.h" +#include "redis/request.h" +#include "redis/response.h" diff --git a/src/storage/cuckoo/cuckoo.c b/src/storage/cuckoo/cuckoo.c index 0d65d70a9..b87e81ae5 100644 --- a/src/storage/cuckoo/cuckoo.c +++ b/src/storage/cuckoo/cuckoo.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include /* TODO(yao): make D and iv[] configurable */ @@ -82,9 +82,11 @@ static void cuckoo_hash(uint32_t offset[], struct bstring *key) { int i; + uint32_t hv; for (i = 0; i < D; ++i) { - offset[i] = hashlittle(key->data, key->len, iv[i]) % max_nitem; + hash_murmur3_32(key->data, key->len, iv[i], &hv); + offset[i] = hv % max_nitem; } return; diff --git a/src/storage/slab/hashtable.c b/src/storage/slab/hashtable.c index 86c257e61..941b9a4bb 100644 --- a/src/storage/slab/hashtable.c +++ b/src/storage/slab/hashtable.c @@ -1,6 +1,6 @@ #include "hashtable.h" -#include +#include #include /* @@ -65,7 +65,7 @@ hashtable_destroy(struct hash_table *ht) static struct item_slh * _get_bucket(const char *key, size_t klen, struct hash_table *ht) { - return &(ht->table[hash(key, klen, 0) & HASHMASK(ht->hash_power)]); + return &(ht->table[hash_lookup3(key, klen, 0) & HASHMASK(ht->hash_power)]); } void diff --git a/test/protocol/data/CMakeLists.txt b/test/protocol/data/CMakeLists.txt index 7e5457343..da74fe8e4 100644 --- a/test/protocol/data/CMakeLists.txt +++ b/test/protocol/data/CMakeLists.txt @@ -1 +1,2 @@ add_subdirectory(memcache) +add_subdirectory(redis) diff --git a/test/protocol/data/redis/CMakeLists.txt b/test/protocol/data/redis/CMakeLists.txt new file mode 100644 index 000000000..dff05da00 --- /dev/null +++ b/test/protocol/data/redis/CMakeLists.txt @@ -0,0 +1,11 @@ +set(suite redis) +set(test_name check_${suite}) + +set(source check_${suite}.c) + +add_executable(${test_name} ${source}) +target_link_libraries(${test_name} protocol_${suite}) +target_link_libraries(${test_name} ccommon-static ${CHECK_LIBRARIES}) + +add_dependencies(check ${test_name}) +add_test(${test_name} ${test_name}) diff --git a/test/protocol/data/redis/check_redis.c b/test/protocol/data/redis/check_redis.c new file mode 100644 index 000000000..5e10ed05f --- /dev/null +++ 
b/test/protocol/data/redis/check_redis.c @@ -0,0 +1,582 @@ +#include + +#include +#include +#include +#include + +#include + +#include +#include + +/* define for each suite, local scope due to macro visibility rule */ +#define SUITE_NAME "redis" +#define DEBUG_LOG SUITE_NAME ".log" + +struct request *req; +struct response *rsp; +struct buf *buf; + +/* + * utilities + */ +static void +test_setup(void) +{ + req = request_create(); + rsp = response_create(); + buf = buf_create(); +} + +static void +test_reset(void) +{ + request_reset(req); + response_reset(rsp); + buf_reset(buf); +} + +static void +test_teardown(void) +{ + buf_destroy(&buf); + response_destroy(&rsp); + request_destroy(&req); +} + +/************** + * test cases * + **************/ + +/* + * token + */ +START_TEST(test_simple_string) +{ +#define STR "foobar" +#define SERIALIZED "+" STR "\r\n" + + struct element el_c, el_p; + int ret; + int len = sizeof(SERIALIZED) - 1; + char *pos; + + test_reset(); + + /* compose */ + el_c.type = ELEM_STR; + el_c.bstr = str2bstr(STR); + ret = compose_element(&buf, &el_c); + ck_assert_msg(ret == len, "bytes expected: %d, returned: %d", len, ret); + ck_assert_int_eq(cc_bcmp(buf->rpos, SERIALIZED, ret), 0); + + /* parse */ + pos = buf->rpos + 1; + ret = parse_element(&el_p, buf); + ck_assert_int_eq(ret, PARSE_OK); + ck_assert(buf->rpos == buf->wpos); + ck_assert(el_p.type == ELEM_STR); + ck_assert(el_p.bstr.len == sizeof(STR) - 1); + ck_assert(el_p.bstr.data == pos); + +#undef SERIALIZED +#undef STR +} +END_TEST + +START_TEST(test_error) +{ +#define ERR "something is wrong" +#define SERIALIZED "-" ERR "\r\n" + + struct element el_c, el_p; + int ret; + int len = sizeof(SERIALIZED) - 1; + char *pos; + + test_reset(); + + /* compose */ + el_c.type = ELEM_ERR; + el_c.bstr = str2bstr(ERR); + ret = compose_element(&buf, &el_c); + ck_assert_msg(ret == len, "bytes expected: %d, returned: %d", len, ret); + ck_assert_int_eq(cc_bcmp(buf->rpos, SERIALIZED, ret), 0); + + /* parse */ + pos = buf->rpos + 1; + ret = parse_element(&el_p, buf); + ck_assert_int_eq(ret, PARSE_OK); + ck_assert(buf->rpos == buf->wpos); + ck_assert(el_p.type == ELEM_ERR); + ck_assert(el_p.bstr.len == sizeof(ERR) - 1); + ck_assert(el_p.bstr.data == pos); + +#undef SERIALIZED +#undef ERR +} +END_TEST + +START_TEST(test_integer) +{ +#define OVERSIZE ":19223372036854775807\r\n" +#define INVALID1 ":123lOl456\r\n" +#define INVALID2 ":\r\n" + + struct element el_c, el_p; + int ret; + + struct int_pair { + char *serialized; + uint64_t num; + } pairs[3] = { + {":-1\r\n", -1}, + {":9223372036854775807\r\n", 9223372036854775807}, + {":128\r\n", 128} + }; + + + test_reset(); + for (int i = 0; i < 3; i++) { + size_t len = strlen(pairs[i].serialized); + + buf_reset(buf); + el_c.type = ELEM_INT; + el_c.num = pairs[i].num; + ret = compose_element(&buf, &el_c); + ck_assert(ret == len); + ck_assert_int_eq(cc_bcmp(buf->rpos, pairs[i].serialized, len), 0); + + el_p.type = ELEM_UNKNOWN; + ret = parse_element(&el_p, buf); + ck_assert_int_eq(ret, PARSE_OK); + ck_assert(buf->rpos == buf->wpos); + ck_assert(el_p.type == ELEM_INT); + ck_assert(el_p.num == pairs[i].num); + } + + buf_reset(buf); + buf_write(buf, OVERSIZE, sizeof(OVERSIZE) - 1); + ret = parse_element(&el_p, buf); + ck_assert_int_eq(ret, PARSE_EOVERSIZE); + + buf_reset(buf); + buf_write(buf, INVALID1, sizeof(INVALID1) - 1); + ret = parse_element(&el_p, buf); + ck_assert_int_eq(ret, PARSE_EINVALID); + + buf_reset(buf); + buf_write(buf, INVALID2, sizeof(INVALID2) - 1); + ret = parse_element(&el_p, 
buf); + ck_assert_int_eq(ret, PARSE_EINVALID); + +#undef INVALID2 +#undef INVALID1 +#undef OVERSIZE +} +END_TEST + +START_TEST(test_bulk_string) +{ +#define BULK "foo bar\r\n" +#define SERIALIZED "$9\r\n" BULK "\r\n" +#define EMPTY "$0\r\n\r\n" + + struct element el_c, el_p; + int ret; + int len = sizeof(SERIALIZED) - 1; + + test_reset(); + + /* compose */ + el_c.type = ELEM_BULK; + el_c.bstr = str2bstr(BULK); + ret = compose_element(&buf, &el_c); + ck_assert_msg(ret == len, "bytes expected: %d, returned: %d", len, ret); + ck_assert_int_eq(cc_bcmp(buf->rpos, SERIALIZED, ret), 0); + + /* parse */ + ck_assert_int_eq(parse_element(&el_p, buf), PARSE_OK); + ck_assert(buf->rpos == buf->wpos); + ck_assert(el_p.type == ELEM_BULK); + ck_assert(el_p.bstr.len == sizeof(BULK) - 1); + ck_assert(el_p.bstr.data + el_p.bstr.len == buf->rpos - CRLF_LEN); + ck_assert(buf->rpos == buf->wpos); + + /* empty string */ + buf_reset(buf); + len = sizeof(EMPTY) - 1; + el_c.bstr = null_bstring; + ret = compose_element(&buf, &el_c); + ck_assert_msg(ret == len, "bytes expected: %d, returned: %d", len, ret); + ck_assert_int_eq(cc_bcmp(buf->rpos, EMPTY, ret), 0); + ck_assert_int_eq(parse_element(&el_p, buf), PARSE_OK); + ck_assert(el_p.bstr.len == 0); + + +#undef EMPTY +#undef SERIALIZED +#undef BULK +} +END_TEST + +START_TEST(test_array) +{ +#define SERIALIZED "*2\r\n+foo\r\n$4\r\nbarr\r\n" +#define NELEM 2 + + size_t len = sizeof(SERIALIZED) - 1; + int64_t nelem; + + test_reset(); + + buf_write(buf, SERIALIZED, len); + ck_assert(token_is_array(buf)); + ck_assert_int_eq(token_array_nelem(&nelem, buf), PARSE_OK); + ck_assert_int_eq(nelem, NELEM); + +#undef NELEM +#undef SERIALIZED +} +END_TEST + +START_TEST(test_nil_bulk) +{ +#define NIL_BULK "$-1\r\n" + + size_t len = sizeof(NIL_BULK) - 1; + struct element el_c, el_p; + + test_reset(); + + el_c.type = ELEM_NIL; + ck_assert_int_eq(compose_element(&buf, &el_c), len); + ck_assert_int_eq(buf_rsize(buf), len); + ck_assert_int_eq(cc_bcmp(buf->rpos, NIL_BULK, len), 0); + + el_p.type = ELEM_UNKNOWN; + ck_assert_int_eq(parse_element(&el_p, buf), PARSE_OK); + ck_assert_int_eq(el_p.type, ELEM_NIL); + +#undef NIL_BULK +} +END_TEST + +START_TEST(test_unfin) +{ + char *token[10] = { + "+hello ", + "-err", + "-err\r", + ":5", + ":5\r", + "$5", + "$5\r", + "$5\r\n", + "$5\r\nabc", + "$5\r\nabcde\r", + }; + + for (int i = 0; i < 10; i++) { + size_t len = strlen(token[i]); + struct element el; + char *pos; + + buf_reset(buf); + buf_write(buf, token[i], len); + pos = buf->rpos; + ck_assert_int_eq(parse_element(&el, buf), PARSE_EUNFIN); + ck_assert(buf->rpos == pos); + } +} +END_TEST + + +/* + * request + */ + +START_TEST(test_quit) +{ +#define QUIT "quit" +#define SERIALIZED "*1\r\n$4\r\n" QUIT "\r\n" +#define INVALID "*2\r\n$4\r\n" QUIT "\r\n$3\r\nnow\r\n" + int ret; + struct element *el; + + test_reset(); + + req->type = REQ_QUIT; + el = array_push(req->token); + el->type = ELEM_BULK; + el->bstr = (struct bstring){sizeof(QUIT) - 1, QUIT}; + ret = compose_req(&buf, req); + ck_assert_int_eq(ret, sizeof(SERIALIZED) - 1); + ck_assert_int_eq(cc_bcmp(buf->rpos, SERIALIZED, ret), 0); + + el->type = ELEM_UNKNOWN; /* this effectively resets *el */ + request_reset(req); + ck_assert_int_eq(parse_req(req, buf), PARSE_OK); + ck_assert_int_eq(req->type, REQ_QUIT); + ck_assert_int_eq(req->token->nelem, 1); + el = array_first(req->token); + ck_assert_int_eq(el->type, ELEM_BULK); + ck_assert_int_eq(cc_bcmp(el->bstr.data, QUIT, sizeof(QUIT) - 1), 0); + + /* invalid number of arguments */ + 
test_reset(); + buf_write(buf, INVALID, sizeof(INVALID) - 1); + ck_assert_int_eq(parse_req(req, buf), PARSE_EINVALID); +#undef INVALID +#undef SERIALIZED +#undef QUIT +} +END_TEST + + +START_TEST(test_ping) +{ +#define PING "ping" +#define VAL "hello" +#define S_PING "*1\r\n$4\r\n" PING "\r\n" +#define S_ECHO "*2\r\n$4\r\n" PING "\r\n$5\r\nhello\r\n" +#define S_ECHO2 "*3\r\n$4\r\n" PING "\r\n$5\r\nhello\r\n$5\r\nworld\r\n" + int ret; + struct element *el; + + test_reset(); + + /* simple ping */ + buf_write(buf, S_PING, sizeof(S_PING) - 1); + ck_assert_int_eq(parse_req(req, buf), PARSE_OK); + ck_assert_int_eq(req->type, REQ_PING); + + /* ping as echo */ + test_reset(); + + req->type = REQ_PING; + el = array_push(req->token); + el->type = ELEM_BULK; + el->bstr = (struct bstring){sizeof(PING) - 1, PING}; + el = array_push(req->token); + el->type = ELEM_BULK; + el->bstr = (struct bstring){sizeof(VAL) - 1, VAL}; + ret = compose_req(&buf, req); + ck_assert_int_eq(ret, sizeof(S_ECHO) - 1); + ck_assert_int_eq(cc_bcmp(buf->rpos, S_ECHO, ret), 0); + + el->type = ELEM_UNKNOWN; /* resets *el */ + request_reset(req); + ck_assert_int_eq(parse_req(req, buf), PARSE_OK); + ck_assert_int_eq(req->type, REQ_PING); + ck_assert_int_eq(req->token->nelem, 2); + el = array_first(req->token); + ck_assert_int_eq(el->type, ELEM_BULK); + ck_assert_int_eq(cc_bcmp(el->bstr.data, PING, sizeof(PING) - 1), 0); + el = array_get(req->token, 1); + ck_assert_int_eq(el->type, ELEM_BULK); + ck_assert_int_eq(cc_bcmp(el->bstr.data, VAL, sizeof(VAL) - 1), 0); + + /* more arguments */ + test_reset(); + buf_write(buf, S_ECHO2, sizeof(S_ECHO2) - 1); + ck_assert_int_eq(parse_req(req, buf), PARSE_OK); + ck_assert_int_eq(req->token->nelem, 3); +#undef S_ECHO2 +#undef S_ECHO +#undef ECHO +#undef S_PING +#undef QUIT +} +END_TEST + +/* + * response + */ +START_TEST(test_ok) +{ +#define OK "OK" +#define SERIALIZED "+" OK "\r\n" + int ret; + struct element *el; + + test_reset(); + + rsp->type = ELEM_STR; + el = array_push(rsp->token); + el->type = ELEM_STR; + el->bstr = (struct bstring){sizeof(OK) - 1, OK}; + ret = compose_rsp(&buf, rsp); + ck_assert_int_eq(ret, sizeof(SERIALIZED) - 1); + ck_assert_int_eq(cc_bcmp(buf->rpos, SERIALIZED, ret), 0); + + el->type = ELEM_UNKNOWN; /* resets *el */ + response_reset(rsp); + ck_assert_int_eq(parse_rsp(rsp, buf), PARSE_OK); + ck_assert_int_eq(rsp->type, ELEM_STR); + ck_assert_int_eq(rsp->token->nelem, 1); + el = array_first(rsp->token); + ck_assert_int_eq(el->type, ELEM_STR); + ck_assert_int_eq(cc_bcmp(el->bstr.data, OK, sizeof(OK) - 1), 0); +#undef SERIALIZED +#undef OK +} +END_TEST + +START_TEST(test_array_reply) +{ +#define SERIALIZED "*5\r\n:-10\r\n$-1\r\n-ERR invalid arg\r\n+foo\r\n$5\r\nHELLO\r\n" + size_t len = sizeof(SERIALIZED) - 1; + struct element *el; + + test_reset(); + + buf_write(buf, SERIALIZED, len); + ck_assert_int_eq(parse_rsp(rsp, buf), PARSE_OK); + ck_assert_int_eq(rsp->type, ELEM_ARRAY); + ck_assert_int_eq(rsp->token->nelem, 5); + el = array_first(rsp->token); + ck_assert_int_eq(el->type, ELEM_INT); + el = array_get(rsp->token, 1); + ck_assert_int_eq(el->type, ELEM_NIL); + el = array_get(rsp->token, 2); + ck_assert_int_eq(el->type, ELEM_ERR); + el = array_get(rsp->token, 3); + ck_assert_int_eq(el->type, ELEM_STR); + el = array_get(rsp->token, 4); + ck_assert_int_eq(el->type, ELEM_BULK); + ck_assert_int_eq(el->bstr.len, 5); + ck_assert_int_eq(cc_bcmp(el->bstr.data, "HELLO", 5), 0); + ck_assert_int_eq(buf_rsize(buf), 0); + + ck_assert_int_eq(compose_rsp(&buf, rsp), len); + 
ck_assert_int_eq(buf_rsize(buf), len); + ck_assert_int_eq(cc_bcmp(buf->rpos, SERIALIZED, len), 0); +#undef SERIALIZED +} +END_TEST + +/* + * request/response pool + */ + +START_TEST(test_req_pool_basic) +{ +#define POOL_SIZE 10 + int i; + struct request *reqs[POOL_SIZE]; + request_options_st options = { + .request_ntoken = {.type = OPTION_TYPE_UINT, .val.vuint = REQ_NTOKEN}, + .request_poolsize = {.type = OPTION_TYPE_UINT, .val.vuint = POOL_SIZE}}; + + request_setup(&options, NULL); + + for (i = 0; i < POOL_SIZE; i++) { + reqs[i] = request_borrow(); + ck_assert_msg(reqs[i] != NULL, "expected to borrow a request"); + } + ck_assert_msg(request_borrow() == NULL, "expected request pool to be depleted"); + for (i = 0; i < POOL_SIZE; i++) { + request_return(&reqs[i]); + ck_assert_msg(reqs[i] == NULL, "expected request to be nulled after return"); + } + + request_teardown(); +#undef POOL_SIZE +} +END_TEST + +START_TEST(test_rsp_pool_basic) +{ +#define POOL_SIZE 10 + int i; + struct response *rsps[POOL_SIZE]; + response_options_st options = { + .response_ntoken = {.type = OPTION_TYPE_UINT, .val.vuint = RSP_NTOKEN}, + .response_poolsize = {.type = OPTION_TYPE_UINT, .val.vuint = POOL_SIZE}}; + + response_setup(&options, NULL); + + for (i = 0; i < POOL_SIZE; i++) { + rsps[i] = response_borrow(); + ck_assert_msg(rsps[i] != NULL, "expected to borrow a response"); + } + ck_assert_msg(response_borrow() == NULL, "expected response pool to be depleted"); + for (i = 0; i < POOL_SIZE; i++) { + response_return(&rsps[i]); + ck_assert_msg(rsps[i] == NULL, "expected response to be nulled after return"); + } + + response_teardown(); +#undef POOL_SIZE +} +END_TEST + +/* + * test suite + */ +static Suite * +redis_suite(void) +{ + Suite *s = suite_create(SUITE_NAME); + + /* token */ + TCase *tc_token = tcase_create("token"); + suite_add_tcase(s, tc_token); + + tcase_add_test(tc_token, test_simple_string); + tcase_add_test(tc_token, test_error); + tcase_add_test(tc_token, test_integer); + tcase_add_test(tc_token, test_bulk_string); + tcase_add_test(tc_token, test_array); + tcase_add_test(tc_token, test_nil_bulk); + tcase_add_test(tc_token, test_unfin); + + /* basic requests */ + TCase *tc_request = tcase_create("request"); + suite_add_tcase(s, tc_request); + + tcase_add_test(tc_request, test_quit); + tcase_add_test(tc_request, test_ping); + + /* basic response */ + TCase *tc_response = tcase_create("response"); + suite_add_tcase(s, tc_response); + + tcase_add_test(tc_response, test_ok); + tcase_add_test(tc_response, test_array_reply); + + /* basic responses */ + + /* req/rsp objects, pooling */ + TCase *tc_pool = tcase_create("request/response pool"); + suite_add_tcase(s, tc_pool); + + tcase_add_test(tc_pool, test_req_pool_basic); + tcase_add_test(tc_pool, test_rsp_pool_basic); + + return s; +} + +/* TODO(yao): move main to a different file, keep most test files main-less */ +int +main(void) +{ + int nfail; + + /* setup */ + test_setup(); + + Suite *suite = redis_suite(); + SRunner *srunner = srunner_create(suite); + srunner_set_log(srunner, DEBUG_LOG); + srunner_run_all(srunner, CK_ENV); /* set CK_VEBOSITY in ENV to customize */ + nfail = srunner_ntests_failed(srunner); + srunner_free(srunner); + + /* teardown */ + test_teardown(); + + return (nfail == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +}
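The arity check in _parse_cmd() relies on a sign convention in command_table: a non-negative narg means the command takes exactly narg tokens (verb included), while a negative narg means it takes at least -narg tokens. Below is a small standalone illustration of that rule with made-up token counts; it assumes nothing beyond the C standard library and is not part of the patch itself.

    #include <stdbool.h>
    #include <stdio.h>

    /* mirrors the check in _parse_cmd(): reject when an exact-arity command gets
     * a different token count, or a variadic command gets fewer than its minimum */
    static bool
    narg_ok(int narg, int ntoken)
    {
        return !((narg >= 0 && narg != ntoken) || ntoken + narg < 0);
    }

    int
    main(void)
    {
        printf("%d\n", narg_ok(2, 2));   /* exact arity of 2, 2 tokens given: ok */
        printf("%d\n", narg_ok(2, 3));   /* exact arity of 2, 3 tokens given: rejected */
        printf("%d\n", narg_ok(-2, 5));  /* variadic, needs >= 2 tokens: ok */
        printf("%d\n", narg_ok(-2, 1));  /* variadic, only 1 token: rejected */
        return 0;
    }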
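The leading-character rules described in token.h map to the following concrete encodings, all taken from the unit tests above. Every token is CRLF-terminated, and a bulk string carries a prefixed byte length because its payload may itself contain CRLF.

    simple string "foobar"        ->  "+foobar\r\n"
    error "something is wrong"    ->  "-something is wrong\r\n"
    integer 128                   ->  ":128\r\n"
    bulk string "foo bar\r\n"     ->  "$9\r\nfoo bar\r\n\r\n"
    nil bulk string               ->  "$-1\r\n"
    array header for 2 elements   ->  "*2\r\n"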
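A minimal compose-then-parse sketch using the token and request APIs introduced in this diff, mirroring test_ping; the includes and module setup for buf and request are elided and assumed to follow check_redis.c, so treat this as illustrative rather than canonical.

    struct buf *buf = buf_create();
    struct request *req = request_create();
    struct element el;

    /* serialize "*2\r\n$4\r\nping\r\n$5\r\nhello\r\n" */
    compose_array_header(&buf, 2);
    el.type = ELEM_BULK;
    el.bstr = str2bstr("ping");
    compose_element(&buf, &el);
    el.bstr = str2bstr("hello");
    compose_element(&buf, &el);

    /* parse it back; PARSE_EUNFIN means "need more bytes", and the read position
     * is rewound so the caller can retry once more data arrives */
    if (parse_req(req, buf) == PARSE_OK) {
        /* req->type is REQ_PING; req->token holds two ELEM_BULK elements whose
         * bstrings point into buf, so buf must outlive request processing */
    }

    request_destroy(&req);
    buf_destroy(&buf);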
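Finally, a sketch of the request pool lifecycle exercised by test_req_pool_basic; the pool size of 4 is an arbitrary illustrative value, and the metrics argument is optional (the tests pass NULL).

    request_options_st options = {
        .request_ntoken   = {.type = OPTION_TYPE_UINT, .val.vuint = REQ_NTOKEN},
        .request_poolsize = {.type = OPTION_TYPE_UINT, .val.vuint = 4},
    };

    request_setup(&options, NULL);

    struct request *req = request_borrow();   /* NULL once the pool is depleted */
    if (req != NULL) {
        /* ... parse into req, process it, compose a response ... */
        request_return(&req);                  /* sets req back to NULL */
    }

    request_teardown();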