diff --git a/LICENSE.hdrhistogram b/LICENSE.hdrhistogram new file mode 100644 index 0000000000..7f20d7dc43 --- /dev/null +++ b/LICENSE.hdrhistogram @@ -0,0 +1,27 @@ +This license covers src/rdhdrhistogram.c which is a C port of +Coda Hale's Golang HdrHistogram https://github.com/codahale/hdrhistogram +at revision 3a0bb77429bd3a61596f5e8a3172445844342120 + +----------------------------------------------------------------------------- + +The MIT License (MIT) + +Copyright (c) 2014 Coda Hale + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE diff --git a/LICENSES.txt b/LICENSES.txt index ee8a6f4507..26aea08d90 100644 --- a/LICENSES.txt +++ b/LICENSES.txt @@ -59,6 +59,37 @@ LICENSE.crc32c */ +LICENSE.hdrhistogram +-------------------------------------------------------------- +This license covers src/rdhdrhistogram.c which is a C port of +Coda Hale's Golang HdrHistogram https://github.com/codahale/hdrhistogram +at revision 3a0bb77429bd3a61596f5e8a3172445844342120 + +----------------------------------------------------------------------------- + +The MIT License (MIT) + +Copyright (c) 2014 Coda Hale + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE + + LICENSE.lz4 -------------------------------------------------------------- src/xxhash.[ch] src/lz4*.[ch]: git@github.com:lz4/lz4.git e2827775ee80d2ef985858727575df31fc60f1f3 diff --git a/configure.librdkafka b/configure.librdkafka index 500d1e08d8..01aaba127d 100644 --- a/configure.librdkafka +++ b/configure.librdkafka @@ -67,6 +67,11 @@ void foo (void) { "#include " mkl_lib_check "libcrypto" "" disable CC "-lcrypto" + if mkl_lib_check "libm" "" disable CC "-lm" \ + "#include "; then + mkl_allvar_set WITH_HDRHISTOGRAM WITH_HDRHISTOGRAM y + fi + if [[ "$ENABLE_LZ4_EXT" == "y" ]]; then mkl_lib_check --static=-llz4 "liblz4" "WITH_LZ4_EXT" disable CC "-llz4" \ "#include " diff --git a/src/Makefile b/src/Makefile index 6c340b732e..48ee237ac2 100644 --- a/src/Makefile +++ b/src/Makefile @@ -13,6 +13,7 @@ SRCS_$(WITH_SASL_CYRUS) += rdkafka_sasl_cyrus.c SRCS_$(WITH_SASL_SCRAM) += rdkafka_sasl_scram.c SRCS_$(WITH_SNAPPY) += snappy.c SRCS_$(WITH_ZLIB) += rdgz.c +SRCS_$(WITH_HDRHISTOGRAM) += rdhdrhistogram.c SRCS_LZ4 = xxhash.c ifneq ($(WITH_LZ4_EXT), y) diff --git a/src/rdfloat.h b/src/rdfloat.h new file mode 100644 index 0000000000..d36ac3ee90 --- /dev/null +++ b/src/rdfloat.h @@ -0,0 +1,67 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2012-2018, Magnus Edenhill + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + + +/** + * rd_dbl_eq0(a,b,prec) + * Check two doubles for equality with the specified precision. + * Use this instead of != and == for all floats/doubles. + * More info: + * http://docs.sun.com/source/806-3568/ncg_goldberg.html + */ +static inline int rd_dbl_eq0 (double a, double b, double prec) RD_UNUSED; +static inline int rd_dbl_eq0 (double a, double b, double prec) { + return fabs(a - b) < prec; +} + +/* A default 'good' double-equality precision value. + * This rather timid epsilon value is useful for tenths, hundreths, + * and thousands parts, but not anything more precis than that. + * If a higher precision is needed, use dbl_eq0 and dbl_eq0 directly + * and specify your own precision. */ +#define RD_DBL_EPSILON 0.00001 + +/** + * rd_dbl_eq(a,b) + * Same as rd_dbl_eq0() above but with a predefined 'good' precision. + */ +#define rd_dbl_eq(a,b) rd_dbl_eq0(a,b,RD_DBL_EPSILON) + +/** + * rd_dbl_ne(a,b) + * Same as rd_dbl_eq() above but with reversed logic: not-equal. + */ +#define rd_dbl_ne(a,b) (!rd_dbl_eq0(a,b,RD_DBL_EPSILON)) + +/** + * rd_dbl_zero(a) + * Checks if the double `a' is zero (or close enough). + */ +#define rd_dbl_zero(a) rd_dbl_eq0(a,0.0,RD_DBL_EPSILON) diff --git a/src/rdhdrhistogram.c b/src/rdhdrhistogram.c new file mode 100644 index 0000000000..24413f9084 --- /dev/null +++ b/src/rdhdrhistogram.c @@ -0,0 +1,726 @@ +/* + * This license covers this C port of + * Coda Hale's Golang HdrHistogram https://github.com/codahale/hdrhistogram + * at revision 3a0bb77429bd3a61596f5e8a3172445844342120 + * + * ---------------------------------------------------------------------------- + * + * The MIT License (MIT) + * + * Copyright (c) 2014 Coda Hale + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. +*/ + +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2018, Magnus Edenhill + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Minimal C Hdr_Histogram based on Coda Hale's Golang implementation. + * https://github.com/codahale/hdr_histogram + * + * + * A Histogram is a lossy data structure used to record the distribution of + * non-normally distributed data (like latency) with a high degree of accuracy + * and a bounded degree of precision. + * + * + */ + +#include "rd.h" + +#include +#include +#include + +#include "rdhdrhistogram.h" +#include "rdunittest.h" +#include "rdfloat.h" + +void rd_hdr_histogram_destroy (rd_hdr_histogram_t *hdr) { + free(hdr); +} + +rd_hdr_histogram_t *rd_hdr_histogram_new (int64_t minValue, int64_t maxValue, + int significantFigures) { + rd_hdr_histogram_t *hdr; + int64_t largestValueWithSingleUnitResolution; + int32_t subBucketCountMagnitude; + int32_t subBucketHalfCountMagnitude; + int32_t unitMagnitude; + int32_t subBucketCount; + int32_t subBucketHalfCount; + int64_t subBucketMask; + int64_t smallestUntrackableValue; + int32_t bucketsNeeded = 1; + int32_t bucketCount; + int32_t countsLen; + + if (significantFigures < 1 || significantFigures > 5) + return NULL; + + largestValueWithSingleUnitResolution = + (int64_t)(2.0 * pow(10.0, (double)significantFigures)); + + subBucketCountMagnitude = + (int32_t)ceil( + log2((double)largestValueWithSingleUnitResolution)); + + subBucketHalfCountMagnitude = RD_MAX(subBucketCountMagnitude, 1) - 1; + + unitMagnitude = RD_MAX((int32_t)floor(log2((double)minValue)), 0); + + subBucketCount = (int32_t)pow(2, + (double)subBucketHalfCountMagnitude+1.0); + + subBucketHalfCount = subBucketCount / 2; + + subBucketMask = (int64_t)(subBucketCount-1) << unitMagnitude; + + /* Determine exponent range needed to support the trackable + * value with no overflow: */ + smallestUntrackableValue = (int64_t)subBucketCount << unitMagnitude; + while (smallestUntrackableValue < maxValue) { + smallestUntrackableValue <<= 1; + bucketsNeeded++; + } + + bucketCount = bucketsNeeded; + countsLen = (bucketCount + 1) * (subBucketCount / 2); + hdr = calloc(1, sizeof(*hdr) + (sizeof(*hdr->counts) * countsLen)); + hdr->counts = (int64_t *)(hdr+1); + + hdr->lowestTrackableValue = minValue; + hdr->highestTrackableValue = maxValue; + hdr->unitMagnitude = unitMagnitude; + hdr->significantFigures = significantFigures; + hdr->subBucketHalfCountMagnitude = subBucketHalfCountMagnitude; + hdr->subBucketHalfCount = subBucketHalfCount; + hdr->subBucketMask = subBucketMask; + hdr->subBucketCount = subBucketCount; + hdr->bucketCount = bucketCount; + hdr->countsLen = countsLen; + hdr->totalCount = 0; + hdr->lowestOutOfRange = minValue; + hdr->highestOutOfRange = maxValue; + + return hdr; +} + +/** + * @brief Deletes all recorded values and resets histogram. + */ +void rd_hdr_histogram_reset (rd_hdr_histogram_t *hdr) { + int32_t i; + hdr->totalCount = 0; + for (i = 0 ; i < hdr->countsLen ; i++) + hdr->counts[i] = 0; +} + + + +static int32_t +rd_hdr_countsIndex (const rd_hdr_histogram_t *hdr, + int32_t bucketIdx, int32_t subBucketIdx) { + int32_t bucketBaseIdx = (bucketIdx + 1) << + hdr->subBucketHalfCountMagnitude; + int32_t offsetInBucket = subBucketIdx - hdr->subBucketHalfCount; + return bucketBaseIdx + offsetInBucket; +} + +static __inline int64_t +rd_hdr_getCountAtIndex (const rd_hdr_histogram_t *hdr, + int32_t bucketIdx, int32_t subBucketIdx) { + return hdr->counts[rd_hdr_countsIndex(hdr, bucketIdx, subBucketIdx)]; +} + + +static __inline int64_t bitLen (int64_t x) { + int64_t n = 0; + for (; x >= 0x8000; x >>= 16) + n += 16; + if (x >= 0x80) { + x >>= 8; + n += 8; + } + if (x >= 0x8) { + x >>= 4; + n += 4; + } + if (x >= 0x2) { + x >>= 2; + n += 2; + } + if (x >= 0x1) + n++; + return n; +} + + +static __inline int32_t +rd_hdr_getBucketIndex (const rd_hdr_histogram_t *hdr, int64_t v) { + int64_t pow2Ceiling = bitLen(v | hdr->subBucketMask); + return (int32_t)(pow2Ceiling - (int64_t)hdr->unitMagnitude - + (int64_t)(hdr->subBucketHalfCountMagnitude+1)); +} + +static __inline int32_t +rd_hdr_getSubBucketIdx (const rd_hdr_histogram_t *hdr, int64_t v, int32_t idx) { + return (int32_t)(v >> ((int64_t)idx + (int64_t)hdr->unitMagnitude)); +} + +static __inline int64_t +rd_hdr_valueFromIndex (const rd_hdr_histogram_t *hdr, + int32_t bucketIdx, int32_t subBucketIdx) { + return (int64_t)subBucketIdx << + ((int64_t)bucketIdx + hdr->unitMagnitude); +} + +static __inline int64_t +rd_hdr_sizeOfEquivalentValueRange (const rd_hdr_histogram_t *hdr, int64_t v) { + int32_t bucketIdx = rd_hdr_getBucketIndex(hdr, v); + int32_t subBucketIdx = rd_hdr_getSubBucketIdx(hdr, v, bucketIdx); + int32_t adjustedBucket = bucketIdx; + if (subBucketIdx >= hdr->subBucketCount) + adjustedBucket++; + return 1 << (hdr->unitMagnitude + (int64_t)adjustedBucket); +} + +static __inline int64_t +rd_hdr_lowestEquivalentValue (const rd_hdr_histogram_t *hdr, int64_t v) { + int32_t bucketIdx = rd_hdr_getBucketIndex(hdr, v); + int32_t subBucketIdx = rd_hdr_getSubBucketIdx(hdr, v, bucketIdx); + return rd_hdr_valueFromIndex(hdr, bucketIdx, subBucketIdx); +} + + +static __inline int64_t +rd_hdr_nextNonEquivalentValue (const rd_hdr_histogram_t *hdr, int64_t v) { + return rd_hdr_lowestEquivalentValue(hdr, v) + + rd_hdr_sizeOfEquivalentValueRange(hdr, v); +} + + +static __inline int64_t +rd_hdr_highestEquivalentValue (const rd_hdr_histogram_t *hdr, int64_t v) { + return rd_hdr_nextNonEquivalentValue(hdr, v) - 1; +} + +static __inline int64_t +rd_hdr_medianEquivalentValue (const rd_hdr_histogram_t *hdr, int64_t v) { + return rd_hdr_lowestEquivalentValue(hdr, v) + + (rd_hdr_sizeOfEquivalentValueRange(hdr, v) >> 1); +} + + +static __inline int32_t +rd_hdr_countsIndexFor (const rd_hdr_histogram_t *hdr, int64_t v) { + int32_t bucketIdx = rd_hdr_getBucketIndex(hdr, v); + int32_t subBucketIdx = rd_hdr_getSubBucketIdx(hdr, v, bucketIdx); + return rd_hdr_countsIndex(hdr, bucketIdx, subBucketIdx); +} + + + +typedef struct rd_hdr_iter_s { + const rd_hdr_histogram_t *hdr; + int bucketIdx; + int subBucketIdx; + int64_t countAtIdx; + int64_t countToIdx; + int64_t valueFromIdx; + int64_t highestEquivalentValue; +} rd_hdr_iter_t; + +#define RD_HDR_ITER_INIT(hdr) { hdr: hdr, subBucketIdx: -1 } + +static int rd_hdr_iter_next (rd_hdr_iter_t *it) { + const rd_hdr_histogram_t *hdr = it->hdr; + + if (it->countToIdx >= hdr->totalCount) + return 0; + + it->subBucketIdx++; + if (it->subBucketIdx >= hdr->subBucketCount) { + it->subBucketIdx = hdr->subBucketHalfCount; + it->bucketIdx++; + } + + if (it->bucketIdx >= hdr->bucketCount) + return 0; + + it->countAtIdx = rd_hdr_getCountAtIndex(hdr, + it->bucketIdx, + it->subBucketIdx); + it->countToIdx += it->countAtIdx; + it->valueFromIdx = rd_hdr_valueFromIndex(hdr, + it->bucketIdx, + it->subBucketIdx); + it->highestEquivalentValue = + rd_hdr_highestEquivalentValue(hdr, it->valueFromIdx); + + return 1; +} + + +double rd_hdr_histogram_stddev (rd_hdr_histogram_t *hdr) { + double mean; + double geometricDevTotal = 0.0; + rd_hdr_iter_t it = RD_HDR_ITER_INIT(hdr); + + if (hdr->totalCount == 0) + return 0; + + mean = rd_hdr_histogram_mean(hdr); + + + while (rd_hdr_iter_next(&it)) { + double dev; + + if (it.countAtIdx == 0) + continue; + + dev = (double)rd_hdr_medianEquivalentValue( + hdr, it.valueFromIdx) - mean; + geometricDevTotal += (dev * dev) * (double)it.countAtIdx; + } + + return sqrt(geometricDevTotal / (double)hdr->totalCount); +} + + +/** + * @returns the approximate maximum recorded value. + */ +int64_t rd_hdr_histogram_max (const rd_hdr_histogram_t *hdr) { + int64_t vmax = 0; + rd_hdr_iter_t it = RD_HDR_ITER_INIT(hdr); + + while (rd_hdr_iter_next(&it)) { + if (it.countAtIdx != 0) + vmax = it.highestEquivalentValue; + } + return rd_hdr_highestEquivalentValue(hdr, vmax); +} + +/** + * @returns the approximate minimum recorded value. + */ +int64_t rd_hdr_histogram_min (const rd_hdr_histogram_t *hdr) { + int64_t vmin = 0; + rd_hdr_iter_t it = RD_HDR_ITER_INIT(hdr); + + while (rd_hdr_iter_next(&it)) { + if (it.countAtIdx != 0 && vmin == 0) { + vmin = it.highestEquivalentValue; + break; + } + } + return rd_hdr_lowestEquivalentValue(hdr, vmin); +} + +/** + * @returns the approximate arithmetic mean of the recorded values. + */ +double rd_hdr_histogram_mean (const rd_hdr_histogram_t *hdr) { + int64_t total = 0; + rd_hdr_iter_t it = RD_HDR_ITER_INIT(hdr); + + if (hdr->totalCount == 0) + return 0.0; + + while (rd_hdr_iter_next(&it)) { + if (it.countAtIdx != 0) + total += it.countAtIdx * + rd_hdr_medianEquivalentValue(hdr, + it.valueFromIdx); + } + return (double)total / (double)hdr->totalCount; +} + + + +/** + * @brief Records the given value. + * + * @returns 1 if value was recorded or 0 if value is out of range. + */ + +int rd_hdr_histogram_record (rd_hdr_histogram_t *hdr, int64_t v) { + int32_t idx = rd_hdr_countsIndexFor(hdr, v); + + if (idx < 0 || hdr->countsLen <= idx) { + hdr->outOfRangeCount++; + if (v > hdr->highestOutOfRange) + hdr->highestOutOfRange = v; + if (v < hdr->lowestOutOfRange) + hdr->lowestOutOfRange = v; + return 0; + } + + hdr->counts[idx]++; + hdr->totalCount++; + + return 1; +} + + +/** + * @returns the recorded value at the given quantile (0..100). + */ +int64_t rd_hdr_histogram_quantile (const rd_hdr_histogram_t *hdr, double q) { + int64_t total = 0; + int64_t countAtPercentile; + rd_hdr_iter_t it = RD_HDR_ITER_INIT(hdr); + + if (q > 100.0) + q = 100.0; + + countAtPercentile = + (int64_t)(((q / 100.0) * (double)hdr->totalCount) + 0.5); + + while (rd_hdr_iter_next(&it)) { + total += it.countAtIdx; + if (total >= countAtPercentile) + return rd_hdr_highestEquivalentValue( + hdr, it.valueFromIdx); + } + + return 0; +} + + + +/** + * @name Unit tests + * @{ + * + * + * + */ + +/** + * @returns 0 on success or 1 on failure. + */ +static int ut_high_sigfig (void) { + rd_hdr_histogram_t *hdr; + const int64_t input[] = { + 459876, 669187, 711612, 816326, 931423, + 1033197, 1131895, 2477317, 3964974, 12718782, + }; + size_t i; + int64_t v; + const int64_t exp = 1048575; + + hdr = rd_hdr_histogram_new(459876, 12718782, 5); + for (i = 0 ; i < RD_ARRAYSIZE(input) ; i++) { + /* Ignore errors (some should fail) */ + rd_hdr_histogram_record(hdr, input[i]); + } + + v = rd_hdr_histogram_quantile(hdr, 50); + RD_UT_ASSERT(v == exp, "Median is %"PRId64", expected %"PRId64, + v, exp); + + rd_hdr_histogram_destroy(hdr); + RD_UT_PASS(); +} + +static int ut_quantile (void) { + rd_hdr_histogram_t *hdr = rd_hdr_histogram_new(1, 10000000, 3); + size_t i; + const struct { + double q; + int64_t v; + } exp[] = { + {q: 50, v: 500223}, + {q: 75, v: 750079}, + {q: 90, v: 900095}, + {q: 95, v: 950271}, + {q: 99, v: 990207}, + {q: 99.9, v: 999423}, + {q: 99.99, v: 999935}, + }; + + for (i = 0 ; i < 1000000 ; i++) { + int r = rd_hdr_histogram_record(hdr, (int64_t)i); + RD_UT_ASSERT(r, "record(%"PRId64") failed\n", (int64_t)i); + } + + for (i = 0 ; i < RD_ARRAYSIZE(exp) ; i++) { + int64_t v = rd_hdr_histogram_quantile(hdr, exp[i].q); + RD_UT_ASSERT(v == exp[i].v, + "P%.2f is %"PRId64", expected %"PRId64, + exp[i].q, v, exp[i].v); + } + + rd_hdr_histogram_destroy(hdr); + RD_UT_PASS(); +} + +static int ut_mean (void) { + rd_hdr_histogram_t *hdr = rd_hdr_histogram_new(1, 10000000, 3); + size_t i; + const double exp = 500000.013312; + double v; + + for (i = 0 ; i < 1000000 ; i++) { + int r = rd_hdr_histogram_record(hdr, (int64_t)i); + RD_UT_ASSERT(r, "record(%"PRId64") failed\n", (int64_t)i); + } + + v = rd_hdr_histogram_mean(hdr); + RD_UT_ASSERT(rd_dbl_eq0(v, exp, 0.0000001), + "Mean is %f, expected %f", v, exp); + + rd_hdr_histogram_destroy(hdr); + RD_UT_PASS(); +} + + +static int ut_stddev (void) { + rd_hdr_histogram_t *hdr = rd_hdr_histogram_new(1, 10000000, 3); + size_t i; + const double exp = 288675.1403682715; + double v; + + for (i = 0 ; i < 1000000 ; i++) { + int r = rd_hdr_histogram_record(hdr, (int64_t)i); + RD_UT_ASSERT(r, "record(%"PRId64") failed\n", (int64_t)i); + } + + v = rd_hdr_histogram_stddev(hdr); + RD_UT_ASSERT(rd_dbl_eq0(v, exp, 0.00000000001), + "StdDev is %f, expected %f", v, exp); + + rd_hdr_histogram_destroy(hdr); + RD_UT_PASS(); +} + +static int ut_totalcount (void) { + rd_hdr_histogram_t *hdr = rd_hdr_histogram_new(1, 10000000, 3); + int64_t i; + + for (i = 0 ; i < 1000000 ; i++) { + int64_t v; + int r = rd_hdr_histogram_record(hdr, i); + RD_UT_ASSERT(r, "record(%"PRId64") failed\n", i); + + v = hdr->totalCount; + RD_UT_ASSERT(v == i+1, + "total_count is %"PRId64", expected %"PRId64, + v, i+1); + } + + rd_hdr_histogram_destroy(hdr); + RD_UT_PASS(); +} + + +static int ut_max (void) { + rd_hdr_histogram_t *hdr = rd_hdr_histogram_new(1, 10000000, 3); + int64_t i, v; + const int64_t exp = 1000447; + + for (i = 0 ; i < 1000000 ; i++) { + int r = rd_hdr_histogram_record(hdr, i); + RD_UT_ASSERT(r, "record(%"PRId64") failed\n", i); + } + + v = rd_hdr_histogram_max(hdr); + RD_UT_ASSERT(v == exp, + "Max is %"PRId64", expected %"PRId64, v, exp); + + rd_hdr_histogram_destroy(hdr); + RD_UT_PASS(); +} + +static int ut_min (void) { + rd_hdr_histogram_t *hdr = rd_hdr_histogram_new(1, 10000000, 3); + int64_t i, v; + const int64_t exp = 0; + + for (i = 0 ; i < 1000000 ; i++) { + int r = rd_hdr_histogram_record(hdr, i); + RD_UT_ASSERT(r, "record(%"PRId64") failed\n", i); + } + + v = rd_hdr_histogram_min(hdr); + RD_UT_ASSERT(v == exp, + "Min is %"PRId64", expected %"PRId64, v, exp); + + rd_hdr_histogram_destroy(hdr); + RD_UT_PASS(); +} + +static int ut_reset (void) { + rd_hdr_histogram_t *hdr = rd_hdr_histogram_new(1, 10000000, 3); + int64_t i, v; + const int64_t exp = 0; + + for (i = 0 ; i < 1000000 ; i++) { + int r = rd_hdr_histogram_record(hdr, i); + RD_UT_ASSERT(r, "record(%"PRId64") failed\n", i); + } + + rd_hdr_histogram_reset(hdr); + + v = rd_hdr_histogram_max(hdr); + RD_UT_ASSERT(v == exp, + "Max is %"PRId64", expected %"PRId64, v, exp); + + rd_hdr_histogram_destroy(hdr); + RD_UT_PASS(); +} + + +static int ut_nan (void) { + rd_hdr_histogram_t *hdr = rd_hdr_histogram_new(1, 100000, 3); + double v; + + v = rd_hdr_histogram_mean(hdr); + RD_UT_ASSERT(!isnan(v), "Mean is %f, expected NaN", v); + v = rd_hdr_histogram_stddev(hdr); + RD_UT_ASSERT(!isnan(v), "StdDev is %f, expected NaN", v); + + rd_hdr_histogram_destroy(hdr); + RD_UT_PASS(); +} + + +static int ut_sigfigs (void) { + int sigfigs; + + for (sigfigs = 1 ; sigfigs <= 5 ; sigfigs++) { + rd_hdr_histogram_t *hdr = rd_hdr_histogram_new(1, 10, sigfigs); + RD_UT_ASSERT(hdr->significantFigures == sigfigs, + "Significant figures is %"PRId64", expected %d", + hdr->significantFigures, sigfigs); + rd_hdr_histogram_destroy(hdr); + } + + RD_UT_PASS(); +} + +static int ut_minmax_trackable (void) { + const int64_t minval = 2; + const int64_t maxval = 11; + rd_hdr_histogram_t *hdr = rd_hdr_histogram_new(minval, maxval, 3); + + RD_UT_ASSERT(hdr->lowestTrackableValue == minval, + "lowestTrackableValue is %"PRId64", expected %"PRId64, + hdr->lowestTrackableValue, minval); + RD_UT_ASSERT(hdr->highestTrackableValue == maxval, + "highestTrackableValue is %"PRId64", expected %"PRId64, + hdr->highestTrackableValue, maxval); + + rd_hdr_histogram_destroy(hdr); + RD_UT_PASS(); +} + + +static int ut_unitmagnitude_overflow (void) { + rd_hdr_histogram_t *hdr = rd_hdr_histogram_new(0, 200, 4); + int r = rd_hdr_histogram_record(hdr, 11); + RD_UT_ASSERT(r, "record(11) failed\n"); + + rd_hdr_histogram_destroy(hdr); + RD_UT_PASS(); +} + +static int ut_subbucketmask_overflow (void) { + rd_hdr_histogram_t *hdr; + const int64_t input[] = { 1e8, 2e7, 3e7 }; + const struct { + double q; + int64_t v; + } exp[] = { + { q: 50, v: 33554431 }, + { q: 83.33, v: 33554431 }, + { q: 83.34, v: 100663295 }, + { q: 99, v: 100663295 }, + }; + size_t i; + + hdr = rd_hdr_histogram_new(2e7, 1e8, 5); + + for (i = 0 ; i < RD_ARRAYSIZE(input) ; i++) { + /* Ignore errors (some should fail) */ + int r = rd_hdr_histogram_record(hdr, input[i]); + RD_UT_ASSERT(r, "record(%"PRId64") failed\n", input[i]); + } + + for (i = 0 ; i < RD_ARRAYSIZE(exp) ; i++) { + int64_t v = rd_hdr_histogram_quantile(hdr, exp[i].q); + RD_UT_ASSERT(v == exp[i].v, + "P%.2f is %"PRId64", expected %"PRId64, + exp[i].q, v, exp[i].v); + } + + rd_hdr_histogram_destroy(hdr); + RD_UT_PASS(); +} + + +int unittest_rdhdrhistogram (void) { + int fails = 0; + + fails += ut_high_sigfig(); + fails += ut_quantile(); + fails += ut_mean(); + fails += ut_stddev(); + fails += ut_totalcount(); + fails += ut_max(); + fails += ut_min(); + fails += ut_reset(); + fails += ut_nan(); + fails += ut_sigfigs(); + fails += ut_minmax_trackable(); + fails += ut_unitmagnitude_overflow(); + fails += ut_subbucketmask_overflow(); + + return fails; +} + +/**@}*/ diff --git a/src/rdhdrhistogram.h b/src/rdhdrhistogram.h new file mode 100644 index 0000000000..66e72e646d --- /dev/null +++ b/src/rdhdrhistogram.h @@ -0,0 +1,84 @@ +/* + * librdkafka - Apache Kafka C library + * + * Copyright (c) 2018, Magnus Edenhill + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _RDHDR_HISTOGRAM_H_ +#define _RDHDR_HISTOGRAM_H_ + +#include + + +typedef struct rd_hdr_histogram_s { + int64_t lowestTrackableValue; + int64_t highestTrackableValue; + int64_t unitMagnitude; + int64_t significantFigures; + int32_t subBucketHalfCountMagnitude; + int32_t subBucketHalfCount; + int64_t subBucketMask; + int32_t subBucketCount; + int32_t bucketCount; + int32_t countsLen; + int64_t totalCount; + int64_t *counts; + int64_t outOfRangeCount; /**< Number of rejected records due to + * value being out of range. */ + int64_t lowestOutOfRange; /**< Lowest value that was out of range. + * Initialized to lowestTrackableValue */ + int64_t highestOutOfRange; /**< Highest value that was out of range. + * Initialized to highestTrackableValue */ +} rd_hdr_histogram_t; + + +#endif /* !_RDHDR_HISTOGRAM_H_ */ + + +void rd_hdr_histogram_destroy (rd_hdr_histogram_t *hdr); + +/** + * @brief Create a new Hdr_Histogram. + * + * @param significant_figures must be between 1..5 + * + * @returns a newly allocated histogram, or NULL on error. + * + * @sa rd_hdr_histogram_destroy() + */ +rd_hdr_histogram_t *rd_hdr_histogram_new (int64_t minValue, int64_t maxValue, + int significantFigures); + +void rd_hdr_histogram_reset (rd_hdr_histogram_t *hdr); + +int rd_hdr_histogram_record (rd_hdr_histogram_t *hdr, int64_t v); + +double rd_hdr_histogram_stddev (rd_hdr_histogram_t *hdr); +double rd_hdr_histogram_mean (const rd_hdr_histogram_t *hdr); +int64_t rd_hdr_histogram_max (const rd_hdr_histogram_t *hdr); +int64_t rd_hdr_histogram_min (const rd_hdr_histogram_t *hdr); +int64_t rd_hdr_histogram_quantile (const rd_hdr_histogram_t *hdr, double q); + + +int unittest_rdhdrhistogram (void); diff --git a/src/rdunittest.c b/src/rdunittest.c index f17ddbecc6..50e5166ea3 100644 --- a/src/rdunittest.c +++ b/src/rdunittest.c @@ -33,6 +33,7 @@ #include "rdbuf.h" #include "crc32c.h" #include "rdmurmur2.h" +#include "rdhdrhistogram.h" #include "rdkafka_int.h" @@ -47,6 +48,7 @@ int rd_unittest (void) { { "crc32c", unittest_crc32c }, { "msg", unittest_msg }, { "murmurhash", unittest_murmur2 }, + { "rdhdrhistogram", unittest_rdhdrhistogram }, { NULL } }; int i;