From 6d76231f10b002f04625d619f38377d0b73c4e38 Mon Sep 17 00:00:00 2001 From: Joel Rosdahl Date: Mon, 22 Jul 2019 22:52:09 +0200 Subject: [PATCH] Document BLAKE2b and clean up after #445 Closes #412. --- Makefile.in | 1 - dev.mk.in | 1 - doc/MANUAL.adoc | 20 ++-- src/ccache.c | 4 +- src/mdfour.c | 215 ------------------------------------------- src/mdfour.h | 37 -------- unittest/test_hash.c | 4 +- 7 files changed, 12 insertions(+), 270 deletions(-) delete mode 100644 src/mdfour.c delete mode 100644 src/mdfour.h diff --git a/Makefile.in b/Makefile.in index 0f7ed09765..e36166805a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -51,7 +51,6 @@ non_third_party_sources = \ src/language.c \ src/lockfile.c \ src/manifest.c \ - src/mdfour.c \ src/result.c \ src/stats.c \ src/unify.c \ diff --git a/dev.mk.in b/dev.mk.in index 7b2bca5bdb..d1a511deaf 100644 --- a/dev.mk.in +++ b/dev.mk.in @@ -48,7 +48,6 @@ headers = \ src/language.h \ src/macroskip.h \ src/manifest.h \ - src/mdfour.h \ src/result.h \ src/system.h \ src/third_party/getopt_long.h \ diff --git a/doc/MANUAL.adoc b/doc/MANUAL.adoc index 4ee2bc8102..a8e1f5e91f 100644 --- a/doc/MANUAL.adoc +++ b/doc/MANUAL.adoc @@ -130,10 +130,8 @@ compiler options apply and you should refer to the compiler's documentation. *`--hash-file`*=_PATH_:: - Print the hash of the file at PATH in the format `<MD4>-<size>`, where - `<MD4>` is 32 hex digits and `<size>` is 8 hex digits (representing the - size of the file modulo 2^32). This is only useful when debugging ccache - and its behavior. + Print the hash (160 bit BLAKE2b) of the file at PATH. This is only useful + when debugging ccache and its behavior. *`-h, --help`*:: @@ -909,11 +907,9 @@ The basic idea is to detect when you are compiling exactly the same code a second time and reuse the previously produced output. The detection is done by hashing different kinds of information that should be unique for the compilation and then using the hash sum to identify the cached output. 
ccache -uses MD4, a very fast cryptographic hash algorithm, for the hashing. (MD4 is -nowadays too weak to be useful in cryptographic contexts, but it should be safe -enough to be used to identify recompilations.) On a cache hit, ccache is able -to supply all of the correct compiler outputs (including all warnings, -dependency file, etc) from the cache. +uses BLAKE2b, a very fast cryptographic hash algorithm, for the hashing. On a +cache hit, ccache is able to supply all of the correct compiler outputs +(including all warnings, dependency file, etc) from the cache. ccache has two ways of gathering information used to look up results in the cache: @@ -1073,9 +1069,9 @@ Log for this object file. |============================================================================== -In the direct mode, ccache uses the MD4 hash of the *ccache-input-c* -+ *ccache-input-d* data (where *+* means concatenation), while the -*ccache-input-c* + *ccache-input-p* data is used in the preprocessor mode. +In the direct mode, ccache uses the 160 bit BLAKE2b hash of the +*ccache-input-c* + *ccache-input-d* data (where *+* means concatenation), while +the *ccache-input-c* + *ccache-input-p* data is used in the preprocessor mode. The *ccache-input-text* file is a combined text version of the three binary input files. 
It has three sections (“COMMON”, “DIRECT MODE” and diff --git a/src/ccache.c b/src/ccache.c index b178bed277..1781fb1140 100644 --- a/src/ccache.c +++ b/src/ccache.c @@ -86,8 +86,8 @@ static const char USAGE_TEXT[] = "Options for scripting or debugging:\n" " --dump-manifest=PATH dump manifest file at PATH in text format\n" " -k, --get-config=K print the value of configuration key K\n" - " --hash-file=PATH print the hash (<MD4>-<size>) of the\n" - " file at PATH\n" + " --hash-file=PATH print the hash (160 bit BLAKE2b) of the file at\n" + " PATH\n" " --print-stats print statistics counter IDs and corresponding\n" " values in machine-parsable format\n" " -o, --set-config=K=V set configuration item K to value V\n" diff --git a/src/mdfour.c b/src/mdfour.c deleted file mode 100644 index 04c4fb2818..0000000000 --- a/src/mdfour.c +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright (C) 1997-1998 Andrew Tridgell -// Copyright (C) 2009-2019 Joel Rosdahl and other contributors -// -// See doc/AUTHORS.adoc for a complete list of contributors. -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 3 of the License, or (at your option) -// any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. -// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 51 -// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -#include "ccache.h" -#include "mdfour.h" - -// NOTE: This code makes no attempt to be fast! 
- -#define MASK32 (0xffffffff) - -#define F(X, Y, Z) ((((X)&(Y)) | ((~(X))&(Z)))) -#define G(X, Y, Z) ((((X)&(Y)) | ((X)&(Z)) | ((Y)&(Z)))) -#define H(X, Y, Z) (((X)^(Y)^(Z))) -#define lshift(x, s) (((((x)<<(s))&MASK32) | (((x)>>(32-(s)))&MASK32))) - -#define ROUND1(a, b, c, d, k, s) \ - a = lshift((a + F(b, c, d) + M[k])&MASK32, s) -#define ROUND2(a, b, c, d, k, s) \ - a = lshift((a + G(b, c, d) + M[k] + 0x5A827999)&MASK32, s) -#define ROUND3(a, b, c, d, k, s) \ - a = lshift((a + H(b, c, d) + M[k] + 0x6ED9EBA1)&MASK32, s) - -// This applies md4 to 64 byte chunks. -static void -mdfour64(struct mdfour *md, uint32_t *M) -{ - uint32_t AA, BB, CC, DD; - uint32_t A, B, C, D; - - A = md->A; - B = md->B; - C = md->C; - D = md->D; - AA = A; - BB = B; - CC = C; - DD = D; - - ROUND1(A, B, C, D, 0, 3); ROUND1(D, A, B, C, 1, 7); - ROUND1(C, D, A, B, 2, 11); ROUND1(B, C, D, A, 3, 19); - ROUND1(A, B, C, D, 4, 3); ROUND1(D, A, B, C, 5, 7); - ROUND1(C, D, A, B, 6, 11); ROUND1(B, C, D, A, 7, 19); - ROUND1(A, B, C, D, 8, 3); ROUND1(D, A, B, C, 9, 7); - ROUND1(C, D, A, B, 10, 11); ROUND1(B, C, D, A, 11, 19); - ROUND1(A, B, C, D, 12, 3); ROUND1(D, A, B, C, 13, 7); - ROUND1(C, D, A, B, 14, 11); ROUND1(B, C, D, A, 15, 19); - - - ROUND2(A, B, C, D, 0, 3); ROUND2(D, A, B, C, 4, 5); - ROUND2(C, D, A, B, 8, 9); ROUND2(B, C, D, A, 12, 13); - ROUND2(A, B, C, D, 1, 3); ROUND2(D, A, B, C, 5, 5); - ROUND2(C, D, A, B, 9, 9); ROUND2(B, C, D, A, 13, 13); - ROUND2(A, B, C, D, 2, 3); ROUND2(D, A, B, C, 6, 5); - ROUND2(C, D, A, B, 10, 9); ROUND2(B, C, D, A, 14, 13); - ROUND2(A, B, C, D, 3, 3); ROUND2(D, A, B, C, 7, 5); - ROUND2(C, D, A, B, 11, 9); ROUND2(B, C, D, A, 15, 13); - - ROUND3(A, B, C, D, 0, 3); ROUND3(D, A, B, C, 8, 9); - ROUND3(C, D, A, B, 4, 11); ROUND3(B, C, D, A, 12, 15); - ROUND3(A, B, C, D, 2, 3); ROUND3(D, A, B, C, 10, 9); - ROUND3(C, D, A, B, 6, 11); ROUND3(B, C, D, A, 14, 15); - ROUND3(A, B, C, D, 1, 3); ROUND3(D, A, B, C, 9, 9); - ROUND3(C, D, A, B, 5, 11); ROUND3(B, C, D, A, 13, 
15); - ROUND3(A, B, C, D, 3, 3); ROUND3(D, A, B, C, 11, 9); - ROUND3(C, D, A, B, 7, 11); ROUND3(B, C, D, A, 15, 15); - - A += AA; - B += BB; - C += CC; - D += DD; - - A &= MASK32; - B &= MASK32; - C &= MASK32; - D &= MASK32; - - md->A = A; - md->B = B; - md->C = C; - md->D = D; -} - -static void -copy64(uint32_t *M, const unsigned char *in) -{ -#ifdef WORDS_BIGENDIAN - for (int i = 0; i < 16; i++) { - M[i] = (in[i*4+3]<<24) | (in[i*4+2]<<16) | - (in[i*4+1]<<8) | (in[i*4+0]<<0); - } -#else - memcpy(M, in, 16*4); -#endif -} - -static void -copy4(unsigned char *out, uint32_t x) -{ -#ifdef WORDS_BIGENDIAN - out[0] = x&0xFF; - out[1] = (x>>8)&0xFF; - out[2] = (x>>16)&0xFF; - out[3] = (x>>24)&0xFF; -#else - memcpy(out, &x, 4); -#endif -} - -void -mdfour_begin(struct mdfour *md) -{ - md->A = 0x67452301; - md->B = 0xefcdab89; - md->C = 0x98badcfe; - md->D = 0x10325476; - md->totalN = 0; - md->tail_len = 0; -} - -static -void mdfour_tail(struct mdfour *md, const unsigned char *in, size_t n) -{ - md->totalN += n; - uint32_t b = md->totalN * 8; - unsigned char buf[128] = { 0 }; - uint32_t M[16]; - if (n) { - memcpy(buf, in, n); - } - buf[n] = 0x80; - - if (n <= 55) { - copy4(buf+56, b); - copy64(M, buf); - mdfour64(md, M); - } else { - copy4(buf+120, b); - copy64(M, buf); - mdfour64(md, M); - copy64(M, buf+64); - mdfour64(md, M); - } -} - -void -mdfour_update(struct mdfour *md, const unsigned char *in, size_t n) -{ - assert(in); - - uint32_t M[16]; - if (md->tail_len) { - size_t len = 64 - md->tail_len; - if (len > n) { - len = n; - } - memcpy(md->tail+md->tail_len, in, len); - md->tail_len += len; - n -= len; - in += len; - if (md->tail_len == 64) { - copy64(M, md->tail); - mdfour64(md, M); - md->totalN += 64; - md->tail_len = 0; - } - } - - while (n >= 64) { - copy64(M, in); - mdfour64(md, M); - in += 64; - n -= 64; - md->totalN += 64; - } - - if (n) { - memcpy(md->tail, in, n); - md->tail_len = n; - } -} - -void -mdfour_result(struct mdfour *md, unsigned char *out) -{ - 
struct mdfour result; - result.A = md->A; - result.B = md->B; - result.C = md->C; - result.D = md->D; - result.totalN = md->totalN; - result.tail_len = md->tail_len; - memcpy(result.tail, md->tail, result.tail_len); - - mdfour_tail(&result, result.tail, result.tail_len); - copy4(out, result.A); - copy4(out+4, result.B); - copy4(out+8, result.C); - copy4(out+12, result.D); -} diff --git a/src/mdfour.h b/src/mdfour.h deleted file mode 100644 index 3ad1b68491..0000000000 --- a/src/mdfour.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (C) 1997-1998 Andrew Tridgell -// Copyright (C) 2009-2019 Joel Rosdahl and other contributors -// -// See doc/AUTHORS.adoc for a complete list of contributors. -// -// This program is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 3 of the License, or (at your option) -// any later version. -// -// This program is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -// more details. 
-// -// You should have received a copy of the GNU General Public License along with -// this program; if not, write to the Free Software Foundation, Inc., 51 -// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -#ifndef MDFOUR_H -#define MDFOUR_H - -#include -#include - -struct mdfour { - uint32_t A, B, C, D; - size_t totalN; - size_t tail_len; - unsigned char tail[64]; -}; - -void mdfour_begin(struct mdfour *md); -void mdfour_update(struct mdfour *md, const unsigned char *in, size_t n); -void mdfour_result(struct mdfour *md, unsigned char *out); - -#endif diff --git a/unittest/test_hash.c b/unittest/test_hash.c index 25858de8e2..e25250afbe 100644 --- a/unittest/test_hash.c +++ b/unittest/test_hash.c @@ -22,9 +22,9 @@ #include "../src/hash.h" #include "framework.h" -TEST_SUITE(mdfour) +TEST_SUITE(hash) -TEST(test_vectors_from_rfc_1320_should_be_correct) +TEST(test_known_strings) { char d[DIGEST_STRING_BUFFER_SIZE];