forked from simonsj/fdupes-jody
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjody_hash.c
114 lines (103 loc) · 3.51 KB
/
jody_hash.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/* Jody Bruchon's fast hashing function
*
* This function was written to generate a fast hash that also has a
* fairly low collision rate. The collision rate is much higher than
* a secure hash algorithm, but the calculation is drastically simpler
* and faster.
*
* Copyright (C) 2014-2017 by Jody Bruchon <[email protected]>
* Released under The MIT License
*/
#include <stdio.h>
#include <stdlib.h>
#include "jody_hash.h"
/* DO NOT modify the shift unless you know what you're doing.
* This shift was decided upon after lots of testing and
* changing it will likely cause lots of hash collisions. */
#ifndef JODY_HASH_SHIFT
#define JODY_HASH_SHIFT 14
#endif
/* The salt value's purpose is to cause each byte in the
* jodyhash_t word to have a positionally dependent variation.
* It is injected into the calculation to prevent a string of
* identical bytes from easily producing an identical hash. */
/* The tail mask table is used for block sizes that are
* indivisible by the width of a jodyhash_t. It is ANDed with the
* final jodyhash_t-sized element to zero out data in the buffer
* that is not part of the data to be hashed. */
/* Set hash parameters based on requested hash width */
#if JODY_HASH_WIDTH == 64
#define JODY_HASH_CONSTANT 0x1f3d5b79U
static const jodyhash_t tail_mask[] = {
0x0000000000000000,
0x00000000000000ff,
0x000000000000ffff,
0x0000000000ffffff,
0x00000000ffffffff,
0x000000ffffffffff,
0x0000ffffffffffff,
0x00ffffffffffffff,
0xffffffffffffffff
};
#endif /* JODY_HASH_WIDTH == 64 */
#if JODY_HASH_WIDTH == 32
#define JODY_HASH_CONSTANT 0x1f3d5b79U
static const jodyhash_t tail_mask[] = {
0x00000000,
0x000000ff,
0x0000ffff,
0x00ffffff,
0xffffffff,
};
#endif /* JODY_HASH_WIDTH == 32 */
#if JODY_HASH_WIDTH == 16
#define JODY_HASH_CONSTANT 0x1f5bU
static const jodyhash_t tail_mask[] = {
0x0000,
0x00ff,
0xffff,
};
#endif /* JODY_HASH_WIDTH == 16 */
/* Hash a block of arbitrary size; must be divisible by sizeof(jodyhash_t)
* The first block should pass a start_hash of zero.
* All blocks after the first should pass start_hash as the value
* returned by the last call to this function. This allows hashing
* of any amount of data. If data is not divisible by the size of
* jodyhash_t, it is MANDATORY that the caller provide a data buffer
* which is divisible by sizeof(jodyhash_t). */
extern jodyhash_t jody_block_hash(const jodyhash_t * restrict data,
const jodyhash_t start_hash, const size_t count)
{
jodyhash_t hash = start_hash;
jodyhash_t element;
jodyhash_t partial_salt;
size_t len;
/* Don't bother trying to hash a zero-length block */
if (count == 0) return hash;
len = count / sizeof(jodyhash_t);
for (; len > 0; len--) {
element = *data;
hash += element;
hash += JODY_HASH_CONSTANT;
hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(jodyhash_t) * 8 - JODY_HASH_SHIFT); /* bit rotate left */
hash ^= element;
hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(jodyhash_t) * 8 - JODY_HASH_SHIFT);
hash ^= JODY_HASH_CONSTANT;
hash += element;
data++;
}
/* Handle data tail (for blocks indivisible by sizeof(jodyhash_t)) */
len = count & (sizeof(jodyhash_t) - 1);
if (len) {
partial_salt = JODY_HASH_CONSTANT & tail_mask[len];
element = *data & tail_mask[len];
hash += element;
hash += partial_salt;
hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(jodyhash_t) * 8 - JODY_HASH_SHIFT);
hash ^= element;
hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(jodyhash_t) * 8 - JODY_HASH_SHIFT);
hash ^= partial_salt;
hash += element;
}
return hash;
}