Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-43475: Fix worst case collision behavior for NaN instances #25493

Merged
merged 6 commits into from
Apr 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions Doc/library/stdtypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -692,10 +692,9 @@ Here are the rules in detail:
as ``-hash(-x)``. If the resulting hash is ``-1``, replace it with
``-2``.

- The particular values ``sys.hash_info.inf``, ``-sys.hash_info.inf``
and ``sys.hash_info.nan`` are used as hash values for positive
infinity, negative infinity, or nans (respectively). (All hashable
nans have the same hash value.)
- The particular values ``sys.hash_info.inf`` and ``-sys.hash_info.inf``
are used as hash values for positive
infinity or negative infinity (respectively).

- For a :class:`complex` number ``z``, the hash values of the real
and imaginary parts are combined by computing ``hash(z.real) +
Expand Down Expand Up @@ -740,7 +739,7 @@ number, :class:`float`, or :class:`complex`::
"""Compute the hash of a float x."""

if math.isnan(x):
return sys.hash_info.nan
return super().__hash__()
elif math.isinf(x):
return sys.hash_info.inf if x > 0 else -sys.hash_info.inf
else:
Expand Down
2 changes: 1 addition & 1 deletion Doc/library/sys.rst
Original file line number Diff line number Diff line change
Expand Up @@ -855,7 +855,7 @@ always available.
+---------------------+--------------------------------------------------+
| :const:`inf` | hash value returned for a positive infinity |
+---------------------+--------------------------------------------------+
| :const:`nan` | hash value returned for a nan |
| :const:`nan` | (this attribute is no longer used) |
+---------------------+--------------------------------------------------+
| :const:`imag` | multiplier used for the imaginary part of a |
| | complex number |
Expand Down
3 changes: 1 addition & 2 deletions Include/pyhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ extern "C" {

/* Helpers for hash functions */
#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(PyObject *, double);
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*);
// Similar to _Py_HashPointer(), but don't replace -1 with -2
PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*);
Expand All @@ -29,7 +29,6 @@ PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);

#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
#define _PyHASH_INF 314159
#define _PyHASH_NAN 0
#define _PyHASH_IMAG _PyHASH_MULTIPLIER


Expand Down
2 changes: 1 addition & 1 deletion Lib/_pydecimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -951,7 +951,7 @@ def __hash__(self):
if self.is_snan():
raise TypeError('Cannot hash a signaling NaN value.')
elif self.is_nan():
return _PyHASH_NAN
return super().__hash__()
else:
if self._sign:
return -_PyHASH_INF
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Hashes of NaN values now depend on object identity. Formerly, they always
hashed to 0 even though NaN values are not equal to one another. Having the
same hash for unequal values caused pile-ups in hash tables.
5 changes: 1 addition & 4 deletions Modules/_decimal/_decimal.c
Original file line number Diff line number Diff line change
Expand Up @@ -4536,7 +4536,6 @@ _dec_hash(PyDecObject *v)
#error "No valid combination of CONFIG_64, CONFIG_32 and _PyHASH_BITS"
#endif
const Py_hash_t py_hash_inf = 314159;
const Py_hash_t py_hash_nan = 0;
mpd_uint_t ten_data[1] = {10};
mpd_t ten = {MPD_POS|MPD_STATIC|MPD_CONST_DATA,
0, 2, 1, 1, ten_data};
Expand All @@ -4555,7 +4554,7 @@ _dec_hash(PyDecObject *v)
return -1;
}
else if (mpd_isnan(MPD(v))) {
return py_hash_nan;
return _Py_HashPointer(v);
}
else {
return py_hash_inf * mpd_arith_sign(MPD(v));
Expand Down Expand Up @@ -5939,5 +5938,3 @@ PyInit__decimal(void)

return NULL; /* GCOV_NOT_REACHED */
}


4 changes: 2 additions & 2 deletions Objects/complexobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -412,10 +412,10 @@ static Py_hash_t
complex_hash(PyComplexObject *v)
{
Py_uhash_t hashreal, hashimag, combined;
hashreal = (Py_uhash_t)_Py_HashDouble(v->cval.real);
hashreal = (Py_uhash_t)_Py_HashDouble((PyObject *) v, v->cval.real);
if (hashreal == (Py_uhash_t)-1)
return -1;
hashimag = (Py_uhash_t)_Py_HashDouble(v->cval.imag);
hashimag = (Py_uhash_t)_Py_HashDouble((PyObject *)v, v->cval.imag);
if (hashimag == (Py_uhash_t)-1)
return -1;
/* Note: if the imaginary part is 0, hashimag is 0 now,
Expand Down
2 changes: 1 addition & 1 deletion Objects/floatobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ float_richcompare(PyObject *v, PyObject *w, int op)
static Py_hash_t
float_hash(PyFloatObject *v)
{
return _Py_HashDouble(v->ob_fval);
return _Py_HashDouble((PyObject *)v, v->ob_fval);
}

static PyObject *
Expand Down
14 changes: 10 additions & 4 deletions Python/pyhash.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,12 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
If the result of the reduction is infinity (this is impossible for
integers, floats and Decimals) then use the predefined hash value
_PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
_PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
hashes of float and Decimal infinities and nans.
_PyHASH_INF and -_PyHASH_INF are also used for the
hashes of float and Decimal infinities.

NaNs hash with a pointer hash. Having distinct hash values prevents
catastrophic pileups from distinct NaN instances which used to always
have the same hash value but would compare unequal.

A selling point for the above strategy is that it makes it possible
to compute hashes of decimal and binary floating-point numbers
Expand All @@ -82,8 +86,10 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};

*/

Py_hash_t _Py_HashPointer(const void *);

Py_hash_t
_Py_HashDouble(double v)
_Py_HashDouble(PyObject *inst, double v)
{
int e, sign;
double m;
Expand All @@ -93,7 +99,7 @@ _Py_HashDouble(double v)
if (Py_IS_INFINITY(v))
return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
else
return _PyHASH_NAN;
return _Py_HashPointer(inst);
}

m = frexp(v, &e);
Expand Down
2 changes: 1 addition & 1 deletion Python/sysmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1405,7 +1405,7 @@ get_hash_info(PyThreadState *tstate)
PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(_PyHASH_INF));
PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(_PyHASH_NAN));
PyLong_FromLong(0)); // This is no longer used
PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(_PyHASH_IMAG));
PyStructSequence_SET_ITEM(hash_info, field++,
Expand Down