Skip to content

Commit

Permalink
Rework HashMap internals and add RNG support
Browse files Browse the repository at this point in the history
The internals of HashMap, and in particular the hashing logic, were
broken. Internally the VM reused Rust's DefaultHasher type for hashing
values. This type is mutable. When storing a HashMap in a constant,
concurrent access to this HashMap could result in wrong hashes being
produced, as all threads use the same DefaultHasher.

To solve this, Inko takes an approach similar to Rust's: we provide a
RandomState type, which can be used to create a DefaultHasher. A
DefaultHasher now takes two keys as arguments, used for seeding the
hasher. The RandomState type generates two keys randomly, similar to
Rust. The hash seeds are generated by taking a thread-local randomly
generated number, then incrementing it (wrapping around on overflow).
This ensures that it is very unlikely for two different HashMaps to use
the same seeds, making certain hash attacks [1] more difficult.

Random number generation is provided by the std::random module. This
module provides methods for randomly generating integers, floats, and
bytes. Integers and floats can also be generated in a given range, for
example:

    import std::random

    random.integer_between(min: 0, max: 10)

[1]: rust-lang/rust#36481 and
     https://internals.rust-lang.org/t/help-harden-hashmap-in-libstd/4138/18
  • Loading branch information
Yorick Peterse committed Jun 16, 2019
1 parent 23f2e7f commit 770bc94
Show file tree
Hide file tree
Showing 28 changed files with 846 additions and 202 deletions.
6 changes: 5 additions & 1 deletion compiler/lib/inkoc/codegen/instruction.rb
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ class Instruction
StringConcat
HasherNew
HasherWrite
HasherFinish
HasherToHash
Stacktrace
ProcessTerminateCurrent
StringSlice
Expand Down Expand Up @@ -180,6 +180,10 @@ class Instruction
SocketListen
SocketConnect
SocketShutdown
HasherReset
RandomNumber
RandomRange
RandomBytes
]
.each_with_index
.each_with_object({}) { |(value, index), hash| hash[value] = index }
Expand Down
22 changes: 19 additions & 3 deletions compiler/lib/inkoc/pass/define_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1501,14 +1501,18 @@ def on_raw_hasher_new(*)
typedb.hasher_type.new_instance
end

def on_raw_hasher_write(*)
typedb.nil_type.new_instance
def on_raw_hasher_write(node, _)
node.arguments.fetch(1).type
end

def on_raw_hasher_finish(*)
def on_raw_hasher_to_hash(*)
typedb.integer_type.new_instance
end

# Type of a raw `hasher_reset` instruction: the type of the instruction's
# first argument (the Inko runtime passes the hasher itself in that position).
def on_raw_hasher_reset(node, _)
node.arguments.fetch(0).type
end

def on_raw_stacktrace(*)
tuple = typedb.new_array_of_type(TypeSystem::Dynamic.new)

Expand Down Expand Up @@ -1779,6 +1783,18 @@ def on_raw_socket_listen(*)
typedb.integer_type.new_instance
end

# Type of a raw `random_number` instruction. The result is typed as Dynamic
# rather than as a concrete numeric type.
# NOTE(review): presumably because the same instruction yields either an
# Integer or a Float; std::random casts the result with `as` -- confirm.
def on_raw_random_number(*)
TypeSystem::Dynamic.new
end

# Type of a raw `random_range` instruction. The result is typed as Dynamic
# rather than as a concrete numeric type.
# NOTE(review): presumably because the result follows the type of the bounds
# (std::random casts it to Integer or Float with `as`) -- confirm.
def on_raw_random_range(*)
TypeSystem::Dynamic.new
end

# Type of a raw `random_bytes` instruction: a new instance of the type
# database's byte array type. All arguments are ignored.
def on_raw_random_bytes(*)
typedb.byte_array_type.new_instance
end

def define_block_signature(node, scope, expected_block = nil)
define_type_parameters(node, scope)
define_argument_types(node, scope, expected_block)
Expand Down
22 changes: 19 additions & 3 deletions compiler/lib/inkoc/pass/generate_tir.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1267,15 +1267,19 @@ def on_raw_platform(node, body)
end

def on_raw_hasher_new(node, body)
raw_nullary_instruction(:HasherNew, node, body)
raw_binary_instruction(:HasherNew, node, body)
end

# Lowers a raw `hasher_write` call to a HasherWrite instruction through the
# two-operand helper `raw_binary_instruction` (defined outside this view).
# NOTE(review): the Inko runtime calls this with (hasher, value); presumably
# those become the two operands in that order -- confirm.
def on_raw_hasher_write(node, body)
raw_binary_instruction(:HasherWrite, node, body)
end

def on_raw_hasher_finish(node, body)
raw_unary_instruction(:HasherFinish, node, body)
def on_raw_hasher_to_hash(node, body)
raw_unary_instruction(:HasherToHash, node, body)
end

# Lowers a raw `hasher_reset` call to a HasherReset instruction through the
# one-operand helper `raw_unary_instruction` (defined outside this view).
# NOTE(review): the single operand is presumably the hasher to reset -- confirm.
def on_raw_hasher_reset(node, body)
raw_unary_instruction(:HasherReset, node, body)
end

def on_raw_stacktrace(node, body)
Expand Down Expand Up @@ -1553,6 +1557,18 @@ def on_raw_socket_listen(node, body)
raw_binary_instruction(:SocketListen, node, body)
end

# Lowers a raw `random_number` call to a RandomNumber instruction through the
# one-operand helper `raw_unary_instruction` (defined outside this view).
# NOTE(review): std::random passes 0, 1 or 2 as the only argument; presumably
# it selects the kind of number to generate -- confirm against the VM.
def on_raw_random_number(node, body)
raw_unary_instruction(:RandomNumber, node, body)
end

# Lowers a raw `random_range` call to a RandomRange instruction through the
# two-operand helper `raw_binary_instruction` (defined outside this view).
# NOTE(review): std::random passes (min, max); presumably those become the
# two operands in that order -- confirm.
def on_raw_random_range(node, body)
raw_binary_instruction(:RandomRange, node, body)
end

# Lowers a raw `random_bytes` call to a RandomBytes instruction through the
# one-operand helper `raw_unary_instruction` (defined outside this view).
# NOTE(review): std::random passes the requested size as the only argument.
def on_raw_random_bytes(node, body)
raw_unary_instruction(:RandomBytes, node, body)
end

def on_return(node, body)
location = node.location
register =
Expand Down
2 changes: 1 addition & 1 deletion runtime/src/std/boolean.inko
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ impl ToString for Boolean {

impl Hash for Boolean {
def hash(hasher: Hasher) {
_INKOC.hasher_write(hasher, self)
hasher.write_boolean(self)
}
}

Expand Down
20 changes: 13 additions & 7 deletions runtime/src/std/hash.inko
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#! Types and methods for hashing data.

import std::operators::Equal

## Trait for hashing integers.
Expand All @@ -11,17 +10,24 @@ import std::operators::Equal
## such as the bytes in a `String`.
trait Hasher {
## Writes the given `Integer` into this hasher.
def write_integer(value: Integer)
def write_integer(value: Integer) -> Integer

## Writes the given `Float` into this hasher.
def write_float(value: Float)
def write_float(value: Float) -> Float

## Writes the given `String` into this hasher.
def write_string(value: String) -> String

## Writes the given `Boolean` into this hasher.
def write_boolean(value: Boolean) -> Boolean

## Returns the hash for the values written so far.
##
## Once a hash has been produced this method will reset the internal state of
## this `Hasher`, removing the need for allocating a new `Hasher` every time
## you want to hash an object.
def finish -> Integer
## This method should not reset the internal state of the `Hasher`.
def to_hash -> Integer

## Resets the internal state of `self`.
def reset -> Self
}

## A value that can be hashed.
Expand Down
97 changes: 79 additions & 18 deletions runtime/src/std/hash_map.inko
Original file line number Diff line number Diff line change
@@ -1,15 +1,83 @@
#! A hash map using linear probing and Robin Hood bucket stealing.
import std::hash::(Hash, Hasher)
import std::hasher::DefaultHasher
import std::index::(Index, SetIndex)
import std::iterator::(Enumerator, Iterator)
import std::length::Length
import std::operators::Equal
import std::process
import std::random

# The load factor of a Table before it should be resized.
## The load factor of a Table before it should be resized.
let LOAD_FACTOR = 0.75

## The default `Hasher` used for a `HashMap`.
##
## Different instances of a `DefaultHasher` may produce different hash values
## for the same object. The internal hashing algorithm may also change, and so
## the exact hash values should not be relied upon.
##
## Internally this hasher uses the hashing instructions provided by IVM, which
## currently uses SipHash 1-3.
let DefaultHasher = _INKOC.get_hasher_prototype

_INKOC.set_object_name(DefaultHasher, 'DefaultHasher')

impl DefaultHasher {
## Returns a new `DefaultHasher`.
##
## The given keys will be used as secrets for the hasher.
##
## # Panics
##
## This method will panic if any of the provided keys are below zero.
def new(key0: Integer, key1: Integer) -> Self {
# Construction is delegated entirely to the `hasher_new` instruction, which
# receives both keys in the order given.
_INKOC.hasher_new(key0, key1)
}
}

impl Hasher for DefaultHasher {
# All four write_* methods forward to the same `hasher_write` instruction,
# passing `self` and the value. Each method declares the type of the written
# value as its return type.
def write_integer(value: Integer) -> Integer {
_INKOC.hasher_write(self, value)
}

def write_float(value: Float) -> Float {
_INKOC.hasher_write(self, value)
}

def write_string(value: String) -> String {
_INKOC.hasher_write(self, value)
}

def write_boolean(value: Boolean) -> Boolean {
_INKOC.hasher_write(self, value)
}

# Produces the hash through the `hasher_to_hash` instruction.
def to_hash -> Integer {
_INKOC.hasher_to_hash(self)
}

# Resets this hasher through the `hasher_reset` instruction.
def reset -> Self {
_INKOC.hasher_reset(self)
}
}

## The state used for creating a `DefaultHasher`.
##
## Different `DefaultHasher` instances created from the same `RandomState` will
## produce the same hash values for the same input values. However, hashers
## created from different `RandomState` objects will produce different hashes.
object RandomState {
## Seeds this state with two keys, each obtained from its own call to
## `random.incremental_integer`.
def init {
let @key0 = random.incremental_integer
let @key1 = random.incremental_integer
}

## Creates a new `DefaultHasher`.
##
## The hasher is seeded with the two keys stored in this state.
def to_hasher -> DefaultHasher {
DefaultHasher.new(key0: @key0, key1: @key1)
}
}

## A single key-value pair
object Pair!(K: Hash + Equal, V) {
def init(key: K, value: V, hash: Integer) {
Expand Down Expand Up @@ -72,9 +140,9 @@ object Pair!(K: Hash + Equal, V) {
## * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/
## * https://www.sebastiansylvan.com/post/robin-hood-hashing-should-be-your-default-hash-table-implementation/
object Table!(K: Hash + Equal, V) {
def init(hasher: Hasher = DefaultHasher.new) {
## The Hasher to use for hashing keys.
let mut @hasher = hasher
def init {
## The state to use for creating hashers.
let @random_state = RandomState.new

## The buckets to store pairs in. Each bucket can only contain a single
## pair.
Expand Down Expand Up @@ -144,9 +212,11 @@ object Table!(K: Hash + Equal, V) {

## Returns the hash for the given key.
def hash_key(key: K) -> Integer {
key.hash(@hasher)
let hasher = @random_state.to_hasher

@hasher.finish
key.hash(hasher)

hasher.to_hash
}

## Returns the desired bucket index for the given hash.
Expand Down Expand Up @@ -294,19 +364,10 @@ impl SetIndex!(K, V) for Table!(K, V) {
##
## A `HashMap` is unordered, meaning that keys can be returned in a (seemingly)
## random order.
##
## # Custom Hashers
##
## By default a `HashMap` uses `DefaultHasher` for hashing objects. You can
## provide a custom hasher using `HashMap.new(hasher: YourCustomerHasher.new)`,
## as long as the custom hasher implements the `Hasher` trait.
object HashMap!(K: Hash + Equal, V) {
## Creates a new, empty `HashMap`.
##
## The `hasher` argument can be used to provide an alternative `Hasher` to use
## for this `HashMap`.
def init(hasher: Hasher = DefaultHasher.new) {
let mut @table = Table.new(hasher)
def init {
let mut @table = Table.new
}

## Removes the given key, returning its value if the key was present in the
Expand Down
35 changes: 0 additions & 35 deletions runtime/src/std/hasher.inko

This file was deleted.

73 changes: 73 additions & 0 deletions runtime/src/std/random.inko
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#! Generating random values.
#!
#! This module provides methods for generating random numbers and bytes.
import std::byte_array::ByteArray
import std::conversion::(ToFloat, ToInteger)

## Returns a random `Integer`.
def integer -> Integer {
# NOTE(review): the literal 0 appears to select which kind of number the
# `random_number` instruction produces (`incremental_integer` passes 1 and
# `float` passes 2) -- confirm against the VM.
_INKOC.random_number(0) as Integer
}

## Returns a random `Integer` that is incremented on every request.
##
## The base number is an OS thread-specific randomly generated number. This
## number is incremented upon calling this method. The number will wrap around
## when it cannot fit in a 64-bit unsigned integer.
##
## Since the base values are thread-specific, the values of this method may
## differ depending on what OS thread the current process is running on.
##
## # Examples
##
## Requesting an incremental random `Integer`:
##
## import std::random
##
## let one = random.incremental_integer
## let two = random.incremental_integer
##
## two - one # => 1
def incremental_integer -> Integer {
# NOTE(review): the literal 1 appears to select the incremental variant of
# the `random_number` instruction (`integer` passes 0 and `float` passes 2)
# -- confirm against the VM.
_INKOC.random_number(1) as Integer
}

## Returns a random `Float`.
def float -> Float {
# NOTE(review): the literal 2 appears to select the Float variant of the
# `random_number` instruction (`integer` passes 0 and `incremental_integer`
# passes 1) -- confirm against the VM.
_INKOC.random_number(2) as Float
}

## Returns a random `Integer` in the given range.
##
## The returned `Integer` is greater than or equal to `min`, and lower than or
## equal to `max`.
##
## # Panics
##
## This method will panic if `min` is equal to or greater than `max`.
def integer_between(min: ToInteger, max: ToInteger) -> Integer {
# Both bounds are converted with `to_integer` before they reach the
# `random_range` instruction; the result is then cast to `Integer`.
_INKOC.random_range(min.to_integer, max.to_integer) as Integer
}

## Returns a random `Float` in the given range.
##
## The returned `Float` is greater than or equal to `min`, and lower than or
## equal to `max`.
##
## # Panics
##
## This method will panic if `min` is equal to or greater than `max`.
def float_between(min: ToFloat, max: ToFloat) -> Float {
# Both bounds are converted with `to_float` before they reach the
# `random_range` instruction; the result is then cast to `Float`.
_INKOC.random_range(min.to_float, max.to_float) as Float
}

## Returns a `ByteArray` containing random bytes.
##
## The returned `ByteArray` will contain exactly `size` bytes.
##
## # Panics
##
## This method might panic if no random bytes could be generated.
def bytes(size: Integer) -> ByteArray {
# Generation is delegated to the `random_bytes` instruction, which receives
# the requested size unchanged.
_INKOC.random_bytes(size)
}
Loading

0 comments on commit 770bc94

Please sign in to comment.