From 770bc94636c2730450bb761e20c5fe548e1f7712 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Sun, 16 Jun 2019 13:11:02 +0200 Subject: [PATCH] Rework HashMap internals and add RNG support The internals of HashMap, and in particular the hashing logic, were broken. Internally the VM reused Rust's DefaultHasher type for hashing values. This type is mutable. When storing a HashMap in a constant, concurrent access to this HashMap could result in wrong hashes being produced, as all threads use the same DefaultHasher. To solve this, Inko takes an approach similar to Rust's: we provide a RandomState type, which can be used to create a DefaultHasher. A DefaultHasher now takes two keys as arguments, used for seeding the hasher. The RandomState type generates two keys randomly, similar to Rust. The hash seeds are generated by taking a thread-local randomly generated number, then incrementing it (wrapping around on overflow). This ensures that it is very unlikely for two different HashMaps to use the same seeds, making certain hash attacks [1] more difficult. Random number generation is provided by the std::random module. This module provides methods for randomly generating integers, floats, and bytes. 
Integers and floats can also be generated in a given range, for example: import std::random random.integer_between(min: 0, max: 10) [1]: https://github.com/rust-lang/rust/issues/36481 and https://internals.rust-lang.org/t/help-harden-hashmap-in-libstd/4138/18 --- compiler/lib/inkoc/codegen/instruction.rb | 6 +- compiler/lib/inkoc/pass/define_type.rb | 22 +++- compiler/lib/inkoc/pass/generate_tir.rb | 22 +++- runtime/src/std/boolean.inko | 2 +- runtime/src/std/hash.inko | 20 ++-- runtime/src/std/hash_map.inko | 97 ++++++++++++--- runtime/src/std/hasher.inko | 35 ------ runtime/src/std/random.inko | 73 +++++++++++ runtime/src/std/string.inko | 6 +- runtime/tests/main.inko | 2 +- runtime/tests/test/std/test_boolean.inko | 20 ++-- runtime/tests/test/std/test_float.inko | 31 +++-- runtime/tests/test/std/test_hash_map.inko | 140 ++++++++++++++++++---- runtime/tests/test/std/test_hasher.inko | 35 ------ runtime/tests/test/std/test_integer.inko | 13 +- runtime/tests/test/std/test_random.inko | 117 ++++++++++++++++++ runtime/tests/test/std/test_string.inko | 13 ++ vm/Cargo.lock | 22 ++++ vm/Cargo.toml | 7 +- vm/src/hasher.rs | 53 ++++---- vm/src/object_pointer.rs | 5 + vm/src/object_value.rs | 10 ++ vm/src/scheduler/process_worker.rs | 108 +++++++++++++++++ vm/src/vm/hasher.rs | 27 +++-- vm/src/vm/instruction.rs | 6 +- vm/src/vm/machine.rs | 43 ++++++- vm/src/vm/mod.rs | 1 + vm/src/vm/random.rs | 112 +++++++++++++++++ 28 files changed, 846 insertions(+), 202 deletions(-) delete mode 100644 runtime/src/std/hasher.inko create mode 100644 runtime/src/std/random.inko delete mode 100644 runtime/tests/test/std/test_hasher.inko create mode 100644 runtime/tests/test/std/test_random.inko create mode 100644 vm/src/vm/random.rs diff --git a/compiler/lib/inkoc/codegen/instruction.rb b/compiler/lib/inkoc/codegen/instruction.rb index 5397b2362..27dd3379b 100644 --- a/compiler/lib/inkoc/codegen/instruction.rb +++ b/compiler/lib/inkoc/codegen/instruction.rb @@ -121,7 +121,7 @@ class 
Instruction StringConcat HasherNew HasherWrite - HasherFinish + HasherToHash Stacktrace ProcessTerminateCurrent StringSlice @@ -180,6 +180,10 @@ class Instruction SocketListen SocketConnect SocketShutdown + HasherReset + RandomNumber + RandomRange + RandomBytes ] .each_with_index .each_with_object({}) { |(value, index), hash| hash[value] = index } diff --git a/compiler/lib/inkoc/pass/define_type.rb b/compiler/lib/inkoc/pass/define_type.rb index a961365c0..171ac9355 100644 --- a/compiler/lib/inkoc/pass/define_type.rb +++ b/compiler/lib/inkoc/pass/define_type.rb @@ -1501,14 +1501,18 @@ def on_raw_hasher_new(*) typedb.hasher_type.new_instance end - def on_raw_hasher_write(*) - typedb.nil_type.new_instance + def on_raw_hasher_write(node, _) + node.arguments.fetch(1).type end - def on_raw_hasher_finish(*) + def on_raw_hasher_to_hash(*) typedb.integer_type.new_instance end + def on_raw_hasher_reset(node, _) + node.arguments.fetch(0).type + end + def on_raw_stacktrace(*) tuple = typedb.new_array_of_type(TypeSystem::Dynamic.new) @@ -1779,6 +1783,18 @@ def on_raw_socket_listen(*) typedb.integer_type.new_instance end + def on_raw_random_number(*) + TypeSystem::Dynamic.new + end + + def on_raw_random_range(*) + TypeSystem::Dynamic.new + end + + def on_raw_random_bytes(*) + typedb.byte_array_type.new_instance + end + def define_block_signature(node, scope, expected_block = nil) define_type_parameters(node, scope) define_argument_types(node, scope, expected_block) diff --git a/compiler/lib/inkoc/pass/generate_tir.rb b/compiler/lib/inkoc/pass/generate_tir.rb index 8e62a2f07..ae3b584de 100644 --- a/compiler/lib/inkoc/pass/generate_tir.rb +++ b/compiler/lib/inkoc/pass/generate_tir.rb @@ -1267,15 +1267,19 @@ def on_raw_platform(node, body) end def on_raw_hasher_new(node, body) - raw_nullary_instruction(:HasherNew, node, body) + raw_binary_instruction(:HasherNew, node, body) end def on_raw_hasher_write(node, body) raw_binary_instruction(:HasherWrite, node, body) end - def 
on_raw_hasher_finish(node, body) - raw_unary_instruction(:HasherFinish, node, body) + def on_raw_hasher_to_hash(node, body) + raw_unary_instruction(:HasherToHash, node, body) + end + + def on_raw_hasher_reset(node, body) + raw_unary_instruction(:HasherReset, node, body) end def on_raw_stacktrace(node, body) @@ -1553,6 +1557,18 @@ def on_raw_socket_listen(node, body) raw_binary_instruction(:SocketListen, node, body) end + def on_raw_random_number(node, body) + raw_unary_instruction(:RandomNumber, node, body) + end + + def on_raw_random_range(node, body) + raw_binary_instruction(:RandomRange, node, body) + end + + def on_raw_random_bytes(node, body) + raw_unary_instruction(:RandomBytes, node, body) + end + def on_return(node, body) location = node.location register = diff --git a/runtime/src/std/boolean.inko b/runtime/src/std/boolean.inko index 09fd024ce..011cef74a 100644 --- a/runtime/src/std/boolean.inko +++ b/runtime/src/std/boolean.inko @@ -71,7 +71,7 @@ impl ToString for Boolean { impl Hash for Boolean { def hash(hasher: Hasher) { - _INKOC.hasher_write(hasher, self) + hasher.write_boolean(self) } } diff --git a/runtime/src/std/hash.inko b/runtime/src/std/hash.inko index 7d71929bf..1b45f2b22 100644 --- a/runtime/src/std/hash.inko +++ b/runtime/src/std/hash.inko @@ -1,5 +1,4 @@ #! Types and methods for hashing data. - import std::operators::Equal ## Trait for hashing integers. @@ -11,17 +10,24 @@ import std::operators::Equal ## such as the bytes in a `String`. trait Hasher { ## Writes the given `Integer` into this hasher. - def write_integer(value: Integer) + def write_integer(value: Integer) -> Integer ## Writes the given `Float` into this hasher. - def write_float(value: Float) + def write_float(value: Float) -> Float + + ## Writes the given `String` into this hasher. + def write_string(value: String) -> String + + ## Writes the given `Boolean` into this hasher. + def write_boolean(value: Boolean) -> Boolean ## Returns the hash for the values written so far. 
## - ## Once a hash has been produced this method will reset the internal state of - ## this `Hasher`, removing the need for allocating a new `Hasher` every time - ## you want to hash an object. - def finish -> Integer + ## This method should not reset the internal state of the `Hasher`. + def to_hash -> Integer + + ## Resets the internal state of `self`. + def reset -> Self } ## A value that can be hashed. diff --git a/runtime/src/std/hash_map.inko b/runtime/src/std/hash_map.inko index 466458586..b151cc063 100644 --- a/runtime/src/std/hash_map.inko +++ b/runtime/src/std/hash_map.inko @@ -1,15 +1,83 @@ #! A hash map using linear probing and Robin Hood bucket stealing. import std::hash::(Hash, Hasher) -import std::hasher::DefaultHasher import std::index::(Index, SetIndex) import std::iterator::(Enumerator, Iterator) import std::length::Length import std::operators::Equal import std::process +import std::random -# The load factor of a Table before it should be resized. +## The load factor of a Table before it should be resized. let LOAD_FACTOR = 0.75 +## The default `Hasher` used for a `HashMap`. +## +## Different instances of a `DefaultHasher` may produce different hash values +## for the same object. The internal hashing algorithm may also change, and so +## the exact hash values should not be relied upon. +## +## Internally this hasher uses the hashing instructions provided by IVM, which +## currently uses SipHash 1-3. +let DefaultHasher = _INKOC.get_hasher_prototype + +_INKOC.set_object_name(DefaultHasher, 'DefaultHasher') + +impl DefaultHasher { + ## Returns a new `DefaultHasher`. + ## + ## The given keys will be used as secrets for the hasher. + ## + ## # Panics + ## + ## This method will panic if any of the provided keys are below zero. 
+ def new(key0: Integer, key1: Integer) -> Self { + _INKOC.hasher_new(key0, key1) + } +} + +impl Hasher for DefaultHasher { + def write_integer(value: Integer) -> Integer { + _INKOC.hasher_write(self, value) + } + + def write_float(value: Float) -> Float { + _INKOC.hasher_write(self, value) + } + + def write_string(value: String) -> String { + _INKOC.hasher_write(self, value) + } + + def write_boolean(value: Boolean) -> Boolean { + _INKOC.hasher_write(self, value) + } + + def to_hash -> Integer { + _INKOC.hasher_to_hash(self) + } + + def reset -> Self { + _INKOC.hasher_reset(self) + } +} + +## The state used for creating a `DefaultHasher`. +## +## Different `DefaultHasher` instances created from the same `RandomState` will +## produce the same hash values for the same input values. However, hashers +## created from different `RandomState` objects will produce different hashes. +object RandomState { + def init { + let @key0 = random.incremental_integer + let @key1 = random.incremental_integer + } + + ## Creates a new `DefaultHasher`. + def to_hasher -> DefaultHasher { + DefaultHasher.new(key0: @key0, key1: @key1) + } +} + ## A single key-value pair object Pair!(K: Hash + Equal, V) { def init(key: K, value: V, hash: Integer) { @@ -72,9 +140,9 @@ object Pair!(K: Hash + Equal, V) { ## * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/ ## * https://www.sebastiansylvan.com/post/robin-hood-hashing-should-be-your-default-hash-table-implementation/ object Table!(K: Hash + Equal, V) { - def init(hasher: Hasher = DefaultHasher.new) { - ## The Hasher to use for hashing keys. - let mut @hasher = hasher + def init { + ## The state to use for creating hashers. + let @random_state = RandomState.new ## The buckets to store pairs in. Each bucket can only contain a single ## pair. @@ -144,9 +212,11 @@ object Table!(K: Hash + Equal, V) { ## Returns the hash for the given key. 
def hash_key(key: K) -> Integer { - key.hash(@hasher) + let hasher = @random_state.to_hasher - @hasher.finish + key.hash(hasher) + + hasher.to_hash } ## Returns the desired bucket index for the given hash. @@ -294,19 +364,10 @@ impl SetIndex!(K, V) for Table!(K, V) { ## ## A `HashMap` is unordered, meaning that keys can be returned in a (seemingly) ## random order. -## -## # Custom Hashers -## -## By default a `HashMap` uses `DefaultHasher` for hashing objects. You can -## provide a custom hasher using `HashMap.new(hasher: YourCustomerHasher.new)`, -## as long as the custom hasher implements the `Hasher` trait. object HashMap!(K: Hash + Equal, V) { ## Creates a new, empty `HashMap`. - ## - ## The `hasher` argument can be used to provide an alternative `Hasher` to use - ## for this `HashMap`. - def init(hasher: Hasher = DefaultHasher.new) { - let mut @table = Table.new(hasher) + def init { + let mut @table = Table.new } ## Removes the given key, returning its value if the key was present in the diff --git a/runtime/src/std/hasher.inko b/runtime/src/std/hasher.inko deleted file mode 100644 index 23be4e272..000000000 --- a/runtime/src/std/hasher.inko +++ /dev/null @@ -1,35 +0,0 @@ -#! Hashers for various data types. -import std::hash::Hasher - -## The default `Hasher` used for various data types, such as `HashMap`. -## -## Different instances of a `DefaultHasher` may produce different hash values -## for the same object. The internal hashing algorithm may also change, and so -## the exact hash values should not be relied upon. -## -## Internally this hasher uses the hashing instructions provided by IVM, which -## currently uses SipHash 1-3. -let DefaultHasher = _INKOC.get_hasher_prototype - -_INKOC.set_object_name(DefaultHasher, 'DefaultHasher') - -impl DefaultHasher { - ## Returns a new `DefaultHasher`. 
- def new -> Self { - _INKOC.hasher_new - } -} - -impl Hasher for DefaultHasher { - def write_integer(value: Integer) { - _INKOC.hasher_write(self, value) - } - - def write_float(value: Float) { - _INKOC.hasher_write(self, value) - } - - def finish -> Integer { - _INKOC.hasher_finish(self) - } -} diff --git a/runtime/src/std/random.inko b/runtime/src/std/random.inko new file mode 100644 index 000000000..70bacd98b --- /dev/null +++ b/runtime/src/std/random.inko @@ -0,0 +1,73 @@ +#! Generating of random values. +#! +#! This module provides methods for generating random numbers and bytes. +import std::byte_array::ByteArray +import std::conversion::(ToFloat, ToInteger) + +## Returns a random `Integer`. +def integer -> Integer { + _INKOC.random_number(0) as Integer +} + +## Returns a random `Integer` that is incremented on every request. +## +## The base number is a OS thread-specific randomly generated number. This +## number is incremented upon calling this method. The number will wrap around +## when it can not fit in a 64 bits unsigned integer. +## +## Since the base values are thread-specific, the values of this method may +## differ depending on what OS thread the current process is running on. +## +## # Examples +## +## Requesting an incremental random `Integer`: +## +## import std::random +## +## let one = random.incremental_integer +## let two = random.incremental_integer +## +## two - one # => 1 +def incremental_integer -> Integer { + _INKOC.random_number(1) as Integer +} + +## Returns a random `Float`. +def float -> Float { + _INKOC.random_number(2) as Float +} + +## Returns a random `Integer` in the given range. +## +## The returned `Integer` is greater than or equal to `min`, and lower than or +## equal to `max`. +## +## # Panics +## +## This method will panic if `min` is equal to or greater than `max`. 
+def integer_between(min: ToInteger, max: ToInteger) -> Integer { + _INKOC.random_range(min.to_integer, max.to_integer) as Integer +} + +## Returns a random `Float` in the given range. +## +## The returned `Float` is greater than or equal to `min`, and lower than or +## equal to `max`. +## +## # Panics +## +## This method will panic if `min` is equal to or greater than `max`. +def float_between(min: ToFloat, max: ToFloat) -> Float { + _INKOC.random_range(min.to_float, max.to_float) as Float +} + +## Returns a `ByteArray` containing random bytes. +## +## The returned `ByteArray` will contain exactly `size` bytes. +## +## # Panics +## +## This method might panic if no random bytes could be generated. +def bytes(size: Integer) -> ByteArray { + _INKOC.random_bytes(size) +} diff --git a/runtime/src/std/string.inko b/runtime/src/std/string.inko index 1889fc986..7ea1ac5e5 100644 --- a/runtime/src/std/string.inko +++ b/runtime/src/std/string.inko @@ -219,10 +219,6 @@ impl Add!(String) for String { impl Hash for String { def hash(hasher: Hasher) { - # Because hashing Strings is a very common operation we use a dedicated VM - # instruction to write our String to the Hasher. This removes the need for - # heap allocating any objects and reduces the time spent hashing as much as - # possible. 
- _INKOC.hasher_write(hasher, self) + hasher.write_string(self) } } diff --git a/runtime/tests/main.inko b/runtime/tests/main.inko index 07a78a07d..2c5caed8a 100644 --- a/runtime/tests/main.inko +++ b/runtime/tests/main.inko @@ -31,7 +31,6 @@ import test::std::test_float import test::std::test_format import test::std::test_fs import test::std::test_hash_map -import test::std::test_hasher import test::std::test_inspect import test::std::test_integer import test::std::test_io @@ -42,6 +41,7 @@ import test::std::test_nil import test::std::test_object import test::std::test_os import test::std::test_process +import test::std::test_random import test::std::test_range import test::std::test_string import test::std::test_string_buffer diff --git a/runtime/tests/test/std/test_boolean.inko b/runtime/tests/test/std/test_boolean.inko index fec8840f6..357c7ef06 100644 --- a/runtime/tests/test/std/test_boolean.inko +++ b/runtime/tests/test/std/test_boolean.inko @@ -1,5 +1,5 @@ import std::format::DefaultFormatter -import std::hasher::DefaultHasher +import std::hash_map::DefaultHasher import std::test import std::test::assert @@ -70,8 +70,8 @@ test.group('std::boolean::Boolean.to_string') do (g) { test.group('std::boolean::Boolean.hash') do (g) { g.test('Hashing a Boolean') { - let hasher1 = DefaultHasher.new - let hasher2 = DefaultHasher.new + let hasher1 = DefaultHasher.new(1, 2) + let hasher2 = DefaultHasher.new(1, 2) Boolean.hash(hasher1) Boolean.hash(hasher2) @@ -79,7 +79,7 @@ test.group('std::boolean::Boolean.hash') do (g) { # The exact hash value may change between OS processes or releases, so all # we can do is assert that the value is the same every time we send `hash` # to `Boolean`. 
- assert.equal(hasher1.finish, hasher2.finish) + assert.equal(hasher1.to_hash, hasher2.to_hash) } } @@ -166,8 +166,8 @@ test.group('std::boolean::True.to_string') do (g) { test.group('std::boolean::True.hash') do (g) { g.test('Hashing a True') { - let hasher1 = DefaultHasher.new - let hasher2 = DefaultHasher.new + let hasher1 = DefaultHasher.new(1, 2) + let hasher2 = DefaultHasher.new(1, 2) True.hash(hasher1) True.hash(hasher2) @@ -175,7 +175,7 @@ test.group('std::boolean::True.hash') do (g) { # The exact hash value may change between OS processes or releases, so all # we can do is assert that the value is the same every time we send `hash` # to `True`. - assert.equal(hasher1.finish, hasher2.finish) + assert.equal(hasher1.to_hash, hasher2.to_hash) } } @@ -262,8 +262,8 @@ test.group('std::boolean::False.to_string') do (g) { test.group('std::boolean::False.hash') do (g) { g.test('Hashing a False') { - let hasher1 = DefaultHasher.new - let hasher2 = DefaultHasher.new + let hasher1 = DefaultHasher.new(1, 2) + let hasher2 = DefaultHasher.new(1, 2) False.hash(hasher1) False.hash(hasher2) @@ -271,7 +271,7 @@ test.group('std::boolean::False.hash') do (g) { # The exact hash value may change between OS processes or releases, so all # we can do is assert that the value is the same every time we send `hash` # to `True`. 
- assert.equal(hasher1.finish, hasher2.finish) + assert.equal(hasher1.to_hash, hasher2.to_hash) } } diff --git a/runtime/tests/test/std/test_float.inko b/runtime/tests/test/std/test_float.inko index 4730339e0..96458483d 100644 --- a/runtime/tests/test/std/test_float.inko +++ b/runtime/tests/test/std/test_float.inko @@ -1,6 +1,6 @@ import std::float::(self, NAN, INFINITY, NEGATIVE_INFINITY) import std::format::DefaultFormatter -import std::hasher::DefaultHasher +import std::hash_map::DefaultHasher import std::test import std::test::assert @@ -782,44 +782,43 @@ test.group('std::float::Float.format') do (g) { test.group('std::float::Float.hash') do (g) { g.test('Hashing a Float') { - let hasher1 = DefaultHasher.new - let hasher2 = DefaultHasher.new - let float = 1.5 + let hasher1 = DefaultHasher.new(1, 2) + let hasher2 = DefaultHasher.new(1, 2) - float.hash(hasher1) - float.hash(hasher2) + 1.5.hash(hasher1) + 1.5.hash(hasher2) - assert.equal(hasher1.finish, hasher2.finish) + assert.equal(hasher1.to_hash, hasher2.to_hash) } g.test('Hashing a NaN') { - let hasher1 = DefaultHasher.new - let hasher2 = DefaultHasher.new + let hasher1 = DefaultHasher.new(1, 2) + let hasher2 = DefaultHasher.new(1, 2) NAN.hash(hasher1) NAN.hash(hasher2) - assert.equal(hasher1.finish, hasher2.finish) + assert.equal(hasher1.to_hash, hasher2.to_hash) } g.test('Hashing Infinity') { - let hasher1 = DefaultHasher.new - let hasher2 = DefaultHasher.new + let hasher1 = DefaultHasher.new(1, 2) + let hasher2 = DefaultHasher.new(1, 2) INFINITY.hash(hasher1) INFINITY.hash(hasher2) - assert.equal(hasher1.finish, hasher2.finish) + assert.equal(hasher1.to_hash, hasher2.to_hash) } g.test('Hashing negative Infinity') { - let hasher1 = DefaultHasher.new - let hasher2 = DefaultHasher.new + let hasher1 = DefaultHasher.new(1, 2) + let hasher2 = DefaultHasher.new(1, 2) NEGATIVE_INFINITY.hash(hasher1) NEGATIVE_INFINITY.hash(hasher2) - assert.equal(hasher1.finish, hasher2.finish) + assert.equal(hasher1.to_hash, 
hasher2.to_hash) } } diff --git a/runtime/tests/test/std/test_hash_map.inko b/runtime/tests/test/std/test_hash_map.inko index 4c5e81781..1f1b88a09 100644 --- a/runtime/tests/test/std/test_hash_map.inko +++ b/runtime/tests/test/std/test_hash_map.inko @@ -1,8 +1,102 @@ -import std::hash_map::(self, Pair, Table) -import std::hasher::DefaultHasher +import std::hash_map::(self, DefaultHasher, Pair, RandomState, Table) import std::test import std::test::assert +test.group('std::hash_map::DefaultHasher.write_integer') do (g) { + g.test('Hashing an Integer') { + let hasher = DefaultHasher.new(1, 2) + + hasher.write_integer(10) + + let hash1 = hasher.to_hash + + hasher.reset + hasher.write_integer(10) + + let hash2 = hasher.to_hash + + assert.equal(hash1, hash2) + } +} + +test.group('std::hash_map::DefaultHasher.write_float') do (g) { + g.test('Hashing an Float') { + let hasher = DefaultHasher.new(1, 2) + + hasher.write_float(10.0) + + let hash1 = hasher.to_hash + + hasher.reset + hasher.write_float(10.0) + + let hash2 = hasher.to_hash + + assert.equal(hash1, hash2) + } +} + +test.group('std::hash_map::DefaultHasher.write_string') do (g) { + g.test('Hashing a String') { + let hasher = DefaultHasher.new(1, 2) + + hasher.write_string('hello') + + let hash1 = hasher.to_hash + + hasher.reset + hasher.write_string('hello') + + let hash2 = hasher.to_hash + + assert.equal(hash1, hash2) + } +} + +test.group('std::hash_map::DefaultHasher.write_boolean') do (g) { + g.test('Hashing a Boolean') { + let hasher = DefaultHasher.new(1, 2) + + hasher.write_boolean(True) + + let hash1 = hasher.to_hash + + hasher.reset + hasher.write_boolean(True) + + let hash2 = hasher.to_hash + + assert.equal(hash1, hash2) + } +} + +test.group('std::hash_map::DefaultHasher.reset') do (g) { + g.test('Resetting the internal state of a hasher') { + let hasher = DefaultHasher.new(1, 2) + let hash1 = hasher.to_hash + + hasher.write_integer(10) + hasher.reset + + let hash2 = hasher.to_hash + + assert.equal(hash1, 
hash2) + } +} + +test.group('std::hash_map::RandomState.to_hasher') do (g) { + g.test('Creating a DefaultHasher') { + let state = RandomState.new + let hasher1 = state.to_hasher + let hasher2 = state.to_hasher + + 10.hash(hasher1) + 10.hash(hasher2) + + assert.equal(hasher1.to_hash, hasher2.to_hash) + } +} + test.group('std::hash_map::Pair.distance') do (g) { g.test('Obtaining the distance of a Pair') { let pair = Pair.new(key: 'key', value: 'value', hash: 0) @@ -79,7 +173,7 @@ test.group('std::hash_map::Pair.hash') do (g) { test.group('std::hash_map::Table.buckets') do (g) { g.test('Obtaining the buckets in a Table') { - let table = Table.new(DefaultHasher.new) + let table = Table.new assert.equal(table.buckets, []) } @@ -87,7 +181,7 @@ test.group('std::hash_map::Table.buckets') do (g) { test.group('std::hash_map::Table.length') do (g) { g.test('Obtaining the number of pairs in a Table') { - let table = Table.new(DefaultHasher.new) + let table = Table.new let pair = Pair.new(key: 'key', value: 'value', hash: 0) table.insert_pair(pair) @@ -98,7 +192,7 @@ test.group('std::hash_map::Table.length') do (g) { test.group('std::hash_map::Table.resize') do (g) { g.test('Resizing a Table') { - let table = Table.new(DefaultHasher.new) + let table = Table.new assert.equal(table.buckets, []) @@ -110,7 +204,7 @@ test.group('std::hash_map::Table.resize') do (g) { test.group('std::hash_map::Table.rehash') do (g) { g.test('Rehashing a Table') { - let table = Table.new(DefaultHasher.new) + let table = Table.new let pair1 = Pair.new(key: 'a', value: 'value', hash: 0) let pair2 = Pair.new(key: 'b', value: 'value', hash: 1) @@ -138,7 +232,7 @@ test.group('std::hash_map::Table.rehash') do (g) { test.group('std::hash_map::Table.resize?') do (g) { g.test('Checking if a table needs to be resized') { - let table = Table.new(DefaultHasher.new) + let table = Table.new let pair = Pair.new(key: 'a', value: 'value', hash: 0) assert.false(table.resize?) 
@@ -151,7 +245,7 @@ test.group('std::hash_map::Table.resize?') do (g) { test.group('std::hash_map::Table.hash_key') do (g) { g.test("Hashing a key using the Table's hasher") { - let table = Table.new(DefaultHasher.new) + let table = Table.new # We can't really maky any guarantees about the exact value returned, all we # can guarantee is that the same key should produce the same hash. @@ -166,7 +260,7 @@ test.group('std::hash_map::Table.hash_key') do (g) { test.group('std::hash_map::Table.desired_bucket') do (g) { g.test('Obtaining the desired bucket index of a hash') { - let table = Table.new(DefaultHasher.new) + let table = Table.new table.resize @@ -178,7 +272,7 @@ test.group('std::hash_map::Table.desired_bucket') do (g) { test.group('std::hash_map::Table.insert_pair') do (g) { g.test('Inserting a Pair into a Table') { - let table = Table.new(DefaultHasher.new) + let table = Table.new let pair = Pair.new(key: 'key', value: 'value', hash: 0) table.insert_pair(pair) @@ -188,7 +282,7 @@ test.group('std::hash_map::Table.insert_pair') do (g) { } g.test('Inserting a Pair into a Table without incrementing its length') { - let table = Table.new(DefaultHasher.new) + let table = Table.new let pair = Pair.new(key: 'key', value: 'value', hash: 0) table.insert_pair(pair: pair, increment_length: False) @@ -198,7 +292,7 @@ test.group('std::hash_map::Table.insert_pair') do (g) { } g.test('Inserting a Pair into an existing bucket in a Table') { - let table = Table.new(DefaultHasher.new) + let table = Table.new let pair1 = Pair.new(key: 'a', value: 'a', hash: 0) let pair2 = Pair.new(key: 'b', value: 'b', hash: 0) @@ -214,7 +308,7 @@ test.group('std::hash_map::Table.insert_pair') do (g) { } g.test('Inserting a Pair using an already used key') { - let table = Table.new(DefaultHasher.new) + let table = Table.new let pair1 = Pair.new(key: 'a', value: 'a', hash: 0) let pair2 = Pair.new(key: 'a', value: 'b', hash: 0) @@ -228,7 +322,7 @@ test.group('std::hash_map::Table.insert_pair') do 
(g) { test.group('std::hash_map::Table.bucket_index') do (g) { g.test('Obtaining the bucket index of an existing key') { - let table = Table.new(DefaultHasher.new) + let table = Table.new let pair = Pair.new(key: 'a', value: 'a', hash: 0) table.insert_pair(pair) @@ -237,7 +331,7 @@ test.group('std::hash_map::Table.bucket_index') do (g) { } g.test('Obtaining the bucket index of a non existing key') { - let table = Table.new(DefaultHasher.new) + let table = Table.new assert.equal(table.bucket_index('a'), Nil) } @@ -245,7 +339,7 @@ test.group('std::hash_map::Table.bucket_index') do (g) { test.group('std::hash_map::Table.remove') do (g) { g.test('Removing an existing key from a Table') { - let table = Table.new(DefaultHasher.new) + let table = Table.new let pair = Pair.new(key: 'a', value: 'a', hash: 0) table.insert_pair(pair) @@ -255,13 +349,13 @@ test.group('std::hash_map::Table.remove') do (g) { } g.test('Removing a non-existing key from a Table') { - let table: Table!(String, String) = Table.new(DefaultHasher.new) + let table: Table!(String, String) = Table.new assert.equal(table.remove('a'), Nil) } g.test('Backwards shifting Pairs that follow the removed Pair') { - let table = Table.new(DefaultHasher.new) + let table = Table.new let pair1 = Pair.new(key: 'a', value: 'a', hash: 0) let pair2 = Pair.new(key: 'b', value: 'b', hash: 0) @@ -277,7 +371,7 @@ test.group('std::hash_map::Table.remove') do (g) { test.group('std::hash_map::Table.backwards_shift') do (g) { g.test('Performing a backwards shift starting at a particular bucket') { - let table = Table.new(DefaultHasher.new) + let table = Table.new let pair1 = Pair.new(key: 'a', value: 'a', hash: 0) let pair2 = Pair.new(key: 'b', value: 'b', hash: 0) let pair3 = Pair.new(key: 'c', value: 'c', hash: 0) @@ -302,7 +396,7 @@ test.group('std::hash_map::Table.backwards_shift') do (g) { test.group('std::hash_map::Table.[]') do (g) { g.test('Obtaining the value of a Pair by its key') { - let table = 
Table.new(DefaultHasher.new) + let table = Table.new let pair = Pair.new(key: 'a', value: 'b', hash: 0) table.insert_pair(pair) @@ -311,7 +405,7 @@ test.group('std::hash_map::Table.[]') do (g) { } g.test('Obtaining the value of a Pair using a non-existing key') { - let table: Table!(String, String) = Table.new(DefaultHasher.new) + let table: Table!(String, String) = Table.new assert.equal(table['a'], Nil) } @@ -319,7 +413,7 @@ test.group('std::hash_map::Table.[]') do (g) { test.group('std::hash_map::Table.[]=') do (g) { g.test('Creating an inserting a Pair') { - let table = Table.new(DefaultHasher.new) + let table = Table.new let val1 = table['a'] = 'foo' let val2 = table['b'] = 'bar' @@ -333,7 +427,7 @@ test.group('std::hash_map::Table.[]=') do (g) { } g.test('Overwriting an existing Pair') { - let table = Table.new(DefaultHasher.new) + let table = Table.new let val1 = table['a'] = 'foo' let val2 = table['a'] = 'bar' diff --git a/runtime/tests/test/std/test_hasher.inko b/runtime/tests/test/std/test_hasher.inko deleted file mode 100644 index 27bb99872..000000000 --- a/runtime/tests/test/std/test_hasher.inko +++ /dev/null @@ -1,35 +0,0 @@ -import std::hasher::DefaultHasher -import std::test -import std::test::assert - -test.group('std::hasher::DefaultHasher.write_integer') do (g) { - g.test('Hashing an Integer') { - let hasher = DefaultHasher.new - - hasher.write_integer(10) - - let hash1 = hasher.finish - - hasher.write_integer(10) - - let hash2 = hasher.finish - - assert.equal(hash1, hash2) - } -} - -test.group('std::hasher::DefaultHasher.write_float') do (g) { - g.test('Hashing an Float') { - let hasher = DefaultHasher.new - - hasher.write_float(10.0) - - let hash1 = hasher.finish - - hasher.write_float(10.0) - - let hash2 = hasher.finish - - assert.equal(hash1, hash2) - } -} diff --git a/runtime/tests/test/std/test_integer.inko b/runtime/tests/test/std/test_integer.inko index 76735511b..0c5b9c868 100644 --- a/runtime/tests/test/std/test_integer.inko +++ 
b/runtime/tests/test/std/test_integer.inko @@ -1,4 +1,4 @@ -import std::hasher::DefaultHasher +import std::hash_map::DefaultHasher import std::integer import std::test import std::test::assert @@ -194,14 +194,13 @@ test.group('std::integer::Integer.opposite') do (g) { test.group('std::integer::Integer.hash') do (g) { g.test('Hashing an Integer') { - let hasher1 = DefaultHasher.new - let hasher2 = DefaultHasher.new - let int = 5 + let hasher1 = DefaultHasher.new(1, 2) + let hasher2 = DefaultHasher.new(1, 2) - int.hash(hasher1) - int.hash(hasher2) + 5.hash(hasher1) + 5.hash(hasher2) - assert.equal(hasher1.finish, hasher2.finish) + assert.equal(hasher1.to_hash, hasher2.to_hash) } } diff --git a/runtime/tests/test/std/test_random.inko b/runtime/tests/test/std/test_random.inko new file mode 100644 index 000000000..f9e6f865b --- /dev/null +++ b/runtime/tests/test/std/test_random.inko @@ -0,0 +1,117 @@ +import std::mirror +import std::process +import std::random +import std::test +import std::test::assert + +test.group('std::random.integer') do (g) { + g.test('Generating a random Integer') { + let int = random.integer + let int_mirror = mirror.reflect_object(int) + + # There isn't much we can test, other than making sure the value is indeed + # an Integer. + assert.true(int_mirror.instance_of?(Integer)) + } +} + +test.group('std::random.incremental_integer') do (g) { + g.test('Generating a random incremental Integer') { + # We pin the code below so that the process isn't moved between threads + # before the assertion. A process being moved could result in it producing + # the same value twice. 
+ process.pinned { + let one = random.incremental_integer + let two = random.incremental_integer + + assert.not_equal(one, two) + } + } +} + +test.group('std::random.float') do (g) { + g.test('Generating a random Float') { + let float = random.float + let float_mirror = mirror.reflect_object(float) + + # There isn't much we can test, other than making sure the value is indeed + # a Float. + assert.true(float_mirror.instance_of?(Float)) + } +} + +test.group('std::random.integer_between') do (g) { + g.test('Generating an Integer in a range') { + let min = 0 + let max = 10 + let val = random.integer_between(min: min, max: max) + + assert.true((val >= min).and { val <= max }) + } + + g.test('Generating an Integer in a really big range') { + let min = 1_000_000_000_000_000_000_000 + let max = 5_000_000_000_000_000_000_000 + let val = random.integer_between(min: min, max: max) + + assert.true((val >= min).and { val <= max }) + } + + g.test('Generating a negative Integer') { + let min = -10 + let max = 0 + let val = random.integer_between(min: min, max: max) + + assert.true((val >= min).and { val <= max }) + } + + g.test('Using a minimum value greater than the maximum value') { + assert.panic { + random.integer_between(min: 10, max: 0) + } + } + + g.test('Using a minimum and maximum value that are equal') { + assert.panic { + random.integer_between(min: 0, max: 0) + } + } +} + +test.group('std::random.float_between') do (g) { + g.test('Generating a Float in a range') { + let min = 0.0 + let max = 10.0 + let val = random.float_between(min: min, max: max) + + assert.true((val >= min).and { val <= max }) + } + + g.test('Generating a negative Float') { + let min = -10.0 + let max = 0.0 + let val = random.float_between(min: min, max: max) + + assert.true((val >= min).and { val <= max }) + } + + g.test('Using a minimum value greater than the maximum value') { + assert.panic { + random.float_between(min: 10.0, max: 0.0) + } + } + + g.test('Using a minimum and maximum value that 
are equal') { + assert.panic { + random.float_between(min: 0.0, max: 0.0) + } + } +} + +test.group('std::random.bytes') do (g) { + g.test('Generating a ByteArray containing random bytes') { + let bytes = random.bytes(size: 4) + + assert.equal(bytes.length, 4) + } +} diff --git a/runtime/tests/test/std/test_string.inko b/runtime/tests/test/std/test_string.inko index 1ceb155e4..6485f015d 100644 --- a/runtime/tests/test/std/test_string.inko +++ b/runtime/tests/test/std/test_string.inko @@ -1,4 +1,5 @@ import std::fs::path::Path +import std::hash_map::DefaultHasher import std::test import std::test::assert @@ -115,3 +116,15 @@ test.group('std::string::String.to_float') do (g) { assert.equal('foo'.to_float, 0.0) } } + +test.group('std::string::String.hash') do (g) { + g.test('Hashing a String') { + let hasher1 = DefaultHasher.new(1, 2) + let hasher2 = DefaultHasher.new(1, 2) + + 'foo'.hash(hasher1) + 'foo'.hash(hasher2) + + assert.equal(hasher1.to_hash, hasher2.to_hash) + } +} diff --git a/vm/Cargo.lock b/vm/Cargo.lock index 6b3510f8b..45102831d 100644 --- a/vm/Cargo.lock +++ b/vm/Cargo.lock @@ -372,7 +372,9 @@ dependencies = [ "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)", "parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", "rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "siphasher 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "socket2 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", "time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", "wepoll-binding 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", @@ -502,6 +504,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", 
"num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -585,6 +588,18 @@ dependencies = [ "proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rand" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.51 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rand" version = "0.6.5" @@ -795,6 +810,11 @@ name = "shlex" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "siphasher" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "smallvec" version = "0.6.9" @@ -1053,6 +1073,7 @@ dependencies = [ "checksum proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)" = "4d317f9caece796be1980837fd5cb3dfec5613ebdb04ad0956deea83ce168915" "checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0" "checksum quote 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "faf4799c5d274f3868a4aae320a0a182cbd2baee377b378f080e16a23e9d80db" +"checksum rand 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c618c47cd3ebd209790115ab837de41425723956ad3ce2e6a7f09890947cacb9" "checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca" "checksum rand_chacha 0.1.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" "checksum rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" @@ -1078,6 +1099,7 @@ dependencies = [ "checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" "checksum shlex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" +"checksum siphasher 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9913c75df657d84a03fa689c016b0bb2863ff0b497b26a8d6e9703f8d5df03a8" "checksum smallvec 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c4488ae950c49d403731982257768f48fada354a5203fe81f9bb6f43ca9002be" "checksum socket2 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "4e626972d3593207547f14bf5fc9efa4d0e7283deb73fef1dff313dae9ab8878" "checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 09d273b48..e75e82aec 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -25,7 +25,6 @@ fnv = "^1.0" time = "^0.1" num-integer = "^0.1" float-cmp = "^0.4" -num-bigint = "^0.2" num-traits = "^0.2" dirs = "^1.0" libloading = "^0.5" @@ -36,6 +35,12 @@ crossbeam-channel = "^0.3" crossbeam-queue = "^0.1" libc = "^0.2" jemallocator = { version = "^0.1", optional = true } +siphasher = "^0.3" +rand = "^0.6" + +[dependencies.num-bigint] +version = "^0.2" +features = ["rand"] [dependencies.socket2] version = "^0.3.9" diff --git a/vm/src/hasher.rs b/vm/src/hasher.rs index 
b27331bfe..dc98245bf 100644 --- a/vm/src/hasher.rs +++ b/vm/src/hasher.rs @@ -1,6 +1,6 @@ //! Types and methods for hashing objects. use num_bigint::BigInt; -use std::collections::hash_map::DefaultHasher; +use siphasher::sip::SipHasher13; use std::hash::{Hash, Hasher as HasherTrait}; use std::i64; use std::u64; @@ -10,13 +10,17 @@ const U64_I64_DIFF: u64 = u64::MAX - i64::MAX as u64; #[derive(Clone)] pub struct Hasher { - hasher: DefaultHasher, + hasher: SipHasher13, + key0: u64, + key1: u64, } impl Hasher { - pub fn new() -> Self { + pub fn new(key0: u64, key1: u64) -> Self { Hasher { - hasher: DefaultHasher::new(), + hasher: SipHasher13::new_with_keys(key0, key1), + key0, + key1, } } @@ -42,23 +46,21 @@ impl Hasher { value.hash(&mut self.hasher); } - pub fn finish(&mut self) -> i64 { + pub fn to_hash(&self) -> i64 { let hash = self.hasher.finish(); - // Rust's DefaultHasher does not reset its internal state upon calling - // "finish", which can be very confusing. To work around this we swap - // the hasher with a new one. - self.hasher = DefaultHasher::new(); - self.convert_hash(hash) } + pub fn reset(&mut self) { + self.hasher = SipHasher13::new_with_keys(self.key0, self.key1); + } + fn convert_hash(&self, raw_hash: u64) -> i64 { - // Rust's hasher produces a u64. This value is usually too large to - // store as an i64 (even when heap allocating), requiring the use of a - // bigint. To work around that we subtract the difference between the - // maximum u64 and i64 values, ensuring our final hash value fits in a - // i64. + // Hashers produce a u64. This value is usually too large to store as an + // i64 (even when heap allocating), requiring the use of a bigint. To + // work around that we subtract the difference between the maximum u64 + // and i64 values, ensuring our final hash value fits in a i64. 
if raw_hash > i64::MAX as u64 { (raw_hash - U64_I64_DIFF) as i64 } else { @@ -75,43 +77,46 @@ mod tests { #[test] fn test_write_float() { - let mut hasher = Hasher::new(); + let mut hasher = Hasher::new(1, 2); hasher.write_float(10.5); - let hash1 = hasher.finish(); + let hash1 = hasher.to_hash(); + hasher.reset(); hasher.write_float(10.5); - let hash2 = hasher.finish(); + let hash2 = hasher.to_hash(); assert_eq!(hash1, hash2); } #[test] fn test_write_string() { - let mut hasher = Hasher::new(); + let mut hasher = Hasher::new(1, 2); let string = "hello".to_string(); hasher.write_string(&string); - let hash1 = hasher.finish(); + let hash1 = hasher.to_hash(); + hasher.reset(); hasher.write_string(&string); - let hash2 = hasher.finish(); + let hash2 = hasher.to_hash(); assert_eq!(hash1, hash2); } #[test] fn test_finish() { - let mut hasher = Hasher::new(); + let mut hasher = Hasher::new(1, 2); let mut hashes = Vec::new(); for _ in 0..2 { hasher.write_integer(10_i64); - hashes.push(hasher.finish()); + hashes.push(hasher.to_hash()); + hasher.reset(); } assert_eq!(hashes[0], hashes[1]); @@ -119,7 +124,7 @@ mod tests { #[test] fn test_convert_hash() { - let hasher = Hasher::new(); + let hasher = Hasher::new(1, 2); assert_eq!(hasher.convert_hash(u64::MAX), 9223372036854775807_i64); assert_eq!(hasher.convert_hash(i64::MAX as u64), 0); diff --git a/vm/src/object_pointer.rs b/vm/src/object_pointer.rs index b276f08d5..eb8571465 100644 --- a/vm/src/object_pointer.rs +++ b/vm/src/object_pointer.rs @@ -451,6 +451,10 @@ impl ObjectPointer { self.is_tagged_integer() || self.get().value.is_integer() } + pub fn is_float(&self) -> bool { + self.float_value().is_ok() + } + pub fn is_bigint(&self) -> bool { if self.is_integer() { false @@ -603,6 +607,7 @@ impl ObjectPointer { def_value_getter!(binding_value, get, as_binding, RcBinding); def_value_getter!(bigint_value, get, as_bigint, &BigInt); def_value_getter!(hasher_value_mut, get_mut, as_hasher_mut, &mut Hasher); + 
def_value_getter!(hasher_value, get, as_hasher, &Hasher); def_value_getter!(byte_array_value, get, as_byte_array, &Vec); def_value_getter!( diff --git a/vm/src/object_value.rs b/vm/src/object_value.rs index 7f073f4db..94fb948b8 100644 --- a/vm/src/object_value.rs +++ b/vm/src/object_value.rs @@ -257,6 +257,16 @@ impl ObjectValue { } } + pub fn as_hasher(&self) -> Result<&Hasher, String> { + match *self { + ObjectValue::Hasher(ref val) => Ok(val), + _ => { + Err("ObjectValue::as_hasher() called on a non hasher" + .to_string()) + } + } + } + pub fn as_library(&self) -> Result<&RcLibrary, String> { match *self { ObjectValue::Library(ref lib) => Ok(lib), diff --git a/vm/src/scheduler/process_worker.rs b/vm/src/scheduler/process_worker.rs index d765d6960..aa6e26bdf 100644 --- a/vm/src/scheduler/process_worker.rs +++ b/vm/src/scheduler/process_worker.rs @@ -4,6 +4,12 @@ use crate::process::RcProcess; use crate::scheduler::pool_state::PoolState; use crate::scheduler::queue::RcQueue; use crate::scheduler::worker::Worker; +use num_bigint::BigInt; +use num_bigint::RandBigInt; +use rand::distributions::uniform::{SampleBorrow, SampleUniform}; +use rand::distributions::{Distribution, Standard}; +use rand::rngs::ThreadRng; +use rand::{thread_rng, Rng}; /// The state that a worker is in. #[derive(Eq, PartialEq, Debug)] @@ -22,6 +28,15 @@ pub struct ProcessWorker { /// The unique ID of this worker, used for pinning jobs. pub id: usize, + /// A randomly generated integer that is incremented upon request. This can + /// be used as a seed for hashing. The integer is incremented to ensure + /// every seed is unique, without having to generate an entirely new random + /// number. + pub random_number: u64, + + /// The random number generator for this thread. + pub rng: ThreadRng, + /// The queue owned by this worker. 
queue: RcQueue, @@ -41,6 +56,8 @@ impl ProcessWorker { ) -> Self { ProcessWorker { id, + random_number: rand::random(), + rng: thread_rng(), queue, state, mode: Mode::Normal, @@ -66,6 +83,52 @@ impl ProcessWorker { self.mode = Mode::Normal; } + pub fn random_incremental_number(&mut self) -> u64 { + self.random_number = self.random_number.wrapping_add(1); + + self.random_number + } + + pub fn random_number<T>(&mut self) -> T + where + Standard: Distribution<T>, + { + self.rng.gen() + } + + pub fn random_number_between<T: SampleUniform, V>( + &mut self, + min: V, + max: V, + ) -> T + where + V: SampleBorrow<T> + Sized, + { + self.rng.gen_range(min, max) + } + + pub fn random_bigint_between( + &mut self, + min: &BigInt, + max: &BigInt, + ) -> BigInt { + self.rng.gen_bigint_range(min, max) + } + + /// Returns `size` randomly generated bytes, or an error message if the + /// random number generator could not fill the buffer. + pub fn random_bytes(&mut self, size: usize) -> Result<Vec<u8>, String> { + // Zero-initialise the buffer rather than exposing uninitialised memory + // through `set_len`; `try_fill` overwrites every byte on success. + let mut bytes = vec![0_u8; size]; + + self.rng + .try_fill(&mut bytes[..]) + .map_err(|e| e.to_string())?; + + Ok(bytes) + } + /// Performs a single iteration of the normal work loop. fn normal_iteration(&mut self, callback: &F) where @@ -306,4 +369,49 @@ mod tests { assert_eq!(worker.mode, Mode::Normal); } + + #[test] + fn test_random_number() { + let mut worker = worker(); + + // There is no particular way we can test the exact value, so this is + // just a smoke test to see if the method works or not. 
+ worker.random_number::(); + } + + #[test] + fn test_random_number_between() { + let mut worker = worker(); + let number: u8 = worker.random_number_between(0, 10); + + assert!(number <= 10); + } + + #[test] + fn test_random_bigint_between() { + let mut worker = worker(); + let min = BigInt::from(0); + let max = BigInt::from(10); + let number = worker.random_bigint_between(&min, &max); + + assert!(number >= min && number <= max); + } + + #[test] + fn test_random_incremental_number() { + let mut worker = worker(); + let num1 = worker.random_incremental_number(); + let num2 = worker.random_incremental_number(); + + assert_eq!(num2 - num1, 1); + } + + #[test] + fn test_random_bytes() { + let mut worker = worker(); + let bytes = worker.random_bytes(4).unwrap(); + + assert_eq!(bytes.len(), 4); + assert_eq!(bytes.capacity(), 4); + } } diff --git a/vm/src/vm/hasher.rs b/vm/src/vm/hasher.rs index a487431f8..b99be09a7 100644 --- a/vm/src/vm/hasher.rs +++ b/vm/src/vm/hasher.rs @@ -5,27 +5,40 @@ use crate::object_value; use crate::process::RcProcess; use crate::vm::state::RcState; -pub fn create(state: &RcState, process: &RcProcess) -> ObjectPointer { - process - .allocate(object_value::hasher(Hasher::new()), state.hasher_prototype) +pub fn create( + state: &RcState, + process: &RcProcess, + key0_ptr: ObjectPointer, + key1_ptr: ObjectPointer, +) -> Result { + let key0 = key0_ptr.u64_value()?; + let key1 = key1_ptr.u64_value()?; + let hasher = Hasher::new(key0, key1); + + Ok(process.allocate(object_value::hasher(hasher), state.hasher_prototype)) } pub fn write( - state: &RcState, hasher: ObjectPointer, value: ObjectPointer, ) -> Result { value.hash_object(hasher.hasher_value_mut()?)?; - Ok(state.nil_object) + Ok(value) } -pub fn finish( +pub fn to_hash( state: &RcState, process: &RcProcess, hasher: ObjectPointer, ) -> Result { - let result = hasher.hasher_value_mut()?.finish(); + let result = hasher.hasher_value()?.to_hash(); Ok(process.allocate_i64(result, 
state.integer_prototype)) } + +pub fn reset(hasher: ObjectPointer) -> Result { + hasher.hasher_value_mut()?.reset(); + + Ok(hasher) +} diff --git a/vm/src/vm/instruction.rs b/vm/src/vm/instruction.rs index d5f865057..3a17be6ef 100644 --- a/vm/src/vm/instruction.rs +++ b/vm/src/vm/instruction.rs @@ -119,7 +119,7 @@ pub enum InstructionType { StringConcat, HasherNew, HasherWrite, - HasherFinish, + HasherToHash, Stacktrace, ProcessTerminateCurrent, StringSlice, @@ -178,6 +178,10 @@ pub enum InstructionType { SocketListen, SocketConnect, SocketShutdown, + HasherReset, + RandomNumber, + RandomRange, + RandomBytes, } /// Struct for storing information about a single instruction. diff --git a/vm/src/vm/machine.rs b/vm/src/vm/machine.rs index a334858c5..8249e7250 100644 --- a/vm/src/vm/machine.rs +++ b/vm/src/vm/machine.rs @@ -28,6 +28,7 @@ use crate::vm::io; use crate::vm::module; use crate::vm::object; use crate::vm::process; +use crate::vm::random; use crate::vm::socket; use crate::vm::state::RcState; use crate::vm::string; @@ -1405,7 +1406,9 @@ impl Machine { } InstructionType::HasherNew => { let reg = instruction.arg(0); - let res = hasher::create(&self.state, process); + let key0 = context.get_register(instruction.arg(1)); + let key1 = context.get_register(instruction.arg(2)); + let res = hasher::create(&self.state, process, key0, key1)?; context.set_register(reg, res); } @@ -1413,14 +1416,21 @@ impl Machine { let reg = instruction.arg(0); let hasher = context.get_register(instruction.arg(1)); let value = context.get_register(instruction.arg(2)); - let res = hasher::write(&self.state, hasher, value)?; + let res = hasher::write(hasher, value)?; context.set_register(reg, res); } - InstructionType::HasherFinish => { + InstructionType::HasherToHash => { let reg = instruction.arg(0); let hasher = context.get_register(instruction.arg(1)); - let res = hasher::finish(&self.state, process, hasher)?; + let res = hasher::to_hash(&self.state, process, hasher)?; + + 
context.set_register(reg, res); + } + InstructionType::HasherReset => { + let reg = instruction.arg(0); + let hasher = context.get_register(instruction.arg(1)); + let res = hasher::reset(hasher)?; context.set_register(reg, res); } @@ -2024,6 +2034,31 @@ impl Machine { index ); + context.set_register(reg, res); + } + InstructionType::RandomNumber => { + let reg = instruction.arg(0); + let kind = context.get_register(instruction.arg(1)); + let res = + random::number(&self.state, process, worker, kind)?; + + context.set_register(reg, res); + } + InstructionType::RandomRange => { + let reg = instruction.arg(0); + let min = context.get_register(instruction.arg(1)); + let max = context.get_register(instruction.arg(2)); + let res = + random::range(&self.state, process, worker, min, max)?; + + context.set_register(reg, res); + } + InstructionType::RandomBytes => { + let reg = instruction.arg(0); + let size = context.get_register(instruction.arg(1)); + let res = + random::bytes(&self.state, process, worker, size)?; + context.set_register(reg, res); } }; diff --git a/vm/src/vm/mod.rs b/vm/src/vm/mod.rs index 701369640..2596d3d02 100644 --- a/vm/src/vm/mod.rs +++ b/vm/src/vm/mod.rs @@ -12,6 +12,7 @@ pub mod machine; pub mod module; pub mod object; pub mod process; +pub mod random; pub mod socket; pub mod state; pub mod string; diff --git a/vm/src/vm/random.rs b/vm/src/vm/random.rs new file mode 100644 index 000000000..1d0bf5d3c --- /dev/null +++ b/vm/src/vm/random.rs @@ -0,0 +1,112 @@ +//! VM functions for generating random values. +use crate::object_pointer::ObjectPointer; +use crate::object_value; +use crate::process::RcProcess; +use crate::scheduler::process_worker::ProcessWorker; +use crate::vm::state::RcState; +use num_bigint::{BigInt, ToBigInt}; + +const INTEGER: i64 = 0; +const INCREMENTAL_INTEGER: i64 = 1; +const FLOAT: i64 = 2; + +macro_rules! 
verify_min_max { + ($min:expr, $max:expr) => { + if $min >= $max { + return Err(format!( + "The minimum range value {} must be smaller than the maximum {}", + $min, $max + )); + } + }; +} + +/// Generates a random value of the kind selected by `kind_ptr`: `INTEGER`, +/// `INCREMENTAL_INTEGER` or `FLOAT`. Any other kind results in an error. +pub fn number( + state: &RcState, + process: &RcProcess, + worker: &mut ProcessWorker, + kind_ptr: ObjectPointer, +) -> Result<ObjectPointer, String> { + let kind = kind_ptr.integer_value()?; + + match kind { + INTEGER => Ok(process + .allocate_i64(worker.random_number(), state.integer_prototype)), + // Incremental integers must come from the worker's wrapping counter + // rather than from a freshly generated random number. + INCREMENTAL_INTEGER => Ok(process.allocate_u64( + worker.random_incremental_number(), + state.integer_prototype, + )), + FLOAT => Ok(process.allocate( + object_value::float(worker.random_number()), + state.float_prototype, + )), + _ => Err(format!( + "{} is not a valid type to generate a random value for", + kind + )), + } +} + +pub fn range( + state: &RcState, + process: &RcProcess, + worker: &mut ProcessWorker, + min_ptr: ObjectPointer, + max_ptr: ObjectPointer, +) -> Result<ObjectPointer, String> { + if min_ptr.is_integer() && max_ptr.is_integer() { + let min = min_ptr.integer_value()?; + let max = max_ptr.integer_value()?; + + verify_min_max!(min, max); + + Ok(process.allocate_i64( + worker.random_number_between(min, max), + state.integer_prototype, + )) + } else if min_ptr.is_bigint() || max_ptr.is_bigint() { + let min = to_bigint(min_ptr)?; + let max = to_bigint(max_ptr)?; + + verify_min_max!(min, max); + + Ok(process.allocate( + object_value::bigint(worker.random_bigint_between(&min, &max)), + state.integer_prototype, + )) + } else if min_ptr.is_float() || max_ptr.is_float() { + let min = min_ptr.float_value()?; + let max = max_ptr.float_value()?; + + verify_min_max!(min, max); + + Ok(process.allocate( + object_value::float(worker.random_number_between(min, max)), + state.float_prototype, + )) + } else { + Err( + "Random values can only be generated for Integers and Floats" + .to_string(), + ) + } +} + +pub fn bytes( + state: &RcState, + process: &RcProcess, + worker: &mut ProcessWorker, + size_ptr: ObjectPointer, +) -> Result<ObjectPointer, String> { + let 
size = size_ptr.usize_value()?; + let bytes = worker.random_bytes(size)?; + + Ok(process + .allocate(object_value::byte_array(bytes), state.byte_array_prototype)) +} + +fn to_bigint(pointer: ObjectPointer) -> Result<BigInt, String> { + if let Ok(bigint) = pointer.bigint_value() { + Ok(bigint.clone()) + } else { + Ok(pointer.integer_value()?.to_bigint().unwrap()) + } +}