From 72349878f6b38951bb513c396b248fc16efb0343 Mon Sep 17 00:00:00 2001 From: Daniel Yates Date: Sun, 16 Jan 2022 16:01:28 +0000 Subject: [PATCH] Implement support for custom writers This mirrors recent changes submitted for the deserialization API and provides a generic system for custom writer support to control how and where the byte stream generated during serialization is written. The change as-is is fully backwards compatible and passes the existing and expanded test cases. Primary use cases would align with existing async serialization attempts but don't put the complexity of thread management or throttling logic into the library, instead leaving the minutia of how those aspects are handled to callers. It also enables some other advanced use cases such as streaming the data to a destination without buffering it fully beforehand, which could enable efficiencies elsewhere. --- LibSerialize.lua | 136 +++++++++++++++++++++++++++++++++++++++++------ README.md | 59 +++++++++++++++++--- tests.lua | 136 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 308 insertions(+), 23 deletions(-) diff --git a/LibSerialize.lua b/LibSerialize.lua index 7e1a7e5..eba6194 100644 --- a/LibSerialize.lua +++ b/LibSerialize.lua @@ -104,7 +104,7 @@ end * **`LibSerialize:SerializeEx(opts, ...)`** Arguments: - * `opts`: options (see below) + * `opts`: options (see [Serialization Options]) * `...`: a variable number of serializable values Returns: @@ -118,7 +118,7 @@ end Returns: * `result`: `...` serialized as a string - Calls `SerializeEx(opts, ...)` with the default options (see below) + Calls `SerializeEx(opts, ...)` with the default serialization options (see [Serialization Options]) * **`LibSerialize:Deserialize(input)`** @@ -153,19 +153,26 @@ end This will occur if any of the following exceed 16777215: any string length, any table key count, number of unique strings, number of unique tables. 
It will also occur by default if any unserializable types are encountered, -though that behavior may be disabled (see options). +though that behavior may be disabled (see [Serialization Options]). `Deserialize()` and `DeserializeValue()` are equivalent, except the latter returns the deserialization result directly and will not catch any Lua errors that may occur when deserializing invalid input. -Note that none of the serialization/deseriazation methods support reentrancy, -and modifying tables during the serialization process is unspecified and -should be avoided. Table serialization is multi-phased and assumes a consistent -state for the key/value pairs across the phases. +As of recent releases, the library supports reentrancy and concurrent usage +from multiple threads (coroutines) through the public API. Modifying tables +during the serialization process is unspecified and should be avoided. +Table serialization is multi-phased and assumes a consistent state for the +key/value pairs across the phases. +It is permitted for any user-supplied functions to suspend the current +thread during the serialization or deserialization process. It is however +not possible to yield the current thread if the `Deserialize()` API is used, +as this function inserts a C call boundary onto the call stack. This issue +does not affect the `DeserializeValue()` function. -## Options: + +## Serialization Options: The following serialization options are supported: * `errorOnUnserializableType`: `boolean` (default true) * `true`: unserializable types will raise a Lua error @@ -182,6 +189,10 @@ The following serialization options are supported: table encountered during serialization. The function must return true for the pair to be serialized. It may be called multiple times on a table for the same key/value pair. See notes on reeentrancy and table modification. 
+* `writer`: `any` (default nil) + * If specified, the object referenced by this field will be checked to see + if it implements the [Writer protocol]. If so, the functions it defines + will be used to control how serialized data is written. If an option is unspecified in the table, then its default will be used. This means that if an option `foo` defaults to true, then: @@ -189,6 +200,35 @@ This means that if an option `foo` defaults to true, then: * `myOpts.foo = nil`: option `foo` is true +## Writer Protocol +The library supports customizing how byte strings are written during the +serialization process through the use of an object that implements the +"Writer" protocol. This enables advanced use cases such as batched or throttled +serialization via coroutines, or streaming the data to a target instead of +processing it all in one giant chunk. + +Any value stored on the `writer` key of the options table passed to the +`SerializeEx()` function will be inspected and indexed to search for the +following keys. If the required keys are all found, all operations provided +by the writer will override the default behaviors otherwise implemented by +the library. Otherwise, the writer is ignored and not used for any operations. + +* `WriteString`: `function(writer, str)` (required) + * This function will be called each time the library submits a byte string + that was created as a result of serializing data. + + If this function is not supplied, the supplied `writer` is considered + incomplete and will be ignored for all operations. + +* `Flush`: `function(writer)` (optional) + * If specified, this function will be called at the end of the serialization + process. It may return any number of values - including zero - all of + which will be passed through to the caller of `SerializeEx()` verbatim. + + The default behavior if this function is not specified - and if the writer + is otherwise valid - is a no-op that returns no values. 
+ + ## Customizing table serialization: For any serialized table, LibSerialize will check for the presence of a metatable key `__LibSerialize`. It will be interpreted as a table with @@ -384,6 +424,29 @@ local function GetRequiredBytesNumber(value) return 7 end +-- Queries a given object for the value assigned to a specific key. +-- +-- If the given object cannot be indexed, an error may be raised by the Lua +-- implementation. +local function GetValueByKey(object, key) + return object[key] +end + +-- Queries a given object for the value assigned to a specific key, returning +-- it if non-nil or giving back a default. +-- +-- If the given object cannot be indexed, the default will be returned and +-- no error raised. +local function GetValueByKeyOrDefault(object, key, default) + local ok, value = pcall(GetValueByKey, object, key) + + if not ok or value == nil then + return default + else + return value + end +end + -- Returns whether the value (a number) is NaN. local function IsNaN(value) -- With floating point optimizations enabled all comparisons involving @@ -416,6 +479,11 @@ local function IsArrayKey(k, arrayCount) return type(k) == "number" and k >= 1 and k <= arrayCount and not IsFloatingPoint(k) end +-- Portable no-op function that does absolutely nothing, and pushes no returns +-- onto the stack. +local function Noop() +end + -- Sort compare function which is used to sort table keys to ensure that the -- serialization of maps is stable. We arbitrarily put strings first, then -- numbers, and finally booleans. @@ -453,7 +521,6 @@ local DebugPrint = function(...) print(...) end - --[[--------------------------------------------------------------------------- Helpers for reading/writing streams of bytes from/to a string --]]--------------------------------------------------------------------------- @@ -461,19 +528,17 @@ end -- Creates a writer to lazily construct a string over multiple writes. -- Return values: -- 1. WriteString(str) --- 2. 
Flush() -local function CreateWriter() +-- 2. FlushWriter() + +local function CreateBufferedWriter() local bufferSize = 0 local buffer = {} - -- Write the entire string into the writer. local function WriteString(str) - -- DebugPrint("Writing string:", str, #str) bufferSize = bufferSize + 1 buffer[bufferSize] = str end - -- Return a string built from the previous calls to WriteString. local function FlushWriter() local flushed = table_concat(buffer, "", 1, bufferSize) bufferSize = 0 @@ -483,6 +548,47 @@ local function CreateWriter() return WriteString, FlushWriter end +local function CreateWriterFromObject(object) + -- Note that for custom writers if no Flush implementation is given the + -- default is a no-op; this means that no values will be returned to the + -- caller of Serialize/SerializeEx. It's expected in such a case that + -- you will have written the strings elsewhere yourself; perhaps having + -- already submitted them for transmission via a comms API for example. + + local writeString = object.WriteString -- Assumed to exist. + local flushWriter = GetValueByKeyOrDefault(object, "Flush", Noop) + + -- To minimize changes elsewhere with this initial implementation, this + -- function must return new closures that bind the 'object' to the first + -- parameter of the above functions. This could be optimized to remove the + -- indirection, but requires modifying all call sites of these functions. + + local function WriteString(str) + writeString(object, str) + end + + local function FlushWriter() + return flushWriter(object) + end + + return WriteString, FlushWriter +end + +local function CreateWriter(object) + -- If the supplied object implements the required functions to satisfy + -- the Writer interface, it will be used exclusively. Otherwise if any + -- of those are missing, the object is entirely ignored and we'll use + -- the original buffer-of-strings approach. 
+ + local writeString = GetValueByKeyOrDefault(object, "WriteString", nil) + + if writeString == nil then + return CreateBufferedWriter() + else + return CreateWriterFromObject(object) + end +end + -- Creates a reader to sequentially read bytes from the input string. -- Return values: -- 1. ReadBytes(bytelen) @@ -645,7 +751,7 @@ local function CreateSerializer(opts) state._tableRefs = {} -- Create the writer functions. - state._writeString, state._flushWriter = CreateWriter() + state._writeString, state._flushWriter = CreateWriter(opts.writer) -- Create a combined options table, starting with the defaults -- and then overwriting any user-supplied keys. diff --git a/README.md b/README.md index d527ad3..a0930e0 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ end * **`LibSerialize:SerializeEx(opts, ...)`** Arguments: - * `opts`: options (see below) + * `opts`: options (see [Serialization Options]) * `...`: a variable number of serializable values Returns: @@ -82,7 +82,7 @@ end Returns: * `result`: `...` serialized as a string - Calls `SerializeEx(opts, ...)` with the default options (see below) + Calls `SerializeEx(opts, ...)` with the default serialization options (see [Serialization Options]) * **`LibSerialize:Deserialize(input)`** @@ -117,19 +117,26 @@ end This will occur if any of the following exceed 16777215: any string length, any table key count, number of unique strings, number of unique tables. It will also occur by default if any unserializable types are encountered, -though that behavior may be disabled (see options). +though that behavior may be disabled (see [Serialization Options]). `Deserialize()` and `DeserializeValue()` are equivalent, except the latter returns the deserialization result directly and will not catch any Lua errors that may occur when deserializing invalid input. 
-Note that none of the serialization/deseriazation methods support reentrancy, -and modifying tables during the serialization process is unspecified and -should be avoided. Table serialization is multi-phased and assumes a consistent -state for the key/value pairs across the phases. +As of recent releases, the library supports reentrancy and concurrent usage +from multiple threads (coroutines) through the public API. Modifying tables +during the serialization process is unspecified and should be avoided. +Table serialization is multi-phased and assumes a consistent state for the +key/value pairs across the phases. +It is permitted for any user-supplied functions to suspend the current +thread during the serialization or deserialization process. It is however +not possible to yield the current thread if the `Deserialize()` API is used, +as this function inserts a C call boundary onto the call stack. This issue +does not affect the `DeserializeValue()` function. -## Options: + +## Serialization Options: The following serialization options are supported: * `errorOnUnserializableType`: `boolean` (default true) * `true`: unserializable types will raise a Lua error @@ -146,6 +153,10 @@ The following serialization options are supported: table encountered during serialization. The function must return true for the pair to be serialized. It may be called multiple times on a table for the same key/value pair. See notes on reeentrancy and table modification. +* `writer`: `any` (default nil) + * If specified, the object referenced by this field will be checked to see + if it implements the [Writer protocol]. If so, the functions it defines + will be used to control how serialized data is written. If an option is unspecified in the table, then its default will be used. 
This means that if an option `foo` defaults to true, then: @@ -153,6 +164,35 @@ This means that if an option `foo` defaults to true, then: * `myOpts.foo = nil`: option `foo` is true +## Writer Protocol +The library supports customizing how byte strings are written during the +serialization process through the use of an object that implements the +"Writer" protocol. This enables advanced use cases such as batched or throttled +serialization via coroutines, or streaming the data to a target instead of +processing it all in one giant chunk. + +Any value stored on the `writer` key of the options table passed to the +`SerializeEx()` function will be inspected and indexed to search for the +following keys. If the required keys are all found, all operations provided +by the writer will override the default behaviors otherwise implemented by +the library. Otherwise, the writer is ignored and not used for any operations. + +* `WriteString`: `function(writer, str)` (required) + * This function will be called each time the library submits a byte string + that was created as a result of serializing data. + + If this function is not supplied, the supplied `writer` is considered + incomplete and will be ignored for all operations. + +* `Flush`: `function(writer)` (optional) + * If specified, this function will be called at the end of the serialization + process. It may return any number of values - including zero - all of + which will be passed through to the caller of `SerializeEx()` verbatim. + + The default behavior if this function is not specified - and if the writer + is otherwise valid - is a no-op that returns no values. + + ## Customizing table serialization: For any serialized table, LibSerialize will check for the presence of a metatable key `__LibSerialize`. 
It will be interpreted as a table with @@ -270,3 +310,6 @@ The type byte uses the following formats to implement the above: * Followed by a byte for the upper bits * `TTTT T000`: a 5 bit type index * Followed by the type-dependent payload, including count(s) if needed + +[Serialization Options]: #serialization-options +[Writer protocol]: #writer-protocol diff --git a/tests.lua b/tests.lua index bb10ad5..cf9fb1b 100644 --- a/tests.lua +++ b/tests.lua @@ -181,6 +181,20 @@ function LibSerialize:RunTests() return true end + local function Mixin(obj, ...) + for i = 1, select("#", ...) do + for k, v in pairs((select(i, ...))) do + obj[k] = v + end + end + + return obj + end + + local function PackTable(...) + return { n = select("#", ...), ... } + end + --[[--------------------------------------------------------------------------- Test cases for serialization @@ -345,6 +359,128 @@ function LibSerialize:RunTests() assert(success == false) end + + --[[--------------------------------------------------------------------------- + Test cases for generic writers + --]]--------------------------------------------------------------------------- + + -- This test verifies the basic functionality of a custom writer that + -- writes to a reusable buffer and returns its concatenated result to + -- the serializer. 
+ + do + local PersistentBuffer = {} + + function PersistentBuffer:WriteString(str) + self.n = self.n + 1 + self[self.n] = str + end + + function PersistentBuffer:Flush() + local flushed = table.concat(self, "", 1, self.n) + self.n = 0 + return flushed + end + + local function CreatePersistentBuffer() + return Mixin({ n = 0 }, PersistentBuffer) + end + + local value = { 1, 2, 3, 4, 5, true, false, "banana" } + local writer = CreatePersistentBuffer() + local bytes = LibSerialize:SerializeEx({ writer = writer }, value) + + assert(type(bytes) == "string", "expected 'bytes' to be a string") + assert(writer.n == 0, "expected 'writer' to have been flushed") + + local output = LibSerialize:DeserializeValue(bytes) + + assert(type(output) == type(value), "expected 'output' to be of the same type as 'value'") + assert(tCompare(output, value), "expected 'output' to be fully comparable to 'value'") + end + + -- This test verifies that if no Flush implementation is given, the default + -- will return nothing from the Serialize function. As documented in the + -- library, it's expected that such a writer would likely be submitting + -- strings as it gets them to another destination. + + do + local NullWriter = {} + + function NullWriter:WriteString(str) + assert(type(str) == "string") -- 'str' should always be a string + self.writes = self.writes + 1 + end + + local function CreateNullWriter() + return Mixin({ writes = 0 }, NullWriter) + end + + local value = { 1, 2, 3, 4, 5, true, false, "banana" } + local writer = CreateNullWriter() + local result = PackTable(LibSerialize:SerializeEx({ writer = writer }, value)) + + assert(result.n == 0, "expected no return values from 'SerializeEx' call") + assert(writer.writes > 0, "expected 'WriteString' to have been called at least once") + end + + -- This test verifies that the pace at which serialization occurs can be + -- throttled within a coroutine. 
+ + do + local ThrottledWriter = {} + + function ThrottledWriter:WriteString(str) + if self.written > self.rate then + coroutine.yield() + self.written = self.written - self.rate + end + + local length = #str + self.written = self.written + length + self.size = self.size + 1 + self.buffer[self.size] = str + end + + function ThrottledWriter:Flush() + local flushed = table.concat(self.buffer, "", 1, self.size) + self.size = 0 + return flushed + end + + local function CreateThrottledWriter(rate) + return Mixin({ buffer = {}, size = 0, written = 0, rate = rate }, ThrottledWriter) + end + + -- Use a large table for 'value' so that the serializer's thread + -- yields a few times. + + local value = {} + + for i = 1, 1000 do + value[i] = i * 1000 + end + + local writer = CreateThrottledWriter(100) + local thread = coroutine.create(function() return LibSerialize:SerializeEx({ writer = writer }, value) end) + + local bytes + + while coroutine.status(thread) ~= "dead" do + local ok + ok, bytes = coroutine.resume(thread) + assert(ok, bytes) -- If not ok, 'bytes' will be an error. + end + + assert(type(bytes) == "string", "expected 'bytes' to be a string") + assert(writer.size == 0, "expected 'writer' to have been flushed") + + local output = LibSerialize:DeserializeValue(bytes) + + assert(type(output) == type(value), "expected 'output' to be of the same type as 'value'") + assert(tCompare(output, value), "expected 'output' to be fully comparable to 'value'") + end + print("All tests passed!") end