Skip to content

Commit

Permalink
Add BinaryEncodingOptions & option for deterministic ordering
Browse files Browse the repository at this point in the history
Implements the same setting added for JSON in apple#1478 for binary serialization.

Related to apple#1477.
  • Loading branch information
rebello95 committed Oct 19, 2023
1 parent 2020c51 commit 1ea1a8a
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 38 deletions.
32 changes: 32 additions & 0 deletions Sources/SwiftProtobuf/BinaryEncodingOptions.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Sources/SwiftProtobuf/BinaryEncodingOptions.swift - Binary encoding options
//
// Copyright (c) 2014 - 2017 Apple Inc. and the project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See LICENSE.txt for license information:
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
//
// -----------------------------------------------------------------------------
///
/// Binary encoding options
///
// -----------------------------------------------------------------------------

/// Options for binary encoding.
public struct BinaryEncodingOptions: Sendable {
/// Whether to use deterministic ordering when serializing.
///
/// Defaults to `false`.
///
/// Note that the deterministic serialization is NOT canonical across languages.
/// It is not guaranteed to remain stable over time. It is unstable across
/// different builds with schema changes due to unknown fields. Users who need
/// canonical serialization (e.g., persistent storage in a canonical form,
/// fingerprinting, etc.) should define their own canonicalization specification
/// and implement their own serializer rather than relying on this API.
///
/// If deterministic serialization is requested, map entries will be sorted
/// by keys in lexicographical order. This is an implementation detail
/// and subject to change.
public var useDeterministicOrdering: Bool = false

/// Creates a set of options with every setting at its default value.
public init() {}
}
98 changes: 66 additions & 32 deletions Sources/SwiftProtobuf/BinaryEncodingVisitor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import Foundation

/// Visitor that encodes a message graph in the protobuf binary wire format.
internal struct BinaryEncodingVisitor: Visitor {
private let options: BinaryEncodingOptions

var encoder: BinaryEncoder

Expand All @@ -26,8 +27,9 @@ internal struct BinaryEncodingVisitor: Visitor {
/// - Precondition: `pointer` must point to an allocated block of memory that
/// is large enough to hold the entire encoded message. For performance
/// reasons, the encoder does not make any attempts to verify this.
init(forWritingInto pointer: UnsafeMutableRawPointer) {
encoder = BinaryEncoder(forWritingInto: pointer)
init(forWritingInto pointer: UnsafeMutableRawPointer, options: BinaryEncodingOptions) {
self.encoder = BinaryEncoder(forWritingInto: pointer)
self.options = options
}

mutating func visitUnknown(bytes: Data) throws {
Expand Down Expand Up @@ -258,49 +260,79 @@ internal struct BinaryEncodingVisitor: Visitor {
value: _ProtobufMap<KeyType, ValueType>.BaseType,
fieldNumber: Int
) throws {
for (k,v) in value {
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
var sizer = BinaryEncodingSizeVisitor()
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &sizer)
try ValueType.visitSingular(value: v, fieldNumber: 2, with: &sizer)
let entrySize = sizer.serializedSize
encoder.putVarInt(value: entrySize)
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &self)
try ValueType.visitSingular(value: v, fieldNumber: 2, with: &self)
}
try iterateAndEncode(
map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan,
encodeWithSizer: { sizer, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &sizer)
try ValueType.visitSingular(value: value, fieldNumber: 2, with: &sizer)
}, encodeWithVisitor: { visitor, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
try ValueType.visitSingular(value: value, fieldNumber: 2, with: &visitor)
}
)
}

mutating func visitMapField<KeyType, ValueType>(
fieldType: _ProtobufEnumMap<KeyType, ValueType>.Type,
value: _ProtobufEnumMap<KeyType, ValueType>.BaseType,
fieldNumber: Int
) throws where ValueType.RawValue == Int {
for (k,v) in value {
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
var sizer = BinaryEncodingSizeVisitor()
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &sizer)
try sizer.visitSingularEnumField(value: v, fieldNumber: 2)
let entrySize = sizer.serializedSize
encoder.putVarInt(value: entrySize)
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &self)
try visitSingularEnumField(value: v, fieldNumber: 2)
}
try iterateAndEncode(
map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan,
encodeWithSizer: { sizer, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &sizer)
try sizer.visitSingularEnumField(value: value, fieldNumber: 2)
}, encodeWithVisitor: { visitor, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
try visitor.visitSingularEnumField(value: value, fieldNumber: 2)
}
)
}

mutating func visitMapField<KeyType, ValueType>(
fieldType: _ProtobufMessageMap<KeyType, ValueType>.Type,
value: _ProtobufMessageMap<KeyType, ValueType>.BaseType,
fieldNumber: Int
) throws {
for (k,v) in value {
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
var sizer = BinaryEncodingSizeVisitor()
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &sizer)
try sizer.visitSingularMessageField(value: v, fieldNumber: 2)
let entrySize = sizer.serializedSize
encoder.putVarInt(value: entrySize)
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &self)
try visitSingularMessageField(value: v, fieldNumber: 2)
try iterateAndEncode(
map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan,
encodeWithSizer: { sizer, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &sizer)
try sizer.visitSingularMessageField(value: value, fieldNumber: 2)
}, encodeWithVisitor: { visitor, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
try visitor.visitSingularMessageField(value: value, fieldNumber: 2)
}
)
}

/// Writes every entry of `map` as a length-delimited field numbered
/// `fieldNumber`, choosing the iteration order from the visitor's options.
///
/// When `options.useDeterministicOrdering` is set, entries are first sorted
/// by key using `isOrderedBefore`; otherwise the dictionary's own iteration
/// order is used.
///
/// - Parameters:
///   - map: The dictionary whose entries are encoded.
///   - fieldNumber: The field number written in front of each entry.
///   - isOrderedBefore: Key comparison used only for deterministic ordering.
///   - encodeWithSizer: Visits one key/value pair with a size visitor so the
///     entry's length prefix can be computed.
///   - encodeWithVisitor: Visits one key/value pair with this visitor to
///     write the entry's payload.
/// - Throws: Whatever either encoding closure throws.
private mutating func iterateAndEncode<K, V>(
  map: Dictionary<K, V>,
  fieldNumber: Int,
  isOrderedBefore: (K, K) -> Bool,
  encodeWithSizer: (inout BinaryEncodingSizeVisitor, K, V) throws -> (),
  encodeWithVisitor: (inout BinaryEncodingVisitor, K, V) throws -> ()
) throws {
  guard options.useDeterministicOrdering else {
    // Fast path: no sorting requested, walk the dictionary as-is.
    try encodeEntries(
      map,
      fieldNumber: fieldNumber,
      encodeWithSizer: encodeWithSizer,
      encodeWithVisitor: encodeWithVisitor
    )
    return
  }

  let orderedEntries = map.sorted { lhs, rhs in isOrderedBefore(lhs.key, rhs.key) }
  try encodeEntries(
    orderedEntries,
    fieldNumber: fieldNumber,
    encodeWithSizer: encodeWithSizer,
    encodeWithVisitor: encodeWithVisitor
  )
}

/// Writes each key/value pair of `entries`, in sequence order, as one
/// length-delimited field: field header, varint entry size, then the payload.
private mutating func encodeEntries<Entries: Sequence, K, V>(
  _ entries: Entries,
  fieldNumber: Int,
  encodeWithSizer: (inout BinaryEncodingSizeVisitor, K, V) throws -> (),
  encodeWithVisitor: (inout BinaryEncodingVisitor, K, V) throws -> ()
) throws where Entries.Element == (key: K, value: V) {
  for entry in entries {
    encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
    // The entry's size must be known before its payload is written, so the
    // pair is visited once with a fresh size visitor and once for real.
    var entrySizer = BinaryEncodingSizeVisitor()
    try encodeWithSizer(&entrySizer, entry.key, entry.value)
    encoder.putVarInt(value: entrySizer.serializedSize)
    try encodeWithVisitor(&self, entry.key, entry.value)
  }
}

Expand Down Expand Up @@ -338,7 +370,9 @@ extension BinaryEncodingVisitor {
let length = try value.serializedDataSize()
encoder.putVarInt(value: length)
// Create the sub encoder after writing the length.
var subVisitor = BinaryEncodingVisitor(forWritingInto: encoder.pointer)
// Hand this visitor's own options to the sub-visitor. Passing a fresh
// `BinaryEncodingOptions()` here would reset `useDeterministicOrdering`
// to its default for the nested message, so map fields inside
// sub-messages would ignore the caller's request.
var subVisitor = BinaryEncodingVisitor(
  forWritingInto: encoder.pointer, options: options
)
try value.traverse(visitor: &subVisitor)
encoder.pointer = subVisitor.encoder.pointer

Expand Down
10 changes: 7 additions & 3 deletions Sources/SwiftProtobuf/Message+BinaryAdditions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,17 @@ extension Message {
/// - Parameters:
/// - partial: If `false` (the default), this method will check
/// `Message.isInitialized` before encoding to verify that all required
/// fields are present. If any are missing, this method throws
/// fields are present. If any are missing, this method throws
/// `BinaryEncodingError.missingRequiredFields`.
/// - options: The `BinaryEncodingOptions` to use.
/// - Returns: A `SwiftProtobufContiguousBytes` instance containing the binary serialization
/// of the message.
///
/// - Throws: `BinaryEncodingError` if encoding fails.
public func serializedBytes<Bytes: SwiftProtobufContiguousBytes>(partial: Bool = false) throws -> Bytes {
public func serializedBytes<Bytes: SwiftProtobufContiguousBytes>(
partial: Bool = false,
options: BinaryEncodingOptions = BinaryEncodingOptions()
) throws -> Bytes {
if !partial && !isInitialized {
throw BinaryEncodingError.missingRequiredFields
}
Expand All @@ -48,7 +52,7 @@ extension Message {
var data = Bytes(repeating: 0, count: requiredSize)
try data.withUnsafeMutableBytes { (body: UnsafeMutableRawBufferPointer) in
if let baseAddress = body.baseAddress, body.count > 0 {
var visitor = BinaryEncodingVisitor(forWritingInto: baseAddress)
var visitor = BinaryEncodingVisitor(forWritingInto: baseAddress, options: options)
try traverse(visitor: &visitor)
// Currently not exposing this from the api because it really would be
// an internal error in the library and should never happen.
Expand Down
10 changes: 7 additions & 3 deletions Sources/SwiftProtobuf/Message+BinaryAdditions_Data.swift
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ extension Message {
/// `Message.isInitialized` after decoding to verify that all required
/// fields are present. If any are missing, this method throws
/// `BinaryDecodingError.missingRequiredFields`.
/// - options: The BinaryDecodingOptions to use.
/// - options: The `BinaryDecodingOptions` to use.
/// - Throws: `BinaryDecodingError` if decoding fails.
@inlinable
public mutating func merge(
Expand All @@ -78,9 +78,13 @@ extension Message {
/// `Message.isInitialized` before encoding to verify that all required
/// fields are present. If any are missing, this method throws
/// `BinaryEncodingError.missingRequiredFields`.
/// - options: The `BinaryEncodingOptions` to use.
/// - Returns: A `Data` instance containing the binary serialization of the message.
/// - Throws: `BinaryEncodingError` if encoding fails.
public func serializedData(partial: Bool = false) throws -> Data {
try serializedBytes(partial: partial)
public func serializedData(
partial: Bool = false,
options: BinaryEncodingOptions = BinaryEncodingOptions()
) throws -> Data {
try serializedBytes(partial: partial, options: options)
}
}
74 changes: 74 additions & 0 deletions Tests/SwiftProtobufTests/Test_BinaryEncodingOptions.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Tests/SwiftProtobufTests/Test_BinaryEncodingOptions.swift - Binary encoding options tests
//
// Copyright (c) 2014 - 2018 Apple Inc. and the project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See LICENSE.txt for license information:
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
//
// -----------------------------------------------------------------------------
///
/// Test for the use of BinaryEncodingOptions
///
// -----------------------------------------------------------------------------

import Foundation
import XCTest
import SwiftProtobuf

/// Tests for `BinaryEncodingOptions` as used by `serializedData(options:)`.
class Test_BinaryEncodingOptions: XCTestCase {

  /// Two messages with the same map contents, whose dictionary literals list
  /// the keys in different orders, must serialize to the same bytes when
  /// `useDeterministicOrdering` is enabled.
  func testUseDeterministicOrdering() throws {
    var deterministicOptions = BinaryEncodingOptions()
    deterministicOptions.useDeterministicOrdering = true

    // Baseline message: keys listed in an arbitrary order.
    let baseline = SwiftProtoTesting_Message3.with { message in
      message.mapStringString = [
        "b": "B",
        "a": "A",
        "0": "0",
        "UPPER": "v",
        "x": "X",
      ]
      message.mapInt32Message = [
        5: .with { $0.optionalSint32 = 5 },
        1: .with { $0.optionalSint32 = 1 },
        3: .with { $0.optionalSint32 = 3 },
      ]
      message.mapInt32Enum = [
        5: .foo,
        3: .bar,
        0: .baz,
        1: .extra3,
      ]
    }

    // Same entries as `baseline`, with the literals written in another order.
    let reordered = SwiftProtoTesting_Message3.with { message in
      message.mapStringString = [
        "UPPER": "v",
        "a": "A",
        "b": "B",
        "x": "X",
        "0": "0",
      ]
      message.mapInt32Message = [
        1: .with { $0.optionalSint32 = 1 },
        3: .with { $0.optionalSint32 = 3 },
        5: .with { $0.optionalSint32 = 5 },
      ]
      message.mapInt32Enum = [
        3: .bar,
        5: .foo,
        1: .extra3,
        0: .baz,
      ]
    }

    // Approximation that serializing models with the same data (but initialized with keys in
    // different orders) consistently produces the same outputs.
    let expectedBytes = try baseline.serializedData(options: deterministicOptions)
    for _ in 1...10 {
      XCTAssertEqual(try reordered.serializedData(options: deterministicOptions), expectedBytes)
    }
  }
}

0 comments on commit 1ea1a8a

Please sign in to comment.