Skip to content

Commit

Permalink
Add BinaryEncodingOptions with option for deterministic ordering (a…
Browse files Browse the repository at this point in the history
…pple#1480)

* Add `BinaryEncodingOptions` & option for deterministic ordering

Implements the same setting added for JSON in apple#1478 for binary serialization.

Related to apple#1477.
  • Loading branch information
rebello95 committed Oct 26, 2023
1 parent 437f105 commit 5b0f123
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 38 deletions.
32 changes: 32 additions & 0 deletions Sources/SwiftProtobuf/BinaryEncodingOptions.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Sources/SwiftProtobuf/BinaryEncodingOptions.swift - Binary encoding options
//
// Copyright (c) 2014 - 2023 Apple Inc. and the project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See LICENSE.txt for license information:
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
//
// -----------------------------------------------------------------------------
///
/// Binary encoding options
///
// -----------------------------------------------------------------------------

/// Options for binary encoding.
///
/// A default-constructed value leaves every option at its default.
public struct BinaryEncodingOptions: Sendable {
/// Whether to use deterministic ordering when serializing. Defaults to `false`.
///
/// Note that the deterministic serialization is NOT canonical across languages.
/// It is NOT guaranteed to remain stable over time. It is unstable across
/// different builds with schema changes due to unknown fields. Users who need
/// canonical serialization (e.g., persistent storage in a canonical form,
/// fingerprinting, etc.) should define their own canonicalization specification
/// and implement their own serializer rather than relying on this API.
///
/// If deterministic serialization is requested, map entries will be sorted
/// by keys in lexicographical order. This is an implementation detail
/// and subject to change.
public var useDeterministicOrdering: Bool = false

/// Creates a set of options with every option at its default value.
public init() {}
}
107 changes: 71 additions & 36 deletions Sources/SwiftProtobuf/BinaryEncodingVisitor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import Foundation

/// Visitor that encodes a message graph in the protobuf binary wire format.
internal struct BinaryEncodingVisitor: Visitor {
private let options: BinaryEncodingOptions

var encoder: BinaryEncoder

Expand All @@ -26,12 +27,14 @@ internal struct BinaryEncodingVisitor: Visitor {
/// - Precondition: `pointer` must point to an allocated block of memory that
/// is large enough to hold the entire encoded message. For performance
/// reasons, the encoder does not make any attempts to verify this.
init(forWritingInto pointer: UnsafeMutableRawPointer) {
encoder = BinaryEncoder(forWritingInto: pointer)
init(forWritingInto pointer: UnsafeMutableRawPointer, options: BinaryEncodingOptions) {
self.encoder = BinaryEncoder(forWritingInto: pointer)
self.options = options
}

init(encoder: BinaryEncoder) {
init(encoder: BinaryEncoder, options: BinaryEncodingOptions) {
self.encoder = encoder
self.options = options
}

mutating func visitUnknown(bytes: Data) throws {
Expand Down Expand Up @@ -262,49 +265,79 @@ internal struct BinaryEncodingVisitor: Visitor {
value: _ProtobufMap<KeyType, ValueType>.BaseType,
fieldNumber: Int
) throws {
for (k,v) in value {
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
var sizer = BinaryEncodingSizeVisitor()
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &sizer)
try ValueType.visitSingular(value: v, fieldNumber: 2, with: &sizer)
let entrySize = sizer.serializedSize
encoder.putVarInt(value: entrySize)
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &self)
try ValueType.visitSingular(value: v, fieldNumber: 2, with: &self)
}
try iterateAndEncode(
map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan,
encodeWithSizer: { sizer, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &sizer)
try ValueType.visitSingular(value: value, fieldNumber: 2, with: &sizer)
}, encodeWithVisitor: { visitor, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
try ValueType.visitSingular(value: value, fieldNumber: 2, with: &visitor)
}
)
}

mutating func visitMapField<KeyType, ValueType>(
fieldType: _ProtobufEnumMap<KeyType, ValueType>.Type,
value: _ProtobufEnumMap<KeyType, ValueType>.BaseType,
fieldNumber: Int
) throws where ValueType.RawValue == Int {
for (k,v) in value {
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
var sizer = BinaryEncodingSizeVisitor()
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &sizer)
try sizer.visitSingularEnumField(value: v, fieldNumber: 2)
let entrySize = sizer.serializedSize
encoder.putVarInt(value: entrySize)
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &self)
try visitSingularEnumField(value: v, fieldNumber: 2)
}
try iterateAndEncode(
map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan,
encodeWithSizer: { sizer, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &sizer)
try sizer.visitSingularEnumField(value: value, fieldNumber: 2)
}, encodeWithVisitor: { visitor, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
try visitor.visitSingularEnumField(value: value, fieldNumber: 2)
}
)
}

mutating func visitMapField<KeyType, ValueType>(
fieldType: _ProtobufMessageMap<KeyType, ValueType>.Type,
value: _ProtobufMessageMap<KeyType, ValueType>.BaseType,
fieldNumber: Int
) throws {
for (k,v) in value {
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
var sizer = BinaryEncodingSizeVisitor()
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &sizer)
try sizer.visitSingularMessageField(value: v, fieldNumber: 2)
let entrySize = sizer.serializedSize
encoder.putVarInt(value: entrySize)
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &self)
try visitSingularMessageField(value: v, fieldNumber: 2)
try iterateAndEncode(
map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan,
encodeWithSizer: { sizer, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &sizer)
try sizer.visitSingularMessageField(value: value, fieldNumber: 2)
}, encodeWithVisitor: { visitor, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
try visitor.visitSingularMessageField(value: value, fieldNumber: 2)
}
)
}

/// Writes every entry of `map` as its own length-delimited field numbered
/// `fieldNumber`.
///
/// When `options.useDeterministicOrdering` is set, entries are visited in the
/// key order defined by `isOrderedBefore`; otherwise the dictionary's own
/// iteration order is used.
///
/// - Parameters:
///   - map: The dictionary whose entries are encoded.
///   - fieldNumber: The field number written ahead of each entry.
///   - isOrderedBefore: Key comparison, consulted only for deterministic ordering.
///   - encodeWithSizer: Visits one key/value pair with a size visitor so the
///     entry's length can be written before its contents.
///   - encodeWithVisitor: Visits the same pair with this visitor to write it.
/// - Throws: Any error thrown by `encodeWithSizer` or `encodeWithVisitor`.
private mutating func iterateAndEncode<K, V>(
  map: Dictionary<K, V>,
  fieldNumber: Int,
  isOrderedBefore: (K, K) -> Bool,
  encodeWithSizer: (inout BinaryEncodingSizeVisitor, K, V) throws -> (),
  encodeWithVisitor: (inout BinaryEncodingVisitor, K, V) throws -> ()
) throws {
  guard options.useDeterministicOrdering else {
    // Default path: walk the dictionary directly, without building a sorted copy.
    for entry in map {
      try encodeMapEntry(
        key: entry.key,
        value: entry.value,
        fieldNumber: fieldNumber,
        encodeWithSizer: encodeWithSizer,
        encodeWithVisitor: encodeWithVisitor
      )
    }
    return
  }

  let orderedEntries = map.sorted { lhs, rhs in isOrderedBefore(lhs.key, rhs.key) }
  for entry in orderedEntries {
    try encodeMapEntry(
      key: entry.key,
      value: entry.value,
      fieldNumber: fieldNumber,
      encodeWithSizer: encodeWithSizer,
      encodeWithVisitor: encodeWithVisitor
    )
  }
}

/// Writes a single map entry: the field tag, the entry's measured size as a
/// varint, and then the key and value themselves.
private mutating func encodeMapEntry<K, V>(
  key: K,
  value: V,
  fieldNumber: Int,
  encodeWithSizer: (inout BinaryEncodingSizeVisitor, K, V) throws -> (),
  encodeWithVisitor: (inout BinaryEncodingVisitor, K, V) throws -> ()
) throws {
  encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
  // The size has to be known before the contents are written, so measure the
  // entry with a throwaway size visitor first.
  var sizer = BinaryEncodingSizeVisitor()
  try encodeWithSizer(&sizer, key, value)
  encoder.putVarInt(value: sizer.serializedSize)
  try encodeWithVisitor(&self, key, value)
}

Expand All @@ -313,7 +346,7 @@ internal struct BinaryEncodingVisitor: Visitor {
start: Int,
end: Int
) throws {
var subVisitor = BinaryEncodingMessageSetVisitor(encoder: encoder)
var subVisitor = BinaryEncodingMessageSetVisitor(encoder: encoder, options: options)
try fields.traverse(visitor: &subVisitor, start: start, end: end)
encoder = subVisitor.encoder
}
Expand All @@ -323,9 +356,12 @@ extension BinaryEncodingVisitor {

// Helper Visitor used when writing out the extensions as MessageSets.
internal struct BinaryEncodingMessageSetVisitor: SelectiveVisitor {
private let options: BinaryEncodingOptions

var encoder: BinaryEncoder

init(encoder: BinaryEncoder) {
init(encoder: BinaryEncoder, options: BinaryEncodingOptions) {
self.options = options
self.encoder = encoder
}

Expand All @@ -342,7 +378,7 @@ extension BinaryEncodingVisitor {
let length = try value.serializedDataSize()
encoder.putVarInt(value: length)
// Create the sub encoder after writing the length.
var subVisitor = BinaryEncodingVisitor(encoder: encoder)
var subVisitor = BinaryEncodingVisitor(encoder: encoder, options: options)
try value.traverse(visitor: &subVisitor)
encoder = subVisitor.encoder

Expand All @@ -351,5 +387,4 @@ extension BinaryEncodingVisitor {

// SelectiveVisitor handles the rest.
}

}
26 changes: 24 additions & 2 deletions Sources/SwiftProtobuf/Message+BinaryAdditions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,37 @@ extension Message {
/// - Parameters:
/// - partial: If `false` (the default), this method will check
/// `Message.isInitialized` before encoding to verify that all required
/// fields are present. If any are missing, this method throws
/// fields are present. If any are missing, this method throws
/// `BinaryEncodingError.missingRequiredFields`.
/// - Returns: A `Data` value containing the binary serialization of the
/// message.
/// - Throws: `BinaryEncodingError` if encoding fails.
public func serializedData(partial: Bool = false) throws -> Data {
  // Delegate to the options-taking overload with default-constructed options.
  let defaultOptions = BinaryEncodingOptions()
  return try serializedData(partial: partial, options: defaultOptions)
}

/// Returns a `Data` value containing the Protocol Buffer binary format
/// serialization of the message.
///
/// - Parameters:
/// - partial: If `false` (the default), this method will check
/// `Message.isInitialized` before encoding to verify that all required
/// fields are present. If any are missing, this method throws
/// `BinaryEncodingError.missingRequiredFields`.
/// - options: The `BinaryEncodingOptions` to use.
/// - Returns: A `Data` value containing the binary serialization of the
/// message.
///
/// - Throws: `BinaryEncodingError` if encoding fails.
public func serializedData(
partial: Bool = false,
options: BinaryEncodingOptions
) throws -> Data {
if !partial && !isInitialized {
throw BinaryEncodingError.missingRequiredFields
}

// Note that this assumes `options` will not change the required size.
let requiredSize = try serializedDataSize()

// Messages have a 2GB limit in encoded size, the upstream C++ code
Expand All @@ -48,7 +70,7 @@ extension Message {
var data = Data(count: requiredSize)
try data.withUnsafeMutableBytes { (body: UnsafeMutableRawBufferPointer) in
if let baseAddress = body.baseAddress, body.count > 0 {
var visitor = BinaryEncodingVisitor(forWritingInto: baseAddress)
var visitor = BinaryEncodingVisitor(forWritingInto: baseAddress, options: options)
try traverse(visitor: &visitor)
// Currently not exposing this from the api because it really would be
// an internal error in the library and should never happen.
Expand Down
74 changes: 74 additions & 0 deletions Tests/SwiftProtobufTests/Test_BinaryEncodingOptions.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Tests/SwiftProtobufTests/Test_BinaryEncodingOptions.swift - Tests for binary encoding options
//
// Copyright (c) 2014 - 2023 Apple Inc. and the project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See LICENSE.txt for license information:
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
//
// -----------------------------------------------------------------------------
///
/// Test for the use of BinaryEncodingOptions
///
// -----------------------------------------------------------------------------

import Foundation
import XCTest
import SwiftProtobuf

final class Test_BinaryEncodingOptions: XCTestCase {

  /// Checks that, with deterministic ordering enabled, two messages holding the
  /// same map contents serialize to the same bytes even though their map
  /// literals list the keys in different orders.
  func testUseDeterministicOrdering() throws {
    var deterministicOptions = BinaryEncodingOptions()
    deterministicOptions.useDeterministicOrdering = true

    let referenceMessage = SwiftProtoTesting_Message3.with { message in
      message.mapStringString = [
        "b": "B",
        "a": "A",
        "0": "0",
        "UPPER": "v",
        "x": "X",
      ]
      message.mapInt32Message = [
        5: .with { $0.optionalSint32 = 5 },
        1: .with { $0.optionalSint32 = 1 },
        3: .with { $0.optionalSint32 = 3 },
      ]
      message.mapInt32Enum = [
        5: .foo,
        3: .bar,
        0: .baz,
        1: .extra3,
      ]
    }

    // Same contents as `referenceMessage`, with the keys listed in a different order.
    let reorderedMessage = SwiftProtoTesting_Message3.with { message in
      message.mapStringString = [
        "UPPER": "v",
        "a": "A",
        "b": "B",
        "x": "X",
        "0": "0",
      ]
      message.mapInt32Message = [
        1: .with { $0.optionalSint32 = 1 },
        3: .with { $0.optionalSint32 = 3 },
        5: .with { $0.optionalSint32 = 5 },
      ]
      message.mapInt32Enum = [
        3: .bar,
        5: .foo,
        1: .extra3,
        0: .baz,
      ]
    }

    // This only approximates a determinism check: models with equal data (but
    // initialized with keys in different orders) should repeatedly serialize
    // to the same output.
    let expectedBytes = try referenceMessage.serializedData(options: deterministicOptions)
    let attempts = 10
    for _ in 0..<attempts {
      XCTAssertEqual(
        try reorderedMessage.serializedData(options: deterministicOptions),
        expectedBytes
      )
    }
  }
}

0 comments on commit 5b0f123

Please sign in to comment.