Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add useDeterministicOrdering opt-in option to JSON + Binary encoding #1487

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions Sources/SwiftProtobuf/BinaryEncodingOptions.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Sources/SwiftProtobuf/BinaryEncodingOptions.swift - Binary encoding options
//
// Copyright (c) 2014 - 2023 Apple Inc. and the project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See LICENSE.txt for license information:
// https://github.com/apple/swift-protobuf/blob/main/LICENSE.txt
//
// -----------------------------------------------------------------------------
///
/// Binary encoding options
///
// -----------------------------------------------------------------------------

/// Settings that adjust how a message is serialized to the binary format.
public struct BinaryEncodingOptions {
  /// Requests deterministic ordering while serializing. Defaults to `false`.
  ///
  /// Deterministic output is NOT canonical across languages and is NOT
  /// guaranteed to stay stable over time. It is also unstable across
  /// different builds with schema changes, because of unknown fields.
  /// Callers that need a canonical form (for example for persistent storage
  /// in a canonical form, or for fingerprinting) should define their own
  /// canonicalization specification and implement their own serializer
  /// instead of relying on this API.
  ///
  /// When deterministic serialization is requested, map entries are sorted
  /// by key in lexicographical order. That is an implementation detail and
  /// is subject to change.
  public var useDeterministicOrdering: Bool = false

  /// Creates a set of options with every setting at its default value.
  public init() {}
}
107 changes: 71 additions & 36 deletions Sources/SwiftProtobuf/BinaryEncodingVisitor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import Foundation

/// Visitor that encodes a message graph in the protobuf binary wire format.
internal struct BinaryEncodingVisitor: Visitor {
private let options: BinaryEncodingOptions

var encoder: BinaryEncoder

Expand All @@ -26,12 +27,14 @@ internal struct BinaryEncodingVisitor: Visitor {
/// - Precondition: `pointer` must point to an allocated block of memory that
/// is large enough to hold the entire encoded message. For performance
/// reasons, the encoder does not make any attempts to verify this.
init(forWritingInto pointer: UnsafeMutableRawPointer) {
encoder = BinaryEncoder(forWritingInto: pointer)
init(forWritingInto pointer: UnsafeMutableRawPointer, options: BinaryEncodingOptions) {
self.encoder = BinaryEncoder(forWritingInto: pointer)
self.options = options
}

init(encoder: BinaryEncoder) {
init(encoder: BinaryEncoder, options: BinaryEncodingOptions) {
self.encoder = encoder
self.options = options
}

mutating func visitUnknown(bytes: Data) throws {
Expand Down Expand Up @@ -262,49 +265,79 @@ internal struct BinaryEncodingVisitor: Visitor {
value: _ProtobufMap<KeyType, ValueType>.BaseType,
fieldNumber: Int
) throws {
for (k,v) in value {
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
var sizer = BinaryEncodingSizeVisitor()
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &sizer)
try ValueType.visitSingular(value: v, fieldNumber: 2, with: &sizer)
let entrySize = sizer.serializedSize
encoder.putVarInt(value: entrySize)
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &self)
try ValueType.visitSingular(value: v, fieldNumber: 2, with: &self)
}
try iterateAndEncode(
map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan,
encodeWithSizer: { sizer, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &sizer)
try ValueType.visitSingular(value: value, fieldNumber: 2, with: &sizer)
}, encodeWithVisitor: { visitor, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
try ValueType.visitSingular(value: value, fieldNumber: 2, with: &visitor)
}
)
}

mutating func visitMapField<KeyType, ValueType>(
fieldType: _ProtobufEnumMap<KeyType, ValueType>.Type,
value: _ProtobufEnumMap<KeyType, ValueType>.BaseType,
fieldNumber: Int
) throws where ValueType.RawValue == Int {
for (k,v) in value {
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
var sizer = BinaryEncodingSizeVisitor()
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &sizer)
try sizer.visitSingularEnumField(value: v, fieldNumber: 2)
let entrySize = sizer.serializedSize
encoder.putVarInt(value: entrySize)
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &self)
try visitSingularEnumField(value: v, fieldNumber: 2)
}
try iterateAndEncode(
map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan,
encodeWithSizer: { sizer, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &sizer)
try sizer.visitSingularEnumField(value: value, fieldNumber: 2)
}, encodeWithVisitor: { visitor, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
try visitor.visitSingularEnumField(value: value, fieldNumber: 2)
}
)
}

mutating func visitMapField<KeyType, ValueType>(
fieldType: _ProtobufMessageMap<KeyType, ValueType>.Type,
value: _ProtobufMessageMap<KeyType, ValueType>.BaseType,
fieldNumber: Int
) throws {
for (k,v) in value {
encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
var sizer = BinaryEncodingSizeVisitor()
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &sizer)
try sizer.visitSingularMessageField(value: v, fieldNumber: 2)
let entrySize = sizer.serializedSize
encoder.putVarInt(value: entrySize)
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &self)
try visitSingularMessageField(value: v, fieldNumber: 2)
try iterateAndEncode(
map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan,
encodeWithSizer: { sizer, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &sizer)
try sizer.visitSingularMessageField(value: value, fieldNumber: 2)
}, encodeWithVisitor: { visitor, key, value in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
try visitor.visitSingularMessageField(value: value, fieldNumber: 2)
}
)
}

/// Shared driver for the `visitMapField` overloads: walks `map` and writes
/// each entry as a length-delimited field numbered `fieldNumber`.
///
/// For every entry this writes the field tag, then a varint holding the
/// entry's size, then the entry's content. The size is obtained by running
/// `encodeWithSizer` against a fresh `BinaryEncodingSizeVisitor`; the content
/// is produced by running `encodeWithVisitor` against this visitor.
///
/// - Parameters:
///   - map: The dictionary whose entries are encoded.
///   - fieldNumber: The field number written ahead of each entry.
///   - isOrderedBefore: Key ordering predicate; consulted only when
///     `options.useDeterministicOrdering` is `true`.
///   - encodeWithSizer: Visits one key/value pair with a size visitor.
///   - encodeWithVisitor: Visits one key/value pair with the encoding visitor.
/// - Throws: Any error thrown by either closure.
private mutating func iterateAndEncode<K, V>(
  map: Dictionary<K, V>,
  fieldNumber: Int,
  isOrderedBefore: (K, K) -> Bool,
  encodeWithSizer: (inout BinaryEncodingSizeVisitor, K, V) throws -> (),
  encodeWithVisitor: (inout BinaryEncodingVisitor, K, V) throws -> ()
) throws {
  guard options.useDeterministicOrdering else {
    // No ordering requested: take entries in the order the dictionary
    // yields them.
    for entry in map {
      encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
      var sizeVisitor = BinaryEncodingSizeVisitor()
      try encodeWithSizer(&sizeVisitor, entry.key, entry.value)
      let encodedLength = sizeVisitor.serializedSize
      encoder.putVarInt(value: encodedLength)
      try encodeWithVisitor(&self, entry.key, entry.value)
    }
    return
  }

  // Ordering requested: sort the entries by key before writing them.
  let sortedEntries = map.sorted { lhs, rhs in isOrderedBefore(lhs.key, rhs.key) }
  for entry in sortedEntries {
    encoder.startField(fieldNumber: fieldNumber, wireFormat: .lengthDelimited)
    var sizeVisitor = BinaryEncodingSizeVisitor()
    try encodeWithSizer(&sizeVisitor, entry.key, entry.value)
    let encodedLength = sizeVisitor.serializedSize
    encoder.putVarInt(value: encodedLength)
    try encodeWithVisitor(&self, entry.key, entry.value)
  }
}

Expand All @@ -313,7 +346,7 @@ internal struct BinaryEncodingVisitor: Visitor {
start: Int,
end: Int
) throws {
var subVisitor = BinaryEncodingMessageSetVisitor(encoder: encoder)
var subVisitor = BinaryEncodingMessageSetVisitor(encoder: encoder, options: options)
try fields.traverse(visitor: &subVisitor, start: start, end: end)
encoder = subVisitor.encoder
}
Expand All @@ -323,9 +356,12 @@ extension BinaryEncodingVisitor {

// Helper visitor used when writing out the extensions as MessageSets.
internal struct BinaryEncodingMessageSetVisitor: SelectiveVisitor {
private let options: BinaryEncodingOptions

var encoder: BinaryEncoder

init(encoder: BinaryEncoder) {
init(encoder: BinaryEncoder, options: BinaryEncodingOptions) {
self.options = options
self.encoder = encoder
}

Expand All @@ -342,7 +378,7 @@ extension BinaryEncodingVisitor {
let length = try value.serializedDataSize()
encoder.putVarInt(value: length)
// Create the sub encoder after writing the length.
var subVisitor = BinaryEncodingVisitor(encoder: encoder)
var subVisitor = BinaryEncodingVisitor(encoder: encoder, options: options)
try value.traverse(visitor: &subVisitor)
encoder = subVisitor.encoder

Expand All @@ -351,5 +387,4 @@ extension BinaryEncodingVisitor {

// SelectiveVisitor handles the rest.
}

}
14 changes: 14 additions & 0 deletions Sources/SwiftProtobuf/JSONEncodingOptions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,19 @@ public struct JSONEncodingOptions {
/// By default they are converted to JSON(lowerCamelCase) names.
public var preserveProtoFieldNames: Bool = false

/// Whether to use deterministic ordering when serializing.
///
/// Note that the deterministic serialization is NOT canonical across languages.
/// It is NOT guaranteed to remain stable over time. It is unstable across
/// different builds with schema changes due to unknown fields. Users who need
/// canonical serialization (e.g., persistent storage in a canonical form,
/// fingerprinting, etc.) should define their own canonicalization specification
/// and implement their own serializer rather than relying on this API.
///
/// If deterministic serialization is requested, map entries will be sorted
/// by keys in lexicographical order. This is an implementation detail
/// and subject to change.
public var useDeterministicOrdering: Bool = false

public init() {}
}
52 changes: 31 additions & 21 deletions Sources/SwiftProtobuf/JSONEncodingVisitor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -340,39 +340,49 @@ internal struct JSONEncodingVisitor: Visitor {
// Packed fields are handled the same as non-packed fields, so JSON just
// relies on the default implementations in Visitor.swift



mutating func visitMapField<KeyType, ValueType: MapValueType>(fieldType: _ProtobufMap<KeyType, ValueType>.Type, value: _ProtobufMap<KeyType, ValueType>.BaseType, fieldNumber: Int) throws {
try startField(for: fieldNumber)
encoder.append(text: "{")
var mapVisitor = JSONMapEncodingVisitor(encoder: encoder, options: options)
for (k,v) in value {
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &mapVisitor)
try ValueType.visitSingular(value: v, fieldNumber: 2, with: &mapVisitor)
try iterateAndEncode(map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan) {
(visitor: inout JSONMapEncodingVisitor, key, value) throws -> () in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
try ValueType.visitSingular(value: value, fieldNumber: 2, with: &visitor)
}
encoder = mapVisitor.encoder
encoder.append(text: "}")
}

mutating func visitMapField<KeyType, ValueType>(fieldType: _ProtobufEnumMap<KeyType, ValueType>.Type, value: _ProtobufEnumMap<KeyType, ValueType>.BaseType, fieldNumber: Int) throws where ValueType.RawValue == Int {
try startField(for: fieldNumber)
encoder.append(text: "{")
var mapVisitor = JSONMapEncodingVisitor(encoder: encoder, options: options)
for (k, v) in value {
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &mapVisitor)
try mapVisitor.visitSingularEnumField(value: v, fieldNumber: 2)
try iterateAndEncode(map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan) {
(visitor: inout JSONMapEncodingVisitor, key, value) throws -> () in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
try visitor.visitSingularEnumField(value: value, fieldNumber: 2)
}
encoder = mapVisitor.encoder
encoder.append(text: "}")
}

mutating func visitMapField<KeyType, ValueType>(fieldType: _ProtobufMessageMap<KeyType, ValueType>.Type, value: _ProtobufMessageMap<KeyType, ValueType>.BaseType, fieldNumber: Int) throws {
try iterateAndEncode(map: value, fieldNumber: fieldNumber, isOrderedBefore: KeyType._lessThan) {
(visitor: inout JSONMapEncodingVisitor, key, value) throws -> () in
try KeyType.visitSingular(value: key, fieldNumber: 1, with: &visitor)
try visitor.visitSingularMessageField(value: value, fieldNumber: 2)
}
}

/// Helper to encapsulate the common structure of iterating over a map
/// and encoding the keys and values.
private mutating func iterateAndEncode<K, V>(
map: Dictionary<K, V>,
fieldNumber: Int,
isOrderedBefore: (K, K) -> Bool,
encode: (inout JSONMapEncodingVisitor, K, V) throws -> ()
) throws {
try startField(for: fieldNumber)
encoder.append(text: "{")
var mapVisitor = JSONMapEncodingVisitor(encoder: encoder, options: options)
for (k,v) in value {
try KeyType.visitSingular(value: k, fieldNumber: 1, with: &mapVisitor)
try mapVisitor.visitSingularMessageField(value: v, fieldNumber: 2)
if options.useDeterministicOrdering {
for (k,v) in map.sorted(by: { isOrderedBefore( $0.0, $1.0) }) {
try encode(&mapVisitor, k, v)
}
} else {
for (k,v) in map {
try encode(&mapVisitor, k, v)
}
}
encoder = mapVisitor.encoder
encoder.append(text: "}")
Expand Down
26 changes: 24 additions & 2 deletions Sources/SwiftProtobuf/Message+BinaryAdditions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,37 @@ extension Message {
/// - Parameters:
/// - partial: If `false` (the default), this method will check
/// `Message.isInitialized` before encoding to verify that all required
/// fields are present. If any are missing, this method throws
/// fields are present. If any are missing, this method throws
/// `BinaryEncodingError.missingRequiredFields`.
/// - Returns: A `Data` value containing the binary serialization of the
/// message.
/// - Throws: `BinaryEncodingError` if encoding fails.
public func serializedData(partial: Bool = false) throws -> Data {
  // Forward to the options-taking overload with default-constructed options.
  let defaultOptions = BinaryEncodingOptions()
  return try serializedData(partial: partial, options: defaultOptions)
}

/// Returns a `Data` value containing the Protocol Buffer binary format
/// serialization of the message.
///
/// - Parameters:
/// - partial: If `false` (the default), this method will check
/// `Message.isInitialized` before encoding to verify that all required
/// fields are present. If any are missing, this method throws
/// `BinaryEncodingError.missingRequiredFields`.
/// - options: The `BinaryEncodingOptions` to use.
/// - Returns: A `Data` value containing the binary serialization of the
/// message.
///
/// - Throws: `BinaryEncodingError` if encoding fails.
public func serializedData(
partial: Bool = false,
options: BinaryEncodingOptions
) throws -> Data {
if !partial && !isInitialized {
throw BinaryEncodingError.missingRequiredFields
}

// Note that this assumes `options` will not change the required size.
let requiredSize = try serializedDataSize()

// Messages have a 2GB limit in encoded size, the upstream C++ code
Expand All @@ -48,7 +70,7 @@ extension Message {
var data = Data(count: requiredSize)
try data.withUnsafeMutableBytes { (body: UnsafeMutableRawBufferPointer) in
if let baseAddress = body.baseAddress, body.count > 0 {
var visitor = BinaryEncodingVisitor(forWritingInto: baseAddress)
var visitor = BinaryEncodingVisitor(forWritingInto: baseAddress, options: options)
try traverse(visitor: &visitor)
// Currently not exposing this from the api because it really would be
// an internal error in the library and should never happen.
Expand Down
Loading