Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add type checking to strings/bytes for Python/Ruby #1597

Merged
merged 1 commit into from
Jun 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions fixtures/type-limits/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,8 @@ fn take_string(v: String) -> String {
assert!(str::from_utf8(v.as_bytes()).is_ok());
v
}
/// Returns the received byte vector unchanged; the bindings tests compare
/// the returned value against the bytes they passed in.
fn take_bytes(v: Vec<u8>) -> Vec<u8> {
v
}

uniffi::include_scaffolding!("type-limits");
1 change: 1 addition & 0 deletions fixtures/type-limits/src/type-limits.udl
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ namespace uniffi_type_limits {
f64 take_f64(f64 v);

string take_string(string v);
bytes take_bytes(bytes v);
};
37 changes: 37 additions & 0 deletions fixtures/type-limits/tests/bindings/test_type_limits.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,11 +195,48 @@ def test_special_floats(self):
self.assertTrue(math.isnan(take_f32(math.nan)))
self.assertTrue(math.isnan(take_f64(math.nan)))

def test_non_string(self):
self.assertRaises(TypeError, lambda: take_string(None))
self.assertRaises(TypeError, lambda: take_string(False))
self.assertRaises(TypeError, lambda: take_string(True))
self.assertRaises(TypeError, lambda: take_string(0))
self.assertRaises(TypeError, lambda: take_string(0.0))
self.assertRaises(TypeError, lambda: take_string(b""))

class A:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not clear to me what the current semantics are - does this work without this patch? If so, what's the motivation for preventing this from working?

Copy link
Member

@mhammond mhammond Jun 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I mean, I guess "does the object allow str(ob)?" isn't really a great indicator of "this object wants to be seen as string-like", but my question is really more about "do we want to exclude objects which want to be string-like?")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not clear to me what the current semantics are - does this work without this patch?

I didn't change the current semantics except for throwing TypeError instead of AttributeError. So no, it does not work without this patch.

In this way, this test is trying to test for the current behavior.

def __str__(self):
return ""

self.assertRaises(TypeError, lambda: take_string(A()))

def test_strings(self):
    """take_string rejects a lone surrogate and echoes other text unchanged."""
    with self.assertRaises(ValueError):
        take_string("\ud800")  # surrogate

    for text in ("", "愛", "💖"):
        self.assertEqual(take_string(text), text)

def test_non_bytes(self):
    """take_bytes raises TypeError for every argument that is not bytes-like."""

    class A:
        # Defining __str__ must not make an object acceptable as bytes.
        def __str__(self):
            return ""

    for rejected in (None, False, True, 0, 0.0, "", A()):
        with self.assertRaises(TypeError):
            take_bytes(rejected)

def test_bytes(self):
    """take_bytes returns byte strings unchanged, whether or not they are valid UTF-8."""
    round_trips = [
        (b"", b""),
        (b"\xff", b"\xff"),  # invalid utf-8 byte
        (b"\xed\xa0\x80", b"\xed\xa0\x80"),  # surrogate
        ("愛".encode(), "愛".encode()),
        ("💖".encode(), "💖".encode()),
        ("愛".encode("utf-16-le"), b"\x1b\x61"),
        ("💖".encode("utf-16-le"), b"\x3d\xd8\x96\xdc"),
    ]
    for given, expected in round_trips:
        self.assertEqual(take_bytes(given), expected)

# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
unittest.main()
39 changes: 37 additions & 2 deletions fixtures/type-limits/tests/bindings/test_type_limits.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# frozen_string_literal: true

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/. */
Expand Down Expand Up @@ -214,11 +212,48 @@ def test_special_floats
assert(UniffiTypeLimits.take_f32(Float::NAN).nan?)
assert(UniffiTypeLimits.take_f64(Float::NAN).nan?)
end
# Plain object that defines no to_str; the tests pass it where a string is
# expected and assert that a TypeError is raised.
class NonString
end
# take_string must raise TypeError for every value that is not string-like.
def test_non_string
  [nil, false, true, 0, 0.0, NonString.new].each do |rejected|
    assert_raise(TypeError) { UniffiTypeLimits.take_string(rejected) }
  end
end
# Object that is implicitly convertible to a String through to_str; the tests
# expect it to be accepted wherever a string or bytes argument is taken.
class StringLike
# Implicit-conversion hook; returns the text this object stands for.
def to_str
"💕"
end
end
# take_string rejects byte sequences that are not valid UTF-8 and returns
# every other string (transcoded to UTF-8 where needed) unchanged.
def test_strings
  assert_raise(Encoding::InvalidByteSequenceError) { UniffiTypeLimits.take_string("\xff") } # invalid byte
  assert_raise(Encoding::InvalidByteSequenceError) { UniffiTypeLimits.take_string("\xed\xa0\x80") } # surrogate

  # assert_equal takes (expected, actual); keeping that order makes failure
  # messages label the two values correctly.
  assert_equal("", UniffiTypeLimits.take_string(""))
  assert_equal("愛", UniffiTypeLimits.take_string("愛"))
  assert_equal("💖", UniffiTypeLimits.take_string("💖"))

  # Input in another encoding comes back as the same text.
  assert_equal("愛", UniffiTypeLimits.take_string("愛".encode(Encoding::UTF_16LE)))
  assert_equal("💖", UniffiTypeLimits.take_string("💖".encode(Encoding::UTF_16LE)))

  # Objects that implement to_str are accepted like the string they return.
  assert_equal("💕", UniffiTypeLimits.take_string(StringLike.new))
end
# take_bytes must raise TypeError for every value that is not string-like.
def test_non_bytes
  assert_raise(TypeError) { UniffiTypeLimits.take_bytes(nil) }
  assert_raise(TypeError) { UniffiTypeLimits.take_bytes(false) }
  assert_raise(TypeError) { UniffiTypeLimits.take_bytes(true) }
  assert_raise(TypeError) { UniffiTypeLimits.take_bytes(0) }
  assert_raise(TypeError) { UniffiTypeLimits.take_bytes(0.0) }
  # This case must go through take_bytes as well; calling take_string here
  # would leave the bytes path untested for plain objects.
  assert_raise(TypeError) { UniffiTypeLimits.take_bytes(NonString.new) }
end
# take_bytes returns the raw bytes of its argument as a binary string,
# whether or not those bytes form valid UTF-8.
def test_bytes
  # assert_equal takes (expected, actual). String#b yields a binary-encoded
  # copy, so each expected value compares byte-for-byte with the result.
  assert_equal("".b, UniffiTypeLimits.take_bytes(""))
  assert_equal("\xff".b, UniffiTypeLimits.take_bytes("\xff")) # invalid utf-8 byte
  assert_equal("\xed\xa0\x80".b, UniffiTypeLimits.take_bytes("\xed\xa0\x80")) # surrogate
  assert_equal("愛".b, UniffiTypeLimits.take_bytes("愛"))
  assert_equal("💖".b, UniffiTypeLimits.take_bytes("💖"))

  # Strings in other encodings are passed through without transcoding.
  assert_equal("\x1b\x61".b, UniffiTypeLimits.take_bytes("愛".encode(Encoding::UTF_16LE)))
  assert_equal("\x3d\xd8\x96\xdc".b, UniffiTypeLimits.take_bytes("💖".encode(Encoding::UTF_16LE)))

  # Objects that implement to_str contribute the bytes of the string they return.
  assert_equal("\xf0\x9f\x92\x95".b, UniffiTypeLimits.take_bytes(StringLike.new))
end
end
4 changes: 4 additions & 0 deletions uniffi_bindgen/src/bindings/python/templates/BytesHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,9 @@ def read(buf):

@staticmethod
def write(value, buf):
try:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not isinstance() here? I assume it's because you want to support anything exposing the buffer protocol - so why do bytes get "anything bytes-like" but strings aren't "anything string-like"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assume it's because you want to support anything exposing the buffer protocol

Yes, basically that.

I assume it's because you want to support anything exposing the buffer protocol - so why do bytes get "anything bytes-like" but strings aren't "anything string-like"?

Because I'm trying to mirror Python's native semantics here. Take e.g. the lambda x: str(x, encoding="utf8") function. It allows x to be anything adhering to the buffer protocol. But lambda x: bytes(x, encoding="utf8") only allows x to be str, no other classes pretending to be str. This also seems to be the case for other functions in Python that take bytes or str: Functions taking bytes usually take anything implementing the buffer protocol, functions taking str only allow str.

memoryview(value)
except TypeError:
raise TypeError("a bytes-like object is required, not {!r}".format(type(value).__name__))
buf.writeI32(len(value))
buf.write(value)
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
class FfiConverterString:
@staticmethod
def check(value):
    """Return ``value`` unchanged if it is a ``str``; otherwise raise TypeError.

    The error message names the type of the offending argument.
    """
    if isinstance(value, str):
        return value
    raise TypeError("argument must be str, not {}".format(type(value).__name__))

@staticmethod
def read(buf):
size = buf.readI32()
Expand All @@ -9,6 +15,7 @@ def read(buf):

@staticmethod
def write(value, buf):
    """Type-check ``value``, then write its UTF-8 length (via writeI32) and bytes to ``buf``.

    Raises TypeError (from ``FfiConverterString.check``) if ``value`` is not a str.
    """
    encoded = FfiConverterString.check(value).encode("utf-8")
    buf.writeI32(len(encoded))
    buf.write(encoded)
Expand All @@ -20,6 +27,7 @@ def lift(buf):

@staticmethod
def lower(value):
    """Type-check ``value`` and return a finalized buffer holding its UTF-8 bytes.

    Raises TypeError (from ``FfiConverterString.check``) if ``value`` is not a str.
    """
    text = FfiConverterString.check(value)
    with RustBuffer.allocWithBuilder() as out:
        out.write(text.encode("utf-8"))
        return out.finalize()
3 changes: 2 additions & 1 deletion uniffi_bindgen/src/bindings/ruby/gen_ruby/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,8 @@ mod filters {
Type::Float32 | Type::Float64 => nm.to_string(),
Type::Boolean => format!("{nm} ? true : false"),
Type::Object { .. } | Type::Enum(_) | Type::Record(_) => nm.to_string(),
Type::String | Type::Bytes => format!("{ns}::uniffi_utf8({nm})"),
Type::String => format!("{ns}::uniffi_utf8({nm})"),
Type::Bytes => format!("{ns}::uniffi_bytes({nm})"),
Type::Timestamp | Type::Duration => nm.to_string(),
Type::CallbackInterface(_) => panic!("No support for coercing callback interfaces yet"),
Type::Optional(t) => format!("({nm} ? {} : nil)", coerce_rb(nm, ns, t)?),
Expand Down
6 changes: 6 additions & 0 deletions uniffi_bindgen/src/bindings/ruby/templates/Helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@ def self.uniffi_in_range(i, type_name, min, max)
end

# Coerces +v+ to a valid UTF-8 String.
#
# @param v [#to_str] a String, or an object implicitly convertible to one
# @return [String] the result of +v.to_str+ transcoded to UTF-8
# @raise [TypeError] if +v+ does not respond to +to_str+
# @raise [Encoding::InvalidByteSequenceError] if the result is not valid UTF-8
def self.uniffi_utf8(v)
  unless v.respond_to?(:to_str)
    # Describe the argument the way Ruby's own TypeError does: nil/true/false
    # by name, anything else by class. Interpolating the value itself would
    # render nil as an empty string and call to_s on arbitrary objects.
    described = case v
                when nil, true, false then v.inspect
                else v.class
                end
    raise TypeError, "no implicit conversion of #{described} into String"
  end

  v = v.to_str.encode(Encoding::UTF_8)
  raise Encoding::InvalidByteSequenceError, "not a valid UTF-8 encoded string" unless v.valid_encoding?

  v
end

# Coerces +v+ to a String of raw bytes. No transcoding or validation is
# done; the result of +v.to_str+ is returned as it is.
#
# @param v [#to_str] a String, or an object implicitly convertible to one
# @return [String] the result of +v.to_str+
# @raise [TypeError] if +v+ does not respond to +to_str+
def self.uniffi_bytes(v)
  unless v.respond_to?(:to_str)
    # Describe the argument the way Ruby's own TypeError does: nil/true/false
    # by name, anything else by class. Interpolating the value itself would
    # render nil as an empty string and call to_s on arbitrary objects.
    described = case v
                when nil, true, false then v.inspect
                else v.class
                end
    raise TypeError, "no implicit conversion of #{described} into String"
  end

  v.to_str
end
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def write_String(v)
{% when Type::Bytes -%}

def write_Bytes(v)
v = v.to_s
v = {{ ci.namespace()|class_name_rb }}::uniffi_bytes(v)
pack_into 4, 'l>', v.bytes.size
write v
end
Expand Down