Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unsafe audit #560

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions form_urlencoded/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ impl<'a> Iterator for ByteSerialize<'a> {
None => (self.bytes, &[][..]),
};
self.bytes = remaining;
// This unsafe is appropriate because we have already checked these
// bytes in byte_serialized_unchanged, which checks for a subset
// of UTF-8. So we know these bytes are valid UTF-8, and doing
// another UTF-8 check would be wasteful.
Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
} else {
None
Expand Down
30 changes: 23 additions & 7 deletions form_urlencoded/src/query_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,34 @@ pub(crate) fn encode<'a>(encoding_override: EncodingOverride, input: &'a str) ->
}

pub(crate) fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
// Note: This function is duplicated in `percent_encoding/lib.rs`.
match input {
Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
Cow::Owned(bytes) => {
let raw_utf8: *const [u8];
match String::from_utf8_lossy(&bytes) {
Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(),
Cow::Owned(s) => return s.into(),
Cow::Borrowed(utf8) => {
// If from_utf8_lossy returns a Cow::Borrowed, then we can
// be sure our original bytes were valid UTF-8. This is because
// if the bytes were invalid UTF-8 from_utf8_lossy would have
// to allocate a new owned string to back the Cow so it could
// replace invalid bytes with a placeholder.

// First we do a debug_assert to confirm our description above.
let raw_utf8: *const [u8];
raw_utf8 = utf8.as_bytes();
debug_assert!(raw_utf8 == &*bytes as *const [u8]);

// Given we know the original input bytes are valid UTF-8,
// and we have ownership of those bytes, we re-use them and
// return a Cow::Owned here. Ideally we'd put our return statement
// right below this line, but to support the old lexically scoped
// borrow checker the return must be moved to outside the match
// statement.
},
Cow::Owned(s) => return Cow::Owned(s),
}
// from_utf8_lossy returned a borrow of `bytes` unchanged.
debug_assert!(raw_utf8 == &*bytes as *const [u8]);
// Reuse the existing `Vec` allocation.
unsafe { String::from_utf8_unchecked(bytes) }.into()

Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
}
}
}
5 changes: 2 additions & 3 deletions idna/src/punycode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,10 @@ pub fn encode_str(input: &str) -> Option<String> {
/// 63 encoded bytes, the DNS limit on domain name labels.
pub fn encode(input: &[char]) -> Option<String> {
// Handle "basic" (ASCII) code points. They are encoded as-is.
let output_bytes = input
let mut output : String = input
.iter()
.filter_map(|&c| if c.is_ascii() { Some(c as u8) } else { None })
.filter(|&c| c.is_ascii())
.collect();
let mut output = unsafe { String::from_utf8_unchecked(output_bytes) };
let basic_length = output.len() as u32;
if basic_length > 0 {
output.push_str("-")
Expand Down
32 changes: 25 additions & 7 deletions percent_encoding/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ impl<'a> Iterator for PercentEncode<'a> {
self.bytes = remaining;
Some(percent_encode_byte(first_byte))
} else {
// The unsafe blocks here are appropriate because the bytes are
// confirmed as a subset of UTF-8 in should_percent_encode.
for (i, &byte) in remaining.iter().enumerate() {
if self.ascii_set.should_percent_encode(byte) {
// 1 for first_byte + i for previous iterations of this loop
Expand Down Expand Up @@ -425,18 +427,34 @@ impl<'a> PercentDecode<'a> {
}

fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
// Note: This function is duplicated in `form_urlencoded/src/query_encoding.rs`.
match input {
Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
Cow::Owned(bytes) => {
let raw_utf8: *const [u8];
match String::from_utf8_lossy(&bytes) {
Cow::Borrowed(utf8) => raw_utf8 = utf8.as_bytes(),
Cow::Owned(s) => return s.into(),
Cow::Borrowed(utf8) => {
// If from_utf8_lossy returns a Cow::Borrowed, then we can
// be sure our original bytes were valid UTF-8. This is because
// if the bytes were invalid UTF-8 from_utf8_lossy would have
// to allocate a new owned string to back the Cow so it could
// replace invalid bytes with a placeholder.

// First we do a debug_assert to confirm our description above.
let raw_utf8: *const [u8];
raw_utf8 = utf8.as_bytes();
debug_assert!(raw_utf8 == &*bytes as *const [u8]);

// Given we know the original input bytes are valid UTF-8,
// and we have ownership of those bytes, we re-use them and
// return a Cow::Owned here. Ideally we'd put our return statement
// right below this line, but to support the old lexically scoped
// borrow checker the return must be moved to outside the match
// statement.
},
Cow::Owned(s) => return Cow::Owned(s),
}
// from_utf8_lossy returned a borrow of `bytes` unchanged.
debug_assert!(raw_utf8 == &*bytes as *const [u8]);
// Reuse the existing `Vec` allocation.
unsafe { String::from_utf8_unchecked(bytes) }.into()

Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
}
}
}