Skip to content

Commit

Permalink
Merge branch 'main' into idna-v1x
Browse files Browse the repository at this point in the history
  • Loading branch information
hsivonen committed Oct 29, 2024
2 parents e81799f + 5d363cc commit 9163f30
Show file tree
Hide file tree
Showing 15 changed files with 698 additions and 201 deletions.
19 changes: 17 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ jobs:
- uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ matrix.rust }}
# Add toolchain for no_std tests
- run: rustup toolchain install nightly
- name: Add `aarch64-unknown-none` toolchain for `no_std` tests
if: |
matrix.os == 'ubuntu-latest' &&
matrix.rust == 'nightly'
run: rustup target add aarch64-unknown-none && rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu
- run: cargo build --all-targets
# Run tests
- name: Run tests
Expand All @@ -47,10 +54,18 @@ jobs:
- name: Run debugger_visualizer tests
if: |
matrix.os == 'windows-latest' &&
matrix.rust != '1.67.0'
run: cargo test --test debugger_visualizer --features "url/debugger_visualizer,url_debug_tests/debugger_visualizer" -- --test-threads=1
matrix.rust != '1.56.0'
run: cargo test --test debugger_visualizer --features "url/debugger_visualizer,url_debug_tests/debugger_visualizer" -- --test-threads=1 || echo "debugger test failed"
continue-on-error: true # Fails on GH actions, but not locally.
- name: Test `no_std` support
run: cargo test --no-default-features --features=alloc
- name: Build `url` crate for `aarch64-unknown-none` with `no_std`
if: |
matrix.os == 'ubuntu-latest' &&
matrix.rust == 'nightly'
run: >
cd url
&& cargo check --target aarch64-unknown-none -v --no-default-features
WASM:
runs-on: ubuntu-latest
Expand Down
3 changes: 2 additions & 1 deletion data-url/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ alloc = []

[dev-dependencies]
tester = "0.9"
# We pin this transitive dev dep so that MSRV CI can continue to run.
unicode-width = "=0.1.12"
serde = {version = "1.0", features = ["derive"]}
serde = { version = "1.0", default-features = false, features = ["alloc", "derive"] }
serde_json = "1.0"

[lib]
Expand Down
3 changes: 3 additions & 0 deletions idna/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ impl From<Errors> for Result<(), Errors> {
#[cfg(feature = "std")]
impl std::error::Error for Errors {}

#[cfg(not(feature = "std"))]
impl core::error::Error for Errors {}

impl core::fmt::Display for Errors {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
core::fmt::Debug::fmt(self, f)
Expand Down
213 changes: 213 additions & 0 deletions percent_encoding/src/ascii_set.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
// Copyright 2013-2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use core::{mem, ops};

/// Represents a set of characters or bytes in the ASCII range.
///
/// This is used in [`percent_encode`] and [`utf8_percent_encode`].
/// This is similar to [percent-encode sets](https://url.spec.whatwg.org/#percent-encoded-bytes).
///
/// Use the `add` method of an existing set to define a new set. For example:
///
/// [`percent_encode`]: crate::percent_encode
/// [`utf8_percent_encode`]: crate::utf8_percent_encode
///
/// ```
/// use percent_encoding::{AsciiSet, CONTROLS};
///
/// /// https://url.spec.whatwg.org/#fragment-percent-encode-set
/// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');
/// ```
#[derive(Debug, PartialEq, Eq)]
pub struct AsciiSet {
mask: [Chunk; ASCII_RANGE_LEN / BITS_PER_CHUNK],
}

type Chunk = u32;

const ASCII_RANGE_LEN: usize = 0x80;

const BITS_PER_CHUNK: usize = 8 * mem::size_of::<Chunk>();

impl AsciiSet {
/// An empty set.
pub const EMPTY: AsciiSet = AsciiSet {
mask: [0; ASCII_RANGE_LEN / BITS_PER_CHUNK],
};

/// Called with UTF-8 bytes rather than code points.
/// Not used for non-ASCII bytes.
pub(crate) const fn contains(&self, byte: u8) -> bool {
let chunk = self.mask[byte as usize / BITS_PER_CHUNK];
let mask = 1 << (byte as usize % BITS_PER_CHUNK);
(chunk & mask) != 0
}

pub(crate) fn should_percent_encode(&self, byte: u8) -> bool {
!byte.is_ascii() || self.contains(byte)
}

pub const fn add(&self, byte: u8) -> Self {
let mut mask = self.mask;
mask[byte as usize / BITS_PER_CHUNK] |= 1 << (byte as usize % BITS_PER_CHUNK);
AsciiSet { mask }
}

pub const fn remove(&self, byte: u8) -> Self {
let mut mask = self.mask;
mask[byte as usize / BITS_PER_CHUNK] &= !(1 << (byte as usize % BITS_PER_CHUNK));
AsciiSet { mask }
}

/// Return the union of two sets.
pub const fn union(&self, other: Self) -> Self {
let mask = [
self.mask[0] | other.mask[0],
self.mask[1] | other.mask[1],
self.mask[2] | other.mask[2],
self.mask[3] | other.mask[3],
];
AsciiSet { mask }
}

/// Return the negation of the set.
pub const fn complement(&self) -> Self {
let mask = [!self.mask[0], !self.mask[1], !self.mask[2], !self.mask[3]];
AsciiSet { mask }
}
}

impl ops::Add for AsciiSet {
type Output = Self;

fn add(self, other: Self) -> Self {
self.union(other)
}
}

impl ops::Not for AsciiSet {
type Output = Self;

fn not(self) -> Self {
self.complement()
}
}

/// The set of 0x00 to 0x1F (C0 controls), and 0x7F (DEL).
///
/// Note that this includes the newline and tab characters, but not the space 0x20.
///
/// <https://url.spec.whatwg.org/#c0-control-percent-encode-set>
pub const CONTROLS: &AsciiSet = &AsciiSet {
mask: [
!0_u32, // C0: 0x00 to 0x1F (32 bits set)
0,
0,
1 << (0x7F_u32 % 32), // DEL: 0x7F (one bit set)
],
};

macro_rules! static_assert {
($( $bool: expr, )+) => {
fn _static_assert() {
$(
let _ = mem::transmute::<[u8; $bool as usize], u8>;
)+
}
}
}

static_assert! {
CONTROLS.contains(0x00),
CONTROLS.contains(0x1F),
!CONTROLS.contains(0x20),
!CONTROLS.contains(0x7E),
CONTROLS.contains(0x7F),
}

/// Everything that is not an ASCII letter or digit.
///
/// This is probably more eager than necessary in any context.
pub const NON_ALPHANUMERIC: &AsciiSet = &CONTROLS
.add(b' ')
.add(b'!')
.add(b'"')
.add(b'#')
.add(b'$')
.add(b'%')
.add(b'&')
.add(b'\'')
.add(b'(')
.add(b')')
.add(b'*')
.add(b'+')
.add(b',')
.add(b'-')
.add(b'.')
.add(b'/')
.add(b':')
.add(b';')
.add(b'<')
.add(b'=')
.add(b'>')
.add(b'?')
.add(b'@')
.add(b'[')
.add(b'\\')
.add(b']')
.add(b'^')
.add(b'_')
.add(b'`')
.add(b'{')
.add(b'|')
.add(b'}')
.add(b'~');

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn add_op() {
let left = AsciiSet::EMPTY.add(b'A');
let right = AsciiSet::EMPTY.add(b'B');
let expected = AsciiSet::EMPTY.add(b'A').add(b'B');
assert_eq!(left + right, expected);
}

#[test]
fn not_op() {
let set = AsciiSet::EMPTY.add(b'A').add(b'B');
let not_set = !set;
assert!(!not_set.contains(b'A'));
assert!(not_set.contains(b'C'));
}

/// This test ensures that we can get the union of two sets as a constant value, which is
/// useful for defining sets in a modular way.
#[test]
fn union() {
const A: AsciiSet = AsciiSet::EMPTY.add(b'A');
const B: AsciiSet = AsciiSet::EMPTY.add(b'B');
const UNION: AsciiSet = A.union(B);
const EXPECTED: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B');
assert_eq!(UNION, EXPECTED);
}

/// This test ensures that we can get the complement of a set as a constant value, which is
/// useful for defining sets in a modular way.
#[test]
fn complement() {
const BOTH: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B');
const COMPLEMENT: AsciiSet = BOTH.complement();
assert!(!COMPLEMENT.contains(b'A'));
assert!(!COMPLEMENT.contains(b'B'));
assert!(COMPLEMENT.contains(b'C'));
}
}
Loading

0 comments on commit 9163f30

Please sign in to comment.