Skip to content

Commit

Permalink
fix(ext/node): Match punycode module behavior to node (#22847)
Browse files Browse the repository at this point in the history
Fixes #19214.

We were using the `idna` crate to implement our polyfill for
`punycode.toASCII` and `punycode.toUnicode`. The `idna` crate is
correct, and adheres to the IDNA2003/2008 spec, but it turns out
`node`'s implementations don't really follow any spec! Instead, node
splits the domain by `'.'` and punycode encodes/decodes each part. This
means that node's implementations will happily work on codepoints that
are disallowed by the IDNA specs, causing the error in #19214.

While fixing this, I went ahead and matched the node behavior on all of
the punycode functions and enabled node's punycode test in our
`node_compat` suite.
  • Loading branch information
nathanwhit committed Mar 14, 2024
1 parent 5d1bb44 commit 93c6f92
Show file tree
Hide file tree
Showing 11 changed files with 491 additions and 21 deletions.
2 changes: 2 additions & 0 deletions ext/node/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ deno_core::extension!(deno_node,
ops::v8::op_vm_run_in_new_context,
ops::idna::op_node_idna_domain_to_ascii,
ops::idna::op_node_idna_domain_to_unicode,
ops::idna::op_node_idna_punycode_to_ascii,
ops::idna::op_node_idna_punycode_to_unicode,
ops::idna::op_node_idna_punycode_decode,
ops::idna::op_node_idna_punycode_encode,
ops::zlib::op_zlib_new,
Expand Down
141 changes: 136 additions & 5 deletions ext/node/ops/idna.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,126 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

use deno_core::error::AnyError;
use deno_core::anyhow::Error;
use deno_core::error::range_error;
use deno_core::op2;

use std::borrow::Cow;

// map_domain, to_ascii and to_unicode are based on the punycode implementation in node.js
// https://github.com/nodejs/node/blob/73025c4dec042e344eeea7912ed39f7b7c4a3991/lib/punycode.js

const PUNY_PREFIX: &str = "xn--";

fn invalid_input_err() -> Error {
range_error("Invalid input")
}

fn not_basic_err() -> Error {
range_error("Illegal input >= 0x80 (not a basic code point)")
}

/// map a domain by mapping each label with the given function
fn map_domain<E>(
domain: &str,
f: impl Fn(&str) -> Result<Cow<'_, str>, E>,
) -> Result<String, E> {
let mut result = String::with_capacity(domain.len());
let mut domain = domain;

// if it's an email, leave the local part as is
let mut parts = domain.split('@');
if let (Some(local), Some(remaining)) = (parts.next(), parts.next()) {
result.push_str(local);
result.push('@');
domain = remaining;
}

// split into labels and map each one
for (i, label) in domain.split('.').enumerate() {
if i > 0 {
result.push('.');
}
result.push_str(&f(label)?);
}
Ok(result)
}

/// Maps a unicode domain to ascii by punycode encoding each label
///
/// Note this is not IDNA2003 or IDNA2008 compliant, rather it matches node.js's punycode implementation
fn to_ascii(input: &str) -> Result<String, Error> {
if input.is_ascii() {
return Ok(input.into());
}

let mut result = String::with_capacity(input.len()); // at least as long as input

let rest = map_domain(input, |label| {
if label.is_ascii() {
Ok(label.into())
} else {
idna::punycode::encode_str(label)
.map(|encoded| [PUNY_PREFIX, &encoded].join("").into()) // add the prefix
.ok_or_else(|| {
Error::msg("Input would take more than 63 characters to encode") // only error possible per the docs
})
}
})?;

result.push_str(&rest);
Ok(result)
}

/// Maps an ascii domain to unicode by punycode decoding each label
///
/// Note this is not IDNA2003 or IDNA2008 compliant, rather it matches node.js's punycode implementation
fn to_unicode(input: &str) -> Result<String, Error> {
map_domain(input, |s| {
if let Some(puny) = s.strip_prefix(PUNY_PREFIX) {
// it's a punycode encoded label
Ok(
idna::punycode::decode_to_string(&puny.to_lowercase())
.ok_or_else(invalid_input_err)?
.into(),
)
} else {
Ok(s.into())
}
})
}

/// Converts a domain to unicode with behavior that is
/// compatible with the `punycode` module in node.js
#[op2]
#[string]
pub fn op_node_idna_punycode_to_ascii(
#[string] domain: String,
) -> Result<String, Error> {
to_ascii(&domain)
}

/// Converts a domain to ASCII with behavior that is
/// compatible with the `punycode` module in node.js
#[op2]
#[string]
pub fn op_node_idna_punycode_to_unicode(
#[string] domain: String,
) -> Result<String, Error> {
to_unicode(&domain)
}

/// Converts a domain to ASCII as per the IDNA spec
/// (specifically UTS #46)
#[op2]
#[string]
pub fn op_node_idna_domain_to_ascii(
#[string] domain: String,
) -> Result<String, AnyError> {
Ok(idna::domain_to_ascii(&domain)?)
) -> Result<String, Error> {
idna::domain_to_ascii(&domain).map_err(|e| e.into())
}

/// Converts a domain to Unicode as per the IDNA spec
/// (specifically UTS #46)
#[op2]
#[string]
pub fn op_node_idna_domain_to_unicode(#[string] domain: String) -> String {
Expand All @@ -19,8 +129,29 @@ pub fn op_node_idna_domain_to_unicode(#[string] domain: String) -> String {

#[op2]
#[string]
pub fn op_node_idna_punycode_decode(#[string] domain: String) -> String {
idna::punycode::decode_to_string(&domain).unwrap_or_default()
pub fn op_node_idna_punycode_decode(
#[string] domain: String,
) -> Result<String, Error> {
if domain.is_empty() {
return Ok(domain);
}

// all code points before the last delimiter must be basic
// see https://github.com/nodejs/node/blob/73025c4dec042e344eeea7912ed39f7b7c4a3991/lib/punycode.js#L215-L227
let last_dash = domain.len()
- 1
- domain
.bytes()
.rev()
.position(|b| b == b'-')
.unwrap_or(domain.len() - 1);

if !domain[..last_dash].is_ascii() {
return Err(not_basic_err());
}

idna::punycode::decode_to_string(&domain)
.ok_or_else(|| deno_core::error::range_error("Invalid input"))
}

#[op2]
Expand Down
12 changes: 9 additions & 3 deletions ext/node/polyfills/dns.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ import {
GetAddrInfoReqWrap,
QueryReqWrap,
} from "ext:deno_node/internal_binding/cares_wrap.ts";
import { toASCII } from "node:punycode";
import { domainToASCII } from "ext:deno_node/internal/idna.ts";
import { notImplemented } from "ext:deno_node/_utils.ts";

function onlookup(
Expand Down Expand Up @@ -264,7 +264,13 @@ export function lookup(
req.hostname = hostname;
req.oncomplete = all ? onlookupall : onlookup;

const err = getaddrinfo(req, toASCII(hostname), family, hints, verbatim);
const err = getaddrinfo(
req,
domainToASCII(hostname),
family,
hints,
verbatim,
);

if (err) {
nextTick(
Expand Down Expand Up @@ -332,7 +338,7 @@ function resolver(bindingName: keyof ChannelWrapQuery) {

req.ttl = !!(options && (options as ResolveOptions).ttl);

const err = this._handle[bindingName](req, toASCII(name));
const err = this._handle[bindingName](req, domainToASCII(name));

if (err) {
throw dnsException(err, bindingName, name);
Expand Down
19 changes: 19 additions & 0 deletions ext/node/polyfills/internal/idna.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@

"use strict";

import {
op_node_idna_domain_to_ascii,
op_node_idna_domain_to_unicode,
} from "ext:core/ops";

/**
* Creates an array containing the numeric code points of each Unicode
* character in the string. While JavaScript uses UCS-2 internally,
Expand Down Expand Up @@ -105,3 +110,17 @@ export const ucs2 = {
decode: ucs2decode,
encode: ucs2encode,
};

/**
* Converts a domain to ASCII as per the IDNA spec
*/
export function domainToASCII(domain: string) {
return op_node_idna_domain_to_ascii(domain);
}

/**
* Converts a domain to Unicode as per the IDNA spec
*/
export function domainToUnicode(domain: string) {
return op_node_idna_domain_to_unicode(domain);
}
24 changes: 18 additions & 6 deletions ext/node/polyfills/punycode.ts
Original file line number Diff line number Diff line change
@@ -1,28 +1,40 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

import {
op_node_idna_domain_to_ascii,
op_node_idna_domain_to_unicode,
op_node_idna_punycode_decode,
op_node_idna_punycode_encode,
op_node_idna_punycode_to_ascii,
op_node_idna_punycode_to_unicode,
} from "ext:core/ops";

import { deprecate } from "node:util";

import { ucs2 } from "ext:deno_node/internal/idna.ts";

// deno-lint-ignore no-explicit-any
function punyDeprecated(fn: any) {
return deprecate(
fn,
"The `punycode` module is deprecated. Please use a userland " +
"alternative instead.",
"DEP0040",
);
}

function toASCII(domain) {
return op_node_idna_domain_to_ascii(domain);
return punyDeprecated(op_node_idna_punycode_to_ascii)(domain);
}

function toUnicode(domain) {
return op_node_idna_domain_to_unicode(domain);
return punyDeprecated(op_node_idna_punycode_to_unicode)(domain);
}

function decode(domain) {
return op_node_idna_punycode_decode(domain);
return punyDeprecated(op_node_idna_punycode_decode)(domain);
}

function encode(domain) {
return op_node_idna_punycode_encode(domain);
return punyDeprecated(op_node_idna_punycode_encode)(domain);
}

export { decode, encode, toASCII, toUnicode, ucs2 };
Expand Down
13 changes: 8 additions & 5 deletions ext/node/polyfills/url.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@ import {
CHAR_ZERO_WIDTH_NOBREAK_SPACE,
} from "ext:deno_node/path/_constants.ts";
import * as path from "node:path";
import { toASCII, toUnicode } from "node:punycode";
import {
domainToASCII as idnaToASCII,
domainToUnicode as idnaToUnicode,
} from "ext:deno_node/internal/idna.ts";
import { isWindows, osType } from "ext:deno_node/_util/os.ts";
import { encodeStr, hexTable } from "ext:deno_node/internal/querystring.ts";
import querystring from "node:querystring";
Expand Down Expand Up @@ -813,7 +816,7 @@ export class Url {

// Use lenient mode (`true`) to try to support even non-compliant
// URLs.
this.hostname = toASCII(this.hostname);
this.hostname = idnaToASCII(this.hostname);

// Prevent two potential routes of hostname spoofing.
// 1. If this.hostname is empty, it must have become empty due to toASCII
Expand Down Expand Up @@ -1251,7 +1254,7 @@ export function resolveObject(source: string | Url, relative: string) {
* @see https://www.rfc-editor.org/rfc/rfc3490#section-4
*/
export function domainToASCII(domain: string) {
return toASCII(domain);
return idnaToASCII(domain);
}

/**
Expand All @@ -1261,7 +1264,7 @@ export function domainToASCII(domain: string) {
* @see https://www.rfc-editor.org/rfc/rfc3490#section-4
*/
export function domainToUnicode(domain: string) {
return toUnicode(domain);
return idnaToUnicode(domain);
}

/**
Expand Down Expand Up @@ -1396,7 +1399,7 @@ export function pathToFileURL(filepath: string): URL {
);
}

outURL.hostname = domainToASCII(hostname);
outURL.hostname = idnaToASCII(hostname);
outURL.pathname = encodePathChars(paths.slice(3).join("/"));
} else {
let resolved = path.resolve(filepath);
Expand Down
1 change: 1 addition & 0 deletions tests/integration/node_unit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ util::unit_test_factory!(
path_test,
perf_hooks_test,
process_test,
punycode_test,
querystring_test,
readline_test,
repl_test,
Expand Down
1 change: 1 addition & 0 deletions tests/node_compat/config.jsonc
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,7 @@
"test-process-uptime.js",
"test-promise-unhandled-silent.js",
"test-promise-unhandled-throw-handler.js",
"test-punycode.js",
"test-querystring-escape.js",
"test-querystring-maxKeys-non-finite.js",
"test-querystring-multichar-separator.js",
Expand Down
Loading

0 comments on commit 93c6f92

Please sign in to comment.