Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add utf16_units #14613

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 9 additions & 12 deletions src/libcollections/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -803,15 +803,9 @@ pub trait StrAllocating: Str {
}

/// Converts to a vector of `u16` encoded as UTF-16.
#[deprecated = "use `utf16_units` instead"]
fn to_utf16(&self) -> Vec<u16> {
let me = self.as_slice();
let mut u = Vec::new();
for ch in me.chars() {
let mut buf = [0u16, ..2];
let n = ch.encode_utf16(buf /* as mut slice! */);
u.push_all(buf.slice_to(n));
}
u
self.as_slice().utf16_units().collect::<Vec<u16>>()
}

/// Given a string, make a new string with repeated copies of it.
Expand Down Expand Up @@ -1619,14 +1613,17 @@ mod tests {

for p in pairs.iter() {
let (s, u) = (*p).clone();
let s_as_utf16 = s.as_slice().utf16_units().collect::<Vec<u16>>();
let u_as_string = from_utf16(u.as_slice()).unwrap();

assert!(is_utf16(u.as_slice()));
assert_eq!(s.to_utf16(), u);
assert_eq!(s_as_utf16, u);

assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
assert_eq!(u_as_string, s);
assert_eq!(from_utf16_lossy(u.as_slice()), s);

assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
assert_eq!(from_utf16(s_as_utf16.as_slice()).unwrap(), s);
assert_eq!(u_as_string.as_slice().utf16_units().collect::<Vec<u16>>(), u);
}
}

Expand Down
46 changes: 45 additions & 1 deletion src/libcore/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

use mem;
use char;
use char::Char;
use clone::Clone;
use cmp;
use cmp::{PartialEq, Eq};
Expand All @@ -24,7 +25,7 @@ use default::Default;
use iter::{Filter, Map, Iterator};
use iter::{DoubleEndedIterator, ExactSize};
use iter::range;
use num::Saturating;
use num::{CheckedMul, Saturating};
use option::{None, Option, Some};
use raw::Repr;
use slice::ImmutableVector;
Expand Down Expand Up @@ -557,6 +558,41 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> {
}
}

/// External iterator for a string's UTF-16 code units.
/// Use with the `std::iter` module.
///
/// Wraps a `Chars` iterator and hands out each character's UTF-16
/// encoding one `u16` at a time.
#[deriving(Clone)]
pub struct Utf16CodeUnits<'a> {
// Source of the characters that still have to be encoded.
chars: Chars<'a>,
// Second code unit of a two-unit encoding, buffered until the following
// call to `next`. The value 0 means that nothing is buffered.
extra: u16
}

impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
    /// Yields the next UTF-16 code unit, or `None` once every character
    /// has been encoded and no buffered unit remains.
    #[inline]
    fn next(&mut self) -> Option<u16> {
        // A second code unit left over from the previous character must be
        // handed out before the next character is looked at.
        if self.extra != 0 {
            let tmp = self.extra;
            self.extra = 0;
            return Some(tmp);
        }

        let mut buf = [0u16, ..2];
        self.chars.next().map(|ch| {
            let n = ch.encode_utf16(buf /* as mut slice! */);
            // Two units were written: return the first one now and keep
            // the second one for the following call.
            if n == 2 { self.extra = buf[1]; }
            buf[0]
        })
    }

    /// Bounds on the number of code units that are still to be yielded.
    ///
    /// The bounds include the buffered second unit of a two-unit encoding,
    /// so they stay correct when queried between the two halves of a pair.
    #[inline]
    fn size_hint(&self) -> (uint, Option<uint>) {
        let (low, high) = self.chars.size_hint();
        // every char gets either one u16 or two u16,
        // so this iterator is between 1 or 2 times as
        // long as the underlying iterator.
        let high = high.and_then(|n| n.checked_mul(&2));
        if self.extra == 0 {
            (low, high)
        } else {
            // One more unit is pending on top of whatever the remaining
            // characters produce. Without this adjustment the hint would
            // be `(0, Some(0))` right before the final unit is returned.
            // `n` is an even number here (it is `2 * something`) while the
            // maximum `uint` is odd, so `n + 1` cannot overflow; `low` is
            // bounded by the length of the string, so `low + 1` cannot
            // overflow either.
            (low + 1, high.map(|n| n + 1))
        }
    }
}

/*
Section: Comparing strings
*/
Expand Down Expand Up @@ -1619,6 +1655,9 @@ pub trait StrSlice<'a> {
/// and that it is not reallocated (e.g. by pushing to the
/// string).
fn as_ptr(&self) -> *const u8;

/// Return an iterator of `u16` over the string encoded as UTF-16.
///
/// The code units are produced lazily, one per call to `next`; a
/// character whose encoding needs two units yields them on two
/// consecutive calls.
fn utf16_units(&self) -> Utf16CodeUnits<'a>;
}

impl<'a> StrSlice<'a> for &'a str {
Expand Down Expand Up @@ -1967,6 +2006,11 @@ impl<'a> StrSlice<'a> for &'a str {
fn as_ptr(&self) -> *const u8 {
self.repr().data
}

#[inline]
fn utf16_units(&self) -> Utf16CodeUnits<'a> {
    // A fresh iterator has no second code unit buffered, hence `extra: 0`.
    let chars = self.chars();
    Utf16CodeUnits { chars: chars, extra: 0 }
}
}

impl<'a> Default for &'a str {
Expand Down
4 changes: 3 additions & 1 deletion src/libnative/io/c_win32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ extern "system" {

pub mod compat {
use std::intrinsics::{atomic_store_relaxed, transmute};
use std::iter::Iterator;
use libc::types::os::arch::extra::{LPCWSTR, HMODULE, LPCSTR, LPVOID};

extern "system" {
Expand All @@ -82,7 +83,8 @@ pub mod compat {
// layer (after it's loaded) shouldn't be any slower than a regular DLL
// call.
unsafe fn store_func(ptr: *mut uint, module: &str, symbol: &str, fallback: uint) {
let module = module.to_utf16().append_one(0);
let module: Vec<u16> = module.utf16_units().collect();
let module = module.append_one(0);
symbol.with_c_str(|symbol| {
let handle = GetModuleHandleW(module.as_ptr());
let func: uint = transmute(GetProcAddress(handle, symbol));
Expand Down
2 changes: 1 addition & 1 deletion src/libnative/io/file_win32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ impl Drop for Inner {

pub fn to_utf16(s: &CString) -> IoResult<Vec<u16>> {
match s.as_str() {
Some(s) => Ok(s.to_utf16().append_one(0)),
Some(s) => Ok(s.utf16_units().collect::<Vec<u16>>().append_one(0)),
None => Err(IoError {
code: libc::ERROR_INVALID_NAME as uint,
extra: 0,
Expand Down
14 changes: 10 additions & 4 deletions src/libnative/io/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@ fn spawn_process_os(cfg: ProcessConfig,
use libc::funcs::extra::msvcrt::get_osfhandle;

use std::mem;
use std::iter::Iterator;
use std::str::StrSlice;

if cfg.gid.is_some() || cfg.uid.is_some() {
return Err(IoError {
Expand Down Expand Up @@ -328,7 +330,8 @@ fn spawn_process_os(cfg: ProcessConfig,
lpSecurityDescriptor: ptr::mut_null(),
bInheritHandle: 1,
};
let filename = "NUL".to_utf16().append_one(0);
let filename: Vec<u16> = "NUL".utf16_units().collect();
let filename = filename.append_one(0);
*slot = libc::CreateFileW(filename.as_ptr(),
access,
libc::FILE_SHARE_READ |
Expand Down Expand Up @@ -371,7 +374,8 @@ fn spawn_process_os(cfg: ProcessConfig,

with_envp(cfg.env, |envp| {
with_dirp(cfg.cwd, |dirp| {
let mut cmd_str = cmd_str.to_utf16().append_one(0);
let mut cmd_str: Vec<u16> = cmd_str.as_slice().utf16_units().collect();
cmd_str = cmd_str.append_one(0);
let created = CreateProcessW(ptr::null(),
cmd_str.as_mut_ptr(),
ptr::mut_null(),
Expand Down Expand Up @@ -770,7 +774,7 @@ fn with_envp<T>(env: Option<&[(CString, CString)]>, cb: |*mut c_void| -> T) -> T
let kv = format!("{}={}",
pair.ref0().as_str().unwrap(),
pair.ref1().as_str().unwrap());
blk.push_all(kv.to_utf16().as_slice());
blk.extend(kv.as_slice().utf16_units());
blk.push(0);
}

Expand All @@ -788,7 +792,9 @@ fn with_dirp<T>(d: Option<&CString>, cb: |*const u16| -> T) -> T {
Some(dir) => {
let dir_str = dir.as_str()
.expect("expected workingdirectory to be utf-8 encoded");
let dir_str = dir_str.to_utf16().append_one(0);
let dir_str: Vec<u16> = dir_str.utf16_units().collect();
let dir_str = dir_str.append_one(0);

cb(dir_str.as_ptr())
},
None => cb(ptr::null())
Expand Down
3 changes: 2 additions & 1 deletion src/librustdoc/flock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ mod imp {

impl Lock {
pub fn new(p: &Path) -> Lock {
let p_16 = p.as_str().unwrap().to_utf16().append_one(0);
let p_16: Vec<u16> = p.as_str().unwrap().utf16_units().collect();
let p_16 = p_16.append_one(0);
let handle = unsafe {
libc::CreateFileW(p_16.as_ptr(),
libc::FILE_GENERIC_READ |
Expand Down
7 changes: 5 additions & 2 deletions src/libstd/dynamic_lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,19 +281,22 @@ pub mod dl {
#[cfg(target_os = "win32")]
pub mod dl {
use c_str::ToCStr;
use iter::Iterator;
use libc;
use os;
use ptr;
use result::{Ok, Err, Result};
use str::StrAllocating;
use str::StrSlice;
use str;
use string::String;
use vec::Vec;

pub unsafe fn open_external<T: ToCStr>(filename: T) -> *mut u8 {
// Windows expects Unicode data
let filename_cstr = filename.to_c_str();
let filename_str = str::from_utf8(filename_cstr.as_bytes_no_nul()).unwrap();
let filename_str = filename_str.to_utf16().append_one(0);
let filename_str: Vec<u16> = filename_str.utf16_units().collect();
let filename_str = filename_str.append_one(0);
LoadLibraryW(filename_str.as_ptr() as *const libc::c_void) as *mut u8
}

Expand Down
14 changes: 9 additions & 5 deletions src/libstd/os.rs
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,8 @@ pub fn getenv(n: &str) -> Option<String> {
unsafe {
with_env_lock(|| {
use os::win32::{fill_utf16_buf_and_decode};
let n = n.to_utf16().append_one(0);
let n: Vec<u16> = n.utf16_units().collect();
let n = n.append_one(0);
fill_utf16_buf_and_decode(|buf, sz| {
libc::GetEnvironmentVariableW(n.as_ptr(), buf, sz)
})
Expand Down Expand Up @@ -411,8 +412,10 @@ pub fn setenv(n: &str, v: &str) {

#[cfg(windows)]
fn _setenv(n: &str, v: &str) {
let n = n.to_utf16().append_one(0);
let v = v.to_utf16().append_one(0);
let n: Vec<u16> = n.utf16_units().collect();
let n = n.append_one(0);
let v: Vec<u16> = v.utf16_units().collect();
let v = v.append_one(0);
unsafe {
with_env_lock(|| {
libc::SetEnvironmentVariableW(n.as_ptr(), v.as_ptr());
Expand All @@ -437,7 +440,8 @@ pub fn unsetenv(n: &str) {

#[cfg(windows)]
fn _unsetenv(n: &str) {
let n = n.to_utf16().append_one(0);
let n: Vec<u16> = n.utf16_units().collect();
let n = n.append_one(0);
unsafe {
with_env_lock(|| {
libc::SetEnvironmentVariableW(n.as_ptr(), ptr::null());
Expand Down Expand Up @@ -804,7 +808,7 @@ pub fn change_dir(p: &Path) -> bool {
#[cfg(windows)]
fn chdir(p: &Path) -> bool {
let p = match p.as_str() {
Some(s) => s.to_utf16().append_one(0),
Some(s) => s.utf16_units().collect::<Vec<u16>>().append_one(0),
None => return false,
};
unsafe {
Expand Down