From 02bd23bbf5ae47d9479660a1ce9e960705897d19 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Wed, 3 Jun 2020 21:41:03 +0000
Subject: [PATCH 01/30] initial import of char_collection

---
 Cargo.toml                                    |   1 +
 components/char_collection/BUILD.gn           |  32 +
 components/char_collection/Cargo.toml         |  14 +
 .../meta/char_collection_lib_test.cmx         |   5 +
 .../char_collection/src/char_collection.rs    | 653 ++++++++++++++++++
 components/char_collection/src/conversions.rs | 156 +++++
 components/char_collection/src/lib.rs         |  12 +
 components/char_collection/src/macros.rs      |  54 ++
 components/char_collection/src/operators.rs   |  62 ++
 9 files changed, 989 insertions(+)
 create mode 100644 components/char_collection/BUILD.gn
 create mode 100644 components/char_collection/Cargo.toml
 create mode 100644 components/char_collection/meta/char_collection_lib_test.cmx
 create mode 100644 components/char_collection/src/char_collection.rs
 create mode 100644 components/char_collection/src/conversions.rs
 create mode 100644 components/char_collection/src/lib.rs
 create mode 100644 components/char_collection/src/macros.rs
 create mode 100644 components/char_collection/src/operators.rs

diff --git a/Cargo.toml b/Cargo.toml
index 17ed9200055..1752ce7e4bd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,5 +3,6 @@
 members = [
     "components/icu",
     "components/icu4x",
+    "components/char_collection",
     "components/locale",
 ]
diff --git a/components/char_collection/BUILD.gn b/components/char_collection/BUILD.gn
new file mode 100644
index 00000000000..f772f5023e6
--- /dev/null
+++ b/components/char_collection/BUILD.gn
@@ -0,0 +1,32 @@
+# Copyright 2019 The Fuchsia Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+import("//build/rust/rustc_library.gni")
+import("//build/test/test_package.gni")
+import("//build/testing/environments.gni")
+# Library for working with collections of Unicode code points.
+rustc_library("char_collection") {
+  edition = "2018"
+  with_unit_tests = true
+  deps = [
+    "//src/lib/intl/unicode_utils/unicode_blocks",
+    "//third_party/rust_crates:anyhow",
+    "//third_party/rust_crates:paste",
+    "//third_party/rust_crates:thiserror",
+    "//third_party/rust_crates:unic-char-range",
+    "//third_party/rust_crates:unic-ucd-block",
+  ]
+}
+test_package("char_collection_tests") {
+  deps = [ ":char_collection_test" ]
+  tests = [
+    {
+      name = "char_collection_lib_test"
+      environments = basic_envs
+    },
+  ]
+}
+group("tests") {
+  testonly = true
+  public_deps = [ ":char_collection_tests" ]
+}
diff --git a/components/char_collection/Cargo.toml b/components/char_collection/Cargo.toml
new file mode 100644
index 00000000000..8e66cb5a0d5
--- /dev/null
+++ b/components/char_collection/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "icu-char-collection"
+description = "API for managing collections of Unicode code points"
+version = "0.0.1"
+authors = ["The ICU4X Project Developers"]
+edition = "2018"
+readme = "README.md"
+repository = "https://github.com/unicode-org/icu4x"
+license = "MIT/Apache-2.0"
+categories = ["internationalization"]
+include = [
+    "src/**/*",
+    "Cargo.toml",
+]
diff --git a/components/char_collection/meta/char_collection_lib_test.cmx b/components/char_collection/meta/char_collection_lib_test.cmx
new file mode 100644
index 00000000000..3bb56a96aa2
--- /dev/null
+++ b/components/char_collection/meta/char_collection_lib_test.cmx
@@ -0,0 +1,5 @@
+{
+    "program": {
+        "binary": "test/char_collection_lib_test"
+    }
+}
\ No newline at end of file
diff --git a/components/char_collection/src/char_collection.rs b/components/char_collection/src/char_collection.rs
new file mode 100644
index 00000000000..5e97361db63
--- /dev/null
+++ b/components/char_collection/src/char_collection.rs
@@ -0,0 +1,653 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+use {
+    anyhow::{format_err, Error},
+    std::{
+        clone::Clone,
+        cmp::Ordering,
+        hash::{Hash, Hasher},
+        iter::Iterator,
+        ops::Range,
+        vec::Vec,
+    },
+    unic_char_range::{chars, CharIter, CharRange},
+};
+/// A trait for objects that represent zero or more disjoint, non-adjacent
+/// [CharRanges](unic_char_range::CharRange).
+pub trait MultiCharRange {
+    /// Iterate over the disjoint, non-adjacent [CharRange]s in the collection in ascending order.
+    fn iter_ranges<'a>(&'a self) -> Box<dyn Iterator<Item = CharRange> + 'a>;
+    /// The number of ranges in the collection (not the number of individual `char`s).
+    fn range_count(&self) -> usize;
+}
+/// A collection of `char`s (i.e. Unicode code points), used for storing large contiguous ranges
+/// efficiently.
+///
+/// Lookups are O(log <var>R</var>), where <var>R</var> is the number of disjoint ranges in the
+/// collection. Insertions use the same lookup and then splice the backing `Vec`.
+///
+/// The easiest way to create instances is using the
+/// [char_collect!](::char_collection::char_collect) macro.
+///
+/// ```
+/// use char_collection::{char_collect, CharCollection, MultiCharRange};
+/// use unic_char_range::chars;
+/// let mut collection: CharCollection = char_collect!('a'..='d', 'x'..='z');
+/// collection += 'e';
+/// collection += chars!('p'..='t');
+/// assert_eq!(
+///     collection.iter_ranges().collect::<Vec<_>>(),
+///     vec![chars!('a'..='e'), chars!('p'..='t'), chars!('x'..='z')]);
+///
+/// assert!(collection.contains(&'c'));
+/// assert!(collection.contains_range(&chars!('q'..='s')));
+/// assert!(!collection.contains(&'9'));
+///
+/// collection -= chars!('t'..='y');
+/// assert_eq!(
+///     collection.iter_ranges().collect::<Vec<_>>(),
+///     vec![chars!('a'..='e'), chars!('p'..='s'), chars!('z'..='z')]);
+/// ```
+///
+/// TODO(kpozin): Implement IntoIter.
+#[derive(Clone, Debug, Eq, PartialEq, Default)]
+pub struct CharCollection {
+    ranges: Vec<CharRange>,
+}
+impl CharCollection {
+    /// Create a new, empty `CharCollection`.
+    pub fn new() -> CharCollection {
+        CharCollection::default()
+    }
+    /// Create a new `CharCollection` from a list of disjoint, non-adjacent `CharRange`s, pre-sorted
+    /// in ascending code point order.
+    ///
+    /// This factory method is primarily intended for use in deserializing valid representations of
+    /// `CharCollections`. Returns an error if ranges are out of order, overlapping, or adjacent
+    /// (adjacency is decided by `are_chars_adjacent`, the same test the insertion paths use).
+    pub fn from_sorted_ranges<T>(ranges: T) -> Result<CharCollection, Error>
+    where
+        T: IntoIterator<Item = CharRange>,
+    {
+        // If the original `ranges` is also a Vec, this doesn't result in an extra copy.
+        let collection = CharCollection { ranges: ranges.into_iter().collect() };
+        let ranges: &Vec<CharRange> = &collection.ranges;
+        let no_gap = |prev: &char, next: &char| prev >= next || are_chars_adjacent(prev, next);
+        match (1..ranges.len()).find(|&i| no_gap(&ranges[i - 1].high, &ranges[i].low)) {
+            Some(i) => Err(format_err!(
+                "These ranges are out of order, overlapping, or adjacent: {}, {}",
+                format_range(&ranges[i - 1]),
+                format_range(&ranges[i])
+            )),
+            None => Ok(collection),
+        }
+    }
+    /// Build a `CharCollection` from `char`s that are already sorted in ascending code point
+    /// order, without duplicates.
+    ///
+    /// Mainly useful for deserializing a known-valid `CharCollection`. Fails on the first `char`
+    /// that is out of order or repeats an earlier one.
+    pub fn from_sorted_chars<T>(chars: T) -> Result<CharCollection, Error>
+    where
+        T: IntoIterator<Item = char>,
+    {
+        let mut result = CharCollection::new();
+        // `append` errors unless its argument sorts after everything stored so far, which is
+        // exactly the sorted-and-unique requirement; `try_for_each` stops at the first error.
+        chars.into_iter().try_for_each(|ch| result.append(ch).map(|_| ()))?;
+        Ok(result)
+    }
+    /// Iterate over all the `char`s in the collection.
+    pub fn iter(&self) -> impl Iterator<Item = char> + '_ {
+        self.ranges.iter().flat_map(CharRange::iter)
+    }
+    /// Test whether the collection contains a specific `char`.
+    ///
+    /// The time complexity is O(log <var>R</var>), where <var>R</var> is the number of ranges in
+    /// the collection.
+    pub fn contains(&self, ch: &char) -> bool {
+        self.find_containing_range(ch).is_ok()
+    }
+    /// Check whether every `char` of `range` is present in the collection. An empty `range` is
+    /// never considered contained.
+    ///
+    /// Costs two binary searches, i.e. O(log <var>R</var>) for <var>R</var> stored ranges.
+    pub fn contains_range(&self, range: &CharRange) -> bool {
+        if range.is_empty() {
+            return false;
+        }
+        match (self.find_containing_range(&range.low), self.find_containing_range(&range.high)) {
+            (Ok(low_hit), Ok(high_hit)) => low_hit == high_hit,
+            _ => false,
+        }
+    }
+    /// Insert a `char` or other collection of chars into this collection.
+    ///
+    /// Returns `&mut self` for easy chaining.
+    ///
+    /// The time complexity is O(<var>T</var> log(<var>R</var> + <var>T</var>)), where <var>R</var>
+    /// is the number of ranges in this collection and <var>T</var> is the number of ranges in
+    /// `to_add`.
+    pub fn insert<V: MultiCharRange>(&mut self, to_add: &V) -> &mut Self {
+        to_add.iter_ranges().for_each(|range| self.insert_char_range(&range));
+        self
+    }
+    /// Appends a `char` to the end of the existing collection. Panics if the given `char` is not
+    /// higher than the highest code point in the existing collection.
+    ///
+    /// Returns `&mut self` for easy chaining.
+    ///
+    /// The time complexity is O(1).
+    pub fn append(&mut self, ch: char) -> Result<&mut Self, Error> {
+        let mut coalesced = false;
+        if let Some(last_range) = self.ranges.last_mut() {
+            if last_range.cmp_char(ch) != Ordering::Less {
+                return Err(format_err!("Cannot append {} after {}", ch, last_range.high));
+            }
+            if are_chars_adjacent(&last_range.high, &ch) {
+                last_range.high = ch;
+                coalesced = true;
+            }
+        }
+        if !coalesced {
+            self.ranges.push(chars!(ch..=ch));
+        }
+        Ok(self)
+    }
+    /// Appends a `CharRange` to the end of the existing collection; an empty range is a no-op.
+    ///
+    /// Returns an error if the given range is not higher than the highest code point in the
+    /// existing collection. (The new range _may_ be adjacent to the previous highest range, but
+    /// may not overlap.) On success, returns `&mut self` for easy chaining.
+    ///
+    /// The time complexity is O(1).
+    pub fn append_range(&mut self, range: CharRange) -> Result<&mut Self, Error> {
+        if range.is_empty() {
+            return Ok(self);
+        }
+        if let Some(last_range) = self.ranges.last_mut() {
+            if last_range.cmp_char(range.low) != Ordering::Less {
+                return Err(format_err!(
+                    "Cannot append {} after {}",
+                    format_range(&range),
+                    last_range.high
+                ));
+            }
+            if are_chars_adjacent(&last_range.high, &range.low) {
+                last_range.high = range.high;
+                return Ok(self);
+            }
+        }
+        self.ranges.push(range);
+        Ok(self)
+    }
+    /// Remove a `char` or other collection of chars from this collection.
+    ///
+    /// Returns `&mut self` for easy chaining.
+    ///
+    /// The time complexity is O(<var>T</var> log(<var>R</var> + <var>T</var>)), where <var>R</var>
+    /// is the number of ranges in this collection and <var>T</var> is the number of ranges in
+    /// `to_remove`.
+    pub fn remove<V: MultiCharRange>(&mut self, to_remove: &V) -> &mut Self {
+        to_remove.iter_ranges().for_each(|range| self.remove_char_range(&range));
+        self
+    }
+    /// Remove all entries from this collection.
+    ///
+    /// Returns `&mut self` for easy chaining.
+    pub fn clear(&mut self) -> &mut Self {
+        self.ranges.clear();
+        self
+    }
+    /// Return the set union of this collection and another one.
+    ///
+    /// The time complexity is O(min(<var>R</var>, <var>T</var>) log(<var>R</var> + <var>T</var>)),
+    /// where <var>R</var> is the number of ranges in this collection and <var>T</var> is the number
+    /// of ranges in `rhs`.
+    pub fn union<V: MultiCharRange>(&self, rhs: &V) -> CharCollection {
+        let mut result: CharCollection;
+        if self.range_count() > rhs.range_count() {
+            result = self.clone();
+            result.insert(rhs);
+        } else {
+            result = rhs.into();
+            result.insert(self);
+        }
+        result
+    }
+    /// Return the set intersection of this collection and another one.
+    ///
+    /// The time complexity is O(min(<var>R</var>, <var>T</var>) log(<var>R</var> + <var>T</var>)),
+    /// where <var>R</var> is the number of ranges in this collection and <var>T</var> is the number
+    /// of ranges in `rhs`.
+    pub fn intersection<V: MultiCharRange>(&self, rhs: &V) -> CharCollection {
+        let mut result: CharCollection;
+        if self.range_count() > rhs.range_count() {
+            result = self.clone();
+            let rhs: CharCollection = rhs.into();
+            result.remove(&rhs.complement());
+        } else {
+            result = rhs.into();
+            result.remove(&self.complement());
+        }
+        result
+    }
+    /// Return the (non-symmetric) set difference of this collection and another one.
+    ///
+    /// The time complexity is O(<var>T</var> log(<var>R</var> + <var>T</var>)), where <var>R</var>
+    /// is the number of ranges in this collection and <var>T</var> is the number of ranges in
+    /// `rhs`.
+    pub fn difference<V: MultiCharRange>(&self, rhs: &V) -> CharCollection {
+        let mut result: CharCollection = self.clone();
+        result.remove(rhs);
+        result
+    }
+    /// Return the set complement of this collection (over the universe of `char`s).
+    ///
+    /// The time complexity is O(<var>R</var>), where <var>R</var> is the number of ranges in this
+    /// collection.
+    pub fn complement(&self) -> CharCollection {
+        if self.ranges.is_empty() {
+            return CharCollection::from(&CharRange::all());
+        }
+        let mut result_ranges: Vec<CharRange> = Vec::new();
+        if self.ranges[0].low != '\u{0}' {
+            result_ranges.push(CharRange::open_right('\u{0}', self.ranges[0].low));
+        }
+        let mut prev_high = self.ranges[0].high;
+        for range in &self.ranges[1..] {
+            result_ranges.push(CharRange::open(prev_high, range.low));
+            prev_high = range.high;
+        }
+        if prev_high != std::char::MAX {
+            result_ranges.push(CharRange::open_left(prev_high, std::char::MAX));
+        }
+        CharCollection { ranges: result_ranges }
+    }
+    /// Insert a single `CharRange`.
+    ///
+    /// Depending on how the new range relates to existing ranges in
+    /// the collection, it might be subsumed by an existing range, modify the endpoints of an
+    /// existing range, or replace one or more existing ranges.
+    fn insert_char_range(&mut self, new_range: &CharRange) {
+        if new_range.is_empty() {
+            return;
+        }
+        let lower_existing_range = self.find_containing_range(&new_range.low);
+        let upper_existing_range = self.find_containing_range(&new_range.high);
+        // Fully enclosed in existing range.
+        if lower_existing_range == upper_existing_range && lower_existing_range.is_ok() {
+            return;
+        }
+        let new_low: char;
+        let new_high: char;
+        let remove_from_idx: usize;
+        let remove_to_idx: usize;
+        match lower_existing_range {
+            Ok((idx, lower_existing_range)) => {
+                new_low = lower_existing_range.low;
+                remove_from_idx = idx;
+            }
+            Err(idx) => {
+                new_low = new_range.low;
+                remove_from_idx = idx;
+            }
+        }
+        match upper_existing_range {
+            Ok((idx, higher_existing_range)) => {
+                new_high = higher_existing_range.high;
+                remove_to_idx = idx + 1;
+            }
+            Err(idx) => {
+                new_high = new_range.high;
+                remove_to_idx = idx;
+            }
+        }
+        self.replace_ranges(chars!(new_low..=new_high), remove_from_idx..remove_to_idx);
+    }
+    /// Remove a single `CharRange`.
+    ///
+    /// Depending on how the removed range relates to existing ranges in the collection, it might
+    /// remove or modify the endpoints of existing ranges.
+    fn remove_char_range(&mut self, range_to_remove: &CharRange) {
+        if range_to_remove.is_empty() {
+            return;
+        }
+        let lower_existing_range = self.find_containing_range(&range_to_remove.low);
+        let upper_existing_range = self.find_containing_range(&range_to_remove.high);
+        let mut replacement_ranges: Vec<CharRange> = Vec::new();
+        let remove_from_idx: usize;
+        let remove_to_idx: usize;
+        match lower_existing_range {
+            Ok((idx, lower_existing_range)) => {
+                if lower_existing_range.low < range_to_remove.low {
+                    replacement_ranges
+                        .push(CharRange::open_right(lower_existing_range.low, range_to_remove.low));
+                }
+                remove_from_idx = idx;
+            }
+            Err(idx) => remove_from_idx = idx,
+        }
+        match upper_existing_range {
+            Ok((idx, higher_existing_range)) => {
+                if range_to_remove.high < higher_existing_range.high {
+                    replacement_ranges.push(CharRange::open_left(
+                        range_to_remove.high,
+                        higher_existing_range.high,
+                    ));
+                }
+                remove_to_idx = idx + 1;
+            }
+            Err(idx) => {
+                remove_to_idx = idx;
+            }
+        }
+        self.ranges.splice(remove_from_idx..remove_to_idx, replacement_ranges);
+    }
+    /// Delete all the existing `CharRange`s that fall within `indices_to_replace` in the vector,
+    /// and insert `char_range_to_insert` in their place. If the newly formed range is adjacent to
+    /// a kept range on its left or right, coalesce them.
+    fn replace_ranges(
+        &mut self,
+        mut char_range_to_insert: CharRange,
+        mut indices_to_replace: Range<usize>,
+    ) {
+        // If the newly formed range is adjacent to the range on its left, coalesce the two.
+        if indices_to_replace.start > 0 {
+            let prev_char_range = self.ranges[indices_to_replace.start - 1];
+            if are_chars_adjacent(&prev_char_range.high, &char_range_to_insert.low) {
+                char_range_to_insert.low = prev_char_range.low;
+                indices_to_replace.start -= 1;
+            }
+        }
+        // If the newly formed range is adjacent to the range on its right, coalesce the two.
+        if indices_to_replace.end < self.ranges.len() {
+            let next_char_range = self.ranges[indices_to_replace.end];
+            if are_chars_adjacent(&char_range_to_insert.high, &next_char_range.low) {
+                char_range_to_insert.high = next_char_range.high;
+                indices_to_replace.end += 1;
+            }
+        }
+        self.ranges.splice(indices_to_replace, vec![char_range_to_insert]);
+    }
+    /// Binary-search the sorted ranges for the one containing `query`: `Ok((index, range))` on a
+    /// hit, otherwise `Err(index)` with the position where such a range would be inserted.
+    fn find_containing_range(&self, query: &char) -> Result<(usize, CharRange), usize> {
+        self.ranges
+            .binary_search_by(|range| range.cmp_char(*query))
+            .map(|index| (index, self.ranges[index]))
+    }
+}
+impl MultiCharRange for CharCollection {
+    fn iter_ranges<'a>(&'a self) -> Box<dyn Iterator<Item = CharRange> + 'a> {
+        Box::new(self.ranges.iter().copied()) // `CharRange` is `Copy`; yields ranges by value.
+    }
+    fn range_count(&self) -> usize {
+        self.ranges.len()
+    }
+}
+impl Hash for CharCollection {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.ranges.iter().for_each(|range| hash_char_range(range, state));
+    }
+}
+fn hash_char_range<H: Hasher>(range: &CharRange, state: &mut H) {
+    range.low.hash(state);
+    range.high.hash(state);
+}
+/// Whether `right` is the `char` immediately after `left`, per `CharRange` stepping.
+fn are_chars_adjacent(left: &char, right: &char) -> bool {
+    // Drop `right` from the range; if the last remaining element is `left` itself, nothing lies
+    // between the two. An empty range yields `None`, hence `false`.
+    let mut iter: CharIter = CharRange::open_right(*left, *right).iter();
+    iter.next_back() == Some(*left)
+}
+fn format_range(range: &CharRange) -> String {
+    format!("{}..={}", range.low, range.high)
+}
+#[cfg(test)]
+mod tests {
+    use {
+        super::{are_chars_adjacent, CharCollection},
+        anyhow::Error,
+        std::char,
+        unic_char_range::{chars, CharRange},
+    };
+    #[test]
+    fn test_from_sorted_ranges() -> Result<(), Error> {
+        let expected = char_collect!('a'..='d', 'g'..='l', 'z');
+        let actual = CharCollection::from_sorted_ranges(vec![
+            chars!('a'..='d'),
+            chars!('g'..='l'),
+            chars!('z'..='z'),
+        ])?;
+        assert_eq!(actual, expected);
+        Ok(())
+    }
+    #[test]
+    fn test_from_sorted_ranges_out_of_order() {
+        assert!(CharCollection::from_sorted_ranges(vec![
+            chars!('g'..='l'),
+            chars!('a'..='d'),
+            chars!('z'..='z'),
+        ])
+        .is_err());
+    }
+    #[test]
+    fn test_from_sorted_ranges_overlap() {
+        assert!(CharCollection::from_sorted_ranges(vec![
+            chars!('a'..='d'),
+            chars!('c'..='l'),
+            chars!('z'..='z'),
+        ])
+        .is_err());
+    }
+    #[test]
+    fn test_from_sorted_ranges_adjacent() {
+        assert!(
+            CharCollection::from_sorted_ranges(vec![chars!('a'..='d'), chars!('e'..='g')]).is_err()
+        );
+    }
+    #[test]
+    fn test_from_sorted_chars() -> Result<(), Error> {
+        let chars = vec!['a', 'b', 'c', 'd', 'g', 'h', 'i', 'j', 'k', 'l', 'z'];
+        let expected = char_collect!('a'..='d', 'g'..='l', 'z');
+        let actual = CharCollection::from_sorted_chars(chars)?;
+        assert_eq!(actual, expected);
+        Ok(())
+    }
+    #[test]
+    fn test_from_sorted_chars_out_of_order() {
+        let chars = vec!['a', 'b', 'c', 'd', 'g', 'h', 'i', 'j', 'k', 'l', 'e'];
+        assert!(CharCollection::from_sorted_chars(chars).is_err());
+    }
+    #[test]
+    fn test_find_containing_range() {
+        let collection = char_collect!({ ('a'..='d') + ('g'..='j') + ('l'..='o') + 'z' });
+        assert_eq!(collection.find_containing_range(&'0'), Err(0));
+        assert_eq!(collection.find_containing_range(&'c'), Ok((0, chars!('a'..='d'))));
+        assert_eq!(collection.find_containing_range(&'e'), Err(1));
+    }
+    #[test]
+    fn test_insert_initial() {
+        let collection = char_collect!('a'..='d');
+        assert_eq!(collection.ranges, vec![chars!('a'..='d')])
+    }
+    #[test]
+    fn test_insert_exact_match() {
+        let mut collection = char_collect!('a'..='d', 'g'..='l');
+        collection += 'a'..='d';
+        assert_eq!(collection.ranges, vec![chars!('a'..='d'), chars!('g'..='l')]);
+    }
+    #[test]
+    fn test_insert_non_overlapping_sorted() {
+        let collection = char_collect!('a'..='d', 'g'..='j', 'l'..='o');
+        assert_eq!(
+            collection.ranges,
+            vec![chars!('a'..='d'), chars!('g'..='j'), chars!('l'..='o')]
+        );
+    }
+    #[test]
+    fn test_insert_non_overlapping_unsorted() {
+        let collection = char_collect!('l'..='o', 'a'..='d', 'l'..='o', 'a'..='d', 'g'..='j');
+        assert_eq!(
+            collection.ranges,
+            vec![chars!('a'..='d'), chars!('g'..='j'), chars!('l'..='o')]
+        );
+    }
+    #[test]
+    fn test_insert_overlapping_all_existent() {
+        let mut collection = char_collect!('l'..='o', 'a'..='d');
+        collection += 'a'..='o';
+        assert_eq!(collection.ranges, vec![chars!('a'..='o')]);
+    }
+    #[test]
+    fn test_insert_overlapping_some_existent() {
+        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
+        collection += 'i'..='n';
+        assert_eq!(
+            collection.ranges,
+            vec![chars!('c'..='e'), chars!('i'..='n'), chars!('p'..='s')]
+        );
+    }
+    #[test]
+    fn test_insert_overlapping_with_intersections() {
+        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
+        collection += 'd'..='k';
+        assert_eq!(collection.ranges, vec![chars!('c'..='m'), chars!('p'..='s')]);
+    }
+    #[test]
+    fn test_insert_coalesce_adjacent_ranges() {
+        let mut collection = char_collect!('a'..='c', 'j'..='m');
+        collection += 'd'..='i';
+        assert_eq!(collection.ranges, vec![chars!('a'..='m')]);
+    }
+    #[test]
+    fn test_append() -> Result<(), Error> {
+        let mut collection = char_collect!('a'..='c');
+        collection.append('d')?.append('g')?.append('h')?.append('i')?.append('z')?;
+        assert_eq!(collection, char_collect!('a'..='d', 'g'..='i', 'z'));
+        Ok(())
+    }
+    #[test]
+    fn test_append_out_of_order() -> Result<(), Error> {
+        let mut collection = char_collect!('a'..='c');
+        assert!(collection
+            .append('d')?
+            .append('g')?
+            .append('h')?
+            .append('i')?
+            .append('e')
+            .is_err());
+        Ok(())
+    }
+    #[test]
+    fn test_append_range() -> Result<(), Error> {
+        let mut collection = char_collect!('a'..='c');
+        collection.append_range(chars!('g'..='i'))?.append_range(chars!('j'..='m'))?;
+        assert_eq!(collection, char_collect!('a'..='c', 'g'..='m'));
+        Ok(())
+    }
+    #[test]
+    fn test_append_range_out_of_order() -> Result<(), Error> {
+        let mut collection = char_collect!('a'..='c');
+        assert!(collection
+            .append_range(chars!('g'..='i'))?
+            .append_range(chars!('j'..='m'))?
+            .append_range(chars!('k'..='m'))
+            .is_err());
+        Ok(())
+    }
+    #[test]
+    fn test_remove_exact_range() {
+        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
+        collection -= 'j'..='m';
+        assert_eq!(collection.ranges, vec![chars!('c'..='e'), chars!['p'..='s']]);
+    }
+    #[test]
+    fn test_remove_overlapping_all_existent() {
+        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
+        collection -= 'c'..='s';
+        assert_eq!(collection.ranges, vec![]);
+    }
+    #[test]
+    fn test_remove_overlapping_all_existent_superset() {
+        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
+        collection -= 'a'..='z';
+        assert_eq!(collection.ranges, vec![]);
+    }
+    #[test]
+    fn test_remove_one_subrange() {
+        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
+        collection -= 'k'..='l';
+        assert_eq!(
+            collection.ranges,
+            vec![chars!('c'..='e'), chars!('j'..='j'), chars!('m'..='m'), chars!('p'..='s')]
+        );
+    }
+    #[test]
+    fn test_remove_intersection() {
+        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
+        collection -= 'd'..='q';
+        assert_eq!(collection.ranges, vec![chars!('c'..='c'), chars!('r'..='s')]);
+    }
+    #[test]
+    fn test_complement_simple() {
+        let collection = char_collect!(0x10..=0x50, 0x70..=0x70, 0x99..=0x640);
+        assert_eq!(
+            collection.complement(),
+            char_collect!(0x00..=0x0F, 0x51..=0x6F, 0x71..=0x98, 0x641..=(char::MAX as u32))
+        );
+    }
+    #[test]
+    fn test_complement_all() {
+        let collection = char_collect!(CharRange::all());
+        assert_eq!(collection.complement(), char_collect!());
+    }
+    #[test]
+    fn test_complement_none() {
+        let collection = char_collect!();
+        assert_eq!(collection.complement(), char_collect!(CharRange::all()));
+    }
+    #[test]
+    fn test_complement_includes_min_and_max() {
+        let collection = char_collect!(0x0..=0x10, 0x40..=0x50, 0xCCCC..=(char::MAX as u32));
+        assert_eq!(collection.complement(), char_collect!(0x11..=0x3F, 0x51..=0xCCCB));
+    }
+    #[test]
+    fn test_union() {
+        let collection_a = char_collect!('a'..='g', 'm'..='z', 'B'..='R');
+        let collection_b = char_collect!('e'..='q', 'W'..='Y');
+        let expected = char_collect!('a'..='z', 'B'..='R', 'W'..='Y');
+        assert_eq!(collection_a.union(&collection_b), expected);
+        assert_eq!(collection_b.union(&collection_a), expected);
+    }
+    #[test]
+    fn test_intersection() {
+        let collection_a = char_collect!('a'..='g', 'm'..='z');
+        let collection_b = char_collect!('e'..='q');
+        let expected = char_collect!('e'..='g', 'm'..='q');
+        assert_eq!(collection_a.intersection(&collection_b), expected);
+        assert_eq!(collection_b.intersection(&collection_a), expected);
+    }
+    #[test]
+    fn test_macro_expressions() {
+        use unicode_blocks::UnicodeBlockId::Arabic;
+        let collection =
+            char_collect!({ ('c'..='e') + ('f'..='h') - ('a'..='d') + Arabic + (0x5..=0x42) });
+        assert_eq!(collection, char_collect!(0x5..=0x42, 'e'..='h', Arabic));
+    }
+    #[test]
+    fn test_iter() {
+        let collection = char_collect!('a'..='c', 'j'..='l', 'x'..='z');
+        let v = collection.iter().collect::<Vec<char>>();
+        assert_eq!(v, vec!['a', 'b', 'c', 'j', 'k', 'l', 'x', 'y', 'z']);
+    }
+    #[test]
+    fn test_are_chars_adjacent() {
+        assert!(are_chars_adjacent(&'a', &'b'));
+        assert!(!are_chars_adjacent(&'b', &'a'));
+        assert!(!are_chars_adjacent(&'a', &'c'));
+    }
+}
\ No newline at end of file
diff --git a/components/char_collection/src/conversions.rs b/components/char_collection/src/conversions.rs
new file mode 100644
index 00000000000..a9a58b6e7ff
--- /dev/null
+++ b/components/char_collection/src/conversions.rs
@@ -0,0 +1,156 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//! Conversion (`From`) implementations for [CharCollection], via [MultiCharRange].
+use std::boxed::Box;
+use std::convert::TryFrom;
+use std::iter;
+use std::ops::RangeInclusive;
+use unic_char_range::CharRange;
+use unic_ucd_block::Block;
+use unicode_blocks::UnicodeBlockId;
+use crate::{CharCollection, MultiCharRange};
+macro_rules! impl_for_range_inclusive_int_type {
+    ($($t:ty),*) => {$(
+        impl MultiCharRange for RangeInclusive<$t> {
+            fn iter_ranges(&self) -> Box<dyn Iterator<Item=CharRange>> {
+                Box::new(iter::once(to_char_range!(self)))
+            }
+            fn range_count(&self) -> usize {
+                1
+            }
+    })*}
+}
+// This macro is needed because there is no way to express "can be cast as u32" using traits.
+macro_rules! to_char_range {
+    ($range:expr) => {
+        CharRange::closed(
+            char::try_from(*$range.start() as u32).unwrap(),
+            char::try_from(*$range.end() as u32).unwrap(),
+        )
+    };
+}
+impl MultiCharRange for char {
+    fn iter_ranges(&self) -> Box<dyn Iterator<Item = CharRange>> {
+        Box::new(std::iter::once(CharRange::closed(*self, *self)))
+    }
+    fn range_count(&self) -> usize {
+        1
+    }
+}
+impl MultiCharRange for CharRange {
+    fn iter_ranges(&self) -> Box<dyn Iterator<Item = CharRange>> {
+        Box::new(iter::once(self.clone()))
+    }
+    fn range_count(&self) -> usize {
+        1
+    }
+}
+impl MultiCharRange for RangeInclusive<char> {
+    fn iter_ranges(&self) -> Box<dyn Iterator<Item = CharRange>> {
+        Box::new(iter::once(CharRange::closed(*self.start(), *self.end())))
+    }
+    fn range_count(&self) -> usize {
+        1
+    }
+}
+impl_for_range_inclusive_int_type!(u8, i8, u32, i32);
+impl MultiCharRange for UnicodeBlockId {
+    fn iter_ranges(&self) -> Box<dyn Iterator<Item = CharRange>> {
+        self.block().iter_ranges()
+    }
+    fn range_count(&self) -> usize {
+        1
+    }
+}
+impl MultiCharRange for Block {
+    fn iter_ranges<'a>(&'a self) -> Box<dyn Iterator<Item = CharRange> + 'a> {
+        Box::new(self.range.iter_ranges())
+    }
+    fn range_count(&self) -> usize {
+        1
+    }
+}
+impl<T: MultiCharRange> From<&T> for CharCollection {
+    fn from(source: &T) -> Self {
+        let mut collection = CharCollection::new();
+        collection.insert(source);
+        collection
+    }
+}
+#[cfg(test)]
+mod multi_char_range_tests {
+    use crate::MultiCharRange;
+    use paste;
+    use unic_char_range::{chars, CharRange};
+    #[test]
+    fn test_char() {
+        let source = 'a';
+        assert_eq!(source.iter_ranges().collect::<Vec<CharRange>>(), vec![chars!('a'..='a')]);
+        assert_eq!(source.range_count(), 1);
+    }
+    #[test]
+    fn test_char_range() {
+        let source = chars!('d'..='g');
+        assert_eq!(source.iter_ranges().collect::<Vec<CharRange>>(), vec![chars!('d'..='g')]);
+        assert_eq!(source.range_count(), 1);
+    }
+    #[test]
+    fn test_range_inclusive_char() {
+        let source = 'd'..='g';
+        assert_eq!(source.iter_ranges().collect::<Vec<CharRange>>(), vec![chars!('d'..='g')]);
+        assert_eq!(source.range_count(), 1);
+    }
+    macro_rules! test_range_inclusive_int {
+        ($t:ty) => {
+            paste::item! {
+                #[test]
+                fn [<test_char_range_inclusive_ $t>]() {
+                    let source: std::ops::RangeInclusive<$t> = 0x0..=0x9;
+                        assert_eq!(
+                            source.iter_ranges().collect::<Vec<CharRange>>(),
+                            vec![chars!('\u{0}'..='\u{9}')]
+                    );
+                    assert_eq!(source.range_count(), 1);
+                }
+            }
+        };
+    }
+    test_range_inclusive_int!(u8);
+    test_range_inclusive_int!(i8);
+    test_range_inclusive_int!(u32);
+    test_range_inclusive_int!(i32);
+    #[test]
+    fn test_unicode_block_id() {
+        let source = unicode_blocks::UnicodeBlockId::BasicLatin;
+        assert_eq!(
+            source.iter_ranges().collect::<Vec<CharRange>>(),
+            vec![chars!('\u{0000}'..='\u{007f}')]
+        );
+        assert_eq!(source.range_count(), 1);
+    }
+    #[test]
+    fn test_unicode_block() {
+        let source = unicode_blocks::UnicodeBlockId::BasicLatin.block();
+        assert_eq!(
+            source.iter_ranges().collect::<Vec<CharRange>>(),
+            vec![chars!('\u{0000}'..='\u{007f}')]
+        );
+        assert_eq!(source.range_count(), 1);
+    }
+}
+#[cfg(test)]
+mod from_tests {
+    use crate::CharCollection;
+    use unicode_blocks::UnicodeBlockId;
+    #[test]
+    fn test_char() {
+        let actual: CharCollection = (&'a').into();
+        assert_eq!(actual, char_collect!('a'..='a'));
+    }
+    #[test]
+    fn test_unicode_block_id() {
+        let actual: CharCollection = (&UnicodeBlockId::BasicLatin).into();
+        assert_eq!(actual, char_collect!('\u{0000}'..='\u{007f}'));
+    }
+}
\ No newline at end of file
diff --git a/components/char_collection/src/lib.rs b/components/char_collection/src/lib.rs
new file mode 100644
index 00000000000..ee5e0de5205
--- /dev/null
+++ b/components/char_collection/src/lib.rs
@@ -0,0 +1,12 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#[macro_use]
+mod macros;
+mod char_collection;
+mod conversions;
+mod operators;
+pub use char_collection::CharCollection;
+pub use char_collection::MultiCharRange;
+pub use conversions::*;
+pub use operators::*;
\ No newline at end of file
diff --git a/components/char_collection/src/macros.rs b/components/char_collection/src/macros.rs
new file mode 100644
index 00000000000..83a6a1b45fc
--- /dev/null
+++ b/components/char_collection/src/macros.rs
@@ -0,0 +1,54 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+/// Generate a [CharCollection] from a sequence of `char`s,
+/// [CharRanges](unic_char_range::CharRange), or Unicode [Blocks](unic_ucd_block::Block).
+///
+/// The macro can be used with either a comma-separated list of items, or with an expression
+/// representing set operations.
+///
+/// ```
+/// use char_collection::char_collect;
+/// use unicode_blocks::UnicodeBlockId;
+/// use unic_char_range::CharRange;
+///
+/// let c1 = char_collect!(
+///     'a'..='z',
+///     CharRange::closed('D', 'G'),
+///     UnicodeBlockId::Cyrillic,
+///     0x01..=0x05,
+///     '@');
+///
+/// let c2 = char_collect!({ ('a'..='z') - ('p'..='t') + UnicodeBlockId::Bengali });
+/// ```
+///
+/// *NOTE:* Parenthetical expressions currently aren't supported unless they start with a
+/// `CharCollection`.
+/// ```
+/// use char_collection::char_collect;
+///
+/// // This works:
+/// let c1 = char_collect!({ ('a'..='z') + (char_collect!('A'..='Z') - ('L'..='P')) });
+///
+/// // This doesn't:
+/// let c1 = char_collect!({ ('a'..='z') + (('A'..='Z') - ('L'..='P')) });
+/// ```
+#[macro_export]
+macro_rules! char_collect {
+    ({ $($x:tt)+ }) => {
+        {
+            $crate::CharCollection::new() + $($x)*
+        }
+    };
+    ( $( $x:expr ),* ) => {
+        {
+            // Allow unused mut in case the collection is empty.
+            #[allow(unused_mut)]
+            let mut col = $crate::CharCollection::new();
+            $(
+                col.insert(& $x);
+            )*
+            col
+        }
+    };
+}
\ No newline at end of file
diff --git a/components/char_collection/src/operators.rs b/components/char_collection/src/operators.rs
new file mode 100644
index 00000000000..1beb4656073
--- /dev/null
+++ b/components/char_collection/src/operators.rs
@@ -0,0 +1,62 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//! Implementations of standard operators for [CharCollection].
+//!
+//! `+` and `|` are equivalent. `+` is easier to use with `-`, as they have the same operator
+//! precedence.
+use crate::{CharCollection, MultiCharRange};
+use std::convert::Into;
+use std::ops;
+impl<V: MultiCharRange> ops::BitOr<V> for CharCollection {
+    type Output = CharCollection;
+    fn bitor(self, rhs: V) -> Self::Output {
+        let result: CharCollection = self.into();
+        result.union(&rhs)
+    }
+}
+impl<V: MultiCharRange> ops::Add<V> for CharCollection {
+    type Output = CharCollection;
+    fn add(self, rhs: V) -> Self::Output {
+        let result: CharCollection = self.into();
+        result.union(&rhs)
+    }
+}
+impl<V: MultiCharRange> ops::BitOrAssign<V> for CharCollection {
+    fn bitor_assign(&mut self, rhs: V) {
+        self.insert(&rhs);
+    }
+}
+impl<V: MultiCharRange> ops::AddAssign<V> for CharCollection {
+    fn add_assign(&mut self, rhs: V) {
+        self.insert(&rhs);
+    }
+}
+impl<V: MultiCharRange> ops::Sub<V> for CharCollection {
+    type Output = CharCollection;
+    fn sub(self, rhs: V) -> Self::Output {
+        self.difference(&rhs)
+    }
+}
+impl<V: MultiCharRange> ops::SubAssign<V> for CharCollection {
+    fn sub_assign(&mut self, rhs: V) {
+        self.remove(&rhs);
+    }
+}
+impl<V: MultiCharRange> ops::BitAnd<V> for CharCollection {
+    type Output = CharCollection;
+    fn bitand(self, rhs: V) -> Self::Output {
+        self.intersection(&rhs)
+    }
+}
+impl<V: MultiCharRange> ops::BitAndAssign<V> for CharCollection {
+    fn bitand_assign(&mut self, rhs: V) {
+        *self = self.intersection(&rhs);
+    }
+}
+impl ops::Not for CharCollection {
+    type Output = CharCollection;
+    fn not(self) -> Self::Output {
+        self.complement()
+    }
+}
\ No newline at end of file

From c66b7707e4f5f4668f5f810eb9e270ae9aef9fa3 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Wed, 3 Jun 2020 22:49:11 +0000
Subject: [PATCH 02/30] anyhow::Error dependency removed and std::error::Error
 added

---
 .../char_collection/src/char_collection.rs    | 38 +++++++++----------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/components/char_collection/src/char_collection.rs b/components/char_collection/src/char_collection.rs
index 5e97361db63..b31def4f444 100644
--- a/components/char_collection/src/char_collection.rs
+++ b/components/char_collection/src/char_collection.rs
@@ -2,7 +2,6 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 use {
-    anyhow::{format_err, Error},
     std::{
         clone::Clone,
         cmp::Ordering,
@@ -10,6 +9,7 @@ use {
         iter::Iterator,
         ops::Range,
         vec::Vec,
+        error::Error,
     },
     unic_char_range::{chars, CharIter, CharRange},
 };
@@ -66,7 +66,7 @@ impl CharCollection {
     /// This factory method is primarily intended for use in deserializing valid representations of
     /// `CharCollections`. Will return an error if ranges are out of order, overlapping, or
     /// adjacent.
-    pub fn from_sorted_ranges<T>(ranges: T) -> Result<CharCollection, Error>
+    pub fn from_sorted_ranges<T>(ranges: T) -> Result<CharCollection, Box<dyn Error>>
     where
         T: IntoIterator<Item = CharRange>,
     {
@@ -75,11 +75,11 @@ impl CharCollection {
         let ranges: &Vec<CharRange> = &collection.ranges;
         match (1..ranges.len()).find(|i| (ranges[*i].low as i64 - ranges[*i - 1].high as i64) <= 1)
         {
-            Some(i) => Err(format_err!(
-                "These ranges are out of order, overlapping, or adjacent: {}, {}",
+            Some(i) => Err(format!(
+                "These ranges are out of order, overlapping, or adjacent: {:?}, {:?}",
                 format_range(&ranges[i - 1]),
                 format_range(&ranges[i])
-            )),
+            ).into()),
             None => Ok(collection),
         }
     }
@@ -88,7 +88,7 @@ impl CharCollection {
     ///
     /// This factory method is primarily intended for use in deserializing valid representations of
     /// `CharCollections`. Will return an error if chars are out of order or contain duplicates.
-    pub fn from_sorted_chars<T>(chars: T) -> Result<CharCollection, Error>
+    pub fn from_sorted_chars<T>(chars: T) -> Result<CharCollection, Box<dyn Error>>
     where
         T: IntoIterator<Item = char>,
     {
@@ -139,11 +139,11 @@ impl CharCollection {
     /// Returns `&mut self` for easy chaining.
     ///
     /// The time complexity is O(1).
-    pub fn append(&mut self, ch: char) -> Result<&mut Self, Error> {
+    pub fn append(&mut self, ch: char) -> Result<&mut Self, Box<dyn Error>> {
         let mut coalesced = false;
         if let Some(last_range) = self.ranges.last_mut() {
             if last_range.cmp_char(ch) != Ordering::Less {
-                return Err(format_err!("Cannot append {} after {}", ch, last_range.high));
+                return Err(format!("Cannot append {:?} after {:?}", ch, last_range.high).into());
             }
             if are_chars_adjacent(&last_range.high, &ch) {
                 last_range.high = ch;
@@ -162,15 +162,15 @@ impl CharCollection {
     /// Returns `&mut self` for easy chaining.
     ///
     /// The time complexity is O(1).
-    pub fn append_range(&mut self, range: CharRange) -> Result<&mut Self, Error> {
+    pub fn append_range(&mut self, range: CharRange) -> Result<&mut Self, Box<dyn Error>> {
         let mut coalesced = false;
         if let Some(last_range) = self.ranges.last_mut() {
             if last_range.cmp_char(range.low) != Ordering::Less {
-                return Err(format_err!(
-                    "Cannot append {} after {}",
+                return Err(format!(
+                    "Cannot append {:?} after {:?}",
                     format_range(&range),
                     last_range.high
-                ));
+                ).into());
             }
             if are_chars_adjacent(&last_range.high, &range.low) {
                 last_range.high = range.high;
@@ -410,12 +410,12 @@ fn format_range(range: &CharRange) -> String {
 mod tests {
     use {
         super::{are_chars_adjacent, CharCollection},
-        anyhow::Error,
+        std::error::Error,
         std::char,
         unic_char_range::{chars, CharRange},
     };
     #[test]
-    fn test_from_sorted_ranges() -> Result<(), Error> {
+    fn test_from_sorted_ranges() -> Result<(), Box<dyn Error>> {
         let expected = char_collect!('a'..='d', 'g'..='l', 'z');
         let actual = CharCollection::from_sorted_ranges(vec![
             chars!('a'..='d'),
@@ -450,7 +450,7 @@ mod tests {
         );
     }
     #[test]
-    fn test_from_sorted_chars() -> Result<(), Error> {
+    fn test_from_sorted_chars() -> Result<(), Box<dyn Error>> {
         let chars = vec!['a', 'b', 'c', 'd', 'g', 'h', 'i', 'j', 'k', 'l', 'z'];
         let expected = char_collect!('a'..='d', 'g'..='l', 'z');
         let actual = CharCollection::from_sorted_chars(chars)?;
@@ -524,14 +524,14 @@ mod tests {
         assert_eq!(collection.ranges, vec![chars!('a'..='m')]);
     }
     #[test]
-    fn test_append() -> Result<(), Error> {
+    fn test_append() -> Result<(), Box<dyn Error>> {
         let mut collection = char_collect!('a'..='c');
         collection.append('d')?.append('g')?.append('h')?.append('i')?.append('z')?;
         assert_eq!(collection, char_collect!('a'..='d', 'g'..='i', 'z'));
         Ok(())
     }
     #[test]
-    fn test_append_out_of_order() -> Result<(), Error> {
+    fn test_append_out_of_order() -> Result<(), Box<dyn Error>> {
         let mut collection = char_collect!('a'..='c');
         assert!(collection
             .append('d')?
@@ -543,14 +543,14 @@ mod tests {
         Ok(())
     }
     #[test]
-    fn test_append_range() -> Result<(), Error> {
+    fn test_append_range() -> Result<(), Box<dyn Error>> {
         let mut collection = char_collect!('a'..='c');
         collection.append_range(chars!('g'..='i'))?.append_range(chars!('j'..='m'))?;
         assert_eq!(collection, char_collect!('a'..='c', 'g'..='m'));
         Ok(())
     }
     #[test]
-    fn test_append_range_out_of_order() -> Result<(), Error> {
+    fn test_append_range_out_of_order() -> Result<(), Box<dyn Error>> {
         let mut collection = char_collect!('a'..='c');
         assert!(collection
             .append_range(chars!('g'..='i'))?

From e4b8f56ad7e69c4ed2171d71b2fab8254679eeeb Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Wed, 3 Jun 2020 22:55:26 +0000
Subject: [PATCH 03/30] std imports made consistent and unic-ucd-block
 dependency removed

---
 .../char_collection/src/char_collection.rs    |  2 +
 components/char_collection/src/conversions.rs | 84 +++++++++----------
 2 files changed, 44 insertions(+), 42 deletions(-)

diff --git a/components/char_collection/src/char_collection.rs b/components/char_collection/src/char_collection.rs
index b31def4f444..1094c4964c6 100644
--- a/components/char_collection/src/char_collection.rs
+++ b/components/char_collection/src/char_collection.rs
@@ -10,6 +10,8 @@ use {
         ops::Range,
         vec::Vec,
         error::Error,
+        convert::Into,
+        boxed::Box
     },
     unic_char_range::{chars, CharIter, CharRange},
 };
diff --git a/components/char_collection/src/conversions.rs b/components/char_collection/src/conversions.rs
index a9a58b6e7ff..f2c4d18ffa7 100644
--- a/components/char_collection/src/conversions.rs
+++ b/components/char_collection/src/conversions.rs
@@ -7,8 +7,8 @@ use std::convert::TryFrom;
 use std::iter;
 use std::ops::RangeInclusive;
 use unic_char_range::CharRange;
-use unic_ucd_block::Block;
-use unicode_blocks::UnicodeBlockId;
+// use unic_ucd_block::Block;
+// use unicode_blocks::UnicodeBlockId;
 use crate::{CharCollection, MultiCharRange};
 macro_rules! impl_for_range_inclusive_int_type {
     ($($t:ty),*) => {$(
@@ -55,22 +55,22 @@ impl MultiCharRange for RangeInclusive<char> {
     }
 }
 impl_for_range_inclusive_int_type!(u8, i8, u32, i32);
-impl MultiCharRange for UnicodeBlockId {
-    fn iter_ranges(&self) -> Box<dyn Iterator<Item = CharRange>> {
-        self.block().iter_ranges()
-    }
-    fn range_count(&self) -> usize {
-        1
-    }
-}
-impl MultiCharRange for Block {
-    fn iter_ranges<'a>(&'a self) -> Box<dyn Iterator<Item = CharRange> + 'a> {
-        Box::new(self.range.iter_ranges())
-    }
-    fn range_count(&self) -> usize {
-        1
-    }
-}
+// impl MultiCharRange for UnicodeBlockId {
+//     fn iter_ranges(&self) -> Box<dyn Iterator<Item = CharRange>> {
+//         self.block().iter_ranges()
+//     }
+//     fn range_count(&self) -> usize {
+//         1
+//     }
+// }
+// impl MultiCharRange for Block {
+//     fn iter_ranges<'a>(&'a self) -> Box<dyn Iterator<Item = CharRange> + 'a> {
+//         Box::new(self.range.iter_ranges())
+//     }
+//     fn range_count(&self) -> usize {
+//         1
+//     }
+// }
 impl<T: MultiCharRange> From<&T> for CharCollection {
     fn from(source: &T) -> Self {
         let mut collection = CharCollection::new();
@@ -120,37 +120,37 @@ mod multi_char_range_tests {
     test_range_inclusive_int!(i8);
     test_range_inclusive_int!(u32);
     test_range_inclusive_int!(i32);
-    #[test]
-    fn test_unicode_block_id() {
-        let source = unicode_blocks::UnicodeBlockId::BasicLatin;
-        assert_eq!(
-            source.iter_ranges().collect::<Vec<CharRange>>(),
-            vec![chars!('\u{0000}'..='\u{007f}')]
-        );
-        assert_eq!(source.range_count(), 1);
-    }
-    #[test]
-    fn test_unicode_block() {
-        let source = unicode_blocks::UnicodeBlockId::BasicLatin.block();
-        assert_eq!(
-            source.iter_ranges().collect::<Vec<CharRange>>(),
-            vec![chars!('\u{0000}'..='\u{007f}')]
-        );
-        assert_eq!(source.range_count(), 1);
-    }
+    // #[test]
+    // fn test_unicode_block_id() {
+    //     let source = unicode_blocks::UnicodeBlockId::BasicLatin;
+    //     assert_eq!(
+    //         source.iter_ranges().collect::<Vec<CharRange>>(),
+    //         vec![chars!('\u{0000}'..='\u{007f}')]
+    //     );
+    //     assert_eq!(source.range_count(), 1);
+    // }
+    // #[test]
+    // fn test_unicode_block() {
+    //     let source = unicode_blocks::UnicodeBlockId::BasicLatin.block();
+    //     assert_eq!(
+    //         source.iter_ranges().collect::<Vec<CharRange>>(),
+    //         vec![chars!('\u{0000}'..='\u{007f}')]
+    //     );
+    //     assert_eq!(source.range_count(), 1);
+    // }
 }
 #[cfg(test)]
 mod from_tests {
     use crate::CharCollection;
-    use unicode_blocks::UnicodeBlockId;
+    // use unicode_blocks::UnicodeBlockId;
     #[test]
     fn test_char() {
         let actual: CharCollection = (&'a').into();
         assert_eq!(actual, char_collect!('a'..='a'));
     }
-    #[test]
-    fn test_unicode_block_id() {
-        let actual: CharCollection = (&UnicodeBlockId::BasicLatin).into();
-        assert_eq!(actual, char_collect!('\u{0000}'..='\u{007f}'));
-    }
+    // #[test]
+    // fn test_unicode_block_id() {
+    //     let actual: CharCollection = (&UnicodeBlockId::BasicLatin).into();
+    //     assert_eq!(actual, char_collect!('\u{0000}'..='\u{007f}'));
+    // }
 }
\ No newline at end of file

From 4963cc1a5d2e6c548ecf04f02dfcb7a3b93fa4ca Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Wed, 10 Jun 2020 01:05:18 +0000
Subject: [PATCH 04/30] Replaced CharRange, passing 36/40 tests

---
 components/char_collection/Cargo.toml         |   4 +
 .../char_collection/src/char_collection.rs    | 164 ++++++++++++++++--
 components/char_collection/src/conversions.rs |   8 +-
 components/char_collection/src/lib.rs         |   2 +
 components/char_collection/src/macros.rs      |  38 ++--
 5 files changed, 176 insertions(+), 40 deletions(-)

diff --git a/components/char_collection/Cargo.toml b/components/char_collection/Cargo.toml
index 8e66cb5a0d5..8ed0275f2a2 100644
--- a/components/char_collection/Cargo.toml
+++ b/components/char_collection/Cargo.toml
@@ -12,3 +12,7 @@ include = [
     "src/**/*",
     "Cargo.toml",
 ]
+
+[dependencies]
+unic-char-range = "0.9.0"
+paste = "0.1.16"
diff --git a/components/char_collection/src/char_collection.rs b/components/char_collection/src/char_collection.rs
index 1094c4964c6..63f4c7cb88b 100644
--- a/components/char_collection/src/char_collection.rs
+++ b/components/char_collection/src/char_collection.rs
@@ -3,6 +3,7 @@
 // found in the LICENSE file.
 use {
     std::{
+        char,
         clone::Clone,
         cmp::Ordering,
         hash::{Hash, Hasher},
@@ -13,8 +14,151 @@ use {
         convert::Into,
         boxed::Box
     },
-    unic_char_range::{chars, CharIter, CharRange},
+    // unic_char_range::{chars, CharIter, CharRange},
 };
+
+#[derive(Copy, Clone, Debug, Eq)]
+pub struct CharRange {
+    low: char,
+    high: char,
+}
+
+impl CharRange {
+    /// Builds the range `low..high`: `low` is included, `high` is excluded.
+    // NOTE(review): consider returning an `Option` instead of panicking below.
+    pub fn open_right(low: char, high: char) -> CharRange { 
+        // Panics when `high` has no predecessor `char`, i.e. '\0' or '\u{E000}'.
+        let high: char = char::from_u32(high as u32 - 1).unwrap();
+        CharRange{low,  high}
+    }
+    /// Builds the range `low..=high`: both endpoints are included.
+    pub fn closed(low: char, high: char) -> CharRange {
+        // No validation is performed: `low > high` is accepted and yields a
+        // range for which `is_empty()` is true and `contains()` is false for
+        // every `char`.
+        CharRange{low, high}
+    }
+    /// Builds the range strictly between `low` and `high`: both are excluded.
+    pub fn open(low: char, high: char) -> CharRange {
+        // Panics if `low` is '\u{D7FF}' or `char::MAX`, or `high` is '\0' or '\u{E000}'.
+        let low: char = char::from_u32(low as u32 + 1).unwrap();
+        let high: char = char::from_u32(high as u32 - 1).unwrap();
+        CharRange{low, high}
+    }
+// open_left
+    pub fn open_left(low: char, high: char) -> CharRange {
+        // this is repeated here
+        let high: char = char::from_u32(high as u32 + 1).unwrap();
+        CharRange{low, high}
+    }
+// all
+    pub fn all() -> CharRange {
+        CharRange{low: '\u{0}', high: char::MAX}
+    }
+    /// `Less` if the range ends below `comp_char`, `Greater` if it starts above, else `Equal`.
+    pub fn cmp_char(&self, comp_char: char) -> Ordering {
+        if self.high < comp_char {
+            Ordering::Less
+        }
+        else if self.low > comp_char {
+            Ordering::Greater
+        }
+        else {
+            Ordering::Equal
+        }
+    }
+    /// Returns true when `ch` lies within `low..=high`.
+    pub fn contains(&self, ch: char) -> bool {
+        self.low <=  ch && ch <= self.high
+    }
+    /// Returns true when the range holds no chars, i.e. `low > high`.
+    pub fn is_empty(&self) -> bool {
+        self.low > self.high
+    }
+    pub fn iter(&self) -> CharIter {
+        (*self).into()
+    }
+}
+
+impl IntoIterator for CharRange {
+    type IntoIter = CharIter;
+    type Item = char;
+    fn into_iter(self) -> CharIter {
+        self.iter()
+    }
+}
+
+impl PartialEq<CharRange> for CharRange {
+    fn eq(&self, other: &CharRange) -> bool {
+        (self.is_empty() && other.is_empty()) || (self.low == other.low  && self.high == other.high)
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct CharIter {
+    low: char,
+    high: char
+}
+
+impl From<CharRange> for CharIter {
+    fn from(range: CharRange) -> CharIter {
+        CharIter {
+            low: range.low,
+            high: range.high
+        }
+    }
+}
+
+impl From<CharIter> for CharRange {
+    fn from(iter: CharIter) -> CharRange {
+        CharRange {
+            low: iter.low,
+            high: iter.high
+        }
+    }
+}
+
+impl CharIter {
+    /// Moves `low` to the next valid `char`; at `char::MAX` it empties the iterator instead.
+    fn advance(&mut self) {
+        match self.low {
+            char::MAX => self.high = '\0',
+            // U+D800..=U+DFFF are surrogates, not `char`s: jump over the gap.
+            '\u{D7FF}' => self.low = '\u{E000}',
+            low => self.low = char::from_u32(low as u32 + 1).unwrap(),
+        }
+    }
+    /// Moves `high` to the previous valid `char` (skipping surrogates); at '\0' it empties.
+    fn retreat(&mut self) {
+        match self.high {
+            '\0' => self.low = char::MAX,
+            '\u{E000}' => self.high = '\u{D7FF}',
+            high => self.high = char::from_u32(high as u32 - 1).unwrap(),
+        }
+    }
+    /// Removes and returns the last remaining `char`, or `None` once `low > high`.
+    fn next_back(&mut self) -> Option<char> {
+        if self.low > self.high {
+            return None;
+        }
+        let ch = self.high;
+        self.retreat();
+        Some(ch)
+    }
+}
+
+impl Iterator for CharIter {
+    type Item = char;
+    fn next(&mut self) -> Option<char> {
+        if self.low > self.high {
+            return None;
+        }
+        let ch = self.low;
+        self.advance();
+        Some(ch)
+    }
+}
+
 /// A trait for objects that represent one or more disjoint, non-adjacent
 /// [CharRanges](unic_char_range::CharRange).
 pub trait MultiCharRange {
@@ -411,10 +555,10 @@ fn format_range(range: &CharRange) -> String {
 #[cfg(test)]
 mod tests {
     use {
-        super::{are_chars_adjacent, CharCollection},
+        super::{are_chars_adjacent, CharCollection, CharRange},
         std::error::Error,
         std::char,
-        unic_char_range::{chars, CharRange},
+        // unic_char_range::{chars, CharRange},
     };
     #[test]
     fn test_from_sorted_ranges() -> Result<(), Box<dyn Error>> {
@@ -633,13 +777,13 @@ mod tests {
         assert_eq!(collection_a.intersection(&collection_b), expected);
         assert_eq!(collection_b.intersection(&collection_a), expected);
     }
-    #[test]
-    fn test_macro_expressions() {
-        use unicode_blocks::UnicodeBlockId::Arabic;
-        let collection =
-            char_collect!({ ('c'..='e') + ('f'..='h') - ('a'..='d') + Arabic + (0x5..=0x42) });
-        assert_eq!(collection, char_collect!(0x5..=0x42, 'e'..='h', Arabic));
-    }
+    // #[test]
+    // fn test_macro_expressions() {
+    //     use unicode_blocks::UnicodeBlockId::Arabic;
+    //     let collection =
+    //         char_collect!({ ('c'..='e') + ('f'..='h') - ('a'..='d') + Arabic + (0x5..=0x42) });
+    //     assert_eq!(collection, char_collect!(0x5..=0x42, 'e'..='h', Arabic));
+    // }
     #[test]
     fn test_iter() {
         let collection = char_collect!('a'..='c', 'j'..='l', 'x'..='z');
diff --git a/components/char_collection/src/conversions.rs b/components/char_collection/src/conversions.rs
index f2c4d18ffa7..1ca0d36b26d 100644
--- a/components/char_collection/src/conversions.rs
+++ b/components/char_collection/src/conversions.rs
@@ -6,10 +6,10 @@ use std::boxed::Box;
 use std::convert::TryFrom;
 use std::iter;
 use std::ops::RangeInclusive;
-use unic_char_range::CharRange;
+// use unic_char_range::CharRange;
 // use unic_ucd_block::Block;
 // use unicode_blocks::UnicodeBlockId;
-use crate::{CharCollection, MultiCharRange};
+use crate::{CharRange, CharCollection, MultiCharRange};
 macro_rules! impl_for_range_inclusive_int_type {
     ($($t:ty),*) => {$(
         impl MultiCharRange for RangeInclusive<$t> {
@@ -80,9 +80,9 @@ impl<T: MultiCharRange> From<&T> for CharCollection {
 }
 #[cfg(test)]
 mod multi_char_range_tests {
-    use crate::MultiCharRange;
+    use crate::{MultiCharRange, CharRange};
     use paste;
-    use unic_char_range::{chars, CharRange};
+    // use unic_char_range::{chars, CharRange};
     #[test]
     fn test_char() {
         let source = 'a';
diff --git a/components/char_collection/src/lib.rs b/components/char_collection/src/lib.rs
index ee5e0de5205..d6d4a1096f5 100644
--- a/components/char_collection/src/lib.rs
+++ b/components/char_collection/src/lib.rs
@@ -8,5 +8,7 @@ mod conversions;
 mod operators;
 pub use char_collection::CharCollection;
 pub use char_collection::MultiCharRange;
+pub use char_collection::CharRange;
+pub use char_collection::CharIter;
 pub use conversions::*;
 pub use operators::*;
\ No newline at end of file
diff --git a/components/char_collection/src/macros.rs b/components/char_collection/src/macros.rs
index 83a6a1b45fc..3830b9d3f66 100644
--- a/components/char_collection/src/macros.rs
+++ b/components/char_collection/src/macros.rs
@@ -7,32 +7,6 @@
 /// The macro can be used with either a comma-separated list of items, or with an expression
 /// representing set operations.
 ///
-/// ```
-/// use char_collection::char_collect;
-/// use unicode_blocks::UnicodeBlockId;
-/// use unic_char_range::CharRange;
-///
-/// let c1 = char_collect!(
-///     'a'..='z',
-///     CharRange::closed('D', 'G'),
-///     UnicodeBlockId::Cyrillic,
-///     0x01..=0x05,
-///     '@');
-///
-/// let c2 = char_collect!({ ('a'..='z') - ('p'..='t') + UnicodeBlockId::Bengali });
-/// ```
-///
-/// *NOTE:* Parenthetical expressions currently aren't supported unless they start with a
-/// `CharCollection`.
-/// ```
-/// use char_collection::char_collect;
-///
-/// // This works:
-/// let c1 = char_collect!({ ('a'..='z') + (char_collect!('A'..='Z') - ('L'..='P')) });
-///
-/// // This doesn't:
-/// let c1 = char_collect!({ ('a'..='z') + (('A'..='Z') - ('L'..='P')) });
-/// ```
 #[macro_export]
 macro_rules! char_collect {
     ({ $($x:tt)+ }) => {
@@ -51,4 +25,16 @@ macro_rules! char_collect {
             col
         }
     };
+}
+#[macro_export]
+macro_rules! chars {
+    ($low:tt .. $high:tt) => {
+        $crate::CharRange::open_right($low, $high)
+    };
+    ($low:tt ..= $high:tt) => {
+        $crate::CharRange::closed($low, $high)
+    };
+    (..) => {
+        $crate::CharRange::all()
+    }
 }
\ No newline at end of file

From 01ca5efa5784bc635cec90f4fbbea2a74b1f1b65 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Wed, 10 Jun 2020 02:13:01 +0000
Subject: [PATCH 05/30] Fixed bug, pass all 40 tests

---
 .../char_collection/src/char_collection.rs    | 21 +------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/components/char_collection/src/char_collection.rs b/components/char_collection/src/char_collection.rs
index 63f4c7cb88b..e7914c9993a 100644
--- a/components/char_collection/src/char_collection.rs
+++ b/components/char_collection/src/char_collection.rs
@@ -48,7 +48,7 @@ impl CharRange {
 // open_left
     pub fn open_left(low: char, high: char) -> CharRange {
         // this is repeated here
-        let high: char = char::from_u32(high as u32 + 1).unwrap();
+        let low: char = char::from_u32(low as u32 + 1).unwrap();
         CharRange{low, high}
     }
 // all
@@ -176,25 +176,6 @@ pub trait MultiCharRange {
 /// The easiest way to create instances is using the
 /// [char_collect!](::char_collection::char_collect) macro.
 ///
-/// ```
-/// use char_collection::CharCollection;
-///
-/// let mut collection: CharCollection = char_collect!('a'..='d', 'x'..='z');
-/// char_collection += 'e';
-/// char_collection += chars!('p'..='t');
-/// assert_eq!(
-///     collection.iter_ranges().collect(),
-///     vec![chars!('a'..='e'), chars!('p'..='t'), chars!('x'..='z')]);
-///
-/// assert!(collection.contains(&'c'));
-/// assert!(collection.contains_range(chars!('q'..='s')));
-/// assert!(!collection.contains(&'9'));
-///
-/// collection -= chars!('t'..='y');
-/// assert_eq!(
-///     collection.iter_ranges().collect(),
-///     vec![chars!('a'..='e', chars!('p'..'s'), chars!('z'..='z'))]);
-/// ```
 ///
 /// TODO(kpozin): Implement IntoIter.
 #[derive(Clone, Debug, Eq, PartialEq, Default)]

From b57d18fbeb95c02ad413bb36adfc4f5a504e066a Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Fri, 12 Jun 2020 16:19:43 +0000
Subject: [PATCH 06/30] Remove dependency file

---
 components/char_collection/BUILD.gn | 32 -----------------------------
 1 file changed, 32 deletions(-)
 delete mode 100644 components/char_collection/BUILD.gn

diff --git a/components/char_collection/BUILD.gn b/components/char_collection/BUILD.gn
deleted file mode 100644
index f772f5023e6..00000000000
--- a/components/char_collection/BUILD.gn
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright 2019 The Fuchsia Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-import("//build/rust/rustc_library.gni")
-import("//build/test/test_package.gni")
-import("//build/testing/environments.gni")
-# Library for working with collections of Unicode code points.
-rustc_library("char_collection") {
-  edition = "2018"
-  with_unit_tests = true
-  deps = [
-    "//src/lib/intl/unicode_utils/unicode_blocks",
-    "//third_party/rust_crates:anyhow",
-    "//third_party/rust_crates:paste",
-    "//third_party/rust_crates:thiserror",
-    "//third_party/rust_crates:unic-char-range",
-    "//third_party/rust_crates:unic-ucd-block",
-  ]
-}
-test_package("char_collection_tests") {
-  deps = [ ":char_collection_test" ]
-  tests = [
-    {
-      name = "char_collection_lib_test"
-      environments = basic_envs
-    },
-  ]
-}
-group("tests") {
-  testonly = true
-  public_deps = [ ":char_collection_tests" ]
-}

From dedebe25466cf513923af8de8b56380051908a41 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Tue, 16 Jun 2020 19:24:57 +0000
Subject: [PATCH 07/30] github actions and README fixes

---
 components/char_collection/README.md          |  12 ++
 .../char_collection/src/char_collection.rs    | 186 +++++++++++-------
 components/char_collection/src/conversions.rs |  21 +-
 components/char_collection/src/lib.rs         |   6 +-
 components/char_collection/src/macros.rs      |   4 +-
 components/char_collection/src/operators.rs   |   2 +-
 6 files changed, 150 insertions(+), 81 deletions(-)
 create mode 100644 components/char_collection/README.md

diff --git a/components/char_collection/README.md b/components/char_collection/README.md
new file mode 100644
index 00000000000..9cb580caa61
--- /dev/null
+++ b/components/char_collection/README.md
@@ -0,0 +1,12 @@
+# ICU4X
+
+ICU4X is a set of internationalization components for Unicode.
+
+# Status [![crates.io](http://meritbadge.herokuapp.com/icu4x)](https://crates.io/crates/icu4x)
+
+The project is in an incubation period.
+
+# Authors
+
+The project is managed by a subcommittee of ICU-TC in the Unicode Consortium focused on providing solutions for client-side internationalization.
+
diff --git a/components/char_collection/src/char_collection.rs b/components/char_collection/src/char_collection.rs
index e7914c9993a..639e955e7c1 100644
--- a/components/char_collection/src/char_collection.rs
+++ b/components/char_collection/src/char_collection.rs
@@ -1,20 +1,17 @@
 // Copyright 2019 The Fuchsia Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
-use {
-    std::{
-        char,
-        clone::Clone,
-        cmp::Ordering,
-        hash::{Hash, Hasher},
-        iter::Iterator,
-        ops::Range,
-        vec::Vec,
-        error::Error,
-        convert::Into,
-        boxed::Box
-    },
-    // unic_char_range::{chars, CharIter, CharRange},
+use std::{
+    boxed::Box,
+    char,
+    clone::Clone,
+    cmp::Ordering,
+    convert::Into,
+    error::Error,
+    hash::{Hash, Hasher},
+    iter::Iterator,
+    ops::Range,
+    vec::Vec,
 };
 
 #[derive(Copy, Clone, Debug, Eq)]
@@ -24,54 +21,55 @@ pub struct CharRange {
 }
 
 impl CharRange {
-// open_right
+    // open_right
     // would we want this to return a Option next time?
-    pub fn open_right(low: char, high: char) -> CharRange { 
+    pub fn open_right(low: char, high: char) -> CharRange {
         // nothing happens if this fails
         let high: char = char::from_u32(high as u32 - 1).unwrap();
-        CharRange{low,  high}
+        CharRange { low, high }
     }
-// closed 
+    // closed
     pub fn closed(low: char, high: char) -> CharRange {
         // if low == '\u{0}' { // need way to handle this
-        //     // for now just leave alone 
-        // } 
-        CharRange{low, high}
+        //     // for now just leave alone
+        // }
+        CharRange { low, high }
     }
-// open
+    // open
     pub fn open(low: char, high: char) -> CharRange {
         // this is repeated here
         let low: char = char::from_u32(low as u32 + 1).unwrap();
         let high: char = char::from_u32(high as u32 - 1).unwrap();
-        CharRange{low, high}
+        CharRange { low, high }
     }
-// open_left
+    // open_left
     pub fn open_left(low: char, high: char) -> CharRange {
         // this is repeated here
         let low: char = char::from_u32(low as u32 + 1).unwrap();
-        CharRange{low, high}
+        CharRange { low, high }
     }
-// all
+    // all
     pub fn all() -> CharRange {
-        CharRange{low: '\u{0}', high: char::MAX}
+        CharRange {
+            low: '\u{0}',
+            high: char::MAX,
+        }
     }
-// cmp_char
+    // cmp_char
     pub fn cmp_char(&self, comp_char: char) -> Ordering {
         if self.high < comp_char {
             Ordering::Less
-        }
-        else if self.low > comp_char {
+        } else if self.low > comp_char {
             Ordering::Greater
-        }
-        else {
+        } else {
             Ordering::Equal
         }
     }
-// contains 
+    // contains
     pub fn contains(&self, ch: char) -> bool {
-        self.low <=  ch && ch <= self.high
+        self.low <= ch && ch <= self.high
     }
-// is_empty
+    // is_empty
     pub fn is_empty(&self) -> bool {
         self.low > self.high
     }
@@ -90,21 +88,21 @@ impl IntoIterator for CharRange {
 
 impl PartialEq<CharRange> for CharRange {
     fn eq(&self, other: &CharRange) -> bool {
-        (self.is_empty() && other.is_empty()) || (self.low == other.low  && self.high == other.high)
+        (self.is_empty() && other.is_empty()) || (self.low == other.low && self.high == other.high)
     }
 }
 
 #[derive(Clone, Debug)]
 pub struct CharIter {
     low: char,
-    high: char
+    high: char,
 }
 
 impl From<CharRange> for CharIter {
     fn from(range: CharRange) -> CharIter {
         CharIter {
             low: range.low,
-            high: range.high
+            high: range.high,
         }
     }
 }
@@ -113,7 +111,7 @@ impl From<CharIter> for CharRange {
     fn from(iter: CharIter) -> CharRange {
         CharRange {
             low: iter.low,
-            high: iter.high
+            high: iter.high,
         }
     }
 }
@@ -122,24 +120,21 @@ impl CharIter {
     fn advance(&mut self) {
         if self.low == char::MAX {
             self.high = '\0';
-        }
-        else {
+        } else {
             self.low = char::from_u32(self.low as u32 + 1).unwrap();
         }
     }
     fn retreat(&mut self) {
         if self.high == '\0' {
             self.low = char::MAX;
-        }
-        else {
+        } else {
             self.high = char::from_u32(self.high as u32 - 1).unwrap();
         }
     }
     fn next_back(&mut self) -> Option<char> {
         if self.low > self.high {
             None
-        }
-        else {
+        } else {
             let ch = self.high;
             self.retreat();
             Some(ch)
@@ -198,7 +193,9 @@ impl CharCollection {
         T: IntoIterator<Item = CharRange>,
     {
         // If the original `ranges` is also a Vec, this doesn't result in an extra copy.
-        let collection = CharCollection { ranges: ranges.into_iter().collect() };
+        let collection = CharCollection {
+            ranges: ranges.into_iter().collect(),
+        };
         let ranges: &Vec<CharRange> = &collection.ranges;
         match (1..ranges.len()).find(|i| (ranges[*i].low as i64 - ranges[*i - 1].high as i64) <= 1)
         {
@@ -206,7 +203,8 @@ impl CharCollection {
                 "These ranges are out of order, overlapping, or adjacent: {:?}, {:?}",
                 format_range(&ranges[i - 1]),
                 format_range(&ranges[i])
-            ).into()),
+            )
+            .into()),
             None => Ok(collection),
         }
     }
@@ -257,7 +255,9 @@ impl CharCollection {
     /// is the number of ranges in this collection and <var>T</var> is the number of ranges in
     /// `to_add`.
     pub fn insert<V: MultiCharRange>(&mut self, to_add: &V) -> &mut Self {
-        to_add.iter_ranges().for_each(|range| self.insert_char_range(&range));
+        to_add
+            .iter_ranges()
+            .for_each(|range| self.insert_char_range(&range));
         self
     }
     /// Appends a `char` to the end of the existing collection. Panics if the given `char` is not
@@ -297,7 +297,8 @@ impl CharCollection {
                     "Cannot append {:?} after {:?}",
                     format_range(&range),
                     last_range.high
-                ).into());
+                )
+                .into());
             }
             if are_chars_adjacent(&last_range.high, &range.low) {
                 last_range.high = range.high;
@@ -317,7 +318,9 @@ impl CharCollection {
     /// is the number of ranges in this collection and <var>T</var> is the number of ranges in
     /// `to_remove`.
     pub fn remove<V: MultiCharRange>(&mut self, to_remove: &V) -> &mut Self {
-        to_remove.iter_ranges().for_each(|range| self.remove_char_range(&range));
+        to_remove
+            .iter_ranges()
+            .for_each(|range| self.remove_char_range(&range));
         self
     }
     /// Remove all entries from this collection.
@@ -390,7 +393,9 @@ impl CharCollection {
         if prev_high != std::char::MAX {
             result_ranges.push(CharRange::open_left(prev_high, std::char::MAX));
         }
-        CharCollection { ranges: result_ranges }
+        CharCollection {
+            ranges: result_ranges,
+        }
     }
     /// Insert a single `CharRange`.
     ///
@@ -449,8 +454,10 @@ impl CharCollection {
         match lower_existing_range {
             Ok((idx, lower_existing_range)) => {
                 if lower_existing_range.low < range_to_remove.low {
-                    replacement_ranges
-                        .push(CharRange::open_right(lower_existing_range.low, range_to_remove.low));
+                    replacement_ranges.push(CharRange::open_right(
+                        lower_existing_range.low,
+                        range_to_remove.low,
+                    ));
                 }
                 remove_from_idx = idx;
             }
@@ -470,7 +477,8 @@ impl CharCollection {
                 remove_to_idx = idx;
             }
         }
-        self.ranges.splice(remove_from_idx..remove_to_idx, replacement_ranges);
+        self.ranges
+            .splice(remove_from_idx..remove_to_idx, replacement_ranges);
     }
     /// Delete all the existing `CharRange`s that fall within `indices_to_replace` in the vector,
     /// and insert `char_range_to_insert` in their place. If the newly formed range is adjacent to
@@ -496,10 +504,13 @@ impl CharCollection {
                 indices_to_replace.end += 1;
             }
         }
-        self.ranges.splice(indices_to_replace, vec![char_range_to_insert]);
+        self.ranges
+            .splice(indices_to_replace, vec![char_range_to_insert]);
     }
     fn find_containing_range(&self, query: &char) -> Result<(usize, CharRange), usize> {
-        let result = self.ranges.binary_search_by(|range| range.cmp_char(query.clone()));
+        let result = self
+            .ranges
+            .binary_search_by(|range| range.cmp_char(query.clone()));
         match result {
             Ok(index) => Ok((index, self.ranges[index])),
             Err(index) => Err(index),
@@ -516,7 +527,9 @@ impl MultiCharRange for CharCollection {
 }
 impl Hash for CharCollection {
     fn hash<H: Hasher>(&self, state: &mut H) {
-        self.ranges.iter().for_each(|range| hash_char_range(range, state));
+        self.ranges
+            .iter()
+            .for_each(|range| hash_char_range(range, state));
     }
 }
 fn hash_char_range<H: Hasher>(range: &CharRange, state: &mut H) {
@@ -537,9 +550,9 @@ fn format_range(range: &CharRange) -> String {
 mod tests {
     use {
         super::{are_chars_adjacent, CharCollection, CharRange},
-        std::error::Error,
         std::char,
         // unic_char_range::{chars, CharRange},
+        std::error::Error,
     };
     #[test]
     fn test_from_sorted_ranges() -> Result<(), Box<dyn Error>> {
@@ -593,7 +606,10 @@ mod tests {
     fn test_find_containing_range() {
         let collection = char_collect!({ ('a'..='d') + ('g'..='j') + ('l'..='o') + 'z' });
         assert_eq!(collection.find_containing_range(&'0'), Err(0));
-        assert_eq!(collection.find_containing_range(&'c'), Ok((0, chars!('a'..='d'))));
+        assert_eq!(
+            collection.find_containing_range(&'c'),
+            Ok((0, chars!('a'..='d')))
+        );
         assert_eq!(collection.find_containing_range(&'e'), Err(1));
     }
     #[test]
@@ -605,7 +621,10 @@ mod tests {
     fn test_insert_exact_match() {
         let mut collection = char_collect!('a'..='d', 'g'..='l');
         collection += 'a'..='d';
-        assert_eq!(collection.ranges, vec![chars!('a'..='d'), chars!('g'..='l')]);
+        assert_eq!(
+            collection.ranges,
+            vec![chars!('a'..='d'), chars!('g'..='l')]
+        );
     }
     #[test]
     fn test_insert_non_overlapping_sorted() {
@@ -642,7 +661,10 @@ mod tests {
     fn test_insert_overlapping_with_intersections() {
         let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
         collection += 'd'..='k';
-        assert_eq!(collection.ranges, vec![chars!('c'..='m'), chars!('p'..='s')]);
+        assert_eq!(
+            collection.ranges,
+            vec![chars!('c'..='m'), chars!('p'..='s')]
+        );
     }
     #[test]
     fn test_insert_coalesce_adjacent_ranges() {
@@ -653,7 +675,12 @@ mod tests {
     #[test]
     fn test_append() -> Result<(), Box<dyn Error>> {
         let mut collection = char_collect!('a'..='c');
-        collection.append('d')?.append('g')?.append('h')?.append('i')?.append('z')?;
+        collection
+            .append('d')?
+            .append('g')?
+            .append('h')?
+            .append('i')?
+            .append('z')?;
         assert_eq!(collection, char_collect!('a'..='d', 'g'..='i', 'z'));
         Ok(())
     }
@@ -672,7 +699,9 @@ mod tests {
     #[test]
     fn test_append_range() -> Result<(), Box<dyn Error>> {
         let mut collection = char_collect!('a'..='c');
-        collection.append_range(chars!('g'..='i'))?.append_range(chars!('j'..='m'))?;
+        collection
+            .append_range(chars!('g'..='i'))?
+            .append_range(chars!('j'..='m'))?;
         assert_eq!(collection, char_collect!('a'..='c', 'g'..='m'));
         Ok(())
     }
@@ -690,7 +719,10 @@ mod tests {
     fn test_remove_exact_range() {
         let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
         collection -= 'j'..='m';
-        assert_eq!(collection.ranges, vec![chars!('c'..='e'), chars!['p'..='s']]);
+        assert_eq!(
+            collection.ranges,
+            vec![chars!('c'..='e'), chars!['p'..='s']]
+        );
     }
     #[test]
     fn test_remove_overlapping_all_existent() {
@@ -710,21 +742,34 @@ mod tests {
         collection -= 'k'..='l';
         assert_eq!(
             collection.ranges,
-            vec![chars!('c'..='e'), chars!('j'..='j'), chars!('m'..='m'), chars!('p'..='s')]
+            vec![
+                chars!('c'..='e'),
+                chars!('j'..='j'),
+                chars!('m'..='m'),
+                chars!('p'..='s')
+            ]
         );
     }
     #[test]
     fn test_remove_intersection() {
         let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
         collection -= 'd'..='q';
-        assert_eq!(collection.ranges, vec![chars!('c'..='c'), chars!('r'..='s')]);
+        assert_eq!(
+            collection.ranges,
+            vec![chars!('c'..='c'), chars!('r'..='s')]
+        );
     }
     #[test]
     fn test_complement_simple() {
         let collection = char_collect!(0x10..=0x50, 0x70..=0x70, 0x99..=0x640);
         assert_eq!(
             collection.complement(),
-            char_collect!(0x00..=0x0F, 0x51..=0x6F, 0x71..=0x98, 0x641..=(char::MAX as u32))
+            char_collect!(
+                0x00..=0x0F,
+                0x51..=0x6F,
+                0x71..=0x98,
+                0x641..=(char::MAX as u32)
+            )
         );
     }
     #[test]
@@ -740,7 +785,10 @@ mod tests {
     #[test]
     fn test_complement_includes_min_and_max() {
         let collection = char_collect!(0x0..=0x10, 0x40..=0x50, 0xCCCC..=(char::MAX as u32));
-        assert_eq!(collection.complement(), char_collect!(0x11..=0x3F, 0x51..=0xCCCB));
+        assert_eq!(
+            collection.complement(),
+            char_collect!(0x11..=0x3F, 0x51..=0xCCCB)
+        );
     }
     #[test]
     fn test_union() {
@@ -777,4 +825,4 @@ mod tests {
         assert!(!are_chars_adjacent(&'b', &'a'));
         assert!(!are_chars_adjacent(&'a', &'c'));
     }
-}
\ No newline at end of file
+}
diff --git a/components/char_collection/src/conversions.rs b/components/char_collection/src/conversions.rs
index 1ca0d36b26d..02e3c223ace 100644
--- a/components/char_collection/src/conversions.rs
+++ b/components/char_collection/src/conversions.rs
@@ -9,7 +9,7 @@ use std::ops::RangeInclusive;
 // use unic_char_range::CharRange;
 // use unic_ucd_block::Block;
 // use unicode_blocks::UnicodeBlockId;
-use crate::{CharRange, CharCollection, MultiCharRange};
+use crate::{CharCollection, CharRange, MultiCharRange};
 macro_rules! impl_for_range_inclusive_int_type {
     ($($t:ty),*) => {$(
         impl MultiCharRange for RangeInclusive<$t> {
@@ -80,25 +80,34 @@ impl<T: MultiCharRange> From<&T> for CharCollection {
 }
 #[cfg(test)]
 mod multi_char_range_tests {
-    use crate::{MultiCharRange, CharRange};
+    use crate::{CharRange, MultiCharRange};
     use paste;
     // use unic_char_range::{chars, CharRange};
     #[test]
     fn test_char() {
         let source = 'a';
-        assert_eq!(source.iter_ranges().collect::<Vec<CharRange>>(), vec![chars!('a'..='a')]);
+        assert_eq!(
+            source.iter_ranges().collect::<Vec<CharRange>>(),
+            vec![chars!('a'..='a')]
+        );
         assert_eq!(source.range_count(), 1);
     }
     #[test]
     fn test_char_range() {
         let source = chars!('d'..='g');
-        assert_eq!(source.iter_ranges().collect::<Vec<CharRange>>(), vec![chars!('d'..='g')]);
+        assert_eq!(
+            source.iter_ranges().collect::<Vec<CharRange>>(),
+            vec![chars!('d'..='g')]
+        );
         assert_eq!(source.range_count(), 1);
     }
     #[test]
     fn test_range_inclusive_char() {
         let source = 'd'..='g';
-        assert_eq!(source.iter_ranges().collect::<Vec<CharRange>>(), vec![chars!('d'..='g')]);
+        assert_eq!(
+            source.iter_ranges().collect::<Vec<CharRange>>(),
+            vec![chars!('d'..='g')]
+        );
         assert_eq!(source.range_count(), 1);
     }
     macro_rules! test_range_inclusive_int {
@@ -153,4 +162,4 @@ mod from_tests {
     //     let actual: CharCollection = (&UnicodeBlockId::BasicLatin).into();
     //     assert_eq!(actual, char_collect!('\u{0000}'..='\u{007f}'));
     // }
-}
\ No newline at end of file
+}
diff --git a/components/char_collection/src/lib.rs b/components/char_collection/src/lib.rs
index d6d4a1096f5..f135d871fe0 100644
--- a/components/char_collection/src/lib.rs
+++ b/components/char_collection/src/lib.rs
@@ -7,8 +7,8 @@ mod char_collection;
 mod conversions;
 mod operators;
 pub use char_collection::CharCollection;
-pub use char_collection::MultiCharRange;
-pub use char_collection::CharRange;
 pub use char_collection::CharIter;
+pub use char_collection::CharRange;
+pub use char_collection::MultiCharRange;
 pub use conversions::*;
-pub use operators::*;
\ No newline at end of file
+pub use operators::*;
diff --git a/components/char_collection/src/macros.rs b/components/char_collection/src/macros.rs
index 3830b9d3f66..49569781f50 100644
--- a/components/char_collection/src/macros.rs
+++ b/components/char_collection/src/macros.rs
@@ -36,5 +36,5 @@ macro_rules! chars {
     };
     (..) => {
         $crate::CharRange::all()
-    }
-}
\ No newline at end of file
+    };
+}
diff --git a/components/char_collection/src/operators.rs b/components/char_collection/src/operators.rs
index 1beb4656073..cec8cad4e3d 100644
--- a/components/char_collection/src/operators.rs
+++ b/components/char_collection/src/operators.rs
@@ -59,4 +59,4 @@ impl ops::Not for CharCollection {
     fn not(self) -> Self::Output {
         self.complement()
     }
-}
\ No newline at end of file
+}

From a1c2d6afcbb6a86e866ef253585891bf43210182 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Tue, 23 Jun 2020 06:19:05 +0000
Subject: [PATCH 08/30] L1 initial completion, unit tests not complete

---
 components/char_collection/src/lib.rs    |   2 +
 components/char_collection/src/uniset.rs | 182 +++++++++++++++++++++++
 2 files changed, 184 insertions(+)
 create mode 100644 components/char_collection/src/uniset.rs

diff --git a/components/char_collection/src/lib.rs b/components/char_collection/src/lib.rs
index f135d871fe0..54eacbe847e 100644
--- a/components/char_collection/src/lib.rs
+++ b/components/char_collection/src/lib.rs
@@ -6,9 +6,11 @@ mod macros;
 mod char_collection;
 mod conversions;
 mod operators;
+mod uniset;
 pub use char_collection::CharCollection;
 pub use char_collection::CharIter;
 pub use char_collection::CharRange;
 pub use char_collection::MultiCharRange;
 pub use conversions::*;
 pub use operators::*;
+pub use uniset::UnicodeSet;
diff --git a/components/char_collection/src/uniset.rs b/components/char_collection/src/uniset.rs
new file mode 100644
index 00000000000..8ebfda757eb
--- /dev/null
+++ b/components/char_collection/src/uniset.rs
@@ -0,0 +1,182 @@
+use std::{
+    boxed::Box,
+    char,
+    clone::Clone,
+    cmp::Ordering,
+    convert::From, // https://doc.rust-lang.org/std/convert/trait.From.html rust practice says do not use Into
+    error::Error,
+    hash::{Hash, Hasher},
+    iter::Iterator,
+    num::ParseIntError,
+    ops::Range,
+    str::Split,
+    vec::Vec,
+};
+
+const UNICODESET_MAX: u32 = 0x110000; // does max imply inclusive? else should be 10FFFF
+const UNICODESET_MIN: u32 = 0x000000;
+
+/// Given string representation of inversion list create set
+/// Check if sorted during iteration
+fn parse_serial_string(serialize_str: &str) -> Result<Vec<u32>, Box<dyn Error>> {
+    // wondering how much this method catches in tests
+    // let split_serialize: Split<&str> = serialize.split(" ");
+    // let capacity: u8 = split_serialize.next().unwrap().
+    let mut serialize = serialize_str.split(" ");
+    let capacity: usize = serialize.next().unwrap().parse()?;
+    if capacity % 2 != 0 {
+        return Err("Capacity must be even".into());
+    }
+    let mut serialized_vec: Vec<u32> = Vec::with_capacity(capacity);
+    let mut prev: u32 = 0;
+    for str_ele in serialize {
+        // unsure if the capacity matters if we can expand, but that might be an issue if you expand into too much memory
+        // otherwise shrink_to_fit is possible
+        let parsed: u32 = str_ele.parse()?;
+        if serialized_vec.len() + 1 > serialized_vec.capacity() {
+            return Err("Serialization capacity is too small".into());
+        }
+        if parsed < prev {
+            return Err("Serialization must be sorted".into());
+        }
+        serialized_vec.push(parsed);
+        prev = parsed;
+    }
+    if serialized_vec.len() % 2 != 0 {
+        return Err("Serialization must be even".into());
+    }
+    serialized_vec.shrink_to_fit(); // necessary if the length < capacity
+    Ok(serialized_vec)
+}
+
+//#[derive(Copy, Clone, Debug, Eq)]
+pub struct UnicodeSet {
+    // If we wanted to use an array to keep the memory on the stack, there is an unsafe nightly feature
+    // https://doc.rust-lang.org/nightly/core/array/trait.FixedSizeArray.html
+    // Allows for traits of fixed size arrays
+    set: Vec<u32>, // is set misleading? could be uset
+}
+
+impl UnicodeSet {
+    pub fn new(serialize: &str) -> Result<UnicodeSet, Box<dyn Error>> {
+        match parse_serial_string(serialize) {
+            Ok(serialize) => Ok(UnicodeSet { set: serialize }),
+            Err(e) => Err(e),
+        }
+    }
+
+    pub fn from_range(start: &u32, end: &u32) -> UnicodeSet {
+        UnicodeSet {
+            set: vec![*start, *end],
+        }
+    }
+
+    pub fn all() -> UnicodeSet {
+        UnicodeSet {
+            set: vec![UNICODESET_MIN, UNICODESET_MAX],
+        }
+    }
+
+    pub fn bmp() -> UnicodeSet {
+        UnicodeSet {
+            set: vec![UNICODESET_MIN, 0xFFFF],
+        }
+    }
+
+    pub fn contains(&self, query: &u32) -> bool {
+        // need an enforcement of pattern
+        //Need to evaluate
+        // let mut low = 0;
+        // let mut high = self.set.len() - 1;
+        // if low >= high || query > self.set[high] || query < self.set[low]{
+        //     false
+        // }
+        // // [2, 5, 10, 12] => [2, 4], [10, 11]
+        // // [2, 5, 10] => [2, 4], [10]
+        // // [2, 5, 10, 10, 12]
+        // // [1, 1, 0]
+        // // 5, 9
+        // let mut pos: i8 = -1;
+        // while low <= high {
+        //     let middle = (low + high) >> 1;
+        //     let check = self.set[middle];
+        //     if middle == low {
+        //         pos = middle;
+        //         break
+        //     }
+        //     if check < query {
+        //         low = middle + 1;
+        //     }
+        //     else {
+        //         high = middle - 1;
+        //     }
+        // }
+        // if pos == -1 {
+        //     pos = middle + 1;
+        // }
+        // [2, 5, 10, 15]
+        match self.set.binary_search(query) {
+            // relies on having even # elements
+            Ok(pos) => {
+                if pos % 2 == 0 {
+                    true
+                } else {
+                    if pos > 0 && &self.set[pos - 1] == query {
+                        true
+                    } else {
+                        false
+                    }
+                }
+            }
+            Err(pos) => {
+                if pos % 2 == 0 {
+                    false
+                } else {
+                    if pos >= self.set.len() - 1 {
+                        false
+                    } else {
+                        true
+                    }
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use {
+        super::{parse_serial_string, UnicodeSet},
+        std::num::ParseIntError,
+    };
+    // parse_serial_string
+    #[test]
+    fn test_parse_serial_string() {
+        let expected = vec![2, 3, 4, 5];
+        let actual = parse_serial_string("4 2 3 4 5").unwrap();
+        assert_eq!(actual, expected);
+    }
+    #[test]
+    fn test_parse_serial_string_no_char() {
+        assert!(parse_serial_string("4 2 A 3 4 5").is_err());
+    }
+    #[test]
+    fn test_parse_serial_string_empty() {
+        assert!(parse_serial_string("").is_err());
+    }
+    #[test]
+    fn test_parse_serial_string_wrong_format() {
+        assert!(parse_serial_string("[4, 2, 3, 4, 5  ]").is_err());
+    }
+    #[test]
+    fn test_parse_serial_string_capacity_not_even() {
+        assert!(parse_serial_string("3 2 3 4").is_err());
+    }
+    #[test]
+    fn test_parse_serial_string_size_not_even() {
+        assert!(parse_serial_string("4 3 2 1").is_err());
+    }
+
+    // UnicodeSet constructors
+}
+// impl From<io:: // need to define an error

From 11bdaecb096f0b8f08b77b59b30b752390d94043 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Tue, 23 Jun 2020 16:51:57 +0000
Subject: [PATCH 09/30] UnicodeSet tests

---
 components/char_collection/src/uniset.rs | 119 +++++++++++++++--------
 1 file changed, 77 insertions(+), 42 deletions(-)

diff --git a/components/char_collection/src/uniset.rs b/components/char_collection/src/uniset.rs
index 8ebfda757eb..217d6900daa 100644
--- a/components/char_collection/src/uniset.rs
+++ b/components/char_collection/src/uniset.rs
@@ -13,11 +13,17 @@ use std::{
     vec::Vec,
 };
 
-const UNICODESET_MAX: u32 = 0x110000; // does max imply inclusive? else should be 10FFFF
+const UNICODESET_MAX: u32 = 0x10FFFF; // inclusive: the highest valid Unicode code point
 const UNICODESET_MIN: u32 = 0x000000;
-
+const BMP_MAX: u32 = 0xFFFF;
 /// Given string representation of inversion list create set
-/// Check if sorted during iteration
+///
+/// Requires a leading capacity integer, followed by space-delimited integer code points.
+/// There must be an even number of code points (not counting the capacity integer), and
+/// they must be in ascending order.
+///
+/// Example string: `4 0 5 10 15` designates a capacity of 4, followed by 2 ranges.
+/// The ranges are {0, 4} and {10, 14}, inclusive.
 fn parse_serial_string(serialize_str: &str) -> Result<Vec<u32>, Box<dyn Error>> {
     // wondering how much this method catches in tests
     // let split_serialize: Split<&str> = serialize.split(" ");
@@ -49,6 +55,10 @@ fn parse_serial_string(serialize_str: &str) -> Result<Vec<u32>, Box<dyn Error>>
     Ok(serialized_vec)
 }
 
+/// UnicodeSet membership wrapper.
+///
+/// Exposes membership queries, plus constructors that build a set from a serialized
+/// UnicodeSet string or from predefined ranges.
 //#[derive(Copy, Clone, Debug, Eq)]
 pub struct UnicodeSet {
     // If we wanted to use an array to keep the memory on the stack, there is an unsafe nightly feature
@@ -65,58 +75,32 @@ impl UnicodeSet {
         }
     }
 
-    pub fn from_range(start: &u32, end: &u32) -> UnicodeSet {
-        UnicodeSet {
-            set: vec![*start, *end],
+    pub fn from_range(start: &u32, end: &u32) -> Result<UnicodeSet, Box<dyn Error>> {
+        if start > end {
+            return Err("Range is out of order".into())
+        }
+        if start < &UNICODESET_MIN || end > &UNICODESET_MAX {
+            return Err("Range is out of bounds".into())
         }
+        Ok(UnicodeSet {
+            set: vec![*start, *end],
+        })
     }
 
     pub fn all() -> UnicodeSet {
         UnicodeSet {
-            set: vec![UNICODESET_MIN, UNICODESET_MAX],
+            set: vec![UNICODESET_MIN, UNICODESET_MAX + 1],
         }
     }
 
     pub fn bmp() -> UnicodeSet {
         UnicodeSet {
-            set: vec![UNICODESET_MIN, 0xFFFF],
+            set: vec![UNICODESET_MIN, BMP_MAX + 1],
         }
     }
 
     pub fn contains(&self, query: &u32) -> bool {
-        // need an enforcement of pattern
-        //Need to evaluate
-        // let mut low = 0;
-        // let mut high = self.set.len() - 1;
-        // if low >= high || query > self.set[high] || query < self.set[low]{
-        //     false
-        // }
-        // // [2, 5, 10, 12] => [2, 4], [10, 11]
-        // // [2, 5, 10] => [2, 4], [10]
-        // // [2, 5, 10, 10, 12]
-        // // [1, 1, 0]
-        // // 5, 9
-        // let mut pos: i8 = -1;
-        // while low <= high {
-        //     let middle = (low + high) >> 1;
-        //     let check = self.set[middle];
-        //     if middle == low {
-        //         pos = middle;
-        //         break
-        //     }
-        //     if check < query {
-        //         low = middle + 1;
-        //     }
-        //     else {
-        //         high = middle - 1;
-        //     }
-        // }
-        // if pos == -1 {
-        //     pos = middle + 1;
-        // }
-        // [2, 5, 10, 15]
         match self.set.binary_search(query) {
-            // relies on having even # elements
             Ok(pos) => {
                 if pos % 2 == 0 {
                     true
@@ -132,7 +116,7 @@ impl UnicodeSet {
                 if pos % 2 == 0 {
                     false
                 } else {
-                    if pos >= self.set.len() - 1 {
+                    if pos >= self.set.len() {
                         false
                     } else {
                         true
@@ -146,7 +130,7 @@ impl UnicodeSet {
 #[cfg(test)]
 mod tests {
     use {
-        super::{parse_serial_string, UnicodeSet},
+        super::{parse_serial_string, UnicodeSet, UNICODESET_MIN, UNICODESET_MAX, BMP_MAX},
         std::num::ParseIntError,
     };
     // parse_serial_string
@@ -178,5 +162,56 @@ mod tests {
     }
 
     // UnicodeSet constructors
+    #[test]
+    fn test_unicodeset_new() {
+        let expected = vec![2, 3, 4, 5];
+        let actual = UnicodeSet::new("4 2 3 4 5").unwrap().set;
+        assert_eq!(actual, expected);
+    }
+    #[test]
+    fn test_unicodeset_new_error() {
+        assert!(UnicodeSet::new("3 2 4 3").is_err());
+    }
+    #[test]
+    fn test_unicodeset_from_range() {
+        let expected = vec![4, 10];
+        let actual = UnicodeSet::from_range(&4, &10).unwrap().set;
+        assert_eq!(actual, expected);
+    }
+    #[test]
+    fn test_unicodeset_from_range_bad_order() {
+        assert!(UnicodeSet::from_range(&10, &5).is_err());
+    }
+    #[test]
+    fn test_unicodeset_from_range_out_of_bounds() {
+        assert!(UnicodeSet::from_range(&0, &0x110000).is_err());
+    }
+    #[test]
+    fn test_unicodeset_all() {
+        let expected = vec![UNICODESET_MIN, UNICODESET_MAX + 1];
+        assert_eq!(UnicodeSet::all().set, expected);
+    }
+    #[test]
+    fn test_unicodeset_bmp() {
+        let expected = vec![UNICODESET_MIN, BMP_MAX + 1];
+        assert_eq!(UnicodeSet::bmp().set, expected);
+    }
+    #[test]
+    fn test_unicodeset_contains() {
+        let check = UnicodeSet::new("4 2 5 10 15").unwrap();
+        assert!(check.contains(&2));
+        assert!(check.contains(&4));
+        assert!(check.contains(&10));
+        assert!(check.contains(&14));
+    }
+    #[test]
+    fn test_unicodeset_contains_false() {
+        let check = UnicodeSet::new("4 2 5 10 15").unwrap();
+        assert!(!check.contains(&1));
+        assert!(!check.contains(&5));
+        assert!(!check.contains(&9));
+        assert!(!check.contains(&15));
+        assert!(!check.contains(&16));
+    }
 }
 // impl From<io:: // need to define an error

From dc41bdf13b613f1a1e64089ed5c37f777fa31714 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Tue, 23 Jun 2020 17:04:20 +0000
Subject: [PATCH 10/30] Complete contains test and docs

---
 components/char_collection/src/uniset.rs | 27 ++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/components/char_collection/src/uniset.rs b/components/char_collection/src/uniset.rs
index 217d6900daa..7e066829c21 100644
--- a/components/char_collection/src/uniset.rs
+++ b/components/char_collection/src/uniset.rs
@@ -16,6 +16,7 @@ use std::{
 const UNICODESET_MAX: u32 = 0x10FFFF; // does max imply inclusive? else should be 10FFFF
 const UNICODESET_MIN: u32 = 0x000000;
 const BMP_MAX: u32 = 0xFFFF;
+
 /// Given string representation of inversion list create set
 ///
 /// Requires starting capacity integer, followed by space delimited integer code points.
@@ -68,6 +69,15 @@ pub struct UnicodeSet {
 }
 
 impl UnicodeSet {
+    /// Returns Result of UnicodeSet from serialized string 
+    ///
+    /// Returns an error if the serialized string fails to parse. 
+    /// The serialized string requires starting capacity integer, followed by space delimited
+    /// integer code points. There must be an even number of elements (not including the 
+    /// capacity int), and must be in ascending sorted order.
+    ///
+    /// Example String: `"4 0 5 10 15"` designates a capacity of size `4`, followed by 2 ranges
+    /// The ranges are `{0, 4}` and `{10, 14}` inclusive
     pub fn new(serialize: &str) -> Result<UnicodeSet, Box<dyn Error>> {
         match parse_serial_string(serialize) {
             Ok(serialize) => Ok(UnicodeSet { set: serialize }),
@@ -75,6 +85,13 @@ impl UnicodeSet {
         }
     }
 
+    /// Returns Result of UnicodeSet from a single pair of integers defining a range
+    /// 
+    /// `start`: inclusive, `end`: exclusive
+    /// 
+    /// Returns an error if the range is invalid (out of order and out of bounds).
+    /// 
+    /// Example Call: `UnicodeSet::from_range(&0, &15)`
     pub fn from_range(start: &u32, end: &u32) -> Result<UnicodeSet, Box<dyn Error>> {
         if start > end {
             return Err("Range is out of order".into())
@@ -87,18 +104,28 @@ impl UnicodeSet {
         })
     }
 
+    /// Returns UnicodeSet spanning entire Unicode range 
+    /// 
+    /// The range spans from `0x0 -> 0x10FFFF` inclusive
     pub fn all() -> UnicodeSet {
         UnicodeSet {
             set: vec![UNICODESET_MIN, UNICODESET_MAX + 1],
         }
     }
 
+    /// Returns UnicodeSet spanning BMP range 
+    /// 
+    /// The range spans from `0x0 -> 0xFFFF` inclusive
     pub fn bmp() -> UnicodeSet {
         UnicodeSet {
             set: vec![UNICODESET_MIN, BMP_MAX + 1],
         }
     }
 
+    /// Checks to see the query is in the UnicodeSet
+    /// 
+    /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
+    /// on the set using `std::vec::Vec` implementation 
     pub fn contains(&self, query: &u32) -> bool {
         match self.set.binary_search(query) {
             Ok(pos) => {

From 0a55933cf3680c318533ffcf04de7ebb8beb26a7 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Tue, 23 Jun 2020 17:04:39 +0000
Subject: [PATCH 11/30] formatting

---
 components/char_collection/src/uniset.rs | 30 ++++++++++++------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/components/char_collection/src/uniset.rs b/components/char_collection/src/uniset.rs
index 7e066829c21..0c85d26daea 100644
--- a/components/char_collection/src/uniset.rs
+++ b/components/char_collection/src/uniset.rs
@@ -69,11 +69,11 @@ pub struct UnicodeSet {
 }
 
 impl UnicodeSet {
-    /// Returns Result of UnicodeSet from serialized string 
+    /// Returns Result of UnicodeSet from serialized string
     ///
-    /// Returns an error if the serialized string fails to parse. 
+    /// Returns an error if the serialized string fails to parse.
     /// The serialized string requires starting capacity integer, followed by space delimited
-    /// integer code points. There must be an even number of elements (not including the 
+    /// integer code points. There must be an even number of elements (not including the
     /// capacity int), and must be in ascending sorted order.
     ///
     /// Example String: `"4 0 5 10 15"` designates a capacity of size `4`, followed by 2 ranges
@@ -86,26 +86,26 @@ impl UnicodeSet {
     }
 
     /// Returns Result of UnicodeSet from a single pair of integers defining a range
-    /// 
+    ///
     /// `start`: inclusive, `end`: exclusive
-    /// 
+    ///
     /// Returns an error if the range is invalid (out of order and out of bounds).
-    /// 
+    ///
     /// Example Call: `UnicodeSet::from_range(&0, &15)`
     pub fn from_range(start: &u32, end: &u32) -> Result<UnicodeSet, Box<dyn Error>> {
         if start > end {
-            return Err("Range is out of order".into())
+            return Err("Range is out of order".into());
         }
         if start < &UNICODESET_MIN || end > &UNICODESET_MAX {
-            return Err("Range is out of bounds".into())
+            return Err("Range is out of bounds".into());
         }
         Ok(UnicodeSet {
             set: vec![*start, *end],
         })
     }
 
-    /// Returns UnicodeSet spanning entire Unicode range 
-    /// 
+    /// Returns UnicodeSet spanning entire Unicode range
+    ///
     /// The range spans from `0x0 -> 0x10FFFF` inclusive
     pub fn all() -> UnicodeSet {
         UnicodeSet {
@@ -113,8 +113,8 @@ impl UnicodeSet {
         }
     }
 
-    /// Returns UnicodeSet spanning BMP range 
-    /// 
+    /// Returns UnicodeSet spanning BMP range
+    ///
     /// The range spans from `0x0 -> 0xFFFF` inclusive
     pub fn bmp() -> UnicodeSet {
         UnicodeSet {
@@ -123,9 +123,9 @@ impl UnicodeSet {
     }
 
     /// Checks to see the query is in the UnicodeSet
-    /// 
+    ///
     /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
-    /// on the set using `std::vec::Vec` implementation 
+    /// on the set using `std::vec::Vec` implementation
     pub fn contains(&self, query: &u32) -> bool {
         match self.set.binary_search(query) {
             Ok(pos) => {
@@ -157,7 +157,7 @@ impl UnicodeSet {
 #[cfg(test)]
 mod tests {
     use {
-        super::{parse_serial_string, UnicodeSet, UNICODESET_MIN, UNICODESET_MAX, BMP_MAX},
+        super::{parse_serial_string, UnicodeSet, BMP_MAX, UNICODESET_MAX, UNICODESET_MIN},
         std::num::ParseIntError,
     };
     // parse_serial_string

From 4a04a96f3c4479593792c1e291f7346462ef41e1 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Tue, 23 Jun 2020 18:14:54 +0000
Subject: [PATCH 12/30] added is_empty() and size()

---
 components/char_collection/src/uniset.rs | 71 +++++++++++++++++-------
 1 file changed, 52 insertions(+), 19 deletions(-)

diff --git a/components/char_collection/src/uniset.rs b/components/char_collection/src/uniset.rs
index 0c85d26daea..d399e34f00e 100644
--- a/components/char_collection/src/uniset.rs
+++ b/components/char_collection/src/uniset.rs
@@ -1,17 +1,4 @@
-use std::{
-    boxed::Box,
-    char,
-    clone::Clone,
-    cmp::Ordering,
-    convert::From, // https://doc.rust-lang.org/std/convert/trait.From.html rust practice says do not use Into
-    error::Error,
-    hash::{Hash, Hasher},
-    iter::Iterator,
-    num::ParseIntError,
-    ops::Range,
-    str::Split,
-    vec::Vec,
-};
+use std::{boxed::Box, error::Error, iter::Iterator, slice::Iter, vec::Vec};
 
 const UNICODESET_MAX: u32 = 0x10FFFF; // does max imply inclusive? else should be 10FFFF
 const UNICODESET_MIN: u32 = 0x000000;
@@ -43,7 +30,7 @@ fn parse_serial_string(serialize_str: &str) -> Result<Vec<u32>, Box<dyn Error>>
         if serialized_vec.len() + 1 > serialized_vec.capacity() {
             return Err("Serialization capacity is too small".into());
         }
-        if parsed < prev {
+        if parsed <= prev {
             return Err("Serialization must be sorted".into());
         }
         serialized_vec.push(parsed);
@@ -121,6 +108,25 @@ impl UnicodeSet {
             set: vec![UNICODESET_MIN, BMP_MAX + 1],
         }
     }
+    /// Returns an `Iter` of start and stop `u32` points of the UnicodeSet
+    pub fn iter(&self) -> Iter<u32> {
+        self.set.iter()
+    }
+
+    /// Returns the cardinality of the UnicodeSet
+    pub fn size(&self) -> Result<usize, Box<dyn Error>> {
+        if self.set.len() < 2 {
+            return Err("UnicodeSet length < 2".into());
+        }
+        let end: u32 = self.iter().skip(1).step_by(2).sum::<u32>();
+        let start: u32 = self.iter().step_by(2).sum::<u32>();
+        Ok((end - start) as usize)
+    }
+
+    /// Returns whether or not the UnicodeSet is empty
+    pub fn is_empty(&self) -> bool {
+        self.set.len() < 2 // unsure if this is appropriate definition of just self.set.is_empty()
+    }
 
     /// Checks to see the query is in the UnicodeSet
     ///
@@ -156,10 +162,7 @@ impl UnicodeSet {
 
 #[cfg(test)]
 mod tests {
-    use {
-        super::{parse_serial_string, UnicodeSet, BMP_MAX, UNICODESET_MAX, UNICODESET_MIN},
-        std::num::ParseIntError,
-    };
+    use super::{parse_serial_string, UnicodeSet, BMP_MAX, UNICODESET_MAX, UNICODESET_MIN};
     // parse_serial_string
     #[test]
     fn test_parse_serial_string() {
@@ -180,6 +183,14 @@ mod tests {
         assert!(parse_serial_string("[4, 2, 3, 4, 5  ]").is_err());
     }
     #[test]
+    fn test_parse_serial_string_wrong_order() {
+        assert!(parse_serial_string("4 1 0 4 2").is_err());
+    }
+    #[test]
+    fn test_parse_serial_string_single_char_error() {
+        assert!(parse_serial_string("4 1 1 2 2").is_err());
+    }
+    #[test]
     fn test_parse_serial_string_capacity_not_even() {
         assert!(parse_serial_string("3 2 3 4").is_err());
     }
@@ -240,5 +251,27 @@ mod tests {
         assert!(!check.contains(&15));
         assert!(!check.contains(&16));
     }
+    #[test]
+    fn test_unicodeset_size() {
+        let check = UnicodeSet::new("4 2 5 10 15").unwrap();
+        assert_eq!(8, check.size().unwrap());
+        let check = UnicodeSet::all();
+        let expected = UNICODESET_MAX + 1 - UNICODESET_MIN;
+        assert_eq!(expected as usize, check.size().unwrap());
+    }
+    #[test]
+    fn test_unicodeset_size_error() {
+        let check = UnicodeSet { set: vec![0] };
+        assert!(check.size().is_err());
+    }
+    #[test]
+    fn test_unicodeset_is_empty() {
+        let check = UnicodeSet { set: vec![] };
+        assert!(check.is_empty());
+        let check = UnicodeSet { set: vec![0] };
+        assert!(check.is_empty());
+        let check = UnicodeSet::all();
+        assert!(!check.is_empty());
+    }
 }
 // impl From<io:: // need to define an error

From e8c2b1a1e8c32e776f86ee2dd1039d9e65facbd2 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Tue, 23 Jun 2020 18:24:24 +0000
Subject: [PATCH 13/30] proposed changes

---
 components/char_collection/Cargo.toml         |  1 +
 components/char_collection/src/conversions.rs | 48 ++-----------------
 components/char_collection/src/uniset.rs      |  1 -
 3 files changed, 4 insertions(+), 46 deletions(-)

diff --git a/components/char_collection/Cargo.toml b/components/char_collection/Cargo.toml
index 8ed0275f2a2..b649bc92db3 100644
--- a/components/char_collection/Cargo.toml
+++ b/components/char_collection/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2018"
 readme = "README.md"
 repository = "https://github.com/unicode-org/icu4x"
 license = "MIT/Apache-2.0"
+license-file = "LICENSE"
 categories = ["internationalization"]
 include = [
     "src/**/*",
diff --git a/components/char_collection/src/conversions.rs b/components/char_collection/src/conversions.rs
index 02e3c223ace..7da45e5467d 100644
--- a/components/char_collection/src/conversions.rs
+++ b/components/char_collection/src/conversions.rs
@@ -2,14 +2,12 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 //! Conversion (`From`) implementations for [CharCollection], via [MultiCharRange].
+use crate::{CharCollection, CharRange, MultiCharRange};
 use std::boxed::Box;
 use std::convert::TryFrom;
 use std::iter;
 use std::ops::RangeInclusive;
-// use unic_char_range::CharRange;
-// use unic_ucd_block::Block;
-// use unicode_blocks::UnicodeBlockId;
-use crate::{CharCollection, CharRange, MultiCharRange};
+
 macro_rules! impl_for_range_inclusive_int_type {
     ($($t:ty),*) => {$(
         impl MultiCharRange for RangeInclusive<$t> {
@@ -55,22 +53,7 @@ impl MultiCharRange for RangeInclusive<char> {
     }
 }
 impl_for_range_inclusive_int_type!(u8, i8, u32, i32);
-// impl MultiCharRange for UnicodeBlockId {
-//     fn iter_ranges(&self) -> Box<dyn Iterator<Item = CharRange>> {
-//         self.block().iter_ranges()
-//     }
-//     fn range_count(&self) -> usize {
-//         1
-//     }
-// }
-// impl MultiCharRange for Block {
-//     fn iter_ranges<'a>(&'a self) -> Box<dyn Iterator<Item = CharRange> + 'a> {
-//         Box::new(self.range.iter_ranges())
-//     }
-//     fn range_count(&self) -> usize {
-//         1
-//     }
-// }
+
 impl<T: MultiCharRange> From<&T> for CharCollection {
     fn from(source: &T) -> Self {
         let mut collection = CharCollection::new();
@@ -82,7 +65,6 @@ impl<T: MultiCharRange> From<&T> for CharCollection {
 mod multi_char_range_tests {
     use crate::{CharRange, MultiCharRange};
     use paste;
-    // use unic_char_range::{chars, CharRange};
     #[test]
     fn test_char() {
         let source = 'a';
@@ -129,37 +111,13 @@ mod multi_char_range_tests {
     test_range_inclusive_int!(i8);
     test_range_inclusive_int!(u32);
     test_range_inclusive_int!(i32);
-    // #[test]
-    // fn test_unicode_block_id() {
-    //     let source = unicode_blocks::UnicodeBlockId::BasicLatin;
-    //     assert_eq!(
-    //         source.iter_ranges().collect::<Vec<CharRange>>(),
-    //         vec![chars!('\u{0000}'..='\u{007f}')]
-    //     );
-    //     assert_eq!(source.range_count(), 1);
-    // }
-    // #[test]
-    // fn test_unicode_block() {
-    //     let source = unicode_blocks::UnicodeBlockId::BasicLatin.block();
-    //     assert_eq!(
-    //         source.iter_ranges().collect::<Vec<CharRange>>(),
-    //         vec![chars!('\u{0000}'..='\u{007f}')]
-    //     );
-    //     assert_eq!(source.range_count(), 1);
-    // }
 }
 #[cfg(test)]
 mod from_tests {
     use crate::CharCollection;
-    // use unicode_blocks::UnicodeBlockId;
     #[test]
     fn test_char() {
         let actual: CharCollection = (&'a').into();
         assert_eq!(actual, char_collect!('a'..='a'));
     }
-    // #[test]
-    // fn test_unicode_block_id() {
-    //     let actual: CharCollection = (&UnicodeBlockId::BasicLatin).into();
-    //     assert_eq!(actual, char_collect!('\u{0000}'..='\u{007f}'));
-    // }
 }
diff --git a/components/char_collection/src/uniset.rs b/components/char_collection/src/uniset.rs
index d399e34f00e..5604c45043e 100644
--- a/components/char_collection/src/uniset.rs
+++ b/components/char_collection/src/uniset.rs
@@ -274,4 +274,3 @@ mod tests {
         assert!(!check.is_empty());
     }
 }
-// impl From<io:: // need to define an error

From 1471213e0e7f54953cddafd0964ddf210d705ab2 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Tue, 23 Jun 2020 20:13:03 +0000
Subject: [PATCH 14/30] Closure for contains and docs

---
 components/char_collection/Cargo.toml    |   3 +-
 components/char_collection/src/uniset.rs | 111 ++++++++++++++++-------
 2 files changed, 79 insertions(+), 35 deletions(-)

diff --git a/components/char_collection/Cargo.toml b/components/char_collection/Cargo.toml
index b649bc92db3..1dd07f3dc60 100644
--- a/components/char_collection/Cargo.toml
+++ b/components/char_collection/Cargo.toml
@@ -6,8 +6,7 @@ authors = ["The ICU4X Project Developers"]
 edition = "2018"
 readme = "README.md"
 repository = "https://github.com/unicode-org/icu4x"
-license = "MIT/Apache-2.0"
-license-file = "LICENSE"
+license-file = "../../LICENSE"
 categories = ["internationalization"]
 include = [
     "src/**/*",
diff --git a/components/char_collection/src/uniset.rs b/components/char_collection/src/uniset.rs
index 5604c45043e..950cb2c74cc 100644
--- a/components/char_collection/src/uniset.rs
+++ b/components/char_collection/src/uniset.rs
@@ -13,16 +13,13 @@ const BMP_MAX: u32 = 0xFFFF;
 /// Example String: `4 0 5 10 15` designates a capacity of size 4, followed by 2 ranges
 /// The ranges are {0, 4} and {10, 14} inclusive
 fn parse_serial_string(serialize_str: &str) -> Result<Vec<u32>, Box<dyn Error>> {
-    // wondering how much this method catches in tests
-    // let split_serialize: Split<&str> = serialize.split(" ");
-    // let capacity: u8 = split_serialize.next().unwrap().
     let mut serialize = serialize_str.split(" ");
     let capacity: usize = serialize.next().unwrap().parse()?;
     if capacity % 2 != 0 {
         return Err("Capacity must be even".into());
     }
     let mut serialized_vec: Vec<u32> = Vec::with_capacity(capacity);
-    let mut prev: u32 = 0;
+    let mut prev: Option<u32> = None;
     for str_ele in serialize {
         // unsure if the capacity matters if we can expand, but that might be an issue if you expand into too much memory
         // otherwise shrink_to_fit is possible
@@ -30,11 +27,11 @@ fn parse_serial_string(serialize_str: &str) -> Result<Vec<u32>, Box<dyn Error>>
         if serialized_vec.len() + 1 > serialized_vec.capacity() {
             return Err("Serialization capacity is too small".into());
         }
-        if parsed <= prev {
+        if Some(parsed) <= prev {
             return Err("Serialization must be sorted".into());
         }
         serialized_vec.push(parsed);
-        prev = parsed;
+        prev = Some(parsed);
     }
     if serialized_vec.len() % 2 != 0 {
         return Err("Serialization must be even".into());
@@ -128,36 +125,62 @@ impl UnicodeSet {
         self.set.len() < 2 // unsure if this is appropriate definition of just self.set.is_empty()
     }
 
-    /// Checks to see the query is in the UnicodeSet
-    ///
-    /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
-    /// on the set using `std::vec::Vec` implementation
-    pub fn contains(&self, query: &u32) -> bool {
+    /// Wrapper for contains conditions closures
+    fn contains<C>(&self, query: &u32, condition: C) -> bool
+    where
+        C: Fn(usize) -> bool,
+    {
         match self.set.binary_search(query) {
             Ok(pos) => {
                 if pos % 2 == 0 {
-                    true
+                    return condition(pos);
                 } else {
-                    if pos > 0 && &self.set[pos - 1] == query {
-                        true
-                    } else {
-                        false
-                    }
+                    false
                 }
             }
             Err(pos) => {
-                if pos % 2 == 0 {
-                    false
+                if pos % 2 != 0 && pos < self.set.len() {
+                    return condition(pos);
                 } else {
-                    if pos >= self.set.len() {
-                        false
-                    } else {
-                        true
-                    }
+                    false
                 }
             }
         }
     }
+
+    /// Checks to see the query is in the UnicodeSet
+    ///
+    /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
+    /// in the set using `std::vec::Vec` implementation
+    ///
+    /// Example: `contains_point(&10)`
+    pub fn contains_point(&self, query: &u32) -> bool {
+        let condition_closure = |_: usize| -> bool { true };
+        self.contains(query, condition_closure)
+    }
+
+    /// Checks to see if the range is in the UnicodeSet, returns a Result
+    ///
+    /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
+    /// in the set using `std::vec::Vec` implementation
+    ///
+    /// Only runs the search once on the `start` parameter, while the `end` parameter is checked
+    /// in a single `O(1)` step
+    ///
+    /// Example: `contains_range(&0, &10)`
+    pub fn contains_range(&self, start: &u32, end: &u32) -> Result<bool, Box<dyn Error>> {
+        if start >= end {
+            return Err("Range cannot be out of order".into());
+        }
+        let condition_closure = |pos: usize| -> bool {
+            if end < &self.set[pos + 1] {
+                true
+            } else {
+                false
+            }
+        };
+        return Ok(self.contains(start, condition_closure));
+    }
 }
 
 #[cfg(test)]
@@ -237,19 +260,41 @@ mod tests {
     #[test]
     fn test_unicodeset_contains() {
         let check = UnicodeSet::new("4 2 5 10 15").unwrap();
-        assert!(check.contains(&2));
-        assert!(check.contains(&4));
-        assert!(check.contains(&10));
-        assert!(check.contains(&14));
+        assert!(check.contains_point(&2));
+        assert!(check.contains_point(&4));
+        assert!(check.contains_point(&10));
+        assert!(check.contains_point(&14));
     }
     #[test]
     fn test_unicodeset_contains_false() {
         let check = UnicodeSet::new("4 2 5 10 15").unwrap();
-        assert!(!check.contains(&1));
-        assert!(!check.contains(&5));
-        assert!(!check.contains(&9));
-        assert!(!check.contains(&15));
-        assert!(!check.contains(&16));
+        assert!(!check.contains_point(&1));
+        assert!(!check.contains_point(&5));
+        assert!(!check.contains_point(&9));
+        assert!(!check.contains_point(&15));
+        assert!(!check.contains_point(&16));
+    }
+    #[test]
+    fn test_unicodeset_contains_range() {
+        let check = UnicodeSet::new("4 0 10 15 25").unwrap();
+        assert!(check.contains_range(&2, &5).unwrap());
+        assert!(check.contains_range(&0, &9).unwrap());
+        assert!(check.contains_range(&15, &24).unwrap());
+    }
+    #[test]
+    fn test_unicodeset_contains_range_false() {
+        let check = UnicodeSet::new("4 0 10 15 25").unwrap();
+        assert!(!check.contains_range(&0, &10).unwrap());
+        assert!(!check.contains_range(&15, &25).unwrap());
+        assert!(!check.contains_range(&0, &16).unwrap());
+        assert!(!check.contains_range(&10, &15).unwrap());
+        assert!(!check.contains_range(&11, &14).unwrap());
+    }
+    #[test]
+    fn test_unicodeset_contains_range_invalid() {
+        let check = UnicodeSet::all();
+        assert!(check.contains_range(&10, &0).is_err());
+        assert!(check.contains_range(&0, &0).is_err());
     }
     #[test]
     fn test_unicodeset_size() {

From 8e2a34aa189c57b1901da83aae0e9786698f9128 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Tue, 23 Jun 2020 22:57:41 +0000
Subject: [PATCH 15/30] Removed unnecessary files and formatting changes

---
 Cargo.toml                                    |   2 +-
 components/char_collection/Cargo.toml         |  18 -
 components/char_collection/README.md          |  12 -
 .../meta/char_collection_lib_test.cmx         |   5 -
 .../char_collection/src/char_collection.rs    | 828 ------------------
 components/char_collection/src/conversions.rs | 123 ---
 components/char_collection/src/lib.rs         |  16 -
 components/char_collection/src/macros.rs      |  40 -
 components/char_collection/src/operators.rs   |  62 --
 components/char_collection/src/uniset.rs      | 321 -------
 10 files changed, 1 insertion(+), 1426 deletions(-)
 delete mode 100644 components/char_collection/Cargo.toml
 delete mode 100644 components/char_collection/README.md
 delete mode 100644 components/char_collection/meta/char_collection_lib_test.cmx
 delete mode 100644 components/char_collection/src/char_collection.rs
 delete mode 100644 components/char_collection/src/conversions.rs
 delete mode 100644 components/char_collection/src/lib.rs
 delete mode 100644 components/char_collection/src/macros.rs
 delete mode 100644 components/char_collection/src/operators.rs
 delete mode 100644 components/char_collection/src/uniset.rs

diff --git a/Cargo.toml b/Cargo.toml
index 1752ce7e4bd..d679ba2c20e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,6 +3,6 @@
 members = [
     "components/icu",
     "components/icu4x",
-    "components/char_collection",
+    "components/uniset",
     "components/locale",
 ]
diff --git a/components/char_collection/Cargo.toml b/components/char_collection/Cargo.toml
deleted file mode 100644
index 1dd07f3dc60..00000000000
--- a/components/char_collection/Cargo.toml
+++ /dev/null
@@ -1,18 +0,0 @@
-[package]
-name = "icu-char-collection"
-description = "API for managing Unicode Language and Locale Identifiers"
-version = "0.0.1"
-authors = ["The ICU4X Project Developers"]
-edition = "2018"
-readme = "README.md"
-repository = "https://github.com/unicode-org/icu4x"
-license-file = "../../LICENSE"
-categories = ["internationalization"]
-include = [
-    "src/**/*",
-    "Cargo.toml",
-]
-
-[dependencies]
-unic-char-range = "0.9.0"
-paste = "0.1.16"
diff --git a/components/char_collection/README.md b/components/char_collection/README.md
deleted file mode 100644
index 9cb580caa61..00000000000
--- a/components/char_collection/README.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# ICU4X
-
-ICU4X is a set of internationalization components for Unicode.
-
-# Status [![crates.io](http://meritbadge.herokuapp.com/icu4x)](https://crates.io/crates/icu4x)
-
-The project is in an incubation period.
-
-# Authors
-
-The project is managed by a subcommittee of ICU-TC in the Unicode Consortium focused on providing solutions for client-side internationalization.
-
diff --git a/components/char_collection/meta/char_collection_lib_test.cmx b/components/char_collection/meta/char_collection_lib_test.cmx
deleted file mode 100644
index 3bb56a96aa2..00000000000
--- a/components/char_collection/meta/char_collection_lib_test.cmx
+++ /dev/null
@@ -1,5 +0,0 @@
-{
-    "program": {
-        "binary": "test/char_collection_lib_test"
-    }
-}
\ No newline at end of file
diff --git a/components/char_collection/src/char_collection.rs b/components/char_collection/src/char_collection.rs
deleted file mode 100644
index 639e955e7c1..00000000000
--- a/components/char_collection/src/char_collection.rs
+++ /dev/null
@@ -1,828 +0,0 @@
-// Copyright 2019 The Fuchsia Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-use std::{
-    boxed::Box,
-    char,
-    clone::Clone,
-    cmp::Ordering,
-    convert::Into,
-    error::Error,
-    hash::{Hash, Hasher},
-    iter::Iterator,
-    ops::Range,
-    vec::Vec,
-};
-
-#[derive(Copy, Clone, Debug, Eq)]
-pub struct CharRange {
-    low: char,
-    high: char,
-}
-
-impl CharRange {
-    // open_right
-    // would we want this to return a Option next time?
-    pub fn open_right(low: char, high: char) -> CharRange {
-        // nothing happens if this fails
-        let high: char = char::from_u32(high as u32 - 1).unwrap();
-        CharRange { low, high }
-    }
-    // closed
-    pub fn closed(low: char, high: char) -> CharRange {
-        // if low == '\u{0}' { // need way to handle this
-        //     // for now just leave alone
-        // }
-        CharRange { low, high }
-    }
-    // open
-    pub fn open(low: char, high: char) -> CharRange {
-        // this is repeated here
-        let low: char = char::from_u32(low as u32 + 1).unwrap();
-        let high: char = char::from_u32(high as u32 - 1).unwrap();
-        CharRange { low, high }
-    }
-    // open_left
-    pub fn open_left(low: char, high: char) -> CharRange {
-        // this is repeated here
-        let low: char = char::from_u32(low as u32 + 1).unwrap();
-        CharRange { low, high }
-    }
-    // all
-    pub fn all() -> CharRange {
-        CharRange {
-            low: '\u{0}',
-            high: char::MAX,
-        }
-    }
-    // cmp_char
-    pub fn cmp_char(&self, comp_char: char) -> Ordering {
-        if self.high < comp_char {
-            Ordering::Less
-        } else if self.low > comp_char {
-            Ordering::Greater
-        } else {
-            Ordering::Equal
-        }
-    }
-    // contains
-    pub fn contains(&self, ch: char) -> bool {
-        self.low <= ch && ch <= self.high
-    }
-    // is_empty
-    pub fn is_empty(&self) -> bool {
-        self.low > self.high
-    }
-    pub fn iter(&self) -> CharIter {
-        (*self).into()
-    }
-}
-
-impl IntoIterator for CharRange {
-    type IntoIter = CharIter;
-    type Item = char;
-    fn into_iter(self) -> CharIter {
-        self.iter()
-    }
-}
-
-impl PartialEq<CharRange> for CharRange {
-    fn eq(&self, other: &CharRange) -> bool {
-        (self.is_empty() && other.is_empty()) || (self.low == other.low && self.high == other.high)
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct CharIter {
-    low: char,
-    high: char,
-}
-
-impl From<CharRange> for CharIter {
-    fn from(range: CharRange) -> CharIter {
-        CharIter {
-            low: range.low,
-            high: range.high,
-        }
-    }
-}
-
-impl From<CharIter> for CharRange {
-    fn from(iter: CharIter) -> CharRange {
-        CharRange {
-            low: iter.low,
-            high: iter.high,
-        }
-    }
-}
-
-impl CharIter {
-    fn advance(&mut self) {
-        if self.low == char::MAX {
-            self.high = '\0';
-        } else {
-            self.low = char::from_u32(self.low as u32 + 1).unwrap();
-        }
-    }
-    fn retreat(&mut self) {
-        if self.high == '\0' {
-            self.low = char::MAX;
-        } else {
-            self.high = char::from_u32(self.high as u32 - 1).unwrap();
-        }
-    }
-    fn next_back(&mut self) -> Option<char> {
-        if self.low > self.high {
-            None
-        } else {
-            let ch = self.high;
-            self.retreat();
-            Some(ch)
-        }
-    }
-}
-
-impl Iterator for CharIter {
-    type Item = char;
-    fn next(&mut self) -> Option<char> {
-        if self.low > self.high {
-            return None;
-        }
-        let ch = self.low;
-        self.advance();
-        Some(ch)
-    }
-}
-
-/// A trait for objects that represent one or more disjoint, non-adjacent
-/// [CharRanges](unic_char_range::CharRange).
-pub trait MultiCharRange {
-    /// Iterate over the disjoint, non-adjacent [CharRange]s in the collection in ascending order.
-    fn iter_ranges<'a>(&'a self) -> Box<dyn Iterator<Item = CharRange> + 'a>;
-    /// The number of ranges in the collection.
-    fn range_count(&self) -> usize;
-}
-/// A collection of `char`s (i.e. Unicode code points), used for storing large continuous ranges
-/// efficiently.
-///
-/// Lookups and insertions are O(log <var>R</var>), where <var>R</var> is the number of disjoint
-/// ranges in the collection.
-///
-/// The easiest way to create instances is using the
-/// [char_collect!](::char_collection::char_collect) macro.
-///
-///
-/// TODO(kpozin): Implement IntoIter.
-#[derive(Clone, Debug, Eq, PartialEq, Default)]
-pub struct CharCollection {
-    ranges: Vec<CharRange>,
-}
-impl CharCollection {
-    /// Create a new, empty `CharCollection`.
-    pub fn new() -> CharCollection {
-        CharCollection::default()
-    }
-    /// Create a new `CharCollection` from a list of disjoint, non-adjacent `CharRange`s, pre-sorted
-    /// in ascending code point order.
-    ///
-    /// This factory method is primarily intended for use in deserializing valid representations of
-    /// `CharCollections`. Will return an error if ranges are out of order, overlapping, or
-    /// adjacent.
-    pub fn from_sorted_ranges<T>(ranges: T) -> Result<CharCollection, Box<dyn Error>>
-    where
-        T: IntoIterator<Item = CharRange>,
-    {
-        // If the original `ranges` is also a Vec, this doesn't result in an extra copy.
-        let collection = CharCollection {
-            ranges: ranges.into_iter().collect(),
-        };
-        let ranges: &Vec<CharRange> = &collection.ranges;
-        match (1..ranges.len()).find(|i| (ranges[*i].low as i64 - ranges[*i - 1].high as i64) <= 1)
-        {
-            Some(i) => Err(format!(
-                "These ranges are out of order, overlapping, or adjacent: {:?}, {:?}",
-                format_range(&ranges[i - 1]),
-                format_range(&ranges[i])
-            )
-            .into()),
-            None => Ok(collection),
-        }
-    }
-    /// Create a new `CharCollection` from a list of `char`s, pre-sorted in ascending code point
-    /// order.
-    ///
-    /// This factory method is primarily intended for use in deserializing valid representations of
-    /// `CharCollections`. Will return an error if chars are out of order or contain duplicates.
-    pub fn from_sorted_chars<T>(chars: T) -> Result<CharCollection, Box<dyn Error>>
-    where
-        T: IntoIterator<Item = char>,
-    {
-        let mut collection = CharCollection::new();
-        for ch in chars.into_iter() {
-            collection.append(ch)?;
-        }
-        Ok(collection)
-    }
-    /// Iterate over all the `char`s in the collection.
-    pub fn iter(&self) -> impl Iterator<Item = char> + '_ {
-        self.ranges.iter().flat_map(CharRange::iter)
-    }
-    /// Test whether the collection contains a specific `char`.
-    ///
-    /// The time complexity is O(log <var>R</var>), where <var>R</var> is the number of ranges in
-    /// the collection.
-    pub fn contains(&self, ch: &char) -> bool {
-        self.find_containing_range(ch).is_ok()
-    }
-    /// Test whether the collection contains an entire range of characters.
-    ///
-    /// The time complexity is O(log <var>R</var>), where <var>R</var> is the number of ranges in
-    /// the collection.
-    pub fn contains_range(&self, range: &CharRange) -> bool {
-        if range.is_empty() {
-            return false;
-        }
-        let lower_existing_range = self.find_containing_range(&range.low);
-        let upper_existing_range = self.find_containing_range(&range.high);
-        // Fully enclosed in existing range.
-        return lower_existing_range == upper_existing_range && lower_existing_range.is_ok();
-    }
-    /// Insert a `char` or other collection of chars into this collection.
-    ///
-    /// Returns `&mut self` for easy chaining.
-    ///
-    /// The time complexity is O(<var>T</var> log(<var>R</var> + <var>T</var>)), where <var>R</var>
-    /// is the number of ranges in this collection and <var>T</var> is the number of ranges in
-    /// `to_add`.
-    pub fn insert<V: MultiCharRange>(&mut self, to_add: &V) -> &mut Self {
-        to_add
-            .iter_ranges()
-            .for_each(|range| self.insert_char_range(&range));
-        self
-    }
-    /// Appends a `char` to the end of the existing collection. Panics if the given `char` is not
-    /// higher than the highest code point in the existing collection.
-    ///
-    /// Returns `&mut self` for easy chaining.
-    ///
-    /// The time complexity is O(1).
-    pub fn append(&mut self, ch: char) -> Result<&mut Self, Box<dyn Error>> {
-        let mut coalesced = false;
-        if let Some(last_range) = self.ranges.last_mut() {
-            if last_range.cmp_char(ch) != Ordering::Less {
-                return Err(format!("Cannot append {:?} after {:?}", ch, last_range.high).into());
-            }
-            if are_chars_adjacent(&last_range.high, &ch) {
-                last_range.high = ch;
-                coalesced = true;
-            }
-        }
-        if !coalesced {
-            self.ranges.push(chars!(ch..=ch));
-        }
-        Ok(self)
-    }
-    /// Appends a `CharRange` to the end of the existing collection. Panics if the given range is
-    /// not higher than the highest code point in the existing collection. (The new range _may_ be
-    /// adjacent to the previous highest range, but may not overlap.)
-    ///
-    /// Returns `&mut self` for easy chaining.
-    ///
-    /// The time complexity is O(1).
-    pub fn append_range(&mut self, range: CharRange) -> Result<&mut Self, Box<dyn Error>> {
-        let mut coalesced = false;
-        if let Some(last_range) = self.ranges.last_mut() {
-            if last_range.cmp_char(range.low) != Ordering::Less {
-                return Err(format!(
-                    "Cannot append {:?} after {:?}",
-                    format_range(&range),
-                    last_range.high
-                )
-                .into());
-            }
-            if are_chars_adjacent(&last_range.high, &range.low) {
-                last_range.high = range.high;
-                coalesced = true;
-            }
-        }
-        if !coalesced {
-            self.ranges.push(range);
-        }
-        Ok(self)
-    }
-    /// Remove a `char` or other collection of chars from this collection.
-    ///
-    /// Returns `&mut self` for easy chaining.
-    ///
-    /// The time complexity is O(<var>T</var> log(<var>R</var> + <var>T</var>)), where <var>R</var>
-    /// is the number of ranges in this collection and <var>T</var> is the number of ranges in
-    /// `to_remove`.
-    pub fn remove<V: MultiCharRange>(&mut self, to_remove: &V) -> &mut Self {
-        to_remove
-            .iter_ranges()
-            .for_each(|range| self.remove_char_range(&range));
-        self
-    }
-    /// Remove all entries from this collection.
-    ///
-    /// Returns `&mut self` for easy chaining.
-    pub fn clear(&mut self) -> &mut Self {
-        self.ranges.clear();
-        self
-    }
-    /// Return the set union of this collection and another one.
-    ///
-    /// The time complexity is O(min(<var>R</var>, <var>T</var>) log(<var>R</var> + <var>T</var>)),
-    /// where <var>R</var> is the number of ranges in this collection and <var>T</var> is the number
-    /// of ranges in `rhs`.
-    pub fn union<V: MultiCharRange>(&self, rhs: &V) -> CharCollection {
-        let mut result: CharCollection;
-        if self.range_count() > rhs.range_count() {
-            result = self.clone();
-            result.insert(rhs);
-        } else {
-            result = rhs.into();
-            result.insert(self);
-        }
-        result
-    }
-    /// Return the set intersection of this collection and another one.
-    ///
-    /// The time complexity is O(min(<var>R</var>, <var>T</var>) log(<var>R</var> + <var>T</var>)),
-    /// where <var>R</var> is the number of ranges in this collection and <var>T</var> is the number
-    /// of ranges in `rhs`.
-    pub fn intersection<V: MultiCharRange>(&self, rhs: &V) -> CharCollection {
-        let mut result: CharCollection;
-        if self.range_count() > rhs.range_count() {
-            result = self.clone();
-            let rhs: CharCollection = rhs.into();
-            result.remove(&rhs.complement());
-        } else {
-            result = rhs.into();
-            result.remove(&self.complement());
-        }
-        result
-    }
-    /// Return the (non-symmetric) set difference of this collection and another one.
-    ///
-    /// The time complexity is O(<var>T</var> log(<var>R</var> + <var>T</var>)), where <var>R</var>
-    /// is the number of ranges in this collection and <var>T</var> is the number of ranges in
-    /// `rhs`.
-    pub fn difference<V: MultiCharRange>(&self, rhs: &V) -> CharCollection {
-        let mut result: CharCollection = self.clone();
-        result.remove(rhs);
-        result
-    }
-    /// Return the set complement of this collection (over the universe of `char`s).
-    ///
-    /// The time complexity is O(<var>R</var>), where <var>R</var> is the number of ranges in this
-    /// collection.
-    pub fn complement(&self) -> CharCollection {
-        if self.ranges.is_empty() {
-            return CharCollection::from(&CharRange::all());
-        }
-        let mut result_ranges: Vec<CharRange> = Vec::new();
-        if self.ranges[0].low != '\u{0}' {
-            result_ranges.push(CharRange::open_right('\u{0}', self.ranges[0].low));
-        }
-        let mut prev_high = self.ranges[0].high;
-        for range in &self.ranges[1..] {
-            result_ranges.push(CharRange::open(prev_high, range.low));
-            prev_high = range.high;
-        }
-        if prev_high != std::char::MAX {
-            result_ranges.push(CharRange::open_left(prev_high, std::char::MAX));
-        }
-        CharCollection {
-            ranges: result_ranges,
-        }
-    }
-    /// Insert a single `CharRange`.
-    ///
-    /// Depending on how the new range relates to existing ranges in
-    /// the collection, it might be subsumed by an existing range, modify the endpoints of an
-    /// existing range, or replace one or more existing ranges.
-    fn insert_char_range(&mut self, new_range: &CharRange) {
-        if new_range.is_empty() {
-            return;
-        }
-        let lower_existing_range = self.find_containing_range(&new_range.low);
-        let upper_existing_range = self.find_containing_range(&new_range.high);
-        // Fully enclosed in existing range.
-        if lower_existing_range == upper_existing_range && lower_existing_range.is_ok() {
-            return;
-        }
-        let new_low: char;
-        let new_high: char;
-        let remove_from_idx: usize;
-        let remove_to_idx: usize;
-        match lower_existing_range {
-            Ok((idx, lower_existing_range)) => {
-                new_low = lower_existing_range.low;
-                remove_from_idx = idx;
-            }
-            Err(idx) => {
-                new_low = new_range.low;
-                remove_from_idx = idx;
-            }
-        }
-        match upper_existing_range {
-            Ok((idx, higher_existing_range)) => {
-                new_high = higher_existing_range.high;
-                remove_to_idx = idx + 1;
-            }
-            Err(idx) => {
-                new_high = new_range.high;
-                remove_to_idx = idx;
-            }
-        }
-        self.replace_ranges(chars!(new_low..=new_high), remove_from_idx..remove_to_idx);
-    }
-    /// Remove a single `CharRange`.
-    ///
-    /// Depending on how the removed range relates to existing ranges in the collection, it might
-    /// remove or modify the endpoints of existing ranges.
-    fn remove_char_range(&mut self, range_to_remove: &CharRange) {
-        if range_to_remove.is_empty() {
-            return;
-        }
-        let lower_existing_range = self.find_containing_range(&range_to_remove.low);
-        let upper_existing_range = self.find_containing_range(&range_to_remove.high);
-        let mut replacement_ranges: Vec<CharRange> = Vec::new();
-        let remove_from_idx: usize;
-        let remove_to_idx: usize;
-        match lower_existing_range {
-            Ok((idx, lower_existing_range)) => {
-                if lower_existing_range.low < range_to_remove.low {
-                    replacement_ranges.push(CharRange::open_right(
-                        lower_existing_range.low,
-                        range_to_remove.low,
-                    ));
-                }
-                remove_from_idx = idx;
-            }
-            Err(idx) => remove_from_idx = idx,
-        }
-        match upper_existing_range {
-            Ok((idx, higher_existing_range)) => {
-                if range_to_remove.high < higher_existing_range.high {
-                    replacement_ranges.push(CharRange::open_left(
-                        range_to_remove.high,
-                        higher_existing_range.high,
-                    ));
-                }
-                remove_to_idx = idx + 1;
-            }
-            Err(idx) => {
-                remove_to_idx = idx;
-            }
-        }
-        self.ranges
-            .splice(remove_from_idx..remove_to_idx, replacement_ranges);
-    }
-    /// Delete all the existing `CharRange`s that fall within `indices_to_replace` in the vector,
-    /// and insert `char_range_to_insert` in their place. If the newly formed range is adjacent to
-    /// a kept range on its left or right, coalesce them.
-    fn replace_ranges(
-        &mut self,
-        mut char_range_to_insert: CharRange,
-        mut indices_to_replace: Range<usize>,
-    ) {
-        // If the newly formed range is adjacent to the range on its left, coalesce the two.
-        if indices_to_replace.start > 0 {
-            let prev_char_range = self.ranges[indices_to_replace.start - 1];
-            if are_chars_adjacent(&prev_char_range.high, &char_range_to_insert.low) {
-                char_range_to_insert.low = prev_char_range.low;
-                indices_to_replace.start -= 1;
-            }
-        }
-        // If the newly formed range is adjacent to the range on its right, coalesce the two.
-        if indices_to_replace.end < self.ranges.len() {
-            let next_char_range = self.ranges[indices_to_replace.end];
-            if are_chars_adjacent(&char_range_to_insert.high, &next_char_range.low) {
-                char_range_to_insert.high = next_char_range.high;
-                indices_to_replace.end += 1;
-            }
-        }
-        self.ranges
-            .splice(indices_to_replace, vec![char_range_to_insert]);
-    }
-    fn find_containing_range(&self, query: &char) -> Result<(usize, CharRange), usize> {
-        let result = self
-            .ranges
-            .binary_search_by(|range| range.cmp_char(query.clone()));
-        match result {
-            Ok(index) => Ok((index, self.ranges[index])),
-            Err(index) => Err(index),
-        }
-    }
-}
-impl MultiCharRange for CharCollection {
-    fn iter_ranges<'a>(&'a self) -> Box<dyn Iterator<Item = CharRange> + 'a> {
-        Box::new(self.ranges.iter().map(|range| range.clone()))
-    }
-    fn range_count(&self) -> usize {
-        self.ranges.len()
-    }
-}
-impl Hash for CharCollection {
-    fn hash<H: Hasher>(&self, state: &mut H) {
-        self.ranges
-            .iter()
-            .for_each(|range| hash_char_range(range, state));
-    }
-}
-fn hash_char_range<H: Hasher>(range: &CharRange, state: &mut H) {
-    range.low.hash(state);
-    range.high.hash(state);
-}
-fn are_chars_adjacent(left: &char, right: &char) -> bool {
-    let mut iter: CharIter = CharRange::open_right(left.clone(), right.clone()).iter();
-    match iter.next_back() {
-        None => false,
-        Some(next_right) => left == &next_right,
-    }
-}
-fn format_range(range: &CharRange) -> String {
-    format!("{}..={}", range.low, range.high)
-}
-#[cfg(test)]
-mod tests {
-    use {
-        super::{are_chars_adjacent, CharCollection, CharRange},
-        std::char,
-        // unic_char_range::{chars, CharRange},
-        std::error::Error,
-    };
-    #[test]
-    fn test_from_sorted_ranges() -> Result<(), Box<dyn Error>> {
-        let expected = char_collect!('a'..='d', 'g'..='l', 'z');
-        let actual = CharCollection::from_sorted_ranges(vec![
-            chars!('a'..='d'),
-            chars!('g'..='l'),
-            chars!('z'..='z'),
-        ])?;
-        assert_eq!(actual, expected);
-        Ok(())
-    }
-    #[test]
-    fn test_from_sorted_ranges_out_of_order() {
-        assert!(CharCollection::from_sorted_ranges(vec![
-            chars!('g'..='l'),
-            chars!('a'..='d'),
-            chars!('z'..='z'),
-        ])
-        .is_err());
-    }
-    #[test]
-    fn test_from_sorted_ranges_overlap() {
-        assert!(CharCollection::from_sorted_ranges(vec![
-            chars!('a'..='d'),
-            chars!('c'..='l'),
-            chars!('z'..='z'),
-        ])
-        .is_err());
-    }
-    #[test]
-    fn test_from_sorted_ranges_adjacent() {
-        assert!(
-            CharCollection::from_sorted_ranges(vec![chars!('a'..='d'), chars!('e'..='g')]).is_err()
-        );
-    }
-    #[test]
-    fn test_from_sorted_chars() -> Result<(), Box<dyn Error>> {
-        let chars = vec!['a', 'b', 'c', 'd', 'g', 'h', 'i', 'j', 'k', 'l', 'z'];
-        let expected = char_collect!('a'..='d', 'g'..='l', 'z');
-        let actual = CharCollection::from_sorted_chars(chars)?;
-        assert_eq!(actual, expected);
-        Ok(())
-    }
-    #[test]
-    fn test_from_sorted_chars_out_of_order() {
-        let chars = vec!['a', 'b', 'c', 'd', 'g', 'h', 'i', 'j', 'k', 'l', 'e'];
-        assert!(CharCollection::from_sorted_chars(chars).is_err());
-    }
-    #[test]
-    fn test_find_containing_range() {
-        let collection = char_collect!({ ('a'..='d') + ('g'..='j') + ('l'..='o') + 'z' });
-        assert_eq!(collection.find_containing_range(&'0'), Err(0));
-        assert_eq!(
-            collection.find_containing_range(&'c'),
-            Ok((0, chars!('a'..='d')))
-        );
-        assert_eq!(collection.find_containing_range(&'e'), Err(1));
-    }
-    #[test]
-    fn test_insert_initial() {
-        let collection = char_collect!('a'..='d');
-        assert_eq!(collection.ranges, vec![chars!('a'..='d')])
-    }
-    #[test]
-    fn test_insert_exact_match() {
-        let mut collection = char_collect!('a'..='d', 'g'..='l');
-        collection += 'a'..='d';
-        assert_eq!(
-            collection.ranges,
-            vec![chars!('a'..='d'), chars!('g'..='l')]
-        );
-    }
-    #[test]
-    fn test_insert_non_overlapping_sorted() {
-        let collection = char_collect!('a'..='d', 'g'..='j', 'l'..='o');
-        assert_eq!(
-            collection.ranges,
-            vec![chars!('a'..='d'), chars!('g'..='j'), chars!('l'..='o')]
-        );
-    }
-    #[test]
-    fn test_insert_non_overlapping_unsorted() {
-        let collection = char_collect!('l'..='o', 'a'..='d', 'l'..='o', 'a'..='d', 'g'..='j');
-        assert_eq!(
-            collection.ranges,
-            vec![chars!('a'..='d'), chars!('g'..='j'), chars!('l'..='o')]
-        );
-    }
-    #[test]
-    fn test_insert_overlapping_all_existent() {
-        let mut collection = char_collect!('l'..='o', 'a'..='d');
-        collection += 'a'..='o';
-        assert_eq!(collection.ranges, vec![chars!('a'..='o')]);
-    }
-    #[test]
-    fn test_insert_overlapping_some_existent() {
-        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
-        collection += 'i'..='n';
-        assert_eq!(
-            collection.ranges,
-            vec![chars!('c'..='e'), chars!('i'..='n'), chars!('p'..='s')]
-        );
-    }
-    #[test]
-    fn test_insert_overlapping_with_intersections() {
-        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
-        collection += 'd'..='k';
-        assert_eq!(
-            collection.ranges,
-            vec![chars!('c'..='m'), chars!('p'..='s')]
-        );
-    }
-    #[test]
-    fn test_insert_coalesce_adjacent_ranges() {
-        let mut collection = char_collect!('a'..='c', 'j'..='m');
-        collection += 'd'..='i';
-        assert_eq!(collection.ranges, vec![chars!('a'..='m')]);
-    }
-    #[test]
-    fn test_append() -> Result<(), Box<dyn Error>> {
-        let mut collection = char_collect!('a'..='c');
-        collection
-            .append('d')?
-            .append('g')?
-            .append('h')?
-            .append('i')?
-            .append('z')?;
-        assert_eq!(collection, char_collect!('a'..='d', 'g'..='i', 'z'));
-        Ok(())
-    }
-    #[test]
-    fn test_append_out_of_order() -> Result<(), Box<dyn Error>> {
-        let mut collection = char_collect!('a'..='c');
-        assert!(collection
-            .append('d')?
-            .append('g')?
-            .append('h')?
-            .append('i')?
-            .append('e')
-            .is_err());
-        Ok(())
-    }
-    #[test]
-    fn test_append_range() -> Result<(), Box<dyn Error>> {
-        let mut collection = char_collect!('a'..='c');
-        collection
-            .append_range(chars!('g'..='i'))?
-            .append_range(chars!('j'..='m'))?;
-        assert_eq!(collection, char_collect!('a'..='c', 'g'..='m'));
-        Ok(())
-    }
-    #[test]
-    fn test_append_range_out_of_order() -> Result<(), Box<dyn Error>> {
-        let mut collection = char_collect!('a'..='c');
-        assert!(collection
-            .append_range(chars!('g'..='i'))?
-            .append_range(chars!('j'..='m'))?
-            .append_range(chars!('k'..='m'))
-            .is_err());
-        Ok(())
-    }
-    #[test]
-    fn test_remove_exact_range() {
-        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
-        collection -= 'j'..='m';
-        assert_eq!(
-            collection.ranges,
-            vec![chars!('c'..='e'), chars!['p'..='s']]
-        );
-    }
-    #[test]
-    fn test_remove_overlapping_all_existent() {
-        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
-        collection -= 'c'..='s';
-        assert_eq!(collection.ranges, vec![]);
-    }
-    #[test]
-    fn test_remove_overlapping_all_existent_superset() {
-        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
-        collection -= 'a'..='z';
-        assert_eq!(collection.ranges, vec![]);
-    }
-    #[test]
-    fn test_remove_one_subrange() {
-        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
-        collection -= 'k'..='l';
-        assert_eq!(
-            collection.ranges,
-            vec![
-                chars!('c'..='e'),
-                chars!('j'..='j'),
-                chars!('m'..='m'),
-                chars!('p'..='s')
-            ]
-        );
-    }
-    #[test]
-    fn test_remove_intersection() {
-        let mut collection = char_collect!('c'..='e', 'j'..='m', 'p'..='s');
-        collection -= 'd'..='q';
-        assert_eq!(
-            collection.ranges,
-            vec![chars!('c'..='c'), chars!('r'..='s')]
-        );
-    }
-    #[test]
-    fn test_complement_simple() {
-        let collection = char_collect!(0x10..=0x50, 0x70..=0x70, 0x99..=0x640);
-        assert_eq!(
-            collection.complement(),
-            char_collect!(
-                0x00..=0x0F,
-                0x51..=0x6F,
-                0x71..=0x98,
-                0x641..=(char::MAX as u32)
-            )
-        );
-    }
-    #[test]
-    fn test_complement_all() {
-        let collection = char_collect!(CharRange::all());
-        assert_eq!(collection.complement(), char_collect!());
-    }
-    #[test]
-    fn test_complement_none() {
-        let collection = char_collect!();
-        assert_eq!(collection.complement(), char_collect!(CharRange::all()));
-    }
-    #[test]
-    fn test_complement_includes_min_and_max() {
-        let collection = char_collect!(0x0..=0x10, 0x40..=0x50, 0xCCCC..=(char::MAX as u32));
-        assert_eq!(
-            collection.complement(),
-            char_collect!(0x11..=0x3F, 0x51..=0xCCCB)
-        );
-    }
-    #[test]
-    fn test_union() {
-        let collection_a = char_collect!('a'..='g', 'm'..='z', 'B'..='R');
-        let collection_b = char_collect!('e'..='q', 'W'..='Y');
-        let expected = char_collect!('a'..='z', 'B'..='R', 'W'..='Y');
-        assert_eq!(collection_a.union(&collection_b), expected);
-        assert_eq!(collection_b.union(&collection_a), expected);
-    }
-    #[test]
-    fn test_intersection() {
-        let collection_a = char_collect!('a'..='g', 'm'..='z');
-        let collection_b = char_collect!('e'..='q');
-        let expected = char_collect!('e'..='g', 'm'..='q');
-        assert_eq!(collection_a.intersection(&collection_b), expected);
-        assert_eq!(collection_b.intersection(&collection_a), expected);
-    }
-    // #[test]
-    // fn test_macro_expressions() {
-    //     use unicode_blocks::UnicodeBlockId::Arabic;
-    //     let collection =
-    //         char_collect!({ ('c'..='e') + ('f'..='h') - ('a'..='d') + Arabic + (0x5..=0x42) });
-    //     assert_eq!(collection, char_collect!(0x5..=0x42, 'e'..='h', Arabic));
-    // }
-    #[test]
-    fn test_iter() {
-        let collection = char_collect!('a'..='c', 'j'..='l', 'x'..='z');
-        let v = collection.iter().collect::<Vec<char>>();
-        assert_eq!(v, vec!['a', 'b', 'c', 'j', 'k', 'l', 'x', 'y', 'z']);
-    }
-    #[test]
-    fn test_are_chars_adjacent() {
-        assert!(are_chars_adjacent(&'a', &'b'));
-        assert!(!are_chars_adjacent(&'b', &'a'));
-        assert!(!are_chars_adjacent(&'a', &'c'));
-    }
-}
diff --git a/components/char_collection/src/conversions.rs b/components/char_collection/src/conversions.rs
deleted file mode 100644
index 7da45e5467d..00000000000
--- a/components/char_collection/src/conversions.rs
+++ /dev/null
@@ -1,123 +0,0 @@
-// Copyright 2019 The Fuchsia Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//! Conversion (`From`) implementations for [CharCollection], via [MultiCharRange].
-use crate::{CharCollection, CharRange, MultiCharRange};
-use std::boxed::Box;
-use std::convert::TryFrom;
-use std::iter;
-use std::ops::RangeInclusive;
-
-macro_rules! impl_for_range_inclusive_int_type {
-    ($($t:ty),*) => {$(
-        impl MultiCharRange for RangeInclusive<$t> {
-            fn iter_ranges(&self) -> Box<dyn Iterator<Item=CharRange>> {
-                Box::new(iter::once(to_char_range!(self)))
-            }
-            fn range_count(&self) -> usize {
-                1
-            }
-    })*}
-}
-// This macro is needed because there is no way to express "can be cast as u32" using traits.
-macro_rules! to_char_range {
-    ($range:expr) => {
-        CharRange::closed(
-            char::try_from(*$range.start() as u32).unwrap(),
-            char::try_from(*$range.end() as u32).unwrap(),
-        )
-    };
-}
-impl MultiCharRange for char {
-    fn iter_ranges(&self) -> Box<dyn Iterator<Item = CharRange>> {
-        Box::new(std::iter::once(CharRange::closed(*self, *self)))
-    }
-    fn range_count(&self) -> usize {
-        1
-    }
-}
-impl MultiCharRange for CharRange {
-    fn iter_ranges(&self) -> Box<dyn Iterator<Item = CharRange>> {
-        Box::new(iter::once(self.clone()))
-    }
-    fn range_count(&self) -> usize {
-        1
-    }
-}
-impl MultiCharRange for RangeInclusive<char> {
-    fn iter_ranges(&self) -> Box<dyn Iterator<Item = CharRange>> {
-        Box::new(iter::once(CharRange::closed(*self.start(), *self.end())))
-    }
-    fn range_count(&self) -> usize {
-        1
-    }
-}
-impl_for_range_inclusive_int_type!(u8, i8, u32, i32);
-
-impl<T: MultiCharRange> From<&T> for CharCollection {
-    fn from(source: &T) -> Self {
-        let mut collection = CharCollection::new();
-        collection.insert(source);
-        collection
-    }
-}
-#[cfg(test)]
-mod multi_char_range_tests {
-    use crate::{CharRange, MultiCharRange};
-    use paste;
-    #[test]
-    fn test_char() {
-        let source = 'a';
-        assert_eq!(
-            source.iter_ranges().collect::<Vec<CharRange>>(),
-            vec![chars!('a'..='a')]
-        );
-        assert_eq!(source.range_count(), 1);
-    }
-    #[test]
-    fn test_char_range() {
-        let source = chars!('d'..='g');
-        assert_eq!(
-            source.iter_ranges().collect::<Vec<CharRange>>(),
-            vec![chars!('d'..='g')]
-        );
-        assert_eq!(source.range_count(), 1);
-    }
-    #[test]
-    fn test_range_inclusive_char() {
-        let source = 'd'..='g';
-        assert_eq!(
-            source.iter_ranges().collect::<Vec<CharRange>>(),
-            vec![chars!('d'..='g')]
-        );
-        assert_eq!(source.range_count(), 1);
-    }
-    macro_rules! test_range_inclusive_int {
-        ($t:ty) => {
-            paste::item! {
-                #[test]
-                fn [<test_char_range_inclusive_ $t>]() {
-                    let source: std::ops::RangeInclusive<$t> = 0x0..=0x9;
-                        assert_eq!(
-                            source.iter_ranges().collect::<Vec<CharRange>>(),
-                            vec![chars!('\u{0}'..='\u{9}')]
-                    );
-                    assert_eq!(source.range_count(), 1);
-                }
-            }
-        };
-    }
-    test_range_inclusive_int!(u8);
-    test_range_inclusive_int!(i8);
-    test_range_inclusive_int!(u32);
-    test_range_inclusive_int!(i32);
-}
-#[cfg(test)]
-mod from_tests {
-    use crate::CharCollection;
-    #[test]
-    fn test_char() {
-        let actual: CharCollection = (&'a').into();
-        assert_eq!(actual, char_collect!('a'..='a'));
-    }
-}
diff --git a/components/char_collection/src/lib.rs b/components/char_collection/src/lib.rs
deleted file mode 100644
index 54eacbe847e..00000000000
--- a/components/char_collection/src/lib.rs
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2019 The Fuchsia Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-#[macro_use]
-mod macros;
-mod char_collection;
-mod conversions;
-mod operators;
-mod uniset;
-pub use char_collection::CharCollection;
-pub use char_collection::CharIter;
-pub use char_collection::CharRange;
-pub use char_collection::MultiCharRange;
-pub use conversions::*;
-pub use operators::*;
-pub use uniset::UnicodeSet;
diff --git a/components/char_collection/src/macros.rs b/components/char_collection/src/macros.rs
deleted file mode 100644
index 49569781f50..00000000000
--- a/components/char_collection/src/macros.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright 2019 The Fuchsia Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-/// Generate a [CharCollection] from a sequence of `char`s,
-/// [CharRanges](unic_char_range::CharRange), or Unicode [Blocks](unic_ucd_block::Block).
-///
-/// The macro can be used with either a comma-separated list of items, or with an expression
-/// representing set operations.
-///
-#[macro_export]
-macro_rules! char_collect {
-    ({ $($x:tt)+ }) => {
-        {
-            $crate::CharCollection::new() + $($x)*
-        }
-    };
-    ( $( $x:expr ),* ) => {
-        {
-            // Allow unused mut in case the collection is empty.
-            #[allow(unused_mut)]
-            let mut col = $crate::CharCollection::new();
-            $(
-                col.insert(& $x);
-            )*
-            col
-        }
-    };
-}
-#[macro_export]
-macro_rules! chars {
-    ($low:tt .. $high:tt) => {
-        $crate::CharRange::open_right($low, $high)
-    };
-    ($low:tt ..= $high:tt) => {
-        $crate::CharRange::closed($low, $high)
-    };
-    (..) => {
-        $crate::CharRange::all()
-    };
-}
diff --git a/components/char_collection/src/operators.rs b/components/char_collection/src/operators.rs
deleted file mode 100644
index cec8cad4e3d..00000000000
--- a/components/char_collection/src/operators.rs
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright 2019 The Fuchsia Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//! Implementations of standard operators for [CharCollection].
-//!
-//! `+` and `|` are equivalent. `+` is easier to use with `-`, as they have the same operator
-//! precedence.
-use crate::{CharCollection, MultiCharRange};
-use std::convert::Into;
-use std::ops;
-impl<V: MultiCharRange> ops::BitOr<V> for CharCollection {
-    type Output = CharCollection;
-    fn bitor(self, rhs: V) -> Self::Output {
-        let result: CharCollection = self.into();
-        result.union(&rhs)
-    }
-}
-impl<V: MultiCharRange> ops::Add<V> for CharCollection {
-    type Output = CharCollection;
-    fn add(self, rhs: V) -> Self::Output {
-        let result: CharCollection = self.into();
-        result.union(&rhs)
-    }
-}
-impl<V: MultiCharRange> ops::BitOrAssign<V> for CharCollection {
-    fn bitor_assign(&mut self, rhs: V) {
-        self.insert(&rhs);
-    }
-}
-impl<V: MultiCharRange> ops::AddAssign<V> for CharCollection {
-    fn add_assign(&mut self, rhs: V) {
-        self.insert(&rhs);
-    }
-}
-impl<V: MultiCharRange> ops::Sub<V> for CharCollection {
-    type Output = CharCollection;
-    fn sub(self, rhs: V) -> Self::Output {
-        self.difference(&rhs)
-    }
-}
-impl<V: MultiCharRange> ops::SubAssign<V> for CharCollection {
-    fn sub_assign(&mut self, rhs: V) {
-        self.remove(&rhs);
-    }
-}
-impl<V: MultiCharRange> ops::BitAnd<V> for CharCollection {
-    type Output = CharCollection;
-    fn bitand(self, rhs: V) -> Self::Output {
-        self.intersection(&rhs)
-    }
-}
-impl<V: MultiCharRange> ops::BitAndAssign<V> for CharCollection {
-    fn bitand_assign(&mut self, rhs: V) {
-        *self = self.intersection(&rhs);
-    }
-}
-impl ops::Not for CharCollection {
-    type Output = CharCollection;
-    fn not(self) -> Self::Output {
-        self.complement()
-    }
-}
diff --git a/components/char_collection/src/uniset.rs b/components/char_collection/src/uniset.rs
deleted file mode 100644
index 950cb2c74cc..00000000000
--- a/components/char_collection/src/uniset.rs
+++ /dev/null
@@ -1,321 +0,0 @@
-use std::{boxed::Box, error::Error, iter::Iterator, slice::Iter, vec::Vec};
-
-const UNICODESET_MAX: u32 = 0x10FFFF; // does max imply inclusive? else should be 10FFFF
-const UNICODESET_MIN: u32 = 0x000000;
-const BMP_MAX: u32 = 0xFFFF;
-
-/// Given string representation of inversion list create set
-///
-/// Requires starting capacity integer, followed by space delimited integer code points.
-/// There must be an even number of elements (not including the capacity int), and must be
-/// in ascending sorted order.
-///
-/// Example String: `4 0 5 10 15` designates a capacity of size 4, followed by 2 ranges
-/// The ranges are {0, 4} and {10, 14} inclusive
-fn parse_serial_string(serialize_str: &str) -> Result<Vec<u32>, Box<dyn Error>> {
-    let mut serialize = serialize_str.split(" ");
-    let capacity: usize = serialize.next().unwrap().parse()?;
-    if capacity % 2 != 0 {
-        return Err("Capacity must be even".into());
-    }
-    let mut serialized_vec: Vec<u32> = Vec::with_capacity(capacity);
-    let mut prev: Option<u32> = None;
-    for str_ele in serialize {
-        // unsure if the capacity matters if we can expand, but that might be an issue if you expand into too much memory
-        // otherwise shrink_to_fit is possible
-        let parsed: u32 = str_ele.parse()?;
-        if serialized_vec.len() + 1 > serialized_vec.capacity() {
-            return Err("Serialization capacity is too small".into());
-        }
-        if Some(parsed) <= prev {
-            return Err("Serialization must be sorted".into());
-        }
-        serialized_vec.push(parsed);
-        prev = Some(parsed);
-    }
-    if serialized_vec.len() % 2 != 0 {
-        return Err("Serialization must be even".into());
-    }
-    serialized_vec.shrink_to_fit(); // necessary if the length < capacity
-    Ok(serialized_vec)
-}
-
-/// UnicodeSet membership wrapper
-///
-/// Provides exposure to membership functions and constructors from serialized UnicodeSets
-/// and predefined ranges.
-//#[derive(Copy, Clone, Debug, Eq)]
-pub struct UnicodeSet {
-    // If we wanted to use an array to keep the memory on the stack, there is an unsafe nightly feature
-    // https://doc.rust-lang.org/nightly/core/array/trait.FixedSizeArray.html
-    // Allows for traits of fixed size arrays
-    set: Vec<u32>, // is set misleading? could be uset
-}
-
-impl UnicodeSet {
-    /// Returns Result of UnicodeSet from serialized string
-    ///
-    /// Returns an error if the serialized string fails to parse.
-    /// The serialized string requires starting capacity integer, followed by space delimited
-    /// integer code points. There must be an even number of elements (not including the
-    /// capacity int), and must be in ascending sorted order.
-    ///
-    /// Example String: `"4 0 5 10 15"` designates a capacity of size `4`, followed by 2 ranges
-    /// The ranges are `{0, 4}` and `{10, 14}` inclusive
-    pub fn new(serialize: &str) -> Result<UnicodeSet, Box<dyn Error>> {
-        match parse_serial_string(serialize) {
-            Ok(serialize) => Ok(UnicodeSet { set: serialize }),
-            Err(e) => Err(e),
-        }
-    }
-
-    /// Returns Result of UnicodeSet from a single pair of integers defining a range
-    ///
-    /// `start`: inclusive, `end`: exclusive
-    ///
-    /// Returns an error if the range is invalid (out of order and out of bounds).
-    ///
-    /// Example Call: `UnicodeSet::from_range(&0, &15)`
-    pub fn from_range(start: &u32, end: &u32) -> Result<UnicodeSet, Box<dyn Error>> {
-        if start > end {
-            return Err("Range is out of order".into());
-        }
-        if start < &UNICODESET_MIN || end > &UNICODESET_MAX {
-            return Err("Range is out of bounds".into());
-        }
-        Ok(UnicodeSet {
-            set: vec![*start, *end],
-        })
-    }
-
-    /// Returns UnicodeSet spanning entire Unicode range
-    ///
-    /// The range spans from `0x0 -> 0x10FFFF` inclusive
-    pub fn all() -> UnicodeSet {
-        UnicodeSet {
-            set: vec![UNICODESET_MIN, UNICODESET_MAX + 1],
-        }
-    }
-
-    /// Returns UnicodeSet spanning BMP range
-    ///
-    /// The range spans from `0x0 -> 0xFFFF` inclusive
-    pub fn bmp() -> UnicodeSet {
-        UnicodeSet {
-            set: vec![UNICODESET_MIN, BMP_MAX + 1],
-        }
-    }
-    /// Returns an `Iter` of start and stop `u32` points of the UnicodeSet
-    pub fn iter(&self) -> Iter<u32> {
-        self.set.iter()
-    }
-
-    /// Returns the cardinality of the UnicodeSet
-    pub fn size(&self) -> Result<usize, Box<dyn Error>> {
-        if self.set.len() < 2 {
-            return Err("UnicodeSet length < 2".into());
-        }
-        let end: u32 = self.iter().skip(1).step_by(2).sum::<u32>();
-        let start: u32 = self.iter().step_by(2).sum::<u32>();
-        Ok((end - start) as usize)
-    }
-
-    /// Returns whether or not the UnicodeSet is empty
-    pub fn is_empty(&self) -> bool {
-        self.set.len() < 2 // unsure if this is appropriate definition of just self.set.is_empty()
-    }
-
-    /// Wrapper for contains conditions closures
-    fn contains<C>(&self, query: &u32, condition: C) -> bool
-    where
-        C: Fn(usize) -> bool,
-    {
-        match self.set.binary_search(query) {
-            Ok(pos) => {
-                if pos % 2 == 0 {
-                    return condition(pos);
-                } else {
-                    false
-                }
-            }
-            Err(pos) => {
-                if pos % 2 != 0 && pos < self.set.len() {
-                    return condition(pos);
-                } else {
-                    false
-                }
-            }
-        }
-    }
-
-    /// Checks to see the query is in the UnicodeSet
-    ///
-    /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
-    /// in the set using `std::vec::Vec` implementation
-    ///
-    /// Example: `contains_point(&10)`
-    pub fn contains_point(&self, query: &u32) -> bool {
-        let condition_closure = |_: usize| -> bool { true };
-        self.contains(query, condition_closure)
-    }
-
-    /// Checks to see if the range is in the UnicodeSet, returns a Result
-    ///
-    /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
-    /// in the set using `std::vec::Vec` implementation
-    ///
-    /// Only runs the search once on the `start` parameter, while the `end` parameter is checked
-    /// in a single `O(1)` step
-    ///
-    /// Example: `contains_range(&0, &10)`
-    pub fn contains_range(&self, start: &u32, end: &u32) -> Result<bool, Box<dyn Error>> {
-        if start >= end {
-            return Err("Range cannot be out of order".into());
-        }
-        let condition_closure = |pos: usize| -> bool {
-            if end < &self.set[pos + 1] {
-                true
-            } else {
-                false
-            }
-        };
-        return Ok(self.contains(start, condition_closure));
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::{parse_serial_string, UnicodeSet, BMP_MAX, UNICODESET_MAX, UNICODESET_MIN};
-    // parse_serial_string
-    #[test]
-    fn test_parse_serial_string() {
-        let expected = vec![2, 3, 4, 5];
-        let actual = parse_serial_string("4 2 3 4 5").unwrap();
-        assert_eq!(actual, expected);
-    }
-    #[test]
-    fn test_parse_serial_string_no_char() {
-        assert!(parse_serial_string("4 2 A 3 4 5").is_err());
-    }
-    #[test]
-    fn test_parse_serial_string_empty() {
-        assert!(parse_serial_string("").is_err());
-    }
-    #[test]
-    fn test_parse_serial_string_wrong_format() {
-        assert!(parse_serial_string("[4, 2, 3, 4, 5  ]").is_err());
-    }
-    #[test]
-    fn test_parse_serial_string_wrong_order() {
-        assert!(parse_serial_string("4 1 0 4 2").is_err());
-    }
-    #[test]
-    fn test_parse_serial_string_single_char_error() {
-        assert!(parse_serial_string("4 1 1 2 2").is_err());
-    }
-    #[test]
-    fn test_parse_serial_string_capacity_not_even() {
-        assert!(parse_serial_string("3 2 3 4").is_err());
-    }
-    #[test]
-    fn test_parse_serial_string_size_not_even() {
-        assert!(parse_serial_string("4 3 2 1").is_err());
-    }
-
-    // UnicodeSet constructors
-    #[test]
-    fn test_unicodeset_new() {
-        let expected = vec![2, 3, 4, 5];
-        let actual = UnicodeSet::new("4 2 3 4 5").unwrap().set;
-        assert_eq!(actual, expected);
-    }
-    #[test]
-    fn test_unicodeset_new_error() {
-        assert!(UnicodeSet::new("3 2 4 3").is_err());
-    }
-    #[test]
-    fn test_unicodeset_from_range() {
-        let expected = vec![4, 10];
-        let actual = UnicodeSet::from_range(&4, &10).unwrap().set;
-        assert_eq!(actual, expected);
-    }
-    #[test]
-    fn test_unicodeset_from_range_bad_order() {
-        assert!(UnicodeSet::from_range(&10, &5).is_err());
-    }
-    #[test]
-    fn test_unicodeset_from_range_out_of_bounds() {
-        assert!(UnicodeSet::from_range(&0, &0x110000).is_err());
-    }
-    #[test]
-    fn test_unicodeset_all() {
-        let expected = vec![UNICODESET_MIN, UNICODESET_MAX + 1];
-        assert_eq!(UnicodeSet::all().set, expected);
-    }
-    #[test]
-    fn test_unicodeset_bmp() {
-        let expected = vec![UNICODESET_MIN, BMP_MAX + 1];
-        assert_eq!(UnicodeSet::bmp().set, expected);
-    }
-    #[test]
-    fn test_unicodeset_contains() {
-        let check = UnicodeSet::new("4 2 5 10 15").unwrap();
-        assert!(check.contains_point(&2));
-        assert!(check.contains_point(&4));
-        assert!(check.contains_point(&10));
-        assert!(check.contains_point(&14));
-    }
-    #[test]
-    fn test_unicodeset_contains_false() {
-        let check = UnicodeSet::new("4 2 5 10 15").unwrap();
-        assert!(!check.contains_point(&1));
-        assert!(!check.contains_point(&5));
-        assert!(!check.contains_point(&9));
-        assert!(!check.contains_point(&15));
-        assert!(!check.contains_point(&16));
-    }
-    #[test]
-    fn test_unicodeset_contains_range() {
-        let check = UnicodeSet::new("4 0 10 15 25").unwrap();
-        assert!(check.contains_range(&2, &5).unwrap());
-        assert!(check.contains_range(&0, &9).unwrap());
-        assert!(check.contains_range(&15, &24).unwrap());
-    }
-    #[test]
-    fn test_unicodeset_contains_range_false() {
-        let check = UnicodeSet::new("4 0 10 15 25").unwrap();
-        assert!(!check.contains_range(&0, &10).unwrap());
-        assert!(!check.contains_range(&15, &25).unwrap());
-        assert!(!check.contains_range(&0, &16).unwrap());
-        assert!(!check.contains_range(&10, &15).unwrap());
-        assert!(!check.contains_range(&11, &14).unwrap());
-    }
-    #[test]
-    fn test_unicodeset_contains_range_invalid() {
-        let check = UnicodeSet::all();
-        assert!(check.contains_range(&10, &0).is_err());
-        assert!(check.contains_range(&0, &0).is_err());
-    }
-    #[test]
-    fn test_unicodeset_size() {
-        let check = UnicodeSet::new("4 2 5 10 15").unwrap();
-        assert_eq!(8, check.size().unwrap());
-        let check = UnicodeSet::all();
-        let expected = UNICODESET_MAX + 1 - UNICODESET_MIN;
-        assert_eq!(expected as usize, check.size().unwrap());
-    }
-    #[test]
-    fn test_unicodeset_size_error() {
-        let check = UnicodeSet { set: vec![0] };
-        assert!(check.size().is_err());
-    }
-    #[test]
-    fn test_unicodeset_is_empty() {
-        let check = UnicodeSet { set: vec![] };
-        assert!(check.is_empty());
-        let check = UnicodeSet { set: vec![0] };
-        assert!(check.is_empty());
-        let check = UnicodeSet::all();
-        assert!(!check.is_empty());
-    }
-}

From cc781c81e876cae03b957e042cef150d2262cf29 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Tue, 23 Jun 2020 22:59:31 +0000
Subject: [PATCH 16/30] Update to repo

---
 components/uniset/Cargo.toml                  |  14 +
 components/uniset/README.md                   |  12 +
 .../uniset/meta/char_collection_lib_test.cmx  |   5 +
 components/uniset/src/lib.rs                  |   6 +
 components/uniset/src/uniset.rs               | 328 ++++++++++++++++++
 5 files changed, 365 insertions(+)
 create mode 100644 components/uniset/Cargo.toml
 create mode 100644 components/uniset/README.md
 create mode 100644 components/uniset/meta/char_collection_lib_test.cmx
 create mode 100644 components/uniset/src/lib.rs
 create mode 100644 components/uniset/src/uniset.rs

diff --git a/components/uniset/Cargo.toml b/components/uniset/Cargo.toml
new file mode 100644
index 00000000000..b7e4103b289
--- /dev/null
+++ b/components/uniset/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "icu4x-unicodeset"
+description = "API for managing Unicode Language and Locale Identifiers"
+version = "0.0.1"
+authors = ["The ICU4X Project Developers"]
+edition = "2018"
+readme = "README.md"
+repository = "https://github.com/unicode-org/icu4x"
+license-file = "../../LICENSE"
+categories = ["internationalization"]
+include = [
+    "src/**/*",
+    "Cargo.toml",
+]
diff --git a/components/uniset/README.md b/components/uniset/README.md
new file mode 100644
index 00000000000..9cb580caa61
--- /dev/null
+++ b/components/uniset/README.md
@@ -0,0 +1,12 @@
+# ICU4X
+
+ICU4X is a set of internationalization components for Unicode.
+
+# Status [![crates.io](http://meritbadge.herokuapp.com/icu4x)](https://crates.io/crates/icu4x)
+
+The project is in an incubation period.
+
+# Authors
+
+The project is managed by a subcommittee of ICU-TC in the Unicode Consortium focused on providing solutions for client-side internationalization.
+
diff --git a/components/uniset/meta/char_collection_lib_test.cmx b/components/uniset/meta/char_collection_lib_test.cmx
new file mode 100644
index 00000000000..3bb56a96aa2
--- /dev/null
+++ b/components/uniset/meta/char_collection_lib_test.cmx
@@ -0,0 +1,5 @@
+{
+    "program": {
+        "binary": "test/char_collection_lib_test"
+    }
+}
\ No newline at end of file
diff --git a/components/uniset/src/lib.rs b/components/uniset/src/lib.rs
new file mode 100644
index 00000000000..c9e5f2c2d87
--- /dev/null
+++ b/components/uniset/src/lib.rs
@@ -0,0 +1,6 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#[macro_use]
+mod uniset;
+pub use uniset::UnicodeSet;
diff --git a/components/uniset/src/uniset.rs b/components/uniset/src/uniset.rs
new file mode 100644
index 00000000000..f265fb3cf41
--- /dev/null
+++ b/components/uniset/src/uniset.rs
@@ -0,0 +1,328 @@
+use std::{boxed::Box, error::Error, iter::Iterator, slice::Iter, vec::Vec};
+
+const CODEPOINT_MAX: u32 = 0x10FFFF; // does max imply inclusive? else should be 10FFFF
+const CODEPOINT_MIN: u32 = 0x000000;
+const BMP_MAX: u32 = 0xFFFF;
+
+/// Given string representation of inversion list create set
+///
+/// See UnicodeSet::new for conditions
+fn parse_serial_string(serialize_str: &str) -> Result<Vec<u32>, Box<dyn Error>> {
+    let mut serialize = serialize_str.split(" ");
+    let capacity: usize = serialize
+        .next()
+        .ok_or("Splitting did not yield anything")?
+        .parse()?;
+    if capacity % 2 != 0 {
+        return Err("Capacity must be even".into());
+    }
+    let mut serialized_vec: Vec<u32> = Vec::with_capacity(capacity);
+    let mut prev: Option<u32> = None;
+    for str_elem in serialize {
+        let parsed: u32 = str_elem.parse()?;
+        if serialized_vec.len() + 1 > serialized_vec.capacity() {
+            return Err("Serialization capacity is too small".into());
+        }
+        if Some(parsed) <= prev {
+            return Err("Serialization must be sorted".into());
+        }
+        serialized_vec.push(parsed);
+        prev = Some(parsed);
+    }
+    if serialized_vec.len() % 2 != 0 {
+        return Err("Serialization must be even".into());
+    }
+    serialized_vec.shrink_to_fit(); // necessary if the length < capacity
+    Ok(serialized_vec)
+}
+
+/// UnicodeSet membership wrapper
+///
+/// Provides exposure to membership functions and constructors from serialized UnicodeSets
+/// and predefined ranges.
+//#[derive(Copy, Clone, Debug, Eq)]
+pub struct UnicodeSet {
+    // If we wanted to use an array to keep the memory on the stack, there is an unsafe nightly feature
+    // https://doc.rust-lang.org/nightly/core/array/trait.FixedSizeArray.html
+    // Allows for traits of fixed size arrays
+    inv_list: Vec<u32>,
+}
+
+impl UnicodeSet {
+    /// Returns Result of UnicodeSet from serialized string
+    ///
+    /// Returns an error if the serialized string fails to parse.
+    /// The serialized string requires starting capacity integer, followed by space delimited
+    /// integer code points. There must be an even number of elements (not including the
+    /// capacity int), and must be in ascending sorted order.
+    ///
+    /// Example String: `"4 0 5 10 15"` designates a capacity of size `4`, followed by 2 ranges
+    /// The ranges are `{0, 4}` and `{10, 14}` inclusive
+    pub fn new(serialize: &str) -> Result<UnicodeSet, Box<dyn Error>> {
+        match parse_serial_string(serialize) {
+            Ok(serialize) => Ok(UnicodeSet {
+                inv_list: serialize,
+            }),
+            Err(e) => Err(e),
+        }
+    }
+
+    /// Returns Result of UnicodeSet from a single pair of integers defining a range
+    ///
+    /// `start`: inclusive, `end`: exclusive
+    ///
+    /// Returns an error if the range is invalid (out of order and out of bounds).
+    ///
+    /// Example Call: `UnicodeSet::from_range(&0, &15)`
+    pub fn from_range(start: &u32, end: &u32) -> Result<UnicodeSet, Box<dyn Error>> {
+        if start > end {
+            return Err("Range is out of order".into());
+        }
+        if start < &CODEPOINT_MIN || end > &CODEPOINT_MAX {
+            return Err("Range is out of bounds".into());
+        }
+        Ok(UnicodeSet {
+            inv_list: vec![*start, *end],
+        })
+    }
+
+    /// Returns UnicodeSet spanning entire Unicode range
+    ///
+    /// The range spans from `0x0 -> 0x10FFFF` inclusive
+    pub fn all() -> UnicodeSet {
+        UnicodeSet {
+            inv_list: vec![CODEPOINT_MIN, CODEPOINT_MAX + 1],
+        }
+    }
+
+    /// Returns UnicodeSet spanning BMP range
+    ///
+    /// The range spans from `0x0 -> 0xFFFF` inclusive
+    pub fn bmp() -> UnicodeSet {
+        UnicodeSet {
+            inv_list: vec![CODEPOINT_MIN, BMP_MAX + 1],
+        }
+    }
+
+    /// Returns an `Iter` of start and stop `u32` points of the UnicodeSet
+    pub fn iter(&self) -> Iter<u32> {
+        self.inv_list.iter()
+    }
+
+    /// Returns the cardinality of the UnicodeSet
+    ///
+    ///
+    pub fn size(&self) -> Result<usize, Box<dyn Error>> {
+        if self.inv_list.len() < 2 {
+            return Err("UnicodeSet length < 2".into());
+        }
+        let end_point_sum: u32 = self.iter().skip(1).step_by(2).sum::<u32>();
+        let start_point_sum: u32 = self.iter().step_by(2).sum::<u32>();
+        Ok((end_point_sum - start_point_sum) as usize)
+    }
+
+    /// Returns whether or not the UnicodeSet is empty
+    pub fn is_empty(&self) -> bool {
+        self.inv_list.len() < 2 // unsure if this is appropriate definition of just self.inv_list.is_empty()
+    }
+
+    /// Wrapper for contains conditions closures
+    fn contains<C>(&self, query: &u32, condition: C) -> bool
+    where
+        C: Fn(usize) -> bool,
+    {
+        match self.inv_list.binary_search(query) {
+            Ok(pos) => {
+                if pos % 2 == 0 {
+                    condition(pos)
+                } else {
+                    false
+                }
+            }
+            Err(pos) => {
+                if pos % 2 != 0 && pos < self.inv_list.len() {
+                    condition(pos)
+                } else {
+                    false
+                }
+            }
+        }
+    }
+
+    /// Checks to see the query is in the UnicodeSet
+    ///
+    /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
+    /// in the set using `std::vec::Vec` implementation
+    ///
+    /// Example: `contains_point(&10)`
+    pub fn contains_point(&self, query: &u32) -> bool {
+        let condition_closure = |_: usize| -> bool { true };
+        self.contains(query, condition_closure)
+    }
+
+    /// Checks to see if the range is in the UnicodeSet, returns a Result
+    ///
+    /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
+    /// in the set using `std::vec::Vec` implementation
+    ///
+    /// Only runs the search once on the `start` parameter, while the `end` parameter is checked
+    /// in a single `O(1)` step
+    ///
+    /// Example: `contains_range(&0, &10)`
+    pub fn contains_range(&self, start: &u32, end: &u32) -> Result<bool, Box<dyn Error>> {
+        if start >= end {
+            return Err("Range cannot be out of order".into());
+        }
+        let condition_closure = |pos: usize| -> bool {
+            if end < &self.inv_list[pos + 1] {
+                true
+            } else {
+                false
+            }
+        };
+        return Ok(self.contains(start, condition_closure));
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{parse_serial_string, UnicodeSet, BMP_MAX, CODEPOINT_MAX, CODEPOINT_MIN};
+
+    // parse_serial_string
+    #[test]
+    fn test_parse_serial_string() {
+        let expected = vec![2, 3, 4, 5];
+        let actual = parse_serial_string("4 2 3 4 5").unwrap();
+        assert_eq!(actual, expected);
+    }
+    #[test]
+    fn test_parse_serial_string_no_char() {
+        assert!(parse_serial_string("4 2 A 3 4 5").is_err());
+    }
+    #[test]
+    fn test_parse_serial_string_empty() {
+        assert!(parse_serial_string("").is_err());
+    }
+    #[test]
+    fn test_parse_serial_string_wrong_format() {
+        assert!(parse_serial_string("[4, 2, 3, 4, 5  ]").is_err());
+    }
+    #[test]
+    fn test_parse_serial_string_wrong_order() {
+        assert!(parse_serial_string("4 1 0 4 2").is_err());
+    }
+    #[test]
+    fn test_parse_serial_string_single_char_error() {
+        assert!(parse_serial_string("4 1 1 2 2").is_err());
+    }
+    #[test]
+    fn test_parse_serial_string_capacity_not_even() {
+        assert!(parse_serial_string("3 2 3 4").is_err());
+    }
+    #[test]
+    fn test_parse_serial_string_size_not_even() {
+        assert!(parse_serial_string("4 3 2 1").is_err());
+    }
+
+    // UnicodeSet constructors
+    #[test]
+    fn test_unicodeset_new() {
+        let expected = vec![2, 3, 4, 5];
+        let actual = UnicodeSet::new("4 2 3 4 5").unwrap().inv_list;
+        assert_eq!(actual, expected);
+    }
+    #[test]
+    fn test_unicodeset_new_error() {
+        assert!(UnicodeSet::new("3 2 4 3").is_err());
+    }
+    #[test]
+    fn test_unicodeset_from_range() {
+        let expected = vec![4, 10];
+        let actual = UnicodeSet::from_range(&4, &10).unwrap().inv_list;
+        assert_eq!(actual, expected);
+    }
+    #[test]
+    fn test_unicodeset_from_range_bad_order() {
+        assert!(UnicodeSet::from_range(&10, &5).is_err());
+    }
+    #[test]
+    fn test_unicodeset_from_range_out_of_bounds() {
+        assert!(UnicodeSet::from_range(&0, &0x110000).is_err());
+    }
+    #[test]
+    fn test_unicodeset_all() {
+        let expected = vec![CODEPOINT_MIN, CODEPOINT_MAX + 1];
+        assert_eq!(UnicodeSet::all().inv_list, expected);
+    }
+    #[test]
+    fn test_unicodeset_bmp() {
+        let expected = vec![CODEPOINT_MIN, BMP_MAX + 1];
+        assert_eq!(UnicodeSet::bmp().inv_list, expected);
+    }
+
+    // UnicodeSet membership functions
+    #[test]
+    fn test_unicodeset_contains() {
+        let check = UnicodeSet::new("4 2 5 10 15").unwrap();
+        assert!(check.contains_point(&2));
+        assert!(check.contains_point(&4));
+        assert!(check.contains_point(&10));
+        assert!(check.contains_point(&14));
+    }
+    #[test]
+    fn test_unicodeset_contains_false() {
+        let check = UnicodeSet::new("4 2 5 10 15").unwrap();
+        assert!(!check.contains_point(&1));
+        assert!(!check.contains_point(&5));
+        assert!(!check.contains_point(&9));
+        assert!(!check.contains_point(&15));
+        assert!(!check.contains_point(&16));
+    }
+    #[test]
+    fn test_unicodeset_contains_range() {
+        let check = UnicodeSet::new("4 0 10 15 25").unwrap();
+        assert!(check.contains_range(&2, &5).unwrap());
+        assert!(check.contains_range(&0, &9).unwrap());
+        assert!(check.contains_range(&15, &24).unwrap());
+    }
+    #[test]
+    fn test_unicodeset_contains_range_false() {
+        let check = UnicodeSet::new("4 0 10 15 25").unwrap();
+        assert!(!check.contains_range(&0, &10).unwrap());
+        assert!(!check.contains_range(&15, &25).unwrap());
+        assert!(!check.contains_range(&0, &16).unwrap());
+        assert!(!check.contains_range(&10, &15).unwrap());
+        assert!(!check.contains_range(&11, &14).unwrap());
+    }
+    #[test]
+    fn test_unicodeset_contains_range_invalid() {
+        let check = UnicodeSet::all();
+        assert!(check.contains_range(&10, &0).is_err());
+        assert!(check.contains_range(&0, &0).is_err());
+    }
+    #[test]
+    fn test_unicodeset_size() {
+        let check = UnicodeSet::new("4 2 5 10 15").unwrap();
+        assert_eq!(8, check.size().unwrap());
+        let check = UnicodeSet::all();
+        let expected = CODEPOINT_MAX + 1 - CODEPOINT_MIN;
+        assert_eq!(expected as usize, check.size().unwrap());
+    }
+    #[test]
+    fn test_unicodeset_size_error() {
+        let check = UnicodeSet { inv_list: vec![0] };
+        assert!(check.size().is_err());
+    }
+    #[test]
+    fn test_unicodeset_is_empty() {
+        let check = UnicodeSet { inv_list: vec![] };
+        assert!(check.is_empty());
+        let check = UnicodeSet { inv_list: vec![0] };
+        assert!(check.is_empty());
+    }
+    #[test]
+    fn test_unicodeset_is_not_empty() {
+        let check = UnicodeSet::all();
+        assert!(!check.is_empty());
+    }
+}

From cfd9edfc4fb614b529dceea7953b7b2be2ef6870 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Wed, 24 Jun 2020 22:37:18 +0000
Subject: [PATCH 17/30] formatting and cleaning up changes

---
 components/uniset/src/uniset.rs | 160 ++++++++++++++++++++------------
 1 file changed, 99 insertions(+), 61 deletions(-)

diff --git a/components/uniset/src/uniset.rs b/components/uniset/src/uniset.rs
index f265fb3cf41..a8185957a62 100644
--- a/components/uniset/src/uniset.rs
+++ b/components/uniset/src/uniset.rs
@@ -1,7 +1,10 @@
 use std::{boxed::Box, error::Error, iter::Iterator, slice::Iter, vec::Vec};
 
-const CODEPOINT_MAX: u32 = 0x10FFFF; // does max imply inclusive? else should be 10FFFF
+/// Represents the maximum Unicode Code Point, inclusive
+const CODEPOINT_MAX: u32 = 0x10FFFF;
+/// Represents the minimum UNicode Code Point, inclusive
 const CODEPOINT_MIN: u32 = 0x000000;
+/// Represents the end code point of the Basic Multilingual Plane range, starting from code point 0 , inclusive
 const BMP_MAX: u32 = 0xFFFF;
 
 /// Given string representation of inversion list create set
@@ -11,26 +14,43 @@ fn parse_serial_string(serialize_str: &str) -> Result<Vec<u32>, Box<dyn Error>>
     let mut serialize = serialize_str.split(" ");
     let capacity: usize = serialize
         .next()
-        .ok_or("Splitting did not yield anything")?
+        .ok_or(format!(
+            "Splitting by spaces did not yield a capacity: {:?}",
+            serialize
+        ))?
         .parse()?;
     if capacity % 2 != 0 {
-        return Err("Capacity must be even".into());
+        return Err(format!("Capacity must be even. Parsed Capacity: {}", capacity).into());
     }
     let mut serialized_vec: Vec<u32> = Vec::with_capacity(capacity);
     let mut prev: Option<u32> = None;
     for str_elem in serialize {
         let parsed: u32 = str_elem.parse()?;
         if serialized_vec.len() + 1 > serialized_vec.capacity() {
-            return Err("Serialization capacity is too small".into());
+            return Err(format!(
+                "Serialization capacity is too small. Allocated Capacity: {}",
+                capacity
+            )
+            .into());
         }
         if Some(parsed) <= prev {
-            return Err("Serialization must be sorted".into());
+            return Err(format!(
+                "Serialization must be sorted. {:?} followed by {:?}",
+                Some(parsed),
+                prev
+            )
+            .into());
         }
         serialized_vec.push(parsed);
         prev = Some(parsed);
     }
     if serialized_vec.len() % 2 != 0 {
-        return Err("Serialization must be even".into());
+        return Err(format!(
+            "Serialization must be even. Serialization: {:?} Length: {}",
+            serialized_vec,
+            serialized_vec.len()
+        )
+        .into());
     }
     serialized_vec.shrink_to_fit(); // necessary if the length < capacity
     Ok(serialized_vec)
@@ -40,6 +60,7 @@ fn parse_serial_string(serialize_str: &str) -> Result<Vec<u32>, Box<dyn Error>>
 ///
 /// Provides exposure to membership functions and constructors from serialized UnicodeSets
 /// and predefined ranges.
+/// Implements an inversion list.
 //#[derive(Copy, Clone, Debug, Eq)]
 pub struct UnicodeSet {
     // If we wanted to use an array to keep the memory on the stack, there is an unsafe nightly feature
@@ -74,15 +95,19 @@ impl UnicodeSet {
     /// Returns an error if the range is invalid (out of order and out of bounds).
     ///
     /// Example Call: `UnicodeSet::from_range(&0, &15)`
-    pub fn from_range(start: &u32, end: &u32) -> Result<UnicodeSet, Box<dyn Error>> {
+    pub fn from_range(start: u32, end: u32) -> Result<UnicodeSet, Box<dyn Error>> {
         if start > end {
-            return Err("Range is out of order".into());
+            return Err(format!("Range is out of order. start: {} end: {}", start, end).into());
         }
-        if start < &CODEPOINT_MIN || end > &CODEPOINT_MAX {
-            return Err("Range is out of bounds".into());
+        if start < CODEPOINT_MIN || end > CODEPOINT_MAX {
+            return Err(format!(
+                "Range is out of bounds. start: {}, min: {}, end: {}, max: {}",
+                start, CODEPOINT_MIN, end, CODEPOINT_MAX
+            )
+            .into());
         }
         Ok(UnicodeSet {
-            inv_list: vec![*start, *end],
+            inv_list: vec![start, end],
         })
     }
 
@@ -104,21 +129,32 @@ impl UnicodeSet {
         }
     }
 
-    /// Returns an `Iter` of start and stop `u32` points of the UnicodeSet
+    /// Yields an iterator of start and stop points of ranges in the UnicodeSet
+    ///
+    /// Example:
+    ///
+    /// ```
+    /// use icu4x_unicodeset::UnicodeSet;
+    /// let example = UnicodeSet::new("4 0 10 15 20");
+    /// let mut example_iter = example.iter();
+    /// example_iter.next(); // => 0
+    /// example_iter.next(); // => 10
+    /// example_iter.next(); // => 10, etc.
+    /// ```
     pub fn iter(&self) -> Iter<u32> {
         self.inv_list.iter()
     }
 
-    /// Returns the cardinality of the UnicodeSet
-    ///
+    /// Returns the number of elements of the UnicodeSet
     ///
-    pub fn size(&self) -> Result<usize, Box<dyn Error>> {
-        if self.inv_list.len() < 2 {
-            return Err("UnicodeSet length < 2".into());
+    /// Returns an error if a complete range is not defined in the UnicodeSet
+    pub fn size(&self) -> usize {
+        if self.is_empty() {
+            return 0;
         }
         let end_point_sum: u32 = self.iter().skip(1).step_by(2).sum::<u32>();
         let start_point_sum: u32 = self.iter().step_by(2).sum::<u32>();
-        Ok((end_point_sum - start_point_sum) as usize)
+        (end_point_sum - start_point_sum) as usize
     }
 
     /// Returns whether or not the UnicodeSet is empty
@@ -126,12 +162,20 @@ impl UnicodeSet {
         self.inv_list.len() < 2 // unsure if this is appropriate definition of just self.inv_list.is_empty()
     }
 
-    /// Wrapper for contains conditions closures
-    fn contains<C>(&self, query: &u32, condition: C) -> bool
+    /// Wrapper for contains
+    ///
+    /// Takes in a single code point `query`, and a closure `condition`
+    /// to see if the `query` is located in the inversion list.
+    ///
+    /// Example:
+    ///
+    /// `let condition_closure = |_: usize| -> bool {true};`
+    /// `self.contains(10, condition_closure);`
+    fn contains<C>(&self, query: u32, condition: C) -> bool
     where
         C: Fn(usize) -> bool,
     {
-        match self.inv_list.binary_search(query) {
+        match self.inv_list.binary_search(&query) {
             Ok(pos) => {
                 if pos % 2 == 0 {
                     condition(pos)
@@ -155,8 +199,8 @@ impl UnicodeSet {
     /// in the set using `std::vec::Vec` implementation
     ///
     /// Example: `contains_point(&10)`
-    pub fn contains_point(&self, query: &u32) -> bool {
-        let condition_closure = |_: usize| -> bool { true };
+    pub fn contains_point(&self, query: u32) -> bool {
+        let condition_closure = |_| -> bool { true };
         self.contains(query, condition_closure)
     }
 
@@ -169,24 +213,19 @@ impl UnicodeSet {
     /// in a single `O(1)` step
     ///
     /// Example: `contains_range(&0, &10)`
-    pub fn contains_range(&self, start: &u32, end: &u32) -> Result<bool, Box<dyn Error>> {
+    pub fn contains_range(&self, start: u32, end: u32) -> Result<bool, Box<dyn Error>> {
         if start >= end {
             return Err("Range cannot be out of order".into());
         }
-        let condition_closure = |pos: usize| -> bool {
-            if end < &self.inv_list[pos + 1] {
-                true
-            } else {
-                false
-            }
-        };
-        return Ok(self.contains(start, condition_closure));
+        let condition_closure = |pos| end < self.inv_list[pos + 1];
+        Ok(self.contains(start, condition_closure))
     }
 }
 
 #[cfg(test)]
 mod tests {
     use super::{parse_serial_string, UnicodeSet, BMP_MAX, CODEPOINT_MAX, CODEPOINT_MIN};
+    use std::vec::Vec;
 
     // parse_serial_string
     #[test]
@@ -238,16 +277,16 @@ mod tests {
     #[test]
     fn test_unicodeset_from_range() {
         let expected = vec![4, 10];
-        let actual = UnicodeSet::from_range(&4, &10).unwrap().inv_list;
+        let actual = UnicodeSet::from_range(4, 10).unwrap().inv_list;
         assert_eq!(actual, expected);
     }
     #[test]
     fn test_unicodeset_from_range_bad_order() {
-        assert!(UnicodeSet::from_range(&10, &5).is_err());
+        assert!(UnicodeSet::from_range(10, 5).is_err());
     }
     #[test]
     fn test_unicodeset_from_range_out_of_bounds() {
-        assert!(UnicodeSet::from_range(&0, &0x110000).is_err());
+        assert!(UnicodeSet::from_range(0, 0x110000).is_err());
     }
     #[test]
     fn test_unicodeset_all() {
@@ -264,54 +303,53 @@ mod tests {
     #[test]
     fn test_unicodeset_contains() {
         let check = UnicodeSet::new("4 2 5 10 15").unwrap();
-        assert!(check.contains_point(&2));
-        assert!(check.contains_point(&4));
-        assert!(check.contains_point(&10));
-        assert!(check.contains_point(&14));
+        assert!(check.contains_point(2));
+        assert!(check.contains_point(4));
+        assert!(check.contains_point(10));
+        assert!(check.contains_point(14));
     }
     #[test]
     fn test_unicodeset_contains_false() {
         let check = UnicodeSet::new("4 2 5 10 15").unwrap();
-        assert!(!check.contains_point(&1));
-        assert!(!check.contains_point(&5));
-        assert!(!check.contains_point(&9));
-        assert!(!check.contains_point(&15));
-        assert!(!check.contains_point(&16));
+        assert!(!check.contains_point(1));
+        assert!(!check.contains_point(5));
+        assert!(!check.contains_point(9));
+        assert!(!check.contains_point(15));
+        assert!(!check.contains_point(16));
     }
     #[test]
     fn test_unicodeset_contains_range() {
         let check = UnicodeSet::new("4 0 10 15 25").unwrap();
-        assert!(check.contains_range(&2, &5).unwrap());
-        assert!(check.contains_range(&0, &9).unwrap());
-        assert!(check.contains_range(&15, &24).unwrap());
+        assert!(check.contains_range(2, 5).unwrap());
+        assert!(check.contains_range(0, 9).unwrap());
+        assert!(check.contains_range(15, 24).unwrap());
     }
     #[test]
     fn test_unicodeset_contains_range_false() {
         let check = UnicodeSet::new("4 0 10 15 25").unwrap();
-        assert!(!check.contains_range(&0, &10).unwrap());
-        assert!(!check.contains_range(&15, &25).unwrap());
-        assert!(!check.contains_range(&0, &16).unwrap());
-        assert!(!check.contains_range(&10, &15).unwrap());
-        assert!(!check.contains_range(&11, &14).unwrap());
+        assert!(!check.contains_range(0, 10).unwrap());
+        assert!(!check.contains_range(15, 25).unwrap());
+        assert!(!check.contains_range(0, 16).unwrap());
+        assert!(!check.contains_range(10, 15).unwrap());
+        assert!(!check.contains_range(11, 14).unwrap());
     }
     #[test]
     fn test_unicodeset_contains_range_invalid() {
         let check = UnicodeSet::all();
-        assert!(check.contains_range(&10, &0).is_err());
-        assert!(check.contains_range(&0, &0).is_err());
+        assert!(check.contains_range(10, 0).is_err());
+        assert!(check.contains_range(0, 0).is_err());
     }
     #[test]
     fn test_unicodeset_size() {
         let check = UnicodeSet::new("4 2 5 10 15").unwrap();
-        assert_eq!(8, check.size().unwrap());
+        assert_eq!(8, check.size());
         let check = UnicodeSet::all();
         let expected = CODEPOINT_MAX + 1 - CODEPOINT_MIN;
-        assert_eq!(expected as usize, check.size().unwrap());
-    }
-    #[test]
-    fn test_unicodeset_size_error() {
-        let check = UnicodeSet { inv_list: vec![0] };
-        assert!(check.size().is_err());
+        assert_eq!(expected as usize, check.size());
+        let check = UnicodeSet {
+            inv_list: Vec::new(),
+        };
+        assert_eq!(check.size(), 0);
     }
     #[test]
     fn test_unicodeset_is_empty() {

From c41fa76ca1e9a560600d168dcefc18547015053c Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Fri, 26 Jun 2020 21:40:16 +0000
Subject: [PATCH 18/30] replace u32 with char, fix typos, and apply optimizations

---
 components/uniset/src/uniset.rs | 81 +++++++++++++++++++++------------
 1 file changed, 51 insertions(+), 30 deletions(-)

diff --git a/components/uniset/src/uniset.rs b/components/uniset/src/uniset.rs
index a8185957a62..5828c52e0ca 100644
--- a/components/uniset/src/uniset.rs
+++ b/components/uniset/src/uniset.rs
@@ -1,8 +1,8 @@
-use std::{boxed::Box, error::Error, iter::Iterator, slice::Iter, vec::Vec};
+use std::{boxed::Box, error::Error, iter::Iterator, slice::Iter, str::FromStr, vec::Vec};
 
 /// Represents the maximum Unicode Code Point, inclusive
 const CODEPOINT_MAX: u32 = 0x10FFFF;
-/// Represents the minimum UNicode Code Point, inclusive
+/// Represents the minimum Unicode Code Point, inclusive
 const CODEPOINT_MIN: u32 = 0x000000;
 /// Represents the end code point of the Basic Multilingual Plane range, starting from code point 0 , inclusive
 const BMP_MAX: u32 = 0xFFFF;
@@ -69,6 +69,24 @@ pub struct UnicodeSet {
     inv_list: Vec<u32>,
 }
 
+impl FromStr for UnicodeSet {
+    type Err = Box<dyn Error>;
+
+    fn from_str(serialize: &str) -> Result<Self, Self::Err> {
+        match parse_serial_string(serialize) {
+            Ok(serialize) => {
+                if serialize.len() % 2 != 0 {
+                    return Err("Array length must be even".into());
+                }
+                Ok(UnicodeSet {
+                    inv_list: serialize,
+                })
+            }
+            Err(e) => Err(e.into()),
+        }
+    }
+}
+
 impl UnicodeSet {
     /// Returns Result of UnicodeSet from serialized string
     ///
@@ -152,9 +170,11 @@ impl UnicodeSet {
         if self.is_empty() {
             return 0;
         }
-        let end_point_sum: u32 = self.iter().skip(1).step_by(2).sum::<u32>();
-        let start_point_sum: u32 = self.iter().step_by(2).sum::<u32>();
-        (end_point_sum - start_point_sum) as usize
+        let mut sum = 0;
+        for (i, end_point) in self.iter().skip(1).step_by(2).enumerate() {
+            sum += end_point - self.inv_list[2 * i];
+        }
+        sum as usize
     }
 
     /// Returns whether or not the UnicodeSet is empty
@@ -171,11 +191,11 @@ impl UnicodeSet {
     ///
     /// `let condition_closure = |_: usize| -> bool {true};`
     /// `self.contains(10, condition_closure);`
-    fn contains<C>(&self, query: u32, condition: C) -> bool
+    fn contains<C>(&self, query: char, condition: C) -> bool
     where
         C: Fn(usize) -> bool,
     {
-        match self.inv_list.binary_search(&query) {
+        match self.inv_list.binary_search(&(query as u32)) {
             Ok(pos) => {
                 if pos % 2 == 0 {
                     condition(pos)
@@ -199,7 +219,7 @@ impl UnicodeSet {
     /// in the set using `std::vec::Vec` implementation
     ///
     /// Example: `contains_point(&10)`
-    pub fn contains_point(&self, query: u32) -> bool {
+    pub fn contains_point(&self, query: char) -> bool {
         let condition_closure = |_| -> bool { true };
         self.contains(query, condition_closure)
     }
@@ -213,11 +233,11 @@ impl UnicodeSet {
     /// in a single `O(1)` step
     ///
     /// Example: `contains_range(&0, &10)`
-    pub fn contains_range(&self, start: u32, end: u32) -> Result<bool, Box<dyn Error>> {
+    pub fn contains_range(&self, start: char, end: char) -> Result<bool, Box<dyn Error>> {
         if start >= end {
             return Err("Range cannot be out of order".into());
         }
-        let condition_closure = |pos| end < self.inv_list[pos + 1];
+        let condition_closure = |pos| (end as u32) < self.inv_list[pos + 1];
         Ok(self.contains(start, condition_closure))
     }
 }
@@ -225,6 +245,7 @@ impl UnicodeSet {
 #[cfg(test)]
 mod tests {
     use super::{parse_serial_string, UnicodeSet, BMP_MAX, CODEPOINT_MAX, CODEPOINT_MIN};
+    use std::str::FromStr;
     use std::vec::Vec;
 
     // parse_serial_string
@@ -267,7 +288,7 @@ mod tests {
     #[test]
     fn test_unicodeset_new() {
         let expected = vec![2, 3, 4, 5];
-        let actual = UnicodeSet::new("4 2 3 4 5").unwrap().inv_list;
+        let actual = UnicodeSet::from_str("4 2 3 4 5").unwrap().inv_list;
         assert_eq!(actual, expected);
     }
     #[test]
@@ -303,41 +324,41 @@ mod tests {
     #[test]
     fn test_unicodeset_contains() {
         let check = UnicodeSet::new("4 2 5 10 15").unwrap();
-        assert!(check.contains_point(2));
-        assert!(check.contains_point(4));
-        assert!(check.contains_point(10));
-        assert!(check.contains_point(14));
+        assert!(check.contains_point(2 as char));
+        assert!(check.contains_point(4 as char));
+        assert!(check.contains_point(10 as char));
+        assert!(check.contains_point(14 as char));
     }
     #[test]
     fn test_unicodeset_contains_false() {
         let check = UnicodeSet::new("4 2 5 10 15").unwrap();
-        assert!(!check.contains_point(1));
-        assert!(!check.contains_point(5));
-        assert!(!check.contains_point(9));
-        assert!(!check.contains_point(15));
-        assert!(!check.contains_point(16));
+        assert!(!check.contains_point(1 as char));
+        assert!(!check.contains_point(5 as char));
+        assert!(!check.contains_point(9 as char));
+        assert!(!check.contains_point(15 as char));
+        assert!(!check.contains_point(16 as char));
     }
     #[test]
     fn test_unicodeset_contains_range() {
         let check = UnicodeSet::new("4 0 10 15 25").unwrap();
-        assert!(check.contains_range(2, 5).unwrap());
-        assert!(check.contains_range(0, 9).unwrap());
-        assert!(check.contains_range(15, 24).unwrap());
+        assert!(check.contains_range(2 as char, 5 as char).unwrap());
+        assert!(check.contains_range(0 as char, 9 as char).unwrap());
+        assert!(check.contains_range(15 as char, 24 as char).unwrap());
     }
     #[test]
     fn test_unicodeset_contains_range_false() {
         let check = UnicodeSet::new("4 0 10 15 25").unwrap();
-        assert!(!check.contains_range(0, 10).unwrap());
-        assert!(!check.contains_range(15, 25).unwrap());
-        assert!(!check.contains_range(0, 16).unwrap());
-        assert!(!check.contains_range(10, 15).unwrap());
-        assert!(!check.contains_range(11, 14).unwrap());
+        assert!(!check.contains_range(0 as char, 10 as char).unwrap());
+        assert!(!check.contains_range(15 as char, 25 as char).unwrap());
+        assert!(!check.contains_range(0 as char, 16 as char).unwrap());
+        assert!(!check.contains_range(10 as char, 15 as char).unwrap());
+        assert!(!check.contains_range(11 as char, 14 as char).unwrap());
     }
     #[test]
     fn test_unicodeset_contains_range_invalid() {
         let check = UnicodeSet::all();
-        assert!(check.contains_range(10, 0).is_err());
-        assert!(check.contains_range(0, 0).is_err());
+        assert!(check.contains_range(10 as char, 0 as char).is_err());
+        assert!(check.contains_range(0 as char, 0 as char).is_err());
     }
     #[test]
     fn test_unicodeset_size() {

From da5eecf10b9c39caa00fcf6429f7b4fc3f3c26a2 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Thu, 9 Jul 2020 20:11:55 +0000
Subject: [PATCH 19/30] remove unnecessary imports, make code more idiomatic Rust

---
 .../uniset/meta/char_collection_lib_test.cmx  |   5 -
 components/uniset/src/conversions.rs          | 123 ++++++
 components/uniset/src/lib.rs                  |   6 +
 components/uniset/src/uniset.rs               | 395 +++++++-----------
 components/uniset/src/utils.rs                |  64 +++
 5 files changed, 352 insertions(+), 241 deletions(-)
 delete mode 100644 components/uniset/meta/char_collection_lib_test.cmx
 create mode 100644 components/uniset/src/conversions.rs
 create mode 100644 components/uniset/src/utils.rs

diff --git a/components/uniset/meta/char_collection_lib_test.cmx b/components/uniset/meta/char_collection_lib_test.cmx
deleted file mode 100644
index 3bb56a96aa2..00000000000
--- a/components/uniset/meta/char_collection_lib_test.cmx
+++ /dev/null
@@ -1,5 +0,0 @@
-{
-    "program": {
-        "binary": "test/char_collection_lib_test"
-    }
-}
\ No newline at end of file
diff --git a/components/uniset/src/conversions.rs b/components/uniset/src/conversions.rs
new file mode 100644
index 00000000000..484f693d166
--- /dev/null
+++ b/components/uniset/src/conversions.rs
@@ -0,0 +1,123 @@
+use crate::utils::deconstruct_range;
+use crate::UnicodeSet;
+use std::{
+    convert::TryFrom,
+    ops::{Range, RangeBounds, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive},
+};
+
+fn try_from_range_impl(range: impl RangeBounds<char>) -> Result<UnicodeSet, (u32, u32)> {
+    let (from, till) = deconstruct_range(range);
+    if from < till {
+        let set = vec![from, till];
+        Ok(UnicodeSet::try_from(set).unwrap())
+    } else {
+        Err((from, till))
+    }
+}
+
+impl TryFrom<Range<char>> for UnicodeSet {
+    type Error = String;
+
+    fn try_from(range: Range<char>) -> Result<Self, Self::Error> {
+        match try_from_range_impl(range) {
+            Ok(u) => Ok(u),
+            Err((from, till)) => Err(format!("Range must be ascending: {} - {}", from, till)),
+        }
+    }
+}
+
+impl TryFrom<RangeFrom<char>> for UnicodeSet {
+    type Error = String;
+
+    fn try_from(range: RangeFrom<char>) -> Result<Self, Self::Error> {
+        match try_from_range_impl(range) {
+            Ok(u) => Ok(u),
+            Err((from, till)) => Err(format!("Range must be ascending: {} - {}", from, till)),
+        }
+    }
+}
+
+impl TryFrom<RangeFull> for UnicodeSet {
+    type Error = String;
+
+    fn try_from(_: RangeFull) -> Result<Self, Self::Error> {
+        Ok(UnicodeSet::all())
+    }
+}
+
+impl TryFrom<RangeInclusive<char>> for UnicodeSet {
+    type Error = String;
+
+    fn try_from(range: RangeInclusive<char>) -> Result<Self, Self::Error> {
+        match try_from_range_impl(range) {
+            Ok(u) => Ok(u),
+            Err((from, till)) => Err(format!("Range must be ascending: {} - {}", from, till)),
+        }
+    }
+}
+
+impl TryFrom<RangeTo<char>> for UnicodeSet {
+    type Error = String;
+
+    fn try_from(range: RangeTo<char>) -> Result<Self, Self::Error> {
+        match try_from_range_impl(range) {
+            Ok(u) => Ok(u),
+            Err((from, till)) => Err(format!("Range must be ascending: {} - {}", from, till)),
+        }
+    }
+}
+
+impl TryFrom<RangeToInclusive<char>> for UnicodeSet {
+    type Error = String;
+
+    fn try_from(range: RangeToInclusive<char>) -> Result<Self, Self::Error> {
+        Ok(try_from_range_impl(range).unwrap())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::UnicodeSet;
+    use std::convert::TryFrom;
+
+    #[test]
+    fn test_try_from_range() {
+        assert!(UnicodeSet::try_from('A'..'B').is_ok());
+    }
+    #[test]
+    fn test_try_from_range_error() {
+        assert!(UnicodeSet::try_from('A'..'A').is_err());
+    }
+    #[test]
+    fn test_try_from_range_inclusive() {
+        assert!(UnicodeSet::try_from('A'..='A').is_ok());
+    }
+    #[test]
+    fn test_try_from_range_inclusive_err() {
+        assert!(UnicodeSet::try_from('B'..='A').is_err());
+    }
+    #[test]
+    fn test_try_from_range_from() {
+        assert!(UnicodeSet::try_from('A'..).is_ok());
+    }
+    #[test]
+    fn test_try_from_range_from_err() {
+        assert!(UnicodeSet::try_from((std::char::MAX)..).is_err());
+    }
+    #[test]
+    fn test_try_from_range_to() {
+        assert!(UnicodeSet::try_from(..'A').is_ok());
+    }
+    #[test]
+    fn test_try_from_range_to_err() {
+        assert!(UnicodeSet::try_from(..(0 as char)).is_err());
+    }
+    #[test]
+    fn test_try_from_range_to_inclusive() {
+        assert!(UnicodeSet::try_from(..='A').is_ok());
+    }
+    #[test]
+    fn test_try_from_range_full() {
+        assert!(UnicodeSet::try_from(..).is_ok());
+    }
+}
diff --git a/components/uniset/src/lib.rs b/components/uniset/src/lib.rs
index c9e5f2c2d87..1cff18c16f5 100644
--- a/components/uniset/src/lib.rs
+++ b/components/uniset/src/lib.rs
@@ -3,4 +3,10 @@
 // found in the LICENSE file.
 #[macro_use]
 mod uniset;
+mod conversions;
+mod utils;
+// mod iter;
+pub use conversions::*;
 pub use uniset::UnicodeSet;
+pub use utils::*;
+// pub use iter::UnicodeSetIter;
diff --git a/components/uniset/src/uniset.rs b/components/uniset/src/uniset.rs
index 5828c52e0ca..b8b52e2b041 100644
--- a/components/uniset/src/uniset.rs
+++ b/components/uniset/src/uniset.rs
@@ -1,61 +1,15 @@
-use std::{boxed::Box, error::Error, iter::Iterator, slice::Iter, str::FromStr, vec::Vec};
+use std::{
+    char::{from_u32, MAX},
+    convert::TryFrom,
+    ops::RangeBounds,
+    slice::Iter,
+};
 
-/// Represents the maximum Unicode Code Point, inclusive
-const CODEPOINT_MAX: u32 = 0x10FFFF;
-/// Represents the minimum Unicode Code Point, inclusive
-const CODEPOINT_MIN: u32 = 0x000000;
+use crate::utils::{deconstruct_range, is_sorted};
+// use crate::UnicodeSetIter;
 /// Represents the end code point of the Basic Multilingual Plane range, starting from code point 0 , inclusive
 const BMP_MAX: u32 = 0xFFFF;
 
-/// Given string representation of inversion list create set
-///
-/// See UnicodeSet::new for conditions
-fn parse_serial_string(serialize_str: &str) -> Result<Vec<u32>, Box<dyn Error>> {
-    let mut serialize = serialize_str.split(" ");
-    let capacity: usize = serialize
-        .next()
-        .ok_or(format!(
-            "Splitting by spaces did not yield a capacity: {:?}",
-            serialize
-        ))?
-        .parse()?;
-    if capacity % 2 != 0 {
-        return Err(format!("Capacity must be even. Parsed Capacity: {}", capacity).into());
-    }
-    let mut serialized_vec: Vec<u32> = Vec::with_capacity(capacity);
-    let mut prev: Option<u32> = None;
-    for str_elem in serialize {
-        let parsed: u32 = str_elem.parse()?;
-        if serialized_vec.len() + 1 > serialized_vec.capacity() {
-            return Err(format!(
-                "Serialization capacity is too small. Allocated Capacity: {}",
-                capacity
-            )
-            .into());
-        }
-        if Some(parsed) <= prev {
-            return Err(format!(
-                "Serialization must be sorted. {:?} followed by {:?}",
-                Some(parsed),
-                prev
-            )
-            .into());
-        }
-        serialized_vec.push(parsed);
-        prev = Some(parsed);
-    }
-    if serialized_vec.len() % 2 != 0 {
-        return Err(format!(
-            "Serialization must be even. Serialization: {:?} Length: {}",
-            serialized_vec,
-            serialized_vec.len()
-        )
-        .into());
-    }
-    serialized_vec.shrink_to_fit(); // necessary if the length < capacity
-    Ok(serialized_vec)
-}
-
 /// UnicodeSet membership wrapper
 ///
 /// Provides exposure to membership functions and constructors from serialized UnicodeSets
@@ -69,72 +23,28 @@ pub struct UnicodeSet {
     inv_list: Vec<u32>,
 }
 
-impl FromStr for UnicodeSet {
-    type Err = Box<dyn Error>;
+impl TryFrom<Vec<u32>> for UnicodeSet {
+    type Error = String;
 
-    fn from_str(serialize: &str) -> Result<Self, Self::Err> {
-        match parse_serial_string(serialize) {
-            Ok(serialize) => {
-                if serialize.len() % 2 != 0 {
-                    return Err("Array length must be even".into());
-                }
-                Ok(UnicodeSet {
-                    inv_list: serialize,
-                })
-            }
-            Err(e) => Err(e.into()),
+    fn try_from(set: Vec<u32>) -> Result<Self, Self::Error> {
+        if is_sorted(&set) {
+            Ok(UnicodeSet { inv_list: set })
+        } else {
+            Err(format!(
+                "UnicodeSet set must be sorted without duplicates: {:?}",
+                set
+            ))
         }
     }
 }
 
 impl UnicodeSet {
-    /// Returns Result of UnicodeSet from serialized string
-    ///
-    /// Returns an error if the serialized string fails to parse.
-    /// The serialized string requires starting capacity integer, followed by space delimited
-    /// integer code points. There must be an even number of elements (not including the
-    /// capacity int), and must be in ascending sorted order.
-    ///
-    /// Example String: `"4 0 5 10 15"` designates a capacity of size `4`, followed by 2 ranges
-    /// The ranges are `{0, 4}` and `{10, 14}` inclusive
-    pub fn new(serialize: &str) -> Result<UnicodeSet, Box<dyn Error>> {
-        match parse_serial_string(serialize) {
-            Ok(serialize) => Ok(UnicodeSet {
-                inv_list: serialize,
-            }),
-            Err(e) => Err(e),
-        }
-    }
-
-    /// Returns Result of UnicodeSet from a single pair of integers defining a range
-    ///
-    /// `start`: inclusive, `end`: exclusive
-    ///
-    /// Returns an error if the range is invalid (out of order and out of bounds).
-    ///
-    /// Example Call: `UnicodeSet::from_range(&0, &15)`
-    pub fn from_range(start: u32, end: u32) -> Result<UnicodeSet, Box<dyn Error>> {
-        if start > end {
-            return Err(format!("Range is out of order. start: {} end: {}", start, end).into());
-        }
-        if start < CODEPOINT_MIN || end > CODEPOINT_MAX {
-            return Err(format!(
-                "Range is out of bounds. start: {}, min: {}, end: {}, max: {}",
-                start, CODEPOINT_MIN, end, CODEPOINT_MAX
-            )
-            .into());
-        }
-        Ok(UnicodeSet {
-            inv_list: vec![start, end],
-        })
-    }
-
     /// Returns UnicodeSet spanning entire Unicode range
     ///
     /// The range spans from `0x0 -> 0x10FFFF` inclusive
     pub fn all() -> UnicodeSet {
         UnicodeSet {
-            inv_list: vec![CODEPOINT_MIN, CODEPOINT_MAX + 1],
+            inv_list: vec![0, (MAX as u32) + 1],
         }
     }
 
@@ -143,7 +53,7 @@ impl UnicodeSet {
     /// The range spans from `0x0 -> 0xFFFF` inclusive
     pub fn bmp() -> UnicodeSet {
         UnicodeSet {
-            inv_list: vec![CODEPOINT_MIN, BMP_MAX + 1],
+            inv_list: vec![0, BMP_MAX + 1],
         }
     }
 
@@ -153,28 +63,54 @@ impl UnicodeSet {
     ///
     /// ```
     /// use icu4x_unicodeset::UnicodeSet;
-    /// let example = UnicodeSet::new("4 0 10 15 20");
-    /// let mut example_iter = example.iter();
-    /// example_iter.next(); // => 0
-    /// example_iter.next(); // => 10
-    /// example_iter.next(); // => 10, etc.
+    /// use std::convert::TryFrom;
+    /// let example_list = vec![0, 10, 15, 20];
+    /// let example = UnicodeSet::try_from(example_list).unwrap();
+    /// let mut example_ranges = example.ranges();
+    /// assert_eq!(Some(&0), example_ranges.next());
+    /// assert_eq!(Some(&10), example_ranges.next());
+    /// assert_eq!(Some(&15), example_ranges.next());
+    /// assert_eq!(Some(&20), example_ranges.next());     
+    /// assert_eq!(None, example_ranges.next());
     /// ```
-    pub fn iter(&self) -> Iter<u32> {
+    pub fn ranges(&self) -> Iter<u32> {
         self.inv_list.iter()
     }
 
-    /// Returns the number of elements of the UnicodeSet
+    /// Yields an iterator going through the character set in the UnicodeSet
     ///
-    /// Returns an error if a complete range is not defined in the UnicodeSet
+    /// Example:
+    ///
+    /// ```
+    /// use icu4x_unicodeset::UnicodeSet;
+    /// use std::convert::TryFrom;
+    /// let example_list = vec![65, 68, 69, 70];
+    /// let example = UnicodeSet::try_from(example_list).unwrap();
+    /// let mut example_iter = example.iter();
+    /// assert_eq!(Some('A'), example_iter.next());
+    /// assert_eq!(Some('B'), example_iter.next());
+    /// assert_eq!(Some('C'), example_iter.next());
+    /// assert_eq!(Some('E'), example_iter.next());
+    /// assert_eq!(None, example_iter.next());
+    /// ```
+    pub fn iter(&self) -> impl Iterator<Item = char> + '_ {
+        self.inv_list
+            .chunks(2)
+            .flat_map(|pair| (pair[0]..pair[1]))
+            .map(|val| from_u32(val).unwrap())
+    }
+
+    /// Returns the number of elements of the UnicodeSet
     pub fn size(&self) -> usize {
         if self.is_empty() {
             return 0;
         }
-        let mut sum = 0;
-        for (i, end_point) in self.iter().skip(1).step_by(2).enumerate() {
-            sum += end_point - self.inv_list[2 * i];
-        }
-        sum as usize
+        let s: u32 = self
+            .inv_list
+            .chunks(2)
+            .map(|end_points| end_points[1] - end_points[0])
+            .sum();
+        s as usize
     }
 
     /// Returns whether or not the UnicodeSet is empty
@@ -184,30 +120,21 @@ impl UnicodeSet {
 
     /// Wrapper for contains
     ///
-    /// Takes in a single code point `query`, and a closure `condition`
-    /// to see if the `query` is located in the inversion list.
-    ///
-    /// Example:
-    ///
-    /// `let condition_closure = |_: usize| -> bool {true};`
-    /// `self.contains(10, condition_closure);`
-    fn contains<C>(&self, query: char, condition: C) -> bool
-    where
-        C: Fn(usize) -> bool,
-    {
-        match self.inv_list.binary_search(&(query as u32)) {
+    /// Returns an Option as to whether or not it is possible for the query to be contained
+    fn contains_impl(&self, query: u32) -> Option<usize> {
+        match self.inv_list.binary_search(&query) {
             Ok(pos) => {
                 if pos % 2 == 0 {
-                    condition(pos)
+                    Some(pos)
                 } else {
-                    false
+                    None
                 }
             }
             Err(pos) => {
                 if pos % 2 != 0 && pos < self.inv_list.len() {
-                    condition(pos)
+                    Some(pos)
                 } else {
-                    false
+                    None
                 }
             }
         }
@@ -218,10 +145,21 @@ impl UnicodeSet {
     /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
     /// in the set using `std::vec::Vec` implementation
     ///
-    /// Example: `contains_point(&10)`
-    pub fn contains_point(&self, query: char) -> bool {
-        let condition_closure = |_| -> bool { true };
-        self.contains(query, condition_closure)
+    /// Example:
+    ///
+    /// ```
+    /// use icu4x_unicodeset::UnicodeSet;
+    /// use std::convert::TryFrom;
+    /// let example_list = vec![65, 67, 68, 69];
+    /// let example = UnicodeSet::try_from(example_list).unwrap();
+    /// assert!(example.contains('A'));
+    /// assert!(!example.contains('C'));
+    /// ```
+    pub fn contains(&self, query: char) -> bool {
+        match self.contains_impl(query as u32) {
+            Some(_) => true,
+            None => false,
+        }
     }
 
     /// Checks to see if the range is in the UnicodeSet, returns a Result
@@ -232,140 +170,103 @@ impl UnicodeSet {
     /// Only runs the search once on the `start` parameter, while the `end` parameter is checked
     /// in a single `O(1)` step
     ///
-    /// Example: `contains_range(&0, &10)`
-    pub fn contains_range(&self, start: char, end: char) -> Result<bool, Box<dyn Error>> {
-        if start >= end {
-            return Err("Range cannot be out of order".into());
+    /// Example:
+    ///
+    /// ```
+    /// use icu4x_unicodeset::UnicodeSet;
+    /// use std::convert::TryFrom;
+    /// let example_list = vec![65, 67, 68, 69];
+    /// let example = UnicodeSet::try_from(example_list).unwrap();
+    /// assert!(example.contains_range('A'..'C'));
+    /// assert!(example.contains_range('A'..='B'));
+    /// assert!(!example.contains_range('A'..='C'));
+    /// ```
+    pub fn contains_range(&self, range: impl RangeBounds<char>) -> bool {
+        let (from, till) = deconstruct_range(range);
+        if from >= till {
+            return false;
+        }
+        match self.contains_impl(from) {
+            Some(pos) => (till) <= self.inv_list[pos + 1],
+            None => false,
         }
-        let condition_closure = |pos| (end as u32) < self.inv_list[pos + 1];
-        Ok(self.contains(start, condition_closure))
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use super::{parse_serial_string, UnicodeSet, BMP_MAX, CODEPOINT_MAX, CODEPOINT_MIN};
-    use std::str::FromStr;
-    use std::vec::Vec;
-
-    // parse_serial_string
-    #[test]
-    fn test_parse_serial_string() {
-        let expected = vec![2, 3, 4, 5];
-        let actual = parse_serial_string("4 2 3 4 5").unwrap();
-        assert_eq!(actual, expected);
-    }
-    #[test]
-    fn test_parse_serial_string_no_char() {
-        assert!(parse_serial_string("4 2 A 3 4 5").is_err());
-    }
-    #[test]
-    fn test_parse_serial_string_empty() {
-        assert!(parse_serial_string("").is_err());
-    }
-    #[test]
-    fn test_parse_serial_string_wrong_format() {
-        assert!(parse_serial_string("[4, 2, 3, 4, 5  ]").is_err());
-    }
-    #[test]
-    fn test_parse_serial_string_wrong_order() {
-        assert!(parse_serial_string("4 1 0 4 2").is_err());
-    }
-    #[test]
-    fn test_parse_serial_string_single_char_error() {
-        assert!(parse_serial_string("4 1 1 2 2").is_err());
-    }
-    #[test]
-    fn test_parse_serial_string_capacity_not_even() {
-        assert!(parse_serial_string("3 2 3 4").is_err());
-    }
-    #[test]
-    fn test_parse_serial_string_size_not_even() {
-        assert!(parse_serial_string("4 3 2 1").is_err());
-    }
+    use super::{UnicodeSet, BMP_MAX};
+    use std::{char::MAX, convert::TryFrom, vec::Vec};
 
-    // UnicodeSet constructors
     #[test]
-    fn test_unicodeset_new() {
-        let expected = vec![2, 3, 4, 5];
-        let actual = UnicodeSet::from_str("4 2 3 4 5").unwrap().inv_list;
-        assert_eq!(actual, expected);
+    fn test_unicodeset_try_from_vec() {
+        let check = vec![2, 3, 4, 5];
+        assert!(UnicodeSet::try_from(check).is_ok());
     }
     #[test]
-    fn test_unicodeset_new_error() {
-        assert!(UnicodeSet::new("3 2 4 3").is_err());
-    }
-    #[test]
-    fn test_unicodeset_from_range() {
-        let expected = vec![4, 10];
-        let actual = UnicodeSet::from_range(4, 10).unwrap().inv_list;
-        assert_eq!(actual, expected);
-    }
-    #[test]
-    fn test_unicodeset_from_range_bad_order() {
-        assert!(UnicodeSet::from_range(10, 5).is_err());
-    }
-    #[test]
-    fn test_unicodeset_from_range_out_of_bounds() {
-        assert!(UnicodeSet::from_range(0, 0x110000).is_err());
+    fn test_unicodeset_try_from_vec_error() {
+        let check = vec![1, 1, 2, 3, 4];
+        assert!(UnicodeSet::try_from(check).is_err());
     }
     #[test]
     fn test_unicodeset_all() {
-        let expected = vec![CODEPOINT_MIN, CODEPOINT_MAX + 1];
+        let expected = vec![0, (MAX as u32) + 1];
         assert_eq!(UnicodeSet::all().inv_list, expected);
     }
     #[test]
     fn test_unicodeset_bmp() {
-        let expected = vec![CODEPOINT_MIN, BMP_MAX + 1];
+        let expected = vec![0, BMP_MAX + 1];
         assert_eq!(UnicodeSet::bmp().inv_list, expected);
     }
 
     // UnicodeSet membership functions
     #[test]
     fn test_unicodeset_contains() {
-        let check = UnicodeSet::new("4 2 5 10 15").unwrap();
-        assert!(check.contains_point(2 as char));
-        assert!(check.contains_point(4 as char));
-        assert!(check.contains_point(10 as char));
-        assert!(check.contains_point(14 as char));
+        let ex = vec![2, 5, 10, 15];
+        let check = UnicodeSet::try_from(ex).unwrap();
+        assert!(check.contains(2 as char));
+        assert!(check.contains(4 as char));
+        assert!(check.contains(10 as char));
+        assert!(check.contains(14 as char));
     }
     #[test]
     fn test_unicodeset_contains_false() {
-        let check = UnicodeSet::new("4 2 5 10 15").unwrap();
-        assert!(!check.contains_point(1 as char));
-        assert!(!check.contains_point(5 as char));
-        assert!(!check.contains_point(9 as char));
-        assert!(!check.contains_point(15 as char));
-        assert!(!check.contains_point(16 as char));
+        let ex = vec![2, 5, 10, 15];
+        let check = UnicodeSet::try_from(ex).unwrap();
+        assert!(!check.contains(1 as char));
+        assert!(!check.contains(5 as char));
+        assert!(!check.contains(9 as char));
+        assert!(!check.contains(15 as char));
+        assert!(!check.contains(16 as char));
     }
     #[test]
     fn test_unicodeset_contains_range() {
-        let check = UnicodeSet::new("4 0 10 15 25").unwrap();
-        assert!(check.contains_range(2 as char, 5 as char).unwrap());
-        assert!(check.contains_range(0 as char, 9 as char).unwrap());
-        assert!(check.contains_range(15 as char, 24 as char).unwrap());
+        let ex = vec![65, 70, 75, 85];
+        let check = UnicodeSet::try_from(ex).unwrap();
+        assert!(check.contains_range('A'..='E')); // 65 - 69
+        assert!(check.contains_range('K'..'U')); // 75 - 84
     }
     #[test]
     fn test_unicodeset_contains_range_false() {
-        let check = UnicodeSet::new("4 0 10 15 25").unwrap();
-        assert!(!check.contains_range(0 as char, 10 as char).unwrap());
-        assert!(!check.contains_range(15 as char, 25 as char).unwrap());
-        assert!(!check.contains_range(0 as char, 16 as char).unwrap());
-        assert!(!check.contains_range(10 as char, 15 as char).unwrap());
-        assert!(!check.contains_range(11 as char, 14 as char).unwrap());
+        let ex = vec![65, 70, 75, 85];
+        let check = UnicodeSet::try_from(ex).unwrap();
+        assert!(!check.contains_range('!'..'A')); // 33 - 65
+        assert!(!check.contains_range('F'..'K')); // 70 - 74
+        assert!(!check.contains_range('U'..));
     }
     #[test]
     fn test_unicodeset_contains_range_invalid() {
         let check = UnicodeSet::all();
-        assert!(check.contains_range(10 as char, 0 as char).is_err());
-        assert!(check.contains_range(0 as char, 0 as char).is_err());
+        assert!(!check.contains_range('A'..'!')); // 65 - 33
+        assert!(!check.contains_range('A'..'A'));
     }
     #[test]
     fn test_unicodeset_size() {
-        let check = UnicodeSet::new("4 2 5 10 15").unwrap();
+        let ex = vec![2, 5, 10, 15];
+        let check = UnicodeSet::try_from(ex).unwrap();
         assert_eq!(8, check.size());
         let check = UnicodeSet::all();
-        let expected = CODEPOINT_MAX + 1 - CODEPOINT_MIN;
+        let expected = (MAX as u32) + 1;
         assert_eq!(expected as usize, check.size());
         let check = UnicodeSet {
             inv_list: Vec::new(),
@@ -384,4 +285,26 @@ mod tests {
         let check = UnicodeSet::all();
         assert!(!check.is_empty());
     }
+    #[test]
+    fn test_unicodeset_ranges() {
+        let ex = vec![65, 70, 75, 85];
+        let check = UnicodeSet::try_from(ex).unwrap();
+        let mut iter = check.ranges();
+        assert_eq!(iter.next().unwrap(), &65);
+        assert_eq!(iter.next().unwrap(), &70);
+        assert_eq!(iter.next().unwrap(), &75);
+        assert_eq!(iter.next().unwrap(), &85);
+        assert_eq!(iter.next(), None);
+    }
+    #[test]
+    fn test_unicodeset_iter() {
+        let ex = vec![65, 68, 69, 70];
+        let check = UnicodeSet::try_from(ex).unwrap();
+        let mut iter = check.iter();
+        assert_eq!(Some('A'), iter.next());
+        assert_eq!(Some('B'), iter.next());
+        assert_eq!(Some('C'), iter.next());
+        assert_eq!(Some('E'), iter.next());
+        assert_eq!(None, iter.next());
+    }
 }
diff --git a/components/uniset/src/utils.rs b/components/uniset/src/utils.rs
new file mode 100644
index 00000000000..562c9f08af8
--- /dev/null
+++ b/components/uniset/src/utils.rs
@@ -0,0 +1,64 @@
+use std::{
+    char::MAX,
+    ops::{Bound::*, RangeBounds},
+};
+
+/// Returns whether the vector is sorted ascending non inclusive
+pub fn is_sorted(v: &Vec<u32>) -> bool {
+    v.chunks(2).all(|chunk| chunk[0] < chunk[1])
+}
+
+/// Returns start (inclusive) and end (exclusive) bounds of RangeBounds
+pub fn deconstruct_range(range: impl RangeBounds<char>) -> (u32, u32) {
+    let from = match range.start_bound() {
+        Included(b) => (*b as u32),
+        Excluded(b) => (*b as u32),
+        Unbounded => 0,
+    };
+    let till = match range.end_bound() {
+        Included(b) => (*b as u32) + 1,
+        Excluded(b) => (*b as u32),
+        Unbounded => MAX as u32,
+    };
+    (from, till)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{deconstruct_range, is_sorted};
+    use std::char::MAX;
+
+    #[test]
+    fn test_is_sorted() {
+        let check = vec![2, 3, 4, 5];
+        assert!(is_sorted(&check));
+    }
+    #[test]
+    fn test_is_sorted_out_of_order() {
+        let check = vec![5, 4, 5, 6, 7];
+        assert!(!is_sorted(&check));
+    }
+    #[test]
+    fn test_is_sorted_duplicate() {
+        let check = vec![1, 2, 3, 3, 5];
+        assert!(!is_sorted(&check));
+    }
+
+    // deconstruct_range
+    #[test]
+    fn test_deconstruct_range() {
+        let expected = (65, 69);
+        let check = deconstruct_range('A'..'E'); // Range
+        assert_eq!(check, expected);
+        let check = deconstruct_range('A'..='D'); // Range Inclusive
+        assert_eq!(check, expected);
+        let check = deconstruct_range('A'..); // Range From
+        assert_eq!(check, (65, MAX as u32));
+        let check = deconstruct_range(..'A'); // Range To
+        assert_eq!(check, (0, 65));
+        let check = deconstruct_range(..='A'); // Range To Inclusive
+        assert_eq!(check, (0, 66));
+        let check = deconstruct_range(..); // Range Full
+        assert_eq!(check, (0, MAX as u32));
+    }
+}

From 22b4fe0ff9689e58912ca73e80bb40a4e6391f50 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Thu, 9 Jul 2020 20:20:02 +0000
Subject: [PATCH 20/30] clippy fix

---
 components/uniset/src/uniset.rs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/components/uniset/src/uniset.rs b/components/uniset/src/uniset.rs
index b8b52e2b041..3c1a3b2c4ee 100644
--- a/components/uniset/src/uniset.rs
+++ b/components/uniset/src/uniset.rs
@@ -156,10 +156,7 @@ impl UnicodeSet {
     /// assert!(!example.contains('C'));
     /// ```
     pub fn contains(&self, query: char) -> bool {
-        match self.contains_impl(query as u32) {
-            Some(_) => true,
-            None => false,
-        }
+        self.contains_impl(query as u32).is_some()
     }
 
     /// Checks to see if the range is in the UnicodeSet, returns a Result

From 8d9138eaab753c91dab1717faa40f4d59704c4e7 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Thu, 9 Jul 2020 20:40:00 +0000
Subject: [PATCH 21/30] more clippy lint fixes

---
 components/uniset/src/utils.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/components/uniset/src/utils.rs b/components/uniset/src/utils.rs
index 562c9f08af8..c324360a698 100644
--- a/components/uniset/src/utils.rs
+++ b/components/uniset/src/utils.rs
@@ -4,7 +4,7 @@ use std::{
 };
 
 /// Returns whether the vector is sorted ascending non inclusive
-pub fn is_sorted(v: &Vec<u32>) -> bool {
+pub fn is_sorted(v: &[u32]) -> bool {
     v.chunks(2).all(|chunk| chunk[0] < chunk[1])
 }
 

From dac8a4a4551b03c68c161ae41c2b752545fe50e8 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Tue, 14 Jul 2020 20:08:50 +0000
Subject: [PATCH 22/30] Architecture checks minus benchmarks

---
 components/uniset/Cargo.toml         |   2 +-
 components/uniset/src/conversions.rs | 100 +++++++++++++--------------
 components/uniset/src/lib.rs         |  13 ++--
 components/uniset/src/uniset.rs      |  68 +++++++++---------
 components/uniset/src/utils.rs       |  54 +++++++++------
 5 files changed, 123 insertions(+), 114 deletions(-)

diff --git a/components/uniset/Cargo.toml b/components/uniset/Cargo.toml
index b7e4103b289..bbe4fe2c01b 100644
--- a/components/uniset/Cargo.toml
+++ b/components/uniset/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "icu4x-unicodeset"
+name = "icu-unicodeset"
 description = "API for managing Unicode Language and Locale Identifiers"
 version = "0.0.1"
 authors = ["The ICU4X Project Developers"]
diff --git a/components/uniset/src/conversions.rs b/components/uniset/src/conversions.rs
index 484f693d166..68e039e9086 100644
--- a/components/uniset/src/conversions.rs
+++ b/components/uniset/src/conversions.rs
@@ -1,3 +1,4 @@
+use super::USetError;
 use crate::utils::deconstruct_range;
 use crate::UnicodeSet;
 use std::{
@@ -5,119 +6,116 @@ use std::{
     ops::{Range, RangeBounds, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive},
 };
 
-fn try_from_range_impl(range: impl RangeBounds<char>) -> Result<UnicodeSet, (u32, u32)> {
+fn try_from_range_impl(range: &impl RangeBounds<char>) -> Result<UnicodeSet, USetError> {
     let (from, till) = deconstruct_range(range);
     if from < till {
         let set = vec![from, till];
         Ok(UnicodeSet::try_from(set).unwrap())
     } else {
-        Err((from, till))
+        Err(USetError::InvalidRange(from, till))
     }
 }
 
-impl TryFrom<Range<char>> for UnicodeSet {
-    type Error = String;
+impl TryFrom<&Range<char>> for UnicodeSet {
+    type Error = USetError;
 
-    fn try_from(range: Range<char>) -> Result<Self, Self::Error> {
-        match try_from_range_impl(range) {
-            Ok(u) => Ok(u),
-            Err((from, till)) => Err(format!("Range must be ascending: {} - {}", from, till)),
-        }
+    fn try_from(range: &Range<char>) -> Result<Self, Self::Error> {
+        try_from_range_impl(range)
     }
 }
 
-impl TryFrom<RangeFrom<char>> for UnicodeSet {
-    type Error = String;
+impl TryFrom<&RangeFrom<char>> for UnicodeSet {
+    type Error = USetError;
 
-    fn try_from(range: RangeFrom<char>) -> Result<Self, Self::Error> {
-        match try_from_range_impl(range) {
-            Ok(u) => Ok(u),
-            Err((from, till)) => Err(format!("Range must be ascending: {} - {}", from, till)),
-        }
+    fn try_from(range: &RangeFrom<char>) -> Result<Self, Self::Error> {
+        try_from_range_impl(range)
     }
 }
 
-impl TryFrom<RangeFull> for UnicodeSet {
-    type Error = String;
+impl TryFrom<&RangeFull> for UnicodeSet {
+    type Error = USetError;
 
-    fn try_from(_: RangeFull) -> Result<Self, Self::Error> {
+    fn try_from(_: &RangeFull) -> Result<Self, Self::Error> {
         Ok(UnicodeSet::all())
     }
 }
 
-impl TryFrom<RangeInclusive<char>> for UnicodeSet {
-    type Error = String;
+impl TryFrom<&RangeInclusive<char>> for UnicodeSet {
+    type Error = USetError;
 
-    fn try_from(range: RangeInclusive<char>) -> Result<Self, Self::Error> {
-        match try_from_range_impl(range) {
-            Ok(u) => Ok(u),
-            Err((from, till)) => Err(format!("Range must be ascending: {} - {}", from, till)),
-        }
+    fn try_from(range: &RangeInclusive<char>) -> Result<Self, Self::Error> {
+        try_from_range_impl(range)
     }
 }
 
-impl TryFrom<RangeTo<char>> for UnicodeSet {
-    type Error = String;
+impl TryFrom<&RangeTo<char>> for UnicodeSet {
+    type Error = USetError;
 
-    fn try_from(range: RangeTo<char>) -> Result<Self, Self::Error> {
-        match try_from_range_impl(range) {
-            Ok(u) => Ok(u),
-            Err((from, till)) => Err(format!("Range must be ascending: {} - {}", from, till)),
-        }
+    fn try_from(range: &RangeTo<char>) -> Result<Self, Self::Error> {
+        try_from_range_impl(range)
     }
 }
 
-impl TryFrom<RangeToInclusive<char>> for UnicodeSet {
-    type Error = String;
+impl TryFrom<&RangeToInclusive<char>> for UnicodeSet {
+    type Error = USetError;
 
-    fn try_from(range: RangeToInclusive<char>) -> Result<Self, Self::Error> {
-        Ok(try_from_range_impl(range).unwrap())
+    fn try_from(range: &RangeToInclusive<char>) -> Result<Self, Self::Error> {
+        try_from_range_impl(range)
     }
 }
 
 #[cfg(test)]
 mod tests {
+    use super::USetError;
     use crate::UnicodeSet;
     use std::convert::TryFrom;
-
     #[test]
     fn test_try_from_range() {
-        assert!(UnicodeSet::try_from('A'..'B').is_ok());
+        let check: Vec<char> = UnicodeSet::try_from(&('A'..'B')).unwrap().iter().collect();
+        assert_eq!(vec!['A'], check);
     }
     #[test]
     fn test_try_from_range_error() {
-        assert!(UnicodeSet::try_from('A'..'A').is_err());
+        let check = UnicodeSet::try_from(&('A'..'A'));
+        assert_eq!(Err(USetError::InvalidRange(65, 65)), check);
     }
     #[test]
     fn test_try_from_range_inclusive() {
-        assert!(UnicodeSet::try_from('A'..='A').is_ok());
+        let check: Vec<char> = UnicodeSet::try_from(&('A'..='A')).unwrap().iter().collect();
+        assert_eq!(vec!['A'], check);
     }
     #[test]
     fn test_try_from_range_inclusive_err() {
-        assert!(UnicodeSet::try_from('B'..='A').is_err());
+        let check = UnicodeSet::try_from(&('B'..='A')); // inclusive: 66..=65 -> (66, 66)
+        assert_eq!(Err(USetError::InvalidRange(66, 66)), check);
     }
     #[test]
     fn test_try_from_range_from() {
-        assert!(UnicodeSet::try_from('A'..).is_ok());
-    }
-    #[test]
-    fn test_try_from_range_from_err() {
-        assert!(UnicodeSet::try_from((std::char::MAX)..).is_err());
+        let uset = UnicodeSet::try_from(&('A'..)).unwrap();
+        let check: Vec<&u32> = uset.ranges().collect();
+        assert_eq!(vec![&65, &((std::char::MAX as u32) + 1)], check);
     }
     #[test]
     fn test_try_from_range_to() {
-        assert!(UnicodeSet::try_from(..'A').is_ok());
+        let uset = UnicodeSet::try_from(&(..'A')).unwrap();
+        let check: Vec<&u32> = uset.ranges().collect();
+        assert_eq!(vec![&0, &65], check);
     }
     #[test]
     fn test_try_from_range_to_err() {
-        assert!(UnicodeSet::try_from(..(0 as char)).is_err());
+        let check = UnicodeSet::try_from(&(..(0 as char)));
+        assert_eq!(Err(USetError::InvalidRange(0, 0)), check);
     }
     #[test]
     fn test_try_from_range_to_inclusive() {
-        assert!(UnicodeSet::try_from(..='A').is_ok());
+        let uset = UnicodeSet::try_from(&(..='A')).unwrap();
+        let check: Vec<&u32> = uset.ranges().collect();
+        assert_eq!(vec![&0, &66], check);
     }
     #[test]
     fn test_try_from_range_full() {
-        assert!(UnicodeSet::try_from(..).is_ok());
+        let uset = UnicodeSet::try_from(&(..)).unwrap();
+        let check: Vec<&u32> = uset.ranges().collect();
+        assert_eq!(vec![&0, &((std::char::MAX as u32) + 1)], check);
     }
 }
diff --git a/components/uniset/src/lib.rs b/components/uniset/src/lib.rs
index 1cff18c16f5..870eb001e30 100644
--- a/components/uniset/src/lib.rs
+++ b/components/uniset/src/lib.rs
@@ -1,12 +1,15 @@
-// Copyright 2019 The Fuchsia Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
 #[macro_use]
 mod uniset;
 mod conversions;
 mod utils;
-// mod iter;
+
 pub use conversions::*;
 pub use uniset::UnicodeSet;
 pub use utils::*;
-// pub use iter::UnicodeSetIter;
+
+/// Custom Errors for UnicodeSet.
+#[derive(Debug, PartialEq)]
+pub enum USetError {
+    InvalidSet(Vec<u32>),
+    InvalidRange(u32, u32),
+}
diff --git a/components/uniset/src/uniset.rs b/components/uniset/src/uniset.rs
index 3c1a3b2c4ee..ae58d99f9b9 100644
--- a/components/uniset/src/uniset.rs
+++ b/components/uniset/src/uniset.rs
@@ -5,8 +5,8 @@ use std::{
     slice::Iter,
 };
 
-use crate::utils::{deconstruct_range, is_sorted};
-// use crate::UnicodeSetIter;
+use super::USetError;
+use crate::utils::{deconstruct_range, is_valid};
 /// Represents the end code point of the Basic Multilingual Plane range, starting from code point 0 , inclusive
 const BMP_MAX: u32 = 0xFFFF;
 
@@ -14,8 +14,8 @@ const BMP_MAX: u32 = 0xFFFF;
 ///
 /// Provides exposure to membership functions and constructors from serialized UnicodeSets
 /// and predefined ranges.
-/// Implements an inversion list.
-//#[derive(Copy, Clone, Debug, Eq)]
+/// Implements an [inversion list.](https://en.wikipedia.org/wiki/Inversion_list)
+#[derive(Debug, PartialEq)]
 pub struct UnicodeSet {
     // If we wanted to use an array to keep the memory on the stack, there is an unsafe nightly feature
     // https://doc.rust-lang.org/nightly/core/array/trait.FixedSizeArray.html
@@ -24,16 +24,13 @@ pub struct UnicodeSet {
 }
 
 impl TryFrom<Vec<u32>> for UnicodeSet {
-    type Error = String;
+    type Error = USetError;
 
     fn try_from(set: Vec<u32>) -> Result<Self, Self::Error> {
-        if is_sorted(&set) {
+        if is_valid(&set) {
             Ok(UnicodeSet { inv_list: set })
         } else {
-            Err(format!(
-                "UnicodeSet set must be sorted without duplicates: {:?}",
-                set
-            ))
+            Err(USetError::InvalidSet(set))
         }
     }
 }
@@ -62,7 +59,7 @@ impl UnicodeSet {
     /// Example:
     ///
     /// ```
-    /// use icu4x_unicodeset::UnicodeSet;
+    /// use icu_unicodeset::UnicodeSet;
     /// use std::convert::TryFrom;
     /// let example_list = vec![0, 10, 15, 20];
     /// let example = UnicodeSet::try_from(example_list).unwrap();
@@ -82,7 +79,7 @@ impl UnicodeSet {
     /// Example:
     ///
     /// ```
-    /// use icu4x_unicodeset::UnicodeSet;
+    /// use icu_unicodeset::UnicodeSet;
     /// use std::convert::TryFrom;
     /// let example_list = vec![65, 68, 69, 70];
     /// let example = UnicodeSet::try_from(example_list).unwrap();
@@ -115,7 +112,7 @@ impl UnicodeSet {
 
     /// Returns whether or not the UnicodeSet is empty
     pub fn is_empty(&self) -> bool {
-        self.inv_list.len() < 2 // unsure if this is appropriate definition of just self.inv_list.is_empty()
+        self.inv_list.is_empty()
     }
 
     /// Wrapper for contains
@@ -148,7 +145,7 @@ impl UnicodeSet {
     /// Example:
     ///
     /// ```
-    /// use icu4x_unicodeset::UnicodeSet;
+    /// use icu_unicodeset::UnicodeSet;
     /// use std::convert::TryFrom;
     /// let example_list = vec![65, 67, 68, 69];
     /// let example = UnicodeSet::try_from(example_list).unwrap();
@@ -170,15 +167,15 @@ impl UnicodeSet {
     /// Example:
     ///
     /// ```
-    /// use icu4x_unicodeset::UnicodeSet;
+    /// use icu_unicodeset::UnicodeSet;
     /// use std::convert::TryFrom;
     /// let example_list = vec![65, 67, 68, 69];
     /// let example = UnicodeSet::try_from(example_list).unwrap();
-    /// assert!(example.contains_range('A'..'C'));
-    /// assert!(example.contains_range('A'..='B'));
-    /// assert!(!example.contains_range('A'..='C'));
+    /// assert!(example.contains_range(&('A'..'C')));
+    /// assert!(example.contains_range(&('A'..='B')));
+    /// assert!(!example.contains_range(&('A'..='C')));
     /// ```
-    pub fn contains_range(&self, range: impl RangeBounds<char>) -> bool {
+    pub fn contains_range(&self, range: &impl RangeBounds<char>) -> bool {
         let (from, till) = deconstruct_range(range);
         if from >= till {
             return false;
@@ -192,18 +189,19 @@ impl UnicodeSet {
 
 #[cfg(test)]
 mod tests {
-    use super::{UnicodeSet, BMP_MAX};
+    use super::{USetError, UnicodeSet, BMP_MAX};
     use std::{char::MAX, convert::TryFrom, vec::Vec};
 
     #[test]
     fn test_unicodeset_try_from_vec() {
-        let check = vec![2, 3, 4, 5];
-        assert!(UnicodeSet::try_from(check).is_ok());
+        let check = UnicodeSet::try_from(vec![2, 3, 4, 5]).unwrap().inv_list;
+        assert_eq!(vec![2, 3, 4, 5], check);
     }
     #[test]
     fn test_unicodeset_try_from_vec_error() {
         let check = vec![1, 1, 2, 3, 4];
-        assert!(UnicodeSet::try_from(check).is_err());
+        let set = UnicodeSet::try_from(vec![1, 1, 2, 3, 4]);
+        assert_eq!(Err(USetError::InvalidSet(check)), set);
     }
     #[test]
     fn test_unicodeset_all() {
@@ -240,22 +238,22 @@ mod tests {
     fn test_unicodeset_contains_range() {
         let ex = vec![65, 70, 75, 85];
         let check = UnicodeSet::try_from(ex).unwrap();
-        assert!(check.contains_range('A'..='E')); // 65 - 69
-        assert!(check.contains_range('K'..'U')); // 75 - 84
+        assert!(check.contains_range(&('A'..='E'))); // 65 - 69
+        assert!(check.contains_range(&('K'..'U'))); // 75 - 84
     }
     #[test]
     fn test_unicodeset_contains_range_false() {
         let ex = vec![65, 70, 75, 85];
         let check = UnicodeSet::try_from(ex).unwrap();
-        assert!(!check.contains_range('!'..'A')); // 33 - 65
-        assert!(!check.contains_range('F'..'K')); // 70 - 74
-        assert!(!check.contains_range('U'..));
+        assert!(!check.contains_range(&('!'..'A'))); // 33 - 65
+        assert!(!check.contains_range(&('F'..'K'))); // 70 - 74
+        assert!(!check.contains_range(&('U'..)));
     }
     #[test]
     fn test_unicodeset_contains_range_invalid() {
         let check = UnicodeSet::all();
-        assert!(!check.contains_range('A'..'!')); // 65 - 33
-        assert!(!check.contains_range('A'..'A'));
+        assert!(!check.contains_range(&('A'..'!'))); // 65 - 33
+        assert!(!check.contains_range(&('A'..'A')));
     }
     #[test]
     fn test_unicodeset_size() {
@@ -274,8 +272,6 @@ mod tests {
     fn test_unicodeset_is_empty() {
         let check = UnicodeSet { inv_list: vec![] };
         assert!(check.is_empty());
-        let check = UnicodeSet { inv_list: vec![0] };
-        assert!(check.is_empty());
     }
     #[test]
     fn test_unicodeset_is_not_empty() {
@@ -287,10 +283,10 @@ mod tests {
         let ex = vec![65, 70, 75, 85];
         let check = UnicodeSet::try_from(ex).unwrap();
         let mut iter = check.ranges();
-        assert_eq!(iter.next().unwrap(), &65);
-        assert_eq!(iter.next().unwrap(), &70);
-        assert_eq!(iter.next().unwrap(), &75);
-        assert_eq!(iter.next().unwrap(), &85);
+        assert_eq!(iter.next(), Some(&65));
+        assert_eq!(iter.next(), Some(&70));
+        assert_eq!(iter.next(), Some(&75));
+        assert_eq!(iter.next(), Some(&85));
         assert_eq!(iter.next(), None);
     }
     #[test]
diff --git a/components/uniset/src/utils.rs b/components/uniset/src/utils.rs
index c324360a698..a953074e285 100644
--- a/components/uniset/src/utils.rs
+++ b/components/uniset/src/utils.rs
@@ -3,13 +3,16 @@ use std::{
     ops::{Bound::*, RangeBounds},
 };
 
-/// Returns whether the vector is sorted ascending non inclusive
-pub fn is_sorted(v: &[u32]) -> bool {
-    v.chunks(2).all(|chunk| chunk[0] < chunk[1])
+/// Returns whether `v` is a valid inversion list: empty, or of even length, strictly
+/// ascending across all elements, and ending no higher than `0x10FFFF + 1` (exclusive).
+pub fn is_valid(v: &[u32]) -> bool {
+    v.len() % 2 == 0
+        && v.windows(2).all(|pair| pair[0] < pair[1])
+        && v.last().map_or(true, |&end| end <= (MAX as u32) + 1)
 }
 
 /// Returns start (inclusive) and end (exclusive) bounds of RangeBounds
-pub fn deconstruct_range(range: impl RangeBounds<char>) -> (u32, u32) {
+pub fn deconstruct_range(range: &impl RangeBounds<char>) -> (u32, u32) {
     let from = match range.start_bound() {
         Included(b) => (*b as u32),
         Excluded(b) => (*b as u32),
@@ -18,47 +21,56 @@ pub fn deconstruct_range(range: impl RangeBounds<char>) -> (u32, u32) {
     let till = match range.end_bound() {
         Included(b) => (*b as u32) + 1,
         Excluded(b) => (*b as u32),
-        Unbounded => MAX as u32,
+        Unbounded => (MAX as u32) + 1,
     };
     (from, till)
 }
 
 #[cfg(test)]
 mod tests {
-    use super::{deconstruct_range, is_sorted};
+    use super::{deconstruct_range, is_valid};
     use std::char::MAX;
 
     #[test]
-    fn test_is_sorted() {
+    fn test_is_valid() {
         let check = vec![2, 3, 4, 5];
-        assert!(is_sorted(&check));
+        assert!(is_valid(&check));
     }
     #[test]
-    fn test_is_sorted_out_of_order() {
+    fn test_is_valid_out_of_order() {
         let check = vec![5, 4, 5, 6, 7];
-        assert!(!is_sorted(&check));
+        assert!(!is_valid(&check));
     }
     #[test]
-    fn test_is_sorted_duplicate() {
+    fn test_is_valid_duplicate() {
         let check = vec![1, 2, 3, 3, 5];
-        assert!(!is_sorted(&check));
+        assert!(!is_valid(&check));
+    }
+    #[test]
+    fn test_is_valid_odd() {
+        let check = vec![1, 2, 3, 4, 5];
+        assert!(!is_valid(&check));
+    }
+    #[test]
+    fn test_is_valid_out_of_range() {
+        let check = vec![1, 2, 3, 4, (MAX as u32) + 1];
+        assert!(!is_valid(&check));
     }
-
     // deconstruct_range
     #[test]
     fn test_deconstruct_range() {
         let expected = (65, 69);
-        let check = deconstruct_range('A'..'E'); // Range
+        let check = deconstruct_range(&('A'..'E')); // Range
         assert_eq!(check, expected);
-        let check = deconstruct_range('A'..='D'); // Range Inclusive
+        let check = deconstruct_range(&('A'..='D')); // Range Inclusive
         assert_eq!(check, expected);
-        let check = deconstruct_range('A'..); // Range From
-        assert_eq!(check, (65, MAX as u32));
-        let check = deconstruct_range(..'A'); // Range To
+        let check = deconstruct_range(&('A'..)); // Range From
+        assert_eq!(check, (65, (MAX as u32) + 1));
+        let check = deconstruct_range(&(..'A')); // Range To
         assert_eq!(check, (0, 65));
-        let check = deconstruct_range(..='A'); // Range To Inclusive
+        let check = deconstruct_range(&(..='A')); // Range To Inclusive
         assert_eq!(check, (0, 66));
-        let check = deconstruct_range(..); // Range Full
-        assert_eq!(check, (0, MAX as u32));
+        let check = deconstruct_range(&(..)); // Range Full
+        assert_eq!(check, (0, (MAX as u32) + 1));
     }
 }

From 7fd908202d3ceeb4c7cac1e224d2dc5cb6db44d8 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Tue, 14 Jul 2020 23:07:06 +0000
Subject: [PATCH 23/30] added benchmarks and fixed surrogate code points in
 iter

---
 components/uniset/Cargo.toml          |  7 ++++
 components/uniset/benches/inv_list.rs | 59 +++++++++++++++++++++++++++
 components/uniset/src/uniset.rs       | 26 ++++++++----
 3 files changed, 85 insertions(+), 7 deletions(-)
 create mode 100644 components/uniset/benches/inv_list.rs

diff --git a/components/uniset/Cargo.toml b/components/uniset/Cargo.toml
index bbe4fe2c01b..279467b3aa6 100644
--- a/components/uniset/Cargo.toml
+++ b/components/uniset/Cargo.toml
@@ -12,3 +12,10 @@ include = [
     "src/**/*",
     "Cargo.toml",
 ]
+
+[dev-dependencies]
+criterion = "0.3"
+
+[[bench]]
+name = "inv_list"
+harness = false
\ No newline at end of file
diff --git a/components/uniset/benches/inv_list.rs b/components/uniset/benches/inv_list.rs
new file mode 100644
index 00000000000..fd7e98b6abc
--- /dev/null
+++ b/components/uniset/benches/inv_list.rs
@@ -0,0 +1,59 @@
+use criterion::{criterion_group, criterion_main, Criterion};
+use icu_unicodeset::UnicodeSet;
+use std::convert::TryFrom;
+
+/// Best Case Contains
+///
+/// Create a single small range and check contains on every value in range
+fn best_case_contains(c: &mut Criterion) {
+    let check = vec![65, 70];
+    let uset = UnicodeSet::try_from(check).unwrap();
+    c.bench_function("inv_list/contains_best", |b| {
+        b.iter(|| uset.iter().map(|c| uset.contains(c)))
+    });
+}
+
+/// Worst Case Contains
+///
+/// Create the maximum number of ranges ([0, 1, 2, 3], etc.) and check contains on 100 first values
+fn worst_case_contains(c: &mut Criterion) {
+    let check: Vec<u32> = (0..((std::char::MAX as u32) + 1)).collect();
+    let uset = UnicodeSet::try_from(check).unwrap();
+    c.bench_function("inv_list/contains_worst", |b| {
+        b.iter(|| uset.iter().take(100).map(|c| uset.contains(c)))
+    });
+}
+/// Best Case Contains Range
+///
+/// Create a single small range and check contains on every value in range
+fn best_case_contains_range(c: &mut Criterion) {
+    let check = vec![65, 70];
+    let uset = UnicodeSet::try_from(check).unwrap();
+    c.bench_function("inv_list/contains_range_best", |b| {
+        b.iter(|| uset.iter().map(|c| uset.contains_range(&('A'..c))))
+    });
+}
+
+/// Worst Case Contains Range
+///
+/// Create the maximum number of ranges ([0, 1, 2, 3], etc.) and check contains on 100 first values
+fn worst_case_contains_range(c: &mut Criterion) {
+    let check: Vec<u32> = (0..((std::char::MAX as u32) + 1)).collect();
+    let start = std::char::from_u32(0).unwrap();
+    let uset = UnicodeSet::try_from(check).unwrap();
+    c.bench_function("inv_list/contains_range_worst", |b| {
+        b.iter(|| {
+            uset.iter()
+                .take(100)
+                .map(|c| uset.contains_range(&(start..c)))
+        })
+    });
+}
+criterion_group!(
+    benches,
+    best_case_contains,
+    worst_case_contains,
+    best_case_contains_range,
+    worst_case_contains_range
+);
+criterion_main!(benches);
diff --git a/components/uniset/src/uniset.rs b/components/uniset/src/uniset.rs
index ae58d99f9b9..0a18d337419 100644
--- a/components/uniset/src/uniset.rs
+++ b/components/uniset/src/uniset.rs
@@ -94,7 +94,7 @@ impl UnicodeSet {
         self.inv_list
             .chunks(2)
             .flat_map(|pair| (pair[0]..pair[1]))
-            .map(|val| from_u32(val).unwrap())
+            .filter_map(from_u32)
     }
 
     /// Returns the number of elements of the UnicodeSet
@@ -140,7 +140,7 @@ impl UnicodeSet {
     /// Checks to see the query is in the UnicodeSet
     ///
     /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
-    /// in the set using `std::vec::Vec` implementation
+    /// in the set using `std` implementation
     ///
     /// Example:
     ///
@@ -159,10 +159,8 @@ impl UnicodeSet {
     /// Checks to see if the range is in the UnicodeSet, returns a Result
     ///
     /// Runs a binary search in `O(log(n))` where `n` is the number of start and end points
-    /// in the set using `std::vec::Vec` implementation
-    ///
-    /// Only runs the search once on the `start` parameter, while the `end` parameter is checked
-    /// in a single `O(1)` step
+    /// in the set using `std::vec::Vec` implementation. Only runs the search once on the `start`
+    /// parameter, while the `end` parameter is checked in a single `O(1)` step
     ///
     /// Example:
     ///
@@ -175,6 +173,20 @@ impl UnicodeSet {
     /// assert!(example.contains_range(&('A'..='B')));
     /// assert!(!example.contains_range(&('A'..='C')));
     /// ```
+    ///
+    /// Surrogate points (`0xD800 -> 0xDFFF`) will return false if the Range contains them but the
+    /// UnicodeSet does not.
+    ///
+    /// Example:
+    ///
+    /// ```
+    /// use icu_unicodeset::UnicodeSet;
+    /// use std::{convert::TryFrom, char::from_u32};
+    /// let check = from_u32(0xD7FE).unwrap() .. from_u32(0xE001).unwrap();
+    /// let example_list = vec![0xD7FE, 0xD7FF, 0xE000, 0xE001];
+    /// let example = UnicodeSet::try_from(example_list).unwrap();
+    /// assert!(!example.contains_range(&(check)));
+    /// ```
     pub fn contains_range(&self, range: &impl RangeBounds<char>) -> bool {
         let (from, till) = deconstruct_range(range);
         if from >= till {
@@ -291,7 +303,7 @@ mod tests {
     }
     #[test]
     fn test_unicodeset_iter() {
-        let ex = vec![65, 68, 69, 70];
+        let ex = vec![65, 68, 69, 70, 0xD800, 0xD801];
         let check = UnicodeSet::try_from(ex).unwrap();
         let mut iter = check.iter();
         assert_eq!(Some('A'), iter.next());

From 0f5a021b4f5b35ac81cd3fc41e6ccd7eb8a21b0c Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Wed, 15 Jul 2020 16:28:02 +0000
Subject: [PATCH 24/30] fix to is_valid

---
 components/uniset/src/uniset.rs | 7 ++++---
 components/uniset/src/utils.rs  | 7 ++++++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/components/uniset/src/uniset.rs b/components/uniset/src/uniset.rs
index 0a18d337419..6938057b5ef 100644
--- a/components/uniset/src/uniset.rs
+++ b/components/uniset/src/uniset.rs
@@ -206,13 +206,14 @@ mod tests {
 
     #[test]
     fn test_unicodeset_try_from_vec() {
-        let check = UnicodeSet::try_from(vec![2, 3, 4, 5]).unwrap().inv_list;
-        assert_eq!(vec![2, 3, 4, 5], check);
+        let ex = vec![2, 3, 4, 5];
+        let check = UnicodeSet::try_from(ex.clone()).unwrap().inv_list;
+        assert_eq!(ex, check);
     }
     #[test]
     fn test_unicodeset_try_from_vec_error() {
         let check = vec![1, 1, 2, 3, 4];
-        let set = UnicodeSet::try_from(vec![1, 1, 2, 3, 4]);
+        let set = UnicodeSet::try_from(check.clone());
         assert_eq!(Err(USetError::InvalidSet(check)), set);
     }
     #[test]
diff --git a/components/uniset/src/utils.rs b/components/uniset/src/utils.rs
index a953074e285..6d1d7206b50 100644
--- a/components/uniset/src/utils.rs
+++ b/components/uniset/src/utils.rs
@@ -7,7 +7,7 @@ use std::{
 /// and within the bounds of `0x0 -> 0x10FFFF` inclusive.
 pub fn is_valid(v: &[u32]) -> bool {
     v.len() % 2 == 0
-        && v.chunks(2).all(|chunk| chunk[0] < chunk[1])
+        && v.windows(2).all(|chunk| chunk[0] < chunk[1])
         && v[v.len() - 1] <= (MAX as u32) + 1
 }
 
@@ -37,6 +37,11 @@ mod tests {
         assert!(is_valid(&check));
     }
     #[test]
+    fn test_is_valid_overlapping() {
+        let check = vec![2, 5, 4, 6];
+        assert!(!is_valid(&check));
+    }
+    #[test]
     fn test_is_valid_out_of_order() {
         let check = vec![5, 4, 5, 6, 7];
         assert!(!is_valid(&check));

From c7c4330bb7e82c1310f85170555da6fefac1d9e2 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Wed, 15 Jul 2020 18:26:20 +0000
Subject: [PATCH 25/30] bench changes and other minor fixes

---
 components/uniset/benches/inv_list.rs | 74 +++++++++++----------------
 components/uniset/src/conversions.rs  | 12 ++---
 components/uniset/src/uniset.rs       |  6 +--
 components/uniset/src/utils.rs        | 10 ++--
 4 files changed, 45 insertions(+), 57 deletions(-)

diff --git a/components/uniset/benches/inv_list.rs b/components/uniset/benches/inv_list.rs
index fd7e98b6abc..d53e21f5f2e 100644
--- a/components/uniset/benches/inv_list.rs
+++ b/components/uniset/benches/inv_list.rs
@@ -1,59 +1,43 @@
 use criterion::{criterion_group, criterion_main, Criterion};
 use icu_unicodeset::UnicodeSet;
-use std::convert::TryFrom;
+use std::{convert::TryFrom, char::{MAX, from_u32}};
 
-/// Best Case Contains
-///
-/// Create a single small range and check contains on every value in range
-fn best_case_contains(c: &mut Criterion) {
-    let check = vec![65, 70];
-    let uset = UnicodeSet::try_from(check).unwrap();
-    c.bench_function("inv_list/contains_best", |b| {
-        b.iter(|| uset.iter().map(|c| uset.contains(c)))
-    });
-}
+fn contains_bench(c: &mut Criterion) {
+    let best_ex = vec![65, 70];
+    let best_sample = UnicodeSet::try_from(best_ex).unwrap();
+    let worst_ex: Vec<u32> = (0..((MAX as u32) + 1)).collect();
+    let worst_sample = UnicodeSet::try_from(worst_ex).unwrap();
 
-/// Worst Case Contains
-///
-/// Create the maximum number of ranges ([0, 1, 2, 3], etc.) and check contains on 100 first values
-fn worst_case_contains(c: &mut Criterion) {
-    let check: Vec<u32> = (0..((std::char::MAX as u32) + 1)).collect();
-    let uset = UnicodeSet::try_from(check).unwrap();
-    c.bench_function("inv_list/contains_worst", |b| {
-        b.iter(|| uset.iter().take(100).map(|c| uset.contains(c)))
+    let mut group = c.benchmark_group("uniset/contains");
+    group.bench_with_input("best", &best_sample, |b, sample| {
+        b.iter(|| sample.iter().map(|ch| sample.contains(ch)))
     });
-}
-/// Best Case Contains Range
-///
-/// Create a single small range and check contains on every value in range
-fn best_case_contains_range(c: &mut Criterion) {
-    let check = vec![65, 70];
-    let uset = UnicodeSet::try_from(check).unwrap();
-    c.bench_function("inv_list/contains_range_best", |b| {
-        b.iter(|| uset.iter().map(|c| uset.contains_range(&('A'..c))))
+    group.bench_with_input("worst", &worst_sample, |b, sample| {
+        b.iter(|| sample.iter().take(100).map(|ch| sample.contains(ch)))
     });
+    group.finish();
 }
 
-/// Worst Case Contains Range
-///
-/// Create the maximum number of ranges ([0, 1, 2, 3], etc.) and check contains on 100 first values
-fn worst_case_contains_range(c: &mut Criterion) {
-    let check: Vec<u32> = (0..((std::char::MAX as u32) + 1)).collect();
-    let start = std::char::from_u32(0).unwrap();
-    let uset = UnicodeSet::try_from(check).unwrap();
-    c.bench_function("inv_list/contains_range_worst", |b| {
+fn contains_range_bench(c: &mut Criterion) {
+    let best_ex = vec![65, 70];
+    let best_sample = UnicodeSet::try_from(best_ex).unwrap();
+    let worst_ex: Vec<u32> = (0..((MAX as u32) + 1)).collect();
+    let worst_sample = UnicodeSet::try_from(worst_ex).unwrap();
+
+    let mut group = c.benchmark_group("uniset/contains_range");
+    group.bench_with_input("best", &best_sample, |b, sample| {
+        b.iter(|| sample.iter().map(|ch| sample.contains_range(&('A'..ch))))
+    });
+    group.bench_with_input("worst", &worst_sample, |b, sample| {
         b.iter(|| {
-            uset.iter()
+            sample
+                .iter()
                 .take(100)
-                .map(|c| uset.contains_range(&(start..c)))
+                .map(|ch| sample.contains_range(&(from_u32(0).unwrap()..ch)))
         })
     });
+    group.finish();
 }
-criterion_group!(
-    benches,
-    best_case_contains,
-    worst_case_contains,
-    best_case_contains_range,
-    worst_case_contains_range
-);
+
+criterion_group!(benches, contains_bench, contains_range_bench);
 criterion_main!(benches);
diff --git a/components/uniset/src/conversions.rs b/components/uniset/src/conversions.rs
index 68e039e9086..04230a22c87 100644
--- a/components/uniset/src/conversions.rs
+++ b/components/uniset/src/conversions.rs
@@ -6,7 +6,7 @@ use std::{
     ops::{Range, RangeBounds, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive},
 };
 
-fn try_from_range_impl(range: &impl RangeBounds<char>) -> Result<UnicodeSet, USetError> {
+fn try_from_range(range: &impl RangeBounds<char>) -> Result<UnicodeSet, USetError> {
     let (from, till) = deconstruct_range(range);
     if from < till {
         let set = vec![from, till];
@@ -20,7 +20,7 @@ impl TryFrom<&Range<char>> for UnicodeSet {
     type Error = USetError;
 
     fn try_from(range: &Range<char>) -> Result<Self, Self::Error> {
-        try_from_range_impl(range)
+        try_from_range(range)
     }
 }
 
@@ -28,7 +28,7 @@ impl TryFrom<&RangeFrom<char>> for UnicodeSet {
     type Error = USetError;
 
     fn try_from(range: &RangeFrom<char>) -> Result<Self, Self::Error> {
-        try_from_range_impl(range)
+        try_from_range(range)
     }
 }
 
@@ -44,7 +44,7 @@ impl TryFrom<&RangeInclusive<char>> for UnicodeSet {
     type Error = USetError;
 
     fn try_from(range: &RangeInclusive<char>) -> Result<Self, Self::Error> {
-        try_from_range_impl(range)
+        try_from_range(range)
     }
 }
 
@@ -52,7 +52,7 @@ impl TryFrom<&RangeTo<char>> for UnicodeSet {
     type Error = USetError;
 
     fn try_from(range: &RangeTo<char>) -> Result<Self, Self::Error> {
-        try_from_range_impl(range)
+        try_from_range(range)
     }
 }
 
@@ -60,7 +60,7 @@ impl TryFrom<&RangeToInclusive<char>> for UnicodeSet {
     type Error = USetError;
 
     fn try_from(range: &RangeToInclusive<char>) -> Result<Self, Self::Error> {
-        try_from_range_impl(range)
+        try_from_range(range)
     }
 }
 
diff --git a/components/uniset/src/uniset.rs b/components/uniset/src/uniset.rs
index 6938057b5ef..e3d5f6fa46a 100644
--- a/components/uniset/src/uniset.rs
+++ b/components/uniset/src/uniset.rs
@@ -118,7 +118,7 @@ impl UnicodeSet {
     /// Wrapper for contains
     ///
     /// Returns an Option as to whether or not it is possible for the query to be contained
-    fn contains_impl(&self, query: u32) -> Option<usize> {
+    fn contains_query(&self, query: u32) -> Option<usize> {
         match self.inv_list.binary_search(&query) {
             Ok(pos) => {
                 if pos % 2 == 0 {
@@ -153,7 +153,7 @@ impl UnicodeSet {
     /// assert!(!example.contains('C'));
     /// ```
     pub fn contains(&self, query: char) -> bool {
-        self.contains_impl(query as u32).is_some()
+        self.contains_query(query as u32).is_some()
     }
 
     /// Checks to see if the range is in the UnicodeSet, returns a Result
@@ -192,7 +192,7 @@ impl UnicodeSet {
         if from >= till {
             return false;
         }
-        match self.contains_impl(from) {
+        match self.contains_query(from) {
             Some(pos) => (till) <= self.inv_list[pos + 1],
             None => false,
         }
diff --git a/components/uniset/src/utils.rs b/components/uniset/src/utils.rs
index 6d1d7206b50..b6044c46e30 100644
--- a/components/uniset/src/utils.rs
+++ b/components/uniset/src/utils.rs
@@ -8,14 +8,13 @@ use std::{
 pub fn is_valid(v: &[u32]) -> bool {
     v.len() % 2 == 0
         && v.windows(2).all(|chunk| chunk[0] < chunk[1])
-        && v[v.len() - 1] <= (MAX as u32) + 1
+        && v.last().map_or(false, |e| e <= &((MAX as u32) + 1))
 }
 
 /// Returns start (inclusive) and end (exclusive) bounds of RangeBounds
 pub fn deconstruct_range(range: &impl RangeBounds<char>) -> (u32, u32) {
     let from = match range.start_bound() {
-        Included(b) => (*b as u32),
-        Excluded(b) => (*b as u32),
+        Included(b) | Excluded(b) => (*b as u32),
         Unbounded => 0,
     };
     let till = match range.end_bound() {
@@ -37,6 +36,11 @@ mod tests {
         assert!(is_valid(&check));
     }
     #[test]
+    fn test_is_valid_empty() {
+        let check = vec![];
+        assert!(!is_valid(&check));
+    }
+    #[test]
     fn test_is_valid_overlapping() {
         let check = vec![2, 5, 4, 6];
         assert!(!is_valid(&check));

From a8a4b503c9928802f85eced847b94124e80f76b8 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Wed, 15 Jul 2020 18:28:47 +0000
Subject: [PATCH 26/30] forgot to run fmt

---
 components/uniset/benches/inv_list.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/components/uniset/benches/inv_list.rs b/components/uniset/benches/inv_list.rs
index d53e21f5f2e..d588ce53883 100644
--- a/components/uniset/benches/inv_list.rs
+++ b/components/uniset/benches/inv_list.rs
@@ -1,6 +1,9 @@
 use criterion::{criterion_group, criterion_main, Criterion};
 use icu_unicodeset::UnicodeSet;
-use std::{convert::TryFrom, char::{MAX, from_u32}};
+use std::{
+    char::{from_u32, MAX},
+    convert::TryFrom,
+};
 
 fn contains_bench(c: &mut Criterion) {
     let best_ex = vec![65, 70];

From 845bc352dd1953af2d2154a0682f45dad4deb4aa Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Thu, 16 Jul 2020 17:21:02 +0000
Subject: [PATCH 27/30] change to std::char, and unreachable!() optimizations

---
 components/uniset/benches/inv_list.rs | 11 ++++-------
 components/uniset/src/uniset.rs       | 21 ++++++++-------------
 components/uniset/src/utils.rs        | 19 ++++++++++---------
 3 files changed, 22 insertions(+), 29 deletions(-)

diff --git a/components/uniset/benches/inv_list.rs b/components/uniset/benches/inv_list.rs
index d588ce53883..9ef3e7ea63b 100644
--- a/components/uniset/benches/inv_list.rs
+++ b/components/uniset/benches/inv_list.rs
@@ -1,14 +1,11 @@
 use criterion::{criterion_group, criterion_main, Criterion};
 use icu_unicodeset::UnicodeSet;
-use std::{
-    char::{from_u32, MAX},
-    convert::TryFrom,
-};
+use std::{char, convert::TryFrom};
 
 fn contains_bench(c: &mut Criterion) {
     let best_ex = vec![65, 70];
     let best_sample = UnicodeSet::try_from(best_ex).unwrap();
-    let worst_ex: Vec<u32> = (0..((MAX as u32) + 1)).collect();
+    let worst_ex: Vec<u32> = (0..((char::MAX as u32) + 1)).collect();
     let worst_sample = UnicodeSet::try_from(worst_ex).unwrap();
 
     let mut group = c.benchmark_group("uniset/contains");
@@ -24,7 +21,7 @@ fn contains_bench(c: &mut Criterion) {
 fn contains_range_bench(c: &mut Criterion) {
     let best_ex = vec![65, 70];
     let best_sample = UnicodeSet::try_from(best_ex).unwrap();
-    let worst_ex: Vec<u32> = (0..((MAX as u32) + 1)).collect();
+    let worst_ex: Vec<u32> = (0..((char::MAX as u32) + 1)).collect();
     let worst_sample = UnicodeSet::try_from(worst_ex).unwrap();
 
     let mut group = c.benchmark_group("uniset/contains_range");
@@ -36,7 +33,7 @@ fn contains_range_bench(c: &mut Criterion) {
             sample
                 .iter()
                 .take(100)
-                .map(|ch| sample.contains_range(&(from_u32(0).unwrap()..ch)))
+                .map(|ch| sample.contains_range(&(char::from_u32(0).unwrap()..ch)))
         })
     });
     group.finish();
diff --git a/components/uniset/src/uniset.rs b/components/uniset/src/uniset.rs
index e3d5f6fa46a..4cdf6f69f0a 100644
--- a/components/uniset/src/uniset.rs
+++ b/components/uniset/src/uniset.rs
@@ -1,9 +1,4 @@
-use std::{
-    char::{from_u32, MAX},
-    convert::TryFrom,
-    ops::RangeBounds,
-    slice::Iter,
-};
+use std::{char, convert::TryFrom, ops::RangeBounds, slice::Iter};
 
 use super::USetError;
 use crate::utils::{deconstruct_range, is_valid};
@@ -41,7 +36,7 @@ impl UnicodeSet {
     /// The range spans from `0x0 -> 0x10FFFF` inclusive
     pub fn all() -> UnicodeSet {
         UnicodeSet {
-            inv_list: vec![0, (MAX as u32) + 1],
+            inv_list: vec![0, (char::MAX as u32) + 1],
         }
     }
 
@@ -94,7 +89,7 @@ impl UnicodeSet {
         self.inv_list
             .chunks(2)
             .flat_map(|pair| (pair[0]..pair[1]))
-            .filter_map(from_u32)
+            .filter_map(char::from_u32)
     }
 
     /// Returns the number of elements of the UnicodeSet
@@ -181,8 +176,8 @@ impl UnicodeSet {
     ///
     /// ```
     /// use icu_unicodeset::UnicodeSet;
-    /// use std::{convert::TryFrom, char::from_u32};
-    /// let check = from_u32(0xD7FE).unwrap() .. from_u32(0xE001).unwrap();
+    /// use std::{convert::TryFrom, char};
+    /// let check = char::from_u32(0xD7FE).unwrap() .. char::from_u32(0xE001).unwrap();
     /// let example_list = vec![0xD7FE, 0xD7FF, 0xE000, 0xE001];
     /// let example = UnicodeSet::try_from(example_list).unwrap();
     /// assert!(!example.contains_range(&(check)));
@@ -202,7 +197,7 @@ impl UnicodeSet {
 #[cfg(test)]
 mod tests {
     use super::{USetError, UnicodeSet, BMP_MAX};
-    use std::{char::MAX, convert::TryFrom, vec::Vec};
+    use std::{char, convert::TryFrom, vec::Vec};
 
     #[test]
     fn test_unicodeset_try_from_vec() {
@@ -218,7 +213,7 @@ mod tests {
     }
     #[test]
     fn test_unicodeset_all() {
-        let expected = vec![0, (MAX as u32) + 1];
+        let expected = vec![0, (char::MAX as u32) + 1];
         assert_eq!(UnicodeSet::all().inv_list, expected);
     }
     #[test]
@@ -274,7 +269,7 @@ mod tests {
         let check = UnicodeSet::try_from(ex).unwrap();
         assert_eq!(8, check.size());
         let check = UnicodeSet::all();
-        let expected = (MAX as u32) + 1;
+        let expected = (char::MAX as u32) + 1;
         assert_eq!(expected as usize, check.size());
         let check = UnicodeSet {
             inv_list: Vec::new(),
diff --git a/components/uniset/src/utils.rs b/components/uniset/src/utils.rs
index b6044c46e30..b20c96db391 100644
--- a/components/uniset/src/utils.rs
+++ b/components/uniset/src/utils.rs
@@ -1,5 +1,5 @@
 use std::{
-    char::MAX,
+    char,
     ops::{Bound::*, RangeBounds},
 };
 
@@ -8,19 +8,20 @@ use std::{
 pub fn is_valid(v: &[u32]) -> bool {
     v.len() % 2 == 0
         && v.windows(2).all(|chunk| chunk[0] < chunk[1])
-        && v.last().map_or(false, |e| e <= &((MAX as u32) + 1))
+        && v.last().map_or(false, |e| e <= &((char::MAX as u32) + 1))
 }
 
-/// Returns start (inclusive) and end (exclusive) bounds of RangeBounds
+/// Returns start (inclusive) and end (exclusive) bounds of RangeBounds
 pub fn deconstruct_range(range: &impl RangeBounds<char>) -> (u32, u32) {
     let from = match range.start_bound() {
-        Included(b) | Excluded(b) => (*b as u32),
+        Included(b) => (*b as u32),
+        Excluded(_) => unreachable!(),
         Unbounded => 0,
     };
     let till = match range.end_bound() {
         Included(b) => (*b as u32) + 1,
         Excluded(b) => (*b as u32),
-        Unbounded => (MAX as u32) + 1,
+        Unbounded => (char::MAX as u32) + 1,
     };
     (from, till)
 }
@@ -28,7 +29,7 @@ pub fn deconstruct_range(range: &impl RangeBounds<char>) -> (u32, u32) {
 #[cfg(test)]
 mod tests {
     use super::{deconstruct_range, is_valid};
-    use std::char::MAX;
+    use std::char;
 
     #[test]
     fn test_is_valid() {
@@ -62,7 +63,7 @@ mod tests {
     }
     #[test]
     fn test_is_valid_out_of_range() {
-        let check = vec![1, 2, 3, 4, (MAX as u32) + 1];
+        let check = vec![1, 2, 3, 4, (char::MAX as u32) + 1];
         assert!(!is_valid(&check));
     }
     // deconstruct_range
@@ -74,12 +75,12 @@ mod tests {
         let check = deconstruct_range(&('A'..='D')); // Range Inclusive
         assert_eq!(check, expected);
         let check = deconstruct_range(&('A'..)); // Range From
-        assert_eq!(check, (65, (MAX as u32) + 1));
+        assert_eq!(check, (65, (char::MAX as u32) + 1));
         let check = deconstruct_range(&(..'A')); // Range To
         assert_eq!(check, (0, 65));
         let check = deconstruct_range(&(..='A')); // Range To Inclusive
         assert_eq!(check, (0, 66));
         let check = deconstruct_range(&(..)); // Range Full
-        assert_eq!(check, (0, (MAX as u32) + 1));
+        assert_eq!(check, (0, (char::MAX as u32) + 1));
     }
 }

From dbf3100ca1760041ae33c52c44c95f910b3a8c0d Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Fri, 17 Jul 2020 17:53:29 +0000
Subject: [PATCH 28/30] size() is now constant check, ranges() temp removed

---
 components/uniset/src/conversions.rs |  56 ++++++-----
 components/uniset/src/lib.rs         |   2 +-
 components/uniset/src/uniset.rs      | 139 +++++++++++++--------------
 3 files changed, 98 insertions(+), 99 deletions(-)

diff --git a/components/uniset/src/conversions.rs b/components/uniset/src/conversions.rs
index 04230a22c87..03778d56618 100644
--- a/components/uniset/src/conversions.rs
+++ b/components/uniset/src/conversions.rs
@@ -1,23 +1,24 @@
-use super::USetError;
-use crate::utils::deconstruct_range;
-use crate::UnicodeSet;
 use std::{
     convert::TryFrom,
     ops::{Range, RangeBounds, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive},
 };
 
-fn try_from_range(range: &impl RangeBounds<char>) -> Result<UnicodeSet, USetError> {
+use super::UnicodeSetError;
+use crate::utils::deconstruct_range;
+use crate::UnicodeSet;
+
+fn try_from_range(range: &impl RangeBounds<char>) -> Result<UnicodeSet, UnicodeSetError> {
     let (from, till) = deconstruct_range(range);
     if from < till {
         let set = vec![from, till];
-        Ok(UnicodeSet::try_from(set).unwrap())
+        Ok(UnicodeSet::from_inversion_list(set).unwrap())
     } else {
-        Err(USetError::InvalidRange(from, till))
+        Err(UnicodeSetError::InvalidRange(from, till))
     }
 }
 
 impl TryFrom<&Range<char>> for UnicodeSet {
-    type Error = USetError;
+    type Error = UnicodeSetError;
 
     fn try_from(range: &Range<char>) -> Result<Self, Self::Error> {
         try_from_range(range)
@@ -25,7 +26,7 @@ impl TryFrom<&Range<char>> for UnicodeSet {
 }
 
 impl TryFrom<&RangeFrom<char>> for UnicodeSet {
-    type Error = USetError;
+    type Error = UnicodeSetError;
 
     fn try_from(range: &RangeFrom<char>) -> Result<Self, Self::Error> {
         try_from_range(range)
@@ -33,7 +34,7 @@ impl TryFrom<&RangeFrom<char>> for UnicodeSet {
 }
 
 impl TryFrom<&RangeFull> for UnicodeSet {
-    type Error = USetError;
+    type Error = UnicodeSetError;
 
     fn try_from(_: &RangeFull) -> Result<Self, Self::Error> {
         Ok(UnicodeSet::all())
@@ -41,7 +42,7 @@ impl TryFrom<&RangeFull> for UnicodeSet {
 }
 
 impl TryFrom<&RangeInclusive<char>> for UnicodeSet {
-    type Error = USetError;
+    type Error = UnicodeSetError;
 
     fn try_from(range: &RangeInclusive<char>) -> Result<Self, Self::Error> {
         try_from_range(range)
@@ -49,7 +50,7 @@ impl TryFrom<&RangeInclusive<char>> for UnicodeSet {
 }
 
 impl TryFrom<&RangeTo<char>> for UnicodeSet {
-    type Error = USetError;
+    type Error = UnicodeSetError;
 
     fn try_from(range: &RangeTo<char>) -> Result<Self, Self::Error> {
         try_from_range(range)
@@ -57,7 +58,7 @@ impl TryFrom<&RangeTo<char>> for UnicodeSet {
 }
 
 impl TryFrom<&RangeToInclusive<char>> for UnicodeSet {
-    type Error = USetError;
+    type Error = UnicodeSetError;
 
     fn try_from(range: &RangeToInclusive<char>) -> Result<Self, Self::Error> {
         try_from_range(range)
@@ -66,9 +67,10 @@ impl TryFrom<&RangeToInclusive<char>> for UnicodeSet {
 
 #[cfg(test)]
 mod tests {
-    use super::USetError;
+    use super::UnicodeSetError;
     use crate::UnicodeSet;
-    use std::convert::TryFrom;
+    use std::{char, convert::TryFrom};
+
     #[test]
     fn test_try_from_range() {
         let check: Vec<char> = UnicodeSet::try_from(&('A'..'B')).unwrap().iter().collect();
@@ -77,7 +79,7 @@ mod tests {
     #[test]
     fn test_try_from_range_error() {
         let check = UnicodeSet::try_from(&('A'..'A'));
-        assert_eq!(Err(USetError::InvalidRange(65, 65)), check);
+        assert_eq!(Err(UnicodeSetError::InvalidRange(65, 65)), check);
     }
     #[test]
     fn test_try_from_range_inclusive() {
@@ -87,35 +89,39 @@ mod tests {
     #[test]
     fn test_try_from_range_inclusive_err() {
         let check = UnicodeSet::try_from(&('B'..'A'));
-        assert_eq!(Err(USetError::InvalidRange(66, 65)), check);
+        assert_eq!(Err(UnicodeSetError::InvalidRange(66, 65)), check);
     }
     #[test]
     fn test_try_from_range_from() {
         let uset = UnicodeSet::try_from(&('A'..)).unwrap();
-        let check: Vec<&u32> = uset.ranges().collect();
-        assert_eq!(vec![&65, &((std::char::MAX as u32) + 1)], check);
+        let check: usize = uset.size();
+        let expected: usize = (char::MAX as usize) + 1 - 65;
+        assert_eq!(expected, check);
     }
     #[test]
     fn test_try_from_range_to() {
         let uset = UnicodeSet::try_from(&(..'A')).unwrap();
-        let check: Vec<&u32> = uset.ranges().collect();
-        assert_eq!(vec![&0, &65], check);
+        let check: usize = uset.size();
+        let expected: usize = 65;
+        assert_eq!(expected, check);
     }
     #[test]
     fn test_try_from_range_to_err() {
         let check = UnicodeSet::try_from(&(..(0 as char)));
-        assert_eq!(Err(USetError::InvalidRange(0, 0)), check);
+        assert_eq!(Err(UnicodeSetError::InvalidRange(0, 0)), check);
     }
     #[test]
     fn test_try_from_range_to_inclusive() {
         let uset = UnicodeSet::try_from(&(..='A')).unwrap();
-        let check: Vec<&u32> = uset.ranges().collect();
-        assert_eq!(vec![&0, &66], check);
+        let check: usize = uset.size();
+        let expected: usize = 66;
+        assert_eq!(expected, check);
     }
     #[test]
     fn test_try_from_range_full() {
         let uset = UnicodeSet::try_from(&(..)).unwrap();
-        let check: Vec<&u32> = uset.ranges().collect();
-        assert_eq!(vec![&0, &((std::char::MAX as u32) + 1)], check);
+        let check: usize = uset.size();
+        let expected: usize = (char::MAX as usize) + 1;
+        assert_eq!(expected, check);
     }
 }
diff --git a/components/uniset/src/lib.rs b/components/uniset/src/lib.rs
index 870eb001e30..48bb002b9b1 100644
--- a/components/uniset/src/lib.rs
+++ b/components/uniset/src/lib.rs
@@ -9,7 +9,7 @@ pub use utils::*;
 
 /// Custom Errors for UnicodeSet.
 #[derive(Debug, PartialEq)]
-pub enum USetError {
+pub enum UnicodeSetError {
     InvalidSet(Vec<u32>),
     InvalidRange(u32, u32),
 }
diff --git a/components/uniset/src/uniset.rs b/components/uniset/src/uniset.rs
index 4cdf6f69f0a..ff9243562ed 100644
--- a/components/uniset/src/uniset.rs
+++ b/components/uniset/src/uniset.rs
@@ -1,6 +1,6 @@
-use std::{char, convert::TryFrom, ops::RangeBounds, slice::Iter};
+use std::{char, ops::RangeBounds};
 
-use super::USetError;
+use super::UnicodeSetError;
 use crate::utils::{deconstruct_range, is_valid};
 /// Represents the end code point of the Basic Multilingual Plane range, starting from code point 0 , inclusive
 const BMP_MAX: u32 = 0xFFFF;
@@ -9,34 +9,51 @@ const BMP_MAX: u32 = 0xFFFF;
 ///
 /// Provides exposure to membership functions and constructors from serialized UnicodeSets
 /// and predefined ranges.
-/// Implements an [inversion list.](https://en.wikipedia.org/wiki/Inversion_list)
 #[derive(Debug, PartialEq)]
 pub struct UnicodeSet {
     // If we wanted to use an array to keep the memory on the stack, there is an unsafe nightly feature
     // https://doc.rust-lang.org/nightly/core/array/trait.FixedSizeArray.html
     // Allows for traits of fixed size arrays
+
+    // Implements an [inversion list.](https://en.wikipedia.org/wiki/Inversion_list)
     inv_list: Vec<u32>,
+    size: usize,
 }
 
-impl TryFrom<Vec<u32>> for UnicodeSet {
-    type Error = USetError;
-
-    fn try_from(set: Vec<u32>) -> Result<Self, Self::Error> {
-        if is_valid(&set) {
-            Ok(UnicodeSet { inv_list: set })
+impl UnicodeSet {
+    /// Returns a UnicodeSet built from an [inversion list](https://en.wikipedia.org/wiki/Inversion_list)
+    /// represented by a `Vec<u32>` of code points.
+    ///
+    /// The inversion list must be of even length, sorted in ascending order, non-overlapping,
+    /// and within the bounds of `0x0 -> 0x10FFFF` inclusive; range end points are exclusive.
+    ///
+    /// Example:
+    ///
+    /// ```
+    /// use icu_unicodeset::UnicodeSet;
+    /// use icu_unicodeset::UnicodeSetError;
+    /// let empty: Vec<u32> = vec![];
+    /// assert_eq!(UnicodeSet::from_inversion_list(empty.clone()), Err(UnicodeSetError::InvalidSet(empty.clone())))
+    /// ```
+    pub fn from_inversion_list(inv_list: Vec<u32>) -> Result<UnicodeSet, UnicodeSetError> {
+        if is_valid(&inv_list) {
+            let size: usize = inv_list
+                .chunks(2)
+                .map(|end_points| end_points[1] - end_points[0])
+                .sum::<u32>() as usize;
+            Ok(UnicodeSet { inv_list, size })
         } else {
-            Err(USetError::InvalidSet(set))
+            Err(UnicodeSetError::InvalidSet(inv_list))
         }
     }
-}
 
-impl UnicodeSet {
     /// Returns UnicodeSet spanning entire Unicode range
     ///
     /// The range spans from `0x0 -> 0x10FFFF` inclusive
     pub fn all() -> UnicodeSet {
         UnicodeSet {
             inv_list: vec![0, (char::MAX as u32) + 1],
+            size: (char::MAX as usize) + 1,
         }
     }
 
@@ -46,38 +63,18 @@ impl UnicodeSet {
     pub fn bmp() -> UnicodeSet {
         UnicodeSet {
             inv_list: vec![0, BMP_MAX + 1],
+            size: (BMP_MAX as usize) + 1,
         }
     }
 
-    /// Yields an iterator of start and stop points of ranges in the UnicodeSet
-    ///
-    /// Example:
-    ///
-    /// ```
-    /// use icu_unicodeset::UnicodeSet;
-    /// use std::convert::TryFrom;
-    /// let example_list = vec![0, 10, 15, 20];
-    /// let example = UnicodeSet::try_from(example_list).unwrap();
-    /// let mut example_ranges = example.ranges();
-    /// assert_eq!(Some(&0), example_ranges.next());
-    /// assert_eq!(Some(&10), example_ranges.next());
-    /// assert_eq!(Some(&15), example_ranges.next());
-    /// assert_eq!(Some(&20), example_ranges.next());     
-    /// assert_eq!(None, example_ranges.next());
-    /// ```
-    pub fn ranges(&self) -> Iter<u32> {
-        self.inv_list.iter()
-    }
-
     /// Yields an iterator going through the character set in the UnicodeSet
     ///
     /// Example:
     ///
     /// ```
     /// use icu_unicodeset::UnicodeSet;
-    /// use std::convert::TryFrom;
     /// let example_list = vec![65, 68, 69, 70];
-    /// let example = UnicodeSet::try_from(example_list).unwrap();
+    /// let example = UnicodeSet::from_inversion_list(example_list).unwrap();
     /// let mut example_iter = example.iter();
     /// assert_eq!(Some('A'), example_iter.next());
     /// assert_eq!(Some('B'), example_iter.next());
@@ -97,12 +94,7 @@ impl UnicodeSet {
         if self.is_empty() {
             return 0;
         }
-        let s: u32 = self
-            .inv_list
-            .chunks(2)
-            .map(|end_points| end_points[1] - end_points[0])
-            .sum();
-        s as usize
+        self.size
     }
 
     /// Returns whether or not the UnicodeSet is empty
@@ -112,7 +104,8 @@ impl UnicodeSet {
 
     /// Wrapper for contains
     ///
-    /// Returns an Option as to whether or not it is possible for the query to be contained
+    /// Returns an `Option` as to whether or not it is possible for the query to be contained.
+    /// The value in the `Option` is the start index of the range that contains the query.
     fn contains_query(&self, query: u32) -> Option<usize> {
         match self.inv_list.binary_search(&query) {
             Ok(pos) => {
@@ -141,9 +134,8 @@ impl UnicodeSet {
     ///
     /// ```
     /// use icu_unicodeset::UnicodeSet;
-    /// use std::convert::TryFrom;
     /// let example_list = vec![65, 67, 68, 69];
-    /// let example = UnicodeSet::try_from(example_list).unwrap();
+    /// let example = UnicodeSet::from_inversion_list(example_list).unwrap();
     /// assert!(example.contains('A'));
     /// assert!(!example.contains('C'));
     /// ```
@@ -161,9 +153,8 @@ impl UnicodeSet {
     ///
     /// ```
     /// use icu_unicodeset::UnicodeSet;
-    /// use std::convert::TryFrom;
     /// let example_list = vec![65, 67, 68, 69];
-    /// let example = UnicodeSet::try_from(example_list).unwrap();
+    /// let example = UnicodeSet::from_inversion_list(example_list).unwrap();
     /// assert!(example.contains_range(&('A'..'C')));
     /// assert!(example.contains_range(&('A'..='B')));
     /// assert!(!example.contains_range(&('A'..='C')));
@@ -176,10 +167,10 @@ impl UnicodeSet {
     ///
     /// ```
     /// use icu_unicodeset::UnicodeSet;
-    /// use std::{convert::TryFrom, char};
+    /// use std::char;
     /// let check = char::from_u32(0xD7FE).unwrap() .. char::from_u32(0xE001).unwrap();
     /// let example_list = vec![0xD7FE, 0xD7FF, 0xE000, 0xE001];
-    /// let example = UnicodeSet::try_from(example_list).unwrap();
+    /// let example = UnicodeSet::from_inversion_list(example_list).unwrap();
     /// assert!(!example.contains_range(&(check)));
     /// ```
     pub fn contains_range(&self, range: &impl RangeBounds<char>) -> bool {
@@ -196,37 +187,46 @@ impl UnicodeSet {
 
 #[cfg(test)]
 mod tests {
-    use super::{USetError, UnicodeSet, BMP_MAX};
-    use std::{char, convert::TryFrom, vec::Vec};
+    use super::{UnicodeSet, UnicodeSetError, BMP_MAX};
+    use std::{char, vec::Vec};
 
     #[test]
     fn test_unicodeset_try_from_vec() {
         let ex = vec![2, 3, 4, 5];
-        let check = UnicodeSet::try_from(ex.clone()).unwrap().inv_list;
-        assert_eq!(ex, check);
+        let check = UnicodeSet::from_inversion_list(ex.clone()).unwrap();
+        assert_eq!(ex, check.inv_list);
+        assert_eq!(2, check.size());
     }
     #[test]
     fn test_unicodeset_try_from_vec_error() {
         let check = vec![1, 1, 2, 3, 4];
-        let set = UnicodeSet::try_from(check.clone());
-        assert_eq!(Err(USetError::InvalidSet(check)), set);
+        let set = UnicodeSet::from_inversion_list(check.clone());
+        assert_eq!(Err(UnicodeSetError::InvalidSet(check)), set);
     }
     #[test]
     fn test_unicodeset_all() {
         let expected = vec![0, (char::MAX as u32) + 1];
-        assert_eq!(UnicodeSet::all().inv_list, expected);
+        assert_eq!(UnicodeSet::all().inv_list, expected.clone());
+        assert_eq!(
+            UnicodeSet::all().size(),
+            (expected[1] - expected[0]) as usize
+        )
     }
     #[test]
     fn test_unicodeset_bmp() {
         let expected = vec![0, BMP_MAX + 1];
-        assert_eq!(UnicodeSet::bmp().inv_list, expected);
+        assert_eq!(UnicodeSet::bmp().inv_list, expected.clone());
+        assert_eq!(
+            UnicodeSet::bmp().size(),
+            (expected[1] - expected[0]) as usize
+        );
     }
 
     // UnicodeSet membership functions
     #[test]
     fn test_unicodeset_contains() {
         let ex = vec![2, 5, 10, 15];
-        let check = UnicodeSet::try_from(ex).unwrap();
+        let check = UnicodeSet::from_inversion_list(ex).unwrap();
         assert!(check.contains(2 as char));
         assert!(check.contains(4 as char));
         assert!(check.contains(10 as char));
@@ -235,7 +235,7 @@ mod tests {
     #[test]
     fn test_unicodeset_contains_false() {
         let ex = vec![2, 5, 10, 15];
-        let check = UnicodeSet::try_from(ex).unwrap();
+        let check = UnicodeSet::from_inversion_list(ex).unwrap();
         assert!(!check.contains(1 as char));
         assert!(!check.contains(5 as char));
         assert!(!check.contains(9 as char));
@@ -245,14 +245,14 @@ mod tests {
     #[test]
     fn test_unicodeset_contains_range() {
         let ex = vec![65, 70, 75, 85];
-        let check = UnicodeSet::try_from(ex).unwrap();
+        let check = UnicodeSet::from_inversion_list(ex).unwrap();
         assert!(check.contains_range(&('A'..='E'))); // 65 - 69
         assert!(check.contains_range(&('K'..'U'))); // 75 - 84
     }
     #[test]
     fn test_unicodeset_contains_range_false() {
         let ex = vec![65, 70, 75, 85];
-        let check = UnicodeSet::try_from(ex).unwrap();
+        let check = UnicodeSet::from_inversion_list(ex).unwrap();
         assert!(!check.contains_range(&('!'..'A'))); // 33 - 65
         assert!(!check.contains_range(&('F'..'K'))); // 70 - 74
         assert!(!check.contains_range(&('U'..)));
@@ -266,19 +266,23 @@ mod tests {
     #[test]
     fn test_unicodeset_size() {
         let ex = vec![2, 5, 10, 15];
-        let check = UnicodeSet::try_from(ex).unwrap();
+        let check = UnicodeSet::from_inversion_list(ex).unwrap();
         assert_eq!(8, check.size());
         let check = UnicodeSet::all();
         let expected = (char::MAX as u32) + 1;
         assert_eq!(expected as usize, check.size());
         let check = UnicodeSet {
             inv_list: Vec::new(),
+            size: 0,
         };
         assert_eq!(check.size(), 0);
     }
     #[test]
     fn test_unicodeset_is_empty() {
-        let check = UnicodeSet { inv_list: vec![] };
+        let check = UnicodeSet {
+            inv_list: vec![],
+            size: 0,
+        };
         assert!(check.is_empty());
     }
     #[test]
@@ -287,20 +291,9 @@ mod tests {
         assert!(!check.is_empty());
     }
     #[test]
-    fn test_unicodeset_ranges() {
-        let ex = vec![65, 70, 75, 85];
-        let check = UnicodeSet::try_from(ex).unwrap();
-        let mut iter = check.ranges();
-        assert_eq!(iter.next(), Some(&65));
-        assert_eq!(iter.next(), Some(&70));
-        assert_eq!(iter.next(), Some(&75));
-        assert_eq!(iter.next(), Some(&85));
-        assert_eq!(iter.next(), None);
-    }
-    #[test]
     fn test_unicodeset_iter() {
         let ex = vec![65, 68, 69, 70, 0xD800, 0xD801];
-        let check = UnicodeSet::try_from(ex).unwrap();
+        let check = UnicodeSet::from_inversion_list(ex).unwrap();
         let mut iter = check.iter();
         assert_eq!(Some('A'), iter.next());
         assert_eq!(Some('B'), iter.next());

From a9acfa28948f92377ab02b268fb8667e65f4f9ce Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Fri, 17 Jul 2020 19:07:40 +0000
Subject: [PATCH 29/30] fixed bench

---
 components/uniset/benches/inv_list.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/components/uniset/benches/inv_list.rs b/components/uniset/benches/inv_list.rs
index 9ef3e7ea63b..31dacb530ee 100644
--- a/components/uniset/benches/inv_list.rs
+++ b/components/uniset/benches/inv_list.rs
@@ -1,12 +1,12 @@
 use criterion::{criterion_group, criterion_main, Criterion};
 use icu_unicodeset::UnicodeSet;
-use std::{char, convert::TryFrom};
+use std::char;
 
 fn contains_bench(c: &mut Criterion) {
     let best_ex = vec![65, 70];
-    let best_sample = UnicodeSet::try_from(best_ex).unwrap();
+    let best_sample = UnicodeSet::from_inversion_list(best_ex).unwrap();
     let worst_ex: Vec<u32> = (0..((char::MAX as u32) + 1)).collect();
-    let worst_sample = UnicodeSet::try_from(worst_ex).unwrap();
+    let worst_sample = UnicodeSet::from_inversion_list(worst_ex).unwrap();
 
     let mut group = c.benchmark_group("uniset/contains");
     group.bench_with_input("best", &best_sample, |b, sample| {
@@ -20,9 +20,9 @@ fn contains_bench(c: &mut Criterion) {
 
 fn contains_range_bench(c: &mut Criterion) {
     let best_ex = vec![65, 70];
-    let best_sample = UnicodeSet::try_from(best_ex).unwrap();
+    let best_sample = UnicodeSet::from_inversion_list(best_ex).unwrap();
     let worst_ex: Vec<u32> = (0..((char::MAX as u32) + 1)).collect();
-    let worst_sample = UnicodeSet::try_from(worst_ex).unwrap();
+    let worst_sample = UnicodeSet::from_inversion_list(worst_ex).unwrap();
 
     let mut group = c.benchmark_group("uniset/contains_range");
     group.bench_with_input("best", &best_sample, |b, sample| {

From 2c54b63897c9c372dd8e96ed1cb792c7b434aba2 Mon Sep 17 00:00:00 2001
From: Evan Peng <evanjp@google.com>
Date: Fri, 17 Jul 2020 19:34:28 +0000
Subject: [PATCH 30/30] clippy checks that cargo clippy doesn't catch locally

---
 components/uniset/src/uniset.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/components/uniset/src/uniset.rs b/components/uniset/src/uniset.rs
index ff9243562ed..d1fa6095c51 100644
--- a/components/uniset/src/uniset.rs
+++ b/components/uniset/src/uniset.rs
@@ -206,7 +206,7 @@ mod tests {
     #[test]
     fn test_unicodeset_all() {
         let expected = vec![0, (char::MAX as u32) + 1];
-        assert_eq!(UnicodeSet::all().inv_list, expected.clone());
+        assert_eq!(UnicodeSet::all().inv_list, expected);
         assert_eq!(
             UnicodeSet::all().size(),
             (expected[1] - expected[0]) as usize
@@ -215,7 +215,7 @@ mod tests {
     #[test]
     fn test_unicodeset_bmp() {
         let expected = vec![0, BMP_MAX + 1];
-        assert_eq!(UnicodeSet::bmp().inv_list, expected.clone());
+        assert_eq!(UnicodeSet::bmp().inv_list, expected);
         assert_eq!(
             UnicodeSet::bmp().size(),
             (expected[1] - expected[0]) as usize