diff --git a/HACKING.md b/HACKING.md
index 6114475fcc..bba1f55b2c 100644
--- a/HACKING.md
+++ b/HACKING.md
@@ -112,7 +112,7 @@ the NFA algorithm, because it was one fewer epsilon transition that it had to
 follow.
 
 There exist more instructions and they are defined and documented in
-src/inst.rs.
+src/prog.rs.
 
 Compilation has several knobs and a few unfortunately complicated invariants.
 Namely, the output of compilation can be one of two types of programs: a
@@ -163,7 +163,7 @@ engine (or engines) to use.
 
 The logic for choosing which engine to execute is in src/exec.rs and is
 documented on the Exec type. Exec values collection regular expression
-Programs (defined in src/program.rs), which contain all the necessary tidbits
+Programs (defined in src/prog.rs), which contain all the necessary tidbits
 for actually executing a regular expression on search text.
 
 For the most part, the execution logic is straight-forward and follows the
diff --git a/README.md b/README.md
index 30056bb63c..b1be030702 100644
--- a/README.md
+++ b/README.md
@@ -128,6 +128,34 @@ fn some_helper_function(text: &str) -> bool {
 Specifically, in this example, the regex will be compiled when it is used for
 the first time. On subsequent uses, it will reuse the previous compilation.
 
+### Usage: match multiple regular expressions simultaneously
+
+This demonstrates how to use a `RegexSet` to match multiple (possibly
+overlapping) regular expressions in a single scan of the search text:
+
+```rust
+use regex::RegexSet;
+
+let set = RegexSet::new(&[
+    r"\w+",
+    r"\d+",
+    r"\pL+",
+    r"foo",
+    r"bar",
+    r"barfoo",
+    r"foobar",
+]).unwrap();
+
+// Iterate over and collect all of the matches.
+let matches: Vec<_> = set.matches("foobar").into_iter().collect();
+assert_eq!(matches, vec![0, 2, 3, 4, 6]);
+
+// You can also test whether a particular regex matched:
+let matches = set.matches("foobar");
+assert!(!matches.matched(5));
+assert!(matches.matched(6));
+```
+
 ### Usage: `regex!` compiler plugin
 
 The `regex!` compiler plugin will compile your regexes at compile time. **This
diff --git a/benches/bench_dynamic_compile.rs b/benches/bench_dynamic_compile.rs
index 17ab319b30..436e3a7ae9 100644
--- a/benches/bench_dynamic_compile.rs
+++ b/benches/bench_dynamic_compile.rs
@@ -8,54 +8,55 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
+use regex_syntax::Expr;
 use test::Bencher;
 
-use regex::internal::ProgramBuilder;
+use regex::internal::Compiler;
 
 #[bench]
 fn compile_simple(b: &mut Bencher) {
     b.iter(|| {
-        let re = r"^bc(d|e)*$";
-        ProgramBuilder::new(&re).compile().unwrap()
+        let re = Expr::parse(r"^bc(d|e)*$").unwrap();
+        Compiler::new().compile(&[re]).unwrap()
     });
 }
 
 #[bench]
 fn compile_simple_bytes(b: &mut Bencher) {
     b.iter(|| {
-        let re = r"^bc(d|e)*$";
-        ProgramBuilder::new(&re).bytes(true).compile().unwrap()
+        let re = Expr::parse(r"^bc(d|e)*$").unwrap();
+        Compiler::new().bytes(true).compile(&[re]).unwrap()
     });
 }
 
 #[bench]
 fn compile_small(b: &mut Bencher) {
     b.iter(|| {
-        let re = r"\p{L}|\p{N}|\s|.|\d";
-        ProgramBuilder::new(&re).compile().unwrap()
+        let re = Expr::parse(r"\p{L}|\p{N}|\s|.|\d").unwrap();
+        Compiler::new().compile(&[re]).unwrap()
    });
 }
 
 #[bench]
 fn compile_small_bytes(b: &mut Bencher) {
     b.iter(|| {
-        let re = r"\p{L}|\p{N}|\s|.|\d";
-        ProgramBuilder::new(&re).bytes(true).compile().unwrap()
+        let re = Expr::parse(r"\p{L}|\p{N}|\s|.|\d").unwrap();
+        Compiler::new().bytes(true).compile(&[re]).unwrap()
     });
 }
 
 #[bench]
 fn compile_huge(b: &mut Bencher) {
     b.iter(|| {
-        let re = r"\p{L}{100}";
-        ProgramBuilder::new(&re).compile().unwrap()
+        let re = Expr::parse(r"\p{L}{100}").unwrap();
+        Compiler::new().compile(&[re]).unwrap()
     });
 }
 
 #[bench]
 fn compile_huge_bytes(b: &mut Bencher) {
     b.iter(|| {
-        let re = r"\p{L}{100}";
-        ProgramBuilder::new(&re).bytes(true).compile().unwrap()
+        let re = Expr::parse(r"\p{L}{100}").unwrap();
+        Compiler::new().bytes(true).compile(&[re]).unwrap()
     });
 }
diff --git a/examples/set.rs b/examples/set.rs
new file mode 100644
index 0000000000..caf0fead51
--- /dev/null
+++ b/examples/set.rs
@@ -0,0 +1,19 @@
+extern crate regex;
+
+use regex::RegexSet;
+
+fn main() {
+    let res = &[
+        "abc",
+        "xyzz",
+        "^[ga-fh-z]+$",
+    ];
+    let text = "abcggggggggxyz";
+    let set = RegexSet::new(res).unwrap();
+    println!("{:?}", set);
+    let m = set.is_match("abcggggggggxyz");
+    println!("match? {:?}", m);
+    for mi in set.matches(text) {
+        println!("{:?}", mi);
+    }
+}
diff --git a/regex-syntax/src/lib.rs b/regex-syntax/src/lib.rs
index 3a77bd3f88..274c318049 100644
--- a/regex-syntax/src/lib.rs
+++ b/regex-syntax/src/lib.rs
@@ -177,6 +177,19 @@ pub enum Repeater {
     },
 }
 
+impl Repeater {
+    /// Returns true if and only if this repetition can match the empty string.
+    fn matches_empty(&self) -> bool {
+        use self::Repeater::*;
+        match *self {
+            ZeroOrOne => true,
+            ZeroOrMore => true,
+            OneOrMore => false,
+            Range { min, .. } => min == 0,
+        }
+    }
+}
+
 /// A character class.
 ///
 /// A character class has a canonical format that the parser guarantees. Its
@@ -315,7 +328,9 @@ impl Expr {
     /// the beginning of text.
     pub fn is_anchored_start(&self) -> bool {
         match *self {
-            Repeat { ref e, .. } => e.is_anchored_start(),
+            Repeat { ref e, r, .. } => {
+                !r.matches_empty() && e.is_anchored_start()
+            }
             Group { ref e, .. } => e.is_anchored_start(),
             Concat(ref es) => es[0].is_anchored_start(),
             Alternate(ref es) => es.iter().all(|e| e.is_anchored_start()),
@@ -328,7 +343,9 @@ impl Expr {
     /// end of the text.
     pub fn is_anchored_end(&self) -> bool {
         match *self {
-            Repeat { ref e, .. } => e.is_anchored_end(),
+            Repeat { ref e, r, .. } => {
+                !r.matches_empty() && e.is_anchored_end()
+            }
             Group { ref e, ..
} => e.is_anchored_end(), Concat(ref es) => es[es.len() - 1].is_anchored_end(), Alternate(ref es) => es.iter().all(|e| e.is_anchored_end()), @@ -1059,9 +1076,6 @@ mod tests { assert!(e("^a|^b").is_anchored_start()); assert!(e("(^a)|(^b)").is_anchored_start()); assert!(e("(^(a|b))").is_anchored_start()); - assert!(e("^*").is_anchored_start()); - assert!(e("(^)*").is_anchored_start()); - assert!(e("((^)*)*").is_anchored_start()); assert!(!e("^a|b").is_anchored_start()); assert!(!e("a|^b").is_anchored_start()); @@ -1074,9 +1088,6 @@ mod tests { assert!(e("a$|b$").is_anchored_end()); assert!(e("(a$)|(b$)").is_anchored_end()); assert!(e("((a|b)$)").is_anchored_end()); - assert!(e("$*").is_anchored_end()); - assert!(e("($)*").is_anchored_end()); - assert!(e("(($)*)*").is_anchored_end()); assert!(!e("a$|b").is_anchored_end()); assert!(!e("a|b$").is_anchored_end()); diff --git a/regex_macros/Cargo.toml b/regex_macros/Cargo.toml index 8f90a034cc..d36839f3fe 100644 --- a/regex_macros/Cargo.toml +++ b/regex_macros/Cargo.toml @@ -22,6 +22,10 @@ path = ".." version = "0.1" features = ["pattern"] +[dependencies.regex-syntax] +path = "../regex-syntax" +version = "0.2" + [dev-dependencies] lazy_static = "0.1" rand = "0.3" diff --git a/regex_macros/src/lib.rs b/regex_macros/src/lib.rs index d96aae9ea2..37e96566d6 100644 --- a/regex_macros/src/lib.rs +++ b/regex_macros/src/lib.rs @@ -18,8 +18,12 @@ #![feature(plugin_registrar, quote, rustc_private)] extern crate regex; -extern crate syntax; +extern crate regex_syntax; extern crate rustc_plugin; +extern crate syntax; + +use std::collections::BTreeMap; +use std::usize; use syntax::ast; use syntax::codemap; @@ -32,7 +36,8 @@ use syntax::ptr::P; use rustc_plugin::Registry; -use regex::internal::{Inst, EmptyLook, Program, ProgramBuilder}; +use regex::internal::{Compiler, EmptyLook, Inst, Program}; +use regex_syntax::Expr; /// For the `regex!` syntax extension. Do not use. #[plugin_registrar] @@ -67,15 +72,21 @@ fn native(cx: &mut ExtCtxt, sp: codemap::Span, tts: &[ast::TokenTree]) }; // We use the largest possible size limit because this is happening at // compile time. We trust the programmer. - let bprog = ProgramBuilder::new(®ex).size_limit(::std::usize::MAX); - let prog = match bprog.compile() { + let expr = match Expr::parse(®ex) { + Ok(expr) => expr, + Err(err) => { + cx.span_err(sp, &err.to_string()); + return DummyResult::any(sp) + } + }; + let prog = match Compiler::new().size_limit(usize::MAX).compile(&[expr]) { Ok(re) => re, Err(err) => { cx.span_err(sp, &err.to_string()); return DummyResult::any(sp) } }; - let names = prog.cap_names.iter().cloned().collect(); + let names = prog.captures.iter().cloned().collect(); let mut gen = NfaGen { cx: &*cx, sp: sp, @@ -98,8 +109,8 @@ impl<'a> NfaGen<'a> { fn code(&mut self) -> P { // Most or all of the following things are used in the quasiquoted // expression returned. 
- let num_cap_locs = 2 * self.prog.num_captures(); - let num_insts = self.prog.insts.len(); + let num_cap_locs = 2 * self.prog.captures.len(); + let num_insts = self.prog.len(); let cap_names = self.vec_expr(self.names.iter(), &mut |cx, name| match *name { Some(ref name) => { @@ -109,21 +120,20 @@ impl<'a> NfaGen<'a> { None => cx.expr_none(self.sp), } ); - let named_groups = { - let mut named_groups = ::std::collections::BTreeMap::new(); + let capture_name_idx = { + let mut capture_name_idx = BTreeMap::new(); for (i, name) in self.names.iter().enumerate() { if let Some(ref name) = *name { - named_groups.insert(name.to_owned(), i); + capture_name_idx.insert(name.to_owned(), i); } } - self.vec_expr(named_groups.iter(), + self.vec_expr(capture_name_idx.iter(), &mut |cx, (name, group_idx)| quote_expr!(cx, ($name, $group_idx)) ) }; - let prefix_anchor = self.prog.anchored_begin; - + let is_anchored_start = self.prog.is_anchored_start; let step_insts = self.step_insts(); let add_insts = self.add_insts(); let regex = &*self.original; @@ -135,9 +145,9 @@ impl<'a> NfaGen<'a> { // the user is only warned about *their* unused variable/code, and not the // unused code generated by regex!. See #14185 for an example. #[allow(dead_code)] -static CAP_NAMES: &'static [Option<&'static str>] = &$cap_names; +static CAPTURES: &'static [Option<&'static str>] = &$cap_names; #[allow(dead_code)] -static NAMED_GROUPS: &'static [(&'static str, usize)] = &$named_groups; +static CAPTURE_NAME_IDX: &'static [(&'static str, usize)] = &$capture_name_idx; #[allow(dead_code)] fn exec<'t>( @@ -175,14 +185,14 @@ fn exec<'t>( clist.empty(); nlist.empty(); 'LOOP: loop { if clist.size == 0 { - if matched || (!at.is_beginning() && $prefix_anchor) { + if matched || (!at.is_start() && $is_anchored_start) { break; } // TODO: Prefix matching... Hmm. // Prefix matching now uses a DFA, so I think this is // going to require encoding that DFA statically. } - if clist.size == 0 || (!$prefix_anchor && !matched) { + if clist.size == 0 || (!$is_anchored_start && !matched) { self.add(clist, &mut caps, 0, at); } let at_next = self.input.at(at.next_pos()); @@ -322,8 +332,8 @@ fn exec<'t>( ::regex::Regex::Native(::regex::internal::ExNative { original: $regex, - names: &CAP_NAMES, - groups: &NAMED_GROUPS, + names: &CAPTURES, + groups: &CAPTURE_NAME_IDX, prog: exec, }) }) @@ -332,7 +342,7 @@ fn exec<'t>( // Generates code for the `add` method, which is responsible for adding // zero-width states to the next queue of states to visit. fn add_insts(&self) -> P { - let arms = self.prog.insts.iter().enumerate().map(|(pc, inst)| { + let arms = self.prog.iter().enumerate().map(|(pc, inst)| { let body = match *inst { Inst::EmptyLook(ref inst) => { let nextpc = inst.goto; @@ -422,7 +432,7 @@ fn exec<'t>( // Generates the code for the `step` method, which processes all states // in the current queue that consume a single character. fn step_insts(&self) -> P { - let arms = self.prog.insts.iter().enumerate().map(|(pc, inst)| { + let arms = self.prog.iter().enumerate().map(|(pc, inst)| { let body = match *inst { Inst::Match => quote_expr!(self.cx, { for (slot, val) in caps.iter_mut().zip(thread_caps.iter()) { diff --git a/src/backtrack.rs b/src/backtrack.rs index 6238f296d1..b80ff9cf60 100644 --- a/src/backtrack.rs +++ b/src/backtrack.rs @@ -26,10 +26,9 @@ // the bitset has to be zeroed on each execution, which becomes quite expensive // on large bitsets. 
+use exec::Search; use input::{Input, InputAt}; -use inst::InstPtr; -use program::Program; -use re::CaptureIdxs; +use prog::{Program, InstPtr}; /// Returns true iff the given regex and input should be executed by this /// engine with reasonable memory usage. @@ -51,10 +50,10 @@ const MAX_INPUT_SIZE: usize = 128 * (1 << 10); /// A backtracking matching engine. #[derive(Debug)] -pub struct Backtrack<'a, 'r, 'c, I> { +pub struct Backtrack<'a, 'b, 'c: 'b, 'm: 'b, 'r, I> { prog: &'r Program, input: I, - caps: &'c mut CaptureIdxs, + search: &'b mut Search<'m, 'c>, m: &'a mut BacktrackCache, } @@ -85,14 +84,14 @@ enum Job { SaveRestore { slot: usize, old_pos: Option }, } -impl<'a, 'r, 'c, I: Input> Backtrack<'a, 'r, 'c, I> { +impl<'a, 'b, 'c, 'm, 'r, I: Input> Backtrack<'a, 'b, 'c, 'r, 'm, I> { /// Execute the backtracking matching engine. /// /// If there's a match, `exec` returns `true` and populates the given /// captures accordingly. pub fn exec( prog: &'r Program, - mut caps: &mut CaptureIdxs, + search: &'b mut Search<'c, 'm>, input: I, start: usize, ) -> bool { @@ -101,7 +100,7 @@ impl<'a, 'r, 'c, I: Input> Backtrack<'a, 'r, 'c, I> { let mut b = Backtrack { prog: prog, input: input, - caps: caps, + search: search, m: &mut m, }; b.exec_(start) @@ -124,7 +123,7 @@ impl<'a, 'r, 'c, I: Input> Backtrack<'a, 'r, 'c, I> { // (Probably because backtracking is limited to such small // inputs/regexes in the first place.) let visited_len = - (self.prog.insts.len() * (self.input.len() + 1) + BIT_SIZE - 1) + (self.prog.len() * (self.input.len() + 1) + BIT_SIZE - 1) / BIT_SIZE; self.m.visited.truncate(visited_len); @@ -146,8 +145,8 @@ impl<'a, 'r, 'c, I: Input> Backtrack<'a, 'r, 'c, I> { self.clear(); // If this is an anchored regex at the beginning of the input, then // we're either already done or we only need to try backtracking once. - if self.prog.anchored_begin { - return if !at.is_beginning() { + if self.prog.is_anchored_start { + return if !at.is_start() { false } else { self.backtrack(at) @@ -184,11 +183,16 @@ impl<'a, 'r, 'c, I: Input> Backtrack<'a, 'r, 'c, I> { match job { Job::Inst { ip, at } => { if self.step(ip, at) { - return true; + // Only quit if we're matching one regex. + // If we're matching a regex set, then mush on and + // try to find other matches. + if self.search.matches.len() <= 1 { + return true; + } } } Job::SaveRestore { slot, old_pos } => { - self.caps[slot] = old_pos; + self.search.captures[slot] = old_pos; } } } @@ -196,26 +200,29 @@ impl<'a, 'r, 'c, I: Input> Backtrack<'a, 'r, 'c, I> { } fn step(&mut self, mut ip: InstPtr, mut at: InputAt) -> bool { - use inst::Inst::*; + use prog::Inst::*; loop { // This loop is an optimization to avoid constantly pushing/popping // from the stack. Namely, if we're pushing a job only to run it // next, avoid the push and just mutate `ip` (and possibly `at`) // in place. - match self.prog.insts[ip] { - Match => return true, + match self.prog[ip] { + Match(slot) => { + self.search.set_match(slot); + return true; + } Save(ref inst) => { - if inst.slot < self.caps.len() { + if inst.slot < self.search.captures.len() { // If this path doesn't work out, then we save the old // capture index (if one exists) in an alternate // job. If the next path fails, then the alternate // job is popped and the old capture index is restored. 
- let old_pos = self.caps[inst.slot]; + let old_pos = self.search.captures[inst.slot]; self.m.jobs.push(Job::SaveRestore { slot: inst.slot, old_pos: old_pos, }); - self.caps[inst.slot] = Some(at.pos()); + self.search.captures[inst.slot] = Some(at.pos()); } ip = inst.goto; } diff --git a/src/compile.rs b/src/compile.rs index d49f1a9deb..a3471d80b1 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -8,23 +8,20 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use std::collections::HashSet; +use std::collections::HashMap; use std::iter; use std::result; +use std::sync::Arc; use syntax::{Expr, Repeater, CharClass, ClassRange}; use utf8_ranges::{Utf8Range, Utf8Sequence, Utf8Sequences}; -use Error; -use inst::{ - Insts, Inst, InstPtr, EmptyLook, +use prog::{ + Program, Inst, InstPtr, EmptyLook, InstSave, InstSplit, InstEmptyLook, InstChar, InstRanges, InstBytes, }; -pub struct Compiled { - pub insts: Insts, - pub cap_names: Vec>, -} +use Error; type InstHoleIdx = InstPtr; @@ -38,12 +35,10 @@ struct Patch { pub struct Compiler { insts: Vec, - cap_names: Vec>, - seen_caps: HashSet, + compiled: Program, + capture_name_idx: HashMap, + num_exprs: usize, size_limit: usize, - bytes: bool, - dfa: bool, - reverse: bool, suffix_cache: SuffixCache, utf8_seqs: Option, byte_classes: ByteClassSet, @@ -56,12 +51,10 @@ impl Compiler { pub fn new() -> Self { Compiler { insts: vec![], - cap_names: vec![None], - seen_caps: HashSet::new(), + compiled: Program::new(), + capture_name_idx: HashMap::new(), + num_exprs: 0, size_limit: 10 * (1 << 20), - bytes: false, - dfa: false, - reverse: false, suffix_cache: SuffixCache::new(1000), utf8_seqs: Some(Utf8Sequences::new('\x00', '\x00')), byte_classes: ByteClassSet::new(), @@ -88,7 +81,7 @@ impl Compiler { /// /// Note that `dfa(true)` implies `bytes(true)`. pub fn bytes(mut self, yes: bool) -> Self { - self.bytes = yes; + self.compiled.is_bytes = yes; self } @@ -100,15 +93,15 @@ impl Compiler { /// based engines handle the preceding `.*?` explicitly, which is difficult /// or impossible in the DFA engine.) pub fn dfa(mut self, yes: bool) -> Self { - self.dfa = yes; - self.bytes = yes; + self.compiled.is_dfa = yes; + self.compiled.is_bytes = yes; self } /// When set, the machine returned is suitable for matching text in /// reverse. In particular, all concatenations are flipped. pub fn reverse(mut self, yes: bool) -> Self { - self.reverse = yes; + self.compiled.is_reverse = yes; self } @@ -117,8 +110,27 @@ impl Compiler { /// The compiler is guaranteed to succeed unless the program exceeds the /// specified size limit. If the size limit is exceeded, then compilation /// stops and returns an error. - pub fn compile(mut self, expr: &Expr) -> result::Result { - if self.dfa && !self.reverse && !expr.is_anchored_start() { + pub fn compile( + mut self, + exprs: &[Expr], + ) -> result::Result { + debug_assert!(exprs.len() >= 1); + self.num_exprs = exprs.len(); + if exprs.len() == 1 { + self.compile_one(&exprs[0]) + } else { + self.compile_many(exprs) + } + } + + fn compile_one(mut self, expr: &Expr) -> result::Result { + // If we're compiling a forward DFA and we aren't anchored, then + // add a `.*?` before the first capture group. + // Other matching engines handle this by baking the logic into the + // matching engine itself. 
+ self.compiled.is_anchored_start = expr.is_anchored_start(); + self.compiled.is_anchored_end = expr.is_anchored_end(); + if self.compiled.needs_dotstar() { let patch = try!(self.c(&Expr::Repeat { e: Box::new(Expr::AnyChar), r: Repeater::ZeroOrMore, @@ -126,20 +138,64 @@ impl Compiler { })); self.fill_to_next(patch.hole); } + self.compiled.captures = vec![None]; + self.compiled.start = self.insts.len(); let patch = try!(self.c_capture(0, expr)); self.fill_to_next(patch.hole); - self.push_compiled(Inst::Match); + self.compiled.matches = vec![self.insts.len()]; + self.push_compiled(Inst::Match(0)); + self.compile_finish() + } + + fn compile_many( + mut self, + exprs: &[Expr], + ) -> result::Result { + debug_assert!(exprs.len() > 1); + + self.compiled.is_anchored_start = + exprs.iter().all(|e| e.is_anchored_start()); + self.compiled.is_anchored_end = + exprs.iter().all(|e| e.is_anchored_end()); + if self.compiled.needs_dotstar() { + let patch = try!(self.c(&Expr::Repeat { + e: Box::new(Expr::AnyChar), + r: Repeater::ZeroOrMore, + greedy: false, + })); + self.fill_to_next(patch.hole); + } - let byte_classes = self.byte_classes.byte_classes(); - let insts = self.insts.into_iter().map(|inst| inst.unwrap()).collect(); - Ok(Compiled { - insts: Insts::new(insts, self.bytes, self.reverse, byte_classes), - cap_names: self.cap_names, - }) + self.compiled.start = self.insts.len(); + for (i, expr) in exprs[0..exprs.len() - 1].iter().enumerate() { + let split = self.push_split_hole(); + let Patch { hole, entry } = try!(self.c_capture(0, expr)); + self.fill_to_next(hole); + self.compiled.matches.push(self.insts.len()); + self.push_compiled(Inst::Match(i)); + + let next = self.insts.len(); + self.fill_split(split, Some(entry), Some(next)); + } + let i = exprs.len() - 1; + let Patch { hole, .. 
} = try!(self.c_capture(0, &exprs[i])); + self.fill_to_next(hole); + self.compiled.matches.push(self.insts.len()); + self.push_compiled(Inst::Match(i)); + + self.compile_finish() + } + + fn compile_finish(mut self) -> result::Result { + self.compiled.insts = + self.insts.into_iter().map(|inst| inst.unwrap()).collect(); + self.compiled.byte_classes = self.byte_classes.byte_classes(); + self.compiled.capture_name_idx = Arc::new(self.capture_name_idx); + Ok(self.compiled) } fn c(&mut self, expr: &Expr) -> Result { - use inst; + use prog; use syntax::Expr::*; try!(self.check_size()); @@ -159,50 +215,52 @@ impl Compiler { Class(ref cls) => { self.c_class(cls) } - StartLine if self.reverse => { + StartLine if self.compiled.is_reverse => { self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(inst::EmptyLook::EndLine) + self.c_empty_look(prog::EmptyLook::EndLine) } StartLine => { self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(inst::EmptyLook::StartLine) + self.c_empty_look(prog::EmptyLook::StartLine) } - EndLine if self.reverse => { + EndLine if self.compiled.is_reverse => { self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(inst::EmptyLook::StartLine) + self.c_empty_look(prog::EmptyLook::StartLine) } EndLine => { self.byte_classes.set_range(b'\n', b'\n'); - self.c_empty_look(inst::EmptyLook::EndLine) + self.c_empty_look(prog::EmptyLook::EndLine) } - StartText if self.reverse => { - self.c_empty_look(inst::EmptyLook::EndText) + StartText if self.compiled.is_reverse => { + self.c_empty_look(prog::EmptyLook::EndText) } StartText => { - self.c_empty_look(inst::EmptyLook::StartText) + self.c_empty_look(prog::EmptyLook::StartText) } - EndText if self.reverse => { - self.c_empty_look(inst::EmptyLook::StartText) + EndText if self.compiled.is_reverse => { + self.c_empty_look(prog::EmptyLook::StartText) } EndText => { - self.c_empty_look(inst::EmptyLook::EndText) + self.c_empty_look(prog::EmptyLook::EndText) } - WordBoundary => self.c_empty_look(inst::EmptyLook::WordBoundary), + WordBoundary => self.c_empty_look(prog::EmptyLook::WordBoundary), NotWordBoundary => { - self.c_empty_look(inst::EmptyLook::NotWordBoundary) + self.c_empty_look(prog::EmptyLook::NotWordBoundary) } Group { ref e, i: None, name: None } => self.c(e), Group { ref e, i, ref name } => { // it's impossible to have a named capture without an index let i = i.expect("capture index"); - if !self.seen_caps.contains(&i) { - self.cap_names.push(name.clone()); - self.seen_caps.insert(i); + if i >= self.compiled.captures.len() { + self.compiled.captures.push(name.clone()); + if let Some(ref name) = *name { + self.capture_name_idx.insert(name.to_owned(), i); + } } self.c_capture(2 * i, e) } Concat(ref es) => { - if self.reverse { + if self.compiled.is_reverse { self.c_concat(es.iter().rev()) } else { self.c_concat(es) @@ -214,22 +272,30 @@ impl Compiler { } fn c_capture(&mut self, first_slot: usize, expr: &Expr) -> Result { - let entry = self.insts.len(); - let hole = self.push_hole(InstHole::Save { slot: first_slot }); - let patch = try!(self.c(expr)); - self.fill(hole, patch.entry); - self.fill_to_next(patch.hole); - let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 }); - Ok(Patch { hole: hole, entry: entry }) + if self.num_exprs > 1 || self.compiled.is_dfa { + // Don't ever compile Save instructions for regex sets because + // they are never used. They are also never used in DFA programs + // because DFAs can't handle captures. 
+ self.c(expr) + } else { + let entry = self.insts.len(); + let hole = self.push_hole(InstHole::Save { slot: first_slot }); + let patch = try!(self.c(expr)); + self.fill(hole, patch.entry); + self.fill_to_next(patch.hole); + let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 }); + Ok(Patch { hole: hole, entry: entry }) + } } fn c_literal(&mut self, chars: &[char], casei: bool) -> Result { assert!(!chars.is_empty()); - let mut chars: Box> = if self.reverse { - Box::new(chars.iter().rev()) - } else { - Box::new(chars.iter()) - }; + let mut chars: Box> = + if self.compiled.is_reverse { + Box::new(chars.iter().rev()) + } else { + Box::new(chars.iter()) + }; let first = *chars.next().expect("non-empty literal"); let Patch { mut hole, entry } = try!(self.c_char(first, casei)); for &c in chars { @@ -251,7 +317,7 @@ impl Compiler { } fn c_class(&mut self, ranges: &[ClassRange]) -> Result { - if self.bytes { + if self.compiled.is_bytes { CompileClass { c: self, ranges: ranges, @@ -694,7 +760,7 @@ impl<'a, 'b> CompileClass<'a, 'b> { } fn c_utf8_seq(&mut self, seq: &Utf8Sequence) -> Result { - if self.c.reverse { + if self.c.compiled.is_reverse { self.c_utf8_seq_(seq) } else { self.c_utf8_seq_(seq.into_iter().rev()) @@ -704,9 +770,7 @@ impl<'a, 'b> CompileClass<'a, 'b> { fn c_utf8_seq_<'r, I>(&mut self, seq: I) -> Result where I: IntoIterator { // The initial instruction for each UTF-8 sequence should be the same. - // Since the 0th instruction has always been created by this point, - // it's safe to use it as a sentinel here. - let mut from_inst = 0; + let mut from_inst = ::std::usize::MAX; let mut last_hole = Hole::None; for byte_range in seq { let key = SuffixCacheKey { @@ -722,7 +786,7 @@ impl<'a, 'b> CompileClass<'a, 'b> { } } self.c.byte_classes.set_range(byte_range.start, byte_range.end); - if from_inst == 0 { + if from_inst == ::std::usize::MAX { last_hole = self.c.push_hole(InstHole::Bytes { start: byte_range.start, end: byte_range.end, @@ -735,8 +799,9 @@ impl<'a, 'b> CompileClass<'a, 'b> { })); } from_inst = self.c.insts.len().checked_sub(1).unwrap(); + assert!(from_inst < ::std::usize::MAX); } - assert!(from_inst > 0); + assert!(from_inst < ::std::usize::MAX); Ok(Patch { hole: last_hole, entry: from_inst }) } } diff --git a/src/dfa.rs b/src/dfa.rs index d8e5c5f6c4..239042a320 100644 --- a/src/dfa.rs +++ b/src/dfa.rs @@ -45,8 +45,8 @@ use std::collections::HashMap; use std::fmt; use std::mem; -use inst::{Insts, Inst}; -use program::Program; +use exec::Search; +use prog::{Inst, Program}; use sparse::SparseSet; /// The cache limit specifies approximately how much space we're willing to @@ -70,8 +70,9 @@ const CACHE_LIMIT: usize = 2 * (1<<20); /// Generally, a DFA is possible only when there are no word boundary /// assertions. This is due to the difficulty (but likely not impossibility) /// of tracking multi-byte assertions in the DFA. -pub fn can_exec(insts: &Insts) -> bool { - use inst::EmptyLook::*; +pub fn can_exec(insts: &Program) -> bool { + use prog::Inst::*; + use prog::EmptyLook::*; // If for some reason we manage to allocate a regex program with more // than 2^32-1 instructions, then we can't execute the DFA because we // use 32 bit pointers. 
@@ -80,37 +81,19 @@ pub fn can_exec(insts: &Insts) -> bool { } for inst in insts { match *inst { - Inst::Char(_) | Inst::Ranges(_) => return false, - Inst::EmptyLook(ref inst) => { + Char(_) | Ranges(_) => return false, + EmptyLook(ref inst) => { match inst.look { WordBoundary | NotWordBoundary => return false, StartLine | EndLine | StartText | EndText => {} } } - Inst::Match | Inst::Save(_) | Inst::Split(_) | Inst::Bytes(_) => {} + Match(_) | Save(_) | Split(_) | Bytes(_) => {} } } true } -/// The result of running the DFA. -/// -/// Conceptually, this is essentially equivalent to an `Option`, where -/// the value indicates where the end of a match was found, if any. We split -/// this out into a third state called EarlyMatch, which indicates both that -/// the caller specified that they didn't care about *where* a match was found, -/// and that the position at which the earliest match occurred may not be the -/// correct leftmost-first ending match position. -/// -/// NoMatch indicates that no match will ever be found and that processing can -/// quit immediately. -#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] -pub enum DfaResult { - Match(usize), - EarlyMatch, - NoMatch, -} - /// A reusable cache of DFA states. /// /// This cache is reused between multiple invocations of the same regex @@ -169,7 +152,7 @@ pub struct DfaCache { /// N.B. We only use a single lifetime here since all pointers are taken /// from the same cache. #[derive(Debug)] -pub struct Dfa<'a> { +pub struct Dfa<'a, 'b, 'c: 'b, 'm: 'b> { /// prog contains the NFA instruction opcodes. DFA execution uses either /// the `dfa` instructions or the `dfa_reverse` instructions from /// `exec::Executor`. (It never uses `Executor.prog`, which may have @@ -178,10 +161,10 @@ pub struct Dfa<'a> { /// The start state. We record it here because the pointer may change /// when the cache is wiped. start: StatePtr, - /// When set, we can stop searching immediately after we enter a match - /// state. (Normally we keep searching in order to provide leftmost-first - /// semantics.) - quit_on_first_match: bool, + /// The search configuration, which includes capture groups. It also + /// includes space for indicating which regex matched if executing a + /// regex set. + search: &'b mut Search<'c, 'm>, /// These are all from DfaCache. (Only {qcur,qnext} are missing.) compiled: &'a mut HashMap, states: &'a mut Vec, @@ -305,7 +288,7 @@ impl DfaCache { } } -impl<'a> Dfa<'a> { +impl<'a, 'b, 'c, 'm> Dfa<'a, 'b, 'c, 'm> { /// The main entry point to executing a DFA, which returns the *end* of /// a match if one exists, using Perl's "leftmost-first" semantics. /// @@ -318,42 +301,42 @@ impl<'a> Dfa<'a> { /// it may seem like we should omit `at` and just rely on the caller to /// slice `text` appropriately, it is necessary to tell whether `at` is /// at the beginning of `text` or not (i.e., for empty assertions). - /// - /// quit_on_first_match should be set if the caller doesn't care about - /// where the match ends. If a match is found, DfaResult::EarlyMatch is - /// returned. pub fn exec( prog: &'a Program, + search: &'b mut Search<'c, 'm>, text: &[u8], at: usize, - quit_on_first_match: bool, - ) -> DfaResult { + ) -> bool { // Retrieve our DFA cache from the program. If another thread tries to // execute this DFA *simultaneously*, then a new independent cache is // created. 
let mut _cache = prog.cache_dfa(); let mut cache = &mut **_cache; - cache.resize(prog.insts.len()); + cache.resize(prog.len()); let mut dfa = Dfa { prog: prog, + start: 0, // filled in below + search: search, compiled: &mut cache.compiled, states: &mut cache.states, start_states: &mut cache.start_states, - start: 0, // filled in below stack: &mut cache.stack, - quit_on_first_match: quit_on_first_match, }; dfa.start = match dfa.start_state(&mut cache.qcur, text, at) { - STATE_DEAD => return DfaResult::NoMatch, + STATE_DEAD => return false, si => si, }; debug_assert!(dfa.start != STATE_UNKNOWN); - if prog.insts.is_reversed() { + let matched = if prog.is_reverse { dfa.exec_at_reverse(&mut cache.qcur, &mut cache.qnext, text, at) } else { dfa.exec_at(&mut cache.qcur, &mut cache.qnext, text, at) + }; + if matched && dfa.search.matches.len() <= 1 { + dfa.search.set_match(0); } + matched } /// Executes the DFA on a forward NFA. @@ -365,7 +348,7 @@ impl<'a> Dfa<'a> { qnext: &mut SparseSet, text: &[u8], at: usize, - ) -> DfaResult { + ) -> bool { // For the most part, the DFA is basically: // // last_match = null @@ -394,22 +377,22 @@ impl<'a> Dfa<'a> { // 6. We can't actually do state.next[byte]. Instead, we have to do // state.next[byte_classes[byte]], which permits us to keep the // 'next' list very small. - debug_assert!(!self.prog.is_reversed()); + debug_assert!(!self.prog.is_reverse); - // last_match is the currently known ending match position. It is + // The last match is the currently known ending match position. It is // reported as an index to the most recent byte that resulted in a - // transition to a match state. Its maximum value is `text.len()`, + // transition to a match state and is always stored in capture slot `1` + // when searching forwards. Its maximum value is `text.len()`, // which can only happen after the special EOF sentinel value is fed // to the DFA. - let mut last_match = DfaResult::NoMatch; - let (mut si, mut i) = (self.start, at); + let (mut si, mut i, mut matched) = (self.start, at, false); while i < text.len() { // Our set of literal prefixes can itself be a DFA, but it is // offline and can generally be quite a bit faster. (For instance, // memchr is used if possible.) if !self.prog.prefixes.is_empty() && si == self.start { i = match self.prefix_at(text, i) { - None => return DfaResult::NoMatch, + None => return false, Some(i) => i, }; } @@ -418,26 +401,27 @@ impl<'a> Dfa<'a> { // but we inline it manually here to avoid the extra branch and // also because we know we have a real `u8` (not a `Byte`, which // may be the special EOF sentinel value). - let cls = self.prog.insts.byte_classes()[text[i] as usize]; + let cls = self.prog.byte_classes[text[i] as usize]; let mut next_si = self.states[si as usize].next[cls as usize]; if next_si <= STATE_DEAD { if next_si == STATE_DEAD { - return last_match; + return matched; } // The next state may not have been cached, so re-compute it // (i.e., follow epsilon transitions). 
next_si = self.exec_byte(qcur, qnext, si, Byte::byte(text[i])); debug_assert!(next_si != STATE_UNKNOWN); if next_si == STATE_DEAD { - return last_match; + return matched; } } si = next_si; if self.states[si as usize].is_match { - if self.quit_on_first_match { - return DfaResult::EarlyMatch; + if self.search.quit_after_first_match() { + return true; } - last_match = DfaResult::Match(i); + matched = true; + self.search.set_end(Some(i)); } i += 1; } @@ -445,12 +429,23 @@ impl<'a> Dfa<'a> { si = self.next_state(qcur, qnext, si, Byte::eof()); debug_assert!(si != STATE_UNKNOWN); if si == STATE_DEAD { - return last_match; + return matched; } if self.states[si as usize].is_match { - last_match = DfaResult::Match(text.len()); + if self.search.quit_after_first_match() { + return true; + } + matched = true; + self.search.set_end(Some(text.len())); } - last_match + if matched && self.search.matches.len() != 1 { + for &ip in &self.states[si as usize].insts { + if let Inst::Match(slot) = self.prog[ip as usize] { + self.search.set_match(slot); + } + } + } + matched } /// Executes the DFA on a reverse NFA. @@ -460,50 +455,54 @@ impl<'a> Dfa<'a> { qnext: &mut SparseSet, text: &[u8], at: usize, - ) -> DfaResult { + ) -> bool { // The comments in `exec_at` above mostly apply here too. The main // difference is that we move backwards over the input and we look for // the longest possible match instead of the leftmost-first match. // // N.B. The code duplication here is regrettable. Efforts to improve // it without sacrificing performance are welcome. ---AG - debug_assert!(self.prog.is_reversed()); - let mut last_match = DfaResult::NoMatch; - let (mut si, mut i) = (self.start, at); + debug_assert!(self.prog.is_reverse); + let (mut si, mut i, mut matched) = (self.start, at, false); while i > 0 { i -= 1; - let cls = self.prog.insts.byte_classes()[text[i] as usize]; + let cls = self.prog.byte_classes[text[i] as usize]; let mut next_si = self.states[si as usize].next[cls as usize]; if next_si <= STATE_DEAD { if next_si == STATE_DEAD { - return last_match; + return matched; } // The next state may not have been cached, so re-compute it // (i.e., follow epsilon transitions). next_si = self.exec_byte(qcur, qnext, si, Byte::byte(text[i])); debug_assert!(next_si != STATE_UNKNOWN); if next_si == STATE_DEAD { - return last_match; + return matched; } } si = next_si; if self.states[si as usize].is_match { - if self.quit_on_first_match { - return DfaResult::EarlyMatch; + if self.search.quit_after_first_match() { + return true; } - last_match = DfaResult::Match(i+1); + matched = true; + self.search.set_start(Some(i+1)); } } si = self.next_state(qcur, qnext, si, Byte::eof()); debug_assert!(si != STATE_UNKNOWN); if si == STATE_DEAD { - return last_match; + return matched; } if self.states[si as usize].is_match { - last_match = DfaResult::Match(0); + if self.search.quit_after_first_match() { + return true; + } + matched = true; + self.search.set_start(Some(0)); } - last_match + matched } /// Computes the next state given the current state and the current input @@ -521,7 +520,7 @@ impl<'a> Dfa<'a> { mut si: StatePtr, b: Byte, ) -> StatePtr { - use inst::Inst::*; + use prog::Inst::*; // Initialize a queue with the current DFA state's NFA states. qcur.clear(); @@ -573,15 +572,21 @@ impl<'a> Dfa<'a> { // the current byte. qnext.clear(); for &ip in &*qcur { - match self.prog.insts[ip as usize] { + match self.prog[ip as usize] { // These states never happen in a byte-based program. 
Char(_) | Ranges(_) => unreachable!(), // These states are handled when following epsilon transitions. Save(_) | Split(_) | EmptyLook(_) => {} - Match => { + Match(_) => { flags.set_match(true); - if !self.prog.is_reversed() { + if !self.continue_past_first_match() { break; + } else if self.search.matches.len() != 1 { + // If we are continuing on to find other matches, + // then keep a record of the match states we've seen. + if !qnext.contains_ip(ip as usize) { + qnext.add(ip); + } } } Bytes(ref inst) => { @@ -592,6 +597,16 @@ impl<'a> Dfa<'a> { } } } + let mut cache = true; + if b.is_eof() && self.search.matches.len() != 1 { + // If we're processing the last byte of the input and we're + // matching a regex set, then make the next state contain the + // previous states transitions. We do this so that the main + // matching loop can extract all of the match instructions. + mem::swap(qcur, qnext); + // And don't cache this state because it's totally bunk. + cache = false; + } // We've now built up the set of NFA states that ought to comprise the // next DFA state, so try to find it in the cache, and if it doesn't // exist, cache it. @@ -603,7 +618,9 @@ impl<'a> Dfa<'a> { debug_assert!(next != STATE_UNKNOWN); // And now store our state in the current state's next list. let cls = self.byte_class(b); - self.states[si as usize].next[cls] = next; + if cache { + self.states[si as usize].next[cls] = next; + } next } @@ -636,8 +653,8 @@ impl<'a> Dfa<'a> { q: &mut SparseSet, flags: Flags, ) { - use inst::Inst::*; - use inst::EmptyLook::*; + use prog::Inst::*; + use prog::EmptyLook::*; // We need to traverse the NFA to follow epsilon transitions, so avoid // recursion with an explicit stack. @@ -648,9 +665,9 @@ impl<'a> Dfa<'a> { continue; } q.add(ip as usize); - match self.prog.insts[ip as usize] { + match self.prog[ip as usize] { Char(_) | Ranges(_) => unreachable!(), - Match | Bytes(_) => {} + Match(_) | Bytes(_) => {} EmptyLook(ref inst) => { // Only follow empty assertion states if our flags satisfy // the assertion. @@ -755,8 +772,8 @@ impl<'a> Dfa<'a> { q: &SparseSet, is_match: bool, ) -> Option<(StateKey, Flags)> { - use inst::Inst::*; - use inst::EmptyLook::*; + use prog::Inst::*; + use prog::EmptyLook::*; // We need to build up enough information to recognize pre-built states // in the DFA. Generally speaking, this includes every instruction @@ -770,7 +787,7 @@ impl<'a> Dfa<'a> { let mut insts = vec![]; for &ip in q { let ip = usize_to_u32(ip); - match self.prog.insts[ip as usize] { + match self.prog[ip as usize] { Char(_) | Ranges(_) => unreachable!(), Save(_) => {} Split(_) => {} @@ -796,12 +813,9 @@ impl<'a> Dfa<'a> { WordBoundary | NotWordBoundary => unreachable!(), } } - Match => { + Match(_) => { insts.push(ip); - // If this is a reverse program, then we want to continue - // executing to find the longest possible match. Otherwise, - // we only support leftmost-first semantics, so bail out. - if !self.prog.insts.is_reversed() { + if !self.continue_past_first_match() { break; } } @@ -918,7 +932,7 @@ impl<'a> Dfa<'a> { text: &[u8], at: usize, ) -> StatePtr { - let start_flags = if self.prog.insts.is_reversed() { + let start_flags = if self.prog.is_reverse { self.start_flags_reverse(text, at) } else { self.start_flags(text, at) @@ -980,7 +994,7 @@ impl<'a> Dfa<'a> { /// invariant: num_byte_classes() == len(State.next) fn num_byte_classes(&self) -> usize { // We add 1 to account for the special EOF byte. 
- ((self.prog.insts.byte_classes()[255] + 1) + 1) as usize + (self.prog.byte_classes[255] as usize + 1) + 1 } /// Given an input byte or the special EOF sentinel, return its @@ -989,10 +1003,22 @@ impl<'a> Dfa<'a> { if b.is_eof() { self.num_byte_classes() - 1 } else { - self.prog.insts.byte_classes()[b.0 as usize] as usize + self.prog.byte_classes[b.0 as usize] as usize } } + /// Returns true if the DFA should continue searching past the first match. + /// + /// Leftmost first semantics in the DFA are preserved by not following NFA + /// transitions after the first match is seen. + /// + /// On occasion, we want to avoid leftmost first semantics to find either + /// the longest match (for reverse search) or all possible matches (for + /// regex sets). + fn continue_past_first_match(&self) -> bool { + self.prog.is_reverse || self.search.matches.len() != 1 + } + /// Approximate size returns the approximate heap space currently used by /// the DFA. It is used to determine whether the DFA's state cache needs to /// be wiped. Namely, it is possible that for certain regexes on certain diff --git a/src/exec.rs b/src/exec.rs index f048c0572f..bd80b2ac18 100644 --- a/src/exec.rs +++ b/src/exec.rs @@ -12,14 +12,74 @@ use std::collections::HashMap; use std::sync::Arc; use backtrack::{self, Backtrack}; -use dfa::{self, Dfa, DfaResult}; +use compile::Compiler; +use dfa::{self, Dfa}; use input::{ByteInput, CharInput}; +use literals::BuildPrefixes; use nfa::Nfa; -use program::{Program, ProgramBuilder}; -use re::CaptureIdxs; +use prog::{Program, InstPtr}; +use syntax; use {Regex, Error}; +pub type CaptureSlots<'a> = &'a mut [CaptureSlot]; + +pub type CaptureSlot = Option; + +/// The parameters to running one of the four match engines. +#[derive(Debug)] +pub struct Search<'caps, 'matches> { + /// The matching engine writes capture locations to this slice. + /// + /// Note that some matching engines, like the DFA, have limited support + /// for this. The DFA can only fill in one capture location (the end + /// location of the match). + pub captures: CaptureSlots<'caps>, + /// The matching engine indicates which match instructions were executed + /// when searching stopped. + /// + /// In standard searches, there is exactly one value in this slice and it + /// should be initialized to `false`. When executing sets of regexes, + /// there should be a location for each regex. + pub matches: &'matches mut [bool], +} + +impl<'caps, 'matches> Search<'caps, 'matches> { + pub fn quit_after_first_match(&self) -> bool { + self.captures.is_empty() && self.matches.len() == 1 + } + + pub fn all_matched(&self) -> bool { + self.matches.iter().all(|m| *m) + } + + pub fn copy_captures_from(&mut self, caps: &[Option]) { + for (slot, val) in self.captures.iter_mut().zip(caps.iter()) { + *slot = *val; + } + } + + pub fn set_match(&mut self, match_slot: usize) { + if let Some(old) = self.matches.get_mut(match_slot) { + *old = true; + } + } + + pub fn set_start(&mut self, pos: Option) { + self.set_capture(0, pos); + } + + pub fn set_end(&mut self, pos: Option) { + self.set_capture(1, pos); + } + + fn set_capture(&mut self, i: usize, pos: Option) { + if let Some(old_pos) = self.captures.get_mut(i) { + *old_pos = pos; + } + } +} + /// Exec manages the execution of a regular expression. /// /// In particular, this manages the various compiled forms of a single regular @@ -27,6 +87,8 @@ use {Regex, Error}; /// regular expression. 
#[derive(Clone, Debug)] pub struct Exec { + /// The original regular expressions given by the caller to compile. + res: Vec, /// A compiled program that is used in the NFA simulation and backtracking. /// It can be byte-based or Unicode codepoint based. /// @@ -60,22 +122,32 @@ pub struct Exec { /// Facilitates the construction of an executor by exposing various knobs /// to control how a regex is executed and what kinds of resources it's /// permitted to use. -pub struct ExecBuilder<'r> { - re: &'r str, +pub struct ExecBuilder { + res: Vec, match_engine: MatchEngine, size_limit: usize, bytes: bool, } -impl<'r> ExecBuilder<'r> { +impl ExecBuilder { /// Create a regex execution builder. /// /// This uses default settings for everything except the regex itself, /// which must be provided. Further knobs can be set by calling methods, /// and then finally, `build` to actually create the executor. - pub fn new(re: &'r str) -> Self { + pub fn new(re: &str) -> Self { + Self::new_many(&[re]) + } + + /// Like new, but compiles the union of the given regular expressions. + /// + /// Note that when compiling 2 or more regular expressions, capture groups + /// are completely unsupported. (This means both `find` and `captures` + /// wont work.) + pub fn new_many(res: I) -> Self + where S: AsRef, I: IntoIterator { ExecBuilder { - re: re, + res: res.into_iter().map(|s| s.as_ref().to_owned()).collect(), match_engine: MatchEngine::Automatic, size_limit: 10 * (1 << 20), bytes: false, @@ -145,28 +217,40 @@ impl<'r> ExecBuilder<'r> { /// Build an executor that can run a regular expression. pub fn build(self) -> Result { - let prog = try!( - ProgramBuilder::new(self.re) - .size_limit(self.size_limit) - .bytes(self.bytes) - .compile()); + if self.res.is_empty() { + return Err(Error::InvalidSet); + } + let mut exprs = vec![]; + for re in &self.res { + exprs.push(try!(syntax::Expr::parse(re))); + } + let mut prog = try!( + Compiler::new() + .size_limit(self.size_limit) + .bytes(self.bytes) + .compile(&exprs)); let mut dfa = try!( - ProgramBuilder::new(self.re) - .size_limit(self.size_limit) - .dfa(true) - .compile()); - // Because the literal finder on byte-based programs is sub-optimal. - // We can use the literals found from a Unicode-based program just - // fine for now. - dfa.prefixes = prog.prefixes.clone(); + Compiler::new() + .size_limit(self.size_limit) + .dfa(true) + .compile(&exprs)); let dfa_reverse = try!( - ProgramBuilder::new(self.re) - .size_limit(self.size_limit) - .dfa(true) - .reverse(true) - .compile()); - let can_dfa = dfa::can_exec(&dfa.insts); + Compiler::new() + .size_limit(self.size_limit) + .dfa(true) + .reverse(true) + .compile(&exprs)); + + // Compute literal prefixes for only `prog`, which is likely a Unicode + // based program. Literal prefix extract currently works better on + // Unicode programs. + prog.prefixes = BuildPrefixes::new(&prog).literals().into_matcher(); + // And give it to the DFA too, which can use Unicode prefixes even + // though the program itself is byte based. + dfa.prefixes = prog.prefixes.clone(); + let can_dfa = dfa::can_exec(&dfa); Ok(Exec { + res: self.res, prog: prog, dfa: dfa, dfa_reverse: dfa_reverse, @@ -196,9 +280,9 @@ impl Exec { /// choosing the engine to use. If self.match_engine is Nfa or Backtrack, /// then that engine is always used. Otherwise, one is selected /// automatically. 
- pub fn exec( + pub fn exec<'c, 'm>( &self, - caps: &mut CaptureIdxs, + search: &mut Search<'c, 'm>, text: &str, start: usize, ) -> bool { @@ -206,132 +290,129 @@ impl Exec { // only possible to execute those engines in exec_auto. See comment on // MatchEngine below for more details. match self.match_engine { - MatchEngine::Automatic => self.exec_auto(caps, text, start), - MatchEngine::Backtrack => self.exec_backtrack(caps, text, start), - MatchEngine::Nfa => self.exec_nfa(caps, text, start), + MatchEngine::Automatic => self.exec_auto(search, text, start), + MatchEngine::Backtrack => self.exec_backtrack(search, text, start), + MatchEngine::Nfa => self.exec_nfa(search, text, start), } } /// Like exec, but always selects the engine automatically. - pub fn exec_auto( + fn exec_auto<'c, 'm>( &self, - caps: &mut CaptureIdxs, + search: &mut Search<'c, 'm>, text: &str, start: usize, ) -> bool { - if caps.len() <= 2 && self.prog.is_prefix_match() { + if search.captures.len() <= 2 && self.prog.prefixes.at_match() { // We should be able to execute the literal engine even if there // are more captures by falling back to the NFA engine after a // match. However, that's effectively what the NFA engine does // already (since it will use the literal engine if it exists). - self.exec_literals(caps, text, start) + self.exec_literals(search, text, start) } else if self.can_dfa { - self.exec_dfa(caps, text, start) + self.exec_dfa(search, text, start) } else { - self.exec_auto_nfa(caps, text, start) + self.exec_auto_nfa(search, text, start) } } /// Like exec, but always tries to execute the lazy DFA. /// /// Note that self.can_dfa must be true. This will panic otherwise. - fn exec_dfa( + fn exec_dfa<'a, 'c, 'm>( &self, - caps: &mut CaptureIdxs, + search: &'a mut Search<'c, 'm>, text: &str, start: usize, ) -> bool { debug_assert!(self.can_dfa); let btext = text.as_bytes(); - let search = Dfa::exec(&self.dfa, btext, start, caps.is_empty()); - let match_end = match search { - DfaResult::Match(match_end) => match_end, - DfaResult::EarlyMatch => return true, - DfaResult::NoMatch => return false, - }; - // If caller has not requested any captures, then we don't need to - // find the start position. - if caps.is_empty() { - return true; + if !Dfa::exec(&self.dfa, search, btext, start) { + return false; } + let match_end = match search.captures.get(1) { + Some(&Some(i)) => i, + // The DFA returned true for a match, but did not set any capture + // location because the caller didn't ask for them. Therefore, we + // can quit immediately. + _ => return true, + }; // invariant: caps.len() >= 2 && caps.len() % 2 == 0 // If the reported end of the match is the same as the start, then we // have an empty match and we can quit now. if start == match_end { // Be careful... If the caller wants sub-captures, than we are // obliged to run the NFA to get them. - if caps.len() == 2 { + if search.captures.len() == 2 { // The caller only needs the start/end, so we can avoid the // NFA here. - caps[0] = Some(start); - caps[1] = Some(start); + search.captures[0] = Some(start); + search.captures[1] = Some(start); return true; } - return self.exec_auto_nfa(caps, text, start); + return self.exec_auto_nfa(search, text, start); } // OK, now we find the start of the match by running the DFA backwards // on the text. We *start* the search at the end of the match. 
- let search = Dfa::exec( - &self.dfa_reverse, &btext[start..], match_end - start, false); - let match_start = match search { - DfaResult::Match(match_start) => start + match_start, - DfaResult::EarlyMatch => { - panic!("BUG: early matches can't happen on reverse search") - } - DfaResult::NoMatch => { - panic!("BUG: forward match implies backward match") - } + let matched = Dfa::exec( + &self.dfa_reverse, search, &btext[start..], match_end - start); + if !matched { + panic!("BUG: forward match implies backward match"); + } + let match_start = match search.captures.get(0) { + Some(&Some(i)) => start + i, + _ => panic!("BUG: early match can't happen on reverse search"), }; - if caps.len() == 2 { + if search.captures.len() == 2 { // If the caller doesn't care about capture locations, then we can // avoid running the NFA to fill them in. - caps[0] = Some(match_start); - caps[1] = Some(match_end); + search.captures[0] = Some(match_start); + search.captures[1] = Some(match_end); return true; } - self.exec_auto_nfa(caps, text, match_start) + self.exec_auto_nfa(search, text, match_start) } /// This is like exec_auto, except it always chooses between either the /// full NFA simulation or the bounded backtracking engine. - fn exec_auto_nfa( + fn exec_auto_nfa<'c, 'm>( &self, - caps: &mut CaptureIdxs, + search: &mut Search<'c, 'm>, text: &str, start: usize, ) -> bool { - if backtrack::should_exec(self.prog.insts.len(), text.len()) { - self.exec_backtrack(caps, text, start) + if backtrack::should_exec(self.prog.len(), text.len()) { + self.exec_backtrack(search, text, start) } else { - self.exec_nfa(caps, text, start) + self.exec_nfa(search, text, start) } } /// Always run the NFA algorithm. - fn exec_nfa( + fn exec_nfa<'c, 'm>( &self, - caps: &mut CaptureIdxs, + search: &mut Search<'c, 'm>, text: &str, start: usize, ) -> bool { - if self.prog.insts.is_bytes() { - Nfa::exec(&self.prog, caps, ByteInput::new(text), start) + if self.prog.is_bytes { + Nfa::exec(&self.prog, search, ByteInput::new(text), start) } else { - Nfa::exec(&self.prog, caps, CharInput::new(text), start) + Nfa::exec(&self.prog, search, CharInput::new(text), start) } } /// Always runs the NFA using bounded backtracking. - fn exec_backtrack( + fn exec_backtrack<'c, 'm>( &self, - caps: &mut CaptureIdxs, + search: &mut Search<'c, 'm>, text: &str, start: usize, ) -> bool { - if self.prog.insts.is_bytes() { - Backtrack::exec(&self.prog, caps, ByteInput::new(text), start) + if self.prog.is_bytes { + Backtrack::exec(&self.prog, search, ByteInput::new(text), start) } else { - Backtrack::exec(&self.prog, caps, CharInput::new(text), start) + Backtrack::exec(&self.prog, search, CharInput::new(text), start) } } @@ -342,19 +423,19 @@ impl Exec { /// regex machinery and use specialized DFAs. /// /// This panics if the set of literals do not correspond to matches. - fn exec_literals( + fn exec_literals<'c, 'm>( &self, - caps: &mut CaptureIdxs, + search: &mut Search<'c, 'm>, text: &str, start: usize, ) -> bool { - debug_assert!(self.prog.is_prefix_match()); + debug_assert!(self.prog.prefixes.at_match()); match self.prog.prefixes.find(&text.as_bytes()[start..]) { None => false, Some((s, e)) => { - if caps.len() == 2 { - caps[0] = Some(start + s); - caps[1] = Some(start + e); + if search.captures.len() == 2 { + search.captures[0] = Some(start + s); + search.captures[1] = Some(start + e); } true } @@ -366,28 +447,30 @@ impl Exec { Regex::Dynamic(self) } - /// Return the original regular expression string. 
- pub fn regex_str(&self) -> &str { - &self.prog.original + /// The original regular expressions given by the caller that were + /// compiled. + pub fn regex_strings(&self) -> &[String] { + &self.res + } + + /// Return a slice of instruction pointers to match slots. + /// + /// There is a match slot for every regular expression in this executor. + pub fn matches(&self) -> &[InstPtr] { + &self.prog.matches } /// Return a slice of capture names. /// /// Any capture that isn't named is None. - pub fn capture_names(&self) -> &[Option] { - &self.prog.cap_names + pub fn captures(&self) -> &[Option] { + &self.prog.captures } /// Return a reference to named groups mapping (from group name to /// group position). - pub fn named_groups(&self) -> &Arc> { - &self.prog.named_groups - } - - /// Return a fresh allocation for storing all possible captures in the - /// underlying regular expression. - pub fn alloc_captures(&self) -> Vec> { - self.prog.alloc_captures() + pub fn capture_name_idx(&self) -> &Arc> { + &self.prog.capture_name_idx } } diff --git a/src/input.rs b/src/input.rs index dacb36cc0f..04442bf93f 100644 --- a/src/input.rs +++ b/src/input.rs @@ -24,7 +24,7 @@ pub struct InputAt { impl InputAt { /// Returns true iff this position is at the beginning of the input. - pub fn is_beginning(&self) -> bool { + pub fn is_start(&self) -> bool { self.pos == 0 } diff --git a/src/lib.rs b/src/lib.rs index 84d26fb64d..86acba164c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -210,6 +210,34 @@ //! # } //! ``` //! +//! # Example: match multiple regular expressions simultaneously +//! +//! This demonstrates how to use a `RegexSet` to match multiple (possibly +//! overlapping) regular expressions in a single scan of the search text: +//! +//! ```rust +//! use regex::RegexSet; +//! +//! let set = RegexSet::new(&[ +//! r"\w+", +//! r"\d+", +//! r"\pL+", +//! r"foo", +//! r"bar", +//! r"barfoo", +//! r"foobar", +//! ]).unwrap(); +//! +//! // Iterate over and collect all of the matches. +//! let matches: Vec<_> = set.matches("foobar").into_iter().collect(); +//! assert_eq!(matches, vec![0, 2, 3, 4, 6]); +//! +//! // You can also test whether a particular regex matched: +//! let matches = set.matches("foobar"); +//! assert!(!matches.matched(5)); +//! assert!(matches.matched(6)); +//! ``` +//! //! # Pay for what you use //! //! With respect to searching text with a regular expression, there are three @@ -438,7 +466,9 @@ //! allowed to store a fixed number of states. (When the limit is reached, its //! states are wiped and continues on, possibly duplicating previous work.) 
-#![deny(missing_docs)] +#![allow(dead_code, unused_imports, unused_variables)] + +// #![deny(missing_docs)] #![cfg_attr(test, deny(warnings))] #![cfg_attr(feature = "pattern", feature(pattern))] #![doc(html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png", @@ -457,6 +487,7 @@ pub use re::{ Replacer, NoExpand, RegexSplits, RegexSplitsN, quote, is_match, }; +pub use set::{RegexSet, SetMatches, SetMatchesIntoIter, SetMatchesIter}; mod backtrack; mod char; @@ -465,12 +496,12 @@ mod compile; mod dfa; mod exec; mod input; -mod inst; mod literals; mod nfa; mod pool; -mod program; +mod prog; mod re; +mod set; mod sparse; /// The `internal` module exists to support the `regex!` macro and other @@ -478,9 +509,10 @@ mod sparse; #[doc(hidden)] pub mod internal { pub use char::Char; + pub use compile::Compiler; pub use exec::{Exec, ExecBuilder}; pub use input::{Input, CharInput, InputAt}; - pub use inst::{Inst, EmptyLook, InstRanges}; - pub use program::{Program, ProgramBuilder}; + pub use literals::{BuildPrefixes, Literals}; + pub use prog::{Program, Inst, EmptyLook, InstRanges}; pub use re::ExNative; } diff --git a/src/literals.rs b/src/literals.rs index 13d2e150a5..cd3d6d7ffd 100644 --- a/src/literals.rs +++ b/src/literals.rs @@ -17,7 +17,7 @@ use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton}; use memchr::{memchr, memchr2, memchr3}; use char_utf8::encode_utf8; -use inst::{Insts, Inst, InstBytes, InstRanges}; +use prog::{Program, Inst, InstBytes, InstRanges}; #[derive(Clone, Eq, PartialEq)] pub struct AlternateLiterals { @@ -200,13 +200,13 @@ impl AlternateLiterals { } pub struct BuildPrefixes<'a> { - insts: &'a Insts, + insts: &'a Program, limit: usize, alts: AlternateLiterals, } impl<'a> BuildPrefixes<'a> { - pub fn new(insts: &'a Insts) -> Self { + pub fn new(insts: &'a Program) -> Self { BuildPrefixes { insts: insts, limit: 250, @@ -215,7 +215,7 @@ impl<'a> BuildPrefixes<'a> { } pub fn literals(mut self) -> AlternateLiterals { - let mut stack = vec![self.insts.skip(self.insts.start())]; + let mut stack = vec![self.insts.skip(self.insts.start)]; let mut seen = HashSet::new(); while let Some(mut pc) = stack.pop() { seen.insert(pc); @@ -272,13 +272,13 @@ impl<'a> BuildPrefixes<'a> { } pub struct BuildRequiredLiterals<'a> { - insts: &'a Insts, + insts: &'a Program, limit: usize, alts: AlternateLiterals, } impl<'a> BuildRequiredLiterals<'a> { - pub fn new(insts: &'a Insts) -> Self { + pub fn new(insts: &'a Program) -> Self { BuildRequiredLiterals { insts: insts, limit: 250, @@ -292,7 +292,7 @@ impl<'a> BuildRequiredLiterals<'a> { } fn literals(mut self, mut pc: usize) -> AlternateLiterals { - use inst::Inst::*; + use prog::Inst::*; loop { let inst = &self.insts[pc]; match *inst { @@ -318,7 +318,7 @@ impl<'a> BuildRequiredLiterals<'a> { } pc = inst.goto; } - Split(_) | EmptyLook(_) | Match => { + Split(_) | EmptyLook(_) | Match(_) => { self.alts.at_match = self.insts.leads_to_match(pc); break; } @@ -662,31 +662,32 @@ impl fmt::Debug for Literals { #[cfg(test)] mod tests { - use program::ProgramBuilder; - use super::AlternateLiterals; + use compile::Compiler; + use super::{AlternateLiterals, BuildPrefixes}; + use syntax::Expr; macro_rules! prog { - ($re:expr) => { ProgramBuilder::new($re).compile().unwrap() } - } - - macro_rules! byte_prog { - ($re:expr) => { - ProgramBuilder::new($re).bytes(true).compile().unwrap() - } + ($re:expr) => {{ + let expr = Expr::parse($re).unwrap(); + let prog = Compiler::new().compile(&[expr]).unwrap(); + prog + }} } macro_rules! 
prefixes { ($re:expr) => {{ let p = prog!($re); - assert!(!p.prefixes.at_match()); - p.prefixes.prefixes() + let prefixes = BuildPrefixes::new(&p).literals().into_matcher(); + assert!(!prefixes.at_match()); + prefixes.prefixes() }} } macro_rules! prefixes_complete { ($re:expr) => {{ let p = prog!($re); - assert!(p.prefixes.at_match()); - p.prefixes.prefixes() + let prefixes = BuildPrefixes::new(&p).literals().into_matcher(); + assert!(prefixes.at_match()); + prefixes.prefixes() }} } diff --git a/src/nfa.rs b/src/nfa.rs index 634d1ffaaf..b78b768d5d 100644 --- a/src/nfa.rs +++ b/src/nfa.rs @@ -27,10 +27,9 @@ use std::mem; +use exec::Search; use input::{Input, InputAt}; -use inst::InstPtr; -use program::Program; -use re::CaptureIdxs; +use prog::{Program, InstPtr}; use sparse::SparseSet; /// An NFA simulation matching engine. @@ -101,27 +100,27 @@ impl<'r, I: Input> Nfa<'r, I> { /// captures accordingly. pub fn exec( prog: &'r Program, - mut caps: &mut CaptureIdxs, + search: &mut Search, input: I, start: usize, ) -> bool { let mut _cache = prog.cache_nfa(); let mut cache = &mut **_cache; - cache.clist.resize(prog.insts.len(), prog.num_captures()); - cache.nlist.resize(prog.insts.len(), prog.num_captures()); + cache.clist.resize(prog.len(), prog.captures.len()); + cache.nlist.resize(prog.len(), prog.captures.len()); let at = input.at(start); Nfa { prog: prog, stack: &mut cache.stack, input: input, - }.exec_(&mut cache.clist, &mut cache.nlist, &mut caps, at) + }.exec_(&mut cache.clist, &mut cache.nlist, search, at) } fn exec_( &mut self, mut clist: &mut Threads, mut nlist: &mut Threads, - mut caps: &mut CaptureIdxs, + mut search: &mut Search, mut at: InputAt, ) -> bool { let mut matched = false; @@ -137,8 +136,7 @@ impl<'r, I: Input> Nfa<'r, I> { // // 2. If the expression starts with a '^' we can terminate as // soon as the last thread dies. - if matched - || (!at.is_beginning() && self.prog.anchored_begin) { + if matched || (!at.is_start() && self.prog.is_anchored_start) { break; } @@ -156,9 +154,8 @@ impl<'r, I: Input> Nfa<'r, I> { // This simulates a preceding '.*?' for every regex by adding // a state starting at the current position in the input for the // beginning of the program only if we don't already have a match. - if clist.set.is_empty() - || (!self.prog.anchored_begin && !matched) { - self.add(&mut clist, &mut caps, 0, at) + if clist.set.is_empty() || (!self.prog.is_anchored_start && !matched) { + self.add(&mut clist, &mut search.captures, 0, at) } // The previous call to "add" actually inspects the position just // before the current character. For stepping through the machine, @@ -167,19 +164,34 @@ impl<'r, I: Input> Nfa<'r, I> { let at_next = self.input.at(at.next_pos()); for i in 0..clist.set.len() { let ip = clist.set[i]; - let tcaps = clist.caps(ip); - if self.step(&mut nlist, caps, tcaps, ip, at, at_next) { - matched = true; - if caps.len() == 0 { + let step = self.step( + &mut nlist, + search, + clist.caps(ip), + ip, + at, + at_next, + ); + if step { + if !matched { + matched = search.all_matched(); + } + if search.quit_after_first_match() { // If we only care if a match occurs (not its // position), then we can quit right now. break 'LOOP; } - // We don't need to check the rest of the threads in this - // set because we've matched something ("leftmost-first"). - // However, we still need to check threads in the next set - // to support things like greedy matching. 
-                    break;
+                    if search.matches.len() <= 1 {
+                        // We don't need to check the rest of the threads
+                        // in this set because we've matched something
+                        // ("leftmost-first"). However, we still need to check
+                        // threads in the next set to support things like
+                        // greedy matching.
+                        //
+                        // This is only true on normal regexes. For regex sets,
+                        // we need to mush on to observe other matches.
+                        break;
+                    }
                 }
             }
             if at.is_end() {
@@ -207,18 +219,17 @@ impl<'r, I: Input> Nfa<'r, I> {
     fn step(
         &mut self,
         nlist: &mut Threads,
-        caps: &mut [Option<usize>],
+        search: &mut Search,
         thread_caps: &mut [Option<usize>],
         ip: usize,
         at: InputAt,
         at_next: InputAt,
     ) -> bool {
-        use inst::Inst::*;
-        match self.prog.insts[ip] {
-            Match => {
-                for (slot, val) in caps.iter_mut().zip(thread_caps.iter()) {
-                    *slot = *val;
-                }
+        use prog::Inst::*;
+        match self.prog[ip] {
+            Match(match_slot) => {
+                search.copy_captures_from(thread_caps);
+                search.set_match(match_slot);
                 true
             }
             Char(ref inst) => {
@@ -283,14 +294,14 @@
         // traverse the set of states. We only push to the stack when we
        // absolutely need recursion (restoring captures or following a
        // branch).
-        use inst::Inst::*;
+        use prog::Inst::*;
         loop {
             // Don't visit states we've already added.
             if nlist.set.contains_ip(ip) {
                 return;
             }
             nlist.set.add(ip);
-            match self.prog.insts[ip] {
+            match self.prog[ip] {
                 EmptyLook(ref inst) => {
                     let prev = self.input.previous_char(at);
                     let next = self.input.next_char(at);
@@ -312,7 +323,7 @@
                     self.stack.push(FollowEpsilon::IP(inst.goto2));
                     ip = inst.goto1;
                 }
-                Match | Char(_) | Ranges(_) | Bytes(_) => {
+                Match(_) | Char(_) | Ranges(_) | Bytes(_) => {
                     let mut t = &mut nlist.caps(ip);
                     for (slot, val) in t.iter_mut().zip(thread_caps.iter()) {
                         *slot = *val;
diff --git a/src/inst.rs b/src/prog.rs
similarity index 73%
rename from src/inst.rs
rename to src/prog.rs
index 302a428be8..99ae2e420d 100644
--- a/src/inst.rs
+++ b/src/prog.rs
@@ -1,57 +1,61 @@
+use std::collections::HashMap;
 use std::cmp::Ordering;
 use std::fmt;
 use std::ops::Deref;
 use std::mem;
 use std::slice;
+use std::sync::Arc;
 
+use backtrack::BacktrackCache;
 use char::Char;
-use literals::{BuildPrefixes, Literals};
+use dfa::DfaCache;
+use literals::Literals;
+use nfa::NfaCache;
+use pool::{Pool, PoolGuard};
 
 /// InstPtr represents the index of an instruction in a regex program.
 pub type InstPtr = usize;
 
-/// Insts is a sequence of instructions.
+/// Program is a sequence of instructions and various facts about those
+/// instructions.
 #[derive(Clone)]
-pub struct Insts {
-    insts: Vec<Inst>,
-    bytes: bool,
-    reverse: bool,
-    byte_classes: Vec<u8>,
+pub struct Program {
+    pub insts: Vec<Inst>,
+    pub matches: Vec<InstPtr>,
+    pub captures: Vec<Option<String>>,
+    pub capture_name_idx: Arc<HashMap<String, usize>>,
+    pub start: InstPtr,
+    pub byte_classes: Vec<u8>,
+    pub is_bytes: bool,
+    pub is_dfa: bool,
+    pub is_reverse: bool,
+    pub is_anchored_start: bool,
+    pub is_anchored_end: bool,
+    pub prefixes: Literals,
+    pub cache: EngineCache,
 }
 
-impl Insts {
-    /// Create a new instruction sequence.
-    ///
-    /// If `bytes` is true, then this instruction sequence must run on raw
-    /// bytes. Otherwise, it is executed on Unicode codepoints.
-    ///
-    /// A Vec<Inst> can be created with the compiler.
-    pub fn new(
-        insts: Vec<Inst>,
-        bytes: bool,
-        reverse: bool,
-        byte_classes: Vec<u8>,
-    ) -> Self {
-        assert!(byte_classes.len() == 256);
-        Insts {
-            insts: insts,
-            bytes: bytes,
-            reverse: reverse,
-            byte_classes: byte_classes,
+impl Program {
+    /// Creates an empty instruction sequence. Fields are given default
+    /// values.
+ pub fn new() -> Self { + Program { + insts: vec![], + matches: vec![], + captures: vec![], + capture_name_idx: Arc::new(HashMap::new()), + start: 0, + byte_classes: vec![], + is_bytes: false, + is_dfa: false, + is_reverse: false, + is_anchored_start: false, + is_anchored_end: false, + prefixes: Literals::empty(), + cache: EngineCache::new(), } } - /// Returns true if and only if this instruction sequence must be executed - /// on byte strings. - pub fn is_bytes(&self) -> bool { - self.bytes - } - - /// Returns true if and only if this instruction sequence is reversed. - pub fn is_reversed(&self) -> bool { - self.reverse - } - /// If pc is an index to a no-op instruction (like Save), then return the /// next pc that is not a no-op instruction. pub fn skip(&self, mut pc: usize) -> usize { @@ -63,73 +67,40 @@ impl Insts { } } - /// Returns a map from input byte to byte class. Each class represents - /// a set of bytes that are indistinguishable to the underlying - /// instructions. - /// - /// It is guaranteed to have length 256. - pub fn byte_classes(&self) -> &[u8] { - &self.byte_classes - } - - /// Returns the location of the `Save(0)` instruction, which is present - /// in every program and always indicates the logical start of a match. - /// - /// (DFA programs compile a `.*?` into the program, preceding the `Save(0)` - /// instruction, to support unanchored matches. Generally, we want to - /// ignore that `.*?` when doing analysis, like extracting prefixes.) - pub fn start(&self) -> InstPtr { - for (i, inst) in self.iter().enumerate() { - match *inst { - Inst::Save(ref inst) if inst.slot == 0 => return i, - _ => {} - } - } - unreachable!() - } - /// Return true if and only if an execution engine at instruction `pc` will /// always lead to a match. pub fn leads_to_match(&self, pc: usize) -> bool { + if self.matches.len() > 1 { + // If we have a regex set, then we have more than one ending + // state, so leading to one of those states is generally + // meaningless. + return false; + } match self[self.skip(pc)] { - Inst::Match => true, + Inst::Match(_) => true, _ => false, } } - /// Return true if and only if the regex is anchored at the start of - /// search text. - pub fn anchored_begin(&self) -> bool { - match self.get(1) { - Some(&Inst::EmptyLook(ref inst)) => { - inst.look == EmptyLook::StartText - } - _ => false, - } + /// Returns true if the current configuration demands that an implicit + /// `.*?` be prepended to the instruction sequence. + pub fn needs_dotstar(&self) -> bool { + self.is_dfa && !self.is_reverse && !self.is_anchored_start } - /// Return true if and only if the regex is anchored at the end of - /// search text. - pub fn anchored_end(&self) -> bool { - match self.get(self.len() - 3) { - Some(&Inst::EmptyLook(ref inst)) => { - inst.look == EmptyLook::EndText - } - _ => false, - } + /// Retrieve cached state for NFA execution. + pub fn cache_nfa(&self) -> PoolGuard> { + self.cache.nfa.get() } - /// Build a matching engine for all prefix literals in this instruction - /// sequence. - /// - /// If there are no prefix literals (or there are too many), then a - /// matching engine that never matches is returned. - pub fn prefix_matcher(&self) -> Literals { - if self.is_bytes() || self.is_reversed() { - Literals::empty() - } else { - BuildPrefixes::new(self).literals().into_matcher() - } + /// Retrieve cached state for backtracking execution. + pub fn cache_backtrack(&self) -> PoolGuard> { + self.cache.backtrack.get() + } + + /// Retrieve cached state for DFA execution. 
+ pub fn cache_dfa(&self) -> PoolGuard> { + self.cache.dfa.get() } /// Return the approximate heap usage of this instruction sequence in @@ -142,7 +113,7 @@ impl Insts { } } -impl Deref for Insts { +impl Deref for Program { type Target = [Inst]; fn deref(&self) -> &Self::Target { @@ -150,7 +121,7 @@ impl Deref for Insts { } } -impl fmt::Debug for Insts { +impl fmt::Debug for Program { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use self::Inst::*; @@ -171,7 +142,9 @@ impl fmt::Debug for Insts { try!(writeln!(f, "--------------------------------")); for (pc, inst) in self.iter().enumerate() { match *inst { - Match => try!(writeln!(f, "{:04} Match", pc)), + Match(slot) => { + try!(writeln!(f, "{:04} Match({:?})", pc, slot)) + } Save(ref inst) => { let s = format!("{:04} Save({})", pc, inst.slot); try!(writeln!(f, "{}", with_goto(pc, inst.goto, s))); @@ -215,12 +188,45 @@ impl fmt::Debug for Insts { } } -impl<'a> IntoIterator for &'a Insts { +impl<'a> IntoIterator for &'a Program { type Item = &'a Inst; type IntoIter = slice::Iter<'a, Inst>; fn into_iter(self) -> Self::IntoIter { self.iter() } } +/// EngineCache maintains reusable allocations for each matching engine +/// available to a particular program. +/// +/// The allocations are created lazily, so we don't pay for caches that +/// aren't used. +/// +/// N.B. These are all behind a pointer because it's fewer bytes to memcpy. +/// These caches are pushed/popped from the pool a lot, and a smaller +/// footprint can have an impact on matching small inputs. See, for example, +/// the hard_32 benchmark. +#[derive(Debug)] +pub struct EngineCache { + nfa: Pool>, + backtrack: Pool>, + dfa: Pool>, +} + +impl EngineCache { + fn new() -> Self { + EngineCache { + nfa: Pool::new(Box::new(|| Box::new(NfaCache::new()))), + backtrack: Pool::new(Box::new(|| Box::new(BacktrackCache::new()))), + dfa: Pool::new(Box::new(|| Box::new(DfaCache::new()))), + } + } +} + +impl Clone for EngineCache { + fn clone(&self) -> EngineCache { + EngineCache::new() + } +} + /// Inst is an instruction code in a Regex program. /// /// Regrettably, a regex program either contains Unicode codepoint @@ -241,7 +247,13 @@ impl<'a> IntoIterator for &'a Insts { #[derive(Clone, Debug)] pub enum Inst { /// Match indicates that the program has reached a match state. - Match, + /// + /// The number in the match corresponds to the Nth logical regular + /// expression in this program. This index is always 0 for normal regex + /// programs. Values greater than 0 appear when compiling regex sets, and + /// each match instruction gets its own unique value. The value corresponds + /// to the Nth regex in the set. + Match(usize), /// Save causes the program to save the current location of the input in /// the slot indicated by InstSave. Save(InstSave), diff --git a/src/program.rs b/src/program.rs deleted file mode 100644 index 9b5dcddc17..0000000000 --- a/src/program.rs +++ /dev/null @@ -1,226 +0,0 @@ -// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -use std::collections::HashMap; -use std::sync::Arc; - -use syntax; - -use backtrack::BacktrackCache; -use compile::{Compiled, Compiler}; -use dfa::DfaCache; -use inst::Insts; -use nfa::NfaCache; -use pool::{Pool, PoolGuard}; -use literals::Literals; -use Error; - - -/// Program represents a compiled regular expression. Once an expression is -/// compiled, its representation is immutable and will never change. -/// (Well, almost. In fact, the matching engines cache state that can be -/// reused on subsequent searches. But this is interior mutability that -/// shouldn't be observable by the caller.) -/// -/// A compiled regular expression contains quite a bit more than justs its -/// opcodes. It also contains capture group names, literal prefixes, the -/// original regular expression string and some facts about the expression -/// (like whether it is anchored to the beginning or end of the search text). -#[derive(Clone, Debug)] -pub struct Program { - /// The original regular expression string. - pub original: String, - /// A sequence of instructions. - pub insts: Insts, - /// The sequence of capture group names. There is an entry for each capture - /// group index and a name exists only if the capture group is named. - pub cap_names: Vec>, - /// The map of named capture groups. The keys are group names and - /// the values are group indices. - pub named_groups: Arc>, - /// If the regular expression requires a literal prefix in order to have a - /// match, that prefix is stored here as a DFA. - pub prefixes: Literals, - /// True iff program is anchored at the beginning. - pub anchored_begin: bool, - /// True iff program is anchored at the end. - pub anchored_end: bool, - /// Cached reusable state for matching engines. - pub cache: EngineCache, -} - -/// A builder for compiling a regular expression program. -pub struct ProgramBuilder { - re: String, - compiler: Compiler, -} - -impl ProgramBuilder { - /// Create a new program builder for the given regular expression. - /// - /// Afer new is called, it is legal to call compile immediately. Default - /// values for other knobs are set automatically. - pub fn new(re: &str) -> Self { - ProgramBuilder { - re: re.to_owned(), - compiler: Compiler::new(), - } - } - - /// Set a size limit that the compiler uses to limit the total number of - /// bytes occupied by the opcodes for this regex. - pub fn size_limit(mut self, size_limit: usize) -> Self { - self.compiler = self.compiler.size_limit(size_limit); - self - } - - /// Enable compilation of a byte based program. - /// - /// By default, programs operate on Unicode codepoints. - pub fn bytes(mut self, yes: bool) -> Self { - self.compiler = self.compiler.bytes(yes); - self - } - - /// Enable compilation of a byte based DFA program. - /// - /// This does instruct the compiler to compile a byte based program, but - /// it also does other things that are specifically required by the lazy - /// DFA, such as adding a `.*?` before the first capture save for - /// unanchored regular expressions. - pub fn dfa(mut self, yes: bool) -> Self { - self.compiler = self.compiler.dfa(yes); - self - } - - /// Compile the regular expression in reverse. - /// - /// This is generally only used by the lazy DFA to find the start location - /// of a match. - pub fn reverse(mut self, yes: bool) -> Self { - self.compiler = self.compiler.reverse(yes); - self - } - - /// Compile the given regular expression under the given configuration. 
- /// - /// If the regular expression could not be compiled (e.g., it is too big), - /// then return an error. - pub fn compile(self) -> Result { - let expr = try!(syntax::Expr::parse(&self.re)); - let Compiled { insts, cap_names } = try!(self.compiler.compile(&expr)); - let (prefixes, anchored_begin, anchored_end) = ( - insts.prefix_matcher(), - insts.anchored_begin(), - insts.anchored_end(), - ); - let mut named_groups = HashMap::new(); - for (i, name) in cap_names.iter().enumerate() { - if let Some(ref name) = *name { - named_groups.insert(name.to_owned(), i); - } - } - Ok(Program { - original: self.re, - insts: insts, - cap_names: cap_names, - named_groups: Arc::new(named_groups), - prefixes: prefixes, - anchored_begin: anchored_begin, - anchored_end: anchored_end, - cache: EngineCache::new(), - }) - } -} - -impl Program { - /// Returns true if the set of literal prefixes implies a match and - /// preserves leftmost first matching semantics. - /// - /// If this returns true, then it is possible to avoid running any of the - /// NFA or DFA based matching engines entirely. - pub fn is_prefix_match(&self) -> bool { - self.prefixes.at_match() - } - - /// Returns true if the underlying program is reversed. - pub fn is_reversed(&self) -> bool { - self.insts.is_reversed() - } - - /// Returns the total number of capture groups in the regular expression. - /// This includes the zeroth capture. - pub fn num_captures(&self) -> usize { - self.cap_names.len() - } - - /// Allocate new capture groups. - pub fn alloc_captures(&self) -> Vec> { - vec![None; 2 * self.num_captures()] - } - - /// Retrieve cached state for NFA execution. - pub fn cache_nfa(&self) -> PoolGuard> { - self.cache.nfa.get() - } - - /// Retrieve cached state for backtracking execution. - pub fn cache_backtrack(&self) -> PoolGuard> { - self.cache.backtrack.get() - } - - /// Retrieve cached state for DFA execution. - pub fn cache_dfa(&self) -> PoolGuard> { - self.cache.dfa.get() - } - - /// Return the approximate heap usage of this Program in bytes. - /// - /// Note that this does not include cached engine data. - pub fn approximate_size(&self) -> usize { - // ignore capture names - self.original.len() - + self.insts.approximate_size() - + self.prefixes.approximate_size() - } -} - -/// EngineCache maintains reusable allocations for each matching engine -/// available to a particular program. -/// -/// The allocations are created lazily, so we don't pay for caches that -/// aren't used. -/// -/// N.B. These are all behind a pointer because it's fewer bytes to memcpy. -/// These caches are pushed/popped from the pool a lot, and a smaller -/// footprint can have an impact on matching small inputs. See, for example, -/// the hard_32 benchmark. 
-#[derive(Debug)] -pub struct EngineCache { - nfa: Pool>, - backtrack: Pool>, - dfa: Pool>, -} - -impl EngineCache { - fn new() -> Self { - EngineCache { - nfa: Pool::new(Box::new(|| Box::new(NfaCache::new()))), - backtrack: Pool::new(Box::new(|| Box::new(BacktrackCache::new()))), - dfa: Pool::new(Box::new(|| Box::new(DfaCache::new()))), - } - } -} - -impl Clone for EngineCache { - fn clone(&self) -> EngineCache { - EngineCache::new() - } -} diff --git a/src/re.rs b/src/re.rs index 9ce1ef4561..624c74c111 100644 --- a/src/re.rs +++ b/src/re.rs @@ -17,7 +17,7 @@ use std::str::pattern::{Pattern, Searcher, SearchStep}; use std::str::FromStr; use std::sync::Arc; -use exec::{Exec, ExecBuilder}; +use exec::{CaptureSlots, Exec, ExecBuilder, Search}; use syntax; const REPLACE_EXPAND: &'static str = r"(?x) @@ -30,9 +30,6 @@ const REPLACE_EXPAND: &'static str = r"(?x) ) "; -/// Type alias for representing capture indices. -pub type CaptureIdxs = [Option]; - /// Escapes all regular expression meta characters in `text`. /// /// The string returned may be safely used as a literal in a regular @@ -67,6 +64,8 @@ pub enum Error { /// The compiled program exceeded the set size limit. /// The argument is the size limit imposed. CompiledTooBig(usize), + /// An invalid set is a regex set with fewer than 2 regular expressions. + InvalidSet, /// Hints that destructuring should not be exhaustive. /// /// This enum may grow additional variants, so this makes sure clients @@ -81,6 +80,9 @@ impl ::std::error::Error for Error { match *self { Error::Syntax(ref err) => err.description(), Error::CompiledTooBig(_) => "compiled program too big", + Error::InvalidSet => { + "sets must contain 2 or more regular expressions" + } Error::__Nonexhaustive => unreachable!(), } } @@ -101,6 +103,9 @@ impl fmt::Display for Error { write!(f, "Compiled regex exceeds size limit of {} bytes.", limit) } + Error::InvalidSet => { + write!(f, "Sets must contain 2 or more regular expressions.") + } Error::__Nonexhaustive => unreachable!(), } } @@ -188,7 +193,7 @@ pub struct ExNative { #[doc(hidden)] pub groups: &'static &'static [(&'static str, usize)], #[doc(hidden)] - pub prog: fn(&mut CaptureIdxs, &str, usize) -> bool, + pub prog: fn(CaptureSlots, &str, usize) -> bool, } impl Copy for ExNative {} @@ -634,7 +639,7 @@ impl Regex { /// Returns the original string of this regex. pub fn as_str(&self) -> &str { match *self { - Regex::Dynamic(ref exec) => exec.regex_str(), + Regex::Dynamic(ref exec) => &exec.regex_strings()[0], Regex::Native(ExNative { ref original, .. 
            }) => original,
        }
    }
 
@@ -644,7 +649,7 @@
         match *self {
             Regex::Native(ref n) => CaptureNames::Native(n.names.iter()),
             Regex::Dynamic(ref d) => {
-                CaptureNames::Dynamic(d.capture_names().iter())
+                CaptureNames::Dynamic(d.captures().iter())
             }
         }
     }
@@ -653,14 +658,14 @@
     pub fn captures_len(&self) -> usize {
         match *self {
             Regex::Native(ref n) => n.names.len(),
-            Regex::Dynamic(ref d) => d.capture_names().len()
+            Regex::Dynamic(ref d) => d.captures().len()
         }
     }
 
     fn alloc_captures(&self) -> Vec<Option<usize>> {
         match *self {
             Regex::Native(ref n) => vec![None; 2 * n.names.len()],
-            Regex::Dynamic(ref d) => d.alloc_captures(),
+            Regex::Dynamic(ref d) => vec![None; 2 * d.captures().len()],
         }
     }
 }
@@ -811,7 +816,6 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
 }
 
 enum NamedGroups {
-    Empty,
     Native(&'static [(&'static str, usize)]),
     Dynamic(Arc<HashMap<String, usize>>),
 }
@@ -819,22 +823,17 @@ enum NamedGroups {
 impl NamedGroups {
     fn from_regex(regex: &Regex) -> NamedGroups {
         match *regex {
-            Regex::Native(ExNative { ref groups, .. }) =>
-                NamedGroups::Native(groups),
+            Regex::Native(ExNative { ref groups, .. }) => {
+                NamedGroups::Native(groups)
+            }
             Regex::Dynamic(ref exec) => {
-                let groups = exec.named_groups();
-                if groups.is_empty() {
-                    NamedGroups::Empty
-                } else {
-                    NamedGroups::Dynamic(groups.clone())
-                }
+                NamedGroups::Dynamic(exec.capture_name_idx().clone())
             }
         }
     }
 
     fn pos(&self, name: &str) -> Option<usize> {
         match *self {
-            NamedGroups::Empty => None,
             NamedGroups::Native(groups) => {
                 groups.binary_search_by(|&(n, _)| n.cmp(name))
                       .ok().map(|i| groups[i].1)
@@ -847,7 +846,6 @@ impl NamedGroups {
 
     fn iter<'n>(&'n self) -> NamedGroupsIter<'n> {
         match *self {
-            NamedGroups::Empty => NamedGroupsIter::Empty,
             NamedGroups::Native(g) => NamedGroupsIter::Native(g.iter()),
             NamedGroups::Dynamic(ref g) => NamedGroupsIter::Dynamic(g.iter()),
         }
@@ -1233,10 +1231,16 @@ unsafe impl<'r, 't> Searcher<'t> for RegexSearcher<'r, 't> {
     }
 }
 
-fn exec(re: &Regex, caps: &mut CaptureIdxs, text: &str, start: usize) -> bool {
+fn exec(re: &Regex, caps: CaptureSlots, text: &str, start: usize) -> bool {
     match *re {
         Regex::Native(ExNative { ref prog, .. }) => (*prog)(caps, text, start),
-        Regex::Dynamic(ref prog) => prog.exec(caps, text, start),
+        Regex::Dynamic(ref prog) => {
+            let mut search = Search {
+                captures: caps,
+                matches: &mut [false],
+            };
+            prog.exec(&mut search, text, start)
+        }
     }
 }
diff --git a/src/set.rs b/src/set.rs
new file mode 100644
index 0000000000..fefe009eca
--- /dev/null
+++ b/src/set.rs
@@ -0,0 +1,315 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::fmt;
+use std::iter;
+use std::slice;
+use std::vec;
+
+use syntax::Expr;
+
+use exec::{Exec, ExecBuilder, Search};
+use Error;
+
+/// Match multiple (possibly overlapping) regular expressions in a single scan.
+///
+/// A regex set corresponds to the union of two or more regular expressions.
+/// That is, a regex set will match text where at least one of its
+/// constituent regular expressions matches. A regex set as it's formulated here
+/// provides a touch more power: it will also report *which* regular
+/// expressions in the set match.
Indeed, this is the key difference between +/// regex sets and a single `Regex` with many alternates, since only one +/// alternate can match at a time. +/// +/// For example, consider regular expressions to match email addresses and +/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a +/// regex set is constructed from those regexes, then searching the text +/// `foo@example.com` will report both regexes as matching. Of course, one +/// could accomplish this by compiling each regex on its own and doing two +/// searches over the text. The key advantage of using a regex set is that it +/// will report the matching regexes using a *single pass through the text*. +/// If one has hundreds or thousands of regexes to match repeatedly (like a URL +/// router for a complex web application or a user agent matcher), then a regex +/// set can realize huge performance gains. +/// +/// # Example +/// +/// This shows how the above two regexes (for matching email addresses and +/// domains) might work: +/// +/// ```rust +/// use regex::RegexSet; +/// +/// let set = RegexSet::new(&[ +/// r"[a-z]+@[a-z]+\.(com|org|net)", +/// r"[a-z]+\.(com|org|net)", +/// ]).unwrap(); +/// +/// // Ask whether any regexes in the set match. +/// assert!(set.is_match("foo@example.com")); +/// +/// // Identify which regexes in the set match. +/// let matches: Vec<_> = set.matches("foo@example.com").into_iter().collect(); +/// assert_eq!(vec![0, 1], matches); +/// +/// // Try again, but with text that only matches one of the regexes. +/// let matches: Vec<_> = set.matches("example.com").into_iter().collect(); +/// assert_eq!(vec![1], matches); +/// +/// // Try again, but with text that doesn't match any regex in the set. +/// let matches: Vec<_> = set.matches("example").into_iter().collect(); +/// assert!(matches.is_empty()); +/// ``` +/// +/// Note that it would be possible to adapt the above example to using `Regex` +/// with an expression like: +/// +/// ```ignore +/// (?P[a-z]+@(?P[a-z]+[.](com|org|net)))|(?P[a-z]+[.](com|org|net)) +/// ``` +/// +/// After a match, one could then inspect the capture groups to figure out +/// which alternates matched. The problem is that it is hard to make this +/// approach scale when there are many regexes since the overlap between each +/// alternate isn't always obvious to reason about. +/// +/// # Limitations +/// +/// Regex sets are limited to answering the following two questions: +/// +/// 1. Does any regex in the set match? +/// 2. If so, which regexes in the set match? +/// +/// As with the main `Regex` type, it is cheaper to ask (1) instead of (2) +/// since the matching engines can stop after the first match is found. +/// +/// Other features like finding the location of successive matches or their +/// sub-captures aren't supported. If you need this functionality, the +/// recommended approach is to compile each regex in the set independently and +/// selectively match them based on which regexes in the set matched. +/// +/// # Performance +/// +/// A `RegexSet` has the same performance characteristics as `Regex`. Namely, +/// search takes `O(mn)` time, where `m` is proportional to the size of the +/// regex set and `n` is proportional to the length of the search text. +#[derive(Clone)] +pub struct RegexSet(Exec); + +impl RegexSet { + /// Create a new regex set with the given regular expressions. + /// + /// This takes an iterator of `S`, where `S` is something that can produce + /// a `&str`. 
If any of the strings in the iterator are not valid regular + /// expressions, then an error is returned. + /// + /// # Example + /// + /// Create a new regex set from an iterator of strings: + /// + /// ```rust + /// use regex::RegexSet; + /// + /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap(); + /// assert!(set.is_match("foo")); + /// ``` + pub fn new(exprs: I) -> Result + where S: AsRef, I: IntoIterator { + let exec = try!(ExecBuilder::new_many(exprs).build()); + if exec.regex_strings().len() < 2 { + return Err(Error::InvalidSet); + } + Ok(RegexSet(exec)) + } + + /// Returns true if and only if one of the regexes in this set matches + /// the text given. + /// + /// This method should be preferred if you only need to test whether any + /// of the regexes in the set should match, but don't care about *which* + /// regexes matched. This is because the underlying matching engine will + /// quit immediately after seeing the first match instead of continuing to + /// find all matches. + /// + /// Note that as with searches using `Regex`, the expression is unanchored + /// by default. That is, if the regex does not start with `^` or `\A`, or + /// end with `$` or `\z`, then it is permitted to match anywhere in the + /// text. + /// + /// # Example + /// + /// Tests whether a set matches some text: + /// + /// ```rust + /// use regex::RegexSet; + /// + /// let set = RegexSet::new(&[r"\w+", r"\d+"]).unwrap(); + /// assert!(set.is_match("foo")); + /// assert!(!set.is_match("☃")); + /// ``` + pub fn is_match(&self, text: &str) -> bool { + let mut search = Search { captures: &mut [], matches: &mut [] }; + self.0.exec(&mut search, text, 0) + } + + /// Returns the set of regular expressions that match in the given text. + /// + /// The set returned contains the index of each regular expression that + /// matches in the given text. The index is in correspondence with the + /// order of regular expressions given to `RegexSet`'s constructor. + /// + /// The set can also be used to iterate over the matched indices. + /// + /// Note that as with searches using `Regex`, the expression is unanchored + /// by default. That is, if the regex does not start with `^` or `\A`, or + /// end with `$` or `\z`, then it is permitted to match anywhere in the + /// text. + /// + /// # Example + /// + /// Tests which regular expressions match the given text: + /// + /// ```rust + /// use regex::RegexSet; + /// + /// let set = RegexSet::new(&[ + /// r"\w+", + /// r"\d+", + /// r"\pL+", + /// r"foo", + /// r"bar", + /// r"barfoo", + /// r"foobar", + /// ]).unwrap(); + /// let matches: Vec<_> = set.matches("foobar").into_iter().collect(); + /// assert_eq!(matches, vec![0, 2, 3, 4, 6]); + /// + /// // You can also test whether a particular regex matched: + /// let matches = set.matches("foobar"); + /// assert!(!matches.matched(5)); + /// assert!(matches.matched(6)); + /// ``` + pub fn matches(&self, text: &str) -> SetMatches { + let mut matches = vec![false; self.0.matches().len()]; + let matched_any = { + let mut search = Search { + captures: &mut [], + matches: &mut matches + }; + self.0.exec(&mut search, text, 0) + }; + SetMatches { + matched_any: matched_any, + matches: matches, + } + } + + /// Returns the total number of regular expressions in this set. 
+    pub fn len(&self) -> usize {
+        self.0.regex_strings().len()
+    }
+}
+
+impl fmt::Debug for RegexSet {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "RegexSet({:?})", self.0.regex_strings())
+    }
+}
+
+/// A set of matches returned by a regex set.
+#[derive(Clone, Debug)]
+pub struct SetMatches {
+    matched_any: bool,
+    matches: Vec<bool>,
+}
+
+impl SetMatches {
+    /// Whether this set contains any matches.
+    pub fn matched_any(&self) -> bool {
+        self.matched_any
+    }
+
+    /// Whether the regex at the given index matched.
+    ///
+    /// The index for a regex is determined by its insertion order upon the
+    /// initial construction of a `RegexSet`, starting at `0`.
+    ///
+    /// # Panics
+    ///
+    /// If `regex_index` is greater than or equal to `self.len()`.
+    pub fn matched(&self, regex_index: usize) -> bool {
+        self.matches[regex_index]
+    }
+
+    /// The total number of regexes in the set that created these matches.
+    pub fn len(&self) -> usize {
+        self.matches.len()
+    }
+
+    /// Returns an iterator over indexes in the regex that matched.
+    pub fn iter(&self) -> SetMatchesIter {
+        SetMatchesIter((&*self.matches).into_iter().enumerate())
+    }
+}
+
+impl IntoIterator for SetMatches {
+    type IntoIter = SetMatchesIntoIter;
+    type Item = usize;
+
+    fn into_iter(self) -> Self::IntoIter {
+        SetMatchesIntoIter(self.matches.into_iter().enumerate())
+    }
+}
+
+impl<'a> IntoIterator for &'a SetMatches {
+    type IntoIter = SetMatchesIter<'a>;
+    type Item = usize;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+/// An owned iterator over the set of matches from a regex set.
+pub struct SetMatchesIntoIter(iter::Enumerate<vec::IntoIter<bool>>);
+
+impl Iterator for SetMatchesIntoIter {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<usize> {
+        loop {
+            match self.0.next() {
+                None => return None,
+                Some((_, false)) => {}
+                Some((i, true)) => return Some(i),
+            }
+        }
+    }
+}
+
+/// A borrowed iterator over the set of matches from a regex set.
+///
+/// The lifetime `'a` refers to the lifetime of a `SetMatches` value.
+#[derive(Clone)]
+pub struct SetMatchesIter<'a>(iter::Enumerate<slice::Iter<'a, bool>>);
+
+impl<'a> Iterator for SetMatchesIter<'a> {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<usize> {
+        loop {
+            match self.0.next() {
+                None => return None,
+                Some((_, &false)) => {}
+                Some((i, &true)) => return Some(i),
+            }
+        }
+    }
+}
diff --git a/tests/test_dynamic.rs b/tests/test_dynamic.rs
index 98837d70dd..18596ad30b 100644
--- a/tests/test_dynamic.rs
+++ b/tests/test_dynamic.rs
@@ -21,8 +21,15 @@ extern crate regex;
 // regex and the input. Other dynamic tests explicitly set the engine to use.
 macro_rules! regex {
     ($re:expr) => {{
-        use regex::internal::ExecBuilder;
-        ExecBuilder::new($re).build().unwrap().into_regex()
+        use regex::Regex;
+        Regex::new($re).unwrap()
+    }}
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {{
+        use regex::RegexSet;
+        RegexSet::new($res).unwrap()
     }}
 }
 
@@ -32,6 +39,7 @@ macro_rules! searcher_expr { ($e:expr) => ($e) }
 macro_rules!
searcher_expr { ($e:expr) => ({}) } mod tests; +mod tests_set; // Regression test for https://github.com/rust-lang/regex/issues/98 // @@ -42,3 +50,23 @@ fn regression_many_repeat_stack_overflow() { let re = regex!("^.{1,2500}"); assert_eq!(re.find("a"), Some((0, 1))); } + +#[test] +fn set_empty() { + use regex::{Error, RegexSet}; + let err = RegexSet::new::<&[String], &String>(&[]).unwrap_err(); + match err { + Error::InvalidSet => {} + err => panic!("expected Error::InvalidSet but got {:?}", err), + } +} + +#[test] +fn set_one() { + use regex::{Error, RegexSet}; + let err = RegexSet::new(&["foo"]).unwrap_err(); + match err { + Error::InvalidSet => {} + err => panic!("expected Error::InvalidSet but got {:?}", err), + } +} diff --git a/tests/tests_set.rs b/tests/tests_set.rs new file mode 100644 index 0000000000..a0712c89ee --- /dev/null +++ b/tests/tests_set.rs @@ -0,0 +1,45 @@ +macro_rules! mat { + ($name:ident, $res:expr, $text:expr, $($match_index:expr),*) => { + #[test] + fn $name() { + let set = regex_set!($res); + assert!(set.is_match($text)); + let expected = vec![$($match_index),*]; + let matches = set.matches($text); + assert!(matches.matched_any()); + let got: Vec<_> = matches.into_iter().collect(); + assert_eq!(expected, got); + } + } +} + +mat!(set1, &["a", "a"], "a", 0, 1); +mat!(set2, &["a", "a"], "ba", 0, 1); +mat!(set3, &["a", "b"], "a", 0); +mat!(set4, &["a", "b"], "b", 1); +mat!(set5, &["a|b", "b|a"], "b", 0, 1); +mat!(set6, &["foo", "oo"], "foo", 0, 1); +mat!(set7, &["^foo", "bar$"], "foo", 0); +mat!(set8, &["^foo", "bar$"], "foo bar", 0, 1); +mat!(set9, &["^foo", "bar$"], "bar", 1); +mat!(set10, &[r"[a-z]+$", "foo"], "01234 foo", 0, 1); +mat!(set11, &[r"[a-z]+$", "foo"], "foo 01234", 1); +mat!(set12, &[r".*?", "a"], "zzzzzza", 0, 1); +mat!(set13, &[r".*", "a"], "zzzzzza", 0, 1); +mat!(set14, &[r".*", "a"], "zzzzzz", 0); + +macro_rules! nomat { + ($name:ident, $res:expr, $text:expr) => { + #[test] + fn $name() { + let set = regex_set!($res); + assert!(!set.is_match($text)); + let matches = set.matches($text); + assert!(!matches.matched_any()); + assert_eq!(0, matches.into_iter().count()); + } + } +} + +nomat!(nset1, &["a", "a"], "b"); +nomat!(nset2, &["^foo", "bar$"], "bar foo");