From 64106c4d3d4ddba8c7bc2af75376e6d3d3d75601 Mon Sep 17 00:00:00 2001 From: Date: Mon, 29 Jun 2015 20:16:15 +0000 Subject: Update documentation --- src/regex/char.rs.html | 311 ++++++ src/regex/input.rs.html | 325 ++++++ src/regex/lib.rs.html | 959 ++++++++++++++++++ src/regex/program.rs.html | 1057 ++++++++++++++++++++ src/regex/re.rs.html | 2393 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 5045 insertions(+) create mode 100644 src/regex/char.rs.html create mode 100644 src/regex/input.rs.html create mode 100644 src/regex/lib.rs.html create mode 100644 src/regex/program.rs.html create mode 100644 src/regex/re.rs.html (limited to 'src/regex') diff --git a/src/regex/char.rs.html b/src/regex/char.rs.html new file mode 100644 index 0000000..f830f03 --- /dev/null +++ b/src/regex/char.rs.html @@ -0,0 +1,311 @@ + + + + + + + + + + char.rs.html -- source + + + + + + + + + + + + + + + +
  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::char;
+use std::cmp::Ordering;
+use std::fmt;
+use std::u32;
+
+use syntax;
+
+/// An inline representation of `Option<char>`.
+///
+/// This eliminates the need to do case analysis on `Option<char>` to determine
+/// ordinality with other characters.
+///
+/// (The `Option<char>` is not related to encoding. Instead, it is used in the
+/// matching engines to represent the beginning and ending boundaries of the
+/// search text.)
+#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Char(u32);
+
+impl fmt::Debug for Char {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match char::from_u32(self.0) {
+            None => write!(f, "Empty"),
+            Some(c) => write!(f, "{:?}", c),
+        }
+    }
+}
+
+impl Char {
+    /// Returns true iff the character is absent.
+    #[inline]
+    pub fn is_none(self) -> bool { self.0 == u32::MAX }
+
+    /// Returns the length of the character's UTF-8 encoding.
+    ///
+    /// If the character is absent, then `0` is returned.
+    #[inline]
+    pub fn len_utf8(self) -> usize {
+        char::from_u32(self.0).map(|c| c.len_utf8()).unwrap_or(0)
+    }
+
+    /// Returns the simple case folding of this character.
+    ///
+    /// If the character is absent, then absence is returned.
+    pub fn case_fold(self) -> Char {
+        char::from_u32(self.0).map(syntax::simple_case_fold).into()
+    }
+
+    /// Returns true iff the character is a word character.
+    ///
+    /// If the character is absent, then false is returned.
+    pub fn is_word_char(self) -> bool {
+        char::from_u32(self.0).map(syntax::is_word_char).unwrap_or(false)
+    }
+
+    /// Converts the character to a real primitive `char`.
+    ///
+    /// If the character is absent, then `None` is returned.
+    pub fn as_char(self) -> Option<char> {
+        // This is only used in the `regex!` macro because it expands char
+        // classes into `match` expressions (instead of binary search).
+        char::from_u32(self.0)
+    }
+}
+
+impl From<char> for Char {
+    fn from(c: char) -> Char { Char(c as u32) }
+}
+
+impl From<Option<char>> for Char {
+    fn from(c: Option<char>) -> Char {
+        c.map(|c| c.into()).unwrap_or(Char(u32::MAX))
+    }
+}
+
+impl PartialEq<char> for Char {
+    #[inline]
+    fn eq(&self, other: &char) -> bool { self.0 == *other as u32 }
+}
+
+impl PartialEq<Char> for char {
+    #[inline]
+    fn eq(&self, other: &Char) -> bool { *self as u32 == other.0 }
+}
+
+impl PartialOrd<char> for Char {
+    #[inline]
+    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
+        self.0.partial_cmp(&(*other as u32))
+    }
+}
+
+impl PartialOrd<Char> for char {
+    #[inline]
+    fn partial_cmp(&self, other: &Char) -> Option<Ordering> {
+        (*self as u32).partial_cmp(&other.0)
+    }
+}
+
+
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/regex/input.rs.html b/src/regex/input.rs.html new file mode 100644 index 0000000..dfe5571 --- /dev/null +++ b/src/regex/input.rs.html @@ -0,0 +1,325 @@ + + + + + + + + + + input.rs.html -- source + + + + + + + + + + + + + + + +
  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::ops;
+
+use char::Char;
+use prefix::Prefix;
+
+/// Represents a location in the input.
+#[derive(Clone, Copy, Debug)]
+pub struct InputAt {
+    pos: usize,
+    c: Char,
+    len: usize,
+}
+
+impl InputAt {
+    /// Returns true iff this position is at the beginning of the input.
+    pub fn is_beginning(&self) -> bool {
+        self.pos == 0
+    }
+
+    /// Returns the character at this position.
+    ///
+    /// If this position is just before or after the input, then an absent
+    /// character is returned.
+    pub fn char(&self) -> Char {
+        self.c
+    }
+
+    /// Returns the UTF-8 width of the character at this position.
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Returns the byte offset of this position.
+    pub fn pos(&self) -> usize {
+        self.pos
+    }
+
+    /// Returns the byte offset of the next position in the input.
+    pub fn next_pos(&self) -> usize {
+        self.pos + self.len
+    }
+}
+
+/// An abstraction over input used in the matching engines.
+pub trait Input {
+    /// Return an encoding of the position at byte offset `i`.
+    fn at(&self, i: usize) -> InputAt;
+    /// Return an encoding of the char position just prior to byte offset `i`.
+    fn previous_at(&self, i: usize) -> InputAt;
+    /// Scan the input for a matching prefix.
+    fn prefix_at(&self, prefixes: &Prefix, at: InputAt) -> Option<InputAt>;
+}
+
+/// An input reader over characters.
+///
+/// (This is the only implementation of `Input` at the moment.)
+#[derive(Debug)]
+pub struct CharInput<'t>(&'t str);
+
+impl<'t> CharInput<'t> {
+    /// Return a new character input reader for the given string.
+    pub fn new(s: &'t str) -> CharInput<'t> {
+        CharInput(s)
+    }
+}
+
+impl<'t> ops::Deref for CharInput<'t> {
+    type Target = str;
+
+    fn deref(&self) -> &str {
+        self.0
+    }
+}
+
+impl<'t> Input for CharInput<'t> {
+    // This `inline(always)` increases throughput by almost 25% on the `hard`
+    // benchmarks over a normal `inline` annotation.
+    //
+    // I'm not sure why `#[inline]` isn't enough to convince LLVM, but it is
+    // used *a lot* in the guts of the matching engines.
+    #[inline(always)]
+    fn at(&self, i: usize) -> InputAt {
+        let c = self[i..].chars().next().into();
+        InputAt {
+            pos: i,
+            c: c,
+            len: c.len_utf8(),
+        }
+    }
+
+    fn previous_at(&self, i: usize) -> InputAt {
+        let c: Char = self[..i].chars().rev().next().into();
+        let len = c.len_utf8();
+        InputAt {
+            pos: i - len,
+            c: c,
+            len: len,
+        }
+    }
+
+    fn prefix_at(&self, prefixes: &Prefix, at: InputAt) -> Option<InputAt> {
+        prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s))
+    }
+}
+
+
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/regex/lib.rs.html b/src/regex/lib.rs.html new file mode 100644 index 0000000..8331db1 --- /dev/null +++ b/src/regex/lib.rs.html @@ -0,0 +1,959 @@ + + + + + + + + + + lib.rs.html -- source + + + + + + + + + + + + + + + +
  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! This crate provides a native implementation of regular expressions that is
+//! heavily based on RE2 both in syntax and in implementation. Notably,
+//! backreferences and arbitrary lookahead/lookbehind assertions are not
+//! provided. In return, regular expression searching provided by this package
+//! has excellent worst-case performance. The specific syntax supported is
+//! documented further down.
+//!
+//! This crate's documentation provides some simple examples, describes Unicode
+//! support and exhaustively lists the supported syntax. For more specific
+//! details on the API, please see the documentation for the `Regex` type.
+//!
+//! # Usage
+//!
+//! This crate is [on crates.io](https://crates.io/crates/regex) and can be
+//! used by adding `regex` to your dependencies in your project's `Cargo.toml`.
+//!
+//! ```toml
+//! [dependencies]
+//! regex = "0.1.8"
+//! ```
+//!
+//! and this to your crate root:
+//!
+//! ```rust
+//! extern crate regex;
+//! ```
+//!
+//! # First example: find a date
+//!
+//! General use of regular expressions in this package involves compiling an
+//! expression and then using it to search, split or replace text. For example,
+//! to confirm that some text resembles a date:
+//!
+//! ```rust
+//! use regex::Regex;
+//! let re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap();
+//! assert!(re.is_match("2014-01-01"));
+//! ```
+//!
+//! Notice the use of the `^` and `$` anchors. In this crate, every expression
+//! is executed with an implicit `.*?` at the beginning and end, which allows
+//! it to match anywhere in the text. Anchors can be used to ensure that the
+//! full text matches an expression.
+//!
+//! This example also demonstrates the utility of
+//! [raw strings](http://doc.rust-lang.org/stable/reference.html#raw-byte-string-literals)
+//! in Rust, which
+//! are just like regular strings except they are prefixed with an `r` and do
+//! not process any escape sequences. For example, `"\\d"` is the same
+//! expression as `r"\d"`.
+//!
+//! # The `regex!` macro
+//!
+//! Rust's compile-time meta-programming facilities provide a way to write a
+//! `regex!` macro which compiles regular expressions *when your program
+//! compiles*. Said differently, if you only use `regex!` to build regular
+//! expressions in your program, then your program cannot compile with an
+//! invalid regular expression. Moreover, the `regex!` macro compiles the
+//! given expression to native Rust code, which ideally makes it faster.
+//! Unfortunately (or fortunately), the dynamic implementation has had a lot
+//! more optimization work put it into it currently, so it is faster than
+//! the `regex!` macro in most cases.
+//!
+//! To use the `regex!` macro, you must enable the `plugin` feature and import
+//! the `regex_macros` crate as a syntax extension:
+//!
+//! ```ignore
+//! #![feature(plugin)]
+//! #![plugin(regex_macros)]
+//! extern crate regex;
+//!
+//! fn main() {
+//!     let re = regex!(r"^\d{4}-\d{2}-\d{2}$");
+//!     assert!(re.is_match("2014-01-01"));
+//! }
+//! ```
+//!
+//! There are a few things worth mentioning about using the `regex!` macro.
+//! Firstly, the `regex!` macro *only* accepts string *literals*.
+//! Secondly, the `regex` crate *must* be linked with the name `regex` since
+//! the generated code depends on finding symbols in the `regex` crate.
+//!
+//! One downside of using the `regex!` macro is that it can increase the
+//! size of your program's binary since it generates specialized Rust code.
+//! The extra size probably won't be significant for a small number of
+//! expressions, but 100+ calls to `regex!` will probably result in a
+//! noticeably bigger binary.
+//!
+//! **NOTE**: This is implemented using a compiler plugin, which is not
+//! available on the Rust 1.0 beta/stable channels. Therefore, you'll only
+//! be able to use `regex!` on the nightlies.
+//!
+//! # Example: iterating over capture groups
+//!
+//! This crate provides convenient iterators for matching an expression
+//! repeatedly against a search string to find successive non-overlapping
+//! matches. For example, to find all dates in a string and be able to access
+//! them by their component pieces:
+//!
+//! ```rust
+//! # extern crate regex; use regex::Regex;
+//! # fn main() {
+//! let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
+//! let text = "2012-03-14, 2013-01-01 and 2014-07-05";
+//! for cap in re.captures_iter(text) {
+//!     println!("Month: {} Day: {} Year: {}",
+//!              cap.at(2).unwrap_or(""), cap.at(3).unwrap_or(""),
+//!              cap.at(1).unwrap_or(""));
+//! }
+//! // Output:
+//! // Month: 03 Day: 14 Year: 2012
+//! // Month: 01 Day: 01 Year: 2013
+//! // Month: 07 Day: 05 Year: 2014
+//! # }
+//! ```
+//!
+//! Notice that the year is in the capture group indexed at `1`. This is
+//! because the *entire match* is stored in the capture group at index `0`.
+//!
+//! # Example: replacement with named capture groups
+//!
+//! Building on the previous example, perhaps we'd like to rearrange the date
+//! formats. This can be done with text replacement. But to make the code
+//! clearer, we can *name*  our capture groups and use those names as variables
+//! in our replacement text:
+//!
+//! ```rust
+//! # extern crate regex; use regex::Regex;
+//! # fn main() {
+//! let re = Regex::new(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})").unwrap();
+//! let before = "2012-03-14, 2013-01-01 and 2014-07-05";
+//! let after = re.replace_all(before, "$m/$d/$y");
+//! assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014");
+//! # }
+//! ```
+//!
+//! The `replace` methods are actually polymorphic in the replacement, which
+//! provides more flexibility than is seen here. (See the documentation for
+//! `Regex::replace` for more details.)
+//!
+//! Note that if your regex gets complicated, you can use the `x` flag to
+//! enable insigificant whitespace mode, which also lets you write comments:
+//!
+//! ```rust
+//! # extern crate regex; use regex::Regex;
+//! # fn main() {
+//! let re = Regex::new(r"(?x)
+//!   (?P<y>\d{4}) # the year
+//!   -
+//!   (?P<m>\d{2}) # the month
+//!   -
+//!   (?P<d>\d{2}) # the day
+//! ").unwrap();
+//! let before = "2012-03-14, 2013-01-01 and 2014-07-05";
+//! let after = re.replace_all(before, "$m/$d/$y");
+//! assert_eq!(after, "03/14/2012, 01/01/2013 and 07/05/2014");
+//! # }
+//! ```
+//!
+//! # Pay for what you use
+//!
+//! With respect to searching text with a regular expression, there are three
+//! questions that can be asked:
+//!
+//! 1. Does the text match this expression?
+//! 2. If so, where does it match?
+//! 3. Where are the submatches?
+//!
+//! Generally speaking, this crate could provide a function to answer only #3,
+//! which would subsume #1 and #2 automatically. However, it can be
+//! significantly more expensive to compute the location of submatches, so it's
+//! best not to do it if you don't need to.
+//!
+//! Therefore, only use what you need. For example, don't use `find` if you
+//! only need to test if an expression matches a string. (Use `is_match`
+//! instead.)
+//!
+//! # Unicode
+//!
+//! This implementation executes regular expressions **only** on sequences of
+//! Unicode scalar values while exposing match locations as byte indices into
+//! the search string.
+//!
+//! Currently, only simple case folding is supported. Namely, when matching
+//! case-insensitively, the characters are first mapped using the
+//! [simple case folding](ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt)
+//! mapping.
+//!
+//! Regular expressions themselves are also **only** interpreted as a sequence
+//! of Unicode scalar values. This means you can use Unicode characters
+//! directly in your expression:
+//!
+//! ```rust
+//! # extern crate regex; use regex::Regex;
+//! # fn main() {
+//! let re = Regex::new(r"(?i)Δ+").unwrap();
+//! assert_eq!(re.find("ΔδΔ"), Some((0, 6)));
+//! # }
+//! ```
+//!
+//! Finally, Unicode general categories and scripts are available as character
+//! classes. For example, you can match a sequence of numerals, Greek or
+//! Cherokee letters:
+//!
+//! ```rust
+//! # extern crate regex; use regex::Regex;
+//! # fn main() {
+//! let re = Regex::new(r"[\pN\p{Greek}\p{Cherokee}]+").unwrap();
+//! assert_eq!(re.find("abcΔᎠβⅠᏴγδⅡxyz"), Some((3, 23)));
+//! # }
+//! ```
+//!
+//! # Syntax
+//!
+//! The syntax supported in this crate is almost in an exact correspondence
+//! with the syntax supported by RE2. It is documented below.
+//!
+//! Note that the regular expression parser and abstract syntax are exposed in
+//! a separate crate,
+//! [`regex-syntax`](../regex_syntax/index.html).
+//!
+//! ## Matching one character
+//!
+//! <pre class="rust">
+//! .           any character except new line (includes new line with s flag)
+//! [xyz]       A character class matching either x, y or z.
+//! [^xyz]      A character class matching any character except x, y and z.
+//! [a-z]       A character class matching any character in range a-z.
+//! \d          digit (\p{Nd})
+//! \D          not digit
+//! [:alpha:]   ASCII character class ([A-Za-z])
+//! [:^alpha:]  Negated ASCII character class ([^A-Za-z])
+//! \pN         One-letter name Unicode character class
+//! \p{Greek}   Unicode character class (general category or script)
+//! \PN         Negated one-letter name Unicode character class
+//! \P{Greek}   negated Unicode character class (general category or script)
+//! </pre>
+//!
+//! Any named character class may appear inside a bracketed `[...]` character
+//! class. For example, `[\p{Greek}\pN]` matches any Greek or numeral
+//! character.
+//!
+//! ## Composites
+//!
+//! <pre class="rust">
+//! xy    concatenation (x followed by y)
+//! x|y   alternation (x or y, prefer x)
+//! </pre>
+//!
+//! ## Repetitions
+//!
+//! <pre class="rust">
+//! x*        zero or more of x (greedy)
+//! x+        one or more of x (greedy)
+//! x?        zero or one of x (greedy)
+//! x*?       zero or more of x (ungreedy)
+//! x+?       one or more of x (ungreedy)
+//! x??       zero or one of x (ungreedy)
+//! x{n,m}    at least n x and at most m x (greedy)
+//! x{n,}     at least n x (greedy)
+//! x{n}      exactly n x
+//! x{n,m}?   at least n x and at most m x (ungreedy)
+//! x{n,}?    at least n x (ungreedy)
+//! x{n}?     exactly n x
+//! </pre>
+//!
+//! ## Empty matches
+//!
+//! <pre class="rust">
+//! ^     the beginning of text (or start-of-line with multi-line mode)
+//! $     the end of text (or end-of-line with multi-line mode)
+//! \A    only the beginning of text (even with multi-line mode enabled)
+//! \z    only the end of text (even with multi-line mode enabled)
+//! \b    a Unicode word boundary (\w on one side and \W, \A, or \z on other)
+//! \B    not a Unicode word boundary
+//! </pre>
+//!
+//! ## Grouping and flags
+//!
+//! <pre class="rust">
+//! (exp)          numbered capture group (indexed by opening parenthesis)
+//! (?P&lt;name&gt;exp)  named (also numbered) capture group (allowed chars: [_0-9a-zA-Z])
+//! (?:exp)        non-capturing group
+//! (?flags)       set flags within current group
+//! (?flags:exp)   set flags for exp (non-capturing)
+//! </pre>
+//!
+//! Flags are each a single character. For example, `(?x)` sets the flag `x`
+//! and `(?-x)` clears the flag `x`. Multiple flags can be set or cleared at
+//! the same time: `(?xy)` sets both the `x` and `y` flags and `(?x-y)` sets
+//! the `x` flag and clears the `y` flag.
+//!
+//! All flags are by default disabled. They are:
+//!
+//! <pre class="rust">
+//! i     case-insensitive
+//! m     multi-line mode: ^ and $ match begin/end of line
+//! s     allow . to match \n
+//! U     swap the meaning of x* and x*?
+//! x     ignore whitespace and allow line comments (starting with `#`)
+//! </pre>
+//!
+//! Here's an example that matches case-insensitively for only part of the
+//! expression:
+//!
+//! ```rust
+//! # extern crate regex; use regex::Regex;
+//! # fn main() {
+//! let re = Regex::new(r"(?i)a+(?-i)b+").unwrap();
+//! let cap = re.captures("AaAaAbbBBBb").unwrap();
+//! assert_eq!(cap.at(0), Some("AaAaAbb"));
+//! # }
+//! ```
+//!
+//! Notice that the `a+` matches either `a` or `A`, but the `b+` only matches
+//! `b`.
+//!
+//! ## Escape sequences
+//!
+//! <pre class="rust">
+//! \*         literal *, works for any punctuation character: \.+*?()|[]{}^$
+//! \a         bell (\x07)
+//! \f         form feed (\x0C)
+//! \t         horizontal tab
+//! \n         new line
+//! \r         carriage return
+//! \v         vertical tab (\x0B)
+//! \123       octal character code (up to three digits)
+//! \x7F       hex character code (exactly two digits)
+//! \x{10FFFF} any hex character code corresponding to a Unicode code point
+//! </pre>
+//!
+//! ## Perl character classes (Unicode friendly)
+//!
+//! These classes are based on the definitions provided in
+//! [UTS#18](http://www.unicode.org/reports/tr18/#Compatibility_Properties):
+//!
+//! <pre class="rust">
+//! \d     digit (\p{Nd})
+//! \D     not digit
+//! \s     whitespace (\p{White_Space})
+//! \S     not whitespace
+//! \w     word character (\p{Alphabetic} + \p{M} + \d + \p{Pc} + \p{Join_Control})
+//! \W     not word character
+//! </pre>
+//!
+//! ## ASCII character classes
+//!
+//! <pre class="rust">
+//! [:alnum:]    alphanumeric ([0-9A-Za-z])
+//! [:alpha:]    alphabetic ([A-Za-z])
+//! [:ascii:]    ASCII ([\x00-\x7F])
+//! [:blank:]    blank ([\t ])
+//! [:cntrl:]    control ([\x00-\x1F\x7F])
+//! [:digit:]    digits ([0-9])
+//! [:graph:]    graphical ([!-~])
+//! [:lower:]    lower case ([a-z])
+//! [:print:]    printable ([ -~])
+//! [:punct:]    punctuation ([!-/:-@[-`{-~])
+//! [:space:]    whitespace ([\t\n\v\f\r ])
+//! [:upper:]    upper case ([A-Z])
+//! [:word:]     word characters ([0-9A-Za-z_])
+//! [:xdigit:]   hex digit ([0-9A-Fa-f])
+//! </pre>
+//!
+//! # Untrusted input
+//!
+//! This crate can handle both untrusted regular expressions and untrusted
+//! search text.
+//!
+//! Untrusted regular expressions are handled by capping the size of a compiled
+//! regular expression. (See `Regex::with_size_limit`.) Without this, it would
+//! be trivial for an attacker to exhaust your system's memory with expressions
+//! like `a{100}{100}{100}`.
+//!
+//! Untrusted search text is allowed because the matching engine(s) in this
+//! crate have time complexity `O(mn)` (with `m ~ regex` and `n ~ search
+//! text`), which means there's no way to cause exponential blow-up like with
+//! some other regular expression engines. (We pay for this by disallowing
+//! features like arbitrary look-ahead and back-references.)
+
+#![deny(missing_docs)]
+#![cfg_attr(test, deny(warnings))]
+#![cfg_attr(feature = "pattern", feature(pattern))]
+#![doc(html_logo_url = "http://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
+       html_favicon_url = "http://www.rust-lang.org/favicon.ico",
+       html_root_url = "http://doc.rust-lang.org/regex/")]
+
+extern crate aho_corasick;
+extern crate memchr;
+extern crate regex_syntax as syntax;
+
+pub use re::{
+    Regex, Error, Captures, SubCaptures, SubCapturesPos, SubCapturesNamed,
+    FindCaptures, FindMatches,
+    Replacer, NoExpand, RegexSplits, RegexSplitsN,
+    quote, is_match,
+};
+
+mod backtrack;
+mod char;
+mod compile;
+mod input;
+mod pool;
+mod prefix;
+mod program;
+mod nfa;
+mod re;
+
+/// The `internal` module exists to support the `regex!` macro and other
+/// suspicious activity, such as testing different matching engines.
+#[doc(hidden)]
+pub mod internal {
+    pub use char::Char;
+    pub use input::{Input, CharInput, InputAt};
+    pub use program::{
+        Program, MatchEngine, CharRanges, Inst, LookInst, OneChar,
+    };
+    pub use re::ExNative;
+    pub use re::Regex::{Dynamic, Native};
+}
+
+
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/regex/program.rs.html b/src/regex/program.rs.html new file mode 100644 index 0000000..8f6ed02 --- /dev/null +++ b/src/regex/program.rs.html @@ -0,0 +1,1057 @@ + + + + + + + + + + program.rs.html -- source + + + + + + + + + + + + + + + +
  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
+449
+450
+451
+452
+453
+454
+455
+456
+457
+458
+459
+460
+461
+462
+463
+464
+465
+466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
+480
+
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::cmp::{self, Ordering};
+
+use syntax;
+
+use Error;
+use backtrack::{Backtrack, BackMachine};
+use char::Char;
+use compile::Compiler;
+use nfa::{Nfa, NfaThreads};
+use pool::Pool;
+use prefix::Prefix;
+use re::CaptureIdxs;
+
+const NUM_PREFIX_LIMIT: usize = 30;
+const PREFIX_LENGTH_LIMIT: usize = 15;
+
+pub type InstIdx = usize;
+
+/// An instruction, the underlying unit of a compiled regular expression
+#[derive(Clone, Debug)]
+pub enum Inst {
+    /// A match has occurred.
+    /// This is always the last instruction and only occurs in a single spot.
+    /// We could special case this in the code, but it is much clearer to
+    /// handle it as a proper instruction.
+    Match,
+    /// Save the current location in the input into the given capture location.
+    Save(usize),
+    /// Jump to the instruction given.
+    Jump(InstIdx),
+    /// Match either instruction, preferring the first.
+    Split(InstIdx, InstIdx),
+    /// A zero-width instruction. When this instruction matches, the input
+    /// is not advanced.
+    EmptyLook(LookInst),
+    /// Match a single possibly case insensitive character.
+    Char(OneChar),
+    /// Match one or more possibly case insensitive character ranges.
+    Ranges(CharRanges),
+}
+
+/// A single character instruction.
+#[derive(Clone, Debug)]
+pub struct OneChar {
+    /// The character.
+    pub c: char,
+    /// True if the character should be matched case insensitively.
+    /// (i.e., The input character will need to be case folded.)
+    pub casei: bool,
+}
+
+/// A multi-range character class instruction.
+#[derive(Clone, Debug)]
+pub struct CharRanges {
+    /// Sorted sequence of non-overlapping ranges.
+    pub ranges: Vec<(char, char)>,
+    /// Whether to match case insensitively.
+    pub casei: bool,
+}
+
+/// The set of zero-width match instructions.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum LookInst {
+    /// Start of line or input.
+    StartLine,
+    /// End of line or input.
+    EndLine,
+    /// Start of input.
+    StartText,
+    /// End of input.
+    EndText,
+    /// Word character on one side and non-word character on other.
+    WordBoundary,
+    /// Word character on both sides or non-word character on both sides.
+    NotWordBoundary,
+}
+
+impl OneChar {
+    /// Tests whether the given input character matches this instruction.
+    #[inline(always)] // About ~5-15% more throughput then `#[inline]`
+    pub fn matches(&self, c: Char) -> bool {
+        self.c == c || (self.casei && self.c == c.case_fold())
+    }
+}
+
+impl CharRanges {
+    /// Emits a range specifically for the `.` expression.
+    pub fn any() -> CharRanges {
+        CharRanges {
+            ranges: vec![('\x00', '\u{10ffff}')],
+            casei: false,
+        }
+    }
+
+    /// Emits a range specifically for the `(?s).` expression.
+    pub fn any_nonl() -> CharRanges {
+        CharRanges {
+            ranges: vec![('\x00', '\x09'), ('\x0B', '\u{10ffff}')],
+            casei: false,
+        }
+    }
+
+    /// Emits a range from the AST character class.
+    pub fn from_class(cls: syntax::CharClass) -> CharRanges {
+        let casei = cls.is_case_insensitive();
+        CharRanges {
+            ranges: cls.into_iter().map(|r| (r.start, r.end)).collect(),
+            casei: casei,
+        }
+    }
+
+    /// Tests whether the given input character matches this instruction.
+    #[inline(always)] // About ~5-15% more throughput then `#[inline]`
+    pub fn matches(&self, mut c: Char) -> Option<usize> {
+        if self.casei {
+            c = c.case_fold();
+        }
+        // This speeds up the `match_class_unicode` benchmark by checking
+        // some common cases quickly without binary search. e.g., Matching
+        // a Unicode class on predominantly ASCII text.
+        for i in 0..cmp::min(self.ranges.len(), 4) {
+            let r = self.ranges[i];
+            if c < r.0 {
+                return None;
+            }
+            if c <= r.1 {
+                return Some(i);
+            }
+        }
+        self.ranges.binary_search_by(|r| {
+            if r.1 < c {
+                Ordering::Less
+            } else if r.0 > c {
+                Ordering::Greater
+            } else {
+                Ordering::Equal
+            }
+        }).ok()
+    }
+}
+
+impl LookInst {
+    /// Tests whether the pair of characters matches this zero-width
+    /// instruction.
+    pub fn matches(&self, c1: Char, c2: Char) -> bool {
+        use self::LookInst::*;
+        match *self {
+            StartLine => c1.is_none() || c1 == '\n',
+            EndLine => c2.is_none() || c2 == '\n',
+            StartText => c1.is_none(),
+            EndText => c2.is_none(),
+            ref wbty => {
+                let (w1, w2) = (c1.is_word_char(), c2.is_word_char());
+                (*wbty == WordBoundary && w1 ^ w2)
+                || (*wbty == NotWordBoundary && !(w1 ^ w2))
+            }
+        }
+    }
+}
+
+/// The matching engines offered by this regex implementation.
+///
+/// N.B. This is exported for use in testing.
+#[doc(hidden)]
+#[derive(Clone, Copy, Debug)]
+pub enum MatchEngine {
+    /// A bounded backtracking implementation. About twice as fast as the
+    /// NFA, but can only work on small regexes and small input.
+    Backtrack,
+    /// A full NFA simulation. Can always be employed but almost always the
+    /// slowest choice.
+    Nfa,
+    /// If the entire regex is a literal and no capture groups have been
+    /// requested, then we can degrade to a simple substring match.
+    Literals,
+}
+
+/// Program represents a compiled regular expression. Once an expression is
+/// compiled, its representation is immutable and will never change.
+/// (Well, almost. In fact, the matching engines cache state that can be
+/// reused on subsequent searches. But this is interior mutability that
+/// shouldn't be observable by the caller.)
+#[derive(Debug)]
+pub struct Program {
+    /// The original regular expression string.
+    pub original: String,
+    /// A sequence of instructions.
+    pub insts: Vec<Inst>,
+    /// The sequence of capture group names. There is an entry for each capture
+    /// group index and a name exists only if the capture group is named.
+    pub cap_names: Vec<Option<String>>,
+    /// If the regular expression requires a literal prefix in order to have a
+    /// match, that prefix is stored here as a DFA.
+    pub prefixes: Prefix,
+    /// True iff matching any literal prefix indicates a match.
+    pub prefixes_complete: bool,
+    /// True iff program is anchored at the beginning.
+    pub anchored_begin: bool,
+    /// True iff program is anchored at the end.
+    pub anchored_end: bool,
+    /// The type of matching engine to use.
+    /// When `None` (the default), pick an engine automatically.
+    pub engine: Option<MatchEngine>,
+    /// Cached NFA threads.
+    pub nfa_threads: Pool<NfaThreads>,
+    /// Cached backtracking memory.
+    pub backtrack: Pool<BackMachine>,
+}
+
+impl Program {
+    /// Compiles a Regex.
+    pub fn new(
+        engine: Option<MatchEngine>,
+        size_limit: usize,
+        re: &str,
+    ) -> Result<Program, Error> {
+        let expr = try!(syntax::Expr::parse(re));
+        let (insts, cap_names) = try!(Compiler::new(size_limit).compile(expr));
+        let (insts_len, ncaps) = (insts.len(), num_captures(&insts));
+        let create_threads = move || NfaThreads::new(insts_len, ncaps);
+        let create_backtrack = move || BackMachine::new();
+        let mut prog = Program {
+            original: re.into(),
+            insts: insts,
+            cap_names: cap_names,
+            prefixes: Prefix::Empty,
+            prefixes_complete: false,
+            anchored_begin: false,
+            anchored_end: false,
+            engine: engine,
+            nfa_threads: Pool::new(Box::new(create_threads)),
+            backtrack: Pool::new(Box::new(create_backtrack)),
+        };
+
+        prog.find_prefixes();
+        prog.anchored_begin = match prog.insts[1] {
+            Inst::EmptyLook(LookInst::StartText) => true,
+            _ => false,
+        };
+        prog.anchored_end = match prog.insts[prog.insts.len() - 3] {
+            Inst::EmptyLook(LookInst::EndText) => true,
+            _ => false,
+        };
+        Ok(prog)
+    }
+
+    /// Executes a compiled regex program.
+    pub fn exec(
+        &self,
+        caps: &mut CaptureIdxs,
+        text: &str,
+        start: usize,
+    ) -> bool {
+        match self.choose_engine(caps.len(), text) {
+            MatchEngine::Backtrack => Backtrack::exec(self, caps, text, start),
+            MatchEngine::Nfa => Nfa::exec(self, caps, text, start),
+            MatchEngine::Literals => {
+                match self.prefixes.find(&text[start..]) {
+                    None => false,
+                    Some((s, e)) => {
+                        if caps.len() == 2 {
+                            caps[0] = Some(start + s);
+                            caps[1] = Some(start + e);
+                        }
+                        true
+                    }
+                }
+            }
+        }
+    }
+
+    fn choose_engine(&self, cap_len: usize, text: &str) -> MatchEngine {
+        // If the engine is already chosen, then we use it.
+        // But that might not be a good idea. e.g., What if `Literals` is
+        // chosen and it can't work? I guess we should probably check whether
+        // the chosen engine is appropriate or not.
+        self.engine.unwrap_or_else(|| {
+            if cap_len <= 2
+               && self.prefixes.preserves_priority()
+               && self.prefixes_complete {
+                MatchEngine::Literals
+            } else if Backtrack::should_exec(self, text) {
+                // We're only here if the input and regex combined are small.
+                MatchEngine::Backtrack
+            } else {
+                MatchEngine::Nfa
+            }
+        })
+    }
+
+    /// Returns the total number of capture groups in the regular expression.
+    /// This includes the zeroth capture.
+    pub fn num_captures(&self) -> usize {
+        num_captures(&self.insts)
+    }
+
+    /// Allocate new capture groups.
+    pub fn alloc_captures(&self) -> Vec<Option<usize>> {
+        vec![None; 2 * self.num_captures()]
+    }
+
+    /// Find and store a prefix machine for the current program.
+    pub fn find_prefixes(&mut self) {
+        use self::Inst::*;
+
+        let (ps, complete) = self.prefixes_from_insts(1);
+        if ps.len() > 0 {
+            self.prefixes = Prefix::new(ps);
+            self.prefixes_complete = complete;
+            return;
+        }
+        let mut pc = 1;
+        let mut prefixes = vec![];
+        let mut pcomplete = true;
+        while let Split(x, y) = self.insts[pc] {
+            let (xps, xcomplete) = self.prefixes_from_insts(x);
+            let (yps, ycomplete) = self.prefixes_from_insts(y);
+            let mut done = false;
+            match (&self.insts[x], &self.insts[y]) {
+                // We should be able to support this. Add explicit stack. ---AG
+                (&Split(_, _), &Split(_, _)) => return,
+                (_, &Split(_, _)) if xps.len() == 0 => return,
+                (_, &Split(_, _)) => {
+                    pcomplete = pcomplete && xcomplete;
+                    prefixes.extend(xps);
+                    pc = y;
+                }
+                (&Split(_, _), _) if yps.len() == 0 => return,
+                (&Split(_, _), _) => {
+                    pcomplete = pcomplete && ycomplete;
+                    prefixes.extend(yps);
+                    pc = x;
+                }
+                _ if xps.len() == 0 || yps.len() == 0 => return,
+                // This is our base case. We've followed splits the whole
+                // way, which means both instructions lead to a match.
+                _ => {
+                    pcomplete = pcomplete && xcomplete && ycomplete;
+                    prefixes.extend(xps);
+                    prefixes.extend(yps);
+                    done = true;
+                }
+            }
+            // Arg. We've over-extended ourselves, quit with nothing to
+            // show for it.
+            if prefixes.len() > NUM_PREFIX_LIMIT {
+                return;
+            }
+            if done { break; }
+        }
+        self.prefixes = Prefix::new(prefixes);
+        self.prefixes_complete = pcomplete && self.prefixes.len() > 0;
+    }
+
+    /// Find a prefix starting at the given instruction.
+    ///
+    /// Returns `true` in the tuple if the end of the prefix leads trivially
+    /// to a match. (This may report false negatives, but being conservative
+    /// is OK.)
+    fn prefixes_from_insts(&self, mut pc: usize) -> (Vec<String>, bool) {
+        use self::Inst::*;
+
+        let mut complete = true;
+        let mut alts = vec![String::new()];
+        while pc < self.insts.len() {
+            let inst = &self.insts[pc];
+
+            // Each iteration adds one character to every alternate prefix *or*
+            // it stops. Thus, the prefix alternates grow in lock step, and it
+            // suffices to check one of them to see if the prefix limit has been
+            // exceeded.
+            if alts[0].len() > PREFIX_LENGTH_LIMIT {
+                complete = false;
+                break;
+            }
+            match *inst {
+                Save(_) => { pc += 1; continue } // completely ignore it
+                Char(OneChar { c, casei: false }) => {
+                    for alt in &mut alts {
+                        alt.push(c);
+                    }
+                    pc += 1;
+                }
+                Ranges(CharRanges { ref ranges, casei: false }) => {
+                    let nchars = num_chars_in_ranges(ranges);
+                    if alts.len() * nchars > NUM_PREFIX_LIMIT {
+                        complete = false;
+                        break;
+                    }
+
+                    let orig = alts;
+                    alts = Vec::with_capacity(orig.len());
+                    for &(s, e) in ranges {
+                        for c in (s as u32)..(e as u32 + 1){
+                            for alt in &orig {
+                                let mut alt = alt.clone();
+                                alt.push(::std::char::from_u32(c).unwrap());
+                                alts.push(alt);
+                            }
+                        }
+                    }
+                    pc += 1;
+                }
+                Jump(pc2) => pc = pc2,
+                _ => { complete = self.leads_to_match(pc); break }
+            }
+        }
+        if alts[0].len() == 0 {
+            (vec![], false)
+        } else {
+            (alts, complete)
+        }
+    }
+
+    fn leads_to_match(&self, mut pc: usize) -> bool {
+        // I'm pretty sure this is conservative, so it might have some
+        // false negatives.
+        loop {
+            match self.insts[pc] {
+                Inst::Match => return true,
+                Inst::Save(_) => pc += 1,
+                Inst::Jump(pc2) => pc = pc2,
+                _ => return false,
+            }
+        }
+    }
+}
+
+impl Clone for Program {
+    fn clone(&self) -> Program {
+        let (insts_len, ncaps) = (self.insts.len(), self.num_captures());
+        let create_threads = move || NfaThreads::new(insts_len, ncaps);
+        let create_backtrack = move || BackMachine::new();
+        Program {
+            original: self.original.clone(),
+            insts: self.insts.clone(),
+            cap_names: self.cap_names.clone(),
+            prefixes: self.prefixes.clone(),
+            prefixes_complete: self.prefixes_complete,
+            anchored_begin: self.anchored_begin,
+            anchored_end: self.anchored_end,
+            engine: self.engine,
+            nfa_threads: Pool::new(Box::new(create_threads)),
+            backtrack: Pool::new(Box::new(create_backtrack)),
+        }
+    }
+}
+
+/// Return the number of captures in the given sequence of instructions.
+fn num_captures(insts: &[Inst]) -> usize {
+    let mut n = 0;
+    for inst in insts {
+        match *inst {
+            Inst::Save(c) => n = cmp::max(n, c+1),
+            _ => {}
+        }
+    }
+    // There's exactly 2 Save slots for every capture.
+    n / 2
+}
+
+/// Count the number of characters in the given range.
+///
+/// This is useful for pre-emptively limiting the number of prefix literals
+/// we extract from a regex program.
+fn num_chars_in_ranges(ranges: &[(char, char)]) -> usize {
+    ranges.iter()
+          .map(|&(s, e)| (e as u32) - (s as u32))
+          .fold(0, |acc, len| acc + len) as usize
+}
+
+
+ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/regex/re.rs.html b/src/regex/re.rs.html new file mode 100644 index 0000000..3472b45 --- /dev/null +++ b/src/regex/re.rs.html @@ -0,0 +1,2393 @@ + + + + + + + + + + re.rs.html -- source + + + + + + + + + + + + + + + +
   1
+   2
+   3
+   4
+   5
+   6
+   7
+   8
+   9
+  10
+  11
+  12
+  13
+  14
+  15
+  16
+  17
+  18
+  19
+  20
+  21
+  22
+  23
+  24
+  25
+  26
+  27
+  28
+  29
+  30
+  31
+  32
+  33
+  34
+  35
+  36
+  37
+  38
+  39
+  40
+  41
+  42
+  43
+  44
+  45
+  46
+  47
+  48
+  49
+  50
+  51
+  52
+  53
+  54
+  55
+  56
+  57
+  58
+  59
+  60
+  61
+  62
+  63
+  64
+  65
+  66
+  67
+  68
+  69
+  70
+  71
+  72
+  73
+  74
+  75
+  76
+  77
+  78
+  79
+  80
+  81
+  82
+  83
+  84
+  85
+  86
+  87
+  88
+  89
+  90
+  91
+  92
+  93
+  94
+  95
+  96
+  97
+  98
+  99
+ 100
+ 101
+ 102
+ 103
+ 104
+ 105
+ 106
+ 107
+ 108
+ 109
+ 110
+ 111
+ 112
+ 113
+ 114
+ 115
+ 116
+ 117
+ 118
+ 119
+ 120
+ 121
+ 122
+ 123
+ 124
+ 125
+ 126
+ 127
+ 128
+ 129
+ 130
+ 131
+ 132
+ 133
+ 134
+ 135
+ 136
+ 137
+ 138
+ 139
+ 140
+ 141
+ 142
+ 143
+ 144
+ 145
+ 146
+ 147
+ 148
+ 149
+ 150
+ 151
+ 152
+ 153
+ 154
+ 155
+ 156
+ 157
+ 158
+ 159
+ 160
+ 161
+ 162
+ 163
+ 164
+ 165
+ 166
+ 167
+ 168
+ 169
+ 170
+ 171
+ 172
+ 173
+ 174
+ 175
+ 176
+ 177
+ 178
+ 179
+ 180
+ 181
+ 182
+ 183
+ 184
+ 185
+ 186
+ 187
+ 188
+ 189
+ 190
+ 191
+ 192
+ 193
+ 194
+ 195
+ 196
+ 197
+ 198
+ 199
+ 200
+ 201
+ 202
+ 203
+ 204
+ 205
+ 206
+ 207
+ 208
+ 209
+ 210
+ 211
+ 212
+ 213
+ 214
+ 215
+ 216
+ 217
+ 218
+ 219
+ 220
+ 221
+ 222
+ 223
+ 224
+ 225
+ 226
+ 227
+ 228
+ 229
+ 230
+ 231
+ 232
+ 233
+ 234
+ 235
+ 236
+ 237
+ 238
+ 239
+ 240
+ 241
+ 242
+ 243
+ 244
+ 245
+ 246
+ 247
+ 248
+ 249
+ 250
+ 251
+ 252
+ 253
+ 254
+ 255
+ 256
+ 257
+ 258
+ 259
+ 260
+ 261
+ 262
+ 263
+ 264
+ 265
+ 266
+ 267
+ 268
+ 269
+ 270
+ 271
+ 272
+ 273
+ 274
+ 275
+ 276
+ 277
+ 278
+ 279
+ 280
+ 281
+ 282
+ 283
+ 284
+ 285
+ 286
+ 287
+ 288
+ 289
+ 290
+ 291
+ 292
+ 293
+ 294
+ 295
+ 296
+ 297
+ 298
+ 299
+ 300
+ 301
+ 302
+ 303
+ 304
+ 305
+ 306
+ 307
+ 308
+ 309
+ 310
+ 311
+ 312
+ 313
+ 314
+ 315
+ 316
+ 317
+ 318
+ 319
+ 320
+ 321
+ 322
+ 323
+ 324
+ 325
+ 326
+ 327
+ 328
+ 329
+ 330
+ 331
+ 332
+ 333
+ 334
+ 335
+ 336
+ 337
+ 338
+ 339
+ 340
+ 341
+ 342
+ 343
+ 344
+ 345
+ 346
+ 347
+ 348
+ 349
+ 350
+ 351
+ 352
+ 353
+ 354
+ 355
+ 356
+ 357
+ 358
+ 359
+ 360
+ 361
+ 362
+ 363
+ 364
+ 365
+ 366
+ 367
+ 368
+ 369
+ 370
+ 371
+ 372
+ 373
+ 374
+ 375
+ 376
+ 377
+ 378
+ 379
+ 380
+ 381
+ 382
+ 383
+ 384
+ 385
+ 386
+ 387
+ 388
+ 389
+ 390
+ 391
+ 392
+ 393
+ 394
+ 395
+ 396
+ 397
+ 398
+ 399
+ 400
+ 401
+ 402
+ 403
+ 404
+ 405
+ 406
+ 407
+ 408
+ 409
+ 410
+ 411
+ 412
+ 413
+ 414
+ 415
+ 416
+ 417
+ 418
+ 419
+ 420
+ 421
+ 422
+ 423
+ 424
+ 425
+ 426
+ 427
+ 428
+ 429
+ 430
+ 431
+ 432
+ 433
+ 434
+ 435
+ 436
+ 437
+ 438
+ 439
+ 440
+ 441
+ 442
+ 443
+ 444
+ 445
+ 446
+ 447
+ 448
+ 449
+ 450
+ 451
+ 452
+ 453
+ 454
+ 455
+ 456
+ 457
+ 458
+ 459
+ 460
+ 461
+ 462
+ 463
+ 464
+ 465
+ 466
+ 467
+ 468
+ 469
+ 470
+ 471
+ 472
+ 473
+ 474
+ 475
+ 476
+ 477
+ 478
+ 479
+ 480
+ 481
+ 482
+ 483
+ 484
+ 485
+ 486
+ 487
+ 488
+ 489
+ 490
+ 491
+ 492
+ 493
+ 494
+ 495
+ 496
+ 497
+ 498
+ 499
+ 500
+ 501
+ 502
+ 503
+ 504
+ 505
+ 506
+ 507
+ 508
+ 509
+ 510
+ 511
+ 512
+ 513
+ 514
+ 515
+ 516
+ 517
+ 518
+ 519
+ 520
+ 521
+ 522
+ 523
+ 524
+ 525
+ 526
+ 527
+ 528
+ 529
+ 530
+ 531
+ 532
+ 533
+ 534
+ 535
+ 536
+ 537
+ 538
+ 539
+ 540
+ 541
+ 542
+ 543
+ 544
+ 545
+ 546
+ 547
+ 548
+ 549
+ 550
+ 551
+ 552
+ 553
+ 554
+ 555
+ 556
+ 557
+ 558
+ 559
+ 560
+ 561
+ 562
+ 563
+ 564
+ 565
+ 566
+ 567
+ 568
+ 569
+ 570
+ 571
+ 572
+ 573
+ 574
+ 575
+ 576
+ 577
+ 578
+ 579
+ 580
+ 581
+ 582
+ 583
+ 584
+ 585
+ 586
+ 587
+ 588
+ 589
+ 590
+ 591
+ 592
+ 593
+ 594
+ 595
+ 596
+ 597
+ 598
+ 599
+ 600
+ 601
+ 602
+ 603
+ 604
+ 605
+ 606
+ 607
+ 608
+ 609
+ 610
+ 611
+ 612
+ 613
+ 614
+ 615
+ 616
+ 617
+ 618
+ 619
+ 620
+ 621
+ 622
+ 623
+ 624
+ 625
+ 626
+ 627
+ 628
+ 629
+ 630
+ 631
+ 632
+ 633
+ 634
+ 635
+ 636
+ 637
+ 638
+ 639
+ 640
+ 641
+ 642
+ 643
+ 644
+ 645
+ 646
+ 647
+ 648
+ 649
+ 650
+ 651
+ 652
+ 653
+ 654
+ 655
+ 656
+ 657
+ 658
+ 659
+ 660
+ 661
+ 662
+ 663
+ 664
+ 665
+ 666
+ 667
+ 668
+ 669
+ 670
+ 671
+ 672
+ 673
+ 674
+ 675
+ 676
+ 677
+ 678
+ 679
+ 680
+ 681
+ 682
+ 683
+ 684
+ 685
+ 686
+ 687
+ 688
+ 689
+ 690
+ 691
+ 692
+ 693
+ 694
+ 695
+ 696
+ 697
+ 698
+ 699
+ 700
+ 701
+ 702
+ 703
+ 704
+ 705
+ 706
+ 707
+ 708
+ 709
+ 710
+ 711
+ 712
+ 713
+ 714
+ 715
+ 716
+ 717
+ 718
+ 719
+ 720
+ 721
+ 722
+ 723
+ 724
+ 725
+ 726
+ 727
+ 728
+ 729
+ 730
+ 731
+ 732
+ 733
+ 734
+ 735
+ 736
+ 737
+ 738
+ 739
+ 740
+ 741
+ 742
+ 743
+ 744
+ 745
+ 746
+ 747
+ 748
+ 749
+ 750
+ 751
+ 752
+ 753
+ 754
+ 755
+ 756
+ 757
+ 758
+ 759
+ 760
+ 761
+ 762
+ 763
+ 764
+ 765
+ 766
+ 767
+ 768
+ 769
+ 770
+ 771
+ 772
+ 773
+ 774
+ 775
+ 776
+ 777
+ 778
+ 779
+ 780
+ 781
+ 782
+ 783
+ 784
+ 785
+ 786
+ 787
+ 788
+ 789
+ 790
+ 791
+ 792
+ 793
+ 794
+ 795
+ 796
+ 797
+ 798
+ 799
+ 800
+ 801
+ 802
+ 803
+ 804
+ 805
+ 806
+ 807
+ 808
+ 809
+ 810
+ 811
+ 812
+ 813
+ 814
+ 815
+ 816
+ 817
+ 818
+ 819
+ 820
+ 821
+ 822
+ 823
+ 824
+ 825
+ 826
+ 827
+ 828
+ 829
+ 830
+ 831
+ 832
+ 833
+ 834
+ 835
+ 836
+ 837
+ 838
+ 839
+ 840
+ 841
+ 842
+ 843
+ 844
+ 845
+ 846
+ 847
+ 848
+ 849
+ 850
+ 851
+ 852
+ 853
+ 854
+ 855
+ 856
+ 857
+ 858
+ 859
+ 860
+ 861
+ 862
+ 863
+ 864
+ 865
+ 866
+ 867
+ 868
+ 869
+ 870
+ 871
+ 872
+ 873
+ 874
+ 875
+ 876
+ 877
+ 878
+ 879
+ 880
+ 881
+ 882
+ 883
+ 884
+ 885
+ 886
+ 887
+ 888
+ 889
+ 890
+ 891
+ 892
+ 893
+ 894
+ 895
+ 896
+ 897
+ 898
+ 899
+ 900
+ 901
+ 902
+ 903
+ 904
+ 905
+ 906
+ 907
+ 908
+ 909
+ 910
+ 911
+ 912
+ 913
+ 914
+ 915
+ 916
+ 917
+ 918
+ 919
+ 920
+ 921
+ 922
+ 923
+ 924
+ 925
+ 926
+ 927
+ 928
+ 929
+ 930
+ 931
+ 932
+ 933
+ 934
+ 935
+ 936
+ 937
+ 938
+ 939
+ 940
+ 941
+ 942
+ 943
+ 944
+ 945
+ 946
+ 947
+ 948
+ 949
+ 950
+ 951
+ 952
+ 953
+ 954
+ 955
+ 956
+ 957
+ 958
+ 959
+ 960
+ 961
+ 962
+ 963
+ 964
+ 965
+ 966
+ 967
+ 968
+ 969
+ 970
+ 971
+ 972
+ 973
+ 974
+ 975
+ 976
+ 977
+ 978
+ 979
+ 980
+ 981
+ 982
+ 983
+ 984
+ 985
+ 986
+ 987
+ 988
+ 989
+ 990
+ 991
+ 992
+ 993
+ 994
+ 995
+ 996
+ 997
+ 998
+ 999
+1000
+1001
+1002
+1003
+1004
+1005
+1006
+1007
+1008
+1009
+1010
+1011
+1012
+1013
+1014
+1015
+1016
+1017
+1018
+1019
+1020
+1021
+1022
+1023
+1024
+1025
+1026
+1027
+1028
+1029
+1030
+1031
+1032
+1033
+1034
+1035
+1036
+1037
+1038
+1039
+1040
+1041
+1042
+1043
+1044
+1045
+1046
+1047
+1048
+1049
+1050
+1051
+1052
+1053
+1054
+1055
+1056
+1057
+1058
+1059
+1060
+1061
+1062
+1063
+1064
+1065
+1066
+1067
+1068
+1069
+1070
+1071
+1072
+1073
+1074
+1075
+1076
+1077
+1078
+1079
+1080
+1081
+1082
+1083
+1084
+1085
+1086
+1087
+1088
+1089
+1090
+1091
+1092
+1093
+1094
+1095
+1096
+1097
+1098
+1099
+1100
+1101
+1102
+1103
+1104
+1105
+1106
+1107
+1108
+1109
+1110
+1111
+1112
+1113
+1114
+1115
+1116
+1117
+1118
+1119
+1120
+1121
+1122
+1123
+1124
+1125
+1126
+1127
+1128
+1129
+1130
+1131
+1132
+1133
+1134
+1135
+1136
+1137
+1138
+1139
+1140
+1141
+1142
+1143
+1144
+1145
+1146
+1147
+1148
+
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::borrow::Cow;
+use std::collections::HashMap;
+use std::collections::hash_map::Iter;
+use std::fmt;
+#[cfg(feature = "pattern")]
+use std::str::pattern::{Pattern, Searcher, SearchStep};
+use std::str::FromStr;
+
+use program::{Program, MatchEngine};
+use syntax;
+
+const REPLACE_EXPAND: &'static str = r"(?x)
+  (?P<before>^|\b|[^$]) # Ignore `$$name`.
+  \$
+  (?P<name> # Match the actual capture name. Can be...
+    [0-9]+  # A sequence of digits (for indexed captures), or...
+    |
+    [_a-zA-Z][_0-9a-zA-Z]* # A name for named captures.
+  )
+";
+
+/// Type alias for representing capture indices.
+pub type CaptureIdxs = [Option<usize>];
+
+/// Escapes all regular expression meta characters in `text`.
+///
+/// The string returned may be safely used as a literal in a regular
+/// expression.
+pub fn quote(text: &str) -> String {
+    let mut quoted = String::with_capacity(text.len());
+    for c in text.chars() {
+        if syntax::is_punct(c) {
+            quoted.push('\\')
+        }
+        quoted.push(c);
+    }
+    quoted
+}
+
+/// Tests if the given regular expression matches somewhere in the text given.
+///
+/// If there was a problem compiling the regular expression, an error is
+/// returned.
+///
+/// To find submatches, split or replace text, you'll need to compile an
+/// expression first.
+pub fn is_match(regex: &str, text: &str) -> Result<bool, Error> {
+    Regex::new(regex).map(|r| r.is_match(text))
+}
+
+/// An error that occurred during parsing or compiling a regular expression.
+#[derive(Debug)]
+pub enum Error {
+    /// A syntax error.
+    Syntax(syntax::Error),
+    /// The compiled program exceeded the set size limit.
+    /// The argument is the size limit imposed.
+    CompiledTooBig(usize),
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
+
+impl ::std::error::Error for Error {
+    fn description(&self) -> &str {
+        match *self {
+            Error::Syntax(ref err) => err.description(),
+            Error::CompiledTooBig(_) => "compiled program too big",
+            Error::__Nonexhaustive => unreachable!(),
+        }
+    }
+
+    fn cause(&self) -> Option<&::std::error::Error> {
+        match *self {
+            Error::Syntax(ref err) => Some(err),
+            _ => None,
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Error::Syntax(ref err) => err.fmt(f),
+            Error::CompiledTooBig(limit) => {
+                write!(f, "Compiled regex exceeds size limit of {} bytes.",
+                       limit)
+            }
+            Error::__Nonexhaustive => unreachable!(),
+        }
+    }
+}
+
+impl From<syntax::Error> for Error {
+    fn from(err: syntax::Error) -> Error {
+        Error::Syntax(err)
+    }
+}
+
+/// A compiled regular expression
+///
+/// It is represented as either a sequence of bytecode instructions (dynamic)
+/// or as a specialized Rust function (native). It can be used to search, split
+/// or replace text. All searching is done with an implicit `.*?` at the
+/// beginning and end of an expression. To force an expression to match the
+/// whole string (or a prefix or a suffix), you must use an anchor like `^` or
+/// `$` (or `\A` and `\z`).
+///
+/// While this crate will handle Unicode strings (whether in the regular
+/// expression or in the search text), all positions returned are **byte
+/// indices**. Every byte index is guaranteed to be at a Unicode code point
+/// boundary.
+///
+/// The lifetimes `'r` and `'t` in this crate correspond to the lifetime of a
+/// compiled regular expression and text to search, respectively.
+///
+/// The only methods that allocate new strings are the string replacement
+/// methods. All other methods (searching and splitting) return borrowed
+/// pointers into the string given.
+///
+/// # Examples
+///
+/// Find the location of a US phone number:
+///
+/// ```rust
+/// # use regex::Regex;
+/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap();
+/// assert_eq!(re.find("phone: 111-222-3333"), Some((7, 19)));
+/// ```
+///
+/// # Using the `std::str::StrExt` methods with `Regex`
+///
+/// > **Note**: This section requires that this crate is currently compiled with
+/// >           the `pattern` Cargo feature enabled.
+///
+/// Since `Regex` implements `Pattern`, you can use regexes with methods
+/// defined on `std::str::StrExt`. For example, `is_match`, `find`, `find_iter`
+/// and `split` can be replaced with `StrExt::contains`, `StrExt::find`,
+/// `StrExt::match_indices` and `StrExt::split`.
+///
+/// Here are some examples:
+///
+/// ```rust,ignore
+/// # use regex::Regex;
+/// let re = Regex::new(r"\d+").unwrap();
+/// let haystack = "a111b222c";
+///
+/// assert!(haystack.contains(&re));
+/// assert_eq!(haystack.find(&re), Some(1));
+/// assert_eq!(haystack.match_indices(&re).collect::<Vec<_>>(),
+///            vec![(1, 4), (5, 8)]);
+/// assert_eq!(haystack.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]);
+/// ```
+#[derive(Clone)]
+pub enum Regex {
+    // The representation of `Regex` is exported to support the `regex!`
+    // syntax extension. Do not rely on it.
+    //
+    // See the comments for the `program` module in `lib.rs` for a more
+    // detailed explanation for what `regex!` requires.
+    #[doc(hidden)]
+    Dynamic(Program),
+    #[doc(hidden)]
+    Native(ExNative),
+}
+
+#[doc(hidden)]
+pub struct ExNative {
+    #[doc(hidden)]
+    pub original: &'static str,
+    #[doc(hidden)]
+    pub names: &'static &'static [Option<&'static str>],
+    #[doc(hidden)]
+    pub prog: fn(&mut CaptureIdxs, &str, usize) -> bool,
+}
+
+impl Copy for ExNative {}
+
+impl Clone for ExNative {
+    fn clone(&self) -> ExNative {
+        *self
+    }
+}
+
+impl fmt::Display for Regex {
+    /// Shows the original regular expression.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", self.as_str())
+    }
+}
+
+impl fmt::Debug for Regex {
+    /// Shows the original regular expression.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Display::fmt(self, f)
+    }
+}
+
+/// Equality comparison is based on the original string. It is possible that
+/// different regular expressions have the same matching behavior, but are
+/// still compared unequal. For example, `\d+` and `\d\d*` match the same set
+/// of strings, but are not considered equal.
+impl PartialEq for Regex {
+    fn eq(&self, other: &Regex) -> bool {
+        self.as_str() == other.as_str()
+    }
+}
+
+impl Eq for Regex {}
+
+impl FromStr for Regex {
+    type Err = Error;
+
+    /// Attempts to parse a string into a regular expression
+    fn from_str(s: &str) -> Result<Regex, Error> {
+        Regex::new(s)
+    }
+}
+
+impl Regex {
+    /// Compiles a dynamic regular expression. Once compiled, it can be
+    /// used repeatedly to search, split or replace text in a string.
+    ///
+    /// If an invalid expression is given, then an error is returned.
+    pub fn new(re: &str) -> Result<Regex, Error> {
+        Regex::with_size_limit(10 * (1 << 20), re)
+    }
+
+    /// Compiles a dynamic regular expression with the given size limit.
+    ///
+    /// The size limit is applied to the size of the *compiled* data structure.
+    /// If the data structure exceeds the size given, then an error is
+    /// returned.
+    ///
+    /// The default size limit used in `new` is 10MB.
+    pub fn with_size_limit(size: usize, re: &str) -> Result<Regex, Error> {
+        Regex::with_engine(None, size, re)
+    }
+
+    /// Compiles a dynamic regular expression and uses given matching engine.
+    ///
+    /// This is exposed for use in testing and shouldn't be used by clients.
+    /// Instead, the regex program should choose the correct matching engine
+    /// to use automatically. (Based on the regex, the size of the input and
+    /// the type of search.)
+    ///
+    /// A value of `None` means that the engine is automatically selected,
+    /// which is the default behavior.
+    ///
+    /// **WARNING**: Passing an unsuitable engine for the given regex/input
+    /// could lead to bad things. (Not unsafe things, but panics, incorrect
+    /// matches and large memory use are all things that could happen.)
+    #[doc(hidden)]
+    pub fn with_engine(
+        engine: Option<MatchEngine>,
+        size: usize,
+        re: &str,
+    ) -> Result<Regex, Error> {
+        Program::new(engine, size, re).map(Regex::Dynamic)
+    }
+
+
+    /// Returns true if and only if the regex matches the string given.
+    ///
+    /// # Example
+    ///
+    /// Test if some text contains at least one word with exactly 13
+    /// characters:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let text = "I categorically deny having triskaidekaphobia.";
+    /// assert!(Regex::new(r"\b\w{13}\b").unwrap().is_match(text));
+    /// # }
+    /// ```
+    pub fn is_match(&self, text: &str) -> bool {
+        exec(self, &mut [], text, 0)
+    }
+
+    /// Returns the start and end byte range of the leftmost-first match in
+    /// `text`. If no match exists, then `None` is returned.
+    ///
+    /// Note that this should only be used if you want to discover the position
+    /// of the match. Testing the existence of a match is faster if you use
+    /// `is_match`.
+    ///
+    /// # Example
+    ///
+    /// Find the start and end location of the first word with exactly 13
+    /// characters:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let text = "I categorically deny having triskaidekaphobia.";
+    /// let pos = Regex::new(r"\b\w{13}\b").unwrap().find(text);
+    /// assert_eq!(pos, Some((2, 15)));
+    /// # }
+    /// ```
+    pub fn find(&self, text: &str) -> Option<(usize, usize)> {
+        let mut caps = [None, None];
+        if exec(self, &mut caps, text, 0) {
+            Some((caps[0].unwrap(), caps[1].unwrap()))
+        } else {
+            None
+        }
+    }
+
+    /// Returns an iterator for each successive non-overlapping match in
+    /// `text`, returning the start and end byte indices with respect to
+    /// `text`.
+    ///
+    /// # Example
+    ///
+    /// Find the start and end location of every word with exactly 13
+    /// characters:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let text = "Retroactively relinquishing remunerations is reprehensible.";
+    /// for pos in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
+    ///     println!("{:?}", pos);
+    /// }
+    /// // Output:
+    /// // (0, 13)
+    /// // (14, 27)
+    /// // (28, 41)
+    /// // (45, 58)
+    /// # }
+    /// ```
+    pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> {
+        FindMatches {
+            re: self,
+            search: text,
+            last_end: 0,
+            last_match: None,
+        }
+    }
+
+    /// Returns the capture groups corresponding to the leftmost-first
+    /// match in `text`. Capture group `0` always corresponds to the entire
+    /// match. If no match is found, then `None` is returned.
+    ///
+    /// You should only use `captures` if you need access to submatches.
+    /// Otherwise, `find` is faster for discovering the location of the overall
+    /// match.
+    ///
+    /// # Examples
+    ///
+    /// Say you have some text with movie names and their release years,
+    /// like "'Citizen Kane' (1941)". It'd be nice if we could search for text
+    /// looking like that, while also extracting the movie name and its release
+    /// year separately.
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)").unwrap();
+    /// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
+    /// let caps = re.captures(text).unwrap();
+    /// assert_eq!(caps.at(1), Some("Citizen Kane"));
+    /// assert_eq!(caps.at(2), Some("1941"));
+    /// assert_eq!(caps.at(0), Some("'Citizen Kane' (1941)"));
+    /// # }
+    /// ```
+    ///
+    /// Note that the full match is at capture group `0`. Each subsequent
+    /// capture group is indexed by the order of its opening `(`.
+    ///
+    /// We can make this example a bit clearer by using *named* capture groups:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
+    ///                .unwrap();
+    /// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
+    /// let caps = re.captures(text).unwrap();
+    /// assert_eq!(caps.name("title"), Some("Citizen Kane"));
+    /// assert_eq!(caps.name("year"), Some("1941"));
+    /// assert_eq!(caps.at(0), Some("'Citizen Kane' (1941)"));
+    /// # }
+    /// ```
+    ///
+    /// Here we name the capture groups, which we can access with the `name`
+    /// method. Note that the named capture groups are still accessible with
+    /// `at`.
+    ///
+    /// The `0`th capture group is always unnamed, so it must always be
+    /// accessed with `at(0)`.
+    pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
+        let mut caps = self.alloc_captures();
+        if exec(self, &mut caps, text, 0) {
+            Some(Captures::new(self, text, caps))
+        } else {
+            None
+        }
+    }
+
+    /// Returns an iterator over all the non-overlapping capture groups matched
+    /// in `text`. This is operationally the same as `find_iter` (except it
+    /// yields information about submatches).
+    ///
+    /// # Example
+    ///
+    /// We can use this to find all movie titles and their release years in
+    /// some text, where the movie is formatted like "'Title' (xxxx)":
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)")
+    ///                .unwrap();
+    /// let text = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
+    /// for caps in re.captures_iter(text) {
+    ///     println!("Movie: {:?}, Released: {:?}", caps.name("title"), caps.name("year"));
+    /// }
+    /// // Output:
+    /// // Movie: Citizen Kane, Released: 1941
+    /// // Movie: The Wizard of Oz, Released: 1939
+    /// // Movie: M, Released: 1931
+    /// # }
+    /// ```
+    pub fn captures_iter<'r, 't>(&'r self, text: &'t str)
+                                -> FindCaptures<'r, 't> {
+        FindCaptures {
+            re: self,
+            search: text,
+            last_match: None,
+            last_end: 0,
+        }
+    }
+
+    /// Returns an iterator of substrings of `text` delimited by a match
+    /// of the regular expression.
+    /// Namely, each element of the iterator corresponds to text that *isn't*
+    /// matched by the regular expression.
+    ///
+    /// This method will *not* copy the text given.
+    ///
+    /// # Example
+    ///
+    /// To split a string delimited by arbitrary amounts of spaces or tabs:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"[ \t]+").unwrap();
+    /// let fields: Vec<&str> = re.split("a b \t  c\td    e").collect();
+    /// assert_eq!(fields, vec!("a", "b", "c", "d", "e"));
+    /// # }
+    /// ```
+    pub fn split<'r, 't>(&'r self, text: &'t str) -> RegexSplits<'r, 't> {
+        RegexSplits {
+            finder: self.find_iter(text),
+            last: 0,
+        }
+    }
+
+    /// Returns an iterator of at most `limit` substrings of `text` delimited
+    /// by a match of the regular expression. (A `limit` of `0` will return no
+    /// substrings.)
+    /// Namely, each element of the iterator corresponds to text that *isn't*
+    /// matched by the regular expression.
+    /// The remainder of the string that is not split will be the last element
+    /// in the iterator.
+    ///
+    /// This method will *not* copy the text given.
+    ///
+    /// # Example
+    ///
+    /// Get the first two words in some text:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"\W+").unwrap();
+    /// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect();
+    /// assert_eq!(fields, vec!("Hey", "How", "are you?"));
+    /// # }
+    /// ```
+    pub fn splitn<'r, 't>(&'r self, text: &'t str, limit: usize)
+                         -> RegexSplitsN<'r, 't> {
+        RegexSplitsN {
+            splits: self.split(text),
+            cur: 0,
+            limit: limit,
+        }
+    }
+
+    /// Replaces the leftmost-first match with the replacement provided.
+    /// The replacement can be a regular string (where `$N` and `$name` are
+    /// expanded to match capture groups) or a function that takes the matches'
+    /// `Captures` and returns the replaced string.
+    ///
+    /// If no match is found, then a copy of the string is returned unchanged.
+    ///
+    /// # Examples
+    ///
+    /// Note that this function is polymorphic with respect to the replacement.
+    /// In typical usage, this can just be a normal string:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new("[^01]+").unwrap();
+    /// assert_eq!(re.replace("1078910", ""), "1010");
+    /// # }
+    /// ```
+    ///
+    /// But anything satisfying the `Replacer` trait will work. For example,
+    /// a closure of type `|&Captures| -> String` provides direct access to the
+    /// captures corresponding to a match. This allows one to access
+    /// submatches easily:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # use regex::Captures; fn main() {
+    /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
+    /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
+    ///     format!("{} {}", caps.at(2).unwrap_or(""), caps.at(1).unwrap_or(""))
+    /// });
+    /// assert_eq!(result, "Bruce Springsteen");
+    /// # }
+    /// ```
+    ///
+    /// But this is a bit cumbersome to use all the time. Instead, a simple
+    /// syntax is supported that expands `$name` into the corresponding capture
+    /// group. Here's the last example, but using this expansion technique
+    /// with named capture groups:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
+    /// let result = re.replace("Springsteen, Bruce", "$first $last");
+    /// assert_eq!(result, "Bruce Springsteen");
+    /// # }
+    /// ```
+    ///
+    /// Note that using `$2` instead of `$first` or `$1` instead of `$last`
+    /// would produce the same result. To write a literal `$` use `$$`.
+    ///
+    /// Finally, sometimes you just want to replace a literal string with no
+    /// submatch expansion. This can be done by wrapping a string with
+    /// `NoExpand`:
+    ///
+    /// ```rust
+    /// # extern crate regex; use regex::Regex;
+    /// # fn main() {
+    /// use regex::NoExpand;
+    ///
+    /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap();
+    /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
+    /// assert_eq!(result, "$2 $last");
+    /// # }
+    /// ```
+    pub fn replace<R: Replacer>(&self, text: &str, rep: R) -> String {
+        self.replacen(text, 1, rep)
+    }
+
+    /// Replaces all non-overlapping matches in `text` with the
+    /// replacement provided. This is the same as calling `replacen` with
+    /// `limit` set to `0`.
+    ///
+    /// See the documentation for `replace` for details on how to access
+    /// submatches in the replacement string.
+    pub fn replace_all<R: Replacer>(&self, text: &str, rep: R) -> String {
+        self.replacen(text, 0, rep)
+    }
+
+    /// Replaces at most `limit` non-overlapping matches in `text` with the
+    /// replacement provided. If `limit` is 0, then all non-overlapping matches
+    /// are replaced.
+    ///
+    /// See the documentation for `replace` for details on how to access
+    /// submatches in the replacement string.
+    pub fn replacen<R: Replacer>
+                   (&self, text: &str, limit: usize, mut rep: R) -> String {
+        let mut new = String::with_capacity(text.len());
+        let mut last_match = 0;
+
+        if rep.no_expand().is_some() {
+            // borrow checker pains. `rep` is borrowed mutably in the `else`
+            // branch below.
+            let rep = rep.no_expand().unwrap();
+            for (i, (s, e)) in self.find_iter(text).enumerate() {
+                if limit > 0 && i >= limit {
+                    break
+                }
+                new.push_str(&text[last_match..s]);
+                new.push_str(&rep);
+                last_match = e;
+            }
+        } else {
+            for (i, cap) in self.captures_iter(text).enumerate() {
+                if limit > 0 && i >= limit {
+                    break
+                }
+                // unwrap on 0 is OK because captures only reports matches
+                let (s, e) = cap.pos(0).unwrap();
+                new.push_str(&text[last_match..s]);
+                new.push_str(&rep.reg_replace(&cap));
+                last_match = e;
+            }
+        }
+        new.push_str(&text[last_match..]);
+        new
+    }
+
+    /// Returns the original string of this regex.
+    pub fn as_str<'a>(&'a self) -> &'a str {
+        match *self {
+            Regex::Dynamic(Program { ref original, .. }) => original,
+            Regex::Native(ExNative { ref original, .. }) => original,
+        }
+    }
+
+    #[doc(hidden)]
+    pub fn names_iter<'a>(&'a self) -> NamesIter<'a> {
+        match *self {
+            Regex::Native(ref n) => NamesIter::Native(n.names.iter()),
+            Regex::Dynamic(ref d) => NamesIter::Dynamic(d.cap_names.iter())
+        }
+    }
+
+    fn names_len(&self) -> usize {
+        match *self {
+            Regex::Native(ref n) => n.names.len(),
+            Regex::Dynamic(ref d) => d.cap_names.len()
+        }
+    }
+
+    fn alloc_captures(&self) -> Vec<Option<usize>> {
+        match *self {
+            Regex::Native(ref n) => vec![None; 2 * n.names.len()],
+            Regex::Dynamic(ref d) => d.alloc_captures(),
+        }
+    }
+}
+
+pub enum NamesIter<'a> {
+    Native(::std::slice::Iter<'a, Option<&'static str>>),
+    Dynamic(::std::slice::Iter<'a, Option<String>>)
+}
+
+impl<'a> Iterator for NamesIter<'a> {
+    type Item=Option<String>;
+
+    fn next(&mut self) -> Option<Option<String>> {
+        match *self {
+            NamesIter::Native(ref mut i) =>
+                i.next().map(|x| x.map(|s| s.to_owned())),
+            NamesIter::Dynamic(ref mut i) =>
+                i.next().map(|x| x.as_ref().map(|s| s.to_owned())),
+        }
+    }
+}
+
+/// NoExpand indicates literal string replacement.
+///
+/// It can be used with `replace` and `replace_all` to do a literal
+/// string replacement without expanding `$name` to their corresponding
+/// capture groups.
+///
+/// `'r` is the lifetime of the literal text.
+pub struct NoExpand<'t>(pub &'t str);
+
+/// Replacer describes types that can be used to replace matches in a string.
+pub trait Replacer {
+    /// Returns a possibly owned string that is used to replace the match
+    /// corresponding to the `caps` capture group.
+    ///
+    /// The `'a` lifetime refers to the lifetime of a borrowed string when
+    /// a new owned string isn't needed (e.g., for `NoExpand`).
+    fn reg_replace<'a>(&'a mut self, caps: &Captures) -> Cow<'a, str>;
+
+    /// Returns a possibly owned string that never needs expansion.
+    fn no_expand<'a>(&'a mut self) -> Option<Cow<'a, str>> { None }
+}
+
+impl<'t> Replacer for NoExpand<'t> {
+    fn reg_replace<'a>(&'a mut self, _: &Captures) -> Cow<'a, str> {
+        self.0.into()
+    }
+
+    fn no_expand<'a>(&'a mut self) -> Option<Cow<'a, str>> {
+        Some(self.0.into())
+    }
+}
+
+impl<'t> Replacer for &'t str {
+    fn reg_replace<'a>(&'a mut self, caps: &Captures) -> Cow<'a, str> {
+        caps.expand(*self).into()
+    }
+
+    fn no_expand<'a>(&'a mut self) -> Option<Cow<'a, str>> {
+        let re = Regex::new(REPLACE_EXPAND).unwrap();
+        if !re.is_match(self) {
+            Some((*self).into())
+        } else {
+            None
+        }
+    }
+}
+
+impl<F> Replacer for F where F: FnMut(&Captures) -> String {
+    fn reg_replace<'a>(&'a mut self, caps: &Captures) -> Cow<'a, str> {
+        (*self)(caps).into()
+    }
+}
+
+/// Yields all substrings delimited by a regular expression match.
+///
+/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
+/// of the string being split.
+pub struct RegexSplits<'r, 't> {
+    finder: FindMatches<'r, 't>,
+    last: usize,
+}
+
+impl<'r, 't> Iterator for RegexSplits<'r, 't> {
+    type Item = &'t str;
+
+    fn next(&mut self) -> Option<&'t str> {
+        let text = self.finder.search;
+        match self.finder.next() {
+            None => {
+                if self.last >= text.len() {
+                    None
+                } else {
+                    let s = &text[self.last..];
+                    self.last = text.len();
+                    Some(s)
+                }
+            }
+            Some((s, e)) => {
+                let matched = &text[self.last..s];
+                self.last = e;
+                Some(matched)
+            }
+        }
+    }
+}
+
+/// Yields at most `N` substrings delimited by a regular expression match.
+///
+/// The last substring will be whatever remains after splitting.
+///
+/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
+/// of the string being split.
+pub struct RegexSplitsN<'r, 't> {
+    splits: RegexSplits<'r, 't>,
+    cur: usize,
+    limit: usize,
+}
+
+impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
+    type Item = &'t str;
+
+    fn next(&mut self) -> Option<&'t str> {
+        let text = self.splits.finder.search;
+        if self.cur >= self.limit {
+            None
+        } else {
+            self.cur += 1;
+            if self.cur >= self.limit {
+                Some(&text[self.splits.last..])
+            } else {
+                self.splits.next()
+            }
+        }
+    }
+}
+
+/// Captures represents a group of captured strings for a single match.
+///
+/// The 0th capture always corresponds to the entire match. Each subsequent
+/// index corresponds to the next capture group in the regex.
+/// If a capture group is named, then the matched string is *also* available
+/// via the `name` method. (Note that the 0th capture is always unnamed and so
+/// must be accessed with the `at` method.)
+///
+/// Positions returned from a capture group are always byte indices.
+///
+/// `'t` is the lifetime of the matched text.
+pub struct Captures<'t> {
+    text: &'t str,
+    locs: Vec<Option<usize>>,
+    named: Option<HashMap<String, usize>>,
+}
+
+impl<'t> Captures<'t> {
+    fn new(
+        re: &Regex,
+        search: &'t str,
+        locs: Vec<Option<usize>>,
+    ) -> Captures<'t> {
+        let named =
+            if re.names_len() == 0 {
+                None
+            } else {
+                let mut named = HashMap::new();
+                for (i, name) in re.names_iter().enumerate() {
+                    if let Some(name) = name {
+                        named.insert(name, i);
+                    }
+                }
+                Some(named)
+            };
+        Captures {
+            text: search,
+            locs: locs,
+            named: named,
+        }
+    }
+
+    /// Returns the start and end positions of the Nth capture group.
+    /// Returns `None` if `i` is not a valid capture group or if the capture
+    /// group did not match anything.
+    /// The positions returned are *always* byte indices with respect to the
+    /// original string matched.
+    pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
+        let (s, e) = (i * 2, i * 2 + 1);
+        if e >= self.locs.len() || self.locs[s].is_none() {
+            // VM guarantees that each pair of locations are both Some or None.
+            return None
+        }
+        Some((self.locs[s].unwrap(), self.locs[e].unwrap()))
+    }
+
+    /// Returns the matched string for the capture group `i`.  If `i` isn't
+    /// a valid capture group or didn't match anything, then `None` is
+    /// returned.
+    pub fn at(&self, i: usize) -> Option<&'t str> {
+        match self.pos(i) {
+            None => None,
+            Some((s, e)) => Some(&self.text[s..e])
+        }
+    }
+
+    /// Returns the matched string for the capture group named `name`.  If
+    /// `name` isn't a valid capture group or didn't match anything, then
+    /// `None` is returned.
+    pub fn name(&self, name: &str) -> Option<&'t str> {
+        match self.named {
+            None => None,
+            Some(ref h) => {
+                match h.get(name) {
+                    None => None,
+                    Some(i) => self.at(*i),
+                }
+            }
+        }
+    }
+
+    /// Creates an iterator of all the capture groups in order of appearance
+    /// in the regular expression.
+    pub fn iter(&'t self) -> SubCaptures<'t> {
+        SubCaptures { idx: 0, caps: self, }
+    }
+
+    /// Creates an iterator of all the capture group positions in order of
+    /// appearance in the regular expression. Positions are byte indices
+    /// in terms of the original string matched.
+    pub fn iter_pos(&'t self) -> SubCapturesPos<'t> {
+        SubCapturesPos { idx: 0, caps: self, }
+    }
+
+    /// Creates an iterator of all named groups as an tuple with the group
+    /// name and the value. The iterator returns these values in arbitrary
+    /// order.
+    pub fn iter_named(&'t self) -> SubCapturesNamed<'t> {
+        SubCapturesNamed { caps: self, inner: self.named.as_ref().map(|n| n.iter()) }
+    }
+
+    /// Expands all instances of `$name` in `text` to the corresponding capture
+    /// group `name`.
+    ///
+    /// `name` may be an integer corresponding to the index of the
+    /// capture group (counted by order of opening parenthesis where `0` is the
+    /// entire match) or it can be a name (consisting of letters, digits or
+    /// underscores) corresponding to a named capture group.
+    ///
+    /// If `name` isn't a valid capture group (whether the name doesn't exist or
+    /// isn't a valid index), then it is replaced with the empty string.
+    ///
+    /// To write a literal `$` use `$$`.
+    pub fn expand(&self, text: &str) -> String {
+        // How evil can you get?
+        let re = Regex::new(REPLACE_EXPAND).unwrap();
+        let text = re.replace_all(text, |refs: &Captures| -> String {
+            let before = refs.name("before").unwrap_or("");
+            let name = refs.name("name").unwrap_or("");
+            format!("{}{}", before, match name.parse::<usize>() {
+                Err(_) => self.name(name).unwrap_or("").to_string(),
+                Ok(i) => self.at(i).unwrap_or("").to_string(),
+            })
+        });
+        let re = Regex::new(r"\$\$").unwrap();
+        re.replace_all(&text, NoExpand("$"))
+    }
+
+    /// Returns the number of captured groups.
+    #[inline]
+    pub fn len(&self) -> usize { self.locs.len() / 2 }
+
+    /// Returns true if and only if there are no captured groups.
+    #[inline]
+    pub fn is_empty(&self) -> bool { self.len() == 0 }
+}
+
+/// An iterator over capture groups for a particular match of a regular
+/// expression.
+///
+/// `'t` is the lifetime of the matched text.
+pub struct SubCaptures<'t> {
+    idx: usize,
+    caps: &'t Captures<'t>,
+}
+
+impl<'t> Iterator for SubCaptures<'t> {
+    type Item = Option<&'t str>;
+
+    fn next(&mut self) -> Option<Option<&'t str>> {
+        if self.idx < self.caps.len() {
+            self.idx += 1;
+            Some(self.caps.at(self.idx - 1))
+        } else {
+            None
+        }
+    }
+}
+
+/// An iterator over capture group positions for a particular match of a
+/// regular expression.
+///
+/// Positions are byte indices in terms of the original string matched.
+///
+/// `'t` is the lifetime of the matched text.
+pub struct SubCapturesPos<'t> {
+    idx: usize,
+    caps: &'t Captures<'t>,
+}
+
+impl<'t> Iterator for SubCapturesPos<'t> {
+    type Item = Option<(usize, usize)>;
+
+    fn next(&mut self) -> Option<Option<(usize, usize)>> {
+        if self.idx < self.caps.len() {
+            self.idx += 1;
+            Some(self.caps.pos(self.idx - 1))
+        } else {
+            None
+        }
+    }
+}
+
+/// An Iterator over named capture groups as a tuple with the group
+/// name and the value.
+///
+/// `'t` is the lifetime of the matched text.
+pub struct SubCapturesNamed<'t>{
+    caps: &'t Captures<'t>,
+    inner: Option<Iter<'t, String, usize>>,
+}
+
+impl<'t> Iterator for SubCapturesNamed<'t> {
+    type Item = (&'t str, Option<&'t str>);
+
+    fn next(&mut self) -> Option<(&'t str, Option<&'t str>)> {
+        match self.inner.as_mut().map(|it| it.next()).unwrap_or(None) {
+            Some((name, pos)) => Some((name, self.caps.at(*pos))),
+            None => None
+        }
+    }
+}
+
+/// An iterator that yields all non-overlapping capture groups matching a
+/// particular regular expression.
+///
+/// The iterator stops when no more matches can be found.
+///
+/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
+/// of the matched string.
+pub struct FindCaptures<'r, 't> {
+    re: &'r Regex,
+    search: &'t str,
+    last_match: Option<usize>,
+    last_end: usize,
+}
+
+impl<'r, 't> Iterator for FindCaptures<'r, 't> {
+    type Item = Captures<'t>;
+
+    fn next(&mut self) -> Option<Captures<'t>> {
+        if self.last_end > self.search.len() {
+            return None
+        }
+
+        let mut caps = self.re.alloc_captures();
+        if !exec(self.re, &mut caps, self.search, self.last_end) {
+            return None
+        }
+        let (s, e) = (caps[0].unwrap(), caps[1].unwrap());
+
+        // Don't accept empty matches immediately following a match.
+        // i.e., no infinite loops please.
+        if e == s && Some(self.last_end) == self.last_match {
+            if self.last_end >= self.search.len() {
+                return None;
+            }
+            self.last_end += self.search[self.last_end..].chars()
+                                 .next().unwrap().len_utf8();
+            return self.next()
+        }
+        self.last_end = e;
+        self.last_match = Some(self.last_end);
+        Some(Captures::new(self.re, self.search, caps))
+    }
+}
+
+/// An iterator over all non-overlapping matches for a particular string.
+///
+/// The iterator yields a tuple of integers corresponding to the start and end
+/// of the match. The indices are byte offsets. The iterator stops when no more
+/// matches can be found.
+///
+/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
+/// of the matched string.
+pub struct FindMatches<'r, 't> {
+    re: &'r Regex,
+    search: &'t str,
+    last_match: Option<usize>,
+    last_end: usize,
+}
+
+impl<'r, 't> Iterator for FindMatches<'r, 't> {
+    type Item = (usize, usize);
+
+    fn next(&mut self) -> Option<(usize, usize)> {
+        if self.last_end > self.search.len() {
+            return None
+        }
+
+        let mut caps = [None, None];
+        if !exec(self.re, &mut caps, self.search, self.last_end) {
+            return None;
+        }
+        let (s, e) = (caps[0].unwrap(), caps[1].unwrap());
+
+        // Don't accept empty matches immediately following a match.
+        // i.e., no infinite loops please.
+        if e == s && Some(self.last_end) == self.last_match {
+            if self.last_end >= self.search.len() {
+                return None;
+            }
+            self.last_end += self.search[self.last_end..].chars()
+                                 .next().unwrap().len_utf8();
+            return self.next()
+        }
+        self.last_end = e;
+        self.last_match = Some(self.last_end);
+        Some((s, e))
+    }
+}
+
+#[cfg(feature = "pattern")]
+pub struct RegexSearcher<'r, 't> {
+    it: FindMatches<'r, 't>,
+    last_step_end: usize,
+    next_match: Option<(usize, usize)>,
+}
+
+#[cfg(feature = "pattern")]
+impl<'r, 't> Pattern<'t> for &'r Regex {
+    type Searcher = RegexSearcher<'r, 't>;
+
+    fn into_searcher(self, haystack: &'t str) -> RegexSearcher<'r, 't> {
+        RegexSearcher {
+            it: self.find_iter(haystack),
+            last_step_end: 0,
+            next_match: None,
+        }
+    }
+}
+
+#[cfg(feature = "pattern")]
+unsafe impl<'r, 't> Searcher<'t> for RegexSearcher<'r, 't> {
+    #[inline]
+    fn haystack(&self) -> &'t str {
+        self.it.search
+    }
+
+    #[inline]
+    fn next(&mut self) -> SearchStep {
+        if let Some((s, e)) = self.next_match {
+            self.next_match = None;
+            self.last_step_end = e;
+            return SearchStep::Match(s, e);
+        }
+        match self.it.next() {
+            None => {
+                if self.last_step_end < self.haystack().len() {
+                    let last = self.last_step_end;
+                    self.last_step_end = self.haystack().len();
+                    SearchStep::Reject(last, self.haystack().len())
+                } else {
+                    SearchStep::Done
+                }
+            }
+            Some((s, e)) => {
+                if s == self.last_step_end {
+                    self.last_step_end = e;
+                    SearchStep::Match(s, e)
+                } else {
+                    self.next_match = Some((s, e));
+                    let last = self.last_step_end;
+                    self.last_step_end = s;
+                    SearchStep::Reject(last, s)
+                }
+            }
+        }
+    }
+}
+
+fn exec(re: &Regex, caps: &mut CaptureIdxs, text: &str, start: usize) -> bool {
+    match *re {
+        Regex::Native(ExNative { ref prog, .. }) => (*prog)(caps, text, start),
+        Regex::Dynamic(ref prog) => prog.exec(caps, text, start),
+    }
+}
+
+
+ + + + + + + + + + + + + + + \ No newline at end of file -- cgit v1.2.3