From 64106c4d3d4ddba8c7bc2af75376e6d3d3d75601 Mon Sep 17 00:00:00 2001
From:
Date: Mon, 29 Jun 2015 20:16:15 +0000
Subject: Update documentation
---
src/regex/program.rs.html | 1057 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 1057 insertions(+)
create mode 100644 src/regex/program.rs.html
(limited to 'src/regex/program.rs.html')
diff --git a/src/regex/program.rs.html b/src/regex/program.rs.html
new file mode 100644
index 0000000..8f6ed02
--- /dev/null
+++ b/src/regex/program.rs.html
@@ -0,0 +1,1057 @@
+
+
+
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
+449
+450
+451
+452
+453
+454
+455
+456
+457
+458
+459
+460
+461
+462
+463
+464
+465
+466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
+480
+
+
+
+
+
+
+
+
+
+
+
+use std::cmp::{self, Ordering};
+
+use syntax;
+
+use Error;
+use backtrack::{Backtrack, BackMachine};
+use char::Char;
+use compile::Compiler;
+use nfa::{Nfa, NfaThreads};
+use pool::Pool;
+use prefix::Prefix;
+use re::CaptureIdxs;
+
+const NUM_PREFIX_LIMIT: usize = 30;
+const PREFIX_LENGTH_LIMIT: usize = 15;
+
+pub type InstIdx = usize;
+
+
+#[derive(Clone, Debug)]
+pub enum Inst {
+
+
+
+
+ Match,
+
+ Save(usize),
+
+ Jump(InstIdx),
+
+ Split(InstIdx, InstIdx),
+
+
+ EmptyLook(LookInst),
+
+ Char(OneChar),
+
+ Ranges(CharRanges),
+}
+
+
+#[derive(Clone, Debug)]
+pub struct OneChar {
+
+ pub c: char,
+
+
+ pub casei: bool,
+}
+
+
+#[derive(Clone, Debug)]
+pub struct CharRanges {
+
+ pub ranges: Vec<(char, char)>,
+
+ pub casei: bool,
+}
+
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum LookInst {
+
+ StartLine,
+
+ EndLine,
+
+ StartText,
+
+ EndText,
+
+ WordBoundary,
+
+ NotWordBoundary,
+}
+
+impl OneChar {
+
+ #[inline(always)]
+ pub fn matches(&self, c: Char) -> bool {
+ self.c == c || (self.casei && self.c == c.case_fold())
+ }
+}
+
+impl CharRanges {
+
+ pub fn any() -> CharRanges {
+ CharRanges {
+ ranges: vec![('\x00', '\u{10ffff}')],
+ casei: false,
+ }
+ }
+
+
+ pub fn any_nonl() -> CharRanges {
+ CharRanges {
+ ranges: vec![('\x00', '\x09'), ('\x0B', '\u{10ffff}')],
+ casei: false,
+ }
+ }
+
+
+ pub fn from_class(cls: syntax::CharClass) -> CharRanges {
+ let casei = cls.is_case_insensitive();
+ CharRanges {
+ ranges: cls.into_iter().map(|r| (r.start, r.end)).collect(),
+ casei: casei,
+ }
+ }
+
+
+ #[inline(always)]
+ pub fn matches(&self, mut c: Char) -> Option<usize> {
+ if self.casei {
+ c = c.case_fold();
+ }
+
+
+
+ for i in 0..cmp::min(self.ranges.len(), 4) {
+ let r = self.ranges[i];
+ if c < r.0 {
+ return None;
+ }
+ if c <= r.1 {
+ return Some(i);
+ }
+ }
+ self.ranges.binary_search_by(|r| {
+ if r.1 < c {
+ Ordering::Less
+ } else if r.0 > c {
+ Ordering::Greater
+ } else {
+ Ordering::Equal
+ }
+ }).ok()
+ }
+}
+
+impl LookInst {
+
+
+ pub fn matches(&self, c1: Char, c2: Char) -> bool {
+ use self::LookInst::*;
+ match *self {
+ StartLine => c1.is_none() || c1 == '\n',
+ EndLine => c2.is_none() || c2 == '\n',
+ StartText => c1.is_none(),
+ EndText => c2.is_none(),
+ ref wbty => {
+ let (w1, w2) = (c1.is_word_char(), c2.is_word_char());
+ (*wbty == WordBoundary && w1 ^ w2)
+ || (*wbty == NotWordBoundary && !(w1 ^ w2))
+ }
+ }
+ }
+}
+
+
+
+
+#[doc(hidden)]
+#[derive(Clone, Copy, Debug)]
+pub enum MatchEngine {
+
+
+ Backtrack,
+
+
+ Nfa,
+
+
+ Literals,
+}
+
+
+
+
+
+
+#[derive(Debug)]
+pub struct Program {
+
+ pub original: String,
+
+ pub insts: Vec<Inst>,
+
+
+ pub cap_names: Vec<Option<String>>,
+
+
+ pub prefixes: Prefix,
+
+ pub prefixes_complete: bool,
+
+ pub anchored_begin: bool,
+
+ pub anchored_end: bool,
+
+
+ pub engine: Option<MatchEngine>,
+
+ pub nfa_threads: Pool<NfaThreads>,
+
+ pub backtrack: Pool<BackMachine>,
+}
+
+impl Program {
+
+ pub fn new(
+ engine: Option<MatchEngine>,
+ size_limit: usize,
+ re: &str,
+ ) -> Result<Program, Error> {
+ let expr = try!(syntax::Expr::parse(re));
+ let (insts, cap_names) = try!(Compiler::new(size_limit).compile(expr));
+ let (insts_len, ncaps) = (insts.len(), num_captures(&insts));
+ let create_threads = move || NfaThreads::new(insts_len, ncaps);
+ let create_backtrack = move || BackMachine::new();
+ let mut prog = Program {
+ original: re.into(),
+ insts: insts,
+ cap_names: cap_names,
+ prefixes: Prefix::Empty,
+ prefixes_complete: false,
+ anchored_begin: false,
+ anchored_end: false,
+ engine: engine,
+ nfa_threads: Pool::new(Box::new(create_threads)),
+ backtrack: Pool::new(Box::new(create_backtrack)),
+ };
+
+ prog.find_prefixes();
+ prog.anchored_begin = match prog.insts[1] {
+ Inst::EmptyLook(LookInst::StartText) => true,
+ _ => false,
+ };
+ prog.anchored_end = match prog.insts[prog.insts.len() - 3] {
+ Inst::EmptyLook(LookInst::EndText) => true,
+ _ => false,
+ };
+ Ok(prog)
+ }
+
+
+ pub fn exec(
+ &self,
+ caps: &mut CaptureIdxs,
+ text: &str,
+ start: usize,
+ ) -> bool {
+ match self.choose_engine(caps.len(), text) {
+ MatchEngine::Backtrack => Backtrack::exec(self, caps, text, start),
+ MatchEngine::Nfa => Nfa::exec(self, caps, text, start),
+ MatchEngine::Literals => {
+ match self.prefixes.find(&text[start..]) {
+ None => false,
+ Some((s, e)) => {
+ if caps.len() == 2 {
+ caps[0] = Some(start + s);
+ caps[1] = Some(start + e);
+ }
+ true
+ }
+ }
+ }
+ }
+ }
+
+ fn choose_engine(&self, cap_len: usize, text: &str) -> MatchEngine {
+
+
+
+
+ self.engine.unwrap_or_else(|| {
+ if cap_len <= 2
+ && self.prefixes.preserves_priority()
+ && self.prefixes_complete {
+ MatchEngine::Literals
+ } else if Backtrack::should_exec(self, text) {
+
+ MatchEngine::Backtrack
+ } else {
+ MatchEngine::Nfa
+ }
+ })
+ }
+
+
+
+ pub fn num_captures(&self) -> usize {
+ num_captures(&self.insts)
+ }
+
+
+ pub fn alloc_captures(&self) -> Vec<Option<usize>> {
+ vec![None; 2 * self.num_captures()]
+ }
+
+
+ pub fn find_prefixes(&mut self) {
+ use self::Inst::*;
+
+ let (ps, complete) = self.prefixes_from_insts(1);
+ if ps.len() > 0 {
+ self.prefixes = Prefix::new(ps);
+ self.prefixes_complete = complete;
+ return;
+ }
+ let mut pc = 1;
+ let mut prefixes = vec![];
+ let mut pcomplete = true;
+ while let Split(x, y) = self.insts[pc] {
+ let (xps, xcomplete) = self.prefixes_from_insts(x);
+ let (yps, ycomplete) = self.prefixes_from_insts(y);
+ let mut done = false;
+ match (&self.insts[x], &self.insts[y]) {
+
+ (&Split(_, _), &Split(_, _)) => return,
+ (_, &Split(_, _)) if xps.len() == 0 => return,
+ (_, &Split(_, _)) => {
+ pcomplete = pcomplete && xcomplete;
+ prefixes.extend(xps);
+ pc = y;
+ }
+ (&Split(_, _), _) if yps.len() == 0 => return,
+ (&Split(_, _), _) => {
+ pcomplete = pcomplete && ycomplete;
+ prefixes.extend(yps);
+ pc = x;
+ }
+ _ if xps.len() == 0 || yps.len() == 0 => return,
+
+
+ _ => {
+ pcomplete = pcomplete && xcomplete && ycomplete;
+ prefixes.extend(xps);
+ prefixes.extend(yps);
+ done = true;
+ }
+ }
+
+
+ if prefixes.len() > NUM_PREFIX_LIMIT {
+ return;
+ }
+ if done { break; }
+ }
+ self.prefixes = Prefix::new(prefixes);
+ self.prefixes_complete = pcomplete && self.prefixes.len() > 0;
+ }
+
+
+
+
+
+
+ fn prefixes_from_insts(&self, mut pc: usize) -> (Vec<String>, bool) {
+ use self::Inst::*;
+
+ let mut complete = true;
+ let mut alts = vec![String::new()];
+ while pc < self.insts.len() {
+ let inst = &self.insts[pc];
+
+
+
+
+
+ if alts[0].len() > PREFIX_LENGTH_LIMIT {
+ complete = false;
+ break;
+ }
+ match *inst {
+ Save(_) => { pc += 1; continue }
+ Char(OneChar { c, casei: false }) => {
+ for alt in &mut alts {
+ alt.push(c);
+ }
+ pc += 1;
+ }
+ Ranges(CharRanges { ref ranges, casei: false }) => {
+ let nchars = num_chars_in_ranges(ranges);
+ if alts.len() * nchars > NUM_PREFIX_LIMIT {
+ complete = false;
+ break;
+ }
+
+ let orig = alts;
+ alts = Vec::with_capacity(orig.len());
+ for &(s, e) in ranges {
+ for c in (s as u32)..(e as u32 + 1){
+ for alt in &orig {
+ let mut alt = alt.clone();
+ alt.push(::std::char::from_u32(c).unwrap());
+ alts.push(alt);
+ }
+ }
+ }
+ pc += 1;
+ }
+ Jump(pc2) => pc = pc2,
+ _ => { complete = self.leads_to_match(pc); break }
+ }
+ }
+ if alts[0].len() == 0 {
+ (vec![], false)
+ } else {
+ (alts, complete)
+ }
+ }
+
+ fn leads_to_match(&self, mut pc: usize) -> bool {
+
+
+ loop {
+ match self.insts[pc] {
+ Inst::Match => return true,
+ Inst::Save(_) => pc += 1,
+ Inst::Jump(pc2) => pc = pc2,
+ _ => return false,
+ }
+ }
+ }
+}
+
+impl Clone for Program {
+ fn clone(&self) -> Program {
+ let (insts_len, ncaps) = (self.insts.len(), self.num_captures());
+ let create_threads = move || NfaThreads::new(insts_len, ncaps);
+ let create_backtrack = move || BackMachine::new();
+ Program {
+ original: self.original.clone(),
+ insts: self.insts.clone(),
+ cap_names: self.cap_names.clone(),
+ prefixes: self.prefixes.clone(),
+ prefixes_complete: self.prefixes_complete,
+ anchored_begin: self.anchored_begin,
+ anchored_end: self.anchored_end,
+ engine: self.engine,
+ nfa_threads: Pool::new(Box::new(create_threads)),
+ backtrack: Pool::new(Box::new(create_backtrack)),
+ }
+ }
+}
+
+
+fn num_captures(insts: &[Inst]) -> usize {
+ let mut n = 0;
+ for inst in insts {
+ match *inst {
+ Inst::Save(c) => n = cmp::max(n, c+1),
+ _ => {}
+ }
+ }
+
+ n / 2
+}
+
+
+
+
+
+fn num_chars_in_ranges(ranges: &[(char, char)]) -> usize {
+ ranges.iter()
+ .map(|&(s, e)| (e as u32) - (s as u32))
+ .fold(0, |acc, len| acc + len) as usize
+}
+
+
+