From 64106c4d3d4ddba8c7bc2af75376e6d3d3d75601 Mon Sep 17 00:00:00 2001
From:
Date: Mon, 29 Jun 2015 20:16:15 +0000
Subject: Update documentation
---
src/aho_corasick/autiter.rs.html | 747 ++++++++++++++++
src/aho_corasick/full.rs.html | 329 +++++++
src/aho_corasick/lib.rs.html | 1823 ++++++++++++++++++++++++++++++++++++++
3 files changed, 2899 insertions(+)
create mode 100644 src/aho_corasick/autiter.rs.html
create mode 100644 src/aho_corasick/full.rs.html
create mode 100644 src/aho_corasick/lib.rs.html
(limited to 'src/aho_corasick')
diff --git a/src/aho_corasick/autiter.rs.html b/src/aho_corasick/autiter.rs.html
new file mode 100644
index 0000000..af7f372
--- /dev/null
+++ b/src/aho_corasick/autiter.rs.html
@@ -0,0 +1,747 @@
+
+
+
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+
+use std::io::{self, BufRead, Read};
+
+use super::{ROOT_STATE, PatIdx, StateIdx};
+
+
+pub trait Automaton: Sized {
+
+ fn next_state(&self, si: StateIdx, b: u8) -> StateIdx;
+
+
+
+ fn has_match(&self, si: StateIdx, outi: PatIdx) -> bool;
+
+
+ fn get_match(&self, si: StateIdx, outi: PatIdx, texti: usize) -> Match;
+
+
+
+
+
+
+
+
+ fn skip_to(&self, si: StateIdx, text: &[u8], at: usize) -> usize;
+
+
+ fn is_skippable(&self) -> bool;
+
+
+
+
+ fn patterns(&self) -> &[String];
+
+
+
+
+
+ fn pattern(&self, i: usize) -> &str;
+
+
+ #[inline]
+ fn len(&self) -> usize {
+ self.patterns().len()
+ }
+
+
+ #[inline]
+ fn is_empty(&self) -> bool {
+ self.len() == 0
+ }
+
+
+ fn find<'a, 's>(
+ &'a self,
+ s: &'s str,
+ ) -> Matches<'a, 's, Self> {
+ Matches {
+ aut: self,
+ text: s.as_bytes(),
+ texti: 0,
+ si: ROOT_STATE,
+ }
+ }
+
+
+ fn find_overlapping<'a, 's>(
+ &'a self,
+ s: &'s str,
+ ) -> MatchesOverlapping<'a, 's, Self> {
+ MatchesOverlapping {
+ aut: self,
+ text: s.as_bytes(),
+ texti: 0,
+ si: ROOT_STATE,
+ outi: 0,
+ }
+ }
+
+
+ fn stream_find<'a, R: io::Read>(
+ &'a self,
+ rdr: R,
+ ) -> StreamMatches<'a, R, Self> {
+ StreamMatches {
+ aut: self,
+ buf: io::BufReader::new(rdr),
+ texti: 0,
+ si: ROOT_STATE,
+ }
+ }
+
+
+ fn stream_find_overlapping<'a, R: io::Read>(
+ &'a self,
+ rdr: R,
+ ) -> StreamMatchesOverlapping<'a, R, Self> {
+ StreamMatchesOverlapping {
+ aut: self,
+ buf: io::BufReader::new(rdr),
+ texti: 0,
+ si: ROOT_STATE,
+ outi: 0,
+ }
+ }
+}
+
+
+#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
+pub struct Match {
+
+
+
+
+ pub pati: usize,
+
+ pub start: usize,
+
+
+
+
+ pub end: usize,
+}
+
+
+
+
+
+
+
+#[derive(Debug)]
+pub struct Matches<'a, 's, A: 'a + Automaton> {
+ aut: &'a A,
+ text: &'s [u8],
+ texti: usize,
+ si: StateIdx,
+}
+
+impl<'a, 's, A: Automaton> Iterator for Matches<'a, 's, A> {
+ type Item = Match;
+
+ fn next(&mut self) -> Option<Match> {
+
+
+
+
+
+
+ if self.aut.is_skippable() {
+ self.texti = self.aut.skip_to(self.si, self.text, self.texti);
+ while self.texti < self.text.len() {
+ self.si = self.aut.next_state(self.si, self.text[self.texti]);
+ if self.aut.has_match(self.si, 0) {
+ let m = self.aut.get_match(self.si, 0, self.texti);
+ self.si = ROOT_STATE;
+ self.texti += 1;
+ return Some(m);
+ }
+ self.texti = self.aut.skip_to(
+ self.si, self.text, self.texti + 1);
+ }
+ } else {
+ while self.texti < self.text.len() {
+ self.si = self.aut.next_state(self.si, self.text[self.texti]);
+ if self.aut.has_match(self.si, 0) {
+ let m = self.aut.get_match(self.si, 0, self.texti);
+ self.si = ROOT_STATE;
+ self.texti += 1;
+ return Some(m);
+ }
+ self.texti += 1;
+ }
+ }
+ None
+ }
+}
+
+
+
+
+
+
+
+#[derive(Debug)]
+pub struct StreamMatches<'a, R, A: 'a + Automaton> {
+ aut: &'a A,
+ buf: io::BufReader<R>,
+ texti: usize,
+ si: StateIdx,
+}
+
+impl<'a, R: io::Read, A: Automaton> Iterator for StreamMatches<'a, R, A> {
+ type Item = io::Result<Match>;
+
+ fn next(&mut self) -> Option<io::Result<Match>> {
+ let mut m = None;
+ let mut consumed = 0;
+'LOOP: loop {
+ self.buf.consume(consumed);
+ let bs = match self.buf.fill_buf() {
+ Err(err) => return Some(Err(err)),
+ Ok(bs) if bs.len() == 0 => break,
+ Ok(bs) => bs,
+ };
+ consumed = bs.len();
+ for (i, &b) in bs.iter().enumerate() {
+ self.si = self.aut.next_state(self.si, b);
+ if self.aut.has_match(self.si, 0) {
+ m = Some(Ok(self.aut.get_match(self.si, 0, self.texti)));
+ consumed = i + 1;
+ self.texti += 1;
+ self.si = ROOT_STATE;
+ break 'LOOP;
+ }
+ self.texti += 1;
+ }
+ }
+ self.buf.consume(consumed);
+ m
+ }
+}
+
+
+
+
+
+
+
+#[derive(Debug)]
+pub struct MatchesOverlapping<'a, 's, A: 'a + Automaton> {
+ aut: &'a A,
+ text: &'s [u8],
+ texti: usize,
+ si: StateIdx,
+ outi: usize,
+}
+
+impl<'a, 's, A: Automaton> Iterator for MatchesOverlapping<'a, 's, A> {
+ type Item = Match;
+
+ fn next(&mut self) -> Option<Match> {
+ if self.aut.has_match(self.si, self.outi) {
+ let m = self.aut.get_match(self.si, self.outi, self.texti);
+ self.outi += 1;
+ if !self.aut.has_match(self.si, self.outi) {
+ self.texti += 1;
+ }
+ return Some(m);
+ }
+
+ self.outi = 0;
+ self.texti = self.aut.skip_to(self.si, self.text, self.texti);
+ while self.texti < self.text.len() {
+ let b = self.text[self.texti];
+ self.si = self.aut.next_state(self.si, b);
+ if self.aut.has_match(self.si, self.outi) {
+ return self.next();
+ }
+ self.texti = self.aut.skip_to(self.si, self.text, self.texti + 1);
+ }
+ None
+ }
+}
+
+
+
+
+
+
+
+#[derive(Debug)]
+pub struct StreamMatchesOverlapping<'a, R, A: 'a + Automaton> {
+ aut: &'a A,
+ buf: io::BufReader<R>,
+ texti: usize,
+ si: StateIdx,
+ outi: usize,
+}
+
+impl<
+ 'a,
+ R: io::Read,
+ A: Automaton,
+> Iterator for StreamMatchesOverlapping<'a, R, A> {
+ type Item = io::Result<Match>;
+
+ fn next(&mut self) -> Option<io::Result<Match>> {
+ if self.aut.has_match(self.si, self.outi) {
+ let m = self.aut.get_match(self.si, self.outi, self.texti);
+ self.outi += 1;
+ if !self.aut.has_match(self.si, self.outi) {
+ self.texti += 1;
+ }
+ return Some(Ok(m));
+ }
+ let mut m = None;
+ let mut consumed = 0;
+ self.outi = 0;
+'LOOP: loop {
+ self.buf.consume(consumed);
+ let bs = match self.buf.fill_buf() {
+ Err(err) => return Some(Err(err)),
+ Ok(bs) if bs.len() == 0 => break,
+ Ok(bs) => bs,
+ };
+ consumed = bs.len();
+ for (i, &b) in bs.iter().enumerate() {
+ self.si = self.aut.next_state(self.si, b);
+ if self.aut.has_match(self.si, self.outi) {
+ m = Some(Ok(self.aut.get_match(self.si, self.outi,
+ self.texti)));
+ consumed = i + 1;
+ self.outi += 1;
+ if !self.aut.has_match(self.si, self.outi) {
+ self.texti += 1;
+ }
+ break 'LOOP;
+ }
+ self.texti += 1;
+ }
+ }
+ self.buf.consume(consumed);
+ m
+ }
+}
+
+
+
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+
+use memchr::memchr;
+
+use super::{
+ FAIL_STATE, ROOT_STATE,
+ StateIdx, PatIdx,
+ AcAutomaton, Transitions, Match,
+};
+use super::autiter::Automaton;
+
+
+
+
+
+
+
+
+
+#[derive(Clone, Debug)]
+pub struct FullAcAutomaton {
+ pats: Vec<String>,
+
+ trans: Vec<StateIdx>,
+ out: Vec<Vec<PatIdx>>,
+ start_bytes: Vec<u8>,
+}
+
+impl FullAcAutomaton {
+
+
+ pub fn new<T: Transitions>(ac: AcAutomaton<T>) -> FullAcAutomaton {
+ let mut fac = FullAcAutomaton {
+ pats: vec![],
+ trans: vec![FAIL_STATE; 256 * ac.states.len()],
+ out: vec![vec![]; ac.states.len()],
+ start_bytes: vec![],
+ };
+ fac.build_matrix(&ac);
+ fac.pats = ac.pats;
+ fac.start_bytes = ac.start_bytes;
+ fac
+ }
+
+ fn set(&mut self, si: StateIdx, i: u8, goto: StateIdx) {
+ let ns = self.num_states();
+ self.trans[i as usize * ns + si as usize] = goto;
+ }
+
+ #[inline]
+ fn num_states(&self) -> usize {
+ self.out.len()
+ }
+}
+
+impl Automaton for FullAcAutomaton {
+ #[inline]
+ fn next_state(&self, si: StateIdx, i: u8) -> StateIdx {
+ self.trans[i as usize * self.num_states() + si as usize]
+ }
+
+ #[inline]
+ fn get_match(&self, si: StateIdx, outi: usize, texti: usize) -> Match {
+ let pati = self.out[si as usize][outi];
+ let patlen = self.pats[pati].len();
+ let start = texti + 1 - patlen;
+ Match {
+ pati: pati,
+ start: start,
+ end: start + patlen,
+ }
+ }
+
+ #[inline]
+ fn has_match(&self, si: StateIdx, outi: usize) -> bool {
+ outi < self.out[si as usize].len()
+ }
+
+ #[inline]
+ fn skip_to(&self, si: StateIdx, text: &[u8], at: usize) -> usize {
+ if si != ROOT_STATE || !self.is_skippable() {
+ return at;
+ }
+ let b = self.start_bytes[0];
+ match memchr(b, &text[at..]) {
+ None => text.len(),
+ Some(i) => at + i,
+ }
+ }
+
+ #[inline]
+ fn is_skippable(&self) -> bool {
+ self.start_bytes.len() == 1
+ }
+
+ #[inline]
+ fn patterns(&self) -> &[String] {
+ &self.pats
+ }
+
+ #[inline]
+ fn pattern(&self, i: usize) -> &str {
+ &self.pats[i]
+ }
+}
+
+impl FullAcAutomaton {
+ fn build_matrix<T: Transitions>(&mut self, ac: &AcAutomaton<T>) {
+ for (si, s) in ac.states.iter().enumerate().skip(1) {
+ for b in (0..256).map(|b| b as u8) {
+ self.set(si as StateIdx, b, ac.next_state(si as StateIdx, b));
+ }
+ for &pati in &s.out {
+ self.out[si].push(pati);
+ }
+ }
+ }
+}
+
+
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
+449
+450
+451
+452
+453
+454
+455
+456
+457
+458
+459
+460
+461
+462
+463
+464
+465
+466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
+480
+481
+482
+483
+484
+485
+486
+487
+488
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498
+499
+500
+501
+502
+503
+504
+505
+506
+507
+508
+509
+510
+511
+512
+513
+514
+515
+516
+517
+518
+519
+520
+521
+522
+523
+524
+525
+526
+527
+528
+529
+530
+531
+532
+533
+534
+535
+536
+537
+538
+539
+540
+541
+542
+543
+544
+545
+546
+547
+548
+549
+550
+551
+552
+553
+554
+555
+556
+557
+558
+559
+560
+561
+562
+563
+564
+565
+566
+567
+568
+569
+570
+571
+572
+573
+574
+575
+576
+577
+578
+579
+580
+581
+582
+583
+584
+585
+586
+587
+588
+589
+590
+591
+592
+593
+594
+595
+596
+597
+598
+599
+600
+601
+602
+603
+604
+605
+606
+607
+608
+609
+610
+611
+612
+613
+614
+615
+616
+617
+618
+619
+620
+621
+622
+623
+624
+625
+626
+627
+628
+629
+630
+631
+632
+633
+634
+635
+636
+637
+638
+639
+640
+641
+642
+643
+644
+645
+646
+647
+648
+649
+650
+651
+652
+653
+654
+655
+656
+657
+658
+659
+660
+661
+662
+663
+664
+665
+666
+667
+668
+669
+670
+671
+672
+673
+674
+675
+676
+677
+678
+679
+680
+681
+682
+683
+684
+685
+686
+687
+688
+689
+690
+691
+692
+693
+694
+695
+696
+697
+698
+699
+700
+701
+702
+703
+704
+705
+706
+707
+708
+709
+710
+711
+712
+713
+714
+715
+716
+717
+718
+719
+720
+721
+722
+723
+724
+725
+726
+727
+728
+729
+730
+731
+732
+733
+734
+735
+736
+737
+738
+739
+740
+741
+742
+743
+744
+745
+746
+747
+748
+749
+750
+751
+752
+753
+754
+755
+756
+757
+758
+759
+760
+761
+762
+763
+764
+765
+766
+767
+768
+769
+770
+771
+772
+773
+774
+775
+776
+777
+778
+779
+780
+781
+782
+783
+784
+785
+786
+787
+788
+789
+790
+791
+792
+793
+794
+795
+796
+797
+798
+799
+800
+801
+802
+803
+804
+805
+806
+807
+808
+809
+810
+811
+812
+813
+814
+815
+816
+817
+818
+819
+820
+821
+822
+823
+824
+825
+826
+827
+828
+829
+830
+831
+832
+833
+834
+835
+836
+837
+838
+839
+840
+841
+842
+843
+844
+845
+846
+847
+848
+849
+850
+851
+852
+853
+854
+855
+856
+857
+858
+859
+860
+861
+862
+863
+
+
+
+#![deny(missing_docs)]
+
+extern crate memchr;
+#[cfg(test)] extern crate quickcheck;
+#[cfg(test)] extern crate rand;
+
+use std::collections::VecDeque;
+use std::fmt;
+use std::iter::FromIterator;
+
+use memchr::memchr;
+
+pub use self::autiter::{
+ Automaton, Match,
+ Matches, MatchesOverlapping, StreamMatches, StreamMatchesOverlapping,
+};
+pub use self::full::FullAcAutomaton;
+
+
+
+
+#[path = "autiter.rs"]
+mod autiter;
+#[path = "full.rs"]
+mod full;
+
+
+
+
+
+pub type StateIdx = u32;
+
+type PatIdx = usize;
+
+
+const FAIL_STATE: u32 = 0;
+const ROOT_STATE: u32 = 1;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+const DENSE_DEPTH_THRESHOLD: u32 = 3;
+
+
+#[derive(Clone)]
+pub struct AcAutomaton<T=Dense> {
+ pats: Vec<String>,
+ states: Vec<State<T>>,
+ start_bytes: Vec<u8>,
+}
+
+#[derive(Clone)]
+struct State<T> {
+ out: Vec<PatIdx>,
+ fail: StateIdx,
+ goto: T,
+ depth: u32,
+}
+
+impl AcAutomaton {
+
+
+
+
+ pub fn new<S, I>(pats: I) -> AcAutomaton<Dense>
+ where S: Into<String>, I: IntoIterator<Item=S> {
+ AcAutomaton::with_transitions(pats)
+ }
+}
+
+impl<T: Transitions> AcAutomaton<T> {
+
+
+
+
+
+
+ pub fn with_transitions<S, I>(pats: I) -> AcAutomaton<T>
+ where S: Into<String>, I: IntoIterator<Item=S> {
+ AcAutomaton {
+ pats: vec![],
+ states: vec![State::new(0), State::new(0)],
+ start_bytes: vec![],
+ }.build(pats.into_iter().map(Into::into).collect())
+ }
+
+
+
+
+
+ pub fn into_full(self) -> FullAcAutomaton {
+ FullAcAutomaton::new(self)
+ }
+}
+
+impl<T: Transitions> Automaton for AcAutomaton<T> {
+ #[inline]
+ fn next_state(&self, mut si: StateIdx, b: u8) -> StateIdx {
+ loop {
+ let maybe_si = self.states[si as usize].goto(b);
+ if maybe_si != FAIL_STATE {
+ si = maybe_si;
+ break;
+ } else {
+ si = self.states[si as usize].fail;
+ }
+ }
+ si
+ }
+
+ #[inline]
+ fn get_match(&self, si: StateIdx, outi: usize, texti: usize) -> Match {
+ let pati = self.states[si as usize].out[outi];
+ let patlen = self.pats[pati].len();
+ let start = texti + 1 - patlen;
+ Match {
+ pati: pati,
+ start: start,
+ end: start + patlen,
+ }
+ }
+
+ #[inline]
+ fn has_match(&self, si: StateIdx, outi: usize) -> bool {
+ outi < self.states[si as usize].out.len()
+ }
+
+ #[inline]
+ fn skip_to(&self, si: StateIdx, text: &[u8], at: usize) -> usize {
+ if si != ROOT_STATE || !self.is_skippable() {
+ return at;
+ }
+ let b = self.start_bytes[0];
+ match memchr(b, &text[at..]) {
+ None => text.len(),
+ Some(i) => at + i,
+ }
+ }
+
+ #[inline]
+ fn is_skippable(&self) -> bool {
+ self.start_bytes.len() == 1
+ }
+
+ #[inline]
+ fn patterns(&self) -> &[String] {
+ &self.pats
+ }
+
+ #[inline]
+ fn pattern(&self, i: usize) -> &str {
+ &self.pats[i]
+ }
+}
+
+
+
+
+
+impl<T: Transitions> AcAutomaton<T> {
+
+ fn build(mut self, pats: Vec<String>) -> AcAutomaton<T> {
+ for (pati, pat) in pats.iter().enumerate() {
+ if pat.is_empty() {
+ continue;
+ }
+ let mut previ = ROOT_STATE;
+ for &b in pat.as_bytes() {
+ if self.states[previ as usize].goto(b) != FAIL_STATE {
+ previ = self.states[previ as usize].goto(b);
+ } else {
+ let depth = self.states[previ as usize].depth + 1;
+ let nexti = self.add_state(State::new(depth));
+ self.states[previ as usize].set_goto(b, nexti);
+ previ = nexti;
+ }
+ }
+ self.states[previ as usize].out.push(pati);
+ }
+ for c in (0..256).into_iter().map(|c| c as u8) {
+ if self.states[ROOT_STATE as usize].goto(c) == FAIL_STATE {
+ self.states[ROOT_STATE as usize].set_goto(c, ROOT_STATE);
+ } else {
+ self.start_bytes.push(c);
+ }
+ }
+ self.pats = pats;
+ self.fill()
+ }
+
+
+ fn fill(mut self) -> AcAutomaton<T> {
+
+
+ let mut q = VecDeque::new();
+ for c in (0..256).into_iter().map(|c| c as u8) {
+ let si = self.states[ROOT_STATE as usize].goto(c);
+ if si != ROOT_STATE {
+ q.push_front(si);
+ }
+ }
+ while let Some(si) = q.pop_back() {
+ for c in (0..256).into_iter().map(|c| c as u8) {
+ let u = self.states[si as usize].goto(c);
+ if u != FAIL_STATE {
+ q.push_front(u);
+ let mut v = self.states[si as usize].fail;
+ while self.states[v as usize].goto(c) == FAIL_STATE {
+ v = self.states[v as usize].fail;
+ }
+ let ufail = self.states[v as usize].goto(c);
+ self.states[u as usize].fail = ufail;
+ let ufail_out = self.states[ufail as usize].out.clone();
+ self.states[u as usize].out.extend(ufail_out);
+ }
+ }
+ }
+ self
+ }
+
+ fn add_state(&mut self, state: State<T>) -> StateIdx {
+ let i = self.states.len();
+ self.states.push(state);
+ i as StateIdx
+ }
+}
+
+impl<T: Transitions> State<T> {
+ fn new(depth: u32) -> State<T> {
+ State {
+ out: vec![],
+ fail: 1,
+ goto: Transitions::new(depth),
+ depth: depth,
+ }
+ }
+
+ fn goto(&self, b: u8) -> StateIdx { self.goto.goto(b) }
+ fn set_goto(&mut self, b: u8, si: StateIdx) { self.goto.set_goto(b, si); }
+}
+
+
+
+
+
+
+
+
+pub trait Transitions {
+
+ fn new(depth: u32) -> Self;
+
+ fn goto(&self, alpha: u8) -> StateIdx;
+
+ fn set_goto(&mut self, alpha: u8, si: StateIdx);
+}
+
+
+
+
+#[derive(Clone, Debug)]
+pub struct Dense(DenseChoice);
+
+#[derive(Clone, Debug)]
+enum DenseChoice {
+ Sparse(Vec<StateIdx>),
+ Dense(Vec<(u8, StateIdx)>),
+}
+
+impl Transitions for Dense {
+ fn new(depth: u32) -> Dense {
+ if depth <= DENSE_DEPTH_THRESHOLD {
+ Dense(DenseChoice::Sparse(vec![0; 256]))
+ } else {
+ Dense(DenseChoice::Dense(vec![]))
+ }
+ }
+
+ fn goto(&self, b1: u8) -> StateIdx {
+ match self.0 {
+ DenseChoice::Sparse(ref m) => m[b1 as usize],
+ DenseChoice::Dense(ref m) => {
+ for &(b2, si) in m {
+ if b1 == b2 {
+ return si;
+ }
+ }
+ FAIL_STATE
+ }
+ }
+ }
+
+ fn set_goto(&mut self, b: u8, si: StateIdx) {
+ match self.0 {
+ DenseChoice::Sparse(ref mut m) => m[b as usize] = si,
+ DenseChoice::Dense(ref mut m) => m.push((b, si)),
+ }
+ }
+}
+
+
+
+
+
+#[derive(Clone, Debug)]
+pub struct Sparse(Vec<StateIdx>);
+
+impl Transitions for Sparse {
+ fn new(_: u32) -> Sparse {
+ Sparse(vec![0; 256])
+ }
+
+ #[inline]
+ fn goto(&self, b: u8) -> StateIdx {
+ self.0[b as usize]
+ }
+
+ fn set_goto(&mut self, b: u8, si: StateIdx) {
+ self.0[b as usize] = si;
+ }
+}
+
+impl<S: Into<String>> FromIterator<S> for AcAutomaton {
+
+ fn from_iter<T>(it: T) -> AcAutomaton where T: IntoIterator<Item=S> {
+ AcAutomaton::new(it)
+ }
+}
+
+
+
+
+impl<T: Transitions> fmt::Debug for AcAutomaton<T> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ use std::iter::repeat;
+
+ try!(writeln!(f, "{}", repeat('-').take(79).collect::<String>()));
+ try!(writeln!(f, "Patterns: {:?}", self.pats));
+ for (i, state) in self.states.iter().enumerate().skip(1) {
+ try!(writeln!(f, "{:3}: {}", i, state.debug(i == 1)));
+ }
+ write!(f, "{}", repeat('-').take(79).collect::<String>())
+ }
+}
+
+impl<T: Transitions> State<T> {
+ fn debug(&self, root: bool) -> String {
+ format!("State {{ depth: {:?}, out: {:?}, fail: {:?}, goto: {{{}}} }}",
+ self.depth, self.out, self.fail, self.goto_string(root))
+ }
+
+ fn goto_string(&self, root: bool) -> String {
+ use std::char::from_u32;
+
+ let mut goto = vec![];
+ for b in (0..256).map(|b| b as u8) {
+ let si = self.goto(b);
+ if (!root && si == FAIL_STATE) || (root && si == ROOT_STATE) {
+ continue;
+ }
+ goto.push(format!("{} => {}", from_u32(b as u32).unwrap(), si));
+ }
+ goto.connect(", ")
+ }
+}
+
+impl<T: Transitions> fmt::Debug for State<T> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}", self.debug(false))
+ }
+}
+
+impl<T: Transitions> AcAutomaton<T> {
+ #[doc(hidden)]
+ pub fn dot(&self) -> String {
+ use std::fmt::Write;
+ let mut out = String::new();
+ macro_rules! w {
+ ($w:expr, $($tt:tt)*) => { {write!($w, $($tt)*)}.unwrap() }
+ }
+
+ w!(out, r#"
+digraph automaton {{
+ label=<<FONT POINT-SIZE="20">{}</FONT>>;
+ labelloc="l";
+ labeljust="l";
+ rankdir="LR";
+"#, self.pats.connect(", "));
+ for (i, s) in self.states.iter().enumerate().skip(1) {
+ let i = i as u32;
+ if s.out.len() == 0 {
+ w!(out, " {};\n", i);
+ } else {
+ w!(out, " {} [peripheries=2];\n", i);
+ }
+ w!(out, " {} -> {} [style=dashed];\n", i, s.fail);
+ for b in (0..256).map(|b| b as u8) {
+ let si = s.goto(b);
+ if si == FAIL_STATE || (i == ROOT_STATE && si == ROOT_STATE) {
+ continue;
+ }
+ w!(out, " {} -> {} [label={}];\n", i, si, b as char);
+ }
+ }
+ w!(out, "}}");
+ out
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use std::collections::HashSet;
+ use std::io;
+
+ use quickcheck::{Arbitrary, Gen, quickcheck};
+ use rand::Rng;
+
+ use super::{Automaton, AcAutomaton, Match};
+
+ fn aut_find<S>(xs: &[S], haystack: &str) -> Vec<Match>
+ where S: Clone + Into<String> {
+ AcAutomaton::new(xs.to_vec()).find(&haystack).collect()
+ }
+
+ fn aut_finds<S>(xs: &[S], haystack: &str) -> Vec<Match>
+ where S: Clone + Into<String> {
+ let cur = io::Cursor::new(haystack.as_bytes());
+ AcAutomaton::new(xs.to_vec())
+ .stream_find(cur).map(|r| r.unwrap()).collect()
+ }
+
+ fn aut_findf<S>(xs: &[S], haystack: &str) -> Vec<Match>
+ where S: Clone + Into<String> {
+ AcAutomaton::new(xs.to_vec()).into_full().find(haystack).collect()
+ }
+
+ fn aut_findfs<S>(xs: &[S], haystack: &str) -> Vec<Match>
+ where S: Clone + Into<String> {
+ let cur = io::Cursor::new(haystack.as_bytes());
+ AcAutomaton::new(xs.to_vec())
+ .into_full()
+ .stream_find(cur).map(|r| r.unwrap()).collect()
+ }
+
+ fn aut_findo<S>(xs: &[S], haystack: &str) -> Vec<Match>
+ where S: Clone + Into<String> {
+ AcAutomaton::new(xs.to_vec()).find_overlapping(haystack).collect()
+ }
+
+ fn aut_findos<S>(xs: &[S], haystack: &str) -> Vec<Match>
+ where S: Clone + Into<String> {
+ let cur = io::Cursor::new(haystack.as_bytes());
+ AcAutomaton::new(xs.to_vec())
+ .stream_find_overlapping(cur).map(|r| r.unwrap()).collect()
+ }
+
+ fn aut_findfo<S>(xs: &[S], haystack: &str) -> Vec<Match>
+ where S: Clone + Into<String> {
+ AcAutomaton::new(xs.to_vec())
+ .into_full().find_overlapping(haystack).collect()
+ }
+
+ fn aut_findfos<S>(xs: &[S], haystack: &str) -> Vec<Match>
+ where S: Clone + Into<String> {
+ let cur = io::Cursor::new(haystack.as_bytes());
+ AcAutomaton::new(xs.to_vec())
+ .into_full()
+ .stream_find_overlapping(cur).map(|r| r.unwrap()).collect()
+ }
+
+ #[test]
+ fn one_pattern_one_match() {
+ let ns = vec!["a"];
+ let hay = "za";
+ let matches = vec![
+ Match { pati: 0, start: 1, end: 2 },
+ ];
+ assert_eq!(&aut_find(&ns, hay), &matches);
+ assert_eq!(&aut_finds(&ns, hay), &matches);
+ assert_eq!(&aut_findf(&ns, hay), &matches);
+ assert_eq!(&aut_findfs(&ns, hay), &matches);
+ }
+
+ #[test]
+ fn one_pattern_many_match() {
+ let ns = vec!["a"];
+ let hay = "zazazzzza";
+ let matches = vec![
+ Match { pati: 0, start: 1, end: 2 },
+ Match { pati: 0, start: 3, end: 4 },
+ Match { pati: 0, start: 8, end: 9 },
+ ];
+ assert_eq!(&aut_find(&ns, hay), &matches);
+ assert_eq!(&aut_finds(&ns, hay), &matches);
+ assert_eq!(&aut_findf(&ns, hay), &matches);
+ assert_eq!(&aut_findfs(&ns, hay), &matches);
+ }
+
+ #[test]
+ fn one_longer_pattern_one_match() {
+ let ns = vec!["abc"];
+ let hay = "zazabcz";
+ let matches = vec![ Match { pati: 0, start: 3, end: 6 } ];
+ assert_eq!(&aut_find(&ns, hay), &matches);
+ assert_eq!(&aut_finds(&ns, hay), &matches);
+ assert_eq!(&aut_findf(&ns, hay), &matches);
+ assert_eq!(&aut_findfs(&ns, hay), &matches);
+ }
+
+ #[test]
+ fn one_longer_pattern_many_match() {
+ let ns = vec!["abc"];
+ let hay = "zazabczzzzazzzabc";
+ let matches = vec![
+ Match { pati: 0, start: 3, end: 6 },
+ Match { pati: 0, start: 14, end: 17 },
+ ];
+ assert_eq!(&aut_find(&ns, hay), &matches);
+ assert_eq!(&aut_finds(&ns, hay), &matches);
+ assert_eq!(&aut_findf(&ns, hay), &matches);
+ assert_eq!(&aut_findfs(&ns, hay), &matches);
+ }
+
+ #[test]
+ fn many_pattern_one_match() {
+ let ns = vec!["a", "b"];
+ let hay = "zb";
+ let matches = vec![ Match { pati: 1, start: 1, end: 2 } ];
+ assert_eq!(&aut_find(&ns, hay), &matches);
+ assert_eq!(&aut_finds(&ns, hay), &matches);
+ assert_eq!(&aut_findf(&ns, hay), &matches);
+ assert_eq!(&aut_findfs(&ns, hay), &matches);
+ }
+
+ #[test]
+ fn many_pattern_many_match() {
+ let ns = vec!["a", "b"];
+ let hay = "zbzazzzzb";
+ let matches = vec![
+ Match { pati: 1, start: 1, end: 2 },
+ Match { pati: 0, start: 3, end: 4 },
+ Match { pati: 1, start: 8, end: 9 },
+ ];
+ assert_eq!(&aut_find(&ns, hay), &matches);
+ assert_eq!(&aut_finds(&ns, hay), &matches);
+ assert_eq!(&aut_findf(&ns, hay), &matches);
+ assert_eq!(&aut_findfs(&ns, hay), &matches);
+ }
+
+ #[test]
+ fn many_longer_pattern_one_match() {
+ let ns = vec!["abc", "xyz"];
+ let hay = "zazxyzz";
+ let matches = vec![ Match { pati: 1, start: 3, end: 6 } ];
+ assert_eq!(&aut_find(&ns, hay), &matches);
+ assert_eq!(&aut_finds(&ns, hay), &matches);
+ assert_eq!(&aut_findf(&ns, hay), &matches);
+ assert_eq!(&aut_findfs(&ns, hay), &matches);
+ }
+
+ #[test]
+ fn many_longer_pattern_many_match() {
+ let ns = vec!["abc", "xyz"];
+ let hay = "zazxyzzzzzazzzabcxyz";
+ let matches = vec![
+ Match { pati: 1, start: 3, end: 6 },
+ Match { pati: 0, start: 14, end: 17 },
+ Match { pati: 1, start: 17, end: 20 },
+ ];
+ assert_eq!(&aut_find(&ns, hay), &matches);
+ assert_eq!(&aut_finds(&ns, hay), &matches);
+ assert_eq!(&aut_findf(&ns, hay), &matches);
+ assert_eq!(&aut_findfs(&ns, hay), &matches);
+ }
+
+ #[test]
+ fn many_longer_pattern_overlap_one_match() {
+ let ns = vec!["abc", "bc"];
+ let hay = "zazabcz";
+ let matches = vec![
+ Match { pati: 0, start: 3, end: 6 },
+ Match { pati: 1, start: 4, end: 6 },
+ ];
+ assert_eq!(&aut_findo(&ns, hay), &matches);
+ assert_eq!(&aut_findos(&ns, hay), &matches);
+ assert_eq!(&aut_findfo(&ns, hay), &matches);
+ assert_eq!(&aut_findfos(&ns, hay), &matches);
+ }
+
+ #[test]
+ fn many_longer_pattern_overlap_one_match_reverse() {
+ let ns = vec!["abc", "bc"];
+ let hay = "xbc";
+ let matches = vec![ Match { pati: 1, start: 1, end: 3 } ];
+ assert_eq!(&aut_findo(&ns, hay), &matches);
+ assert_eq!(&aut_findos(&ns, hay), &matches);
+ assert_eq!(&aut_findfo(&ns, hay), &matches);
+ assert_eq!(&aut_findfos(&ns, hay), &matches);
+ }
+
+ #[test]
+ fn many_longer_pattern_overlap_many_match() {
+ let ns = vec!["abc", "bc", "c"];
+ let hay = "zzzabczzzbczzzc";
+ let matches = vec![
+ Match { pati: 0, start: 3, end: 6 },
+ Match { pati: 1, start: 4, end: 6 },
+ Match { pati: 2, start: 5, end: 6 },
+ Match { pati: 1, start: 9, end: 11 },
+ Match { pati: 2, start: 10, end: 11 },
+ Match { pati: 2, start: 14, end: 15 },
+ ];
+ assert_eq!(&aut_findo(&ns, hay), &matches);
+ assert_eq!(&aut_findos(&ns, hay), &matches);
+ assert_eq!(&aut_findfo(&ns, hay), &matches);
+ assert_eq!(&aut_findfos(&ns, hay), &matches);
+ }
+
+ #[test]
+ fn many_longer_pattern_overlap_many_match_reverse() {
+ let ns = vec!["abc", "bc", "c"];
+ let hay = "zzzczzzbczzzabc";
+ let matches = vec![
+ Match { pati: 2, start: 3, end: 4 },
+ Match { pati: 1, start: 7, end: 9 },
+ Match { pati: 2, start: 8, end: 9 },
+ Match { pati: 0, start: 12, end: 15 },
+ Match { pati: 1, start: 13, end: 15 },
+ Match { pati: 2, start: 14, end: 15 },
+ ];
+ assert_eq!(&aut_findo(&ns, hay), &matches);
+ assert_eq!(&aut_findos(&ns, hay), &matches);
+ assert_eq!(&aut_findfo(&ns, hay), &matches);
+ assert_eq!(&aut_findfos(&ns, hay), &matches);
+ }
+
+
+
+
+
+ #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+ pub struct SmallAscii(String);
+
+ impl Arbitrary for SmallAscii {
+ fn arbitrary<G: Gen>(g: &mut G) -> SmallAscii {
+ use std::char::from_u32;
+ SmallAscii((0..2)
+ .map(|_| from_u32(g.gen_range(97, 123)).unwrap())
+ .collect())
+ }
+
+ fn shrink(&self) -> Box<Iterator<Item=SmallAscii>> {
+ Box::new(self.0.shrink().map(SmallAscii))
+ }
+ }
+
+ impl From<SmallAscii> for String {
+ fn from(s: SmallAscii) -> String { s.0 }
+ }
+
+
+
+ #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
+ pub struct BiasAscii(String);
+
+ impl Arbitrary for BiasAscii {
+ fn arbitrary<G: Gen>(g: &mut G) -> BiasAscii {
+ use std::char::from_u32;
+ let size = { let s = g.size(); g.gen_range(0, s) };
+ let mut s = String::with_capacity(size);
+ for _ in 0..size {
+ if g.gen_weighted_bool(3) {
+ s.push(char::arbitrary(g));
+ } else {
+ for _ in 0..5 {
+ s.push(from_u32(g.gen_range(97, 123)).unwrap());
+ }
+ }
+ }
+ BiasAscii(s)
+ }
+
+ fn shrink(&self) -> Box<Iterator<Item=BiasAscii>> {
+ Box::new(self.0.shrink().map(BiasAscii))
+ }
+ }
+
+ fn naive_find<S>(xs: &[S], haystack: &str) -> Vec<Match>
+ where S: Clone + Into<String> {
+ let needles: Vec<String> =
+ xs.to_vec().into_iter().map(Into::into).collect();
+ let mut matches = vec![];
+ for hi in 0..haystack.len() {
+ for (pati, needle) in needles.iter().enumerate() {
+ let needle = needle.as_bytes();
+ if needle.len() == 0 || needle.len() > haystack.len() - hi {
+ continue;
+ }
+ if needle == &haystack.as_bytes()[hi..hi+needle.len()] {
+ matches.push(Match {
+ pati: pati,
+ start: hi,
+ end: hi + needle.len(),
+ });
+ }
+ }
+ }
+ matches
+ }
+
+ #[test]
+ fn qc_ac_equals_naive() {
+ fn prop(needles: Vec<SmallAscii>, haystack: BiasAscii) -> bool {
+ let aut_matches = aut_findo(&needles, &haystack.0);
+ let naive_matches = naive_find(&needles, &haystack.0);
+
+
+ let aset: HashSet<Match> = aut_matches.iter().cloned().collect();
+ let nset: HashSet<Match> = naive_matches.iter().cloned().collect();
+ aset == nset
+ }
+ quickcheck(prop as fn(Vec<SmallAscii>, BiasAscii) -> bool);
+ }
+}
+
+
+