From 64106c4d3d4ddba8c7bc2af75376e6d3d3d75601 Mon Sep 17 00:00:00 2001 From: Date: Mon, 29 Jun 2015 20:16:15 +0000 Subject: Update documentation --- src/aho_corasick/full.rs.html | 329 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 329 insertions(+) create mode 100644 src/aho_corasick/full.rs.html (limited to 'src/aho_corasick/full.rs.html') diff --git a/src/aho_corasick/full.rs.html b/src/aho_corasick/full.rs.html new file mode 100644 index 0000000..f9f4f65 --- /dev/null +++ b/src/aho_corasick/full.rs.html @@ -0,0 +1,329 @@ + + + + + + + + + + full.rs.html -- source + + + + + + + + + + + + + + + +
  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+
+use memchr::memchr;
+
+use super::{
+    FAIL_STATE, ROOT_STATE,
+    StateIdx, PatIdx,
+    AcAutomaton, Transitions, Match,
+};
+use super::autiter::Automaton;
+
+/// A complete Aho-Corasick automaton.
+///
+/// This uses a single transition matrix that permits each input character
+/// to move to the next state with a single lookup in the matrix.
+///
+/// This is as fast as it gets, but it is guaranteed to use a lot of memory.
+/// Namely, it will use at least `4 * 256 * #states`, where the number of
+/// states is capped at length of all patterns concatenated.
+#[derive(Clone, Debug)]
+pub struct FullAcAutomaton {
+    pats: Vec<String>,
+    // i * #states + si
+    trans: Vec<StateIdx>,  // row-major, where states are rows
+    out: Vec<Vec<PatIdx>>, // indexed by StateIdx
+    start_bytes: Vec<u8>,
+}
+
+impl FullAcAutomaton {
+    /// Build a new expanded Aho-Corasick automaton from an existing
+    /// Aho-Corasick automaton.
+    pub fn new<T: Transitions>(ac: AcAutomaton<T>) -> FullAcAutomaton {
+        let mut fac = FullAcAutomaton {
+            pats: vec![],
+            trans: vec![FAIL_STATE; 256 * ac.states.len()],
+            out: vec![vec![]; ac.states.len()],
+            start_bytes: vec![],
+        };
+        fac.build_matrix(&ac);
+        fac.pats = ac.pats;
+        fac.start_bytes = ac.start_bytes;
+        fac
+    }
+
+    fn set(&mut self, si: StateIdx, i: u8, goto: StateIdx) {
+        let ns = self.num_states();
+        self.trans[i as usize * ns + si as usize] = goto;
+    }
+
+    #[inline]
+    fn num_states(&self) -> usize {
+        self.out.len()
+    }
+}
+
+impl Automaton for FullAcAutomaton {
+    #[inline]
+    fn next_state(&self, si: StateIdx, i: u8) -> StateIdx {
+        self.trans[i as usize * self.num_states() + si as usize]
+    }
+
+    #[inline]
+    fn get_match(&self, si: StateIdx, outi: usize, texti: usize) -> Match {
+        let pati = self.out[si as usize][outi];
+        let patlen = self.pats[pati].len();
+        let start = texti + 1 - patlen;
+        Match {
+            pati: pati,
+            start: start,
+            end: start + patlen,
+        }
+    }
+
+    #[inline]
+    fn has_match(&self, si: StateIdx, outi: usize) -> bool {
+        outi < self.out[si as usize].len()
+    }
+
+    #[inline]
+    fn skip_to(&self, si: StateIdx, text: &[u8], at: usize) -> usize {
+        if si != ROOT_STATE || !self.is_skippable() {
+            return at;
+        }
+        let b = self.start_bytes[0];
+        match memchr(b, &text[at..]) {
+            None => text.len(),
+            Some(i) => at + i,
+        }
+    }
+
+    #[inline]
+    fn is_skippable(&self) -> bool {
+        self.start_bytes.len() == 1
+    }
+
+    #[inline]
+    fn patterns(&self) -> &[String] {
+        &self.pats
+    }
+
+    #[inline]
+    fn pattern(&self, i: usize) -> &str {
+        &self.pats[i]
+    }
+}
+
+impl FullAcAutomaton {
+    fn build_matrix<T: Transitions>(&mut self, ac: &AcAutomaton<T>) {
+        for (si, s) in ac.states.iter().enumerate().skip(1) {
+            for b in (0..256).map(|b| b as u8) {
+                self.set(si as StateIdx, b, ac.next_state(si as StateIdx, b));
+            }
+            for &pati in &s.out {
+                self.out[si].push(pati);
+            }
+        }
+    }
+}
+
+
+ + + + + + + + + + + + + + + \ No newline at end of file -- cgit v1.2.3