diff options
Diffstat (limited to 'ops')
-rw-r--r-- | ops/Cargo.toml | 1 | ||||
-rw-r--r-- | ops/src/freq.rs | 92 | ||||
-rw-r--r-- | ops/src/lib.rs | 7 | ||||
-rw-r--r-- | ops/src/stats.rs | 123 |
4 files changed, 129 insertions, 94 deletions
diff --git a/ops/Cargo.toml b/ops/Cargo.toml index 9a31871..f4bf082 100644 --- a/ops/Cargo.toml +++ b/ops/Cargo.toml @@ -12,3 +12,4 @@ log = "0.3.5" ilc-base = "~0.2" blist = "0.0.4" bit-set = "0.3.0" +serde = "~0.7" diff --git a/ops/src/freq.rs b/ops/src/freq.rs deleted file mode 100644 index 4a02d4c..0000000 --- a/ops/src/freq.rs +++ /dev/null @@ -1,92 +0,0 @@ -//! Per-nick word/line statistics - -use ilc_base::{self, Context, Decode, Event}; -use ilc_base::event::Type; - -use std::collections::HashMap; -use std::io::{BufRead, Write}; - -struct Person { - lines: u32, - alpha_lines: u32, - words: u32, -} - -fn words_alpha(s: &str) -> (u32, bool) { - let mut alpha = false; - let mut words = 0; - for w in s.split_whitespace() { - if !w.is_empty() { - words += 1; - if w.chars().any(char::is_alphabetic) { - alpha = true - } - } - } - (words, alpha) -} - -fn strip_nick_prefix(s: &str) -> &str { - if s.is_empty() { - return s; - } - match s.as_bytes()[0] { - b'~' | b'&' | b'@' | b'%' | b'+' => &s[1..], - _ => s, - } -} - -// TODO: Don't print results, return Stats struct -/// Return the `count` most active nicks, with lines, words and words per lines calculated. -/// -/// `usize::MAX` is a good default if you don't want to cap the statistics. -pub fn freq(count: usize, - ctx: &Context, - input: &mut BufRead, - decoder: &mut Decode, - output: &mut Write) - -> ilc_base::Result<()> { - let mut stats: HashMap<String, Person> = HashMap::new(); - - for e in decoder.decode(&ctx, input) { - let m = try!(e); - match m { - Event { ty: Type::Msg { ref from, ref content, .. }, .. } => { - let nick = strip_nick_prefix(from); - if stats.contains_key(nick) { - let p: &mut Person = stats.get_mut(nick).unwrap(); - let (words, alpha) = words_alpha(content); - p.lines += 1; - if alpha { - p.alpha_lines += 1 - } - p.words += words; - } else { - let (words, alpha) = words_alpha(content); - stats.insert(nick.to_owned(), - Person { - lines: 1, - alpha_lines: if alpha { 1 } else { 0 }, - words: words, - }); - } - } - _ => (), - } - } - - let mut stats: Vec<(String, Person)> = stats.into_iter().collect(); - stats.sort_by(|&(_, ref a), &(_, ref b)| b.words.cmp(&a.words)); - - for &(ref name, ref stat) in stats.iter().take(count) { - try!(write!(output, - "{}:\n\tTotal lines: {}\n\tLines without alphabetic characters: {}\n\tTotal \ - words: {}\n\tWords per line: {}\n", - name, - stat.lines, - stat.lines - stat.alpha_lines, - stat.words, - stat.words as f32 / stat.lines as f32)); - } - Ok(()) -} diff --git a/ops/src/lib.rs b/ops/src/lib.rs index f148037..d5aa003 100644 --- a/ops/src/lib.rs +++ b/ops/src/lib.rs @@ -2,10 +2,11 @@ extern crate log; extern crate blist; extern crate bit_set; +extern crate serde; extern crate ilc_base; mod ageset; -pub mod freq; +pub mod stats; /// No-op log parsing pub mod parse { @@ -219,7 +220,9 @@ pub mod merge { // Keep non-empty streams for (offset, idx) in empty.iter().enumerate() { - events.remove(offset + idx); + // `remove` returns an iterator. It's empty, but Rust doesn't know that, + // so suppress the warning like this. + let _ = events.remove(offset + idx); } empty.clear(); } diff --git a/ops/src/stats.rs b/ops/src/stats.rs new file mode 100644 index 0000000..49f4068 --- /dev/null +++ b/ops/src/stats.rs @@ -0,0 +1,123 @@ +//! Per-nick word/line statistics + +use ilc_base::{self, Context, Decode, Event}; +use ilc_base::event::Type; + +use std::collections::HashMap; +use std::io::BufRead; + +use serde::ser::{MapVisitor, Serialize, Serializer}; + +pub struct Stats { + pub freqs: HashMap<String, NickStat>, +} + +impl Serialize for Stats { + fn serialize<S>(&self, s: &mut S) -> Result<(), S::Error> + where S: Serializer + { + struct Visitor<'a>(&'a Stats); + impl<'a> MapVisitor for Visitor<'a> { + fn visit<S>(&mut self, s: &mut S) -> Result<Option<()>, S::Error> + where S: Serializer + { + try!(s.serialize_struct_elt("freqs", &self.0.freqs)); + Ok(None) + } + + fn len(&self) -> Option<usize> { + Some(1) + } + } + s.serialize_struct("Stats", Visitor(self)) + } +} + +pub struct NickStat { + pub lines: u32, + pub alpha_lines: u32, + pub words: u32, +} + +impl Serialize for NickStat { + fn serialize<S>(&self, s: &mut S) -> Result<(), S::Error> + where S: Serializer + { + struct Visitor<'a>(&'a NickStat); + impl<'a> MapVisitor for Visitor<'a> { + fn visit<S>(&mut self, s: &mut S) -> Result<Option<()>, S::Error> + where S: Serializer + { + try!(s.serialize_struct_elt("lines", self.0.lines)); + try!(s.serialize_struct_elt("alpha_lines", self.0.alpha_lines)); + try!(s.serialize_struct_elt("words", self.0.words)); + Ok(None) + } + + fn len(&self) -> Option<usize> { + Some(3) + } + } + + s.serialize_struct("NickStat", Visitor(self)) + } +} + +fn words_alpha(s: &str) -> (u32, bool) { + let mut alpha = false; + let mut words = 0; + for w in s.split_whitespace() { + if !w.is_empty() { + words += 1; + if w.chars().any(char::is_alphabetic) { + alpha = true + } + } + } + (words, alpha) +} + +fn strip_nick(s: &str) -> &str { + if s.is_empty() { + return s; + } + match s.as_bytes()[0] { + b'~' | b'&' | b'@' | b'%' | b'+' => &s[1..], + _ => s, + } + .trim_right_matches('_') +} + +/// Return all active nicks, with lines, words and words per lines counted. +pub fn stats(ctx: &Context, input: &mut BufRead, decoder: &mut Decode) -> ilc_base::Result<Stats> { + let mut freqs: HashMap<String, NickStat> = HashMap::new(); + + for e in decoder.decode(&ctx, input) { + let m = try!(e); + match m { + Event { ty: Type::Msg { ref from, ref content, .. }, .. } => { + let nick = strip_nick(from); + if freqs.contains_key(nick) { + let p: &mut NickStat = freqs.get_mut(nick).unwrap(); + let (words, alpha) = words_alpha(content); + p.lines += 1; + if alpha { + p.alpha_lines += 1 + } + p.words += words; + } else { + let (words, alpha) = words_alpha(content); + freqs.insert(nick.to_owned(), + NickStat { + lines: 1, + alpha_lines: if alpha { 1 } else { 0 }, + words: words, + }); + } + } + _ => (), + } + } + + Ok(Stats { freqs: freqs }) +} |