From 9f5dd9dad6b13476bab2c6eb3c6528f8ad49311a Mon Sep 17 00:00:00 2001
From: Till Höppner
Date: Thu, 25 Feb 2016 06:48:03 +0100
Subject: Refactor... everything.

---
 ops/Cargo.toml    |   9 +++
 ops/src/ageset.rs |  58 +++++++++++++++++++
 ops/src/freq.rs   |  95 ++++++++++++++++++++++++++++++
 ops/src/lib.rs    | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 332 insertions(+)
 create mode 100644 ops/Cargo.toml
 create mode 100644 ops/src/ageset.rs
 create mode 100644 ops/src/freq.rs
 create mode 100644 ops/src/lib.rs

(limited to 'ops')

diff --git a/ops/Cargo.toml b/ops/Cargo.toml
new file mode 100644
index 0000000..d74126c
--- /dev/null
+++ b/ops/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "ilc-ops"
+version = "0.1.0"
+authors = ["Till Höppner "]
+
+[dependencies]
+# ilc-base = "*"
+ilc-base = { path = "../base" }
+blist = "0.0.4"
diff --git a/ops/src/ageset.rs b/ops/src/ageset.rs
new file mode 100644
index 0000000..c97240f
--- /dev/null
+++ b/ops/src/ageset.rs
@@ -0,0 +1,58 @@
+use std::collections::HashSet;
+use std::hash::Hash;
+
+use blist::BList;
+
+/// A set that remembers the order in which its elements were inserted.
+///
+/// Membership tests go through a `HashSet`, while a FIFO queue records the
+/// insertion order so that the oldest elements can be inspected, and removed
+/// in that order, for as long as they satisfy a caller-supplied criterion.
+pub struct AgeSet<T> {
+    /// Elements in insertion order, oldest at the front.
+    fifo: BList<T>,
+    /// The same elements, kept for membership tests.
+    set: HashSet<T>,
+}
+
+impl<T> AgeSet<T>
+    where T: Eq + Hash + Clone
+{
+    /// Creates an empty `AgeSet`.
+    pub fn new() -> Self {
+        AgeSet {
+            fifo: BList::new(),
+            set: HashSet::new(),
+        }
+    }
+
+    /// Returns `true` if `t` is currently in the set.
+    pub fn contains(&self, t: &T) -> bool {
+        self.set.contains(t)
+    }
+
+    /// Removes elements, oldest first, for as long as `kill` returns `true`.
+    ///
+    /// Pruning stops at the first element that `kill` rejects; elements
+    /// inserted after it are not examined.
+    pub fn prune<F>(&mut self, kill: F)
+        where F: Fn(&T) -> bool
+    {
+        // Test the front element in place so that nothing is cloned here.
+        while self.fifo.front().map_or(false, |e| kill(e)) {
+            if let Some(removed) = self.fifo.pop_front() {
+                self.set.remove(&removed);
+            }
+        }
+    }
+
+    /// Inserts `t` as the newest element.
+    ///
+    /// An element that is already present is left where it is, so the queue
+    /// and the set always hold exactly the same elements.
+    pub fn push(&mut self, t: T) {
+        if self.set.insert(t.clone()) {
+            self.fifo.push_back(t);
+        }
+    }
+}
diff --git a/ops/src/freq.rs b/ops/src/freq.rs
new file mode 100644
index 0000000..c5b363a
--- /dev/null
+++ b/ops/src/freq.rs
@@ -0,0 +1,95 @@
+use ilc_base::{self, Context, Decode, Event};
+use ilc_base::event::Type;
+
+use std::collections::HashMap;
+use std::io::{BufRead, Write};
+
+/// Message statistics collected for one nick.
+struct Person {
+    /// Number of messages.
+    lines: u32,
+    /// Number of messages that contain an alphabetic character.
+    alpha_lines: u32,
+    /// Number of whitespace-separated words over all messages.
+    words: u32,
+}
+
+/// Returns the number of whitespace-separated words in `s` and whether any
+/// of them contains an alphabetic character.
+fn words_alpha(s: &str) -> (u32, bool) {
+    let mut words = 0;
+    let mut alpha = false;
+    // `split_whitespace` never yields empty words, so each item counts.
+    for w in s.split_whitespace() {
+        words += 1;
+        alpha = alpha || w.chars().any(char::is_alphabetic);
+    }
+    (words, alpha)
+}
+
+/// Strips one leading `~`, `&`, `@`, `%` or `+` from `s`, if there is one.
+fn strip_nick_prefix(s: &str) -> &str {
+    match s.as_bytes().first() {
+        // All prefixes are one byte long, so slicing at 1 is a char boundary.
+        Some(&b'~') | Some(&b'&') | Some(&b'@') | Some(&b'%') | Some(&b'+') => &s[1..],
+        _ => s,
+    }
+}
+
+// TODO: Don't print results, return Stats struct
+/// Writes message statistics for the `count` nicks with the most words.
+///
+/// Every `Type::Msg` event decoded from `input` is attributed to its sender,
+/// with one leading prefix character stripped from the nick. All other
+/// events are ignored. The report goes to `output`, most words first.
+///
+/// # Errors
+///
+/// Returns the first error produced by the decoder or by writing to `output`.
+pub fn freq(count: usize,
+            ctx: &Context,
+            input: &mut BufRead,
+            decoder: &mut Decode,
+            output: &mut Write)
+            -> ilc_base::Result<()> {
+    let mut stats: HashMap<String, Person> = HashMap::new();
+
+    for e in decoder.decode(&ctx, input) {
+        let m = try!(e);
+        if let Event { ty: Type::Msg { ref from, ref content, .. }, .. } = m {
+            let nick = strip_nick_prefix(from);
+            let (words, alpha) = words_alpha(content);
+            let alpha_lines = if alpha { 1 } else { 0 };
+
+            // Look the nick up first, so that it is only copied into the map
+            // the first time it is seen.
+            if let Some(p) = stats.get_mut(nick) {
+                p.lines += 1;
+                p.alpha_lines += alpha_lines;
+                p.words += words;
+                continue;
+            }
+            stats.insert(nick.to_owned(),
+                         Person {
+                             lines: 1,
+                             alpha_lines: alpha_lines,
+                             words: words,
+                         });
+        }
+    }
+
+    let mut stats: Vec<(String, Person)> = stats.into_iter().collect();
+    stats.sort_by(|&(_, ref a), &(_, ref b)| b.words.cmp(&a.words));
+
+    for &(ref name, ref stat) in stats.iter().take(count) {
+        try!(write!(output,
+                    "{}:\n\tTotal lines: {}\n\tLines without alphabetic characters: {}\n\tTotal \
+                     words: {}\n\tWords per line: {}\n",
+                    name,
+                    stat.lines,
+                    stat.lines - stat.alpha_lines,
+                    stat.words,
+                    stat.words as f32 / stat.lines as f32));
+    }
+    Ok(())
+}
diff --git a/ops/src/lib.rs b/ops/src/lib.rs
new file mode 100644
index 0000000..8cd5607
--- /dev/null
+++ b/ops/src/lib.rs
@@ -0,0 +1,170 @@
+extern crate blist;
+extern crate ilc_base;
+
+mod ageset;
+pub mod freq;
+
+pub mod parse {
+    use ilc_base::{self, Context, Decode};
+    use std::io::BufRead;
+
+    /// Decodes all of `input` and discards the events.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error produced by the decoder.
+    pub fn parse(ctx: &Context, input: &mut BufRead, decoder: &mut Decode) -> ilc_base::Result<()> {
+        for e in decoder.decode(&ctx, input) {
+            try!(e);
+        }
+        Ok(())
+    }
+}
+
+pub mod convert {
+    use ilc_base::{self, Context, Decode, Encode};
+    use std::io::{BufRead, Write};
+
+    /// Decodes events from `input` and writes each one to `output` using `encoder`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error produced by the decoder or the encoder.
+    pub fn convert(ctx: &Context,
+                   input: &mut BufRead,
+                   decoder: &mut Decode,
+                   output: &mut Write,
+                   encoder: &Encode)
+                   -> ilc_base::Result<()> {
+        for e in decoder.decode(&ctx, input) {
+            try!(encoder.encode(&ctx, output, &try!(e)));
+        }
+        Ok(())
+    }
+}
+
+pub mod seen {
+    use ilc_base::{self, Context, Decode, Encode, Event};
+    use std::io::{BufRead, Write};
+
+    /// Writes the latest event that involves `nick` to `output`.
+    ///
+    /// "Latest" is decided by comparing `time.as_timestamp()`; of several
+    /// events with the same timestamp the first one decoded wins. Nothing
+    /// is written if no event involves `nick`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error produced by the decoder or the encoder.
+    pub fn seen(nick: &str,
+                ctx: &Context,
+                input: &mut BufRead,
+                decoder: &mut Decode,
+                output: &mut Write,
+                encoder: &Encode)
+                -> ilc_base::Result<()> {
+        let mut last: Option<Event> = None;
+        for e in decoder.decode(&ctx, input) {
+            let m: Event = try!(e);
+            if m.ty.involves(nick) &&
+               last.as_ref().map_or(true,
+                                    |last| m.time.as_timestamp() > last.time.as_timestamp()) {
+                last = Some(m)
+            }
+        }
+        if let Some(ref m) = last {
+            try!(encoder.encode(&ctx, output, m));
+        }
+        Ok(())
+    }
+}
+
+pub mod sort {
+    use ilc_base::{self, Context, Decode, Encode, Event};
+    use std::io::{BufRead, Write};
+
+    /// Writes all events from `input` to `output`, ordered by time.
+    ///
+    /// The sort is stable, so events with equal times keep their input order.
+    /// Events that fail to decode are skipped rather than reported.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error produced by the encoder.
+    pub fn sort(ctx: &Context,
+                input: &mut BufRead,
+                decoder: &mut Decode,
+                output: &mut Write,
+                encoder: &Encode)
+                -> ilc_base::Result<()> {
+        // NOTE(review): decode errors are dropped here, whereas `parse`,
+        // `convert` and `seen` return them. Confirm that this is intended.
+        let mut events: Vec<Event> = decoder.decode(&ctx, input)
+                                            .flat_map(Result::ok)
+                                            .collect();
+
+        events.sort_by(|a, b| a.time.cmp(&b.time));
+        for e in events {
+            try!(encoder.encode(&ctx, output, &e));
+        }
+        Ok(())
+    }
+}
+
+pub mod dedup {
+    use std::io::{BufRead, Write};
+    use std::hash::{Hash, Hasher};
+    use ageset::AgeSet;
+    use ilc_base::{self, Context, Decode, Encode, Event};
+
+    /// An event whose hash covers only its type and channel.
+    ///
+    /// NOTE(review): equality is derived, so it is whatever `Event` itself
+    /// compares. Confirm that this is what duplicate detection should use.
+    #[derive(Clone, Debug, PartialEq, Eq)]
+    pub struct NoTimeHash<'a>(pub Event<'a>);
+
+    impl<'a> Hash for NoTimeHash<'a> {
+        fn hash<H>(&self, state: &mut H)
+            where H: Hasher
+        {
+            self.0.ty.hash(state);
+            self.0.channel.hash(state);
+        }
+    }
+
+    /// Writes the events from `input` to `output`, leaving out any event that
+    /// is equal to one written earlier and still in the backlog.
+    ///
+    /// Before each event is checked, the oldest backlog entries are dropped
+    /// for as long as they are more than 5000 timestamp units older than it.
+    /// Events that fail to decode are skipped rather than reported.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error produced by the encoder.
+    pub fn dedup(ctx: &Context,
+                 input: &mut BufRead,
+                 decoder: &mut Decode,
+                 output: &mut Write,
+                 encoder: &Encode)
+                 -> ilc_base::Result<()> {
+        let mut backlog = AgeSet::new();
+
+        for e in decoder.decode(&ctx, input) {
+            if let Ok(e) = e {
+                // Only the timestamp is needed to age the backlog, so take
+                // it once instead of cloning the whole event.
+                let now = e.time.as_timestamp();
+                backlog.prune(move |a: &NoTimeHash| now - a.0.time.as_timestamp() > 5000);
+                // write `e` if it's a new event
+                let n = NoTimeHash(e);
+                if !backlog.contains(&n) {
+                    try!(encoder.encode(&ctx, output, &n.0));
+                    backlog.push(n);
+                }
+            }
+        }
+        Ok(())
+    }
+}
-- 
cgit v1.2.3