aboutsummaryrefslogtreecommitdiff
path: root/ops
diff options
context:
space:
mode:
author Till Höppner 2016-02-25 06:48:03 +0100
committer Till Höppner 2016-02-25 06:48:03 +0100
commit 9f5dd9dad6b13476bab2c6eb3c6528f8ad49311a (patch)
tree 1ca71876029cb466aa6230f1aead05b32f19bf6d /ops
parent 685aac1cc537692b2cf9342dcb6c26fa74c3c920 (diff)
download ilc-9f5dd9dad6b13476bab2c6eb3c6528f8ad49311a.tar.gz
ilc-9f5dd9dad6b13476bab2c6eb3c6528f8ad49311a.tar.xz
ilc-9f5dd9dad6b13476bab2c6eb3c6528f8ad49311a.zip
Refactor... everything.
Diffstat (limited to 'ops')
-rw-r--r-- ops/Cargo.toml 9
-rw-r--r-- ops/src/ageset.rs 47
-rw-r--r-- ops/src/freq.rs 87
-rw-r--r-- ops/src/lib.rs 127
4 files changed, 270 insertions, 0 deletions
diff --git a/ops/Cargo.toml b/ops/Cargo.toml
new file mode 100644
index 0000000..d74126c
--- /dev/null
+++ b/ops/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "ilc-ops"
+version = "0.1.0"
+authors = ["Till Höppner <till@hoeppner.ws>"]
+
+[dependencies]
+# ilc-base = "*"
+ilc-base = { path = "../base" }
+blist = "0.0.4"
diff --git a/ops/src/ageset.rs b/ops/src/ageset.rs
new file mode 100644
index 0000000..c97240f
--- /dev/null
+++ b/ops/src/ageset.rs
@@ -0,0 +1,47 @@
+use std::collections::HashSet;
+use std::hash::Hash;
+
+use blist::BList;
+
+/// An insertion-ordered collection with set-style membership checks.
+/// Every element is kept both in a FIFO (oldest first) and in a hash set, so
+/// the oldest entries can be tested against a predicate and removed in
+/// insertion order, while `contains` only needs a hash lookup.
+pub struct AgeSet<T> {
+ fifo: BList<T>, // insertion order; the oldest element sits at the front
+ set: HashSet<T>, // the same elements again, used for membership queries
+}
+
+impl<T> AgeSet<T>
+ where T: Eq + Hash + Clone // Clone is needed because each element is stored twice (queue + set)
+{
+ pub fn new() -> Self { // Creates an empty AgeSet.
+ AgeSet {
+ fifo: BList::new(),
+ set: HashSet::new(),
+ }
+ }
+
+ pub fn contains(&self, t: &T) -> bool { // Membership test; consults only the hash set.
+ self.set.contains(t)
+ }
+
+ pub fn prune<F>(&mut self, kill: F) // Removes the oldest elements for as long as `kill` returns true for them.
+ where F: Fn(&T) -> bool
+ {
+ while let Some(ref e) = self.fifo.front().map(T::clone) { // work on a clone of the front element so the queue is not borrowed while popping
+ if kill(&e) {
+ let removed = self.fifo.pop_front().unwrap(); // cannot fail: front() returned Some just above
+ self.set.remove(&e);
+ assert!(*e == removed); // sanity check: the popped element is the one that was inspected
+ } else {
+ break; // stop at the first survivor; younger elements are never examined
+ }
+ }
+ }
+
+ pub fn push(&mut self, t: T) { // Appends `t` as the newest element and records it in the set.
+ self.fifo.push_back(t.clone());
+ self.set.insert(t); // NOTE(review): pushing an already-present value queues it twice but stores it once in the set; pruning the first copy then makes `contains` false while a copy is still queued
+ }
+}
diff --git a/ops/src/freq.rs b/ops/src/freq.rs
new file mode 100644
index 0000000..c5b363a
--- /dev/null
+++ b/ops/src/freq.rs
@@ -0,0 +1,87 @@
+use ilc_base::{self, Context, Decode, Event};
+use ilc_base::event::Type;
+
+use std::collections::HashMap;
+use std::io::{BufRead, Write};
+
+struct Person { // Per-nick counters accumulated by `freq`.
+ lines: u32, // number of messages seen from this nick
+ alpha_lines: u32, // messages that contained at least one alphabetic character
+ words: u32, // total whitespace-separated words over all messages
+}
+
+fn words_alpha(s: &str) -> (u32, bool) { // Returns (word count, whether any word contains an alphabetic char).
+ let mut alpha = false;
+ let mut words = 0;
+ for w in s.split_whitespace() {
+ if !w.is_empty() { // split_whitespace never yields empty items, so this guard always passes
+ words += 1;
+ if w.chars().any(char::is_alphabetic) {
+ alpha = true
+ }
+ }
+ }
+ (words, alpha)
+}
+
+fn strip_nick_prefix(s: &str) -> &str { // Drops at most one leading `~`, `&`, `@`, `%` or `+` (presumably IRC status sigils; confirm).
+ if s.is_empty() {
+ return s; // guard: indexing byte 0 below would panic on an empty string
+ }
+ match s.as_bytes()[0] {
+ b'~' | b'&' | b'@' | b'%' | b'+' => &s[1..], // the matched byte is ASCII, so index 1 is a valid char boundary
+ _ => s,
+ }
+}
+
+// Prints per-nick message statistics. TODO: Don't print results, return Stats struct
+pub fn freq(count: usize, // maximum number of nicks to print
+ ctx: &Context,
+ input: &mut BufRead,
+ decoder: &mut Decode,
+ output: &mut Write)
+ -> ilc_base::Result<()> {
+ let mut stats: HashMap<String, Person> = HashMap::new(); // keyed by nick with any status prefix stripped
+
+ for e in decoder.decode(&ctx, input) {
+ let m = try!(e); // the first decode error aborts the whole run
+ match m {
+ Event { ty: Type::Msg { ref from, ref content, .. }, .. } => {
+ let nick = strip_nick_prefix(from);
+ if stats.contains_key(nick) { // NOTE(review): contains_key + get_mut hashes the nick twice
+ let p: &mut Person = stats.get_mut(nick).unwrap(); // cannot fail: the key was just found
+ let (words, alpha) = words_alpha(content);
+ p.lines += 1;
+ if alpha {
+ p.alpha_lines += 1
+ }
+ p.words += words;
+ } else {
+ let (words, alpha) = words_alpha(content);
+ stats.insert(nick.to_owned(),
+ Person {
+ lines: 1,
+ alpha_lines: if alpha { 1 } else { 0 },
+ words: words,
+ });
+ }
+ }
+ _ => (), // every event that is not a Msg is ignored
+ }
+ }
+
+ let mut stats: Vec<(String, Person)> = stats.into_iter().collect();
+ stats.sort_by(|&(_, ref a), &(_, ref b)| b.words.cmp(&a.words)); // descending by total words (note b vs a); ties follow HashMap iteration order
+
+ for &(ref name, ref stat) in stats.iter().take(count) {
+ try!(write!(output, // a write error aborts and is returned to the caller
+ "{}:\n\tTotal lines: {}\n\tLines without alphabetic characters: {}\n\tTotal \
+ words: {}\n\tWords per line: {}\n",
+ name,
+ stat.lines,
+ stat.lines - stat.alpha_lines, // cannot underflow: alpha_lines grows only when lines does
+ stat.words,
+ stat.words as f32 / stat.lines as f32)); // lines >= 1 for every entry, so no division by zero
+ }
+ Ok(())
+}
diff --git a/ops/src/lib.rs b/ops/src/lib.rs
new file mode 100644
index 0000000..8cd5607
--- /dev/null
+++ b/ops/src/lib.rs
@@ -0,0 +1,127 @@
+extern crate blist;
+extern crate ilc_base;
+
+mod ageset;
+pub mod freq;
+
+pub mod parse { // Decode-only operation: reports whether the input decodes, producing no output.
+ use ilc_base::{self, Context, Decode};
+ use std::io::BufRead;
+ pub fn parse(ctx: &Context, input: &mut BufRead, decoder: &mut Decode) -> ilc_base::Result<()> { // Ok(()) if every event decodes
+ for e in decoder.decode(&ctx, input) {
+ try!(e); // the decoded event is discarded; the first error is returned
+ }
+ Ok(())
+ }
+}
+
+pub mod convert { // Re-encodes a log: decode with one format, encode with another.
+ use ilc_base::{self, Context, Decode, Encode};
+ use std::io::{BufRead, Write};
+
+ pub fn convert(ctx: &Context, // Streams events from `decoder` straight into `encoder`, one at a time.
+ input: &mut BufRead,
+ decoder: &mut Decode,
+ output: &mut Write,
+ encoder: &Encode)
+ -> ilc_base::Result<()> {
+ for e in decoder.decode(&ctx, input) {
+ try!(encoder.encode(&ctx, output, &try!(e))); // inner try!: decode error; outer try!: encode error; either aborts
+ }
+ Ok(())
+ }
+}
+
+pub mod seen { // Finds the latest event that involves a given nick.
+ use ilc_base::{self, Context, Decode, Encode, Event};
+ use std::io::{BufRead, Write};
+
+ pub fn seen(nick: &str, // Writes the matching event with the greatest timestamp, or nothing if none matches.
+ ctx: &Context,
+ input: &mut BufRead,
+ decoder: &mut Decode,
+ output: &mut Write,
+ encoder: &Encode)
+ -> ilc_base::Result<()> {
+ let mut last: Option<Event> = None; // best candidate so far
+ for e in decoder.decode(&ctx, input) {
+ let m: Event = try!(e); // the first decode error aborts the search
+ if m.ty.involves(nick) &&
+ last.as_ref().map_or(true, // no candidate yet: accept the first match unconditionally
+ |last| m.time.as_timestamp() > last.time.as_timestamp()) { // strict `>`: on equal timestamps the earlier-read event is kept
+ last = Some(m)
+ }
+ }
+ if let Some(ref m) = last {
+ try!(encoder.encode(&ctx, output, m));
+ }
+ Ok(())
+ }
+}
+
+pub mod sort { // Orders a whole log by event time.
+ use ilc_base::{self, Context, Decode, Encode, Event};
+ use std::io::{BufRead, Write};
+
+ pub fn sort(ctx: &Context, // Collects all decodable events, sorts them by `time`, then encodes them.
+ input: &mut BufRead,
+ decoder: &mut Decode,
+ output: &mut Write,
+ encoder: &Encode)
+ -> ilc_base::Result<()> {
+ let mut events: Vec<Event> = decoder.decode(&ctx, input) // every event is collected before anything is written
+ .flat_map(Result::ok) // NOTE(review): decode errors are silently dropped here, unlike parse/convert which propagate them
+ .collect();
+
+ events.sort_by(|a, b| a.time.cmp(&b.time)); // sort_by is stable: events with equal time keep their input order
+ for e in events {
+ try!(encoder.encode(&ctx, output, &e)); // encode errors, by contrast, do abort
+ }
+ Ok(())
+ }
+}
+
+pub mod dedup { // Suppresses events that repeat a recently written one.
+ use std::io::{BufRead, Write};
+ use std::hash::{Hash, Hasher};
+ use ageset::AgeSet;
+ use ilc_base::{self, Context, Decode, Encode, Event};
+
+ #[derive(Clone, Debug, PartialEq, Eq)] // NOTE(review): equality is delegated to Event; if that compares `time`, equal-hash events with different times never match. Verify
+ pub struct NoTimeHash<'a>(pub Event<'a>); // Event wrapper whose Hash impl leaves out the timestamp
+
+ impl<'a> Hash for NoTimeHash<'a> {
+ fn hash<H>(&self, state: &mut H)
+ where H: Hasher
+ {
+ self.0.ty.hash(state); // only `ty` and `channel` feed the hash; `time` is deliberately skipped
+ self.0.channel.hash(state);
+ }
+ }
+
+ pub fn dedup(ctx: &Context, // Writes each event unless an equal one is still held in the backlog.
+ input: &mut BufRead,
+ decoder: &mut Decode,
+ output: &mut Write,
+ encoder: &Encode)
+ -> ilc_base::Result<()> {
+ let mut backlog = AgeSet::new(); // recently written events, oldest first
+
+ for e in decoder.decode(&ctx, input) {
+ if let Ok(e) = e { // NOTE(review): events that fail to decode are skipped without any report
+ let newest_event = e.clone(); // cloned because the `move` closure takes it and `e` is moved into NoTimeHash below
+ backlog.prune(move |a: &NoTimeHash| {
+ let age = newest_event.time.as_timestamp() - a.0.time.as_timestamp(); // age of a backlog entry relative to the current event
+ age > 5000 // entries older than this are forgotten; the unit is whatever as_timestamp returns (not visible here)
+ });
+ // write `e` if it's a new event
+ let n = NoTimeHash(e);
+ if !backlog.contains(&n) {
+ try!(encoder.encode(&ctx, output, &n.0)); // encode errors abort the run
+ backlog.push(n); // only written events enter the backlog, so it never holds duplicates
+ }
+ }
+ }
+ Ok(())
+ }
+}