aboutsummaryrefslogtreecommitdiff
path: root/ops/src/freq.rs
blob: 4a02d4c0b95d071aa5932f301ae3057b4fabfccb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
//! Per-nick word/line statistics

use ilc_base::{self, Context, Decode, Event};
use ilc_base::event::Type;

use std::collections::HashMap;
use std::io::{BufRead, Write};

/// Running message totals accumulated for a single nick.
struct Person {
    /// Number of message events attributed to this nick.
    lines: u32,
    /// Number of those messages that contained at least one alphabetic character.
    alpha_lines: u32,
    /// Total count of whitespace-separated words across all of the nick's messages.
    words: u32,
}

/// Summarise one message body.
///
/// Returns `(word_count, has_alpha)`, where `word_count` is the number of
/// whitespace-separated tokens in `s` and `has_alpha` is `true` when at least
/// one of those tokens contains an alphabetic character.
fn words_alpha(s: &str) -> (u32, bool) {
    // `split_whitespace` never yields empty tokens, so every item counts as a word.
    s.split_whitespace().fold((0, false), |(count, seen_alpha), token| {
        (count + 1, seen_alpha || token.chars().any(char::is_alphabetic))
    })
}

/// Drop a single leading channel-mode sigil (`~`, `&`, `@`, `%` or `+`) from a nick.
///
/// At most one character is removed; a string that is empty or does not start
/// with one of those sigils is returned unchanged.
fn strip_nick_prefix(s: &str) -> &str {
    // Every sigil is one ASCII byte, so slicing from byte offset 1 always
    // lands on a character boundary.
    if s.starts_with(&['~', '&', '@', '%', '+'][..]) {
        &s[1..]
    } else {
        s
    }
}

// TODO: Don't print results, return Stats struct
/// Write line and word statistics for the `count` most talkative nicks to `output`.
///
/// Every event produced by `decoder.decode(&ctx, input)` is inspected; only
/// message events (`Type::Msg`) contribute. The sender has one leading mode
/// sigil stripped (see `strip_nick_prefix`) before being used as the key, and
/// the message content is tallied with `words_alpha`.
///
/// Nicks are ordered by total word count, highest first; nicks with the same
/// word count are ordered by name so that the output is reproducible from run
/// to run. For each of the first `count` nicks the total lines, the lines
/// without any alphabetic character, the total words and the words-per-line
/// ratio are written.
///
/// `usize::MAX` is a good default if you don't want to cap the statistics.
///
/// # Errors
///
/// Returns early with the first error yielded by the decoder or raised while
/// writing to `output`.
pub fn freq(count: usize,
            ctx: &Context,
            input: &mut BufRead,
            decoder: &mut Decode,
            output: &mut Write)
            -> ilc_base::Result<()> {
    let mut stats: HashMap<String, Person> = HashMap::new();

    for e in decoder.decode(&ctx, input) {
        let m = try!(e);
        if let Event { ty: Type::Msg { ref from, ref content, .. }, .. } = m {
            let nick = strip_nick_prefix(from);
            let (words, alpha) = words_alpha(content);
            let alpha_lines = if alpha { 1 } else { 0 };

            // Known nick: update in place with a single lookup and no allocation.
            if let Some(p) = stats.get_mut(nick) {
                p.lines += 1;
                p.alpha_lines += alpha_lines;
                p.words += words;
                continue;
            }

            // First message from this nick: only now pay for the owned key.
            stats.insert(nick.to_owned(),
                         Person {
                             lines: 1,
                             alpha_lines: alpha_lines,
                             words: words,
                         });
        }
    }

    let mut stats: Vec<(String, Person)> = stats.into_iter().collect();
    // Most words first. `HashMap` iteration order is arbitrary, so ties are
    // broken by nick (ascending) to keep both the ordering and the `count`
    // cut-off deterministic.
    stats.sort_by(|&(ref name_a, ref a), &(ref name_b, ref b)| {
        (b.words, name_a).cmp(&(a.words, name_b))
    });

    for &(ref name, ref stat) in stats.iter().take(count) {
        // `stat.lines` is at least 1 for every entry, so the ratio never divides by zero.
        try!(write!(output,
                    "{}:\n\tTotal lines: {}\n\tLines without alphabetic characters: {}\n\tTotal \
                     words: {}\n\tWords per line: {}\n",
                    name,
                    stat.lines,
                    stat.lines - stat.alpha_lines,
                    stat.words,
                    stat.words as f32 / stat.lines as f32));
    }
    Ok(())
}