From 79c26253bbd7d78156b85fc3014c16786edc15af Mon Sep 17 00:00:00 2001 From: Till Höppner Date: Thu, 25 Feb 2016 18:08:08 +0100 Subject: Merge alias to ease usage of sort+dedup --- ops/src/freq.rs | 5 +++++ ops/src/lib.rs | 26 +++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'ops') diff --git a/ops/src/freq.rs b/ops/src/freq.rs index c5b363a..4a02d4c 100644 --- a/ops/src/freq.rs +++ b/ops/src/freq.rs @@ -1,3 +1,5 @@ +//! Per-nick word/line statistics + use ilc_base::{self, Context, Decode, Event}; use ilc_base::event::Type; @@ -35,6 +37,9 @@ fn strip_nick_prefix(s: &str) -> &str { } // TODO: Don't print results, return Stats struct +/// Return the `count` most active nicks, with lines, words and words per line calculated. +/// +/// `usize::MAX` is a good default if you don't want to cap the statistics. pub fn freq(count: usize, ctx: &Context, input: &mut BufRead, diff --git a/ops/src/lib.rs b/ops/src/lib.rs index 8cd5607..e5d92cb 100644 --- a/ops/src/lib.rs +++ b/ops/src/lib.rs @@ -4,9 +4,13 @@ extern crate ilc_base; mod ageset; pub mod freq; +/// No-op log parsing pub mod parse { use ilc_base::{self, Context, Decode}; use std::io::BufRead; + + /// Simply parse the input, without further validation or conversion. No information is stored. + /// This will return `Err` if the decoder yields `Err`. pub fn parse(ctx: &Context, input: &mut BufRead, decoder: &mut Decode) -> ilc_base::Result<()> { for e in decoder.decode(&ctx, input) { try!(e); @@ -15,10 +19,14 @@ pub mod parse { } } +/// Log format conversion pub mod convert { use ilc_base::{self, Context, Decode, Encode}; use std::io::{BufRead, Write}; + /// Convert from one format to another (not necessarily different) format. In combination with a + /// timezone offset, this can be used to correct the timestamps. + /// Will return `Err` and abort conversion if the decoder yields `Err` or re-encoding fails. 
pub fn convert(ctx: &Context, input: &mut BufRead, decoder: &mut Decode, @@ -32,10 +40,14 @@ pub mod convert { } } +/// Last-seen of nicks pub mod seen { use ilc_base::{self, Context, Decode, Encode, Event}; use std::io::{BufRead, Write}; + /// Return the last message of a given nickname, searching from the beginning of the logs. + /// Will return `Err` if the decoder yields `Err`. This relies on absolute timestamps, and + /// behaviour without full dates is undefined. pub fn seen(nick: &str, ctx: &Context, input: &mut BufRead, @@ -59,10 +71,17 @@ pub mod seen { } } +/// Internal (as opposed to external, not to be confused with private) log sorting pub mod sort { use ilc_base::{self, Context, Decode, Encode, Event}; use std::io::{BufRead, Write}; + /// **Memory-intensive** + /// Sort the input, discarding faulty events. This will + /// read *all events* into memory, then sort them by time and write them back. + /// Behaviour is undefined if events lack full date information. + /// + /// *This should be an external merge-sort, but is a placeholder until implementation* pub fn sort(ctx: &Context, input: &mut BufRead, decoder: &mut Decode, @@ -81,6 +100,7 @@ pub mod sort { } } +/// Event deduplication pub mod dedup { use std::io::{BufRead, Write}; use std::hash::{Hash, Hasher}; @@ -88,7 +108,7 @@ pub mod dedup { use ilc_base::{self, Context, Decode, Encode, Event}; #[derive(Clone, Debug, PartialEq, Eq)] - pub struct NoTimeHash<'a>(pub Event<'a>); + struct NoTimeHash<'a>(pub Event<'a>); impl<'a> Hash for NoTimeHash<'a> { fn hash<H>(&self, state: &mut H) @@ -99,6 +119,10 @@ pub mod dedup { } } + /// Deduplicate subsequent identical elements, e.g. after a sorting + /// operation. This will **not** read all events into memory, and only + /// operate on a short window of events. Therefore, it'll only work correctly + /// on sorted or very short logs. pub fn dedup(ctx: &Context, input: &mut BufRead, decoder: &mut Decode, -- cgit v1.2.3