about summary refs log tree commit diff
diff options
context:
space:
mode:
author Till Höppner 2016-02-25 18:08:08 +0100
committer Till Höppner 2016-02-25 18:08:08 +0100
commit 79c26253bbd7d78156b85fc3014c16786edc15af (patch)
tree 9493c1683fded2efd786e2278da5ac38fc3067b0
parent df63760e0e345e1ec390c647060a157bf627b67f (diff)
download ilc-79c26253bbd7d78156b85fc3014c16786edc15af.tar.gz
ilc-79c26253bbd7d78156b85fc3014c16786edc15af.tar.xz
ilc-79c26253bbd7d78156b85fc3014c16786edc15af.zip
Merge alias to ease usage of sort+dedup
-rw-r--r-- Cargo.toml 9
-rw-r--r-- cli/src/lib.rs 20
-rw-r--r-- ops/src/freq.rs 5
-rw-r--r-- ops/src/lib.rs 26
4 files changed, 56 insertions, 4 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 0178b8b..44479cd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,6 @@
[package]
authors = ["Till Hoeppner <till@hoeppner.ws>"]
description = "IRC log converter/collector/cruncher"
-homepage = "https://github.com/tilpner/ilc"
license = "Apache-2.0"
name = "ilc"
repository = "https://github.com/tilpner/ilc"
@@ -13,9 +12,13 @@ doc = false
[dependencies]
ilc-cli = "0.1.0"
+ilc-base = "0.1.0"
+ilc-ops = "0.1.0"
+ilc-format-weechat = "0.1.0"
+ilc-format-energymech = "0.1.0"
[profile.release]
-debug = false
+debug = true
debug-assertions = false
-lto = true
+# lto = true
opt-level = 3
diff --git a/cli/src/lib.rs b/cli/src/lib.rs
index 0f91b89..40bc2e7 100644
--- a/cli/src/lib.rs
+++ b/cli/src/lib.rs
@@ -127,6 +127,9 @@ pub fn main() {
.subcommand(SubCommand::with_name("sort").about("Sorts a log by time"))
.subcommand(SubCommand::with_name("dedup")
.about("Removes duplicate log entries in close proximity"))
+ .subcommand(SubCommand::with_name("merge")
+ .about("Merges the input logs. This has to keep everything \
+ in memory"))
.get_matches();
let res = match args.subcommand() {
@@ -177,6 +180,23 @@ pub fn main() {
&mut *e.output(),
&*e.encoder())
}
+ ("merge", Some(args)) => {
+ // TODO: avoid (de-)serialization to weechat
+ let e = Environment(&args);
+ let (ctx, i, d, o, e) = (&e.context(),
+ &mut e.input(),
+ &mut *e.decoder(),
+ &mut *e.output(),
+ &*e.encoder());
+ let mut buffer = Vec::new();
+ match sort::sort(ctx, i, d, &mut buffer, &Weechat) {
+ Err(e) => error(Box::new(e)),
+ _ => (),
+ }
+ let mut read = io::Cursor::new(&buffer);
+ dedup::dedup(ctx, &mut read, &mut Weechat, o, e)
+
+ }
(sc, _) if !sc.is_empty() => panic!("Unimplemented subcommand `{}`, this is a bug", sc),
_ => die("No command specified"),
};
diff --git a/ops/src/freq.rs b/ops/src/freq.rs
index c5b363a..4a02d4c 100644
--- a/ops/src/freq.rs
+++ b/ops/src/freq.rs
@@ -1,3 +1,5 @@
+//! Per-nick word/line statistics
+
use ilc_base::{self, Context, Decode, Event};
use ilc_base::event::Type;
@@ -35,6 +37,9 @@ fn strip_nick_prefix(s: &str) -> &str {
}
// TODO: Don't print results, return Stats struct
+/// Return the `count` most active nicks, with lines, words and words per lines calculated.
+///
+/// `usize::MAX` is a good default if you don't want to cap the statistics.
pub fn freq(count: usize,
ctx: &Context,
input: &mut BufRead,
diff --git a/ops/src/lib.rs b/ops/src/lib.rs
index 8cd5607..e5d92cb 100644
--- a/ops/src/lib.rs
+++ b/ops/src/lib.rs
@@ -4,9 +4,13 @@ extern crate ilc_base;
mod ageset;
pub mod freq;
+/// No-op log parsing
pub mod parse {
use ilc_base::{self, Context, Decode};
use std::io::BufRead;
+
+ /// Simply parse the input, without further validation or conversion. No information is stored.
+ /// This will return `Err` if the decoder yields `Err`.
pub fn parse(ctx: &Context, input: &mut BufRead, decoder: &mut Decode) -> ilc_base::Result<()> {
for e in decoder.decode(&ctx, input) {
try!(e);
@@ -15,10 +19,14 @@ pub mod parse {
}
}
+/// Log format conversion
pub mod convert {
use ilc_base::{self, Context, Decode, Encode};
use std::io::{BufRead, Write};
+ /// Convert from one format to another, not necessarily different, format. In combination with a
+ /// timezone offset, this can be used to correct the timestamps.
+ /// Will return `Err` and abort conversion if the decoder yields `Err` or re-encoding fails.
pub fn convert(ctx: &Context,
input: &mut BufRead,
decoder: &mut Decode,
@@ -32,10 +40,14 @@ pub mod convert {
}
}
+/// Last-seen of nicks
pub mod seen {
use ilc_base::{self, Context, Decode, Encode, Event};
use std::io::{BufRead, Write};
+ /// Return the last message of a given nickname, searching from the beginning of the logs.
+ /// Will return `Err` if the decoder yields `Err`. This relies on absolute timestamps, and
+ /// behaviour without full dates is undefined.
pub fn seen(nick: &str,
ctx: &Context,
input: &mut BufRead,
@@ -59,10 +71,17 @@ pub mod seen {
}
}
+/// Internal (as opposed to external, not to be confused with private) log sorting
pub mod sort {
use ilc_base::{self, Context, Decode, Encode, Event};
use std::io::{BufRead, Write};
+ /// **Memory-intensive**
+ /// Sort the input, discarding faulty events. This will
+ /// read *all events* into memory, then sort them by time and write them back.
+ /// Behaviour is undefined if events lack full date information.
+ ///
+ /// *This should be an external merge-sort, but is a placeholder until implementation*
pub fn sort(ctx: &Context,
input: &mut BufRead,
decoder: &mut Decode,
@@ -81,6 +100,7 @@ pub mod sort {
}
}
+/// Event deduplication
pub mod dedup {
use std::io::{BufRead, Write};
use std::hash::{Hash, Hasher};
@@ -88,7 +108,7 @@ pub mod dedup {
use ilc_base::{self, Context, Decode, Encode, Event};
#[derive(Clone, Debug, PartialEq, Eq)]
- pub struct NoTimeHash<'a>(pub Event<'a>);
+ struct NoTimeHash<'a>(pub Event<'a>);
impl<'a> Hash for NoTimeHash<'a> {
fn hash<H>(&self, state: &mut H)
@@ -99,6 +119,10 @@ pub mod dedup {
}
}
+ /// Deduplicate subsequent identical elements, e.g. after a sorting
+ /// operation. This will **not** read all events into memory, and only
+ /// operate on a short window of events. Therefore, it'll only work correctly
+ /// on sorted or very short logs.
pub fn dedup(ctx: &Context,
input: &mut BufRead,
decoder: &mut Decode,