From 9f5dd9dad6b13476bab2c6eb3c6528f8ad49311a Mon Sep 17 00:00:00 2001 From: Till Höppner Date: Thu, 25 Feb 2016 06:48:03 +0100 Subject: Refactor... everything. --- Cargo.toml | 18 +-- base/Cargo.toml | 9 ++ base/src/context.rs | 9 ++ base/src/error.rs | 54 +++++++ base/src/event.rs | 188 ++++++++++++++++++++++++ base/src/format.rs | 18 +++ base/src/lib.rs | 49 +++++++ base/src/mod.rs | 47 ++++++ cli/Cargo.toml | 22 +++ cli/src/chain.rs | 54 +++++++ cli/src/lib.rs | 331 ++++++++++++++++++++++++++++++++++++++++++ formats/binary/Cargo.toml | 6 + formats/binary/src/lib.rs | 57 ++++++++ formats/energymech/Cargo.toml | 10 ++ formats/energymech/src/lib.rs | 262 +++++++++++++++++++++++++++++++++ formats/irssi/Cargo.toml | 6 + formats/irssi/src/lib.rs | 205 ++++++++++++++++++++++++++ formats/msgpack/Cargo.toml | 6 + formats/msgpack/src/lib.rs | 62 ++++++++ formats/weechat/Cargo.toml | 10 ++ formats/weechat/src/lib.rs | 241 ++++++++++++++++++++++++++++++ ops/Cargo.toml | 9 ++ ops/src/ageset.rs | 47 ++++++ ops/src/freq.rs | 87 +++++++++++ ops/src/lib.rs | 127 ++++++++++++++++ src/main.rs | 125 +--------------- 26 files changed, 1922 insertions(+), 137 deletions(-) create mode 100644 base/Cargo.toml create mode 100644 base/src/context.rs create mode 100644 base/src/error.rs create mode 100644 base/src/event.rs create mode 100644 base/src/format.rs create mode 100644 base/src/lib.rs create mode 100644 base/src/mod.rs create mode 100644 cli/Cargo.toml create mode 100644 cli/src/chain.rs create mode 100644 cli/src/lib.rs create mode 100644 formats/binary/Cargo.toml create mode 100644 formats/binary/src/lib.rs create mode 100644 formats/energymech/Cargo.toml create mode 100644 formats/energymech/src/lib.rs create mode 100644 formats/irssi/Cargo.toml create mode 100644 formats/irssi/src/lib.rs create mode 100644 formats/msgpack/Cargo.toml create mode 100644 formats/msgpack/src/lib.rs create mode 100644 formats/weechat/Cargo.toml create mode 100644 formats/weechat/src/lib.rs create mode 100644 ops/Cargo.toml create mode 100644 ops/src/ageset.rs create mode 100644 ops/src/freq.rs create mode 100644 ops/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 38e294b..1b06b77 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,25 +5,15 @@ homepage = "https://github.com/tilpner/ilc" license = "Apache-2.0" name = "ilc" repository = "https://github.com/tilpner/ilc" -version = "0.1.2" +version = "0.2.0" [[bin]] -doc = false name = "ilc" +doc = false [dependencies] -tendril = "0.2.1" -bincode = "0.4.0" -blist = "0.0.4" -chrono = "0.2.18" -clap = "2.0.2" -env_logger = "0.3.2" -glob = "0.2.10" -log = "0.3.5" -regex = "0.1.48" -rmp = "0.7.3" -rmp-serialize = "0.7.0" -rustc-serialize = "0.3.16" +# ilc-cli = "*" +ilc-cli = { path = "cli" } [profile.release] debug = false diff --git a/base/Cargo.toml b/base/Cargo.toml new file mode 100644 index 0000000..539990b --- /dev/null +++ b/base/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "ilc-base" +version = "0.1.0" +authors = ["Till Höppner "] + +[dependencies] +log = "0.3.5" +chrono = "0.2.19" +rustc-serialize = "0.3.18" diff --git a/base/src/context.rs b/base/src/context.rs new file mode 100644 index 0000000..4393457 --- /dev/null +++ b/base/src/context.rs @@ -0,0 +1,9 @@ + +use chrono::naive::date::NaiveDate; +use chrono::offset::fixed::FixedOffset; + +pub struct Context { + pub timezone: FixedOffset, + pub override_date: Option, + pub channel: Option, +} diff --git a/base/src/error.rs b/base/src/error.rs new file mode 100644 index 0000000..2806c78 --- /dev/null +++ b/base/src/error.rs @@ -0,0 +1,54 @@ +use std::{error, fmt, io, result}; +use std::error::Error as E; + +use chrono::format::ParseError; + +pub type Result = result::Result; + +#[derive(Debug)] +pub enum Error { + Parse(String), + Chrono(ParseError), + Io(io::Error), + Custom(Box), +} + +impl fmt::Display for Error { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.write_str(self.description()) + } +} + +impl error::Error for Error { + fn description(&self) -> &str { + use self::Error::*; + match self { + &Parse(_) => "error while parsing", + &Chrono(_) => "error while parsing time strings", + &Io(_) => "error during input/output", + &Custom(ref e) => e.description(), + } + } + + fn cause(&self) -> Option<&error::Error> { + use self::Error::*; + match self { + &Parse(ref _e) => None, + &Chrono(ref e) => Some(e), + &Io(ref e) => Some(e), + &Custom(ref e) => e.cause(), + } + } +} + +impl From for Error { + fn from(err: ParseError) -> Error { + Error::Chrono(err) + } +} + +impl From for Error { + fn from(err: io::Error) -> Error { + Error::Io(err) + } +} diff --git a/base/src/event.rs b/base/src/event.rs new file mode 100644 index 0000000..e357800 --- /dev/null +++ b/base/src/event.rs @@ -0,0 +1,188 @@ +// Copyright 2015 Till Höppner +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Common structures to represent the actual log data in memory. +//! These will be used by all formats for encoding and decoding. + +use std::borrow::Cow; +use std::cmp::Ordering; + +use chrono::naive::time::NaiveTime; +use chrono::offset::fixed::FixedOffset; +use chrono::offset::local::Local; +use chrono::offset::TimeZone; + +/// A whole log, in memory. This structure does not specify its +/// use. It may represent a private query, or the log of a channel. +pub struct Log<'a> { + pub entries: Vec>, +} + +/// Different log formats carry different amounts of information. Some might +/// hold enough information to calculate precise timestamps, others might +/// only suffice for the time of day. +#[derive(Clone, Debug, PartialEq, Eq, Ord, Hash, RustcEncodable, RustcDecodable)] +pub enum Time { + Unknown, + Hms(u8, u8, u8), + Timestamp(i64), +} + +impl Time { + pub fn from_format(tz: &FixedOffset, s: &str, f: &str) -> Time { + tz.datetime_from_str(s, f) + .map(|d| d.timestamp()) + .map(Time::Timestamp) + .unwrap_or(Time::Unknown) + } + + pub fn with_format(&self, tz: &FixedOffset, f: &str) -> String { + match self { + &Time::Unknown => panic!("Time data for this event is not present"), + &Time::Hms(h, m, s) => { + format!("{}", + NaiveTime::from_hms(h as u32, m as u32, s as u32).format(f)) + } + &Time::Timestamp(t) => format!("{}", tz.timestamp(t, 0).format(f)), + } + } + + pub fn as_timestamp(&self) -> i64 { + use self::Time::*; + match self { + &Unknown => 0, + &Hms(h, m, s) => { + Local::today() + .and_hms(h as u32, m as u32, s as u32) + .timestamp() + } + &Timestamp(i) => i, + } + } + + pub fn to_timestamp(&self) -> Time { + Time::Timestamp(self.as_timestamp()) + } +} + +impl PartialOrd for Time { + fn partial_cmp(&self, other: &Time) -> Option { + use self::Time::*; + match (self, other) { + (&Unknown, _) | (_, &Unknown) => None, + (&Hms(a_h, a_m, a_s), &Hms(b_h, b_m, b_s)) => { + if (a_h >= b_h && a_m >= b_m && a_s > b_s) || + (a_h >= b_h && a_m > b_m && a_s >= b_s) || + (a_h > b_h && a_m >= b_m && a_s >= b_s) { + Some(Ordering::Greater) + } else { + Some(Ordering::Less) + } + } + (&Timestamp(a), &Timestamp(b)) => Some(a.cmp(&b)), + _ => unimplemented!(), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash, RustcEncodable, RustcDecodable)] +pub struct Event<'a> { + pub ty: Type<'a>, + pub time: Time, + pub channel: Option>, +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash, RustcEncodable, RustcDecodable)] +pub struct User<'a> { + nicks: Cow<'a, str>, +} + +/// All representable events, such as messages, quits, joins +/// and topic changes. +#[derive(Clone, Debug, Hash, PartialEq, Eq, RustcEncodable, RustcDecodable)] +pub enum Type<'a> { + Connect, + Disconnect, + Msg { + from: Cow<'a, str>, + content: Cow<'a, str>, + }, + Action { + from: Cow<'a, str>, + content: Cow<'a, str>, + }, + Join { + nick: Cow<'a, str>, + mask: Option>, + }, + Part { + nick: Cow<'a, str>, + mask: Option>, + reason: Option>, + }, + Quit { + nick: Cow<'a, str>, + mask: Option>, + reason: Option>, + }, + Nick { + old_nick: Cow<'a, str>, + new_nick: Cow<'a, str>, + }, + Notice { + from: Cow<'a, str>, + content: Cow<'a, str>, + }, + Kick { + kicked_nick: Cow<'a, str>, + kicking_nick: Option>, + kick_message: Option>, + }, + Topic { + topic: Cow<'a, str>, + }, + TopicChange { + nick: Option>, + new_topic: Cow<'a, str>, + }, + Mode { + nick: Option>, + mode: Cow<'a, str>, + masks: Cow<'a, str>, + }, +} + +impl<'a> Type<'a> { + pub fn involves(&self, needle: &str) -> bool { + use self::Type::*; + match self { + &Msg { ref from, .. } => from == needle, + &Action { ref from, .. } => from == needle, + &Join { ref nick, .. } => nick == needle, + &Part { ref nick, .. } => nick == needle, + &Quit { ref nick, .. } => nick == needle, + &Nick { ref old_nick, ref new_nick, .. } => old_nick == needle || new_nick == needle, + &Notice { ref from, .. } => from == needle, + &Kick { ref kicked_nick, ref kicking_nick, .. } => { + *kicked_nick == Cow::Borrowed(needle) || + kicking_nick.as_ref().map_or(false, |k| k.as_ref() == Cow::Borrowed(needle)) + } + &TopicChange { ref nick, .. } => nick.as_ref().map_or(false, |k| k.as_ref() == needle), + &Mode { ref nick, .. } => { + nick.as_ref().map_or(false, |k| k.as_ref() == Cow::Borrowed(needle)) + } + _ => false, + } + } +} diff --git a/base/src/format.rs b/base/src/format.rs new file mode 100644 index 0000000..46b2da6 --- /dev/null +++ b/base/src/format.rs @@ -0,0 +1,18 @@ +use std::borrow::Cow; + +pub fn rejoin(s: &[&str], splits: &[char]) -> Cow<'static, str> { + let len = s.iter().map(|s| s.len()).fold(0, |a, b| a + b); + let mut out = s.iter() + .zip(splits.iter()) + .fold(String::with_capacity(len), |mut s, (b, &split)| { + s.push_str(b); + s.push(split); + s + }); + out.pop(); + Cow::Owned(out) +} + +pub fn strip_one(s: &str) -> String { + if s.len() >= 2 { s[1..(s.len() - 1)].to_owned() } else { String::new() } +} diff --git a/base/src/lib.rs b/base/src/lib.rs new file mode 100644 index 0000000..9e96478 --- /dev/null +++ b/base/src/lib.rs @@ -0,0 +1,49 @@ +// Copyright 2015 Till Höppner +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![feature(slice_patterns)] +extern crate chrono; +#[macro_use] +extern crate log; +extern crate rustc_serialize; +// extern crate bincode; +// extern crate rmp; +// extern crate rmp_serialize as msgpack; + +pub mod event; +// pub mod format; +pub mod context; +pub mod error; +pub mod format; + +use std::io::{BufRead, Write}; + +pub use context::Context; +pub use event::Event; +pub use error::*; + +pub trait Encode { + fn encode<'a>(&'a self, + context: &'a Context, + output: &'a mut Write, + event: &'a Event) + -> error::Result<()>; +} + +pub trait Decode { + fn decode<'a>(&'a mut self, + context: &'a Context, + input: &'a mut BufRead) + -> Box>> + 'a>; +} diff --git a/base/src/mod.rs b/base/src/mod.rs new file mode 100644 index 0000000..5374598 --- /dev/null +++ b/base/src/mod.rs @@ -0,0 +1,47 @@ +// Copyright 2015 Till Höppner +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Traits and structs for conversion between various formats. +//! As the source format may not provide the same information as the +//! target format, all formats must allow for omittable information. + +use std::iter; +use std::io::{BufRead, Write}; +use std::borrow::Cow; + +use event::Event; +use context::Context; + +pub use self::energymech::Energymech; +pub use self::weechat::Weechat; +pub use self::binary::Binary; +pub use self::msgpack::Msgpack; + +mod energymech; +mod weechat; +// pub mod irssi; +mod binary; +mod msgpack; + + +pub struct Dummy; + +impl Decode for Dummy { + fn decode<'a>(&'a mut self, + _context: &'a Context, + _input: &'a mut BufRead) + -> Box>> + 'a> { + Box::new(iter::empty()) + } +} diff --git a/cli/Cargo.toml b/cli/Cargo.toml new file mode 100644 index 0000000..1973cd3 --- /dev/null +++ b/cli/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "ilc-cli" +version = "0.1.0" +authors = ["Till Höppner "] + +[features] +default = ["ilc-format-weechat", "ilc-format-energymech"] + +[dependencies] +log = "0.3.5" +clap = "2.1.2" +chrono = "0.2.19" +env_logger = "0.3.2" +glob = "0.2.10" +# ilc-base = "*" +ilc-base = { path = "../base" } +# ilc-ops = "*" +ilc-ops = { path = "../ops" } +# ilc-format-weechat = { optional = true, version = "*" } +ilc-format-weechat = { optional = true, path = "../formats/weechat" } +# ilc-format-energymech = { optional = true, version = "*" } +ilc-format-energymech = { optional = true, path = "../formats/energymech" } diff --git a/cli/src/chain.rs b/cli/src/chain.rs new file mode 100644 index 0000000..a8014b8 --- /dev/null +++ b/cli/src/chain.rs @@ -0,0 +1,54 @@ +use std::io::{Read, Result, Write}; + +pub struct Chain { + elem: Vec, + index: usize, +} + +impl Read for Chain { + fn read(&mut self, buf: &mut [u8]) -> Result { + loop { + match self.elem.get_mut(self.index) { + Some(ref mut r) => { + match try!(r.read(buf)) { + 0 => self.index += 1, + n => return Ok(n), + } + } + None => return Ok(0), + } + } + } +} + +impl Write for Chain { + fn write(&mut self, buf: &[u8]) -> Result { + loop { + match self.elem.get_mut(self.index) { + Some(ref mut r) => { + match try!(r.write(buf)) { + 0 => self.index += 1, + n => return Ok(n), + } + } + None => return Ok(0), + } + } + } + + fn flush(&mut self) -> Result<()> { + match self.elem.get_mut(self.index) { + Some(ref mut r) => r.flush(), + None => Ok(()), + } + } +} + +impl Chain { + pub fn new(elem: Vec) -> Chain { + Chain { + index: 0, + elem: elem, + } + } +} diff --git a/cli/src/lib.rs b/cli/src/lib.rs new file mode 100644 index 0000000..f49150a --- /dev/null +++ b/cli/src/lib.rs @@ -0,0 +1,331 @@ +// Copyright 2015 Till Höppner +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +extern crate ilc_base; +extern crate ilc_ops; +extern crate ilc_format_weechat; +extern crate ilc_format_energymech; +extern crate chrono; +#[macro_use] +extern crate clap; +#[macro_use] +extern crate log; +extern crate env_logger; +extern crate glob; + +use ilc_base::{Context, Decode, Encode}; +use ilc_ops::*; +use ilc_format_weechat::Weechat; +use ilc_format_energymech::Energymech; + +use clap::{App, AppSettings, Arg, ArgMatches, SubCommand}; + +use chrono::{FixedOffset, NaiveDate}; + +use glob::glob; + +use std::str::FromStr; +use std::fs::File; +use std::path::{Path, PathBuf}; +use std::ffi::OsStr; +use std::io::{self, BufRead, BufReader, BufWriter, Write}; +use std::{process, usize}; +use std::error::Error; + +mod chain; + +pub fn main() { + env_logger::init().unwrap(); + let args = App::new("ilc") + .version(crate_version!()) + .setting(AppSettings::GlobalVersion) + .setting(AppSettings::VersionlessSubcommands) + .setting(AppSettings::ArgRequiredElseHelp) + .author("Till Höppner ") + .about("A converter and statistics utility for IRC log files") + .arg(Arg::with_name("timezone") + .help("UTC offset in the direction of the western hemisphere") + .global(true) + .takes_value(true) + .long("timezone") + .short("t")) + .arg(Arg::with_name("date") + .help("Override the date for this log, ISO 8601, YYYY-MM-DD") + .global(true) + .takes_value(true) + .long("date") + .short("d")) + .arg(Arg::with_name("infer_date") + .help("Try to use the filename as date for the current log") + .global(true) + .long("infer-date")) + .arg(Arg::with_name("channel") + .help("Set a channel for the current log") + .global(true) + .takes_value(true) + .long("channel") + .short("c")) + .arg(Arg::with_name("format") + .help("Set the input and output format for the current log") + .global(true) + .takes_value(true) + .long("format") + .short("f")) + .arg(Arg::with_name("input_format") + .help("Set the input format for the current log") + .global(true) + .conflicts_with("format") + .takes_value(true) + .long("inf")) + .arg(Arg::with_name("output_format") + .help("Set the output format for the current log") + .global(true) + .conflicts_with("format") + .takes_value(true) + .long("outf")) + .arg(Arg::with_name("input_files") + .help("Specify an input file, instead of stdin") + .global(true) + .takes_value(true) + .multiple(true) + .long("input") + .short("i")) + .arg(Arg::with_name("output_file") + .help("Specify an output file, instead of stdout") + .global(true) + .takes_value(true) + .long("output") + .short("o")) + .subcommand(SubCommand::with_name("parse") + .about("Parse the input, checking the format")) + .subcommand(SubCommand::with_name("convert") + .about("Convert from a source to a target format")) + .subcommand(SubCommand::with_name("freq") + .about("Analyse the activity of users by certain metrics") + .arg(Arg::with_name("count") + .help("The number of items to be displayed") + .takes_value(true) + .long("count"))) + .subcommand(SubCommand::with_name("seen") + .about("Print the last line a nick was active") + .arg(Arg::with_name("nick") + .help("The nick you're looking for") + .takes_value(true) + .required(true) + .index(1))) + .subcommand(SubCommand::with_name("sort").about("Sorts a log by time")) + .subcommand(SubCommand::with_name("dedup") + .about("Removes duplicate log entries in close proximity")) + .get_matches(); + + let res = match args.subcommand() { + ("parse", Some(args)) => { + let e = Environment(&args); + parse::parse(&e.context(), &mut e.input(), &mut *e.decoder()) + } + ("convert", Some(args)) => { + let e = Environment(&args); + convert::convert(&e.context(), + &mut e.input(), + &mut *e.decoder(), + &mut *e.output(), + &*e.encoder()) + } + ("freq", Some(args)) => { + let e = Environment(&args); + let count = value_t!(args, "count", usize).unwrap_or(usize::MAX); + freq::freq(count, + &e.context(), + &mut e.input(), + &mut *e.decoder(), + &mut e.output()) + } + ("seen", Some(args)) => { + let e = Environment(&args); + let nick = args.value_of("nick").expect("Required argument not present"); + seen::seen(nick, + &e.context(), + &mut e.input(), + &mut *e.decoder(), + &mut *e.output(), + &Weechat) + } + ("sort", Some(args)) => { + let e = Environment(&args); + sort::sort(&e.context(), + &mut e.input(), + &mut *e.decoder(), + &mut *e.output(), + &*e.encoder()) + } + ("dedup", Some(args)) => { + let e = Environment(&args); + dedup::dedup(&e.context(), + &mut e.input(), + &mut *e.decoder(), + &mut *e.output(), + &*e.encoder()) + } + (sc, _) if !sc.is_empty() => panic!("Unimplemented subcommand `{}`, this is a bug", sc), + _ => Ok(()), + }; + + match res { + Ok(()) => (), + Err(e) => error(Box::new(e)), + } +} + +pub fn error(e: Box) -> ! { + let _ = writeln!(&mut io::stderr(), "Error: {}", e); + let mut e = e.cause(); + while let Some(err) = e { + let _ = writeln!(&mut io::stderr(), "\t{}", err); + e = err.cause(); + } + process::exit(1) +} + +pub fn die(s: &str) -> ! { + let _ = writeln!(&mut io::stderr(), "Aborting: {}", s); + process::exit(1) +} + +pub fn decoder(format: &str) -> Option> { + match format { + "energymech" | "em" => Some(Box::new(Energymech)), + "weechat" | "w" => Some(Box::new(Weechat)), + // "irssi" => Some(Box::new(irssi::Irssi)), + // "binary" => Some(Box::new(Binary)), + // "msgpack" => Some(Box::new(Msgpack)), + _ => None, + } +} + +pub fn encoder(format: &str) -> Option> { + match format { + "energymech" | "em" => Some(Box::new(Energymech)), + "weechat" | "w" => Some(Box::new(Weechat)), + // "irssi" => Some(Box::new(irssi::Irssi)), + // "binary" => Some(Box::new(Binary)), + // "msgpack" => Some(Box::new(Msgpack)), + _ => None, + } +} + +pub fn force_decoder(s: Option<&str>) -> Box { + let inf = match s { + Some(s) => s, + None => die("You didn't specify the input format"), + }; + match decoder(&inf) { + Some(d) => d, + None => die(&format!("The format `{}` is unknown to me", inf)), + } +} + +pub fn force_encoder<'a>(s: Option<&str>) -> Box { + let outf = match s { + Some(s) => s, + None => die("You didn't specify the output format"), + }; + match encoder(&outf) { + Some(e) => e, + None => die(&format!("The format `{}` is unknown to me", outf)), + } +} + +pub struct Environment<'a>(pub &'a ArgMatches<'a>); + +impl<'a> Environment<'a> { + pub fn context(&self) -> Context { + build_context(self.0) + } + pub fn input(&self) -> Box { + open_files(gather_input(self.0)) + } + pub fn output(&self) -> Box { + open_output(self.0) + } + pub fn decoder(&self) -> Box { + force_decoder(self.0.value_of("format").or(self.0.value_of("input_format"))) + } + pub fn encoder(&self) -> Box { + force_encoder(self.0.value_of("format").or(self.0.value_of("output_format"))) + } +} + + +pub fn build_context(args: &ArgMatches) -> Context { + let mut context = Context { + timezone: FixedOffset::west(args.value_of("timezone") + .and_then(|s| s.parse().ok()) + .unwrap_or(0)), + override_date: args.value_of("date").and_then(|d| NaiveDate::from_str(&d).ok()), + channel: args.value_of("channel").map(str::to_owned).clone(), + }; + if args.is_present("infer_date") { + let input_files = gather_input(args); + match input_files.len() { + 0 => die("No input files given, can't infer date"), + 1 => { + if let Some(date) = input_files.get(0) + .map(PathBuf::as_path) + .and_then(Path::file_stem) + .and_then(OsStr::to_str) + .and_then(|s: &str| NaiveDate::from_str(s).ok()) { + context.override_date = Some(date); + } + } + _n => die("Too many input files, can't infer date"), + } + } + context +} + +pub fn gather_input(args: &ArgMatches) -> Vec { + if let Some(iter) = args.values_of("input_files") { + iter.flat_map(|p| { + match glob(p) { + Ok(paths) => paths, + Err(e) => die(&format!("{}", e.msg)), + } + }) + .filter_map(Result::ok) + .collect() + } else { + Vec::new() + } +} + +pub fn open_files(files: Vec) -> Box { + if files.len() > 0 { + Box::new(BufReader::new(chain::Chain::new(files.iter() + .map(|p| File::open(p).unwrap()) + .collect()))) + } else { + Box::new(BufReader::new(io::stdin())) + } +} + +pub fn open_output(args: &ArgMatches) -> Box { + if let Some(out) = args.value_of("output_file") { + match File::create(out) { + Ok(f) => Box::new(BufWriter::new(f)), + Err(e) => error(Box::new(e)), + } + } else { + Box::new(BufWriter::new(io::stdout())) + } +} diff --git a/formats/binary/Cargo.toml b/formats/binary/Cargo.toml new file mode 100644 index 0000000..a4dc9e7 --- /dev/null +++ b/formats/binary/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "ilc-format-binary" +version = "0.1.0" +authors = ["Till Höppner "] + +[dependencies] diff --git a/formats/binary/src/lib.rs b/formats/binary/src/lib.rs new file mode 100644 index 0000000..7cc4281 --- /dev/null +++ b/formats/binary/src/lib.rs @@ -0,0 +1,57 @@ +// Copyright 2015 Till Höppner +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::io::{BufRead, Write}; +use std::iter::Iterator; + +use event::Event; +use context::Context; +use format::{Decode, Encode}; + +use bincode::{self, SizeLimit}; + +pub struct Binary; + +pub struct Iter<'a> { + input: &'a mut BufRead, +} + +impl<'a> Iterator for Iter<'a> { + type Item = ::Result>; + fn next(&mut self) -> Option<::Result>> { + Some(bincode::rustc_serialize::decode_from::<_, Event>(&mut self.input, + SizeLimit::Infinite) + .map_err(|_| ::IlcError::BincodeDecode)) + } +} + +impl Encode for Binary { + fn encode<'a>(&'a self, + _context: &'a Context, + mut output: &'a mut Write, + event: &'a Event) + -> ::Result<()> { + bincode::rustc_serialize::encode_into(event, &mut output, SizeLimit::Infinite) + .map_err(|_| ::IlcError::BincodeEncode) + } +} + +impl Decode for Binary { + fn decode<'a>(&'a mut self, + _context: &'a Context, + input: &'a mut BufRead) + -> Box>> + 'a> { + Box::new(Iter { input: input }) + } +} diff --git a/formats/energymech/Cargo.toml b/formats/energymech/Cargo.toml new file mode 100644 index 0000000..7abcb49 --- /dev/null +++ b/formats/energymech/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "ilc-format-energymech" +version = "0.1.0" +authors = ["Till Höppner "] + +[dependencies] +log = "0.3.5" +chrono = "0.2.19" +# ilc-base = "*" +ilc-base = { path = "../../base" } diff --git a/formats/energymech/src/lib.rs b/formats/energymech/src/lib.rs new file mode 100644 index 0000000..16795cf --- /dev/null +++ b/formats/energymech/src/lib.rs @@ -0,0 +1,262 @@ +// Copyright 2015 Till Höppner +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![feature(slice_patterns)] + +#[macro_use] +extern crate log; +extern crate ilc_base; +extern crate chrono; + +use std::io::{BufRead, Write}; +use std::borrow::{Cow, ToOwned}; +use std::iter::Iterator; + +use ilc_base::event::{Event, Time, Type}; +use ilc_base::format::{rejoin, strip_one}; +use ilc_base::{Context, Decode, Encode}; + +use log::LogLevel::Info; + +use chrono::*; + +pub struct Energymech; + +static TIME_FORMAT: &'static str = "%H:%M:%S"; + +pub struct Iter<'a> { + context: &'a Context, + input: &'a mut BufRead, + buffer: Vec, +} + +impl<'a> Iterator for Iter<'a> { + type Item = ilc_base::Result>; + fn next(&mut self) -> Option>> { + fn parse_time(context: &Context, time: &str) -> Time { + let h = time[1..3].parse::().unwrap(); + let m = time[4..6].parse::().unwrap(); + let s = time[7..9].parse::().unwrap(); + if let Some(date) = context.override_date { + Time::Timestamp(context.timezone + .from_local_date(&date) + .and_time(NaiveTime::from_hms(h, m, s)) + .single() + .expect("Transformed log times can't be represented, due \ + to timezone transitions") + .timestamp()) + } else { + Time::Hms(h as u8, m as u8, s as u8) + } + } + + loop { + self.buffer.clear(); + match self.input.read_until(b'\n', &mut self.buffer) { + Ok(0) | Err(_) => return None, + Ok(_) => (), + } + + let buffer = String::from_utf8_lossy(&self.buffer); + + let mut split_tokens: Vec = Vec::new(); + let tokens = buffer.split(|c: char| { + if c.is_whitespace() { + split_tokens.push(c); + true + } else { + false + } + }) + .collect::>(); + + if log_enabled!(Info) { + info!("Original: `{}`", buffer); + info!("Parsing: {:?}", tokens); + } + + match &tokens[..tokens.len() - 1] { + [time, "*", nick, content..] => { + return Some(Ok(Event { + ty: Type::Action { + from: nick.to_owned().into(), + content: rejoin(content, &split_tokens[3..]), + }, + time: parse_time(&self.context, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + [time, "***", old, "is", "now", "known", "as", new] => { + return Some(Ok(Event { + ty: Type::Nick { + old_nick: old.to_owned().into(), + new_nick: new.to_owned().into(), + }, + time: parse_time(&self.context, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + [time, "***", nick, "sets", "mode:", mode, masks..] => { + return Some(Ok(Event { + ty: Type::Mode { + nick: Some(nick.to_owned().into()), + mode: mode.to_owned().into(), + masks: rejoin(&masks, &split_tokens[6..]).to_owned().into(), + }, + time: parse_time(&self.context, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + [time, "***", "Joins:", nick, host] => { + return Some(Ok(Event { + ty: Type::Join { + nick: nick.to_owned().into(), + mask: Some(strip_one(host).into()), + }, + time: parse_time(&self.context, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + [time, "***", "Parts:", nick, host, reason..] => { + return Some(Ok(Event { + ty: Type::Part { + nick: nick.to_owned().into(), + mask: Some(strip_one(host).into()), + reason: Some(strip_one(&rejoin(reason, &split_tokens[5..])).into()), + }, + time: parse_time(&self.context, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + [time, "***", "Quits:", nick, host, reason..] => { + return Some(Ok(Event { + ty: Type::Quit { + nick: nick.to_owned().into(), + mask: Some(strip_one(host).into()), + reason: Some(strip_one(&rejoin(reason, &split_tokens[5..])).into()), + }, + time: parse_time(&self.context, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + [time, "***", nick, "changes", "topic", "to", topic..] => { + return Some(Ok(Event { + ty: Type::TopicChange { + nick: Some(nick.to_owned().into()), + new_topic: strip_one(&rejoin(topic, &split_tokens[6..])).into(), + }, + time: parse_time(&self.context, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + [time, nick, content..] if nick.starts_with('<') && nick.ends_with('>') => { + return Some(Ok(Event { + ty: Type::Msg { + from: strip_one(nick).into(), + content: rejoin(content, &split_tokens[2..]), + }, + time: parse_time(&self.context, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + _ => (), + } + } + } +} + +impl Decode for Energymech { + fn decode<'a>(&'a mut self, + context: &'a Context, + input: &'a mut BufRead) + -> Box>> + 'a> { + Box::new(Iter { + context: context, + input: input, + buffer: Vec::new(), + }) + } +} + +impl Encode for Energymech { + fn encode<'a>(&'a self, + context: &'a Context, + mut output: &'a mut Write, + event: &'a Event) + -> ilc_base::Result<()> { + match event { + &Event { ty: Type::Msg { ref from, ref content }, ref time, .. } => { + try!(writeln!(&mut output, + "[{}] <{}> {}", + time.with_format(&context.timezone, TIME_FORMAT), + from, + content)) + } + &Event { ty: Type::Action { ref from, ref content }, ref time, .. } => { + try!(writeln!(&mut output, + "[{}] * {} {}", + time.with_format(&context.timezone, TIME_FORMAT), + from, + content)) + } + &Event { ty: Type::Nick { ref old_nick, ref new_nick }, ref time, .. } => { + try!(writeln!(&mut output, + "[{}] *** {} is now known as {}", + time.with_format(&context.timezone, TIME_FORMAT), + old_nick, + new_nick)) + } + &Event { ty: Type::Mode { ref nick, ref mode, ref masks }, ref time, .. } => { + try!(writeln!(&mut output, + "[{}] *** {} sets mode: {} {}", + time.with_format(&context.timezone, TIME_FORMAT), + nick.as_ref().expect("Nickname not present, but required."), + mode, + masks)) + } + &Event { ty: Type::Join { ref nick, ref mask }, ref time, .. } => { + try!(writeln!(&mut output, + "[{}] *** Joins: {} ({})", + time.with_format(&context.timezone, TIME_FORMAT), + nick, + mask.as_ref().expect("Mask not present, but required."))) + } + &Event { ty: Type::Part { ref nick, ref mask, ref reason }, ref time, .. } => { + try!(writeln!(&mut output, + "[{}] *** Parts: {} ({}) ({})", + time.with_format(&context.timezone, TIME_FORMAT), + nick, + mask.as_ref().expect("Mask not present, but required."), + reason.as_ref().unwrap_or(&Cow::Borrowed("")))) + } + &Event { ty: Type::Quit { ref nick, ref mask, ref reason }, ref time, .. } => { + try!(writeln!(&mut output, + "[{}] *** Quits: {} ({}) ({})", + time.with_format(&context.timezone, TIME_FORMAT), + nick, + mask.as_ref().expect("Mask not present, but required."), + reason.as_ref().expect("Reason not present, but required."))) + } + &Event { ty: Type::TopicChange { ref nick, ref new_topic }, ref time, .. } => { + try!(writeln!(&mut output, + "[{}] *** {} changes topic to '{}'", + time.with_format(&context.timezone, TIME_FORMAT), + nick.as_ref().expect("Nick not present, but required."), + new_topic)) + } + _ => (), + } + Ok(()) + } +} diff --git a/formats/irssi/Cargo.toml b/formats/irssi/Cargo.toml new file mode 100644 index 0000000..d59ecf2 --- /dev/null +++ b/formats/irssi/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "ilc-format-irssi" +version = "0.1.0" +authors = ["Till Höppner "] + +[dependencies] diff --git a/formats/irssi/src/lib.rs b/formats/irssi/src/lib.rs new file mode 100644 index 0000000..6afcd61 --- /dev/null +++ b/formats/irssi/src/lib.rs @@ -0,0 +1,205 @@ +// Copyright 2015 Till Höppner +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::io::{ BufRead, Write }; +use std::borrow::{ ToOwned }; +use std::iter::{ Iterator }; + +use event::{ Event, Type, Time }; +use context::Context; +use format::{ Encode, Decode, rejoin, strip_one }; + +use l::LogLevel::Info; + +pub struct Irssi; + +static LOG_OPEN_FORMAT: &'static str = "%a %b %e %T %Y"; +static LINE_FORMAT: &'static str = "%H:%M"; + +pub struct Iter<'a> { + context: &'a Context, + input: &'a mut BufRead, + buffer: Vec +} + +impl<'a> Iterator for Iter<'a> { + type Item = ::Result>; + fn next(&mut self) -> Option<::Result>> { + fn parse_time(c: &Context, date: &str, time: &str) -> Time { + Time::from_format(&c.timezone, &format!("{} {}", date, time), TIME_DATE_FORMAT) + } + + loop { + self.buffer.clear(); + match self.input.read_until(b'\n', &mut self.buffer) { + Ok(0) | Err(_) => return None, + Ok(_) => () + } + + let buffer = String::from_utf8_lossy(&self.buffer); + + let mut split_tokens: Vec = Vec::new(); + let tokens = buffer.split(|c: char| { + if c.is_whitespace() { split_tokens.push(c); true } else { false } + }).collect::>(); + + if log_enabled!(Info) { + info!("Original: `{}`", buffer); + info!("Parsing: {:?}", tokens); + } + + match &tokens[..tokens.len() - 1] { + ["---", "Log", "opened", day_of_week, month, day, time, year] => { + year + }, + ["---", "Log", "closed", day_of_week, month, day, time, year] + => return Some(Ok(Event { + ty: Type::Disconnect, + time: parse_time(&self.context, date, time), + channel: self.context.channel.clone().map(Into::into) + })), + [time, "-!-", nick, host, "has", "joined", channel] + => return Some(Ok(Event { + ty: Type::Join { + nick: nick.to_owned().into(), + mask: Some(strip_one(host).into()), + }, + channel: Some(channel.to_owned().into()), + time: parse_time(&self.context, date, time) + })), + [time, "-!-", nick, host, "has", "left", channel, reason..] + => return Some(Ok(Event { + ty: Type::Part { + nick: nick.to_owned().into(), + mask: Some(strip_one(host).into()), + reason: Some(strip_one(&rejoin(reason, &split_tokens[8..])).into()), + }, + channel: Some(channel.to_owned().into()), + time: parse_time(&self.context, date, time) + })), + [time, "-!-", nick, host, "has", "quit", reason..] + => return Some(Ok(Event { + ty: Type::Quit { + nick: nick.to_owned().into(), + mask: Some(strip_one(host).into()), + reason: Some(strip_one(&rejoin(reason, &split_tokens[7..])).into()), + }, + time: parse_time(&self.context, date, time), + channel: self.context.channel.clone().map(Into::into) + })), + // TODO: reorder + [date, time, "--", notice, content..] + if notice.starts_with("Notice(") + => return Some(Ok(Event { + ty: Type::Notice { + from: notice["Notice(".len()..notice.len() - 2].to_owned().into(), + content: rejoin(content, &split_tokens[4..]), + }, + time: parse_time(&self.context, date, time), + channel: self.context.channel.clone().map(Into::into) + })), + [date, time, "--", nick, verb, "now", "known", "as", new_nick] + if verb == "is" || verb == "are" + => return Some(Ok(Event { + ty: Type::Nick { + old_nick: nick.to_owned().into(), + new_nick: new_nick.to_owned().into() + }, + time: parse_time(&self.context, date, time), + channel: self.context.channel.clone().map(Into::into) + })), + [date, time, sp, "*", nick, msg..] + if sp.clone().is_empty() + => return Some(Ok(Event { + ty: Type::Action { + from: nick.to_owned().into(), + content: rejoin(msg, &split_tokens[5..]), + }, + time: parse_time(&self.context, date, time), + channel: self.context.channel.clone().map(Into::into) + })), + [date, time, nick, msg..] + => return Some(Ok(Event { + ty: Type::Msg { + from: nick.to_owned().into(), + content: rejoin(msg, &split_tokens[3..]), + }, + time: parse_time(&self.context, date, time), + channel: self.context.channel.clone().map(Into::into) + })), + _ => () + } + } + } +} + +impl Decode for Irssi { + fn decode<'a>(&'a mut self, context: &'a Context, input: &'a mut BufRead) -> Box>> + 'a> { + Box::new(Iter { + context: context, + input: input, + buffer: Vec::new() + }) + } +} + +impl Encode for Irssi { + fn encode<'a>(&'a self, context: &'a Context, mut output: &'a mut Write, event: &'a Event) -> ::Result<()> { + match event { + &Event { ty: Type::Msg { ref from, ref content, .. }, ref time, .. } => { + try!(writeln!(&mut output, "{}\t{}\t{}", + time.with_format(&context.timezone, TIME_DATE_FORMAT), from, content)) + }, + &Event { ty: Type::Action { ref from, ref content, .. }, ref time, .. } => { + try!(writeln!(&mut output, "{}\t *\t{} {}", + time.with_format(&context.timezone, TIME_DATE_FORMAT), from, content)) + }, + &Event { ty: Type::Join { ref nick, ref mask, .. }, ref channel, ref time } => { + try!(writeln!(&mut output, "{}\t-->\t{} ({}) has joined {}", + time.with_format(&context.timezone, TIME_DATE_FORMAT), nick, + mask.as_ref().expect("Hostmask not present, but required."), + channel.as_ref().expect("Channel not present, but required."))) + }, + &Event { ty: Type::Part { ref nick, ref mask, ref reason }, ref channel, ref time } => { + try!(write!(&mut output, "{}\t<--\t{} ({}) has left {}", + time.with_format(&context.timezone, TIME_DATE_FORMAT), nick, + mask.as_ref().expect("Hostmask not present, but required."), + channel.as_ref().expect("Channel not present, but required."))); + if reason.is_some() && reason.as_ref().unwrap().len() > 0 { + try!(write!(&mut output, " ({})", reason.as_ref().unwrap())); + } + try!(write!(&mut output, "\n")) + }, + &Event { ty: Type::Quit { ref nick, ref mask, ref reason }, ref time, .. } => { + try!(write!(&mut output, "{}\t<--\t{} ({}) has quit", + time.with_format(&context.timezone, TIME_DATE_FORMAT), nick, + mask.as_ref().expect("Hostmask not present, but required."))); + if reason.is_some() && reason.as_ref().unwrap().len() > 0 { + try!(write!(&mut output, " ({})", reason.as_ref().unwrap())); + } + try!(write!(&mut output, "\n")) + }, + &Event { ty: Type::Disconnect, ref time, .. } => { + try!(writeln!(&mut output, "{}\t--\tirc: disconnected from server", + time.with_format(&context.timezone, TIME_DATE_FORMAT))) + }, + &Event { ty: Type::Notice { ref from, ref content }, ref time, .. } => { + try!(writeln!(&mut output, "{}\t--\tNotice({}): {}", + time.with_format(&context.timezone, TIME_DATE_FORMAT), from, content)) + }, + _ => () + } + Ok(()) + } +} diff --git a/formats/msgpack/Cargo.toml b/formats/msgpack/Cargo.toml new file mode 100644 index 0000000..1e336ca --- /dev/null +++ b/formats/msgpack/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "ilc-format-msgpack" +version = "0.1.0" +authors = ["Till Höppner "] + +[dependencies] diff --git a/formats/msgpack/src/lib.rs b/formats/msgpack/src/lib.rs new file mode 100644 index 0000000..36af1aa --- /dev/null +++ b/formats/msgpack/src/lib.rs @@ -0,0 +1,62 @@ +// Copyright 2015 Till Höppner +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::io::{BufRead, Write}; +use std::iter::Iterator; + +use event::Event; +use context::Context; +use format::{Decode, Encode}; + +use rustc_serialize::{Decodable, Encodable}; +use msgpack::{Decoder, Encoder}; +use rmp::decode::ReadError; + +pub struct Msgpack; + +pub struct Iter<'a> { + input: &'a mut BufRead, +} + +impl<'a> Iterator for Iter<'a> { + type Item = ::Result>; + fn next(&mut self) -> Option<::Result>> { + use msgpack::decode; + match Event::decode(&mut Decoder::new(&mut self.input)) { + Ok(e) => Some(Ok(e)), + Err(decode::Error::InvalidMarkerRead(ReadError::UnexpectedEOF)) => None, + Err(e) => Some(Err(::IlcError::MsgpackDecode(e))), + } + } +} + +impl Encode for Msgpack { + fn encode<'a>(&'a self, + _context: &'a Context, + output: &'a mut Write, + event: &'a Event) + -> ::Result<()> { + event.encode(&mut Encoder::new(output)) + .map_err(|e| ::IlcError::MsgpackEncode(e)) + } +} + +impl Decode for Msgpack { + fn decode<'a>(&'a mut self, + _context: &'a Context, + input: &'a mut BufRead) + -> Box>> + 'a> { + Box::new(Iter { input: input }) + } +} diff --git a/formats/weechat/Cargo.toml b/formats/weechat/Cargo.toml new file mode 100644 index 0000000..5b0ef3f --- /dev/null +++ b/formats/weechat/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "ilc-format-weechat" +version = "0.1.0" +authors = ["Till Höppner "] + +[dependencies] +log = "0.3.5" +chrono = "0.2.19" +# ilc-base = "*" +ilc-base = { path = "../../base" } diff --git a/formats/weechat/src/lib.rs b/formats/weechat/src/lib.rs new file mode 100644 index 0000000..09ad58a --- /dev/null +++ b/formats/weechat/src/lib.rs @@ -0,0 +1,241 @@ +// Copyright 2015 Till Höppner +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![feature(slice_patterns)] + +#[macro_use] +extern crate log; +extern crate ilc_base; + +use std::io::{BufRead, Write}; +use std::borrow::ToOwned; +use std::iter::Iterator; + +use ilc_base::event::{Event, Time, Type}; +use ilc_base::{Context, Decode, Encode}; +use ilc_base::format::{rejoin, strip_one}; + +use log::LogLevel::Info; + +pub struct Weechat; + +static TIME_DATE_FORMAT: &'static str = "%Y-%m-%d %H:%M:%S"; + +pub struct Iter<'a> { + context: &'a Context, + input: &'a mut BufRead, + buffer: Vec, +} + +impl<'a> Iterator for Iter<'a> { + type Item = ilc_base::Result>; + fn next(&mut self) -> Option>> { + fn parse_time(c: &Context, date: &str, time: &str) -> Time { + Time::from_format(&c.timezone, &format!("{} {}", date, time), TIME_DATE_FORMAT) + } + + loop { + self.buffer.clear(); + match self.input.read_until(b'\n', &mut self.buffer) { + Ok(0) | Err(_) => return None, + Ok(_) => (), + } + + let buffer = String::from_utf8_lossy(&self.buffer); + + let mut split_tokens: Vec = Vec::new(); + let tokens = buffer.split(|c: char| { + if c.is_whitespace() { + split_tokens.push(c); + true + } else { + false + } + }) + .collect::>(); + + if log_enabled!(Info) { + info!("Original: `{}`", buffer); + info!("Parsing: {:?}", tokens); + } + + match &tokens[..tokens.len() - 1] { + [date, time, "-->", nick, host, "has", "joined", channel, _..] => { + return Some(Ok(Event { + ty: Type::Join { + nick: nick.to_owned().into(), + mask: Some(strip_one(host).into()), + }, + channel: Some(channel.to_owned().into()), + time: parse_time(&self.context, date, time), + })) + } + [date, time, "<--", nick, host, "has", "left", channel, reason..] => { + return Some(Ok(Event { + ty: Type::Part { + nick: nick.to_owned().into(), + mask: Some(strip_one(host).into()), + reason: Some(strip_one(&rejoin(reason, &split_tokens[8..])).into()), + }, + channel: Some(channel.to_owned().into()), + time: parse_time(&self.context, date, time), + })) + } + [date, time, "<--", nick, host, "has", "quit", reason..] => { + return Some(Ok(Event { + ty: Type::Quit { + nick: nick.to_owned().into(), + mask: Some(strip_one(host).into()), + reason: Some(strip_one(&rejoin(reason, &split_tokens[7..])).into()), + }, + time: parse_time(&self.context, date, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + [date, time, "--", notice, content..] if notice.starts_with("Notice(") => { + return Some(Ok(Event { + ty: Type::Notice { + from: notice["Notice(".len()..notice.len() - 2].to_owned().into(), + content: rejoin(content, &split_tokens[4..]), + }, + time: parse_time(&self.context, date, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + [date, time, "--", "irc:", "disconnected", "from", "server", _..] => { + return Some(Ok(Event { + ty: Type::Disconnect, + time: parse_time(&self.context, date, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + [date, time, "--", nick, verb, "now", "known", "as", new_nick] if verb == "is" || + verb == "are" => { + return Some(Ok(Event { + ty: Type::Nick { + old_nick: nick.to_owned().into(), + new_nick: new_nick.to_owned().into(), + }, + time: parse_time(&self.context, date, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + [date, time, sp, "*", nick, msg..] if sp.clone().is_empty() => { + return Some(Ok(Event { + ty: Type::Action { + from: nick.to_owned().into(), + content: rejoin(msg, &split_tokens[5..]), + }, + time: parse_time(&self.context, date, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + [date, time, nick, msg..] => { + return Some(Ok(Event { + ty: Type::Msg { + from: nick.to_owned().into(), + content: rejoin(msg, &split_tokens[3..]), + }, + time: parse_time(&self.context, date, time), + channel: self.context.channel.clone().map(Into::into), + })) + } + _ => (), + } + } + } +} + +impl Decode for Weechat { + fn decode<'a>(&'a mut self, + context: &'a Context, + input: &'a mut BufRead) + -> Box>> + 'a> { + Box::new(Iter { + context: context, + input: input, + buffer: Vec::new(), + }) + } +} + +impl Encode for Weechat { + fn encode<'a>(&'a self, + context: &'a Context, + mut output: &'a mut Write, + event: &'a Event) + -> ilc_base::Result<()> { + match event { + &Event { ty: Type::Msg { ref from, ref content, .. }, ref time, .. } => { + try!(writeln!(&mut output, + "{}\t{}\t{}", + time.with_format(&context.timezone, TIME_DATE_FORMAT), + from, + content)) + } + &Event { ty: Type::Action { ref from, ref content, .. }, ref time, .. } => { + try!(writeln!(&mut output, + "{}\t *\t{} {}", + time.with_format(&context.timezone, TIME_DATE_FORMAT), + from, + content)) + } + &Event { ty: Type::Join { ref nick, ref mask, .. }, ref channel, ref time } => { + try!(writeln!(&mut output, + "{}\t-->\t{} ({}) has joined {}", + time.with_format(&context.timezone, TIME_DATE_FORMAT), + nick, + mask.as_ref().expect("Hostmask not present, but required."), + channel.as_ref().expect("Channel not present, but required."))) + } + &Event { ty: Type::Part { ref nick, ref mask, ref reason }, ref channel, ref time } => { + try!(write!(&mut output, + "{}\t<--\t{} ({}) has left {}", + time.with_format(&context.timezone, TIME_DATE_FORMAT), + nick, + mask.as_ref().expect("Hostmask not present, but required."), + channel.as_ref().expect("Channel not present, but required."))); + if reason.is_some() && reason.as_ref().unwrap().len() > 0 { + try!(write!(&mut output, " ({})", reason.as_ref().unwrap())); + } + try!(write!(&mut output, "\n")) + } + &Event { ty: Type::Quit { ref nick, ref mask, ref reason }, ref time, .. } => { + try!(write!(&mut output, + "{}\t<--\t{} ({}) has quit", + time.with_format(&context.timezone, TIME_DATE_FORMAT), + nick, + mask.as_ref().expect("Hostmask not present, but required."))); + if reason.is_some() && reason.as_ref().unwrap().len() > 0 { + try!(write!(&mut output, " ({})", reason.as_ref().unwrap())); + } + try!(write!(&mut output, "\n")) + } + &Event { ty: Type::Disconnect, ref time, .. } => { + try!(writeln!(&mut output, + "{}\t--\tirc: disconnected from server", + time.with_format(&context.timezone, TIME_DATE_FORMAT))) + } + &Event { ty: Type::Notice { ref from, ref content }, ref time, .. } => { + try!(writeln!(&mut output, + "{}\t--\tNotice({}): {}", + time.with_format(&context.timezone, TIME_DATE_FORMAT), + from, + content)) + } + _ => (), + } + Ok(()) + } +} diff --git a/ops/Cargo.toml b/ops/Cargo.toml new file mode 100644 index 0000000..d74126c --- /dev/null +++ b/ops/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "ilc-ops" +version = "0.1.0" +authors = ["Till Höppner "] + +[dependencies] +# ilc-base = "*" +ilc-base = { path = "../base" } +blist = "0.0.4" diff --git a/ops/src/ageset.rs b/ops/src/ageset.rs new file mode 100644 index 0000000..c97240f --- /dev/null +++ b/ops/src/ageset.rs @@ -0,0 +1,47 @@ +use std::collections::HashSet; +use std::hash::Hash; + +use blist::BList; + +/// So... this is a rather weird thing. +/// It allows to semi-efficiently check the oldest (earliest insertion) +/// elements for certain criteria and remove them in the order of insertion +/// if the criteria is met. +pub struct AgeSet { + fifo: BList, + set: HashSet, +} + +impl AgeSet + where T: Eq + Hash + Clone +{ + pub fn new() -> Self { + AgeSet { + fifo: BList::new(), + set: HashSet::new(), + } + } + + pub fn contains(&self, t: &T) -> bool { + self.set.contains(t) + } + + pub fn prune(&mut self, kill: F) + where F: Fn(&T) -> bool + { + while let Some(ref e) = self.fifo.front().map(T::clone) { + if kill(&e) { + let removed = self.fifo.pop_front().unwrap(); + self.set.remove(&e); + assert!(*e == removed); + } else { + break; + } + } + } + + pub fn push(&mut self, t: T) { + self.fifo.push_back(t.clone()); + self.set.insert(t); + } +} diff --git a/ops/src/freq.rs b/ops/src/freq.rs new file mode 100644 index 0000000..c5b363a --- /dev/null +++ b/ops/src/freq.rs @@ -0,0 +1,87 @@ +use ilc_base::{self, Context, Decode, Event}; +use ilc_base::event::Type; + +use std::collections::HashMap; +use std::io::{BufRead, Write}; + +struct Person { + lines: u32, + alpha_lines: u32, + words: u32, +} + +fn words_alpha(s: &str) -> (u32, bool) { + let mut alpha = false; + let mut words = 0; + for w in s.split_whitespace() { + if !w.is_empty() { + words += 1; + if w.chars().any(char::is_alphabetic) { + alpha = true + } + } + } + (words, alpha) +} + +fn strip_nick_prefix(s: &str) -> &str { + if s.is_empty() { + return s; + } + match s.as_bytes()[0] { + b'~' | b'&' | b'@' | b'%' | b'+' => &s[1..], + _ => s, + } +} + +// TODO: Don't print results, return Stats struct +pub fn freq(count: usize, + ctx: &Context, + input: &mut BufRead, + decoder: &mut Decode, + output: &mut Write) + -> ilc_base::Result<()> { + let mut stats: HashMap = HashMap::new(); + + for e in decoder.decode(&ctx, input) { + let m = try!(e); + match m { + Event { ty: Type::Msg { ref from, ref content, .. }, .. } => { + let nick = strip_nick_prefix(from); + if stats.contains_key(nick) { + let p: &mut Person = stats.get_mut(nick).unwrap(); + let (words, alpha) = words_alpha(content); + p.lines += 1; + if alpha { + p.alpha_lines += 1 + } + p.words += words; + } else { + let (words, alpha) = words_alpha(content); + stats.insert(nick.to_owned(), + Person { + lines: 1, + alpha_lines: if alpha { 1 } else { 0 }, + words: words, + }); + } + } + _ => (), + } + } + + let mut stats: Vec<(String, Person)> = stats.into_iter().collect(); + stats.sort_by(|&(_, ref a), &(_, ref b)| b.words.cmp(&a.words)); + + for &(ref name, ref stat) in stats.iter().take(count) { + try!(write!(output, + "{}:\n\tTotal lines: {}\n\tLines without alphabetic characters: {}\n\tTotal \ + words: {}\n\tWords per line: {}\n", + name, + stat.lines, + stat.lines - stat.alpha_lines, + stat.words, + stat.words as f32 / stat.lines as f32)); + } + Ok(()) +} diff --git a/ops/src/lib.rs b/ops/src/lib.rs new file mode 100644 index 0000000..8cd5607 --- /dev/null +++ b/ops/src/lib.rs @@ -0,0 +1,127 @@ +extern crate blist; +extern crate ilc_base; + +mod ageset; +pub mod freq; + +pub mod parse { + use ilc_base::{self, Context, Decode}; + use std::io::BufRead; + pub fn parse(ctx: &Context, input: &mut BufRead, decoder: &mut Decode) -> ilc_base::Result<()> { + for e in decoder.decode(&ctx, input) { + try!(e); + } + Ok(()) + } +} + +pub mod convert { + use ilc_base::{self, Context, Decode, Encode}; + use std::io::{BufRead, Write}; + + pub fn convert(ctx: &Context, + input: &mut BufRead, + decoder: &mut Decode, + output: &mut Write, + encoder: &Encode) + -> ilc_base::Result<()> { + for e in decoder.decode(&ctx, input) { + try!(encoder.encode(&ctx, output, &try!(e))); + } + Ok(()) + } +} + +pub mod seen { + use ilc_base::{self, Context, Decode, Encode, Event}; + use std::io::{BufRead, Write}; + + pub fn seen(nick: &str, + ctx: &Context, + input: &mut BufRead, + decoder: &mut Decode, + output: &mut Write, + encoder: &Encode) + -> ilc_base::Result<()> { + let mut last: Option = None; + for e in decoder.decode(&ctx, input) { + let m: Event = try!(e); + if m.ty.involves(nick) && + last.as_ref().map_or(true, + |last| m.time.as_timestamp() > last.time.as_timestamp()) { + last = Some(m) + } + } + if let Some(ref m) = last { + try!(encoder.encode(&ctx, output, m)); + } + Ok(()) + } +} + +pub mod sort { + use ilc_base::{self, Context, Decode, Encode, Event}; + use std::io::{BufRead, Write}; + + pub fn sort(ctx: &Context, + input: &mut BufRead, + decoder: &mut Decode, + output: &mut Write, + encoder: &Encode) + -> ilc_base::Result<()> { + let mut events: Vec = decoder.decode(&ctx, input) + .flat_map(Result::ok) + .collect(); + + events.sort_by(|a, b| a.time.cmp(&b.time)); + for e in events { + try!(encoder.encode(&ctx, output, &e)); + } + Ok(()) + } +} + +pub mod dedup { + use std::io::{BufRead, Write}; + use std::hash::{Hash, Hasher}; + use ageset::AgeSet; + use ilc_base::{self, Context, Decode, Encode, Event}; + + #[derive(Clone, Debug, PartialEq, Eq)] + pub struct NoTimeHash<'a>(pub Event<'a>); + + impl<'a> Hash for NoTimeHash<'a> { + fn hash(&self, state: &mut H) + where H: Hasher + { + self.0.ty.hash(state); + self.0.channel.hash(state); + } + } + + pub fn dedup(ctx: &Context, + input: &mut BufRead, + decoder: &mut Decode, + output: &mut Write, + encoder: &Encode) + -> ilc_base::Result<()> { + let mut backlog = AgeSet::new(); + + for e in decoder.decode(&ctx, input) { + if let Ok(e) = e { + let newest_event = e.clone(); + backlog.prune(move |a: &NoTimeHash| { + let age = newest_event.time.as_timestamp() - a.0.time.as_timestamp(); + age > 5000 + }); + // write `e` if it's a new event + let n = NoTimeHash(e); + if !backlog.contains(&n) { + try!(encoder.encode(&ctx, output, &n.0)); + backlog.push(n); + } + } + } + Ok(()) + } +} diff --git a/src/main.rs b/src/main.rs index 0f4215f..3371a9f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,126 +1,5 @@ -// Copyright 2015 Till Höppner -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -extern crate ilc; -extern crate chrono; -#[macro_use] -extern crate clap; -extern crate rustc_serialize; -#[macro_use] -extern crate log; -extern crate env_logger; -extern crate glob; -extern crate blist; - -use clap::{App, AppSettings, Arg, SubCommand}; - -mod chain; -mod ageset; -mod app; +extern crate ilc_cli; fn main() { - env_logger::init().unwrap(); - let args = App::new("ilc") - .version(crate_version!()) - .setting(AppSettings::GlobalVersion) - .setting(AppSettings::VersionlessSubcommands) - .setting(AppSettings::ArgRequiredElseHelp) - .author("Till Höppner ") - .about("A converter and statistics utility for IRC log files") - .arg(Arg::with_name("timezone") - .help("UTC offset in the direction of the western hemisphere") - .global(true) - .takes_value(true) - .long("timezone") - .short("t")) - .arg(Arg::with_name("date") - .help("Override the date for this log, ISO 8601, YYYY-MM-DD") - .global(true) - .takes_value(true) - .long("date") - .short("d")) - .arg(Arg::with_name("infer_date") - .help("Try to use the filename as date for the current log") - .global(true) - .long("infer-date")) - .arg(Arg::with_name("channel") - .help("Set a channel for the current log") - .global(true) - .takes_value(true) - .long("channel") - .short("c")) - .arg(Arg::with_name("format") - .help("Set the input and output format for the current log") - .global(true) - .takes_value(true) - .long("format") - .short("f")) - .arg(Arg::with_name("input_format") - .help("Set the input format for the current log") - .global(true) - .conflicts_with("format") - .takes_value(true) - .long("inf")) - .arg(Arg::with_name("output_format") - .help("Set the output format for the current log") - .global(true) - .conflicts_with("format") - .takes_value(true) - .long("outf")) - .arg(Arg::with_name("input_files") - .help("Specify an input file, instead of stdin") - .global(true) - .takes_value(true) - .multiple(true) - .long("input") - .short("i")) - .arg(Arg::with_name("output_file") - .help("Specify an output file, instead of stdout") - .global(true) - .takes_value(true) - .long("output") - .short("o")) - .subcommand(SubCommand::with_name("parse") - .about("Parse the input, checking the format")) - .subcommand(SubCommand::with_name("convert") - .about("Convert from a source to a target format")) - .subcommand(SubCommand::with_name("freq") - .about("Analyse the activity of users by certain metrics") - .arg(Arg::with_name("count") - .help("The number of items to be displayed") - .takes_value(true) - .long("count"))) - .subcommand(SubCommand::with_name("seen") - .about("Print the last line a nick was active") - .arg(Arg::with_name("nick") - .help("The nick you're looking for") - .takes_value(true) - .required(true) - .index(1))) - .subcommand(SubCommand::with_name("sort").about("Sorts a log by time")) - .subcommand(SubCommand::with_name("dedup") - .about("Removes duplicate log entries in close proximity")) - .get_matches(); - - match args.subcommand() { - ("parse", Some(args)) => app::parse::parse(args), - ("convert", Some(args)) => app::convert::convert(args), - ("freq", Some(args)) => app::freq::freq(args), - ("seen", Some(args)) => app::seen::seen(args), - ("sort", Some(args)) => app::sort::sort(args), - ("dedup", Some(args)) => app::dedup::dedup(args), - (sc, _) if !sc.is_empty() => panic!("Unimplemented subcommand `{}`, this is a bug", sc), - _ => (), - } + ilc_cli::main(); } -- cgit v1.2.3