From cadd814c28b03205c8277530ef09bffcdba44ec6 Mon Sep 17 00:00:00 2001 From: Till Höppner Date: Wed, 3 Feb 2016 03:38:32 +0100 Subject: Docopt -> clap, main.rs -> src/app Modularise the old main function, switch to clap for easier addition of CLI arguments --- src/app/freq.rs | 77 ++++++++++++ src/app/mod.rs | 226 ++++++++++++++++++++++++++++++++++ src/event.rs | 5 + src/format/mod.rs | 11 +- src/main.rs | 362 +++++++++++------------------------------------------- 5 files changed, 387 insertions(+), 294 deletions(-) create mode 100644 src/app/freq.rs create mode 100644 src/app/mod.rs (limited to 'src') diff --git a/src/app/freq.rs b/src/app/freq.rs new file mode 100644 index 0000000..9446b50 --- /dev/null +++ b/src/app/freq.rs @@ -0,0 +1,77 @@ +use clap::ArgMatches; + +use std::collections::HashMap; + +use ilc::event::{ Event, Type }; + +use super::*; + +struct Person { + lines: u32, + alpha_lines: u32, + words: u32 +} + +fn words_alpha(s: &str) -> (u32, bool) { + let mut alpha = false; + let mut words = 0; + for w in s.split_whitespace() { + if !w.is_empty() { + words += 1; + if w.chars().any(char::is_alphabetic) { alpha = true } + } + } + (words, alpha) +} + +fn strip_nick_prefix(s: &str) -> &str { + if s.is_empty() { return s } + match s.as_bytes()[0] { + b'~' | b'&' | b'@' | b'%' | b'+' => &s[1..], + _ => s + } +} + +pub fn freq(args: &ArgMatches) { + let env = Environment(args); + let (context, mut decoder, mut input, mut output) = (env.context(), env.decoder(), env.input(), env.output()); + + let mut stats: HashMap = HashMap::new(); + + for e in decoder.decode(&context, &mut input) { + let m = match e { + Ok(m) => m, + Err(err) => error(Box::new(err)) + }; + + match m { + Event { ty: Type::Msg { ref from, ref content, .. }, .. } => { + let nick = strip_nick_prefix(from); + if stats.contains_key(nick) { + let p: &mut Person = stats.get_mut(nick).unwrap(); + let (words, alpha) = words_alpha(content); + p.lines += 1; + if alpha { p.alpha_lines += 1 } + p.words += words; + } else { + let (words, alpha) = words_alpha(content); + stats.insert(nick.to_owned(), Person { + lines: 1, + alpha_lines: if alpha { 1 } else { 0 }, + words: words + }); + } + }, + _ => () + } + } + + let mut stats: Vec<(String, Person)> = stats.into_iter().collect(); + stats.sort_by(|&(_, ref a), &(_, ref b)| b.words.cmp(&a.words)); + + for &(ref name, ref stat) in stats.iter() { + let _ = write!(&mut output, + "{}:\n\tTotal lines: {}\n\tLines without alphabetic characters: {}\n\tTotal words: {}\n\tWords per line: {}\n", + name, stat.lines, stat.lines - stat.alpha_lines, stat.words, stat.words as f32 / stat.lines as f32); + } +} diff --git a/src/app/mod.rs b/src/app/mod.rs new file mode 100644 index 0000000..50117e7 --- /dev/null +++ b/src/app/mod.rs @@ -0,0 +1,226 @@ +use clap::ArgMatches; + +use chrono::offset::fixed::FixedOffset; +use chrono::naive::date::NaiveDate; + +use glob::glob; + +use std::process; +use std::str::FromStr; +use std::path::PathBuf; +use std::io::{ self, Write, BufWriter, BufRead, BufReader }; +use std::fs::File; +use std::error::Error; + +use ilc::context::Context; +use ilc::format::{ self, Encode, Decode }; + +use ::chain; + +pub mod freq; + +pub fn error(e: Box) -> ! { + let _ = writeln!(&mut io::stderr(), "Error: {}", e); + let mut e = e.cause(); + while let Some(err) = e { + let _ = writeln!(&mut io::stderr(), "\t{}", err); + e = err.cause(); + } + process::exit(1) +} + +pub fn die(s: &str) -> ! { + let _ = writeln!(&mut io::stderr(), "Aborting: {}", s); + process::exit(1) +} + +pub fn force_decoder(s: Option<&str>) -> Box { + let inf = match s { + Some(s) => s, + None => die("You didn't specify the input format") + }; + match format::decoder(&inf) { + Some(d) => d, + None => die(&format!("The format `{}` is unknown to me", inf)) + } +} + +pub fn force_encoder<'a>(s: Option<&str>) -> Box { + let outf = match s { + Some(s) => s, + None => die("You didn't specify the output format") + }; + match format::encoder(&outf) { + Some(e) => e, + None => die(&format!("The format `{}` is unknown to me", outf)) + } +} + +pub fn build_context(args: &ArgMatches) -> Context { + Context { + timezone: FixedOffset::west(args.value_of("timezone").and_then(|s| s.parse().ok()).unwrap_or(0)), + override_date: args.value_of("date").and_then(|d| NaiveDate::from_str(&d).ok()), + channel: args.value_of("channel").map(str::to_owned).clone() + } +} + +pub fn build_input(args: &ArgMatches) -> Box { + let input_files = args.values_of("input_files"); + if input_files.map(|files| files.count() > 0).unwrap_or(false) { + let input_files: Vec = if let Some(iter) = args.values_of("input_files") { + iter.flat_map(|p| { + match glob(p) { + Ok(paths) => paths, + Err(e) => die(&format!("{}", e.msg)) + } + }).filter_map(Result::ok).collect() + } else { Vec::new() }; + + /*if args.flag_infer_date { + if input_files.len() > 1 { die("Too many input files, can't infer date") } + if let Some(date) = input_files.iter().next() + .map(PathBuf::as_path) + .and_then(Path::file_stem) + .and_then(OsStr::to_str) + .and_then(|s: &str| NaiveDate::from_str(s).ok()) { + context.override_date = Some(date); + } + }*/ + + Box::new(BufReader::new(chain::Chain::new(input_files.iter().map(|p| File::open(p).unwrap()).collect()))) + } else { + Box::new(BufReader::new(io::stdin())) + } +} + +pub fn build_output(args: &ArgMatches) -> Box { + if let Some(out) = args.value_of("output_file") { + match File::create(out) { + Ok(f) => Box::new(BufWriter::new(f)), + Err(e) => error(Box::new(e)) + } + } else { + Box::new(BufWriter::new(io::stdout())) + } +} + +pub struct Environment<'a>(pub &'a ArgMatches<'a>); + +impl<'a> Environment<'a> { + pub fn context(&self) -> Context { build_context(self.0) } + pub fn input(&self) -> Box { build_input(self.0) } + pub fn output(&self) -> Box { build_output(self.0) } + pub fn decoder(&self) -> Box { force_decoder(self.0.value_of("input_format")) } + pub fn encoder(&self) -> Box { force_encoder(self.0.value_of("output_format")) } +} + +pub mod parse { + use clap::ArgMatches; + use super::*; + pub fn parse(args: &ArgMatches) { + let env = Environment(args); + let (context, mut decoder, mut input) = (env.context(), env.decoder(), env.input()); + for e in decoder.decode(&context, &mut input) { + match e { + Err(e) => { println!("Foo!"); error(Box::new(e)) }, + _ => () + } + } + } +} + +pub mod convert { + use clap::ArgMatches; + use super::*; + pub fn convert(args: &ArgMatches) { + let env = Environment(args); + let (context, mut decoder, mut input, encoder, mut output) = + (env.context(), env.decoder(), env.input(), env.encoder(), env.output()); + + for e in decoder.decode(&context, &mut input) { + match e { + Ok(e) => { let _ = encoder.encode(&context, &mut output, &e); }, + Err(e) => error(Box::new(e)) + } + } + } +} + +pub mod seen { + use clap::ArgMatches; + use ilc::event::Event; + use ilc::format::{ self, Encode }; + use super::*; + pub fn seen(args: &ArgMatches) { + let env = Environment(args); + let (context, mut decoder, mut input, mut output) = (env.context(), env.decoder(), env.input(), env.output()); + + let nick = args.value_of("nick").expect("Required argument not present"); + + let mut last: Option = None; + for e in decoder.decode(&context, &mut input) { + let m = match e { + Ok(m) => m, + Err(err) => error(Box::new(err)) + }; + + if m.ty.involves(nick) + && last.as_ref().map_or(true, |last| m.time.as_timestamp() > last.time.as_timestamp()) { last = Some(m) } + } + let encoder = format::weechat3::Weechat3; + if let Some(ref m) = last { + let _ = encoder.encode(&context, &mut output, m); + } + } +} + +pub mod sort { + use clap::ArgMatches; + use ilc::event::Event; + use super::*; + pub fn sort(args: &ArgMatches) { + let env = Environment(args); + let (context, mut decoder, mut input, encoder, mut output) = + (env.context(), env.decoder(), env.input(), env.encoder(), env.output()); + + let mut events: Vec = decoder.decode(&context, &mut input) + .flat_map(Result::ok) + .collect(); + + events.sort_by(|a, b| a.time.cmp(&b.time)); + for e in events { + let _ = encoder.encode(&context, &mut output, &e); + } + } +} + +pub mod dedup { + use clap::ArgMatches; + use ilc::event::NoTimeHash; + use ::ageset::AgeSet; + use super::*; + pub fn dedup(args: &ArgMatches) { + let env = Environment(args); + let (context, mut decoder, mut input, encoder, mut output) = + (env.context(), env.decoder(), env.input(), env.encoder(), env.output()); + + let mut backlog = AgeSet::new(); + + for e in decoder.decode(&context, &mut input) { + if let Ok(e) = e { + let newest_event = e.clone(); + backlog.prune(move |a: &NoTimeHash| { + let age = newest_event.time.as_timestamp() - a.0.time.as_timestamp(); + age > 5000 + }); + // write `e` if it's a new event + let n = NoTimeHash(e); + if !backlog.contains(&n) { + let _ = encoder.encode(&context, &mut output, &n.0); + backlog.push(n); + } + } + } + + } +} diff --git a/src/event.rs b/src/event.rs index f497359..d2ce053 100644 --- a/src/event.rs +++ b/src/event.rs @@ -95,6 +95,11 @@ pub struct Event<'a> { pub channel: Option> } +#[derive(Clone, Debug, PartialEq, Eq, Hash, RustcEncodable, RustcDecodable)] +pub struct User<'a> { + nick: Cow<'a, str> +} + /// All representable events, such as messages, quits, joins /// and topic changes. #[derive(Clone, Debug, Hash, PartialEq, Eq, RustcEncodable, RustcDecodable)] diff --git a/src/format/mod.rs b/src/format/mod.rs index ff3a328..f7de677 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -25,6 +25,7 @@ use context::Context; pub mod energymech; pub mod weechat3; +// pub mod irssi; pub mod binary; pub mod msgpack; @@ -46,8 +47,9 @@ impl Decode for Dummy { pub fn decoder(format: &str) -> Option> { match format { - "energymech" => Some(Box::new(energymech::Energymech)), - "weechat3" => Some(Box::new(weechat3::Weechat3)), + "energymech" | "em" => Some(Box::new(energymech::Energymech)), + "weechat3" | "weechat" | "w3" => Some(Box::new(weechat3::Weechat3)), +// "irssi" => Some(Box::new(irssi::Irssi)), "binary" => Some(Box::new(binary::Binary)), "msgpack" => Some(Box::new(msgpack::Msgpack)), _ => None @@ -56,8 +58,9 @@ pub fn decoder(format: &str) -> Option> { pub fn encoder(format: &str) -> Option> { match format { - "energymech" => Some(Box::new(energymech::Energymech)), - "weechat3" => Some(Box::new(weechat3::Weechat3)), + "energymech" | "em" => Some(Box::new(energymech::Energymech)), + "weechat3" | "weechat" | "w3" => Some(Box::new(weechat3::Weechat3)), +// "irssi" => Some(Box::new(irssi::Irssi)), "binary" => Some(Box::new(binary::Binary)), "msgpack" => Some(Box::new(msgpack::Msgpack)), _ => None diff --git a/src/main.rs b/src/main.rs index 4bbb9ff..2603a27 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,7 +14,8 @@ extern crate ilc; extern crate chrono; -extern crate docopt; +#[macro_use] +extern crate clap; extern crate rustc_serialize; #[macro_use] extern crate log; @@ -22,300 +23,81 @@ extern crate env_logger; extern crate glob; extern crate blist; -use std::process; -use std::io::{ self, BufRead, BufReader, Write, BufWriter }; -use std::path::{ Path, PathBuf }; -use std::fs::File; -use std::error::Error; -use std::str::FromStr; -use std::collections::HashMap; -use std::ffi::OsStr; - -use docopt::Docopt; - -use chrono::offset::fixed::FixedOffset; -use chrono::naive::date::NaiveDate; - -use glob::glob; - -use ilc::context::Context; -use ilc::format::{ self, Encode, Decode }; -use ilc::event::{ Event, Type, NoTimeHash }; - -use ageset::AgeSet; +use clap::{ Arg, App, AppSettings, SubCommand }; mod chain; mod ageset; - -static USAGE: &'static str = r#" -d8b 888 -Y8P 888 - 888 -888 888 .d8888b -888 888 d88P" -888 888 888 -888 888 Y88b. -888 888 "Y8888P - -A converter and statistics utility for IRC log files. - -Usage: - ilc parse [options] [-i FILE...] - ilc convert [options] [-i FILE...] - ilc freq [options] [-i FILE...] - ilc seen [options] [-i FILE...] - ilc sort [options] [-i FILE...] - ilc dedup [options] [-i FILE...] - ilc (-h | --help | -v | --version) - -Options: - -h --help Show this screen. - -v --version Show the version (duh). - --date DATE Override the date for this log. ISO 8601, YYYY-MM-DD. - --tz SECONDS UTC offset in the direction of the western hemisphere. - --channel CH Set a channel for the given log. - --inf INF Set the input format. - --outf OUTF Set the output format. - --in -i IN Give an input file, instead of stdin. - --out -o OUT Give an output file, instead of stdout. - --infer-date Try to use the filename as date for the log. -"#; - -#[derive(RustcDecodable, Debug)] -struct Args { - cmd_parse: bool, - cmd_convert: bool, - cmd_freq: bool, - cmd_seen: bool, - cmd_sort: bool, - cmd_dedup: bool, - arg_file: Vec, - arg_nick: String, - flag_in: Vec, - flag_out: Option, - flag_inf: Option, - flag_outf: Option, - flag_help: bool, - flag_version: bool, - flag_date: Option, - flag_tz: Option, - flag_channel: Option, - flag_infer_date: bool -} - -fn error(e: Box) -> ! { - let _ = writeln!(&mut io::stderr(), "Error: {}", e); - let mut e = e.cause(); - while let Some(err) = e { - let _ = writeln!(&mut io::stderr(), "\t{}", err); - e = err.cause(); - } - process::exit(1) -} - -fn die(s: &str) -> ! { - let _ = writeln!(&mut io::stderr(), "Aborting: {}", s); - process::exit(1) -} - -fn force_decoder(s: Option) -> Box { - let inf = match s { - Some(s) => s, - None => die("You didn't specify the input format") - }; - match format::decoder(&inf) { - Some(d) => d, - None => die(&format!("The format `{}` is unknown to me", inf)) - } -} - -fn force_encoder<'a>(s: Option) -> Box { - let outf = match s { - Some(s) => s, - None => die("You didn't specify the output format") - }; - match format::encoder(&outf) { - Some(e) => e, - None => die(&format!("The format `{}` is unknown to me", outf)) - } -} +mod app; fn main() { env_logger::init().unwrap(); - let args: Args = Docopt::new(USAGE) - .and_then(|d| d.decode()) - .unwrap_or_else(|e| e.exit()); - if args.flag_help { - println!("{}", USAGE); - process::exit(1) - } - - let mut context = Context { - timezone: FixedOffset::west(args.flag_tz.and_then(|s| s.parse().ok()).unwrap_or(0)), - override_date: args.flag_date.and_then(|d| NaiveDate::from_str(&d).ok()), - channel: args.flag_channel.clone() - }; - - let mut input: Box = if args.flag_in.len() > 0 { - let input_files: Vec = args.flag_in.iter() - .flat_map(|p| { - match glob(p) { - Ok(paths) => paths, - Err(e) => die(&format!("{}", e.msg)) - } - }).filter_map(Result::ok).collect();//.map(|p| File::open(p).unwrap()).collect(); - if args.flag_infer_date { - if input_files.len() > 1 { die("Too many input files, can't infer date") } - if let Some(date) = input_files.iter().next() - .map(PathBuf::as_path) - .and_then(Path::file_stem) - .and_then(OsStr::to_str) - .and_then(|s: &str| NaiveDate::from_str(s).ok()) { - context.override_date = Some(date); - } - } - Box::new(BufReader::new(chain::Chain::new(input_files.iter().map(|p| File::open(p).unwrap()).collect()))) - } else { - Box::new(BufReader::new(io::stdin())) - }; - - let mut output: Box = if let Some(out) = args.flag_out { - match File::create(out) { - Ok(f) => Box::new(BufWriter::new(f)), - Err(e) => error(Box::new(e)) - } - } else { - Box::new(BufWriter::new(io::stdout())) - }; - - if args.cmd_parse { - let mut decoder = force_decoder(args.flag_inf); - let encoder = force_encoder(args.flag_outf); - for e in decoder.decode(&context, &mut input) { - let e = e.unwrap(); - let _ = encoder.encode(&context, &mut output, &e); - } - } else if args.cmd_convert { - let mut decoder = force_decoder(args.flag_inf); - let encoder = force_encoder(args.flag_outf); - for e in decoder.decode(&context, &mut input) { - match e { - Ok(e) => { let _ = encoder.encode(&context, &mut output, &e); }, - Err(e) => error(Box::new(e)) - } - } - } else if args.cmd_freq { - struct Person { - lines: u32, - alpha_lines: u32, - words: u32 - } - - fn words_alpha(s: &str) -> (u32, bool) { - let mut alpha = false; - let mut words = 0; - for w in s.split_whitespace() { - if !w.is_empty() { - words += 1; - if w.chars().any(char::is_alphabetic) { alpha = true } - } - } - (words, alpha) - } - - fn strip_nick_prefix(s: &str) -> &str { - if s.is_empty() { return s } - match s.as_bytes()[0] { - b'~' | b'&' | b'@' | b'%' | b'+' => &s[1..], - _ => s - } - } - - let mut stats: HashMap = HashMap::new(); - - let mut decoder = force_decoder(args.flag_inf); - for e in decoder.decode(&context, &mut input) { - let m = match e { - Ok(m) => m, - Err(err) => error(Box::new(err)) - }; - - match m { - Event { ty: Type::Msg { ref from, ref content, .. }, .. } => { - let nick = strip_nick_prefix(from); - if stats.contains_key(nick) { - let p: &mut Person = stats.get_mut(nick).unwrap(); - let (words, alpha) = words_alpha(content); - p.lines += 1; - if alpha { p.alpha_lines += 1 } - p.words += words; - } else { - let (words, alpha) = words_alpha(content); - stats.insert(nick.to_owned(), Person { - lines: 1, - alpha_lines: if alpha { 1 } else { 0 }, - words: words - }); - } - }, - _ => () - } - } - - let mut stats: Vec<(String, Person)> = stats.into_iter().collect(); - stats.sort_by(|&(_, ref a), &(_, ref b)| b.words.cmp(&a.words)); - - for &(ref name, ref stat) in stats.iter() { - let _ = write!(&mut output, - "{}:\n\tTotal lines: {}\n\tLines without alphabetic characters: {}\n\tTotal words: {}\n\tWords per line: {}\n", - name, stat.lines, stat.lines - stat.alpha_lines, stat.words, stat.words as f32 / stat.lines as f32); - } - } else if args.cmd_seen { - let mut decoder = force_decoder(args.flag_inf); - let mut last: Option = None; - for e in decoder.decode(&context, &mut input) { - let m = match e { - Ok(m) => m, - Err(err) => error(Box::new(err)) - }; - - if m.ty.involves(&args.arg_nick) - && last.as_ref().map_or(true, |last| m.time.as_timestamp() > last.time.as_timestamp()) { last = Some(m) } - } - let encoder = format::weechat3::Weechat3; - if let Some(ref m) = last { - let _ = encoder.encode(&context, &mut output, m); - } - } else if args.cmd_sort { - let mut decoder = force_decoder(args.flag_inf); - let encoder = force_encoder(args.flag_outf); - let mut events: Vec = decoder.decode(&context, &mut input) - .flat_map(Result::ok) - .collect(); - - events.sort_by(|a, b| a.time.cmp(&b.time)); - for e in events { - let _ = encoder.encode(&context, &mut output, &e); - } - } else if args.cmd_dedup { - let mut decoder = force_decoder(args.flag_inf); - let encoder = force_encoder(args.flag_outf); - let mut backlog = AgeSet::new(); - - for e in decoder.decode(&context, &mut input) { - if let Ok(e) = e { - let newest_event = e.clone(); - backlog.prune(move |a: &NoTimeHash| { - let age = newest_event.time.as_timestamp() - a.0.time.as_timestamp(); - age > 5000 - }); - // write `e` if it's a new event - let n = NoTimeHash(e); - if !backlog.contains(&n) { - let _ = encoder.encode(&context, &mut output, &n.0); - backlog.push(n); - } - } - } + let args = App::new("ilc") + .version(crate_version!()) + .setting(AppSettings::GlobalVersion) + .setting(AppSettings::VersionlessSubcommands) + .setting(AppSettings::ArgRequiredElseHelp) + .author("Till Höppner ") + .about("A converter and statistics utility for IRC log files") + .arg(Arg::with_name("timezone") + .help("UTC offset in the direction of the western hemisphere") + .global(true) + .takes_value(true) + .long("timezone") + .short("t")) + .arg(Arg::with_name("date") + .help("Override the date for this log, ISO 8601, YYYY-MM-DD") + .global(true) + .takes_value(true) + .long("date") + .short("d")) + .arg(Arg::with_name("channel") + .help("Set a channel for the current log") + .global(true) + .takes_value(true) + .long("channel") + .short("c")) + .arg(Arg::with_name("input_format") + .help("Set the input format for the current log") + .global(true) + .takes_value(true) + .long("inf")) + .arg(Arg::with_name("output_format") + .help("Set the output format for the current log") + .global(true) + .takes_value(true) + .long("outf")) + .arg(Arg::with_name("input_files") + .help("Specify an input file, instead of stdin") + .global(true) + .takes_value(true).multiple(true) + .long("input") + .short("i")) + .arg(Arg::with_name("output_file") + .help("Specify an output file, instead of stdout") + .global(true) + .takes_value(true) + .long("output") + .short("o")) + .subcommand(SubCommand::with_name("parse") + .about("Parse the input, checking the format")) + .subcommand(SubCommand::with_name("convert")) + .subcommand(SubCommand::with_name("freq")) + .subcommand(SubCommand::with_name("seen") + .arg(Arg::with_name("nick") + .help("The nick you're looking for") + .takes_value(true).required(true) + .index(1))) + .subcommand(SubCommand::with_name("sort")) + .subcommand(SubCommand::with_name("dedup")) + .get_matches(); + + match args.subcommand() { + ("parse", Some(args)) => app::parse::parse(args), + ("convert", Some(args)) => app::convert::convert(args), + ("freq", Some(args)) => app::freq::freq(args), + ("seen", Some(args)) => app::seen::seen(args), + ("sort", Some(args)) => app::sort::sort(args), + ("dedup", Some(args)) => app::dedup::dedup(args), + _ => panic!("Unimplemented subcommand, this is a bug") } } -- cgit v1.2.3