From d972eed6b596d415a0aa6117a05bd107dbb8a0ae Mon Sep 17 00:00:00 2001 From: Till Hoeppner Date: Mon, 23 Mar 2015 16:41:44 +0100 Subject: Initial commit. --- .gitignore | 1 + .travis.yml | 1 + Cargo.toml | 12 +++++++ src/format/mod.rs | 17 +++++++++ src/format/weechat3.rs | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 29 +++++++++++++++ src/log.rs | 57 ++++++++++++++++++++++++++++++ src/main.rs | 54 ++++++++++++++++++++++++++++ 8 files changed, 267 insertions(+) create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 Cargo.toml create mode 100644 src/format/mod.rs create mode 100644 src/format/weechat3.rs create mode 100644 src/lib.rs create mode 100644 src/log.rs create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..22761ba --- /dev/null +++ b/.travis.yml @@ -0,0 +1 @@ +language: rust diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..1ca4ef8 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,12 @@ +[package] + +name = "ilc" +version = "0.0.1" +authors = ["Till Hoeppner "] + +[dependencies] +regex = "*" +regex_macros = "*" +chrono = "*" +docopt = "*" +rustc-serialize = "*" diff --git a/src/format/mod.rs b/src/format/mod.rs new file mode 100644 index 0000000..9f6d30d --- /dev/null +++ b/src/format/mod.rs @@ -0,0 +1,17 @@ +//! Traits and structs for conversion between various formats. +//! As the source format may not provide the same information as the +//! target format, all formats must allow for omittable information. 
+
+use std::io::{ self, BufRead, Write };
+
+use log::Event;
+
+pub mod weechat3;
+/// Writes a single `Event` to `output` (contract inferred from the signature).
+pub trait Encode<W> where W: Write {
+    fn encode(&self, output: W, event: &Event) -> io::Result<()>;
+}
+/// Turns a buffered reader into an iterator whose items are `::Result<Event>`.
+pub trait Decode<R, O> where R: BufRead, O: Iterator<Item = ::Result<Event>> {
+    fn decode(&mut self, input: R) -> O;
+}
diff --git a/src/format/weechat3.rs b/src/format/weechat3.rs
new file mode 100644
index 0000000..f546983
--- /dev/null
+++ b/src/format/weechat3.rs
@@ -0,0 +1,96 @@
+use std::io::BufRead;
+use std::borrow::ToOwned;
+
+use log::Event;
+use format::Decode;
+
+use regex::Regex;
+
+use chrono::*;
+
+pub struct Weechat3;
+// Capture groups of NORMAL_LINE and ACTION_LINE: 1 = timestamp, 2 = nick, 3 = text.
+static NORMAL_LINE: Regex = regex!(r"^(\d+-\d+-\d+ \d+:\d+:\d+)\t[@%+~&]?([^ <-]\S+)\t(.*)");
+static ACTION_LINE: Regex = regex!(r"^(\d+-\d+-\d+ \d+:\d+:\d+)\t \*\t(\S+) (.*)");
+//static OTHER_LINES: Regex = regex!(r"^(\d+-\d+-\d+ \d+:\d+:\d+)\s(?:--|<--|-->)\s(\S+)\s(\S+)\s(\S+)\s(\S+)\s(\S+)\n$");
+static OTHER_LINES: Regex = regex!(r"(.*)"); // placeholder: only group 1 exists, so the at(3)..at(6) tests in next() never match
+
+static TIME_DATE_FORMAT: &'static str = "%Y-%m-%d %H:%M:%S";
+/// Line-by-line event iterator over `input`; `buffer` is cleared and reused for every line.
+pub struct Iter<R> where R: BufRead {
+    input: R,
+    buffer: String
+}
+
+impl<R> Iterator for Iter<R> where R: BufRead {
+    type Item = ::Result<Event>;
+    fn next(&mut self) -> Option<::Result<Event>> {
+        fn time(s: &str) -> i64 { // panics (unwrap) when `s` does not parse with TIME_DATE_FORMAT
+            UTC.datetime_from_str(s, TIME_DATE_FORMAT).unwrap().timestamp()
+        }
+        // End of input and read errors both end the iteration; every other line yields Ok(event) or a Parse error.
+        println!("Reading line...");
+        self.buffer.clear();
+        match self.input.read_line(&mut self.buffer) {
+            Ok(0) | Err(_) => return None,
+            Ok(_) => ()
+        }
+        let line = &self.buffer;
+        println!("Read line: {}", line);
+        if let Some(cap) = NORMAL_LINE.captures(line) {
+            return Some(Ok(Event::Msg {
+                from: cap.at(2).unwrap().to_owned(),
+                content: cap.at(3).unwrap().to_owned(),
+                time: time(cap.at(1).unwrap()) // group 1 only; group 0 is the whole match and cannot be parsed as a date
+            }))
+        } else if let Some(cap) = ACTION_LINE.captures(line) {
+            return Some(Ok(Event::Action {
+                from: cap.at(2).unwrap().to_owned(),
+                content: cap.at(3).unwrap().to_owned(),
+                time: time(cap.at(1).unwrap())
+            }))
+        } else if let Some(cap) = OTHER_LINES.captures(line) {
+            if cap.at(4) == Some("has") && cap.at(5) == Some("kicked") {
+                return Some(Ok(Event::Kick {
+                    kicked_nick: cap.at(6).unwrap().to_owned(),
+                    kicking_nick: cap.at(3).unwrap().to_owned(),
+                    kick_message: cap.at(4).unwrap().to_owned(),
+                    time: time(cap.at(0).unwrap())
+                }))
+            } else if cap.at(3) == Some("has") && cap.at(5) == Some("changed") && cap.at(6) == Some("topic") {
+                return Some(Ok(Event::Topic {
+                    new_topic: cap.at(5).unwrap().to_owned(),
+                    time: time(cap.at(0).unwrap())
+                }))
+            } else if cap.at(3) == Some("Mode") {
+                return Some(Ok(Event::Mode {
+                    time: time(cap.at(0).unwrap())
+                }))
+            } else if cap.at(5) == Some("has") && cap.at(6) == Some("joined") {
+                return Some(Ok(Event::Join {
+                    nick: cap.at(3).unwrap().to_owned(),
+                    mask: String::new(),
+                    time: time(cap.at(0).unwrap())
+                }))
+            } else if cap.at(5) == Some("now") && cap.at(6) == Some("known") {
+                // TODO: no event is built for this case yet, so it falls through to the parse error below.
+            }
+        }
+        Some(Err(::IlcError::Parse(format!("Line `{}` didn't match any rules.", line))))
+    }
+}
+
+impl<R> Decode<R, Iter<R>> for Weechat3 where R: BufRead {
+    /// Wraps `input` in an `Iter` with an empty line buffer.
+    ///
+    /// No input is consumed here: reading happens lazily, one line per call to
+    /// `Iter::next`, which yields the decoded `Event`, or `IlcError::Parse` when
+    /// no pattern accepts the line. Iteration stops at end of input or on the
+    /// first read error.
+    fn decode(&mut self, input: R) -> Iter<R> {
+        Iter {
+            input: input,
+            buffer: String::new()
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..0e49d17
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,29 @@
+#![feature(plugin)]
+#![plugin(regex_macros)]
+extern crate regex;
+extern crate chrono;
+
+pub mod log;
+pub mod format;
+
+use std::error::FromError;
+use std::{ io, result };
+
+use chrono::format::ParseError;
+
+pub type Result<T> = result::Result<T, IlcError>; // crate-wide alias: the error type is always IlcError
+/// Failure cases: a line no rule matched, a chrono parse error, or an I/O error.
+#[derive(Debug, PartialEq)]
+pub enum IlcError {
+    Parse(String),
+    Chrono(ParseError),
+    Io(io::Error)
+}
+
+impl FromError<ParseError> for IlcError {
+    fn from_error(err: ParseError) -> IlcError { IlcError::Chrono(err) }
+}
+
+impl FromError<io::Error> for IlcError {
+    fn from_error(err: io::Error) -> IlcError { IlcError::Io(err) }
+}
diff --git a/src/log.rs b/src/log.rs
new file mode 100644
index 0000000..3079b38
--- /dev/null
+++ b/src/log.rs
@@ -0,0 +1,57 @@
+//! Common structures to represent the actual log data in memory.
+//! These will be used by all formats for encoding and decoding.
+
+/// A whole log, in memory. This structure does not specify its
+/// use. It may represent a private query, or the log of a channel.
+pub struct Log {
+    pub entries: Vec<Event>
+}
+
+/// All representable events, such as messages, quits, joins
+/// and topic changes.
+#[derive(Debug)]
+pub enum Event {
+    Msg {
+        from: String,
+        content: String,
+        time: i64
+    },
+    Action {
+        from: String,
+        content: String,
+        time: i64
+    },
+    Join {
+        nick: String,
+        mask: String,
+        time: i64
+    },
+    Quit {
+        nick: String,
+        mask: String,
+        time: i64
+    },
+    Nick {
+        old: String,
+        new: String,
+        time: i64
+    },
+    Notice {
+        nick: String,
+        content: String,
+        time: i64
+    },
+    Kick {
+        kicked_nick: String,
+        kicking_nick: String,
+        kick_message: String,
+        time: i64
+    },
+    Topic {
+        new_topic: String,
+        time: i64
+    },
+    Mode {
+        time: i64
+    }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..1bdf090
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,54 @@
+#![feature(libc, plugin)]
+#![plugin(regex_macros)]
+
+extern crate ilc;
+extern crate docopt;
+extern crate "rustc-serialize" as rustc_serialize;
+extern crate libc;
+extern crate regex;
+
+use std::fs::File;
+use std::io::BufReader;
+
+use docopt::Docopt;
+
+use ilc::format::{ self, Decode };
+
+static USAGE: &'static str = "
+A converter and statistics utility for IRC log files.
+
+Usage:
+  ilc parse <file>...
+
+Options:
+  -h --help     Show this screen.
+  -v --version  Show the version (duh).
+";
+/// Command-line arguments, decoded from `USAGE` by docopt in `main`.
+#[derive(RustcDecodable, Debug)]
+struct Args {
+    cmd_parse: bool,
+    arg_file: Vec<String>,
+    flag_help: bool,
+    flag_version: bool
+}
+/// Parses the command line, then prints every event decoded from each given file.
+fn main() {
+    let args: Args = Docopt::new(USAGE)
+                            .and_then(|d| d.decode())
+                            .unwrap_or_else(|e| e.exit());
+    if args.flag_help {
+        println!("{}", USAGE);
+        unsafe { libc::funcs::c95::stdlib::exit(1) } // NOTE(review): status 1 although help was requested; confirm intended
+    }
+
+    if args.cmd_parse {
+        let mut parser = format::weechat3::Weechat3;
+        for file in args.arg_file {
+            let f: BufReader<File> = BufReader::new(File::open(file).unwrap()); // panics if the file cannot be opened
+            let iter = parser.decode(f);
+            println!("Obtained event iterator");
+            for e in iter { println!("{:?}", e) }
+        }
+    }
+}
-- 
cgit v1.2.3