From 19a778d004de584a09fa1d4c6c4bd8803ca80048 Mon Sep 17 00:00:00 2001 From: Till Hoeppner Date: Thu, 11 Jun 2015 12:39:45 +0200 Subject: Some inbetween state of confusion --- src/context.rs | 9 +++ src/event.rs | 114 ++++++++++++++++++++++++++++++++++++ src/format/binary.rs | 23 ++++---- src/format/energymech.rs | 135 +++++++++++++++++++++++++++++++++++++++++++ src/format/mod.rs | 26 +++++++-- src/format/weechat3.rs | 147 ++++++++++++++++++++++++++++------------------- src/freq.rs | 13 ++++- src/lib.rs | 3 +- src/main.rs | 16 +++++- 9 files changed, 406 insertions(+), 80 deletions(-) create mode 100644 src/context.rs create mode 100644 src/event.rs create mode 100644 src/format/energymech.rs diff --git a/src/context.rs b/src/context.rs new file mode 100644 index 0000000..c24dd14 --- /dev/null +++ b/src/context.rs @@ -0,0 +1,9 @@ + +use chrono::date::Date; +use chrono::naive::date::NaiveDate; +use chrono::offset::fixed::FixedOffset; + +pub struct Context { + pub timezone: FixedOffset, + pub override_date: NaiveDate +} diff --git a/src/event.rs b/src/event.rs new file mode 100644 index 0000000..0fab7ae --- /dev/null +++ b/src/event.rs @@ -0,0 +1,114 @@ +// Copyright 2015 Till Höppner +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Common structures to represent the actual log data in memory. +//! These will be used by all formats for encoding and decoding. + +use std::borrow::Cow; + +use chrono::offset::fixed::FixedOffset; +use chrono::offset::TimeZone; +use chrono::datetime::DateTime; + +use context::Context; + +/// A whole log, in memory. This structure does not specify its +/// use. It may represent a private query, or the log of a channel. +pub struct Log<'a> { + pub entries: Vec> +} + +/// Different log formats carry different amounts of information. Some might +/// hold enough information to calculate precise timestamps, others might +/// only suffice for the time of day. +#[derive(Clone, Debug, PartialEq, Hash, RustcEncodable, RustcDecodable)] +pub enum Time { + Unknown, + Hms(u8, u8, u8), + Timestamp(i64) +} + +impl Time { + pub fn from_format(tz: &FixedOffset, s: &str, f: &str) -> Time { + tz.datetime_from_str(s, f) + .map(|d| d.timestamp()) + .map(Time::Timestamp) + .unwrap_or(Time::Unknown) + } + + pub fn with_format(&self, tz: &FixedOffset, f: &str) -> String { + match self { + &Time::Unknown => panic!("Time data for this event is not present"), + &Time::Hms(h, m, s) => unimplemented!(), + &Time::Timestamp(t) => format!("{}", tz.timestamp(t, 0).format(f)) + } + } +} + +#[derive(Clone, Debug, PartialEq, Hash, RustcEncodable, RustcDecodable)] +pub struct Event<'a> { + pub ty: Type<'a>, + pub time: Time, + pub channel: Option> +} + +/// All representable events, such as messages, quits, joins +/// and topic changes. +#[derive(Clone, Debug, Hash, PartialEq, RustcEncodable, RustcDecodable)] +pub enum Type<'a> { + Connect, + Disconnect, + Msg { + from: Cow<'a, str>, + content: Cow<'a, str>, + }, + Action { + from: Cow<'a, str>, + content: Cow<'a, str>, + }, + Join { + nick: Cow<'a, str>, + mask: Option>, + }, + Part { + nick: Cow<'a, str>, + mask: Option>, + reason: Option>, + }, + Quit { + nick: Cow<'a, str>, + mask: Option>, + reason: Option>, + }, + Nick { + old_nick: Cow<'a, str>, + new_nick: Cow<'a, str>, + }, + Notice { + from: Cow<'a, str>, + content: Cow<'a, str>, + }, + Kick { + kicked_nick: Cow<'a, str>, + kicking_nick: Option>, + kick_message: Option>, + }, + Topic { + topic: Cow<'a, str>, + }, + TopicChange { + new_topic: Cow<'a, str>, + }, + Mode +} diff --git a/src/format/binary.rs b/src/format/binary.rs index e8d880f..df57781 100644 --- a/src/format/binary.rs +++ b/src/format/binary.rs @@ -14,35 +14,38 @@ use std::io::{ BufRead, Write }; use std::iter::Iterator; +use std::marker::PhantomData; -use log::Event; +use event::Event; +use context::Context; use format::{ Encode, Decode }; use bincode::{ self, SizeLimit }; pub struct Binary; -pub struct Iter where R: BufRead { +pub struct Iter<'a, R: 'a> where R: BufRead { + _phantom: PhantomData<&'a ()>, input: R } -impl Iterator for Iter where R: BufRead { - type Item = ::Result; - fn next(&mut self) -> Option<::Result> { +impl<'a, R: 'a> Iterator for Iter<'a, R> where R: BufRead { + type Item = ::Result>; + fn next(&mut self) -> Option<::Result>> { Some(bincode::decode_from::(&mut self.input, SizeLimit::Infinite) .map_err(|_| ::IlcError::BincodeDecode)) } } -impl Encode for Binary where W: Write { - fn encode(&self, mut output: W, event: &Event) -> ::Result<()> { +impl<'a, W> Encode<'a, W> for Binary where W: Write { + fn encode(&'a self, context: &'a Context, mut output: W, event: &'a Event) -> ::Result<()> { bincode::encode_into(event, &mut output, SizeLimit::Infinite) .map_err(|_| ::IlcError::BincodeEncode) } } -impl Decode> for Binary where R: BufRead { - fn decode(&mut self, input: R) -> Iter { - Iter { input: input } +impl<'a, R: 'a> Decode<'a, R, Iter<'a, R>> for Binary where R: BufRead { + fn decode(&'a mut self, context: &'a Context, input: R) -> Iter { + Iter { _phantom: PhantomData, input: input } } } diff --git a/src/format/energymech.rs b/src/format/energymech.rs new file mode 100644 index 0000000..a044344 --- /dev/null +++ b/src/format/energymech.rs @@ -0,0 +1,135 @@ +// Copyright 2015 Till Höppner +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::io::{ BufRead, Write }; +use std::borrow::ToOwned; +use std::iter::{ Iterator }; + +use event::{ Event, rejoin }; +use context::Context; +use format::{ Encode, Decode }; + +use l::LogLevel::Info; + +use chrono::*; + +pub struct Energymech; + +static TIME_FORMAT: &'static str = "%H:%M:%S"; + +pub struct Iter<'a, R: 'a> where R: BufRead { + context: &'a Context, + input: R, + buffer: String +} + +impl<'a, R: 'a> Iterator for Iter<'a, R> where R: BufRead { + type Item = ::Result>; + fn next(&mut self) -> Option<::Result>> { + fn timestamp(context: &Context, time: &str) -> i64 { + context.timezone.from_local_date(&context.override_date) + .and_time(NaiveTime::from_hms(time[0..2].parse::().unwrap(), + time[3..5].parse::().unwrap(), + time[6..8].parse::().unwrap())) + .single() + .expect("Transformed log times can't be represented, due to timezone transitions") + .timestamp() + } + fn join(s: &[&str], splits: &[char]) -> String { + let len = s.iter().map(|s| s.len()).sum(); + let mut out = s.iter().zip(splits.iter()).fold(String::with_capacity(len), + |mut s, (b, &split)| { s.push_str(b); s.push(split); s }); + out.pop(); out + } + fn mask(s: &str) -> String { + if s.len() >= 2 { s[1..(s.len() - 1)].to_owned() } else { String::new() } + } + + loop { + self.buffer.clear(); + match self.input.read_line(&mut self.buffer) { + Ok(0) | Err(_) => return None, + Ok(_) => () + } + + let mut split_tokens: Vec = Vec::new(); + let tokens = self.buffer.split( |c: char| { + if c.is_whitespace() { split_tokens.push(c); true } else { false } + }).collect::>(); + if log_enabled!(Info) { + info!("Original: `{}`", self.buffer); + info!("Parsing: {:?}", tokens); + } + match tokens[..tokens.len() - 1].as_ref() { + [time, "*", nick, content..] => return Some(Ok(Event::Action { + from: nick.to_owned(), content: join(content, &split_tokens[3..]), + time: timestamp(&self.context, &mask(time)) + })), + [time, "***", old, "is", "now", "known", "as", new] => return Some(Ok(Event::Nick { + old: old.to_owned(), new: new.to_owned(), + time: timestamp(&self.context, &mask(time)) + })), + [time, "***", "Joins:", nick, host] => return Some(Ok(Event::Join { + nick: nick.to_owned(), mask: mask(host) + })), + [time, "***", "Quits:", nick, host, reason..] => return Some(Ok(Event::Quit { + nick: nick.to_owned(), mask: mask(host), + reason: mask(&join(reason, &split_tokens[5..])), + time: timestamp(&self.context, &mask(time)) + })), + [time, nick, content..] + if nick.starts_with('<') && nick.ends_with('>') + => return Some(Ok(Event::Msg { + from: mask(nick), content: join(content, &split_tokens[2..]), + time: timestamp(&self.context, &mask(time)) + })), + _ => () + } + } + } +} + +impl<'a, R: 'a> Decode<'a, R, Iter<'a, R>> for Energymech where R: BufRead { + fn decode(&'a mut self, context: &'a Context, input: R) -> Iter { + Iter { + context: context, + input: input, + buffer: String::new() + } + } +} + +impl<'a, W> Encode<'a, W> for Energymech where W: Write { + fn encode(&'a self, context: &'a Context, mut output: W, event: &'a Event) -> ::Result<()> { + fn date(t: i64) -> String { + format!("[{}]", UTC.timestamp(t, 0).format(TIME_FORMAT)) + } + match event { + &Event::Msg { ref from, ref content, ref time } => { + try!(writeln!(&mut output, "{} <{}> {}", date(*time), from, content)) + }, + &Event::Action { ref from, ref content, ref time } => { + try!(writeln!(&mut output, "{} * {} {}", date(*time), from, content)) + }, + &Event::Nick { ref old, ref new, ref time } => { + try!(writeln!(&mut output, "{} *** {} is now known as {}", date(*time), old, new)) + }, + &Event::Quit { ref nick, ref mask, ref reason, ref time } => { + try!(writeln!(&mut output, "{} *** Quits: {} ({}) ({})", date(*time), nick, mask, reason)) + }, + _ => () + } + Ok(()) + } +} diff --git a/src/format/mod.rs b/src/format/mod.rs index f5692a0..bc1e7df 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -17,16 +17,30 @@ //! target format, all formats must allow for omittable information. use std::io::{ BufRead, Write }; +use std::borrow::Cow; -use log::Event; +use event::Event; +use context::Context; pub mod weechat3; -pub mod binary; +//pub mod energymech; +//pub mod binary; -pub trait Encode where W: Write { - fn encode(&self, output: W, event: &Event) -> ::Result<()>; +pub trait Encode<'a, W> where W: Write { + fn encode(&'a self, context: &'a Context, output: W, event: &'a Event) -> ::Result<()>; } -pub trait Decode where R: BufRead, O: Iterator> { - fn decode(&mut self, input: R) -> O; +pub trait Decode<'a, R, O> where R: BufRead, O: Iterator>> { + fn decode(&'a mut self, context: &'a Context, input: R) -> O; +} + +fn rejoin(s: &[&str], splits: &[char]) -> Cow<'static, str> { + let len = s.iter().map(|s| s.len()).sum(); + let mut out = s.iter().zip(splits.iter()).fold(String::with_capacity(len), + |mut s, (b, &split)| { s.push_str(b); s.push(split); s }); + out.pop(); Cow::Owned(out) +} + +fn strip_one(s: &str) -> String { + if s.len() >= 2 { s[1..(s.len() - 1)].to_owned() } else { String::new() } } diff --git a/src/format/weechat3.rs b/src/format/weechat3.rs index bc6e968..947bd61 100644 --- a/src/format/weechat3.rs +++ b/src/format/weechat3.rs @@ -13,11 +13,13 @@ // limitations under the License. use std::io::{ BufRead, Write }; -use std::borrow::ToOwned; +use std::borrow::{ ToOwned, Cow, IntoCow }; use std::iter::{ Iterator }; +use std::marker::PhantomData; -use log::Event; -use format::{ Encode, Decode }; +use event::{ Event, Type, Time }; +use context::Context; +use format::{ Encode, Decode, rejoin, strip_one }; use l::LogLevel::Info; @@ -27,25 +29,18 @@ pub struct Weechat3; static TIME_DATE_FORMAT: &'static str = "%Y-%m-%d %H:%M:%S"; -pub struct Iter where R: BufRead { +pub struct Iter<'a, R: 'a> where R: BufRead { + _phantom: PhantomData<&'a ()>, + context: &'a Context, input: R, buffer: String } -impl Iterator for Iter where R: BufRead { - type Item = ::Result; - fn next(&mut self) -> Option<::Result> { - fn timestamp(date: &str, time: &str) -> i64 { - UTC.datetime_from_str(&format!("{} {}", date, time), TIME_DATE_FORMAT).unwrap().timestamp() - } - fn join(s: &[&str], splits: &[char]) -> String { - let len = s.iter().map(|s| s.len()).sum(); - let mut out = s.iter().zip(splits.iter()).fold(String::with_capacity(len), - |mut s, (b, &split)| { s.push_str(b); s.push(split); s }); - out.pop(); out - } - fn mask(s: &str) -> String { - if s.len() >= 2 { s[1..(s.len() - 1)].to_owned() } else { String::new() } +impl<'a: 'b, 'b, R: 'a> Iterator for Iter<'a, R> where R: BufRead { + type Item = ::Result>; + fn next(&mut self) -> Option<::Result>> { + fn parse_time<'b, 'c>(c: &Context, date: &'b str, time: &'c str) -> Time { + Time::from_format(&c.timezone, &format!("{} {}", date, time), TIME_DATE_FORMAT) } loop { @@ -56,80 +51,116 @@ impl Iterator for Iter where R: BufRead { } let mut split_tokens: Vec = Vec::new(); - let tokens = self.buffer.split( |c: char| { + let tokens: Vec<&'b str> = self.buffer.split(|c: char| { if c.is_whitespace() { split_tokens.push(c); true } else { false } }).collect::>(); - if log_enabled!(Info) { + + /*if log_enabled!(Info) { info!("Original: `{}`", self.buffer); info!("Parsing: {:?}", tokens); - } - match tokens[..tokens.len() - 1].as_ref() { - [date, time, "-->", nick, host, "has", "joined", channel, _..] => return Some(Ok(Event::Join { - nick: nick.to_owned(), channel: channel.to_owned(), mask: mask(host), - time: timestamp(date, time) + }*/ + + match &tokens[..tokens.len() - 1] as &'b [&'b str] { + /*[date, time, "-->", nick, host, "has", "joined", channel, _..] + => return Some(Ok(Event { + ty: Type::Join { + nick: nick.to_owned(), + mask: Some(strip_one(host)), + time: timestamp(date, time) + }, + channel: Some(channel.into_cow()), })), - [date, time, "<--", nick, host, "has", "left", channel, reason..] => return Some(Ok(Event::Part { - nick: nick.to_owned(), channel: channel.to_owned(), mask: mask(host), - reason: mask(&join(reason, &split_tokens[8..])), time: timestamp(date, time) + [date, time, "<--", nick, host, "has", "left", channel, reason..] + => return Some(Ok(Event { + ty: Type::Part { + nick: nick.to_owned(), + mask: Some(strip_one(host)), + reason: Some(strip_one(&rejoin(reason, &split_tokens[8..]))), + }, + channel: Some(channel.to_owned()), + time: timestamp(date, time) })), - [date, time, "<--", nick, host, "has", "quit", reason..] => return Some(Ok(Event::Quit { - nick: nick.to_owned(), mask: mask(host), - reason: mask(&join(reason, &split_tokens[7..])), time: timestamp(date, time) + [date, time, "<--", nick, host, "has", "quit", reason..] + => return Some(Ok(Event { + ty: Type::Quit { + nick: nick.to_owned(), + mask: Some(strip_one(host)), + reason: Some(strip_one(&rejoin(reason, &split_tokens[7..]))), + } })), [date, time, "--", notice, content..] if notice.starts_with("Notice(") - => return Some(Ok(Event::Notice { - nick: notice["Notice(".len()..notice.len() - 2].to_owned(), - content: join(content, &split_tokens[4..]), - time: timestamp(date, time) + => return Some(Ok(Event { + ty: Type::Notice { + nick: notice["Notice(".len()..notice.len() - 2].to_owned(), + content: rejoin(content, &split_tokens[4..]), + time: timestamp(date, time) + } })), - [date, time, "--", "irc:", "disconnected", "from", "server", _..] => return Some(Ok(Event::Disconnect { - time: timestamp(date, time) + [date, time, "--", "irc:", "disconnected", "from", "server", _..] + => return Some(Ok(Event { + ty: Type::Disconnect { + time: timestamp(date, time) + } })), [date, time, "--", nick, verb, "now", "known", "as", new_nick] if verb == "is" || verb == "are" - => return Some(Ok(Event::Nick { + => return Some(Ok(Event { + ty: Type::Nick { old: nick.to_owned(), new: new_nick.to_owned(), time: timestamp(date, time) - })), + } + })),*/ [date, time, sp, "*", nick, msg..] - if sp.is_empty() - => return Some(Ok(Event::Action { - from: nick.to_owned(), content: join(msg, &split_tokens[5..]), - time: timestamp(date, time) - })), - [date, time, nick, msg..] => return Some(Ok(Event::Msg { - from: nick.to_owned(), - content: join(msg, &split_tokens[3..]), - time: timestamp(date, time) + if sp.clone().is_empty() + => return Some(Ok(Event { + ty: Type::Action { + from: nick.clone().into_cow(), + content: rejoin(msg, &split_tokens[5..]), + }, + time: parse_time(&self.context, &date.clone().to_owned(), &time.clone().to_owned()), + channel: None })), + /*[date, time, nick, msg..] + => return Some(Ok(Event { + ty: Type::Msg { + from: nick.into(), + content: rejoin(msg, &split_tokens[3..]), + }, + time: parse_time(&self.context, &date, &time), + channel: None + })),*/ _ => () } } } } -impl Decode> for Weechat3 where R: BufRead { - fn decode(&mut self, input: R) -> Iter { +impl<'a, R: 'a> Decode<'static, R, Iter<'a, R>> for Weechat3 where R: BufRead { + fn decode(&'a mut self, context: &'a Context, input: R) -> Iter { Iter { + _phantom: PhantomData, + context: context, input: input, buffer: String::new() } } } -impl Encode for Weechat3 where W: Write { - fn encode(&self, mut output: W, event: &Event) -> ::Result<()> { +impl<'a, W> Encode<'a, W> for Weechat3 where W: Write { + fn encode(&'a self, context: &'a Context, mut output: W, event: &'a Event) -> ::Result<()> { fn date(t: i64) -> String { format!("{}", UTC.timestamp(t, 0).format(TIME_DATE_FORMAT)) } match event { - &Event::Msg { ref from, ref content, ref time } => { - try!(writeln!(&mut output, "{}\t{}\t{}", date(*time), from, content)) + &Event { ty: Type::Msg { ref from, ref content, .. }, ref time, .. } => { + try!(writeln!(&mut output, "{}\t{}\t{}", + time.with_format(&context.timezone, TIME_DATE_FORMAT), from, content)) }, - &Event::Action { ref from, ref content, ref time } => { - try!(writeln!(&mut output, "{}\t *\t{} {}", date(*time), from, content)) + &Event { ty: Type::Action { ref from, ref content, .. }, ref time, .. } => { + try!(writeln!(&mut output, "{}\t *\t{} {}", + time.with_format(&context.timezone, TIME_DATE_FORMAT), from, content)) }, - &Event::Join { ref nick, ref mask, ref channel, ref time } => { + /*&Event::Join { ref nick, ref mask, ref channel, ref time } => { try!(writeln!(&mut output, "{}\t-->\t{} ({}) has joined {}", date(*time), nick, mask, channel)) }, @@ -153,7 +184,7 @@ impl Encode for Weechat3 where W: Write { }, &Event::Notice { ref nick, ref content, ref time } => { try!(writeln!(&mut output, "{}\t--\tNotice({}): {}", date(*time), nick, content)) - }, + },*/ _ => () } Ok(()) diff --git a/src/freq.rs b/src/freq.rs index 0ca4875..bbec971 100644 --- a/src/freq.rs +++ b/src/freq.rs @@ -13,11 +13,16 @@ // limitations under the License. extern crate ilc; +extern crate chrono; use std::io; use std::collections::hash_map::*; +use chrono::offset::fixed::FixedOffset; +use chrono::naive::date::NaiveDate; + use ilc::log::Event::*; +use ilc::context::Context; use ilc::format::{ self, Decode }; struct Person { @@ -41,9 +46,13 @@ fn main() { let stdin = io::stdin(); let mut stats: HashMap = HashMap::new(); + let context = Context { + timezone: FixedOffset::west(0), + override_date: NaiveDate::from_ymd(2015, 6, 10) + }; - let mut parser = format::weechat3::Weechat3; - for e in parser.decode(stdin.lock()) { + let mut parser = format::energymech::Energymech; + for e in parser.decode(&context, stdin.lock()) { let m = match e { Ok(m) => m, Err(err) => panic!(err) diff --git a/src/lib.rs b/src/lib.rs index 27454b7..3df9cfd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,8 +21,9 @@ extern crate log as l; extern crate rustc_serialize; extern crate bincode; -pub mod log; +pub mod event; pub mod format; +pub mod context; use std::convert::From; use std::{ io, result }; diff --git a/src/main.rs b/src/main.rs index 99e2659..de5f85c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,6 +16,7 @@ #![plugin(regex_macros)] extern crate ilc; +extern crate chrono; extern crate docopt; extern crate rustc_serialize; extern crate libc; @@ -29,6 +30,10 @@ use std::io::{ self, BufReader }; use docopt::Docopt; +use chrono::offset::fixed::FixedOffset; +use chrono::naive::date::NaiveDate; + +use ilc::context::Context; use ilc::format::{ self, Encode, Decode }; static USAGE: &'static str = r#" @@ -71,13 +76,18 @@ fn main() { } if args.cmd_parse { - let mut parser = format::weechat3::Weechat3; + let context = Context { + timezone: FixedOffset::west(0), + override_date: NaiveDate::from_ymd(2015, 6, 10) + }; + let mut parser = format::energymech::Energymech; + let formatter = format::energymech::Energymech; for file in args.arg_file { let f: BufReader = BufReader::new(File::open(file).unwrap()); - let iter = parser.decode(f); + let iter = parser.decode(&context, f); for e in iter { info!("Parsed: {:?}", e); - drop(parser.encode(io::stdout(), &e.unwrap())); + drop(formatter.encode(&context, io::stdout(), &e.unwrap())); } } } -- cgit v1.2.3