diff options
author | Yorhel <git@yorhel.nl> | 2017-01-05 20:01:32 +0100 |
---|---|---|
committer | Yorhel <git@yorhel.nl> | 2017-01-05 20:01:32 +0100 |
commit | bea3ab5e55acf1da0862c668b73ff4121dd17f39 (patch) | |
tree | 3c26494ec39cbcbf9aff29b1d64af6ec4ac8143f |
Initial commit of a vndb API client
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Cargo.toml | 11 | ||||
-rw-r--r-- | src/conn.rs | 111 | ||||
-rw-r--r-- | src/lib.rs | 37 | ||||
-rw-r--r-- | src/msg.rs | 112 | ||||
-rw-r--r-- | src/parser.rs | 250 |
6 files changed, 523 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a9d37c5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..5faf708 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "vndb" +version = "0.1.0" +authors = ["Yorhel <git@yorhel.nl>"] + +[dependencies] +rustls = "0.5.3" +lazy_static = "0.2.2" +webpki = "0.8.0" # Must be the same as used by rustls +netbuf = "0.3.8" +serde_json = "0.8.4" diff --git a/src/conn.rs b/src/conn.rs new file mode 100644 index 0000000..84ebc42 --- /dev/null +++ b/src/conn.rs @@ -0,0 +1,111 @@ +use std::io::Write; +use std::sync::Arc; +use std::error::Error; +use std::net::TcpStream; +use netbuf::Buf; +use rustls::{Session,ClientSession,ClientConfig}; + +use msg::Message; + +const VNDB_HOST: &'static str = "api.vndb.org"; +const VNDB_PORT_RAW: u16 = 19534; +const VNDB_PORT_TLS: u16 = 19535; + + +pub struct Connection { + stream: TcpStream, + tls: Option<ClientSession>, + rbuf: Buf, + wbuf: Buf, // Only used for raw connections. In the case of TLS we directly write to the ClientSession buffer. +} + + +impl Connection { + // Connects to the remote API, but does not yet start the TLS handshake. This is deferred to + // the first recv(), to allow rustls to send the initial command during the handshake. + pub fn connect(hostname: Option<&str>, port: Option<u16>, tls: Option<&Arc<ClientConfig>>) -> Result<Connection,Box<Error>> { + let hostname = hostname.unwrap_or(VNDB_HOST); + let port = port.unwrap_or(if tls.is_some() { VNDB_PORT_TLS } else { VNDB_PORT_RAW }); + + Ok(Connection { + stream: TcpStream::connect((hostname, port))?, + tls: tls.map(|c| ClientSession::new(c, hostname)), + rbuf: Buf::new(), + wbuf: Buf::new(), + }) + } + + // Push a new command to the output buffer. Doesn't actually send anything until a recv() is + // performed. Can be used to pipeline multiple commands. + pub fn send(&mut self, cmd: &Message) { + let w = self.tls.as_mut().map(|x| x as &mut Write).unwrap_or(&mut self.wbuf); + write!(w, "{}\x04", cmd).unwrap(); + } + + // XXX: Both this io_tls and the io_raw code may hang if the OS send buffers are full and the + // server is providing backpressure while waiting for us to read from our recv buffers. + // Properly handling that scenario requires async I/O. In practice, the server will disconnect + // us before that happens. + fn io_tls(&mut self) -> Result<(), Box<Error>> { + let tls = self.tls.as_mut().unwrap(); + while tls.wants_write() { + tls.write_tls(&mut self.stream)?; + } + if tls.wants_read() { + tls.read_tls(&mut self.stream)?; + tls.process_new_packets()?; + self.rbuf.read_from(tls)?; + } + Ok(()) + } + + fn io_raw(&mut self) -> Result<(), Box<Error>> { + while self.wbuf.len() > 0 { + self.wbuf.write_to(&mut self.stream)?; + } + self.rbuf.read_from(&mut self.stream)?; + Ok(()) + } + + // Send any outstanding commands and wait for a reply. Only returns a single response, if you + // expected multiple responses, call this method multiple times. This method blocks until at + // least one full response has been received. + pub fn recv(&mut self) -> Result<Message, Box<Error>> { + let len = { + while !self.rbuf.as_ref().contains(&4) { + if self.tls.is_some() { + self.io_tls()? + } else { + self.io_raw()? + } + } + self.rbuf.as_ref().split(|&c| c == 4).next().unwrap().len() + }; + + let m = Message::parse(::std::str::from_utf8(&self.rbuf[..len])?)?; + self.rbuf.consume(len+1); + Ok(m) + } + + pub fn cmd(&mut self, cmd: &Message) -> Result<Message, Box<Error>> { + self.send(cmd); + self.recv() + } +} + + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn connect() { + let mut c = Connection::connect(None, None, Some(&::TLS_CONF)).unwrap(); + c.send(&Message::parse("login {\"protocol\":1,\"client\":\"vndb-rust\",\"clientver\":\"0.1\"}").unwrap()); + c.send(&Message::parse("dbstats").unwrap()); + let r1 = format!("{}", c.recv().unwrap()); + let r2 = format!("{}", c.recv().unwrap()); + assert_eq!(&r1, "ok"); + assert!(r2.starts_with("dbstats {")); + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..c09ab0b --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,37 @@ +#[macro_use] extern crate lazy_static; +extern crate rustls; +extern crate webpki; +extern crate netbuf; +extern crate serde_json; + +mod conn; +mod parser; + +pub mod msg; +pub use conn::Connection; + + +static LE_ROOT: [webpki::TrustAnchor<'static>; 2] = [ + // ISRG Root X1 - The Let's Encrypt root cert + webpki::TrustAnchor { + subject: b"1\x0b0\t\x06\x03U\x04\x06\x13\x02US1)0\'\x06\x03U\x04\n\x13 Internet Security Research Group1\x150\x13\x06\x03U\x04\x03\x13\x0cISRG Root X1", + spki: b"0\r\x06\t*\x86H\x86\xf7\r\x01\x01\x01\x05\x00\x03\x82\x02\x0f\x000\x82\x02\n\x02\x82\x02\x01\x00\xad\xe8$s\xf4\x147\xf3\x9b\x9e+W(\x1c\x87\xbe\xdc\xb7\xdf8\x90\x8cn<\xe6W\xa0x\xf7u\xc2\xa2\xfe\xf5jn\xf6\x00O(\xdb\xdeh\x86lD\x93\xb6\xb1c\xfd\x14\x12k\xbf\x1f\xd2\xea1\x9b!~\xd13<\xbaH\xf5\xddy\xdf\xb3\xb8\xff\x12\xf1!\x9aK\xc1\x8a\x86qiJffl\x8f~<p\xbf\xad)\"\x06\xf3\xe4\xc0\xe6\x80\xae\xe2K\x8f\xb7\x99~\x94\x03\x9f\xd3G\x97|\x99H#S\xe88\xaeO\no\x83.\xd1IW\x8c\x80t\xb6\xda/\xd08\x8d{\x03p!\x1bu\xf20<\xfa\x8f\xae\xdd\xdac\xab\xeb\x16O\xc2\x8e\x11K~\xcf\x0b\xe8\xff\xb5w.\xf4\xb2{J\xe0L\x12%\x0cp\x8d\x03)\xa0\xe1S$\xec\x13\xd9\xee\x19\xbf\x10\xb3J\x8c?\x89\xa3aQ\xde\xac\x87\x07\x94\xf4cq\xec.\xe2o[\x98\x81\xe1\x89\\4ylv\xef;\x90by\xe6\xdb\xa4\x9a/&\xc5\xd0\x10\xe1\x0e\xde\xd9\x10\x8e\x16\xfb\xb7\xf7\xa8\xf7\xc7\xe5\x02\x07\x98\x8f6\x08\x95\xe7\xe27\x96\r6u\x9e\xfb\x0er\xb1\x1d\x9b\xbc\x03\xf9I\x05\xd8\x81\xdd\x05\xb4*\xd6A\xe9\xac\x01v\x95\n\x0f\xd8\xdf\xd5\xbd\x12\x1f5/(\x17l\xd2\x98\xc1\xa8\tdwnG7\xba\xce\xacY^h\x9d\x7fr\xd6\x89\xc5\x06A)>Y>\xdd&\xf5$\xc9\x11\xa7Z\xa3L@\x1fF\xa1\x99\xb5\xa7:Qn\x86;\x9e}r\xa7\x12\x05xY\xed>Qx\x15\x0b\x03\x8f\x8d\xd0/\x05\xb2>{J\x1cKs\x05\x12\xfc\xc6\xea\xe0P\x13|C\x93t\xb3\xcat\xe7\x8e\x1f\x01\x08\xd00\xd4[q6\xb4\x07\xba\xc100\\H\xb7\x82;\x98\xa6}`\x8a\xa2\xa3)\x82\xcc\xba\xbd\x83\x04\x1b\xa2\x83\x03A\xa1\xd6\x05\xf1\x1b\xc2\xb6\xf0\xa8|\x86;F\xa8H*\x88\xdcv\x9av\xbf\x1fj\xa5=\x19\x8f\xeb8\xf3d\xde\xc8+\r\n(\xff\xf7\xdb\xe2\x15B\xd4\"\xd0\']\xe1y\xfe\x18\xe7p\x88\xadN\xe6\xd9\x8b:\xc6\xdd\'Qn\xff\xbcd\xf53CO\x02\x03\x01\x00\x01", + name_constraints: None + }, + // DST Root CA X3 - The IdentTrust cross-sign root cert + webpki::TrustAnchor { + subject: b"1$0\"\x06\x03U\x04\n\x13\x1bDigital Signature Trust Co.1\x170\x15\x06\x03U\x04\x03\x13\x0eDST Root CA X3", + spki: b"0\r\x06\t*\x86H\x86\xf7\r\x01\x01\x01\x05\x00\x03\x82\x01\x0f\x000\x82\x01\n\x02\x82\x01\x01\x00\xdf\xaf\xe9\x97P\x08\x83W\xb4\xccbe\xf6\x90\x82\xec\xc7\xd3,k0\xca[\xec\xd9\xc3}\xc7@\xc1\x18\x14\x8b\xe0\xe83vI*\xe3?!I\x93\xacN\x0e\xaf>H\xcbe\xee\xfc\xd3!\x0fe\xd2*\xd92\x8f\x8c\xe5\xf7w\xb0\x12{\xb5\x95\xc0\x89\xa3\xa9\xba\xeds.z\x0c\x062\x83\xa2~\x8a\x140\xcd\x11\xa0\xe1*8\xb9y\n1\xfdP\xbd\x80e\xdf\xb7Qc\x83\xc8\xe2\x88a\xeaKa\x81\xecRk\xb9\xa2\xe2K\x1a(\x9fH\xa3\x9e\x0c\xda\t\x8e>\x17.\x1e\xdd \xdf[\xc6*\x8a\xab.\xbdp\xad\xc5\x0b\x1a%\x90tr\xc5{j\xab4\xd60\x89\xff\xe5h\x13{T\x0b\xc8\xd6\xae\xecZ\x9c\x92\x1e=d\xb3\x8c\xc6\xdf\xbf\xc9Ap\xec\x16r\xd5&\xec8U9C\xd0\xfc\xfd\x18\\@\xf1\x97\xeb\xd5\x9a\x9b\x8d\x1d\xba\xda%\xb9\xc6\xd8\xdf\xc1\x15\x02:\xab\xdan\xf1>.\xf5\\\x08\x9c<\xd6\x83i\xe4\x10\x9b\x19*\xb6)W\xe3\xe5=\x9b\x9f\xf0\x02]\x02\x03\x01\x00\x01", + name_constraints: None + }, +]; + +lazy_static! { + // Provides a lazily-initialized TLS ClientConfig that can be used with api.vndb.org/TLS and + // (s.)vndb.org/HTTPS. + pub static ref TLS_CONF: ::std::sync::Arc<rustls::ClientConfig> = { + let mut conf = rustls::ClientConfig::new(); + conf.root_store.add_trust_anchors(&LE_ROOT); + ::std::sync::Arc::new(conf) + }; +} diff --git a/src/msg.rs b/src/msg.rs new file mode 100644 index 0000000..e1c5528 --- /dev/null +++ b/src/msg.rs @@ -0,0 +1,112 @@ +use std::fmt; +use serde_json::Value; + +use parser::{parse_filter,parse_message}; + + +#[derive(Debug,Clone)] +pub enum Arg { + BareString(String), + Json(Value), + Filter(Filter), +} + +#[derive(Debug,Clone)] +pub struct Message { + name: String, + args: Vec<Arg>, +} + + +#[derive(Debug,Clone)] +pub enum Filter { + And(Box<Filter>, Box<Filter>), + Or(Box<Filter>, Box<Filter>), + Expr(String, Op, Value), +} + + +#[derive(Debug,Clone,Copy,PartialEq,Eq)] +pub enum Op { + Eq, + NEq, + Le, + LEq, + Gt, + GEq, + Fuzzy, +} + + +impl Op { + pub fn as_str(self) -> &'static str { + match self { + Op::Eq => "=", + Op::NEq => "!=", + Op::Le => "<", + Op::LEq => "<=", + Op::Gt => ">", + Op::GEq => ">=", + Op::Fuzzy => "~", + } + } +} + + +impl fmt::Display for Op { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(self.as_str()) + } +} + + +impl fmt::Display for Filter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + &Filter::And(ref x, ref y) => write!(f, "({} and {})", x, y), + &Filter::Or(ref x, ref y) => write!(f, "({} or {})", x, y), + &Filter::Expr(ref n, o, ref v) => write!(f, "({} {} {})", n, o, v), + } + } +} + + +impl fmt::Display for Arg { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + &Arg::BareString(ref x) => f.write_str(&x), + &Arg::Json(ref x) => write!(f, "{}", x), + &Arg::Filter(ref x) => write!(f, "{}", x), + } + } +} + + +impl fmt::Display for Message { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(&self.name)?; + for a in self.args.iter() { + f.write_str(" ")?; + write!(f, "{}", a)?; + } + Ok(()) + } +} + + +impl Filter { + pub fn parse(s: &str) -> Result<(Filter, &str), &'static str> { + parse_filter(s) + } +} + + +impl Message { + pub fn parse(s: &str) -> Result<Message, &'static str> { + parse_message(s) + } + + pub fn new(name: String, args: Vec<Arg>) -> Message { + Message{name: name, args: args} + } +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..03b288b --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,250 @@ +use serde_json::Value; + +use msg::{Filter,Op,Arg,Message}; + +type Result<T> = ::std::result::Result<T, &'static str>; + + + +/* Parse a JSON value with trailing data. This is a workaround until a proper solution has + * been implemented: https://github.com/serde-rs/json/issues/183 */ +fn parse_json(s: &str) -> Result<(Value, &str)> { + let mut bytes = 0; + let val: Value = ::serde_json::de::StreamDeserializer::new(s.bytes().map(|b| { bytes += 1; Ok(b)} )) + .next().ok_or("Expected JSON value")?.map_err(|_| "Invalid JSON value")?; + + // The JSON deserializer consumes one extra byte for numeric types, subtract that. + match val { Value::I64(_) | Value::U64(_) | Value::F64(_) => bytes -= 1, _ => () }; + + Ok((val, &s[bytes..])) +} + + +fn is_ws(c: char) -> bool { c == ' ' || c == '\t' || c == '\r' || c == '\n' } +fn is_filtername(c: char) -> bool { (c >= 'a' && c <= 'z') || c == '_' } +fn is_msgname(c: char) -> bool { c >= 'a' && c <= 'z' } +fn is_barestr(c: char) -> bool { (c >= 'a' && c <= 'z') || c == '_' || c == ',' } +fn trim_ws(s: &str) -> &str { s.trim_left_matches(is_ws) } + + + + +#[derive(Debug,PartialEq,Clone,Copy)] +enum Token { + Open, + Close, + And, + Or, + Expr, +} + +pub struct FilterParser<'a> { + buf: &'a str, + hasexpr: bool, +} + + +impl<'a> FilterParser<'a> { + // Consume any whitespace + fn conws(&mut self) { + self.buf = trim_ws(self.buf); + } + + // Consume the given number of bytes + fn con(&mut self, bytes: usize) { + self.buf = &self.buf[bytes..]; + } + + fn token_expr(&mut self) -> Result<Filter> { + let name: String = self.buf.chars().take_while(|&c| is_filtername(c)).collect(); + if name.len() == 0 { + return Err("Invalid token"); + } + self.con(name.len()); + self.conws(); + + let op = parse_op(self.buf).ok_or("Expected comparison operator")?; + self.con(op.as_str().len()); + self.conws(); + + let val = parse_json(self.buf)?; + self.buf = val.1; + + Ok(Filter::Expr(name, op, val.0)) + } + + // This tokenizer has two states: + // hasexpr (allows And, Or, Close) + // !hasexpr (allows Open, Expr) + // These states are necessary to handle ambiguity between Expr and the And/Or tokens, and are + // also used to enforce the following properties (which simplifies the parsing step): + // - Expr and And/Or tokens cannot be chained + // - And/Or/Close tokens always follow a Close/Expr token. + // - Expr/Open tokens always follow a Open/And/Or token + // + // An Expr token doesn't consume anything, the caller is expected to run token_expr() to get + // the expression and advance the parsing state. + fn token(&mut self) -> Result<Token> { + self.conws(); + + let ret = match (self.hasexpr, self.buf.chars().next()) { + (_, None) => Err("Unexpected end of input"), + (false,Some('(')) => { self.con(1); Ok(Token::Open) }, + (true, Some(')')) => { self.con(1); Ok(Token::Close) }, + (true, Some('a')) => if self.buf.starts_with("and") { self.con(3); Ok(Token::And) } else { Err("Invalid token") }, + (true, Some('o')) => if self.buf.starts_with("or") { self.con(2); Ok(Token::Or ) } else { Err("Invalid token") }, + (false,_) => Ok(Token::Expr), + _ => Err("Invalid token"), + }; + + self.hasexpr = match ret { Ok(Token::Close) | Ok(Token::Expr) => true, _ => false }; + ret + } + + fn parse(&mut self) -> Result<Filter> { + // This is a simple shunting-yard implementation + let mut exp = Vec::new(); + let mut ops = vec![Token::Open]; // Only And, Or and Open + + if self.token()? != Token::Open { + return Err("Filter must start with an open parentheses"); + } + + while ops.len() > 0 { + match self.token()? { + Token::Expr => exp.push(self.token_expr()?), + + op@Token::Open => ops.push(op), + + Token::Close => { + while let Some(op) = ops.pop() { + if op == Token::Open { + break; + } else { + apply(&mut exp, op); + } + } + }, + + o1@Token::And | o1@Token::Or => { + while let Some(&o2) = ops.last() { + if o2 != Token::Open && (o1 != o2 && o1 == Token::Or) { + ops.pop(); + apply(&mut exp, o2); + } else { + break; + } + } + ops.push(o1); + }, + } + } + Ok(exp.pop().unwrap()) + } +} + + +fn apply(exp: &mut Vec<Filter>, op: Token) { + let right = Box::new(exp.pop().unwrap()); + let left = Box::new(exp.pop().unwrap()); + exp.push(if op == Token::And { Filter::And(left, right) } else { Filter::Or(left, right) }); +} + + +fn parse_op(s: &str) -> Option<Op> { + if s.starts_with("=" ) { Some(Op::Eq ) } + else if s.starts_with("!=") { Some(Op::NEq) } + else if s.starts_with("<=") { Some(Op::LEq) } + else if s.starts_with("<" ) { Some(Op::Le ) } + else if s.starts_with(">=") { Some(Op::GEq) } + else if s.starts_with(">" ) { Some(Op::Gt ) } + else if s.starts_with("~" ) { Some(Op::Fuzzy) } + else { None } +} + + +pub fn parse_filter(s: &str) -> Result<(Filter, &str)> { + let mut p = FilterParser{buf: s, hasexpr: false}; + p.parse().map(|r| (r, p.buf)) +} + + +pub fn parse_message(s: &str) -> Result<Message> { + let mut buf = trim_ws(s); + let mut args = Vec::new(); + + let mut splt = buf.splitn(2, is_ws); + let name = splt.next().ok_or("Empty message")?; + if name.contains(|c| !is_msgname(c)) { + return Err("Invalid message name") + } + + buf = trim_ws(splt.next().unwrap_or("")); + while let Some(c) = buf.chars().next() { + // This match on the first character can be replaced by simply trying parse_filter and + // parse_json in sequence; but that results in less than ideal error messages on badly + // formatted input. + match c { + '(' => { + let v = parse_filter(buf)?; + args.push(Arg::Filter(v.0)); + buf = v.1; + }, + '[' | '{' | '"' => { + let v = parse_json(buf)?; + args.push(Arg::Json(v.0)); + buf = v.1; + }, + _ => { + if let Ok(v) = parse_json(buf) { + args.push(Arg::Json(v.0)); + buf = v.1; + + } else { + let mut splt = buf.splitn(2, is_ws); + let v = splt.next().unwrap(); + + if !v.contains(|c| !is_barestr(c)) { + args.push(Arg::BareString(v.to_string())); + } else { + return Err("Invalid argument") + } + + buf = splt.next().unwrap_or(""); + } + }, + } + buf = trim_ws(buf); + } + + Ok(Message::new(name.to_string(), args)) +} + + +#[test] +fn test_parse_filter() { + let ok = |i, o| { + let s = format!("{}garbage", i); + let f = parse_filter(&s).unwrap(); + assert_eq!(&format!("{}", f.0), o); + assert_eq!(f.1, "garbage"); + }; + ok("(n=1)", "(n = 1)"); + ok("(something_else>=[1,\"str\"])", "(something_else >= [1,\"str\"])"); + ok("(((n=1) and blah=[]))", "((n = 1) and (blah = []))"); + ok("(((n=1) and blah=[] or x=\"hi\"))", "(((n = 1) and (blah = [])) or (x = \"hi\"))"); + ok("(a=1andb=2andc=3)", "((a = 1) and ((b = 2) and (c = 3)))"); + ok("(a=1orb=2andc=3)", "((a = 1) or ((b = 2) and (c = 3)))"); + ok("(a=1orb=2andc=3and(d=4ande=5orf=6)andg=7)", "((a = 1) or ((b = 2) and ((c = 3) and ((((d = 4) and (e = 5)) or (f = 6)) and (g = 7)))))"); + ok("(and=nulloror!=false)", "((and = null) or (or != false))"); + + let nok = |i| { assert!(parse_filter(i).is_err()) }; + nok("()"); + nok("(n=1 n=1)"); + nok("n=1"); + nok("(and)"); + nok("(n=1 and"); + nok("(n=1 and )"); + nok("(n=1 and and n=2)"); + nok(") and n=1"); +} |