diff options
Diffstat (limited to 'vndbapi-msg/src/parser.rs')
-rw-r--r-- | vndbapi-msg/src/parser.rs | 255 |
1 files changed, 255 insertions, 0 deletions
diff --git a/vndbapi-msg/src/parser.rs b/vndbapi-msg/src/parser.rs new file mode 100644 index 0000000..135c71d --- /dev/null +++ b/vndbapi-msg/src/parser.rs @@ -0,0 +1,255 @@ +use serde_json::Value; + +use msg::{Filter,Op,Arg,Message}; + +type Result<T> = ::std::result::Result<T, &'static str>; + + + +/* Parse a JSON value with trailing data. This is a workaround until a proper solution has + * been implemented: https://github.com/serde-rs/json/issues/183 */ +fn parse_json(s: &str) -> Result<(Value, &str)> { + println!("Parsing JSON: {}", s); + let mut bytes = 0; + let val: Value = ::serde_json::Deserializer::from_iter(s.bytes().map(|b| { bytes += 1; Ok(b)} )) + .into_iter().next() + .ok_or("Expected JSON value")? + .map_err(|_| "Invalid JSON value")?; + + // The JSON deserializer consumes one extra byte for numeric types, subtract that. + match val { Value::Number(_) => bytes -= 1, _ => () }; + + Ok((val, &s[bytes..])) +} + + +fn is_ws(c: char) -> bool { c == ' ' || c == '\t' || c == '\r' || c == '\n' } +fn is_filtername(c: char) -> bool { (c >= 'a' && c <= 'z') || c == '_' } +fn is_barestr(c: char) -> bool { (c >= 'a' && c <= 'z') || c == '_' || c == ',' } +fn trim_ws(s: &str) -> &str { s.trim_left_matches(is_ws) } + + + + +#[derive(Debug,PartialEq,Clone,Copy)] +enum Token { + Open, + Close, + And, + Or, + Expr, +} + +pub struct FilterParser<'a> { + buf: &'a str, + hasexpr: bool, +} + + +impl<'a> FilterParser<'a> { + // Consume any whitespace + fn conws(&mut self) { + self.buf = trim_ws(self.buf); + } + + // Consume the given number of bytes + fn con(&mut self, bytes: usize) { + self.buf = &self.buf[bytes..]; + } + + fn token_expr(&mut self) -> Result<Filter> { + let name: String = self.buf.chars().take_while(|&c| is_filtername(c)).collect(); + if name.len() == 0 { + return Err("Invalid token"); + } + self.con(name.len()); + self.conws(); + + let op = parse_op(self.buf).ok_or("Expected comparison operator")?; + self.con(op.as_str().len()); + self.conws(); + + let val = parse_json(self.buf)?; + self.buf = val.1; + + Ok(Filter::Expr(name, op, val.0)) + } + + // This tokenizer has two states: + // hasexpr (allows And, Or, Close) + // !hasexpr (allows Open, Expr) + // These states are necessary to handle ambiguity between Expr and the And/Or tokens, and are + // also used to enforce the following properties (which simplifies the parsing step): + // - Expr and And/Or tokens cannot be chained + // - And/Or/Close tokens always follow a Close/Expr token. + // - Expr/Open tokens always follow a Open/And/Or token + // + // An Expr token doesn't consume anything, the caller is expected to run token_expr() to get + // the expression and advance the parsing state. + fn token(&mut self) -> Result<Token> { + self.conws(); + + let ret = match (self.hasexpr, self.buf.chars().next()) { + (_, None) => Err("Unexpected end of input"), + (false,Some('(')) => { self.con(1); Ok(Token::Open) }, + (true, Some(')')) => { self.con(1); Ok(Token::Close) }, + (true, Some('a')) => if self.buf.starts_with("and") { self.con(3); Ok(Token::And) } else { Err("Invalid token") }, + (true, Some('o')) => if self.buf.starts_with("or") { self.con(2); Ok(Token::Or ) } else { Err("Invalid token") }, + (false,_) => Ok(Token::Expr), + _ => Err("Invalid token"), + }; + + self.hasexpr = match ret { Ok(Token::Close) | Ok(Token::Expr) => true, _ => false }; + ret + } + + fn parse(&mut self) -> Result<Filter> { + // This is a simple shunting-yard implementation + let mut exp = Vec::new(); + let mut ops = vec![Token::Open]; // Only And, Or and Open + + if self.token()? != Token::Open { + return Err("Filter must start with an open parentheses"); + } + + while ops.len() > 0 { + match self.token()? { + Token::Expr => exp.push(self.token_expr()?), + + op@Token::Open => ops.push(op), + + Token::Close => { + while let Some(op) = ops.pop() { + if op == Token::Open { + break; + } else { + apply(&mut exp, op); + } + } + }, + + o1@Token::And | o1@Token::Or => { + while let Some(&o2) = ops.last() { + if o2 != Token::Open && (o1 != o2 && o1 == Token::Or) { + ops.pop(); + apply(&mut exp, o2); + } else { + break; + } + } + ops.push(o1); + }, + } + } + Ok(exp.pop().unwrap()) + } +} + + +fn apply(exp: &mut Vec<Filter>, op: Token) { + let right = Box::new(exp.pop().unwrap()); + let left = Box::new(exp.pop().unwrap()); + exp.push(if op == Token::And { Filter::And(left, right) } else { Filter::Or(left, right) }); +} + + +fn parse_op(s: &str) -> Option<Op> { + if s.starts_with("=" ) { Some(Op::Eq ) } + else if s.starts_with("!=") { Some(Op::NEq) } + else if s.starts_with("<=") { Some(Op::LEq) } + else if s.starts_with("<" ) { Some(Op::Le ) } + else if s.starts_with(">=") { Some(Op::GEq) } + else if s.starts_with(">" ) { Some(Op::Gt ) } + else if s.starts_with("~" ) { Some(Op::Fuzzy) } + else { None } +} + + +pub fn parse_filter(s: &str) -> Result<(Filter, &str)> { + let mut p = FilterParser{buf: s, hasexpr: false}; + p.parse().map(|r| (r, p.buf)) +} + + +pub fn parse_arg(s: &str) -> Result<(Arg, &str)> { + let s = trim_ws(s); + + // This match on the first character can be replaced by simply trying parse_filter and + // parse_json in sequence; but that results in less than ideal error messages on badly + // formatted input. + match s.chars().next() { + None => return Err("Empty argument"), + + Some('(') => { + return parse_filter(s).map(|(v,r)| (Arg::Filter(v), r)); + }, + + Some('[') | Some('{') | Some('"') => { + return parse_json(s).map(|(v,r)| (Arg::Json(v), r)); + }, + + Some(_) => { + if let Ok((v,r)) = parse_json(s) { + return Ok((Arg::Json(v), r)); + + } else { + let mut splt = s.splitn(2, is_ws); + let v = splt.next().unwrap(); + let rem = splt.next().unwrap_or(""); + + if !v.contains(|c| !is_barestr(c)) { + return Ok((Arg::BareString(v.to_string()), rem)); + } else { + return Err("Invalid argument") + } + } + }, + } +} + + +pub fn parse_message(s: &str) -> Result<Message> { + let mut buf = trim_ws(s); + + let mut splt = buf.splitn(2, is_ws); + let name = splt.next().ok_or("Empty message")?; + let mut msg = Message::new(name)?; + + buf = trim_ws(splt.next().unwrap_or("")); + while buf.len() > 0 { + let v = parse_arg(buf)?; + msg = msg.push_arg(v.0); + buf = trim_ws(v.1); + } + + Ok(msg) +} + + +#[test] +fn test_parse_filter() { + let ok = |i, o| { + let s = format!("{}garbage", i); + let f = parse_filter(&s).unwrap(); + assert_eq!(&format!("{}", f.0), o); + assert_eq!(f.1, "garbage"); + }; + ok("(n=1)", "(n = 1)"); + ok("(something_else>=[1,\"str\"])", "(something_else >= [1,\"str\"])"); + ok("(((n=1) and blah=[]))", "((n = 1) and (blah = []))"); + ok("(((n=1) and blah=[] or x=\"hi\"))", "(((n = 1) and (blah = [])) or (x = \"hi\"))"); + ok("(a=1andb=2andc=3)", "((a = 1) and ((b = 2) and (c = 3)))"); + ok("(a=1orb=2andc=3)", "((a = 1) or ((b = 2) and (c = 3)))"); + ok("(a=1orb=2andc=3and(d=4ande=5orf=6)andg=7)", "((a = 1) or ((b = 2) and ((c = 3) and ((((d = 4) and (e = 5)) or (f = 6)) and (g = 7)))))"); + ok("(and=nulloror!=false)", "((and = null) or (or != false))"); + + let nok = |i| { assert!(parse_filter(i).is_err()) }; + nok("()"); + nok("(n=1 n=1)"); + nok("n=1"); + nok("(and)"); + nok("(n=1 and"); + nok("(n=1 and )"); + nok("(n=1 and and n=2)"); + nok(") and n=1"); +} |