use serde_json::Value; use msg::*; fn parse_json(s: &str) -> Result<(Value, &str)> { let s = trim_ws(s); // Serde fails to parse numbers and true/false/null with trailing garbage, so extract those // values manually. let json = if s.starts_with("true") { "true" } else if s.starts_with("false") { "false" } else if s.starts_with("null") { "null" } else if s.starts_with(|c: char| c.is_ascii_digit() || c == '-') { &s[..s.find(|c: char| !c.is_ascii_digit() && c != '-' && c != '.' && c != 'e' && c != 'E').unwrap_or(s.len())] } else { s }; let mut stream = ::serde_json::Deserializer::from_str(json).into_iter::(); let val = stream.next().ok_or(Error::UnexpectedEof)?.map_err(Error::Json)?; Ok((val, &s[stream.byte_offset()..])) } fn is_ws(c: char) -> bool { c == ' ' || c == '\t' || c == '\r' || c == '\n' } fn is_filtername(c: char) -> bool { (c >= 'a' && c <= 'z') || c == '_' } fn is_barestr(c: char) -> bool { (c >= 'a' && c <= 'z') || c == '_' || c == ',' } fn trim_ws(s: &str) -> &str { s.trim_left_matches(is_ws) } #[derive(Debug,PartialEq,Clone,Copy)] enum Token { Open, Close, And, Or, Expr, } pub struct FilterParser<'a> { buf: &'a str, hasexpr: bool, } impl<'a> FilterParser<'a> { // Consume any whitespace fn conws(&mut self) { self.buf = trim_ws(self.buf); } // Consume the given number of bytes fn con(&mut self, bytes: usize) { self.buf = &self.buf[bytes..]; } fn token_expr(&mut self) -> Result { let name: String = self.buf.chars().take_while(|&c| is_filtername(c)).collect(); if name.len() == 0 { return Err(Error::FilterToken); } self.con(name.len()); self.conws(); let op = parse_op(self.buf).ok_or(Error::FilterOp)?; self.con(op.as_str().len()); self.conws(); let val = parse_json(self.buf)?; self.buf = val.1; Ok(Filter::Expr(name, op, val.0)) } // This tokenizer has two states: // hasexpr (allows And, Or, Close) // !hasexpr (allows Open, Expr) // These states are necessary to handle ambiguity between Expr and the And/Or tokens, and are // also used to enforce the following properties (which simplifies the parsing step): // - Expr and And/Or tokens cannot be chained // - And/Or/Close tokens always follow a Close/Expr token. // - Expr/Open tokens always follow a Open/And/Or token // // An Expr token doesn't consume anything, the caller is expected to run token_expr() to get // the expression and advance the parsing state. fn token(&mut self) -> Result { self.conws(); let ret = match (self.hasexpr, self.buf.chars().next()) { (_, None) => Err(Error::UnexpectedEof), (false,Some('(')) => { self.con(1); Ok(Token::Open) }, (true, Some(')')) => { self.con(1); Ok(Token::Close) }, (true, Some('a')) => if self.buf.starts_with("and") { self.con(3); Ok(Token::And) } else { Err(Error::FilterToken) }, (true, Some('o')) => if self.buf.starts_with("or") { self.con(2); Ok(Token::Or ) } else { Err(Error::FilterToken) }, (false,_) => Ok(Token::Expr), _ => Err(Error::FilterToken), }; self.hasexpr = match ret { Ok(Token::Close) | Ok(Token::Expr) => true, _ => false }; ret } fn parse(&mut self) -> Result { // This is a simple shunting-yard implementation let mut exp = Vec::new(); let mut ops = vec![Token::Open]; // Only And, Or and Open if self.token()? != Token::Open { return Err(Error::FilterStart); } while ops.len() > 0 { match self.token()? { Token::Expr => exp.push(self.token_expr()?), op@Token::Open => ops.push(op), Token::Close => { while let Some(op) = ops.pop() { if op == Token::Open { break; } else { apply(&mut exp, op); } } }, o1@Token::And | o1@Token::Or => { while let Some(&o2) = ops.last() { if o2 != Token::Open && (o1 != o2 && o1 == Token::Or) { ops.pop(); apply(&mut exp, o2); } else { break; } } ops.push(o1); }, } } Ok(exp.pop().unwrap()) } } fn apply(exp: &mut Vec, op: Token) { let right = Box::new(exp.pop().unwrap()); let left = Box::new(exp.pop().unwrap()); exp.push(if op == Token::And { Filter::And(left, right) } else { Filter::Or(left, right) }); } fn parse_op(s: &str) -> Option { if s.starts_with("=" ) { Some(Op::Eq ) } else if s.starts_with("!=") { Some(Op::NEq) } else if s.starts_with("<=") { Some(Op::LEq) } else if s.starts_with("<" ) { Some(Op::Le ) } else if s.starts_with(">=") { Some(Op::GEq) } else if s.starts_with(">" ) { Some(Op::Gt ) } else if s.starts_with("~" ) { Some(Op::Fuzzy) } else { None } } pub fn parse_filter(s: &str) -> Result<(Filter, &str)> { let mut p = FilterParser{buf: s, hasexpr: false}; p.parse().map(|r| (r, p.buf)) } pub fn parse_arg(s: &str) -> Result<(Arg, &str)> { let s = trim_ws(s); // This match on the first character can be replaced by simply trying parse_filter and // parse_json in sequence; but that results in less than ideal error messages on badly // formatted input. match s.chars().next() { None => return Err(Error::UnexpectedEof), Some('(') => { return parse_filter(s).map(|(v,r)| (Arg::Filter(v), r)); }, Some('[') | Some('{') | Some('"') => { return parse_json(s).map(|(v,r)| (Arg::Json(v), r)); }, Some(_) => { if let Ok((v,r)) = parse_json(s) { return Ok((Arg::Json(v), r)); } else { let mut splt = s.splitn(2, is_ws); let v = splt.next().unwrap(); let rem = splt.next().unwrap_or(""); if !v.contains(|c| !is_barestr(c)) { return Ok((Arg::BareString(v.to_string()), rem)); } else { return Err(Error::InvalidArg) } } }, } } pub fn parse_message(s: &str) -> Result { let mut buf = trim_ws(s); let mut splt = buf.splitn(2, is_ws); let name = splt.next().ok_or(Error::UnexpectedEof)?; let mut msg = Message::new(name)?; buf = trim_ws(splt.next().unwrap_or("")); while buf.len() > 0 { let v = parse_arg(buf)?; msg = msg.push_arg(v.0); buf = trim_ws(v.1); } Ok(msg) } #[test] fn test_parse_filter() { let ok = |i, o| { let s = format!("{}garbage", i); let msg = format!("Parse of '{}'", s); let f = parse_filter(&s).expect(&msg); assert_eq!(&format!("{}", f.0), o); assert_eq!(f.1, "garbage"); }; ok("(n=-11.2)", "(n = -11.2)"); ok("(something_else>=[1,\"str\"])", "(something_else >= [1,\"str\"])"); ok("( a\t= \t\ntrue )", "(a = true)"); ok("(((n=1) and blah=[]))", "((n = 1) and (blah = []))"); ok("(((n=1) and blah=[] or x=\"hi\"))", "(((n = 1) and (blah = [])) or (x = \"hi\"))"); ok("(a=1andb=2andc=3)", "((a = 1) and ((b = 2) and (c = 3)))"); ok("(a=1orb=2andc=3)", "((a = 1) or ((b = 2) and (c = 3)))"); ok("(a=1orb=2andc=3and(d=4ande=5orf=6)andg=7)", "((a = 1) or ((b = 2) and ((c = 3) and ((((d = 4) and (e = 5)) or (f = 6)) and (g = 7)))))"); ok("(and=nulloror!=false)", "((and = null) or (or != false))"); let nok = |i| { assert!(parse_filter(i).is_err()) }; nok("()"); nok("(n=1 n=1)"); nok("n=1"); nok("(and)"); nok("(n=1 and"); nok("(n=1 and )"); nok("(n=1 and and n=2)"); nok(") and n=1"); }