summaryrefslogtreecommitdiff
path: root/vndbapi-msg/src/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vndbapi-msg/src/parser.rs')
-rw-r--r--vndbapi-msg/src/parser.rs255
1 files changed, 255 insertions, 0 deletions
diff --git a/vndbapi-msg/src/parser.rs b/vndbapi-msg/src/parser.rs
new file mode 100644
index 0000000..135c71d
--- /dev/null
+++ b/vndbapi-msg/src/parser.rs
@@ -0,0 +1,255 @@
+use serde_json::Value;
+
+use msg::{Filter,Op,Arg,Message};
+
+type Result<T> = ::std::result::Result<T, &'static str>;
+
+
+
+/* Parse a JSON value with trailing data. This is a workaround until a proper solution has
+ * been implemented: https://github.com/serde-rs/json/issues/183 */
+fn parse_json(s: &str) -> Result<(Value, &str)> {
+ println!("Parsing JSON: {}", s);
+ let mut bytes = 0;
+ let val: Value = ::serde_json::Deserializer::from_iter(s.bytes().map(|b| { bytes += 1; Ok(b)} ))
+ .into_iter().next()
+ .ok_or("Expected JSON value")?
+ .map_err(|_| "Invalid JSON value")?;
+
+ // The JSON deserializer consumes one extra byte for numeric types, subtract that.
+ match val { Value::Number(_) => bytes -= 1, _ => () };
+
+ Ok((val, &s[bytes..]))
+}
+
+
// Character classes used by the hand-written tokenizer below.

// Whitespace accepted between tokens.
fn is_ws(c: char) -> bool { c == ' ' || c == '\t' || c == '\r' || c == '\n' }
// Filter names consist of lowercase ASCII letters and underscores only.
fn is_filtername(c: char) -> bool { (c >= 'a' && c <= 'z') || c == '_' }
// Bare string arguments additionally allow commas.
fn is_barestr(c: char) -> bool { (c >= 'a' && c <= 'z') || c == '_' || c == ',' }
// Strip leading whitespace; `trim_start_matches` is the non-deprecated
// replacement for `trim_left_matches` (renamed in Rust 1.33).
fn trim_ws(s: &str) -> &str { s.trim_start_matches(is_ws) }
+
+
+
+
// Tokens produced by the filter tokenizer. `Expr` is a marker only: the
// tokenizer reports that an expression starts here, and the caller must invoke
// `token_expr()` to actually consume and parse it.
#[derive(Debug,PartialEq,Clone,Copy)]
enum Token {
    Open,   // '('
    Close,  // ')'
    And,    // "and"
    Or,     // "or"
    Expr,   // start of a "name op value" expression (nothing consumed yet)
}
+
// Streaming parser state for a filter expression.
pub struct FilterParser<'a> {
    buf: &'a str,   // unconsumed remainder of the input
    hasexpr: bool,  // tokenizer state: true right after an Expr/Close token
}
+
+
impl<'a> FilterParser<'a> {
    // Consume any whitespace
    fn conws(&mut self) {
        self.buf = trim_ws(self.buf);
    }

    // Consume the given number of bytes
    fn con(&mut self, bytes: usize) {
        self.buf = &self.buf[bytes..];
    }

    // Parse a single `name op json-value` expression at the current position
    // and advance the buffer past it.
    fn token_expr(&mut self) -> Result<Filter> {
        let name: String = self.buf.chars().take_while(|&c| is_filtername(c)).collect();
        if name.len() == 0 {
            return Err("Invalid token");
        }
        // `is_filtername` only matches ASCII, so char count == byte count here.
        self.con(name.len());
        self.conws();

        let op = parse_op(self.buf).ok_or("Expected comparison operator")?;
        self.con(op.as_str().len());
        self.conws();

        let val = parse_json(self.buf)?;
        // parse_json returns the unconsumed remainder; continue from there.
        self.buf = val.1;

        Ok(Filter::Expr(name, op, val.0))
    }

    // This tokenizer has two states:
    //   hasexpr (allows And, Or, Close)
    //   !hasexpr (allows Open, Expr)
    // These states are necessary to handle ambiguity between Expr and the And/Or tokens, and are
    // also used to enforce the following properties (which simplifies the parsing step):
    // - Expr and And/Or tokens cannot be chained
    // - And/Or/Close tokens always follow a Close/Expr token.
    // - Expr/Open tokens always follow a Open/And/Or token
    //
    // An Expr token doesn't consume anything, the caller is expected to run token_expr() to get
    // the expression and advance the parsing state.
    fn token(&mut self) -> Result<Token> {
        self.conws();

        let ret = match (self.hasexpr, self.buf.chars().next()) {
            (_, None) => Err("Unexpected end of input"),
            (false,Some('(')) => { self.con(1); Ok(Token::Open) },
            (true, Some(')')) => { self.con(1); Ok(Token::Close) },
            // "and"/"or" are only keywords in the hasexpr state; in the
            // !hasexpr state they fall through to Expr below, so filter names
            // like "and" or "or" remain usable (see test_parse_filter).
            (true, Some('a')) => if self.buf.starts_with("and") { self.con(3); Ok(Token::And) } else { Err("Invalid token") },
            (true, Some('o')) => if self.buf.starts_with("or") { self.con(2); Ok(Token::Or ) } else { Err("Invalid token") },
            (false,_) => Ok(Token::Expr),
            _ => Err("Invalid token"),
        };

        self.hasexpr = match ret { Ok(Token::Close) | Ok(Token::Expr) => true, _ => false };
        ret
    }

    // Parse a complete parenthesized filter expression from the buffer,
    // leaving any trailing input unconsumed.
    fn parse(&mut self) -> Result<Filter> {
        // This is a simple shunting-yard implementation
        let mut exp = Vec::new();             // operand stack (parsed sub-filters)
        let mut ops = vec![Token::Open]; // Only And, Or and Open

        // The sentinel Open above is matched by this mandatory first '(' —
        // its Close empties `ops` and terminates the loop below.
        if self.token()? != Token::Open {
            return Err("Filter must start with an open parentheses");
        }

        while ops.len() > 0 {
            match self.token()? {
                Token::Expr => exp.push(self.token_expr()?),

                op@Token::Open => ops.push(op),

                Token::Close => {
                    // Reduce everything down to the matching Open.
                    while let Some(op) = ops.pop() {
                        if op == Token::Open {
                            break;
                        } else {
                            apply(&mut exp, op);
                        }
                    }
                },

                o1@Token::And | o1@Token::Or => {
                    // Pop only while the stacked operator binds tighter:
                    // that happens solely when o1 is Or and o2 is And, so
                    // "and" has higher precedence than "or" and equal
                    // operators are NOT popped (right-associative chains).
                    while let Some(&o2) = ops.last() {
                        if o2 != Token::Open && (o1 != o2 && o1 == Token::Or) {
                            ops.pop();
                            apply(&mut exp, o2);
                        } else {
                            break;
                        }
                    }
                    ops.push(o1);
                },
            }
        }
        // The tokenizer state machine guarantees a well-formed token sequence,
        // so exactly one filter remains on the operand stack here.
        Ok(exp.pop().unwrap())
    }
}
+
+
+fn apply(exp: &mut Vec<Filter>, op: Token) {
+ let right = Box::new(exp.pop().unwrap());
+ let left = Box::new(exp.pop().unwrap());
+ exp.push(if op == Token::And { Filter::And(left, right) } else { Filter::Or(left, right) });
+}
+
+
+fn parse_op(s: &str) -> Option<Op> {
+ if s.starts_with("=" ) { Some(Op::Eq ) }
+ else if s.starts_with("!=") { Some(Op::NEq) }
+ else if s.starts_with("<=") { Some(Op::LEq) }
+ else if s.starts_with("<" ) { Some(Op::Le ) }
+ else if s.starts_with(">=") { Some(Op::GEq) }
+ else if s.starts_with(">" ) { Some(Op::Gt ) }
+ else if s.starts_with("~" ) { Some(Op::Fuzzy) }
+ else { None }
+}
+
+
+pub fn parse_filter(s: &str) -> Result<(Filter, &str)> {
+ let mut p = FilterParser{buf: s, hasexpr: false};
+ p.parse().map(|r| (r, p.buf))
+}
+
+
+pub fn parse_arg(s: &str) -> Result<(Arg, &str)> {
+ let s = trim_ws(s);
+
+ // This match on the first character can be replaced by simply trying parse_filter and
+ // parse_json in sequence; but that results in less than ideal error messages on badly
+ // formatted input.
+ match s.chars().next() {
+ None => return Err("Empty argument"),
+
+ Some('(') => {
+ return parse_filter(s).map(|(v,r)| (Arg::Filter(v), r));
+ },
+
+ Some('[') | Some('{') | Some('"') => {
+ return parse_json(s).map(|(v,r)| (Arg::Json(v), r));
+ },
+
+ Some(_) => {
+ if let Ok((v,r)) = parse_json(s) {
+ return Ok((Arg::Json(v), r));
+
+ } else {
+ let mut splt = s.splitn(2, is_ws);
+ let v = splt.next().unwrap();
+ let rem = splt.next().unwrap_or("");
+
+ if !v.contains(|c| !is_barestr(c)) {
+ return Ok((Arg::BareString(v.to_string()), rem));
+ } else {
+ return Err("Invalid argument")
+ }
+ }
+ },
+ }
+}
+
+
+pub fn parse_message(s: &str) -> Result<Message> {
+ let mut buf = trim_ws(s);
+
+ let mut splt = buf.splitn(2, is_ws);
+ let name = splt.next().ok_or("Empty message")?;
+ let mut msg = Message::new(name)?;
+
+ buf = trim_ws(splt.next().unwrap_or(""));
+ while buf.len() > 0 {
+ let v = parse_arg(buf)?;
+ msg = msg.push_arg(v.0);
+ buf = trim_ws(v.1);
+ }
+
+ Ok(msg)
+}
+
+
#[test]
fn test_parse_filter() {
    // `ok` parses input `i` (with trailing garbage appended to prove the
    // parser stops at the filter's end) and checks the Display form against `o`.
    let ok = |i, o| {
        let s = format!("{}garbage", i);
        let f = parse_filter(&s).unwrap();
        assert_eq!(&format!("{}", f.0), o);
        assert_eq!(f.1, "garbage");
    };
    ok("(n=1)", "(n = 1)");
    ok("(something_else>=[1,\"str\"])", "(something_else >= [1,\"str\"])");
    ok("(((n=1) and blah=[]))", "((n = 1) and (blah = []))");
    // "and" binds tighter than "or"; equal operators associate to the right.
    ok("(((n=1) and blah=[] or x=\"hi\"))", "(((n = 1) and (blah = [])) or (x = \"hi\"))");
    ok("(a=1andb=2andc=3)", "((a = 1) and ((b = 2) and (c = 3)))");
    ok("(a=1orb=2andc=3)", "((a = 1) or ((b = 2) and (c = 3)))");
    ok("(a=1orb=2andc=3and(d=4ande=5orf=6)andg=7)", "((a = 1) or ((b = 2) and ((c = 3) and ((((d = 4) and (e = 5)) or (f = 6)) and (g = 7)))))");
    // "and"/"or" are valid filter names where an expression is expected.
    ok("(and=nulloror!=false)", "((and = null) or (or != false))");

    // Malformed inputs must be rejected, not panic.
    let nok = |i| { assert!(parse_filter(i).is_err()) };
    nok("()");
    nok("(n=1 n=1)");
    nok("n=1");
    nok("(and)");
    nok("(n=1 and");
    nok("(n=1 and )");
    nok("(n=1 and and n=2)");
    nok(") and n=1");
}