From 26dee6703f180bd471fca367fb07a7e826be13d8 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 4 Dec 2010 14:55:05 +0100 Subject: Initial commit --- json.mll | 266 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 266 insertions(+) create mode 100644 json.mll (limited to 'json.mll') diff --git a/json.mll b/json.mll new file mode 100644 index 0000000..184c1c0 --- /dev/null +++ b/json.mll @@ -0,0 +1,266 @@ +(* Very minimal standard-compliant JSON parser/generator, because the currently + * available JSON libraries for OCaml try to do too much and have too many + * dependencies. + * - The aim of this library is to be small rather than fast. + * - Error detecting is correct. Error reporting, however, is rather minimal. + * (Some portions and ideas of this code were borrowed from yojson) + *) + + + + +(* H E A D E R *) + +{ + +(* Primary JSON data structure. Integers and numbers are left as strings in + * order to avoid losing data to precision or overflows (and int<>string<>float + * conversion really isn't that hard to do in application code) *) +type json = + Object of (string * json) list + | Array of json list + | Bool of bool + | Null + | String of string + | Int of string + | Number of string + +exception JSON_Error +exception JSON_Array_End +exception JSON_Object_End +exception JSON_Incorrect_Type + + +(* Based on common.ml:utf8_of_bytes from yojson, although this one is less efficient *) +let utf8_of_bytes a b c d = + let i = (a lsl 12) lor (b lsl 8) lor (c lsl 4) lor d in + let str = "000" in + if i < 0x80 then ( + str.[0] <- char_of_int i; + String.sub str 0 1 + ) else if i < 0x800 then ( + str.[0] <- char_of_int (0xc0 lor ((i lsr 6) land 0x1f)); + str.[1] <- char_of_int (0x80 lor (i land 0x3f)); + String.sub str 0 2 + ) else ( + str.[0] <- char_of_int (0xe0 lor ((i lsr 12) land 0xf)); + str.[1] <- char_of_int (0x80 lor ((i lsr 6) land 0x3f)); + str.[2] <- char_of_int (0x80 lor (i land 0x3f)); + String.sub str 0 3 + ) + +(* from yojosn -> read.mll:hex *) +let int_of_hex c = + match c with + | '0'..'9' -> int_of_char c - int_of_char '0' + | 'a'..'f' -> int_of_char c - int_of_char 'a' + 10 + | 'A'..'F' -> int_of_char c - int_of_char 'A' + 10 + | _ -> assert false + +(* 0-255 -> \u00(00-ff) (this function isn't the reverse of int_of_hex, but oh well) *) +let hex_of_int i = + let s = "\\u0000" in + let a = i lsr 4 in + let b = i land 0x0f in + s.[4] <- char_of_int ((if a < 10 then int_of_char '0' else int_of_char 'a' - 10) + a); + s.[5] <- char_of_int ((if b < 10 then int_of_char '0' else int_of_char 'a' - 10) + b); + s + + +} + + + + +(* D E F I N I T I O N S *) + +let ws = [' ' '\t' '\n' '\r'] +let digit = ['0'-'9'] +let digit1 = ['1'-'9'] +let integer = '-'? ( digit | ( digit1 digit+ ) ) +let frac = '.' digit+ +let exp = ['e' 'E'] ['+' '-']? +let number = (integer frac) | (integer exp) | (integer frac exp) +let hex = ['0'-'9' 'a'-'f' 'A'-'F'] +let strlit = [^ '\000'-'\031' '"' '\\'] + + + + +(* R U L E S *) + +rule json_val = parse + | ws+ { json_val lexbuf } + | "true" { Bool true } + | "false" { Bool false } + | "null" { Null } + | integer as i { Int i } + | number as n { Number n } + | '"' { String (json_string "" lexbuf) } + | ']' { raise JSON_Array_End } + | '}' { raise JSON_Object_End } + | '[' { + let rec element () = + try ( + json_sep lexbuf; + let v = try json_val lexbuf with JSON_Array_End -> raise JSON_Error in + v :: (element ()) + ) with JSON_Array_End -> [] in + Array ( + try + let v = json_val lexbuf in + v :: (element ()) + with JSON_Array_End -> []) + } + | '{' { + let rec pair () = + try ( + json_sep lexbuf; + let name = try json_val lexbuf with JSON_Object_End -> raise JSON_Error in + let str = match name with + | String x -> x + | _ -> raise JSON_Error + in + json_valsep lexbuf; + let value = try json_val lexbuf with JSON_Object_End -> raise JSON_Error in + (str, value) :: (pair ()) + ) with JSON_Object_End -> [] in + Object ( + try + let name = json_val lexbuf in + let str = match name with + | String x -> x + | _ -> raise JSON_Error + in + json_valsep lexbuf; + let value = try json_val lexbuf with JSON_Object_End -> raise JSON_Error in + (str, value) :: (pair ()) + with JSON_Object_End -> []) + } + | _ + | eof { raise JSON_Error } + +and json_sep = parse + | ',' { () } + | ']' { raise JSON_Array_End } + | '}' { raise JSON_Object_End } + | ws+ { json_sep lexbuf } + | _ + | eof { raise JSON_Error } + +and json_valsep = parse + | ':' { () } + | '}' { raise JSON_Object_End } + | ws+ { json_valsep lexbuf } + | _ + | eof { raise JSON_Error } + +and json_string str = parse + | "\\\"" { json_string (str ^ "\"") lexbuf } + | "\\\\" { json_string (str ^ "\\") lexbuf } + | "\\/" { json_string (str ^ "/") lexbuf } + | "\\b" { json_string (str ^ "\b") lexbuf } + | "\\f" { json_string (str ^ "\x0c") lexbuf } + | "\\n" { json_string (str ^ "\n") lexbuf } + | "\\r" { json_string (str ^ "\r") lexbuf } + | "\\t" { json_string (str ^ "\t") lexbuf } + | "\\u" (hex as a) (hex as b) (hex as c) (hex as d) { + json_string (str ^ (utf8_of_bytes (int_of_hex a) (int_of_hex b) (int_of_hex c) (int_of_hex d))) lexbuf + } + | strlit+ as s { json_string (str ^ s) lexbuf } + | '"' { str } + | _ + | eof { raise JSON_Error } + +and json_string_write out = parse + | "\\" { out "\\\\"; json_string_write out lexbuf } + | "\"" { out "\\\""; json_string_write out lexbuf } + | "\b" { out "\\b"; json_string_write out lexbuf } + | "\x0c" { out "\\f"; json_string_write out lexbuf } + | "\n" { out "\\n"; json_string_write out lexbuf } + | "\r" { out "\\r"; json_string_write out lexbuf } + | "\t" { out "\\t"; json_string_write out lexbuf } + | [^ '\\' '"' '\x00'-'\x1F' '\x7F' ]+ as s { out s; json_string_write out lexbuf } + | [ '\x00'-'\x1F' '\x7F' ] as c { out (hex_of_int (int_of_char c)); json_string_write out lexbuf } + | eof { () } + + + +(* F O O T E R *) + +{ + +let json_of_string str = json_val (Lexing.from_string str) +let json_of_channel ch = json_val (Lexing.from_channel ch) +let json_of_function f = json_val (Lexing.from_function f) + +let json_out_str out str = + out "\""; + json_string_write out (Lexing.from_string str); + out "\"" + +let rec json_gen out obj = + match obj with + | Null -> out "null" + | Bool false -> out "false" + | Bool true -> out "true" + | Int x + | Number x -> out x + | String x -> json_out_str out x + | Array l -> ( + out "["; + let i = ref false in + List.iter (fun x -> + (match !i with true -> out "," | false -> i := true); + json_gen out x + ) l; + out "]" + ) + | Object l -> ( + out "{"; + let i = ref false in + List.iter (fun (n,v) -> + (match !i with true -> out "," | false -> i := true); + json_out_str out n; + out ":"; + json_gen out v + ) l; + out "}" + ) + +let string_of_json obj = + let str = ref "" in + json_gen (fun s -> str := !str ^ s) obj; + !str + + +(* convenience functions *) +let get_string = function | String x -> x | _ -> raise JSON_Incorrect_Type +let get_bool = function | Bool x -> x | _ -> raise JSON_Incorrect_Type +let get_int = function | Int x -> x | _ -> raise JSON_Incorrect_Type +let get_number = function | Number x -> x | _ -> raise JSON_Incorrect_Type +let get_array = function | Array x -> x | _ -> raise JSON_Incorrect_Type +let get_object = function | Object x -> x | _ -> raise JSON_Incorrect_Type +let get_value obj key = + match obj with + | Object l -> let _,v = List.find (fun (n,_) -> n = key) l in v + | _ -> raise JSON_Incorrect_Type + + +} + + + + + +(* Testing stuff... + +#load "_build/json.cmo";; +open Json;; +get_bool;; + +let obj = json_of_string "{\"num\":1,\"more\":false,\"items\":[{\"language\":\"ja\",\"original\":null,\"name\":\"Studi\\u007Fo e.go!\",\"type\":\"co\",\"id\":17}]}";; +string_of_json obj;; + +*) + -- cgit v1.2.3