util.json: New, faster, stricter, more compliant JSON decoder. Now returns nil,err...
author Waqas Hussain <waqas20@gmail.com>
Tue, 7 May 2013 14:42:44 +0000 (10:42 -0400)
committer Waqas Hussain <waqas20@gmail.com>
Tue, 7 May 2013 14:42:44 +0000 (10:42 -0400)
util/json.lua

index 0ce315b2a47b07d73bd3c28d3e5ebc55af64a2bc..82ebcc4330ef61e56f3185f9cbd75a4f7554500c 100644 (file)
@@ -185,214 +185,177 @@ end
 -----------------------------------
 
 
-function json.decode(json)
-       json = json.." "; -- appending a space ensures valid json wouldn't touch EOF
-       local pos = 1;
-       local current = {};
-       local stack = {};
-       local ch, peek;
-       local function next()
-               ch = json:sub(pos, pos);
-               if ch == "" then error("Unexpected EOF"); end
-               pos = pos+1;
-               peek = json:sub(pos, pos);
-               return ch;
-       end
-       
-       local function skipwhitespace()
-               while ch and (ch == "\r" or ch == "\n" or ch == "\t" or ch == " ") do
-                       next();
+-- Returns the position of the first byte at or after `index` that is not JSON
+-- whitespace (space, tab, CR, LF). When nothing but whitespace remains,
+-- `index` is returned unchanged, so callers still have to inspect the byte
+-- they land on.
+local function _skip_whitespace(json, index)
+       local first_token = json:find("[^ \t\r\n]", index);
+       if first_token then return first_token; end
+       return index;
+end
+local function _fixobject(obj) -- expands the special "__array" / "__hash" members of a decoded object in place and returns the same table
+       local __array = obj.__array;
+       if __array then
+               obj.__array = nil; -- drop the marker key before merging its contents into obj
+               for i,v in ipairs(__array) do -- ipairs stops at the first nil entry
+                       t_insert(obj, v); -- append to obj itself (t_insert is defined outside this hunk)
                 end
         end
-       local function skiplinecomment()
-               repeat next(); until not(ch) or ch == "\r" or ch == "\n";
-               skipwhitespace();
-       end
-       local function skipstarcomment()
-               next(); next(); -- skip '/', '*'
-               while peek and ch ~= "*" and peek ~= "/" do next(); end
-               if not peek then error("eof in star comment") end
-               next(); next(); -- skip '*', '/'
-               skipwhitespace();
-       end
-       local function skipstuff()
-               while true do
-                       skipwhitespace();
-                       if ch == "/" and peek == "*" then
-                               skipstarcomment();
-                       elseif ch == "/" and peek == "/" then
-                               skiplinecomment();
+       local __hash = obj.__hash;
+       if __hash then
+               obj.__hash = nil; -- drop the marker key before merging its contents into obj
+               local k; -- pending key: __hash is read as a flat key, value, key, value, ... list
+               for i,v in ipairs(__hash) do
+                       if k ~= nil then -- a key is pending, so this entry is its value
+                               obj[k] = v; k = nil;
                         else
-                               return;
+                               k = v; -- no key pending: remember this entry as the next key
                         end
                 end
         end
-       
-       local readvalue;
-       local function readarray()
-               local t = setmetatable({}, array_mt);
-               next(); -- skip '['
-               skipstuff();
-               if ch == "]" then next(); return t; end
-               t_insert(t, readvalue());
-               while true do
-                       skipstuff();
-                       if ch == "]" then next(); return t; end
-                       if not ch then error("eof while reading array");
-                       elseif ch == "," then next();
-                       elseif ch then error("unexpected character in array, comma expected"); end
-                       if not ch then error("eof while reading array"); end
-                       t_insert(t, readvalue());
+       return obj; -- same table that was passed in, now without the marker keys
+end
+local _readvalue, _readstring;
+-- Parses the JSON object whose opening "{" is at `index`.
+-- Returns the decoded table and the position just after the closing "}", or
+-- nil plus an error message on malformed input. Non-empty objects are passed
+-- through _fixobject before being returned.
+local function _readobject(json, index)
+       local o = {};
+       local first = true; -- "}" in key position is only legal directly after "{"
+       while true do
+               local key, val;
+               index = _skip_whitespace(json, index + 1);
+               if json:byte(index) ~= 0x22 then -- "\""
+                       -- accept "{}" but reject a trailing comma such as {"a":1,}
+                       if first and json:byte(index) == 0x7d then return o, index + 1; end -- "}"
+                       return nil, "key expected";
                 end
+               first = false;
+               key, index = _readstring(json, index);
+               if key == nil then return nil, index; end -- index carries the error message here
+               index = _skip_whitespace(json, index);
+               if json:byte(index) ~= 0x3a then return nil, "colon expected"; end -- ":"
+               val, index = _readvalue(json, index + 1);
+               if val == nil then return nil, index; end -- index carries the error message here
+               o[key] = val;
+               index = _skip_whitespace(json, index);
+               local b = json:byte(index);
+               if b == 0x7d then return _fixobject(o), index + 1; end -- "}"
+               if b ~= 0x2c then return nil, "object eof"; end -- ","
         end
-       
-       local function checkandskip(c)
-               local x = ch or "eof";
-               if x ~= c then error("unexpected "..x..", '"..c.."' expected"); end
-               next();
-       end
-       local function readliteral(lit, val)
-               for c in lit:gmatch(".") do
-                       checkandskip(c);
+end
+-- Parses the JSON array whose opening "[" is at `index`.
+-- Returns a table tagged with array_mt and the position just after the
+-- closing "]", or nil plus an error message on malformed input.
+local function _readarray(json, index)
+       local a = {};
+       -- An empty array may contain whitespace ("[ ]"), so look at the first
+       -- non-blank byte after "[" rather than only the byte right next to it.
+       local first = _skip_whitespace(json, index + 1);
+       if json:byte(first) == 0x5d then return setmetatable(a, array_mt), first + 1; end -- "]"
+       while true do
+               local val;
+               val, index = _readvalue(json, index + 1);
+               if val == nil then
+                       return val, index; -- index carries the error message here
                 end
-               return val;
+               t_insert(a, val);
+               index = _skip_whitespace(json, index);
+               local b = json:byte(index);
+               if b == 0x5d then return setmetatable(a, array_mt), index + 1; end -- "]"
+               if b ~= 0x2c then return nil, "array eof"; end -- ","
         end
-       local function readstring()
-               local s = {};
-               checkandskip("\"");
-               while ch do
-                       while ch and ch ~= "\\" and ch ~= "\"" do
-                               t_insert(s, ch); next();
-                       end
-                       if ch == "\\" then
-                               next();
-                               if unescapes[ch] then
-                                       t_insert(s, unescapes[ch]);
-                                       next();
-                               elseif ch == "u" then
-                                       local seq = "";
-                                       for i=1,4 do
-                                               next();
-                                               if not ch then error("unexpected eof in string"); end
-                                               if not ch:match("[0-9a-fA-F]") then error("invalid unicode escape sequence in string"); end
-                                               seq = seq..ch;
-                                       end
-                                       t_insert(s, codepoint_to_utf8(tonumber(seq, 16)));
-                                       next();
-                               else error("invalid escape sequence in string"); end
-                       end
-                       if ch == "\"" then
-                               next();
-                               return t_concat(s);
-                       end
-               end
-               error("eof while reading string");
+end
+local _unescape_error; -- set by _unescape_func, checked by _readstring
+-- Converts one escaped UTF-16 surrogate pair (12 characters of the form
+-- "\uXXXX\uXXXX", lead surrogate first) into the 4-byte UTF-8 encoding of
+-- the code point it represents.
+local function _unescape_surrogate_func(x)
+       local high = tonumber(x:sub(3, 6), 16);
+       local low = tonumber(x:sub(9, 12), 16);
+       -- 0x35FDC00 == 0xD800 * 0x400 + 0xDC00 - 0x10000
+       local rest = high * 0x400 + low - 0x35FDC00;
+       local sextet1 = rest % 64; -- least significant six bits
+       rest = (rest - sextet1) / 64;
+       local sextet2 = rest % 64;
+       rest = (rest - sextet2) / 64;
+       local sextet3 = rest % 64;
+       rest = (rest - sextet3) / 64; -- what is left goes into the lead byte
+       return s_char(0xF0 + rest, 0x80 + sextet3, 0x80 + sextet2, 0x80 + sextet1);
+end
+local function _unescape_func(x) -- gsub callback: x is "\u" plus up to four following characters (see the pattern in _readstring)
+       x = x:match("%x%x%x%x", 3); -- four consecutive hex digits searched from position 3 on; nil when there are none
+       if x then
+               --if x >= 0xD800 and x <= 0xDFFF then _unescape_error = true; end -- bad surrogate pair
+               return codepoint_to_utf8(tonumber(x, 16)); -- replacement text comes from codepoint_to_utf8 (defined outside this hunk)
         end
-       local function readnumber()
-               local s = "";
-               if ch == "-" then
-                       s = s..ch; next();
-                       if not ch:match("[0-9]") then error("number format error"); end
-               end
-               if ch == "0" then
-                       s = s..ch; next();
-                       if ch:match("[0-9]") then error("number format error"); end
-               else
-                       while ch and ch:match("[0-9]") do
-                               s = s..ch; next();
-                       end
-               end
-               if ch == "." then
-                       s = s..ch; next();
-                       if not ch:match("[0-9]") then error("number format error"); end
-                       while ch and ch:match("[0-9]") do
-                               s = s..ch; next();
-                       end
-                       if ch == "e" or ch == "E" then
-                               s = s..ch; next();
-                               if ch == "+" or ch == "-" then
-                                       s = s..ch; next();
-                                       if not ch:match("[0-9]") then error("number format error"); end
-                                       while ch and ch:match("[0-9]") do
-                                               s = s..ch; next();
-                                       end
-                               end
-                       end
-               end
-               return tonumber(s);
+       _unescape_error = true; -- not a valid \uXXXX escape: raise the flag; returning nothing makes gsub keep the original text
+end
+function _readstring(json, index) -- reads the string whose opening quote is at index; returns text, next index -- or nil, error message
+       index = index + 1; -- step past the opening quote
+       local endindex = json:find("\"", index, true); -- plain search for the next quote; json.decode has already rewritten \" to \u0022, so that quote is the terminator
+       if endindex then
+               local s = json:sub(index, endindex - 1); -- raw text between the two quotes
+               --if s:find("[%z-\31]") then return nil, "control char in string"; end
+               -- FIXME handle control characters
+               _unescape_error = nil; -- reset the flag that _unescape_func raises
+               --s = s:gsub("\\u[dD][89abAB]%x%x\\u[dD][cdefCDEF]%x%x", _unescape_surrogate_func);
+               -- FIXME handle escapes beyond BMP
+               s = s:gsub("\\u.?.?.?.?", _unescape_func); -- decode each \u escape; only gsub's first result (the new string) is kept
+               if _unescape_error then return nil, "invalid escape"; end
+               return s, endindex + 1; -- resume just past the closing quote
         end
-       local function readmember(t)
-               skipstuff();
-               local k = readstring();
-               skipstuff();
-               checkandskip(":");
-               t[k] = readvalue();
+       return nil, "string eof"; -- no closing quote was found
+end
+-- Parses the JSON number starting at `index`.
+-- Returns the number and the position just after it, or nil plus an error
+-- message when the text is not a well-formed JSON number.
+local function _readnumber(json, index)
+       -- grab the whole run of number-like characters starting exactly at index
+       local m = json:match("^[0-9%.%-eE%+]+", index);
+       if m == nil then return nil, "number expected"; end
+       -- integer part: optional "-", then either "0" or digits without a leading zero
+       local rest = m:match("^%-?0(.*)$") or m:match("^%-?[1-9][0-9]*(.*)$");
+       if rest == nil then return nil, "invalid number"; end
+       rest = rest:match("^%.[0-9]+(.*)$") or rest; -- optional fraction
+       rest = rest:match("^[eE][%+%-]?[0-9]+(.*)$") or rest; -- optional exponent
+       -- anything left over (as in "1-2", "01", "1." or "1e") is malformed
+       if rest ~= "" then return nil, "invalid number"; end
+       return tonumber(m), index + #m;
+end
+local function _readnull(json, index) -- expects the literal null starting at index; the caller has already matched the leading "n"
+       local a, b, c = json:byte(index + 1, index + 3); -- the three bytes after the "n"
+       if a == 0x75 and b == 0x6c and c == 0x6c then -- "u", "l", "l"
+               return null, index + 4; -- `null` is defined outside this hunk (presumably the module's JSON-null placeholder)
         end
-       local function fixobject(obj)
-               local __array = obj.__array;
-               if __array then
-                       obj.__array = nil;
-                       for i,v in ipairs(__array) do
-                               t_insert(obj, v);
-                       end
-               end
-               local __hash = obj.__hash;
-               if __hash then
-                       obj.__hash = nil;
-                       local k;
-                       for i,v in ipairs(__hash) do
-                               if k ~= nil then
-                                       obj[k] = v; k = nil;
-                               else
-                                       k = v;
-                               end
-                       end
-               end
-               return obj;
+       return nil, "null parse failed";
+end
+local function _readtrue(json, index) -- expects the literal true starting at index; the caller has already matched the leading "t"
+       local a, b, c = json:byte(index + 1, index + 3); -- the three bytes after the "t"
+       if a == 0x72 and b == 0x75 and c == 0x65 then -- "r", "u", "e"
+               return true, index + 4; -- value plus the position just past the literal
         end
-       local function readobject()
-               local t = {};
-               next(); -- skip '{'
-               skipstuff();
-               if ch == "}" then next(); return t; end
-               if not ch then error("eof while reading object"); end
-               readmember(t);
-               while true do
-                       skipstuff();
-                       if ch == "}" then next(); return fixobject(t); end
-                       if not ch then error("eof while reading object");
-                       elseif ch == "," then next();
-                       elseif ch then error("unexpected character in object, comma expected"); end
-                       if not ch then error("eof while reading object"); end
-                       readmember(t);
-               end
+       return nil, "true parse failed";
+end
+local function _readfalse(json, index) -- expects the literal false starting at index; the caller has already matched the leading "f"
+       local a, b, c, d = json:byte(index + 1, index + 4); -- the four bytes after the "f"
+       if a == 0x61 and b == 0x6c and c == 0x73 and d == 0x65 then -- "a", "l", "s", "e"
+               return false, index + 5; -- callers test `== nil` for failure, so a false value is not mistaken for an error
         end
-       
-       function readvalue()
-               skipstuff();
-               while ch do
-                       if ch == "{" then
-                               return readobject();
-                       elseif ch == "[" then
-                               return readarray();
-                       elseif ch == "\"" then
-                               return readstring();
-                       elseif ch:match("[%-0-9%.]") then
-                               return readnumber();
-                       elseif ch == "n" then
-                               return readliteral("null", null);
-                       elseif ch == "t" then
-                               return readliteral("true", true);
-                       elseif ch == "f" then
-                               return readliteral("false", false);
-                       else
-                               error("invalid character at value start: "..ch);
-                       end
-               end
-               error("eof while reading value");
+       return nil, "false parse failed";
+end
+function _readvalue(json, index) -- parses whichever JSON value starts at or after index; returns value, next index -- or nil, error message
+       index = _skip_whitespace(json, index); -- leading whitespace is allowed before any value
+       local b = json:byte(index); -- nil when index is past the end of the text
+       -- TODO try table lookup instead of if-else?
+       if b == 0x7B then -- "{"
+               return _readobject(json, index);
+       elseif b == 0x5B then -- "["
+               return _readarray(json, index);
+       elseif b == 0x22 then -- "\""
+               return _readstring(json, index);
+       elseif b ~= nil and b >= 0x30 and b <= 0x39 or b == 0x2d then -- "0"-"9" or "-"
+               return _readnumber(json, index);
+       elseif b == 0x6e then -- "n"
+               return _readnull(json, index);
+       elseif b == 0x74 then -- "t"
+               return _readtrue(json, index);
+       elseif b == 0x66 then -- "f"
+               return _readfalse(json, index);
+       else
+               return nil, "value expected"; -- end of input or a byte that cannot start a value
         end
-       next();
-       return readvalue();
+end
+-- Maps each two-character backslash escape to the equivalent \uXXXX escape,
+-- so string parsing only has to deal with the \u form. "\u" maps to itself;
+-- escapes that are not listed here are left untouched by gsub.
+local first_escape = {
+       ["\\u"] = "\\u",
+       ["\\\""] = "\\u0022",
+       ["\\\\"] = "\\u005c",
+       ["\\/"] = "\\u002f",
+       ["\\b"] = "\\u0008",
+       ["\\t"] = "\\u0009",
+       ["\\n"] = "\\u000A",
+       ["\\f"] = "\\u000C",
+       ["\\r"] = "\\u000D",
+};
+
+-- Decodes a JSON text. Returns the decoded value on success, or nil plus an
+-- error value when the text cannot be parsed or is followed by anything
+-- other than whitespace.
+function json.decode(json)
+       -- Rewrite every two-character escape into \uXXXX form so the string
+       -- reader has a single escape syntax to handle. (A further pass folding
+       -- \r and \n into \t was sketched here but is not applied.)
+       local normalized = json:gsub("\\.", first_escape);
+
+       -- TODO do encoding verification
+
+       local result, next_or_err = _readvalue(normalized, 1);
+       if result == nil then
+               -- parse failure: the second value describes the error
+               return result, next_or_err;
+       end
+       -- only whitespace may follow the top-level value
+       local trailing = normalized:find("[^ \t\r\n]", next_or_err);
+       if trailing then return nil, "garbage at eof"; end
+       return result;
 end
 
 function json.test(object)