util.json: Fixed handling of truncated JSON.
[prosody.git] / util / json.lua
1
2 local type = type;
3 local t_insert, t_concat, t_remove = table.insert, table.concat, table.remove;
4 local s_char = string.char;
5 local tostring, tonumber = tostring, tonumber;
6 local pairs, ipairs = pairs, ipairs;
7 local next = next;
8 local error = error;
9 local newproxy, getmetatable = newproxy, getmetatable;
10 local print = print;
11
12 --module("json")
-- Module table: the public functions are attached to it and it is returned
-- at the end of the file.
local json = {};

-- Unique sentinel that stands in for JSON null. Uses newproxy(true) when that
-- function exists, otherwise falls back to a fresh plain table.
local null;
if newproxy then
	null = newproxy(true) or {};
else
	null = {};
end

-- Only a sentinel that actually carries a metatable gets a printable name.
local null_mt = getmetatable and getmetatable(null);
if null_mt then
	null_mt.__tostring = function() return "null"; end;
end

json.null = null;
20
-- Encoder side: raw character -> JSON escape sequence.
local escapes = {
	["\""] = "\\\"",
	["\\"] = "\\\\",
	["\b"] = "\\b",
	["\f"] = "\\f",
	["\n"] = "\\n",
	["\r"] = "\\r",
	["\t"] = "\\t",
};

-- Decoder side: character following a backslash -> the character it denotes.
local unescapes = {
	["\""] = "\"",
	["\\"] = "\\",
	["/"] = "/",
	b = "\b",
	f = "\f",
	n = "\n",
	r = "\r",
	t = "\t",
};

-- Every remaining control character (0..31) without a short escape above
-- is written as a \uXXXX sequence.
for code = 0, 31 do
	local raw = s_char(code);
	escapes[raw] = escapes[raw] or ("\\u%.4X"):format(code);
end
31
-- Lua types tablesave accepts for table values and for non-string keys.
local valid_types = { number = true, string = true, table = true, boolean = true };

-- String keys tablesave never writes as ordinary object members, because it
-- uses these names itself for the array part and for non-string-keyed entries.
local special_keys = { __array = true, __hash = true };

-- Forward declarations: the savers below call each other recursively.
local simplesave, tablesave, arraysave, stringsave;
44
-- Appends `o` to `buffer` as a double-quoted JSON string, replacing every
-- character that has an entry in `escapes` with its escape sequence.
function stringsave(o, buffer)
	-- FIXME do proper utf-8 and binary data detection
	local escaped = o:gsub(".", escapes); -- single target drops gsub's match count
	t_insert(buffer, "\""..escaped.."\"");
end
49
-- Appends the sequence part of `o` (indices 1..n, as visited by ipairs) to
-- `buffer` as a JSON array. Keys outside the sequence are ignored.
function arraysave(o, buffer)
	t_insert(buffer, "[");
	-- Remember where the elements start so the trailing comma is removed only
	-- if an element was really written. Testing next(o) instead would pop the
	-- "[" itself for a table that holds only non-sequence keys.
	local mark = #buffer;
	for _, v in ipairs(o) do
		simplesave(v, buffer);
		t_insert(buffer, ",");
	end
	if #buffer ~= mark then t_remove(buffer); end
	t_insert(buffer, "]");
end
61
-- Appends table `o` to `buffer` as JSON.
-- A table with only a sequence part is written as a JSON array. Anything else
-- (including an empty table) is written as an object: string-keyed entries
-- become ordinary members, entries with other key types are flattened into
-- a "__hash" array of alternating key/value items, and the sequence part, if
-- any, goes under "__array".
-- Entries whose value is neither a valid type nor the null sentinel are
-- dropped, except inside the sequence part, which is copied as-is.
function tablesave(o, buffer)
	local sequence = {};  -- values at indices 1..n
	local odd_keyed = {}; -- entries that cannot be plain object members
	local members = {};   -- entries with ordinary string keys

	for index, value in ipairs(o) do
		sequence[index] = value;
	end
	for key, value in pairs(o) do
		if valid_types[type(value)] or value == null then
			local key_type = type(key);
			if key_type == "string" and not special_keys[key] then
				members[key] = value;
			elseif (valid_types[key_type] or key == null) and sequence[key] == nil then
				-- the sequence[key] test skips indices already copied above
				odd_keyed[key] = value;
			end
		end
	end

	local only_sequence = next(odd_keyed) == nil and next(members) == nil and next(sequence) ~= nil;
	if only_sequence then
		arraysave(sequence, buffer);
		return;
	end

	t_insert(buffer, "{");
	local mark = #buffer; -- buffer length before any member is written
	for key, value in pairs(members) do
		stringsave(key, buffer);
		t_insert(buffer, ":");
		simplesave(value, buffer);
		t_insert(buffer, ",");
	end
	if next(odd_keyed) ~= nil then
		t_insert(buffer, "\"__hash\":[");
		for key, value in pairs(odd_keyed) do
			simplesave(key, buffer);
			t_insert(buffer, ",");
			simplesave(value, buffer);
			t_insert(buffer, ",");
		end
		t_remove(buffer); -- comma after the last value
		t_insert(buffer, "]");
		t_insert(buffer, ",");
	end
	if next(sequence) then
		t_insert(buffer, "\"__array\":");
		arraysave(sequence, buffer);
		t_insert(buffer, ",");
	end
	-- Every section ends with its own "," item; drop the last one, if any.
	if #buffer ~= mark then t_remove(buffer); end
	t_insert(buffer, "}");
end
111
-- Appends the JSON form of any Lua value `o` to `buffer`.
-- Numbers, strings, tables and booleans are written as such; the null
-- sentinel and every other type are written as null.
function simplesave(o, buffer)
	-- The sentinel must be recognised before the type dispatch: when newproxy
	-- is unavailable it is a plain table and would otherwise be written as {}.
	if o == null then
		t_insert(buffer, "null");
		return;
	end
	local t = type(o);
	if t == "number" then
		t_insert(buffer, tostring(o));
	elseif t == "string" then
		stringsave(o, buffer);
	elseif t == "table" then
		tablesave(o, buffer);
	elseif t == "boolean" then
		t_insert(buffer, (o and "true" or "false"));
	else
		t_insert(buffer, "null");
	end
end
126
-- Serialises `obj` and returns the resulting JSON text as one string.
function json.encode(obj)
	local pieces = {}; -- output fragments, joined once at the end
	simplesave(obj, pieces);
	return t_concat(pieces);
end
132
133 -----------------------------------
134
135
-- Returns the UTF-8 byte sequence for code point `code` (0 .. 0x10FFFF).
local function codepoint_to_utf8(code)
	if code < 0x80 then
		return s_char(code);
	end
	local b1 = code % 0x40; -- lowest six bits
	local rest = (code - b1) / 0x40;
	if rest < 0x20 then -- code < 0x800: two bytes
		return s_char(0xC0 + rest, 0x80 + b1);
	end
	local b2 = rest % 0x40;
	rest = (rest - b2) / 0x40;
	if rest < 0x10 then -- code < 0x10000: three bytes
		return s_char(0xE0 + rest, 0x80 + b2, 0x80 + b1);
	end
	local b3 = rest % 0x40;
	rest = (rest - b3) / 0x40;
	return s_char(0xF0 + rest, 0x80 + b3, 0x80 + b2, 0x80 + b1);
end

-- Parses the JSON text `json` and returns the first value found in it.
-- Objects and arrays become tables, JSON null becomes the json.null sentinel.
-- Objects carrying the encoder's "__array" / "__hash" members are folded back
-- into a single Lua table. /* ... */ and // comments are skipped wherever
-- whitespace is allowed. Anything following the first value is not examined.
-- Raises an error (via error()) on malformed or truncated input.
function json.decode(json)
	local text = json.." "; -- appending a space ensures valid json wouldn't touch EOF
	local pos = 1;
	-- ch is the current character (at index pos-1 once advance() has run),
	-- peek the one after it ("" at the very end).
	local ch, peek;

	-- Moves to the next character. Running off the end is always an error, so
	-- after the first call ch is guaranteed to be a one-character string.
	local function advance()
		ch = text:sub(pos, pos);
		if ch == "" then error("Unexpected EOF"); end
		pos = pos+1;
		peek = text:sub(pos, pos);
		return ch;
	end

	local function skipwhitespace()
		while ch == "\r" or ch == "\n" or ch == "\t" or ch == " " do
			advance();
		end
	end
	local function skiplinecomment()
		repeat advance(); until ch == "\r" or ch == "\n";
		skipwhitespace();
	end
	local function skipstarcomment()
		advance(); advance(); -- skip '/', '*'
		-- Stop only on the two-character terminator; a lone '*' or '/' is
		-- ordinary comment text. An unterminated comment ends in advance()'s
		-- EOF error.
		while not (ch == "*" and peek == "/") do advance(); end
		advance(); advance(); -- skip '*', '/'
		skipwhitespace();
	end
	-- Skips any mix of whitespace and comments.
	local function skipstuff()
		while true do
			skipwhitespace();
			if ch == "/" and peek == "*" then
				skipstarcomment();
			elseif ch == "/" and peek == "/" then
				skiplinecomment();
			else
				return;
			end
		end
	end

	local readvalue;
	local function readarray()
		local t = {};
		advance(); -- skip '['
		skipstuff();
		if ch == "]" then advance(); return t; end
		t_insert(t, readvalue());
		while true do
			skipstuff();
			if ch == "]" then advance(); return t; end
			if ch ~= "," then error("unexpected character in array, comma expected"); end
			advance();
			t_insert(t, readvalue());
		end
	end

	-- Requires the current character to be `c` and steps past it.
	local function checkandskip(c)
		if ch ~= c then error("unexpected "..ch..", '"..c.."' expected"); end
		advance();
	end
	local function readliteral(lit, val)
		for c in lit:gmatch(".") do
			checkandskip(c);
		end
		return val;
	end
	local function readstring()
		local parts = {}; -- decoded fragments, joined once at the end
		checkandskip("\"");
		while true do
			-- Jump straight to the next quote or backslash and copy the plain
			-- run before it in one slice.
			local stop = text:find("[\"\\]", pos - 1);
			if not stop then error("Unexpected EOF"); end
			if stop > pos - 1 then
				parts[#parts+1] = text:sub(pos - 1, stop - 1);
				pos = stop;
				advance();
			end
			if ch == "\\" then
				advance();
				if unescapes[ch] then
					parts[#parts+1] = unescapes[ch];
					advance();
				elseif ch == "u" then
					local seq = "";
					for _ = 1, 4 do
						advance();
						if not ch:match("[0-9a-fA-F]") then error("invalid unicode escape sequence in string"); end
						seq = seq..ch;
					end
					local code = tonumber(seq, 16);
					if code >= 0xD800 and code <= 0xDBFF then
						-- High surrogate: if a low surrogate escape follows
						-- directly (pos indexes the character after this
						-- escape), merge the pair into one code point.
						-- A lone surrogate is encoded as it stands.
						local low = text:match("^\\u([dD][c-fC-F]%x%x)", pos);
						if low then
							code = 0x10000 + (code - 0xD800) * 0x400 + (tonumber(low, 16) - 0xDC00);
							pos = pos + 6;
						end
					end
					parts[#parts+1] = codepoint_to_utf8(code);
					advance();
				else
					error("invalid escape sequence in string");
				end
			else
				-- closing quote
				advance();
				return t_concat(parts);
			end
		end
	end
	local function readnumber()
		local s = "";
		if ch == "-" then
			s = s..ch; advance();
			if not ch:match("[0-9]") then error("number format error"); end
		end
		if ch == "0" then
			s = s..ch; advance();
			if ch:match("[0-9]") then error("number format error"); end
		else
			while ch:match("[0-9]") do
				s = s..ch; advance();
			end
		end
		if ch == "." then
			s = s..ch; advance();
			if not ch:match("[0-9]") then error("number format error"); end
			while ch:match("[0-9]") do
				s = s..ch; advance();
			end
		end
		-- The exponent is independent of the fraction and its sign is
		-- optional, so "1e5", "1E+5" and "1.5e10" are all accepted.
		if ch == "e" or ch == "E" then
			s = s..ch; advance();
			if ch == "+" or ch == "-" then
				s = s..ch; advance();
			end
			if not ch:match("[0-9]") then error("number format error"); end
			while ch:match("[0-9]") do
				s = s..ch; advance();
			end
		end
		return tonumber(s);
	end
	local function readmember(t)
		skipstuff();
		local k = readstring();
		skipstuff();
		checkandskip(":");
		t[k] = readvalue();
	end
	-- Undoes the encoder's object layout: "__array" items are appended to the
	-- sequence part, "__hash" items are consumed as alternating key/value.
	-- Members with those names that are not arrays are left untouched.
	local function fixobject(obj)
		local __array = obj.__array;
		if type(__array) == "table" then
			obj.__array = nil;
			for _, v in ipairs(__array) do
				t_insert(obj, v);
			end
		end
		local __hash = obj.__hash;
		if type(__hash) == "table" then
			obj.__hash = nil;
			local k;
			for _, v in ipairs(__hash) do
				if k ~= nil then
					obj[k] = v; k = nil;
				else
					k = v;
				end
			end
		end
		return obj;
	end
	local function readobject()
		local t = {};
		advance(); -- skip '{'
		skipstuff();
		if ch == "}" then advance(); return t; end
		readmember(t);
		while true do
			skipstuff();
			if ch == "}" then advance(); return fixobject(t); end
			if ch ~= "," then error("unexpected character in object, comma expected"); end
			advance();
			readmember(t);
		end
	end

	function readvalue()
		skipstuff();
		if ch == "{" then
			return readobject();
		elseif ch == "[" then
			return readarray();
		elseif ch == "\"" then
			return readstring();
		elseif ch:match("[%-0-9%.]") then
			return readnumber();
		elseif ch == "n" then
			return readliteral("null", null);
		elseif ch == "t" then
			return readliteral("true", true);
		elseif ch == "f" then
			return readliteral("false", false);
		end
		error("invalid character at value start: "..ch);
	end
	advance();
	return readvalue();
end
345
-- Round-trip self check: encodes `object`, decodes the result and encodes it
-- again. Prints the encoded text on success, or both encodings after a
-- "FAILED" line on mismatch. Returns true when the two encodings are equal.
function json.test(object)
	local encoded = json.encode(object);
	local recoded = json.encode(json.decode(encoded));
	local matches = encoded == recoded;
	if matches then
		print(encoded);
	else
		print("FAILED");
		print("encoded:", encoded);
		print("recoded:", recoded);
	end
	return matches;
end
359
360 return json;