Merge 0.9->trunk
[prosody.git] / util / json.lua
1 -- Prosody IM
2 -- Copyright (C) 2008-2010 Matthew Wild
3 -- Copyright (C) 2008-2010 Waqas Hussain
4 --
5 -- utf8char copyright (C) 2007 Rici Lake
6 --
7 -- This project is MIT/X11 licensed. Please see the
8 -- COPYING file in the source package for more information.
9 --
10
11 local type = type;
12 local t_insert, t_concat, t_remove, t_sort = table.insert, table.concat, table.remove, table.sort;
13 local s_char = string.char;
14 local tostring, tonumber = tostring, tonumber;
15 local pairs, ipairs = pairs, ipairs;
16 local next = next;
17 local error = error;
18 local newproxy, getmetatable = newproxy, getmetatable;
19 local print = print;
20
21 --module("json")
-- Module table; every public function is attached to it.
local json = {};

-- Unique sentinel standing in for JSON null (Lua's nil cannot be stored in a
-- table). A userdata proxy is used when newproxy is available, otherwise a
-- plain table.
local null;
if newproxy then
        null = newproxy(true);
else
        null = {};
end

-- When the sentinel carries a metatable, make it print as "null".
local null_mt = getmetatable and getmetatable(null);
if null_mt then
        null_mt.__tostring = function()
                return "null";
        end;
end
json.null = null;
29
-- Characters that need escaping inside a JSON string, mapped to the escape
-- sequence written by the encoder.
local escapes = {
        ["\""] = "\\\"";
        ["\\"] = "\\\\";
        ["\b"] = "\\b";
        ["\f"] = "\\f";
        ["\n"] = "\\n";
        ["\r"] = "\\r";
        ["\t"] = "\\t";
};

-- Character following a backslash in JSON text, mapped to the character it
-- stands for (used by the decoder).
local unescapes = {
        ["\""] = "\"";
        ["\\"] = "\\";
        ["/"] = "/";
        b = "\b";
        f = "\f";
        n = "\n";
        r = "\r";
        t = "\t";
};

-- Every remaining control character (0-31) gets a generic \uXXXX escape.
for code = 0, 31 do
        local ctrl = s_char(code);
        if escapes[ctrl] == nil then
                escapes[ctrl] = ("\\u%.4X"):format(code);
        end
end
40
-- Encode code point `i` as a UTF-8 byte string.
-- Any fractional part of `i` is discarded. Nothing is returned for negative
-- input, for the range 0xD800-0xDFFF, or for values of 0x110000 and above.
local function utf8char(i)
        if not (i >= 0) then
                return;
        end
        local code = i - i % 1;
        if code < 128 then
                -- one byte
                return s_char(code);
        end

        local low = code % 64;
        local rest = (code - low) / 64;
        if rest < 32 then
                -- two bytes
                return s_char(0xC0 + rest, 0x80 + low);
        end

        local mid = rest % 64;
        rest = (rest - mid) / 64;
        if rest < 16 and (rest ~= 13 or mid < 32) then
                -- three bytes; rest == 13 with mid >= 32 is 0xD800-0xDFFF
                return s_char(0xE0 + rest, 0x80 + mid, 0x80 + low);
        end
        if rest >= 16 and rest < 0x110 then
                -- four bytes
                local high = rest % 64;
                local lead = (rest - high) / 64;
                return s_char(0xF0 + lead, 0x80 + high, 0x80 + mid, 0x80 + low);
        end
end
65
66
-- Lua types the encoder is willing to serialise.
local valid_types = {
        number = true;
        string = true;
        table = true;
        boolean = true;
};

-- Object keys reserved by the encoder for the array part and the
-- non-string-keyed part of a mixed table.
local special_keys = {
        __array = true;
        __hash = true;
};

-- Forward declarations: the savers below call one another recursively.
local simplesave, tablesave, arraysave, stringsave;
79
-- Append string `o` to `buffer` as a quoted JSON string, replacing each
-- character that has an entry in `escapes`.
function stringsave(o, buffer)
        -- FIXME do proper utf-8 and binary data detection
        local escaped = o:gsub(".", escapes); -- keep only gsub's first result
        t_insert(buffer, "\"" .. escaped .. "\"");
end
84
-- Append the sequence part of table `o` (as visited by ipairs) to `buffer`
-- as a JSON array. Keys outside the sequence are ignored.
--
-- Separators are written *before* every element except the first, so a table
-- with only non-sequence keys (e.g. { a = 1 }) yields "[]". Stripping a
-- trailing separator afterwards would remove the opening bracket when the
-- loop writes nothing.
function arraysave(o, buffer)
        t_insert(buffer, "[");
        local first = true;
        for _, v in ipairs(o) do
                if first then
                        first = false;
                else
                        t_insert(buffer, ",");
                end
                simplesave(v, buffer);
        end
        t_insert(buffer, "]");
end
96
-- Append one `"key":value,` member to `buffer`. The trailing comma is always
-- written; tablesave strips the last one.
local function savemember(key, value, buffer)
        stringsave(key, buffer);
        t_insert(buffer, ":");
        simplesave(value, buffer);
        t_insert(buffer, ",");
end

-- Append table `o` to `buffer`, as a JSON array when it only has a sequence
-- part and as a JSON object otherwise.
--
-- In an object, ordinary string keys become members. Entries with other keys
-- (and string keys that collide with the reserved names) are written as a
-- flat key,value list under "__hash", and the sequence part is written under
-- "__array". Values whose type is not in `valid_types` (and are not `null`)
-- are skipped. When `buffer.ordered` is set, string keys are emitted sorted.
function tablesave(o, buffer)
        local array_part, mixed_part, string_part = {}, {}, {};

        for index, item in ipairs(o) do
                array_part[index] = item;
        end
        for key, value in pairs(o) do
                if valid_types[type(value)] or value == null then
                        local key_type = type(key);
                        if key_type == "string" and not special_keys[key] then
                                string_part[key] = value;
                        elseif (valid_types[key_type] or key == null) and array_part[key] == nil then
                                -- not already covered by the sequence part
                                mixed_part[key] = value;
                        end
                end
        end

        local has_array = next(array_part) ~= nil;
        local has_mixed = next(mixed_part) ~= nil;

        -- Pure, non-empty sequence: plain JSON array.
        if has_array and not has_mixed and next(string_part) == nil then
                arraysave(array_part, buffer);
                return;
        end

        t_insert(buffer, "{");
        local mark = #buffer; -- used below to tell whether any member was written

        if buffer.ordered then
                local keys = {};
                for key in pairs(string_part) do
                        t_insert(keys, key);
                end
                t_sort(keys);
                for n = 1, #keys do
                        local key = keys[n];
                        savemember(key, string_part[key], buffer);
                end
        else
                for key, value in pairs(string_part) do
                        savemember(key, value, buffer);
                end
        end

        if has_mixed then
                t_insert(buffer, "\"__hash\":[");
                for key, value in pairs(mixed_part) do
                        simplesave(key, buffer);
                        t_insert(buffer, ",");
                        simplesave(value, buffer);
                        t_insert(buffer, ",");
                end
                t_remove(buffer); -- drop the comma after the last value
                t_insert(buffer, "]");
                t_insert(buffer, ",");
        end

        if has_array then
                t_insert(buffer, "\"__array\":");
                arraysave(array_part, buffer);
                t_insert(buffer, ",");
        end

        -- Every member ends in its own "," entry; remove the final one.
        if #buffer ~= mark then
                t_remove(buffer);
        end
        t_insert(buffer, "}");
end
160
-- Append the JSON encoding of any Lua value `o` to `buffer`.
-- Numbers, strings, tables and booleans are encoded; every other value
-- (nil, the `null` sentinel, functions, ...) is written as null.
--
-- NaN and +/-infinity have no JSON representation: tostring() would emit
-- "nan"/"inf", which is not valid JSON, so they are written as null instead.
function simplesave(o, buffer)
        local t = type(o);
        if t == "number" then
                if o ~= o or o == 1/0 or o == -1/0 then
                        t_insert(buffer, "null");
                else
                        t_insert(buffer, tostring(o));
                end
        elseif t == "string" then
                stringsave(o, buffer);
        elseif t == "table" then
                tablesave(o, buffer);
        elseif t == "boolean" then
                t_insert(buffer, (o and "true" or "false"));
        else
                t_insert(buffer, "null");
        end
end
175
-- Encode `obj` as a JSON string.
function json.encode(obj)
        local buffer = {};
        simplesave(obj, buffer);
        local encoded = t_concat(buffer);
        return encoded;
end
-- Encode `obj` as a JSON string, emitting the string keys of every object in
-- sorted order (the `ordered` flag on the buffer is read by tablesave).
function json.encode_ordered(obj)
        local buffer = {};
        buffer.ordered = true;
        simplesave(obj, buffer);
        local encoded = t_concat(buffer);
        return encoded;
end
-- Encode the sequence part of table `obj` as a JSON array string; `obj` is
-- passed straight to arraysave, bypassing the array/object detection.
function json.encode_array(obj)
        local buffer = {};
        arraysave(obj, buffer);
        local encoded = t_concat(buffer);
        return encoded;
end
191
192 -----------------------------------
193
194
-- Decode the JSON text `json` and return the resulting Lua value.
--
-- Objects and arrays become tables, JSON null becomes the `null` sentinel.
-- Objects carrying "__array" / "__hash" members (as written by the encoder
-- for mixed tables) are folded back into a single table. `//` line comments
-- and `/* */` block comments are skipped wherever whitespace is allowed.
-- Only the first value is parsed; whatever follows it is not examined.
--
-- Raises an error (via error()) on malformed input, including "Unexpected
-- EOF" when the text ends prematurely.
--
-- NB: the parameter name shadows the module table inside this function.
function json.decode(json)
        local input = json.." "; -- appending a space ensures valid json wouldn't touch EOF
        local pos = 1;
        local ch, peek; -- current character and one character of lookahead

        -- Step to the next character. `ch` is never nil or empty afterwards:
        -- running off the end of the input raises instead.
        local function advance()
                ch = input:sub(pos, pos);
                if ch == "" then error("Unexpected EOF"); end
                pos = pos+1;
                peek = input:sub(pos, pos);
                return ch;
        end

        local function skipwhitespace()
                while ch == "\r" or ch == "\n" or ch == "\t" or ch == " " do
                        advance();
                end
        end
        local function skiplinecomment()
                repeat advance(); until ch == "\r" or ch == "\n";
                skipwhitespace();
        end
        local function skipstarcomment()
                advance(); advance(); -- skip '/', '*'
                -- The comment ends only at '*' immediately followed by '/';
                -- a lone '*' or '/' inside the comment must not terminate it.
                while not (ch == "*" and peek == "/") do advance(); end
                advance(); advance(); -- skip '*', '/'
                skipwhitespace();
        end
        -- Skip any run of whitespace and comments.
        local function skipstuff()
                while true do
                        skipwhitespace();
                        if ch == "/" and peek == "*" then
                                skipstarcomment();
                        elseif ch == "/" and peek == "/" then
                                skiplinecomment();
                        else
                                return;
                        end
                end
        end

        local readvalue;
        local function readarray()
                local t = {};
                advance(); -- skip '['
                skipstuff();
                if ch == "]" then advance(); return t; end
                t_insert(t, readvalue());
                while true do
                        skipstuff();
                        if ch == "]" then advance(); return t; end
                        if ch ~= "," then error("unexpected character in array, comma expected"); end
                        advance();
                        t_insert(t, readvalue());
                end
        end

        -- Require the current character to be `c`, then step past it.
        local function checkandskip(c)
                if ch ~= c then error("unexpected "..ch..", '"..c.."' expected"); end
                advance();
        end
        local function readliteral(lit, val)
                for c in lit:gmatch(".") do
                        checkandskip(c);
                end
                return val;
        end

        -- Read the four hex digits of a \uXXXX escape. On entry `ch` is the
        -- 'u'; on return `ch` is the last hex digit.
        local function readhex4()
                local seq = "";
                for _ = 1, 4 do
                        advance();
                        if not ch:match("[0-9a-fA-F]") then error("invalid unicode escape sequence in string"); end
                        seq = seq..ch;
                end
                return tonumber(seq, 16);
        end
        local function readstring()
                local parts = {}; -- collected pieces, joined once at the end
                checkandskip("\"");
                while true do
                        while ch ~= "\\" and ch ~= "\"" do
                                parts[#parts+1] = ch; advance();
                        end
                        if ch == "\\" then
                                advance();
                                if unescapes[ch] then
                                        parts[#parts+1] = unescapes[ch];
                                        advance();
                                elseif ch == "u" then
                                        local code = readhex4();
                                        if code >= 0xD800 and code <= 0xDBFF then
                                                -- High surrogate: only meaningful together with
                                                -- an immediately following \uDC00-\uDFFF escape.
                                                advance();
                                                if ch ~= "\\" or peek ~= "u" then error("invalid surrogate pair in string"); end
                                                advance(); -- now on the 'u'
                                                local low = readhex4();
                                                if low < 0xDC00 or low > 0xDFFF then error("invalid surrogate pair in string"); end
                                                code = 0x10000 + (code - 0xD800) * 0x400 + (low - 0xDC00);
                                        elseif code >= 0xDC00 and code <= 0xDFFF then
                                                -- low surrogate without a preceding high one
                                                error("invalid surrogate pair in string");
                                        end
                                        parts[#parts+1] = utf8char(code);
                                        advance();
                                else error("invalid escape sequence in string"); end
                        end
                        if ch == "\"" then
                                advance();
                                return t_concat(parts);
                        end
                end
        end

        -- Append a run of decimal digits to `s` and return the result; when
        -- `required` is set, at least one digit must be present.
        local function readdigits(s, required)
                if required and not ch:match("[0-9]") then error("number format error"); end
                while ch:match("[0-9]") do
                        s = s..ch; advance();
                end
                return s;
        end
        local function readnumber()
                local s = "";
                if ch == "-" then
                        s = s..ch; advance();
                        if not ch:match("[0-9]") then error("number format error"); end
                end
                if ch == "0" then
                        s = s..ch; advance();
                        if ch:match("[0-9]") then error("number format error"); end -- no leading zeros
                else
                        s = readdigits(s, false);
                end
                if ch == "." then
                        s = s..ch; advance();
                        s = readdigits(s, true);
                end
                -- The exponent is independent of the fraction ("1e5" is valid)
                -- and its sign is optional ("1.5e3" is valid).
                if ch == "e" or ch == "E" then
                        s = s..ch; advance();
                        if ch == "+" or ch == "-" then
                                s = s..ch; advance();
                        end
                        s = readdigits(s, true);
                end
                local n = tonumber(s);
                if n == nil then error("number format error"); end
                return n;
        end
        local function readmember(t)
                skipstuff();
                local k = readstring();
                skipstuff();
                checkandskip(":");
                t[k] = readvalue();
        end
        -- Fold the encoder's "__array" and "__hash" members back into `obj`.
        local function fixobject(obj)
                local __array = obj.__array;
                if __array then
                        obj.__array = nil;
                        for _,v in ipairs(__array) do
                                t_insert(obj, v);
                        end
                end
                local __hash = obj.__hash;
                if __hash then
                        obj.__hash = nil;
                        -- __hash is a flat list: key, value, key, value, ...
                        local k;
                        for _,v in ipairs(__hash) do
                                if k ~= nil then
                                        obj[k] = v; k = nil;
                                else
                                        k = v;
                                end
                        end
                end
                return obj;
        end
        local function readobject()
                local t = {};
                advance(); -- skip '{'
                skipstuff();
                if ch == "}" then advance(); return t; end
                readmember(t);
                while true do
                        skipstuff();
                        if ch == "}" then advance(); return fixobject(t); end
                        if ch ~= "," then error("unexpected character in object, comma expected"); end
                        advance();
                        readmember(t);
                end
        end

        function readvalue()
                skipstuff();
                if ch == "{" then
                        return readobject();
                elseif ch == "[" then
                        return readarray();
                elseif ch == "\"" then
                        return readstring();
                elseif ch:match("[%-0-9%.]") then
                        return readnumber();
                elseif ch == "n" then
                        return readliteral("null", null);
                elseif ch == "t" then
                        return readliteral("true", true);
                elseif ch == "f" then
                        return readliteral("false", false);
                else
                        error("invalid character at value start: "..ch);
                end
        end
        advance();
        return readvalue();
end
404
-- Round-trip check: encode `object`, decode the result and encode it again.
-- Prints the encoded text when both encodings match, or a FAILED report with
-- both texts otherwise. Returns true when they match.
function json.test(object)
        local encoded = json.encode(object);
        local recoded = json.encode(json.decode(encoded));
        local matches = (encoded == recoded);
        if matches then
                print(encoded);
        else
                print("FAILED");
                print("encoded:", encoded);
                print("recoded:", recoded);
        end
        return matches;
end
418
419 return json;