util.json: Make setmetatable local.
[prosody.git] / util / json.lua
1 -- Prosody IM
2 -- Copyright (C) 2008-2010 Matthew Wild
3 -- Copyright (C) 2008-2010 Waqas Hussain
4 --
5 -- This project is MIT/X11 licensed. Please see the
6 -- COPYING file in the source package for more information.
7 --
8
9 local type = type;
10 local t_insert, t_concat, t_remove, t_sort = table.insert, table.concat, table.remove, table.sort;
11 local s_char = string.char;
12 local tostring, tonumber = tostring, tonumber;
13 local pairs, ipairs = pairs, ipairs;
14 local next = next;
15 local error = error;
16 local newproxy, getmetatable, setmetatable = newproxy, getmetatable, setmetatable;
17 local print = print;
18
-- util.array is optional. When it cannot be loaded a private metatable is
-- used instead, so arrays produced by the decoder can still be recognised
-- by the encoder through their metatable.
local has_array, array = pcall(require, "util.array");
local array_mt = has_array and getmetatable(array()) or {};

--module("json")
local json = {};

-- Sentinel standing in for JSON null (nil cannot be stored as a table value).
-- newproxy() is not available in every Lua version; without it a plain table
-- is used. Both flavours get the same __tostring so the sentinel always
-- prints as "null".
local function null_tostring() return "null"; end
local null;
if newproxy then
	null = newproxy(true);
	getmetatable(null).__tostring = null_tostring;
else
	null = setmetatable({}, { __tostring = null_tostring });
end
json.null = null;
30
-- Encoder side: raw character -> JSON escape sequence.
local escapes = {
	["\""] = "\\\"",
	["\\"] = "\\\\",
	["\b"] = "\\b",
	["\f"] = "\\f",
	["\n"] = "\\n",
	["\r"] = "\\r",
	["\t"] = "\\t",
};
-- Decoder side: character following a backslash -> the character it denotes.
local unescapes = {
	["\""] = "\"",
	["\\"] = "\\",
	["/"] = "/",
	b = "\b",
	f = "\f",
	n = "\n",
	r = "\r",
	t = "\t",
};
-- Control characters (bytes 0-31) without a short escape above get the
-- generic \uXXXX form.
for code = 0, 31 do
	local ch = s_char(code);
	escapes[ch] = escapes[ch] or ("\\u%.4X"):format(code);
end
41
-- Encodes the code point `code` (a non-negative integer) as a UTF-8 byte
-- string of one to four bytes.
--
-- Uses only %, - and / so it does not depend on a bit library or on integer
-- operators; every division below is exact because the remainder has been
-- subtracted first.
--
-- No validation is performed: surrogate values (0xD800-0xDFFF) are encoded
-- like any other three-byte code point.
local function codepoint_to_utf8(code)
	if code < 0x80 then return s_char(code); end
	local bits0_6 = code % 64; -- lowest 6 bits, payload of the final byte
	if code < 0x800 then
		local bits6_5 = (code - bits0_6) / 64;
		return s_char(0xC0 + bits6_5, 0x80 + bits0_6);
	end
	local bits0_12 = code % 4096;
	local bits6_6 = (bits0_12 - bits0_6) / 64;
	if code < 0x10000 then
		local bits12_4 = (code - bits0_12) / 4096;
		return s_char(0xE0 + bits12_4, 0x80 + bits6_6, 0x80 + bits0_6);
	end
	-- Supplementary planes: four-byte sequence.
	local bits0_18 = code % 262144;
	local bits12_6 = (bits0_18 - bits0_12) / 4096;
	local bits18_3 = (code - bits0_18) / 262144;
	return s_char(0xF0 + bits18_3, 0x80 + bits12_6, 0x80 + bits6_6, 0x80 + bits0_6);
end
54
-- Lua types the encoder knows how to write.
local valid_types = { number = true; string = true; table = true; boolean = true };
-- String keys reserved by the encoder for the parts of a table that do not
-- map onto plain JSON object members (see tablesave).
local special_keys = { __array = true; __hash = true };
65
-- Forward declarations: the savers below call one another recursively.
local simplesave, tablesave, arraysave, stringsave;

-- Appends the string `o` to `buffer` as a double-quoted JSON string, with
-- every character that has an entry in `escapes` replaced by that entry.
function stringsave(o, buffer)
	-- FIXME do proper utf-8 and binary data detection
	local escaped = o:gsub(".", escapes); -- keep only gsub's first result
	t_insert(buffer, "\"" .. escaped .. "\"");
end
72
-- Appends `o` to `buffer` as a JSON array. Only the sequence part of `o`
-- (what ipairs visits) is written; any other keys are ignored.
function arraysave(o, buffer)
	t_insert(buffer, "[");
	-- The separator is written before every element except the first instead
	-- of stripping a trailing comma afterwards. A table that holds only
	-- non-sequence keys is non-empty yet yields no elements; removing the last
	-- buffer entry in that case would delete the opening bracket.
	for i, v in ipairs(o) do
		if i > 1 then t_insert(buffer, ","); end
		simplesave(v, buffer);
	end
	t_insert(buffer, "]");
end
84
-- Appends the table `o` to `buffer`.
--
-- The entries of `o` are sorted into three groups:
--   * seq       - the sequence part, as visited by ipairs;
--   * str_keyed - entries with a string key that is not a reserved name;
--   * odd_keyed - every other entry with a usable key (numbers outside the
--                 sequence, booleans, tables, the null sentinel, and the
--                 reserved names "__array"/"__hash" themselves).
-- Outside the sequence part, entries whose value is neither of a valid type
-- nor the null sentinel are dropped.
--
-- A non-empty pure sequence is written as a JSON array. Anything else is
-- written as a JSON object: string-keyed members first (sorted by key when
-- buffer.ordered is set), then "__hash" holding a flat key,value,... list of
-- the odd-keyed entries, then "__array" holding the sequence part.
function tablesave(o, buffer)
	local seq, odd_keyed, str_keyed = {}, {}, {};
	for idx, item in ipairs(o) do
		seq[idx] = item;
	end
	for key, val in pairs(o) do
		if valid_types[type(val)] or val == null then
			local key_kind = type(key);
			if key_kind == "string" and not special_keys[key] then
				str_keyed[key] = val;
			elseif (valid_types[key_kind] or key == null) and seq[key] == nil then
				odd_keyed[key] = val;
			end
		end
	end

	-- Nothing but a non-empty sequence: a plain array is enough.
	if next(odd_keyed) == nil and next(str_keyed) == nil and next(seq) ~= nil then
		arraysave(seq, buffer);
		return;
	end

	t_insert(buffer, "{");
	local mark = #buffer; -- buffer length right after the opening brace

	-- Writes one `"name":value,` member.
	local function member(name, value)
		stringsave(name, buffer);
		t_insert(buffer, ":");
		simplesave(value, buffer);
		t_insert(buffer, ",");
	end

	if buffer.ordered then
		local names = {};
		for name in pairs(str_keyed) do
			names[#names + 1] = name;
		end
		t_sort(names);
		for i = 1, #names do
			member(names[i], str_keyed[names[i]]);
		end
	else
		for name, value in pairs(str_keyed) do
			member(name, value);
		end
	end

	if next(odd_keyed) ~= nil then
		t_insert(buffer, "\"__hash\":[");
		for key, value in pairs(odd_keyed) do
			simplesave(key, buffer);
			t_insert(buffer, ",");
			simplesave(value, buffer);
			t_insert(buffer, ",");
		end
		t_remove(buffer); -- drop the comma after the last value
		t_insert(buffer, "]");
		t_insert(buffer, ",");
	end

	if next(seq) then
		t_insert(buffer, "\"__array\":");
		arraysave(seq, buffer);
		t_insert(buffer, ",");
	end

	-- If anything was written after the brace, the last entry is a comma.
	if #buffer ~= mark then t_remove(buffer); end
	t_insert(buffer, "}");
end
148
-- Appends the JSON form of any Lua value `o` to `buffer`, dispatching on its
-- type. Values with no JSON representation are written as null.
function simplesave(o, buffer)
	local t = type(o);
	if o == null then
		-- Must come before the table branch: when newproxy() is unavailable the
		-- null sentinel is itself a table and would be written as "{}".
		t_insert(buffer, "null");
	elseif t == "number" then
		if o ~= o or o == 1/0 or o == -1/0 then
			-- NaN and the infinities have no JSON spelling; tostring() would
			-- produce text that is not valid JSON.
			t_insert(buffer, "null");
		else
			t_insert(buffer, tostring(o));
		end
	elseif t == "string" then
		stringsave(o, buffer);
	elseif t == "table" then
		-- Tables carrying the array metatable are always written as arrays;
		-- every other table is classified by tablesave.
		local mt = getmetatable(o);
		if mt == array_mt then
			arraysave(o, buffer);
		else
			tablesave(o, buffer);
		end
	elseif t == "boolean" then
		t_insert(buffer, (o and "true" or "false"));
	else
		t_insert(buffer, "null");
	end
end
168
-- Serialises `obj` and returns the resulting JSON text as one string.
function json.encode(obj)
	local buffer = {};
	simplesave(obj, buffer);
	return t_concat(buffer);
end
-- Like json.encode, but the buffer carries the `ordered` flag, which makes
-- tablesave write string-keyed object members in sorted key order.
function json.encode_ordered(obj)
	local buffer = { ordered = true };
	simplesave(obj, buffer);
	return t_concat(buffer);
end
-- Serialises `obj` as a JSON array regardless of its metatable: the value is
-- handed straight to arraysave instead of going through simplesave.
function json.encode_array(obj)
	local buffer = {};
	arraysave(obj, buffer);
	return t_concat(buffer);
end
184
185 -----------------------------------
186
187
-- Parses the JSON text `json` and returns the first value found in it.
--
-- Mapping to Lua values:
--   * null becomes the json.null sentinel, true/false become booleans;
--   * arrays become sequences carrying the array metatable;
--   * objects become plain tables; the "__array" and "__hash" members written
--     by the encoder are folded back into the table (see fixobject below).
--
-- Beyond plain JSON the parser also skips // line comments and /* */ block
-- comments wherever whitespace is allowed, and accepts a number that starts
-- with "." (no integer part).
--
-- Every syntax error, including premature end of input, is raised with
-- error(). Text following the first complete value is not examined.
function json.decode(json)
	json = json.." "; -- appending a space ensures valid json wouldn't touch EOF
	local pos = 1;
	local ch, peek;

	-- Advances by one character: `ch` becomes the character at `pos` and `peek`
	-- the one after it ("" when there is none). Raises at end of input, so
	-- after a successful call `ch` is always a one-character string.
	-- (Shadows the global next() inside this function.)
	local function next()
		ch = json:sub(pos, pos);
		if ch == "" then error("Unexpected EOF"); end
		pos = pos+1;
		peek = json:sub(pos, pos);
		return ch;
	end

	local function skipwhitespace()
		while ch == "\r" or ch == "\n" or ch == "\t" or ch == " " do
			next();
		end
	end
	local function skiplinecomment()
		repeat next(); until ch == "\r" or ch == "\n";
		skipwhitespace();
	end
	local function skipstarcomment()
		next(); next(); -- skip '/', '*'
		-- The comment ends only at the two-character sequence "*/"; a lone '*'
		-- or '/' inside the comment must not terminate it. An unterminated
		-- comment ends in next() raising at end of input.
		while not (ch == "*" and peek == "/") do next(); end
		next(); next(); -- skip '*', '/'
		skipwhitespace();
	end
	-- Skips any mix of whitespace and comments.
	local function skipstuff()
		while true do
			skipwhitespace();
			if ch == "/" and peek == "*" then
				skipstarcomment();
			elseif ch == "/" and peek == "/" then
				skiplinecomment();
			else
				return;
			end
		end
	end

	local readvalue;
	local function readarray()
		local t = setmetatable({}, array_mt);
		next(); -- skip '['
		skipstuff();
		if ch == "]" then next(); return t; end
		t_insert(t, readvalue());
		while true do
			skipstuff();
			if ch == "]" then next(); return t; end
			if ch ~= "," then error("unexpected character in array, comma expected"); end
			next(); -- skip ','
			t_insert(t, readvalue());
		end
	end

	-- Requires the current character to be `c` and moves past it.
	local function checkandskip(c)
		if ch ~= c then error("unexpected "..ch..", '"..c.."' expected"); end
		next();
	end
	-- Consumes the literal text `lit` and returns `val`.
	local function readliteral(lit, val)
		for c in lit:gmatch(".") do
			checkandskip(c);
		end
		return val;
	end
	-- Reads a double-quoted string. Each \uXXXX escape is converted to UTF-8
	-- on its own; surrogate pairs are not combined.
	local function readstring()
		local s = {};
		checkandskip("\"");
		while true do
			while ch ~= "\\" and ch ~= "\"" do
				t_insert(s, ch); next();
			end
			if ch == "\\" then
				next();
				if unescapes[ch] then
					t_insert(s, unescapes[ch]);
					next();
				elseif ch == "u" then
					local seq = "";
					for _ = 1, 4 do
						next();
						if not ch:match("[0-9a-fA-F]") then error("invalid unicode escape sequence in string"); end
						seq = seq..ch;
					end
					t_insert(s, codepoint_to_utf8(tonumber(seq, 16)));
					next();
				else error("invalid escape sequence in string"); end
			end
			if ch == "\"" then
				next();
				return t_concat(s);
			end
		end
	end
	-- Reads a number: optional '-', integer part, optional fraction, optional
	-- exponent. The exponent is accepted with or without a fraction and with
	-- or without a sign, which also covers the "1e+15" form that tostring()
	-- produces when encoding.
	local function readnumber()
		local s = "";
		local function take() s = s..ch; next(); end
		local function digits(required)
			if required and not ch:match("[0-9]") then error("number format error"); end
			while ch:match("[0-9]") do take(); end
		end
		if ch == "-" then
			take();
			if not ch:match("[0-9]") then error("number format error"); end
		end
		if ch == "0" then
			take();
			-- a leading zero must not be followed by more digits
			if ch:match("[0-9]") then error("number format error"); end
		else
			digits(false);
		end
		if ch == "." then
			take();
			digits(true);
		end
		if ch == "e" or ch == "E" then
			take();
			if ch == "+" or ch == "-" then take(); end
			digits(true);
		end
		return tonumber(s);
	end
	-- Reads one `"key": value` pair into `t`.
	local function readmember(t)
		skipstuff();
		local k = readstring();
		skipstuff();
		checkandskip(":");
		t[k] = readvalue();
	end
	-- Reverses the encoder's special members: the elements of "__array" are
	-- appended to the object's sequence part and the flat key,value,... list
	-- in "__hash" is turned back into entries with those keys.
	local function fixobject(obj)
		local __array = obj.__array;
		if __array then
			obj.__array = nil;
			for i,v in ipairs(__array) do
				t_insert(obj, v);
			end
		end
		local __hash = obj.__hash;
		if __hash then
			obj.__hash = nil;
			local k;
			for i,v in ipairs(__hash) do
				if k ~= nil then
					obj[k] = v; k = nil;
				else
					k = v;
				end
			end
		end
		return obj;
	end
	local function readobject()
		local t = {};
		next(); -- skip '{'
		skipstuff();
		if ch == "}" then next(); return t; end
		readmember(t);
		while true do
			skipstuff();
			if ch == "}" then next(); return fixobject(t); end
			if ch ~= "," then error("unexpected character in object, comma expected"); end
			next(); -- skip ','
			readmember(t);
		end
	end

	-- Dispatches on the first significant character of a value.
	function readvalue()
		skipstuff();
		if ch == "{" then
			return readobject();
		elseif ch == "[" then
			return readarray();
		elseif ch == "\"" then
			return readstring();
		elseif ch:match("[%-0-9%.]") then
			return readnumber();
		elseif ch == "n" then
			return readliteral("null", null);
		elseif ch == "t" then
			return readliteral("true", true);
		elseif ch == "f" then
			return readliteral("false", false);
		else
			error("invalid character at value start: "..ch);
		end
	end
	next();
	return readvalue();
end
397
-- Round-trip self check: encodes `object`, decodes the result and encodes it
-- again. Prints the JSON when both encodings match, or a FAILED report with
-- both encodings when they differ. Returns whether they matched.
function json.test(object)
	local encoded = json.encode(object);
	local recoded = json.encode(json.decode(encoded));
	local same = (encoded == recoded);
	if same then
		print(encoded);
	else
		print("FAILED");
		print("encoded:", encoded);
		print("recoded:", recoded);
	end
	return same;
end

return json;