-- Merge 0.10->trunk
-- [prosody.git] / util / xmppstream.lua
-- Prosody IM
-- Copyright (C) 2008-2010 Matthew Wild
-- Copyright (C) 2008-2010 Waqas Hussain
--
-- This project is MIT/X11 licensed. Please see the
-- COPYING file in the source package for more information.
--

-- Dependencies.
local lxp = require "lxp";
local st = require "util.stanza";
local stanza_mt = st.stanza_mt;

-- Keep every global this module needs in a local: the global environment is
-- cleared further down (see _ENV), so only locals are usable afterwards.
local error, tostring, setmetatable = error, tostring, setmetatable;
local t_insert, t_concat, t_remove = table.insert, table.concat, table.remove;

-- COMPAT: w/LuaExpat 1.1.0
-- Probe which optional handlers/methods this LuaExpat build provides.
-- pcall() yields false if lxp.new rejects the handler name; the byte counter
-- is detected by looking the method up on a throwaway parser.
local lxp_supports_doctype = pcall(lxp.new, { StartDoctypeDecl = false });
local lxp_supports_xmldecl = pcall(lxp.new, { XmlDecl = false });
local lxp_supports_bytecount = not not lxp.new({}).getcurrentbytecount;

-- Limit applied by new() when the caller does not pass one.
local default_stanza_size_limit = 10 * 1024 * 1024; -- 10MB

-- On Lua versions that implement _ENV, this turns any accidental global
-- access below this line into an error.
local _ENV = nil;

local new_parser = lxp.new;

-- Namespace-qualified xml:* attribute names (as delivered by the parser,
-- using "\1" as separator) mapped to their conventional prefixed spelling.
local xml_namespace = {
	["http://www.w3.org/XML/1998/namespace\1lang"] = "xml:lang",
	["http://www.w3.org/XML/1998/namespace\1space"] = "xml:space",
	["http://www.w3.org/XML/1998/namespace\1base"] = "xml:base",
	["http://www.w3.org/XML/1998/namespace\1id"] = "xml:id",
};

-- Default namespace of the stream root element.
local xmlns_streams = "http://etherx.jabber.org/streams";

-- Separator placed between namespace and local name in element names, and
-- the pattern that splits such a name into (namespace, localname).
local ns_separator = "\1";
local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$";

-- No-op used when no progress callback is supplied.
local function dummy_cb() end

--- Build the LuaExpat (SAX) handler table that turns parser events into
-- stream events: stream opened/closed, complete top-level stanzas and errors.
--
-- Element names are expected in the form "namespace<ns_separator>localname",
-- or just "localname" when there is no namespace part.
--
-- @param session           value handed back as first argument to every
--                          callback; its `notopen` field is read to decide
--                          whether a stream header is still expected
-- @param stream_callbacks  table; fields read here:
--                          `streamopened(session, attr)` (optional),
--                          `streamclosed(session)` (optional),
--                          `error(session, condition, ...)` (optional; the
--                          default raises a Lua error),
--                          `handlestanza(session, stanza)` (called without a
--                          nil check whenever a top-level element completes),
--                          `stream_ns`, `stream_tag`, `error_tag`, `default_ns`
-- @param cb_handleprogress optional function(n_bytes), called with the number
--                          of bytes that have been fully processed; only
--                          invoked when the parser can report byte counts
-- @return xml_handlers     handler table to pass to the parser constructor
-- @return meta             table with `reset()` and `set_session(stream, session)`
local function new_sax_handlers(session, stream_callbacks, cb_handleprogress)
	local xml_handlers = {};

	local cb_streamopened = stream_callbacks.streamopened;
	local cb_streamclosed = stream_callbacks.streamclosed;
	-- Without a user-supplied error callback, errors are raised to the caller.
	local cb_error = stream_callbacks.error or function(_, e, stanza) error("XML stream error: "..tostring(e)..(stanza and ": "..tostring(stanza) or ""),2); end;
	local cb_handlestanza = stream_callbacks.handlestanza;
	cb_handleprogress = cb_handleprogress or dummy_cb;

	local stream_ns = stream_callbacks.stream_ns or xmlns_streams;
	local stream_tag = stream_callbacks.stream_tag or "stream";
	local stream_error_tag = stream_callbacks.error_tag or "error";
	if stream_ns ~= "" then
		-- Qualify both tags the same way. With an empty stream namespace the
		-- element name carries no separator, so neither tag may be prefixed
		-- (a "\1error" tag could never match an incoming element name).
		stream_tag = stream_ns..ns_separator..stream_tag;
		stream_error_tag = stream_ns..ns_separator..stream_error_tag;
	end

	local stream_default_ns = stream_callbacks.default_ns;

	local stack = {};           -- ancestors of the element currently being built
	local chardata, stanza = {}; -- pending text pieces / element being built (nil between stanzas)
	local stanza_size = 0;      -- bytes consumed so far by the current top-level element
	local non_streamns_depth = 0; -- >0 while inside an element outside the default namespace

	function xml_handlers:StartElement(tagname, attr)
		if stanza and #chardata > 0 then
			-- We have some character data in the buffer
			t_insert(stanza, t_concat(chardata));
			chardata = {};
		end
		local curr_ns,name = tagname:match(ns_pattern);
		if name == "" then
			-- No separator in tagname: the first capture is the local name.
			curr_ns, name = "", curr_ns;
		end

		-- Only record xmlns explicitly when it differs from the stream's
		-- default namespace, or when already nested inside such an element.
		if curr_ns ~= stream_default_ns or non_streamns_depth > 0 then
			attr.xmlns = curr_ns;
			non_streamns_depth = non_streamns_depth + 1;
		end

		-- The array part of attr lists the attribute names; drop it and
		-- rename the well-known xml:* attributes to their prefixed form.
		for i=1,#attr do
			local k = attr[i];
			attr[i] = nil;
			local xmlk = xml_namespace[k];
			if xmlk then
				attr[xmlk] = attr[k];
				attr[k] = nil;
			end
		end

		if not stanza then --if we are not currently inside a stanza
			if lxp_supports_bytecount then
				stanza_size = self:getcurrentbytecount();
			end
			if session.notopen then
				if tagname == stream_tag then
					non_streamns_depth = 0;
					-- The stream header is fully processed at this point, so
					-- report its bytes whether or not anyone listens for
					-- streamopened; otherwise they would stay "outstanding".
					if lxp_supports_bytecount then
						cb_handleprogress(stanza_size);
						stanza_size = 0;
					end
					if cb_streamopened then
						cb_streamopened(session, attr);
					end
				else
					-- Garbage before stream?
					cb_error(session, "no-stream", tagname);
				end
				return;
			end
			if curr_ns == "jabber:client" and name ~= "iq" and name ~= "presence" and name ~= "message" then
				cb_error(session, "invalid-top-level-element");
			end

			stanza = setmetatable({ name = name, attr = attr, tags = {} }, stanza_mt);
		else -- we are inside a stanza, so add a tag
			if lxp_supports_bytecount then
				stanza_size = stanza_size + self:getcurrentbytecount();
			end
			t_insert(stack, stanza);
			local oldstanza = stanza;
			stanza = setmetatable({ name = name, attr = attr, tags = {} }, stanza_mt);
			t_insert(oldstanza, stanza);
			t_insert(oldstanza.tags, stanza);
		end
	end

	if lxp_supports_xmldecl then
		-- The XML declaration never belongs to a stanza: report it right away.
		function xml_handlers:XmlDecl(version, encoding, standalone)
			if lxp_supports_bytecount then
				cb_handleprogress(self:getcurrentbytecount());
			end
		end
	end

	-- Account for the bytes of a CDATA section marker: charged to the current
	-- stanza if there is one, otherwise reported as processed immediately.
	local function count_cdata_marker(parser)
		if lxp_supports_bytecount then
			if stanza then
				stanza_size = stanza_size + parser:getcurrentbytecount();
			else
				cb_handleprogress(parser:getcurrentbytecount());
			end
		end
	end
	xml_handlers.StartCdataSection = count_cdata_marker;
	xml_handlers.EndCdataSection = count_cdata_marker;

	function xml_handlers:CharacterData(data)
		if stanza then
			if lxp_supports_bytecount then
				stanza_size = stanza_size + self:getcurrentbytecount();
			end
			t_insert(chardata, data);
		elseif lxp_supports_bytecount then
			-- Text between stanzas is discarded, but its bytes are processed.
			cb_handleprogress(self:getcurrentbytecount());
		end
	end

	function xml_handlers:EndElement(tagname)
		if lxp_supports_bytecount then
			stanza_size = stanza_size + self:getcurrentbytecount()
		end
		if non_streamns_depth > 0 then
			non_streamns_depth = non_streamns_depth - 1;
		end
		if stanza then
			if #chardata > 0 then
				-- We have some character data in the buffer
				t_insert(stanza, t_concat(chardata));
				chardata = {};
			end
			-- Complete stanza
			if #stack == 0 then
				if lxp_supports_bytecount then
					cb_handleprogress(stanza_size);
				end
				stanza_size = 0;
				if tagname ~= stream_error_tag then
					cb_handlestanza(session, stanza);
				else
					cb_error(session, "stream-error", stanza);
				end
				stanza = nil;
			else
				stanza = t_remove(stack);
			end
		else
			-- An end tag outside any stanza (the stream's closing tag): its
			-- bytes are processed too, so report them instead of leaving them
			-- accumulated in stanza_size.
			if lxp_supports_bytecount then
				cb_handleprogress(stanza_size);
			end
			stanza_size = 0;
			if cb_streamclosed then
				cb_streamclosed(session);
			end
		end
	end

	-- Handler for XML features that are not allowed in the stream: report
	-- the error, then stop the parser (raising if it cannot be stopped).
	local function restricted_handler(parser)
		cb_error(session, "parse-error", "restricted-xml", "Restricted XML, see RFC 6120 section 11.1.");
		if not parser.stop or not parser:stop() then
			error("Failed to abort parsing");
		end
	end

	if lxp_supports_doctype then
		xml_handlers.StartDoctypeDecl = restricted_handler;
	end
	xml_handlers.Comment = restricted_handler;
	xml_handlers.ProcessingInstruction = restricted_handler;

	-- Forget any partially built stanza and all per-stream parse state.
	local function reset()
		stanza, chardata, stanza_size = nil, {}, 0;
		stack = {};
		-- Also clear the namespace depth, so a reset that happens while it is
		-- non-zero does not force xmlns attributes onto later elements.
		non_streamns_depth = 0;
	end

	-- Replace the session passed to callbacks; `stream` (the receiver when
	-- called with method syntax) is not used.
	local function set_session(stream, new_session)
		session = new_session;
	end

	return xml_handlers, { reset = reset, set_session = set_session };
end

--- Create a stream parser object for `session`.
--
-- Also installs `session.open_stream(session, from, to)`, which sends the
-- XML declaration and the opening stream tag through `session.sends2s` or
-- `session.send`.
--
-- @param session            session passed through to the stream callbacks
-- @param stream_callbacks   callback/config table (see new_sax_handlers)
-- @param stanza_size_limit  optional byte limit; defaults to
--                           default_stanza_size_limit when the parser can
--                           report byte counts, and raises an error if given
--                           when it cannot
-- @return table with `reset()`, `feed(self, data)` and `set_session`
local function new(session, stream_callbacks, stanza_size_limit)
	-- Bytes handed to the parser that the handlers have not yet reported as
	-- processed (e.g. the part of a stanza that is still incomplete).
	local pending_bytes = 0;
	local on_progress;

	if lxp_supports_bytecount then
		stanza_size_limit = stanza_size_limit or default_stanza_size_limit;
		on_progress = function (n_parsed_bytes)
			pending_bytes = pending_bytes - n_parsed_bytes;
		end;
	elseif stanza_size_limit then
		error("Stanza size limits are not supported on this version of LuaExpat")
	end

	local handlers, meta = new_sax_handlers(session, stream_callbacks, on_progress);
	local parser = new_parser(handlers, ns_separator, false);
	local parse = parser.parse;

	session.open_stream = function (self, from, to)
		local send = self.sends2s or self.send;

		local attr = {
			["xmlns:stream"] = "http://etherx.jabber.org/streams",
			["xml:lang"] = "en",
			xmlns = stream_callbacks.default_ns,
			version = self.version and (self.version > 0 and "1.0" or nil),
			id = self.streamid,
			from = from or self.host, to = to,
		};
		-- Give the session a chance to adjust the header attributes.
		if self.stream_attrs then
			self:stream_attrs(from, to, attr)
		end

		send("<?xml version='1.0'?>");
		send(st.stanza("stream:stream", attr):top_tag());
		return true;
	end;

	local stream = { set_session = meta.set_session };

	-- Start over with a fresh parser and cleared handler state.
	function stream.reset()
		parser = new_parser(handlers, ns_separator, false);
		parse = parser.parse;
		pending_bytes = 0;
		meta.reset();
	end

	-- Parse a chunk of data. Returns the parser's first two results, or
	-- nil, "stanza-too-large" once the unprocessed bytes exceed the limit.
	function stream.feed(self, data)
		if lxp_supports_bytecount then
			pending_bytes = pending_bytes + #data;
		end
		local ok, err = parse(parser, data);
		if lxp_supports_bytecount and pending_bytes > stanza_size_limit then
			return nil, "stanza-too-large";
		end
		return ok, err;
	end

	return stream;
end

-- Module exports: the stream constructor, the raw SAX handler factory, and
-- the namespace separator/pattern used for element names.
return {
	new = new,
	new_sax_handlers = new_sax_handlers,
	ns_pattern = ns_pattern,
	ns_separator = ns_separator,
};