2 -- Copyright (C) 2008-2010 Matthew Wild
3 -- Copyright (C) 2008-2010 Waqas Hussain
5 -- This project is MIT/X11 licensed. Please see the
6 -- COPYING file in the source package for more information.
9 local lxp = require "lxp";
10 local st = require "util.stanza";
11 local stanza_mt = st.stanza_mt;
14 local tostring = tostring;
15 local t_insert = table.insert;
16 local t_concat = table.concat;
17 local t_remove = table.remove;
18 local setmetatable = setmetatable;
-- Capability probes for the installed LuaExpat, computed once at load time.
-- The flags are consulted further down to decide which handlers get
-- registered and whether byte-count based progress tracking is used.
20 -- COMPAT: w/LuaExpat 1.1.0
-- First result of pcall: true when lxp.new accepted a callbacks table
-- containing a StartDoctypeDecl entry without raising an error.
21 local lxp_supports_doctype = pcall(lxp.new, { StartDoctypeDecl = false });
-- Same probe, for the XmlDecl callback.
22 local lxp_supports_xmldecl = pcall(lxp.new, { XmlDecl = false });
-- Coerced to a real boolean: does a freshly created parser expose a
-- getcurrentbytecount member?
23 local lxp_supports_bytecount = not not lxp.new({}).getcurrentbytecount;
-- Used by new() as the stanza size limit when the caller passes none and
-- byte counting is supported.
25 local default_stanza_size_limit = 1024*1024*10; -- 10MB
-- Local alias for the parser constructor.
29 local new_parser = lxp.new;
-- Lookup table from attribute names in the XML namespace, written as
-- <namespace URI> .. "\1" .. <local name>, to their "xml:"-prefixed spelling.
-- StartElement indexes this table with an attribute key.
31 local xml_namespace = {
32 ["http://www.w3.org/XML/1998/namespace\1lang"] = "xml:lang";
33 ["http://www.w3.org/XML/1998/namespace\1space"] = "xml:space";
34 ["http://www.w3.org/XML/1998/namespace\1base"] = "xml:base";
35 ["http://www.w3.org/XML/1998/namespace\1id"] = "xml:id";
-- Stream namespace used when stream_callbacks.stream_ns is not supplied.
38 local xmlns_streams = "http://etherx.jabber.org/streams";
-- Separator placed between a namespace URI and a local name. It is also
-- handed to the parser constructor in new().
40 local ns_separator = "\1";
-- Lua pattern with two captures: the run of characters before the first
-- separator, and everything after it. The separator itself is optional, so a
-- name without one yields the whole name in the first capture and "" in the
-- second.
41 local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$";
-- No-op function. new_sax_handlers substitutes it for cb_handleprogress when
-- the caller does not provide a progress callback.
43 local function dummy_cb() end
-- Builds the table of SAX-style callbacks (StartElement, CharacterData,
-- EndElement, ...) for one XML stream, closing over the per-stream state.
--   session            passed as the first argument to every stream callback
--   stream_callbacks   table read for: streamopened, streamclosed, error,
--                      handlestanza, stream_ns, stream_tag, error_tag,
--                      default_ns
--   cb_handleprogress  optional function called with a byte count; replaced
--                      by a no-op when absent
-- Returns the handler table and a second table { reset, set_session }.
-- NOTE(review): several lines of this function are not present in this copy
-- (closing `end`s, a loop header, some conditions, the declaration of
-- `stack`); the comments below describe only the statements that are visible.
45 local function new_sax_handlers(session, stream_callbacks, cb_handleprogress)
46 local xml_handlers = {};
48 local cb_streamopened = stream_callbacks.streamopened;
49 local cb_streamclosed = stream_callbacks.streamclosed;
-- Default error callback raises a Lua error (level 2) that includes the
-- error condition and, when given, the offending stanza.
50 local cb_error = stream_callbacks.error or function(session, e, stanza) error("XML stream error: "..tostring(e)..(stanza and ": "..tostring(stanza) or ""),2); end;
51 local cb_handlestanza = stream_callbacks.handlestanza;
52 cb_handleprogress = cb_handleprogress or dummy_cb;
54 local stream_ns = stream_callbacks.stream_ns or xmlns_streams;
55 local stream_tag = stream_callbacks.stream_tag or "stream";
-- With a non-empty stream namespace the expected stream tag is the qualified
-- form <ns><separator><tag>; it is later compared against the tagname
-- received by StartElement.
56 if stream_ns ~= "" then
57 stream_tag = stream_ns..ns_separator..stream_tag;
-- Qualified name of the stream-level error element; EndElement compares the
-- closing tag against it to route to cb_error instead of cb_handlestanza.
59 local stream_error_tag = stream_ns..ns_separator..(stream_callbacks.error_tag or "error");
61 local stream_default_ns = stream_callbacks.default_ns;
-- chardata starts as an empty buffer of text pieces; stanza starts as nil
-- (only one initialiser is given for the two names).
64 local chardata, stanza = {};
65 local stanza_size = 0;
66 local non_streamns_depth = 0;
-- Called with the element's tag name and its attribute table.
67 function xml_handlers:StartElement(tagname, attr)
68 if stanza and #chardata > 0 then
69 -- We have some character data in the buffer
-- Buffered text is joined and appended to the current stanza as one child.
70 t_insert(stanza, t_concat(chardata));
-- Split the tag name into its namespace part and local name.
73 local curr_ns,name = tagname:match(ns_pattern);
-- Moves the first capture into `name` and sets the namespace to "".
-- NOTE(review): the guarding condition is not visible; presumably this
-- applies when the tag name had no separator - confirm.
75 curr_ns, name = "", curr_ns;
-- Count nesting depth of elements outside the stream's default namespace;
-- once inside such a subtree every further element is counted too.
78 if curr_ns ~= stream_default_ns or non_streamns_depth > 0 then
80 non_streamns_depth = non_streamns_depth + 1;
-- Look up the "xml:"-prefixed spelling for attribute key k.
-- NOTE(review): the surrounding loop and the definition of k are not
-- visible here.
86 local xmlk = xml_namespace[k];
93 if not stanza then --if we are not currently inside a stanza
-- A new top-level element starts the size count afresh.
94 if lxp_supports_bytecount then
95 stanza_size = self:getcurrentbytecount();
97 if session.notopen then
98 if tagname == stream_tag then
99 non_streamns_depth = 0;
100 if cb_streamopened then
-- Report the bytes consumed by the stream header before notifying the
-- owner that the stream opened.
101 if lxp_supports_bytecount then
102 cb_handleprogress(stanza_size);
105 cb_streamopened(session, attr);
108 -- Garbage before stream?
109 cb_error(session, "no-stream", tagname);
-- In the jabber:client namespace only iq, presence and message are accepted
-- at this level.
113 if curr_ns == "jabber:client" and name ~= "iq" and name ~= "presence" and name ~= "message" then
114 cb_error(session, "invalid-top-level-element");
-- Start a new stanza object using the shared stanza metatable.
117 stanza = setmetatable({ name = name, attr = attr, tags = {} }, stanza_mt);
118 else -- we are inside a stanza, so add a tag
119 if lxp_supports_bytecount then
120 stanza_size = stanza_size + self:getcurrentbytecount();
-- Remember the parent on the stack, then make the new child current. The
-- child is added both to the parent's children and to its tags list.
122 t_insert(stack, stanza);
123 local oldstanza = stanza;
124 stanza = setmetatable({ name = name, attr = attr, tags = {} }, stanza_mt);
125 t_insert(oldstanza, stanza);
126 t_insert(oldstanza.tags, stanza);
-- The XmlDecl handler is only defined when the library supports it; its
-- visible body just reports the bytes consumed by the declaration.
129 if lxp_supports_xmldecl then
130 function xml_handlers:XmlDecl(version, encoding, standalone)
131 if lxp_supports_bytecount then
132 cb_handleprogress(self:getcurrentbytecount());
-- CDATA section start: the current byte count is either added to the running
-- stanza size or reported as progress.
-- NOTE(review): the condition choosing between the two is not visible;
-- presumably it is whether a stanza is in progress - confirm.
136 function xml_handlers:StartCdataSection()
137 if lxp_supports_bytecount then
139 stanza_size = stanza_size + self:getcurrentbytecount();
141 cb_handleprogress(self:getcurrentbytecount());
-- CDATA section end: same byte accounting as StartCdataSection.
145 function xml_handlers:EndCdataSection()
146 if lxp_supports_bytecount then
148 stanza_size = stanza_size + self:getcurrentbytecount();
150 cb_handleprogress(self:getcurrentbytecount());
-- Text content: in the first branch the bytes are counted towards the stanza
-- and the text is buffered in chardata; in the elseif branch the bytes are
-- only reported as progress.
-- NOTE(review): the first branch's condition is not visible here.
154 function xml_handlers:CharacterData(data)
156 if lxp_supports_bytecount then
157 stanza_size = stanza_size + self:getcurrentbytecount();
159 t_insert(chardata, data);
160 elseif lxp_supports_bytecount then
161 cb_handleprogress(self:getcurrentbytecount());
-- Called with the tag name of the element being closed.
164 function xml_handlers:EndElement(tagname)
165 if lxp_supports_bytecount then
166 stanza_size = stanza_size + self:getcurrentbytecount()
-- Mirror of the increment in StartElement.
168 if non_streamns_depth > 0 then
169 non_streamns_depth = non_streamns_depth - 1;
172 if #chardata > 0 then
173 -- We have some character data in the buffer
174 t_insert(stanza, t_concat(chardata));
-- Report the accumulated stanza size as progress, then deliver the stanza:
-- stream-level errors go to cb_error, everything else to cb_handlestanza.
179 if lxp_supports_bytecount then
180 cb_handleprogress(stanza_size);
183 if tagname ~= stream_error_tag then
184 cb_handlestanza(session, stanza);
186 cb_error(session, "stream-error", stanza);
-- Make the most recently pushed parent the current stanza again.
190 stanza = t_remove(stack);
-- Notify the owner that the stream was closed, if it asked to be told.
193 if cb_streamclosed then
194 cb_streamclosed(session);
-- Shared handler for XML constructs that are not allowed on the stream:
-- reports a parse error, then tries to stop the parser. Raises if the parser
-- has no stop member or stop() returns a false value.
199 local function restricted_handler(parser)
200 cb_error(session, "parse-error", "restricted-xml", "Restricted XML, see RFC 6120 section 11.1.");
201 if not parser.stop or not parser:stop() then
202 error("Failed to abort parsing");
-- Doctype declarations (when the library supports that callback), comments
-- and processing instructions all go to the restricted handler.
206 if lxp_supports_doctype then
207 xml_handlers.StartDoctypeDecl = restricted_handler;
209 xml_handlers.Comment = restricted_handler;
210 xml_handlers.ProcessingInstruction = restricted_handler;
-- Drops any partially built stanza, empties the text buffer and zeroes the
-- size counter.
212 local function reset()
213 stanza, chardata, stanza_size = nil, {}, 0;
-- Replaces the session that is passed to the callbacks; the first parameter
-- (stream) is not used in the visible body.
217 local function set_session(stream, new_session)
218 session = new_session;
221 return xml_handlers, { reset = reset, set_session = set_session };
-- Creates a parser for one stream, wiring the SAX handlers from
-- new_sax_handlers to a fresh LuaExpat parser and installing
-- session.open_stream.
--   session            passed through to new_sax_handlers; gains an
--                      open_stream field
--   stream_callbacks   passed through to new_sax_handlers; default_ns is
--                      also read when building the stream header
--   stanza_size_limit  optional byte limit; defaults to
--                      default_stanza_size_limit when byte counting is
--                      supported, and raises an error when it is given but
--                      byte counting is not supported
-- NOTE(review): the statement that returns the stream object is not present
-- in this copy; only some of its fields (feed, set_session) are visible.
224 local function new(session, stream_callbacks, stanza_size_limit)
225 -- Used to track parser progress (e.g. to enforce size limits)
226 local n_outstanding_bytes = 0;
227 local handle_progress;
228 if lxp_supports_bytecount then
-- Bytes the handlers report as processed are deducted from the outstanding
-- count that feed() builds up.
229 function handle_progress(n_parsed_bytes)
230 n_outstanding_bytes = n_outstanding_bytes - n_parsed_bytes;
232 stanza_size_limit = stanza_size_limit or default_stanza_size_limit;
233 elseif stanza_size_limit then
234 error("Stanza size limits are not supported on this version of LuaExpat")
-- handle_progress stays nil when byte counting is unsupported;
-- new_sax_handlers then falls back to its no-op callback.
237 local handlers, meta = new_sax_handlers(session, stream_callbacks, handle_progress);
238 local parser = new_parser(handlers, ns_separator, false);
239 local parse = parser.parse;
-- Sends the XML declaration and the opening stream tag through the session's
-- send function (sends2s is preferred when present).
241 function session.open_stream(session, from, to)
242 local send = session.sends2s or session.send;
-- Attributes of the stream header.
-- NOTE(review): the line that opens this table is not visible here.
245 ["xmlns:stream"] = "http://etherx.jabber.org/streams",
247 xmlns = stream_callbacks.default_ns,
-- "1.0" only when session.version is set and greater than zero.
248 version = session.version and (session.version > 0 and "1.0" or nil),
249 id = session.streamid,
250 from = from or session.host, to = to,
-- Optional hook that receives the attribute table before it is sent.
252 if session.stream_attrs then
253 session:stream_attrs(from, to, attr)
255 send("<?xml version='1.0'?>");
256 send(st.stanza("stream:stream", attr):top_tag());
-- Replace the parser with a fresh one using the same handlers and clear the
-- outstanding byte count.
-- NOTE(review): the enclosing function header is not visible; presumably
-- this is the stream's reset operation - confirm.
262 parser = new_parser(handlers, ns_separator, false);
263 parse = parser.parse;
264 n_outstanding_bytes = 0;
-- Feeds a chunk of data to the parser. The chunk length is added to the
-- outstanding count first; after parsing, if the count still exceeds the
-- limit, nil and "stanza-too-large" are returned.
267 feed = function (self, data)
268 if lxp_supports_bytecount then
269 n_outstanding_bytes = n_outstanding_bytes + #data;
271 local ok, err = parse(parser, data);
272 if lxp_supports_bytecount and n_outstanding_bytes > stanza_size_limit then
273 return nil, "stanza-too-large";
-- Exposes the handlers' set_session under the same name.
277 set_session = meta.set_session;
-- NOTE(review): the opening of the enclosing construct is not visible in this
-- copy; these entries appear to publish the module-level values under their
-- own names - confirm against the full file.
282 ns_separator = ns_separator;
283 ns_pattern = ns_pattern;
284 new_sax_handlers = new_sax_handlers;