util.xmppstream: Reject XML comments, processing instructions and (if supported by...
[prosody.git] / util / xmppstream.lua
1 -- Prosody IM
2 -- Copyright (C) 2008-2010 Matthew Wild
3 -- Copyright (C) 2008-2010 Waqas Hussain
4 -- 
5 -- This project is MIT/X11 licensed. Please see the
6 -- COPYING file in the source package for more information.
7 --
8
9
10 local lxp = require "lxp";
11 local st = require "util.stanza";
12
13 local tostring = tostring;
14 local t_insert = table.insert;
15 local t_concat = table.concat;
16
17 local default_log = require "util.logger".init("xmppstream");
18
-- COMPAT: w/LuaExpat 1.1.0
-- Feature probe: try to build a parser whose callback table names
-- StartDoctypeDecl. pcall yields false when lxp.new raises an error,
-- which is taken to mean this LuaExpat cannot intercept doctype declarations.
local lxp_supports_doctype = pcall(lxp.new, { StartDoctypeDecl = false });

if not lxp_supports_doctype then
	-- Logged once at module load so the administrator sees the upgrade advice.
	local upgrade_notice = "The version of LuaExpat on your system leaves Prosody "
		.."vulnerable to denial-of-service attacks. You should upgrade to "
		.."LuaExpat 1.1.1 or higher as soon as possible. See "
		.."http://prosody.im/doc/depends#luaexpat for more information.";
	default_log("warn", upgrade_notice);
end
28
29 local error = error;
30
31 module "xmppstream"
32
-- Constructor used for every parser this module creates.
local new_parser = lxp.new;

-- Namespace URI -> prefix used when rewriting namespaced attribute names.
local ns_prefixes = { ["http://www.w3.org/XML/1998/namespace"] = "xml" };

-- Stream namespace used when the caller does not supply stream_ns.
local xmlns_streams = "http://etherx.jabber.org/streams";

-- Separator handed to the parser, and the pattern that splits a name on it:
-- capture 1 is everything before the first separator, capture 2 everything
-- after it (empty when the name contains no separator).
local ns_separator = "\1";
local ns_pattern = "^([^"..ns_separator.."]*)"..ns_separator.."?(.*)$";

-- Exported on the module table.
_M.ns_separator, _M.ns_pattern = ns_separator, ns_pattern;
46
--- Build the SAX handler table that turns parser events into stream
-- callbacks and stanzas for one session.
--
-- @param session          passed back as the first argument of every
--                         callback; its `notopen` field is read on each
--                         element start that occurs outside a stanza
-- @param stream_callbacks table that may provide the callbacks
--                         `streamopened`, `streamclosed`, `error` and
--                         `handlestanza`, and the strings `stream_ns`,
--                         `stream_tag`, `error_tag` and `default_ns`
-- @return the handler table (StartElement, CharacterData, EndElement and
--         the restricted-XML handlers), suitable for lxp.new
-- @return a table `{ reset = f, set_session = f }` controlling parse state
function new_sax_handlers(session, stream_callbacks)
	local xml_handlers = {};

	local cb_streamopened = stream_callbacks.streamopened;
	local cb_streamclosed = stream_callbacks.streamclosed;
	-- When no error callback is supplied, stream errors are raised as Lua errors.
	local cb_error = stream_callbacks.error or function(session, e) error("XML stream error: "..tostring(e)); end;
	local cb_handlestanza = stream_callbacks.handlestanza;

	local stream_ns = stream_callbacks.stream_ns or xmlns_streams;
	local stream_tag = stream_callbacks.stream_tag or "stream";
	local stream_error_tag = stream_callbacks.error_tag or "error";
	if stream_ns ~= "" then
		-- Both tags are qualified the same way. With an empty stream_ns they
		-- stay bare, so that they can match element names that carry no
		-- namespace prefix.
		stream_tag = stream_ns..ns_separator..stream_tag;
		stream_error_tag = stream_ns..ns_separator..stream_error_tag;
	end

	local stream_default_ns = stream_callbacks.default_ns;

	local chardata, stanza = {};
	-- Number of currently open elements that had an explicit xmlns set on them.
	local non_streamns_depth = 0;

	-- Split a parser-supplied element name into namespace and local name.
	-- A name without a separator is treated as having the empty namespace.
	local function split_tagname(tagname)
		local ns, name = tagname:match(ns_pattern);
		if name == "" then
			return "", ns;
		end
		return ns, name;
	end

	-- Append any buffered character data to the current stanza and clear the buffer.
	local function flush_chardata()
		if #chardata > 0 then
			stanza:text(t_concat(chardata));
			chardata = {};
		end
	end

	function xml_handlers:StartElement(tagname, attr)
		if stanza then
			flush_chardata();
		end
		local curr_ns, name = split_tagname(tagname);

		-- Once one element needed an explicit xmlns, every element nested
		-- inside it gets one too, until the depth counter returns to zero.
		if curr_ns ~= stream_default_ns or non_streamns_depth > 0 then
			attr.xmlns = curr_ns;
			non_streamns_depth = non_streamns_depth + 1;
		end

		-- Drop the array part of attr (the list of attribute names) and
		-- rename attributes whose namespace has a known prefix to
		-- "prefix:name". Attributes in any other namespace keep their
		-- separator-joined key.
		for i=1,#attr do
			local k = attr[i];
			attr[i] = nil;
			local ns, nm = k:match(ns_pattern);
			if nm ~= "" then
				ns = ns_prefixes[ns];
				if ns then
					attr[ns..":"..nm] = attr[k];
					attr[k] = nil;
				end
			end
		end

		if not stanza then --if we are not currently inside a stanza
			if session.notopen then
				if tagname == stream_tag then
					non_streamns_depth = 0;
					if cb_streamopened then
						cb_streamopened(session, attr);
					end
				else
					-- Garbage before stream?
					cb_error(session, "no-stream");
				end
				return;
			end
			if curr_ns == "jabber:client" and name ~= "iq" and name ~= "presence" and name ~= "message" then
				cb_error(session, "invalid-top-level-element");
			end

			-- Reached even after the error above, unless cb_error raised.
			stanza = st.stanza(name, attr);
		else -- we are inside a stanza, so add a tag
			stanza:tag(name, attr);
		end
	end

	function xml_handlers:CharacterData(data)
		-- Text outside a stanza is discarded.
		if stanza then
			t_insert(chardata, data);
		end
	end

	function xml_handlers:EndElement(tagname)
		if non_streamns_depth > 0 then
			non_streamns_depth = non_streamns_depth - 1;
		end
		if stanza then
			flush_chardata();
			-- An absent or empty last_add is taken to mean that no child tag
			-- is open, i.e. this close ends the top-level stanza
			-- (NOTE(review): last_add semantics live in util.stanza - confirm).
			local last_add = stanza.last_add;
			if not last_add or #last_add == 0 then
				if tagname ~= stream_error_tag then
					cb_handlestanza(session, stanza);
				else
					cb_error(session, "stream-error", stanza);
				end
				stanza = nil;
			else
				stanza:up();
			end
		else
			if tagname == stream_tag then
				if cb_streamclosed then
					cb_streamclosed(session);
				end
			else
				local _, name = split_tagname(tagname);
				cb_error(session, "parse-error", "unexpected-element-close", name);
			end
			stanza, chardata = nil, {};
		end
	end

	-- Shared handler for XML constructs that are reported as a parse error.
	local function restricted_handler()
		cb_error(session, "parse-error", "restricted-xml", "Restricted XML, see RFC 6120 section 11.1.");
	end

	if lxp_supports_doctype then
		xml_handlers.StartDoctypeDecl = restricted_handler;
	end
	xml_handlers.Comment = restricted_handler;
	xml_handlers.StartCdataSection = restricted_handler;
	xml_handlers.ProcessingInstruction = restricted_handler;

	-- Forget any partially built stanza and buffered text.
	local function reset()
		stanza, chardata = nil, {};
	end

	-- Switch the session that is handed to the callbacks from now on.
	local function set_session(stream, new_session)
		session = new_session;
	end

	return xml_handlers, { reset = reset, set_session = set_session };
end
185
--- Create a stream object that feeds data through a parser wired to the
-- handlers from new_sax_handlers.
-- @param session          forwarded to new_sax_handlers
-- @param stream_callbacks forwarded to new_sax_handlers
-- @return table with `reset()`, `feed(self, data)` and `set_session`
function new(session, stream_callbacks)
	local sax_handlers, controls = new_sax_handlers(session, stream_callbacks);
	local parser, parse;

	-- (Re)create the underlying parser and cache its parse function.
	local function fresh_parser()
		parser = new_parser(sax_handlers, ns_separator);
		parse = parser.parse;
	end
	fresh_parser();

	local stream = {};

	-- Replace the parser with a new one, then clear the handlers' stanza state.
	function stream.reset()
		fresh_parser();
		controls.reset();
	end

	-- Hand a chunk of data to the current parser and return whatever it returns.
	function stream:feed(data)
		return parse(parser, data);
	end

	stream.set_session = controls.set_session;

	return stream;
end
203
204 return _M;