Merge 0.10->trunk
[prosody.git] / fallbacks / lxp.lua
1
2 local coroutine = coroutine;
3 local tonumber = tonumber;
4 local string = string;
5 local setmetatable, getmetatable = setmetatable, getmetatable;
6 local pairs = pairs;
7
-- A coroutine that has already run to completion. parse() further down swaps
-- it in for the live parser coroutine after EOF or after an error, so that any
-- later resume fails immediately instead of continuing to parse.
local deadroutine = coroutine.create(function() end);
coroutine.resume(deadroutine);

-- From here on the chunk's environment is the "lxp" module table. No
-- package.seeall is passed, so ordinary globals are no longer visible; only
-- the locals cached above stay reachable.
module("lxp")
12
-- Encodes a Unicode code point as a UTF-8 byte string.
-- Returns nil for surrogates and for values beyond U+10FFFF.
-- Integer division is spelled (a - a % b) / b because the math library is
-- not reachable from inside the module environment.
local function utf8_encode(code)
	if code < 0x80 then
		return string.char(code);
	elseif code < 0x800 then
		local b2 = code % 0x40;
		return string.char(0xC0 + (code - b2) / 0x40, 0x80 + b2);
	elseif code < 0x10000 then
		if code >= 0xD800 and code <= 0xDFFF then
			return nil; -- surrogate halves are not characters
		end
		local b3 = code % 0x40;
		local rest = (code - b3) / 0x40;
		local b2 = rest % 0x40;
		return string.char(0xE0 + (rest - b2) / 0x40, 0x80 + b2, 0x80 + b3);
	elseif code <= 0x10FFFF then
		local b4 = code % 0x40;
		local rest = (code - b4) / 0x40;
		local b3 = rest % 0x40;
		rest = (rest - b3) / 0x40;
		local b2 = rest % 0x40;
		return string.char(0xF0 + (rest - b2) / 0x40, 0x80 + b2, 0x80 + b3, 0x80 + b4);
	end
	return nil;
end

-- Maps an entity name (the text between "&" and ";") to its replacement.
-- The five predefined XML entities are stored directly; numeric character
-- references ("#233", "#x20AC") are resolved on demand by __index and come
-- back UTF-8 encoded. Anything else yields nil, which makes string.gsub
-- leave the original "&...;" text untouched.
local entity_map = setmetatable({
	["amp"] = "&";
	["gt"] = ">";
	["lt"] = "<";
	["apos"] = "'";
	["quot"] = "\"";
}, {__index = function(_, s)
		-- Validate the digits up front so malformed references such as
		-- "#xzz" or "#" resolve to nil instead of raising inside string.char.
		local digits, base = s:match("^#x(%x+)$"), 16;
		if not digits then
			digits, base = s:match("^#(%d+)$"), 10;
		end
		if not digits then
			return nil;
		end
		local code = tonumber(digits, base);
		if not code then
			return nil;
		end
		return utf8_encode(code);
	end
});
-- Replaces every "&name;" sequence in str with the value entity_map holds
-- for name. Sequences entity_map has no value for are kept verbatim.
-- Returns only the resulting string (gsub's substitution count is dropped).
local function xml_unescape(str)
	local unescaped = str:gsub("&(.-);", entity_map);
	return unescaped;
end
-- Splits the inside of a start tag (without "<", ">" or a trailing "/") into
-- the element name and a table of attributes.
-- @param s tag contents, e.g. [[stream:features xmlns="urn:x" id='1']]
-- @return name  the first run of non-whitespace characters
-- @return attr  table mapping attribute name to its unescaped value
-- If s contains no name at all, the match yields nil and the attribute scan
-- below raises; the parser coroutine's caller sees that as a parse error.
local function parse_tag(s)
	local name, sattr = s:match("(%S+)(.*)");
	local attr = {};
	-- The value must be closed by the same quote that opened it (%2), so a
	-- value may contain the other quote character; whitespace around "=" is
	-- permitted by XML and accepted here.
	for key, _, value in sattr:gmatch("([^=%s]+)%s*=%s*([\"'])(.-)%2") do
		attr[key] = xml_unescape(value);
	end
	return name, attr;
end
38
-- Body of the parsing coroutine: a small streaming XML tokenizer.
-- @param data          first chunk of input; later chunks arrive as the
--                      first value passed to each resume
-- @param handlers      object whose StartElement(name, attr),
--                      EndElement(name) and CharacterData(text) methods are
--                      invoked as the document is read
-- @param ns_separator  string placed between a namespace URI and the local
--                      name in expanded element/attribute names
-- The function yields whenever it needs more input. It only returns when an
-- end tag does not match the open element, and then returns an error message;
-- the caller treats any value coming back from resume as a failure.
-- Comments, processing instructions and "<!...>" declarations are skipped.
-- CDATA sections are delivered verbatim through CharacterData.
local function parser(data, handlers, ns_separator)
	-- Removes and returns the buffer up to and including the first literal
	-- occurrence of str (which may be several characters long).
	local function read_until(str)
		local _, last = data:find(str, nil, true);
		while not last do
			data = data..coroutine.yield();
			_, last = data:find(str, nil, true);
		end
		local r = data:sub(1, last);
		data = data:sub(last+1);
		return r;
	end
	-- Removes and returns the buffer up to, but not including, str.
	local function read_before(str)
		local pos = data:find(str, nil, true);
		while not pos do
			data = data..coroutine.yield();
			pos = data:find(str, nil, true);
		end
		local r = data:sub(1, pos-1);
		data = data:sub(pos);
		return r;
	end
	-- Returns the next character without consuming it.
	local function peek()
		while #data == 0 do data = coroutine.yield(); end
		return data:sub(1,1);
	end
	-- Tells whether the buffer begins with prefix, asking for more input
	-- only while the buffered text is still a proper prefix of it.
	local function starts_with(prefix)
		while #data < #prefix and data == prefix:sub(1, #data) do
			data = data..coroutine.yield();
		end
		return data:sub(1, #prefix) == prefix;
	end

	-- Namespace scopes form a chain: each open element gets a table whose
	-- metatable (and __index) is the enclosing scope. Slot [0] stores the
	-- expanded name of the element that opened the scope.
	local ns = { xml = "http://www.w3.org/XML/1998/namespace" };
	ns.__index = ns;
	-- Expands "prefix:local" using the current scope; with dodefault set, an
	-- otherwise unresolved name is placed in the default namespace, if any.
	local function apply_ns(name, dodefault)
		local prefix,n = name:match("^([^:]*):(.*)$");
		if prefix and ns[prefix] then
			return ns[prefix]..ns_separator..n;
		end
		if dodefault and ns[""] then
			return ns[""]..ns_separator..name;
		end
		return name;
	end
	-- Opens a scope for a start tag: xmlns declarations are recorded and
	-- removed from attr, the remaining attributes are expanded into a new
	-- table that also lists their names in its array part.
	local function push(tag, attr)
		ns = setmetatable({}, ns);
		for k,v in pairs(attr) do
			local xmlns = k == "xmlns" and "" or k:match("^xmlns:(.*)$");
			if xmlns then
				ns[xmlns] = v;
				attr[k] = nil;
			end
		end
		local newattr, n = {}, 0;
		for k,v in pairs(attr) do
			n = n+1;
			k = apply_ns(k);
			newattr[n] = k;
			newattr[k] = v;
		end
		tag = apply_ns(tag, true);
		ns[0] = tag;
		ns.__index = ns;
		return tag, newattr;
	end
	-- Closes the innermost scope and returns the name of its element.
	local function pop()
		local tag = ns[0];
		ns = getmetatable(ns);
		return tag;
	end

	while true do
		if peek() ~= "<" then
			local text = read_before("<");
			handlers:CharacterData(xml_unescape(text));
		elseif starts_with("<!--") then
			-- A comment ends at "-->", not at the first ">".
			data = data:sub(5);
			read_until("-->");
		elseif starts_with("<![CDATA[") then
			-- CDATA content is literal text: no entity expansion.
			data = data:sub(10);
			local cdata = read_until("]]>"):sub(1, -4);
			if cdata ~= "" then
				handlers:CharacterData(cdata);
			end
		elseif starts_with("<?") then
			-- A processing instruction ends at "?>".
			data = data:sub(3);
			read_until("?>");
		else
			local elem = read_until(">"):sub(2,-2);
			local first = elem:sub(1,1);
			if first == "!" then
				-- other declarations (e.g. DOCTYPE) are ignored
			elseif first == "/" then -- end tag
				local closing = elem:match("^/(%S*)");
				-- ns[0] is nil at the root scope, so a stray end tag is
				-- rejected here as well and pop() never empties the chain.
				if apply_ns(closing, true) ~= ns[0] then
					return "mismatched end tag </"..closing..">";
				end
				handlers:EndElement(pop());
			elseif elem:sub(-1,-1) == "/" then -- empty tag
				local name,attr = parse_tag(elem:sub(1,-2));
				name,attr = push(name,attr);
				handlers:StartElement(name,attr);
				handlers:EndElement(pop());
			else -- start tag
				local name,attr = parse_tag(elem);
				name,attr = push(name,attr);
				handlers:StartElement(name,attr);
			end
		end
	end
end
130
-- Creates a parser object backed by a fresh parser coroutine.
-- @param handlers      callback object handed to the parser coroutine
-- @param ns_separator  separator handed to the parser coroutine
-- @return table with a single method, parse(self, data):
--   * data == nil/false signals end of input: the parser is retired and
--     true is returned;
--   * otherwise data is fed to the coroutine; if the resume produces any
--     second value (an error message or a value returned by the coroutine)
--     the parser is retired and nil plus that value are returned;
--   * otherwise true is returned.
function new(handlers, ns_separator)
	local co = coroutine.create(parser);
	local instance = {};

	function instance:parse(data)
		if not data then
			co = deadroutine;
			return true; -- eof
		end
		local _, result = coroutine.resume(co, data, handlers, ns_separator);
		if not result then
			return true; -- success
		end
		co = deadroutine;
		return nil, result; -- error
	end

	return instance;
end
148
149 return _M;