fallbacks/lxp.lua: Pure Lua pseudo-XML parser. Implements the same API as LuaExpat.
[prosody.git] / fallbacks / lxp.lua
1 \r
2 local coroutine = coroutine;\r
3 local tonumber = tonumber;\r
4 local string = string;\r
5 local setmetatable, getmetatable = setmetatable, getmetatable;\r
6 local pairs = pairs;\r
7 \r
-- A coroutine that has already run to completion. Resuming it always fails,
-- so a parser object whose coroutine is swapped for this one rejects all
-- further input (new() does this at end-of-input and after an error).
local deadroutine = coroutine.create(function() end);
coroutine.resume(deadroutine);

-- module() (Lua 5.1) makes the "lxp" module table the environment of the rest
-- of this chunk, which is why every global used below is captured as a local
-- before this call.
module("lxp")
12 \r
-- Lookup table from entity name (the text between "&" and ";") to its
-- replacement text. The five predefined XML entities are stored directly;
-- numeric character references ("#65", "#x41") are resolved on demand by the
-- __index handler and returned UTF-8 encoded.
--
-- Anything that is not a valid reference yields nil, which makes string.gsub
-- leave the original "&...;" text untouched instead of raising an error.
local entity_map = setmetatable({
        ["amp"] = "&";
        ["gt"] = ">";
        ["lt"] = "<";
        ["apos"] = "'";
        ["quot"] = "\"";
}, {__index = function(_, s)
                -- Accept only "#x<hex digits>" or "#<decimal digits>".
                local digits, base = s:match("^#x(%x+)$"), 16;
                if not digits then
                        digits, base = s:match("^#(%d+)$"), 10;
                end
                -- The length cap keeps tonumber() well away from integer
                -- overflow/wrap-around on absurdly long digit strings.
                if not digits or #digits > 10 then
                        return nil;
                end
                local code = tonumber(digits, base);
                if not code or code < 1 or code > 0x10FFFF
                or (code >= 0xD800 and code <= 0xDFFF) then
                        return nil; -- NUL, out of Unicode range, or a surrogate
                end

                -- UTF-8 encode using plain arithmetic only (the math library is
                -- not reachable after module()).
                local char = string.char;
                if code < 0x80 then
                        return char(code);
                end
                local b1 = code % 64;
                code = (code - b1) / 64;
                if code < 0x20 then -- original code point < 0x800
                        return char(0xC0 + code, 0x80 + b1);
                end
                local b2 = code % 64;
                code = (code - b2) / 64;
                if code < 0x10 then -- original code point < 0x10000
                        return char(0xE0 + code, 0x80 + b2, 0x80 + b1);
                end
                local b3 = code % 64;
                code = (code - b3) / 64;
                return char(0xF0 + code, 0x80 + b3, 0x80 + b2, 0x80 + b1);
        end
});
-- Expand entity and character references in `str`.
-- Each "&name;" is looked up in entity_map by its name; references for which
-- the lookup yields nil are left in the text unchanged (string.gsub's table
-- replacement semantics). Only the resulting string is returned, not gsub's
-- substitution count.
local function xml_unescape(str)
        local unescaped = str:gsub("&(.-);", entity_map);
        return unescaped;
end
-- Split the inside of a tag (the text between "<" and ">", without a trailing
-- "/") into the element name and a table of its attributes.
--
-- @param s  tag contents, e.g. [[a href="x" title='y']]
-- @return name  the first run of non-whitespace characters (nil if `s` has none)
-- @return attr  table mapping attribute name -> unescaped attribute value
local function parse_tag(s)
        local name, sattr = s:match("([^%s]+)(.*)");
        local attr = {};
        -- %2 is a back-reference to the opening quote, so a value is closed only
        -- by the same kind of quote that opened it: a="it's" and b='say "hi"'
        -- both keep their full value. Whitespace around "=" is tolerated.
        for key, _, value in (sattr or ""):gmatch("([^=%s]+)%s*=%s*([\"'])(.-)%2") do
                attr[key] = xml_unescape(value);
        end
        return name, attr;
end
38 \r
-- Body of the parser coroutine: a streaming, non-validating pseudo-XML parser.
--
-- The first chunk of input arrives as `data`; every later chunk is the first
-- value returned by coroutine.yield(), so the function suspends (yielding no
-- values) whenever it needs more input than it has buffered.
--
-- @param data          first chunk of the document (string)
-- @param handlers      object whose StartElement(name, attr), EndElement(name)
--                      and CharacterData(text) methods are invoked with
--                      method-call syntax as the document is read
-- @param ns_separator  string placed between namespace URI and local name in
--                      reported names; when nil, namespace processing is
--                      disabled and names/attributes are reported as written
--                      (LuaExpat behaves the same way without a separator)
-- @return Returns only on a fatal parse error, with a message string; on valid
--         input it loops forever, suspended in coroutine.yield().
local function parser(data, handlers, ns_separator)
        -- Block (yielding for more input) until the literal `str` occurs in
        -- the buffer; returns the position where it starts.
        local function locate(str)
                local pos = data:find(str, 1, true);
                while not pos do
                        data = data..coroutine.yield();
                        pos = data:find(str, 1, true);
                end
                return pos;
        end
        -- Consume and return the buffer up to and including the first
        -- character of the first occurrence of `str`.
        local function read_until(str)
                local pos = locate(str);
                local r = data:sub(1, pos);
                data = data:sub(pos+1);
                return r;
        end
        -- Consume and return the buffer up to, but not including, the first
        -- occurrence of `str`.
        local function read_before(str)
                local pos = locate(str);
                local r = data:sub(1, pos-1);
                data = data:sub(pos);
                return r;
        end
        -- Return the next character without consuming it, waiting for input
        -- if the buffer is empty.
        local function peek()
                while #data == 0 do data = coroutine.yield(); end
                return data:sub(1,1);
        end

        -- Namespace scopes form a chain of tables: each open element gets a
        -- table whose metatable is the enclosing scope (with __index pointing
        -- at it), so prefix lookups fall through to outer declarations. Key
        -- "" holds the default namespace and key 0 the element's reported
        -- name. The root scope has no metatable.
        local ns = { xml = "http://www.w3.org/XML/1998/namespace" };
        ns.__index = ns;
        -- Expand "prefix:local" to "<uri><sep>local" when the prefix is bound;
        -- with `dodefault`, unprefixed names get the default namespace.
        local function apply_ns(name, dodefault)
                if not ns_separator then
                        return name; -- namespace processing disabled
                end
                local prefix,n = name:match("^([^:]*):(.*)$");
                if prefix and ns[prefix] then
                        return ns[prefix]..ns_separator..n;
                end
                if dodefault and ns[""] then
                        return ns[""]..ns_separator..name;
                end
                return name;
        end
        -- Open a new scope for a start tag. Returns the reported element name
        -- and a fresh attribute table holding name -> value pairs plus the
        -- attribute names in its array part.
        local function push(tag, attr)
                ns = setmetatable({}, ns);
                if ns_separator then
                        -- Record xmlns declarations in the new scope and hide
                        -- them from the reported attributes. Clearing existing
                        -- keys during pairs() traversal is permitted.
                        for k,v in pairs(attr) do
                                local xmlns = k == "xmlns" and "" or k:match("^xmlns:(.*)$");
                                if xmlns then
                                        ns[xmlns] = v;
                                        attr[k] = nil;
                                end
                        end
                end
                local newattr, n = {}, 0;
                for k,v in pairs(attr) do
                        n = n+1;
                        k = apply_ns(k);
                        newattr[n] = k;
                        newattr[k] = v;
                end
                tag = apply_ns(tag, true);
                ns[0] = tag;
                ns.__index = ns;
                return tag, newattr;
        end
        -- Close the innermost scope and return the name it was opened with.
        local function pop()
                local tag = ns[0];
                ns = getmetatable(ns);
                return tag;
        end

        while true do
                if peek() == "<" then
                        local elem = read_until(">"):sub(2,-2);
                        local first = elem:sub(1,1);
                        if first == "!" or first == "?" then
                                -- neglect comments and processing-instructions
                        elseif first == "/" then -- end tag
                                if not getmetatable(ns) then
                                        -- No element is open: popping would discard the
                                        -- root scope and corrupt all later lookups.
                                        return "unexpected end tag </"..elem:sub(2)..">";
                                end
                                local name = pop();
                                handlers:EndElement(name); -- TODO check for start-end tag name match
                        elseif elem:sub(-1,-1) == "/" then -- empty tag
                                local name,attr = parse_tag(elem:sub(1,-2));
                                name,attr = push(name,attr);
                                handlers:StartElement(name,attr);
                                name = pop();
                                handlers:EndElement(name);
                        else -- start tag
                                local name,attr = parse_tag(elem);
                                name,attr = push(name,attr);
                                handlers:StartElement(name,attr);
                        end
                else
                        -- Text is buffered until the next "<" so entity references
                        -- are never split across chunks before unescaping.
                        local text = read_before("<");
                        handlers:CharacterData(xml_unescape(text));
                end
        end
end
130 \r
-- Create a parser object with a LuaExpat-style parse() method.
--
-- @param handlers      callback object passed through to the parser coroutine
-- @param ns_separator  namespace separator passed through to the parser coroutine
-- @return table with one method:
--   parse(self, data)
--     data == nil   signals end of input; the parser is shut down and true
--                   is returned.
--     data (string) feeds one chunk. Returns true when the chunk was consumed,
--                   or nil plus an error message when parsing failed. After a
--                   failure or end of input every further call fails.
function new(handlers, ns_separator)
        local co = coroutine.create(parser);
        return {
                parse = function(self, data)
                        if not data then
                                co = deadroutine;
                                return true; -- eof
                        end
                        -- On the first resume the three values become the parser's
                        -- arguments; afterwards they are the results of its
                        -- coroutine.yield(), of which only `data` is used.
                        local success, result = coroutine.resume(co, data, handlers, ns_separator);
                        -- A failure is either a raised error (success == false, even
                        -- if the error value itself is nil) or the parser returning
                        -- a message; a plain yield produces no result.
                        if not success or result then
                                co = deadroutine;
                                return nil, result or "unknown error"; -- error
                        end
                        return true; -- success
                end;
        };
end
148 \r
-- Hand the module table back to the loader; `_M` is the self-reference that
-- module("lxp") stores in the module table (Lua 5.1).
return _M;