Merge 0.6.2/waqas with 0.6.2/MattJ
[prosody.git] / plugins / storage / xmlparse.lib.lua
1 \r
2 local st = require "util.stanza";\r
3 \r
4 -- XML parser\r
--- Minimal pattern-based XML parser.
-- parse_xml(xml) scans `xml` for "<tag>text" pairs and builds a stanza tree
-- through the `st` module required at the top of this file.
-- It returns the first child element of a synthetic "root" stanza.
-- NOTE(review): assumes st.stanza() returns an object exposing :tag(), :up(),
-- :text() and a `tags` array, as used below -- confirm against util.stanza.
-- Known limitations (inherent to the pattern approach): a '>' inside an
-- attribute value or comment ends the tag early, and CDATA sections are
-- skipped together with comments and processing instructions.
local parse_xml = (function()
        local s_char, floor = string.char, math.floor;

        -- Encode a Unicode code point as a UTF-8 byte sequence.
        -- Returns nil for nil input, values above U+10FFFF and surrogates, so
        -- that the caller can leave such a reference untouched.
        local function utf8_char(cp)
                if not cp or cp > 0x10FFFF or (cp >= 0xD800 and cp <= 0xDFFF) then
                        return nil;
                elseif cp < 0x80 then
                        return s_char(cp);
                elseif cp < 0x800 then
                        return s_char(0xC0 + floor(cp / 0x40), 0x80 + cp % 0x40);
                elseif cp < 0x10000 then
                        return s_char(0xE0 + floor(cp / 0x1000),
                                0x80 + floor(cp / 0x40) % 0x40,
                                0x80 + cp % 0x40);
                end
                return s_char(0xF0 + floor(cp / 0x40000),
                        0x80 + floor(cp / 0x1000) % 0x40,
                        0x80 + floor(cp / 0x40) % 0x40,
                        0x80 + cp % 0x40);
        end

        -- Maps an entity name (the text between '&' and ';') to its replacement.
        -- The five predefined XML entities are stored directly; numeric
        -- references ("#123", "#x7b") are resolved on demand by __index.
        -- Unknown names yield nil, which makes gsub keep the original text.
        local entity_map = setmetatable({
                ["amp"] = "&";
                ["gt"] = ">";
                ["lt"] = "<";
                ["apos"] = "'";
                ["quot"] = "\"";
        }, {__index = function(_, s)
                        local digits = s:match("^#x(%x+)$");
                        if digits then
                                return utf8_char(tonumber(digits, 16));
                        end
                        digits = s:match("^#(%d+)$");
                        if digits then
                                return utf8_char(tonumber(digits, 10));
                        end
                end
        });

        -- Replace entity and character references in `str`.
        -- The capture is limited to "#?%w+" so that a stray '&' cannot swallow
        -- the text up to some later ';' and hide a real reference behind it.
        local function xml_unescape(str)
                return (str:gsub("&(#?%w+);", entity_map));
        end

        -- Split the inside of a tag ("name a='1' b=\"2\"") into the element name
        -- and a table of unescaped attributes.
        -- Returns nil when the tag contains no name at all (e.g. "<>").
        local function parse_tag(s)
                local name, sattr = s:match("([^%s]+)(.*)");
                if not name then
                        return nil;
                end
                local attr = {};
                -- %2 forces the closing quote to equal the opening one, so a value
                -- may contain the other quote character (a="it's").
                for key, _, value in sattr:gmatch("([^=%s]+)%s*=%s*([\"'])(.-)%2") do
                        attr[key] = xml_unescape(value);
                end
                return name, attr;
        end

        return function(xml)
                local stanza = st.stanza("root");
                -- Each match is one tag plus the character data that follows it.
                for elem, text in xml:gmatch("<([^>]*)>([^<]*)") do
                        local first = elem:sub(1, 1);
                        if first == "!" or first == "?" then
                                -- neglect comments and processing-instructions
                        elseif first == "/" then -- end tag
                                stanza:up(); -- TODO check for start-end tag name match
                        elseif elem:sub(-1, -1) == "/" then -- empty tag
                                local name, attr = parse_tag(elem:sub(1, -2));
                                if name then
                                        stanza:tag(name, attr):up();
                                end
                        else -- start tag
                                local name, attr = parse_tag(elem);
                                if name then
                                        stanza:tag(name, attr);
                                end
                        end
                        if #text ~= 0 then -- text
                                stanza:text(xml_unescape(text));
                        end
                end
                return stanza.tags[1];
        end
end)();
54 -- end of XML parser\r
55 \r
56 return parse_xml;\r