2 local st = require "util.stanza";
\r
-- Minimal pattern-based XML parser, built inside a function expression so its
-- helpers stay private.
-- NOTE(review): the closing of this function expression is not visible in this
-- excerpt; presumably it is invoked immediately so that `parse_xml` holds the
-- parser function returned further down -- confirm.
5 local parse_xml = (function()
\r
-- Lookup table from entity name (the text between "&" and ";") to its
-- replacement string. The named entries (original lines 7-11) are not shown in
-- this excerpt. Keys missing from the table fall through to the __index
-- function below.
6 local entity_map = setmetatable({
\r
12 }, {__index = function(_, s)
\r
13 if s:sub(1,1) == "#" then -- key starts with "#": numeric character reference
\r
14 if s:sub(2,2) == "x" then -- "#x...": the remainder is parsed as hexadecimal
\r
-- NOTE(review): string.char only accepts byte values, so references above
-- 255 (and malformed numbers, where tonumber returns nil) raise an error here.
15 return string.char(tonumber(s:sub(3), 16));
\r
-- Otherwise the remainder after "#" is parsed with tonumber's default base.
17 return string.char(tonumber(s:sub(2)));
\r
-- Replace every "&...;" sequence in `str` with the value that `entity_map`
-- yields for the text between "&" and ";".
-- When the lookup yields nil, string.gsub keeps the original match unchanged.
-- @param str string to unescape
-- @return the unescaped string
22 local function xml_unescape(str)
\r
-- The outer parentheses truncate gsub's results to the string alone,
-- discarding the substitution count.
23 return (str:gsub("&(.-);", entity_map));
\r
-- Split the inside of a tag (the text between "<" and ">") into the tag name
-- and its attributes.
-- NOTE(review): the declaration of `attr` and the return statement are not
-- visible in this excerpt; the call sites use `local name,attr = parse_tag(elem)`,
-- so presumably this returns the name and the attribute table -- confirm.
-- @param s tag contents, e.g. the text captured between "<" and ">"
25 local function parse_tag(s)
\r
-- First match only: `name` is the leading run of non-whitespace characters,
-- `sattr` is everything after it (the raw attribute text).
26 local name,sattr=(s):gmatch("([^%s]+)(.*)")();
\r
-- Collect key="value" / key='value' pairs, storing each value unescaped.
-- The pattern does not require the opening and closing quote to be the same
-- character, and a value cannot contain either kind of quote.
28 for a,b in (sattr):gmatch("([^=%s]+)=['\"]([^'\"]*)['\"]") do attr[a] = xml_unescape(b); end
\r
-- The parser itself: scans `xml` tag by tag and builds a stanza tree through
-- the util.stanza builder calls (st.stanza, :tag, :up, :text).
-- NOTE(review): the semantics of those builder methods live in util.stanza and
-- are not visible here; the comments below assume :tag descends into a new
-- child and :up returns to the parent -- confirm.
-- @param xml string containing the XML document
-- @return stanza.tags[1] of a synthetic "root" stanza (presumably the
--         document's top-level element)
31 return function(xml)
\r
-- Synthetic wrapper element that the parsed content is attached to.
32 local stanza = st.stanza("root");
\r
-- Captures the text inside "<...>" and the text that follows it up to the
-- next "<". A ">" inside an attribute value or a comment ends the tag capture
-- early, so such input is not handled by this pattern.
33 local regexp = "<([^>]*)>([^<]*)";
\r
34 for elem, text in xml:gmatch(regexp) do
\r
35 if elem:sub(1,1) == "!" or elem:sub(1,1) == "?" then -- neglect comments and processing-instructions
\r
36 elseif elem:sub(1,1) == "/" then -- end tag
\r
38 stanza:up(); -- TODO check for start-end tag name match
\r
39 elseif elem:sub(-1,-1) == "/" then -- empty tag
\r
-- Drop the trailing "/" before parsing the name and attributes.
40 elem = elem:sub(1,-2);
\r
41 local name,attr = parse_tag(elem);
\r
-- Add the element and step straight back out, since it has no content.
42 stanza:tag(name, attr):up();
\r
-- (Original line 43 is not shown in this excerpt; presumably it opens the
-- start-tag branch.)
44 local name,attr = parse_tag(elem);
\r
45 stanza:tag(name, attr);
\r
-- Any non-empty text after the tag is added, including whitespace-only
-- text, since only the length is checked.
47 if #text ~= 0 then -- text
\r
48 stanza:text(xml_unescape(text));
\r
51 return stanza.tags[1];
\r
54 -- end of XML parser
\r