fallbacks/lxp.lua: Pure Lua pseudo-XML parser. Implements the same API as LuaExpat.
author Waqas Hussain <waqas20@gmail.com>
Thu, 2 Dec 2010 12:11:51 +0000 (17:11 +0500)
committer Waqas Hussain <waqas20@gmail.com>
Thu, 2 Dec 2010 12:11:51 +0000 (17:11 +0500)
fallbacks/lxp.lua [new file with mode: 0644]

diff --git a/fallbacks/lxp.lua b/fallbacks/lxp.lua
new file mode 100644 (file)
index 0000000..21cab4a
--- /dev/null
@@ -0,0 +1,149 @@
+\r
+local coroutine = coroutine;\r
+local error = error;\r
+local tonumber = tonumber;\r
+local string = string;\r
+local setmetatable, getmetatable = setmetatable, getmetatable;\r
+local pairs = pairs;\r
+\r
+-- A coroutine that has already run to completion. Resuming it always fails,\r
+-- so pointing a parser's coroutine at it makes every later resume an error.\r
+local function noop() end\r
+local deadroutine = coroutine.create(noop);\r
+coroutine.resume(deadroutine);\r
+\r
+module("lxp")\r
+\r
+-- Encode a Unicode code point (0..0x10FFFF) as a UTF-8 byte string.\r
+-- Only `%` and `/` are used so this also runs on Lua 5.1, which has neither\r
+-- integer division nor bit operators.\r
+local function codepoint_to_utf8(cp)\r
+       if cp < 0x80 then\r
+               return string.char(cp);\r
+       end\r
+       local b4 = cp % 0x40; cp = (cp - b4) / 0x40;\r
+       if cp < 0x20 then -- original value was below 0x800: two bytes\r
+               return string.char(0xC0 + cp, 0x80 + b4);\r
+       end\r
+       local b3 = cp % 0x40; cp = (cp - b3) / 0x40;\r
+       if cp < 0x10 then -- original value was below 0x10000: three bytes\r
+               return string.char(0xE0 + cp, 0x80 + b3, 0x80 + b4);\r
+       end\r
+       local b2 = cp % 0x40; cp = (cp - b2) / 0x40;\r
+       return string.char(0xF0 + cp, 0x80 + b2, 0x80 + b3, 0x80 + b4);\r
+end\r
+\r
+-- Maps an entity name (the text between "&" and ";") to its replacement.\r
+-- The five predefined XML entities are stored directly; numeric character\r
+-- references are computed on demand by __index. Anything else yields nil.\r
+local entity_map = setmetatable({\r
+       ["amp"] = "&";\r
+       ["gt"] = ">";\r
+       ["lt"] = "<";\r
+       ["apos"] = "'";\r
+       ["quot"] = "\"";\r
+}, {__index = function(_, s)\r
+               -- "#x<hex>" or "#<decimal>"; leading zeros carry no information\r
+               local digits, base = s:match("^#x0*(%x+)$"), 16;\r
+               if not digits then\r
+                       digits, base = s:match("^#0*(%d+)$"), 10;\r
+               end\r
+               -- 0x10FFFF needs at most 7 digits in either base\r
+               if not digits or #digits > 7 then\r
+                       return nil;\r
+               end\r
+               local cp = tonumber(digits, base);\r
+               -- beyond the Unicode range, or a UTF-16 surrogate: not a character\r
+               if cp > 0x10FFFF or (cp >= 0xD800 and cp <= 0xDFFF) then\r
+                       return nil;\r
+               end\r
+               return codepoint_to_utf8(cp);\r
+       end\r
+});\r
+-- Replace every "&name;" in `str` that entity_map can resolve and return the\r
+-- resulting string. References that resolve to nil are left exactly as written,\r
+-- because gsub keeps the original match when the table lookup gives nil.\r
+local function xml_unescape(str)\r
+       return (str:gsub("&(.-);", entity_map));\r
+end\r
+-- Split the text found between "<" and ">" of a start tag into the element\r
+-- name and a table of attributes (attribute name -> unescaped value).\r
+-- Returns nil and an empty table when `s` holds no name at all.\r
+local function parse_tag(s)\r
+       local name, sattr = s:match("^%s*([^%s]+)(.*)$");\r
+       local attr = {};\r
+       if not name then\r
+               return nil, attr;\r
+       end\r
+       -- %2 forces the closing quote to be the same character as the opening\r
+       -- one, so values such as "it's" or 'say "hi"' are kept whole. Blanks\r
+       -- around "=" are accepted.\r
+       for key, _, value in sattr:gmatch("([^=%s]+)%s*=%s*([\"'])(.-)%2") do\r
+               attr[key] = xml_unescape(value);\r
+       end\r
+       return name, attr;\r
+end\r
+\r
+-- Coroutine body of the streaming parser.\r
+--   data         first chunk of input; every later chunk is the value passed\r
+--                to the resume that continues this coroutine\r
+--   handlers     table of optional callbacks, each invoked as a method:\r
+--                StartElement(name, attr), EndElement(name), CharacterData(text)\r
+--   ns_separator string placed between namespace URI and local name; when nil,\r
+--                names are passed through without a namespace prefix\r
+-- Never returns: it yields whenever it needs more input and raises an error\r
+-- (a plain string) on an empty tag, a stray end tag or a mismatched end tag.\r
+-- A ">" inside an attribute value still terminates the tag.\r
+local function parser(data, handlers, ns_separator)\r
+       -- Call handlers[event](handlers, ...) when that callback is present.\r
+       local function emit(event, ...)\r
+               local handler = handlers[event];\r
+               if handler then handler(handlers, ...); end\r
+       end\r
+       -- Yield until the plain string `str` occurs in the buffer, then return\r
+       -- the bounds of its first occurrence.\r
+       local function await(str)\r
+               local first, last = data:find(str, 1, true);\r
+               while not first do\r
+                       -- everything before this offset has already been searched\r
+                       local from = #data - #str + 2;\r
+                       if from < 1 then from = 1; end\r
+                       data = data..coroutine.yield();\r
+                       first, last = data:find(str, from, true);\r
+               end\r
+               return first, last;\r
+       end\r
+       -- Consume and return the buffer up to and including `str`.\r
+       local function read_until(str)\r
+               local _, last = await(str);\r
+               local r = data:sub(1, last);\r
+               data = data:sub(last+1);\r
+               return r;\r
+       end\r
+       -- Consume and return the buffer up to, but not including, `str`.\r
+       local function read_before(str)\r
+               local first = await(str);\r
+               local r = data:sub(1, first-1);\r
+               data = data:sub(first);\r
+               return r;\r
+       end\r
+       -- Return the next byte without consuming it, yielding while the buffer is empty.\r
+       local function peek()\r
+               while #data == 0 do data = coroutine.yield(); end\r
+               return data:sub(1,1);\r
+       end\r
+       \r
+       -- Stack of namespace scopes. Each scope is a table whose metatable is its\r
+       -- parent scope, so prefix lookups fall through to enclosing elements.\r
+       -- Key 0 holds the expanded element name, key 1 the name as written.\r
+       local ns = { xml = "http://www.w3.org/XML/1998/namespace" };\r
+       ns.__index = ns;\r
+       local function apply_ns(name, dodefault)\r
+               if not ns_separator then\r
+                       return name; -- nothing to join the URI and the name with\r
+               end\r
+               local prefix,n = name:match("^([^:]*):(.*)$");\r
+               if prefix and ns[prefix] then\r
+                       return ns[prefix]..ns_separator..n;\r
+               end\r
+               if dodefault and ns[""] then\r
+                       return ns[""]..ns_separator..name;\r
+               end\r
+               return name;\r
+       end\r
+       -- Open a scope for element `raw`: record its xmlns declarations, then\r
+       -- return the expanded element name and the attribute table handed to\r
+       -- StartElement (array part: attribute names; hash part: name -> value).\r
+       local function push(raw, attr)\r
+               ns = setmetatable({}, ns);\r
+               for k,v in pairs(attr) do\r
+                       local xmlns = k == "xmlns" and "" or k:match("^xmlns:(.*)$");\r
+                       if xmlns then\r
+                               ns[xmlns] = v;\r
+                               attr[k] = nil;\r
+                       end\r
+               end\r
+               local newattr, n = {}, 0;\r
+               for k,v in pairs(attr) do\r
+                       n = n+1;\r
+                       k = apply_ns(k);\r
+                       newattr[n] = k;\r
+                       newattr[k] = v;\r
+               end\r
+               local tag = apply_ns(raw, true);\r
+               ns[0], ns[1] = tag, raw;\r
+               ns.__index = ns;\r
+               return tag, newattr;\r
+       end\r
+       -- Close the innermost scope; returns the name as written and the expanded name.\r
+       local function pop()\r
+               local parent = getmetatable(ns);\r
+               if not parent then -- only the root scope has no metatable\r
+                       error("unexpected end tag: no element is open", 0);\r
+               end\r
+               local raw, tag = ns[1], ns[0];\r
+               ns = parent;\r
+               return raw, tag;\r
+       end\r
+       \r
+       while true do\r
+               if peek() == "<" then\r
+                       local tag = read_until(">");\r
+                       if tag:sub(1,4) == "<!--" then -- comment\r
+                               -- may contain ">"; only "-->" ends it\r
+                               while #tag < 7 or tag:sub(-3) ~= "-->" do\r
+                                       tag = tag..read_until(">");\r
+                               end\r
+                       elseif tag:sub(1,9) == "<![CDATA[" then -- literal character data\r
+                               while #tag < 12 or tag:sub(-3) ~= "]]>" do\r
+                                       tag = tag..read_until(">");\r
+                               end\r
+                               local text = tag:sub(10,-4);\r
+                               -- CDATA content is delivered as-is, without entity expansion\r
+                               if #text > 0 then emit("CharacterData", text); end\r
+                       elseif tag:sub(1,2) == "<!" or tag:sub(1,2) == "<?" then\r
+                               -- neglect other declarations and processing-instructions\r
+                       else\r
+                               local elem = tag:sub(2,-2);\r
+                               if elem:sub(1,1) == "/" then -- end tag\r
+                                       local closing = elem:match("^/([^%s]*)");\r
+                                       local raw, name = pop();\r
+                                       if raw ~= closing then\r
+                                               error("mismatched tag: expected </"..raw..">, got </"..closing..">", 0);\r
+                                       end\r
+                                       emit("EndElement", name);\r
+                               else -- start tag or empty tag\r
+                                       local selfclosing = elem:sub(-1,-1) == "/";\r
+                                       if selfclosing then elem = elem:sub(1,-2); end\r
+                                       if not elem:find("%S") then\r
+                                               error("malformed tag: missing element name", 0);\r
+                                       end\r
+                                       local name,attr = parse_tag(elem);\r
+                                       name,attr = push(name,attr);\r
+                                       emit("StartElement", name, attr);\r
+                                       if selfclosing then\r
+                                               pop();\r
+                                               emit("EndElement", name);\r
+                                       end\r
+                               end\r
+                       end\r
+               else\r
+                       local text = read_before("<");\r
+                       emit("CharacterData", xml_unescape(text));\r
+               end\r
+       end\r
+end\r
+\r
+-- Create a parser object that feeds `handlers` (see parser()) from chunks of\r
+-- input; `ns_separator` is passed through to parser() unchanged.\r
+-- The returned table has a single method:\r
+--   parse(self, data)  feed one chunk. Returns true on success. A nil `data`\r
+--                      marks end of input and also returns true. On failure\r
+--                      returns nil plus the error value; every later call\r
+--                      fails too, because the coroutine is swapped for a\r
+--                      dead one.\r
+function new(handlers, ns_separator)\r
+       local co = coroutine.create(parser);\r
+       return {\r
+               parse = function(self, data)\r
+                       if not data then\r
+                               co = deadroutine;\r
+                               return true; -- eof\r
+                       end\r
+                       -- handlers and ns_separator only matter on the first resume,\r
+                       -- where they become parser()'s arguments; afterwards the\r
+                       -- pending yield picks up `data` and ignores the rest.\r
+                       local success, result = coroutine.resume(co, data, handlers, ns_separator);\r
+                       -- Judge by the status flag, not by `result`: an error value of\r
+                       -- nil or false must still be reported as a failure.\r
+                       if not success then\r
+                               co = deadroutine;\r
+                               return nil, result or "unknown parse error"; -- error\r
+                       end\r
+                       return true; -- success\r
+               end;\r
+       };\r
+end\r
+\r
+return _M;\r