tools/erlparse: Optimisations aplenty for faster processing of large files
[prosody.git] / tools / ejabberdsql2prosody.lua
1 #!/usr/bin/env lua
2 -- Prosody IM
3 -- Copyright (C) 2008-2010 Matthew Wild
4 -- Copyright (C) 2008-2010 Waqas Hussain
5 -- 
6 -- This project is MIT/X11 licensed. Please see the
7 -- COPYING file in the source package for more information.
8 --
9
10 package.path = package.path ..";../?.lua";
11 local serialize = require "util.serialization".serialize;
12 local st = require "util.stanza";
13 package.loaded["util.logger"] = {init = function() return function() end; end}
14 local dm = require "util.datamanager"
15 dm.set_data_path("data");
16
--- Parse a mysqldump-style SQL dump and collect the rows of every
-- "INSERT INTO `table` VALUES (...),(...);" statement it contains.
-- Lines that do not start with "INSERT INTO `" are skipped.
-- @param filename path of the dump file to read
-- @return a table mapping each table name to an array of rows. A row is an
--   array of column values in statement order: quoted values are strings
--   (with backslash escapes decoded), unquoted values go through tonumber(),
--   so NULL (or anything else non-numeric) is stored as nil *at its own
--   position* - later columns keep their index.
-- Raises an error if the file cannot be opened, ends in the middle of a
-- statement, or a statement does not have the expected shape.
function parseFile(filename)
	local file = nil;
	local last = nil; -- one character of lookahead; nil when nothing is buffered

	-- Consume and return the next character (nil at end of file).
	-- When `expected` is given, any other character raises an error.
	local function read(expected)
		local ch;
		if last then
			ch = last; last = nil;
		else
			ch = file:read(1);
		end
		if expected and ch ~= expected then
			error("expected: "..expected.."; got: "..(ch or "nil"));
		end
		return ch;
	end
	-- Return the next character without consuming it (nil at end of file).
	local function peek()
		if not last then last = read(); end
		return last;
	end
	-- Like read(), but end of file is an error. The buffered loops below
	-- would otherwise never terminate on a truncated dump.
	local function readChar()
		local ch = read();
		if ch == nil then error("unexpected end of file"); end
		return ch;
	end

	-- Backslash escape sequences that may appear inside quoted SQL strings.
	local escapes = {
		["\\0"] = "\0";
		["\\'"] = "'";
		["\\\""] = "\"";
		["\\b"] = "\b";
		["\\n"] = "\n";
		["\\r"] = "\r";
		["\\t"] = "\t";
		["\\Z"] = "\26";
		["\\\\"] = "\\";
		["\\%"] = "%";
		["\\_"] = "_";
	}
	local function unescape(s)
		return escapes[s] or error("Unknown escape sequence: "..s);
	end

	-- Read a single-quoted string and return its decoded contents.
	local function readString()
		read("'");
		-- Pieces are collected in a table and joined once; appending to a
		-- string one character at a time is quadratic for large values.
		local buf, n = {}, 0;
		while peek() ~= "'" do
			local ch = readChar();
			if ch == "\\" then
				ch = unescape(ch..readChar());
			end
			n = n + 1;
			buf[n] = ch;
		end
		read("'");
		return table.concat(buf);
	end
	-- Read an unquoted value up to the next "," or ")" and convert it with
	-- tonumber(); non-numeric tokens such as NULL therefore yield nil.
	local function readNonString()
		local buf, n = {}, 0;
		while peek() ~= "," and peek() ~= ")" do
			n = n + 1;
			buf[n] = readChar();
		end
		return tonumber(table.concat(buf));
	end
	local function readItem()
		if peek() == "'" then
			return readString();
		else
			return readNonString();
		end
	end
	-- Read one "(v1,v2,...)" group into an array.
	local function readTuple()
		local items, n = {}, 0;
		read("(");
		while peek() ~= ")" do
			-- Explicit indexing: table.insert(items, nil) would silently
			-- drop a NULL and shift every following column to the left.
			n = n + 1;
			items[n] = readItem();
			if peek() == ")" then break; end
			read(",");
		end
		read(")");
		return items;
	end
	-- Read the comma-separated tuples of one statement, stopping in front
	-- of the terminating ";".
	local function readTuples()
		if peek() ~= "(" then read("("); end -- raises the "expected" error
		local tuples = {};
		while true do
			tuples[#tuples + 1] = readTuple();
			if peek() == "," then read() end
			if peek() == ";" then break; end
		end
		return tuples;
	end
	local function readTableName()
		local buf, n = {}, 0;
		while peek() ~= "`" do
			n = n + 1;
			buf[n] = readChar();
		end
		return table.concat(buf);
	end
	-- Try to read one INSERT statement starting at the current position.
	-- Returns the table name and its tuples, or nil after skipping a line
	-- that is not an INSERT statement (or when at end of file).
	local function readInsert()
		if peek() == nil then return nil; end
		for ch in ("INSERT INTO `"):gmatch(".") do -- find line starting with this
			if peek() == ch then
				read(); -- found
			else -- match failed, skip line
				while peek() and read() ~= "\n" do end
				return nil;
			end
		end
		local tname = readTableName();
		for ch in ("` VALUES "):gmatch(".") do read(ch); end -- expect this
		local tuples = readTuples();
		read(";"); read("\n");
		return tname, tuples;
	end

	file = io.open(filename);
	if not file then error("File not found: "..filename); end

	local result = {};
	-- Parse under pcall so the file handle is closed even when the dump is
	-- malformed; the original error is re-raised unchanged afterwards.
	local ok, err = pcall(function()
		while true do
			local tname, tuples = readInsert();
			if tname then
				local existing = result[tname];
				if existing then
					-- several INSERTs for one table: append the new rows
					for i = 1, #tuples do
						existing[#existing + 1] = tuples[i];
					end
				else
					result[tname] = tuples;
				end
			elseif peek() == nil then
				break;
			end
		end
	end);
	file:close();
	if not ok then error(err, 0); end
	return result;
end
158
159 -- XML parser
--- Minimal, non-validating XML parser.
-- parse_xml(xml) scans the string for "<tag>text" pairs, builds a stanza
-- tree under a temporary "root" stanza (via st.stanza / :tag / :text / :up)
-- and returns stanza.tags[1] of that root - nil when the input contains no
-- element. Comments and processing instructions are skipped; end tags are
-- not checked against the element they close.
local parse_xml = (function()
	-- Encode a Unicode code point as UTF-8. Returns nil for values outside
	-- 0..0x10FFFF so the caller can leave the reference untouched.
	-- Written with plain arithmetic (no utf8 library, no bit operators).
	local function utf8_char(cp)
		local char, floor = string.char, math.floor;
		if cp < 0x80 then
			return char(cp);
		elseif cp < 0x800 then
			return char(0xC0 + floor(cp / 0x40), 0x80 + cp % 0x40);
		elseif cp < 0x10000 then
			return char(0xE0 + floor(cp / 0x1000), 0x80 + floor(cp / 0x40) % 0x40, 0x80 + cp % 0x40);
		elseif cp < 0x110000 then
			return char(0xF0 + floor(cp / 0x40000), 0x80 + floor(cp / 0x1000) % 0x40,
				0x80 + floor(cp / 0x40) % 0x40, 0x80 + cp % 0x40);
		end
		return nil;
	end
	-- Named entities are plain entries; numeric character references
	-- ("#123", "#x7b") are resolved on demand by __index. Anything that is
	-- not recognised yields nil, which makes gsub keep the original text.
	local entity_map = setmetatable({
		["amp"] = "&";
		["gt"] = ">";
		["lt"] = "<";
		["apos"] = "'";
		["quot"] = "\"";
	}, {__index = function(_, s)
			local hex = s:match("^#x(%x+)$");
			local dec = not hex and s:match("^#(%d+)$");
			local cp = (hex and tonumber(hex, 16)) or (dec and tonumber(dec));
			-- Character references are Unicode code points, so they are
			-- emitted as UTF-8 rather than as a single raw byte (which
			-- failed outright for anything above 255).
			return cp and utf8_char(cp) or nil;
		end
	});
	local function xml_unescape(str)
		return (str:gsub("&(.-);", entity_map));
	end
	-- Split the inside of a start tag into its name and attribute table.
	local function parse_tag(s)
		local name, sattr = s:match("([^%s]+)(.*)");
		local attr = {};
		-- %2 requires the closing quote to be the same character as the
		-- opening one, so a value may contain the other kind of quote.
		for a, _, b in sattr:gmatch("([^=%s]+)=(['\"])(.-)%2") do attr[a] = xml_unescape(b); end
		return name, attr;
	end
	return function(xml)
		local stanza = st.stanza("root");
		local regexp = "<([^>]*)>([^<]*)";
		for elem, text in xml:gmatch(regexp) do
			local first = elem:sub(1,1);
			if first == "!" or first == "?" then -- neglect comments and processing-instructions
			elseif first == "/" then -- end tag
				stanza:up(); -- TODO check for start-end tag name match
			elseif elem:sub(-1,-1) == "/" then -- empty tag
				local name,attr = parse_tag(elem:sub(1,-2));
				stanza:tag(name, attr):up();
			else -- start tag
				local name,attr = parse_tag(elem);
				stanza:tag(name, attr);
			end
			if #text ~= 0 then -- text
				stanza:text(xml_unescape(text));
			end
		end
		return stanza.tags[1];
	end
end)();
209 -- end of XML parser
210
-- Command line: <dump file> <hostname>. Usage is printed (exit status 1)
-- when either argument is missing or the first one is a help flag.
local arg, host = ...;
local help = "/? -? ? /h -h /help -help --help";
-- Match help flags exactly. A substring search in `help` would also treat
-- file names such as "h", "help" or "-" as a request for help.
local help_flags = {};
for flag in help:gmatch("%S+") do help_flags[flag] = true; end
if not(arg and host) or help_flags[arg] then
	print([[ejabberd SQL DB dump importer for Prosody

  Usage: ejabberdsql2prosody.lua filename.txt hostname

The file can be generated using mysqldump:
  mysqldump db_name > filename.txt]]);
	os.exit(1);
end
-- For every table that gets imported: the names given to its columns, in
-- the positional order the values appear in the dump's INSERT tuples.
local map = {
	last = { "username", "seconds", "state" },
	privacy_default_list = { "username", "name" },
	privacy_list = { "username", "name", "id" },
	privacy_list_data = {
		"id", "t", "value", "action", "ord",
		"match_all", "match_iq", "match_message",
		"match_presence_in", "match_presence_out",
	},
	private_storage = { "username", "namespace", "data" },
	rostergroups = { "username", "jid", "grp" },
	rosterusers = {
		"username", "jid", "nick", "subscription", "ask",
		"askmessage", "server", "subscribe", "type",
	},
	spool = { "username", "xml", "seq" },
	users = { "username", "password" },
	vcard = { "username", "vcard" },
	-- vcard_search = {},
};
-- Shared empty table, used as the fallback wherever a table is absent
-- from the dump.
local NULL = {};
-- Parse the dump, then turn positional row values into named fields for
-- every table that has an entry in `map`.
local t = parseFile(arg);
for table_name, rows in pairs(t) do
	local columns = map[table_name];
	if columns then
		-- Only the first row is inspected for a column count mismatch.
		if #rows > 0 and #rows[1] ~= #columns then
			print("[warning] expected "..#columns.." columns for table `"..table_name.."`, found "..#rows[1]);
		end
		for _, row in ipairs(rows) do
			for index, column in ipairs(columns) do
				-- move the value from its numeric slot to its column name
				row[column] = row[index];
				row[index] = nil;
			end
		end
	end
end
--print(serialize(t));
253
-- Store one "accounts" entry per row of the `users` table and report the
-- outcome of each store on stdout.
for _, user in ipairs(t["users"] or NULL) do
	local node = user.username;
	local account = { password = user.password };
	local _, err = dm.store(node, host, "accounts", account);
	local status = err or "success";
	print("["..status.."] accounts: "..node.."@"..host);
end
259
--- Set the roster entry for `jid` in the stored roster of node@host.
-- Loads the "roster" store (an empty roster if there is none), assigns
-- `item` under the key `jid`, writes the roster back and prints the result.
function roster(node, host, jid, item)
	local stored = dm.load(node, host, "roster");
	if not stored then stored = {}; end
	stored[jid] = item;
	local _, err = dm.store(node, host, "roster", stored);
	local status = err or "success";
	print("["..status.."] roster: " ..node.."@"..host.." - "..jid);
end
--- Mark `jid` in the `pending` table of node@host's stored roster.
-- Loads the "roster" store (an empty roster if there is none), creates the
-- `pending` table when missing, sets pending[jid] = true, writes the roster
-- back and prints the result.
function roster_pending(node, host, jid)
	local stored = dm.load(node, host, "roster");
	if not stored then stored = {}; end
	local pending = stored.pending;
	if not pending then
		pending = {};
		stored.pending = pending;
	end
	pending[jid] = true;
	local _, err = dm.store(node, host, "roster", stored);
	local status = err or "success";
	print("["..status.."] roster-pending: " ..node.."@"..host.." - "..jid);
end
--- Add `group` to the groups of contact `jid` in node@host's stored roster.
-- When the roster has no entry for `jid`, a warning is printed and nothing
-- is stored. Otherwise item.groups[group] is set to true, the roster is
-- written back and the result is printed.
function roster_group(node, host, jid, group)
	local stored = dm.load(node, host, "roster") or {};
	local item = stored[jid];
	if item then
		item.groups[group] = true;
		local _, err = dm.store(node, host, "roster", stored);
		local status = err or "success";
		print("["..status.."] roster-group: " ..node.."@"..host.." - "..jid.." - "..group);
	else
		print("Warning: No roster item "..jid.." for user "..node..", can't put in group "..group);
	end
end
--- Save one private XML storage element for node@host.
-- The preserialized stanza is stored in the "private" store under the key
-- "<stanza name>:<xmlns>"; other entries already in that store are kept.
function private_storage(node, host, xmlns, stanza)
	local stored = dm.load(node, host, "private");
	if not stored then stored = {}; end
	local key = stanza.name..":"..xmlns;
	stored[key] = st.preserialize(stanza);
	local _, err = dm.store(node, host, "private", stored);
	local status = err or "success";
	print("["..status.."] private: " ..node.."@"..host.." - "..xmlns);
end
--- Append one offline message to the "offline" list of node@host.
-- Before storing, the stanza gets two attributes derived from the numeric
-- time `t`, both formatted in UTC: `stamp` ("YYYY-MM-DDThh:mm:ssZ") and
-- `stamp_legacy` ("YYYYMMDDThh:mm:ss"). The result is printed.
function offline_msg(node, host, t, stanza)
	local stamp = os.date("!%Y-%m-%dT%H:%M:%SZ", t);
	local attr = stanza.attr;
	attr.stamp = stamp;
	attr.stamp_legacy = os.date("!%Y%m%dT%H:%M:%S", t);
	local _, err = dm.list_append(node, host, "offline", st.preserialize(stanza));
	local status = err or "success";
	print("["..status.."] offline: " ..node.."@"..host.." - "..stamp);
end
-- Import the `rosterusers` table: one roster item per row.
do
	-- Single-letter subscription codes from the dump -> stored value.
	local subscription_names = { N = "none", B = "both", F = "from", T = "to" };
	-- Single-letter ask codes from the dump. `ask` is the value stored on
	-- the roster item; `pending` says whether roster_pending() is called
	-- for the contact first.
	local ask_codes = {
		N = { pending = false },
		O = { ask = "subscribe", pending = false },
		I = { pending = true },
		B = { ask = "subscribe", pending = true },
	};
	for _, row in ipairs(t["rosterusers"] or NULL) do
		local node, contact = row.username, row.jid;
		local name = row.nick;
		if name == "" then name = nil; end -- an empty nickname is not stored

		local subscription = subscription_names[row.subscription];
		if not subscription then error("Unknown subscription type: "..row.subscription) end

		local ask_code = ask_codes[row.ask];
		if not ask_code then error("Unknown ask type: "..row.ask); end
		if ask_code.pending then
			roster_pending(node, host, contact);
		end

		roster(node, host, contact, {
			name = name,
			ask = ask_code.ask,
			subscription = subscription,
			groups = {},
		});
	end
end
-- Put contacts into their roster groups.
for _, entry in ipairs(t["rostergroups"] or NULL) do
	roster_group(entry.username, host, entry.jid, entry.grp);
end
-- Store each user's vCard as a preserialized stanza.
for _, entry in ipairs(t["vcard"] or NULL) do
	local user = entry.username;
	local vcard = st.preserialize(parse_xml(entry.vcard));
	local _, err = dm.store(user, host, "vcard", vcard);
	local status = err or "success";
	print("["..status.."] vCard: "..user.."@"..host);
end
-- Store private XML storage entries.
for _, entry in ipairs(t["private_storage"] or NULL) do
	local stanza = parse_xml(entry.data);
	private_storage(entry.username, host, entry.namespace, stanza);
end
-- Order the offline message spool by sequence number, just in case.
local function by_seq(a, b)
	return a.seq < b.seq;
end
table.sort(t["spool"] or NULL, by_seq);
--- Convert a UTC date-time stamp into seconds since 1970-01-01T00:00:00Z.
-- Accepts "YYYYMMDDThh:mm:ss" as well as "YYYY-MM-DDThh:mm:ss"; anything
-- after the seconds (e.g. a trailing "Z") is ignored.
-- The result is computed with plain calendar arithmetic instead of
-- os.time(), which interprets its fields as local time: correcting that
-- with a single fixed offset is off by one hour for stamps that fall in a
-- daylight-saving period. The value is meant to be passed to os.date("!...")
-- and assumes the platform's time values count seconds from the Unix epoch.
-- @param s the stamp string
-- @return number of seconds since the epoch
-- Raises "Invalid timestamp: ..." when `s` is not a string in that format.
local date_parse = function(s)
	if type(s) ~= "string" then
		error("Invalid timestamp: "..tostring(s));
	end
	local year, month, day, hour, min, sec = s:match("(%d%d%d%d)-?(%d%d)-?(%d%d)T(%d%d):(%d%d):(%d%d)");
	if not year then
		error("Invalid timestamp: "..s);
	end
	year, month, day = tonumber(year), tonumber(month), tonumber(day);
	-- Days since 1970-01-01 in the proleptic Gregorian calendar. Years are
	-- counted from March so the leap day falls at the end of the year.
	if month <= 2 then year = year - 1; end
	local era = math.floor(year / 400);
	local year_of_era = year - era * 400;
	local day_of_year = math.floor((153 * ((month + 9) % 12) + 2) / 5) + day - 1;
	local day_of_era = year_of_era * 365 + math.floor(year_of_era / 4)
		- math.floor(year_of_era / 100) + day_of_year;
	local days = era * 146097 + day_of_era - 719468;
	return days * 86400 + tonumber(hour) * 3600 + tonumber(min) * 60 + tonumber(sec);
end
-- Import the offline message spool. The last child of each stored message
-- must be an <x xmlns="jabber:x:delay" stamp="..."/> element: it is removed
-- from the stanza and its stamp becomes the time passed to offline_msg().
for _, row in ipairs(t["spool"] or NULL) do
	local stanza = parse_xml(row.xml);
	if not stanza then error("Offline message contains no XML element"); end
	local last_child = stanza.tags[#stanza.tags];
	-- the last tag must also be the very last child (no trailing text)
	if not last_child or last_child ~= stanza[#stanza] then error("Last child of offline message is not a tag"); end
	-- Reject the element unless BOTH name and namespace match. With "and"
	-- an <x/> from any other namespace slipped through.
	if last_child.name ~= "x" or last_child.attr.xmlns ~= "jabber:x:delay" then error("Last child of offline message is not a timestamp"); end
	local stamp = last_child.attr.stamp;
	if not stamp then error("Timestamp of offline message has no stamp attribute"); end
	-- drop the delay element from both the child list and the tag list
	stanza[#stanza], stanza.tags[#stanza.tags] = nil, nil;
	-- named `timestamp` so the parsed-dump table `t` is not shadowed
	local timestamp = date_parse(stamp);
	offline_msg(row.username, host, timestamp, stanza);
end