util.encodings: Fix small typo introduced in 7f789266b741
[prosody.git] / tools / ejabberdsql2prosody.lua
1 #!/usr/bin/env lua
2 -- Prosody IM
3 -- Copyright (C) 2008-2010 Matthew Wild
4 -- Copyright (C) 2008-2010 Waqas Hussain
5 -- 
6 -- This project is MIT/X11 licensed. Please see the
7 -- COPYING file in the source package for more information.
8 --
9
10 prosody = {};
11
12 package.path = package.path ..";../?.lua";
13 local serialize = require "util.serialization".serialize;
14 local st = require "util.stanza";
15 package.loaded["util.logger"] = {init = function() return function() end; end}
16 local dm = require "util.datamanager"
17 dm.set_data_path("data");
18
--- Parse a mysqldump-style SQL dump and collect the rows of every INSERT statement.
-- Only lines that begin with "INSERT INTO `" are interpreted; all other lines are skipped.
-- @param filename path of the SQL dump to read
-- @return table mapping each table name to an array of rows. Each row is a table indexed
--         by column position: quoted values are strings, unquoted values are the result
--         of tonumber() (so NULL and other non-numeric tokens are nil, leaving a hole at
--         that column position rather than shifting the following columns).
-- Raises an error if the file cannot be opened or an INSERT statement is malformed.
function parseFile(filename)
	local file = nil;
	local last = nil; -- one character of lookahead, filled by peek()

	-- Consume and return the next character (nil at end of file).
	-- If `expected` is given and the character differs, raise an error.
	local function read(expected)
		local ch;
		if last then
			ch = last; last = nil;
		else ch = file:read(1); end
		if expected and ch ~= expected then error("expected: "..expected.."; got: "..(ch or "nil")); end
		return ch;
	end
	-- Return the next character without consuming it (nil at end of file).
	local function peek()
		if not last then last = read(); end
		return last;
	end

	-- Backslash escape sequences recognised inside quoted strings
	local escapes = {
		["\\0"] = "\0";
		["\\'"] = "'";
		["\\\""] = "\"";
		["\\b"] = "\b";
		["\\n"] = "\n";
		["\\r"] = "\r";
		["\\t"] = "\t";
		["\\Z"] = "\26";
		["\\\\"] = "\\";
		["\\%"] = "%";
		["\\_"] = "_";
	}
	local function unescape(s)
		return escapes[s] or error("Unknown escape sequence: "..s);
	end

	-- Read a single-quoted string and return its unescaped contents.
	-- Pieces are collected in a buffer and joined once, so long values
	-- do not pay for repeated string concatenation.
	local function readString()
		read("'");
		local buf, n = {}, 0;
		while true do
			local ch = peek();
			if ch == nil then
				error("unexpected end of file inside quoted string");
			elseif ch == "\\" then
				n = n + 1;
				buf[n] = unescape(read()..read());
			elseif ch == "'" then
				break;
			else
				n = n + 1;
				buf[n] = read();
			end
		end
		read("'");
		return table.concat(buf);
	end

	-- Read an unquoted value up to the next "," or ")" and convert it with tonumber().
	local function readNonString()
		local buf, n = {}, 0;
		while true do
			local ch = peek();
			if ch == "," or ch == ")" then break; end
			if ch == nil then error("unexpected end of file inside value"); end
			n = n + 1;
			buf[n] = read();
		end
		return tonumber(table.concat(buf));
	end

	local function readItem()
		if peek() == "'" then
			return readString();
		else
			return readNonString();
		end
	end

	-- Read one parenthesised tuple "(a,b,...)".
	-- Values are stored by explicit position: table.insert(items, nil) would
	-- append nothing, so a nil value would shift every later column to the left.
	local function readTuple()
		local items, count = {}, 0;
		read("(");
		while peek() ~= ")" do
			count = count + 1;
			items[count] = readItem();
			if peek() == ")" then break; end
			read(",");
		end
		read(")");
		return items;
	end

	-- Read a comma-separated list of tuples, stopping in front of the ";"
	local function readTuples()
		if peek() ~= "(" then read("("); end -- raises the "expected" error
		local tuples = {};
		while true do
			table.insert(tuples, readTuple());
			if peek() == "," then read() end
			if peek() == ";" then break; end
		end
		return tuples;
	end

	-- Read characters up to (not including) the closing backtick
	local function readTableName()
		local buf, n = {}, 0;
		while peek() ~= "`" do
			if peek() == nil then error("unexpected end of file inside table name"); end
			n = n + 1;
			buf[n] = read();
		end
		return table.concat(buf);
	end

	-- Try to read one INSERT statement starting at the current position.
	-- Returns the table name and its tuples, or nil if the current line is
	-- not an INSERT statement (the rest of that line is then skipped).
	local function readInsert()
		if peek() == nil then return nil; end
		for ch in ("INSERT INTO `"):gmatch(".") do -- find line starting with this
			if peek() == ch then
				read(); -- found
			else -- match failed, skip line
				while peek() and read() ~= "\n" do end
				return nil;
			end
		end
		local tname = readTableName();
		for ch in ("` VALUES "):gmatch(".") do read(ch); end -- expect this
		local tuples = readTuples();
		read(";");
		-- The statement may be followed by "\n", "\r\n", or the end of the file
		if peek() == "\r" then read(); end
		if peek() ~= nil then read("\n"); end
		return tname, tuples;
	end

	file = io.open(filename);
	if not file then error("File not found: "..filename); end

	-- Run the parser protected so the file handle is closed even on a parse error
	local ok, result = pcall(function()
		local t = {};
		while true do
			local tname, tuples = readInsert();
			if tname then
				local existing = t[tname];
				if existing then
					-- several INSERT statements for one table: append the rows
					for i=1,#tuples do
						table.insert(existing, tuples[i]);
					end
				else
					t[tname] = tuples;
				end
			elseif peek() == nil then
				break;
			end
		end
		return t;
	end);
	file:close();
	if not ok then error(result, 0); end -- re-raise with the original message
	return result;
end
160
161 -- XML parser
--- Minimal, non-validating XML parser.
-- parse_xml(xml) scans `xml` for "<...>" tags and the text that follows each one,
-- builds a stanza tree below a temporary "root" element via `st.stanza`, and
-- returns the first child element of that root (nil if no element was found).
-- Comments, declarations and processing instructions are ignored, and end-tag
-- names are not checked against their start tags.
local parse_xml = (function()
	-- Encode a Unicode code point as a UTF-8 byte sequence.
	-- Returns nil for values outside the Unicode range.
	local function utf8_char(cp)
		if cp < 0x80 then
			return string.char(cp);
		elseif cp < 0x800 then
			return string.char(
				0xC0 + math.floor(cp / 0x40),
				0x80 + cp % 0x40);
		elseif cp < 0x10000 then
			return string.char(
				0xE0 + math.floor(cp / 0x1000),
				0x80 + math.floor(cp / 0x40) % 0x40,
				0x80 + cp % 0x40);
		elseif cp < 0x110000 then
			return string.char(
				0xF0 + math.floor(cp / 0x40000),
				0x80 + math.floor(cp / 0x1000) % 0x40,
				0x80 + math.floor(cp / 0x40) % 0x40,
				0x80 + cp % 0x40);
		end
		return nil;
	end

	local named_entities = {
		["amp"] = "&";
		["gt"] = ">";
		["lt"] = "<";
		["apos"] = "'";
		["quot"] = "\"";
	};

	-- Resolve the text between "&" and ";". Returning nil makes gsub keep the
	-- original text, so unknown or malformed references pass through untouched.
	local function resolve_entity(s)
		local named = named_entities[s];
		if named then return named; end
		local digits = s:match("^#x(%x+)$");
		local cp;
		if digits then
			cp = tonumber(digits, 16);
		else
			digits = s:match("^#(%d+)$");
			if digits then cp = tonumber(digits, 10); end
		end
		if not cp then return nil; end
		-- Character references are code points, not bytes: string.char alone
		-- would fail above 255 and emit a non-UTF-8 byte for 128-255.
		return utf8_char(cp);
	end

	local function xml_unescape(str)
		return (str:gsub("&(.-);", resolve_entity));
	end

	-- Split the inside of a tag into its name and a table of unescaped attributes
	local function parse_tag(s)
		local name, sattr = s:match("([^%s]+)(.*)");
		local attr = {};
		-- %2 requires the closing quote to be the same character as the opening
		-- one, so a value may contain the other kind of quote.
		for key, _, value in sattr:gmatch("([^=%s]+)%s*=%s*(['\"])(.-)%2") do
			attr[key] = xml_unescape(value);
		end
		return name, attr;
	end

	return function(xml)
		local stanza = st.stanza("root");
		local regexp = "<([^>]*)>([^<]*)";
		for elem, text in xml:gmatch(regexp) do
			local first = elem:sub(1,1);
			if first == "!" or first == "?" then -- neglect comments and processing-instructions
			elseif first == "/" then -- end tag
				stanza:up(); -- TODO check for start-end tag name match
			elseif elem:sub(-1,-1) == "/" then -- empty tag
				local name, attr = parse_tag(elem:sub(1,-2));
				stanza:tag(name, attr):up();
			else -- start tag
				local name, attr = parse_tag(elem);
				stanza:tag(name, attr);
			end
			if #text ~= 0 then -- text
				stanza:text(xml_unescape(text));
			end
		end
		return stanza.tags[1];
	end
end)();
211 -- end of XML parser
212
-- Command line: <dump filename> <hostname>
local arg, host = ...;
-- Exact help switches. A set lookup is used instead of a substring search so
-- that a dump file that happens to be named e.g. "h" or "help" is still imported.
local help = {
	["/?"] = true, ["-?"] = true, ["?"] = true,
	["/h"] = true, ["-h"] = true,
	["/help"] = true, ["-help"] = true, ["--help"] = true,
};
if not(arg and host) or help[arg] then
	print([[ejabberd SQL DB dump importer for Prosody

  Usage: ejabberdsql2prosody.lua filename.txt hostname

The file can be generated using mysqldump:
  mysqldump db_name > filename.txt]]);
	os.exit(1);
end
-- Column names, in positional order, for each table of the dump that is
-- given named fields; tables not listed here keep purely positional rows.
local map = {
	last                 = { "username", "seconds", "state" },
	privacy_default_list = { "username", "name" },
	privacy_list         = { "username", "name", "id" },
	privacy_list_data    = {
		"id", "t", "value", "action", "ord",
		"match_all", "match_iq", "match_message",
		"match_presence_in", "match_presence_out",
	},
	private_storage      = { "username", "namespace", "data" },
	rostergroups         = { "username", "jid", "grp" },
	rosterusers          = {
		"username", "jid", "nick", "subscription", "ask",
		"askmessage", "server", "subscribe", "type",
	},
	spool                = { "username", "xml", "seq" },
	users                = { "username", "password" },
	vcard                = { "username", "vcard" },
	--vcard_search       = {},
};
-- Empty stand-in used below when a table is absent from the dump
local NULL = {};
-- Parse the dump, then turn positional rows into rows keyed by column name
local t = parseFile(arg);
for table_name, rows in pairs(t) do
	local columns = map[table_name];
	if columns then
		local first_row = rows[1];
		if first_row and #first_row ~= #columns then
			print("[warning] expected "..#columns.." columns for table `"..table_name.."`, found "..#first_row);
		end
		for _, row in ipairs(rows) do
			-- move each positional value to its column name
			for position, column in ipairs(columns) do
				row[column] = row[position];
				row[position] = nil;
			end
		end
	end
end
254 --print(serialize(t));
255
-- Store one "accounts" entry per row of the users table
for _, user in ipairs(t["users"] or NULL) do
	local username = user.username;
	local account = {password = user.password};
	local _, err = dm.store(username, host, "accounts", account);
	local status = err or "success";
	print("["..status.."] accounts: "..username.."@"..host);
end
261
-- Load node@host's "roster" store, set entry `jid` to `item`, write it back
-- and print the outcome (the second value returned by dm.store, or "success").
function roster(node, host, jid, item)
	local entries = dm.load(node, host, "roster") or {};
	entries[jid] = item;
	local _, err = dm.store(node, host, "roster", entries);
	local status = err or "success";
	print("["..status.."] roster: " ..node.."@"..host.." - "..jid);
end
-- Flag `jid` in the "pending" sub-table of node@host's "roster" store
-- (creating that sub-table when missing), write the store back and print the outcome.
function roster_pending(node, host, jid)
	local entries = dm.load(node, host, "roster") or {};
	local pending = entries.pending;
	if not pending then
		pending = {};
		entries.pending = pending;
	end
	pending[jid] = true;
	local _, err = dm.store(node, host, "roster", entries);
	local status = err or "success";
	print("["..status.."] roster-pending: " ..node.."@"..host.." - "..jid);
end
-- Add `group` to the groups of roster entry `jid` in node@host's "roster" store.
-- Prints a warning and stores nothing when the roster has no entry for `jid`.
function roster_group(node, host, jid, group)
	local entries = dm.load(node, host, "roster") or {};
	local item = entries[jid];
	if not item then
		print("Warning: No roster item "..jid.." for user "..node..", can't put in group "..group);
		return;
	end
	item.groups[group] = true;
	local _, err = dm.store(node, host, "roster", entries);
	local status = err or "success";
	print("["..status.."] roster-group: " ..node.."@"..host.." - "..jid.." - "..group);
end
-- Save `stanza` (passed through st.preserialize) in node@host's "private" store
-- under the key "<stanza name>:<xmlns>", then print the outcome.
function private_storage(node, host, xmlns, stanza)
	local storage = dm.load(node, host, "private") or {};
	local key = stanza.name..":"..xmlns;
	storage[key] = st.preserialize(stanza);
	local _, err = dm.store(node, host, "private", storage);
	local status = err or "success";
	print("["..status.."] private: " ..node.."@"..host.." - "..xmlns);
end
-- Stamp `stanza` with time `t` (formatted as UTC in two layouts), append it to
-- node@host's "offline" list via dm.list_append, and print the outcome.
function offline_msg(node, host, t, stanza)
	local stamp = os.date("!%Y-%m-%dT%H:%M:%SZ", t);
	local stamp_legacy = os.date("!%Y%m%dT%H:%M:%S", t);
	stanza.attr.stamp = stamp;
	stanza.attr.stamp_legacy = stamp_legacy;
	local _, err = dm.list_append(node, host, "offline", st.preserialize(stanza));
	local status = err or "success";
	print("["..status.."] offline: " ..node.."@"..host.." - "..stamp);
end
-- Import roster entries: translate the one-letter subscription and ask codes
-- of each rosterusers row and store the resulting roster item.
do
	local subscription_names = { N = "none", B = "both", F = "from", T = "to" };

	for _, row in ipairs(t["rosterusers"] or NULL) do
		local node, contact = row.username, row.jid;

		local name = row.nick;
		if name == "" then name = nil; end

		local subscription = subscription_names[row.subscription];
		if not subscription then
			error("Unknown subscription type: "..row.subscription);
		end

		local ask_code = row.ask;
		if ask_code ~= "N" and ask_code ~= "O" and ask_code ~= "I" and ask_code ~= "B" then
			error("Unknown ask type: "..ask_code);
		end
		-- codes "I" and "B" additionally mark the contact as pending
		if ask_code == "I" or ask_code == "B" then
			roster_pending(node, host, contact);
		end
		-- codes "O" and "B" are stored as ask = "subscribe"; "N" and "I" store no ask value
		local ask = nil;
		if ask_code == "O" or ask_code == "B" then
			ask = "subscribe";
		end

		roster(node, host, contact, {
			name = name,
			ask = ask,
			subscription = subscription,
			groups = {},
		});
	end
end
-- Roster groups: one call per (user, contact, group) row
for _, entry in ipairs(t["rostergroups"] or NULL) do
	roster_group(entry.username, host, entry.jid, entry.grp);
end

-- vCards: parse the stored XML and save it in the user's "vcard" store
for _, entry in ipairs(t["vcard"] or NULL) do
	local username = entry.username;
	local vcard = st.preserialize(parse_xml(entry.vcard));
	local _, err = dm.store(username, host, "vcard", vcard);
	local status = err or "success";
	print("["..status.."] vCard: "..username.."@"..host);
end

-- Private XML storage: one parsed element per row
for _, entry in ipairs(t["private_storage"] or NULL) do
	local element = parse_xml(entry.data);
	private_storage(entry.username, host, entry.namespace, element);
end

-- sort by sequence number, just in case
local function by_seq(a, b)
	return a.seq < b.seq;
end
table.sort(t["spool"] or NULL, by_seq);
--- Convert a UTC timestamp such as "20100701T12:00:00" or "2010-07-01T12:00:00Z"
-- into seconds since 1970-01-01T00:00:00 UTC.
-- The value is computed arithmetically from the UTC fields instead of going
-- through os.time(): os.time() interprets its table as local time and applies
-- the daylight-saving rules in force on that date, which a single offset
-- measured "now" cannot compensate for.
-- @param s timestamp string
-- @return number of seconds since the Unix epoch
-- Raises an error if `s` is not a string in one of the supported layouts.
local date_parse = function(s)
	local year, month, day, hour, min, sec;
	if type(s) == "string" then
		year, month, day, hour, min, sec = s:match("(%d%d%d%d)-?(%d%d)-?(%d%d)T(%d%d):(%d%d):(%d%d)");
	end
	if not year then error("Unable to parse timestamp: "..tostring(s)); end
	year, month, day = tonumber(year), tonumber(month), tonumber(day);
	hour, min, sec = tonumber(hour), tonumber(min), tonumber(sec);

	-- Days since 1970-01-01 in the proleptic Gregorian calendar. The year is
	-- treated as starting in March so the leap day falls at the end of it.
	local y = year;
	if month <= 2 then y = y - 1; end
	local era = math.floor(y / 400);
	local year_of_era = y - era * 400;
	local month_from_march = (month + 9) % 12;
	local day_of_year = math.floor((153 * month_from_march + 2) / 5) + day - 1;
	local day_of_era = year_of_era * 365 + math.floor(year_of_era / 4)
		- math.floor(year_of_era / 100) + day_of_year;
	local days = era * 146097 + day_of_era - 719468;

	return days * 86400 + hour * 3600 + min * 60 + sec;
end
-- Import offline messages: each spool row holds a stanza whose last child is
-- expected to be an <x xmlns='jabber:x:delay'/> element carrying the timestamp.
-- That element is removed and its stamp is passed to offline_msg separately.
for i, row in ipairs(t["spool"] or NULL) do
	local stanza = parse_xml(row.xml);
	local last_child = stanza.tags[#stanza.tags];
	-- the last tag must also be the very last child (no trailing text)
	if not last_child or last_child ~= stanza[#stanza] then error("Last child of offline message is not a tag"); end
	-- reject the element unless BOTH its name and its namespace match
	if last_child.name ~= "x" or last_child.attr.xmlns ~= "jabber:x:delay" then error("Last child of offline message is not a timestamp"); end
	stanza[#stanza], stanza.tags[#stanza.tags] = nil, nil;
	local timestamp = date_parse(last_child.attr.stamp);
	offline_msg(row.username, host, timestamp, stanza);
end