s2smanager: Remove srv_hosts from session when connected, this fixes attempting to...
[prosody.git] / core / s2smanager.lua
index f4f7ad3683103c2a66b93b3ad3752423d2e39105..879084d86554db54dd19c5dbcac55ff1d59cab83 100644 (file)
 local hosts = hosts;
 local sessions = sessions;
 local core_process_stanza = function(a, b) core_process_stanza(a, b); end
+local add_task = require "util.timer".add_task;
 local socket = require "socket";
 local format = string.format;
 local t_insert, t_sort = table.insert, table.sort;
 local get_traceback = debug.traceback;
-local tostring, pairs, ipairs, getmetatable, print, newproxy, error, tonumber
-    = tostring, pairs, ipairs, getmetatable, print, newproxy, error, tonumber;
+local tostring, pairs, ipairs, getmetatable, newproxy, error, tonumber
+    = tostring, pairs, ipairs, getmetatable, newproxy, error, tonumber;
 
 local idna_to_ascii = require "util.encodings".idna.to_ascii;
 local connlisteners_get = require "net.connlisteners".get;
@@ -34,9 +35,11 @@ local log = logger_init("s2smanager");
 
 local sha256_hash = require "util.hashes".sha256;
 
-local dialback_secret = sha256_hash(tostring{} .. math.random() .. socket.gettime(), true);
+local dialback_secret = uuid_gen();
 
-local dns = require "net.dns";
+local adns = require "net.adns";
+
+local dns_timeout = config.get("*", "core", "dns_timeout") or 60;
 
 incoming_s2s = {};
 local incoming_s2s = incoming_s2s;
@@ -48,7 +51,7 @@ local function compare_srv_priorities(a,b) return a.priority < b.priority or a.w
 local function bounce_sendq(session)
        local sendq = session.sendq;
        if sendq then
-               session.log("debug", "sending error replies for "..#sendq.." queued stanzas because of failed outgoing connection to "..tostring(session.to_host));
+               session.log("info", "sending error replies for "..#sendq.." queued stanzas because of failed outgoing connection to "..tostring(session.to_host));
                local dummy = {
                        type = "s2sin";
                        send = function(s)
@@ -77,7 +80,7 @@ function send_to_host(from_host, to_host, data)
                -- We have a connection to this host already
                if host.type == "s2sout_unauthed" and data.name ~= "db:verify" and ((not data.xmlns) or data.xmlns == "jabber:client" or data.xmlns == "jabber:server") then
                        (host.log or log)("debug", "trying to send over unauthed s2sout to "..to_host);
-                       if not host.notopen and not host.dialback_key then
+                       if not host.notopen and not host.dialback_key and host.sends2s then
                                host.log("debug", "dialback had not been initiated");
                                initiate_dialback(host);
                        end
@@ -105,7 +108,11 @@ function send_to_host(from_host, to_host, data)
                local host_session = new_outgoing(from_host, to_host);
                -- Store in buffer
                host_session.sendq = { {tostring(data), st.reply(data)} };
-               if not host_session.conn then destroy_session(host_session); end
+               log("debug", "stanza [%s] queued until connection complete", tostring(data.name));
+               if (not host_session.connecting) and (not host_session.conn) then
+                       log("warn", "Connection to %s failed already, destroying session...", to_host);
+                       destroy_session(host_session);
+               end
        end
 end
 
@@ -137,47 +144,97 @@ function new_outgoing(from_host, to_host)
                
                attempt_connection(host_session);
                
+               if not host_session.sends2s then                
+                       -- A sends2s which buffers data until the stream is opened.
+                       -- Note that data in this buffer will be sent before the stream is
+                       -- authenticated, and will not be acknowledged in any way, successful or otherwise.
+                       local buffer;
+                       function host_session.sends2s(data)
+                               if not buffer then
+                                       buffer = {};
+                                       host_session.send_buffer = buffer;
+                               end
+                               log("debug", "Buffering data on unconnected s2sout to %s", to_host);
+                               buffer[#buffer+1] = data;
+                               log("debug", "Buffered item %d: %s", #buffer, tostring(data));
+                       end
+                       
+               end
+
                return host_session;
 end
 
 
 function attempt_connection(host_session, err)
        local from_host, to_host = host_session.from_host, host_session.to_host;
-       local conn, handler = socket.tcp()
-
        local connect_host, connect_port = idna_to_ascii(to_host), 5269;
        
        if not err then -- This is our first attempt
-               local answer = dns.lookup("_xmpp-server._tcp."..connect_host..".", "SRV");
-               
-               if answer then
-                       log("debug", to_host.." has SRV records, handling...");
-                       local srv_hosts = {};
-                       host_session.srv_hosts = srv_hosts;
-                       for _, record in ipairs(answer) do
-                               t_insert(srv_hosts, record.srv);
+               log("debug", "First attempt to connect to %s, starting with SRV lookup...", to_host);
+               host_session.connecting = true;
+               local answer, handle;
+               handle = adns.lookup(function (answer)
+                       handle = nil;
+                       host_session.connecting = nil;
+                       if answer then
+                               log("debug", to_host.." has SRV records, handling...");
+                               local srv_hosts = {};
+                               host_session.srv_hosts = srv_hosts;
+                               for _, record in ipairs(answer) do
+                                       t_insert(srv_hosts, record.srv);
+                               end
+                               t_sort(srv_hosts, compare_srv_priorities);
+                               
+                               local srv_choice = srv_hosts[1];
+                               host_session.srv_choice = 1;
+                               if srv_choice then
+                                       connect_host, connect_port = srv_choice.target or to_host, srv_choice.port or connect_port;
+                                       log("debug", "Best record found, will connect to %s:%d", connect_host, connect_port);
+                               end
+                       else
+                               log("debug", to_host.." has no SRV records, falling back to A");
                        end
-                       t_sort(srv_hosts, compare_srv_priorities);
-                       
-                       local srv_choice = srv_hosts[1];
-                       host_session.srv_choice = 1;
-                       if srv_choice then
-                               connect_host, connect_port = srv_choice.target or to_host, srv_choice.port or connect_port;
-                               log("debug", "Best record found, will connect to %s:%d", connect_host, connect_port);
+                       -- Try with SRV, or just the plain hostname if no SRV
+                       return try_connect(host_session, connect_host, connect_port);
+               end, "_xmpp-server._tcp."..connect_host..".", "SRV");
+               
+               -- Set handler for DNS timeout
+               add_task(dns_timeout, function ()
+                       if handle then
+                               adns.cancel(handle, true);
                        end
-               end
+               end);
+               
+               log("debug", "DNS lookup for %s sent, waiting for response before we can connect", to_host);
+               return true; -- Attempt in progress
        elseif host_session.srv_hosts and #host_session.srv_hosts > host_session.srv_choice then -- Not our first attempt, and we also have SRV
                host_session.srv_choice = host_session.srv_choice + 1;
                local srv_choice = host_session.srv_hosts[host_session.srv_choice];
                connect_host, connect_port = srv_choice.target or to_host, srv_choice.port or connect_port;
-               host_session.log("debug", "Connection failed (%s). Attempt #%d: This time to %s:%d", tostring(err), host_session.srv_choice, connect_host, connect_port);
+               host_session.log("info", "Connection failed (%s). Attempt #%d: This time to %s:%d", tostring(err), host_session.srv_choice, connect_host, connect_port);
        else
-               host_session.log("debug", "Out of connection options, can't connect to %s", tostring(host_session.to_host));
+               host_session.log("info", "Out of connection options, can't connect to %s", tostring(host_session.to_host));
                -- We're out of options
                return false;
        end
        
+       if not (connect_host and connect_port) then
+               -- Likely we couldn't resolve DNS
+               log("warn", "Hmm, we're without a host (%s) and port (%s) to connect to for %s, giving up :(", tostring(connect_host), tostring(connect_port), tostring(to_host));
+               return false;
+       end
+       
+       return try_connect(host_session, connect_host, connect_port);
+end
+
+function try_connect(host_session, connect_host, connect_port)
+       host_session.log("info", "Beginning new connection attempt to %s (%s:%d)", host_session.to_host, connect_host, connect_port);
        -- Ok, we're going to try to connect
+       
+       local from_host, to_host = host_session.from_host, host_session.to_host;
+       
+       local conn, handler = socket.tcp()
+
        conn:settimeout(0);
        local success, err = conn:connect(connect_host, connect_port);
        if not success and err ~= "timeout" then
@@ -197,6 +254,7 @@ function attempt_connection(host_session, err)
        host_session.sends2s = function (t) log("debug", "sending: %s", tostring(t)); w(tostring(t)); end
        
        conn.write(format([[<stream:stream xmlns='jabber:server' xmlns:db='jabber:server:dialback' xmlns:stream='http://etherx.jabber.org/streams' from='%s' to='%s' version='1.0'>]], from_host, to_host));
+       log("debug", "Connection attempt in progress...");
        return true;
 end
 
@@ -234,6 +292,20 @@ function streamopened(session, attr)
                if not attr.id then error("stream response did not give us a streamid!!!"); end
                session.streamid = attr.id;
        
+               -- Flush the unauthenticated send buffer
+               -- (data which is fine to send before dialback completes).
+               -- Note that this is *not* the stanza queue (session.sendq), which
+               -- we can only send if authentication succeeds.
+               local send_buffer = session.send_buffer;
+               if send_buffer and #send_buffer > 0 then
+                       log("debug", "Sending s2s send_buffer now...");
+                       for i, data in ipairs(send_buffer) do
+                               session.sends2s(tostring(data));
+                               send_buffer[i] = nil;
+                       end
+               end
+               session.send_buffer = nil;
+       
                if not session.dialback_verifying then
                        initiate_dialback(session);
                else
@@ -244,6 +316,14 @@ function streamopened(session, attr)
        session.notopen = nil;
 end
 
+function streamclosed(session) -- Close the session's stream: log it, send the closing tag if possible, and mark the stream as not open.
+       (session.log or log)("debug", "</stream:stream>"); -- falls back to the module-level logger when the session has no logger of its own
+       if session.sends2s then -- only write the closing tag when the session has a send function
+               session.sends2s("</stream:stream>");
+       end
+       session.notopen = true; -- streamopened() clears this flag; set it again now that the stream is closed
+end
+
 function initiate_dialback(session)
        -- generate dialback key
        session.dialback_key = generate_dialback(session.streamid, session.to_host, session.from_host);
@@ -272,7 +352,7 @@ function make_authenticated(session, host)
        else
                return false;
        end
-       session.log("info", "connection is now authenticated");
+       session.log("debug", "connection %s->%s is now authenticated", session.from_host or "(unknown)", session.to_host or "(unknown)");
        
        mark_connected(session);
        
@@ -284,7 +364,7 @@ function mark_connected(session)
        
        local from, to = session.from_host, session.to_host;
        
-       session.log("debug", session.direction.." s2s connection "..from.."->"..to.." is now complete");
+       session.log("info", session.direction.." s2s connection "..from.."->"..to.." complete");
        
        local send_to_host = send_to_host;
        function session.send(data) send_to_host(to, from, data); end
@@ -299,13 +379,14 @@ function mark_connected(session)
                        end
                        session.sendq = nil;
                end
+               
+               session.srv_hosts = nil;
        end
 end
 
 function destroy_session(session)
        (session.log or log)("info", "Destroying "..tostring(session.direction).." session "..tostring(session.from_host).."->"..tostring(session.to_host));
        
-       
        if session.direction == "outgoing" then
                hosts[session.from_host].s2sout[session.to_host] = nil;
                bounce_sendq(session);