Only use broadcast at the start of the PMTU discovery phase.
[tinc] / src / net_packet.c
index 626f8b1..4b3496d 100644 (file)
@@ -1,7 +1,9 @@
 /*
     net_packet.c -- Handles in- and outgoing VPN packets
     Copyright (C) 1998-2005 Ivo Timmermans,
-                  2000-2009 Guus Sliepen <guus@tinc-vpn.org>
+                  2000-2011 Guus Sliepen <guus@tinc-vpn.org>
+                  2010      Timothy Redaelli <timothy@redaelli.eu>
+                  2010      Brandon Black <blblack@gmail.com>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
 #include <openssl/pem.h>
 #include <openssl/hmac.h>
 
+#ifdef HAVE_ZLIB
 #include <zlib.h>
+#endif
+
+#ifdef HAVE_LZO
 #include LZO1X_H
+#endif
 
 #include "avl_tree.h"
 #include "conf.h"
@@ -36,7 +43,6 @@
 #include "ethernet.h"
 #include "event.h"
 #include "graph.h"
-#include "list.h"
 #include "logger.h"
 #include "net.h"
 #include "netutl.h"
 #include "utils.h"
 #include "xalloc.h"
 
-#ifdef WSAEMSGSIZE
-#define EMSGSIZE WSAEMSGSIZE
-#endif
-
 int keylifetime = 0;
 int keyexpires = 0;
+#ifdef HAVE_LZO
 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
+#endif
 
 static void send_udppacket(node_t *, vpn_packet_t *);
 
+unsigned replaywin = 16;
+bool localdiscovery = false;
+
 #define MAX_SEQNO 1073741824
 
-// mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
-// mtuprobes ==    31: sleep pinginterval seconds
-// mtuprobes ==    32: send 1 burst, sleep pingtimeout second
-// mtuprobes ==    33: no response from other side, restart PMTU discovery process
+/* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
+   mtuprobes ==    31: sleep pinginterval seconds
+   mtuprobes ==    32: send 1 burst, sleep pingtimeout second
+   mtuprobes ==    33: no response from other side, restart PMTU discovery process
+
+   Probes are sent in batches of three, with random sizes between the lower and
+   upper boundaries for the MTU thus far discovered.
+
+   In case local discovery is enabled, a fourth packet is added to each batch,
+   which will be broadcast to the local network.
+*/
 
 void send_mtu_probe(node_t *n) {
        vpn_packet_t packet;
@@ -78,19 +92,28 @@ void send_mtu_probe(node_t *n) {
        }
 
        if(n->mtuprobes > 32) {
+               if(!n->minmtu) {
+                       n->mtuprobes = 31;
+                       timeout = pinginterval;
+                       goto end;
+               }
+
                ifdebug(TRAFFIC) logger(LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
                n->mtuprobes = 1;
                n->minmtu = 0;
                n->maxmtu = MTU;
        }
 
-       if(n->mtuprobes >= 10 && !n->minmtu) {
+       if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
                ifdebug(TRAFFIC) logger(LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
-               n->mtuprobes = 0;
-               return;
+               n->mtuprobes = 31;
        }
 
        if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
+               if(n->minmtu > n->maxmtu)
+                       n->minmtu = n->maxmtu;
+               else
+                       n->maxmtu = n->minmtu;
                n->mtu = n->minmtu;
                ifdebug(TRAFFIC) logger(LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
                n->mtuprobes = 31;
@@ -103,7 +126,7 @@ void send_mtu_probe(node_t *n) {
                timeout = pingtimeout;
        }
 
-       for(i = 0; i < 3; i++) {
+       for(i = 0; i < 3 + localdiscovery; i++) {
                if(n->maxmtu <= n->minmtu)
                        len = n->maxmtu;
                else
@@ -115,7 +138,10 @@ void send_mtu_probe(node_t *n) {
                memset(packet.data, 0, 14);
                RAND_pseudo_bytes(packet.data + 14, len - 14);
                packet.len = len;
-               packet.priority = 0;
+               if(i >= 3 && n->mtuprobes <= 10)
+                       packet.priority = -1;
+               else
+                       packet.priority = 0;
 
                ifdebug(TRAFFIC) logger(LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
 
@@ -137,50 +163,76 @@ void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
                packet->data[0] = 1;
                send_udppacket(n, packet);
        } else {
+               if(n->mtuprobes > 30) {
+                       if(n->minmtu)
+                               n->mtuprobes = 30;
+                       else
+                               n->mtuprobes = 1;
+               }
+
                if(len > n->maxmtu)
                        len = n->maxmtu;
                if(n->minmtu < len)
                        n->minmtu = len;
-               if(n->mtuprobes > 30)
-                       n->mtuprobes = 30;
        }
 }
 
 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
-       if(level == 10) {
+       if(level == 0) {
+               memcpy(dest, source, len);
+               return len;
+       } else if(level == 10) {
+#ifdef HAVE_LZO
                lzo_uint lzolen = MAXSIZE;
                lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
                return lzolen;
+#else
+               return -1;
+#endif
        } else if(level < 10) {
+#ifdef HAVE_ZLIB
                unsigned long destlen = MAXSIZE;
                if(compress2(dest, &destlen, source, len, level) == Z_OK)
                        return destlen;
                else
+#endif
                        return -1;
        } else {
+#ifdef HAVE_LZO
                lzo_uint lzolen = MAXSIZE;
                lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
                return lzolen;
+#else
+               return -1;
+#endif
        }
        
        return -1;
 }
 
 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
-       if(level > 9) {
+       if(level == 0) {
+               memcpy(dest, source, len);
+               return len;
+       } else if(level > 9) {
+#ifdef HAVE_LZO
                lzo_uint lzolen = MAXSIZE;
                if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
                        return lzolen;
                else
+#endif
                        return -1;
-       } else {
+       }
+#ifdef HAVE_ZLIB
+       else {
                unsigned long destlen = MAXSIZE;
                if(uncompress(dest, &destlen, source, len) == Z_OK)
                        return destlen;
                else
                        return -1;
        }
-       
+#endif
+
        return -1;
 }
 
@@ -264,25 +316,32 @@ static void receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
        inpkt->len -= sizeof(inpkt->seqno);
        inpkt->seqno = ntohl(inpkt->seqno);
 
-       if(inpkt->seqno != n->received_seqno + 1) {
-               if(inpkt->seqno >= n->received_seqno + sizeof(n->late) * 8) {
-                       logger(LOG_WARNING, "Lost %d packets from %s (%s)",
-                                          inpkt->seqno - n->received_seqno - 1, n->name, n->hostname);
-                       
-                       memset(n->late, 0, sizeof(n->late));
-               } else if (inpkt->seqno <= n->received_seqno) {
-                       if((n->received_seqno >= sizeof(n->late) * 8 && inpkt->seqno <= n->received_seqno - sizeof(n->late) * 8) || !(n->late[(inpkt->seqno / 8) % sizeof(n->late)] & (1 << inpkt->seqno % 8))) {
-                               logger(LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
-                                          n->name, n->hostname, inpkt->seqno, n->received_seqno);
-                               return;
+       if(replaywin) {
+               if(inpkt->seqno != n->received_seqno + 1) {
+                       if(inpkt->seqno >= n->received_seqno + replaywin * 8) {
+                               if(n->farfuture++ < replaywin >> 2) {
+                                       logger(LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
+                                               n->name, n->hostname, inpkt->seqno - n->received_seqno - 1, n->farfuture);
+                                       return;
+                               }
+                               logger(LOG_WARNING, "Lost %d packets from %s (%s)",
+                                               inpkt->seqno - n->received_seqno - 1, n->name, n->hostname);
+                               memset(n->late, 0, replaywin);
+                       } else if (inpkt->seqno <= n->received_seqno) {
+                               if((n->received_seqno >= replaywin * 8 && inpkt->seqno <= n->received_seqno - replaywin * 8) || !(n->late[(inpkt->seqno / 8) % replaywin] & (1 << inpkt->seqno % 8))) {
+                                       logger(LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
+                                               n->name, n->hostname, inpkt->seqno, n->received_seqno);
+                                       return;
+                               }
+                       } else {
+                               for(i = n->received_seqno + 1; i < inpkt->seqno; i++)
+                                       n->late[(i / 8) % replaywin] |= 1 << i % 8;
                        }
-               } else {
-                       for(i = n->received_seqno + 1; i < inpkt->seqno; i++)
-                               n->late[(i / 8) % sizeof(n->late)] |= 1 << i % 8;
                }
+
+               n->farfuture = 0;
+               n->late[(inpkt->seqno / 8) % replaywin] &= ~(1 << inpkt->seqno % 8);
        }
-       
-       n->late[(inpkt->seqno / 8) % sizeof(n->late)] &= ~(1 << inpkt->seqno % 8);
 
        if(inpkt->seqno > n->received_seqno)
                n->received_seqno = inpkt->seqno;
@@ -316,7 +375,7 @@ static void receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
                receive_packet(n, inpkt);
 }
 
-void receive_tcppacket(connection_t *c, char *buffer, int len) {
+void receive_tcppacket(connection_t *c, const char *buffer, int len) {
        vpn_packet_t outpkt;
 
        outpkt.len = len;
@@ -337,9 +396,10 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
        vpn_packet_t *outpkt;
        int origlen;
        int outlen, outpad;
+#if defined(SOL_IP) && defined(IP_TOS)
        static int priority = 0;
+#endif
        int origpriority;
-       int sock;
 
        if(!n->status.reachable) {
                ifdebug(TRAFFIC) logger(LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
@@ -353,10 +413,10 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
                                   "No valid key known yet for %s (%s), forwarding via TCP",
                                   n->name, n->hostname);
 
-               if(!n->status.waitingforkey)
+               if(n->last_req_key + 10 <= now) {
                        send_req_key(n);
-
-               n->status.waitingforkey = true;
+                       n->last_req_key = now;
+               }
 
                send_tcppacket(n->nexthop->connection, origpkt);
 
@@ -365,10 +425,13 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
 
        if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (inpkt->data[12] | inpkt->data[13])) {
                ifdebug(TRAFFIC) logger(LOG_INFO,
-                               "Packet for %s (%s) larger than minimum MTU, forwarding via TCP",
-                               n->name, n->hostname);
+                               "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
+                               n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
 
-               send_tcppacket(n->nexthop->connection, origpkt);
+               if(n != n->nexthop)
+                       send_packet(n->nexthop, origpkt);
+               else
+                       send_tcppacket(n->nexthop->connection, origpkt);
 
                return;
        }
@@ -423,33 +486,58 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
 
        /* Determine which socket we have to use */
 
-       for(sock = 0; sock < listen_sockets; sock++)
-               if(n->address.sa.sa_family == listen_socket[sock].sa.sa.sa_family)
-                       break;
-
-       if(sock >= listen_sockets)
-               sock = 0;                               /* If none is available, just use the first and hope for the best. */
+       if(n->address.sa.sa_family != listen_socket[n->sock].sa.sa.sa_family) {
+               for(int sock = 0; sock < listen_sockets; sock++) {
+                       if(n->address.sa.sa_family == listen_socket[sock].sa.sa.sa_family) {
+                               n->sock = sock;
+                               break;
+                       }
+               }
+       }
 
        /* Send the packet */
 
+       struct sockaddr *sa;
+       socklen_t sl;
+       int sock;
+
+       /* Overloaded use of priority field: -1 means local broadcast */
+
+       if(origpriority == -1 && n->prevedge) {
+               struct sockaddr_in in;
+               in.sin_family = AF_INET;
+               in.sin_addr.s_addr = -1;
+               in.sin_port = n->prevedge->address.in.sin_port;
+               sa = (struct sockaddr *)&in;
+               sl = sizeof in;
+               sock = 0;
+       } else {
+               if(origpriority == -1)
+                       origpriority = 0;
+
+               sa = &(n->address.sa);
+               sl = SALEN(n->address.sa);
+               sock = n->sock;
+       }
+
 #if defined(SOL_IP) && defined(IP_TOS)
        if(priorityinheritance && origpriority != priority
-          && listen_socket[sock].sa.sa.sa_family == AF_INET) {
+          && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
                priority = origpriority;
                ifdebug(TRAFFIC) logger(LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
-               if(setsockopt(listen_socket[sock].udp, SOL_IP, IP_TOS, &priority, sizeof(priority)))    /* SO_PRIORITY doesn't seem to work */
+               if(setsockopt(listen_socket[n->sock].udp, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
                        logger(LOG_ERR, "System call `%s' failed: %s", "setsockopt", strerror(errno));
        }
 #endif
 
-       if((sendto(listen_socket[sock].udp, (char *) &inpkt->seqno, inpkt->len, 0, &(n->address.sa), SALEN(n->address.sa))) < 0) {
-               if(errno == EMSGSIZE) {
+       if(sendto(listen_socket[sock].udp, (char *) &inpkt->seqno, inpkt->len, 0, sa, sl) < 0 && !sockwouldblock(sockerrno)) {
+               if(sockmsgsize(sockerrno)) {
                        if(n->maxmtu >= origlen)
                                n->maxmtu = origlen - 1;
                        if(n->mtu >= origlen)
                                n->mtu = origlen - 1;
                } else
-                       logger(LOG_ERR, "Error sending packet to %s (%s): %s", n->name, n->hostname, strerror(errno));
+                       logger(LOG_ERR, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
        }
 
 end:
@@ -465,7 +553,7 @@ void send_packet(const node_t *n, vpn_packet_t *packet) {
        if(n == myself) {
                if(overwrite_mac)
                         memcpy(packet->data, mymac.x, ETH_ALEN);
-               write_packet(packet);
+               devops.write(packet);
                return;
        }
 
@@ -521,20 +609,21 @@ static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
        avl_node_t *node;
        edge_t *e;
        node_t *n = NULL;
+       bool hard = false;
        static time_t last_hard_try = 0;
 
        for(node = edge_weight_tree->head; node; node = node->next) {
                e = node->data;
 
+               if(e->to == myself)
+                       continue;
+
                if(sockaddrcmp_noport(from, &e->address)) {
                        if(last_hard_try == now)
                                continue;
-                       last_hard_try = now;
+                       hard = true;
                }
 
-               if(!n)
-                       n = e->to;
-
                if(!try_mac(e->to, pkt))
                        continue;
 
@@ -542,6 +631,10 @@ static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
                break;
        }
 
+       if(hard)
+               last_hard_try = now;
+
+       last_hard_try = now;
        return n;
 }
 
@@ -552,11 +645,11 @@ void handle_incoming_vpn_data(int sock) {
        socklen_t fromlen = sizeof(from);
        node_t *n;
 
-       pkt.len = recvfrom(sock, (char *) &pkt.seqno, MAXSIZE, 0, &from.sa, &fromlen);
+       pkt.len = recvfrom(listen_socket[sock].udp, (char *) &pkt.seqno, MAXSIZE, 0, &from.sa, &fromlen);
 
        if(pkt.len < 0) {
-               if(errno != EAGAIN && errno != EINTR)
-                       logger(LOG_ERR, "Receiving packet failed: %s", strerror(errno));
+               if(!sockwouldblock(sockerrno))
+                       logger(LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
                return;
        }
 
@@ -578,5 +671,7 @@ void handle_incoming_vpn_data(int sock) {
                        return;
        }
 
+       n->sock = sock;
+
        receive_udppacket(n, &pkt);
 }