Don't send MTU probes smaller than 512 bytes.
[tinc] / src / net_packet.c
index a1b174e..626114c 100644 (file)
@@ -1,7 +1,7 @@
 /*
     net_packet.c -- Handles in- and outgoing VPN packets
     Copyright (C) 1998-2005 Ivo Timmermans,
-                  2000-2013 Guus Sliepen <guus@tinc-vpn.org>
+                  2000-2014 Guus Sliepen <guus@tinc-vpn.org>
                   2010      Timothy Redaelli <timothy@redaelli.eu>
                   2010      Brandon Black <blblack@gmail.com>
 
 #include "utils.h"
 #include "xalloc.h"
 
+#ifndef MAX
+#define MAX(a, b) ((a) > (b) ? (a) : (b)) /* Larger of a and b. NB: the selected argument is evaluated twice, so do not pass expressions with side effects. */
+#endif
+
 int keylifetime = 0;
 #ifdef HAVE_LZO
 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
@@ -55,145 +59,58 @@ static void send_udppacket(node_t *, vpn_packet_t *);
 
 unsigned replaywin = 16;
 bool localdiscovery = true;
+bool udp_discovery = true; /* When false, try_udp() returns immediately and udp_probe_h() does not (re)arm the per-node UDP ping timeout. */
+int udp_discovery_interval = 9; /* Compared in try_udp() against the whole seconds elapsed since n->udp_ping_sent before another probe is sent. */
+int udp_discovery_timeout = 30; /* Seconds (tv_sec) after a probe reply before udp_probe_timeout_handler() is scheduled to run. */
 
 #define MAX_SEQNO 1073741824
 
-/* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
-   mtuprobes ==    31: sleep pinginterval seconds
-   mtuprobes ==    32: send 1 burst, sleep pingtimeout second
-   mtuprobes ==    33: no response from other side, restart PMTU discovery process
-
-   Probes are sent in batches of at least three, with random sizes between the
-   lower and upper boundaries for the MTU thus far discovered.
-
-   After the initial discovery, a fourth packet is added to each batch with a
-   size larger than the currently known PMTU, to test if the PMTU has increased.
-
-   In case local discovery is enabled, another packet is added to each batch,
-   which will be broadcast to the local network.
-
-*/
-
-static void send_mtu_probe_handler(void *data) {
-       node_t *n = data;
-       int timeout = 1;
-
-       n->mtuprobes++;
-
-       if(!n->status.reachable || !n->status.validkey) {
-               logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
-               n->mtuprobes = 0;
+static void try_fix_mtu(node_t *n) { /* Settles n->mtu once the bounds have met (minmtu >= maxmtu) or mtuprobes has reached exactly 20; does nothing when mtuprobes is negative. */
+       if(n->mtuprobes < 0) /* negative mtuprobes: the value -1 is stored below once the MTU has been fixed */
                return;
-       }
-
-       if(n->mtuprobes > 32) {
-               if(!n->minmtu) {
-                       n->mtuprobes = 31;
-                       timeout = pinginterval;
-                       goto end;
-               }
 
-               logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
-               n->status.udp_confirmed = false;
-               n->mtuprobes = 1;
-               n->minmtu = 0;
-               n->maxmtu = MTU;
-       }
-
-       if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
-               logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
-               n->mtuprobes = 31;
-       }
-
-       if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
+       if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) { /* probe count hit 20, or the lower bound met/crossed the upper bound: collapse both bounds to one value */
                if(n->minmtu > n->maxmtu)
                        n->minmtu = n->maxmtu;
                else
                        n->maxmtu = n->minmtu;
                n->mtu = n->minmtu;
                logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
-               n->mtuprobes = 31;
+               n->mtuprobes = -1; /* mark the MTU as fixed so later calls take the early return above */
        }
-
-       if(n->mtuprobes == 31) {
-               timeout = pinginterval;
-               goto end;
-       } else if(n->mtuprobes == 32) {
-               timeout = pingtimeout;
-       }
-
-       for(int i = 0; i < 4 + localdiscovery; i++) {
-               int len;
-
-               if(i == 0) {
-                       if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
-                               continue;
-                       len = n->maxmtu + 8;
-               } else if(n->maxmtu <= n->minmtu) {
-                       len = n->maxmtu;
-               } else {
-                       len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
-               }
-
-               if(len < 64)
-                       len = 64;
-
-               vpn_packet_t packet;
-               memset(packet.data, 0, 14);
-               randomize(packet.data + 14, len - 14);
-               packet.len = len;
-               packet.priority = 0;
-               n->status.send_locally = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
-
-               logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
-
-               send_udppacket(n, &packet);
-       }
-
-       n->status.send_locally = false;
-       n->probe_counter = 0;
-       gettimeofday(&n->probe_time, NULL);
-
-       /* Calculate the packet loss of incoming traffic by comparing the rate of
-          packets received to the rate with which the sequence number has increased.
-        */
-
-       if(n->received > n->prev_received)
-               n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
-       else
-               n->packetloss = n->received_seqno <= n->prev_received_seqno;
-
-       n->prev_received_seqno = n->received_seqno;
-       n->prev_received = n->received;
-
-end:
-       timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
 }
 
-void send_mtu_probe(node_t *n) {
-       timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
-       send_mtu_probe_handler(n);
+static void udp_probe_timeout_handler(void *data) { /* Timeout callback that udp_probe_h() registers on n->udp_ping_timeout; each probe reply deletes and re-adds that timeout with udp_discovery_timeout seconds. */
+       node_t *n = data; /* data is the node_t pointer that was handed to timeout_add() */
+       if(!n->status.udp_confirmed) /* UDP is not marked as confirmed, so there is nothing to reset */
+               return;
+
+       logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
+       n->status.udp_confirmed = false; /* withdraw the confirmation that udp_probe_h() sets when a reply arrives */
+       n->mtuprobes = 0; /* also clears the -1 "fixed" marker, so try_fix_mtu() no longer returns early */
+       n->minmtu = 0; /* reopen the MTU bounds to the full range 0..MTU */
+       n->maxmtu = MTU;
 }
 
-static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
-       if(!packet->data[0]) {
-               logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe request %d from %s (%s)", packet->len, n->name, n->hostname);
+static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
+       if(!DATA(packet)[0]) {
+               logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname);
 
                /* It's a probe request, send back a reply */
 
                /* Type 2 probe replies were introduced in protocol 17.3 */
                if ((n->options >> 24) >= 3) {
-                       uint8_t* data = packet->data;
+                       uint8_t *data = DATA(packet);
                        *data++ = 2;
                        uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
                        struct timeval now;
                        gettimeofday(&now, NULL);
                        uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
                        uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
-                       packet->len = data - packet->data;
+                       packet->len -= 10;
                } else {
                        /* Legacy protocol: n won't understand type 2 probe replies. */
-                       packet->data[0] = 1;
+                       DATA(packet)[0] = 1;
                }
 
                /* Temporarily set udp_confirmed, so that the reply is sent
@@ -205,48 +122,44 @@ static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
                n->status.udp_confirmed = udp_confirmed;
        } else {
                length_t probelen = len;
-               if (packet->data[0] == 2) {
+               if (DATA(packet)[0] == 2) {
                        if (len < 3)
-                               logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) MTU probe reply from %s (%s)", n->name, n->hostname);
+                               logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) UDP probe reply from %s (%s)", n->name, n->hostname);
                        else {
-                               uint16_t probelen16; memcpy(&probelen16, packet->data + 1, 2); probelen = ntohs(probelen16);
+                               uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
                        }
                }
-               logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d MTU probe reply %d from %s (%s)", packet->data[0], probelen, n->name, n->hostname);
+               logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d UDP probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
 
                /* It's a valid reply: now we know bidirectional communication
                   is possible using the address and socket that the reply
                   packet used. */
-
                n->status.udp_confirmed = true;
 
-               /* If we haven't established the PMTU yet, restart the discovery process. */
-
-               if(n->mtuprobes > 30) {
-                       if (probelen == n->maxmtu + 8) {
-                               logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
-                               n->maxmtu = MTU;
-                               n->mtuprobes = 10;
-                               return;
-                       }
+               if(udp_discovery) {
+                       timeout_del(&n->udp_ping_timeout);
+                       timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
+               }
 
-                       if(n->minmtu)
-                               n->mtuprobes = 30;
-                       else
-                               n->mtuprobes = 1;
+               if(probelen >= n->maxmtu + 1) {
+                       logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
+                       n->maxmtu = MTU;
+                       n->mtuprobes = 0;
+                       return;
                }
 
                /* If applicable, raise the minimum supported MTU */
 
                if(probelen > n->maxmtu)
                        probelen = n->maxmtu;
-               if(n->minmtu < probelen)
+               if(n->minmtu < probelen) {
                        n->minmtu = probelen;
+                       try_fix_mtu(n);
+               }
 
-               /* Calculate RTT and bandwidth.
+               /* Calculate RTT.
                   The RTT is the time between the MTU probe burst was sent and the first
-                  reply is received. The bandwidth is measured using the time between the
-                  arrival of the first and third probe reply (or type 2 probe requests).
+                  reply is received.
                 */
 
                struct timeval now, diff;
@@ -254,9 +167,9 @@ static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
                timersub(&now, &n->probe_time, &diff);
 
                struct timeval probe_timestamp = now;
-               if (packet->data[0] == 2 && packet->len >= 11) {
-                       uint32_t sec; memcpy(&sec, packet->data + 3, 4);
-                       uint32_t usec; memcpy(&usec, packet->data + 7, 4);
+               if (DATA(packet)[0] == 2 && packet->len >= 11) {
+                       uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
+                       uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
                        probe_timestamp.tv_sec = ntohl(sec);
                        probe_timestamp.tv_usec = ntohl(usec);
                }
@@ -266,11 +179,7 @@ static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
                if(n->probe_counter == 1) {
                        n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
                        n->probe_time = probe_timestamp;
-               } else if(n->probe_counter == 3) {
-                       struct timeval probe_timestamp_diff;
-                       timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
-                       n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
-                       logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
+                       logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2);
                }
        }
 }
@@ -348,12 +257,16 @@ static void receive_packet(node_t *n, vpn_packet_t *packet) {
 
 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
        if(n->status.sptps)
-               return sptps_verify_datagram(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
+               return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len); /* try_mac reports whether inpkt authenticates against node n; for SPTPS nodes the check is delegated to sptps_verify_datagram() */
 
-       if(!digest_active(n->indigest) || inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest))
+#ifdef DISABLE_LEGACY /* with DISABLE_LEGACY defined, only the SPTPS branch above can ever succeed */
+       return false;
+#else
+       if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) /* no active digest, or too short to hold a sequence number plus a MAC */
                return false;
 
-       return digest_verify(n->indigest, &inpkt->seqno, inpkt->len - digest_length(n->indigest), (const char *)&inpkt->seqno + inpkt->len - digest_length(n->indigest));
+       return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest)); /* MAC is taken from the trailing digest_length() bytes. NOTE(review): the authenticated region starts at SEQNO() but the MAC pointer is based on DATA(); the removed line and receive_udppacket() use one base for both - confirm against the macro definitions and the packet offset at this point. */
+#endif
 }
 
 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
@@ -361,6 +274,8 @@ static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
        vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
        int nextpkt = 0;
        size_t outlen;
+       pkt1.offset = DEFAULT_PACKET_OFFSET;
+       pkt2.offset = DEFAULT_PACKET_OFFSET;
 
        if(n->status.sptps) {
                if(!n->sptps.state) {
@@ -372,9 +287,17 @@ static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
                        }
                        return false;
                }
-               return sptps_receive_data(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
+               inpkt->offset += 2 * sizeof(node_id_t);
+               if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
+                       logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
+                       return false;
+               }
+               return true;
        }
 
+#ifdef DISABLE_LEGACY
+       return false;
+#else
        if(!n->status.validkey) {
                logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
                return false;
@@ -382,17 +305,21 @@ static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
 
        /* Check packet length */
 
-       if(inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest)) {
+       if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
                logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
                                        n->name, n->hostname);
                return false;
        }
 
+       /* It's a legacy UDP packet, the data starts after the seqno */
+
+       inpkt->offset += sizeof(seqno_t);
+
        /* Check the message authentication code */
 
        if(digest_active(n->indigest)) {
                inpkt->len -= digest_length(n->indigest);
-               if(!digest_verify(n->indigest, &inpkt->seqno, inpkt->len, (const char *)&inpkt->seqno + inpkt->len)) {
+               if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
                        logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
                        return false;
                }
@@ -403,7 +330,7 @@ static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
                vpn_packet_t *outpkt = pkt[nextpkt++];
                outlen = MAXSIZE;
 
-               if(!cipher_decrypt(n->incipher, &inpkt->seqno, inpkt->len, &outpkt->seqno, &outlen, true)) {
+               if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
                        logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
                        return false;
                }
@@ -414,10 +341,10 @@ static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
 
        /* Check the sequence number */
 
-       inpkt->len -= sizeof inpkt->seqno;
-       uint32_t seqno;
-       memcpy(&seqno, inpkt->seqno, sizeof seqno);
+       seqno_t seqno;
+       memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
        seqno = ntohl(seqno);
+       inpkt->len -= sizeof seqno;
 
        if(replaywin) {
                if(seqno != n->received_seqno + 1) {
@@ -461,7 +388,7 @@ static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
        if(n->incompression) {
                vpn_packet_t *outpkt = pkt[nextpkt++];
 
-               if((outpkt->len = uncompress_packet(outpkt->data, inpkt->data, inpkt->len, n->incompression)) < 0) {
+               if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
                        logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
                                                 n->name, n->hostname);
                        return false;
@@ -474,17 +401,19 @@ static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
 
        inpkt->priority = 0;
 
-       if(!inpkt->data[12] && !inpkt->data[13])
-               mtu_probe_h(n, inpkt, origlen);
+       if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
+               udp_probe_h(n, inpkt, origlen);
        else
                receive_packet(n, inpkt);
        return true;
+#endif
 }
 
 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
        vpn_packet_t outpkt;
+       outpkt.offset = DEFAULT_PACKET_OFFSET;
 
-       if(len > sizeof outpkt.data)
+       if(len > sizeof outpkt.data - outpkt.offset)
                return;
 
        outpkt.len = len;
@@ -492,30 +421,20 @@ void receive_tcppacket(connection_t *c, const char *buffer, int len) {
                outpkt.priority = 0;
        else
                outpkt.priority = -1;
-       memcpy(outpkt.data, buffer, len);
+       memcpy(DATA(&outpkt), buffer, len);
 
        receive_packet(c->node, &outpkt);
 }
 
 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
-       if(!n->status.validkey) {
-               logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
-               if(!n->status.waitingforkey)
-                       send_req_key(n);
-               else if(n->last_req_key + 10 < now.tv_sec) {
-                       logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
-                       sptps_stop(&n->sptps);
-                       n->status.waitingforkey = false;
-                       send_req_key(n);
-               }
+       if(!n->status.validkey && !n->connection)
                return;
-       }
 
        uint8_t type = 0;
        int offset = 0;
 
-       if(!(origpkt->data[12] | origpkt->data[13])) {
-               sptps_send_record(&n->sptps, PKT_PROBE, (char *)origpkt->data, origpkt->len);
+       if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
+               sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
                return;
        }
 
@@ -530,7 +449,8 @@ static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
        vpn_packet_t outpkt;
 
        if(n->outcompression) {
-               int len = compress_packet(outpkt.data + offset, origpkt->data + offset, origpkt->len - offset, n->outcompression);
+               outpkt.offset = 0;
+               int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
                if(len < 0) {
                        logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
                } else if(len < origpkt->len - offset) {
@@ -540,7 +460,14 @@ static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
                }
        }
 
-       sptps_send_record(&n->sptps, type, (char *)origpkt->data + offset, origpkt->len - offset);
+       /* If we have a direct metaconnection to n, and we can't use UDP, then
+          don't bother with SPTPS and just use a "plaintext" PACKET message.
+          We don't really care about end-to-end security since we're not
+          sending the message through any intermediate nodes. */
+       if(n->connection && origpkt->len > n->minmtu)
+               send_tcppacket(n->connection, origpkt);
+       else
+               sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
        return;
 }
 
@@ -633,6 +560,9 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
        int origpriority = origpkt->priority;
 #endif
 
+       pkt1.offset = DEFAULT_PACKET_OFFSET;
+       pkt2.offset = DEFAULT_PACKET_OFFSET;
+
        if(!n->status.reachable) {
                logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
                return;
@@ -641,24 +571,20 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
        if(n->status.sptps)
                return send_sptps_packet(n, origpkt);
 
+#ifdef DISABLE_LEGACY
+       return;
+#else
        /* Make sure we have a valid key */
 
        if(!n->status.validkey) {
                logger(DEBUG_TRAFFIC, LOG_INFO,
                                   "No valid key known yet for %s (%s), forwarding via TCP",
                                   n->name, n->hostname);
-
-               if(n->last_req_key + 10 <= now.tv_sec) {
-                       send_req_key(n);
-                       n->last_req_key = now.tv_sec;
-               }
-
                send_tcppacket(n->nexthop->connection, origpkt);
-
                return;
        }
 
-       if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (inpkt->data[12] | inpkt->data[13])) {
+       if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
                logger(DEBUG_TRAFFIC, LOG_INFO,
                                "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
                                n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
@@ -676,7 +602,7 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
        if(n->outcompression) {
                outpkt = pkt[nextpkt++];
 
-               if((outpkt->len = compress_packet(outpkt->data, inpkt->data, inpkt->len, n->outcompression)) < 0) {
+               if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
                        logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
                                   n->name, n->hostname);
                        return;
@@ -687,9 +613,9 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
 
        /* Add sequence number */
 
-       uint32_t seqno = htonl(++(n->sent_seqno));
-       memcpy(inpkt->seqno, &seqno, sizeof inpkt->seqno);
-       inpkt->len += sizeof inpkt->seqno;
+       seqno_t seqno = htonl(++(n->sent_seqno));
+       memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
+       inpkt->len += sizeof seqno;
 
        /* Encrypt the packet */
 
@@ -697,7 +623,7 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
                outpkt = pkt[nextpkt++];
                outlen = MAXSIZE;
 
-               if(!cipher_encrypt(n->outcipher, inpkt->seqno, inpkt->len, outpkt->seqno, &outlen, true)) {
+               if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
                        logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
                        goto end;
                }
@@ -709,7 +635,7 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
        /* Add the message authentication code */
 
        if(digest_active(n->outdigest)) {
-               if(!digest_create(n->outdigest, inpkt->seqno, inpkt->len, inpkt->seqno + inpkt->len)) {
+               if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
                        logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
                        goto end;
                }
@@ -737,30 +663,33 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
        }
 #endif
 
-       if(sendto(listen_socket[sock].udp.fd, inpkt->seqno, inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
+       if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
                if(sockmsgsize(sockerrno)) {
                        if(n->maxmtu >= origlen)
                                n->maxmtu = origlen - 1;
                        if(n->mtu >= origlen)
                                n->mtu = origlen - 1;
+                       try_fix_mtu(n);
                } else
                        logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
        }
 
 end:
        origpkt->len = origlen;
+#endif
 }
 
-static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const char *data, size_t len) {
+static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
        node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
        bool direct = from == myself && to == relay;
        bool relay_supported = (relay->options >> 24) >= 4;
+       bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
 
        /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
           TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
                 This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
 
-       if(type == SPTPS_HANDSHAKE || ((myself->options | relay->options) & OPTION_TCPONLY) || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
+       if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
                char buf[len * 4 / 3 + 5];
                b64encode(data, buf, len);
                /* If no valid key is known yet, send the packets using ANS_KEY requests,
@@ -779,7 +708,7 @@ static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const char
        if(relay_supported) {
                if(direct) {
                        /* Inform the recipient that this packet was sent directly. */
-                       node_id_t nullid = {0};
+                       node_id_t nullid = {};
                        memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
                } else {
                        memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
@@ -805,6 +734,7 @@ static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const char
                                relay->maxmtu = len - 1;
                        if(relay->mtu >= len)
                                relay->mtu = len - 1;
+                       try_fix_mtu(relay);
                } else {
                        logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
                        return false;
@@ -814,11 +744,11 @@ static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const char
        return true;
 }
 
-bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
+bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) { /* Thin wrapper: forwards to send_sptps_data_priv() with handle as the destination node ("to") and myself as the originator ("from"), and returns its result. */
        return send_sptps_data_priv(handle, myself, type, data, len);
 }
 
-bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
+bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
        node_t *from = handle;
 
        if(type == SPTPS_HANDSHAKE) {
@@ -836,11 +766,12 @@ bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t
        }
 
        vpn_packet_t inpkt;
+       inpkt.offset = DEFAULT_PACKET_OFFSET;
 
        if(type == PKT_PROBE) {
                inpkt.len = len;
-               memcpy(inpkt.data, data, len);
-               mtu_probe_h(from, &inpkt, len);
+               memcpy(DATA(&inpkt), data, len);
+               udp_probe_h(from, &inpkt, len);
                return true;
        }
 
@@ -859,7 +790,7 @@ bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t
 
        int offset = (type & PKT_MAC) ? 0 : 14;
        if(type & PKT_COMPRESSED) {
-               length_t ulen = uncompress_packet(inpkt.data + offset, (const uint8_t *)data, len, from->incompression);
+               length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
                if(ulen < 0) {
                        return false;
                } else {
@@ -868,25 +799,25 @@ bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t
                if(inpkt.len > MAXSIZE)
                        abort();
        } else {
-               memcpy(inpkt.data + offset, data, len);
+               memcpy(DATA(&inpkt) + offset, data, len);
                inpkt.len = len + offset;
        }
 
        /* Generate the Ethernet packet type if necessary */
        if(offset) {
-               switch(inpkt.data[14] >> 4) {
+               switch(DATA(&inpkt)[14] >> 4) {
                        case 4:
-                               inpkt.data[12] = 0x08;
-                               inpkt.data[13] = 0x00;
+                               DATA(&inpkt)[12] = 0x08;
+                               DATA(&inpkt)[13] = 0x00;
                                break;
                        case 6:
-                               inpkt.data[12] = 0x86;
-                               inpkt.data[13] = 0xDD;
+                               DATA(&inpkt)[12] = 0x86;
+                               DATA(&inpkt)[13] = 0xDD;
                                break;
                        default:
                                logger(DEBUG_TRAFFIC, LOG_ERR,
                                                   "Unknown IP version %d while reading packet from %s (%s)",
-                                                  inpkt.data[14] >> 4, from->name, from->hostname);
+                                                  DATA(&inpkt)[14] >> 4, from->name, from->hostname);
                                return false;
                }
        }
@@ -895,6 +826,181 @@ bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t
        return true;
 }
 
+// Ask node n for SPTPS keys, unless a valid key is already known.
+// Nothing is guaranteed on return - the caller must check n->status.validkey to see if the keys are available.
+static void try_sptps(node_t *n) {
+       if(n->status.validkey)
+               return;
+
+       logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
+
+       if(n->status.waitingforkey) {
+               /* A request is pending already: only start over once it is more than 10 seconds old. */
+               if(n->last_req_key + 10 >= now.tv_sec)
+                       return;
+               logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
+               sptps_stop(&n->sptps);
+               n->status.waitingforkey = false;
+       }
+
+       send_req_key(n);
+}
+
+/* Send a UDP probe of len bytes to n: 14 zeroed bytes, followed by len - 14 bytes filled in by randomize(). */
+static void send_udp_probe_packet(node_t *n, int len) {
+       vpn_packet_t probe;
+       probe.priority = 0;
+       probe.len = len;
+       probe.offset = DEFAULT_PACKET_OFFSET;
+       memset(DATA(&probe), 0, 14);
+       randomize(DATA(&probe) + 14, len - 14);
+
+       logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
+       send_udppacket(n, &probe);
+}
+
+// Try to bring up a UDP tunnel to n so that packets can be sent; if there is one already, keep it alive.
+// Probes go out at most once every udp_discovery_interval seconds, and never while udp_discovery is off.
+// Nothing is guaranteed on return - the caller must check the node's state to see if UDP is usable.
+static void try_udp(node_t *n) {
+       if(!udp_discovery)
+               return;
+
+       struct timeval current, since_ping;
+       gettimeofday(&current, NULL);
+       timersub(&current, &n->udp_ping_sent, &since_ping);
+       if(since_ping.tv_sec < udp_discovery_interval)
+               return;
+
+       send_udp_probe_packet(n, MAX(n->minmtu, 16));
+       n->udp_ping_sent = current;
+       if(!localdiscovery || n->status.udp_confirmed || !n->prevedge)
+               return;
+
+       /* Local discovery: send one more, minimal probe with the send_locally flag raised. */
+       n->status.send_locally = true;
+       send_udp_probe_packet(n, 16);
+       n->status.send_locally = false;
+}
+
+// This function tries to determine the MTU of a node.
+// By calling this function repeatedly, n->minmtu will be progressively increased, and at some point, n->mtu will be fixed to n->minmtu.
+// If the MTU is already fixed, this function checks if it can be increased.
+static void try_mtu(node_t *n) {
+       if(!(n->options & OPTION_PMTU_DISCOVERY))
+               return;
+
+       if(udp_discovery && !n->status.udp_confirmed) {
+               n->mtuprobes = 0;
+               n->minmtu = 0;
+               n->maxmtu = MTU;
+               return;
+       }
+
+       /* mtuprobes == 0..19: initial discovery, send at most one probe every 333333 microseconds, mtuprobes++
+          mtuprobes ==    20: fix MTU, and go to -1
+          mtuprobes ==    -1: send one >maxmtu probe every pingtimeout */
+
+       struct timeval now;
+       gettimeofday(&now, NULL);
+       struct timeval elapsed;
+       timersub(&now, &n->probe_sent_time, &elapsed);
+       if(n->mtuprobes >= 0) {
+               if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333)
+                       return;
+       } else {
+               if(elapsed.tv_sec < pingtimeout)
+                       return;
+       }
+
+       try_fix_mtu(n);
+
+       if(n->mtuprobes < 0) {
+               /* After the initial discovery, we only send one >maxmtu probe
+                  to detect PMTU increases. */
+               if(n->maxmtu + 1 < MTU)
+                       send_udp_probe_packet(n, n->maxmtu + 1);
+       } else {
+               /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets,
+                  but it will typically increase convergence time in the no-loss case. */
+               const length_t probes_per_cycle = 8;
+
+               /* This magic value was determined using math simulations.
+                  It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe.
+                  Since 1407 is just below the range of tinc MTUs over typical networks,
+                  this fine-tuning allows tinc to cover a lot of ground very quickly. */
+               const float multiplier = 0.97;
+
+               const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1;
+               const length_t minmtu = MAX(n->minmtu, 512);
+               const float interval = n->maxmtu > minmtu ? n->maxmtu - minmtu : 0;
+
+               /* The core of the discovery algorithm is this exponential.
+                  It produces very large probes early in the cycle, and then it very quickly decreases the probe size.
+                  This reflects the fact that in the most difficult cases, we don't get any feedback for probes that
+                  are too large, and therefore we need to concentrate on small offsets so that we can quickly converge
+                  on the precise MTU as we are approaching it. The last probe of the cycle is always minmtu + 1 bytes
+                  in size - this is to make sure we'll get at least one reply per cycle so that we can make progress.
+                  When maxmtu does not exceed minmtu (possible because of the 512 byte floor), powf() is skipped, as a negative base would yield NaN; the probe is then exactly minmtu bytes. */
+               length_t offset = 0;
+               if(interval > 0)
+                       offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1));
+               send_udp_probe_packet(n, minmtu + offset);
+               if(n->mtuprobes >= 0)
+                       n->mtuprobes++;
+       }
+
+       n->probe_counter = 0;
+       n->probe_sent_time = now;
+       n->probe_time = now;
+
+       /* Calculate the packet loss of incoming traffic by comparing the rate of
+          packets received to the rate with which the sequence number has increased.
+          TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
+        */
+
+       if(n->received > n->prev_received)
+               n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
+       else
+               n->packetloss = n->received_seqno <= n->prev_received_seqno;
+
+       n->prev_received_seqno = n->received_seqno;
+       n->prev_received = n->received;
+}
+
+// Try to get a tunnel to n (or to its relay) into a state where packets can be sent, e.g. by getting SPTPS keys.
+// If a tunnel exists already, try to improve it, e.g. by establishing a UDP tunnel where TCP is used so far.
+// Nothing is guaranteed on return - the caller must check the node's state to see if TCP and/or UDP is usable.
+// Repeated calls improve the tunnel step by step, until the limits of the underlying network environment are hit.
+// It is best called for every packet sent (or about to be sent) to a node: the tunnel then keeps improving as packets flow, and gracefully downgrades itself as it goes idle.
+static void try_tx(node_t *n) {
+       /* A TCP-only neighbor only ever gets "cleartext" PACKET messages, so SPTPS keys are needed for other nodes only. */
+       const bool tcponly_neighbor = n->connection && ((myself->options | n->options) & OPTION_TCPONLY);
+       if(n->status.sptps && !tcponly_neighbor) {
+               try_sptps(n);
+               if(!n->status.validkey)
+                       return;
+       }
+
+       node_t *via = (n->via != myself) ? n->via : n->nexthop;
+       if((myself->options | via->options) & OPTION_TCPONLY)
+               return;
+
+       if(!n->status.sptps && !via->status.validkey && via->last_req_key + 10 <= now.tv_sec) {
+               send_req_key(via);
+               via->last_req_key = now.tv_sec;
+       } else if(via == n || !n->status.sptps || (via->options >> 24) >= 4) {
+               try_udp(via);
+               try_mtu(via);
+       }
+
+       /* If we don't know how to reach "via" yet, then try to reach it through a relay. */
+       node_t *relay = via->nexthop;
+       if(n->status.sptps && !via->status.udp_confirmed && relay != via && (relay->options >> 24) >= 4)
+               try_tx(relay);
+}
+
 /*
   send a packet to the given vpn ip.
 */
@@ -903,7 +1009,7 @@ void send_packet(node_t *n, vpn_packet_t *packet) {
 
        if(n == myself) {
                if(overwrite_mac)
-                        memcpy(packet->data, mymac.x, ETH_ALEN);
+                        memcpy(DATA(packet), mymac.x, ETH_ALEN);
                n->out_packets++;
                n->out_bytes += packet->len;
                devops.write(packet);
@@ -924,7 +1030,7 @@ void send_packet(node_t *n, vpn_packet_t *packet) {
 
        if(n->status.sptps) {
                send_sptps_packet(n, packet);
-               return;
+               goto end;
        }
 
        via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
@@ -938,6 +1044,12 @@ void send_packet(node_t *n, vpn_packet_t *packet) {
                        terminate_connection(via->connection, true);
        } else
                send_udppacket(via, packet);
+
+end:
+       /* Try to improve the tunnel.
+          Note that we do this *after* we send the packet because sending actual packets takes priority
+          with regard to the send buffer space and latency. */
+       try_tx(n);
 }
 
 /* Broadcast a packet using the minimum spanning tree */
@@ -1015,13 +1127,14 @@ void handle_incoming_vpn_data(void *data, int flags) {
        listen_socket_t *ls = data;
        vpn_packet_t pkt;
        char *hostname;
-       sockaddr_t from = {{0}};
-       socklen_t fromlen = sizeof from;
-       node_t *n = NULL;
-       node_t *to = myself;
-       int len;
+       node_id_t nullid = {};
+       sockaddr_t addr = {};
+       socklen_t addrlen = sizeof addr;
+       node_t *from, *to;
+       bool direct = false;
 
-       len = recvfrom(ls->udp.fd, &pkt.dstid, MAXSIZE, 0, &from.sa, &fromlen);
+       pkt.offset = 0;
+       int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
 
        if(len <= 0 || len > MAXSIZE) {
                if(!sockwouldblock(sockerrno))
@@ -1031,70 +1144,76 @@ void handle_incoming_vpn_data(void *data, int flags) {
 
        pkt.len = len;
 
-       sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
+       sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
 
-       bool direct = false;
-       if(len >= sizeof pkt.dstid + sizeof pkt.srcid) {
-               n = lookup_node_id(&pkt.srcid);
-               if(n) {
-                       node_id_t nullid = {0};
-                       if(memcmp(&pkt.dstid, &nullid, sizeof nullid) == 0) {
-                               /* A zero dstid is used to indicate a direct, non-relayed packet. */
-                               direct = true;
-                       } else {
-                               to = lookup_node_id(&pkt.dstid);
-                               if(!to) {
-                                       logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet presumably sent by %s (%s) but with unknown destination ID", n->name, n->hostname);
-                                       return;
-                               }
-                       }
-                       pkt.len -= sizeof pkt.dstid + sizeof pkt.srcid;
-               }
-       }
+       // Try to figure out who sent this packet.
 
-       if(to != myself) {
-               /* We are being asked to relay this packet. */
+       node_t *n = lookup_node_udp(&addr);
 
-               /* Don't allow random strangers to relay through us. Note that we check for *any* known address since we are not necessarily the first relay. */
-               if (!lookup_node_udp(&from)) {
-                       logger(DEBUG_PROTOCOL, LOG_WARNING, "Refusing to relay packet from (presumably) %s (%s) to (presumably) %s (%s) because the packet comes from an unknown address", n->name, n->hostname, to->name, to->hostname);
-                       return;
+       if(!n) {
+               // It might be from a 1.1 node, which might have a source ID in the packet.
+               pkt.offset = 2 * sizeof(node_id_t);
+               from = lookup_node_id(SRCID(&pkt));
+               if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
+                       if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
+                               n = from;
+                       else
+                               goto skip_harder;
                }
-
-               send_sptps_data_priv(to, n, 0, pkt.seqno, pkt.len);
-               return;
        }
 
        if(!n) {
-               /* Most likely an old-style packet without node IDs. */
-               direct = true;
-               memmove(pkt.seqno, &pkt.dstid, sizeof pkt - offsetof(vpn_packet_t, seqno));
-               n = lookup_node_udp(&from);
+               pkt.offset = 0;
+               n = try_harder(&addr, &pkt);
        }
 
-       if(!n)
-               n = try_harder(&from, &pkt);
-
+skip_harder:
        if(!n) {
                if(debug_level >= DEBUG_PROTOCOL) {
-                       hostname = sockaddr2hostname(&from);
+                       hostname = sockaddr2hostname(&addr);
                        logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
                        free(hostname);
                }
                return;
        }
 
-       if(!receive_udppacket(n, &pkt))
+       if(n->status.sptps) {
+               pkt.offset = 2 * sizeof(node_id_t);
+
+               if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
+                       direct = true;
+                       from = n;
+                       to = myself;
+               } else {
+                       from = lookup_node_id(SRCID(&pkt));
+                       to = lookup_node_id(DSTID(&pkt));
+               }
+               if(!from || !to) {
+                       logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
+                       return;
+               }
+
+               if(to != myself) {
+                       send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
+                       return;
+               }
+       } else {
+               direct = true;
+               from = n;
+       }
+
+       pkt.offset = 0;
+       if(!receive_udppacket(from, &pkt))
                return;
 
        n->sock = ls - listen_socket;
-       if(direct && sockaddrcmp(&from, &n->address))
-               update_node_udp(n, &from);
+       if(direct && sockaddrcmp(&addr, &n->address))
+               update_node_udp(n, &addr);
 }
 
 void handle_device_data(void *data, int flags) {
        vpn_packet_t packet;
-
+       packet.offset = DEFAULT_PACKET_OFFSET;
        packet.priority = 0;
 
        if(devops.read(&packet)) {